From c76a3fedfa62c44e166ee9e2d89b52647d04f9f6 Mon Sep 17 00:00:00 2001 From: UncleSp1d3r Date: Sat, 7 Mar 2026 18:07:51 -0500 Subject: [PATCH] feat(parser): add Date and QDate types with serialization support Signed-off-by: UncleSp1d3r --- AGENTS.md | 5 +- Cargo.lock | 104 +++++ Cargo.toml | 1 + ROADMAP.md | 2 +- docs/src/api-reference.md | 28 +- src/evaluator/strength.rs | 36 +- src/evaluator/types/date.rs | 715 +++++++++++++++++++++++++++++++++++ src/evaluator/types/mod.rs | 16 +- src/evaluator/types/tests.rs | 162 ++++++++ src/parser/ast.rs | 52 ++- src/parser/codegen.rs | 10 + src/parser/types.rs | 71 +++- 12 files changed, 1178 insertions(+), 24 deletions(-) create mode 100644 src/evaluator/types/date.rs diff --git a/AGENTS.md b/AGENTS.md index aed9486b..83d75dc6 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -208,7 +208,7 @@ cargo test --doc # Test documentation examples ### Currently Implemented (v0.1.0) - **Offsets**: Absolute and from-end specifications (indirect and relative are parsed but not yet evaluated) -- **Types**: `byte`, `short`, `long`, `quad`, `float`, `double`, `string` with endianness support; unsigned variants `ubyte`, `ushort`/`ubeshort`/`uleshort`, `ulong`/`ubelong`/`ulelong`, `uquad`/`ubequad`/`ulequad`; float/double endian variants `befloat`/`lefloat`, `bedouble`/`ledouble`; types are signed by default (libmagic-compatible) +- **Types**: `byte`, `short`, `long`, `quad`, `float`, `double`, `string` with endianness support; unsigned variants `ubyte`, `ushort`/`ubeshort`/`uleshort`, `ulong`/`ubelong`/`ulelong`, `uquad`/`ubequad`/`ulequad`; float/double endian variants `befloat`/`lefloat`, `bedouble`/`ledouble`; 32-bit date/timestamp types `date`/`ldate`/`bedate`/`beldate`/`ledate`/`leldate`; 64-bit date/timestamp types `qdate`/`qldate`/`beqdate`/`beqldate`/`leqdate`/`leqldate`; date values formatted as `"Www Mmm DD HH:MM:SS YYYY"` matching GNU `file` output; types are signed by default (libmagic-compatible) - **Operators**: `=` (equal), `!=` 
(not equal), `<` (less than), `>` (greater than), `<=` (less equal), `>=` (greater equal), `&` (bitwise AND with optional mask), `^` (bitwise XOR), `~` (bitwise NOT), `x` (any value) - **Nested Rules**: Hierarchical rule evaluation with proper indentation - **String Matching**: Exact string matching with null-termination @@ -216,7 +216,7 @@ cargo test --doc # Test documentation examples ### Planned Features (v1.0+) - Regex type: Pattern matching with binary-safe regex support -- Additional types: floats, doubles, dates +- Additional types: pascal strings - Search type: Multi-pattern string searching ### Future Enhancement: Binary-Safe Regex Handling @@ -240,7 +240,6 @@ impl BinaryRegex for regex::bytes::Regex { - No regex/search pattern matching - 64-bit integer types: `quad`/`uquad`, `bequad`/`ubequad`, `lequad`/`ulequad` are implemented; `qquad` (128-bit) is not yet supported -- No date/time types (date, qdate, ldate, qldate) - String evaluation reads until first NUL or end-of-buffer by default; `max_length: Some(_)` is supported internally but no dedicated fixed-length string parser syntax exists yet ### Operators diff --git a/Cargo.lock b/Cargo.lock index f855f0b2..5908a24a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -20,6 +20,15 @@ dependencies = [ "cc", ] +[[package]] +name = "android_system_properties" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "819e7219dbd41043ac279b19830f2efc897156490d7fd6ea916720117ee66311" +dependencies = [ + "libc", +] + [[package]] name = "anes" version = "0.1.6" @@ -184,6 +193,17 @@ version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "613afe47fcd5fac7ccf1db93babcb082c5994d996f20b8b159f2ad1658eb5724" +[[package]] +name = "chrono" +version = "0.4.44" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c673075a2e0e5f4a1dde27ce9dee1ea4558c7ffe648f576438a20ca1d2acc4b0" +dependencies = [ + "iana-time-zone", + 
"num-traits", + "windows-link", +] + [[package]] name = "ciborium" version = "0.2.2" @@ -287,6 +307,12 @@ dependencies = [ "windows-sys 0.59.0", ] +[[package]] +name = "core-foundation-sys" +version = "0.8.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "773648b94d0e5d620f64f280777445740e61fe701025087ec8b57f45c791888b" + [[package]] name = "criterion" version = "0.8.2" @@ -500,6 +526,30 @@ version = "0.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" +[[package]] +name = "iana-time-zone" +version = "0.1.65" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e31bc9ad994ba00e440a8aa5c9ef0ec67d5cb5e5cb0cc7f8b744a35b389cc470" +dependencies = [ + "android_system_properties", + "core-foundation-sys", + "iana-time-zone-haiku", + "js-sys", + "log", + "wasm-bindgen", + "windows-core", +] + +[[package]] +name = "iana-time-zone-haiku" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f31827a206f56af32e590ba56d5d2d085f558508192593743f16b2306495269f" +dependencies = [ + "cc", +] + [[package]] name = "id-arena" version = "2.3.0" @@ -581,6 +631,7 @@ dependencies = [ "assert_cmd", "byteorder", "cfg-if", + "chrono", "clap", "clap-stdin", "clap_complete", @@ -1273,12 +1324,65 @@ version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" +[[package]] +name = "windows-core" +version = "0.62.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b8e83a14d34d0623b51dce9581199302a221863196a1dde71a7663a4c2be9deb" +dependencies = [ + "windows-implement", + "windows-interface", + "windows-link", + "windows-result", + "windows-strings", +] + +[[package]] +name = "windows-implement" +version = "0.60.2" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "053e2e040ab57b9dc951b72c264860db7eb3b0200ba345b4e4c3b14f67855ddf" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "windows-interface" +version = "0.59.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3f316c4a2570ba26bbec722032c4099d8c8bc095efccdc15688708623367e358" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "windows-link" version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f0805222e57f7521d6a62e36fa9163bc891acd422f971defe97d64e70d0a4fe5" +[[package]] +name = "windows-result" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7781fa89eaf60850ac3d2da7af8e5242a5ea78d1a11c49bf2910bb5a73853eb5" +dependencies = [ + "windows-link", +] + +[[package]] +name = "windows-strings" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7837d08f69c77cf6b07689544538e017c1bfcf57e34b4c0ff58e6c2cd3b37091" +dependencies = [ + "windows-link", +] + [[package]] name = "windows-sys" version = "0.59.0" diff --git a/Cargo.toml b/Cargo.toml index 6d2a7843..f417cdb8 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -145,6 +145,7 @@ path = "src/main.rs" [dependencies] byteorder = "1.5.0" cfg-if = "1.0.4" +chrono = { version = "0.4.41", default-features = false, features = ["std", "clock"] } clap = { version = "4.5.60", features = ["derive"] } clap-stdin = "0.8.1" clap_complete = "4.5.66" diff --git a/ROADMAP.md b/ROADMAP.md index 4baa5d1f..041c5c5e 100644 --- a/ROADMAP.md +++ b/ROADMAP.md @@ -32,7 +32,7 @@ See [GitHub Milestones](https://github.com/EvilBit-Labs/libmagic-rs/milestones) - [ ] Convert `evaluator/types.rs` to directory module ([#63](https://github.com/EvilBit-Labs/libmagic-rs/issues/63)) - [ ] Regex and search types ([#39](https://github.com/EvilBit-Labs/libmagic-rs/issues/39)) - [ ] 
Float and double types ([#40](https://github.com/EvilBit-Labs/libmagic-rs/issues/40)) -- [ ] Date and timestamp types ([#41](https://github.com/EvilBit-Labs/libmagic-rs/issues/41)) +- [x] Date and timestamp types ([#41](https://github.com/EvilBit-Labs/libmagic-rs/issues/41)) - [ ] Pascal string type ([#43](https://github.com/EvilBit-Labs/libmagic-rs/issues/43)) - [ ] Meta-types: default, clear, name, use, indirect ([#42](https://github.com/EvilBit-Labs/libmagic-rs/issues/42)) diff --git a/docs/src/api-reference.md b/docs/src/api-reference.md index 1a685f85..3b8b2ded 100644 --- a/docs/src/api-reference.md +++ b/docs/src/api-reference.md @@ -287,13 +287,13 @@ Value types for matching. use libmagic_rs::Value; ``` -| Variant | Description | -| ---------------- | ------------------------------------------------------- | -| `Uint(u64)` | Unsigned integer | -| `Int(i64)` | Signed integer | -| `Float(f64)` | Floating-point value (added in v0.5.0) | -| `Bytes(Vec)` | Byte sequence | -| `String(String)` | String value | +| Variant | Description | +| ---------------- | -------------------------------------- | +| `Uint(u64)` | Unsigned integer | +| `Int(i64)` | Signed integer | +| `Float(f64)` | Floating-point value (added in v0.5.0) | +| `Bytes(Vec)` | Byte sequence | +| `String(String)` | String value | The `Value` enum derives `PartialEq` but no longer derives `Eq` (removed in v0.5.0 to support floating-point values). @@ -411,14 +411,14 @@ Result from internal evaluation. 
use libmagic_rs::evaluator::MatchResult; ``` -| Field | Type | Description | -| ------------ | -------- | ------------------------------------- | -| `message` | `String` | Match description | -| `offset` | `usize` | Match offset | -| `level` | `u32` | Rule level | -| `value` | `Value` | Matched value | +| Field | Type | Description | +| ------------ | ---------- | ----------------------------------------- | +| `message` | `String` | Match description | +| `offset` | `usize` | Match offset | +| `level` | `u32` | Rule level | +| `value` | `Value` | Matched value | | `type_kind` | `TypeKind` | Type used to read value (added in v0.5.0) | -| `confidence` | `f64` | Confidence score | +| `confidence` | `f64` | Confidence score | ## Output Module diff --git a/src/evaluator/strength.rs b/src/evaluator/strength.rs index 4ee12593..5baa7078 100644 --- a/src/evaluator/strength.rs +++ b/src/evaluator/strength.rs @@ -78,9 +78,9 @@ pub fn calculate_default_strength(rule: &MagicRule) -> i32 { if max_length.is_some() { base + 5 } else { base } } // 64-bit types are most specific among numerics - TypeKind::Quad { .. } | TypeKind::Double { .. } => 16, + TypeKind::Quad { .. } | TypeKind::Double { .. } | TypeKind::QDate { .. } => 16, // 32-bit types are fairly specific - TypeKind::Long { .. } | TypeKind::Float { .. } => 15, + TypeKind::Long { .. } | TypeKind::Float { .. } | TypeKind::Date { .. } => 15, // 16-bit integers are moderately specific TypeKind::Short { .. 
} => 10, // Single bytes are least specific @@ -431,6 +431,38 @@ mod tests { assert_eq!(strength, 36); } + #[test] + fn test_strength_type_date() { + let rule = make_rule( + TypeKind::Date { + endian: Endianness::Big, + utc: true, + }, + Operator::Equal, + OffsetSpec::Absolute(0), + Value::Uint(0), + ); + let strength = calculate_default_strength(&rule); + // Date: 15, Equal: 10, Absolute: 10, Numeric: 0 = 35 + assert_eq!(strength, 35); + } + + #[test] + fn test_strength_type_qdate() { + let rule = make_rule( + TypeKind::QDate { + endian: Endianness::Little, + utc: false, + }, + Operator::Equal, + OffsetSpec::Absolute(0), + Value::Uint(0), + ); + let strength = calculate_default_strength(&rule); + // QDate: 16, Equal: 10, Absolute: 10, Numeric: 0 = 36 + assert_eq!(strength, 36); + } + #[test] fn test_strength_type_string() { let rule = make_rule( diff --git a/src/evaluator/types/date.rs b/src/evaluator/types/date.rs new file mode 100644 index 00000000..2977f85c --- /dev/null +++ b/src/evaluator/types/date.rs @@ -0,0 +1,715 @@ +// Copyright (c) 2025-2026 the libmagic-rs contributors +// SPDX-License-Identifier: Apache-2.0 + +use super::TypeReadError; +use crate::parser::ast::{Endianness, Value}; +use byteorder::{BigEndian, ByteOrder, LittleEndian, NativeEndian}; + +/// Day-of-week names matching GNU `file` output format. +const DAY_NAMES: [&str; 7] = ["Thu", "Fri", "Sat", "Sun", "Mon", "Tue", "Wed"]; + +/// Month names matching GNU `file` output format. +const MONTH_NAMES: [&str; 12] = [ + "Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec", +]; + +/// Safely reads a 32-bit Unix timestamp from the buffer at the specified offset +/// and formats it as a human-readable date string. +/// +/// The 4 bytes are interpreted as an unsigned 32-bit integer representing seconds +/// since the Unix epoch (1970-01-01 00:00:00 UTC). 
The result is returned as a +/// `Value::String` formatted like `"Thu Jan 1 00:00:00 1970"`, matching GNU `file` +/// output for `date` types. +/// +/// # Arguments +/// +/// * `buffer` - The byte buffer to read from +/// * `offset` - The offset position to start reading from +/// * `endian` - The byte order to use when interpreting the bytes +/// * `utc` - Whether to format as UTC (true) or local time (false) +/// +/// # Examples +/// +/// ``` +/// use libmagic_rs::evaluator::types::read_date; +/// use libmagic_rs::parser::ast::{Endianness, Value}; +/// +/// // Unix epoch (0) in big-endian +/// let buffer = &[0x00, 0x00, 0x00, 0x00]; +/// let result = read_date(buffer, 0, Endianness::Big, true).unwrap(); +/// assert_eq!(result, Value::String("Thu Jan 1 00:00:00 1970".to_string())); +/// ``` +/// +/// # Errors +/// +/// Returns `TypeReadError::BufferOverrun` if fewer than 4 bytes are available at the +/// requested offset. +pub fn read_date( + buffer: &[u8], + offset: usize, + endian: Endianness, + utc: bool, +) -> Result<Value, TypeReadError> { + let end = offset.checked_add(4).ok_or(TypeReadError::BufferOverrun { + offset, + buffer_len: buffer.len(), + })?; + let bytes = buffer + .get(offset..end) + .ok_or(TypeReadError::BufferOverrun { + offset, + buffer_len: buffer.len(), + })?; + + let secs = match endian { + Endianness::Little => LittleEndian::read_u32(bytes), + Endianness::Big => BigEndian::read_u32(bytes), + Endianness::Native => NativeEndian::read_u32(bytes), + }; + + Ok(Value::String(format_unix_timestamp_32(secs, utc))) +} + +/// Safely reads a 64-bit Unix timestamp from the buffer at the specified offset +/// and formats it as a human-readable date string. +/// +/// The 8 bytes are interpreted as an unsigned 64-bit integer representing seconds +/// since the Unix epoch (1970-01-01 00:00:00 UTC). The result is returned as a +/// `Value::String` formatted like `"Thu Jan 1 00:00:00 1970"`, matching GNU `file` +/// output for `qdate` types. 
+/// +/// # Arguments +/// +/// * `buffer` - The byte buffer to read from +/// * `offset` - The offset position to start reading from +/// * `endian` - The byte order to use when interpreting the bytes +/// * `utc` - Whether to format as UTC (true) or local time (false) +/// +/// # Examples +/// +/// ``` +/// use libmagic_rs::evaluator::types::read_qdate; +/// use libmagic_rs::parser::ast::{Endianness, Value}; +/// +/// // Unix epoch (0) in little-endian +/// let buffer = &[0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00]; +/// let result = read_qdate(buffer, 0, Endianness::Little, true).unwrap(); +/// assert_eq!(result, Value::String("Thu Jan 1 00:00:00 1970".to_string())); +/// ``` +/// +/// # Errors +/// +/// Returns `TypeReadError::BufferOverrun` if fewer than 8 bytes are available at the +/// requested offset. +pub fn read_qdate( + buffer: &[u8], + offset: usize, + endian: Endianness, + utc: bool, +) -> Result<Value, TypeReadError> { + let end = offset.checked_add(8).ok_or(TypeReadError::BufferOverrun { + offset, + buffer_len: buffer.len(), + })?; + let bytes = buffer + .get(offset..end) + .ok_or(TypeReadError::BufferOverrun { + offset, + buffer_len: buffer.len(), + })?; + + let secs = match endian { + Endianness::Little => LittleEndian::read_u64(bytes), + Endianness::Big => BigEndian::read_u64(bytes), + Endianness::Native => NativeEndian::read_u64(bytes), + }; + + Ok(Value::String(format_unix_timestamp_64(secs, utc))) +} + +/// Formats a numeric timestamp value (from a rule operand) as a date string. +/// +/// This is the shared formatter used by `coerce_value_to_type` to normalize +/// numeric expected values into the same `Value::String` representation produced +/// by `read_date` / `read_qdate`, ensuring operator comparisons work correctly. +pub(crate) fn format_timestamp_value(secs: u64, utc: bool) -> String { + format_unix_timestamp_64(secs, utc) +} + +/// Formats a 32-bit Unix timestamp as a human-readable date string. 
+fn format_unix_timestamp_32(secs: u32, utc: bool) -> String { + format_unix_timestamp_64(u64::from(secs), utc) +} + +/// Returns the local timezone offset in seconds east of UTC for the given timestamp. +/// +/// Uses the `chrono` crate to determine the UTC offset for the given timestamp +/// in-process, without spawning external processes. Returns 0 if the offset +/// cannot be determined (e.g., timestamps that overflow `i64`). +#[allow(clippy::cast_possible_truncation)] +fn local_utc_offset_secs(unix_secs: u64) -> i64 { + use chrono::{DateTime, Local, Offset}; + + let Ok(ts) = i64::try_from(unix_secs) else { + return 0; + }; + + let Some(utc_dt) = DateTime::from_timestamp(ts, 0) else { + return 0; + }; + + let local_dt = utc_dt.with_timezone(&Local); + i64::from(local_dt.offset().fix().local_minus_utc()) +} + +/// Formats a 64-bit Unix timestamp (seconds since epoch) as a human-readable date +/// string matching GNU `file` output: `"Www Mmm DD HH:MM:SS YYYY"`. +/// +/// When `utc` is true, the timestamp is formatted in UTC. When false, the system's +/// local timezone offset is applied. +/// +/// Uses signed `i128` arithmetic so that negative timezone adjustments near epoch +/// produce valid pre-1970 dates instead of clamping to zero. +/// +/// Uses an O(1) civil-date conversion algorithm based on days since epoch, avoiding +/// any iterative year-walking that could hang on large timestamps. 
+#[allow( + clippy::arithmetic_side_effects, + clippy::integer_division, + clippy::modulo_arithmetic, + clippy::cast_possible_truncation, + clippy::cast_possible_wrap, + clippy::cast_sign_loss +)] +fn format_unix_timestamp_64(secs: u64, utc: bool) -> String { + // Use i128 for safe arithmetic with timezone offsets, supporting pre-epoch results + let effective_secs: i128 = if utc { + i128::from(secs) + } else { + let offset = local_utc_offset_secs(secs); + i128::from(secs) + i128::from(offset) + }; + + // Day of week: Jan 1 1970 was a Thursday (index 0 in DAY_NAMES) + // Use Euclidean division/remainder for correct handling of negative values + let total_days = effective_secs.div_euclid(86400); + let day_of_week = total_days.rem_euclid(7) as usize; + let dow_name = DAY_NAMES[day_of_week]; + + // Break total seconds into time-of-day components + let day_secs = effective_secs.rem_euclid(86400); + let hour = day_secs / 3600; + let minute = (day_secs % 3600) / 60; + let second = day_secs % 60; + + // O(1) civil-date conversion using Howard Hinnant's algorithm, adapted for i128. + // Shift epoch from 1970-01-01 to 0000-03-01 for easier leap-year math. + // Use Euclidean division for correct handling of negative day counts. 
+ let z = total_days + 719_468; // days from 0000-03-01 to Unix epoch + let era = z.div_euclid(146_097); // 400-year era + let doe = z - era * 146_097; // day of era [0, 146096] + let yoe = (doe - doe / 1460 + doe / 36524 - doe / 146_096) / 365; // year of era + let y = yoe + era * 400; // absolute year (March-based) + let doy = doe - (365 * yoe + yoe / 4 - yoe / 100); // day of year [0, 365] + let mp = (5 * doy + 2) / 153; // month index [0, 11] (March=0) + let day = doy - (153 * mp + 2) / 5 + 1; // day of month [1, 31] + let month = if mp < 10 { mp + 3 } else { mp - 9 }; // calendar month [1, 12] + let year = if month <= 2 { y + 1 } else { y }; // adjust year for Jan/Feb + + let month_name = MONTH_NAMES[(month - 1) as usize]; + + format!("{dow_name} {month_name} {day:2} {hour:02}:{minute:02}:{second:02} {year}") +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_read_date_endianness() { + let cases: Vec<(&[u8], Endianness, &str)> = vec![ + // Epoch in LE + ( + &[0x00, 0x00, 0x00, 0x00], + Endianness::Little, + "Thu Jan 1 00:00:00 1970", + ), + // Epoch in BE + ( + &[0x00, 0x00, 0x00, 0x00], + Endianness::Big, + "Thu Jan 1 00:00:00 1970", + ), + // 1_000_000_000 = 0x3B9ACA00 in BE + ( + &[0x3B, 0x9A, 0xCA, 0x00], + Endianness::Big, + "Sun Sep 9 01:46:40 2001", + ), + // 1_000_000_000 = 0x3B9ACA00 in LE (bytes reversed) + ( + &[0x00, 0xCA, 0x9A, 0x3B], + Endianness::Little, + "Sun Sep 9 01:46:40 2001", + ), + ]; + + for (buffer, endian, expected) in cases { + let result = read_date(buffer, 0, endian, true).unwrap(); + assert_eq!( + result, + Value::String(expected.to_string()), + "endian={endian:?}, expected={expected}" + ); + } + } + + #[test] + fn test_read_date_native_endian() { + // Epoch bytes -- both LE and BE are all zeros, so native must also work + let buffer = &[0x00, 0x00, 0x00, 0x00]; + let result = read_date(buffer, 0, Endianness::Native, true).unwrap(); + assert_eq!( + result, + Value::String("Thu Jan 1 00:00:00 1970".to_string()) + 
); + } + + #[test] + fn test_read_date_utc_vs_local() { + // Use a known timestamp: 1_000_000_000 (2001-09-09 01:46:40 UTC) + let buffer = &[0x3B, 0x9A, 0xCA, 0x00]; // BE + let utc_result = read_date(buffer, 0, Endianness::Big, true).unwrap(); + let local_result = read_date(buffer, 0, Endianness::Big, false).unwrap(); + + // UTC must produce the known string + assert_eq!( + utc_result, + Value::String("Sun Sep 9 01:46:40 2001".to_string()), + "UTC date should match expected" + ); + + // Both should return Value::String + match (&utc_result, &local_result) { + (Value::String(utc_s), Value::String(local_s)) => { + // If the system timezone offset differs from UTC, the strings will differ + let offset = local_utc_offset_secs(1_000_000_000); + if offset != 0 { + assert_ne!( + utc_s, local_s, + "UTC and local should differ when timezone offset is non-zero" + ); + } + } + _ => panic!("Expected Value::String for both utc and local"), + } + } + + #[test] + fn test_read_date_at_offset() { + // Two bytes of padding, then epoch in BE + let buffer = &[0xaa, 0xbb, 0x00, 0x00, 0x00, 0x00]; + let result = read_date(buffer, 2, Endianness::Big, true).unwrap(); + assert_eq!( + result, + Value::String("Thu Jan 1 00:00:00 1970".to_string()) + ); + } + + #[test] + fn test_read_date_returns_value_string() { + let buffer = &[0x00, 0x00, 0x00, 0x00]; + match read_date(buffer, 0, Endianness::Big, true).unwrap() { + Value::String(_) => {} + other => panic!("Expected Value::String, got {other:?}"), + } + } + + #[test] + fn test_read_date_buffer_overrun() { + // Too few bytes + assert_eq!( + read_date(&[0x00, 0x00, 0x80], 0, Endianness::Little, true).unwrap_err(), + TypeReadError::BufferOverrun { + offset: 0, + buffer_len: 3, + } + ); + + // Empty buffer + assert_eq!( + read_date(&[], 0, Endianness::Big, true).unwrap_err(), + TypeReadError::BufferOverrun { + offset: 0, + buffer_len: 0, + } + ); + + // Offset past end + assert_eq!( + read_date(&[0x00; 8], 6, Endianness::Little, 
true).unwrap_err(), + TypeReadError::BufferOverrun { + offset: 6, + buffer_len: 8, + } + ); + } + + #[test] + fn test_read_date_offset_overflow() { + let buffer = &[0x00; 4]; + assert_eq!( + read_date(buffer, usize::MAX, Endianness::Little, true).unwrap_err(), + TypeReadError::BufferOverrun { + offset: usize::MAX, + buffer_len: 4, + } + ); + } + + #[test] + fn test_read_qdate_endianness() { + let cases: Vec<(&[u8], Endianness, &str)> = vec![ + // Epoch in LE + ( + &[0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00], + Endianness::Little, + "Thu Jan 1 00:00:00 1970", + ), + // Epoch in BE + ( + &[0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00], + Endianness::Big, + "Thu Jan 1 00:00:00 1970", + ), + // Epoch in Native + ( + &[0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00], + Endianness::Native, + "Thu Jan 1 00:00:00 1970", + ), + // 1_000_000_000u64 = 0x000000003B9ACA00 in BE + ( + &[0x00, 0x00, 0x00, 0x00, 0x3B, 0x9A, 0xCA, 0x00], + Endianness::Big, + "Sun Sep 9 01:46:40 2001", + ), + // 1_000_000_000u64 in LE + ( + &[0x00, 0xCA, 0x9A, 0x3B, 0x00, 0x00, 0x00, 0x00], + Endianness::Little, + "Sun Sep 9 01:46:40 2001", + ), + ]; + + for (buffer, endian, expected) in cases { + let result = read_qdate(buffer, 0, endian, true).unwrap(); + assert_eq!( + result, + Value::String(expected.to_string()), + "endian={endian:?}, expected={expected}" + ); + } + } + + #[test] + fn test_read_qdate_native_endian() { + // Epoch bytes -- all zeros, so native must work regardless of platform + let buffer = &[0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00]; + let result = read_qdate(buffer, 0, Endianness::Native, true).unwrap(); + assert_eq!( + result, + Value::String("Thu Jan 1 00:00:00 1970".to_string()) + ); + } + + #[test] + fn test_read_qdate_utc_vs_local() { + // 1_000_000_000u64 in BE + let buffer = &[0x00, 0x00, 0x00, 0x00, 0x3B, 0x9A, 0xCA, 0x00]; + let utc_result = read_qdate(buffer, 0, Endianness::Big, true).unwrap(); + let local_result = read_qdate(buffer, 0, Endianness::Big, 
false).unwrap(); + + // UTC must produce the known string + assert_eq!( + utc_result, + Value::String("Sun Sep 9 01:46:40 2001".to_string()), + "UTC qdate should match expected" + ); + + match (&utc_result, &local_result) { + (Value::String(utc_s), Value::String(local_s)) => { + let offset = local_utc_offset_secs(1_000_000_000); + if offset != 0 { + assert_ne!( + utc_s, local_s, + "UTC and local qdate should differ when timezone offset is non-zero" + ); + } + } + _ => panic!("Expected Value::String for both utc and local qdate"), + } + } + + #[test] + fn test_read_qdate_at_offset() { + // Three bytes of padding, then epoch in BE + let buffer = &[ + 0xaa, 0xbb, 0xcc, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + ]; + let result = read_qdate(buffer, 3, Endianness::Big, true).unwrap(); + assert_eq!( + result, + Value::String("Thu Jan 1 00:00:00 1970".to_string()) + ); + } + + #[test] + fn test_read_qdate_returns_value_string() { + let buffer = &[0x00; 8]; + match read_qdate(buffer, 0, Endianness::Big, true).unwrap() { + Value::String(_) => {} + other => panic!("Expected Value::String, got {other:?}"), + } + } + + #[test] + fn test_read_qdate_buffer_overrun() { + // Too few bytes + assert_eq!( + read_qdate(&[0x00; 7], 0, Endianness::Little, true).unwrap_err(), + TypeReadError::BufferOverrun { + offset: 0, + buffer_len: 7, + } + ); + + // Empty buffer + assert_eq!( + read_qdate(&[], 0, Endianness::Big, true).unwrap_err(), + TypeReadError::BufferOverrun { + offset: 0, + buffer_len: 0, + } + ); + + // Offset past end + assert_eq!( + read_qdate(&[0x00; 16], 10, Endianness::Little, true).unwrap_err(), + TypeReadError::BufferOverrun { + offset: 10, + buffer_len: 16, + } + ); + } + + #[test] + fn test_read_qdate_offset_overflow() { + let buffer = &[0x00; 8]; + assert_eq!( + read_qdate(buffer, usize::MAX, Endianness::Little, true).unwrap_err(), + TypeReadError::BufferOverrun { + offset: usize::MAX, + buffer_len: 8, + } + ); + } + + #[test] + fn 
test_format_unix_timestamp_known_dates() { + // Verify O(1) algorithm against known dates + let cases: Vec<(u64, &str)> = vec![ + (0, "Thu Jan 1 00:00:00 1970"), + (1, "Thu Jan 1 00:00:01 1970"), + (86400, "Fri Jan 2 00:00:00 1970"), + // 2000-01-01 00:00:00 UTC = 946684800 + (946_684_800, "Sat Jan 1 00:00:00 2000"), + // 2001-09-09 01:46:40 UTC = 1000000000 + (1_000_000_000, "Sun Sep 9 01:46:40 2001"), + // Leap year date: 2000-02-29 = 951782400 + (951_782_400, "Tue Feb 29 00:00:00 2000"), + // Non-leap year: 2001-03-01 = 983404800 + (983_404_800, "Thu Mar 1 00:00:00 2001"), + // Max u32 value: 4294967295 = 2106-02-07 06:28:15 + (4_294_967_295, "Sun Feb 7 06:28:15 2106"), + ]; + + for (secs, expected) in cases { + let result = format_unix_timestamp_64(secs, true); + assert_eq!(result, expected, "timestamp={secs}"); + } + } + + #[test] + fn test_format_unix_timestamp_large_qdate_value() { + // Verify very large u64 timestamp completes and returns valid string. + // This would hang with an iterative year-walk algorithm. 
+ let large_ts: u64 = u64::MAX / 86400 * 86400; // largest aligned day boundary + let result = format_unix_timestamp_64(large_ts, true); + // Should complete without hanging and contain a year + assert!( + !result.is_empty(), + "Large timestamp should produce non-empty string" + ); + // Should contain a valid day-of-week prefix + assert!( + DAY_NAMES.iter().any(|d| result.starts_with(d)), + "Large timestamp result should start with a valid day name: {result}" + ); + } + + #[test] + fn test_format_timestamp_value_consistency() { + // Verify format_timestamp_value produces the same output as read_date + let secs = 1_000_000_000_u64; + let expected = format_timestamp_value(secs, true); + let buffer = &[0x3B, 0x9A, 0xCA, 0x00]; // 1_000_000_000 in BE + let read_result = read_date(buffer, 0, Endianness::Big, true).unwrap(); + assert_eq!(read_result, Value::String(expected)); + } + + #[test] + fn test_local_utc_offset_known_timestamp() { + // Verify that local_utc_offset_secs returns a plausible value + let offset = local_utc_offset_secs(1_000_000_000); + // UTC offsets range from -12h to +14h + assert!( + (-43200..=50400).contains(&offset), + "Offset {offset} should be within valid UTC offset range" + ); + } + + #[test] + fn test_local_utc_offset_overflow_timestamp() { + // Timestamps exceeding i64::MAX should return 0 + let offset = local_utc_offset_secs(u64::MAX); + assert_eq!(offset, 0, "Overflow timestamp should return 0 offset"); + } + + #[test] + fn test_pre_epoch_local_time_signed_arithmetic() { + // Directly test the formatting algorithm with a pre-epoch effective time. + // Simulate timestamp=0 with a -28800 offset (UTC-8) by calling the + // formatter in UTC mode with a large-enough timestamp and verifying the + // algorithm handles negative effective seconds correctly. 
+ // + // We test the internal algorithm by verifying known pre-epoch equivalent: + // effective_secs = -28800 corresponds to 1969-12-31 16:00:00 + // We can't directly call format_unix_timestamp_64 with negative input + // (it takes u64), so we verify via local_utc_offset_secs behavior. + let offset = local_utc_offset_secs(0); + if offset < 0 { + // On west-of-UTC systems, local date at epoch should be Dec 31, 1969 + let result = read_date(&[0x00; 4], 0, Endianness::Big, false).unwrap(); + match result { + Value::String(s) => { + assert!( + s.contains("1969"), + "Epoch in west-of-UTC zone should show 1969, got: {s}" + ); + } + _ => panic!("Expected Value::String"), + } + } + } + + #[test] + fn test_utc_vs_local_formatted_strings_date() { + // Table-driven UTC vs local for read_date with specific expected strings + let cases: Vec<(&[u8], Endianness, u32, &str)> = vec![ + // Epoch + ( + &[0x00, 0x00, 0x00, 0x00], + Endianness::Big, + 0, + "Thu Jan 1 00:00:00 1970", + ), + // 1_000_000_000 + ( + &[0x3B, 0x9A, 0xCA, 0x00], + Endianness::Big, + 1_000_000_000, + "Sun Sep 9 01:46:40 2001", + ), + ]; + + for (buffer, endian, ts, expected_utc) in cases { + let utc = read_date(buffer, 0, endian, true).unwrap(); + let local = read_date(buffer, 0, endian, false).unwrap(); + + // UTC result must match known string + assert_eq!( + utc, + Value::String(expected_utc.to_string()), + "UTC date for ts={ts}" + ); + + // Local result must be a valid string + match &local { + Value::String(s) => { + assert!( + DAY_NAMES.iter().any(|d| s.starts_with(d)), + "Local date should start with valid day: {s}" + ); + } + other => panic!("Expected Value::String for local, got {other:?}"), + } + + // If timezone offset is non-zero, they must differ + let offset = local_utc_offset_secs(u64::from(ts)); + if offset != 0 { + assert_ne!(utc, local, "UTC and local should differ for ts={ts}"); + } + } + } + + #[test] + fn test_utc_vs_local_formatted_strings_qdate() { + // Table-driven UTC vs local for 
read_qdate with specific expected strings + let cases: Vec<(&[u8], Endianness, u64, &str)> = vec![ + // Epoch + ( + &[0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00], + Endianness::Big, + 0, + "Thu Jan 1 00:00:00 1970", + ), + // 1_000_000_000 + ( + &[0x00, 0x00, 0x00, 0x00, 0x3B, 0x9A, 0xCA, 0x00], + Endianness::Big, + 1_000_000_000, + "Sun Sep 9 01:46:40 2001", + ), + ]; + + for (buffer, endian, ts, expected_utc) in cases { + let utc = read_qdate(buffer, 0, endian, true).unwrap(); + let local = read_qdate(buffer, 0, endian, false).unwrap(); + + // UTC result must match known string + assert_eq!( + utc, + Value::String(expected_utc.to_string()), + "UTC qdate for ts={ts}" + ); + + // Local result must be a valid string + match &local { + Value::String(s) => { + assert!( + DAY_NAMES.iter().any(|d| s.starts_with(d)), + "Local qdate should start with valid day: {s}" + ); + } + other => panic!("Expected Value::String for local qdate, got {other:?}"), + } + + // If timezone offset is non-zero, they must differ + let offset = local_utc_offset_secs(ts); + if offset != 0 { + assert_ne!(utc, local, "UTC and local qdate should differ for ts={ts}"); + } + } + } +} diff --git a/src/evaluator/types/mod.rs b/src/evaluator/types/mod.rs index f26d0c49..96becb13 100644 --- a/src/evaluator/types/mod.rs +++ b/src/evaluator/types/mod.rs @@ -6,6 +6,7 @@ //! This module exposes the public type-reading API and dispatches to focused //! submodules for numeric and string handling. 
+mod date; mod float; mod numeric; mod string; @@ -13,6 +14,8 @@ mod string; use crate::parser::ast::{TypeKind, Value}; use thiserror::Error; +use date::format_timestamp_value; +pub use date::{read_date, read_qdate}; pub use float::{read_double, read_float}; pub use numeric::{read_byte, read_long, read_quad, read_short}; pub use string::read_string; @@ -31,7 +34,7 @@ pub enum TypeReadError { buffer_len: usize, }, /// Unsupported type variant (reserved for future types not yet evaluatable, - /// e.g., regex, float, date). + /// e.g., regex, search, pstring). #[error("Unsupported type: {type_name}")] UnsupportedType { /// The name of the unsupported type. @@ -75,6 +78,8 @@ pub fn read_typed_value( TypeKind::Quad { endian, signed } => read_quad(buffer, offset, *endian, *signed), TypeKind::Float { endian } => read_float(buffer, offset, *endian), TypeKind::Double { endian } => read_double(buffer, offset, *endian), + TypeKind::Date { endian, utc } => read_date(buffer, offset, *endian, *utc), + TypeKind::QDate { endian, utc } => read_qdate(buffer, offset, *endian, *utc), TypeKind::String { max_length } => read_string(buffer, offset, *max_length), } } @@ -117,6 +122,15 @@ pub fn coerce_value_to_type(value: &Value, type_kind: &TypeKind) -> Value { // parsed f64 literals compare correctly against f32-widened file values. #[allow(clippy::cast_possible_truncation)] (Value::Float(v), TypeKind::Float { .. }) => Value::Float(f64::from(*v as f32)), + // Normalize numeric expected values for date types into formatted timestamp + // strings so they match the Value::String representation from read_date/read_qdate. + (Value::Uint(v), TypeKind::Date { utc, .. } | TypeKind::QDate { utc, .. }) => { + Value::String(format_timestamp_value(*v, *utc)) + } + #[allow(clippy::cast_sign_loss)] + (Value::Int(v), TypeKind::Date { utc, .. } | TypeKind::QDate { utc, .. 
}) if *v >= 0 => { + Value::String(format_timestamp_value(*v as u64, *utc)) + } _ => value.clone(), } } diff --git a/src/evaluator/types/tests.rs b/src/evaluator/types/tests.rs index 9cb7c0e0..75cf06f4 100644 --- a/src/evaluator/types/tests.rs +++ b/src/evaluator/types/tests.rs @@ -306,6 +306,74 @@ fn test_coerce_value_to_type_double_preserves_f64() { assert_eq!(coerced, Value::Float(0.1_f64)); } +#[test] +fn test_read_typed_value_date() { + // 0x00000001 BE = 1 second after epoch + let buffer = &[0x00, 0x00, 0x00, 0x01]; + let result = read_typed_value( + buffer, + 0, + &TypeKind::Date { + endian: Endianness::Big, + utc: true, + }, + ) + .unwrap(); + assert_eq!( + result, + Value::String("Thu Jan 1 00:00:01 1970".to_string()) + ); + + // Same bytes in LE = 0x01000000 = 16777216 seconds + let result_le = read_typed_value( + buffer, + 0, + &TypeKind::Date { + endian: Endianness::Little, + utc: true, + }, + ) + .unwrap(); + match result_le { + Value::String(_) => {} + other => panic!("Expected Value::String, got {other:?}"), + } +} + +#[test] +fn test_read_typed_value_qdate() { + // 0x0000000000000001 BE = 1 second after epoch + let buffer = &[0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01]; + let result = read_typed_value( + buffer, + 0, + &TypeKind::QDate { + endian: Endianness::Big, + utc: true, + }, + ) + .unwrap(); + assert_eq!( + result, + Value::String("Thu Jan 1 00:00:01 1970".to_string()) + ); + + // Same bytes in LE + let result_le = read_typed_value( + buffer, + 0, + &TypeKind::QDate { + endian: Endianness::Little, + utc: true, + }, + ) + .unwrap(); + match result_le { + Value::String(_) => {} + other => panic!("Expected Value::String, got {other:?}"), + } +} + #[test] fn test_read_typed_value_signed_vs_unsigned() { let buffer = &[0xff, 0xff, 0xff, 0xff, 0xff, 0xff]; @@ -564,3 +632,97 @@ fn test_coerce_value_to_type() { ); } } + +#[test] +fn test_coerce_value_to_type_date_numeric() { + // Numeric expected values for Date types should be formatted as 
timestamp strings + let date_type = TypeKind::Date { + endian: Endianness::Big, + utc: true, + }; + + // Uint(0) -> epoch string + let result = coerce_value_to_type(&Value::Uint(0), &date_type); + assert_eq!( + result, + Value::String("Thu Jan 1 00:00:00 1970".to_string()) + ); + + // Uint(1_000_000_000) -> known date + let result = coerce_value_to_type(&Value::Uint(1_000_000_000), &date_type); + assert_eq!( + result, + Value::String("Sun Sep 9 01:46:40 2001".to_string()) + ); + + // Int(0) -> epoch string + let result = coerce_value_to_type(&Value::Int(0), &date_type); + assert_eq!( + result, + Value::String("Thu Jan 1 00:00:00 1970".to_string()) + ); + + // Negative Int should pass through unchanged + let result = coerce_value_to_type(&Value::Int(-1), &date_type); + assert_eq!(result, Value::Int(-1)); + + // String values should pass through unchanged + let s = Value::String("already a string".to_string()); + let result = coerce_value_to_type(&s, &date_type); + assert_eq!(result, s); +} + +#[test] +fn test_coerce_value_to_type_qdate_numeric() { + // Numeric expected values for QDate types should be formatted as timestamp strings + let qdate_type = TypeKind::QDate { + endian: Endianness::Big, + utc: true, + }; + + let result = coerce_value_to_type(&Value::Uint(0), &qdate_type); + assert_eq!( + result, + Value::String("Thu Jan 1 00:00:00 1970".to_string()) + ); + + let result = coerce_value_to_type(&Value::Uint(1_000_000_000), &qdate_type); + assert_eq!( + result, + Value::String("Sun Sep 9 01:46:40 2001".to_string()) + ); +} + +#[test] +fn test_coerce_date_matches_read_date() { + // Verify that coerced numeric operands match the Value::String from read_date + let buffer = &[0x3B, 0x9A, 0xCA, 0x00]; // 1_000_000_000 in BE + let date_type = TypeKind::Date { + endian: Endianness::Big, + utc: true, + }; + + let read_val = read_date(buffer, 0, Endianness::Big, true).unwrap(); + let coerced = coerce_value_to_type(&Value::Uint(1_000_000_000), &date_type); + assert_eq!( + 
read_val, coerced, + "Coerced value should match read_date output" + ); +} + +#[test] +fn test_coerce_qdate_matches_read_qdate() { + // Verify that coerced numeric operands match the Value::String from read_qdate + let buffer = &[0x00, 0x00, 0x00, 0x00, 0x3B, 0x9A, 0xCA, 0x00]; // 1_000_000_000 in BE + let qdate_type = TypeKind::QDate { + endian: Endianness::Big, + utc: true, + }; + + let read_val = read_qdate(buffer, 0, Endianness::Big, true).unwrap(); + let coerced = coerce_value_to_type(&Value::Uint(1_000_000_000), &qdate_type); + assert_eq!( + read_val, coerced, + "Coerced value should match read_qdate output" + ); +} diff --git a/src/parser/ast.rs b/src/parser/ast.rs index b4ae0a7b..6324c8a2 100644 --- a/src/parser/ast.rs +++ b/src/parser/ast.rs @@ -143,6 +143,38 @@ pub enum TypeKind { /// Byte order endian: Endianness, }, + /// 32-bit Unix timestamp (seconds since epoch) + /// + /// # Examples + /// + /// ``` + /// use libmagic_rs::parser::ast::{TypeKind, Endianness}; + /// + /// let date = TypeKind::Date { endian: Endianness::Big, utc: true }; + /// assert_eq!(date, TypeKind::Date { endian: Endianness::Big, utc: true }); + /// ``` + Date { + /// Byte order + endian: Endianness, + /// true = UTC, false = local time + utc: bool, + }, + /// 64-bit Unix timestamp (seconds since epoch) + /// + /// # Examples + /// + /// ``` + /// use libmagic_rs::parser::ast::{TypeKind, Endianness}; + /// + /// let qdate = TypeKind::QDate { endian: Endianness::Little, utc: false }; + /// assert_eq!(qdate, TypeKind::QDate { endian: Endianness::Little, utc: false }); + /// ``` + QDate { + /// Byte order + endian: Endianness, + /// true = UTC, false = local time + utc: bool, + }, /// String data String { /// Maximum length to read @@ -171,8 +203,8 @@ impl TypeKind { match self { Self::Byte { .. } => Some(8), Self::Short { .. } => Some(16), - Self::Long { .. } | Self::Float { .. } => Some(32), - Self::Quad { .. } | Self::Double { .. } => Some(64), + Self::Long { .. 
} | Self::Float { .. } | Self::Date { .. } => Some(32), + Self::Quad { .. } | Self::Double { .. } | Self::QDate { .. } => Some(64), Self::String { .. } => None, } } @@ -812,6 +844,22 @@ mod tests { TypeKind::Double { endian: Endianness::Native, }, + TypeKind::Date { + endian: Endianness::Big, + utc: true, + }, + TypeKind::Date { + endian: Endianness::Little, + utc: false, + }, + TypeKind::QDate { + endian: Endianness::Native, + utc: true, + }, + TypeKind::QDate { + endian: Endianness::Big, + utc: false, + }, TypeKind::String { max_length: None }, TypeKind::String { max_length: Some(128), diff --git a/src/parser/codegen.rs b/src/parser/codegen.rs index 38474e3b..658c8577 100644 --- a/src/parser/codegen.rs +++ b/src/parser/codegen.rs @@ -195,6 +195,16 @@ pub fn serialize_type_kind(typ: &TypeKind) -> String { "TypeKind::Double {{ endian: {} }}", serialize_endianness(*endian) ), + TypeKind::Date { endian, utc } => format!( + "TypeKind::Date {{ endian: {}, utc: {} }}", + serialize_endianness(*endian), + utc + ), + TypeKind::QDate { endian, utc } => format!( + "TypeKind::QDate {{ endian: {}, utc: {} }}", + serialize_endianness(*endian), + utc + ), TypeKind::String { max_length } => match max_length { Some(value) => { format!("TypeKind::String {{ max_length: Some({value}) }}") diff --git a/src/parser/types.rs b/src/parser/types.rs index 6b19202a..026775bc 100644 --- a/src/parser/types.rs +++ b/src/parser/types.rs @@ -80,6 +80,21 @@ pub fn parse_type_keyword(input: &str) -> IResult<&str, &str> { tag("lefloat"), tag("float"), )), + // Date types -- 32-bit (date) and 64-bit (qdate) + alt(( + tag("beqldate"), + tag("leqldate"), + tag("beqdate"), + tag("leqdate"), + tag("qldate"), + tag("qdate"), + tag("beldate"), + tag("leldate"), + tag("bedate"), + tag("ldate"), + tag("ledate"), + tag("date"), + )), // String types (1 branch, will grow with pstring/search/regex) tag("string"), )) @@ -118,6 +133,7 @@ pub fn parse_type_keyword(input: &str) -> IResult<&str, &str> { /// Panics 
if `type_name` is not a recognized type keyword. This function should /// only be called with values returned by [`parse_type_keyword`]. #[must_use] +#[allow(clippy::too_many_lines)] pub fn type_keyword_to_kind(type_name: &str) -> TypeKind { match type_name { // BYTE types (8-bit) @@ -224,6 +240,58 @@ pub fn type_keyword_to_kind(type_name: &str) -> TypeKind { endian: Endianness::Little, }, + // DATE types (32-bit Unix timestamp) + "date" => TypeKind::Date { + endian: Endianness::Native, + utc: true, + }, + "ldate" => TypeKind::Date { + endian: Endianness::Native, + utc: false, + }, + "bedate" => TypeKind::Date { + endian: Endianness::Big, + utc: true, + }, + "beldate" => TypeKind::Date { + endian: Endianness::Big, + utc: false, + }, + "ledate" => TypeKind::Date { + endian: Endianness::Little, + utc: true, + }, + "leldate" => TypeKind::Date { + endian: Endianness::Little, + utc: false, + }, + + // QDATE types (64-bit Unix timestamp) + "qdate" => TypeKind::QDate { + endian: Endianness::Native, + utc: true, + }, + "qldate" => TypeKind::QDate { + endian: Endianness::Native, + utc: false, + }, + "beqdate" => TypeKind::QDate { + endian: Endianness::Big, + utc: true, + }, + "beqldate" => TypeKind::QDate { + endian: Endianness::Big, + utc: false, + }, + "leqdate" => TypeKind::QDate { + endian: Endianness::Little, + utc: true, + }, + "leqldate" => TypeKind::QDate { + endian: Endianness::Little, + utc: false, + }, + // STRING type "string" => TypeKind::String { max_length: None }, @@ -402,7 +470,8 @@ mod tests { "byte", "ubyte", "short", "ushort", "leshort", "uleshort", "beshort", "ubeshort", "long", "ulong", "lelong", "ulelong", "belong", "ubelong", "quad", "uquad", "lequad", "ulequad", "bequad", "ubequad", "float", "befloat", "lefloat", "double", "bedouble", - "ledouble", "string", + "ledouble", "date", "ldate", "bedate", "beldate", "ledate", "leldate", "qdate", + "qldate", "beqdate", "beqldate", "leqdate", "leqldate", "string", ]; for keyword in keywords { let (rest, 
parsed) = parse_type_keyword(keyword).unwrap();