diff --git a/docs/src/architecture.md b/docs/src/architecture.md index b0db2168..be4dc94a 100644 --- a/docs/src/architecture.md +++ b/docs/src/architecture.md @@ -130,7 +130,11 @@ The evaluator executes magic rules against file buffers to identify file types. - `engine/`: Core evaluation engine submodule - `mod.rs`: `evaluate_single_rule`, `evaluate_rules`, and `evaluate_rules_with_config` functions - `tests.rs`: Engine unit tests -- `types.rs`: Type interpretation with endianness handling and signedness coercion +- `types/`: Type interpretation submodule + - `mod.rs`: Public API surface with `read_typed_value`, `coerce_value_to_type`, and type re-exports + - `numeric.rs`: Numeric type handling (`read_byte`, `read_short`, `read_long`, `read_quad`) with endianness and signedness support + - `string.rs`: String type handling (`read_string`) with null-termination and UTF-8 conversion + - `tests.rs`: Module tests - `offset/`: Offset resolution submodule - `mod.rs`: Dispatcher (`resolve_offset`) and re-exports - `absolute.rs`: `OffsetError`, `resolve_absolute_offset` @@ -142,7 +146,7 @@ The evaluator executes magic rules against file buffers to identify file types. - `comparison.rs`: `compare_values`, `apply_less_than`/`greater_than`/`less_equal`/`greater_equal` - `bitwise.rs`: `apply_bitwise_and`, `apply_bitwise_and_mask`, `apply_bitwise_xor`, `apply_bitwise_not` -**Organization Note:** The evaluator module was refactored to split a monolithic 2,638-line `mod.rs` into focused submodules, keeping the public API surface in `mod.rs` and moving core evaluation logic to `engine/mod.rs`. This maintains the same public API through re-exports (no breaking changes) while improving code organization and staying within the 500-600 line module guideline. +**Organization Note:** The evaluator module has been refactored to split monolithic files into focused submodules. 
The initial refactoring split a 2,638-line `mod.rs` into `engine/` submodules, and a subsequent refactoring reorganized the 1,836-line `types.rs` into `types/` submodules for numeric and string handling. The public API surface remains in `mod.rs` with core logic distributed across focused submodules. This maintains the same public API through re-exports (no breaking changes) while improving code organization and staying within the 500-600 line module guideline. **Implemented Features:** diff --git a/src/evaluator/types.rs b/src/evaluator/types.rs deleted file mode 100644 index f2729a96..00000000 --- a/src/evaluator/types.rs +++ /dev/null @@ -1,1836 +0,0 @@ -// Copyright (c) 2025-2026 the libmagic-rs contributors -// SPDX-License-Identifier: Apache-2.0 - -//! Type interpretation for reading and converting bytes from file buffers -//! -//! This module provides functions for safely reading different data types from byte buffers -//! with proper bounds checking and error handling. - -use crate::parser::ast::{Endianness, TypeKind, Value}; -use byteorder::{BigEndian, ByteOrder, LittleEndian, NativeEndian}; -use thiserror::Error; - -/// Errors that can occur during type reading operations -#[derive(Debug, Error, PartialEq, Eq)] -pub enum TypeReadError { - /// Buffer access beyond available data - #[error( - "Buffer overrun: attempted to read at offset {offset} but buffer length is {buffer_len}" - )] - BufferOverrun { - /// The offset that was attempted to be accessed - offset: usize, - /// The actual length of the buffer - buffer_len: usize, - }, - /// Unsupported type variant - #[error("Unsupported type: {type_name}")] - UnsupportedType { - /// The name of the unsupported type - type_name: String, - }, -} - -/// Safely reads a single byte from the buffer at the specified offset -/// -/// This function provides secure byte reading with comprehensive bounds checking -/// to prevent buffer overruns and potential security vulnerabilities. 
-/// -/// # Arguments -/// -/// * `buffer` - The byte buffer to read from -/// * `offset` - The offset position to read the byte from -/// * `signed` - Whether to interpret the byte as signed (`i8`) or unsigned (`u8`) -/// -/// # Returns -/// -/// Returns `Ok(Value::Uint(byte_value))` for unsigned reads or -/// `Ok(Value::Int(byte_value))` for signed reads if the read is successful, or -/// `Err(TypeReadError::BufferOverrun)` if the offset is beyond the buffer bounds. -/// -/// # Security -/// -/// This function performs strict bounds checking to prevent: -/// - Buffer overruns that could lead to memory safety issues -/// - Reading uninitialized or out-of-bounds memory -/// - Integer overflow in offset calculations -/// -/// # Examples -/// -/// ``` -/// use libmagic_rs::evaluator::types::read_byte; -/// use libmagic_rs::parser::ast::Value; -/// -/// let buffer = &[0x7f, 0x80, 0x4c, 0x46]; // example bytes -/// -/// // Read unsigned byte (0x80 = 128) -/// let result = read_byte(buffer, 1, false).unwrap(); -/// assert_eq!(result, Value::Uint(0x80)); -/// -/// // Read signed byte (0x80 = -128) -/// let result = read_byte(buffer, 1, true).unwrap(); -/// assert_eq!(result, Value::Int(-128)); -/// ``` -/// -/// # Errors -/// -/// Returns `TypeReadError::BufferOverrun` if the offset is greater than or equal to -/// the buffer length. 
-pub fn read_byte(buffer: &[u8], offset: usize, signed: bool) -> Result<Value, TypeReadError> { - buffer - .get(offset) - .map(|&byte| { - if signed { - // Wrapping is intentional: e.g., 0x80 -> -128 as i8 - #[allow(clippy::cast_possible_wrap)] - Value::Int(i64::from(byte as i8)) - } else { - Value::Uint(u64::from(byte)) - } - }) - .ok_or(TypeReadError::BufferOverrun { - offset, - buffer_len: buffer.len(), - }) -} - -/// Safely reads a 16-bit integer from the buffer at the specified offset -/// -/// # Arguments -/// -/// * `buffer` - The byte buffer to read from -/// * `offset` - The offset position to read the 16-bit value from -/// * `endian` - The byte order to use for interpretation -/// * `signed` - Whether to interpret the value as signed or unsigned -/// -/// # Returns -/// -/// Returns `Ok(Value::Uint(value))` for unsigned values or `Ok(Value::Int(value))` for signed values -/// if the read is successful, or `Err(TypeReadError::BufferOverrun)` if there are insufficient bytes. -/// -/// # Examples -/// -/// ``` -/// use libmagic_rs::evaluator::types::read_short; -/// use libmagic_rs::parser::ast::{Endianness, Value}; -/// -/// let buffer = &[0x34, 0x12, 0xff, 0x7f]; // Little-endian data -/// -/// // Read unsigned little-endian short (0x1234) -/// let result = read_short(buffer, 0, Endianness::Little, false).unwrap(); -/// assert_eq!(result, Value::Uint(0x1234)); -/// -/// // Read signed little-endian short (0x7fff = 32767) -/// let result = read_short(buffer, 2, Endianness::Little, true).unwrap(); -/// assert_eq!(result, Value::Int(32767)); -/// ``` -/// -/// # Errors -/// -/// Returns `TypeReadError::BufferOverrun` if there are fewer than 2 bytes available -/// starting at the specified offset. 
-pub fn read_short( - buffer: &[u8], - offset: usize, - endian: Endianness, - signed: bool, -) -> Result<Value, TypeReadError> { - let end = offset.checked_add(2).ok_or(TypeReadError::BufferOverrun { - offset, - buffer_len: buffer.len(), - })?; - let bytes = buffer - .get(offset..end) - .ok_or(TypeReadError::BufferOverrun { - offset, - buffer_len: buffer.len(), - })?; - - let value = match endian { - Endianness::Little => LittleEndian::read_u16(bytes), - Endianness::Big => BigEndian::read_u16(bytes), - Endianness::Native => NativeEndian::read_u16(bytes), - }; - - if signed { - #[allow(clippy::cast_possible_wrap)] - Ok(Value::Int(i64::from(value as i16))) - } else { - Ok(Value::Uint(u64::from(value))) - } -} - -/// Safely reads a 32-bit integer from the buffer at the specified offset -/// -/// # Arguments -/// -/// * `buffer` - The byte buffer to read from -/// * `offset` - The offset position to read the 32-bit value from -/// * `endian` - The byte order to use for interpretation -/// * `signed` - Whether to interpret the value as signed or unsigned -/// -/// # Returns -/// -/// Returns `Ok(Value::Uint(value))` for unsigned values or `Ok(Value::Int(value))` for signed values -/// if the read is successful, or `Err(TypeReadError::BufferOverrun)` if there are insufficient bytes. 
-/// -/// # Examples -/// -/// ``` -/// use libmagic_rs::evaluator::types::read_long; -/// use libmagic_rs::parser::ast::{Endianness, Value}; -/// -/// let buffer = &[0x78, 0x56, 0x34, 0x12, 0xff, 0xff, 0xff, 0x7f]; -/// -/// // Read unsigned little-endian long (0x12345678) -/// let result = read_long(buffer, 0, Endianness::Little, false).unwrap(); -/// assert_eq!(result, Value::Uint(0x12345678)); -/// -/// // Read signed little-endian long (0x7fffffff = 2147483647) -/// let result = read_long(buffer, 4, Endianness::Little, true).unwrap(); -/// assert_eq!(result, Value::Int(2147483647)); -/// ``` -/// -/// # Errors -/// -/// Returns `TypeReadError::BufferOverrun` if there are fewer than 4 bytes available -/// starting at the specified offset. -pub fn read_long( - buffer: &[u8], - offset: usize, - endian: Endianness, - signed: bool, -) -> Result<Value, TypeReadError> { - let end = offset.checked_add(4).ok_or(TypeReadError::BufferOverrun { - offset, - buffer_len: buffer.len(), - })?; - let bytes = buffer - .get(offset..end) - .ok_or(TypeReadError::BufferOverrun { - offset, - buffer_len: buffer.len(), - })?; - - let value = match endian { - Endianness::Little => LittleEndian::read_u32(bytes), - Endianness::Big => BigEndian::read_u32(bytes), - Endianness::Native => NativeEndian::read_u32(bytes), - }; - - if signed { - #[allow(clippy::cast_possible_wrap)] - Ok(Value::Int(i64::from(value as i32))) - } else { - Ok(Value::Uint(u64::from(value))) - } -} - -/// Safely reads a 64-bit integer from the buffer at the specified offset -/// -/// # Arguments -/// -/// * `buffer` - The byte buffer to read from -/// * `offset` - The offset position to read the 64-bit value from -/// * `endian` - The byte order to use for interpretation -/// * `signed` - Whether to interpret the value as signed or unsigned -/// -/// # Returns -/// -/// Returns `Ok(Value::Uint(value))` for unsigned values or `Ok(Value::Int(value))` for signed values -/// if the read is successful, or `Err(TypeReadError::BufferOverrun)` if 
there are insufficient bytes. -/// -/// # Examples -/// -/// ``` -/// use libmagic_rs::evaluator::types::read_quad; -/// use libmagic_rs::parser::ast::{Endianness, Value}; -/// -/// let buffer = &[0xef, 0xcd, 0xab, 0x90, 0x78, 0x56, 0x34, 0x12]; -/// -/// // Read unsigned little-endian quad (0x1234567890abcdef) -/// let result = read_quad(buffer, 0, Endianness::Little, false).unwrap(); -/// assert_eq!(result, Value::Uint(0x1234_5678_90ab_cdef)); -/// -/// // Read signed little-endian quad (positive value fits in i64) -/// let result = read_quad(buffer, 0, Endianness::Little, true).unwrap(); -/// assert_eq!(result, Value::Int(0x1234_5678_90ab_cdef)); -/// -/// // Read signed little-endian quad with high bit set (sign extension) -/// let neg_buffer = &[0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80]; -/// let result = read_quad(neg_buffer, 0, Endianness::Little, true).unwrap(); -/// assert_eq!(result, Value::Int(-9_223_372_036_854_775_808)); // i64::MIN -/// ``` -/// -/// # Errors -/// -/// Returns `TypeReadError::BufferOverrun` if there are fewer than 8 bytes available -/// starting at the specified offset. 
-pub fn read_quad( - buffer: &[u8], - offset: usize, - endian: Endianness, - signed: bool, -) -> Result<Value, TypeReadError> { - let end = offset.checked_add(8).ok_or(TypeReadError::BufferOverrun { - offset, - buffer_len: buffer.len(), - })?; - let bytes = buffer - .get(offset..end) - .ok_or(TypeReadError::BufferOverrun { - offset, - buffer_len: buffer.len(), - })?; - - let value = match endian { - Endianness::Little => LittleEndian::read_u64(bytes), - Endianness::Big => BigEndian::read_u64(bytes), - Endianness::Native => NativeEndian::read_u64(bytes), - }; - - if signed { - #[allow(clippy::cast_possible_wrap)] - Ok(Value::Int(value as i64)) - } else { - Ok(Value::Uint(value)) - } -} - -/// Safely reads a null-terminated string from the buffer at the specified offset -/// -/// This function reads bytes from the buffer starting at the given offset until it encounters -/// a null byte (0x00) or reaches the maximum length limit. The resulting bytes are converted -/// to a UTF-8 string with proper error handling for invalid sequences. -/// -/// # Arguments -/// -/// * `buffer` - The byte buffer to read from -/// * `offset` - The offset position to start reading the string from -/// * `max_length` - Optional maximum number of bytes to read excluding the null terminator. -/// If a NUL is found within `max_length` bytes, it is not counted in the result length. -/// If no NUL is found, up to `max_length` bytes are returned with no trailing NUL. -/// When `None`, reads until the first NUL or end of buffer. -/// -/// # Returns -/// -/// Returns `Ok(Value::String(string))` if the read is successful, or an appropriate error -/// if the read fails due to buffer overrun or invalid UTF-8 sequences. 
-/// -/// # Security -/// -/// This function provides several security guarantees: -/// - Bounds checking prevents reading beyond buffer limits -/// - Length limits prevent excessive memory allocation -/// - UTF-8 validation ensures string safety -/// - Null termination handling prevents runaway reads -/// -/// # Examples -/// -/// ``` -/// use libmagic_rs::evaluator::types::read_string; -/// use libmagic_rs::parser::ast::Value; -/// -/// // Null-terminated string -/// let buffer = b"Hello\x00World"; -/// let result = read_string(buffer, 0, None).unwrap(); -/// assert_eq!(result, Value::String("Hello".to_string())); -/// -/// // String with length limit -/// let buffer = b"VeryLongString\x00"; -/// let result = read_string(buffer, 0, Some(4)).unwrap(); -/// assert_eq!(result, Value::String("Very".to_string())); -/// -/// // String without null terminator (reads to max_length) -/// let buffer = b"NoNull"; -/// let result = read_string(buffer, 0, Some(6)).unwrap(); -/// assert_eq!(result, Value::String("NoNull".to_string())); -/// -/// // NUL found within max_length (NUL not counted in result) -/// let buffer = b"Hello\x00World"; -/// let result = read_string(buffer, 0, Some(10)).unwrap(); -/// assert_eq!(result, Value::String("Hello".to_string())); -/// -/// // No NUL found, returns exactly max_length bytes -/// let buffer = b"ABCDEF"; -/// let result = read_string(buffer, 0, Some(4)).unwrap(); -/// assert_eq!(result, Value::String("ABCD".to_string())); -/// ``` -/// -/// # Errors -/// -/// Returns `TypeReadError::BufferOverrun` if the offset is greater than or equal to the buffer length. 
-pub fn read_string( - buffer: &[u8], - offset: usize, - max_length: Option<usize>, -) -> Result<Value, TypeReadError> { - // Check if offset is within buffer bounds - if offset >= buffer.len() { - return Err(TypeReadError::BufferOverrun { - offset, - buffer_len: buffer.len(), - }); - } - - // Get the slice starting from offset - let remaining_buffer = &buffer[offset..]; - - // Determine the actual length to read (uses memchr for efficient null byte scanning) - let read_length = if let Some(max_len) = max_length { - // Find null terminator within max_length, or use max_length if no null found - let search_len = std::cmp::min(max_len, remaining_buffer.len()); - memchr::memchr(0, &remaining_buffer[..search_len]).unwrap_or(search_len) - } else { - // Find null terminator in entire remaining buffer - memchr::memchr(0, remaining_buffer).unwrap_or(remaining_buffer.len()) - }; - - // Extract the string bytes (excluding null terminator) - let string_bytes = &remaining_buffer[..read_length]; - - // Convert to UTF-8 string, replacing invalid sequences with replacement character - let string_value = String::from_utf8_lossy(string_bytes).into_owned(); - - Ok(Value::String(string_value)) -} - -/// Reads and interprets bytes according to the specified `TypeKind` -/// -/// This is the main interface for type interpretation that dispatches to the appropriate -/// reading function based on the `TypeKind` variant. -/// -/// # Arguments -/// -/// * `buffer` - The byte buffer to read from -/// * `offset` - The offset position to read from -/// * `type_kind` - The type specification that determines how to interpret the bytes -/// -/// # Returns -/// -/// Returns the interpreted value as a `Value` enum variant, or an error if the read fails. 
-/// -/// # Examples -/// -/// ``` -/// use libmagic_rs::evaluator::types::read_typed_value; -/// use libmagic_rs::parser::ast::{TypeKind, Endianness, Value}; -/// -/// let buffer = &[0x7f, 0x45, 0x4c, 0x46, 0x34, 0x12]; -/// -/// // Read an unsigned byte -/// let byte_result = read_typed_value(buffer, 0, &TypeKind::Byte { signed: false }).unwrap(); -/// assert_eq!(byte_result, Value::Uint(0x7f)); -/// -/// // Read a little-endian short -/// let short_type = TypeKind::Short { -/// endian: Endianness::Little, -/// signed: false, -/// }; -/// let short_result = read_typed_value(buffer, 4, &short_type).unwrap(); -/// assert_eq!(short_result, Value::Uint(0x1234)); -/// ``` -/// -/// # Errors -/// -/// Returns `TypeReadError::BufferOverrun` if there are insufficient bytes for the requested type, -/// or `TypeReadError::UnsupportedType` for type variants that are not yet implemented. -pub fn read_typed_value( - buffer: &[u8], - offset: usize, - type_kind: &TypeKind, -) -> Result<Value, TypeReadError> { - match type_kind { - TypeKind::Byte { signed } => read_byte(buffer, offset, *signed), - TypeKind::Short { endian, signed } => read_short(buffer, offset, *endian, *signed), - TypeKind::Long { endian, signed } => read_long(buffer, offset, *endian, *signed), - TypeKind::Quad { endian, signed } => read_quad(buffer, offset, *endian, *signed), - TypeKind::String { max_length } => read_string(buffer, offset, *max_length), - } -} - -/// Coerce a rule's expected value to match the type's signedness and width. -/// -/// In libmagic, comparison values like `0xff` in `0 byte =0xff` are interpreted -/// at the type's bit width. For a signed byte, `0xff` means `-1` (the signed -/// interpretation of that bit pattern). This function performs that coercion so -/// that comparisons work correctly regardless of how the value was parsed. -/// -/// Only affects `Value::Uint` values paired with signed types whose values exceed -/// the signed range. All other combinations pass through unchanged. 
-/// -/// # Examples -/// -/// ``` -/// use libmagic_rs::evaluator::types::coerce_value_to_type; -/// use libmagic_rs::parser::ast::{TypeKind, Value}; -/// -/// // 0xff for signed byte -> -1 -/// let coerced = coerce_value_to_type(&Value::Uint(0xff), &TypeKind::Byte { signed: true }); -/// assert_eq!(coerced, Value::Int(-1)); -/// -/// // 0x7f for signed byte -> unchanged (fits in signed range) -/// let coerced = coerce_value_to_type(&Value::Uint(0x7f), &TypeKind::Byte { signed: true }); -/// assert_eq!(coerced, Value::Uint(0x7f)); -/// -/// // Unsigned types pass through unchanged -/// let coerced = coerce_value_to_type(&Value::Uint(0xff), &TypeKind::Byte { signed: false }); -/// assert_eq!(coerced, Value::Uint(0xff)); -/// ``` -#[must_use] -pub fn coerce_value_to_type(value: &Value, type_kind: &TypeKind) -> Value { - match (value, type_kind) { - (Value::Uint(v), TypeKind::Byte { signed: true }) if *v > i8::MAX as u64 => - { - #[allow(clippy::cast_possible_truncation, clippy::cast_possible_wrap)] - Value::Int(i64::from(*v as u8 as i8)) - } - (Value::Uint(v), TypeKind::Short { signed: true, .. }) if *v > i16::MAX as u64 => - { - #[allow(clippy::cast_possible_truncation, clippy::cast_possible_wrap)] - Value::Int(i64::from(*v as u16 as i16)) - } - (Value::Uint(v), TypeKind::Long { signed: true, .. }) if *v > i32::MAX as u64 => - { - #[allow(clippy::cast_possible_truncation, clippy::cast_possible_wrap)] - Value::Int(i64::from(*v as u32 as i32)) - } - (Value::Uint(v), TypeKind::Quad { signed: true, .. 
}) if *v > i64::MAX as u64 => - { - #[allow(clippy::cast_possible_wrap)] - Value::Int(*v as i64) - } - _ => value.clone(), - } -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn test_read_byte_values() { - // All 256 unsigned values - let buffer: Vec<u8> = (0..=255).collect(); - for (i, &byte) in buffer.iter().enumerate() { - assert_eq!( - read_byte(&buffer, i, false).unwrap(), - Value::Uint(u64::from(byte)) - ); - } - } - - #[test] - fn test_read_byte_out_of_bounds() { - // Empty buffer - assert_eq!( - read_byte(&[], 0, false).unwrap_err(), - TypeReadError::BufferOverrun { - offset: 0, - buffer_len: 0 - } - ); - // Just past end - assert_eq!( - read_byte(&[0x42], 1, false).unwrap_err(), - TypeReadError::BufferOverrun { - offset: 1, - buffer_len: 1 - } - ); - // Way past end - assert_eq!( - read_byte(&[1, 2, 3], 100, false).unwrap_err(), - TypeReadError::BufferOverrun { - offset: 100, - buffer_len: 3 - } - ); - } - - #[test] - fn test_read_byte_signedness() { - let cases: Vec<(u8, bool, Value)> = vec![ - (0x00, false, Value::Uint(0)), - (0x7f, false, Value::Uint(127)), - (0x80, false, Value::Uint(128)), - (0xff, false, Value::Uint(255)), - (0x00, true, Value::Int(0)), - (0x7f, true, Value::Int(127)), - (0x80, true, Value::Int(-128)), - (0xff, true, Value::Int(-1)), - ]; - for (byte, signed, expected) in cases { - let result = read_byte(&[byte], 0, signed).unwrap(); - assert_eq!(result, expected, "byte=0x{byte:02x}, signed={signed}"); - } - } - - #[test] - fn test_type_read_error_display() { - let error = TypeReadError::BufferOverrun { - offset: 10, - buffer_len: 5, - }; - let msg = format!("{error}"); - assert!(msg.contains("offset 10")); - assert!(msg.contains("buffer length is 5")); - } - - // Tests for read_short function - #[test] - fn test_read_short_little_endian_unsigned() { - let buffer = &[0x34, 0x12, 0x78, 0x56]; // 0x1234, 0x5678 in little-endian - - // Read first short (0x1234) - let result = read_short(buffer, 0, Endianness::Little, 
false).unwrap(); - assert_eq!(result, Value::Uint(0x1234)); - - // Read second short (0x5678) - let result = read_short(buffer, 2, Endianness::Little, false).unwrap(); - assert_eq!(result, Value::Uint(0x5678)); - } - - #[test] - fn test_read_short_big_endian_unsigned() { - let buffer = &[0x12, 0x34, 0x56, 0x78]; // 0x1234, 0x5678 in big-endian - - // Read first short (0x1234) - let result = read_short(buffer, 0, Endianness::Big, false).unwrap(); - assert_eq!(result, Value::Uint(0x1234)); - - // Read second short (0x5678) - let result = read_short(buffer, 2, Endianness::Big, false).unwrap(); - assert_eq!(result, Value::Uint(0x5678)); - } - - #[test] - fn test_read_short_native_endian_unsigned() { - let buffer = &[0x34, 0x12, 0x78, 0x56]; - - // Read using native endianness - let result = read_short(buffer, 0, Endianness::Native, false).unwrap(); - - // The exact value depends on the system's endianness, but it should be valid - match result { - Value::Uint(val) => { - // Should be either 0x1234 (little-endian) or 0x3412 (big-endian) - assert!(val == 0x1234 || val == 0x3412); - } - _ => panic!("Expected Value::Uint variant"), - } - } - - #[test] - fn test_read_short_signed_positive() { - let buffer = &[0xff, 0x7f]; // 0x7fff = 32767 in little-endian - - let result = read_short(buffer, 0, Endianness::Little, true).unwrap(); - assert_eq!(result, Value::Int(32767)); - } - - #[test] - fn test_read_short_signed_negative() { - let buffer = &[0x00, 0x80]; // 0x8000 = -32768 in little-endian (signed) - - let result = read_short(buffer, 0, Endianness::Little, true).unwrap(); - assert_eq!(result, Value::Int(-32768)); - } - - #[test] - fn test_read_short_signed_vs_unsigned() { - let buffer = &[0xff, 0xff]; // 0xffff - - // Unsigned interpretation - let unsigned_result = read_short(buffer, 0, Endianness::Little, false).unwrap(); - assert_eq!(unsigned_result, Value::Uint(65535)); - - // Signed interpretation - let signed_result = read_short(buffer, 0, Endianness::Little, 
true).unwrap(); - assert_eq!(signed_result, Value::Int(-1)); - } - - #[test] - fn test_read_short_buffer_overrun() { - let buffer = &[0x12]; // Only 1 byte available - - // Should fail when trying to read 2 bytes - let result = read_short(buffer, 0, Endianness::Little, false); - assert!(result.is_err()); - assert_eq!( - result.unwrap_err(), - TypeReadError::BufferOverrun { - offset: 0, - buffer_len: 1 - } - ); - } - - #[test] - fn test_read_short_offset_out_of_bounds() { - let buffer = &[0x12, 0x34, 0x56]; - - // Should fail when trying to read 2 bytes starting at offset 2 (only 1 byte left) - let result = read_short(buffer, 2, Endianness::Little, false); - assert!(result.is_err()); - assert_eq!( - result.unwrap_err(), - TypeReadError::BufferOverrun { - offset: 2, - buffer_len: 3 - } - ); - } - - #[test] - fn test_read_short_empty_buffer() { - let buffer = &[]; - - let result = read_short(buffer, 0, Endianness::Little, false); - assert!(result.is_err()); - assert_eq!( - result.unwrap_err(), - TypeReadError::BufferOverrun { - offset: 0, - buffer_len: 0 - } - ); - } - - #[test] - fn test_read_short_all_endianness_variants() { - let buffer = &[0x12, 0x34]; - - // Test all endianness variants - let little = read_short(buffer, 0, Endianness::Little, false).unwrap(); - let big = read_short(buffer, 0, Endianness::Big, false).unwrap(); - let native = read_short(buffer, 0, Endianness::Native, false).unwrap(); - - // Little-endian: 0x3412, Big-endian: 0x1234 - assert_eq!(little, Value::Uint(0x3412)); - assert_eq!(big, Value::Uint(0x1234)); - - // Native should match one of them - match native { - Value::Uint(val) => assert!(val == 0x1234 || val == 0x3412), - _ => panic!("Expected Value::Uint variant"), - } - } - - // Tests for read_long function - #[test] - fn test_read_long_little_endian_unsigned() { - let buffer = &[0x78, 0x56, 0x34, 0x12, 0xbc, 0x9a, 0x78, 0x56]; // 0x12345678, 0x56789abc - - // Read first long (0x12345678) - let result = read_long(buffer, 0, 
Endianness::Little, false).unwrap(); - assert_eq!(result, Value::Uint(0x1234_5678)); - - // Read second long (0x56789abc) - let result = read_long(buffer, 4, Endianness::Little, false).unwrap(); - assert_eq!(result, Value::Uint(0x5678_9abc)); - } - - #[test] - fn test_read_long_big_endian_unsigned() { - let buffer = &[0x12, 0x34, 0x56, 0x78, 0x56, 0x78, 0x9a, 0xbc]; // 0x12345678, 0x56789abc - - // Read first long (0x12345678) - let result = read_long(buffer, 0, Endianness::Big, false).unwrap(); - assert_eq!(result, Value::Uint(0x1234_5678)); - - // Read second long (0x56789abc) - let result = read_long(buffer, 4, Endianness::Big, false).unwrap(); - assert_eq!(result, Value::Uint(0x5678_9abc)); - } - - #[test] - fn test_read_long_native_endian_unsigned() { - let buffer = &[0x78, 0x56, 0x34, 0x12]; - - // Read using native endianness - let result = read_long(buffer, 0, Endianness::Native, false).unwrap(); - - // The exact value depends on the system's endianness, but it should be valid - match result { - Value::Uint(val) => { - // Should be either 0x12345678 (little-endian) or 0x78563412 (big-endian) - assert!(val == 0x1234_5678 || val == 0x7856_3412); - } - _ => panic!("Expected Value::Uint variant"), - } - } - - #[test] - fn test_read_long_signed_positive() { - let buffer = &[0xff, 0xff, 0xff, 0x7f]; // 0x7fffffff = 2147483647 in little-endian - - let result = read_long(buffer, 0, Endianness::Little, true).unwrap(); - assert_eq!(result, Value::Int(2_147_483_647)); - } - - #[test] - fn test_read_long_signed_negative() { - let buffer = &[0x00, 0x00, 0x00, 0x80]; // 0x80000000 = -2147483648 in little-endian (signed) - - let result = read_long(buffer, 0, Endianness::Little, true).unwrap(); - assert_eq!(result, Value::Int(-2_147_483_648)); - } - - #[test] - fn test_read_long_signed_vs_unsigned() { - let buffer = &[0xff, 0xff, 0xff, 0xff]; // 0xffffffff - - // Unsigned interpretation - let unsigned_result = read_long(buffer, 0, Endianness::Little, false).unwrap(); - 
assert_eq!(unsigned_result, Value::Uint(4_294_967_295)); - - // Signed interpretation - let signed_result = read_long(buffer, 0, Endianness::Little, true).unwrap(); - assert_eq!(signed_result, Value::Int(-1)); - } - - #[test] - fn test_read_long_buffer_overrun() { - let buffer = &[0x12, 0x34, 0x56]; // Only 3 bytes available - - // Should fail when trying to read 4 bytes - let result = read_long(buffer, 0, Endianness::Little, false); - assert!(result.is_err()); - assert_eq!( - result.unwrap_err(), - TypeReadError::BufferOverrun { - offset: 0, - buffer_len: 3 - } - ); - } - - #[test] - fn test_read_long_offset_out_of_bounds() { - let buffer = &[0x12, 0x34, 0x56, 0x78, 0x9a]; - - // Should fail when trying to read 4 bytes starting at offset 2 (only 3 bytes left) - let result = read_long(buffer, 2, Endianness::Little, false); - assert!(result.is_err()); - assert_eq!( - result.unwrap_err(), - TypeReadError::BufferOverrun { - offset: 2, - buffer_len: 5 - } - ); - } - - #[test] - fn test_read_long_empty_buffer() { - let buffer = &[]; - - let result = read_long(buffer, 0, Endianness::Little, false); - assert!(result.is_err()); - assert_eq!( - result.unwrap_err(), - TypeReadError::BufferOverrun { - offset: 0, - buffer_len: 0 - } - ); - } - - #[test] - fn test_read_long_all_endianness_variants() { - let buffer = &[0x12, 0x34, 0x56, 0x78]; - - // Test all endianness variants - let little = read_long(buffer, 0, Endianness::Little, false).unwrap(); - let big = read_long(buffer, 0, Endianness::Big, false).unwrap(); - let native = read_long(buffer, 0, Endianness::Native, false).unwrap(); - - // Little-endian: 0x78563412, Big-endian: 0x12345678 - assert_eq!(little, Value::Uint(0x7856_3412)); - assert_eq!(big, Value::Uint(0x1234_5678)); - - // Native should match one of them - match native { - Value::Uint(val) => assert!(val == 0x1234_5678 || val == 0x7856_3412), - _ => panic!("Expected Value::Uint variant"), - } - } - - #[test] - fn test_read_long_extreme_values() { - // Test 
maximum unsigned 32-bit value - let max_buffer = &[0xff, 0xff, 0xff, 0xff]; - let max_result = read_long(max_buffer, 0, Endianness::Little, false).unwrap(); - assert_eq!(max_result, Value::Uint(u64::from(u32::MAX))); - - // Test zero value - let zero_buffer = &[0x00, 0x00, 0x00, 0x00]; - let zero_result = read_long(zero_buffer, 0, Endianness::Little, false).unwrap(); - assert_eq!(zero_result, Value::Uint(0)); - } - - // Tests for read_quad function - #[test] - fn test_read_quad_endianness_and_signedness() { - let cases: Vec<(&[u8], Endianness, bool, Value)> = vec![ - // Little-endian unsigned - ( - &[0xef, 0xcd, 0xab, 0x90, 0x78, 0x56, 0x34, 0x12], - Endianness::Little, - false, - Value::Uint(0x1234_5678_90ab_cdef), - ), - // Big-endian unsigned - ( - &[0x12, 0x34, 0x56, 0x78, 0x90, 0xab, 0xcd, 0xef], - Endianness::Big, - false, - Value::Uint(0x1234_5678_90ab_cdef), - ), - // Little-endian signed positive - ( - &[0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7f], - Endianness::Little, - true, - Value::Int(i64::MAX), - ), - // Little-endian signed negative - ( - &[0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80], - Endianness::Little, - true, - Value::Int(i64::MIN), - ), - // Big-endian signed negative (-1) - ( - &[0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff], - Endianness::Big, - true, - Value::Int(-1), - ), - // Unsigned max value - ( - &[0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff], - Endianness::Little, - false, - Value::Uint(u64::MAX), - ), - // Zero - ( - &[0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00], - Endianness::Little, - false, - Value::Uint(0), - ), - ]; - for (buffer, endian, signed, expected) in cases { - let result = read_quad(buffer, 0, endian, signed).unwrap(); - assert_eq!(result, expected, "endian={endian:?}, signed={signed}"); - } - } - - #[test] - fn test_read_quad_buffer_overrun() { - // Too few bytes (only 7) - let buffer = &[0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07]; - assert_eq!( - read_quad(buffer, 0, Endianness::Little, 
false).unwrap_err(), - TypeReadError::BufferOverrun { - offset: 0, - buffer_len: 7 - } - ); - - // Empty buffer - assert_eq!( - read_quad(&[], 0, Endianness::Big, false).unwrap_err(), - TypeReadError::BufferOverrun { - offset: 0, - buffer_len: 0 - } - ); - - // Offset past end - let buffer = &[0x00; 16]; - assert_eq!( - read_quad(buffer, 10, Endianness::Little, false).unwrap_err(), - TypeReadError::BufferOverrun { - offset: 10, - buffer_len: 16 - } - ); - } - - #[test] - fn test_read_quad_at_offset() { - let buffer = &[0x00, 0x00, 0xef, 0xcd, 0xab, 0x90, 0x78, 0x56, 0x34, 0x12]; - let result = read_quad(buffer, 2, Endianness::Little, false).unwrap(); - assert_eq!(result, Value::Uint(0x1234_5678_90ab_cdef)); - } - - #[test] - fn test_read_short_extreme_values() { - // Test maximum unsigned 16-bit value - let max_buffer = &[0xff, 0xff]; - let max_result = read_short(max_buffer, 0, Endianness::Little, false).unwrap(); - assert_eq!(max_result, Value::Uint(u64::from(u16::MAX))); - - // Test zero value - let zero_buffer = &[0x00, 0x00]; - let zero_result = read_short(zero_buffer, 0, Endianness::Little, false).unwrap(); - assert_eq!(zero_result, Value::Uint(0)); - } - - #[test] - fn test_multi_byte_reading_consistency() { - // Test that reading the same bytes with different functions gives consistent results - let buffer = &[0x34, 0x12, 0x78, 0x56, 0xbc, 0x9a, 0xde, 0xf0]; - - // Read as individual bytes - let byte0 = read_byte(buffer, 0, false).unwrap(); - let byte1 = read_byte(buffer, 1, false).unwrap(); - - // Read as short - let short = read_short(buffer, 0, Endianness::Little, false).unwrap(); - - // Verify consistency - match (byte0, byte1, short) { - (Value::Uint(b0), Value::Uint(b1), Value::Uint(s)) => { - assert_eq!(s, b0 + (b1 << 8)); // Little-endian composition - } - _ => panic!("Expected all Uint values"), - } - } - - // Tests for UnsupportedType error - #[test] - fn test_unsupported_type_error() { - let error = TypeReadError::UnsupportedType { - type_name: 
"CustomType".to_string(), - }; - - let error_string = format!("{error}"); - assert!(error_string.contains("Unsupported type")); - assert!(error_string.contains("CustomType")); - } - - #[test] - fn test_unsupported_type_error_debug() { - let error = TypeReadError::UnsupportedType { - type_name: "TestType".to_string(), - }; - - let debug_string = format!("{error:?}"); - assert!(debug_string.contains("UnsupportedType")); - assert!(debug_string.contains("TestType")); - } - - #[test] - fn test_unsupported_type_error_equality() { - let error1 = TypeReadError::UnsupportedType { - type_name: "Type1".to_string(), - }; - let error2 = TypeReadError::UnsupportedType { - type_name: "Type1".to_string(), - }; - let error3 = TypeReadError::UnsupportedType { - type_name: "Type2".to_string(), - }; - - assert_eq!(error1, error2); - assert_ne!(error1, error3); - } - - // Tests for read_typed_value function - #[test] - fn test_read_typed_value_byte() { - let buffer = &[0x7f, 0x45, 0x4c, 0x46]; - let type_kind = TypeKind::Byte { signed: false }; - - let result = read_typed_value(buffer, 0, &type_kind).unwrap(); - assert_eq!(result, Value::Uint(0x7f)); - - let result = read_typed_value(buffer, 3, &type_kind).unwrap(); - assert_eq!(result, Value::Uint(0x46)); - } - - #[test] - fn test_read_typed_value_short_unsigned_little_endian() { - let buffer = &[0x34, 0x12, 0x78, 0x56]; - let type_kind = TypeKind::Short { - endian: Endianness::Little, - signed: false, - }; - - let result = read_typed_value(buffer, 0, &type_kind).unwrap(); - assert_eq!(result, Value::Uint(0x1234)); - - let result = read_typed_value(buffer, 2, &type_kind).unwrap(); - assert_eq!(result, Value::Uint(0x5678)); - } - - #[test] - fn test_read_typed_value_short_signed_big_endian() { - let buffer = &[0x80, 0x00, 0x7f, 0xff]; - let type_kind = TypeKind::Short { - endian: Endianness::Big, - signed: true, - }; - - // 0x8000 = -32768 in signed 16-bit - let result = read_typed_value(buffer, 0, &type_kind).unwrap(); - 
assert_eq!(result, Value::Int(-32768)); - - // 0x7fff = 32767 in signed 16-bit - let result = read_typed_value(buffer, 2, &type_kind).unwrap(); - assert_eq!(result, Value::Int(32767)); - } - - #[test] - fn test_read_typed_value_long_unsigned_little_endian() { - let buffer = &[0x78, 0x56, 0x34, 0x12, 0xbc, 0x9a, 0x78, 0x56]; - let type_kind = TypeKind::Long { - endian: Endianness::Little, - signed: false, - }; - - let result = read_typed_value(buffer, 0, &type_kind).unwrap(); - assert_eq!(result, Value::Uint(0x1234_5678)); - - let result = read_typed_value(buffer, 4, &type_kind).unwrap(); - assert_eq!(result, Value::Uint(0x5678_9abc)); - } - - #[test] - fn test_read_typed_value_long_signed_big_endian() { - let buffer = &[0x80, 0x00, 0x00, 0x00, 0x7f, 0xff, 0xff, 0xff]; - let type_kind = TypeKind::Long { - endian: Endianness::Big, - signed: true, - }; - - // 0x80000000 = -2147483648 in signed 32-bit - let result = read_typed_value(buffer, 0, &type_kind).unwrap(); - assert_eq!(result, Value::Int(-2_147_483_648)); - - // 0x7fffffff = 2147483647 in signed 32-bit - let result = read_typed_value(buffer, 4, &type_kind).unwrap(); - assert_eq!(result, Value::Int(2_147_483_647)); - } - - #[test] - fn test_read_typed_value_native_endian() { - let buffer = &[0x34, 0x12, 0x78, 0x56, 0xbc, 0x9a, 0xde, 0xf0]; - - // Test short with native endianness - let short_type = TypeKind::Short { - endian: Endianness::Native, - signed: false, - }; - - let result = read_typed_value(buffer, 0, &short_type).unwrap(); - match result { - Value::Uint(val) => { - // Should be either 0x1234 (little-endian) or 0x3412 (big-endian) - assert!(val == 0x1234 || val == 0x3412); - } - _ => panic!("Expected Value::Uint variant"), - } - } - - #[test] - fn test_read_typed_value_string() { - let buffer = b"Hello\x00World\x00"; - let type_kind = TypeKind::String { max_length: None }; - - let result = read_typed_value(buffer, 0, &type_kind).unwrap(); - assert_eq!(result, Value::String("Hello".to_string())); - - 
let result = read_typed_value(buffer, 6, &type_kind).unwrap(); - assert_eq!(result, Value::String("World".to_string())); - } - - #[test] - fn test_read_typed_value_string_with_max_length() { - let buffer = b"VeryLongString\x00"; - let type_kind = TypeKind::String { - max_length: Some(4), - }; - - let result = read_typed_value(buffer, 0, &type_kind).unwrap(); - assert_eq!(result, Value::String("Very".to_string())); - } - - #[test] - fn test_read_typed_value_buffer_overrun() { - let buffer = &[0x12]; - let type_kind = TypeKind::Short { - endian: Endianness::Little, - signed: false, - }; - - let result = read_typed_value(buffer, 0, &type_kind); - assert!(result.is_err()); - assert_eq!( - result.unwrap_err(), - TypeReadError::BufferOverrun { - offset: 0, - buffer_len: 1 - } - ); - } - - // Tests for read_string function - #[test] - fn test_read_string_null_terminated() { - let buffer = b"Hello\x00World"; - - let result = read_string(buffer, 0, None).unwrap(); - assert_eq!(result, Value::String("Hello".to_string())); - } - - #[test] - fn test_read_string_null_terminated_at_offset() { - let buffer = b"Prefix\x00Hello\x00Suffix"; - - let result = read_string(buffer, 7, None).unwrap(); - assert_eq!(result, Value::String("Hello".to_string())); - } - - #[test] - fn test_read_string_with_max_length_shorter_than_null() { - let buffer = b"VeryLongString\x00"; - - // Max length is shorter than the null terminator position - let result = read_string(buffer, 0, Some(4)).unwrap(); - assert_eq!(result, Value::String("Very".to_string())); - } - - #[test] - fn test_read_string_with_max_length_longer_than_null() { - let buffer = b"Short\x00LongerSuffix"; - - // Max length is longer than the null terminator position - let result = read_string(buffer, 0, Some(10)).unwrap(); - assert_eq!(result, Value::String("Short".to_string())); - } - - #[test] - fn test_read_string_no_null_terminator_with_max_length() { - let buffer = b"NoNullTerminator"; - - // Should read up to max_length when no 
null terminator is found - let result = read_string(buffer, 0, Some(6)).unwrap(); - assert_eq!(result, Value::String("NoNull".to_string())); - } - - #[test] - fn test_read_string_no_null_terminator_no_max_length() { - let buffer = b"NoNullTerminator"; - - // Should read entire remaining buffer when no null terminator and no max_length - let result = read_string(buffer, 0, None).unwrap(); - assert_eq!(result, Value::String("NoNullTerminator".to_string())); - } - - #[test] - fn test_read_string_empty_string() { - let buffer = b"\x00Hello"; - - // Should return empty string when null terminator is at offset - let result = read_string(buffer, 0, None).unwrap(); - assert_eq!(result, Value::String(String::new())); - } - - #[test] - fn test_read_string_empty_buffer() { - let buffer = b""; - - // Should fail with buffer overrun for empty buffer - let result = read_string(buffer, 0, None); - assert!(result.is_err()); - assert_eq!( - result.unwrap_err(), - TypeReadError::BufferOverrun { - offset: 0, - buffer_len: 0 - } - ); - } - - #[test] - fn test_read_string_offset_out_of_bounds() { - let buffer = b"Hello"; - - // Should fail when offset is beyond buffer length - let result = read_string(buffer, 10, None); - assert!(result.is_err()); - assert_eq!( - result.unwrap_err(), - TypeReadError::BufferOverrun { - offset: 10, - buffer_len: 5 - } - ); - } - - #[test] - fn test_read_string_offset_at_buffer_end() { - let buffer = b"Hello"; - - // Should fail when offset equals buffer length - let result = read_string(buffer, 5, None); - assert!(result.is_err()); - assert_eq!( - result.unwrap_err(), - TypeReadError::BufferOverrun { - offset: 5, - buffer_len: 5 - } - ); - } - - #[test] - fn test_read_string_max_length_zero() { - let buffer = b"Hello\x00World"; - - // Should return empty string when max_length is 0 - let result = read_string(buffer, 0, Some(0)).unwrap(); - assert_eq!(result, Value::String(String::new())); - } - - #[test] - fn 
test_read_string_max_length_larger_than_buffer() { - let buffer = b"Short"; - - // Should read entire buffer when max_length exceeds buffer size - let result = read_string(buffer, 0, Some(100)).unwrap(); - assert_eq!(result, Value::String("Short".to_string())); - } - - #[test] - fn test_read_string_utf8_valid() { - let buffer = b"Caf\xc3\xa9\x00"; // "Café" in UTF-8 - - let result = read_string(buffer, 0, None).unwrap(); - assert_eq!(result, Value::String("Café".to_string())); - } - - #[test] - fn test_read_string_utf8_invalid() { - let buffer = b"Invalid\xff\xfe\x00"; // Invalid UTF-8 sequence - - let result = read_string(buffer, 0, None).unwrap(); - // Should use replacement characters for invalid UTF-8 - assert!(matches!(result, Value::String(_))); - if let Value::String(s) = result { - assert!(s.starts_with("Invalid")); - assert!(s.contains('\u{FFFD}')); // UTF-8 replacement character - } - } - - #[test] - fn test_read_string_binary_data() { - let buffer = &[0x48, 0x65, 0x6c, 0x6c, 0x6f, 0x00, 0x80, 0x90]; // "Hello" + binary - - let result = read_string(buffer, 0, None).unwrap(); - assert_eq!(result, Value::String("Hello".to_string())); - } - - #[test] - fn test_read_string_multiple_nulls() { - let buffer = b"First\x00\x00Second\x00"; - - // Should stop at first null terminator - let result = read_string(buffer, 0, None).unwrap(); - assert_eq!(result, Value::String("First".to_string())); - - // Reading from second null should return empty string - let result = read_string(buffer, 6, None).unwrap(); - assert_eq!(result, Value::String(String::new())); - } - - #[test] - fn test_read_string_ascii_control_characters() { - let buffer = b"Hello\x09World\x00"; // Tab character in string - - let result = read_string(buffer, 0, None).unwrap(); - assert_eq!(result, Value::String("Hello\tWorld".to_string())); - } - - #[test] - fn test_read_string_single_character() { - let buffer = b"A\x00"; - - let result = read_string(buffer, 0, None).unwrap(); - assert_eq!(result, 
Value::String("A".to_string())); - } - - #[test] - fn test_read_string_max_length_exact_match() { - let buffer = b"Exact\x00"; - - // Max length exactly matches string length (excluding null) - let result = read_string(buffer, 0, Some(5)).unwrap(); - assert_eq!(result, Value::String("Exact".to_string())); - } - - #[test] - fn test_read_string_at_buffer_boundary() { - let buffer = b"Hello"; - - // Reading from last character position - let result = read_string(buffer, 4, Some(1)).unwrap(); - assert_eq!(result, Value::String("o".to_string())); - } - - #[test] - fn test_read_string_whitespace_handling() { - let buffer = b" Spaces \x00"; - - // Should preserve whitespace in strings - let result = read_string(buffer, 0, None).unwrap(); - assert_eq!(result, Value::String(" Spaces ".to_string())); - } - - #[test] - fn test_read_string_newline_characters() { - let buffer = b"Line1\nLine2\r\n\x00"; - - let result = read_string(buffer, 0, None).unwrap(); - assert_eq!(result, Value::String("Line1\nLine2\r\n".to_string())); - } - - #[test] - fn test_read_string_consistency_with_typed_value() { - let buffer = b"Test\x00String"; - - // Test that read_string and read_typed_value produce same results - let direct_result = read_string(buffer, 0, None).unwrap(); - - let type_kind = TypeKind::String { max_length: None }; - let typed_result = read_typed_value(buffer, 0, &type_kind).unwrap(); - - assert_eq!(direct_result, typed_result); - assert_eq!(typed_result, Value::String("Test".to_string())); - } - - #[test] - fn test_read_string_consistency_with_max_length() { - let buffer = b"LongString\x00"; - - // Test consistency between direct call and typed_value call with max_length - let direct_result = read_string(buffer, 0, Some(4)).unwrap(); - - let type_kind = TypeKind::String { - max_length: Some(4), - }; - let typed_result = read_typed_value(buffer, 0, &type_kind).unwrap(); - - assert_eq!(direct_result, typed_result); - assert_eq!(typed_result, Value::String("Long".to_string())); - 
} - - #[test] - fn test_read_string_edge_case_combinations() { - // Test various edge case combinations - let test_cases = [ - (b"" as &[u8], 0, None, true), // Empty buffer should fail - (b"\x00", 0, None, false), // Just null terminator - (b"A", 0, Some(0), false), // Zero max length - (b"AB", 1, Some(1), false), // Single char at offset - ]; - - for (buffer, offset, max_length, should_fail) in test_cases { - let result = read_string(buffer, offset, max_length); - - if should_fail { - assert!( - result.is_err(), - "Expected failure for buffer {buffer:?}, offset {offset}, max_length {max_length:?}" - ); - } else { - assert!( - result.is_ok(), - "Expected success for buffer {buffer:?}, offset {offset}, max_length {max_length:?}" - ); - } - } - } -} - -#[test] -fn test_read_typed_value_buffer_overrun() { - let buffer = &[0x12, 0x34]; - - // Try to read a long (4 bytes) from a 2-byte buffer - let long_type = TypeKind::Long { - endian: Endianness::Little, - signed: false, - }; - let result = read_typed_value(buffer, 0, &long_type); - assert!(result.is_err()); - assert_eq!( - result.unwrap_err(), - TypeReadError::BufferOverrun { - offset: 0, - buffer_len: 2 - } - ); - - // Try to read a short (2 bytes) at offset 1 from a 2-byte buffer - let short_type = TypeKind::Short { - endian: Endianness::Little, - signed: false, - }; - let result = read_typed_value(buffer, 1, &short_type); - assert!(result.is_err()); - assert_eq!( - result.unwrap_err(), - TypeReadError::BufferOverrun { - offset: 1, - buffer_len: 2 - } - ); -} - -#[test] -fn test_read_typed_value_all_supported_types() { - let buffer = &[0x7f, 0x34, 0x12, 0x78, 0x56, 0x34, 0x12, 0xbc, 0x9a]; - - // Test all supported TypeKind variants - let test_cases = vec![ - (TypeKind::Byte { signed: false }, 0, Value::Uint(0x7f)), - ( - TypeKind::Short { - endian: Endianness::Little, - signed: false, - }, - 1, - Value::Uint(0x1234), // bytes [0x34, 0x12] -> 0x1234 little-endian - ), - ( - TypeKind::Short { - endian: 
Endianness::Big, - signed: false, - }, - 1, - Value::Uint(0x3412), // bytes [0x34, 0x12] -> 0x3412 big-endian - ), - ( - TypeKind::Long { - endian: Endianness::Little, - signed: false, - }, - 1, - Value::Uint(0x5678_1234), // bytes [0x34, 0x12, 0x78, 0x56] -> 0x56781234 little-endian - ), - ( - TypeKind::Long { - endian: Endianness::Big, - signed: false, - }, - 1, - Value::Uint(0x3412_7856), // bytes [0x34, 0x12, 0x78, 0x56] -> 0x34127856 big-endian - ), - ]; - - for (type_kind, offset, expected) in test_cases { - let result = read_typed_value(buffer, offset, &type_kind).unwrap(); - assert_eq!(result, expected, "Failed for type: {type_kind:?}"); - } -} - -#[test] -fn test_read_typed_value_signed_vs_unsigned() { - let buffer = &[0xff, 0xff, 0xff, 0xff, 0xff, 0xff]; - - // Test signed vs unsigned interpretation for shorts - let unsigned_short = TypeKind::Short { - endian: Endianness::Little, - signed: false, - }; - let signed_short = TypeKind::Short { - endian: Endianness::Little, - signed: true, - }; - - let unsigned_result = read_typed_value(buffer, 0, &unsigned_short).unwrap(); - let signed_result = read_typed_value(buffer, 0, &signed_short).unwrap(); - - assert_eq!(unsigned_result, Value::Uint(65535)); - assert_eq!(signed_result, Value::Int(-1)); - - // Test signed vs unsigned interpretation for longs - let unsigned_long = TypeKind::Long { - endian: Endianness::Little, - signed: false, - }; - let signed_long = TypeKind::Long { - endian: Endianness::Little, - signed: true, - }; - - let unsigned_result = read_typed_value(buffer, 0, &unsigned_long).unwrap(); - let signed_result = read_typed_value(buffer, 0, &signed_long).unwrap(); - - assert_eq!(unsigned_result, Value::Uint(4_294_967_295)); - assert_eq!(signed_result, Value::Int(-1)); -} - -#[test] -fn test_read_typed_value_consistency_with_direct_calls() { - let buffer = &[0x34, 0x12, 0x78, 0x56, 0xbc, 0x9a, 0xde, 0xf0]; - - // Test that read_typed_value gives same results as direct function calls - let byte_type = 
TypeKind::Byte { signed: false }; - let direct_byte = read_byte(buffer, 0, false).unwrap(); - let typed_byte = read_typed_value(buffer, 0, &byte_type).unwrap(); - assert_eq!(direct_byte, typed_byte); - - let short_type = TypeKind::Short { - endian: Endianness::Little, - signed: false, - }; - let direct_short = read_short(buffer, 0, Endianness::Little, false).unwrap(); - let typed_short = read_typed_value(buffer, 0, &short_type).unwrap(); - assert_eq!(direct_short, typed_short); - - let long_type = TypeKind::Long { - endian: Endianness::Big, - signed: true, - }; - let direct_long = read_long(buffer, 0, Endianness::Big, true).unwrap(); - let typed_long = read_typed_value(buffer, 0, &long_type).unwrap(); - assert_eq!(direct_long, typed_long); -} - -#[test] -fn test_read_typed_value_empty_buffer() { - let buffer = &[]; - - // All types should fail on empty buffer - let types = vec![ - TypeKind::Byte { signed: false }, - TypeKind::Short { - endian: Endianness::Little, - signed: false, - }, - TypeKind::Long { - endian: Endianness::Little, - signed: false, - }, - ]; - - for type_kind in types { - let result = read_typed_value(buffer, 0, &type_kind); - assert!(result.is_err()); - match result.unwrap_err() { - TypeReadError::BufferOverrun { offset, buffer_len } => { - assert_eq!(offset, 0); - assert_eq!(buffer_len, 0); - } - TypeReadError::UnsupportedType { .. 
} => panic!("Expected BufferOverrun error"), - } - } -} - -#[test] -#[allow(clippy::too_many_lines)] -fn test_coerce_value_to_type() { - let cases = [ - // Signed byte: values above i8::MAX get coerced - ( - Value::Uint(0xff), - TypeKind::Byte { signed: true }, - Value::Int(-1), - ), - ( - Value::Uint(0x80), - TypeKind::Byte { signed: true }, - Value::Int(-128), - ), - ( - Value::Uint(0xfe), - TypeKind::Byte { signed: true }, - Value::Int(-2), - ), - // Signed byte: values in signed range pass through - ( - Value::Uint(0x7f), - TypeKind::Byte { signed: true }, - Value::Uint(0x7f), - ), - ( - Value::Uint(0), - TypeKind::Byte { signed: true }, - Value::Uint(0), - ), - ( - Value::Uint(1), - TypeKind::Byte { signed: true }, - Value::Uint(1), - ), - // Unsigned byte: all values pass through - ( - Value::Uint(0xff), - TypeKind::Byte { signed: false }, - Value::Uint(0xff), - ), - ( - Value::Uint(0x80), - TypeKind::Byte { signed: false }, - Value::Uint(0x80), - ), - // Signed short: values above i16::MAX get coerced - ( - Value::Uint(0xffff), - TypeKind::Short { - endian: Endianness::Native, - signed: true, - }, - Value::Int(-1), - ), - ( - Value::Uint(0x8000), - TypeKind::Short { - endian: Endianness::Native, - signed: true, - }, - Value::Int(-32768), - ), - ( - Value::Uint(0xffd8), - TypeKind::Short { - endian: Endianness::Big, - signed: true, - }, - Value::Int(-40), - ), - // Signed short: values in signed range pass through - ( - Value::Uint(0x7fff), - TypeKind::Short { - endian: Endianness::Native, - signed: true, - }, - Value::Uint(0x7fff), - ), - // Unsigned short: all values pass through - ( - Value::Uint(0xffff), - TypeKind::Short { - endian: Endianness::Native, - signed: false, - }, - Value::Uint(0xffff), - ), - // Signed long: values above i32::MAX get coerced - ( - Value::Uint(0xffff_ffff), - TypeKind::Long { - endian: Endianness::Native, - signed: true, - }, - Value::Int(-1), - ), - ( - Value::Uint(0x8000_0000), - TypeKind::Long { - endian: Endianness::Native, 
- signed: true, - }, - Value::Int(-2_147_483_648), - ), - ( - Value::Uint(0x8950_4e47), - TypeKind::Long { - endian: Endianness::Big, - signed: true, - }, - Value::Int(-1_991_225_785), - ), - // Signed long: values in signed range pass through - ( - Value::Uint(0x7fff_ffff), - TypeKind::Long { - endian: Endianness::Native, - signed: true, - }, - Value::Uint(0x7fff_ffff), - ), - // Unsigned long: all values pass through - ( - Value::Uint(0xffff_ffff), - TypeKind::Long { - endian: Endianness::Native, - signed: false, - }, - Value::Uint(0xffff_ffff), - ), - // Signed quad: values above i64::MAX get coerced - ( - Value::Uint(0xffff_ffff_ffff_ffff), - TypeKind::Quad { - endian: Endianness::Native, - signed: true, - }, - Value::Int(-1), - ), - ( - Value::Uint(0x8000_0000_0000_0000), - TypeKind::Quad { - endian: Endianness::Native, - signed: true, - }, - Value::Int(i64::MIN), - ), - // Signed quad: values in signed range pass through - ( - Value::Uint(0x7fff_ffff_ffff_ffff), - TypeKind::Quad { - endian: Endianness::Native, - signed: true, - }, - Value::Uint(0x7fff_ffff_ffff_ffff), - ), - // Unsigned quad: all values pass through - ( - Value::Uint(0xffff_ffff_ffff_ffff), - TypeKind::Quad { - endian: Endianness::Native, - signed: false, - }, - Value::Uint(0xffff_ffff_ffff_ffff), - ), - // Non-Uint values pass through unchanged - ( - Value::Int(-1), - TypeKind::Byte { signed: true }, - Value::Int(-1), - ), - ( - Value::Int(42), - TypeKind::Long { - endian: Endianness::Native, - signed: true, - }, - Value::Int(42), - ), - // String type: values pass through - ( - Value::Uint(0xff), - TypeKind::String { max_length: None }, - Value::Uint(0xff), - ), - ]; - - for (i, (input, type_kind, expected)) in cases.iter().enumerate() { - let result = coerce_value_to_type(input, type_kind); - assert_eq!( - result, *expected, - "Case {i}: coerce({input:?}, {type_kind:?})" - ); - } -} diff --git a/src/evaluator/types/mod.rs b/src/evaluator/types/mod.rs new file mode 100644 index 
00000000..0224d28e
--- /dev/null
+++ b/src/evaluator/types/mod.rs
@@ -0,0 +1,117 @@
+// Copyright (c) 2025-2026 the libmagic-rs contributors
+// SPDX-License-Identifier: Apache-2.0
+
+//! Type interpretation for reading and converting bytes from file buffers.
+//!
+//! This module exposes the public type-reading API and dispatches to focused
+//! submodules for numeric and string handling.
+
+mod numeric;
+mod string;
+
+use crate::parser::ast::{TypeKind, Value};
+use thiserror::Error;
+
+pub use numeric::{read_byte, read_long, read_quad, read_short};
+pub use string::read_string;
+
+/// Errors that can occur during type reading operations.
+///
+/// `PartialEq`/`Eq` are derived so that an error can be compared directly against
+/// an expected variant, which the unit tests do with `assert_eq!`.
+#[derive(Debug, Error, PartialEq, Eq)]
+pub enum TypeReadError {
+    /// Buffer access beyond available data.
+    ///
+    /// The numeric readers report the *requested* starting offset here, both when
+    /// too few bytes remain and when `offset + width` would overflow `usize`.
+    #[error(
+        "Buffer overrun: attempted to read at offset {offset} but buffer length is {buffer_len}"
+    )]
+    BufferOverrun {
+        /// The offset that was attempted to be accessed.
+        offset: usize,
+        /// The actual length of the buffer.
+        buffer_len: usize,
+    },
+    /// Unsupported type variant (reserved for future types not yet evaluatable,
+    /// e.g., regex, float, date).
+    #[error("Unsupported type: {type_name}")]
+    UnsupportedType {
+        /// The name of the unsupported type.
+        type_name: String,
+    },
+}
+
+/// Reads bytes according to the specified `TypeKind`.
+///
+/// Dispatches on the variant of `type_kind` and forwards to the matching reader
+/// (`read_byte`, `read_short`, `read_long`, `read_quad`, or `read_string`),
+/// passing along the signedness, endianness, or maximum length the variant carries.
+///
+/// # Arguments
+///
+/// * `buffer` - The byte buffer to read from
+/// * `offset` - The offset position to start reading from
+/// * `type_kind` - The type to interpret the bytes as
+///
+/// # Examples
+///
+/// ```
+/// use libmagic_rs::evaluator::types::read_typed_value;
+/// use libmagic_rs::parser::ast::{Endianness, TypeKind, Value};
+///
+/// let buffer = &[0x7f, 0x45, 0x4c, 0x46, 0x34, 0x12];
+/// let byte_result = read_typed_value(buffer, 0, &TypeKind::Byte { signed: false }).unwrap();
+/// assert_eq!(byte_result, Value::Uint(0x7f));
+///
+/// let short_type = TypeKind::Short {
+///     endian: Endianness::Little,
+///     signed: false,
+/// };
+/// let short_result = read_typed_value(buffer, 4, &short_type).unwrap();
+/// assert_eq!(short_result, Value::Uint(0x1234));
+/// ```
+///
+/// # Errors
+///
+/// Returns `TypeReadError::BufferOverrun` when the requested value extends past
+/// the buffer bounds.
+pub fn read_typed_value(
+    buffer: &[u8],
+    offset: usize,
+    type_kind: &TypeKind,
+) -> Result<Value, TypeReadError> {
+    // Deliberately no catch-all arm: a newly added `TypeKind` variant must fail to
+    // compile here until a reader is wired in for it.
+    match type_kind {
+        TypeKind::Byte { signed } => read_byte(buffer, offset, *signed),
+        TypeKind::Short { endian, signed } => read_short(buffer, offset, *endian, *signed),
+        TypeKind::Long { endian, signed } => read_long(buffer, offset, *endian, *signed),
+        TypeKind::Quad { endian, signed } => read_quad(buffer, offset, *endian, *signed),
+        TypeKind::String { max_length } => read_string(buffer, offset, *max_length),
+    }
+}
+
+/// Coerces a rule value to the signed width implied by `type_kind`.
+///
+/// A `Value::Uint` paired with a signed `Byte`, `Short`, `Long`, or `Quad` type is
+/// reinterpreted as a negative `Value::Int` when it exceeds that width's signed
+/// maximum: the low bits are kept and read as a two's-complement number. Every
+/// other combination is returned as an unchanged clone.
+///
+/// # Examples
+///
+/// ```
+/// use libmagic_rs::evaluator::types::coerce_value_to_type;
+/// use libmagic_rs::parser::ast::{TypeKind, Value};
+///
+/// let coerced = coerce_value_to_type(&Value::Uint(0xff), &TypeKind::Byte { signed: true });
+/// assert_eq!(coerced, Value::Int(-1));
+///
+/// // Values that already fit in the signed range pass through untouched.
+/// let kept = coerce_value_to_type(&Value::Uint(0x7f), &TypeKind::Byte { signed: true });
+/// assert_eq!(kept, Value::Uint(0x7f));
+/// ```
+#[must_use]
+pub fn coerce_value_to_type(value: &Value, type_kind: &TypeKind) -> Value {
+    // Only unsigned values can need reinterpretation; everything else is returned as-is.
+    if let Value::Uint(raw) = value {
+        let raw = *raw;
+        // Each arm fires only when `raw` exceeds the signed maximum for that width; the
+        // casts then keep the low bits and reinterpret them as a two's-complement value.
+        #[allow(clippy::cast_possible_truncation, clippy::cast_possible_wrap)]
+        let reinterpreted = match type_kind {
+            TypeKind::Byte { signed: true } if raw > i8::MAX as u64 => {
+                Some(i64::from(raw as u8 as i8))
+            }
+            TypeKind::Short { signed: true, .. } if raw > i16::MAX as u64 => {
+                Some(i64::from(raw as u16 as i16))
+            }
+            TypeKind::Long { signed: true, .. } if raw > i32::MAX as u64 => {
+                Some(i64::from(raw as u32 as i32))
+            }
+            TypeKind::Quad { signed: true, .. } if raw > i64::MAX as u64 => Some(raw as i64),
+            _ => None,
+        };
+        if let Some(signed_value) = reinterpreted {
+            return Value::Int(signed_value);
+        }
+    }
+    value.clone()
+}
+
+#[cfg(test)]
+mod tests;
diff --git a/src/evaluator/types/numeric.rs b/src/evaluator/types/numeric.rs
new file mode 100644
index 00000000..ce96fde5
--- /dev/null
+++ b/src/evaluator/types/numeric.rs
@@ -0,0 +1,668 @@
+// Copyright (c) 2025-2026 the libmagic-rs contributors
+// SPDX-License-Identifier: Apache-2.0
+
+use super::TypeReadError;
+use crate::parser::ast::{Endianness, Value};
+use byteorder::{BigEndian, ByteOrder, LittleEndian, NativeEndian};
+
+/// Safely reads a single byte from the buffer at the specified offset.
+///
+/// # Arguments
+///
+/// * `buffer` - The byte buffer to read from
+/// * `offset` - The offset position to read the byte from
+/// * `signed` - Whether to interpret the byte as signed (`i8`) or unsigned (`u8`)
+///
+/// # Examples
+///
+/// ```
+/// use libmagic_rs::evaluator::types::read_byte;
+/// use libmagic_rs::parser::ast::Value;
+///
+/// let buffer = &[0x7f, 0x80, 0x4c, 0x46];
+///
+/// let result = read_byte(buffer, 1, false).unwrap();
+/// assert_eq!(result, Value::Uint(0x80));
+///
+/// let result = read_byte(buffer, 1, true).unwrap();
+/// assert_eq!(result, Value::Int(-128));
+/// ```
+///
+/// # Errors
+///
+/// Returns `TypeReadError::BufferOverrun` if `offset` is outside the buffer.
+pub fn read_byte(buffer: &[u8], offset: usize, signed: bool) -> Result<Value, TypeReadError> {
+    buffer
+        .get(offset)
+        .map(|&byte| {
+            if signed {
+                // Reinterpret the raw bits as two's complement, then widen with sign
+                // extension so 0x80..=0xff become -128..=-1.
+                #[allow(clippy::cast_possible_wrap)]
+                Value::Int(i64::from(byte as i8))
+            } else {
+                Value::Uint(u64::from(byte))
+            }
+        })
+        .ok_or(TypeReadError::BufferOverrun {
+            offset,
+            buffer_len: buffer.len(),
+        })
+}
+
+/// Safely reads a 16-bit integer from the buffer at the specified offset.
+///
+/// # Arguments
+///
+/// * `buffer` - The byte buffer to read from
+/// * `offset` - The offset position to start reading from
+/// * `endian` - The byte order to use when interpreting the bytes
+/// * `signed` - Whether to interpret the value as signed (`i16`) or unsigned (`u16`)
+///
+/// # Examples
+///
+/// ```
+/// use libmagic_rs::evaluator::types::read_short;
+/// use libmagic_rs::parser::ast::{Endianness, Value};
+///
+/// let buffer = &[0x34, 0x12, 0xff, 0x7f];
+///
+/// let result = read_short(buffer, 0, Endianness::Little, false).unwrap();
+/// assert_eq!(result, Value::Uint(0x1234));
+///
+/// let result = read_short(buffer, 2, Endianness::Little, true).unwrap();
+/// assert_eq!(result, Value::Int(32767));
+/// ```
+///
+/// # Errors
+///
+/// Returns `TypeReadError::BufferOverrun` if fewer than 2 bytes are available at the
+/// requested offset (including when `offset + 2` would overflow `usize`).
+pub fn read_short(
+    buffer: &[u8],
+    offset: usize,
+    endian: Endianness,
+    signed: bool,
+) -> Result<Value, TypeReadError> {
+    // `checked_add` keeps `offset + 2` from wrapping for huge offsets, and `get`
+    // rejects any range that runs past the buffer; both cases are the same overrun.
+    let bytes = offset
+        .checked_add(2)
+        .and_then(|end| buffer.get(offset..end))
+        .ok_or(TypeReadError::BufferOverrun {
+            offset,
+            buffer_len: buffer.len(),
+        })?;
+
+    // `bytes` is exactly 2 bytes long here, which is what `read_u16` consumes.
+    let value = match endian {
+        Endianness::Little => LittleEndian::read_u16(bytes),
+        Endianness::Big => BigEndian::read_u16(bytes),
+        Endianness::Native => NativeEndian::read_u16(bytes),
+    };
+
+    if signed {
+        // Reinterpret the 16 raw bits as two's complement before sign-extending.
+        #[allow(clippy::cast_possible_wrap)]
+        Ok(Value::Int(i64::from(value as i16)))
+    } else {
+        Ok(Value::Uint(u64::from(value)))
+    }
+}
+
+/// Safely reads a 32-bit integer from the buffer at the specified offset.
+///
+/// # Arguments
+///
+/// * `buffer` - The byte buffer to read from
+/// * `offset` - The offset position to start reading from
+/// * `endian` - The byte order to use when interpreting the bytes
+/// * `signed` - Whether to interpret the value as signed (`i32`) or unsigned (`u32`)
+///
+/// # Examples
+///
+/// ```
+/// use libmagic_rs::evaluator::types::read_long;
+/// use libmagic_rs::parser::ast::{Endianness, Value};
+///
+/// let buffer = &[0x78, 0x56, 0x34, 0x12, 0xff, 0xff, 0xff, 0x7f];
+///
+/// let result = read_long(buffer, 0, Endianness::Little, false).unwrap();
+/// assert_eq!(result, Value::Uint(0x12345678));
+///
+/// let result = read_long(buffer, 4, Endianness::Little, true).unwrap();
+/// assert_eq!(result, Value::Int(2147483647));
+/// ```
+///
+/// # Errors
+///
+/// Returns `TypeReadError::BufferOverrun` if fewer than 4 bytes are available at the
+/// requested offset (including when `offset + 4` would overflow `usize`).
+pub fn read_long(
+    buffer: &[u8],
+    offset: usize,
+    endian: Endianness,
+    signed: bool,
+) -> Result<Value, TypeReadError> {
+    // `checked_add` keeps `offset + 4` from wrapping for huge offsets, and `get`
+    // rejects any range that runs past the buffer; both cases are the same overrun.
+    let bytes = offset
+        .checked_add(4)
+        .and_then(|end| buffer.get(offset..end))
+        .ok_or(TypeReadError::BufferOverrun {
+            offset,
+            buffer_len: buffer.len(),
+        })?;
+
+    // `bytes` is exactly 4 bytes long here, which is what `read_u32` consumes.
+    let value = match endian {
+        Endianness::Little => LittleEndian::read_u32(bytes),
+        Endianness::Big => BigEndian::read_u32(bytes),
+        Endianness::Native => NativeEndian::read_u32(bytes),
+    };
+
+    if signed {
+        // Reinterpret the 32 raw bits as two's complement before sign-extending.
+        #[allow(clippy::cast_possible_wrap)]
+        Ok(Value::Int(i64::from(value as i32)))
+    } else {
+        Ok(Value::Uint(u64::from(value)))
+    }
+}
+
+/// Safely reads a 64-bit integer from the buffer at the specified offset.
+///
+/// # Arguments
+///
+/// * `buffer` - The byte buffer to read from
+/// * `offset` - The offset position to start reading from
+/// * `endian` - The byte order to use when interpreting the bytes
+/// * `signed` - Whether to interpret the value as signed (`i64`) or unsigned (`u64`)
+///
+/// # Examples
+///
+/// ```
+/// use libmagic_rs::evaluator::types::read_quad;
+/// use libmagic_rs::parser::ast::{Endianness, Value};
+///
+/// let buffer = &[0xef, 0xcd, 0xab, 0x90, 0x78, 0x56, 0x34, 0x12];
+///
+/// let result = read_quad(buffer, 0, Endianness::Little, false).unwrap();
+/// assert_eq!(result, Value::Uint(0x1234_5678_90ab_cdef));
+///
+/// let result = read_quad(buffer, 0, Endianness::Little, true).unwrap();
+/// assert_eq!(result, Value::Int(0x1234_5678_90ab_cdef));
+/// ```
+///
+/// # Errors
+///
+/// Returns `TypeReadError::BufferOverrun` if fewer than 8 bytes are available at the
+/// requested offset (including when `offset + 8` would overflow `usize`).
+pub fn read_quad( + buffer: &[u8], + offset: usize, + endian: Endianness, + signed: bool, +) -> Result { + let end = offset.checked_add(8).ok_or(TypeReadError::BufferOverrun { + offset, + buffer_len: buffer.len(), + })?; + let bytes = buffer + .get(offset..end) + .ok_or(TypeReadError::BufferOverrun { + offset, + buffer_len: buffer.len(), + })?; + + let value = match endian { + Endianness::Little => LittleEndian::read_u64(bytes), + Endianness::Big => BigEndian::read_u64(bytes), + Endianness::Native => NativeEndian::read_u64(bytes), + }; + + if signed { + #[allow(clippy::cast_possible_wrap)] + Ok(Value::Int(value as i64)) + } else { + Ok(Value::Uint(value)) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_read_byte_values() { + let buffer: Vec = (0..=255).collect(); + for (i, &byte) in buffer.iter().enumerate() { + assert_eq!( + read_byte(&buffer, i, false).unwrap(), + Value::Uint(u64::from(byte)) + ); + } + } + + #[test] + fn test_read_byte_out_of_bounds() { + assert_eq!( + read_byte(&[], 0, false).unwrap_err(), + TypeReadError::BufferOverrun { + offset: 0, + buffer_len: 0 + } + ); + assert_eq!( + read_byte(&[0x42], 1, false).unwrap_err(), + TypeReadError::BufferOverrun { + offset: 1, + buffer_len: 1 + } + ); + assert_eq!( + read_byte(&[1, 2, 3], 100, false).unwrap_err(), + TypeReadError::BufferOverrun { + offset: 100, + buffer_len: 3 + } + ); + } + + #[test] + fn test_read_byte_signedness() { + let cases: Vec<(u8, bool, Value)> = vec![ + (0x00, false, Value::Uint(0)), + (0x7f, false, Value::Uint(127)), + (0x80, false, Value::Uint(128)), + (0xff, false, Value::Uint(255)), + (0x00, true, Value::Int(0)), + (0x7f, true, Value::Int(127)), + (0x80, true, Value::Int(-128)), + (0xff, true, Value::Int(-1)), + ]; + for (byte, signed, expected) in cases { + let result = read_byte(&[byte], 0, signed).unwrap(); + assert_eq!(result, expected, "byte=0x{byte:02x}, signed={signed}"); + } + } + + #[test] + fn test_read_short_little_endian_unsigned() { + let 
buffer = &[0x34, 0x12, 0x78, 0x56]; + let result = read_short(buffer, 0, Endianness::Little, false).unwrap(); + assert_eq!(result, Value::Uint(0x1234)); + + let result = read_short(buffer, 2, Endianness::Little, false).unwrap(); + assert_eq!(result, Value::Uint(0x5678)); + } + + #[test] + fn test_read_short_big_endian_unsigned() { + let buffer = &[0x12, 0x34, 0x56, 0x78]; + let result = read_short(buffer, 0, Endianness::Big, false).unwrap(); + assert_eq!(result, Value::Uint(0x1234)); + + let result = read_short(buffer, 2, Endianness::Big, false).unwrap(); + assert_eq!(result, Value::Uint(0x5678)); + } + + #[test] + fn test_read_short_native_endian_unsigned() { + let buffer = &[0x34, 0x12, 0x78, 0x56]; + let result = read_short(buffer, 0, Endianness::Native, false).unwrap(); + + match result { + Value::Uint(val) => assert!(val == 0x1234 || val == 0x3412), + _ => panic!("Expected Value::Uint variant"), + } + } + + #[test] + fn test_read_short_signed_positive() { + let buffer = &[0xff, 0x7f]; + let result = read_short(buffer, 0, Endianness::Little, true).unwrap(); + assert_eq!(result, Value::Int(32767)); + } + + #[test] + fn test_read_short_signed_negative() { + let buffer = &[0x00, 0x80]; + let result = read_short(buffer, 0, Endianness::Little, true).unwrap(); + assert_eq!(result, Value::Int(-32768)); + } + + #[test] + fn test_read_short_signed_vs_unsigned() { + let buffer = &[0xff, 0xff]; + let unsigned_result = read_short(buffer, 0, Endianness::Little, false).unwrap(); + assert_eq!(unsigned_result, Value::Uint(65535)); + + let signed_result = read_short(buffer, 0, Endianness::Little, true).unwrap(); + assert_eq!(signed_result, Value::Int(-1)); + } + + #[test] + fn test_read_short_buffer_overrun() { + let buffer = &[0x12]; + let result = read_short(buffer, 0, Endianness::Little, false); + assert!(result.is_err()); + assert_eq!( + result.unwrap_err(), + TypeReadError::BufferOverrun { + offset: 0, + buffer_len: 1 + } + ); + } + + #[test] + fn 
test_read_short_offset_out_of_bounds() { + let buffer = &[0x12, 0x34, 0x56]; + let result = read_short(buffer, 2, Endianness::Little, false); + assert!(result.is_err()); + assert_eq!( + result.unwrap_err(), + TypeReadError::BufferOverrun { + offset: 2, + buffer_len: 3 + } + ); + } + + #[test] + fn test_read_short_empty_buffer() { + let buffer = &[]; + let result = read_short(buffer, 0, Endianness::Little, false); + assert!(result.is_err()); + assert_eq!( + result.unwrap_err(), + TypeReadError::BufferOverrun { + offset: 0, + buffer_len: 0 + } + ); + } + + #[test] + fn test_read_short_all_endianness_variants() { + let buffer = &[0x12, 0x34]; + let little = read_short(buffer, 0, Endianness::Little, false).unwrap(); + let big = read_short(buffer, 0, Endianness::Big, false).unwrap(); + let native = read_short(buffer, 0, Endianness::Native, false).unwrap(); + + assert_eq!(little, Value::Uint(0x3412)); + assert_eq!(big, Value::Uint(0x1234)); + + match native { + Value::Uint(val) => assert!(val == 0x1234 || val == 0x3412), + _ => panic!("Expected Value::Uint variant"), + } + } + + #[test] + fn test_read_short_extreme_values() { + let max_buffer = &[0xff, 0xff]; + let max_result = read_short(max_buffer, 0, Endianness::Little, false).unwrap(); + assert_eq!(max_result, Value::Uint(u64::from(u16::MAX))); + + let zero_buffer = &[0x00, 0x00]; + let zero_result = read_short(zero_buffer, 0, Endianness::Little, false).unwrap(); + assert_eq!(zero_result, Value::Uint(0)); + } + + #[test] + fn test_read_long_little_endian_unsigned() { + let buffer = &[0x78, 0x56, 0x34, 0x12, 0xbc, 0x9a, 0x78, 0x56]; + let result = read_long(buffer, 0, Endianness::Little, false).unwrap(); + assert_eq!(result, Value::Uint(0x1234_5678)); + + let result = read_long(buffer, 4, Endianness::Little, false).unwrap(); + assert_eq!(result, Value::Uint(0x5678_9abc)); + } + + #[test] + fn test_read_long_big_endian_unsigned() { + let buffer = &[0x12, 0x34, 0x56, 0x78, 0x56, 0x78, 0x9a, 0xbc]; + let result = 
read_long(buffer, 0, Endianness::Big, false).unwrap(); + assert_eq!(result, Value::Uint(0x1234_5678)); + + let result = read_long(buffer, 4, Endianness::Big, false).unwrap(); + assert_eq!(result, Value::Uint(0x5678_9abc)); + } + + #[test] + fn test_read_long_native_endian_unsigned() { + let buffer = &[0x78, 0x56, 0x34, 0x12]; + let result = read_long(buffer, 0, Endianness::Native, false).unwrap(); + + match result { + Value::Uint(val) => assert!(val == 0x1234_5678 || val == 0x7856_3412), + _ => panic!("Expected Value::Uint variant"), + } + } + + #[test] + fn test_read_long_signed_positive() { + let buffer = &[0xff, 0xff, 0xff, 0x7f]; + let result = read_long(buffer, 0, Endianness::Little, true).unwrap(); + assert_eq!(result, Value::Int(2_147_483_647)); + } + + #[test] + fn test_read_long_signed_negative() { + let buffer = &[0x00, 0x00, 0x00, 0x80]; + let result = read_long(buffer, 0, Endianness::Little, true).unwrap(); + assert_eq!(result, Value::Int(-2_147_483_648)); + } + + #[test] + fn test_read_long_signed_vs_unsigned() { + let buffer = &[0xff, 0xff, 0xff, 0xff]; + let unsigned_result = read_long(buffer, 0, Endianness::Little, false).unwrap(); + assert_eq!(unsigned_result, Value::Uint(4_294_967_295)); + + let signed_result = read_long(buffer, 0, Endianness::Little, true).unwrap(); + assert_eq!(signed_result, Value::Int(-1)); + } + + #[test] + fn test_read_long_buffer_overrun() { + let buffer = &[0x12, 0x34, 0x56]; + let result = read_long(buffer, 0, Endianness::Little, false); + assert!(result.is_err()); + assert_eq!( + result.unwrap_err(), + TypeReadError::BufferOverrun { + offset: 0, + buffer_len: 3 + } + ); + } + + #[test] + fn test_read_long_offset_out_of_bounds() { + let buffer = &[0x12, 0x34, 0x56, 0x78, 0x9a]; + let result = read_long(buffer, 2, Endianness::Little, false); + assert!(result.is_err()); + assert_eq!( + result.unwrap_err(), + TypeReadError::BufferOverrun { + offset: 2, + buffer_len: 5 + } + ); + } + + #[test] + fn 
test_read_long_empty_buffer() { + let buffer = &[]; + let result = read_long(buffer, 0, Endianness::Little, false); + assert!(result.is_err()); + assert_eq!( + result.unwrap_err(), + TypeReadError::BufferOverrun { + offset: 0, + buffer_len: 0 + } + ); + } + + #[test] + fn test_read_long_all_endianness_variants() { + let buffer = &[0x12, 0x34, 0x56, 0x78]; + let little = read_long(buffer, 0, Endianness::Little, false).unwrap(); + let big = read_long(buffer, 0, Endianness::Big, false).unwrap(); + let native = read_long(buffer, 0, Endianness::Native, false).unwrap(); + + assert_eq!(little, Value::Uint(0x7856_3412)); + assert_eq!(big, Value::Uint(0x1234_5678)); + + match native { + Value::Uint(val) => assert!(val == 0x1234_5678 || val == 0x7856_3412), + _ => panic!("Expected Value::Uint variant"), + } + } + + #[test] + fn test_read_long_extreme_values() { + let max_buffer = &[0xff, 0xff, 0xff, 0xff]; + let max_result = read_long(max_buffer, 0, Endianness::Little, false).unwrap(); + assert_eq!(max_result, Value::Uint(u64::from(u32::MAX))); + + let zero_buffer = &[0x00, 0x00, 0x00, 0x00]; + let zero_result = read_long(zero_buffer, 0, Endianness::Little, false).unwrap(); + assert_eq!(zero_result, Value::Uint(0)); + } + + #[test] + fn test_read_quad_endianness_and_signedness() { + let cases: Vec<(&[u8], Endianness, bool, Value)> = vec![ + ( + &[0xef, 0xcd, 0xab, 0x90, 0x78, 0x56, 0x34, 0x12], + Endianness::Little, + false, + Value::Uint(0x1234_5678_90ab_cdef), + ), + ( + &[0x12, 0x34, 0x56, 0x78, 0x90, 0xab, 0xcd, 0xef], + Endianness::Big, + false, + Value::Uint(0x1234_5678_90ab_cdef), + ), + ( + &[0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7f], + Endianness::Little, + true, + Value::Int(i64::MAX), + ), + ( + &[0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80], + Endianness::Little, + true, + Value::Int(i64::MIN), + ), + ( + &[0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff], + Endianness::Big, + true, + Value::Int(-1), + ), + ( + &[0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 
0xff], + Endianness::Little, + false, + Value::Uint(u64::MAX), + ), + ( + &[0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00], + Endianness::Little, + false, + Value::Uint(0), + ), + ]; + for (buffer, endian, signed, expected) in cases { + let result = read_quad(buffer, 0, endian, signed).unwrap(); + assert_eq!(result, expected, "endian={endian:?}, signed={signed}"); + } + } + + #[test] + fn test_read_quad_buffer_overrun() { + let buffer = &[0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07]; + assert_eq!( + read_quad(buffer, 0, Endianness::Little, false).unwrap_err(), + TypeReadError::BufferOverrun { + offset: 0, + buffer_len: 7 + } + ); + + assert_eq!( + read_quad(&[], 0, Endianness::Big, false).unwrap_err(), + TypeReadError::BufferOverrun { + offset: 0, + buffer_len: 0 + } + ); + + let buffer = &[0x00; 16]; + assert_eq!( + read_quad(buffer, 10, Endianness::Little, false).unwrap_err(), + TypeReadError::BufferOverrun { + offset: 10, + buffer_len: 16 + } + ); + } + + #[test] + fn test_read_quad_at_offset() { + let buffer = &[0x00, 0x00, 0xef, 0xcd, 0xab, 0x90, 0x78, 0x56, 0x34, 0x12]; + let result = read_quad(buffer, 2, Endianness::Little, false).unwrap(); + assert_eq!(result, Value::Uint(0x1234_5678_90ab_cdef)); + } + + #[test] + fn test_read_short_offset_overflow() { + let buffer = &[0x12, 0x34]; + let result = read_short(buffer, usize::MAX, Endianness::Little, false); + assert_eq!( + result.unwrap_err(), + TypeReadError::BufferOverrun { + offset: usize::MAX, + buffer_len: 2, + } + ); + } + + #[test] + fn test_read_long_offset_overflow() { + let buffer = &[0x12, 0x34, 0x56, 0x78]; + let result = read_long(buffer, usize::MAX, Endianness::Little, false); + assert_eq!( + result.unwrap_err(), + TypeReadError::BufferOverrun { + offset: usize::MAX, + buffer_len: 4, + } + ); + } + + #[test] + fn test_read_quad_offset_overflow() { + let buffer = &[0x01; 8]; + let result = read_quad(buffer, usize::MAX, Endianness::Little, false); + assert_eq!( + result.unwrap_err(), + 
TypeReadError::BufferOverrun { + offset: usize::MAX, + buffer_len: 8, + } + ); + } + + #[test] + fn test_multi_byte_reading_consistency() { + let buffer = &[0x34, 0x12, 0x78, 0x56, 0xbc, 0x9a, 0xde, 0xf0]; + + let byte0 = read_byte(buffer, 0, false).unwrap(); + let byte1 = read_byte(buffer, 1, false).unwrap(); + let short = read_short(buffer, 0, Endianness::Little, false).unwrap(); + + match (byte0, byte1, short) { + (Value::Uint(b0), Value::Uint(b1), Value::Uint(s)) => { + assert_eq!(s, b0 + (b1 << 8)); + } + _ => panic!("Expected all Uint values"), + } + } +} diff --git a/src/evaluator/types/string.rs b/src/evaluator/types/string.rs new file mode 100644 index 00000000..5c36a5d8 --- /dev/null +++ b/src/evaluator/types/string.rs @@ -0,0 +1,325 @@ +// Copyright (c) 2025-2026 the libmagic-rs contributors +// SPDX-License-Identifier: Apache-2.0 + +use super::TypeReadError; +use crate::parser::ast::Value; + +/// Safely reads a null-terminated string from the buffer at the specified offset. +/// +/// This function reads bytes from the buffer starting at the given offset until it +/// encounters a null byte (0x00) or reaches the maximum length limit. The resulting +/// bytes are converted to a UTF-8 string with proper error handling for invalid +/// sequences. +/// +/// # Arguments +/// +/// * `buffer` - The byte buffer to read from +/// * `offset` - The offset position to start reading the string from +/// * `max_length` - Optional maximum number of bytes to read excluding the null terminator. +/// If a NUL is found within `max_length` bytes, it is not counted in the result length. +/// If no NUL is found, up to `max_length` bytes are returned with no trailing NUL. +/// When `None`, reads until the first NUL or end of buffer. +/// +/// # Returns +/// +/// Returns `Ok(Value::String(string))` if the read is successful. Invalid UTF-8 byte +/// sequences are replaced with the Unicode replacement character (U+FFFD) rather than +/// producing an error. 
+/// +/// # Security +/// +/// This function provides several security guarantees: +/// - Bounds checking prevents reading beyond buffer limits +/// - Length limits prevent excessive memory allocation +/// - Invalid UTF-8 sequences are safely replaced with U+FFFD, preventing undefined behavior +/// - Null termination handling prevents runaway reads +/// +/// # Examples +/// +/// ``` +/// use libmagic_rs::evaluator::types::read_string; +/// use libmagic_rs::parser::ast::Value; +/// +/// let buffer = b"Hello\x00World"; +/// let result = read_string(buffer, 0, None).unwrap(); +/// assert_eq!(result, Value::String("Hello".to_string())); +/// +/// let buffer = b"VeryLongString\x00"; +/// let result = read_string(buffer, 0, Some(4)).unwrap(); +/// assert_eq!(result, Value::String("Very".to_string())); +/// +/// let buffer = b"NoNull"; +/// let result = read_string(buffer, 0, Some(6)).unwrap(); +/// assert_eq!(result, Value::String("NoNull".to_string())); +/// ``` +/// +/// # Errors +/// +/// Returns `TypeReadError::BufferOverrun` if the offset is greater than or equal to the buffer +/// length. 
+pub fn read_string( + buffer: &[u8], + offset: usize, + max_length: Option, +) -> Result { + if offset >= buffer.len() { + return Err(TypeReadError::BufferOverrun { + offset, + buffer_len: buffer.len(), + }); + } + + let remaining_buffer = &buffer[offset..]; + let read_length = if let Some(max_len) = max_length { + let search_len = std::cmp::min(max_len, remaining_buffer.len()); + memchr::memchr(0, &remaining_buffer[..search_len]).unwrap_or(search_len) + } else { + memchr::memchr(0, remaining_buffer).unwrap_or(remaining_buffer.len()) + }; + + let string_bytes = &remaining_buffer[..read_length]; + let string_value = String::from_utf8_lossy(string_bytes).into_owned(); + + Ok(Value::String(string_value)) +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::evaluator::types::read_typed_value; + use crate::parser::ast::TypeKind; + + #[test] + fn test_read_string_null_terminated() { + let buffer = b"Hello\x00World"; + let result = read_string(buffer, 0, None).unwrap(); + assert_eq!(result, Value::String("Hello".to_string())); + } + + #[test] + fn test_read_string_null_terminated_at_offset() { + let buffer = b"Prefix\x00Hello\x00Suffix"; + let result = read_string(buffer, 7, None).unwrap(); + assert_eq!(result, Value::String("Hello".to_string())); + } + + #[test] + fn test_read_string_with_max_length_shorter_than_null() { + let buffer = b"VeryLongString\x00"; + let result = read_string(buffer, 0, Some(4)).unwrap(); + assert_eq!(result, Value::String("Very".to_string())); + } + + #[test] + fn test_read_string_with_max_length_longer_than_null() { + let buffer = b"Short\x00LongerSuffix"; + let result = read_string(buffer, 0, Some(10)).unwrap(); + assert_eq!(result, Value::String("Short".to_string())); + } + + #[test] + fn test_read_string_no_null_terminator_with_max_length() { + let buffer = b"NoNullTerminator"; + let result = read_string(buffer, 0, Some(6)).unwrap(); + assert_eq!(result, Value::String("NoNull".to_string())); + } + + #[test] + fn 
test_read_string_no_null_terminator_no_max_length() { + let buffer = b"NoNullTerminator"; + let result = read_string(buffer, 0, None).unwrap(); + assert_eq!(result, Value::String("NoNullTerminator".to_string())); + } + + #[test] + fn test_read_string_empty_string() { + let buffer = b"\x00Hello"; + let result = read_string(buffer, 0, None).unwrap(); + assert_eq!(result, Value::String(String::new())); + } + + #[test] + fn test_read_string_empty_buffer() { + let buffer = b""; + let result = read_string(buffer, 0, None); + assert!(result.is_err()); + assert_eq!( + result.unwrap_err(), + TypeReadError::BufferOverrun { + offset: 0, + buffer_len: 0 + } + ); + } + + #[test] + fn test_read_string_offset_out_of_bounds() { + let buffer = b"Hello"; + let result = read_string(buffer, 10, None); + assert!(result.is_err()); + assert_eq!( + result.unwrap_err(), + TypeReadError::BufferOverrun { + offset: 10, + buffer_len: 5 + } + ); + } + + #[test] + fn test_read_string_offset_at_buffer_end() { + let buffer = b"Hello"; + let result = read_string(buffer, 5, None); + assert!(result.is_err()); + assert_eq!( + result.unwrap_err(), + TypeReadError::BufferOverrun { + offset: 5, + buffer_len: 5 + } + ); + } + + #[test] + fn test_read_string_max_length_zero() { + let buffer = b"Hello\x00World"; + let result = read_string(buffer, 0, Some(0)).unwrap(); + assert_eq!(result, Value::String(String::new())); + } + + #[test] + fn test_read_string_max_length_larger_than_buffer() { + let buffer = b"Short"; + let result = read_string(buffer, 0, Some(100)).unwrap(); + assert_eq!(result, Value::String("Short".to_string())); + } + + #[test] + fn test_read_string_utf8_valid() { + let buffer = b"Caf\xc3\xa9\x00"; + let result = read_string(buffer, 0, None).unwrap(); + assert_eq!(result, Value::String("Café".to_string())); + } + + #[test] + fn test_read_string_utf8_invalid() { + let buffer = b"Invalid\xff\xfe\x00"; + let result = read_string(buffer, 0, None).unwrap(); + assert!(matches!(result, 
Value::String(_))); + if let Value::String(s) = result { + assert!(s.starts_with("Invalid")); + assert!(s.contains('\u{FFFD}')); + } + } + + #[test] + fn test_read_string_binary_data() { + let buffer = &[0x48, 0x65, 0x6c, 0x6c, 0x6f, 0x00, 0x80, 0x90]; + let result = read_string(buffer, 0, None).unwrap(); + assert_eq!(result, Value::String("Hello".to_string())); + } + + #[test] + fn test_read_string_multiple_nulls() { + let buffer = b"First\x00\x00Second\x00"; + let result = read_string(buffer, 0, None).unwrap(); + assert_eq!(result, Value::String("First".to_string())); + + let result = read_string(buffer, 6, None).unwrap(); + assert_eq!(result, Value::String(String::new())); + } + + #[test] + fn test_read_string_ascii_control_characters() { + let buffer = b"Hello\x09World\x00"; + let result = read_string(buffer, 0, None).unwrap(); + assert_eq!(result, Value::String("Hello\tWorld".to_string())); + } + + #[test] + fn test_read_string_single_character() { + let buffer = b"A\x00"; + let result = read_string(buffer, 0, None).unwrap(); + assert_eq!(result, Value::String("A".to_string())); + } + + #[test] + fn test_read_string_max_length_exact_match() { + let buffer = b"Exact\x00"; + let result = read_string(buffer, 0, Some(5)).unwrap(); + assert_eq!(result, Value::String("Exact".to_string())); + } + + #[test] + fn test_read_string_at_buffer_boundary() { + let buffer = b"Hello"; + let result = read_string(buffer, 4, Some(1)).unwrap(); + assert_eq!(result, Value::String("o".to_string())); + } + + #[test] + fn test_read_string_whitespace_handling() { + let buffer = b" Spaces \x00"; + let result = read_string(buffer, 0, None).unwrap(); + assert_eq!(result, Value::String(" Spaces ".to_string())); + } + + #[test] + fn test_read_string_newline_characters() { + let buffer = b"Line1\nLine2\r\n\x00"; + let result = read_string(buffer, 0, None).unwrap(); + assert_eq!(result, Value::String("Line1\nLine2\r\n".to_string())); + } + + #[test] + fn 
test_read_string_consistency_with_typed_value() { + let buffer = b"Test\x00String"; + let direct_result = read_string(buffer, 0, None).unwrap(); + + let type_kind = TypeKind::String { max_length: None }; + let typed_result = read_typed_value(buffer, 0, &type_kind).unwrap(); + + assert_eq!(direct_result, typed_result); + assert_eq!(typed_result, Value::String("Test".to_string())); + } + + #[test] + fn test_read_string_consistency_with_max_length() { + let buffer = b"LongString\x00"; + let direct_result = read_string(buffer, 0, Some(4)).unwrap(); + + let type_kind = TypeKind::String { + max_length: Some(4), + }; + let typed_result = read_typed_value(buffer, 0, &type_kind).unwrap(); + + assert_eq!(direct_result, typed_result); + assert_eq!(typed_result, Value::String("Long".to_string())); + } + + #[test] + fn test_read_string_edge_case_combinations() { + let test_cases = [ + (b"" as &[u8], 0, None, true), + (b"\x00", 0, None, false), + (b"A", 0, Some(0), false), + (b"AB", 1, Some(1), false), + ]; + + for (buffer, offset, max_length, should_fail) in test_cases { + let result = read_string(buffer, offset, max_length); + + if should_fail { + assert!( + result.is_err(), + "Expected failure for buffer {buffer:?}, offset {offset}, max_length {max_length:?}" + ); + } else { + assert!( + result.is_ok(), + "Expected success for buffer {buffer:?}, offset {offset}, max_length {max_length:?}" + ); + } + } + } +} diff --git a/src/evaluator/types/tests.rs b/src/evaluator/types/tests.rs new file mode 100644 index 00000000..eea0a90e --- /dev/null +++ b/src/evaluator/types/tests.rs @@ -0,0 +1,463 @@ +// Copyright (c) 2025-2026 the libmagic-rs contributors +// SPDX-License-Identifier: Apache-2.0 + +use super::*; +use crate::parser::ast::Endianness; + +#[test] +fn test_type_read_error_display() { + let error = TypeReadError::BufferOverrun { + offset: 10, + buffer_len: 5, + }; + let msg = format!("{error}"); + assert!(msg.contains("offset 10")); + assert!(msg.contains("buffer length is 
5")); +} + +#[test] +fn test_unsupported_type_error_variants() { + let error = TypeReadError::UnsupportedType { + type_name: "CustomType".to_string(), + }; + assert!(format!("{error}").contains("CustomType")); + assert!(format!("{error:?}").contains("UnsupportedType")); + + assert_eq!( + error, + TypeReadError::UnsupportedType { + type_name: "CustomType".to_string(), + } + ); +} + +#[test] +fn test_read_typed_value_numeric_dispatch() { + let byte = read_typed_value(&[0x7f, 0x46], 0, &TypeKind::Byte { signed: false }).unwrap(); + assert_eq!(byte, Value::Uint(0x7f)); + + let short = read_typed_value( + &[0x34, 0x12, 0x78, 0x56], + 0, + &TypeKind::Short { + endian: Endianness::Little, + signed: false, + }, + ) + .unwrap(); + assert_eq!(short, Value::Uint(0x1234)); + + let short_signed = read_typed_value( + &[0x80, 0x00, 0x7f, 0xff], + 0, + &TypeKind::Short { + endian: Endianness::Big, + signed: true, + }, + ) + .unwrap(); + assert_eq!(short_signed, Value::Int(-32768)); + + let long = read_typed_value( + &[0x78, 0x56, 0x34, 0x12, 0xbc, 0x9a, 0x78, 0x56], + 0, + &TypeKind::Long { + endian: Endianness::Little, + signed: false, + }, + ) + .unwrap(); + assert_eq!(long, Value::Uint(0x1234_5678)); + + let long_signed = read_typed_value( + &[0x80, 0x00, 0x00, 0x00], + 0, + &TypeKind::Long { + endian: Endianness::Big, + signed: true, + }, + ) + .unwrap(); + assert_eq!(long_signed, Value::Int(-2_147_483_648)); + + let quad = read_typed_value( + &[0xef, 0xcd, 0xab, 0x90, 0x78, 0x56, 0x34, 0x12], + 0, + &TypeKind::Quad { + endian: Endianness::Little, + signed: false, + }, + ) + .unwrap(); + assert_eq!(quad, Value::Uint(0x1234_5678_90ab_cdef)); +} + +#[test] +fn test_read_typed_value_native_endian() { + let result = read_typed_value( + &[0x34, 0x12], + 0, + &TypeKind::Short { + endian: Endianness::Native, + signed: false, + }, + ) + .unwrap(); + + match result { + Value::Uint(val) => assert!(val == 0x1234 || val == 0x3412), + _ => panic!("Expected Value::Uint variant"), + } +} + 
+#[test] +fn test_read_typed_value_string_dispatch() { + let buffer = b"Hello\x00World\x00"; + + let result = read_typed_value(buffer, 0, &TypeKind::String { max_length: None }).unwrap(); + assert_eq!(result, Value::String("Hello".to_string())); + + let result = read_typed_value( + b"VeryLongString\x00", + 0, + &TypeKind::String { + max_length: Some(4), + }, + ) + .unwrap(); + assert_eq!(result, Value::String("Very".to_string())); +} + +#[test] +fn test_read_typed_value_buffer_overrun() { + let short_error = read_typed_value( + &[0x12], + 0, + &TypeKind::Short { + endian: Endianness::Little, + signed: false, + }, + ) + .unwrap_err(); + assert_eq!( + short_error, + TypeReadError::BufferOverrun { + offset: 0, + buffer_len: 1 + } + ); + + let long_error = read_typed_value( + &[0x12, 0x34], + 0, + &TypeKind::Long { + endian: Endianness::Little, + signed: false, + }, + ) + .unwrap_err(); + assert_eq!( + long_error, + TypeReadError::BufferOverrun { + offset: 0, + buffer_len: 2 + } + ); +} + +#[test] +fn test_read_typed_value_all_supported_types() { + let buffer = &[0x7f, 0x34, 0x12, 0x78, 0x56, 0x34, 0x12, 0xbc, 0x9a]; + let test_cases = [ + (TypeKind::Byte { signed: false }, 0, Value::Uint(0x7f)), + ( + TypeKind::Short { + endian: Endianness::Little, + signed: false, + }, + 1, + Value::Uint(0x1234), + ), + ( + TypeKind::Short { + endian: Endianness::Big, + signed: false, + }, + 1, + Value::Uint(0x3412), + ), + ( + TypeKind::Long { + endian: Endianness::Little, + signed: false, + }, + 1, + Value::Uint(0x5678_1234), + ), + ( + TypeKind::Long { + endian: Endianness::Big, + signed: false, + }, + 1, + Value::Uint(0x3412_7856), + ), + ( + TypeKind::Quad { + endian: Endianness::Little, + signed: false, + }, + 1, + Value::Uint(0x9abc_1234_5678_1234), + ), + ]; + + for (type_kind, offset, expected) in test_cases { + let result = read_typed_value(buffer, offset, &type_kind).unwrap(); + assert_eq!(result, expected, "Failed for type: {type_kind:?}"); + } +} + +#[test] +fn 
test_read_typed_value_signed_vs_unsigned() { + let buffer = &[0xff, 0xff, 0xff, 0xff, 0xff, 0xff]; + + let unsigned_short = read_typed_value( + buffer, + 0, + &TypeKind::Short { + endian: Endianness::Little, + signed: false, + }, + ) + .unwrap(); + let signed_short = read_typed_value( + buffer, + 0, + &TypeKind::Short { + endian: Endianness::Little, + signed: true, + }, + ) + .unwrap(); + assert_eq!(unsigned_short, Value::Uint(65535)); + assert_eq!(signed_short, Value::Int(-1)); + + let unsigned_long = read_typed_value( + buffer, + 0, + &TypeKind::Long { + endian: Endianness::Little, + signed: false, + }, + ) + .unwrap(); + let signed_long = read_typed_value( + buffer, + 0, + &TypeKind::Long { + endian: Endianness::Little, + signed: true, + }, + ) + .unwrap(); + assert_eq!(unsigned_long, Value::Uint(4_294_967_295)); + assert_eq!(signed_long, Value::Int(-1)); +} + +#[test] +fn test_read_typed_value_consistency_with_direct_calls() { + let buffer = &[0x34, 0x12, 0x78, 0x56, 0xbc, 0x9a, 0xde, 0xf0]; + + assert_eq!( + read_byte(buffer, 0, false).unwrap(), + read_typed_value(buffer, 0, &TypeKind::Byte { signed: false }).unwrap() + ); + assert_eq!( + read_short(buffer, 0, Endianness::Little, false).unwrap(), + read_typed_value( + buffer, + 0, + &TypeKind::Short { + endian: Endianness::Little, + signed: false, + }, + ) + .unwrap() + ); + assert_eq!( + read_long(buffer, 0, Endianness::Big, true).unwrap(), + read_typed_value( + buffer, + 0, + &TypeKind::Long { + endian: Endianness::Big, + signed: true, + }, + ) + .unwrap() + ); +} + +#[test] +fn test_read_typed_value_empty_buffer() { + for type_kind in [ + TypeKind::Byte { signed: false }, + TypeKind::Short { + endian: Endianness::Little, + signed: false, + }, + TypeKind::Long { + endian: Endianness::Little, + signed: false, + }, + ] { + let result = read_typed_value(&[], 0, &type_kind); + assert!(result.is_err()); + match result.unwrap_err() { + TypeReadError::BufferOverrun { offset, buffer_len } => { + assert_eq!(offset, 
0); + assert_eq!(buffer_len, 0); + } + TypeReadError::UnsupportedType { .. } => panic!("Expected BufferOverrun error"), + } + } +} + +#[test] +#[allow(clippy::too_many_lines)] +fn test_coerce_value_to_type() { + let cases = [ + ( + Value::Uint(0xff), + TypeKind::Byte { signed: true }, + Value::Int(-1), + ), + ( + Value::Uint(0x80), + TypeKind::Byte { signed: true }, + Value::Int(-128), + ), + ( + Value::Uint(0xfe), + TypeKind::Byte { signed: true }, + Value::Int(-2), + ), + ( + Value::Uint(0x7f), + TypeKind::Byte { signed: true }, + Value::Uint(0x7f), + ), + ( + Value::Uint(0xff), + TypeKind::Byte { signed: false }, + Value::Uint(0xff), + ), + ( + Value::Uint(0xffff), + TypeKind::Short { + endian: Endianness::Native, + signed: true, + }, + Value::Int(-1), + ), + ( + Value::Uint(0x8000), + TypeKind::Short { + endian: Endianness::Native, + signed: true, + }, + Value::Int(-32768), + ), + ( + Value::Uint(0x7fff), + TypeKind::Short { + endian: Endianness::Native, + signed: true, + }, + Value::Uint(0x7fff), + ), + ( + Value::Uint(0xffff_ffff), + TypeKind::Long { + endian: Endianness::Native, + signed: true, + }, + Value::Int(-1), + ), + ( + Value::Uint(0x8000_0000), + TypeKind::Long { + endian: Endianness::Native, + signed: true, + }, + Value::Int(-2_147_483_648), + ), + ( + Value::Uint(0x7fff_ffff), + TypeKind::Long { + endian: Endianness::Native, + signed: true, + }, + Value::Uint(0x7fff_ffff), + ), + ( + Value::Uint(0xffff_ffff_ffff_ffff), + TypeKind::Quad { + endian: Endianness::Native, + signed: true, + }, + Value::Int(-1), + ), + ( + Value::Uint(0x8000_0000_0000_0000), + TypeKind::Quad { + endian: Endianness::Native, + signed: true, + }, + Value::Int(i64::MIN), + ), + ( + Value::Uint(0x7fff_ffff_ffff_ffff), + TypeKind::Quad { + endian: Endianness::Native, + signed: true, + }, + Value::Uint(0x7fff_ffff_ffff_ffff), + ), + ( + Value::Uint(0xffff_ffff_ffff_ffff), + TypeKind::Quad { + endian: Endianness::Native, + signed: false, + }, + 
Value::Uint(0xffff_ffff_ffff_ffff), + ), + ( + Value::Int(-1), + TypeKind::Byte { signed: true }, + Value::Int(-1), + ), + ( + Value::Int(42), + TypeKind::Long { + endian: Endianness::Native, + signed: true, + }, + Value::Int(42), + ), + ( + Value::Uint(0xff), + TypeKind::String { max_length: None }, + Value::Uint(0xff), + ), + ]; + + for (i, (input, type_kind, expected)) in cases.iter().enumerate() { + let result = coerce_value_to_type(input, type_kind); + assert_eq!( + result, *expected, + "Case {i}: coerce({input:?}, {type_kind:?})" + ); + } +}