From 87be9cac1999e313da9a32acd92c5e420b7e99d1 Mon Sep 17 00:00:00 2001 From: UncleSp1d3r Date: Fri, 6 Mar 2026 16:46:48 -0500 Subject: [PATCH 1/3] refactor(evaluator): reorganize types module into submodules This commit restructures the `types` module by splitting it into focused submodules for numeric and string handling. The main `types.rs` file has been removed, and its functionality has been distributed across `numeric.rs`, `string.rs`, and a new `mod.rs` that serves as the public API surface. This change enhances code organization and maintainability while preserving existing functionality. - Removed `types.rs` and migrated its content to `numeric.rs` and `string.rs`. - Introduced a new `mod.rs` to expose the public API and manage submodule imports. - Updated error handling and type reading functions to ensure consistency across the new structure. No public API changes have been made, and all existing tests have been updated accordingly to reflect the new module organization. Signed-off-by: UncleSp1d3r --- src/evaluator/types.rs | 1836 -------------------------------- src/evaluator/types/mod.rs | 116 ++ src/evaluator/types/numeric.rs | 598 +++++++++++ src/evaluator/types/string.rs | 324 ++++++ src/evaluator/types/tests.rs | 463 ++++++++ 5 files changed, 1501 insertions(+), 1836 deletions(-) delete mode 100644 src/evaluator/types.rs create mode 100644 src/evaluator/types/mod.rs create mode 100644 src/evaluator/types/numeric.rs create mode 100644 src/evaluator/types/string.rs create mode 100644 src/evaluator/types/tests.rs diff --git a/src/evaluator/types.rs b/src/evaluator/types.rs deleted file mode 100644 index f2729a96..00000000 --- a/src/evaluator/types.rs +++ /dev/null @@ -1,1836 +0,0 @@ -// Copyright (c) 2025-2026 the libmagic-rs contributors -// SPDX-License-Identifier: Apache-2.0 - -//! Type interpretation for reading and converting bytes from file buffers -//! -//! 
This module provides functions for safely reading different data types from byte buffers -//! with proper bounds checking and error handling. - -use crate::parser::ast::{Endianness, TypeKind, Value}; -use byteorder::{BigEndian, ByteOrder, LittleEndian, NativeEndian}; -use thiserror::Error; - -/// Errors that can occur during type reading operations -#[derive(Debug, Error, PartialEq, Eq)] -pub enum TypeReadError { - /// Buffer access beyond available data - #[error( - "Buffer overrun: attempted to read at offset {offset} but buffer length is {buffer_len}" - )] - BufferOverrun { - /// The offset that was attempted to be accessed - offset: usize, - /// The actual length of the buffer - buffer_len: usize, - }, - /// Unsupported type variant - #[error("Unsupported type: {type_name}")] - UnsupportedType { - /// The name of the unsupported type - type_name: String, - }, -} - -/// Safely reads a single byte from the buffer at the specified offset -/// -/// This function provides secure byte reading with comprehensive bounds checking -/// to prevent buffer overruns and potential security vulnerabilities. -/// -/// # Arguments -/// -/// * `buffer` - The byte buffer to read from -/// * `offset` - The offset position to read the byte from -/// * `signed` - Whether to interpret the byte as signed (`i8`) or unsigned (`u8`) -/// -/// # Returns -/// -/// Returns `Ok(Value::Uint(byte_value))` for unsigned reads or -/// `Ok(Value::Int(byte_value))` for signed reads if the read is successful, or -/// `Err(TypeReadError::BufferOverrun)` if the offset is beyond the buffer bounds. 
-/// -/// # Security -/// -/// This function performs strict bounds checking to prevent: -/// - Buffer overruns that could lead to memory safety issues -/// - Reading uninitialized or out-of-bounds memory -/// - Integer overflow in offset calculations -/// -/// # Examples -/// -/// ``` -/// use libmagic_rs::evaluator::types::read_byte; -/// use libmagic_rs::parser::ast::Value; -/// -/// let buffer = &[0x7f, 0x80, 0x4c, 0x46]; // example bytes -/// -/// // Read unsigned byte (0x80 = 128) -/// let result = read_byte(buffer, 1, false).unwrap(); -/// assert_eq!(result, Value::Uint(0x80)); -/// -/// // Read signed byte (0x80 = -128) -/// let result = read_byte(buffer, 1, true).unwrap(); -/// assert_eq!(result, Value::Int(-128)); -/// ``` -/// -/// # Errors -/// -/// Returns `TypeReadError::BufferOverrun` if the offset is greater than or equal to -/// the buffer length. -pub fn read_byte(buffer: &[u8], offset: usize, signed: bool) -> Result { - buffer - .get(offset) - .map(|&byte| { - if signed { - // Wrapping is intentional: e.g., 0x80 -> -128 as i8 - #[allow(clippy::cast_possible_wrap)] - Value::Int(i64::from(byte as i8)) - } else { - Value::Uint(u64::from(byte)) - } - }) - .ok_or(TypeReadError::BufferOverrun { - offset, - buffer_len: buffer.len(), - }) -} - -/// Safely reads a 16-bit integer from the buffer at the specified offset -/// -/// # Arguments -/// -/// * `buffer` - The byte buffer to read from -/// * `offset` - The offset position to read the 16-bit value from -/// * `endian` - The byte order to use for interpretation -/// * `signed` - Whether to interpret the value as signed or unsigned -/// -/// # Returns -/// -/// Returns `Ok(Value::Uint(value))` for unsigned values or `Ok(Value::Int(value))` for signed values -/// if the read is successful, or `Err(TypeReadError::BufferOverrun)` if there are insufficient bytes. 
-/// -/// # Examples -/// -/// ``` -/// use libmagic_rs::evaluator::types::read_short; -/// use libmagic_rs::parser::ast::{Endianness, Value}; -/// -/// let buffer = &[0x34, 0x12, 0xff, 0x7f]; // Little-endian data -/// -/// // Read unsigned little-endian short (0x1234) -/// let result = read_short(buffer, 0, Endianness::Little, false).unwrap(); -/// assert_eq!(result, Value::Uint(0x1234)); -/// -/// // Read signed little-endian short (0x7fff = 32767) -/// let result = read_short(buffer, 2, Endianness::Little, true).unwrap(); -/// assert_eq!(result, Value::Int(32767)); -/// ``` -/// -/// # Errors -/// -/// Returns `TypeReadError::BufferOverrun` if there are fewer than 2 bytes available -/// starting at the specified offset. -pub fn read_short( - buffer: &[u8], - offset: usize, - endian: Endianness, - signed: bool, -) -> Result { - let end = offset.checked_add(2).ok_or(TypeReadError::BufferOverrun { - offset, - buffer_len: buffer.len(), - })?; - let bytes = buffer - .get(offset..end) - .ok_or(TypeReadError::BufferOverrun { - offset, - buffer_len: buffer.len(), - })?; - - let value = match endian { - Endianness::Little => LittleEndian::read_u16(bytes), - Endianness::Big => BigEndian::read_u16(bytes), - Endianness::Native => NativeEndian::read_u16(bytes), - }; - - if signed { - #[allow(clippy::cast_possible_wrap)] - Ok(Value::Int(i64::from(value as i16))) - } else { - Ok(Value::Uint(u64::from(value))) - } -} - -/// Safely reads a 32-bit integer from the buffer at the specified offset -/// -/// # Arguments -/// -/// * `buffer` - The byte buffer to read from -/// * `offset` - The offset position to read the 32-bit value from -/// * `endian` - The byte order to use for interpretation -/// * `signed` - Whether to interpret the value as signed or unsigned -/// -/// # Returns -/// -/// Returns `Ok(Value::Uint(value))` for unsigned values or `Ok(Value::Int(value))` for signed values -/// if the read is successful, or `Err(TypeReadError::BufferOverrun)` if there are 
insufficient bytes. -/// -/// # Examples -/// -/// ``` -/// use libmagic_rs::evaluator::types::read_long; -/// use libmagic_rs::parser::ast::{Endianness, Value}; -/// -/// let buffer = &[0x78, 0x56, 0x34, 0x12, 0xff, 0xff, 0xff, 0x7f]; -/// -/// // Read unsigned little-endian long (0x12345678) -/// let result = read_long(buffer, 0, Endianness::Little, false).unwrap(); -/// assert_eq!(result, Value::Uint(0x12345678)); -/// -/// // Read signed little-endian long (0x7fffffff = 2147483647) -/// let result = read_long(buffer, 4, Endianness::Little, true).unwrap(); -/// assert_eq!(result, Value::Int(2147483647)); -/// ``` -/// -/// # Errors -/// -/// Returns `TypeReadError::BufferOverrun` if there are fewer than 4 bytes available -/// starting at the specified offset. -pub fn read_long( - buffer: &[u8], - offset: usize, - endian: Endianness, - signed: bool, -) -> Result { - let end = offset.checked_add(4).ok_or(TypeReadError::BufferOverrun { - offset, - buffer_len: buffer.len(), - })?; - let bytes = buffer - .get(offset..end) - .ok_or(TypeReadError::BufferOverrun { - offset, - buffer_len: buffer.len(), - })?; - - let value = match endian { - Endianness::Little => LittleEndian::read_u32(bytes), - Endianness::Big => BigEndian::read_u32(bytes), - Endianness::Native => NativeEndian::read_u32(bytes), - }; - - if signed { - #[allow(clippy::cast_possible_wrap)] - Ok(Value::Int(i64::from(value as i32))) - } else { - Ok(Value::Uint(u64::from(value))) - } -} - -/// Safely reads a 64-bit integer from the buffer at the specified offset -/// -/// # Arguments -/// -/// * `buffer` - The byte buffer to read from -/// * `offset` - The offset position to read the 64-bit value from -/// * `endian` - The byte order to use for interpretation -/// * `signed` - Whether to interpret the value as signed or unsigned -/// -/// # Returns -/// -/// Returns `Ok(Value::Uint(value))` for unsigned values or `Ok(Value::Int(value))` for signed values -/// if the read is successful, or 
`Err(TypeReadError::BufferOverrun)` if there are insufficient bytes. -/// -/// # Examples -/// -/// ``` -/// use libmagic_rs::evaluator::types::read_quad; -/// use libmagic_rs::parser::ast::{Endianness, Value}; -/// -/// let buffer = &[0xef, 0xcd, 0xab, 0x90, 0x78, 0x56, 0x34, 0x12]; -/// -/// // Read unsigned little-endian quad (0x1234567890abcdef) -/// let result = read_quad(buffer, 0, Endianness::Little, false).unwrap(); -/// assert_eq!(result, Value::Uint(0x1234_5678_90ab_cdef)); -/// -/// // Read signed little-endian quad (positive value fits in i64) -/// let result = read_quad(buffer, 0, Endianness::Little, true).unwrap(); -/// assert_eq!(result, Value::Int(0x1234_5678_90ab_cdef)); -/// -/// // Read signed little-endian quad with high bit set (sign extension) -/// let neg_buffer = &[0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80]; -/// let result = read_quad(neg_buffer, 0, Endianness::Little, true).unwrap(); -/// assert_eq!(result, Value::Int(-9_223_372_036_854_775_808)); // i64::MIN -/// ``` -/// -/// # Errors -/// -/// Returns `TypeReadError::BufferOverrun` if there are fewer than 8 bytes available -/// starting at the specified offset. 
-pub fn read_quad( - buffer: &[u8], - offset: usize, - endian: Endianness, - signed: bool, -) -> Result { - let end = offset.checked_add(8).ok_or(TypeReadError::BufferOverrun { - offset, - buffer_len: buffer.len(), - })?; - let bytes = buffer - .get(offset..end) - .ok_or(TypeReadError::BufferOverrun { - offset, - buffer_len: buffer.len(), - })?; - - let value = match endian { - Endianness::Little => LittleEndian::read_u64(bytes), - Endianness::Big => BigEndian::read_u64(bytes), - Endianness::Native => NativeEndian::read_u64(bytes), - }; - - if signed { - #[allow(clippy::cast_possible_wrap)] - Ok(Value::Int(value as i64)) - } else { - Ok(Value::Uint(value)) - } -} - -/// Safely reads a null-terminated string from the buffer at the specified offset -/// -/// This function reads bytes from the buffer starting at the given offset until it encounters -/// a null byte (0x00) or reaches the maximum length limit. The resulting bytes are converted -/// to a UTF-8 string with proper error handling for invalid sequences. -/// -/// # Arguments -/// -/// * `buffer` - The byte buffer to read from -/// * `offset` - The offset position to start reading the string from -/// * `max_length` - Optional maximum number of bytes to read excluding the null terminator. -/// If a NUL is found within `max_length` bytes, it is not counted in the result length. -/// If no NUL is found, up to `max_length` bytes are returned with no trailing NUL. -/// When `None`, reads until the first NUL or end of buffer. -/// -/// # Returns -/// -/// Returns `Ok(Value::String(string))` if the read is successful, or an appropriate error -/// if the read fails due to buffer overrun or invalid UTF-8 sequences. 
-/// -/// # Security -/// -/// This function provides several security guarantees: -/// - Bounds checking prevents reading beyond buffer limits -/// - Length limits prevent excessive memory allocation -/// - UTF-8 validation ensures string safety -/// - Null termination handling prevents runaway reads -/// -/// # Examples -/// -/// ``` -/// use libmagic_rs::evaluator::types::read_string; -/// use libmagic_rs::parser::ast::Value; -/// -/// // Null-terminated string -/// let buffer = b"Hello\x00World"; -/// let result = read_string(buffer, 0, None).unwrap(); -/// assert_eq!(result, Value::String("Hello".to_string())); -/// -/// // String with length limit -/// let buffer = b"VeryLongString\x00"; -/// let result = read_string(buffer, 0, Some(4)).unwrap(); -/// assert_eq!(result, Value::String("Very".to_string())); -/// -/// // String without null terminator (reads to max_length) -/// let buffer = b"NoNull"; -/// let result = read_string(buffer, 0, Some(6)).unwrap(); -/// assert_eq!(result, Value::String("NoNull".to_string())); -/// -/// // NUL found within max_length (NUL not counted in result) -/// let buffer = b"Hello\x00World"; -/// let result = read_string(buffer, 0, Some(10)).unwrap(); -/// assert_eq!(result, Value::String("Hello".to_string())); -/// -/// // No NUL found, returns exactly max_length bytes -/// let buffer = b"ABCDEF"; -/// let result = read_string(buffer, 0, Some(4)).unwrap(); -/// assert_eq!(result, Value::String("ABCD".to_string())); -/// ``` -/// -/// # Errors -/// -/// Returns `TypeReadError::BufferOverrun` if the offset is greater than or equal to the buffer length. 
-pub fn read_string( - buffer: &[u8], - offset: usize, - max_length: Option, -) -> Result { - // Check if offset is within buffer bounds - if offset >= buffer.len() { - return Err(TypeReadError::BufferOverrun { - offset, - buffer_len: buffer.len(), - }); - } - - // Get the slice starting from offset - let remaining_buffer = &buffer[offset..]; - - // Determine the actual length to read (uses memchr for efficient null byte scanning) - let read_length = if let Some(max_len) = max_length { - // Find null terminator within max_length, or use max_length if no null found - let search_len = std::cmp::min(max_len, remaining_buffer.len()); - memchr::memchr(0, &remaining_buffer[..search_len]).unwrap_or(search_len) - } else { - // Find null terminator in entire remaining buffer - memchr::memchr(0, remaining_buffer).unwrap_or(remaining_buffer.len()) - }; - - // Extract the string bytes (excluding null terminator) - let string_bytes = &remaining_buffer[..read_length]; - - // Convert to UTF-8 string, replacing invalid sequences with replacement character - let string_value = String::from_utf8_lossy(string_bytes).into_owned(); - - Ok(Value::String(string_value)) -} - -/// Reads and interprets bytes according to the specified `TypeKind` -/// -/// This is the main interface for type interpretation that dispatches to the appropriate -/// reading function based on the `TypeKind` variant. -/// -/// # Arguments -/// -/// * `buffer` - The byte buffer to read from -/// * `offset` - The offset position to read from -/// * `type_kind` - The type specification that determines how to interpret the bytes -/// -/// # Returns -/// -/// Returns the interpreted value as a `Value` enum variant, or an error if the read fails. 
-/// -/// # Examples -/// -/// ``` -/// use libmagic_rs::evaluator::types::read_typed_value; -/// use libmagic_rs::parser::ast::{TypeKind, Endianness, Value}; -/// -/// let buffer = &[0x7f, 0x45, 0x4c, 0x46, 0x34, 0x12]; -/// -/// // Read an unsigned byte -/// let byte_result = read_typed_value(buffer, 0, &TypeKind::Byte { signed: false }).unwrap(); -/// assert_eq!(byte_result, Value::Uint(0x7f)); -/// -/// // Read a little-endian short -/// let short_type = TypeKind::Short { -/// endian: Endianness::Little, -/// signed: false, -/// }; -/// let short_result = read_typed_value(buffer, 4, &short_type).unwrap(); -/// assert_eq!(short_result, Value::Uint(0x1234)); -/// ``` -/// -/// # Errors -/// -/// Returns `TypeReadError::BufferOverrun` if there are insufficient bytes for the requested type, -/// or `TypeReadError::UnsupportedType` for type variants that are not yet implemented. -pub fn read_typed_value( - buffer: &[u8], - offset: usize, - type_kind: &TypeKind, -) -> Result { - match type_kind { - TypeKind::Byte { signed } => read_byte(buffer, offset, *signed), - TypeKind::Short { endian, signed } => read_short(buffer, offset, *endian, *signed), - TypeKind::Long { endian, signed } => read_long(buffer, offset, *endian, *signed), - TypeKind::Quad { endian, signed } => read_quad(buffer, offset, *endian, *signed), - TypeKind::String { max_length } => read_string(buffer, offset, *max_length), - } -} - -/// Coerce a rule's expected value to match the type's signedness and width. -/// -/// In libmagic, comparison values like `0xff` in `0 byte =0xff` are interpreted -/// at the type's bit width. For a signed byte, `0xff` means `-1` (the signed -/// interpretation of that bit pattern). This function performs that coercion so -/// that comparisons work correctly regardless of how the value was parsed. -/// -/// Only affects `Value::Uint` values paired with signed types whose values exceed -/// the signed range. All other combinations pass through unchanged. 
-/// -/// # Examples -/// -/// ``` -/// use libmagic_rs::evaluator::types::coerce_value_to_type; -/// use libmagic_rs::parser::ast::{TypeKind, Value}; -/// -/// // 0xff for signed byte -> -1 -/// let coerced = coerce_value_to_type(&Value::Uint(0xff), &TypeKind::Byte { signed: true }); -/// assert_eq!(coerced, Value::Int(-1)); -/// -/// // 0x7f for signed byte -> unchanged (fits in signed range) -/// let coerced = coerce_value_to_type(&Value::Uint(0x7f), &TypeKind::Byte { signed: true }); -/// assert_eq!(coerced, Value::Uint(0x7f)); -/// -/// // Unsigned types pass through unchanged -/// let coerced = coerce_value_to_type(&Value::Uint(0xff), &TypeKind::Byte { signed: false }); -/// assert_eq!(coerced, Value::Uint(0xff)); -/// ``` -#[must_use] -pub fn coerce_value_to_type(value: &Value, type_kind: &TypeKind) -> Value { - match (value, type_kind) { - (Value::Uint(v), TypeKind::Byte { signed: true }) if *v > i8::MAX as u64 => - { - #[allow(clippy::cast_possible_truncation, clippy::cast_possible_wrap)] - Value::Int(i64::from(*v as u8 as i8)) - } - (Value::Uint(v), TypeKind::Short { signed: true, .. }) if *v > i16::MAX as u64 => - { - #[allow(clippy::cast_possible_truncation, clippy::cast_possible_wrap)] - Value::Int(i64::from(*v as u16 as i16)) - } - (Value::Uint(v), TypeKind::Long { signed: true, .. }) if *v > i32::MAX as u64 => - { - #[allow(clippy::cast_possible_truncation, clippy::cast_possible_wrap)] - Value::Int(i64::from(*v as u32 as i32)) - } - (Value::Uint(v), TypeKind::Quad { signed: true, .. 
}) if *v > i64::MAX as u64 => - { - #[allow(clippy::cast_possible_wrap)] - Value::Int(*v as i64) - } - _ => value.clone(), - } -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn test_read_byte_values() { - // All 256 unsigned values - let buffer: Vec = (0..=255).collect(); - for (i, &byte) in buffer.iter().enumerate() { - assert_eq!( - read_byte(&buffer, i, false).unwrap(), - Value::Uint(u64::from(byte)) - ); - } - } - - #[test] - fn test_read_byte_out_of_bounds() { - // Empty buffer - assert_eq!( - read_byte(&[], 0, false).unwrap_err(), - TypeReadError::BufferOverrun { - offset: 0, - buffer_len: 0 - } - ); - // Just past end - assert_eq!( - read_byte(&[0x42], 1, false).unwrap_err(), - TypeReadError::BufferOverrun { - offset: 1, - buffer_len: 1 - } - ); - // Way past end - assert_eq!( - read_byte(&[1, 2, 3], 100, false).unwrap_err(), - TypeReadError::BufferOverrun { - offset: 100, - buffer_len: 3 - } - ); - } - - #[test] - fn test_read_byte_signedness() { - let cases: Vec<(u8, bool, Value)> = vec![ - (0x00, false, Value::Uint(0)), - (0x7f, false, Value::Uint(127)), - (0x80, false, Value::Uint(128)), - (0xff, false, Value::Uint(255)), - (0x00, true, Value::Int(0)), - (0x7f, true, Value::Int(127)), - (0x80, true, Value::Int(-128)), - (0xff, true, Value::Int(-1)), - ]; - for (byte, signed, expected) in cases { - let result = read_byte(&[byte], 0, signed).unwrap(); - assert_eq!(result, expected, "byte=0x{byte:02x}, signed={signed}"); - } - } - - #[test] - fn test_type_read_error_display() { - let error = TypeReadError::BufferOverrun { - offset: 10, - buffer_len: 5, - }; - let msg = format!("{error}"); - assert!(msg.contains("offset 10")); - assert!(msg.contains("buffer length is 5")); - } - - // Tests for read_short function - #[test] - fn test_read_short_little_endian_unsigned() { - let buffer = &[0x34, 0x12, 0x78, 0x56]; // 0x1234, 0x5678 in little-endian - - // Read first short (0x1234) - let result = read_short(buffer, 0, Endianness::Little, 
false).unwrap(); - assert_eq!(result, Value::Uint(0x1234)); - - // Read second short (0x5678) - let result = read_short(buffer, 2, Endianness::Little, false).unwrap(); - assert_eq!(result, Value::Uint(0x5678)); - } - - #[test] - fn test_read_short_big_endian_unsigned() { - let buffer = &[0x12, 0x34, 0x56, 0x78]; // 0x1234, 0x5678 in big-endian - - // Read first short (0x1234) - let result = read_short(buffer, 0, Endianness::Big, false).unwrap(); - assert_eq!(result, Value::Uint(0x1234)); - - // Read second short (0x5678) - let result = read_short(buffer, 2, Endianness::Big, false).unwrap(); - assert_eq!(result, Value::Uint(0x5678)); - } - - #[test] - fn test_read_short_native_endian_unsigned() { - let buffer = &[0x34, 0x12, 0x78, 0x56]; - - // Read using native endianness - let result = read_short(buffer, 0, Endianness::Native, false).unwrap(); - - // The exact value depends on the system's endianness, but it should be valid - match result { - Value::Uint(val) => { - // Should be either 0x1234 (little-endian) or 0x3412 (big-endian) - assert!(val == 0x1234 || val == 0x3412); - } - _ => panic!("Expected Value::Uint variant"), - } - } - - #[test] - fn test_read_short_signed_positive() { - let buffer = &[0xff, 0x7f]; // 0x7fff = 32767 in little-endian - - let result = read_short(buffer, 0, Endianness::Little, true).unwrap(); - assert_eq!(result, Value::Int(32767)); - } - - #[test] - fn test_read_short_signed_negative() { - let buffer = &[0x00, 0x80]; // 0x8000 = -32768 in little-endian (signed) - - let result = read_short(buffer, 0, Endianness::Little, true).unwrap(); - assert_eq!(result, Value::Int(-32768)); - } - - #[test] - fn test_read_short_signed_vs_unsigned() { - let buffer = &[0xff, 0xff]; // 0xffff - - // Unsigned interpretation - let unsigned_result = read_short(buffer, 0, Endianness::Little, false).unwrap(); - assert_eq!(unsigned_result, Value::Uint(65535)); - - // Signed interpretation - let signed_result = read_short(buffer, 0, Endianness::Little, 
true).unwrap(); - assert_eq!(signed_result, Value::Int(-1)); - } - - #[test] - fn test_read_short_buffer_overrun() { - let buffer = &[0x12]; // Only 1 byte available - - // Should fail when trying to read 2 bytes - let result = read_short(buffer, 0, Endianness::Little, false); - assert!(result.is_err()); - assert_eq!( - result.unwrap_err(), - TypeReadError::BufferOverrun { - offset: 0, - buffer_len: 1 - } - ); - } - - #[test] - fn test_read_short_offset_out_of_bounds() { - let buffer = &[0x12, 0x34, 0x56]; - - // Should fail when trying to read 2 bytes starting at offset 2 (only 1 byte left) - let result = read_short(buffer, 2, Endianness::Little, false); - assert!(result.is_err()); - assert_eq!( - result.unwrap_err(), - TypeReadError::BufferOverrun { - offset: 2, - buffer_len: 3 - } - ); - } - - #[test] - fn test_read_short_empty_buffer() { - let buffer = &[]; - - let result = read_short(buffer, 0, Endianness::Little, false); - assert!(result.is_err()); - assert_eq!( - result.unwrap_err(), - TypeReadError::BufferOverrun { - offset: 0, - buffer_len: 0 - } - ); - } - - #[test] - fn test_read_short_all_endianness_variants() { - let buffer = &[0x12, 0x34]; - - // Test all endianness variants - let little = read_short(buffer, 0, Endianness::Little, false).unwrap(); - let big = read_short(buffer, 0, Endianness::Big, false).unwrap(); - let native = read_short(buffer, 0, Endianness::Native, false).unwrap(); - - // Little-endian: 0x3412, Big-endian: 0x1234 - assert_eq!(little, Value::Uint(0x3412)); - assert_eq!(big, Value::Uint(0x1234)); - - // Native should match one of them - match native { - Value::Uint(val) => assert!(val == 0x1234 || val == 0x3412), - _ => panic!("Expected Value::Uint variant"), - } - } - - // Tests for read_long function - #[test] - fn test_read_long_little_endian_unsigned() { - let buffer = &[0x78, 0x56, 0x34, 0x12, 0xbc, 0x9a, 0x78, 0x56]; // 0x12345678, 0x56789abc - - // Read first long (0x12345678) - let result = read_long(buffer, 0, 
Endianness::Little, false).unwrap(); - assert_eq!(result, Value::Uint(0x1234_5678)); - - // Read second long (0x56789abc) - let result = read_long(buffer, 4, Endianness::Little, false).unwrap(); - assert_eq!(result, Value::Uint(0x5678_9abc)); - } - - #[test] - fn test_read_long_big_endian_unsigned() { - let buffer = &[0x12, 0x34, 0x56, 0x78, 0x56, 0x78, 0x9a, 0xbc]; // 0x12345678, 0x56789abc - - // Read first long (0x12345678) - let result = read_long(buffer, 0, Endianness::Big, false).unwrap(); - assert_eq!(result, Value::Uint(0x1234_5678)); - - // Read second long (0x56789abc) - let result = read_long(buffer, 4, Endianness::Big, false).unwrap(); - assert_eq!(result, Value::Uint(0x5678_9abc)); - } - - #[test] - fn test_read_long_native_endian_unsigned() { - let buffer = &[0x78, 0x56, 0x34, 0x12]; - - // Read using native endianness - let result = read_long(buffer, 0, Endianness::Native, false).unwrap(); - - // The exact value depends on the system's endianness, but it should be valid - match result { - Value::Uint(val) => { - // Should be either 0x12345678 (little-endian) or 0x78563412 (big-endian) - assert!(val == 0x1234_5678 || val == 0x7856_3412); - } - _ => panic!("Expected Value::Uint variant"), - } - } - - #[test] - fn test_read_long_signed_positive() { - let buffer = &[0xff, 0xff, 0xff, 0x7f]; // 0x7fffffff = 2147483647 in little-endian - - let result = read_long(buffer, 0, Endianness::Little, true).unwrap(); - assert_eq!(result, Value::Int(2_147_483_647)); - } - - #[test] - fn test_read_long_signed_negative() { - let buffer = &[0x00, 0x00, 0x00, 0x80]; // 0x80000000 = -2147483648 in little-endian (signed) - - let result = read_long(buffer, 0, Endianness::Little, true).unwrap(); - assert_eq!(result, Value::Int(-2_147_483_648)); - } - - #[test] - fn test_read_long_signed_vs_unsigned() { - let buffer = &[0xff, 0xff, 0xff, 0xff]; // 0xffffffff - - // Unsigned interpretation - let unsigned_result = read_long(buffer, 0, Endianness::Little, false).unwrap(); - 
assert_eq!(unsigned_result, Value::Uint(4_294_967_295)); - - // Signed interpretation - let signed_result = read_long(buffer, 0, Endianness::Little, true).unwrap(); - assert_eq!(signed_result, Value::Int(-1)); - } - - #[test] - fn test_read_long_buffer_overrun() { - let buffer = &[0x12, 0x34, 0x56]; // Only 3 bytes available - - // Should fail when trying to read 4 bytes - let result = read_long(buffer, 0, Endianness::Little, false); - assert!(result.is_err()); - assert_eq!( - result.unwrap_err(), - TypeReadError::BufferOverrun { - offset: 0, - buffer_len: 3 - } - ); - } - - #[test] - fn test_read_long_offset_out_of_bounds() { - let buffer = &[0x12, 0x34, 0x56, 0x78, 0x9a]; - - // Should fail when trying to read 4 bytes starting at offset 2 (only 3 bytes left) - let result = read_long(buffer, 2, Endianness::Little, false); - assert!(result.is_err()); - assert_eq!( - result.unwrap_err(), - TypeReadError::BufferOverrun { - offset: 2, - buffer_len: 5 - } - ); - } - - #[test] - fn test_read_long_empty_buffer() { - let buffer = &[]; - - let result = read_long(buffer, 0, Endianness::Little, false); - assert!(result.is_err()); - assert_eq!( - result.unwrap_err(), - TypeReadError::BufferOverrun { - offset: 0, - buffer_len: 0 - } - ); - } - - #[test] - fn test_read_long_all_endianness_variants() { - let buffer = &[0x12, 0x34, 0x56, 0x78]; - - // Test all endianness variants - let little = read_long(buffer, 0, Endianness::Little, false).unwrap(); - let big = read_long(buffer, 0, Endianness::Big, false).unwrap(); - let native = read_long(buffer, 0, Endianness::Native, false).unwrap(); - - // Little-endian: 0x78563412, Big-endian: 0x12345678 - assert_eq!(little, Value::Uint(0x7856_3412)); - assert_eq!(big, Value::Uint(0x1234_5678)); - - // Native should match one of them - match native { - Value::Uint(val) => assert!(val == 0x1234_5678 || val == 0x7856_3412), - _ => panic!("Expected Value::Uint variant"), - } - } - - #[test] - fn test_read_long_extreme_values() { - // Test 
maximum unsigned 32-bit value - let max_buffer = &[0xff, 0xff, 0xff, 0xff]; - let max_result = read_long(max_buffer, 0, Endianness::Little, false).unwrap(); - assert_eq!(max_result, Value::Uint(u64::from(u32::MAX))); - - // Test zero value - let zero_buffer = &[0x00, 0x00, 0x00, 0x00]; - let zero_result = read_long(zero_buffer, 0, Endianness::Little, false).unwrap(); - assert_eq!(zero_result, Value::Uint(0)); - } - - // Tests for read_quad function - #[test] - fn test_read_quad_endianness_and_signedness() { - let cases: Vec<(&[u8], Endianness, bool, Value)> = vec![ - // Little-endian unsigned - ( - &[0xef, 0xcd, 0xab, 0x90, 0x78, 0x56, 0x34, 0x12], - Endianness::Little, - false, - Value::Uint(0x1234_5678_90ab_cdef), - ), - // Big-endian unsigned - ( - &[0x12, 0x34, 0x56, 0x78, 0x90, 0xab, 0xcd, 0xef], - Endianness::Big, - false, - Value::Uint(0x1234_5678_90ab_cdef), - ), - // Little-endian signed positive - ( - &[0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7f], - Endianness::Little, - true, - Value::Int(i64::MAX), - ), - // Little-endian signed negative - ( - &[0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80], - Endianness::Little, - true, - Value::Int(i64::MIN), - ), - // Big-endian signed negative (-1) - ( - &[0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff], - Endianness::Big, - true, - Value::Int(-1), - ), - // Unsigned max value - ( - &[0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff], - Endianness::Little, - false, - Value::Uint(u64::MAX), - ), - // Zero - ( - &[0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00], - Endianness::Little, - false, - Value::Uint(0), - ), - ]; - for (buffer, endian, signed, expected) in cases { - let result = read_quad(buffer, 0, endian, signed).unwrap(); - assert_eq!(result, expected, "endian={endian:?}, signed={signed}"); - } - } - - #[test] - fn test_read_quad_buffer_overrun() { - // Too few bytes (only 7) - let buffer = &[0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07]; - assert_eq!( - read_quad(buffer, 0, Endianness::Little, 
false).unwrap_err(), - TypeReadError::BufferOverrun { - offset: 0, - buffer_len: 7 - } - ); - - // Empty buffer - assert_eq!( - read_quad(&[], 0, Endianness::Big, false).unwrap_err(), - TypeReadError::BufferOverrun { - offset: 0, - buffer_len: 0 - } - ); - - // Offset past end - let buffer = &[0x00; 16]; - assert_eq!( - read_quad(buffer, 10, Endianness::Little, false).unwrap_err(), - TypeReadError::BufferOverrun { - offset: 10, - buffer_len: 16 - } - ); - } - - #[test] - fn test_read_quad_at_offset() { - let buffer = &[0x00, 0x00, 0xef, 0xcd, 0xab, 0x90, 0x78, 0x56, 0x34, 0x12]; - let result = read_quad(buffer, 2, Endianness::Little, false).unwrap(); - assert_eq!(result, Value::Uint(0x1234_5678_90ab_cdef)); - } - - #[test] - fn test_read_short_extreme_values() { - // Test maximum unsigned 16-bit value - let max_buffer = &[0xff, 0xff]; - let max_result = read_short(max_buffer, 0, Endianness::Little, false).unwrap(); - assert_eq!(max_result, Value::Uint(u64::from(u16::MAX))); - - // Test zero value - let zero_buffer = &[0x00, 0x00]; - let zero_result = read_short(zero_buffer, 0, Endianness::Little, false).unwrap(); - assert_eq!(zero_result, Value::Uint(0)); - } - - #[test] - fn test_multi_byte_reading_consistency() { - // Test that reading the same bytes with different functions gives consistent results - let buffer = &[0x34, 0x12, 0x78, 0x56, 0xbc, 0x9a, 0xde, 0xf0]; - - // Read as individual bytes - let byte0 = read_byte(buffer, 0, false).unwrap(); - let byte1 = read_byte(buffer, 1, false).unwrap(); - - // Read as short - let short = read_short(buffer, 0, Endianness::Little, false).unwrap(); - - // Verify consistency - match (byte0, byte1, short) { - (Value::Uint(b0), Value::Uint(b1), Value::Uint(s)) => { - assert_eq!(s, b0 + (b1 << 8)); // Little-endian composition - } - _ => panic!("Expected all Uint values"), - } - } - - // Tests for UnsupportedType error - #[test] - fn test_unsupported_type_error() { - let error = TypeReadError::UnsupportedType { - type_name: 
"CustomType".to_string(), - }; - - let error_string = format!("{error}"); - assert!(error_string.contains("Unsupported type")); - assert!(error_string.contains("CustomType")); - } - - #[test] - fn test_unsupported_type_error_debug() { - let error = TypeReadError::UnsupportedType { - type_name: "TestType".to_string(), - }; - - let debug_string = format!("{error:?}"); - assert!(debug_string.contains("UnsupportedType")); - assert!(debug_string.contains("TestType")); - } - - #[test] - fn test_unsupported_type_error_equality() { - let error1 = TypeReadError::UnsupportedType { - type_name: "Type1".to_string(), - }; - let error2 = TypeReadError::UnsupportedType { - type_name: "Type1".to_string(), - }; - let error3 = TypeReadError::UnsupportedType { - type_name: "Type2".to_string(), - }; - - assert_eq!(error1, error2); - assert_ne!(error1, error3); - } - - // Tests for read_typed_value function - #[test] - fn test_read_typed_value_byte() { - let buffer = &[0x7f, 0x45, 0x4c, 0x46]; - let type_kind = TypeKind::Byte { signed: false }; - - let result = read_typed_value(buffer, 0, &type_kind).unwrap(); - assert_eq!(result, Value::Uint(0x7f)); - - let result = read_typed_value(buffer, 3, &type_kind).unwrap(); - assert_eq!(result, Value::Uint(0x46)); - } - - #[test] - fn test_read_typed_value_short_unsigned_little_endian() { - let buffer = &[0x34, 0x12, 0x78, 0x56]; - let type_kind = TypeKind::Short { - endian: Endianness::Little, - signed: false, - }; - - let result = read_typed_value(buffer, 0, &type_kind).unwrap(); - assert_eq!(result, Value::Uint(0x1234)); - - let result = read_typed_value(buffer, 2, &type_kind).unwrap(); - assert_eq!(result, Value::Uint(0x5678)); - } - - #[test] - fn test_read_typed_value_short_signed_big_endian() { - let buffer = &[0x80, 0x00, 0x7f, 0xff]; - let type_kind = TypeKind::Short { - endian: Endianness::Big, - signed: true, - }; - - // 0x8000 = -32768 in signed 16-bit - let result = read_typed_value(buffer, 0, &type_kind).unwrap(); - 
assert_eq!(result, Value::Int(-32768)); - - // 0x7fff = 32767 in signed 16-bit - let result = read_typed_value(buffer, 2, &type_kind).unwrap(); - assert_eq!(result, Value::Int(32767)); - } - - #[test] - fn test_read_typed_value_long_unsigned_little_endian() { - let buffer = &[0x78, 0x56, 0x34, 0x12, 0xbc, 0x9a, 0x78, 0x56]; - let type_kind = TypeKind::Long { - endian: Endianness::Little, - signed: false, - }; - - let result = read_typed_value(buffer, 0, &type_kind).unwrap(); - assert_eq!(result, Value::Uint(0x1234_5678)); - - let result = read_typed_value(buffer, 4, &type_kind).unwrap(); - assert_eq!(result, Value::Uint(0x5678_9abc)); - } - - #[test] - fn test_read_typed_value_long_signed_big_endian() { - let buffer = &[0x80, 0x00, 0x00, 0x00, 0x7f, 0xff, 0xff, 0xff]; - let type_kind = TypeKind::Long { - endian: Endianness::Big, - signed: true, - }; - - // 0x80000000 = -2147483648 in signed 32-bit - let result = read_typed_value(buffer, 0, &type_kind).unwrap(); - assert_eq!(result, Value::Int(-2_147_483_648)); - - // 0x7fffffff = 2147483647 in signed 32-bit - let result = read_typed_value(buffer, 4, &type_kind).unwrap(); - assert_eq!(result, Value::Int(2_147_483_647)); - } - - #[test] - fn test_read_typed_value_native_endian() { - let buffer = &[0x34, 0x12, 0x78, 0x56, 0xbc, 0x9a, 0xde, 0xf0]; - - // Test short with native endianness - let short_type = TypeKind::Short { - endian: Endianness::Native, - signed: false, - }; - - let result = read_typed_value(buffer, 0, &short_type).unwrap(); - match result { - Value::Uint(val) => { - // Should be either 0x1234 (little-endian) or 0x3412 (big-endian) - assert!(val == 0x1234 || val == 0x3412); - } - _ => panic!("Expected Value::Uint variant"), - } - } - - #[test] - fn test_read_typed_value_string() { - let buffer = b"Hello\x00World\x00"; - let type_kind = TypeKind::String { max_length: None }; - - let result = read_typed_value(buffer, 0, &type_kind).unwrap(); - assert_eq!(result, Value::String("Hello".to_string())); - - 
let result = read_typed_value(buffer, 6, &type_kind).unwrap(); - assert_eq!(result, Value::String("World".to_string())); - } - - #[test] - fn test_read_typed_value_string_with_max_length() { - let buffer = b"VeryLongString\x00"; - let type_kind = TypeKind::String { - max_length: Some(4), - }; - - let result = read_typed_value(buffer, 0, &type_kind).unwrap(); - assert_eq!(result, Value::String("Very".to_string())); - } - - #[test] - fn test_read_typed_value_buffer_overrun() { - let buffer = &[0x12]; - let type_kind = TypeKind::Short { - endian: Endianness::Little, - signed: false, - }; - - let result = read_typed_value(buffer, 0, &type_kind); - assert!(result.is_err()); - assert_eq!( - result.unwrap_err(), - TypeReadError::BufferOverrun { - offset: 0, - buffer_len: 1 - } - ); - } - - // Tests for read_string function - #[test] - fn test_read_string_null_terminated() { - let buffer = b"Hello\x00World"; - - let result = read_string(buffer, 0, None).unwrap(); - assert_eq!(result, Value::String("Hello".to_string())); - } - - #[test] - fn test_read_string_null_terminated_at_offset() { - let buffer = b"Prefix\x00Hello\x00Suffix"; - - let result = read_string(buffer, 7, None).unwrap(); - assert_eq!(result, Value::String("Hello".to_string())); - } - - #[test] - fn test_read_string_with_max_length_shorter_than_null() { - let buffer = b"VeryLongString\x00"; - - // Max length is shorter than the null terminator position - let result = read_string(buffer, 0, Some(4)).unwrap(); - assert_eq!(result, Value::String("Very".to_string())); - } - - #[test] - fn test_read_string_with_max_length_longer_than_null() { - let buffer = b"Short\x00LongerSuffix"; - - // Max length is longer than the null terminator position - let result = read_string(buffer, 0, Some(10)).unwrap(); - assert_eq!(result, Value::String("Short".to_string())); - } - - #[test] - fn test_read_string_no_null_terminator_with_max_length() { - let buffer = b"NoNullTerminator"; - - // Should read up to max_length when no 
null terminator is found - let result = read_string(buffer, 0, Some(6)).unwrap(); - assert_eq!(result, Value::String("NoNull".to_string())); - } - - #[test] - fn test_read_string_no_null_terminator_no_max_length() { - let buffer = b"NoNullTerminator"; - - // Should read entire remaining buffer when no null terminator and no max_length - let result = read_string(buffer, 0, None).unwrap(); - assert_eq!(result, Value::String("NoNullTerminator".to_string())); - } - - #[test] - fn test_read_string_empty_string() { - let buffer = b"\x00Hello"; - - // Should return empty string when null terminator is at offset - let result = read_string(buffer, 0, None).unwrap(); - assert_eq!(result, Value::String(String::new())); - } - - #[test] - fn test_read_string_empty_buffer() { - let buffer = b""; - - // Should fail with buffer overrun for empty buffer - let result = read_string(buffer, 0, None); - assert!(result.is_err()); - assert_eq!( - result.unwrap_err(), - TypeReadError::BufferOverrun { - offset: 0, - buffer_len: 0 - } - ); - } - - #[test] - fn test_read_string_offset_out_of_bounds() { - let buffer = b"Hello"; - - // Should fail when offset is beyond buffer length - let result = read_string(buffer, 10, None); - assert!(result.is_err()); - assert_eq!( - result.unwrap_err(), - TypeReadError::BufferOverrun { - offset: 10, - buffer_len: 5 - } - ); - } - - #[test] - fn test_read_string_offset_at_buffer_end() { - let buffer = b"Hello"; - - // Should fail when offset equals buffer length - let result = read_string(buffer, 5, None); - assert!(result.is_err()); - assert_eq!( - result.unwrap_err(), - TypeReadError::BufferOverrun { - offset: 5, - buffer_len: 5 - } - ); - } - - #[test] - fn test_read_string_max_length_zero() { - let buffer = b"Hello\x00World"; - - // Should return empty string when max_length is 0 - let result = read_string(buffer, 0, Some(0)).unwrap(); - assert_eq!(result, Value::String(String::new())); - } - - #[test] - fn 
test_read_string_max_length_larger_than_buffer() { - let buffer = b"Short"; - - // Should read entire buffer when max_length exceeds buffer size - let result = read_string(buffer, 0, Some(100)).unwrap(); - assert_eq!(result, Value::String("Short".to_string())); - } - - #[test] - fn test_read_string_utf8_valid() { - let buffer = b"Caf\xc3\xa9\x00"; // "Café" in UTF-8 - - let result = read_string(buffer, 0, None).unwrap(); - assert_eq!(result, Value::String("Café".to_string())); - } - - #[test] - fn test_read_string_utf8_invalid() { - let buffer = b"Invalid\xff\xfe\x00"; // Invalid UTF-8 sequence - - let result = read_string(buffer, 0, None).unwrap(); - // Should use replacement characters for invalid UTF-8 - assert!(matches!(result, Value::String(_))); - if let Value::String(s) = result { - assert!(s.starts_with("Invalid")); - assert!(s.contains('\u{FFFD}')); // UTF-8 replacement character - } - } - - #[test] - fn test_read_string_binary_data() { - let buffer = &[0x48, 0x65, 0x6c, 0x6c, 0x6f, 0x00, 0x80, 0x90]; // "Hello" + binary - - let result = read_string(buffer, 0, None).unwrap(); - assert_eq!(result, Value::String("Hello".to_string())); - } - - #[test] - fn test_read_string_multiple_nulls() { - let buffer = b"First\x00\x00Second\x00"; - - // Should stop at first null terminator - let result = read_string(buffer, 0, None).unwrap(); - assert_eq!(result, Value::String("First".to_string())); - - // Reading from second null should return empty string - let result = read_string(buffer, 6, None).unwrap(); - assert_eq!(result, Value::String(String::new())); - } - - #[test] - fn test_read_string_ascii_control_characters() { - let buffer = b"Hello\x09World\x00"; // Tab character in string - - let result = read_string(buffer, 0, None).unwrap(); - assert_eq!(result, Value::String("Hello\tWorld".to_string())); - } - - #[test] - fn test_read_string_single_character() { - let buffer = b"A\x00"; - - let result = read_string(buffer, 0, None).unwrap(); - assert_eq!(result, 
Value::String("A".to_string())); - } - - #[test] - fn test_read_string_max_length_exact_match() { - let buffer = b"Exact\x00"; - - // Max length exactly matches string length (excluding null) - let result = read_string(buffer, 0, Some(5)).unwrap(); - assert_eq!(result, Value::String("Exact".to_string())); - } - - #[test] - fn test_read_string_at_buffer_boundary() { - let buffer = b"Hello"; - - // Reading from last character position - let result = read_string(buffer, 4, Some(1)).unwrap(); - assert_eq!(result, Value::String("o".to_string())); - } - - #[test] - fn test_read_string_whitespace_handling() { - let buffer = b" Spaces \x00"; - - // Should preserve whitespace in strings - let result = read_string(buffer, 0, None).unwrap(); - assert_eq!(result, Value::String(" Spaces ".to_string())); - } - - #[test] - fn test_read_string_newline_characters() { - let buffer = b"Line1\nLine2\r\n\x00"; - - let result = read_string(buffer, 0, None).unwrap(); - assert_eq!(result, Value::String("Line1\nLine2\r\n".to_string())); - } - - #[test] - fn test_read_string_consistency_with_typed_value() { - let buffer = b"Test\x00String"; - - // Test that read_string and read_typed_value produce same results - let direct_result = read_string(buffer, 0, None).unwrap(); - - let type_kind = TypeKind::String { max_length: None }; - let typed_result = read_typed_value(buffer, 0, &type_kind).unwrap(); - - assert_eq!(direct_result, typed_result); - assert_eq!(typed_result, Value::String("Test".to_string())); - } - - #[test] - fn test_read_string_consistency_with_max_length() { - let buffer = b"LongString\x00"; - - // Test consistency between direct call and typed_value call with max_length - let direct_result = read_string(buffer, 0, Some(4)).unwrap(); - - let type_kind = TypeKind::String { - max_length: Some(4), - }; - let typed_result = read_typed_value(buffer, 0, &type_kind).unwrap(); - - assert_eq!(direct_result, typed_result); - assert_eq!(typed_result, Value::String("Long".to_string())); - 
} - - #[test] - fn test_read_string_edge_case_combinations() { - // Test various edge case combinations - let test_cases = [ - (b"" as &[u8], 0, None, true), // Empty buffer should fail - (b"\x00", 0, None, false), // Just null terminator - (b"A", 0, Some(0), false), // Zero max length - (b"AB", 1, Some(1), false), // Single char at offset - ]; - - for (buffer, offset, max_length, should_fail) in test_cases { - let result = read_string(buffer, offset, max_length); - - if should_fail { - assert!( - result.is_err(), - "Expected failure for buffer {buffer:?}, offset {offset}, max_length {max_length:?}" - ); - } else { - assert!( - result.is_ok(), - "Expected success for buffer {buffer:?}, offset {offset}, max_length {max_length:?}" - ); - } - } - } -} - -#[test] -fn test_read_typed_value_buffer_overrun() { - let buffer = &[0x12, 0x34]; - - // Try to read a long (4 bytes) from a 2-byte buffer - let long_type = TypeKind::Long { - endian: Endianness::Little, - signed: false, - }; - let result = read_typed_value(buffer, 0, &long_type); - assert!(result.is_err()); - assert_eq!( - result.unwrap_err(), - TypeReadError::BufferOverrun { - offset: 0, - buffer_len: 2 - } - ); - - // Try to read a short (2 bytes) at offset 1 from a 2-byte buffer - let short_type = TypeKind::Short { - endian: Endianness::Little, - signed: false, - }; - let result = read_typed_value(buffer, 1, &short_type); - assert!(result.is_err()); - assert_eq!( - result.unwrap_err(), - TypeReadError::BufferOverrun { - offset: 1, - buffer_len: 2 - } - ); -} - -#[test] -fn test_read_typed_value_all_supported_types() { - let buffer = &[0x7f, 0x34, 0x12, 0x78, 0x56, 0x34, 0x12, 0xbc, 0x9a]; - - // Test all supported TypeKind variants - let test_cases = vec![ - (TypeKind::Byte { signed: false }, 0, Value::Uint(0x7f)), - ( - TypeKind::Short { - endian: Endianness::Little, - signed: false, - }, - 1, - Value::Uint(0x1234), // bytes [0x34, 0x12] -> 0x1234 little-endian - ), - ( - TypeKind::Short { - endian: 
Endianness::Big, - signed: false, - }, - 1, - Value::Uint(0x3412), // bytes [0x34, 0x12] -> 0x3412 big-endian - ), - ( - TypeKind::Long { - endian: Endianness::Little, - signed: false, - }, - 1, - Value::Uint(0x5678_1234), // bytes [0x34, 0x12, 0x78, 0x56] -> 0x56781234 little-endian - ), - ( - TypeKind::Long { - endian: Endianness::Big, - signed: false, - }, - 1, - Value::Uint(0x3412_7856), // bytes [0x34, 0x12, 0x78, 0x56] -> 0x34127856 big-endian - ), - ]; - - for (type_kind, offset, expected) in test_cases { - let result = read_typed_value(buffer, offset, &type_kind).unwrap(); - assert_eq!(result, expected, "Failed for type: {type_kind:?}"); - } -} - -#[test] -fn test_read_typed_value_signed_vs_unsigned() { - let buffer = &[0xff, 0xff, 0xff, 0xff, 0xff, 0xff]; - - // Test signed vs unsigned interpretation for shorts - let unsigned_short = TypeKind::Short { - endian: Endianness::Little, - signed: false, - }; - let signed_short = TypeKind::Short { - endian: Endianness::Little, - signed: true, - }; - - let unsigned_result = read_typed_value(buffer, 0, &unsigned_short).unwrap(); - let signed_result = read_typed_value(buffer, 0, &signed_short).unwrap(); - - assert_eq!(unsigned_result, Value::Uint(65535)); - assert_eq!(signed_result, Value::Int(-1)); - - // Test signed vs unsigned interpretation for longs - let unsigned_long = TypeKind::Long { - endian: Endianness::Little, - signed: false, - }; - let signed_long = TypeKind::Long { - endian: Endianness::Little, - signed: true, - }; - - let unsigned_result = read_typed_value(buffer, 0, &unsigned_long).unwrap(); - let signed_result = read_typed_value(buffer, 0, &signed_long).unwrap(); - - assert_eq!(unsigned_result, Value::Uint(4_294_967_295)); - assert_eq!(signed_result, Value::Int(-1)); -} - -#[test] -fn test_read_typed_value_consistency_with_direct_calls() { - let buffer = &[0x34, 0x12, 0x78, 0x56, 0xbc, 0x9a, 0xde, 0xf0]; - - // Test that read_typed_value gives same results as direct function calls - let byte_type = 
TypeKind::Byte { signed: false }; - let direct_byte = read_byte(buffer, 0, false).unwrap(); - let typed_byte = read_typed_value(buffer, 0, &byte_type).unwrap(); - assert_eq!(direct_byte, typed_byte); - - let short_type = TypeKind::Short { - endian: Endianness::Little, - signed: false, - }; - let direct_short = read_short(buffer, 0, Endianness::Little, false).unwrap(); - let typed_short = read_typed_value(buffer, 0, &short_type).unwrap(); - assert_eq!(direct_short, typed_short); - - let long_type = TypeKind::Long { - endian: Endianness::Big, - signed: true, - }; - let direct_long = read_long(buffer, 0, Endianness::Big, true).unwrap(); - let typed_long = read_typed_value(buffer, 0, &long_type).unwrap(); - assert_eq!(direct_long, typed_long); -} - -#[test] -fn test_read_typed_value_empty_buffer() { - let buffer = &[]; - - // All types should fail on empty buffer - let types = vec![ - TypeKind::Byte { signed: false }, - TypeKind::Short { - endian: Endianness::Little, - signed: false, - }, - TypeKind::Long { - endian: Endianness::Little, - signed: false, - }, - ]; - - for type_kind in types { - let result = read_typed_value(buffer, 0, &type_kind); - assert!(result.is_err()); - match result.unwrap_err() { - TypeReadError::BufferOverrun { offset, buffer_len } => { - assert_eq!(offset, 0); - assert_eq!(buffer_len, 0); - } - TypeReadError::UnsupportedType { .. 
} => panic!("Expected BufferOverrun error"), - } - } -} - -#[test] -#[allow(clippy::too_many_lines)] -fn test_coerce_value_to_type() { - let cases = [ - // Signed byte: values above i8::MAX get coerced - ( - Value::Uint(0xff), - TypeKind::Byte { signed: true }, - Value::Int(-1), - ), - ( - Value::Uint(0x80), - TypeKind::Byte { signed: true }, - Value::Int(-128), - ), - ( - Value::Uint(0xfe), - TypeKind::Byte { signed: true }, - Value::Int(-2), - ), - // Signed byte: values in signed range pass through - ( - Value::Uint(0x7f), - TypeKind::Byte { signed: true }, - Value::Uint(0x7f), - ), - ( - Value::Uint(0), - TypeKind::Byte { signed: true }, - Value::Uint(0), - ), - ( - Value::Uint(1), - TypeKind::Byte { signed: true }, - Value::Uint(1), - ), - // Unsigned byte: all values pass through - ( - Value::Uint(0xff), - TypeKind::Byte { signed: false }, - Value::Uint(0xff), - ), - ( - Value::Uint(0x80), - TypeKind::Byte { signed: false }, - Value::Uint(0x80), - ), - // Signed short: values above i16::MAX get coerced - ( - Value::Uint(0xffff), - TypeKind::Short { - endian: Endianness::Native, - signed: true, - }, - Value::Int(-1), - ), - ( - Value::Uint(0x8000), - TypeKind::Short { - endian: Endianness::Native, - signed: true, - }, - Value::Int(-32768), - ), - ( - Value::Uint(0xffd8), - TypeKind::Short { - endian: Endianness::Big, - signed: true, - }, - Value::Int(-40), - ), - // Signed short: values in signed range pass through - ( - Value::Uint(0x7fff), - TypeKind::Short { - endian: Endianness::Native, - signed: true, - }, - Value::Uint(0x7fff), - ), - // Unsigned short: all values pass through - ( - Value::Uint(0xffff), - TypeKind::Short { - endian: Endianness::Native, - signed: false, - }, - Value::Uint(0xffff), - ), - // Signed long: values above i32::MAX get coerced - ( - Value::Uint(0xffff_ffff), - TypeKind::Long { - endian: Endianness::Native, - signed: true, - }, - Value::Int(-1), - ), - ( - Value::Uint(0x8000_0000), - TypeKind::Long { - endian: Endianness::Native, 
- signed: true, - }, - Value::Int(-2_147_483_648), - ), - ( - Value::Uint(0x8950_4e47), - TypeKind::Long { - endian: Endianness::Big, - signed: true, - }, - Value::Int(-1_991_225_785), - ), - // Signed long: values in signed range pass through - ( - Value::Uint(0x7fff_ffff), - TypeKind::Long { - endian: Endianness::Native, - signed: true, - }, - Value::Uint(0x7fff_ffff), - ), - // Unsigned long: all values pass through - ( - Value::Uint(0xffff_ffff), - TypeKind::Long { - endian: Endianness::Native, - signed: false, - }, - Value::Uint(0xffff_ffff), - ), - // Signed quad: values above i64::MAX get coerced - ( - Value::Uint(0xffff_ffff_ffff_ffff), - TypeKind::Quad { - endian: Endianness::Native, - signed: true, - }, - Value::Int(-1), - ), - ( - Value::Uint(0x8000_0000_0000_0000), - TypeKind::Quad { - endian: Endianness::Native, - signed: true, - }, - Value::Int(i64::MIN), - ), - // Signed quad: values in signed range pass through - ( - Value::Uint(0x7fff_ffff_ffff_ffff), - TypeKind::Quad { - endian: Endianness::Native, - signed: true, - }, - Value::Uint(0x7fff_ffff_ffff_ffff), - ), - // Unsigned quad: all values pass through - ( - Value::Uint(0xffff_ffff_ffff_ffff), - TypeKind::Quad { - endian: Endianness::Native, - signed: false, - }, - Value::Uint(0xffff_ffff_ffff_ffff), - ), - // Non-Uint values pass through unchanged - ( - Value::Int(-1), - TypeKind::Byte { signed: true }, - Value::Int(-1), - ), - ( - Value::Int(42), - TypeKind::Long { - endian: Endianness::Native, - signed: true, - }, - Value::Int(42), - ), - // String type: values pass through - ( - Value::Uint(0xff), - TypeKind::String { max_length: None }, - Value::Uint(0xff), - ), - ]; - - for (i, (input, type_kind, expected)) in cases.iter().enumerate() { - let result = coerce_value_to_type(input, type_kind); - assert_eq!( - result, *expected, - "Case {i}: coerce({input:?}, {type_kind:?})" - ); - } -} diff --git a/src/evaluator/types/mod.rs b/src/evaluator/types/mod.rs new file mode 100644 index 
00000000..0d6f8678 --- /dev/null +++ b/src/evaluator/types/mod.rs @@ -0,0 +1,116 @@ +// Copyright (c) 2025-2026 the libmagic-rs contributors +// SPDX-License-Identifier: Apache-2.0 + +//! Type interpretation for reading and converting bytes from file buffers. +//! +//! This module exposes the public type-reading API and dispatches to focused +//! submodules for numeric and string handling. + +mod numeric; +mod string; + +use crate::parser::ast::{TypeKind, Value}; +use thiserror::Error; + +pub use numeric::{read_byte, read_long, read_quad, read_short}; +pub use string::read_string; + +/// Errors that can occur during type reading operations. +#[derive(Debug, Error, PartialEq, Eq)] +pub enum TypeReadError { + /// Buffer access beyond available data. + #[error( + "Buffer overrun: attempted to read at offset {offset} but buffer length is {buffer_len}" + )] + BufferOverrun { + /// The offset that was attempted to be accessed. + offset: usize, + /// The actual length of the buffer. + buffer_len: usize, + }, + /// Unsupported type variant. + #[error("Unsupported type: {type_name}")] + UnsupportedType { + /// The name of the unsupported type. + type_name: String, + }, +} + +/// Reads bytes according to the specified `TypeKind`. +/// +/// # Examples +/// +/// ``` +/// use libmagic_rs::evaluator::types::read_typed_value; +/// use libmagic_rs::parser::ast::{Endianness, TypeKind, Value}; +/// +/// let buffer = &[0x7f, 0x45, 0x4c, 0x46, 0x34, 0x12]; +/// let byte_result = read_typed_value(buffer, 0, &TypeKind::Byte { signed: false }).unwrap(); +/// assert_eq!(byte_result, Value::Uint(0x7f)); +/// +/// let short_type = TypeKind::Short { +/// endian: Endianness::Little, +/// signed: false, +/// }; +/// let short_result = read_typed_value(buffer, 4, &short_type).unwrap(); +/// assert_eq!(short_result, Value::Uint(0x1234)); +/// ``` +/// +/// # Errors +/// +/// Returns `TypeReadError::BufferOverrun` when the requested value extends past +/// the buffer bounds. 
+pub fn read_typed_value( + buffer: &[u8], + offset: usize, + type_kind: &TypeKind, +) -> Result<Value, TypeReadError> { + match type_kind { + TypeKind::Byte { signed } => read_byte(buffer, offset, *signed), + TypeKind::Short { endian, signed } => read_short(buffer, offset, *endian, *signed), + TypeKind::Long { endian, signed } => read_long(buffer, offset, *endian, *signed), + TypeKind::Quad { endian, signed } => read_quad(buffer, offset, *endian, *signed), + TypeKind::String { max_length } => read_string(buffer, offset, *max_length), + } +} + +/// Coerces a rule value to the signed width implied by `type_kind`. +/// +/// # Examples +/// +/// ``` +/// use libmagic_rs::evaluator::types::coerce_value_to_type; +/// use libmagic_rs::parser::ast::{TypeKind, Value}; +/// +/// let coerced = coerce_value_to_type(&Value::Uint(0xff), &TypeKind::Byte { signed: true }); +/// assert_eq!(coerced, Value::Int(-1)); +/// ``` +#[must_use] +pub fn coerce_value_to_type(value: &Value, type_kind: &TypeKind) -> Value { + match (value, type_kind) { + (Value::Uint(v), TypeKind::Byte { signed: true }) if *v > i8::MAX as u64 => + { + #[allow(clippy::cast_possible_truncation, clippy::cast_possible_wrap)] + Value::Int(i64::from(*v as u8 as i8)) + } + (Value::Uint(v), TypeKind::Short { signed: true, .. }) if *v > i16::MAX as u64 => + { + #[allow(clippy::cast_possible_truncation, clippy::cast_possible_wrap)] + Value::Int(i64::from(*v as u16 as i16)) + } + (Value::Uint(v), TypeKind::Long { signed: true, .. }) if *v > i32::MAX as u64 => + { + #[allow(clippy::cast_possible_truncation, clippy::cast_possible_wrap)] + Value::Int(i64::from(*v as u32 as i32)) + } + (Value::Uint(v), TypeKind::Quad { signed: true, .. 
}) if *v > i64::MAX as u64 => + { + #[allow(clippy::cast_possible_wrap)] + Value::Int(*v as i64) + } + _ => value.clone(), + } +} + +#[cfg(test)] +mod tests; diff --git a/src/evaluator/types/numeric.rs b/src/evaluator/types/numeric.rs new file mode 100644 index 00000000..2ced68a6 --- /dev/null +++ b/src/evaluator/types/numeric.rs @@ -0,0 +1,598 @@ +// Copyright (c) 2025-2026 the libmagic-rs contributors +// SPDX-License-Identifier: Apache-2.0 + +use super::TypeReadError; +use crate::parser::ast::{Endianness, Value}; +use byteorder::{BigEndian, ByteOrder, LittleEndian, NativeEndian}; + +/// Safely reads a single byte from the buffer at the specified offset. +/// # Examples +/// +/// ``` +/// use libmagic_rs::evaluator::types::read_byte; +/// use libmagic_rs::parser::ast::Value; +/// +/// let buffer = &[0x7f, 0x80, 0x4c, 0x46]; +/// +/// let result = read_byte(buffer, 1, false).unwrap(); +/// assert_eq!(result, Value::Uint(0x80)); +/// +/// let result = read_byte(buffer, 1, true).unwrap(); +/// assert_eq!(result, Value::Int(-128)); +/// ``` +/// # Errors +/// Returns `TypeReadError::BufferOverrun` if `offset` is outside the buffer. +pub fn read_byte(buffer: &[u8], offset: usize, signed: bool) -> Result<Value, TypeReadError> { + buffer + .get(offset) + .map(|&byte| { + if signed { + #[allow(clippy::cast_possible_wrap)] + Value::Int(i64::from(byte as i8)) + } else { + Value::Uint(u64::from(byte)) + } + }) + .ok_or(TypeReadError::BufferOverrun { + offset, + buffer_len: buffer.len(), + }) +} + +/// Safely reads a 16-bit integer from the buffer at the specified offset. 
+/// # Examples +/// +/// ``` +/// use libmagic_rs::evaluator::types::read_short; +/// use libmagic_rs::parser::ast::{Endianness, Value}; +/// +/// let buffer = &[0x34, 0x12, 0xff, 0x7f]; +/// +/// let result = read_short(buffer, 0, Endianness::Little, false).unwrap(); +/// assert_eq!(result, Value::Uint(0x1234)); +/// +/// let result = read_short(buffer, 2, Endianness::Little, true).unwrap(); +/// assert_eq!(result, Value::Int(32767)); +/// ``` +/// # Errors +/// Returns `TypeReadError::BufferOverrun` if fewer than 2 bytes are available at the +/// requested offset. +pub fn read_short( + buffer: &[u8], + offset: usize, + endian: Endianness, + signed: bool, +) -> Result<Value, TypeReadError> { + let end = offset.checked_add(2).ok_or(TypeReadError::BufferOverrun { + offset, + buffer_len: buffer.len(), + })?; + let bytes = buffer + .get(offset..end) + .ok_or(TypeReadError::BufferOverrun { + offset, + buffer_len: buffer.len(), + })?; + + let value = match endian { + Endianness::Little => LittleEndian::read_u16(bytes), + Endianness::Big => BigEndian::read_u16(bytes), + Endianness::Native => NativeEndian::read_u16(bytes), + }; + + if signed { + #[allow(clippy::cast_possible_wrap)] + Ok(Value::Int(i64::from(value as i16))) + } else { + Ok(Value::Uint(u64::from(value))) + } +} + +/// Safely reads a 32-bit integer from the buffer at the specified offset. +/// # Examples +/// +/// ``` +/// use libmagic_rs::evaluator::types::read_long; +/// use libmagic_rs::parser::ast::{Endianness, Value}; +/// +/// let buffer = &[0x78, 0x56, 0x34, 0x12, 0xff, 0xff, 0xff, 0x7f]; +/// +/// let result = read_long(buffer, 0, Endianness::Little, false).unwrap(); +/// assert_eq!(result, Value::Uint(0x12345678)); +/// +/// let result = read_long(buffer, 4, Endianness::Little, true).unwrap(); +/// assert_eq!(result, Value::Int(2147483647)); +/// ``` +/// # Errors +/// Returns `TypeReadError::BufferOverrun` if fewer than 4 bytes are available at the +/// requested offset. 
+pub fn read_long( + buffer: &[u8], + offset: usize, + endian: Endianness, + signed: bool, +) -> Result<Value, TypeReadError> { + let end = offset.checked_add(4).ok_or(TypeReadError::BufferOverrun { + offset, + buffer_len: buffer.len(), + })?; + let bytes = buffer + .get(offset..end) + .ok_or(TypeReadError::BufferOverrun { + offset, + buffer_len: buffer.len(), + })?; + + let value = match endian { + Endianness::Little => LittleEndian::read_u32(bytes), + Endianness::Big => BigEndian::read_u32(bytes), + Endianness::Native => NativeEndian::read_u32(bytes), + }; + + if signed { + #[allow(clippy::cast_possible_wrap)] + Ok(Value::Int(i64::from(value as i32))) + } else { + Ok(Value::Uint(u64::from(value))) + } +} + +/// Safely reads a 64-bit integer from the buffer at the specified offset. +/// # Examples +/// +/// ``` +/// use libmagic_rs::evaluator::types::read_quad; +/// use libmagic_rs::parser::ast::{Endianness, Value}; +/// +/// let buffer = &[0xef, 0xcd, 0xab, 0x90, 0x78, 0x56, 0x34, 0x12]; +/// +/// let result = read_quad(buffer, 0, Endianness::Little, false).unwrap(); +/// assert_eq!(result, Value::Uint(0x1234_5678_90ab_cdef)); +/// +/// let result = read_quad(buffer, 0, Endianness::Little, true).unwrap(); +/// assert_eq!(result, Value::Int(0x1234_5678_90ab_cdef)); +/// ``` +/// # Errors +/// Returns `TypeReadError::BufferOverrun` if fewer than 8 bytes are available at the +/// requested offset. 
+pub fn read_quad( + buffer: &[u8], + offset: usize, + endian: Endianness, + signed: bool, +) -> Result<Value, TypeReadError> { + let end = offset.checked_add(8).ok_or(TypeReadError::BufferOverrun { + offset, + buffer_len: buffer.len(), + })?; + let bytes = buffer + .get(offset..end) + .ok_or(TypeReadError::BufferOverrun { + offset, + buffer_len: buffer.len(), + })?; + + let value = match endian { + Endianness::Little => LittleEndian::read_u64(bytes), + Endianness::Big => BigEndian::read_u64(bytes), + Endianness::Native => NativeEndian::read_u64(bytes), + }; + + if signed { + #[allow(clippy::cast_possible_wrap)] + Ok(Value::Int(value as i64)) + } else { + Ok(Value::Uint(value)) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_read_byte_values() { + let buffer: Vec<u8> = (0..=255).collect(); + for (i, &byte) in buffer.iter().enumerate() { + assert_eq!( + read_byte(&buffer, i, false).unwrap(), + Value::Uint(u64::from(byte)) + ); + } + } + + #[test] + fn test_read_byte_out_of_bounds() { + assert_eq!( + read_byte(&[], 0, false).unwrap_err(), + TypeReadError::BufferOverrun { + offset: 0, + buffer_len: 0 + } + ); + assert_eq!( + read_byte(&[0x42], 1, false).unwrap_err(), + TypeReadError::BufferOverrun { + offset: 1, + buffer_len: 1 + } + ); + assert_eq!( + read_byte(&[1, 2, 3], 100, false).unwrap_err(), + TypeReadError::BufferOverrun { + offset: 100, + buffer_len: 3 + } + ); + } + + #[test] + fn test_read_byte_signedness() { + let cases: Vec<(u8, bool, Value)> = vec![ + (0x00, false, Value::Uint(0)), + (0x7f, false, Value::Uint(127)), + (0x80, false, Value::Uint(128)), + (0xff, false, Value::Uint(255)), + (0x00, true, Value::Int(0)), + (0x7f, true, Value::Int(127)), + (0x80, true, Value::Int(-128)), + (0xff, true, Value::Int(-1)), + ]; + for (byte, signed, expected) in cases { + let result = read_byte(&[byte], 0, signed).unwrap(); + assert_eq!(result, expected, "byte=0x{byte:02x}, signed={signed}"); + } + } + + #[test] + fn 
test_read_short_little_endian_unsigned() { + let 
buffer = &[0x34, 0x12, 0x78, 0x56]; + let result = read_short(buffer, 0, Endianness::Little, false).unwrap(); + assert_eq!(result, Value::Uint(0x1234)); + + let result = read_short(buffer, 2, Endianness::Little, false).unwrap(); + assert_eq!(result, Value::Uint(0x5678)); + } + + #[test] + fn test_read_short_big_endian_unsigned() { + let buffer = &[0x12, 0x34, 0x56, 0x78]; + let result = read_short(buffer, 0, Endianness::Big, false).unwrap(); + assert_eq!(result, Value::Uint(0x1234)); + + let result = read_short(buffer, 2, Endianness::Big, false).unwrap(); + assert_eq!(result, Value::Uint(0x5678)); + } + + #[test] + fn test_read_short_native_endian_unsigned() { + let buffer = &[0x34, 0x12, 0x78, 0x56]; + let result = read_short(buffer, 0, Endianness::Native, false).unwrap(); + + match result { + Value::Uint(val) => assert!(val == 0x1234 || val == 0x3412), + _ => panic!("Expected Value::Uint variant"), + } + } + + #[test] + fn test_read_short_signed_positive() { + let buffer = &[0xff, 0x7f]; + let result = read_short(buffer, 0, Endianness::Little, true).unwrap(); + assert_eq!(result, Value::Int(32767)); + } + + #[test] + fn test_read_short_signed_negative() { + let buffer = &[0x00, 0x80]; + let result = read_short(buffer, 0, Endianness::Little, true).unwrap(); + assert_eq!(result, Value::Int(-32768)); + } + + #[test] + fn test_read_short_signed_vs_unsigned() { + let buffer = &[0xff, 0xff]; + let unsigned_result = read_short(buffer, 0, Endianness::Little, false).unwrap(); + assert_eq!(unsigned_result, Value::Uint(65535)); + + let signed_result = read_short(buffer, 0, Endianness::Little, true).unwrap(); + assert_eq!(signed_result, Value::Int(-1)); + } + + #[test] + fn test_read_short_buffer_overrun() { + let buffer = &[0x12]; + let result = read_short(buffer, 0, Endianness::Little, false); + assert!(result.is_err()); + assert_eq!( + result.unwrap_err(), + TypeReadError::BufferOverrun { + offset: 0, + buffer_len: 1 + } + ); + } + + #[test] + fn 
test_read_short_offset_out_of_bounds() { + let buffer = &[0x12, 0x34, 0x56]; + let result = read_short(buffer, 2, Endianness::Little, false); + assert!(result.is_err()); + assert_eq!( + result.unwrap_err(), + TypeReadError::BufferOverrun { + offset: 2, + buffer_len: 3 + } + ); + } + + #[test] + fn test_read_short_empty_buffer() { + let buffer = &[]; + let result = read_short(buffer, 0, Endianness::Little, false); + assert!(result.is_err()); + assert_eq!( + result.unwrap_err(), + TypeReadError::BufferOverrun { + offset: 0, + buffer_len: 0 + } + ); + } + + #[test] + fn test_read_short_all_endianness_variants() { + let buffer = &[0x12, 0x34]; + let little = read_short(buffer, 0, Endianness::Little, false).unwrap(); + let big = read_short(buffer, 0, Endianness::Big, false).unwrap(); + let native = read_short(buffer, 0, Endianness::Native, false).unwrap(); + + assert_eq!(little, Value::Uint(0x3412)); + assert_eq!(big, Value::Uint(0x1234)); + + match native { + Value::Uint(val) => assert!(val == 0x1234 || val == 0x3412), + _ => panic!("Expected Value::Uint variant"), + } + } + + #[test] + fn test_read_short_extreme_values() { + let max_buffer = &[0xff, 0xff]; + let max_result = read_short(max_buffer, 0, Endianness::Little, false).unwrap(); + assert_eq!(max_result, Value::Uint(u64::from(u16::MAX))); + + let zero_buffer = &[0x00, 0x00]; + let zero_result = read_short(zero_buffer, 0, Endianness::Little, false).unwrap(); + assert_eq!(zero_result, Value::Uint(0)); + } + + #[test] + fn test_read_long_little_endian_unsigned() { + let buffer = &[0x78, 0x56, 0x34, 0x12, 0xbc, 0x9a, 0x78, 0x56]; + let result = read_long(buffer, 0, Endianness::Little, false).unwrap(); + assert_eq!(result, Value::Uint(0x1234_5678)); + + let result = read_long(buffer, 4, Endianness::Little, false).unwrap(); + assert_eq!(result, Value::Uint(0x5678_9abc)); + } + + #[test] + fn test_read_long_big_endian_unsigned() { + let buffer = &[0x12, 0x34, 0x56, 0x78, 0x56, 0x78, 0x9a, 0xbc]; + let result = 
read_long(buffer, 0, Endianness::Big, false).unwrap(); + assert_eq!(result, Value::Uint(0x1234_5678)); + + let result = read_long(buffer, 4, Endianness::Big, false).unwrap(); + assert_eq!(result, Value::Uint(0x5678_9abc)); + } + + #[test] + fn test_read_long_native_endian_unsigned() { + let buffer = &[0x78, 0x56, 0x34, 0x12]; + let result = read_long(buffer, 0, Endianness::Native, false).unwrap(); + + match result { + Value::Uint(val) => assert!(val == 0x1234_5678 || val == 0x7856_3412), + _ => panic!("Expected Value::Uint variant"), + } + } + + #[test] + fn test_read_long_signed_positive() { + let buffer = &[0xff, 0xff, 0xff, 0x7f]; + let result = read_long(buffer, 0, Endianness::Little, true).unwrap(); + assert_eq!(result, Value::Int(2_147_483_647)); + } + + #[test] + fn test_read_long_signed_negative() { + let buffer = &[0x00, 0x00, 0x00, 0x80]; + let result = read_long(buffer, 0, Endianness::Little, true).unwrap(); + assert_eq!(result, Value::Int(-2_147_483_648)); + } + + #[test] + fn test_read_long_signed_vs_unsigned() { + let buffer = &[0xff, 0xff, 0xff, 0xff]; + let unsigned_result = read_long(buffer, 0, Endianness::Little, false).unwrap(); + assert_eq!(unsigned_result, Value::Uint(4_294_967_295)); + + let signed_result = read_long(buffer, 0, Endianness::Little, true).unwrap(); + assert_eq!(signed_result, Value::Int(-1)); + } + + #[test] + fn test_read_long_buffer_overrun() { + let buffer = &[0x12, 0x34, 0x56]; + let result = read_long(buffer, 0, Endianness::Little, false); + assert!(result.is_err()); + assert_eq!( + result.unwrap_err(), + TypeReadError::BufferOverrun { + offset: 0, + buffer_len: 3 + } + ); + } + + #[test] + fn test_read_long_offset_out_of_bounds() { + let buffer = &[0x12, 0x34, 0x56, 0x78, 0x9a]; + let result = read_long(buffer, 2, Endianness::Little, false); + assert!(result.is_err()); + assert_eq!( + result.unwrap_err(), + TypeReadError::BufferOverrun { + offset: 2, + buffer_len: 5 + } + ); + } + + #[test] + fn 
test_read_long_empty_buffer() { + let buffer = &[]; + let result = read_long(buffer, 0, Endianness::Little, false); + assert!(result.is_err()); + assert_eq!( + result.unwrap_err(), + TypeReadError::BufferOverrun { + offset: 0, + buffer_len: 0 + } + ); + } + + #[test] + fn test_read_long_all_endianness_variants() { + let buffer = &[0x12, 0x34, 0x56, 0x78]; + let little = read_long(buffer, 0, Endianness::Little, false).unwrap(); + let big = read_long(buffer, 0, Endianness::Big, false).unwrap(); + let native = read_long(buffer, 0, Endianness::Native, false).unwrap(); + + assert_eq!(little, Value::Uint(0x7856_3412)); + assert_eq!(big, Value::Uint(0x1234_5678)); + + match native { + Value::Uint(val) => assert!(val == 0x1234_5678 || val == 0x7856_3412), + _ => panic!("Expected Value::Uint variant"), + } + } + + #[test] + fn test_read_long_extreme_values() { + let max_buffer = &[0xff, 0xff, 0xff, 0xff]; + let max_result = read_long(max_buffer, 0, Endianness::Little, false).unwrap(); + assert_eq!(max_result, Value::Uint(u64::from(u32::MAX))); + + let zero_buffer = &[0x00, 0x00, 0x00, 0x00]; + let zero_result = read_long(zero_buffer, 0, Endianness::Little, false).unwrap(); + assert_eq!(zero_result, Value::Uint(0)); + } + + #[test] + fn test_read_quad_endianness_and_signedness() { + let cases: Vec<(&[u8], Endianness, bool, Value)> = vec![ + ( + &[0xef, 0xcd, 0xab, 0x90, 0x78, 0x56, 0x34, 0x12], + Endianness::Little, + false, + Value::Uint(0x1234_5678_90ab_cdef), + ), + ( + &[0x12, 0x34, 0x56, 0x78, 0x90, 0xab, 0xcd, 0xef], + Endianness::Big, + false, + Value::Uint(0x1234_5678_90ab_cdef), + ), + ( + &[0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7f], + Endianness::Little, + true, + Value::Int(i64::MAX), + ), + ( + &[0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80], + Endianness::Little, + true, + Value::Int(i64::MIN), + ), + ( + &[0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff], + Endianness::Big, + true, + Value::Int(-1), + ), + ( + &[0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 
0xff], + Endianness::Little, + false, + Value::Uint(u64::MAX), + ), + ( + &[0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00], + Endianness::Little, + false, + Value::Uint(0), + ), + ]; + for (buffer, endian, signed, expected) in cases { + let result = read_quad(buffer, 0, endian, signed).unwrap(); + assert_eq!(result, expected, "endian={endian:?}, signed={signed}"); + } + } + + #[test] + fn test_read_quad_buffer_overrun() { + let buffer = &[0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07]; + assert_eq!( + read_quad(buffer, 0, Endianness::Little, false).unwrap_err(), + TypeReadError::BufferOverrun { + offset: 0, + buffer_len: 7 + } + ); + + assert_eq!( + read_quad(&[], 0, Endianness::Big, false).unwrap_err(), + TypeReadError::BufferOverrun { + offset: 0, + buffer_len: 0 + } + ); + + let buffer = &[0x00; 16]; + assert_eq!( + read_quad(buffer, 10, Endianness::Little, false).unwrap_err(), + TypeReadError::BufferOverrun { + offset: 10, + buffer_len: 16 + } + ); + } + + #[test] + fn test_read_quad_at_offset() { + let buffer = &[0x00, 0x00, 0xef, 0xcd, 0xab, 0x90, 0x78, 0x56, 0x34, 0x12]; + let result = read_quad(buffer, 2, Endianness::Little, false).unwrap(); + assert_eq!(result, Value::Uint(0x1234_5678_90ab_cdef)); + } + + #[test] + fn test_multi_byte_reading_consistency() { + let buffer = &[0x34, 0x12, 0x78, 0x56, 0xbc, 0x9a, 0xde, 0xf0]; + + let byte0 = read_byte(buffer, 0, false).unwrap(); + let byte1 = read_byte(buffer, 1, false).unwrap(); + let short = read_short(buffer, 0, Endianness::Little, false).unwrap(); + + match (byte0, byte1, short) { + (Value::Uint(b0), Value::Uint(b1), Value::Uint(s)) => { + assert_eq!(s, b0 + (b1 << 8)); + } + _ => panic!("Expected all Uint values"), + } + } +} diff --git a/src/evaluator/types/string.rs b/src/evaluator/types/string.rs new file mode 100644 index 00000000..b39000e7 --- /dev/null +++ b/src/evaluator/types/string.rs @@ -0,0 +1,324 @@ +// Copyright (c) 2025-2026 the libmagic-rs contributors +// SPDX-License-Identifier: Apache-2.0 + 
+use super::TypeReadError; +use crate::parser::ast::Value; + +/// Safely reads a null-terminated string from the buffer at the specified offset. +/// +/// This function reads bytes from the buffer starting at the given offset until it +/// encounters a null byte (0x00) or reaches the maximum length limit. The resulting +/// bytes are converted to a UTF-8 string with proper error handling for invalid +/// sequences. +/// +/// # Arguments +/// +/// * `buffer` - The byte buffer to read from +/// * `offset` - The offset position to start reading the string from +/// * `max_length` - Optional maximum number of bytes to read excluding the null terminator. +/// If a NUL is found within `max_length` bytes, it is not counted in the result length. +/// If no NUL is found, up to `max_length` bytes are returned with no trailing NUL. +/// When `None`, reads until the first NUL or end of buffer. +/// +/// # Returns +/// +/// Returns `Ok(Value::String(string))` if the read is successful, or an appropriate error +/// if the read fails due to buffer overrun or invalid UTF-8 sequences. 
+/// +/// # Security +/// +/// This function provides several security guarantees: +/// - Bounds checking prevents reading beyond buffer limits +/// - Length limits prevent excessive memory allocation +/// - UTF-8 validation ensures string safety +/// - Null termination handling prevents runaway reads +/// +/// # Examples +/// +/// ``` +/// use libmagic_rs::evaluator::types::read_string; +/// use libmagic_rs::parser::ast::Value; +/// +/// let buffer = b"Hello\x00World"; +/// let result = read_string(buffer, 0, None).unwrap(); +/// assert_eq!(result, Value::String("Hello".to_string())); +/// +/// let buffer = b"VeryLongString\x00"; +/// let result = read_string(buffer, 0, Some(4)).unwrap(); +/// assert_eq!(result, Value::String("Very".to_string())); +/// +/// let buffer = b"NoNull"; +/// let result = read_string(buffer, 0, Some(6)).unwrap(); +/// assert_eq!(result, Value::String("NoNull".to_string())); +/// ``` +/// +/// # Errors +/// +/// Returns `TypeReadError::BufferOverrun` if the offset is greater than or equal to the buffer +/// length. 
+pub fn read_string( + buffer: &[u8], + offset: usize, + max_length: Option<usize>, +) -> Result<Value, TypeReadError> { + if offset >= buffer.len() { + return Err(TypeReadError::BufferOverrun { + offset, + buffer_len: buffer.len(), + }); + } + + let remaining_buffer = &buffer[offset..]; + let read_length = if let Some(max_len) = max_length { + let search_len = std::cmp::min(max_len, remaining_buffer.len()); + memchr::memchr(0, &remaining_buffer[..search_len]).unwrap_or(search_len) + } else { + memchr::memchr(0, remaining_buffer).unwrap_or(remaining_buffer.len()) + }; + + let string_bytes = &remaining_buffer[..read_length]; + let string_value = String::from_utf8_lossy(string_bytes).into_owned(); + + Ok(Value::String(string_value)) +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::evaluator::types::read_typed_value; + use crate::parser::ast::TypeKind; + + #[test] + fn test_read_string_null_terminated() { + let buffer = b"Hello\x00World"; + let result = read_string(buffer, 0, None).unwrap(); + assert_eq!(result, Value::String("Hello".to_string())); + } + + #[test] + fn test_read_string_null_terminated_at_offset() { + let buffer = b"Prefix\x00Hello\x00Suffix"; + let result = read_string(buffer, 7, None).unwrap(); + assert_eq!(result, Value::String("Hello".to_string())); + } + + #[test] + fn test_read_string_with_max_length_shorter_than_null() { + let buffer = b"VeryLongString\x00"; + let result = read_string(buffer, 0, Some(4)).unwrap(); + assert_eq!(result, Value::String("Very".to_string())); + } + + #[test] + fn test_read_string_with_max_length_longer_than_null() { + let buffer = b"Short\x00LongerSuffix"; + let result = read_string(buffer, 0, Some(10)).unwrap(); + assert_eq!(result, Value::String("Short".to_string())); + } + + #[test] + fn test_read_string_no_null_terminator_with_max_length() { + let buffer = b"NoNullTerminator"; + let result = read_string(buffer, 0, Some(6)).unwrap(); + assert_eq!(result, Value::String("NoNull".to_string())); + } + + #[test] + fn 
test_read_string_no_null_terminator_no_max_length() { + let buffer = b"NoNullTerminator"; + let result = read_string(buffer, 0, None).unwrap(); + assert_eq!(result, Value::String("NoNullTerminator".to_string())); + } + + #[test] + fn test_read_string_empty_string() { + let buffer = b"\x00Hello"; + let result = read_string(buffer, 0, None).unwrap(); + assert_eq!(result, Value::String(String::new())); + } + + #[test] + fn test_read_string_empty_buffer() { + let buffer = b""; + let result = read_string(buffer, 0, None); + assert!(result.is_err()); + assert_eq!( + result.unwrap_err(), + TypeReadError::BufferOverrun { + offset: 0, + buffer_len: 0 + } + ); + } + + #[test] + fn test_read_string_offset_out_of_bounds() { + let buffer = b"Hello"; + let result = read_string(buffer, 10, None); + assert!(result.is_err()); + assert_eq!( + result.unwrap_err(), + TypeReadError::BufferOverrun { + offset: 10, + buffer_len: 5 + } + ); + } + + #[test] + fn test_read_string_offset_at_buffer_end() { + let buffer = b"Hello"; + let result = read_string(buffer, 5, None); + assert!(result.is_err()); + assert_eq!( + result.unwrap_err(), + TypeReadError::BufferOverrun { + offset: 5, + buffer_len: 5 + } + ); + } + + #[test] + fn test_read_string_max_length_zero() { + let buffer = b"Hello\x00World"; + let result = read_string(buffer, 0, Some(0)).unwrap(); + assert_eq!(result, Value::String(String::new())); + } + + #[test] + fn test_read_string_max_length_larger_than_buffer() { + let buffer = b"Short"; + let result = read_string(buffer, 0, Some(100)).unwrap(); + assert_eq!(result, Value::String("Short".to_string())); + } + + #[test] + fn test_read_string_utf8_valid() { + let buffer = b"Caf\xc3\xa9\x00"; + let result = read_string(buffer, 0, None).unwrap(); + assert_eq!(result, Value::String("Café".to_string())); + } + + #[test] + fn test_read_string_utf8_invalid() { + let buffer = b"Invalid\xff\xfe\x00"; + let result = read_string(buffer, 0, None).unwrap(); + assert!(matches!(result, 
Value::String(_))); + if let Value::String(s) = result { + assert!(s.starts_with("Invalid")); + assert!(s.contains('\u{FFFD}')); + } + } + + #[test] + fn test_read_string_binary_data() { + let buffer = &[0x48, 0x65, 0x6c, 0x6c, 0x6f, 0x00, 0x80, 0x90]; + let result = read_string(buffer, 0, None).unwrap(); + assert_eq!(result, Value::String("Hello".to_string())); + } + + #[test] + fn test_read_string_multiple_nulls() { + let buffer = b"First\x00\x00Second\x00"; + let result = read_string(buffer, 0, None).unwrap(); + assert_eq!(result, Value::String("First".to_string())); + + let result = read_string(buffer, 6, None).unwrap(); + assert_eq!(result, Value::String(String::new())); + } + + #[test] + fn test_read_string_ascii_control_characters() { + let buffer = b"Hello\x09World\x00"; + let result = read_string(buffer, 0, None).unwrap(); + assert_eq!(result, Value::String("Hello\tWorld".to_string())); + } + + #[test] + fn test_read_string_single_character() { + let buffer = b"A\x00"; + let result = read_string(buffer, 0, None).unwrap(); + assert_eq!(result, Value::String("A".to_string())); + } + + #[test] + fn test_read_string_max_length_exact_match() { + let buffer = b"Exact\x00"; + let result = read_string(buffer, 0, Some(5)).unwrap(); + assert_eq!(result, Value::String("Exact".to_string())); + } + + #[test] + fn test_read_string_at_buffer_boundary() { + let buffer = b"Hello"; + let result = read_string(buffer, 4, Some(1)).unwrap(); + assert_eq!(result, Value::String("o".to_string())); + } + + #[test] + fn test_read_string_whitespace_handling() { + let buffer = b" Spaces \x00"; + let result = read_string(buffer, 0, None).unwrap(); + assert_eq!(result, Value::String(" Spaces ".to_string())); + } + + #[test] + fn test_read_string_newline_characters() { + let buffer = b"Line1\nLine2\r\n\x00"; + let result = read_string(buffer, 0, None).unwrap(); + assert_eq!(result, Value::String("Line1\nLine2\r\n".to_string())); + } + + #[test] + fn 
test_read_string_consistency_with_typed_value() { + let buffer = b"Test\x00String"; + let direct_result = read_string(buffer, 0, None).unwrap(); + + let type_kind = TypeKind::String { max_length: None }; + let typed_result = read_typed_value(buffer, 0, &type_kind).unwrap(); + + assert_eq!(direct_result, typed_result); + assert_eq!(typed_result, Value::String("Test".to_string())); + } + + #[test] + fn test_read_string_consistency_with_max_length() { + let buffer = b"LongString\x00"; + let direct_result = read_string(buffer, 0, Some(4)).unwrap(); + + let type_kind = TypeKind::String { + max_length: Some(4), + }; + let typed_result = read_typed_value(buffer, 0, &type_kind).unwrap(); + + assert_eq!(direct_result, typed_result); + assert_eq!(typed_result, Value::String("Long".to_string())); + } + + #[test] + fn test_read_string_edge_case_combinations() { + let test_cases = [ + (b"" as &[u8], 0, None, true), + (b"\x00", 0, None, false), + (b"A", 0, Some(0), false), + (b"AB", 1, Some(1), false), + ]; + + for (buffer, offset, max_length, should_fail) in test_cases { + let result = read_string(buffer, offset, max_length); + + if should_fail { + assert!( + result.is_err(), + "Expected failure for buffer {buffer:?}, offset {offset}, max_length {max_length:?}" + ); + } else { + assert!( + result.is_ok(), + "Expected success for buffer {buffer:?}, offset {offset}, max_length {max_length:?}" + ); + } + } + } +} diff --git a/src/evaluator/types/tests.rs b/src/evaluator/types/tests.rs new file mode 100644 index 00000000..eea0a90e --- /dev/null +++ b/src/evaluator/types/tests.rs @@ -0,0 +1,463 @@ +// Copyright (c) 2025-2026 the libmagic-rs contributors +// SPDX-License-Identifier: Apache-2.0 + +use super::*; +use crate::parser::ast::Endianness; + +#[test] +fn test_type_read_error_display() { + let error = TypeReadError::BufferOverrun { + offset: 10, + buffer_len: 5, + }; + let msg = format!("{error}"); + assert!(msg.contains("offset 10")); + assert!(msg.contains("buffer length is 
5")); +} + +#[test] +fn test_unsupported_type_error_variants() { + let error = TypeReadError::UnsupportedType { + type_name: "CustomType".to_string(), + }; + assert!(format!("{error}").contains("CustomType")); + assert!(format!("{error:?}").contains("UnsupportedType")); + + assert_eq!( + error, + TypeReadError::UnsupportedType { + type_name: "CustomType".to_string(), + } + ); +} + +#[test] +fn test_read_typed_value_numeric_dispatch() { + let byte = read_typed_value(&[0x7f, 0x46], 0, &TypeKind::Byte { signed: false }).unwrap(); + assert_eq!(byte, Value::Uint(0x7f)); + + let short = read_typed_value( + &[0x34, 0x12, 0x78, 0x56], + 0, + &TypeKind::Short { + endian: Endianness::Little, + signed: false, + }, + ) + .unwrap(); + assert_eq!(short, Value::Uint(0x1234)); + + let short_signed = read_typed_value( + &[0x80, 0x00, 0x7f, 0xff], + 0, + &TypeKind::Short { + endian: Endianness::Big, + signed: true, + }, + ) + .unwrap(); + assert_eq!(short_signed, Value::Int(-32768)); + + let long = read_typed_value( + &[0x78, 0x56, 0x34, 0x12, 0xbc, 0x9a, 0x78, 0x56], + 0, + &TypeKind::Long { + endian: Endianness::Little, + signed: false, + }, + ) + .unwrap(); + assert_eq!(long, Value::Uint(0x1234_5678)); + + let long_signed = read_typed_value( + &[0x80, 0x00, 0x00, 0x00], + 0, + &TypeKind::Long { + endian: Endianness::Big, + signed: true, + }, + ) + .unwrap(); + assert_eq!(long_signed, Value::Int(-2_147_483_648)); + + let quad = read_typed_value( + &[0xef, 0xcd, 0xab, 0x90, 0x78, 0x56, 0x34, 0x12], + 0, + &TypeKind::Quad { + endian: Endianness::Little, + signed: false, + }, + ) + .unwrap(); + assert_eq!(quad, Value::Uint(0x1234_5678_90ab_cdef)); +} + +#[test] +fn test_read_typed_value_native_endian() { + let result = read_typed_value( + &[0x34, 0x12], + 0, + &TypeKind::Short { + endian: Endianness::Native, + signed: false, + }, + ) + .unwrap(); + + match result { + Value::Uint(val) => assert!(val == 0x1234 || val == 0x3412), + _ => panic!("Expected Value::Uint variant"), + } +} + 
+#[test] +fn test_read_typed_value_string_dispatch() { + let buffer = b"Hello\x00World\x00"; + + let result = read_typed_value(buffer, 0, &TypeKind::String { max_length: None }).unwrap(); + assert_eq!(result, Value::String("Hello".to_string())); + + let result = read_typed_value( + b"VeryLongString\x00", + 0, + &TypeKind::String { + max_length: Some(4), + }, + ) + .unwrap(); + assert_eq!(result, Value::String("Very".to_string())); +} + +#[test] +fn test_read_typed_value_buffer_overrun() { + let short_error = read_typed_value( + &[0x12], + 0, + &TypeKind::Short { + endian: Endianness::Little, + signed: false, + }, + ) + .unwrap_err(); + assert_eq!( + short_error, + TypeReadError::BufferOverrun { + offset: 0, + buffer_len: 1 + } + ); + + let long_error = read_typed_value( + &[0x12, 0x34], + 0, + &TypeKind::Long { + endian: Endianness::Little, + signed: false, + }, + ) + .unwrap_err(); + assert_eq!( + long_error, + TypeReadError::BufferOverrun { + offset: 0, + buffer_len: 2 + } + ); +} + +#[test] +fn test_read_typed_value_all_supported_types() { + let buffer = &[0x7f, 0x34, 0x12, 0x78, 0x56, 0x34, 0x12, 0xbc, 0x9a]; + let test_cases = [ + (TypeKind::Byte { signed: false }, 0, Value::Uint(0x7f)), + ( + TypeKind::Short { + endian: Endianness::Little, + signed: false, + }, + 1, + Value::Uint(0x1234), + ), + ( + TypeKind::Short { + endian: Endianness::Big, + signed: false, + }, + 1, + Value::Uint(0x3412), + ), + ( + TypeKind::Long { + endian: Endianness::Little, + signed: false, + }, + 1, + Value::Uint(0x5678_1234), + ), + ( + TypeKind::Long { + endian: Endianness::Big, + signed: false, + }, + 1, + Value::Uint(0x3412_7856), + ), + ( + TypeKind::Quad { + endian: Endianness::Little, + signed: false, + }, + 1, + Value::Uint(0x9abc_1234_5678_1234), + ), + ]; + + for (type_kind, offset, expected) in test_cases { + let result = read_typed_value(buffer, offset, &type_kind).unwrap(); + assert_eq!(result, expected, "Failed for type: {type_kind:?}"); + } +} + +#[test] +fn 
test_read_typed_value_signed_vs_unsigned() { + let buffer = &[0xff, 0xff, 0xff, 0xff, 0xff, 0xff]; + + let unsigned_short = read_typed_value( + buffer, + 0, + &TypeKind::Short { + endian: Endianness::Little, + signed: false, + }, + ) + .unwrap(); + let signed_short = read_typed_value( + buffer, + 0, + &TypeKind::Short { + endian: Endianness::Little, + signed: true, + }, + ) + .unwrap(); + assert_eq!(unsigned_short, Value::Uint(65535)); + assert_eq!(signed_short, Value::Int(-1)); + + let unsigned_long = read_typed_value( + buffer, + 0, + &TypeKind::Long { + endian: Endianness::Little, + signed: false, + }, + ) + .unwrap(); + let signed_long = read_typed_value( + buffer, + 0, + &TypeKind::Long { + endian: Endianness::Little, + signed: true, + }, + ) + .unwrap(); + assert_eq!(unsigned_long, Value::Uint(4_294_967_295)); + assert_eq!(signed_long, Value::Int(-1)); +} + +#[test] +fn test_read_typed_value_consistency_with_direct_calls() { + let buffer = &[0x34, 0x12, 0x78, 0x56, 0xbc, 0x9a, 0xde, 0xf0]; + + assert_eq!( + read_byte(buffer, 0, false).unwrap(), + read_typed_value(buffer, 0, &TypeKind::Byte { signed: false }).unwrap() + ); + assert_eq!( + read_short(buffer, 0, Endianness::Little, false).unwrap(), + read_typed_value( + buffer, + 0, + &TypeKind::Short { + endian: Endianness::Little, + signed: false, + }, + ) + .unwrap() + ); + assert_eq!( + read_long(buffer, 0, Endianness::Big, true).unwrap(), + read_typed_value( + buffer, + 0, + &TypeKind::Long { + endian: Endianness::Big, + signed: true, + }, + ) + .unwrap() + ); +} + +#[test] +fn test_read_typed_value_empty_buffer() { + for type_kind in [ + TypeKind::Byte { signed: false }, + TypeKind::Short { + endian: Endianness::Little, + signed: false, + }, + TypeKind::Long { + endian: Endianness::Little, + signed: false, + }, + ] { + let result = read_typed_value(&[], 0, &type_kind); + assert!(result.is_err()); + match result.unwrap_err() { + TypeReadError::BufferOverrun { offset, buffer_len } => { + assert_eq!(offset, 
0); + assert_eq!(buffer_len, 0); + } + TypeReadError::UnsupportedType { .. } => panic!("Expected BufferOverrun error"), + } + } +} + +#[test] +#[allow(clippy::too_many_lines)] +fn test_coerce_value_to_type() { + let cases = [ + ( + Value::Uint(0xff), + TypeKind::Byte { signed: true }, + Value::Int(-1), + ), + ( + Value::Uint(0x80), + TypeKind::Byte { signed: true }, + Value::Int(-128), + ), + ( + Value::Uint(0xfe), + TypeKind::Byte { signed: true }, + Value::Int(-2), + ), + ( + Value::Uint(0x7f), + TypeKind::Byte { signed: true }, + Value::Uint(0x7f), + ), + ( + Value::Uint(0xff), + TypeKind::Byte { signed: false }, + Value::Uint(0xff), + ), + ( + Value::Uint(0xffff), + TypeKind::Short { + endian: Endianness::Native, + signed: true, + }, + Value::Int(-1), + ), + ( + Value::Uint(0x8000), + TypeKind::Short { + endian: Endianness::Native, + signed: true, + }, + Value::Int(-32768), + ), + ( + Value::Uint(0x7fff), + TypeKind::Short { + endian: Endianness::Native, + signed: true, + }, + Value::Uint(0x7fff), + ), + ( + Value::Uint(0xffff_ffff), + TypeKind::Long { + endian: Endianness::Native, + signed: true, + }, + Value::Int(-1), + ), + ( + Value::Uint(0x8000_0000), + TypeKind::Long { + endian: Endianness::Native, + signed: true, + }, + Value::Int(-2_147_483_648), + ), + ( + Value::Uint(0x7fff_ffff), + TypeKind::Long { + endian: Endianness::Native, + signed: true, + }, + Value::Uint(0x7fff_ffff), + ), + ( + Value::Uint(0xffff_ffff_ffff_ffff), + TypeKind::Quad { + endian: Endianness::Native, + signed: true, + }, + Value::Int(-1), + ), + ( + Value::Uint(0x8000_0000_0000_0000), + TypeKind::Quad { + endian: Endianness::Native, + signed: true, + }, + Value::Int(i64::MIN), + ), + ( + Value::Uint(0x7fff_ffff_ffff_ffff), + TypeKind::Quad { + endian: Endianness::Native, + signed: true, + }, + Value::Uint(0x7fff_ffff_ffff_ffff), + ), + ( + Value::Uint(0xffff_ffff_ffff_ffff), + TypeKind::Quad { + endian: Endianness::Native, + signed: false, + }, + 
Value::Uint(0xffff_ffff_ffff_ffff), + ), + ( + Value::Int(-1), + TypeKind::Byte { signed: true }, + Value::Int(-1), + ), + ( + Value::Int(42), + TypeKind::Long { + endian: Endianness::Native, + signed: true, + }, + Value::Int(42), + ), + ( + Value::Uint(0xff), + TypeKind::String { max_length: None }, + Value::Uint(0xff), + ), + ]; + + for (i, (input, type_kind, expected)) in cases.iter().enumerate() { + let result = coerce_value_to_type(input, type_kind); + assert_eq!( + result, *expected, + "Case {i}: coerce({input:?}, {type_kind:?})" + ); + } +} From 3f031417690f9d7f5201e52b3bc21978ece49411 Mon Sep 17 00:00:00 2001 From: UncleSp1d3r Date: Fri, 6 Mar 2026 17:20:24 -0500 Subject: [PATCH 2/3] fix(evaluator): improve types module docs and add overflow tests - Fix inaccurate read_string docs that claimed errors for invalid UTF-8 (actually uses from_utf8_lossy replacement) - Restore # Arguments doc sections on numeric read functions - Add checked_add overflow tests for read_short/read_long/read_quad - Clarify UnsupportedType variant is reserved for future types Co-Authored-By: Claude Opus 4.6 Signed-off-by: UncleSp1d3r --- src/evaluator/types/mod.rs | 3 +- src/evaluator/types/numeric.rs | 70 ++++++++++++++++++++++++++++++++++ src/evaluator/types/string.rs | 7 ++-- 3 files changed, 76 insertions(+), 4 deletions(-) diff --git a/src/evaluator/types/mod.rs b/src/evaluator/types/mod.rs index 0d6f8678..0224d28e 100644 --- a/src/evaluator/types/mod.rs +++ b/src/evaluator/types/mod.rs @@ -28,7 +28,8 @@ pub enum TypeReadError { /// The actual length of the buffer. buffer_len: usize, }, - /// Unsupported type variant. + /// Unsupported type variant (reserved for future types not yet evaluatable, + /// e.g., regex, float, date). #[error("Unsupported type: {type_name}")] UnsupportedType { /// The name of the unsupported type. 
diff --git a/src/evaluator/types/numeric.rs b/src/evaluator/types/numeric.rs index 2ced68a6..ce96fde5 100644 --- a/src/evaluator/types/numeric.rs +++ b/src/evaluator/types/numeric.rs @@ -6,6 +6,13 @@ use crate::parser::ast::{Endianness, Value}; use byteorder::{BigEndian, ByteOrder, LittleEndian, NativeEndian}; /// Safely reads a single byte from the buffer at the specified offset. +/// +/// # Arguments +/// +/// * `buffer` - The byte buffer to read from +/// * `offset` - The offset position to read the byte from +/// * `signed` - Whether to interpret the byte as signed (`i8`) or unsigned (`u8`) +/// /// # Examples /// /// ``` @@ -40,6 +47,14 @@ pub fn read_byte(buffer: &[u8], offset: usize, signed: bool) -> Result Date: Fri, 6 Mar 2026 22:23:08 +0000 Subject: [PATCH 3/3] docs: Dosu updates for PR #156 --- docs/src/architecture.md | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/docs/src/architecture.md b/docs/src/architecture.md index b0db2168..be4dc94a 100644 --- a/docs/src/architecture.md +++ b/docs/src/architecture.md @@ -130,7 +130,11 @@ The evaluator executes magic rules against file buffers to identify file types. 
- `engine/`: Core evaluation engine submodule - `mod.rs`: `evaluate_single_rule`, `evaluate_rules`, and `evaluate_rules_with_config` functions - `tests.rs`: Engine unit tests -- `types.rs`: Type interpretation with endianness handling and signedness coercion +- `types/`: Type interpretation submodule + - `mod.rs`: Public API surface with `read_typed_value`, `coerce_value_to_type`, and type re-exports + - `numeric.rs`: Numeric type handling (`read_byte`, `read_short`, `read_long`, `read_quad`) with endianness and signedness support + - `string.rs`: String type handling (`read_string`) with null-termination and UTF-8 conversion + - `tests.rs`: Module tests - `offset/`: Offset resolution submodule - `mod.rs`: Dispatcher (`resolve_offset`) and re-exports - `absolute.rs`: `OffsetError`, `resolve_absolute_offset` @@ -142,7 +146,7 @@ The evaluator executes magic rules against file buffers to identify file types. - `comparison.rs`: `compare_values`, `apply_less_than`/`greater_than`/`less_equal`/`greater_equal` - `bitwise.rs`: `apply_bitwise_and`, `apply_bitwise_and_mask`, `apply_bitwise_xor`, `apply_bitwise_not` -**Organization Note:** The evaluator module was refactored to split a monolithic 2,638-line `mod.rs` into focused submodules, keeping the public API surface in `mod.rs` and moving core evaluation logic to `engine/mod.rs`. This maintains the same public API through re-exports (no breaking changes) while improving code organization and staying within the 500-600 line module guideline. +**Organization Note:** The evaluator module has been refactored to split monolithic files into focused submodules. The initial refactoring split a 2,638-line `mod.rs` into `engine/` submodules, and a subsequent refactoring reorganized the 1,836-line `types.rs` into `types/` submodules for numeric and string handling. The public API surface remains in `mod.rs` with core logic distributed across focused submodules. 
This maintains the same public API through re-exports (no breaking changes) while improving code organization and staying within the 500-600 line module guideline. **Implemented Features:**