From 819638c0fa47d5ad4936243d85e9e8cb21713789 Mon Sep 17 00:00:00 2001 From: bakgio <76126058+bakgio@users.noreply.github.com> Date: Wed, 22 Apr 2026 11:55:05 +0300 Subject: [PATCH 1/2] Add ergonomic byte helpers and BoxPath parsing --- README.md | 4 +- examples/extract_mdhd_bytes.rs | 41 ++++ examples/extract_track_ids_typed_bytes.rs | 39 ++++ .../extract_track_ids_typed_parsed_path.rs | 31 +++ examples/probe_track_count_bytes.rs | 23 ++ examples/rewrite_emsg_bytes.rs | 67 ++++++ src/extract.rs | 202 +++++++++++++++++- src/probe.rs | 20 +- src/rewrite.rs | 45 +++- src/walk.rs | 111 ++++++++++ tests/boxpath.rs | 105 +++++++++ tests/extract.rs | 195 ++++++++++++++++- tests/probe.rs | 32 ++- tests/rewrite.rs | 63 +++++- 14 files changed, 968 insertions(+), 10 deletions(-) create mode 100644 examples/extract_mdhd_bytes.rs create mode 100644 examples/extract_track_ids_typed_bytes.rs create mode 100644 examples/extract_track_ids_typed_parsed_path.rs create mode 100644 examples/probe_track_count_bytes.rs create mode 100644 examples/rewrite_emsg_bytes.rs create mode 100644 tests/boxpath.rs diff --git a/README.md b/README.md index 5080387..75c85a0 100644 --- a/README.md +++ b/README.md @@ -18,7 +18,7 @@ - Typed MP4 and ISOBMFF box model with registry-backed custom box support - Low-level traversal, extraction, stringify, probe, and writer APIs -- Thin typed path-based helpers for common extraction and rewrite flows +- Thin typed path-based helpers and byte-slice convenience wrappers for common extraction, rewrite, and probe flows - Built-in CLI for `dump`, `extract`, `probe`, `psshdump`, `edit`, and `divide` - Shared-fixture coverage for regular MP4, fragmented MP4, encrypted init segments, and QuickTime-style metadata cases @@ -69,7 +69,7 @@ mp4forge psshdump encrypted_init.mp4 `mp4forge` currently ships without public Cargo feature flags. 
-> See the [`examples/`](./examples) directory for both the low-level and high-level public API story, including typed extraction in `extract_track_ids_typed.rs`, typed rewrite in `rewrite_emsg.rs`, structure walking, probing, writer-backed rewrite, and custom box registration. +> See the [`examples/`](./examples) directory for the crate's low-level and high-level API usage patterns. ## License diff --git a/examples/extract_mdhd_bytes.rs b/examples/extract_mdhd_bytes.rs new file mode 100644 index 0000000..1f927f8 --- /dev/null +++ b/examples/extract_mdhd_bytes.rs @@ -0,0 +1,41 @@ +use std::env; +use std::error::Error; +use std::fs::File; + +use mp4forge::FourCc; +use mp4forge::extract::{extract_box_bytes, extract_box_payload_bytes}; +use mp4forge::walk::BoxPath; + +fn main() { + if let Err(error) = run() { + eprintln!("{error}"); + std::process::exit(1); + } +} + +fn run() -> Result<(), Box> { + let Some(input_path) = env::args().nth(1) else { + return Err("usage: cargo run --example extract_mdhd_bytes -- ".into()); + }; + + let box_path = BoxPath::from([ + FourCc::from_bytes(*b"moov"), + FourCc::from_bytes(*b"trak"), + FourCc::from_bytes(*b"mdia"), + FourCc::from_bytes(*b"mdhd"), + ]); + + let mut file = File::open(input_path)?; + let boxes = extract_box_bytes(&mut file, None, box_path.clone())?; + let payloads = extract_box_payload_bytes(&mut file, None, box_path)?; + + for (index, (box_bytes, payload_bytes)) in boxes.iter().zip(payloads.iter()).enumerate() { + println!( + "match {index}: total_bytes={} payload_bytes={}", + box_bytes.len(), + payload_bytes.len() + ); + } + + Ok(()) +} diff --git a/examples/extract_track_ids_typed_bytes.rs b/examples/extract_track_ids_typed_bytes.rs new file mode 100644 index 0000000..dd79a59 --- /dev/null +++ b/examples/extract_track_ids_typed_bytes.rs @@ -0,0 +1,39 @@ +use std::env; +use std::error::Error; +use std::fs; + +use mp4forge::FourCc; +use mp4forge::boxes::iso14496_12::Tkhd; +use 
mp4forge::extract::extract_box_as_bytes; +use mp4forge::walk::BoxPath; + +fn main() { + if let Err(error) = run() { + eprintln!("{error}"); + std::process::exit(1); + } +} + +fn run() -> Result<(), Box> { + let Some(path) = env::args().nth(1) else { + return Err( + "usage: cargo run --example extract_track_ids_typed_bytes -- ".into(), + ); + }; + + let input = fs::read(path)?; + let headers = extract_box_as_bytes::( + &input, + BoxPath::from([ + FourCc::from_bytes(*b"moov"), + FourCc::from_bytes(*b"trak"), + FourCc::from_bytes(*b"tkhd"), + ]), + )?; + + for tkhd in headers { + println!("track ID: {}", tkhd.track_id); + } + + Ok(()) +} diff --git a/examples/extract_track_ids_typed_parsed_path.rs b/examples/extract_track_ids_typed_parsed_path.rs new file mode 100644 index 0000000..f55c875 --- /dev/null +++ b/examples/extract_track_ids_typed_parsed_path.rs @@ -0,0 +1,31 @@ +use std::env; +use std::error::Error; +use std::fs::File; + +use mp4forge::boxes::iso14496_12::Tkhd; +use mp4forge::extract::extract_box_as; +use mp4forge::walk::BoxPath; + +fn main() { + if let Err(error) = run() { + eprintln!("{error}"); + std::process::exit(1); + } +} + +fn run() -> Result<(), Box> { + let Some(path) = env::args().nth(1) else { + return Err( + "usage: cargo run --example extract_track_ids_typed_parsed_path -- ".into(), + ); + }; + + let mut file = File::open(path)?; + let headers = extract_box_as::<_, Tkhd>(&mut file, None, BoxPath::parse("moov/trak/tkhd")?)?; + + for tkhd in headers { + println!("track ID: {}", tkhd.track_id); + } + + Ok(()) +} diff --git a/examples/probe_track_count_bytes.rs b/examples/probe_track_count_bytes.rs new file mode 100644 index 0000000..760bf99 --- /dev/null +++ b/examples/probe_track_count_bytes.rs @@ -0,0 +1,23 @@ +use std::env; +use std::error::Error; +use std::fs; + +use mp4forge::probe::probe_bytes; + +fn main() { + if let Err(error) = run() { + eprintln!("{error}"); + std::process::exit(1); + } +} + +fn run() -> Result<(), Box> { + let 
Some(path) = env::args().nth(1) else { + return Err("usage: cargo run --example probe_track_count_bytes -- ".into()); + }; + + let input = fs::read(path)?; + let info = probe_bytes(&input)?; + println!("track num: {}", info.tracks.len()); + Ok(()) +} diff --git a/examples/rewrite_emsg_bytes.rs b/examples/rewrite_emsg_bytes.rs new file mode 100644 index 0000000..63a2592 --- /dev/null +++ b/examples/rewrite_emsg_bytes.rs @@ -0,0 +1,67 @@ +use std::env; +use std::error::Error; +use std::fs; + +use mp4forge::FourCc; +use mp4forge::boxes::iso14496_12::Emsg; +use mp4forge::rewrite::rewrite_box_as_bytes; +use mp4forge::walk::BoxPath; + +fn main() { + if let Err(error) = run() { + eprintln!("{error}"); + std::process::exit(1); + } +} + +fn run() -> Result<(), Box> { + let Some(output_path) = env::args().nth(1) else { + return Err("usage: cargo run --example rewrite_emsg_bytes -- ".into()); + }; + + let input = sample_emsg_file(); + let output = rewrite_box_as_bytes::( + &input, + BoxPath::from([FourCc::from_bytes(*b"emsg")]), + |emsg| { + emsg.message_data = b"hello world".to_vec(); + }, + )?; + fs::write(output_path, output)?; + + Ok(()) +} + +fn sample_emsg_file() -> Vec { + let mut emsg_payload = vec![0x00, 0x00, 0x00, 0x00]; + append_null_terminated_string(&mut emsg_payload, "urn:test"); + append_null_terminated_string(&mut emsg_payload, "demo"); + append_u32(&mut emsg_payload, 1000); + append_u32(&mut emsg_payload, 0); + append_u32(&mut emsg_payload, 5); + append_u32(&mut emsg_payload, 1); + emsg_payload.extend_from_slice(b"hello"); + + let mut file = Vec::new(); + file.extend_from_slice(&box_bytes("free", &[0x01, 0x02, 0x03])); + file.extend_from_slice(&box_bytes("emsg", &emsg_payload)); + file.extend_from_slice(&box_bytes("free", &[0x04, 0x05])); + file +} + +fn append_null_terminated_string(dst: &mut Vec, value: &str) { + dst.extend_from_slice(value.as_bytes()); + dst.push(0x00); +} + +fn append_u32(dst: &mut Vec, value: u32) { + 
dst.extend_from_slice(&value.to_be_bytes()); +} + +fn box_bytes(box_type: &str, payload: &[u8]) -> Vec { + let mut box_bytes = Vec::with_capacity(8 + payload.len()); + box_bytes.extend_from_slice(&((payload.len() + 8) as u32).to_be_bytes()); + box_bytes.extend_from_slice(box_type.as_bytes()); + box_bytes.extend_from_slice(payload); + box_bytes +} diff --git a/src/extract.rs b/src/extract.rs index 4698e6f..4b6c820 100644 --- a/src/extract.rs +++ b/src/extract.rs @@ -1,12 +1,13 @@ //! Path-based box extraction helpers built on the structure walker. //! //! This module keeps the existing low-level extraction surface available while also exposing thin -//! typed helpers for callers that already know the payload type they expect at a given path. +//! typed helpers for callers that already know the payload type they expect at a given path, +//! including exact raw-byte helpers and byte-slice convenience wrappers for in-memory workflows. use std::any::type_name; use std::error::Error; use std::fmt; -use std::io::{self, Read, Seek}; +use std::io::{self, Cursor, Read, Seek}; use crate::BoxInfo; use crate::FourCc; @@ -132,6 +133,103 @@ where extract_boxes_as_with_registry(reader, parent, paths, ®istry) } +/// Extracts every box that matches `path` and returns each match as exact serialized bytes, +/// including the original box header. +/// +/// When `parent` is present, `path` is evaluated relative to that box. Returns an empty vector +/// when no boxes match. +pub fn extract_box_bytes( + reader: &mut R, + parent: Option<&BoxInfo>, + path: BoxPath, +) -> Result>, ExtractError> +where + R: Read + Seek, +{ + let paths = [path]; + extract_boxes_bytes(reader, parent, &paths) +} + +/// Extracts every box that matches any path in `paths` and returns each match as exact serialized +/// bytes, including the original box header. +/// +/// When `parent` is present, every path is evaluated relative to that box. 
The returned bytes are +/// copied directly from the source stream without decoding or re-encoding, so the original header +/// form and payload bytes are preserved verbatim. +pub fn extract_boxes_bytes( + reader: &mut R, + parent: Option<&BoxInfo>, + paths: &[BoxPath], +) -> Result>, ExtractError> +where + R: Read + Seek, +{ + let registry = default_registry(); + extract_boxes_bytes_with_registry(reader, parent, paths, ®istry) +} + +/// Extracts every box that matches `path` and returns each matched payload as exact on-disk bytes. +/// +/// When `parent` is present, `path` is evaluated relative to that box. For container boxes, the +/// returned payload bytes still include any serialized child boxes because those bytes are part of +/// the matched payload. +pub fn extract_box_payload_bytes( + reader: &mut R, + parent: Option<&BoxInfo>, + path: BoxPath, +) -> Result>, ExtractError> +where + R: Read + Seek, +{ + let paths = [path]; + extract_boxes_payload_bytes(reader, parent, &paths) +} + +/// Extracts every box that matches any path in `paths` and returns each matched payload as exact +/// on-disk bytes. +/// +/// When `parent` is present, every path is evaluated relative to that box. The returned bytes are +/// copied directly from the source stream without decoding or re-encoding, preserving the payload +/// exactly as stored in the file. +pub fn extract_boxes_payload_bytes( + reader: &mut R, + parent: Option<&BoxInfo>, + paths: &[BoxPath], +) -> Result>, ExtractError> +where + R: Read + Seek, +{ + let registry = default_registry(); + extract_boxes_payload_bytes_with_registry(reader, parent, paths, ®istry) +} + +/// Extracts every box that matches `path`, decodes the payloads, and clones them as `T` from an +/// in-memory MP4 byte slice. +/// +/// This is equivalent to calling [`extract_box_as`] with `Cursor<&[u8]>` and no parent box. Paths +/// are always evaluated from the file root. Returns an empty vector when no boxes match. 
+pub fn extract_box_as_bytes(input: &[u8], path: BoxPath) -> Result, ExtractError> +where + T: CodecBox + Clone + 'static, +{ + let paths = [path]; + extract_boxes_as_bytes::(input, &paths) +} + +/// Extracts every box that matches any path in `paths`, decodes the payloads, and clones them as +/// `T` from an in-memory MP4 byte slice. +/// +/// This is equivalent to calling [`extract_boxes_as`] with `Cursor<&[u8]>` and no parent box. +/// Every matched box must decode to `T`, otherwise +/// [`ExtractError::UnexpectedPayloadType`] is returned with the matched path and offset. +pub fn extract_boxes_as_bytes(input: &[u8], paths: &[BoxPath]) -> Result, ExtractError> +where + T: CodecBox + Clone + 'static, +{ + let mut reader = Cursor::new(input); + extract_boxes_as(&mut reader, None, paths) +} + /// Extracts every box that matches any path in `paths` using `registry` and returns the matching /// header metadata. /// @@ -177,6 +275,40 @@ where Ok(matches) } +/// Extracts every box that matches any path in `paths` using `registry` and returns each match as +/// exact serialized bytes, including the original box header. +/// +/// Use this when custom or context-sensitive box registrations are required to walk into matched +/// subtrees while preserving the matched bytes verbatim. +pub fn extract_boxes_bytes_with_registry( + reader: &mut R, + parent: Option<&BoxInfo>, + paths: &[BoxPath], + registry: &BoxRegistry, +) -> Result>, ExtractError> +where + R: Read + Seek, +{ + extract_matched_bytes(reader, parent, paths, registry, ExtractedByteRange::FullBox) +} + +/// Extracts every box that matches any path in `paths` using `registry` and returns each matched +/// payload as exact on-disk bytes. +/// +/// Use this when custom or context-sensitive box registrations are required to walk into matched +/// subtrees while preserving the matched payload bytes verbatim. 
+pub fn extract_boxes_payload_bytes_with_registry( + reader: &mut R, + parent: Option<&BoxInfo>, + paths: &[BoxPath], + registry: &BoxRegistry, +) -> Result>, ExtractError> +where + R: Read + Seek, +{ + extract_matched_bytes(reader, parent, paths, registry, ExtractedByteRange::Payload) +} + /// Extracts every box that matches any path in `paths`, decodes the payloads with `registry`, and /// clones them as `T`. /// @@ -219,6 +351,12 @@ struct MatchedBox { path: BoxPath, } +#[derive(Clone, Copy)] +enum ExtractedByteRange { + FullBox, + Payload, +} + fn collect_matches( reader: &mut R, parent: Option<&BoxInfo>, @@ -272,6 +410,26 @@ where Ok(matches) } +fn extract_matched_bytes( + reader: &mut R, + parent: Option<&BoxInfo>, + paths: &[BoxPath], + registry: &BoxRegistry, + range: ExtractedByteRange, +) -> Result>, ExtractError> +where + R: Read + Seek, +{ + let matched_boxes = collect_matches(reader, parent, paths, registry)?; + let mut extracted = Vec::with_capacity(matched_boxes.len()); + + for matched in matched_boxes { + extracted.push(read_matched_bytes(reader, &matched.info, range)?); + } + + Ok(extracted) +} + fn decode_payload( reader: &mut R, matched: &MatchedBox, @@ -299,6 +457,46 @@ where Ok(payload) } +fn read_matched_bytes( + reader: &mut R, + info: &BoxInfo, + range: ExtractedByteRange, +) -> Result, ExtractError> +where + R: Read + Seek, +{ + let len = match range { + ExtractedByteRange::FullBox => { + info.seek_to_start(reader)?; + info.size() + } + ExtractedByteRange::Payload => { + info.seek_to_payload(reader)?; + info.payload_size()? + } + }; + read_exact_bytes(reader, len) +} + +fn read_exact_bytes(reader: &mut R, len: u64) -> Result, ExtractError> +where + R: Read, +{ + let mut bytes = usize::try_from(len) + .map(Vec::with_capacity) + .unwrap_or_else(|_| Vec::new()); + + // `Read::read_to_end` on a `Take` reader does not error on an early underlying EOF, so the + // copied byte count must be checked explicitly to preserve exact-byte semantics. 
+ let mut limited = reader.take(len); + let copied = limited.read_to_end(&mut bytes)? as u64; + if copied != len { + return Err(io::Error::from(io::ErrorKind::UnexpectedEof).into()); + } + + Ok(bytes) +} + fn validate_paths(paths: &[BoxPath]) -> Result<(), ExtractError> { if paths.iter().any(BoxPath::is_empty) { return Err(ExtractError::EmptyPath); diff --git a/src/probe.rs b/src/probe.rs index bf7dbdf..01cdcef 100644 --- a/src/probe.rs +++ b/src/probe.rs @@ -1,4 +1,5 @@ -//! File-summary helpers built on the extraction and box layers. +//! File-summary helpers built on the extraction and box layers, with byte-slice convenience entry +//! points for in-memory probe flows. use std::error::Error; use std::fmt; @@ -264,6 +265,15 @@ where Ok(summary) } +/// Probes an in-memory MP4 byte slice and returns high-level movie, track, and fragment +/// summaries. +/// +/// This is equivalent to calling [`probe`] with `Cursor<&[u8]>`. +pub fn probe_bytes(input: &[u8]) -> Result { + let mut reader = Cursor::new(input); + probe(&mut reader) +} + /// Legacy fragmented-file probe entry point that currently aliases [`probe`]. pub fn probe_fra(reader: &mut R) -> Result where @@ -272,6 +282,14 @@ where probe(reader) } +/// Legacy fragmented-file probe entry point for in-memory MP4 bytes. +/// +/// This currently aliases [`probe_bytes`] for callers that already use the `probe_fra` naming. +pub fn probe_fra_bytes(input: &[u8]) -> Result { + let mut reader = Cursor::new(input); + probe_fra(&mut reader) +} + /// Detects the AAC object profile exposed by an `esds` descriptor stream. pub fn detect_aac_profile(esds: &Esds) -> Result, ProbeError> { let Some(decoder_config) = esds.decoder_config_descriptor() else { diff --git a/src/rewrite.rs b/src/rewrite.rs index f11bf1f..9d96b52 100644 --- a/src/rewrite.rs +++ b/src/rewrite.rs @@ -1,12 +1,13 @@ //! Path-based typed payload rewrite helpers built on the writer layer. //! //! 
These helpers preserve the existing low-level writer flow for advanced use cases while offering -//! a small typed API for common "find payloads at this path and mutate them" rewrite operations. +//! a small typed API for common "find payloads at this path and mutate them" rewrite operations, +//! including byte-slice wrappers for in-memory rewrite flows. use std::any::type_name; use std::error::Error; use std::fmt; -use std::io::{self, Read, Seek, SeekFrom, Write}; +use std::io::{self, Cursor, Read, Seek, SeekFrom, Write}; use crate::FourCc; use crate::boxes::iso14496_12::Ftyp; @@ -64,6 +65,46 @@ where rewrite_boxes_as_with_registry(reader, writer, paths, ®istry, edit) } +/// Rewrites every payload at `path` in an in-memory MP4 byte slice and returns the rewritten +/// bytes. +/// +/// This is equivalent to calling [`rewrite_box_as`] with `Cursor<&[u8]>` input and `Vec` +/// output storage. The edit closure runs once per matched box in depth-first order, and unmatched +/// boxes are copied through verbatim. +pub fn rewrite_box_as_bytes( + input: &[u8], + path: BoxPath, + edit: F, +) -> Result, RewriteError> +where + T: CodecBox + 'static, + F: FnMut(&mut T), +{ + let paths = [path]; + rewrite_boxes_as_bytes::(input, &paths, edit) +} + +/// Rewrites every payload that matches any path in `paths` in an in-memory MP4 byte slice and +/// returns the rewritten bytes. +/// +/// This is equivalent to calling [`rewrite_boxes_as`] with `Cursor<&[u8]>` input and `Vec` +/// output storage. Every matched payload must decode to `T`, otherwise +/// [`RewriteError::UnexpectedPayloadType`] is returned with the matched path and offset. 
+pub fn rewrite_boxes_as_bytes( + input: &[u8], + paths: &[BoxPath], + edit: F, +) -> Result, RewriteError> +where + T: CodecBox + 'static, + F: FnMut(&mut T), +{ + let mut reader = Cursor::new(input); + let mut writer = Cursor::new(Vec::with_capacity(input.len())); + rewrite_boxes_as(&mut reader, &mut writer, paths, edit)?; + Ok(writer.into_inner()) +} + /// Rewrites every payload that matches any path in `paths` using `registry`, downcasts each match /// to `T`, and applies `edit`. /// diff --git a/src/walk.rs b/src/walk.rs index ff40817..4bef1ac 100644 --- a/src/walk.rs +++ b/src/walk.rs @@ -4,17 +4,21 @@ use std::error::Error; use std::fmt; use std::io::{self, Read, Seek, SeekFrom, Write}; use std::ops::Deref; +use std::str::FromStr; use crate::FourCc; use crate::boxes::iso14496_12::Ftyp; use crate::boxes::metadata::Keys; use crate::boxes::{BoxLookupContext, BoxRegistry, default_registry}; use crate::codec::{CodecError, DynCodecBox, unmarshal, unmarshal_any_with_context}; +use crate::fourcc::ParseFourCcError; use crate::header::{BoxInfo, HeaderError, SMALL_HEADER_SIZE}; const FTYP: FourCc = FourCc::from_bytes(*b"ftyp"); const KEYS: FourCc = FourCc::from_bytes(*b"keys"); const QT_BRAND: FourCc = FourCc::from_bytes(*b"qt "); +const ROOT_MARKER: &str = ""; +const WILDCARD_SEGMENT: &str = "*"; /// Depth-first traversal decision returned by a walk visitor. #[derive(Clone, Copy, Debug, PartialEq, Eq)] @@ -29,6 +33,10 @@ pub enum WalkControl { /// /// Path comparisons used by the extraction and rewrite helpers honor [`FourCc::ANY`] as a /// wildcard segment. +/// +/// In addition to low-level array-based construction, paths can be parsed from slash-delimited +/// strings such as `moov/trak/tkhd`. The segment `*` maps to [`FourCc::ANY`], and the string +/// `` maps to the empty path. #[derive(Clone, Debug, Default, PartialEq, Eq)] pub struct BoxPath(Vec); @@ -53,6 +61,41 @@ impl BoxPath { self.0.len() } + /// Parses a slash-delimited path string into a [`BoxPath`]. 
+ /// + /// Each non-wildcard segment must contain exactly four bytes and is parsed using + /// [`FourCc::from_str`]. The segment `*` maps to [`FourCc::ANY`], and `` returns the + /// empty path. + pub fn parse(value: &str) -> Result { + if value == ROOT_MARKER { + return Ok(Self::empty()); + } + + let mut path = Vec::new(); + for (index, segment) in value.split('/').enumerate() { + if segment.is_empty() { + return Err(ParseBoxPathError::EmptySegment { index }); + } + if segment == ROOT_MARKER { + return Err(ParseBoxPathError::RootMarkerMustAppearAlone); + } + if segment == WILDCARD_SEGMENT { + path.push(FourCc::ANY); + continue; + } + + let box_type = + FourCc::try_from(segment).map_err(|source| ParseBoxPathError::InvalidSegment { + index, + segment: segment.to_owned(), + source, + })?; + path.push(box_type); + } + + Ok(Self(path)) + } + fn child_path(&self, box_type: FourCc) -> Self { let mut path = self.0.clone(); path.push(box_type); @@ -115,6 +158,14 @@ impl From> for BoxPath { } } +impl TryFrom<&str> for BoxPath { + type Error = ParseBoxPathError; + + fn try_from(value: &str) -> Result { + Self::parse(value) + } +} + impl From<[FourCc; N]> for BoxPath { fn from(value: [FourCc; N]) -> Self { Self(value.into()) @@ -127,6 +178,66 @@ impl FromIterator for BoxPath { } } +impl FromStr for BoxPath { + type Err = ParseBoxPathError; + + fn from_str(s: &str) -> Result { + Self::parse(s) + } +} + +/// Error returned when a string cannot be parsed as a [`BoxPath`]. +#[derive(Clone, Debug, PartialEq, Eq)] +pub enum ParseBoxPathError { + /// One segment between path separators was empty. + EmptySegment { + /// Zero-based index of the empty segment. + index: usize, + }, + /// One segment was neither `*` nor a valid four-byte [`FourCc`]. + InvalidSegment { + /// Zero-based index of the invalid segment. + index: usize, + /// Original segment text from the parsed path string. + segment: String, + /// Underlying four-character-code parse failure. 
+ source: ParseFourCcError, + }, + /// The special `` marker was combined with additional segments. + RootMarkerMustAppearAlone, +} + +impl fmt::Display for ParseBoxPathError { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + Self::EmptySegment { index } => { + write!(f, "box path segment {} must not be empty", index + 1) + } + Self::InvalidSegment { + index, + segment, + source, + } => write!( + f, + "invalid box path segment {} ({segment:?}): {source}", + index + 1 + ), + Self::RootMarkerMustAppearAlone => { + write!(f, "box path root marker {ROOT_MARKER:?} must appear alone") + } + } + } +} + +impl Error for ParseBoxPathError { + fn source(&self) -> Option<&(dyn Error + 'static)> { + match self { + Self::InvalidSegment { source, .. } => Some(source), + Self::EmptySegment { .. } | Self::RootMarkerMustAppearAlone => None, + } + } +} + #[derive(Clone, Copy, Debug, Default, PartialEq, Eq)] pub(crate) struct PathMatch { pub(crate) forward_match: bool, diff --git a/tests/boxpath.rs b/tests/boxpath.rs new file mode 100644 index 0000000..0ee5fca --- /dev/null +++ b/tests/boxpath.rs @@ -0,0 +1,105 @@ +use std::io::Cursor; +use std::str::FromStr; + +use mp4forge::boxes::iso14496_12::{Moov, Trak, Udta}; +use mp4forge::codec::{CodecBox, marshal}; +use mp4forge::extract::extract_box; +use mp4forge::walk::{BoxPath, ParseBoxPathError}; +use mp4forge::{BoxInfo, FourCc}; + +#[test] +fn parse_box_path_accepts_slash_delimited_segments() { + let path = BoxPath::from_str("moov/trak/tkhd").unwrap(); + + assert_eq!( + path.as_slice(), + &[fourcc("moov"), fourcc("trak"), fourcc("tkhd")] + ); +} + +#[test] +fn parse_box_path_supports_wildcard_segments_in_extract_matching() { + let trak = encode_supported_box(&Trak, &[]); + let udta = encode_supported_box(&Udta, &[]); + let moov = encode_supported_box(&Moov, &[trak, udta].concat()); + + let extracted = extract_box( + &mut Cursor::new(moov), + None, + BoxPath::parse("moov/*").unwrap(), + ) + .unwrap(); + + 
assert_eq!( + extracted.iter().map(BoxInfo::box_type).collect::>(), + vec![fourcc("trak"), fourcc("udta")] + ); +} + +#[test] +fn parse_box_path_supports_root_marker() { + let path = BoxPath::parse("").unwrap(); + assert!(path.is_empty()); +} + +#[test] +fn parse_box_path_rejects_invalid_segment_lengths_with_context() { + let error = BoxPath::parse("moov/trakk").unwrap_err(); + + assert!(matches!( + error, + ParseBoxPathError::InvalidSegment { + index: 1, + ref segment, + source + } if segment == "trakk" && source.len() == 5 + )); + assert_eq!( + error.to_string(), + "invalid box path segment 2 (\"trakk\"): fourcc values must be exactly 4 bytes, got 5" + ); +} + +#[test] +fn parse_box_path_reports_empty_segments_and_misplaced_root_marker() { + let empty_segment = BoxPath::parse("moov//trak").unwrap_err(); + assert!(matches!( + empty_segment, + ParseBoxPathError::EmptySegment { index: 1 } + )); + assert_eq!( + empty_segment.to_string(), + "box path segment 2 must not be empty" + ); + + let root_marker = BoxPath::parse("/trak").unwrap_err(); + assert!(matches!( + root_marker, + ParseBoxPathError::RootMarkerMustAppearAlone + )); + assert_eq!( + root_marker.to_string(), + "box path root marker \"\" must appear alone" + ); +} + +fn fourcc(value: &str) -> FourCc { + FourCc::try_from(value).unwrap() +} + +fn encode_supported_box(box_value: &B, children: &[u8]) -> Vec +where + B: CodecBox, +{ + let mut payload = Vec::new(); + marshal(&mut payload, box_value, None).unwrap(); + payload.extend_from_slice(children); + encode_raw_box(box_value.box_type(), &payload) +} + +fn encode_raw_box(box_type: FourCc, payload: &[u8]) -> Vec { + let info = BoxInfo::new(box_type, 8 + payload.len() as u64); + let mut bytes = info.encode(); + bytes.extend_from_slice(payload); + bytes +} diff --git a/tests/extract.rs b/tests/extract.rs index 82c919c..a20c1f1 100644 --- a/tests/extract.rs +++ b/tests/extract.rs @@ -7,7 +7,9 @@ use mp4forge::boxes::metadata::{ }; use mp4forge::codec::{CodecBox, 
marshal}; use mp4forge::extract::{ - ExtractError, extract_box, extract_box_as, extract_box_with_payload, extract_boxes, + ExtractError, extract_box, extract_box_as, extract_box_as_bytes, extract_box_bytes, + extract_box_payload_bytes, extract_box_with_payload, extract_boxes, extract_boxes_as_bytes, + extract_boxes_bytes, extract_boxes_payload_bytes, }; use mp4forge::stringify::stringify; use mp4forge::walk::BoxPath; @@ -151,6 +153,158 @@ fn extract_box_as_returns_typed_payloads() { ); } +#[test] +fn extract_box_bytes_preserve_exact_leaf_box_bytes_for_relative_paths() { + let leaf = encode_raw_box(fourcc("zzzz"), &[0xde, 0xad, 0xbe, 0xef]); + let udta = encode_supported_box(&Udta, &leaf); + let moov = encode_supported_box(&Moov, &udta); + + let parent = extract_box( + &mut Cursor::new(moov.clone()), + None, + BoxPath::from([fourcc("moov")]), + ) + .unwrap() + .pop() + .unwrap(); + + let extracted = extract_box_bytes( + &mut Cursor::new(moov), + Some(&parent), + BoxPath::from([fourcc("udta"), fourcc("zzzz")]), + ) + .unwrap(); + + assert_eq!(extracted, vec![leaf]); +} + +#[test] +fn extract_box_payload_bytes_preserve_exact_container_payload_bytes() { + let leaf = encode_raw_box(fourcc("zzzz"), &[0xde, 0xad, 0xbe, 0xef]); + let udta = encode_supported_box(&Udta, &leaf); + let moov = encode_supported_box(&Moov, &udta); + + let extracted = extract_box_payload_bytes( + &mut Cursor::new(moov), + None, + BoxPath::from([fourcc("moov"), fourcc("udta")]), + ) + .unwrap(); + + assert_eq!(extracted, vec![leaf]); +} + +#[test] +fn extract_box_as_bytes_returns_typed_payloads_without_cursor() { + let mut tkhd_a = Tkhd::default(); + tkhd_a.track_id = 1; + let mut tkhd_b = Tkhd::default(); + tkhd_b.track_id = 2; + let trak_a = encode_supported_box(&Trak, &encode_supported_box(&tkhd_a, &[])); + let trak_b = encode_supported_box(&Trak, &encode_supported_box(&tkhd_b, &[])); + let moov = encode_supported_box(&Moov, &[trak_a, trak_b].concat()); + + let extracted = 
extract_box_as_bytes::( + &moov, + BoxPath::from([fourcc("moov"), fourcc("trak"), fourcc("tkhd")]), + ) + .unwrap(); + + assert_eq!(extracted.len(), 2); + assert_eq!( + extracted + .iter() + .map(|tkhd| tkhd.track_id) + .collect::>(), + vec![1, 2] + ); +} + +#[test] +fn extract_boxes_bytes_match_shared_fixture_box_ranges() { + let sample = std::fs::read(fixture_path("sample.mp4")).unwrap(); + let paths = [ + BoxPath::from([fourcc("ftyp")]), + BoxPath::from([ + fourcc("moov"), + fourcc("trak"), + fourcc("mdia"), + fourcc("mdhd"), + ]), + ]; + + let infos = extract_boxes(&mut Cursor::new(sample.clone()), None, &paths).unwrap(); + let extracted = extract_boxes_bytes(&mut Cursor::new(sample.clone()), None, &paths).unwrap(); + + assert_eq!(infos.len(), extracted.len()); + for (info, bytes) in infos.iter().zip(extracted.iter()) { + assert_eq!(bytes, &box_bytes_from_file(&sample, info)); + } +} + +#[test] +fn extract_boxes_payload_bytes_match_shared_fixture_payload_ranges() { + let sample = std::fs::read(fixture_path("sample.mp4")).unwrap(); + let paths = [BoxPath::from([ + fourcc("moov"), + fourcc("trak"), + fourcc("mdia"), + fourcc("mdhd"), + ])]; + + let infos = extract_boxes(&mut Cursor::new(sample.clone()), None, &paths).unwrap(); + let extracted = + extract_boxes_payload_bytes(&mut Cursor::new(sample.clone()), None, &paths).unwrap(); + + assert_eq!(infos.len(), extracted.len()); + for (info, bytes) in infos.iter().zip(extracted.iter()) { + assert_eq!(bytes, &payload_bytes_from_file(&sample, info)); + } +} + +#[test] +fn extract_boxes_as_bytes_supports_multiple_root_paths() { + let mut root_tkhd = Tkhd::default(); + root_tkhd.track_id = 1; + let root_trak = encode_supported_box(&Trak, &encode_supported_box(&root_tkhd, &[])); + + let mut nested_tkhd = Tkhd::default(); + nested_tkhd.track_id = 2; + let nested_trak = encode_supported_box(&Trak, &encode_supported_box(&nested_tkhd, &[])); + let moov = encode_supported_box(&Moov, &nested_trak); + + let file = [root_trak, 
moov].concat(); + let extracted = extract_boxes_as_bytes::( + &file, + &[ + BoxPath::from([fourcc("trak"), fourcc("tkhd")]), + BoxPath::from([fourcc("moov"), fourcc("trak"), fourcc("tkhd")]), + ], + ) + .unwrap(); + + assert_eq!( + extracted + .iter() + .map(|tkhd| tkhd.track_id) + .collect::>(), + vec![1, 2] + ); +} + +#[test] +fn extract_box_payload_bytes_return_empty_when_nothing_matches() { + let moov = encode_supported_box(&Moov, &[]); + let extracted = extract_box_payload_bytes( + &mut Cursor::new(moov), + None, + BoxPath::from([fourcc("zzzz")]), + ) + .unwrap(); + + assert!(extracted.is_empty()); +} + #[test] fn extract_box_as_uses_walked_lookup_context() { let qt = fourcc("qt "); @@ -200,6 +354,33 @@ fn extract_box_as_uses_walked_lookup_context() { assert_eq!(extracted[0].data.data, b"1.0.0"); } +#[test] +fn extract_box_as_bytes_reports_payload_type_context() { + let mut tkhd = Tkhd::default(); + tkhd.track_id = 7; + let trak = encode_supported_box(&Trak, &encode_supported_box(&tkhd, &[])); + let moov = encode_supported_box(&Moov, &trak); + + let error = extract_box_as_bytes::( + &moov, + BoxPath::from([fourcc("moov"), fourcc("trak"), fourcc("tkhd")]), + ) + .unwrap_err(); + + assert!(matches!( + error, + ExtractError::UnexpectedPayloadType { + ref path, + box_type, + offset, + expected_type + } if path.as_slice() == [fourcc("moov"), fourcc("trak"), fourcc("tkhd")] + && box_type == fourcc("tkhd") + && offset == 16 + && expected_type == std::any::type_name::() + )); +} + #[test] fn extract_box_as_reports_payload_type_context() { let mut tkhd = Tkhd::default(); @@ -334,3 +515,15 @@ fn encode_raw_box(box_type: FourCc, payload: &[u8]) -> Vec { bytes.extend_from_slice(payload); bytes } + +fn box_bytes_from_file(file: &[u8], info: &BoxInfo) -> Vec { + let start = usize::try_from(info.offset()).unwrap(); + let end = usize::try_from(info.offset() + info.size()).unwrap(); + file[start..end].to_vec() +} + +fn payload_bytes_from_file(file: &[u8], info: &BoxInfo) -> 
Vec { + let start = usize::try_from(info.offset() + info.header_size()).unwrap(); + let end = usize::try_from(info.offset() + info.size()).unwrap(); + file[start..end].to_vec() +} diff --git a/tests/probe.rs b/tests/probe.rs index aa57ec9..2843ea1 100644 --- a/tests/probe.rs +++ b/tests/probe.rs @@ -17,7 +17,8 @@ use mp4forge::boxes::iso14496_14::{ use mp4forge::codec::{CodecBox, MutableBox, marshal}; use mp4forge::probe::{ AacProfileInfo, EditListEntry, TrackCodec, average_sample_bitrate, average_segment_bitrate, - detect_aac_profile, find_idr_frames, max_sample_bitrate, max_segment_bitrate, probe, probe_fra, + detect_aac_profile, find_idr_frames, max_sample_bitrate, max_segment_bitrate, probe, + probe_bytes, probe_fra, probe_fra_bytes, }; use mp4forge::{BoxInfo, FourCc}; @@ -119,6 +120,14 @@ fn probe_summarizes_movie_tracks_samples_and_codecs() { assert_eq!(idr_frames, vec![0]); } +#[test] +fn probe_bytes_matches_cursor_based_probe() { + let file = build_movie_file(); + let expected = probe(&mut Cursor::new(file.clone())).unwrap(); + let actual = probe_bytes(&file).unwrap(); + assert_eq!(actual, expected); +} + #[test] fn probe_and_probe_fra_summarize_fragment_runs() { let file = build_fragment_file(); @@ -157,6 +166,27 @@ fn probe_and_probe_fra_summarize_fragment_runs() { assert_eq!(second.size, 36); } +#[test] +fn probe_fra_bytes_matches_cursor_based_probe_fra() { + let file = build_fragment_file(); + let expected = probe_fra(&mut Cursor::new(file.clone())).unwrap(); + let actual = probe_fra_bytes(&file).unwrap(); + assert_eq!(actual, expected); +} + +#[test] +fn probe_bytes_propagates_decode_errors() { + let file = encode_raw_box(fourcc("ftyp"), &[0x69, 0x73]); + let expected = probe(&mut Cursor::new(file.clone())).unwrap_err(); + let actual = probe_bytes(&file).unwrap_err(); + + assert_eq!( + std::mem::discriminant(&actual), + std::mem::discriminant(&expected) + ); + assert_eq!(actual.to_string(), expected.to_string()); +} + #[test] fn 
detect_aac_profile_matches_reference_cases() { let cases = [ diff --git a/tests/rewrite.rs b/tests/rewrite.rs index 864bdb3..6648a1c 100644 --- a/tests/rewrite.rs +++ b/tests/rewrite.rs @@ -7,7 +7,9 @@ use std::io::Cursor; use mp4forge::boxes::iso14496_12::{Meta, Moof, Tfdt, Traf}; use mp4forge::extract::extract_box_as; -use mp4forge::rewrite::{RewriteError, rewrite_box_as}; +use mp4forge::rewrite::{ + RewriteError, rewrite_box_as, rewrite_box_as_bytes, rewrite_boxes_as_bytes, +}; use mp4forge::walk::BoxPath; use support::{encode_raw_box, encode_supported_box, fixture_path, fourcc}; @@ -40,6 +42,29 @@ fn rewrite_box_as_updates_matching_typed_payloads() { assert_eq!(tfdt[0].base_media_decode_time_v0, 12_345); } +#[test] +fn rewrite_box_as_bytes_updates_matching_typed_payloads() { + let input = build_rewrite_input_file(); + let output = rewrite_box_as_bytes::( + &input, + BoxPath::from([fourcc("moof"), fourcc("traf"), fourcc("tfdt")]), + |tfdt| { + tfdt.base_media_decode_time_v0 = 12_345; + }, + ) + .unwrap(); + + let tfdt = extract_box_as::<_, Tfdt>( + &mut Cursor::new(output), + None, + BoxPath::from([fourcc("moof"), fourcc("traf"), fourcc("tfdt")]), + ) + .unwrap(); + + assert_eq!(tfdt.len(), 1); + assert_eq!(tfdt[0].base_media_decode_time_v0, 12_345); +} + #[test] fn rewrite_box_as_returns_zero_and_preserves_bytes_when_nothing_matches() { let input = fs::read(fixture_path("sample_fragmented.mp4")).unwrap(); @@ -58,6 +83,17 @@ fn rewrite_box_as_returns_zero_and_preserves_bytes_when_nothing_matches() { assert_eq!(output.into_inner(), input); } +#[test] +fn rewrite_boxes_as_bytes_preserves_bytes_when_nothing_matches() { + let input = fs::read(fixture_path("sample_fragmented.mp4")).unwrap(); + + let output = + rewrite_boxes_as_bytes::(&input, &[BoxPath::from([fourcc("zzzz")])], |_| {}) + .unwrap(); + + assert_eq!(output, input); +} + #[test] fn rewrite_box_as_reports_payload_type_context() { let input = build_rewrite_input_file(); @@ -86,6 +122,31 @@ fn 
rewrite_box_as_reports_payload_type_context() { )); } +#[test] +fn rewrite_box_as_bytes_reports_payload_type_context() { + let input = build_rewrite_input_file(); + + let error = rewrite_box_as_bytes::( + &input, + BoxPath::from([fourcc("moof"), fourcc("traf"), fourcc("tfdt")]), + |_| {}, + ) + .unwrap_err(); + + assert!(matches!( + error, + RewriteError::UnexpectedPayloadType { + ref path, + box_type, + offset, + expected_type + } if path.as_slice() == [fourcc("moof"), fourcc("traf"), fourcc("tfdt")] + && box_type == fourcc("tfdt") + && offset == 16 + && expected_type == std::any::type_name::() + )); +} + #[test] fn rewrite_box_as_reports_payload_decode_context() { let mut bytes = encode_raw_box(fourcc("tfdt"), &[0x00, 0x00, 0x00, 0x00]); From 13468c409c38af380d10da98a1a8db28c4cd3d3c Mon Sep 17 00:00:00 2001 From: bakgio <76126058+bakgio@users.noreply.github.com> Date: Wed, 22 Apr 2026 12:03:10 +0300 Subject: [PATCH 2/2] Version Update --- .github/ISSUE_TEMPLATE/bug_report.yml | 2 +- CHANGELOG.md | 8 ++++++++ Cargo.toml | 2 +- README.md | 2 +- 4 files changed, 11 insertions(+), 3 deletions(-) diff --git a/.github/ISSUE_TEMPLATE/bug_report.yml b/.github/ISSUE_TEMPLATE/bug_report.yml index 0d23d1a..8343118 100644 --- a/.github/ISSUE_TEMPLATE/bug_report.yml +++ b/.github/ISSUE_TEMPLATE/bug_report.yml @@ -13,7 +13,7 @@ body: attributes: label: mp4forge Version description: Which version are you using?
- placeholder: "0.2.0" + placeholder: "0.3.0" validations: required: true diff --git a/CHANGELOG.md b/CHANGELOG.md index 9f86db5..0a10c19 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,11 @@ +# 0.3.0 (April 22, 2026) + +- Added byte-slice convenience helpers for typed extract, rewrite, and probe workflows so higher-level integrations can stay in-memory without dropping to the lower-level APIs +- Added exact raw box-byte extraction helpers for full-box and payload-only reads, including registry-aware variants for custom box decoding workflows +- Added additive `BoxPath` string parsing with `BoxPath::parse`, `FromStr`, and `TryFrom<&str>` so ergonomic path construction can build on the existing low-level API +- Expanded examples, tests, and comparison coverage around the new ergonomic helpers while preserving the existing low-level usage paths +- Refined public docs and README guidance for the new helper surface + # 0.2.0 (April 21, 2026) - Added typed path-based extraction helpers for common read flows: `extract_box_as`, `extract_boxes_as`, and `extract_boxes_as_with_registry` diff --git a/Cargo.toml b/Cargo.toml index 8716ca8..5652f91 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "mp4forge" -version = "0.2.0" +version = "0.3.0" edition = "2024" rust-version = "1.88" authors = ["bakgio"] diff --git a/README.md b/README.md index 75c85a0..37c793b 100644 --- a/README.md +++ b/README.md @@ -26,7 +26,7 @@ ```toml [dependencies] -mp4forge = "0.2.0" +mp4forge = "0.3.0" ``` Install the CLI from crates.io: