diff --git a/Cargo.toml b/Cargo.toml
index 1a53c1b..db836cb 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -15,6 +15,9 @@ chrono = "0.4.41"
 compressed-rtf = "1.0.0"
 thiserror = "2"
 
+[dev-dependencies]
+clap = { version = "4", features = ["derive"] } # for example CLI
+
 [lib]
 name = "tiny_msg"
 path = "src/lib.rs"
diff --git a/examples/extractor.rs b/examples/extractor.rs
new file mode 100644
--- /dev/null
+++ b/examples/extractor.rs
@@ -0,0 +1,130 @@
+//! Example CLI: extracts every attachment from a `.msg` file.
+//!
+//! Attachments of the top-level message are written into the output
+//! directory; attachments of each embedded message go into a subdirectory
+//! named after that message's subject.
+
+use std::{
+    fs,
+    path::{Path, PathBuf},
+};
+
+use cfb::CompoundFile;
+use clap::Parser;
+use tiny_msg::{MsgError, MsgReader};
+
+#[derive(Parser, Debug)]
+#[command(author, version, about, long_about = None)]
+struct Args {
+    /// Input .msg file
+    file: String,
+
+    /// Output directory
+    #[arg(short, long)]
+    out_dir: String,
+}
+
+fn main() -> Result<(), MsgError> {
+    let args = Args::parse();
+    let out_dir = Path::new(&args.out_dir);
+    // `create_dir_all` is a no-op when the directory already exists.
+    fs::create_dir_all(out_dir)?;
+    let file = fs::File::open(&args.file)?;
+    // Report an unreadable container as an error instead of panicking.
+    let mut compound_file = CompoundFile::open(file)?;
+    extract_attachments_recursively(&mut compound_file, Path::new("/"), out_dir)
+}
+
+/// Turns a name read from the message into a single, safe path component.
+///
+/// Path separators, `:` and control characters are replaced with `_`. Names
+/// that end up empty or consist only of dots (`.`, `..`) become `fallback`,
+/// so joining the result onto a directory cannot escape that directory.
+fn sanitize_component(name: &str, fallback: &str) -> String {
+    let cleaned: String = name
+        .chars()
+        .map(|c| {
+            if matches!(c, '/' | '\\' | ':') || c.is_control() {
+                '_'
+            } else {
+                c
+            }
+        })
+        .collect();
+    let cleaned = cleaned.trim();
+    if cleaned.is_empty() || cleaned.chars().all(|c| c == '.') {
+        fallback.to_string()
+    } else {
+        cleaned.to_string()
+    }
+}
+
+/// Returns a path for `file_name` inside `dir` that does not exist yet.
+///
+/// If `dir/file_name` is taken, ` (1)`, ` (2)`, ... is inserted before the
+/// extension until an unused name is found.
+fn unique_path(dir: &Path, file_name: &str) -> PathBuf {
+    let candidate = dir.join(file_name);
+    if !candidate.exists() {
+        return candidate;
+    }
+    // Split the original name once so that suffixes do not pile up
+    // ("a (1) (2).txt") when several attachments share a name.
+    let original = Path::new(file_name);
+    let stem = original
+        .file_stem()
+        .and_then(|s| s.to_str())
+        .unwrap_or(file_name);
+    let ext = original.extension().and_then(|e| e.to_str());
+    let mut counter = 1u32;
+    loop {
+        let numbered = match ext {
+            Some(ext) => format!("{stem} ({counter}).{ext}"),
+            None => format!("{stem} ({counter})"),
+        };
+        let candidate = dir.join(numbered);
+        if !candidate.exists() {
+            return candidate;
+        }
+        counter += 1;
+    }
+}
+
+/// Writes the attachments of the message at `path` inside `cfb` into
+/// `out_dir`, then repeats this for every embedded message, using a
+/// subdirectory named after the embedded message's subject ("Untitled" when
+/// the subject cannot be read).
+///
+/// # Errors
+///
+/// Returns the first error hit while reading the message or writing files.
+fn extract_attachments_recursively(
+    cfb: &mut CompoundFile<fs::File>,
+    path: &Path,
+    out_dir: &Path,
+) -> Result<(), MsgError> {
+    let mut msg = MsgReader::new(cfb, path);
+
+    // Extract simple attachments
+    for attachment in msg.attachments()? {
+        // Attachment names come from the message itself and are untrusted.
+        let file_name = sanitize_component(&attachment.name, "attachment");
+        // Avoid overwriting files with the same name
+        let full_path = unique_path(out_dir, &file_name);
+        fs::write(&full_path, &attachment.data)?;
+        println!("Saved attachment to {}", full_path.display());
+    }
+
+    // Recurse into embedded messages
+    for embedded_path in msg.embedded_messages()? {
+        let mut msg_reader = MsgReader::new(cfb, &embedded_path);
+        let subject = msg_reader
+            .subject()
+            .unwrap_or_else(|_| "Untitled".to_string());
+        let new_out_dir = out_dir.join(sanitize_component(&subject, "Untitled"));
+        fs::create_dir_all(&new_out_dir)?;
+        extract_attachments_recursively(cfb, &embedded_path, &new_out_dir)?;
+    }
+
+    Ok(())
+}
diff --git a/src/lib.rs b/src/lib.rs
index 55fe32f..6d17612 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -31,8 +31,8 @@ pub struct MsgReader<'c, 'p, F> {
 }
 
 pub struct Attachment {
-    name: String,
-    data: Vec<u8>,
+    pub name: String,
+    pub data: Vec<u8>,
 }
 
 impl Debug for Attachment {
@@ -227,7 +227,8 @@ where
             .collect();
         let res = attachment_paths
             .into_iter()
-            .filter(|a| self.inner.is_storage(a.join("__substg1.0_3701000D")))
+            .map(|a| a.join("__substg1.0_3701000D"))
+            .filter(|a| self.inner.is_storage(a))
             .collect();
         Ok(res)
     }