From f40997d8cb77ef14166d97d418ee52bf36e8fcb7 Mon Sep 17 00:00:00 2001 From: Matthew Wong Date: Tue, 1 Jul 2025 11:29:10 +0800 Subject: [PATCH] feat: implement owned struct Email --- Cargo.toml | 2 +- examples/extractor.rs | 2 +- examples/hello.rs | 18 ++--- src/lib.rs | 168 +++++++++++++++++++++++++++++++----------- 4 files changed, 136 insertions(+), 54 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index db836cb..c4cd6e7 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "tiny_msg" description = "A tiny Outlook Email Message (.msg) reader" -version = "0.1.0" +version = "0.2.0" authors = ["Matthew Wong "] readme = "README.md" license = "MIT" diff --git a/examples/extractor.rs b/examples/extractor.rs index 96b6e17..d20258c 100644 --- a/examples/extractor.rs +++ b/examples/extractor.rs @@ -62,7 +62,7 @@ fn extract_attachments_recursively( for embedded_path in msg.embedded_messages()? { let mut msg_reader = MsgReader::new(cfb, &embedded_path); let subject = msg_reader - .subject() + .pr_subject() .unwrap_or_else(|_| "Untitled".to_string()); let new_out_dir = out_dir.join(subject); if !new_out_dir.is_dir() { diff --git a/examples/hello.rs b/examples/hello.rs index 116d053..d4ffd23 100644 --- a/examples/hello.rs +++ b/examples/hello.rs @@ -1,14 +1,12 @@ -use std::path::Path; -use tiny_msg::MsgReader; +use tiny_msg::Email; fn main() { - let mut cfb = cfb::open("path/to/your.msg").unwrap(); - let mut reader = MsgReader::new(&mut cfb, Path::new("/")); + let email = Email::from_path("sample/sample1.msg"); - dbg!(&reader.from()); - dbg!(&reader.to()); - dbg!(&reader.cc()); - dbg!(&reader.sent_date()); - dbg!(&reader.subject()); - dbg!(&reader.body()); + dbg!(&email.from); + dbg!(&email.to); + dbg!(&email.cc); + dbg!(&email.sent_date); + dbg!(&email.subject); + dbg!(&email.body); } diff --git a/src/lib.rs b/src/lib.rs index 784375b..ef7e0cf 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,13 +1,12 @@ use std::{ - collections::HashMap, fmt::Debug, - io::{Read, Seek}, + io::{Cursor, Read, Seek}, path::{Path, PathBuf}, }; use cfb::CompoundFile; -use chrono::{DateTime, FixedOffset}; +use chrono::{DateTime, Utc}; use compressed_rtf::decompress_rtf; use thiserror::Error; @@ -25,11 +24,13 @@ pub enum MsgError { type Result = std::result::Result; +/// A low-level API for reading data from a .msg file. pub struct MsgReader<'c, 'p, F> { inner: &'c mut CompoundFile, path: &'p Path, } +#[derive(Clone)] pub struct Attachment { pub name: String, pub data: Vec, @@ -39,26 +40,90 @@ impl Debug for Attachment { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { f.debug_struct("Attachment") .field("name", &self.name) - .field("data", &self.data.len()) + .field("data of size", &self.data.len()) .finish() } } -fn pack_u8s_to_u16s_le_padded(bytes: &[u8]) -> Vec { - let mut result = Vec::with_capacity(bytes.len().div_ceil(2)); - let mut i = 0; - while i < bytes.len() { - let lsb = bytes[i]; - let msb = if i + 1 < bytes.len() { - bytes[i + 1] - } else { - // Pad with zero if there's an odd number of bytes - 0x00 - }; - result.push(u16::from_le_bytes([lsb, msb])); - i += 2; // Move to the next pair +/// A high-level API for interacting with .msg files, providing an owned data structure. +#[derive(Debug, Clone)] +pub struct Email { + pub from: Option<(String, String)>, + pub sent_date: Option>, + pub to: Vec<(String, String)>, + pub cc: Vec<(String, String)>, + pub bcc: Vec<(String, String)>, + pub subject: Option, + pub body: Option, + pub attachments: Vec, + pub embedded_messages: Vec, +} + +impl Email { + pub fn from_path>(file: P) -> Self { + Self::from_path_internal(file.as_ref(), Path::new("/")) + } + pub fn from_bytes>(bytes: B) -> Self { + Self::from_bytes_internal(bytes.as_ref(), Path::new("/")) + } + + fn from_path_internal(file: &Path, subpath: &Path) -> Self { + let mut comp = cfb::open(file).unwrap(); + let mut reader = MsgReader::new(&mut comp, subpath); + let from = reader.from().ok(); + let sent_date = reader.sent_date().ok(); + let to = reader.to().unwrap_or_default(); + let cc = reader.cc().unwrap_or_default(); + let bcc = reader.bcc().unwrap_or_default(); + let subject = reader.pr_subject().ok(); + let body = reader.body().ok(); + let attachments = reader.attachments().unwrap_or_default(); + let emb_paths = reader.embedded_messages().unwrap(); + let embedded_messages: Vec<_> = emb_paths + .into_iter() + .map(|emb_path| Self::from_path_internal(file, &emb_path)) + .collect(); + Self { + from, + sent_date, + to, + cc, + bcc, + subject, + body, + attachments, + embedded_messages, + } + } + fn from_bytes_internal(bytes: &[u8], subpath: &Path) -> Self { + let cur = Cursor::new(bytes); + let mut comp = CompoundFile::open(cur).unwrap(); + let mut reader = MsgReader::new(&mut comp, subpath); + let from = reader.from().ok(); + let sent_date = reader.sent_date().ok(); + let to = reader.to().unwrap_or_default(); + let cc = reader.cc().unwrap_or_default(); + let bcc = reader.bcc().unwrap_or_default(); + let subject = reader.pr_subject().ok(); + let body = reader.body().ok(); + let attachments = reader.attachments().unwrap_or_default(); + let emb_paths = reader.embedded_messages().unwrap(); + let embedded_messages: Vec<_> = emb_paths + .into_iter() + .map(|emb_path| Self::from_bytes_internal(bytes, &emb_path)) + .collect(); + Self { + from, + sent_date, + to, + cc, + bcc, + subject, + body, + attachments, + embedded_messages, + } } - result } impl<'c, 'p, F> MsgReader<'c, 'p, F> @@ -87,13 +152,13 @@ where content.read_to_end(&mut buf).unwrap(); Ok(buf) } - fn read_path_binary(&mut self, subpath: &Path) -> Result> { + pub fn read_path_as_binary(&mut self, subpath: &Path) -> Result> { let mut content = self.inner.open_stream(self.path.join(subpath))?; let mut buf = vec![]; content.read_to_end(&mut buf).unwrap(); Ok(buf) } - fn read_path_string(&mut self, subpath: &Path) -> Result { + pub fn read_path_as_string(&mut self, subpath: &Path) -> Result { let mut content = self.inner.open_stream(self.path.join(subpath))?; let mut buf = vec![]; content.read_to_end(&mut buf).unwrap(); @@ -101,22 +166,22 @@ where .map_err(|_e| MsgError::Encoding) .map(|x| x.trim_end_matches('\0').to_string()) } - pub fn subject(&mut self) -> Result { + pub fn pr_subject(&mut self) -> Result { self.read_simple_string("0037") // PR_SUBJECT } - fn pr_sender_name(&mut self) -> Result { + pub fn pr_sender_name(&mut self) -> Result { self.read_simple_string("0C1A") } - fn pr_sender_email_adress_str(&mut self) -> Result { + pub fn pr_sender_email_adress_str(&mut self) -> Result { self.read_simple_string("0C19") } - fn pr_smtp_sender_address(&mut self) -> Result { + pub fn pr_smtp_sender_address(&mut self) -> Result { self.read_simple_string("5D01") } - fn pr_smtp_address(&mut self) -> Result { + pub fn pr_smtp_address(&mut self) -> Result { self.read_simple_string("39FE") } - fn sender_address(&mut self) -> Result { + pub fn sender_address(&mut self) -> Result { self.pr_sender_email_adress_str() .or_else(|_| self.pr_smtp_address()) .or_else(|_| self.pr_smtp_sender_address()) @@ -127,11 +192,11 @@ where pub fn pr_transport_message_headers(&mut self) -> Result { self.read_simple_string("007D") } - fn pr_body_html(&mut self) -> Result { + pub fn pr_body_html(&mut self) -> Result { let bin = self.read_simple_binary("1013")?; String::from_utf8(bin).map_err(|_| MsgError::Encoding) } - fn pr_rtf_compressed(&mut self) -> Result> { + pub fn pr_rtf_compressed(&mut self) -> Result> { self.read_simple_binary("1009") } fn rtf(&mut self) -> Result { @@ -141,7 +206,7 @@ where pub fn body(&mut self) -> Result { self.pr_body_html().or_else(|_| self.rtf()) } - pub fn sent_date(&mut self) -> Result> { + pub fn sent_date(&mut self) -> Result> { let headers = self.pr_transport_message_headers()?; let dateline = headers .lines() @@ -150,9 +215,11 @@ where .split_once(": ") .ok_or(MsgError::Encoding)? .1; - chrono::DateTime::parse_from_rfc2822(dateline).map_err(|_| MsgError::Encoding) + chrono::DateTime::parse_from_rfc2822(dateline) + .map_err(|_| MsgError::Encoding) + .map(|d| d.with_timezone(&Utc)) } - fn recipients(&mut self) -> Result> { + fn recipients(&mut self) -> Result> { let recip_paths: Vec<_> = self .inner .read_storage(self.path)? @@ -162,36 +229,36 @@ where recip_paths .iter() .map(|r| { - let name = self.read_path_string(&r.join("__substg1.0_3001001F"))?; - let address = self.read_path_string(&r.join("__substg1.0_39FE001F"))?; + let name = self.read_path_as_string(&r.join("__substg1.0_3001001F"))?; + let address = self.read_path_as_string(&r.join("__substg1.0_39FE001F"))?; Ok((name, address)) }) .collect() } - pub fn to(&mut self) -> Result> { + pub fn to(&mut self) -> Result> { let to_field = self.read_simple_string("0E04")?; let to_list: Vec<_> = to_field.split(";").map(|n| n.trim()).collect(); - let output: HashMap = self + let output: Vec<(String, String)> = self .recipients()? .into_iter() .filter(|(k, _v)| to_list.contains(&&k[..])) .collect(); Ok(output) } - pub fn cc(&mut self) -> Result> { + pub fn cc(&mut self) -> Result> { let cc_field = self.read_simple_string("0E03")?; let cc_list: Vec<_> = cc_field.split(";").map(|n| n.trim()).collect(); - let output: HashMap = self + let output: Vec<(String, String)> = self .recipients()? .into_iter() .filter(|(k, _v)| cc_list.contains(&&k[..])) .collect(); Ok(output) } - pub fn bcc(&mut self) -> Result> { + pub fn bcc(&mut self) -> Result> { let bcc_field = self.read_simple_string("0E02")?; let bcc_list: Vec<_> = bcc_field.split(";").map(|n| n.trim()).collect(); - let output: HashMap = self + let output: Vec<(String, String)> = self .recipients()? .into_iter() .filter(|(k, _v)| bcc_list.contains(&&k[..])) @@ -209,9 +276,9 @@ where .iter() .flat_map(|a| { let name = self - .read_path_string(&a.join("__substg1.0_3704001F")) - .or_else(|_| self.read_path_string(&a.join("__substg1.0_3001001F")))?; - let data = self.read_path_binary(&a.join("__substg1.0_37010102"))?; + .read_path_as_string(&a.join("__substg1.0_3704001F")) + .or_else(|_| self.read_path_as_string(&a.join("__substg1.0_3001001F")))?; + let data = self.read_path_as_binary(&a.join("__substg1.0_37010102"))?; let output: Result = Ok(Attachment { name, data }); output }) @@ -233,3 +300,20 @@ where Ok(res) } } + +fn pack_u8s_to_u16s_le_padded(bytes: &[u8]) -> Vec { + let mut result = Vec::with_capacity(bytes.len().div_ceil(2)); + let mut i = 0; + while i < bytes.len() { + let lsb = bytes[i]; + let msb = if i + 1 < bytes.len() { + bytes[i + 1] + } else { + // Pad with zero if there's an odd number of bytes + 0x00 + }; + result.push(u16::from_le_bytes([lsb, msb])); + i += 2; // Move to the next pair + } + result +}