A parser for Microsoft Outlook .msg files (OLE Compound Document format).
Extracts message metadata, body content, recipients, attachments, and transport
headers as specified in [MS-OXMSG] and [MS-OXPROPS].
Add this to your Cargo.toml:
[dependencies]
msg_parser = "0.3"

use msg_parser::Outlook;
fn main() {
let outlook = Outlook::from_path("email.msg").unwrap();
println!("From: {} <{}>", outlook.sender.name, outlook.sender.email);
println!("Subject: {}", outlook.subject);
for person in &outlook.to {
println!("To: {} <{}>", person.name, person.email);
}
for person in &outlook.cc {
println!("CC: {} <{}>", person.name, person.email);
}
}

use msg_parser::Outlook;
// From a file path
let outlook = Outlook::from_path("email.msg").unwrap();
// From a byte slice (accepts &[u8], Vec<u8>, or anything AsRef<[u8]>)
let bytes = std::fs::read("email.msg").unwrap();
let outlook = Outlook::from_slice(&bytes).unwrap();
// Passing a Vec<u8> directly also works
let outlook = Outlook::from_slice(bytes).unwrap();
// From any std::io::Read source (file, stdin, network, etc.)
let file = std::fs::File::open("email.msg").unwrap();
let outlook = Outlook::from_reader(file).unwrap();

Outlook, Person, and Attachment all implement Display for
human-readable output:
let outlook = Outlook::from_path("email.msg").unwrap();
// Prints a summary: From, Subject, To, CC, BCC, Date, Attachments
println!("{}", outlook);

let outlook = Outlook::from_path("email.msg").unwrap();
for attach in &outlook.attachments {
let name = if attach.long_file_name.is_empty() {
&attach.file_name // 8.3 short name fallback
} else {
&attach.long_file_name // full original filename
};
std::fs::write(name, &attach.payload_bytes).unwrap();
}

Attachments with attach_method == 5 are nested .msg files (embedded
messages). Use as_message() to parse them recursively:
let outlook = Outlook::from_path("email.msg").unwrap();
for attach in &outlook.attachments {
if let Some(Ok(nested)) = attach.as_message() {
println!("Embedded message subject: {}", nested.subject);
println!("Embedded from: {}", nested.sender);
// You can access all fields on the nested message, including
// its own attachments (which may also be embedded .msg files)
}
}
// Or use the convenience method:
for attach in &outlook.attachments {
if attach.is_embedded_message() {
println!("{} is an embedded .msg", attach.display_name);
}
}

HTML bodies reference inline images via cid: URIs. Use content_id to
resolve them:
let outlook = Outlook::from_path("email.msg").unwrap();
let mut html = outlook.html.clone();
for attach in &outlook.attachments {
if !attach.content_id.is_empty() {
// Replace cid: references with actual data
let cid_ref = format!("cid:{}", attach.content_id);
let data_uri = format!(
"data:{};base64,{}",
attach.mime_tag,
base64_encode(&attach.payload_bytes), // your base64 encoder
);
html = html.replace(&cid_ref, &data_uri);
}
}

Many .msg files store the body as compressed RTF rather than HTML.
Use rtf_decompressed() to get the raw RTF, or html_from_rtf() to
extract embedded HTML:
let outlook = Outlook::from_path("email.msg").unwrap();
// Get the best available HTML body
let html = if !outlook.html.is_empty() {
outlook.html.clone()
} else {
// Many messages embed HTML inside compressed RTF
outlook.html_from_rtf().unwrap_or_default()
};
// Or work with the raw decompressed RTF directly
if let Some(rtf_bytes) = outlook.rtf_decompressed() {
std::fs::write("body.rtf", &rtf_bytes).unwrap();
}

The parser automatically resolves MAPI named properties — both well-known
dispID-based properties (e.g. ReminderSet, InternetAccountName,
AppointmentStartWhole) and custom string-named properties stored in the
__nameid_version1.0 streams. These are merged into the same property maps
used for standard MAPI properties, so they appear transparently in the parsed
output and JSON serialization.
let outlook = Outlook::from_path("email.msg").unwrap();
// Timestamps (ISO 8601 UTC, empty string if unavailable)
println!("Delivered: {}", outlook.message_delivery_time);
println!("Submitted: {}", outlook.client_submit_time);
println!("Created: {}", outlook.creation_time);
println!("Modified: {}", outlook.last_modification_time);
// Classification
println!("Class: {}", outlook.message_class); // e.g. "IPM.Note"
println!("Importance: {}", outlook.importance); // 0=Low, 1=Normal, 2=High
println!("Sensitivity: {}", outlook.sensitivity); // 0=Normal, 1=Personal, 2=Private, 3=Confidential

let outlook = Outlook::from_path("email.msg").unwrap();
let json = outlook.to_json().unwrap();
println!("{}", json);

| Field | Type | Description |
|---|---|---|
| `headers` | `TransportHeaders` | SMTP transport headers (raw + parsed fields) |
| `sender` | `Person` | Sender name and email |
| `to` | `Vec<Person>` | Primary recipients |
| `cc` | `Vec<Person>` | Carbon-copy recipients |
| `bcc` | `Vec<Person>` | Blind carbon-copy recipients |
| `subject` | `String` | Subject line |
| `body` | `String` | Plain-text body |
| `html` | `String` | HTML body |
| `rtf_compressed` | `String` | RTF body (hex-encoded) |
| `message_class` | `String` | Message class (e.g. "IPM.Note") |
| `importance` | `u32` | 0=Low, 1=Normal, 2=High |
| `sensitivity` | `u32` | 0=Normal, 1=Personal, 2=Private, 3=Confidential |
| `client_submit_time` | `String` | ISO 8601 UTC timestamp |
| `message_delivery_time` | `String` | ISO 8601 UTC timestamp |
| `creation_time` | `String` | ISO 8601 UTC timestamp |
| `last_modification_time` | `String` | ISO 8601 UTC timestamp |
| `attachments` | `Vec<Attachment>` | File attachments with metadata and raw bytes |

| Field | Type | Description |
|---|---|---|
| `display_name` | `String` | Display name shown in the mail client |
| `payload` | `String` | Hex-encoded attachment content |
| `payload_bytes` | `Vec<u8>` | Raw attachment bytes |
| `extension` | `String` | File extension (e.g. ".pdf") |
| `mime_tag` | `String` | MIME type (e.g. "image/png") |
| `file_name` | `String` | Short 8.3 filename |
| `long_file_name` | `String` | Full original filename |
| `attach_method` | `u32` | 1=file, 5=embedded .msg, 6=OLE object |
| `content_id` | `String` | Content-ID for inline images |

| Method | Returns | Description |
|---|---|---|
| `Outlook::from_path(path)` | `Result<Outlook, Error>` | Parse from filesystem path |
| `Outlook::from_slice(bytes)` | `Result<Outlook, Error>` | Parse from byte slice or `Vec<u8>` |
| `Outlook::from_reader(reader)` | `Result<Outlook, Error>` | Parse from any `Read` source |
| `Outlook::to_json()` | `Result<String, Error>` | Serialize to JSON |
| `Outlook::rtf_decompressed()` | `Option<Vec<u8>>` | Decompress RTF body |
| `Outlook::html_from_rtf()` | `Option<String>` | Extract HTML from compressed RTF |
| `Attachment::as_message()` | `Option<Result<Outlook, Error>>` | Parse embedded .msg attachment |
| `Attachment::is_embedded_message()` | `bool` | Check if attachment is embedded .msg |

from_path and from_slice use an optimized zero-copy header parsing path
(Reader::from_bytes) that avoids the double-allocation overhead of streaming
through BufReader. For large .msg files this reduces peak memory usage and
parse time compared to the generic from_reader path.
- Rust edition 2024 (rustc 1.85+)
cargo run --example parse-email
# or with a specific file:
cargo run --example parse-email -- path/to/email.msg

cargo test

Feel free to open pull requests to contribute, enhance, or fix bugs.
License: MIT