Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 19 additions & 10 deletions Mail2Bug/Email/EmailBodyProcessingUtils.cs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
using System;
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
Expand Down Expand Up @@ -33,21 +33,30 @@ public static string GetLastMessageText_Html(string rawBody)
{
CQ dom = rawBody;

const string messageSeparatorStyle = "border:none;border-top:solid #E1E1E1 1.0pt;padding:3.0pt 0in 0in 0in";
const string outlookDesktopSeparatorStyle = "border:none;border-top:solid #E1E1E1 1.0pt;padding:3.0pt 0in 0in 0in";
const string outlookMobileSeparatorStyle = "display:inline-block;width:98%";

// There's no well-defined way to parse the latest email from a thread
// We have to use heuristics to cover different email clients
foreach (IDomObject element in dom["*"])
{
// Lots of email clients insert html elements as message delimiters which have styling but no inner text
// This block checks for some of these patterns
if (element.NodeName == "DIV")
if (string.Equals(element.NodeName, "div", StringComparison.OrdinalIgnoreCase) &&
(element.Id == "divRplyFwdMsg" || element.Id == "x_divRplyFwdMsg" || outlookDesktopSeparatorStyle.Equals(element.GetAttribute("style"))))
{
if (element.Id == "divRplyFwdMsg" || element.Id == "x_divRplyFwdMsg" || messageSeparatorStyle.Equals(element.GetAttribute("style")))
Comment thread
EduardoRomo marked this conversation as resolved.
{
IDomContainer parent = element.ParentNode;
element.Remove();
RemoveSubsequent(parent);
break;
}
IDomContainer parent = element.ParentNode;
RemoveSubsequent(parent);
parent.Remove();
break;
}

if (string.Equals(element.NodeName, "hr", StringComparison.OrdinalIgnoreCase) &&
outlookMobileSeparatorStyle.Equals(element.GetAttribute("style")))
Comment thread
EduardoRomo marked this conversation as resolved.
{
RemoveSubsequent(element);
element.Remove();
break;
}

if (!element.ChildElements.Any() && !string.IsNullOrWhiteSpace(element.InnerText))
Expand Down
57 changes: 21 additions & 36 deletions Mail2BugUnitTests/EmailBodyProcessingUtilsUnitTest.cs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
using System;
using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.IO;
Expand Down Expand Up @@ -45,7 +45,7 @@ public void TestConvertHtmlMessageToPlainTextBasic()

// Can't have '<' or '>' chars in the content, since it breaks the HTML processing. This is OK, since real HTML should never have these
// chracters either (they will be escaped as &lt; and &gt;)
var expectedText =
var expectedText =
StringFactory.GenerateRandomString(properties, _rand.Next()).Trim().Replace("<", "").Replace(">", "");
var htmlText = string.Format("<html><head></head><body><p>{0}</p></body></html>", expectedText);
var plainText = EmailBodyProcessingUtils.ConvertHtmlMessageToPlainText(htmlText);
Expand Down Expand Up @@ -118,42 +118,27 @@ This is a boring email.
}

[TestMethod]
public void TestGetLastMessageText_Previous_FromColon()
public void TestGetLastMessageText_EmailClientsSchemas()
{
string original = @"<html>
<body>
<div class=""wrapppingBothMessageAndReply"">
<p class=""customStyling"">This is random text with some custom styling and outlook-generated elements.<o:p></o:p></p>
<div>
<div style=""border:none;border-top:solid"">
<p class=""customStyling""><b>From:</b> someAddress;<br><b>Subject:</b> RE: Build error<o:p></o:p>
</p>
</div>
</div>
<p class=""customStyling"">
<o:p>&nbsp;</o:p>
</p>
<p class=""customStyling"">text of the reply
</p>
</div>
<div>Something after the containing div</div>
</body>
</html>";

// Note: it's acceptable to not preserve whitespace because it's
// manipulating HTML, not plain text. As long as the rendered page isn't impacted, all is well
// Note that we expect that both
// 1. Elements following the latest message are removed
// 2. Anything in the same element as the latest message but after the start of the previous should be cleared out
string expected = @"<html><head></head><body>
<div class=""wrapppingBothMessageAndReply"">
<p class=""customStyling"">This is random text with some custom styling and outlook-generated elements.<o:p></o:p></p>
<div>
<div style=""border:none;border-top:solid"">
<p class=""customStyling""><b></b></p></div></div></div></body></html>";
const string schemasFolder = "LastMessageSchemas";
foreach (var originalFilename in Directory.GetFiles(schemasFolder, "*.orig"))
{
Trace.WriteLine(string.Format("Processing email schema file {0}", originalFilename));

string actual = EmailBodyProcessingUtils.GetLastMessageText_Html(original);
Assert.AreEqual(Normalize(expected), Normalize(actual));
var baseFilename = Path.GetFileNameWithoutExtension(originalFilename);
var expectedFilename = Path.Combine(schemasFolder, baseFilename + ".expected");

var original = File.ReadAllText(originalFilename);
var expected = Normalize(File.ReadAllText(expectedFilename));
var actual = Normalize(EmailBodyProcessingUtils.GetLastMessageText_Html(original));

// Note: it's acceptable to not preserve whitespace because it's
// manipulating HTML, not plain text. As long as the rendered page isn't impacted, all is well
// Note that we expect that both
// 1. Elements following the latest message are removed
// 2. Anything in the same element as the latest message but after the start of the previous should be cleared out
Assert.AreEqual(expected, actual);
}
}

private static string Normalize(string text)
Expand Down
6 changes: 6 additions & 0 deletions Mail2BugUnitTests/LastMessageSchemas/FromColon.expected
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
<html><head></head><body>
<div class="wrapppingBothMessageAndReply">
<p class="customStyling">This is random text with some custom styling and outlook-generated elements.<o:p></o:p></p>
<div>
<div style="border:none;border-top:solid">
<p class="customStyling"><b></b></p></div></div></div></body></html>
19 changes: 19 additions & 0 deletions Mail2BugUnitTests/LastMessageSchemas/FromColon.orig
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
<html>
<body>
<div class="wrapppingBothMessageAndReply">
<p class="customStyling">This is random text with some custom styling and outlook-generated elements.<o:p></o:p></p>
<div>
<div style="border:none;border-top:solid">
<p class="customStyling"><b>From:</b> someAddress;<br><b>Subject:</b> RE: Build error<o:p></o:p>
</p>
</div>
</div>
<p class="customStyling">
<o:p>&nbsp;</o:p>
</p>
<p class="customStyling">text of the reply
</p>
</div>
<div>Something after the containing div</div>
</body>
</html>
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
<html><head></head><body>
<div class="wrappingBothMessageAndReply">
<b><p class="customStyling">This is random text with some custom styling and outlook-generated elements.<o:p></o:p></p></b>
</div></body></html>
18 changes: 18 additions & 0 deletions Mail2BugUnitTests/LastMessageSchemas/OutlookDesktopSeparator.orig
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
<html>
<body>
<div class="wrappingBothMessageAndReply">
<b><p class="customStyling">This is random text with some custom styling and outlook-generated elements.<o:p></o:p></p></b>
<div>
<div style="border:none;border-top:solid #E1E1E1 1.0pt;padding:3.0pt 0in 0in 0in">
<p class="customStyling">
<b>Old emails from: field and subject</b>
</p>
</div>
</div>
<p class="customStyling"><o:p>&nbsp;</o:p></p>
<div>
Something after the containing div
</div>
</div>
</body>
</html>
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
<html><head></head>
<body>
<div>
<div>
<div style="customStyle">Message content from iPhone</div>
</div>
<div><br></div>
<div class="ms-outlook-ios-signature"></div>
</div>
</body></html>
15 changes: 15 additions & 0 deletions Mail2BugUnitTests/LastMessageSchemas/OutlookMobileSeparator.orig
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
<html><head></head>
<body>
<div>
<div>
<div style="customStyle">Message content from iPhone</div>
</div>
<div><br /></div>
<div class="ms-outlook-ios-signature"></div>
</div>
<hr style="display:inline-block;width:98%" tabindex="-1" />
<div>
Something after the containing div
</div>
</body>
</html>
18 changes: 18 additions & 0 deletions Mail2BugUnitTests/Mail2BugUnitTests.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,24 @@
</ItemGroup>
<ItemGroup>
<None Include="app.config" />
<None Include="LastMessageSchemas\FromColon.expected">
<CopyToOutputDirectory>Always</CopyToOutputDirectory>
</None>
<None Include="LastMessageSchemas\FromColon.orig">
<CopyToOutputDirectory>Always</CopyToOutputDirectory>
</None>
<None Include="LastMessageSchemas\OutlookDesktopSeparator.orig">
<CopyToOutputDirectory>Always</CopyToOutputDirectory>
</None>
<None Include="LastMessageSchemas\OutlookMobileSeparator.expected">
<CopyToOutputDirectory>Always</CopyToOutputDirectory>
</None>
<None Include="LastMessageSchemas\OutlookMobileSeparator.orig">
<CopyToOutputDirectory>Always</CopyToOutputDirectory>
</None>
<None Include="LastMessageSchemas\OutlookDesktopSeparator.expected">
<CopyToOutputDirectory>Always</CopyToOutputDirectory>
</None>
<None Include="packages.config" />
<None Include="RegressionMessages\Basic.expected">
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
Expand Down