diff --git a/Mail2Bug/Email/EmailBodyProcessingUtils.cs b/Mail2Bug/Email/EmailBodyProcessingUtils.cs
index c4020f2..e18b0c9 100644
--- a/Mail2Bug/Email/EmailBodyProcessingUtils.cs
+++ b/Mail2Bug/Email/EmailBodyProcessingUtils.cs
@@ -1,4 +1,4 @@
-using System;
+using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
@@ -33,21 +33,30 @@ public static string GetLastMessageText_Html(string rawBody)
{
CQ dom = rawBody;
- const string messageSeparatorStyle = "border:none;border-top:solid #E1E1E1 1.0pt;padding:3.0pt 0in 0in 0in";
+ const string outlookDesktopSeparatorStyle = "border:none;border-top:solid #E1E1E1 1.0pt;padding:3.0pt 0in 0in 0in";
+ const string outlookMobileSeparatorStyle = "display:inline-block;width:98%";
+ // There's no well-defined way to extract the latest email from a thread,
+ // so we rely on heuristics that cover the markup produced by different email clients.
foreach (IDomObject element in dom["*"])
{
// Lots of email clients insert html elements as message delimiters which have styling but no inner text
// This block checks for some of these patterns
- if (element.NodeName == "DIV")
+ if (string.Equals(element.NodeName, "div", StringComparison.OrdinalIgnoreCase) &&
+ (element.Id == "divRplyFwdMsg" || element.Id == "x_divRplyFwdMsg" || outlookDesktopSeparatorStyle.Equals(element.GetAttribute("style"))))
{
- if (element.Id == "divRplyFwdMsg" || element.Id == "x_divRplyFwdMsg" || messageSeparatorStyle.Equals(element.GetAttribute("style")))
- {
- IDomContainer parent = element.ParentNode;
- element.Remove();
- RemoveSubsequent(parent);
- break;
- }
+ IDomContainer parent = element.ParentNode;
+ RemoveSubsequent(parent);
+ parent.Remove();
+ break;
+ }
+
+ if (string.Equals(element.NodeName, "hr", StringComparison.OrdinalIgnoreCase) &&
+ outlookMobileSeparatorStyle.Equals(element.GetAttribute("style")))
+ {
+ RemoveSubsequent(element);
+ element.Remove();
+ break;
}
if (!element.ChildElements.Any() && !string.IsNullOrWhiteSpace(element.InnerText))
diff --git a/Mail2BugUnitTests/EmailBodyProcessingUtilsUnitTest.cs b/Mail2BugUnitTests/EmailBodyProcessingUtilsUnitTest.cs
index a36ff08..c1b88f7 100644
--- a/Mail2BugUnitTests/EmailBodyProcessingUtilsUnitTest.cs
+++ b/Mail2BugUnitTests/EmailBodyProcessingUtilsUnitTest.cs
@@ -1,4 +1,4 @@
-using System;
+using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.IO;
@@ -45,7 +45,7 @@ public void TestConvertHtmlMessageToPlainTextBasic()
// Can't have '<' or '>' chars in the content, since it breaks the HTML processing. This is OK, since real HTML should never have these
// chracters either (they will be escaped as < and >)
- var expectedText =
+ var expectedText =
StringFactory.GenerateRandomString(properties, _rand.Next()).Trim().Replace("<", "").Replace(">", "");
var htmlText = string.Format("
{0}
", expectedText);
var plainText = EmailBodyProcessingUtils.ConvertHtmlMessageToPlainText(htmlText);
@@ -118,42 +118,27 @@ This is a boring email.
}
[TestMethod]
- public void TestGetLastMessageText_Previous_FromColon()
+ public void TestGetLastMessageText_EmailClientsSchemas()
{
- string original = @"
-
-
-
This is random text with some custom styling and outlook-generated elements.
-
-
-
From: someAddress;
Subject: RE: Build error
-
-
-
-
-
-
-
text of the reply
-
-
-Something after the containing div
-
-";
-
- // Note: it's acceptable to not preserve whitespace because it's
- // manipulating HTML, not plain text. As long as the rendered page isn't impacted, all is well
- // Note that we expect that both
- // 1. Elements following the latest message are removed
- // 2. Anything in the same element as the latest message but after the start of the previous should be cleared out
- string expected = @"
-
-
This is random text with some custom styling and outlook-generated elements.
-
";
+ const string schemasFolder = "LastMessageSchemas";
+ foreach (var originalFilename in Directory.GetFiles(schemasFolder, "*.orig"))
+ {
+ Trace.WriteLine(string.Format("Processing email schema file {0}", originalFilename));
- string actual = EmailBodyProcessingUtils.GetLastMessageText_Html(original);
- Assert.AreEqual(Normalize(expected), Normalize(actual));
+ var baseFilename = Path.GetFileNameWithoutExtension(originalFilename);
+ var expectedFilename = Path.Combine(schemasFolder, baseFilename + ".expected");
+
+ var original = File.ReadAllText(originalFilename);
+ var expected = Normalize(File.ReadAllText(expectedFilename));
+ var actual = Normalize(EmailBodyProcessingUtils.GetLastMessageText_Html(original));
+
+ // Note: whitespace does not need to be preserved, because the method under test
+ // manipulates HTML, not plain text. As long as the rendered page isn't impacted, all is well.
+ // For every schema file we expect both of the following:
+ // 1. Elements following the latest message are removed.
+ // 2. Anything in the same element as the latest message, but after the start of the previous message, is cleared out.
+ Assert.AreEqual(expected, actual);
+ }
}
private static string Normalize(string text)
diff --git a/Mail2BugUnitTests/LastMessageSchemas/FromColon.expected b/Mail2BugUnitTests/LastMessageSchemas/FromColon.expected
new file mode 100644
index 0000000..b91f36d
--- /dev/null
+++ b/Mail2BugUnitTests/LastMessageSchemas/FromColon.expected
@@ -0,0 +1,6 @@
+
+
+
This is random text with some custom styling and outlook-generated elements.
+
\ No newline at end of file
diff --git a/Mail2BugUnitTests/LastMessageSchemas/FromColon.orig b/Mail2BugUnitTests/LastMessageSchemas/FromColon.orig
new file mode 100644
index 0000000..74e0f93
--- /dev/null
+++ b/Mail2BugUnitTests/LastMessageSchemas/FromColon.orig
@@ -0,0 +1,19 @@
+
+
+
+
This is random text with some custom styling and outlook-generated elements.
+
+
+
From: someAddress;
Subject: RE: Build error
+
+
+
+
+
+
+
text of the reply
+
+
+Something after the containing div
+
+
\ No newline at end of file
diff --git a/Mail2BugUnitTests/LastMessageSchemas/OutlookDesktopSeparator.expected b/Mail2BugUnitTests/LastMessageSchemas/OutlookDesktopSeparator.expected
new file mode 100644
index 0000000..b8045b7
--- /dev/null
+++ b/Mail2BugUnitTests/LastMessageSchemas/OutlookDesktopSeparator.expected
@@ -0,0 +1,4 @@
+
+
+
This is random text with some custom styling and outlook-generated elements.
+
\ No newline at end of file
diff --git a/Mail2BugUnitTests/LastMessageSchemas/OutlookDesktopSeparator.orig b/Mail2BugUnitTests/LastMessageSchemas/OutlookDesktopSeparator.orig
new file mode 100644
index 0000000..7cae3d9
--- /dev/null
+++ b/Mail2BugUnitTests/LastMessageSchemas/OutlookDesktopSeparator.orig
@@ -0,0 +1,18 @@
+
+
+
+
This is random text with some custom styling and outlook-generated elements.
+
+
+
+ Old emails from: field and subject
+
+
+
+
+
+ Something after the containing div
+
+
+
+
\ No newline at end of file
diff --git a/Mail2BugUnitTests/LastMessageSchemas/OutlookMobileSeparator.expected b/Mail2BugUnitTests/LastMessageSchemas/OutlookMobileSeparator.expected
new file mode 100644
index 0000000..2fd11bc
--- /dev/null
+++ b/Mail2BugUnitTests/LastMessageSchemas/OutlookMobileSeparator.expected
@@ -0,0 +1,10 @@
+
+
+
+
+
Message content from iPhone
+
+
+
+
+
\ No newline at end of file
diff --git a/Mail2BugUnitTests/LastMessageSchemas/OutlookMobileSeparator.orig b/Mail2BugUnitTests/LastMessageSchemas/OutlookMobileSeparator.orig
new file mode 100644
index 0000000..049ab06
--- /dev/null
+++ b/Mail2BugUnitTests/LastMessageSchemas/OutlookMobileSeparator.orig
@@ -0,0 +1,15 @@
+
+
+
+
+
Message content from iPhone
+
+
+
+
+
+
+ Something after the containing div
+
+
+
\ No newline at end of file
diff --git a/Mail2BugUnitTests/Mail2BugUnitTests.csproj b/Mail2BugUnitTests/Mail2BugUnitTests.csproj
index 13a88ca..200a577 100644
--- a/Mail2BugUnitTests/Mail2BugUnitTests.csproj
+++ b/Mail2BugUnitTests/Mail2BugUnitTests.csproj
@@ -82,6 +82,24 @@
+
+ Always
+
+
+ Always
+
+
+ Always
+
+
+ Always
+
+
+ Always
+
+
+ Always
+
PreserveNewest