diff --git a/.gitignore b/.gitignore
index 9f24499..106d3d6 100644
--- a/.gitignore
+++ b/.gitignore
@@ -187,4 +187,6 @@ FakesAssemblies/
# LightSwitch generated files
GeneratedArtifacts/
_Pvt_Extensions/
-ModelManifest.xml
\ No newline at end of file
+ModelManifest.xml
+
+.idea
diff --git a/LICENSE b/LICENSE
index 5f2dd7f..19e3071 100644
--- a/LICENSE
+++ b/LICENSE
@@ -502,4 +502,3 @@ necessary. Here is a sample; alter the names:
Ty Coon, President of Vice
That's all there is to it!
-
diff --git a/NameParser/NameParser/Config.cs b/NameParser/NameParser/Config.cs
index b52b088..6599049 100644
--- a/NameParser/NameParser/Config.cs
+++ b/NameParser/NameParser/Config.cs
@@ -9,498 +9,511 @@ public partial class HumanName
///
/// Any pieces that are not capitalized by capitalizing the first letter.
///
- public static readonly ISet> CapitalizationExceptions = new HashSet>
+ public static readonly IDictionary CapitalizationExceptions = new Dictionary(StringComparer.OrdinalIgnoreCase)
{
- Tuple.Create("ii", "II"),
- Tuple.Create("iii", "III"),
- Tuple.Create("iv", "IV"),
- Tuple.Create("md", "M.D."),
- Tuple.Create("phd", "Ph.D.")
+ { "ii", "II" },
+ { "iii", "III" },
+ { "iv", "IV" },
+ { "md", "M.D." },
+ { "phd", "Ph.D." }
};
///
/// Pieces that should join to their neighboring pieces, e.g. "and", "y" and "&". "of" and "the" are also include to facilitate joining multiple titles, e.g. "President of the United States".
///
- public static readonly ISet Conjunctions = new HashSet { "&", "and", "et", "e", "of", "the", "und", "y" };
-
+ public static readonly ISet Conjunctions = new HashSet(StringComparer.OrdinalIgnoreCase) { "&", "and", "et", "e", "of", "the", "und", "y" };
///
/// Name pieces that appear before a last name. They join to the piece that follows them to make one new piece.
///
- public static readonly ISet Prefixes = new HashSet
+ public static readonly ISet Prefixes = new HashSet(StringComparer.OrdinalIgnoreCase)
{
- "abu", "bon", "bin", "da", "dal", "de", "del", "dem", "den", "der", "de", "di", "dí", "het", "ibn", "in", "la", "le", "onder", "op", "san", "santa", "st", "ste", "'t", "ten", "van", "vel", "von"
+ "'t",
+ "abu",
+ "bin",
+ "bon",
+ "da",
+ "dal",
+ "de",
+ "del",
+ "dem",
+ "den",
+ "der",
+ "di",
+ "dí",
+ "het",
+ "ibn",
+ "in",
+ "la",
+ "le",
+ "onder",
+ "op",
+ "san",
+ "santa",
+ "st",
+ "ste",
+ "ten",
+ "van",
+ "vel",
+ "von"
};
///
/// For handling names that start with Mc or Mac such as McBride, MacDonald
///
- private static readonly Regex RegexMac = new Regex(@"^(ma?c)(\w+)", RegexOptions.IgnoreCase);
+ private static readonly Regex RegexMac = new Regex(@"^(ma?c)(\w+)", RegexOptions.IgnoreCase | RegexOptions.Compiled);
+
+ ///
+ /// For extracting optional nicknames, written either in parentheses or between matching single or double quotes, from full names.
+ ///
+ private static readonly Regex RegexNickname = new Regex(@"\s*(?:\((.+?)\))|(?:([""'])(.+?)\2)", RegexOptions.Compiled);
///
/// Pieces that come at the end of the name but are not last names. These potentially
/// conflict with initials that might be at the end of the name.
-
/// These may be updated in the future because some of them are actually titles that just
/// come at the end of the name, so semantically this is wrong. Positionally, it's correct.
///
- public static readonly ISet Suffixes = new HashSet()
+ public static readonly ISet Suffixes = new HashSet(StringComparer.OrdinalIgnoreCase)
{
- "esq",
- "esquire",
- "jr",
- "jnr",
- "sr",
- "snr",
"2",
- "i",
- "ii",
- "iii",
- "iv",
- "v",
- "clu",
- "chfc",
"cfp",
+ "chfc",
+ "clu",
"cpa",
"csm",
- "do",
"dds",
- "dpm",
"dmd",
- "md",
- "mba",
+ "do",
+ "dpm",
+ "esq",
+ "esquire",
+ "i",
+ "ii",
+ "iii",
+ "iv",
+ "jnr",
+ "jr",
"ma",
+ "mba",
+ "md",
+ "mp",
"phd",
"phr",
"pmp",
- "mp",
- "qc"
+ "qc",
+ "snr",
+ "sr",
+ "v"
};
///
/// When these titles appear with a single other name, that name is a first name, e.g.
/// "Sir John", "Sister Mary", "Queen Elizabeth".
///
- public static readonly ISet FirstNameTitles = new HashSet
+ public static readonly ISet FirstNameTitles = new HashSet(StringComparer.OrdinalIgnoreCase)
{
- "sir",
+ "aunt",
+ "auntie",
+ "brother",
"dame",
+ "father",
"king",
- "queen",
- "master",
"maid",
- "uncle",
- "auntie",
- "aunt",
- "brother",
- "sister",
+ "master",
"mother",
- "father",
- "pope"
+ "pope",
+ "queen",
+ "sir",
+ "sister",
+ "uncle"
};
-
///
/// **Cannot include things that could also be first names**, e.g. "dean".
/// Many of these from wikipedia: https://en.wikipedia.org/wiki/Title.
- /// The parser recognizes chains of these including conjunctions allowing
+ /// The parser recognizes chains of these including conjunctions allowing
/// recognition titles like "Deputy Secretary of State".
///
- public static readonly ISet Titles = new HashSet
+ public static readonly ISet Titles = new HashSet(FirstNameTitles, StringComparer.OrdinalIgnoreCase)
{
- //
- "sir",
- "dame",
- "king",
- "queen",
- "master",
- "maid",
- "uncle",
- "auntie",
- "aunt",
- "brother",
- "sister",
- "mother",
- "father",
- "pope",
- //
- "dr",
- "doctor",
- "miss",
- "misses",
- "mr",
- "mister",
- "mrs",
- "ms",
- "rev",
- "madam",
- "madame",
- "ab",
- "2ndlt",
- "amn",
+ "1lt",
+ "1sgt",
"1stlt",
+ "1stsgt",
+ "2lt",
+ "2ndlt",
"a1c",
- "capt",
- "sra",
- "maj",
- "ssgt",
- "ltcol",
- "tsgt",
- "col",
+ "ab",
+ "abbess",
+ "abbot",
+ "academic",
+ "acolyte",
+ "adept",
+ "adjutant",
+ "adm",
+ "admiral",
+ "advocate",
+ "air",
+ "akhoond",
+ "alderman",
+ "almoner",
+ "ambassador",
+ "amn",
+ "analytics",
+ "appellate",
+ "apprentice",
+ "arbitrator",
+ "archbishop",
+ "archdeacon",
+ "archdruid",
+ "archduchess",
+ "archduke",
+ "arhat",
+ "assistant",
+ "assoc",
+ "associate",
+ "asst",
+ "attache",
+ "attaché",
+ "attorney",
+ "ayatollah",
+ "baba",
+ "bailiff",
+ "banner",
+ "bard",
+ "baron",
+ "barrister",
+ "bearer",
+ "bench",
+ "bg",
+ "bgen",
+ "bishop",
+ "blessed",
+ "bodhisattva",
+ "brigadier",
"briggen",
- "1stsgt",
- "majgen",
- "smsgt",
- "ltgen",
- "cmsgt",
+ "buddha",
+ "burgess",
+ "business",
+ "bwana",
+ "canon",
+ "capt",
+ "captain",
+ "cardinal",
+ "catholicos",
"ccmsgt",
+ "cdr",
+ "ceo",
+ "cfo",
+ "chancellor",
+ "chaplain",
+ "chargé d'affaires",
+ "chief",
+ "chieftain",
+ "civil",
+ "clerk",
"cmsaf",
- "pvt",
- "2lt",
- "pv2",
- "1lt",
- "pfc",
- "cpt",
- "spc",
+ "cmsgt",
+ "coach",
+ "col",
+ "colonel",
+ "commander",
+ "commander-in-chief",
+ "commodore",
+ "comptroller",
+ "controller",
+ "corporal",
+ "corporate",
+ "councillor",
+ "courtier",
"cpl",
- "ltc",
- "sgt",
- "ssg",
- "bg",
- "sfc",
- "mg",
- "msg",
- "ltg",
- "1sgt",
- "sgm",
- "csm",
- "sma",
- "wo1",
- "wo2",
- "wo3",
- "wo4",
- "wo5",
- "ens",
- "sa",
- "ltjg",
- "sn",
- "lt",
- "po3",
- "lcdr",
- "po1",
- "po2",
- "cdr",
"cpo",
- "scpo",
- "mcpo",
- "vadm",
- "mcpoc",
- "adm",
- "mpco-cg",
- "lcpl",
- "gysgt",
- "bgen",
- "msgt",
- "mgysgt",
- "gen",
- "sgtmaj",
- "sgtmajmc",
- "wo-1",
+ "cpt",
+ "credit",
+ "criminal",
+ "csm",
+ "curator",
+ "customs",
"cwo-2",
"cwo-3",
"cwo-4",
"cwo-5",
- "rdml",
- "radm",
- "mcpon",
- "fadm",
"cwo2",
"cwo3",
"cwo4",
"cwo5",
- "rt",
- "lord",
- "lady",
- "duke",
- "dutchess",
- "representative",
- "rep",
- "senator",
- "cardinal",
- "secretary",
- "state",
- "foreign",
- "minister",
- "speaker",
- "president",
- "pres",
- "ceo",
- "cfo",
+ "deacon",
+ "delegate",
"deputy",
+ "designated",
+ "dir",
+ "director",
+ "discovery",
+ "district",
+ "division",
+ "docent",
+ "docket",
+ "doctor",
+ "doyen",
"dpty",
- "executive",
- "exec",
- "vice",
- "vc",
- "councillor",
- "manager",
- "mgr",
- "alderman",
- "delegate",
- "mayor",
- "lieutenant",
- "governor",
- "prefect",
- "prelate",
- "premier",
- "burgess",
- "ambassador",
- "envoy",
- "attaché",
- "chargé d'affaires",
- "provost",
- "marquis",
- "marquess",
- "marquise",
- "marchioness",
- "archduke",
- "archduchess",
- "viscount",
- "baron",
+ "dr",
+ "druid",
+ "duke",
+ "dutchess",
+ "edmi",
+ "edohen",
+ "effendi",
+ "ekegbian",
+ "elder",
+ "elerunwon",
"emperor",
"empress",
- "tsar",
- "tsarina",
- "leader",
- "abbess",
- "abbot",
+ "ens",
+ "envoy",
+ "exec",
+ "executive",
+ "fadm",
+ "family",
+ "federal",
+ "field",
+ "financial",
+ "first",
+ "flag",
+ "flying",
+ "foreign",
+ "forester",
"friar",
- "superior",
- "reverend",
- "bishop",
- "archbishop",
- "metropolitan",
- "presbyter",
- "priest",
- "priestess",
- "matriarch",
- "patriarch",
- "catholicos",
- "vicar",
- "chaplain",
- "canon",
- "pastor",
- "primate",
- "servant",
- "venerable",
- "blessed",
- "saint",
- "member",
- "solicitor",
- "mufti",
- "grand",
- "chancellor",
- "barrister",
- "bailiff",
- "attorney",
- "advocate",
- "deacon",
- "archdeacon",
- "acolyte",
- "elder",
- "monsignor",
- "almoner",
- "prof",
- "colonel",
+ "gen",
"general",
- "commodore",
- "air",
- "corporal",
- "staff",
- "chief",
- "first",
- "sergeant",
- "admiral",
- "high",
- "rear",
- "brigadier",
- "captain",
- "group",
- "commander",
- "commander-in-chief",
- "wing",
- "adjutant",
- "director",
- "dir",
"generalissimo",
- "resident",
- "surgeon",
- "officer",
- "controller",
- "academic",
- "analytics",
- "business",
- "credit",
- "financial",
+ "gentiluomo",
+ "giani",
+ "goodman",
+ "goodwife",
+ "governor",
+ "grand",
+ "group",
+ "guru",
+ "gyani",
+ "gysgt",
+ "hajji",
+ "headman",
+ "her",
+ "hereditary",
+ "high",
+ "his",
+ "hon",
+ "honorable",
+ "honourable",
+ "imam",
"information",
- "security",
+ "intelligence",
+ "intendant",
+ "journeyman",
+ "jr",
+ "judge",
+ "judicial",
+ "junior",
+ "king's",
+ "kingdom",
"knowledge",
+ "lady",
+ "lama",
+ "lamido",
+ "law",
+ "lcdr",
+ "lcpl",
+ "leader",
+ "lieutenant",
+ "lord",
+ "lt",
+ "ltc",
+ "ltcol",
+ "ltg",
+ "ltgen",
+ "ltjg",
+ "madam",
+ "madame",
+ "mag",
+ "mag-judge",
+ "mag/judge",
+ "magistrate",
+ "magistrate-judge",
+ "maharajah",
+ "maharani",
+ "mahdi",
+ "maj",
+ "majesty",
+ "majgen",
+ "manager",
+ "marcher",
+ "marchioness",
"marketing",
+ "marquess",
+ "marquis",
+ "marquise",
+ "matriarch",
+ "mayor",
+ "mcpo",
+ "mcpoc",
+ "mcpon",
+ "member",
+ "metropolitan",
+ "mg",
+ "mgr",
+ "mgysgt",
+ "minister",
+ "miss",
+ "misses",
+ "mister",
+ "monsignor",
+ "mpco-cg",
+ "mr",
+ "mrs",
+ "ms",
+ "msg",
+ "msgt",
+ "mufti",
+ "mullah",
+ "municipal",
+ "murshid",
+ "nanny",
+ "national",
+ "nurse",
+ "officer",
"operating",
+ "pastor",
+ "patriarch",
"petty",
- "risk",
- "strategy",
- "technical",
- "warrant",
- "corporate",
- "customs",
- "field",
- "flag",
- "flying",
- "intelligence",
+ "pfc",
+ "pharaoh",
"pilot",
+ "pir",
+ "po1",
+ "po2",
+ "po3",
"police",
"political",
- "revenue",
- "senior",
- "sr",
- "junior",
- "jr",
- "private",
- "principal",
+ "prefect",
+ "prelate",
+ "premier",
+ "pres",
+ "presbyter",
+ "president",
+ "presiding",
+ "priest",
+ "priestess",
+ "primate",
+ "prime",
"prin",
- "coach",
- "nurse",
- "nanny",
- "docent",
- "lama",
- "druid",
- "archdruid",
- "rabbi",
- "rebbe",
- "buddha",
- "ayatollah",
- "imam",
- "bodhisattva",
- "mullah",
- "mahdi",
- "saoshyant",
- "tirthankar",
- "vardapet",
- "pharaoh",
- "sultan",
- "sultana",
- "maharajah",
- "maharani",
- "vizier",
- "chieftain",
- "comptroller",
- "courtier",
- "curator",
- "doyen",
- "edohen",
- "ekegbian",
- "elerunwon",
- "forester",
- "gentiluomo",
- "headman",
- "intendant",
- "lamido",
- "marcher",
+ "prince",
+ "princess",
+ "principal",
"prior",
+ "private",
+ "pro",
+ "prof",
+ "provost",
+ "pslc",
"pursuivant",
+ "pv2",
+ "pvt",
+ "queen's",
+ "rabbi",
+ "radm",
"rangatira",
"ranger",
+ "rdml",
+ "rear",
+ "rebbe",
"registrar",
+ "rep",
+ "representative",
+ "resident",
+ "rev",
+ "revenue",
+ "reverend",
+ "right",
+ "risk",
+ "royal",
+ "rt",
+ "sa",
+ "saint",
+ "saoshyant",
+ "scpo",
+ "se",
+ "secretary",
+ "security",
"seigneur",
+ "senator",
+ "senior",
+ "senior-judge",
+ "sergeant",
+ "servant",
+ "sfc",
+ "sgm",
+ "sgt",
+ "sgtmaj",
+ "sgtmajmc",
"shehu",
"sheikh",
"sheriff",
+ "siddha",
+ "sma",
+ "smsgt",
+ "sn",
+ "solicitor",
+ "spc",
+ "speaker",
+ "special",
+ "sr",
+ "sra",
+ "ssg",
+ "ssgt",
+ "staff",
+ "state",
+ "states",
+ "strategy",
"subaltern",
"subedar",
+ "sultan",
+ "sultana",
+ "superior",
+ "supreme",
+ "surgeon",
+ "swordbearer",
"sysselmann",
+ "tax",
+ "technical",
"timi",
+ "tirthankar",
"treasurer",
+ "tsar",
+ "tsarina",
+ "tsgt",
+ "uk",
+ "united",
+ "us",
+ "vadm",
+ "vardapet",
+ "vc",
+ "venerable",
"verderer",
+ "vicar",
+ "vice",
+ "viscount",
+ "vizier",
"warden",
- "hereditary",
+ "warrant",
+ "wing",
+ "wo-1",
+ "wo1",
+ "wo2",
+ "wo3",
+ "wo4",
+ "wo5",
"woodman",
- "bearer",
- "banner",
- "swordbearer",
- "apprentice",
- "journeyman",
- "adept",
- "akhoond",
- "arhat",
- "bwana",
- "goodman",
- "goodwife",
- "bard",
- "hajji",
- "baba",
- "effendi",
- "giani",
- "gyani",
- "guru",
- "siddha",
- "pir",
- "murshid",
- "attache",
- "prime",
- "united",
- "states",
- "national",
- "associate",
- "assoc",
- "assistant",
- "asst",
- "supreme",
- "appellate",
- "judicial",
- "queen's",
- "king's",
- "prince",
- "princess",
- "bench",
- "right",
- "majesty",
- "his",
- "her",
- "kingdom",
- "royal",
- "honorable",
- "honourable",
- "hon",
- "magistrate",
- "mag",
- "judge",
- "designated",
- "us",
- "uk",
- "federal",
- "district",
- "arbitrator",
- "pro",
- "se",
- "law",
- "clerk",
- "docket",
- "pslc",
- "special",
- "municipal",
- "tax",
- "civil",
- "criminal",
- "family",
- "presiding",
- "division",
- "edmi",
- "discovery",
- "magistrate-judge",
- "mag-judge",
- "senior-judge",
- "mag/judge"
};
}
}
\ No newline at end of file
diff --git a/NameParser/NameParser/NameParser.csproj b/NameParser/NameParser/NameParser.csproj
index 7b8c4ac..175cdd9 100644
--- a/NameParser/NameParser/NameParser.csproj
+++ b/NameParser/NameParser/NameParser.csproj
@@ -1,10 +1,10 @@
- netstandard20;net45
+ netstandard2.0
true
1.5.0.0
1.5.0.0
- https://licenses.nuget.org/LGPL-2.1-or-later
+ https://licenses.nuget.org/LGPL-2.1-or-later
https://github.com/aeshirey/NameParserSharp
Parses full name into first name, middle name, last name, nickname, title, and suffix fields
1.5.0
diff --git a/NameParser/NameParser/Parser.cs b/NameParser/NameParser/Parser.cs
index 4240e5b..3ff7792 100644
--- a/NameParser/NameParser/Parser.cs
+++ b/NameParser/NameParser/Parser.cs
@@ -3,17 +3,17 @@ namespace NameParser
using System;
using System.Collections.Generic;
using System.Linq;
- using System.Text.RegularExpressions;
///
/// Parse a person's name into individual components.
/// Instantiation assigns to "fullName", and assignment to "fullName"
- /// triggers parseFullName. After parsing the name, these instance
+ /// triggers parseFullName. After parsing the name, these instance
/// attributes are available.
///
public partial class HumanName
{
#region Properties
+
///
/// Indicates whether any values were parsed out of the provided
///
@@ -26,20 +26,20 @@ public partial class HumanName
///
public string FullName
{
- get { return _FullName; }
+ get => _fullName;
private set
{
- _OriginalName = value;
- _FullName = _OriginalName;
+ _originalName = value;
+ _fullName = _originalName;
- _TitleList = new List();
- _FirstList = new List();
- _MiddleList = new List();
- _LastList = new List();
- _SuffixList = new List();
- _NicknameList = new List();
- _LastBaseList = new List();
- _LastPrefixList = new List();
+ _titleList = new List();
+ _firstList = new List();
+ _middleList = new List();
+ _lastList = new List();
+ _suffixList = new List();
+ _nicknameList = new List();
+ _lastBaseList = new List();
+ _lastPrefixList = new List();
if (!string.IsNullOrEmpty(value))
{
@@ -48,51 +48,52 @@ private set
}
}
- public string Title => string.Join(" ", _TitleList);
+ public string Title => string.Join(" ", _titleList);
- public string First => string.Join(" ", _FirstList);
+ public string First => string.Join(" ", _firstList);
- public string Middle => string.Join(" ", _MiddleList);
+ public string Middle => string.Join(" ", _middleList);
- public string Last => string.Join(" ", _LastList);
+ public string Last => string.Join(" ", _lastList);
- public string Suffix => string.Join(" ", _SuffixList);
+ public string Suffix => string.Join(" ", _suffixList);
- public string Nickname => string.Join(" ", _NicknameList);
+ public string Nickname => string.Join(" ", _nicknameList);
///
/// If is true and the input contains "&" or "and", the additional
/// name will be parsed out and put into a second record. For example,
/// "John D. and Catherine T. MacArthur" should be parsed as {John, D, MacArthur} with an AdditionalName
- /// set to the parsed value {Catherine, T, MacAthur}.
+ /// set to the parsed value {Catherine, T, MacArthur}.
///
public HumanName AdditionalName { get; private set; }
- public string LastBase => string.Join(" ", _LastBaseList);
- public string LastPrefixes => string.Join(" ", _LastPrefixList);
+ public string LastBase => string.Join(" ", _lastBaseList);
+ public string LastPrefixes => string.Join(" ", _lastPrefixList);
+
#endregion
- private string _FullName, _OriginalName;
+ private string _fullName;
+ private string _originalName;
- private IList _TitleList;
- private IList _FirstList;
- private IList _MiddleList;
- private IList _LastList;
- private IList _SuffixList;
- private IList _NicknameList;
- private IList _LastBaseList;
- private IList _LastPrefixList;
- private Prefer prefs;
+ private IList _titleList;
+ private IList _firstList;
+ private IList _middleList;
+ private IList _lastList;
+ private IList _suffixList;
+ private IList _nicknameList;
+ private IList _lastBaseList;
+ private IList _lastPrefixList;
+ private readonly Prefer _prefs;
public HumanName(string fullName, Prefer prefs = Prefer.Default)
{
if (fullName == null)
{
- throw new ArgumentNullException("fullName");
+ throw new ArgumentNullException(nameof(fullName));
}
- this.prefs = prefs;
-
+ _prefs = prefs;
FullName = fullName;
}
@@ -103,7 +104,7 @@ public HumanName(string fullName, Prefer prefs = Prefer.Default)
return true;
}
- if (((object)left == null) || ((object)right == null))
+ if ((object)left == null || (object)right == null)
{
return false;
}
@@ -123,6 +124,26 @@ public HumanName(string fullName, Prefer prefs = Prefer.Default)
return !(left == right);
}
+ /// <summary>
+ /// Value equality for comparisons made through <see cref="object"/>: true only when
+ /// <paramref name="obj"/> is a <see cref="HumanName"/> that this type's == operator treats as equal.
+ /// </summary>
+ public override bool Equals(object obj) => obj is HumanName other && this == other;
+
+ /// <summary>Combines the hash codes of the six joined name components.</summary>
+ public override int GetHashCode()
+ {
+ unchecked // arithmetic overflow simply wraps
+ {
+ var hash = 17;
+ foreach (var component in new[] { Title, First, Middle, Last, Suffix, Nickname })
+ {
+ hash = hash * 23 + (component?.GetHashCode() ?? 0);
+ }
+ return hash;
+ }
+ }
+
///
/// Return the parsed name as a dictionary of its attributes.
///
@@ -175,30 +196,41 @@ public Dictionary AsDictionary(bool includeEmpty = true)
return d;
}
- #region Parse helpers
+ #region Parse helpers
+
private static bool IsTitle(string value)
{
- return Titles.Contains(value.ToLower().Replace(".", string.Empty));
+ return Titles.Contains(value.Replace(".", string.Empty));
}
private static bool IsConjunction(string piece)
{
- return Conjunctions.Contains(piece.ToLower().Replace(".", string.Empty)) && !IsAnInitial(piece);
+ return Conjunctions.Contains(piece.Replace(".", string.Empty)) && !IsAnInitial(piece);
}
private static bool IsPrefix(string piece)
{
- return Prefixes.Contains(piece.ToLower().Replace(".", string.Empty)) && !IsAnInitial(piece);
+ return Prefixes.Contains(piece.Replace(".", string.Empty)) && !IsAnInitial(piece);
}
private static bool IsSuffix(string piece)
{
- return Suffixes.Contains(piece.Replace(".", string.Empty).ToLower()) && !IsAnInitial(piece);
+ return Suffixes.Contains(piece.Replace(".", string.Empty)) && !IsAnInitial(piece);
}
private static bool AreSuffixes(IEnumerable pieces)
{
- return pieces.Any() && pieces.All(IsSuffix);
+ // Walk the sequence once; the first non-suffix ends the scan, and an empty sequence yields false.
+ var matched = false;
+ using (var cursor = pieces.GetEnumerator())
+ {
+ while (cursor.MoveNext())
+ {
+ if (!IsSuffix(cursor.Current)) { return false; }
+ matched = true;
+ }
+ }
+ return matched;
}
///
@@ -208,11 +240,10 @@ private static bool AreSuffixes(IEnumerable pieces)
/// False if is a prefix (de, abu, bin), suffix (jr, iv, cpa), title (mr, pope), or initial (x, e.); true otherwise
private static bool IsRootname(string piece)
{
- var lcPiece = piece.ToLower().Replace(".", string.Empty);
-
- return !Suffixes.Contains(lcPiece)
- && !Prefixes.Contains(lcPiece)
- && !Titles.Contains(lcPiece)
+ var noDots = piece.Replace(".", string.Empty);
+ return !Suffixes.Contains(noDots)
+ && !Prefixes.Contains(noDots)
+ && !Titles.Contains(noDots)
&& !IsAnInitial(piece);
}
@@ -223,16 +254,16 @@ private static bool IsRootname(string piece)
/// True iff matches the regex "^[A-Za-z].?$"
private static bool IsAnInitial(string value)
{
- if (string.IsNullOrEmpty(value) || value.Length > 2)
- {
- return false;
- }
-
- return char.IsLetter(value[0]) && (value.Length == 1 || value[1] == '.');
+ return !string.IsNullOrEmpty(value)
+ && value.Length <= 2
+ && char.IsLetter(value[0])
+ && (value.Length == 1 || value[1] == '.');
}
+
#endregion
#region full name parser
+
///
/// If there are only two parts and one is a title, assume it's a last name
/// instead of a first name. e.g. Mr. Johnson. Unless it's a special title
@@ -242,18 +273,18 @@ private static bool IsAnInitial(string value)
private void PostProcessFirstnames()
{
if (!string.IsNullOrEmpty(Title)
- && !FirstNameTitles.Contains(Title.ToLower().Replace(".", string.Empty))
- && 1 == _FirstList.Count + _LastList.Count)
+ && !FirstNameTitles.Contains(Title.Replace(".", string.Empty))
+ && 1 == _firstList.Count + _lastList.Count)
{
- if (_FirstList.Any())
+ if (_firstList.Any())
{
- _LastList = _FirstList;
- _FirstList = new List();
+ _lastList = _firstList;
+ _firstList = new List();
}
else
{
- _FirstList = _LastList;
- _LastList = new List();
+ _firstList = _lastList;
+ _lastList = new List();
}
}
}
@@ -261,12 +292,12 @@ private void PostProcessFirstnames()
///
/// Parse out the last name components into prefixes and a base last name
/// in order to allow sorting. Prefixes are those in ,
- /// start off and are contiguous. See
+ /// start off and are contiguous. See
///
private void PostProcessLastname()
{
// parse out 'words' from the last name
- var words = _LastList
+ var words = _lastList
.SelectMany(part => part.Split(' '))
.ToList();
@@ -276,22 +307,20 @@ private void PostProcessLastname()
prefixCount++;
}
- if (this.prefs.HasFlag(Prefer.FirstOverPrefix)
- && this._FirstList.Count == 0
+ if (_prefs.HasFlag(Prefer.FirstOverPrefix)
+ && _firstList.Count == 0
&& prefixCount == 1
&& words.Count > 1)
{
- _FirstList = words.Take(1).ToList();
-
- _LastList = words.Skip(1).ToList();
+ _firstList = words.Take(1).ToList();
+ _lastList = words.Skip(1).ToList();
}
else
{
-
- _LastPrefixList = words.Take(prefixCount).ToList();
+ _lastPrefixList = words.Take(prefixCount).ToList();
}
- _LastBaseList = words.Skip(prefixCount).ToList();
+ _lastBaseList = words.Skip(prefixCount).ToList();
}
private void PostProcessAdditionalName()
@@ -306,7 +335,7 @@ private void PostProcessAdditionalName()
// the primary's last name from the secondary.
if (string.IsNullOrEmpty(Last))
{
- _LastList = AdditionalName._LastList;
+ _lastList = AdditionalName._lastList;
}
else
{
@@ -314,7 +343,7 @@ private void PostProcessAdditionalName()
var next = AdditionalName;
while (next != null && string.IsNullOrEmpty(next.Last))
{
- next._LastList = _LastList;
+ next._lastList = _lastList;
next = next.AdditionalName;
}
}
@@ -323,44 +352,41 @@ private void PostProcessAdditionalName()
///
/// The main parse method for the parser. This method is run upon assignment to the
/// fullName attribute or instantiation.
- ///
+ ///
/// Basic flow is to hand off to `pre_process` to handle nicknames. It
/// then splits on commas and chooses a code path depending on the number of commas.
/// `parsePieces` then splits those parts on spaces and
- /// `joinOnConjunctions` joins any pieces next to conjunctions.
+ /// `joinOnConjunctions` joins any pieces next to conjunctions.
///
private void ParseFullName()
{
if (ParseMultipleNames)
{
- if (_FullName.Contains('&'))
+ var split = _fullName.IndexOf('&');
+ if (split >= 0)
{
- var split = _FullName.IndexOf('&');
-
- var primary = _FullName.Substring(0, split);
-
- var secondary = _FullName.Substring(split + 1);
+ var primary = _fullName.Substring(0, split);
+ var secondary = _fullName.Substring(split + 1);
AdditionalName = new HumanName(secondary);
-
- _FullName = primary;
+ _fullName = primary;
}
- else if (_FullName.ToLowerInvariant().Contains(" and "))
+ else
{
- var split = _FullName.IndexOf(" and ", StringComparison.InvariantCultureIgnoreCase);
-
- var primary = _FullName.Substring(0, split);
-
- var secondary = _FullName.Substring(split + 5 /* length of " and " */);
- AdditionalName = new HumanName(secondary);
-
- _FullName = primary;
+ split = _fullName.IndexOf(" and ", StringComparison.OrdinalIgnoreCase); // ordinal: a hit is always exactly the 5 chars skipped below (a culture-sensitive search can match across ignorable characters)
+ if (split >= 0)
+ {
+ var primary = _fullName.Substring(0, split);
+ var secondary = _fullName.Substring(split + 5 /* length of " and " */);
+ AdditionalName = new HumanName(secondary);
+ _fullName = primary;
+ }
}
}
- ParseNicknames(ref _FullName, out _NicknameList);
+ ParseNicknames(ref _fullName, out _nicknameList);
// break up fullName by commas
- var parts = _FullName
+ var parts = _fullName
.Split(new[] { ',' }, StringSplitOptions.RemoveEmptyEntries)
.Select(part => part.Trim())
.ToList();
@@ -387,50 +413,50 @@ private void ParseFullName()
{
// some last names appear as titles (https://github.com/aeshirey/NameParserSharp/issues/9)
// if we've already parsed out titles, first, or middle names, something appearing as a title may in fact be a last name
- if (_FirstList.Count > 0 || _MiddleList.Count > 0)
+ if (_firstList.Count > 0 || _middleList.Count > 0)
{
- _LastList.Add(piece);
+ _lastList.Add(piece);
}
else
{
- _TitleList.Add(piece);
+ _titleList.Add(piece);
}
}
else if (string.IsNullOrEmpty(First))
{
- _FirstList.Add(piece);
+ _firstList.Add(piece);
}
else if (AreSuffixes(pieces.Skip(i + 1)))
{
- _LastList.Add(piece);
- _SuffixList = _SuffixList.Concat(pieces.Skip(i + 1)).ToList();
+ _lastList.Add(piece);
+ _suffixList = _suffixList.Concat(pieces.Skip(i + 1)).ToList();
break;
}
else if (!string.IsNullOrEmpty(nxt))
{
// another component exists, so this is likely a middle name
- _MiddleList.Add(piece);
+ _middleList.Add(piece);
}
else if (!ParseMultipleNames || AdditionalName == null)
{
// no additional name. some last names can appear to be suffixes. try to figure this out
- if (_LastList.Count > 0 && IsSuffix(piece))
+ if (_lastList.Count > 0 && IsSuffix(piece))
{
- _SuffixList.Add(piece);
+ _suffixList.Add(piece);
}
else
{
- _LastList.Add(piece);
+ _lastList.Add(piece);
}
}
- else if (AdditionalName._LastList.Any() && IsAnInitial(piece))
+ else if (AdditionalName._lastList.Any() && IsAnInitial(piece))
{
// the additional name has a last, and this one looks like a middle. we'll save as a middle and later will copy the last name.
- _MiddleList.Add(piece);
+ _middleList.Add(piece);
}
else
{
- _LastList.Add(piece);
+ _lastList.Add(piece);
}
}
}
@@ -438,8 +464,8 @@ private void ParseFullName()
{
// suffix comma: title first middle last [suffix], suffix [suffix] [, suffix]
// parts[0], parts[1:...]
- _SuffixList = _SuffixList.Concat(parts.Skip(1)).ToList();
- var pieces = ParsePieces(parts[0].Split(new[] { ' ' }));
+ _suffixList = _suffixList.Concat(parts.Skip(1)).ToList();
+ var pieces = ParsePieces(parts[0].Split(' '));
for (var i = 0; i < pieces.Length; i++)
{
@@ -448,20 +474,20 @@ private void ParseFullName()
if (IsTitle(piece) && (!string.IsNullOrEmpty(nxt) || pieces.Length == 1))
{
- _TitleList.Add(piece);
+ _titleList.Add(piece);
continue;
}
if (string.IsNullOrEmpty(First))
{
- _FirstList.Add(piece);
+ _firstList.Add(piece);
continue;
}
if (AreSuffixes(pieces.Skip(i + 1)))
{
- _LastList.Add(piece);
- _SuffixList = pieces.Skip(i + 1).Concat(_SuffixList).ToList();
+ _lastList.Add(piece);
+ _suffixList = pieces.Skip(i + 1).Concat(_suffixList).ToList();
break;
}
@@ -470,21 +496,21 @@ private void ParseFullName()
if (!string.IsNullOrEmpty(nxt))
{
// another component exists, so this is likely a middle name
- _MiddleList.Add(piece);
+ _middleList.Add(piece);
}
else if (!ParseMultipleNames || AdditionalName == null)
{
// no additional name, so treat this as a last
- _LastList.Add(piece);
+ _lastList.Add(piece);
}
- else if (AdditionalName._LastList.Any() && IsAnInitial(piece))
+ else if (AdditionalName._lastList.Any() && IsAnInitial(piece))
{
// the additional name has a last, and this one looks like a middle. we'll save as a middle and later will copy the last name.
- _MiddleList.Add(piece);
+ _middleList.Add(piece);
}
else
{
- _LastList.Add(piece);
+ _lastList.Add(piece);
}
}
}
@@ -492,22 +518,21 @@ private void ParseFullName()
{
// lastname comma: last [suffix], title first middles[,] suffix [,suffix]
// parts[0], parts[1], parts[2:...]
- var pieces = ParsePieces(parts[1].Split(new[] { ' ' }), 1);
+ var pieces = ParsePieces(parts[1].Split(' '), 1);
// lastname part may have suffixes in it
- var lastnamePieces = ParsePieces(parts[0].Split(new[] { ' ' }), 1);
+ var lastnamePieces = ParsePieces(parts[0].Split(' '), 1);
foreach (var piece in lastnamePieces)
{
-
- // the first one is always a last name, even if it look like a suffix
- if (IsSuffix(piece) && _LastList.Any())
+ // the first one is always a last name, even if it looks like a suffix
+ if (IsSuffix(piece) && _lastList.Any())
{
- _SuffixList.Add(piece);
+ _suffixList.Add(piece);
}
else
{
- _LastList.Add(piece);
+ _lastList.Add(piece);
}
}
@@ -515,36 +540,36 @@ private void ParseFullName()
{
var piece = pieces[i];
var nxt = i == pieces.Length - 1 ? string.Empty : pieces[i + 1];
-
if (IsTitle(piece) && (!string.IsNullOrEmpty(nxt) || pieces.Length == 1))
{
- _TitleList.Add(piece);
+ _titleList.Add(piece);
}
else if (string.IsNullOrEmpty(First))
{
- _FirstList.Add(piece);
+ _firstList.Add(piece);
}
else if (IsSuffix(piece))
{
- _SuffixList.Add(piece);
+ _suffixList.Add(piece);
}
else
{
- _MiddleList.Add(piece);
+ _middleList.Add(piece);
}
}
- if (parts.Count() >= 3 && !string.IsNullOrEmpty(parts[2]))
+
+ if (parts.Count >= 3 && !string.IsNullOrEmpty(parts[2]))
{
- _SuffixList = _SuffixList.Concat(parts.Skip(2)).ToList();
+ _suffixList = _suffixList.Concat(parts.Skip(2)).ToList();
}
}
- IsUnparsable = !_TitleList.Any()
- && !_FirstList.Any()
- && !_MiddleList.Any()
- && !_LastList.Any()
- && !_SuffixList.Any()
- && !_NicknameList.Any();
+ IsUnparsable = !_titleList.Any()
+ && !_firstList.Any()
+ && !_middleList.Any()
+ && !_lastList.Any()
+ && !_suffixList.Any()
+ && !_nicknameList.Any();
PostProcessFirstnames();
PostProcessLastname();
@@ -553,30 +578,27 @@ private void ParseFullName()
private static void ParseNicknames(ref string fullName, out IList nicknameList)
{
- // this regex is an improvement upon the original in that it adds apostrophes and appropriately captures
+ // this regex is an improvement upon the original in that it adds apostrophes and appropriately captures
// the nicknames in "john 'jack' kennedy", "richard (dick) nixon" and @"william ""bill"" clinton".
- // it also doesn't try to parse out improperly matched inputs that the python version would have such as
- // @"john (j"" jones", @"samuel (sammy"" samsonite"
+ // it also doesn't try to parse out improperly matched inputs that the python version would have such as
+ // @"john (j"" jones", @"samuel (sammy"" samsonite"
// https://code.google.com/p/python-nameparser/issues/detail?id=33
- var nicknameRegex = new Regex(@"\s*(?:\((.+?)\))|(?:([""'])(.+?)\2)");
nicknameList = new List();
- var match = nicknameRegex.Match(fullName);
+ var match = RegexNickname.Match(fullName);
var nicknameFound = false;
while (match.Success && match.Groups[0].Value.Length > 0)
{
nicknameFound = true;
-
// remove from the full name the nickname plus its identifying boundary (parens or quotes)
fullName = fullName.Replace(match.Groups[0].Value, string.Empty);
-
// keep only the nickname part
var matchGroup = match.Groups[0].Value.TrimStart().StartsWith("(") ? 1 : 3; // which regex group was used: 1 is for parens; 3 is single- or double-quoted nicknames
nicknameList.Add(match.Groups[matchGroup].Value);
- match = nicknameRegex.Match(fullName);
+ match = RegexNickname.Match(fullName);
}
// normalize whitespace
@@ -594,13 +616,10 @@ private static void ParseNicknames(ref string fullName, out IList nickna
/// pieces split on spaces and joined on conjunctions
protected static string[] ParsePieces(IEnumerable parts, int additionalPartsCount = 0)
{
- var tmp = new List();
- foreach (var part in parts)
- {
- tmp.AddRange(part.Split(new[] { ' ' }, StringSplitOptions.RemoveEmptyEntries).Select(s => s.Trim(',')));
- }
-
- return joinOnConjunctions(tmp, additionalPartsCount);
+ var pieces = parts.SelectMany(part => part.Split(new[] { ' ' }, StringSplitOptions.RemoveEmptyEntries))
+ .Select(s => s.Trim(','))
+ .ToList();
+ return joinOnConjunctions(pieces, additionalPartsCount);
}
///
@@ -612,81 +631,79 @@ protected static string[] ParsePieces(IEnumerable parts, int additionalP
/// new list with piece next to conjunctions merged into one piece with spaces in it.
internal static string[] joinOnConjunctions(List pieces, int additionalPartsCount = 0)
{
- var length = pieces.Count() + additionalPartsCount;
+ var length = pieces.Count + additionalPartsCount;
- // don't join on conjuctions if there are only 2 parts
+ // don't join on conjunctions if there are only 2 parts
if (length < 3)
{
return pieces.ToArray();
}
- foreach (var conj in pieces.Where(IsConjunction).Reverse())
+ for (var i = pieces.Count - 1; i >= 0; i--)
{
+ var conj = pieces[i];
+ if (!IsConjunction(conj))
+ {
+ continue;
+ }
+
// loop through the pieces backwards, starting at the end of the list.
// Join conjunctions to the pieces on either side of them.
if (conj.Length == 1 && pieces.Count(IsRootname) < 4)
{
- // if there are only 3 total parts (minus known titles, suffixes and prefixes)
+ // if there are only 3 total parts (minus known titles, suffixes and prefixes)
// and this conjunction is a single letter, prefer treating it as an initial
// rather than a conjunction.
// http://code.google.com/p/python-nameparser/issues/detail?id=11
continue;
}
- var index = pieces.IndexOf(conj);
-
- if (index == -1)
- {
- continue;
- }
-
- if (index < pieces.Count() - 1)
+ if (i < pieces.Count - 1)
{
// if this is not the last piece
string newPiece;
- if (index == 0)
+ if (i == 0)
{
// if this is the first piece and it's a conjunction
- var nxt = pieces[index + 1];
-
+ var nxt = pieces[i + 1];
var cons = IsTitle(nxt) ? Conjunctions : Titles;
newPiece = string.Join(" ", pieces.Take(2));
cons.Add(newPiece);
- pieces[index] = newPiece;
- pieces.RemoveAt(index + 1);
+ pieces[i] = newPiece;
+ pieces.RemoveAt(i + 1);
continue;
}
- if (IsConjunction(pieces[index - 1]))
+ if (IsConjunction(pieces[i - 1]))
{
// if the piece in front of this one is a conjunction too,
- // add new_piece (this conjuction and the following piece)
- // to the conjuctions constant so that it is recognized
- // as a conjunction in the next loop.
+ // add newPiece (this conjunction and the following piece)
+ // to the conjunctions constant so that it is recognized
+ // as a conjunction in the next loop.
// e.g. for ["Lord","of","the Universe"], put "the Universe"
// into the conjunctions constant.
- newPiece = string.Join(" ", pieces.Skip(index).Take(2));
+ newPiece = string.Join(" ", pieces.Skip(i).Take(2));
Conjunctions.Add(newPiece);
- pieces[index] = newPiece;
- pieces.RemoveAt(index + 1);
+ pieces[i] = newPiece;
+ pieces.RemoveAt(i + 1);
continue;
}
- newPiece = string.Join(" ", pieces.Skip(index - 1).Take(3));
+ newPiece = string.Join(" ", pieces.Skip(i - 1).Take(3));
- if (IsTitle(pieces[index - 1]))
+ if (IsTitle(pieces[i - 1]))
{
- // if the second name is a title, assume the first one is too and add the
- // two titles with the conjunction between them to the titles constant
- // so the combo we just created gets parsed as a title.
+ // if the second name is a title, assume the first one is too and add the
+ // two titles with the conjunction between them to the titles constant
+ // so the combo we just created gets parsed as a title.
// e.g. "Mr. and Mrs." becomes a title.
Titles.Add(newPiece);
}
- pieces[index - 1] = newPiece;
- pieces.RemoveAt(index);
- pieces.RemoveAt(index);
+ pieces[i - 1] = newPiece;
+ pieces.RemoveAt(i);
+ pieces.RemoveAt(i);
}
}
@@ -720,6 +737,7 @@ internal static string[] joinOnConjunctions(List pieces, int additionalP
return pieces.ToArray();
}
+
#endregion
#region Capitalization Support
@@ -736,11 +754,9 @@ private static string CapitalizeWord(string word)
}
// "phd" => "Ph.D."; "ii" => "II"
- var exception = CapitalizationExceptions.FirstOrDefault(tup => tup.Item1 == wordLower);
-
- if (exception != null)
+ if (CapitalizationExceptions.TryGetValue(wordLower, out var exception))
{
- return exception.Item2;
+ return exception;
}
// special case: "macbeth" should be "MacBeth"; "mcbride" -> "McBride"
@@ -773,21 +789,22 @@ private static string CapitalizePiece(string piece)
///
public void Normalize()
{
- _TitleList = _TitleList.Select(CapitalizePiece).ToList();
- _FirstList = _FirstList.Select(CapitalizePiece).ToList();
- _MiddleList = _MiddleList.Select(CapitalizePiece).ToList();
- _LastList = _LastList.Select(CapitalizePiece).ToList(); // CapitalizePiece recognizes prefixes, so its okay to normalize "van der waals" like this
- _SuffixList = _SuffixList.Select(CapitalizePiece).ToList();
- _NicknameList = _NicknameList.Select(CapitalizePiece).ToList();
- _LastBaseList = _LastBaseList.Select(CapitalizePiece).ToList();
+ _titleList = _titleList.Select(CapitalizePiece).ToList();
+ _firstList = _firstList.Select(CapitalizePiece).ToList();
+ _middleList = _middleList.Select(CapitalizePiece).ToList();
+ _lastList = _lastList.Select(CapitalizePiece).ToList(); // CapitalizePiece recognizes prefixes, so it's okay to normalize "van der waals" like this
+ _suffixList = _suffixList.Select(CapitalizePiece).ToList();
+ _nicknameList = _nicknameList.Select(CapitalizePiece).ToList();
+ _lastBaseList = _lastBaseList.Select(CapitalizePiece).ToList();
// normalizing _LastPrefixList would effectively be a no-op, so don't bother calling it
- var fullNamePieces = _FullName
+ var fullNamePieces = _fullName
.Split(new[] { " " }, StringSplitOptions.RemoveEmptyEntries)
.Select(CapitalizePiece);
- _FullName = string.Join(" ", fullNamePieces);
+ _fullName = string.Join(" ", fullNamePieces);
}
+
#endregion
}
-}
+}
\ No newline at end of file
diff --git a/NameParser/NameParser/Prefer.cs b/NameParser/NameParser/Prefer.cs
index d6c4627..51722a8 100644
--- a/NameParser/NameParser/Prefer.cs
+++ b/NameParser/NameParser/Prefer.cs
@@ -1,6 +1,4 @@
using System;
-using System.Collections.Generic;
-using System.Text;
namespace NameParser
{
@@ -10,12 +8,11 @@ public enum Prefer
Default = 0,
///
- /// For Issue #20, when the parser detects a Title and a Last with prefixes (eg, "Mr. Del Richards"),
- /// convert the prefix to a first name.
- ///
+ /// For Issue #20, when the parser detects a Title and a Last with prefixes (e.g. "Mr. Del Richards"),
+ /// convert the prefix to a first name.
/// This can cause incorrect flipping of prefix to first (eg, "Mr. Van Rossum"), so you should use
/// this flag only when you know your data has a first name.
///
- FirstOverPrefix = 1,
+ FirstOverPrefix = 1
}
-}
+}
\ No newline at end of file
diff --git a/NameParser/NameParserTest/NameParserTest.csproj b/NameParser/NameParserTest/NameParserTest.csproj
index e015f5a..9288d72 100644
--- a/NameParser/NameParserTest/NameParserTest.csproj
+++ b/NameParser/NameParserTest/NameParserTest.csproj
@@ -1,11 +1,11 @@
- netcoreapp2.1
+ net8.0
-
-
-
+
+
+
diff --git a/NameParser/NameParserTest/NameParserTests.cs b/NameParser/NameParserTest/NameParserTests.cs
index 5f1cfdb..9695264 100644
--- a/NameParser/NameParserTest/NameParserTests.cs
+++ b/NameParser/NameParserTest/NameParserTests.cs
@@ -2,33 +2,31 @@
using NameParser;
using System;
-namespace NameParseTest
+namespace NameParserTest
{
[TestClass]
public class NameParserTests
{
[TestMethod]
- [ExpectedException(typeof(ArgumentNullException))]
public void NullInput()
{
- var parsed = new HumanName(null);
+ Assert.ThrowsExactly(() => new HumanName(null));
}
-
[TestMethod]
public void BlankInput()
{
var parsed = new HumanName(string.Empty);
- Assert.AreEqual(string.Empty, parsed.First);
- Assert.AreEqual(string.Empty, parsed.Middle);
- Assert.AreEqual(string.Empty, parsed.Last);
- Assert.AreEqual(string.Empty, parsed.Title);
- Assert.AreEqual(string.Empty, parsed.Nickname);
- Assert.AreEqual(string.Empty, parsed.Suffix);
+ Assert.IsEmpty(parsed.First);
+ Assert.IsEmpty(parsed.Middle);
+ Assert.IsEmpty(parsed.Last);
+ Assert.IsEmpty(parsed.Title);
+ Assert.IsEmpty(parsed.Nickname);
+ Assert.IsEmpty(parsed.Suffix);
}
[TestMethod]
- public void JFK()
+ public void Jfk()
{
var jfk = new HumanName("president john 'jack' fitzgerald kennedy");
@@ -36,11 +34,11 @@ public void JFK()
Assert.AreEqual("john", jfk.First);
Assert.AreEqual("fitzgerald", jfk.Middle);
Assert.AreEqual("kennedy", jfk.Last);
- Assert.AreEqual(string.Empty, jfk.Suffix);
+ Assert.IsEmpty(jfk.Suffix);
Assert.AreEqual("jack", jfk.Nickname);
Assert.AreEqual("president john fitzgerald kennedy", jfk.FullName);
Assert.AreEqual("kennedy", jfk.LastBase);
- Assert.AreEqual(string.Empty, jfk.LastPrefixes);
+ Assert.IsEmpty(jfk.LastPrefixes);
jfk.Normalize();
@@ -48,11 +46,11 @@ public void JFK()
Assert.AreEqual("John", jfk.First);
Assert.AreEqual("Fitzgerald", jfk.Middle);
Assert.AreEqual("Kennedy", jfk.Last);
- Assert.AreEqual(string.Empty, jfk.Suffix);
+ Assert.IsEmpty(jfk.Suffix);
Assert.AreEqual("Jack", jfk.Nickname);
Assert.AreEqual("President John Fitzgerald Kennedy", jfk.FullName);
Assert.AreEqual("Kennedy", jfk.LastBase);
- Assert.AreEqual(string.Empty, jfk.LastPrefixes);
+ Assert.IsEmpty(jfk.LastPrefixes);
}
[TestMethod]
@@ -62,25 +60,25 @@ public void Nixon()
Assert.AreEqual("mr president", nixon.Title);
Assert.AreEqual("richard", nixon.First);
- Assert.AreEqual(string.Empty, nixon.Middle);
+ Assert.IsEmpty(nixon.Middle);
Assert.AreEqual("nixon", nixon.Last);
- Assert.AreEqual(string.Empty, nixon.Suffix);
+ Assert.IsEmpty(nixon.Suffix);
Assert.AreEqual("dick", nixon.Nickname);
Assert.AreEqual("mr president richard nixon", nixon.FullName);
Assert.AreEqual("nixon", nixon.LastBase);
- Assert.AreEqual(string.Empty, nixon.LastPrefixes);
+ Assert.IsEmpty(nixon.LastPrefixes);
nixon.Normalize();
Assert.AreEqual("Mr President", nixon.Title);
Assert.AreEqual("Richard", nixon.First);
- Assert.AreEqual(string.Empty, nixon.Middle);
+ Assert.IsEmpty(nixon.Middle);
Assert.AreEqual("Nixon", nixon.Last);
- Assert.AreEqual(string.Empty, nixon.Suffix);
+ Assert.IsEmpty(nixon.Suffix);
Assert.AreEqual("Dick", nixon.Nickname);
Assert.AreEqual("Mr President Richard Nixon", nixon.FullName);
Assert.AreEqual("Nixon", nixon.LastBase);
- Assert.AreEqual(string.Empty, nixon.LastPrefixes);
+ Assert.IsEmpty(nixon.LastPrefixes);
}
[TestMethod]
@@ -88,24 +86,23 @@ public void TitleFirstOrLastName()
{
var mrJones = new HumanName("Mr. Jones");
Assert.AreEqual("Mr.", mrJones.Title);
- Assert.AreEqual(string.Empty, mrJones.First);
- Assert.AreEqual(string.Empty, mrJones.Middle);
+ Assert.IsEmpty(mrJones.First);
+ Assert.IsEmpty(mrJones.Middle);
Assert.AreEqual("Jones", mrJones.Last);
- Assert.AreEqual(string.Empty, mrJones.Suffix);
- Assert.AreEqual(string.Empty, mrJones.Nickname);
+ Assert.IsEmpty(mrJones.Suffix);
+ Assert.IsEmpty(mrJones.Nickname);
Assert.AreEqual("Jones", mrJones.LastBase);
- Assert.AreEqual(string.Empty, mrJones.LastPrefixes);
-
+ Assert.IsEmpty(mrJones.LastPrefixes);
var uncleAdam = new HumanName("Uncle Adam");
Assert.AreEqual("Uncle", uncleAdam.Title);
Assert.AreEqual("Adam", uncleAdam.First);
- Assert.AreEqual(string.Empty, uncleAdam.Middle);
- Assert.AreEqual(string.Empty, uncleAdam.Last);
- Assert.AreEqual(string.Empty, uncleAdam.Suffix);
- Assert.AreEqual(string.Empty, uncleAdam.Nickname);
- Assert.AreEqual(string.Empty, uncleAdam.LastBase);
- Assert.AreEqual(string.Empty, uncleAdam.LastPrefixes);
+ Assert.IsEmpty(uncleAdam.Middle);
+ Assert.IsEmpty(uncleAdam.Last);
+ Assert.IsEmpty(uncleAdam.Suffix);
+ Assert.IsEmpty(uncleAdam.Nickname);
+ Assert.IsEmpty(uncleAdam.LastBase);
+ Assert.IsEmpty(uncleAdam.LastPrefixes);
}
[TestMethod]
@@ -114,7 +111,7 @@ public void DifferentInputsSameValues()
var fml = new HumanName("john x smith");
var lfm = new HumanName("smith, john x");
- Assert.IsTrue(fml == lfm);
+ Assert.AreEqual(fml, lfm);
}
[TestMethod]
@@ -122,23 +119,23 @@ public void NicknameAtBeginning_DoubleQuote()
{
var parsed = new HumanName("\"TREY\" ROBERT HENRY BUSH III");
- Assert.AreEqual(parsed.First, "ROBERT");
- Assert.AreEqual(parsed.Middle, "HENRY");
- Assert.AreEqual(parsed.Last, "BUSH");
- Assert.AreEqual(parsed.Nickname, "TREY");
- Assert.AreEqual(parsed.Suffix, "III");
+ Assert.AreEqual("ROBERT", parsed.First);
+ Assert.AreEqual("HENRY", parsed.Middle);
+ Assert.AreEqual("BUSH", parsed.Last);
+ Assert.AreEqual("TREY", parsed.Nickname);
+ Assert.AreEqual("III", parsed.Suffix);
}
- [TestMethod]
+ [TestMethod]
public void NicknameAtBeginning_SingleQuote()
{
var parsed = new HumanName("'TREY' ROBERT HENRY BUSH III");
- Assert.AreEqual(parsed.First, "ROBERT");
- Assert.AreEqual(parsed.Middle, "HENRY");
- Assert.AreEqual(parsed.Last, "BUSH");
- Assert.AreEqual(parsed.Nickname, "TREY");
- Assert.AreEqual(parsed.Suffix, "III");
+ Assert.AreEqual("ROBERT", parsed.First);
+ Assert.AreEqual("HENRY", parsed.Middle);
+ Assert.AreEqual("BUSH", parsed.Last);
+ Assert.AreEqual("TREY", parsed.Nickname);
+ Assert.AreEqual("III", parsed.Suffix);
}
[TestMethod]
@@ -146,7 +143,7 @@ public void LastBaseAndPrefixes()
{
var parsed = new HumanName("John Smith");
Assert.AreEqual("Smith", parsed.Last);
- Assert.AreEqual(string.Empty, parsed.LastPrefixes);
+ Assert.IsEmpty(parsed.LastPrefixes);
Assert.AreEqual("Smith", parsed.LastBase);
parsed = new HumanName("johannes van der waals");
@@ -163,7 +160,7 @@ public void LastBaseAndPrefixes()
}
[TestMethod]
- public void TwoNames_MacAthur()
+ public void TwoNames_MacArthur()
{
HumanName.ParseMultipleNames = true;
var parsed = new HumanName("John D. and Catherine T. MacArthur");
@@ -180,7 +177,6 @@ public void TwoNames_MacAthur()
Assert.IsNull(parsed.AdditionalName.AdditionalName);
-
parsed = new HumanName("John D. & Catherine T. MacArthur");
Assert.AreEqual("John", parsed.First);
@@ -204,14 +200,14 @@ public void TwoNames_TitleFirstInitialLast()
Assert.AreEqual("Mr", parsed.Title);
Assert.AreEqual("S", parsed.First);
- Assert.AreEqual("", parsed.Middle);
+ Assert.IsEmpty(parsed.Middle);
Assert.AreEqual("Bloggs", parsed.Last);
Assert.IsNotNull(parsed.AdditionalName);
Assert.AreEqual("Miss", parsed.AdditionalName.Title);
Assert.AreEqual("L", parsed.AdditionalName.First);
- Assert.AreEqual("", parsed.AdditionalName.Middle);
+ Assert.IsEmpty(parsed.AdditionalName.Middle);
Assert.AreEqual("Jones", parsed.AdditionalName.Last);
Assert.IsNull(parsed.AdditionalName.AdditionalName);
@@ -268,8 +264,8 @@ public void ThreeNames()
public void Parens()
{
var johnSmith = new HumanName("(John Smith)");
- Assert.AreEqual(string.Empty, johnSmith.First);
- Assert.AreEqual(string.Empty, johnSmith.Last);
+ Assert.IsEmpty(johnSmith.First);
+ Assert.IsEmpty(johnSmith.Last);
Assert.AreEqual("John Smith", johnSmith.Nickname);
}
@@ -282,7 +278,6 @@ public void FirstMiddleLastSuffix_NoCommas()
Assert.AreEqual("Smith", john.Last);
Assert.AreEqual("III", john.Suffix);
-
var robert = new HumanName("Robert Lee Elder III");
Assert.AreEqual("Robert", robert.First);
Assert.AreEqual("Lee", robert.Middle);
@@ -295,10 +290,10 @@ public void TwoCommaWithMiddleName()
{
var parsed = new HumanName("Surname, John Middle, III");
- Assert.AreEqual(parsed.First, "John");
- Assert.AreEqual(parsed.Middle, "Middle");
- Assert.AreEqual(parsed.Last, "Surname");
- Assert.AreEqual(parsed.Suffix, "III");
+ Assert.AreEqual("John", parsed.First);
+ Assert.AreEqual("Middle", parsed.Middle);
+ Assert.AreEqual("Surname", parsed.Last);
+ Assert.AreEqual("III", parsed.Suffix);
}
[TestMethod]
@@ -311,7 +306,6 @@ public void FirstLastPrefixesLastSuffix_NoCommas()
Assert.AreEqual("De Leon", valeriano.Last);
Assert.AreEqual("JR.", valeriano.Suffix);
-
var quincy = new HumanName("Quincy De La Rosa Sr");
Assert.AreEqual("Quincy", quincy.First);
Assert.AreEqual("De La", quincy.LastPrefixes);
@@ -333,37 +327,33 @@ public void Prefix_AsFirstName(string full, string first, string middle, string
Assert.AreEqual(last, sut.Last);
}
-
[TestMethod]
public void Conjunctions()
{
- var mice = new HumanName("mrs and mrs mickey and minnie mouse");
+ Assert.IsNotNull(new HumanName("mrs and mrs mickey and minnie mouse"));
}
-
///
/// https://github.com/aeshirey/NameParserSharp/issues/18
///
[TestMethod]
public void AddToLists()
{
- var as_is = new HumanName("Mr. John Smith 2nd");
- Assert.AreEqual("Mr.", as_is.Title);
- Assert.AreEqual("John", as_is.First);
- Assert.AreEqual("Smith", as_is.Middle);
- Assert.AreEqual("2nd", as_is.Last);
- Assert.AreEqual("", as_is.Suffix);
-
+ var parsed = new HumanName("Mr. John Smith 2nd");
+ Assert.AreEqual("Mr.", parsed.Title);
+ Assert.AreEqual("John", parsed.First);
+ Assert.AreEqual("Smith", parsed.Middle);
+ Assert.AreEqual("2nd", parsed.Last);
+ Assert.IsEmpty(parsed.Suffix);
HumanName.Suffixes.Add("2nd");
- var with_2nd = new HumanName("Mr. John Smith 2nd");
- Assert.AreEqual("Mr.", with_2nd.Title);
- Assert.AreEqual("John", with_2nd.First);
- Assert.AreEqual("Smith", with_2nd.Last);
- Assert.AreEqual("2nd", with_2nd.Suffix);
+ var withSuffix = new HumanName("Mr. John Smith 2nd");
+ Assert.AreEqual("Mr.", withSuffix.Title);
+ Assert.AreEqual("John", withSuffix.First);
+ Assert.AreEqual("Smith", withSuffix.Last);
+ Assert.AreEqual("2nd", withSuffix.Suffix);
}
-
///
/// https://github.com/aeshirey/NameParserSharp/issues/20
///
@@ -371,18 +361,18 @@ public void AddToLists()
public void FirstNameIsPrefix()
{
// Default behavior
- var parsed_prefix = new HumanName("Mr. Del Richards");
- Assert.AreEqual(parsed_prefix.Title, "Mr.");
- Assert.AreEqual(parsed_prefix.First, "");
- Assert.AreEqual(parsed_prefix.Last, "Del Richards");
- Assert.AreEqual(parsed_prefix.LastPrefixes, "Del");
+ var parsedPrefix = new HumanName("Mr. Del Richards");
+ Assert.AreEqual("Mr.", parsedPrefix.Title);
+ Assert.IsEmpty(parsedPrefix.First);
+ Assert.AreEqual("Del Richards", parsedPrefix.Last);
+ Assert.AreEqual("Del", parsedPrefix.LastPrefixes);
// A single prefix should be treated as a first name when no first exists
- var parsed_first = new HumanName("Mr. Del Richards", Prefer.FirstOverPrefix);
- Assert.AreEqual(parsed_first.Title, "Mr.");
- Assert.AreEqual(parsed_first.First, "Del");
- Assert.AreEqual(parsed_first.Last, "Richards");
- Assert.AreEqual(parsed_first.LastPrefixes, "");
+ var parsedFirst = new HumanName("Mr. Del Richards", Prefer.FirstOverPrefix);
+ Assert.AreEqual("Mr.", parsedFirst.Title);
+ Assert.AreEqual("Del", parsedFirst.First);
+ Assert.AreEqual("Richards", parsedFirst.Last);
+ Assert.IsEmpty(parsedFirst.LastPrefixes);
}
}
}
diff --git a/README.md b/README.md
index 8777852..63f1905 100644
--- a/README.md
+++ b/README.md
@@ -1,12 +1,11 @@
# NameParserSharp
-Based upon python [nameparser 0.36](https://pypi.python.org/pypi/nameparser), NameParserSharp is a C# library that parses a human name into constituent fields `Title`, `First`, `Middle`, `Last`, `Suffix`, and `Nickname` from the `HumanName` class. NameParserSharp implements the functionality of the Python project on which it is based in a C# idiomatic way. It also,
+Based upon the Python package [nameparser 0.36](https://pypi.python.org/pypi/nameparser), NameParserSharp is a C# library that parses a human name into the constituent fields `Title`, `First`, `Middle`, `Last`, `Suffix`, and `Nickname` of the `HumanName` class. NameParserSharp implements the functionality of the Python project on which it is based in idiomatic C#. It also:
* eliminates nearly all regular expressions for efficiency
* adds unit tests
* improves nickname handling to expand delimiters: `John (Jack) Torrence` == `John 'Jack' Torrence` == `John "Jack" Torrence`
* parses out multiple names from a single string as you might expect, as in `mr john and mrs jane doe`
-
## Installation
### Using NuGet Package Manager
@@ -79,6 +78,3 @@ name.Normalize();
Console.WriteLine(name.FullName); // Output: Juan de Garcia
```
-
-
-