From 682813c09a4b0fe50e1fa7b57f32c35e10993905 Mon Sep 17 00:00:00 2001 From: Nathan Baulch Date: Mon, 25 Aug 2025 09:40:07 +1000 Subject: [PATCH 01/14] Minor linting fixes --- .gitignore | 4 +- LICENSE | 1 - NameParser/NameParser/Config.cs | 29 ++---- NameParser/NameParser/Parser.cs | 93 +++++++++----------- NameParser/NameParser/Prefer.cs | 11 +-- NameParser/NameParserTest/NameParserTests.cs | 31 +++---- README.md | 6 +- 7 files changed, 66 insertions(+), 109 deletions(-) diff --git a/.gitignore b/.gitignore index 9f24499..106d3d6 100644 --- a/.gitignore +++ b/.gitignore @@ -187,4 +187,6 @@ FakesAssemblies/ # LightSwitch generated files GeneratedArtifacts/ _Pvt_Extensions/ -ModelManifest.xml \ No newline at end of file +ModelManifest.xml + +.idea diff --git a/LICENSE b/LICENSE index 5f2dd7f..19e3071 100644 --- a/LICENSE +++ b/LICENSE @@ -502,4 +502,3 @@ necessary. Here is a sample; alter the names: Ty Coon, President of Vice That's all there is to it! - diff --git a/NameParser/NameParser/Config.cs b/NameParser/NameParser/Config.cs index b52b088..5e6a41a 100644 --- a/NameParser/NameParser/Config.cs +++ b/NameParser/NameParser/Config.cs @@ -23,13 +23,12 @@ public partial class HumanName /// public static readonly ISet Conjunctions = new HashSet { "&", "and", "et", "e", "of", "the", "und", "y" }; - /// /// Name pieces that appear before a last name. They join to the piece that follows them to make one new piece. /// - public static readonly ISet Prefixes = new HashSet + public static readonly ISet Prefixes = new HashSet { - "abu", "bon", "bin", "da", "dal", "de", "del", "dem", "den", "der", "de", "di", "dí", "het", "ibn", "in", "la", "le", "onder", "op", "san", "santa", "st", "ste", "'t", "ten", "van", "vel", "von" + "abu", "bon", "bin", "da", "dal", "de", "del", "dem", "den", "der", "di", "dí", "het", "ibn", "in", "la", "le", "onder", "op", "san", "santa", "st", "ste", "'t", "ten", "van", "vel", "von" }; /// @@ -40,11 +39,10 @@ public partial class HumanName /// /// Pieces that come at the end of the name but are not last names. These potentially /// conflict with initials that might be at the end of the name. - /// These may be updated in the future because some of them are actually titles that just /// come at the end of the name, so semantically this is wrong. Positionally, it's correct. /// - public static readonly ISet Suffixes = new HashSet() + public static readonly ISet Suffixes = new HashSet { "esq", "esquire", @@ -99,31 +97,14 @@ public partial class HumanName "pope" }; - /// /// **Cannot include things that could also be first names**, e.g. "dean". /// Many of these from wikipedia: https://en.wikipedia.org/wiki/Title. - /// The parser recognizes chains of these including conjunctions allowing + /// The parser recognizes chains of these including conjunctions allowing /// recognition titles like "Deputy Secretary of State". /// - public static readonly ISet Titles = new HashSet + public static readonly ISet Titles = new HashSet(FirstNameTitles) { - // - "sir", - "dame", - "king", - "queen", - "master", - "maid", - "uncle", - "auntie", - "aunt", - "brother", - "sister", - "mother", - "father", - "pope", - // "dr", "doctor", "miss", diff --git a/NameParser/NameParser/Parser.cs b/NameParser/NameParser/Parser.cs index 4240e5b..5de194b 100644 --- a/NameParser/NameParser/Parser.cs +++ b/NameParser/NameParser/Parser.cs @@ -8,12 +8,13 @@ namespace NameParser /// /// Parse a person's name into individual components. /// Instantiation assigns to "fullName", and assignment to "fullName" - /// triggers parseFullName. After parsing the name, these instance + /// triggers parseFullName. After parsing the name, these instance /// attributes are available. /// public partial class HumanName { #region Properties + /// /// Indicates whether any values were parsed out of the provided /// @@ -26,7 +27,7 @@ public partial class HumanName /// public string FullName { - get { return _FullName; } + get => _FullName; private set { _OriginalName = value; @@ -64,12 +65,13 @@ private set /// If is true and the input contains "&" or "and", the additional /// name will be parsed out and put into a second record. For example, /// "John D. and Catherine T. MacArthur" should be parsed as {John, D, MacArthur} with an AdditionalName - /// set to the parsed value {Catherine, T, MacAthur}. + /// set to the parsed value {Catherine, T, MacArthur}. /// public HumanName AdditionalName { get; private set; } public string LastBase => string.Join(" ", _LastBaseList); public string LastPrefixes => string.Join(" ", _LastPrefixList); + #endregion private string _FullName, _OriginalName; @@ -88,11 +90,10 @@ public HumanName(string fullName, Prefer prefs = Prefer.Default) { if (fullName == null) { - throw new ArgumentNullException("fullName"); + throw new ArgumentNullException(nameof(fullName)); } this.prefs = prefs; - FullName = fullName; } @@ -103,7 +104,7 @@ public HumanName(string fullName, Prefer prefs = Prefer.Default) return true; } - if (((object)left == null) || ((object)right == null)) + if ((object)left == null || (object)right == null) { return false; } @@ -175,7 +176,8 @@ public Dictionary AsDictionary(bool includeEmpty = true) return d; } - #region Parse helpers + #region Parse helpers + private static bool IsTitle(string value) { return Titles.Contains(value.ToLower().Replace(".", string.Empty)); @@ -209,7 +211,6 @@ private static bool AreSuffixes(IEnumerable pieces) private static bool IsRootname(string piece) { var lcPiece = piece.ToLower().Replace(".", string.Empty); - return !Suffixes.Contains(lcPiece) && !Prefixes.Contains(lcPiece) && !Titles.Contains(lcPiece) @@ -223,16 +224,16 @@ private static bool IsRootname(string piece) /// True iff matches the regex "^[A-Za-z].?$" private static bool IsAnInitial(string value) { - if (string.IsNullOrEmpty(value) || value.Length > 2) - { - return false; - } - - return char.IsLetter(value[0]) && (value.Length == 1 || value[1] == '.'); + return !string.IsNullOrEmpty(value) + && value.Length <= 2 + && char.IsLetter(value[0]) + && (value.Length == 1 || value[1] == '.'); } + #endregion #region full name parser + /// /// If there are only two parts and one is a title, assume it's a last name /// instead of a first name. e.g. Mr. Johnson. Unless it's a special title @@ -261,7 +262,7 @@ private void PostProcessFirstnames() /// /// Parse out the last name components into prefixes and a base last name /// in order to allow sorting. Prefixes are those in , - /// start off and are contiguous. See + /// start off and are contiguous. See /// private void PostProcessLastname() { @@ -276,18 +277,16 @@ private void PostProcessLastname() prefixCount++; } - if (this.prefs.HasFlag(Prefer.FirstOverPrefix) - && this._FirstList.Count == 0 + if (prefs.HasFlag(Prefer.FirstOverPrefix) + && _FirstList.Count == 0 && prefixCount == 1 && words.Count > 1) { _FirstList = words.Take(1).ToList(); - _LastList = words.Skip(1).ToList(); } else { - _LastPrefixList = words.Take(prefixCount).ToList(); } @@ -323,11 +322,11 @@ private void PostProcessAdditionalName() /// /// The main parse method for the parser. This method is run upon assignment to the /// fullName attribute or instantiation. - /// + /// /// Basic flow is to hand off to `pre_process` to handle nicknames. It /// then splits on commas and chooses a code path depending on the number of commas. /// `parsePieces` then splits those parts on spaces and - /// `joinOnConjunctions` joins any pieces next to conjunctions. + /// `joinOnConjunctions` joins any pieces next to conjunctions. /// private void ParseFullName() { @@ -336,23 +335,17 @@ private void ParseFullName() if (_FullName.Contains('&')) { var split = _FullName.IndexOf('&'); - var primary = _FullName.Substring(0, split); - var secondary = _FullName.Substring(split + 1); AdditionalName = new HumanName(secondary); - _FullName = primary; } else if (_FullName.ToLowerInvariant().Contains(" and ")) { var split = _FullName.IndexOf(" and ", StringComparison.InvariantCultureIgnoreCase); - var primary = _FullName.Substring(0, split); - var secondary = _FullName.Substring(split + 5 /* length of " and " */); AdditionalName = new HumanName(secondary); - _FullName = primary; } } @@ -439,7 +432,7 @@ private void ParseFullName() // suffix comma: title first middle last [suffix], suffix [suffix] [, suffix] // parts[0], parts[1:...] _SuffixList = _SuffixList.Concat(parts.Skip(1)).ToList(); - var pieces = ParsePieces(parts[0].Split(new[] { ' ' })); + var pieces = ParsePieces(parts[0].Split(' ')); for (var i = 0; i < pieces.Length; i++) { @@ -492,15 +485,14 @@ private void ParseFullName() { // lastname comma: last [suffix], title first middles[,] suffix [,suffix] // parts[0], parts[1], parts[2:...] - var pieces = ParsePieces(parts[1].Split(new[] { ' ' }), 1); + var pieces = ParsePieces(parts[1].Split(' '), 1); // lastname part may have suffixes in it - var lastnamePieces = ParsePieces(parts[0].Split(new[] { ' ' }), 1); + var lastnamePieces = ParsePieces(parts[0].Split(' '), 1); foreach (var piece in lastnamePieces) { - - // the first one is always a last name, even if it look like a suffix + // the first one is always a last name, even if it looks like a suffix if (IsSuffix(piece) && _LastList.Any()) { _SuffixList.Add(piece); @@ -515,7 +507,6 @@ private void ParseFullName() { var piece = pieces[i]; var nxt = i == pieces.Length - 1 ? string.Empty : pieces[i + 1]; - if (IsTitle(piece) && (!string.IsNullOrEmpty(nxt) || pieces.Length == 1)) { _TitleList.Add(piece); @@ -533,7 +524,8 @@ private void ParseFullName() _MiddleList.Add(piece); } } - if (parts.Count() >= 3 && !string.IsNullOrEmpty(parts[2])) + + if (parts.Count >= 3 && !string.IsNullOrEmpty(parts[2])) { _SuffixList = _SuffixList.Concat(parts.Skip(2)).ToList(); } @@ -553,10 +545,10 @@ private void ParseFullName() private static void ParseNicknames(ref string fullName, out IList nicknameList) { - // this regex is an improvement upon the original in that it adds apostrophes and appropriately captures + // this regex is an improvement upon the original in that it adds apostrophes and appropriately captures // the nicknames in "john 'jack' kennedy", "richard (dick) nixon" and @"william ""bill"" clinton". - // it also doesn't try to parse out improperly matched inputs that the python version would have such as - // @"john (j"" jones", @"samuel (sammy"" samsonite" + // it also doesn't try to parse out improperly matched inputs that the python version would have such as + // @"john (j"" jones", @"samuel (sammy"" samsonite" // https://code.google.com/p/python-nameparser/issues/detail?id=33 var nicknameRegex = new Regex(@"\s*(?:\((.+?)\))|(?:([""'])(.+?)\2)"); @@ -568,10 +560,8 @@ private static void ParseNicknames(ref string fullName, out IList nickna while (match.Success && match.Groups[0].Value.Length > 0) { nicknameFound = true; - // remove from the full name the nickname plus its identifying boundary (parens or quotes) fullName = fullName.Replace(match.Groups[0].Value, string.Empty); - // keep only the nickname part var matchGroup = match.Groups[0].Value.TrimStart().StartsWith("(") ? 1 : 3; // which regex group was used: 1 is for parens; 3 is single- or double-quoted nicknames nicknameList.Add(match.Groups[matchGroup].Value); @@ -612,9 +602,9 @@ protected static string[] ParsePieces(IEnumerable parts, int additionalP /// new list with piece next to conjunctions merged into one piece with spaces in it. internal static string[] joinOnConjunctions(List pieces, int additionalPartsCount = 0) { - var length = pieces.Count() + additionalPartsCount; + var length = pieces.Count + additionalPartsCount; - // don't join on conjuctions if there are only 2 parts + // don't join on conjunctions if there are only 2 parts if (length < 3) { return pieces.ToArray(); @@ -626,7 +616,7 @@ internal static string[] joinOnConjunctions(List pieces, int additionalP // Join conjunctions to the pieces on either side of them. if (conj.Length == 1 && pieces.Count(IsRootname) < 4) { - // if there are only 3 total parts (minus known titles, suffixes and prefixes) + // if there are only 3 total parts (minus known titles, suffixes and prefixes) // and this conjunction is a single letter, prefer treating it as an initial // rather than a conjunction. // http://code.google.com/p/python-nameparser/issues/detail?id=11 @@ -640,7 +630,7 @@ internal static string[] joinOnConjunctions(List pieces, int additionalP continue; } - if (index < pieces.Count() - 1) + if (index < pieces.Count - 1) { // if this is not the last piece string newPiece; @@ -648,7 +638,6 @@ internal static string[] joinOnConjunctions(List pieces, int additionalP { // if this is the first piece and it's a conjunction var nxt = pieces[index + 1]; - var cons = IsTitle(nxt) ? Conjunctions : Titles; newPiece = string.Join(" ", pieces.Take(2)); cons.Add(newPiece); @@ -660,9 +649,9 @@ internal static string[] joinOnConjunctions(List pieces, int additionalP if (IsConjunction(pieces[index - 1])) { // if the piece in front of this one is a conjunction too, - // add new_piece (this conjuction and the following piece) - // to the conjuctions constant so that it is recognized - // as a conjunction in the next loop. + // add new_piece (this conjunction and the following piece) + // to the conjunctions constant so that it is recognized + // as a conjunction in the next loop. // e.g. for ["Lord","of","the Universe"], put "the Universe" // into the conjunctions constant. @@ -677,9 +666,9 @@ internal static string[] joinOnConjunctions(List pieces, int additionalP if (IsTitle(pieces[index - 1])) { - // if the second name is a title, assume the first one is too and add the - // two titles with the conjunction between them to the titles constant - // so the combo we just created gets parsed as a title. + // if the second name is a title, assume the first one is too and add the + // two titles with the conjunction between them to the titles constant + // so the combo we just created gets parsed as a title. // e.g. "Mr. and Mrs." becomes a title. Titles.Add(newPiece); } @@ -720,6 +709,7 @@ internal static string[] joinOnConjunctions(List pieces, int additionalP return pieces.ToArray(); } + #endregion #region Capitalization Support @@ -776,7 +766,7 @@ public void Normalize() _TitleList = _TitleList.Select(CapitalizePiece).ToList(); _FirstList = _FirstList.Select(CapitalizePiece).ToList(); _MiddleList = _MiddleList.Select(CapitalizePiece).ToList(); - _LastList = _LastList.Select(CapitalizePiece).ToList(); // CapitalizePiece recognizes prefixes, so its okay to normalize "van der waals" like this + _LastList = _LastList.Select(CapitalizePiece).ToList(); // CapitalizePiece recognizes prefixes, so it's okay to normalize "van der waals" like this _SuffixList = _SuffixList.Select(CapitalizePiece).ToList(); _NicknameList = _NicknameList.Select(CapitalizePiece).ToList(); _LastBaseList = _LastBaseList.Select(CapitalizePiece).ToList(); @@ -788,6 +778,7 @@ public void Normalize() _FullName = string.Join(" ", fullNamePieces); } + #endregion } } diff --git a/NameParser/NameParser/Prefer.cs b/NameParser/NameParser/Prefer.cs index d6c4627..51722a8 100644 --- a/NameParser/NameParser/Prefer.cs +++ b/NameParser/NameParser/Prefer.cs @@ -1,6 +1,4 @@ using System; -using System.Collections.Generic; -using System.Text; namespace NameParser { @@ -10,12 +8,11 @@ public enum Prefer Default = 0, /// - /// For Issue #20, when the parser detects a Title and a Last with prefixes (eg, "Mr. Del Richards"), - /// convert the prefix to a first name. - /// + /// For Issue #20, when the parser detects a Title and a Last with prefixes (eg, "Mr. Del Richards"), + /// convert the prefix to a first name. /// This can cause incorrect flipping of prefix to first (eg, "Mr. Van Rossum"), so you should use /// this flag only when you know your data has a first name. /// - FirstOverPrefix = 1, + FirstOverPrefix = 1 } -} +} \ No newline at end of file diff --git a/NameParser/NameParserTest/NameParserTests.cs b/NameParser/NameParserTest/NameParserTests.cs index 5f1cfdb..6c87caf 100644 --- a/NameParser/NameParserTest/NameParserTests.cs +++ b/NameParser/NameParserTest/NameParserTests.cs @@ -2,7 +2,7 @@ using NameParser; using System; -namespace NameParseTest +namespace NameParserTest { [TestClass] public class NameParserTests @@ -11,10 +11,9 @@ public class NameParserTests [ExpectedException(typeof(ArgumentNullException))] public void NullInput() { - var parsed = new HumanName(null); + Assert.IsNotNull(new HumanName(null)); } - [TestMethod] public void BlankInput() { @@ -96,7 +95,6 @@ public void TitleFirstOrLastName() Assert.AreEqual("Jones", mrJones.LastBase); Assert.AreEqual(string.Empty, mrJones.LastPrefixes); - var uncleAdam = new HumanName("Uncle Adam"); Assert.AreEqual("Uncle", uncleAdam.Title); Assert.AreEqual("Adam", uncleAdam.First); @@ -128,8 +126,8 @@ public void NicknameAtBeginning_DoubleQuote() Assert.AreEqual(parsed.Nickname, "TREY"); Assert.AreEqual(parsed.Suffix, "III"); } - [TestMethod] + [TestMethod] public void NicknameAtBeginning_SingleQuote() { var parsed = new HumanName("'TREY' ROBERT HENRY BUSH III"); @@ -163,7 +161,7 @@ public void LastBaseAndPrefixes() } [TestMethod] - public void TwoNames_MacAthur() + public void TwoNames_MacArthur() { HumanName.ParseMultipleNames = true; var parsed = new HumanName("John D. and Catherine T. MacArthur"); @@ -180,7 +178,6 @@ public void TwoNames_MacAthur() Assert.IsNull(parsed.AdditionalName.AdditionalName); - parsed = new HumanName("John D. & Catherine T. MacArthur"); Assert.AreEqual("John", parsed.First); @@ -204,14 +201,14 @@ public void TwoNames_TitleFirstInitialLast() Assert.AreEqual("Mr", parsed.Title); Assert.AreEqual("S", parsed.First); - Assert.AreEqual("", parsed.Middle); + Assert.AreEqual(string.Empty, parsed.Middle); Assert.AreEqual("Bloggs", parsed.Last); Assert.IsNotNull(parsed.AdditionalName); Assert.AreEqual("Miss", parsed.AdditionalName.Title); Assert.AreEqual("L", parsed.AdditionalName.First); - Assert.AreEqual("", parsed.AdditionalName.Middle); + Assert.AreEqual(string.Empty, parsed.AdditionalName.Middle); Assert.AreEqual("Jones", parsed.AdditionalName.Last); Assert.IsNull(parsed.AdditionalName.AdditionalName); @@ -282,7 +279,6 @@ public void FirstMiddleLastSuffix_NoCommas() Assert.AreEqual("Smith", john.Last); Assert.AreEqual("III", john.Suffix); - var robert = new HumanName("Robert Lee Elder III"); Assert.AreEqual("Robert", robert.First); Assert.AreEqual("Lee", robert.Middle); @@ -311,7 +307,6 @@ public void FirstLastPrefixesLastSuffix_NoCommas() Assert.AreEqual("De Leon", valeriano.Last); Assert.AreEqual("JR.", valeriano.Suffix); - var quincy = new HumanName("Quincy De La Rosa Sr"); Assert.AreEqual("Quincy", quincy.First); Assert.AreEqual("De La", quincy.LastPrefixes); @@ -333,14 +328,12 @@ public void Prefix_AsFirstName(string full, string first, string middle, string Assert.AreEqual(last, sut.Last); } - [TestMethod] public void Conjunctions() { - var mice = new HumanName("mrs and mrs mickey and minnie mouse"); + Assert.IsNotNull(new HumanName("mrs and mrs mickey and minnie mouse")); } - /// /// https://github.com/aeshirey/NameParserSharp/issues/18 /// @@ -352,8 +345,7 @@ public void AddToLists() Assert.AreEqual("John", as_is.First); Assert.AreEqual("Smith", as_is.Middle); Assert.AreEqual("2nd", as_is.Last); - Assert.AreEqual("", as_is.Suffix); - + Assert.AreEqual(string.Empty, as_is.Suffix); HumanName.Suffixes.Add("2nd"); var with_2nd = new HumanName("Mr. John Smith 2nd"); @@ -363,7 +355,6 @@ public void AddToLists() Assert.AreEqual("2nd", with_2nd.Suffix); } - /// /// https://github.com/aeshirey/NameParserSharp/issues/20 /// @@ -373,7 +364,7 @@ public void FirstNameIsPrefix() // Default behavior var parsed_prefix = new HumanName("Mr. Del Richards"); Assert.AreEqual(parsed_prefix.Title, "Mr."); - Assert.AreEqual(parsed_prefix.First, ""); + Assert.AreEqual(parsed_prefix.First, string.Empty); Assert.AreEqual(parsed_prefix.Last, "Del Richards"); Assert.AreEqual(parsed_prefix.LastPrefixes, "Del"); @@ -382,7 +373,7 @@ public void FirstNameIsPrefix() Assert.AreEqual(parsed_first.Title, "Mr."); Assert.AreEqual(parsed_first.First, "Del"); Assert.AreEqual(parsed_first.Last, "Richards"); - Assert.AreEqual(parsed_first.LastPrefixes, ""); + Assert.AreEqual(parsed_first.LastPrefixes, string.Empty); } } -} +} \ No newline at end of file diff --git a/README.md b/README.md index 8777852..63f1905 100644 --- a/README.md +++ b/README.md @@ -1,12 +1,11 @@ # NameParserSharp -Based upon python [nameparser 0.36](https://pypi.python.org/pypi/nameparser), NameParserSharp is a C# library that parses a human name into constituent fields `Title`, `First`, `Middle`, `Last`, `Suffix`, and `Nickname` from the `HumanName` class. NameParserSharp implements the functionality of the Python project on which it is based in a C# idiomatic way. It also, +Based upon python [nameparser 0.36](https://pypi.python.org/pypi/nameparser), NameParserSharp is a C# library that parses a human name into constituent fields `Title`, `First`, `Middle`, `Last`, `Suffix`, and `Nickname` from the `HumanName` class. NameParserSharp implements the functionality of the Python project on which it is based in a C# idiomatic way. It also, * eliminates nearly all regular expressions for efficiency * adds unit tests * improves nickname handling to expand delimiters: `John (Jack) Torrence` == `John 'Jack' Torrence` == `John "Jack" Torrence` * parses out multiple names from a single string as you might expect, as in `mr john and mrs jane doe` - ## Installation ### Using NuGet Package Manager @@ -79,6 +78,3 @@ name.Normalize(); Console.WriteLine(name.FullName); // Output: Juan de Garcia ``` - - - From c8507455a936bd68f0ad2fedcd1ddfe28a4472cf Mon Sep 17 00:00:00 2001 From: Nathan Baulch Date: Wed, 27 Aug 2025 13:51:55 +1000 Subject: [PATCH 02/14] Idiomatic identifier names --- NameParser/NameParser/Parser.cs | 207 ++++++++++--------- NameParser/NameParserTest/NameParserTests.cs | 46 ++--- 2 files changed, 127 insertions(+), 126 deletions(-) diff --git a/NameParser/NameParser/Parser.cs b/NameParser/NameParser/Parser.cs index 5de194b..0f39db3 100644 --- a/NameParser/NameParser/Parser.cs +++ b/NameParser/NameParser/Parser.cs @@ -27,20 +27,20 @@ public partial class HumanName /// public string FullName { - get => _FullName; + get => _fullName; private set { - _OriginalName = value; - _FullName = _OriginalName; + _originalName = value; + _fullName = _originalName; - _TitleList = new List(); - _FirstList = new List(); - _MiddleList = new List(); - _LastList = new List(); - _SuffixList = new List(); - _NicknameList = new List(); - _LastBaseList = new List(); - _LastPrefixList = new List(); + _titleList = new List(); + _firstList = new List(); + _middleList = new List(); + _lastList = new List(); + _suffixList = new List(); + _nicknameList = new List(); + _lastBaseList = new List(); + _lastPrefixList = new List(); if (!string.IsNullOrEmpty(value)) { @@ -49,17 +49,17 @@ private set } } - public string Title => string.Join(" ", _TitleList); + public string Title => string.Join(" ", _titleList); - public string First => string.Join(" ", _FirstList); + public string First => string.Join(" ", _firstList); - public string Middle => string.Join(" ", _MiddleList); + public string Middle => string.Join(" ", _middleList); - public string Last => string.Join(" ", _LastList); + public string Last => string.Join(" ", _lastList); - public string Suffix => string.Join(" ", _SuffixList); + public string Suffix => string.Join(" ", _suffixList); - public string Nickname => string.Join(" ", _NicknameList); + public string Nickname => string.Join(" ", _nicknameList); /// /// If is true and the input contains "&" or "and", the additional @@ -69,22 +69,23 @@ private set /// public HumanName AdditionalName { get; private set; } - public string LastBase => string.Join(" ", _LastBaseList); - public string LastPrefixes => string.Join(" ", _LastPrefixList); + public string LastBase => string.Join(" ", _lastBaseList); + public string LastPrefixes => string.Join(" ", _lastPrefixList); #endregion - private string _FullName, _OriginalName; + private string _fullName; + private string _originalName; - private IList _TitleList; - private IList _FirstList; - private IList _MiddleList; - private IList _LastList; - private IList _SuffixList; - private IList _NicknameList; - private IList _LastBaseList; - private IList _LastPrefixList; - private Prefer prefs; + private IList _titleList; + private IList _firstList; + private IList _middleList; + private IList _lastList; + private IList _suffixList; + private IList _nicknameList; + private IList _lastBaseList; + private IList _lastPrefixList; + private readonly Prefer _prefs; public HumanName(string fullName, Prefer prefs = Prefer.Default) { @@ -93,7 +94,7 @@ public HumanName(string fullName, Prefer prefs = Prefer.Default) throw new ArgumentNullException(nameof(fullName)); } - this.prefs = prefs; + _prefs = prefs; FullName = fullName; } @@ -244,17 +245,17 @@ private void PostProcessFirstnames() { if (!string.IsNullOrEmpty(Title) && !FirstNameTitles.Contains(Title.ToLower().Replace(".", string.Empty)) - && 1 == _FirstList.Count + _LastList.Count) + && 1 == _firstList.Count + _lastList.Count) { - if (_FirstList.Any()) + if (_firstList.Any()) { - _LastList = _FirstList; - _FirstList = new List(); + _lastList = _firstList; + _firstList = new List(); } else { - _FirstList = _LastList; - _LastList = new List(); + _firstList = _lastList; + _lastList = new List(); } } } @@ -267,7 +268,7 @@ private void PostProcessFirstnames() private void PostProcessLastname() { // parse out 'words' from the last name - var words = _LastList + var words = _lastList .SelectMany(part => part.Split(' ')) .ToList(); @@ -277,20 +278,20 @@ private void PostProcessLastname() prefixCount++; } - if (prefs.HasFlag(Prefer.FirstOverPrefix) - && _FirstList.Count == 0 + if (_prefs.HasFlag(Prefer.FirstOverPrefix) + && _firstList.Count == 0 && prefixCount == 1 && words.Count > 1) { - _FirstList = words.Take(1).ToList(); - _LastList = words.Skip(1).ToList(); + _firstList = words.Take(1).ToList(); + _lastList = words.Skip(1).ToList(); } else { - _LastPrefixList = words.Take(prefixCount).ToList(); + _lastPrefixList = words.Take(prefixCount).ToList(); } - _LastBaseList = words.Skip(prefixCount).ToList(); + _lastBaseList = words.Skip(prefixCount).ToList(); } private void PostProcessAdditionalName() @@ -305,7 +306,7 @@ private void PostProcessAdditionalName() // the primary's last name from the secondary. if (string.IsNullOrEmpty(Last)) { - _LastList = AdditionalName._LastList; + _lastList = AdditionalName._lastList; } else { @@ -313,7 +314,7 @@ private void PostProcessAdditionalName() var next = AdditionalName; while (next != null && string.IsNullOrEmpty(next.Last)) { - next._LastList = _LastList; + next._lastList = _lastList; next = next.AdditionalName; } } @@ -332,28 +333,28 @@ private void ParseFullName() { if (ParseMultipleNames) { - if (_FullName.Contains('&')) + if (_fullName.Contains('&')) { - var split = _FullName.IndexOf('&'); - var primary = _FullName.Substring(0, split); - var secondary = _FullName.Substring(split + 1); + var split = _fullName.IndexOf('&'); + var primary = _fullName.Substring(0, split); + var secondary = _fullName.Substring(split + 1); AdditionalName = new HumanName(secondary); - _FullName = primary; + _fullName = primary; } - else if (_FullName.ToLowerInvariant().Contains(" and ")) + else if (_fullName.ToLowerInvariant().Contains(" and ")) { - var split = _FullName.IndexOf(" and ", StringComparison.InvariantCultureIgnoreCase); - var primary = _FullName.Substring(0, split); - var secondary = _FullName.Substring(split + 5 /* length of " and " */); + var split = _fullName.IndexOf(" and ", StringComparison.InvariantCultureIgnoreCase); + var primary = _fullName.Substring(0, split); + var secondary = _fullName.Substring(split + 5 /* length of " and " */); AdditionalName = new HumanName(secondary); - _FullName = primary; + _fullName = primary; } } - ParseNicknames(ref _FullName, out _NicknameList); + ParseNicknames(ref _fullName, out _nicknameList); // break up fullName by commas - var parts = _FullName + var parts = _fullName .Split(new[] { ',' }, StringSplitOptions.RemoveEmptyEntries) .Select(part => part.Trim()) .ToList(); @@ -380,50 +381,50 @@ private void ParseFullName() { // some last names appear as titles (https://github.com/aeshirey/NameParserSharp/issues/9) // if we've already parsed out titles, first, or middle names, something appearing as a title may in fact be a last name - if (_FirstList.Count > 0 || _MiddleList.Count > 0) + if (_firstList.Count > 0 || _middleList.Count > 0) { - _LastList.Add(piece); + _lastList.Add(piece); } else { - _TitleList.Add(piece); + _titleList.Add(piece); } } else if (string.IsNullOrEmpty(First)) { - _FirstList.Add(piece); + _firstList.Add(piece); } else if (AreSuffixes(pieces.Skip(i + 1))) { - _LastList.Add(piece); - _SuffixList = _SuffixList.Concat(pieces.Skip(i + 1)).ToList(); + _lastList.Add(piece); + _suffixList = _suffixList.Concat(pieces.Skip(i + 1)).ToList(); break; } else if (!string.IsNullOrEmpty(nxt)) { // another component exists, so this is likely a middle name - _MiddleList.Add(piece); + _middleList.Add(piece); } else if (!ParseMultipleNames || AdditionalName == null) { // no additional name. some last names can appear to be suffixes. try to figure this out - if (_LastList.Count > 0 && IsSuffix(piece)) + if (_lastList.Count > 0 && IsSuffix(piece)) { - _SuffixList.Add(piece); + _suffixList.Add(piece); } else { - _LastList.Add(piece); + _lastList.Add(piece); } } - else if (AdditionalName._LastList.Any() && IsAnInitial(piece)) + else if (AdditionalName._lastList.Any() && IsAnInitial(piece)) { // the additional name has a last, and this one looks like a middle. we'll save as a middle and later will copy the last name. - _MiddleList.Add(piece); + _middleList.Add(piece); } else { - _LastList.Add(piece); + _lastList.Add(piece); } } } @@ -431,7 +432,7 @@ private void ParseFullName() { // suffix comma: title first middle last [suffix], suffix [suffix] [, suffix] // parts[0], parts[1:...] - _SuffixList = _SuffixList.Concat(parts.Skip(1)).ToList(); + _suffixList = _suffixList.Concat(parts.Skip(1)).ToList(); var pieces = ParsePieces(parts[0].Split(' ')); for (var i = 0; i < pieces.Length; i++) @@ -441,20 +442,20 @@ private void ParseFullName() if (IsTitle(piece) && (!string.IsNullOrEmpty(nxt) || pieces.Length == 1)) { - _TitleList.Add(piece); + _titleList.Add(piece); continue; } if (string.IsNullOrEmpty(First)) { - _FirstList.Add(piece); + _firstList.Add(piece); continue; } if (AreSuffixes(pieces.Skip(i + 1))) { - _LastList.Add(piece); - _SuffixList = pieces.Skip(i + 1).Concat(_SuffixList).ToList(); + _lastList.Add(piece); + _suffixList = pieces.Skip(i + 1).Concat(_suffixList).ToList(); break; } @@ -463,21 +464,21 @@ private void ParseFullName() if (!string.IsNullOrEmpty(nxt)) { // another component exists, so this is likely a middle name - _MiddleList.Add(piece); + _middleList.Add(piece); } else if (!ParseMultipleNames || AdditionalName == null) { // no additional name, so treat this as a last - _LastList.Add(piece); + _lastList.Add(piece); } - else if (AdditionalName._LastList.Any() && IsAnInitial(piece)) + else if (AdditionalName._lastList.Any() && IsAnInitial(piece)) { // the additional name has a last, and this one looks like a middle. we'll save as a middle and later will copy the last name. - _MiddleList.Add(piece); + _middleList.Add(piece); } else { - _LastList.Add(piece); + _lastList.Add(piece); } } } @@ -493,13 +494,13 @@ private void ParseFullName() foreach (var piece in lastnamePieces) { // the first one is always a last name, even if it looks like a suffix - if (IsSuffix(piece) && _LastList.Any()) + if (IsSuffix(piece) && _lastList.Any()) { - _SuffixList.Add(piece); + _suffixList.Add(piece); } else { - _LastList.Add(piece); + _lastList.Add(piece); } } @@ -509,34 +510,34 @@ private void ParseFullName() var nxt = i == pieces.Length - 1 ? string.Empty : pieces[i + 1]; if (IsTitle(piece) && (!string.IsNullOrEmpty(nxt) || pieces.Length == 1)) { - _TitleList.Add(piece); + _titleList.Add(piece); } else if (string.IsNullOrEmpty(First)) { - _FirstList.Add(piece); + _firstList.Add(piece); } else if (IsSuffix(piece)) { - _SuffixList.Add(piece); + _suffixList.Add(piece); } else { - _MiddleList.Add(piece); + _middleList.Add(piece); } } if (parts.Count >= 3 && !string.IsNullOrEmpty(parts[2])) { - _SuffixList = _SuffixList.Concat(parts.Skip(2)).ToList(); + _suffixList = _suffixList.Concat(parts.Skip(2)).ToList(); } } - IsUnparsable = !_TitleList.Any() - && !_FirstList.Any() - && !_MiddleList.Any() - && !_LastList.Any() - && !_SuffixList.Any() - && !_NicknameList.Any(); + IsUnparsable = !_titleList.Any() + && !_firstList.Any() + && !_middleList.Any() + && !_lastList.Any() + && !_suffixList.Any() + && !_nicknameList.Any(); PostProcessFirstnames(); PostProcessLastname(); @@ -763,20 +764,20 @@ private static string CapitalizePiece(string piece) /// public void Normalize() { - _TitleList = _TitleList.Select(CapitalizePiece).ToList(); - _FirstList = _FirstList.Select(CapitalizePiece).ToList(); - _MiddleList = _MiddleList.Select(CapitalizePiece).ToList(); - _LastList = _LastList.Select(CapitalizePiece).ToList(); // CapitalizePiece recognizes prefixes, so it's okay to normalize "van der waals" like this - _SuffixList = _SuffixList.Select(CapitalizePiece).ToList(); - _NicknameList = _NicknameList.Select(CapitalizePiece).ToList(); - _LastBaseList = _LastBaseList.Select(CapitalizePiece).ToList(); + _titleList = _titleList.Select(CapitalizePiece).ToList(); + _firstList = _firstList.Select(CapitalizePiece).ToList(); + _middleList = _middleList.Select(CapitalizePiece).ToList(); + _lastList = _lastList.Select(CapitalizePiece).ToList(); // CapitalizePiece recognizes prefixes, so it's okay to normalize "van der waals" like this + _suffixList = _suffixList.Select(CapitalizePiece).ToList(); + _nicknameList = _nicknameList.Select(CapitalizePiece).ToList(); + _lastBaseList = _lastBaseList.Select(CapitalizePiece).ToList(); // normalizing _LastPrefixList would effectively be a no-op, so don't bother calling it - var fullNamePieces = _FullName + var fullNamePieces = _fullName .Split(new[] { " " }, StringSplitOptions.RemoveEmptyEntries) .Select(CapitalizePiece); - _FullName = string.Join(" ", fullNamePieces); + _fullName = string.Join(" ", fullNamePieces); } #endregion diff --git a/NameParser/NameParserTest/NameParserTests.cs b/NameParser/NameParserTest/NameParserTests.cs index 6c87caf..345e663 100644 --- a/NameParser/NameParserTest/NameParserTests.cs +++ b/NameParser/NameParserTest/NameParserTests.cs @@ -27,7 +27,7 @@ public void BlankInput() } [TestMethod] - public void JFK() + public void Jfk() { var jfk = new HumanName("president john 'jack' fitzgerald kennedy"); @@ -340,19 +340,19 @@ public void Conjunctions() [TestMethod] public void AddToLists() { - var as_is = new HumanName("Mr. John Smith 2nd"); - Assert.AreEqual("Mr.", as_is.Title); - Assert.AreEqual("John", as_is.First); - Assert.AreEqual("Smith", as_is.Middle); - Assert.AreEqual("2nd", as_is.Last); - Assert.AreEqual(string.Empty, as_is.Suffix); + var parsed = new HumanName("Mr. John Smith 2nd"); + Assert.AreEqual("Mr.", parsed.Title); + Assert.AreEqual("John", parsed.First); + Assert.AreEqual("Smith", parsed.Middle); + Assert.AreEqual("2nd", parsed.Last); + Assert.AreEqual(string.Empty, parsed.Suffix); HumanName.Suffixes.Add("2nd"); - var with_2nd = new HumanName("Mr. John Smith 2nd"); - Assert.AreEqual("Mr.", with_2nd.Title); - Assert.AreEqual("John", with_2nd.First); - Assert.AreEqual("Smith", with_2nd.Last); - Assert.AreEqual("2nd", with_2nd.Suffix); + var withSuffix = new HumanName("Mr. John Smith 2nd"); + Assert.AreEqual("Mr.", withSuffix.Title); + Assert.AreEqual("John", withSuffix.First); + Assert.AreEqual("Smith", withSuffix.Last); + Assert.AreEqual("2nd", withSuffix.Suffix); } /// @@ -362,18 +362,18 @@ public void AddToLists() public void FirstNameIsPrefix() { // Default behavior - var parsed_prefix = new HumanName("Mr. Del Richards"); - Assert.AreEqual(parsed_prefix.Title, "Mr."); - Assert.AreEqual(parsed_prefix.First, string.Empty); - Assert.AreEqual(parsed_prefix.Last, "Del Richards"); - Assert.AreEqual(parsed_prefix.LastPrefixes, "Del"); + var parsedPrefix = new HumanName("Mr. Del Richards"); + Assert.AreEqual(parsedPrefix.Title, "Mr."); + Assert.AreEqual(parsedPrefix.First, string.Empty); + Assert.AreEqual(parsedPrefix.Last, "Del Richards"); + Assert.AreEqual(parsedPrefix.LastPrefixes, "Del"); // A single prefix should be treated as a first name when no first exists - var parsed_first = new HumanName("Mr. Del Richards", Prefer.FirstOverPrefix); - Assert.AreEqual(parsed_first.Title, "Mr."); - Assert.AreEqual(parsed_first.First, "Del"); - Assert.AreEqual(parsed_first.Last, "Richards"); - Assert.AreEqual(parsed_first.LastPrefixes, string.Empty); + var parsedFirst = new HumanName("Mr. Del Richards", Prefer.FirstOverPrefix); + Assert.AreEqual(parsedFirst.Title, "Mr."); + Assert.AreEqual(parsedFirst.First, "Del"); + Assert.AreEqual(parsedFirst.Last, "Richards"); + Assert.AreEqual(parsedFirst.LastPrefixes, string.Empty); } } -} \ No newline at end of file +} From 45c8c21e36dd777bc35b0ab79fa87875ef3c4216 Mon Sep 17 00:00:00 2001 From: Nathan Baulch Date: Mon, 25 Aug 2025 09:53:12 +1000 Subject: [PATCH 03/14] Upgrade frameworks and packages --- NameParser/NameParser/NameParser.csproj | 2 +- .../NameParserTest/NameParserTest.csproj | 8 +- NameParser/NameParserTest/NameParserTests.cs | 73 +++++++++---------- 3 files changed, 41 insertions(+), 42 deletions(-) diff --git a/NameParser/NameParser/NameParser.csproj b/NameParser/NameParser/NameParser.csproj index 7b8c4ac..0f941f7 100644 --- a/NameParser/NameParser/NameParser.csproj +++ b/NameParser/NameParser/NameParser.csproj @@ -1,6 +1,6 @@ - netstandard20;net45 + netstandard2.0 true 1.5.0.0 1.5.0.0 diff --git a/NameParser/NameParserTest/NameParserTest.csproj b/NameParser/NameParserTest/NameParserTest.csproj index e015f5a..9288d72 100644 --- a/NameParser/NameParserTest/NameParserTest.csproj +++ b/NameParser/NameParserTest/NameParserTest.csproj @@ -1,11 +1,11 @@ - netcoreapp2.1 + net8.0 - - - + + + diff --git a/NameParser/NameParserTest/NameParserTests.cs b/NameParser/NameParserTest/NameParserTests.cs index 345e663..8655439 100644 --- a/NameParser/NameParserTest/NameParserTests.cs +++ b/NameParser/NameParserTest/NameParserTests.cs @@ -8,22 +8,21 @@ namespace NameParserTest public class NameParserTests { [TestMethod] - [ExpectedException(typeof(ArgumentNullException))] public void NullInput() { - Assert.IsNotNull(new HumanName(null)); + Assert.ThrowsExactly(() => new HumanName(null)); } [TestMethod] public void BlankInput() { var parsed = new HumanName(string.Empty); - Assert.AreEqual(string.Empty, parsed.First); - Assert.AreEqual(string.Empty, parsed.Middle); - Assert.AreEqual(string.Empty, parsed.Last); - Assert.AreEqual(string.Empty, parsed.Title); - Assert.AreEqual(string.Empty, parsed.Nickname); - Assert.AreEqual(string.Empty, parsed.Suffix); + Assert.IsEmpty(parsed.First); + Assert.IsEmpty(parsed.Middle); + Assert.IsEmpty(parsed.Last); + Assert.IsEmpty(parsed.Title); + Assert.IsEmpty(parsed.Nickname); + Assert.IsEmpty(parsed.Suffix); } [TestMethod] @@ -35,11 +34,11 @@ public void Jfk() Assert.AreEqual("john", jfk.First); Assert.AreEqual("fitzgerald", jfk.Middle); Assert.AreEqual("kennedy", jfk.Last); - Assert.AreEqual(string.Empty, jfk.Suffix); + Assert.IsEmpty(jfk.Suffix); Assert.AreEqual("jack", jfk.Nickname); Assert.AreEqual("president john fitzgerald kennedy", jfk.FullName); Assert.AreEqual("kennedy", jfk.LastBase); - Assert.AreEqual(string.Empty, jfk.LastPrefixes); + Assert.IsEmpty(jfk.LastPrefixes); jfk.Normalize(); @@ -47,11 +46,11 @@ public void Jfk() Assert.AreEqual("John", jfk.First); Assert.AreEqual("Fitzgerald", jfk.Middle); Assert.AreEqual("Kennedy", jfk.Last); - Assert.AreEqual(string.Empty, jfk.Suffix); + Assert.IsEmpty(jfk.Suffix); Assert.AreEqual("Jack", jfk.Nickname); Assert.AreEqual("President John Fitzgerald Kennedy", jfk.FullName); Assert.AreEqual("Kennedy", jfk.LastBase); - Assert.AreEqual(string.Empty, jfk.LastPrefixes); + Assert.IsEmpty(jfk.LastPrefixes); } [TestMethod] @@ -61,25 +60,25 @@ public void Nixon() Assert.AreEqual("mr president", nixon.Title); Assert.AreEqual("richard", nixon.First); - Assert.AreEqual(string.Empty, nixon.Middle); + Assert.IsEmpty(nixon.Middle); Assert.AreEqual("nixon", nixon.Last); - Assert.AreEqual(string.Empty, nixon.Suffix); + Assert.IsEmpty(nixon.Suffix); Assert.AreEqual("dick", nixon.Nickname); Assert.AreEqual("mr president richard nixon", nixon.FullName); Assert.AreEqual("nixon", nixon.LastBase); - Assert.AreEqual(string.Empty, nixon.LastPrefixes); + Assert.IsEmpty(nixon.LastPrefixes); nixon.Normalize(); Assert.AreEqual("Mr President", nixon.Title); Assert.AreEqual("Richard", nixon.First); - Assert.AreEqual(string.Empty, nixon.Middle); + Assert.IsEmpty(nixon.Middle); Assert.AreEqual("Nixon", nixon.Last); - Assert.AreEqual(string.Empty, nixon.Suffix); + Assert.IsEmpty(nixon.Suffix); Assert.AreEqual("Dick", nixon.Nickname); Assert.AreEqual("Mr President Richard Nixon", nixon.FullName); Assert.AreEqual("Nixon", nixon.LastBase); - Assert.AreEqual(string.Empty, nixon.LastPrefixes); + Assert.IsEmpty(nixon.LastPrefixes); } [TestMethod] @@ -87,23 +86,23 @@ public void TitleFirstOrLastName() { var mrJones = new HumanName("Mr. Jones"); Assert.AreEqual("Mr.", mrJones.Title); - Assert.AreEqual(string.Empty, mrJones.First); - Assert.AreEqual(string.Empty, mrJones.Middle); + Assert.IsEmpty(mrJones.First); + Assert.IsEmpty(mrJones.Middle); Assert.AreEqual("Jones", mrJones.Last); - Assert.AreEqual(string.Empty, mrJones.Suffix); - Assert.AreEqual(string.Empty, mrJones.Nickname); + Assert.IsEmpty(mrJones.Suffix); + Assert.IsEmpty(mrJones.Nickname); Assert.AreEqual("Jones", mrJones.LastBase); - Assert.AreEqual(string.Empty, mrJones.LastPrefixes); + Assert.IsEmpty(mrJones.LastPrefixes); var uncleAdam = new HumanName("Uncle Adam"); Assert.AreEqual("Uncle", uncleAdam.Title); Assert.AreEqual("Adam", uncleAdam.First); - Assert.AreEqual(string.Empty, uncleAdam.Middle); - Assert.AreEqual(string.Empty, uncleAdam.Last); - Assert.AreEqual(string.Empty, uncleAdam.Suffix); - Assert.AreEqual(string.Empty, uncleAdam.Nickname); - Assert.AreEqual(string.Empty, uncleAdam.LastBase); - Assert.AreEqual(string.Empty, uncleAdam.LastPrefixes); + Assert.IsEmpty(uncleAdam.Middle); + Assert.IsEmpty(uncleAdam.Last); + Assert.IsEmpty(uncleAdam.Suffix); + Assert.IsEmpty(uncleAdam.Nickname); + Assert.IsEmpty(uncleAdam.LastBase); + Assert.IsEmpty(uncleAdam.LastPrefixes); } [TestMethod] @@ -144,7 +143,7 @@ public void LastBaseAndPrefixes() { var parsed = new HumanName("John Smith"); Assert.AreEqual("Smith", parsed.Last); - Assert.AreEqual(string.Empty, parsed.LastPrefixes); + Assert.IsEmpty(parsed.LastPrefixes); Assert.AreEqual("Smith", parsed.LastBase); parsed = new HumanName("johannes van der waals"); @@ -201,14 +200,14 @@ public void TwoNames_TitleFirstInitialLast() Assert.AreEqual("Mr", parsed.Title); Assert.AreEqual("S", parsed.First); - Assert.AreEqual(string.Empty, parsed.Middle); + Assert.IsEmpty(parsed.Middle); Assert.AreEqual("Bloggs", parsed.Last); Assert.IsNotNull(parsed.AdditionalName); Assert.AreEqual("Miss", parsed.AdditionalName.Title); Assert.AreEqual("L", parsed.AdditionalName.First); - Assert.AreEqual(string.Empty, parsed.AdditionalName.Middle); + Assert.IsEmpty(parsed.AdditionalName.Middle); Assert.AreEqual("Jones", parsed.AdditionalName.Last); Assert.IsNull(parsed.AdditionalName.AdditionalName); @@ -265,8 +264,8 @@ public void ThreeNames() public void Parens() { var johnSmith = new HumanName("(John Smith)"); - Assert.AreEqual(string.Empty, johnSmith.First); - Assert.AreEqual(string.Empty, johnSmith.Last); + Assert.IsEmpty(johnSmith.First); + Assert.IsEmpty(johnSmith.Last); Assert.AreEqual("John Smith", johnSmith.Nickname); } @@ -345,7 +344,7 @@ public void AddToLists() Assert.AreEqual("John", parsed.First); Assert.AreEqual("Smith", parsed.Middle); Assert.AreEqual("2nd", parsed.Last); - Assert.AreEqual(string.Empty, parsed.Suffix); + Assert.IsEmpty(parsed.Suffix); HumanName.Suffixes.Add("2nd"); var withSuffix = new HumanName("Mr. John Smith 2nd"); @@ -364,7 +363,7 @@ public void FirstNameIsPrefix() // Default behavior var parsedPrefix = new HumanName("Mr. Del Richards"); Assert.AreEqual(parsedPrefix.Title, "Mr."); - Assert.AreEqual(parsedPrefix.First, string.Empty); + Assert.IsEmpty(parsedPrefix.First); Assert.AreEqual(parsedPrefix.Last, "Del Richards"); Assert.AreEqual(parsedPrefix.LastPrefixes, "Del"); @@ -373,7 +372,7 @@ public void FirstNameIsPrefix() Assert.AreEqual(parsedFirst.Title, "Mr."); Assert.AreEqual(parsedFirst.First, "Del"); Assert.AreEqual(parsedFirst.Last, "Richards"); - Assert.AreEqual(parsedFirst.LastPrefixes, string.Empty); + Assert.IsEmpty(parsedFirst.LastPrefixes); } } } From bf4c486128cf8f001ae75c9f22d5ead62a7effae Mon Sep 17 00:00:00 2001 From: Nathan Baulch Date: Mon, 25 Aug 2025 17:25:27 +1000 Subject: [PATCH 04/14] Fix argument order passed to `Assert.AreEqual` --- NameParser/NameParserTest/NameParserTests.cs | 40 ++++++++++---------- 1 file changed, 20 insertions(+), 20 deletions(-) diff --git a/NameParser/NameParserTest/NameParserTests.cs b/NameParser/NameParserTest/NameParserTests.cs index 8655439..783486f 100644 --- a/NameParser/NameParserTest/NameParserTests.cs +++ b/NameParser/NameParserTest/NameParserTests.cs @@ -119,11 +119,11 @@ public void NicknameAtBeginning_DoubleQuote() { var parsed = new HumanName("\"TREY\" ROBERT HENRY BUSH III"); - Assert.AreEqual(parsed.First, "ROBERT"); - Assert.AreEqual(parsed.Middle, "HENRY"); - Assert.AreEqual(parsed.Last, "BUSH"); - Assert.AreEqual(parsed.Nickname, "TREY"); - Assert.AreEqual(parsed.Suffix, "III"); + Assert.AreEqual("ROBERT", parsed.First); + Assert.AreEqual("HENRY", parsed.Middle); + Assert.AreEqual("BUSH", parsed.Last); + Assert.AreEqual("TREY", parsed.Nickname); + Assert.AreEqual("III", parsed.Suffix); } [TestMethod] @@ -131,11 +131,11 @@ public void NicknameAtBeginning_SingleQuote() { var parsed = new HumanName("'TREY' ROBERT HENRY BUSH III"); - Assert.AreEqual(parsed.First, "ROBERT"); - Assert.AreEqual(parsed.Middle, "HENRY"); - Assert.AreEqual(parsed.Last, "BUSH"); - Assert.AreEqual(parsed.Nickname, "TREY"); - Assert.AreEqual(parsed.Suffix, "III"); + Assert.AreEqual("ROBERT", parsed.First); + Assert.AreEqual("HENRY", parsed.Middle); + Assert.AreEqual("BUSH", parsed.Last); + Assert.AreEqual("TREY", parsed.Nickname); + Assert.AreEqual("III", parsed.Suffix); } [TestMethod] @@ -290,10 +290,10 @@ public void TwoCommaWithMiddleName() { var parsed = new HumanName("Surname, John Middle, III"); - Assert.AreEqual(parsed.First, "John"); - Assert.AreEqual(parsed.Middle, "Middle"); - Assert.AreEqual(parsed.Last, "Surname"); - Assert.AreEqual(parsed.Suffix, "III"); + Assert.AreEqual("John", parsed.First); + Assert.AreEqual("Middle", parsed.Middle); + Assert.AreEqual("Surname", parsed.Last); + Assert.AreEqual("III", parsed.Suffix); } [TestMethod] @@ -362,16 +362,16 @@ public void FirstNameIsPrefix() { // Default behavior var parsedPrefix = new HumanName("Mr. Del Richards"); - Assert.AreEqual(parsedPrefix.Title, "Mr."); + Assert.AreEqual("Mr.", parsedPrefix.Title); Assert.IsEmpty(parsedPrefix.First); - Assert.AreEqual(parsedPrefix.Last, "Del Richards"); - Assert.AreEqual(parsedPrefix.LastPrefixes, "Del"); + Assert.AreEqual("Del Richards", parsedPrefix.Last); + Assert.AreEqual("Del", parsedPrefix.LastPrefixes); // A single prefix should be treated as a first name when no first exists var parsedFirst = new HumanName("Mr. Del Richards", Prefer.FirstOverPrefix); - Assert.AreEqual(parsedFirst.Title, "Mr."); - Assert.AreEqual(parsedFirst.First, "Del"); - Assert.AreEqual(parsedFirst.Last, "Richards"); + Assert.AreEqual("Mr.", parsedFirst.Title); + Assert.AreEqual("Del", parsedFirst.First); + Assert.AreEqual("Richards", parsedFirst.Last); Assert.IsEmpty(parsedFirst.LastPrefixes); } } From 9cece6c6488d3770a6441b4bc8ac0eb7eb595921 Mon Sep 17 00:00:00 2001 From: Nathan Baulch Date: Mon, 25 Aug 2025 09:54:18 +1000 Subject: [PATCH 05/14] Use a dictionary for `CapitalizationExceptions` --- NameParser/NameParser/Config.cs | 13 ++++++------- NameParser/NameParser/Parser.cs | 6 ++---- 2 files changed, 8 insertions(+), 11 deletions(-) diff --git a/NameParser/NameParser/Config.cs b/NameParser/NameParser/Config.cs index 5e6a41a..6767e07 100644 --- a/NameParser/NameParser/Config.cs +++ b/NameParser/NameParser/Config.cs @@ -1,6 +1,5 @@ namespace NameParser { - using System; using System.Collections.Generic; using System.Text.RegularExpressions; @@ -9,13 +8,13 @@ public partial class HumanName /// /// Any pieces that are not capitalized by capitalizing the first letter. /// - public static readonly ISet> CapitalizationExceptions = new HashSet> + public static readonly IDictionary CapitalizationExceptions = new Dictionary { - Tuple.Create("ii", "II"), - Tuple.Create("iii", "III"), - Tuple.Create("iv", "IV"), - Tuple.Create("md", "M.D."), - Tuple.Create("phd", "Ph.D.") + { "ii", "II" }, + { "iii", "III" }, + { "iv", "IV" }, + { "md", "M.D." }, + { "phd", "Ph.D." } }; /// diff --git a/NameParser/NameParser/Parser.cs b/NameParser/NameParser/Parser.cs index 0f39db3..6d3025d 100644 --- a/NameParser/NameParser/Parser.cs +++ b/NameParser/NameParser/Parser.cs @@ -727,11 +727,9 @@ private static string CapitalizeWord(string word) } // "phd" => "Ph.D."; "ii" => "II" - var exception = CapitalizationExceptions.FirstOrDefault(tup => tup.Item1 == wordLower); - - if (exception != null) + if (CapitalizationExceptions.TryGetValue(wordLower, out var exception)) { - return exception.Item2; + return exception; } // special case: "macbeth" should be "MacBeth"; "mcbride" -> "McBride" From a51eae24cb6ca7d9baec18214637ed36261efe59 Mon Sep 17 00:00:00 2001 From: Nathan Baulch Date: Mon, 25 Aug 2025 09:54:38 +1000 Subject: [PATCH 06/14] Compile regex objects --- NameParser/NameParser/Config.cs | 7 ++++++- NameParser/NameParser/Parser.cs | 6 ++---- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/NameParser/NameParser/Config.cs b/NameParser/NameParser/Config.cs index 6767e07..cbefd5f 100644 --- a/NameParser/NameParser/Config.cs +++ b/NameParser/NameParser/Config.cs @@ -33,7 +33,12 @@ public partial class HumanName /// /// For handling names that start with Mc or Mac such as McBride, MacDonald /// - private static readonly Regex RegexMac = new Regex(@"^(ma?c)(\w+)", RegexOptions.IgnoreCase); + private static readonly Regex RegexMac = new Regex(@"^(ma?c)(\w+)", RegexOptions.IgnoreCase | RegexOptions.Compiled); + + /// + /// For extracting optional quoted nicknames from full names. + /// + private static readonly Regex RegexNickname = new Regex(@"\s*(?:\((.+?)\))|(?:([""'])(.+?)\2)", RegexOptions.Compiled); /// /// Pieces that come at the end of the name but are not last names. These potentially diff --git a/NameParser/NameParser/Parser.cs b/NameParser/NameParser/Parser.cs index 6d3025d..56a5fc0 100644 --- a/NameParser/NameParser/Parser.cs +++ b/NameParser/NameParser/Parser.cs @@ -3,7 +3,6 @@ namespace NameParser using System; using System.Collections.Generic; using System.Linq; - using System.Text.RegularExpressions; /// /// Parse a person's name into individual components. @@ -552,10 +551,9 @@ private static void ParseNicknames(ref string fullName, out IList nickna // @"john (j"" jones", @"samuel (sammy"" samsonite" // https://code.google.com/p/python-nameparser/issues/detail?id=33 - var nicknameRegex = new Regex(@"\s*(?:\((.+?)\))|(?:([""'])(.+?)\2)"); nicknameList = new List(); - var match = nicknameRegex.Match(fullName); + var match = RegexNickname.Match(fullName); var nicknameFound = false; while (match.Success && match.Groups[0].Value.Length > 0) @@ -567,7 +565,7 @@ private static void ParseNicknames(ref string fullName, out IList nickna var matchGroup = match.Groups[0].Value.TrimStart().StartsWith("(") ? 1 : 3; // which regex group was used: 1 is for parens; 3 is single- or double-quoted nicknames nicknameList.Add(match.Groups[matchGroup].Value); - match = nicknameRegex.Match(fullName); + match = RegexNickname.Match(fullName); } // normalize whitespace From 8f3fdc7adbabc7d8346fcff0a5efe98b92b555bc Mon Sep 17 00:00:00 2001 From: Nathan Baulch Date: Mon, 25 Aug 2025 10:00:32 +1000 Subject: [PATCH 07/14] Override `Equals` and `GetHashCode` --- NameParser/NameParser/Parser.cs | 20 ++++++++++++++++++++ NameParser/NameParserTest/NameParserTests.cs | 2 +- 2 files changed, 21 insertions(+), 1 deletion(-) diff --git a/NameParser/NameParser/Parser.cs b/NameParser/NameParser/Parser.cs index 56a5fc0..5dac374 100644 --- a/NameParser/NameParser/Parser.cs +++ b/NameParser/NameParser/Parser.cs @@ -124,6 +124,26 @@ public HumanName(string fullName, Prefer prefs = Prefer.Default) return !(left == right); } + public override bool Equals(object obj) + { + return obj is HumanName other && this == other; + } + + public override int GetHashCode() + { + unchecked + { + var hash = 17; + hash = hash * 23 + (Title?.GetHashCode() ?? 0); + hash = hash * 23 + (First?.GetHashCode() ?? 0); + hash = hash * 23 + (Middle?.GetHashCode() ?? 0); + hash = hash * 23 + (Last?.GetHashCode() ?? 0); + hash = hash * 23 + (Suffix?.GetHashCode() ?? 0); + hash = hash * 23 + (Nickname?.GetHashCode() ?? 0); + return hash; + } + } + /// /// Return the parsed name as a dictionary of its attributes. /// diff --git a/NameParser/NameParserTest/NameParserTests.cs b/NameParser/NameParserTest/NameParserTests.cs index 783486f..9695264 100644 --- a/NameParser/NameParserTest/NameParserTests.cs +++ b/NameParser/NameParserTest/NameParserTests.cs @@ -111,7 +111,7 @@ public void DifferentInputsSameValues() var fml = new HumanName("john x smith"); var lfm = new HumanName("smith, john x"); - Assert.IsTrue(fml == lfm); + Assert.AreEqual(fml, lfm); } [TestMethod] From 96b849612c3f6a839ec69137edb90a31f0d92239 Mon Sep 17 00:00:00 2001 From: Nathan Baulch Date: Mon, 25 Aug 2025 10:02:41 +1000 Subject: [PATCH 08/14] Rename deprecated `PackageLicenseUrl` project element --- NameParser/NameParser/NameParser.csproj | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/NameParser/NameParser/NameParser.csproj b/NameParser/NameParser/NameParser.csproj index 0f941f7..175cdd9 100644 --- a/NameParser/NameParser/NameParser.csproj +++ b/NameParser/NameParser/NameParser.csproj @@ -4,7 +4,7 @@ true 1.5.0.0 1.5.0.0 - https://licenses.nuget.org/LGPL-2.1-or-later + https://licenses.nuget.org/LGPL-2.1-or-later https://github.com/aeshirey/NameParserSharp Parses full name into first name, middle name, last name, nickname, title, and suffix fields 1.5.0 From 81021ac3c49ade5a84207d91dd03a139cb8d846a Mon Sep 17 00:00:00 2001 From: Nathan Baulch Date: Mon, 25 Aug 2025 10:54:08 +1000 Subject: [PATCH 09/14] Sort static string set items --- NameParser/NameParser/Config.cs | 745 +++++++++++++++++--------------- 1 file changed, 386 insertions(+), 359 deletions(-) diff --git a/NameParser/NameParser/Config.cs b/NameParser/NameParser/Config.cs index cbefd5f..a8afe3f 100644 --- a/NameParser/NameParser/Config.cs +++ b/NameParser/NameParser/Config.cs @@ -27,7 +27,34 @@ public partial class HumanName /// public static readonly ISet Prefixes = new HashSet { - "abu", "bon", "bin", "da", "dal", "de", "del", "dem", "den", "der", "di", "dí", "het", "ibn", "in", "la", "le", "onder", "op", "san", "santa", "st", "ste", "'t", "ten", "van", "vel", "von" + "'t", + "abu", + "bin", + "bon", + "da", + "dal", + "de", + "del", + "dem", + "den", + "der", + "di", + "dí", + "het", + "ibn", + "in", + "la", + "le", + "onder", + "op", + "san", + "santa", + "st", + "ste", + "ten", + "van", + "vel", + "von" }; /// @@ -48,35 +75,35 @@ public partial class HumanName /// public static readonly ISet Suffixes = new HashSet { - "esq", - "esquire", - "jr", - "jnr", - "sr", - "snr", "2", - "i", - "ii", - "iii", - "iv", - "v", - "clu", - "chfc", "cfp", + "chfc", + "clu", "cpa", "csm", - "do", "dds", - "dpm", "dmd", - "md", - "mba", + "do", + "dpm", + "esq", + "esquire", + "i", + "ii", + "iii", + "iv", + "jnr", + "jr", "ma", + "mba", + "md", + "mp", "phd", "phr", "pmp", - "mp", - "qc" + "qc", + "snr", + "sr", + "v" }; /// @@ -85,20 +112,20 @@ public partial class HumanName /// public static readonly ISet FirstNameTitles = new HashSet { - "sir", + "aunt", + "auntie", + "brother", "dame", + "father", "king", - "queen", - "master", "maid", - "uncle", - "auntie", - "aunt", - "brother", - "sister", + "master", "mother", - "father", - "pope" + "pope", + "queen", + "sir", + "sister", + "uncle" }; /// @@ -109,383 +136,383 @@ public partial class HumanName /// public static readonly ISet Titles = new HashSet(FirstNameTitles) { - "dr", - "doctor", - "miss", - "misses", - "mr", - "mister", - "mrs", - "ms", - "rev", - "madam", - "madame", - "ab", - "2ndlt", - "amn", + "1lt", + "1sgt", "1stlt", + "1stsgt", + "2lt", + "2ndlt", "a1c", - "capt", - "sra", - "maj", - "ssgt", - "ltcol", - "tsgt", - "col", + "ab", + "abbess", + "abbot", + "academic", + "acolyte", + "adept", + "adjutant", + "adm", + "admiral", + "advocate", + "air", + "akhoond", + "alderman", + "almoner", + "ambassador", + "amn", + "analytics", + "appellate", + "apprentice", + "arbitrator", + "archbishop", + "archdeacon", + "archdruid", + "archduchess", + "archduke", + "arhat", + "assistant", + "assoc", + "associate", + "asst", + "attache", + "attaché", + "attorney", + "ayatollah", + "baba", + "bailiff", + "banner", + "bard", + "baron", + "barrister", + "bearer", + "bench", + "bg", + "bgen", + "bishop", + "blessed", + "bodhisattva", + "brigadier", "briggen", - "1stsgt", - "majgen", - "smsgt", - "ltgen", - "cmsgt", + "buddha", + "burgess", + "business", + "bwana", + "canon", + "capt", + "captain", + "cardinal", + "catholicos", "ccmsgt", + "cdr", + "ceo", + "cfo", + "chancellor", + "chaplain", + "chargé d'affaires", + "chief", + "chieftain", + "civil", + "clerk", "cmsaf", - "pvt", - "2lt", - "pv2", - "1lt", - "pfc", - "cpt", - "spc", + "cmsgt", + "coach", + "col", + "colonel", + "commander", + "commander-in-chief", + "commodore", + "comptroller", + "controller", + "corporal", + "corporate", + "councillor", + "courtier", "cpl", - "ltc", - "sgt", - "ssg", - "bg", - "sfc", - "mg", - "msg", - "ltg", - "1sgt", - "sgm", - "csm", - "sma", - "wo1", - "wo2", - "wo3", - "wo4", - "wo5", - "ens", - "sa", - "ltjg", - "sn", - "lt", - "po3", - "lcdr", - "po1", - "po2", - "cdr", "cpo", - "scpo", - "mcpo", - "vadm", - "mcpoc", - "adm", - "mpco-cg", - "lcpl", - "gysgt", - "bgen", - "msgt", - "mgysgt", - "gen", - "sgtmaj", - "sgtmajmc", - "wo-1", + "cpt", + "credit", + "criminal", + "csm", + "curator", + "customs", "cwo-2", "cwo-3", "cwo-4", "cwo-5", - "rdml", - "radm", - "mcpon", - "fadm", "cwo2", "cwo3", "cwo4", "cwo5", - "rt", - "lord", - "lady", - "duke", - "dutchess", - "representative", - "rep", - "senator", - "cardinal", - "secretary", - "state", - "foreign", - "minister", - "speaker", - "president", - "pres", - "ceo", - "cfo", + "deacon", + "delegate", "deputy", + "designated", + "dir", + "director", + "discovery", + "district", + "division", + "docent", + "docket", + "doctor", + "doyen", "dpty", - "executive", - "exec", - "vice", - "vc", - "councillor", - "manager", - "mgr", - "alderman", - "delegate", - "mayor", - "lieutenant", - "governor", - "prefect", - "prelate", - "premier", - "burgess", - "ambassador", - "envoy", - "attaché", - "chargé d'affaires", - "provost", - "marquis", - "marquess", - "marquise", - "marchioness", - "archduke", - "archduchess", - "viscount", - "baron", + "dr", + "druid", + "duke", + "dutchess", + "edmi", + "edohen", + "effendi", + "ekegbian", + "elder", + "elerunwon", "emperor", "empress", - "tsar", - "tsarina", - "leader", - "abbess", - "abbot", + "ens", + "envoy", + "exec", + "executive", + "fadm", + "family", + "federal", + "field", + "financial", + "first", + "flag", + "flying", + "foreign", + "forester", "friar", - "superior", - "reverend", - "bishop", - "archbishop", - "metropolitan", - "presbyter", - "priest", - "priestess", - "matriarch", - "patriarch", - "catholicos", - "vicar", - "chaplain", - "canon", - "pastor", - "primate", - "servant", - "venerable", - "blessed", - "saint", - "member", - "solicitor", - "mufti", - "grand", - "chancellor", - "barrister", - "bailiff", - "attorney", - "advocate", - "deacon", - "archdeacon", - "acolyte", - "elder", - "monsignor", - "almoner", - "prof", - "colonel", + "gen", "general", - "commodore", - "air", - "corporal", - "staff", - "chief", - "first", - "sergeant", - "admiral", - "high", - "rear", - "brigadier", - "captain", - "group", - "commander", - "commander-in-chief", - "wing", - "adjutant", - "director", - "dir", "generalissimo", - "resident", - "surgeon", - "officer", - "controller", - "academic", - "analytics", - "business", - "credit", - "financial", + "gentiluomo", + "giani", + "goodman", + "goodwife", + "governor", + "grand", + "group", + "guru", + "gyani", + "gysgt", + "hajji", + "headman", + "her", + "hereditary", + "high", + "his", + "hon", + "honorable", + "honourable", + "imam", "information", - "security", + "intelligence", + "intendant", + "journeyman", + "jr", + "judge", + "judicial", + "junior", + "king's", + "kingdom", "knowledge", + "lady", + "lama", + "lamido", + "law", + "lcdr", + "lcpl", + "leader", + "lieutenant", + "lord", + "lt", + "ltc", + "ltcol", + "ltg", + "ltgen", + "ltjg", + "madam", + "madame", + "mag", + "mag-judge", + "mag/judge", + "magistrate", + "magistrate-judge", + "maharajah", + "maharani", + "mahdi", + "maj", + "majesty", + "majgen", + "manager", + "marcher", + "marchioness", "marketing", + "marquess", + "marquis", + "marquise", + "matriarch", + "mayor", + "mcpo", + "mcpoc", + "mcpon", + "member", + "metropolitan", + "mg", + "mgr", + "mgysgt", + "minister", + "miss", + "misses", + "mister", + "monsignor", + "mpco-cg", + "mr", + "mrs", + "ms", + "msg", + "msgt", + "mufti", + "mullah", + "municipal", + "murshid", + "nanny", + "national", + "nurse", + "officer", "operating", + "pastor", + "patriarch", "petty", - "risk", - "strategy", - "technical", - "warrant", - "corporate", - "customs", - "field", - "flag", - "flying", - "intelligence", + "pfc", + "pharaoh", "pilot", + "pir", + "po1", + "po2", + "po3", "police", "political", - "revenue", - "senior", - "sr", - "junior", - "jr", - "private", - "principal", + "prefect", + "prelate", + "premier", + "pres", + "presbyter", + "president", + "presiding", + "priest", + "priestess", + "primate", + "prime", "prin", - "coach", - "nurse", - "nanny", - "docent", - "lama", - "druid", - "archdruid", - "rabbi", - "rebbe", - "buddha", - "ayatollah", - "imam", - "bodhisattva", - "mullah", - "mahdi", - "saoshyant", - "tirthankar", - "vardapet", - "pharaoh", - "sultan", - "sultana", - "maharajah", - "maharani", - "vizier", - "chieftain", - "comptroller", - "courtier", - "curator", - "doyen", - "edohen", - "ekegbian", - "elerunwon", - "forester", - "gentiluomo", - "headman", - "intendant", - "lamido", - "marcher", + "prince", + "princess", + "principal", "prior", + "private", + "pro", + "prof", + "provost", + "pslc", "pursuivant", + "pv2", + "pvt", + "queen's", + "rabbi", + "radm", "rangatira", "ranger", + "rdml", + "rear", + "rebbe", "registrar", + "rep", + "representative", + "resident", + "rev", + "revenue", + "reverend", + "right", + "risk", + "royal", + "rt", + "sa", + "saint", + "saoshyant", + "scpo", + "se", + "secretary", + "security", "seigneur", + "senator", + "senior", + "senior-judge", + "sergeant", + "servant", + "sfc", + "sgm", + "sgt", + "sgtmaj", + "sgtmajmc", "shehu", "sheikh", "sheriff", + "siddha", + "sma", + "smsgt", + "sn", + "solicitor", + "spc", + "speaker", + "special", + "sr", + "sra", + "ssg", + "ssgt", + "staff", + "state", + "states", + "strategy", "subaltern", "subedar", + "sultan", + "sultana", + "superior", + "supreme", + "surgeon", + "swordbearer", "sysselmann", + "tax", + "technical", "timi", + "tirthankar", "treasurer", + "tsar", + "tsarina", + "tsgt", + "uk", + "united", + "us", + "vadm", + "vardapet", + "vc", + "venerable", "verderer", + "vicar", + "vice", + "viscount", + "vizier", "warden", - "hereditary", + "warrant", + "wing", + "wo-1", + "wo1", + "wo2", + "wo3", + "wo4", + "wo5", "woodman", - "bearer", - "banner", - "swordbearer", - "apprentice", - "journeyman", - "adept", - "akhoond", - "arhat", - "bwana", - "goodman", - "goodwife", - "bard", - "hajji", - "baba", - "effendi", - "giani", - "gyani", - "guru", - "siddha", - "pir", - "murshid", - "attache", - "prime", - "united", - "states", - "national", - "associate", - "assoc", - "assistant", - "asst", - "supreme", - "appellate", - "judicial", - "queen's", - "king's", - "prince", - "princess", - "bench", - "right", - "majesty", - "his", - "her", - "kingdom", - "royal", - "honorable", - "honourable", - "hon", - "magistrate", - "mag", - "judge", - "designated", - "us", - "uk", - "federal", - "district", - "arbitrator", - "pro", - "se", - "law", - "clerk", - "docket", - "pslc", - "special", - "municipal", - "tax", - "civil", - "criminal", - "family", - "presiding", - "division", - "edmi", - "discovery", - "magistrate-judge", - "mag-judge", - "senior-judge", - "mag/judge" }; } } \ No newline at end of file From d94022f826ad21a32668cf3ea6e257a6d2c9f4cb Mon Sep 17 00:00:00 2001 From: Nathan Baulch Date: Wed, 27 Aug 2025 14:12:18 +1000 Subject: [PATCH 10/14] Avoid multiple iteration --- NameParser/NameParser/Parser.cs | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/NameParser/NameParser/Parser.cs b/NameParser/NameParser/Parser.cs index 5dac374..7f014a4 100644 --- a/NameParser/NameParser/Parser.cs +++ b/NameParser/NameParser/Parser.cs @@ -220,7 +220,17 @@ private static bool IsSuffix(string piece) private static bool AreSuffixes(IEnumerable pieces) { - return pieces.Any() && pieces.All(IsSuffix); + var any = false; + foreach (var piece in pieces) + { + if (!IsSuffix(piece)) + { + return false; + } + any = true; + } + + return any; } /// From 994ad02a3ad577655ccf8537ca5af43f82350dbf Mon Sep 17 00:00:00 2001 From: Nathan Baulch Date: Wed, 27 Aug 2025 14:12:51 +1000 Subject: [PATCH 11/14] Prefer LINQ `SelectMany` over list `AddRange` --- NameParser/NameParser/Parser.cs | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/NameParser/NameParser/Parser.cs b/NameParser/NameParser/Parser.cs index 7f014a4..3601e88 100644 --- a/NameParser/NameParser/Parser.cs +++ b/NameParser/NameParser/Parser.cs @@ -613,13 +613,10 @@ private static void ParseNicknames(ref string fullName, out IList nickna /// pieces split on spaces and joined on conjunctions protected static string[] ParsePieces(IEnumerable parts, int additionalPartsCount = 0) { - var tmp = new List(); - foreach (var part in parts) - { - tmp.AddRange(part.Split(new[] { ' ' }, StringSplitOptions.RemoveEmptyEntries).Select(s => s.Trim(','))); - } - - return joinOnConjunctions(tmp, additionalPartsCount); + var pieces = parts.SelectMany(part => part.Split(new[] { ' ' }, StringSplitOptions.RemoveEmptyEntries)) + .Select(s => s.Trim(',')) + .ToList(); + return joinOnConjunctions(pieces, additionalPartsCount); } /// From 1e157376959329859d986c405531b85dfdd10d5a Mon Sep 17 00:00:00 2001 From: Nathan Baulch Date: Mon, 25 Aug 2025 16:51:49 +1000 Subject: [PATCH 12/14] Avoid modifying collection whilst iterating --- NameParser/NameParser/Parser.cs | 45 ++++++++++++++++----------------- 1 file changed, 22 insertions(+), 23 deletions(-) diff --git a/NameParser/NameParser/Parser.cs b/NameParser/NameParser/Parser.cs index 3601e88..eef5ef7 100644 --- a/NameParser/NameParser/Parser.cs +++ b/NameParser/NameParser/Parser.cs @@ -636,8 +636,14 @@ internal static string[] joinOnConjunctions(List pieces, int additionalP return pieces.ToArray(); } - foreach (var conj in pieces.Where(IsConjunction).Reverse()) + for (var i = pieces.Count - 1; i >= 0; i--) { + var conj = pieces[i]; + if (!IsConjunction(conj)) + { + continue; + } + // loop through the pieces backwards, starting at the end of the list. // Join conjunctions to the pieces on either side of them. if (conj.Length == 1 && pieces.Count(IsRootname) < 4) @@ -649,30 +655,23 @@ internal static string[] joinOnConjunctions(List pieces, int additionalP continue; } - var index = pieces.IndexOf(conj); - - if (index == -1) - { - continue; - } - - if (index < pieces.Count - 1) + if (i < pieces.Count - 1) { // if this is not the last piece string newPiece; - if (index == 0) + if (i == 0) { // if this is the first piece and it's a conjunction - var nxt = pieces[index + 1]; + var nxt = pieces[i + 1]; var cons = IsTitle(nxt) ? Conjunctions : Titles; newPiece = string.Join(" ", pieces.Take(2)); cons.Add(newPiece); - pieces[index] = newPiece; - pieces.RemoveAt(index + 1); + pieces[i] = newPiece; + pieces.RemoveAt(i + 1); continue; } - if (IsConjunction(pieces[index - 1])) + if (IsConjunction(pieces[i - 1])) { // if the piece in front of this one is a conjunction too, // add new_piece (this conjunction and the following piece) @@ -681,16 +680,16 @@ internal static string[] joinOnConjunctions(List pieces, int additionalP // e.g. for ["Lord","of","the Universe"], put "the Universe" // into the conjunctions constant. - newPiece = string.Join(" ", pieces.Skip(index).Take(2)); + newPiece = string.Join(" ", pieces.Skip(i).Take(2)); Conjunctions.Add(newPiece); - pieces[index] = newPiece; - pieces.RemoveAt(index + 1); + pieces[i] = newPiece; + pieces.RemoveAt(i + 1); continue; } - newPiece = string.Join(" ", pieces.Skip(index - 1).Take(3)); + newPiece = string.Join(" ", pieces.Skip(i - 1).Take(3)); - if (IsTitle(pieces[index - 1])) + if (IsTitle(pieces[i - 1])) { // if the second name is a title, assume the first one is too and add the // two titles with the conjunction between them to the titles constant @@ -699,9 +698,9 @@ internal static string[] joinOnConjunctions(List pieces, int additionalP Titles.Add(newPiece); } - pieces[index - 1] = newPiece; - pieces.RemoveAt(index); - pieces.RemoveAt(index); + pieces[i - 1] = newPiece; + pieces.RemoveAt(i); + pieces.RemoveAt(i); } } @@ -805,4 +804,4 @@ public void Normalize() #endregion } -} +} \ No newline at end of file From ab185ea6869aecf17120c9693edfed4f7df30ad9 Mon Sep 17 00:00:00 2001 From: Nathan Baulch Date: Mon, 25 Aug 2025 21:39:36 +1000 Subject: [PATCH 13/14] Avoid string `Contains` followed by `IndexOf` --- NameParser/NameParser/Parser.cs | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/NameParser/NameParser/Parser.cs b/NameParser/NameParser/Parser.cs index eef5ef7..ddb2177 100644 --- a/NameParser/NameParser/Parser.cs +++ b/NameParser/NameParser/Parser.cs @@ -362,21 +362,24 @@ private void ParseFullName() { if (ParseMultipleNames) { - if (_fullName.Contains('&')) + var split = _fullName.IndexOf('&'); + if (split >= 0) { - var split = _fullName.IndexOf('&'); var primary = _fullName.Substring(0, split); var secondary = _fullName.Substring(split + 1); AdditionalName = new HumanName(secondary); _fullName = primary; } - else if (_fullName.ToLowerInvariant().Contains(" and ")) + else { - var split = _fullName.IndexOf(" and ", StringComparison.InvariantCultureIgnoreCase); - var primary = _fullName.Substring(0, split); - var secondary = _fullName.Substring(split + 5 /* length of " and " */); - AdditionalName = new HumanName(secondary); - _fullName = primary; + split = _fullName.IndexOf(" and ", StringComparison.InvariantCultureIgnoreCase); + if (split >= 0) + { + var primary = _fullName.Substring(0, split); + var secondary = _fullName.Substring(split + 5 /* length of " and " */); + AdditionalName = new HumanName(secondary); + _fullName = primary; + } } } From 4b71bffcfcb48c7e73da768ad8838b2aeb84cdbf Mon Sep 17 00:00:00 2001 From: Nathan Baulch Date: Tue, 26 Aug 2025 08:28:58 +1000 Subject: [PATCH 14/14] Avoid `ToLower` purely for config lookups --- NameParser/NameParser/Config.cs | 13 +++++++------ NameParser/NameParser/Parser.cs | 18 +++++++++--------- 2 files changed, 16 insertions(+), 15 deletions(-) diff --git a/NameParser/NameParser/Config.cs b/NameParser/NameParser/Config.cs index a8afe3f..6599049 100644 --- a/NameParser/NameParser/Config.cs +++ b/NameParser/NameParser/Config.cs @@ -1,5 +1,6 @@ namespace NameParser { + using System; using System.Collections.Generic; using System.Text.RegularExpressions; @@ -8,7 +9,7 @@ public partial class HumanName /// /// Any pieces that are not capitalized by capitalizing the first letter. /// - public static readonly IDictionary CapitalizationExceptions = new Dictionary + public static readonly IDictionary CapitalizationExceptions = new Dictionary(StringComparer.OrdinalIgnoreCase) { { "ii", "II" }, { "iii", "III" }, @@ -20,12 +21,12 @@ public partial class HumanName /// /// Pieces that should join to their neighboring pieces, e.g. "and", "y" and "&". "of" and "the" are also include to facilitate joining multiple titles, e.g. "President of the United States". /// - public static readonly ISet Conjunctions = new HashSet { "&", "and", "et", "e", "of", "the", "und", "y" }; + public static readonly ISet Conjunctions = new HashSet(StringComparer.OrdinalIgnoreCase) { "&", "and", "et", "e", "of", "the", "und", "y" }; /// /// Name pieces that appear before a last name. They join to the piece that follows them to make one new piece. /// - public static readonly ISet Prefixes = new HashSet + public static readonly ISet Prefixes = new HashSet(StringComparer.OrdinalIgnoreCase) { "'t", "abu", @@ -73,7 +74,7 @@ public partial class HumanName /// These may be updated in the future because some of them are actually titles that just /// come at the end of the name, so semantically this is wrong. Positionally, it's correct. /// - public static readonly ISet Suffixes = new HashSet + public static readonly ISet Suffixes = new HashSet(StringComparer.OrdinalIgnoreCase) { "2", "cfp", @@ -110,7 +111,7 @@ public partial class HumanName /// When these titles appear with a single other name, that name is a first name, e.g. /// "Sir John", "Sister Mary", "Queen Elizabeth". /// - public static readonly ISet FirstNameTitles = new HashSet + public static readonly ISet FirstNameTitles = new HashSet(StringComparer.OrdinalIgnoreCase) { "aunt", "auntie", @@ -134,7 +135,7 @@ public partial class HumanName /// The parser recognizes chains of these including conjunctions allowing /// recognition titles like "Deputy Secretary of State". /// - public static readonly ISet Titles = new HashSet(FirstNameTitles) + public static readonly ISet Titles = new HashSet(FirstNameTitles, StringComparer.OrdinalIgnoreCase) { "1lt", "1sgt", diff --git a/NameParser/NameParser/Parser.cs b/NameParser/NameParser/Parser.cs index ddb2177..3ff7792 100644 --- a/NameParser/NameParser/Parser.cs +++ b/NameParser/NameParser/Parser.cs @@ -200,22 +200,22 @@ public Dictionary AsDictionary(bool includeEmpty = true) private static bool IsTitle(string value) { - return Titles.Contains(value.ToLower().Replace(".", string.Empty)); + return Titles.Contains(value.Replace(".", string.Empty)); } private static bool IsConjunction(string piece) { - return Conjunctions.Contains(piece.ToLower().Replace(".", string.Empty)) && !IsAnInitial(piece); + return Conjunctions.Contains(piece.Replace(".", string.Empty)) && !IsAnInitial(piece); } private static bool IsPrefix(string piece) { - return Prefixes.Contains(piece.ToLower().Replace(".", string.Empty)) && !IsAnInitial(piece); + return Prefixes.Contains(piece.Replace(".", string.Empty)) && !IsAnInitial(piece); } private static bool IsSuffix(string piece) { - return Suffixes.Contains(piece.Replace(".", string.Empty).ToLower()) && !IsAnInitial(piece); + return Suffixes.Contains(piece.Replace(".", string.Empty)) && !IsAnInitial(piece); } private static bool AreSuffixes(IEnumerable pieces) @@ -240,10 +240,10 @@ private static bool AreSuffixes(IEnumerable pieces) /// False if is a prefix (de, abu, bin), suffix (jr, iv, cpa), title (mr, pope), or initial (x, e.); true otherwise private static bool IsRootname(string piece) { - var lcPiece = piece.ToLower().Replace(".", string.Empty); - return !Suffixes.Contains(lcPiece) - && !Prefixes.Contains(lcPiece) - && !Titles.Contains(lcPiece) + var noDots = piece.Replace(".", string.Empty); + return !Suffixes.Contains(noDots) + && !Prefixes.Contains(noDots) + && !Titles.Contains(noDots) && !IsAnInitial(piece); } @@ -273,7 +273,7 @@ private static bool IsAnInitial(string value) private void PostProcessFirstnames() { if (!string.IsNullOrEmpty(Title) - && !FirstNameTitles.Contains(Title.ToLower().Replace(".", string.Empty)) + && !FirstNameTitles.Contains(Title.Replace(".", string.Empty)) && 1 == _firstList.Count + _lastList.Count) { if (_firstList.Any())