From a52274e0d0d3cee34d8d896a83d38e2d1610ec61 Mon Sep 17 00:00:00 2001 From: Enkidu93 Date: Wed, 24 Sep 2025 14:15:31 -0400 Subject: [PATCH 1/9] Get update rows by reference; implement SF comparer; handle duplicate verses; add remark after ide tag Fix marker placement test given new proper out of order verse handling Add comment regarding row maps --- src/SIL.Machine/Corpora/ScriptureRef.cs | 34 +++++ .../ScriptureRefUsfmParserHandlerBase.cs | 2 +- .../Corpora/UpdateUsfmParserHandler.cs | 134 +++++++++++------- ...PlaceMarkersUsfmUpdateBlockHandlerTests.cs | 4 +- .../Corpora/UpdateUsfmParserHandlerTests.cs | 107 +++++++++++++- 5 files changed, 219 insertions(+), 62 deletions(-) diff --git a/src/SIL.Machine/Corpora/ScriptureRef.cs b/src/SIL.Machine/Corpora/ScriptureRef.cs index 73d230315..1603d7cc7 100644 --- a/src/SIL.Machine/Corpora/ScriptureRef.cs +++ b/src/SIL.Machine/Corpora/ScriptureRef.cs @@ -130,4 +130,38 @@ public override string ToString() return sb.ToString(); } } + + public class ScriptureRefComparer : IComparer, IEqualityComparer + { + public static ScriptureRefComparer Default { get; } = new ScriptureRefComparer(compareSegments: true); + public static ScriptureRefComparer IgnoreSegments { get; } = new ScriptureRefComparer(compareSegments: false); + private readonly bool _compareSegments; + + public ScriptureRefComparer(bool compareSegments = true) + { + _compareSegments = compareSegments; + } + + public int Compare(ScriptureRef x, ScriptureRef y) + { + return x.CompareTo(y, _compareSegments); + } + + public bool Equals(ScriptureRef x, ScriptureRef y) + { + return x.CompareTo(y, _compareSegments) == 0; + } + + public int GetHashCode(ScriptureRef obj) + { + int hashCode = 23; + hashCode = + hashCode * 31 + + (_compareSegments ? 
obj.VerseRef.BBBCCCVVVS.GetHashCode() : obj.VerseRef.BBBCCCVVV.GetHashCode()); + hashCode = hashCode * 31 + obj.Versification.GetHashCode(); + // Using ToRelaxed is necessary to maintain equality across relaxed refs, Equals properly handles relaxed ref comparison + hashCode = hashCode * 31 + obj.ToRelaxed().Path.GetSequenceHashCode(); + return hashCode; + } + } } diff --git a/src/SIL.Machine/Corpora/ScriptureRefUsfmParserHandlerBase.cs b/src/SIL.Machine/Corpora/ScriptureRefUsfmParserHandlerBase.cs index 2c268ebf2..56f08ea9e 100644 --- a/src/SIL.Machine/Corpora/ScriptureRefUsfmParserHandlerBase.cs +++ b/src/SIL.Machine/Corpora/ScriptureRefUsfmParserHandlerBase.cs @@ -18,7 +18,7 @@ public abstract class ScriptureRefUsfmParserHandlerBase : UsfmParserHandlerBase private VerseRef _curVerseRef; private readonly Stack _curElements; private readonly Stack _curTextType; - private bool _duplicateVerse = false; + protected bool _duplicateVerse = false; protected ScriptureRefUsfmParserHandlerBase() { diff --git a/src/SIL.Machine/Corpora/UpdateUsfmParserHandler.cs b/src/SIL.Machine/Corpora/UpdateUsfmParserHandler.cs index 2e7f77c31..b53f1dd23 100644 --- a/src/SIL.Machine/Corpora/UpdateUsfmParserHandler.cs +++ b/src/SIL.Machine/Corpora/UpdateUsfmParserHandler.cs @@ -1,6 +1,7 @@ using System; using System.Collections.Generic; using System.Linq; +using SIL.Scripture; namespace SIL.Machine.Corpora { @@ -41,7 +42,9 @@ public UpdateUsfmRow( */ public class UpdateUsfmParserHandler : ScriptureRefUsfmParserHandlerBase { - private readonly IReadOnlyList _rows; + private readonly Dictionary> _rowMapIgnoreSegments; + private readonly Dictionary> _rowMapCheckSegments; + private readonly ScrVers _updateRowsVersification; private readonly List _tokens; private readonly List _updatedText; private readonly List _embedTokens; @@ -55,7 +58,6 @@ public class UpdateUsfmParserHandler : ScriptureRefUsfmParserHandlerBase private readonly Stack _updateBlockHandlers; private readonly List _remarks; 
private readonly Stack _replace; - private int _rowIndex; private int _tokenIndex; private readonly Func _errorHandler; @@ -72,7 +74,14 @@ public UpdateUsfmParserHandler( Func errorHandler = null ) { - _rows = rows ?? Array.Empty(); + // We need two maps so that update rows can be specified per segment + // but be handled correctly whether or not the USFM has segments for that verse + (_rowMapIgnoreSegments, _rowMapCheckSegments) = GetRowMap(rows ?? Array.Empty()); + _updateRowsVersification = ScrVers.English; + if (rows != null && rows.Count > 0) + { + _updateRowsVersification = rows.First(r => r.Refs.Count > 0).Refs[0].Versification; + } _tokens = new List(); _updatedText = new List(); _updateBlocks = new Stack(); @@ -137,7 +146,7 @@ IReadOnlyList attributes if (state.IsVerseText) { // Only strip paragraph markers in a verse - if (_paragraphBehavior == UpdateUsfmMarkerBehavior.Preserve) + if (_paragraphBehavior == UpdateUsfmMarkerBehavior.Preserve && !_duplicateVerse) { CollectUpdatableTokens(state); } @@ -232,14 +241,23 @@ string pubNumber base.Verse(state, number, marker, altNumber, pubNumber); - CollectReadonlyTokens(state); + if (_duplicateVerse) + { + SkipUpdatableTokens(state); + } + else + { + CollectReadonlyTokens(state); + } } public override void StartNote(UsfmParserState state, string marker, string caller, string category) { base.StartNote(state, marker, caller, category); - - CollectUpdatableTokens(state); + if (!_duplicateVerse) + CollectUpdatableTokens(state); + else + SkipUpdatableTokens(state); } public override void EndNote(UsfmParserState state, string marker, bool closed) @@ -319,7 +337,7 @@ public override void Text(UsfmParserState state, string text) base.Text(state, text); // strip out text in verses that are being replaced - if (ReplaceWithNewTokens(state)) + if (ReplaceWithNewTokens(state) || (_duplicateVerse && CurrentTextType == ScriptureTextType.Verse)) SkipUpdatableTokens(state); else CollectUpdatableTokens(state); @@ -390,15 +408,11 
@@ public string GetUsfm(UsfmStylesheet stylesheet) remarkTokens.Add(new UsfmToken(UsfmTokenType.Paragraph, "rem", null, null)); remarkTokens.Add(new UsfmToken(remark)); } - - if (tokens.Count > 0 && tokens[0].Marker == "id") + if (tokens.Count > 0) { - int index = 1; - if (tokens.Count > 1 && tokens[1].Type == UsfmTokenType.Text) - { - index = 2; - } - while (tokens[index].Marker == "rem") + int index = 0; + HashSet markersToSkip = new HashSet() { "id", "ide", "rem" }; + while (markersToSkip.Contains(tokens[index].Marker)) { index++; if (tokens.Count > index && tokens[index].Type == UsfmTokenType.Text) @@ -407,51 +421,67 @@ public string GetUsfm(UsfmStylesheet stylesheet) tokens.InsertRange(index, remarkTokens); } } + return tokenizer.Detokenize(tokens); } - private (IReadOnlyList RowTexts, Dictionary Metadata) AdvanceRows( + private ( + Dictionary> RowMapIgnoreSegments, + Dictionary> RowMapCheckSegments + ) GetRowMap(IEnumerable rows) + { + var rowMapIgnoreSegments = new Dictionary>( + comparer: ScriptureRefComparer.IgnoreSegments + ); + var rowMapCheckSegments = new Dictionary>( + comparer: ScriptureRefComparer.Default + ); + foreach (UpdateUsfmRow row in rows) + { + ScriptureRef sr = row.Refs[0]; + if (!rowMapIgnoreSegments.ContainsKey(sr)) + rowMapIgnoreSegments[sr] = new List(); + rowMapIgnoreSegments[sr].Add(row); + if (!rowMapCheckSegments.ContainsKey(sr)) + rowMapCheckSegments[sr] = new List(); + rowMapCheckSegments[sr].Add(row); + } + return (rowMapIgnoreSegments, rowMapCheckSegments); + } + + private List GetRowsForRef(ScriptureRef sr) + { + var normalizedScriptureRef = sr.ChangeVersification(_updateRowsVersification); + if (_rowMapCheckSegments.TryGetValue(normalizedScriptureRef, out List rows)) + { + return rows; + } + else if (_rowMapIgnoreSegments.TryGetValue(normalizedScriptureRef, out rows)) + { + return rows; + } + return new List(); + } + + private (IReadOnlyList RowTexts, Dictionary Metadata) GetRows( IReadOnlyList segScrRefs ) { var rowTexts = 
new List(); Dictionary rowMetadata = null; - int sourceIndex = 0; - // search the sorted rows with updated text, starting from where we left off last. - while (_rowIndex < _rows.Count && sourceIndex < segScrRefs.Count) + foreach (ScriptureRef sr in segScrRefs) { - // get the set of references for the current row - int compare = 0; - UpdateUsfmRow row = _rows[_rowIndex]; - (IReadOnlyList rowScrRefs, string text, IReadOnlyDictionary metadata) = ( - row.Refs, - row.Text, - row.Metadata - ); - foreach (ScriptureRef rowScrRef in rowScrRefs) - { - while (sourceIndex < segScrRefs.Count) - { - compare = rowScrRef.CompareTo(segScrRefs[sourceIndex], compareSegments: false); - if (compare > 0) - // row is ahead of source, increment source - sourceIndex++; - else - break; - } - if (compare == 0) - { - // source and row match - // grab the text - both source and row will be incremented in due time... - rowTexts.Add(text); - rowMetadata = metadata.ToDictionary(kvp => kvp.Key, kvp => kvp.Value); - break; - } - } - if (compare <= 0) + List rows = GetRowsForRef(sr); + foreach (UpdateUsfmRow row in rows) { - // source is ahead row, increment row - _rowIndex++; + ( + IReadOnlyList rowScrRefs, + string text, + IReadOnlyDictionary metadata + ) = (row.Refs, row.Text, row.Metadata); + + rowTexts.Add(text); + rowMetadata = metadata.ToDictionary(kvp => kvp.Key, kvp => kvp.Value); } } return (rowTexts, rowMetadata); @@ -558,7 +588,7 @@ private bool HasNewText() private void StartUpdateBlock(IReadOnlyList scriptureRefs) { - (IReadOnlyList rowTexts, Dictionary metadata) = AdvanceRows(scriptureRefs); + (IReadOnlyList rowTexts, Dictionary metadata) = GetRows(scriptureRefs); _updateBlocks.Push( new UsfmUpdateBlock(scriptureRefs, metadata: metadata ?? 
new Dictionary()) ); diff --git a/tests/SIL.Machine.Tests/Corpora/PlaceMarkersUsfmUpdateBlockHandlerTests.cs b/tests/SIL.Machine.Tests/Corpora/PlaceMarkersUsfmUpdateBlockHandlerTests.cs index 9769e475d..b7503770f 100644 --- a/tests/SIL.Machine.Tests/Corpora/PlaceMarkersUsfmUpdateBlockHandlerTests.cs +++ b/tests/SIL.Machine.Tests/Corpora/PlaceMarkersUsfmUpdateBlockHandlerTests.cs @@ -746,8 +746,8 @@ public void UpdateUsfm_VersesOutOfOrder() @"\id MAT \c 1 \v 2 new verse 2 -\v 1 -\p +\v 1 new verse 1 +\p new paragraph 2 "; AssertUsfmEquals(target, result); diff --git a/tests/SIL.Machine.Tests/Corpora/UpdateUsfmParserHandlerTests.cs b/tests/SIL.Machine.Tests/Corpora/UpdateUsfmParserHandlerTests.cs index a8fb6b2cc..654e13f30 100644 --- a/tests/SIL.Machine.Tests/Corpora/UpdateUsfmParserHandlerTests.cs +++ b/tests/SIL.Machine.Tests/Corpora/UpdateUsfmParserHandlerTests.cs @@ -428,13 +428,12 @@ public void GetUsfm_MergeVerseSegments() { var rows = new List { - new UpdateUsfmRow(ScrRef("MAT 2:2"), "Verse 2."), new UpdateUsfmRow(ScrRef("MAT 2:2a"), "Verse 2a."), new UpdateUsfmRow(ScrRef("MAT 2:2b"), "Verse 2b.") }; string target = UpdateUsfm(rows); - Assert.That(target, Contains.Substring("\\v 2-3 Verse 2. Verse 2a. Verse 2b.\r\n")); + Assert.That(target, Contains.Substring("\\v 2-3 Verse 2a. Verse 2b.\r\n")); } [Test] @@ -518,13 +517,12 @@ public void GetUsfm_NonVerse_Relaxed() "\\v 1 First verse of the first chapter. \\f + \\fr 1:1: \\ft This is a footnote for v1.\\f*\r\n" ) ); + // Updating using relaxed refs will not be perfect, but it's the best we can do while allowing for out of order rows Assert.That( target, - Contains.Substring("\\tr \\tc1 The first cell of the table. \\tc2 The second cell of the table.\r\n") - ); - Assert.That( - target, - Contains.Substring("\\tr \\tc1 The third cell of the table. \\tc2 Row two, column two.\r\n") + Contains.Substring( + "\\tr \\tc1 The first cell of the table. The third cell of the table. 
\\tc2 The second cell of the table.\r\n" + ) ); } @@ -1209,6 +1207,98 @@ public void GetUsfm_HeaderReferenceParagraphs() AssertUsfmEquals(target, resultP); } + [Test] + public void GetUsfm_OutOfOrderVerses() + { + var rows = new List + { + new UpdateUsfmRow(ScrRef("MAT 1:1"), "new verse 1"), + new UpdateUsfmRow(ScrRef("MAT 1:2"), "new verse 2"), + new UpdateUsfmRow(ScrRef("MAT 1:3"), "new verse 3"), + new UpdateUsfmRow(ScrRef("MAT 1:4"), "new verse 4"), + new UpdateUsfmRow(ScrRef("MAT 1:5"), "new verse 5"), + new UpdateUsfmRow(ScrRef("MAT 1:6a"), "new verse 6a"), + new UpdateUsfmRow(ScrRef("MAT 1:6b"), "new verse 6b"), + new UpdateUsfmRow(ScrRef("MAT 1:6b/1:s"), "new section"), + new UpdateUsfmRow(ScrRef("MAT 1:7"), "new verse 7"), + new UpdateUsfmRow(ScrRef("MAT 1:8"), "new verse 8"), + }; + + string usfm = + @"\id MAT +\c 1 +\s1 beginning-of-chapter header +\p +\v 1 verse 1 +\v 2 verse 2 +\v 3 verse 3 +\v 6b verse 6b +\s section +\v 7 verse 7 +\v 8 verse 8 +\v 4 verse 4 +\v 5 verse 5 +\v 6a verse 6a +"; + + string target = UpdateUsfm(rows, usfm, paragraphBehavior: UpdateUsfmMarkerBehavior.Strip); + string resultP = + @"\id MAT +\c 1 +\s1 beginning-of-chapter header +\p +\v 1 new verse 1 +\v 2 new verse 2 +\v 3 new verse 3 +\v 6b new verse 6b +\s new section +\v 7 new verse 7 +\v 8 new verse 8 +\v 4 new verse 4 +\v 5 new verse 5 +\v 6a new verse 6a +"; + AssertUsfmEquals(target, resultP); + } + + [Test] + public void GetUsfm_DuplicateVerses() + { + var rows = new List + { + new UpdateUsfmRow(ScrRef("MAT 1:1"), "new verse 1"), + new UpdateUsfmRow(ScrRef("MAT 1:2"), "new verse 2"), + new UpdateUsfmRow(ScrRef("MAT 1:3"), "new verse 3"), + new UpdateUsfmRow(ScrRef("MAT 1:4"), "new verse 4"), + }; + + string usfm = + @"\id MAT +\c 1 +\s1 beginning-of-chapter header +\p +\v 1 verse 1 +\v 2 verse 2 +\v 3 verse 3 +\v 3 another verse 3\f \fr 1.3 \ft Some duplicate verse three note \f* 1 +\p more verse three +\v 4 verse 4 +"; + + string target = UpdateUsfm(rows, usfm, 
paragraphBehavior: UpdateUsfmMarkerBehavior.Strip); + string resultP = + @"\id MAT +\c 1 +\s1 beginning-of-chapter header +\p +\v 1 new verse 1 +\v 2 new verse 2 +\v 3 new verse 3 +\v 4 new verse 4 +"; + AssertUsfmEquals(target, resultP); + } + [Test] public void GetUsfm_PreferExisting_AddRemark() { @@ -1219,6 +1309,7 @@ public void GetUsfm_PreferExisting_AddRemark() }; string usfm = @"\id MAT - Test +\ide UTF-8 \rem Existing remark \c 1 \v 1 Some text @@ -1233,6 +1324,7 @@ public void GetUsfm_PreferExisting_AddRemark() ); string result = @"\id MAT - Test +\ide UTF-8 \rem Existing remark \rem New remark \c 1 @@ -1251,6 +1343,7 @@ public void GetUsfm_PreferExisting_AddRemark() ); result = @"\id MAT - Test +\ide UTF-8 \rem Existing remark \rem New remark \rem New remark 2 From dafe32c565cd94eb6650af8da828cb39002c4441 Mon Sep 17 00:00:00 2001 From: Damien Daspit Date: Thu, 25 Sep 2025 17:39:19 -0400 Subject: [PATCH 2/9] Create rows map based on VerseRef instead of ScriptureRef - add compareSegments parameter to UpdateUsfmParserHandler --- .../Corpora/ParatextProjectTextUpdaterBase.cs | 6 +- .../ScriptureRefUsfmParserHandlerBase.cs | 15 +- .../Corpora/UpdateUsfmParserHandler.cs | 197 ++++++++++++------ src/SIL.Machine/Corpora/VerseRefComparer.cs | 19 +- .../Corpora/UpdateUsfmParserHandlerTests.cs | 28 ++- 5 files changed, 179 insertions(+), 86 deletions(-) diff --git a/src/SIL.Machine/Corpora/ParatextProjectTextUpdaterBase.cs b/src/SIL.Machine/Corpora/ParatextProjectTextUpdaterBase.cs index 652732985..e35e514d4 100644 --- a/src/SIL.Machine/Corpora/ParatextProjectTextUpdaterBase.cs +++ b/src/SIL.Machine/Corpora/ParatextProjectTextUpdaterBase.cs @@ -29,7 +29,8 @@ public string UpdateUsfm( UpdateUsfmMarkerBehavior styleBehavior = UpdateUsfmMarkerBehavior.Strip, IEnumerable preserveParagraphStyles = null, IEnumerable updateBlockHandlers = null, - IEnumerable remarks = null + IEnumerable remarks = null, + bool compareSegments = false ) { string fileName = 
_settings.GetBookFileName(bookId); @@ -51,7 +52,8 @@ public string UpdateUsfm( styleBehavior, preserveParagraphStyles, updateBlockHandlers, - remarks + remarks, + compareSegments ); try { diff --git a/src/SIL.Machine/Corpora/ScriptureRefUsfmParserHandlerBase.cs b/src/SIL.Machine/Corpora/ScriptureRefUsfmParserHandlerBase.cs index 56f08ea9e..f5a52a795 100644 --- a/src/SIL.Machine/Corpora/ScriptureRefUsfmParserHandlerBase.cs +++ b/src/SIL.Machine/Corpora/ScriptureRefUsfmParserHandlerBase.cs @@ -18,7 +18,6 @@ public abstract class ScriptureRefUsfmParserHandlerBase : UsfmParserHandlerBase private VerseRef _curVerseRef; private readonly Stack _curElements; private readonly Stack _curTextType; - protected bool _duplicateVerse = false; protected ScriptureRefUsfmParserHandlerBase() { @@ -29,6 +28,8 @@ protected ScriptureRefUsfmParserHandlerBase() protected ScriptureTextType CurrentTextType => _curTextType.Count == 0 ? ScriptureTextType.None : _curTextType.Peek(); + protected bool DuplicateVerse { get; private set; } + private static readonly string[] EmbedStyles = new[] { "f", "fe", "x", "fig" }; private static bool IsEmbedStyle(string marker) @@ -66,13 +67,13 @@ public override void Verse( string pubNumber ) { - if (state.VerseRef.Equals(_curVerseRef) && !_duplicateVerse) + if (state.VerseRef.Equals(_curVerseRef) && !DuplicateVerse) { if (state.VerseRef.VerseNum > 0) { EndVerseText(state, CreateVerseRefs()); // ignore duplicate verses - _duplicateVerse = true; + DuplicateVerse = true; } } else if (VerseRef.AreOverlappingVersesRanges(verse1: number, verse2: _curVerseRef.Verse)) @@ -251,14 +252,14 @@ protected virtual void EndEmbedText(UsfmParserState state, ScriptureRef scriptur private void StartVerseText(UsfmParserState state) { - _duplicateVerse = false; + DuplicateVerse = false; _curTextType.Push(ScriptureTextType.Verse); StartVerseText(state, CreateVerseRefs()); } private void EndVerseText(UsfmParserState state) { - if (!_duplicateVerse && _curVerseRef.VerseNum > 0) + 
if (!DuplicateVerse && _curVerseRef.VerseNum > 0) EndVerseText(state, CreateVerseRefs()); if (_curVerseRef.VerseNum > 0) _curTextType.Pop(); @@ -291,7 +292,7 @@ private void StartEmbedText(UsfmParserState state, string marker) { if (_curVerseRef.IsDefault) UpdateVerseRef(state.VerseRef, marker); - if (!_duplicateVerse) + if (!DuplicateVerse) { CheckConvertVerseParaToNonVerse(state); NextElement(marker); @@ -302,7 +303,7 @@ private void StartEmbedText(UsfmParserState state, string marker) private void EndEmbedText(UsfmParserState state) { - if (!_duplicateVerse && _curTextType.Count > 0 && _curTextType.Peek() == ScriptureTextType.Embed) + if (!DuplicateVerse && _curTextType.Count > 0 && _curTextType.Peek() == ScriptureTextType.Embed) { EndEmbedText(state, CreateNonVerseRef()); _curTextType.Pop(); diff --git a/src/SIL.Machine/Corpora/UpdateUsfmParserHandler.cs b/src/SIL.Machine/Corpora/UpdateUsfmParserHandler.cs index b53f1dd23..76e908ca4 100644 --- a/src/SIL.Machine/Corpora/UpdateUsfmParserHandler.cs +++ b/src/SIL.Machine/Corpora/UpdateUsfmParserHandler.cs @@ -42,8 +42,12 @@ public UpdateUsfmRow( */ public class UpdateUsfmParserHandler : ScriptureRefUsfmParserHandlerBase { - private readonly Dictionary> _rowMapIgnoreSegments; - private readonly Dictionary> _rowMapCheckSegments; + private readonly IReadOnlyList _rows; + private int _rowIndex; + private VerseRef _verseRowsRef; + private readonly List _verseRows; + private int _verseRowIndex; + private readonly Dictionary> _verseRowsMap; private readonly ScrVers _updateRowsVersification; private readonly List _tokens; private readonly List _updatedText; @@ -60,6 +64,7 @@ public class UpdateUsfmParserHandler : ScriptureRefUsfmParserHandlerBase private readonly Stack _replace; private int _tokenIndex; private readonly Func _errorHandler; + private readonly bool _compareSegments; public UpdateUsfmParserHandler( IReadOnlyList rows = null, @@ -71,17 +76,18 @@ public UpdateUsfmParserHandler( IEnumerable 
preserveParagraphStyles = null, IEnumerable updateBlockHandlers = null, IEnumerable remarks = null, - Func errorHandler = null + Func errorHandler = null, + bool compareSegments = false ) { - // We need two maps so that update rows can be specified per segment - // but be handled correctly whether or not the USFM has segments for that verse - (_rowMapIgnoreSegments, _rowMapCheckSegments) = GetRowMap(rows ?? Array.Empty()); + _rows = rows ?? Array.Empty(); + _verseRows = new List(); + _verseRowsMap = new Dictionary>( + compareSegments ? VerseRefComparer.Default : VerseRefComparer.IgnoreSegments + ); _updateRowsVersification = ScrVers.English; - if (rows != null && rows.Count > 0) - { - _updateRowsVersification = rows.First(r => r.Refs.Count > 0).Refs[0].Versification; - } + if (_rows.Count > 0) + _updateRowsVersification = _rows.First(r => r.Refs.Count > 0).Refs[0].Versification; _tokens = new List(); _updatedText = new List(); _updateBlocks = new Stack(); @@ -104,6 +110,7 @@ public UpdateUsfmParserHandler( _errorHandler = errorHandler; if (_errorHandler == null) _errorHandler = (error) => false; + _compareSegments = compareSegments; } public IReadOnlyList Tokens => _tokens; @@ -116,6 +123,10 @@ public override void EndUsfm(UsfmParserState state) public override void StartBook(UsfmParserState state, string marker, string code) { + _verseRowsRef = state.VerseRef; + UpdateVerseRowsMap(); + UpdateVerseRows(); + CollectReadonlyTokens(state); _updateBlocks.Push(new UsfmUpdateBlock()); var startBookTokens = new List(); @@ -146,7 +157,7 @@ IReadOnlyList attributes if (state.IsVerseText) { // Only strip paragraph markers in a verse - if (_paragraphBehavior == UpdateUsfmMarkerBehavior.Preserve && !_duplicateVerse) + if (_paragraphBehavior == UpdateUsfmMarkerBehavior.Preserve && !DuplicateVerse) { CollectUpdatableTokens(state); } @@ -202,6 +213,13 @@ string pubNumber { UseUpdatedText(); + if (!_verseRowsRef.Equals(state.VerseRef)) + { + _verseRowsRef = state.VerseRef; + 
UpdateVerseRowsMap(); + UpdateVerseRows(); + } + base.Chapter(state, number, marker, altNumber, pubNumber); CollectReadonlyTokens(state); @@ -239,9 +257,15 @@ string pubNumber } } + if (!_verseRowsRef.Equals(state.VerseRef)) + { + _verseRowsRef = state.VerseRef; + UpdateVerseRows(); + } + base.Verse(state, number, marker, altNumber, pubNumber); - if (_duplicateVerse) + if (DuplicateVerse) { SkipUpdatableTokens(state); } @@ -254,7 +278,7 @@ string pubNumber public override void StartNote(UsfmParserState state, string marker, string caller, string category) { base.StartNote(state, marker, caller, category); - if (!_duplicateVerse) + if (!DuplicateVerse) CollectUpdatableTokens(state); else SkipUpdatableTokens(state); @@ -337,7 +361,7 @@ public override void Text(UsfmParserState state, string text) base.Text(state, text); // strip out text in verses that are being replaced - if (ReplaceWithNewTokens(state) || (_duplicateVerse && CurrentTextType == ScriptureTextType.Verse)) + if (ReplaceWithNewTokens(state) || (DuplicateVerse && CurrentTextType == ScriptureTextType.Verse)) SkipUpdatableTokens(state); else CollectUpdatableTokens(state); @@ -425,63 +449,48 @@ public string GetUsfm(UsfmStylesheet stylesheet) return tokenizer.Detokenize(tokens); } - private ( - Dictionary> RowMapIgnoreSegments, - Dictionary> RowMapCheckSegments - ) GetRowMap(IEnumerable rows) - { - var rowMapIgnoreSegments = new Dictionary>( - comparer: ScriptureRefComparer.IgnoreSegments - ); - var rowMapCheckSegments = new Dictionary>( - comparer: ScriptureRefComparer.Default - ); - foreach (UpdateUsfmRow row in rows) - { - ScriptureRef sr = row.Refs[0]; - if (!rowMapIgnoreSegments.ContainsKey(sr)) - rowMapIgnoreSegments[sr] = new List(); - rowMapIgnoreSegments[sr].Add(row); - if (!rowMapCheckSegments.ContainsKey(sr)) - rowMapCheckSegments[sr] = new List(); - rowMapCheckSegments[sr].Add(row); - } - return (rowMapIgnoreSegments, rowMapCheckSegments); - } - - private List GetRowsForRef(ScriptureRef sr) - { 
- var normalizedScriptureRef = sr.ChangeVersification(_updateRowsVersification); - if (_rowMapCheckSegments.TryGetValue(normalizedScriptureRef, out List rows)) - { - return rows; - } - else if (_rowMapIgnoreSegments.TryGetValue(normalizedScriptureRef, out rows)) - { - return rows; - } - return new List(); - } - - private (IReadOnlyList RowTexts, Dictionary Metadata) GetRows( + private (IReadOnlyList RowTexts, Dictionary Metadata) AdvanceRows( IReadOnlyList segScrRefs ) { var rowTexts = new List(); Dictionary rowMetadata = null; - foreach (ScriptureRef sr in segScrRefs) + int sourceIndex = 0; + // search the sorted rows with updated text, starting from where we left off last. + while (_verseRowIndex < _verseRows.Count && sourceIndex < segScrRefs.Count) { - List rows = GetRowsForRef(sr); - foreach (UpdateUsfmRow row in rows) + // get the set of references for the current row + int compare = 0; + UpdateUsfmRow row = _rows[_verseRows[_verseRowIndex]]; + (IReadOnlyList rowScrRefs, string text, IReadOnlyDictionary metadata) = ( + row.Refs, + row.Text, + row.Metadata + ); + foreach (ScriptureRef rowScrRef in rowScrRefs) + { + while (sourceIndex < segScrRefs.Count) + { + compare = rowScrRef.CompareTo(segScrRefs[sourceIndex], compareSegments: _compareSegments); + if (compare > 0) + // row is ahead of source, increment source + sourceIndex++; + else + break; + } + if (compare == 0) + { + // source and row match + // grab the text - both source and row will be incremented in due time... 
+ rowTexts.Add(text); + rowMetadata = metadata.ToDictionary(kvp => kvp.Key, kvp => kvp.Value); + break; + } + } + if (compare <= 0) { - ( - IReadOnlyList rowScrRefs, - string text, - IReadOnlyDictionary metadata - ) = (row.Refs, row.Text, row.Metadata); - - rowTexts.Add(text); - rowMetadata = metadata.ToDictionary(kvp => kvp.Key, kvp => kvp.Value); + // source is ahead row, increment row + _verseRowIndex++; } } return (rowTexts, rowMetadata); @@ -588,7 +597,7 @@ private bool HasNewText() private void StartUpdateBlock(IReadOnlyList scriptureRefs) { - (IReadOnlyList rowTexts, Dictionary metadata) = GetRows(scriptureRefs); + (IReadOnlyList rowTexts, Dictionary metadata) = AdvanceRows(scriptureRefs); _updateBlocks.Push( new UsfmUpdateBlock(scriptureRefs, metadata: metadata ?? new Dictionary()) ); @@ -679,5 +688,63 @@ private bool IsNonverseParagraph(UsfmParserState state, UsfmUpdateBlockElement e UsfmTag paraTag = state.Stylesheet.GetTag(paraToken.Marker); return paraTag.TextType != UsfmTextType.VerseText && paraTag.TextType != UsfmTextType.NotSpecified; } + + private void UpdateVerseRowsMap() + { + _verseRowsMap.Clear(); + while (_rowIndex < _rows.Count && _rows[_rowIndex].Refs[0].ChapterNum == _verseRowsRef.ChapterNum) + { + UpdateUsfmRow row = _rows[_rowIndex]; + var ri = new RowInfo(_rowIndex); + foreach (ScriptureRef sr in row.Refs) + { + if (!_verseRowsMap.TryGetValue(sr.VerseRef, out List rows)) + { + rows = new List(); + _verseRowsMap[sr.VerseRef] = rows; + } + rows.Add(ri); + } + _rowIndex++; + } + } + + private void UpdateVerseRows() + { + VerseRef vref = _verseRowsRef; + // We are using a dictionary, which uses an equality comparer. As a result, we need to change the + // source verse ref to use the row versification. If we used a SortedList, it wouldn't be necessary, but it + // would be less efficient. 
+ vref.ChangeVersification(_updateRowsVersification); + + _verseRows.Clear(); + _verseRowIndex = 0; + + foreach (VerseRef vr in vref.AllVerses()) + { + if (_verseRowsMap.TryGetValue(vr, out List rows)) + { + foreach (RowInfo row in rows) + { + if (!row.IsConsumed) + { + _verseRows.Add(row.RowIndex); + row.IsConsumed = true; + } + } + } + } + } + + private class RowInfo + { + public RowInfo(int rowIndex) + { + RowIndex = rowIndex; + } + + public int RowIndex { get; set; } + public bool IsConsumed { get; set; } + } } } diff --git a/src/SIL.Machine/Corpora/VerseRefComparer.cs b/src/SIL.Machine/Corpora/VerseRefComparer.cs index 1d49f422e..c0aea6487 100644 --- a/src/SIL.Machine/Corpora/VerseRefComparer.cs +++ b/src/SIL.Machine/Corpora/VerseRefComparer.cs @@ -6,10 +6,10 @@ namespace SIL.Machine.Corpora { - public class VerseRefComparer : IComparer + public class VerseRefComparer : IComparer, IEqualityComparer { - public static IComparer Default { get; } = new VerseRefComparer(compareSegments: true); - public static IComparer IgnoreSegments { get; } = new VerseRefComparer(compareSegments: false); + public static VerseRefComparer Default { get; } = new VerseRefComparer(compareSegments: true); + public static VerseRefComparer IgnoreSegments { get; } = new VerseRefComparer(compareSegments: false); private readonly bool _compareSegments; @@ -37,5 +37,18 @@ public int Compare(VerseRef x, VerseRef y) } return xArray.Length.CompareTo(yArray.Length); } + + public bool Equals(VerseRef x, VerseRef y) + { + return Compare(x, y) == 0; + } + + public int GetHashCode(VerseRef obj) + { + int hashCode = 23; + hashCode = hashCode * 31 + (_compareSegments ? 
obj.BBBCCCVVVS.GetHashCode() : obj.BBBCCCVVV.GetHashCode()); + hashCode = hashCode * 31 + obj.Versification.GetHashCode(); + return hashCode; + } } } diff --git a/tests/SIL.Machine.Tests/Corpora/UpdateUsfmParserHandlerTests.cs b/tests/SIL.Machine.Tests/Corpora/UpdateUsfmParserHandlerTests.cs index 654e13f30..6eb15cd07 100644 --- a/tests/SIL.Machine.Tests/Corpora/UpdateUsfmParserHandlerTests.cs +++ b/tests/SIL.Machine.Tests/Corpora/UpdateUsfmParserHandlerTests.cs @@ -428,12 +428,13 @@ public void GetUsfm_MergeVerseSegments() { var rows = new List { + new UpdateUsfmRow(ScrRef("MAT 2:2"), "Verse 2."), new UpdateUsfmRow(ScrRef("MAT 2:2a"), "Verse 2a."), new UpdateUsfmRow(ScrRef("MAT 2:2b"), "Verse 2b.") }; string target = UpdateUsfm(rows); - Assert.That(target, Contains.Substring("\\v 2-3 Verse 2a. Verse 2b.\r\n")); + Assert.That(target, Contains.Substring("\\v 2-3 Verse 2. Verse 2a. Verse 2b.\r\n")); } [Test] @@ -517,12 +518,13 @@ public void GetUsfm_NonVerse_Relaxed() "\\v 1 First verse of the first chapter. \\f + \\fr 1:1: \\ft This is a footnote for v1.\\f*\r\n" ) ); - // Updating using relaxed refs will not be perfect, but it's the best we can do while allowing for out of order rows Assert.That( target, - Contains.Substring( - "\\tr \\tc1 The first cell of the table. The third cell of the table. \\tc2 The second cell of the table.\r\n" - ) + Contains.Substring("\\tr \\tc1 The first cell of the table. \\tc2 The second cell of the table.\r\n") + ); + Assert.That( + target, + Contains.Substring("\\tr \\tc1 The third cell of the table. 
\\tc2 Row two, column two.\r\n") ); } @@ -1241,7 +1243,12 @@ public void GetUsfm_OutOfOrderVerses() \v 6a verse 6a "; - string target = UpdateUsfm(rows, usfm, paragraphBehavior: UpdateUsfmMarkerBehavior.Strip); + string target = UpdateUsfm( + rows, + usfm, + paragraphBehavior: UpdateUsfmMarkerBehavior.Strip, + compareSegments: true + ); string resultP = @"\id MAT \c 1 @@ -1371,7 +1378,8 @@ private static string UpdateUsfm( UpdateUsfmMarkerBehavior styleBehavior = UpdateUsfmMarkerBehavior.Strip, IEnumerable? preserveParagraphStyles = null, IEnumerable? usfmUpdateBlockHandlers = null, - IEnumerable? remarks = null + IEnumerable? remarks = null, + bool compareSegments = false ) { if (source is null) @@ -1387,7 +1395,8 @@ private static string UpdateUsfm( styleBehavior, preserveParagraphStyles, usfmUpdateBlockHandlers, - remarks + remarks, + compareSegments ); } else @@ -1402,7 +1411,8 @@ private static string UpdateUsfm( styleBehavior, preserveParagraphStyles, usfmUpdateBlockHandlers, - remarks + remarks, + compareSegments ); UsfmParser.Parse(source, updater); return updater.GetUsfm(); From 8de251876c4c65e8cce6af0ac413d9595c7db4e0 Mon Sep 17 00:00:00 2001 From: Enkidu93 Date: Mon, 29 Sep 2025 16:51:45 -0400 Subject: [PATCH 3/9] Move scripture ref comparer to separate file; sort rows --- src/SIL.Machine/Corpora/ScriptureRef.cs | 34 ----------------- .../Corpora/ScriptureRefComparer.cs | 37 +++++++++++++++++++ .../Corpora/UpdateUsfmParserHandler.cs | 9 ++++- 3 files changed, 45 insertions(+), 35 deletions(-) create mode 100644 src/SIL.Machine/Corpora/ScriptureRefComparer.cs diff --git a/src/SIL.Machine/Corpora/ScriptureRef.cs b/src/SIL.Machine/Corpora/ScriptureRef.cs index 1603d7cc7..73d230315 100644 --- a/src/SIL.Machine/Corpora/ScriptureRef.cs +++ b/src/SIL.Machine/Corpora/ScriptureRef.cs @@ -130,38 +130,4 @@ public override string ToString() return sb.ToString(); } } - - public class ScriptureRefComparer : IComparer, IEqualityComparer - { - public static
ScriptureRefComparer Default { get; } = new ScriptureRefComparer(compareSegments: true); - public static ScriptureRefComparer IgnoreSegments { get; } = new ScriptureRefComparer(compareSegments: false); - private readonly bool _compareSegments; - - public ScriptureRefComparer(bool compareSegments = true) - { - _compareSegments = compareSegments; - } - - public int Compare(ScriptureRef x, ScriptureRef y) - { - return x.CompareTo(y, _compareSegments); - } - - public bool Equals(ScriptureRef x, ScriptureRef y) - { - return x.CompareTo(y, _compareSegments) == 0; - } - - public int GetHashCode(ScriptureRef obj) - { - int hashCode = 23; - hashCode = - hashCode * 31 - + (_compareSegments ? obj.VerseRef.BBBCCCVVVS.GetHashCode() : obj.VerseRef.BBBCCCVVV.GetHashCode()); - hashCode = hashCode * 31 + obj.Versification.GetHashCode(); - // Using ToRelaxed is necessary to maintain equality across relaxed refs, Equals properly handles relaxed ref comparison - hashCode = hashCode * 31 + obj.ToRelaxed().Path.GetSequenceHashCode(); - return hashCode; - } - } } diff --git a/src/SIL.Machine/Corpora/ScriptureRefComparer.cs b/src/SIL.Machine/Corpora/ScriptureRefComparer.cs new file mode 100644 index 000000000..e3e7cc7c4 --- /dev/null +++ b/src/SIL.Machine/Corpora/ScriptureRefComparer.cs @@ -0,0 +1,37 @@ +using System.Collections.Generic; +using SIL.Extensions; +using SIL.Machine.Corpora; + +public class ScriptureRefComparer : IComparer, IEqualityComparer +{ + public static ScriptureRefComparer Default { get; } = new ScriptureRefComparer(compareSegments: true); + public static ScriptureRefComparer IgnoreSegments { get; } = new ScriptureRefComparer(compareSegments: false); + private readonly bool _compareSegments; + + public ScriptureRefComparer(bool compareSegments = true) + { + _compareSegments = compareSegments; + } + + public int Compare(ScriptureRef x, ScriptureRef y) + { + return x.CompareTo(y, _compareSegments); + } + + public bool Equals(ScriptureRef x, ScriptureRef y) + { + return 
x.CompareTo(y, _compareSegments) == 0; + } + + public int GetHashCode(ScriptureRef obj) + { + int hashCode = 23; + hashCode = + hashCode * 31 + + (_compareSegments ? obj.VerseRef.BBBCCCVVVS.GetHashCode() : obj.VerseRef.BBBCCCVVV.GetHashCode()); + hashCode = hashCode * 31 + obj.Versification.GetHashCode(); + // Using ToRelaxed is necessary to maintain equality across relaxed refs, Equals properly handles relaxed ref comparison + hashCode = hashCode * 31 + obj.ToRelaxed().Path.GetSequenceHashCode(); + return hashCode; + } +} diff --git a/src/SIL.Machine/Corpora/UpdateUsfmParserHandler.cs b/src/SIL.Machine/Corpora/UpdateUsfmParserHandler.cs index 76e908ca4..cd2fae847 100644 --- a/src/SIL.Machine/Corpora/UpdateUsfmParserHandler.cs +++ b/src/SIL.Machine/Corpora/UpdateUsfmParserHandler.cs @@ -81,13 +81,20 @@ public UpdateUsfmParserHandler( ) { _rows = rows ?? Array.Empty(); + _rows = _rows + .Where(r => r.Refs.Count > 0) + .OrderBy( + r => r.Refs[0], + compareSegments ? ScriptureRefComparer.Default : ScriptureRefComparer.IgnoreSegments + ) + .ToArray(); _verseRows = new List(); _verseRowsMap = new Dictionary>( compareSegments ? 
VerseRefComparer.Default : VerseRefComparer.IgnoreSegments ); _updateRowsVersification = ScrVers.English; if (_rows.Count > 0) - _updateRowsVersification = _rows.First(r => r.Refs.Count > 0).Refs[0].Versification; + _updateRowsVersification = _rows[0].Refs[0].Versification; _tokens = new List(); _updatedText = new List(); _updateBlocks = new Stack(); From d245c273e50d9ad181b5f6a505b01ec4a719185e Mon Sep 17 00:00:00 2001 From: Enkidu93 Date: Mon, 29 Sep 2025 19:57:25 -0400 Subject: [PATCH 4/9] Pass error handler in UpdateUsfm --- src/SIL.Machine/Corpora/ParatextProjectTextUpdaterBase.cs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/SIL.Machine/Corpora/ParatextProjectTextUpdaterBase.cs b/src/SIL.Machine/Corpora/ParatextProjectTextUpdaterBase.cs index e35e514d4..85dc470a0 100644 --- a/src/SIL.Machine/Corpora/ParatextProjectTextUpdaterBase.cs +++ b/src/SIL.Machine/Corpora/ParatextProjectTextUpdaterBase.cs @@ -30,6 +30,7 @@ public string UpdateUsfm( IEnumerable preserveParagraphStyles = null, IEnumerable updateBlockHandlers = null, IEnumerable remarks = null, + Func errorHandler = null, bool compareSegments = false ) { @@ -53,6 +54,7 @@ public string UpdateUsfm( preserveParagraphStyles, updateBlockHandlers, remarks, + errorHandler, compareSegments ); try From 9fa4efb9ad7e4ee132e5bd39cfd027fe56c3fe1f Mon Sep 17 00:00:00 2001 From: Enkidu93 Date: Mon, 29 Sep 2025 20:06:57 -0400 Subject: [PATCH 5/9] Sort by verse ref not scripture ref; add error handler in tests --- src/SIL.Machine/Corpora/UpdateUsfmParserHandler.cs | 4 ++-- .../SIL.Machine.Tests/Corpora/UpdateUsfmParserHandlerTests.cs | 2 ++ 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/src/SIL.Machine/Corpora/UpdateUsfmParserHandler.cs b/src/SIL.Machine/Corpora/UpdateUsfmParserHandler.cs index cd2fae847..7e5a881a3 100644 --- a/src/SIL.Machine/Corpora/UpdateUsfmParserHandler.cs +++ b/src/SIL.Machine/Corpora/UpdateUsfmParserHandler.cs @@ -84,8 +84,8 @@ public UpdateUsfmParserHandler( _rows = _rows 
.Where(r => r.Refs.Count > 0) .OrderBy( - r => r.Refs[0], - compareSegments ? ScriptureRefComparer.Default : ScriptureRefComparer.IgnoreSegments + r => r.Refs[0].VerseRef, + compareSegments ? VerseRefComparer.Default : VerseRefComparer.IgnoreSegments ) .ToArray(); _verseRows = new List(); diff --git a/tests/SIL.Machine.Tests/Corpora/UpdateUsfmParserHandlerTests.cs b/tests/SIL.Machine.Tests/Corpora/UpdateUsfmParserHandlerTests.cs index 6eb15cd07..6255fd228 100644 --- a/tests/SIL.Machine.Tests/Corpora/UpdateUsfmParserHandlerTests.cs +++ b/tests/SIL.Machine.Tests/Corpora/UpdateUsfmParserHandlerTests.cs @@ -1396,6 +1396,7 @@ private static string UpdateUsfm( preserveParagraphStyles, usfmUpdateBlockHandlers, remarks, + (_) => false, compareSegments ); } @@ -1412,6 +1413,7 @@ private static string UpdateUsfm( preserveParagraphStyles, usfmUpdateBlockHandlers, remarks, + (_) => false, compareSegments ); UsfmParser.Parse(source, updater); From a6d71c1f3c6d468330ba5617919a5c0d68a45723 Mon Sep 17 00:00:00 2001 From: Enkidu93 Date: Tue, 30 Sep 2025 16:01:41 -0400 Subject: [PATCH 6/9] Do not sort rows --- src/SIL.Machine/Corpora/UpdateUsfmParserHandler.cs | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/src/SIL.Machine/Corpora/UpdateUsfmParserHandler.cs b/src/SIL.Machine/Corpora/UpdateUsfmParserHandler.cs index 7e5a881a3..76e908ca4 100644 --- a/src/SIL.Machine/Corpora/UpdateUsfmParserHandler.cs +++ b/src/SIL.Machine/Corpora/UpdateUsfmParserHandler.cs @@ -81,20 +81,13 @@ public UpdateUsfmParserHandler( ) { _rows = rows ?? Array.Empty(); - _rows = _rows - .Where(r => r.Refs.Count > 0) - .OrderBy( - r => r.Refs[0].VerseRef, - compareSegments ? VerseRefComparer.Default : VerseRefComparer.IgnoreSegments - ) - .ToArray(); _verseRows = new List(); _verseRowsMap = new Dictionary>( compareSegments ? 
VerseRefComparer.Default : VerseRefComparer.IgnoreSegments ); _updateRowsVersification = ScrVers.English; if (_rows.Count > 0) - _updateRowsVersification = _rows[0].Refs[0].Versification; + _updateRowsVersification = _rows.First(r => r.Refs.Count > 0).Refs[0].Versification; _tokens = new List(); _updatedText = new List(); _updateBlocks = new Stack(); From 2743d44ebc3d9f3e5ce17d769b2ba805c0e94385 Mon Sep 17 00:00:00 2001 From: Enkidu93 Date: Wed, 1 Oct 2025 10:48:17 -0400 Subject: [PATCH 7/9] Edit comment since rows will no longer necessarily be sorted --- src/SIL.Machine/Corpora/UpdateUsfmParserHandler.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/SIL.Machine/Corpora/UpdateUsfmParserHandler.cs b/src/SIL.Machine/Corpora/UpdateUsfmParserHandler.cs index 76e908ca4..1f3ae1759 100644 --- a/src/SIL.Machine/Corpora/UpdateUsfmParserHandler.cs +++ b/src/SIL.Machine/Corpora/UpdateUsfmParserHandler.cs @@ -456,7 +456,7 @@ IReadOnlyList segScrRefs var rowTexts = new List(); Dictionary rowMetadata = null; int sourceIndex = 0; - // search the sorted rows with updated text, starting from where we left off last. + // search the rows with updated text, starting from where we left off last. 
while (_verseRowIndex < _verseRows.Count && sourceIndex < segScrRefs.Count) { // get the set of references for the current row From 1a4a42fc7eb875438b71519054bec563b6e078bd Mon Sep 17 00:00:00 2001 From: Enkidu93 Date: Wed, 1 Oct 2025 14:43:38 -0400 Subject: [PATCH 8/9] Revert comment --- src/SIL.Machine/Corpora/UpdateUsfmParserHandler.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/SIL.Machine/Corpora/UpdateUsfmParserHandler.cs b/src/SIL.Machine/Corpora/UpdateUsfmParserHandler.cs index 1f3ae1759..76e908ca4 100644 --- a/src/SIL.Machine/Corpora/UpdateUsfmParserHandler.cs +++ b/src/SIL.Machine/Corpora/UpdateUsfmParserHandler.cs @@ -456,7 +456,7 @@ IReadOnlyList segScrRefs var rowTexts = new List(); Dictionary rowMetadata = null; int sourceIndex = 0; - // search the rows with updated text, starting from where we left off last. + // search the sorted rows with updated text, starting from where we left off last. while (_verseRowIndex < _verseRows.Count && sourceIndex < segScrRefs.Count) { // get the set of references for the current row From 56f6d276dae3531112725136459b15f87e7b9d00 Mon Sep 17 00:00:00 2001 From: Enkidu93 Date: Wed, 1 Oct 2025 14:52:23 -0400 Subject: [PATCH 9/9] Add comment regarding the rows parameter --- src/SIL.Machine/Corpora/UpdateUsfmParserHandler.cs | 1 + 1 file changed, 1 insertion(+) diff --git a/src/SIL.Machine/Corpora/UpdateUsfmParserHandler.cs b/src/SIL.Machine/Corpora/UpdateUsfmParserHandler.cs index 76e908ca4..4b9c37ef1 100644 --- a/src/SIL.Machine/Corpora/UpdateUsfmParserHandler.cs +++ b/src/SIL.Machine/Corpora/UpdateUsfmParserHandler.cs @@ -66,6 +66,7 @@ public class UpdateUsfmParserHandler : ScriptureRefUsfmParserHandlerBase private readonly Func _errorHandler; private readonly bool _compareSegments; + /// UpdateUsfmRows must be in order public UpdateUsfmParserHandler( IReadOnlyList rows = null, string idText = null,