diff --git a/src/SIL.Machine/Corpora/ParatextProjectTextUpdaterBase.cs b/src/SIL.Machine/Corpora/ParatextProjectTextUpdaterBase.cs index 06faf34a..02cf07e3 100644 --- a/src/SIL.Machine/Corpora/ParatextProjectTextUpdaterBase.cs +++ b/src/SIL.Machine/Corpora/ParatextProjectTextUpdaterBase.cs @@ -27,7 +27,8 @@ public string UpdateUsfm( UpdateUsfmMarkerBehavior paragraphBehavior = UpdateUsfmMarkerBehavior.Preserve, UpdateUsfmMarkerBehavior embedBehavior = UpdateUsfmMarkerBehavior.Preserve, UpdateUsfmMarkerBehavior styleBehavior = UpdateUsfmMarkerBehavior.Strip, - IReadOnlyCollection preserveParagraphStyles = null, + IEnumerable preserveParagraphStyles = null, + IEnumerable updateBlockHandlers = null, IEnumerable remarks = null ) { @@ -49,6 +50,7 @@ public string UpdateUsfm( embedBehavior, styleBehavior, preserveParagraphStyles, + updateBlockHandlers, remarks ); try diff --git a/src/SIL.Machine/Corpora/ScriptureRefUsfmParserHandlerBase.cs b/src/SIL.Machine/Corpora/ScriptureRefUsfmParserHandlerBase.cs index ed30e722..242cc32d 100644 --- a/src/SIL.Machine/Corpora/ScriptureRefUsfmParserHandlerBase.cs +++ b/src/SIL.Machine/Corpora/ScriptureRefUsfmParserHandlerBase.cs @@ -10,7 +10,7 @@ public enum ScriptureTextType None, NonVerse, Verse, - NoteText + Embed } public abstract class ScriptureRefUsfmParserHandlerBase : UsfmParserHandlerBase @@ -19,9 +19,6 @@ public abstract class ScriptureRefUsfmParserHandlerBase : UsfmParserHandlerBase private readonly Stack _curElements; private readonly Stack _curTextType; private bool _duplicateVerse = false; - private bool _inEmbed; - protected bool InNoteText { get; private set; } - private bool _inNestedEmbed; protected ScriptureRefUsfmParserHandlerBase() { @@ -32,8 +29,12 @@ protected ScriptureRefUsfmParserHandlerBase() protected ScriptureTextType CurrentTextType => _curTextType.Count == 0 ? ScriptureTextType.None : _curTextType.Peek(); - private static readonly string[] EmbedStyles = new[] { "f", "fe", "fig", "fm", "x" }; - private static readonly char[] EmbedPartStartCharStyles = new[] { 'f', 'x', 'z' }; + private static readonly string[] EmbedStyles = new[] { "f", "fe", "x", "fig" }; + + private static bool IsEmbedStyle(string marker) + { + return marker != null && (EmbedStyles.Contains(marker.Trim('*')) || marker.StartsWith("z")); + } public override void EndUsfm(UsfmParserState state) { @@ -158,36 +159,6 @@ public override void EndSidebar(UsfmParserState state, string marker, bool close EndParentElement(); } - public override void StartNote(UsfmParserState state, string marker, string caller, string category) - { - _inEmbed = true; - StartEmbed(state, marker); - } - - public override void EndNote(UsfmParserState state, string marker, bool closed) - { - EndNoteTextWrapper(state); - EndEmbed(state, marker, null, closed); - _inEmbed = false; - } - - protected void StartEmbed(UsfmParserState state, string marker) - { - if (_curVerseRef.IsDefault) - UpdateVerseRef(state.VerseRef, marker); - - if (!_duplicateVerse) - { - // if we hit a note in a verse paragraph and we aren't in a verse, then start a non-verse segment - CheckConvertVerseParaToNonVerse(state); - NextElement(marker); - } - - StartEmbed(state, CreateNonVerseRef()); - } - - protected virtual void StartEmbed(UsfmParserState state, ScriptureRef scriptureRef) { } - protected virtual void EndEmbed( UsfmParserState state, string marker, @@ -214,23 +185,11 @@ public override void StartChar( IReadOnlyList attributes ) { - if (IsEmbedPartStyle(markerWithoutPlus) & InNoteText) - _inNestedEmbed = true; - // if we hit a character marker in a verse paragraph and we aren't in a verse, then start a non-verse // segment CheckConvertVerseParaToNonVerse(state); - if (IsEmbedStyle(markerWithoutPlus)) - { - _inEmbed = true; - StartEmbed(state, markerWithoutPlus); - } - - if (IsNoteText(markerWithoutPlus)) - { - StartNoteTextWrapper(state); - } + StartEmbedText(state, markerWithoutPlus); } public override void EndChar( @@ -240,22 +199,18 @@ public override void EndChar( bool closed ) { - if (IsEmbedPartStyle(marker)) - { - if (_inNestedEmbed) - { - _inNestedEmbed = false; - } - else - { - EndNoteTextWrapper(state); - } - } if (IsEmbedStyle(marker)) - { - EndEmbed(state, marker, attributes, closed); - _inEmbed = false; - } + EndEmbedText(state); + } + + public override void StartNote(UsfmParserState state, string marker, string caller, string category) + { + StartEmbedText(state, marker); + } + + public override void EndNote(UsfmParserState state, string marker, bool closed) + { + EndEmbedText(state); } protected virtual void StartVerseText(UsfmParserState state, IReadOnlyList scriptureRefs) { } @@ -266,26 +221,9 @@ protected virtual void StartNonVerseText(UsfmParserState state, ScriptureRef scr protected virtual void EndNonVerseText(UsfmParserState state, ScriptureRef scriptureRef) { } - protected virtual void StartNoteTextWrapper(UsfmParserState state) - { - InNoteText = true; - _curTextType.Push(ScriptureTextType.NoteText); - StartNoteText(state); - } - - protected virtual void StartNoteText(UsfmParserState state) { } - - protected virtual void EndNoteTextWrapper(UsfmParserState state) - { - if (_curTextType.Count > 0 && _curTextType.Peek() == ScriptureTextType.NoteText) - { - EndNoteText(state, CreateNonVerseRef()); - _curTextType.Pop(); - InNoteText = false; - } - } + protected virtual void StartEmbedText(UsfmParserState state, ScriptureRef scriptureRef) { } - protected virtual void EndNoteText(UsfmParserState state, ScriptureRef scriptureRef) { } + protected virtual void EndEmbedText(UsfmParserState state, ScriptureRef scriptureRef) { } private void StartVerseText(UsfmParserState state) { @@ -325,6 +263,28 @@ private void UpdateVerseRef(VerseRef verseRef, string marker) _curVerseRef = verseRef; } + private void StartEmbedText(UsfmParserState state, string marker) + { + if (_curVerseRef.IsDefault) + UpdateVerseRef(state.VerseRef, marker); + if (!_duplicateVerse) + { + CheckConvertVerseParaToNonVerse(state); + NextElement(marker); + _curTextType.Push(ScriptureTextType.Embed); + StartEmbedText(state, CreateNonVerseRef()); + } + } + + private void EndEmbedText(UsfmParserState state) + { + if (!_duplicateVerse && _curTextType.Count > 0 && _curTextType.Peek() == ScriptureTextType.Embed) + { + EndEmbedText(state, CreateNonVerseRef()); + _curTextType.Pop(); + } + } + private void NextElement(string marker) { ScriptureElement prevElem = _curElements.Pop(); @@ -378,36 +338,5 @@ private void CheckConvertVerseParaToNonVerse(UsfmParserState state) StartNonVerseText(state); } } - - protected bool IsInEmbed(string marker) - { - return _inEmbed || IsEmbedStyle(marker); - } - - protected bool IsInNestedEmbed(string marker) - { - return _inNestedEmbed - || ( - !(marker is null) - && marker.StartsWith("+") - && marker.Length > 1 - && IsEmbedPartStyle(marker.Substring(1)) - ); - } - - protected static bool IsNoteText(string marker) - { - return marker == "ft"; - } - - protected static bool IsEmbedPartStyle(string marker) - { - return !(marker is null) && marker.Length > 0 && marker[0].IsOneOf(EmbedPartStartCharStyles); - } - - protected static bool IsEmbedStyle(string marker) - { - return !(marker is null) && marker.Trim('*').IsOneOf(EmbedStyles); - } } } diff --git a/src/SIL.Machine/Corpora/UpdateUsfmParserHandler.cs b/src/SIL.Machine/Corpora/UpdateUsfmParserHandler.cs index 532a89f0..ae28245f 100644 --- a/src/SIL.Machine/Corpora/UpdateUsfmParserHandler.cs +++ b/src/SIL.Machine/Corpora/UpdateUsfmParserHandler.cs @@ -25,21 +25,20 @@ public class UpdateUsfmParserHandler : ScriptureRefUsfmParserHandlerBase { private readonly IReadOnlyList<(IReadOnlyList, string)> _rows; private readonly List _tokens; - private readonly List _newTokens; - private readonly List _newEmbedTokens; + private readonly List _updatedText; + private readonly List _embedTokens; private readonly string _idText; private readonly UpdateUsfmTextBehavior _textBehavior; private readonly UpdateUsfmMarkerBehavior _paragraphBehavior; private readonly UpdateUsfmMarkerBehavior _embedBehavior; private readonly UpdateUsfmMarkerBehavior _styleBehavior; private readonly HashSet _preserveParagraphStyles; + private readonly Stack _updateBlocks; + private readonly Stack _updateBlockHandlers; private readonly List _remarks; private readonly Stack _replace; private int _rowIndex; private int _tokenIndex; - private bool _embedUpdated; - private bool _inPreservedParagraph; - private List _embedRowTexts; public UpdateUsfmParserHandler( IReadOnlyList<(IReadOnlyList, string)> rows = null, @@ -48,40 +47,45 @@ public UpdateUsfmParserHandler( UpdateUsfmMarkerBehavior paragraphBehavior = UpdateUsfmMarkerBehavior.Preserve, UpdateUsfmMarkerBehavior embedBehavior = UpdateUsfmMarkerBehavior.Preserve, UpdateUsfmMarkerBehavior styleBehavior = UpdateUsfmMarkerBehavior.Strip, - IReadOnlyCollection preserveParagraphStyles = null, + IEnumerable preserveParagraphStyles = null, + IEnumerable updateBlockHandlers = null, IEnumerable remarks = null ) { _rows = rows ?? Array.Empty<(IReadOnlyList, string)>(); _tokens = new List(); - _newTokens = new List(); - _newEmbedTokens = new List(); + _updatedText = new List(); + _updateBlocks = new Stack(); + _embedTokens = new List(); _idText = idText; _replace = new Stack(); _textBehavior = textBehavior; _paragraphBehavior = paragraphBehavior; _embedBehavior = embedBehavior; _styleBehavior = styleBehavior; + _updateBlockHandlers = + updateBlockHandlers == null + ? new Stack() + : new Stack(updateBlockHandlers); _preserveParagraphStyles = preserveParagraphStyles == null ? new HashSet { "r", "rem" } : new HashSet(preserveParagraphStyles); _remarks = remarks == null ? new List() : remarks.ToList(); - _embedUpdated = false; - _embedRowTexts = new List(); } public IReadOnlyList Tokens => _tokens; public override void EndUsfm(UsfmParserState state) { - CollectTokens(state); + CollectUpdatableTokens(state); base.EndUsfm(state); } public override void StartBook(UsfmParserState state, string marker, string code) { - CollectTokens(state); + CollectReadonlyTokens(state); + _updateBlocks.Push(new UsfmUpdateBlock()); var startBookTokens = new List(); if (_idText != null) startBookTokens.Add(new UsfmToken(_idText + " ")); @@ -93,14 +97,17 @@ public override void StartBook(UsfmParserState state, string marker, string code startBookTokens.Add(new UsfmToken(remark)); } } - PushNewTokens(startBookTokens); + PushUpdatedText(startBookTokens); base.StartBook(state, marker, code); } public override void EndBook(UsfmParserState state, string marker) { + UseUpdatedText(); PopNewTokens(); + UsfmUpdateBlock updateBlock = _updateBlocks.Pop(); + _tokens.AddRange(updateBlock.GetTokens()); base.EndBook(state, marker); } @@ -112,56 +119,39 @@ public override void StartPara( IReadOnlyList attributes ) { - if (marker != null && _preserveParagraphStyles.Contains(marker)) - { - _inPreservedParagraph = true; - } if ( state.IsVerseText && (HasNewText() || _textBehavior == UpdateUsfmTextBehavior.StripExisting) && _paragraphBehavior == UpdateUsfmMarkerBehavior.Strip ) { - SkipTokens(state); + SkipUpdatableTokens(state); } else { - CollectTokens(state); + CollectUpdatableTokens(state); } base.StartPara(state, marker, unknown, attributes); } - public override void EndPara(UsfmParserState state, string marker) - { - base.EndPara(state, marker); - _inPreservedParagraph = false; - } - public override void StartRow(UsfmParserState state, string marker) { - CollectTokens(state); + CollectUpdatableTokens(state); base.StartRow(state, marker); } public override void StartCell(UsfmParserState state, string marker, string align, int colspan) { - CollectTokens(state); + CollectUpdatableTokens(state); base.StartCell(state, marker, align, colspan); } - public override void EndCell(UsfmParserState state, string marker) - { - CollectTokens(state); - - base.EndCell(state, marker); - } - public override void StartSidebar(UsfmParserState state, string marker, string category) { - CollectTokens(state); + CollectUpdatableTokens(state); base.StartSidebar(state, marker, category); } @@ -169,7 +159,7 @@ public override void StartSidebar(UsfmParserState state, string marker, string c public override void EndSidebar(UsfmParserState state, string marker, bool closed) { if (closed) - CollectTokens(state); + CollectUpdatableTokens(state); base.EndSidebar(state, marker, closed); } @@ -182,9 +172,11 @@ public override void Chapter( string pubNumber ) { - CollectTokens(state); + UseUpdatedText(); base.Chapter(state, number, marker, altNumber, pubNumber); + + CollectReadonlyTokens(state); } public override void Milestone( @@ -194,7 +186,7 @@ public override void Milestone( IReadOnlyList attributes ) { - CollectTokens(state); + CollectUpdatableTokens(state); base.Milestone(state, marker, startMilestone, attributes); } @@ -207,9 +199,25 @@ public override void Verse( string pubNumber ) { - CollectTokens(state); + UseUpdatedText(); base.Verse(state, number, marker, altNumber, pubNumber); + + CollectReadonlyTokens(state); + } + + public override void StartNote(UsfmParserState state, string marker, string caller, string category) + { + base.StartNote(state, marker, caller, category); + + CollectUpdatableTokens(state); + } + + public override void EndNote(UsfmParserState state, string marker, bool closed) + { + if (closed) + CollectUpdatableTokens(state); + base.EndNote(state, marker, closed); } public override void StartChar( @@ -219,13 +227,24 @@ public override void StartChar( IReadOnlyList attributes ) { - // strip out char-style markers in verses that are being replaced - if (ReplaceWithNewTokens(state)) - SkipTokens(state); - else - CollectTokens(state); - base.StartChar(state, markerWithoutPlus, unknown, attributes); + + if (CurrentTextType == ScriptureTextType.Embed) + { + CollectUpdatableTokens(state); + } + else + { + ReplaceWithNewTokens(state); + if (_styleBehavior == UpdateUsfmMarkerBehavior.Strip) + { + SkipUpdatableTokens(state); + } + else + { + CollectUpdatableTokens(state); + } + } } public override void EndChar( @@ -236,54 +255,34 @@ bool closed ) { // strip out char-style markers in verses that are being replaced - if (ReplaceWithNewTokens(state, closed: closed)) - SkipTokens(state); + if (CurrentTextType == ScriptureTextType.Embed) + { + CollectUpdatableTokens(state); + } else - CollectTokens(state); + { + ReplaceWithNewTokens(state); + if (_styleBehavior == UpdateUsfmMarkerBehavior.Strip) + { + SkipUpdatableTokens(state); + } + else + { + CollectUpdatableTokens(state); + } + } base.EndChar(state, marker, attributes, closed); } - protected override void StartEmbed(UsfmParserState state, ScriptureRef scriptureRef) - { - _embedRowTexts = AdvanceRows(new[] { scriptureRef }).ToList(); - _embedUpdated = _embedRowTexts.Count > 0; - - // strip out notes in verses that are being replaced - if (ReplaceWithNewTokens(state)) - SkipTokens(state); - else - CollectTokens(state); - } - - protected override void EndEmbed( - UsfmParserState state, - string marker, - IReadOnlyList attributes, - bool closed - ) - { - // strip out notes in verses that are being replaced - if (ReplaceWithNewTokens(state, closed: closed)) - SkipTokens(state); - else - CollectTokens(state); - - _embedRowTexts.Clear(); - _embedUpdated = false; - - base.EndEmbed(state, marker, attributes, closed); - } - public override void Ref(UsfmParserState state, string marker, string display, string target) { - // strip out ref in verses that are being replaced + base.Ref(state, marker, display, target); + if (ReplaceWithNewTokens(state)) - SkipTokens(state); + SkipUpdatableTokens(state); else - CollectTokens(state); - - base.Ref(state, marker, display, target); + CollectUpdatableTokens(state); } public override void Text(UsfmParserState state, string text) @@ -292,64 +291,56 @@ public override void Text(UsfmParserState state, string text) // strip out text in verses that are being replaced if (ReplaceWithNewTokens(state)) - SkipTokens(state); + SkipUpdatableTokens(state); else - CollectTokens(state); + CollectUpdatableTokens(state); } public override void OptBreak(UsfmParserState state) { - // strip out optbreaks in verses that are being replaced + base.OptBreak(state); if (ReplaceWithNewTokens(state)) - SkipTokens(state); + SkipUpdatableTokens(state); else - CollectTokens(state); - - base.OptBreak(state); + CollectUpdatableTokens(state); } public override void Unmatched(UsfmParserState state, string marker) { - // strip out unmatched end markers in verses that are being replaced + base.Unmatched(state, marker); + if (ReplaceWithNewTokens(state)) - SkipTokens(state); + SkipUpdatableTokens(state); else - CollectTokens(state); - - base.Unmatched(state, marker); + CollectUpdatableTokens(state); } protected override void StartVerseText(UsfmParserState state, IReadOnlyList scriptureRefs) { - IReadOnlyList rowTexts = AdvanceRows(scriptureRefs); - PushNewTokens(rowTexts.Select(t => new UsfmToken(t + " "))); + StartUpdateBlock(scriptureRefs); } protected override void EndVerseText(UsfmParserState state, IReadOnlyList scriptureRefs) { - PopNewTokens(); + EndUpdateBlock(scriptureRefs); } protected override void StartNonVerseText(UsfmParserState state, ScriptureRef scriptureRef) { - IReadOnlyList rowTexts = AdvanceRows(new[] { scriptureRef }); - PushNewTokens(rowTexts.Select(t => new UsfmToken(t + " "))); + StartUpdateBlock(new[] { scriptureRef }); } protected override void EndNonVerseText(UsfmParserState state, ScriptureRef scriptureRef) { - PopNewTokens(); + EndUpdateBlock(new[] { scriptureRef }); } - protected override void StartNoteText(UsfmParserState state) + protected override void EndEmbedText(UsfmParserState state, ScriptureRef scriptureRef) { - PushNewEmbedTokens(_embedRowTexts.Select(t => new UsfmToken(t + " "))); - } - - protected override void EndNoteText(UsfmParserState state, ScriptureRef scriptureRef) - { - _embedRowTexts.Clear(); - PopNewTokens(); + _updateBlocks + .Peek() + .AddEmbed(_embedTokens, markedForRemoval: _embedBehavior == UpdateUsfmMarkerBehavior.Strip); + _embedTokens.Clear(); } public string GetUsfm(string stylesheetFileName = "usfm.sty") @@ -401,83 +392,98 @@ private IReadOnlyList AdvanceRows(IReadOnlyList segScrRefs return rowTexts; } - private void CollectTokens(UsfmParserState state) + private void CollectUpdatableTokens(UsfmParserState state) { - _tokens.AddRange(_newTokens); - _newTokens.Clear(); + UseUpdatedText(); while (_tokenIndex <= state.Index + state.SpecialTokenCount) { - _tokens.Add(state.Tokens[_tokenIndex]); + UsfmToken token = state.Tokens[_tokenIndex]; + if (CurrentTextType == ScriptureTextType.Embed) + { + _embedTokens.Add(token); + } + else if ( + (CurrentTextType != ScriptureTextType.None || state.ParaTag?.Marker == "id") + && _updateBlocks.Count > 0 + ) + { + _updateBlocks.Peek().AddToken(token); + } + else + { + _tokens.Add(token); + } _tokenIndex++; } } - private void SkipTokens(UsfmParserState state) + private void CollectReadonlyTokens(UsfmParserState state) { - _tokenIndex = state.Index + 1 + state.SpecialTokenCount; + while (_tokenIndex <= state.Index + state.SpecialTokenCount) + { + UsfmToken token = state.Tokens[_tokenIndex]; + if (_updateBlocks.Count > 0) + { + _updateBlocks.Peek().AddToken(token); + } + else + { + _tokens.Add(token); + } + _tokenIndex++; + } } - private bool ReplaceWithNewTokens(UsfmParserState state, bool closed = true) + private void SkipUpdatableTokens(UsfmParserState state) { - string marker = state?.Token?.Marker; - bool inEmbed = IsInEmbed(marker); + while (_tokenIndex <= state.Index + state.SpecialTokenCount) + { + UsfmToken token = state.Tokens[_tokenIndex]; + if (CurrentTextType != ScriptureTextType.None || (state.ParaTag?.Marker == "id")) + { + if (_updateBlocks.Count > 0) + { + _updateBlocks.Peek().AddToken(token, markedForRemoval: true); + } + _tokenIndex++; + } + } + _tokenIndex = state.Index + state.SpecialTokenCount + 1; + } - bool inNestedEmbed = IsInNestedEmbed(marker); - bool isStyleTag = marker != null && !IsEmbedPartStyle(marker); + private bool ReplaceWithNewTokens(UsfmParserState state) + { + if (CurrentTextType == ScriptureTextType.Embed) + return false; bool existingText = state .Tokens.Skip(_tokenIndex) .Take(state.Index + 1 + state.SpecialTokenCount - _tokenIndex) .Any(t => t.Type == UsfmTokenType.Text && t.Text.Length > 0); - bool useNewTokens = - !IsInPreservedParagraph(marker) - && ( - _textBehavior == UpdateUsfmTextBehavior.StripExisting - || (HasNewText() && (!existingText || _textBehavior != UpdateUsfmTextBehavior.PreferExisting)) - ) - && (!inEmbed || (InNoteText && !inNestedEmbed && _embedBehavior == UpdateUsfmMarkerBehavior.Preserve)); - - if (useNewTokens) + bool useNewTokens = true; + if (IsInPreservedParagraph(state)) { - if (inEmbed) - AddNewEmbedTokens(); - else - AddNewTokens(); + useNewTokens = false; } - if ( - existingText - && (_textBehavior == UpdateUsfmTextBehavior.PreferExisting || IsInPreservedParagraph(marker)) + else if ( + _textBehavior != UpdateUsfmTextBehavior.StripExisting + && (!HasNewText() || (existingText && _textBehavior == UpdateUsfmTextBehavior.PreferExisting)) ) { - if (inEmbed) - ClearNewEmbedTokens(); - else - ClearNewTokens(); + useNewTokens = false; } - // figure out when to skip the existing text - bool embedInNewVerseText = - (_replace.Any(r => r) || _textBehavior == UpdateUsfmTextBehavior.StripExisting) && inEmbed; - if (embedInNewVerseText || _embedUpdated) - { - if (_embedBehavior == UpdateUsfmMarkerBehavior.Strip) - { - ClearNewEmbedTokens(); - return true; - } - - if (!InNoteText || inNestedEmbed) - return false; - } + if (useNewTokens) + UseUpdatedText(); - bool skipTokens = useNewTokens && closed; + bool clearNewTokens = + existingText + && (_textBehavior == UpdateUsfmTextBehavior.PreferExisting || IsInPreservedParagraph(state)); + if (clearNewTokens) + ClearUpdatedText(); - if (useNewTokens && isStyleTag) - { - skipTokens = _styleBehavior == UpdateUsfmMarkerBehavior.Strip; - } - return skipTokens; + return useNewTokens; } private bool HasNewText() @@ -485,41 +491,53 @@ private bool HasNewText() return _replace.Count > 0 && _replace.Peek(); } - private void PushNewTokens(IEnumerable tokens) - { - _replace.Push(tokens.Any()); - _newTokens.AddRange(tokens); - } - - private void AddNewTokens() + private void StartUpdateBlock(IReadOnlyList scriptureRefs) { - if (_newTokens.Count > 0) - _tokens.AddRange(_newTokens); - _newTokens.Clear(); + _updateBlocks.Push(new UsfmUpdateBlock(scriptureRefs)); + IReadOnlyList rowTexts = AdvanceRows(scriptureRefs); + PushUpdatedText(rowTexts.Select(t => new UsfmToken(t + " "))); } - private void ClearNewTokens() + private void EndUpdateBlock(IReadOnlyList scriptureRefs) { - _newTokens.Clear(); + UseUpdatedText(); + PopNewTokens(); + UsfmUpdateBlock updateBlock = _updateBlocks.Pop(); + updateBlock.UpdateRefs(scriptureRefs); + foreach (IUsfmUpdateBlockHandler handler in _updateBlockHandlers) + { + updateBlock = handler.ProcessBlock(updateBlock); + } + if ( + _updateBlocks.Count > 0 + && _updateBlocks.Peek().Elements.Last().Type == UsfmUpdateBlockElementType.Paragraph + ) + { + _updateBlocks.Peek().ExtendLastElement(updateBlock.GetTokens()); + } + else + { + _tokens.AddRange(updateBlock.GetTokens()); + } } - private void PushNewEmbedTokens(IEnumerable tokens) + private void PushUpdatedText(IEnumerable tokens) { _replace.Push(tokens.Any()); if (tokens.Any()) - _newEmbedTokens.AddRange(tokens); + _updatedText.AddRange(tokens); } - private void AddNewEmbedTokens() + private void UseUpdatedText() { - if (_newEmbedTokens.Count > 0) - _tokens.AddRange(_newEmbedTokens); - _newEmbedTokens.Clear(); + if (_updatedText.Count > 0) + _updateBlocks.Peek().AddText(_updatedText); + _updatedText.Clear(); } - private void ClearNewEmbedTokens() + private void ClearUpdatedText() { - _newEmbedTokens.Clear(); + _updatedText.Clear(); } private void PopNewTokens() @@ -527,9 +545,9 @@ private void PopNewTokens() _replace.Pop(); } - private bool IsInPreservedParagraph(string marker) + private bool IsInPreservedParagraph(UsfmParserState state) { - return _inPreservedParagraph || _preserveParagraphStyles.Contains(marker); + return state.ParaTag != null && _preserveParagraphStyles.Contains(state.ParaTag.Marker); } } } diff --git a/src/SIL.Machine/Corpora/UsfmParserState.cs b/src/SIL.Machine/Corpora/UsfmParserState.cs index 1ad2c85b..88ade395 100644 --- a/src/SIL.Machine/Corpora/UsfmParserState.cs +++ b/src/SIL.Machine/Corpora/UsfmParserState.cs @@ -157,6 +157,10 @@ public bool IsVerseText { get { + // Anything before verse 1 is not verse text + if (VerseRef.VerseNum == 0) + return false; + // Sidebars and notes are not verse text if (_stack.Any(e => e.Type == UsfmElementType.Sidebar || e.Type == UsfmElementType.Note)) return false; diff --git a/src/SIL.Machine/Corpora/UsfmTextBase.cs b/src/SIL.Machine/Corpora/UsfmTextBase.cs index ae6fb59f..9a16b102 100644 --- a/src/SIL.Machine/Corpora/UsfmTextBase.cs +++ b/src/SIL.Machine/Corpora/UsfmTextBase.cs @@ -258,9 +258,7 @@ public override void Text(UsfmParserState state, string text) } else if (text.Length > 0 && (CurrentTextType != ScriptureTextType.Verse || state.IsVerseText)) { - bool isEmbedOrNestedDontUpdate = - IsInEmbed(state.Token.Marker) && (!InNoteText || IsInNestedEmbed(state.Token.Marker)); - if (isEmbedOrNestedDontUpdate) + if (CurrentTextType == ScriptureTextType.Embed) return; if ( @@ -298,24 +296,6 @@ protected override void EndNonVerseText(UsfmParserState state, ScriptureRef scri _rows.Add(_text.CreateRow(scriptureRef, text, _sentenceStart)); } - protected override void StartNoteText(UsfmParserState state) - { - if (_text._includeMarkers) - return; - - _rowTexts.Push(new StringBuilder()); - } - - protected override void EndNoteText(UsfmParserState state, ScriptureRef scriptureRef) - { - if (_text._includeMarkers) - return; - - string text = _rowTexts.Pop().ToString(); - if (_text._includeAllText) - _rows.Add(_text.CreateRow(scriptureRef, text, _sentenceStart)); - } - private void OutputMarker(UsfmParserState state) { if (!_text._includeMarkers || _rowTexts.Count == 0) diff --git a/src/SIL.Machine/Corpora/UsfmUpdateBlock.cs b/src/SIL.Machine/Corpora/UsfmUpdateBlock.cs new file mode 100644 index 00000000..df64a4d6 --- /dev/null +++ b/src/SIL.Machine/Corpora/UsfmUpdateBlock.cs @@ -0,0 +1,102 @@ +using System.Collections.Generic; +using System.Linq; + +namespace SIL.Machine.Corpora +{ + public class UsfmUpdateBlock + { + public IReadOnlyList Refs + { + get => _refs; + } + public IReadOnlyList Elements + { + get => _elements; + } + + private readonly List _refs; + private readonly List _elements; + + public UsfmUpdateBlock( + IEnumerable refs = null, + IEnumerable elements = null + ) + { + _refs = refs != null ? refs.ToList() : new List(); + _elements = elements != null ? elements.ToList() : new List(); + } + + public void AddText(IEnumerable tokens) + { + _elements.Add(new UsfmUpdateBlockElement(UsfmUpdateBlockElementType.Text, tokens.ToList())); + } + + public void AddToken(UsfmToken token, bool markedForRemoval = false) + { + UsfmUpdateBlockElementType type; + switch (token.Type) + { + case UsfmTokenType.Text: + type = UsfmUpdateBlockElementType.Text; + break; + case UsfmTokenType.Paragraph: + type = UsfmUpdateBlockElementType.Paragraph; + break; + case UsfmTokenType.Character: + case UsfmTokenType.End: + type = UsfmUpdateBlockElementType.Style; + break; + default: + type = UsfmUpdateBlockElementType.Other; + break; + } + _elements.Add(new UsfmUpdateBlockElement(type, new List { token }, markedForRemoval)); + } + + public void AddEmbed(IEnumerable tokens, bool markedForRemoval = false) + { + _elements.Add( + new UsfmUpdateBlockElement(UsfmUpdateBlockElementType.Embed, tokens.ToList(), markedForRemoval) + ); + } + + public void ExtendLastElement(IEnumerable tokens) + { + _elements.Last().Tokens.AddRange(tokens); + } + + public void UpdateRefs(IEnumerable refs) + { + _refs.Clear(); + _refs.AddRange(refs); + } + + public List GetTokens() + { + return _elements.SelectMany(e => e.GetTokens()).ToList(); + } + + public override bool Equals(object obj) + { + if (!(obj is UsfmUpdateBlock)) + return false; + + UsfmUpdateBlock other = (UsfmUpdateBlock)obj; + + return _refs.SequenceEqual(other._refs) && _elements.SequenceEqual(other._elements); + } + + public override int GetHashCode() + { + int hash = 23; + hash = hash * 31 + _refs.GetHashCode(); + hash = hash * 31 + _elements.GetHashCode(); + return hash; + } + + public UsfmUpdateBlock Clone() + { + return new UsfmUpdateBlock(_refs, _elements); + } + } +} diff --git a/src/SIL.Machine/Corpora/UsfmUpdateBlockElement.cs b/src/SIL.Machine/Corpora/UsfmUpdateBlockElement.cs new file mode 100644 index 00000000..cb5a3131 --- /dev/null +++ b/src/SIL.Machine/Corpora/UsfmUpdateBlockElement.cs @@ -0,0 +1,36 @@ +using System.Collections.Generic; + +namespace SIL.Machine.Corpora +{ + public enum UsfmUpdateBlockElementType + { + Text, + Paragraph, + Embed, + Style, + Other + } + + public class UsfmUpdateBlockElement + { + public UsfmUpdateBlockElementType Type { get; } + public List Tokens { get; } + public bool MarkedForRemoval { get; } + + public UsfmUpdateBlockElement( + UsfmUpdateBlockElementType type, + List tokens, + bool markedForRemoval = false + ) + { + Type = type; + Tokens = tokens; + MarkedForRemoval = markedForRemoval; + } + + public List GetTokens() + { + return MarkedForRemoval ? new List() : new List(Tokens); + } + } +} diff --git a/src/SIL.Machine/Corpora/UsfmUpdateBlockHandler.cs b/src/SIL.Machine/Corpora/UsfmUpdateBlockHandler.cs new file mode 100644 index 00000000..a255665f --- /dev/null +++ b/src/SIL.Machine/Corpora/UsfmUpdateBlockHandler.cs @@ -0,0 +1,7 @@ +namespace SIL.Machine.Corpora +{ + public interface IUsfmUpdateBlockHandler + { + UsfmUpdateBlock ProcessBlock(UsfmUpdateBlock block); + } +} diff --git a/tests/SIL.Machine.Tests/Corpora/UpdateUsfmParserHandlerTests.cs b/tests/SIL.Machine.Tests/Corpora/UpdateUsfmParserHandlerTests.cs index 0fbdf765..2b24b167 100644 --- a/tests/SIL.Machine.Tests/Corpora/UpdateUsfmParserHandlerTests.cs +++ b/tests/SIL.Machine.Tests/Corpora/UpdateUsfmParserHandlerTests.cs @@ -45,8 +45,8 @@ public void GetUsfm_StripAllText() \r keep this reference \rem and this reference too \ip but remove this text -\v 1 Chapter \add one\add*, \p verse \f + \fr 2:1: \ft This is a \fm ∆\fm* footnote.\f*one. -\v 2 Chapter \add one\add*, \p verse \f + \fr 2:1: \ft This is a \fm ∆\fm* footnote.\f*two. +\v 1 Chapter \add one\add*, \p verse \f + \fr 1:1: \ft This is a \+bd ∆\+bd* footnote.\f*one. +\v 2 Chapter \add one\add*, \p verse \f + \fr 1:2: \ft This is a \+bd ∆\+bd* footnote.\f*two. \v 3 Verse 3 \v 4 Verse 4 "; @@ -67,13 +67,13 @@ public void GetUsfm_StripAllText() \rem and this reference too \ip \v 1 Update 1 \add \add* -\p \f + \fr 2:1: \ft \fm ∆\fm*\f* +\p \f + \fr 1:1: \ft This is a \+bd ∆\+bd* footnote.\f* \v 2 \add \add* -\p \f + \fr 2:1: \ft \fm ∆\fm*\f* +\p \f + \fr 1:2: \ft This is a \+bd ∆\+bd* footnote.\f* \v 3 Update 3 \v 4 "; - Assess(target, result); + AssertUsfmEquals(target, result); target = UpdateUsfm( rows, @@ -95,7 +95,7 @@ public void GetUsfm_StripAllText() \v 3 Update 3 \v 4 "; - Assess(target, result); + AssertUsfmEquals(target, result); } [Test] @@ -131,7 +131,7 @@ public void GetUsfm_StripParagraphs_PreserveParagraphStyles() \v 1 Update 1 "; - Assess(target, result); + AssertUsfmEquals(target, result); var targetDiffParagraph = UpdateUsfm( rows, @@ -149,7 +149,7 @@ public void GetUsfm_StripParagraphs_PreserveParagraphStyles() \v 1 Update 1 "; - Assess(targetDiffParagraph, resultDiffParagraph); + AssertUsfmEquals(targetDiffParagraph, resultDiffParagraph); } [Test] @@ -179,7 +179,7 @@ public void GetUsfm_PreserveParagraphs() \v 1 Update 1 "; - Assess(target, result); + AssertUsfmEquals(target, result); var targetDiffParagraph = UpdateUsfm( rows, @@ -196,7 +196,7 @@ public void GetUsfm_PreserveParagraphs() \v 1 Update 1 "; - Assess(targetDiffParagraph, resultDiffParagraph); + AssertUsfmEquals(targetDiffParagraph, resultDiffParagraph); } [Test] @@ -206,6 +206,7 @@ public void GetUsfm_ParagraphInVerse() string usfm = @"\id MAT - Test \c 1 +\p paragraph not in a verse \v 1 verse 1 \p inner verse paragraph \s1 Section Header \v 2 Verse 2 \p inner verse paragraph @@ -216,12 +217,13 @@ public void GetUsfm_ParagraphInVerse() string result = @"\id MAT - Test \c 1 +\p paragraph not in a verse \v 1 Update 1 \s1 Section Header \v 2 Verse 2 \p inner verse paragraph "; - Assess(target, result); + AssertUsfmEquals(target, result); string targetStrip = UpdateUsfm( rows, @@ -233,12 +235,13 @@ public void GetUsfm_ParagraphInVerse() string resultStrip = @"\id MAT \c 1 +\p \v 1 Update 1 \s1 \v 2 "; - Assess(targetStrip, resultStrip); + AssertUsfmEquals(targetStrip, resultStrip); } [Test] @@ -264,7 +267,7 @@ public void GetUsfm_PreferExisting() \v 2 Update 2 \v 3 Other text "; - Assess(target, result); + AssertUsfmEquals(target, result); } [Test] @@ -294,13 +297,9 @@ public void GetUsfm_Verse_StripNote() } [Test] - public void GetUsfm_Verse_ReplaceNote() + public void GetUsfm_Verse_ReplaceWithNote() { - var rows = new List<(IReadOnlyList, string)> - { - (ScrRef("MAT 1:1"), "updated text"), - (ScrRef("MAT 1:1/1:f"), "This is a new footnote.") - }; + var rows = new List<(IReadOnlyList, string)> { (ScrRef("MAT 1:1"), "updated text") }; var usfm = @"\id MAT - Test \c 1 @@ -310,9 +309,9 @@ public void GetUsfm_Verse_ReplaceNote() var result = @"\id MAT - Test \c 1 -\v 1 updated text \f + \fr 2:1: \ft This is a new footnote. \f* +\v 1 updated text \f + \fr 2:1: \ft This is a footnote.\f* "; - Assess(target, result); + AssertUsfmEquals(target, result); } [Test] @@ -434,7 +433,7 @@ public void GetUsfm_MergeVerseSegments() }; string target = UpdateUsfm(rows); - Assert.That(target, Contains.Substring("\\v 2-3 Verse 2. Verse 2a. Verse 2b. \\fm ∆\\fm*\r\n")); + Assert.That(target, Contains.Substring("\\v 2-3 Verse 2. Verse 2a. Verse 2b.\r\n")); } [Test] @@ -533,7 +532,7 @@ public void GetUsfm_NonVerse_Sidebar() { var rows = new List<(IReadOnlyList, string)> { - (ScrRef("MAT 2:3/2:esb/1:ms"), "The first paragraph of the sidebar.") + (ScrRef("MAT 2:3/1:esb/1:ms"), "The first paragraph of the sidebar.") }; string target = UpdateUsfm(rows); @@ -565,7 +564,7 @@ public void GetUsfm_NonVerse_OptBreak() { var rows = new List<(IReadOnlyList, string)> { - (ScrRef("MAT 2:3/2:esb/2:p"), "The second paragraph of the sidebar.") + (ScrRef("MAT 2:3/1:esb/2:p"), "The second paragraph of the sidebar.") }; string target = UpdateUsfm(rows); @@ -597,18 +596,17 @@ public void GetUsfm_NonVerse_SkipNote() } [Test] - public void GetUsfm_NonVerse_ReplaceNote() + public void GetUsfm_NonVerse_ReplaceWithNote() { var rows = new List<(IReadOnlyList, string)> { - (ScrRef("MAT 1:0/3:ip"), "The introductory paragraph."), - (ScrRef("MAT 1:0/3:ip/1:fe"), "This is a new endnote.") + (ScrRef("MAT 1:0/3:ip"), "The introductory paragraph.") }; string target = UpdateUsfm(rows); Assert.That( target, - Contains.Substring("\\ip The introductory paragraph. \\fe + \\ft This is a new endnote. \\fe*\r\n") + Contains.Substring("\\ip The introductory paragraph. \\fe + \\ft This is an endnote.\\fe*\r\n") ); } @@ -671,84 +669,6 @@ public void GetUsfm_Verse_PretranslationsBeforeText() ); } - [Test] - public void EmbedStylePreservation() - { - var rows = new List<(IReadOnlyList, string)> - { - (ScrRef("MAT 1:1"), "Update the greeting"), - (ScrRef("MAT 1:1/1:f"), "Update the comment"), - (ScrRef("MAT 1:2"), "Update the greeting only"), - (ScrRef("MAT 1:3/1:f"), "Update the comment only"), - }; - var usfm = - @"\id MAT - Test -\c 1 -\v 1 Hello \f \fr 1.1 \ft Some \+bd note\+bd* \f*\bd World \bd* -\v 2 Good \f \fr 1.2 \ft Some other \+bd note\+bd* \f*\bd Morning \bd* -\v 3 Pleasant \f \fr 1.3 \ft A third \+bd note\+bd* \f*\bd Evening \bd* -"; - var target = UpdateUsfm( - rows, - usfm, - embedBehavior: UpdateUsfmMarkerBehavior.Preserve, - styleBehavior: UpdateUsfmMarkerBehavior.Preserve - ); - var resultPp = - @"\id MAT - Test -\c 1 -\v 1 Update the greeting \f \fr 1.1 \ft Update the comment \+bd \+bd*\f*\bd \bd* -\v 2 Update the greeting only \f \fr 1.2 \ft Some other \+bd note\+bd* \f*\bd \bd* -\v 3 Pleasant \f \fr 1.3 \ft Update the comment only \+bd \+bd*\f*\bd Evening \bd* -"; - Assess(target, resultPp); - - target = UpdateUsfm( - rows, - usfm, - embedBehavior: UpdateUsfmMarkerBehavior.Preserve, - styleBehavior: UpdateUsfmMarkerBehavior.Strip - ); - var resultPs = - @"\id MAT - Test -\c 1 -\v 1 Update the greeting \f \fr 1.1 \ft Update the comment \f* -\v 2 Update the greeting only \f \fr 1.2 \ft Some other \+bd note\+bd* \f* -\v 3 Pleasant \f \fr 1.3 \ft Update the comment only \f*\bd Evening \bd* -"; - Assess(target, resultPs); - - target = UpdateUsfm( - rows, - usfm, - embedBehavior: UpdateUsfmMarkerBehavior.Strip, - styleBehavior: UpdateUsfmMarkerBehavior.Preserve - ); - var resultSp = - @"\id MAT - Test -\c 1 -\v 1 Update the greeting \bd \bd* -\v 2 Update the greeting only \bd \bd* -\v 3 Pleasant \bd Evening \bd* -"; - Assess(target, resultSp); - - target = UpdateUsfm( - rows, - usfm, - embedBehavior: UpdateUsfmMarkerBehavior.Strip, - styleBehavior: UpdateUsfmMarkerBehavior.Strip - ); - var resultSs = - @"\id MAT - Test -\c 1 -\v 1 Update the greeting -\v 2 Update the greeting only -\v 3 Pleasant \bd Evening \bd* -"; - Assess(target, resultSs); - } - [Test] public void GetUsfm_StripParagraphs() { @@ -780,7 +700,7 @@ public void GetUsfm_StripParagraphs() \v 2 Hello \p World "; - Assess(target, resultP); + AssertUsfmEquals(target, resultP); target = UpdateUsfm(rows, usfm, paragraphBehavior: UpdateUsfmMarkerBehavior.Strip); var resultS = @@ -792,7 +712,7 @@ public void GetUsfm_StripParagraphs() \v 2 Hello \p World "; - Assess(target, resultS); + AssertUsfmEquals(target, resultS); } [Test] @@ -815,7 +735,7 @@ public void GetUsfm_PreservationRawStrings() \c 1 \v 1 Update all in one row \f \fr 1.1 \ft Some note \f* "; - Assess(target, result); + AssertUsfmEquals(target, result); } [Test] @@ -835,25 +755,7 @@ public void GetUsfm_BeginningOfVerseEmbed() \c 1 \v 1 Updated text "; - Assess(target, result); - } - - [Test] - public void EmptyNote() - { - var rows = new List<(IReadOnlyList, string)> { (ScrRef("MAT 1:1/1:f"), "Update the note") }; - var usfm = - @"\id MAT - Test -\c 1 -\v 1 Empty Note \f \fr 1.1 \ft \f* -"; - var target = UpdateUsfm(rows, usfm); - var result = - @"\id MAT - Test -\c 1 -\v 1 Empty Note \f \fr 1.1 \ft Update the note \f* -"; - Assess(target, result); + AssertUsfmEquals(target, result); } [Test] @@ -874,29 +776,29 @@ public void CrossReferenceDontUpdate() \c 1 \v 1 Cross reference verse \x - \xo 2:3-4 \xt Cool Book 3:24 \xta The annotation \x* and more content. "; - Assess(target, result); + AssertUsfmEquals(target, result); } [Test] - public void PreserveFigAndFm() + public void PreserveFig() { var rows = new List<(IReadOnlyList, string)> { (ScrRef("MAT 1:1"), "Update"), }; var usfm = @"\id MAT - Test \c 1 -\v 1 initial text \fig stuff\fig* more text \fm * \fm* and more. +\v 1 initial text \fig stuff\fig* more text and more. "; var target = UpdateUsfm(rows, usfm); var result = @"\id MAT - Test \c 1 -\v 1 Update \fig stuff\fig*\fm * \fm* +\v 1 Update \fig stuff\fig* "; - Assess(target, result); + AssertUsfmEquals(target, result); } [Test] - public void NestedXt() + public void NoteExplicitEndMarkers() { var rows = new List<(IReadOnlyList, string)> { @@ -906,15 +808,15 @@ public void NestedXt() var usfm = @"\id MAT - Test \c 1 -\v 1 initial text \f + \fr 15.8 \ft Text (\+xt reference\+xt*). And more.\f* and the end. +\v 1 initial text \f + \fr 2.4\fr* \fk The \+nd Lord\+nd*:\fk* \ft See \+nd Lord\+nd* in Word List.\ft*\f* and the end. "; var target = UpdateUsfm(rows, usfm); var result = @"\id MAT - Test \c 1 -\v 1 Update text \f + \fr 15.8 \ft Update note \+xt reference\+xt*\f* +\v 1 Update text \f + \fr 2.4\fr* \fk The \+nd Lord\+nd*:\fk* \ft See \+nd Lord\+nd* in Word List.\ft*\f* "; - Assess(target, result); + AssertUsfmEquals(target, result); target = UpdateUsfm(rows, usfm, embedBehavior: UpdateUsfmMarkerBehavior.Strip); var result2 = @@ -922,67 +824,327 @@ public void NestedXt() \c 1 \v 1 Update text "; - Assess(target, result2); + AssertUsfmEquals(target, result2); } [Test] - public void NonNestedXt() + public void UpdateBlock_Verse_PreserveParas() { - var rows = new List<(IReadOnlyList, string)> - { - (ScrRef("MAT 1:1"), "Update text"), - (ScrRef("MAT 1:1/1:f"), "Update note"), - }; + var rows = new List<(IReadOnlyList, string)> { (ScrRef("MAT 1:1"), "Update 1") }; var usfm = @"\id MAT - Test \c 1 -\v 1 initial text \f + \fr 15.8 \ft Text \xt reference\f* and the end. +\v 1 verse 1 \p inner verse paragraph "; - var target = UpdateUsfm(rows, usfm); - var result = + TestUsfmUpdateBlockHandler usfmUpdateBlockHandler = new TestUsfmUpdateBlockHandler(); + UpdateUsfm( + rows, + usfm, + embedBehavior: UpdateUsfmMarkerBehavior.Preserve, + usfmUpdateBlockHandlers: [usfmUpdateBlockHandler] + ); + + Assert.That(usfmUpdateBlockHandler.Blocks.Count, Is.EqualTo(1)); + + UsfmUpdateBlock usfmUpdateBlock = usfmUpdateBlockHandler.Blocks[0]; + AssertUpdateBlockEquals( + usfmUpdateBlock, + ["MAT 1:1"], + (UsfmUpdateBlockElementType.Other, "\\v 1 ", false), + (UsfmUpdateBlockElementType.Text, "Update 1 ", false), + (UsfmUpdateBlockElementType.Text, "verse 1 ", true), + (UsfmUpdateBlockElementType.Paragraph, "\\p ", false), + (UsfmUpdateBlockElementType.Text, "inner verse paragraph ", true) + ); + } + + [Test] + public void UpdateBlock_Verse_StripParas() + { + var rows = new List<(IReadOnlyList, string)> { (ScrRef("MAT 1:1"), "Update 1") }; + var usfm = @"\id MAT - Test \c 1 -\v 1 Update text \f + \fr 15.8 \ft Update note \xt reference\f* +\v 1 verse 1 \p inner verse paragraph "; - Assess(target, result); + TestUsfmUpdateBlockHandler usfmUpdateBlockHandler = new TestUsfmUpdateBlockHandler(); + UpdateUsfm( + rows, + usfm, + paragraphBehavior: UpdateUsfmMarkerBehavior.Strip, + usfmUpdateBlockHandlers: [usfmUpdateBlockHandler] + ); - target = UpdateUsfm(rows, usfm, embedBehavior: UpdateUsfmMarkerBehavior.Strip); - var result2 = + Assert.That(usfmUpdateBlockHandler.Blocks.Count, Is.EqualTo(1)); + + UsfmUpdateBlock usfmUpdateBlock = usfmUpdateBlockHandler.Blocks[0]; + AssertUpdateBlockEquals( + usfmUpdateBlock, + ["MAT 1:1"], + (UsfmUpdateBlockElementType.Other, "\\v 1 ", false), + (UsfmUpdateBlockElementType.Text, "Update 1 ", false), + (UsfmUpdateBlockElementType.Text, "verse 1 ", true), + (UsfmUpdateBlockElementType.Paragraph, "\\p ", true), + (UsfmUpdateBlockElementType.Text, "inner verse paragraph ", true) + ); + } + + [Test] + public void UpdateBlock_Verse_Range() + { + var rows = new List<(IReadOnlyList, string)> { (ScrRef("MAT 1:1"), "Update 1") }; + var usfm = @"\id MAT - Test \c 1 -\v 1 Update text +\v 1-3 verse 1 through 3 +"; + TestUsfmUpdateBlockHandler usfmUpdateBlockHandler = new TestUsfmUpdateBlockHandler(); + UpdateUsfm( + rows, + usfm, + embedBehavior: UpdateUsfmMarkerBehavior.Preserve, + usfmUpdateBlockHandlers: [usfmUpdateBlockHandler] + ); + + Assert.That(usfmUpdateBlockHandler.Blocks.Count, Is.EqualTo(1)); + + UsfmUpdateBlock usfmUpdateBlock = usfmUpdateBlockHandler.Blocks[0]; + AssertUpdateBlockEquals( + usfmUpdateBlock, + ["MAT 1:1", "MAT 1:2", "MAT 1:3"], + (UsfmUpdateBlockElementType.Other, "\\v 1-3 ", false), + (UsfmUpdateBlockElementType.Text, "Update 1 ", false), + (UsfmUpdateBlockElementType.Text, "verse 1 through 3 ", true) + ); + } + + [Test] + public void UpdateBlock_Footnote_PreserveEmbeds() + { + var rows = new List<(IReadOnlyList, string)> { (ScrRef("MAT 1:1"), "Update 1") }; + var usfm = + @"\id MAT - Test +\c 1 +\v 1 verse\f \fr 1.1 \ft Some note \f* 1 +"; + TestUsfmUpdateBlockHandler usfmUpdateBlockHandler = new TestUsfmUpdateBlockHandler(); + UpdateUsfm( + rows, + usfm, + embedBehavior: UpdateUsfmMarkerBehavior.Preserve, + usfmUpdateBlockHandlers: [usfmUpdateBlockHandler] + ); + + Assert.That(usfmUpdateBlockHandler.Blocks.Count, Is.EqualTo(1)); + + UsfmUpdateBlock usfmUpdateBlock = usfmUpdateBlockHandler.Blocks[0]; + AssertUpdateBlockEquals( + usfmUpdateBlock, + ["MAT 1:1"], + (UsfmUpdateBlockElementType.Other, "\\v 1 ", false), + (UsfmUpdateBlockElementType.Text, "Update 1 ", false), + (UsfmUpdateBlockElementType.Text, "verse", true), + (UsfmUpdateBlockElementType.Embed, "\\f \\fr 1.1 \\ft Some note \\f*", false), + (UsfmUpdateBlockElementType.Text, " 1 ", true) + ); + } + + [Test] + public void UpdateBlock_Footnote_StripEmbeds() + { + var rows = new List<(IReadOnlyList, string)> { (ScrRef("MAT 1:1"), "Update 1") }; + var usfm = + @"\id MAT - Test +\c 1 +\v 1 verse\f \fr 1.1 \ft Some note \f* 1 "; - Assess(target, result2); + TestUsfmUpdateBlockHandler usfmUpdateBlockHandler = new TestUsfmUpdateBlockHandler(); + UpdateUsfm( + rows, + usfm, + embedBehavior: UpdateUsfmMarkerBehavior.Strip, + usfmUpdateBlockHandlers: [usfmUpdateBlockHandler] + ); + + Assert.That(usfmUpdateBlockHandler.Blocks.Count, Is.EqualTo(1)); + + UsfmUpdateBlock usfmUpdateBlock = usfmUpdateBlockHandler.Blocks[0]; + AssertUpdateBlockEquals( + usfmUpdateBlock, + ["MAT 1:1"], + (UsfmUpdateBlockElementType.Other, "\\v 1 ", false), + (UsfmUpdateBlockElementType.Text, "Update 1 ", false), + (UsfmUpdateBlockElementType.Text, "verse", true), + (UsfmUpdateBlockElementType.Embed, "\\f \\fr 1.1 \\ft Some note \\f*", true), + (UsfmUpdateBlockElementType.Text, " 1 ", true) + ); } [Test] - public void MultipleFtOnlyUpdateFirst() + public void UpdateBlock_NonVerse() { var rows = new List<(IReadOnlyList, string)> { - (ScrRef("MAT 1:1"), "Update text"), - (ScrRef("MAT 1:1/1:f"), "Update note"), + (ScrRef("MAT 1:0/1:s"), "Updated section Header") }; var usfm = @"\id MAT - Test +\s Section header \c 1 -\v 1 initial text \f + \fr 15.8 \ft first note \ft second note\f* and the end. +\v 1 verse 1 "; - var target = UpdateUsfm(rows, usfm); - var result = + TestUsfmUpdateBlockHandler usfmUpdateBlockHandler = new TestUsfmUpdateBlockHandler(); + UpdateUsfm(rows, usfm, usfmUpdateBlockHandlers: [usfmUpdateBlockHandler]); + + Assert.That(usfmUpdateBlockHandler.Blocks.Count, Is.EqualTo(2)); + + UsfmUpdateBlock usfmUpdateBlock = usfmUpdateBlockHandler.Blocks[0]; + AssertUpdateBlockEquals( + usfmUpdateBlock, + ["MAT 1:0/1:s"], + (UsfmUpdateBlockElementType.Text, "Updated section Header ", false), + (UsfmUpdateBlockElementType.Text, "Section header ", true) + ); + } + + [Test] + public void UpdateBlock_Verse_PreserveStyles() + { + var rows = new List<(IReadOnlyList, string)> { (ScrRef("MAT 1:1"), "Update 1") }; + var usfm = @"\id MAT - Test \c 1 -\v 1 Update text \f + \fr 15.8 \ft Update note \ft second note\f* +\v 1 verse \bd 1\bd* "; - Assess(target, result); + TestUsfmUpdateBlockHandler usfmUpdateBlockHandler = new TestUsfmUpdateBlockHandler(); + UpdateUsfm( + rows, + usfm, + styleBehavior: UpdateUsfmMarkerBehavior.Preserve, + usfmUpdateBlockHandlers: [usfmUpdateBlockHandler] + ); - target = UpdateUsfm(rows, usfm, embedBehavior: UpdateUsfmMarkerBehavior.Strip); - var result2 = + Assert.That(usfmUpdateBlockHandler.Blocks.Count, Is.EqualTo(1)); + + UsfmUpdateBlock usfmUpdateBlock = usfmUpdateBlockHandler.Blocks[0]; + AssertUpdateBlockEquals( + usfmUpdateBlock, + ["MAT 1:1"], + (UsfmUpdateBlockElementType.Other, "\\v 1 ", false), + (UsfmUpdateBlockElementType.Text, "Update 1 ", false), + (UsfmUpdateBlockElementType.Text, "verse ", true), + (UsfmUpdateBlockElementType.Style, "\\bd ", false), + (UsfmUpdateBlockElementType.Text, "1", true), + (UsfmUpdateBlockElementType.Style, "\\bd*", false), + (UsfmUpdateBlockElementType.Text, " ", true) + ); + } + + [Test] + public void UpdateBlock_Verse_StripStyles() + { + var rows = new List<(IReadOnlyList, string)> { (ScrRef("MAT 1:1"), "Update 1") }; + var usfm = @"\id MAT - Test \c 1 -\v 1 Update text +\v 1 verse \bd 1\bd* +"; + TestUsfmUpdateBlockHandler usfmUpdateBlockHandler = new TestUsfmUpdateBlockHandler(); + UpdateUsfm( + rows, + usfm, + styleBehavior: UpdateUsfmMarkerBehavior.Strip, + usfmUpdateBlockHandlers: [usfmUpdateBlockHandler] + ); + + Assert.That(usfmUpdateBlockHandler.Blocks.Count, Is.EqualTo(1)); + + UsfmUpdateBlock usfmUpdateBlock = usfmUpdateBlockHandler.Blocks[0]; + AssertUpdateBlockEquals( + usfmUpdateBlock, + ["MAT 1:1"], + (UsfmUpdateBlockElementType.Other, "\\v 1 ", false), + (UsfmUpdateBlockElementType.Text, "Update 1 ", false), + (UsfmUpdateBlockElementType.Text, "verse ", true), + (UsfmUpdateBlockElementType.Style, "\\bd ", true), + (UsfmUpdateBlockElementType.Text, "1", true), + (UsfmUpdateBlockElementType.Style, "\\bd*", true), + (UsfmUpdateBlockElementType.Text, " ", true) + ); + } + + [Test] + public void UpdateBlock_Verse_SectionHeader() + { + var rows = new List<(IReadOnlyList, string)> { (ScrRef("MAT 1:1"), "Update 1") }; + var usfm = + @"\id MAT - Test +\c 1 +\p +\v 1 Verse 1 +\s Section header +\p +\v 2 Verse 2 "; - Assess(target, result2); + TestUsfmUpdateBlockHandler usfmUpdateBlockHandler = new TestUsfmUpdateBlockHandler(); + UpdateUsfm(rows, usfm, usfmUpdateBlockHandlers: [usfmUpdateBlockHandler]); + + Assert.That(usfmUpdateBlockHandler.Blocks.Count, Is.EqualTo(4)); + AssertUpdateBlockEquals(usfmUpdateBlockHandler.Blocks[0], ["MAT 1:0/1:p"]); + AssertUpdateBlockEquals( + usfmUpdateBlockHandler.Blocks[1], + ["MAT 1:1/1:s"], + (UsfmUpdateBlockElementType.Text, "Section header ", false) + ); + AssertUpdateBlockEquals( + usfmUpdateBlockHandler.Blocks[2], + ["MAT 1:1"], + (UsfmUpdateBlockElementType.Other, "\\v 1 ", false), + (UsfmUpdateBlockElementType.Text, "Update 1 ", false), + (UsfmUpdateBlockElementType.Text, "Verse 1 ", true), + (UsfmUpdateBlockElementType.Paragraph, "\\s Section header ", false), + (UsfmUpdateBlockElementType.Paragraph, "\\p ", false) + ); + AssertUpdateBlockEquals( + usfmUpdateBlockHandler.Blocks[3], + ["MAT 1:2"], + (UsfmUpdateBlockElementType.Other, "\\v 2 ", false), + (UsfmUpdateBlockElementType.Text, "Verse 2 ", false) + ); + } + + [Test] + public void UpdateBlock_Verse_SectionHeaderInVerse() + { + var rows = new List<(IReadOnlyList, string)> { (ScrRef("MAT 1:1"), "Update 1") }; + var usfm = + @"\id MAT - Test +\c 1 +\p +\v 1 Beginning of verse +\s Section header +\p end of verse +"; + TestUsfmUpdateBlockHandler usfmUpdateBlockHandler = new TestUsfmUpdateBlockHandler(); + UpdateUsfm(rows, usfm, usfmUpdateBlockHandlers: [usfmUpdateBlockHandler]); + + Assert.That(usfmUpdateBlockHandler.Blocks.Count, Is.EqualTo(3)); + AssertUpdateBlockEquals(usfmUpdateBlockHandler.Blocks[0], ["MAT 1:0/1:p"]); + AssertUpdateBlockEquals( + usfmUpdateBlockHandler.Blocks[1], + ["MAT 1:1/1:s"], + (UsfmUpdateBlockElementType.Text, "Section header ", false) + ); + AssertUpdateBlockEquals( + usfmUpdateBlockHandler.Blocks[2], + ["MAT 1:1"], + (UsfmUpdateBlockElementType.Other, "\\v 1 ", false), + (UsfmUpdateBlockElementType.Text, "Update 1 ", false), + (UsfmUpdateBlockElementType.Text, "Beginning of verse ", true), + (UsfmUpdateBlockElementType.Paragraph, "\\s Section header ", false), + (UsfmUpdateBlockElementType.Paragraph, "\\p ", false), + (UsfmUpdateBlockElementType.Text, "end of verse ", true) + ); } private static ScriptureRef[] ScrRef(params string[] refs) @@ -998,7 +1160,8 @@ private static string UpdateUsfm( UpdateUsfmMarkerBehavior paragraphBehavior = UpdateUsfmMarkerBehavior.Preserve, UpdateUsfmMarkerBehavior embedBehavior = UpdateUsfmMarkerBehavior.Preserve, UpdateUsfmMarkerBehavior styleBehavior = UpdateUsfmMarkerBehavior.Strip, - IReadOnlyCollection? preserveParagraphStyles = null + IEnumerable? preserveParagraphStyles = null, + IEnumerable? usfmUpdateBlockHandlers = null ) { if (source is null) @@ -1012,7 +1175,8 @@ private static string UpdateUsfm( paragraphBehavior, embedBehavior, styleBehavior, - preserveParagraphStyles + preserveParagraphStyles, + usfmUpdateBlockHandlers ); } else @@ -1025,21 +1189,61 @@ private static string UpdateUsfm( paragraphBehavior, embedBehavior, styleBehavior, - preserveParagraphStyles + preserveParagraphStyles, + usfmUpdateBlockHandlers ); UsfmParser.Parse(source, updater); return updater.GetUsfm(); } } - private static void Assess(string target, string truth) + private static void AssertUsfmEquals(string target, string truth) { Assert.That(target, Is.Not.Null); - var target_lines = target.Split(new[] { "\n" }, StringSplitOptions.None); - var truth_lines = truth.Split(new[] { "\n" }, StringSplitOptions.None); + var target_lines = target.Split(["\n"], StringSplitOptions.None); + var truth_lines = truth.Split(["\n"], StringSplitOptions.None); for (int i = 0; i < truth_lines.Length; i++) { Assert.That(target_lines[i].Trim(), Is.EqualTo(truth_lines[i].Trim()), message: $"Line {i}"); } } + + private static void AssertUpdateBlockEquals( + UsfmUpdateBlock block, + string[] expectedRefs, + params (UsfmUpdateBlockElementType, string, bool)[] expectedElements + ) + { + var parsedExtractedRefs = expectedRefs.Select(r => ScriptureRef.Parse(r)); + Assert.That(block.Refs.SequenceEqual(parsedExtractedRefs)); + Assert.That(block.Elements.Count, Is.EqualTo(expectedElements.Length)); + foreach ( + ( + UsfmUpdateBlockElement element, + (UsfmUpdateBlockElementType expectedType, string expectedUsfm, bool expectedMarkedForRemoval) + ) in block.Elements.Zip(expectedElements) + ) + { + Assert.That(element.Type, Is.EqualTo(expectedType)); + Assert.That(string.Join("", element.Tokens.Select(t => t.ToUsfm())), Is.EqualTo(expectedUsfm)); + Assert.That(element.MarkedForRemoval, Is.EqualTo(expectedMarkedForRemoval)); + } + } + + private class TestUsfmUpdateBlockHandler : IUsfmUpdateBlockHandler + { + public List Blocks { get; } + + public TestUsfmUpdateBlockHandler() + { + Blocks = new List(); + } + + public UsfmUpdateBlock ProcessBlock(UsfmUpdateBlock block) + { + UsfmUpdateBlock newBlock = block.Clone(); + Blocks.Add(newBlock); + return newBlock; + } + } } diff --git a/tests/SIL.Machine.Tests/Corpora/UsfmFileTextTests.cs b/tests/SIL.Machine.Tests/Corpora/UsfmFileTextTests.cs index c00888e2..14288705 100644 --- a/tests/SIL.Machine.Tests/Corpora/UsfmFileTextTests.cs +++ b/tests/SIL.Machine.Tests/Corpora/UsfmFileTextTests.cs @@ -73,7 +73,7 @@ public void GetRows_NonEmptyText_AllText() IText text = corpus["MAT"]; TextRow[] rows = text.GetRows().ToArray(); - Assert.That(rows, Has.Length.EqualTo(50)); + Assert.That(rows, Has.Length.EqualTo(48)); Assert.That(rows[0].Ref, Is.EqualTo(ScriptureRef.Parse("MAT 1:0/1:h", corpus.Versification))); Assert.That(rows[0].Text, Is.EqualTo("Matthew")); @@ -84,59 +84,47 @@ public void GetRows_NonEmptyText_AllText() Assert.That(rows[2].Ref, Is.EqualTo(ScriptureRef.Parse("MAT 1:0/3:ip", corpus.Versification))); Assert.That(rows[2].Text, Is.EqualTo("An introduction to Matthew")); - Assert.That(rows[3].Ref, Is.EqualTo(ScriptureRef.Parse("MAT 1:0/3:ip/1:fe", corpus.Versification))); - Assert.That(rows[3].Text, Is.EqualTo("This is an endnote.")); - - Assert.That(rows[4].Ref, Is.EqualTo(ScriptureRef.Parse("MAT 1:0/4:p", corpus.Versification))); - Assert.That(rows[4].Text, Is.EqualTo("MAT 1 Here is another paragraph.")); + Assert.That(rows[3].Ref, Is.EqualTo(ScriptureRef.Parse("MAT 1:0/4:p", corpus.Versification))); + Assert.That(rows[3].Text, Is.EqualTo("MAT 1 Here is another paragraph.")); Assert.That( - rows[7].Ref, + rows[6].Ref, Is.EqualTo(ScriptureRef.Parse("MAT 1:0/7:weirdtaglookingthing", corpus.Versification)) ); - Assert.That(rows[7].Text, Is.EqualTo("that is not an actual tag.")); - - Assert.That(rows[8].Ref, Is.EqualTo(ScriptureRef.Parse("MAT 1:0/8:s", corpus.Versification))); - Assert.That(rows[8].Text, Is.EqualTo("Chapter One")); - - Assert.That(rows[10].Ref, Is.EqualTo(ScriptureRef.Parse("MAT 1:1/1:f", corpus.Versification))); - Assert.That(rows[10].Text, Is.EqualTo("This is a footnote for v1.")); + Assert.That(rows[6].Text, Is.EqualTo("that is not an actual tag.")); - Assert.That(rows[12].Ref, Is.EqualTo(ScriptureRef.Parse("MAT 1:2/1:f", corpus.Versification))); - Assert.That(rows[12].Text, Is.EqualTo("This is a footnote for v2.")); + Assert.That(rows[7].Ref, Is.EqualTo(ScriptureRef.Parse("MAT 1:0/8:s", corpus.Versification))); + Assert.That(rows[7].Text, Is.EqualTo("Chapter One")); - Assert.That(rows[19].Ref, Is.EqualTo(ScriptureRef.Parse("MAT 2:0/1:tr/1:tc1", corpus.Versification))); - Assert.That(rows[19].Text, Is.EqualTo("Row one, column one.")); + Assert.That(rows[16].Ref, Is.EqualTo(ScriptureRef.Parse("MAT 2:0/1:tr/1:tc1", corpus.Versification))); + Assert.That(rows[16].Text, Is.EqualTo("Row one, column one.")); - Assert.That(rows[20].Ref, Is.EqualTo(ScriptureRef.Parse("MAT 2:0/1:tr/2:tc2", corpus.Versification))); - Assert.That(rows[20].Text, Is.EqualTo("Row one, column two.")); + Assert.That(rows[17].Ref, Is.EqualTo(ScriptureRef.Parse("MAT 2:0/1:tr/2:tc2", corpus.Versification))); + Assert.That(rows[17].Text, Is.EqualTo("Row one, column two.")); - Assert.That(rows[21].Ref, Is.EqualTo(ScriptureRef.Parse("MAT 2:0/2:tr/1:tc1", corpus.Versification))); - Assert.That(rows[21].Text, Is.EqualTo("Row two, column one.")); + Assert.That(rows[18].Ref, Is.EqualTo(ScriptureRef.Parse("MAT 2:0/2:tr/1:tc1", corpus.Versification))); + Assert.That(rows[18].Text, Is.EqualTo("Row two, column one.")); - Assert.That(rows[22].Ref, Is.EqualTo(ScriptureRef.Parse("MAT 2:0/2:tr/2:tc2", corpus.Versification))); - Assert.That(rows[22].Text, Is.EqualTo("Row two, column two.")); + Assert.That(rows[19].Ref, Is.EqualTo(ScriptureRef.Parse("MAT 2:0/2:tr/2:tc2", corpus.Versification))); + Assert.That(rows[19].Text, Is.EqualTo("Row two, column two.")); - Assert.That(rows[23].Ref, Is.EqualTo(ScriptureRef.Parse("MAT 2:0/3:s1", corpus.Versification))); - Assert.That(rows[23].Text, Is.EqualTo("Chapter Two")); - - Assert.That(rows[24].Ref, Is.EqualTo(ScriptureRef.Parse("MAT 2:0/4:p", corpus.Versification))); - Assert.That(rows[24].Text, Is.Empty); + Assert.That(rows[20].Ref, Is.EqualTo(ScriptureRef.Parse("MAT 2:0/3:s1", corpus.Versification))); + Assert.That(rows[20].Text, Is.EqualTo("Chapter Two")); - Assert.That(rows[26].Ref, Is.EqualTo(ScriptureRef.Parse("MAT 2:1/1:f", corpus.Versification))); - Assert.That(rows[26].Text, Is.EqualTo("This is a footnote.")); + Assert.That(rows[21].Ref, Is.EqualTo(ScriptureRef.Parse("MAT 2:0/4:p", corpus.Versification))); + Assert.That(rows[21].Text, Is.Empty); - Assert.That(rows[29].Ref, Is.EqualTo(ScriptureRef.Parse("MAT 2:3/2:esb/1:ms", corpus.Versification))); - Assert.That(rows[29].Text, Is.EqualTo("This is a sidebar")); + Assert.That(rows[26].Ref, Is.EqualTo(ScriptureRef.Parse("MAT 2:3/1:esb/1:ms", corpus.Versification))); + Assert.That(rows[26].Text, Is.EqualTo("This is a sidebar")); - Assert.That(rows[30].Ref, Is.EqualTo(ScriptureRef.Parse("MAT 2:3/2:esb/2:p", corpus.Versification))); - Assert.That(rows[30].Text, Is.EqualTo("Here is some sidebar content.")); + Assert.That(rows[27].Ref, Is.EqualTo(ScriptureRef.Parse("MAT 2:3/1:esb/2:p", corpus.Versification))); + Assert.That(rows[27].Text, Is.EqualTo("Here is some sidebar content.")); - Assert.That(rows[36].Ref, Is.EqualTo(ScriptureRef.Parse("MAT 2:7a/1:s", corpus.Versification))); - Assert.That(rows[36].Text, Is.EqualTo("Section header")); + Assert.That(rows[33].Ref, Is.EqualTo(ScriptureRef.Parse("MAT 2:7a/1:s", corpus.Versification))); + Assert.That(rows[33].Text, Is.EqualTo("Section header")); - Assert.That(rows[43].Ref, Is.EqualTo(ScriptureRef.Parse("MAT 2:12/1:restore", corpus.Versification))); - Assert.That(rows[43].Text, Is.EqualTo("restore information")); + Assert.That(rows[40].Ref, Is.EqualTo(ScriptureRef.Parse("MAT 2:12/1:restore", corpus.Versification))); + Assert.That(rows[40].Text, Is.EqualTo("restore information")); } [Test] @@ -256,7 +244,7 @@ public void GetRows_IncludeMarkers_AllText() IText text = corpus["MAT"]; TextRow[] rows = text.GetRows().ToArray(); - Assert.That(rows, Has.Length.EqualTo(46)); + Assert.That(rows, Has.Length.EqualTo(48)); Assert.That(rows[2].Ref, Is.EqualTo(ScriptureRef.Parse("MAT 1:0/3:ip", corpus.Versification))); Assert.That(rows[2].Text, Is.EqualTo("An introduction to Matthew\\fe + \\ft This is an endnote.\\fe*")); @@ -286,13 +274,13 @@ public void GetRows_IncludeMarkers_AllText() Assert.That(rows[20].Ref, Is.EqualTo(ScriptureRef.Parse("MAT 2:0/3:s1", corpus.Versification))); Assert.That(rows[20].Text, Is.EqualTo("Chapter \\it Two \\it*")); - Assert.That(rows[22].Ref, Is.EqualTo(ScriptureRef.Parse("MAT 2:1", corpus.Versification))); + Assert.That(rows[23].Ref, Is.EqualTo(ScriptureRef.Parse("MAT 2:1", corpus.Versification))); Assert.That( - rows[22].Text, + rows[23].Text, Is.EqualTo("Chapter \\add two\\add*, verse \\f + \\fr 2:1: \\ft This is a footnote.\\f*one.") ); - Assert.That(rows[26].Ref, Is.EqualTo(ScriptureRef.Parse("MAT 2:3/2:esb/2:p", corpus.Versification))); - Assert.That(rows[26].Text, Is.EqualTo("Here is some sidebar // content.")); + Assert.That(rows[27].Ref, Is.EqualTo(ScriptureRef.Parse("MAT 2:3/1:esb/2:p", corpus.Versification))); + Assert.That(rows[27].Text, Is.EqualTo("Here is some sidebar // content.")); } } diff --git a/tests/SIL.Machine.Tests/Corpora/UsfmMemoryTextTests.cs b/tests/SIL.Machine.Tests/Corpora/UsfmMemoryTextTests.cs index d2589e5a..028968b3 100644 --- a/tests/SIL.Machine.Tests/Corpora/UsfmMemoryTextTests.cs +++ b/tests/SIL.Machine.Tests/Corpora/UsfmMemoryTextTests.cs @@ -148,11 +148,11 @@ public void GetRows_VersePara_BeginningNonVerseSegment() includeAllText: true ); - Assert.That(rows, Has.Length.EqualTo(5), string.Join(",", rows.Select(tr => tr.Text))); + Assert.That(rows, Has.Length.EqualTo(4), string.Join(",", rows.Select(tr => tr.Text))); Assert.That(rows[0].Text, Is.EqualTo("")); Assert.That(rows[0].Ref, Is.EqualTo(ScriptureRef.Parse("MAT 1:0/1:q1"))); - Assert.That(rows[1].Text, Is.EqualTo("World")); - Assert.That(rows[1].Ref, Is.EqualTo(ScriptureRef.Parse("MAT 1:0/1:q1/1:f"))); + Assert.That(rows[1].Text, Is.EqualTo("First verse in line!?!")); + Assert.That(rows[1].Ref, Is.EqualTo(ScriptureRef.Parse("MAT 1:1"))); } [Test] @@ -169,11 +169,11 @@ public void GetRows_VersePara_CommentFirst() includeAllText: true ); - Assert.That(rows[0].Text, Is.EqualTo("World")); - Assert.That(rows[0].Ref, Is.EqualTo(ScriptureRef.Parse("MAT 1:0/1:f"))); - Assert.That(rows[1].Text, Is.EqualTo("This is a comment")); - Assert.That(rows[1].Ref, Is.EqualTo(ScriptureRef.Parse("MAT 1:0/2:ip"))); - Assert.That(rows, Has.Length.EqualTo(3), string.Join(",", rows.Select(tr => tr.Text))); + Assert.That(rows[0].Text, Is.EqualTo("This is a comment")); + Assert.That(rows[0].Ref, Is.EqualTo(ScriptureRef.Parse("MAT 1:0/2:ip"))); + Assert.That(rows[1].Text, Is.EqualTo("First verse in line!?!")); + Assert.That(rows[1].Ref, Is.EqualTo(ScriptureRef.Parse("MAT 1:1"))); + Assert.That(rows, Has.Length.EqualTo(2), string.Join(",", rows.Select(tr => tr.Text))); } [Test] @@ -224,8 +224,9 @@ public void GetRows_OptBreak_OutsideOfSegment() ); Assert.Multiple(() => { - Assert.That(rows, Has.Length.EqualTo(1), string.Join(",", rows.Select(tr => tr.Text))); - Assert.That(rows[0].Text, Is.EqualTo("This is the first verse.")); + Assert.That(rows, Has.Length.EqualTo(2), string.Join(",", rows.Select(tr => tr.Text))); + Assert.That(rows[0].Text, Is.EqualTo("")); + Assert.That(rows[1].Text, Is.EqualTo("This is the first verse.")); }); }