Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 26 additions & 0 deletions src/SIL.Machine/Corpora/IUsfmUpdateBlockHandler.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
using System;

namespace SIL.Machine.Corpora
{
/// <summary>
/// A processing step applied to a <see cref="UsfmUpdateBlock"/>.
/// </summary>
/// <remarks>
/// Implementations may signal a block-specific failure by throwing
/// <see cref="UsfmUpdateBlockHandlerException"/>, which carries the block being processed.
/// </remarks>
public interface IUsfmUpdateBlockHandler
{
/// <summary>
/// Processes the given update block and returns the block that should be used in its place.
/// </summary>
/// <param name="block">The update block to process.</param>
/// <returns>The resulting update block.</returns>
// NOTE(review): whether implementations mutate <paramref name="block"/> in place or return a new
// instance is not specified by this interface - confirm against each implementation.
UsfmUpdateBlock ProcessBlock(UsfmUpdateBlock block);
}

/// <summary>
/// Exception that records the <see cref="UsfmUpdateBlock"/> associated with a failure,
/// in addition to the usual message and optional inner exception.
/// </summary>
public class UsfmUpdateBlockHandlerException : Exception
{
    /// <summary>
    /// Creates an exception with a message and the associated block, without an inner exception.
    /// </summary>
    /// <param name="message">The error message.</param>
    /// <param name="block">The update block associated with the failure.</param>
    public UsfmUpdateBlockHandlerException(string message, UsfmUpdateBlock block)
        : this(message, null, block) { }

    /// <summary>
    /// Creates an exception with a message, the exception that caused it, and the associated block.
    /// </summary>
    /// <param name="message">The error message.</param>
    /// <param name="exception">The underlying exception, exposed as the inner exception.</param>
    /// <param name="block">The update block associated with the failure.</param>
    public UsfmUpdateBlockHandlerException(string message, Exception exception, UsfmUpdateBlock block)
        : base(message, exception)
    {
        this.Block = block;
    }

    /// <summary>
    /// The update block supplied when the exception was constructed.
    /// </summary>
    public UsfmUpdateBlock Block { get; }
}
}
31 changes: 20 additions & 11 deletions src/SIL.Machine/Corpora/PlaceMarkersUsfmUpdateBlockHandler.cs
Original file line number Diff line number Diff line change
Expand Up @@ -112,7 +112,6 @@ public UsfmUpdateBlock ProcessBlock(UsfmUpdateBlock block)
IReadOnlyList<string> targetTokens = alignmentInfo.TranslationTokens;
int sourceTokenIndex = 0;

string sourceSentence = "";
string targetSentence = "";
var toPlace = new List<UsfmUpdateBlockElement>();
var adjacentSourceTokens = new List<int>();
Expand All @@ -123,16 +122,9 @@ public UsfmUpdateBlock ProcessBlock(UsfmUpdateBlock block)
{
if (element.Type == UsfmUpdateBlockElementType.Text)
{
if (
element.MarkedForRemoval
|| (
element.Type == UsfmUpdateBlockElementType.Paragraph
&& alignmentInfo.ParagraphBehavior == UpdateUsfmMarkerBehavior.Strip
)
)
if (element.MarkedForRemoval)
{
string text = element.Tokens[0].ToUsfm();
sourceSentence += text;

// Track seen tokens
while (sourceTokenIndex < sourceTokens.Count && text.Contains(sourceTokens[sourceTokenIndex]))
Expand All @@ -152,7 +144,13 @@ public UsfmUpdateBlock ProcessBlock(UsfmUpdateBlock block)
}
}

if (element.MarkedForRemoval)
if (
element.MarkedForRemoval
|| (
element.Type == UsfmUpdateBlockElementType.Paragraph
&& alignmentInfo.ParagraphBehavior == UpdateUsfmMarkerBehavior.Strip
)
)
{
ignoredElements.Add(element);
}
Expand All @@ -174,7 +172,18 @@ public UsfmUpdateBlock ProcessBlock(UsfmUpdateBlock block)
int prevLength = 0;
foreach (string token in targetTokens)
{
targetTokenStarts.Add(targetSentence.IndexOf(token, targetTokenStarts.LastOrDefault() + prevLength));
int indexOfTargetTokenInSentence = targetSentence.IndexOf(
token,
targetTokenStarts.LastOrDefault() + prevLength
);
if (indexOfTargetTokenInSentence == -1)
{
throw new UsfmUpdateBlockHandlerException(
$"No token \"{token}\" found in text \"{targetSentence}\" at or beyond index {targetTokenStarts.LastOrDefault() + prevLength}. Is the versification correctly specified?",
block
);
}
targetTokenStarts.Add(indexOfTargetTokenInSentence);
prevLength = token.Length;
}

Expand Down
18 changes: 16 additions & 2 deletions src/SIL.Machine/Corpora/UpdateUsfmParserHandler.cs
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@ public class UpdateUsfmParserHandler : ScriptureRefUsfmParserHandlerBase
private readonly Stack<bool> _replace;
private int _rowIndex;
private int _tokenIndex;
private readonly Func<UsfmUpdateBlockHandlerException, bool> _errorHandler;

public UpdateUsfmParserHandler(
IReadOnlyList<UpdateUsfmRow> rows = null,
Expand All @@ -67,7 +68,8 @@ public UpdateUsfmParserHandler(
UpdateUsfmMarkerBehavior styleBehavior = UpdateUsfmMarkerBehavior.Strip,
IEnumerable<string> preserveParagraphStyles = null,
IEnumerable<IUsfmUpdateBlockHandler> updateBlockHandlers = null,
IEnumerable<string> remarks = null
IEnumerable<string> remarks = null,
Func<UsfmUpdateBlockHandlerException, bool> errorHandler = null
)
{
_rows = rows ?? Array.Empty<UpdateUsfmRow>();
Expand All @@ -90,6 +92,9 @@ public UpdateUsfmParserHandler(
? new HashSet<string> { "r", "rem" }
: new HashSet<string>(preserveParagraphStyles);
_remarks = remarks == null ? new List<string>() : remarks.ToList();
_errorHandler = errorHandler;
if (_errorHandler == null)
_errorHandler = (error) => false;
}

public IReadOnlyList<UsfmToken> Tokens => _tokens;
Expand Down Expand Up @@ -576,7 +581,16 @@ private void EndUpdateBlock(UsfmParserState state, IReadOnlyList<ScriptureRef> s

foreach (IUsfmUpdateBlockHandler handler in _updateBlockHandlers)
{
updateBlock = handler.ProcessBlock(updateBlock);
try
{
updateBlock = handler.ProcessBlock(updateBlock);
}
catch (UsfmUpdateBlockHandlerException e)
{
bool shouldContinue = _errorHandler(e);
if (!shouldContinue)
throw;
}
}
List<UsfmToken> tokens = updateBlock.GetTokens();
foreach (UsfmUpdateBlockElement elem in Enumerable.Reverse(paraElems))
Expand Down
7 changes: 0 additions & 7 deletions src/SIL.Machine/Corpora/UsfmUpdateBlockHandler.cs

This file was deleted.

Loading