Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
using System;
using System.Collections.Generic;
using System.Linq;
using SIL.Extensions;
Expand Down
14 changes: 0 additions & 14 deletions src/SIL.Machine/Corpora/QuotationMarkDenormalizationFirstPass.cs

This file was deleted.

Original file line number Diff line number Diff line change
@@ -1,8 +1,7 @@
using System.Collections.Generic;
using System.Linq;
using SIL.Machine.PunctuationAnalysis;

namespace SIL.Machine.Corpora
namespace SIL.Machine.PunctuationAnalysis
{
public class FallbackQuotationMarkResolver : IQuotationMarkResolver
{
Expand Down
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
using System;
using System.IO;
using System.Text;
using SIL.Machine.PunctuationAnalysis;
using SIL.Machine.Corpora;

namespace SIL.Machine.Corpora
namespace SIL.Machine.PunctuationAnalysis
{
public abstract class ParatextProjectQuoteConventionDetector
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,42 @@

namespace SIL.Machine.PunctuationAnalysis
{
/// <summary>
/// Keeps a running tally of how many times each quotation mark string has been counted,
/// together with the overall number of marks counted, so that callers can ask whether a
/// given mark makes up only a negligible share of everything seen so far.
/// </summary>
public class QuotationMarkCounter
{
    // A mark whose share of all counted marks is strictly below this value is negligible.
    private const double NegligibleProportionThreshold = 0.01;
    private Dictionary<string, int> _quotationMarkCounts;
    private int _totalQuotationMarkCount;

    /// <summary>
    /// Creates a counter with no marks recorded.
    /// </summary>
    public QuotationMarkCounter()
    {
        Reset();
    }

    /// <summary>
    /// Discards all tallies: replaces the per-mark table with a new empty one and zeroes the total.
    /// </summary>
    public void Reset()
    {
        _totalQuotationMarkCount = 0;
        _quotationMarkCounts = new Dictionary<string, int>();
    }

    /// <summary>
    /// Records every match in <paramref name="quotationMarks"/>, keyed by its quotation mark string.
    /// </summary>
    /// <param name="quotationMarks">The matches to add to the tallies.</param>
    public void CountQuotationMarks(List<QuotationMarkStringMatch> quotationMarks)
    {
        foreach (QuotationMarkStringMatch match in quotationMarks)
        {
            // UpdateValue is handed 0 as the default and an increment as the update step.
            _quotationMarkCounts.UpdateValue(match.QuotationMark, () => 0, tally => tally + 1);
            _totalQuotationMarkCount += 1;
        }
    }

    /// <summary>
    /// Tells whether <paramref name="quotationMark"/> accounts for less than
    /// <see cref="NegligibleProportionThreshold"/> of all marks counted so far.
    /// </summary>
    /// <param name="quotationMark">The quotation mark string to look up.</param>
    /// <returns>
    /// <c>true</c> when nothing has been counted yet, or when the mark's share of the total
    /// is strictly below the threshold; otherwise <c>false</c>.
    /// </returns>
    public bool IsQuotationMarkProportionNegligible(string quotationMark)
    {
        // With nothing counted there is no proportion to compute; treat every mark as negligible.
        if (_totalQuotationMarkCount == 0)
        {
            return true;
        }

        // A mark that was never counted contributes zero occurrences.
        if (!_quotationMarkCounts.TryGetValue(quotationMark, out int occurrences))
        {
            occurrences = 0;
        }

        // Cast before dividing so the ratio is not truncated by integer division.
        double proportion = (double)occurrences / _totalQuotationMarkCount;
        return proportion < NegligibleProportionThreshold;
    }
}

public class ApostropheProportionStatistics
{
private int _numCharacters;
Expand Down Expand Up @@ -385,19 +421,22 @@ public class PreliminaryQuotationMarkAnalyzer
private readonly QuoteConventionSet _quoteConventions;
private readonly PreliminaryApostropheAnalyzer _apostropheAnalyzer;
private readonly QuotationMarkSequences _quotationMarkSequences;
private readonly QuotationMarkCounter _quotationMarkCounts;

public PreliminaryQuotationMarkAnalyzer(QuoteConventionSet quoteConventions)
{
_quoteConventions = quoteConventions;
_apostropheAnalyzer = new PreliminaryApostropheAnalyzer();
_quotationMarkSequences = new QuotationMarkSequences();
_quotationMarkCounts = new QuotationMarkCounter();
Reset();
}

public void Reset()
{
_apostropheAnalyzer.Reset();
_quotationMarkSequences.Reset();
_quotationMarkCounts.Reset();
}

public QuoteConventionSet NarrowDownPossibleQuoteConventions(List<Chapter> chapters)
Expand All @@ -420,6 +459,7 @@ private void AnalyzeQuotationMarksForVerse(Verse verse)
).FindAllPotentialQuotationMarksInVerse(verse);
AnalyzeQuotationMarkSequence(quotationMarks);
_apostropheAnalyzer.ProcessQuotationMarks(verse.TextSegments.ToList(), quotationMarks);
_quotationMarkCounts.CountQuotationMarks(quotationMarks);
}

private void AnalyzeQuotationMarkSequence(List<QuotationMarkStringMatch> quotationMarks)
Expand Down Expand Up @@ -450,6 +490,9 @@ private List<string> FindOpeningQuotationMarks()

private bool IsOpeningQuotationMark(string quotationMark)
{
if (_quotationMarkCounts.IsQuotationMarkProportionNegligible(quotationMark))
return false;

if (_apostropheAnalyzer.IsApostropheOnly(quotationMark))
return false;

Expand All @@ -475,6 +518,9 @@ private List<string> FindClosingQuotationMarks()

private bool IsClosingQuotationMark(string quotationMark)
{
if (_quotationMarkCounts.IsQuotationMarkProportionNegligible(quotationMark))
return false;

if (_apostropheAnalyzer.IsApostropheOnly(quotationMark))
return false;

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
namespace SIL.Machine.PunctuationAnalysis
{
/// <summary>
/// A <see cref="QuotationMarkUpdateFirstPass"/> that only needs the target quote convention.
/// </summary>
/// <remarks>
/// This is a convenience class so that users don't have to know to normalize the source quote convention.
/// </remarks>
public class QuotationMarkDenormalizationFirstPass : QuotationMarkUpdateFirstPass
{
/// <summary>
/// Forwards to the base constructor, passing the normalized form of
/// <paramref name="targetQuoteConvention"/> as the first argument and
/// <paramref name="targetQuoteConvention"/> itself as the second.
/// </summary>
/// <param name="targetQuoteConvention">
/// The quote convention to denormalize to; <c>Normalize()</c> is called on it, so it must not be null.
/// </param>
public QuotationMarkDenormalizationFirstPass(QuoteConvention targetQuoteConvention)
: base(targetQuoteConvention.Normalize(), targetQuoteConvention) { }
}
}
Original file line number Diff line number Diff line change
@@ -1,17 +1,14 @@
using SIL.Machine.PunctuationAnalysis;

namespace SIL.Machine.Corpora
namespace SIL.Machine.PunctuationAnalysis
{
public class QuotationMarkDenormalizationUsfmUpdateBlockHandler : QuoteConventionChangingUsfmUpdateBlockHandler
{
// This is a convenience class so that users don't have to know to normalize the source quote convention
public QuotationMarkDenormalizationUsfmUpdateBlockHandler(
QuoteConvention sourceQuoteConvention,
QuoteConvention targetQuoteConvention,
QuotationMarkUpdateSettings settings = null
)
: base(
sourceQuoteConvention.Normalize(),
targetQuoteConvention.Normalize(),
targetQuoteConvention,
settings ?? new QuotationMarkUpdateSettings()
) { }
Expand Down
Original file line number Diff line number Diff line change
@@ -1,9 +1,8 @@
using System;
using System.Collections.Generic;
using System.Linq;
using SIL.Machine.PunctuationAnalysis;

namespace SIL.Machine.Corpora
namespace SIL.Machine.PunctuationAnalysis
{
// Determines the best strategy to take for each chapter
public class QuotationMarkUpdateFirstPass : UsfmStructureExtractor
Expand All @@ -12,46 +11,37 @@ public class QuotationMarkUpdateFirstPass : UsfmStructureExtractor
private readonly DepthBasedQuotationMarkResolver _quotationMarkResolver;
public bool WillFallbackModeWork { get; set; }

public QuotationMarkUpdateFirstPass(
QuoteConvention sourceQuoteConvention,
QuoteConvention targetQuoteConvention
)
public QuotationMarkUpdateFirstPass(QuoteConvention oldQuoteConvention, QuoteConvention newQuoteConvention)
{
_quotationMarkFinder = new QuotationMarkFinder(
new QuoteConventionSet(new List<QuoteConvention> { sourceQuoteConvention, targetQuoteConvention })
new QuoteConventionSet(new List<QuoteConvention> { oldQuoteConvention, newQuoteConvention })
);
_quotationMarkResolver = new DepthBasedQuotationMarkResolver(
new QuotationMarkUpdateResolutionSettings(sourceQuoteConvention)
new QuotationMarkUpdateResolutionSettings(oldQuoteConvention)
);
WillFallbackModeWork = CheckWhetherFallbackModeWillWork(sourceQuoteConvention, targetQuoteConvention);
WillFallbackModeWork = CheckWhetherFallbackModeWillWork(oldQuoteConvention, newQuoteConvention);
}

public bool CheckWhetherFallbackModeWillWork(
QuoteConvention sourceQuoteConvention,
QuoteConvention targetQuoteConvention
QuoteConvention oldQuoteConvention,
QuoteConvention newQuoteConvention
)
{
var targetMarkBySourceMark = new Dictionary<string, string>();
var newMarkByOldMark = new Dictionary<string, string>();
foreach (
int depth in Enumerable.Range(
1,
Math.Min(sourceQuoteConvention.NumLevels, targetQuoteConvention.NumLevels)
)
int depth in Enumerable.Range(1, Math.Min(oldQuoteConvention.NumLevels, newQuoteConvention.NumLevels))
)
{
string openingQuotationMark = sourceQuoteConvention.GetOpeningQuotationMarkAtDepth(depth);
string closingQuotationMark = targetQuoteConvention.GetClosingQuotationMarkAtDepth(depth);
string openingQuotationMark = oldQuoteConvention.GetOpeningQuotationMarkAtDepth(depth);
string closingQuotationMark = newQuoteConvention.GetClosingQuotationMarkAtDepth(depth);
if (
targetMarkBySourceMark.TryGetValue(
openingQuotationMark,
out string correspondingClosingQuotationMark
)
newMarkByOldMark.TryGetValue(openingQuotationMark, out string correspondingClosingQuotationMark)
&& correspondingClosingQuotationMark != closingQuotationMark
)
{
return false;
}
targetMarkBySourceMark[openingQuotationMark] = closingQuotationMark;
newMarkByOldMark[openingQuotationMark] = closingQuotationMark;
}
return true;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,13 +5,13 @@ namespace SIL.Machine.PunctuationAnalysis
{
public class QuotationMarkUpdateResolutionSettings : IQuotationMarkResolutionSettings
{
private readonly QuoteConvention _sourceQuoteConvention;
private readonly QuoteConvention _oldQuoteConvention;
private readonly QuoteConventionSet _quoteConventionSingletonSet;

public QuotationMarkUpdateResolutionSettings(QuoteConvention sourceQuoteConvention)
public QuotationMarkUpdateResolutionSettings(QuoteConvention oldQuoteConvention)
{
_sourceQuoteConvention = sourceQuoteConvention;
_quoteConventionSingletonSet = new QuoteConventionSet(new List<QuoteConvention> { sourceQuoteConvention });
_oldQuoteConvention = oldQuoteConvention;
_quoteConventionSingletonSet = new QuoteConventionSet(new List<QuoteConvention> { oldQuoteConvention });
}

public bool AreMarksAValidPair(string openingMark, string closingMark)
Expand All @@ -31,7 +31,7 @@ public Regex GetOpeningQuotationMarkRegex()

public HashSet<int> GetPossibleDepths(string quotationMark, QuotationMarkDirection direction)
{
return _sourceQuoteConvention.GetPossibleDepths(quotationMark, direction);
return _oldQuoteConvention.GetPossibleDepths(quotationMark, direction);
}

public bool IsValidClosingQuotationMark(QuotationMarkStringMatch quotationMarkMatch)
Expand All @@ -46,7 +46,7 @@ public bool IsValidOpeningQuotationMark(QuotationMarkStringMatch quotationMarkMa

public bool MetadataMatchesQuotationMark(string quotationMark, int depth, QuotationMarkDirection direction)
{
return _sourceQuoteConvention.GetExpectedQuotationMark(depth, direction) == quotationMark;
return _oldQuoteConvention.GetExpectedQuotationMark(depth, direction) == quotationMark;
}

public bool ShouldRelyOnParagraphMarkers()
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
using System.Collections.Generic;

namespace SIL.Machine.Corpora
namespace SIL.Machine.PunctuationAnalysis
{
public class QuotationMarkUpdateSettings
{
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
namespace SIL.Machine.Corpora
namespace SIL.Machine.PunctuationAnalysis
{
public enum QuotationMarkUpdateStrategy
{
Expand Down
Original file line number Diff line number Diff line change
@@ -1,13 +1,13 @@
using System.Collections.Generic;
using System.Linq;
using SIL.Machine.PunctuationAnalysis;
using SIL.Machine.Corpora;

namespace SIL.Machine.Corpora
namespace SIL.Machine.PunctuationAnalysis
{
public class QuoteConventionChangingUsfmUpdateBlockHandler : IUsfmUpdateBlockHandler
{
private readonly QuoteConvention _sourceQuoteConvention;
private readonly QuoteConvention _targetQuoteConvention;
private readonly QuoteConvention _oldQuoteConvention;
private readonly QuoteConvention _newQuoteConvention;
private readonly QuotationMarkUpdateSettings _settings;
protected QuotationMarkFinder QuotationMarkFinder { get; set; }
protected TextSegment.Builder NextScriptureTextSegmentBuilder { get; set; }
Expand All @@ -19,23 +19,23 @@ public class QuoteConventionChangingUsfmUpdateBlockHandler : IUsfmUpdateBlockHan
private int _currentVerseNumber;

public QuoteConventionChangingUsfmUpdateBlockHandler(
QuoteConvention sourceQuoteConvention,
QuoteConvention targetQuoteConvention,
QuoteConvention oldQuoteConvention,
QuoteConvention newQuoteConvention,
QuotationMarkUpdateSettings settings
)
{
_sourceQuoteConvention = sourceQuoteConvention;
_targetQuoteConvention = targetQuoteConvention;
_oldQuoteConvention = oldQuoteConvention;
_newQuoteConvention = newQuoteConvention;
_settings = settings;

QuotationMarkFinder = new QuotationMarkFinder(
new QuoteConventionSet(new List<QuoteConvention> { _sourceQuoteConvention })
new QuoteConventionSet(new List<QuoteConvention> { _oldQuoteConvention })
);

NextScriptureTextSegmentBuilder = new TextSegment.Builder();

IQuotationMarkResolutionSettings resolutionSettings = new QuotationMarkUpdateResolutionSettings(
sourceQuoteConvention
oldQuoteConvention
);

// Each embed represents a separate context for quotation marks
Expand Down Expand Up @@ -140,7 +140,7 @@ QuotationMarkMetadata resolvedQuotationMarkMatch
)
{
int previousLength = resolvedQuotationMarkMatch.Length;
resolvedQuotationMarkMatch.UpdateQuotationMark(_targetQuoteConvention);
resolvedQuotationMarkMatch.UpdateQuotationMark(_newQuoteConvention);
int updatedLength = resolvedQuotationMarkMatch.Length;

if (previousLength != updatedLength)
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
using System.IO;
using System.IO.Compression;
using SIL.Machine.Corpora;

namespace SIL.Machine.Corpora
namespace SIL.Machine.PunctuationAnalysis
{
public class ZipParatextProjectQuoteConventionDetector : ParatextProjectQuoteConventionDetector
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,6 @@ of the field which Yahweh God had made.
Assert.IsNotNull(standardEnglishQuoteConvention);

var quotationMarkDenormalizationFirstPass = new QuotationMarkDenormalizationFirstPass(
standardEnglishQuoteConvention,
standardEnglishQuoteConvention
);

Expand All @@ -46,7 +45,6 @@ of the field which Yahweh God had made.
quotationMarkDenormalizationFirstPass.FindBestChapterStrategies();

var quotationMarkDenormalizer = new QuotationMarkDenormalizationUsfmUpdateBlockHandler(
standardEnglishQuoteConvention,
standardEnglishQuoteConvention,
new QuotationMarkUpdateSettings(chapterStrategies: bestChapterStrategies)
);
Expand Down
Loading
Loading