From 8022b1b95b19de9925652200c33dcc251f3187a5 Mon Sep 17 00:00:00 2001 From: Enkidu93 Date: Mon, 6 Oct 2025 15:59:27 -0400 Subject: [PATCH 1/6] Port https://github.com/sillsdev/machine.py/pull/236 --- .../ParatextProjectQuoteConventionDetector.cs | 20 ++- .../QuoteConventionDetector.cs | 4 +- .../PunctuationAnalysis/TextSegment.cs | 14 ++ .../UsfmStructureExtractor.cs | 24 ++- ...atextProjectQuoteConvetionDetectorTests.cs | 143 ++++++++++++++++-- .../Corpora/UsfmManualTests.cs | 4 +- .../QuotationConventionDetectorTests.cs | 2 +- .../UsfmStructureExtractorTests.cs | 54 +++++++ 8 files changed, 240 insertions(+), 25 deletions(-) diff --git a/src/SIL.Machine/PunctuationAnalysis/ParatextProjectQuoteConventionDetector.cs b/src/SIL.Machine/PunctuationAnalysis/ParatextProjectQuoteConventionDetector.cs index 5cbdc52a..ff246e58 100644 --- a/src/SIL.Machine/PunctuationAnalysis/ParatextProjectQuoteConventionDetector.cs +++ b/src/SIL.Machine/PunctuationAnalysis/ParatextProjectQuoteConventionDetector.cs @@ -1,7 +1,10 @@ using System; +using System.Collections.Generic; using System.IO; +using System.Linq; using System.Text; using SIL.Machine.Corpora; +using SIL.Scripture; namespace SIL.Machine.PunctuationAnalysis { @@ -19,11 +22,22 @@ protected ParatextProjectQuoteConventionDetector(ParatextProjectSettingsParserBa _settings = settingsParser.Parse(); } - public QuoteConventionAnalysis GetQuoteConventionAnalysis(QuoteConventionDetector handler = null) + public QuoteConventionAnalysis GetQuoteConventionAnalysis( + QuoteConventionDetector handler = null, + Dictionary> includeChapters = null + ) { handler = handler ?? new QuoteConventionDetector(); - foreach (string fileName in _settings.GetAllScriptureBookFileNames()) + foreach ( + string bookId in Canon + .AllBookNumbers.Where(num => Canon.IsCanonical(num)) + .Select(num => Canon.BookNumberToId(num)) + ) { + if (includeChapters != null && includeChapters.ContainsKey(Canon.BookIdToNumber(bookId))) + continue; + + string fileName = _settings.GetBookFileName(bookId); if (!Exists(fileName)) continue; @@ -47,7 +61,7 @@ public QuoteConventionAnalysis GetQuoteConventionAnalysis(QuoteConventionDetecto throw new InvalidOperationException(sb.ToString(), ex); } } - return handler.DetectQuotationConvention(); + return handler.DetectQuoteConvention(includeChapters); } protected abstract bool Exists(string fileName); diff --git a/src/SIL.Machine/PunctuationAnalysis/QuoteConventionDetector.cs b/src/SIL.Machine/PunctuationAnalysis/QuoteConventionDetector.cs index bd6c7fea..9c561718 100644 --- a/src/SIL.Machine/PunctuationAnalysis/QuoteConventionDetector.cs +++ b/src/SIL.Machine/PunctuationAnalysis/QuoteConventionDetector.cs @@ -56,9 +56,9 @@ private void CountQuotationMarksInChapter(Chapter chapter, QuoteConventionSet po _quotationMarkTabulator.Tabulate(resolvedQuotationMarks); } - public QuoteConventionAnalysis DetectQuotationConvention() + public QuoteConventionAnalysis DetectQuoteConvention(Dictionary> includeChapters = null) { - CountQuotationMarksInChapters(GetChapters()); + CountQuotationMarksInChapters(GetChapters(includeChapters)); (QuoteConvention bestQuoteConvention, double score) = QuoteConventions.Standard.FindMostSimilarConvention( _quotationMarkTabulator diff --git a/src/SIL.Machine/PunctuationAnalysis/TextSegment.cs b/src/SIL.Machine/PunctuationAnalysis/TextSegment.cs index 13ef13ec..e4a6d06f 100644 --- a/src/SIL.Machine/PunctuationAnalysis/TextSegment.cs +++ b/src/SIL.Machine/PunctuationAnalysis/TextSegment.cs @@ -12,6 +12,8 @@ public string Text get => _surrogatePairString.ToString(); private set => _surrogatePairString = new SurrogatePairString(value); } + public string Book { get; private set; } + public int Chapter { get; private set; } public UsfmMarkerType ImmediatePrecedingMarker { get; private set; } public HashSet MarkersInPrecedingContext { get; private set; } public TextSegment PreviousSegment { get; set; } @@ -139,6 +141,18 @@ public Builder AddPrecedingMarker(UsfmMarkerType marker) return this; } + public Builder SetBook(string code) + { + _textSegment.Book = code; + return this; + } + + public Builder SetChapter(int number) + { + _textSegment.Chapter = number; + return this; + } + public Builder SetUsfmToken(UsfmToken token) { _textSegment.UsfmToken = token; diff --git a/src/SIL.Machine/PunctuationAnalysis/UsfmStructureExtractor.cs b/src/SIL.Machine/PunctuationAnalysis/UsfmStructureExtractor.cs index ce2d6cd7..3f185fcc 100644 --- a/src/SIL.Machine/PunctuationAnalysis/UsfmStructureExtractor.cs +++ b/src/SIL.Machine/PunctuationAnalysis/UsfmStructureExtractor.cs @@ -1,5 +1,6 @@ using System.Collections.Generic; using SIL.Machine.Corpora; +using SIL.Scripture; namespace SIL.Machine.PunctuationAnalysis { @@ -14,9 +15,15 @@ public UsfmStructureExtractor() _nextTextSegmentBuilder = new TextSegment.Builder(); } + public void StartBook(UsfmParserState state, string marker, string code) + { + _nextTextSegmentBuilder.SetBook(code); + } + public void Chapter(UsfmParserState state, string number, string marker, string altNumber, string pubNumber) { _nextTextSegmentBuilder.AddPrecedingMarker(UsfmMarkerType.Chapter); + _nextTextSegmentBuilder.SetChapter(int.Parse(number)); } public void EndBook(UsfmParserState state, string marker) { } @@ -65,8 +72,6 @@ public void Ref(UsfmParserState state, string marker, string display, string tar _nextTextSegmentBuilder.AddPrecedingMarker(UsfmMarkerType.Embed); } - public void StartBook(UsfmParserState state, string marker, string code) { } - public void StartCell(UsfmParserState state, string marker, string align, int colspan) { } public void StartChar( @@ -127,13 +132,26 @@ public void Verse(UsfmParserState state, string number, string marker, string al _nextTextSegmentBuilder.AddPrecedingMarker(UsfmMarkerType.Verse); } - public List GetChapters() + public List GetChapters(Dictionary> includeChapters = null) { var chapters = new List(); + int currentBook = 0; + int currentChapter = 0; var currentChapterVerses = new List(); var currentVerseSegments = new List(); foreach (TextSegment textSegment in _textSegments) { + if (textSegment.Book != null) + currentBook = Canon.BookIdToNumber(textSegment.Book) - 1; //make 0-indexed + if (textSegment.Chapter > 0) + currentChapter = textSegment.Chapter; + if (includeChapters != null && currentBook > 0) + { + if (!includeChapters.TryGetValue(currentBook, out List bookChapters)) + continue; + if (currentChapter > 0 && bookChapters.Count > 0 && !bookChapters.Contains(currentChapter)) + continue; + } if (textSegment.MarkerIsInPrecedingContext(UsfmMarkerType.Verse)) { if (currentVerseSegments.Count > 0) diff --git a/tests/SIL.Machine.Tests/Corpora/ParatextProjectQuoteConvetionDetectorTests.cs b/tests/SIL.Machine.Tests/Corpora/ParatextProjectQuoteConvetionDetectorTests.cs index 46a75faa..bab3b916 100644 --- a/tests/SIL.Machine.Tests/Corpora/ParatextProjectQuoteConvetionDetectorTests.cs +++ b/tests/SIL.Machine.Tests/Corpora/ParatextProjectQuoteConvetionDetectorTests.cs @@ -8,6 +8,11 @@ namespace SIL.Machine.Corpora; [TestFixture] public class ParatextProjectQuoteConventionDetectorTests { + private static readonly QuoteConvention StandardEnglishQuoteConvention = + QuoteConventions.Standard.GetQuoteConventionByName("standard_english"); + private static readonly QuoteConvention StandardFrenchQuoteConvention = + QuoteConventions.Standard.GetQuoteConventionByName("standard_french"); + [Test] public void TestGetQuotationAnalysis() { @@ -16,18 +21,9 @@ public void TestGetQuotationAnalysis() { { "41MATTest.SFM", - @"\id MAT -\c 1 -\v 1 Someone said, “This is something I am saying! -\v 2 This is also something I am saying” (that is, “something I am speaking”). -\p -\v 3 Other text, and someone else said, -\q1 -\v 4 “Things -\q2 someone else said! -\q3 and more things someone else said.” -\m That is why he said “things someone else said.” -\v 5 Then someone said, “More things someone said.”" + $@"\id MAT +{GetTestChapter(1, StandardEnglishQuoteConvention)} +" } } ); @@ -37,6 +33,100 @@ public void TestGetQuotationAnalysis() Assert.That(analysis.BestQuoteConvention.Name, Is.EqualTo("standard_english")); } + [Test] + public void TestGetQuotationByBook() + { + var env = new TestEnvironment( + files: new Dictionary() + { + { + "41MATTest.SFM", + $@"\id MAT +{GetTestChapter(1, StandardEnglishQuoteConvention)} +" + }, + { + "42MRKTest.SFM", + $@"\id MRK +{GetTestChapter(1, StandardFrenchQuoteConvention)} +" + } + } + ); + QuoteConventionAnalysis analysis = env.GetQuoteConvention("MRK"); + Assert.That(analysis, Is.Not.Null); + Assert.That(analysis.BestQuoteConventionScore, Is.GreaterThan(0.8)); + Assert.That(analysis.BestQuoteConvention.Name, Is.EqualTo("standard_french")); + } + + [Test] + public void TestGetQuotationConventionByChapter() + { + var env = new TestEnvironment( + files: new Dictionary() + { + { + "41MATTest.SFM", + $@"\id MAT +{GetTestChapter(1, StandardEnglishQuoteConvention)} +" + }, + { + "42MRKTest.SFM", + $@"\id MRK +{GetTestChapter(1, StandardEnglishQuoteConvention)} +{GetTestChapter(2, StandardFrenchQuoteConvention)} +{GetTestChapter(3, StandardEnglishQuoteConvention)} +{GetTestChapter(4, StandardEnglishQuoteConvention)} +{GetTestChapter(5, StandardFrenchQuoteConvention)} +" + } + } + ); + QuoteConventionAnalysis analysis = env.GetQuoteConvention("MRK2,4-5"); + Assert.That(analysis, Is.Not.Null); + Assert.That(analysis.BestQuoteConventionScore, Is.GreaterThan(0.66)); + Assert.That(analysis.BestQuoteConvention.Name, Is.EqualTo("standard_french")); + } + + [Test] + public void TestGetQuotationConventionByChapterIndeterminate() + { + var env = new TestEnvironment( + files: new Dictionary() + { + { + "41MATTest.SFM", + $@"\id MAT +{GetTestChapter(1)} +{GetTestChapter(2, StandardEnglishQuoteConvention)} +{GetTestChapter(3)} +" + } + } + ); + QuoteConventionAnalysis analysis = env.GetQuoteConvention("MAT1,3"); + Assert.That(analysis, Is.Null); + } + + [Test] + public void TestGetQuotationConventionInvalidBookCode() + { + var env = new TestEnvironment( + files: new Dictionary() + { + { + "41MATTest.SFM", + $@"\id LUK +{GetTestChapter(1, StandardEnglishQuoteConvention)} +" + } + } + ); + QuoteConventionAnalysis analysis = env.GetQuoteConvention("MAT"); + Assert.That(analysis, Is.Null); + } + private class TestEnvironment(ParatextProjectSettings? settings = null, Dictionary? files = null) { public ParatextProjectQuoteConventionDetector Detector { get; } = @@ -45,12 +135,37 @@ private class TestEnvironment(ParatextProjectSettings? settings = null, Dictiona files ?? new() ); - public QuoteConventionAnalysis GetQuoteConvention() + public QuoteConventionAnalysis GetQuoteConvention(string? scriptureRange = null) { - return Detector.GetQuoteConventionAnalysis(); + Dictionary>? chapters = null; + if (scriptureRange != null) + { + chapters = ScriptureRangeParser + .GetChapters(scriptureRange) + .ToDictionary(kvp => Canon.BookIdToNumber(kvp.Key) - 1, kvp => kvp.Value); // make 0-indexed + } + return Detector.GetQuoteConventionAnalysis(includeChapters: chapters); } } + private static string GetTestChapter(int number, QuoteConvention? quoteConvention = null) + { + string leftQuote = quoteConvention != null ? quoteConvention.GetOpeningQuotationMarkAtDepth(1) : ""; + string rightQuote = quoteConvention != null ? quoteConvention.GetClosingQuotationMarkAtDepth(1) : ""; + return $@"\c {number} +\v 1 Someone said, {leftQuote}This is something I am saying! +\v 2 This is also something I am saying{rightQuote} (that is, {leftQuote}something I am speaking{rightQuote}). +\p +\v 3 Other text, and someone else said, +\q1 +\v 4 {leftQuote}Things +\q2 someone else said! +\q3 and more things someone else said.{rightQuote} +\m That is why he said {leftQuote}things someone else said.{rightQuote} +\v 5 Then someone said, {leftQuote}More things someone said.{rightQuote} + "; + } + private class DefaultParatextProjectSettings( string name = "Test", string fullName = "TestProject", diff --git a/tests/SIL.Machine.Tests/Corpora/UsfmManualTests.cs b/tests/SIL.Machine.Tests/Corpora/UsfmManualTests.cs index d5ddf36d..e02ba432 100644 --- a/tests/SIL.Machine.Tests/Corpora/UsfmManualTests.cs +++ b/tests/SIL.Machine.Tests/Corpora/UsfmManualTests.cs @@ -186,8 +186,8 @@ public void AnalyzeCorporaQuoteConventions() var quoteConventionDetector2 = new ZipParatextProjectQuoteConventionDetector(zipArchive2); quoteConventionDetector2.GetQuoteConventionAnalysis(targetHandler); - QuoteConventionAnalysis sourceAnalysis = sourceHandler.DetectQuotationConvention(); - QuoteConventionAnalysis targetAnalysis = targetHandler.DetectQuotationConvention(); + QuoteConventionAnalysis sourceAnalysis = sourceHandler.DetectQuoteConvention(); + QuoteConventionAnalysis targetAnalysis = targetHandler.DetectQuoteConvention(); Assert.Multiple(() => { diff --git a/tests/SIL.Machine.Tests/PunctuationAnalysis/QuotationConventionDetectorTests.cs b/tests/SIL.Machine.Tests/PunctuationAnalysis/QuotationConventionDetectorTests.cs index 8b34a377..212daca8 100644 --- a/tests/SIL.Machine.Tests/PunctuationAnalysis/QuotationConventionDetectorTests.cs +++ b/tests/SIL.Machine.Tests/PunctuationAnalysis/QuotationConventionDetectorTests.cs @@ -368,6 +368,6 @@ public QuoteConventionAnalysis DetectQuotationConvention(string usfm) { var quoteConventionDetector = new QuoteConventionDetector(); UsfmParser.Parse(usfm, quoteConventionDetector); - return quoteConventionDetector.DetectQuotationConvention(); + return quoteConventionDetector.DetectQuoteConvention(); } } diff --git a/tests/SIL.Machine.Tests/PunctuationAnalysis/UsfmStructureExtractorTests.cs b/tests/SIL.Machine.Tests/PunctuationAnalysis/UsfmStructureExtractorTests.cs index 6615ec92..b89ac1c3 100644 --- a/tests/SIL.Machine.Tests/PunctuationAnalysis/UsfmStructureExtractorTests.cs +++ b/tests/SIL.Machine.Tests/PunctuationAnalysis/UsfmStructureExtractorTests.cs @@ -16,6 +16,60 @@ public void SetUp() _verseTextParserState.SetVerseNum(1); } + [Test] + public void GetChaptersFilterByBook() + { + var usfmStructureExtractor = new UsfmStructureExtractor(); + usfmStructureExtractor.StartBook(_verseTextParserState, "id", "GEN"); + usfmStructureExtractor.Chapter(_verseTextParserState, "1", "c", null, null); + usfmStructureExtractor.Verse(_verseTextParserState, "1", "v", null, null); + usfmStructureExtractor.Text(_verseTextParserState, "test"); + + Assert.That( + usfmStructureExtractor.GetChapters(new Dictionary> { { 2, [1] } }), // EXO 1 + Has.Count.EqualTo(0) + ); + } + + [Test] + public void GetChaptersFilterByChapter() + { + var usfmStructureExtractor = new UsfmStructureExtractor(); + usfmStructureExtractor.StartBook(_verseTextParserState, "id", "MAT"); + usfmStructureExtractor.Chapter(_verseTextParserState, "1", "c", null, null); + usfmStructureExtractor.Verse(_verseTextParserState, "1", "v", null, null); + usfmStructureExtractor.Text(_verseTextParserState, "test"); + usfmStructureExtractor.Chapter(_verseTextParserState, "2", "c", null, null); + usfmStructureExtractor.Verse(_verseTextParserState, "1", "v", null, null); + usfmStructureExtractor.Text(_verseTextParserState, "test2"); + usfmStructureExtractor.Chapter(_verseTextParserState, "3", "c", null, null); + usfmStructureExtractor.Verse(_verseTextParserState, "1", "v", null, null); + usfmStructureExtractor.Text(_verseTextParserState, "test3"); + + List expectedChapters = + [ + new Chapter( + [ + new Verse( + [ + new TextSegment.Builder() + .SetText("test2") + .AddPrecedingMarker(UsfmMarkerType.Chapter) + .AddPrecedingMarker(UsfmMarkerType.Verse) + .Build() + ] + ) + ] + ) + ]; + List actualChapters = usfmStructureExtractor.GetChapters( + new Dictionary> { { 40, [2] } } + ); + AssertChapterEqual(expectedChapters, actualChapters); + Assert.That(actualChapters[0].Verses[0].TextSegments[0].PreviousSegment, Is.Null); + Assert.That(actualChapters[0].Verses[0].TextSegments[0].NextSegment, Is.Null); + } + [Test] public void ChapterAndVerseMarkers() { From f7810e349ef040628f26d831d09c17caa29d83c0 Mon Sep 17 00:00:00 2001 From: Enkidu93 Date: Mon, 6 Oct 2025 16:37:54 -0400 Subject: [PATCH 2/6] Fix book logic --- .../ParatextProjectQuoteConventionDetector.cs | 2 +- src/SIL.Machine/PunctuationAnalysis/UsfmStructureExtractor.cs | 2 +- .../Corpora/ParatextProjectQuoteConvetionDetectorTests.cs | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/SIL.Machine/PunctuationAnalysis/ParatextProjectQuoteConventionDetector.cs b/src/SIL.Machine/PunctuationAnalysis/ParatextProjectQuoteConventionDetector.cs index ff246e58..ea3a8bee 100644 --- a/src/SIL.Machine/PunctuationAnalysis/ParatextProjectQuoteConventionDetector.cs +++ b/src/SIL.Machine/PunctuationAnalysis/ParatextProjectQuoteConventionDetector.cs @@ -34,7 +34,7 @@ string bookId in Canon .Select(num => Canon.BookNumberToId(num)) ) { - if (includeChapters != null && includeChapters.ContainsKey(Canon.BookIdToNumber(bookId))) + if (includeChapters != null && !includeChapters.ContainsKey(Canon.BookIdToNumber(bookId))) continue; string fileName = _settings.GetBookFileName(bookId); diff --git a/src/SIL.Machine/PunctuationAnalysis/UsfmStructureExtractor.cs b/src/SIL.Machine/PunctuationAnalysis/UsfmStructureExtractor.cs index 3f185fcc..6b565491 100644 --- a/src/SIL.Machine/PunctuationAnalysis/UsfmStructureExtractor.cs +++ b/src/SIL.Machine/PunctuationAnalysis/UsfmStructureExtractor.cs @@ -142,7 +142,7 @@ public List GetChapters(Dictionary> includeChapters = nu foreach (TextSegment textSegment in _textSegments) { if (textSegment.Book != null) - currentBook = Canon.BookIdToNumber(textSegment.Book) - 1; //make 0-indexed + currentBook = Canon.BookIdToNumber(textSegment.Book); if (textSegment.Chapter > 0) currentChapter = textSegment.Chapter; if (includeChapters != null && currentBook > 0) diff --git a/tests/SIL.Machine.Tests/Corpora/ParatextProjectQuoteConvetionDetectorTests.cs b/tests/SIL.Machine.Tests/Corpora/ParatextProjectQuoteConvetionDetectorTests.cs index bab3b916..7338d611 100644 --- a/tests/SIL.Machine.Tests/Corpora/ParatextProjectQuoteConvetionDetectorTests.cs +++ b/tests/SIL.Machine.Tests/Corpora/ParatextProjectQuoteConvetionDetectorTests.cs @@ -142,7 +142,7 @@ public QuoteConventionAnalysis GetQuoteConvention(string? scriptureRange = null) { chapters = ScriptureRangeParser .GetChapters(scriptureRange) - .ToDictionary(kvp => Canon.BookIdToNumber(kvp.Key) - 1, kvp => kvp.Value); // make 0-indexed + .ToDictionary(kvp => Canon.BookIdToNumber(kvp.Key), kvp => kvp.Value); } return Detector.GetQuoteConventionAnalysis(includeChapters: chapters); } From f2f93a1cb8bf3dee92afac30b7d9f7e429855bb7 Mon Sep 17 00:00:00 2001 From: Enkidu93 Date: Mon, 6 Oct 2025 16:59:28 -0400 Subject: [PATCH 3/6] Add an overload to take a chapter-by-id dict for convenience in Serval --- .../ParatextProjectQuoteConventionDetector.cs | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/src/SIL.Machine/PunctuationAnalysis/ParatextProjectQuoteConventionDetector.cs b/src/SIL.Machine/PunctuationAnalysis/ParatextProjectQuoteConventionDetector.cs index ea3a8bee..d0a9ca5f 100644 --- a/src/SIL.Machine/PunctuationAnalysis/ParatextProjectQuoteConventionDetector.cs +++ b/src/SIL.Machine/PunctuationAnalysis/ParatextProjectQuoteConventionDetector.cs @@ -22,6 +22,17 @@ protected ParatextProjectQuoteConventionDetector(ParatextProjectSettingsParserBa _settings = settingsParser.Parse(); } + public QuoteConventionAnalysis GetQuoteConventionAnalysis( + QuoteConventionDetector handler = null, + Dictionary> includeChapters = null + ) + { + return GetQuoteConventionAnalysis( + handler, + includeChapters.ToDictionary(kvp => Canon.BookIdToNumber(kvp.Key), kvp => kvp.Value) + ); + } + public QuoteConventionAnalysis GetQuoteConventionAnalysis( QuoteConventionDetector handler = null, Dictionary> includeChapters = null From b007c691327ee022e333ffd0f276ff03fb152290 Mon Sep 17 00:00:00 2001 From: Enkidu93 Date: Mon, 6 Oct 2025 17:19:12 -0400 Subject: [PATCH 4/6] Fix ambiguous reference --- .../ParatextProjectQuoteConventionDetector.cs | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/SIL.Machine/PunctuationAnalysis/ParatextProjectQuoteConventionDetector.cs b/src/SIL.Machine/PunctuationAnalysis/ParatextProjectQuoteConventionDetector.cs index d0a9ca5f..d5e49727 100644 --- a/src/SIL.Machine/PunctuationAnalysis/ParatextProjectQuoteConventionDetector.cs +++ b/src/SIL.Machine/PunctuationAnalysis/ParatextProjectQuoteConventionDetector.cs @@ -22,6 +22,12 @@ protected ParatextProjectQuoteConventionDetector(ParatextProjectSettingsParserBa _settings = settingsParser.Parse(); } + public QuoteConventionAnalysis GetQuoteConventionAnalysis(QuoteConventionDetector handler = null) + { + Dictionary> includeChapters = null; + return GetQuoteConventionAnalysis(handler, includeChapters); + } + public QuoteConventionAnalysis GetQuoteConventionAnalysis( QuoteConventionDetector handler = null, Dictionary> includeChapters = null From 0715eb34a78e585628531123d710f25275540c7f Mon Sep 17 00:00:00 2001 From: Enkidu93 Date: Tue, 7 Oct 2025 17:09:45 -0400 Subject: [PATCH 5/6] Address reviewer comments --- .../ParatextProjectQuoteConventionDetector.cs | 4 ++-- .../PunctuationAnalysis/QuoteConventionDetector.cs | 2 +- src/SIL.Machine/PunctuationAnalysis/UsfmStructureExtractor.cs | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/SIL.Machine/PunctuationAnalysis/ParatextProjectQuoteConventionDetector.cs b/src/SIL.Machine/PunctuationAnalysis/ParatextProjectQuoteConventionDetector.cs index d5e49727..93fbe81c 100644 --- a/src/SIL.Machine/PunctuationAnalysis/ParatextProjectQuoteConventionDetector.cs +++ b/src/SIL.Machine/PunctuationAnalysis/ParatextProjectQuoteConventionDetector.cs @@ -30,7 +30,7 @@ public QuoteConventionAnalysis GetQuoteConventionAnalysis(QuoteConventionDetecto public QuoteConventionAnalysis GetQuoteConventionAnalysis( QuoteConventionDetector handler = null, - Dictionary> includeChapters = null + IReadOnlyDictionary> includeChapters = null ) { return GetQuoteConventionAnalysis( @@ -41,7 +41,7 @@ public QuoteConventionAnalysis GetQuoteConventionAnalysis( public QuoteConventionAnalysis GetQuoteConventionAnalysis( QuoteConventionDetector handler = null, - Dictionary> includeChapters = null + IReadOnlyDictionary> includeChapters = null ) { handler = handler ?? new QuoteConventionDetector(); diff --git a/src/SIL.Machine/PunctuationAnalysis/QuoteConventionDetector.cs b/src/SIL.Machine/PunctuationAnalysis/QuoteConventionDetector.cs index 9c561718..3200e2ab 100644 --- a/src/SIL.Machine/PunctuationAnalysis/QuoteConventionDetector.cs +++ b/src/SIL.Machine/PunctuationAnalysis/QuoteConventionDetector.cs @@ -56,7 +56,7 @@ private void CountQuotationMarksInChapter(Chapter chapter, QuoteConventionSet po _quotationMarkTabulator.Tabulate(resolvedQuotationMarks); } - public QuoteConventionAnalysis DetectQuoteConvention(Dictionary> includeChapters = null) + public QuoteConventionAnalysis DetectQuoteConvention(IReadOnlyDictionary> includeChapters = null) { CountQuotationMarksInChapters(GetChapters(includeChapters)); diff --git a/src/SIL.Machine/PunctuationAnalysis/UsfmStructureExtractor.cs b/src/SIL.Machine/PunctuationAnalysis/UsfmStructureExtractor.cs index 6b565491..cf8cf27f 100644 --- a/src/SIL.Machine/PunctuationAnalysis/UsfmStructureExtractor.cs +++ b/src/SIL.Machine/PunctuationAnalysis/UsfmStructureExtractor.cs @@ -23,7 +23,7 @@ public void StartBook(UsfmParserState state, string marker, string code) public void Chapter(UsfmParserState state, string number, string marker, string altNumber, string pubNumber) { _nextTextSegmentBuilder.AddPrecedingMarker(UsfmMarkerType.Chapter); - _nextTextSegmentBuilder.SetChapter(int.Parse(number)); + _nextTextSegmentBuilder.SetChapter(state.VerseRef.ChapterNum); } public void EndBook(UsfmParserState state, string marker) { } @@ -132,7 +132,7 @@ public void Verse(UsfmParserState state, string number, string marker, string al _nextTextSegmentBuilder.AddPrecedingMarker(UsfmMarkerType.Verse); } - public List GetChapters(Dictionary> includeChapters = null) + public List GetChapters(IReadOnlyDictionary> includeChapters = null) { var chapters = new List(); int currentBook = 0; From f68f9d8017d924849fb4783e0a694dd2916ae828 Mon Sep 17 00:00:00 2001 From: Enkidu93 Date: Tue, 7 Oct 2025 17:20:26 -0400 Subject: [PATCH 6/6] Fix broken test --- .../PunctuationAnalysis/UsfmStructureExtractorTests.cs | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/tests/SIL.Machine.Tests/PunctuationAnalysis/UsfmStructureExtractorTests.cs b/tests/SIL.Machine.Tests/PunctuationAnalysis/UsfmStructureExtractorTests.cs index b89ac1c3..77871027 100644 --- a/tests/SIL.Machine.Tests/PunctuationAnalysis/UsfmStructureExtractorTests.cs +++ b/tests/SIL.Machine.Tests/PunctuationAnalysis/UsfmStructureExtractorTests.cs @@ -36,12 +36,15 @@ public void GetChaptersFilterByChapter() { var usfmStructureExtractor = new UsfmStructureExtractor(); usfmStructureExtractor.StartBook(_verseTextParserState, "id", "MAT"); + _verseTextParserState.SetChapterNum(1); usfmStructureExtractor.Chapter(_verseTextParserState, "1", "c", null, null); usfmStructureExtractor.Verse(_verseTextParserState, "1", "v", null, null); usfmStructureExtractor.Text(_verseTextParserState, "test"); + _verseTextParserState.SetChapterNum(2); usfmStructureExtractor.Chapter(_verseTextParserState, "2", "c", null, null); usfmStructureExtractor.Verse(_verseTextParserState, "1", "v", null, null); usfmStructureExtractor.Text(_verseTextParserState, "test2"); + _verseTextParserState.SetChapterNum(3); usfmStructureExtractor.Chapter(_verseTextParserState, "3", "c", null, null); usfmStructureExtractor.Verse(_verseTextParserState, "1", "v", null, null); usfmStructureExtractor.Text(_verseTextParserState, "test3"); @@ -547,5 +550,12 @@ public void SetVerseNum(int verseNum) vref.VerseNum = verseNum; VerseRef = vref; } + + public void SetChapterNum(int chapterNum) + { + VerseRef vref = VerseRef; + vref.ChapterNum = chapterNum; + VerseRef = vref; + } } }