Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion tests/SIL.Machine.Tests/Corpora/CorporaTestHelpers.cs
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@ internal static class CorporaTestHelpers
public static readonly string UsfmTestProjectPath = Path.Combine(TestDataPath, "usfm", "Tes");
public static readonly string UsfmTargetProjectPath = Path.Combine(TestDataPath, "usfm", "target");
public static readonly string UsfmTargetProjectZipPath = Path.Combine(TestDataPath, "project", "target");
public static readonly string UsfmTargetCustomVrsPath = Path.Combine(TestDataPath, "usfm", "target", "custom.vrs");
public static readonly string UsfmSourceProjectPath = Path.Combine(TestDataPath, "usfm", "source");
public static readonly string UsfmSourceProjectZipPath = Path.Combine(TestDataPath, "project", "source");
public static readonly string UsxTestProjectPath = Path.Combine(TestDataPath, "usx", "Tes");
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,11 +13,11 @@ public class PlaceMarkersUsfmUpdateBlockHandlerTests
public void UpdateUsfm_ParagraphMarkers()
{
string source = "This is the first paragraph. This text is in English, and this test is for paragraph markers.";
string pretranslation =
string updateRows =
"Este es el primer párrafo. Este texto está en inglés y esta prueba es para marcadores de párrafo.";
PlaceMarkersAlignmentInfo alignInfo = new PlaceMarkersAlignmentInfo(
sourceTokens: Tokenizer.Tokenize(source).ToList(),
translationTokens: Tokenizer.Tokenize(pretranslation).ToList(),
translationTokens: Tokenizer.Tokenize(updateRows).ToList(),
alignment: ToWordAlignmentMatrix(
"0-0 1-1 2-2 3-3 4-4 5-5 6-6 7-7 8-8 9-9 10-10 12-11 13-12 14-13 15-14 16-15 17-18 18-16 19-19"
),
Expand All @@ -28,7 +28,7 @@ public void UpdateUsfm_ParagraphMarkers()
[
new UpdateUsfmRow(
ScrRef("MAT 1:1"),
pretranslation,
updateRows,
new Dictionary<string, object> { { "alignment_info", alignInfo } }
)
];
Expand Down Expand Up @@ -62,11 +62,11 @@ public void UpdateUsfm_ParagraphMarkers()
public void UpdateUsfm_StyleMarkers()
{
string source = "This is the first sentence. This text is in English, and this test is for style markers.";
string pretranslation =
string updateRows =
"Esta es la primera oración. Este texto está en inglés y esta prueba es para marcadores de estilo.";
PlaceMarkersAlignmentInfo alignInfo = new PlaceMarkersAlignmentInfo(
sourceTokens: Tokenizer.Tokenize(source).ToList(),
translationTokens: Tokenizer.Tokenize(pretranslation).ToList(),
translationTokens: Tokenizer.Tokenize(updateRows).ToList(),
alignment: ToWordAlignmentMatrix(
"0-0 1-1 2-2 3-3 4-4 5-5 6-6 7-7 8-8 9-9 10-10 12-11 13-12 14-13 15-14 16-15 17-18 18-16 19-19"
),
Expand All @@ -77,7 +77,7 @@ public void UpdateUsfm_StyleMarkers()
[
new UpdateUsfmRow(
ScrRef("MAT 1:1"),
pretranslation,
updateRows,
new Dictionary<string, object> { { "alignment_info", alignInfo } }
)
];
Expand All @@ -104,7 +104,7 @@ public void UpdateUsfm_StyleMarkers()

alignInfo = new PlaceMarkersAlignmentInfo(
sourceTokens: Tokenizer.Tokenize(source).ToList(),
translationTokens: Tokenizer.Tokenize(pretranslation).ToList(),
translationTokens: Tokenizer.Tokenize(updateRows).ToList(),
alignment: ToWordAlignmentMatrix(
"0-0 1-1 2-2 3-3 4-4 5-5 6-6 7-7 8-8 9-9 10-10 12-11 13-12 14-13 15-14 16-15 17-18 18-16 19-19"
),
Expand All @@ -115,7 +115,7 @@ public void UpdateUsfm_StyleMarkers()
[
new UpdateUsfmRow(
ScrRef("MAT 1:1"),
pretranslation,
updateRows,
new Dictionary<string, object> { { "alignment_info", alignInfo } }
)
];
Expand Down
12 changes: 6 additions & 6 deletions tests/SIL.Machine.Tests/Corpora/UpdateUsfmParserHandlerTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -642,15 +642,15 @@ public void GetUsfm_Verse_LastSegment()
}

[Test]
public void GetUsfm_Verse_PretranslationsBeforeText()
public void GetUsfm_Verse_UpdateRowsBeforeText()
{
var rows = new List<UpdateUsfmRow>
{
new UpdateUsfmRow(ScrRef("GEN 1:1"), "Pretranslations before the start"),
new UpdateUsfmRow(ScrRef("GEN 1:2"), "Pretranslations before the start"),
new UpdateUsfmRow(ScrRef("GEN 1:3"), "Pretranslations before the start"),
new UpdateUsfmRow(ScrRef("GEN 1:4"), "Pretranslations before the start"),
new UpdateUsfmRow(ScrRef("GEN 1:5"), "Pretranslations before the start"),
new UpdateUsfmRow(ScrRef("GEN 1:1"), "Update rows before the start"),
new UpdateUsfmRow(ScrRef("GEN 1:2"), "Update rows before the start"),
new UpdateUsfmRow(ScrRef("GEN 1:3"), "Update rows before the start"),
new UpdateUsfmRow(ScrRef("GEN 1:4"), "Update rows before the start"),
new UpdateUsfmRow(ScrRef("GEN 1:5"), "Update rows before the start"),
new UpdateUsfmRow(ScrRef("MAT 1:0/3:ip"), "The introductory paragraph.")
};

Expand Down
123 changes: 3 additions & 120 deletions tests/SIL.Machine.Tests/Corpora/UsfmManualTests.cs
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
using System.IO.Compression;
using System.Text.Json;
using NUnit.Framework;
using SIL.Machine.PunctuationAnalysis;

Expand Down Expand Up @@ -28,8 +27,8 @@ public void ParseParallelCorpusAsync()
List<ParallelTextRow> rows = pCorpus.GetRows().ToList();
Assert.That(rows, Has.Count.GreaterThan(0));

// insert the source into the target as pretranslations to make sure that USFM generation works
IReadOnlyList<UpdateUsfmRow> pretranslations = rows.Select(r => new UpdateUsfmRow(
// insert the source into the target as update rows to make sure that USFM generation works
IReadOnlyList<UpdateUsfmRow> updateRows = rows.Select(r => new UpdateUsfmRow(
(IReadOnlyList<ScriptureRef>)r.SourceRefs.Select(s => (ScriptureRef)s).ToList(),
r.SourceText
))
Expand All @@ -51,127 +50,11 @@ string sfmFileName in Directory
string bookId;
if (!targetSettings.IsBookFileName(sfmFileName, out bookId))
continue;
string newUsfm = updater.UpdateUsfm(
bookId,
pretranslations,
textBehavior: UpdateUsfmTextBehavior.StripExisting
);
string newUsfm = updater.UpdateUsfm(bookId, updateRows, textBehavior: UpdateUsfmTextBehavior.StripExisting);
Assert.That(newUsfm, Is.Not.Null);
}
}

/// <summary>
/// One entry of the pretranslations JSON file read by <see cref="CreateUsfmFile"/>.
/// Entries are deserialized with a camel-case property naming policy, and every
/// property is <c>required</c>.
/// </summary>
public record PretranslationDto
{
/// <summary>Text identifier of the entry (presumably the book/text it belongs to; not read by CreateUsfmFile).</summary>
public required string TextId { get; init; }
/// <summary>Scripture reference strings; CreateUsfmFile parses each one with <c>ScriptureRef.Parse</c>.</summary>
public required IReadOnlyList<string> Refs { get; init; }
/// <summary>Translated text; CreateUsfmFile uses it as the text of the resulting <c>UpdateUsfmRow</c>.</summary>
public required string Translation { get; init; }
}

/// <summary>
/// Path of the <c>pretranslations.json</c> file inside the test data folder;
/// opened by <see cref="CreateUsfmFile"/> to build the update rows.
/// </summary>
public static readonly string PretranslationPath = Path.Combine(
CorporaTestHelpers.TestDataPath,
"pretranslations.json"
);
/// <summary>
/// Path of the <c>project</c> folder inside the test data folder. CreateUsfmFile treats it
/// as a single project when it contains <c>Settings.xml</c>, otherwise as a container of projects.
/// </summary>
public static readonly string ParatextProjectPath = Path.Combine(CorporaTestHelpers.TestDataPath, "project");

// Manual test: regenerates USFM for every book of one or more Paratext projects using the rows
// stored in the pretranslations JSON file, and checks that generation succeeds.
//
// In order to run this test on specific projects, place the Paratext projects or Paratext project
// zips in the Corpora/TestData/project/ folder. If only testing one project, you can instead place
// the project in the Corpora/TestData/ folder and rename it to "project".
[Test]
[Ignore("This is for manual testing only. Remove this tag to run the test.")]
public async Task CreateUsfmFile()
{
    // Updates every book of the project at projectPath (a project directory or a project zip).
    async Task GetUsfmAsync(string projectPath)
    {
        // A directory is read through the file-based parser/updater; anything else is opened as
        // a zip archive. The using declaration releases the archive handle when this function
        // finishes, after the updater that reads from it is no longer needed.
        using ZipArchive? projectArchive = Directory.Exists(projectPath)
            ? null
            : ZipFile.Open(projectPath, ZipArchiveMode.Read);
        ParatextProjectSettingsParserBase parser = projectArchive is null
            ? new FileParatextProjectSettingsParser(projectPath)
            : new ZipParatextProjectSettingsParser(projectArchive);
        ParatextProjectSettings settings = parser.Parse();

        // Read text from pretranslations file. This is done per project because the references
        // are parsed against the project's own versification.
        using Stream pretranslationStream = File.OpenRead(PretranslationPath);
        UpdateUsfmRow[] pretranslations = await JsonSerializer
            .DeserializeAsyncEnumerable<PretranslationDto>(
                pretranslationStream,
                new JsonSerializerOptions { PropertyNamingPolicy = JsonNamingPolicy.CamelCase }
            )
            .Select(p => new UpdateUsfmRow(
                (IReadOnlyList<ScriptureRef>)(
                    p?.Refs.Select(r => ScriptureRef.Parse(r, settings.Versification).ToRelaxed()).ToArray() ?? []
                ),
                p?.Translation ?? ""
            ))
            .ToArrayAsync();

        // Maps file names to book IDs, dropping names the project settings do not recognize.
        List<string> ToBookIds(IEnumerable<string> fileNames) =>
            fileNames
                .Select(name => settings.IsBookFileName(name, out string bookId) ? bookId : "")
                .Where(id => id != "")
                .ToList();

        List<string> bookIds;
        ParatextProjectTextUpdaterBase updater;
        if (projectArchive is null)
        {
            bookIds = ToBookIds(
                Directory
                    .EnumerateFiles(projectPath, $"{settings.FileNamePrefix}*{settings.FileNameSuffix}")
                    .Select(path => Path.GetFileName(path))
            );
            updater = new FileParatextProjectTextUpdater(projectPath);
        }
        else
        {
            // Ordinal comparison: these are file-name fragments, not linguistic text.
            bookIds = ToBookIds(
                projectArchive
                    .Entries.Where(e =>
                        e.Name.StartsWith(settings.FileNamePrefix, StringComparison.Ordinal)
                        && e.Name.EndsWith(settings.FileNameSuffix, StringComparison.Ordinal)
                    )
                    .Select(e => e.Name)
            );
            updater = new ZipParatextProjectTextUpdater(projectArchive);
        }

        foreach (string bookId in bookIds)
        {
            string newUsfm = updater.UpdateUsfm(
                bookId,
                pretranslations,
                textBehavior: UpdateUsfmTextBehavior.StripExisting
            );
            Assert.That(newUsfm, Is.Not.Null);
        }
    }

    if (!File.Exists(Path.Combine(ParatextProjectPath, "Settings.xml")))
    {
        // No Settings.xml at the top level: treat the folder as a container of projects.
        // Enumerate directories as well as files so that unzipped projects are processed,
        // not only project zips.
        Assert.Multiple(() =>
        {
            foreach (string project in Directory.EnumerateFileSystemEntries(ParatextProjectPath))
                Assert.DoesNotThrowAsync(async () => await GetUsfmAsync(project), $"Failed to parse {project}");
        });
    }
    else
    {
        await GetUsfmAsync(ParatextProjectPath);
    }
}

[Test]
[Ignore("This is for manual testing only. Remove this tag to run the test.")]
public void AnalyzeCorporaQuoteConventions()
Expand Down
Loading