Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Correctly implement relaxed comparison for UsfmTextUpdater #204

Merged
merged 1 commit into from
May 31, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 14 additions & 14 deletions src/SIL.Machine/Corpora/ScriptureElement.cs
Original file line number Diff line number Diff line change
Expand Up @@ -13,28 +13,28 @@ public ScriptureElement(int position, string name)
public int Position { get; }
public string Name { get; }

int IComparable<ScriptureElement>.CompareTo(ScriptureElement other)
public ScriptureElement ToRelaxed()
{
return CompareTo(other, strict: true);
return new ScriptureElement(0, Name);
}

public int CompareTo(ScriptureElement other, bool strict = true)
public int CompareTo(ScriptureElement other)
{
if (strict)
if (Position == 0 || other.Position == 0)
{
int res = Position.CompareTo(other.Position);
if (res != 0)
return res;
if (Name == other.Name)
return 0;
// position 0 is always greater than any other position
if (Position == 0 && other.Position != 0)
return 1;
if (other.Position == 0 && Position != 0)
return -1;
return Name.CompareTo(other.Name);
}

if (Name == other.Name)
return 0;
// position 0 is always greater than any other position
if (Position == 0 && other.Position != 0)
return 1;
if (other.Position == 0 && Position != 0)
return -1;
int res = Position.CompareTo(other.Position);
if (res != 0)
return res;
return Name.CompareTo(other.Name);
}

Expand Down
11 changes: 8 additions & 3 deletions src/SIL.Machine/Corpora/ScriptureRef.cs
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,11 @@ public ScriptureRef(VerseRef verseRef = default, IEnumerable<ScriptureElement> p
public bool IsEmpty => VerseRef.IsDefault;
public bool IsVerse => VerseRef.VerseNum != 0 && Path.Count == 0;

public ScriptureRef ToRelaxed()
{
return new ScriptureRef(VerseRef, Path.Select(pe => pe.ToRelaxed()));
}

public ScriptureRef ChangeVersification(ScrVers versification)
{
VerseRef vr = VerseRef.Clone();
Expand All @@ -73,7 +78,7 @@ int IComparable<ScriptureRef>.CompareTo(ScriptureRef other)
return CompareTo(other, compareSegments: true);
}

public int CompareTo(ScriptureRef other, bool compareSegments = true, bool strict = true)
public int CompareTo(ScriptureRef other, bool compareSegments = true)
{
IComparer<VerseRef> comparer = compareSegments ? VerseRefComparer.Default : VerseRefComparer.IgnoreSegments;
int res = comparer.Compare(VerseRef, other.VerseRef);
Expand All @@ -82,12 +87,12 @@ public int CompareTo(ScriptureRef other, bool compareSegments = true, bool stric

foreach ((ScriptureElement se1, ScriptureElement se2) in Path.Zip(other.Path))
{
res = se1.CompareTo(se2, strict);
res = se1.CompareTo(se2);
if (res != 0)
return res;
}

return Path.Count - other.Path.Count;
return Path.Count.CompareTo(other.Path.Count);
}

public int CompareTo(object obj)
Expand Down
9 changes: 1 addition & 8 deletions src/SIL.Machine/Corpora/UsfmTextUpdater.cs
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@ public class UsfmTextUpdater : ScriptureRefUsfmParserHandlerBase
private readonly List<UsfmToken> _newTokens;
private readonly string _idText;
private readonly bool _stripAllText;
private readonly bool _strictComparison;
private readonly bool _preferExistingText;
private readonly Stack<bool> _replace;
private int _rowIndex;
Expand All @@ -25,7 +24,6 @@ public UsfmTextUpdater(
IReadOnlyList<(IReadOnlyList<ScriptureRef>, string)> rows = null,
string idText = null,
bool stripAllText = false,
bool strictComparison = true,
bool preferExistingText = false
)
{
Expand All @@ -34,7 +32,6 @@ public UsfmTextUpdater(
_newTokens = new List<UsfmToken>();
_idText = idText;
_stripAllText = stripAllText;
_strictComparison = strictComparison;
_replace = new Stack<bool>();
_preferExistingText = preferExistingText;
}
Expand Down Expand Up @@ -315,11 +312,7 @@ private IReadOnlyList<string> AdvanceRows(IReadOnlyList<ScriptureRef> segScrRefs
{
while (sourceIndex < segScrRefs.Count)
{
compare = rowScrRef.CompareTo(
segScrRefs[sourceIndex],
compareSegments: false,
_strictComparison
);
compare = rowScrRef.CompareTo(segScrRefs[sourceIndex], compareSegments: false);
if (compare > 0)
// source is ahead of row, increment source
sourceIndex++;
Expand Down
44 changes: 9 additions & 35 deletions tests/SIL.Machine.Tests/Corpora/ScriptureRefTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -17,42 +17,16 @@ public class ScriptureRefTests
[TestCase("MAT 1:0/2:p", "MAT 1:0/1:p", ExpectedResult = 1, Description = "NonVerseGreaterThan")]
[TestCase("MAT 1:0/1:esb", "MAT 1:0/1:esb/1:p", ExpectedResult = -1, Description = "NonVerseParentChild")]
[TestCase("MAT 1:0/2:esb", "MAT 1:0/1:esb/1:p", ExpectedResult = 1, Description = "NonVerseParentOtherChild")]
public int CompareTo_Strict(string ref1Str, string ref2Str)
[TestCase("MAT 1:0/p", "MAT 1:0/2:p", ExpectedResult = 0, Description = "RelaxedSameMarker")]
[TestCase("MAT 1:0/p", "MAT 1:0/2:esb", ExpectedResult = 1, Description = "RelaxedSameLevel")]
[TestCase("MAT 1:0/esb", "MAT 1:0/1:esb/1:p", ExpectedResult = -1, Description = "RelaxedParentChild")]
[TestCase("MAT 1:0/2:esb", "MAT 1:0/esb/p", ExpectedResult = -1, Description = "ParentRelaxedChild")]
public int CompareTo(string ref1Str, string ref2Str)
{
var ref1 = ScriptureRef.Parse(ref1Str);
var ref2 = ScriptureRef.Parse(ref2Str);

int result = ref1.CompareTo(ref2);

// this tests the IComparable<ScriptureRef>.CompareTo method responds the same as the ScriptureRef.CompareTo method.
int result2 = ((IComparable<ScriptureRef>)ref1).CompareTo(ref2);
Assert.That(result, Is.EqualTo(result2));

if (result < 0)
result = -1;
else if (result > 0)
result = 1;
return result;
}

[TestCase("MAT 1:1", "MAT 1:2", ExpectedResult = -1, Description = "VerseLessThan")]
[TestCase("MAT 1:1", "MAT 1:1", ExpectedResult = 0, Description = "VerseEqualTo")]
[TestCase("MAT 1:2", "MAT 1:1", ExpectedResult = 1, Description = "VerseGreaterThan")]
[TestCase("MAT 1:0/1:p", "MAT 1:0/2:p", ExpectedResult = 0, Description = "NonVerseSameMarkerDifferentPosition")]
[TestCase("MAT 1:0/1:esb", "MAT 1:0/1:esb/1:p", ExpectedResult = -1, Description = "NonVerseParentChild")]
[TestCase("MAT 1:0/2:esb", "MAT 1:0/1:esb/1:p", ExpectedResult = -1, Description = "NonVerseParentOtherChild")]
public int CompareTo_Relaxed(string ref1Str, string ref2Str)
{
var ref1 = ScriptureRef.Parse(ref1Str);
var ref2 = ScriptureRef.Parse(ref2Str);

int result = ref1.CompareTo(ref2, strict: false);

if (result < 0)
result = -1;
else if (result > 0)
result = 1;
return result;
return ref1.CompareTo(ref2);
}

[TestCase]
Expand All @@ -64,9 +38,9 @@ public void IsEqualTo()
var obj1 = "A different type";
Assert.Multiple(() =>
{
Assert.That(ref1.Equals(ref1dup));
Assert.That(!ref1.Equals(ref2));
Assert.That(!ref1.Equals(obj1));
Assert.That(ref1.Equals(ref1dup), Is.True);
Assert.That(ref1.Equals(ref2), Is.False);
Assert.That(ref1.Equals(obj1), Is.False);
});
}

Expand Down
68 changes: 25 additions & 43 deletions tests/SIL.Machine.Tests/Corpora/UsfmManualTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -10,31 +10,21 @@ public class UsfmManualTests
[Ignore("This is for manual testing only. Remove this tag to run the test.")]
public void ParseParallelCorpus()
{
var tCorpus = new ParatextTextCorpus(
projectDir: CorporaTestHelpers.UsfmTargetProjectPath,
includeAllText: true,
includeMarkers: true
);
ParatextTextCorpus tCorpus =
new(projectDir: CorporaTestHelpers.UsfmTargetProjectPath, includeAllText: true, includeMarkers: true);

var sCorpus = new ParatextTextCorpus(
projectDir: CorporaTestHelpers.UsfmSourceProjectPath,
includeAllText: true,
includeMarkers: true
);
ParatextTextCorpus sCorpus =
new(projectDir: CorporaTestHelpers.UsfmSourceProjectPath, includeAllText: true, includeMarkers: true);

ParallelTextCorpus pCorpus = new ParallelTextCorpus(
sCorpus,
tCorpus,
alignmentCorpus: null,
rowRefComparer: null
)
{
AllSourceRows = true,
AllTargetRows = false
};
ParallelTextCorpus pCorpus =
new(sCorpus, tCorpus, alignmentCorpus: null, rowRefComparer: null)
{
AllSourceRows = true,
AllTargetRows = false
};

var rows = pCorpus.GetRows().ToList();
Assert.That(rows.Count, Is.Not.Zero);
List<ParallelTextRow> rows = pCorpus.GetRows().ToList();
Assert.That(rows, Has.Count.GreaterThan(0));
}

public record PretranslationDto
Expand All @@ -54,28 +44,25 @@ public record PretranslationDto
[Ignore("This is for manual testing only. Remove this tag to run the test.")]
public async Task CreateUsfmFile()
{
var parser = new FileParatextProjectSettingsParser(ParatextProjectPath);
FileParatextProjectSettingsParser parser = new(ParatextProjectPath);
ParatextProjectSettings settings = parser.Parse();

// Read text from pretranslations file
Stream pretranslationStream = File.OpenRead(PretranslationPath);
IAsyncEnumerable<PretranslationDto?>? pretranslations =
JsonSerializer.DeserializeAsyncEnumerable<PretranslationDto>(
using Stream pretranslationStream = File.OpenRead(PretranslationPath);
(IReadOnlyList<ScriptureRef>, string)[] pretranslations = await JsonSerializer
.DeserializeAsyncEnumerable<PretranslationDto>(
pretranslationStream,
new JsonSerializerOptions { PropertyNamingPolicy = JsonNamingPolicy.CamelCase }
);

var pretranslationsList = (await pretranslations.ToListAsync())
.Where(p => p is not null)
)
.Select(p =>
(
(IReadOnlyList<ScriptureRef>)
p!.Refs.Select(r => ScriptureRef.Parse(r, settings.Versification)).ToList(),
p.Translation
(IReadOnlyList<ScriptureRef>)(
p?.Refs.Select(r => ScriptureRef.Parse(r, settings.Versification).ToRelaxed()).ToArray() ?? []
),
p?.Translation ?? ""
)
)
.OrderBy(p => p.Item1[0])
.ToList();
.ToArrayAsync();

foreach (
string sfmFileName in Directory.EnumerateFiles(
Expand All @@ -84,15 +71,10 @@ string sfmFileName in Directory.EnumerateFiles(
)
)
{
var updater = new UsfmTextUpdater(
pretranslationsList,
stripAllText: true,
strictComparison: false,
preferExistingText: true
);
var usfm = await File.ReadAllTextAsync(sfmFileName);
var updater = new UsfmTextUpdater(pretranslations, stripAllText: true, preferExistingText: true);
string usfm = await File.ReadAllTextAsync(sfmFileName);
UsfmParser.Parse(usfm, updater, settings.Stylesheet, settings.Versification);
var newUsfm = updater.GetUsfm(settings.Stylesheet);
string newUsfm = updater.GetUsfm(settings.Stylesheet);
Assert.That(newUsfm, Is.Not.Null);
}
}
Expand Down
4 changes: 1 addition & 3 deletions tests/SIL.Machine.Tests/Corpora/UsfmTextUpdaterTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -262,7 +262,7 @@ public void GetUsfm_NonVerse_Relaxed()
(ScrRef("MAT 2:0/tr/tc1"), "The third cell of the table.")
};

string target = UpdateUsfm(rows, strictComparison: false);
string target = UpdateUsfm(rows);
Assert.That(target, Contains.Substring("\\s The first chapter.\r\n"));
Assert.That(target, Contains.Substring("\\v 1 First verse of the first chapter.\r\n"));
Assert.That(
Expand Down Expand Up @@ -401,7 +401,6 @@ private static string UpdateUsfm(
IReadOnlyList<(IReadOnlyList<ScriptureRef>, string)>? rows = null,
string? idText = null,
bool stripAllText = false,
bool strictComparison = true,
bool preferExistingText = false
)
{
Expand All @@ -410,7 +409,6 @@ private static string UpdateUsfm(
rows,
idText,
stripAllText: stripAllText,
strictComparison: strictComparison,
preferExistingText: preferExistingText
);
UsfmParser.Parse(source, updater);
Expand Down
Loading