From dbf098baded732e20a70cd3460043d556505a2d3 Mon Sep 17 00:00:00 2001 From: Vadim Markovtsev Date: Wed, 31 Jan 2018 15:45:40 +0100 Subject: [PATCH 1/2] Fix DiffLevenshtein counting single runes as multiple edits --- diffmatchpatch/diff.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/diffmatchpatch/diff.go b/diffmatchpatch/diff.go index 0d1c2d4..cb25b43 100644 --- a/diffmatchpatch/diff.go +++ b/diffmatchpatch/diff.go @@ -1236,9 +1236,9 @@ func (dmp *DiffMatchPatch) DiffLevenshtein(diffs []Diff) int { for _, aDiff := range diffs { switch aDiff.Type { case DiffInsert: - insertions += len(aDiff.Text) + insertions += utf8.RuneCountInString(aDiff.Text) case DiffDelete: - deletions += len(aDiff.Text) + deletions += utf8.RuneCountInString(aDiff.Text) case DiffEqual: // A deletion and an insertion is one substitution. levenshtein += max(insertions, deletions) From f7f9a5cc31d1a867e139c1d0dded822cb29d0fb6 Mon Sep 17 00:00:00 2001 From: Vadim Markovtsev Date: Wed, 31 Jan 2018 16:34:27 +0100 Subject: [PATCH 2/2] Add the utf-8 test for DiffLevenshtein --- diffmatchpatch/diff_test.go | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/diffmatchpatch/diff_test.go b/diffmatchpatch/diff_test.go index 8596999..5c165b1 100644 --- a/diffmatchpatch/diff_test.go +++ b/diffmatchpatch/diff_test.go @@ -1153,9 +1153,9 @@ func TestDiffLevenshtein(t *testing.T) { dmp := New() for i, tc := range []TestCase{ - {"Levenshtein with trailing equality", []Diff{{DiffDelete, "abc"}, {DiffInsert, "1234"}, {DiffEqual, "xyz"}}, 4}, - {"Levenshtein with leading equality", []Diff{{DiffEqual, "xyz"}, {DiffDelete, "abc"}, {DiffInsert, "1234"}}, 4}, - {"Levenshtein with middle equality", []Diff{{DiffDelete, "abc"}, {DiffEqual, "xyz"}, {DiffInsert, "1234"}}, 7}, + {"Levenshtein with trailing equality", []Diff{{DiffDelete, "абв"}, {DiffInsert, "1234"}, {DiffEqual, "эюя"}}, 4}, + {"Levenshtein with leading equality", []Diff{{DiffEqual, "эюя"}, {DiffDelete, "абв"}, {DiffInsert, "1234"}}, 4}, + {"Levenshtein with middle equality", []Diff{{DiffDelete, "абв"}, {DiffEqual, "эюя"}, {DiffInsert, "1234"}}, 7}, } { actual := dmp.DiffLevenshtein(tc.Diffs) assert.Equal(t, tc.Expected, actual, fmt.Sprintf("Test case #%d, %s", i, tc.Name))