-
Notifications
You must be signed in to change notification settings - Fork 7
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
feat(algorithms): implement weighted ratio
Signed-off-by: SphericalKat <[email protected]>
- Loading branch information
1 parent
d48ad62
commit 7aa2c0e
Showing
8 changed files
with
182 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,29 @@ | ||
import 'dart:collection'; | ||
import 'dart:math'; | ||
|
||
import '../applicable.dart'; | ||
|
||
/// Compares two strings by the sets of whitespace-separated tokens they
/// contain, ignoring token order and duplicate tokens.
class TokenSet {
  /// Returns the best score [ratio] yields for the token sets of [s1] and
  /// [s2].
  ///
  /// Three strings are built from the tokens: the sorted intersection, and
  /// the sorted intersection followed by the sorted tokens unique to each
  /// input. [ratio] is applied to every pair of those strings and the
  /// maximum of the three scores is returned.
  int apply(String s1, String s2, Applicable ratio) {
    final tokens1 = _tokenize(s1);
    final tokens2 = _tokenize(s2);

    final sortedInter = _sortedJoin(tokens1.intersection(tokens2));

    // The trim removes the separator when either side of it is empty.
    final sorted1to2 =
        "$sortedInter ${_sortedJoin(tokens1.difference(tokens2))}".trim();
    final sorted2to1 =
        "$sortedInter ${_sortedJoin(tokens2.difference(tokens1))}".trim();

    return [
      ratio.apply(sortedInter, sorted1to2),
      ratio.apply(sortedInter, sorted2to1),
      ratio.apply(sorted1to2, sorted2to1),
    ].reduce(max);
  }

  /// Splits [s] on runs of whitespace into a set of non-empty tokens.
  ///
  /// A [RegExp] is required here: a plain `String` pattern passed to
  /// `split` is matched literally, so `"\\s+"` would never split on
  /// whitespace. Empty tokens (from leading or trailing whitespace) are
  /// dropped so they cannot introduce stray spaces into the joined strings.
  static Set<String> _tokenize(String s) => HashSet<String>.of(
      s.split(RegExp(r"\s+")).where((token) => token.isNotEmpty));

  /// Joins [tokens] in sorted order, separated by single spaces.
  static String _sortedJoin(Set<String> tokens) =>
      (tokens.toList()..sort()).join(" ");
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,16 @@ | ||
import '../applicable.dart'; | ||
|
||
/// Compares two strings after sorting their whitespace-separated words, so
/// that word order does not affect the score.
class TokenSort {
  /// Returns the score [ratio] yields for the word-sorted forms of [s1] and
  /// [s2].
  int apply(String s1, String s2, Applicable ratio) =>
      ratio.apply(sort(s1), sort(s2));

  /// Returns the words of [s] in sorted order, joined by single spaces.
  ///
  /// Words are separated on runs of whitespace. A [RegExp] is required: a
  /// plain `String` pattern passed to `split` is matched literally, so
  /// `"\\s+"` would leave the input unsplit and unsorted. Empty words
  /// produced by leading or trailing whitespace are discarded.
  static String sort(String s) {
    final words = s
        .split(RegExp(r"\s+"))
        .where((word) => word.isNotEmpty)
        .toList()
      ..sort();
    return words.join(" ");
  }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,45 @@ | ||
import 'dart:math'; | ||
|
||
import '../applicable.dart'; | ||
import '../fuzzy_search.dart'; | ||
|
||
/// An [Applicable] that blends several ratios into one score, choosing
/// which ratios to use from the relative lengths of the two strings.
class WeightedRatio implements Applicable {
  /// Multiplier applied to every token-based score.
  static const UNBASE_SCALE = 0.95;

  /// Default multiplier applied to partial scores; [apply] uses 0.6 instead
  /// when one string is more than eight times longer than the other.
  static const PARTIAL_SCALE = 0.90;

  /// Default for whether partial ratios are tried; [apply] makes the actual
  /// decision from the length ratio of its inputs.
  static const TRY_PARTIALS = true;

  /// Returns the highest weighted score for [s1] and [s2], rounded to an
  /// integer, or 0 when either string is empty.
  @override
  int apply(String s1, String s2) {
    final len1 = s1.length;
    final len2 = s2.length;

    if (len1 == 0 || len2 == 0) {
      return 0;
    }

    final base = ratio(s1, s2);
    final lenRatio = max(len1, len2) / min(len1, len2);

    // Similar lengths: compare whole strings only.
    if (lenRatio < 1.5) {
      final candidates = <num>[
        base,
        tokenSortRatio(s1, s2) * UNBASE_SCALE,
        tokenSetRatio(s1, s2) * UNBASE_SCALE,
      ];
      return candidates.reduce(max).round();
    }

    // Very different lengths: use partial ratios, scaled down further when
    // the length mismatch is extreme.
    final double partialScale = lenRatio > 8 ? 0.6 : PARTIAL_SCALE;
    final candidates = <num>[
      base,
      partialRatio(s1, s2) * partialScale,
      tokenSortPartialRatio(s1, s2) * UNBASE_SCALE * partialScale,
      tokenSetPartialRatio(s1, s2) * UNBASE_SCALE * partialScale,
    ];
    return candidates.reduce(max).round();
  }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
/// A ratio/algorithm that can be applied | ||
/// Contract for a similarity ratio or algorithm that scores two strings.
abstract class Applicable {
  /// Computes the similarity score of [s1] and [s2] as an integer.
  int apply(String s1, String s2);
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,28 @@ | ||
import 'algorithms/token_set.dart'; | ||
import 'algorithms/token_sort.dart'; | ||
import 'ratios/partial_ratio.dart'; | ||
import 'ratios/simple_ratio.dart'; | ||
|
||
/// Returns the [SimpleRatio] score of [s1] and [s2].
int ratio(String s1, String s2) => SimpleRatio().apply(s1, s2);
|
||
/// Returns the [PartialRatio] score of [s1] and [s2].
int partialRatio(String s1, String s2) => PartialRatio().apply(s1, s2);
|
||
/// Returns the [TokenSort] score of [s1] and [s2], computed with
/// [SimpleRatio].
int tokenSortRatio(String s1, String s2) =>
    TokenSort().apply(s1, s2, SimpleRatio());
|
||
/// Returns the [TokenSort] score of [s1] and [s2], computed with
/// [PartialRatio].
int tokenSortPartialRatio(String s1, String s2) =>
    TokenSort().apply(s1, s2, PartialRatio());
|
||
/// Returns the [TokenSet] score of [s1] and [s2], computed with
/// [SimpleRatio].
int tokenSetRatio(String s1, String s2) =>
    TokenSet().apply(s1, s2, SimpleRatio());
|
||
/// Returns the [TokenSet] score of [s1] and [s2], computed with
/// [PartialRatio].
int tokenSetPartialRatio(String s1, String s2) =>
    TokenSet().apply(s1, s2, PartialRatio());
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,47 @@ | ||
import '../applicable.dart'; | ||
import '../diffutils/diff_utils.dart'; | ||
import '../diffutils/structs/matching_block.dart'; | ||
|
||
import 'dart:math'; | ||
|
||
/// An [Applicable] that scores the shorter string against same-length
/// windows of the longer string and keeps the best result.
class PartialRatio implements Applicable {
  /// Returns the best window score for [s1] and [s2], scaled to an integer
  /// percentage.
  ///
  /// Returns 100 as soon as any window's ratio exceeds 0.995, and 0 when no
  /// matching blocks are reported.
  @override
  int apply(String s1, String s2) {
    // On equal lengths, s2 is treated as the shorter string.
    final s1IsShorter = s1.length < s2.length;
    final shorter = s1IsShorter ? s1 : s2;
    final longer = s1IsShorter ? s2 : s1;

    final List<MatchingBlock> blocks =
        DiffUtils.getMatchingBlocks(shorter, longer);
    final scores = <double>[];

    for (final block in blocks) {
      // NOTE(review): presumably spos/dpos are the block's offsets in
      // `shorter` and `longer` respectively — confirm against MatchingBlock.
      final int offset = block.dpos! - block.spos!;

      // Clamp the window to the bounds of the longer string.
      final windowStart = max(offset, 0);
      final windowEnd = min(windowStart + shorter.length, longer.length);
      final window = longer.substring(windowStart, windowEnd);

      final double score = DiffUtils.getRatio(shorter, window);
      if (score > 0.995) {
        return 100;
      }
      scores.add(score);
    }

    if (scores.isEmpty) {
      return 0;
    }
    return (100 * scores.reduce(max)).round();
  }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,9 @@ | ||
import '../applicable.dart'; | ||
import '../diffutils/diff_utils.dart'; | ||
|
||
/// An [Applicable] that scores two whole strings with [DiffUtils.getRatio].
class SimpleRatio implements Applicable {
  /// Returns the [DiffUtils.getRatio] of [s1] and [s2], multiplied by 100
  /// and rounded to the nearest integer.
  @override
  int apply(String s1, String s2) {
    final similarity = DiffUtils.getRatio(s1, s2);
    return (100 * similarity).round();
  }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
/// Contract for converting an item of type [T] into a [String].
abstract class ToStringFunction<T> {
  /// Returns the string form of [item].
  String apply(T item);
}