Skip to content

Commit

Permalink
feat(algorithms): implement weighted ratio
Browse files Browse the repository at this point in the history
Signed-off-by: SphericalKat <[email protected]>
  • Loading branch information
SphericalKat committed Mar 27, 2021
1 parent d48ad62 commit 7aa2c0e
Show file tree
Hide file tree
Showing 8 changed files with 182 additions and 0 deletions.
29 changes: 29 additions & 0 deletions lib/algorithms/token_set.dart
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
import 'dart:collection';
import 'dart:math';

import '../applicable.dart';

/// Token-set comparison of two strings.
///
/// Both inputs are split into sets of whitespace-separated tokens. Three
/// strings are then built from those sets (the sorted common tokens, and the
/// common tokens followed by each side's sorted unique tokens) and compared
/// pairwise with the supplied ratio; the best of the three scores wins.
class TokenSet {
  /// Matches one or more whitespace characters.
  ///
  /// `String.split` treats a plain `String` argument as literal text, so a
  /// [RegExp] is required to actually split on whitespace.
  static final RegExp _whitespace = RegExp(r'\s+');

  /// Returns the highest score [ratio] yields for the token-set
  /// combinations of [s1] and [s2].
  int apply(String s1, String s2, Applicable ratio) {
    final tokens1 = _tokenize(s1);
    final tokens2 = _tokenize(s2);

    final intersection = tokens1.intersection(tokens2);
    final diff1to2 = tokens1.difference(tokens2);
    final diff2to1 = tokens2.difference(tokens1);

    final sortedInter = _sortedJoin(intersection).trim();
    // trim() removes the separator left over when either part is empty.
    final sorted1to2 = '$sortedInter ${_sortedJoin(diff1to2)}'.trim();
    final sorted2to1 = '$sortedInter ${_sortedJoin(diff2to1)}'.trim();

    return [
      ratio.apply(sortedInter, sorted1to2),
      ratio.apply(sortedInter, sorted2to1),
      ratio.apply(sorted1to2, sorted2to1),
    ].reduce(max);
  }

  /// Splits [s] on runs of whitespace into a set of distinct tokens.
  ///
  /// Empty tokens (produced by leading or trailing whitespace) are dropped
  /// so they cannot end up in the joined strings as stray separators.
  static Set<String> _tokenize(String s) => HashSet<String>.of(
      s.split(_whitespace).where((token) => token.isNotEmpty));

  /// Joins [tokens] in sorted order, separated by single spaces.
  static String _sortedJoin(Set<String> tokens) =>
      (tokens.toList()..sort()).join(' ');
}
16 changes: 16 additions & 0 deletions lib/algorithms/token_sort.dart
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
import '../applicable.dart';

/// Token-sort comparison of two strings.
///
/// Each input is split into whitespace-separated words, the words are sorted
/// and re-joined with single spaces, and the two normalized strings are then
/// scored with the supplied ratio.
class TokenSort {
  /// Matches one or more whitespace characters.
  ///
  /// `String.split` treats a plain `String` argument as literal text, so a
  /// [RegExp] is required to actually split on whitespace.
  static final RegExp _whitespace = RegExp(r'\s+');

  /// Returns the score [ratio] yields for the word-sorted forms of [s1] and
  /// [s2].
  int apply(String s1, String s2, Applicable ratio) =>
      ratio.apply(sort(s1), sort(s2));

  /// Returns the words of [s] sorted and joined with single spaces.
  ///
  /// Empty tokens (produced by leading or trailing whitespace) are discarded
  /// before sorting.
  static String sort(String s) {
    final words = s.split(_whitespace).where((w) => w.isNotEmpty).toList()
      ..sort();
    return words.join(' ').trim();
  }
}
45 changes: 45 additions & 0 deletions lib/algorithms/weighted_ratio.dart
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
import 'dart:math';

import '../applicable.dart';
import '../fuzzy_search.dart';

/// Combines several ratios into a single score.
///
/// The plain ratio is always computed. Depending on how different the two
/// input lengths are, it is compared against either the scaled token-based
/// ratios or the scaled partial ratios, and the largest value is returned
/// rounded to an integer.
class WeightedRatio implements Applicable {
  /// Multiplier applied to every token-based score.
  static const UNBASE_SCALE = 0.95;

  /// Default multiplier applied to partial scores.
  static const PARTIAL_SCALE = 0.90;

  /// Nominal default for trying partial ratios.
  ///
  /// [apply] decides whether to use partial ratios from the length ratio
  /// alone, so this value does not currently influence the result.
  static const TRY_PARTIALS = true;

  /// Returns the weighted score for [s1] and [s2], or 0 when either is
  /// empty.
  @override
  int apply(String s1, String s2) {
    if (s1.isEmpty || s2.isEmpty) return 0;

    final base = ratio(s1, s2);

    // Both lengths are non-zero here, so the division is safe.
    final longLen = max(s1.length, s2.length);
    final shortLen = min(s1.length, s2.length);
    final lenRatio = longLen / shortLen;

    // Partial scores are weighted less when the lengths differ greatly.
    final partialScale = lenRatio > 8 ? 0.6 : PARTIAL_SCALE;

    if (lenRatio < 1.5) {
      // Similar lengths: compare against the full token-based ratios.
      final candidates = <num>[
        base,
        tokenSortRatio(s1, s2) * UNBASE_SCALE,
        tokenSetRatio(s1, s2) * UNBASE_SCALE,
      ];
      return candidates.reduce(max).round();
    }

    // Lengths differ noticeably: compare against the partial ratios.
    final candidates = <num>[
      base,
      partialRatio(s1, s2) * partialScale,
      tokenSortPartialRatio(s1, s2) * UNBASE_SCALE * partialScale,
      tokenSetPartialRatio(s1, s2) * UNBASE_SCALE * partialScale,
    ];
    return candidates.reduce(max).round();
  }
}
5 changes: 5 additions & 0 deletions lib/applicable.dart
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
/// A ratio/algorithm that can be applied to a pair of strings.
///
/// Implement this interface to provide a similarity scorer.
abstract class Applicable {
/// Returns the similarity score computed from [s1] and [s2].
int apply(String s1, String s2);
}
28 changes: 28 additions & 0 deletions lib/fuzzy_search.dart
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
import 'algorithms/token_set.dart';
import 'algorithms/token_sort.dart';
import 'ratios/partial_ratio.dart';
import 'ratios/simple_ratio.dart';

/// Returns the [SimpleRatio] score for [s1] and [s2].
int ratio(String s1, String s2) => SimpleRatio().apply(s1, s2);

/// Returns the [PartialRatio] score for [s1] and [s2].
int partialRatio(String s1, String s2) => PartialRatio().apply(s1, s2);

/// Returns the [TokenSort] score for [s1] and [s2], using [SimpleRatio] as
/// the underlying scorer.
int tokenSortRatio(String s1, String s2) =>
    TokenSort().apply(s1, s2, SimpleRatio());

/// Returns the [TokenSort] score for [s1] and [s2], using [PartialRatio] as
/// the underlying scorer.
int tokenSortPartialRatio(String s1, String s2) =>
    TokenSort().apply(s1, s2, PartialRatio());

/// Returns the [TokenSet] score for [s1] and [s2], using [SimpleRatio] as
/// the underlying scorer.
int tokenSetRatio(String s1, String s2) =>
    TokenSet().apply(s1, s2, SimpleRatio());

/// Returns the [TokenSet] score for [s1] and [s2], using [PartialRatio] as
/// the underlying scorer.
int tokenSetPartialRatio(String s1, String s2) =>
    TokenSet().apply(s1, s2, PartialRatio());
47 changes: 47 additions & 0 deletions lib/ratios/partial_ratio.dart
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
import '../applicable.dart';
import '../diffutils/diff_utils.dart';
import '../diffutils/structs/matching_block.dart';

import 'dart:math';

/// Scores the shorter input against same-length windows of the longer one.
///
/// For every matching block reported by [DiffUtils.getMatchingBlocks], a
/// window of the longer string is selected (aligned with that block and at
/// most as long as the shorter string) and compared to the shorter string
/// with [DiffUtils.getRatio]. The best window score, scaled to 0-100 and
/// rounded, is the result.
class PartialRatio implements Applicable {
  /// Returns the best window score for [s1] and [s2], or 0 when no matching
  /// blocks are reported.
  @override
  int apply(String s1, String s2) {
    // On equal lengths, s2 is treated as the shorter string.
    final s1IsShorter = s1.length < s2.length;
    final shorter = s1IsShorter ? s1 : s2;
    final longer = s1IsShorter ? s2 : s1;

    double? best;

    for (MatchingBlock block in DiffUtils.getMatchingBlocks(shorter, longer)) {
      // Offset of the block in the longer string relative to the shorter one.
      final int offset = block.dpos! - block.spos!;

      // Clamp the window to the bounds of the longer string.
      final int windowStart = max(offset, 0);
      final int windowEnd =
          min(windowStart + shorter.length, longer.length);

      final double score = DiffUtils.getRatio(
          shorter, longer.substring(windowStart, windowEnd));

      // A near-perfect window cannot be beaten; stop early.
      if (score > 0.995) return 100;

      best = best == null ? score : max(best, score);
    }

    return best == null ? 0 : (100 * best).round();
  }
}
9 changes: 9 additions & 0 deletions lib/ratios/simple_ratio.dart
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
import '../applicable.dart';
import '../diffutils/diff_utils.dart';

/// Scores two strings with [DiffUtils.getRatio], scaled by 100 and rounded
/// to the nearest integer.
class SimpleRatio implements Applicable {
  /// Returns the rounded, scaled ratio of [s1] and [s2].
  @override
  int apply(String s1, String s2) {
    final similarity = DiffUtils.getRatio(s1, s2);
    return (100 * similarity).round();
  }
}
3 changes: 3 additions & 0 deletions lib/to_string_function.dart
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
/// Converts an item of type [T] to a [String].
abstract class ToStringFunction<T> {
/// Returns the string form of [item].
String apply(T item);
}

0 comments on commit 7aa2c0e

Please sign in to comment.