File tree 2 files changed +42966
-28214
lines changed
2 files changed +42966
-28214
lines changed Original file line number Diff line number Diff line change @@ -38,7 +38,7 @@ check_usage = () ->
38
38
process .exit (0 )
39
39
40
40
# after all passwords are counted, discard passwords with counts <= CUTOFF
41
- CUTOFF = 15
41
+ CUTOFF = 10
42
42
43
43
# to save memory, after every batch of size BATCH_SIZE, go through counts and delete
44
44
# long tail of entries with only one count.
@@ -52,6 +52,14 @@ normalize = (token) ->
52
52
token .toLowerCase ()
53
53
54
54
should_include = (password , xato_rank ) ->
55
+ for i in [0 ... password .length ]
56
+ if password .charCodeAt (i) > 127
57
+ # xato mostly contains ascii-only passwords, so in practice
58
+ # this will only skip one or two things. were that not the case /
59
+ # were this used on a different data source, consider using
60
+ # a unidecode-like library instead, similar to count_wikipedia / count_wiktionary
61
+ console .log ' SKIPPING non-ascii password=#{password}, rank=#{xato_rank}'
62
+ return false
55
63
matches = []
56
64
for matcher in [
57
65
matching .spatial_match
You can’t perform that action at this time.
0 commit comments