-
Notifications
You must be signed in to change notification settings - Fork 20
/
w
executable file
·112 lines (107 loc) · 2.95 KB
/
w
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
#!/bin/sh
#! -*-perl-*-
eval 'exec perl -x $0 ${1+"$@"}'
if 0;
# ~/bin/w
# Search for potentially misspelled words
# Output is:
# misspellled
# woord (WOORD, Woord, woord, woord's)
use File::Basename;
use Cwd 'abs_path';
my $dirname = dirname(abs_path(__FILE__));
# originally this was a dict of all words
# but sorting it overflowed
%letter_map = ();
# now we have a dict per letter
# skip files that don't exist (including dangling symlinks)
if (scalar @ARGV) {
@ARGV = grep {-r || $_ eq '-'} @ARGV;
unless (scalar @ARGV) {
print STDERR "None of the provided files are readable\n";
exit 0;
}
}
# read all input
while (<<>>) {
next unless /./;
s/^/ /;
while (s/([^\\])\\[rtn]/$1 /g) {}
# https://www.fileformat.info/info/unicode/char/2019/
my $rsqm = "\xE2\x80\x99";
s/$rsqm/'/g;
s/[^a-zA-Z']+/ /g;
while (s/([A-Z]{2,})([A-Z][a-z]{2,})/ $1 $2 /g) {}
while (s/([a-z']+)([A-Z])/$1 $2/g) {}
for my $token (split /\s+/, $_) {
$token =~ s/^[^Ii]?'+(.*)/$1/;
$token =~ s/(.*?)'+$/$1/;
next unless $token =~ /./;
my $key = lc $token;
$key =~ s/''+/'/g;
$key =~ s/'[sd]$//;
my $char = substr $key, 0, 1;
$letter_map{$char} = () unless defined $letter_map{$char};
my %word_map = ();
%word_map = %{$letter_map{$char}{$key}} if defined $letter_map{$char}{$key};
$word_map{$token} = 1;
$letter_map{$char}{$key} = \%word_map;
}
}
# group related words
for my $char (sort keys %letter_map) {
for my $plural_key (sort keys(%{$letter_map{$char}})) {
my $key = $plural_key;
if ($key =~ /.s$/) {
if ($key =~ /ies$/) {
$key =~ s/ies$/y/;
} else {
$key =~ s/s$//;
}
} elsif ($key =~ /.[^aeiou]ed$/) {
$key =~ s/ed$//;
} else {
next;
}
next unless defined $letter_map{$char}{$key};
my %word_map = %{$letter_map{$char}{$key}};
for $word (keys(%{$letter_map{$char}{$plural_key}})) {
$word_map{$word} = 1;
}
$letter_map{$char}{$key} = \%word_map;
delete $letter_map{$char}{$plural_key};
}
}
# exclude dictionary words
my $dict = "$dirname/words";
$dict = '/usr/share/dict/words' unless -e $dict;
open DICT, '<', $dict;
while ($word = <DICT>) {
chomp $word;
my $lower_word = lc $word;
my $char = substr $lower_word, 0, 1;
next unless defined $letter_map{$char}{$lower_word};
delete $letter_map{$char}{$word};
next if $lower_word eq $word;
my %word_map = %{$letter_map{$char}{$lower_word}};
delete $word_map{$word};
if (%word_map) {
$letter_map{$char}{$lower_word} = \%word_map;
} else {
delete $letter_map{$char}{$lower_word};
}
}
close DICT;
# display the remainder
for my $char (sort keys %letter_map) {
for $key (sort keys(%{$letter_map{$char}})) {
my %word_map = %{$letter_map{$char}{$key}};
my @words = keys(%word_map);
if (scalar(@words) > 1) {
print $key." (".(join ", ", sort { length($a) <=> length($b) || $a cmp $b } @words).")";
} else {
print $words[0];
}
print "\n";
}
}