-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathredir.pl
52 lines (45 loc) · 939 Bytes
/
redir.pl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
#!/usr/bin/perl
## READS DUPELOG looking for candidate URLs for redirection
# or scurf-cleanup
use strict;
use warnings;
sub longest_common_prefix {
my $prefix = shift;
for (@_) {
chop $prefix while (! /^$prefix/);
}
return $prefix;
}
sub longest_common_suffix {
(my $s1, my $s2) = @_;
my $r1 = reverse $s1;
my $r2 = reverse $s2;
return reverse( longest_common_prefix($r1,$r2));
}
open (LOG, "<", "DUPELOG") or die "Could not open duplicate log: $!\n";
my $curr='';
my %hash;
while (<LOG>) {
chomp;
if (/^http/) {
s/[?*+()]/./g;
if ($curr) {
my $lcs = longest_common_suffix($curr, $_);
$lcs =~ s/^[^\/]*\///;
if ($lcs) {
$curr =~ s/$lcs$//;
s/$lcs$//;
my $sampla = join(' <-> ',sort($curr, $_));
# print "$sampla\n";
$hash{$sampla}++;
}
$curr = '';
}
else {
$curr = $_;
}
}
}
close LOG;
print "$hash{$_} $_\n" foreach (sort {$hash{$b} <=> $hash{$a}} keys %hash);
exit 0;