-
Notifications
You must be signed in to change notification settings - Fork 42
/
Copy pathuniq_order_preserved.pl
executable file
·99 lines (83 loc) · 2.28 KB
/
uniq_order_preserved.pl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
#!/usr/bin/perl -T
#
# Author: Hari Sekhon
# Date: 2015-02-07 16:06:33 +0000 (Sat, 07 Feb 2015)
#
# https://github.com/HariSekhon/DevOps-Perl-tools
#
# License: see accompanying Hari Sekhon LICENSE file
#
# If you're using my code you're welcome to connect with me on LinkedIn
# and optionally send me feedback to help improve or steer this or other code I publish
#
# https://www.linkedin.com/in/HariSekhon
#
# Help text and version string for this script.
# Both are assigned as undeclared package variables, which is only accepted
# because these statements sit above 'use strict'.
# NOTE(review): presumably HariSekhonUtils reads these when printing usage and
# version output - confirm against the library.
$DESCRIPTION = "Filter program to print only non-repeated lines in input - unlike the unix command 'uniq' lines do not have to be adjacent, this is order preserving compared to 'sort | uniq'. I rustled this up quickly after needing to parse unique missing modules for building but maintaining order as some modules depend on others being built first
Works as a standard unix filter program taking either standard input or files supplied as arguments
Since this must maintain unique lines in memory for comparison, do not use this on very large files/inputs
Tested on Mac and Linux
";
$VERSION = "0.1";
use strict;
use warnings;
BEGIN {
use File::Basename;
use lib dirname(__FILE__) . "/lib";
}
use HariSekhonUtils;
# Usage synopsis. $usage_line and $progname are not declared in this file, so
# they have to be imported (presumably from HariSekhonUtils - confirm).
$usage_line = "usage: $progname [file1] [file2] ...";

# Filehandle of the input currently being filtered
my $fh;
# Comparison keys of the lines seen so far
my %uniq;
# Command line switches; references to them are registered in %options below
# (presumably filled in by get_options() - confirm)
my ($ignore_case, $ignore_whitespace);

%options = (
    "i|ignore-case"       => [ \$ignore_case,       "Ignore case in comparisons"       ],
    "w|ignore-whitespace" => [ \$ignore_whitespace, "Ignore whitespace in comparisons" ],
);
# Insert both switch names into @usage_order at index 6
splice(@usage_order, 6, 0, qw/ignore-case ignore-whitespace/);

get_options();

# Validate every file argument up front, before any output is produced.
# "-" is skipped because it stands for standard input, not a file.
validate_file($_) foreach grep { $_ ne "-" } @ARGV;
# Builds the comparison key for one line of input.
#
# Takes a single string and returns a copy of it that is
#   - lowercased            when $ignore_case is set
#   - stripped of all \s    when $ignore_whitespace is set
# The argument is copied first, so the caller's string is left unmodified.
sub transformations ($) {
    my ($key) = @_;
    $key = lc $key        if $ignore_case;
    $key =~ s/\s+//g      if $ignore_whitespace;
    return $key;
}
# Reports whether a comparison key is new, remembering it in %uniq if so.
#
# Takes a single string. Returns 1 the first time that string is passed in
# and 0 on every later call with the same string.
sub uniq($){
    my ($key) = @_;
    # already recorded by an earlier call => duplicate
    return 0 if defined $uniq{$key};
    $uniq{$key} = 1;
    return 1;
}
# Reads the given filehandle line by line and prints each line whose
# comparison key has not been seen before. Lines are printed unmodified,
# in input order.
#
# Takes a single filehandle (glob or reference) opened for reading.
# Returns nothing meaningful.
#
# The comparison key is the line without its trailing newline, passed
# through transformations(); uniq() decides whether that key is new.
sub print_uniq ($) {
    my $fh = shift;
    # Read into a named lexical instead of the implicit $_: while(<$fh>)
    # assigns to the global $_ without localising it, and this sub is called
    # from inside foreach(@ARGV) where $_ is an alias for the current
    # argument, so the implicit form overwrote the entries of @ARGV.
    while(defined(my $line = <$fh>)){
        # Compare without the line terminator so that a final line lacking a
        # trailing newline still matches an earlier, newline terminated copy
        my $key = $line;
        chomp $key;
        print $line if uniq(transformations($key));
    }
}
# Main: with no arguments filter standard input, otherwise filter every
# argument in the order given, where "-" selects standard input and anything
# else is opened with open_file().
unless(@ARGV){
    print_uniq(*STDIN);
}
foreach(@ARGV){
    $fh = $_ eq "-" ? *STDIN : open_file($_);
    print_uniq($fh);
}