From 452bdda9c0b2aaf808f2a728b6893fc4e432f2bb Mon Sep 17 00:00:00 2001 From: Tim De Pauw Date: Fri, 27 Apr 2018 12:37:46 +0200 Subject: [PATCH] Support --files0-from --- README.md | 16 ++++--- internal/app/lwc/config.go | 77 +++++++++++++++++++++++--------- internal/app/lwc/config_test.go | 40 ++++++++++++++--- internal/app/lwc/output.go | 10 ++--- internal/app/lwc/output_test.go | 11 ++++- internal/app/lwc/process.go | 72 ++++++++++++++--------------- internal/app/lwc/process_test.go | 22 --------- internal/app/lwc/root.go | 5 +-- internal/pkg/lwcutil/chan.go | 41 +++++++++++++++++ internal/pkg/lwcutil/scan.go | 21 +++++++++ 10 files changed, 216 insertions(+), 99 deletions(-) delete mode 100644 internal/app/lwc/process_test.go create mode 100644 internal/pkg/lwcutil/chan.go create mode 100644 internal/pkg/lwcutil/scan.go diff --git a/README.md b/README.md index 7d3595b..2edfcb8 100644 --- a/README.md +++ b/README.md @@ -31,14 +31,15 @@ Without any options, `lwc` will count the number of lines, words, and bytes in standard input, and write them to standard output. Contrary to `wc`, it will also update standard output while it is still counting. -The following [`wc` options](https://en.wikipedia.org/wiki/Wc_(Unix)) are -currently supported: +All the standard [`wc` options](https://en.wikipedia.org/wiki/Wc_(Unix)) are +supported: - `--lines` or `-l` - `--words` or `-w` - `--chars` or `-m` - `--bytes` or `-c` - `--max-line-length` or `-L` +- `--files0-from=F` - `--help` - `--version` @@ -60,10 +61,15 @@ Run a slow command and count the number of bytes logged: slow-command | lwc --bytes ``` -## TODO +## Caveats -- Support `--files0-from` -- Add tests +- The `--lines` option is currently implemented differently from `wc`'s. Where + `wc` will count the number of newline characters, `lwc` will count the actual + number of lines. Hence, if there is no newline at the end of its input, `lwc` + will still count the line, while `wc` won't. + +- While `lwc` is pretty fast, you'll still get better performance out of `wc`. + Benchmarks will be added at some point, but it's currently not a priority. ## JavaScript Version diff --git a/internal/app/lwc/config.go b/internal/app/lwc/config.go index 85c0766..b2f882f 100644 --- a/internal/app/lwc/config.go +++ b/internal/app/lwc/config.go @@ -1,6 +1,7 @@ package lwc import ( + "bufio" "fmt" "time" @@ -16,6 +17,7 @@ type Config struct { Chars bool Bytes bool MaxLineLength bool + Files0From string Interval time.Duration Help bool Version bool @@ -27,30 +29,65 @@ func (c *Config) PrintUsage() { c.g.PrintUsage(lwcutil.GetStdout()) } -func BuildConfig(args []string) Config { +func NewConfig(args []string) *Config { intervalMs := DEFAULT_INTERVAL - g := getopt.New() - var config Config - config.g = g - g.FlagLong(&config.Lines, "lines", 'l', "print the newline counts") - g.FlagLong(&config.Words, "words", 'w', "print the word counts") - g.FlagLong(&config.Chars, "chars", 'm', "print the character counts") - g.FlagLong(&config.Bytes, "bytes", 'c', "print the byte counts") - g.FlagLong(&config.MaxLineLength, "max-line-length", 'L', "print the maximum display width") - g.FlagLong(&intervalMs, "interval", 'i', + var c Config + c.g = getopt.New() + c.g.FlagLong(&c.Lines, "lines", 'l', "print the newline counts") + c.g.FlagLong(&c.Words, "words", 'w', "print the word counts") + c.g.FlagLong(&c.Chars, "chars", 'm', "print the character counts") + c.g.FlagLong(&c.Bytes, "bytes", 'c', "print the byte counts") + c.g.FlagLong(&c.MaxLineLength, "max-line-length", 'L', "print the maximum display width") + c.g.FlagLong(&c.Files0From, "files0-from", 0, "read input from the files specified by NUL-terminated names in file F") + c.g.FlagLong(&intervalMs, "interval", 'i', fmt.Sprintf("set update interval in ms (default %d ms)", DEFAULT_INTERVAL)) - g.FlagLong(&config.Help, "help", 'h', "display this help and exit") - g.FlagLong(&config.Version, "version", 'V', "output version information and exit") - g.Parse(args) + c.g.FlagLong(&c.Help, "help", 'h', "display this help and exit") + c.g.FlagLong(&c.Version, "version", 'V', "output version information and exit") + c.g.Parse(args) if intervalMs < 0 { lwcutil.Fatal("Update interval cannot be negative") } - config.Interval = time.Duration(intervalMs) * time.Millisecond - config.Files = g.Args() - if !(config.Lines || config.Words || config.Chars || config.Bytes) { - config.Lines = true - config.Words = true - config.Bytes = true + c.Interval = time.Duration(intervalMs) * time.Millisecond + c.Files = c.g.Args() + if !(c.Lines || c.Words || c.Chars || c.Bytes || c.MaxLineLength) { + c.Lines = true + c.Words = true + c.Bytes = true + } + return &c +} + +func (config *Config) Processors() []Processor { + var temp [5]Processor + i := 0 + if config.Lines { + temp[i] = Processor{bufio.ScanLines, ScanCount} + i++ + } + if config.Words { + temp[i] = Processor{bufio.ScanWords, ScanCount} + i++ + } + if config.Chars { + temp[i] = Processor{bufio.ScanRunes, ScanCount} + i++ + } + if config.Bytes { + temp[i] = Processor{bufio.ScanBytes, ScanCount} + i++ + } + if config.MaxLineLength { + temp[i] = Processor{bufio.ScanLines, ScanMaxLength} + i++ + } + return temp[0:i] +} + +func (config *Config) FilesChan() *chan string { + if config.Files0From != "" { + reader := lwcutil.OpenFile(config.Files0From) + return lwcutil.NewFilesChanFromReader(reader, byte(0)) + } else { + return lwcutil.NewFilesChanFromSlice(config.Files) } - return config } diff --git a/internal/app/lwc/config_test.go b/internal/app/lwc/config_test.go index c9e7ee0..d0649f8 100644 --- a/internal/app/lwc/config_test.go +++ b/internal/app/lwc/config_test.go @@ -19,6 +19,7 @@ var configTests = []configTest{ []string{}, Config{ true, true, false, true, false, + "", time.Duration(DEFAULT_INTERVAL) * time.Millisecond, false, false, []string{}, @@ -29,6 +30,7 @@ var configTests = []configTest{ []string{"-w", "--lines"}, Config{ true, true, false, false, false, + "", time.Duration(DEFAULT_INTERVAL) * time.Millisecond, false, false, []string{}, @@ -39,6 +41,7 @@ var configTests = []configTest{ []string{"foo"}, Config{ true, true, false, true, false, + "", time.Duration(DEFAULT_INTERVAL) * time.Millisecond, false, false, []string{"foo"}, @@ -49,6 +52,7 @@ var configTests = []configTest{ []string{"--", "/path/to/file"}, Config{ true, true, false, true, false, + "", time.Duration(DEFAULT_INTERVAL) * time.Millisecond, false, false, []string{"/path/to/file"}, @@ -59,6 +63,7 @@ var configTests = []configTest{ []string{"--max-line-length", "--bytes", "/etc/passwd", "/etc/group"}, Config{ false, false, false, true, true, + "", time.Duration(DEFAULT_INTERVAL) * time.Millisecond, false, false, []string{"/etc/passwd", "/etc/group"}, @@ -69,6 +74,7 @@ var configTests = []configTest{ []string{"-i", "5000"}, Config{ true, true, false, true, false, + "", time.Duration(5000) * time.Millisecond, false, false, []string{}, @@ -79,6 +85,7 @@ var configTests = []configTest{ []string{"--interval=2000"}, Config{ true, true, false, true, false, + "", time.Duration(2000) * time.Millisecond, false, false, []string{}, @@ -89,6 +96,7 @@ var configTests = []configTest{ []string{"--interval", "3000"}, Config{ true, true, false, true, false, + "", time.Duration(3000) * time.Millisecond, false, false, []string{}, @@ -99,6 +107,7 @@ var configTests = []configTest{ []string{"-i", "0"}, Config{ true, true, false, true, false, + "", time.Duration(0), false, false, []string{}, @@ -109,27 +118,44 @@ var configTests = []configTest{ func TestBuildConfig(t *testing.T) { for i, test := range configTests { - actual := BuildConfig(append([]string{"lwc"}, test.args...)) - // Clear getopt Set because we don't want to compare it + actual := NewConfig(append([]string{"lwc"}, test.args...)) + // Unref getopt to make comparison work actual.g = nil - if !reflect.DeepEqual(test.expected, actual) { - t.Errorf("Test #%d failed: expecting %#v, got %#v", i, test.expected, actual) + if !reflect.DeepEqual(test.expected, *actual) { + t.Errorf("Test #%d failed: expecting config %#v, got %#v", i, test.expected, actual) } } } func TestNegativeUpdateIntervalError(t *testing.T) { - BuildConfig([]string{"lwc", "--interval", "-1"}) + NewConfig([]string{"lwc", "--interval", "-1"}) if lwcutil.LastError != "Update interval cannot be negative" { t.Errorf("Expecting update interval error, got %#v", lwcutil.LastError) } } func TestPrintUsage(t *testing.T) { - config := BuildConfig([]string{"lwc"}) - config.PrintUsage() + c := NewConfig([]string{"lwc"}) + c.PrintUsage() out := string(lwcutil.FlushStdoutBuffer()) if !strings.HasPrefix(out, "Usage: lwc ") { t.Errorf("Expecting usage information, got %#v", out) } } + +func TestConfigProcessors(t *testing.T) { + config := Config{ + true, true, true, true, true, + "", + time.Millisecond, + false, false, + []string{}, + nil, + } + actualProcs := config.Processors() + actualCount := len(actualProcs) + expectedCount := 5 + if expectedCount != actualCount { + t.Fatalf("Expecting %d processors, got %d", expectedCount, actualCount) + } +} diff --git a/internal/app/lwc/output.go b/internal/app/lwc/output.go index 2af47d6..4d81ac6 100644 --- a/internal/app/lwc/output.go +++ b/internal/app/lwc/output.go @@ -13,7 +13,7 @@ const CARRIAGE_RETURN byte = 13 const LINE_FEED byte = 10 const SPACE byte = 32 -func FormatCounts(counts *[]uint64, label string, cr bool, lf bool) *bytes.Buffer { +func FormatCounts(counts *[]uint64, name string, cr bool, lf bool) *bytes.Buffer { buf := new(bytes.Buffer) if cr { buf.WriteByte(CARRIAGE_RETURN) @@ -23,9 +23,9 @@ func FormatCounts(counts *[]uint64, label string, cr bool, lf bool) *bytes.Buffe buf.WriteByte(SPACE) buf.WriteString(fmt.Sprintf(COUNT_FORMAT, (*counts)[i])) } - if label != "" { + if name != "" { buf.WriteByte(SPACE) - buf.WriteString(label) + buf.WriteString(name) } if lf { buf.WriteByte(LINE_FEED) @@ -33,8 +33,8 @@ func FormatCounts(counts *[]uint64, label string, cr bool, lf bool) *bytes.Buffe return buf } -func PrintCounts(counts *[]uint64, label string, cr bool, lf bool) { - lwcutil.GetStdout().Write(FormatCounts(counts, label, cr, lf).Bytes()) +func PrintCounts(counts *[]uint64, name string, cr bool, lf bool) { + lwcutil.GetStdout().Write(FormatCounts(counts, name, cr, lf).Bytes()) } func PollCounts(name string, counts *[]uint64, interval time.Duration, done chan bool) { diff --git a/internal/app/lwc/output_test.go b/internal/app/lwc/output_test.go index ed270e2..766f7ee 100644 --- a/internal/app/lwc/output_test.go +++ b/internal/app/lwc/output_test.go @@ -94,7 +94,7 @@ func TestFormatCounts(t *testing.T) { } } -func TestPrintCounts(t *testing.T) { +func TestPrintNamedCounts(t *testing.T) { PrintCounts(&[]uint64{1, 2, 3}, "file", true, true) actual := string(lwcutil.FlushStdoutBuffer()) expected := "\r 1 2 3 file\n" @@ -102,3 +102,12 @@ func TestPrintCounts(t *testing.T) { t.Errorf("Expecting %#v, got %#v", expected, actual) } } + +func TestPrintStdinCounts(t *testing.T) { + PrintCounts(&[]uint64{1, 2, 3}, "", true, true) + actual := string(lwcutil.FlushStdoutBuffer()) + expected := "\r 1 2 3\n" + if expected != actual { + t.Errorf("Expecting %#v, got %#v", expected, actual) + } +} diff --git a/internal/app/lwc/process.go b/internal/app/lwc/process.go index 7dc0db3..c1ccb59 100644 --- a/internal/app/lwc/process.go +++ b/internal/app/lwc/process.go @@ -16,32 +16,6 @@ type Processor struct { Scan ScanFunc } -func BuildProcessors(config *Config) []Processor { - var temp [5]Processor - i := 0 - if config.Lines { - temp[i] = Processor{bufio.ScanLines, ScanCount} - i++ - } - if config.Words { - temp[i] = Processor{bufio.ScanWords, ScanCount} - i++ - } - if config.Chars { - temp[i] = Processor{bufio.ScanRunes, ScanCount} - i++ - } - if config.Bytes { - temp[i] = Processor{bufio.ScanBytes, ScanCount} - i++ - } - if config.MaxLineLength { - temp[i] = Processor{bufio.ScanLines, ScanMaxLength} - i++ - } - return temp[0:i] -} - func ProcessReader(reader io.Reader, processor Processor, count *uint64, total *uint64) { scanner := bufio.NewScanner(reader) scanner.Split(processor.Split) @@ -51,7 +25,18 @@ func ProcessReader(reader io.Reader, processor Processor, count *uint64, total * } } -func ProcessFile(file *os.File, name string, processors []Processor, totals *[]uint64, interval time.Duration) { +func OpenFile(namePtr *string) (string, *os.File) { + if namePtr == nil { + return "", os.Stdin + } else { + return *namePtr, lwcutil.OpenFile(*namePtr) + } +} + +func ProcessFile(namePtr *string, processors []Processor, totals *[]uint64, interval time.Duration) { + // Open input file (can be stdin) + name, file := OpenFile(namePtr) + numCounts := len(processors) // Create counters @@ -99,26 +84,41 @@ func ProcessFile(file *os.File, name string, processors []Processor, totals *[]u PrintCounts(&counts, name, true, true) } -func ProcessFiles(config *Config, processors []Processor) { +func ProcessFiles(config *Config) { + files := config.FilesChan() + processors := config.Processors() + + name1 := <-*files + // If no files given, process stdin - if len(config.Files) == 0 { - ProcessFile(os.Stdin, "", processors, nil, config.Interval) + if name1 == "" { + ProcessFile(nil, processors, nil, config.Interval) return } numCounts := len(processors) + var totals *[]uint64 + + name2 := <-*files // If more than one file given, also calculate totals - var totals *[]uint64 - if len(config.Files) > 1 { + if name2 != "" { totalsRaw := make([]uint64, numCounts) totals = &totalsRaw } - // Process files sequentially - for _, name := range config.Files { - file := lwcutil.OpenFile(name) - ProcessFile(file, name, processors, totals, config.Interval) + ProcessFile(&name1, processors, totals, config.Interval) + + if name2 != "" { + ProcessFile(&name2, processors, totals, config.Interval) + + // Process files sequentially + for name := range *files { + if name == lwcutil.END_OF_FILES { + break + } + ProcessFile(&name, processors, totals, config.Interval) + } } // If we were keeping totals, print them now diff --git a/internal/app/lwc/process_test.go b/internal/app/lwc/process_test.go deleted file mode 100644 index 14779c5..0000000 --- a/internal/app/lwc/process_test.go +++ /dev/null @@ -1,22 +0,0 @@ -package lwc - -import ( - "testing" - "time" -) - -func TestBuildProcessors(t *testing.T) { - config := Config{ - true, true, true, true, true, - time.Millisecond, - false, false, - []string{}, - nil, - } - actualProcs := BuildProcessors(&config) - actualCount := len(actualProcs) - expectedCount := 5 - if expectedCount != actualCount { - t.Fatalf("Expecting %d processors, got %d", expectedCount, actualCount) - } -} diff --git a/internal/app/lwc/root.go b/internal/app/lwc/root.go index 29ed478..fc192c8 100644 --- a/internal/app/lwc/root.go +++ b/internal/app/lwc/root.go @@ -7,7 +7,7 @@ import ( func Run(version string) { // Read command-line args - config := BuildConfig(os.Args) + config := NewConfig(os.Args) switch { case config.Version: @@ -18,7 +18,6 @@ func Run(version string) { config.PrintUsage() default: // Process input - processors := BuildProcessors(&config) - ProcessFiles(&config, processors) + ProcessFiles(config) } } diff --git a/internal/pkg/lwcutil/chan.go b/internal/pkg/lwcutil/chan.go new file mode 100644 index 0000000..a679cd6 --- /dev/null +++ b/internal/pkg/lwcutil/chan.go @@ -0,0 +1,41 @@ +package lwcutil + +import ( + "bufio" + "io" +) + +const END_OF_FILES string = "" + +func ValidateFileName(name string) { + if len(name) == 0 { + Fatal("invalid zero-length file name") + } +} + +func NewFilesChanFromSlice(values []string) *chan string { + c := make(chan string) + go func() { + for _, value := range values { + ValidateFileName(value) + c <- value + } + c <- END_OF_FILES + }() + return &c +} + +func NewFilesChanFromReader(reader io.Reader, separator byte) *chan string { + c := make(chan string) + scanner := bufio.NewScanner(reader) + scanner.Split(SplitOnByte(0)) + go func() { + for scanner.Scan() { + name := scanner.Text() + ValidateFileName(name) + c <- name + } + c <- END_OF_FILES + }() + return &c +} diff --git a/internal/pkg/lwcutil/scan.go b/internal/pkg/lwcutil/scan.go new file mode 100644 index 0000000..eda79d2 --- /dev/null +++ b/internal/pkg/lwcutil/scan.go @@ -0,0 +1,21 @@ +package lwcutil + +import ( + "bufio" + "bytes" +) + +func SplitOnByte(b byte) bufio.SplitFunc { + return func(data []byte, atEOF bool) (advance int, token []byte, err error) { + if atEOF && len(data) == 0 { + return 0, nil, nil + } + if i := bytes.IndexByte(data, b); i >= 0 { + return i + 1, data[0:i], nil + } + if atEOF { + return len(data), data, nil + } + return 0, nil, nil + } +}