Skip to content

Commit f8d212a

Browse files
authored
Merge pull request #10 from bitfield/freq
Add Freq() filter
2 parents b7ef548 + ec68984 commit f8d212a

File tree

6 files changed

+108
-1
lines changed

6 files changed

+108
-1
lines changed

README.md

Lines changed: 39 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -334,7 +334,45 @@ fmt.Println(output)
334334
`First()` reads its input and passes on the first N lines of it (like Unix [`head`](examples/head/main.go)):
335335

336336
```go
337-
Stdin().First(10).Stdout()
337+
script.Stdin().First(10).Stdout()
338+
```
339+
340+
### Freq
341+
342+
`Freq()` counts the frequencies of input lines, and outputs only the unique lines in the input, each prefixed with a count of its frequency, in descending order of frequency (that is, most frequent lines first). Lines with the same frequency will be sorted alphabetically. For example, given this input:
343+
344+
```
345+
banana
346+
apple
347+
orange
348+
apple
349+
banana
350+
```
351+
352+
and a program like:
353+
354+
```go
355+
script.Stdin().Freq().Stdout()
356+
```
357+
358+
the output will be:
359+
360+
```
361+
2 apple
362+
2 banana
363+
1 orange
364+
```
365+
366+
This mirrors a common shell-script pattern for finding the most frequently occurring lines in a file:
367+
368+
```sh
369+
sort testdata/freq.input.txt |uniq -c |sort -rn
370+
```
371+
372+
`Freq()`'s behaviour is like the combination of Unix `sort`, `uniq -c`, and `sort -rn` used here. You can use `Freq()` in combination with `First()` to get, for example, the ten most common lines in a file:
373+
374+
```go
375+
script.Stdin().Freq().First(10).Stdout()
338376
```
339377

340378
### Join

filters.go

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,10 +3,12 @@ package script
33
import (
44
"bufio"
55
"bytes"
6+
"fmt"
67
"io"
78
"os"
89
"os/exec"
910
"regexp"
11+
"sort"
1012
"strings"
1113
)
1214

@@ -157,3 +159,38 @@ func (p *Pipe) First(lines int) *Pipe {
157159
}
158160
return Echo(output.String())
159161
}
162+
163+
// Freq reads from the pipe, and returns a new pipe containing only unique lines
164+
// from the input, prefixed with a frequency count, in descending numerical
165+
// order (most frequent lines first). Lines with equal frequency will be sorted
166+
// alphabetically. If there is an error reading the pipe, the pipe's error
167+
// status is also set.
168+
func (p *Pipe) Freq() *Pipe {
169+
if p == nil || p.Error() != nil {
170+
return p
171+
}
172+
var freq = map[string]int{}
173+
p.EachLine(func(line string, out *strings.Builder) {
174+
freq[line]++
175+
})
176+
type frequency struct {
177+
line string
178+
count int
179+
}
180+
var freqs = make([]frequency, 0, len(freq))
181+
for line, count := range freq {
182+
freqs = append(freqs, frequency{line, count})
183+
}
184+
sort.Slice(freqs, func(i, j int) bool {
185+
if freqs[i].count == freqs[j].count {
186+
return freqs[i].line < freqs[j].line
187+
}
188+
return freqs[i].count > freqs[j].count
189+
})
190+
var output strings.Builder
191+
for _, item := range freqs {
192+
output.WriteString(fmt.Sprintf("%d %s", item.count, item.line))
193+
output.WriteRune('\n')
194+
}
195+
return Echo(output.String())
196+
}

filters_test.go

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -255,3 +255,18 @@ func TestFirst(t *testing.T) {
255255
t.Errorf("want %q, got %q", want, got)
256256
}
257257
}
258+
259+
func TestFreq(t *testing.T) {
260+
t.Parallel()
261+
want, err := ioutil.ReadFile("testdata/freq.golden.txt")
262+
if err != nil {
263+
t.Fatal(err)
264+
}
265+
got, err := File("testdata/freq.input.txt").Freq().Bytes()
266+
if err != nil {
267+
t.Error(err)
268+
}
269+
if !bytes.Equal(got, want) {
270+
t.Errorf("want %q, got %q", want, got)
271+
}
272+
}

pipes_test.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -102,6 +102,8 @@ func doMethodsOnPipe(t *testing.T, p *Pipe, kind string) {
102102
p.Read([]byte{})
103103
action = "First()"
104104
p.First(1)
105+
action = "Freq()"
106+
p.Freq()
105107
}
106108

107109
func TestNilPipes(t *testing.T) {

testdata/freq.golden.txt

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
4 apple
2+
4 banana
3+
2 orange
4+
1 kumquat

testdata/freq.input.txt

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
apple
2+
orange
3+
banana
4+
banana
5+
apple
6+
orange
7+
kumquat
8+
apple
9+
apple
10+
banana
11+
banana

0 commit comments

Comments
 (0)