Skip to content

Commit

Permalink
Fix up ShannonEntropyBits (#127)
Browse files Browse the repository at this point in the history
Breaking, but the typo is too embarrassing to keep ;)
  • Loading branch information
klauspost authored Jun 17, 2019
1 parent 4dbb2ac commit 5e801bf
Show file tree
Hide file tree
Showing 2 changed files with 26 additions and 23 deletions.
17 changes: 10 additions & 7 deletions compressible.go
Original file line number Diff line number Diff line change
Expand Up @@ -62,20 +62,23 @@ func Estimate(b []byte) float64 {
return math.Pow((prediction+entropy)/2, 0.9)
}

// SnannonEntropyBits returns the number of bits minimum required to represent
// ShannonEntropyBits returns the minimum number of bits required to represent
// an entropy encoding of the input bytes.
// https://en.wiktionary.org/wiki/Shannon_entropy
func SnannonEntropyBits(b []byte) int {
func ShannonEntropyBits(b []byte) int {
if len(b) == 0 {
return 0
}
var hist [256]int
for _, c := range b {
hist[c]++
}
shannon := float64(0)
total := float64(len(b))
for i := range hist[:] {
n := float64(hist[i])
if n > 0 {
shannon += math.Log2(total/n) * n
invTotal := 1.0 / float64(len(b))
for _, v := range hist[:] {
if v > 0 {
n := float64(v)
shannon += math.Ceil(-math.Log2(n*invTotal) * n)
}
}
return int(math.Ceil(shannon))
Expand Down
32 changes: 16 additions & 16 deletions compressible_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -134,9 +134,9 @@ func BenchmarkSnannonEntropyBits(b *testing.B) {
b.SetBytes(int64(len(testData)))
b.ResetTimer()
for i := 0; i < b.N; i++ {
SnannonEntropyBits(testData)
ShannonEntropyBits(testData)
}
b.Log(SnannonEntropyBits(testData))
b.Log(ShannonEntropyBits(testData))
})

// (predictable, high entropy distribution)
Expand All @@ -148,9 +148,9 @@ func BenchmarkSnannonEntropyBits(b *testing.B) {
b.SetBytes(int64(len(testData)))
b.ResetTimer()
for i := 0; i < b.N; i++ {
SnannonEntropyBits(testData)
ShannonEntropyBits(testData)
}
b.Log(SnannonEntropyBits(testData))
b.Log(ShannonEntropyBits(testData))
})

// (not predictable, high entropy distribution)
Expand All @@ -160,9 +160,9 @@ func BenchmarkSnannonEntropyBits(b *testing.B) {
b.SetBytes(int64(len(testData)))
b.ResetTimer()
for i := 0; i < b.N; i++ {
SnannonEntropyBits(testData)
ShannonEntropyBits(testData)
}
b.Log(SnannonEntropyBits(testData))
b.Log(ShannonEntropyBits(testData))
})

// (not predictable, high entropy distribution)
Expand All @@ -172,9 +172,9 @@ func BenchmarkSnannonEntropyBits(b *testing.B) {
b.SetBytes(int64(len(testData)))
b.ResetTimer()
for i := 0; i < b.N; i++ {
SnannonEntropyBits(testData)
ShannonEntropyBits(testData)
}
b.Log(SnannonEntropyBits(testData))
b.Log(ShannonEntropyBits(testData))
})

// (not predictable, high entropy distribution)
Expand All @@ -184,9 +184,9 @@ func BenchmarkSnannonEntropyBits(b *testing.B) {
b.SetBytes(int64(len(testData)))
b.ResetTimer()
for i := 0; i < b.N; i++ {
SnannonEntropyBits(testData)
ShannonEntropyBits(testData)
}
b.Log(SnannonEntropyBits(testData))
b.Log(ShannonEntropyBits(testData))
})

// (not predictable, high entropy distribution)
Expand All @@ -196,9 +196,9 @@ func BenchmarkSnannonEntropyBits(b *testing.B) {
b.SetBytes(int64(len(testData)))
b.ResetTimer()
for i := 0; i < b.N; i++ {
SnannonEntropyBits(testData)
ShannonEntropyBits(testData)
}
b.Log(SnannonEntropyBits(testData))
b.Log(ShannonEntropyBits(testData))
})

// (not predictable, medium entropy distribution)
Expand All @@ -211,9 +211,9 @@ func BenchmarkSnannonEntropyBits(b *testing.B) {
b.SetBytes(int64(len(testData)))
b.ResetTimer()
for i := 0; i < b.N; i++ {
SnannonEntropyBits(testData)
ShannonEntropyBits(testData)
}
b.Log(SnannonEntropyBits(testData))
b.Log(ShannonEntropyBits(testData))
})
// (medium predictable, medium entropy distribution)
b.Run("text", func(b *testing.B) {
Expand All @@ -234,9 +234,9 @@ Thoughts?`)
b.SetBytes(int64(len(testData)))
b.ResetTimer()
for i := 0; i < b.N; i++ {
SnannonEntropyBits(testData)
ShannonEntropyBits(testData)
}
b.Log(SnannonEntropyBits(testData))
b.Log(ShannonEntropyBits(testData))
})
}

Expand Down

0 comments on commit 5e801bf

Please sign in to comment.