Improve TotalWords counter func

Counting words in a single pass is more efficient than splitting the string with strings.Fields when we do not care about the actual words.

```
BenchmarkTotalWords-4            100000         18795 ns/op           0 B/op           0 allocs/op
BenchmarkTotalWordsOld-4          30000         46751 ns/op        6400 B/op           1 allocs/op
```
This commit is contained in:
Bjørn Erik Pedersen 2016-08-17 06:37:19 +02:00
parent bcd434794a
commit 4abaec5c04
3 changed files with 56 additions and 10 deletions

View file

@ -384,8 +384,25 @@ func RenderBytes(ctx *RenderingContext) []byte {
}
}
// TotalWords returns the number of words in s, where a word is a maximal run
// of characters that are not white space as defined by unicode.IsSpace.
// It walks the string once and counts every transition from white space (or
// the start of the string) into a word, so it does not need to build the
// slice of words that the obvious len(strings.Fields(s)) would.
func TotalWords(s string) int {
	count := 0
	// Treat the position before the first rune as white space so that a
	// leading non-space rune starts the first word.
	prevSpace := true
	for _, r := range s {
		space := unicode.IsSpace(r)
		if prevSpace && !space {
			count++
		}
		prevSpace = space
	}
	return count
}
// totalWordsOld is the previous word counter: it splits s on white space with
// strings.Fields and returns how many fields resulted.
// It is only kept for benchmark comparison.
// TODO(bep) remove
func totalWordsOld(s string) int {
	fields := strings.Fields(s)
	return len(fields)
}

View file

@ -408,12 +408,45 @@ func TestExtractNoTOC(t *testing.T) {
}
}
func TestTotalWords(t *testing.T) {
testString := "Two, Words!"
actualWordCount := TotalWords(testString)
// totalWordsBenchmarkString is "Hugo Rocks " repeated 200 times, i.e. 400
// words; it is the shared input for the TotalWords test and benchmarks.
var totalWordsBenchmarkString = strings.Repeat("Hugo Rocks ", 200)
if actualWordCount != 2 {
t.Errorf("Actual word count (%d) for test string (%s) did not match 2.", actualWordCount, testString)
// TestTotalWords runs TotalWords over a table of inputs with known word
// counts and reports every case whose result differs from the expectation.
func TestTotalWords(t *testing.T) {
	cases := []struct {
		s     string
		words int
	}{
		{"Two, Words!", 2},
		{"Word", 1},
		{"", 0},
		{"One, Two, Three", 3},
		{totalWordsBenchmarkString, 400},
	}

	for i, tc := range cases {
		got := TotalWords(tc.s)
		if got == tc.words {
			continue
		}
		t.Errorf("[%d] Actual word count (%d) for test string (%s) did not match %d", i, got, tc.s, tc.words)
	}
}
// BenchmarkTotalWords measures TotalWords on the shared benchmark string and
// aborts if the result is not the expected 400 words.
func BenchmarkTotalWords(b *testing.B) {
	b.ResetTimer()
	for n := 0; n < b.N; n++ {
		// Checking the result also keeps the call from being optimized away.
		if got := TotalWords(totalWordsBenchmarkString); got != 400 {
			b.Fatal("Wordcount error")
		}
	}
}
// BenchmarkTotalWordsOld measures the previous strings.Fields-based counter
// on the same input as BenchmarkTotalWords so the two can be compared.
func BenchmarkTotalWordsOld(b *testing.B) {
	b.ResetTimer()
	for n := 0; n < b.N; n++ {
		// Checking the result also keeps the call from being optimized away.
		if got := totalWordsOld(totalWordsBenchmarkString); got != 400 {
			b.Fatal("Wordcount error")
		}
	}
}

View file

@ -486,10 +486,6 @@ func (p *Page) ReadFrom(buf io.Reader) (int64, error) {
}
func (p *Page) analyzePage() {
// TODO(bep)
if true {
return
}
if p.isCJKLanguage {
p.WordCount = 0
for _, word := range p.PlainWords() {