diff --git a/helpers/content.go b/helpers/content.go index bb7819175..9d35675f7 100644 --- a/helpers/content.go +++ b/helpers/content.go @@ -384,8 +384,25 @@ func RenderBytes(ctx *RenderingContext) []byte { } } -// TotalWords returns an int of the total number of words in a given content. +// TotalWords returns the number of words in s, where a word is a maximal run +// of characters for which unicode.IsSpace reports false. +// It yields the same count as len(strings.Fields(s)) without building the slice of fields. func TotalWords(s string) int { + n := 0 + inWord := false + for _, r := range s { + wasInWord := inWord + inWord = !unicode.IsSpace(r) + if inWord && !wasInWord { + n++ + } + } + return n +} + +// totalWordsOld is the previous implementation, kept only for benchmark comparison. +// TODO(bep) remove +func totalWordsOld(s string) int { return len(strings.Fields(s)) } diff --git a/helpers/content_test.go b/helpers/content_test.go index 5165a7a26..82af70f8f 100644 --- a/helpers/content_test.go +++ b/helpers/content_test.go @@ -408,12 +408,45 @@ func TestExtractNoTOC(t *testing.T) { } } -func TestTotalWords(t *testing.T) { - testString := "Two, Words!" 
- actualWordCount := TotalWords(testString) +var totalWordsBenchmarkString = strings.Repeat("Hugo Rocks ", 200) - if actualWordCount != 2 { - t.Errorf("Actual word count (%d) for test string (%s) did not match 2.", actualWordCount, testString) +func TestTotalWords(t *testing.T) { + + for i, this := range []struct { + s string + words int + }{ + {"Two, Words!", 2}, + {"Word", 1}, + {"", 0}, + {"One, Two, Three", 3}, + {totalWordsBenchmarkString, 400}, + } { + actualWordCount := TotalWords(this.s) + + if actualWordCount != this.words { + t.Errorf("[%d] Actual word count (%d) for test string (%s) did not match %d", i, actualWordCount, this.s, this.words) + } + } +} + +func BenchmarkTotalWords(b *testing.B) { + b.ResetTimer() + for i := 0; i < b.N; i++ { + wordCount := TotalWords(totalWordsBenchmarkString) + if wordCount != 400 { + b.Fatal("Wordcount error") + } + } +} + +func BenchmarkTotalWordsOld(b *testing.B) { + b.ResetTimer() + for i := 0; i < b.N; i++ { + wordCount := totalWordsOld(totalWordsBenchmarkString) + if wordCount != 400 { + b.Fatal("Wordcount error") + } } } diff --git a/hugolib/page.go b/hugolib/page.go index 0784f5bf8..66d099bc0 100644 --- a/hugolib/page.go +++ b/hugolib/page.go @@ -486,10 +486,6 @@ func (p *Page) ReadFrom(buf io.Reader) (int64, error) { } func (p *Page) analyzePage() { - // TODO(bep) - if true { - return - } if p.isCJKLanguage { p.WordCount = 0 for _, word := range p.PlainWords() {