diff --git a/helpers/content.go b/helpers/content.go index 53176de64..bb7819175 100644 --- a/helpers/content.go +++ b/helpers/content.go @@ -21,6 +21,7 @@ import ( "bytes" "html/template" "os/exec" + "unicode" "unicode/utf8" "github.com/miekg/mmark" @@ -424,10 +425,55 @@ func TruncateWordsByRune(words []string, max int) (string, bool) { return strings.Join(words, " "), false } -// TruncateWordsToWholeSentence takes content and an int -// and returns entire sentences from content, delimited by the int -// and whether it's truncated or not. -func TruncateWordsToWholeSentence(words []string, max int) (string, bool) { +// TruncateWordsToWholeSentence takes content and truncates to whole sentence +// limited by max number of words. It also returns whether it is truncated. +func TruncateWordsToWholeSentence(s string, max int) (string, bool) { + + var ( + wordCount = 0 + lastWordIndex = -1 + ) + + for i, r := range s { + if unicode.IsSpace(r) { + wordCount++ + lastWordIndex = i + + if wordCount >= max { + break + } + + } + } + + if lastWordIndex == -1 { + return s, false + } + + endIndex := -1 + + for j, r := range s[lastWordIndex:] { + if isEndOfSentence(r) { + endIndex = j + lastWordIndex + utf8.RuneLen(r) + break + } + } + + if endIndex == -1 { + return s, false + } + + return strings.TrimSpace(s[:endIndex]), endIndex < len(s) +} + +func isEndOfSentence(r rune) bool { + return r == '.' || r == '?' || r == '!' || r == '"' || r == '\n' +} + +// Kept only for benchmark. +func truncateWordsToWholeSentenceOld(content string, max int) (string, bool) { + words := strings.Fields(content) + if max >= len(words) { return strings.Join(words, " "), false } diff --git a/helpers/content_test.go b/helpers/content_test.go index 3a038ea12..5165a7a26 100644 --- a/helpers/content_test.go +++ b/helpers/content_test.go @@ -64,6 +64,22 @@ func TestBytesToHTML(t *testing.T) { assert.Equal(t, template.HTML("dobedobedo"), BytesToHTML([]byte("dobedobedo"))) } +var benchmarkTruncateString = strings.Repeat("This is a sentence about nothing.", 20) + +func BenchmarkTestTruncateWordsToWholeSentence(b *testing.B) { + b.ResetTimer() + for i := 0; i < b.N; i++ { + TruncateWordsToWholeSentence(benchmarkTruncateString, SummaryLength) + } +} + +func BenchmarkTestTruncateWordsToWholeSentenceOld(b *testing.B) { + b.ResetTimer() + for i := 0; i < b.N; i++ { + truncateWordsToWholeSentenceOld(benchmarkTruncateString, SummaryLength) + } +} + func TestTruncateWordsToWholeSentence(t *testing.T) { type test struct { input, expected string @@ -77,10 +93,11 @@ func TestTruncateWordsToWholeSentence(t *testing.T) { {"This is a sentence.", "This is a sentence.", 5, false}, {"This is also a sentence!", "This is also a sentence!", 1, false}, {"To be. Or not to be. That's the question.", "To be.", 1, true}, - {" \nThis is not a sentence\n ", "This is not a", 4, true}, + {" \nThis is not a sentence\nAnd this is another", "This is not a sentence", 4, true}, + {"", "", 10, false}, } for i, d := range data { - output, truncated := TruncateWordsToWholeSentence(strings.Fields(d.input), d.max) + output, truncated := TruncateWordsToWholeSentence(d.input, d.max) if d.expected != output { t.Errorf("Test %d failed. Expected %q got %q", i, d.expected, output) } diff --git a/hugolib/page.go b/hugolib/page.go index bc54aac91..0784f5bf8 100644 --- a/hugolib/page.go +++ b/hugolib/page.go @@ -89,6 +89,7 @@ type Page struct { plain string // TODO should be []byte plainWords []string plainInit sync.Once + plainWordsInit sync.Once renderingConfig *helpers.Blackfriday renderingConfigInit sync.Once pageMenus PageMenus @@ -147,14 +148,20 @@ func (p *Page) Plain() string { } func (p *Page) PlainWords() []string { - p.initPlain() + p.initPlainWords() return p.plainWords } func (p *Page) initPlain() { p.plainInit.Do(func() { p.plain = helpers.StripHTML(string(p.Content)) - p.plainWords = strings.Fields(p.plain) + return + }) +} + +func (p *Page) initPlainWords() { + p.plainWordsInit.Do(func() { + p.plainWords = strings.Fields(p.Plain()) return }) } @@ -335,7 +342,7 @@ func (p *Page) setAutoSummary() error { if p.isCJKLanguage { summary, truncated = helpers.TruncateWordsByRune(p.PlainWords(), helpers.SummaryLength) } else { - summary, truncated = helpers.TruncateWordsToWholeSentence(p.PlainWords(), helpers.SummaryLength) + summary, truncated = helpers.TruncateWordsToWholeSentence(p.Plain(), helpers.SummaryLength) } p.Summary = template.HTML(summary) p.Truncated = truncated @@ -479,6 +486,10 @@ func (p *Page) ReadFrom(buf io.Reader) (int64, error) { } func (p *Page) analyzePage() { + // TODO(bep) + if true { + return + } if p.isCJKLanguage { p.WordCount = 0 for _, word := range p.PlainWords() {