// TruncateWordsToWholeSentence takes content split into words and returns the
// leading part of it that fits within max, extended to the end of the current
// sentence where possible, together with a flag reporting whether any content
// was left out.
//
// Length is measured in units: a word in which every rune occupies a single
// byte counts as one unit, while a word containing multi-byte runes (for
// example CJK text, which is not separated by spaces) counts one unit per
// rune. Such a word is cut between runes when it would exceed max; the result
// then ends at the cut and is always reported as truncated.
//
// Once max units have been consumed on a word boundary, the following
// single-byte words are scanned for one ending in ".", "?", "!" or `."`. If
// one is found the result runs up to and including that word. The scan stops
// at the first word that contains multi-byte runes.
//
// The words slice is never modified.
func TruncateWordsToWholeSentence(words []string, max int) (string, bool) {
	count := 0 // units consumed so far
	kept := 0  // number of leading words kept whole

	for i, word := range words {
		runes := utf8.RuneCountInString(word)
		switch {
		case runes == len(word):
			// Only single-byte runes: the whole word is one unit.
			count++
		case count+runes <= max:
			// Multi-byte word that still fits completely.
			count += runes
		default:
			// Multi-byte word that overflows max: keep only as many runes
			// as there are units left and stop right there. Continuing to
			// look for a sentence end would glue later words onto a word
			// that has lost its tail.
			offset := 0
			for ; count < max; count++ {
				_, width := utf8.DecodeRuneInString(word[offset:])
				offset += width
			}
			if offset == 0 {
				return strings.Join(words[:i], " "), true
			}
			// Build the result in a fresh slice so the caller's words are
			// left untouched.
			parts := make([]string, 0, i+1)
			parts = append(parts, words[:i]...)
			parts = append(parts, word[:offset])
			return strings.Join(parts, " "), true
		}

		kept = i + 1
		if count >= max {
			break
		}
	}

	// Every word was consumed, so nothing was cut off.
	if kept == len(words) {
		return strings.Join(words, " "), false
	}

	// The limit was reached with words remaining: try to finish the sentence.
	for i := kept; i < len(words); i++ {
		word := words[i]
		if len(word) != utf8.RuneCountInString(word) {
			break
		}
		if strings.HasSuffix(word, ".") ||
			strings.HasSuffix(word, "?") ||
			strings.HasSuffix(word, ".\"") ||
			strings.HasSuffix(word, "!") {
			upper := i + 1
			return strings.Join(words[:upper], " "), upper < len(words)
		}
	}

	return strings.Join(words[:kept], " "), true
}
diff --git a/helpers/content_test.go b/helpers/content_test.go index 602ca3785..f614011c0 100644 --- a/helpers/content_test.go +++ b/helpers/content_test.go @@ -54,6 +54,8 @@ func TestTruncateWordsToWholeSentence(t *testing.T) { {"a b c", "a b c", 12, false}, {"a b c", "a b c", 3, false}, {"a", "a", 1, false}, + {"Hello 中国", "Hello 中", 2, true}, + {"Hello 中国", "Hello 中国", 3, false}, {"This is a sentence.", "This is a sentence.", 5, false}, {"This is also a sentence!", "This is also a sentence!", 1, false}, {"To be. Or not to be. That's the question.", "To be.", 1, true}, diff --git a/hugolib/page.go b/hugolib/page.go index b80e92257..c50e2da18 100644 --- a/hugolib/page.go +++ b/hugolib/page.go @@ -31,6 +31,7 @@ import ( "strings" "sync" "time" + "unicode/utf8" "github.com/spf13/cast" bp "github.com/spf13/hugo/bufferpool" @@ -362,7 +363,16 @@ func (p *Page) ReadFrom(buf io.Reader) (int64, error) { } func (p *Page) analyzePage() { - p.WordCount = len(p.PlainWords()) + p.WordCount = 0 + for _, word := range p.PlainWords() { + runeCount := utf8.RuneCountInString(word) + if len(word) == runeCount { + p.WordCount++ + } else { + p.WordCount += runeCount + } + } + p.FuzzyWordCount = int((p.WordCount+100)/100) * 100 p.ReadingTime = int((p.WordCount + 212) / 213) }