tpl: Fix countwords to handle special chars

Fixes #8479
This commit is contained in:
Julien Midedji 2021-05-03 09:10:06 +02:00 committed by GitHub
parent e1c328df25
commit 7a2c10ae60
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
2 changed files with 13 additions and 0 deletions

View file

@@ -17,6 +17,7 @@ package strings
import (
"errors"
"html/template"
"regexp"
"strings"
"unicode/utf8"
@@ -75,6 +76,15 @@ func (ns *Namespace) CountWords(s interface{}) (int, error) {
return 0, _errors.Wrap(err, "Failed to convert content to string")
}
isCJKLanguage, err := regexp.MatchString(`\p{Han}|\p{Hangul}|\p{Hiragana}|\p{Katakana}`, ss)
if err != nil {
return 0, _errors.Wrap(err, "Failed to match regex pattern against string")
}
if !isCJKLanguage {
return len(strings.Fields(helpers.StripHTML((ss)))), nil
}
counter := 0
for _, word := range strings.Fields(helpers.StripHTML(ss)) {
runeCount := utf8.RuneCountInString(word)

View file

@@ -210,6 +210,9 @@ func TestCountWords(t *testing.T) {
{"Do Be Do Be Do", 5},
{"旁边", 2},
{`<div class="test">旁边</div>`, 2},
{"Here's to you...", 3},
{"Heres to you...", 3},
{"Heres to you…", 3},
// errors
{tstNoStringer{}, false},
} {