From 77c60a3440806067109347d04eb5368b65ea0fe8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B8rn=20Erik=20Pedersen?= Date: Sun, 12 Jul 2015 11:05:37 +0200 Subject: [PATCH] Add RuneCount to Page Fixes #1266 --- docs/content/templates/variables.md | 1 + helpers/general.go | 5 +++++ hugolib/page.go | 15 +++++++++++++++ hugolib/page_test.go | 25 +++++++++++++++++++++++++ 4 files changed, 46 insertions(+) diff --git a/docs/content/templates/variables.md b/docs/content/templates/variables.md index cfe010e36..ca30174e2 100644 --- a/docs/content/templates/variables.md +++ b/docs/content/templates/variables.md @@ -47,6 +47,7 @@ matter, content or derived from file location. **.NextInSection** Pointer to the following content within the same section (based on pub date)
**.FuzzyWordCount** The approximate number of words in the content.
**.WordCount** The number of words in the content.
+**.RuneCount** The number of [runes](http://blog.golang.org/strings) in the content, excluding any whitespace. This may be a good alternative to `.WordCount` for Japanese and other CJK languages, where splitting words on spaces makes no sense. **.ReadingTime** The estimated time it takes to read the content in minutes.
**.Weight** Assigned weight (in the front matter) to this content, used in sorting.
**.RawContent** Raw Markdown content without the metadata header. Useful with [remarkjs.com](http://remarkjs.com)
diff --git a/helpers/general.go b/helpers/general.go index 540fb4332..c6adc855a 100644 --- a/helpers/general.go +++ b/helpers/general.go @@ -228,6 +228,11 @@ func Md5String(f string) string { return hex.EncodeToString(h.Sum([]byte{})) } +// IsWhitespace determines if the given rune is whitespace. +func IsWhitespace(r rune) bool { + return r == ' ' || r == '\t' || r == '\n' || r == '\r' +} + // Seq creates a sequence of integers. // It's named and used as GNU's seq. // Examples: diff --git a/hugolib/page.go b/hugolib/page.go index d8d8c00e6..1fa010abe 100644 --- a/hugolib/page.go +++ b/hugolib/page.go @@ -66,6 +66,7 @@ type Page struct { contentShortCodes map[string]string plain string // TODO should be []byte plainWords []string + plainRuneCount int plainInit sync.Once renderingConfig *helpers.Blackfriday renderingConfigInit sync.Once @@ -108,10 +109,24 @@ func (p *Page) PlainWords() []string { return p.plainWords } +// RuneCount returns the rune count, excluding any whitespace, of the plain content. 
+func (p *Page) RuneCount() int { + p.initPlain() + return p.plainRuneCount +} + func (p *Page) initPlain() { p.plainInit.Do(func() { p.plain = helpers.StripHTML(string(p.Content)) p.plainWords = strings.Fields(p.plain) + runeCount := 0 + for _, r := range p.plain { + if !helpers.IsWhitespace(r) { + runeCount++ + } + } + p.plainRuneCount = runeCount + return }) } diff --git a/hugolib/page_test.go b/hugolib/page_test.go index 2bf078f18..cdb38651f 100644 --- a/hugolib/page_test.go +++ b/hugolib/page_test.go @@ -138,6 +138,16 @@ title: Simple Summary Same Line Some more text +` + + SIMPLE_PAGE_WITH_FIVE_MULTIBYTE_UFT8_RUNES = `--- +title: Simple +--- + + +€ € € € € + + ` SIMPLE_PAGE_WITH_LONG_CONTENT = `--- @@ -470,6 +480,21 @@ func TestPageWithDate(t *testing.T) { checkPageDate(t, p, d) } +func TestRuneCount(t *testing.T) { + p, _ := NewPage("simple.md") + _, err := p.ReadFrom(strings.NewReader(SIMPLE_PAGE_WITH_FIVE_MULTIBYTE_UFT8_RUNES)) + p.Convert() + p.analyzePage() + if err != nil { + t.Fatalf("Unable to create a page with frontmatter and body content: %s", err) + } + + if p.RuneCount() != 5 { + t.Fatalf("incorrect rune count for content '%s'. expected %v, got %v", p.plain, 1, p.RuneCount()) + + } +} + func TestWordCount(t *testing.T) { p, _ := NewPage("simple.md") _, err := p.ReadFrom(strings.NewReader(SIMPLE_PAGE_WITH_LONG_CONTENT))