Add RuneCount to Page

Fixes #1266
This commit is contained in:
Bjørn Erik Pedersen 2015-07-12 11:05:37 +02:00
parent 90af334c21
commit 77c60a3440
4 changed files with 46 additions and 0 deletions

View file

@ -47,6 +47,7 @@ matter, content or derived from file location.
**.NextInSection** Pointer to the following content within the same section (based on pub date)<br>
**.FuzzyWordCount** The approximate number of words in the content.<br>
**.WordCount** The number of words in the content.<br>
**.RuneCount** The number of [runes](http://blog.golang.org/strings) in the content, excluding any whitespace. This may be a good alternative to `.WordCount` for Japanese and other CJK languages, where splitting words on spaces makes no sense.<br>
**.ReadingTime** The estimated time it takes to read the content in minutes.<br>
**.Weight** Assigned weight (in the front matter) to this content, used in sorting.<br>
**.RawContent** Raw Markdown content without the metadata header. Useful with [remarkjs.com](http://remarkjs.com)<br>

View file

@ -228,6 +228,11 @@ func Md5String(f string) string {
return hex.EncodeToString(h.Sum([]byte{}))
}
// IsWhitespace reports whether r is one of the four ASCII whitespace
// characters: space, horizontal tab, line feed or carriage return.
// Any other rune, including other Unicode space characters, yields false.
func IsWhitespace(r rune) bool {
	switch r {
	case ' ', '\t', '\n', '\r':
		return true
	default:
		return false
	}
}
// Seq creates a sequence of integers.
// It's named and used as GNU's seq.
// Examples:

View file

@ -66,6 +66,7 @@ type Page struct {
contentShortCodes map[string]string
plain string // TODO should be []byte
plainWords []string
plainRuneCount int
plainInit sync.Once
renderingConfig *helpers.Blackfriday
renderingConfigInit sync.Once
@ -108,10 +109,24 @@ func (p *Page) PlainWords() []string {
return p.plainWords
}
// RuneCount reports the number of runes in the page's plain content,
// leaving out whitespace runes. The value is computed on first use and
// cached on the page.
func (p *Page) RuneCount() int {
	// Make sure the lazily computed plain-text data has been populated.
	p.initPlain()

	return p.plainRuneCount
}
// initPlain populates the page's plain-text derived fields exactly once,
// guarded by p.plainInit: the HTML-stripped content (p.plain), its
// whitespace-separated words (p.plainWords) and the number of
// non-whitespace runes (p.plainRuneCount).
func (p *Page) initPlain() {
	p.plainInit.Do(func() {
		p.plain = helpers.StripHTML(string(p.Content))
		p.plainWords = strings.Fields(p.plain)

		// Ranging over a string yields runes rather than bytes, so each
		// multi-byte character is counted once.
		count := 0
		for _, r := range p.plain {
			if !helpers.IsWhitespace(r) {
				count++
			}
		}
		p.plainRuneCount = count
	})
}

View file

@ -138,6 +138,16 @@ title: Simple
Summary Same Line<!--more-->
Some more text
`
SIMPLE_PAGE_WITH_FIVE_MULTIBYTE_UFT8_RUNES = `---
title: Simple
---
`
SIMPLE_PAGE_WITH_LONG_CONTENT = `---
@ -470,6 +480,21 @@ func TestPageWithDate(t *testing.T) {
checkPageDate(t, p, d)
}
// TestRuneCount verifies that Page.RuneCount counts multi-byte UTF-8
// characters as single runes and ignores whitespace.
func TestRuneCount(t *testing.T) {
	const expected = 5

	p, err := NewPage("simple.md")
	if err != nil {
		t.Fatalf("Unable to create page: %s", err)
	}

	// Check the read error before doing any further work on the page, so a
	// failed read is reported instead of being processed.
	if _, err := p.ReadFrom(strings.NewReader(SIMPLE_PAGE_WITH_FIVE_MULTIBYTE_UFT8_RUNES)); err != nil {
		t.Fatalf("Unable to create a page with frontmatter and body content: %s", err)
	}

	p.Convert()
	p.analyzePage()

	if got := p.RuneCount(); got != expected {
		t.Fatalf("incorrect rune count for content '%s'. expected %v, got %v", p.plain, expected, got)
	}
}
func TestWordCount(t *testing.T) {
p, _ := NewPage("simple.md")
_, err := p.ReadFrom(strings.NewReader(SIMPLE_PAGE_WITH_LONG_CONTENT))