From 1b7ecfc2e176315b69914756c70b46306561e4d1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B8rn=20Erik=20Pedersen?= Date: Thu, 18 Oct 2018 09:47:39 +0200 Subject: [PATCH] hugolib: Use []byte in shortcode parsing See #5324 --- hugolib/page.go | 2 +- hugolib/shortcode.go | 24 +++++++++++++----------- hugolib/shortcode_test.go | 2 +- parser/pageparser/item.go | 4 ++++ parser/pageparser/pagelexer.go | 11 ----------- parser/pageparser/pageparser.go | 10 +++++++++- 6 files changed, 28 insertions(+), 25 deletions(-) diff --git a/hugolib/page.go b/hugolib/page.go index 0359769e3..e867dd525 100644 --- a/hugolib/page.go +++ b/hugolib/page.go @@ -1871,7 +1871,7 @@ func (p *Page) SaveSource() error { // TODO(bep) lazy consolidate func (p *Page) processShortcodes() error { p.shortcodeState = newShortcodeHandler(p) - tmpContent, err := p.shortcodeState.extractShortcodes(string(p.workContent), p.withoutContent()) + tmpContent, err := p.shortcodeState.extractShortcodes(p.workContent, p.withoutContent()) if err != nil { return err } diff --git a/hugolib/shortcode.go b/hugolib/shortcode.go index f7141031d..a21a10ad2 100644 --- a/hugolib/shortcode.go +++ b/hugolib/shortcode.go @@ -553,9 +553,9 @@ Loop: return sc, nil case currItem.IsText(): - sc.inner = append(sc.inner, currItem.Val) + sc.inner = append(sc.inner, currItem.ValStr()) case currItem.IsShortcodeName(): - sc.name = currItem.Val + sc.name = currItem.ValStr() // We pick the first template for an arbitrary output format // if more than one. It is "all inner or no inner". tmpl := getShortcodeTemplateForTemplateKey(scKey{}, sc.name, p.s.Tmpl) @@ -576,11 +576,11 @@ Loop: // named params if sc.params == nil { params := make(map[string]string) - params[currItem.Val] = pt.Next().Val + params[currItem.ValStr()] = pt.Next().ValStr() sc.params = params } else { if params, ok := sc.params.(map[string]string); ok { - params[currItem.Val] = pt.Next().Val + params[currItem.ValStr()] = pt.Next().ValStr() } else { return sc, errShortCodeIllegalState } @@ -590,11 +590,11 @@ Loop: // positional params if sc.params == nil { var params []string - params = append(params, currItem.Val) + params = append(params, currItem.ValStr()) sc.params = params } else { if params, ok := sc.params.([]string); ok { - params = append(params, currItem.Val) + params = append(params, currItem.ValStr()) sc.params = params } else { return sc, errShortCodeIllegalState @@ -613,19 +613,21 @@ Loop: return sc, nil } -func (s *shortcodeHandler) extractShortcodes(stringToParse string, p *PageWithoutContent) (string, error) { +var shortCodeStart = []byte("{{") - startIdx := strings.Index(stringToParse, "{{") +func (s *shortcodeHandler) extractShortcodes(input []byte, p *PageWithoutContent) (string, error) { + + startIdx := bytes.Index(input, shortCodeStart) // short cut for docs with no shortcodes if startIdx < 0 { - return stringToParse, nil + return string(input), nil } // the parser takes a string; // since this is an internal API, it could make sense to use the mutable []byte all the way, but // it seems that the time isn't really spent in the byte copy operations, and the impl. gets a lot cleaner - pt := pageparser.ParseFrom(stringToParse, startIdx) + pt := pageparser.ParseFrom(input, startIdx) result := bp.GetBuffer() defer bp.PutBuffer(result) @@ -642,7 +644,7 @@ Loop: switch { case currItem.IsText(): - result.WriteString(currItem.Val) + result.WriteString(currItem.ValStr()) case currItem.IsLeftShortcodeDelim(): // let extractShortcode handle left delim (will do so recursively) pt.Backup() diff --git a/hugolib/shortcode_test.go b/hugolib/shortcode_test.go index 3385d31f0..f8837810c 100644 --- a/hugolib/shortcode_test.go +++ b/hugolib/shortcode_test.go @@ -424,7 +424,7 @@ func TestExtractShortcodes(t *testing.T) { return fmt.Sprintf("HAHA%s-%dHBHB", shortcodePlaceholderPrefix, counter) } - content, err := s.extractShortcodes(this.input, p.withoutContent()) + content, err := s.extractShortcodes([]byte(this.input), p.withoutContent()) if b, ok := this.expect.(bool); ok && !b { if err == nil { diff --git a/parser/pageparser/item.go b/parser/pageparser/item.go index 35bc8e268..6e93bb696 100644 --- a/parser/pageparser/item.go +++ b/parser/pageparser/item.go @@ -21,6 +21,10 @@ type Item struct { Val []byte } +func (i Item) ValStr() string { + return string(i.Val) +} + func (i Item) IsText() bool { return i.typ == tText } diff --git a/parser/pageparser/pagelexer.go b/parser/pageparser/pagelexer.go index 3bdfb6c33..c15e977ca 100644 --- a/parser/pageparser/pagelexer.go +++ b/parser/pageparser/pagelexer.go @@ -60,17 +60,6 @@ type pageLexer struct { items []Item } -func Parse(s string) *Tokens { - return ParseFrom(s, 0) -} - -func ParseFrom(s string, from int) *Tokens { - input := []byte(s) - lexer := newPageLexer(input, pos(from), lexMainSection) // TODO(bep) 2errors - lexer.run() - return &Tokens{lexer: lexer} -} - // note: the input position here is normally 0 (start), but // can be set if position of first shortcode is known // TODO(bep) 2errors byte diff --git a/parser/pageparser/pageparser.go b/parser/pageparser/pageparser.go index 5534ee64b..948c05edf 100644 --- a/parser/pageparser/pageparser.go +++ b/parser/pageparser/pageparser.go @@ -17,7 +17,15 @@ // See slides here: http://cuddle.googlecode.com/hg/talk/lex.html package pageparser -// The lexical scanning below +func Parse(input []byte) *Tokens { + return ParseFrom(input, 0) +} + +func ParseFrom(input []byte, from int) *Tokens { + lexer := newPageLexer(input, pos(from), lexMainSection) // TODO(bep) 2errors + lexer.run() + return &Tokens{lexer: lexer} +} type Tokens struct { lexer *pageLexer