hugo/parser/pageparser/item.go
Bjørn Erik Pedersen 9cd54cab20 Move the emoji parsing to pageparser
This avoids double parsing the page content when `enableEmoji=true`.

This commit also adds some general improvements to the parser, making it in general much faster:

```bash
benchmark                     old ns/op     new ns/op     delta
BenchmarkShortcodeLexer-4     90258         101730        +12.71%
BenchmarkParse-4              148940        15037         -89.90%

benchmark                     old allocs     new allocs     delta
BenchmarkShortcodeLexer-4     456            700            +53.51%
BenchmarkParse-4              28             33             +17.86%

benchmark                     old bytes     new bytes     delta
BenchmarkShortcodeLexer-4     69875         81014         +15.94%
BenchmarkParse-4              8128          8304          +2.17%
```

Running some site benchmarks with Emoji support turned on:

```bash
benchmark                                                                                     old ns/op     new ns/op     delta
BenchmarkSiteBuilding/TOML,num_langs=3,num_pages=5000,tags_per_page=5,shortcodes,render-4     924556797     818115620     -11.51%

benchmark                                                                                     old allocs     new allocs     delta
BenchmarkSiteBuilding/TOML,num_langs=3,num_pages=5000,tags_per_page=5,shortcodes,render-4     4112613        4133787        +0.51%

benchmark                                                                                     old bytes     new bytes     delta
BenchmarkSiteBuilding/TOML,num_langs=3,num_pages=5000,tags_per_page=5,shortcodes,render-4     426982864     424363832     -0.61%
```

Fixes #5534
2018-12-20 20:08:01 +01:00

135 lines
2.9 KiB
Go

// Copyright 2018 The Hugo Authors. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package pageparser
import (
"bytes"
"fmt"
)
type Item struct {
Type ItemType
Pos int
Val []byte
}
type Items []Item
func (i Item) ValStr() string {
return string(i.Val)
}
func (i Item) IsText() bool {
return i.Type == tText
}
func (i Item) IsNonWhitespace() bool {
return len(bytes.TrimSpace(i.Val)) > 0
}
func (i Item) IsShortcodeName() bool {
return i.Type == tScName
}
func (i Item) IsInlineShortcodeName() bool {
return i.Type == tScNameInline
}
func (i Item) IsLeftShortcodeDelim() bool {
return i.Type == tLeftDelimScWithMarkup || i.Type == tLeftDelimScNoMarkup
}
func (i Item) IsRightShortcodeDelim() bool {
return i.Type == tRightDelimScWithMarkup || i.Type == tRightDelimScNoMarkup
}
func (i Item) IsShortcodeClose() bool {
return i.Type == tScClose
}
func (i Item) IsShortcodeParam() bool {
return i.Type == tScParam
}
func (i Item) IsShortcodeParamVal() bool {
return i.Type == tScParamVal
}
func (i Item) IsShortcodeMarkupDelimiter() bool {
return i.Type == tLeftDelimScWithMarkup || i.Type == tRightDelimScWithMarkup
}
func (i Item) IsFrontMatter() bool {
return i.Type >= TypeFrontMatterYAML && i.Type <= TypeFrontMatterORG
}
func (i Item) IsDone() bool {
return i.Type == tError || i.Type == tEOF
}
func (i Item) IsEOF() bool {
return i.Type == tEOF
}
func (i Item) IsError() bool {
return i.Type == tError
}
func (i Item) String() string {
switch {
case i.Type == tEOF:
return "EOF"
case i.Type == tError:
return string(i.Val)
case i.Type > tKeywordMarker:
return fmt.Sprintf("<%s>", i.Val)
case len(i.Val) > 50:
return fmt.Sprintf("%v:%.20q...", i.Type, i.Val)
}
return fmt.Sprintf("%v:[%s]", i.Type, i.Val)
}
type ItemType int
const (
tError ItemType = iota
tEOF
// page items
TypeHTMLStart // document starting with < as first non-whitespace
TypeLeadSummaryDivider // <!--more-->, # more
TypeFrontMatterYAML
TypeFrontMatterTOML
TypeFrontMatterJSON
TypeFrontMatterORG
TypeEmoji
TypeIgnore // // The BOM Unicode byte order marker and possibly others
// shortcode items
tLeftDelimScNoMarkup
tRightDelimScNoMarkup
tLeftDelimScWithMarkup
tRightDelimScWithMarkup
tScClose
tScName
tScNameInline
tScParam
tScParamVal
tText // plain text
// preserved for later - keywords come after this
tKeywordMarker
)