mirror of
https://github.com/gohugoio/hugo.git
synced 2024-07-05 08:42:12 +00:00
absurlreplacer: write replacements directly to the byte buffer
The extra step isn't needed, and this makes the code simpler. And slightly faster:

benchmark              old ns/op     new ns/op     delta
BenchmarkAbsUrl        19987         17498         -12.45%
BenchmarkXmlAbsUrl     10606         9503          -10.40%

benchmark              old allocs     new allocs     delta
BenchmarkAbsUrl        28             24             -14.29%
BenchmarkXmlAbsUrl     14             12             -14.29%

benchmark              old bytes     new bytes     delta
BenchmarkAbsUrl        3512          3297          -6.12%
BenchmarkXmlAbsUrl     2059          1963          -4.66%
This commit is contained in:
parent
3ba279c2e5
commit
c641ffea3a
|
@ -5,13 +5,9 @@ import (
|
||||||
bp "github.com/spf13/hugo/bufferpool"
|
bp "github.com/spf13/hugo/bufferpool"
|
||||||
"net/url"
|
"net/url"
|
||||||
"strings"
|
"strings"
|
||||||
"sync"
|
|
||||||
"unicode/utf8"
|
"unicode/utf8"
|
||||||
)
|
)
|
||||||
|
|
||||||
// position (in bytes)
|
|
||||||
type pos int
|
|
||||||
|
|
||||||
type matchState int
|
type matchState int
|
||||||
|
|
||||||
const (
|
const (
|
||||||
|
@ -21,37 +17,23 @@ const (
|
||||||
matchStateFull
|
matchStateFull
|
||||||
)
|
)
|
||||||
|
|
||||||
type item struct {
|
|
||||||
typ itemType
|
|
||||||
pos pos
|
|
||||||
val []byte
|
|
||||||
}
|
|
||||||
|
|
||||||
type itemType int
|
|
||||||
|
|
||||||
const (
|
const (
|
||||||
tText itemType = iota
|
matchPrefixSrc int = iota
|
||||||
|
matchPrefixHref
|
||||||
// matches
|
|
||||||
tSrcdq
|
|
||||||
tHrefdq
|
|
||||||
tSrcsq
|
|
||||||
tHrefsq
|
|
||||||
)
|
)
|
||||||
|
|
||||||
type contentlexer struct {
|
type contentlexer struct {
|
||||||
content []byte
|
content []byte
|
||||||
|
|
||||||
pos pos // input position
|
pos int // input position
|
||||||
start pos // item start position
|
start int // item start position
|
||||||
width pos // width of last element
|
width int // width of last element
|
||||||
|
|
||||||
matchers []absurlMatcher
|
matchers []absurlMatcher
|
||||||
state stateFunc
|
state stateFunc
|
||||||
prefixLookup *prefixes
|
prefixLookup *prefixes
|
||||||
|
|
||||||
// items delivered to client
|
b *bytes.Buffer
|
||||||
items []item
|
|
||||||
}
|
}
|
||||||
|
|
||||||
type stateFunc func(*contentlexer) stateFunc
|
type stateFunc func(*contentlexer) stateFunc
|
||||||
|
@ -112,36 +94,55 @@ func (l *contentlexer) match(r rune) {
|
||||||
l.prefixLookup.ms = matchStateNone
|
l.prefixLookup.ms = matchStateNone
|
||||||
}
|
}
|
||||||
|
|
||||||
func (l *contentlexer) emit(t itemType) {
|
func (l *contentlexer) emit() {
|
||||||
l.items = append(l.items, item{t, l.start, l.content[l.start:l.pos]})
|
l.b.Write(l.content[l.start:l.pos])
|
||||||
l.start = l.pos
|
l.start = l.pos
|
||||||
}
|
}
|
||||||
|
|
||||||
var mainPrefixRunes = []prefixRunes{{'s', 'r', 'c', '='}, {'h', 'r', 'e', 'f', '='}}
|
var mainPrefixRunes = []prefixRunes{{'s', 'r', 'c', '='}, {'h', 'r', 'e', 'f', '='}}
|
||||||
|
|
||||||
var itemSlicePool = &sync.Pool{
|
|
||||||
New: func() interface{} {
|
|
||||||
return make([]item, 0, 8)
|
|
||||||
},
|
|
||||||
}
|
|
||||||
|
|
||||||
func (l *contentlexer) runReplacer() {
|
|
||||||
for l.state = lexReplacements; l.state != nil; {
|
|
||||||
l.state = l.state(l)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
type absurlMatcher struct {
|
type absurlMatcher struct {
|
||||||
replaceType itemType
|
prefix int
|
||||||
match []byte
|
match []byte
|
||||||
replacement []byte
|
replacement []byte
|
||||||
}
|
}
|
||||||
|
|
||||||
func (a absurlMatcher) isSourceType() bool {
|
func (a absurlMatcher) isSourceType() bool {
|
||||||
return a.replaceType == tSrcdq || a.replaceType == tSrcsq
|
return a.prefix == matchPrefixSrc
|
||||||
}
|
}
|
||||||
|
|
||||||
func lexReplacements(l *contentlexer) stateFunc {
|
func checkCandidate(l *contentlexer) {
|
||||||
|
isSource := l.prefixLookup.first == 's'
|
||||||
|
for _, m := range l.matchers {
|
||||||
|
|
||||||
|
if isSource && !m.isSourceType() || !isSource && m.isSourceType() {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
if bytes.HasPrefix(l.content[l.pos:], m.match) {
|
||||||
|
// check for schemaless urls
|
||||||
|
posAfter := l.pos + len(m.match)
|
||||||
|
if int(posAfter) >= len(l.content) {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
r, _ := utf8.DecodeRune(l.content[posAfter:])
|
||||||
|
if r == '/' {
|
||||||
|
// schemaless: skip
|
||||||
|
return
|
||||||
|
}
|
||||||
|
if l.pos > l.start {
|
||||||
|
l.emit()
|
||||||
|
}
|
||||||
|
l.pos += len(m.match)
|
||||||
|
l.b.Write(m.replacement)
|
||||||
|
l.start = l.pos
|
||||||
|
return
|
||||||
|
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func (l *contentlexer) replace() {
|
||||||
contentLength := len(l.content)
|
contentLength := len(l.content)
|
||||||
var r rune
|
var r rune
|
||||||
|
|
||||||
|
@ -156,7 +157,7 @@ func lexReplacements(l *contentlexer) stateFunc {
|
||||||
if r >= utf8.RuneSelf {
|
if r >= utf8.RuneSelf {
|
||||||
r, width = utf8.DecodeRune(l.content[l.pos:])
|
r, width = utf8.DecodeRune(l.content[l.pos:])
|
||||||
}
|
}
|
||||||
l.width = pos(width)
|
l.width = width
|
||||||
l.pos += l.width
|
l.pos += l.width
|
||||||
|
|
||||||
if r == ' ' {
|
if r == ' ' {
|
||||||
|
@ -172,38 +173,7 @@ func lexReplacements(l *contentlexer) stateFunc {
|
||||||
|
|
||||||
// Done!
|
// Done!
|
||||||
if l.pos > l.start {
|
if l.pos > l.start {
|
||||||
l.emit(tText)
|
l.emit()
|
||||||
}
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
|
|
||||||
func checkCandidate(l *contentlexer) {
|
|
||||||
isSource := l.prefixLookup.first == 's'
|
|
||||||
for _, m := range l.matchers {
|
|
||||||
|
|
||||||
if isSource && !m.isSourceType() || !isSource && m.isSourceType() {
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
|
|
||||||
if bytes.HasPrefix(l.content[l.pos:], m.match) {
|
|
||||||
// check for schemaless urls
|
|
||||||
posAfter := pos(int(l.pos) + len(m.match))
|
|
||||||
if int(posAfter) >= len(l.content) {
|
|
||||||
return
|
|
||||||
}
|
|
||||||
r, _ := utf8.DecodeRune(l.content[posAfter:])
|
|
||||||
if r == '/' {
|
|
||||||
// schemaless: skip
|
|
||||||
return
|
|
||||||
}
|
|
||||||
if l.pos > l.start {
|
|
||||||
l.emit(tText)
|
|
||||||
}
|
|
||||||
l.pos += pos(len(m.match))
|
|
||||||
l.emit(m.replaceType)
|
|
||||||
return
|
|
||||||
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -211,34 +181,12 @@ func doReplace(content []byte, matchers []absurlMatcher) []byte {
|
||||||
b := bp.GetBuffer()
|
b := bp.GetBuffer()
|
||||||
defer bp.PutBuffer(b)
|
defer bp.PutBuffer(b)
|
||||||
|
|
||||||
var items []item
|
|
||||||
if x := itemSlicePool.Get(); x != nil {
|
|
||||||
items = x.([]item)[:0]
|
|
||||||
defer itemSlicePool.Put(items)
|
|
||||||
} else {
|
|
||||||
items = make([]item, 0, 8)
|
|
||||||
}
|
|
||||||
|
|
||||||
lexer := &contentlexer{content: content,
|
lexer := &contentlexer{content: content,
|
||||||
items: items,
|
b: b,
|
||||||
prefixLookup: &prefixes{pr: mainPrefixRunes},
|
prefixLookup: &prefixes{pr: mainPrefixRunes},
|
||||||
matchers: matchers}
|
matchers: matchers}
|
||||||
|
|
||||||
lexer.runReplacer()
|
lexer.replace()
|
||||||
|
|
||||||
for _, token := range lexer.items {
|
|
||||||
switch token.typ {
|
|
||||||
case tText:
|
|
||||||
b.Write(token.val)
|
|
||||||
default:
|
|
||||||
for _, e := range matchers {
|
|
||||||
if token.typ == e.replaceType {
|
|
||||||
b.Write(e.replacement)
|
|
||||||
break
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return b.Bytes()
|
return b.Bytes()
|
||||||
}
|
}
|
||||||
|
@ -266,16 +214,17 @@ func newAbsurlReplacer(baseUrl string) *absurlReplacer {
|
||||||
dqXml := []byte("&#34;" + base + "/")
|
dqXml := []byte("&#34;" + base + "/")
|
||||||
sqXml := []byte("&#39;" + base + "/")
|
sqXml := []byte("&#39;" + base + "/")
|
||||||
|
|
||||||
return &absurlReplacer{htmlMatchers: []absurlMatcher{
|
return &absurlReplacer{
|
||||||
{tSrcdq, dqHtmlMatch, dqHtml},
|
htmlMatchers: []absurlMatcher{
|
||||||
{tSrcsq, sqHtmlMatch, sqHtml},
|
{matchPrefixSrc, dqHtmlMatch, dqHtml},
|
||||||
{tHrefdq, dqHtmlMatch, dqHtml},
|
{matchPrefixSrc, sqHtmlMatch, sqHtml},
|
||||||
{tHrefsq, sqHtmlMatch, sqHtml}},
|
{matchPrefixHref, dqHtmlMatch, dqHtml},
|
||||||
|
{matchPrefixHref, sqHtmlMatch, sqHtml}},
|
||||||
xmlMatchers: []absurlMatcher{
|
xmlMatchers: []absurlMatcher{
|
||||||
{tSrcdq, dqXmlMatch, dqXml},
|
{matchPrefixSrc, dqXmlMatch, dqXml},
|
||||||
{tSrcsq, sqXmlMatch, sqXml},
|
{matchPrefixSrc, sqXmlMatch, sqXml},
|
||||||
{tHrefdq, dqXmlMatch, dqXml},
|
{matchPrefixHref, dqXmlMatch, dqXml},
|
||||||
{tHrefsq, sqXmlMatch, sqXml},
|
{matchPrefixHref, sqXmlMatch, sqXml},
|
||||||
}}
|
}}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in a new issue