absurlreplacer: write replacements directly to the byte buffer

The intermediate item slice isn't needed: writing each replacement straight to the output buffer as it is found drops a whole second pass over the collected tokens and makes the code simpler.

And slightly faster:

benchmark              old ns/op     new ns/op     delta
BenchmarkAbsUrl        19987         17498         -12.45%
BenchmarkXmlAbsUrl     10606         9503          -10.40%

benchmark              old allocs     new allocs     delta
BenchmarkAbsUrl        28             24             -14.29%
BenchmarkXmlAbsUrl     14             12             -14.29%

benchmark              old bytes     new bytes     delta
BenchmarkAbsUrl        3512          3297          -6.12%
BenchmarkXmlAbsUrl     2059          1963          -4.66%
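
In short: instead of lexing the content into an item slice and then looping over that slice to write text and replacements into the buffer, emit() and checkCandidate now write straight into the pooled bytes.Buffer. A rough, self-contained sketch of the two shapes, using a deliberately simplified single-pattern replacer rather than the real lexer or its types:

package main

import (
    "bytes"
    "fmt"
)

// Old shape: collect the emitted segments, then write them out in a second pass.
func replaceCollect(content, match, replacement []byte) []byte {
    var segments [][]byte
    for len(content) > 0 {
        i := bytes.Index(content, match)
        if i < 0 {
            segments = append(segments, content)
            break
        }
        segments = append(segments, content[:i], replacement)
        content = content[i+len(match):]
    }
    var b bytes.Buffer
    for _, s := range segments {
        b.Write(s)
    }
    return b.Bytes()
}

// New shape: write each segment straight to the buffer as it is found;
// no intermediate slice, fewer allocations.
func replaceDirect(content, match, replacement []byte) []byte {
    var b bytes.Buffer
    for len(content) > 0 {
        i := bytes.Index(content, match)
        if i < 0 {
            b.Write(content)
            break
        }
        b.Write(content[:i])
        b.Write(replacement)
        content = content[i+len(match):]
    }
    return b.Bytes()
}

func main() {
    in := []byte(`<a href="/about/">about</a>`)
    match, repl := []byte(`href="/`), []byte(`href="http://example.com/`)
    fmt.Println(string(replaceCollect(in, match, repl))) // <a href="http://example.com/about/">about</a>
    fmt.Println(string(replaceDirect(in, match, repl)))  // same output, single pass, no slice
}

Dropping the intermediate slice (and the sync.Pool that backed it) is where the allocation savings in the numbers above come from.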
bep 2015-03-10 18:44:32 +01:00
parent 3ba279c2e5
commit c641ffea3a


@@ -5,13 +5,9 @@ import (
     bp "github.com/spf13/hugo/bufferpool"
     "net/url"
     "strings"
-    "sync"
     "unicode/utf8"
 )
 
-// position (in bytes)
-type pos int
-
 type matchState int
 
 const (
@@ -21,37 +17,23 @@ const (
     matchStateFull
 )
 
-type item struct {
-    typ itemType
-    pos pos
-    val []byte
-}
-
-type itemType int
-
 const (
-    tText itemType = iota
-
-    // matches
-    tSrcdq
-    tHrefdq
-    tSrcsq
-    tHrefsq
+    matchPrefixSrc int = iota
+    matchPrefixHref
 )
 
 type contentlexer struct {
     content []byte
 
-    pos   pos // input position
-    start pos // item start position
-    width pos // width of last element
+    pos   int // input position
+    start int // item start position
+    width int // width of last element
 
     matchers []absurlMatcher
     state    stateFunc
 
     prefixLookup *prefixes
 
-    // items delivered to client
-    items []item
+    b *bytes.Buffer
 }
 
 type stateFunc func(*contentlexer) stateFunc
@@ -112,36 +94,55 @@ func (l *contentlexer) match(r rune) {
     l.prefixLookup.ms = matchStateNone
 }
 
-func (l *contentlexer) emit(t itemType) {
-    l.items = append(l.items, item{t, l.start, l.content[l.start:l.pos]})
+func (l *contentlexer) emit() {
+    l.b.Write(l.content[l.start:l.pos])
     l.start = l.pos
 }
 
 var mainPrefixRunes = []prefixRunes{{'s', 'r', 'c', '='}, {'h', 'r', 'e', 'f', '='}}
 
-var itemSlicePool = &sync.Pool{
-    New: func() interface{} {
-        return make([]item, 0, 8)
-    },
-}
-
-func (l *contentlexer) runReplacer() {
-    for l.state = lexReplacements; l.state != nil; {
-        l.state = l.state(l)
-    }
-}
-
 type absurlMatcher struct {
-    replaceType itemType
+    prefix      int
     match       []byte
     replacement []byte
 }
 
 func (a absurlMatcher) isSourceType() bool {
-    return a.replaceType == tSrcdq || a.replaceType == tSrcsq
+    return a.prefix == matchPrefixSrc
 }
 
-func lexReplacements(l *contentlexer) stateFunc {
+func checkCandidate(l *contentlexer) {
+    isSource := l.prefixLookup.first == 's'
+    for _, m := range l.matchers {
+
+        if isSource && !m.isSourceType() || !isSource && m.isSourceType() {
+            continue
+        }
+
+        if bytes.HasPrefix(l.content[l.pos:], m.match) {
+            // check for schemaless urls
+            posAfter := l.pos + len(m.match)
+            if int(posAfter) >= len(l.content) {
+                return
+            }
+            r, _ := utf8.DecodeRune(l.content[posAfter:])
+            if r == '/' {
+                // schemaless: skip
+                return
+            }
+            if l.pos > l.start {
+                l.emit()
+            }
+            l.pos += len(m.match)
+            l.b.Write(m.replacement)
+            l.start = l.pos
+            return
+        }
+    }
+}
+
+func (l *contentlexer) replace() {
     contentLength := len(l.content)
 
     var r rune
@@ -156,7 +157,7 @@ func lexReplacements(l *contentlexer) stateFunc {
         if r >= utf8.RuneSelf {
             r, width = utf8.DecodeRune(l.content[l.pos:])
         }
-        l.width = pos(width)
+        l.width = width
         l.pos += l.width
 
         if r == ' ' {
@@ -172,38 +173,7 @@
     // Done!
     if l.pos > l.start {
-        l.emit(tText)
-    }
-    return nil
-}
-
-func checkCandidate(l *contentlexer) {
-    isSource := l.prefixLookup.first == 's'
-    for _, m := range l.matchers {
-
-        if isSource && !m.isSourceType() || !isSource && m.isSourceType() {
-            continue
-        }
-
-        if bytes.HasPrefix(l.content[l.pos:], m.match) {
-            // check for schemaless urls
-            posAfter := pos(int(l.pos) + len(m.match))
-            if int(posAfter) >= len(l.content) {
-                return
-            }
-            r, _ := utf8.DecodeRune(l.content[posAfter:])
-            if r == '/' {
-                // schemaless: skip
-                return
-            }
-            if l.pos > l.start {
-                l.emit(tText)
-            }
-            l.pos += pos(len(m.match))
-            l.emit(m.replaceType)
-            return
-        }
+        l.emit()
     }
 }
@@ -211,34 +181,12 @@ func doReplace(content []byte, matchers []absurlMatcher) []byte {
     b := bp.GetBuffer()
     defer bp.PutBuffer(b)
 
-    var items []item
-    if x := itemSlicePool.Get(); x != nil {
-        items = x.([]item)[:0]
-        defer itemSlicePool.Put(items)
-    } else {
-        items = make([]item, 0, 8)
-    }
-
     lexer := &contentlexer{content: content,
-        items:        items,
+        b:            b,
         prefixLookup: &prefixes{pr: mainPrefixRunes},
         matchers:     matchers}
 
-    lexer.runReplacer()
+    lexer.replace()
 
-    for _, token := range lexer.items {
-        switch token.typ {
-        case tText:
-            b.Write(token.val)
-        default:
-            for _, e := range matchers {
-                if token.typ == e.replaceType {
-                    b.Write(e.replacement)
-                    break
-                }
-            }
-        }
-    }
-
     return b.Bytes()
 }
@@ -266,16 +214,17 @@ func newAbsurlReplacer(baseUrl string) *absurlReplacer {
     dqXml := []byte("&#34;" + base + "/")
     sqXml := []byte("&#39;" + base + "/")
 
-    return &absurlReplacer{htmlMatchers: []absurlMatcher{
-        {tSrcdq, dqHtmlMatch, dqHtml},
-        {tSrcsq, sqHtmlMatch, sqHtml},
-        {tHrefdq, dqHtmlMatch, dqHtml},
-        {tHrefsq, sqHtmlMatch, sqHtml}},
+    return &absurlReplacer{
+        htmlMatchers: []absurlMatcher{
+            {matchPrefixSrc, dqHtmlMatch, dqHtml},
+            {matchPrefixSrc, sqHtmlMatch, sqHtml},
+            {matchPrefixHref, dqHtmlMatch, dqHtml},
+            {matchPrefixHref, sqHtmlMatch, sqHtml}},
 
         xmlMatchers: []absurlMatcher{
-            {tSrcdq, dqXmlMatch, dqXml},
-            {tSrcsq, sqXmlMatch, sqXml},
-            {tHrefdq, dqXmlMatch, dqXml},
-            {tHrefsq, sqXmlMatch, sqXml},
+            {matchPrefixSrc, dqXmlMatch, dqXml},
+            {matchPrefixSrc, sqXmlMatch, sqXml},
+            {matchPrefixHref, dqXmlMatch, dqXml},
+            {matchPrefixHref, sqXmlMatch, sqXml},
         }}
 }
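
A side note on checkCandidate above: after a prefix such as src=" matches, it peeks at the rune that follows, and if that rune is '/' the URL is protocol-relative (schemaless, e.g. src="//cdn.example.com/x.js") and is deliberately left untouched. A minimal standalone sketch of just that guard, with illustrative names rather than the actual Hugo types:

package main

import (
    "bytes"
    "fmt"
    "unicode/utf8"
)

// isAbsURLCandidate reports whether content at pos starts with match (e.g. `src="/`)
// and the match is not followed by another '/', i.e. it is not a schemaless
// (protocol-relative) URL such as src="//cdn.example.com/x.js".
func isAbsURLCandidate(content []byte, pos int, match []byte) bool {
    if !bytes.HasPrefix(content[pos:], match) {
        return false
    }
    after := pos + len(match)
    if after >= len(content) {
        return false
    }
    r, _ := utf8.DecodeRune(content[after:])
    return r != '/'
}

func main() {
    m := []byte(`src="/`)
    fmt.Println(isAbsURLCandidate([]byte(`<img src="/logo.png">`), 5, m))              // true: site-relative, rewrite it
    fmt.Println(isAbsURLCandidate([]byte(`<img src="//cdn.example.com/x.js">`), 5, m)) // false: schemaless, leave it alone
}

The real checkCandidate does the same peek via utf8.DecodeRune right after the matched prefix, as shown in the diff.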