mirror of
https://github.com/gohugoio/hugo.git
synced 2024-07-04 16:21:57 +00:00
parent
2fdc4a24d5
commit
27f5a906a2
|
@ -18,7 +18,7 @@ import "fmt"
|
||||||
type Item struct {
|
type Item struct {
|
||||||
typ itemType
|
typ itemType
|
||||||
pos pos
|
pos pos
|
||||||
Val string
|
Val []byte
|
||||||
}
|
}
|
||||||
|
|
||||||
func (i Item) IsText() bool {
|
func (i Item) IsText() bool {
|
||||||
|
@ -70,7 +70,7 @@ func (i Item) String() string {
|
||||||
case i.typ == tEOF:
|
case i.typ == tEOF:
|
||||||
return "EOF"
|
return "EOF"
|
||||||
case i.typ == tError:
|
case i.typ == tError:
|
||||||
return i.Val
|
return string(i.Val)
|
||||||
case i.typ > tKeywordMarker:
|
case i.typ > tKeywordMarker:
|
||||||
return fmt.Sprintf("<%s>", i.Val)
|
return fmt.Sprintf("<%s>", i.Val)
|
||||||
case len(i.Val) > 50:
|
case len(i.Val) > 50:
|
||||||
|
|
|
@ -18,8 +18,8 @@
|
||||||
package pageparser
|
package pageparser
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"bytes"
|
||||||
"fmt"
|
"fmt"
|
||||||
"strings"
|
|
||||||
"unicode"
|
"unicode"
|
||||||
"unicode/utf8"
|
"unicode/utf8"
|
||||||
)
|
)
|
||||||
|
@ -44,7 +44,7 @@ type lexerShortcodeState struct {
|
||||||
}
|
}
|
||||||
|
|
||||||
type pageLexer struct {
|
type pageLexer struct {
|
||||||
input string
|
input []byte
|
||||||
stateStart stateFunc
|
stateStart stateFunc
|
||||||
state stateFunc
|
state stateFunc
|
||||||
pos pos // input position
|
pos pos // input position
|
||||||
|
@ -65,14 +65,16 @@ func Parse(s string) *Tokens {
|
||||||
}
|
}
|
||||||
|
|
||||||
func ParseFrom(s string, from int) *Tokens {
|
func ParseFrom(s string, from int) *Tokens {
|
||||||
lexer := newPageLexer(s, pos(from), lexMainSection) // TODO(bep) 2errors
|
input := []byte(s)
|
||||||
|
lexer := newPageLexer(input, pos(from), lexMainSection) // TODO(bep) 2errors
|
||||||
lexer.run()
|
lexer.run()
|
||||||
return &Tokens{lexer: lexer}
|
return &Tokens{lexer: lexer}
|
||||||
}
|
}
|
||||||
|
|
||||||
// note: the input position here is normally 0 (start), but
|
// note: the input position here is normally 0 (start), but
|
||||||
// can be set if position of first shortcode is known
|
// can be set if position of first shortcode is known
|
||||||
func newPageLexer(input string, inputPosition pos, stateStart stateFunc) *pageLexer {
|
// TODO(bep) 2errors byte
|
||||||
|
func newPageLexer(input []byte, inputPosition pos, stateStart stateFunc) *pageLexer {
|
||||||
lexer := &pageLexer{
|
lexer := &pageLexer{
|
||||||
input: input,
|
input: input,
|
||||||
pos: inputPosition,
|
pos: inputPosition,
|
||||||
|
@ -97,19 +99,22 @@ func (l *pageLexer) run() *pageLexer {
|
||||||
}
|
}
|
||||||
|
|
||||||
// Shortcode syntax
|
// Shortcode syntax
|
||||||
const (
|
var (
|
||||||
leftDelimScNoMarkup = "{{<"
|
leftDelimScNoMarkup = []byte("{{<")
|
||||||
rightDelimScNoMarkup = ">}}"
|
rightDelimScNoMarkup = []byte(">}}")
|
||||||
leftDelimScWithMarkup = "{{%"
|
leftDelimScWithMarkup = []byte("{{%")
|
||||||
rightDelimScWithMarkup = "%}}"
|
rightDelimScWithMarkup = []byte("%}}")
|
||||||
leftComment = "/*" // comments in this context are used to mark shortcodes as "not really a shortcode"
|
leftComment = []byte("/*") // comments in this context are used to mark shortcodes as "not really a shortcode"
|
||||||
rightComment = "*/"
|
rightComment = []byte("*/")
|
||||||
)
|
)
|
||||||
|
|
||||||
// Page syntax
|
// Page syntax
|
||||||
const (
|
var (
|
||||||
summaryDivider = "<!--more-->"
|
summaryDivider = []byte("<!--more-->")
|
||||||
summaryDividerOrg = "# more"
|
summaryDividerOrg = []byte("# more")
|
||||||
|
delimTOML = []byte("+++")
|
||||||
|
delimYAML = []byte("---")
|
||||||
|
delimOrg = []byte("#+")
|
||||||
)
|
)
|
||||||
|
|
||||||
func (l *pageLexer) next() rune {
|
func (l *pageLexer) next() rune {
|
||||||
|
@ -118,9 +123,7 @@ func (l *pageLexer) next() rune {
|
||||||
return eof
|
return eof
|
||||||
}
|
}
|
||||||
|
|
||||||
// looks expensive, but should produce the same iteration sequence as the string range loop
|
runeValue, runeWidth := utf8.DecodeRune(l.input[l.pos:])
|
||||||
// see: http://blog.golang.org/strings
|
|
||||||
runeValue, runeWidth := utf8.DecodeRuneInString(l.input[l.pos:])
|
|
||||||
l.width = pos(runeWidth)
|
l.width = pos(runeWidth)
|
||||||
l.pos += l.width
|
l.pos += l.width
|
||||||
return runeValue
|
return runeValue
|
||||||
|
@ -146,7 +149,7 @@ func (l *pageLexer) emit(t itemType) {
|
||||||
|
|
||||||
// special case, do not send '\\' back to client
|
// special case, do not send '\\' back to client
|
||||||
func (l *pageLexer) ignoreEscapesAndEmit(t itemType) {
|
func (l *pageLexer) ignoreEscapesAndEmit(t itemType) {
|
||||||
val := strings.Map(func(r rune) rune {
|
val := bytes.Map(func(r rune) rune {
|
||||||
if r == '\\' {
|
if r == '\\' {
|
||||||
return -1
|
return -1
|
||||||
}
|
}
|
||||||
|
@ -157,7 +160,7 @@ func (l *pageLexer) ignoreEscapesAndEmit(t itemType) {
|
||||||
}
|
}
|
||||||
|
|
||||||
// gets the current value (for debugging and error handling)
|
// gets the current value (for debugging and error handling)
|
||||||
func (l *pageLexer) current() string {
|
func (l *pageLexer) current() []byte {
|
||||||
return l.input[l.start:l.pos]
|
return l.input[l.start:l.pos]
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -166,14 +169,16 @@ func (l *pageLexer) ignore() {
|
||||||
l.start = l.pos
|
l.start = l.pos
|
||||||
}
|
}
|
||||||
|
|
||||||
|
var lf = []byte("\n")
|
||||||
|
|
||||||
// nice to have in error logs
|
// nice to have in error logs
|
||||||
func (l *pageLexer) lineNum() int {
|
func (l *pageLexer) lineNum() int {
|
||||||
return strings.Count(l.input[:l.lastPos], "\n") + 1
|
return bytes.Count(l.input[:l.lastPos], lf) + 1
|
||||||
}
|
}
|
||||||
|
|
||||||
// nil terminates the parser
|
// nil terminates the parser
|
||||||
func (l *pageLexer) errorf(format string, args ...interface{}) stateFunc {
|
func (l *pageLexer) errorf(format string, args ...interface{}) stateFunc {
|
||||||
l.items = append(l.items, Item{tError, l.start, fmt.Sprintf(format, args...)})
|
l.items = append(l.items, Item{tError, l.start, []byte(fmt.Sprintf(format, args...))})
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -203,7 +208,7 @@ func lexMainSection(l *pageLexer) stateFunc {
|
||||||
if l.pos > l.start {
|
if l.pos > l.start {
|
||||||
l.emit(tText)
|
l.emit(tText)
|
||||||
}
|
}
|
||||||
if strings.HasPrefix(l.input[l.pos:], leftDelimScWithMarkup) {
|
if bytes.HasPrefix(l.input[l.pos:], leftDelimScWithMarkup) {
|
||||||
l.currLeftDelimItem = tLeftDelimScWithMarkup
|
l.currLeftDelimItem = tLeftDelimScWithMarkup
|
||||||
l.currRightDelimItem = tRightDelimScWithMarkup
|
l.currRightDelimItem = tRightDelimScWithMarkup
|
||||||
} else {
|
} else {
|
||||||
|
@ -214,14 +219,14 @@ func lexMainSection(l *pageLexer) stateFunc {
|
||||||
}
|
}
|
||||||
|
|
||||||
if l.contentSections <= 1 {
|
if l.contentSections <= 1 {
|
||||||
if strings.HasPrefix(l.input[l.pos:], summaryDivider) {
|
if bytes.HasPrefix(l.input[l.pos:], summaryDivider) {
|
||||||
if l.pos > l.start {
|
if l.pos > l.start {
|
||||||
l.emit(tText)
|
l.emit(tText)
|
||||||
}
|
}
|
||||||
l.contentSections++
|
l.contentSections++
|
||||||
l.pos += pos(len(summaryDivider))
|
l.pos += pos(len(summaryDivider))
|
||||||
l.emit(tSummaryDivider)
|
l.emit(tSummaryDivider)
|
||||||
} else if strings.HasPrefix(l.input[l.pos:], summaryDividerOrg) {
|
} else if bytes.HasPrefix(l.input[l.pos:], summaryDividerOrg) {
|
||||||
if l.pos > l.start {
|
if l.pos > l.start {
|
||||||
l.emit(tText)
|
l.emit(tText)
|
||||||
}
|
}
|
||||||
|
@ -243,7 +248,7 @@ func lexMainSection(l *pageLexer) stateFunc {
|
||||||
}
|
}
|
||||||
|
|
||||||
func (l *pageLexer) isShortCodeStart() bool {
|
func (l *pageLexer) isShortCodeStart() bool {
|
||||||
return strings.HasPrefix(l.input[l.pos:], leftDelimScWithMarkup) || strings.HasPrefix(l.input[l.pos:], leftDelimScNoMarkup)
|
return bytes.HasPrefix(l.input[l.pos:], leftDelimScWithMarkup) || bytes.HasPrefix(l.input[l.pos:], leftDelimScNoMarkup)
|
||||||
}
|
}
|
||||||
|
|
||||||
func lexIntroSection(l *pageLexer) stateFunc {
|
func lexIntroSection(l *pageLexer) stateFunc {
|
||||||
|
@ -256,9 +261,9 @@ LOOP:
|
||||||
|
|
||||||
switch {
|
switch {
|
||||||
case r == '+':
|
case r == '+':
|
||||||
return l.lexFrontMatterSection(tFrontMatterTOML, r, "TOML", "+++")
|
return l.lexFrontMatterSection(tFrontMatterTOML, r, "TOML", delimTOML)
|
||||||
case r == '-':
|
case r == '-':
|
||||||
return l.lexFrontMatterSection(tFrontMatterYAML, r, "YAML", "---")
|
return l.lexFrontMatterSection(tFrontMatterYAML, r, "YAML", delimYAML)
|
||||||
case r == '{':
|
case r == '{':
|
||||||
return lexFrontMatterJSON
|
return lexFrontMatterJSON
|
||||||
case r == '#':
|
case r == '#':
|
||||||
|
@ -342,11 +347,9 @@ func lexFrontMatterOrgMode(l *pageLexer) stateFunc {
|
||||||
#+DESCRIPTION: Just another golang parser for org content!
|
#+DESCRIPTION: Just another golang parser for org content!
|
||||||
*/
|
*/
|
||||||
|
|
||||||
const prefix = "#+"
|
|
||||||
|
|
||||||
l.backup()
|
l.backup()
|
||||||
|
|
||||||
if !strings.HasPrefix(l.input[l.pos:], prefix) {
|
if !bytes.HasPrefix(l.input[l.pos:], delimOrg) {
|
||||||
// TODO(bep) consider error
|
// TODO(bep) consider error
|
||||||
return lexMainSection
|
return lexMainSection
|
||||||
}
|
}
|
||||||
|
@ -359,7 +362,7 @@ LOOP:
|
||||||
|
|
||||||
switch {
|
switch {
|
||||||
case r == '\n':
|
case r == '\n':
|
||||||
if !strings.HasPrefix(l.input[l.pos:], prefix) {
|
if !bytes.HasPrefix(l.input[l.pos:], delimOrg) {
|
||||||
break LOOP
|
break LOOP
|
||||||
}
|
}
|
||||||
case r == eof:
|
case r == eof:
|
||||||
|
@ -375,7 +378,7 @@ LOOP:
|
||||||
}
|
}
|
||||||
|
|
||||||
// Handle YAML or TOML front matter.
|
// Handle YAML or TOML front matter.
|
||||||
func (l *pageLexer) lexFrontMatterSection(tp itemType, delimr rune, name, delim string) stateFunc {
|
func (l *pageLexer) lexFrontMatterSection(tp itemType, delimr rune, name string, delim []byte) stateFunc {
|
||||||
for i := 0; i < 2; i++ {
|
for i := 0; i < 2; i++ {
|
||||||
if r := l.next(); r != delimr {
|
if r := l.next(); r != delimr {
|
||||||
return l.errorf("invalid %s delimiter", name)
|
return l.errorf("invalid %s delimiter", name)
|
||||||
|
@ -395,7 +398,7 @@ func (l *pageLexer) lexFrontMatterSection(tp itemType, delimr rune, name, delim
|
||||||
return l.errorf("EOF looking for end %s front matter delimiter", name)
|
return l.errorf("EOF looking for end %s front matter delimiter", name)
|
||||||
}
|
}
|
||||||
if isEndOfLine(r) {
|
if isEndOfLine(r) {
|
||||||
if strings.HasPrefix(l.input[l.pos:], delim) {
|
if bytes.HasPrefix(l.input[l.pos:], delim) {
|
||||||
l.emit(tp)
|
l.emit(tp)
|
||||||
l.pos += 3
|
l.pos += 3
|
||||||
l.consumeCRLF()
|
l.consumeCRLF()
|
||||||
|
@ -410,7 +413,7 @@ func (l *pageLexer) lexFrontMatterSection(tp itemType, delimr rune, name, delim
|
||||||
|
|
||||||
func lexShortcodeLeftDelim(l *pageLexer) stateFunc {
|
func lexShortcodeLeftDelim(l *pageLexer) stateFunc {
|
||||||
l.pos += pos(len(l.currentLeftShortcodeDelim()))
|
l.pos += pos(len(l.currentLeftShortcodeDelim()))
|
||||||
if strings.HasPrefix(l.input[l.pos:], leftComment) {
|
if bytes.HasPrefix(l.input[l.pos:], leftComment) {
|
||||||
return lexShortcodeComment
|
return lexShortcodeComment
|
||||||
}
|
}
|
||||||
l.emit(l.currentLeftShortcodeDelimItem())
|
l.emit(l.currentLeftShortcodeDelimItem())
|
||||||
|
@ -420,7 +423,7 @@ func lexShortcodeLeftDelim(l *pageLexer) stateFunc {
|
||||||
}
|
}
|
||||||
|
|
||||||
func lexShortcodeComment(l *pageLexer) stateFunc {
|
func lexShortcodeComment(l *pageLexer) stateFunc {
|
||||||
posRightComment := strings.Index(l.input[l.pos:], rightComment+l.currentRightShortcodeDelim())
|
posRightComment := bytes.Index(l.input[l.pos:], append(rightComment, l.currentRightShortcodeDelim()...))
|
||||||
if posRightComment <= 1 {
|
if posRightComment <= 1 {
|
||||||
return l.errorf("comment must be closed")
|
return l.errorf("comment must be closed")
|
||||||
}
|
}
|
||||||
|
@ -576,7 +579,7 @@ Loop:
|
||||||
case r == '/':
|
case r == '/':
|
||||||
default:
|
default:
|
||||||
l.backup()
|
l.backup()
|
||||||
word := l.input[l.start:l.pos]
|
word := string(l.input[l.start:l.pos])
|
||||||
if l.closingState > 0 && !l.openShortcodes[word] {
|
if l.closingState > 0 && !l.openShortcodes[word] {
|
||||||
return l.errorf("closing tag for shortcode '%s' does not match start tag", word)
|
return l.errorf("closing tag for shortcode '%s' does not match start tag", word)
|
||||||
} else if l.closingState > 0 {
|
} else if l.closingState > 0 {
|
||||||
|
@ -600,7 +603,7 @@ Loop:
|
||||||
}
|
}
|
||||||
|
|
||||||
func lexEndOfShortcode(l *pageLexer) stateFunc {
|
func lexEndOfShortcode(l *pageLexer) stateFunc {
|
||||||
if strings.HasPrefix(l.input[l.pos:], l.currentRightShortcodeDelim()) {
|
if bytes.HasPrefix(l.input[l.pos:], l.currentRightShortcodeDelim()) {
|
||||||
return lexShortcodeRightDelim
|
return lexShortcodeRightDelim
|
||||||
}
|
}
|
||||||
switch r := l.next(); {
|
switch r := l.next(); {
|
||||||
|
@ -614,7 +617,7 @@ func lexEndOfShortcode(l *pageLexer) stateFunc {
|
||||||
|
|
||||||
// scans the elements inside shortcode tags
|
// scans the elements inside shortcode tags
|
||||||
func lexInsideShortcode(l *pageLexer) stateFunc {
|
func lexInsideShortcode(l *pageLexer) stateFunc {
|
||||||
if strings.HasPrefix(l.input[l.pos:], l.currentRightShortcodeDelim()) {
|
if bytes.HasPrefix(l.input[l.pos:], l.currentRightShortcodeDelim()) {
|
||||||
return lexShortcodeRightDelim
|
return lexShortcodeRightDelim
|
||||||
}
|
}
|
||||||
switch r := l.next(); {
|
switch r := l.next(); {
|
||||||
|
@ -659,7 +662,7 @@ func (l *pageLexer) currentRightShortcodeDelimItem() itemType {
|
||||||
return l.currRightDelimItem
|
return l.currRightDelimItem
|
||||||
}
|
}
|
||||||
|
|
||||||
func (l *pageLexer) currentLeftShortcodeDelim() string {
|
func (l *pageLexer) currentLeftShortcodeDelim() []byte {
|
||||||
if l.currLeftDelimItem == tLeftDelimScWithMarkup {
|
if l.currLeftDelimItem == tLeftDelimScWithMarkup {
|
||||||
return leftDelimScWithMarkup
|
return leftDelimScWithMarkup
|
||||||
}
|
}
|
||||||
|
@ -667,7 +670,7 @@ func (l *pageLexer) currentLeftShortcodeDelim() string {
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func (l *pageLexer) currentRightShortcodeDelim() string {
|
func (l *pageLexer) currentRightShortcodeDelim() []byte {
|
||||||
if l.currRightDelimItem == tRightDelimScWithMarkup {
|
if l.currRightDelimItem == tRightDelimScWithMarkup {
|
||||||
return rightDelimScWithMarkup
|
return rightDelimScWithMarkup
|
||||||
}
|
}
|
||||||
|
|
|
@ -15,6 +15,7 @@ package pageparser
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"fmt"
|
"fmt"
|
||||||
|
"reflect"
|
||||||
"strings"
|
"strings"
|
||||||
"testing"
|
"testing"
|
||||||
)
|
)
|
||||||
|
@ -25,23 +26,27 @@ type lexerTest struct {
|
||||||
items []Item
|
items []Item
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func nti(tp itemType, val string) Item {
|
||||||
|
return Item{tp, 0, []byte(val)}
|
||||||
|
}
|
||||||
|
|
||||||
var (
|
var (
|
||||||
tstJSON = `{ "a": { "b": "\"Hugo\"}" } }`
|
tstJSON = `{ "a": { "b": "\"Hugo\"}" } }`
|
||||||
tstHTMLLead = Item{tHTMLLead, 0, " <"}
|
tstHTMLLead = nti(tHTMLLead, " <")
|
||||||
tstFrontMatterTOML = Item{tFrontMatterTOML, 0, "foo = \"bar\"\n"}
|
tstFrontMatterTOML = nti(tFrontMatterTOML, "foo = \"bar\"\n")
|
||||||
tstFrontMatterYAML = Item{tFrontMatterYAML, 0, "foo: \"bar\"\n"}
|
tstFrontMatterYAML = nti(tFrontMatterYAML, "foo: \"bar\"\n")
|
||||||
tstFrontMatterYAMLCRLF = Item{tFrontMatterYAML, 0, "foo: \"bar\"\r\n"}
|
tstFrontMatterYAMLCRLF = nti(tFrontMatterYAML, "foo: \"bar\"\r\n")
|
||||||
tstFrontMatterJSON = Item{tFrontMatterJSON, 0, tstJSON + "\r\n"}
|
tstFrontMatterJSON = nti(tFrontMatterJSON, tstJSON+"\r\n")
|
||||||
tstSomeText = Item{tText, 0, "\nSome text.\n"}
|
tstSomeText = nti(tText, "\nSome text.\n")
|
||||||
tstSummaryDivider = Item{tSummaryDivider, 0, "<!--more-->"}
|
tstSummaryDivider = nti(tSummaryDivider, "<!--more-->")
|
||||||
tstSummaryDividerOrg = Item{tSummaryDividerOrg, 0, "# more"}
|
tstSummaryDividerOrg = nti(tSummaryDividerOrg, "# more")
|
||||||
|
|
||||||
tstORG = `
|
tstORG = `
|
||||||
#+TITLE: T1
|
#+TITLE: T1
|
||||||
#+AUTHOR: A1
|
#+AUTHOR: A1
|
||||||
#+DESCRIPTION: D1
|
#+DESCRIPTION: D1
|
||||||
`
|
`
|
||||||
tstFrontMatterORG = Item{tFrontMatterORG, 0, tstORG}
|
tstFrontMatterORG = nti(tFrontMatterORG, tstORG)
|
||||||
)
|
)
|
||||||
|
|
||||||
var crLfReplacer = strings.NewReplacer("\r", "#", "\n", "$")
|
var crLfReplacer = strings.NewReplacer("\r", "#", "\n", "$")
|
||||||
|
@ -49,7 +54,7 @@ var crLfReplacer = strings.NewReplacer("\r", "#", "\n", "$")
|
||||||
// TODO(bep) a way to toggle ORG mode vs the rest.
|
// TODO(bep) a way to toggle ORG mode vs the rest.
|
||||||
var frontMatterTests = []lexerTest{
|
var frontMatterTests = []lexerTest{
|
||||||
{"empty", "", []Item{tstEOF}},
|
{"empty", "", []Item{tstEOF}},
|
||||||
{"HTML Document", ` <html> `, []Item{tstHTMLLead, Item{tText, 0, "html> "}, tstEOF}},
|
{"HTML Document", ` <html> `, []Item{tstHTMLLead, nti(tText, "html> "), tstEOF}},
|
||||||
{"YAML front matter", "---\nfoo: \"bar\"\n---\n\nSome text.\n", []Item{tstFrontMatterYAML, tstSomeText, tstEOF}},
|
{"YAML front matter", "---\nfoo: \"bar\"\n---\n\nSome text.\n", []Item{tstFrontMatterYAML, tstSomeText, tstEOF}},
|
||||||
// Note that we keep all bytes as they are, but we need to handle CRLF
|
// Note that we keep all bytes as they are, but we need to handle CRLF
|
||||||
{"YAML front matter CRLF", "---\r\nfoo: \"bar\"\r\n---\n\nSome text.\n", []Item{tstFrontMatterYAMLCRLF, tstSomeText, tstEOF}},
|
{"YAML front matter CRLF", "---\r\nfoo: \"bar\"\r\n---\n\nSome text.\n", []Item{tstFrontMatterYAMLCRLF, tstSomeText, tstEOF}},
|
||||||
|
@ -63,7 +68,7 @@ var frontMatterTests = []lexerTest{
|
||||||
func TestFrontMatter(t *testing.T) {
|
func TestFrontMatter(t *testing.T) {
|
||||||
t.Parallel()
|
t.Parallel()
|
||||||
for i, test := range frontMatterTests {
|
for i, test := range frontMatterTests {
|
||||||
items := collect(test.name, test.input, false, lexIntroSection)
|
items := collect([]byte(test.input), false, lexIntroSection)
|
||||||
if !equal(items, test.items) {
|
if !equal(items, test.items) {
|
||||||
got := crLfReplacer.Replace(fmt.Sprint(items))
|
got := crLfReplacer.Replace(fmt.Sprint(items))
|
||||||
expected := crLfReplacer.Replace(fmt.Sprint(test.items))
|
expected := crLfReplacer.Replace(fmt.Sprint(test.items))
|
||||||
|
@ -72,7 +77,7 @@ func TestFrontMatter(t *testing.T) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func collect(name, input string, skipFrontMatter bool, stateStart stateFunc) (items []Item) {
|
func collect(input []byte, skipFrontMatter bool, stateStart stateFunc) (items []Item) {
|
||||||
l := newPageLexer(input, 0, stateStart)
|
l := newPageLexer(input, 0, stateStart)
|
||||||
l.run()
|
l.run()
|
||||||
|
|
||||||
|
@ -95,7 +100,7 @@ func equal(i1, i2 []Item) bool {
|
||||||
if i1[k].typ != i2[k].typ {
|
if i1[k].typ != i2[k].typ {
|
||||||
return false
|
return false
|
||||||
}
|
}
|
||||||
if i1[k].Val != i2[k].Val {
|
if !reflect.DeepEqual(i1[k].Val, i2[k].Val) {
|
||||||
return false
|
return false
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -16,25 +16,25 @@ package pageparser
|
||||||
import "testing"
|
import "testing"
|
||||||
|
|
||||||
var (
|
var (
|
||||||
tstEOF = Item{tEOF, 0, ""}
|
tstEOF = nti(tEOF, "")
|
||||||
tstLeftNoMD = Item{tLeftDelimScNoMarkup, 0, "{{<"}
|
tstLeftNoMD = nti(tLeftDelimScNoMarkup, "{{<")
|
||||||
tstRightNoMD = Item{tRightDelimScNoMarkup, 0, ">}}"}
|
tstRightNoMD = nti(tRightDelimScNoMarkup, ">}}")
|
||||||
tstLeftMD = Item{tLeftDelimScWithMarkup, 0, "{{%"}
|
tstLeftMD = nti(tLeftDelimScWithMarkup, "{{%")
|
||||||
tstRightMD = Item{tRightDelimScWithMarkup, 0, "%}}"}
|
tstRightMD = nti(tRightDelimScWithMarkup, "%}}")
|
||||||
tstSCClose = Item{tScClose, 0, "/"}
|
tstSCClose = nti(tScClose, "/")
|
||||||
tstSC1 = Item{tScName, 0, "sc1"}
|
tstSC1 = nti(tScName, "sc1")
|
||||||
tstSC2 = Item{tScName, 0, "sc2"}
|
tstSC2 = nti(tScName, "sc2")
|
||||||
tstSC3 = Item{tScName, 0, "sc3"}
|
tstSC3 = nti(tScName, "sc3")
|
||||||
tstSCSlash = Item{tScName, 0, "sc/sub"}
|
tstSCSlash = nti(tScName, "sc/sub")
|
||||||
tstParam1 = Item{tScParam, 0, "param1"}
|
tstParam1 = nti(tScParam, "param1")
|
||||||
tstParam2 = Item{tScParam, 0, "param2"}
|
tstParam2 = nti(tScParam, "param2")
|
||||||
tstVal = Item{tScParamVal, 0, "Hello World"}
|
tstVal = nti(tScParamVal, "Hello World")
|
||||||
)
|
)
|
||||||
|
|
||||||
var shortCodeLexerTests = []lexerTest{
|
var shortCodeLexerTests = []lexerTest{
|
||||||
{"empty", "", []Item{tstEOF}},
|
{"empty", "", []Item{tstEOF}},
|
||||||
{"spaces", " \t\n", []Item{{tText, 0, " \t\n"}, tstEOF}},
|
{"spaces", " \t\n", []Item{nti(tText, " \t\n"), tstEOF}},
|
||||||
{"text", `to be or not`, []Item{{tText, 0, "to be or not"}, tstEOF}},
|
{"text", `to be or not`, []Item{nti(tText, "to be or not"), tstEOF}},
|
||||||
{"no markup", `{{< sc1 >}}`, []Item{tstLeftNoMD, tstSC1, tstRightNoMD, tstEOF}},
|
{"no markup", `{{< sc1 >}}`, []Item{tstLeftNoMD, tstSC1, tstRightNoMD, tstEOF}},
|
||||||
{"with EOL", "{{< sc1 \n >}}", []Item{tstLeftNoMD, tstSC1, tstRightNoMD, tstEOF}},
|
{"with EOL", "{{< sc1 \n >}}", []Item{tstLeftNoMD, tstSC1, tstRightNoMD, tstEOF}},
|
||||||
|
|
||||||
|
@ -43,12 +43,12 @@ var shortCodeLexerTests = []lexerTest{
|
||||||
{"simple with markup", `{{% sc1 %}}`, []Item{tstLeftMD, tstSC1, tstRightMD, tstEOF}},
|
{"simple with markup", `{{% sc1 %}}`, []Item{tstLeftMD, tstSC1, tstRightMD, tstEOF}},
|
||||||
{"with spaces", `{{< sc1 >}}`, []Item{tstLeftNoMD, tstSC1, tstRightNoMD, tstEOF}},
|
{"with spaces", `{{< sc1 >}}`, []Item{tstLeftNoMD, tstSC1, tstRightNoMD, tstEOF}},
|
||||||
{"mismatched rightDelim", `{{< sc1 %}}`, []Item{tstLeftNoMD, tstSC1,
|
{"mismatched rightDelim", `{{< sc1 %}}`, []Item{tstLeftNoMD, tstSC1,
|
||||||
{tError, 0, "unrecognized character in shortcode action: U+0025 '%'. Note: Parameters with non-alphanumeric args must be quoted"}}},
|
nti(tError, "unrecognized character in shortcode action: U+0025 '%'. Note: Parameters with non-alphanumeric args must be quoted")}},
|
||||||
{"inner, markup", `{{% sc1 %}} inner {{% /sc1 %}}`, []Item{
|
{"inner, markup", `{{% sc1 %}} inner {{% /sc1 %}}`, []Item{
|
||||||
tstLeftMD,
|
tstLeftMD,
|
||||||
tstSC1,
|
tstSC1,
|
||||||
tstRightMD,
|
tstRightMD,
|
||||||
{tText, 0, " inner "},
|
nti(tText, " inner "),
|
||||||
tstLeftMD,
|
tstLeftMD,
|
||||||
tstSCClose,
|
tstSCClose,
|
||||||
tstSC1,
|
tstSC1,
|
||||||
|
@ -56,20 +56,20 @@ var shortCodeLexerTests = []lexerTest{
|
||||||
tstEOF,
|
tstEOF,
|
||||||
}},
|
}},
|
||||||
{"close, but no open", `{{< /sc1 >}}`, []Item{
|
{"close, but no open", `{{< /sc1 >}}`, []Item{
|
||||||
tstLeftNoMD, {tError, 0, "got closing shortcode, but none is open"}}},
|
tstLeftNoMD, nti(tError, "got closing shortcode, but none is open")}},
|
||||||
{"close wrong", `{{< sc1 >}}{{< /another >}}`, []Item{
|
{"close wrong", `{{< sc1 >}}{{< /another >}}`, []Item{
|
||||||
tstLeftNoMD, tstSC1, tstRightNoMD, tstLeftNoMD, tstSCClose,
|
tstLeftNoMD, tstSC1, tstRightNoMD, tstLeftNoMD, tstSCClose,
|
||||||
{tError, 0, "closing tag for shortcode 'another' does not match start tag"}}},
|
nti(tError, "closing tag for shortcode 'another' does not match start tag")}},
|
||||||
{"close, but no open, more", `{{< sc1 >}}{{< /sc1 >}}{{< /another >}}`, []Item{
|
{"close, but no open, more", `{{< sc1 >}}{{< /sc1 >}}{{< /another >}}`, []Item{
|
||||||
tstLeftNoMD, tstSC1, tstRightNoMD, tstLeftNoMD, tstSCClose, tstSC1, tstRightNoMD, tstLeftNoMD, tstSCClose,
|
tstLeftNoMD, tstSC1, tstRightNoMD, tstLeftNoMD, tstSCClose, tstSC1, tstRightNoMD, tstLeftNoMD, tstSCClose,
|
||||||
{tError, 0, "closing tag for shortcode 'another' does not match start tag"}}},
|
nti(tError, "closing tag for shortcode 'another' does not match start tag")}},
|
||||||
{"close with extra keyword", `{{< sc1 >}}{{< /sc1 keyword>}}`, []Item{
|
{"close with extra keyword", `{{< sc1 >}}{{< /sc1 keyword>}}`, []Item{
|
||||||
tstLeftNoMD, tstSC1, tstRightNoMD, tstLeftNoMD, tstSCClose, tstSC1,
|
tstLeftNoMD, tstSC1, tstRightNoMD, tstLeftNoMD, tstSCClose, tstSC1,
|
||||||
{tError, 0, "unclosed shortcode"}}},
|
nti(tError, "unclosed shortcode")}},
|
||||||
{"Youtube id", `{{< sc1 -ziL-Q_456igdO-4 >}}`, []Item{
|
{"Youtube id", `{{< sc1 -ziL-Q_456igdO-4 >}}`, []Item{
|
||||||
tstLeftNoMD, tstSC1, {tScParam, 0, "-ziL-Q_456igdO-4"}, tstRightNoMD, tstEOF}},
|
tstLeftNoMD, tstSC1, nti(tScParam, "-ziL-Q_456igdO-4"), tstRightNoMD, tstEOF}},
|
||||||
{"non-alphanumerics param quoted", `{{< sc1 "-ziL-.%QigdO-4" >}}`, []Item{
|
{"non-alphanumerics param quoted", `{{< sc1 "-ziL-.%QigdO-4" >}}`, []Item{
|
||||||
tstLeftNoMD, tstSC1, {tScParam, 0, "-ziL-.%QigdO-4"}, tstRightNoMD, tstEOF}},
|
tstLeftNoMD, tstSC1, nti(tScParam, "-ziL-.%QigdO-4"), tstRightNoMD, tstEOF}},
|
||||||
|
|
||||||
{"two params", `{{< sc1 param1 param2 >}}`, []Item{
|
{"two params", `{{< sc1 param1 param2 >}}`, []Item{
|
||||||
tstLeftNoMD, tstSC1, tstParam1, tstParam2, tstRightNoMD, tstEOF}},
|
tstLeftNoMD, tstSC1, tstParam1, tstParam2, tstRightNoMD, tstEOF}},
|
||||||
|
@ -94,64 +94,64 @@ var shortCodeLexerTests = []lexerTest{
|
||||||
tstLeftNoMD, tstSCClose, tstSC1, tstRightNoMD, tstEOF}},
|
tstLeftNoMD, tstSCClose, tstSC1, tstRightNoMD, tstEOF}},
|
||||||
{"nested complex", `{{< sc1 >}}ab{{% sc2 param1 %}}cd{{< sc3 >}}ef{{< /sc3 >}}gh{{% /sc2 %}}ij{{< /sc1 >}}kl`, []Item{
|
{"nested complex", `{{< sc1 >}}ab{{% sc2 param1 %}}cd{{< sc3 >}}ef{{< /sc3 >}}gh{{% /sc2 %}}ij{{< /sc1 >}}kl`, []Item{
|
||||||
tstLeftNoMD, tstSC1, tstRightNoMD,
|
tstLeftNoMD, tstSC1, tstRightNoMD,
|
||||||
{tText, 0, "ab"},
|
nti(tText, "ab"),
|
||||||
tstLeftMD, tstSC2, tstParam1, tstRightMD,
|
tstLeftMD, tstSC2, tstParam1, tstRightMD,
|
||||||
{tText, 0, "cd"},
|
nti(tText, "cd"),
|
||||||
tstLeftNoMD, tstSC3, tstRightNoMD,
|
tstLeftNoMD, tstSC3, tstRightNoMD,
|
||||||
{tText, 0, "ef"},
|
nti(tText, "ef"),
|
||||||
tstLeftNoMD, tstSCClose, tstSC3, tstRightNoMD,
|
tstLeftNoMD, tstSCClose, tstSC3, tstRightNoMD,
|
||||||
{tText, 0, "gh"},
|
nti(tText, "gh"),
|
||||||
tstLeftMD, tstSCClose, tstSC2, tstRightMD,
|
tstLeftMD, tstSCClose, tstSC2, tstRightMD,
|
||||||
{tText, 0, "ij"},
|
nti(tText, "ij"),
|
||||||
tstLeftNoMD, tstSCClose, tstSC1, tstRightNoMD,
|
tstLeftNoMD, tstSCClose, tstSC1, tstRightNoMD,
|
||||||
{tText, 0, "kl"}, tstEOF,
|
nti(tText, "kl"), tstEOF,
|
||||||
}},
|
}},
|
||||||
|
|
||||||
{"two quoted params", `{{< sc1 "param nr. 1" "param nr. 2" >}}`, []Item{
|
{"two quoted params", `{{< sc1 "param nr. 1" "param nr. 2" >}}`, []Item{
|
||||||
tstLeftNoMD, tstSC1, {tScParam, 0, "param nr. 1"}, {tScParam, 0, "param nr. 2"}, tstRightNoMD, tstEOF}},
|
tstLeftNoMD, tstSC1, nti(tScParam, "param nr. 1"), nti(tScParam, "param nr. 2"), tstRightNoMD, tstEOF}},
|
||||||
{"two named params", `{{< sc1 param1="Hello World" param2="p2Val">}}`, []Item{
|
{"two named params", `{{< sc1 param1="Hello World" param2="p2Val">}}`, []Item{
|
||||||
tstLeftNoMD, tstSC1, tstParam1, tstVal, tstParam2, {tScParamVal, 0, "p2Val"}, tstRightNoMD, tstEOF}},
|
tstLeftNoMD, tstSC1, tstParam1, tstVal, tstParam2, nti(tScParamVal, "p2Val"), tstRightNoMD, tstEOF}},
|
||||||
{"escaped quotes", `{{< sc1 param1=\"Hello World\" >}}`, []Item{
|
{"escaped quotes", `{{< sc1 param1=\"Hello World\" >}}`, []Item{
|
||||||
tstLeftNoMD, tstSC1, tstParam1, tstVal, tstRightNoMD, tstEOF}},
|
tstLeftNoMD, tstSC1, tstParam1, tstVal, tstRightNoMD, tstEOF}},
|
||||||
{"escaped quotes, positional param", `{{< sc1 \"param1\" >}}`, []Item{
|
{"escaped quotes, positional param", `{{< sc1 \"param1\" >}}`, []Item{
|
||||||
tstLeftNoMD, tstSC1, tstParam1, tstRightNoMD, tstEOF}},
|
tstLeftNoMD, tstSC1, tstParam1, tstRightNoMD, tstEOF}},
|
||||||
{"escaped quotes inside escaped quotes", `{{< sc1 param1=\"Hello \"escaped\" World\" >}}`, []Item{
|
{"escaped quotes inside escaped quotes", `{{< sc1 param1=\"Hello \"escaped\" World\" >}}`, []Item{
|
||||||
tstLeftNoMD, tstSC1, tstParam1,
|
tstLeftNoMD, tstSC1, tstParam1,
|
||||||
{tScParamVal, 0, `Hello `}, {tError, 0, `got positional parameter 'escaped'. Cannot mix named and positional parameters`}}},
|
nti(tScParamVal, `Hello `), nti(tError, `got positional parameter 'escaped'. Cannot mix named and positional parameters`)}},
|
||||||
{"escaped quotes inside nonescaped quotes",
|
{"escaped quotes inside nonescaped quotes",
|
||||||
`{{< sc1 param1="Hello \"escaped\" World" >}}`, []Item{
|
`{{< sc1 param1="Hello \"escaped\" World" >}}`, []Item{
|
||||||
tstLeftNoMD, tstSC1, tstParam1, {tScParamVal, 0, `Hello "escaped" World`}, tstRightNoMD, tstEOF}},
|
tstLeftNoMD, tstSC1, tstParam1, nti(tScParamVal, `Hello "escaped" World`), tstRightNoMD, tstEOF}},
|
||||||
{"escaped quotes inside nonescaped quotes in positional param",
|
{"escaped quotes inside nonescaped quotes in positional param",
|
||||||
`{{< sc1 "Hello \"escaped\" World" >}}`, []Item{
|
`{{< sc1 "Hello \"escaped\" World" >}}`, []Item{
|
||||||
tstLeftNoMD, tstSC1, {tScParam, 0, `Hello "escaped" World`}, tstRightNoMD, tstEOF}},
|
tstLeftNoMD, tstSC1, nti(tScParam, `Hello "escaped" World`), tstRightNoMD, tstEOF}},
|
||||||
{"unterminated quote", `{{< sc1 param2="Hello World>}}`, []Item{
|
{"unterminated quote", `{{< sc1 param2="Hello World>}}`, []Item{
|
||||||
tstLeftNoMD, tstSC1, tstParam2, {tError, 0, "unterminated quoted string in shortcode parameter-argument: 'Hello World>}}'"}}},
|
tstLeftNoMD, tstSC1, tstParam2, nti(tError, "unterminated quoted string in shortcode parameter-argument: 'Hello World>}}'")}},
|
||||||
{"one named param, one not", `{{< sc1 param1="Hello World" p2 >}}`, []Item{
|
{"one named param, one not", `{{< sc1 param1="Hello World" p2 >}}`, []Item{
|
||||||
tstLeftNoMD, tstSC1, tstParam1, tstVal,
|
tstLeftNoMD, tstSC1, tstParam1, tstVal,
|
||||||
{tError, 0, "got positional parameter 'p2'. Cannot mix named and positional parameters"}}},
|
nti(tError, "got positional parameter 'p2'. Cannot mix named and positional parameters")}},
|
||||||
{"one named param, one quoted positional param", `{{< sc1 param1="Hello World" "And Universe" >}}`, []Item{
|
{"one named param, one quoted positional param", `{{< sc1 param1="Hello World" "And Universe" >}}`, []Item{
|
||||||
tstLeftNoMD, tstSC1, tstParam1, tstVal,
|
tstLeftNoMD, tstSC1, tstParam1, tstVal,
|
||||||
{tError, 0, "got quoted positional parameter. Cannot mix named and positional parameters"}}},
|
nti(tError, "got quoted positional parameter. Cannot mix named and positional parameters")}},
|
||||||
{"one quoted positional param, one named param", `{{< sc1 "param1" param2="And Universe" >}}`, []Item{
|
{"one quoted positional param, one named param", `{{< sc1 "param1" param2="And Universe" >}}`, []Item{
|
||||||
tstLeftNoMD, tstSC1, tstParam1,
|
tstLeftNoMD, tstSC1, tstParam1,
|
||||||
{tError, 0, "got named parameter 'param2'. Cannot mix named and positional parameters"}}},
|
nti(tError, "got named parameter 'param2'. Cannot mix named and positional parameters")}},
|
||||||
{"ono positional param, one not", `{{< sc1 param1 param2="Hello World">}}`, []Item{
|
{"ono positional param, one not", `{{< sc1 param1 param2="Hello World">}}`, []Item{
|
||||||
tstLeftNoMD, tstSC1, tstParam1,
|
tstLeftNoMD, tstSC1, tstParam1,
|
||||||
{tError, 0, "got named parameter 'param2'. Cannot mix named and positional parameters"}}},
|
nti(tError, "got named parameter 'param2'. Cannot mix named and positional parameters")}},
|
||||||
{"commented out", `{{</* sc1 */>}}`, []Item{
|
{"commented out", `{{</* sc1 */>}}`, []Item{
|
||||||
{tText, 0, "{{<"}, {tText, 0, " sc1 "}, {tText, 0, ">}}"}, tstEOF}},
|
nti(tText, "{{<"), nti(tText, " sc1 "), nti(tText, ">}}"), tstEOF}},
|
||||||
{"commented out, with asterisk inside", `{{</* sc1 "**/*.pdf" */>}}`, []Item{
|
{"commented out, with asterisk inside", `{{</* sc1 "**/*.pdf" */>}}`, []Item{
|
||||||
{tText, 0, "{{<"}, {tText, 0, " sc1 \"**/*.pdf\" "}, {tText, 0, ">}}"}, tstEOF}},
|
nti(tText, "{{<"), nti(tText, " sc1 \"**/*.pdf\" "), nti(tText, ">}}"), tstEOF}},
|
||||||
{"commented out, missing close", `{{</* sc1 >}}`, []Item{
|
{"commented out, missing close", `{{</* sc1 >}}`, []Item{
|
||||||
{tError, 0, "comment must be closed"}}},
|
nti(tError, "comment must be closed")}},
|
||||||
{"commented out, misplaced close", `{{</* sc1 >}}*/`, []Item{
|
{"commented out, misplaced close", `{{</* sc1 >}}*/`, []Item{
|
||||||
{tError, 0, "comment must be closed"}}},
|
nti(tError, "comment must be closed")}},
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestShortcodeLexer(t *testing.T) {
|
func TestShortcodeLexer(t *testing.T) {
|
||||||
t.Parallel()
|
t.Parallel()
|
||||||
for i, test := range shortCodeLexerTests {
|
for i, test := range shortCodeLexerTests {
|
||||||
items := collect(test.name, test.input, true, lexMainSection)
|
items := collect([]byte(test.input), true, lexMainSection)
|
||||||
if !equal(items, test.items) {
|
if !equal(items, test.items) {
|
||||||
t.Errorf("[%d] %s: got\n\t%v\nexpected\n\t%v", i, test.name, items, test.items)
|
t.Errorf("[%d] %s: got\n\t%v\nexpected\n\t%v", i, test.name, items, test.items)
|
||||||
}
|
}
|
||||||
|
@ -159,13 +159,17 @@ func TestShortcodeLexer(t *testing.T) {
|
||||||
}
|
}
|
||||||
|
|
||||||
func BenchmarkShortcodeLexer(b *testing.B) {
|
func BenchmarkShortcodeLexer(b *testing.B) {
|
||||||
|
testInputs := make([][]byte, len(shortCodeLexerTests))
|
||||||
|
for i, input := range shortCodeLexerTests {
|
||||||
|
testInputs[i] = []byte(input.input)
|
||||||
|
}
|
||||||
b.ResetTimer()
|
b.ResetTimer()
|
||||||
for i := 0; i < b.N; i++ {
|
for i := 0; i < b.N; i++ {
|
||||||
for _, test := range shortCodeLexerTests {
|
for _, input := range testInputs {
|
||||||
items := collect(test.name, test.input, true, lexMainSection)
|
items := collect(input, true, lexMainSection)
|
||||||
if !equal(items, test.items) {
|
if len(items) == 0 {
|
||||||
b.Errorf("%s: got\n\t%v\nexpected\n\t%v", test.name, items, test.items)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in a new issue