From 27f5a906a2a34e3b8348c8baeea48355352b5bbb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B8rn=20Erik=20Pedersen?= Date: Thu, 18 Oct 2018 09:04:48 +0200 Subject: [PATCH] parser/pageparser: Use []byte in page lexer See #5324 --- parser/pageparser/item.go | 4 +- parser/pageparser/pagelexer.go | 83 ++++++++------- parser/pageparser/pageparser_intro_test.go | 31 +++--- .../pageparser/pageparser_shortcode_test.go | 100 +++++++++--------- 4 files changed, 115 insertions(+), 103 deletions(-) diff --git a/parser/pageparser/item.go b/parser/pageparser/item.go index f7495c90e..35bc8e268 100644 --- a/parser/pageparser/item.go +++ b/parser/pageparser/item.go @@ -18,7 +18,7 @@ import "fmt" type Item struct { typ itemType pos pos - Val string + Val []byte } func (i Item) IsText() bool { @@ -70,7 +70,7 @@ func (i Item) String() string { case i.typ == tEOF: return "EOF" case i.typ == tError: - return i.Val + return string(i.Val) case i.typ > tKeywordMarker: return fmt.Sprintf("<%s>", i.Val) case len(i.Val) > 50: diff --git a/parser/pageparser/pagelexer.go b/parser/pageparser/pagelexer.go index 0c97becde..3bdfb6c33 100644 --- a/parser/pageparser/pagelexer.go +++ b/parser/pageparser/pagelexer.go @@ -18,8 +18,8 @@ package pageparser import ( + "bytes" "fmt" - "strings" "unicode" "unicode/utf8" ) @@ -44,7 +44,7 @@ type lexerShortcodeState struct { } type pageLexer struct { - input string + input []byte stateStart stateFunc state stateFunc pos pos // input position @@ -65,14 +65,16 @@ func Parse(s string) *Tokens { } func ParseFrom(s string, from int) *Tokens { - lexer := newPageLexer(s, pos(from), lexMainSection) // TODO(bep) 2errors + input := []byte(s) + lexer := newPageLexer(input, pos(from), lexMainSection) // TODO(bep) 2errors lexer.run() return &Tokens{lexer: lexer} } // note: the input position here is normally 0 (start), but // can be set if position of first shortcode is known -func newPageLexer(input string, inputPosition pos, stateStart stateFunc) *pageLexer { +// TODO(bep) 2errors byte +func newPageLexer(input []byte, inputPosition pos, stateStart stateFunc) *pageLexer { lexer := &pageLexer{ input: input, pos: inputPosition, @@ -97,19 +99,22 @@ func (l *pageLexer) run() *pageLexer { } // Shortcode syntax -const ( - leftDelimScNoMarkup = "{{<" - rightDelimScNoMarkup = ">}}" - leftDelimScWithMarkup = "{{%" - rightDelimScWithMarkup = "%}}" - leftComment = "/*" // comments in this context us used to to mark shortcodes as "not really a shortcode" - rightComment = "*/" +var ( + leftDelimScNoMarkup = []byte("{{<") + rightDelimScNoMarkup = []byte(">}}") + leftDelimScWithMarkup = []byte("{{%") + rightDelimScWithMarkup = []byte("%}}") + leftComment = []byte("/*") // comments in this context us used to to mark shortcodes as "not really a shortcode" + rightComment = []byte("*/") ) // Page syntax -const ( - summaryDivider = "" - summaryDividerOrg = "# more" +var ( + summaryDivider = []byte("") + summaryDividerOrg = []byte("# more") + delimTOML = []byte("+++") + delimYAML = []byte("---") + delimOrg = []byte("#+") ) func (l *pageLexer) next() rune { @@ -118,9 +123,7 @@ func (l *pageLexer) next() rune { return eof } - // looks expensive, but should produce the same iteration sequence as the string range loop - // see: http://blog.golang.org/strings - runeValue, runeWidth := utf8.DecodeRuneInString(l.input[l.pos:]) + runeValue, runeWidth := utf8.DecodeRune(l.input[l.pos:]) l.width = pos(runeWidth) l.pos += l.width return runeValue @@ -146,7 +149,7 @@ func (l *pageLexer) emit(t itemType) { // special case, do not send '\\' back to client func (l *pageLexer) ignoreEscapesAndEmit(t itemType) { - val := strings.Map(func(r rune) rune { + val := bytes.Map(func(r rune) rune { if r == '\\' { return -1 } @@ -157,7 +160,7 @@ func (l *pageLexer) ignoreEscapesAndEmit(t itemType) { } // gets the current value (for debugging and error handling) -func (l *pageLexer) current() string { +func (l *pageLexer) current() []byte { return l.input[l.start:l.pos] } @@ -166,14 +169,16 @@ func (l *pageLexer) ignore() { l.start = l.pos } +var lf = []byte("\n") + // nice to have in error logs func (l *pageLexer) lineNum() int { - return strings.Count(l.input[:l.lastPos], "\n") + 1 + return bytes.Count(l.input[:l.lastPos], lf) + 1 } // nil terminates the parser func (l *pageLexer) errorf(format string, args ...interface{}) stateFunc { - l.items = append(l.items, Item{tError, l.start, fmt.Sprintf(format, args...)}) + l.items = append(l.items, Item{tError, l.start, []byte(fmt.Sprintf(format, args...))}) return nil } @@ -203,7 +208,7 @@ func lexMainSection(l *pageLexer) stateFunc { if l.pos > l.start { l.emit(tText) } - if strings.HasPrefix(l.input[l.pos:], leftDelimScWithMarkup) { + if bytes.HasPrefix(l.input[l.pos:], leftDelimScWithMarkup) { l.currLeftDelimItem = tLeftDelimScWithMarkup l.currRightDelimItem = tRightDelimScWithMarkup } else { @@ -214,14 +219,14 @@ func lexMainSection(l *pageLexer) stateFunc { } if l.contentSections <= 1 { - if strings.HasPrefix(l.input[l.pos:], summaryDivider) { + if bytes.HasPrefix(l.input[l.pos:], summaryDivider) { if l.pos > l.start { l.emit(tText) } l.contentSections++ l.pos += pos(len(summaryDivider)) l.emit(tSummaryDivider) - } else if strings.HasPrefix(l.input[l.pos:], summaryDividerOrg) { + } else if bytes.HasPrefix(l.input[l.pos:], summaryDividerOrg) { if l.pos > l.start { l.emit(tText) } @@ -243,7 +248,7 @@ func lexMainSection(l *pageLexer) stateFunc { } func (l *pageLexer) isShortCodeStart() bool { - return strings.HasPrefix(l.input[l.pos:], leftDelimScWithMarkup) || strings.HasPrefix(l.input[l.pos:], leftDelimScNoMarkup) + return bytes.HasPrefix(l.input[l.pos:], leftDelimScWithMarkup) || bytes.HasPrefix(l.input[l.pos:], leftDelimScNoMarkup) } func lexIntroSection(l *pageLexer) stateFunc { @@ -256,9 +261,9 @@ LOOP: switch { case r == '+': - return l.lexFrontMatterSection(tFrontMatterTOML, r, "TOML", "+++") + return l.lexFrontMatterSection(tFrontMatterTOML, r, "TOML", delimTOML) case r == '-': - return l.lexFrontMatterSection(tFrontMatterYAML, r, "YAML", "---") + return l.lexFrontMatterSection(tFrontMatterYAML, r, "YAML", delimYAML) case r == '{': return lexFrontMatterJSON case r == '#': @@ -342,11 +347,9 @@ func lexFrontMatterOrgMode(l *pageLexer) stateFunc { #+DESCRIPTION: Just another golang parser for org content! */ - const prefix = "#+" - l.backup() - if !strings.HasPrefix(l.input[l.pos:], prefix) { + if !bytes.HasPrefix(l.input[l.pos:], delimOrg) { // TODO(bep) consider error return lexMainSection } @@ -359,7 +362,7 @@ LOOP: switch { case r == '\n': - if !strings.HasPrefix(l.input[l.pos:], prefix) { + if !bytes.HasPrefix(l.input[l.pos:], delimOrg) { break LOOP } case r == eof: @@ -375,7 +378,7 @@ LOOP: } // Handle YAML or TOML front matter. -func (l *pageLexer) lexFrontMatterSection(tp itemType, delimr rune, name, delim string) stateFunc { +func (l *pageLexer) lexFrontMatterSection(tp itemType, delimr rune, name string, delim []byte) stateFunc { for i := 0; i < 2; i++ { if r := l.next(); r != delimr { return l.errorf("invalid %s delimiter", name) @@ -395,7 +398,7 @@ func (l *pageLexer) lexFrontMatterSection(tp itemType, delimr rune, name, delim return l.errorf("EOF looking for end %s front matter delimiter", name) } if isEndOfLine(r) { - if strings.HasPrefix(l.input[l.pos:], delim) { + if bytes.HasPrefix(l.input[l.pos:], delim) { l.emit(tp) l.pos += 3 l.consumeCRLF() @@ -410,7 +413,7 @@ func (l *pageLexer) lexFrontMatterSection(tp itemType, delimr rune, name, delim func lexShortcodeLeftDelim(l *pageLexer) stateFunc { l.pos += pos(len(l.currentLeftShortcodeDelim())) - if strings.HasPrefix(l.input[l.pos:], leftComment) { + if bytes.HasPrefix(l.input[l.pos:], leftComment) { return lexShortcodeComment } l.emit(l.currentLeftShortcodeDelimItem()) @@ -420,7 +423,7 @@ func lexShortcodeLeftDelim(l *pageLexer) stateFunc { } func lexShortcodeComment(l *pageLexer) stateFunc { - posRightComment := strings.Index(l.input[l.pos:], rightComment+l.currentRightShortcodeDelim()) + posRightComment := bytes.Index(l.input[l.pos:], append(rightComment, l.currentRightShortcodeDelim()...)) if posRightComment <= 1 { return l.errorf("comment must be closed") } @@ -576,7 +579,7 @@ Loop: case r == '/': default: l.backup() - word := l.input[l.start:l.pos] + word := string(l.input[l.start:l.pos]) if l.closingState > 0 && !l.openShortcodes[word] { return l.errorf("closing tag for shortcode '%s' does not match start tag", word) } else if l.closingState > 0 { @@ -600,7 +603,7 @@ Loop: } func lexEndOfShortcode(l *pageLexer) stateFunc { - if strings.HasPrefix(l.input[l.pos:], l.currentRightShortcodeDelim()) { + if bytes.HasPrefix(l.input[l.pos:], l.currentRightShortcodeDelim()) { return lexShortcodeRightDelim } switch r := l.next(); { @@ -614,7 +617,7 @@ func lexEndOfShortcode(l *pageLexer) stateFunc { // scans the elements inside shortcode tags func lexInsideShortcode(l *pageLexer) stateFunc { - if strings.HasPrefix(l.input[l.pos:], l.currentRightShortcodeDelim()) { + if bytes.HasPrefix(l.input[l.pos:], l.currentRightShortcodeDelim()) { return lexShortcodeRightDelim } switch r := l.next(); { @@ -659,7 +662,7 @@ func (l *pageLexer) currentRightShortcodeDelimItem() itemType { return l.currRightDelimItem } -func (l *pageLexer) currentLeftShortcodeDelim() string { +func (l *pageLexer) currentLeftShortcodeDelim() []byte { if l.currLeftDelimItem == tLeftDelimScWithMarkup { return leftDelimScWithMarkup } @@ -667,7 +670,7 @@ func (l *pageLexer) currentLeftShortcodeDelim() string { } -func (l *pageLexer) currentRightShortcodeDelim() string { +func (l *pageLexer) currentRightShortcodeDelim() []byte { if l.currRightDelimItem == tRightDelimScWithMarkup { return rightDelimScWithMarkup } diff --git a/parser/pageparser/pageparser_intro_test.go b/parser/pageparser/pageparser_intro_test.go index 3dc08c776..19e30dc9a 100644 --- a/parser/pageparser/pageparser_intro_test.go +++ b/parser/pageparser/pageparser_intro_test.go @@ -15,6 +15,7 @@ package pageparser import ( "fmt" + "reflect" "strings" "testing" ) @@ -25,23 +26,27 @@ type lexerTest struct { items []Item } +func nti(tp itemType, val string) Item { + return Item{tp, 0, []byte(val)} +} + var ( tstJSON = `{ "a": { "b": "\"Hugo\"}" } }` - tstHTMLLead = Item{tHTMLLead, 0, " <"} - tstFrontMatterTOML = Item{tFrontMatterTOML, 0, "foo = \"bar\"\n"} - tstFrontMatterYAML = Item{tFrontMatterYAML, 0, "foo: \"bar\"\n"} - tstFrontMatterYAMLCRLF = Item{tFrontMatterYAML, 0, "foo: \"bar\"\r\n"} - tstFrontMatterJSON = Item{tFrontMatterJSON, 0, tstJSON + "\r\n"} - tstSomeText = Item{tText, 0, "\nSome text.\n"} - tstSummaryDivider = Item{tSummaryDivider, 0, ""} - tstSummaryDividerOrg = Item{tSummaryDividerOrg, 0, "# more"} + tstHTMLLead = nti(tHTMLLead, " <") + tstFrontMatterTOML = nti(tFrontMatterTOML, "foo = \"bar\"\n") + tstFrontMatterYAML = nti(tFrontMatterYAML, "foo: \"bar\"\n") + tstFrontMatterYAMLCRLF = nti(tFrontMatterYAML, "foo: \"bar\"\r\n") + tstFrontMatterJSON = nti(tFrontMatterJSON, tstJSON+"\r\n") + tstSomeText = nti(tText, "\nSome text.\n") + tstSummaryDivider = nti(tSummaryDivider, "") + tstSummaryDividerOrg = nti(tSummaryDividerOrg, "# more") tstORG = ` #+TITLE: T1 #+AUTHOR: A1 #+DESCRIPTION: D1 ` - tstFrontMatterORG = Item{tFrontMatterORG, 0, tstORG} + tstFrontMatterORG = nti(tFrontMatterORG, tstORG) ) var crLfReplacer = strings.NewReplacer("\r", "#", "\n", "$") @@ -49,7 +54,7 @@ var crLfReplacer = strings.NewReplacer("\r", "#", "\n", "$") // TODO(bep) a way to toggle ORG mode vs the rest. var frontMatterTests = []lexerTest{ {"empty", "", []Item{tstEOF}}, - {"HTML Document", ` `, []Item{tstHTMLLead, Item{tText, 0, "html> "}, tstEOF}}, + {"HTML Document", ` `, []Item{tstHTMLLead, nti(tText, "html> "), tstEOF}}, {"YAML front matter", "---\nfoo: \"bar\"\n---\n\nSome text.\n", []Item{tstFrontMatterYAML, tstSomeText, tstEOF}}, // Note that we keep all bytes as they are, but we need to handle CRLF {"YAML front matter CRLF", "---\r\nfoo: \"bar\"\r\n---\n\nSome text.\n", []Item{tstFrontMatterYAMLCRLF, tstSomeText, tstEOF}}, @@ -63,7 +68,7 @@ var frontMatterTests = []lexerTest{ func TestFrontMatter(t *testing.T) { t.Parallel() for i, test := range frontMatterTests { - items := collect(test.name, test.input, false, lexIntroSection) + items := collect([]byte(test.input), false, lexIntroSection) if !equal(items, test.items) { got := crLfReplacer.Replace(fmt.Sprint(items)) expected := crLfReplacer.Replace(fmt.Sprint(test.items)) @@ -72,7 +77,7 @@ func TestFrontMatter(t *testing.T) { } } -func collect(name, input string, skipFrontMatter bool, stateStart stateFunc) (items []Item) { +func collect(input []byte, skipFrontMatter bool, stateStart stateFunc) (items []Item) { l := newPageLexer(input, 0, stateStart) l.run() @@ -95,7 +100,7 @@ func equal(i1, i2 []Item) bool { if i1[k].typ != i2[k].typ { return false } - if i1[k].Val != i2[k].Val { + if !reflect.DeepEqual(i1[k].Val, i2[k].Val) { return false } } diff --git a/parser/pageparser/pageparser_shortcode_test.go b/parser/pageparser/pageparser_shortcode_test.go index 525c7452f..efef6fca2 100644 --- a/parser/pageparser/pageparser_shortcode_test.go +++ b/parser/pageparser/pageparser_shortcode_test.go @@ -16,25 +16,25 @@ package pageparser import "testing" var ( - tstEOF = Item{tEOF, 0, ""} - tstLeftNoMD = Item{tLeftDelimScNoMarkup, 0, "{{<"} - tstRightNoMD = Item{tRightDelimScNoMarkup, 0, ">}}"} - tstLeftMD = Item{tLeftDelimScWithMarkup, 0, "{{%"} - tstRightMD = Item{tRightDelimScWithMarkup, 0, "%}}"} - tstSCClose = Item{tScClose, 0, "/"} - tstSC1 = Item{tScName, 0, "sc1"} - tstSC2 = Item{tScName, 0, "sc2"} - tstSC3 = Item{tScName, 0, "sc3"} - tstSCSlash = Item{tScName, 0, "sc/sub"} - tstParam1 = Item{tScParam, 0, "param1"} - tstParam2 = Item{tScParam, 0, "param2"} - tstVal = Item{tScParamVal, 0, "Hello World"} + tstEOF = nti(tEOF, "") + tstLeftNoMD = nti(tLeftDelimScNoMarkup, "{{<") + tstRightNoMD = nti(tRightDelimScNoMarkup, ">}}") + tstLeftMD = nti(tLeftDelimScWithMarkup, "{{%") + tstRightMD = nti(tRightDelimScWithMarkup, "%}}") + tstSCClose = nti(tScClose, "/") + tstSC1 = nti(tScName, "sc1") + tstSC2 = nti(tScName, "sc2") + tstSC3 = nti(tScName, "sc3") + tstSCSlash = nti(tScName, "sc/sub") + tstParam1 = nti(tScParam, "param1") + tstParam2 = nti(tScParam, "param2") + tstVal = nti(tScParamVal, "Hello World") ) var shortCodeLexerTests = []lexerTest{ {"empty", "", []Item{tstEOF}}, - {"spaces", " \t\n", []Item{{tText, 0, " \t\n"}, tstEOF}}, - {"text", `to be or not`, []Item{{tText, 0, "to be or not"}, tstEOF}}, + {"spaces", " \t\n", []Item{nti(tText, " \t\n"), tstEOF}}, + {"text", `to be or not`, []Item{nti(tText, "to be or not"), tstEOF}}, {"no markup", `{{< sc1 >}}`, []Item{tstLeftNoMD, tstSC1, tstRightNoMD, tstEOF}}, {"with EOL", "{{< sc1 \n >}}", []Item{tstLeftNoMD, tstSC1, tstRightNoMD, tstEOF}}, @@ -43,12 +43,12 @@ var shortCodeLexerTests = []lexerTest{ {"simple with markup", `{{% sc1 %}}`, []Item{tstLeftMD, tstSC1, tstRightMD, tstEOF}}, {"with spaces", `{{< sc1 >}}`, []Item{tstLeftNoMD, tstSC1, tstRightNoMD, tstEOF}}, {"mismatched rightDelim", `{{< sc1 %}}`, []Item{tstLeftNoMD, tstSC1, - {tError, 0, "unrecognized character in shortcode action: U+0025 '%'. Note: Parameters with non-alphanumeric args must be quoted"}}}, + nti(tError, "unrecognized character in shortcode action: U+0025 '%'. Note: Parameters with non-alphanumeric args must be quoted")}}, {"inner, markup", `{{% sc1 %}} inner {{% /sc1 %}}`, []Item{ tstLeftMD, tstSC1, tstRightMD, - {tText, 0, " inner "}, + nti(tText, " inner "), tstLeftMD, tstSCClose, tstSC1, @@ -56,20 +56,20 @@ var shortCodeLexerTests = []lexerTest{ tstEOF, }}, {"close, but no open", `{{< /sc1 >}}`, []Item{ - tstLeftNoMD, {tError, 0, "got closing shortcode, but none is open"}}}, + tstLeftNoMD, nti(tError, "got closing shortcode, but none is open")}}, {"close wrong", `{{< sc1 >}}{{< /another >}}`, []Item{ tstLeftNoMD, tstSC1, tstRightNoMD, tstLeftNoMD, tstSCClose, - {tError, 0, "closing tag for shortcode 'another' does not match start tag"}}}, + nti(tError, "closing tag for shortcode 'another' does not match start tag")}}, {"close, but no open, more", `{{< sc1 >}}{{< /sc1 >}}{{< /another >}}`, []Item{ tstLeftNoMD, tstSC1, tstRightNoMD, tstLeftNoMD, tstSCClose, tstSC1, tstRightNoMD, tstLeftNoMD, tstSCClose, - {tError, 0, "closing tag for shortcode 'another' does not match start tag"}}}, + nti(tError, "closing tag for shortcode 'another' does not match start tag")}}, {"close with extra keyword", `{{< sc1 >}}{{< /sc1 keyword>}}`, []Item{ tstLeftNoMD, tstSC1, tstRightNoMD, tstLeftNoMD, tstSCClose, tstSC1, - {tError, 0, "unclosed shortcode"}}}, + nti(tError, "unclosed shortcode")}}, {"Youtube id", `{{< sc1 -ziL-Q_456igdO-4 >}}`, []Item{ - tstLeftNoMD, tstSC1, {tScParam, 0, "-ziL-Q_456igdO-4"}, tstRightNoMD, tstEOF}}, + tstLeftNoMD, tstSC1, nti(tScParam, "-ziL-Q_456igdO-4"), tstRightNoMD, tstEOF}}, {"non-alphanumerics param quoted", `{{< sc1 "-ziL-.%QigdO-4" >}}`, []Item{ - tstLeftNoMD, tstSC1, {tScParam, 0, "-ziL-.%QigdO-4"}, tstRightNoMD, tstEOF}}, + tstLeftNoMD, tstSC1, nti(tScParam, "-ziL-.%QigdO-4"), tstRightNoMD, tstEOF}}, {"two params", `{{< sc1 param1 param2 >}}`, []Item{ tstLeftNoMD, tstSC1, tstParam1, tstParam2, tstRightNoMD, tstEOF}}, @@ -94,64 +94,64 @@ var shortCodeLexerTests = []lexerTest{ tstLeftNoMD, tstSCClose, tstSC1, tstRightNoMD, tstEOF}}, {"nested complex", `{{< sc1 >}}ab{{% sc2 param1 %}}cd{{< sc3 >}}ef{{< /sc3 >}}gh{{% /sc2 %}}ij{{< /sc1 >}}kl`, []Item{ tstLeftNoMD, tstSC1, tstRightNoMD, - {tText, 0, "ab"}, + nti(tText, "ab"), tstLeftMD, tstSC2, tstParam1, tstRightMD, - {tText, 0, "cd"}, + nti(tText, "cd"), tstLeftNoMD, tstSC3, tstRightNoMD, - {tText, 0, "ef"}, + nti(tText, "ef"), tstLeftNoMD, tstSCClose, tstSC3, tstRightNoMD, - {tText, 0, "gh"}, + nti(tText, "gh"), tstLeftMD, tstSCClose, tstSC2, tstRightMD, - {tText, 0, "ij"}, + nti(tText, "ij"), tstLeftNoMD, tstSCClose, tstSC1, tstRightNoMD, - {tText, 0, "kl"}, tstEOF, + nti(tText, "kl"), tstEOF, }}, {"two quoted params", `{{< sc1 "param nr. 1" "param nr. 2" >}}`, []Item{ - tstLeftNoMD, tstSC1, {tScParam, 0, "param nr. 1"}, {tScParam, 0, "param nr. 2"}, tstRightNoMD, tstEOF}}, + tstLeftNoMD, tstSC1, nti(tScParam, "param nr. 1"), nti(tScParam, "param nr. 2"), tstRightNoMD, tstEOF}}, {"two named params", `{{< sc1 param1="Hello World" param2="p2Val">}}`, []Item{ - tstLeftNoMD, tstSC1, tstParam1, tstVal, tstParam2, {tScParamVal, 0, "p2Val"}, tstRightNoMD, tstEOF}}, + tstLeftNoMD, tstSC1, tstParam1, tstVal, tstParam2, nti(tScParamVal, "p2Val"), tstRightNoMD, tstEOF}}, {"escaped quotes", `{{< sc1 param1=\"Hello World\" >}}`, []Item{ tstLeftNoMD, tstSC1, tstParam1, tstVal, tstRightNoMD, tstEOF}}, {"escaped quotes, positional param", `{{< sc1 \"param1\" >}}`, []Item{ tstLeftNoMD, tstSC1, tstParam1, tstRightNoMD, tstEOF}}, {"escaped quotes inside escaped quotes", `{{< sc1 param1=\"Hello \"escaped\" World\" >}}`, []Item{ tstLeftNoMD, tstSC1, tstParam1, - {tScParamVal, 0, `Hello `}, {tError, 0, `got positional parameter 'escaped'. Cannot mix named and positional parameters`}}}, + nti(tScParamVal, `Hello `), nti(tError, `got positional parameter 'escaped'. Cannot mix named and positional parameters`)}}, {"escaped quotes inside nonescaped quotes", `{{< sc1 param1="Hello \"escaped\" World" >}}`, []Item{ - tstLeftNoMD, tstSC1, tstParam1, {tScParamVal, 0, `Hello "escaped" World`}, tstRightNoMD, tstEOF}}, + tstLeftNoMD, tstSC1, tstParam1, nti(tScParamVal, `Hello "escaped" World`), tstRightNoMD, tstEOF}}, {"escaped quotes inside nonescaped quotes in positional param", `{{< sc1 "Hello \"escaped\" World" >}}`, []Item{ - tstLeftNoMD, tstSC1, {tScParam, 0, `Hello "escaped" World`}, tstRightNoMD, tstEOF}}, + tstLeftNoMD, tstSC1, nti(tScParam, `Hello "escaped" World`), tstRightNoMD, tstEOF}}, {"unterminated quote", `{{< sc1 param2="Hello World>}}`, []Item{ - tstLeftNoMD, tstSC1, tstParam2, {tError, 0, "unterminated quoted string in shortcode parameter-argument: 'Hello World>}}'"}}}, + tstLeftNoMD, tstSC1, tstParam2, nti(tError, "unterminated quoted string in shortcode parameter-argument: 'Hello World>}}'")}}, {"one named param, one not", `{{< sc1 param1="Hello World" p2 >}}`, []Item{ tstLeftNoMD, tstSC1, tstParam1, tstVal, - {tError, 0, "got positional parameter 'p2'. Cannot mix named and positional parameters"}}}, + nti(tError, "got positional parameter 'p2'. Cannot mix named and positional parameters")}}, {"one named param, one quoted positional param", `{{< sc1 param1="Hello World" "And Universe" >}}`, []Item{ tstLeftNoMD, tstSC1, tstParam1, tstVal, - {tError, 0, "got quoted positional parameter. Cannot mix named and positional parameters"}}}, + nti(tError, "got quoted positional parameter. Cannot mix named and positional parameters")}}, {"one quoted positional param, one named param", `{{< sc1 "param1" param2="And Universe" >}}`, []Item{ tstLeftNoMD, tstSC1, tstParam1, - {tError, 0, "got named parameter 'param2'. Cannot mix named and positional parameters"}}}, + nti(tError, "got named parameter 'param2'. Cannot mix named and positional parameters")}}, {"ono positional param, one not", `{{< sc1 param1 param2="Hello World">}}`, []Item{ tstLeftNoMD, tstSC1, tstParam1, - {tError, 0, "got named parameter 'param2'. Cannot mix named and positional parameters"}}}, + nti(tError, "got named parameter 'param2'. Cannot mix named and positional parameters")}}, {"commented out", `{{}}`, []Item{ - {tText, 0, "{{<"}, {tText, 0, " sc1 "}, {tText, 0, ">}}"}, tstEOF}}, + nti(tText, "{{<"), nti(tText, " sc1 "), nti(tText, ">}}"), tstEOF}}, {"commented out, with asterisk inside", `{{}}`, []Item{ - {tText, 0, "{{<"}, {tText, 0, " sc1 \"**/*.pdf\" "}, {tText, 0, ">}}"}, tstEOF}}, + nti(tText, "{{<"), nti(tText, " sc1 \"**/*.pdf\" "), nti(tText, ">}}"), tstEOF}}, {"commented out, missing close", `{{}}`, []Item{ - {tError, 0, "comment must be closed"}}}, + nti(tError, "comment must be closed")}}, {"commented out, misplaced close", `{{}}*/`, []Item{ - {tError, 0, "comment must be closed"}}}, + nti(tError, "comment must be closed")}}, } func TestShortcodeLexer(t *testing.T) { t.Parallel() for i, test := range shortCodeLexerTests { - items := collect(test.name, test.input, true, lexMainSection) + items := collect([]byte(test.input), true, lexMainSection) if !equal(items, test.items) { t.Errorf("[%d] %s: got\n\t%v\nexpected\n\t%v", i, test.name, items, test.items) } @@ -159,13 +159,17 @@ func TestShortcodeLexer(t *testing.T) { } func BenchmarkShortcodeLexer(b *testing.B) { + testInputs := make([][]byte, len(shortCodeLexerTests)) + for i, input := range shortCodeLexerTests { + testInputs[i] = []byte(input.input) + } b.ResetTimer() for i := 0; i < b.N; i++ { - for _, test := range shortCodeLexerTests { - items := collect(test.name, test.input, true, lexMainSection) - if !equal(items, test.items) { - b.Errorf("%s: got\n\t%v\nexpected\n\t%v", test.name, items, test.items) + for _, input := range testInputs { + items := collect(input, true, lexMainSection) + if len(items) == 0 { } + } } }