hugo/hugolib/shortcodeparser_test.go
bep 55fcd2f30f Shortcode rewrite, take 2
This commit contains a restructuring and partial rewrite of the shortcode handling.

Prior to this commit rendering of the page content was mingled with handling of the shortcodes. This led to several oddities.

The new flow is:

1. Shortcodes are extracted from page and replaced with placeholders.
2. Shortcodes are processed and rendered
3. Page is processed
4. The placeholders are replaced with the rendered shortcodes

The handling of summaries is also made simpler by this.

This commit also introduces some other changes:

1. A distinction between shortcodes that need further processing and those that do not:

* `{{< >}}`: Typically raw HTML. Will not be processed.
* `{{% %}}`: Will be processed by the page's markup engine (Markdown or, in the future, Asciidoctor)

The above also involves a new shortcode-parser, with lexical scanning inspired by Rob Pike's talk called "Lexical Scanning in Go",
which should be easier to understand, give better error messages and perform better.

2. If you want to exclude a shortcode from being processed (for documentation etc.), the inner part of the shortcode must be commented out, i.e. `{{%/* movie 47238zzb */%}}`. See the updated shortcode section in the documentation for further examples.

The new parser supports nested shortcodes. This isn't new, but has two related design choices worth mentioning:

* The shortcodes will be rendered individually, so if both `{{< >}}` and `{{% %}}` are used in the nested hierarchy, one will be passed through the page's markdown processor, the other not.
* To avoid the potentially costly overhead of always looking far ahead for a possible closing tag, this implementation looks at the shortcode's template itself: the shortcode is treated as a container with inner content if its template contains a reference to `.Inner`

Fixes #565
Fixes #480
Fixes #461

And probably some others.
2014-11-17 18:32:06 -05:00

163 lines
6.9 KiB
Go

package hugolib
import (
"testing"
)
// shortCodeLexerTest is one table-driven case for the shortcode lexer: an
// input string paired with the sequence of items the lexer is expected to emit.
type shortCodeLexerTest struct {
// name labels the case; it is passed to the lexer and printed on failure.
name string
// input is the raw text handed to the lexer.
input string
// items is the expected output, in order, including the terminating
// tEOF or tError item.
items []item
}
// Reusable expected items shared by the cases in shortCodeLexerTests.
// The second field is 0 throughout; the equal helper in this file compares
// only typ and val, so that field does not influence test results.
var (
// End-of-input marker.
tstEOF = item{tEOF, 0, ""}
// Delimiters for the {{< >}} shortcode form.
tstLeftNoMD = item{tLeftDelimScNoMarkup, 0, "{{<"}
tstRightNoMD = item{tRightDelimScNoMarkup, 0, ">}}"}
// Delimiters for the {{% %}} shortcode form.
tstLeftMD = item{tLeftDelimScWithMarkup, 0, "{{%"}
tstRightMD = item{tRightDelimScWithMarkup, 0, "%}}"}
// The "/" that marks a closing shortcode tag.
tstSCClose = item{tScClose, 0, "/"}
// Shortcode names used by the cases.
tstSC1 = item{tScName, 0, "sc1"}
tstSC2 = item{tScName, 0, "sc2"}
tstSC3 = item{tScName, 0, "sc3"}
// Parameter names/positional parameters and a sample parameter value.
tstParam1 = item{tScParam, 0, "param1"}
tstParam2 = item{tScParam, 0, "param2"}
tstVal = item{tScParamVal, 0, "Hello World"}
)
// shortCodeLexerTests is the case table driven by TestPagelexer. Each entry
// gives a case name, the lexer input, and the exact items expected back.
// Cases that end in a tError item have no trailing tstEOF because collect
// stops at the first tEOF or tError.
var shortCodeLexerTests = []shortCodeLexerTest{
// --- plain text and simple shortcodes ---
{"empty", "", []item{tstEOF}},
{"spaces", " \t\n", []item{{tText, 0, " \t\n"}, tstEOF}},
{"text", `to be or not`, []item{{tText, 0, "to be or not"}, tstEOF}},
{"no markup", `{{< sc1 >}}`, []item{tstLeftNoMD, tstSC1, tstRightNoMD, tstEOF}},
{"with EOL", "{{< sc1 \n >}}", []item{tstLeftNoMD, tstSC1, tstRightNoMD, tstEOF}},
{"simple with markup", `{{% sc1 %}}`, []item{tstLeftMD, tstSC1, tstRightMD, tstEOF}},
// NOTE(review): this input is identical to the "no markup" case above;
// presumably it was meant to contain extra whitespace — confirm.
{"with spaces", `{{< sc1 >}}`, []item{tstLeftNoMD, tstSC1, tstRightNoMD, tstEOF}},
{"mismatched rightDelim", `{{< sc1 %}}`, []item{tstLeftNoMD, tstSC1,
{tError, 0, "unrecognized character in shortcode action: U+0025 '%'. Note: Parameters with non-alphanumeric args must be quoted"}}},
// --- opening/closing tag pairing ---
{"inner, markup", `{{% sc1 %}} inner {{% /sc1 %}}`, []item{
tstLeftMD,
tstSC1,
tstRightMD,
{tText, 0, " inner "},
tstLeftMD,
tstSCClose,
tstSC1,
tstRightMD,
tstEOF,
}},
{"close, but no open", `{{< /sc1 >}}`, []item{
tstLeftNoMD, {tError, 0, "got closing shortcode, but none is open"}}},
{"close wrong", `{{< sc1 >}}{{< /another >}}`, []item{
tstLeftNoMD, tstSC1, tstRightNoMD, tstLeftNoMD, tstSCClose,
{tError, 0, "closing tag for shortcode 'another' does not match start tag"}}},
{"close, but no open, more", `{{< sc1 >}}{{< /sc1 >}}{{< /another >}}`, []item{
tstLeftNoMD, tstSC1, tstRightNoMD, tstLeftNoMD, tstSCClose, tstSC1, tstRightNoMD, tstLeftNoMD, tstSCClose,
{tError, 0, "closing tag for shortcode 'another' does not match start tag"}}},
{"close with extra keyword", `{{< sc1 >}}{{< /sc1 keyword>}}`, []item{
tstLeftNoMD, tstSC1, tstRightNoMD, tstLeftNoMD, tstSCClose, tstSC1,
{tError, 0, "unclosed shortcode"}}},
// --- positional parameters ---
{"Youtube id", `{{< sc1 -ziL-Q_456igdO-4 >}}`, []item{
tstLeftNoMD, tstSC1, item{tScParam, 0, "-ziL-Q_456igdO-4"}, tstRightNoMD, tstEOF}},
{"non-alphanumerics param quoted", `{{< sc1 "-ziL-.%QigdO-4" >}}`, []item{
tstLeftNoMD, tstSC1, item{tScParam, 0, "-ziL-.%QigdO-4"}, tstRightNoMD, tstEOF}},
{"two params", `{{< sc1 param1 param2 >}}`, []item{
tstLeftNoMD, tstSC1, tstParam1, tstParam2, tstRightNoMD, tstEOF}},
// --- nesting ---
{"nested simple", `{{< sc1 >}}{{< sc2 >}}{{< /sc1 >}}`, []item{
tstLeftNoMD, tstSC1, tstRightNoMD,
tstLeftNoMD, tstSC2, tstRightNoMD,
tstLeftNoMD, tstSCClose, tstSC1, tstRightNoMD, tstEOF}},
{"nested complex", `{{< sc1 >}}ab{{% sc2 param1 %}}cd{{< sc3 >}}ef{{< /sc3 >}}gh{{% /sc2 %}}ij{{< /sc1 >}}kl`, []item{
tstLeftNoMD, tstSC1, tstRightNoMD,
item{tText, 0, "ab"},
tstLeftMD, tstSC2, tstParam1, tstRightMD,
item{tText, 0, "cd"},
tstLeftNoMD, tstSC3, tstRightNoMD,
item{tText, 0, "ef"},
tstLeftNoMD, tstSCClose, tstSC3, tstRightNoMD,
item{tText, 0, "gh"},
tstLeftMD, tstSCClose, tstSC2, tstRightMD,
item{tText, 0, "ij"},
tstLeftNoMD, tstSCClose, tstSC1, tstRightNoMD,
item{tText, 0, "kl"}, tstEOF,
}},
// --- quoted and named parameters, including escaped quotes ---
{"two quoted params", `{{< sc1 "param nr. 1" "param nr. 2" >}}`, []item{
tstLeftNoMD, tstSC1, item{tScParam, 0, "param nr. 1"}, item{tScParam, 0, "param nr. 2"}, tstRightNoMD, tstEOF}},
{"two named params", `{{< sc1 param1="Hello World" param2="p2Val">}}`, []item{
tstLeftNoMD, tstSC1, tstParam1, tstVal, tstParam2, {tScParamVal, 0, "p2Val"}, tstRightNoMD, tstEOF}},
{"escaped quotes", `{{< sc1 param1=\"Hello World\" >}}`, []item{
tstLeftNoMD, tstSC1, tstParam1, tstVal, tstRightNoMD, tstEOF}},
{"escaped quotes, positional param", `{{< sc1 \"param1\" >}}`, []item{
tstLeftNoMD, tstSC1, tstParam1, tstRightNoMD, tstEOF}},
{"escaped quotes inside escaped quotes", `{{< sc1 param1=\"Hello \"escaped\" World\" >}}`, []item{
tstLeftNoMD, tstSC1, tstParam1,
item{tScParamVal, 0, `Hello `}, {tError, 0, `got positional parameter 'escaped'. Cannot mix named and positional parameters`}}},
{"escaped quotes inside nonescaped quotes",
`{{< sc1 param1="Hello \"escaped\" World" >}}`, []item{
tstLeftNoMD, tstSC1, tstParam1, item{tScParamVal, 0, `Hello "escaped" World`}, tstRightNoMD, tstEOF}},
{"escaped quotes inside nonescaped quotes in positional param",
`{{< sc1 "Hello \"escaped\" World" >}}`, []item{
tstLeftNoMD, tstSC1, item{tScParam, 0, `Hello "escaped" World`}, tstRightNoMD, tstEOF}},
{"unterminated quote", `{{< sc1 param2="Hello World>}}`, []item{
tstLeftNoMD, tstSC1, tstParam2, {tError, 0, "unterminated quoted string in shortcode parameter-argument: 'Hello World>}}'"}}},
// --- mixing named and positional parameters is an error ---
{"one named param, one not", `{{< sc1 param1="Hello World" p2 >}}`, []item{
tstLeftNoMD, tstSC1, tstParam1, tstVal,
{tError, 0, "got positional parameter 'p2'. Cannot mix named and positional parameters"}}},
{"one named param, one quoted positional param", `{{< sc1 param1="Hello World" "And Universe" >}}`, []item{
tstLeftNoMD, tstSC1, tstParam1, tstVal,
{tError, 0, "got quoted positional parameter. Cannot mix named and positional parameters"}}},
{"one quoted positional param, one named param", `{{< sc1 "param1" param2="And Universe" >}}`, []item{
tstLeftNoMD, tstSC1, tstParam1,
{tError, 0, "got named parameter 'param2'. Cannot mix named and positional parameters"}}},
// NOTE(review): "ono" in the case name below looks like a typo for "one".
{"ono positional param, one not", `{{< sc1 param1 param2="Hello World">}}`, []item{
tstLeftNoMD, tstSC1, tstParam1,
{tError, 0, "got named parameter 'param2'. Cannot mix named and positional parameters"}}},
// --- commented-out shortcodes ({{</* ... */>}}) are emitted as text ---
{"commented out", `{{</* sc1 */>}}`, []item{
item{tText, 0, "{{<"}, item{tText, 0, " sc1 "}, item{tText, 0, ">}}"}, tstEOF}},
{"commented out, missing close", `{{</* sc1 >}}`, []item{
{tError, 0, "comment must be closed"}}},
{"commented out, misplaced close", `{{</* sc1 >}}*/`, []item{
item{tText, 0, "{{<"}, item{tText, 0, " sc1 >}}"}, {tError, 0, "comment ends before the right shortcode delimiter"}}},
}
// TestPagelexer feeds every case in shortCodeLexerTests through the shortcode
// lexer and reports each case whose emitted items differ from the expected
// ones. All cases are run even after a failure.
func TestPagelexer(t *testing.T) {
	for i := range shortCodeLexerTests {
		// Work on a copy so collect never receives a pointer into the table.
		tc := shortCodeLexerTests[i]
		got := collect(&tc)
		if equal(got, tc.items) {
			continue
		}
		t.Errorf("%s: got\n\t%v\nexpected\n\t%v", tc.name, got, tc.items)
	}
}
// collect lexes t.input with a fresh shortcode lexer and returns every item
// the lexer emits, up to and including the first tEOF or tError item.
func collect(t *shortCodeLexerTest) (items []item) {
	lexer := newShortcodeLexer(t.name, t.input, 0)
	for {
		next := lexer.nextItem()
		items = append(items, next)
		// Keep going until the lexer signals end of input or an error.
		if next.typ != tEOF && next.typ != tError {
			continue
		}
		return items
	}
}
// equal reports whether the two item sequences have the same length and
// agree, element by element, on type and value.
// Item positions are not compared, for now.
func equal(i1, i2 []item) bool {
	if len(i1) != len(i2) {
		return false
	}
	for idx, want := range i2 {
		got := i1[idx]
		if got.typ != want.typ || got.val != want.val {
			return false
		}
	}
	return true
}