parser/pageparser: Fix handling of commented out front matter

When the page parser was rewritten in 0.51, a leading HTML comment was interpreted literally and ignored as a whole, but commented-out front matter is used in the wild to "hide it from GitHub", e.g.:

```
<!--
+++
title = "hello"
+++
-->
```

Fixes #5478
This commit is contained in:
Bjørn Erik Pedersen 2018-11-28 10:21:54 +01:00
parent 7e75aeca80
commit 7540a62834
No known key found for this signature in database
GPG key ID: 330E6E2BD4859D8F
6 changed files with 65 additions and 13 deletions

1
go.sum
View file

@ -68,6 +68,7 @@ github.com/magefile/mage v1.4.0 h1:RI7B1CgnPAuu2O9lWszwya61RLmfL0KCdo+QyyI/Bhk=
github.com/magefile/mage v1.4.0/go.mod h1:IUDi13rsHje59lecXokTfGX0QIzO45uVPlXnJYsXepA= github.com/magefile/mage v1.4.0/go.mod h1:IUDi13rsHje59lecXokTfGX0QIzO45uVPlXnJYsXepA=
github.com/magiconair/properties v1.8.0 h1:LLgXmsheXeRoUOBOjtwPQCWIYqM/LU1ayDtDePerRcY= github.com/magiconair/properties v1.8.0 h1:LLgXmsheXeRoUOBOjtwPQCWIYqM/LU1ayDtDePerRcY=
github.com/magiconair/properties v1.8.0/go.mod h1:PppfXfuXeibc/6YijjN8zIbojt8czPbwD3XqdrwzmxQ= github.com/magiconair/properties v1.8.0/go.mod h1:PppfXfuXeibc/6YijjN8zIbojt8czPbwD3XqdrwzmxQ=
github.com/markbates/inflect v0.0.0-20171215194931-a12c3aec81a6 h1:LZhVjIISSbj8qLf2qDPP0D8z0uvOWAW5C85ly5mJW6c=
github.com/markbates/inflect v0.0.0-20171215194931-a12c3aec81a6/go.mod h1:oTeZL2KHA7CUX6X+fovmK9OvIOFuqu0TwdQrZjLTh88= github.com/markbates/inflect v0.0.0-20171215194931-a12c3aec81a6/go.mod h1:oTeZL2KHA7CUX6X+fovmK9OvIOFuqu0TwdQrZjLTh88=
github.com/matryer/try v0.0.0-20161228173917-9ac251b645a2/go.mod h1:0KeJpeMD6o+O4hW7qJOT7vyQPKrWmj26uf5wMc/IiIs= github.com/matryer/try v0.0.0-20161228173917-9ac251b645a2/go.mod h1:0KeJpeMD6o+O4hW7qJOT7vyQPKrWmj26uf5wMc/IiIs=
github.com/mattn/go-isatty v0.0.4 h1:bnP0vzxcAdeI1zdubAl5PjU6zsERjGZb7raWodagDYs= github.com/mattn/go-isatty v0.0.4 h1:bnP0vzxcAdeI1zdubAl5PjU6zsERjGZb7raWodagDYs=

View file

@ -83,8 +83,6 @@ Loop:
switch { switch {
case it.Type == pageparser.TypeIgnore: case it.Type == pageparser.TypeIgnore:
case it.Type == pageparser.TypeHTMLComment:
// Ignore. This is only a leading Front matter comment.
case it.Type == pageparser.TypeHTMLStart: case it.Type == pageparser.TypeHTMLStart:
// This is HTML without front matter. It can still have shortcodes. // This is HTML without front matter. It can still have shortcodes.
p.renderable = false p.renderable = false

View file

@ -1586,6 +1586,33 @@ CONTENT:{{ .Content }}
) )
} }
// TestPageWithCommentedOutFrontMatter builds a site with a single page whose
// TOML front matter is wrapped in a leading HTML comment, and asserts that the
// rendered output contains both the title from that front matter and the
// content following the comment.
//
// See https://github.com/gohugoio/hugo/issues/5478
func TestPageWithCommentedOutFrontMatter(t *testing.T) {
	const (
		// Page source: front matter hidden inside an HTML comment.
		pageContent = `<!--
+++
title = "hello"
+++
-->
This is the content.
`
		// Minimal single-page template exposing title and content.
		singleTemplate = `
Title: {{ .Title }}
Content:{{ .Content }}
`
	)

	sb := newTestSitesBuilder(t)
	sb.WithSimpleConfigFile()
	sb.WithContent("page.md", pageContent)
	sb.WithTemplatesAdded("layouts/_default/single.html", singleTemplate)

	sb.CreateSites().Build(BuildCfg{})

	sb.AssertFileContent("public/page/index.html",
		"Title: hello",
		"Content:<p>This is the content.</p>",
	)
}
// TODO(bep) this may be useful for other tests. // TODO(bep) this may be useful for other tests.
func compareObjects(a interface{}, b interface{}) bool { func compareObjects(a interface{}, b interface{}) bool {
aStr := strings.Split(fmt.Sprintf("%v", a), "") aStr := strings.Split(fmt.Sprintf("%v", a), "")

View file

@ -108,7 +108,6 @@ const (
// page items // page items
TypeHTMLStart // document starting with < as first non-whitespace TypeHTMLStart // document starting with < as first non-whitespace
TypeHTMLComment // We ignore leading comments
TypeLeadSummaryDivider // <!--more-->, # more TypeLeadSummaryDivider // <!--more-->, # more
TypeFrontMatterYAML TypeFrontMatterYAML
TypeFrontMatterTOML TypeFrontMatterTOML

View file

@ -53,6 +53,8 @@ type pageLexer struct {
summaryDivider []byte summaryDivider []byte
// Set when we have parsed any summary divider // Set when we have parsed any summary divider
summaryDividerChecked bool summaryDividerChecked bool
// Whether we're in a HTML comment.
isInHTMLComment bool
lexerShortcodeState lexerShortcodeState
@ -120,7 +122,7 @@ var (
delimYAML = []byte("---") delimYAML = []byte("---")
delimOrg = []byte("#+") delimOrg = []byte("#+")
htmlCommentStart = []byte("<!--") htmlCommentStart = []byte("<!--")
htmlCOmmentEnd = []byte("-->") htmlCommentEnd = []byte("-->")
) )
func (l *pageLexer) next() rune { func (l *pageLexer) next() rune {
@ -195,6 +197,15 @@ func (l *pageLexer) consumeCRLF() bool {
return consumed return consumed
} }
// consumeToNextLine keeps calling l.next() until it returns either eof or an
// end-of-line rune (as reported by isEndOfLine). The rune that stops the scan
// has already been read by l.next() when this method returns.
func (l *pageLexer) consumeToNextLine() {
	for r := l.next(); r != eof && !isEndOfLine(r); r = l.next() {
		// Nothing to do; we are only skipping ahead.
	}
}
func (l *pageLexer) consumeSpace() { func (l *pageLexer) consumeSpace() {
for { for {
r := l.next() r := l.next()
@ -206,6 +217,10 @@ func (l *pageLexer) consumeSpace() {
} }
func lexMainSection(l *pageLexer) stateFunc { func lexMainSection(l *pageLexer) stateFunc {
if l.isInHTMLComment {
return lexEndFromtMatterHTMLComment
}
// Fast forward as far as possible. // Fast forward as far as possible.
var l1, l2 int var l1, l2 int
@ -312,16 +327,15 @@ LOOP:
case r == byteOrderMark: case r == byteOrderMark:
l.emit(TypeIgnore) l.emit(TypeIgnore)
case !isSpace(r) && !isEndOfLine(r): case !isSpace(r) && !isEndOfLine(r):
// No front matter.
if r == '<' { if r == '<' {
l.backup() l.backup()
if l.hasPrefix(htmlCommentStart) { if l.hasPrefix(htmlCommentStart) {
right := l.index(htmlCOmmentEnd) // This may be commented out front matter, which should
if right == -1 { // still be read.
return l.errorf("starting HTML comment with no end") l.consumeToNextLine()
} l.isInHTMLComment = true
l.pos += right + len(htmlCOmmentEnd) l.emit(TypeIgnore)
l.emit(TypeHTMLComment) continue LOOP
} else { } else {
if l.pos > l.start { if l.pos > l.start {
l.emit(tText) l.emit(tText)
@ -341,6 +355,19 @@ LOOP:
return lexMainSection return lexMainSection
} }
// lexEndFromtMatterHTMLComment is the lexer state entered from lexMainSection
// while the isInHTMLComment flag is set. It looks for the closing HTML comment
// marker ("-->") from the current position, skips past it and emits everything
// up to and including the marker as a TypeIgnore item.
//
// If no closing marker is found, an error item is emitted via l.errorf.
// On success, lexing continues in lexMainSection.
func lexEndFromtMatterHTMLComment(l *pageLexer) stateFunc {
	// Reset the flag up front so lexMainSection does not route back here.
	l.isInHTMLComment = false

	offset := l.index(htmlCommentEnd)
	if offset == -1 {
		return l.errorf("starting HTML comment with no end")
	}

	// Step over whatever precedes the marker, then over the marker itself.
	l.pos += offset
	l.pos += len(htmlCommentEnd)
	l.emit(TypeIgnore)

	// The comment is closed; continue with the main section (shortcodes etc.).
	return lexMainSection
}
func lexDone(l *pageLexer) stateFunc { func lexDone(l *pageLexer) stateFunc {
// Done! // Done!

View file

@ -60,7 +60,8 @@ var frontMatterTests = []lexerTest{
{"No front matter", "\nSome text.\n", []Item{tstSomeText, tstEOF}}, {"No front matter", "\nSome text.\n", []Item{tstSomeText, tstEOF}},
{"YAML front matter", "---\nfoo: \"bar\"\n---\n\nSome text.\n", []Item{tstFrontMatterYAML, tstSomeText, tstEOF}}, {"YAML front matter", "---\nfoo: \"bar\"\n---\n\nSome text.\n", []Item{tstFrontMatterYAML, tstSomeText, tstEOF}},
{"YAML empty front matter", "---\n---\n\nSome text.\n", []Item{nti(TypeFrontMatterYAML, ""), tstSomeText, tstEOF}}, {"YAML empty front matter", "---\n---\n\nSome text.\n", []Item{nti(TypeFrontMatterYAML, ""), tstSomeText, tstEOF}},
{"YAML commented out front matter", "<!--\n---\nfoo: \"bar\"\n---\n-->\nSome text.\n", []Item{nti(TypeHTMLComment, "<!--\n---\nfoo: \"bar\"\n---\n-->"), tstSomeText, tstEOF}}, {"YAML commented out front matter", "<!--\n---\nfoo: \"bar\"\n---\n-->\nSome text.\n", []Item{nti(TypeIgnore, "<!--\n"), tstFrontMatterYAML, nti(TypeIgnore, "-->"), tstSomeText, tstEOF}},
{"YAML commented out front matter, no end", "<!--\n---\nfoo: \"bar\"\n---\nSome text.\n", []Item{nti(TypeIgnore, "<!--\n"), tstFrontMatterYAML, nti(tError, "starting HTML comment with no end")}},
// Note that we keep all bytes as they are, but we need to handle CRLF // Note that we keep all bytes as they are, but we need to handle CRLF
{"YAML front matter CRLF", "---\r\nfoo: \"bar\"\r\n---\n\nSome text.\n", []Item{tstFrontMatterYAMLCRLF, tstSomeText, tstEOF}}, {"YAML front matter CRLF", "---\r\nfoo: \"bar\"\r\n---\n\nSome text.\n", []Item{tstFrontMatterYAMLCRLF, tstSomeText, tstEOF}},
{"TOML front matter", "+++\nfoo = \"bar\"\n+++\n\nSome text.\n", []Item{tstFrontMatterTOML, tstSomeText, tstEOF}}, {"TOML front matter", "+++\nfoo = \"bar\"\n+++\n\nSome text.\n", []Item{tstFrontMatterTOML, tstSomeText, tstEOF}},
@ -78,7 +79,6 @@ var frontMatterTests = []lexerTest{
func TestFrontMatter(t *testing.T) { func TestFrontMatter(t *testing.T) {
t.Parallel() t.Parallel()
for i, test := range frontMatterTests { for i, test := range frontMatterTests {
items := collect([]byte(test.input), false, lexIntroSection) items := collect([]byte(test.input), false, lexIntroSection)
if !equal(items, test.items) { if !equal(items, test.items) {
got := crLfReplacer.Replace(fmt.Sprint(items)) got := crLfReplacer.Replace(fmt.Sprint(items))