Fix handling of HTML files without front matter

This means that any HTML file inside /content that has no front matter will be treated as a regular file.

If you want it processed with shortcodes and a layout, add front matter.

The definition of an HTML file here is:

* File with extension .htm or .html
* Whose first non-whitespace character is "<", and which does not start with an HTML comment.

This is in line with the documentation.

Fixes #7030
Fixes #7028
See #6789
This commit is contained in:
Bjørn Erik Pedersen 2020-03-09 12:04:33 +01:00
parent 8279d2e227
commit ffcb4aeb8e
18 changed files with 168 additions and 309 deletions

6
deps/deps.go vendored
View file

@ -5,7 +5,6 @@ import (
"time"
"github.com/pkg/errors"
"go.uber.org/atomic"
"github.com/gohugoio/hugo/cache/filecache"
"github.com/gohugoio/hugo/common/loggers"
@ -377,11 +376,8 @@ type DepsCfg struct {
// BuildFlags are flags that may be turned on during a build.
type BuildFlags struct {
// HasLateTemplate is an atomic flag; its zero value reads as false.
// NOTE(review): elsewhere in this change set it is switched on when a page
// starts with plain HTML and no front matter — confirm against callers.
HasLateTemplate atomic.Bool
}
func NewBuildFlags() BuildFlags {
return BuildFlags{
//HasLateTemplate: atomic.NewBool(false),
}
return BuildFlags{}
}

8
deps/deps_test.go vendored
View file

@ -15,14 +15,8 @@ package deps
import (
"testing"
qt "github.com/frankban/quicktest"
)
// TestBuildFlags checks that a zero-value BuildFlags reports
// HasLateTemplate as false and that a stored true is read back.
func TestBuildFlags(t *testing.T) {
c := qt.New(t)
// Deliberately use the zero value rather than a constructor.
var bf BuildFlags
c.Assert(bf.HasLateTemplate.Load(), qt.Equals, false)
bf.HasLateTemplate.Store(true)
c.Assert(bf.HasLateTemplate.Load(), qt.Equals, true)
}

View file

@ -14,10 +14,16 @@
package files
import (
"bufio"
"fmt"
"io"
"os"
"path/filepath"
"sort"
"strings"
"unicode"
"github.com/spf13/afero"
)
var (
@ -32,6 +38,11 @@ var (
"pandoc", "pdc"}
contentFileExtensionsSet map[string]bool
htmlFileExtensions = []string{
"html", "htm"}
htmlFileExtensionsSet map[string]bool
)
func init() {
@ -39,12 +50,20 @@ func init() {
for _, ext := range contentFileExtensions {
contentFileExtensionsSet[ext] = true
}
htmlFileExtensionsSet = make(map[string]bool)
for _, ext := range htmlFileExtensions {
htmlFileExtensionsSet[ext] = true
}
}
// IsContentFile reports whether filename's extension (without the leading
// dot) is registered in the set of content file extensions.
func IsContentFile(filename string) bool {
	ext := filepath.Ext(filename)
	ext = strings.TrimPrefix(ext, ".")
	return contentFileExtensionsSet[ext]
}
// IsHTMLFile reports whether filename's extension (without the leading dot)
// is registered in the set of HTML file extensions.
func IsHTMLFile(filename string) bool {
	ext := filepath.Ext(filename)
	ext = strings.TrimPrefix(ext, ".")
	return htmlFileExtensionsSet[ext]
}
// IsContentExt reports whether ext, given without a leading dot, is
// registered in the set of content file extensions.
func IsContentExt(ext string) bool {
	known := contentFileExtensionsSet[ext]
	return known
}
@ -62,10 +81,33 @@ func (c ContentClass) IsBundle() bool {
return c == ContentClassLeaf || c == ContentClassBranch
}
func ClassifyContentFile(filename string) ContentClass {
func ClassifyContentFile(filename string, open func() (afero.File, error)) ContentClass {
if !IsContentFile(filename) {
return ContentClassFile
}
if IsHTMLFile(filename) {
// We need to look inside the file. If the first non-whitespace
// character is a "<", then we treat it as a regular file.
// Earlier we created pages for these files, but that caused all sorts
// of trouble, and isn't what the documentation says.
// See https://github.com/gohugoio/hugo/issues/7030
if open == nil {
panic(fmt.Sprintf("no file opener provided for %q", filename))
}
f, err := open()
if err != nil {
return ContentClassFile
}
ishtml := isHTMLContent(f)
f.Close()
if ishtml {
return ContentClassFile
}
}
if strings.HasPrefix(filename, "_index.") {
return ContentClassBranch
}
@ -77,6 +119,40 @@ func ClassifyContentFile(filename string) ContentClass {
return ContentClassContent
}
// htmlComment holds the runes that open an HTML comment ("<!--").
var htmlComment = []rune{'<', '!', '-', '-'}

// isHTMLContent reports whether the data in r looks like a plain HTML
// document: after any leading whitespace the first rune is '<' and the input
// does not continue past a complete "<!--" opener.
//
// It returns false when the first non-whitespace rune is not '<', or when at
// least one more rune follows a full "<!--". It returns true when the input
// diverges from "<!--" after the initial '<', and also whenever reading stops
// (end of input or any read error) before a decision has been made — so empty
// or whitespace-only input, and input ending exactly at "<!--", yield true.
func isHTMLContent(r io.Reader) bool {
	reader := bufio.NewReader(r)

	// matched is the number of leading htmlComment runes seen so far;
	// zero means we are still skipping leading whitespace.
	matched := 0

	for {
		ch, _, err := reader.ReadRune()
		if err != nil {
			// Nothing read so far ruled HTML out.
			return true
		}

		switch {
		case matched == 0:
			if unicode.IsSpace(ch) {
				continue
			}
			if ch != '<' {
				return false
			}
			matched = 1
		case matched >= len(htmlComment):
			// A full "<!--" was seen and more content follows.
			return false
		case ch != htmlComment[matched]:
			// Starts with '<' but is not a comment opener.
			return true
		default:
			matched++
		}
	}
}
const (
ComponentFolderArchetypes = "archetypes"
ComponentFolderStatic = "static"

View file

@ -15,6 +15,7 @@ package files
import (
"path/filepath"
"strings"
"testing"
qt "github.com/frankban/quicktest"
@ -30,6 +31,17 @@ func TestIsContentFile(t *testing.T) {
c.Assert(IsContentExt("json"), qt.Equals, false)
}
// TestIsHTMLContent runs isHTMLContent over a few short inputs covering
// plain HTML, HTML comments with and without trailing content, and text
// that does not start with '<'.
func TestIsHTMLContent(t *testing.T) {
	c := qt.New(t)

	for _, test := range []struct {
		input    string
		expected bool
	}{
		{" <html>", true},
		{" <!--\n---", false},
		{" <!--", true},
		{" ---<", false},
		{" foo <", false},
	} {
		c.Assert(isHTMLContent(strings.NewReader(test.input)), qt.Equals, test.expected)
	}
}
func TestComponentFolders(t *testing.T) {
c := qt.New(t)

View file

@ -66,7 +66,7 @@ func NewLanguageFs(langs map[string]int, fs afero.Fs) (afero.Fs, error) {
metaKeyOrdinal: langs[lang],
metaKeyTranslationBaseName: translationBaseName,
metaKeyTranslationBaseNameWithExt: translationBaseNameWithExt,
metaKeyClassifier: files.ClassifyContentFile(fi.Name()),
metaKeyClassifier: files.ClassifyContentFile(fi.Name(), meta.GetOpener()),
})
fis[i] = fim

View file

@ -54,7 +54,7 @@ func BenchmarkContentMap(b *testing.B) {
// real flow, so simulate this here.
meta["lang"] = lang
meta["path"] = meta.Filename()
meta["classifier"] = files.ClassifyContentFile(fi.Name())
meta["classifier"] = files.ClassifyContentFile(fi.Name(), meta.GetOpener())
})
}
@ -115,7 +115,7 @@ func TestContentMap(t *testing.T) {
// real flow, so simulate this here.
meta["lang"] = lang
meta["path"] = meta.Filename()
meta["classifier"] = files.ClassifyContentFile(fi.Name())
meta["classifier"] = files.ClassifyContentFile(fi.Name(), meta.GetOpener())
meta["translationBaseName"] = helpers.Filename(fi.Name())
})

View file

@ -135,7 +135,7 @@ title: No Template
}
counters := &testCounters{}
b.Build(BuildCfg{testCounters: counters})
b.Assert(int(counters.contentRenderCounter), qt.Equals, 50)
b.Assert(int(counters.contentRenderCounter), qt.Equals, 43)
b.AssertFileContent("public/blog/p1/index.html", `
<p>Cool Page|https://www.google.com|Title: Google's Homepage|Text: First Link|END</p>

View file

@ -449,12 +449,6 @@ func (p *pageState) initOutputFormat(isRenderingSite bool, idx int) error {
return err
}
if !p.renderable {
if _, err := p.Content(); err != nil {
return err
}
}
return nil
}
@ -679,8 +673,6 @@ func (p *pageState) mapContent(bucket *pagesMapBucket, meta *pageMeta) error {
s := p.shortcodeState
p.renderable = true
rn := &pageContentMap{
items: make([]interface{}, 0, 20),
}
@ -703,12 +695,6 @@ Loop:
switch {
case it.Type == pageparser.TypeIgnore:
case it.Type == pageparser.TypeHTMLStart:
// This is HTML without front matter. It can still have shortcodes.
p.selfLayout = "__" + p.File().Filename()
p.renderable = false
p.s.BuildFlags.HasLateTemplate.CAS(false, true)
rn.AddBytes(it)
case it.IsFrontMatter():
f := pageparser.FormatFromFrontMatterType(it.Type)
m, err := metadecoders.Default.UnmarshalToMap(it.Val, f)

View file

@ -28,7 +28,6 @@ var (
// The content related items on a Page.
type pageContent struct {
renderable bool
selfLayout string
truncated bool
@ -52,7 +51,7 @@ func (p pageContent) contentToRender(renderedShortcodes map[string]string) []byt
case pageContentReplacement:
c = append(c, v.val...)
case *shortcode:
if !p.renderable || !v.insertPlaceholder() {
if !v.insertPlaceholder() {
// Insert the rendered shortcode.
renderedShortcode, found := renderedShortcodes[v.placeholder]
if !found {

View file

@ -122,7 +122,6 @@ func newPageContentOutput(p *pageState, po *pageOutput) (*pageContentOutput, err
isHTML := cp.p.m.markup == "html"
if p.renderable {
if !isHTML {
r, err := cp.renderContent(cp.workContent, true)
if err != nil {
@ -194,21 +193,13 @@ func newPageContentOutput(p *pageState, po *pageOutput) (*pageContentOutput, err
cp.content = helpers.BytesToHTML(cp.workContent)
}
if !p.renderable {
err := cp.addSelfTemplate()
return err
}
return nil
}
// Recursive loops can only happen in content files with template code (shortcodes etc.)
// Avoid creating new goroutines if we don't have to.
needTimeout := !p.renderable || p.shortcodeState.hasShortcodes()
needTimeout = needTimeout || cp.renderHooks != nil
needTimeout := p.shortcodeState.hasShortcodes() || cp.renderHooks != nil
if needTimeout {
cp.initMain = parent.BranchWithTimeout(p.s.siteCfg.timeout, func(ctx context.Context) (interface{}, error) {
@ -428,15 +419,6 @@ func (p *pageContentOutput) setWordCounts(isCJKLanguage bool) {
}
}
// addSelfTemplate registers the page's work content as a late template,
// named by the page's self layout for this output format, and returns any
// error from the template manager.
func (p *pageContentOutput) addSelfTemplate() error {
	name := p.p.selfLayoutForOutput(p.f)
	manager := p.p.s.Tmpl().(tpl.TemplateManager)
	return manager.AddLateTemplate(name, string(p.workContent))
}
// A callback to signal that we have inserted a placeholder into the rendered
// content. This avoids doing extra replacement work.
func (p *pageContentOutput) enablePlaceholders() {

View file

@ -408,7 +408,7 @@ func renderShortcode(
}
func (s *shortcodeHandler) hasShortcodes() bool {
return len(s.shortcodes) > 0
return s != nil && len(s.shortcodes) > 0
}
func (s *shortcodeHandler) renderShortcodesForPage(p *pageState, f output.Format) (map[string]string, bool, error) {

View file

@ -15,7 +15,6 @@ package hugolib
import (
"fmt"
"os"
"path/filepath"
"strings"
"testing"
@ -24,8 +23,6 @@ import (
"github.com/markbates/inflect"
"github.com/gohugoio/hugo/helpers"
qt "github.com/frankban/quicktest"
"github.com/gohugoio/hugo/deps"
"github.com/gohugoio/hugo/resources/page"
@ -502,70 +499,6 @@ func doTestSectionNaming(t *testing.T, canonify, uglify, pluralize bool) {
}
// TestSkipRender builds a single site from a mix of .html and .md content
// files, with and without front matter, and compares each published file
// under public/ against an exact expected string.
func TestSkipRender(t *testing.T) {
t.Parallel()
// Each entry is {path relative to content/, file content}.
sources := [][2]string{
{filepath.FromSlash("sect/doc1.html"), "---\nmarkup: markdown\n---\n# title\nsome *content*"},
{filepath.FromSlash("sect/doc2.html"), "<!doctype html><html><body>more content</body></html>"},
{filepath.FromSlash("sect/doc3.md"), "# doc3\n*some* content"},
{filepath.FromSlash("sect/doc4.md"), "---\ntitle: doc4\n---\n# doc4\n*some content*"},
{filepath.FromSlash("sect/doc5.html"), "<!doctype html><html>{{ template \"head\" }}<body>body5</body></html>"},
{filepath.FromSlash("sect/doc6.html"), "<!doctype html><html>{{ template \"head_abs\" }}<body>body5</body></html>"},
{filepath.FromSlash("doc7.html"), "<html><body>doc7 content</body></html>"},
{filepath.FromSlash("sect/doc8.html"), "---\nmarkup: md\n---\n# title\nsome *content*"},
// Issue #3021
{filepath.FromSlash("doc9.html"), "<html><body>doc9: {{< myshortcode >}}</body></html>"},
}
cfg, fs := newTestCfg()
cfg.Set("verbose", true)
cfg.Set("canonifyURLs", true)
cfg.Set("uglyURLs", true)
cfg.Set("baseURL", "http://auth/bub")
for _, src := range sources {
writeSource(t, fs, filepath.Join("content", src[0]), src[1])
}
// Layouts, named templates and the shortcode referenced by the sources above.
writeSource(t, fs, filepath.Join("layouts", "_default/single.html"), "{{.Content}}")
writeSource(t, fs, filepath.Join("layouts", "head"), "<head><script src=\"script.js\"></script></head>")
writeSource(t, fs, filepath.Join("layouts", "head_abs"), "<head><script src=\"/script.js\"></script></head>")
writeSource(t, fs, filepath.Join("layouts", "shortcodes", "myshortcode.html"), "SHORT")
buildSingleSite(t, deps.DepsCfg{Fs: fs, Cfg: cfg}, BuildCfg{})
// Expected published output, one entry per source file.
tests := []struct {
doc string
expected string
}{
{filepath.FromSlash("public/sect/doc1.html"), "<h1 id=\"title\">title</h1>\n<p>some <em>content</em></p>\n"},
{filepath.FromSlash("public/sect/doc2.html"), "<!doctype html><html><body>more content</body></html>"},
{filepath.FromSlash("public/sect/doc3.html"), "<h1 id=\"doc3\">doc3</h1>\n<p><em>some</em> content</p>\n"},
{filepath.FromSlash("public/sect/doc4.html"), "<h1 id=\"doc4\">doc4</h1>\n<p><em>some content</em></p>\n"},
{filepath.FromSlash("public/sect/doc5.html"), "<!doctype html><html><head><script src=\"script.js\"></script></head><body>body5</body></html>"},
{filepath.FromSlash("public/sect/doc6.html"), "<!doctype html><html><head><script src=\"http://auth/bub/script.js\"></script></head><body>body5</body></html>"},
{filepath.FromSlash("public/doc7.html"), "<html><body>doc7 content</body></html>"},
{filepath.FromSlash("public/sect/doc8.html"), "<h1 id=\"title\">title</h1>\n<p>some <em>content</em></p>\n"},
{filepath.FromSlash("public/doc9.html"), "<html><body>doc9: SHORT</body></html>"},
}
for _, test := range tests {
file, err := fs.Destination.Open(test.doc)
if err != nil {
// Dump the published tree to stdout to help diagnose the missing file.
helpers.PrintFs(fs.Destination, "public", os.Stdout)
t.Fatalf("Did not find %s in target.", test.doc)
}
content := helpers.ReaderToString(file)
if content != test.expected {
t.Errorf("%s content expected:\n%q\ngot:\n%q", test.doc, test.expected, content)
}
}
}
func TestAbsURLify(t *testing.T) {
t.Parallel()
sources := [][2]string{

View file

@ -245,87 +245,6 @@ Page Content
}
// TestTemplateLateTemplates builds a site with many .html content files that
// have no front matter but contain template code (a shortcode and a partial
// call), asserts the published pages, then edits a partial, the home template
// and the first five pages and asserts the output of a second build.
func TestTemplateLateTemplates(t *testing.T) {
t.Parallel()
b := newTestSitesBuilder(t).WithSimpleConfigFile().Running()
numPages := 500 // To get some parallelism
homeTempl := `
Len RegularPages: {{ len site.RegularPages }}
{{ range site.RegularPages }}
Link: {{ .RelPermalink }} Len Content: {{ len .Content }}
{{ end }}
`
// Content used for every page: a full HTML document with no front matter.
pageTemplate := `<!doctype html>
<html lang="en">
<head>
<meta charset="utf-8">
<title>{{ .RelPermalink }}</title>
<meta name="description" content="The HTML5 Herald">
<meta name="author" content="SitePoint">
<link rel="stylesheet" href="css/styles.css?v=1.0">
</head>
<body>
<h1>{{ .RelPermalink }}</h1>
<p>Shortcode: {{< shortcode >}}</p>
<p>Partial: {{ partial "mypartial.html" . }}</p>
<script src="js/scripts.js"></script>
</body>
</html>
`
b.WithTemplatesAdded(
"index.html", homeTempl,
"partials/mypartial.html", `this my partial`,
)
// Make sure we get some parallelism.
for i := 0; i < numPages; i++ {
b.WithContent(fmt.Sprintf("page%d.html", i+1), pageTemplate)
}
b.Build(BuildCfg{})
// The home page lists every page; .Content is asserted to be empty for them.
b.AssertFileContent("public/index.html", fmt.Sprintf(`
Len RegularPages: %d
Link: /page3/ Len Content: 0
Link: /page22/ Len Content: 0
`, numPages))
for i := 0; i < numPages; i++ {
b.AssertFileContent(fmt.Sprintf("public/page%d/index.html", i+1),
fmt.Sprintf(`<title>/page%d/</title>`, i+1),
`<p>Shortcode: Shortcode: Hello</p>`,
"<p>Partial: this my partial</p>",
)
}
// Second round: change the partial, the home template and five content files.
b.EditFiles(
"layouts/partials/mypartial.html", `this my changed partial`,
"layouts/index.html", (homeTempl + "CHANGED"),
)
for i := 0; i < 5; i++ {
b.EditFiles(fmt.Sprintf("content/page%d.html", i+1), pageTemplate+"CHANGED")
}
b.Build(BuildCfg{})
b.AssertFileContent("public/index.html", fmt.Sprintf(`
Len RegularPages: %d
Link: /page3/ Len Content: 0
Link: /page2/ Len Content: 0
CHANGED
`, numPages))
// Only the five edited pages are re-checked after the rebuild.
for i := 0; i < 5; i++ {
b.AssertFileContent(fmt.Sprintf("public/page%d/index.html", i+1),
fmt.Sprintf(`<title>/page%d/</title>`, i+1),
`<p>Shortcode: Shortcode: Hello</p>`,
"<p>Partial: this my changed partial</p>",
"CHANGED",
)
}
}
func TestTemplateManyBaseTemplates(t *testing.T) {
t.Parallel()
b := newTestSitesBuilder(t).WithSimpleConfigFile()

View file

@ -140,7 +140,6 @@ const (
tEOF
// page items
TypeHTMLStart // document starting with < as first non-whitespace
TypeLeadSummaryDivider // <!--more-->, # more
TypeFrontMatterYAML
TypeFrontMatterTOML

View file

@ -42,21 +42,14 @@ LOOP:
if r == '<' {
l.backup()
if l.hasPrefix(htmlCommentStart) {
// This may be commented out front mattter, which should
// This may be commented out front matter, which should
// still be read.
l.consumeToNextLine()
l.isInHTMLComment = true
l.emit(TypeIgnore)
continue LOOP
} else {
if l.pos > l.start {
l.emit(tText)
}
l.next()
// This is the start of a plain HTML document with no
// front matter. I still can contain shortcodes, so we
// have to keep looking.
l.emit(TypeHTMLStart)
return l.errorf("plain HTML documents not supported")
}
}
break LOOP

View file

@ -38,7 +38,6 @@ var (
tstFrontMatterJSON = nti(TypeFrontMatterJSON, tstJSON+"\r\n")
tstSomeText = nti(tText, "\nSome text.\n")
tstSummaryDivider = nti(TypeLeadSummaryDivider, "<!--more-->\n")
tstHtmlStart = nti(TypeHTMLStart, "<")
tstNewline = nti(tText, "\n")
tstORG = `
@ -55,8 +54,8 @@ var crLfReplacer = strings.NewReplacer("\r", "#", "\n", "$")
var frontMatterTests = []lexerTest{
{"empty", "", []Item{tstEOF}},
{"Byte order mark", "\ufeff\nSome text.\n", []Item{nti(TypeIgnore, "\ufeff"), tstSomeText, tstEOF}},
{"HTML Document", ` <html> `, []Item{nti(tText, " "), tstHtmlStart, nti(tText, "html> "), tstEOF}},
{"HTML Document with shortcode", `<html>{{< sc1 >}}</html>`, []Item{tstHtmlStart, nti(tText, "html>"), tstLeftNoMD, tstSC1, tstRightNoMD, nti(tText, "</html>"), tstEOF}},
{"HTML Document", ` <html> `, []Item{nti(tError, "plain HTML documents not supported")}},
{"HTML Document with shortcode", `<html>{{< sc1 >}}</html>`, []Item{nti(tError, "plain HTML documents not supported")}},
{"No front matter", "\nSome text.\n", []Item{tstSomeText, tstEOF}},
{"YAML front matter", "---\nfoo: \"bar\"\n---\n\nSome text.\n", []Item{tstFrontMatterYAML, tstSomeText, tstEOF}},
{"YAML empty front matter", "---\n---\n\nSome text.\n", []Item{nti(TypeFrontMatterYAML, ""), tstSomeText, tstEOF}},

View file

@ -29,7 +29,6 @@ type TemplateManager interface {
TemplateHandler
TemplateFuncGetter
AddTemplate(name, tpl string) error
AddLateTemplate(name, tpl string) error
MarkReady() error
}

View file

@ -251,21 +251,8 @@ func (t *templateExec) MarkReady() error {
}
})
if err != nil {
return err
}
if t.Deps.BuildFlags.HasLateTemplate.Load() {
// This costs memory, so try to avoid it if we don't have to.
// The late templates are used to handle HTML in files in /content
// without front matter.
t.readyLateInit.Do(func() {
t.late = t.main.Clone(true)
t.late.createPrototypes()
})
}
return nil
}
type templateHandler struct {
@ -273,10 +260,7 @@ type templateHandler struct {
needsBaseof map[string]templateInfo
baseof map[string]templateInfo
late *templateNamespace // Templates added after main has started executing.
readyInit sync.Once
readyLateInit sync.Once
// This is the filesystem to load the templates from. All the templates are
// stored in the root of this filesystem.
@ -309,14 +293,6 @@ type templateHandler struct {
templateInfo map[string]tpl.Info
}
// AddLateTemplate parses the template text tpl under the given name into the
// handler's late template namespace. It is meant for templates added after
// the regular templates have started executing; these are currently
// "pure HTML content files".
func (t *templateHandler) AddLateTemplate(name, tpl string) error {
	info := t.newTemplateInfo(name, tpl)
	if _, err := t.late.parse(info); err != nil {
		return err
	}
	return nil
}
// AddTemplate parses and adds a template to the collection.
// Templates with name prefixed with "_text" will be handled as plain
// text templates.
@ -334,10 +310,6 @@ func (t *templateHandler) Lookup(name string) (tpl.Template, bool) {
return templ, true
}
if t.late != nil {
return t.late.Lookup(name)
}
return nil, false
}