hugo/hugolib/pagecollections.go
Bjørn Erik Pedersen 7ff0a8ee9f Simplify page tree logic
This is preparation for #6041.

For historic reasons, the code for bulding the section tree and the taxonomies were very much separate.

This works, but makes it hard to extend, maintain, and possibly not so fast as it could be.

This simplification also introduces 3 slightly breaking changes, which I suspect most people will be pleased about. See referenced issues:

This commit also switches the radix tree dependency to a mutable implementation: github.com/armon/go-radix.

Fixes #6154
Fixes #6153
Fixes #6152
2019-08-08 20:13:39 +02:00

574 lines
14 KiB
Go

// Copyright 2019 The Hugo Authors. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package hugolib
import (
"fmt"
"path"
"path/filepath"
"sort"
"strings"
"sync"
"time"
"github.com/gohugoio/hugo/resources/resource"
"github.com/pkg/errors"
"github.com/gohugoio/hugo/cache"
"github.com/gohugoio/hugo/helpers"
"github.com/gohugoio/hugo/resources/page"
)
// Used in the page cache to mark more than one hit for a given key.
var ambiguityFlag = &pageState{}
// PageCollections contains the page collections for a site.
type PageCollections struct {
pagesMap *pagesMap
// Includes absolute all pages (of all types), including drafts etc.
rawAllPages pageStatePages
// rawAllPages plus additional pages created during the build process.
workAllPages pageStatePages
// Includes headless bundles, i.e. bundles that produce no output for its content page.
headlessPages pageStatePages
// Lazy initialized page collections
pages *lazyPagesFactory
regularPages *lazyPagesFactory
allPages *lazyPagesFactory
allRegularPages *lazyPagesFactory
// The index for .Site.GetPage etc.
pageIndex *cache.Lazy
}
// Pages returns all pages.
// This is for the current language only.
func (c *PageCollections) Pages() page.Pages {
return c.pages.get()
}
// RegularPages returns all the regular pages.
// This is for the current language only.
func (c *PageCollections) RegularPages() page.Pages {
return c.regularPages.get()
}
// AllPages returns all pages for all languages.
func (c *PageCollections) AllPages() page.Pages {
return c.allPages.get()
}
// AllPages returns all regular pages for all languages.
func (c *PageCollections) AllRegularPages() page.Pages {
return c.allRegularPages.get()
}
// Get initializes the index if not already done so, then
// looks up the given page ref, returns nil if no value found.
func (c *PageCollections) getFromCache(ref string) (page.Page, error) {
v, found, err := c.pageIndex.Get(ref)
if err != nil {
return nil, err
}
if !found {
return nil, nil
}
p := v.(page.Page)
if p != ambiguityFlag {
return p, nil
}
return nil, fmt.Errorf("page reference %q is ambiguous", ref)
}
type lazyPagesFactory struct {
pages page.Pages
init sync.Once
factory page.PagesFactory
}
func (l *lazyPagesFactory) get() page.Pages {
l.init.Do(func() {
l.pages = l.factory()
})
return l.pages
}
func newLazyPagesFactory(factory page.PagesFactory) *lazyPagesFactory {
return &lazyPagesFactory{factory: factory}
}
func newPageCollections() *PageCollections {
return newPageCollectionsFromPages(nil)
}
func newPageCollectionsFromPages(pages pageStatePages) *PageCollections {
c := &PageCollections{rawAllPages: pages}
c.pages = newLazyPagesFactory(func() page.Pages {
pages := make(page.Pages, len(c.workAllPages))
for i, p := range c.workAllPages {
pages[i] = p
}
return pages
})
c.regularPages = newLazyPagesFactory(func() page.Pages {
return c.findPagesByKindInWorkPages(page.KindPage, c.workAllPages)
})
c.pageIndex = cache.NewLazy(func() (map[string]interface{}, error) {
index := make(map[string]interface{})
add := func(ref string, p page.Page) {
ref = strings.ToLower(ref)
existing := index[ref]
if existing == nil {
index[ref] = p
} else if existing != ambiguityFlag && existing != p {
index[ref] = ambiguityFlag
}
}
for _, pageCollection := range []pageStatePages{c.workAllPages, c.headlessPages} {
for _, p := range pageCollection {
if p.IsPage() {
sourceRef := p.sourceRef()
if sourceRef != "" {
// index the canonical ref
// e.g. /section/article.md
add(sourceRef, p)
}
// Ref/Relref supports this potentially ambiguous lookup.
add(p.File().LogicalName(), p)
translationBaseName := p.File().TranslationBaseName()
dir, _ := path.Split(sourceRef)
dir = strings.TrimSuffix(dir, "/")
if translationBaseName == "index" {
add(dir, p)
add(path.Base(dir), p)
} else {
add(translationBaseName, p)
}
// We need a way to get to the current language version.
pathWithNoExtensions := path.Join(dir, translationBaseName)
add(pathWithNoExtensions, p)
} else {
// index the canonical, unambiguous ref for any backing file
// e.g. /section/_index.md
sourceRef := p.sourceRef()
if sourceRef != "" {
add(sourceRef, p)
}
ref := p.SectionsPath()
// index the canonical, unambiguous virtual ref
// e.g. /section
// (this may already have been indexed above)
add("/"+ref, p)
}
}
}
return index, nil
})
return c
}
// This is an adapter func for the old API with Kind as first argument.
// This is invoked when you do .Site.GetPage. We drop the Kind and fails
// if there are more than 2 arguments, which would be ambigous.
func (c *PageCollections) getPageOldVersion(ref ...string) (page.Page, error) {
var refs []string
for _, r := range ref {
// A common construct in the wild is
// .Site.GetPage "home" "" or
// .Site.GetPage "home" "/"
if r != "" && r != "/" {
refs = append(refs, r)
}
}
var key string
if len(refs) > 2 {
// This was allowed in Hugo <= 0.44, but we cannot support this with the
// new API. This should be the most unusual case.
return nil, fmt.Errorf(`too many arguments to .Site.GetPage: %v. Use lookups on the form {{ .Site.GetPage "/posts/mypage-md" }}`, ref)
}
if len(refs) == 0 || refs[0] == page.KindHome {
key = "/"
} else if len(refs) == 1 {
if len(ref) == 2 && refs[0] == page.KindSection {
// This is an old style reference to the "Home Page section".
// Typically fetched via {{ .Site.GetPage "section" .Section }}
// See https://github.com/gohugoio/hugo/issues/4989
key = "/"
} else {
key = refs[0]
}
} else {
key = refs[1]
}
key = filepath.ToSlash(key)
if !strings.HasPrefix(key, "/") {
key = "/" + key
}
return c.getPageNew(nil, key)
}
// Only used in tests.
func (c *PageCollections) getPage(typ string, sections ...string) page.Page {
refs := append([]string{typ}, path.Join(sections...))
p, _ := c.getPageOldVersion(refs...)
return p
}
// Case insensitive page lookup.
func (c *PageCollections) getPageNew(context page.Page, ref string) (page.Page, error) {
var anError error
ref = strings.ToLower(ref)
// Absolute (content root relative) reference.
if strings.HasPrefix(ref, "/") {
p, err := c.getFromCache(ref)
if err == nil && p != nil {
return p, nil
}
if err != nil {
anError = err
}
} else if context != nil {
// Try the page-relative path.
ppath := path.Join("/", strings.ToLower(context.SectionsPath()), ref)
p, err := c.getFromCache(ppath)
if err == nil && p != nil {
return p, nil
}
if err != nil {
anError = err
}
}
if !strings.HasPrefix(ref, "/") {
// Many people will have "post/foo.md" in their content files.
p, err := c.getFromCache("/" + ref)
if err == nil && p != nil {
if context != nil {
// TODO(bep) remove this case and the message below when the storm has passed
err := wrapErr(errors.Errorf(`make non-relative ref/relref page reference(s) in page %q absolute, e.g. {{< ref "/blog/my-post.md" >}}`, context.Path()), context)
helpers.DistinctWarnLog.Println(err)
}
return p, nil
}
if err != nil {
anError = err
}
}
// Last try.
ref = strings.TrimPrefix(ref, "/")
p, err := c.getFromCache(ref)
if err != nil {
anError = err
}
if p == nil && anError != nil {
return nil, wrapErr(errors.Wrap(anError, "failed to resolve ref"), context)
}
return p, nil
}
func (*PageCollections) findPagesByKindIn(kind string, inPages page.Pages) page.Pages {
var pages page.Pages
for _, p := range inPages {
if p.Kind() == kind {
pages = append(pages, p)
}
}
return pages
}
func (c *PageCollections) findPagesByKind(kind string) page.Pages {
return c.findPagesByKindIn(kind, c.Pages())
}
func (c *PageCollections) findWorkPagesByKind(kind string) pageStatePages {
var pages pageStatePages
for _, p := range c.workAllPages {
if p.Kind() == kind {
pages = append(pages, p)
}
}
return pages
}
func (*PageCollections) findPagesByKindInWorkPages(kind string, inPages pageStatePages) page.Pages {
var pages page.Pages
for _, p := range inPages {
if p.Kind() == kind {
pages = append(pages, p)
}
}
return pages
}
func (c *PageCollections) addPage(page *pageState) {
c.rawAllPages = append(c.rawAllPages, page)
}
func (c *PageCollections) removePageFilename(filename string) {
if i := c.rawAllPages.findPagePosByFilename(filename); i >= 0 {
c.clearResourceCacheForPage(c.rawAllPages[i])
c.rawAllPages = append(c.rawAllPages[:i], c.rawAllPages[i+1:]...)
}
}
func (c *PageCollections) removePage(page *pageState) {
if i := c.rawAllPages.findPagePos(page); i >= 0 {
c.clearResourceCacheForPage(c.rawAllPages[i])
c.rawAllPages = append(c.rawAllPages[:i], c.rawAllPages[i+1:]...)
}
}
func (c *PageCollections) findPagesByShortcode(shortcode string) page.Pages {
var pages page.Pages
for _, p := range c.rawAllPages {
if p.HasShortcode(shortcode) {
pages = append(pages, p)
}
}
return pages
}
func (c *PageCollections) replacePage(page *pageState) {
// will find existing page that matches filepath and remove it
c.removePage(page)
c.addPage(page)
}
func (c *PageCollections) clearResourceCacheForPage(page *pageState) {
if len(page.resources) > 0 {
page.s.ResourceSpec.DeleteCacheByPrefix(page.targetPaths().SubResourceBaseTarget)
}
}
func (c *PageCollections) assemblePagesMap(s *Site) error {
c.pagesMap = newPagesMap(s)
rootSections := make(map[string]bool)
// Add all branch nodes first.
for _, p := range c.rawAllPages {
rootSections[p.Section()] = true
if p.IsPage() {
continue
}
c.pagesMap.addPage(p)
}
// Create missing home page and the first level sections if no
// _index provided.
s.home = c.pagesMap.getOrCreateHome()
for k := range rootSections {
c.pagesMap.createSectionIfNotExists(k)
}
// Attach the regular pages to their section.
for _, p := range c.rawAllPages {
if p.IsNode() {
continue
}
c.pagesMap.addPage(p)
}
return nil
}
func (c *PageCollections) createWorkAllPages() error {
c.workAllPages = make(pageStatePages, 0, len(c.rawAllPages))
c.headlessPages = make(pageStatePages, 0)
var (
homeDates *resource.Dates
sectionDates *resource.Dates
siteLastmod time.Time
siteLastDate time.Time
sectionsParamId = "mainSections"
sectionsParamIdLower = strings.ToLower(sectionsParamId)
)
mainSections, mainSectionsFound := c.pagesMap.s.Info.Params()[sectionsParamIdLower]
var (
bucketsToRemove []string
rootBuckets []*pagesMapBucket
)
c.pagesMap.r.Walk(func(s string, v interface{}) bool {
bucket := v.(*pagesMapBucket)
var parentBucket *pagesMapBucket
if s != "/" {
_, parentv, found := c.pagesMap.r.LongestPrefix(path.Dir(s))
if !found {
panic(fmt.Sprintf("[BUG] parent bucket not found for %q", s))
}
parentBucket = parentv.(*pagesMapBucket)
if !mainSectionsFound && strings.Count(s, "/") == 1 {
// Root section
rootBuckets = append(rootBuckets, bucket)
}
}
if bucket.owner.IsHome() {
if resource.IsZeroDates(bucket.owner) {
// Calculate dates from the page tree.
homeDates = &bucket.owner.m.Dates
}
}
sectionDates = nil
if resource.IsZeroDates(bucket.owner) {
sectionDates = &bucket.owner.m.Dates
}
if parentBucket != nil {
bucket.parent = parentBucket
if bucket.owner.IsSection() {
parentBucket.bucketSections = append(parentBucket.bucketSections, bucket)
}
}
tmp := bucket.pages[:0]
for _, x := range bucket.pages {
if c.pagesMap.s.shouldBuild(x) {
tmp = append(tmp, x)
}
}
bucket.pages = tmp
if bucket.isEmpty() {
if bucket.owner.IsSection() && bucket.owner.File().IsZero() {
// Check for any nested section.
var hasDescendant bool
c.pagesMap.r.WalkPrefix(s, func(ss string, v interface{}) bool {
if s != ss {
hasDescendant = true
return true
}
return false
})
if !hasDescendant {
// This is an auto-created section with, now, nothing in it.
bucketsToRemove = append(bucketsToRemove, s)
return false
}
}
}
if !bucket.disabled {
c.workAllPages = append(c.workAllPages, bucket.owner)
}
if !bucket.view {
for _, p := range bucket.pages {
ps := p.(*pageState)
ps.parent = bucket.owner
if ps.m.headless {
c.headlessPages = append(c.headlessPages, ps)
} else {
c.workAllPages = append(c.workAllPages, ps)
}
if homeDates != nil {
homeDates.UpdateDateAndLastmodIfAfter(ps)
}
if sectionDates != nil {
sectionDates.UpdateDateAndLastmodIfAfter(ps)
}
if p.Lastmod().After(siteLastmod) {
siteLastmod = p.Lastmod()
}
if p.Date().After(siteLastDate) {
siteLastDate = p.Date()
}
}
}
return false
})
c.pagesMap.s.lastmod = siteLastmod
if !mainSectionsFound {
// Calculare main section
var (
maxRootBucketWeight int
maxRootBucket *pagesMapBucket
)
for _, b := range rootBuckets {
weight := len(b.pages) + (len(b.bucketSections) * 5)
if weight >= maxRootBucketWeight {
maxRootBucket = b
maxRootBucketWeight = weight
}
}
if maxRootBucket != nil {
// Try to make this as backwards compatible as possible.
mainSections = []string{maxRootBucket.owner.Section()}
}
}
c.pagesMap.s.Info.Params()[sectionsParamId] = mainSections
c.pagesMap.s.Info.Params()[sectionsParamIdLower] = mainSections
for _, key := range bucketsToRemove {
c.pagesMap.r.Delete(key)
}
sort.Sort(c.workAllPages)
return nil
}