hugo/commands/convert.go
Bjørn Erik Pedersen eada236f87
Introduce a tree map for all content
This commit introduces a new data structure to store pages and their resources.

This data structure is backed by radix trees.

This simplies tree operations, makes all pages a bundle,  and paves the way for #6310.

It also solves a set of annoying issues (see list below).

Not a motivation behind this, but this commit also makes Hugo in general a little bit faster and more memory effective (see benchmarks). Especially for partial rebuilds on content edits, but also when taxonomies is in use.

```
name                                   old time/op    new time/op    delta
SiteNew/Bundle_with_image/Edit-16        1.32ms ± 8%    1.00ms ± 9%  -24.42%  (p=0.029 n=4+4)
SiteNew/Bundle_with_JSON_file/Edit-16    1.28ms ± 0%    0.94ms ± 0%  -26.26%  (p=0.029 n=4+4)
SiteNew/Tags_and_categories/Edit-16      33.9ms ± 2%    21.8ms ± 1%  -35.67%  (p=0.029 n=4+4)
SiteNew/Canonify_URLs/Edit-16            40.6ms ± 1%    37.7ms ± 3%   -7.20%  (p=0.029 n=4+4)
SiteNew/Deep_content_tree/Edit-16        56.7ms ± 0%    51.7ms ± 1%   -8.82%  (p=0.029 n=4+4)
SiteNew/Many_HTML_templates/Edit-16      19.9ms ± 2%    18.3ms ± 3%   -7.64%  (p=0.029 n=4+4)
SiteNew/Page_collections/Edit-16         37.9ms ± 4%    34.0ms ± 2%  -10.28%  (p=0.029 n=4+4)
SiteNew/Bundle_with_image-16             10.7ms ± 0%    10.6ms ± 0%   -1.15%  (p=0.029 n=4+4)
SiteNew/Bundle_with_JSON_file-16         10.8ms ± 0%    10.7ms ± 0%   -1.05%  (p=0.029 n=4+4)
SiteNew/Tags_and_categories-16           43.2ms ± 1%    39.6ms ± 1%   -8.35%  (p=0.029 n=4+4)
SiteNew/Canonify_URLs-16                 47.6ms ± 1%    47.3ms ± 0%     ~     (p=0.057 n=4+4)
SiteNew/Deep_content_tree-16             73.0ms ± 1%    74.2ms ± 1%     ~     (p=0.114 n=4+4)
SiteNew/Many_HTML_templates-16           37.9ms ± 0%    38.1ms ± 1%     ~     (p=0.114 n=4+4)
SiteNew/Page_collections-16              53.6ms ± 1%    54.7ms ± 1%   +2.09%  (p=0.029 n=4+4)

name                                   old alloc/op   new alloc/op   delta
SiteNew/Bundle_with_image/Edit-16         486kB ± 0%     430kB ± 0%  -11.47%  (p=0.029 n=4+4)
SiteNew/Bundle_with_JSON_file/Edit-16     265kB ± 0%     209kB ± 0%  -21.06%  (p=0.029 n=4+4)
SiteNew/Tags_and_categories/Edit-16      13.6MB ± 0%     8.8MB ± 0%  -34.93%  (p=0.029 n=4+4)
SiteNew/Canonify_URLs/Edit-16            66.5MB ± 0%    63.9MB ± 0%   -3.95%  (p=0.029 n=4+4)
SiteNew/Deep_content_tree/Edit-16        28.8MB ± 0%    25.8MB ± 0%  -10.55%  (p=0.029 n=4+4)
SiteNew/Many_HTML_templates/Edit-16      6.16MB ± 0%    5.56MB ± 0%   -9.86%  (p=0.029 n=4+4)
SiteNew/Page_collections/Edit-16         16.9MB ± 0%    16.0MB ± 0%   -5.19%  (p=0.029 n=4+4)
SiteNew/Bundle_with_image-16             2.28MB ± 0%    2.29MB ± 0%   +0.35%  (p=0.029 n=4+4)
SiteNew/Bundle_with_JSON_file-16         2.07MB ± 0%    2.07MB ± 0%     ~     (p=0.114 n=4+4)
SiteNew/Tags_and_categories-16           14.3MB ± 0%    13.2MB ± 0%   -7.30%  (p=0.029 n=4+4)
SiteNew/Canonify_URLs-16                 69.1MB ± 0%    69.0MB ± 0%     ~     (p=0.343 n=4+4)
SiteNew/Deep_content_tree-16             31.3MB ± 0%    31.8MB ± 0%   +1.49%  (p=0.029 n=4+4)
SiteNew/Many_HTML_templates-16           10.8MB ± 0%    10.9MB ± 0%   +1.11%  (p=0.029 n=4+4)
SiteNew/Page_collections-16              21.4MB ± 0%    21.6MB ± 0%   +1.15%  (p=0.029 n=4+4)

name                                   old allocs/op  new allocs/op  delta
SiteNew/Bundle_with_image/Edit-16         4.74k ± 0%     3.86k ± 0%  -18.57%  (p=0.029 n=4+4)
SiteNew/Bundle_with_JSON_file/Edit-16     4.73k ± 0%     3.85k ± 0%  -18.58%  (p=0.029 n=4+4)
SiteNew/Tags_and_categories/Edit-16        301k ± 0%      198k ± 0%  -34.14%  (p=0.029 n=4+4)
SiteNew/Canonify_URLs/Edit-16              389k ± 0%      373k ± 0%   -4.07%  (p=0.029 n=4+4)
SiteNew/Deep_content_tree/Edit-16          338k ± 0%      262k ± 0%  -22.63%  (p=0.029 n=4+4)
SiteNew/Many_HTML_templates/Edit-16        102k ± 0%       88k ± 0%  -13.81%  (p=0.029 n=4+4)
SiteNew/Page_collections/Edit-16           176k ± 0%      152k ± 0%  -13.32%  (p=0.029 n=4+4)
SiteNew/Bundle_with_image-16              26.8k ± 0%     26.8k ± 0%   +0.05%  (p=0.029 n=4+4)
SiteNew/Bundle_with_JSON_file-16          26.8k ± 0%     26.8k ± 0%   +0.05%  (p=0.029 n=4+4)
SiteNew/Tags_and_categories-16             273k ± 0%      245k ± 0%  -10.36%  (p=0.029 n=4+4)
SiteNew/Canonify_URLs-16                   396k ± 0%      398k ± 0%   +0.39%  (p=0.029 n=4+4)
SiteNew/Deep_content_tree-16               317k ± 0%      325k ± 0%   +2.53%  (p=0.029 n=4+4)
SiteNew/Many_HTML_templates-16             146k ± 0%      147k ± 0%   +0.98%  (p=0.029 n=4+4)
SiteNew/Page_collections-16                210k ± 0%      215k ± 0%   +2.44%  (p=0.029 n=4+4)
```

Fixes #6312
Fixes #6087
Fixes #6738
Fixes #6412
Fixes #6743
Fixes #6875
Fixes #6034
Fixes #6902
Fixes #6173
Fixes #6590
2020-02-18 09:49:42 +01:00

213 lines
5.3 KiB
Go

// Copyright 2019 The Hugo Authors. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package commands
import (
"bytes"
"fmt"
"strings"
"time"
"github.com/gohugoio/hugo/parser/pageparser"
"github.com/gohugoio/hugo/resources/page"
"github.com/gohugoio/hugo/hugofs"
"github.com/gohugoio/hugo/helpers"
"github.com/gohugoio/hugo/parser"
"github.com/gohugoio/hugo/parser/metadecoders"
"github.com/pkg/errors"
"github.com/gohugoio/hugo/hugolib"
"path/filepath"
"github.com/spf13/cobra"
)
var (
_ cmder = (*convertCmd)(nil)
)
type convertCmd struct {
outputDir string
unsafe bool
*baseBuilderCmd
}
func (b *commandsBuilder) newConvertCmd() *convertCmd {
cc := &convertCmd{}
cmd := &cobra.Command{
Use: "convert",
Short: "Convert your content to different formats",
Long: `Convert your content (e.g. front matter) to different formats.
See convert's subcommands toJSON, toTOML and toYAML for more information.`,
RunE: nil,
}
cmd.AddCommand(
&cobra.Command{
Use: "toJSON",
Short: "Convert front matter to JSON",
Long: `toJSON converts all front matter in the content directory
to use JSON for the front matter.`,
RunE: func(cmd *cobra.Command, args []string) error {
return cc.convertContents(metadecoders.JSON)
},
},
&cobra.Command{
Use: "toTOML",
Short: "Convert front matter to TOML",
Long: `toTOML converts all front matter in the content directory
to use TOML for the front matter.`,
RunE: func(cmd *cobra.Command, args []string) error {
return cc.convertContents(metadecoders.TOML)
},
},
&cobra.Command{
Use: "toYAML",
Short: "Convert front matter to YAML",
Long: `toYAML converts all front matter in the content directory
to use YAML for the front matter.`,
RunE: func(cmd *cobra.Command, args []string) error {
return cc.convertContents(metadecoders.YAML)
},
},
)
cmd.PersistentFlags().StringVarP(&cc.outputDir, "output", "o", "", "filesystem path to write files to")
cmd.PersistentFlags().BoolVar(&cc.unsafe, "unsafe", false, "enable less safe operations, please backup first")
cc.baseBuilderCmd = b.newBuilderBasicCmd(cmd)
return cc
}
func (cc *convertCmd) convertContents(format metadecoders.Format) error {
if cc.outputDir == "" && !cc.unsafe {
return newUserError("Unsafe operation not allowed, use --unsafe or set a different output path")
}
c, err := initializeConfig(true, false, &cc.hugoBuilderCommon, cc, nil)
if err != nil {
return err
}
c.Cfg.Set("buildDrafts", true)
h, err := hugolib.NewHugoSites(*c.DepsCfg)
if err != nil {
return err
}
if err := h.Build(hugolib.BuildCfg{SkipRender: true}); err != nil {
return err
}
site := h.Sites[0]
site.Log.FEEDBACK.Println("processing", len(site.AllPages()), "content files")
for _, p := range site.AllPages() {
if err := cc.convertAndSavePage(p, site, format); err != nil {
return err
}
}
return nil
}
func (cc *convertCmd) convertAndSavePage(p page.Page, site *hugolib.Site, targetFormat metadecoders.Format) error {
// The resources are not in .Site.AllPages.
for _, r := range p.Resources().ByType("page") {
if err := cc.convertAndSavePage(r.(page.Page), site, targetFormat); err != nil {
return err
}
}
if p.File().IsZero() {
// No content file.
return nil
}
errMsg := fmt.Errorf("Error processing file %q", p.Path())
site.Log.INFO.Println("Attempting to convert", p.File().Filename())
f := p.File()
file, err := f.FileInfo().Meta().Open()
if err != nil {
site.Log.ERROR.Println(errMsg)
file.Close()
return nil
}
pf, err := pageparser.ParseFrontMatterAndContent(file)
if err != nil {
site.Log.ERROR.Println(errMsg)
file.Close()
return err
}
file.Close()
// better handling of dates in formats that don't have support for them
if pf.FrontMatterFormat == metadecoders.JSON || pf.FrontMatterFormat == metadecoders.YAML || pf.FrontMatterFormat == metadecoders.TOML {
for k, v := range pf.FrontMatter {
switch vv := v.(type) {
case time.Time:
pf.FrontMatter[k] = vv.Format(time.RFC3339)
}
}
}
var newContent bytes.Buffer
err = parser.InterfaceToFrontMatter(pf.FrontMatter, targetFormat, &newContent)
if err != nil {
site.Log.ERROR.Println(errMsg)
return err
}
newContent.Write(pf.Content)
newFilename := p.File().Filename()
if cc.outputDir != "" {
contentDir := strings.TrimSuffix(newFilename, p.Path())
contentDir = filepath.Base(contentDir)
newFilename = filepath.Join(cc.outputDir, contentDir, p.Path())
}
fs := hugofs.Os
if err := helpers.WriteToDisk(newFilename, &newContent, fs); err != nil {
return errors.Wrapf(err, "Failed to save file %q:", newFilename)
}
return nil
}
type parsedFile struct {
frontMatterFormat metadecoders.Format
frontMatterSource []byte
frontMatter map[string]interface{}
// Everything after Front Matter
content []byte
}