From f7aeaa61291dd75f92901bcbeecc7fce07a28dec Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B8rn=20Erik=20Pedersen?= Date: Thu, 8 Nov 2018 10:24:13 +0100 Subject: [PATCH] Add a consolidated file cache This commit reworks how file caching is performed in Hugo. Now there is only one way, and it can be configured. This is the default configuration: ```toml [caches] [caches.getjson] dir = ":cacheDir" maxAge = -1 [caches.getcsv] dir = ":cacheDir" maxAge = -1 [caches.images] dir = ":resourceDir/_gen" maxAge = -1 [caches.assets] dir = ":resourceDir/_gen" maxAge = -1 ``` You can override any of these cache settings in your own `config.toml`. The placeholders explained: `:cacheDir`: This is the value of the `cacheDir` config option if set (can also be set via OS env variable `HUGO_CACHEDIR`). It will fall back to `/opt/build/cache/hugo_cache/` on Netlify, or a `hugo_cache` directory below the OS temp dir for the others. `:resourceDir`: This is the value of the `resourceDir` config option. `maxAge` is the time in seconds before a cache entry will be evicted, -1 means forever and 0 effectively turns that particular cache off. This means that if you run your builds on Netlify, all caches configured with `:cacheDir` will be saved and restored on the next build. For other CI vendors, please read their documentation. 
For an CircleCI example, see https://github.com/bep/hugo-sass-test/blob/6c3960a8f4b90e8938228688bc49bdcdd6b2d99e/.circleci/config.yml Fixes #5404 --- cache/filecache/filecache.go | 442 +++++++++++++++++++++++++++++ cache/filecache/filecache_test.go | 306 ++++++++++++++++++++ commands/commandeer.go | 17 +- common/hugio/writers.go | 33 +++ deps/deps.go | 16 +- go.mod | 2 +- go.sum | 2 + helpers/path.go | 55 ++++ hugolib/filesystems/basefs.go | 8 - hugolib/filesystems/basefs_test.go | 3 - hugolib/prune_resources.go | 33 +-- resource/bundler/bundler.go | 2 +- resource/create/create.go | 2 +- resource/image.go | 107 ++----- resource/image_cache.go | 88 +++--- resource/image_test.go | 19 +- resource/resource.go | 26 +- resource/resource_cache.go | 61 ++-- resource/testhelpers_test.go | 17 +- resource/transform.go | 19 +- tpl/data/cache.go | 85 ------ tpl/data/cache_test.go | 63 ---- tpl/data/data.go | 108 ++++--- tpl/data/init_test.go | 7 +- tpl/data/resources.go | 151 +++++----- tpl/data/resources_test.go | 63 ++-- 26 files changed, 1192 insertions(+), 543 deletions(-) create mode 100644 cache/filecache/filecache.go create mode 100644 cache/filecache/filecache_test.go delete mode 100644 tpl/data/cache.go delete mode 100644 tpl/data/cache_test.go diff --git a/cache/filecache/filecache.go b/cache/filecache/filecache.go new file mode 100644 index 000000000..45359f574 --- /dev/null +++ b/cache/filecache/filecache.go @@ -0,0 +1,442 @@ +// Copyright 2018 The Hugo Authors. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +package filecache + +import ( + "bytes" + "io" + "io/ioutil" + "path" + "path/filepath" + "strings" + "time" + + "github.com/gohugoio/hugo/common/hugio" + + "github.com/gohugoio/hugo/helpers" + + "github.com/gohugoio/hugo/hugolib/paths" + + "github.com/pkg/errors" + + "github.com/BurntSushi/locker" + "github.com/bep/mapstructure" + "github.com/spf13/afero" +) + +const ( + cachesConfigKey = "caches" + + resourcesGenDir = ":resourceDir/_gen" +) + +var defaultCacheConfig = cacheConfig{ + MaxAge: -1, // Never expire + Dir: ":cacheDir", +} + +const ( + cacheKeyGetJSON = "getjson" + cacheKeyGetCSV = "getcsv" + cacheKeyImages = "images" + cacheKeyAssets = "assets" +) + +var defaultCacheConfigs = map[string]cacheConfig{ + cacheKeyGetJSON: defaultCacheConfig, + cacheKeyGetCSV: defaultCacheConfig, + cacheKeyImages: cacheConfig{ + MaxAge: -1, + Dir: resourcesGenDir, + }, + cacheKeyAssets: cacheConfig{ + MaxAge: -1, + Dir: resourcesGenDir, + }, +} + +type cachesConfig map[string]cacheConfig + +type cacheConfig struct { + // Maxe age of ache entries in this cache. Any items older than this will + // be removed and not returned from the cache. + // -1 means forever, 0 means cache is disabled. + MaxAge int + + // The directory where files are stored. + Dir string +} + +// Cache caches a set of files in a directory. This is usually a file on +// disk, but since this is backed by an Afero file system, it can be anything. +type Cache struct { + Fs afero.Fs + + // Max age in seconds. + maxAge int + + nlocker *locker.Locker +} + +// ItemInfo contains info about a cached file. +type ItemInfo struct { + // This is the file's name relative to the cache's filesystem. + Name string +} + +// NewCache creates a new file cache with the given filesystem and max age. 
+func NewCache(fs afero.Fs, maxAge int) *Cache { + return &Cache{ + Fs: fs, + nlocker: locker.NewLocker(), + maxAge: maxAge, + } +} + +// lockedFile is a file with a lock that is released on Close. +type lockedFile struct { + afero.File + unlock func() +} + +func (l *lockedFile) Close() error { + defer l.unlock() + return l.File.Close() +} + +// GetWriteCloser returns a transactional writer into the cache. +// It's important that it's closed when done. +func (c *Cache) WriteCloser(id string) (ItemInfo, io.WriteCloser, error) { + id = cleanID(id) + c.nlocker.Lock(id) + + info := ItemInfo{Name: id} + + f, err := helpers.OpenFileForWriting(c.Fs, id) + if err != nil { + c.nlocker.Unlock(id) + return info, nil, err + } + + return info, &lockedFile{ + File: f, + unlock: func() { c.nlocker.Unlock(id) }, + }, nil +} + +// ReadOrCreate tries to lookup the file in cache. +// If found, it is passed to read and then closed. +// If not found a new file is created and passed to create, which should close +// it when done. +func (c *Cache) ReadOrCreate(id string, + read func(info ItemInfo, r io.Reader) error, + create func(info ItemInfo, w io.WriteCloser) error) (info ItemInfo, err error) { + id = cleanID(id) + + c.nlocker.Lock(id) + defer c.nlocker.Unlock(id) + + info = ItemInfo{Name: id} + + if r := c.getOrRemove(id); r != nil { + err = read(info, r) + defer r.Close() + return + } + + f, err := helpers.OpenFileForWriting(c.Fs, id) + if err != nil { + return + } + + err = create(info, f) + + return + +} + +// GetOrCreate tries to get the file with the given id from cache. If not found or expired, create will +// be invoked and the result cached. +// This method is protected by a named lock using the given id as identifier. 
+func (c *Cache) GetOrCreate(id string, create func() (io.ReadCloser, error)) (ItemInfo, io.ReadCloser, error) { + id = cleanID(id) + + c.nlocker.Lock(id) + defer c.nlocker.Unlock(id) + + info := ItemInfo{Name: id} + + if r := c.getOrRemove(id); r != nil { + return info, r, nil + } + + r, err := create() + if err != nil { + return info, nil, err + } + + if c.maxAge == 0 { + // No caching. + return info, hugio.ToReadCloser(r), nil + } + + var buff bytes.Buffer + return info, + hugio.ToReadCloser(&buff), + afero.WriteReader(c.Fs, id, io.TeeReader(r, &buff)) +} + +// GetOrCreateBytes is the same as GetOrCreate, but produces a byte slice. +func (c *Cache) GetOrCreateBytes(id string, create func() ([]byte, error)) (ItemInfo, []byte, error) { + id = cleanID(id) + + c.nlocker.Lock(id) + defer c.nlocker.Unlock(id) + + info := ItemInfo{Name: id} + + if r := c.getOrRemove(id); r != nil { + defer r.Close() + b, err := ioutil.ReadAll(r) + return info, b, err + } + + b, err := create() + if err != nil { + return info, nil, err + } + + if c.maxAge == 0 { + return info, b, nil + } + + if err := afero.WriteReader(c.Fs, id, bytes.NewReader(b)); err != nil { + return info, nil, err + } + return info, b, nil + +} + +// GetBytes gets the file content with the given id from the cahce, nil if none found. +func (c *Cache) GetBytes(id string) (ItemInfo, []byte, error) { + id = cleanID(id) + + c.nlocker.Lock(id) + defer c.nlocker.Unlock(id) + + info := ItemInfo{Name: id} + + if r := c.getOrRemove(id); r != nil { + defer r.Close() + b, err := ioutil.ReadAll(r) + return info, b, err + } + + return info, nil, nil +} + +// Get gets the file with the given id from the cahce, nil if none found. +func (c *Cache) Get(id string) (ItemInfo, io.ReadCloser, error) { + id = cleanID(id) + + c.nlocker.Lock(id) + defer c.nlocker.Unlock(id) + + info := ItemInfo{Name: id} + + r := c.getOrRemove(id) + + return info, r, nil +} + +// get gets the file with the given id. If it's expired, it will +// be removed. 
+func (c *Cache) getOrRemove(id string) hugio.ReadSeekCloser { + if c.maxAge == 0 { + // No caching. + return nil + } + + if c.maxAge > 0 { + fi, err := c.Fs.Stat(id) + if err != nil { + return nil + } + + expiry := time.Now().Add(-time.Duration(c.maxAge) * time.Second) + expired := fi.ModTime().Before(expiry) + if expired { + c.Fs.Remove(id) + return nil + } + } + + f, err := c.Fs.Open(id) + + if err != nil { + return nil + } + + return f +} + +// For testing +func (c *Cache) getString(id string) string { + id = cleanID(id) + + c.nlocker.Lock(id) + defer c.nlocker.Unlock(id) + + if r := c.getOrRemove(id); r != nil { + defer r.Close() + b, _ := ioutil.ReadAll(r) + return string(b) + } + + return "" + +} + +// Caches is a named set of caches. +type Caches map[string]*Cache + +// Get gets a named cache, nil if none found. +func (f Caches) Get(name string) *Cache { + return f[strings.ToLower(name)] +} + +// GetJSOnCache gets the file cache for getJSON. +func (f Caches) GetJSONCache() *Cache { + return f[cacheKeyGetJSON] +} + +// GetCSVCache gets the file cache for getCSV. +func (f Caches) GetCSVCache() *Cache { + return f[cacheKeyGetCSV] +} + +// ImageCache gets the file cache for processed images. +func (f Caches) ImageCache() *Cache { + return f[cacheKeyImages] +} + +// AssetsCache gets the file cache for assets (processed resources, SCSS etc.). +func (f Caches) AssetsCache() *Cache { + return f[cacheKeyAssets] +} + +// NewCachesFromPaths creates a new set of file caches from the given +// configuration. 
+func NewCachesFromPaths(p *paths.Paths) (Caches, error) { + dcfg, err := decodeConfig(p) + if err != nil { + return nil, err + } + + fs := p.Fs.Source + + m := make(Caches) + for k, v := range dcfg { + baseDir := filepath.Join(v.Dir, k) + if err = fs.MkdirAll(baseDir, 0777); err != nil { + return nil, err + } + bfs := afero.NewBasePathFs(fs, baseDir) + m[k] = NewCache(bfs, v.MaxAge) + } + + return m, nil +} + +func decodeConfig(p *paths.Paths) (cachesConfig, error) { + c := make(cachesConfig) + valid := make(map[string]bool) + // Add defaults + for k, v := range defaultCacheConfigs { + c[k] = v + valid[k] = true + } + + cfg := p.Cfg + + m := cfg.GetStringMap(cachesConfigKey) + + _, isOsFs := p.Fs.Source.(*afero.OsFs) + + for k, v := range m { + cc := defaultCacheConfig + + if err := mapstructure.WeakDecode(v, &cc); err != nil { + return nil, err + } + + if cc.Dir == "" { + return c, errors.New("must provide cache Dir") + } + + name := strings.ToLower(k) + if !valid[name] { + return nil, errors.Errorf("%q is not a valid cache name", name) + } + + c[name] = cc + } + + // This is a very old flag in Hugo, but we need to respect it. + disabled := cfg.GetBool("ignoreCache") + + for k, v := range c { + v.Dir = filepath.Clean(v.Dir) + dir := filepath.ToSlash(v.Dir) + parts := strings.Split(dir, "/") + first := parts[0] + + if strings.HasPrefix(first, ":") { + resolved, err := resolveDirPlaceholder(p, first) + if err != nil { + return c, err + } + resolved = filepath.ToSlash(resolved) + + v.Dir = filepath.FromSlash(path.Join((append([]string{resolved}, parts[1:]...))...)) + + } else if isOsFs && !path.IsAbs(dir) { + return c, errors.Errorf("%q must either start with a placeholder (e.g. :cacheDir, :resourceDir) or be absolute", v.Dir) + } + + if disabled { + v.MaxAge = 0 + } + + c[k] = v + } + + return c, nil +} + +// Resolves :resourceDir => /myproject/resources etc., :cacheDir => ... 
+func resolveDirPlaceholder(p *paths.Paths, placeholder string) (string, error) { + switch strings.ToLower(placeholder) { + case ":resourcedir": + return p.AbsResourcesDir, nil + case ":cachedir": + return helpers.GetCacheDir(p.Fs.Source, p.Cfg) + } + + return "", errors.Errorf("%q is not a valid placeholder (valid values are :cacheDir or :resourceDir)", placeholder) +} + +func cleanID(name string) string { + return filepath.Clean(name) +} diff --git a/cache/filecache/filecache_test.go b/cache/filecache/filecache_test.go new file mode 100644 index 000000000..d483fc1a7 --- /dev/null +++ b/cache/filecache/filecache_test.go @@ -0,0 +1,306 @@ +// Copyright 2018 The Hugo Authors. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package filecache + +import ( + "fmt" + "io" + "io/ioutil" + "path/filepath" + "regexp" + "runtime" + "strings" + "sync" + "testing" + "time" + + "github.com/gohugoio/hugo/common/hugio" + + "github.com/gohugoio/hugo/config" + "github.com/gohugoio/hugo/hugofs" + "github.com/gohugoio/hugo/hugolib/paths" + "github.com/spf13/afero" + "github.com/spf13/viper" + + "github.com/stretchr/testify/require" +) + +func TestFileCache(t *testing.T) { + t.Parallel() + assert := require.New(t) + + for _, cacheDir := range []string{"mycache", ""} { + + configStr := ` +cacheDir = "CACHEDIR" +[caches] +[caches.getJSON] +maxAge = 111 +dir = ":cacheDir/c" + +` + configStr = strings.Replace(configStr, "CACHEDIR", cacheDir, 1) + + cfg, err := config.FromConfigString(configStr, "toml") + assert.NoError(err) + + fs := hugofs.NewMem(cfg) + p, err := paths.New(fs, cfg) + assert.NoError(err) + + caches, err := NewCachesFromPaths(p) + assert.NoError(err) + + c := caches.Get("GetJSON") + assert.NotNil(c) + assert.Equal(111, c.maxAge) + + bfs, ok := c.Fs.(*afero.BasePathFs) + assert.True(ok) + filename, err := bfs.RealPath("key") + assert.NoError(err) + if cacheDir != "" { + assert.Equal(filepath.FromSlash(cacheDir+"/c/getjson/key"), filename) + } else { + // Temp dir. 
+ assert.Regexp(regexp.MustCompile("hugo_cache.*key"), filename) + } + + rf := func(s string) func() (io.ReadCloser, error) { + return func() (io.ReadCloser, error) { + return struct { + io.ReadSeeker + io.Closer + }{ + strings.NewReader(s), + ioutil.NopCloser(nil), + }, nil + } + } + + bf := func() ([]byte, error) { + return []byte("bcd"), nil + } + + for i := 0; i < 2; i++ { + info, r, err := c.GetOrCreate("a", rf("abc")) + assert.NoError(err) + assert.NotNil(r) + assert.Equal("a", info.Name) + b, _ := ioutil.ReadAll(r) + r.Close() + assert.Equal("abc", string(b)) + + info, b, err = c.GetOrCreateBytes("b", bf) + assert.NoError(err) + assert.NotNil(r) + assert.Equal("b", info.Name) + assert.Equal("bcd", string(b)) + + _, b, err = c.GetOrCreateBytes("a", bf) + assert.NoError(err) + assert.Equal("abc", string(b)) + + _, r, err = c.GetOrCreate("a", rf("bcd")) + assert.NoError(err) + b, _ = ioutil.ReadAll(r) + r.Close() + assert.Equal("abc", string(b)) + } + + assert.NotNil(caches.Get("getJSON")) + + info, w, err := caches.ImageCache().WriteCloser("mykey") + assert.NoError(err) + assert.Equal("mykey", info.Name) + io.WriteString(w, "Hugo is great!") + w.Close() + assert.Equal("Hugo is great!", caches.ImageCache().getString("mykey")) + + info, r, err := caches.ImageCache().Get("mykey") + assert.NoError(err) + assert.NotNil(r) + assert.Equal("mykey", info.Name) + b, _ := ioutil.ReadAll(r) + r.Close() + assert.Equal("Hugo is great!", string(b)) + + info, b, err = caches.ImageCache().GetBytes("mykey") + assert.NoError(err) + assert.Equal("mykey", info.Name) + assert.Equal("Hugo is great!", string(b)) + + } + +} + +func TestFileCacheConcurrent(t *testing.T) { + t.Parallel() + + assert := require.New(t) + + configStr := ` +[caches] +[caches.getjson] +maxAge = 1 +dir = "/cache/c" + +` + + cfg, err := config.FromConfigString(configStr, "toml") + assert.NoError(err) + fs := hugofs.NewMem(cfg) + p, err := paths.New(fs, cfg) + assert.NoError(err) + + caches, err := 
NewCachesFromPaths(p) + assert.NoError(err) + + const cacheName = "getjson" + + filenameData := func(i int) (string, string) { + data := fmt.Sprintf("data: %d", i) + filename := fmt.Sprintf("file%d", i) + return filename, data + } + + var wg sync.WaitGroup + + for i := 0; i < 50; i++ { + wg.Add(1) + go func(i int) { + defer wg.Done() + for j := 0; j < 20; j++ { + c := caches.Get(cacheName) + assert.NotNil(c) + filename, data := filenameData(i) + _, r, err := c.GetOrCreate(filename, func() (io.ReadCloser, error) { + return hugio.ToReadCloser(strings.NewReader(data)), nil + }) + assert.NoError(err) + b, _ := ioutil.ReadAll(r) + r.Close() + assert.Equal(data, string(b)) + // Trigger some expiration. + time.Sleep(50 * time.Millisecond) + } + }(i) + + } + wg.Wait() +} + +func TestDecodeConfig(t *testing.T) { + t.Parallel() + + assert := require.New(t) + + configStr := ` +[caches] +[caches.getJSON] +maxAge = 1234 +dir = "/path/to/c1" +[caches.getCSV] +maxAge = 3456 +dir = "/path/to/c2" +[caches.images] +dir = "/path/to/c3" + +` + + cfg, err := config.FromConfigString(configStr, "toml") + assert.NoError(err) + fs := hugofs.NewMem(cfg) + p, err := paths.New(fs, cfg) + assert.NoError(err) + + decoded, err := decodeConfig(p) + assert.NoError(err) + + assert.Equal(4, len(decoded)) + + c2 := decoded["getcsv"] + assert.Equal(3456, c2.MaxAge) + assert.Equal(filepath.FromSlash("/path/to/c2"), c2.Dir) + + c3 := decoded["images"] + assert.Equal(-1, c3.MaxAge) + assert.Equal(filepath.FromSlash("/path/to/c3"), c3.Dir) + +} + +func TestDecodeConfigIgnoreCache(t *testing.T) { + t.Parallel() + + assert := require.New(t) + + configStr := ` +ignoreCache = true +[caches] +[caches.getJSON] +maxAge = 1234 +dir = "/path/to/c1" +[caches.getCSV] +maxAge = 3456 +dir = "/path/to/c2" +[caches.images] +dir = "/path/to/c3" + +` + + cfg, err := config.FromConfigString(configStr, "toml") + assert.NoError(err) + fs := hugofs.NewMem(cfg) + p, err := paths.New(fs, cfg) + assert.NoError(err) + + decoded, 
err := decodeConfig(p) + assert.NoError(err) + + assert.Equal(4, len(decoded)) + + for _, v := range decoded { + assert.Equal(0, v.MaxAge) + } + +} + +func TestDecodeConfigDefault(t *testing.T) { + assert := require.New(t) + cfg := viper.New() + if runtime.GOOS == "windows" { + cfg.Set("resourceDir", "c:\\cache\\resources") + cfg.Set("cacheDir", "c:\\cache\\thecache") + + } else { + cfg.Set("resourceDir", "/cache/resources") + cfg.Set("cacheDir", "/cache/thecache") + } + + fs := hugofs.NewMem(cfg) + p, err := paths.New(fs, cfg) + assert.NoError(err) + + decoded, err := decodeConfig(p) + + assert.NoError(err) + + assert.Equal(4, len(decoded)) + + if runtime.GOOS == "windows" { + assert.Equal("c:\\cache\\resources\\_gen", decoded[cacheKeyImages].Dir) + } else { + assert.Equal("/cache/resources/_gen", decoded[cacheKeyImages].Dir) + } +} diff --git a/commands/commandeer.go b/commands/commandeer.go index 545191cfe..cd2866a27 100644 --- a/commands/commandeer.go +++ b/commands/commandeer.go @@ -359,20 +359,11 @@ func (c *commandeer) loadConfig(mustHaveConfigFile, running bool) error { return err } - cacheDir := config.GetString("cacheDir") - if cacheDir != "" { - if helpers.FilePathSeparator != cacheDir[len(cacheDir)-1:] { - cacheDir = cacheDir + helpers.FilePathSeparator - } - isDir, err := helpers.DirExists(cacheDir, sourceFs) - checkErr(cfg.Logger, err) - if !isDir { - mkdir(cacheDir) - } - config.Set("cacheDir", cacheDir) - } else { - config.Set("cacheDir", helpers.GetTempDir("hugo_cache", sourceFs)) + cacheDir, err := helpers.GetCacheDir(sourceFs, config) + if err != nil { + return err } + config.Set("cacheDir", cacheDir) cfg.Logger.INFO.Println("Using config file:", config.ConfigFileUsed()) diff --git a/common/hugio/writers.go b/common/hugio/writers.go index 2766146e1..82c4dca52 100644 --- a/common/hugio/writers.go +++ b/common/hugio/writers.go @@ -15,6 +15,7 @@ package hugio import ( "io" + "io/ioutil" ) type multiWriteCloser struct { @@ -41,3 +42,35 @@ func 
NewMultiWriteCloser(writeClosers ...io.WriteCloser) io.WriteCloser { } return multiWriteCloser{Writer: io.MultiWriter(writers...), closers: writeClosers} } + +// ToWriteCloser creates an io.WriteCloser from the given io.Writer. +// If it's not already, one will be created with a Close method that does nothing. +func ToWriteCloser(w io.Writer) io.WriteCloser { + if rw, ok := w.(io.WriteCloser); ok { + return rw + } + + return struct { + io.Writer + io.Closer + }{ + w, + ioutil.NopCloser(nil), + } +} + +// ToReadCloser creates an io.ReadCloser from the given io.Reader. +// If it's not already, one will be created with a Close method that does nothing. +func ToReadCloser(r io.Reader) io.ReadCloser { + if rc, ok := r.(io.ReadCloser); ok { + return rc + } + + return struct { + io.Reader + io.Closer + }{ + r, + ioutil.NopCloser(nil), + } +} diff --git a/deps/deps.go b/deps/deps.go index db59ad212..a1b54a862 100644 --- a/deps/deps.go +++ b/deps/deps.go @@ -4,6 +4,9 @@ import ( "sync" "time" + "github.com/pkg/errors" + + "github.com/gohugoio/hugo/cache/filecache" "github.com/gohugoio/hugo/common/loggers" "github.com/gohugoio/hugo/config" "github.com/gohugoio/hugo/helpers" @@ -53,6 +56,9 @@ type Deps struct { // The configuration to use Cfg config.Provider `json:"-"` + // The file cache to use. 
+ FileCaches filecache.Caches + // The translation func to use Translate func(translationID string, args ...interface{}) string `json:"-"` @@ -187,7 +193,12 @@ func New(cfg DepsCfg) (*Deps, error) { return nil, err } - resourceSpec, err := resource.NewSpec(ps, logger, cfg.OutputFormats, cfg.MediaTypes) + fileCaches, err := filecache.NewCachesFromPaths(ps.Paths) + if err != nil { + return nil, errors.WithMessage(err, "failed to create file caches from configuration") + } + + resourceSpec, err := resource.NewSpec(ps, fileCaches, logger, cfg.OutputFormats, cfg.MediaTypes) if err != nil { return nil, err } @@ -219,6 +230,7 @@ func New(cfg DepsCfg) (*Deps, error) { ResourceSpec: resourceSpec, Cfg: cfg.Language, Language: cfg.Language, + FileCaches: fileCaches, BuildStartListeners: &Listeners{}, Timeout: time.Duration(timeoutms) * time.Millisecond, globalErrHandler: &globalErrHandler{}, @@ -250,7 +262,7 @@ func (d Deps) ForLanguage(cfg DepsCfg) (*Deps, error) { // The resource cache is global so reuse. // TODO(bep) clean up these inits. 
resourceCache := d.ResourceSpec.ResourceCache - d.ResourceSpec, err = resource.NewSpec(d.PathSpec, d.Log, cfg.OutputFormats, cfg.MediaTypes) + d.ResourceSpec, err = resource.NewSpec(d.PathSpec, d.ResourceSpec.FileCaches, d.Log, cfg.OutputFormats, cfg.MediaTypes) if err != nil { return nil, err } diff --git a/go.mod b/go.mod index a3fa54f2a..1e0d0d6eb 100644 --- a/go.mod +++ b/go.mod @@ -52,7 +52,7 @@ require ( github.com/spf13/nitro v0.0.0-20131003134307-24d7ef30a12d github.com/spf13/pflag v1.0.3 github.com/spf13/viper v1.2.0 - github.com/stretchr/testify v1.2.3-0.20181002232621-f2347ac6c9c9 + github.com/stretchr/testify v1.2.3-0.20181014000028-04af85275a5c github.com/tdewolff/minify/v2 v2.3.7 github.com/wellington/go-libsass v0.0.0-20180624165032-615eaa47ef79 // indirect github.com/yosssi/ace v0.0.5 diff --git a/go.sum b/go.sum index 4b0ca2cea..ba3c80768 100644 --- a/go.sum +++ b/go.sum @@ -131,6 +131,8 @@ github.com/stretchr/testify v1.2.2 h1:bSDNvY7ZPG5RlJ8otE/7V6gMiyenm9RtJ7IUVIAoJ1 github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs= github.com/stretchr/testify v1.2.3-0.20181002232621-f2347ac6c9c9 h1:kcVw9CGDqYBy0TTpIq2+BNR4W9poqiwEPBh/OYX5CaU= github.com/stretchr/testify v1.2.3-0.20181002232621-f2347ac6c9c9/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs= +github.com/stretchr/testify v1.2.3-0.20181014000028-04af85275a5c h1:03OmljzZYsezlgAfa+f/cY8E8XXPiFh5bgANMhUlDI4= +github.com/stretchr/testify v1.2.3-0.20181014000028-04af85275a5c/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs= github.com/tdewolff/minify/v2 v2.3.7 h1:nhk7MKYRdTDwTxqEQZKLDkLe04tDHht8mBI+VJrsYvk= github.com/tdewolff/minify/v2 v2.3.7/go.mod h1:DD1stRlSx6JsHfl1+E/HVMQeXiec9rD1UQ0epklIZLc= github.com/tdewolff/parse/v2 v2.3.5 h1:/uS8JfhwVJsNkEh769GM5ENv6L9LOh2Z9uW3tCdlhs0= diff --git a/helpers/path.go b/helpers/path.go index f96156dcf..f5096e62f 100644 --- a/helpers/path.go +++ b/helpers/path.go @@ -24,6 +24,8 @@ import ( "strings" "unicode" + 
"github.com/gohugoio/hugo/config" + "github.com/gohugoio/hugo/common/hugio" _errors "github.com/pkg/errors" "github.com/spf13/afero" @@ -575,6 +577,59 @@ func OpenFileForWriting(fs afero.Fs, filename string) (afero.File, error) { return f, err } +// GetCacheDir returns a cache dir from the given filesystem and config. +// The dir will be created if it does not exist. +func GetCacheDir(fs afero.Fs, cfg config.Provider) (string, error) { + cacheDir := getCacheDir(cfg) + if cacheDir != "" { + exists, err := DirExists(cacheDir, fs) + if err != nil { + return "", err + } + if !exists { + err := fs.MkdirAll(cacheDir, 0777) // Before umask + if err != nil { + return "", _errors.Wrap(err, "failed to create cache dir") + } + } + return cacheDir, nil + } + + // Fall back to a cache in /tmp. + return GetTempDir("hugo_cache", fs), nil + +} + +func getCacheDir(cfg config.Provider) string { + // Always use the cacheDir config if set. + cacheDir := cfg.GetString("cacheDir") + if len(cacheDir) > 1 { + return addTrailingFileSeparator(cacheDir) + } + + // Both of these are fairly distinctive OS env keys used by Netlify. + if os.Getenv("DEPLOY_PRIME_URL") != "" && os.Getenv("PULL_REQUEST") != "" { + // Netlify's cache behaviour is not documented, the currently best example + // is this project: + // https://github.com/philhawksworth/content-shards/blob/master/gulpfile.js + return "/opt/build/cache/hugo_cache/" + + } + + // This will fall back to an hugo_cache folder in the tmp dir, which should work fine for most CI + // providers. See this for a working CircleCI setup: + // https://github.com/bep/hugo-sass-test/blob/6c3960a8f4b90e8938228688bc49bdcdd6b2d99e/.circleci/config.yml + // If not, they can set the HUGO_CACHEDIR environment variable or cacheDir config key. 
+ return "" +} + +func addTrailingFileSeparator(s string) string { + if !strings.HasSuffix(s, FilePathSeparator) { + s = s + FilePathSeparator + } + return s +} + // GetTempDir returns a temporary directory with the given sub path. func GetTempDir(subPath string, fs afero.Fs) string { return afero.GetTempDir(fs, subPath) diff --git a/hugolib/filesystems/basefs.go b/hugolib/filesystems/basefs.go index ee1c870d9..8fc1861b8 100644 --- a/hugolib/filesystems/basefs.go +++ b/hugolib/filesystems/basefs.go @@ -81,7 +81,6 @@ type SourceFilesystems struct { Layouts *SourceFilesystem Archetypes *SourceFilesystem Assets *SourceFilesystem - Resources *SourceFilesystem // This is a unified read-only view of the project's and themes' workdir. Work *SourceFilesystem @@ -376,13 +375,6 @@ func (b *sourceFilesystemsBuilder) Build() (*SourceFilesystems, error) { } b.result.Assets = sfs - sfs, err = b.createFs(true, false, "resourceDir", "resources") - if err != nil { - return nil, err - } - - b.result.Resources = sfs - sfs, err = b.createFs(false, true, "", "") if err != nil { return nil, err diff --git a/hugolib/filesystems/basefs_test.go b/hugolib/filesystems/basefs_test.go index ec6ccb30c..8e9b7235c 100644 --- a/hugolib/filesystems/basefs_test.go +++ b/hugolib/filesystems/basefs_test.go @@ -108,7 +108,6 @@ theme = ["atheme"] checkFileCount(bfs.Data.Fs, "", assert, 9) // 7 + 2 themes checkFileCount(bfs.Archetypes.Fs, "", assert, 10) // 8 + 2 themes checkFileCount(bfs.Assets.Fs, "", assert, 9) - checkFileCount(bfs.Resources.Fs, "", assert, 10) checkFileCount(bfs.Work.Fs, "", assert, 78) assert.Equal([]string{filepath.FromSlash("/my/work/mydata"), filepath.FromSlash("/my/work/themes/btheme/data"), filepath.FromSlash("/my/work/themes/atheme/data")}, bfs.Data.Dirnames) @@ -229,8 +228,6 @@ func TestRealDirs(t *testing.T) { assert.Equal(filepath.Join(root, "myassets/scss"), realDirs[0]) assert.Equal(filepath.Join(themesDir, "mytheme/assets/scss"), realDirs[len(realDirs)-1]) - 
checkFileCount(bfs.Resources.Fs, "", assert, 3) - assert.NotNil(bfs.themeFs) fi, b, err := bfs.themeFs.(afero.Lstater).LstatIfPossible(filepath.Join("resources", "t1.txt")) assert.NoError(err) diff --git a/hugolib/prune_resources.go b/hugolib/prune_resources.go index 28802c6f2..63623d75d 100644 --- a/hugolib/prune_resources.go +++ b/hugolib/prune_resources.go @@ -14,7 +14,6 @@ package hugolib import ( - "fmt" "io" "os" "strings" @@ -27,21 +26,12 @@ import ( // GC requires a build first. func (h *HugoSites) GC() (int, error) { s := h.Sites[0] - fs := h.PathSpec.BaseFs.Resources.Fs + assetsCacheFs := h.Deps.FileCaches.AssetsCache().Fs + imageCacheFs := h.Deps.FileCaches.ImageCache().Fs - imageCacheDir := s.ResourceSpec.GenImagePath - if len(imageCacheDir) < 10 { - panic("invalid image cache") - } - assetsCacheDir := s.ResourceSpec.GenAssetsPath - if len(assetsCacheDir) < 10 { - panic("invalid assets cache") - } - - isImageInUse := func(filename string) bool { - key := strings.TrimPrefix(filename, imageCacheDir) + isImageInUse := func(name string) bool { for _, site := range h.Sites { - if site.ResourceSpec.IsInImageCache(key) { + if site.ResourceSpec.IsInImageCache(name) { return true } } @@ -49,10 +39,9 @@ func (h *HugoSites) GC() (int, error) { return false } - isAssetInUse := func(filename string) bool { - key := strings.TrimPrefix(filename, assetsCacheDir) + isAssetInUse := func(name string) bool { // These assets are stored in tuplets with an added extension to the key. 
- key = strings.TrimSuffix(key, helpers.Ext(key)) + key := strings.TrimSuffix(name, helpers.Ext(name)) for _, site := range h.Sites { if site.ResourceSpec.ResourceCache.Contains(key) { return true @@ -62,17 +51,13 @@ func (h *HugoSites) GC() (int, error) { return false } - walker := func(dirname string, inUse func(filename string) bool) (int, error) { + walker := func(fs afero.Fs, dirname string, inUse func(filename string) bool) (int, error) { counter := 0 err := afero.Walk(fs, dirname, func(path string, info os.FileInfo, err error) error { if info == nil { return nil } - if !strings.HasPrefix(path, dirname) { - return fmt.Errorf("Invalid state, walk outside of resource dir: %q", path) - } - if info.IsDir() { f, err := fs.Open(path) if err != nil { @@ -103,8 +88,8 @@ func (h *HugoSites) GC() (int, error) { return counter, err } - imageCounter, err1 := walker(imageCacheDir, isImageInUse) - assetsCounter, err2 := walker(assetsCacheDir, isAssetInUse) + imageCounter, err1 := walker(imageCacheFs, "", isImageInUse) + assetsCounter, err2 := walker(assetsCacheFs, "", isAssetInUse) totalCount := imageCounter + assetsCounter if err1 != nil { diff --git a/resource/bundler/bundler.go b/resource/bundler/bundler.go index 39ce9b521..70b8ee536 100644 --- a/resource/bundler/bundler.go +++ b/resource/bundler/bundler.go @@ -105,7 +105,7 @@ func (c *Client) Concat(targetPath string, resources resource.Resources) (resour } composite, err := c.rs.NewForFs( - c.rs.BaseFs.Resources.Fs, + c.rs.FileCaches.AssetsCache().Fs, resource.ResourceSourceDescriptor{ LazyPublish: true, OpenReadSeekCloser: concatr, diff --git a/resource/create/create.go b/resource/create/create.go index 0f909a36d..db23930e4 100644 --- a/resource/create/create.go +++ b/resource/create/create.go @@ -51,7 +51,7 @@ func (c *Client) Get(fs afero.Fs, filename string) (resource.Resource, error) { func (c *Client) FromString(targetPath, content string) (resource.Resource, error) { return 
c.rs.ResourceCache.GetOrCreate(resource.CACHE_OTHER, targetPath, func() (resource.Resource, error) { return c.rs.NewForFs( - c.rs.BaseFs.Resources.Fs, + c.rs.FileCaches.AssetsCache().Fs, resource.ResourceSourceDescriptor{ LazyPublish: true, OpenReadSeekCloser: func() (hugio.ReadSeekCloser, error) { diff --git a/resource/image.go b/resource/image.go index fc79cbef5..0fdabf788 100644 --- a/resource/image.go +++ b/resource/image.go @@ -21,6 +21,7 @@ import ( "image/draw" "image/jpeg" "io" + "io/ioutil" "os" "strconv" "strings" @@ -125,9 +126,6 @@ type Image struct { copyToDestinationInit sync.Once - // Lock used when creating alternate versions of this image. - createMu sync.Mutex - imaging *Imaging format imaging.Format @@ -245,7 +243,7 @@ func (i *Image) doWithImageConfig(action, spec string, f func(src image.Image, c } } - return i.spec.imageCache.getOrCreate(i, conf, func(resourceCacheFilename string) (*Image, error) { + return i.spec.imageCache.getOrCreate(i, conf, func() (*Image, image.Image, error) { imageProcSem <- true defer func() { <-imageProcSem @@ -260,7 +258,7 @@ func (i *Image) doWithImageConfig(action, spec string, f func(src image.Image, c src, err := i.decodeSource() if err != nil { - return nil, &os.PathError{Op: errOp, Path: errPath, Err: err} + return nil, nil, &os.PathError{Op: errOp, Path: errPath, Err: err} } if conf.Rotate != 0 { @@ -270,7 +268,7 @@ func (i *Image) doWithImageConfig(action, spec string, f func(src image.Image, c converted, err := f(src, conf) if err != nil { - return ci, &os.PathError{Op: errOp, Path: errPath, Err: err} + return ci, nil, &os.PathError{Op: errOp, Path: errPath, Err: err} } if i.format == imaging.PNG { @@ -286,7 +284,7 @@ func (i *Image) doWithImageConfig(action, spec string, f func(src image.Image, c ci.config = image.Config{Width: b.Max.X, Height: b.Max.Y} ci.configLoaded = true - return ci, i.encodeToDestinations(converted, conf, resourceCacheFilename, ci.targetFilenames()...) 
+ return ci, converted, nil }) } @@ -462,79 +460,37 @@ func (i *Image) decodeSource() (image.Image, error) { return img, err } -func (i *Image) copyToDestination(src string) error { - var res error - i.copyToDestinationInit.Do(func() { - targetFilenames := i.targetFilenames() - var changedFilenames []string +func (i *Image) openDestinationsForWriting() (io.WriteCloser, error) { + targetFilenames := i.targetFilenames() + var changedFilenames []string - // Fast path: - // This is a processed version of the original. - // If it exists on destination with the same filename and file size, it is - // the same file, so no need to transfer it again. - for _, targetFilename := range targetFilenames { - if fi, err := i.spec.BaseFs.PublishFs.Stat(targetFilename); err == nil && fi.Size() == i.osFileInfo.Size() { - continue - } - changedFilenames = append(changedFilenames, targetFilename) + // Fast path: + // This is a processed version of the original. + // If it exists on destination with the same filename and file size, it is + // the same file, so no need to transfer it again. + for _, targetFilename := range targetFilenames { + if fi, err := i.spec.BaseFs.PublishFs.Stat(targetFilename); err == nil && fi.Size() == i.osFileInfo.Size() { + continue } - - if len(changedFilenames) == 0 { - return - } - - in, err := i.sourceFs().Open(src) - if err != nil { - res = err - return - } - defer in.Close() - - out, err := helpers.OpenFilesForWriting(i.spec.BaseFs.PublishFs, changedFilenames...) 
- - if err != nil { - res = err - return - } - defer out.Close() - - _, err = io.Copy(out, in) - if err != nil { - res = err - return - } - }) - - if res != nil { - return fmt.Errorf("failed to copy image to destination: %s", res) + changedFilenames = append(changedFilenames, targetFilename) } - return nil + + if len(changedFilenames) == 0 { + return struct { + io.Writer + io.Closer + }{ + ioutil.Discard, + ioutil.NopCloser(nil), + }, nil + + } + + return helpers.OpenFilesForWriting(i.spec.BaseFs.PublishFs, changedFilenames...) + } -func (i *Image) encodeToDestinations(img image.Image, conf imageConfig, resourceCacheFilename string, targetFilenames ...string) error { - - file1, err := helpers.OpenFilesForWriting(i.spec.BaseFs.PublishFs, targetFilenames...) - if err != nil { - return err - } - - defer file1.Close() - - var w io.Writer - - if resourceCacheFilename != "" { - // Also save it to the image resource cache for later reuse. - file2, err := helpers.OpenFileForWriting(i.spec.BaseFs.Resources.Fs, resourceCacheFilename) - if err != nil { - return err - } - - w = io.MultiWriter(file1, file2) - defer file2.Close() - } else { - w = file1 - } - +func (i *Image) encodeTo(conf imageConfig, img image.Image, w io.Writer) error { switch i.format { case imaging.JPEG: @@ -557,7 +513,6 @@ func (i *Image) encodeToDestinations(img image.Image, conf imageConfig, resource default: return imaging.Encode(w, img, i.format) } - } func (i *Image) clone() *Image { diff --git a/resource/image_cache.go b/resource/image_cache.go index 470c24c99..22c86ea32 100644 --- a/resource/image_cache.go +++ b/resource/image_cache.go @@ -15,19 +15,25 @@ package resource import ( "fmt" + "image" + "io" "path/filepath" "strings" "sync" "time" + "github.com/gohugoio/hugo/common/hugio" + + "github.com/gohugoio/hugo/cache/filecache" "github.com/gohugoio/hugo/helpers" ) type imageCache struct { - cacheDir string pathSpec *helpers.PathSpec - mu sync.RWMutex + fileCache *filecache.Cache + + mu sync.RWMutex 
store map[string]*Image } @@ -66,7 +72,7 @@ func (c *imageCache) clear() { } func (c *imageCache) getOrCreate( - parent *Image, conf imageConfig, create func(resourceCacheFilename string) (*Image, error)) (*Image, error) { + parent *Image, conf imageConfig, createImage func() (*Image, image.Image, error)) (*Image, error) { relTarget := parent.relTargetPathFromConfig(conf) key := parent.relTargetPathForRel(relTarget.path(), false, false) @@ -80,58 +86,76 @@ func (c *imageCache) getOrCreate( return img, nil } - // Now look in the file cache. - // Multiple Go routines can invoke same operation on the same image, so - // we need to make sure this is serialized per source image. - parent.createMu.Lock() - defer parent.createMu.Unlock() + // These funcs are protected by a named lock. + // read clones the parent to its new name and copies + // the content to the destinations. + read := func(info filecache.ItemInfo, r io.Reader) error { + img = parent.clone() + img.relTargetDirFile.file = relTarget.file + img.sourceFilename = info.Name - cacheFilename := filepath.Join(c.cacheDir, key) + w, err := img.openDestinationsForWriting() + if err != nil { + return err + } + + defer w.Close() + _, err = io.Copy(w, r) + return err + } + + // create creates the image and encodes it to w (cache) and to its destinations. + create := func(info filecache.ItemInfo, w io.WriteCloser) (err error) { + var conv image.Image + img, conv, err = createImage() + if err != nil { + w.Close() + return + } + img.relTargetDirFile.file = relTarget.file + img.sourceFilename = info.Name + + destinations, err := img.openDestinationsForWriting() + if err != nil { + w.Close() + return err + } + + mw := hugio.NewMultiWriteCloser(w, destinations) + defer mw.Close() + + return img.encodeTo(conf, conv, mw) + } + + // Now look in the file cache. // The definition of this counter is not that we have processed that amount // (e.g. 
resized etc.), it can be fetched from file cache, // but the count of processed image variations for this site. c.pathSpec.ProcessingStats.Incr(&c.pathSpec.ProcessingStats.ProcessedImages) - exists, err := helpers.Exists(cacheFilename, c.pathSpec.BaseFs.Resources.Fs) + _, err := c.fileCache.ReadOrCreate(key, read, create) if err != nil { return nil, err } - if exists { - img = parent.clone() - } else { - img, err = create(cacheFilename) - if err != nil { - return nil, err - } - } - img.relTargetDirFile.file = relTarget.file - img.sourceFilename = cacheFilename - // We have to look in the resources file system for this. - img.overriddenSourceFs = img.spec.BaseFs.Resources.Fs + // The file is now stored in this cache. + img.overriddenSourceFs = c.fileCache.Fs c.mu.Lock() if img2, found := c.store[key]; found { c.mu.Unlock() return img2, nil } - c.store[key] = img - c.mu.Unlock() - if !exists { - // File already written to destination - return img, nil - } - - return img, img.copyToDestination(cacheFilename) + return img, nil } -func newImageCache(ps *helpers.PathSpec, cacheDir string) *imageCache { - return &imageCache{pathSpec: ps, store: make(map[string]*Image), cacheDir: cacheDir} +func newImageCache(fileCache *filecache.Cache, ps *helpers.PathSpec) *imageCache { + return &imageCache{fileCache: fileCache, pathSpec: ps, store: make(map[string]*Image)} } func timeTrack(start time.Time, name string) { diff --git a/resource/image_test.go b/resource/image_test.go index a9aacbdce..07eab5d56 100644 --- a/resource/image_test.go +++ b/resource/image_test.go @@ -62,6 +62,7 @@ func TestImageTransformBasic(t *testing.T) { assert := require.New(t) image := fetchSunset(assert) + fileCache := image.spec.FileCaches.ImageCache().Fs assert.Equal("/a/sunset.jpg", image.RelPermalink()) assert.Equal("image", image.ResourceType()) @@ -76,19 +77,20 @@ func TestImageTransformBasic(t *testing.T) { assert.NoError(err) assert.Equal(320, resized0x.Width()) assert.Equal(200, 
resized0x.Height()) - assertFileCache(assert, image.spec.BaseFs.Resources.Fs, resized0x.RelPermalink(), 320, 200) + + assertFileCache(assert, fileCache, resized0x.RelPermalink(), 320, 200) resizedx0, err := image.Resize("200x") assert.NoError(err) assert.Equal(200, resizedx0.Width()) assert.Equal(125, resizedx0.Height()) - assertFileCache(assert, image.spec.BaseFs.Resources.Fs, resizedx0.RelPermalink(), 200, 125) + assertFileCache(assert, fileCache, resizedx0.RelPermalink(), 200, 125) resizedAndRotated, err := image.Resize("x200 r90") assert.NoError(err) assert.Equal(125, resizedAndRotated.Width()) assert.Equal(200, resizedAndRotated.Height()) - assertFileCache(assert, image.spec.BaseFs.Resources.Fs, resizedAndRotated.RelPermalink(), 125, 200) + assertFileCache(assert, fileCache, resizedAndRotated.RelPermalink(), 125, 200) assert.Equal("/a/sunset_hu59e56ffff1bc1d8d122b1403d34e039f_90587_300x200_resize_q68_linear.jpg", resized.RelPermalink()) assert.Equal(300, resized.Width()) @@ -113,21 +115,21 @@ func TestImageTransformBasic(t *testing.T) { assert.Equal("/a/sunset_hu59e56ffff1bc1d8d122b1403d34e039f_90587_200x100_fill_q68_linear_bottomleft.jpg", filled.RelPermalink()) assert.Equal(200, filled.Width()) assert.Equal(100, filled.Height()) - assertFileCache(assert, image.spec.BaseFs.Resources.Fs, filled.RelPermalink(), 200, 100) + assertFileCache(assert, fileCache, filled.RelPermalink(), 200, 100) smart, err := image.Fill("200x100 smart") assert.NoError(err) assert.Equal(fmt.Sprintf("/a/sunset_hu59e56ffff1bc1d8d122b1403d34e039f_90587_200x100_fill_q68_linear_smart%d.jpg", smartCropVersionNumber), smart.RelPermalink()) assert.Equal(200, smart.Width()) assert.Equal(100, smart.Height()) - assertFileCache(assert, image.spec.BaseFs.Resources.Fs, smart.RelPermalink(), 200, 100) + assertFileCache(assert, fileCache, smart.RelPermalink(), 200, 100) // Check cache filledAgain, err := image.Fill("200x100 bottomLeft") assert.NoError(err) assert.True(filled == filledAgain) 
assert.True(filled.sourceFilename == filledAgain.sourceFilename) - assertFileCache(assert, image.spec.BaseFs.Resources.Fs, filledAgain.RelPermalink(), 200, 100) + assertFileCache(assert, fileCache, filledAgain.RelPermalink(), 200, 100) } @@ -286,6 +288,7 @@ func TestImageResizeInSubPath(t *testing.T) { assert := require.New(t) image := fetchImage(assert, "sub/gohugoio2.png") + fileCache := image.spec.FileCaches.ImageCache().Fs assert.Equal(imaging.PNG, image.format) assert.Equal("/a/sub/gohugoio2.png", image.RelPermalink()) @@ -297,7 +300,7 @@ func TestImageResizeInSubPath(t *testing.T) { assert.Equal("/a/sub/gohugoio2_hu0e1b9e4a4be4d6f86c7b37b9ccce3fbc_73886_101x101_resize_linear_2.png", resized.RelPermalink()) assert.Equal(101, resized.Width()) - assertFileCache(assert, image.spec.BaseFs.Resources.Fs, resized.RelPermalink(), 101, 101) + assertFileCache(assert, fileCache, resized.RelPermalink(), 101, 101) publishedImageFilename := filepath.Clean(resized.RelPermalink()) assertImageFile(assert, image.spec.BaseFs.PublishFs, publishedImageFilename, 101, 101) assert.NoError(image.spec.BaseFs.PublishFs.Remove(publishedImageFilename)) @@ -309,7 +312,7 @@ func TestImageResizeInSubPath(t *testing.T) { assert.NoError(err) assert.Equal("/a/sub/gohugoio2_hu0e1b9e4a4be4d6f86c7b37b9ccce3fbc_73886_101x101_resize_linear_2.png", resizedAgain.RelPermalink()) assert.Equal(101, resizedAgain.Width()) - assertFileCache(assert, image.spec.BaseFs.Resources.Fs, resizedAgain.RelPermalink(), 101, 101) + assertFileCache(assert, fileCache, resizedAgain.RelPermalink(), 101, 101) assertImageFile(assert, image.spec.BaseFs.PublishFs, publishedImageFilename, 101, 101) } diff --git a/resource/resource.go b/resource/resource.go index be3ebdb8b..350519913 100644 --- a/resource/resource.go +++ b/resource/resource.go @@ -28,6 +28,7 @@ import ( "github.com/gohugoio/hugo/tpl" "github.com/pkg/errors" + "github.com/gohugoio/hugo/cache/filecache" "github.com/gohugoio/hugo/common/collections" 
"github.com/gohugoio/hugo/common/hugio" "github.com/gohugoio/hugo/common/loggers" @@ -280,12 +281,15 @@ type Spec struct { imageCache *imageCache ResourceCache *ResourceCache - - GenImagePath string - GenAssetsPath string + FileCaches filecache.Caches } -func NewSpec(s *helpers.PathSpec, logger *loggers.Logger, outputFormats output.Formats, mimeTypes media.Types) (*Spec, error) { +func NewSpec( + s *helpers.PathSpec, + fileCaches filecache.Caches, + logger *loggers.Logger, + outputFormats output.Formats, + mimeTypes media.Types) (*Spec, error) { imaging, err := decodeImaging(s.Cfg.GetStringMap("imaging")) if err != nil { @@ -296,24 +300,16 @@ func NewSpec(s *helpers.PathSpec, logger *loggers.Logger, outputFormats output.F logger = loggers.NewErrorLogger() } - genImagePath := filepath.FromSlash("_gen/images") - // The transformed assets (CSS etc.) - genAssetsPath := filepath.FromSlash("_gen/assets") - rs := &Spec{PathSpec: s, Logger: logger, - GenImagePath: genImagePath, - GenAssetsPath: genAssetsPath, imaging: &imaging, MediaTypes: mimeTypes, OutputFormats: outputFormats, + FileCaches: fileCaches, imageCache: newImageCache( + fileCaches.ImageCache(), + s, - // We're going to write a cache pruning routine later, so make it extremely - // unlikely that the user shoots him or herself in the foot - // and this is set to a value that represents data he/she - // cares about. This should be set in stone once released. 
- genImagePath, )} rs.ResourceCache = newResourceCache(rs) diff --git a/resource/resource_cache.go b/resource/resource_cache.go index e0b86ec9e..e7c6ab6d0 100644 --- a/resource/resource_cache.go +++ b/resource/resource_cache.go @@ -15,14 +15,13 @@ package resource import ( "encoding/json" - "io/ioutil" + "io" "path" "path/filepath" "strings" "sync" - "github.com/gohugoio/hugo/helpers" - "github.com/spf13/afero" + "github.com/gohugoio/hugo/cache/filecache" "github.com/BurntSushi/locker" ) @@ -35,8 +34,10 @@ const ( type ResourceCache struct { rs *Spec - cache map[string]Resource sync.RWMutex + cache map[string]Resource + + fileCache *filecache.Cache // Provides named resource locks. nlocker *locker.Locker @@ -56,9 +57,10 @@ func ResourceKeyPartition(filename string) string { func newResourceCache(rs *Spec) *ResourceCache { return &ResourceCache{ - rs: rs, - cache: make(map[string]Resource), - nlocker: locker.NewLocker(), + rs: rs, + fileCache: rs.FileCaches.AssetsCache(), + cache: make(map[string]Resource), + nlocker: locker.NewLocker(), } } @@ -118,65 +120,56 @@ func (c *ResourceCache) GetOrCreate(partition, key string, f func() (Resource, e } func (c *ResourceCache) getFilenames(key string) (string, string) { - filenameBase := filepath.Join(c.rs.GenAssetsPath, key) - filenameMeta := filenameBase + ".json" - filenameContent := filenameBase + ".content" + filenameMeta := key + ".json" + filenameContent := key + ".content" return filenameMeta, filenameContent } -func (c *ResourceCache) getFromFile(key string) (afero.File, transformedResourceMetadata, bool) { +func (c *ResourceCache) getFromFile(key string) (filecache.ItemInfo, io.ReadCloser, transformedResourceMetadata, bool) { c.RLock() defer c.RUnlock() var meta transformedResourceMetadata filenameMeta, filenameContent := c.getFilenames(key) - fMeta, err := c.rs.Resources.Fs.Open(filenameMeta) - if err != nil { - return nil, meta, false - } - defer fMeta.Close() - jsonContent, err := ioutil.ReadAll(fMeta) - if err 
!= nil { - return nil, meta, false + _, jsonContent, _ := c.fileCache.GetBytes(filenameMeta) + if jsonContent == nil { + return filecache.ItemInfo{}, nil, meta, false } if err := json.Unmarshal(jsonContent, &meta); err != nil { - return nil, meta, false + return filecache.ItemInfo{}, nil, meta, false } - fContent, err := c.rs.Resources.Fs.Open(filenameContent) - if err != nil { - return nil, meta, false - } + fi, rc, _ := c.fileCache.Get(filenameContent) + + return fi, rc, meta, rc != nil - return fContent, meta, true } // writeMeta writes the metadata to file and returns a writer for the content part. -func (c *ResourceCache) writeMeta(key string, meta transformedResourceMetadata) (afero.File, error) { +func (c *ResourceCache) writeMeta(key string, meta transformedResourceMetadata) (filecache.ItemInfo, io.WriteCloser, error) { filenameMeta, filenameContent := c.getFilenames(key) raw, err := json.Marshal(meta) if err != nil { - return nil, err + return filecache.ItemInfo{}, nil, err } - fm, err := c.openResourceFileForWriting(filenameMeta) + _, fm, err := c.fileCache.WriteCloser(filenameMeta) if err != nil { - return nil, err + return filecache.ItemInfo{}, nil, err } + defer fm.Close() if _, err := fm.Write(raw); err != nil { - return nil, err + return filecache.ItemInfo{}, nil, err } - return c.openResourceFileForWriting(filenameContent) + fi, fc, err := c.fileCache.WriteCloser(filenameContent) -} + return fi, fc, err -func (c *ResourceCache) openResourceFileForWriting(filename string) (afero.File, error) { - return helpers.OpenFileForWriting(c.rs.Resources.Fs, filename) } func (c *ResourceCache) set(key string, r Resource) { diff --git a/resource/testhelpers_test.go b/resource/testhelpers_test.go index 2a5d2b3cd..05a02a6fc 100644 --- a/resource/testhelpers_test.go +++ b/resource/testhelpers_test.go @@ -13,6 +13,7 @@ import ( "runtime" "strings" + "github.com/gohugoio/hugo/cache/filecache" "github.com/gohugoio/hugo/helpers" "github.com/gohugoio/hugo/hugofs" 
"github.com/gohugoio/hugo/media" @@ -49,10 +50,12 @@ func newTestResourceSpecForBaseURL(assert *require.Assertions, baseURL string) * fs := hugofs.NewMem(cfg) s, err := helpers.NewPathSpec(fs, cfg) - assert.NoError(err) - spec, err := NewSpec(s, nil, output.DefaultFormats, media.DefaultTypes) + filecaches, err := filecache.NewCachesFromPaths(s.Paths) + assert.NoError(err) + + spec, err := NewSpec(s, filecaches, nil, output.DefaultFormats, media.DefaultTypes) assert.NoError(err) return spec } @@ -70,7 +73,7 @@ func newTestResourceOsFs(assert *require.Assertions) *Spec { } cfg.Set("workingDir", workDir) - cfg.Set("resourceDir", filepath.Join(workDir, "res")) + cfg.Set("resourceDir", "resources") cfg.Set("contentDir", "content") cfg.Set("dataDir", "data") cfg.Set("i18nDir", "i18n") @@ -83,10 +86,12 @@ func newTestResourceOsFs(assert *require.Assertions) *Spec { fs.Destination = &afero.MemMapFs{} s, err := helpers.NewPathSpec(fs, cfg) - assert.NoError(err) - spec, err := NewSpec(s, nil, output.DefaultFormats, media.DefaultTypes) + filecaches, err := filecache.NewCachesFromPaths(s.Paths) + assert.NoError(err) + + spec, err := NewSpec(s, filecaches, nil, output.DefaultFormats, media.DefaultTypes) assert.NoError(err) return spec @@ -144,7 +149,7 @@ func assertImageFile(assert *require.Assertions, fs afero.Fs, filename string, w } func assertFileCache(assert *require.Assertions, fs afero.Fs, filename string, width, height int) { - assertImageFile(assert, fs, filepath.Join("_gen/images", filename), width, height) + assertImageFile(assert, fs, filepath.Clean(filename), width, height) } func writeSource(t testing.TB, fs *hugofs.Fs, filename, content string) { diff --git a/resource/transform.go b/resource/transform.go index a4b1ca27c..0b5772dde 100644 --- a/resource/transform.go +++ b/resource/transform.go @@ -24,7 +24,6 @@ import ( "github.com/gohugoio/hugo/common/hugio" "github.com/gohugoio/hugo/helpers" "github.com/mitchellh/hashstructure" - "github.com/spf13/afero" "fmt" 
"io" @@ -210,12 +209,12 @@ func (r *transformedResource) transferTransformedValues(another *transformedReso } func (r *transformedResource) tryTransformedFileCache(key string) io.ReadCloser { - f, meta, found := r.cache.getFromFile(key) + fi, f, meta, found := r.cache.getFromFile(key) if !found { return nil } r.transformedResourceMetadata = meta - r.sourceFilename = f.Name() + r.sourceFilename = fi.Name return f } @@ -263,7 +262,7 @@ func (r *transformedResource) initContent() error { var err error r.contentInit.Do(func() { var b []byte - b, err := afero.ReadFile(r.cache.rs.Resources.Fs, r.sourceFilename) + _, b, err = r.cache.fileCache.GetBytes(r.sourceFilename) if err != nil { return } @@ -434,16 +433,15 @@ func (r *transformedResource) transform(setContent bool) (err error) { } defer publicw.Close() - publishwriters := []io.Writer{publicw} + publishwriters := []io.WriteCloser{publicw} if transformedContentr == nil { // Also write it to the cache - metaw, err := r.cache.writeMeta(key, r.transformedResourceMetadata) + fi, metaw, err := r.cache.writeMeta(key, r.transformedResourceMetadata) if err != nil { return err } - r.sourceFilename = metaw.Name() - defer metaw.Close() + r.sourceFilename = fi.Name publishwriters = append(publishwriters, metaw) @@ -460,11 +458,12 @@ func (r *transformedResource) transform(setContent bool) (err error) { if setContent { contentmemw = bp.GetBuffer() defer bp.PutBuffer(contentmemw) - publishwriters = append(publishwriters, contentmemw) + publishwriters = append(publishwriters, hugio.ToWriteCloser(contentmemw)) } - publishw := io.MultiWriter(publishwriters...) + publishw := hugio.NewMultiWriteCloser(publishwriters...) _, r.transformErr = io.Copy(publishw, transformedContentr) + publishw.Close() if setContent { r.contentInit.Do(func() { diff --git a/tpl/data/cache.go b/tpl/data/cache.go deleted file mode 100644 index 6c4033160..000000000 --- a/tpl/data/cache.go +++ /dev/null @@ -1,85 +0,0 @@ -// Copyright 2017 The Hugo Authors. 
All rights reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package data - -import ( - "crypto/md5" - "encoding/hex" - "errors" - "sync" - - "github.com/gohugoio/hugo/config" - "github.com/gohugoio/hugo/helpers" - "github.com/spf13/afero" -) - -var cacheMu sync.RWMutex - -// getCacheFileID returns the cache ID for a string. -func getCacheFileID(cfg config.Provider, id string) string { - hash := md5.Sum([]byte(id)) - return cfg.GetString("cacheDir") + hex.EncodeToString(hash[:]) -} - -// getCache returns the content for an ID from the file cache or an error. -// If the ID is not found, return nil,nil. -func getCache(id string, fs afero.Fs, cfg config.Provider, ignoreCache bool) ([]byte, error) { - if ignoreCache { - return nil, nil - } - - cacheMu.RLock() - defer cacheMu.RUnlock() - - fID := getCacheFileID(cfg, id) - isExists, err := helpers.Exists(fID, fs) - if err != nil { - return nil, err - } - if !isExists { - return nil, nil - } - - return afero.ReadFile(fs, fID) -} - -// writeCache writes bytes associated with an ID into the file cache. -func writeCache(id string, c []byte, fs afero.Fs, cfg config.Provider, ignoreCache bool) error { - if ignoreCache { - return nil - } - - cacheMu.Lock() - defer cacheMu.Unlock() - - fID := getCacheFileID(cfg, id) - f, err := fs.Create(fID) - if err != nil { - return errors.New("Error: " + err.Error() + ". 
Failed to create file: " + fID) - } - defer f.Close() - - n, err := f.Write(c) - if err != nil { - return errors.New("Error: " + err.Error() + ". Failed to write to file: " + fID) - } - if n == 0 { - return errors.New("No bytes written to file: " + fID) - } - return nil -} - -func deleteCache(id string, fs afero.Fs, cfg config.Provider) error { - return fs.Remove(getCacheFileID(cfg, id)) -} diff --git a/tpl/data/cache_test.go b/tpl/data/cache_test.go deleted file mode 100644 index 6057f0321..000000000 --- a/tpl/data/cache_test.go +++ /dev/null @@ -1,63 +0,0 @@ -// Copyright 2017 The Hugo Authors. All rights reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -package data - -import ( - "fmt" - "testing" - - "github.com/spf13/afero" - "github.com/spf13/viper" - "github.com/stretchr/testify/assert" -) - -func TestCache(t *testing.T) { - t.Parallel() - - fs := new(afero.MemMapFs) - - for i, test := range []struct { - path string - content []byte - ignore bool - }{ - {"http://Foo.Bar/foo_Bar-Foo", []byte(`T€st Content 123`), false}, - {"fOO,bar:foo%bAR", []byte(`T€st Content 123 fOO,bar:foo%bAR`), false}, - {"FOo/BaR.html", []byte(`FOo/BaR.html T€st Content 123`), false}, - {"трям/трям", []byte(`T€st трям/трям Content 123`), false}, - {"은행", []byte(`T€st C은행ontent 123`), false}, - {"Банковский кассир", []byte(`Банковский кассир T€st Content 123`), false}, - {"Банковский кассир", []byte(`Банковский кассир T€st Content 456`), true}, - } { - msg := fmt.Sprintf("Test #%d: %v", i, test) - - cfg := viper.New() - - c, err := getCache(test.path, fs, cfg, test.ignore) - assert.NoError(t, err, msg) - assert.Nil(t, c, msg) - - err = writeCache(test.path, test.content, fs, cfg, test.ignore) - assert.NoError(t, err, msg) - - c, err = getCache(test.path, fs, cfg, test.ignore) - assert.NoError(t, err, msg) - - if test.ignore { - assert.Nil(t, c, msg) - } else { - assert.Equal(t, string(test.content), string(c)) - } - } -} diff --git a/tpl/data/data.go b/tpl/data/data.go index cecce4b45..61de2f72e 100644 --- a/tpl/data/data.go +++ b/tpl/data/data.go @@ -20,17 +20,20 @@ import ( "errors" "net/http" "strings" - "time" + "github.com/gohugoio/hugo/cache/filecache" "github.com/gohugoio/hugo/deps" _errors "github.com/pkg/errors" ) // New returns a new instance of the data-namespaced template functions. 
func New(deps *deps.Deps) *Namespace { + return &Namespace{ - deps: deps, - client: http.DefaultClient, + deps: deps, + cacheGetCSV: deps.FileCaches.GetCSVCache(), + cacheGetJSON: deps.FileCaches.GetJSONCache(), + client: http.DefaultClient, } } @@ -38,6 +41,9 @@ func New(deps *deps.Deps) *Namespace { type Namespace struct { deps *deps.Deps + cacheGetJSON *filecache.Cache + cacheGetCSV *filecache.Cache + client *http.Client } @@ -48,40 +54,34 @@ type Namespace struct { // GetCSV returns nil or a slice slice to use in a short code. func (ns *Namespace) GetCSV(sep string, urlParts ...string) (d [][]string, err error) { url := strings.Join(urlParts, "") + cache := ns.cacheGetCSV - var clearCacheSleep = func(i int, u string) { - ns.deps.Log.INFO.Printf("Retry #%d for %s and sleeping for %s", i, url, resSleep) - time.Sleep(resSleep) - deleteCache(url, ns.deps.Fs.Source, ns.deps.Cfg) - } - - for i := 0; i <= resRetries; i++ { - var req *http.Request - req, err = http.NewRequest("GET", url, nil) - if err != nil { - return nil, _errors.Wrapf(err, "failed to create request for getCSV for resource %s", url) + unmarshal := func(b []byte) (error, bool) { + if !bytes.Contains(b, []byte(sep)) { + return _errors.Errorf("cannot find separator %s in CSV for %s", sep, url), false } - req.Header.Add("Accept", "text/csv") - req.Header.Add("Accept", "text/plain") - - var c []byte - c, err = ns.getResource(req) - if err != nil { - return nil, _errors.Wrapf(err, "failed to read CSV resource %q", url) - } - - if !bytes.Contains(c, []byte(sep)) { - return nil, _errors.Errorf("cannot find separator %s in CSV for %s", sep, url) - } - - if d, err = parseCSV(c, sep); err != nil { + if d, err = parseCSV(b, sep); err != nil { err = _errors.Wrapf(err, "failed to parse CSV file %s", url) - clearCacheSleep(i, url) - continue + return err, true } - break + + return nil, false + } + + var req *http.Request + req, err = http.NewRequest("GET", url, nil) + if err != nil { + return nil, 
_errors.Wrapf(err, "failed to create request for getCSV for resource %s", url) + } + + req.Header.Add("Accept", "text/csv") + req.Header.Add("Accept", "text/plain") + + err = ns.getResource(cache, unmarshal, req) + if err != nil { + return nil, _errors.Wrapf(err, "failed to read CSV resource %q", url) } return @@ -90,38 +90,34 @@ func (ns *Namespace) GetCSV(sep string, urlParts ...string) (d [][]string, err e // GetJSON expects one or n-parts of a URL to a resource which can either be a local or a remote one. // If you provide multiple parts they will be joined together to the final URL. // GetJSON returns nil or parsed JSON to use in a short code. -func (ns *Namespace) GetJSON(urlParts ...string) (v interface{}, err error) { +func (ns *Namespace) GetJSON(urlParts ...string) (interface{}, error) { + var v interface{} url := strings.Join(urlParts, "") + cache := ns.cacheGetJSON - for i := 0; i <= resRetries; i++ { - var req *http.Request - req, err = http.NewRequest("GET", url, nil) - if err != nil { - return nil, _errors.Wrapf(err, "Failed to create request for getJSON resource %s", url) - } - - req.Header.Add("Accept", "application/json") - - var c []byte - c, err = ns.getResource(req) - if err != nil { - return nil, _errors.Wrapf(err, "failed to get getJSON resource %q", url) - } - err = json.Unmarshal(c, &v) - if err != nil { - ns.deps.Log.INFO.Printf("Cannot read JSON from resource %s: %s", url, err) - ns.deps.Log.INFO.Printf("Retry #%d for %s and sleeping for %s", i, url, resSleep) - time.Sleep(resSleep) - deleteCache(url, ns.deps.Fs.Source, ns.deps.Cfg) - continue - } - break + req, err := http.NewRequest("GET", url, nil) + if err != nil { + return nil, _errors.Wrapf(err, "Failed to create request for getJSON resource %s", url) } + unmarshal := func(b []byte) (error, bool) { + err := json.Unmarshal(b, &v) + if err != nil { + return err, true + } + return nil, false + } + + req.Header.Add("Accept", "application/json") + + err = ns.getResource(cache, unmarshal, 
req) + if err != nil { return nil, _errors.Wrapf(err, "failed to get getJSON resource %q", url) } - return + + return v, nil + } // parseCSV parses bytes of CSV data into a slice slice string or an error diff --git a/tpl/data/init_test.go b/tpl/data/init_test.go index 6bb689a95..c4751e892 100644 --- a/tpl/data/init_test.go +++ b/tpl/data/init_test.go @@ -16,8 +16,8 @@ package data import ( "testing" - "github.com/gohugoio/hugo/deps" "github.com/gohugoio/hugo/tpl/internal" + "github.com/spf13/viper" "github.com/stretchr/testify/require" ) @@ -25,8 +25,11 @@ func TestInit(t *testing.T) { var found bool var ns *internal.TemplateFuncsNamespace + v := viper.New() + v.Set("contentDir", "content") + for _, nsf := range internal.TemplateFuncsNamespaceRegistry { - ns = nsf(&deps.Deps{}) + ns = nsf(newDeps(v)) if ns.Name == name { found = true break diff --git a/tpl/data/resources.go b/tpl/data/resources.go index 11c35f9d9..8b246a662 100644 --- a/tpl/data/resources.go +++ b/tpl/data/resources.go @@ -14,102 +14,81 @@ package data import ( - "fmt" "io/ioutil" "net/http" "path/filepath" - "sync" "time" + "github.com/pkg/errors" + + "github.com/gohugoio/hugo/cache/filecache" + "github.com/gohugoio/hugo/config" "github.com/gohugoio/hugo/helpers" "github.com/spf13/afero" - jww "github.com/spf13/jwalterweatherman" ) var ( - remoteURLLock = &remoteLock{m: make(map[string]*sync.Mutex)} - resSleep = time.Second * 2 // if JSON decoding failed sleep for n seconds before retrying - resRetries = 1 // number of retries to load the JSON from URL or local file system + resSleep = time.Second * 2 // if JSON decoding failed sleep for n seconds before retrying + resRetries = 1 // number of retries to load the JSON from URL ) -type remoteLock struct { - sync.RWMutex - m map[string]*sync.Mutex -} - -// URLLock locks an URL during download -func (l *remoteLock) URLLock(url string) { - var ( - lock *sync.Mutex - ok bool - ) - l.Lock() - if lock, ok = l.m[url]; !ok { - lock = &sync.Mutex{} - 
l.m[url] = lock - } - l.Unlock() - lock.Lock() -} - -// URLUnlock unlocks an URL when the download has been finished. Use only in defer calls. -func (l *remoteLock) URLUnlock(url string) { - l.RLock() - defer l.RUnlock() - if um, ok := l.m[url]; ok { - um.Unlock() - } -} - // getRemote loads the content of a remote file. This method is thread safe. -func getRemote(req *http.Request, fs afero.Fs, cfg config.Provider, hc *http.Client) ([]byte, error) { +func (ns *Namespace) getRemote(cache *filecache.Cache, unmarshal func([]byte) (error, bool), req *http.Request) error { url := req.URL.String() + id := helpers.MD5String(url) + var handled bool + var retry bool + + _, b, err := cache.GetOrCreateBytes(id, func() ([]byte, error) { + var err error + handled = true + for i := 0; i <= resRetries; i++ { + ns.deps.Log.INFO.Printf("Downloading: %s ...", url) + var res *http.Response + res, err = ns.client.Do(req) + if err != nil { + return nil, err + } + + if isHTTPError(res) { + return nil, errors.Errorf("Failed to retrieve remote file: %s", http.StatusText(res.StatusCode)) + } + + var b []byte + b, err = ioutil.ReadAll(res.Body) + + if err != nil { + return nil, err + } + res.Body.Close() + + err, retry = unmarshal(b) + + if err == nil { + // Return it so it can be cached. + return b, nil + } + + if !retry { + return nil, err + } + + ns.deps.Log.INFO.Printf("Cannot read remote resource %s: %s", url, err) + ns.deps.Log.INFO.Printf("Retry #%d for %s and sleeping for %s", i+1, url, resSleep) + time.Sleep(resSleep) + } - c, err := getCache(url, fs, cfg, cfg.GetBool("ignoreCache")) - if err != nil { return nil, err - } - if c != nil { - return c, nil + + }) + + if !handled { + // This is cached content and should be correct. 
+ err, _ = unmarshal(b) } - // avoid race condition with locks, block other goroutines if the current url is processing - remoteURLLock.URLLock(url) - defer func() { remoteURLLock.URLUnlock(url) }() - - // avoid multiple locks due to calling getCache twice - c, err = getCache(url, fs, cfg, cfg.GetBool("ignoreCache")) - if err != nil { - return nil, err - } - if c != nil { - return c, nil - } - - jww.INFO.Printf("Downloading: %s ...", url) - res, err := hc.Do(req) - if err != nil { - return nil, err - } - - if res.StatusCode < 200 || res.StatusCode > 299 { - return nil, fmt.Errorf("Failed to retrieve remote file: %s", http.StatusText(res.StatusCode)) - } - - c, err = ioutil.ReadAll(res.Body) - res.Body.Close() - if err != nil { - return nil, err - } - - err = writeCache(url, c, fs, cfg, cfg.GetBool("ignoreCache")) - if err != nil { - return nil, err - } - - jww.INFO.Printf("... and cached to: %s", getCacheFileID(cfg, url)) - return c, nil + return err } // getLocal loads the content of a local file @@ -123,12 +102,22 @@ func getLocal(url string, fs afero.Fs, cfg config.Provider) ([]byte, error) { } -// getResource loads the content of a local or remote file -func (ns *Namespace) getResource(req *http.Request) ([]byte, error) { +// getResource loads the content of a local or remote file and returns its content and the +// cache ID used, if relevant. 
+func (ns *Namespace) getResource(cache *filecache.Cache, unmarshal func(b []byte) (error, bool), req *http.Request) error { switch req.URL.Scheme { case "": - return getLocal(req.URL.String(), ns.deps.Fs.Source, ns.deps.Cfg) + b, err := getLocal(req.URL.String(), ns.deps.Fs.Source, ns.deps.Cfg) + if err != nil { + return err + } + err, _ = unmarshal(b) + return err default: - return getRemote(req, ns.deps.Fs.Source, ns.deps.Cfg, ns.client) + return ns.getRemote(cache, unmarshal, req) } } + +func isHTTPError(res *http.Response) bool { + return res.StatusCode < 200 || res.StatusCode > 299 +} diff --git a/tpl/data/resources_test.go b/tpl/data/resources_test.go index c1da36d05..54eb123ee 100644 --- a/tpl/data/resources_test.go +++ b/tpl/data/resources_test.go @@ -23,6 +23,9 @@ import ( "testing" "time" + "github.com/gohugoio/hugo/hugolib/paths" + + "github.com/gohugoio/hugo/cache/filecache" "github.com/gohugoio/hugo/common/loggers" "github.com/gohugoio/hugo/config" "github.com/gohugoio/hugo/deps" @@ -85,16 +88,16 @@ func getTestServer(handler func(w http.ResponseWriter, r *http.Request)) (*httpt func TestScpGetRemote(t *testing.T) { t.Parallel() fs := new(afero.MemMapFs) + cache := filecache.NewCache(fs, 100) tests := []struct { path string content []byte - ignore bool }{ - {"http://Foo.Bar/foo_Bar-Foo", []byte(`T€st Content 123`), false}, - {"http://Doppel.Gänger/foo_Bar-Foo", []byte(`T€st Cont€nt 123`), false}, - {"http://Doppel.Gänger/Fizz_Bazz-Foo", []byte(`T€st Банковский кассир Cont€nt 123`), false}, - {"http://Doppel.Gänger/Fizz_Bazz-Bar", []byte(`T€st Банковский кассир Cont€nt 456`), true}, + {"http://Foo.Bar/foo_Bar-Foo", []byte(`T€st Content 123`)}, + {"http://Doppel.Gänger/foo_Bar-Foo", []byte(`T€st Cont€nt 123`)}, + {"http://Doppel.Gänger/Fizz_Bazz-Foo", []byte(`T€st Банковский кассир Cont€nt 123`)}, + {"http://Doppel.Gänger/Fizz_Bazz-Bar", []byte(`T€st Банковский кассир Cont€nt 456`)}, } for _, test := range tests { @@ -108,53 +111,64 @@ func 
TestScpGetRemote(t *testing.T) { }) defer func() { srv.Close() }() - cfg := viper.New() + ns := newTestNs() + ns.client = cl - c, err := getRemote(req, fs, cfg, cl) + var c []byte + f := func(b []byte) (error, bool) { + c = b + return nil, false + } + + err = ns.getRemote(cache, f, req) require.NoError(t, err, msg) assert.Equal(t, string(test.content), string(c)) - c, err = getCache(req.URL.String(), fs, cfg, test.ignore) - require.NoError(t, err, msg) + assert.Equal(t, string(test.content), string(c)) - if test.ignore { - assert.Empty(t, c, msg) - } else { - assert.Equal(t, string(test.content), string(c)) - - } } } func TestScpGetRemoteParallel(t *testing.T) { t.Parallel() - ns := newTestNs() - content := []byte(`T€st Content 123`) srv, cl := getTestServer(func(w http.ResponseWriter, r *http.Request) { w.Write(content) }) + defer func() { srv.Close() }() url := "http://Foo.Bar/foo_Bar-Foo" req, err := http.NewRequest("GET", url, nil) require.NoError(t, err) - for _, ignoreCache := range []bool{false, true} { + for _, ignoreCache := range []bool{false} { cfg := viper.New() cfg.Set("ignoreCache", ignoreCache) + cfg.Set("contentDir", "content") + + ns := New(newDeps(cfg)) + ns.client = cl var wg sync.WaitGroup - for i := 0; i < 50; i++ { + for i := 0; i < 1; i++ { wg.Add(1) go func(gor int) { defer wg.Done() for j := 0; j < 10; j++ { - c, err := getRemote(req, ns.deps.Fs.Source, ns.deps.Cfg, cl) + var c []byte + f := func(b []byte) (error, bool) { + c = b + return nil, false + } + err := ns.getRemote(ns.cacheGetJSON, f, req) + assert.NoError(t, err) - assert.Equal(t, string(content), string(c)) + if string(content) != string(c) { + t.Fatalf("expected\n%q\ngot\n%q", content, c) + } time.Sleep(23 * time.Millisecond) } @@ -173,11 +187,16 @@ func newDeps(cfg config.Provider) *deps.Deps { panic(err) } + fs := hugofs.NewMem(l) logger := loggers.NewErrorLogger() + p, _ := paths.New(fs, cfg) + + fileCaches, _ := filecache.NewCachesFromPaths(p) return &deps.Deps{ Cfg: cfg, - 
Fs: hugofs.NewMem(l), + Fs: fs, + FileCaches: fileCaches, ContentSpec: cs, Log: logger, DistinctErrorLog: helpers.NewDistinctLogger(logger.ERROR),