// Copyright 2018 The Hugo Authors. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package resource

import (
	"errors"
	"fmt"
	"io"
	"io/ioutil"
	"mime"
	"os"
	"path"
	"path/filepath"
	"strings"
	"sync"

	"github.com/gohugoio/hugo/tpl"

	"github.com/gohugoio/hugo/common/loggers"

	jww "github.com/spf13/jwalterweatherman"

	"github.com/spf13/afero"

	"github.com/gobwas/glob"
	"github.com/gohugoio/hugo/helpers"
	"github.com/gohugoio/hugo/media"
	"github.com/gohugoio/hugo/source"
)

// Compile-time assertions that genericResource and Resources satisfy the
// interfaces they are expected to implement.
var (
	_ ContentResource         = (*genericResource)(nil)
	_ ReadSeekCloserResource  = (*genericResource)(nil)
	_ Resource                = (*genericResource)(nil)
	_ Source                  = (*genericResource)(nil)
	_ Cloner                  = (*genericResource)(nil)
	_ ResourcesLanguageMerger = (*Resources)(nil)
	_ permalinker             = (*genericResource)(nil)
)

// DefaultResourceType is the resource type name "unknown".
// NOTE(review): presumably the fallback used when no type can be
// determined; it is not referenced in this file, so confirm against callers.
const DefaultResourceType = "unknown"

// noData is the shared empty data map returned by genericResource.Data.
var noData = make(map[string]interface{})

// Source is an internal interface and not meant for use in the templates. It
// may change without notice.
type Source interface {
	Publish() error
}

// permalinker is the internal set of link and target path builders that
// genericResource implements.
type permalinker interface {
	relPermalinkFor(target string) string
	permalinkFor(target string) string
	relTargetPathFor(target string) string
	relTargetPath() string
	targetPath() string
}

// Cloner is an internal interface and not meant for use in the templates. It
// may change without notice.
type Cloner interface {
	WithNewBase(base string) Resource
}

// Resource represents a linkable resource, i.e. a content page, image etc.
type Resource interface { // Permalink represents the absolute link to this resource. Permalink() string // RelPermalink represents the host relative link to this resource. RelPermalink() string // ResourceType is the resource type. For most file types, this is the main // part of the MIME type, e.g. "image", "application", "text" etc. // For content pages, this value is "page". ResourceType() string // MediaType is this resource's MIME type. MediaType() media.Type // Name is the logical name of this resource. This can be set in the front matter // metadata for this resource. If not set, Hugo will assign a value. // This will in most cases be the base filename. // So, for the image "/some/path/sunset.jpg" this will be "sunset.jpg". // The value returned by this method will be used in the GetByPrefix and ByPrefix methods // on Resources. Name() string // Title returns the title if set in front matter. For content pages, this will be the expected value. Title() string // Resource specific data set by Hugo. // One example would be.Data.Digest for fingerprinted resources. Data() interface{} // Params set in front matter for this resource. Params() map[string]interface{} } type ResourcesLanguageMerger interface { MergeByLanguage(other Resources) Resources // Needed for integration with the tpl package. MergeByLanguageInterface(other interface{}) (interface{}, error) } type translatedResource interface { TranslationKey() string } // ContentResource represents a Resource that provides a way to get to its content. // Most Resource types in Hugo implements this interface, including Page. // This should be used with care, as it will read the file content into memory, but it // should be cached as effectively as possible by the implementation. type ContentResource interface { Resource // Content returns this resource's content. It will be equivalent to reading the content // that RelPermalink points to in the published folder. 
// The return type will be contextual, and should be what you would expect: // * Page: template.HTML // * JSON: String // * Etc. Content() (interface{}, error) } // ReadSeekCloser is implemented by afero.File. We use this as the common type for // content in Resource objects, even for strings. type ReadSeekCloser interface { io.Reader io.Seeker io.Closer } // OpenReadSeekeCloser allows setting some other way (than reading from a filesystem) // to open or create a ReadSeekCloser. type OpenReadSeekCloser func() (ReadSeekCloser, error) // ReadSeekCloserResource is a Resource that supports loading its content. type ReadSeekCloserResource interface { Resource ReadSeekCloser() (ReadSeekCloser, error) } // Resources represents a slice of resources, which can be a mix of different types. // I.e. both pages and images etc. type Resources []Resource func (r Resources) ByType(tp string) Resources { var filtered Resources for _, resource := range r { if resource.ResourceType() == tp { filtered = append(filtered, resource) } } return filtered } // GetMatch finds the first Resource matching the given pattern, or nil if none found. // See Match for a more complete explanation about the rules used. func (r Resources) GetMatch(pattern string) Resource { g, err := getGlob(pattern) if err != nil { return nil } for _, resource := range r { if g.Match(strings.ToLower(resource.Name())) { return resource } } return nil } // Match gets all resources matching the given base filename prefix, e.g // "*.png" will match all png files. The "*" does not match path delimiters (/), // so if you organize your resources in sub-folders, you need to be explicit about it, e.g.: // "images/*.png". To match any PNG image anywhere in the bundle you can do "**.png", and // to match all PNG images below the images folder, use "images/**.jpg". // The matching is case insensitive. 
// Match matches by using the value of Resource.Name, which, by default, is a filename with // path relative to the bundle root with Unix style slashes (/) and no leading slash, e.g. "images/logo.png". // See https://github.com/gobwas/glob for the full rules set. func (r Resources) Match(pattern string) Resources { g, err := getGlob(pattern) if err != nil { return nil } var matches Resources for _, resource := range r { if g.Match(strings.ToLower(resource.Name())) { matches = append(matches, resource) } } return matches } var ( globCache = make(map[string]glob.Glob) globMu sync.RWMutex ) func getGlob(pattern string) (glob.Glob, error) { var g glob.Glob globMu.RLock() g, found := globCache[pattern] globMu.RUnlock() if !found { var err error g, err = glob.Compile(strings.ToLower(pattern), '/') if err != nil { return nil, err } globMu.Lock() globCache[pattern] = g globMu.Unlock() } return g, nil } // MergeByLanguage adds missing translations in r1 from r2. func (r1 Resources) MergeByLanguage(r2 Resources) Resources { result := append(Resources(nil), r1...) m := make(map[string]bool) for _, r := range r1 { if translated, ok := r.(translatedResource); ok { m[translated.TranslationKey()] = true } } for _, r := range r2 { if translated, ok := r.(translatedResource); ok { if _, found := m[translated.TranslationKey()]; !found { result = append(result, r) } } } return result } // MergeByLanguageInterface is the generic version of MergeByLanguage. It // is here just so it can be called from the tpl package. func (r1 Resources) MergeByLanguageInterface(in interface{}) (interface{}, error) { r2, ok := in.(Resources) if !ok { return nil, fmt.Errorf("%T cannot be merged by language", in) } return r1.MergeByLanguage(r2), nil } type Spec struct { *helpers.PathSpec MediaTypes media.Types Logger *jww.Notepad TextTemplates tpl.TemplateParseFinder // Holds default filter settings etc. 
imaging *Imaging imageCache *imageCache ResourceCache *ResourceCache GenImagePath string GenAssetsPath string } func NewSpec(s *helpers.PathSpec, logger *jww.Notepad, mimeTypes media.Types) (*Spec, error) { imaging, err := decodeImaging(s.Cfg.GetStringMap("imaging")) if err != nil { return nil, err } if logger == nil { logger = loggers.NewErrorLogger() } genImagePath := filepath.FromSlash("_gen/images") // The transformed assets (CSS etc.) genAssetsPath := filepath.FromSlash("_gen/assets") rs := &Spec{PathSpec: s, Logger: logger, GenImagePath: genImagePath, GenAssetsPath: genAssetsPath, imaging: &imaging, MediaTypes: mimeTypes, imageCache: newImageCache( s, // We're going to write a cache pruning routine later, so make it extremely // unlikely that the user shoots him or herself in the foot // and this is set to a value that represents data he/she // cares about. This should be set in stone once released. genImagePath, )} rs.ResourceCache = newResourceCache(rs) return rs, nil } type ResourceSourceDescriptor struct { // TargetPathBuilder is a callback to create target paths's relative to its owner. TargetPathBuilder func(base string) string // Need one of these to load the resource content. SourceFile source.File OpenReadSeekCloser OpenReadSeekCloser // If OpenReadSeekerCloser is not set, we use this to open the file. SourceFilename string // The relative target filename without any language code. RelTargetFilename string // Any base path prepeneded to the permalink. // Typically the language code if this resource should be published to its sub-folder. URLBase string // Any base path prepended to the target path. This will also typically be the // language code, but setting it here means that it should not have any effect on // the permalink. TargetPathBase string // Delay publishing until either Permalink or RelPermalink is called. Maybe never. 
LazyPublish bool } func (r ResourceSourceDescriptor) Filename() string { if r.SourceFile != nil { return r.SourceFile.Filename() } return r.SourceFilename } func (r *Spec) sourceFs() afero.Fs { return r.PathSpec.BaseFs.Content.Fs } func (r *Spec) New(fd ResourceSourceDescriptor) (Resource, error) { return r.newResourceForFs(r.sourceFs(), fd) } func (r *Spec) NewForFs(sourceFs afero.Fs, fd ResourceSourceDescriptor) (Resource, error) { return r.newResourceForFs(sourceFs, fd) } func (r *Spec) newResourceForFs(sourceFs afero.Fs, fd ResourceSourceDescriptor) (Resource, error) { if fd.OpenReadSeekCloser == nil { if fd.SourceFile != nil && fd.SourceFilename != "" { return nil, errors.New("both SourceFile and AbsSourceFilename provided") } else if fd.SourceFile == nil && fd.SourceFilename == "" { return nil, errors.New("either SourceFile or AbsSourceFilename must be provided") } } if fd.URLBase == "" { fd.URLBase = r.GetURLLanguageBasePath() } if fd.TargetPathBase == "" { fd.TargetPathBase = r.GetTargetLanguageBasePath() } if fd.RelTargetFilename == "" { fd.RelTargetFilename = fd.Filename() } return r.newResource(sourceFs, fd) } func (r *Spec) newResource(sourceFs afero.Fs, fd ResourceSourceDescriptor) (Resource, error) { var fi os.FileInfo var sourceFilename string if fd.OpenReadSeekCloser != nil { } else if fd.SourceFilename != "" { var err error fi, err = sourceFs.Stat(fd.SourceFilename) if err != nil { return nil, err } sourceFilename = fd.SourceFilename } else { fi = fd.SourceFile.FileInfo() sourceFilename = fd.SourceFile.Filename() } if fd.RelTargetFilename == "" { fd.RelTargetFilename = sourceFilename } ext := filepath.Ext(fd.RelTargetFilename) mimeType, found := r.MediaTypes.GetFirstBySuffix(strings.TrimPrefix(ext, ".")) // TODO(bep) we need to handle these ambigous types better, but in this context // we most likely want the application/xml type. 
if mimeType.Suffix == "xml" && mimeType.SubType == "rss" { mimeType, found = r.MediaTypes.GetByType("application/xml") } if !found { mimeStr := mime.TypeByExtension(ext) if mimeStr != "" { mimeType, _ = media.FromString(mimeStr) } } gr := r.newGenericResourceWithBase( sourceFs, fd.LazyPublish, fd.OpenReadSeekCloser, fd.URLBase, fd.TargetPathBase, fd.TargetPathBuilder, fi, sourceFilename, fd.RelTargetFilename, mimeType) if mimeType.MainType == "image" { ext := strings.ToLower(helpers.Ext(sourceFilename)) imgFormat, ok := imageFormats[ext] if !ok { // This allows SVG etc. to be used as resources. They will not have the methods of the Image, but // that would not (currently) have worked. return gr, nil } if err := gr.initHash(); err != nil { return nil, err } return &Image{ format: imgFormat, imaging: r.imaging, genericResource: gr}, nil } return gr, nil } // TODO(bep) unify func (r *Spec) IsInImageCache(key string) bool { // This is used for cache pruning. We currently only have images, but we could // imagine expanding on this. return r.imageCache.isInCache(key) } func (r *Spec) DeleteCacheByPrefix(prefix string) { r.imageCache.deleteByPrefix(prefix) } func (r *Spec) ClearCaches() { r.imageCache.clear() r.ResourceCache.clear() } func (r *Spec) CacheStats() string { r.imageCache.mu.RLock() defer r.imageCache.mu.RUnlock() s := fmt.Sprintf("Cache entries: %d", len(r.imageCache.store)) count := 0 for k := range r.imageCache.store { if count > 5 { break } s += "\n" + k count++ } return s } type dirFile struct { // This is the directory component with Unix-style slashes. dir string // This is the file component. file string } func (d dirFile) path() string { return path.Join(d.dir, d.file) } type resourcePathDescriptor struct { // The relative target directory and filename. relTargetDirFile dirFile // Callback used to construct a target path relative to its owner. targetPathBuilder func(rel string) string // baseURLDir is the fixed sub-folder for a resource in permalinks. 
This will typically // be the language code if we publish to the language's sub-folder. baseURLDir string // This will normally be the same as above, but this will only apply to publishing // of resources. baseTargetPathDir string // baseOffset is set when the output format's path has a offset, e.g. for AMP. baseOffset string } type resourceContent struct { content string contentInit sync.Once } type resourceHash struct { hash string hashInit sync.Once } type publishOnce struct { publisherInit sync.Once publisherErr error logger *jww.Notepad } func (l *publishOnce) publish(s Source) error { l.publisherInit.Do(func() { l.publisherErr = s.Publish() if l.publisherErr != nil { l.logger.ERROR.Printf("failed to publish Resource: %s", l.publisherErr) } }) return l.publisherErr } // genericResource represents a generic linkable resource. type genericResource struct { resourcePathDescriptor title string name string params map[string]interface{} // Absolute filename to the source, including any content folder path. // Note that this is absolute in relation to the filesystem it is stored in. // It can be a base path filesystem, and then this filename will not match // the path to the file on the real filesystem. sourceFilename string // Will be set if this resource is backed by something other than a file. openReadSeekerCloser OpenReadSeekCloser // A hash of the source content. Is only calculated in caching situations. *resourceHash // This may be set to tell us to look in another filesystem for this resource. // We, by default, use the sourceFs filesystem in the spec below. overriddenSourceFs afero.Fs spec *Spec resourceType string mediaType media.Type osFileInfo os.FileInfo // We create copies of this struct, so this needs to be a pointer. *resourceContent // May be set to signal lazy/delayed publishing. 
*publishOnce } func (l *genericResource) Data() interface{} { return noData } func (l *genericResource) Content() (interface{}, error) { if err := l.initContent(); err != nil { return nil, err } return l.content, nil } func (l *genericResource) ReadSeekCloser() (ReadSeekCloser, error) { if l.openReadSeekerCloser != nil { return l.openReadSeekerCloser() } f, err := l.sourceFs().Open(l.sourceFilename) if err != nil { return nil, err } return f, nil } func (l *genericResource) MediaType() media.Type { return l.mediaType } // Implement the Cloner interface. func (l genericResource) WithNewBase(base string) Resource { l.baseOffset = base l.resourceContent = &resourceContent{} return &l } func (l *genericResource) initHash() error { var err error l.hashInit.Do(func() { var hash string var f ReadSeekCloser f, err = l.ReadSeekCloser() if err != nil { err = fmt.Errorf("failed to open source file: %s", err) return } defer f.Close() hash, err = helpers.MD5FromFileFast(f) if err != nil { return } l.hash = hash }) return err } func (l *genericResource) initContent() error { var err error l.contentInit.Do(func() { var r ReadSeekCloser r, err = l.ReadSeekCloser() if err != nil { return } defer r.Close() var b []byte b, err = ioutil.ReadAll(r) if err != nil { return } l.content = string(b) }) return err } func (l *genericResource) sourceFs() afero.Fs { if l.overriddenSourceFs != nil { return l.overriddenSourceFs } return l.spec.sourceFs() } func (l *genericResource) publishIfNeeded() { if l.publishOnce != nil { l.publishOnce.publish(l) } } func (l *genericResource) Permalink() string { l.publishIfNeeded() return l.spec.PermalinkForBaseURL(l.relPermalinkForRel(l.relTargetDirFile.path()), l.spec.BaseURL.HostURL()) } func (l *genericResource) RelPermalink() string { l.publishIfNeeded() return l.relPermalinkFor(l.relTargetDirFile.path()) } func (l *genericResource) relPermalinkFor(target string) string { return l.relPermalinkForRel(target) } func (l *genericResource) 
permalinkFor(target string) string { return l.spec.PermalinkForBaseURL(l.relPermalinkForRel(target), l.spec.BaseURL.HostURL()) } func (l *genericResource) relTargetPathFor(target string) string { return l.relTargetPathForRel(target, false) } func (l *genericResource) relTargetPath() string { return l.relTargetPathForRel(l.targetPath(), false) } func (l *genericResource) Name() string { return l.name } func (l *genericResource) Title() string { return l.title } func (l *genericResource) Params() map[string]interface{} { return l.params } func (l *genericResource) setTitle(title string) { l.title = title } func (l *genericResource) setName(name string) { l.name = name } func (l *genericResource) updateParams(params map[string]interface{}) { if l.params == nil { l.params = params return } // Sets the params not already set for k, v := range params { if _, found := l.params[k]; !found { l.params[k] = v } } } func (l *genericResource) relPermalinkForRel(rel string) string { return l.spec.PathSpec.URLizeFilename(l.relTargetPathForRel(rel, true)) } func (l *genericResource) relTargetPathForRel(rel string, isURL bool) string { if l.targetPathBuilder != nil { rel = l.targetPathBuilder(rel) } if isURL && l.baseURLDir != "" { rel = path.Join(l.baseURLDir, rel) } if !isURL && l.baseTargetPathDir != "" { rel = path.Join(l.baseTargetPathDir, rel) } if l.baseOffset != "" { rel = path.Join(l.baseOffset, rel) } if isURL && l.spec.PathSpec.BasePath != "" { rel = path.Join(l.spec.PathSpec.BasePath, rel) } if len(rel) == 0 || rel[0] != '/' { rel = "/" + rel } return rel } func (l *genericResource) ResourceType() string { return l.resourceType } func (l *genericResource) String() string { return fmt.Sprintf("Resource(%s: %s)", l.resourceType, l.name) } func (l *genericResource) Publish() error { f, err := l.ReadSeekCloser() if err != nil { return err } defer f.Close() return helpers.WriteToDisk(l.targetFilename(), f, l.spec.BaseFs.PublishFs) } // Path is stored with Unix style slashes. 
func (l *genericResource) targetPath() string { return l.relTargetDirFile.path() } func (l *genericResource) targetFilename() string { return filepath.Clean(l.relTargetPath()) } // TODO(bep) clean up below func (r *Spec) newGenericResource(sourceFs afero.Fs, targetPathBuilder func(base string) string, osFileInfo os.FileInfo, sourceFilename, baseFilename string, mediaType media.Type) *genericResource { return r.newGenericResourceWithBase( sourceFs, false, nil, "", "", targetPathBuilder, osFileInfo, sourceFilename, baseFilename, mediaType, ) } func (r *Spec) newGenericResourceWithBase( sourceFs afero.Fs, lazyPublish bool, openReadSeekerCloser OpenReadSeekCloser, urlBaseDir string, targetPathBaseDir string, targetPathBuilder func(base string) string, osFileInfo os.FileInfo, sourceFilename, baseFilename string, mediaType media.Type) *genericResource { // This value is used both to construct URLs and file paths, but start // with a Unix-styled path. baseFilename = helpers.ToSlashTrimLeading(baseFilename) fpath, fname := path.Split(baseFilename) var resourceType string if mediaType.MainType == "image" { resourceType = mediaType.MainType } else { resourceType = mediaType.SubType } pathDescriptor := resourcePathDescriptor{ baseURLDir: urlBaseDir, baseTargetPathDir: targetPathBaseDir, targetPathBuilder: targetPathBuilder, relTargetDirFile: dirFile{dir: fpath, file: fname}, } var po *publishOnce if lazyPublish { po = &publishOnce{logger: r.Logger} } return &genericResource{ openReadSeekerCloser: openReadSeekerCloser, publishOnce: po, resourcePathDescriptor: pathDescriptor, overriddenSourceFs: sourceFs, osFileInfo: osFileInfo, sourceFilename: sourceFilename, mediaType: mediaType, resourceType: resourceType, spec: r, params: make(map[string]interface{}), name: baseFilename, title: baseFilename, resourceContent: &resourceContent{}, resourceHash: &resourceHash{}, } }