resource: Add Match and GetMatch

These methods take a glob pattern as argument:

* by default matching from the bundle root
* matching is case insensitive and the separator is Unix style slashes: "/"
* the bundle root does (by default) not start with a leading slash
* if you rename the `Name` of the resource in front matter (`src=...`), then that is the value used in `Match`.
* double asterisk matches beyond directory borders, so "**.jpg" will match any JPEG image in the bundle

See https://github.com/gobwas/glob

This commit also deprecates `ByPrefix` and `GetByPrefix`.

This should also be more effective, given a fair amount of reuse of the glob patterns:

```bash
BenchmarkResourcesByPrefix-4         300000          4284 ns/op        1130 B/op           7 allocs/op
BenchmarkResourcesMatch-4            300000          5220 ns/op         505 B/op           3 allocs/op
```

Fixes #4301
This commit is contained in:
Bjørn Erik Pedersen 2018-01-20 18:07:41 +01:00
parent 5d03086981
commit 9421380168
No known key found for this signature in database
GPG key ID: 330E6E2BD4859D8F
4 changed files with 224 additions and 5 deletions

17
Gopkg.lock generated
View file

@ -97,6 +97,21 @@
revision = "c2828203cd70a50dcccfb2761f8b1f8ceef9a8e9"
version = "v1.4.7"
[[projects]]
name = "github.com/gobwas/glob"
packages = [
".",
"compiler",
"match",
"syntax",
"syntax/ast",
"syntax/lexer",
"util/runes",
"util/strings"
]
revision = "bea32b9cd2d6f55753d94a28e959b13f0244797a"
version = "v0.2.2"
[[projects]]
name = "github.com/gorilla/websocket"
packages = ["."]
@ -366,6 +381,6 @@
[solve-meta]
analyzer-name = "dep"
analyzer-version = 1
inputs-digest = "398d769bcbce6e8fa04f7afab14eb64d25aed5b581d7bd39294014351a459890"
inputs-digest = "3bd32a09fb219b2cddf0e5a91110430a05bfd2d3a28215c69b8013ff2b6b89dc"
solver-name = "gps-cdcl"
solver-version = 1

View file

@ -131,3 +131,7 @@
[[constraint]]
branch = "v2"
name = "gopkg.in/yaml.v2"
[[constraint]]
name = "github.com/gobwas/glob"
version = "0.2.2"

View file

@ -21,13 +21,14 @@ import (
"path/filepath"
"strconv"
"strings"
"sync"
"github.com/spf13/cast"
"github.com/gobwas/glob"
"github.com/gohugoio/hugo/helpers"
"github.com/gohugoio/hugo/media"
"github.com/gohugoio/hugo/source"
"github.com/gohugoio/hugo/helpers"
)
var (
@ -101,10 +102,21 @@ func (r Resources) ByType(tp string) Resources {
return filtered
}
// prefixDeprecatedMsg is the deprecation notice handed to helpers.Deprecated by
// GetByPrefix and ByPrefix. It points callers at GetMatch and Match instead.
const prefixDeprecatedMsg = `We have added the more flexible Resources.GetMatch (find one) and Resources.Match (many) to replace the "prefix" methods.
These matches by a given globbing pattern, e.g. "*.jpg".
Some examples:
* To find all resources by its prefix in the root dir of the bundle: .Match image*
* To find one resource by its prefix in the root dir of the bundle: .GetMatch image*
* To find all JPEG images anywhere in the bundle: .Match **.jpg`
// GetByPrefix gets the first resource matching the given filename prefix, e.g
// "logo" will match logo.png. It returns nil if none found.
// In potential ambiguous situations, combine it with ByType.
func (r Resources) GetByPrefix(prefix string) Resource {
helpers.Deprecated("Resources", "GetByPrefix", prefixDeprecatedMsg, false)
prefix = strings.ToLower(prefix)
for _, resource := range r {
if matchesPrefix(resource, prefix) {
@ -117,6 +129,7 @@ func (r Resources) GetByPrefix(prefix string) Resource {
// ByPrefix gets all resources matching the given base filename prefix, e.g
// "logo" will match logo.png.
func (r Resources) ByPrefix(prefix string) Resources {
helpers.Deprecated("Resources", "ByPrefix", prefixDeprecatedMsg, false)
var matches Resources
prefix = strings.ToLower(prefix)
for _, resource := range r {
@ -127,10 +140,80 @@ func (r Resources) ByPrefix(prefix string) Resources {
return matches
}
// GetMatch returns the first Resource in r whose lower-cased Name matches the
// given glob pattern. It returns nil when nothing matches or when the pattern
// cannot be compiled. See Match for the matching rules.
func (r Resources) GetMatch(pattern string) Resource {
	matcher, err := getGlob(pattern)
	if err != nil {
		// An invalid pattern is treated the same as "no match".
		return nil
	}

	for _, candidate := range r {
		name := strings.ToLower(candidate.Name())
		if !matcher.Match(name) {
			continue
		}
		return candidate
	}

	return nil
}
// Match returns every resource in r whose Name matches the given glob pattern,
// e.g. "*.png" matches all PNG files in the bundle root.
//
// The "*" wildcard does not cross path delimiters (/), so resources organized
// in sub-folders must be addressed explicitly, e.g. "images/*.png". Use "**" to
// cross directory borders: "**.png" matches any PNG image anywhere in the
// bundle, and "images/**.png" matches all PNG images below the images folder.
//
// Matching is case insensitive: both the pattern and the resource name are
// lower-cased before comparison. The value matched against is Resource.Name,
// which, by default, is a filename with path relative to the bundle root with
// Unix style slashes (/) and no leading slash, e.g. "images/logo.png".
//
// Match returns nil if the pattern cannot be compiled or nothing matches.
// See https://github.com/gobwas/glob for the full rules set.
func (r Resources) Match(pattern string) Resources {
	matcher, err := getGlob(pattern)
	if err != nil {
		// An invalid pattern is treated the same as "no match".
		return nil
	}

	var found Resources
	for _, candidate := range r {
		if !matcher.Match(strings.ToLower(candidate.Name())) {
			continue
		}
		found = append(found, candidate)
	}

	return found
}
// matchesPrefix reports whether the lower-cased name of r starts with prefix.
// The prefix is compared as given, so callers lower-case it beforehand.
func matchesPrefix(r Resource, prefix string) bool {
	name := strings.ToLower(r.Name())
	return strings.HasPrefix(name, prefix)
}
// Package-level cache of compiled glob patterns used by getGlob.
var (
// globCache maps a lower-cased pattern string to its compiled glob.
globCache = make(map[string]glob.Glob)
// globMu guards all reads and writes of globCache.
globMu sync.RWMutex
)
// getGlob returns the compiled glob for pattern, using '/' as the separator.
// The pattern is lower-cased first and the result is cached under that
// lower-cased key, so repeated lookups of the same pattern skip compilation.
// A compile error is returned as-is and nothing is cached for that pattern.
func getGlob(pattern string) (glob.Glob, error) {
	key := strings.ToLower(pattern)

	// Fast path: read lock only.
	globMu.RLock()
	cached, ok := globCache[key]
	globMu.RUnlock()
	if ok {
		return cached, nil
	}

	compiled, err := glob.Compile(key, '/')
	if err != nil {
		return nil, err
	}

	globMu.Lock()
	globCache[key] = compiled
	globMu.Unlock()

	return compiled, nil
}
type Spec struct {
*helpers.PathSpec
mimeTypes media.Types
@ -390,11 +473,13 @@ func AssignMetadata(metadata []map[string]interface{}, resources ...Resource) er
srcKey := strings.ToLower(cast.ToString(src))
match, err := path.Match(srcKey, resourceSrcKey)
glob, err := getGlob(srcKey)
if err != nil {
return fmt.Errorf("failed to match resource with metadata: %s", err)
}
match := glob.Match(resourceSrcKey)
if match {
if !nameSet {
name, found := meta["name"]

View file

@ -15,9 +15,12 @@ package resource
import (
"fmt"
"math/rand"
"path"
"path/filepath"
"strings"
"testing"
"time"
"github.com/stretchr/testify/require"
)
@ -137,6 +140,52 @@ func TestResourcesGetByPrefix(t *testing.T) {
}
// TestResourcesGetMatch exercises Resources.GetMatch and Resources.Match with
// single- and double-asterisk patterns, mixed-case patterns and names, and
// resources nested in sub-folders of the bundle.
func TestResourcesGetMatch(t *testing.T) {
	assert := require.New(t)
	spec := newTestResourceSpec(assert)
	resources := Resources{
		spec.newGenericResource(nil, nil, "/public", "/a/foo1.css", "foo1.css", "css"),
		spec.newGenericResource(nil, nil, "/public", "/a/logo1.png", "logo1.png", "image"),
		spec.newGenericResource(nil, nil, "/public", "/b/Logo2.png", "Logo2.png", "image"),
		spec.newGenericResource(nil, nil, "/public", "/b/foo2.css", "foo2.css", "css"),
		spec.newGenericResource(nil, nil, "/public", "/b/foo3.css", "foo3.css", "css"),
		spec.newGenericResource(nil, nil, "/public", "/b/c/foo4.css", "c/foo4.css", "css"),
		spec.newGenericResource(nil, nil, "/public", "/b/c/foo5.css", "c/foo5.css", "css"),
		spec.newGenericResource(nil, nil, "/public", "/b/c/d/foo6.css", "c/d/foo6.css", "css"),
	}

	assert.Equal("/logo1.png", resources.GetMatch("logo*").RelPermalink())
	assert.Equal("/logo1.png", resources.GetMatch("loGo*").RelPermalink())
	assert.Equal("/Logo2.png", resources.GetMatch("logo2*").RelPermalink())
	assert.Equal("/foo2.css", resources.GetMatch("foo2*").RelPermalink())
	assert.Equal("/foo1.css", resources.GetMatch("foo1*").RelPermalink())
	// Same pattern again: the result must not change on a repeated lookup.
	assert.Equal("/foo1.css", resources.GetMatch("foo1*").RelPermalink())
	assert.Equal("/c/foo4.css", resources.GetMatch("*/foo*").RelPermalink())

	assert.Nil(resources.GetMatch("asdfasdf"))

	assert.Equal(2, len(resources.Match("Logo*")))
	assert.Equal(1, len(resources.Match("logo2*")))
	assert.Equal(2, len(resources.Match("c/*")))

	assert.Equal(6, len(resources.Match("**.css")))
	assert.Equal(3, len(resources.Match("**/*.css")))
	assert.Equal(1, len(resources.Match("c/**/*.css")))

	// Matches only CSS files directly in c/ (a single "*" does not cross "/").
	assert.Equal(2, len(resources.Match("c/*.css")))

	// Matches all CSS files below c/ (including in c/d/)
	assert.Equal(3, len(resources.Match("c/**.css")))

	// Patterns beginning with a slash will not match anything.
	// We could maybe consider trimming that slash, but let's be explicit about this.
	// (it is possible for users to do a rename)
	// This is analogous to standing in a directory and doing "ls *.*".
	assert.Equal(0, len(resources.Match("/c/**.css")))
}
func TestAssignMetadata(t *testing.T) {
assert := require.New(t)
spec := newTestResourceSpec(assert)
@ -290,6 +339,73 @@ func TestAssignMetadata(t *testing.T) {
}
// BenchmarkResourcesByPrefix measures Resources.ByPrefix with a randomly
// chosen prefix per iteration, run in parallel.
func BenchmarkResourcesByPrefix(b *testing.B) {
	resources := benchResources(b)
	prefixes := []string{"abc", "jkl", "nomatch", "sub/"}

	// Keep fixture construction out of the measured time.
	b.ResetTimer()
	b.RunParallel(func(pb *testing.PB) {
		// A *rand.Rand created with rand.New is not safe for concurrent use,
		// and RunParallel invokes this function from several goroutines, so
		// each goroutine gets its own generator.
		rnd := rand.New(rand.NewSource(time.Now().Unix()))
		for pb.Next() {
			resources.ByPrefix(prefixes[rnd.Intn(len(prefixes))])
		}
	})
}
// BenchmarkResourcesMatch measures Resources.Match with a randomly chosen
// glob pattern per iteration, run in parallel.
func BenchmarkResourcesMatch(b *testing.B) {
	resources := benchResources(b)
	prefixes := []string{"abc*", "jkl*", "nomatch*", "sub/*"}

	// Keep fixture construction out of the measured time.
	b.ResetTimer()
	b.RunParallel(func(pb *testing.PB) {
		// A *rand.Rand created with rand.New is not safe for concurrent use,
		// and RunParallel invokes this function from several goroutines, so
		// each goroutine gets its own generator.
		rnd := rand.New(rand.NewSource(time.Now().Unix()))
		for pb.Next() {
			resources.Match(prefixes[rnd.Intn(len(prefixes))])
		}
	})
}
// BenchmarkResourcesMatchA100 runs the "a100" case described by Russ Cox in
// https://research.swtch.com/glob: a name of 100 "a" characters matched
// against a pattern with many "*" wildcards that ends in a non-matching "b".
// Hugo users are not expected to hit this in practice; the benchmark exists
// mostly out of curiosity.
func BenchmarkResourcesMatchA100(b *testing.B) {
	assert := require.New(b)
	spec := newTestResourceSpec(assert)

	const pattern = "a*a*a*a*a*a*a*a*b"
	name := strings.Repeat("a", 100)
	resources := Resources{
		spec.newGenericResource(nil, nil, "/public", "/a/"+name, name, "css"),
	}

	// Only the matching itself is measured.
	b.ResetTimer()
	for n := 0; n < b.N; n++ {
		resources.Match(pattern)
	}
}
// benchResources builds the fixture shared by the resource benchmarks: 90
// generic CSS resources in three groups of 30 ("abcde…" and "efghi…" in the
// bundle root, "jklmn…" under "sub/"), appended in that order.
func benchResources(b *testing.B) Resources {
	assert := require.New(b)
	spec := newTestResourceSpec(assert)

	var resources Resources

	// addGroup appends 30 resources whose base name is produced by format,
	// placed under dir on disk and under namePrefix in the resource name.
	addGroup := func(format, dir, namePrefix string) {
		for i := 0; i < 30; i++ {
			base := fmt.Sprintf(format, i%5, i)
			resources = append(resources, spec.newGenericResource(nil, nil, "/public", dir+base, namePrefix+base, "css"))
		}
	}

	addGroup("abcde%d_%d.css", "/a/", "")
	addGroup("efghi%d_%d.css", "/a/", "")
	addGroup("jklmn%d_%d.css", "/b/sub/", "sub/")

	return resources
}
func BenchmarkAssignMetadata(b *testing.B) {
assert := require.New(b)
spec := newTestResourceSpec(assert)
@ -320,5 +436,4 @@ func BenchmarkAssignMetadata(b *testing.B) {
}
}
}