From 9421380168f66620cb73203e1267814b3086d805 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B8rn=20Erik=20Pedersen?= Date: Sat, 20 Jan 2018 18:07:41 +0100 Subject: [PATCH] resource: Add Match and GetMatch These methods take a glob pattern as argument: * by default matching from the bundle root * matching is case insensitive and the separator is Unix style slashes: "/" * the bundle root does (by default) not start with a leading slash * if you rename the `Name` for the resource in front matter (`src=...`), then that is the value used in `Match`. * double asterisk matches beyond directory borders, so "**.jpg" will match any JPEG image in the bundle See https://github.com/gobwas/glob This commit also deprecates `ByPrefix` and `GetByPrefix`. This should also be more effective, given a fair amount of reuse of the glob patterns: ```bash BenchmarkResourcesByPrefix-4 300000 4284 ns/op 1130 B/op 7 allocs/op BenchmarkResourcesMatch-4 300000 5220 ns/op 505 B/op 3 allocs/op ``` Fixes #4301 --- Gopkg.lock | 17 +++++- Gopkg.toml | 4 ++ resource/resource.go | 91 ++++++++++++++++++++++++++++- resource/resource_test.go | 117 +++++++++++++++++++++++++++++++++++++- 4 files changed, 224 insertions(+), 5 deletions(-) diff --git a/Gopkg.lock b/Gopkg.lock index db9ec78dc..e7d14c5e3 100644 --- a/Gopkg.lock +++ b/Gopkg.lock @@ -97,6 +97,21 @@ revision = "c2828203cd70a50dcccfb2761f8b1f8ceef9a8e9" version = "v1.4.7" +[[projects]] + name = "github.com/gobwas/glob" + packages = [ + ".", + "compiler", + "match", + "syntax", + "syntax/ast", + "syntax/lexer", + "util/runes", + "util/strings" + ] + revision = "bea32b9cd2d6f55753d94a28e959b13f0244797a" + version = "v0.2.2" + [[projects]] name = "github.com/gorilla/websocket" packages = ["."] @@ -366,6 +381,6 @@ [solve-meta] analyzer-name = "dep" analyzer-version = 1 - inputs-digest = "398d769bcbce6e8fa04f7afab14eb64d25aed5b581d7bd39294014351a459890" + inputs-digest = "3bd32a09fb219b2cddf0e5a91110430a05bfd2d3a28215c69b8013ff2b6b89dc" solver-name = 
"gps-cdcl" solver-version = 1 diff --git a/Gopkg.toml b/Gopkg.toml index 5218dc3af..b07a41f7c 100644 --- a/Gopkg.toml +++ b/Gopkg.toml @@ -131,3 +131,7 @@ [[constraint]] branch = "v2" name = "gopkg.in/yaml.v2" + +[[constraint]] + name = "github.com/gobwas/glob" + version = "0.2.2" diff --git a/resource/resource.go b/resource/resource.go index b3a92273d..a2128bdc7 100644 --- a/resource/resource.go +++ b/resource/resource.go @@ -21,13 +21,14 @@ import ( "path/filepath" "strconv" "strings" + "sync" "github.com/spf13/cast" + "github.com/gobwas/glob" + "github.com/gohugoio/hugo/helpers" "github.com/gohugoio/hugo/media" "github.com/gohugoio/hugo/source" - - "github.com/gohugoio/hugo/helpers" ) var ( @@ -101,10 +102,21 @@ func (r Resources) ByType(tp string) Resources { return filtered } +const prefixDeprecatedMsg = `We have added the more flexible Resources.GetMatch (find one) and Resources.Match (many) to replace the "prefix" methods. + +These matches by a given globbing pattern, e.g. "*.jpg". + +Some examples: + +* To find all resources by its prefix in the root dir of the bundle: .Match image* +* To find one resource by its prefix in the root dir of the bundle: .GetMatch image* +* To find all JPEG images anywhere in the bundle: .Match **.jpg` + // GetBySuffix gets the first resource matching the given filename prefix, e.g // "logo" will match logo.png. It returns nil of none found. // In potential ambiguous situations, combine it with ByType. func (r Resources) GetByPrefix(prefix string) Resource { + helpers.Deprecated("Resources", "GetByPrefix", prefixDeprecatedMsg, false) prefix = strings.ToLower(prefix) for _, resource := range r { if matchesPrefix(resource, prefix) { @@ -117,6 +129,7 @@ func (r Resources) GetByPrefix(prefix string) Resource { // ByPrefix gets all resources matching the given base filename prefix, e.g // "logo" will match logo.png. 
func (r Resources) ByPrefix(prefix string) Resources { + helpers.Deprecated("Resources", "ByPrefix", prefixDeprecatedMsg, false) var matches Resources prefix = strings.ToLower(prefix) for _, resource := range r { @@ -127,10 +140,80 @@ func (r Resources) ByPrefix(prefix string) Resources { return matches } +// GetMatch finds the first Resource matching the given pattern, or nil if none found. +// See Match for a more complete explanation about the rules used. +func (r Resources) GetMatch(pattern string) Resource { + g, err := getGlob(pattern) + if err != nil { + return nil + } + + for _, resource := range r { + if g.Match(strings.ToLower(resource.Name())) { + return resource + } + } + + return nil +} + +// Match gets all resources matching the given glob pattern, e.g. +// "*.png" will match all png files. The "*" does not match path delimiters (/), +// so if you organize your resources in sub-folders, you need to be explicit about it, e.g.: +// "images/*.png". To match any PNG image anywhere in the bundle you can do "**.png", and +// to match all PNG images below the images folder, use "images/**.png". +// The matching is case insensitive. +// Match matches by using the value of Resource.Name, which, by default, is a filename with +// path relative to the bundle root with Unix style slashes (/) and no leading slash, e.g. "images/logo.png". +// See https://github.com/gobwas/glob for the full rules set. 
+func (r Resources) Match(pattern string) Resources { + g, err := getGlob(pattern) + if err != nil { + return nil + } + + var matches Resources + for _, resource := range r { + if g.Match(strings.ToLower(resource.Name())) { + matches = append(matches, resource) + } + } + return matches +} + func matchesPrefix(r Resource, prefix string) bool { return strings.HasPrefix(strings.ToLower(r.Name()), prefix) } +var ( + globCache = make(map[string]glob.Glob) + globMu sync.RWMutex +) + +func getGlob(pattern string) (glob.Glob, error) { + pattern = strings.ToLower(pattern) + + var g glob.Glob + + globMu.RLock() + g, found := globCache[pattern] + globMu.RUnlock() + if !found { + var err error + g, err = glob.Compile(pattern, '/') + if err != nil { + return nil, err + } + + globMu.Lock() + globCache[pattern] = g + globMu.Unlock() + } + + return g, nil + +} + type Spec struct { *helpers.PathSpec mimeTypes media.Types @@ -390,11 +473,13 @@ func AssignMetadata(metadata []map[string]interface{}, resources ...Resource) er srcKey := strings.ToLower(cast.ToString(src)) - match, err := path.Match(srcKey, resourceSrcKey) + glob, err := getGlob(srcKey) if err != nil { return fmt.Errorf("failed to match resource with metadata: %s", err) } + match := glob.Match(resourceSrcKey) + if match { if !nameSet { name, found := meta["name"] diff --git a/resource/resource_test.go b/resource/resource_test.go index 4670ef632..b4cf3ebed 100644 --- a/resource/resource_test.go +++ b/resource/resource_test.go @@ -15,9 +15,12 @@ package resource import ( "fmt" + "math/rand" "path" "path/filepath" + "strings" "testing" + "time" "github.com/stretchr/testify/require" ) @@ -137,6 +140,52 @@ func TestResourcesGetByPrefix(t *testing.T) { } +func TestResourcesGetMatch(t *testing.T) { + assert := require.New(t) + spec := newTestResourceSpec(assert) + resources := Resources{ + spec.newGenericResource(nil, nil, "/public", "/a/foo1.css", "foo1.css", "css"), + spec.newGenericResource(nil, nil, "/public", 
"/a/logo1.png", "logo1.png", "image"), + spec.newGenericResource(nil, nil, "/public", "/b/Logo2.png", "Logo2.png", "image"), + spec.newGenericResource(nil, nil, "/public", "/b/foo2.css", "foo2.css", "css"), + spec.newGenericResource(nil, nil, "/public", "/b/foo3.css", "foo3.css", "css"), + spec.newGenericResource(nil, nil, "/public", "/b/c/foo4.css", "c/foo4.css", "css"), + spec.newGenericResource(nil, nil, "/public", "/b/c/foo5.css", "c/foo5.css", "css"), + spec.newGenericResource(nil, nil, "/public", "/b/c/d/foo6.css", "c/d/foo6.css", "css"), + } + + assert.Equal("/logo1.png", resources.GetMatch("logo*").RelPermalink()) + assert.Equal("/logo1.png", resources.GetMatch("loGo*").RelPermalink()) + assert.Equal("/Logo2.png", resources.GetMatch("logo2*").RelPermalink()) + assert.Equal("/foo2.css", resources.GetMatch("foo2*").RelPermalink()) + assert.Equal("/foo1.css", resources.GetMatch("foo1*").RelPermalink()) + assert.Equal("/foo1.css", resources.GetMatch("foo1*").RelPermalink()) + assert.Equal("/c/foo4.css", resources.GetMatch("*/foo*").RelPermalink()) + + assert.Nil(resources.GetMatch("asdfasdf")) + + assert.Equal(2, len(resources.Match("Logo*"))) + assert.Equal(1, len(resources.Match("logo2*"))) + assert.Equal(2, len(resources.Match("c/*"))) + + assert.Equal(6, len(resources.Match("**.css"))) + assert.Equal(3, len(resources.Match("**/*.css"))) + assert.Equal(1, len(resources.Match("c/**/*.css"))) + + // Matches only CSS files in c/ + assert.Equal(3, len(resources.Match("c/**.css"))) + + // Matches all CSS files below c/ (including in c/d/) + assert.Equal(3, len(resources.Match("c/**.css"))) + + // Patterns beginning with a slash will not match anything. + // We could maybe consider trimming that slash, but let's be explicit about this. + // (it is possible for users to do a rename) + // This is analogous to standing in a directory and doing "ls *.*". 
+ assert.Equal(0, len(resources.Match("/c/**.css"))) + +} + func TestAssignMetadata(t *testing.T) { assert := require.New(t) spec := newTestResourceSpec(assert) @@ -290,6 +339,73 @@ func TestAssignMetadata(t *testing.T) { } +func BenchmarkResourcesByPrefix(b *testing.B) { + resources := benchResources(b) + prefixes := []string{"abc", "jkl", "nomatch", "sub/"} + rnd := rand.New(rand.NewSource(time.Now().Unix())) + + b.RunParallel(func(pb *testing.PB) { + for pb.Next() { + resources.ByPrefix(prefixes[rnd.Intn(len(prefixes))]) + } + }) +} + +func BenchmarkResourcesMatch(b *testing.B) { + resources := benchResources(b) + prefixes := []string{"abc*", "jkl*", "nomatch*", "sub/*"} + rnd := rand.New(rand.NewSource(time.Now().Unix())) + + b.RunParallel(func(pb *testing.PB) { + for pb.Next() { + resources.Match(prefixes[rnd.Intn(len(prefixes))]) + } + }) +} + +// This adds a benchmark for the a100 test case as described by Russ Cox here: +// https://research.swtch.com/glob (really interesting article) +// I don't expect Hugo users to "stumble upon" this problem, so this is more to satisfy +// my own curiosity. 
+func BenchmarkResourcesMatchA100(b *testing.B) { + assert := require.New(b) + spec := newTestResourceSpec(assert) + a100 := strings.Repeat("a", 100) + pattern := "a*a*a*a*a*a*a*a*b" + + resources := Resources{spec.newGenericResource(nil, nil, "/public", "/a/"+a100, a100, "css")} + + b.ResetTimer() + for i := 0; i < b.N; i++ { + resources.Match(pattern) + } + +} + +func benchResources(b *testing.B) Resources { + assert := require.New(b) + spec := newTestResourceSpec(assert) + var resources Resources + + for i := 0; i < 30; i++ { + name := fmt.Sprintf("abcde%d_%d.css", i%5, i) + resources = append(resources, spec.newGenericResource(nil, nil, "/public", "/a/"+name, name, "css")) + } + + for i := 0; i < 30; i++ { + name := fmt.Sprintf("efghi%d_%d.css", i%5, i) + resources = append(resources, spec.newGenericResource(nil, nil, "/public", "/a/"+name, name, "css")) + } + + for i := 0; i < 30; i++ { + name := fmt.Sprintf("jklmn%d_%d.css", i%5, i) + resources = append(resources, spec.newGenericResource(nil, nil, "/public", "/b/sub/"+name, "sub/"+name, "css")) + } + + return resources + +} + func BenchmarkAssignMetadata(b *testing.B) { assert := require.New(b) spec := newTestResourceSpec(assert) @@ -320,5 +436,4 @@ func BenchmarkAssignMetadata(b *testing.B) { } } - }