hugo/resources/resource_factories/create/remote.go
0xb10c cf5d0c1f91
add URI to resource load error
Previouly, only "ERROR failed to fetch remote resource: Forbidden"
would be shown when a resource failed to load. In my case, a tweet's
author I was embedding using the twitter shortcode switched his profile
to private which resulted in my blog failing to build. To figure out
where the originates, I added the log to the error message.
2024-03-18 11:07:51 +01:00

353 lines
8.9 KiB
Go

// Copyright 2021 The Hugo Authors. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package create
import (
"bufio"
"bytes"
"fmt"
"io"
"math/rand"
"mime"
"net/http"
"net/http/httputil"
"net/url"
"path"
"strings"
"time"
"github.com/gohugoio/hugo/common/hugio"
"github.com/gohugoio/hugo/common/maps"
"github.com/gohugoio/hugo/common/types"
"github.com/gohugoio/hugo/identity"
"github.com/gohugoio/hugo/media"
"github.com/gohugoio/hugo/resources"
"github.com/gohugoio/hugo/resources/resource"
"github.com/mitchellh/mapstructure"
)
type HTTPError struct {
error
Data map[string]any
StatusCode int
Body string
}
func responseToData(res *http.Response, readBody bool) map[string]any {
var body []byte
if readBody {
body, _ = io.ReadAll(res.Body)
}
m := map[string]any{
"StatusCode": res.StatusCode,
"Status": res.Status,
"TransferEncoding": res.TransferEncoding,
"ContentLength": res.ContentLength,
"ContentType": res.Header.Get("Content-Type"),
}
if readBody {
m["Body"] = string(body)
}
return m
}
func toHTTPError(err error, res *http.Response, readBody bool) *HTTPError {
if err == nil {
panic("err is nil")
}
if res == nil {
return &HTTPError{
error: err,
Data: map[string]any{},
}
}
return &HTTPError{
error: err,
Data: responseToData(res, readBody),
}
}
var temporaryHTTPStatusCodes = map[int]bool{
408: true,
429: true,
500: true,
502: true,
503: true,
504: true,
}
// FromRemote expects one or n-parts of a URL to a resource
// If you provide multiple parts they will be joined together to the final URL.
func (c *Client) FromRemote(uri string, optionsm map[string]any) (resource.Resource, error) {
rURL, err := url.Parse(uri)
if err != nil {
return nil, fmt.Errorf("failed to parse URL for resource %s: %w", uri, err)
}
method := "GET"
if s, ok := maps.LookupEqualFold(optionsm, "method"); ok {
method = strings.ToUpper(s.(string))
}
isHeadMethod := method == "HEAD"
resourceID := calculateResourceID(uri, optionsm)
_, httpResponse, err := c.cacheGetResource.GetOrCreate(resourceID, func() (io.ReadCloser, error) {
options, err := decodeRemoteOptions(optionsm)
if err != nil {
return nil, fmt.Errorf("failed to decode options for resource %s: %w", uri, err)
}
if err := c.validateFromRemoteArgs(uri, options); err != nil {
return nil, err
}
var (
start time.Time
nextSleep = time.Duration((rand.Intn(1000) + 100)) * time.Millisecond
nextSleepLimit = time.Duration(5) * time.Second
)
for {
b, retry, err := func() ([]byte, bool, error) {
req, err := options.NewRequest(uri)
if err != nil {
return nil, false, fmt.Errorf("failed to create request for resource %s: %w", uri, err)
}
res, err := c.httpClient.Do(req)
if err != nil {
return nil, false, err
}
defer res.Body.Close()
if res.StatusCode != http.StatusNotFound {
if res.StatusCode < 200 || res.StatusCode > 299 {
return nil, temporaryHTTPStatusCodes[res.StatusCode], toHTTPError(fmt.Errorf("failed to fetch remote resource from '%s': %s", uri, http.StatusText(res.StatusCode)), res, !isHeadMethod)
}
}
b, err := httputil.DumpResponse(res, true)
if err != nil {
return nil, false, toHTTPError(err, res, !isHeadMethod)
}
return b, false, nil
}()
if err != nil {
if retry {
if start.IsZero() {
start = time.Now()
} else if d := time.Since(start) + nextSleep; d >= c.rs.Cfg.Timeout() {
c.rs.Logger.Errorf("Retry timeout (configured to %s) fetching remote resource.", c.rs.Cfg.Timeout())
return nil, err
}
time.Sleep(nextSleep)
if nextSleep < nextSleepLimit {
nextSleep *= 2
}
continue
}
return nil, err
}
return hugio.ToReadCloser(bytes.NewReader(b)), nil
}
})
if err != nil {
return nil, err
}
defer httpResponse.Close()
res, err := http.ReadResponse(bufio.NewReader(httpResponse), nil)
if err != nil {
return nil, err
}
defer res.Body.Close()
if res.StatusCode == http.StatusNotFound {
// Not found. This matches how looksup for local resources work.
return nil, nil
}
var (
body []byte
mediaType media.Type
)
// A response to a HEAD method should not have a body. If it has one anyway, that body must be ignored.
// See https://developer.mozilla.org/en-US/docs/Web/HTTP/Methods/HEAD
if !isHeadMethod && res.Body != nil {
body, err = io.ReadAll(res.Body)
if err != nil {
return nil, fmt.Errorf("failed to read remote resource %q: %w", uri, err)
}
}
filename := path.Base(rURL.Path)
if _, params, _ := mime.ParseMediaType(res.Header.Get("Content-Disposition")); params != nil {
if _, ok := params["filename"]; ok {
filename = params["filename"]
}
}
contentType := res.Header.Get("Content-Type")
// For HEAD requests we have no body to work with, so we need to use the Content-Type header.
if isHeadMethod || c.rs.ExecHelper.Sec().HTTP.MediaTypes.Accept(contentType) {
var found bool
mediaType, found = c.rs.MediaTypes().GetByType(contentType)
if !found {
// A media type not configured in Hugo, just create one from the content type string.
mediaType, _ = media.FromString(contentType)
}
}
if mediaType.IsZero() {
var extensionHints []string
// mime.ExtensionsByType gives a long list of extensions for text/plain,
// just use ".txt".
if strings.HasPrefix(contentType, "text/plain") {
extensionHints = []string{".txt"}
} else {
exts, _ := mime.ExtensionsByType(contentType)
if exts != nil {
extensionHints = exts
}
}
// Look for a file extension. If it's .txt, look for a more specific.
if extensionHints == nil || extensionHints[0] == ".txt" {
if ext := path.Ext(filename); ext != "" {
extensionHints = []string{ext}
}
}
// Now resolve the media type primarily using the content.
mediaType = media.FromContent(c.rs.MediaTypes(), extensionHints, body)
}
if mediaType.IsZero() {
return nil, fmt.Errorf("failed to resolve media type for remote resource %q", uri)
}
resourceID = filename[:len(filename)-len(path.Ext(filename))] + "_" + resourceID + mediaType.FirstSuffix.FullSuffix
data := responseToData(res, false)
return c.rs.NewResource(
resources.ResourceSourceDescriptor{
MediaType: mediaType,
Data: data,
GroupIdentity: identity.StringIdentity(resourceID),
LazyPublish: true,
OpenReadSeekCloser: func() (hugio.ReadSeekCloser, error) {
return hugio.NewReadSeekerNoOpCloser(bytes.NewReader(body)), nil
},
TargetPath: resourceID,
})
}
func (c *Client) validateFromRemoteArgs(uri string, options fromRemoteOptions) error {
if err := c.rs.ExecHelper.Sec().CheckAllowedHTTPURL(uri); err != nil {
return err
}
if err := c.rs.ExecHelper.Sec().CheckAllowedHTTPMethod(options.Method); err != nil {
return err
}
return nil
}
func calculateResourceID(uri string, optionsm map[string]any) string {
if key, found := maps.LookupEqualFold(optionsm, "key"); found {
return identity.HashString(key)
}
return identity.HashString(uri, optionsm)
}
func addDefaultHeaders(req *http.Request) {
if !hasHeaderKey(req.Header, "User-Agent") {
req.Header.Add("User-Agent", "Hugo Static Site Generator")
}
}
func addUserProvidedHeaders(headers map[string]any, req *http.Request) {
if headers == nil {
return
}
for key, val := range headers {
vals := types.ToStringSlicePreserveString(val)
for _, s := range vals {
req.Header.Add(key, s)
}
}
}
func hasHeaderKey(m http.Header, key string) bool {
_, ok := m[key]
return ok
}
type fromRemoteOptions struct {
Method string
Headers map[string]any
Body []byte
}
func (o fromRemoteOptions) BodyReader() io.Reader {
if o.Body == nil {
return nil
}
return bytes.NewBuffer(o.Body)
}
func (o fromRemoteOptions) NewRequest(url string) (*http.Request, error) {
req, err := http.NewRequest(o.Method, url, o.BodyReader())
if err != nil {
return nil, err
}
// First add any user provided headers.
if o.Headers != nil {
addUserProvidedHeaders(o.Headers, req)
}
// Then add default headers not provided by the user.
addDefaultHeaders(req)
return req, nil
}
func decodeRemoteOptions(optionsm map[string]any) (fromRemoteOptions, error) {
options := fromRemoteOptions{
Method: "GET",
}
err := mapstructure.WeakDecode(optionsm, &options)
if err != nil {
return options, err
}
options.Method = strings.ToUpper(options.Method)
return options, nil
}