deploy: Set an MD5 attribute and use that if eTag not available

During Hugo deploy, when a remote MD5 is invalid (e.g., due to a multipart eTag), Hugo reads the entire remote file and calculates the MD5 again, which can be slow.

This commit updates the file upload so that it also stores an MD5 hash in the cloud provider's attributes. For example, in AWS this looks like x-amz-meta-md5chksum: 26fe392386a8123bf8956a16e08cb841.
This commit is contained in:
David Jones 2022-04-05 09:42:54 +01:00 committed by GitHub
parent a6e2e38bb2
commit d0657a436e
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23

View file

@ -21,6 +21,7 @@ import (
"compress/gzip"
"context"
"crypto/md5"
"encoding/hex"
"fmt"
"io"
"io/ioutil"
@ -73,6 +74,8 @@ type deploySummary struct {
NumLocal, NumRemote, NumUploads, NumDeletes int
}
// metaMD5Hash is the metadata key under which the hex-encoded MD5 hash of an
// uploaded file is stored on the remote object.
const metaMD5Hash = "md5chksum"
// New constructs a new *Deployer.
func New(cfg config.Provider, localFs afero.Fs) (*Deployer, error) {
targetName := cfg.GetString("target")
@ -314,6 +317,7 @@ func doSingleUpload(ctx context.Context, bucket *blob.Bucket, upload *fileToUplo
CacheControl: upload.Local.CacheControl(),
ContentEncoding: upload.Local.ContentEncoding(),
ContentType: upload.Local.ContentType(),
Metadata: map[string]string{metaMD5Hash: hex.EncodeToString(upload.Local.MD5())},
}
w, err := bucket.NewWriter(ctx, upload.Local.SlashPath, opts)
if err != nil {
@ -566,7 +570,7 @@ func walkRemote(ctx context.Context, bucket *blob.Bucket, include, exclude glob.
jww.INFO.Printf(" remote dropping %q due to exclude\n", obj.Key)
continue
}
// If the remote didn't give us an MD5, compute one.
// If the remote didn't give us an MD5, use the MD5 stored in the remote attributes; if that doesn't exist either, compute one.
// This can happen for some providers (e.g., fileblob, which uses the
// local filesystem), but not for the most common Cloud providers
// (S3, GCS, Azure). Although, it can happen for S3 if the blob was uploaded
@ -574,13 +578,25 @@ func walkRemote(ctx context.Context, bucket *blob.Bucket, include, exclude glob.
// Although it's unfortunate to have to read the file, it's likely better
// than assuming a delta and re-uploading it.
if len(obj.MD5) == 0 {
r, err := bucket.NewReader(ctx, obj.Key, nil)
var attrMD5 []byte
attrs, err := bucket.Attributes(ctx, obj.Key)
if err == nil {
h := md5.New()
if _, err := io.Copy(h, r); err == nil {
obj.MD5 = h.Sum(nil)
md5String, exists := attrs.Metadata[metaMD5Hash]
if exists {
attrMD5, _ = hex.DecodeString(md5String)
}
r.Close()
}
if len(attrMD5) == 0 {
r, err := bucket.NewReader(ctx, obj.Key, nil)
if err == nil {
h := md5.New()
if _, err := io.Copy(h, r); err == nil {
obj.MD5 = h.Sum(nil)
}
r.Close()
}
} else {
obj.MD5 = attrMD5
}
}
retval[obj.Key] = obj