diff --git a/image.go b/image.go
index 163d3426953..47842db7d0e 100644
--- a/image.go
+++ b/image.go
@@ -557,6 +557,10 @@ func (i *containerImageSource) LayerInfosForCopy(ctx context.Context) ([]types.B
 	return nil, nil
 }
 
+func (i *containerImageSource) HasThreadSafeGetBlob() bool {
+	return false
+}
+
 func (i *containerImageSource) GetBlob(ctx context.Context, blob types.BlobInfo, cache types.BlobInfoCache) (reader io.ReadCloser, size int64, err error) {
 	if blob.Digest == i.configDigest {
 		logrus.Debugf("start reading config")
diff --git a/pkg/blobcache/blobcache.go b/pkg/blobcache/blobcache.go
index 63022f15dad..ae55316b030 100644
--- a/pkg/blobcache/blobcache.go
+++ b/pkg/blobcache/blobcache.go
@@ -218,6 +218,10 @@ func (s *blobCacheSource) GetManifest(ctx context.Context, instanceDigest *diges
 	return s.source.GetManifest(ctx, instanceDigest)
 }
 
+func (s *blobCacheSource) HasThreadSafeGetBlob() bool {
+	return false
+}
+
 func (s *blobCacheSource) GetBlob(ctx context.Context, blobinfo types.BlobInfo, cache types.BlobInfoCache) (io.ReadCloser, int64, error) {
 	present, size, err := s.reference.HasBlob(blobinfo)
 	if err != nil {
@@ -398,6 +402,10 @@ func saveStream(wg *sync.WaitGroup, decompressReader io.ReadCloser, tempFile *os
 	}
 }
 
+func (s *blobCacheDestination) HasThreadSafePutBlob() bool {
+	return false
+}
+
 func (d *blobCacheDestination) PutBlob(ctx context.Context, stream io.Reader, inputInfo types.BlobInfo, cache types.BlobInfoCache, isConfig bool) (types.BlobInfo, error) {
 	var tempfile *os.File
 	var err error
diff --git a/tests/blobcache.bats b/tests/blobcache.bats
index d04a2506880..74613209284 100644
--- a/tests/blobcache.bats
+++ b/tests/blobcache.bats
@@ -187,33 +187,6 @@ load helpers
 	run buildah build-using-dockerfile -t ${target} --pull-always --blob-cache=${blobcachedir} --signature-policy ${TESTSDIR}/policy.json ${TESTSDIR}/bud/add-file
 	echo "$output"
 	[ "$status" -eq 0 ]
-	# Try to push the image without the blob cache.
-	doomeddir=${TESTDIR}/doomed
-	mkdir -p ${doomeddir}
-	run buildah push --signature-policy ${TESTSDIR}/policy.json ${target} dir:${doomeddir}
-	echo "$output"
-	[ "$status" -eq 0 ]
-	# Look for layer blobs in the destination that match the ones in the cache.
-	matched=0
-	unmatched=0
-	for content in ${doomeddir}/* ; do
-		match=false
-		for blob in ${blobcachedir}/* ; do
-			if cmp -s ${content} ${blob} ; then
-				echo $(file ${blob}) and ${content} have the same contents, was cached
-				match=true
-				break
-			fi
-		done
-		if ${match} ; then
-			matched=$(( ${matched} + 1 ))
-		else
-			unmatched=$(( ${unmatched} + 1 ))
-			echo ${content} was not cached
-		fi
-	done
-	[ ${matched} -eq 4 ] # the base layer, our new layer, config, and manifest should all match the cache
-	[ ${unmatched} -eq 1 ] # the only mismatch should be "version"
 	# Now try to push the image using the blob cache.
 	destdir=${TESTDIR}/dest
 	mkdir -p ${destdir}
@@ -251,33 +224,6 @@ load helpers
 	run buildah build-using-dockerfile -t ${target} -D --pull-always --blob-cache=${blobcachedir} --signature-policy ${TESTSDIR}/policy.json ${TESTSDIR}/bud/add-file
 	echo "$output"
 	[ "$status" -eq 0 ]
-	# Try to push the image without the blob cache.
-	doomeddir=${TESTDIR}/doomed
-	mkdir -p ${doomeddir}
-	run buildah push --signature-policy ${TESTSDIR}/policy.json ${target} dir:${doomeddir}
-	echo "$output"
-	[ "$status" -eq 0 ]
-	# Look for layer blobs in the destination that match the ones in the cache.
-	matched=0
-	unmatched=0
-	for content in ${doomeddir}/* ; do
-		match=false
-		for blob in ${blobcachedir}/* ; do
-			if cmp -s ${content} ${blob} ; then
-				echo $(file ${blob}) and ${content} have the same contents, was cached
-				match=true
-				break
-			fi
-		done
-		if ${match} ; then
-			matched=$(( ${matched} + 1 ))
-		else
-			unmatched=$(( ${unmatched} + 1 ))
-			echo ${content} was not cached
-		fi
-	done
-	[ ${matched} -eq 2 ] # our new layer (written at build-time) and config should match the cache
-	[ ${unmatched} -eq 3 ] # expected mismatches should be "version", and the base layers, which had to be recompressed
 	# Now try to push the image using the blob cache.
 	destdir=${TESTDIR}/dest
 	mkdir -p ${destdir}
diff --git a/vendor.conf b/vendor.conf
index 707d208c930..ee8192935a5 100644
--- a/vendor.conf
+++ b/vendor.conf
@@ -3,10 +3,10 @@ github.com/blang/semver master
 github.com/BurntSushi/toml master
 github.com/containerd/continuity master
 github.com/containernetworking/cni v0.7.0-alpha1
-github.com/containers/image d53afe179b381fafb427e6b9cf9b1996a98c1067
+github.com/containers/image 0c6cc8e1420001ae39fa89d308b3b3bc5ee81c57
 github.com/boltdb/bolt master
 github.com/containers/libpod c8eaf59d5f4bec249db8134c6a9fcfbcac792519
-github.com/containers/storage db40f96d853dfced60c563e61fb66ba231ce7c8d
+github.com/containers/storage 60a692f7ce891feb91ce0eda87bd06bfd5651dff
 github.com/docker/distribution 5f6282db7d65e6d72ad7c2cc66310724a57be716
 github.com/docker/docker 86f080cff0914e9694068ed78d503701667c4c00
 github.com/docker/docker-credential-helpers d68f9aeca33f5fd3f08eeae5e9d175edf4e731d1
@@ -32,7 +32,7 @@ github.com/mistifyio/go-zfs master
 github.com/moby/moby f8806b18b4b92c5e1980f6e11c917fad201cd73c
 github.com/mtrmac/gpgme master
 github.com/Nvveen/Gotty master
-github.com/opencontainers/go-digest aa2ec055abd10d26d539eb630a92241b781ce4bc
+github.com/opencontainers/go-digest c9281466c8b2f606084ac71339773efd177436e7
 github.com/opencontainers/image-spec v1.0.0
 github.com/opencontainers/runc master
 github.com/opencontainers/runtime-spec v1.0.0
@@ -55,10 +55,14 @@ github.com/xeipuuv/gojsonreference master
 github.com/xeipuuv/gojsonschema master
 golang.org/x/crypto master
 golang.org/x/net master
+golang.org/x/sync 42b317875d0fa942474b76e1b46a6060d720ae6e
 golang.org/x/sys master
 golang.org/x/text master
-gopkg.in/cheggaaa/pb.v1 v1.0.13
+gopkg.in/cheggaaa/pb.v1 v1.0.27
 gopkg.in/yaml.v2 cd8b52f8269e0feb286dfeef29f8fe4d5b397e0b
 k8s.io/apimachinery master
 k8s.io/client-go master
 k8s.io/kubernetes master
+github.com/klauspost/pgzip v1.2.1
+github.com/klauspost/compress v1.4.1
+github.com/klauspost/cpuid v1.2.0
diff --git a/vendor/github.com/containers/image/copy/copy.go b/vendor/github.com/containers/image/copy/copy.go
index 013080e8da5..89c7e580f88 100644
--- a/vendor/github.com/containers/image/copy/copy.go
+++ b/vendor/github.com/containers/image/copy/copy.go
@@ -2,7 +2,6 @@ package copy
 
 import (
 	"bytes"
-	"compress/gzip"
 	"context"
 	"fmt"
 	"io"
@@ -10,6 +9,7 @@ import (
 	"reflect"
 	"runtime"
 	"strings"
+	"sync"
 	"time"
 
 	"github.com/containers/image/image"
@@ -18,9 +18,11 @@ import (
 	"github.com/containers/image/signature"
 	"github.com/containers/image/transports"
 	"github.com/containers/image/types"
+	"github.com/klauspost/pgzip"
 	"github.com/opencontainers/go-digest"
 	"github.com/pkg/errors"
 	"github.com/sirupsen/logrus"
+	"golang.org/x/sync/semaphore"
 	pb "gopkg.in/cheggaaa/pb.v1"
 )
 
@@ -32,6 +34,10 @@ type digestingReader struct {
 	validationSucceeded bool
 }
 
+// maxParallelDownloads is used to limit the maxmimum number of parallel
+// downloads.  Let's follow Firefox by limiting it to 6.
+var maxParallelDownloads = 6
+
 // newDigestingReader returns an io.Reader implementation with contents of source, which will eventually return a non-EOF error
 // or set validationSucceeded/validationFailed to true if the source stream does/does not match expectedDigest.
 // (neither is set if EOF is never reached).
@@ -81,6 +87,7 @@ type copier struct {
 	progressInterval time.Duration
 	progress         chan types.ProgressProperties
 	blobInfoCache    types.BlobInfoCache
+	copyInParallel   bool
 }
 
 // imageCopier tracks state specific to a single image (possibly an item of a manifest list)
@@ -145,12 +152,14 @@ func Image(ctx context.Context, policyContext *signature.PolicyContext, destRef,
 		}
 	}()
 
+	copyInParallel := dest.HasThreadSafePutBlob() && rawSource.HasThreadSafeGetBlob()
 	c := &copier{
 		dest:             dest,
 		rawSource:        rawSource,
 		reportWriter:     reportWriter,
 		progressInterval: options.ProgressInterval,
 		progress:         options.Progress,
+		copyInParallel:   copyInParallel,
 		// FIXME? The cache is used for sources and destinations equally, but we only have a SourceCtx and DestinationCtx.
 		// For now, use DestinationCtx (because blob reuse changes the behavior of the destination side more); eventually
 		// we might want to add a separate CommonCtx — or would that be too confusing?
@@ -380,11 +389,26 @@ func (ic *imageCopier) updateEmbeddedDockerReference() error {
 	return nil
 }
 
+// shortDigest returns the first 12 characters of the digest.
+func shortDigest(d digest.Digest) string {
+	return d.Encoded()[:12]
+}
+
+// createProgressBar creates a pb.ProgressBar.
+func createProgressBar(srcInfo types.BlobInfo, kind string, writer io.Writer) *pb.ProgressBar {
+	bar := pb.New(int(srcInfo.Size)).SetUnits(pb.U_BYTES)
+	bar.SetMaxWidth(80)
+	bar.ShowTimeLeft = false
+	bar.ShowPercent = false
+	bar.Prefix(fmt.Sprintf("Copying %s %s:", kind, shortDigest(srcInfo.Digest)))
+	bar.Output = writer
+	return bar
+}
+
 // copyLayers copies layers from ic.src/ic.c.rawSource to dest, using and updating ic.manifestUpdates if necessary and ic.canModifyManifest.
 func (ic *imageCopier) copyLayers(ctx context.Context) error {
 	srcInfos := ic.src.LayerInfos()
-	destInfos := []types.BlobInfo{}
-	diffIDs := []digest.Digest{}
+	numLayers := len(srcInfos)
 	updatedSrcInfos, err := ic.src.LayerInfosForCopy(ctx)
 	if err != nil {
 		return err
@@ -397,30 +421,83 @@ func (ic *imageCopier) copyLayers(ctx context.Context) error {
 		srcInfos = updatedSrcInfos
 		srcInfosUpdated = true
 	}
-	for _, srcLayer := range srcInfos {
-		var (
-			destInfo types.BlobInfo
-			diffID   digest.Digest
-			err      error
-		)
+
+	type copyLayerData struct {
+		destInfo types.BlobInfo
+		diffID   digest.Digest
+		err      error
+	}
+
+	// copyGroup is used to determine if all layers are copied
+	copyGroup := sync.WaitGroup{}
+	copyGroup.Add(numLayers)
+	// copySemaphore is used to limit the number of parallel downloads to
+	// avoid malicious images causing troubles and to be nice to servers.
+	var copySemaphore *semaphore.Weighted
+	if ic.c.copyInParallel {
+		copySemaphore = semaphore.NewWeighted(int64(maxParallelDownloads))
+	} else {
+		copySemaphore = semaphore.NewWeighted(int64(1))
+	}
+
+	data := make([]copyLayerData, numLayers)
+	copyLayerHelper := func(index int, srcLayer types.BlobInfo, bar *pb.ProgressBar) {
+		defer bar.Finish()
+		defer copySemaphore.Release(1)
+		defer copyGroup.Done()
+		cld := copyLayerData{}
 		if ic.c.dest.AcceptsForeignLayerURLs() && len(srcLayer.URLs) != 0 {
 			// DiffIDs are, currently, needed only when converting from schema1.
 			// In which case src.LayerInfos will not have URLs because schema1
 			// does not support them.
 			if ic.diffIDsAreNeeded {
-				return errors.New("getting DiffID for foreign layers is unimplemented")
+				cld.err = errors.New("getting DiffID for foreign layers is unimplemented")
+				bar.Prefix(fmt.Sprintf("Skipping blob %s (DiffID foreign layer unimplemented):", shortDigest(srcLayer.Digest)))
+				bar.Finish()
+			} else {
+				cld.destInfo = srcLayer
+				logrus.Debugf("Skipping foreign layer %q copy to %s\n", cld.destInfo.Digest, ic.c.dest.Reference().Transport().Name())
+				bar.Prefix(fmt.Sprintf("Skipping blob %s (foreign layer):", shortDigest(srcLayer.Digest)))
+				bar.Add64(bar.Total)
+				bar.Finish()
 			}
-			destInfo = srcLayer
-			ic.c.Printf("Skipping foreign layer %q copy to %s\n", destInfo.Digest, ic.c.dest.Reference().Transport().Name())
 		} else {
-			destInfo, diffID, err = ic.copyLayer(ctx, srcLayer)
-			if err != nil {
-				return err
-			}
+			cld.destInfo, cld.diffID, cld.err = ic.copyLayer(ctx, srcLayer, bar)
 		}
-		destInfos = append(destInfos, destInfo)
-		diffIDs = append(diffIDs, diffID)
+		data[index] = cld
 	}
+
+	progressBars := make([]*pb.ProgressBar, numLayers)
+	for i, srcInfo := range srcInfos {
+		bar := createProgressBar(srcInfo, "blob", nil)
+		progressBars[i] = bar
+	}
+
+	progressPool := pb.NewPool(progressBars...)
+	progressPool.Output = ic.c.reportWriter
+	if err := progressPool.Start(); err != nil {
+		return errors.Wrapf(err, "error creating progress-bar pool")
+	}
+
+	for i, srcLayer := range srcInfos {
+		copySemaphore.Acquire(ctx, 1)
+		go copyLayerHelper(i, srcLayer, progressBars[i])
+	}
+
+	destInfos := make([]types.BlobInfo, numLayers)
+	diffIDs := make([]digest.Digest, numLayers)
+
+	copyGroup.Wait()
+	progressPool.Stop()
+
+	for i, cld := range data {
+		if cld.err != nil {
+			return cld.err
+		}
+		destInfos[i] = cld.destInfo
+		diffIDs[i] = cld.diffID
+	}
+
 	ic.manifestUpdates.InformationOnly.LayerInfos = destInfos
 	if ic.diffIDsAreNeeded {
 		ic.manifestUpdates.InformationOnly.LayerDiffIDs = diffIDs
@@ -487,12 +564,14 @@ func (ic *imageCopier) copyUpdatedConfigAndManifest(ctx context.Context) ([]byte
 func (c *copier) copyConfig(ctx context.Context, src types.Image) error {
 	srcInfo := src.ConfigInfo()
 	if srcInfo.Digest != "" {
-		c.Printf("Copying config %s\n", srcInfo.Digest)
 		configBlob, err := src.ConfigBlob(ctx)
 		if err != nil {
 			return errors.Wrapf(err, "Error reading config blob %s", srcInfo.Digest)
 		}
-		destInfo, err := c.copyBlobFromStream(ctx, bytes.NewReader(configBlob), srcInfo, nil, false, true)
+		bar := createProgressBar(srcInfo, "config", c.reportWriter)
+		defer bar.Finish()
+		bar.Start()
+		destInfo, err := c.copyBlobFromStream(ctx, bytes.NewReader(configBlob), srcInfo, nil, false, true, bar)
 		if err != nil {
 			return err
 		}
@@ -512,7 +591,7 @@ type diffIDResult struct {
 
 // copyLayer copies a layer with srcInfo (with known Digest and possibly known Size) in src to dest, perhaps compressing it if canCompress,
 // and returns a complete blobInfo of the copied layer, and a value for LayerDiffIDs if diffIDIsNeeded
-func (ic *imageCopier) copyLayer(ctx context.Context, srcInfo types.BlobInfo) (types.BlobInfo, digest.Digest, error) {
+func (ic *imageCopier) copyLayer(ctx context.Context, srcInfo types.BlobInfo, bar *pb.ProgressBar) (types.BlobInfo, digest.Digest, error) {
 	cachedDiffID := ic.c.blobInfoCache.UncompressedDigest(srcInfo.Digest) // May be ""
 	diffIDIsNeeded := ic.diffIDsAreNeeded && cachedDiffID == ""
 
@@ -523,13 +602,14 @@ func (ic *imageCopier) copyLayer(ctx context.Context, srcInfo types.BlobInfo) (t
 			return types.BlobInfo{}, "", errors.Wrapf(err, "Error trying to reuse blob %s at destination", srcInfo.Digest)
 		}
 		if reused {
-			ic.c.Printf("Skipping fetch of repeat blob %s\n", srcInfo.Digest)
+			bar.Prefix(fmt.Sprintf("Skipping blob %s (already present):", shortDigest(srcInfo.Digest)))
+			bar.Add64(bar.Total)
+			bar.Finish()
 			return blobInfo, cachedDiffID, nil
 		}
 	}
 
 	// Fallback: copy the layer, computing the diffID if we need to do so
-	ic.c.Printf("Copying blob %s\n", srcInfo.Digest)
 	srcStream, srcBlobSize, err := ic.c.rawSource.GetBlob(ctx, srcInfo, ic.c.blobInfoCache)
 	if err != nil {
 		return types.BlobInfo{}, "", errors.Wrapf(err, "Error reading blob %s", srcInfo.Digest)
@@ -537,7 +617,7 @@ func (ic *imageCopier) copyLayer(ctx context.Context, srcInfo types.BlobInfo) (t
 	defer srcStream.Close()
 
 	blobInfo, diffIDChan, err := ic.copyLayerFromStream(ctx, srcStream, types.BlobInfo{Digest: srcInfo.Digest, Size: srcBlobSize},
-		diffIDIsNeeded)
+		diffIDIsNeeded, bar)
 	if err != nil {
 		return types.BlobInfo{}, "", err
 	}
@@ -565,7 +645,7 @@ func (ic *imageCopier) copyLayer(ctx context.Context, srcInfo types.BlobInfo) (t
 // perhaps compressing the stream if canCompress,
 // and returns a complete blobInfo of the copied blob and perhaps a <-chan diffIDResult if diffIDIsNeeded, to be read by the caller.
 func (ic *imageCopier) copyLayerFromStream(ctx context.Context, srcStream io.Reader, srcInfo types.BlobInfo,
-	diffIDIsNeeded bool) (types.BlobInfo, <-chan diffIDResult, error) {
+	diffIDIsNeeded bool, bar *pb.ProgressBar) (types.BlobInfo, <-chan diffIDResult, error) {
 	var getDiffIDRecorder func(compression.DecompressorFunc) io.Writer // = nil
 	var diffIDChan chan diffIDResult
 
@@ -589,7 +669,7 @@ func (ic *imageCopier) copyLayerFromStream(ctx context.Context, srcStream io.Rea
 			return pipeWriter
 		}
 	}
-	blobInfo, err := ic.c.copyBlobFromStream(ctx, srcStream, srcInfo, getDiffIDRecorder, ic.canModifyManifest, false) // Sets err to nil on success
+	blobInfo, err := ic.c.copyBlobFromStream(ctx, srcStream, srcInfo, getDiffIDRecorder, ic.canModifyManifest, false, bar) // Sets err to nil on success
 	return blobInfo, diffIDChan, err
 	// We need the defer … pipeWriter.CloseWithError() to happen HERE so that the caller can block on reading from diffIDChan
 }
@@ -626,7 +706,7 @@ func computeDiffID(stream io.Reader, decompressor compression.DecompressorFunc)
 // and returns a complete blobInfo of the copied blob.
 func (c *copier) copyBlobFromStream(ctx context.Context, srcStream io.Reader, srcInfo types.BlobInfo,
 	getOriginalLayerCopyWriter func(decompressor compression.DecompressorFunc) io.Writer,
-	canModifyBlob bool, isConfig bool) (types.BlobInfo, error) {
+	canModifyBlob bool, isConfig bool, bar *pb.ProgressBar) (types.BlobInfo, error) {
 	// The copying happens through a pipeline of connected io.Readers.
 	// === Input: srcStream
 
@@ -649,16 +729,7 @@ func (c *copier) copyBlobFromStream(ctx context.Context, srcStream io.Reader, sr
 		return types.BlobInfo{}, errors.Wrapf(err, "Error reading blob %s", srcInfo.Digest)
 	}
 	isCompressed := decompressor != nil
-
-	// === Report progress using a pb.Reader.
-	bar := pb.New(int(srcInfo.Size)).SetUnits(pb.U_BYTES)
-	bar.Output = c.reportWriter
-	bar.SetMaxWidth(80)
-	bar.ShowTimeLeft = false
-	bar.ShowPercent = false
-	bar.Start()
 	destStream = bar.NewProxyReader(destStream)
-	defer bar.Finish()
 
 	// === Send a copy of the original, uncompressed, stream, to a separate path if necessary.
 	var originalLayerReader io.Reader // DO NOT USE this other than to drain the input if no other consumer in the pipeline has done so.
@@ -761,7 +832,7 @@ func compressGoroutine(dest *io.PipeWriter, src io.Reader) {
 		dest.CloseWithError(err) // CloseWithError(nil) is equivalent to Close()
 	}()
 
-	zipper := gzip.NewWriter(dest)
+	zipper := pgzip.NewWriter(dest)
 	defer zipper.Close()
 
 	_, err = io.Copy(zipper, src) // Sets err to nil, i.e. causes dest.Close()
diff --git a/vendor/github.com/containers/image/directory/directory_dest.go b/vendor/github.com/containers/image/directory/directory_dest.go
index d75c195b2dd..4b2ab022e24 100644
--- a/vendor/github.com/containers/image/directory/directory_dest.go
+++ b/vendor/github.com/containers/image/directory/directory_dest.go
@@ -124,6 +124,11 @@ func (d *dirImageDestination) IgnoresEmbeddedDockerReference() bool {
 	return false // N/A, DockerReference() returns nil.
 }
 
+// HasThreadSafePutBlob indicates whether PutBlob can be executed concurrently.
+func (d *dirImageDestination) HasThreadSafePutBlob() bool {
+	return false
+}
+
 // PutBlob writes contents of stream and returns data representing the result (with all data filled in).
 // inputInfo.Digest can be optionally provided if known; it is not mandatory for the implementation to verify it.
 // inputInfo.Size is the expected length of stream, if known.
diff --git a/vendor/github.com/containers/image/directory/directory_src.go b/vendor/github.com/containers/image/directory/directory_src.go
index 3625def8059..59b625397b8 100644
--- a/vendor/github.com/containers/image/directory/directory_src.go
+++ b/vendor/github.com/containers/image/directory/directory_src.go
@@ -48,6 +48,11 @@ func (s *dirImageSource) GetManifest(ctx context.Context, instanceDigest *digest
 	return m, manifest.GuessMIMEType(m), err
 }
 
+// HasThreadSafeGetBlob indicates whether GetBlob can be executed concurrently.
+func (s *dirImageSource) HasThreadSafeGetBlob() bool {
+	return false
+}
+
 // GetBlob returns a stream for the specified blob, and the blob’s size (or -1 if unknown).
 // The Digest field in BlobInfo is guaranteed to be provided, Size may be -1 and MediaType may be optionally provided.
 // May update BlobInfoCache, preferably after it knows for certain that a blob truly exists at a specific location.
diff --git a/vendor/github.com/containers/image/docker/docker_client.go b/vendor/github.com/containers/image/docker/docker_client.go
index 7f55dbe7f88..23d2ac70fdb 100644
--- a/vendor/github.com/containers/image/docker/docker_client.go
+++ b/vendor/github.com/containers/image/docker/docker_client.go
@@ -13,6 +13,7 @@ import (
 	"path/filepath"
 	"strconv"
 	"strings"
+	"sync"
 	"time"
 
 	"github.com/containers/image/docker/reference"
@@ -84,6 +85,7 @@ type dockerClient struct {
 	registry              string
 	client                *http.Client
 	insecureSkipTLSVerify bool
+
 	// The following members are not set by newDockerClient and must be set by callers if needed.
 	username      string
 	password      string
@@ -95,8 +97,13 @@ type dockerClient struct {
 	scheme             string // Empty value also used to indicate detectProperties() has not yet succeeded.
 	challenges         []challenge
 	supportsSignatures bool
-	// Private state for setupRequestAuth
-	tokenCache map[string]bearerToken
+
+	// Private state for setupRequestAuth (key: string, value: bearerToken)
+	tokenCache sync.Map
+	// detectPropertiesError caches the initial error.
+	detectPropertiesError error
+	// detectPropertiesOnce is used to execuute detectProperties() at most once in in makeRequest().
+	detectPropertiesOnce sync.Once
 }
 
 type authScope struct {
@@ -262,7 +269,6 @@ func newDockerClient(sys *types.SystemContext, registry, reference string) (*doc
 		registry:              registry,
 		client:                &http.Client{Transport: tr},
 		insecureSkipTLSVerify: skipVerify,
-		tokenCache:            map[string]bearerToken{},
 	}, nil
 }
 
@@ -473,14 +479,18 @@ func (c *dockerClient) setupRequestAuth(req *http.Request) error {
 				cacheKey = fmt.Sprintf("%s:%s", c.extraScope.remoteName, c.extraScope.actions)
 				scopes = append(scopes, *c.extraScope)
 			}
-			token, ok := c.tokenCache[cacheKey]
-			if !ok || time.Now().After(token.expirationTime) {
+			var token bearerToken
+			t, inCache := c.tokenCache.Load(cacheKey)
+			if inCache {
+				token = t.(bearerToken)
+			}
+			if !inCache || time.Now().After(token.expirationTime) {
 				t, err := c.getBearerToken(req.Context(), challenge, scopes)
 				if err != nil {
 					return err
 				}
 				token = *t
-				c.tokenCache[cacheKey] = token
+				c.tokenCache.Store(cacheKey, token)
 			}
 			req.Header.Set("Authorization", fmt.Sprintf("Bearer %s", token.Token))
 			return nil
@@ -545,9 +555,9 @@ func (c *dockerClient) getBearerToken(ctx context.Context, challenge challenge,
 	return newBearerTokenFromJSONBlob(tokenBlob)
 }
 
-// detectProperties detects various properties of the registry.
-// See the dockerClient documentation for members which are affected by this.
-func (c *dockerClient) detectProperties(ctx context.Context) error {
+// detectPropertiesHelper performs the work of detectProperties which executes
+// it at most once.
+func (c *dockerClient) detectPropertiesHelper(ctx context.Context) error {
 	if c.scheme != "" {
 		return nil
 	}
@@ -604,6 +614,13 @@ func (c *dockerClient) detectProperties(ctx context.Context) error {
 	return err
 }
 
+// detectProperties detects various properties of the registry.
+// See the dockerClient documentation for members which are affected by this.
+func (c *dockerClient) detectProperties(ctx context.Context) error {
+	c.detectPropertiesOnce.Do(func() { c.detectPropertiesError = c.detectPropertiesHelper(ctx) })
+	return c.detectPropertiesError
+}
+
 // getExtensionsSignatures returns signatures from the X-Registry-Supports-Signatures API extension,
 // using the original data structures.
 func (c *dockerClient) getExtensionsSignatures(ctx context.Context, ref dockerReference, manifestDigest digest.Digest) (*extensionSignatureList, error) {
diff --git a/vendor/github.com/containers/image/docker/docker_image_dest.go b/vendor/github.com/containers/image/docker/docker_image_dest.go
index 2f471f64840..5b006c66bd5 100644
--- a/vendor/github.com/containers/image/docker/docker_image_dest.go
+++ b/vendor/github.com/containers/image/docker/docker_image_dest.go
@@ -111,6 +111,11 @@ func (c *sizeCounter) Write(p []byte) (n int, err error) {
 	return len(p), nil
 }
 
+// HasThreadSafePutBlob indicates whether PutBlob can be executed concurrently.
+func (d *dockerImageDestination) HasThreadSafePutBlob() bool {
+	return true
+}
+
 // PutBlob writes contents of stream and returns data representing the result (with all data filled in).
 // inputInfo.Digest can be optionally provided if known; it is not mandatory for the implementation to verify it.
 // inputInfo.Size is the expected length of stream, if known.
diff --git a/vendor/github.com/containers/image/docker/docker_image_src.go b/vendor/github.com/containers/image/docker/docker_image_src.go
index fbed6297f68..c88ff2f3462 100644
--- a/vendor/github.com/containers/image/docker/docker_image_src.go
+++ b/vendor/github.com/containers/image/docker/docker_image_src.go
@@ -161,6 +161,11 @@ func getBlobSize(resp *http.Response) int64 {
 	return size
 }
 
+// HasThreadSafeGetBlob indicates whether GetBlob can be executed concurrently.
+func (s *dockerImageSource) HasThreadSafeGetBlob() bool {
+	return true
+}
+
 // GetBlob returns a stream for the specified blob, and the blob’s size (or -1 if unknown).
 // The Digest field in BlobInfo is guaranteed to be provided, Size may be -1 and MediaType may be optionally provided.
 // May update BlobInfoCache, preferably after it knows for certain that a blob truly exists at a specific location.
diff --git a/vendor/github.com/containers/image/docker/tarfile/dest.go b/vendor/github.com/containers/image/docker/tarfile/dest.go
index ad8a48a0355..5f30eddbc72 100644
--- a/vendor/github.com/containers/image/docker/tarfile/dest.go
+++ b/vendor/github.com/containers/image/docker/tarfile/dest.go
@@ -82,6 +82,11 @@ func (d *Destination) IgnoresEmbeddedDockerReference() bool {
 	return false // N/A, we only accept schema2 images where EmbeddedDockerReferenceConflicts() is always false.
 }
 
+// HasThreadSafePutBlob indicates whether PutBlob can be executed concurrently.
+func (d *Destination) HasThreadSafePutBlob() bool {
+	return false
+}
+
 // PutBlob writes contents of stream and returns data representing the result (with all data filled in).
 // inputInfo.Digest can be optionally provided if known; it is not mandatory for the implementation to verify it.
 // inputInfo.Size is the expected length of stream, if known.
diff --git a/vendor/github.com/containers/image/docker/tarfile/src.go b/vendor/github.com/containers/image/docker/tarfile/src.go
index d94ed97831f..889e5f8e8d9 100644
--- a/vendor/github.com/containers/image/docker/tarfile/src.go
+++ b/vendor/github.com/containers/image/docker/tarfile/src.go
@@ -397,6 +397,11 @@ func (r uncompressedReadCloser) Close() error {
 	return res
 }
 
+// HasThreadSafeGetBlob indicates whether GetBlob can be executed concurrently.
+func (s *Source) HasThreadSafeGetBlob() bool {
+	return false
+}
+
 // GetBlob returns a stream for the specified blob, and the blob’s size (or -1 if unknown).
 // The Digest field in BlobInfo is guaranteed to be provided, Size may be -1 and MediaType may be optionally provided.
 // May update BlobInfoCache, preferably after it knows for certain that a blob truly exists at a specific location.
diff --git a/vendor/github.com/containers/image/oci/archive/oci_dest.go b/vendor/github.com/containers/image/oci/archive/oci_dest.go
index 3997ac2eef9..9571c37e2ba 100644
--- a/vendor/github.com/containers/image/oci/archive/oci_dest.go
+++ b/vendor/github.com/containers/image/oci/archive/oci_dest.go
@@ -77,6 +77,11 @@ func (d *ociArchiveImageDestination) IgnoresEmbeddedDockerReference() bool {
 	return d.unpackedDest.IgnoresEmbeddedDockerReference()
 }
 
+// HasThreadSafePutBlob indicates whether PutBlob can be executed concurrently.
+func (d *ociArchiveImageDestination) HasThreadSafePutBlob() bool {
+	return false
+}
+
 // PutBlob writes contents of stream and returns data representing the result.
 // inputInfo.Digest can be optionally provided if known; it is not mandatory for the implementation to verify it.
 // inputInfo.Size is the expected length of stream, if known.
diff --git a/vendor/github.com/containers/image/oci/archive/oci_src.go b/vendor/github.com/containers/image/oci/archive/oci_src.go
index 084d818f72e..ca74f950bfa 100644
--- a/vendor/github.com/containers/image/oci/archive/oci_src.go
+++ b/vendor/github.com/containers/image/oci/archive/oci_src.go
@@ -76,6 +76,11 @@ func (s *ociArchiveImageSource) GetManifest(ctx context.Context, instanceDigest
 	return s.unpackedSrc.GetManifest(ctx, instanceDigest)
 }
 
+// HasThreadSafeGetBlob indicates whether GetBlob can be executed concurrently.
+func (s *ociArchiveImageSource) HasThreadSafeGetBlob() bool {
+	return false
+}
+
 // GetBlob returns a stream for the specified blob, and the blob’s size (or -1 if unknown).
 // The Digest field in BlobInfo is guaranteed to be provided, Size may be -1 and MediaType may be optionally provided.
 // May update BlobInfoCache, preferably after it knows for certain that a blob truly exists at a specific location.
diff --git a/vendor/github.com/containers/image/oci/layout/oci_dest.go b/vendor/github.com/containers/image/oci/layout/oci_dest.go
index b5a6e08f87d..db102184dbc 100644
--- a/vendor/github.com/containers/image/oci/layout/oci_dest.go
+++ b/vendor/github.com/containers/image/oci/layout/oci_dest.go
@@ -107,6 +107,11 @@ func (d *ociImageDestination) IgnoresEmbeddedDockerReference() bool {
 	return false // N/A, DockerReference() returns nil.
 }
 
+// HasThreadSafePutBlob indicates whether PutBlob can be executed concurrently.
+func (d *ociImageDestination) HasThreadSafePutBlob() bool {
+	return false
+}
+
 // PutBlob writes contents of stream and returns data representing the result.
 // inputInfo.Digest can be optionally provided if known; it is not mandatory for the implementation to verify it.
 // inputInfo.Size is the expected length of stream, if known.
diff --git a/vendor/github.com/containers/image/oci/layout/oci_src.go b/vendor/github.com/containers/image/oci/layout/oci_src.go
index 086a7040de0..cc536f69ef7 100644
--- a/vendor/github.com/containers/image/oci/layout/oci_src.go
+++ b/vendor/github.com/containers/image/oci/layout/oci_src.go
@@ -92,6 +92,11 @@ func (s *ociImageSource) GetManifest(ctx context.Context, instanceDigest *digest
 	return m, mimeType, nil
 }
 
+// HasThreadSafeGetBlob indicates whether GetBlob can be executed concurrently.
+func (s *ociImageSource) HasThreadSafeGetBlob() bool {
+	return false
+}
+
 // GetBlob returns a stream for the specified blob, and the blob’s size (or -1 if unknown).
 // The Digest field in BlobInfo is guaranteed to be provided, Size may be -1 and MediaType may be optionally provided.
 // May update BlobInfoCache, preferably after it knows for certain that a blob truly exists at a specific location.
diff --git a/vendor/github.com/containers/image/openshift/openshift.go b/vendor/github.com/containers/image/openshift/openshift.go
index 0cce1e6c704..814c3eea1de 100644
--- a/vendor/github.com/containers/image/openshift/openshift.go
+++ b/vendor/github.com/containers/image/openshift/openshift.go
@@ -211,6 +211,11 @@ func (s *openshiftImageSource) GetManifest(ctx context.Context, instanceDigest *
 	return s.docker.GetManifest(ctx, instanceDigest)
 }
 
+// HasThreadSafeGetBlob indicates whether GetBlob can be executed concurrently.
+func (s *openshiftImageSource) HasThreadSafeGetBlob() bool {
+	return false
+}
+
 // GetBlob returns a stream for the specified blob, and the blob’s size (or -1 if unknown).
 // The Digest field in BlobInfo is guaranteed to be provided, Size may be -1 and MediaType may be optionally provided.
 // May update BlobInfoCache, preferably after it knows for certain that a blob truly exists at a specific location.
@@ -378,6 +383,11 @@ func (d *openshiftImageDestination) IgnoresEmbeddedDockerReference() bool {
 	return d.docker.IgnoresEmbeddedDockerReference()
 }
 
+// HasThreadSafePutBlob indicates whether PutBlob can be executed concurrently.
+func (d *openshiftImageDestination) HasThreadSafePutBlob() bool {
+	return false
+}
+
 // PutBlob writes contents of stream and returns data representing the result (with all data filled in).
 // inputInfo.Digest can be optionally provided if known; it is not mandatory for the implementation to verify it.
 // inputInfo.Size is the expected length of stream, if known.
diff --git a/vendor/github.com/containers/image/ostree/ostree_dest.go b/vendor/github.com/containers/image/ostree/ostree_dest.go
index 06489894835..d69f4fa331a 100644
--- a/vendor/github.com/containers/image/ostree/ostree_dest.go
+++ b/vendor/github.com/containers/image/ostree/ostree_dest.go
@@ -4,7 +4,6 @@ package ostree
 
 import (
 	"bytes"
-	"compress/gzip"
 	"context"
 	"encoding/base64"
 	"encoding/json"
@@ -24,6 +23,7 @@ import (
 	"github.com/containers/image/manifest"
 	"github.com/containers/image/types"
 	"github.com/containers/storage/pkg/archive"
+	"github.com/klauspost/pgzip"
 	"github.com/opencontainers/go-digest"
 	selinux "github.com/opencontainers/selinux/go-selinux"
 	"github.com/ostreedev/ostree-go/pkg/otbuiltin"
@@ -132,6 +132,11 @@ func (d *ostreeImageDestination) IgnoresEmbeddedDockerReference() bool {
 	return false // N/A, DockerReference() returns nil.
 }
 
+// HasThreadSafePutBlob indicates whether PutBlob can be executed concurrently.
+func (d *ostreeImageDestination) HasThreadSafePutBlob() bool {
+	return false
+}
+
 // PutBlob writes contents of stream and returns data representing the result.
 // inputInfo.Digest can be optionally provided if known; it is not mandatory for the implementation to verify it.
 // inputInfo.Size is the expected length of stream, if known.
@@ -249,7 +254,7 @@ func (d *ostreeImageDestination) ostreeCommit(repo *otbuiltin.Repo, branch strin
 }
 
 func generateTarSplitMetadata(output *bytes.Buffer, file string) (digest.Digest, int64, error) {
-	mfz := gzip.NewWriter(output)
+	mfz := pgzip.NewWriter(output)
 	defer mfz.Close()
 	metaPacker := storage.NewJSONPacker(mfz)
 
diff --git a/vendor/github.com/containers/image/ostree/ostree_src.go b/vendor/github.com/containers/image/ostree/ostree_src.go
index e73cae19861..df432c9f3ad 100644
--- a/vendor/github.com/containers/image/ostree/ostree_src.go
+++ b/vendor/github.com/containers/image/ostree/ostree_src.go
@@ -4,7 +4,6 @@ package ostree
 
 import (
 	"bytes"
-	"compress/gzip"
 	"context"
 	"encoding/base64"
 	"fmt"
@@ -17,6 +16,7 @@ import (
 	"github.com/containers/image/manifest"
 	"github.com/containers/image/types"
 	"github.com/containers/storage/pkg/ioutils"
+	"github.com/klauspost/pgzip"
 	"github.com/opencontainers/go-digest"
 	glib "github.com/ostreedev/ostree-go/pkg/glibobject"
 	"github.com/pkg/errors"
@@ -255,6 +255,11 @@ func (s *ostreeImageSource) readSingleFile(commit, path string) (io.ReadCloser,
 	return getter.Get(path)
 }
 
+// HasThreadSafeGetBlob indicates whether GetBlob can be executed concurrently.
+func (s *ostreeImageSource) HasThreadSafeGetBlob() bool {
+	return false
+}
+
 // GetBlob returns a stream for the specified blob, and the blob’s size (or -1 if unknown).
 // The Digest field in BlobInfo is guaranteed to be provided, Size may be -1 and MediaType may be optionally provided.
 // May update BlobInfoCache, preferably after it knows for certain that a blob truly exists at a specific location.
@@ -304,7 +309,7 @@ func (s *ostreeImageSource) GetBlob(ctx context.Context, info types.BlobInfo, ca
 	}
 
 	mf := bytes.NewReader(tarsplit)
-	mfz, err := gzip.NewReader(mf)
+	mfz, err := pgzip.NewReader(mf)
 	if err != nil {
 		return nil, 0, err
 	}
diff --git a/vendor/github.com/containers/image/pkg/blobinfocache/default.go b/vendor/github.com/containers/image/pkg/blobinfocache/default.go
index 6da9f2805db..459ae5c0664 100644
--- a/vendor/github.com/containers/image/pkg/blobinfocache/default.go
+++ b/vendor/github.com/containers/image/pkg/blobinfocache/default.go
@@ -54,7 +54,7 @@ func DefaultCache(sys *types.SystemContext) types.BlobInfoCache {
 	}
 	path := filepath.Join(dir, blobInfoCacheFilename)
 	if err := os.MkdirAll(dir, 0700); err != nil {
-		logrus.Debugf("Error creating parent directories for %s, using a memory-only cache: %v", err)
+		logrus.Debugf("Error creating parent directories for %s, using a memory-only cache: %v", blobInfoCacheFilename, err)
 		return NewMemoryCache()
 	}
 
diff --git a/vendor/github.com/containers/image/pkg/compression/compression.go b/vendor/github.com/containers/image/pkg/compression/compression.go
index a39523e4f4b..aad2bfcf269 100644
--- a/vendor/github.com/containers/image/pkg/compression/compression.go
+++ b/vendor/github.com/containers/image/pkg/compression/compression.go
@@ -3,10 +3,10 @@ package compression
 import (
 	"bytes"
 	"compress/bzip2"
-	"compress/gzip"
 	"io"
 	"io/ioutil"
 
+	"github.com/klauspost/pgzip"
 	"github.com/pkg/errors"
 	"github.com/sirupsen/logrus"
 	"github.com/ulikunitz/xz"
@@ -18,7 +18,7 @@ type DecompressorFunc func(io.Reader) (io.ReadCloser, error)
 
 // GzipDecompressor is a DecompressorFunc for the gzip compression algorithm.
 func GzipDecompressor(r io.Reader) (io.ReadCloser, error) {
-	return gzip.NewReader(r)
+	return pgzip.NewReader(r)
 }
 
 // Bzip2Decompressor is a DecompressorFunc for the bzip2 compression algorithm.
diff --git a/vendor/github.com/containers/image/pkg/sysregistriesv2/system_registries_v2.go b/vendor/github.com/containers/image/pkg/sysregistriesv2/system_registries_v2.go
index afc7312d1d4..9e3e9cfe19a 100644
--- a/vendor/github.com/containers/image/pkg/sysregistriesv2/system_registries_v2.go
+++ b/vendor/github.com/containers/image/pkg/sysregistriesv2/system_registries_v2.go
@@ -318,8 +318,37 @@ func FindUnqualifiedSearchRegistries(ctx *types.SystemContext) ([]Registry, erro
 	return unqualified, nil
 }
 
-// FindRegistry returns the Registry with the longest prefix for ref.  If no
-// Registry prefixes the image, nil is returned.
+// refMatchesPrefix returns true iff ref,
+// which is a registry, repository namespace, repository or image reference (as formatted by
+// reference.Domain(), reference.Named.Name() or reference.Reference.String()
+// — note that this requires the name to start with an explicit hostname!),
+// matches a Registry.Prefix value.
+// (This is split from the caller primarily to make testing easier.)
+func refMatchesPrefix(ref, prefix string) bool {
+	switch {
+	case len(ref) < len(prefix):
+		return false
+	case len(ref) == len(prefix):
+		return ref == prefix
+	case len(ref) > len(prefix):
+		if !strings.HasPrefix(ref, prefix) {
+			return false
+		}
+		c := ref[len(prefix)]
+		// This allows "example.com:5000" to match "example.com",
+		// which is unintended; that will get fixed eventually, DON'T RELY
+		// ON THE CURRENT BEHAVIOR.
+		return c == ':' || c == '/' || c == '@'
+	default:
+		panic("Internal error: impossible comparison outcome")
+	}
+}
+
+// FindRegistry returns the Registry with the longest prefix for ref,
+// which is a registry, repository namespace repository or image reference (as formatted by
+// reference.Domain(), reference.Named.Name() or reference.Reference.String()
+// — note that this requires the name to start with an explicit hostname!).
+// If no Registry prefixes the image, nil is returned.
 func FindRegistry(ctx *types.SystemContext, ref string) (*Registry, error) {
 	registries, err := GetRegistries(ctx)
 	if err != nil {
@@ -329,7 +358,7 @@ func FindRegistry(ctx *types.SystemContext, ref string) (*Registry, error) {
 	reg := Registry{}
 	prefixLen := 0
 	for _, r := range registries {
-		if strings.HasPrefix(ref, r.Prefix+"/") || ref == r.Prefix {
+		if refMatchesPrefix(ref, r.Prefix) {
 			length := len(r.Prefix)
 			if length > prefixLen {
 				reg = r
diff --git a/vendor/github.com/containers/image/storage/storage_image.go b/vendor/github.com/containers/image/storage/storage_image.go
index bd6813119e7..b53fbdf6e79 100644
--- a/vendor/github.com/containers/image/storage/storage_image.go
+++ b/vendor/github.com/containers/image/storage/storage_image.go
@@ -11,6 +11,7 @@ import (
 	"io/ioutil"
 	"os"
 	"path/filepath"
+	"sync"
 	"sync/atomic"
 
 	"github.com/containers/image/image"
@@ -47,6 +48,7 @@ type storageImageSource struct {
 	image          *storage.Image
 	layerPosition  map[digest.Digest]int // Where we are in reading a blob's layers
 	cachedManifest []byte                // A cached copy of the manifest, if already known, or nil
+	getBlobMutex   sync.Mutex            // Mutex to sync state for parallel GetBlob executions
 	SignatureSizes []int                 `json:"signature-sizes,omitempty"` // List of sizes of each signature slice
 }
 
@@ -56,6 +58,7 @@ type storageImageDestination struct {
 	nextTempFileID int32                           // A counter that we use for computing filenames to assign to blobs
 	manifest       []byte                          // Manifest contents, temporary
 	signatures     []byte                          // Signature contents, temporary
+	putBlobMutex   sync.Mutex                      // Mutex to sync state for parallel PutBlob executions
 	blobDiffIDs    map[digest.Digest]digest.Digest // Mapping from layer blobsums to their corresponding DiffIDs
 	fileSizes      map[digest.Digest]int64         // Mapping from layer blobsums to their sizes
 	filenames      map[digest.Digest]string        // Mapping from layer blobsums to names of files we used to hold them
@@ -91,15 +94,20 @@ func newImageSource(imageRef storageReference) (*storageImageSource, error) {
 }
 
 // Reference returns the image reference that we used to find this image.
-func (s storageImageSource) Reference() types.ImageReference {
+func (s *storageImageSource) Reference() types.ImageReference {
 	return s.imageRef
 }
 
 // Close cleans up any resources we tied up while reading the image.
-func (s storageImageSource) Close() error {
+func (s *storageImageSource) Close() error {
 	return nil
 }
 
+// HasThreadSafeGetBlob indicates whether GetBlob can be executed concurrently.
+func (s *storageImageSource) HasThreadSafeGetBlob() bool {
+	return true
+}
+
 // GetBlob returns a stream for the specified blob, and the blob’s size (or -1 if unknown).
 // The Digest field in BlobInfo is guaranteed to be provided, Size may be -1 and MediaType may be optionally provided.
 // May update BlobInfoCache, preferably after it knows for certain that a blob truly exists at a specific location.
@@ -137,8 +145,10 @@ func (s *storageImageSource) getBlobAndLayerID(info types.BlobInfo) (rc io.ReadC
 	// Step through the list of matching layers.  Tests may want to verify that if we have multiple layers
 	// which claim to have the same contents, that we actually do have multiple layers, otherwise we could
 	// just go ahead and use the first one every time.
+	s.getBlobMutex.Lock()
 	i := s.layerPosition[info.Digest]
 	s.layerPosition[info.Digest] = i + 1
+	s.getBlobMutex.Unlock()
 	if len(layers) > 0 {
 		layer = layers[i%len(layers)]
 	}
@@ -300,7 +310,7 @@ func newImageDestination(imageRef storageReference) (*storageImageDestination, e
 
 // Reference returns the reference used to set up this destination.  Note that this should directly correspond to user's intent,
 // e.g. it should use the public hostname instead of the result of resolving CNAMEs or following redirects.
-func (s storageImageDestination) Reference() types.ImageReference {
+func (s *storageImageDestination) Reference() types.ImageReference {
 	return s.imageRef
 }
 
@@ -309,7 +319,7 @@ func (s *storageImageDestination) Close() error {
 	return os.RemoveAll(s.directory)
 }
 
-func (s storageImageDestination) DesiredLayerCompression() types.LayerCompression {
+func (s *storageImageDestination) DesiredLayerCompression() types.LayerCompression {
 	// We ultimately have to decompress layers to populate trees on disk,
 	// so callers shouldn't bother compressing them before handing them to
 	// us, if they're not already compressed.
@@ -320,6 +330,11 @@ func (s *storageImageDestination) computeNextBlobCacheFile() string {
 	return filepath.Join(s.directory, fmt.Sprintf("%d", atomic.AddInt32(&s.nextTempFileID, 1)))
 }
 
+// HasThreadSafePutBlob indicates whether PutBlob can be executed concurrently.
+func (s *storageImageDestination) HasThreadSafePutBlob() bool {
+	return true
+}
+
 // PutBlob writes contents of stream and returns data representing the result.
 // inputInfo.Digest can be optionally provided if known; it is not mandatory for the implementation to verify it.
 // inputInfo.Size is the expected length of stream, if known.
@@ -370,9 +385,11 @@ func (s *storageImageDestination) PutBlob(ctx context.Context, stream io.Reader,
 		return errorBlobInfo, ErrBlobSizeMismatch
 	}
 	// Record information about the blob.
+	s.putBlobMutex.Lock()
 	s.blobDiffIDs[hasher.Digest()] = diffID.Digest()
 	s.fileSizes[hasher.Digest()] = counter.Count
 	s.filenames[hasher.Digest()] = filename
+	s.putBlobMutex.Unlock()
 	blobDigest := blobinfo.Digest
 	if blobDigest.Validate() != nil {
 		blobDigest = hasher.Digest()
@@ -398,6 +415,9 @@ func (s *storageImageDestination) PutBlob(ctx context.Context, stream io.Reader,
 // If the transport can not reuse the requested blob, TryReusingBlob returns (false, {}, nil); it returns a non-nil error only on an unexpected failure.
 // May use and/or update cache.
 func (s *storageImageDestination) TryReusingBlob(ctx context.Context, blobinfo types.BlobInfo, cache types.BlobInfoCache, canSubstitute bool) (bool, types.BlobInfo, error) {
+	// lock the entire method as it executes fairly quickly
+	s.putBlobMutex.Lock()
+	defer s.putBlobMutex.Unlock()
 	if blobinfo.Digest == "" {
 		return false, types.BlobInfo{}, errors.Errorf(`Can not check for a blob with unknown digest`)
 	}
diff --git a/vendor/github.com/containers/image/tarball/tarball_src.go b/vendor/github.com/containers/image/tarball/tarball_src.go
index ee963b8d871..76e3e755ff9 100644
--- a/vendor/github.com/containers/image/tarball/tarball_src.go
+++ b/vendor/github.com/containers/image/tarball/tarball_src.go
@@ -2,7 +2,6 @@ package tarball
 
 import (
 	"bytes"
-	"compress/gzip"
 	"context"
 	"encoding/json"
 	"fmt"
@@ -14,7 +13,7 @@ import (
 	"time"
 
 	"github.com/containers/image/types"
-
+	"github.com/klauspost/pgzip"
 	digest "github.com/opencontainers/go-digest"
 	imgspecs "github.com/opencontainers/image-spec/specs-go"
 	imgspecv1 "github.com/opencontainers/image-spec/specs-go/v1"
@@ -77,7 +76,7 @@ func (r *tarballReference) NewImageSource(ctx context.Context, sys *types.System
 
 		// Set up to digest the file after we maybe decompress it.
 		diffIDdigester := digest.Canonical.Digester()
-		uncompressed, err := gzip.NewReader(reader)
+		uncompressed, err := pgzip.NewReader(reader)
 		if err == nil {
 			// It is compressed, so the diffID is the digest of the uncompressed version
 			reader = io.TeeReader(uncompressed, diffIDdigester.Hash())
@@ -207,6 +206,11 @@ func (is *tarballImageSource) Close() error {
 	return nil
 }
 
+// HasThreadSafeGetBlob indicates whether GetBlob can be executed concurrently.
+func (is *tarballImageSource) HasThreadSafeGetBlob() bool {
+	return false
+}
+
 // GetBlob returns a stream for the specified blob, and the blob’s size (or -1 if unknown).
 // The Digest field in BlobInfo is guaranteed to be provided, Size may be -1 and MediaType may be optionally provided.
 // May update BlobInfoCache, preferably after it knows for certain that a blob truly exists at a specific location.
diff --git a/vendor/github.com/containers/image/types/types.go b/vendor/github.com/containers/image/types/types.go
index dda33277639..9fdab2314a4 100644
--- a/vendor/github.com/containers/image/types/types.go
+++ b/vendor/github.com/containers/image/types/types.go
@@ -198,6 +198,8 @@ type ImageSource interface {
 	// The Digest field in BlobInfo is guaranteed to be provided, Size may be -1 and MediaType may be optionally provided.
 	// May update BlobInfoCache, preferably after it knows for certain that a blob truly exists at a specific location.
 	GetBlob(context.Context, BlobInfo, BlobInfoCache) (io.ReadCloser, int64, error)
+	// HasThreadSafeGetBlob indicates whether GetBlob can be executed concurrently.
+	HasThreadSafeGetBlob() bool
 	// GetSignatures returns the image's signatures.  It may use a remote (= slow) service.
 	// If instanceDigest is not nil, it contains a digest of the specific manifest instance to retrieve signatures for
 	// (when the primary manifest is a manifest list); this never happens if the primary manifest is not a manifest list
@@ -264,6 +266,8 @@ type ImageDestination interface {
 	// to any other readers for download using the supplied digest.
 	// If stream.Read() at any time, ESPECIALLY at end of input, returns an error, PutBlob MUST 1) fail, and 2) delete any data stored so far.
 	PutBlob(ctx context.Context, stream io.Reader, inputInfo BlobInfo, cache BlobInfoCache, isConfig bool) (BlobInfo, error)
+	// HasThreadSafePutBlob indicates whether PutBlob can be executed concurrently.
+	HasThreadSafePutBlob() bool
 	// TryReusingBlob checks whether the transport already contains, or can efficiently reuse, a blob, and if so, applies it to the current destination
 	// (e.g. if the blob is a filesystem layer, this signifies that the changes it describes need to be applied again when composing a filesystem tree).
 	// info.Digest must not be empty.
diff --git a/vendor/github.com/containers/image/vendor.conf b/vendor/github.com/containers/image/vendor.conf
index 88537981a29..bbbb1a45844 100644
--- a/vendor/github.com/containers/image/vendor.conf
+++ b/vendor/github.com/containers/image/vendor.conf
@@ -16,7 +16,7 @@ github.com/imdario/mergo 50d4dbd4eb0e84778abe37cefef140271d96fade
 github.com/mattn/go-runewidth 14207d285c6c197daabb5c9793d63e7af9ab2d50
 github.com/mistifyio/go-zfs c0224de804d438efd11ea6e52ada8014537d6062
 github.com/mtrmac/gpgme b2432428689ca58c2b8e8dea9449d3295cf96fc9
-github.com/opencontainers/go-digest aa2ec055abd10d26d539eb630a92241b781ce4bc
+github.com/opencontainers/go-digest c9281466c8b2f606084ac71339773efd177436e7
 github.com/opencontainers/image-spec v1.0.0
 github.com/opencontainers/runc 6b1d0e76f239ffb435445e5ae316d2676c07c6e3
 github.com/pborman/uuid 1b00554d822231195d1babd97ff4a781231955c9
@@ -26,8 +26,9 @@ github.com/stretchr/testify 4d4bfba8f1d1027c4fdbe371823030df51419987
 github.com/vbatts/tar-split v0.10.2
 golang.org/x/crypto 453249f01cfeb54c3d549ddb75ff152ca243f9d8
 golang.org/x/net 6b27048ae5e6ad1ef927e72e437531493de612fe
+golang.org/x/sync 42b317875d0fa942474b76e1b46a6060d720ae6e
 golang.org/x/sys 43e60d72a8e2bd92ee98319ba9a384a0e9837c08
-gopkg.in/cheggaaa/pb.v1 d7e6ca3010b6f084d8056847f55d7f572f180678
+gopkg.in/cheggaaa/pb.v1 v1.0.27
 gopkg.in/yaml.v2 a3f3340b5840cee44f372bddb5880fcbc419b46a
 k8s.io/client-go bcde30fb7eaed76fd98a36b4120321b94995ffb6
 github.com/xeipuuv/gojsonschema master
@@ -36,7 +37,7 @@ github.com/xeipuuv/gojsonpointer master
 github.com/tchap/go-patricia v2.2.6
 github.com/opencontainers/selinux 077c8b6d1c18456fb7c792bc0de52295a0d1900e
 github.com/BurntSushi/toml b26d9c308763d68093482582cea63d69be07a0f0
-github.com/ostreedev/ostree-go aeb02c6b6aa2889db3ef62f7855650755befd460
+github.com/ostreedev/ostree-go 56f3a639dbc0f2f5051c6d52dade28a882ba78ce
 github.com/gogo/protobuf fcdc5011193ff531a548e9b0301828d5a5b97fd8
 github.com/pquerna/ffjson master
 github.com/syndtr/gocapability master
@@ -44,3 +45,6 @@ github.com/Microsoft/go-winio ab35fc04b6365e8fcb18e6e9e41ea4a02b10b175
 github.com/Microsoft/hcsshim eca7177590cdcbd25bbc5df27e3b693a54b53a6a
 github.com/ulikunitz/xz v0.5.4
 github.com/boltdb/bolt master
+github.com/klauspost/pgzip v1.2.1
+github.com/klauspost/compress v1.4.1
+github.com/klauspost/cpuid v1.2.0
diff --git a/vendor/github.com/containers/storage/drivers/devmapper/device_setup.go b/vendor/github.com/containers/storage/drivers/devmapper/device_setup.go
index 53aa217c9c0..58abca4772e 100644
--- a/vendor/github.com/containers/storage/drivers/devmapper/device_setup.go
+++ b/vendor/github.com/containers/storage/drivers/devmapper/device_setup.go
@@ -26,7 +26,7 @@ type directLVMConfig struct {
 var (
 	errThinpPercentMissing = errors.New("must set both `dm.thinp_percent` and `dm.thinp_metapercent` if either is specified")
 	errThinpPercentTooBig  = errors.New("combined `dm.thinp_percent` and `dm.thinp_metapercent` must not be greater than 100")
-	errMissingSetupDevice  = errors.New("must provide device path in `dm.setup_device` in order to configure direct-lvm")
+	errMissingSetupDevice  = errors.New("must provide device path in `dm.directlvm_device` in order to configure direct-lvm")
 )
 
 func validateLVMConfig(cfg directLVMConfig) error {
diff --git a/vendor/github.com/containers/storage/layers.go b/vendor/github.com/containers/storage/layers.go
index 0b532eb7716..299d2f81880 100644
--- a/vendor/github.com/containers/storage/layers.go
+++ b/vendor/github.com/containers/storage/layers.go
@@ -2,7 +2,6 @@ package storage
 
 import (
 	"bytes"
-	"compress/gzip"
 	"encoding/json"
 	"fmt"
 	"io"
@@ -20,6 +19,7 @@ import (
 	"github.com/containers/storage/pkg/stringid"
 	"github.com/containers/storage/pkg/system"
 	"github.com/containers/storage/pkg/truncindex"
+	"github.com/klauspost/pgzip"
 	digest "github.com/opencontainers/go-digest"
 	"github.com/opencontainers/selinux/go-selinux/label"
 	"github.com/pkg/errors"
@@ -1055,7 +1055,7 @@ func (r *layerStore) Diff(from, to string, options *DiffOptions) (io.ReadCloser,
 	}
 	defer tsfile.Close()
 
-	decompressor, err := gzip.NewReader(tsfile)
+	decompressor, err := pgzip.NewReader(tsfile)
 	if err != nil {
 		return nil, err
 	}
@@ -1116,9 +1116,9 @@ func (r *layerStore) ApplyDiff(to string, diff io.Reader) (size int64, err error
 	defragmented := io.TeeReader(io.MultiReader(bytes.NewBuffer(header[:n]), diff), compressedCounter)
 
 	tsdata := bytes.Buffer{}
-	compressor, err := gzip.NewWriterLevel(&tsdata, gzip.BestSpeed)
+	compressor, err := pgzip.NewWriterLevel(&tsdata, pgzip.BestSpeed)
 	if err != nil {
-		compressor = gzip.NewWriter(&tsdata)
+		compressor = pgzip.NewWriter(&tsdata)
 	}
 	metadata := storage.NewJSONPacker(compressor)
 	uncompressed, err := archive.DecompressStream(defragmented)
diff --git a/vendor/github.com/containers/storage/pkg/archive/archive.go b/vendor/github.com/containers/storage/pkg/archive/archive.go
index 8d6eaacf35f..228d8bb8272 100644
--- a/vendor/github.com/containers/storage/pkg/archive/archive.go
+++ b/vendor/github.com/containers/storage/pkg/archive/archive.go
@@ -5,7 +5,6 @@ import (
 	"bufio"
 	"bytes"
 	"compress/bzip2"
-	"compress/gzip"
 	"fmt"
 	"io"
 	"io/ioutil"
@@ -22,6 +21,7 @@ import (
 	"github.com/containers/storage/pkg/pools"
 	"github.com/containers/storage/pkg/promise"
 	"github.com/containers/storage/pkg/system"
+	gzip "github.com/klauspost/pgzip"
 	rsystem "github.com/opencontainers/runc/libcontainer/system"
 	"github.com/sirupsen/logrus"
 )
@@ -499,6 +499,8 @@ func (ta *tarAppender) addTarFile(path, name string) error {
 		hdr.Gid = ta.ChownOpts.GID
 	}
 
+	maybeTruncateHeaderModTime(hdr)
+
 	if ta.WhiteoutConverter != nil {
 		wo, err := ta.WhiteoutConverter.ConvertWrite(hdr, path, fi)
 		if err != nil {
@@ -640,11 +642,12 @@ func createTarFile(path, extractDir string, hdr *tar.Header, reader io.Reader, L
 	var errors []string
 	for key, value := range hdr.Xattrs {
 		if err := system.Lsetxattr(path, key, []byte(value), 0); err != nil {
-			if err == syscall.ENOTSUP {
+			if err == syscall.ENOTSUP || (err == syscall.EPERM && inUserns) {
 				// We ignore errors here because not all graphdrivers support
 				// xattrs *cough* old versions of AUFS *cough*. However only
 				// ENOTSUP should be emitted in that case, otherwise we still
-				// bail.
+				// bail.  We also ignore EPERM errors if we are running in a
+				// user namespace.
 				errors = append(errors, err.Error())
 				continue
 			}
diff --git a/vendor/github.com/containers/storage/pkg/archive/archive_110.go b/vendor/github.com/containers/storage/pkg/archive/archive_110.go
index 22b8b48cc10..7bc44a5665e 100644
--- a/vendor/github.com/containers/storage/pkg/archive/archive_110.go
+++ b/vendor/github.com/containers/storage/pkg/archive/archive_110.go
@@ -4,8 +4,19 @@ package archive
 
 import (
 	"archive/tar"
+	"time"
 )
 
 func copyPassHeader(hdr *tar.Header) {
 	hdr.Format = tar.FormatPAX
 }
+
+func maybeTruncateHeaderModTime(hdr *tar.Header) {
+	if hdr.Format == tar.FormatUnknown {
+		// one of the first things archive/tar does is round this
+		// value, possibly up, if the format isn't specified, while we
+		// are much better equipped to handle truncation when scanning
+		// for changes between source and an extracted copy of this
+		hdr.ModTime = hdr.ModTime.Truncate(time.Second)
+	}
+}
diff --git a/vendor/github.com/containers/storage/pkg/archive/archive_19.go b/vendor/github.com/containers/storage/pkg/archive/archive_19.go
index d10d595fa8b..d19811fdbca 100644
--- a/vendor/github.com/containers/storage/pkg/archive/archive_19.go
+++ b/vendor/github.com/containers/storage/pkg/archive/archive_19.go
@@ -8,3 +8,6 @@ import (
 
 func copyPassHeader(hdr *tar.Header) {
 }
+
+func maybeTruncateHeaderModTime(hdr *tar.Header) {
+}
diff --git a/vendor/github.com/containers/storage/pkg/archive/archive_ffjson.go b/vendor/github.com/containers/storage/pkg/archive/archive_ffjson.go
index 9b8103e4da5..2ba44b85d73 100644
--- a/vendor/github.com/containers/storage/pkg/archive/archive_ffjson.go
+++ b/vendor/github.com/containers/storage/pkg/archive/archive_ffjson.go
@@ -1,5 +1,5 @@
 // Code generated by ffjson <https://github.com/pquerna/ffjson>. DO NOT EDIT.
-// source: pkg/archive/archive.go
+// source: ./pkg/archive/archive.go
 
 package archive
 
diff --git a/vendor/github.com/containers/storage/store.go b/vendor/github.com/containers/storage/store.go
index e0dd1b92f4f..a166799c609 100644
--- a/vendor/github.com/containers/storage/store.go
+++ b/vendor/github.com/containers/storage/store.go
@@ -1070,7 +1070,7 @@ func (s *store) imageTopLayerForMapping(image *Image, ristore ROImageStore, read
 		}
 		mappedLayer, _, err := rlstore.Put("", parentLayer, nil, layer.MountLabel, nil, &layerOptions, false, nil, rc)
 		if err != nil {
-			return nil, errors.Wrapf(err, "error creating ID-mapped copy of layer %q", parentLayer.ID)
+			return nil, errors.Wrapf(err, "error creating ID-mapped copy of layer %q", layer.ID)
 		}
 		if err = istore.addMappedTopLayer(image.ID, mappedLayer.ID); err != nil {
 			if err2 := rlstore.Delete(mappedLayer.ID); err2 != nil {
diff --git a/vendor/github.com/containers/storage/vendor.conf b/vendor/github.com/containers/storage/vendor.conf
index fa52584d7cb..936321b12d9 100644
--- a/vendor/github.com/containers/storage/vendor.conf
+++ b/vendor/github.com/containers/storage/vendor.conf
@@ -23,3 +23,6 @@ golang.org/x/net 7dcfb8076726a3fdd9353b6b8a1f1b6be6811bd6
 golang.org/x/sys 07c182904dbd53199946ba614a412c61d3c548f5
 gotest.tools master
 github.com/google/go-cmp master
+github.com/klauspost/pgzip v1.2.1
+github.com/klauspost/compress v1.4.1
+github.com/klauspost/cpuid v1.2.0
diff --git a/vendor/github.com/klauspost/compress/LICENSE b/vendor/github.com/klauspost/compress/LICENSE
new file mode 100644
index 00000000000..74487567632
--- /dev/null
+++ b/vendor/github.com/klauspost/compress/LICENSE
@@ -0,0 +1,27 @@
+Copyright (c) 2012 The Go Authors. All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+
+   * Redistributions of source code must retain the above copyright
+notice, this list of conditions and the following disclaimer.
+   * Redistributions in binary form must reproduce the above
+copyright notice, this list of conditions and the following disclaimer
+in the documentation and/or other materials provided with the
+distribution.
+   * Neither the name of Google Inc. nor the names of its
+contributors may be used to endorse or promote products derived from
+this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
diff --git a/vendor/github.com/klauspost/compress/README.md b/vendor/github.com/klauspost/compress/README.md
new file mode 100644
index 00000000000..c0de37df40c
--- /dev/null
+++ b/vendor/github.com/klauspost/compress/README.md
@@ -0,0 +1,160 @@
+# compress
+
+This package is based on an optimized Deflate function, which is used by gzip/zip/zlib packages.
+
+It offers slightly better compression at lower compression settings, and up to 3x faster encoding at highest compression level.
+
+* [High Throughput Benchmark](http://blog.klauspost.com/go-gzipdeflate-benchmarks/).
+* [Small Payload/Webserver Benchmarks](http://blog.klauspost.com/gzip-performance-for-go-webservers/).
+* [Linear Time Compression](http://blog.klauspost.com/constant-time-gzipzip-compression/).
+* [Re-balancing Deflate Compression Levels](https://blog.klauspost.com/rebalancing-deflate-compression-levels/)
+
+[![Build Status](https://travis-ci.org/klauspost/compress.svg?branch=master)](https://travis-ci.org/klauspost/compress)
+[![Sourcegraph Badge](https://sourcegraph.com/github.com/klauspost/compress/-/badge.svg)](https://sourcegraph.com/github.com/klauspost/compress?badge)
+
+# changelog
+
+* Aug 1, 2018: Added [huff0 README](https://github.com/klauspost/compress/tree/master/huff0#huff0-entropy-compression).
+* Jul 8, 2018: Added [Performance Update 2018](#performance-update-2018) below.
+* Jun 23, 2018: Merged [Go 1.11 inflate optimizations](https://go-review.googlesource.com/c/go/+/102235). Go 1.9 is now required. Backwards compatible version tagged with [v1.3.0](https://github.com/klauspost/compress/releases/tag/v1.3.0).
+* Apr 2, 2018: Added [huff0](https://godoc.org/github.com/klauspost/compress/huff0) en/decoder. Experimental for now, API may change.
+* Mar 4, 2018: Added [FSE Entropy](https://godoc.org/github.com/klauspost/compress/fse) en/decoder. Experimental for now, API may change.
+* Nov 3, 2017: Add compression [Estimate](https://godoc.org/github.com/klauspost/compress#Estimate) function.
+* May 28, 2017: Reduce allocations when resetting decoder.
+* Apr 02, 2017: Change back to official crc32, since changes were merged in Go 1.7.
+* Jan 14, 2017: Reduce stack pressure due to array copies. See [Issue #18625](https://github.com/golang/go/issues/18625).
+* Oct 25, 2016: Level 2-4 have been rewritten and now offers significantly better performance than before.
+* Oct 20, 2016: Port zlib changes from Go 1.7 to fix zlib writer issue. Please update.
+* Oct 16, 2016: Go 1.7 changes merged. Apples to apples this package is a few percent faster, but has a significantly better balance between speed and compression per level. 
+* Mar 24, 2016: Always attempt Huffman encoding on level 4-7. This improves base 64 encoded data compression.
+* Mar 24, 2016: Small speedup for level 1-3.
+* Feb 19, 2016: Faster bit writer, level -2 is 15% faster, level 1 is 4% faster.
+* Feb 19, 2016: Handle small payloads faster in level 1-3.
+* Feb 19, 2016: Added faster level 2 + 3 compression modes.
+* Feb 19, 2016: [Rebalanced compression levels](https://blog.klauspost.com/rebalancing-deflate-compression-levels/), so there is a more even progresssion in terms of compression. New default level is 5.
+* Feb 14, 2016: Snappy: Merge upstream changes. 
+* Feb 14, 2016: Snappy: Fix aggressive skipping.
+* Feb 14, 2016: Snappy: Update benchmark.
+* Feb 13, 2016: Deflate: Fixed assembler problem that could lead to sub-optimal compression.
+* Feb 12, 2016: Snappy: Added AMD64 SSE 4.2 optimizations to matching, which makes easy to compress material run faster. Typical speedup is around 25%.
+* Feb 9, 2016: Added Snappy package fork. This version is 5-7% faster, much more on hard to compress content.
+* Jan 30, 2016: Optimize level 1 to 3 by not considering static dictionary or storing uncompressed. ~4-5% speedup.
+* Jan 16, 2016: Optimization on deflate level 1,2,3 compression.
+* Jan 8 2016: Merge [CL 18317](https://go-review.googlesource.com/#/c/18317): fix reading, writing of zip64 archives.
+* Dec 8 2015: Make level 1 and -2 deterministic even if write size differs.
+* Dec 8 2015: Split encoding functions, so hashing and matching can potentially be inlined. 1-3% faster on AMD64. 5% faster on other platforms.
+* Dec 8 2015: Fixed rare [one byte out-of bounds read](https://github.com/klauspost/compress/issues/20). Please update!
+* Nov 23 2015: Optimization on token writer. ~2-4% faster. Contributed by [@dsnet](https://github.com/dsnet).
+* Nov 20 2015: Small optimization to bit writer on 64 bit systems.
+* Nov 17 2015: Fixed out-of-bound errors if the underlying Writer returned an error. See [#15](https://github.com/klauspost/compress/issues/15).
+* Nov 12 2015: Added [io.WriterTo](https://golang.org/pkg/io/#WriterTo) support to gzip/inflate.
+* Nov 11 2015: Merged [CL 16669](https://go-review.googlesource.com/#/c/16669/4): archive/zip: enable overriding (de)compressors per file
+* Oct 15 2015: Added skipping on uncompressible data. Random data speed up >5x.
+
+# usage
+
+The packages are drop-in replacements for standard libraries. Simply replace the import path to use them:
+
+| old import         | new import                              |
+|--------------------|-----------------------------------------|
+| `compress/gzip`    | `github.com/klauspost/compress/gzip`    |
+| `compress/zlib`    | `github.com/klauspost/compress/zlib`    |
+| `archive/zip`      | `github.com/klauspost/compress/zip`     |
+| `compress/flate`   | `github.com/klauspost/compress/flate`   |
+
+You may also be interested in [pgzip](https://github.com/klauspost/pgzip), which is a drop in replacement for gzip, which support multithreaded compression on big files and the optimized [crc32](https://github.com/klauspost/crc32) package used by these packages.
+
+The packages contains the same as the standard library, so you can use the godoc for that: [gzip](http://golang.org/pkg/compress/gzip/), [zip](http://golang.org/pkg/archive/zip/),  [zlib](http://golang.org/pkg/compress/zlib/), [flate](http://golang.org/pkg/compress/flate/).
+
+Currently there is only minor speedup on decompression (mostly CRC32 calculation).
+
+# Performance Update 2018
+
+It has been a while since we have been looking at the speed of this package compared to the standard library, so I thought I would re-do my tests and give some overall recommendations based on the current state. All benchmarks have been performed with Go 1.10 on my Desktop Intel(R) Core(TM) i7-2600 CPU @3.40GHz. Since I last ran the tests, I have gotten more RAM, which means tests with big files are no longer limited by my SSD.
+
+The raw results are in my [updated spreadsheet](https://docs.google.com/spreadsheets/d/1nuNE2nPfuINCZJRMt6wFWhKpToF95I47XjSsc-1rbPQ/edit?usp=sharing). Due to cgo changes and upstream updates i could not get the cgo version of gzip to compile. Instead I included the [zstd](https://github.com/datadog/zstd) cgo implementation. If I get cgo gzip to work again, I might replace the results in the sheet.
+
+The columns to take note of are: *MB/s* - the throughput. *Reduction* - the data size reduction in percent of the original. *Rel Speed* relative speed compared to the standard libary at the same level. *Smaller* - how many percent smaller is the compressed output compared to stdlib. Negative means the output was bigger. *Loss* means the loss (or gain) in compression as a percentage difference of the input.
+
+The `gzstd` (standard library gzip) and `gzkp` (this package gzip) only uses one CPU core. [`pgzip`](https://github.com/klauspost/pgzip), [`bgzf`](https://github.com/biogo/hts/bgzf) uses all 4 cores. [`zstd`](https://github.com/DataDog/zstd) uses one core, and is a beast (but not Go, yet).
+
+
+## Overall differences.
+
+There appears to be a roughly 5-10% speed advantage over the standard library when comparing at similar compression levels.
+
+The biggest difference you will see is the result of [re-balancing](https://blog.klauspost.com/rebalancing-deflate-compression-levels/) the compression levels. I wanted by library to give a smoother transition between the compression levels than the standard library.
+
+This package attempts to provide a more smooth transition, where "1" is taking a lot of shortcuts, "5" is the reasonable trade-off and "9" is the "give me the best compression", and the values in between gives something reasonable in between. The standard library has big differences in levels 1-4, but levels 5-9 having no significant gains - often spending a lot more time than can be justified by the achieved compression.
+
+There are links to all the test data in the [spreadsheet](https://docs.google.com/spreadsheets/d/1nuNE2nPfuINCZJRMt6wFWhKpToF95I47XjSsc-1rbPQ/edit?usp=sharing) in the top left field on each tab.
+
+## Web Content
+
+This test set aims to emulate typical use in a web server. The test-set is 4GB data in 53k files, and is a mixture of (mostly) HTML, JS, CSS.
+
+Since level 1 and 9 are close to being the same code, they are quite close. But looking at the levels in-between the differences are quite big.
+
+Looking at level 6, this package is 88% faster, but will output about 6% more data. For a web server, this means you can serve 88% more data, but have to pay for 6% more bandwidth. You can draw your own conclusions on what would be the most expensive for your case.
+
+## Object files
+
+This test is for typical data files stored on a server. In this case it is a collection of Go precompiled objects. They are very compressible.
+
+The picture is similar to the web content, but with small differences since this is very compressible. Levels 2-3 offer good speed, but is sacrificing quite a bit of compression. 
+
+The standard library seems suboptimal on level 3 and 4 - offering both worse compression and speed than level 6 & 7 of this package respectively.
+
+## Highly Compressible File
+
+This is a JSON file with very high redundancy. The reduction starts at 95% on level 1, so in real life terms we are dealing with something like a highly redundant stream of data, etc.
+
+It is definitely visible that we are dealing with specialized content here, so the results are very scattered. This package does not do very well at levels 1-4, but picks up significantly at level 5 and levels 7 and 8 offering great speed for the achieved compression.
+
+So if you know you content is extremely compressible you might want to go slightly higher than the defaults. The standard library has a huge gap between levels 3 and 4 in terms of speed (2.75x slowdown), so it offers little "middle ground".
+
+## Medium-High Compressible
+
+This is a pretty common test corpus: [enwik9](http://mattmahoney.net/dc/textdata.html). It contains the first 10^9 bytes of the English Wikipedia dump on Mar. 3, 2006. This is a very good test of typical text based compression and more data heavy streams.
+
+We see a similar picture here as in "Web Content". On equal levels some compression is sacrificed for more speed. Level 5 seems to be the best trade-off between speed and size, beating stdlib level 3 in both.
+
+## Medium Compressible
+
+I will combine two test sets, one [10GB file set](http://mattmahoney.net/dc/10gb.html) and a VM disk image (~8GB). Both contain different data types and represent a typical backup scenario.
+
+The most notable thing is how quickly the standard libary drops to very low compression speeds around level 5-6 without any big gains in compression. Since this type of data is fairly common, this does not seem like good behavior.
+
+
+## Un-compressible Content
+
+This is mainly a test of how good the algorithms are at detecting un-compressible input. The standard library only offers this feature with very conservative settings at level 1. Obviously there is no reason for the algorithms to try to compress input that cannot be compressed.  The only downside is that it might skip some compressible data on false detections.
+
+
+# linear time compression (huffman only)
+
+This compression library adds a special compression level, named `HuffmanOnly`, which allows near linear time compression. This is done by completely disabling matching of previous data, and only reduce the number of bits to represent each character. 
+
+This means that often used characters, like 'e' and ' ' (space) in text use the fewest bits to represent, and rare characters like '¤' takes more bits to represent. For more information see [wikipedia](https://en.wikipedia.org/wiki/Huffman_coding) or this nice [video](https://youtu.be/ZdooBTdW5bM).
+
+Since this type of compression has much less variance, the compression speed is mostly unaffected by the input data, and is usually more than *180MB/s* for a single core.
+
+The downside is that the compression ratio is usually considerably worse than even the fastest conventional compression. The compression raio can never be better than 8:1 (12.5%). 
+
+The linear time compression can be used as a "better than nothing" mode, where you cannot risk the encoder to slow down on some content. For comparison, the size of the "Twain" text is *233460 bytes* (+29% vs. level 1) and encode speed is 144MB/s (4.5x level 1). So in this case you trade a 30% size increase for a 4 times speedup.
+
+For more information see my blog post on [Fast Linear Time Compression](http://blog.klauspost.com/constant-time-gzipzip-compression/).
+
+This is implemented on Go 1.7 as "Huffman Only" mode, though not exposed for gzip.
+
+
+# snappy package
+
+The standard snappy package has now been improved. This repo contains a copy of the snappy repo.
+
+I would advise to use the standard package: https://github.com/golang/snappy
+
+
+# license
+
+This code is licensed under the same conditions as the original Go code. See LICENSE file.
diff --git a/vendor/github.com/klauspost/compress/flate/copy.go b/vendor/github.com/klauspost/compress/flate/copy.go
new file mode 100644
index 00000000000..a3200a8f49e
--- /dev/null
+++ b/vendor/github.com/klauspost/compress/flate/copy.go
@@ -0,0 +1,32 @@
+// Copyright 2012 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package flate
+
+// forwardCopy is like the built-in copy function except that it always goes
+// forward from the start, even if the dst and src overlap.
+// It is equivalent to:
+//   for i := 0; i < n; i++ {
+//     mem[dst+i] = mem[src+i]
+//   }
+func forwardCopy(mem []byte, dst, src, n int) {
+	if dst <= src {
+		copy(mem[dst:dst+n], mem[src:src+n])
+		return
+	}
+	for {
+		if dst >= src+n {
+			copy(mem[dst:dst+n], mem[src:src+n])
+			return
+		}
+		// There is some forward overlap.  The destination
+		// will be filled with a repeated pattern of mem[src:src+k].
+		// We copy one instance of the pattern here, then repeat.
+		// Each time around this loop k will double.
+		k := dst - src
+		copy(mem[dst:dst+k], mem[src:src+k])
+		n -= k
+		dst += k
+	}
+}
diff --git a/vendor/github.com/klauspost/compress/flate/crc32_amd64.go b/vendor/github.com/klauspost/compress/flate/crc32_amd64.go
new file mode 100644
index 00000000000..8298d309aef
--- /dev/null
+++ b/vendor/github.com/klauspost/compress/flate/crc32_amd64.go
@@ -0,0 +1,42 @@
+//+build !noasm
+//+build !appengine
+//+build !gccgo
+
+// Copyright 2015, Klaus Post, see LICENSE for details.
+
+package flate
+
+import (
+	"github.com/klauspost/cpuid"
+)
+
+// crc32sse returns a hash for the first 4 bytes of the slice
+// len(a) must be >= 4.
+//go:noescape
+func crc32sse(a []byte) uint32
+
+// crc32sseAll calculates hashes for each 4-byte set in a.
+// dst must be east len(a) - 4 in size.
+// The size is not checked by the assembly.
+//go:noescape
+func crc32sseAll(a []byte, dst []uint32)
+
+// matchLenSSE4 returns the number of matching bytes in a and b
+// up to length 'max'. Both slices must be at least 'max'
+// bytes in size.
+//
+// TODO: drop the "SSE4" name, since it doesn't use any SSE instructions.
+//
+//go:noescape
+func matchLenSSE4(a, b []byte, max int) int
+
+// histogram accumulates a histogram of b in h.
+// h must be at least 256 entries in length,
+// and must be cleared before calling this function.
+//go:noescape
+func histogram(b []byte, h []int32)
+
+// Detect SSE 4.2 feature.
+func init() {
+	useSSE42 = cpuid.CPU.SSE42()
+}
diff --git a/vendor/github.com/klauspost/compress/flate/crc32_amd64.s b/vendor/github.com/klauspost/compress/flate/crc32_amd64.s
new file mode 100644
index 00000000000..a7994372702
--- /dev/null
+++ b/vendor/github.com/klauspost/compress/flate/crc32_amd64.s
@@ -0,0 +1,214 @@
+//+build !noasm
+//+build !appengine
+//+build !gccgo
+
+// Copyright 2015, Klaus Post, see LICENSE for details.
+
+// func crc32sse(a []byte) uint32
+TEXT ·crc32sse(SB), 4, $0
+	MOVQ a+0(FP), R10
+	XORQ BX, BX
+
+	// CRC32   dword (R10), EBX
+	BYTE $0xF2; BYTE $0x41; BYTE $0x0f
+	BYTE $0x38; BYTE $0xf1; BYTE $0x1a
+
+	MOVL BX, ret+24(FP)
+	RET
+
+// func crc32sseAll(a []byte, dst []uint32)
+TEXT ·crc32sseAll(SB), 4, $0
+	MOVQ  a+0(FP), R8      // R8: src
+	MOVQ  a_len+8(FP), R10 // input length
+	MOVQ  dst+24(FP), R9   // R9: dst
+	SUBQ  $4, R10
+	JS    end
+	JZ    one_crc
+	MOVQ  R10, R13
+	SHRQ  $2, R10          // len/4
+	ANDQ  $3, R13          // len&3
+	XORQ  BX, BX
+	ADDQ  $1, R13
+	TESTQ R10, R10
+	JZ    rem_loop
+
+crc_loop:
+	MOVQ (R8), R11
+	XORQ BX, BX
+	XORQ DX, DX
+	XORQ DI, DI
+	MOVQ R11, R12
+	SHRQ $8, R11
+	MOVQ R12, AX
+	MOVQ R11, CX
+	SHRQ $16, R12
+	SHRQ $16, R11
+	MOVQ R12, SI
+
+	// CRC32   EAX, EBX
+	BYTE $0xF2; BYTE $0x0f
+	BYTE $0x38; BYTE $0xf1; BYTE $0xd8
+
+	// CRC32   ECX, EDX
+	BYTE $0xF2; BYTE $0x0f
+	BYTE $0x38; BYTE $0xf1; BYTE $0xd1
+
+	// CRC32   ESI, EDI
+	BYTE $0xF2; BYTE $0x0f
+	BYTE $0x38; BYTE $0xf1; BYTE $0xfe
+	MOVL BX, (R9)
+	MOVL DX, 4(R9)
+	MOVL DI, 8(R9)
+
+	XORQ BX, BX
+	MOVL R11, AX
+
+	// CRC32   EAX, EBX
+	BYTE $0xF2; BYTE $0x0f
+	BYTE $0x38; BYTE $0xf1; BYTE $0xd8
+	MOVL BX, 12(R9)
+
+	ADDQ $16, R9
+	ADDQ $4, R8
+	XORQ BX, BX
+	SUBQ $1, R10
+	JNZ  crc_loop
+
+rem_loop:
+	MOVL (R8), AX
+
+	// CRC32   EAX, EBX
+	BYTE $0xF2; BYTE $0x0f
+	BYTE $0x38; BYTE $0xf1; BYTE $0xd8
+
+	MOVL BX, (R9)
+	ADDQ $4, R9
+	ADDQ $1, R8
+	XORQ BX, BX
+	SUBQ $1, R13
+	JNZ  rem_loop
+
+end:
+	RET
+
+one_crc:
+	MOVQ $1, R13
+	XORQ BX, BX
+	JMP  rem_loop
+
+// func matchLenSSE4(a, b []byte, max int) int
+TEXT ·matchLenSSE4(SB), 4, $0
+	MOVQ a_base+0(FP), SI
+	MOVQ b_base+24(FP), DI
+	MOVQ DI, DX
+	MOVQ max+48(FP), CX
+
+cmp8:
+	// As long as we are 8 or more bytes before the end of max, we can load and
+	// compare 8 bytes at a time. If those 8 bytes are equal, repeat.
+	CMPQ CX, $8
+	JLT  cmp1
+	MOVQ (SI), AX
+	MOVQ (DI), BX
+	CMPQ AX, BX
+	JNE  bsf
+	ADDQ $8, SI
+	ADDQ $8, DI
+	SUBQ $8, CX
+	JMP  cmp8
+
+bsf:
+	// If those 8 bytes were not equal, XOR the two 8 byte values, and return
+	// the index of the first byte that differs. The BSF instruction finds the
+	// least significant 1 bit, the amd64 architecture is little-endian, and
+	// the shift by 3 converts a bit index to a byte index.
+	XORQ AX, BX
+	BSFQ BX, BX
+	SHRQ $3, BX
+	ADDQ BX, DI
+
+	// Subtract off &b[0] to convert from &b[ret] to ret, and return.
+	SUBQ DX, DI
+	MOVQ DI, ret+56(FP)
+	RET
+
+cmp1:
+	// In the slices' tail, compare 1 byte at a time.
+	CMPQ CX, $0
+	JEQ  matchLenEnd
+	MOVB (SI), AX
+	MOVB (DI), BX
+	CMPB AX, BX
+	JNE  matchLenEnd
+	ADDQ $1, SI
+	ADDQ $1, DI
+	SUBQ $1, CX
+	JMP  cmp1
+
+matchLenEnd:
+	// Subtract off &b[0] to convert from &b[ret] to ret, and return.
+	SUBQ DX, DI
+	MOVQ DI, ret+56(FP)
+	RET
+
+// func histogram(b []byte, h []int32)
+TEXT ·histogram(SB), 4, $0
+	MOVQ b+0(FP), SI     // SI: &b
+	MOVQ b_len+8(FP), R9 // R9: len(b)
+	MOVQ h+24(FP), DI    // DI: Histogram
+	MOVQ R9, R8
+	SHRQ $3, R8
+	JZ   hist1
+	XORQ R11, R11
+
+loop_hist8:
+	MOVQ (SI), R10
+
+	MOVB R10, R11
+	INCL (DI)(R11*4)
+	SHRQ $8, R10
+
+	MOVB R10, R11
+	INCL (DI)(R11*4)
+	SHRQ $8, R10
+
+	MOVB R10, R11
+	INCL (DI)(R11*4)
+	SHRQ $8, R10
+
+	MOVB R10, R11
+	INCL (DI)(R11*4)
+	SHRQ $8, R10
+
+	MOVB R10, R11
+	INCL (DI)(R11*4)
+	SHRQ $8, R10
+
+	MOVB R10, R11
+	INCL (DI)(R11*4)
+	SHRQ $8, R10
+
+	MOVB R10, R11
+	INCL (DI)(R11*4)
+	SHRQ $8, R10
+
+	INCL (DI)(R10*4)
+
+	ADDQ $8, SI
+	DECQ R8
+	JNZ  loop_hist8
+
+hist1:
+	ANDQ $7, R9
+	JZ   end_hist
+	XORQ R10, R10
+
+loop_hist1:
+	MOVB (SI), R10
+	INCL (DI)(R10*4)
+	INCQ SI
+	DECQ R9
+	JNZ  loop_hist1
+
+end_hist:
+	RET
diff --git a/vendor/github.com/klauspost/compress/flate/crc32_noasm.go b/vendor/github.com/klauspost/compress/flate/crc32_noasm.go
new file mode 100644
index 00000000000..dcf43bd50a8
--- /dev/null
+++ b/vendor/github.com/klauspost/compress/flate/crc32_noasm.go
@@ -0,0 +1,35 @@
+//+build !amd64 noasm appengine gccgo
+
+// Copyright 2015, Klaus Post, see LICENSE for details.
+
+package flate
+
+func init() {
+	useSSE42 = false
+}
+
+// crc32sse should never be called.
+func crc32sse(a []byte) uint32 {
+	panic("no assembler")
+}
+
+// crc32sseAll should never be called.
+func crc32sseAll(a []byte, dst []uint32) {
+	panic("no assembler")
+}
+
+// matchLenSSE4 should never be called.
+func matchLenSSE4(a, b []byte, max int) int {
+	panic("no assembler")
+	return 0
+}
+
+// histogram accumulates a histogram of b in h.
+//
+// len(h) must be >= 256, and h's elements must be all zeroes.
+func histogram(b []byte, h []int32) {
+	h = h[:256]
+	for _, t := range b {
+		h[t]++
+	}
+}
diff --git a/vendor/github.com/klauspost/compress/flate/deflate.go b/vendor/github.com/klauspost/compress/flate/deflate.go
new file mode 100644
index 00000000000..9e6e7ff0cfe
--- /dev/null
+++ b/vendor/github.com/klauspost/compress/flate/deflate.go
@@ -0,0 +1,1353 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Copyright (c) 2015 Klaus Post
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package flate
+
+import (
+	"fmt"
+	"io"
+	"math"
+)
+
+const (
+	NoCompression      = 0
+	BestSpeed          = 1
+	BestCompression    = 9
+	DefaultCompression = -1
+
+	// HuffmanOnly disables Lempel-Ziv match searching and only performs Huffman
+	// entropy encoding. This mode is useful in compressing data that has
+	// already been compressed with an LZ style algorithm (e.g. Snappy or LZ4)
+	// that lacks an entropy encoder. Compression gains are achieved when
+	// certain bytes in the input stream occur more frequently than others.
+	//
+	// Note that HuffmanOnly produces a compressed output that is
+	// RFC 1951 compliant. That is, any valid DEFLATE decompressor will
+	// continue to be able to decompress this output.
+	HuffmanOnly         = -2
+	ConstantCompression = HuffmanOnly // compatibility alias.
+
+	logWindowSize    = 15
+	windowSize       = 1 << logWindowSize
+	windowMask       = windowSize - 1
+	logMaxOffsetSize = 15  // Standard DEFLATE
+	minMatchLength   = 4   // The smallest match that the compressor looks for
+	maxMatchLength   = 258 // The longest match for the compressor
+	minOffsetSize    = 1   // The shortest offset that makes any sense
+
+	// The maximum number of tokens we put into a single flat block, just too
+	// stop things from getting too large.
+	maxFlateBlockTokens = 1 << 14
+	maxStoreBlockSize   = 65535
+	hashBits            = 17 // After 17 performance degrades
+	hashSize            = 1 << hashBits
+	hashMask            = (1 << hashBits) - 1
+	hashShift           = (hashBits + minMatchLength - 1) / minMatchLength
+	maxHashOffset       = 1 << 24
+
+	skipNever = math.MaxInt32
+)
+
+var useSSE42 bool
+
+type compressionLevel struct {
+	good, lazy, nice, chain, fastSkipHashing, level int
+}
+
+// Compression levels have been rebalanced from zlib deflate defaults
+// to give a bigger spread in speed and compression.
+// See https://blog.klauspost.com/rebalancing-deflate-compression-levels/
+var levels = []compressionLevel{
+	{}, // 0
+	// Level 1-4 uses specialized algorithm - values not used
+	{0, 0, 0, 0, 0, 1},
+	{0, 0, 0, 0, 0, 2},
+	{0, 0, 0, 0, 0, 3},
+	{0, 0, 0, 0, 0, 4},
+	// For levels 5-6 we don't bother trying with lazy matches.
+	// Lazy matching is at least 30% slower, with 1.5% increase.
+	{6, 0, 12, 8, 12, 5},
+	{8, 0, 24, 16, 16, 6},
+	// Levels 7-9 use increasingly more lazy matching
+	// and increasingly stringent conditions for "good enough".
+	{8, 8, 24, 16, skipNever, 7},
+	{10, 16, 24, 64, skipNever, 8},
+	{32, 258, 258, 4096, skipNever, 9},
+}
+
+type compressor struct {
+	compressionLevel
+
+	w          *huffmanBitWriter
+	bulkHasher func([]byte, []uint32)
+
+	// compression algorithm
+	fill func(*compressor, []byte) int // copy data to window
+	step func(*compressor)             // process window
+	sync bool                          // requesting flush
+
+	// Input hash chains
+	// hashHead[hashValue] contains the largest inputIndex with the specified hash value
+	// If hashHead[hashValue] is within the current window, then
+	// hashPrev[hashHead[hashValue] & windowMask] contains the previous index
+	// with the same hash value.
+	chainHead  int
+	hashHead   [hashSize]uint32
+	hashPrev   [windowSize]uint32
+	hashOffset int
+
+	// input window: unprocessed data is window[index:windowEnd]
+	index         int
+	window        []byte
+	windowEnd     int
+	blockStart    int  // window index where current tokens start
+	byteAvailable bool // if true, still need to process window[index-1].
+
+	// queued output tokens
+	tokens tokens
+
+	// deflate state
+	length         int
+	offset         int
+	hash           uint32
+	maxInsertIndex int
+	err            error
+	ii             uint16 // position of last match, intended to overflow to reset.
+
+	snap      snappyEnc
+	hashMatch [maxMatchLength + minMatchLength]uint32
+}
+
+func (d *compressor) fillDeflate(b []byte) int {
+	if d.index >= 2*windowSize-(minMatchLength+maxMatchLength) {
+		// shift the window by windowSize
+		copy(d.window[:], d.window[windowSize:2*windowSize])
+		d.index -= windowSize
+		d.windowEnd -= windowSize
+		if d.blockStart >= windowSize {
+			d.blockStart -= windowSize
+		} else {
+			d.blockStart = math.MaxInt32
+		}
+		d.hashOffset += windowSize
+		if d.hashOffset > maxHashOffset {
+			delta := d.hashOffset - 1
+			d.hashOffset -= delta
+			d.chainHead -= delta
+			// Iterate over slices instead of arrays to avoid copying
+			// the entire table onto the stack (Issue #18625).
+			for i, v := range d.hashPrev[:] {
+				if int(v) > delta {
+					d.hashPrev[i] = uint32(int(v) - delta)
+				} else {
+					d.hashPrev[i] = 0
+				}
+			}
+			for i, v := range d.hashHead[:] {
+				if int(v) > delta {
+					d.hashHead[i] = uint32(int(v) - delta)
+				} else {
+					d.hashHead[i] = 0
+				}
+			}
+		}
+	}
+	n := copy(d.window[d.windowEnd:], b)
+	d.windowEnd += n
+	return n
+}
+
+func (d *compressor) writeBlock(tok tokens, index int, eof bool) error {
+	if index > 0 || eof {
+		var window []byte
+		if d.blockStart <= index {
+			window = d.window[d.blockStart:index]
+		}
+		d.blockStart = index
+		d.w.writeBlock(tok.tokens[:tok.n], eof, window)
+		return d.w.err
+	}
+	return nil
+}
+
+// writeBlockSkip writes the current block and uses the number of tokens
+// to determine if the block should be stored on no matches, or
+// only huffman encoded.
+func (d *compressor) writeBlockSkip(tok tokens, index int, eof bool) error {
+	if index > 0 || eof {
+		if d.blockStart <= index {
+			window := d.window[d.blockStart:index]
+			// If we removed less than a 64th of all literals
+			// we huffman compress the block.
+			if int(tok.n) > len(window)-int(tok.n>>6) {
+				d.w.writeBlockHuff(eof, window)
+			} else {
+				// Write a dynamic huffman block.
+				d.w.writeBlockDynamic(tok.tokens[:tok.n], eof, window)
+			}
+		} else {
+			d.w.writeBlock(tok.tokens[:tok.n], eof, nil)
+		}
+		d.blockStart = index
+		return d.w.err
+	}
+	return nil
+}
+
+// fillWindow will fill the current window with the supplied
+// dictionary and calculate all hashes.
+// This is much faster than doing a full encode.
+// Should only be used after a start/reset.
+func (d *compressor) fillWindow(b []byte) {
+	// Do not fill window if we are in store-only mode,
+	// use constant or Snappy compression.
+	switch d.compressionLevel.level {
+	case 0, 1, 2:
+		return
+	}
+	// If we are given too much, cut it.
+	if len(b) > windowSize {
+		b = b[len(b)-windowSize:]
+	}
+	// Add all to window.
+	n := copy(d.window[d.windowEnd:], b)
+
+	// Calculate 256 hashes at the time (more L1 cache hits)
+	loops := (n + 256 - minMatchLength) / 256
+	for j := 0; j < loops; j++ {
+		startindex := j * 256
+		end := startindex + 256 + minMatchLength - 1
+		if end > n {
+			end = n
+		}
+		tocheck := d.window[startindex:end]
+		dstSize := len(tocheck) - minMatchLength + 1
+
+		if dstSize <= 0 {
+			continue
+		}
+
+		dst := d.hashMatch[:dstSize]
+		d.bulkHasher(tocheck, dst)
+		var newH uint32
+		for i, val := range dst {
+			di := i + startindex
+			newH = val & hashMask
+			// Get previous value with the same hash.
+			// Our chain should point to the previous value.
+			d.hashPrev[di&windowMask] = d.hashHead[newH]
+			// Set the head of the hash chain to us.
+			d.hashHead[newH] = uint32(di + d.hashOffset)
+		}
+		d.hash = newH
+	}
+	// Update window information.
+	d.windowEnd += n
+	d.index = n
+}
+
+// Try to find a match starting at index whose length is greater than prevSize.
+// We only look at chainCount possibilities before giving up.
+// pos = d.index, prevHead = d.chainHead-d.hashOffset, prevLength=minMatchLength-1, lookahead
+func (d *compressor) findMatch(pos int, prevHead int, prevLength int, lookahead int) (length, offset int, ok bool) {
+	minMatchLook := maxMatchLength
+	if lookahead < minMatchLook {
+		minMatchLook = lookahead
+	}
+
+	win := d.window[0 : pos+minMatchLook]
+
+	// We quit when we get a match that's at least nice long
+	nice := len(win) - pos
+	if d.nice < nice {
+		nice = d.nice
+	}
+
+	// If we've got a match that's good enough, only look in 1/4 the chain.
+	tries := d.chain
+	length = prevLength
+	if length >= d.good {
+		tries >>= 2
+	}
+
+	wEnd := win[pos+length]
+	wPos := win[pos:]
+	minIndex := pos - windowSize
+
+	for i := prevHead; tries > 0; tries-- {
+		if wEnd == win[i+length] {
+			n := matchLen(win[i:], wPos, minMatchLook)
+
+			if n > length && (n > minMatchLength || pos-i <= 4096) {
+				length = n
+				offset = pos - i
+				ok = true
+				if n >= nice {
+					// The match is good enough that we don't try to find a better one.
+					break
+				}
+				wEnd = win[pos+n]
+			}
+		}
+		if i == minIndex {
+			// hashPrev[i & windowMask] has already been overwritten, so stop now.
+			break
+		}
+		i = int(d.hashPrev[i&windowMask]) - d.hashOffset
+		if i < minIndex || i < 0 {
+			break
+		}
+	}
+	return
+}
+
+// Try to find a match starting at index whose length is greater than prevSize.
+// We only look at chainCount possibilities before giving up.
+// pos = d.index, prevHead = d.chainHead-d.hashOffset, prevLength=minMatchLength-1, lookahead
+func (d *compressor) findMatchSSE(pos int, prevHead int, prevLength int, lookahead int) (length, offset int, ok bool) {
+	minMatchLook := maxMatchLength
+	if lookahead < minMatchLook {
+		minMatchLook = lookahead
+	}
+
+	win := d.window[0 : pos+minMatchLook]
+
+	// We quit when we get a match that's at least nice long
+	nice := len(win) - pos
+	if d.nice < nice {
+		nice = d.nice
+	}
+
+	// If we've got a match that's good enough, only look in 1/4 the chain.
+	tries := d.chain
+	length = prevLength
+	if length >= d.good {
+		tries >>= 2
+	}
+
+	wEnd := win[pos+length]
+	wPos := win[pos:]
+	minIndex := pos - windowSize
+
+	for i := prevHead; tries > 0; tries-- {
+		if wEnd == win[i+length] {
+			n := matchLenSSE4(win[i:], wPos, minMatchLook)
+
+			if n > length && (n > minMatchLength || pos-i <= 4096) {
+				length = n
+				offset = pos - i
+				ok = true
+				if n >= nice {
+					// The match is good enough that we don't try to find a better one.
+					break
+				}
+				wEnd = win[pos+n]
+			}
+		}
+		if i == minIndex {
+			// hashPrev[i & windowMask] has already been overwritten, so stop now.
+			break
+		}
+		i = int(d.hashPrev[i&windowMask]) - d.hashOffset
+		if i < minIndex || i < 0 {
+			break
+		}
+	}
+	return
+}
+
+func (d *compressor) writeStoredBlock(buf []byte) error {
+	if d.w.writeStoredHeader(len(buf), false); d.w.err != nil {
+		return d.w.err
+	}
+	d.w.writeBytes(buf)
+	return d.w.err
+}
+
+const hashmul = 0x1e35a7bd
+
+// hash4 returns a hash representation of the first 4 bytes
+// of the supplied slice.
+// The caller must ensure that len(b) >= 4.
+func hash4(b []byte) uint32 {
+	return ((uint32(b[3]) | uint32(b[2])<<8 | uint32(b[1])<<16 | uint32(b[0])<<24) * hashmul) >> (32 - hashBits)
+}
+
+// bulkHash4 will compute hashes using the same
+// algorithm as hash4
+func bulkHash4(b []byte, dst []uint32) {
+	if len(b) < minMatchLength {
+		return
+	}
+	hb := uint32(b[3]) | uint32(b[2])<<8 | uint32(b[1])<<16 | uint32(b[0])<<24
+	dst[0] = (hb * hashmul) >> (32 - hashBits)
+	end := len(b) - minMatchLength + 1
+	for i := 1; i < end; i++ {
+		hb = (hb << 8) | uint32(b[i+3])
+		dst[i] = (hb * hashmul) >> (32 - hashBits)
+	}
+}
+
+// matchLen returns the number of matching bytes in a and b
+// up to length 'max'. Both slices must be at least 'max'
+// bytes in size.
+func matchLen(a, b []byte, max int) int {
+	a = a[:max]
+	b = b[:len(a)]
+	for i, av := range a {
+		if b[i] != av {
+			return i
+		}
+	}
+	return max
+}
+
+func (d *compressor) initDeflate() {
+	d.window = make([]byte, 2*windowSize)
+	d.hashOffset = 1
+	d.length = minMatchLength - 1
+	d.offset = 0
+	d.byteAvailable = false
+	d.index = 0
+	d.hash = 0
+	d.chainHead = -1
+	d.bulkHasher = bulkHash4
+	if useSSE42 {
+		d.bulkHasher = crc32sseAll
+	}
+}
+
+// Assumes that d.fastSkipHashing != skipNever,
+// otherwise use deflateLazy
+func (d *compressor) deflate() {
+
+	// Sanity enables additional runtime tests.
+	// It's intended to be used during development
+	// to supplement the currently ad-hoc unit tests.
+	const sanity = false
+
+	if d.windowEnd-d.index < minMatchLength+maxMatchLength && !d.sync {
+		return
+	}
+
+	d.maxInsertIndex = d.windowEnd - (minMatchLength - 1)
+	if d.index < d.maxInsertIndex {
+		d.hash = hash4(d.window[d.index : d.index+minMatchLength])
+	}
+
+	for {
+		if sanity && d.index > d.windowEnd {
+			panic("index > windowEnd")
+		}
+		lookahead := d.windowEnd - d.index
+		if lookahead < minMatchLength+maxMatchLength {
+			if !d.sync {
+				return
+			}
+			if sanity && d.index > d.windowEnd {
+				panic("index > windowEnd")
+			}
+			if lookahead == 0 {
+				if d.tokens.n > 0 {
+					if d.err = d.writeBlockSkip(d.tokens, d.index, false); d.err != nil {
+						return
+					}
+					d.tokens.n = 0
+				}
+				return
+			}
+		}
+		if d.index < d.maxInsertIndex {
+			// Update the hash
+			d.hash = hash4(d.window[d.index : d.index+minMatchLength])
+			ch := d.hashHead[d.hash&hashMask]
+			d.chainHead = int(ch)
+			d.hashPrev[d.index&windowMask] = ch
+			d.hashHead[d.hash&hashMask] = uint32(d.index + d.hashOffset)
+		}
+		d.length = minMatchLength - 1
+		d.offset = 0
+		minIndex := d.index - windowSize
+		if minIndex < 0 {
+			minIndex = 0
+		}
+
+		if d.chainHead-d.hashOffset >= minIndex && lookahead > minMatchLength-1 {
+			if newLength, newOffset, ok := d.findMatch(d.index, d.chainHead-d.hashOffset, minMatchLength-1, lookahead); ok {
+				d.length = newLength
+				d.offset = newOffset
+			}
+		}
+		if d.length >= minMatchLength {
+			d.ii = 0
+			// There was a match at the previous step, and the current match is
+			// not better. Output the previous match.
+			// "d.length-3" should NOT be "d.length-minMatchLength", since the format always assume 3
+			d.tokens.tokens[d.tokens.n] = matchToken(uint32(d.length-3), uint32(d.offset-minOffsetSize))
+			d.tokens.n++
+			// Insert in the hash table all strings up to the end of the match.
+			// index and index-1 are already inserted. If there is not enough
+			// lookahead, the last two strings are not inserted into the hash
+			// table.
+			if d.length <= d.fastSkipHashing {
+				var newIndex int
+				newIndex = d.index + d.length
+				// Calculate missing hashes
+				end := newIndex
+				if end > d.maxInsertIndex {
+					end = d.maxInsertIndex
+				}
+				end += minMatchLength - 1
+				startindex := d.index + 1
+				if startindex > d.maxInsertIndex {
+					startindex = d.maxInsertIndex
+				}
+				tocheck := d.window[startindex:end]
+				dstSize := len(tocheck) - minMatchLength + 1
+				if dstSize > 0 {
+					dst := d.hashMatch[:dstSize]
+					bulkHash4(tocheck, dst)
+					var newH uint32
+					for i, val := range dst {
+						di := i + startindex
+						newH = val & hashMask
+						// Get previous value with the same hash.
+						// Our chain should point to the previous value.
+						d.hashPrev[di&windowMask] = d.hashHead[newH]
+						// Set the head of the hash chain to us.
+						d.hashHead[newH] = uint32(di + d.hashOffset)
+					}
+					d.hash = newH
+				}
+				d.index = newIndex
+			} else {
+				// For matches this long, we don't bother inserting each individual
+				// item into the table.
+				d.index += d.length
+				if d.index < d.maxInsertIndex {
+					d.hash = hash4(d.window[d.index : d.index+minMatchLength])
+				}
+			}
+			if d.tokens.n == maxFlateBlockTokens {
+				// The block includes the current character
+				if d.err = d.writeBlockSkip(d.tokens, d.index, false); d.err != nil {
+					return
+				}
+				d.tokens.n = 0
+			}
+		} else {
+			d.ii++
+			end := d.index + int(d.ii>>uint(d.fastSkipHashing)) + 1
+			if end > d.windowEnd {
+				end = d.windowEnd
+			}
+			for i := d.index; i < end; i++ {
+				d.tokens.tokens[d.tokens.n] = literalToken(uint32(d.window[i]))
+				d.tokens.n++
+				if d.tokens.n == maxFlateBlockTokens {
+					if d.err = d.writeBlockSkip(d.tokens, i+1, false); d.err != nil {
+						return
+					}
+					d.tokens.n = 0
+				}
+			}
+			d.index = end
+		}
+	}
+}
+
+// deflateLazy is the same as deflate, but with d.fastSkipHashing == skipNever,
+// meaning it always has lazy matching on.
+func (d *compressor) deflateLazy() {
+	// Sanity enables additional runtime tests.
+	// It's intended to be used during development
+	// to supplement the currently ad-hoc unit tests.
+	const sanity = false
+
+	if d.windowEnd-d.index < minMatchLength+maxMatchLength && !d.sync {
+		return
+	}
+
+	d.maxInsertIndex = d.windowEnd - (minMatchLength - 1)
+	if d.index < d.maxInsertIndex {
+		d.hash = hash4(d.window[d.index : d.index+minMatchLength])
+	}
+
+	for {
+		if sanity && d.index > d.windowEnd {
+			panic("index > windowEnd")
+		}
+		lookahead := d.windowEnd - d.index
+		if lookahead < minMatchLength+maxMatchLength {
+			if !d.sync {
+				return
+			}
+			if sanity && d.index > d.windowEnd {
+				panic("index > windowEnd")
+			}
+			if lookahead == 0 {
+				// Flush current output block if any.
+				if d.byteAvailable {
+					// There is still one pending token that needs to be flushed
+					d.tokens.tokens[d.tokens.n] = literalToken(uint32(d.window[d.index-1]))
+					d.tokens.n++
+					d.byteAvailable = false
+				}
+				if d.tokens.n > 0 {
+					if d.err = d.writeBlock(d.tokens, d.index, false); d.err != nil {
+						return
+					}
+					d.tokens.n = 0
+				}
+				return
+			}
+		}
+		if d.index < d.maxInsertIndex {
+			// Update the hash
+			d.hash = hash4(d.window[d.index : d.index+minMatchLength])
+			ch := d.hashHead[d.hash&hashMask]
+			d.chainHead = int(ch)
+			d.hashPrev[d.index&windowMask] = ch
+			d.hashHead[d.hash&hashMask] = uint32(d.index + d.hashOffset)
+		}
+		prevLength := d.length
+		prevOffset := d.offset
+		d.length = minMatchLength - 1
+		d.offset = 0
+		minIndex := d.index - windowSize
+		if minIndex < 0 {
+			minIndex = 0
+		}
+
+		if d.chainHead-d.hashOffset >= minIndex && lookahead > prevLength && prevLength < d.lazy {
+			if newLength, newOffset, ok := d.findMatch(d.index, d.chainHead-d.hashOffset, minMatchLength-1, lookahead); ok {
+				d.length = newLength
+				d.offset = newOffset
+			}
+		}
+		if prevLength >= minMatchLength && d.length <= prevLength {
+			// There was a match at the previous step, and the current match is
+			// not better. Output the previous match.
+			d.tokens.tokens[d.tokens.n] = matchToken(uint32(prevLength-3), uint32(prevOffset-minOffsetSize))
+			d.tokens.n++
+
+			// Insert in the hash table all strings up to the end of the match.
+			// index and index-1 are already inserted. If there is not enough
+			// lookahead, the last two strings are not inserted into the hash
+			// table.
+			var newIndex int
+			newIndex = d.index + prevLength - 1
+			// Calculate missing hashes
+			end := newIndex
+			if end > d.maxInsertIndex {
+				end = d.maxInsertIndex
+			}
+			end += minMatchLength - 1
+			startindex := d.index + 1
+			if startindex > d.maxInsertIndex {
+				startindex = d.maxInsertIndex
+			}
+			tocheck := d.window[startindex:end]
+			dstSize := len(tocheck) - minMatchLength + 1
+			if dstSize > 0 {
+				dst := d.hashMatch[:dstSize]
+				bulkHash4(tocheck, dst)
+				var newH uint32
+				for i, val := range dst {
+					di := i + startindex
+					newH = val & hashMask
+					// Get previous value with the same hash.
+					// Our chain should point to the previous value.
+					d.hashPrev[di&windowMask] = d.hashHead[newH]
+					// Set the head of the hash chain to us.
+					d.hashHead[newH] = uint32(di + d.hashOffset)
+				}
+				d.hash = newH
+			}
+
+			d.index = newIndex
+			d.byteAvailable = false
+			d.length = minMatchLength - 1
+			if d.tokens.n == maxFlateBlockTokens {
+				// The block includes the current character
+				if d.err = d.writeBlock(d.tokens, d.index, false); d.err != nil {
+					return
+				}
+				d.tokens.n = 0
+			}
+		} else {
+			// Reset, if we got a match this run.
+			if d.length >= minMatchLength {
+				d.ii = 0
+			}
+			// We have a byte waiting. Emit it.
+			if d.byteAvailable {
+				d.ii++
+				d.tokens.tokens[d.tokens.n] = literalToken(uint32(d.window[d.index-1]))
+				d.tokens.n++
+				if d.tokens.n == maxFlateBlockTokens {
+					if d.err = d.writeBlock(d.tokens, d.index, false); d.err != nil {
+						return
+					}
+					d.tokens.n = 0
+				}
+				d.index++
+
+				// If we have a long run of no matches, skip additional bytes
+				// Resets when d.ii overflows after 64KB.
+				if d.ii > 31 {
+					n := int(d.ii >> 5)
+					for j := 0; j < n; j++ {
+						if d.index >= d.windowEnd-1 {
+							break
+						}
+
+						d.tokens.tokens[d.tokens.n] = literalToken(uint32(d.window[d.index-1]))
+						d.tokens.n++
+						if d.tokens.n == maxFlateBlockTokens {
+							if d.err = d.writeBlock(d.tokens, d.index, false); d.err != nil {
+								return
+							}
+							d.tokens.n = 0
+						}
+						d.index++
+					}
+					// Flush last byte
+					d.tokens.tokens[d.tokens.n] = literalToken(uint32(d.window[d.index-1]))
+					d.tokens.n++
+					d.byteAvailable = false
+					// d.length = minMatchLength - 1 // not needed, since d.ii is reset above, so it should never be > minMatchLength
+					if d.tokens.n == maxFlateBlockTokens {
+						if d.err = d.writeBlock(d.tokens, d.index, false); d.err != nil {
+							return
+						}
+						d.tokens.n = 0
+					}
+				}
+			} else {
+				d.index++
+				d.byteAvailable = true
+			}
+		}
+	}
+}
+
+// Assumes that d.fastSkipHashing != skipNever,
+// otherwise use deflateLazySSE
+func (d *compressor) deflateSSE() {
+
+	// Sanity enables additional runtime tests.
+	// It's intended to be used during development
+	// to supplement the currently ad-hoc unit tests.
+	const sanity = false
+
+	if d.windowEnd-d.index < minMatchLength+maxMatchLength && !d.sync {
+		return
+	}
+
+	d.maxInsertIndex = d.windowEnd - (minMatchLength - 1)
+	if d.index < d.maxInsertIndex {
+		d.hash = crc32sse(d.window[d.index:d.index+minMatchLength]) & hashMask
+	}
+
+	for {
+		if sanity && d.index > d.windowEnd {
+			panic("index > windowEnd")
+		}
+		lookahead := d.windowEnd - d.index
+		if lookahead < minMatchLength+maxMatchLength {
+			if !d.sync {
+				return
+			}
+			if sanity && d.index > d.windowEnd {
+				panic("index > windowEnd")
+			}
+			if lookahead == 0 {
+				if d.tokens.n > 0 {
+					if d.err = d.writeBlockSkip(d.tokens, d.index, false); d.err != nil {
+						return
+					}
+					d.tokens.n = 0
+				}
+				return
+			}
+		}
+		if d.index < d.maxInsertIndex {
+			// Update the hash
+			d.hash = crc32sse(d.window[d.index:d.index+minMatchLength]) & hashMask
+			ch := d.hashHead[d.hash]
+			d.chainHead = int(ch)
+			d.hashPrev[d.index&windowMask] = ch
+			d.hashHead[d.hash] = uint32(d.index + d.hashOffset)
+		}
+		d.length = minMatchLength - 1
+		d.offset = 0
+		minIndex := d.index - windowSize
+		if minIndex < 0 {
+			minIndex = 0
+		}
+
+		if d.chainHead-d.hashOffset >= minIndex && lookahead > minMatchLength-1 {
+			if newLength, newOffset, ok := d.findMatchSSE(d.index, d.chainHead-d.hashOffset, minMatchLength-1, lookahead); ok {
+				d.length = newLength
+				d.offset = newOffset
+			}
+		}
+		if d.length >= minMatchLength {
+			d.ii = 0
+			// There was a match at the previous step, and the current match is
+			// not better. Output the previous match.
+			// "d.length-3" should NOT be "d.length-minMatchLength", since the format always assume 3
+			d.tokens.tokens[d.tokens.n] = matchToken(uint32(d.length-3), uint32(d.offset-minOffsetSize))
+			d.tokens.n++
+			// Insert in the hash table all strings up to the end of the match.
+			// index and index-1 are already inserted. If there is not enough
+			// lookahead, the last two strings are not inserted into the hash
+			// table.
+			if d.length <= d.fastSkipHashing {
+				var newIndex int
+				newIndex = d.index + d.length
+				// Calculate missing hashes
+				end := newIndex
+				if end > d.maxInsertIndex {
+					end = d.maxInsertIndex
+				}
+				end += minMatchLength - 1
+				startindex := d.index + 1
+				if startindex > d.maxInsertIndex {
+					startindex = d.maxInsertIndex
+				}
+				tocheck := d.window[startindex:end]
+				dstSize := len(tocheck) - minMatchLength + 1
+				if dstSize > 0 {
+					dst := d.hashMatch[:dstSize]
+
+					crc32sseAll(tocheck, dst)
+					var newH uint32
+					for i, val := range dst {
+						di := i + startindex
+						newH = val & hashMask
+						// Get previous value with the same hash.
+						// Our chain should point to the previous value.
+						d.hashPrev[di&windowMask] = d.hashHead[newH]
+						// Set the head of the hash chain to us.
+						d.hashHead[newH] = uint32(di + d.hashOffset)
+					}
+					d.hash = newH
+				}
+				d.index = newIndex
+			} else {
+				// For matches this long, we don't bother inserting each individual
+				// item into the table.
+				d.index += d.length
+				if d.index < d.maxInsertIndex {
+					d.hash = crc32sse(d.window[d.index:d.index+minMatchLength]) & hashMask
+				}
+			}
+			if d.tokens.n == maxFlateBlockTokens {
+				// The block includes the current character
+				if d.err = d.writeBlockSkip(d.tokens, d.index, false); d.err != nil {
+					return
+				}
+				d.tokens.n = 0
+			}
+		} else {
+			d.ii++
+			end := d.index + int(d.ii>>5) + 1
+			if end > d.windowEnd {
+				end = d.windowEnd
+			}
+			for i := d.index; i < end; i++ {
+				d.tokens.tokens[d.tokens.n] = literalToken(uint32(d.window[i]))
+				d.tokens.n++
+				if d.tokens.n == maxFlateBlockTokens {
+					if d.err = d.writeBlockSkip(d.tokens, i+1, false); d.err != nil {
+						return
+					}
+					d.tokens.n = 0
+				}
+			}
+			d.index = end
+		}
+	}
+}
+
+// deflateLazy is the same as deflate, but with d.fastSkipHashing == skipNever,
+// meaning it always has lazy matching on.
+func (d *compressor) deflateLazySSE() {
+	// Sanity enables additional runtime tests.
+	// It's intended to be used during development
+	// to supplement the currently ad-hoc unit tests.
+	const sanity = false
+
+	if d.windowEnd-d.index < minMatchLength+maxMatchLength && !d.sync {
+		return
+	}
+
+	d.maxInsertIndex = d.windowEnd - (minMatchLength - 1)
+	if d.index < d.maxInsertIndex {
+		d.hash = crc32sse(d.window[d.index:d.index+minMatchLength]) & hashMask
+	}
+
+	for {
+		if sanity && d.index > d.windowEnd {
+			panic("index > windowEnd")
+		}
+		lookahead := d.windowEnd - d.index
+		if lookahead < minMatchLength+maxMatchLength {
+			if !d.sync {
+				return
+			}
+			if sanity && d.index > d.windowEnd {
+				panic("index > windowEnd")
+			}
+			if lookahead == 0 {
+				// Flush current output block if any.
+				if d.byteAvailable {
+					// There is still one pending token that needs to be flushed
+					d.tokens.tokens[d.tokens.n] = literalToken(uint32(d.window[d.index-1]))
+					d.tokens.n++
+					d.byteAvailable = false
+				}
+				if d.tokens.n > 0 {
+					if d.err = d.writeBlock(d.tokens, d.index, false); d.err != nil {
+						return
+					}
+					d.tokens.n = 0
+				}
+				return
+			}
+		}
+		if d.index < d.maxInsertIndex {
+			// Update the hash
+			d.hash = crc32sse(d.window[d.index:d.index+minMatchLength]) & hashMask
+			ch := d.hashHead[d.hash]
+			d.chainHead = int(ch)
+			d.hashPrev[d.index&windowMask] = ch
+			d.hashHead[d.hash] = uint32(d.index + d.hashOffset)
+		}
+		prevLength := d.length
+		prevOffset := d.offset
+		d.length = minMatchLength - 1
+		d.offset = 0
+		minIndex := d.index - windowSize
+		if minIndex < 0 {
+			minIndex = 0
+		}
+
+		if d.chainHead-d.hashOffset >= minIndex && lookahead > prevLength && prevLength < d.lazy {
+			if newLength, newOffset, ok := d.findMatchSSE(d.index, d.chainHead-d.hashOffset, minMatchLength-1, lookahead); ok {
+				d.length = newLength
+				d.offset = newOffset
+			}
+		}
+		if prevLength >= minMatchLength && d.length <= prevLength {
+			// There was a match at the previous step, and the current match is
+			// not better. Output the previous match.
+			d.tokens.tokens[d.tokens.n] = matchToken(uint32(prevLength-3), uint32(prevOffset-minOffsetSize))
+			d.tokens.n++
+
+			// Insert in the hash table all strings up to the end of the match.
+			// index and index-1 are already inserted. If there is not enough
+			// lookahead, the last two strings are not inserted into the hash
+			// table.
+			var newIndex int
+			newIndex = d.index + prevLength - 1
+			// Calculate missing hashes
+			end := newIndex
+			if end > d.maxInsertIndex {
+				end = d.maxInsertIndex
+			}
+			end += minMatchLength - 1
+			startindex := d.index + 1
+			if startindex > d.maxInsertIndex {
+				startindex = d.maxInsertIndex
+			}
+			tocheck := d.window[startindex:end]
+			dstSize := len(tocheck) - minMatchLength + 1
+			if dstSize > 0 {
+				dst := d.hashMatch[:dstSize]
+				crc32sseAll(tocheck, dst)
+				var newH uint32
+				for i, val := range dst {
+					di := i + startindex
+					newH = val & hashMask
+					// Get previous value with the same hash.
+					// Our chain should point to the previous value.
+					d.hashPrev[di&windowMask] = d.hashHead[newH]
+					// Set the head of the hash chain to us.
+					d.hashHead[newH] = uint32(di + d.hashOffset)
+				}
+				d.hash = newH
+			}
+
+			d.index = newIndex
+			d.byteAvailable = false
+			d.length = minMatchLength - 1
+			if d.tokens.n == maxFlateBlockTokens {
+				// The block includes the current character
+				if d.err = d.writeBlock(d.tokens, d.index, false); d.err != nil {
+					return
+				}
+				d.tokens.n = 0
+			}
+		} else {
+			// Reset, if we got a match this run.
+			if d.length >= minMatchLength {
+				d.ii = 0
+			}
+			// We have a byte waiting. Emit it.
+			if d.byteAvailable {
+				d.ii++
+				d.tokens.tokens[d.tokens.n] = literalToken(uint32(d.window[d.index-1]))
+				d.tokens.n++
+				if d.tokens.n == maxFlateBlockTokens {
+					if d.err = d.writeBlock(d.tokens, d.index, false); d.err != nil {
+						return
+					}
+					d.tokens.n = 0
+				}
+				d.index++
+
+				// If we have a long run of no matches, skip additional bytes
+				// Resets when d.ii overflows after 64KB.
+				if d.ii > 31 {
+					n := int(d.ii >> 6)
+					for j := 0; j < n; j++ {
+						if d.index >= d.windowEnd-1 {
+							break
+						}
+
+						d.tokens.tokens[d.tokens.n] = literalToken(uint32(d.window[d.index-1]))
+						d.tokens.n++
+						if d.tokens.n == maxFlateBlockTokens {
+							if d.err = d.writeBlock(d.tokens, d.index, false); d.err != nil {
+								return
+							}
+							d.tokens.n = 0
+						}
+						d.index++
+					}
+					// Flush last byte
+					d.tokens.tokens[d.tokens.n] = literalToken(uint32(d.window[d.index-1]))
+					d.tokens.n++
+					d.byteAvailable = false
+					// d.length = minMatchLength - 1 // not needed, since d.ii is reset above, so it should never be > minMatchLength
+					if d.tokens.n == maxFlateBlockTokens {
+						if d.err = d.writeBlock(d.tokens, d.index, false); d.err != nil {
+							return
+						}
+						d.tokens.n = 0
+					}
+				}
+			} else {
+				d.index++
+				d.byteAvailable = true
+			}
+		}
+	}
+}
+
+func (d *compressor) store() {
+	if d.windowEnd > 0 && (d.windowEnd == maxStoreBlockSize || d.sync) {
+		d.err = d.writeStoredBlock(d.window[:d.windowEnd])
+		d.windowEnd = 0
+	}
+}
+
+// fillWindow will fill the buffer with data for huffman-only compression.
+// The number of bytes copied is returned.
+func (d *compressor) fillBlock(b []byte) int {
+	n := copy(d.window[d.windowEnd:], b)
+	d.windowEnd += n
+	return n
+}
+
+// storeHuff will compress and store the currently added data,
+// if enough has been accumulated or we at the end of the stream.
+// Any error that occurred will be in d.err
+func (d *compressor) storeHuff() {
+	if d.windowEnd < len(d.window) && !d.sync || d.windowEnd == 0 {
+		return
+	}
+	d.w.writeBlockHuff(false, d.window[:d.windowEnd])
+	d.err = d.w.err
+	d.windowEnd = 0
+}
+
+// storeHuff will compress and store the currently added data,
+// if enough has been accumulated or we at the end of the stream.
+// Any error that occurred will be in d.err
+func (d *compressor) storeSnappy() {
+	// We only compress if we have maxStoreBlockSize.
+	if d.windowEnd < maxStoreBlockSize {
+		if !d.sync {
+			return
+		}
+		// Handle extremely small sizes.
+		if d.windowEnd < 128 {
+			if d.windowEnd == 0 {
+				return
+			}
+			if d.windowEnd <= 32 {
+				d.err = d.writeStoredBlock(d.window[:d.windowEnd])
+				d.tokens.n = 0
+				d.windowEnd = 0
+			} else {
+				d.w.writeBlockHuff(false, d.window[:d.windowEnd])
+				d.err = d.w.err
+			}
+			d.tokens.n = 0
+			d.windowEnd = 0
+			d.snap.Reset()
+			return
+		}
+	}
+
+	d.snap.Encode(&d.tokens, d.window[:d.windowEnd])
+	// If we made zero matches, store the block as is.
+	if int(d.tokens.n) == d.windowEnd {
+		d.err = d.writeStoredBlock(d.window[:d.windowEnd])
+		// If we removed less than 1/16th, huffman compress the block.
+	} else if int(d.tokens.n) > d.windowEnd-(d.windowEnd>>4) {
+		d.w.writeBlockHuff(false, d.window[:d.windowEnd])
+		d.err = d.w.err
+	} else {
+		d.w.writeBlockDynamic(d.tokens.tokens[:d.tokens.n], false, d.window[:d.windowEnd])
+		d.err = d.w.err
+	}
+	d.tokens.n = 0
+	d.windowEnd = 0
+}
+
+// write will add input byte to the stream.
+// Unless an error occurs all bytes will be consumed.
+func (d *compressor) write(b []byte) (n int, err error) {
+	if d.err != nil {
+		return 0, d.err
+	}
+	n = len(b)
+	for len(b) > 0 {
+		d.step(d)
+		b = b[d.fill(d, b):]
+		if d.err != nil {
+			return 0, d.err
+		}
+	}
+	return n, d.err
+}
+
+func (d *compressor) syncFlush() error {
+	d.sync = true
+	if d.err != nil {
+		return d.err
+	}
+	d.step(d)
+	if d.err == nil {
+		d.w.writeStoredHeader(0, false)
+		d.w.flush()
+		d.err = d.w.err
+	}
+	d.sync = false
+	return d.err
+}
+
+func (d *compressor) init(w io.Writer, level int) (err error) {
+	d.w = newHuffmanBitWriter(w)
+
+	switch {
+	case level == NoCompression:
+		d.window = make([]byte, maxStoreBlockSize)
+		d.fill = (*compressor).fillBlock
+		d.step = (*compressor).store
+	case level == ConstantCompression:
+		d.window = make([]byte, maxStoreBlockSize)
+		d.fill = (*compressor).fillBlock
+		d.step = (*compressor).storeHuff
+	case level >= 1 && level <= 4:
+		d.snap = newSnappy(level)
+		d.window = make([]byte, maxStoreBlockSize)
+		d.fill = (*compressor).fillBlock
+		d.step = (*compressor).storeSnappy
+	case level == DefaultCompression:
+		level = 5
+		fallthrough
+	case 5 <= level && level <= 9:
+		d.compressionLevel = levels[level]
+		d.initDeflate()
+		d.fill = (*compressor).fillDeflate
+		if d.fastSkipHashing == skipNever {
+			if useSSE42 {
+				d.step = (*compressor).deflateLazySSE
+			} else {
+				d.step = (*compressor).deflateLazy
+			}
+		} else {
+			if useSSE42 {
+				d.step = (*compressor).deflateSSE
+			} else {
+				d.step = (*compressor).deflate
+
+			}
+		}
+	default:
+		return fmt.Errorf("flate: invalid compression level %d: want value in range [-2, 9]", level)
+	}
+	return nil
+}
+
+// reset the state of the compressor.
+func (d *compressor) reset(w io.Writer) {
+	d.w.reset(w)
+	d.sync = false
+	d.err = nil
+	// We only need to reset a few things for Snappy.
+	if d.snap != nil {
+		d.snap.Reset()
+		d.windowEnd = 0
+		d.tokens.n = 0
+		return
+	}
+	switch d.compressionLevel.chain {
+	case 0:
+		// level was NoCompression or ConstantCompresssion.
+		d.windowEnd = 0
+	default:
+		d.chainHead = -1
+		for i := range d.hashHead {
+			d.hashHead[i] = 0
+		}
+		for i := range d.hashPrev {
+			d.hashPrev[i] = 0
+		}
+		d.hashOffset = 1
+		d.index, d.windowEnd = 0, 0
+		d.blockStart, d.byteAvailable = 0, false
+		d.tokens.n = 0
+		d.length = minMatchLength - 1
+		d.offset = 0
+		d.hash = 0
+		d.ii = 0
+		d.maxInsertIndex = 0
+	}
+}
+
+func (d *compressor) close() error {
+	if d.err != nil {
+		return d.err
+	}
+	d.sync = true
+	d.step(d)
+	if d.err != nil {
+		return d.err
+	}
+	if d.w.writeStoredHeader(0, true); d.w.err != nil {
+		return d.w.err
+	}
+	d.w.flush()
+	return d.w.err
+}
+
+// NewWriter returns a new Writer compressing data at the given level.
+// Following zlib, levels range from 1 (BestSpeed) to 9 (BestCompression);
+// higher levels typically run slower but compress more.
+// Level 0 (NoCompression) does not attempt any compression; it only adds the
+// necessary DEFLATE framing.
+// Level -1 (DefaultCompression) uses the default compression level.
+// Level -2 (ConstantCompression) will use Huffman compression only, giving
+// a very fast compression for all types of input, but sacrificing considerable
+// compression efficiency.
+//
+// If level is in the range [-2, 9] then the error returned will be nil.
+// Otherwise the error returned will be non-nil.
+func NewWriter(w io.Writer, level int) (*Writer, error) {
+	var dw Writer
+	if err := dw.d.init(w, level); err != nil {
+		return nil, err
+	}
+	return &dw, nil
+}
+
+// NewWriterDict is like NewWriter but initializes the new
+// Writer with a preset dictionary.  The returned Writer behaves
+// as if the dictionary had been written to it without producing
+// any compressed output.  The compressed data written to w
+// can only be decompressed by a Reader initialized with the
+// same dictionary.
+func NewWriterDict(w io.Writer, level int, dict []byte) (*Writer, error) {
+	dw := &dictWriter{w}
+	zw, err := NewWriter(dw, level)
+	if err != nil {
+		return nil, err
+	}
+	zw.d.fillWindow(dict)
+	zw.dict = append(zw.dict, dict...) // duplicate dictionary for Reset method.
+	return zw, err
+}
+
+type dictWriter struct {
+	w io.Writer
+}
+
+func (w *dictWriter) Write(b []byte) (n int, err error) {
+	return w.w.Write(b)
+}
+
+// A Writer takes data written to it and writes the compressed
+// form of that data to an underlying writer (see NewWriter).
+type Writer struct {
+	d    compressor
+	dict []byte
+}
+
+// Write writes data to w, which will eventually write the
+// compressed form of data to its underlying writer.
+func (w *Writer) Write(data []byte) (n int, err error) {
+	return w.d.write(data)
+}
+
+// Flush flushes any pending data to the underlying writer.
+// It is useful mainly in compressed network protocols, to ensure that
+// a remote reader has enough data to reconstruct a packet.
+// Flush does not return until the data has been written.
+// Calling Flush when there is no pending data still causes the Writer
+// to emit a sync marker of at least 4 bytes.
+// If the underlying writer returns an error, Flush returns that error.
+//
+// In the terminology of the zlib library, Flush is equivalent to Z_SYNC_FLUSH.
+func (w *Writer) Flush() error {
+	// For more about flushing:
+	// http://www.bolet.org/~pornin/deflate-flush.html
+	return w.d.syncFlush()
+}
+
+// Close flushes and closes the writer.
+func (w *Writer) Close() error {
+	return w.d.close()
+}
+
+// Reset discards the writer's state and makes it equivalent to
+// the result of NewWriter or NewWriterDict called with dst
+// and w's level and dictionary.
+func (w *Writer) Reset(dst io.Writer) {
+	if dw, ok := w.d.w.writer.(*dictWriter); ok {
+		// w was created with NewWriterDict
+		dw.w = dst
+		w.d.reset(dw)
+		w.d.fillWindow(w.dict)
+	} else {
+		// w was created with NewWriter
+		w.d.reset(dst)
+	}
+}
+
+// ResetDict discards the writer's state and makes it equivalent to
+// the result of NewWriter or NewWriterDict called with dst
+// and w's level, but sets a specific dictionary.
+func (w *Writer) ResetDict(dst io.Writer, dict []byte) {
+	w.dict = dict
+	w.d.reset(dst)
+	w.d.fillWindow(w.dict)
+}
diff --git a/vendor/github.com/klauspost/compress/flate/dict_decoder.go b/vendor/github.com/klauspost/compress/flate/dict_decoder.go
new file mode 100644
index 00000000000..71c75a065ea
--- /dev/null
+++ b/vendor/github.com/klauspost/compress/flate/dict_decoder.go
@@ -0,0 +1,184 @@
+// Copyright 2016 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package flate
+
+// dictDecoder implements the LZ77 sliding dictionary as used in decompression.
+// LZ77 decompresses data through sequences of two forms of commands:
+//
+//	* Literal insertions: Runs of one or more symbols are inserted into the data
+//	stream as is. This is accomplished through the writeByte method for a
+//	single symbol, or combinations of writeSlice/writeMark for multiple symbols.
+//	Any valid stream must start with a literal insertion if no preset dictionary
+//	is used.
+//
+//	* Backward copies: Runs of one or more symbols are copied from previously
+//	emitted data. Backward copies come as the tuple (dist, length) where dist
+//	determines how far back in the stream to copy from and length determines how
+//	many bytes to copy. Note that it is valid for the length to be greater than
+//	the distance. Since LZ77 uses forward copies, that situation is used to
+//	perform a form of run-length encoding on repeated runs of symbols.
+//	The writeCopy and tryWriteCopy are used to implement this command.
+//
+// For performance reasons, this implementation performs little to no sanity
+// checks about the arguments. As such, the invariants documented for each
+// method call must be respected.
+type dictDecoder struct {
+	hist []byte // Sliding window history
+
+	// Invariant: 0 <= rdPos <= wrPos <= len(hist)
+	wrPos int  // Current output position in buffer
+	rdPos int  // Have emitted hist[:rdPos] already
+	full  bool // Has a full window length been written yet?
+}
+
+// init initializes dictDecoder to have a sliding window dictionary of the given
+// size. If a preset dict is provided, it will initialize the dictionary with
+// the contents of dict.
+func (dd *dictDecoder) init(size int, dict []byte) {
+	*dd = dictDecoder{hist: dd.hist}
+
+	if cap(dd.hist) < size {
+		dd.hist = make([]byte, size)
+	}
+	dd.hist = dd.hist[:size]
+
+	if len(dict) > len(dd.hist) {
+		dict = dict[len(dict)-len(dd.hist):]
+	}
+	dd.wrPos = copy(dd.hist, dict)
+	if dd.wrPos == len(dd.hist) {
+		dd.wrPos = 0
+		dd.full = true
+	}
+	dd.rdPos = dd.wrPos
+}
+
+// histSize reports the total amount of historical data in the dictionary.
+func (dd *dictDecoder) histSize() int {
+	if dd.full {
+		return len(dd.hist)
+	}
+	return dd.wrPos
+}
+
+// availRead reports the number of bytes that can be flushed by readFlush.
+func (dd *dictDecoder) availRead() int {
+	return dd.wrPos - dd.rdPos
+}
+
+// availWrite reports the available amount of output buffer space.
+func (dd *dictDecoder) availWrite() int {
+	return len(dd.hist) - dd.wrPos
+}
+
+// writeSlice returns a slice of the available buffer to write data to.
+//
+// This invariant will be kept: len(s) <= availWrite()
+func (dd *dictDecoder) writeSlice() []byte {
+	return dd.hist[dd.wrPos:]
+}
+
+// writeMark advances the writer pointer by cnt.
+//
+// This invariant must be kept: 0 <= cnt <= availWrite()
+func (dd *dictDecoder) writeMark(cnt int) {
+	dd.wrPos += cnt
+}
+
+// writeByte writes a single byte to the dictionary.
+//
+// This invariant must be kept: 0 < availWrite()
+func (dd *dictDecoder) writeByte(c byte) {
+	dd.hist[dd.wrPos] = c
+	dd.wrPos++
+}
+
+// writeCopy copies a string at a given (dist, length) to the output.
+// This returns the number of bytes copied and may be less than the requested
+// length if the available space in the output buffer is too small.
+//
+// This invariant must be kept: 0 < dist <= histSize()
+func (dd *dictDecoder) writeCopy(dist, length int) int {
+	dstBase := dd.wrPos
+	dstPos := dstBase
+	srcPos := dstPos - dist
+	endPos := dstPos + length
+	if endPos > len(dd.hist) {
+		endPos = len(dd.hist)
+	}
+
+	// Copy non-overlapping section after destination position.
+	//
+	// This section is non-overlapping in that the copy length for this section
+	// is always less than or equal to the backwards distance. This can occur
+	// if a distance refers to data that wraps-around in the buffer.
+	// Thus, a backwards copy is performed here; that is, the exact bytes in
+	// the source prior to the copy is placed in the destination.
+	if srcPos < 0 {
+		srcPos += len(dd.hist)
+		dstPos += copy(dd.hist[dstPos:endPos], dd.hist[srcPos:])
+		srcPos = 0
+	}
+
+	// Copy possibly overlapping section before destination position.
+	//
+	// This section can overlap if the copy length for this section is larger
+	// than the backwards distance. This is allowed by LZ77 so that repeated
+	// strings can be succinctly represented using (dist, length) pairs.
+	// Thus, a forwards copy is performed here; that is, the bytes copied is
+	// possibly dependent on the resulting bytes in the destination as the copy
+	// progresses along. This is functionally equivalent to the following:
+	//
+	//	for i := 0; i < endPos-dstPos; i++ {
+	//		dd.hist[dstPos+i] = dd.hist[srcPos+i]
+	//	}
+	//	dstPos = endPos
+	//
+	for dstPos < endPos {
+		dstPos += copy(dd.hist[dstPos:endPos], dd.hist[srcPos:dstPos])
+	}
+
+	dd.wrPos = dstPos
+	return dstPos - dstBase
+}
+
+// tryWriteCopy tries to copy a string at a given (distance, length) to the
+// output. This specialized version is optimized for short distances.
+//
+// This method is designed to be inlined for performance reasons.
+//
+// This invariant must be kept: 0 < dist <= histSize()
+func (dd *dictDecoder) tryWriteCopy(dist, length int) int {
+	dstPos := dd.wrPos
+	endPos := dstPos + length
+	if dstPos < dist || endPos > len(dd.hist) {
+		return 0
+	}
+	dstBase := dstPos
+	srcPos := dstPos - dist
+
+	// Copy possibly overlapping section before destination position.
+loop:
+	dstPos += copy(dd.hist[dstPos:endPos], dd.hist[srcPos:dstPos])
+	if dstPos < endPos {
+		goto loop // Avoid for-loop so that this function can be inlined
+	}
+
+	dd.wrPos = dstPos
+	return dstPos - dstBase
+}
+
+// readFlush returns a slice of the historical buffer that is ready to be
+// emitted to the user. The data returned by readFlush must be fully consumed
+// before calling any other dictDecoder methods.
+func (dd *dictDecoder) readFlush() []byte {
+	toRead := dd.hist[dd.rdPos:dd.wrPos]
+	dd.rdPos = dd.wrPos
+	if dd.wrPos == len(dd.hist) {
+		dd.wrPos, dd.rdPos = 0, 0
+		dd.full = true
+	}
+	return toRead
+}
diff --git a/vendor/github.com/klauspost/compress/flate/gen.go b/vendor/github.com/klauspost/compress/flate/gen.go
new file mode 100644
index 00000000000..154c89a488e
--- /dev/null
+++ b/vendor/github.com/klauspost/compress/flate/gen.go
@@ -0,0 +1,265 @@
+// Copyright 2012 The Go Authors.  All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build ignore
+
+// This program generates fixedhuff.go
+// Invoke as
+//
+//	go run gen.go -output fixedhuff.go
+
+package main
+
+import (
+	"bytes"
+	"flag"
+	"fmt"
+	"go/format"
+	"io/ioutil"
+	"log"
+)
+
+var filename = flag.String("output", "fixedhuff.go", "output file name")
+
+const maxCodeLen = 16
+
+// Note: the definition of the huffmanDecoder struct is copied from
+// inflate.go, as it is private to the implementation.
+
+// chunk & 15 is number of bits
+// chunk >> 4 is value, including table link
+
+const (
+	huffmanChunkBits  = 9
+	huffmanNumChunks  = 1 << huffmanChunkBits
+	huffmanCountMask  = 15
+	huffmanValueShift = 4
+)
+
+type huffmanDecoder struct {
+	min      int                      // the minimum code length
+	chunks   [huffmanNumChunks]uint32 // chunks as described above
+	links    [][]uint32               // overflow links
+	linkMask uint32                   // mask the width of the link table
+}
+
+// Initialize Huffman decoding tables from array of code lengths.
+// Following this function, h is guaranteed to be initialized into a complete
+// tree (i.e., neither over-subscribed nor under-subscribed). The exception is a
+// degenerate case where the tree has only a single symbol with length 1. Empty
+// trees are permitted.
+func (h *huffmanDecoder) init(bits []int) bool {
+	// Sanity enables additional runtime tests during Huffman
+	// table construction.  It's intended to be used during
+	// development to supplement the currently ad-hoc unit tests.
+	const sanity = false
+
+	if h.min != 0 {
+		*h = huffmanDecoder{}
+	}
+
+	// Count number of codes of each length,
+	// compute min and max length.
+	var count [maxCodeLen]int
+	var min, max int
+	for _, n := range bits {
+		if n == 0 {
+			continue
+		}
+		if min == 0 || n < min {
+			min = n
+		}
+		if n > max {
+			max = n
+		}
+		count[n]++
+	}
+
+	// Empty tree. The decompressor.huffSym function will fail later if the tree
+	// is used. Technically, an empty tree is only valid for the HDIST tree and
+	// not the HCLEN and HLIT tree. However, a stream with an empty HCLEN tree
+	// is guaranteed to fail since it will attempt to use the tree to decode the
+	// codes for the HLIT and HDIST trees. Similarly, an empty HLIT tree is
+	// guaranteed to fail later since the compressed data section must be
+	// composed of at least one symbol (the end-of-block marker).
+	if max == 0 {
+		return true
+	}
+
+	code := 0
+	var nextcode [maxCodeLen]int
+	for i := min; i <= max; i++ {
+		code <<= 1
+		nextcode[i] = code
+		code += count[i]
+	}
+
+	// Check that the coding is complete (i.e., that we've
+	// assigned all 2-to-the-max possible bit sequences).
+	// Exception: To be compatible with zlib, we also need to
+	// accept degenerate single-code codings.  See also
+	// TestDegenerateHuffmanCoding.
+	if code != 1<<uint(max) && !(code == 1 && max == 1) {
+		return false
+	}
+
+	h.min = min
+	if max > huffmanChunkBits {
+		numLinks := 1 << (uint(max) - huffmanChunkBits)
+		h.linkMask = uint32(numLinks - 1)
+
+		// create link tables
+		link := nextcode[huffmanChunkBits+1] >> 1
+		h.links = make([][]uint32, huffmanNumChunks-link)
+		for j := uint(link); j < huffmanNumChunks; j++ {
+			reverse := int(reverseByte[j>>8]) | int(reverseByte[j&0xff])<<8
+			reverse >>= uint(16 - huffmanChunkBits)
+			off := j - uint(link)
+			if sanity && h.chunks[reverse] != 0 {
+				panic("impossible: overwriting existing chunk")
+			}
+			h.chunks[reverse] = uint32(off<<huffmanValueShift | (huffmanChunkBits + 1))
+			h.links[off] = make([]uint32, numLinks)
+		}
+	}
+
+	for i, n := range bits {
+		if n == 0 {
+			continue
+		}
+		code := nextcode[n]
+		nextcode[n]++
+		chunk := uint32(i<<huffmanValueShift | n)
+		reverse := int(reverseByte[code>>8]) | int(reverseByte[code&0xff])<<8
+		reverse >>= uint(16 - n)
+		if n <= huffmanChunkBits {
+			for off := reverse; off < len(h.chunks); off += 1 << uint(n) {
+				// We should never need to overwrite
+				// an existing chunk.  Also, 0 is
+				// never a valid chunk, because the
+				// lower 4 "count" bits should be
+				// between 1 and 15.
+				if sanity && h.chunks[off] != 0 {
+					panic("impossible: overwriting existing chunk")
+				}
+				h.chunks[off] = chunk
+			}
+		} else {
+			j := reverse & (huffmanNumChunks - 1)
+			if sanity && h.chunks[j]&huffmanCountMask != huffmanChunkBits+1 {
+				// Longer codes should have been
+				// associated with a link table above.
+				panic("impossible: not an indirect chunk")
+			}
+			value := h.chunks[j] >> huffmanValueShift
+			linktab := h.links[value]
+			reverse >>= huffmanChunkBits
+			for off := reverse; off < len(linktab); off += 1 << uint(n-huffmanChunkBits) {
+				if sanity && linktab[off] != 0 {
+					panic("impossible: overwriting existing chunk")
+				}
+				linktab[off] = chunk
+			}
+		}
+	}
+
+	if sanity {
+		// Above we've sanity checked that we never overwrote
+		// an existing entry.  Here we additionally check that
+		// we filled the tables completely.
+		for i, chunk := range h.chunks {
+			if chunk == 0 {
+				// As an exception, in the degenerate
+				// single-code case, we allow odd
+				// chunks to be missing.
+				if code == 1 && i%2 == 1 {
+					continue
+				}
+				panic("impossible: missing chunk")
+			}
+		}
+		for _, linktab := range h.links {
+			for _, chunk := range linktab {
+				if chunk == 0 {
+					panic("impossible: missing chunk")
+				}
+			}
+		}
+	}
+
+	return true
+}
+
+func main() {
+	flag.Parse()
+
+	var h huffmanDecoder
+	var bits [288]int
+	initReverseByte()
+	for i := 0; i < 144; i++ {
+		bits[i] = 8
+	}
+	for i := 144; i < 256; i++ {
+		bits[i] = 9
+	}
+	for i := 256; i < 280; i++ {
+		bits[i] = 7
+	}
+	for i := 280; i < 288; i++ {
+		bits[i] = 8
+	}
+	h.init(bits[:])
+	if h.links != nil {
+		log.Fatal("Unexpected links table in fixed Huffman decoder")
+	}
+
+	var buf bytes.Buffer
+
+	fmt.Fprintf(&buf, `// Copyright 2013 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.`+"\n\n")
+
+	fmt.Fprintln(&buf, "package flate")
+	fmt.Fprintln(&buf)
+	fmt.Fprintln(&buf, "// autogenerated by go run gen.go -output fixedhuff.go, DO NOT EDIT")
+	fmt.Fprintln(&buf)
+	fmt.Fprintln(&buf, "var fixedHuffmanDecoder = huffmanDecoder{")
+	fmt.Fprintf(&buf, "\t%d,\n", h.min)
+	fmt.Fprintln(&buf, "\t[huffmanNumChunks]uint32{")
+	for i := 0; i < huffmanNumChunks; i++ {
+		if i&7 == 0 {
+			fmt.Fprintf(&buf, "\t\t")
+		} else {
+			fmt.Fprintf(&buf, " ")
+		}
+		fmt.Fprintf(&buf, "0x%04x,", h.chunks[i])
+		if i&7 == 7 {
+			fmt.Fprintln(&buf)
+		}
+	}
+	fmt.Fprintln(&buf, "\t},")
+	fmt.Fprintln(&buf, "\tnil, 0,")
+	fmt.Fprintln(&buf, "}")
+
+	data, err := format.Source(buf.Bytes())
+	if err != nil {
+		log.Fatal(err)
+	}
+	err = ioutil.WriteFile(*filename, data, 0644)
+	if err != nil {
+		log.Fatal(err)
+	}
+}
+
+var reverseByte [256]byte
+
+func initReverseByte() {
+	for x := 0; x < 256; x++ {
+		var result byte
+		for i := uint(0); i < 8; i++ {
+			result |= byte(((x >> i) & 1) << (7 - i))
+		}
+		reverseByte[x] = result
+	}
+}
diff --git a/vendor/github.com/klauspost/compress/flate/huffman_bit_writer.go b/vendor/github.com/klauspost/compress/flate/huffman_bit_writer.go
new file mode 100644
index 00000000000..f9b2a699a3d
--- /dev/null
+++ b/vendor/github.com/klauspost/compress/flate/huffman_bit_writer.go
@@ -0,0 +1,701 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package flate
+
+import (
+	"io"
+)
+
+const (
+	// The largest offset code.
+	offsetCodeCount = 30
+
+	// The special code used to mark the end of a block.
+	endBlockMarker = 256
+
+	// The first length code.
+	lengthCodesStart = 257
+
+	// The number of codegen codes.
+	codegenCodeCount = 19
+	badCode          = 255
+
+	// bufferFlushSize indicates the buffer size
+	// after which bytes are flushed to the writer.
+	// Should preferably be a multiple of 6, since
+	// we accumulate 6 bytes between writes to the buffer.
+	bufferFlushSize = 240
+
+	// bufferSize is the actual output byte buffer size.
+	// It must have additional headroom for a flush
+	// which can contain up to 8 bytes.
+	bufferSize = bufferFlushSize + 8
+)
+
+// The number of extra bits needed by length code X - LENGTH_CODES_START.
+var lengthExtraBits = []int8{
+	/* 257 */ 0, 0, 0,
+	/* 260 */ 0, 0, 0, 0, 0, 1, 1, 1, 1, 2,
+	/* 270 */ 2, 2, 2, 3, 3, 3, 3, 4, 4, 4,
+	/* 280 */ 4, 5, 5, 5, 5, 0,
+}
+
+// The length indicated by length code X - LENGTH_CODES_START.
+var lengthBase = []uint32{
+	0, 1, 2, 3, 4, 5, 6, 7, 8, 10,
+	12, 14, 16, 20, 24, 28, 32, 40, 48, 56,
+	64, 80, 96, 112, 128, 160, 192, 224, 255,
+}
+
+// offset code word extra bits.
+var offsetExtraBits = []int8{
+	0, 0, 0, 0, 1, 1, 2, 2, 3, 3,
+	4, 4, 5, 5, 6, 6, 7, 7, 8, 8,
+	9, 9, 10, 10, 11, 11, 12, 12, 13, 13,
+	/* extended window */
+	14, 14, 15, 15, 16, 16, 17, 17, 18, 18, 19, 19, 20, 20,
+}
+
+var offsetBase = []uint32{
+	/* normal deflate */
+	0x000000, 0x000001, 0x000002, 0x000003, 0x000004,
+	0x000006, 0x000008, 0x00000c, 0x000010, 0x000018,
+	0x000020, 0x000030, 0x000040, 0x000060, 0x000080,
+	0x0000c0, 0x000100, 0x000180, 0x000200, 0x000300,
+	0x000400, 0x000600, 0x000800, 0x000c00, 0x001000,
+	0x001800, 0x002000, 0x003000, 0x004000, 0x006000,
+
+	/* extended window */
+	0x008000, 0x00c000, 0x010000, 0x018000, 0x020000,
+	0x030000, 0x040000, 0x060000, 0x080000, 0x0c0000,
+	0x100000, 0x180000, 0x200000, 0x300000,
+}
+
+// The odd order in which the codegen code sizes are written.
+var codegenOrder = []uint32{16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15}
+
+type huffmanBitWriter struct {
+	// writer is the underlying writer.
+	// Do not use it directly; use the write method, which ensures
+	// that Write errors are sticky.
+	writer io.Writer
+
+	// Data waiting to be written is bytes[0:nbytes]
+	// and then the low nbits of bits.
+	bits            uint64
+	nbits           uint
+	bytes           [bufferSize]byte
+	codegenFreq     [codegenCodeCount]int32
+	nbytes          int
+	literalFreq     []int32
+	offsetFreq      []int32
+	codegen         []uint8
+	literalEncoding *huffmanEncoder
+	offsetEncoding  *huffmanEncoder
+	codegenEncoding *huffmanEncoder
+	err             error
+}
+
+func newHuffmanBitWriter(w io.Writer) *huffmanBitWriter {
+	return &huffmanBitWriter{
+		writer:          w,
+		literalFreq:     make([]int32, maxNumLit),
+		offsetFreq:      make([]int32, offsetCodeCount),
+		codegen:         make([]uint8, maxNumLit+offsetCodeCount+1),
+		literalEncoding: newHuffmanEncoder(maxNumLit),
+		codegenEncoding: newHuffmanEncoder(codegenCodeCount),
+		offsetEncoding:  newHuffmanEncoder(offsetCodeCount),
+	}
+}
+
+func (w *huffmanBitWriter) reset(writer io.Writer) {
+	w.writer = writer
+	w.bits, w.nbits, w.nbytes, w.err = 0, 0, 0, nil
+	w.bytes = [bufferSize]byte{}
+}
+
+func (w *huffmanBitWriter) flush() {
+	if w.err != nil {
+		w.nbits = 0
+		return
+	}
+	n := w.nbytes
+	for w.nbits != 0 {
+		w.bytes[n] = byte(w.bits)
+		w.bits >>= 8
+		if w.nbits > 8 { // Avoid underflow
+			w.nbits -= 8
+		} else {
+			w.nbits = 0
+		}
+		n++
+	}
+	w.bits = 0
+	w.write(w.bytes[:n])
+	w.nbytes = 0
+}
+
+func (w *huffmanBitWriter) write(b []byte) {
+	if w.err != nil {
+		return
+	}
+	_, w.err = w.writer.Write(b)
+}
+
+func (w *huffmanBitWriter) writeBits(b int32, nb uint) {
+	if w.err != nil {
+		return
+	}
+	w.bits |= uint64(b) << w.nbits
+	w.nbits += nb
+	if w.nbits >= 48 {
+		bits := w.bits
+		w.bits >>= 48
+		w.nbits -= 48
+		n := w.nbytes
+		bytes := w.bytes[n : n+6]
+		bytes[0] = byte(bits)
+		bytes[1] = byte(bits >> 8)
+		bytes[2] = byte(bits >> 16)
+		bytes[3] = byte(bits >> 24)
+		bytes[4] = byte(bits >> 32)
+		bytes[5] = byte(bits >> 40)
+		n += 6
+		if n >= bufferFlushSize {
+			w.write(w.bytes[:n])
+			n = 0
+		}
+		w.nbytes = n
+	}
+}
+
+func (w *huffmanBitWriter) writeBytes(bytes []byte) {
+	if w.err != nil {
+		return
+	}
+	n := w.nbytes
+	if w.nbits&7 != 0 {
+		w.err = InternalError("writeBytes with unfinished bits")
+		return
+	}
+	for w.nbits != 0 {
+		w.bytes[n] = byte(w.bits)
+		w.bits >>= 8
+		w.nbits -= 8
+		n++
+	}
+	if n != 0 {
+		w.write(w.bytes[:n])
+	}
+	w.nbytes = 0
+	w.write(bytes)
+}
+
+// RFC 1951 3.2.7 specifies a special run-length encoding for specifying
+// the literal and offset lengths arrays (which are concatenated into a single
+// array).  This method generates that run-length encoding.
+//
+// The result is written into the codegen array, and the frequencies
+// of each code is written into the codegenFreq array.
+// Codes 0-15 are single byte codes. Codes 16-18 are followed by additional
+// information. Code badCode is an end marker
+//
+//  numLiterals      The number of literals in literalEncoding
+//  numOffsets       The number of offsets in offsetEncoding
+//  litenc, offenc   The literal and offset encoder to use
+func (w *huffmanBitWriter) generateCodegen(numLiterals int, numOffsets int, litEnc, offEnc *huffmanEncoder) {
+	for i := range w.codegenFreq {
+		w.codegenFreq[i] = 0
+	}
+	// Note that we are using codegen both as a temporary variable for holding
+	// a copy of the frequencies, and as the place where we put the result.
+	// This is fine because the output is always shorter than the input used
+	// so far.
+	codegen := w.codegen // cache
+	// Copy the concatenated code sizes to codegen. Put a marker at the end.
+	cgnl := codegen[:numLiterals]
+	for i := range cgnl {
+		cgnl[i] = uint8(litEnc.codes[i].len)
+	}
+
+	cgnl = codegen[numLiterals : numLiterals+numOffsets]
+	for i := range cgnl {
+		cgnl[i] = uint8(offEnc.codes[i].len)
+	}
+	codegen[numLiterals+numOffsets] = badCode
+
+	size := codegen[0]
+	count := 1
+	outIndex := 0
+	for inIndex := 1; size != badCode; inIndex++ {
+		// INVARIANT: We have seen "count" copies of size that have not yet
+		// had output generated for them.
+		nextSize := codegen[inIndex]
+		if nextSize == size {
+			count++
+			continue
+		}
+		// We need to generate codegen indicating "count" of size.
+		if size != 0 {
+			codegen[outIndex] = size
+			outIndex++
+			w.codegenFreq[size]++
+			count--
+			for count >= 3 {
+				n := 6
+				if n > count {
+					n = count
+				}
+				codegen[outIndex] = 16
+				outIndex++
+				codegen[outIndex] = uint8(n - 3)
+				outIndex++
+				w.codegenFreq[16]++
+				count -= n
+			}
+		} else {
+			for count >= 11 {
+				n := 138
+				if n > count {
+					n = count
+				}
+				codegen[outIndex] = 18
+				outIndex++
+				codegen[outIndex] = uint8(n - 11)
+				outIndex++
+				w.codegenFreq[18]++
+				count -= n
+			}
+			if count >= 3 {
+				// count >= 3 && count <= 10
+				codegen[outIndex] = 17
+				outIndex++
+				codegen[outIndex] = uint8(count - 3)
+				outIndex++
+				w.codegenFreq[17]++
+				count = 0
+			}
+		}
+		count--
+		for ; count >= 0; count-- {
+			codegen[outIndex] = size
+			outIndex++
+			w.codegenFreq[size]++
+		}
+		// Set up invariant for next time through the loop.
+		size = nextSize
+		count = 1
+	}
+	// Marker indicating the end of the codegen.
+	codegen[outIndex] = badCode
+}
+
+// dynamicSize returns the size of dynamically encoded data in bits.
+func (w *huffmanBitWriter) dynamicSize(litEnc, offEnc *huffmanEncoder, extraBits int) (size, numCodegens int) {
+	numCodegens = len(w.codegenFreq)
+	for numCodegens > 4 && w.codegenFreq[codegenOrder[numCodegens-1]] == 0 {
+		numCodegens--
+	}
+	header := 3 + 5 + 5 + 4 + (3 * numCodegens) +
+		w.codegenEncoding.bitLength(w.codegenFreq[:]) +
+		int(w.codegenFreq[16])*2 +
+		int(w.codegenFreq[17])*3 +
+		int(w.codegenFreq[18])*7
+	size = header +
+		litEnc.bitLength(w.literalFreq) +
+		offEnc.bitLength(w.offsetFreq) +
+		extraBits
+
+	return size, numCodegens
+}
+
+// fixedSize returns the size of dynamically encoded data in bits.
+func (w *huffmanBitWriter) fixedSize(extraBits int) int {
+	return 3 +
+		fixedLiteralEncoding.bitLength(w.literalFreq) +
+		fixedOffsetEncoding.bitLength(w.offsetFreq) +
+		extraBits
+}
+
+// storedSize calculates the stored size, including header.
+// The function returns the size in bits and whether the block
+// fits inside a single block.
+func (w *huffmanBitWriter) storedSize(in []byte) (int, bool) {
+	if in == nil {
+		return 0, false
+	}
+	if len(in) <= maxStoreBlockSize {
+		return (len(in) + 5) * 8, true
+	}
+	return 0, false
+}
+
+func (w *huffmanBitWriter) writeCode(c hcode) {
+	if w.err != nil {
+		return
+	}
+	w.bits |= uint64(c.code) << w.nbits
+	w.nbits += uint(c.len)
+	if w.nbits >= 48 {
+		bits := w.bits
+		w.bits >>= 48
+		w.nbits -= 48
+		n := w.nbytes
+		bytes := w.bytes[n : n+6]
+		bytes[0] = byte(bits)
+		bytes[1] = byte(bits >> 8)
+		bytes[2] = byte(bits >> 16)
+		bytes[3] = byte(bits >> 24)
+		bytes[4] = byte(bits >> 32)
+		bytes[5] = byte(bits >> 40)
+		n += 6
+		if n >= bufferFlushSize {
+			w.write(w.bytes[:n])
+			n = 0
+		}
+		w.nbytes = n
+	}
+}
+
+// Write the header of a dynamic Huffman block to the output stream.
+//
+//  numLiterals  The number of literals specified in codegen
+//  numOffsets   The number of offsets specified in codegen
+//  numCodegens  The number of codegens used in codegen
+func (w *huffmanBitWriter) writeDynamicHeader(numLiterals int, numOffsets int, numCodegens int, isEof bool) {
+	if w.err != nil {
+		return
+	}
+	var firstBits int32 = 4
+	if isEof {
+		firstBits = 5
+	}
+	w.writeBits(firstBits, 3)
+	w.writeBits(int32(numLiterals-257), 5)
+	w.writeBits(int32(numOffsets-1), 5)
+	w.writeBits(int32(numCodegens-4), 4)
+
+	for i := 0; i < numCodegens; i++ {
+		value := uint(w.codegenEncoding.codes[codegenOrder[i]].len)
+		w.writeBits(int32(value), 3)
+	}
+
+	i := 0
+	for {
+		var codeWord int = int(w.codegen[i])
+		i++
+		if codeWord == badCode {
+			break
+		}
+		w.writeCode(w.codegenEncoding.codes[uint32(codeWord)])
+
+		switch codeWord {
+		case 16:
+			w.writeBits(int32(w.codegen[i]), 2)
+			i++
+			break
+		case 17:
+			w.writeBits(int32(w.codegen[i]), 3)
+			i++
+			break
+		case 18:
+			w.writeBits(int32(w.codegen[i]), 7)
+			i++
+			break
+		}
+	}
+}
+
+func (w *huffmanBitWriter) writeStoredHeader(length int, isEof bool) {
+	if w.err != nil {
+		return
+	}
+	var flag int32
+	if isEof {
+		flag = 1
+	}
+	w.writeBits(flag, 3)
+	w.flush()
+	w.writeBits(int32(length), 16)
+	w.writeBits(int32(^uint16(length)), 16)
+}
+
+func (w *huffmanBitWriter) writeFixedHeader(isEof bool) {
+	if w.err != nil {
+		return
+	}
+	// Indicate that we are a fixed Huffman block
+	var value int32 = 2
+	if isEof {
+		value = 3
+	}
+	w.writeBits(value, 3)
+}
+
+// writeBlock will write a block of tokens with the smallest encoding.
+// The original input can be supplied, and if the huffman encoded data
+// is larger than the original bytes, the data will be written as a
+// stored block.
+// If the input is nil, the tokens will always be Huffman encoded.
+func (w *huffmanBitWriter) writeBlock(tokens []token, eof bool, input []byte) {
+	if w.err != nil {
+		return
+	}
+
+	tokens = append(tokens, endBlockMarker)
+	numLiterals, numOffsets := w.indexTokens(tokens)
+
+	var extraBits int
+	storedSize, storable := w.storedSize(input)
+	if storable {
+		// We only bother calculating the costs of the extra bits required by
+		// the length of offset fields (which will be the same for both fixed
+		// and dynamic encoding), if we need to compare those two encodings
+		// against stored encoding.
+		for lengthCode := lengthCodesStart + 8; lengthCode < numLiterals; lengthCode++ {
+			// First eight length codes have extra size = 0.
+			extraBits += int(w.literalFreq[lengthCode]) * int(lengthExtraBits[lengthCode-lengthCodesStart])
+		}
+		for offsetCode := 4; offsetCode < numOffsets; offsetCode++ {
+			// First four offset codes have extra size = 0.
+			extraBits += int(w.offsetFreq[offsetCode]) * int(offsetExtraBits[offsetCode])
+		}
+	}
+
+	// Figure out smallest code.
+	// Fixed Huffman baseline.
+	var literalEncoding = fixedLiteralEncoding
+	var offsetEncoding = fixedOffsetEncoding
+	var size = w.fixedSize(extraBits)
+
+	// Dynamic Huffman?
+	var numCodegens int
+
+	// Generate codegen and codegenFrequencies, which indicates how to encode
+	// the literalEncoding and the offsetEncoding.
+	w.generateCodegen(numLiterals, numOffsets, w.literalEncoding, w.offsetEncoding)
+	w.codegenEncoding.generate(w.codegenFreq[:], 7)
+	dynamicSize, numCodegens := w.dynamicSize(w.literalEncoding, w.offsetEncoding, extraBits)
+
+	if dynamicSize < size {
+		size = dynamicSize
+		literalEncoding = w.literalEncoding
+		offsetEncoding = w.offsetEncoding
+	}
+
+	// Stored bytes?
+	if storable && storedSize < size {
+		w.writeStoredHeader(len(input), eof)
+		w.writeBytes(input)
+		return
+	}
+
+	// Huffman.
+	if literalEncoding == fixedLiteralEncoding {
+		w.writeFixedHeader(eof)
+	} else {
+		w.writeDynamicHeader(numLiterals, numOffsets, numCodegens, eof)
+	}
+
+	// Write the tokens.
+	w.writeTokens(tokens, literalEncoding.codes, offsetEncoding.codes)
+}
+
+// writeBlockDynamic encodes a block using a dynamic Huffman table.
+// This should be used if the symbols used have a disproportionate
+// histogram distribution.
+// If input is supplied and the compression savings are below 1/16th of the
+// input size the block is stored.
+func (w *huffmanBitWriter) writeBlockDynamic(tokens []token, eof bool, input []byte) {
+	if w.err != nil {
+		return
+	}
+
+	tokens = append(tokens, endBlockMarker)
+	numLiterals, numOffsets := w.indexTokens(tokens)
+
+	// Generate codegen and codegenFrequencies, which indicates how to encode
+	// the literalEncoding and the offsetEncoding.
+	w.generateCodegen(numLiterals, numOffsets, w.literalEncoding, w.offsetEncoding)
+	w.codegenEncoding.generate(w.codegenFreq[:], 7)
+	size, numCodegens := w.dynamicSize(w.literalEncoding, w.offsetEncoding, 0)
+
+	// Store bytes, if we don't get a reasonable improvement.
+	if ssize, storable := w.storedSize(input); storable && ssize < (size+size>>4) {
+		w.writeStoredHeader(len(input), eof)
+		w.writeBytes(input)
+		return
+	}
+
+	// Write Huffman table.
+	w.writeDynamicHeader(numLiterals, numOffsets, numCodegens, eof)
+
+	// Write the tokens.
+	w.writeTokens(tokens, w.literalEncoding.codes, w.offsetEncoding.codes)
+}
+
+// indexTokens indexes a slice of tokens, and updates
+// literalFreq and offsetFreq, and generates literalEncoding
+// and offsetEncoding.
+// The number of literal and offset tokens is returned.
+func (w *huffmanBitWriter) indexTokens(tokens []token) (numLiterals, numOffsets int) {
+	for i := range w.literalFreq {
+		w.literalFreq[i] = 0
+	}
+	for i := range w.offsetFreq {
+		w.offsetFreq[i] = 0
+	}
+
+	for _, t := range tokens {
+		if t < matchType {
+			w.literalFreq[t.literal()]++
+			continue
+		}
+		length := t.length()
+		offset := t.offset()
+		w.literalFreq[lengthCodesStart+lengthCode(length)]++
+		w.offsetFreq[offsetCode(offset)]++
+	}
+
+	// get the number of literals
+	numLiterals = len(w.literalFreq)
+	for w.literalFreq[numLiterals-1] == 0 {
+		numLiterals--
+	}
+	// get the number of offsets
+	numOffsets = len(w.offsetFreq)
+	for numOffsets > 0 && w.offsetFreq[numOffsets-1] == 0 {
+		numOffsets--
+	}
+	if numOffsets == 0 {
+		// We haven't found a single match. If we want to go with the dynamic encoding,
+		// we should count at least one offset to be sure that the offset huffman tree could be encoded.
+		w.offsetFreq[0] = 1
+		numOffsets = 1
+	}
+	w.literalEncoding.generate(w.literalFreq, 15)
+	w.offsetEncoding.generate(w.offsetFreq, 15)
+	return
+}
+
+// writeTokens writes a slice of tokens to the output.
+// codes for literal and offset encoding must be supplied.
+func (w *huffmanBitWriter) writeTokens(tokens []token, leCodes, oeCodes []hcode) {
+	if w.err != nil {
+		return
+	}
+	for _, t := range tokens {
+		if t < matchType {
+			w.writeCode(leCodes[t.literal()])
+			continue
+		}
+		// Write the length
+		length := t.length()
+		lengthCode := lengthCode(length)
+		w.writeCode(leCodes[lengthCode+lengthCodesStart])
+		extraLengthBits := uint(lengthExtraBits[lengthCode])
+		if extraLengthBits > 0 {
+			extraLength := int32(length - lengthBase[lengthCode])
+			w.writeBits(extraLength, extraLengthBits)
+		}
+		// Write the offset
+		offset := t.offset()
+		offsetCode := offsetCode(offset)
+		w.writeCode(oeCodes[offsetCode])
+		extraOffsetBits := uint(offsetExtraBits[offsetCode])
+		if extraOffsetBits > 0 {
+			extraOffset := int32(offset - offsetBase[offsetCode])
+			w.writeBits(extraOffset, extraOffsetBits)
+		}
+	}
+}
+
+// huffOffset is a static offset encoder used for huffman only encoding.
+// It can be reused since we will not be encoding offset values.
+var huffOffset *huffmanEncoder
+
+func init() {
+	w := newHuffmanBitWriter(nil)
+	w.offsetFreq[0] = 1
+	huffOffset = newHuffmanEncoder(offsetCodeCount)
+	huffOffset.generate(w.offsetFreq, 15)
+}
+
+// writeBlockHuff encodes a block of bytes as either
+// Huffman encoded literals or uncompressed bytes if the
+// results only gains very little from compression.
+func (w *huffmanBitWriter) writeBlockHuff(eof bool, input []byte) {
+	if w.err != nil {
+		return
+	}
+
+	// Clear histogram
+	for i := range w.literalFreq {
+		w.literalFreq[i] = 0
+	}
+
+	// Add everything as literals
+	histogram(input, w.literalFreq)
+
+	w.literalFreq[endBlockMarker] = 1
+
+	const numLiterals = endBlockMarker + 1
+	const numOffsets = 1
+
+	w.literalEncoding.generate(w.literalFreq, 15)
+
+	// Figure out smallest code.
+	// Always use dynamic Huffman or Store
+	var numCodegens int
+
+	// Generate codegen and codegenFrequencies, which indicates how to encode
+	// the literalEncoding and the offsetEncoding.
+	w.generateCodegen(numLiterals, numOffsets, w.literalEncoding, huffOffset)
+	w.codegenEncoding.generate(w.codegenFreq[:], 7)
+	size, numCodegens := w.dynamicSize(w.literalEncoding, huffOffset, 0)
+
+	// Store bytes, if we don't get a reasonable improvement.
+	if ssize, storable := w.storedSize(input); storable && ssize < (size+size>>4) {
+		w.writeStoredHeader(len(input), eof)
+		w.writeBytes(input)
+		return
+	}
+
+	// Huffman.
+	w.writeDynamicHeader(numLiterals, numOffsets, numCodegens, eof)
+	encoding := w.literalEncoding.codes[:257]
+	n := w.nbytes
+	for _, t := range input {
+		// Bitwriting inlined, ~30% speedup
+		c := encoding[t]
+		w.bits |= uint64(c.code) << w.nbits
+		w.nbits += uint(c.len)
+		if w.nbits < 48 {
+			continue
+		}
+		// Store 6 bytes
+		bits := w.bits
+		w.bits >>= 48
+		w.nbits -= 48
+		bytes := w.bytes[n : n+6]
+		bytes[0] = byte(bits)
+		bytes[1] = byte(bits >> 8)
+		bytes[2] = byte(bits >> 16)
+		bytes[3] = byte(bits >> 24)
+		bytes[4] = byte(bits >> 32)
+		bytes[5] = byte(bits >> 40)
+		n += 6
+		if n < bufferFlushSize {
+			continue
+		}
+		w.write(w.bytes[:n])
+		if w.err != nil {
+			return // Return early in the event of write failures
+		}
+		n = 0
+	}
+	w.nbytes = n
+	w.writeCode(encoding[endBlockMarker])
+}
diff --git a/vendor/github.com/klauspost/compress/flate/huffman_code.go b/vendor/github.com/klauspost/compress/flate/huffman_code.go
new file mode 100644
index 00000000000..bdcbd823b00
--- /dev/null
+++ b/vendor/github.com/klauspost/compress/flate/huffman_code.go
@@ -0,0 +1,344 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package flate
+
+import (
+	"math"
+	"sort"
+)
+
+// hcode is a huffman code with a bit code and bit length.
+type hcode struct {
+	code, len uint16
+}
+
+type huffmanEncoder struct {
+	codes     []hcode
+	freqcache []literalNode
+	bitCount  [17]int32
+	lns       byLiteral // stored to avoid repeated allocation in generate
+	lfs       byFreq    // stored to avoid repeated allocation in generate
+}
+
+type literalNode struct {
+	literal uint16
+	freq    int32
+}
+
+// A levelInfo describes the state of the constructed tree for a given depth.
+type levelInfo struct {
+	// Our level.  for better printing
+	level int32
+
+	// The frequency of the last node at this level
+	lastFreq int32
+
+	// The frequency of the next character to add to this level
+	nextCharFreq int32
+
+	// The frequency of the next pair (from level below) to add to this level.
+	// Only valid if the "needed" value of the next lower level is 0.
+	nextPairFreq int32
+
+	// The number of chains remaining to generate for this level before moving
+	// up to the next level
+	needed int32
+}
+
+// set sets the code and length of an hcode.
+func (h *hcode) set(code uint16, length uint16) {
+	h.len = length
+	h.code = code
+}
+
+func maxNode() literalNode { return literalNode{math.MaxUint16, math.MaxInt32} }
+
+func newHuffmanEncoder(size int) *huffmanEncoder {
+	return &huffmanEncoder{codes: make([]hcode, size)}
+}
+
+// Generates a HuffmanCode corresponding to the fixed literal table
+func generateFixedLiteralEncoding() *huffmanEncoder {
+	h := newHuffmanEncoder(maxNumLit)
+	codes := h.codes
+	var ch uint16
+	for ch = 0; ch < maxNumLit; ch++ {
+		var bits uint16
+		var size uint16
+		switch {
+		case ch < 144:
+			// size 8, 000110000  .. 10111111
+			bits = ch + 48
+			size = 8
+			break
+		case ch < 256:
+			// size 9, 110010000 .. 111111111
+			bits = ch + 400 - 144
+			size = 9
+			break
+		case ch < 280:
+			// size 7, 0000000 .. 0010111
+			bits = ch - 256
+			size = 7
+			break
+		default:
+			// size 8, 11000000 .. 11000111
+			bits = ch + 192 - 280
+			size = 8
+		}
+		codes[ch] = hcode{code: reverseBits(bits, byte(size)), len: size}
+	}
+	return h
+}
+
+func generateFixedOffsetEncoding() *huffmanEncoder {
+	h := newHuffmanEncoder(30)
+	codes := h.codes
+	for ch := range codes {
+		codes[ch] = hcode{code: reverseBits(uint16(ch), 5), len: 5}
+	}
+	return h
+}
+
+var fixedLiteralEncoding *huffmanEncoder = generateFixedLiteralEncoding()
+var fixedOffsetEncoding *huffmanEncoder = generateFixedOffsetEncoding()
+
+func (h *huffmanEncoder) bitLength(freq []int32) int {
+	var total int
+	for i, f := range freq {
+		if f != 0 {
+			total += int(f) * int(h.codes[i].len)
+		}
+	}
+	return total
+}
+
+const maxBitsLimit = 16
+
+// Return the number of literals assigned to each bit size in the Huffman encoding
+//
+// This method is only called when list.length >= 3
+// The cases of 0, 1, and 2 literals are handled by special case code.
+//
+// list  An array of the literals with non-zero frequencies
+//             and their associated frequencies. The array is in order of increasing
+//             frequency, and has as its last element a special element with frequency
+//             MaxInt32
+// maxBits     The maximum number of bits that should be used to encode any literal.
+//             Must be less than 16.
+// return      An integer array in which array[i] indicates the number of literals
+//             that should be encoded in i bits.
+func (h *huffmanEncoder) bitCounts(list []literalNode, maxBits int32) []int32 {
+	if maxBits >= maxBitsLimit {
+		panic("flate: maxBits too large")
+	}
+	n := int32(len(list))
+	list = list[0 : n+1]
+	list[n] = maxNode()
+
+	// The tree can't have greater depth than n - 1, no matter what. This
+	// saves a little bit of work in some small cases
+	if maxBits > n-1 {
+		maxBits = n - 1
+	}
+
+	// Create information about each of the levels.
+	// A bogus "Level 0" whose sole purpose is so that
+	// level1.prev.needed==0.  This makes level1.nextPairFreq
+	// be a legitimate value that never gets chosen.
+	var levels [maxBitsLimit]levelInfo
+	// leafCounts[i] counts the number of literals at the left
+	// of ancestors of the rightmost node at level i.
+	// leafCounts[i][j] is the number of literals at the left
+	// of the level j ancestor.
+	var leafCounts [maxBitsLimit][maxBitsLimit]int32
+
+	for level := int32(1); level <= maxBits; level++ {
+		// For every level, the first two items are the first two characters.
+		// We initialize the levels as if we had already figured this out.
+		levels[level] = levelInfo{
+			level:        level,
+			lastFreq:     list[1].freq,
+			nextCharFreq: list[2].freq,
+			nextPairFreq: list[0].freq + list[1].freq,
+		}
+		leafCounts[level][level] = 2
+		if level == 1 {
+			levels[level].nextPairFreq = math.MaxInt32
+		}
+	}
+
+	// We need a total of 2*n - 2 items at top level and have already generated 2.
+	levels[maxBits].needed = 2*n - 4
+
+	level := maxBits
+	for {
+		l := &levels[level]
+		if l.nextPairFreq == math.MaxInt32 && l.nextCharFreq == math.MaxInt32 {
+			// We've run out of both leafs and pairs.
+			// End all calculations for this level.
+			// To make sure we never come back to this level or any lower level,
+			// set nextPairFreq impossibly large.
+			l.needed = 0
+			levels[level+1].nextPairFreq = math.MaxInt32
+			level++
+			continue
+		}
+
+		prevFreq := l.lastFreq
+		if l.nextCharFreq < l.nextPairFreq {
+			// The next item on this row is a leaf node.
+			n := leafCounts[level][level] + 1
+			l.lastFreq = l.nextCharFreq
+			// Lower leafCounts are the same of the previous node.
+			leafCounts[level][level] = n
+			l.nextCharFreq = list[n].freq
+		} else {
+			// The next item on this row is a pair from the previous row.
+			// nextPairFreq isn't valid until we generate two
+			// more values in the level below
+			l.lastFreq = l.nextPairFreq
+			// Take leaf counts from the lower level, except counts[level] remains the same.
+			copy(leafCounts[level][:level], leafCounts[level-1][:level])
+			levels[l.level-1].needed = 2
+		}
+
+		if l.needed--; l.needed == 0 {
+			// We've done everything we need to do for this level.
+			// Continue calculating one level up. Fill in nextPairFreq
+			// of that level with the sum of the two nodes we've just calculated on
+			// this level.
+			if l.level == maxBits {
+				// All done!
+				break
+			}
+			levels[l.level+1].nextPairFreq = prevFreq + l.lastFreq
+			level++
+		} else {
+			// If we stole from below, move down temporarily to replenish it.
+			for levels[level-1].needed > 0 {
+				level--
+			}
+		}
+	}
+
+	// Somethings is wrong if at the end, the top level is null or hasn't used
+	// all of the leaves.
+	if leafCounts[maxBits][maxBits] != n {
+		panic("leafCounts[maxBits][maxBits] != n")
+	}
+
+	bitCount := h.bitCount[:maxBits+1]
+	bits := 1
+	counts := &leafCounts[maxBits]
+	for level := maxBits; level > 0; level-- {
+		// chain.leafCount gives the number of literals requiring at least "bits"
+		// bits to encode.
+		bitCount[bits] = counts[level] - counts[level-1]
+		bits++
+	}
+	return bitCount
+}
+
+// Look at the leaves and assign them a bit count and an encoding as specified
+// in RFC 1951 3.2.2
+func (h *huffmanEncoder) assignEncodingAndSize(bitCount []int32, list []literalNode) {
+	code := uint16(0)
+	for n, bits := range bitCount {
+		code <<= 1
+		if n == 0 || bits == 0 {
+			continue
+		}
+		// The literals list[len(list)-bits] .. list[len(list)-bits]
+		// are encoded using "bits" bits, and get the values
+		// code, code + 1, ....  The code values are
+		// assigned in literal order (not frequency order).
+		chunk := list[len(list)-int(bits):]
+
+		h.lns.sort(chunk)
+		for _, node := range chunk {
+			h.codes[node.literal] = hcode{code: reverseBits(code, uint8(n)), len: uint16(n)}
+			code++
+		}
+		list = list[0 : len(list)-int(bits)]
+	}
+}
+
+// Update this Huffman Code object to be the minimum code for the specified frequency count.
+//
+// freq  An array of frequencies, in which frequency[i] gives the frequency of literal i.
+// maxBits  The maximum number of bits to use for any literal.
+func (h *huffmanEncoder) generate(freq []int32, maxBits int32) {
+	if h.freqcache == nil {
+		// Allocate a reusable buffer with the longest possible frequency table.
+		// Possible lengths are codegenCodeCount, offsetCodeCount and maxNumLit.
+		// The largest of these is maxNumLit, so we allocate for that case.
+		h.freqcache = make([]literalNode, maxNumLit+1)
+	}
+	list := h.freqcache[:len(freq)+1]
+	// Number of non-zero literals
+	count := 0
+	// Set list to be the set of all non-zero literals and their frequencies
+	for i, f := range freq {
+		if f != 0 {
+			list[count] = literalNode{uint16(i), f}
+			count++
+		} else {
+			list[count] = literalNode{}
+			h.codes[i].len = 0
+		}
+	}
+	list[len(freq)] = literalNode{}
+
+	list = list[:count]
+	if count <= 2 {
+		// Handle the small cases here, because they are awkward for the general case code. With
+		// two or fewer literals, everything has bit length 1.
+		for i, node := range list {
+			// "list" is in order of increasing literal value.
+			h.codes[node.literal].set(uint16(i), 1)
+		}
+		return
+	}
+	h.lfs.sort(list)
+
+	// Get the number of literals for each bit count
+	bitCount := h.bitCounts(list, maxBits)
+	// And do the assignment
+	h.assignEncodingAndSize(bitCount, list)
+}
+
+type byLiteral []literalNode
+
+func (s *byLiteral) sort(a []literalNode) {
+	*s = byLiteral(a)
+	sort.Sort(s)
+}
+
+func (s byLiteral) Len() int { return len(s) }
+
+func (s byLiteral) Less(i, j int) bool {
+	return s[i].literal < s[j].literal
+}
+
+func (s byLiteral) Swap(i, j int) { s[i], s[j] = s[j], s[i] }
+
+type byFreq []literalNode
+
+func (s *byFreq) sort(a []literalNode) {
+	*s = byFreq(a)
+	sort.Sort(s)
+}
+
+func (s byFreq) Len() int { return len(s) }
+
+func (s byFreq) Less(i, j int) bool {
+	if s[i].freq == s[j].freq {
+		return s[i].literal < s[j].literal
+	}
+	return s[i].freq < s[j].freq
+}
+
+func (s byFreq) Swap(i, j int) { s[i], s[j] = s[j], s[i] }
diff --git a/vendor/github.com/klauspost/compress/flate/inflate.go b/vendor/github.com/klauspost/compress/flate/inflate.go
new file mode 100644
index 00000000000..800d0ce9e54
--- /dev/null
+++ b/vendor/github.com/klauspost/compress/flate/inflate.go
@@ -0,0 +1,880 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Package flate implements the DEFLATE compressed data format, described in
+// RFC 1951.  The gzip and zlib packages implement access to DEFLATE-based file
+// formats.
+package flate
+
+import (
+	"bufio"
+	"io"
+	"math/bits"
+	"strconv"
+	"sync"
+)
+
+const (
+	maxCodeLen     = 16 // max length of Huffman code
+	maxCodeLenMask = 15 // mask for max length of Huffman code
+	// The next three numbers come from the RFC section 3.2.7, with the
+	// additional proviso in section 3.2.5 which implies that distance codes
+	// 30 and 31 should never occur in compressed data.
+	maxNumLit  = 286
+	maxNumDist = 30
+	numCodes   = 19 // number of codes in Huffman meta-code
+)
+
+// Initialize the fixedHuffmanDecoder only once upon first use.
+var fixedOnce sync.Once
+var fixedHuffmanDecoder huffmanDecoder
+
+// A CorruptInputError reports the presence of corrupt input at a given offset.
+type CorruptInputError int64
+
+func (e CorruptInputError) Error() string {
+	return "flate: corrupt input before offset " + strconv.FormatInt(int64(e), 10)
+}
+
+// An InternalError reports an error in the flate code itself.
+type InternalError string
+
+func (e InternalError) Error() string { return "flate: internal error: " + string(e) }
+
+// A ReadError reports an error encountered while reading input.
+//
+// Deprecated: No longer returned.
+type ReadError struct {
+	Offset int64 // byte offset where error occurred
+	Err    error // error returned by underlying Read
+}
+
+func (e *ReadError) Error() string {
+	return "flate: read error at offset " + strconv.FormatInt(e.Offset, 10) + ": " + e.Err.Error()
+}
+
+// A WriteError reports an error encountered while writing output.
+//
+// Deprecated: No longer returned.
+type WriteError struct {
+	Offset int64 // byte offset where error occurred
+	Err    error // error returned by underlying Write
+}
+
+func (e *WriteError) Error() string {
+	return "flate: write error at offset " + strconv.FormatInt(e.Offset, 10) + ": " + e.Err.Error()
+}
+
+// Resetter resets a ReadCloser returned by NewReader or NewReaderDict to
+// to switch to a new underlying Reader. This permits reusing a ReadCloser
+// instead of allocating a new one.
+type Resetter interface {
+	// Reset discards any buffered data and resets the Resetter as if it was
+	// newly initialized with the given reader.
+	Reset(r io.Reader, dict []byte) error
+}
+
+// The data structure for decoding Huffman tables is based on that of
+// zlib. There is a lookup table of a fixed bit width (huffmanChunkBits),
+// For codes smaller than the table width, there are multiple entries
+// (each combination of trailing bits has the same value). For codes
+// larger than the table width, the table contains a link to an overflow
+// table. The width of each entry in the link table is the maximum code
+// size minus the chunk width.
+//
+// Note that you can do a lookup in the table even without all bits
+// filled. Since the extra bits are zero, and the DEFLATE Huffman codes
+// have the property that shorter codes come before longer ones, the
+// bit length estimate in the result is a lower bound on the actual
+// number of bits.
+//
+// See the following:
+//	http://www.gzip.org/algorithm.txt
+
+// chunk & 15 is number of bits
+// chunk >> 4 is value, including table link
+
+const (
+	huffmanChunkBits  = 9
+	huffmanNumChunks  = 1 << huffmanChunkBits
+	huffmanCountMask  = 15
+	huffmanValueShift = 4
+)
+
+type huffmanDecoder struct {
+	min      int                       // the minimum code length
+	chunks   *[huffmanNumChunks]uint32 // chunks as described above
+	links    [][]uint32                // overflow links
+	linkMask uint32                    // mask the width of the link table
+}
+
+// Initialize Huffman decoding tables from array of code lengths.
+// Following this function, h is guaranteed to be initialized into a complete
+// tree (i.e., neither over-subscribed nor under-subscribed). The exception is a
+// degenerate case where the tree has only a single symbol with length 1. Empty
+// trees are permitted.
+func (h *huffmanDecoder) init(lengths []int) bool {
+	// Sanity enables additional runtime tests during Huffman
+	// table construction. It's intended to be used during
+	// development to supplement the currently ad-hoc unit tests.
+	const sanity = false
+
+	if h.chunks == nil {
+		h.chunks = &[huffmanNumChunks]uint32{}
+	}
+	if h.min != 0 {
+		*h = huffmanDecoder{chunks: h.chunks, links: h.links}
+	}
+
+	// Count number of codes of each length,
+	// compute min and max length.
+	var count [maxCodeLen]int
+	var min, max int
+	for _, n := range lengths {
+		if n == 0 {
+			continue
+		}
+		if min == 0 || n < min {
+			min = n
+		}
+		if n > max {
+			max = n
+		}
+		count[n&maxCodeLenMask]++
+	}
+
+	// Empty tree. The decompressor.huffSym function will fail later if the tree
+	// is used. Technically, an empty tree is only valid for the HDIST tree and
+	// not the HCLEN and HLIT tree. However, a stream with an empty HCLEN tree
+	// is guaranteed to fail since it will attempt to use the tree to decode the
+	// codes for the HLIT and HDIST trees. Similarly, an empty HLIT tree is
+	// guaranteed to fail later since the compressed data section must be
+	// composed of at least one symbol (the end-of-block marker).
+	if max == 0 {
+		return true
+	}
+
+	code := 0
+	var nextcode [maxCodeLen]int
+	for i := min; i <= max; i++ {
+		code <<= 1
+		nextcode[i&maxCodeLenMask] = code
+		code += count[i&maxCodeLenMask]
+	}
+
+	// Check that the coding is complete (i.e., that we've
+	// assigned all 2-to-the-max possible bit sequences).
+	// Exception: To be compatible with zlib, we also need to
+	// accept degenerate single-code codings. See also
+	// TestDegenerateHuffmanCoding.
+	if code != 1<<uint(max) && !(code == 1 && max == 1) {
+		return false
+	}
+
+	h.min = min
+	chunks := h.chunks[:]
+	for i := range chunks {
+		chunks[i] = 0
+	}
+
+	if max > huffmanChunkBits {
+		numLinks := 1 << (uint(max) - huffmanChunkBits)
+		h.linkMask = uint32(numLinks - 1)
+
+		// create link tables
+		link := nextcode[huffmanChunkBits+1] >> 1
+		if cap(h.links) < huffmanNumChunks-link {
+			h.links = make([][]uint32, huffmanNumChunks-link)
+		} else {
+			h.links = h.links[:huffmanNumChunks-link]
+		}
+		for j := uint(link); j < huffmanNumChunks; j++ {
+			reverse := int(bits.Reverse16(uint16(j)))
+			reverse >>= uint(16 - huffmanChunkBits)
+			off := j - uint(link)
+			if sanity && h.chunks[reverse] != 0 {
+				panic("impossible: overwriting existing chunk")
+			}
+			h.chunks[reverse] = uint32(off<<huffmanValueShift | (huffmanChunkBits + 1))
+			if cap(h.links[off]) < numLinks {
+				h.links[off] = make([]uint32, numLinks)
+			} else {
+				links := h.links[off][:0]
+				h.links[off] = links[:numLinks]
+			}
+		}
+	} else {
+		h.links = h.links[:0]
+	}
+
+	for i, n := range lengths {
+		if n == 0 {
+			continue
+		}
+		code := nextcode[n]
+		nextcode[n]++
+		chunk := uint32(i<<huffmanValueShift | n)
+		reverse := int(bits.Reverse16(uint16(code)))
+		reverse >>= uint(16 - n)
+		if n <= huffmanChunkBits {
+			for off := reverse; off < len(h.chunks); off += 1 << uint(n) {
+				// We should never need to overwrite
+				// an existing chunk. Also, 0 is
+				// never a valid chunk, because the
+				// lower 4 "count" bits should be
+				// between 1 and 15.
+				if sanity && h.chunks[off] != 0 {
+					panic("impossible: overwriting existing chunk")
+				}
+				h.chunks[off] = chunk
+			}
+		} else {
+			j := reverse & (huffmanNumChunks - 1)
+			if sanity && h.chunks[j]&huffmanCountMask != huffmanChunkBits+1 {
+				// Longer codes should have been
+				// associated with a link table above.
+				panic("impossible: not an indirect chunk")
+			}
+			value := h.chunks[j] >> huffmanValueShift
+			linktab := h.links[value]
+			reverse >>= huffmanChunkBits
+			for off := reverse; off < len(linktab); off += 1 << uint(n-huffmanChunkBits) {
+				if sanity && linktab[off] != 0 {
+					panic("impossible: overwriting existing chunk")
+				}
+				linktab[off] = chunk
+			}
+		}
+	}
+
+	if sanity {
+		// Above we've sanity checked that we never overwrote
+		// an existing entry. Here we additionally check that
+		// we filled the tables completely.
+		for i, chunk := range h.chunks {
+			if chunk == 0 {
+				// As an exception, in the degenerate
+				// single-code case, we allow odd
+				// chunks to be missing.
+				if code == 1 && i%2 == 1 {
+					continue
+				}
+				panic("impossible: missing chunk")
+			}
+		}
+		for _, linktab := range h.links {
+			for _, chunk := range linktab {
+				if chunk == 0 {
+					panic("impossible: missing chunk")
+				}
+			}
+		}
+	}
+
+	return true
+}
+
+// The actual read interface needed by NewReader.
+// If the passed in io.Reader does not also have ReadByte,
+// the NewReader will introduce its own buffering.
+type Reader interface {
+	io.Reader
+	io.ByteReader
+}
+
+// Decompress state.
+type decompressor struct {
+	// Input source.
+	r       Reader
+	roffset int64
+
+	// Input bits, in top of b.
+	b  uint32
+	nb uint
+
+	// Huffman decoders for literal/length, distance.
+	h1, h2 huffmanDecoder
+
+	// Length arrays used to define Huffman codes.
+	bits     *[maxNumLit + maxNumDist]int
+	codebits *[numCodes]int
+
+	// Output history, buffer.
+	dict dictDecoder
+
+	// Temporary buffer (avoids repeated allocation).
+	buf [4]byte
+
+	// Next step in the decompression,
+	// and decompression state.
+	step      func(*decompressor)
+	stepState int
+	final     bool
+	err       error
+	toRead    []byte
+	hl, hd    *huffmanDecoder
+	copyLen   int
+	copyDist  int
+}
+
+func (f *decompressor) nextBlock() {
+	for f.nb < 1+2 {
+		if f.err = f.moreBits(); f.err != nil {
+			return
+		}
+	}
+	f.final = f.b&1 == 1
+	f.b >>= 1
+	typ := f.b & 3
+	f.b >>= 2
+	f.nb -= 1 + 2
+	switch typ {
+	case 0:
+		f.dataBlock()
+	case 1:
+		// compressed, fixed Huffman tables
+		f.hl = &fixedHuffmanDecoder
+		f.hd = nil
+		f.huffmanBlock()
+	case 2:
+		// compressed, dynamic Huffman tables
+		if f.err = f.readHuffman(); f.err != nil {
+			break
+		}
+		f.hl = &f.h1
+		f.hd = &f.h2
+		f.huffmanBlock()
+	default:
+		// 3 is reserved.
+		f.err = CorruptInputError(f.roffset)
+	}
+}
+
+func (f *decompressor) Read(b []byte) (int, error) {
+	for {
+		if len(f.toRead) > 0 {
+			n := copy(b, f.toRead)
+			f.toRead = f.toRead[n:]
+			if len(f.toRead) == 0 {
+				return n, f.err
+			}
+			return n, nil
+		}
+		if f.err != nil {
+			return 0, f.err
+		}
+		f.step(f)
+		if f.err != nil && len(f.toRead) == 0 {
+			f.toRead = f.dict.readFlush() // Flush what's left in case of error
+		}
+	}
+}
+
+// Support the io.WriteTo interface for io.Copy and friends.
+func (f *decompressor) WriteTo(w io.Writer) (int64, error) {
+	total := int64(0)
+	flushed := false
+	for {
+		if len(f.toRead) > 0 {
+			n, err := w.Write(f.toRead)
+			total += int64(n)
+			if err != nil {
+				f.err = err
+				return total, err
+			}
+			if n != len(f.toRead) {
+				return total, io.ErrShortWrite
+			}
+			f.toRead = f.toRead[:0]
+		}
+		if f.err != nil && flushed {
+			if f.err == io.EOF {
+				return total, nil
+			}
+			return total, f.err
+		}
+		if f.err == nil {
+			f.step(f)
+		}
+		if len(f.toRead) == 0 && f.err != nil && !flushed {
+			f.toRead = f.dict.readFlush() // Flush what's left in case of error
+			flushed = true
+		}
+	}
+}
+
+func (f *decompressor) Close() error {
+	if f.err == io.EOF {
+		return nil
+	}
+	return f.err
+}
+
+// RFC 1951 section 3.2.7.
+// Compression with dynamic Huffman codes
+
+var codeOrder = [...]int{16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15}
+
+func (f *decompressor) readHuffman() error {
+	// HLIT[5], HDIST[5], HCLEN[4].
+	for f.nb < 5+5+4 {
+		if err := f.moreBits(); err != nil {
+			return err
+		}
+	}
+	nlit := int(f.b&0x1F) + 257
+	if nlit > maxNumLit {
+		return CorruptInputError(f.roffset)
+	}
+	f.b >>= 5
+	ndist := int(f.b&0x1F) + 1
+	if ndist > maxNumDist {
+		return CorruptInputError(f.roffset)
+	}
+	f.b >>= 5
+	nclen := int(f.b&0xF) + 4
+	// numCodes is 19, so nclen is always valid.
+	f.b >>= 4
+	f.nb -= 5 + 5 + 4
+
+	// (HCLEN+4)*3 bits: code lengths in the magic codeOrder order.
+	for i := 0; i < nclen; i++ {
+		for f.nb < 3 {
+			if err := f.moreBits(); err != nil {
+				return err
+			}
+		}
+		f.codebits[codeOrder[i]] = int(f.b & 0x7)
+		f.b >>= 3
+		f.nb -= 3
+	}
+	for i := nclen; i < len(codeOrder); i++ {
+		f.codebits[codeOrder[i]] = 0
+	}
+	if !f.h1.init(f.codebits[0:]) {
+		return CorruptInputError(f.roffset)
+	}
+
+	// HLIT + 257 code lengths, HDIST + 1 code lengths,
+	// using the code length Huffman code.
+	for i, n := 0, nlit+ndist; i < n; {
+		x, err := f.huffSym(&f.h1)
+		if err != nil {
+			return err
+		}
+		if x < 16 {
+			// Actual length.
+			f.bits[i] = x
+			i++
+			continue
+		}
+		// Repeat previous length or zero.
+		var rep int
+		var nb uint
+		var b int
+		switch x {
+		default:
+			return InternalError("unexpected length code")
+		case 16:
+			rep = 3
+			nb = 2
+			if i == 0 {
+				return CorruptInputError(f.roffset)
+			}
+			b = f.bits[i-1]
+		case 17:
+			rep = 3
+			nb = 3
+			b = 0
+		case 18:
+			rep = 11
+			nb = 7
+			b = 0
+		}
+		for f.nb < nb {
+			if err := f.moreBits(); err != nil {
+				return err
+			}
+		}
+		rep += int(f.b & uint32(1<<nb-1))
+		f.b >>= nb
+		f.nb -= nb
+		if i+rep > n {
+			return CorruptInputError(f.roffset)
+		}
+		for j := 0; j < rep; j++ {
+			f.bits[i] = b
+			i++
+		}
+	}
+
+	if !f.h1.init(f.bits[0:nlit]) || !f.h2.init(f.bits[nlit:nlit+ndist]) {
+		return CorruptInputError(f.roffset)
+	}
+
+	// As an optimization, we can initialize the min bits to read at a time
+	// for the HLIT tree to the length of the EOB marker since we know that
+	// every block must terminate with one. This preserves the property that
+	// we never read any extra bytes after the end of the DEFLATE stream.
+	if f.h1.min < f.bits[endBlockMarker] {
+		f.h1.min = f.bits[endBlockMarker]
+	}
+
+	return nil
+}
+
+// Decode a single Huffman block from f.
+// hl and hd are the Huffman states for the lit/length values
+// and the distance values, respectively. If hd == nil, using the
+// fixed distance encoding associated with fixed Huffman blocks.
+func (f *decompressor) huffmanBlock() {
+	const (
+		stateInit = iota // Zero value must be stateInit
+		stateDict
+	)
+
+	switch f.stepState {
+	case stateInit:
+		goto readLiteral
+	case stateDict:
+		goto copyHistory
+	}
+
+readLiteral:
+	// Read literal and/or (length, distance) according to RFC section 3.2.3.
+	{
+		v, err := f.huffSym(f.hl)
+		if err != nil {
+			f.err = err
+			return
+		}
+		var n uint // number of bits extra
+		var length int
+		switch {
+		case v < 256:
+			f.dict.writeByte(byte(v))
+			if f.dict.availWrite() == 0 {
+				f.toRead = f.dict.readFlush()
+				f.step = (*decompressor).huffmanBlock
+				f.stepState = stateInit
+				return
+			}
+			goto readLiteral
+		case v == 256:
+			f.finishBlock()
+			return
+		// otherwise, reference to older data
+		case v < 265:
+			length = v - (257 - 3)
+			n = 0
+		case v < 269:
+			length = v*2 - (265*2 - 11)
+			n = 1
+		case v < 273:
+			length = v*4 - (269*4 - 19)
+			n = 2
+		case v < 277:
+			length = v*8 - (273*8 - 35)
+			n = 3
+		case v < 281:
+			length = v*16 - (277*16 - 67)
+			n = 4
+		case v < 285:
+			length = v*32 - (281*32 - 131)
+			n = 5
+		case v < maxNumLit:
+			length = 258
+			n = 0
+		default:
+			f.err = CorruptInputError(f.roffset)
+			return
+		}
+		if n > 0 {
+			for f.nb < n {
+				if err = f.moreBits(); err != nil {
+					f.err = err
+					return
+				}
+			}
+			length += int(f.b & uint32(1<<n-1))
+			f.b >>= n
+			f.nb -= n
+		}
+
+		var dist int
+		if f.hd == nil {
+			for f.nb < 5 {
+				if err = f.moreBits(); err != nil {
+					f.err = err
+					return
+				}
+			}
+			dist = int(bits.Reverse8(uint8(f.b & 0x1F << 3)))
+			f.b >>= 5
+			f.nb -= 5
+		} else {
+			if dist, err = f.huffSym(f.hd); err != nil {
+				f.err = err
+				return
+			}
+		}
+
+		switch {
+		case dist < 4:
+			dist++
+		case dist < maxNumDist:
+			nb := uint(dist-2) >> 1
+			// have 1 bit in bottom of dist, need nb more.
+			extra := (dist & 1) << nb
+			for f.nb < nb {
+				if err = f.moreBits(); err != nil {
+					f.err = err
+					return
+				}
+			}
+			extra |= int(f.b & uint32(1<<nb-1))
+			f.b >>= nb
+			f.nb -= nb
+			dist = 1<<(nb+1) + 1 + extra
+		default:
+			f.err = CorruptInputError(f.roffset)
+			return
+		}
+
+		// No check on length; encoding can be prescient.
+		if dist > f.dict.histSize() {
+			f.err = CorruptInputError(f.roffset)
+			return
+		}
+
+		f.copyLen, f.copyDist = length, dist
+		goto copyHistory
+	}
+
+copyHistory:
+	// Perform a backwards copy according to RFC section 3.2.3.
+	{
+		cnt := f.dict.tryWriteCopy(f.copyDist, f.copyLen)
+		if cnt == 0 {
+			cnt = f.dict.writeCopy(f.copyDist, f.copyLen)
+		}
+		f.copyLen -= cnt
+
+		if f.dict.availWrite() == 0 || f.copyLen > 0 {
+			f.toRead = f.dict.readFlush()
+			f.step = (*decompressor).huffmanBlock // We need to continue this work
+			f.stepState = stateDict
+			return
+		}
+		goto readLiteral
+	}
+}
+
+// Copy a single uncompressed data block from input to output.
+func (f *decompressor) dataBlock() {
+	// Uncompressed.
+	// Discard current half-byte.
+	f.nb = 0
+	f.b = 0
+
+	// Length then ones-complement of length.
+	nr, err := io.ReadFull(f.r, f.buf[0:4])
+	f.roffset += int64(nr)
+	if err != nil {
+		f.err = noEOF(err)
+		return
+	}
+	n := int(f.buf[0]) | int(f.buf[1])<<8
+	nn := int(f.buf[2]) | int(f.buf[3])<<8
+	if uint16(nn) != uint16(^n) {
+		f.err = CorruptInputError(f.roffset)
+		return
+	}
+
+	if n == 0 {
+		f.toRead = f.dict.readFlush()
+		f.finishBlock()
+		return
+	}
+
+	f.copyLen = n
+	f.copyData()
+}
+
+// copyData copies f.copyLen bytes from the underlying reader into f.hist.
+// It pauses for reads when f.hist is full.
+func (f *decompressor) copyData() {
+	buf := f.dict.writeSlice()
+	if len(buf) > f.copyLen {
+		buf = buf[:f.copyLen]
+	}
+
+	cnt, err := io.ReadFull(f.r, buf)
+	f.roffset += int64(cnt)
+	f.copyLen -= cnt
+	f.dict.writeMark(cnt)
+	if err != nil {
+		f.err = noEOF(err)
+		return
+	}
+
+	if f.dict.availWrite() == 0 || f.copyLen > 0 {
+		f.toRead = f.dict.readFlush()
+		f.step = (*decompressor).copyData
+		return
+	}
+	f.finishBlock()
+}
+
+func (f *decompressor) finishBlock() {
+	if f.final {
+		if f.dict.availRead() > 0 {
+			f.toRead = f.dict.readFlush()
+		}
+		f.err = io.EOF
+	}
+	f.step = (*decompressor).nextBlock
+}
+
+// noEOF returns err, unless err == io.EOF, in which case it returns io.ErrUnexpectedEOF.
+func noEOF(e error) error {
+	if e == io.EOF {
+		return io.ErrUnexpectedEOF
+	}
+	return e
+}
+
+func (f *decompressor) moreBits() error {
+	c, err := f.r.ReadByte()
+	if err != nil {
+		return noEOF(err)
+	}
+	f.roffset++
+	f.b |= uint32(c) << f.nb
+	f.nb += 8
+	return nil
+}
+
+// Read the next Huffman-encoded symbol from f according to h.
+func (f *decompressor) huffSym(h *huffmanDecoder) (int, error) {
+	// Since a huffmanDecoder can be empty or be composed of a degenerate tree
+	// with single element, huffSym must error on these two edge cases. In both
+	// cases, the chunks slice will be 0 for the invalid sequence, leading it
+	// satisfy the n == 0 check below.
+	n := uint(h.min)
+	// Optimization. Compiler isn't smart enough to keep f.b,f.nb in registers,
+	// but is smart enough to keep local variables in registers, so use nb and b,
+	// inline call to moreBits and reassign b,nb back to f on return.
+	nb, b := f.nb, f.b
+	for {
+		for nb < n {
+			c, err := f.r.ReadByte()
+			if err != nil {
+				f.b = b
+				f.nb = nb
+				return 0, noEOF(err)
+			}
+			f.roffset++
+			b |= uint32(c) << (nb & 31)
+			nb += 8
+		}
+		chunk := h.chunks[b&(huffmanNumChunks-1)]
+		n = uint(chunk & huffmanCountMask)
+		if n > huffmanChunkBits {
+			chunk = h.links[chunk>>huffmanValueShift][(b>>huffmanChunkBits)&h.linkMask]
+			n = uint(chunk & huffmanCountMask)
+		}
+		if n <= nb {
+			if n == 0 {
+				f.b = b
+				f.nb = nb
+				f.err = CorruptInputError(f.roffset)
+				return 0, f.err
+			}
+			f.b = b >> (n & 31)
+			f.nb = nb - n
+			return int(chunk >> huffmanValueShift), nil
+		}
+	}
+}
+
+func makeReader(r io.Reader) Reader {
+	if rr, ok := r.(Reader); ok {
+		return rr
+	}
+	return bufio.NewReader(r)
+}
+
+func fixedHuffmanDecoderInit() {
+	fixedOnce.Do(func() {
+		// These come from the RFC section 3.2.6.
+		var bits [288]int
+		for i := 0; i < 144; i++ {
+			bits[i] = 8
+		}
+		for i := 144; i < 256; i++ {
+			bits[i] = 9
+		}
+		for i := 256; i < 280; i++ {
+			bits[i] = 7
+		}
+		for i := 280; i < 288; i++ {
+			bits[i] = 8
+		}
+		fixedHuffmanDecoder.init(bits[:])
+	})
+}
+
+func (f *decompressor) Reset(r io.Reader, dict []byte) error {
+	*f = decompressor{
+		r:        makeReader(r),
+		bits:     f.bits,
+		codebits: f.codebits,
+		h1:       f.h1,
+		h2:       f.h2,
+		dict:     f.dict,
+		step:     (*decompressor).nextBlock,
+	}
+	f.dict.init(maxMatchOffset, dict)
+	return nil
+}
+
+// NewReader returns a new ReadCloser that can be used
+// to read the uncompressed version of r.
+// If r does not also implement io.ByteReader,
+// the decompressor may read more data than necessary from r.
+// It is the caller's responsibility to call Close on the ReadCloser
+// when finished reading.
+//
+// The ReadCloser returned by NewReader also implements Resetter.
+func NewReader(r io.Reader) io.ReadCloser {
+	fixedHuffmanDecoderInit()
+
+	var f decompressor
+	f.r = makeReader(r)
+	f.bits = new([maxNumLit + maxNumDist]int)
+	f.codebits = new([numCodes]int)
+	f.step = (*decompressor).nextBlock
+	f.dict.init(maxMatchOffset, nil)
+	return &f
+}
+
+// NewReaderDict is like NewReader but initializes the reader
+// with a preset dictionary. The returned Reader behaves as if
+// the uncompressed data stream started with the given dictionary,
+// which has already been read. NewReaderDict is typically used
+// to read data compressed by NewWriterDict.
+//
+// The ReadCloser returned by NewReader also implements Resetter.
+func NewReaderDict(r io.Reader, dict []byte) io.ReadCloser {
+	fixedHuffmanDecoderInit()
+
+	var f decompressor
+	f.r = makeReader(r)
+	f.bits = new([maxNumLit + maxNumDist]int)
+	f.codebits = new([numCodes]int)
+	f.step = (*decompressor).nextBlock
+	f.dict.init(maxMatchOffset, dict)
+	return &f
+}
diff --git a/vendor/github.com/klauspost/compress/flate/reverse_bits.go b/vendor/github.com/klauspost/compress/flate/reverse_bits.go
new file mode 100644
index 00000000000..c1a02720d1a
--- /dev/null
+++ b/vendor/github.com/klauspost/compress/flate/reverse_bits.go
@@ -0,0 +1,48 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package flate
+
+var reverseByte = [256]byte{
+	0x00, 0x80, 0x40, 0xc0, 0x20, 0xa0, 0x60, 0xe0,
+	0x10, 0x90, 0x50, 0xd0, 0x30, 0xb0, 0x70, 0xf0,
+	0x08, 0x88, 0x48, 0xc8, 0x28, 0xa8, 0x68, 0xe8,
+	0x18, 0x98, 0x58, 0xd8, 0x38, 0xb8, 0x78, 0xf8,
+	0x04, 0x84, 0x44, 0xc4, 0x24, 0xa4, 0x64, 0xe4,
+	0x14, 0x94, 0x54, 0xd4, 0x34, 0xb4, 0x74, 0xf4,
+	0x0c, 0x8c, 0x4c, 0xcc, 0x2c, 0xac, 0x6c, 0xec,
+	0x1c, 0x9c, 0x5c, 0xdc, 0x3c, 0xbc, 0x7c, 0xfc,
+	0x02, 0x82, 0x42, 0xc2, 0x22, 0xa2, 0x62, 0xe2,
+	0x12, 0x92, 0x52, 0xd2, 0x32, 0xb2, 0x72, 0xf2,
+	0x0a, 0x8a, 0x4a, 0xca, 0x2a, 0xaa, 0x6a, 0xea,
+	0x1a, 0x9a, 0x5a, 0xda, 0x3a, 0xba, 0x7a, 0xfa,
+	0x06, 0x86, 0x46, 0xc6, 0x26, 0xa6, 0x66, 0xe6,
+	0x16, 0x96, 0x56, 0xd6, 0x36, 0xb6, 0x76, 0xf6,
+	0x0e, 0x8e, 0x4e, 0xce, 0x2e, 0xae, 0x6e, 0xee,
+	0x1e, 0x9e, 0x5e, 0xde, 0x3e, 0xbe, 0x7e, 0xfe,
+	0x01, 0x81, 0x41, 0xc1, 0x21, 0xa1, 0x61, 0xe1,
+	0x11, 0x91, 0x51, 0xd1, 0x31, 0xb1, 0x71, 0xf1,
+	0x09, 0x89, 0x49, 0xc9, 0x29, 0xa9, 0x69, 0xe9,
+	0x19, 0x99, 0x59, 0xd9, 0x39, 0xb9, 0x79, 0xf9,
+	0x05, 0x85, 0x45, 0xc5, 0x25, 0xa5, 0x65, 0xe5,
+	0x15, 0x95, 0x55, 0xd5, 0x35, 0xb5, 0x75, 0xf5,
+	0x0d, 0x8d, 0x4d, 0xcd, 0x2d, 0xad, 0x6d, 0xed,
+	0x1d, 0x9d, 0x5d, 0xdd, 0x3d, 0xbd, 0x7d, 0xfd,
+	0x03, 0x83, 0x43, 0xc3, 0x23, 0xa3, 0x63, 0xe3,
+	0x13, 0x93, 0x53, 0xd3, 0x33, 0xb3, 0x73, 0xf3,
+	0x0b, 0x8b, 0x4b, 0xcb, 0x2b, 0xab, 0x6b, 0xeb,
+	0x1b, 0x9b, 0x5b, 0xdb, 0x3b, 0xbb, 0x7b, 0xfb,
+	0x07, 0x87, 0x47, 0xc7, 0x27, 0xa7, 0x67, 0xe7,
+	0x17, 0x97, 0x57, 0xd7, 0x37, 0xb7, 0x77, 0xf7,
+	0x0f, 0x8f, 0x4f, 0xcf, 0x2f, 0xaf, 0x6f, 0xef,
+	0x1f, 0x9f, 0x5f, 0xdf, 0x3f, 0xbf, 0x7f, 0xff,
+}
+
+func reverseUint16(v uint16) uint16 {
+	return uint16(reverseByte[v>>8]) | uint16(reverseByte[v&0xFF])<<8
+}
+
+func reverseBits(number uint16, bitLength byte) uint16 {
+	return reverseUint16(number << uint8(16-bitLength))
+}
diff --git a/vendor/github.com/klauspost/compress/flate/snappy.go b/vendor/github.com/klauspost/compress/flate/snappy.go
new file mode 100644
index 00000000000..d853320a758
--- /dev/null
+++ b/vendor/github.com/klauspost/compress/flate/snappy.go
@@ -0,0 +1,900 @@
+// Copyright 2011 The Snappy-Go Authors. All rights reserved.
+// Modified for deflate by Klaus Post (c) 2015.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package flate
+
+// emitLiteral writes a literal chunk and returns the number of bytes written.
+func emitLiteral(dst *tokens, lit []byte) {
+	ol := int(dst.n)
+	for i, v := range lit {
+		dst.tokens[(i+ol)&maxStoreBlockSize] = token(v)
+	}
+	dst.n += uint16(len(lit))
+}
+
+// emitCopy writes a copy chunk and returns the number of bytes written.
+func emitCopy(dst *tokens, offset, length int) {
+	dst.tokens[dst.n] = matchToken(uint32(length-3), uint32(offset-minOffsetSize))
+	dst.n++
+}
+
+type snappyEnc interface {
+	Encode(dst *tokens, src []byte)
+	Reset()
+}
+
+func newSnappy(level int) snappyEnc {
+	switch level {
+	case 1:
+		return &snappyL1{}
+	case 2:
+		return &snappyL2{snappyGen: snappyGen{cur: maxStoreBlockSize, prev: make([]byte, 0, maxStoreBlockSize)}}
+	case 3:
+		return &snappyL3{snappyGen: snappyGen{cur: maxStoreBlockSize, prev: make([]byte, 0, maxStoreBlockSize)}}
+	case 4:
+		return &snappyL4{snappyL3{snappyGen: snappyGen{cur: maxStoreBlockSize, prev: make([]byte, 0, maxStoreBlockSize)}}}
+	default:
+		panic("invalid level specified")
+	}
+}
+
+const (
+	tableBits       = 14             // Bits used in the table
+	tableSize       = 1 << tableBits // Size of the table
+	tableMask       = tableSize - 1  // Mask for table indices. Redundant, but can eliminate bounds checks.
+	tableShift      = 32 - tableBits // Right-shift to get the tableBits most significant bits of a uint32.
+	baseMatchOffset = 1              // The smallest match offset
+	baseMatchLength = 3              // The smallest match length per the RFC section 3.2.5
+	maxMatchOffset  = 1 << 15        // The largest match offset
+)
+
+func load32(b []byte, i int) uint32 {
+	b = b[i : i+4 : len(b)] // Help the compiler eliminate bounds checks on the next line.
+	return uint32(b[0]) | uint32(b[1])<<8 | uint32(b[2])<<16 | uint32(b[3])<<24
+}
+
+func load64(b []byte, i int) uint64 {
+	b = b[i : i+8 : len(b)] // Help the compiler eliminate bounds checks on the next line.
+	return uint64(b[0]) | uint64(b[1])<<8 | uint64(b[2])<<16 | uint64(b[3])<<24 |
+		uint64(b[4])<<32 | uint64(b[5])<<40 | uint64(b[6])<<48 | uint64(b[7])<<56
+}
+
+func hash(u uint32) uint32 {
+	return (u * 0x1e35a7bd) >> tableShift
+}
+
+// snappyL1 encapsulates level 1 compression
+type snappyL1 struct{}
+
+func (e *snappyL1) Reset() {}
+
+func (e *snappyL1) Encode(dst *tokens, src []byte) {
+	const (
+		inputMargin            = 16 - 1
+		minNonLiteralBlockSize = 1 + 1 + inputMargin
+	)
+
+	// This check isn't in the Snappy implementation, but there, the caller
+	// instead of the callee handles this case.
+	if len(src) < minNonLiteralBlockSize {
+		// We do not fill the token table.
+		// This will be picked up by caller.
+		dst.n = uint16(len(src))
+		return
+	}
+
+	// Initialize the hash table.
+	//
+	// The table element type is uint16, as s < sLimit and sLimit < len(src)
+	// and len(src) <= maxStoreBlockSize and maxStoreBlockSize == 65535.
+	var table [tableSize]uint16
+
+	// sLimit is when to stop looking for offset/length copies. The inputMargin
+	// lets us use a fast path for emitLiteral in the main loop, while we are
+	// looking for copies.
+	sLimit := len(src) - inputMargin
+
+	// nextEmit is where in src the next emitLiteral should start from.
+	nextEmit := 0
+
+	// The encoded form must start with a literal, as there are no previous
+	// bytes to copy, so we start looking for hash matches at s == 1.
+	s := 1
+	nextHash := hash(load32(src, s))
+
+	for {
+		// Copied from the C++ snappy implementation:
+		//
+		// Heuristic match skipping: If 32 bytes are scanned with no matches
+		// found, start looking only at every other byte. If 32 more bytes are
+		// scanned (or skipped), look at every third byte, etc.. When a match
+		// is found, immediately go back to looking at every byte. This is a
+		// small loss (~5% performance, ~0.1% density) for compressible data
+		// due to more bookkeeping, but for non-compressible data (such as
+		// JPEG) it's a huge win since the compressor quickly "realizes" the
+		// data is incompressible and doesn't bother looking for matches
+		// everywhere.
+		//
+		// The "skip" variable keeps track of how many bytes there are since
+		// the last match; dividing it by 32 (ie. right-shifting by five) gives
+		// the number of bytes to move ahead for each iteration.
+		skip := 32
+
+		nextS := s
+		candidate := 0
+		for {
+			s = nextS
+			bytesBetweenHashLookups := skip >> 5
+			nextS = s + bytesBetweenHashLookups
+			skip += bytesBetweenHashLookups
+			if nextS > sLimit {
+				goto emitRemainder
+			}
+			candidate = int(table[nextHash&tableMask])
+			table[nextHash&tableMask] = uint16(s)
+			nextHash = hash(load32(src, nextS))
+			if s-candidate <= maxMatchOffset && load32(src, s) == load32(src, candidate) {
+				break
+			}
+		}
+
+		// A 4-byte match has been found. We'll later see if more than 4 bytes
+		// match. But, prior to the match, src[nextEmit:s] are unmatched. Emit
+		// them as literal bytes.
+		emitLiteral(dst, src[nextEmit:s])
+
+		// Call emitCopy, and then see if another emitCopy could be our next
+		// move. Repeat until we find no match for the input immediately after
+		// what was consumed by the last emitCopy call.
+		//
+		// If we exit this loop normally then we need to call emitLiteral next,
+		// though we don't yet know how big the literal will be. We handle that
+		// by proceeding to the next iteration of the main loop. We also can
+		// exit this loop via goto if we get close to exhausting the input.
+		for {
+			// Invariant: we have a 4-byte match at s, and no need to emit any
+			// literal bytes prior to s.
+			base := s
+
+			// Extend the 4-byte match as long as possible.
+			//
+			// This is an inlined version of Snappy's:
+			//	s = extendMatch(src, candidate+4, s+4)
+			s += 4
+			s1 := base + maxMatchLength
+			if s1 > len(src) {
+				s1 = len(src)
+			}
+			a := src[s:s1]
+			b := src[candidate+4:]
+			b = b[:len(a)]
+			l := len(a)
+			for i := range a {
+				if a[i] != b[i] {
+					l = i
+					break
+				}
+			}
+			s += l
+
+			// matchToken is flate's equivalent of Snappy's emitCopy.
+			dst.tokens[dst.n] = matchToken(uint32(s-base-baseMatchLength), uint32(base-candidate-baseMatchOffset))
+			dst.n++
+			nextEmit = s
+			if s >= sLimit {
+				goto emitRemainder
+			}
+
+			// We could immediately start working at s now, but to improve
+			// compression we first update the hash table at s-1 and at s. If
+			// another emitCopy is not our next move, also calculate nextHash
+			// at s+1. At least on GOARCH=amd64, these three hash calculations
+			// are faster as one load64 call (with some shifts) instead of
+			// three load32 calls.
+			x := load64(src, s-1)
+			prevHash := hash(uint32(x >> 0))
+			table[prevHash&tableMask] = uint16(s - 1)
+			currHash := hash(uint32(x >> 8))
+			candidate = int(table[currHash&tableMask])
+			table[currHash&tableMask] = uint16(s)
+			if s-candidate > maxMatchOffset || uint32(x>>8) != load32(src, candidate) {
+				nextHash = hash(uint32(x >> 16))
+				s++
+				break
+			}
+		}
+	}
+
+emitRemainder:
+	if nextEmit < len(src) {
+		emitLiteral(dst, src[nextEmit:])
+	}
+}
+
+type tableEntry struct {
+	val    uint32
+	offset int32
+}
+
+func load3232(b []byte, i int32) uint32 {
+	b = b[i : i+4 : len(b)] // Help the compiler eliminate bounds checks on the next line.
+	return uint32(b[0]) | uint32(b[1])<<8 | uint32(b[2])<<16 | uint32(b[3])<<24
+}
+
+func load6432(b []byte, i int32) uint64 {
+	b = b[i : i+8 : len(b)] // Help the compiler eliminate bounds checks on the next line.
+	return uint64(b[0]) | uint64(b[1])<<8 | uint64(b[2])<<16 | uint64(b[3])<<24 |
+		uint64(b[4])<<32 | uint64(b[5])<<40 | uint64(b[6])<<48 | uint64(b[7])<<56
+}
+
+// snappyGen maintains the table for matches,
+// and the previous byte block for level 2.
+// This is the generic implementation.
+type snappyGen struct {
+	prev []byte
+	cur  int32
+}
+
+// snappyGen maintains the table for matches,
+// and the previous byte block for level 2.
+// This is the generic implementation.
+type snappyL2 struct {
+	snappyGen
+	table [tableSize]tableEntry
+}
+
+// EncodeL2 uses a similar algorithm to level 1, but is capable
+// of matching across blocks giving better compression at a small slowdown.
+func (e *snappyL2) Encode(dst *tokens, src []byte) {
+	const (
+		inputMargin            = 8 - 1
+		minNonLiteralBlockSize = 1 + 1 + inputMargin
+	)
+
+	// Protect against e.cur wraparound.
+	if e.cur > 1<<30 {
+		for i := range e.table[:] {
+			e.table[i] = tableEntry{}
+		}
+		e.cur = maxStoreBlockSize
+	}
+
+	// This check isn't in the Snappy implementation, but there, the caller
+	// instead of the callee handles this case.
+	if len(src) < minNonLiteralBlockSize {
+		// We do not fill the token table.
+		// This will be picked up by caller.
+		dst.n = uint16(len(src))
+		e.cur += maxStoreBlockSize
+		e.prev = e.prev[:0]
+		return
+	}
+
+	// sLimit is when to stop looking for offset/length copies. The inputMargin
+	// lets us use a fast path for emitLiteral in the main loop, while we are
+	// looking for copies.
+	sLimit := int32(len(src) - inputMargin)
+
+	// nextEmit is where in src the next emitLiteral should start from.
+	nextEmit := int32(0)
+	s := int32(0)
+	cv := load3232(src, s)
+	nextHash := hash(cv)
+
+	for {
+		// Copied from the C++ snappy implementation:
+		//
+		// Heuristic match skipping: If 32 bytes are scanned with no matches
+		// found, start looking only at every other byte. If 32 more bytes are
+		// scanned (or skipped), look at every third byte, etc.. When a match
+		// is found, immediately go back to looking at every byte. This is a
+		// small loss (~5% performance, ~0.1% density) for compressible data
+		// due to more bookkeeping, but for non-compressible data (such as
+		// JPEG) it's a huge win since the compressor quickly "realizes" the
+		// data is incompressible and doesn't bother looking for matches
+		// everywhere.
+		//
+		// The "skip" variable keeps track of how many bytes there are since
+		// the last match; dividing it by 32 (ie. right-shifting by five) gives
+		// the number of bytes to move ahead for each iteration.
+		skip := int32(32)
+
+		nextS := s
+		var candidate tableEntry
+		for {
+			s = nextS
+			bytesBetweenHashLookups := skip >> 5
+			nextS = s + bytesBetweenHashLookups
+			skip += bytesBetweenHashLookups
+			if nextS > sLimit {
+				goto emitRemainder
+			}
+			candidate = e.table[nextHash&tableMask]
+			now := load3232(src, nextS)
+			e.table[nextHash&tableMask] = tableEntry{offset: s + e.cur, val: cv}
+			nextHash = hash(now)
+
+			offset := s - (candidate.offset - e.cur)
+			if offset > maxMatchOffset || cv != candidate.val {
+				// Out of range or not matched.
+				cv = now
+				continue
+			}
+			break
+		}
+
+		// A 4-byte match has been found. We'll later see if more than 4 bytes
+		// match. But, prior to the match, src[nextEmit:s] are unmatched. Emit
+		// them as literal bytes.
+		emitLiteral(dst, src[nextEmit:s])
+
+		// Call emitCopy, and then see if another emitCopy could be our next
+		// move. Repeat until we find no match for the input immediately after
+		// what was consumed by the last emitCopy call.
+		//
+		// If we exit this loop normally then we need to call emitLiteral next,
+		// though we don't yet know how big the literal will be. We handle that
+		// by proceeding to the next iteration of the main loop. We also can
+		// exit this loop via goto if we get close to exhausting the input.
+		for {
+			// Invariant: we have a 4-byte match at s, and no need to emit any
+			// literal bytes prior to s.
+
+			// Extend the 4-byte match as long as possible.
+			//
+			s += 4
+			t := candidate.offset - e.cur + 4
+			l := e.matchlen(s, t, src)
+
+			// matchToken is flate's equivalent of Snappy's emitCopy. (length,offset)
+			dst.tokens[dst.n] = matchToken(uint32(l+4-baseMatchLength), uint32(s-t-baseMatchOffset))
+			dst.n++
+			s += l
+			nextEmit = s
+			if s >= sLimit {
+				t += l
+				// Index first pair after match end.
+				if int(t+4) < len(src) && t > 0 {
+					cv := load3232(src, t)
+					e.table[hash(cv)&tableMask] = tableEntry{offset: t + e.cur, val: cv}
+				}
+				goto emitRemainder
+			}
+
+			// We could immediately start working at s now, but to improve
+			// compression we first update the hash table at s-1 and at s. If
+			// another emitCopy is not our next move, also calculate nextHash
+			// at s+1. At least on GOARCH=amd64, these three hash calculations
+			// are faster as one load64 call (with some shifts) instead of
+			// three load32 calls.
+			x := load6432(src, s-1)
+			prevHash := hash(uint32(x))
+			e.table[prevHash&tableMask] = tableEntry{offset: e.cur + s - 1, val: uint32(x)}
+			x >>= 8
+			currHash := hash(uint32(x))
+			candidate = e.table[currHash&tableMask]
+			e.table[currHash&tableMask] = tableEntry{offset: e.cur + s, val: uint32(x)}
+
+			offset := s - (candidate.offset - e.cur)
+			if offset > maxMatchOffset || uint32(x) != candidate.val {
+				cv = uint32(x >> 8)
+				nextHash = hash(cv)
+				s++
+				break
+			}
+		}
+	}
+
+emitRemainder:
+	if int(nextEmit) < len(src) {
+		emitLiteral(dst, src[nextEmit:])
+	}
+	e.cur += int32(len(src))
+	e.prev = e.prev[:len(src)]
+	copy(e.prev, src)
+}
+
+type tableEntryPrev struct {
+	Cur  tableEntry
+	Prev tableEntry
+}
+
+// snappyL3
+type snappyL3 struct {
+	snappyGen
+	table [tableSize]tableEntryPrev
+}
+
+// Encode uses a similar algorithm to level 2, will check up to two candidates.
+func (e *snappyL3) Encode(dst *tokens, src []byte) {
+	const (
+		inputMargin            = 8 - 1
+		minNonLiteralBlockSize = 1 + 1 + inputMargin
+	)
+
+	// Protect against e.cur wraparound.
+	if e.cur > 1<<30 {
+		for i := range e.table[:] {
+			e.table[i] = tableEntryPrev{}
+		}
+		e.snappyGen = snappyGen{cur: maxStoreBlockSize, prev: e.prev[:0]}
+	}
+
+	// This check isn't in the Snappy implementation, but there, the caller
+	// instead of the callee handles this case.
+	if len(src) < minNonLiteralBlockSize {
+		// We do not fill the token table.
+		// This will be picked up by caller.
+		dst.n = uint16(len(src))
+		e.cur += maxStoreBlockSize
+		e.prev = e.prev[:0]
+		return
+	}
+
+	// sLimit is when to stop looking for offset/length copies. The inputMargin
+	// lets us use a fast path for emitLiteral in the main loop, while we are
+	// looking for copies.
+	sLimit := int32(len(src) - inputMargin)
+
+	// nextEmit is where in src the next emitLiteral should start from.
+	nextEmit := int32(0)
+	s := int32(0)
+	cv := load3232(src, s)
+	nextHash := hash(cv)
+
+	for {
+		// Copied from the C++ snappy implementation:
+		//
+		// Heuristic match skipping: If 32 bytes are scanned with no matches
+		// found, start looking only at every other byte. If 32 more bytes are
+		// scanned (or skipped), look at every third byte, etc.. When a match
+		// is found, immediately go back to looking at every byte. This is a
+		// small loss (~5% performance, ~0.1% density) for compressible data
+		// due to more bookkeeping, but for non-compressible data (such as
+		// JPEG) it's a huge win since the compressor quickly "realizes" the
+		// data is incompressible and doesn't bother looking for matches
+		// everywhere.
+		//
+		// The "skip" variable keeps track of how many bytes there are since
+		// the last match; dividing it by 32 (ie. right-shifting by five) gives
+		// the number of bytes to move ahead for each iteration.
+		skip := int32(32)
+
+		nextS := s
+		var candidate tableEntry
+		for {
+			s = nextS
+			bytesBetweenHashLookups := skip >> 5
+			nextS = s + bytesBetweenHashLookups
+			skip += bytesBetweenHashLookups
+			if nextS > sLimit {
+				goto emitRemainder
+			}
+			candidates := e.table[nextHash&tableMask]
+			now := load3232(src, nextS)
+			e.table[nextHash&tableMask] = tableEntryPrev{Prev: candidates.Cur, Cur: tableEntry{offset: s + e.cur, val: cv}}
+			nextHash = hash(now)
+
+			// Check both candidates
+			candidate = candidates.Cur
+			if cv == candidate.val {
+				offset := s - (candidate.offset - e.cur)
+				if offset <= maxMatchOffset {
+					break
+				}
+			} else {
+				// We only check if value mismatches.
+				// Offset will always be invalid in other cases.
+				candidate = candidates.Prev
+				if cv == candidate.val {
+					offset := s - (candidate.offset - e.cur)
+					if offset <= maxMatchOffset {
+						break
+					}
+				}
+			}
+			cv = now
+		}
+
+		// A 4-byte match has been found. We'll later see if more than 4 bytes
+		// match. But, prior to the match, src[nextEmit:s] are unmatched. Emit
+		// them as literal bytes.
+		emitLiteral(dst, src[nextEmit:s])
+
+		// Call emitCopy, and then see if another emitCopy could be our next
+		// move. Repeat until we find no match for the input immediately after
+		// what was consumed by the last emitCopy call.
+		//
+		// If we exit this loop normally then we need to call emitLiteral next,
+		// though we don't yet know how big the literal will be. We handle that
+		// by proceeding to the next iteration of the main loop. We also can
+		// exit this loop via goto if we get close to exhausting the input.
+		for {
+			// Invariant: we have a 4-byte match at s, and no need to emit any
+			// literal bytes prior to s.
+
+			// Extend the 4-byte match as long as possible.
+			//
+			s += 4
+			t := candidate.offset - e.cur + 4
+			l := e.matchlen(s, t, src)
+
+			// matchToken is flate's equivalent of Snappy's emitCopy. (length,offset)
+			dst.tokens[dst.n] = matchToken(uint32(l+4-baseMatchLength), uint32(s-t-baseMatchOffset))
+			dst.n++
+			s += l
+			nextEmit = s
+			if s >= sLimit {
+				t += l
+				// Index first pair after match end.
+				if int(t+4) < len(src) && t > 0 {
+					cv := load3232(src, t)
+					nextHash = hash(cv)
+					e.table[nextHash&tableMask] = tableEntryPrev{
+						Prev: e.table[nextHash&tableMask].Cur,
+						Cur:  tableEntry{offset: e.cur + t, val: cv},
+					}
+				}
+				goto emitRemainder
+			}
+
+			// We could immediately start working at s now, but to improve
+			// compression we first update the hash table at s-3 to s. If
+			// another emitCopy is not our next move, also calculate nextHash
+			// at s+1. At least on GOARCH=amd64, these three hash calculations
+			// are faster as one load64 call (with some shifts) instead of
+			// three load32 calls.
+			x := load6432(src, s-3)
+			prevHash := hash(uint32(x))
+			e.table[prevHash&tableMask] = tableEntryPrev{
+				Prev: e.table[prevHash&tableMask].Cur,
+				Cur:  tableEntry{offset: e.cur + s - 3, val: uint32(x)},
+			}
+			x >>= 8
+			prevHash = hash(uint32(x))
+
+			e.table[prevHash&tableMask] = tableEntryPrev{
+				Prev: e.table[prevHash&tableMask].Cur,
+				Cur:  tableEntry{offset: e.cur + s - 2, val: uint32(x)},
+			}
+			x >>= 8
+			prevHash = hash(uint32(x))
+
+			e.table[prevHash&tableMask] = tableEntryPrev{
+				Prev: e.table[prevHash&tableMask].Cur,
+				Cur:  tableEntry{offset: e.cur + s - 1, val: uint32(x)},
+			}
+			x >>= 8
+			currHash := hash(uint32(x))
+			candidates := e.table[currHash&tableMask]
+			cv = uint32(x)
+			e.table[currHash&tableMask] = tableEntryPrev{
+				Prev: candidates.Cur,
+				Cur:  tableEntry{offset: s + e.cur, val: cv},
+			}
+
+			// Check both candidates
+			candidate = candidates.Cur
+			if cv == candidate.val {
+				offset := s - (candidate.offset - e.cur)
+				if offset <= maxMatchOffset {
+					continue
+				}
+			} else {
+				// We only check if value mismatches.
+				// Offset will always be invalid in other cases.
+				candidate = candidates.Prev
+				if cv == candidate.val {
+					offset := s - (candidate.offset - e.cur)
+					if offset <= maxMatchOffset {
+						continue
+					}
+				}
+			}
+			cv = uint32(x >> 8)
+			nextHash = hash(cv)
+			s++
+			break
+		}
+	}
+
+emitRemainder:
+	if int(nextEmit) < len(src) {
+		emitLiteral(dst, src[nextEmit:])
+	}
+	e.cur += int32(len(src))
+	e.prev = e.prev[:len(src)]
+	copy(e.prev, src)
+}
+
+// snappyL4
+type snappyL4 struct {
+	snappyL3
+}
+
+// Encode uses a similar algorithm to level 3,
+// but will check up to two candidates if first isn't long enough.
+func (e *snappyL4) Encode(dst *tokens, src []byte) {
+	const (
+		inputMargin            = 8 - 3
+		minNonLiteralBlockSize = 1 + 1 + inputMargin
+		matchLenGood           = 12
+	)
+
+	// Protect against e.cur wraparound.
+	if e.cur > 1<<30 {
+		for i := range e.table[:] {
+			e.table[i] = tableEntryPrev{}
+		}
+		e.snappyGen = snappyGen{cur: maxStoreBlockSize, prev: e.prev[:0]}
+	}
+
+	// This check isn't in the Snappy implementation, but there, the caller
+	// instead of the callee handles this case.
+	if len(src) < minNonLiteralBlockSize {
+		// We do not fill the token table.
+		// This will be picked up by caller.
+		dst.n = uint16(len(src))
+		e.cur += maxStoreBlockSize
+		e.prev = e.prev[:0]
+		return
+	}
+
+	// sLimit is when to stop looking for offset/length copies. The inputMargin
+	// lets us use a fast path for emitLiteral in the main loop, while we are
+	// looking for copies.
+	sLimit := int32(len(src) - inputMargin)
+
+	// nextEmit is where in src the next emitLiteral should start from.
+	nextEmit := int32(0)
+	s := int32(0)
+	cv := load3232(src, s)
+	nextHash := hash(cv)
+
+	for {
+		// Copied from the C++ snappy implementation:
+		//
+		// Heuristic match skipping: If 32 bytes are scanned with no matches
+		// found, start looking only at every other byte. If 32 more bytes are
+		// scanned (or skipped), look at every third byte, etc.. When a match
+		// is found, immediately go back to looking at every byte. This is a
+		// small loss (~5% performance, ~0.1% density) for compressible data
+		// due to more bookkeeping, but for non-compressible data (such as
+		// JPEG) it's a huge win since the compressor quickly "realizes" the
+		// data is incompressible and doesn't bother looking for matches
+		// everywhere.
+		//
+		// The "skip" variable keeps track of how many bytes there are since
+		// the last match; dividing it by 32 (ie. right-shifting by five) gives
+		// the number of bytes to move ahead for each iteration.
+		skip := int32(32)
+
+		nextS := s
+		var candidate tableEntry
+		var candidateAlt tableEntry
+		for {
+			s = nextS
+			bytesBetweenHashLookups := skip >> 5
+			nextS = s + bytesBetweenHashLookups
+			skip += bytesBetweenHashLookups
+			if nextS > sLimit {
+				goto emitRemainder
+			}
+			candidates := e.table[nextHash&tableMask]
+			now := load3232(src, nextS)
+			e.table[nextHash&tableMask] = tableEntryPrev{Prev: candidates.Cur, Cur: tableEntry{offset: s + e.cur, val: cv}}
+			nextHash = hash(now)
+
+			// Check both candidates
+			candidate = candidates.Cur
+			if cv == candidate.val {
+				offset := s - (candidate.offset - e.cur)
+				if offset < maxMatchOffset {
+					offset = s - (candidates.Prev.offset - e.cur)
+					if cv == candidates.Prev.val && offset < maxMatchOffset {
+						candidateAlt = candidates.Prev
+					}
+					break
+				}
+			} else {
+				// We only check if value mismatches.
+				// Offset will always be invalid in other cases.
+				candidate = candidates.Prev
+				if cv == candidate.val {
+					offset := s - (candidate.offset - e.cur)
+					if offset < maxMatchOffset {
+						break
+					}
+				}
+			}
+			cv = now
+		}
+
+		// A 4-byte match has been found. We'll later see if more than 4 bytes
+		// match. But, prior to the match, src[nextEmit:s] are unmatched. Emit
+		// them as literal bytes.
+		emitLiteral(dst, src[nextEmit:s])
+
+		// Call emitCopy, and then see if another emitCopy could be our next
+		// move. Repeat until we find no match for the input immediately after
+		// what was consumed by the last emitCopy call.
+		//
+		// If we exit this loop normally then we need to call emitLiteral next,
+		// though we don't yet know how big the literal will be. We handle that
+		// by proceeding to the next iteration of the main loop. We also can
+		// exit this loop via goto if we get close to exhausting the input.
+		for {
+			// Invariant: we have a 4-byte match at s, and no need to emit any
+			// literal bytes prior to s.
+
+			// Extend the 4-byte match as long as possible.
+			//
+			s += 4
+			t := candidate.offset - e.cur + 4
+			l := e.matchlen(s, t, src)
+			// Try alternative candidate if match length < matchLenGood.
+			if l < matchLenGood-4 && candidateAlt.offset != 0 {
+				t2 := candidateAlt.offset - e.cur + 4
+				l2 := e.matchlen(s, t2, src)
+				if l2 > l {
+					l = l2
+					t = t2
+				}
+			}
+			// matchToken is flate's equivalent of Snappy's emitCopy. (length,offset)
+			dst.tokens[dst.n] = matchToken(uint32(l+4-baseMatchLength), uint32(s-t-baseMatchOffset))
+			dst.n++
+			s += l
+			nextEmit = s
+			if s >= sLimit {
+				t += l
+				// Index first pair after match end.
+				if int(t+4) < len(src) && t > 0 {
+					cv := load3232(src, t)
+					nextHash = hash(cv)
+					e.table[nextHash&tableMask] = tableEntryPrev{
+						Prev: e.table[nextHash&tableMask].Cur,
+						Cur:  tableEntry{offset: e.cur + t, val: cv},
+					}
+				}
+				goto emitRemainder
+			}
+
+			// We could immediately start working at s now, but to improve
+			// compression we first update the hash table at s-3 to s. If
+			// another emitCopy is not our next move, also calculate nextHash
+			// at s+1. At least on GOARCH=amd64, these three hash calculations
+			// are faster as one load64 call (with some shifts) instead of
+			// three load32 calls.
+			x := load6432(src, s-3)
+			prevHash := hash(uint32(x))
+			e.table[prevHash&tableMask] = tableEntryPrev{
+				Prev: e.table[prevHash&tableMask].Cur,
+				Cur:  tableEntry{offset: e.cur + s - 3, val: uint32(x)},
+			}
+			x >>= 8
+			prevHash = hash(uint32(x))
+
+			e.table[prevHash&tableMask] = tableEntryPrev{
+				Prev: e.table[prevHash&tableMask].Cur,
+				Cur:  tableEntry{offset: e.cur + s - 2, val: uint32(x)},
+			}
+			x >>= 8
+			prevHash = hash(uint32(x))
+
+			e.table[prevHash&tableMask] = tableEntryPrev{
+				Prev: e.table[prevHash&tableMask].Cur,
+				Cur:  tableEntry{offset: e.cur + s - 1, val: uint32(x)},
+			}
+			x >>= 8
+			currHash := hash(uint32(x))
+			candidates := e.table[currHash&tableMask]
+			cv = uint32(x)
+			e.table[currHash&tableMask] = tableEntryPrev{
+				Prev: candidates.Cur,
+				Cur:  tableEntry{offset: s + e.cur, val: cv},
+			}
+
+			// Check both candidates
+			candidate = candidates.Cur
+			candidateAlt = tableEntry{}
+			if cv == candidate.val {
+				offset := s - (candidate.offset - e.cur)
+				if offset <= maxMatchOffset {
+					offset = s - (candidates.Prev.offset - e.cur)
+					if cv == candidates.Prev.val && offset <= maxMatchOffset {
+						candidateAlt = candidates.Prev
+					}
+					continue
+				}
+			} else {
+				// We only check if value mismatches.
+				// Offset will always be invalid in other cases.
+				candidate = candidates.Prev
+				if cv == candidate.val {
+					offset := s - (candidate.offset - e.cur)
+					if offset <= maxMatchOffset {
+						continue
+					}
+				}
+			}
+			cv = uint32(x >> 8)
+			nextHash = hash(cv)
+			s++
+			break
+		}
+	}
+
+emitRemainder:
+	if int(nextEmit) < len(src) {
+		emitLiteral(dst, src[nextEmit:])
+	}
+	e.cur += int32(len(src))
+	e.prev = e.prev[:len(src)]
+	copy(e.prev, src)
+}
+
+func (e *snappyGen) matchlen(s, t int32, src []byte) int32 {
+	s1 := int(s) + maxMatchLength - 4
+	if s1 > len(src) {
+		s1 = len(src)
+	}
+
+	// If we are inside the current block
+	if t >= 0 {
+		b := src[t:]
+		a := src[s:s1]
+		b = b[:len(a)]
+		// Extend the match to be as long as possible.
+		for i := range a {
+			if a[i] != b[i] {
+				return int32(i)
+			}
+		}
+		return int32(len(a))
+	}
+
+	// We found a match in the previous block.
+	tp := int32(len(e.prev)) + t
+	if tp < 0 {
+		return 0
+	}
+
+	// Extend the match to be as long as possible.
+	a := src[s:s1]
+	b := e.prev[tp:]
+	if len(b) > len(a) {
+		b = b[:len(a)]
+	}
+	a = a[:len(b)]
+	for i := range b {
+		if a[i] != b[i] {
+			return int32(i)
+		}
+	}
+
+	// If we reached our limit, we matched everything we are
+	// allowed to in the previous block and we return.
+	n := int32(len(b))
+	if int(s+n) == s1 {
+		return n
+	}
+
+	// Continue looking for more matches in the current block.
+	a = src[s+n : s1]
+	b = src[:len(a)]
+	for i := range a {
+		if a[i] != b[i] {
+			return int32(i) + n
+		}
+	}
+	return int32(len(a)) + n
+}
+
+// Reset the encoding table.
+func (e *snappyGen) Reset() {
+	e.prev = e.prev[:0]
+	e.cur += maxMatchOffset
+}
diff --git a/vendor/github.com/klauspost/compress/flate/token.go b/vendor/github.com/klauspost/compress/flate/token.go
new file mode 100644
index 00000000000..4f275ea61df
--- /dev/null
+++ b/vendor/github.com/klauspost/compress/flate/token.go
@@ -0,0 +1,115 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package flate
+
+import "fmt"
+
+const (
+	// 2 bits:   type   0 = literal  1=EOF  2=Match   3=Unused
+	// 8 bits:   xlength = length - MIN_MATCH_LENGTH
+	// 22 bits   xoffset = offset - MIN_OFFSET_SIZE, or literal
+	lengthShift = 22
+	offsetMask  = 1<<lengthShift - 1
+	typeMask    = 3 << 30
+	literalType = 0 << 30
+	matchType   = 1 << 30
+)
+
+// The length code for length X (MIN_MATCH_LENGTH <= X <= MAX_MATCH_LENGTH)
+// is lengthCodes[length - MIN_MATCH_LENGTH]
+var lengthCodes = [...]uint32{
+	0, 1, 2, 3, 4, 5, 6, 7, 8, 8,
+	9, 9, 10, 10, 11, 11, 12, 12, 12, 12,
+	13, 13, 13, 13, 14, 14, 14, 14, 15, 15,
+	15, 15, 16, 16, 16, 16, 16, 16, 16, 16,
+	17, 17, 17, 17, 17, 17, 17, 17, 18, 18,
+	18, 18, 18, 18, 18, 18, 19, 19, 19, 19,
+	19, 19, 19, 19, 20, 20, 20, 20, 20, 20,
+	20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
+	21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+	21, 21, 21, 21, 21, 21, 22, 22, 22, 22,
+	22, 22, 22, 22, 22, 22, 22, 22, 22, 22,
+	22, 22, 23, 23, 23, 23, 23, 23, 23, 23,
+	23, 23, 23, 23, 23, 23, 23, 23, 24, 24,
+	24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
+	24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
+	24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
+	25, 25, 25, 25, 25, 25, 25, 25, 25, 25,
+	25, 25, 25, 25, 25, 25, 25, 25, 25, 25,
+	25, 25, 25, 25, 25, 25, 25, 25, 25, 25,
+	25, 25, 26, 26, 26, 26, 26, 26, 26, 26,
+	26, 26, 26, 26, 26, 26, 26, 26, 26, 26,
+	26, 26, 26, 26, 26, 26, 26, 26, 26, 26,
+	26, 26, 26, 26, 27, 27, 27, 27, 27, 27,
+	27, 27, 27, 27, 27, 27, 27, 27, 27, 27,
+	27, 27, 27, 27, 27, 27, 27, 27, 27, 27,
+	27, 27, 27, 27, 27, 28,
+}
+
+var offsetCodes = [...]uint32{
+	0, 1, 2, 3, 4, 4, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7,
+	8, 8, 8, 8, 8, 8, 8, 8, 9, 9, 9, 9, 9, 9, 9, 9,
+	10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
+	11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
+	12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+	12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+	13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+	13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+	14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+	14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+	14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+	14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+	15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+	15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+	15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+	15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+}
+
+type token uint32
+
+type tokens struct {
+	tokens [maxStoreBlockSize + 1]token
+	n      uint16 // Must be able to contain maxStoreBlockSize
+}
+
+// Convert a literal into a literal token.
+func literalToken(literal uint32) token { return token(literalType + literal) }
+
+// Convert a < xlength, xoffset > pair into a match token.
+func matchToken(xlength uint32, xoffset uint32) token {
+	return token(matchType + xlength<<lengthShift + xoffset)
+}
+
+func matchTokend(xlength uint32, xoffset uint32) token {
+	if xlength > maxMatchLength || xoffset > maxMatchOffset {
+		panic(fmt.Sprintf("Invalid match: len: %d, offset: %d\n", xlength, xoffset))
+		return token(matchType)
+	}
+	return token(matchType + xlength<<lengthShift + xoffset)
+}
+
+// Returns the type of a token
+func (t token) typ() uint32 { return uint32(t) & typeMask }
+
+// Returns the literal of a literal token
+func (t token) literal() uint32 { return uint32(t - literalType) }
+
+// Returns the extra offset of a match token
+func (t token) offset() uint32 { return uint32(t) & offsetMask }
+
+func (t token) length() uint32 { return uint32((t - matchType) >> lengthShift) }
+
+func lengthCode(len uint32) uint32 { return lengthCodes[len] }
+
+// Returns the offset code corresponding to a specific offset
+func offsetCode(off uint32) uint32 {
+	if off < uint32(len(offsetCodes)) {
+		return offsetCodes[off]
+	} else if off>>7 < uint32(len(offsetCodes)) {
+		return offsetCodes[off>>7] + 14
+	} else {
+		return offsetCodes[off>>14] + 28
+	}
+}
diff --git a/vendor/github.com/klauspost/cpuid/LICENSE b/vendor/github.com/klauspost/cpuid/LICENSE
new file mode 100644
index 00000000000..5cec7ee949b
--- /dev/null
+++ b/vendor/github.com/klauspost/cpuid/LICENSE
@@ -0,0 +1,22 @@
+The MIT License (MIT)
+
+Copyright (c) 2015 Klaus Post
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
+
diff --git a/vendor/github.com/klauspost/cpuid/README.md b/vendor/github.com/klauspost/cpuid/README.md
new file mode 100644
index 00000000000..b2b6bee879a
--- /dev/null
+++ b/vendor/github.com/klauspost/cpuid/README.md
@@ -0,0 +1,145 @@
+# cpuid
+Package cpuid provides information about the CPU running the current program.
+
+CPU features are detected on startup, and kept for fast access through the life of the application.
+Currently x86 / x64 (AMD64) is supported, and no external C (cgo) code is used, which should make the library very easy to use.
+
+You can access the CPU information by accessing the shared CPU variable of the cpuid library.
+
+Package home: https://github.com/klauspost/cpuid
+
+[![GoDoc][1]][2] [![Build Status][3]][4]
+
+[1]: https://godoc.org/github.com/klauspost/cpuid?status.svg
+[2]: https://godoc.org/github.com/klauspost/cpuid
+[3]: https://travis-ci.org/klauspost/cpuid.svg
+[4]: https://travis-ci.org/klauspost/cpuid
+
+# features
+## CPU Instructions
+*  **CMOV** (i686 CMOV)
+*  **NX** (NX (No-Execute) bit)
+*  **AMD3DNOW** (AMD 3DNOW)
+*  **AMD3DNOWEXT** (AMD 3DNowExt)
+*  **MMX** (standard MMX)
+*  **MMXEXT** (SSE integer functions or AMD MMX ext)
+*  **SSE** (SSE functions)
+*  **SSE2** (P4 SSE functions)
+*  **SSE3** (Prescott SSE3 functions)
+*  **SSSE3** (Conroe SSSE3 functions)
+*  **SSE4** (Penryn SSE4.1 functions)
+*  **SSE4A** (AMD Barcelona microarchitecture SSE4a instructions)
+*  **SSE42** (Nehalem SSE4.2 functions)
+*  **AVX** (AVX functions)
+*  **AVX2** (AVX2 functions)
+*  **FMA3** (Intel FMA 3)
+*  **FMA4** (Bulldozer FMA4 functions)
+*  **XOP** (Bulldozer XOP functions)
+*  **F16C** (Half-precision floating-point conversion)
+*  **BMI1** (Bit Manipulation Instruction Set 1)
+*  **BMI2** (Bit Manipulation Instruction Set 2)
+*  **TBM** (AMD Trailing Bit Manipulation)
+*  **LZCNT** (LZCNT instruction)
+*  **POPCNT** (POPCNT instruction)
+*  **AESNI** (Advanced Encryption Standard New Instructions)
+*  **CLMUL** (Carry-less Multiplication)
+*  **HTT** (Hyperthreading (enabled))
+*  **HLE** (Hardware Lock Elision)
+*  **RTM** (Restricted Transactional Memory)
+*  **RDRAND** (RDRAND instruction is available)
+*  **RDSEED** (RDSEED instruction is available)
+*  **ADX** (Intel ADX (Multi-Precision Add-Carry Instruction Extensions))
+*  **SHA** (Intel SHA Extensions)
+*  **AVX512F** (AVX-512 Foundation)
+*  **AVX512DQ** (AVX-512 Doubleword and Quadword Instructions)
+*  **AVX512IFMA** (AVX-512 Integer Fused Multiply-Add Instructions)
+*  **AVX512PF** (AVX-512 Prefetch Instructions)
+*  **AVX512ER** (AVX-512 Exponential and Reciprocal Instructions)
+*  **AVX512CD** (AVX-512 Conflict Detection Instructions)
+*  **AVX512BW** (AVX-512 Byte and Word Instructions)
+*  **AVX512VL** (AVX-512 Vector Length Extensions)
+*  **AVX512VBMI** (AVX-512 Vector Bit Manipulation Instructions)
+*  **MPX** (Intel MPX (Memory Protection Extensions))
+*  **ERMS** (Enhanced REP MOVSB/STOSB)
+*  **RDTSCP** (RDTSCP Instruction)
+*  **CX16** (CMPXCHG16B Instruction)
+*  **SGX** (Software Guard Extensions, with activation details)
+
+## Performance
+*  **RDTSCP()** Returns current cycle count. Can be used for benchmarking.
+*  **SSE2SLOW** (SSE2 is supported, but usually not faster)
+*  **SSE3SLOW** (SSE3 is supported, but usually not faster)
+*  **ATOM** (Atom processor, some SSSE3 instructions are slower)
+*  **Cache line** (Probable size of a cache line).
+*  **L1, L2, L3 Cache size** on newer Intel/AMD CPUs.
+
+## Cpu Vendor/VM
+* **Intel**
+* **AMD**
+* **VIA**
+* **Transmeta**
+* **NSC**
+* **KVM**  (Kernel-based Virtual Machine)
+* **MSVM** (Microsoft Hyper-V or Windows Virtual PC)
+* **VMware**
+* **XenHVM**
+
+# installing
+
+```go get github.com/klauspost/cpuid```
+
+# example
+
+```Go
+package main
+
+import (
+	"fmt"
+	"github.com/klauspost/cpuid"
+)
+
+func main() {
+	// Print basic CPU information:
+	fmt.Println("Name:", cpuid.CPU.BrandName)
+	fmt.Println("PhysicalCores:", cpuid.CPU.PhysicalCores)
+	fmt.Println("ThreadsPerCore:", cpuid.CPU.ThreadsPerCore)
+	fmt.Println("LogicalCores:", cpuid.CPU.LogicalCores)
+	fmt.Println("Family", cpuid.CPU.Family, "Model:", cpuid.CPU.Model)
+	fmt.Println("Features:", cpuid.CPU.Features)
+	fmt.Println("Cacheline bytes:", cpuid.CPU.CacheLine)
+	fmt.Println("L1 Data Cache:", cpuid.CPU.Cache.L1D, "bytes")
+	fmt.Println("L1 Instruction Cache:", cpuid.CPU.Cache.L1D, "bytes")
+	fmt.Println("L2 Cache:", cpuid.CPU.Cache.L2, "bytes")
+	fmt.Println("L3 Cache:", cpuid.CPU.Cache.L3, "bytes")
+
+	// Test if we have a specific feature:
+	if cpuid.CPU.SSE() {
+		fmt.Println("We have Streaming SIMD Extensions")
+	}
+}
+```
+
+Sample output:
+```
+>go run main.go
+Name: Intel(R) Core(TM) i5-2540M CPU @ 2.60GHz
+PhysicalCores: 2
+ThreadsPerCore: 2
+LogicalCores: 4
+Family 6 Model: 42
+Features: CMOV,MMX,MMXEXT,SSE,SSE2,SSE3,SSSE3,SSE4.1,SSE4.2,AVX,AESNI,CLMUL
+Cacheline bytes: 64
+We have Streaming SIMD Extensions
+```
+
+# private package
+
+In the "private" folder you can find an autogenerated version of the library you can include in your own packages.
+
+For this purpose all exports are removed, and functions and constants are lowercased.
+
+This is not a recommended way of using the library, but provided for convenience, if it is difficult for you to use external packages.
+
+# license
+
+This code is published under an MIT license. See LICENSE file for more information.
diff --git a/vendor/github.com/klauspost/cpuid/cpuid.go b/vendor/github.com/klauspost/cpuid/cpuid.go
new file mode 100644
index 00000000000..60c681bed24
--- /dev/null
+++ b/vendor/github.com/klauspost/cpuid/cpuid.go
@@ -0,0 +1,1040 @@
+// Copyright (c) 2015 Klaus Post, released under MIT License. See LICENSE file.
+
+// Package cpuid provides information about the CPU running the current program.
+//
+// CPU features are detected on startup, and kept for fast access through the life of the application.
+// Currently x86 / x64 (AMD64) is supported.
+//
+// You can access the CPU information by accessing the shared CPU variable of the cpuid library.
+//
+// Package home: https://github.com/klauspost/cpuid
+package cpuid
+
+import "strings"
+
+// Vendor is a representation of a CPU vendor.
+type Vendor int
+
+const (
+	Other Vendor = iota
+	Intel
+	AMD
+	VIA
+	Transmeta
+	NSC
+	KVM  // Kernel-based Virtual Machine
+	MSVM // Microsoft Hyper-V or Windows Virtual PC
+	VMware
+	XenHVM
+)
+
+const (
+	CMOV        = 1 << iota // i686 CMOV
+	NX                      // NX (No-Execute) bit
+	AMD3DNOW                // AMD 3DNOW
+	AMD3DNOWEXT             // AMD 3DNowExt
+	MMX                     // standard MMX
+	MMXEXT                  // SSE integer functions or AMD MMX ext
+	SSE                     // SSE functions
+	SSE2                    // P4 SSE functions
+	SSE3                    // Prescott SSE3 functions
+	SSSE3                   // Conroe SSSE3 functions
+	SSE4                    // Penryn SSE4.1 functions
+	SSE4A                   // AMD Barcelona microarchitecture SSE4a instructions
+	SSE42                   // Nehalem SSE4.2 functions
+	AVX                     // AVX functions
+	AVX2                    // AVX2 functions
+	FMA3                    // Intel FMA 3
+	FMA4                    // Bulldozer FMA4 functions
+	XOP                     // Bulldozer XOP functions
+	F16C                    // Half-precision floating-point conversion
+	BMI1                    // Bit Manipulation Instruction Set 1
+	BMI2                    // Bit Manipulation Instruction Set 2
+	TBM                     // AMD Trailing Bit Manipulation
+	LZCNT                   // LZCNT instruction
+	POPCNT                  // POPCNT instruction
+	AESNI                   // Advanced Encryption Standard New Instructions
+	CLMUL                   // Carry-less Multiplication
+	HTT                     // Hyperthreading (enabled)
+	HLE                     // Hardware Lock Elision
+	RTM                     // Restricted Transactional Memory
+	RDRAND                  // RDRAND instruction is available
+	RDSEED                  // RDSEED instruction is available
+	ADX                     // Intel ADX (Multi-Precision Add-Carry Instruction Extensions)
+	SHA                     // Intel SHA Extensions
+	AVX512F                 // AVX-512 Foundation
+	AVX512DQ                // AVX-512 Doubleword and Quadword Instructions
+	AVX512IFMA              // AVX-512 Integer Fused Multiply-Add Instructions
+	AVX512PF                // AVX-512 Prefetch Instructions
+	AVX512ER                // AVX-512 Exponential and Reciprocal Instructions
+	AVX512CD                // AVX-512 Conflict Detection Instructions
+	AVX512BW                // AVX-512 Byte and Word Instructions
+	AVX512VL                // AVX-512 Vector Length Extensions
+	AVX512VBMI              // AVX-512 Vector Bit Manipulation Instructions
+	MPX                     // Intel MPX (Memory Protection Extensions)
+	ERMS                    // Enhanced REP MOVSB/STOSB
+	RDTSCP                  // RDTSCP Instruction
+	CX16                    // CMPXCHG16B Instruction
+	SGX                     // Software Guard Extensions
+	IBPB                    // Indirect Branch Restricted Speculation (IBRS) and Indirect Branch Predictor Barrier (IBPB)
+	STIBP                   // Single Thread Indirect Branch Predictors
+
+	// Performance indicators
+	SSE2SLOW // SSE2 is supported, but usually not faster
+	SSE3SLOW // SSE3 is supported, but usually not faster
+	ATOM     // Atom processor, some SSSE3 instructions are slower
+)
+
+var flagNames = map[Flags]string{
+	CMOV:        "CMOV",        // i686 CMOV
+	NX:          "NX",          // NX (No-Execute) bit
+	AMD3DNOW:    "AMD3DNOW",    // AMD 3DNOW
+	AMD3DNOWEXT: "AMD3DNOWEXT", // AMD 3DNowExt
+	MMX:         "MMX",         // Standard MMX
+	MMXEXT:      "MMXEXT",      // SSE integer functions or AMD MMX ext
+	SSE:         "SSE",         // SSE functions
+	SSE2:        "SSE2",        // P4 SSE2 functions
+	SSE3:        "SSE3",        // Prescott SSE3 functions
+	SSSE3:       "SSSE3",       // Conroe SSSE3 functions
+	SSE4:        "SSE4.1",      // Penryn SSE4.1 functions
+	SSE4A:       "SSE4A",       // AMD Barcelona microarchitecture SSE4a instructions
+	SSE42:       "SSE4.2",      // Nehalem SSE4.2 functions
+	AVX:         "AVX",         // AVX functions
+	AVX2:        "AVX2",        // AVX functions
+	FMA3:        "FMA3",        // Intel FMA 3
+	FMA4:        "FMA4",        // Bulldozer FMA4 functions
+	XOP:         "XOP",         // Bulldozer XOP functions
+	F16C:        "F16C",        // Half-precision floating-point conversion
+	BMI1:        "BMI1",        // Bit Manipulation Instruction Set 1
+	BMI2:        "BMI2",        // Bit Manipulation Instruction Set 2
+	TBM:         "TBM",         // AMD Trailing Bit Manipulation
+	LZCNT:       "LZCNT",       // LZCNT instruction
+	POPCNT:      "POPCNT",      // POPCNT instruction
+	AESNI:       "AESNI",       // Advanced Encryption Standard New Instructions
+	CLMUL:       "CLMUL",       // Carry-less Multiplication
+	HTT:         "HTT",         // Hyperthreading (enabled)
+	HLE:         "HLE",         // Hardware Lock Elision
+	RTM:         "RTM",         // Restricted Transactional Memory
+	RDRAND:      "RDRAND",      // RDRAND instruction is available
+	RDSEED:      "RDSEED",      // RDSEED instruction is available
+	ADX:         "ADX",         // Intel ADX (Multi-Precision Add-Carry Instruction Extensions)
+	SHA:         "SHA",         // Intel SHA Extensions
+	AVX512F:     "AVX512F",     // AVX-512 Foundation
+	AVX512DQ:    "AVX512DQ",    // AVX-512 Doubleword and Quadword Instructions
+	AVX512IFMA:  "AVX512IFMA",  // AVX-512 Integer Fused Multiply-Add Instructions
+	AVX512PF:    "AVX512PF",    // AVX-512 Prefetch Instructions
+	AVX512ER:    "AVX512ER",    // AVX-512 Exponential and Reciprocal Instructions
+	AVX512CD:    "AVX512CD",    // AVX-512 Conflict Detection Instructions
+	AVX512BW:    "AVX512BW",    // AVX-512 Byte and Word Instructions
+	AVX512VL:    "AVX512VL",    // AVX-512 Vector Length Extensions
+	AVX512VBMI:  "AVX512VBMI",  // AVX-512 Vector Bit Manipulation Instructions
+	MPX:         "MPX",         // Intel MPX (Memory Protection Extensions)
+	ERMS:        "ERMS",        // Enhanced REP MOVSB/STOSB
+	RDTSCP:      "RDTSCP",      // RDTSCP Instruction
+	CX16:        "CX16",        // CMPXCHG16B Instruction
+	SGX:         "SGX",         // Software Guard Extensions
+	IBPB:        "IBPB",        // Indirect Branch Restricted Speculation and Indirect Branch Predictor Barrier
+	STIBP:       "STIBP",       // Single Thread Indirect Branch Predictors
+
+	// Performance indicators
+	SSE2SLOW: "SSE2SLOW", // SSE2 supported, but usually not faster
+	SSE3SLOW: "SSE3SLOW", // SSE3 supported, but usually not faster
+	ATOM:     "ATOM",     // Atom processor, some SSSE3 instructions are slower
+
+}
+
+// CPUInfo contains information about the detected system CPU.
+type CPUInfo struct {
+	BrandName      string // Brand name reported by the CPU
+	VendorID       Vendor // Comparable CPU vendor ID
+	Features       Flags  // Features of the CPU
+	PhysicalCores  int    // Number of physical processor cores in your CPU. Will be 0 if undetectable.
+	ThreadsPerCore int    // Number of threads per physical core. Will be 1 if undetectable.
+	LogicalCores   int    // Number of physical cores times threads that can run on each core through the use of hyperthreading. Will be 0 if undetectable.
+	Family         int    // CPU family number
+	Model          int    // CPU model number
+	CacheLine      int    // Cache line size in bytes. Will be 0 if undetectable.
+	Cache          struct {
+		L1I int // L1 Instruction Cache (per core or shared). Will be -1 if undetected
+		L1D int // L1 Data Cache (per core or shared). Will be -1 if undetected
+		L2  int // L2 Cache (per core or shared). Will be -1 if undetected
+		L3  int // L3 Instruction Cache (per core or shared). Will be -1 if undetected
+	}
+	SGX       SGXSupport
+	maxFunc   uint32
+	maxExFunc uint32
+}
+
+var cpuid func(op uint32) (eax, ebx, ecx, edx uint32)
+var cpuidex func(op, op2 uint32) (eax, ebx, ecx, edx uint32)
+var xgetbv func(index uint32) (eax, edx uint32)
+var rdtscpAsm func() (eax, ebx, ecx, edx uint32)
+
+// CPU contains information about the CPU as detected on startup,
+// or when Detect last was called.
+//
+// Use this as the primary entry point to you data,
+// this way queries are
+var CPU CPUInfo
+
+func init() {
+	initCPU()
+	Detect()
+}
+
+// Detect will re-detect current CPU info.
+// This will replace the content of the exported CPU variable.
+//
+// Unless you expect the CPU to change while you are running your program
+// you should not need to call this function.
+// If you call this, you must ensure that no other goroutine is accessing the
+// exported CPU variable.
+func Detect() {
+	CPU.maxFunc = maxFunctionID()
+	CPU.maxExFunc = maxExtendedFunction()
+	CPU.BrandName = brandName()
+	CPU.CacheLine = cacheLine()
+	CPU.Family, CPU.Model = familyModel()
+	CPU.Features = support()
+	CPU.SGX = hasSGX(CPU.Features&SGX != 0)
+	CPU.ThreadsPerCore = threadsPerCore()
+	CPU.LogicalCores = logicalCores()
+	CPU.PhysicalCores = physicalCores()
+	CPU.VendorID = vendorID()
+	CPU.cacheSize()
+}
+
+// Generated here: http://play.golang.org/p/BxFH2Gdc0G
+
+// Cmov indicates support of CMOV instructions
+func (c CPUInfo) Cmov() bool {
+	return c.Features&CMOV != 0
+}
+
+// Amd3dnow indicates support of AMD 3DNOW! instructions
+func (c CPUInfo) Amd3dnow() bool {
+	return c.Features&AMD3DNOW != 0
+}
+
+// Amd3dnowExt indicates support of AMD 3DNOW! Extended instructions
+func (c CPUInfo) Amd3dnowExt() bool {
+	return c.Features&AMD3DNOWEXT != 0
+}
+
+// MMX indicates support of MMX instructions
+func (c CPUInfo) MMX() bool {
+	return c.Features&MMX != 0
+}
+
+// MMXExt indicates support of MMXEXT instructions
+// (SSE integer functions or AMD MMX ext)
+func (c CPUInfo) MMXExt() bool {
+	return c.Features&MMXEXT != 0
+}
+
+// SSE indicates support of SSE instructions
+func (c CPUInfo) SSE() bool {
+	return c.Features&SSE != 0
+}
+
+// SSE2 indicates support of SSE 2 instructions
+func (c CPUInfo) SSE2() bool {
+	return c.Features&SSE2 != 0
+}
+
+// SSE3 indicates support of SSE 3 instructions
+func (c CPUInfo) SSE3() bool {
+	return c.Features&SSE3 != 0
+}
+
+// SSSE3 indicates support of SSSE 3 instructions
+func (c CPUInfo) SSSE3() bool {
+	return c.Features&SSSE3 != 0
+}
+
+// SSE4 indicates support of SSE 4 (also called SSE 4.1) instructions
+func (c CPUInfo) SSE4() bool {
+	return c.Features&SSE4 != 0
+}
+
+// SSE42 indicates support of SSE4.2 instructions
+func (c CPUInfo) SSE42() bool {
+	return c.Features&SSE42 != 0
+}
+
+// AVX indicates support of AVX instructions
+// and operating system support of AVX instructions
+func (c CPUInfo) AVX() bool {
+	return c.Features&AVX != 0
+}
+
+// AVX2 indicates support of AVX2 instructions
+func (c CPUInfo) AVX2() bool {
+	return c.Features&AVX2 != 0
+}
+
+// FMA3 indicates support of FMA3 instructions
+func (c CPUInfo) FMA3() bool {
+	return c.Features&FMA3 != 0
+}
+
+// FMA4 indicates support of FMA4 instructions
+func (c CPUInfo) FMA4() bool {
+	return c.Features&FMA4 != 0
+}
+
+// XOP indicates support of XOP instructions
+func (c CPUInfo) XOP() bool {
+	return c.Features&XOP != 0
+}
+
+// F16C indicates support of F16C instructions
+func (c CPUInfo) F16C() bool {
+	return c.Features&F16C != 0
+}
+
+// BMI1 indicates support of BMI1 instructions
+func (c CPUInfo) BMI1() bool {
+	return c.Features&BMI1 != 0
+}
+
+// BMI2 indicates support of BMI2 instructions
+func (c CPUInfo) BMI2() bool {
+	return c.Features&BMI2 != 0
+}
+
+// TBM indicates support of TBM instructions
+// (AMD Trailing Bit Manipulation)
+func (c CPUInfo) TBM() bool {
+	return c.Features&TBM != 0
+}
+
+// Lzcnt indicates support of LZCNT instruction
+func (c CPUInfo) Lzcnt() bool {
+	return c.Features&LZCNT != 0
+}
+
+// Popcnt indicates support of POPCNT instruction
+func (c CPUInfo) Popcnt() bool {
+	return c.Features&POPCNT != 0
+}
+
+// HTT indicates the processor has Hyperthreading enabled
+func (c CPUInfo) HTT() bool {
+	return c.Features&HTT != 0
+}
+
+// SSE2Slow indicates that SSE2 may be slow on this processor
+func (c CPUInfo) SSE2Slow() bool {
+	return c.Features&SSE2SLOW != 0
+}
+
+// SSE3Slow indicates that SSE3 may be slow on this processor
+func (c CPUInfo) SSE3Slow() bool {
+	return c.Features&SSE3SLOW != 0
+}
+
+// AesNi indicates support of AES-NI instructions
+// (Advanced Encryption Standard New Instructions)
+func (c CPUInfo) AesNi() bool {
+	return c.Features&AESNI != 0
+}
+
+// Clmul indicates support of CLMUL instructions
+// (Carry-less Multiplication)
+func (c CPUInfo) Clmul() bool {
+	return c.Features&CLMUL != 0
+}
+
+// NX indicates support of NX (No-Execute) bit
+func (c CPUInfo) NX() bool {
+	return c.Features&NX != 0
+}
+
+// SSE4A indicates support of AMD Barcelona microarchitecture SSE4a instructions
+func (c CPUInfo) SSE4A() bool {
+	return c.Features&SSE4A != 0
+}
+
+// HLE indicates support of Hardware Lock Elision
+func (c CPUInfo) HLE() bool {
+	return c.Features&HLE != 0
+}
+
+// RTM indicates support of Restricted Transactional Memory
+func (c CPUInfo) RTM() bool {
+	return c.Features&RTM != 0
+}
+
+// Rdrand indicates support of RDRAND instruction is available
+func (c CPUInfo) Rdrand() bool {
+	return c.Features&RDRAND != 0
+}
+
+// Rdseed indicates support of RDSEED instruction is available
+func (c CPUInfo) Rdseed() bool {
+	return c.Features&RDSEED != 0
+}
+
+// ADX indicates support of Intel ADX (Multi-Precision Add-Carry Instruction Extensions)
+func (c CPUInfo) ADX() bool {
+	return c.Features&ADX != 0
+}
+
+// SHA indicates support of Intel SHA Extensions
+func (c CPUInfo) SHA() bool {
+	return c.Features&SHA != 0
+}
+
+// AVX512F indicates support of AVX-512 Foundation
+func (c CPUInfo) AVX512F() bool {
+	return c.Features&AVX512F != 0
+}
+
+// AVX512DQ indicates support of AVX-512 Doubleword and Quadword Instructions
+func (c CPUInfo) AVX512DQ() bool {
+	return c.Features&AVX512DQ != 0
+}
+
+// AVX512IFMA indicates support of AVX-512 Integer Fused Multiply-Add Instructions
+func (c CPUInfo) AVX512IFMA() bool {
+	return c.Features&AVX512IFMA != 0
+}
+
+// AVX512PF indicates support of AVX-512 Prefetch Instructions
+func (c CPUInfo) AVX512PF() bool {
+	return c.Features&AVX512PF != 0
+}
+
+// AVX512ER indicates support of AVX-512 Exponential and Reciprocal Instructions
+func (c CPUInfo) AVX512ER() bool {
+	return c.Features&AVX512ER != 0
+}
+
+// AVX512CD indicates support of AVX-512 Conflict Detection Instructions
+func (c CPUInfo) AVX512CD() bool {
+	return c.Features&AVX512CD != 0
+}
+
+// AVX512BW indicates support of AVX-512 Byte and Word Instructions
+func (c CPUInfo) AVX512BW() bool {
+	return c.Features&AVX512BW != 0
+}
+
+// AVX512VL indicates support of AVX-512 Vector Length Extensions
+func (c CPUInfo) AVX512VL() bool {
+	return c.Features&AVX512VL != 0
+}
+
+// AVX512VBMI indicates support of AVX-512 Vector Bit Manipulation Instructions
+func (c CPUInfo) AVX512VBMI() bool {
+	return c.Features&AVX512VBMI != 0
+}
+
+// MPX indicates support of Intel MPX (Memory Protection Extensions)
+func (c CPUInfo) MPX() bool {
+	return c.Features&MPX != 0
+}
+
+// ERMS indicates support of Enhanced REP MOVSB/STOSB
+func (c CPUInfo) ERMS() bool {
+	return c.Features&ERMS != 0
+}
+
+// RDTSCP Instruction is available.
+func (c CPUInfo) RDTSCP() bool {
+	return c.Features&RDTSCP != 0
+}
+
+// CX16 indicates if CMPXCHG16B instruction is available.
+func (c CPUInfo) CX16() bool {
+	return c.Features&CX16 != 0
+}
+
+// TSX is split into HLE (Hardware Lock Elision) and RTM (Restricted Transactional Memory) detection.
+// So TSX simply checks that.
+func (c CPUInfo) TSX() bool {
+	return c.Features&(HLE|RTM) == HLE|RTM
+}
+
+// Atom indicates an Atom processor
+func (c CPUInfo) Atom() bool {
+	return c.Features&ATOM != 0
+}
+
+// Intel returns true if vendor is recognized as Intel
+func (c CPUInfo) Intel() bool {
+	return c.VendorID == Intel
+}
+
+// AMD returns true if vendor is recognized as AMD
+func (c CPUInfo) AMD() bool {
+	return c.VendorID == AMD
+}
+
+// Transmeta returns true if vendor is recognized as Transmeta
+func (c CPUInfo) Transmeta() bool {
+	return c.VendorID == Transmeta
+}
+
+// NSC returns true if vendor is recognized as National Semiconductor
+func (c CPUInfo) NSC() bool {
+	return c.VendorID == NSC
+}
+
+// VIA returns true if vendor is recognized as VIA
+func (c CPUInfo) VIA() bool {
+	return c.VendorID == VIA
+}
+
+// RTCounter returns the 64-bit time-stamp counter
+// Uses the RDTSCP instruction. The value 0 is returned
+// if the CPU does not support the instruction.
+func (c CPUInfo) RTCounter() uint64 {
+	if !c.RDTSCP() {
+		return 0
+	}
+	a, _, _, d := rdtscpAsm()
+	return uint64(a) | (uint64(d) << 32)
+}
+
+// Ia32TscAux returns the IA32_TSC_AUX part of the RDTSCP.
+// This variable is OS dependent, but on Linux contains information
+// about the current cpu/core the code is running on.
+// If the RDTSCP instruction isn't supported on the CPU, the value 0 is returned.
+func (c CPUInfo) Ia32TscAux() uint32 {
+	if !c.RDTSCP() {
+		return 0
+	}
+	_, _, ecx, _ := rdtscpAsm()
+	return ecx
+}
+
+// LogicalCPU will return the Logical CPU the code is currently executing on.
+// This is likely to change when the OS re-schedules the running thread
+// to another CPU.
+// If the current core cannot be detected, -1 will be returned.
+func (c CPUInfo) LogicalCPU() int {
+	if c.maxFunc < 1 {
+		return -1
+	}
+	_, ebx, _, _ := cpuid(1)
+	return int(ebx >> 24)
+}
+
+// VM Will return true if the cpu id indicates we are in
+// a virtual machine. This is only a hint, and will very likely
+// have many false negatives.
+func (c CPUInfo) VM() bool {
+	switch c.VendorID {
+	case MSVM, KVM, VMware, XenHVM:
+		return true
+	}
+	return false
+}
+
+// Flags contains detected cpu features and caracteristics
+type Flags uint64
+
+// String returns a string representation of the detected
+// CPU features.
+func (f Flags) String() string {
+	return strings.Join(f.Strings(), ",")
+}
+
+// Strings returns and array of the detected features.
+func (f Flags) Strings() []string {
+	s := support()
+	r := make([]string, 0, 20)
+	for i := uint(0); i < 64; i++ {
+		key := Flags(1 << i)
+		val := flagNames[key]
+		if s&key != 0 {
+			r = append(r, val)
+		}
+	}
+	return r
+}
+
+func maxExtendedFunction() uint32 {
+	eax, _, _, _ := cpuid(0x80000000)
+	return eax
+}
+
+func maxFunctionID() uint32 {
+	a, _, _, _ := cpuid(0)
+	return a
+}
+
+func brandName() string {
+	if maxExtendedFunction() >= 0x80000004 {
+		v := make([]uint32, 0, 48)
+		for i := uint32(0); i < 3; i++ {
+			a, b, c, d := cpuid(0x80000002 + i)
+			v = append(v, a, b, c, d)
+		}
+		return strings.Trim(string(valAsString(v...)), " ")
+	}
+	return "unknown"
+}
+
+func threadsPerCore() int {
+	mfi := maxFunctionID()
+	if mfi < 0x4 || vendorID() != Intel {
+		return 1
+	}
+
+	if mfi < 0xb {
+		_, b, _, d := cpuid(1)
+		if (d & (1 << 28)) != 0 {
+			// v will contain logical core count
+			v := (b >> 16) & 255
+			if v > 1 {
+				a4, _, _, _ := cpuid(4)
+				// physical cores
+				v2 := (a4 >> 26) + 1
+				if v2 > 0 {
+					return int(v) / int(v2)
+				}
+			}
+		}
+		return 1
+	}
+	_, b, _, _ := cpuidex(0xb, 0)
+	if b&0xffff == 0 {
+		return 1
+	}
+	return int(b & 0xffff)
+}
+
+func logicalCores() int {
+	mfi := maxFunctionID()
+	switch vendorID() {
+	case Intel:
+		// Use this on old Intel processors
+		if mfi < 0xb {
+			if mfi < 1 {
+				return 0
+			}
+			// CPUID.1:EBX[23:16] represents the maximum number of addressable IDs (initial APIC ID)
+			// that can be assigned to logical processors in a physical package.
+			// The value may not be the same as the number of logical processors that are present in the hardware of a physical package.
+			_, ebx, _, _ := cpuid(1)
+			logical := (ebx >> 16) & 0xff
+			return int(logical)
+		}
+		_, b, _, _ := cpuidex(0xb, 1)
+		return int(b & 0xffff)
+	case AMD:
+		_, b, _, _ := cpuid(1)
+		return int((b >> 16) & 0xff)
+	default:
+		return 0
+	}
+}
+
+func familyModel() (int, int) {
+	if maxFunctionID() < 0x1 {
+		return 0, 0
+	}
+	eax, _, _, _ := cpuid(1)
+	family := ((eax >> 8) & 0xf) + ((eax >> 20) & 0xff)
+	model := ((eax >> 4) & 0xf) + ((eax >> 12) & 0xf0)
+	return int(family), int(model)
+}
+
+func physicalCores() int {
+	switch vendorID() {
+	case Intel:
+		return logicalCores() / threadsPerCore()
+	case AMD:
+		if maxExtendedFunction() >= 0x80000008 {
+			_, _, c, _ := cpuid(0x80000008)
+			return int(c&0xff) + 1
+		}
+	}
+	return 0
+}
+
+// Except from http://en.wikipedia.org/wiki/CPUID#EAX.3D0:_Get_vendor_ID
+var vendorMapping = map[string]Vendor{
+	"AMDisbetter!": AMD,
+	"AuthenticAMD": AMD,
+	"CentaurHauls": VIA,
+	"GenuineIntel": Intel,
+	"TransmetaCPU": Transmeta,
+	"GenuineTMx86": Transmeta,
+	"Geode by NSC": NSC,
+	"VIA VIA VIA ": VIA,
+	"KVMKVMKVMKVM": KVM,
+	"Microsoft Hv": MSVM,
+	"VMwareVMware": VMware,
+	"XenVMMXenVMM": XenHVM,
+}
+
+func vendorID() Vendor {
+	_, b, c, d := cpuid(0)
+	v := valAsString(b, d, c)
+	vend, ok := vendorMapping[string(v)]
+	if !ok {
+		return Other
+	}
+	return vend
+}
+
+func cacheLine() int {
+	if maxFunctionID() < 0x1 {
+		return 0
+	}
+
+	_, ebx, _, _ := cpuid(1)
+	cache := (ebx & 0xff00) >> 5 // cflush size
+	if cache == 0 && maxExtendedFunction() >= 0x80000006 {
+		_, _, ecx, _ := cpuid(0x80000006)
+		cache = ecx & 0xff // cacheline size
+	}
+	// TODO: Read from Cache and TLB Information
+	return int(cache)
+}
+
+func (c *CPUInfo) cacheSize() {
+	c.Cache.L1D = -1
+	c.Cache.L1I = -1
+	c.Cache.L2 = -1
+	c.Cache.L3 = -1
+	vendor := vendorID()
+	switch vendor {
+	case Intel:
+		if maxFunctionID() < 4 {
+			return
+		}
+		for i := uint32(0); ; i++ {
+			eax, ebx, ecx, _ := cpuidex(4, i)
+			cacheType := eax & 15
+			if cacheType == 0 {
+				break
+			}
+			cacheLevel := (eax >> 5) & 7
+			coherency := int(ebx&0xfff) + 1
+			partitions := int((ebx>>12)&0x3ff) + 1
+			associativity := int((ebx>>22)&0x3ff) + 1
+			sets := int(ecx) + 1
+			size := associativity * partitions * coherency * sets
+			switch cacheLevel {
+			case 1:
+				if cacheType == 1 {
+					// 1 = Data Cache
+					c.Cache.L1D = size
+				} else if cacheType == 2 {
+					// 2 = Instruction Cache
+					c.Cache.L1I = size
+				} else {
+					if c.Cache.L1D < 0 {
+						c.Cache.L1I = size
+					}
+					if c.Cache.L1I < 0 {
+						c.Cache.L1I = size
+					}
+				}
+			case 2:
+				c.Cache.L2 = size
+			case 3:
+				c.Cache.L3 = size
+			}
+		}
+	case AMD:
+		// Untested.
+		if maxExtendedFunction() < 0x80000005 {
+			return
+		}
+		_, _, ecx, edx := cpuid(0x80000005)
+		c.Cache.L1D = int(((ecx >> 24) & 0xFF) * 1024)
+		c.Cache.L1I = int(((edx >> 24) & 0xFF) * 1024)
+
+		if maxExtendedFunction() < 0x80000006 {
+			return
+		}
+		_, _, ecx, _ = cpuid(0x80000006)
+		c.Cache.L2 = int(((ecx >> 16) & 0xFFFF) * 1024)
+	}
+
+	return
+}
+
+type SGXSupport struct {
+	Available           bool
+	SGX1Supported       bool
+	SGX2Supported       bool
+	MaxEnclaveSizeNot64 int64
+	MaxEnclaveSize64    int64
+}
+
+func hasSGX(available bool) (rval SGXSupport) {
+	rval.Available = available
+
+	if !available {
+		return
+	}
+
+	a, _, _, d := cpuidex(0x12, 0)
+	rval.SGX1Supported = a&0x01 != 0
+	rval.SGX2Supported = a&0x02 != 0
+	rval.MaxEnclaveSizeNot64 = 1 << (d & 0xFF)     // pow 2
+	rval.MaxEnclaveSize64 = 1 << ((d >> 8) & 0xFF) // pow 2
+
+	return
+}
+
+func support() Flags {
+	mfi := maxFunctionID()
+	vend := vendorID()
+	if mfi < 0x1 {
+		return 0
+	}
+	rval := uint64(0)
+	_, _, c, d := cpuid(1)
+	if (d & (1 << 15)) != 0 {
+		rval |= CMOV
+	}
+	if (d & (1 << 23)) != 0 {
+		rval |= MMX
+	}
+	if (d & (1 << 25)) != 0 {
+		rval |= MMXEXT
+	}
+	if (d & (1 << 25)) != 0 {
+		rval |= SSE
+	}
+	if (d & (1 << 26)) != 0 {
+		rval |= SSE2
+	}
+	if (c & 1) != 0 {
+		rval |= SSE3
+	}
+	if (c & 0x00000200) != 0 {
+		rval |= SSSE3
+	}
+	if (c & 0x00080000) != 0 {
+		rval |= SSE4
+	}
+	if (c & 0x00100000) != 0 {
+		rval |= SSE42
+	}
+	if (c & (1 << 25)) != 0 {
+		rval |= AESNI
+	}
+	if (c & (1 << 1)) != 0 {
+		rval |= CLMUL
+	}
+	if c&(1<<23) != 0 {
+		rval |= POPCNT
+	}
+	if c&(1<<30) != 0 {
+		rval |= RDRAND
+	}
+	if c&(1<<29) != 0 {
+		rval |= F16C
+	}
+	if c&(1<<13) != 0 {
+		rval |= CX16
+	}
+	if vend == Intel && (d&(1<<28)) != 0 && mfi >= 4 {
+		if threadsPerCore() > 1 {
+			rval |= HTT
+		}
+	}
+
+	// Check XGETBV, OXSAVE and AVX bits
+	if c&(1<<26) != 0 && c&(1<<27) != 0 && c&(1<<28) != 0 {
+		// Check for OS support
+		eax, _ := xgetbv(0)
+		if (eax & 0x6) == 0x6 {
+			rval |= AVX
+			if (c & 0x00001000) != 0 {
+				rval |= FMA3
+			}
+		}
+	}
+
+	// Check AVX2, AVX2 requires OS support, but BMI1/2 don't.
+	if mfi >= 7 {
+		_, ebx, ecx, edx := cpuidex(7, 0)
+		if (rval&AVX) != 0 && (ebx&0x00000020) != 0 {
+			rval |= AVX2
+		}
+		if (ebx & 0x00000008) != 0 {
+			rval |= BMI1
+			if (ebx & 0x00000100) != 0 {
+				rval |= BMI2
+			}
+		}
+		if ebx&(1<<2) != 0 {
+			rval |= SGX
+		}
+		if ebx&(1<<4) != 0 {
+			rval |= HLE
+		}
+		if ebx&(1<<9) != 0 {
+			rval |= ERMS
+		}
+		if ebx&(1<<11) != 0 {
+			rval |= RTM
+		}
+		if ebx&(1<<14) != 0 {
+			rval |= MPX
+		}
+		if ebx&(1<<18) != 0 {
+			rval |= RDSEED
+		}
+		if ebx&(1<<19) != 0 {
+			rval |= ADX
+		}
+		if ebx&(1<<29) != 0 {
+			rval |= SHA
+		}
+		if edx&(1<<26) != 0 {
+			rval |= IBPB
+		}
+		if edx&(1<<27) != 0 {
+			rval |= STIBP
+		}
+
+		// Only detect AVX-512 features if XGETBV is supported
+		if c&((1<<26)|(1<<27)) == (1<<26)|(1<<27) {
+			// Check for OS support
+			eax, _ := xgetbv(0)
+
+			// Verify that XCR0[7:5] = ‘111b’ (OPMASK state, upper 256-bit of ZMM0-ZMM15 and
+			// ZMM16-ZMM31 state are enabled by OS)
+			/// and that XCR0[2:1] = ‘11b’ (XMM state and YMM state are enabled by OS).
+			if (eax>>5)&7 == 7 && (eax>>1)&3 == 3 {
+				if ebx&(1<<16) != 0 {
+					rval |= AVX512F
+				}
+				if ebx&(1<<17) != 0 {
+					rval |= AVX512DQ
+				}
+				if ebx&(1<<21) != 0 {
+					rval |= AVX512IFMA
+				}
+				if ebx&(1<<26) != 0 {
+					rval |= AVX512PF
+				}
+				if ebx&(1<<27) != 0 {
+					rval |= AVX512ER
+				}
+				if ebx&(1<<28) != 0 {
+					rval |= AVX512CD
+				}
+				if ebx&(1<<30) != 0 {
+					rval |= AVX512BW
+				}
+				if ebx&(1<<31) != 0 {
+					rval |= AVX512VL
+				}
+				// ecx
+				if ecx&(1<<1) != 0 {
+					rval |= AVX512VBMI
+				}
+			}
+		}
+	}
+
+	if maxExtendedFunction() >= 0x80000001 {
+		_, _, c, d := cpuid(0x80000001)
+		if (c & (1 << 5)) != 0 {
+			rval |= LZCNT
+			rval |= POPCNT
+		}
+		if (d & (1 << 31)) != 0 {
+			rval |= AMD3DNOW
+		}
+		if (d & (1 << 30)) != 0 {
+			rval |= AMD3DNOWEXT
+		}
+		if (d & (1 << 23)) != 0 {
+			rval |= MMX
+		}
+		if (d & (1 << 22)) != 0 {
+			rval |= MMXEXT
+		}
+		if (c & (1 << 6)) != 0 {
+			rval |= SSE4A
+		}
+		if d&(1<<20) != 0 {
+			rval |= NX
+		}
+		if d&(1<<27) != 0 {
+			rval |= RDTSCP
+		}
+
+		/* Allow for selectively disabling SSE2 functions on AMD processors
+		   with SSE2 support but not SSE4a. This includes Athlon64, some
+		   Opteron, and some Sempron processors. MMX, SSE, or 3DNow! are faster
+		   than SSE2 often enough to utilize this special-case flag.
+		   AV_CPU_FLAG_SSE2 and AV_CPU_FLAG_SSE2SLOW are both set in this case
+		   so that SSE2 is used unless explicitly disabled by checking
+		   AV_CPU_FLAG_SSE2SLOW. */
+		if vendorID() != Intel &&
+			rval&SSE2 != 0 && (c&0x00000040) == 0 {
+			rval |= SSE2SLOW
+		}
+
+		/* XOP and FMA4 use the AVX instruction coding scheme, so they can't be
+		 * used unless the OS has AVX support. */
+		if (rval & AVX) != 0 {
+			if (c & 0x00000800) != 0 {
+				rval |= XOP
+			}
+			if (c & 0x00010000) != 0 {
+				rval |= FMA4
+			}
+		}
+
+		if vendorID() == Intel {
+			family, model := familyModel()
+			if family == 6 && (model == 9 || model == 13 || model == 14) {
+				/* 6/9 (pentium-m "banias"), 6/13 (pentium-m "dothan"), and
+				 * 6/14 (core1 "yonah") theoretically support sse2, but it's
+				 * usually slower than mmx. */
+				if (rval & SSE2) != 0 {
+					rval |= SSE2SLOW
+				}
+				if (rval & SSE3) != 0 {
+					rval |= SSE3SLOW
+				}
+			}
+			/* The Atom processor has SSSE3 support, which is useful in many cases,
+			 * but sometimes the SSSE3 version is slower than the SSE2 equivalent
+			 * on the Atom, but is generally faster on other processors supporting
+			 * SSSE3. This flag allows for selectively disabling certain SSSE3
+			 * functions on the Atom. */
+			if family == 6 && model == 28 {
+				rval |= ATOM
+			}
+		}
+	}
+	return Flags(rval)
+}
+
+func valAsString(values ...uint32) []byte {
+	r := make([]byte, 4*len(values))
+	for i, v := range values {
+		dst := r[i*4:]
+		dst[0] = byte(v & 0xff)
+		dst[1] = byte((v >> 8) & 0xff)
+		dst[2] = byte((v >> 16) & 0xff)
+		dst[3] = byte((v >> 24) & 0xff)
+		switch {
+		case dst[0] == 0:
+			return r[:i*4]
+		case dst[1] == 0:
+			return r[:i*4+1]
+		case dst[2] == 0:
+			return r[:i*4+2]
+		case dst[3] == 0:
+			return r[:i*4+3]
+		}
+	}
+	return r
+}
diff --git a/vendor/github.com/klauspost/cpuid/cpuid_386.s b/vendor/github.com/klauspost/cpuid/cpuid_386.s
new file mode 100644
index 00000000000..4d731711e48
--- /dev/null
+++ b/vendor/github.com/klauspost/cpuid/cpuid_386.s
@@ -0,0 +1,42 @@
+// Copyright (c) 2015 Klaus Post, released under MIT License. See LICENSE file.
+
+// +build 386,!gccgo
+
+// func asmCpuid(op uint32) (eax, ebx, ecx, edx uint32)
+TEXT ·asmCpuid(SB), 7, $0
+	XORL CX, CX
+	MOVL op+0(FP), AX
+	CPUID
+	MOVL AX, eax+4(FP)
+	MOVL BX, ebx+8(FP)
+	MOVL CX, ecx+12(FP)
+	MOVL DX, edx+16(FP)
+	RET
+
+// func asmCpuidex(op, op2 uint32) (eax, ebx, ecx, edx uint32)
+TEXT ·asmCpuidex(SB), 7, $0
+	MOVL op+0(FP), AX
+	MOVL op2+4(FP), CX
+	CPUID
+	MOVL AX, eax+8(FP)
+	MOVL BX, ebx+12(FP)
+	MOVL CX, ecx+16(FP)
+	MOVL DX, edx+20(FP)
+	RET
+
+// func xgetbv(index uint32) (eax, edx uint32)
+TEXT ·asmXgetbv(SB), 7, $0
+	MOVL index+0(FP), CX
+	BYTE $0x0f; BYTE $0x01; BYTE $0xd0 // XGETBV
+	MOVL AX, eax+4(FP)
+	MOVL DX, edx+8(FP)
+	RET
+
+// func asmRdtscpAsm() (eax, ebx, ecx, edx uint32)
+TEXT ·asmRdtscpAsm(SB), 7, $0
+	BYTE $0x0F; BYTE $0x01; BYTE $0xF9 // RDTSCP
+	MOVL AX, eax+0(FP)
+	MOVL BX, ebx+4(FP)
+	MOVL CX, ecx+8(FP)
+	MOVL DX, edx+12(FP)
+	RET
diff --git a/vendor/github.com/klauspost/cpuid/cpuid_amd64.s b/vendor/github.com/klauspost/cpuid/cpuid_amd64.s
new file mode 100644
index 00000000000..3c1d60e4221
--- /dev/null
+++ b/vendor/github.com/klauspost/cpuid/cpuid_amd64.s
@@ -0,0 +1,42 @@
+// Copyright (c) 2015 Klaus Post, released under MIT License. See LICENSE file.
+
+//+build amd64,!gccgo
+
+// func asmCpuid(op uint32) (eax, ebx, ecx, edx uint32)
+TEXT ·asmCpuid(SB), 7, $0
+	XORQ CX, CX
+	MOVL op+0(FP), AX
+	CPUID
+	MOVL AX, eax+8(FP)
+	MOVL BX, ebx+12(FP)
+	MOVL CX, ecx+16(FP)
+	MOVL DX, edx+20(FP)
+	RET
+
+// func asmCpuidex(op, op2 uint32) (eax, ebx, ecx, edx uint32)
+TEXT ·asmCpuidex(SB), 7, $0
+	MOVL op+0(FP), AX
+	MOVL op2+4(FP), CX
+	CPUID
+	MOVL AX, eax+8(FP)
+	MOVL BX, ebx+12(FP)
+	MOVL CX, ecx+16(FP)
+	MOVL DX, edx+20(FP)
+	RET
+
+// func asmXgetbv(index uint32) (eax, edx uint32)
+TEXT ·asmXgetbv(SB), 7, $0
+	MOVL index+0(FP), CX
+	BYTE $0x0f; BYTE $0x01; BYTE $0xd0 // XGETBV
+	MOVL AX, eax+8(FP)
+	MOVL DX, edx+12(FP)
+	RET
+
+// func asmRdtscpAsm() (eax, ebx, ecx, edx uint32)
+TEXT ·asmRdtscpAsm(SB), 7, $0
+	BYTE $0x0F; BYTE $0x01; BYTE $0xF9 // RDTSCP
+	MOVL AX, eax+0(FP)
+	MOVL BX, ebx+4(FP)
+	MOVL CX, ecx+8(FP)
+	MOVL DX, edx+12(FP)
+	RET
diff --git a/vendor/github.com/klauspost/cpuid/detect_intel.go b/vendor/github.com/klauspost/cpuid/detect_intel.go
new file mode 100644
index 00000000000..a5f04dd6d0a
--- /dev/null
+++ b/vendor/github.com/klauspost/cpuid/detect_intel.go
@@ -0,0 +1,17 @@
+// Copyright (c) 2015 Klaus Post, released under MIT License. See LICENSE file.
+
+// +build 386,!gccgo amd64,!gccgo
+
+package cpuid
+
+func asmCpuid(op uint32) (eax, ebx, ecx, edx uint32)
+func asmCpuidex(op, op2 uint32) (eax, ebx, ecx, edx uint32)
+func asmXgetbv(index uint32) (eax, edx uint32)
+func asmRdtscpAsm() (eax, ebx, ecx, edx uint32)
+
+func initCPU() {
+	cpuid = asmCpuid
+	cpuidex = asmCpuidex
+	xgetbv = asmXgetbv
+	rdtscpAsm = asmRdtscpAsm
+}
diff --git a/vendor/github.com/klauspost/cpuid/detect_ref.go b/vendor/github.com/klauspost/cpuid/detect_ref.go
new file mode 100644
index 00000000000..909c5d9a7ae
--- /dev/null
+++ b/vendor/github.com/klauspost/cpuid/detect_ref.go
@@ -0,0 +1,23 @@
+// Copyright (c) 2015 Klaus Post, released under MIT License. See LICENSE file.
+
+// +build !amd64,!386 gccgo
+
+package cpuid
+
+func initCPU() {
+	cpuid = func(op uint32) (eax, ebx, ecx, edx uint32) {
+		return 0, 0, 0, 0
+	}
+
+	cpuidex = func(op, op2 uint32) (eax, ebx, ecx, edx uint32) {
+		return 0, 0, 0, 0
+	}
+
+	xgetbv = func(index uint32) (eax, edx uint32) {
+		return 0, 0
+	}
+
+	rdtscpAsm = func() (eax, ebx, ecx, edx uint32) {
+		return 0, 0, 0, 0
+	}
+}
diff --git a/vendor/github.com/klauspost/cpuid/generate.go b/vendor/github.com/klauspost/cpuid/generate.go
new file mode 100644
index 00000000000..90e7a98d278
--- /dev/null
+++ b/vendor/github.com/klauspost/cpuid/generate.go
@@ -0,0 +1,4 @@
+package cpuid
+
+//go:generate go run private-gen.go
+//go:generate gofmt -w ./private
diff --git a/vendor/github.com/klauspost/cpuid/private-gen.go b/vendor/github.com/klauspost/cpuid/private-gen.go
new file mode 100644
index 00000000000..437333d2922
--- /dev/null
+++ b/vendor/github.com/klauspost/cpuid/private-gen.go
@@ -0,0 +1,476 @@
+// +build ignore
+
+package main
+
+import (
+	"bytes"
+	"fmt"
+	"go/ast"
+	"go/parser"
+	"go/printer"
+	"go/token"
+	"io"
+	"io/ioutil"
+	"log"
+	"os"
+	"reflect"
+	"strings"
+	"unicode"
+	"unicode/utf8"
+)
+
+var inFiles = []string{"cpuid.go", "cpuid_test.go"}
+var copyFiles = []string{"cpuid_amd64.s", "cpuid_386.s", "detect_ref.go", "detect_intel.go"}
+var fileSet = token.NewFileSet()
+var reWrites = []rewrite{
+	initRewrite("CPUInfo -> cpuInfo"),
+	initRewrite("Vendor -> vendor"),
+	initRewrite("Flags -> flags"),
+	initRewrite("Detect -> detect"),
+	initRewrite("CPU -> cpu"),
+}
+var excludeNames = map[string]bool{"string": true, "join": true, "trim": true,
+	// cpuid_test.go
+	"t": true, "println": true, "logf": true, "log": true, "fatalf": true, "fatal": true,
+}
+
+var excludePrefixes = []string{"test", "benchmark"}
+
+func main() {
+	Package := "private"
+	parserMode := parser.ParseComments
+	exported := make(map[string]rewrite)
+	for _, file := range inFiles {
+		in, err := os.Open(file)
+		if err != nil {
+			log.Fatalf("opening input", err)
+		}
+
+		src, err := ioutil.ReadAll(in)
+		if err != nil {
+			log.Fatalf("reading input", err)
+		}
+
+		astfile, err := parser.ParseFile(fileSet, file, src, parserMode)
+		if err != nil {
+			log.Fatalf("parsing input", err)
+		}
+
+		for _, rw := range reWrites {
+			astfile = rw(astfile)
+		}
+
+		// Inspect the AST and print all identifiers and literals.
+		var startDecl token.Pos
+		var endDecl token.Pos
+		ast.Inspect(astfile, func(n ast.Node) bool {
+			var s string
+			switch x := n.(type) {
+			case *ast.Ident:
+				if x.IsExported() {
+					t := strings.ToLower(x.Name)
+					for _, pre := range excludePrefixes {
+						if strings.HasPrefix(t, pre) {
+							return true
+						}
+					}
+					if excludeNames[t] != true {
+						//if x.Pos() > startDecl && x.Pos() < endDecl {
+						exported[x.Name] = initRewrite(x.Name + " -> " + t)
+					}
+				}
+
+			case *ast.GenDecl:
+				if x.Tok == token.CONST && x.Lparen > 0 {
+					startDecl = x.Lparen
+					endDecl = x.Rparen
+					// fmt.Printf("Decl:%s -> %s\n", fileSet.Position(startDecl), fileSet.Position(endDecl))
+				}
+			}
+			if s != "" {
+				fmt.Printf("%s:\t%s\n", fileSet.Position(n.Pos()), s)
+			}
+			return true
+		})
+
+		for _, rw := range exported {
+			astfile = rw(astfile)
+		}
+
+		var buf bytes.Buffer
+
+		printer.Fprint(&buf, fileSet, astfile)
+
+		// Remove package documentation and insert information
+		s := buf.String()
+		ind := strings.Index(buf.String(), "\npackage cpuid")
+		s = s[ind:]
+		s = "// Generated, DO NOT EDIT,\n" +
+			"// but copy it to your own project and rename the package.\n" +
+			"// See more at http://github.com/klauspost/cpuid\n" +
+			s
+
+		outputName := Package + string(os.PathSeparator) + file
+
+		err = ioutil.WriteFile(outputName, []byte(s), 0644)
+		if err != nil {
+			log.Fatalf("writing output: %s", err)
+		}
+		log.Println("Generated", outputName)
+	}
+
+	for _, file := range copyFiles {
+		dst := ""
+		if strings.HasPrefix(file, "cpuid") {
+			dst = Package + string(os.PathSeparator) + file
+		} else {
+			dst = Package + string(os.PathSeparator) + "cpuid_" + file
+		}
+		err := copyFile(file, dst)
+		if err != nil {
+			log.Fatalf("copying file: %s", err)
+		}
+		log.Println("Copied", dst)
+	}
+}
+
+// CopyFile copies a file from src to dst. If src and dst files exist, and are
+// the same, then return success. Copy the file contents from src to dst.
+func copyFile(src, dst string) (err error) {
+	sfi, err := os.Stat(src)
+	if err != nil {
+		return
+	}
+	if !sfi.Mode().IsRegular() {
+		// cannot copy non-regular files (e.g., directories,
+		// symlinks, devices, etc.)
+		return fmt.Errorf("CopyFile: non-regular source file %s (%q)", sfi.Name(), sfi.Mode().String())
+	}
+	dfi, err := os.Stat(dst)
+	if err != nil {
+		if !os.IsNotExist(err) {
+			return
+		}
+	} else {
+		if !(dfi.Mode().IsRegular()) {
+			return fmt.Errorf("CopyFile: non-regular destination file %s (%q)", dfi.Name(), dfi.Mode().String())
+		}
+		if os.SameFile(sfi, dfi) {
+			return
+		}
+	}
+	err = copyFileContents(src, dst)
+	return
+}
+
+// copyFileContents copies the contents of the file named src to the file named
+// by dst. The file will be created if it does not already exist. If the
+// destination file exists, all it's contents will be replaced by the contents
+// of the source file.
+func copyFileContents(src, dst string) (err error) {
+	in, err := os.Open(src)
+	if err != nil {
+		return
+	}
+	defer in.Close()
+	out, err := os.Create(dst)
+	if err != nil {
+		return
+	}
+	defer func() {
+		cerr := out.Close()
+		if err == nil {
+			err = cerr
+		}
+	}()
+	if _, err = io.Copy(out, in); err != nil {
+		return
+	}
+	err = out.Sync()
+	return
+}
+
+type rewrite func(*ast.File) *ast.File
+
+// Mostly copied from gofmt
+func initRewrite(rewriteRule string) rewrite {
+	f := strings.Split(rewriteRule, "->")
+	if len(f) != 2 {
+		fmt.Fprintf(os.Stderr, "rewrite rule must be of the form 'pattern -> replacement'\n")
+		os.Exit(2)
+	}
+	pattern := parseExpr(f[0], "pattern")
+	replace := parseExpr(f[1], "replacement")
+	return func(p *ast.File) *ast.File { return rewriteFile(pattern, replace, p) }
+}
+
+// parseExpr parses s as an expression.
+// It might make sense to expand this to allow statement patterns,
+// but there are problems with preserving formatting and also
+// with what a wildcard for a statement looks like.
+func parseExpr(s, what string) ast.Expr {
+	x, err := parser.ParseExpr(s)
+	if err != nil {
+		fmt.Fprintf(os.Stderr, "parsing %s %s at %s\n", what, s, err)
+		os.Exit(2)
+	}
+	return x
+}
+
+// Keep this function for debugging.
+/*
+func dump(msg string, val reflect.Value) {
+	fmt.Printf("%s:\n", msg)
+	ast.Print(fileSet, val.Interface())
+	fmt.Println()
+}
+*/
+
+// rewriteFile applies the rewrite rule 'pattern -> replace' to an entire file.
+func rewriteFile(pattern, replace ast.Expr, p *ast.File) *ast.File {
+	cmap := ast.NewCommentMap(fileSet, p, p.Comments)
+	m := make(map[string]reflect.Value)
+	pat := reflect.ValueOf(pattern)
+	repl := reflect.ValueOf(replace)
+
+	var rewriteVal func(val reflect.Value) reflect.Value
+	rewriteVal = func(val reflect.Value) reflect.Value {
+		// don't bother if val is invalid to start with
+		if !val.IsValid() {
+			return reflect.Value{}
+		}
+		for k := range m {
+			delete(m, k)
+		}
+		val = apply(rewriteVal, val)
+		if match(m, pat, val) {
+			val = subst(m, repl, reflect.ValueOf(val.Interface().(ast.Node).Pos()))
+		}
+		return val
+	}
+
+	r := apply(rewriteVal, reflect.ValueOf(p)).Interface().(*ast.File)
+	r.Comments = cmap.Filter(r).Comments() // recreate comments list
+	return r
+}
+
+// set is a wrapper for x.Set(y); it protects the caller from panics if x cannot be changed to y.
+func set(x, y reflect.Value) {
+	// don't bother if x cannot be set or y is invalid
+	if !x.CanSet() || !y.IsValid() {
+		return
+	}
+	defer func() {
+		if x := recover(); x != nil {
+			if s, ok := x.(string); ok &&
+				(strings.Contains(s, "type mismatch") || strings.Contains(s, "not assignable")) {
+				// x cannot be set to y - ignore this rewrite
+				return
+			}
+			panic(x)
+		}
+	}()
+	x.Set(y)
+}
+
+// Values/types for special cases.
+var (
+	objectPtrNil = reflect.ValueOf((*ast.Object)(nil))
+	scopePtrNil  = reflect.ValueOf((*ast.Scope)(nil))
+
+	identType     = reflect.TypeOf((*ast.Ident)(nil))
+	objectPtrType = reflect.TypeOf((*ast.Object)(nil))
+	positionType  = reflect.TypeOf(token.NoPos)
+	callExprType  = reflect.TypeOf((*ast.CallExpr)(nil))
+	scopePtrType  = reflect.TypeOf((*ast.Scope)(nil))
+)
+
+// apply replaces each AST field x in val with f(x), returning val.
+// To avoid extra conversions, f operates on the reflect.Value form.
+func apply(f func(reflect.Value) reflect.Value, val reflect.Value) reflect.Value {
+	if !val.IsValid() {
+		return reflect.Value{}
+	}
+
+	// *ast.Objects introduce cycles and are likely incorrect after
+	// rewrite; don't follow them but replace with nil instead
+	if val.Type() == objectPtrType {
+		return objectPtrNil
+	}
+
+	// similarly for scopes: they are likely incorrect after a rewrite;
+	// replace them with nil
+	if val.Type() == scopePtrType {
+		return scopePtrNil
+	}
+
+	switch v := reflect.Indirect(val); v.Kind() {
+	case reflect.Slice:
+		for i := 0; i < v.Len(); i++ {
+			e := v.Index(i)
+			set(e, f(e))
+		}
+	case reflect.Struct:
+		for i := 0; i < v.NumField(); i++ {
+			e := v.Field(i)
+			set(e, f(e))
+		}
+	case reflect.Interface:
+		e := v.Elem()
+		set(v, f(e))
+	}
+	return val
+}
+
+func isWildcard(s string) bool {
+	rune, size := utf8.DecodeRuneInString(s)
+	return size == len(s) && unicode.IsLower(rune)
+}
+
+// match returns true if pattern matches val,
+// recording wildcard submatches in m.
+// If m == nil, match checks whether pattern == val.
+func match(m map[string]reflect.Value, pattern, val reflect.Value) bool {
+	// Wildcard matches any expression.  If it appears multiple
+	// times in the pattern, it must match the same expression
+	// each time.
+	if m != nil && pattern.IsValid() && pattern.Type() == identType {
+		name := pattern.Interface().(*ast.Ident).Name
+		if isWildcard(name) && val.IsValid() {
+			// wildcards only match valid (non-nil) expressions.
+			if _, ok := val.Interface().(ast.Expr); ok && !val.IsNil() {
+				if old, ok := m[name]; ok {
+					return match(nil, old, val)
+				}
+				m[name] = val
+				return true
+			}
+		}
+	}
+
+	// Otherwise, pattern and val must match recursively.
+	if !pattern.IsValid() || !val.IsValid() {
+		return !pattern.IsValid() && !val.IsValid()
+	}
+	if pattern.Type() != val.Type() {
+		return false
+	}
+
+	// Special cases.
+	switch pattern.Type() {
+	case identType:
+		// For identifiers, only the names need to match
+		// (and none of the other *ast.Object information).
+		// This is a common case, handle it all here instead
+		// of recursing down any further via reflection.
+		p := pattern.Interface().(*ast.Ident)
+		v := val.Interface().(*ast.Ident)
+		return p == nil && v == nil || p != nil && v != nil && p.Name == v.Name
+	case objectPtrType, positionType:
+		// object pointers and token positions always match
+		return true
+	case callExprType:
+		// For calls, the Ellipsis fields (token.Position) must
+		// match since that is how f(x) and f(x...) are different.
+		// Check them here but fall through for the remaining fields.
+		p := pattern.Interface().(*ast.CallExpr)
+		v := val.Interface().(*ast.CallExpr)
+		if p.Ellipsis.IsValid() != v.Ellipsis.IsValid() {
+			return false
+		}
+	}
+
+	p := reflect.Indirect(pattern)
+	v := reflect.Indirect(val)
+	if !p.IsValid() || !v.IsValid() {
+		return !p.IsValid() && !v.IsValid()
+	}
+
+	switch p.Kind() {
+	case reflect.Slice:
+		if p.Len() != v.Len() {
+			return false
+		}
+		for i := 0; i < p.Len(); i++ {
+			if !match(m, p.Index(i), v.Index(i)) {
+				return false
+			}
+		}
+		return true
+
+	case reflect.Struct:
+		for i := 0; i < p.NumField(); i++ {
+			if !match(m, p.Field(i), v.Field(i)) {
+				return false
+			}
+		}
+		return true
+
+	case reflect.Interface:
+		return match(m, p.Elem(), v.Elem())
+	}
+
+	// Handle token integers, etc.
+	return p.Interface() == v.Interface()
+}
+
+// subst returns a copy of pattern with values from m substituted in place
+// of wildcards and pos used as the position of tokens from the pattern.
+// if m == nil, subst returns a copy of pattern and doesn't change the line
+// number information.
+func subst(m map[string]reflect.Value, pattern reflect.Value, pos reflect.Value) reflect.Value {
+	if !pattern.IsValid() {
+		return reflect.Value{}
+	}
+
+	// Wildcard gets replaced with map value.
+	if m != nil && pattern.Type() == identType {
+		name := pattern.Interface().(*ast.Ident).Name
+		if isWildcard(name) {
+			if old, ok := m[name]; ok {
+				return subst(nil, old, reflect.Value{})
+			}
+		}
+	}
+
+	if pos.IsValid() && pattern.Type() == positionType {
+		// use new position only if old position was valid in the first place
+		if old := pattern.Interface().(token.Pos); !old.IsValid() {
+			return pattern
+		}
+		return pos
+	}
+
+	// Otherwise copy.
+	switch p := pattern; p.Kind() {
+	case reflect.Slice:
+		v := reflect.MakeSlice(p.Type(), p.Len(), p.Len())
+		for i := 0; i < p.Len(); i++ {
+			v.Index(i).Set(subst(m, p.Index(i), pos))
+		}
+		return v
+
+	case reflect.Struct:
+		v := reflect.New(p.Type()).Elem()
+		for i := 0; i < p.NumField(); i++ {
+			v.Field(i).Set(subst(m, p.Field(i), pos))
+		}
+		return v
+
+	case reflect.Ptr:
+		v := reflect.New(p.Type()).Elem()
+		if elem := p.Elem(); elem.IsValid() {
+			v.Set(subst(m, elem, pos).Addr())
+		}
+		return v
+
+	case reflect.Interface:
+		v := reflect.New(p.Type()).Elem()
+		if elem := p.Elem(); elem.IsValid() {
+			v.Set(subst(m, elem, pos))
+		}
+		return v
+	}
+
+	return pattern
+}
diff --git a/vendor/github.com/klauspost/pgzip/GO_LICENSE b/vendor/github.com/klauspost/pgzip/GO_LICENSE
new file mode 100644
index 00000000000..74487567632
--- /dev/null
+++ b/vendor/github.com/klauspost/pgzip/GO_LICENSE
@@ -0,0 +1,27 @@
+Copyright (c) 2012 The Go Authors. All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+
+   * Redistributions of source code must retain the above copyright
+notice, this list of conditions and the following disclaimer.
+   * Redistributions in binary form must reproduce the above
+copyright notice, this list of conditions and the following disclaimer
+in the documentation and/or other materials provided with the
+distribution.
+   * Neither the name of Google Inc. nor the names of its
+contributors may be used to endorse or promote products derived from
+this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
diff --git a/vendor/github.com/klauspost/pgzip/LICENSE b/vendor/github.com/klauspost/pgzip/LICENSE
new file mode 100644
index 00000000000..2bdc0d75171
--- /dev/null
+++ b/vendor/github.com/klauspost/pgzip/LICENSE
@@ -0,0 +1,22 @@
+The MIT License (MIT)
+
+Copyright (c) 2014 Klaus Post
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
+
diff --git a/vendor/github.com/klauspost/pgzip/README.md b/vendor/github.com/klauspost/pgzip/README.md
new file mode 100644
index 00000000000..81000996c97
--- /dev/null
+++ b/vendor/github.com/klauspost/pgzip/README.md
@@ -0,0 +1,136 @@
+pgzip
+=====
+
+Go parallel gzip compression/decompression. This is a fully gzip compatible drop in replacement for "compress/gzip".
+
+This will split compression into blocks that are compressed in parallel. 
+This can be useful for compressing big amounts of data. The output is a standard gzip file.
+
+The gzip decompression is modified so it decompresses ahead of the current reader. 
+This means that reads will be non-blocking if the decompressor can keep ahead of your code reading from it. 
+CRC calculation also takes place in a separate goroutine.
+
+You should only use this if you are (de)compressing big amounts of data, 
+say **more than 1MB** at the time, otherwise you will not see any benefit, 
+and it will likely be faster to use the internal gzip library 
+or [this package](https://github.com/klauspost/compress).
+
+It is important to note that this library creates and reads *standard gzip files*. 
+You do not have to match the compressor/decompressor to get the described speedups, 
+and the gzip files are fully compatible with other gzip readers/writers.
+
+A golang variant of this is [bgzf](https://godoc.org/github.com/biogo/hts/bgzf), 
+which has the same feature, as well as seeking in the resulting file. 
+The only drawback is a slightly bigger overhead compared to this and pure gzip. 
+See a comparison below.
+
+[![GoDoc][1]][2] [![Build Status][3]][4]
+
+[1]: https://godoc.org/github.com/klauspost/pgzip?status.svg
+[2]: https://godoc.org/github.com/klauspost/pgzip
+[3]: https://travis-ci.org/klauspost/pgzip.svg
+[4]: https://travis-ci.org/klauspost/pgzip
+
+Installation
+====
+```go get github.com/klauspost/pgzip/...```
+
+You might need to get/update the dependencies:
+
+```
+go get -u github.com/klauspost/compress
+go get -u github.com/klauspost/crc32
+```
+
+Usage
+====
+[Godoc Doumentation](https://godoc.org/github.com/klauspost/pgzip)
+
+To use as a replacement for gzip, exchange 
+
+```import "compress/gzip"``` 
+with 
+```import gzip "github.com/klauspost/pgzip"```.
+
+# Changes
+
+* Oct 6, 2016: Fixed an issue if the destination writer returned an error.
+* Oct 6, 2016: Better buffer reuse, should now generate less garbage.
+* Oct 6, 2016: Output does not change based on write sizes.
+* Dec 8, 2015: Decoder now supports the io.WriterTo interface, giving a speedup and less GC pressure.
+* Oct 9, 2015: Reduced allocations by ~35 by using sync.Pool. ~15% overall speedup.
+
+Changes in [github.com/klauspost/compress](https://github.com/klauspost/compress#changelog) are also carried over, so see that for more changes.
+
+## Compression
+The simplest way to use this is to simply do the same as you would when using [compress/gzip](http://golang.org/pkg/compress/gzip). 
+
+To change the block size, use the added (*pgzip.Writer).SetConcurrency(blockSize, blocks int) function. With this you can control the approximate size of your blocks, as well as how many you want to be processing in parallel. Default values for this is SetConcurrency(250000, 16), meaning blocks are split at 250000 bytes and up to 16 blocks can be processing at once before the writer blocks.
+
+
+Example:
+```
+var b bytes.Buffer
+w := gzip.NewWriter(&b)
+w.SetConcurrency(100000, 10)
+w.Write([]byte("hello, world\n"))
+w.Close()
+```
+
+To get any performance gains, you should at least be compressing more than 1 megabyte of data at the time.
+
+You should at least have a block size of 100k and at least a number of blocks that match the number of cores your would like to utilize, but about twice the number of blocks would be the best.
+
+Another side effect of this is, that it is likely to speed up your other code, since writes to the compressor only blocks if the compressor is already compressing the number of blocks you have specified. This also means you don't have worry about buffering input to the compressor.
+
+## Decompression
+
+Decompression works similar to compression. That means that you simply call pgzip the same way as you would call [compress/gzip](http://golang.org/pkg/compress/gzip). 
+
+The only difference is that if you want to specify your own readahead, you have to use `pgzip.NewReaderN(r io.Reader, blockSize, blocks int)` to get a reader with your custom blocksizes. The `blockSize` is the size of each block decoded, and `blocks` is the maximum number of blocks that is decoded ahead.
+
+See [Example on playground](http://play.golang.org/p/uHv1B5NbDh)
+
+Performance
+====
+## Compression
+
+See my blog post in [Benchmarks of Golang Gzip](https://blog.klauspost.com/go-gzipdeflate-benchmarks/).
+
+Compression cost is usually about 0.2% with default settings with a block size of 250k.
+
+Example with GOMAXPROC set to 8 (quad core with 8 hyperthreads)
+
+Content is [Matt Mahoneys 10GB corpus](http://mattmahoney.net/dc/10gb.html). Compression level 6.
+
+Compressor  | MB/sec   | speedup | size | size overhead (lower=better)
+------------|----------|---------|------|---------
+[gzip](http://golang.org/pkg/compress/gzip) (golang) | 7.21MB/s | 1.0x | 4786608902 | 0%
+[gzip](http://github.com/klauspost/compress/gzip) (klauspost) | 10.98MB/s | 1.52x | 4781331645 | -0.11%
+[pgzip](https://github.com/klauspost/pgzip) (klauspost) | 50.76MB/s|7.04x | 4784121440 | -0.052%
+[bgzf](https://godoc.org/github.com/biogo/hts/bgzf) (biogo) | 38.65MB/s | 5.36x | 4924899484 | 2.889%
+[pargzip](https://godoc.org/github.com/golang/build/pargzip) (builder) | 32.00MB/s | 4.44x | 4791226567 | 0.096%
+
+pgzip also contains a [linear time compression](https://github.com/klauspost/compress#linear-time-compression) mode, that will allow compression at ~150MB per core per second, independent of the content.
+
+See the [complete sheet](https://docs.google.com/spreadsheets/d/1nuNE2nPfuINCZJRMt6wFWhKpToF95I47XjSsc-1rbPQ/edit?usp=sharing) for different content types and compression settings.
+
+## Decompression
+
+The decompression speedup is there because it allows you to do other work while the decompression is taking place.
+
+In the example above, the numbers are as follows on a 4 CPU machine:
+
+Decompressor | Time | Speedup
+-------------|------|--------
+[gzip](http://golang.org/pkg/compress/gzip) (golang) | 1m28.85s | 0%
+[pgzip](https://github.com/klauspost/pgzip) (golang) | 43.48s | 104%
+
+But wait, since gzip decompression is inherently singlethreaded (aside from CRC calculation) how can it be more than 100% faster?  Because pgzip due to its design also acts as a buffer. When using unbuffered gzip, you are also waiting for io when you are decompressing. If the gzip decoder can keep up, it will always have data ready for your reader, and you will not be waiting for input to the gzip decompressor to complete.
+
+This is pretty much an optimal situation for pgzip, but it reflects most common usecases for CPU intensive gzip usage.
+
+I haven't included [bgzf](https://godoc.org/github.com/biogo/hts/bgzf) in this comparison, since it only can decompress files created by a compatible encoder, and therefore cannot be considered a generic gzip decompressor. But if you are able to compress your files with a bgzf compatible program, you can expect it to scale beyond 100%.
+
+# License
+This contains large portions of code from the go repository - see GO_LICENSE for more information. The changes are released under MIT License. See LICENSE for more information.
diff --git a/vendor/github.com/klauspost/pgzip/gunzip.go b/vendor/github.com/klauspost/pgzip/gunzip.go
new file mode 100644
index 00000000000..93efec71489
--- /dev/null
+++ b/vendor/github.com/klauspost/pgzip/gunzip.go
@@ -0,0 +1,573 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Package pgzip implements reading and writing of gzip format compressed files,
+// as specified in RFC 1952.
+//
+// This is a drop in replacement for "compress/gzip".
+// This will split compression into blocks that are compressed in parallel.
+// This can be useful for compressing big amounts of data.
+// The gzip decompression has not been modified, but remains in the package,
+// so you can use it as a complete replacement for "compress/gzip".
+//
+// See more at https://github.com/klauspost/pgzip
+package pgzip
+
+import (
+	"bufio"
+	"errors"
+	"hash"
+	"hash/crc32"
+	"io"
+	"sync"
+	"time"
+
+	"github.com/klauspost/compress/flate"
+)
+
+const (
+	gzipID1     = 0x1f
+	gzipID2     = 0x8b
+	gzipDeflate = 8
+	flagText    = 1 << 0
+	flagHdrCrc  = 1 << 1
+	flagExtra   = 1 << 2
+	flagName    = 1 << 3
+	flagComment = 1 << 4
+)
+
+func makeReader(r io.Reader) flate.Reader {
+	if rr, ok := r.(flate.Reader); ok {
+		return rr
+	}
+	return bufio.NewReader(r)
+}
+
+var (
+	// ErrChecksum is returned when reading GZIP data that has an invalid checksum.
+	ErrChecksum = errors.New("gzip: invalid checksum")
+	// ErrHeader is returned when reading GZIP data that has an invalid header.
+	ErrHeader = errors.New("gzip: invalid header")
+)
+
+// The gzip file stores a header giving metadata about the compressed file.
+// That header is exposed as the fields of the Writer and Reader structs.
+type Header struct {
+	Comment string    // comment
+	Extra   []byte    // "extra data"
+	ModTime time.Time // modification time
+	Name    string    // file name
+	OS      byte      // operating system type
+}
+
+// A Reader is an io.Reader that can be read to retrieve
+// uncompressed data from a gzip-format compressed file.
+//
+// In general, a gzip file can be a concatenation of gzip files,
+// each with its own header.  Reads from the Reader
+// return the concatenation of the uncompressed data of each.
+// Only the first header is recorded in the Reader fields.
+//
+// Gzip files store a length and checksum of the uncompressed data.
+// The Reader will return a ErrChecksum when Read
+// reaches the end of the uncompressed data if it does not
+// have the expected length or checksum.  Clients should treat data
+// returned by Read as tentative until they receive the io.EOF
+// marking the end of the data.
+type Reader struct {
+	Header
+	r            flate.Reader
+	decompressor io.ReadCloser
+	digest       hash.Hash32
+	size         uint32
+	flg          byte
+	buf          [512]byte
+	err          error
+	closeErr     chan error
+	multistream  bool
+
+	readAhead   chan read
+	roff        int // read offset
+	current     []byte
+	closeReader chan struct{}
+	lastBlock   bool
+	blockSize   int
+	blocks      int
+
+	activeRA bool       // Indication if readahead is active
+	mu       sync.Mutex // Lock for above
+
+	blockPool chan []byte
+}
+
+type read struct {
+	b   []byte
+	err error
+}
+
+// NewReader creates a new Reader reading the given reader.
+// The implementation buffers input and may read more data than necessary from r.
+// It is the caller's responsibility to call Close on the Reader when done.
+func NewReader(r io.Reader) (*Reader, error) {
+	z := new(Reader)
+	z.blocks = defaultBlocks
+	z.blockSize = defaultBlockSize
+	z.r = makeReader(r)
+	z.digest = crc32.NewIEEE()
+	z.multistream = true
+	z.blockPool = make(chan []byte, z.blocks)
+	for i := 0; i < z.blocks; i++ {
+		z.blockPool <- make([]byte, z.blockSize)
+	}
+	if err := z.readHeader(true); err != nil {
+		return nil, err
+	}
+	return z, nil
+}
+
+// NewReaderN creates a new Reader reading the given reader.
+// The implementation buffers input and may read more data than necessary from r.
+// It is the caller's responsibility to call Close on the Reader when done.
+//
+// With this you can control the approximate size of your blocks,
+// as well as how many blocks you want to have prefetched.
+//
+// Default values for this is blockSize = 250000, blocks = 16,
+// meaning up to 16 blocks of maximum 250000 bytes will be
+// prefetched.
+func NewReaderN(r io.Reader, blockSize, blocks int) (*Reader, error) {
+	z := new(Reader)
+	z.blocks = blocks
+	z.blockSize = blockSize
+	z.r = makeReader(r)
+	z.digest = crc32.NewIEEE()
+	z.multistream = true
+
+	// Account for too small values
+	if z.blocks <= 0 {
+		z.blocks = defaultBlocks
+	}
+	if z.blockSize <= 512 {
+		z.blockSize = defaultBlockSize
+	}
+	z.blockPool = make(chan []byte, z.blocks)
+	for i := 0; i < z.blocks; i++ {
+		z.blockPool <- make([]byte, z.blockSize)
+	}
+	if err := z.readHeader(true); err != nil {
+		return nil, err
+	}
+	return z, nil
+}
+
+// Reset discards the Reader z's state and makes it equivalent to the
+// result of its original state from NewReader, but reading from r instead.
+// This permits reusing a Reader rather than allocating a new one.
+func (z *Reader) Reset(r io.Reader) error {
+	z.killReadAhead()
+	z.r = makeReader(r)
+	z.digest = crc32.NewIEEE()
+	z.size = 0
+	z.err = nil
+	z.multistream = true
+
+	// Account for uninitialized values
+	if z.blocks <= 0 {
+		z.blocks = defaultBlocks
+	}
+	if z.blockSize <= 512 {
+		z.blockSize = defaultBlockSize
+	}
+
+	if z.blockPool == nil {
+		z.blockPool = make(chan []byte, z.blocks)
+		for i := 0; i < z.blocks; i++ {
+			z.blockPool <- make([]byte, z.blockSize)
+		}
+	}
+
+	return z.readHeader(true)
+}
+
+// Multistream controls whether the reader supports multistream files.
+//
+// If enabled (the default), the Reader expects the input to be a sequence
+// of individually gzipped data streams, each with its own header and
+// trailer, ending at EOF. The effect is that the concatenation of a sequence
+// of gzipped files is treated as equivalent to the gzip of the concatenation
+// of the sequence. This is standard behavior for gzip readers.
+//
+// Calling Multistream(false) disables this behavior; disabling the behavior
+// can be useful when reading file formats that distinguish individual gzip
+// data streams or mix gzip data streams with other data streams.
+// In this mode, when the Reader reaches the end of the data stream,
+// Read returns io.EOF. If the underlying reader implements io.ByteReader,
+// it will be left positioned just after the gzip stream.
+// To start the next stream, call z.Reset(r) followed by z.Multistream(false).
+// If there is no next stream, z.Reset(r) will return io.EOF.
+func (z *Reader) Multistream(ok bool) {
+	z.multistream = ok
+}
+
+// GZIP (RFC 1952) is little-endian, unlike ZLIB (RFC 1950).
+func get4(p []byte) uint32 {
+	return uint32(p[0]) | uint32(p[1])<<8 | uint32(p[2])<<16 | uint32(p[3])<<24
+}
+
+func (z *Reader) readString() (string, error) {
+	var err error
+	needconv := false
+	for i := 0; ; i++ {
+		if i >= len(z.buf) {
+			return "", ErrHeader
+		}
+		z.buf[i], err = z.r.ReadByte()
+		if err != nil {
+			return "", err
+		}
+		if z.buf[i] > 0x7f {
+			needconv = true
+		}
+		if z.buf[i] == 0 {
+			// GZIP (RFC 1952) specifies that strings are NUL-terminated ISO 8859-1 (Latin-1).
+			if needconv {
+				s := make([]rune, 0, i)
+				for _, v := range z.buf[0:i] {
+					s = append(s, rune(v))
+				}
+				return string(s), nil
+			}
+			return string(z.buf[0:i]), nil
+		}
+	}
+}
+
+func (z *Reader) read2() (uint32, error) {
+	_, err := io.ReadFull(z.r, z.buf[0:2])
+	if err != nil {
+		return 0, err
+	}
+	return uint32(z.buf[0]) | uint32(z.buf[1])<<8, nil
+}
+
+func (z *Reader) readHeader(save bool) error {
+	z.killReadAhead()
+
+	_, err := io.ReadFull(z.r, z.buf[0:10])
+	if err != nil {
+		return err
+	}
+	if z.buf[0] != gzipID1 || z.buf[1] != gzipID2 || z.buf[2] != gzipDeflate {
+		return ErrHeader
+	}
+	z.flg = z.buf[3]
+	if save {
+		z.ModTime = time.Unix(int64(get4(z.buf[4:8])), 0)
+		// z.buf[8] is xfl, ignored
+		z.OS = z.buf[9]
+	}
+	z.digest.Reset()
+	z.digest.Write(z.buf[0:10])
+
+	if z.flg&flagExtra != 0 {
+		n, err := z.read2()
+		if err != nil {
+			return err
+		}
+		data := make([]byte, n)
+		if _, err = io.ReadFull(z.r, data); err != nil {
+			return err
+		}
+		if save {
+			z.Extra = data
+		}
+	}
+
+	var s string
+	if z.flg&flagName != 0 {
+		if s, err = z.readString(); err != nil {
+			return err
+		}
+		if save {
+			z.Name = s
+		}
+	}
+
+	if z.flg&flagComment != 0 {
+		if s, err = z.readString(); err != nil {
+			return err
+		}
+		if save {
+			z.Comment = s
+		}
+	}
+
+	if z.flg&flagHdrCrc != 0 {
+		n, err := z.read2()
+		if err != nil {
+			return err
+		}
+		sum := z.digest.Sum32() & 0xFFFF
+		if n != sum {
+			return ErrHeader
+		}
+	}
+
+	z.digest.Reset()
+	z.decompressor = flate.NewReader(z.r)
+	z.doReadAhead()
+	return nil
+}
+
+func (z *Reader) killReadAhead() error {
+	z.mu.Lock()
+	defer z.mu.Unlock()
+	if z.activeRA {
+		if z.closeReader != nil {
+			close(z.closeReader)
+		}
+
+		// Wait for decompressor to be closed and return error, if any.
+		e, ok := <-z.closeErr
+		z.activeRA = false
+		if !ok {
+			// Channel is closed, so if there was any error it has already been returned.
+			return nil
+		}
+		return e
+	}
+	return nil
+}
+
+// Starts readahead.
+// Will return on error (including io.EOF)
+// or when z.closeReader is closed.
+func (z *Reader) doReadAhead() {
+	z.mu.Lock()
+	defer z.mu.Unlock()
+	z.activeRA = true
+
+	if z.blocks <= 0 {
+		z.blocks = defaultBlocks
+	}
+	if z.blockSize <= 512 {
+		z.blockSize = defaultBlockSize
+	}
+	ra := make(chan read, z.blocks)
+	z.readAhead = ra
+	closeReader := make(chan struct{}, 0)
+	z.closeReader = closeReader
+	z.lastBlock = false
+	closeErr := make(chan error, 1)
+	z.closeErr = closeErr
+	z.size = 0
+	z.roff = 0
+	z.current = nil
+	decomp := z.decompressor
+
+	go func() {
+		defer func() {
+			closeErr <- decomp.Close()
+			close(closeErr)
+			close(ra)
+		}()
+
+		// We hold a local reference to digest, since
+		// it way be changed by reset.
+		digest := z.digest
+		var wg sync.WaitGroup
+		for {
+			var buf []byte
+			select {
+			case buf = <-z.blockPool:
+			case <-closeReader:
+				return
+			}
+			buf = buf[0:z.blockSize]
+			// Try to fill the buffer
+			n, err := io.ReadFull(decomp, buf)
+			if err == io.ErrUnexpectedEOF {
+				if n > 0 {
+					err = nil
+				} else {
+					// If we got zero bytes, we need to establish if
+					// we reached end of stream or truncated stream.
+					_, err = decomp.Read([]byte{})
+					if err == io.EOF {
+						err = nil
+					}
+				}
+			}
+			if n < len(buf) {
+				buf = buf[0:n]
+			}
+			wg.Wait()
+			wg.Add(1)
+			go func() {
+				digest.Write(buf)
+				wg.Done()
+			}()
+			z.size += uint32(n)
+
+			// If we return any error, out digest must be ready
+			if err != nil {
+				wg.Wait()
+			}
+			select {
+			case z.readAhead <- read{b: buf, err: err}:
+			case <-closeReader:
+				// Sent on close, we don't care about the next results
+				return
+			}
+			if err != nil {
+				return
+			}
+		}
+	}()
+}
+
+func (z *Reader) Read(p []byte) (n int, err error) {
+	if z.err != nil {
+		return 0, z.err
+	}
+	if len(p) == 0 {
+		return 0, nil
+	}
+
+	for {
+		if len(z.current) == 0 && !z.lastBlock {
+			read := <-z.readAhead
+
+			if read.err != nil {
+				// If not nil, the reader will have exited
+				z.closeReader = nil
+
+				if read.err != io.EOF {
+					z.err = read.err
+					return
+				}
+				if read.err == io.EOF {
+					z.lastBlock = true
+					err = nil
+				}
+			}
+			z.current = read.b
+			z.roff = 0
+		}
+		avail := z.current[z.roff:]
+		if len(p) >= len(avail) {
+			// If len(p) >= len(current), return all content of current
+			n = copy(p, avail)
+			z.blockPool <- z.current
+			z.current = nil
+			if z.lastBlock {
+				err = io.EOF
+				break
+			}
+		} else {
+			// We copy as much as there is space for
+			n = copy(p, avail)
+			z.roff += n
+		}
+		return
+	}
+
+	// Finished file; check checksum + size.
+	if _, err := io.ReadFull(z.r, z.buf[0:8]); err != nil {
+		z.err = err
+		return 0, err
+	}
+	crc32, isize := get4(z.buf[0:4]), get4(z.buf[4:8])
+	sum := z.digest.Sum32()
+	if sum != crc32 || isize != z.size {
+		z.err = ErrChecksum
+		return 0, z.err
+	}
+
+	// File is ok; should we attempt reading one more?
+	if !z.multistream {
+		return 0, io.EOF
+	}
+
+	// Is there another?
+	if err = z.readHeader(false); err != nil {
+		z.err = err
+		return
+	}
+
+	// Yes.  Reset and read from it.
+	return z.Read(p)
+}
+
+func (z *Reader) WriteTo(w io.Writer) (n int64, err error) {
+	total := int64(0)
+	for {
+		if z.err != nil {
+			return total, z.err
+		}
+		// We write both to output and digest.
+		for {
+			// Read from input
+			read := <-z.readAhead
+			if read.err != nil {
+				// If not nil, the reader will have exited
+				z.closeReader = nil
+
+				if read.err != io.EOF {
+					z.err = read.err
+					return total, z.err
+				}
+				if read.err == io.EOF {
+					z.lastBlock = true
+					err = nil
+				}
+			}
+			// Write what we got
+			n, err := w.Write(read.b)
+			if n != len(read.b) {
+				return total, io.ErrShortWrite
+			}
+			total += int64(n)
+			if err != nil {
+				return total, err
+			}
+			// Put block back
+			z.blockPool <- read.b
+			if z.lastBlock {
+				break
+			}
+		}
+
+		// Finished file; check checksum + size.
+		if _, err := io.ReadFull(z.r, z.buf[0:8]); err != nil {
+			z.err = err
+			return total, err
+		}
+		crc32, isize := get4(z.buf[0:4]), get4(z.buf[4:8])
+		sum := z.digest.Sum32()
+		if sum != crc32 || isize != z.size {
+			z.err = ErrChecksum
+			return total, z.err
+		}
+		// File is ok; should we attempt reading one more?
+		if !z.multistream {
+			return total, nil
+		}
+
+		// Is there another?
+		err = z.readHeader(false)
+		if err == io.EOF {
+			return total, nil
+		}
+		if err != nil {
+			z.err = err
+			return total, err
+		}
+	}
+}
+
+// Close closes the Reader. It does not close the underlying io.Reader.
+func (z *Reader) Close() error {
+	return z.killReadAhead()
+}
diff --git a/vendor/github.com/klauspost/pgzip/gzip.go b/vendor/github.com/klauspost/pgzip/gzip.go
new file mode 100644
index 00000000000..85d14e9cbca
--- /dev/null
+++ b/vendor/github.com/klauspost/pgzip/gzip.go
@@ -0,0 +1,501 @@
+// Copyright 2010 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package pgzip
+
+import (
+	"bytes"
+	"errors"
+	"fmt"
+	"hash"
+	"hash/crc32"
+	"io"
+	"sync"
+	"time"
+
+	"github.com/klauspost/compress/flate"
+)
+
+const (
+	defaultBlockSize = 256 << 10
+	tailSize         = 16384
+	defaultBlocks    = 16
+)
+
+// These constants are copied from the flate package, so that code that imports
+// "compress/gzip" does not also have to import "compress/flate".
+const (
+	NoCompression       = flate.NoCompression
+	BestSpeed           = flate.BestSpeed
+	BestCompression     = flate.BestCompression
+	DefaultCompression  = flate.DefaultCompression
+	ConstantCompression = flate.ConstantCompression
+	HuffmanOnly         = flate.HuffmanOnly
+)
+
+// A Writer is an io.WriteCloser.
+// Writes to a Writer are compressed and written to w.
+type Writer struct {
+	Header
+	w             io.Writer
+	level         int
+	wroteHeader   bool
+	blockSize     int
+	blocks        int
+	currentBuffer []byte
+	prevTail      []byte
+	digest        hash.Hash32
+	size          int
+	closed        bool
+	buf           [10]byte
+	errMu         sync.RWMutex
+	err           error
+	pushedErr     chan struct{}
+	results       chan result
+	dictFlatePool sync.Pool
+	dstPool       sync.Pool
+	wg            sync.WaitGroup
+}
+
+type result struct {
+	result        chan []byte
+	notifyWritten chan struct{}
+}
+
+// Use SetConcurrency to finetune the concurrency level if needed.
+//
+// With this you can control the approximate size of your blocks,
+// as well as how many you want to be processing in parallel.
+//
+// Default values for this is SetConcurrency(250000, 16),
+// meaning blocks are split at 250000 bytes and up to 16 blocks
+// can be processing at once before the writer blocks.
+func (z *Writer) SetConcurrency(blockSize, blocks int) error {
+	if blockSize <= tailSize {
+		return fmt.Errorf("gzip: block size cannot be less than or equal to %d", tailSize)
+	}
+	if blocks <= 0 {
+		return errors.New("gzip: blocks cannot be zero or less")
+	}
+	if blockSize == z.blockSize && blocks == z.blocks {
+		return nil
+	}
+	z.blockSize = blockSize
+	z.results = make(chan result, blocks)
+	z.blocks = blocks
+	z.dstPool = sync.Pool{New: func() interface{} { return make([]byte, 0, blockSize+(blockSize)>>4) }}
+	return nil
+}
+
+// NewWriter returns a new Writer.
+// Writes to the returned writer are compressed and written to w.
+//
+// It is the caller's responsibility to call Close on the WriteCloser when done.
+// Writes may be buffered and not flushed until Close.
+//
+// Callers that wish to set the fields in Writer.Header must do so before
+// the first call to Write or Close. The Comment and Name header fields are
+// UTF-8 strings in Go, but the underlying format requires NUL-terminated ISO
+// 8859-1 (Latin-1). NUL or non-Latin-1 runes in those strings will lead to an
+// error on Write.
+func NewWriter(w io.Writer) *Writer {
+	z, _ := NewWriterLevel(w, DefaultCompression)
+	return z
+}
+
+// NewWriterLevel is like NewWriter but specifies the compression level instead
+// of assuming DefaultCompression.
+//
+// The compression level can be DefaultCompression, NoCompression, or any
+// integer value between BestSpeed and BestCompression inclusive. The error
+// returned will be nil if the level is valid.
+func NewWriterLevel(w io.Writer, level int) (*Writer, error) {
+	if level < ConstantCompression || level > BestCompression {
+		return nil, fmt.Errorf("gzip: invalid compression level: %d", level)
+	}
+	z := new(Writer)
+	z.SetConcurrency(defaultBlockSize, defaultBlocks)
+	z.init(w, level)
+	return z, nil
+}
+
+// This function must be used by goroutines to set an
+// error condition, since z.err access is restricted
+// to the callers goruotine.
+func (z *Writer) pushError(err error) {
+	z.errMu.Lock()
+	if z.err != nil {
+		z.errMu.Unlock()
+		return
+	}
+	z.err = err
+	close(z.pushedErr)
+	z.errMu.Unlock()
+}
+
+func (z *Writer) init(w io.Writer, level int) {
+	z.wg.Wait()
+	digest := z.digest
+	if digest != nil {
+		digest.Reset()
+	} else {
+		digest = crc32.NewIEEE()
+	}
+	z.Header = Header{OS: 255}
+	z.w = w
+	z.level = level
+	z.digest = digest
+	z.pushedErr = make(chan struct{}, 0)
+	z.results = make(chan result, z.blocks)
+	z.err = nil
+	z.closed = false
+	z.Comment = ""
+	z.Extra = nil
+	z.ModTime = time.Time{}
+	z.wroteHeader = false
+	z.currentBuffer = nil
+	z.buf = [10]byte{}
+	z.prevTail = nil
+	z.size = 0
+	if z.dictFlatePool.New == nil {
+		z.dictFlatePool.New = func() interface{} {
+			f, _ := flate.NewWriterDict(w, level, nil)
+			return f
+		}
+	}
+}
+
+// Reset discards the Writer z's state and makes it equivalent to the
+// result of its original state from NewWriter or NewWriterLevel, but
+// writing to w instead. This permits reusing a Writer rather than
+// allocating a new one.
+func (z *Writer) Reset(w io.Writer) {
+	if z.results != nil && !z.closed {
+		close(z.results)
+	}
+	z.SetConcurrency(defaultBlockSize, defaultBlocks)
+	z.init(w, z.level)
+}
+
+// GZIP (RFC 1952) is little-endian, unlike ZLIB (RFC 1950).
+func put2(p []byte, v uint16) {
+	p[0] = uint8(v >> 0)
+	p[1] = uint8(v >> 8)
+}
+
+func put4(p []byte, v uint32) {
+	p[0] = uint8(v >> 0)
+	p[1] = uint8(v >> 8)
+	p[2] = uint8(v >> 16)
+	p[3] = uint8(v >> 24)
+}
+
+// writeBytes writes a length-prefixed byte slice to z.w.
+func (z *Writer) writeBytes(b []byte) error {
+	if len(b) > 0xffff {
+		return errors.New("gzip.Write: Extra data is too large")
+	}
+	put2(z.buf[0:2], uint16(len(b)))
+	_, err := z.w.Write(z.buf[0:2])
+	if err != nil {
+		return err
+	}
+	_, err = z.w.Write(b)
+	return err
+}
+
+// writeString writes a UTF-8 string s in GZIP's format to z.w.
+// GZIP (RFC 1952) specifies that strings are NUL-terminated ISO 8859-1 (Latin-1).
+func (z *Writer) writeString(s string) (err error) {
+	// GZIP stores Latin-1 strings; error if non-Latin-1; convert if non-ASCII.
+	needconv := false
+	for _, v := range s {
+		if v == 0 || v > 0xff {
+			return errors.New("gzip.Write: non-Latin-1 header string")
+		}
+		if v > 0x7f {
+			needconv = true
+		}
+	}
+	if needconv {
+		b := make([]byte, 0, len(s))
+		for _, v := range s {
+			b = append(b, byte(v))
+		}
+		_, err = z.w.Write(b)
+	} else {
+		_, err = io.WriteString(z.w, s)
+	}
+	if err != nil {
+		return err
+	}
+	// GZIP strings are NUL-terminated.
+	z.buf[0] = 0
+	_, err = z.w.Write(z.buf[0:1])
+	return err
+}
+
+// compressCurrent will compress the data currently buffered
+// This should only be called from the main writer/flush/closer
+func (z *Writer) compressCurrent(flush bool) {
+	r := result{}
+	r.result = make(chan []byte, 1)
+	r.notifyWritten = make(chan struct{}, 0)
+	select {
+	case z.results <- r:
+	case <-z.pushedErr:
+		return
+	}
+
+	// If block given is more than twice the block size, split it.
+	c := z.currentBuffer
+	if len(c) > z.blockSize*2 {
+		c = c[:z.blockSize]
+		z.wg.Add(1)
+		go z.compressBlock(c, z.prevTail, r, false)
+		z.prevTail = c[len(c)-tailSize:]
+		z.currentBuffer = z.currentBuffer[z.blockSize:]
+		z.compressCurrent(flush)
+		// Last one flushes if needed
+		return
+	}
+
+	z.wg.Add(1)
+	go z.compressBlock(c, z.prevTail, r, z.closed)
+	if len(c) > tailSize {
+		z.prevTail = c[len(c)-tailSize:]
+	} else {
+		z.prevTail = nil
+	}
+	z.currentBuffer = z.dstPool.Get().([]byte)
+	z.currentBuffer = z.currentBuffer[:0]
+
+	// Wait if flushing
+	if flush {
+		<-r.notifyWritten
+	}
+}
+
+// Returns an error if it has been set.
+// Cannot be used by functions that are from internal goroutines.
+func (z *Writer) checkError() error {
+	z.errMu.RLock()
+	err := z.err
+	z.errMu.RUnlock()
+	return err
+}
+
+// Write writes a compressed form of p to the underlying io.Writer. The
+// compressed bytes are not necessarily flushed to output until
+// the Writer is closed or Flush() is called.
+//
+// The function will return quickly, if there are unused buffers.
+// The sent slice (p) is copied, and the caller is free to re-use the buffer
+// when the function returns.
+//
+// Errors that occur during compression will be reported later, and a nil error
+// does not signify that the compression succeeded (since it is most likely still running)
+// That means that the call that returns an error may not be the call that caused it.
+// Only Flush and Close functions are guaranteed to return any errors up to that point.
+func (z *Writer) Write(p []byte) (int, error) {
+	if err := z.checkError(); err != nil {
+		return 0, err
+	}
+	// Write the GZIP header lazily.
+	if !z.wroteHeader {
+		z.wroteHeader = true
+		z.buf[0] = gzipID1
+		z.buf[1] = gzipID2
+		z.buf[2] = gzipDeflate
+		z.buf[3] = 0
+		if z.Extra != nil {
+			z.buf[3] |= 0x04
+		}
+		if z.Name != "" {
+			z.buf[3] |= 0x08
+		}
+		if z.Comment != "" {
+			z.buf[3] |= 0x10
+		}
+		put4(z.buf[4:8], uint32(z.ModTime.Unix()))
+		if z.level == BestCompression {
+			z.buf[8] = 2
+		} else if z.level == BestSpeed {
+			z.buf[8] = 4
+		} else {
+			z.buf[8] = 0
+		}
+		z.buf[9] = z.OS
+		var n int
+		var err error
+		n, err = z.w.Write(z.buf[0:10])
+		if err != nil {
+			z.pushError(err)
+			return n, err
+		}
+		if z.Extra != nil {
+			err = z.writeBytes(z.Extra)
+			if err != nil {
+				z.pushError(err)
+				return n, err
+			}
+		}
+		if z.Name != "" {
+			err = z.writeString(z.Name)
+			if err != nil {
+				z.pushError(err)
+				return n, err
+			}
+		}
+		if z.Comment != "" {
+			err = z.writeString(z.Comment)
+			if err != nil {
+				z.pushError(err)
+				return n, err
+			}
+		}
+		// Start receiving data from compressors
+		go func() {
+			listen := z.results
+			for {
+				r, ok := <-listen
+				// If closed, we are finished.
+				if !ok {
+					return
+				}
+				buf := <-r.result
+				n, err := z.w.Write(buf)
+				if err != nil {
+					z.pushError(err)
+					close(r.notifyWritten)
+					return
+				}
+				if n != len(buf) {
+					z.pushError(fmt.Errorf("gzip: short write %d should be %d", n, len(buf)))
+					close(r.notifyWritten)
+					return
+				}
+				z.dstPool.Put(buf)
+				close(r.notifyWritten)
+			}
+		}()
+		z.currentBuffer = make([]byte, 0, z.blockSize)
+	}
+	q := p
+	for len(q) > 0 {
+		length := len(q)
+		if length+len(z.currentBuffer) > z.blockSize {
+			length = z.blockSize - len(z.currentBuffer)
+		}
+		z.digest.Write(q[:length])
+		z.currentBuffer = append(z.currentBuffer, q[:length]...)
+		if len(z.currentBuffer) >= z.blockSize {
+			z.compressCurrent(false)
+			if err := z.checkError(); err != nil {
+				return len(p) - len(q) - length, err
+			}
+		}
+		z.size += length
+		q = q[length:]
+	}
+	return len(p), z.checkError()
+}
+
+// Step 1: compresses buffer to buffer
+// Step 2: send writer to channel
+// Step 3: Close result channel to indicate we are done
+func (z *Writer) compressBlock(p, prevTail []byte, r result, closed bool) {
+	defer func() {
+		close(r.result)
+		z.wg.Done()
+	}()
+	buf := z.dstPool.Get().([]byte)
+	dest := bytes.NewBuffer(buf[:0])
+
+	compressor := z.dictFlatePool.Get().(*flate.Writer)
+	compressor.ResetDict(dest, prevTail)
+	compressor.Write(p)
+
+	err := compressor.Flush()
+	if err != nil {
+		z.pushError(err)
+		return
+	}
+	if closed {
+		err = compressor.Close()
+		if err != nil {
+			z.pushError(err)
+			return
+		}
+	}
+	z.dictFlatePool.Put(compressor)
+	// Read back buffer
+	buf = dest.Bytes()
+	r.result <- buf
+}
+
+// Flush flushes any pending compressed data to the underlying writer.
+//
+// It is useful mainly in compressed network protocols, to ensure that
+// a remote reader has enough data to reconstruct a packet. Flush does
+// not return until the data has been written. If the underlying
+// writer returns an error, Flush returns that error.
+//
+// In the terminology of the zlib library, Flush is equivalent to Z_SYNC_FLUSH.
+func (z *Writer) Flush() error {
+	if err := z.checkError(); err != nil {
+		return err
+	}
+	if z.closed {
+		return nil
+	}
+	if !z.wroteHeader {
+		_, err := z.Write(nil)
+		if err != nil {
+			return err
+		}
+	}
+	// We send current block to compression
+	z.compressCurrent(true)
+
+	return z.checkError()
+}
+
+// UncompressedSize will return the number of bytes written.
+// pgzip only, not a function in the official gzip package.
+func (z *Writer) UncompressedSize() int {
+	return z.size
+}
+
+// Close closes the Writer, flushing any unwritten data to the underlying
+// io.Writer, but does not close the underlying io.Writer.
+func (z *Writer) Close() error {
+	if err := z.checkError(); err != nil {
+		return err
+	}
+	if z.closed {
+		return nil
+	}
+
+	z.closed = true
+	if !z.wroteHeader {
+		z.Write(nil)
+		if err := z.checkError(); err != nil {
+			return err
+		}
+	}
+	z.compressCurrent(true)
+	if err := z.checkError(); err != nil {
+		return err
+	}
+	close(z.results)
+	put4(z.buf[0:4], z.digest.Sum32())
+	put4(z.buf[4:8], uint32(z.size))
+	_, err := z.w.Write(z.buf[0:8])
+	if err != nil {
+		z.pushError(err)
+		return err
+	}
+	return nil
+}
diff --git a/vendor/github.com/opencontainers/go-digest/LICENSE b/vendor/github.com/opencontainers/go-digest/LICENSE
new file mode 100644
index 00000000000..0ea3ff81e3f
--- /dev/null
+++ b/vendor/github.com/opencontainers/go-digest/LICENSE
@@ -0,0 +1,191 @@
+
+                                 Apache License
+                           Version 2.0, January 2004
+                        https://www.apache.org/licenses/
+
+   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+   1. Definitions.
+
+      "License" shall mean the terms and conditions for use, reproduction,
+      and distribution as defined by Sections 1 through 9 of this document.
+
+      "Licensor" shall mean the copyright owner or entity authorized by
+      the copyright owner that is granting the License.
+
+      "Legal Entity" shall mean the union of the acting entity and all
+      other entities that control, are controlled by, or are under common
+      control with that entity. For the purposes of this definition,
+      "control" means (i) the power, direct or indirect, to cause the
+      direction or management of such entity, whether by contract or
+      otherwise, or (ii) ownership of fifty percent (50%) or more of the
+      outstanding shares, or (iii) beneficial ownership of such entity.
+
+      "You" (or "Your") shall mean an individual or Legal Entity
+      exercising permissions granted by this License.
+
+      "Source" form shall mean the preferred form for making modifications,
+      including but not limited to software source code, documentation
+      source, and configuration files.
+
+      "Object" form shall mean any form resulting from mechanical
+      transformation or translation of a Source form, including but
+      not limited to compiled object code, generated documentation,
+      and conversions to other media types.
+
+      "Work" shall mean the work of authorship, whether in Source or
+      Object form, made available under the License, as indicated by a
+      copyright notice that is included in or attached to the work
+      (an example is provided in the Appendix below).
+
+      "Derivative Works" shall mean any work, whether in Source or Object
+      form, that is based on (or derived from) the Work and for which the
+      editorial revisions, annotations, elaborations, or other modifications
+      represent, as a whole, an original work of authorship. For the purposes
+      of this License, Derivative Works shall not include works that remain
+      separable from, or merely link (or bind by name) to the interfaces of,
+      the Work and Derivative Works thereof.
+
+      "Contribution" shall mean any work of authorship, including
+      the original version of the Work and any modifications or additions
+      to that Work or Derivative Works thereof, that is intentionally
+      submitted to Licensor for inclusion in the Work by the copyright owner
+      or by an individual or Legal Entity authorized to submit on behalf of
+      the copyright owner. For the purposes of this definition, "submitted"
+      means any form of electronic, verbal, or written communication sent
+      to the Licensor or its representatives, including but not limited to
+      communication on electronic mailing lists, source code control systems,
+      and issue tracking systems that are managed by, or on behalf of, the
+      Licensor for the purpose of discussing and improving the Work, but
+      excluding communication that is conspicuously marked or otherwise
+      designated in writing by the copyright owner as "Not a Contribution."
+
+      "Contributor" shall mean Licensor and any individual or Legal Entity
+      on behalf of whom a Contribution has been received by Licensor and
+      subsequently incorporated within the Work.
+
+   2. Grant of Copyright License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      copyright license to reproduce, prepare Derivative Works of,
+      publicly display, publicly perform, sublicense, and distribute the
+      Work and such Derivative Works in Source or Object form.
+
+   3. Grant of Patent License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      (except as stated in this section) patent license to make, have made,
+      use, offer to sell, sell, import, and otherwise transfer the Work,
+      where such license applies only to those patent claims licensable
+      by such Contributor that are necessarily infringed by their
+      Contribution(s) alone or by combination of their Contribution(s)
+      with the Work to which such Contribution(s) was submitted. If You
+      institute patent litigation against any entity (including a
+      cross-claim or counterclaim in a lawsuit) alleging that the Work
+      or a Contribution incorporated within the Work constitutes direct
+      or contributory patent infringement, then any patent licenses
+      granted to You under this License for that Work shall terminate
+      as of the date such litigation is filed.
+
+   4. Redistribution. You may reproduce and distribute copies of the
+      Work or Derivative Works thereof in any medium, with or without
+      modifications, and in Source or Object form, provided that You
+      meet the following conditions:
+
+      (a) You must give any other recipients of the Work or
+          Derivative Works a copy of this License; and
+
+      (b) You must cause any modified files to carry prominent notices
+          stating that You changed the files; and
+
+      (c) You must retain, in the Source form of any Derivative Works
+          that You distribute, all copyright, patent, trademark, and
+          attribution notices from the Source form of the Work,
+          excluding those notices that do not pertain to any part of
+          the Derivative Works; and
+
+      (d) If the Work includes a "NOTICE" text file as part of its
+          distribution, then any Derivative Works that You distribute must
+          include a readable copy of the attribution notices contained
+          within such NOTICE file, excluding those notices that do not
+          pertain to any part of the Derivative Works, in at least one
+          of the following places: within a NOTICE text file distributed
+          as part of the Derivative Works; within the Source form or
+          documentation, if provided along with the Derivative Works; or,
+          within a display generated by the Derivative Works, if and
+          wherever such third-party notices normally appear. The contents
+          of the NOTICE file are for informational purposes only and
+          do not modify the License. You may add Your own attribution
+          notices within Derivative Works that You distribute, alongside
+          or as an addendum to the NOTICE text from the Work, provided
+          that such additional attribution notices cannot be construed
+          as modifying the License.
+
+      You may add Your own copyright statement to Your modifications and
+      may provide additional or different license terms and conditions
+      for use, reproduction, or distribution of Your modifications, or
+      for any such Derivative Works as a whole, provided Your use,
+      reproduction, and distribution of the Work otherwise complies with
+      the conditions stated in this License.
+
+   5. Submission of Contributions. Unless You explicitly state otherwise,
+      any Contribution intentionally submitted for inclusion in the Work
+      by You to the Licensor shall be under the terms and conditions of
+      this License, without any additional terms or conditions.
+      Notwithstanding the above, nothing herein shall supersede or modify
+      the terms of any separate license agreement you may have executed
+      with Licensor regarding such Contributions.
+
+   6. Trademarks. This License does not grant permission to use the trade
+      names, trademarks, service marks, or product names of the Licensor,
+      except as required for reasonable and customary use in describing the
+      origin of the Work and reproducing the content of the NOTICE file.
+
+   7. Disclaimer of Warranty. Unless required by applicable law or
+      agreed to in writing, Licensor provides the Work (and each
+      Contributor provides its Contributions) on an "AS IS" BASIS,
+      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+      implied, including, without limitation, any warranties or conditions
+      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+      PARTICULAR PURPOSE. You are solely responsible for determining the
+      appropriateness of using or redistributing the Work and assume any
+      risks associated with Your exercise of permissions under this License.
+
+   8. Limitation of Liability. In no event and under no legal theory,
+      whether in tort (including negligence), contract, or otherwise,
+      unless required by applicable law (such as deliberate and grossly
+      negligent acts) or agreed to in writing, shall any Contributor be
+      liable to You for damages, including any direct, indirect, special,
+      incidental, or consequential damages of any character arising as a
+      result of this License or out of the use or inability to use the
+      Work (including but not limited to damages for loss of goodwill,
+      work stoppage, computer failure or malfunction, or any and all
+      other commercial damages or losses), even if such Contributor
+      has been advised of the possibility of such damages.
+
+   9. Accepting Warranty or Additional Liability. While redistributing
+      the Work or Derivative Works thereof, You may choose to offer,
+      and charge a fee for, acceptance of support, warranty, indemnity,
+      or other liability obligations and/or rights consistent with this
+      License. However, in accepting such obligations, You may act only
+      on Your own behalf and on Your sole responsibility, not on behalf
+      of any other Contributor, and only if You agree to indemnify,
+      defend, and hold each Contributor harmless for any liability
+      incurred by, or claims asserted against, such Contributor by reason
+      of your accepting any such warranty or additional liability.
+
+   END OF TERMS AND CONDITIONS
+
+   Copyright 2016 Docker, Inc.
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       https://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
diff --git a/vendor/github.com/opencontainers/go-digest/LICENSE.docs b/vendor/github.com/opencontainers/go-digest/LICENSE.docs
new file mode 100644
index 00000000000..e26cd4fc8ed
--- /dev/null
+++ b/vendor/github.com/opencontainers/go-digest/LICENSE.docs
@@ -0,0 +1,425 @@
+Attribution-ShareAlike 4.0 International
+
+=======================================================================
+
+Creative Commons Corporation ("Creative Commons") is not a law firm and
+does not provide legal services or legal advice. Distribution of
+Creative Commons public licenses does not create a lawyer-client or
+other relationship. Creative Commons makes its licenses and related
+information available on an "as-is" basis. Creative Commons gives no
+warranties regarding its licenses, any material licensed under their
+terms and conditions, or any related information. Creative Commons
+disclaims all liability for damages resulting from their use to the
+fullest extent possible.
+
+Using Creative Commons Public Licenses
+
+Creative Commons public licenses provide a standard set of terms and
+conditions that creators and other rights holders may use to share
+original works of authorship and other material subject to copyright
+and certain other rights specified in the public license below. The
+following considerations are for informational purposes only, are not
+exhaustive, and do not form part of our licenses.
+
+     Considerations for licensors: Our public licenses are
+     intended for use by those authorized to give the public
+     permission to use material in ways otherwise restricted by
+     copyright and certain other rights. Our licenses are
+     irrevocable. Licensors should read and understand the terms
+     and conditions of the license they choose before applying it.
+     Licensors should also secure all rights necessary before
+     applying our licenses so that the public can reuse the
+     material as expected. Licensors should clearly mark any
+     material not subject to the license. This includes other CC-
+     licensed material, or material used under an exception or
+     limitation to copyright. More considerations for licensors:
+	wiki.creativecommons.org/Considerations_for_licensors
+
+     Considerations for the public: By using one of our public
+     licenses, a licensor grants the public permission to use the
+     licensed material under specified terms and conditions. If
+     the licensor's permission is not necessary for any reason--for
+     example, because of any applicable exception or limitation to
+     copyright--then that use is not regulated by the license. Our
+     licenses grant only permissions under copyright and certain
+     other rights that a licensor has authority to grant. Use of
+     the licensed material may still be restricted for other
+     reasons, including because others have copyright or other
+     rights in the material. A licensor may make special requests,
+     such as asking that all changes be marked or described.
+     Although not required by our licenses, you are encouraged to
+     respect those requests where reasonable. More_considerations
+     for the public:
+	wiki.creativecommons.org/Considerations_for_licensees
+
+=======================================================================
+
+Creative Commons Attribution-ShareAlike 4.0 International Public
+License
+
+By exercising the Licensed Rights (defined below), You accept and agree
+to be bound by the terms and conditions of this Creative Commons
+Attribution-ShareAlike 4.0 International Public License ("Public
+License"). To the extent this Public License may be interpreted as a
+contract, You are granted the Licensed Rights in consideration of Your
+acceptance of these terms and conditions, and the Licensor grants You
+such rights in consideration of benefits the Licensor receives from
+making the Licensed Material available under these terms and
+conditions.
+
+
+Section 1 -- Definitions.
+
+  a. Adapted Material means material subject to Copyright and Similar
+     Rights that is derived from or based upon the Licensed Material
+     and in which the Licensed Material is translated, altered,
+     arranged, transformed, or otherwise modified in a manner requiring
+     permission under the Copyright and Similar Rights held by the
+     Licensor. For purposes of this Public License, where the Licensed
+     Material is a musical work, performance, or sound recording,
+     Adapted Material is always produced where the Licensed Material is
+     synched in timed relation with a moving image.
+
+  b. Adapter's License means the license You apply to Your Copyright
+     and Similar Rights in Your contributions to Adapted Material in
+     accordance with the terms and conditions of this Public License.
+
+  c. BY-SA Compatible License means a license listed at
+     creativecommons.org/compatiblelicenses, approved by Creative
+     Commons as essentially the equivalent of this Public License.
+
+  d. Copyright and Similar Rights means copyright and/or similar rights
+     closely related to copyright including, without limitation,
+     performance, broadcast, sound recording, and Sui Generis Database
+     Rights, without regard to how the rights are labeled or
+     categorized. For purposes of this Public License, the rights
+     specified in Section 2(b)(1)-(2) are not Copyright and Similar
+     Rights.
+
+  e. Effective Technological Measures means those measures that, in the
+     absence of proper authority, may not be circumvented under laws
+     fulfilling obligations under Article 11 of the WIPO Copyright
+     Treaty adopted on December 20, 1996, and/or similar international
+     agreements.
+
+  f. Exceptions and Limitations means fair use, fair dealing, and/or
+     any other exception or limitation to Copyright and Similar Rights
+     that applies to Your use of the Licensed Material.
+
+  g. License Elements means the license attributes listed in the name
+     of a Creative Commons Public License. The License Elements of this
+     Public License are Attribution and ShareAlike.
+
+  h. Licensed Material means the artistic or literary work, database,
+     or other material to which the Licensor applied this Public
+     License.
+
+  i. Licensed Rights means the rights granted to You subject to the
+     terms and conditions of this Public License, which are limited to
+     all Copyright and Similar Rights that apply to Your use of the
+     Licensed Material and that the Licensor has authority to license.
+
+  j. Licensor means the individual(s) or entity(ies) granting rights
+     under this Public License.
+
+  k. Share means to provide material to the public by any means or
+     process that requires permission under the Licensed Rights, such
+     as reproduction, public display, public performance, distribution,
+     dissemination, communication, or importation, and to make material
+     available to the public including in ways that members of the
+     public may access the material from a place and at a time
+     individually chosen by them.
+
+  l. Sui Generis Database Rights means rights other than copyright
+     resulting from Directive 96/9/EC of the European Parliament and of
+     the Council of 11 March 1996 on the legal protection of databases,
+     as amended and/or succeeded, as well as other essentially
+     equivalent rights anywhere in the world.
+
+  m. You means the individual or entity exercising the Licensed Rights
+     under this Public License. Your has a corresponding meaning.
+
+
+Section 2 -- Scope.
+
+  a. License grant.
+
+       1. Subject to the terms and conditions of this Public License,
+          the Licensor hereby grants You a worldwide, royalty-free,
+          non-sublicensable, non-exclusive, irrevocable license to
+          exercise the Licensed Rights in the Licensed Material to:
+
+            a. reproduce and Share the Licensed Material, in whole or
+               in part; and
+
+            b. produce, reproduce, and Share Adapted Material.
+
+       2. Exceptions and Limitations. For the avoidance of doubt, where
+          Exceptions and Limitations apply to Your use, this Public
+          License does not apply, and You do not need to comply with
+          its terms and conditions.
+
+       3. Term. The term of this Public License is specified in Section
+          6(a).
+
+       4. Media and formats; technical modifications allowed. The
+          Licensor authorizes You to exercise the Licensed Rights in
+          all media and formats whether now known or hereafter created,
+          and to make technical modifications necessary to do so. The
+          Licensor waives and/or agrees not to assert any right or
+          authority to forbid You from making technical modifications
+          necessary to exercise the Licensed Rights, including
+          technical modifications necessary to circumvent Effective
+          Technological Measures. For purposes of this Public License,
+          simply making modifications authorized by this Section 2(a)
+          (4) never produces Adapted Material.
+
+       5. Downstream recipients.
+
+            a. Offer from the Licensor -- Licensed Material. Every
+               recipient of the Licensed Material automatically
+               receives an offer from the Licensor to exercise the
+               Licensed Rights under the terms and conditions of this
+               Public License.
+
+            b. Additional offer from the Licensor -- Adapted Material.
+               Every recipient of Adapted Material from You
+               automatically receives an offer from the Licensor to
+               exercise the Licensed Rights in the Adapted Material
+               under the conditions of the Adapter's License You apply.
+
+            c. No downstream restrictions. You may not offer or impose
+               any additional or different terms or conditions on, or
+               apply any Effective Technological Measures to, the
+               Licensed Material if doing so restricts exercise of the
+               Licensed Rights by any recipient of the Licensed
+               Material.
+
+       6. No endorsement. Nothing in this Public License constitutes or
+          may be construed as permission to assert or imply that You
+          are, or that Your use of the Licensed Material is, connected
+          with, or sponsored, endorsed, or granted official status by,
+          the Licensor or others designated to receive attribution as
+          provided in Section 3(a)(1)(A)(i).
+
+  b. Other rights.
+
+       1. Moral rights, such as the right of integrity, are not
+          licensed under this Public License, nor are publicity,
+          privacy, and/or other similar personality rights; however, to
+          the extent possible, the Licensor waives and/or agrees not to
+          assert any such rights held by the Licensor to the limited
+          extent necessary to allow You to exercise the Licensed
+          Rights, but not otherwise.
+
+       2. Patent and trademark rights are not licensed under this
+          Public License.
+
+       3. To the extent possible, the Licensor waives any right to
+          collect royalties from You for the exercise of the Licensed
+          Rights, whether directly or through a collecting society
+          under any voluntary or waivable statutory or compulsory
+          licensing scheme. In all other cases the Licensor expressly
+          reserves any right to collect such royalties.
+
+
+Section 3 -- License Conditions.
+
+Your exercise of the Licensed Rights is expressly made subject to the
+following conditions.
+
+  a. Attribution.
+
+       1. If You Share the Licensed Material (including in modified
+          form), You must:
+
+            a. retain the following if it is supplied by the Licensor
+               with the Licensed Material:
+
+                 i. identification of the creator(s) of the Licensed
+                    Material and any others designated to receive
+                    attribution, in any reasonable manner requested by
+                    the Licensor (including by pseudonym if
+                    designated);
+
+                ii. a copyright notice;
+
+               iii. a notice that refers to this Public License;
+
+                iv. a notice that refers to the disclaimer of
+                    warranties;
+
+                 v. a URI or hyperlink to the Licensed Material to the
+                    extent reasonably practicable;
+
+            b. indicate if You modified the Licensed Material and
+               retain an indication of any previous modifications; and
+
+            c. indicate the Licensed Material is licensed under this
+               Public License, and include the text of, or the URI or
+               hyperlink to, this Public License.
+
+       2. You may satisfy the conditions in Section 3(a)(1) in any
+          reasonable manner based on the medium, means, and context in
+          which You Share the Licensed Material. For example, it may be
+          reasonable to satisfy the conditions by providing a URI or
+          hyperlink to a resource that includes the required
+          information.
+
+       3. If requested by the Licensor, You must remove any of the
+          information required by Section 3(a)(1)(A) to the extent
+          reasonably practicable.
+
+  b. ShareAlike.
+
+     In addition to the conditions in Section 3(a), if You Share
+     Adapted Material You produce, the following conditions also apply.
+
+       1. The Adapter's License You apply must be a Creative Commons
+          license with the same License Elements, this version or
+          later, or a BY-SA Compatible License.
+
+       2. You must include the text of, or the URI or hyperlink to, the
+          Adapter's License You apply. You may satisfy this condition
+          in any reasonable manner based on the medium, means, and
+          context in which You Share Adapted Material.
+
+       3. You may not offer or impose any additional or different terms
+          or conditions on, or apply any Effective Technological
+          Measures to, Adapted Material that restrict exercise of the
+          rights granted under the Adapter's License You apply.
+
+
+Section 4 -- Sui Generis Database Rights.
+
+Where the Licensed Rights include Sui Generis Database Rights that
+apply to Your use of the Licensed Material:
+
+  a. for the avoidance of doubt, Section 2(a)(1) grants You the right
+     to extract, reuse, reproduce, and Share all or a substantial
+     portion of the contents of the database;
+
+  b. if You include all or a substantial portion of the database
+     contents in a database in which You have Sui Generis Database
+     Rights, then the database in which You have Sui Generis Database
+     Rights (but not its individual contents) is Adapted Material,
+
+     including for purposes of Section 3(b); and
+  c. You must comply with the conditions in Section 3(a) if You Share
+     all or a substantial portion of the contents of the database.
+
+For the avoidance of doubt, this Section 4 supplements and does not
+replace Your obligations under this Public License where the Licensed
+Rights include other Copyright and Similar Rights.
+
+
+Section 5 -- Disclaimer of Warranties and Limitation of Liability.
+
+  a. UNLESS OTHERWISE SEPARATELY UNDERTAKEN BY THE LICENSOR, TO THE
+     EXTENT POSSIBLE, THE LICENSOR OFFERS THE LICENSED MATERIAL AS-IS
+     AND AS-AVAILABLE, AND MAKES NO REPRESENTATIONS OR WARRANTIES OF
+     ANY KIND CONCERNING THE LICENSED MATERIAL, WHETHER EXPRESS,
+     IMPLIED, STATUTORY, OR OTHER. THIS INCLUDES, WITHOUT LIMITATION,
+     WARRANTIES OF TITLE, MERCHANTABILITY, FITNESS FOR A PARTICULAR
+     PURPOSE, NON-INFRINGEMENT, ABSENCE OF LATENT OR OTHER DEFECTS,
+     ACCURACY, OR THE PRESENCE OR ABSENCE OF ERRORS, WHETHER OR NOT
+     KNOWN OR DISCOVERABLE. WHERE DISCLAIMERS OF WARRANTIES ARE NOT
+     ALLOWED IN FULL OR IN PART, THIS DISCLAIMER MAY NOT APPLY TO YOU.
+
+  b. TO THE EXTENT POSSIBLE, IN NO EVENT WILL THE LICENSOR BE LIABLE
+     TO YOU ON ANY LEGAL THEORY (INCLUDING, WITHOUT LIMITATION,
+     NEGLIGENCE) OR OTHERWISE FOR ANY DIRECT, SPECIAL, INDIRECT,
+     INCIDENTAL, CONSEQUENTIAL, PUNITIVE, EXEMPLARY, OR OTHER LOSSES,
+     COSTS, EXPENSES, OR DAMAGES ARISING OUT OF THIS PUBLIC LICENSE OR
+     USE OF THE LICENSED MATERIAL, EVEN IF THE LICENSOR HAS BEEN
+     ADVISED OF THE POSSIBILITY OF SUCH LOSSES, COSTS, EXPENSES, OR
+     DAMAGES. WHERE A LIMITATION OF LIABILITY IS NOT ALLOWED IN FULL OR
+     IN PART, THIS LIMITATION MAY NOT APPLY TO YOU.
+
+  c. The disclaimer of warranties and limitation of liability provided
+     above shall be interpreted in a manner that, to the extent
+     possible, most closely approximates an absolute disclaimer and
+     waiver of all liability.
+
+
+Section 6 -- Term and Termination.
+
+  a. This Public License applies for the term of the Copyright and
+     Similar Rights licensed here. However, if You fail to comply with
+     this Public License, then Your rights under this Public License
+     terminate automatically.
+
+  b. Where Your right to use the Licensed Material has terminated under
+     Section 6(a), it reinstates:
+
+       1. automatically as of the date the violation is cured, provided
+          it is cured within 30 days of Your discovery of the
+          violation; or
+
+       2. upon express reinstatement by the Licensor.
+
+     For the avoidance of doubt, this Section 6(b) does not affect any
+     right the Licensor may have to seek remedies for Your violations
+     of this Public License.
+
+  c. For the avoidance of doubt, the Licensor may also offer the
+     Licensed Material under separate terms or conditions or stop
+     distributing the Licensed Material at any time; however, doing so
+     will not terminate this Public License.
+
+  d. Sections 1, 5, 6, 7, and 8 survive termination of this Public
+     License.
+
+
+Section 7 -- Other Terms and Conditions.
+
+  a. The Licensor shall not be bound by any additional or different
+     terms or conditions communicated by You unless expressly agreed.
+
+  b. Any arrangements, understandings, or agreements regarding the
+     Licensed Material not stated herein are separate from and
+     independent of the terms and conditions of this Public License.
+
+
+Section 8 -- Interpretation.
+
+  a. For the avoidance of doubt, this Public License does not, and
+     shall not be interpreted to, reduce, limit, restrict, or impose
+     conditions on any use of the Licensed Material that could lawfully
+     be made without permission under this Public License.
+
+  b. To the extent possible, if any provision of this Public License is
+     deemed unenforceable, it shall be automatically reformed to the
+     minimum extent necessary to make it enforceable. If the provision
+     cannot be reformed, it shall be severed from this Public License
+     without affecting the enforceability of the remaining terms and
+     conditions.
+
+  c. No term or condition of this Public License will be waived and no
+     failure to comply consented to unless expressly agreed to by the
+     Licensor.
+
+  d. Nothing in this Public License constitutes or may be interpreted
+     as a limitation upon, or waiver of, any privileges and immunities
+     that apply to the Licensor or You, including from the legal
+     processes of any jurisdiction or authority.
+
+
+=======================================================================
+
+Creative Commons is not a party to its public licenses.
+Notwithstanding, Creative Commons may elect to apply one of its public
+licenses to material it publishes and in those instances will be
+considered the "Licensor." Except for the limited purpose of indicating
+that material is shared under a Creative Commons public license or as
+otherwise permitted by the Creative Commons policies published at
+creativecommons.org/policies, Creative Commons does not authorize the
+use of the trademark "Creative Commons" or any other trademark or logo
+of Creative Commons without its prior written consent including,
+without limitation, in connection with any unauthorized modifications
+to any of its public licenses or any other arrangements,
+understandings, or agreements concerning use of licensed material. For
+the avoidance of doubt, this paragraph does not form part of the public
+licenses.
+
+Creative Commons may be contacted at creativecommons.org.
diff --git a/vendor/github.com/opencontainers/go-digest/README.md b/vendor/github.com/opencontainers/go-digest/README.md
new file mode 100644
index 00000000000..25aac3470c2
--- /dev/null
+++ b/vendor/github.com/opencontainers/go-digest/README.md
@@ -0,0 +1,104 @@
+# go-digest
+
+[![GoDoc](https://godoc.org/github.com/opencontainers/go-digest?status.svg)](https://godoc.org/github.com/opencontainers/go-digest) [![Go Report Card](https://goreportcard.com/badge/github.com/opencontainers/go-digest)](https://goreportcard.com/report/github.com/opencontainers/go-digest) [![Build Status](https://travis-ci.org/opencontainers/go-digest.svg?branch=master)](https://travis-ci.org/opencontainers/go-digest)
+
+Common digest package used across the container ecosystem.
+
+Please see the [godoc](https://godoc.org/github.com/opencontainers/go-digest) for more information.
+
+# What is a digest?
+
+A digest is just a hash.
+
+The most common use case for a digest is to create a content
+identifier for use in [Content Addressable Storage](https://en.wikipedia.org/wiki/Content-addressable_storage)
+systems:
+
+```go
+id := digest.FromBytes([]byte("my content"))
+```
+
+In the example above, the id can be used to uniquely identify 
+the byte slice "my content". This allows two disparate applications
+to agree on a verifiable identifier without having to trust one
+another.
+
+An identifying digest can be verified, as follows:
+
+```go
+if id != digest.FromBytes([]byte("my content")) {
+  return errors.New("the content has changed!")
+}
+```
+
+A `Verifier` type can be used to handle cases where an `io.Reader`
+makes more sense:
+
+```go
+rd := getContent()
+verifier := id.Verifier()
+io.Copy(verifier, rd)
+
+if !verifier.Verified() {
+  return errors.New("the content has changed!")
+}
+```
+
+Using [Merkle DAGs](https://en.wikipedia.org/wiki/Merkle_tree), this
+can power a rich, safe, content distribution system.
+
+# Usage
+
+While the [godoc](https://godoc.org/github.com/opencontainers/go-digest) is
+considered the best resource, a few important items need to be called 
+out when using this package.
+
+1. Make sure to import the hash implementations into your application
+    or the package will panic. You should have something like the 
+    following in the main (or other entrypoint) of your application:
+   
+    ```go
+    import (
+        _ "crypto/sha256"
+   	    _ "crypto/sha512"
+    )
+    ```
+    This may seem inconvenient but it allows you replace the hash 
+    implementations with others, such as https://github.com/stevvooe/resumable.
+ 
+2. Even though `digest.Digest` may be assemable as a string, _always_ 
+    verify your input with `digest.Parse` or use `Digest.Validate`
+    when accepting untrusted input. While there are measures to 
+    avoid common problems, this will ensure you have valid digests
+    in the rest of your application.
+
+# Stability
+
+The Go API, at this stage, is considered stable, unless otherwise noted.
+
+As always, before using a package export, read the [godoc](https://godoc.org/github.com/opencontainers/go-digest).
+
+# Contributing
+
+This package is considered fairly complete. It has been in production
+in thousands (millions?) of deployments and is fairly battle-hardened.
+New additions will be met with skepticism. If you think there is a 
+missing feature, please file a bug clearly describing the problem and 
+the alternatives you tried before submitting a PR.
+
+# Reporting security issues
+
+Please DO NOT file a public issue, instead send your report privately to
+security@opencontainers.org.
+
+The maintainers take security seriously. If you discover a security issue,
+please bring it to their attention right away!
+
+If you are reporting a security issue, do not create an issue or file a pull
+request on GitHub. Instead, disclose the issue responsibly by sending an email
+to security@opencontainers.org (which is inhabited only by the maintainers of
+the various OCI projects).
+
+# Copyright and license
+
+Copyright © 2016 Docker, Inc. All rights reserved, except as follows. Code is released under the [Apache 2.0 license](LICENSE). This `README.md` file and the [`CONTRIBUTING.md`](CONTRIBUTING.md) file are licensed under the Creative Commons Attribution 4.0 International License under the terms and conditions set forth in the file [`LICENSE.docs`](LICENSE.docs). You may obtain a duplicate copy of the same license, titled CC BY-SA 4.0, at http://creativecommons.org/licenses/by-sa/4.0/.
diff --git a/vendor/github.com/opencontainers/go-digest/algorithm.go b/vendor/github.com/opencontainers/go-digest/algorithm.go
index bdff42d929f..8813bd26f1f 100644
--- a/vendor/github.com/opencontainers/go-digest/algorithm.go
+++ b/vendor/github.com/opencontainers/go-digest/algorithm.go
@@ -19,6 +19,7 @@ import (
 	"fmt"
 	"hash"
 	"io"
+	"regexp"
 )
 
 // Algorithm identifies and implementation of a digester by an identifier.
@@ -28,9 +29,9 @@ type Algorithm string
 
 // supported digest types
 const (
-	SHA256 Algorithm = "sha256" // sha256 with hex encoding
-	SHA384 Algorithm = "sha384" // sha384 with hex encoding
-	SHA512 Algorithm = "sha512" // sha512 with hex encoding
+	SHA256 Algorithm = "sha256" // sha256 with hex encoding (lower case only)
+	SHA384 Algorithm = "sha384" // sha384 with hex encoding (lower case only)
+	SHA512 Algorithm = "sha512" // sha512 with hex encoding (lower case only)
 
 	// Canonical is the primary digest algorithm used with the distribution
 	// project. Other digests may be used but this one is the primary storage
@@ -50,6 +51,14 @@ var (
 		SHA384: crypto.SHA384,
 		SHA512: crypto.SHA512,
 	}
+
+	// anchoredEncodedRegexps contains anchored regular expressions for hex-encoded digests.
+	// Note that /A-F/ disallowed.
+	anchoredEncodedRegexps = map[Algorithm]*regexp.Regexp{
+		SHA256: regexp.MustCompile(`^[a-f0-9]{64}$`),
+		SHA384: regexp.MustCompile(`^[a-f0-9]{96}$`),
+		SHA512: regexp.MustCompile(`^[a-f0-9]{128}$`),
+	}
 )
 
 // Available returns true if the digest type is available for use. If this
@@ -125,6 +134,14 @@ func (a Algorithm) Hash() hash.Hash {
 	return algorithms[a].New()
 }
 
+// Encode encodes the raw bytes of a digest, typically from a hash.Hash, into
+// the encoded portion of the digest.
+func (a Algorithm) Encode(d []byte) string {
+	// TODO(stevvooe): Currently, all algorithms use a hex encoding. When we
+	// add support for back registration, we can modify this accordingly.
+	return fmt.Sprintf("%x", d)
+}
+
 // FromReader returns the digest of the reader using the algorithm.
 func (a Algorithm) FromReader(rd io.Reader) (Digest, error) {
 	digester := a.Digester()
@@ -156,3 +173,20 @@ func (a Algorithm) FromBytes(p []byte) Digest {
 func (a Algorithm) FromString(s string) Digest {
 	return a.FromBytes([]byte(s))
 }
+
+// Validate validates the encoded portion string
+func (a Algorithm) Validate(encoded string) error {
+	r, ok := anchoredEncodedRegexps[a]
+	if !ok {
+		return ErrDigestUnsupported
+	}
+	// Digests much always be hex-encoded, ensuring that their hex portion will
+	// always be size*2
+	if a.Size()*2 != len(encoded) {
+		return ErrDigestInvalidLength
+	}
+	if r.MatchString(encoded) {
+		return nil
+	}
+	return ErrDigestInvalidFormat
+}
diff --git a/vendor/github.com/opencontainers/go-digest/digest.go b/vendor/github.com/opencontainers/go-digest/digest.go
index 69e1d2b542c..ad398cba2fb 100644
--- a/vendor/github.com/opencontainers/go-digest/digest.go
+++ b/vendor/github.com/opencontainers/go-digest/digest.go
@@ -45,16 +45,21 @@ func NewDigest(alg Algorithm, h hash.Hash) Digest {
 // functions. This is also useful for rebuilding digests from binary
 // serializations.
 func NewDigestFromBytes(alg Algorithm, p []byte) Digest {
-	return Digest(fmt.Sprintf("%s:%x", alg, p))
+	return NewDigestFromEncoded(alg, alg.Encode(p))
 }
 
-// NewDigestFromHex returns a Digest from alg and a the hex encoded digest.
+// NewDigestFromHex is deprecated. Please use NewDigestFromEncoded.
 func NewDigestFromHex(alg, hex string) Digest {
-	return Digest(fmt.Sprintf("%s:%s", alg, hex))
+	return NewDigestFromEncoded(Algorithm(alg), hex)
+}
+
+// NewDigestFromEncoded returns a Digest from alg and the encoded digest.
+func NewDigestFromEncoded(alg Algorithm, encoded string) Digest {
+	return Digest(fmt.Sprintf("%s:%s", alg, encoded))
 }
 
 // DigestRegexp matches valid digest types.
-var DigestRegexp = regexp.MustCompile(`[a-zA-Z0-9-_+.]+:[a-fA-F0-9]+`)
+var DigestRegexp = regexp.MustCompile(`[a-z0-9]+(?:[.+_-][a-z0-9]+)*:[a-zA-Z0-9=_-]+`)
 
 // DigestRegexpAnchored matches valid digest types, anchored to the start and end of the match.
 var DigestRegexpAnchored = regexp.MustCompile(`^` + DigestRegexp.String() + `$`)
@@ -96,26 +101,18 @@ func FromString(s string) Digest {
 // error if not.
 func (d Digest) Validate() error {
 	s := string(d)
-
 	i := strings.Index(s, ":")
-
-	// validate i then run through regexp
-	if i < 0 || i+1 == len(s) || !DigestRegexpAnchored.MatchString(s) {
+	if i <= 0 || i+1 == len(s) {
 		return ErrDigestInvalidFormat
 	}
-
-	algorithm := Algorithm(s[:i])
+	algorithm, encoded := Algorithm(s[:i]), s[i+1:]
 	if !algorithm.Available() {
+		if !DigestRegexpAnchored.MatchString(s) {
+			return ErrDigestInvalidFormat
+		}
 		return ErrDigestUnsupported
 	}
-
-	// Digests much always be hex-encoded, ensuring that their hex portion will
-	// always be size*2
-	if algorithm.Size()*2 != len(s[i+1:]) {
-		return ErrDigestInvalidLength
-	}
-
-	return nil
+	return algorithm.Validate(encoded)
 }
 
 // Algorithm returns the algorithm portion of the digest. This will panic if
@@ -133,12 +130,17 @@ func (d Digest) Verifier() Verifier {
 	}
 }
 
-// Hex returns the hex digest portion of the digest. This will panic if the
+// Encoded returns the encoded portion of the digest. This will panic if the
 // underlying digest is not in a valid format.
-func (d Digest) Hex() string {
+func (d Digest) Encoded() string {
 	return string(d[d.sepIndex()+1:])
 }
 
+// Hex is deprecated. Please use Digest.Encoded.
+func (d Digest) Hex() string {
+	return d.Encoded()
+}
+
 func (d Digest) String() string {
 	return string(d)
 }
diff --git a/vendor/golang.org/x/sync/LICENSE b/vendor/golang.org/x/sync/LICENSE
new file mode 100644
index 00000000000..6a66aea5eaf
--- /dev/null
+++ b/vendor/golang.org/x/sync/LICENSE
@@ -0,0 +1,27 @@
+Copyright (c) 2009 The Go Authors. All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+
+   * Redistributions of source code must retain the above copyright
+notice, this list of conditions and the following disclaimer.
+   * Redistributions in binary form must reproduce the above
+copyright notice, this list of conditions and the following disclaimer
+in the documentation and/or other materials provided with the
+distribution.
+   * Neither the name of Google Inc. nor the names of its
+contributors may be used to endorse or promote products derived from
+this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
diff --git a/vendor/golang.org/x/sync/PATENTS b/vendor/golang.org/x/sync/PATENTS
new file mode 100644
index 00000000000..733099041f8
--- /dev/null
+++ b/vendor/golang.org/x/sync/PATENTS
@@ -0,0 +1,22 @@
+Additional IP Rights Grant (Patents)
+
+"This implementation" means the copyrightable works distributed by
+Google as part of the Go project.
+
+Google hereby grants to You a perpetual, worldwide, non-exclusive,
+no-charge, royalty-free, irrevocable (except as stated in this section)
+patent license to make, have made, use, offer to sell, sell, import,
+transfer and otherwise run, modify and propagate the contents of this
+implementation of Go, where such license applies only to those patent
+claims, both currently owned or controlled by Google and acquired in
+the future, licensable by Google that are necessarily infringed by this
+implementation of Go.  This grant does not include claims that would be
+infringed only as a consequence of further modification of this
+implementation.  If you or your agent or exclusive licensee institute or
+order or agree to the institution of patent litigation against any
+entity (including a cross-claim or counterclaim in a lawsuit) alleging
+that this implementation of Go or any code incorporated within this
+implementation of Go constitutes direct or contributory patent
+infringement, or inducement of patent infringement, then any patent
+rights granted to you under this License for this implementation of Go
+shall terminate as of the date such litigation is filed.
diff --git a/vendor/golang.org/x/sync/README.md b/vendor/golang.org/x/sync/README.md
new file mode 100644
index 00000000000..1f8436cc9c8
--- /dev/null
+++ b/vendor/golang.org/x/sync/README.md
@@ -0,0 +1,18 @@
+# Go Sync
+
+This repository provides Go concurrency primitives in addition to the
+ones provided by the language and "sync" and "sync/atomic" packages.
+
+## Download/Install
+
+The easiest way to install is to run `go get -u golang.org/x/sync`. You can
+also manually git clone the repository to `$GOPATH/src/golang.org/x/sync`.
+
+## Report Issues / Send Patches
+
+This repository uses Gerrit for code changes. To learn how to submit changes to
+this repository, see https://golang.org/doc/contribute.html.
+
+The main issue tracker for the sync repository is located at
+https://github.com/golang/go/issues. Prefix your issue with "x/sync:" in the
+subject line, so it is easy to find.
diff --git a/vendor/golang.org/x/sync/semaphore/semaphore.go b/vendor/golang.org/x/sync/semaphore/semaphore.go
new file mode 100644
index 00000000000..2096ca3a003
--- /dev/null
+++ b/vendor/golang.org/x/sync/semaphore/semaphore.go
@@ -0,0 +1,127 @@
+// Copyright 2017 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Package semaphore provides a weighted semaphore implementation.
+package semaphore // import "golang.org/x/sync/semaphore"
+
+import (
+	"container/list"
+	"context"
+	"sync"
+)
+
+type waiter struct {
+	n     int64
+	ready chan<- struct{} // Closed when semaphore acquired.
+}
+
+// NewWeighted creates a new weighted semaphore with the given
+// maximum combined weight for concurrent access.
+func NewWeighted(n int64) *Weighted {
+	w := &Weighted{size: n}
+	return w
+}
+
+// Weighted provides a way to bound concurrent access to a resource.
+// The callers can request access with a given weight.
+type Weighted struct {
+	size    int64
+	cur     int64
+	mu      sync.Mutex
+	waiters list.List
+}
+
+// Acquire acquires the semaphore with a weight of n, blocking only until ctx
+// is done. On success, returns nil. On failure, returns ctx.Err() and leaves
+// the semaphore unchanged.
+//
+// If ctx is already done, Acquire may still succeed without blocking.
+func (s *Weighted) Acquire(ctx context.Context, n int64) error {
+	s.mu.Lock()
+	if s.size-s.cur >= n && s.waiters.Len() == 0 {
+		s.cur += n
+		s.mu.Unlock()
+		return nil
+	}
+
+	if n > s.size {
+		// Don't make other Acquire calls block on one that's doomed to fail.
+		s.mu.Unlock()
+		<-ctx.Done()
+		return ctx.Err()
+	}
+
+	ready := make(chan struct{})
+	w := waiter{n: n, ready: ready}
+	elem := s.waiters.PushBack(w)
+	s.mu.Unlock()
+
+	select {
+	case <-ctx.Done():
+		err := ctx.Err()
+		s.mu.Lock()
+		select {
+		case <-ready:
+			// Acquired the semaphore after we were canceled.  Rather than trying to
+			// fix up the queue, just pretend we didn't notice the cancelation.
+			err = nil
+		default:
+			s.waiters.Remove(elem)
+		}
+		s.mu.Unlock()
+		return err
+
+	case <-ready:
+		return nil
+	}
+}
+
+// TryAcquire acquires the semaphore with a weight of n without blocking.
+// On success, returns true. On failure, returns false and leaves the semaphore unchanged.
+func (s *Weighted) TryAcquire(n int64) bool {
+	s.mu.Lock()
+	success := s.size-s.cur >= n && s.waiters.Len() == 0
+	if success {
+		s.cur += n
+	}
+	s.mu.Unlock()
+	return success
+}
+
+// Release releases the semaphore with a weight of n.
+func (s *Weighted) Release(n int64) {
+	s.mu.Lock()
+	s.cur -= n
+	if s.cur < 0 {
+		s.mu.Unlock()
+		panic("semaphore: bad release")
+	}
+	for {
+		next := s.waiters.Front()
+		if next == nil {
+			break // No more waiters blocked.
+		}
+
+		w := next.Value.(waiter)
+		if s.size-s.cur < w.n {
+			// Not enough tokens for the next waiter.  We could keep going (to try to
+			// find a waiter with a smaller request), but under load that could cause
+			// starvation for large requests; instead, we leave all remaining waiters
+			// blocked.
+			//
+			// Consider a semaphore used as a read-write lock, with N tokens, N
+			// readers, and one writer.  Each reader can Acquire(1) to obtain a read
+			// lock.  The writer can Acquire(N) to obtain a write lock, excluding all
+			// of the readers.  If we allow the readers to jump ahead in the queue,
+			// the writer will starve — there is always one token available for every
+			// reader.
+			break
+		}
+
+		s.cur += w.n
+		s.waiters.Remove(next)
+		close(w.ready)
+	}
+	s.mu.Unlock()
+}
diff --git a/vendor/gopkg.in/cheggaaa/pb.v1/README.md b/vendor/gopkg.in/cheggaaa/pb.v1/README.md
new file mode 100644
index 00000000000..1295df70e45
--- /dev/null
+++ b/vendor/gopkg.in/cheggaaa/pb.v1/README.md
@@ -0,0 +1,177 @@
+# Terminal progress bar for Go  
+
+Simple progress bar for console programs.    
+   
+Please check the new version https://github.com/cheggaaa/pb/tree/v2 (currently, it's beta)
+
+## Installation
+
+```
+go get gopkg.in/cheggaaa/pb.v1
+```   
+
+## Usage   
+
+```Go
+package main
+
+import (
+	"gopkg.in/cheggaaa/pb.v1"
+	"time"
+)
+
+func main() {
+	count := 100000
+	bar := pb.StartNew(count)
+	for i := 0; i < count; i++ {
+		bar.Increment()
+		time.Sleep(time.Millisecond)
+	}
+	bar.FinishPrint("The End!")
+}
+
+```
+
+Result will be like this:
+
+```
+> go run test.go
+37158 / 100000 [================>_______________________________] 37.16% 1m11s
+```
+
+## Customization
+
+```Go  
+// create bar
+bar := pb.New(count)
+
+// refresh info every second (default 200ms)
+bar.SetRefreshRate(time.Second)
+
+// show percents (by default already true)
+bar.ShowPercent = true
+
+// show bar (by default already true)
+bar.ShowBar = true
+
+// no counters
+bar.ShowCounters = false
+
+// show "time left"
+bar.ShowTimeLeft = true
+
+// show average speed
+bar.ShowSpeed = true
+
+// sets the width of the progress bar
+bar.SetWidth(80)
+
+// sets the width of the progress bar, but if terminal size smaller will be ignored
+bar.SetMaxWidth(80)
+
+// convert output to readable format (like KB, MB)
+bar.SetUnits(pb.U_BYTES)
+
+// and start
+bar.Start()
+``` 
+
+## Progress bar for IO Operations
+
+```go
+// create and start bar
+bar := pb.New(myDataLen).SetUnits(pb.U_BYTES)
+bar.Start()
+
+// my io.Reader
+r := myReader
+
+// my io.Writer
+w := myWriter
+
+// create proxy reader
+reader := bar.NewProxyReader(r)
+
+// and copy from pb reader
+io.Copy(w, reader)
+
+```
+
+```go
+// create and start bar
+bar := pb.New(myDataLen).SetUnits(pb.U_BYTES)
+bar.Start()
+
+// my io.Reader
+r := myReader
+
+// my io.Writer
+w := myWriter
+
+// create multi writer
+writer := io.MultiWriter(w, bar)
+
+// and copy
+io.Copy(writer, r)
+
+bar.Finish()
+```
+
+## Custom Progress Bar Look-and-feel
+
+```go
+bar.Format("<.- >")
+```
+
+## Multiple Progress Bars (experimental and unstable)
+
+Do not print to terminal while pool is active.
+
+```go
+package main
+
+import (
+    "math/rand"
+    "sync"
+    "time"
+
+    "gopkg.in/cheggaaa/pb.v1"
+)
+
+func main() {
+    // create bars
+    first := pb.New(200).Prefix("First ")
+    second := pb.New(200).Prefix("Second ")
+    third := pb.New(200).Prefix("Third ")
+    // start pool
+    pool, err := pb.StartPool(first, second, third)
+    if err != nil {
+        panic(err)
+    }
+    // update bars
+    wg := new(sync.WaitGroup)
+    for _, bar := range []*pb.ProgressBar{first, second, third} {
+        wg.Add(1)
+        go func(cb *pb.ProgressBar) {
+            for n := 0; n < 200; n++ {
+                cb.Increment()
+                time.Sleep(time.Millisecond * time.Duration(rand.Intn(100)))
+            }
+            cb.Finish()
+            wg.Done()
+        }(bar)
+    }
+    wg.Wait()
+    // close pool
+    pool.Stop()
+}
+```
+
+The result will be as follows:
+
+```
+$ go run example/multiple.go 
+First  34 / 200 [=========>---------------------------------------------]  17.00% 00m08s
+Second  42 / 200 [===========>------------------------------------------]  21.00% 00m06s
+Third  36 / 200 [=========>---------------------------------------------]  18.00% 00m08s
+```
diff --git a/vendor/gopkg.in/cheggaaa/pb.v1/format.go b/vendor/gopkg.in/cheggaaa/pb.v1/format.go
index c47a88fd210..8bb8a7a1dfd 100644
--- a/vendor/gopkg.in/cheggaaa/pb.v1/format.go
+++ b/vendor/gopkg.in/cheggaaa/pb.v1/format.go
@@ -10,8 +10,10 @@ type Units int
 const (
 	// U_NO are default units, they represent a simple value and are not formatted at all.
 	U_NO Units = iota
-	// U_BYTES units are formatted in a human readable way (b, Bb, Mb, ...)
+	// U_BYTES units are formatted in a human readable way (B, KiB, MiB, ...)
 	U_BYTES
+	// U_BYTES_DEC units are like U_BYTES, but base 10 (B, KB, MB, ...)
+	U_BYTES_DEC
 	// U_DURATION units are formatted in a human readable way (3h14m15s)
 	U_DURATION
 )
@@ -21,6 +23,11 @@ const (
 	MiB = 1048576
 	GiB = 1073741824
 	TiB = 1099511627776
+
+	KB = 1e3
+	MB = 1e6
+	GB = 1e9
+	TB = 1e12
 )
 
 func Format(i int64) *formatter {
@@ -53,6 +60,8 @@ func (f *formatter) String() (out string) {
 	switch f.unit {
 	case U_BYTES:
 		out = formatBytes(f.n)
+	case U_BYTES_DEC:
+		out = formatBytesDec(f.n)
 	case U_DURATION:
 		out = formatDuration(f.n)
 	default:
@@ -64,7 +73,7 @@ func (f *formatter) String() (out string) {
 	return
 }
 
-// Convert bytes to human readable string. Like a 2 MiB, 64.2 KiB, 52 B
+// Convert bytes to human readable string. Like 2 MiB, 64.2 KiB, 52 B
 func formatBytes(i int64) (result string) {
 	switch {
 	case i >= TiB:
@@ -81,12 +90,36 @@ func formatBytes(i int64) (result string) {
 	return
 }
 
+// Convert bytes to base-10 human readable string. Like 2 MB, 64.2 KB, 52 B
+func formatBytesDec(i int64) (result string) {
+	switch {
+	case i >= TB:
+		result = fmt.Sprintf("%.02f TB", float64(i)/TB)
+	case i >= GB:
+		result = fmt.Sprintf("%.02f GB", float64(i)/GB)
+	case i >= MB:
+		result = fmt.Sprintf("%.02f MB", float64(i)/MB)
+	case i >= KB:
+		result = fmt.Sprintf("%.02f KB", float64(i)/KB)
+	default:
+		result = fmt.Sprintf("%d B", i)
+	}
+	return
+}
+
 func formatDuration(n int64) (result string) {
 	d := time.Duration(n)
 	if d > time.Hour*24 {
 		result = fmt.Sprintf("%dd", d/24/time.Hour)
 		d -= (d / time.Hour / 24) * (time.Hour * 24)
 	}
-	result = fmt.Sprintf("%s%v", result, d)
+	if d > time.Hour {
+		result = fmt.Sprintf("%s%dh", result, d/time.Hour)
+		d -= d / time.Hour * time.Hour
+	}
+	m := d / time.Minute
+	d -= m * time.Minute
+	s := d / time.Second
+	result = fmt.Sprintf("%s%02dm%02ds", result, m, s)
 	return
 }
diff --git a/vendor/gopkg.in/cheggaaa/pb.v1/pb.go b/vendor/gopkg.in/cheggaaa/pb.v1/pb.go
index 7acc9a7ffd8..e03291b8969 100644
--- a/vendor/gopkg.in/cheggaaa/pb.v1/pb.go
+++ b/vendor/gopkg.in/cheggaaa/pb.v1/pb.go
@@ -13,7 +13,7 @@ import (
 )
 
 // Current version
-const Version = "1.0.13"
+const Version = "1.0.27"
 
 const (
 	// Default refresh rate - 200ms
@@ -37,17 +37,17 @@ func New(total int) *ProgressBar {
 // Create new progress bar object using int64 as total
 func New64(total int64) *ProgressBar {
 	pb := &ProgressBar{
-		Total:         total,
-		RefreshRate:   DEFAULT_REFRESH_RATE,
-		ShowPercent:   true,
-		ShowCounters:  true,
-		ShowBar:       true,
-		ShowTimeLeft:  true,
-		ShowFinalTime: true,
-		Units:         U_NO,
-		ManualUpdate:  false,
-		finish:        make(chan struct{}),
-		currentValue:  -1,
+		Total:           total,
+		RefreshRate:     DEFAULT_REFRESH_RATE,
+		ShowPercent:     true,
+		ShowCounters:    true,
+		ShowBar:         true,
+		ShowTimeLeft:    true,
+		ShowElapsedTime: false,
+		ShowFinalTime:   true,
+		Units:           U_NO,
+		ManualUpdate:    false,
+		finish:          make(chan struct{}),
 	}
 	return pb.Format(FORMAT)
 }
@@ -66,13 +66,14 @@ func StartNew(total int) *ProgressBar {
 type Callback func(out string)
 
 type ProgressBar struct {
-	current int64 // current must be first member of struct (https://code.google.com/p/go/issues/detail?id=5278)
+	current  int64 // current must be first member of struct (https://code.google.com/p/go/issues/detail?id=5278)
+	previous int64
 
 	Total                            int64
 	RefreshRate                      time.Duration
 	ShowPercent, ShowCounters        bool
 	ShowSpeed, ShowTimeLeft, ShowBar bool
-	ShowFinalTime                    bool
+	ShowFinalTime, ShowElapsedTime   bool
 	Output                           io.Writer
 	Callback                         Callback
 	NotPrint                         bool
@@ -90,9 +91,10 @@ type ProgressBar struct {
 	finish     chan struct{}
 	isFinish   bool
 
-	startTime    time.Time
-	startValue   int64
-	currentValue int64
+	startTime  time.Time
+	startValue int64
+
+	changeTime time.Time
 
 	prefix, postfix string
 
@@ -112,7 +114,7 @@ type ProgressBar struct {
 func (pb *ProgressBar) Start() *ProgressBar {
 	pb.startTime = time.Now()
 	pb.startValue = atomic.LoadInt64(&pb.current)
-	if pb.Total == 0 {
+	if atomic.LoadInt64(&pb.Total) == 0 {
 		pb.ShowTimeLeft = false
 		pb.ShowPercent = false
 		pb.AutoStat = false
@@ -157,12 +159,16 @@ func (pb *ProgressBar) Add64(add int64) int64 {
 
 // Set prefix string
 func (pb *ProgressBar) Prefix(prefix string) *ProgressBar {
+	pb.mu.Lock()
+	defer pb.mu.Unlock()
 	pb.prefix = prefix
 	return pb
 }
 
 // Set postfix string
 func (pb *ProgressBar) Postfix(postfix string) *ProgressBar {
+	pb.mu.Lock()
+	defer pb.mu.Unlock()
 	pb.postfix = postfix
 	return pb
 }
@@ -220,7 +226,7 @@ func (pb *ProgressBar) Finish() {
 	//Protect multiple calls
 	pb.finishOnce.Do(func() {
 		close(pb.finish)
-		pb.write(atomic.LoadInt64(&pb.current))
+		pb.write(atomic.LoadInt64(&pb.Total), atomic.LoadInt64(&pb.current))
 		pb.mu.Lock()
 		defer pb.mu.Unlock()
 		switch {
@@ -270,16 +276,18 @@ func (pb *ProgressBar) NewProxyReader(r io.Reader) *Reader {
 	return &Reader{r, pb}
 }
 
-func (pb *ProgressBar) write(current int64) {
+func (pb *ProgressBar) write(total, current int64) {
+	pb.mu.Lock()
+	defer pb.mu.Unlock()
 	width := pb.GetWidth()
 
-	var percentBox, countersBox, timeLeftBox, speedBox, barBox, end, out string
+	var percentBox, countersBox, timeLeftBox, timeSpentBox, speedBox, barBox, end, out string
 
 	// percents
 	if pb.ShowPercent {
 		var percent float64
-		if pb.Total > 0 {
-			percent = float64(current) / (float64(pb.Total) / float64(100))
+		if total > 0 {
+			percent = float64(current) / (float64(total) / float64(100))
 		} else {
 			percent = float64(current) / float64(100)
 		}
@@ -289,17 +297,24 @@ func (pb *ProgressBar) write(current int64) {
 	// counters
 	if pb.ShowCounters {
 		current := Format(current).To(pb.Units).Width(pb.UnitsWidth)
-		if pb.Total > 0 {
-			total := Format(pb.Total).To(pb.Units).Width(pb.UnitsWidth)
-			countersBox = fmt.Sprintf(" %s / %s ", current, total)
+		if total > 0 {
+			totalS := Format(total).To(pb.Units).Width(pb.UnitsWidth)
+			countersBox = fmt.Sprintf(" %s / %s ", current, totalS)
 		} else {
 			countersBox = fmt.Sprintf(" %s / ? ", current)
 		}
 	}
 
 	// time left
-	fromStart := time.Now().Sub(pb.startTime)
 	currentFromStart := current - pb.startValue
+	fromStart := time.Now().Sub(pb.startTime)
+	lastChangeTime := pb.changeTime
+	fromChange := lastChangeTime.Sub(pb.startTime)
+
+	if pb.ShowElapsedTime {
+		timeSpentBox = fmt.Sprintf(" %s ", (fromStart/time.Second)*time.Second)
+	}
+
 	select {
 	case <-pb.finish:
 		if pb.ShowFinalTime {
@@ -309,17 +324,20 @@ func (pb *ProgressBar) write(current int64) {
 		}
 	default:
 		if pb.ShowTimeLeft && currentFromStart > 0 {
-			perEntry := fromStart / time.Duration(currentFromStart)
+			perEntry := fromChange / time.Duration(currentFromStart)
 			var left time.Duration
-			if pb.Total > 0 {
-				left = time.Duration(pb.Total-currentFromStart) * perEntry
+			if total > 0 {
+				left = time.Duration(total-currentFromStart) * perEntry
+				left -= time.Since(lastChangeTime)
 				left = (left / time.Second) * time.Second
 			} else {
 				left = time.Duration(currentFromStart) * perEntry
 				left = (left / time.Second) * time.Second
 			}
-			timeLeft := Format(int64(left)).To(U_DURATION).String()
-			timeLeftBox = fmt.Sprintf(" %s", timeLeft)
+			if left > 0 {
+				timeLeft := Format(int64(left)).To(U_DURATION).String()
+				timeLeftBox = fmt.Sprintf(" %s", timeLeft)
+			}
 		}
 	}
 
@@ -334,30 +352,38 @@ func (pb *ProgressBar) write(current int64) {
 		speedBox = " " + Format(int64(speed)).To(pb.Units).Width(pb.UnitsWidth).PerSec().String()
 	}
 
-	barWidth := escapeAwareRuneCountInString(countersBox + pb.BarStart + pb.BarEnd + percentBox + timeLeftBox + speedBox + pb.prefix + pb.postfix)
+	barWidth := escapeAwareRuneCountInString(countersBox + pb.BarStart + pb.BarEnd + percentBox + timeSpentBox + timeLeftBox + speedBox + pb.prefix + pb.postfix)
 	// bar
 	if pb.ShowBar {
 		size := width - barWidth
 		if size > 0 {
-			if pb.Total > 0 {
-				curCount := int(math.Ceil((float64(current) / float64(pb.Total)) * float64(size)))
-				emptCount := size - curCount
+			if total > 0 {
+				curSize := int(math.Ceil((float64(current) / float64(total)) * float64(size)))
+				emptySize := size - curSize
 				barBox = pb.BarStart
-				if emptCount < 0 {
-					emptCount = 0
+				if emptySize < 0 {
+					emptySize = 0
 				}
-				if curCount > size {
-					curCount = size
+				if curSize > size {
+					curSize = size
 				}
-				if emptCount <= 0 {
-					barBox += strings.Repeat(pb.Current, curCount)
-				} else if curCount > 0 {
-					barBox += strings.Repeat(pb.Current, curCount-1) + pb.CurrentN
+
+				cursorLen := escapeAwareRuneCountInString(pb.Current)
+				if emptySize <= 0 {
+					barBox += strings.Repeat(pb.Current, curSize/cursorLen)
+				} else if curSize > 0 {
+					cursorEndLen := escapeAwareRuneCountInString(pb.CurrentN)
+					cursorRepetitions := (curSize - cursorEndLen) / cursorLen
+					barBox += strings.Repeat(pb.Current, cursorRepetitions)
+					barBox += pb.CurrentN
 				}
-				barBox += strings.Repeat(pb.Empty, emptCount) + pb.BarEnd
+
+				emptyLen := escapeAwareRuneCountInString(pb.Empty)
+				barBox += strings.Repeat(pb.Empty, emptySize/emptyLen)
+				barBox += pb.BarEnd
 			} else {
-				barBox = pb.BarStart
 				pos := size - int(current)%int(size)
+				barBox = pb.BarStart
 				if pos-1 > 0 {
 					barBox += strings.Repeat(pb.Empty, pos-1)
 				}
@@ -371,16 +397,16 @@ func (pb *ProgressBar) write(current int64) {
 	}
 
 	// check len
-	out = pb.prefix + countersBox + barBox + percentBox + speedBox + timeLeftBox + pb.postfix
+	out = pb.prefix + timeSpentBox + countersBox + barBox + percentBox + speedBox + timeLeftBox + pb.postfix
+
 	if cl := escapeAwareRuneCountInString(out); cl < width {
 		end = strings.Repeat(" ", width-cl)
 	}
 
 	// and print!
-	pb.mu.Lock()
 	pb.lastPrint = out + end
 	isFinish := pb.isFinish
-	pb.mu.Unlock()
+
 	switch {
 	case isFinish:
 		return
@@ -415,15 +441,20 @@ func (pb *ProgressBar) GetWidth() int {
 // Write the current state of the progressbar
 func (pb *ProgressBar) Update() {
 	c := atomic.LoadInt64(&pb.current)
-	if pb.AlwaysUpdate || c != pb.currentValue {
-		pb.write(c)
-		pb.currentValue = c
+	p := atomic.LoadInt64(&pb.previous)
+	t := atomic.LoadInt64(&pb.Total)
+	if p != c {
+		pb.mu.Lock()
+		pb.changeTime = time.Now()
+		pb.mu.Unlock()
+		atomic.StoreInt64(&pb.previous, c)
 	}
+	pb.write(t, c)
 	if pb.AutoStat {
 		if c == 0 {
 			pb.startTime = time.Now()
 			pb.startValue = 0
-		} else if c >= pb.Total && pb.isFinish != true {
+		} else if c >= t && pb.isFinish != true {
 			pb.Finish()
 		}
 	}
@@ -436,6 +467,29 @@ func (pb *ProgressBar) String() string {
 	return pb.lastPrint
 }
 
+// SetTotal atomically sets new total count
+func (pb *ProgressBar) SetTotal(total int) *ProgressBar {
+	return pb.SetTotal64(int64(total))
+}
+
+// SetTotal64 atomically sets new total count
+func (pb *ProgressBar) SetTotal64(total int64) *ProgressBar {
+	atomic.StoreInt64(&pb.Total, total)
+	return pb
+}
+
+// Reset bar and set new total count
+// Does effect only on finished bar
+func (pb *ProgressBar) Reset(total int) *ProgressBar {
+	pb.mu.Lock()
+	defer pb.mu.Unlock()
+	if pb.isFinish {
+		pb.SetTotal(total).Set(0)
+		atomic.StoreInt64(&pb.previous, 0)
+	}
+	return pb
+}
+
 // Internal loop for refreshing the progressbar
 func (pb *ProgressBar) refresher() {
 	for {
@@ -447,10 +501,3 @@ func (pb *ProgressBar) refresher() {
 		}
 	}
 }
-
-type window struct {
-	Row    uint16
-	Col    uint16
-	Xpixel uint16
-	Ypixel uint16
-}
diff --git a/vendor/gopkg.in/cheggaaa/pb.v1/pb_appengine.go b/vendor/gopkg.in/cheggaaa/pb.v1/pb_appengine.go
index d85dbc3b2bb..17168f39a61 100644
--- a/vendor/gopkg.in/cheggaaa/pb.v1/pb_appengine.go
+++ b/vendor/gopkg.in/cheggaaa/pb.v1/pb_appengine.go
@@ -1,4 +1,4 @@
-// +build appengine
+// +build appengine js
 
 package pb
 
diff --git a/vendor/gopkg.in/cheggaaa/pb.v1/pb_nix.go b/vendor/gopkg.in/cheggaaa/pb.v1/pb_nix.go
deleted file mode 100644
index c06097b4345..00000000000
--- a/vendor/gopkg.in/cheggaaa/pb.v1/pb_nix.go
+++ /dev/null
@@ -1,8 +0,0 @@
-// +build linux darwin freebsd netbsd openbsd dragonfly
-// +build !appengine
-
-package pb
-
-import "syscall"
-
-const sysIoctl = syscall.SYS_IOCTL
diff --git a/vendor/gopkg.in/cheggaaa/pb.v1/pb_solaris.go b/vendor/gopkg.in/cheggaaa/pb.v1/pb_solaris.go
deleted file mode 100644
index b7d461e1742..00000000000
--- a/vendor/gopkg.in/cheggaaa/pb.v1/pb_solaris.go
+++ /dev/null
@@ -1,6 +0,0 @@
-// +build solaris
-// +build !appengine
-
-package pb
-
-const sysIoctl = 54
diff --git a/vendor/gopkg.in/cheggaaa/pb.v1/pb_win.go b/vendor/gopkg.in/cheggaaa/pb.v1/pb_win.go
index 72f68283544..9595e823696 100644
--- a/vendor/gopkg.in/cheggaaa/pb.v1/pb_win.go
+++ b/vendor/gopkg.in/cheggaaa/pb.v1/pb_win.go
@@ -102,7 +102,7 @@ var echoLockMutex sync.Mutex
 
 var oldState word
 
-func lockEcho() (quit chan int, err error) {
+func lockEcho() (shutdownCh chan struct{}, err error) {
 	echoLockMutex.Lock()
 	defer echoLockMutex.Unlock()
 	if echoLocked {
@@ -124,6 +124,8 @@ func lockEcho() (quit chan int, err error) {
 		err = fmt.Errorf("Can't set terminal settings: %v", e)
 		return
 	}
+
+	shutdownCh = make(chan struct{})
 	return
 }
 
diff --git a/vendor/gopkg.in/cheggaaa/pb.v1/pb_x.go b/vendor/gopkg.in/cheggaaa/pb.v1/pb_x.go
index 12e6fe6e2a0..af42517604b 100644
--- a/vendor/gopkg.in/cheggaaa/pb.v1/pb_x.go
+++ b/vendor/gopkg.in/cheggaaa/pb.v1/pb_x.go
@@ -1,5 +1,5 @@
 // +build linux darwin freebsd netbsd openbsd solaris dragonfly
-// +build !appengine
+// +build !appengine !js
 
 package pb
 
@@ -8,101 +8,109 @@ import (
 	"fmt"
 	"os"
 	"os/signal"
-	"runtime"
 	"sync"
 	"syscall"
-	"unsafe"
-)
 
-const (
-	TIOCGWINSZ     = 0x5413
-	TIOCGWINSZ_OSX = 1074295912
+	"golang.org/x/sys/unix"
 )
 
-var tty *os.File
-
 var ErrPoolWasStarted = errors.New("Bar pool was started")
 
-var echoLocked bool
-var echoLockMutex sync.Mutex
+var (
+	echoLockMutex    sync.Mutex
+	origTermStatePtr *unix.Termios
+	tty              *os.File
+	istty            bool
+)
 
 func init() {
+	echoLockMutex.Lock()
+	defer echoLockMutex.Unlock()
+
 	var err error
 	tty, err = os.Open("/dev/tty")
+	istty = true
 	if err != nil {
 		tty = os.Stdin
+		istty = false
 	}
 }
 
 // terminalWidth returns width of the terminal.
 func terminalWidth() (int, error) {
-	w := new(window)
-	tio := syscall.TIOCGWINSZ
-	if runtime.GOOS == "darwin" {
-		tio = TIOCGWINSZ_OSX
+	if !istty {
+		return 0, errors.New("Not Supported")
 	}
-	res, _, err := syscall.Syscall(sysIoctl,
-		tty.Fd(),
-		uintptr(tio),
-		uintptr(unsafe.Pointer(w)),
-	)
-	if int(res) == -1 {
+	echoLockMutex.Lock()
+	defer echoLockMutex.Unlock()
+
+	fd := int(tty.Fd())
+
+	ws, err := unix.IoctlGetWinsize(fd, unix.TIOCGWINSZ)
+	if err != nil {
 		return 0, err
 	}
-	return int(w.Col), nil
-}
 
-var oldState syscall.Termios
+	return int(ws.Col), nil
+}
 
-func lockEcho() (quit chan int, err error) {
+func lockEcho() (shutdownCh chan struct{}, err error) {
 	echoLockMutex.Lock()
 	defer echoLockMutex.Unlock()
-	if echoLocked {
-		err = ErrPoolWasStarted
-		return
-	}
-	echoLocked = true
+	if istty {
+		if origTermStatePtr != nil {
+			return shutdownCh, ErrPoolWasStarted
+		}
 
-	fd := tty.Fd()
-	if _, _, e := syscall.Syscall6(sysIoctl, fd, ioctlReadTermios, uintptr(unsafe.Pointer(&oldState)), 0, 0, 0); e != 0 {
-		err = fmt.Errorf("Can't get terminal settings: %v", e)
-		return
-	}
+		fd := int(tty.Fd())
+
+		origTermStatePtr, err = unix.IoctlGetTermios(fd, ioctlReadTermios)
+		if err != nil {
+			return nil, fmt.Errorf("Can't get terminal settings: %v", err)
+		}
+
+		oldTermios := *origTermStatePtr
+		newTermios := oldTermios
+		newTermios.Lflag &^= syscall.ECHO
+		newTermios.Lflag |= syscall.ICANON | syscall.ISIG
+		newTermios.Iflag |= syscall.ICRNL
+		if err := unix.IoctlSetTermios(fd, ioctlWriteTermios, &newTermios); err != nil {
+			return nil, fmt.Errorf("Can't set terminal settings: %v", err)
+		}
 
-	newState := oldState
-	newState.Lflag &^= syscall.ECHO
-	newState.Lflag |= syscall.ICANON | syscall.ISIG
-	newState.Iflag |= syscall.ICRNL
-	if _, _, e := syscall.Syscall6(sysIoctl, fd, ioctlWriteTermios, uintptr(unsafe.Pointer(&newState)), 0, 0, 0); e != 0 {
-		err = fmt.Errorf("Can't set terminal settings: %v", e)
-		return
 	}
-	quit = make(chan int, 1)
-	go catchTerminate(quit)
+	shutdownCh = make(chan struct{})
+	go catchTerminate(shutdownCh)
 	return
 }
 
-func unlockEcho() (err error) {
+func unlockEcho() error {
 	echoLockMutex.Lock()
 	defer echoLockMutex.Unlock()
-	if !echoLocked {
-		return
-	}
-	echoLocked = false
-	fd := tty.Fd()
-	if _, _, e := syscall.Syscall6(sysIoctl, fd, ioctlWriteTermios, uintptr(unsafe.Pointer(&oldState)), 0, 0, 0); e != 0 {
-		err = fmt.Errorf("Can't set terminal settings")
+	if istty {
+		if origTermStatePtr == nil {
+			return nil
+		}
+
+		fd := int(tty.Fd())
+
+		if err := unix.IoctlSetTermios(fd, ioctlWriteTermios, origTermStatePtr); err != nil {
+			return fmt.Errorf("Can't set terminal settings: %v", err)
+		}
+
 	}
-	return
+	origTermStatePtr = nil
+
+	return nil
 }
 
 // listen exit signals and restore terminal state
-func catchTerminate(quit chan int) {
+func catchTerminate(shutdownCh chan struct{}) {
 	sig := make(chan os.Signal, 1)
 	signal.Notify(sig, os.Interrupt, syscall.SIGQUIT, syscall.SIGTERM, syscall.SIGKILL)
 	defer signal.Stop(sig)
 	select {
-	case <-quit:
+	case <-shutdownCh:
 		unlockEcho()
 	case <-sig:
 		unlockEcho()
diff --git a/vendor/gopkg.in/cheggaaa/pb.v1/pool.go b/vendor/gopkg.in/cheggaaa/pb.v1/pool.go
index bc1a13886db..392e7599c50 100644
--- a/vendor/gopkg.in/cheggaaa/pb.v1/pool.go
+++ b/vendor/gopkg.in/cheggaaa/pb.v1/pool.go
@@ -12,19 +12,28 @@ import (
 // You need call pool.Stop() after work
 func StartPool(pbs ...*ProgressBar) (pool *Pool, err error) {
 	pool = new(Pool)
-	if err = pool.start(); err != nil {
+	if err = pool.Start(); err != nil {
 		return
 	}
 	pool.Add(pbs...)
 	return
 }
 
+// NewPool initialises a pool with progress bars, but
+// doesn't start it. You need to call Start manually
+func NewPool(pbs ...*ProgressBar) (pool *Pool) {
+	pool = new(Pool)
+	pool.Add(pbs...)
+	return
+}
+
 type Pool struct {
 	Output        io.Writer
 	RefreshRate   time.Duration
 	bars          []*ProgressBar
 	lastBarsCount int
-	quit          chan int
+	shutdownCh    chan struct{}
+	workerCh      chan struct{}
 	m             sync.Mutex
 	finishOnce    sync.Once
 }
@@ -41,30 +50,38 @@ func (p *Pool) Add(pbs ...*ProgressBar) {
 	}
 }
 
-func (p *Pool) start() (err error) {
+func (p *Pool) Start() (err error) {
 	p.RefreshRate = DefaultRefreshRate
-	quit, err := lockEcho()
+	p.shutdownCh, err = lockEcho()
 	if err != nil {
 		return
 	}
-	p.quit = make(chan int)
-	go p.writer(quit)
+	p.workerCh = make(chan struct{})
+	go p.writer()
 	return
 }
 
-func (p *Pool) writer(finish chan int) {
+func (p *Pool) writer() {
 	var first = true
+	defer func() {
+		if first == false {
+			p.print(false)
+		} else {
+			p.print(true)
+			p.print(false)
+		}
+		close(p.workerCh)
+	}()
+
 	for {
 		select {
 		case <-time.After(p.RefreshRate):
 			if p.print(first) {
 				p.print(false)
-				finish <- 1
 				return
 			}
 			first = false
-		case <-p.quit:
-			finish <- 1
+		case <-p.shutdownCh:
 			return
 		}
 	}
@@ -72,11 +89,16 @@ func (p *Pool) writer(finish chan int) {
 
 // Restore terminal state and close pool
 func (p *Pool) Stop() error {
-	// Wait until one final refresh has passed.
-	time.Sleep(p.RefreshRate)
-
 	p.finishOnce.Do(func() {
-		close(p.quit)
+		if p.shutdownCh != nil {
+			close(p.shutdownCh)
+		}
 	})
+
+	// Wait for the worker to complete
+	select {
+	case <-p.workerCh:
+	}
+
 	return unlockEcho()
 }
diff --git a/vendor/gopkg.in/cheggaaa/pb.v1/termios_nix.go b/vendor/gopkg.in/cheggaaa/pb.v1/termios_nix.go
deleted file mode 100644
index ebb3fe87c83..00000000000
--- a/vendor/gopkg.in/cheggaaa/pb.v1/termios_nix.go
+++ /dev/null
@@ -1,7 +0,0 @@
-// +build linux solaris
-// +build !appengine
-
-package pb
-
-const ioctlReadTermios = 0x5401  // syscall.TCGETS
-const ioctlWriteTermios = 0x5402 // syscall.TCSETS
diff --git a/vendor/gopkg.in/cheggaaa/pb.v1/termios_sysv.go b/vendor/gopkg.in/cheggaaa/pb.v1/termios_sysv.go
new file mode 100644
index 00000000000..b10f61859c4
--- /dev/null
+++ b/vendor/gopkg.in/cheggaaa/pb.v1/termios_sysv.go
@@ -0,0 +1,13 @@
+// Copyright 2013 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build linux solaris
+// +build !appengine
+
+package pb
+
+import "golang.org/x/sys/unix"
+
+const ioctlReadTermios = unix.TCGETS
+const ioctlWriteTermios = unix.TCSETS