Skip to content
This repository has been archived by the owner on Dec 9, 2022. It is now read-only.

Commit

Permalink
FilesToKeys as a function
Browse files Browse the repository at this point in the history
+ unit tests
  • Loading branch information
Pedro Cunha committed Oct 13, 2017
1 parent 3309911 commit b4e255d
Show file tree
Hide file tree
Showing 5 changed files with 77 additions and 31 deletions.
58 changes: 34 additions & 24 deletions cli/data/commit.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,15 +21,16 @@ import (
"github.com/aws/aws-sdk-go/service/s3"
"github.com/aws/aws-sdk-go/service/s3/s3manager"
"github.com/deliveroo/paddle/common"
"github.com/spf13/afero"
"github.com/spf13/cobra"
"github.com/spf13/viper"
"os"
"path/filepath"
"strings"
"time"
)

var commitBranch string
var AppFs = afero.NewOsFs()

var commitCmd = &cobra.Command{
Use: "commit [source path] [version]",
Expand All @@ -48,9 +49,10 @@ $ paddle data commit -b experimental source/path trained-model/version1

destination := S3Path{
bucket: viper.GetString("bucket"),
path: fmt.Sprintf("%s/%s", args[1], commitBranch),
path: fmt.Sprintf("%s/%s", args[1], commitBranch),
}

validatePath(args[0])
commitPath(args[0], destination)
},
}
Expand All @@ -59,55 +61,63 @@ func init() {
commitCmd.Flags().StringVarP(&commitBranch, "branch", "b", "master", "Branch to work on")
}

func commitPath(path string, destination S3Path) {
fd, err := os.Stat(path)
func validatePath(path string) {
fd, err := AppFs.Stat(path)
if err != nil {
exitErrorf("Source path %v not found", path)
}
if !fd.Mode().IsDir() {
exitErrorf("Source path %v must be a directory", path)
}
}

func commitPath(path string, destination S3Path) {
sess := session.Must(session.NewSessionWithOptions(session.Options{
SharedConfigState: session.SharedConfigEnable,
}))

fileList := []string{}
filepath.Walk(path, func(p string, f os.FileInfo, err error) error {
if common.IsDirectory(p) {
return nil
} else {
fileList = append(fileList, p)
rootKey := generateRootKey(path, destination)
keys := filesToKeys(path)
uploader := s3manager.NewUploader(sess)

for _, file := range keys {
key := fmt.Sprintf("%s/%s", rootKey, strings.TrimPrefix(file, path+"/"))
fmt.Println(file + " -> " + key)
uploadFileToS3(uploader, destination.bucket, key, file)
}

// Update HEAD
headKey := fmt.Sprintf("%s/HEAD", destination.path)
uploadDataToS3(sess, destination.bucket, headKey, rootKey)
}

func filesToKeys(path string) (keys []string) {
afero.Walk(AppFs, path, func(p string, f os.FileInfo, err error) error {
if f.IsDir() {
return nil
}
keys = append(keys, p)
return nil
})
return keys
}

func generateRootKey(source string, destination S3Path) string {
t := time.Now().UTC()
datePath := fmt.Sprintf("%d/%02d/%02d/%02d%02d",
t.Year(), t.Month(), t.Day(),
t.Hour(), t.Minute())

hash, err := common.DirHash(path)
hash, err := common.DirHash(source)
if err != nil {
exitErrorf("Unable to hash input folder")
}

uploader := s3manager.NewUploader(sess)
folderKey := fmt.Sprintf("%s/%s_%s", destination.path, datePath, hash)

for _, file := range fileList {
key := fmt.Sprintf("%s/%s", folderKey, strings.TrimPrefix(file, path + "/"))
fmt.Println(file + " -> " + key)
uploadFileToS3(uploader, destination.bucket, key, file)
}

// Update HEAD
headKey := fmt.Sprintf("%s/HEAD", destination.path)
uploadDataToS3(sess, destination.bucket, headKey, folderKey)
return fmt.Sprintf("%s/%s_%s", destination.path, datePath, hash)
}

func uploadFileToS3(uploader *s3manager.Uploader, bucket string, key string, filePath string) {
file, err := os.Open(filePath)
file, err := AppFs.Open(filePath)
if err != nil {
exitErrorf("Failed to open file", file, err)
}
Expand Down
36 changes: 36 additions & 0 deletions cli/data/commit_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
package data

import (
"github.com/spf13/afero"
"reflect"
"strings"
"testing"
)

func TestFilesToKeys(t *testing.T) {
AppFs = afero.NewMemMapFs()
AppFs.MkdirAll("src/", 0755)
afero.WriteFile(AppFs, "src/a/b", []byte("file c"), 0644)
afero.WriteFile(AppFs, "src/c", []byte("file c"), 0644)

list := filesToKeys("src")
expectation := []string{
"src/a/b",
"src/c",
}

if !reflect.DeepEqual(list, expectation) {
t.Errorf("list is different got: %s, want: %s.", strings.Join(list, ","), strings.Join(expectation, ","))
}
}

func TestFilesToKeysWhenEmptyFolder(t *testing.T) {
AppFs = afero.NewMemMapFs()
AppFs.MkdirAll("src", 0755)

list := filesToKeys("src")

if len(list) != 0 {
t.Errorf("expecting empty list but got: %s", strings.Join(list, ","))
}
}
4 changes: 2 additions & 2 deletions cli/data/get.go
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ $ paddle data get -b experimental trained-model/version1 dest/path

source := S3Path{
bucket: viper.GetString("bucket"),
path: fmt.Sprintf("%s/%s/%s", args[0], getBranch, getCommitPath),
path: fmt.Sprintf("%s/%s/%s", args[0], getBranch, getCommitPath),
}

copyPathToDestination(source, args[1])
Expand Down Expand Up @@ -131,7 +131,7 @@ func copyToLocalFiles(s3Client *s3.S3, objects []*s3.Object, source S3Path, dest
if err != nil {
exitErrorf("%v", err)
}
destFilePath := destination + "/" + strings.TrimPrefix(destFilename, source.Dirname() + "/")
destFilePath := destination + "/" + strings.TrimPrefix(destFilename, source.Dirname()+"/")
err = os.MkdirAll(filepath.Dir(destFilePath), 0777)
fmt.Print(destFilePath)
destFile, err := os.Create(destFilePath)
Expand Down
2 changes: 1 addition & 1 deletion cli/data/s3path.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ import (

type S3Path struct {
bucket string
path string
path string
}

func (p *S3Path) Basename() string {
Expand Down
8 changes: 4 additions & 4 deletions cli/data/s3path_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ import "testing"
func TestBasename(t *testing.T) {
path := S3Path{
bucket: "foo",
path: "aaa/bbb/ccc",
path: "aaa/bbb/ccc",
}

dirname := path.Basename()
Expand All @@ -19,7 +19,7 @@ func TestBasename(t *testing.T) {
func TestBasenameWithEmptyPath(t *testing.T) {
path := S3Path{
bucket: "foo",
path: "",
path: "",
}

dirname := path.Basename()
Expand All @@ -33,7 +33,7 @@ func TestBasenameWithEmptyPath(t *testing.T) {
func TestDirname(t *testing.T) {
path := S3Path{
bucket: "foo",
path: "aaa/bbb/ccc",
path: "aaa/bbb/ccc",
}

dirname := path.Dirname()
Expand All @@ -47,7 +47,7 @@ func TestDirname(t *testing.T) {
func TestDirnameOnBasicPath(t *testing.T) {
path := S3Path{
bucket: "foo",
path: "aaa",
path: "aaa",
}

dirname := path.Dirname()
Expand Down

0 comments on commit b4e255d

Please sign in to comment.