diff --git a/cli/data/cmd.go b/cli/data/cmd.go new file mode 100644 index 0000000..7ba51e0 --- /dev/null +++ b/cli/data/cmd.go @@ -0,0 +1,39 @@ +// Copyright © 2017 RooFoods LTD +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package data + +import ( + "github.com/spf13/cobra" +) + +// dataCmd represents the data command +var DataCmd = &cobra.Command{ + Use: "data", + Short: "Commit and retrieve data", + Long: `Commands to commit data to S3 and retrieve it. +`, +} + +func init() { + DataCmd.AddCommand(commitCmd) + // Here you will define your flags and configuration settings. + + // Cobra supports Persistent Flags which will work for this command + // and all subcommands, e.g.: + // dataCmd.PersistentFlags().String("foo", "", "A help for foo") + + // Cobra supports local flags which will only run when this command + // is called directly, e.g.: + // dataCmd.Flags().BoolP("toggle", "t", false, "Help message for toggle") +} diff --git a/cli/data/commit.go b/cli/data/commit.go new file mode 100644 index 0000000..1675471 --- /dev/null +++ b/cli/data/commit.go @@ -0,0 +1,144 @@ +// Copyright © 2017 RooFoods LTD +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package data + +import ( + "bytes" + "fmt" + "github.com/aws/aws-sdk-go/aws" + "github.com/aws/aws-sdk-go/aws/session" + "github.com/aws/aws-sdk-go/service/s3" + "github.com/aws/aws-sdk-go/service/s3/s3manager" + "github.com/deliveroo/paddle/common" + "github.com/spf13/cobra" + "github.com/spf13/viper" + "os" + "path/filepath" + "strings" + "time" +) + +var branch string + +var commitCmd = &cobra.Command{ + Use: "commit [source path] [version]", + Short: "Commit data to S3", + Args: cobra.ExactArgs(2), + Long: `Store data into S3 under a versioned path, and update HEAD. + +Example: + +$ paddle data commit -b experimantal source/path version1 +`, + Run: func(cmd *cobra.Command, args []string) { + if !viper.IsSet("bucket") { + exitErrorf("Bucket not defined. Please define 'bucket' in your config file.") + } + commitPath(args[0], viper.GetString("bucket"), args[1], branch) + }, +} + +func init() { + commitCmd.Flags().StringVarP(&branch, "branch", "b", "master", "Branch to work on") +} + +func exitErrorf(msg string, args ...interface{}) { + fmt.Fprintf(os.Stderr, msg+"\n", args...) 
+ os.Exit(1) +} + +func commitPath(path string, bucket string, version string, branch string) { + fd, err := os.Stat(path) + if err != nil { + exitErrorf("Path %v not found", path) + } + if !fd.Mode().IsDir() { + exitErrorf("Path %v must be a directory", path) + } + + hash, err := common.DirHash(path) + if err != nil { + exitErrorf("Unable to hash input folder") + } + + t := time.Now().UTC() + + datePath := fmt.Sprintf("%d/%02d/%02d/%02d%02d", + t.Year(), t.Month(), t.Day(), + t.Hour(), t.Minute()) + + destPath := fmt.Sprintf("%s/%s/%s_%s", version, branch, datePath, hash) + + sess := session.Must(session.NewSessionWithOptions(session.Options{ + SharedConfigState: session.SharedConfigEnable, + })) + + fileList := []string{} + filepath.Walk(path, func(p string, f os.FileInfo, err error) error { + if common.IsDirectory(p) { + return nil + } else { + fileList = append(fileList, p) + return nil + } + }) + + uploader := s3manager.NewUploader(sess) + + for _, file := range fileList { + key := destPath + "/" + strings.TrimPrefix(file, path+"/") + fmt.Println(file + " -> " + key) + uploadFileToS3(uploader, bucket, key, file) + } + + // Update HEAD + + headFile := fmt.Sprintf("%s/%s/HEAD", version, branch) + + uploadDataToS3(sess, destPath, bucket, headFile) +} + +func uploadFileToS3(uploader *s3manager.Uploader, bucketName string, key string, filePath string) { + file, err := os.Open(filePath) + if err != nil { + fmt.Println("Failed to open file", file, err) + os.Exit(1) + } + defer file.Close() + + _, err = uploader.Upload(&s3manager.UploadInput{ + Bucket: aws.String(bucketName), + Key: aws.String(key), + Body: file, + }) + + if err != nil { + exitErrorf("Failed to upload data to %s/%s, %s", bucketName, key, err.Error()) + return + } +} + +func uploadDataToS3(sess *session.Session, data string, bucket string, key string) { + s3Svc := s3.New(sess) + + _, err := s3Svc.PutObject(&s3.PutObjectInput{ + Bucket: aws.String(bucket), + Key: aws.String(key), + Body: 
bytes.NewReader([]byte(data)), + }) + + if err != nil { + exitErrorf("Unable to update %s", key) + } +} diff --git a/cli/root.go b/cli/root.go new file mode 100644 index 0000000..00c08f1 --- /dev/null +++ b/cli/root.go @@ -0,0 +1,89 @@ +// Copyright © 2017 RooFoods LTD +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package cli + +import ( + "fmt" + "os" + + "github.com/deliveroo/paddle/cli/data" + homedir "github.com/mitchellh/go-homedir" + "github.com/spf13/cobra" + "github.com/spf13/viper" +) + +var cfgFile string + +// RootCmd represents the base command when called without any subcommands +var RootCmd = &cobra.Command{ + Use: "paddle", + Short: "A brief description of your application", + Long: `A longer description that spans multiple lines and likely contains +examples and usage of using your application. For example: + +Cobra is a CLI library for Go that empowers applications. +This application is a tool to generate the needed files +to quickly create a Cobra application.`, + // Uncomment the following line if your bare application + // has an action associated with it: + // Run: func(cmd *cobra.Command, args []string) { }, +} + +// Execute adds all child commands to the root command and sets flags appropriately. +// This is called by main.main(). It only needs to happen once to the rootCmd. 
+func Execute() { + if err := RootCmd.Execute(); err != nil { + fmt.Println(err) + os.Exit(1) + } +} + +func init() { + cobra.OnInitialize(initConfig) + + RootCmd.PersistentFlags().StringVar(&cfgFile, "config", "", "config file (default is $HOME/.paddle.yaml)") + + // // Cobra also supports local flags, which will only run + // // when this action is called directly. + // RootCmd.Flags().BoolP("toggle", "t", false, "Help message for toggle") + + RootCmd.AddCommand(data.DataCmd) + +} + +// initConfig reads in config file and ENV variables if set. +func initConfig() { + if cfgFile != "" { + // Use config file from the flag. + viper.SetConfigFile(cfgFile) + } else { + // Find home directory. + home, err := homedir.Dir() + if err != nil { + fmt.Println(err) + os.Exit(1) + } + + // Search config in home directory with name ".paddle" (without extension). + viper.AddConfigPath(home) + viper.SetConfigName(".paddle") + } + + viper.AutomaticEnv() // read in environment variables that match + + // If a config file is found, read it in. 
// filechunk is the size in bytes of the buffer used to stream a file into the
// hash.
const filechunk = 8192

// DirHash returns a hex-encoded SHA-1 fingerprint of the files below path.
//
// Every non-directory entry found by filepath.Walk (which visits entries in
// lexical order) contributes one line "<walked path>:<FileHash of the file>";
// the lines are joined with "\n" and the SHA-1 of that text is returned.
// Entries are classified with os.Stat, so a symlink pointing at a directory
// counts as a directory and is skipped.
//
// Any walk, stat or read failure is returned as an error.
//
// NOTE(review): the walked path, including the path prefix passed in, is part
// of the hashed text, so the same content reached through a different path
// hashes differently. Left unchanged because existing hashes depend on it.
func DirHash(path string) (string, error) {
	var sha1List []string
	err := filepath.Walk(path, func(p string, f os.FileInfo, err error) error {
		if err != nil {
			return err
		}
		info, err := os.Stat(p)
		if err != nil {
			return err
		}
		if info.IsDir() {
			return nil
		}
		sha, err := FileHash(p)
		if err != nil {
			return err
		}
		sha1List = append(sha1List, fmt.Sprintf("%s:%s", p, sha))
		return nil
	})
	if err != nil {
		return "", err
	}

	files := strings.Join(sha1List, "\n")
	return fmt.Sprintf("%x", sha1.Sum([]byte(files))), nil
}

// FileHash returns the hex-encoded SHA-1 digest of the content of the file at
// path. The file is streamed through a filechunk-sized buffer, so memory use
// does not grow with the file size. Open and read errors are returned to the
// caller unchanged.
func FileHash(path string) (string, error) {
	file, err := os.Open(path)
	if err != nil {
		return "", err
	}
	defer file.Close()

	hash := sha1.New()
	if _, err := io.CopyBuffer(hash, file, make([]byte, filechunk)); err != nil {
		return "", err
	}

	return fmt.Sprintf("%x", hash.Sum(nil)), nil
}
// IsDirectory reports whether path refers to a directory. It uses os.Stat, so
// a symbolic link is judged by what it points to. Regular files and every
// other non-directory entry yield false.
//
// If path cannot be stat'ed the error is written to stderr and the process
// exits with status 2.
func IsDirectory(path string) bool {
	fd, err := os.Stat(path)
	if err != nil {
		fmt.Fprintln(os.Stderr, err)
		os.Exit(2)
	}
	return fd.IsDir()
}