Skip to content
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions pkg/datastore/filedb/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@ go_library(
srcs = [
"codec.go",
"filedb.go",
"filter.go",
"iterator.go",
],
importpath = "github.com/pipe-cd/pipecd/pkg/datastore/filedb",
visibility = ["//visibility:public"],
Expand Down
78 changes: 76 additions & 2 deletions pkg/datastore/filedb/filedb.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ package filedb
import (
"context"
"fmt"
"path/filepath"

"go.uber.org/zap"

Expand Down Expand Up @@ -61,11 +62,80 @@ func (f *FileDB) fetch(ctx context.Context, path string) ([]byte, error) {
}

func (f *FileDB) Find(ctx context.Context, col datastore.Collection, opts datastore.ListOptions) (datastore.Iterator, error) {
_, ok := col.(datastore.ShardStorable)
scol, ok := col.(datastore.ShardStorable)
if !ok {
return nil, datastore.ErrUnsupported
}
return nil, datastore.ErrUnimplemented

var (
kind = col.Kind()
shards = scol.ListInUsedShards()
// Map of objects values with the first key is the object id.
objects map[string][][]byte
)

// Fetch the first part of all objects under a specified "directory".
for _, shard := range shards {
dpath := makeHotStorageDirPath(kind, shard)
parts, err := f.backend.List(ctx, dpath)
if err != nil {
f.logger.Error("failed to find entities",
zap.String("kind", kind),
zap.Error(err),
)
return nil, err
}

if objects == nil {
objects = make(map[string][][]byte, len(parts))
}
for _, obj := range parts {
id := filepath.Base(obj.Path)

data, err := f.fetch(ctx, obj.Path)
Copy link
Member

@nghialv nghialv Mar 2, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oops. I was thinking that the List function also returns data of file objects so that we can have their contents without requiring any extra requests.
By this way, a lot of requests number_of_shards * (1 + number_of_objects) will be required in a short time and I think it will not be realistic.

Do we have any better idea for that problem?

Copy link
Member Author

@khanhtc1202 khanhtc1202 Mar 2, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I feel you, the (n+1) problem, right. Tbh, I was thinking as you at first but when I read the place we use this filestore List interface (ref: planpreview cleaner) I got the same surprise as you have now 😄

I tried to work around with the "list" support API from our supporting filestore (gcs, s3 and minio), and looks like they only support the get object by key as the only way to fetch the raw data of an object, all list API seems only returns the attributes and meta which we can use to fetch the content. I will investigate more about that, but in the worst case, I think we have 2 points to rely on which are:

  • the number of the object in hot storage of each kind is expected to be small enough, we may need to do some kind of middleware fetching objects parts in parallel here if necessary
  • cache storage in API layer will reduce the number of times we have to list this in a direct way.

Wdyt?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks for your explanation.

the number of the object in hot storage of each kind is expected to be all enough, we may need to do some kind of middleware fetching objects parts in parallel here if necessary

I'm afraid this way is not appropriate since we will make a burst of requests on external services so we could get the limit error.

cache storage in API layer will reduce the number of times we have to list this in a direct way.

Yes, that could be an approachable way. I think it is time to think about that before continuing implementation.
What cache solution do you think about now?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@nghialv thank you so much for your comment 🙏 I added logic to check whether the raw data of the object part is updated or not based on the etag value returned from List objects request. For now, we will only fetch for the object part stored under the given path in case there is no version of it found in filedb.cache. PTAL when you have time 🙌

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thank you. Let me take a look.

if err != nil {
f.logger.Error("failed to fetch entity part",
zap.String("kind", kind),
zap.String("id", id),
zap.Error(err),
)
return nil, err
}

objects[id] = append(objects[id], data)
}
}

entities := make([]interface{}, 0, len(objects))
for id, obj := range objects {
e := col.Factory()()
if err := decode(col, e, obj...); err != nil {
f.logger.Error("failed to build entity",
zap.String("kind", kind),
zap.String("id", id),
zap.Error(err),
)
return nil, err
}

pass, err := filter(col, e, opts.Filters)
if err != nil {
f.logger.Error("failed to filter entity",
zap.String("kind", kind),
zap.String("id", id),
zap.Error(err),
)
return nil, err
}

if pass {
entities = append(entities, e)
}
}

return &Iterator{
data: entities,
}, nil
}

func (f *FileDB) Get(ctx context.Context, col datastore.Collection, id string, v interface{}) error {
Expand Down Expand Up @@ -141,3 +211,7 @@ func makeHotStorageFilePath(kind, id string, shard datastore.Shard) string {
// TODO: Find a way to separate files by project to avoid fetch resources cross project.
return fmt.Sprintf("%s/%s/%s.json", kind, shard, id)
}

func makeHotStorageDirPath(kind string, shard datastore.Shard) string {
return fmt.Sprintf("%s/%s/", kind, shard)
}
33 changes: 33 additions & 0 deletions pkg/datastore/filedb/filter.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
// Copyright 2022 The PipeCD Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package filedb

import (
"github.com/pipe-cd/pipecd/pkg/datastore"
)

// TODO: Implement filterable interface for each collection.
type filterable interface {
Match(e interface{}, filters []datastore.ListFilter) (bool, error)
}

func filter(col datastore.Collection, e interface{}, filters []datastore.ListFilter) (bool, error) {
fcol, ok := col.(filterable)
if !ok {
return false, datastore.ErrUnsupported
}

return fcol.Match(e, filters)
}
29 changes: 29 additions & 0 deletions pkg/datastore/filedb/iterator.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
// Copyright 2022 The PipeCD Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package filedb

import "github.com/pipe-cd/pipecd/pkg/datastore"

type Iterator struct {
data []interface{}
}

func (it *Iterator) Next(dst interface{}) error {
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

dst is unused in Next

return datastore.ErrUnimplemented
}

func (it *Iterator) Cursor() (string, error) {
return "", datastore.ErrUnimplemented
}