From 714f2debead06ce6213a790fffec4f2f0b0e1fd9 Mon Sep 17 00:00:00 2001 From: Kevin Atkinson Date: Mon, 20 Jun 2016 17:06:04 -0400 Subject: [PATCH 1/2] Add Files API root as best-effort pin. Closes #2697. Closes #2698. License: MIT Signed-off-by: Kevin Atkinson --- core/commands/pin.go | 2 +- core/corerepo/gc.go | 25 +++++++++++++++++++++-- core/coreunix/add_test.go | 4 ++-- merkledag/merkledag.go | 10 ++++++--- merkledag/merkledag_test.go | 4 ++-- pin/gc/gc.go | 20 +++++++++++------- test/sharness/t0252-files-gc.sh | 36 +++++++++++++++++++++++++++++++++ 7 files changed, 84 insertions(+), 17 deletions(-) create mode 100755 test/sharness/t0252-files-gc.sh diff --git a/core/commands/pin.go b/core/commands/pin.go index 934c9a39dcb..0842b35cae1 100644 --- a/core/commands/pin.go +++ b/core/commands/pin.go @@ -330,7 +330,7 @@ func pinLsAll(typeStr string, ctx context.Context, n *core.IpfsNode) (map[string if err != nil { return nil, err } - err = dag.EnumerateChildren(n.Context(), n.DAG, nd, ks) + err = dag.EnumerateChildren(n.Context(), n.DAG, nd, ks, false) if err != nil { return nil, err } diff --git a/core/corerepo/gc.go b/core/corerepo/gc.go index 621b44d3936..9333bfa2638 100644 --- a/core/corerepo/gc.go +++ b/core/corerepo/gc.go @@ -6,6 +6,7 @@ import ( key "github.com/ipfs/go-ipfs/blocks/key" "github.com/ipfs/go-ipfs/core" + mfs "github.com/ipfs/go-ipfs/mfs" gc "github.com/ipfs/go-ipfs/pin/gc" repo "github.com/ipfs/go-ipfs/repo" humanize "gx/ipfs/QmPSBJL4momYnE7DcUyk2DVhD6rH488ZmHBGLbxNdhU44K/go-humanize" @@ -71,10 +72,26 @@ func NewGC(n *core.IpfsNode) (*GC, error) { }, nil } +func BestEffortRoots(filesRoot *mfs.Root) ([]key.Key, error) { + rootDag, err := filesRoot.GetValue().GetNode() + if err != nil { + return nil, err + } + rootKey, err := rootDag.Key() + if err != nil { + return nil, err + } + return []key.Key{rootKey}, nil +} + func GarbageCollect(n *core.IpfsNode, ctx context.Context) error { ctx, cancel := context.WithCancel(ctx) defer cancel() // in case 
error occurs during operation - rmed, err := gc.GC(ctx, n.Blockstore, n.Pinning) + roots, err := BestEffortRoots(n.FilesRoot) + if err != nil { + return err + } + rmed, err := gc.GC(ctx, n.Blockstore, n.Pinning, roots) if err != nil { return err } @@ -93,7 +110,11 @@ func GarbageCollect(n *core.IpfsNode, ctx context.Context) error { } func GarbageCollectAsync(n *core.IpfsNode, ctx context.Context) (<-chan *KeyRemoved, error) { - rmed, err := gc.GC(ctx, n.Blockstore, n.Pinning) + roots, err := BestEffortRoots(n.FilesRoot) + if err != nil { + return nil, err + } + rmed, err := gc.GC(ctx, n.Blockstore, n.Pinning, roots) if err != nil { return nil, err } diff --git a/core/coreunix/add_test.go b/core/coreunix/add_test.go index 1663e0388d5..7a43b634903 100644 --- a/core/coreunix/add_test.go +++ b/core/coreunix/add_test.go @@ -96,7 +96,7 @@ func TestAddGCLive(t *testing.T) { gcstarted := make(chan struct{}) go func() { defer close(gcstarted) - gcchan, err := gc.GC(context.Background(), node.Blockstore, node.Pinning) + gcchan, err := gc.GC(context.Background(), node.Blockstore, node.Pinning, nil) if err != nil { log.Error("GC ERROR:", err) errs <- err @@ -155,7 +155,7 @@ func TestAddGCLive(t *testing.T) { t.Fatal(err) } - err = dag.EnumerateChildren(ctx, node.DAG, root, key.NewKeySet()) + err = dag.EnumerateChildren(ctx, node.DAG, root, key.NewKeySet(), false) if err != nil { t.Fatal(err) } diff --git a/merkledag/merkledag.go b/merkledag/merkledag.go index 6792e3e51c5..4938e7bab11 100644 --- a/merkledag/merkledag.go +++ b/merkledag/merkledag.go @@ -357,16 +357,20 @@ func (t *Batch) Commit() error { // EnumerateChildren will walk the dag below the given root node and add all // unseen children to the passed in set. // TODO: parallelize to avoid disk latency perf hits? 
-func EnumerateChildren(ctx context.Context, ds DAGService, root *Node, set key.KeySet) error { +func EnumerateChildren(ctx context.Context, ds DAGService, root *Node, set key.KeySet, bestEffort bool) error { for _, lnk := range root.Links { k := key.Key(lnk.Hash) if !set.Has(k) { set.Add(k) child, err := ds.Get(ctx, k) if err != nil { - return err + if bestEffort && err == ErrNotFound { + continue + } else { + return err + } } - err = EnumerateChildren(ctx, ds, child, set) + err = EnumerateChildren(ctx, ds, child, set, bestEffort) if err != nil { return err } diff --git a/merkledag/merkledag_test.go b/merkledag/merkledag_test.go index 644d4e2d5ee..79b7399b57f 100644 --- a/merkledag/merkledag_test.go +++ b/merkledag/merkledag_test.go @@ -292,7 +292,7 @@ func TestFetchGraph(t *testing.T) { offline_ds := NewDAGService(bs) ks := key.NewKeySet() - err = EnumerateChildren(context.Background(), offline_ds, root, ks) + err = EnumerateChildren(context.Background(), offline_ds, root, ks, false) if err != nil { t.Fatal(err) } @@ -309,7 +309,7 @@ func TestEnumerateChildren(t *testing.T) { } ks := key.NewKeySet() - err = EnumerateChildren(context.Background(), ds, root, ks) + err = EnumerateChildren(context.Background(), ds, root, ks, false) if err != nil { t.Fatal(err) } diff --git a/pin/gc/gc.go b/pin/gc/gc.go index 1a043c81778..34906fffb63 100644 --- a/pin/gc/gc.go +++ b/pin/gc/gc.go @@ -17,18 +17,19 @@ var log = logging.Logger("gc") // GC performs a mark and sweep garbage collection of the blocks in the blockstore // first, it creates a 'marked' set and adds to it the following: // - all recursively pinned blocks, plus all of their descendants (recursively) +// - bestEffortRoots, plus all of their descendants (recursively) // - all directly pinned blocks // - all blocks utilized internally by the pinner // // The routine then iterates over every block in the blockstore and // deletes any block that is not found in the marked set. 
-func GC(ctx context.Context, bs bstore.GCBlockstore, pn pin.Pinner) (<-chan key.Key, error) { +func GC(ctx context.Context, bs bstore.GCBlockstore, pn pin.Pinner, bestEffortRoots []key.Key) (<-chan key.Key, error) { unlocker := bs.GCLock() bsrv := bserv.New(bs, offline.Exchange(bs)) ds := dag.NewDAGService(bsrv) - gcs, err := ColoredSet(ctx, pn, ds) + gcs, err := ColoredSet(ctx, pn, ds, bestEffortRoots) if err != nil { return nil, err } @@ -69,7 +70,7 @@ func GC(ctx context.Context, bs bstore.GCBlockstore, pn pin.Pinner) (<-chan key. return output, nil } -func Descendants(ctx context.Context, ds dag.DAGService, set key.KeySet, roots []key.Key) error { +func Descendants(ctx context.Context, ds dag.DAGService, set key.KeySet, roots []key.Key, bestEffort bool) error { for _, k := range roots { set.Add(k) nd, err := ds.Get(ctx, k) @@ -78,7 +79,7 @@ func Descendants(ctx context.Context, ds dag.DAGService, set key.KeySet, roots [ } // EnumerateChildren recursively walks the dag and adds the keys to the given set - err = dag.EnumerateChildren(ctx, ds, nd, set) + err = dag.EnumerateChildren(ctx, ds, nd, set, bestEffort) if err != nil { return err } @@ -87,11 +88,16 @@ func Descendants(ctx context.Context, ds dag.DAGService, set key.KeySet, roots [ return nil } -func ColoredSet(ctx context.Context, pn pin.Pinner, ds dag.DAGService) (key.KeySet, error) { +func ColoredSet(ctx context.Context, pn pin.Pinner, ds dag.DAGService, bestEffortRoots []key.Key) (key.KeySet, error) { // KeySet currently implemented in memory, in the future, may be bloom filter or // disk backed to conserve memory. 
gcs := key.NewKeySet() - err := Descendants(ctx, ds, gcs, pn.RecursiveKeys()) + err := Descendants(ctx, ds, gcs, pn.RecursiveKeys(), false) + if err != nil { + return nil, err + } + + err = Descendants(ctx, ds, gcs, bestEffortRoots, true) if err != nil { return nil, err } @@ -100,7 +106,7 @@ func ColoredSet(ctx context.Context, pn pin.Pinner, ds dag.DAGService) (key.KeyS gcs.Add(k) } - err = Descendants(ctx, ds, gcs, pn.InternalPins()) + err = Descendants(ctx, ds, gcs, pn.InternalPins(), false) if err != nil { return nil, err } diff --git a/test/sharness/t0252-files-gc.sh b/test/sharness/t0252-files-gc.sh new file mode 100755 index 00000000000..416b020fddf --- /dev/null +++ b/test/sharness/t0252-files-gc.sh @@ -0,0 +1,36 @@ +#!/bin/sh +# +# Copyright (c) 2016 Jeromy Johnson +# MIT Licensed; see the LICENSE file in this repository. +# + +test_description="test how the unix files api interacts with the gc" + +. lib/test-lib.sh + +test_init_ipfs + +test_expect_success "object not removed after gc" ' + echo "hello world" | ipfs files write --create /hello.txt && + ipfs repo gc && + ipfs cat QmVib14uvPnCP73XaCDpwugRuwfTsVbGyWbatHAmLSdZUS +' + +test_expect_success "gc okay after adding incomplete node -- prep" ' + ipfs files mkdir /adir && + echo "file1" | ipfs files write --create /adir/file1 && + echo "file2" | ipfs files write --create /adir/file2 && + ipfs pin add --recursive=false QmbCgoMYVuZq8m1vK31JQx9DorwQdLMF1M3sJ7kygLLqnW && + ipfs files rm -r /adir && + ipfs repo gc && # will remove /adir/file1 and /adir/file2 but not /adir + ipfs files cp /ipfs/QmbCgoMYVuZq8m1vK31JQx9DorwQdLMF1M3sJ7kygLLqnW /adir && + ipfs pin rm QmbCgoMYVuZq8m1vK31JQx9DorwQdLMF1M3sJ7kygLLqnW +' + +test_expect_success "gc okay after adding incomplete node" ' + ipfs refs QmbCgoMYVuZq8m1vK31JQx9DorwQdLMF1M3sJ7kygLLqnW && + ipfs repo gc && + ipfs refs QmbCgoMYVuZq8m1vK31JQx9DorwQdLMF1M3sJ7kygLLqnW +' + +test_done From 466d70993fdb7c4043f243fed7a95d6b9e938ee5 Mon Sep 17 00:00:00 2001 From: Kevin 
Atkinson Date: Mon, 20 Jun 2016 21:11:17 -0400 Subject: [PATCH 2/2] Enhance tests and fix tests copyright for files API root best-effort pin. License: MIT Signed-off-by: --- test/sharness/t0252-files-gc.sh | 21 +++++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) diff --git a/test/sharness/t0252-files-gc.sh b/test/sharness/t0252-files-gc.sh index 416b020fddf..68c720e1a80 100755 --- a/test/sharness/t0252-files-gc.sh +++ b/test/sharness/t0252-files-gc.sh @@ -1,6 +1,6 @@ #!/bin/sh # -# Copyright (c) 2016 Jeromy Johnson +# Copyright (c) 2016 Kevin Atkinson # MIT Licensed; see the LICENSE file in this repository. # @@ -16,21 +16,30 @@ test_expect_success "object not removed after gc" ' ipfs cat QmVib14uvPnCP73XaCDpwugRuwfTsVbGyWbatHAmLSdZUS ' +test_expect_success "/hello.txt still accessible after gc" ' + ipfs files read /hello.txt +' + +ADIR_HASH=QmbCgoMYVuZq8m1vK31JQx9DorwQdLMF1M3sJ7kygLLqnW +FILE1_HASH=QmX4eaSJz39mNhdu5ACUwTDpyA6y24HmrQNnAape6u3buS + test_expect_success "gc okay after adding incomplete node -- prep" ' ipfs files mkdir /adir && echo "file1" | ipfs files write --create /adir/file1 && echo "file2" | ipfs files write --create /adir/file2 && - ipfs pin add --recursive=false QmbCgoMYVuZq8m1vK31JQx9DorwQdLMF1M3sJ7kygLLqnW && + ipfs cat $FILE1_HASH && + ipfs pin add --recursive=false $ADIR_HASH && ipfs files rm -r /adir && ipfs repo gc && # will remove /adir/file1 and /adir/file2 but not /adir - ipfs files cp /ipfs/QmbCgoMYVuZq8m1vK31JQx9DorwQdLMF1M3sJ7kygLLqnW /adir && - ipfs pin rm QmbCgoMYVuZq8m1vK31JQx9DorwQdLMF1M3sJ7kygLLqnW + test_must_fail ipfs cat $FILE1_HASH && + ipfs files cp /ipfs/$ADIR_HASH /adir && + ipfs pin rm $ADIR_HASH ' test_expect_success "gc okay after adding incomplete node" ' - ipfs refs QmbCgoMYVuZq8m1vK31JQx9DorwQdLMF1M3sJ7kygLLqnW && + ipfs refs $ADIR_HASH && ipfs repo gc && - ipfs refs QmbCgoMYVuZq8m1vK31JQx9DorwQdLMF1M3sJ7kygLLqnW + ipfs refs $ADIR_HASH ' test_done