ipld · warpfork · Oct 14, 2021 · Oct 10, 2021 · Oct 10, 2021 · Oct 10, 2021
diff --git a/adl/rot13adl/example_test.go b/adl/rot13adl/example_test.go
@@ -14,7 +14,7 @@ import (
 	"github.com/ipld/go-ipld-prime/linking"
 	cidlink "github.com/ipld/go-ipld-prime/linking/cid"
 	"github.com/ipld/go-ipld-prime/must"
-	"github.com/ipld/go-ipld-prime/storage"
+	"github.com/ipld/go-ipld-prime/storage/memstore"
 )
 
 func ExampleReify_unmarshallingToADL() {
@@ -64,9 +64,9 @@ func ExampleReify_loadingToADL() {
 		MhLength: 4,
 	}}
 	linkSystem := cidlink.DefaultLinkSystem()
-	storage := &storage.Memory{}
-	linkSystem.StorageReadOpener = storage.OpenRead
-	linkSystem.StorageWriteOpener = storage.OpenWrite
+	storage := &memstore.Store{}
+	linkSystem.SetReadStorage(storage)
+	linkSystem.SetWriteStorage(storage)
 	linkSystem.NodeReifier = func(_ linking.LinkContext, nd datamodel.Node, _ *linking.LinkSystem) (datamodel.Node, error) {
 		return rot13adl.Reify(nd)
 	}

diff --git a/datamodel/link.go b/datamodel/link.go
@@ -34,6 +34,19 @@ type Link interface {
 	// There is no contract that requires that the string be able to be parsed back into a Link value,
 	// but the string should be unique (e.g. not elide any parts of the hash).
 	String() string
+
+	// Binary should return the densest possible encoding of the Link.
+	// The value need not be printable or human-readable;
+	// the golang string type is used for immutability and for ease of use as a map key.
+	// As with the String method, the returned value may not elide any parts of the hash.
+	//
+	// Note that there is still no contract that the returned value be able to be parsed back into a Link value;
+	// not even in the case of `lnk.Prototype().BuildLink(lnk.Binary()[:])`.
+	// This is because the value returned by this method may contain data that the LinkPrototype would also restate.
+	// (For a concrete example: if using CIDs, this method will return a binary string that includes
+	// the cid version indicator, the multicodec and multihash indicators, etc, in addition to the hash itself --
+	// whereas the LinkPrototype.BuildLink function still expects to receive only the hash itself alone.)
+	Binary() string
 }
 
 // LinkPrototype encapsulates any implementation details and parameters

diff --git a/linking/cid/cidLink.go b/linking/cid/cidLink.go
@@ -30,6 +30,9 @@ func (lnk Link) Prototype() datamodel.LinkPrototype {
 func (lnk Link) String() string {
 	return lnk.Cid.String()
 }
+func (lnk Link) Binary() string {
+	return lnk.Cid.KeyString()
+}
 
 type LinkPrototype struct {
 	cid.Prefix

diff --git a/linking/linkingExamples_test.go b/linking/linkingExamples_test.go
@@ -10,7 +10,7 @@ import (
 	"github.com/ipld/go-ipld-prime/linking"
 	cidlink "github.com/ipld/go-ipld-prime/linking/cid"
 	"github.com/ipld/go-ipld-prime/node/basicnode"
-	"github.com/ipld/go-ipld-prime/storage"
+	"github.com/ipld/go-ipld-prime/storage/memstore"
 )
 
 // storage is a map where we'll store serialized IPLD data.
@@ -20,7 +20,7 @@ import (
 //
 // In a real program, you'll probably make functions to load and store from disk,
 // or some network storage, or... whatever you want, really :)
-var store = storage.Memory{}
+var store = memstore.Store{}
 
 // TODO: These examples are really heavy on CIDs and the multicodec and multihash magic tables.
 // It would be good to have examples that create and use less magical LinkSystem constructions, too.
@@ -36,8 +36,8 @@ func ExampleLinkSystem_Store() {
 	// We want to store the serialized data somewhere.
 	//  We'll use an in-memory store for this.  (It's a package scoped variable.)
 	//  You can use any kind of storage system here;
-	//   you just need a function that conforms to the datamodel.BlockWriteOpener interface.
-	lsys.StorageWriteOpener = (&store).OpenWrite
+	//   or if you need even more control, you could also write a function that conforms to the linking.BlockWriteOpener interface.
+	lsys.SetWriteStorage(&store)
 
 	// To create any links, first we need a LinkPrototype.
 	// This gathers together any parameters that might be needed when making a link.
@@ -103,8 +103,8 @@ func ExampleLinkSystem_Load() {
 	//  We'll use an in-memory store for this.  (It's a package scoped variable.)
 	//   (This particular memory store was filled with the data we'll load earlier, during ExampleLinkSystem_Store.)
 	//  You can use any kind of storage system here;
-	//   you just need a function that conforms to the datamodel.BlockReadOpener interface.
-	lsys.StorageReadOpener = (&store).OpenRead
+	//   or if you need even more control, you could also write a function that conforms to the linking.BlockReadOpener interface.
+	lsys.SetReadStorage(&store)
 
 	// We'll need to decide what in-memory implementation of datamodel.Node we want to use.
 	//  Here, we'll use the "basicnode" implementation.  This is a good getting-started choice.

diff --git a/linking/setup.go b/linking/setup.go
@@ -0,0 +1,41 @@
+package linking
+
+import (
+	"io"
+
+	"github.com/ipld/go-ipld-prime/datamodel"
+	"github.com/ipld/go-ipld-prime/storage"
+)
+
+// SetReadStorage configures how the LinkSystem will look for information to load,
+// setting it to look at the given storage.ReadableStorage.
+//
+// This will overwrite the LinkSystem.StorageReadOpener field.
+//
+// This mechanism only supports setting exactly one ReadableStorage.
+// If you would like to make a more complex configuration
+// (for example, perhaps using information from a LinkContext to decide which storage area to use?)
+// then you should set LinkSystem.StorageReadOpener to a custom callback of your own creation instead.
+func (lsys *LinkSystem) SetReadStorage(store storage.ReadableStorage) {
+	lsys.StorageReadOpener = func(lctx LinkContext, lnk datamodel.Link) (io.Reader, error) {
+		return storage.GetStream(lctx.Ctx, store, lnk.Binary())
+	}
+}
+
+// SetWriteStorage configures how the LinkSystem will store information,
+// setting it to write into the given storage.WritableStorage.
+//
+// This will overwrite the LinkSystem.StorageWriteOpener field.
+//
+// This mechanism only supports setting exactly one WritableStorage.
+// If you would like to make a more complex configuration
+// (for example, perhaps using information from a LinkContext to decide which storage area to use?)
+// then you should set LinkSystem.StorageWriteOpener to a custom callback of your own creation instead.
+func (lsys *LinkSystem) SetWriteStorage(store storage.WritableStorage) {
+	lsys.StorageWriteOpener = func(lctx LinkContext) (io.Writer, BlockWriteCommitter, error) {
+		wr, wrcommit, err := storage.PutStream(lctx.Ctx, store)
+		return wr, func(lnk datamodel.Link) error {
+			return wrcommit(lnk.Binary())
+		}, err
+	}
+}
diff --git a/node/tests/schemaLinks.go b/node/tests/schemaLinks.go
@@ -15,13 +15,13 @@ import (
 	cidlink "github.com/ipld/go-ipld-prime/linking/cid"
 	"github.com/ipld/go-ipld-prime/node/basicnode"
 	"github.com/ipld/go-ipld-prime/schema"
-	"github.com/ipld/go-ipld-prime/storage"
+	"github.com/ipld/go-ipld-prime/storage/memstore"
 	"github.com/ipld/go-ipld-prime/traversal"
 	"github.com/ipld/go-ipld-prime/traversal/selector"
 	"github.com/ipld/go-ipld-prime/traversal/selector/builder"
 )
 
-var store = storage.Memory{}
+var store = memstore.Store{}
 
 func encode(n datamodel.Node) (datamodel.Node, datamodel.Link) {
 	lp := cidlink.LinkPrototype{cid.Prefix{
@@ -31,7 +31,7 @@ func encode(n datamodel.Node) (datamodel.Node, datamodel.Link) {
 		MhLength: 4,
 	}}
 	lsys := cidlink.DefaultLinkSystem()
-	lsys.StorageWriteOpener = (&store).OpenWrite
+	lsys.SetWriteStorage(&store)
 
 	lnk, err := lsys.Store(linking.LinkContext{}, lp, n)
 	if err != nil {
@@ -96,7 +96,7 @@ func SchemaTestLinks(t *testing.T, engine Engine) {
 
 		var order int
 		lsys := cidlink.DefaultLinkSystem()
-		lsys.StorageReadOpener = (&store).OpenRead
+		lsys.SetReadStorage(&store)
 		err = traversal.Progress{
 			Cfg: &traversal.Config{
 				LinkSystem: lsys,

diff --git a/storage/api.go b/storage/api.go
@@ -0,0 +1,103 @@
+package storage
+
+import (
+	"context"
+	"io"
+)
+
+// --- basics --->
+
+type ReadableStorage interface {
+	Get(ctx context.Context, key string) ([]byte, error)
+}
+
+type WritableStorage interface {
+	Put(ctx context.Context, key string, content []byte) error
+}
+
+// --- streaming --->
+
+type StreamingReadableStorage interface {
+	// Note that the returned io.Reader may also be an io.ReadCloser -- check for this.
+	GetStream(ctx context.Context, key string) (io.Reader, error)
+}
+
+// StreamingWritableStorage is a feature-detection interface that advertises support for streaming writes.
+// It is normal for APIs to use WritableStorage in their exported API surface,
+// and then internally check if that value implements StreamingWritableStorage if they wish to use streaming operations.
+//
+// Streaming writes can be preferable to the all-in-one style of writing of WritableStorage.Put,
+// because with streaming writes, the high water mark for memory usage can be kept lower.
+// On the other hand, streaming writes can incur slightly higher allocation counts,
+// which may cause some performance overhead when handling many small writes in sequence.
+//
+// The PutStream function returns three parameters: an io.Writer (as you'd expect), another function, and an error.
+// The function returned is called a "WriteCommitter".
+// The final error value is as usual: it will contain an error value if the write could not be begun.
+// ("WriteCommitter" will be refered to as such throughout the docs, but we don't give it a named type --
+// unfortunately, this is important, because we don't want to force implementers of storage systems to import this package just for a type name.)
+//
+// The WriteCommitter function should be called when you're done writing,
+// at which time you give it the key you want to commit the data as.
+// It will close and flush any streams, and commit the data to its final location under this key.
+// (If the io.Writer is also an io.WriteCloser, it is not necessary to call Close on it,
+// because using the WriteCommiter will do this for you.)
+//
+// Because these storage APIs are meant to work well for content-addressed systems,
+// the key argument is not provided at the start of the write -- it's provided at the end.
+// (This gives the opportunity to be computing a hash of the contents as they're written to the stream.)
+//
+// As a special case, giving a key of the zero string to the WriteCommiter will
+// instead close and remove any temp files, and store nothing.
+// An error may still be returned from the WriteCommitter if there is an error cleaning up
+// any temporary storage buffers that were created.
+//
+// Continuing to write to the io.Writer after calling the WriteCommitter function will result in errors.
+// Calling the WriteCommitter function more than once will result in errors.
+type StreamingWritableStorage interface {
+	PutStream(ctx context.Context) (io.Writer, func(key string) error, error)
+}
+
+// --- other specializations --->
+
+// VectorWritableStorage is an API for writing several slices of bytes at once into storage.
+// It's meant a feature-detection interface; not all storage implementations need to provide this feature.
+// This kind of API can be useful for maximizing performance in scenarios where
+// data is already loaded completely into memory, but scattered across several non-contiguous regions.
+type VectorWritableStorage interface {
+	PutVec(ctx context.Context, key string, blobVec [][]byte) error
+}
+
+// PeekableStorage is a feature-detection interface which a storage implementation can use to advertise
+// the ability to look at a piece of data, and return it in shared memory.
+// The PeekableStorage.Peek method is essentially the same as ReadableStorage.Get --
+// but by contrast, ReadableStorage is expected to return a safe copy.
+// PeekableStorage can be used when the caller knows they will not mutate the returned slice.
+//
+// An io.Closer is returned along with the byte slice.
+// The Close method on the Closer must be called when the caller is done with the byte slice;
+// otherwise, memory leaks may result.
+// (Implementers of this interface may be expecting to reuse the byte slice after Close is called.)
+//
+// Note that Peek does not imply that the caller can use the byte slice freely;
+// doing so may result in storage corruption or other undefined behavior.
+type PeekableStorage interface {
+	Peek(ctx context.Context, key string) ([]byte, io.Closer, error)
+}
+
+// the following are all hypothetical additional future interfaces (in varying degress of speculativeness):
+
+// FUTURE: an EnumerableStorage API, that lets you list all keys present?
+
+// FUTURE: a cleanup API (for getting rid of tmp files that might've been left behind on rough shutdown)?
+
+// FUTURE: a sync-forcing API?
+
+// FUTURE: a delete API?  sure.  (just document carefully what its consistency model is -- i.e. basically none.)
+//   (hunch: if you do want some sort of consistency model -- consider offering a whole family of methods that have some sort of generation or sequencing number on them.)
+
+// FUTURE: a force-overwrite API?  (not useful for a content-address system.  but maybe a gesture towards wider reusability is acceptable to have on offer.)
+
+// FUTURE: a size estimation API?  (unclear if we need to standardize this, but we could.  an offer, anyway.)
+
+// FUTURE: a GC API?  (dubious -- doing it well probably crosses logical domains, and should not be tied down here.)
diff --git a/storage/doc.go b/storage/doc.go
@@ -1,9 +1,49 @@
-// Storage contains some simple implementations for the
-// ipld.BlockReadOpener and ipld.BlockWriteOpener interfaces,
-// which are typically used by composition in a LinkSystem.
-//
-// These are provided as simple "batteries included" storage systems.
-// They are aimed at being quickly usable to build simple demonstrations.
-// For heavy usage (large datasets, with caching, etc) you'll probably
-// want to start looking for other libraries which go deeper on this subject.
+// The storage package contains interfaces for storage systems, and functions for using them.
+//
+// These are very low-level storage primitives.
+// The interfaces here deal only with raw keys and raw binary blob values.
+//
+// In IPLD, you can often avoid dealing with storage directly yourself,
+// and instead use linking.LinkSystem to handle serialization, hashing, and storage all at once.
+// You'll hand some values that match interfaces from this package to LinkSystem when configuring it.
+//
+// The most basic APIs are ReadableStorage and WritableStorage.
+// APIs should usually be designed around accepting ReadableStorage or WritableStorage as parameters
+// (depending on which direction of data flow the API is regarding),
+// and use the other interfaces (e.g. StreamingReadableStorage) thereafter internally for feature detection.
+// Similarly, implementers of storage systems should implement ReadableStorage or WritableStorage
+// before any other features.
+//
+// Storage systems as described by this package are allowed to make some interesting trades.
+// Generally, write operations are allowed to be first-write-wins.
+// Furthermore, there is no requirement that the system return an error if a subsequent write to the same key has different content.
+// These rules are reasonable for a content-addressed storage system, and allow great optimizitions to be made.
+//
+// If implementing a storage system, you should implement packages from this interface.
+// Beyond the basic two (described above), all the other interfaces are optional:
+// you can implement them if you want to advertise additional features,
+// or advertise fastpaths that your storage system supports;
+// but you don't have implement any of the additional interfaces if you don't want to.
+//
+// Note that all of the interfaces in this package only use types that are present in the golang standard library.
+// This is intentional, and was done very carefully.
+// If implementing a storage system, you should find it possible to do so *without* importing this package.
+// Because only standard library types are present in the interface contracts,
+// it's possible to implement types that align with the interfaces without refering to them.
+//
+// Note that where keys are discussed in this package, they use the golang string type --
+// however, they may be binary.  (The golang string type allows arbitrary bytes in general,
+// and here, we both use that, and explicitly disavow the usual "norm" that the string type implies UTF-8.
+// This is roughly the same as the practical truth that appears when using e.g. os.OpenFile and other similar functions.)
+// If you are creating a storage implementation where the underlying medium does not support arbitrary binary keys,
+// then it is strongly recommend that your storage implementation should support being configured with
+// an "escaping function", which should typically simply be of the form `func(string) string`.
+// Additional, your storage implementation's documentation should also clearly describe its internal limitations,
+// so that users have enough information to write an escaping function which
+// maps their domain into the domain your storage implementation can handle.
 package storage
+
+// also note:
+// LinkContext stays *out* of this package.  It's a chooser-related thing.
+// LinkSystem can think about it (and your callbacks over there can think about it), and that's the end of its road.
+// (Future: probably LinkSystem should have SetStorage and SetupStorageChooser methods for helping you set things up -- where the former doesn't discuss LinkContext at all.)
diff --git a/storage/dsadapter/README.md b/storage/dsadapter/README.md
@@ -0,0 +1,33 @@
+dsadapter
+=========
+
+The `dsadapter` package/module is a small piece of glue code to connect
+the `github.com/ipfs/go-datastore` package, and packages implementing its interfaces,
+forward into the `go-ipld-prime/storage` interfaces.
+
+For example, this can be used to use "flatfs" and other datastore plugins
+with go-ipld-prime storage APIs.
+
+Why structured like this?
+-------------------------
+
+Why are there layers of interface code?
+The `go-ipld-prime/storage` interfaces are a newer generation,
+and improves on several things vs `go-datastore`.  (See other docs for that.)
+
+Why is this code in a shared place?
+The glue code to connect `go-datastore` to the new `go-ipld-prime/storage` APIs
+is fairly minimal, but there's also no reason for anyone to write it twice,
+so we want to put it somewhere easy to share.
+
+Why does this code has its own go module?
+A separate module is used because it's important that go-ipld-prime can be used
+without forming a dependency on `go-datastore`.
+(We want this so that there's a reasonable deprecation pathway -- it must be
+possible to write new code that doesn't take on transitive dependencies to old code.)
+
+Why does this code exist here, in this git repo?
+We put this separate module in the same git repo as `go-ipld-prime`... because we can.
+Technically, neither this module nor the go-ipld-prime module depend on each other --
+they just have interfaces that are aligned with each other -- so it's very easy to
+hold them as separate go modules in the same repo, even though that can otherwise sometimes be tricky.