-
Notifications
You must be signed in to change notification settings - Fork 3
/
partition_index.go
25 lines (23 loc) · 2.51 KB
/
partition_index.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
package sif
// PartitionIndex is an index over Partitions, useful for shuffling, sorting
// and/or reducing.
//
// An implementation accepts whole Partitions as well as individual Rows, and
// exposes a PartitionIterator/SerializedPartitionIterator which walks the
// indexed Partitions in an order specific to that implementation (for example,
// sorted order for an index which sorts Rows).
//
// Partition data is not stored by the index itself; storage is delegated to an
// underlying PartitionCache.
type PartitionIndex interface {
	// SetMaxRows changes the maxRows applied to Partitions created by this
	// index from now on.
	SetMaxRows(maxRows int)

	// GetNextStageSchema returns the Schema of the Stage which will *read*
	// from this index.
	GetNextStageSchema() Schema

	// MergePartition merges every Row of a keyed Partition into this
	// PartitionIndex. A nil reducefn indicates that reduction is not intended.
	MergePartition(part ReduceablePartition, reducefn ReductionOperation) error

	// MergeRow merges a single keyed Row into this PartitionIndex. The Row may
	// be appended to an existing or new Partition, or combined with an
	// existing Row. A nil reducefn indicates that reduction is not intended.
	MergeRow(tempRow, row Row, reducefn ReductionOperation) error

	// GetPartitionIterator returns the PartitionIterator for this
	// PartitionIndex. Every call must return the same PartitionIterator.
	GetPartitionIterator(destructive bool) PartitionIterator

	// GetSerializedPartitionIterator returns the SerializedPartitionIterator
	// for this PartitionIndex. Every call must return the same
	// SerializedPartitionIterator.
	GetSerializedPartitionIterator(destructive bool) SerializedPartitionIterator

	// NumPartitions returns how many Partitions this PartitionIndex holds.
	NumPartitions() uint64

	// CacheSize returns the in-memory size, measured in Partitions, of the
	// underlying PartitionCache.
	CacheSize() int

	// ResizeCache resizes the underlying PartitionCache.
	// NOTE(review): the meaning of frac and of the bool result is not stated
	// here - confirm against the implementations.
	ResizeCache(frac float64) bool

	// Destroy destroys the index.
	Destroy()
}
// BucketedPartitionIndex is a PartitionIndex which is split into buckets,
// each bucket being addressed by a uint64.
type BucketedPartitionIndex interface {
	PartitionIndex

	// GetBucket returns the PartitionIndex associated with the given bucket.
	GetBucket(bucket uint64) PartitionIndex
}