Skip to content

Commit

Permalink
Merge pull request #38 from jfontan/improvement/improve-index-update
Browse files Browse the repository at this point in the history
Improve siva index generation in ReadWriter
  • Loading branch information
mcuadros authored Oct 16, 2018
2 parents a31824b + fd4c675 commit 7da16ad
Show file tree
Hide file tree
Showing 4 changed files with 186 additions and 5 deletions.
117 changes: 114 additions & 3 deletions index.go
Original file line number Diff line number Diff line change
Expand Up @@ -161,13 +161,25 @@ func (i *Index) WriteTo(w io.Writer) error {
return nil
}

func (s Index) Len() int { return len(s) }
func (s Index) Swap(i, j int) { s[i], s[j] = s[j], s[i] }
// Len reports how many entries the index holds. It is part of the
// sort.Interface implementation.
func (s Index) Len() int {
	return len(s)
}

// Swap exchanges the entries stored at positions i and j. It is part of the
// sort.Interface implementation.
func (s Index) Swap(i, j int) {
	atJ, atI := s[j], s[i]
	s[i] = atJ
	s[j] = atI
}

// Less reports whether the entry at position i has a smaller absStart than
// the entry at position j, so sorting an Index orders it by absStart. It is
// part of the sort.Interface implementation.
func (s Index) Less(i, j int) bool {
	left := s[i].absStart
	right := s[j].absStart

	return left < right
}

// Filter returns a filtered copy of the current Index: duplicates are
// removed keeping the latest version of each file, and deleted files are
// dropped. The result is sorted with sort.Sort, i.e. by the order defined
// by Index.Less.
func (i *Index) Filter() Index {
	filtered := i.filter()

	// filter itself does not sort; ordering is applied here.
	sort.Sort(filtered)

	return filtered
}

func (i *Index) filter() Index {
var f Index

seen := make(map[string]bool)
Expand All @@ -186,7 +198,6 @@ func (i *Index) Filter() Index {
f = append(f, e)
}

sort.Sort(f)
return f
}

Expand Down Expand Up @@ -235,6 +246,106 @@ func (i Index) Glob(pattern string) ([]*IndexEntry, error) {
return matches, nil
}

// OrderedIndex is a specialized index kept in lexicographic order of the
// entry names. It has methods to add or delete IndexEntries while
// maintaining that order, and a faster Find method.
type OrderedIndex Index

// Pos returns the position of the first entry whose Name is not less than
// path: the position of path itself when it is present, otherwise the
// position where it would have to be inserted. An empty index yields 0.
//
// The lookup is a binary search (sort.Search), so it is only meaningful when
// the index is sorted by Name.
func (o OrderedIndex) Pos(path string) int {
	// sort.Search returns 0 without calling the predicate when the length
	// is 0, so the empty index needs no special case.
	notBefore := func(idx int) bool {
		return o[idx].Name >= path
	}

	return sort.Search(len(o), notBefore)
}

// Update applies e to the index and returns the resulting index. An entry
// with FlagDeleted set removes the entry that has the same Name; any other
// entry is added (or replaces the existing one) through Add. A nil entry
// leaves the index unchanged.
func (o OrderedIndex) Update(e *IndexEntry) OrderedIndex {
	switch {
	case e == nil:
		return o
	case e.Flags&FlagDeleted != 0:
		return o.Delete(e.Name)
	default:
		return o.Add(e)
	}
}

// Add returns an updated index that contains e at the position given by
// Pos. When an entry with the same Name already exists it is overwritten in
// place; otherwise e is inserted. A nil entry leaves the index unchanged.
//
// The returned slice may share its backing array with o, so callers should
// keep using the returned value only.
func (o OrderedIndex) Add(e *IndexEntry) OrderedIndex {
	if e == nil {
		return o
	}

	n := len(o)
	if n == 0 {
		return OrderedIndex{e}
	}

	idx := o.Pos(e.Name)
	switch {
	case idx == n:
		// e sorts after every existing entry.
		return append(o, e)
	case o[idx].Name == e.Name:
		// Same name: replace the existing entry.
		o[idx] = e
		return o
	}

	// Build "e + everything from idx on" first so the shift does not
	// overwrite entries that still have to be copied.
	tail := append(Index{e}, o[idx:]...)

	return append(o[:idx], tail...)
}

// Delete returns an updated index with the IndexEntry for path removed. When
// no entry has that Name (including when the index is empty) the index is
// returned unchanged.
//
// The returned slice shares its backing array with o, so callers should keep
// using the returned value only.
func (o OrderedIndex) Delete(path string) OrderedIndex {
	pos := o.Pos(path)

	// Pos returns len(o) when path sorts after every entry (and 0 for an
	// empty index). That position holds no entry, so it must be rejected
	// before indexing; otherwise o[pos+1:] panics with a slice bounds error.
	if pos >= len(o) || o[pos].Name != path {
		return o
	}

	return append(o[:pos], o[pos+1:]...)
}

// Find returns the IndexEntry whose Name equals path, or nil when there is
// none. It locates the candidate with Pos (a binary search), which makes it
// faster than Index.Find.
func (o OrderedIndex) Find(path string) *IndexEntry {
	if len(o) == 0 {
		return nil
	}

	idx := o.Pos(path)
	if idx < 0 || idx >= len(o) {
		return nil
	}

	// Pos only gives the insertion point; confirm the name really matches.
	if candidate := o[idx]; candidate.Name == path {
		return candidate
	}

	return nil
}

// Sort orders the index in place lexicographically by entry Name, using the
// sort.Interface methods defined on OrderedIndex.
func (o OrderedIndex) Sort() {
	var byName sort.Interface = o
	sort.Sort(byName)
}

// Len reports how many entries the ordered index holds. It is part of the
// sort.Interface implementation.
func (o OrderedIndex) Len() int {
	return len(o)
}

// Swap exchanges the entries stored at positions i and j. It is part of the
// sort.Interface implementation.
func (o OrderedIndex) Swap(i, j int) {
	atJ, atI := o[j], o[i]
	o[i] = atJ
	o[j] = atI
}

// Less reports whether the Name of the entry at position i sorts before the
// Name of the entry at position j. It is part of the sort.Interface
// implementation.
func (o OrderedIndex) Less(i, j int) bool {
	left := o[i].Name
	right := o[j].Name

	return left < right
}

type IndexEntry struct {
Header
Start uint64
Expand Down
7 changes: 6 additions & 1 deletion readwriter.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,12 +26,17 @@ func NewReaderWriter(rw io.ReadWriteSeeker) (*ReadWriter, error) {
}

w := newWriter(rw)
w.oIndex = OrderedIndex(i.filter())
w.oIndex.Sort()

getIndexFunc := func() (Index, error) {
for _, e := range w.index {
e.absStart = uint64(end) + e.Start
}
return append(i, w.index...), nil

return Index(w.oIndex), nil
}

r := newReaderWithIndex(rw, getIndexFunc)
return &ReadWriter{r, w}, nil
}
64 changes: 63 additions & 1 deletion readwriter_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,15 @@ func (s *ReadWriterSuite) testWriteRead(c *C, f *os.File, iter int) {

index, err := rw.Index()
c.Assert(err, IsNil)
c.Assert(len(index), Equals, iters*iter+i+1)

// index after the first iteration will contain the total amount
// of files
num := i + 1
if iter > 0 {
num = iters
}

c.Assert(len(index), Equals, num)

e := index.Find(curName)
c.Assert(e, NotNil)
Expand Down Expand Up @@ -172,3 +180,57 @@ func (_ dummyReadWriterSeeker) Write(p []byte) (n int, err error) {
func (_ dummyReadWriterSeeker) Seek(offset int64, whence int) (n int64, err error) {
return
}

// TestDelete writes a sequence of regular and FlagDeleted headers through a
// ReadWriter and, after every flush, checks that the index reported by the
// ReadWriter contains exactly the expected names in lexicographic order.
func (s *ReadWriterSuite) TestDelete(c *C) {
	const payload = "data"

	path := filepath.Join(s.tmpDir, c.TestName())
	tmpFile, err := os.Create(path)
	c.Assert(err, IsNil)
	c.Assert(tmpFile, NotNil)

	rw, err := siva.NewReaderWriter(tmpFile)
	c.Assert(err, IsNil)

	// Each step writes one header (optionally flagged as deleted) and lists
	// the names the index must hold afterwards.
	type step struct {
		name     string
		del      bool
		expected []string
	}

	steps := []step{
		{name: "one", del: false, expected: []string{"one"}},
		{name: "two", del: false, expected: []string{"one", "two"}},
		{name: "three", del: false, expected: []string{"one", "three", "two"}},
		{name: "two", del: true, expected: []string{"one", "three"}},
		{name: "two", del: false, expected: []string{"one", "three", "two"}},
		// Deleting a name that was never added must not change the index.
		{name: "four", del: true, expected: []string{"one", "three", "two"}},
		{name: "three", del: true, expected: []string{"one", "two"}},
	}

	for _, st := range steps {
		header := &siva.Header{Name: st.name}
		if st.del {
			header.Flags = siva.FlagDeleted
		}

		err := rw.WriteHeader(header)
		c.Assert(err, IsNil)

		written, err := rw.Write([]byte(payload))
		c.Assert(err, IsNil)
		c.Assert(written, Equals, len(payload))

		err = rw.Flush()
		c.Assert(err, IsNil)

		index, err := rw.Index()
		c.Assert(err, IsNil)

		c.Assert(len(index), Equals, len(st.expected))
		for pos, want := range st.expected {
			c.Assert(index[pos].Name, Equals, want)
		}
	}
}
3 changes: 3 additions & 0 deletions writer.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ type Writer interface {
type writer struct {
w *hashedWriter
index Index
oIndex OrderedIndex
current *IndexEntry
position uint64
closed bool
Expand Down Expand Up @@ -49,6 +50,8 @@ func (w *writer) WriteHeader(h *Header) error {
}

w.index = append(w.index, w.current)
w.oIndex = w.oIndex.Update(w.current)

return nil
}

Expand Down

0 comments on commit 7da16ad

Please sign in to comment.