Skip to content
This repository was archived by the owner on Aug 23, 2023. It is now read-only.

read from first chunks #994

Merged
merged 2 commits into from
Aug 14, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
41 changes: 17 additions & 24 deletions mdata/aggmetric.go
Original file line number Diff line number Diff line change
Expand Up @@ -38,11 +38,11 @@ type AggMetric struct {
Chunks []*chunk.Chunk
aggregators []*Aggregator
dropFirstChunk bool
firstChunkT0 uint32
ttl uint32
lastSaveStart uint32 // last chunk T0 that was added to the write Queue.
lastSaveFinish uint32 // last chunk T0 successfully written to Cassandra.
lastWrite uint32
firstTs uint32
}

// NewAggMetric creates a metric with given key, it retains the given number of chunks each chunkSpan seconds long
Expand Down Expand Up @@ -268,27 +268,16 @@ func (a *AggMetric) Get(from, to uint32) (Result, error) {
return result, ErrNilChunk
}

// The first chunk is likely only a partial chunk. If we are not the primary node
// we should not serve data from this chunk, and should instead get the chunk from cassandra.
// if we are the primary node, then there is likely no data in Cassandra anyway.
if !cluster.Manager.IsPrimary() && oldestChunk.T0 == a.firstChunkT0 {
oldestPos++
if oldestPos >= len(a.Chunks) {
oldestPos = 0
}
oldestChunk = a.getChunk(oldestPos)
if oldestChunk == nil {
log.Error(3, "%s", ErrNilChunk)
return result, ErrNilChunk
}
}

if to <= oldestChunk.T0 {
// the requested time range ends before any data we have.
if LogLevel < 2 {
log.Debug("AM %s Get(): no data for requested range", a.Key)
}
result.Oldest = oldestChunk.T0
if oldestChunk.First {
result.Oldest = a.firstTs
} else {
result.Oldest = oldestChunk.T0
}
return result, nil
}

Expand Down Expand Up @@ -342,8 +331,13 @@ func (a *AggMetric) Get(from, to uint32) (Result, error) {
}
}

if oldestChunk.First {
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It seems unnecessary that each chunk needs to have this First bool. Can you use the t0 and LastTs to be able to tell whether firstTs falls in this range? Something like a ContainsTimestamp function?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

that's a good idea. I had been thinking for a while what was the simplest/lowest-overhead way to do this (ideally introducing only 1 new attribute instead of 2)

But note that the First field should have no overhead. it's a bool so fits in the space that was previously padded space (as we already had a bool on chunk.Chunk) so i still like it.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's less about the space and more about the branching of chunk types, when from a Chunks perspective it is just a Chunk and has no concept of Firstness. Like, if Chunk was just a slice, it would be weird for the First member to live with the slice, right?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

funny, i also thought about this. but what I realized is that chunk.Chunk is already not a pure chunk, rather it is a "chunk as embedded in an AggMetric in metrictank", what i mean is it already has a bunch of fields for other uses within metrictank besides mere chunkness. so one more doesn't change anything IMHO.

that said at some point i want to refactor this to clarify this.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If it doesn't bother you that's ok. But I'll point out that the other elements of the Chunk are actually properties relevant to a Chunks internals. First will be the first/only property that is actually about a state outside of Chunk, and that's what makes it stand out as awkward to me.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

makes sense. you're looking at it from a "field describes which state" perspective. I was looking at it "field is used for what/by what" perspective.
using the former perspective generally leads to cleaner code I think. And you're right, it looks a bit more awkward to me too now. But it's the most efficient solution so until we refactor I think it's OK.

result.Oldest = a.firstTs
} else {
result.Oldest = oldestChunk.T0
}

memToIterDuration.Value(time.Now().Sub(pre))
result.Oldest = oldestChunk.T0
return result, nil
}

Expand Down Expand Up @@ -483,12 +477,11 @@ func (a *AggMetric) add(ts uint32, val float64) {

if len(a.Chunks) == 0 {
chunkCreate.Inc()
// no data has been added to this metric at all.
a.Chunks = append(a.Chunks, chunk.New(t0))

// The first chunk is typically going to be a partial chunk
// so we keep a record of it.
a.firstChunkT0 = t0
// no data has been added to this AggMetric yet.
// note that we may not be aware of prior data that belongs into this chunk
// so we should track this cutoff point
a.Chunks = append(a.Chunks, chunk.NewFirst(t0))
a.firstTs = ts

if err := a.Chunks[0].Push(ts, val); err != nil {
panic(fmt.Sprintf("FATAL ERROR: this should never happen. Pushing initial value <%d,%f> to new chunk at pos 0 failed: %q", ts, val, err))
Expand Down
6 changes: 2 additions & 4 deletions mdata/aggmetric_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,7 @@ func (c *Checker) Verify(primary bool, from, to, first, last uint32) {
}
for pj = pi; c.points[pj].ts != last; pj++ {
}
c.t.Logf("verifying AggMetric.Get(%d,%d) =?= %d <= ts <= %d", from, to, first, last)
c.t.Logf("verifying AggMetric.Get(%d,%d) -> range is %d - %d ?", from, to, first, last)
index := pi - 1
for _, iter := range res.Iters {
for iter.Next() {
Expand Down Expand Up @@ -182,9 +182,7 @@ func TestAggMetric(t *testing.T) {
c.Add(200, 200)
c.Add(315, 315)
c.Verify(true, 100, 399, 101, 315)

// verify as secondary node. Data from the first chunk should not be returned.
c.Verify(false, 100, 399, 200, 315)
c.Verify(false, 100, 399, 101, 315)

// get subranges
c.Verify(true, 120, 299, 101, 200)
Expand Down
15 changes: 10 additions & 5 deletions mdata/chunk/chunk.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,20 +15,25 @@ type Chunk struct {
tsz.Series
LastTs uint32 // last TS seen, not computed or anything
NumPoints uint32
First bool
Closed bool
}

func New(t0 uint32) *Chunk {
return &Chunk{
Series: *tsz.New(t0),
LastTs: 0,
NumPoints: 0,
Closed: false,
Series: *tsz.New(t0),
}
}

func NewFirst(t0 uint32) *Chunk {
return &Chunk{
Series: *tsz.New(t0),
First: true,
}
}

func (c *Chunk) String() string {
return fmt.Sprintf("<chunk T0=%d, LastTs=%d, NumPoints=%d, Closed=%t>", c.T0, c.LastTs, c.NumPoints, c.Closed)
return fmt.Sprintf("<chunk T0=%d, LastTs=%d, NumPoints=%d, First=%t, Closed=%t>", c.T0, c.LastTs, c.NumPoints, c.First, c.Closed)

}
func (c *Chunk) Push(t uint32, v float64) error {
Expand Down
2 changes: 1 addition & 1 deletion mdata/result.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,5 +8,5 @@ import (
type Result struct {
Points []schema.Point
Iters []chunk.Iter
Oldest uint32
Oldest uint32 // timestamp of oldest point we have, to know when and when not we may need to query slower storage
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

to know when and when not we may need

is a bit hard to parse. Maybe

to know whether we need

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

+1

}