forked from apache/beam
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[apache#30083] Add synthetic processing time to prism. (apache#30492)
* [prism] Add basic processing time queue. * Initial residual handling refactor. * Re-work teststream initialization. Remove pending element race. * touch up * rm merge duplicate * Simplify watermark hold tracking. * First successful run! * Remove duplicated test run. * Deduplicate processing time heap. * rm debug text * Remove some debug prints, cleanup. * tiny todo cleanup * ProcessingTime working most of the time! * Some cleanup * try to get github suite to pass #1 * touch * reduce counts a bit, filter tests some. * Clean up unrelated state changes. Clean up comments somewhat. * Filter out dataflow incompatible test. * Refine processing time event comment. * Remove test touch. --------- Co-authored-by: lostluck <[email protected]>
- Loading branch information
Showing
15 changed files
with
1,206 additions
and
78 deletions.
There are no files selected for viewing
284 changes: 247 additions & 37 deletions
284
sdks/go/pkg/beam/runners/prism/internal/engine/elementmanager.go
Large diffs are not rendered by default.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
96 changes: 96 additions & 0 deletions
96
sdks/go/pkg/beam/runners/prism/internal/engine/processingtime.go
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,96 @@ | ||
// Licensed to the Apache Software Foundation (ASF) under one or more | ||
// contributor license agreements. See the NOTICE file distributed with | ||
// this work for additional information regarding copyright ownership. | ||
// The ASF licenses this file to You under the Apache License, Version 2.0 | ||
// (the "License"); you may not use this file except in compliance with | ||
// the License. You may obtain a copy of the License at | ||
// | ||
// http://www.apache.org/licenses/LICENSE-2.0 | ||
// | ||
// Unless required by applicable law or agreed to in writing, software | ||
// distributed under the License is distributed on an "AS IS" BASIS, | ||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
// See the License for the specific language governing permissions and | ||
// limitations under the License. | ||
package engine | ||
|
||
import ( | ||
"container/heap" | ||
|
||
"github.com/apache/beam/sdks/v2/go/pkg/beam/core/graph/mtime" | ||
) | ||
|
||
// Notes on Processing Time handling: | ||
// | ||
// ProcessingTime events (processingTime timers, process continuations, triggers) necessarily need to operate on a global queue. | ||
// However, PT timers are per key+family+tag, and may be overwritten by subsequent elements. | ||
// So, similarly to event time timers, we need to manage a "last set" queue, and to manage the holds. | ||
// This implies they should probably be handled by state, instead of globally. | ||
// In reality, it's probably going to be "both", a global PT event queue, and per stage state. | ||
// | ||
// In principle, timers would be how to implement the related features, so getting those right will simplify their handling. | ||
// Test stream is already central, but doesn't set events, it controls their execution. | ||
// | ||
// The ElementManager doesn't retain any data itself, so it should not hold material data about what is being triggered. | ||
// The ElementManager should only contain which stage state should be triggered when in a time domain. | ||
// | ||
// ProcessContinuations count as pending events, and must be drained accordingly before time expires. | ||
// | ||
// A stage may trigger on multiple ticks. | ||
// It's up to a stage to schedule additional work on those notices. | ||
|
||
// stageRefreshQueue manages ProcessingTime events, in particular, which stages need notification | ||
// at which points in processing time they occur. It doesn't handle the interface between | ||
// walltime or any synthetic notions of time. | ||
// | ||
// stageRefreshQueue is not goroutine safe and relies on external synchronization. | ||
type stageRefreshQueue struct { | ||
events map[mtime.Time]set[string] | ||
order mtimeHeap | ||
} | ||
|
||
// newStageRefreshQueue creates an initialized stageRefreshQueue. | ||
func newStageRefreshQueue() *stageRefreshQueue { | ||
return &stageRefreshQueue{ | ||
events: map[mtime.Time]set[string]{}, | ||
} | ||
} | ||
|
||
// Schedule a stage event at the given time. | ||
func (q *stageRefreshQueue) Schedule(t mtime.Time, stageID string) { | ||
if s, ok := q.events[t]; ok { | ||
// We already have a trigger at this time, mutate that instead. | ||
if s.present(stageID) { | ||
// We already notify this stage at this time, no action required. | ||
return | ||
} | ||
s.insert(stageID) | ||
return | ||
} | ||
q.events[t] = set[string]{stageID: struct{}{}} | ||
heap.Push(&q.order, t) | ||
} | ||
|
||
// Peek returns the minimum time in the queue and whether it is valid. | ||
// If there are no times left in the queue, the boolean will be false. | ||
func (q *stageRefreshQueue) Peek() (mtime.Time, bool) { | ||
if len(q.order) == 0 { | ||
return mtime.MaxTimestamp, false | ||
} | ||
return q.order[0], true | ||
} | ||
|
||
// AdvanceTo takes in the current now time, and returns the set of ids that need a refresh. | ||
func (q *stageRefreshQueue) AdvanceTo(now mtime.Time) set[string] { | ||
notify := set[string]{} | ||
for { | ||
// If there are no elements, then we're done. | ||
if len(q.order) == 0 || q.order[0] > now { | ||
return notify | ||
} | ||
// pop elements off the queue until the next time is later than now. | ||
next := heap.Pop(&q.order).(mtime.Time) | ||
notify.merge(q.events[next]) | ||
delete(q.events, next) | ||
} | ||
} |
139 changes: 139 additions & 0 deletions
139
sdks/go/pkg/beam/runners/prism/internal/engine/processingtime_test.go
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,139 @@ | ||
// Licensed to the Apache Software Foundation (ASF) under one or more | ||
// contributor license agreements. See the NOTICE file distributed with | ||
// this work for additional information regarding copyright ownership. | ||
// The ASF licenses this file to You under the Apache License, Version 2.0 | ||
// (the "License"); you may not use this file except in compliance with | ||
// the License. You may obtain a copy of the License at | ||
// | ||
// http://www.apache.org/licenses/LICENSE-2.0 | ||
// | ||
// Unless required by applicable law or agreed to in writing, software | ||
// distributed under the License is distributed on an "AS IS" BASIS, | ||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
// See the License for the specific language governing permissions and | ||
// limitations under the License. | ||
package engine | ||
|
||
import ( | ||
"testing" | ||
|
||
"github.com/apache/beam/sdks/v2/go/pkg/beam/core/graph/mtime" | ||
"github.com/google/go-cmp/cmp" | ||
) | ||
|
||
func TestProcessingTimeQueue(t *testing.T) { | ||
t.Run("empty", func(t *testing.T) { | ||
q := newStageRefreshQueue() | ||
emptyTime, ok := q.Peek() | ||
if ok != false { | ||
t.Errorf("q.Peek() on empty queue should have returned false") | ||
} | ||
if got, want := emptyTime, mtime.MaxTimestamp; got != want { | ||
t.Errorf("q.Peek() on empty queue returned %v, want %v", got, want) | ||
} | ||
|
||
tests := []mtime.Time{ | ||
mtime.MinTimestamp, | ||
-273, | ||
0, | ||
42, | ||
mtime.EndOfGlobalWindowTime, | ||
mtime.MaxTimestamp, | ||
} | ||
for _, test := range tests { | ||
if got, want := q.AdvanceTo(test), (set[string]{}); len(got) > 0 { | ||
t.Errorf("q.AdvanceTo(%v) on empty queue returned %v, want %v", test, got, want) | ||
} | ||
} | ||
}) | ||
t.Run("scheduled", func(t *testing.T) { | ||
type event struct { | ||
t mtime.Time | ||
stage string | ||
} | ||
|
||
s := func(ids ...string) set[string] { | ||
ret := set[string]{} | ||
for _, id := range ids { | ||
ret.insert(id) | ||
} | ||
return ret | ||
} | ||
|
||
tests := []struct { | ||
name string | ||
events []event | ||
|
||
minTime mtime.Time | ||
|
||
advanceTime mtime.Time | ||
want set[string] | ||
}{ | ||
{ | ||
"singleBefore", | ||
[]event{{1, "test1"}}, | ||
1, | ||
0, | ||
s(), | ||
}, { | ||
"singleAt", | ||
[]event{{1, "test1"}}, | ||
1, | ||
1, | ||
s("test1"), | ||
}, { | ||
"singleAfter", | ||
[]event{{1, "test1"}}, | ||
1, | ||
2, | ||
s("test1"), | ||
}, { | ||
"trioDistinct", | ||
[]event{{1, "test1"}, {2, "test2"}, {3, "test3"}}, | ||
1, | ||
2, | ||
s("test1", "test2"), | ||
}, { | ||
"trioDistinctReversed", | ||
[]event{{3, "test3"}, {2, "test2"}, {1, "test1"}}, | ||
1, | ||
2, | ||
s("test1", "test2"), | ||
}, { | ||
"trioDistinctTimeSameId", | ||
[]event{{3, "test"}, {2, "test"}, {1, "test"}}, | ||
1, | ||
2, | ||
s("test"), | ||
}, { | ||
"trioOneTime", | ||
[]event{{1, "test3"}, {1, "test2"}, {1, "test1"}}, | ||
1, | ||
1, | ||
s("test1", "test2", "test3"), | ||
}, { | ||
"trioDuplicates", | ||
[]event{{1, "test"}, {1, "test"}, {1, "test"}}, | ||
1, | ||
1, | ||
s("test", "test", "test"), | ||
}, | ||
} | ||
|
||
for _, test := range tests { | ||
t.Run(test.name, func(t *testing.T) { | ||
q := newStageRefreshQueue() | ||
for _, e := range test.events { | ||
q.Schedule(e.t, e.stage) | ||
} | ||
if got, _ := q.Peek(); got != test.minTime { | ||
t.Errorf("q.Peek() = %v, want %v", got, test.minTime) | ||
} | ||
|
||
if got, want := q.AdvanceTo(test.advanceTime), test.want; !cmp.Equal(got, want) { | ||
t.Errorf("q.AdvanceTo(%v) = %v, want %v", test.advanceTime, got, want) | ||
} | ||
}) | ||
} | ||
}) | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.