-
Notifications
You must be signed in to change notification settings - Fork 2.3k
Add a sema to bound the number of inflight vexec queries when getting workflows #8353
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Closed
Closed
Changes from all commits
Commits
File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -19,6 +19,7 @@ package workflow | |
| import ( | ||
| "context" | ||
| "errors" | ||
| "flag" | ||
| "fmt" | ||
| "sort" | ||
| "strings" | ||
|
|
@@ -29,6 +30,7 @@ import ( | |
| "k8s.io/apimachinery/pkg/util/sets" | ||
|
|
||
| "vitess.io/vitess/go/sqltypes" | ||
| "vitess.io/vitess/go/sync2" | ||
| "vitess.io/vitess/go/trace" | ||
| "vitess.io/vitess/go/vt/concurrency" | ||
| "vitess.io/vitess/go/vt/key" | ||
|
|
@@ -39,11 +41,17 @@ import ( | |
| "vitess.io/vitess/go/vt/vttablet/tmclient" | ||
|
|
||
| binlogdatapb "vitess.io/vitess/go/vt/proto/binlogdata" | ||
| querypb "vitess.io/vitess/go/vt/proto/query" | ||
| topodatapb "vitess.io/vitess/go/vt/proto/topodata" | ||
| vtctldatapb "vitess.io/vitess/go/vt/proto/vtctldata" | ||
| "vitess.io/vitess/go/vt/proto/vttime" | ||
| ) | ||
|
|
||
| var ( | ||
| vExecPoolSize = flag.Uint("workflow_server_vexec_pool_size", 1000, "maximum number of concurrent vexec queries to allow") | ||
| vExecPoolTimeout = flag.Duration("workflow_server_vexec_pool_default_timeout", time.Millisecond*50, "default timeout to wait acquiring a connection from the vexec pool. zero implies no timeout") | ||
| ) | ||
|
|
||
| var ( | ||
| // ErrInvalidWorkflow is a catchall error type for conditions that should be | ||
| // impossible when operating on a workflow. | ||
|
|
@@ -56,6 +64,10 @@ var ( | |
| // target keyspaces across different shard primaries. This should be | ||
| // impossible. | ||
| ErrMultipleTargetKeyspaces = errors.New("multiple target keyspaces for a single workflow") | ||
|
|
||
| // ErrVExecConnTimeout is returned if a call to acquire a server's vexecPool | ||
| // hits a deadline exceeded | ||
| ErrVExecConnTimeout = errors.New("timeout exceeded getting vexec conn") | ||
| ) | ||
|
|
||
| // Server provides an API to work with Vitess workflows, like vreplication | ||
|
|
@@ -66,16 +78,18 @@ var ( | |
| // actions on vreplication workflows, and schema migration workflows entirely, | ||
| // are not yet supported, but planned. | ||
| type Server struct { | ||
| ts *topo.Server | ||
| tmc tmclient.TabletManagerClient | ||
| ts *topo.Server | ||
| tmc tmclient.TabletManagerClient | ||
| vexecPool *sync2.Semaphore | ||
| } | ||
|
|
||
| // NewServer returns a new server instance with the given topo.Server and | ||
| // TabletManagerClient. | ||
| func NewServer(ts *topo.Server, tmc tmclient.TabletManagerClient) *Server { | ||
| return &Server{ | ||
| ts: ts, | ||
| tmc: tmc, | ||
| ts: ts, | ||
| tmc: tmc, | ||
| vexecPool: sync2.NewSemaphore(int(*vExecPoolSize), *vExecPoolTimeout), | ||
| } | ||
| } | ||
|
|
||
|
|
@@ -92,6 +106,8 @@ func (s *Server) CheckReshardingJournalExistsOnTablet(ctx context.Context, table | |
| exists bool | ||
| ) | ||
|
|
||
| // Note we do not use the s.vexecPool semaphore here, to maintain consistent | ||
| // behavior with the existing wrangler APIs this function was migrated from. | ||
| query := fmt.Sprintf("select val from _vt.resharding_journal where id=%v", migrationID) | ||
| p3qr, err := s.tmc.VReplicationExec(ctx, tablet, query) | ||
| if err != nil { | ||
|
|
@@ -297,7 +313,13 @@ func (s *Server) GetWorkflows(ctx context.Context, req *vtctldatapb.GetWorkflows | |
| ) | ||
|
|
||
| vx := vexec.NewVExec(req.Keyspace, "", s.ts, s.tmc) | ||
|
|
||
| if !s.vexecPool.AcquireContext(ctx) { | ||
| return nil, ErrVExecConnTimeout | ||
| } | ||
|
Comment on lines
315
to
+319
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. tioli: and then moving other vexec endpoints over to it is a tiiiiiiny bit less effort; also you don't have to remember to manage locking around each query |
||
| results, err := vx.QueryContext(ctx, query) | ||
| s.vexecPool.Release() | ||
|
|
||
| if err != nil { | ||
| return nil, err | ||
| } | ||
|
|
@@ -522,7 +544,18 @@ ORDER BY | |
| span.Annotate("keyspace", req.Keyspace) | ||
| span.Annotate("workflow", workflow.Name) | ||
|
|
||
| results, err := vx.WithWorkflow(workflow.Name).QueryContext(ctx, vrepLogQuery) | ||
| var ( | ||
| results map[*topo.TabletInfo]*querypb.QueryResult | ||
| err error | ||
| ) | ||
|
|
||
| if !s.vexecPool.AcquireContext(ctx) { | ||
| err = ErrVExecConnTimeout | ||
| } else { | ||
| results, err = vx.WithWorkflow(workflow.Name).QueryContext(ctx, vrepLogQuery) | ||
| s.vexecPool.Release() | ||
| } | ||
|
|
||
| if err != nil { | ||
| // Note that we do not return here. If there are any query results | ||
| // in the map (i.e. some tablets returned successfully), we will | ||
|
|
@@ -708,7 +741,14 @@ func (s *Server) getWorkflowCopyStates(ctx context.Context, tablet *topo.TabletI | |
| span.Annotate("vrepl_id", id) | ||
|
|
||
| query := fmt.Sprintf("select table_name, lastpk from _vt.copy_state where vrepl_id = %d", id) | ||
|
|
||
| if !s.vexecPool.AcquireContext(ctx) { | ||
| return nil, ErrVExecConnTimeout | ||
| } | ||
|
|
||
| qr, err := s.tmc.VReplicationExec(ctx, tablet.Tablet, query) | ||
| s.vexecPool.Release() | ||
|
|
||
| if err != nil { | ||
| return nil, err | ||
| } | ||
|
|
||
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
we should update release notes with this and if we expect to roll out limits per call (imo) we should have a better approach than a flag for concurrency-per-call