diff --git a/go/vt/tabletmanager/actionnode/actionnode.go b/go/vt/tabletmanager/actionnode/actionnode.go index 20352115c20..d7a20feaff4 100644 --- a/go/vt/tabletmanager/actionnode/actionnode.go +++ b/go/vt/tabletmanager/actionnode/actionnode.go @@ -119,6 +119,10 @@ const ( // the topo server. TABLET_ACTION_REFRESH_STATE = "RefreshState" + // RunHealthCheck tells the tablet to run a health check cycle for + // the given target tablet type. + TABLET_ACTION_RUN_HEALTH_CHECK = "RunHealthCheck" + // ReloadSchema tells the tablet to reload its schema. TABLET_ACTION_RELOAD_SCHEMA = "ReloadSchema" diff --git a/go/vt/tabletmanager/agent_rpc_actions.go b/go/vt/tabletmanager/agent_rpc_actions.go index cc566dc629c..a7e6297f6fe 100644 --- a/go/vt/tabletmanager/agent_rpc_actions.go +++ b/go/vt/tabletmanager/agent_rpc_actions.go @@ -64,6 +64,8 @@ type RpcAgent interface { RefreshState() + RunHealthCheck(targetTabletType topo.TabletType) + ReloadSchema() PreflightSchema(change string) (*myproto.SchemaChangeResult, error) @@ -209,6 +211,12 @@ func (agent *ActionAgent) ExecuteHook(hk *hook.Hook) *hook.HookResult { func (agent *ActionAgent) RefreshState() { } +// RunHealthCheck will manually run the health check on the tablet +// Should be called under RpcWrap. +func (agent *ActionAgent) RunHealthCheck(targetTabletType topo.TabletType) { + agent.runHealthCheck(targetTabletType) +} + // ReloadSchema will reload the schema // Should be called under RpcWrapLockAction. 
func (agent *ActionAgent) ReloadSchema() { diff --git a/go/vt/tabletmanager/agentrpctest/test_agent_rpc.go b/go/vt/tabletmanager/agentrpctest/test_agent_rpc.go index a2fcffc068b..3a16e020540 100644 --- a/go/vt/tabletmanager/agentrpctest/test_agent_rpc.go +++ b/go/vt/tabletmanager/agentrpctest/test_agent_rpc.go @@ -295,6 +295,19 @@ func agentRpcTestRefreshState(t *testing.T, client tmclient.TabletManagerClient, } } +var testRunHealthCheckValue = topo.TYPE_RDONLY + +func (fra *fakeRpcAgent) RunHealthCheck(targetTabletType topo.TabletType) { + compare(fra.t, "RunHealthCheck tabletType", targetTabletType, testRunHealthCheckValue) +} + +func agentRpcTestRunHealthCheck(t *testing.T, client tmclient.TabletManagerClient, ti *topo.TabletInfo) { + err := client.RunHealthCheck(ti, testRunHealthCheckValue, time.Minute) + if err != nil { + t.Errorf("RunHealthCheck failed: %v", err) + } +} + var testReloadSchemaCalled = false func (fra *fakeRpcAgent) ReloadSchema() { @@ -877,6 +890,8 @@ func AgentRpcTestSuite(t *testing.T, client tmclient.TabletManagerClient, ti *to agentRpcTestScrap(t, client, ti) agentRpcTestSleep(t, client, ti) agentRpcTestExecuteHook(t, client, ti) + agentRpcTestRefreshState(t, client, ti) + agentRpcTestRunHealthCheck(t, client, ti) agentRpcTestReloadSchema(t, client, ti) agentRpcTestPreflightSchema(t, client, ti) agentRpcTestApplySchema(t, client, ti) diff --git a/go/vt/tabletmanager/gorpctmclient/gorpc_client.go b/go/vt/tabletmanager/gorpctmclient/gorpc_client.go index e470c6a2336..77a07342b42 100644 --- a/go/vt/tabletmanager/gorpctmclient/gorpc_client.go +++ b/go/vt/tabletmanager/gorpctmclient/gorpc_client.go @@ -129,6 +129,11 @@ func (client *GoRpcTabletManagerClient) RefreshState(tablet *topo.TabletInfo, wa return client.rpcCallTablet(tablet, actionnode.TABLET_ACTION_REFRESH_STATE, "", &noOutput, waitTime) } +func (client *GoRpcTabletManagerClient) RunHealthCheck(tablet *topo.TabletInfo, targetTabletType topo.TabletType, waitTime time.Duration) error { 
+ var noOutput rpc.UnusedResponse + return client.rpcCallTablet(tablet, actionnode.TABLET_ACTION_RUN_HEALTH_CHECK, &targetTabletType, &noOutput, waitTime) +} + func (client *GoRpcTabletManagerClient) ReloadSchema(tablet *topo.TabletInfo, waitTime time.Duration) error { var noOutput rpc.UnusedResponse return client.rpcCallTablet(tablet, actionnode.TABLET_ACTION_RELOAD_SCHEMA, "", &noOutput, waitTime) diff --git a/go/vt/tabletmanager/gorpctmserver/gorpc_server.go b/go/vt/tabletmanager/gorpctmserver/gorpc_server.go index 428d7e1d784..5e1ed1a7d63 100644 --- a/go/vt/tabletmanager/gorpctmserver/gorpc_server.go +++ b/go/vt/tabletmanager/gorpctmserver/gorpc_server.go @@ -109,6 +109,13 @@ func (tm *TabletManager) RefreshState(context *rpcproto.Context, args *rpc.Unuse }) } +func (tm *TabletManager) RunHealthCheck(context *rpcproto.Context, args *topo.TabletType, reply *rpc.UnusedResponse) error { + return tm.agent.RpcWrap(context, actionnode.TABLET_ACTION_RUN_HEALTH_CHECK, args, reply, func() error { + tm.agent.RunHealthCheck(*args) + return nil + }) +} + func (tm *TabletManager) ReloadSchema(context *rpcproto.Context, args *rpc.UnusedRequest, reply *rpc.UnusedResponse) error { return tm.agent.RpcWrapLockAction(context, actionnode.TABLET_ACTION_RELOAD_SCHEMA, args, reply, true, func() error { tm.agent.ReloadSchema() diff --git a/go/vt/tabletmanager/tmclient/rpc_client_api.go b/go/vt/tabletmanager/tmclient/rpc_client_api.go index c4835189173..27d8cdf8006 100644 --- a/go/vt/tabletmanager/tmclient/rpc_client_api.go +++ b/go/vt/tabletmanager/tmclient/rpc_client_api.go @@ -69,6 +69,9 @@ type TabletManagerClient interface { // RefreshState asks the remote tablet to reload its tablet record RefreshState(tablet *topo.TabletInfo, waitTime time.Duration) error + // RunHealthCheck asks the remote tablet to run a health check cycle + RunHealthCheck(tablet *topo.TabletInfo, targetTabletType topo.TabletType, waitTime time.Duration) error + // ReloadSchema asks the remote tablet to reload 
its schema ReloadSchema(tablet *topo.TabletInfo, waitTime time.Duration) error diff --git a/go/vt/vtctl/vtctl.go b/go/vt/vtctl/vtctl.go index 9931b706d1c..b6351a38465 100644 --- a/go/vt/vtctl/vtctl.go +++ b/go/vt/vtctl/vtctl.go @@ -83,6 +83,9 @@ var commands = []commandGroup{ command{"RefreshState", commandRefreshState, "", "Asks a remote tablet to reload its tablet record."}, + command{"RunHealthCheck", commandRunHealthCheck, + " ", + "Asks a remote tablet to run a health check with the provided target type."}, command{"Query", commandQuery, " ", "Send a SQL query to a tablet."}, @@ -859,6 +862,28 @@ func commandRefreshState(wr *wrangler.Wrangler, subFlags *flag.FlagSet, args []s return wr.TabletManagerClient().RefreshState(tabletInfo, wr.ActionTimeout()) } +func commandRunHealthCheck(wr *wrangler.Wrangler, subFlags *flag.FlagSet, args []string) error { + if err := subFlags.Parse(args); err != nil { + return err + } + if subFlags.NArg() != 2 { + return fmt.Errorf("action RunHealthCheck requires ") + } + tabletAlias, err := tabletParamToTabletAlias(subFlags.Arg(0)) + if err != nil { + return err + } + servedType, err := parseTabletType(subFlags.Arg(1), []topo.TabletType{topo.TYPE_REPLICA, topo.TYPE_RDONLY}) + if err != nil { + return err + } + tabletInfo, err := wr.TopoServer().GetTablet(tabletAlias) + if err != nil { + return err + } + return wr.TabletManagerClient().RunHealthCheck(tabletInfo, servedType, wr.ActionTimeout()) +} + func commandQuery(wr *wrangler.Wrangler, subFlags *flag.FlagSet, args []string) error { if err := subFlags.Parse(args); err != nil { return err diff --git a/test/resharding.py b/test/resharding.py index cc06f5023d0..d9e6f3e939d 100755 --- a/test/resharding.py +++ b/test/resharding.py @@ -433,8 +433,12 @@ def _test_keyrange_constraints(self): def _check_query_service(self, tablet, serving, tablet_control_disabled): """_check_query_service will check that the query service is enabled - or disabled on the tablet. 
It will also check if the tablet control - status is the reason for being enabled / disabled.""" + or disabled on the tablet. It will also check if the tablet control + status is the reason for being enabled / disabled. + + It will also run a remote RunHealthCheck to be sure it doesn't change + the serving state. + """ tablet_vars = utils.get_vars(tablet.port) if serving: expected_state = 'SERVING' @@ -448,6 +452,17 @@ def _check_query_service(self, tablet, serving, tablet_control_disabled): else: self.assertNotIn("Query Service disabled by TabletControl", status) + if tablet.tablet_type == 'rdonly': + utils.run_vtctl(['RunHealthCheck', tablet.tablet_alias, 'rdonly'], + auto_log=True) + + tablet_vars = utils.get_vars(tablet.port) + if serving: + expected_state = 'SERVING' + else: + expected_state = 'NOT_SERVING' + self.assertEqual(tablet_vars['TabletStateName'], expected_state, 'tablet %s is not in the right serving state after health check: got %s expected %s' % (tablet.tablet_alias, tablet_vars['TabletStateName'], expected_state)) + def test_resharding(self): utils.run_vtctl(['CreateKeyspace', '--sharding_column_name', 'bad_column',