Skip to content

Commit c6e10b4

Browse files
committed
Probe NSX API endpoint in manager cluster
In manager cluster resource, probe for NSX connectivity before joining cluster nodes. Parameters for connection probing are specified within the `api_probing` block. This option is useful since NSX manager nodes might be spawned within the same apply process, in which case it would take a while for the NSX API endpoint to become responsive. Rather than using a custom provisioner script to ensure connectivity, we provide a probe-and-wait option in the manager cluster resource. The standard retry mechanism is not used for this connection probing in order to avoid duplication (we expect retry parameters to be very different for regular retry and initial connection probing). Signed-off-by: Anna Khmelnitsky <[email protected]>
1 parent 8e24ef4 commit c6e10b4

File tree

3 files changed

+138
-7
lines changed

3 files changed

+138
-7
lines changed

nsxt/provider.go

+11-7
Original file line numberDiff line numberDiff line change
@@ -712,13 +712,13 @@ func configurePolicyConnectorData(d *schema.ResourceData, clients *nsxtClients)
712712
}
713713

714714
if !isVMC {
715-
err = configureLicenses(getPolicyConnectorForInit(*clients), clients.CommonConfig.LicenseDiff)
715+
err = configureLicenses(getPolicyConnectorForInit(*clients, true), clients.CommonConfig.LicenseDiff)
716716
if err != nil {
717717
return err
718718
}
719719
}
720720

721-
err = initNSXVersion(getPolicyConnectorForInit(*clients))
721+
err = initNSXVersion(getPolicyConnectorForInit(*clients, true))
722722
if err != nil && isVMC {
723723
// In case version API does not work for VMC, we workaround by testing version-specific APIs
724724
// TODO - remove this when /node/version API works for all auth methods on VMC
@@ -978,14 +978,14 @@ func providerConfigure(d *schema.ResourceData) (interface{}, error) {
978978
}
979979

980980
func getPolicyConnector(clients interface{}) client.Connector {
981-
return getPolicyConnectorWithHeaders(clients, nil, false)
981+
return getPolicyConnectorWithHeaders(clients, nil, false, true)
982982
}
983983

984-
func getPolicyConnectorForInit(clients interface{}) client.Connector {
985-
return getPolicyConnectorWithHeaders(clients, nil, true)
984+
func getPolicyConnectorForInit(clients interface{}, withRetry bool) client.Connector {
985+
return getPolicyConnectorWithHeaders(clients, nil, true, withRetry)
986986
}
987987

988-
func getPolicyConnectorWithHeaders(clients interface{}, customHeaders *map[string]string, initFlow bool) client.Connector {
988+
func getPolicyConnectorWithHeaders(clients interface{}, customHeaders *map[string]string, initFlow bool, withRetry bool) client.Connector {
989989
c := clients.(nsxtClients)
990990

991991
retryFunc := func(retryContext retry.RetryContext) bool {
@@ -1018,10 +1018,14 @@ func getPolicyConnectorWithHeaders(clients interface{}, customHeaders *map[strin
10181018
return true
10191019
}
10201020

1021-
connectorOptions := []client.ConnectorOption{client.UsingRest(nil), client.WithHttpClient(c.PolicyHTTPClient), client.WithDecorators(retry.NewRetryDecorator(uint(c.CommonConfig.MaxRetries), retryFunc))}
1021+
connectorOptions := []client.ConnectorOption{client.UsingRest(nil), client.WithHttpClient(c.PolicyHTTPClient)}
10221022
var requestProcessors []core.RequestProcessor
10231023
var responseAcceptors []core.ResponseAcceptor
10241024

1025+
if withRetry {
1026+
connectorOptions = append(connectorOptions, client.WithDecorators(retry.NewRetryDecorator(uint(c.CommonConfig.MaxRetries), retryFunc)))
1027+
}
1028+
10251029
if c.PolicySecurityContext != nil {
10261030
connectorOptions = append(connectorOptions, client.WithSecurityContext(c.PolicySecurityContext))
10271031
}

nsxt/resource_nsxt_manager_cluster.go

+120
Original file line numberDiff line numberDiff line change
@@ -13,12 +13,19 @@ import (
1313

1414
"golang.org/x/exp/slices"
1515

16+
"github.com/hashicorp/terraform-plugin-sdk/v2/helper/resource"
1617
"github.com/hashicorp/terraform-plugin-sdk/v2/helper/schema"
1718
"github.com/hashicorp/terraform-plugin-sdk/v2/helper/validation"
19+
"github.com/vmware/vsphere-automation-sdk-go/runtime/protocol/client"
1820
"github.com/vmware/vsphere-automation-sdk-go/services/nsxt-mp/nsx"
1921
nsxModel "github.com/vmware/vsphere-automation-sdk-go/services/nsxt-mp/nsx/model"
22+
"github.com/vmware/vsphere-automation-sdk-go/services/nsxt/infra"
2023
)
2124

25+
// Defaults, in seconds, for the `api_probing` settings of the manager
// cluster resource.
const (
	// nodeConnectivityInitialDelay is the default wait before the first probe.
	nodeConnectivityInitialDelay int = 20
	// nodeConnectivityInterval is the default pause between two probes.
	nodeConnectivityInterval int = 16
	// nodeConnectivityTimeout is the default overall probing timeout.
	nodeConnectivityTimeout int = 1800
)
28+
2229
func resourceNsxtManagerCluster() *schema.Resource {
2330
return &schema.Resource{
2431
Create: resourceNsxtManagerClusterCreate,
@@ -28,6 +35,40 @@ func resourceNsxtManagerCluster() *schema.Resource {
2835

2936
Schema: map[string]*schema.Schema{
3037
"revision": getRevisionSchema(),
38+
"api_probing": {
39+
Type: schema.TypeList,
40+
MaxItems: 1,
41+
Description: "Settings that control initial node connection",
42+
Elem: &schema.Resource{
43+
Schema: map[string]*schema.Schema{
44+
"enabled": {
45+
Type: schema.TypeBool,
46+
Description: "Whether API probing for NSX nodes is enabled",
47+
Optional: true,
48+
Default: true,
49+
},
50+
"delay": {
51+
Type: schema.TypeInt,
52+
Description: "Initial delay in seconds before probing connection",
53+
Optional: true,
54+
Default: nodeConnectivityInitialDelay,
55+
},
56+
"interval": {
57+
Type: schema.TypeInt,
58+
Description: "Connection probing interval in seconds",
59+
Optional: true,
60+
Default: nodeConnectivityInterval,
61+
},
62+
"timeout": {
63+
Type: schema.TypeInt,
64+
Description: "Timeout for connection probing in seconds",
65+
Optional: true,
66+
Default: nodeConnectivityTimeout,
67+
},
68+
},
69+
},
70+
Optional: true,
71+
},
3172
"node": {
3273
Type: schema.TypeList,
3374
Description: "Nodes in the cluster",
@@ -82,6 +123,76 @@ type NsxClusterNode struct {
82123
Status string
83124
}
84125

126+
func getNodeConnectivityStateConf(connector client.Connector, delay int, interval int, timeout int) *resource.StateChangeConf {
127+
128+
return &resource.StateChangeConf{
129+
Pending: []string{"notyet"},
130+
Target: []string{"success"},
131+
Refresh: func() (interface{}, string, error) {
132+
siteClient := infra.NewSitesClient(connector)
133+
// We use default site API to probe NSX manager API endpoint readiness,
134+
// since it may take a while to auto-generate default site after API is responsive
135+
resp, err := siteClient.Get("default")
136+
if err != nil {
137+
log.Printf("[DEBUG]: NSX API endpoint not ready: %v", err)
138+
return nil, "notyet", nil
139+
}
140+
141+
log.Printf("[INFO]: NSX API endpoint ready")
142+
return resp, "success", nil
143+
},
144+
Delay: time.Duration(delay) * time.Second,
145+
Timeout: time.Duration(timeout) * time.Second,
146+
PollInterval: time.Duration(interval) * time.Second,
147+
}
148+
}
149+
150+
func waitForNodeStatus(d *schema.ResourceData, m interface{}, nodes []NsxClusterNode) error {
151+
152+
delay := nodeConnectivityInitialDelay
153+
interval := nodeConnectivityInterval
154+
timeout := nodeConnectivityTimeout
155+
probingEnabled := true
156+
probing := d.Get("api_probing").([]interface{})
157+
for _, item := range probing {
158+
entry := item.(map[string]interface{})
159+
probingEnabled = entry["enabled"].(bool)
160+
delay = entry["delay"].(int)
161+
interval = entry["interval"].(int)
162+
timeout = entry["timeout"].(int)
163+
break
164+
}
165+
166+
// Wait for main mode
167+
if !probingEnabled {
168+
log.Printf("[DEBUG]: API probing for NSX is disabled")
169+
return nil
170+
}
171+
connector := getPolicyConnectorForInit(m, false)
172+
stateConf := getNodeConnectivityStateConf(connector, delay, interval, timeout)
173+
_, err := stateConf.WaitForState()
174+
if err != nil {
175+
return fmt.Errorf("Failed to connect to main NSX manager endpoint")
176+
}
177+
178+
// Wait for joining nodes
179+
for _, node := range nodes {
180+
c, err := getNewNsxtClient(node, d, m)
181+
if err != nil {
182+
return err
183+
}
184+
newNsxClients := c.(nsxtClients)
185+
nodeConnector := getPolicyConnectorForInit(newNsxClients, false)
186+
nodeConf := getNodeConnectivityStateConf(nodeConnector, 0, interval, timeout)
187+
_, err = nodeConf.WaitForState()
188+
if err != nil {
189+
return fmt.Errorf("Failed to connect to NSX node endpoint %s", node.IPAddress)
190+
}
191+
}
192+
193+
return nil
194+
}
195+
85196
func getClusterNodesFromSchema(d *schema.ResourceData) []NsxClusterNode {
86197
nodes := d.Get("node").([]interface{})
87198
var clusterNodes []NsxClusterNode
@@ -108,6 +219,11 @@ func resourceNsxtManagerClusterCreate(d *schema.ResourceData, m interface{}) err
108219
if len(nodes) == 0 {
109220
return fmt.Errorf("At least a manager appliance must be provided to form a cluster")
110221
}
222+
223+
err := waitForNodeStatus(d, m, nodes)
224+
if err != nil {
225+
return fmt.Errorf("Failed to establish connection to NSX API: %v", err)
226+
}
111227
clusterID, certSha256Thumbprint, hostIPs, err := getClusterInfoFromHostNode(d, m)
112228
if err != nil {
113229
return handleCreateError("ManagerCluster", "", err)
@@ -329,6 +445,10 @@ func resourceNsxtManagerClusterRead(d *schema.ResourceData, m interface{}) error
329445
}
330446

331447
func resourceNsxtManagerClusterUpdate(d *schema.ResourceData, m interface{}) error {
448+
if !d.HasChange("node") {
449+
// Changes to attributes other than "node" should be ignored
450+
return nil
451+
}
332452
id := d.Id()
333453
connector := getPolicyConnector(m)
334454
client := nsx.NewClusterClient(connector)

website/docs/r/manager_cluster.html.markdown

+7
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,8 @@ This resource is supported with NSX 4.1.0 onwards.
1212
The main node for the cluster is the host in terraform nsxt provider config,
1313
user will need to specify the nodes that will join the cluster in the resource config.
1414
Only one instance of nsxt_manager_cluster resource is supported.
15+
If `api_probing` is enabled, this resource will wait for NSX API endpoints to come up
16+
before performing cluster joining.
1517

1618
## Example Usage
1719

@@ -38,6 +40,11 @@ The following arguments are supported:
3840
* `ip_address` - (Required) IP address of the node.
3941
* `username` - (Required) The username for login to the node.
4042
* `password` - (Required) The password for login to the node.
43+
* `api_probing` - (Optional) Parameters for probing NSX API endpoint connection. Since NSX nodes might have been created during the same apply, we might need to wait until the API endpoint becomes available and all required default objects are created.
44+
* `enabled` - (Optional) Whether API connectivity check is enabled. Default is `true`.
45+
* `delay` - (Optional) Initial delay before we start probing API endpoint in seconds. Default is 20.
46+
* `interval` - (Optional) Interval for probing API endpoint in seconds. Default is 16.
47+
* `timeout` - (Optional) Timeout for probing the API endpoint in seconds. Default is 1800.
4148

4249
## Argument Reference
4350

0 commit comments

Comments
 (0)