diff --git a/CHANGELOG.next.asciidoc b/CHANGELOG.next.asciidoc index 418b5d7105c9..195baa976c8a 100644 --- a/CHANGELOG.next.asciidoc +++ b/CHANGELOG.next.asciidoc @@ -302,7 +302,6 @@ https://github.com/elastic/beats/compare/v8.8.1\...main[Check the HEAD diff] - Improve logging in Okta Entity Analytics provider. {issue}40106[40106] {pull}40347[40347] - Document `winlog` input. {issue}40074[40074] {pull}40462[40462] - Added retry logic to websocket connections in the streaming input. {issue}40271[40271] {pull}40601[40601] -- Add new metricset cluster for the vSphere module. {pull}40536[40536] - Disable event normalization for netflow input {pull}40635[40635] - Allow attribute selection in the Active Directory entity analytics provider. {issue}40482[40482] {pull}40662[40662] - Improve error quality when CEL program does not correctly return an events array. {pull}40580[40580] @@ -339,9 +338,13 @@ https://github.com/elastic/beats/compare/v8.8.1\...main[Check the HEAD diff] - Add new metricset network for the vSphere module. {pull}40559[40559] - Add new metricset resourcepool for the vSphere module. {pull}40456[40456] - Add metrics for the vSphere Virtualmachine metricset. {pull}40485[40485] -- Add support for snapshot in vSphere virtualmachine metricset {pull}40683[40683] -- Update fields to use mapstr in vSphere virtualmachine metricset {pull}40707[40707] - Log the total time taken for GCP `ListTimeSeries` and `AggregatedList` requests {pull}40661[40661] +- Add new metrics for the vSphere Host metricset. {pull}40429[40429] +- Add new metrics for the vSphere Datastore metricset. {pull}40441[40441] +- Add new metricset cluster for the vSphere module. {pull}40536[40536] +- Add new metricset datastorecluster for vSphere module. {pull}40634[40634] {pull}40694[40694] +- Add new metrics for the vSphere Virtualmachine metricset. 
{pull}40485[40485] +- Add support for period based intervalID in vSphere host and datastore metricsets {pull}40678[40678] - Add `metrics_count` to Prometheus module if `metrics_count: true` is set. {pull}40411[40411] - Added Cisco Meraki module {pull}40836[40836] - Added Palo Alto Networks module {pull}40686[40686] @@ -349,7 +352,6 @@ https://github.com/elastic/beats/compare/v8.8.1\...main[Check the HEAD diff] *Metricbeat* -- Add support for new metrics for vSphere module datastorecluster metricset. {pull}40694[40694] *Osquerybeat* diff --git a/metricbeat/docs/modules/vsphere.asciidoc b/metricbeat/docs/modules/vsphere.asciidoc index 762f7ba255e5..e7e6e78205d6 100644 --- a/metricbeat/docs/modules/vsphere.asciidoc +++ b/metricbeat/docs/modules/vsphere.asciidoc @@ -9,14 +9,79 @@ This file is generated! See scripts/mage/docs_collector.go [[metricbeat-module-vsphere]] == vSphere module -The vSphere module uses the https://github.com/vmware/govmomi[Govmomi] library to collect metrics from any Vmware SDK URL (ESXi/VCenter). This library is built for and tested against ESXi and vCenter 5.5, 6.0 and 6.5. +The vSphere module uses the https://github.com/vmware/govmomi[Govmomi] library to collect metrics from any VMware SDK URL (ESXi/VCenter). -By default it enables the metricsets `cluster`, `datastore`, `datastorecluster`, `host`, `network`, `resourcepool` and `virtualmachine`. +This module has been tested against ESXi and vCenter versions 5.5, 6.0, 6.5, and 7.0.3. + +By default, the vSphere module enables the following metricsets: + +1. cluster + +2. datastore + +3. datastorecluster + +4. host + +5. network + +6. resourcepool + +7. virtualmachine + +[float] +=== Supported Periods: +The Datastore and Host metricsets support performance data collection using the vSphere performance API. Given that the performance API imposes usage restrictions based on data collection intervals, users should configure the period optimally to ensure the receipt of real-time data. 
This configuration can be determined based on the https://docs.vmware.com/en/VMware-vSphere/7.0/com.vmware.vsphere.monitoring.doc/GUID-247646EA-A04B-411A-8DD4-62A3DCFCF49B.html[Data Collection Intervals] and https://docs.vmware.com/en/VMware-vSphere/7.0/com.vmware.vsphere.monitoring.doc/GUID-25800DE4-68E5-41CC-82D9-8811E27924BC.html[Data Collection Levels]. + +[IMPORTANT] + +Only host and datastore metricsets have limitation of system configured period from vSphere instance. Users can still collect summary metrics if performance metrics are not supported for the configured instance. + +[float] +==== Real-time data collection default interval: +- 20s + +[float] +==== Historical data collection default intervals: +- 300s +- 1800s +- 7200s +- 86400s + +[float] +=== Example: +If you need to configure multiple metricsets with different periods, you can achieve this by setting up multiple vSphere modules with different metricsets as demonstrated below: + +[source,yaml] +---- +- module: vsphere + metricsets: + - cluster + - datastorecluster + - network + - resourcepool + - virtualmachine + period: 10s + hosts: ["https://localhost/sdk"] + username: "user" + password: "password" + insecure: false + +- module: vsphere + metricsets: + - datastore + - host + period: 300s + hosts: ["https://localhost/sdk"] + username: "user" + password: "password" + insecure: false +---- [float] === Dashboard -The vsphere module comes with a predefined dashboard. For example: +The vSphere module includes a predefined dashboard. For example: image::./images/metricbeat_vsphere_dashboard.png[] image::./images/metricbeat_vsphere_vm_dashboard.png[] @@ -36,7 +101,17 @@ metricbeat.modules: - module: vsphere enabled: true metricsets: ["cluster", "datastore", "datastorecluster", "host", "network", "resourcepool", "virtualmachine"] - # Real-time data collection – An ESXi Server collects data for each performance counter every 20 seconds. 
+ + # Real-time data collection – An ESXi Server collects data for each performance counter every 20 seconds by default. + # Supported Periods: + # The Datastore and Host metricsets support performance data collection using the vSphere performance API. + # Since the performance API has usage restrictions based on data collection intervals, + # users should ensure that the period is configured optimally to receive real-time data. + # users can still collect summary metrics if performance metrics are not supported for the configured instance. + # This configuration can be determined based on the Data Collection Intervals and Data Collection Levels. + # Reference Links: + # Data Collection Intervals: https://docs.vmware.com/en/VMware-vSphere/7.0/com.vmware.vsphere.monitoring.doc/GUID-247646EA-A04B-411A-8DD4-62A3DCFCF49B.html + # Data Collection Levels: https://docs.vmware.com/en/VMware-vSphere/7.0/com.vmware.vsphere.monitoring.doc/GUID-25800DE4-68E5-41CC-82D9-8811E27924BC.html period: 20s hosts: ["https://localhost/sdk"] @@ -44,7 +119,7 @@ metricbeat.modules: password: "password" # If insecure is true, don't verify the server's certificate chain insecure: false - # Get custom fields when using virtualmachine metric set. Default false. + # Get custom fields when using virtualmachine metricset. Default false. # get_custom_fields: false ---- diff --git a/metricbeat/metricbeat.reference.yml b/metricbeat/metricbeat.reference.yml index f9ecce62a49b..b87cdb049fe6 100644 --- a/metricbeat/metricbeat.reference.yml +++ b/metricbeat/metricbeat.reference.yml @@ -1031,7 +1031,17 @@ metricbeat.modules: - module: vsphere enabled: true metricsets: ["cluster", "datastore", "datastorecluster", "host", "network", "resourcepool", "virtualmachine"] - # Real-time data collection – An ESXi Server collects data for each performance counter every 20 seconds. + + # Real-time data collection – An ESXi Server collects data for each performance counter every 20 seconds by default. 
+ # Supported Periods: + # The Datastore and Host metricsets support performance data collection using the vSphere performance API. + # Since the performance API has usage restrictions based on data collection intervals, + # users should ensure that the period is configured optimally to receive real-time data. + # users can still collect summary metrics if performance metrics are not supported for the configured instance. + # This configuration can be determined based on the Data Collection Intervals and Data Collection Levels. + # Reference Links: + # Data Collection Intervals: https://docs.vmware.com/en/VMware-vSphere/7.0/com.vmware.vsphere.monitoring.doc/GUID-247646EA-A04B-411A-8DD4-62A3DCFCF49B.html + # Data Collection Levels: https://docs.vmware.com/en/VMware-vSphere/7.0/com.vmware.vsphere.monitoring.doc/GUID-25800DE4-68E5-41CC-82D9-8811E27924BC.html period: 20s hosts: ["https://localhost/sdk"] @@ -1039,7 +1049,7 @@ metricbeat.modules: password: "password" # If insecure is true, don't verify the server's certificate chain insecure: false - # Get custom fields when using virtualmachine metric set. Default false. + # Get custom fields when using virtualmachine metricset. Default false. # get_custom_fields: false #------------------------------- Windows Module ------------------------------- diff --git a/metricbeat/module/vsphere/_meta/config.reference.yml b/metricbeat/module/vsphere/_meta/config.reference.yml index 91a32da76776..9ec81a4ecefd 100644 --- a/metricbeat/module/vsphere/_meta/config.reference.yml +++ b/metricbeat/module/vsphere/_meta/config.reference.yml @@ -1,7 +1,17 @@ - module: vsphere enabled: true metricsets: ["cluster", "datastore", "datastorecluster", "host", "network", "resourcepool", "virtualmachine"] - # Real-time data collection – An ESXi Server collects data for each performance counter every 20 seconds. + + # Real-time data collection – An ESXi Server collects data for each performance counter every 20 seconds by default. 
+ # Supported Periods: + # The Datastore and Host metricsets support performance data collection using the vSphere performance API. + # Since the performance API has usage restrictions based on data collection intervals, + # users should ensure that the period is configured optimally to receive real-time data. + # users can still collect summary metrics if performance metrics are not supported for the configured instance. + # This configuration can be determined based on the Data Collection Intervals and Data Collection Levels. + # Reference Links: + # Data Collection Intervals: https://docs.vmware.com/en/VMware-vSphere/7.0/com.vmware.vsphere.monitoring.doc/GUID-247646EA-A04B-411A-8DD4-62A3DCFCF49B.html + # Data Collection Levels: https://docs.vmware.com/en/VMware-vSphere/7.0/com.vmware.vsphere.monitoring.doc/GUID-25800DE4-68E5-41CC-82D9-8811E27924BC.html period: 20s hosts: ["https://localhost/sdk"] @@ -9,5 +19,5 @@ password: "password" # If insecure is true, don't verify the server's certificate chain insecure: false - # Get custom fields when using virtualmachine metric set. Default false. + # Get custom fields when using virtualmachine metricset. Default false. # get_custom_fields: false diff --git a/metricbeat/module/vsphere/_meta/config.yml b/metricbeat/module/vsphere/_meta/config.yml index 173be03fc4fb..24f94e29287c 100644 --- a/metricbeat/module/vsphere/_meta/config.yml +++ b/metricbeat/module/vsphere/_meta/config.yml @@ -7,7 +7,17 @@ # - network # - resourcepool # - virtualmachine - # Real-time data collection – An ESXi Server collects data for each performance counter every 20 seconds. + + # Real-time data collection – An ESXi Server collects data for each performance counter every 20 seconds by default. + # Supported Periods: + # The Datastore and Host metricsets support performance data collection using the vSphere performance API. 
+ # Since the performance API has usage restrictions based on data collection intervals, + # users should ensure that the period is configured optimally to receive real-time data. + # users can still collect summary metrics if performance metrics are not supported for the configured instance. + # This configuration can be determined based on the Data Collection Intervals and Data Collection Levels. + # Reference Links: + # Data Collection Intervals: https://docs.vmware.com/en/VMware-vSphere/7.0/com.vmware.vsphere.monitoring.doc/GUID-247646EA-A04B-411A-8DD4-62A3DCFCF49B.html + # Data Collection Levels: https://docs.vmware.com/en/VMware-vSphere/7.0/com.vmware.vsphere.monitoring.doc/GUID-25800DE4-68E5-41CC-82D9-8811E27924BC.html period: 20s hosts: ["https://localhost/sdk"] @@ -15,5 +25,5 @@ password: "password" # If insecure is true, don't verify the server's certificate chain insecure: false - # Get custom fields when using virtualmachine metric set. Default false. + # Get custom fields when using virtualmachine metricset. Default false. # get_custom_fields: false diff --git a/metricbeat/module/vsphere/_meta/docs.asciidoc b/metricbeat/module/vsphere/_meta/docs.asciidoc index 353cfda1e7b6..9815541831bf 100644 --- a/metricbeat/module/vsphere/_meta/docs.asciidoc +++ b/metricbeat/module/vsphere/_meta/docs.asciidoc @@ -1,11 +1,76 @@ -The vSphere module uses the https://github.com/vmware/govmomi[Govmomi] library to collect metrics from any Vmware SDK URL (ESXi/VCenter). This library is built for and tested against ESXi and vCenter 5.5, 6.0 and 6.5. +The vSphere module uses the https://github.com/vmware/govmomi[Govmomi] library to collect metrics from any VMware SDK URL (ESXi/VCenter). -By default it enables the metricsets `cluster`, `datastore`, `datastorecluster`, `host`, `network`, `resourcepool` and `virtualmachine`. +This module has been tested against ESXi and vCenter versions 5.5, 6.0, 6.5, and 7.0.3. 
+ +By default, the vSphere module enables the following metricsets: + +1. cluster + +2. datastore + +3. datastorecluster + +4. host + +5. network + +6. resourcepool + +7. virtualmachine + +[float] +=== Supported Periods: +The Datastore and Host metricsets support performance data collection using the vSphere performance API. Given that the performance API imposes usage restrictions based on data collection intervals, users should configure the period optimally to ensure the receipt of real-time data. This configuration can be determined based on the https://docs.vmware.com/en/VMware-vSphere/7.0/com.vmware.vsphere.monitoring.doc/GUID-247646EA-A04B-411A-8DD4-62A3DCFCF49B.html[Data Collection Intervals] and https://docs.vmware.com/en/VMware-vSphere/7.0/com.vmware.vsphere.monitoring.doc/GUID-25800DE4-68E5-41CC-82D9-8811E27924BC.html[Data Collection Levels]. + +[IMPORTANT] + +Only host and datastore metricsets have limitation of system configured period from vSphere instance. Users can still collect summary metrics if performance metrics are not supported for the configured instance. + +[float] +==== Real-time data collection default interval: +- 20s + +[float] +==== Historical data collection default intervals: +- 300s +- 1800s +- 7200s +- 86400s + +[float] +=== Example: +If you need to configure multiple metricsets with different periods, you can achieve this by setting up multiple vSphere modules with different metricsets as demonstrated below: + +[source,yaml] +---- +- module: vsphere + metricsets: + - cluster + - datastorecluster + - network + - resourcepool + - virtualmachine + period: 10s + hosts: ["https://localhost/sdk"] + username: "user" + password: "password" + insecure: false + +- module: vsphere + metricsets: + - datastore + - host + period: 300s + hosts: ["https://localhost/sdk"] + username: "user" + password: "password" + insecure: false +---- [float] === Dashboard -The vsphere module comes with a predefined dashboard. 
For example: +The vSphere module includes a predefined dashboard. For example: image::./images/metricbeat_vsphere_dashboard.png[] image::./images/metricbeat_vsphere_vm_dashboard.png[] diff --git a/metricbeat/module/vsphere/cluster/cluster.go b/metricbeat/module/vsphere/cluster/cluster.go index 10184c55c901..5c462d849334 100644 --- a/metricbeat/module/vsphere/cluster/cluster.go +++ b/metricbeat/module/vsphere/cluster/cluster.go @@ -75,7 +75,7 @@ func (m *ClusterMetricSet) Fetch(ctx context.Context, reporter mb.ReporterV2) er } defer func() { if err := client.Logout(ctx); err != nil { - m.Logger().Debug(fmt.Errorf("error trying to logout from vSphere: %w", err)) + m.Logger().Errorf("error trying to logout from vSphere: %v", err) } }() @@ -91,7 +91,7 @@ func (m *ClusterMetricSet) Fetch(ctx context.Context, reporter mb.ReporterV2) er defer func() { if err := v.Destroy(ctx); err != nil { - m.Logger().Errorf("error trying to destroy view from vSphere: %w", err) + m.Logger().Errorf("error trying to destroy view from vSphere: %v", err) } }() diff --git a/metricbeat/module/vsphere/datastore/_meta/data.json b/metricbeat/module/vsphere/datastore/_meta/data.json index 1e1a1691219b..9ab349a293c7 100644 --- a/metricbeat/module/vsphere/datastore/_meta/data.json +++ b/metricbeat/module/vsphere/datastore/_meta/data.json @@ -1,64 +1,48 @@ { - "@timestamp": "2017-10-12T08:05:34.853Z", - "event": { - "dataset": "vsphere.datastore", - "duration": 115000, - "module": "vsphere" - }, - "metricset": { - "name": "datastore", - "period": 10000 - }, - "service": { - "address": "127.0.0.1:33365", - "type": "vsphere" - }, - "vsphere": { - "datastore": { - "iops": 0, - "host": { - "count": 1, - "names": [ - "DC3_H0" - ] - }, - "status": "green", - "vm": { - "count": 6, - "names": [ - "DC3_H0_VM0" - ] - }, - "read": { - "bytes": 0, - "latency": { - "total": { - "ms": 0 - } + "@timestamp": "2017-10-12T08:05:34.853Z", + "event": { + "dataset": "vsphere.datastore", + "duration": 115000, + "module": 
"vsphere" + }, + "metricset": { + "name": "datastore", + "period": 10000 + }, + "service": { + "address": "127.0.0.1:55632", + "type": "vsphere" + }, + "vsphere": { + "datastore": { + "capacity": { + "free": { + "bytes": 10973641441280 + }, + "total": { + "bytes": 10995116277760 + }, + "used": { + "bytes": 21474836480, + "pct": 0.001953125 + } + }, + "fstype": "OTHER", + "host": { + "count": 1, + "names": [ + "localhost_localdomain" + ] + }, + "name": "LocalDS_0", + "status": "green", + "vm": { + "count": 2, + "names": [ + "ha-host_VM0", + "ha-host_VM1" + ] + } } - }, - "write": { - "bytes": 337000, - "latency": { - "total": { - "ms": 0 - } - } - }, - "capacity": { - "free": { - "bytes": 37120094208 - }, - "total": { - "bytes": 74686664704 - }, - "used": { - "bytes": 37566570496, - "pct": 0.502988996026061 - } - }, - "fstype": "local", - "name": "LocalDS_0" } - } } \ No newline at end of file diff --git a/metricbeat/module/vsphere/datastore/datastore.go b/metricbeat/module/vsphere/datastore/datastore.go index ee2d08feff10..3014ec0e3f5d 100644 --- a/metricbeat/module/vsphere/datastore/datastore.go +++ b/metricbeat/module/vsphere/datastore/datastore.go @@ -86,7 +86,7 @@ func (m *DataStoreMetricSet) Fetch(ctx context.Context, reporter mb.ReporterV2) } defer func() { if err := client.Logout(ctx); err != nil { - m.Logger().Debugf("error trying to log out from vSphere: %w", err) + m.Logger().Errorf("error trying to logout from vSphere: %v", err) } }() @@ -102,7 +102,7 @@ func (m *DataStoreMetricSet) Fetch(ctx context.Context, reporter mb.ReporterV2) defer func() { if err := v.Destroy(ctx); err != nil { - m.Logger().Debugf("error trying to destroy view from vSphere: %w", err) + m.Logger().Debugf("error trying to destroy view from vSphere: %v", err) } }() @@ -140,48 +140,18 @@ func (m *DataStoreMetricSet) Fetch(ctx context.Context, reporter mb.ReporterV2) default: assetNames, err := getAssetNames(ctx, pc, &dst[i]) if err != nil { - m.Logger().Errorf("Failed to retrieve 
object from host %s: %w", dst[i].Name, err) - continue + m.Logger().Errorf("Failed to retrieve object from datastore %s: %v", dst[i].Name, err) } - spec := types.PerfQuerySpec{ - Entity: dst[i].Reference(), - MetricId: metricIds, - MaxSample: 1, - IntervalId: 20, // right now we are only grabbing real time metrics from the performance manager - } - - // Query performance data - samples, err := perfManager.Query(ctx, []types.PerfQuerySpec{spec}) - if err != nil { - m.Logger().Debugf("Failed to query performance data for host %s: %v", dst[i].Name, err) - continue - } - - if len(samples) == 0 { - m.Logger().Debugf("No samples returned from performance manager") - continue - } - - results, err := perfManager.ToMetricSeries(ctx, samples) + metricMap, err := m.getPerfMetrics(ctx, perfManager, dst[i], metricIds) if err != nil { - m.Logger().Debugf("Failed to query performance data to metric series for host %s: %v", dst[i].Name, err) - continue - } - - metricMap := make(map[string]interface{}) - for _, result := range results[0].Value { - if len(result.Value) > 0 { - metricMap[result.Name] = result.Value[0] - continue - } - m.Logger().Debugf("For host %s,Metric %v: No result found", dst[i].Name, result.Name) + m.Logger().Errorf("Failed to retrieve performance metrics from datastore %s: %v", dst[i].Name, err) } reporter.Event(mb.Event{ MetricSetFields: m.mapEvent(dst[i], &metricData{ perfMetrics: metricMap, - assetNames: *assetNames, + assetNames: assetNames, }), }) } @@ -190,13 +160,12 @@ func (m *DataStoreMetricSet) Fetch(ctx context.Context, reporter mb.ReporterV2) return nil } -func getAssetNames(ctx context.Context, pc *property.Collector, ds *mo.Datastore) (*assetNames, error) { - +func getAssetNames(ctx context.Context, pc *property.Collector, ds *mo.Datastore) (assetNames, error) { outputVmNames := make([]string, 0, len(ds.Vm)) if len(ds.Vm) > 0 { var objects []mo.ManagedEntity if err := pc.Retrieve(ctx, ds.Vm, []string{"name"}, &objects); err != nil { - return nil, 
err + return assetNames{}, err } for _, ob := range objects { if ob.Reference().Type == "VirtualMachine" { @@ -220,7 +189,7 @@ func getAssetNames(ctx context.Context, pc *property.Collector, ds *mo.Datastore if len(hsRefs) > 0 { err := pc.Retrieve(ctx, hsRefs, []string{"name"}, &hosts) if err != nil { - return nil, err + return assetNames{}, err } } @@ -230,8 +199,57 @@ func getAssetNames(ctx context.Context, pc *property.Collector, ds *mo.Datastore } } - return &assetNames{ + return assetNames{ outputHostNames: outputHostNames, outputVmNames: outputVmNames, }, nil } + +func (m *DataStoreMetricSet) getPerfMetrics(ctx context.Context, perfManager *performance.Manager, dst mo.Datastore, metricIds []types.PerfMetricId) (metricMap map[string]interface{}, err error) { + metricMap = make(map[string]interface{}) + + period := m.Module().Config().Period + refreshRate := int32(period.Seconds()) + + spec := types.PerfQuerySpec{ + Entity: dst.Reference(), + MetricId: metricIds, + MaxSample: 1, + IntervalId: refreshRate, // using refreshRate as interval + } + + // Query performance data + samples, err := perfManager.Query(ctx, []types.PerfQuerySpec{spec}) + if err != nil { + if strings.Contains(err.Error(), "ServerFaultCode: A specified parameter was not correct: querySpec.interval") { + return metricMap, fmt.Errorf("failed to query performance data: use one of the system's supported interval. 
consider adjusting period: %w", err) + } + + return metricMap, fmt.Errorf("failed to query performance data: %w", err) + } + + if len(samples) == 0 { + m.Logger().Debug("No samples returned from performance manager") + return metricMap, nil + } + + results, err := perfManager.ToMetricSeries(ctx, samples) + if err != nil { + return metricMap, fmt.Errorf("failed to convert performance data to metric series: %w", err) + } + + if len(results) == 0 { + m.Logger().Debug("No results returned from metric series conversion") + return metricMap, nil + } + + for _, result := range results[0].Value { + if len(result.Value) > 0 { + metricMap[result.Name] = result.Value[0] + continue + } + m.Logger().Debugf("For datastore %s, Metric %s: No result found", dst.Name, result.Name) + } + + return metricMap, nil +} diff --git a/metricbeat/module/vsphere/datastore/datastore_test.go b/metricbeat/module/vsphere/datastore/datastore_test.go index e94989f10557..d9a9a5487809 100644 --- a/metricbeat/module/vsphere/datastore/datastore_test.go +++ b/metricbeat/module/vsphere/datastore/datastore_test.go @@ -19,6 +19,7 @@ package datastore import ( "testing" + "time" mbtest "github.com/elastic/beats/v7/metricbeat/mb/testing" @@ -105,5 +106,6 @@ func getConfig(ts *simulator.Server) map[string]interface{} { "username": "user", "password": "pass", "insecure": true, + "period": time.Second * 20, } } diff --git a/metricbeat/module/vsphere/datastorecluster/datastorecluster.go b/metricbeat/module/vsphere/datastorecluster/datastorecluster.go index dd26fa7ba046..3cadd20a9f1c 100644 --- a/metricbeat/module/vsphere/datastorecluster/datastorecluster.go +++ b/metricbeat/module/vsphere/datastorecluster/datastorecluster.go @@ -76,7 +76,7 @@ func (m *DatastoreClusterMetricSet) Fetch(ctx context.Context, reporter mb.Repor defer func() { if err := client.Logout(ctx); err != nil { - m.Logger().Errorf("error trying to logout from vSphere: %w", err) + m.Logger().Errorf("error trying to logout from vSphere: %v", err) 
} }() @@ -89,7 +89,7 @@ func (m *DatastoreClusterMetricSet) Fetch(ctx context.Context, reporter mb.Repor defer func() { if err := v.Destroy(ctx); err != nil { - m.Logger().Errorf("error trying to destroy view from vSphere: %w", err) + m.Logger().Errorf("error trying to destroy view from vSphere: %v", err) } }() @@ -107,7 +107,7 @@ func (m *DatastoreClusterMetricSet) Fetch(ctx context.Context, reporter mb.Repor assetNames, err := getAssetNames(ctx, pc, &datastoreCluster[i]) if err != nil { - m.Logger().Errorf("Failed to retrieve object from host %s: %w", datastoreCluster[i].Name, err) + m.Logger().Errorf("Failed to retrieve object from host %s: v", datastoreCluster[i].Name, err) } reporter.Event(mb.Event{MetricSetFields: m.mapEvent(datastoreCluster[i], &metricData{assetNames: assetNames})}) diff --git a/metricbeat/module/vsphere/host/_meta/data.json b/metricbeat/module/vsphere/host/_meta/data.json index cb8d7896ce52..568d08f97c9c 100644 --- a/metricbeat/module/vsphere/host/_meta/data.json +++ b/metricbeat/module/vsphere/host/_meta/data.json @@ -1,55 +1,46 @@ { - "@timestamp": "2022-09-06T06:41:22.128Z", + "@timestamp": "2017-10-12T08:05:34.853Z", + "event": { + "dataset": "vsphere.host", + "duration": 115000, + "module": "vsphere" + }, "metricset": { "name": "host", "period": 10000 }, "service": { - "address": "https://localhost:8989/sdk", + "address": "127.0.0.1:55538", "type": "vsphere" }, - "event": { - "module": "vsphere", - "duration": 23519250, - "dataset": "vsphere.host" - }, "vsphere": { "host": { "cpu": { - "used": { - "mhz": 67 + "free": { + "mhz": 4521 }, "total": { "mhz": 4588 }, - "free": { - "mhz": 4521 + "used": { + "mhz": 67 } }, + "datastore": { + "count": 1, + "names": [ + "LocalDS_0" + ] + }, "disk": { - "capacity": { - "usage": { - "bytes": 0 - } - }, - "devicelatency": { - "average": { - "ms": 0 - } - }, - "latency": { - "total": { - "ms": 18 - } + "read": { + "bytes": 159744 }, "total": { - "bytes": 262000 - }, - "read": { - "bytes": 13000 + 
"bytes": 401408 }, "write": { - "bytes": 248000 + "bytes": 259072 } }, "memory": { @@ -63,80 +54,49 @@ "bytes": 1472200704 } }, + "name": "localhost.localdomain", "network": { "bandwidth": { + "received": { + "bytes": 270336 + }, "total": { - "bytes": 372000 + "bytes": 532480 }, "transmitted": { - "bytes": 0 - }, - "received": { - "bytes": 371000 + "bytes": 249856 } }, + "count": 1, + "names": [ + "VM Network" + ], "packets": { - "received": { - "count": 9463 - }, - "errors": { - "transmitted": { - "count": 0 - }, - "received": { - "count": 0 - }, - "total": { - "count": 0 - } - }, "multicast": { - "total": { - "count": 6679 - }, - "transmitted": { - "count": 0 - }, "received": { - "count": 6679 + "count": 61 } }, - "dropped": { - "received": { - "count": 0 - }, - "total": { - "count": 0 - }, - "transmitted": { - "count": 0 - } + "received": { + "count": 4569 }, "transmitted": { - "count": 54 + "count": 4578 } } }, + "network_names": [ + "VM Network" + ], + "status": "gray", + "uptime": 77229, "vm": { "count": 2, "names": [ - "DC0_H0_VM0", - "DC0_H0_VM1" + "ha-host_VM0", + "ha-host_VM1" ] - }, - "datastore": { - "count": 1, - "names": [ - "LocalDS_0" - ] - }, - "network_count": 1, - "network_names": [ - "VM Network" - ], - "name": "DC0_H0", - "status": "green", - "uptime": 1728865 + } } } -} +} \ No newline at end of file diff --git a/metricbeat/module/vsphere/host/host.go b/metricbeat/module/vsphere/host/host.go index 2b7e26de72e2..6248f3e6cba3 100644 --- a/metricbeat/module/vsphere/host/host.go +++ b/metricbeat/module/vsphere/host/host.go @@ -100,7 +100,7 @@ func (m *HostMetricSet) Fetch(ctx context.Context, reporter mb.ReporterV2) error defer func() { if err := client.Logout(ctx); err != nil { - m.Logger().Errorf("error trying to log out from vSphere: %w", err) + m.Logger().Errorf("error trying to logout from vSphere: %v", err) } }() @@ -116,7 +116,7 @@ func (m *HostMetricSet) Fetch(ctx context.Context, reporter mb.ReporterV2) error defer func() { if err := 
v.Destroy(ctx); err != nil { - m.Logger().Errorf("error trying to destroy view from vSphere: %w", err) + m.Logger().Errorf("error trying to destroy view from vSphere: %v", err) } }() @@ -154,40 +154,12 @@ func (m *HostMetricSet) Fetch(ctx context.Context, reporter mb.ReporterV2) error default: assetNames, err := getAssetNames(ctx, pc, &hst[i]) if err != nil { - m.Logger().Errorf("Failed to retrieve object from host %s: %w", hst[i].Name, err) + m.Logger().Errorf("Failed to retrieve object from host %s: %v", hst[i].Name, err) } - spec := types.PerfQuerySpec{ - Entity: hst[i].Reference(), - MetricId: metricIds, - MaxSample: 1, - IntervalId: 20, // right now we are only grabbing real time metrics from the performance manager - } - - // Query performance data - samples, err := perfManager.Query(ctx, []types.PerfQuerySpec{spec}) - if err != nil { - m.Logger().Errorf("Failed to query performance data from host %s: %v", hst[i].Name, err) - continue - } - - if len(samples) == 0 { - m.Logger().Debug("No samples returned from performance manager") - continue - } - - results, err := perfManager.ToMetricSeries(ctx, samples) + metricMap, err := m.getPerfMetrics(ctx, perfManager, hst[i], metricIds) if err != nil { - m.Logger().Errorf("Failed to convert performance data to metric series for host %s: %v", hst[i].Name, err) - } - - metricMap := make(map[string]interface{}) - for _, result := range results[0].Value { - if len(result.Value) > 0 { - metricMap[result.Name] = result.Value[0] - continue - } - m.Logger().Debugf("For host %s,Metric %v: No result found", hst[i].Name, result.Name) + m.Logger().Errorf("Failed to retrieve performance metrics from host %s: %v", hst[i].Name, err) } reporter.Event(mb.Event{ @@ -240,3 +212,52 @@ func getAssetNames(ctx context.Context, pc *property.Collector, hs *mo.HostSyste outputVmNames: outputVmNames, }, nil } + +func (m *HostMetricSet) getPerfMetrics(ctx context.Context, perfManager *performance.Manager, hst mo.HostSystem, metricIds 
[]types.PerfMetricId) (metricMap map[string]interface{}, err error) { + metricMap = make(map[string]interface{}) + + period := m.Module().Config().Period + refreshRate := int32(period.Seconds()) + + spec := types.PerfQuerySpec{ + Entity: hst.Reference(), + MetricId: metricIds, + MaxSample: 1, + IntervalId: refreshRate, + } + + // Query performance data + samples, err := perfManager.Query(ctx, []types.PerfQuerySpec{spec}) + if err != nil { + if strings.Contains(err.Error(), "ServerFaultCode: A specified parameter was not correct: querySpec.interval") { + return metricMap, fmt.Errorf("failed to query performance data: use one of the system's supported interval. consider adjusting period: %w", err) + } + + return metricMap, fmt.Errorf("failed to query performance data: %w", err) + } + + if len(samples) == 0 { + m.Logger().Debug("No samples returned from performance manager") + return metricMap, nil + } + + results, err := perfManager.ToMetricSeries(ctx, samples) + if err != nil { + return metricMap, fmt.Errorf("failed to convert performance data to metric series: %w", err) + } + + if len(results) == 0 { + m.Logger().Debug("No results returned from metric series conversion") + return metricMap, nil + } + + for _, result := range results[0].Value { + if len(result.Value) > 0 { + metricMap[result.Name] = result.Value[0] + continue + } + m.Logger().Debugf("For host %s, Metric %s: No result found", hst.Name, result.Name) + } + + return metricMap, nil +} diff --git a/metricbeat/module/vsphere/host/host_test.go b/metricbeat/module/vsphere/host/host_test.go index 12692702b7a5..5a69e0e546fe 100644 --- a/metricbeat/module/vsphere/host/host_test.go +++ b/metricbeat/module/vsphere/host/host_test.go @@ -19,6 +19,7 @@ package host import ( "testing" + "time" mbtest "github.com/elastic/beats/v7/metricbeat/mb/testing" "github.com/elastic/elastic-agent-libs/mapstr" @@ -191,5 +192,6 @@ func getConfig(ts *simulator.Server) map[string]interface{} { "username": "user", "password": "pass", 
"insecure": true, + "period": time.Second * 20, } } diff --git a/metricbeat/module/vsphere/network/network.go b/metricbeat/module/vsphere/network/network.go index 1adbf8f5d33d..34e690009cda 100644 --- a/metricbeat/module/vsphere/network/network.go +++ b/metricbeat/module/vsphere/network/network.go @@ -79,7 +79,7 @@ func (m *NetworkMetricSet) Fetch(ctx context.Context, reporter mb.ReporterV2) er defer func() { if err := client.Logout(ctx); err != nil { - m.Logger().Errorf("error trying to logout from vSphere: %w", err) + m.Logger().Errorf("error trying to logout from vSphere: %v", err) } }() @@ -92,7 +92,7 @@ func (m *NetworkMetricSet) Fetch(ctx context.Context, reporter mb.ReporterV2) er defer func() { if err := v.Destroy(ctx); err != nil { - m.Logger().Errorf("error trying to destroy view from vSphere: %w", err) + m.Logger().Errorf("error trying to destroy view from vSphere: %v", err) } }() diff --git a/metricbeat/module/vsphere/resourcepool/resourcepool.go b/metricbeat/module/vsphere/resourcepool/resourcepool.go index ed54b6907fde..1c83e3d890d7 100644 --- a/metricbeat/module/vsphere/resourcepool/resourcepool.go +++ b/metricbeat/module/vsphere/resourcepool/resourcepool.go @@ -80,7 +80,7 @@ func (m *ResourcePoolMetricSet) Fetch(ctx context.Context, reporter mb.ReporterV defer func() { if err := client.Logout(ctx); err != nil { - m.Logger().Errorf("error trying to log out from vSphere: %w", err) + m.Logger().Errorf("error trying to logout from vSphere: %v", err) } }() @@ -96,7 +96,7 @@ func (m *ResourcePoolMetricSet) Fetch(ctx context.Context, reporter mb.ReporterV defer func() { if err := v.Destroy(ctx); err != nil { - m.Logger().Errorf("error trying to destroy view from vSphere: %w", err) + m.Logger().Errorf("error trying to destroy view from vSphere: %v", err) } }() @@ -115,7 +115,7 @@ func (m *ResourcePoolMetricSet) Fetch(ctx context.Context, reporter mb.ReporterV default: assetNames, err := getAssetNames(ctx, pc, &rps[i]) if err != nil { - 
m.Logger().Errorf("Failed to retrieve object from resource pool %s: %w", rps[i].Name, err) + m.Logger().Errorf("Failed to retrieve object from resource pool %s: %v", rps[i].Name, err) } reporter.Event(mb.Event{ diff --git a/metricbeat/module/vsphere/test_vsphere.py b/metricbeat/module/vsphere/test_vsphere.py index edc06a16c9fe..acf795b79ddf 100644 --- a/metricbeat/module/vsphere/test_vsphere.py +++ b/metricbeat/module/vsphere/test_vsphere.py @@ -24,7 +24,7 @@ def test_datastore(self): "name": "vsphere", "metricsets": ["datastore"], "hosts": self.get_hosts(), - "period": "5s", + "period": "20s", "username": "user", "password": "pass", "extras": { @@ -55,7 +55,7 @@ def test_host(self): "name": "vsphere", "metricsets": ["host"], "hosts": self.get_hosts(), - "period": "5s", + "period": "20s", "username": "user", "password": "pass", "extras": { diff --git a/metricbeat/module/vsphere/virtualmachine/virtualmachine.go b/metricbeat/module/vsphere/virtualmachine/virtualmachine.go index e704061f8597..bcc42b516538 100644 --- a/metricbeat/module/vsphere/virtualmachine/virtualmachine.go +++ b/metricbeat/module/vsphere/virtualmachine/virtualmachine.go @@ -104,7 +104,7 @@ func (m *MetricSet) Fetch(ctx context.Context, reporter mb.ReporterV2) error { defer func() { if err := client.Logout(ctx); err != nil { - m.Logger().Debugf("Error logging out from vsphere: %v", err) + m.Logger().Errorf("error trying to logout from vSphere: %v", err) } }() @@ -130,7 +130,7 @@ func (m *MetricSet) Fetch(ctx context.Context, reporter mb.ReporterV2) error { defer func() { if err := v.Destroy(ctx); err != nil { - m.Logger().Debug("Error destroying view from vsphere %w", err) + m.Logger().Debugf("Error destroying view from vsphere %v", err) } }() diff --git a/metricbeat/modules.d/vsphere.yml.disabled b/metricbeat/modules.d/vsphere.yml.disabled index 717ce1326d50..b5685cee5e10 100644 --- a/metricbeat/modules.d/vsphere.yml.disabled +++ b/metricbeat/modules.d/vsphere.yml.disabled @@ -10,7 +10,17 @@ # - 
network # - resourcepool # - virtualmachine - # Real-time data collection – An ESXi Server collects data for each performance counter every 20 seconds. + + # Real-time data collection – An ESXi Server collects data for each performance counter every 20 seconds by default. + # Supported Periods: + # The Datastore and Host metricsets support performance data collection using the vSphere performance API. + # Since the performance API has usage restrictions based on data collection intervals, + # users should ensure that the period is configured optimally to receive real-time data. + # Users can still collect summary metrics if performance metrics are not supported for the configured instance. + # This configuration can be determined based on the Data Collection Intervals and Data Collection Levels. + # Reference Links: + # Data Collection Intervals: https://docs.vmware.com/en/VMware-vSphere/7.0/com.vmware.vsphere.monitoring.doc/GUID-247646EA-A04B-411A-8DD4-62A3DCFCF49B.html + # Data Collection Levels: https://docs.vmware.com/en/VMware-vSphere/7.0/com.vmware.vsphere.monitoring.doc/GUID-25800DE4-68E5-41CC-82D9-8811E27924BC.html period: 20s hosts: ["https://localhost/sdk"] @@ -18,5 +28,5 @@ password: "password" # If insecure is true, don't verify the server's certificate chain insecure: false - # Get custom fields when using virtualmachine metric set. Default false. + # Get custom fields when using virtualmachine metricset. Default false.
# get_custom_fields: false diff --git a/x-pack/metricbeat/metricbeat.reference.yml b/x-pack/metricbeat/metricbeat.reference.yml index f146f0f0f135..240acb2cfd6a 100644 --- a/x-pack/metricbeat/metricbeat.reference.yml +++ b/x-pack/metricbeat/metricbeat.reference.yml @@ -1617,7 +1617,17 @@ metricbeat.modules: - module: vsphere enabled: true metricsets: ["cluster", "datastore", "datastorecluster", "host", "network", "resourcepool", "virtualmachine"] - # Real-time data collection – An ESXi Server collects data for each performance counter every 20 seconds. + + # Real-time data collection – An ESXi Server collects data for each performance counter every 20 seconds by default. + # Supported Periods: + # The Datastore and Host metricsets support performance data collection using the vSphere performance API. + # Since the performance API has usage restrictions based on data collection intervals, + # users should ensure that the period is configured optimally to receive real-time data. + # Users can still collect summary metrics if performance metrics are not supported for the configured instance. + # This configuration can be determined based on the Data Collection Intervals and Data Collection Levels. + # Reference Links: + # Data Collection Intervals: https://docs.vmware.com/en/VMware-vSphere/7.0/com.vmware.vsphere.monitoring.doc/GUID-247646EA-A04B-411A-8DD4-62A3DCFCF49B.html + # Data Collection Levels: https://docs.vmware.com/en/VMware-vSphere/7.0/com.vmware.vsphere.monitoring.doc/GUID-25800DE4-68E5-41CC-82D9-8811E27924BC.html period: 20s hosts: ["https://localhost/sdk"] @@ -1625,7 +1635,7 @@ metricbeat.modules: password: "password" # If insecure is true, don't verify the server's certificate chain insecure: false - # Get custom fields when using virtualmachine metric set. Default false. + # Get custom fields when using virtualmachine metricset. Default false. # get_custom_fields: false #------------------------------- Windows Module -------------------------------