Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 15 additions & 0 deletions receiver/dockerstatsreceiver/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -65,3 +65,18 @@ with detailed sample configurations [here](./testdata/config.yaml).

[alpha]: https://github.com/open-telemetry/opentelemetry-collector#alpha
[contrib]: https://github.com/open-telemetry/opentelemetry-collector-releases/tree/main/distributions/otelcol-contrib


## Feature Gates

See the [Collector feature gates](https://github.com/open-telemetry/opentelemetry-collector/blob/main/service/featuregate/README.md#collector-feature-gates) for an overview of feature gates in the collector.

**ALPHA**: `receiver.dockerstats.useScraperV2`

The feature gate `receiver.dockerstats.useScraperV2`, once enabled, allows selective collection of the metrics that are described in [documentation.md](./documentation.md). When the feature gate is disabled, the metrics settings are mostly ignored and not configurable, and the emitted metrics have minor variations in name and attributes.

This is considered a breaking change for existing users of this receiver, and it is recommended to migrate to the new implementation when possible. Any new users planning to adopt this receiver should enable this feature gate to avoid having to migrate any visualisations or alerts.

This feature gate will eventually be enabled by default, and the old implementation will later be removed. It aims
Comment thread
jamesmoessis marked this conversation as resolved.
to give users time to migrate to the new implementation. The target release for this feature gate to be enabled by default
is 0.60.0.
104 changes: 52 additions & 52 deletions receiver/dockerstatsreceiver/documentation.md

Large diffs are not rendered by default.

21 changes: 18 additions & 3 deletions receiver/dockerstatsreceiver/factory.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,15 +22,25 @@ import (
"go.opentelemetry.io/collector/config"
"go.opentelemetry.io/collector/consumer"
"go.opentelemetry.io/collector/receiver/scraperhelper"
"go.opentelemetry.io/collector/service/featuregate"

"github.com/open-telemetry/opentelemetry-collector-contrib/receiver/dockerstatsreceiver/internal/metadata"
)

const (
typeStr = "docker_stats"
stability = component.StabilityLevelAlpha
typeStr = "docker_stats"
stability = component.StabilityLevelAlpha
useScraperV2ID = "receiver.dockerstats.useScraperV2"
)

func init() {
Comment thread
jamesmoessis marked this conversation as resolved.
featuregate.GetRegistry().MustRegister(featuregate.Gate{
ID: useScraperV2ID,
Description: "When enabled, the receiver will use the function ScrapeV2 to collect metrics. This allows each metric to be turned off/on via config. The new metrics are slightly different to the legacy implementation.",
Enabled: false,
})
}

func NewFactory() component.ReceiverFactory {
return component.NewReceiverFactory(
typeStr,
Expand Down Expand Up @@ -59,7 +69,12 @@ func createMetricsReceiver(
dockerConfig := config.(*Config)
dsr := newReceiver(params, dockerConfig)

scrp, err := scraperhelper.NewScraper(typeStr, dsr.scrape, scraperhelper.WithStart(dsr.start))
scrapeFunc := dsr.scrape
if featuregate.GetRegistry().IsEnabled(useScraperV2ID) {
scrapeFunc = dsr.scrapeV2
}

scrp, err := scraperhelper.NewScraper(typeStr, scrapeFunc, scraperhelper.WithStart(dsr.start))
if err != nil {
return nil, err
}
Expand Down

Large diffs are not rendered by default.

44 changes: 22 additions & 22 deletions receiver/dockerstatsreceiver/metadata.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -85,15 +85,15 @@ metrics:
container.cpu.throttling_data.periods:
enabled: true
description: "Number of periods with throttling active."
unit: "1"
unit: "{periods}"
sum:
value_type: int
monotonic: true
aggregation: cumulative
container.cpu.throttling_data.throttled_periods:
enabled: true
description: "Number of periods when the container hits its throttling limit."
unit: "1"
unit: "{periods}"
sum:
value_type: int
monotonic: true
Expand Down Expand Up @@ -179,7 +179,7 @@ metrics:
enabled: true
description: "Number of pages read from disk by the cgroup."
extended_documentation: "[More docs](https://www.kernel.org/doc/Documentation/cgroup-v1/memory.txt)."
unit: "1"
unit: "{operations}"
sum:
value_type: int
aggregation: cumulative
Expand All @@ -188,7 +188,7 @@ metrics:
enabled: true
description: "Number of pages written to disk by the cgroup."
extended_documentation: "[More docs](https://www.kernel.org/doc/Documentation/cgroup-v1/memory.txt)."
unit: "1"
unit: "{operations}"
sum:
value_type: int
aggregation: cumulative
Expand All @@ -202,15 +202,15 @@ metrics:
container.memory.pgfault:
enabled: true
description: "Indicate the number of times that a process of the cgroup triggered a page fault."
unit: "1"
unit: "{faults}"
sum:
value_type: int
aggregation: cumulative
monotonic: true
container.memory.pgmajfault:
enabled: true
description: "Indicate the number of times that a process of the cgroup triggered a major fault."
unit: "1"
unit: "{faults}"
sum:
value_type: int
aggregation: cumulative
Expand Down Expand Up @@ -298,15 +298,15 @@ metrics:
container.memory.total_pgpgin:
enabled: true
description: "Number of pages read from disk by the cgroup and descendant groups."
unit: "1"
unit: "{operations}"
sum:
value_type: int
aggregation: cumulative
monotonic: true
container.memory.total_pgpgout:
enabled: true
description: "Number of pages written to disk by the cgroup and descendant groups."
unit: "1"
unit: "{operations}"
sum:
value_type: int
aggregation: cumulative
Expand All @@ -320,15 +320,15 @@ metrics:
container.memory.total_pgfault:
enabled: true
description: "Indicate the number of times that a process of the cgroup (or descendant cgroups) triggered a page fault."
unit: "1"
unit: "{faults}"
sum:
value_type: int
aggregation: cumulative
monotonic: true
container.memory.total_pgmajfault:
enabled: true
description: "Indicate the number of times that a process of the cgroup (or descendant cgroups) triggered a major fault."
unit: "1"
unit: "{faults}"
sum:
value_type: int
aggregation: cumulative
Expand Down Expand Up @@ -372,7 +372,7 @@ metrics:
enabled: true
description: "Number of bios/requests merged into requests belonging to this cgroup and its descendant cgroups."
extended_documentation: "[More docs](https://www.kernel.org/doc/Documentation/cgroup-v1/blkio-controller.txt)."
unit: "1"
unit: "{operations}"
sum:
value_type: int
monotonic: true
Expand All @@ -390,7 +390,7 @@ metrics:
enabled: true
description: "Number of requests queued up for this cgroup and its descendant cgroups."
extended_documentation: "[More docs](https://www.kernel.org/doc/Documentation/cgroup-v1/blkio-controller.txt)."
unit: "1"
unit: "{operations}"
sum:
value_type: int
monotonic: true
Expand Down Expand Up @@ -426,7 +426,7 @@ metrics:
enabled: true
description: "Total amount of time in nanoseconds between request dispatch and request completion for the IOs done by this cgroup and descendant cgroups."
extended_documentation: "[More docs](https://www.kernel.org/doc/Documentation/cgroup-v1/blkio-controller.txt)."
unit: "1" # Preserving legacy incorrect unit for now. Should be nanoseconds eventually.
unit: ns
sum:
value_type: int
monotonic: true
Expand All @@ -444,7 +444,7 @@ metrics:
enabled: true
description: "Number of IOs (bio) issued to the disk by the group and descendant groups."
extended_documentation: "[More docs](https://www.kernel.org/doc/Documentation/cgroup-v1/blkio-controller.txt)."
unit: "1"
unit: "{operations}"
sum:
value_type: int
monotonic: true
Expand Down Expand Up @@ -480,7 +480,7 @@ metrics:
enabled: true
description: "Total amount of time the IOs for this cgroup (and descendant cgroups) spent waiting in the scheduler queues for service."
extended_documentation: "[More docs](https://www.kernel.org/doc/Documentation/cgroup-v1/blkio-controller.txt)."
unit: "1" # Should be in ns but preserving legacy mistake for now
unit: ns
sum:
value_type: int
monotonic: true
Expand All @@ -498,7 +498,7 @@ metrics:
enabled: true
description: "Number of sectors transferred to/from disk by the group and descendant groups."
extended_documentation: "[More docs](https://www.kernel.org/doc/Documentation/cgroup-v1/blkio-controller.txt)."
unit: "1"
unit: "{sectors}"
sum:
value_type: int
monotonic: true
Expand Down Expand Up @@ -536,7 +536,7 @@ metrics:
container.network.io.usage.rx_dropped:
enabled: true
description: "Incoming packets dropped."
unit: "1"
unit: "{packets}"
sum:
value_type: int
monotonic: true
Expand All @@ -546,7 +546,7 @@ metrics:
container.network.io.usage.tx_dropped:
enabled: true
description: "Outgoing packets dropped."
unit: "1"
unit: "{packets}"
sum:
value_type: int
monotonic: true
Expand All @@ -556,7 +556,7 @@ metrics:
container.network.io.usage.rx_errors:
enabled: true
description: "Received errors."
unit: "1"
unit: "{errors}"
sum:
value_type: int
monotonic: true
Expand All @@ -566,7 +566,7 @@ metrics:
container.network.io.usage.tx_errors:
enabled: true
description: "Sent errors."
unit: "1"
unit: "{errors}"
sum:
value_type: int
monotonic: true
Expand All @@ -576,7 +576,7 @@ metrics:
container.network.io.usage.rx_packets:
enabled: true
description: "Packets received."
unit: "1"
unit: "{packets}"
sum:
value_type: int
monotonic: true
Expand All @@ -586,7 +586,7 @@ metrics:
container.network.io.usage.tx_packets:
enabled: true
description: "Packets sent."
unit: "1"
unit: "{packets}"
sum:
value_type: int
monotonic: true
Expand Down
46 changes: 4 additions & 42 deletions receiver/dockerstatsreceiver/receiver_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,14 +26,12 @@ import (
"net/http/httptest"
"os"
"path/filepath"
"strings"
"testing"
"time"

"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
"go.opentelemetry.io/collector/component/componenttest"
"go.opentelemetry.io/collector/pdata/pmetric"
"go.opentelemetry.io/collector/receiver/scraperhelper"

"github.com/open-telemetry/opentelemetry-collector-contrib/internal/scrapertest"
Expand Down Expand Up @@ -77,7 +75,7 @@ func TestErrorsInStart(t *testing.T) {
assert.Contains(t, err.Error(), "context deadline exceeded")
}

func TestScrapes(t *testing.T) {
func TestScrapeV2(t *testing.T) {
containerIDs := []string{
"10b703fb312b25e8368ab5a3bce3a1610d1cee5d71a94920f1a7adbc5b0cb326",
"89d28931fd8b95c8806343a532e9e76bf0a0b76ee8f19452b8f75dee1ebcebb7",
Expand All @@ -103,39 +101,16 @@ func TestScrapes(t *testing.T) {

testCases := []struct {
desc string
scrape func(*receiver) (pmetric.Metrics, error)
expectedMetricsFile string
mockDockerEngine *httptest.Server
}{
{
desc: "scrapeV1_single_container",
scrape: func(rcv *receiver) (pmetric.Metrics, error) {
return rcv.scrape(context.Background())
},
desc: "scrapeV2_single_container",
expectedMetricsFile: filepath.Join(mockFolder, "single_container", "expected_metrics.json"),
mockDockerEngine: singleContainerEngineMock,
},
{
desc: "scrapeV2_single_container",
scrape: func(rcv *receiver) (pmetric.Metrics, error) {
return rcv.scrapeV2(context.Background())
},
expectedMetricsFile: filepath.Join(mockFolder, "single_container", "expected_metrics.json"),
mockDockerEngine: singleContainerEngineMock,
},
{
desc: "scrapeV1_two_containers",
scrape: func(rcv *receiver) (pmetric.Metrics, error) {
return rcv.scrape(context.Background())
},
expectedMetricsFile: filepath.Join(mockFolder, "two_containers", "expected_metrics.json"),
mockDockerEngine: twoContainerEngineMock,
},
{
desc: "scrapeV2_two_containers",
scrape: func(rcv *receiver) (pmetric.Metrics, error) {
return rcv.scrapeV2(context.Background())
},
desc: "scrapeV2_two_containers",
expectedMetricsFile: filepath.Join(mockFolder, "two_containers", "expected_metrics.json"),
mockDockerEngine: twoContainerEngineMock,
},
Expand All @@ -153,24 +128,11 @@ func TestScrapes(t *testing.T) {
err := receiver.start(context.Background(), componenttest.NewNopHost())
require.NoError(t, err)

actualMetrics, err := tc.scrape(receiver)
actualMetrics, err := receiver.scrapeV2(context.Background())
require.NoError(t, err)

expectedMetrics, err := golden.ReadMetrics(tc.expectedMetricsFile)

if !strings.HasPrefix(tc.desc, "scrapeV1") {
// Unset various fields for comparison purposes (non-mdatagen implementation doesn't have these set)
for i := 0; i < actualMetrics.ResourceMetrics().Len(); i++ {
for j := 0; j < actualMetrics.ResourceMetrics().At(i).ScopeMetrics().Len(); j++ {
sm := actualMetrics.ResourceMetrics().At(i).ScopeMetrics().At(j)
sm.Scope().SetName("")
sm.Scope().SetVersion("")
for k := 0; k < sm.Metrics().Len(); k++ {
sm.Metrics().At(k).SetDescription("")
}
}
}
}
assert.NoError(t, err)
assert.NoError(t, scrapertest.CompareMetrics(expectedMetrics, actualMetrics))
})
Expand Down
Loading