Skip to content

Commit e39e36f

Browse files
authored
PMM-12522 collect chunks metrics in low resolution (#849)
* PMM-12522 add component to logs.
* PMM-12522 log shards DSNs.
* PMM-12522 New chunks collector.
* PMM-12522 Dropped new chunks collector and moved data to shards collector.
* PMM-12522 Shards collector runs only for mongos instance.
* PMM-12522 Fix tests.
* PMM-12522 Fix linter.
* PMM-12522 Cleanup.
* PMM-12522 Cleanup.
* PMM-12522 Cleanup.
1 parent 6ba3e6b commit e39e36f

20 files changed

+127
-108
lines changed

docker-compose.yml

+6-3
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ services:
55
image: ${TEST_MONGODB_IMAGE:-mongo:4.2}
66
ports:
77
- "${TEST_MONGODB_S1_PRIMARY_PORT:-17001}:27017"
8-
command: mongod --replSet rs1 --shardsvr --port 27017 --oplogSize 16
8+
command: mongod --replSet rs1 --shardsvr --port 27017 --oplogSize 16 --bind_ip 0.0.0.0
99
networks:
1010
- rs1
1111
depends_on:
@@ -18,7 +18,7 @@ services:
1818
image: ${TEST_MONGODB_IMAGE:-mongo:4.2}
1919
ports:
2020
- "${TEST_MONGODB_S1_SECONDARY1_PORT:-17002}:27017"
21-
command: mongod --replSet rs1 --shardsvr --port 27017 --oplogSize 16
21+
command: mongod --replSet rs1 --shardsvr --port 27017 --oplogSize 16 --bind_ip 0.0.0.0
2222
networks:
2323
- rs1
2424

@@ -27,7 +27,7 @@ services:
2727
image: ${TEST_MONGODB_IMAGE:-mongo:4.2}
2828
ports:
2929
- "${TEST_MONGODB_S1_SECONDARY2_PORT:-17003}:27017"
30-
command: mongod --replSet rs1 --shardsvr --port 27017 --oplogSize 16
30+
command: mongod --replSet rs1 --shardsvr --port 27017 --oplogSize 16 --bind_ip 0.0.0.0
3131
networks:
3232
- rs1
3333

@@ -192,6 +192,9 @@ services:
192192
- "mongos"
193193
networks:
194194
- mongo-shard
195+
- rs1
196+
- rs2
197+
- cnf-serv
195198
volumes:
196199
- ./docker/scripts:/scripts
197200
environment:

docker/scripts/init-shard.sh

+4
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,10 @@ done
3737
echo "Started.."
3838

3939
echo init-shard.sh time now: `date +"%T" `
40+
echo "Configuring sharding.."
41+
echo "${RS1}/${mongodb11}:${PORT1},${mongodb12}:${PORT2},${mongodb13}:${PORT3}"
42+
echo "${RS2}/${mongodb21}:${PORT1},${mongodb22}:${PORT2},${mongodb23}:${PORT3}"
43+
4044
${MONGODB_CLIENT} --host ${mongodb1}:${port} <<EOF
4145
sh.addShard( "${RS1}/${mongodb11}:${PORT1},${mongodb12}:${PORT2},${mongodb13}:${PORT3}" );
4246
sh.addShard( "${RS2}/${mongodb21}:${PORT1},${mongodb22}:${PORT2},${mongodb23}:${PORT3}" );

exporter/base_collector.go

+2-2
Original file line numberDiff line numberDiff line change
@@ -26,14 +26,14 @@ import (
2626

2727
type baseCollector struct {
2828
client *mongo.Client
29-
logger *logrus.Logger
29+
logger *logrus.Entry
3030

3131
lock sync.Mutex
3232
metricsCache []prometheus.Metric
3333
}
3434

3535
// newBaseCollector creates a skeletal collector, which is used to create other collectors.
36-
func newBaseCollector(client *mongo.Client, logger *logrus.Logger) *baseCollector {
36+
func newBaseCollector(client *mongo.Client, logger *logrus.Entry) *baseCollector {
3737
return &baseCollector{
3838
client: client,
3939
logger: logger,

exporter/collstats_collector.go

+1-2
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,7 @@ type collstatsCollector struct {
4040
func newCollectionStatsCollector(ctx context.Context, client *mongo.Client, logger *logrus.Logger, compatible, discovery bool, topology labelsGetter, collections []string) *collstatsCollector {
4141
return &collstatsCollector{
4242
ctx: ctx,
43-
base: newBaseCollector(client, logger),
43+
base: newBaseCollector(client, logger.WithFields(logrus.Fields{"collector": "collstats"})),
4444

4545
compatibleMode: compatible,
4646
discoveringMode: discovery,
@@ -79,7 +79,6 @@ func (d *collstatsCollector) collect(ch chan<- prometheus.Metric) {
7979
collections, err = checkNamespacesForViews(d.ctx, client, d.collections)
8080
if err != nil {
8181
logger.Errorf("cannot list collections: %s", err.Error())
82-
8382
return
8483
}
8584
}

exporter/currentop_collector.go

+1-1
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,7 @@ func newCurrentopCollector(ctx context.Context, client *mongo.Client, logger *lo
4444
) *currentopCollector {
4545
return &currentopCollector{
4646
ctx: ctx,
47-
base: newBaseCollector(client, logger),
47+
base: newBaseCollector(client, logger.WithFields(logrus.Fields{"collector": "currentop"})),
4848
compatibleMode: compatible,
4949
topologyInfo: topology,
5050
currentopslowtime: currentOpSlowTime,

exporter/dbstats_collector.go

+1-1
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,7 @@ type dbstatsCollector struct {
4040
func newDBStatsCollector(ctx context.Context, client *mongo.Client, logger *logrus.Logger, compatible bool, topology labelsGetter, databaseRegex []string, freeStorage bool) *dbstatsCollector {
4141
return &dbstatsCollector{
4242
ctx: ctx,
43-
base: newBaseCollector(client, logger),
43+
base: newBaseCollector(client, logger.WithFields(logrus.Fields{"collector": "dbstats"})),
4444

4545
compatibleMode: compatible,
4646
topologyInfo: topology,

exporter/debug.go

+2-2
Original file line numberDiff line numberDiff line change
@@ -23,8 +23,8 @@ import (
2323
"github.com/sirupsen/logrus"
2424
)
2525

26-
func debugResult(log *logrus.Logger, m interface{}) {
27-
if !log.IsLevelEnabled(logrus.DebugLevel) {
26+
func debugResult(log *logrus.Entry, m interface{}) {
27+
if !log.Logger.IsLevelEnabled(logrus.DebugLevel) {
2828
return
2929
}
3030

exporter/debug_test.go

+1-1
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,7 @@ func TestDebug(t *testing.T) {
5555
}
5656
}` + "\n"
5757

58-
debugResult(log, m)
58+
debugResult(log.WithField("component", "test"), m)
5959
assert.NoError(t, w.Close())
6060
out, _ := io.ReadAll(r)
6161

exporter/diagnostic_data_collector.go

+3-1
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,7 @@ type diagnosticDataCollector struct {
4343
func newDiagnosticDataCollector(ctx context.Context, client *mongo.Client, logger *logrus.Logger, compatible bool, topology labelsGetter) *diagnosticDataCollector {
4444
return &diagnosticDataCollector{
4545
ctx: ctx,
46-
base: newBaseCollector(client, logger),
46+
base: newBaseCollector(client, logger.WithFields(logrus.Fields{"collector": "diagnostic_data"})),
4747

4848
compatibleMode: compatible,
4949
topologyInfo: topology,
@@ -102,6 +102,7 @@ func (d *diagnosticDataCollector) collect(ch chan<- prometheus.Metric) {
102102
}
103103

104104
if d.compatibleMode {
105+
logger.Debug("running special metrics for compatibility mode")
105106
metrics = append(metrics, specialMetrics(d.ctx, client, m, logger)...)
106107

107108
if cem, err := cacheEvictedTotalMetric(m); err == nil {
@@ -114,6 +115,7 @@ func (d *diagnosticDataCollector) collect(ch chan<- prometheus.Metric) {
114115
"component": "diagnosticDataCollector",
115116
}).Errorf("Cannot get node type to check if this is a mongos: %s", err)
116117
} else if nodeType == typeMongos {
118+
logger.Debug("running special metrics for mongos")
117119
metrics = append(metrics, mongosMetrics(d.ctx, client, logger)...)
118120
}
119121
}

exporter/exporter.go

+1-1
Original file line numberDiff line numberDiff line change
@@ -229,7 +229,7 @@ func (e *Exporter) makeRegistry(ctx context.Context, client *mongo.Client, topol
229229
registry.MustRegister(rsgsc)
230230
}
231231

232-
if e.opts.EnableShards && requestOpts.EnableShards {
232+
if e.opts.EnableShards && nodeType == typeMongos && requestOpts.EnableShards {
233233
sc := newShardsCollector(ctx, client, e.opts.Logger, e.opts.CompatibleMode)
234234
registry.MustRegister(sc)
235235
}

exporter/general_collector.go

+2-2
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,7 @@ type generalCollector struct {
3535
func newGeneralCollector(ctx context.Context, client *mongo.Client, logger *logrus.Logger) *generalCollector {
3636
return &generalCollector{
3737
ctx: ctx,
38-
base: newBaseCollector(client, logger),
38+
base: newBaseCollector(client, logger.WithFields(logrus.Fields{"collector": "general"})),
3939
}
4040
}
4141

@@ -52,7 +52,7 @@ func (d *generalCollector) collect(ch chan<- prometheus.Metric) {
5252
ch <- mongodbUpMetric(d.ctx, d.base.client, d.base.logger)
5353
}
5454

55-
func mongodbUpMetric(ctx context.Context, client *mongo.Client, log *logrus.Logger) prometheus.Metric {
55+
func mongodbUpMetric(ctx context.Context, client *mongo.Client, log *logrus.Entry) prometheus.Metric { //nolint:ireturn
5656
var value float64
5757

5858
if client != nil {

exporter/indexstats_collector.go

+1-1
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,7 @@ type indexstatsCollector struct {
4141
func newIndexStatsCollector(ctx context.Context, client *mongo.Client, logger *logrus.Logger, discovery, overrideDescendingIndex bool, topology labelsGetter, collections []string) *indexstatsCollector {
4242
return &indexstatsCollector{
4343
ctx: ctx,
44-
base: newBaseCollector(client, logger),
44+
base: newBaseCollector(client, logger.WithFields(logrus.Fields{"collector": "indexstats"})),
4545

4646
discoveringMode: discovery,
4747
topologyInfo: topology,

exporter/metrics.go

-1
Original file line numberDiff line numberDiff line change
@@ -253,7 +253,6 @@ func makeMetrics(prefix string, m bson.M, labels map[string]string, compatibleMo
253253
case map[string]interface{}:
254254
res = append(res, makeMetrics(prefix+k, v, labels, compatibleMode)...)
255255
case primitive.A:
256-
v = []interface{}(v)
257256
res = append(res, processSlice(prefix, k, v, labels, compatibleMode)...)
258257
case []interface{}:
259258
continue

exporter/profile_status_collector.go

+3-4
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,6 @@ import (
1919
"context"
2020
"time"
2121

22-
"github.com/pkg/errors"
2322
"github.com/prometheus/client_golang/prometheus"
2423
"github.com/sirupsen/logrus"
2524
"go.mongodb.org/mongo-driver/bson"
@@ -41,7 +40,7 @@ func newProfileCollector(ctx context.Context, client *mongo.Client, logger *logr
4140
) *profileCollector {
4241
return &profileCollector{
4342
ctx: ctx,
44-
base: newBaseCollector(client, logger),
43+
base: newBaseCollector(client, logger.WithFields(logrus.Fields{"collector": "profile"})),
4544
compatibleMode: compatible,
4645
topologyInfo: topology,
4746
profiletimets: profileTimeTS,
@@ -65,7 +64,7 @@ func (d *profileCollector) collect(ch chan<- prometheus.Metric) {
6564

6665
databases, err := databases(d.ctx, client, nil, nil)
6766
if err != nil {
68-
errors.Wrap(err, "cannot get the database names list")
67+
logger.Warnf("cannot get databases: %s", err)
6968
return
7069
}
7170

@@ -79,7 +78,7 @@ func (d *profileCollector) collect(ch chan<- prometheus.Metric) {
7978
for _, db := range databases {
8079
res, err := client.Database(db).Collection("system.profile").CountDocuments(d.ctx, cmd)
8180
if err != nil {
82-
errors.Wrapf(err, "cannot read system.profile")
81+
logger.Warnf("cannot get profile count for database %s: %s", db, err)
8382
break
8483
}
8584
labels["database"] = db

exporter/replset_status_collector.go

+1-1
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,7 @@ type replSetGetStatusCollector struct {
4141
func newReplicationSetStatusCollector(ctx context.Context, client *mongo.Client, logger *logrus.Logger, compatible bool, topology labelsGetter) *replSetGetStatusCollector {
4242
return &replSetGetStatusCollector{
4343
ctx: ctx,
44-
base: newBaseCollector(client, logger),
44+
base: newBaseCollector(client, logger.WithFields(logrus.Fields{"collector": "replset_status"})),
4545

4646
compatibleMode: compatible,
4747
topologyInfo: topology,

exporter/shards_collector.go

+77-1
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@ import (
2020
"fmt"
2121
"strings"
2222

23+
"github.com/pkg/errors"
2324
"github.com/prometheus/client_golang/prometheus"
2425
"github.com/sirupsen/logrus"
2526
"go.mongodb.org/mongo-driver/bson"
@@ -37,7 +38,7 @@ type shardsCollector struct {
3738
func newShardsCollector(ctx context.Context, client *mongo.Client, logger *logrus.Logger, compatibleMode bool) *shardsCollector {
3839
return &shardsCollector{
3940
ctx: ctx,
40-
base: newBaseCollector(client, logger),
41+
base: newBaseCollector(client, logger.WithFields(logrus.Fields{"collector": "shards"})),
4142
compatible: compatibleMode,
4243
}
4344
}
@@ -56,6 +57,26 @@ func (d *shardsCollector) collect(ch chan<- prometheus.Metric) {
5657
client := d.base.client
5758
logger := d.base.logger
5859
prefix := "shards collection chunks"
60+
ctx := d.ctx
61+
62+
metrics := make([]prometheus.Metric, 0)
63+
metric, err := chunksTotal(ctx, client)
64+
if err != nil {
65+
logger.Warnf("cannot create metric for chunks total: %s", err)
66+
} else {
67+
metrics = append(metrics, metric)
68+
}
69+
70+
ms, err := chunksTotalPerShard(ctx, client)
71+
if err != nil {
72+
logger.Warnf("cannot create metric for chunks total per shard: %s", err)
73+
} else {
74+
metrics = append(metrics, ms...)
75+
}
76+
77+
for _, metric := range metrics {
78+
ch <- metric
79+
}
5980

6081
databaseNames, err := client.ListDatabaseNames(d.ctx, bson.D{})
6182
if err != nil {
@@ -186,4 +207,59 @@ func (d *shardsCollector) getChunksForCollection(row primitive.M) []bson.M {
186207
return chunks
187208
}
188209

210+
func chunksTotal(ctx context.Context, client *mongo.Client) (prometheus.Metric, error) { //nolint:ireturn
211+
n, err := client.Database("config").Collection("chunks").CountDocuments(ctx, bson.M{})
212+
if err != nil {
213+
return nil, errors.Wrap(err, "cannot get total number of chunks")
214+
}
215+
216+
name := "mongodb_mongos_sharding_chunks_total"
217+
help := "Total number of chunks"
218+
219+
d := prometheus.NewDesc(name, help, nil, nil)
220+
return prometheus.NewConstMetric(d, prometheus.GaugeValue, float64(n))
221+
}
222+
223+
func chunksTotalPerShard(ctx context.Context, client *mongo.Client) ([]prometheus.Metric, error) {
224+
aggregation := bson.D{
225+
{Key: "$group", Value: bson.M{"_id": "$shard", "count": bson.M{"$sum": 1}}},
226+
}
227+
228+
cursor, err := client.Database("config").Collection("chunks").Aggregate(ctx, mongo.Pipeline{aggregation})
229+
if err != nil {
230+
return nil, errors.Wrap(err, "cannot get $shards cursor for collection config.chunks")
231+
}
232+
233+
var shards []bson.M
234+
if err = cursor.All(ctx, &shards); err != nil {
235+
return nil, errors.Wrap(err, "cannot get $shards for collection config.chunks")
236+
}
237+
238+
metrics := make([]prometheus.Metric, 0, len(shards))
239+
240+
for _, shard := range shards {
241+
help := "Total number of chunks per shard"
242+
id, ok := shard["_id"].(string)
243+
if !ok {
244+
continue
245+
}
246+
labels := map[string]string{"shard": id}
247+
248+
d := prometheus.NewDesc("mongodb_mongos_sharding_shard_chunks_total", help, nil, labels)
249+
val, ok := shard["count"].(int32)
250+
if !ok {
251+
continue
252+
}
253+
254+
metric, err := prometheus.NewConstMetric(d, prometheus.GaugeValue, float64(val))
255+
if err != nil {
256+
continue
257+
}
258+
259+
metrics = append(metrics, metric)
260+
}
261+
262+
return metrics, nil
263+
}
264+
189265
var _ prometheus.Collector = (*shardsCollector)(nil)

exporter/top_collector.go

+1-1
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,7 @@ func newTopCollector(ctx context.Context, client *mongo.Client, logger *logrus.L
4141
) *topCollector {
4242
return &topCollector{
4343
ctx: ctx,
44-
base: newBaseCollector(client, logger),
44+
base: newBaseCollector(client, logger.WithFields(logrus.Fields{"collector": "top"})),
4545
compatibleMode: compatible,
4646
topologyInfo: topology,
4747
}

exporter/topology_info.go

+2-2
Original file line numberDiff line numberDiff line change
@@ -57,7 +57,7 @@ type topologyInfo struct {
5757
// by a new connector, able to reconnect if needed. In case of reconnection, we should
5858
// call loadLabels to refresh the labels because they might have changed
5959
client *mongo.Client
60-
logger *logrus.Logger
60+
logger *logrus.Entry
6161
rw sync.RWMutex
6262
labels map[string]string
6363
}
@@ -68,7 +68,7 @@ var ErrCannotGetTopologyLabels = fmt.Errorf("cannot get topology labels")
6868
func newTopologyInfo(ctx context.Context, client *mongo.Client, logger *logrus.Logger) *topologyInfo {
6969
ti := &topologyInfo{
7070
client: client,
71-
logger: logger,
71+
logger: logger.WithFields(logrus.Fields{"component": "topology_info"}),
7272
labels: make(map[string]string),
7373
rw: sync.RWMutex{},
7474
}

0 commit comments

Comments
 (0)