Skip to content

Commit 28e85f3

Browse files
idoqoBupycHuk
andauthored
PMM-12989 Reduce error logs from diagnostic data on arbiter nodes (#820)
* PMM-12989 return early when checking diagnostic data for arbiter nodes * fix lint * bump test timeout * skip diagnostic data if we can't run command * remove unneeded test * refactor tests * drop timeout * revert timeout' * changeset for tests * change keyfile permission Also, update actions workflow to output docker logs on failure * change file permission before running * bump permissions * add entrypoint for keyfile * use custom dockerfile for authenticated instances * use only default cluster as shard server * drop logs * fix linter errors * split metrics collection * show warning earlier when running on arbiter * show warning earlier when running on arbiter * Update diagnostic_data_collector.go * Update v1_compatibility.go --------- Co-authored-by: Nurlan Moldomurov <[email protected]>
1 parent e39e36f commit 28e85f3

9 files changed

+217
-71
lines changed

CONTRIBUTING.md

+9-3
Original file line numberDiff line numberDiff line change
@@ -91,18 +91,24 @@ It will install `goimports`, `goreleaser`, `golangci-lint` and `reviewdog`.
9191

9292
The testing sandbox starts `n` MongoDB instances as follows:
9393

94-
- 3 Instances for shard 1 at ports 17001, 17002, 17003
95-
- 3 instances for shard 2 at ports 17004, 17005, 17006
94+
- 3 Instances for shard 1 at ports 17001, 17002, 17003 (with no authentication)
95+
- 3 instances for shard 2 at ports 17004, 17005, 17006 (with authentication enabled)
9696
- 3 config servers at ports 17007, 17008, 17009
9797
- 1 mongos server at port 17000
9898
- 1 stand alone instance at port 27017
9999

100-
All instances are currently running without user and password so for example, to connect to the **mongos** you can just use:
100+
To connect to the **mongos** on shard 1, you can use:
101101

102102
```
103103
mongo mongodb://127.0.0.1:17001/admin
104104
```
105105

106+
To connect to the **mongos** on shard 2 (with authentication enabled), you can use:
107+
108+
```
109+
mongo mongodb://admin:admin@127.0.0.1:17001/admin
110+
```
111+
106112
The sandbox can be started using the provided Makefile using: `make test-cluster` and it can be stopped using `make test-cluster-clean`.
107113

108114
### Running tests

Makefile

+1-1
Original file line numberDiff line numberDiff line change
@@ -104,7 +104,7 @@ test-race: env ## Run all tests with race flag.
104104
go test -race -v -timeout 30s ./...
105105

106106
test-cluster: env ## Starts MongoDB test cluster. Use env var TEST_MONGODB_IMAGE to set flavor and version. Example: TEST_MONGODB_IMAGE=mongo:3.6 make test-cluster
107-
docker compose up -d
107+
docker compose up --build -d
108108

109109
test-cluster-clean: env ## Stops MongoDB test cluster.
110110
docker compose down --remove-orphans

docker-compose.yml

+25-8
Original file line numberDiff line numberDiff line change
@@ -63,37 +63,52 @@ services:
6363

6464
mongo-2-2:
6565
container_name: "mongo-2-2"
66-
image: ${TEST_MONGODB_IMAGE:-mongo:4.2}
66+
build:
67+
dockerfile: ./docker/mongodb-auth.dockerfile
68+
args:
69+
TEST_MONGODB_IMAGE: ${TEST_MONGODB_IMAGE}
70+
environment:
71+
- MONGO_INITDB_ROOT_USERNAME=${TEST_MONGODB_USERNAME:-admin}
72+
- MONGO_INITDB_ROOT_PASSWORD=${TEST_MONGODB_PASSWORD:-admin}
6773
ports:
6874
- "${TEST_MONGODB_S2_PRIMARY_PORT:-17004}:27017"
69-
command: mongod --replSet rs2 --shardsvr --port 27017 --oplogSize 16
75+
command: mongod --replSet rs2 --port 27017 --oplogSize 16 --auth --keyFile=/opt/keyfile
7076
networks:
7177
- rs2
7278

7379
mongo-2-3:
7480
container_name: "mongo-2-3"
75-
image: ${TEST_MONGODB_IMAGE:-mongo:4.2}
81+
build:
82+
dockerfile: ./docker/mongodb-auth.dockerfile
83+
args:
84+
TEST_MONGODB_IMAGE: ${TEST_MONGODB_IMAGE}
7685
ports:
7786
- "${TEST_MONGODB_S2_SECONDARY1_PORT:-17005}:27017"
78-
command: mongod --replSet rs2 --shardsvr --port 27017 --oplogSize 16
87+
command: mongod --replSet rs2 --port 27017 --oplogSize 16 --auth --keyFile=/opt/keyfile
7988
networks:
8089
- rs2
8190

8291
mongo-2-1:
8392
container_name: "mongo-2-1"
84-
image: ${TEST_MONGODB_IMAGE:-mongo:4.2}
93+
build:
94+
dockerfile: ./docker/mongodb-auth.dockerfile
95+
args:
96+
TEST_MONGODB_IMAGE: ${TEST_MONGODB_IMAGE}
8597
ports:
8698
- "${TEST_MONGODB_S2_SECONDARY2_PORT:-17006}:27017"
87-
command: mongod --replSet rs2 --shardsvr --port 27017 --oplogSize 16
99+
command: mongod --replSet rs2 --port 27017 --oplogSize 16 --auth --keyFile=/opt/keyfile
88100
networks:
89101
- rs2
90102

91103
mongo-2-arbiter:
92104
container_name: "mongo-2-arbiter"
93-
image: ${TEST_MONGODB_IMAGE:-mongo:4.2}
105+
build:
106+
dockerfile: ./docker/mongodb-auth.dockerfile
107+
args:
108+
TEST_MONGODB_IMAGE: ${TEST_MONGODB_IMAGE}
94109
ports:
95110
- "${TEST_MONGODB_S2_ARBITER:-17012}:27017"
96-
command: mongod --replSet rs1 --shardsvr --port 27017 --oplogSize 16
111+
command: mongod --replSet rs2 --port 27017 --oplogSize 16 --auth --keyFile=/opt/keyfile
97112
networks:
98113
- rs2
99114

@@ -114,6 +129,8 @@ services:
114129
- ARBITER=mongo-2-arbiter
115130
- RS=rs2
116131
- VERSION=${TEST_MONGODB_IMAGE}
132+
- MONGO_INITDB_ROOT_USERNAME=${TEST_MONGODB_USERNAME:-admin}
133+
- MONGO_INITDB_ROOT_PASSWORD=${TEST_MONGODB_PASSWORD:-admin}
117134
entrypoint: [ "/scripts/setup.sh" ]
118135
networks:
119136
- rs2

docker/mongodb-auth.dockerfile

+6
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
ARG TEST_MONGODB_IMAGE=mongo:4.2
2+
FROM ${TEST_MONGODB_IMAGE}
3+
USER root
4+
COPY docker/secret/keyfile /opt/keyfile
5+
RUN chown mongodb /opt/keyfile && chmod 400 /opt/keyfile && mkdir -p /home/mongodb/ && chown mongodb /home/mongodb
6+
USER mongodb

docker/scripts/setup.sh

+9-1
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ MONGODB_CLIENT="mongosh --quiet"
44
PARSED=(${VERSION//:/ })
55
MONGODB_VERSION=${PARSED[1]}
66
MONGODB_VENDOR=${PARSED[0]}
7+
78
if [ "`echo ${MONGODB_VERSION} | cut -c 1`" = "4" ]; then
89
MONGODB_CLIENT="mongo"
910
fi
@@ -17,6 +18,9 @@ mongodb2=`getent hosts ${MONGO2} | awk '{ print $1 }'`
1718
mongodb3=`getent hosts ${MONGO3} | awk '{ print $1 }'`
1819
arbiter=`getent hosts ${ARBITER} | awk '{ print $1 }'`
1920

21+
username=${MONGO_INITDB_ROOT_USERNAME}
22+
password=${MONGO_INITDB_ROOT_PASSWORD}
23+
2024
port=${PORT:-27017}
2125

2226
echo "Waiting for startup.."
@@ -60,7 +64,11 @@ EOF
6064

6165
function general_servers() {
6266
echo "setup servers on ${MONGO1}(${mongodb1}:${port})"
63-
${MONGODB_CLIENT} --host ${mongodb1}:${port} <<EOF
67+
command="${MONGODB_CLIENT} --host ${mongodb1}:${port}"
68+
if [[ -n "$username" && -n "$password" ]]; then
69+
command="${MONGODB_CLIENT} --host ${mongodb1}:${port} -u ${username} -p ${password}"
70+
fi
71+
${command} <<EOF
6472
var cfg = {
6573
"_id": "${RS}",
6674
"protocolVersion": 1,

docker/secret/keyfile

+16
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
P4CWhUl1qj2EiSM+r3mzAHSATiAMBv8l2rAClMrAqOqyyCzVd2tIFW0nOEbSRvoL
2+
Ap7oZkC7IvjX5ELJnqQbxT00+dv1s8U1UFk3J96xro1x3mLgAmASJ1TrFSKXUkss
3+
bbwZ6QBNONCrTNCqlL2q1umLiYUTPeUH0RlLg2GsH1H66a9dEKZaS5AtJIO1hJ6s
4+
znivgdx2vnSA3bs1coR3nEBeNRsMoHp2Dtn9P6FDNlhssZh+EO9GdoRh16cqRiEM
5+
V2VnS2vjEZPpcMrX3ZGU/JBy/bw5GhGXrIfzQrvuKanUsuAVA3bmh6Fh8Rnu66mA
6+
GETO09R/3cjLxHBngZdL9Zn3+3HgDTQHKtpW0ZfjdiK5Xh4JqGXKtDr68S88qgAb
7+
eYQaeJMBoe0o1JXeKYRba+fXmubO7nGN3IDYWulNtkJLe9bx1mseVqlh9FSg7CuG
8+
++sszE7nzmtAlZlaLKlONMm9+dLjhT9n7cSkoWbSIUHvWIzGKC2yadabt7T21VPc
9+
Cyxdaat28xo93J5qjUetjTLKWQmG4eL/AG2/plT7WrTOjQqYCjfFU3XmpVBgtaAr
10+
wkJB8sOUbjcuVJok235Eu/8SxHJKQb0dl8ALwbwLG7CPZMrZ5YF6eJgjE+Sy+WKn
11+
OqfyzNwS7j7nvOvdpRgnHUwwc0G6eIfj90S+XdieJMMuzt7NUS8NsCuQx3nAK0EB
12+
FNWk02pUYE5yHWvmG+7Mo+Jd9OBWcQnGErux6NTYA42uYiMamZgsByBJ2Y8S2a2L
13+
VtbJVhO2indAvGqrGNbdOKGKagFjIQP2A7zsvLFkgfwbBG785CI3Il9jaeQhcefY
14+
Xs0cSSFhFkz/Ak17gU5l0WF6h5HHacWaCzmWdOiZVbOeZboaaxSBry+uZ8oRXPbp
15+
BavuRtu1afTSrikgJdCcoA8Opn7kkETt5xa5PP1dNLEWIMFui2BOlRAw8BDWJ+nD
16+
8fYBaVaqdDs/Y8K0xWHuJjja+s91

exporter/diagnostic_data_collector.go

+64-34
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,18 @@ type diagnosticDataCollector struct {
4141

4242
// newDiagnosticDataCollector creates a collector for diagnostic information.
4343
func newDiagnosticDataCollector(ctx context.Context, client *mongo.Client, logger *logrus.Logger, compatible bool, topology labelsGetter) *diagnosticDataCollector {
44+
nodeType, err := getNodeType(ctx, client)
45+
if err != nil {
46+
logger.WithFields(logrus.Fields{
47+
"component": "diagnosticDataCollector",
48+
}).Errorf("Cannot get node type: %s", err)
49+
}
50+
if nodeType == typeArbiter {
51+
logger.WithFields(logrus.Fields{
52+
"component": "diagnosticDataCollector",
53+
}).Warn("some metrics might be unavailable on arbiter nodes")
54+
}
55+
4456
return &diagnosticDataCollector{
4557
ctx: ctx,
4658
base: newBaseCollector(client, logger.WithFields(logrus.Fields{"collector": "diagnostic_data"})),
@@ -66,56 +78,74 @@ func (d *diagnosticDataCollector) collect(ch chan<- prometheus.Metric) {
6678
logger := d.base.logger
6779
client := d.base.client
6880

81+
nodeType, err := getNodeType(d.ctx, client)
82+
if err != nil {
83+
logger.WithFields(logrus.Fields{
84+
"component": "diagnosticDataCollector",
85+
}).Errorf("Cannot get node type: %s", err)
86+
}
87+
88+
var metrics []prometheus.Metric
6989
cmd := bson.D{{Key: "getDiagnosticData", Value: "1"}}
7090
res := client.Database("admin").RunCommand(d.ctx, cmd)
7191
if res.Err() != nil {
72-
logger.Errorf("failed to run command: getDiagnosticData: %s", res.Err())
73-
logger.Warn("cannot run getDiagnosticData, some metrics might be unavailable.")
74-
}
92+
if nodeType != typeArbiter {
93+
logger.Warnf("failed to run command: getDiagnosticData, some metrics might be unavailable %s", res.Err())
94+
}
95+
} else {
96+
if err := res.Decode(&m); err != nil {
97+
logger.Errorf("cannot run getDiagnosticData: %s", err)
98+
return
99+
}
75100

76-
if err := res.Decode(&m); err != nil {
77-
logger.Errorf("cannot run getDiagnosticData: %s", err)
78-
}
101+
if m == nil || m["data"] == nil {
102+
logger.Error("cannot run getDiagnosticData: response is empty")
103+
}
79104

80-
if m == nil || m["data"] == nil {
81-
logger.Error("cannot run getDiagnosticData: response is empty")
82-
}
105+
var ok bool
106+
m, ok = m["data"].(bson.M)
107+
if !ok {
108+
err = errors.Wrapf(errUnexpectedDataType, "%T for data field", m["data"])
109+
logger.Errorf("cannot decode getDiagnosticData: %s", err)
110+
}
83111

84-
m, ok := m["data"].(bson.M)
85-
if !ok {
86-
err := errors.Wrapf(errUnexpectedDataType, "%T for data field", m["data"])
112+
logger.Debug("getDiagnosticData result")
113+
debugResult(logger, m)
87114

88-
logger.Errorf("cannot decode getDiagnosticData: %s", err)
89-
}
115+
metrics = makeMetrics("", m, d.topologyInfo.baseLabels(), d.compatibleMode)
116+
metrics = append(metrics, locksMetrics(logger, m)...)
90117

91-
logger.Debug("getDiagnosticData result")
92-
debugResult(logger, m)
118+
securityMetric, err := d.getSecurityMetricFromLineOptions(client)
119+
if err != nil {
120+
logger.Errorf("failed to run command: getCmdLineOptions: %s", err)
121+
} else if securityMetric != nil {
122+
metrics = append(metrics, securityMetric)
123+
}
93124

94-
metrics := makeMetrics("", m, d.topologyInfo.baseLabels(), d.compatibleMode)
95-
metrics = append(metrics, locksMetrics(logger, m)...)
125+
if d.compatibleMode {
126+
metrics = append(metrics, specialMetrics(d.ctx, client, m, nodeType, logger)...)
96127

97-
securityMetric, err := d.getSecurityMetricFromLineOptions(client)
98-
if err != nil {
99-
logger.Errorf("cannot decode getCmdLineOtpions: %s", err)
100-
} else if securityMetric != nil {
101-
metrics = append(metrics, securityMetric)
128+
if cem, err := cacheEvictedTotalMetric(m); err == nil {
129+
metrics = append(metrics, cem)
130+
}
131+
}
102132
}
103133

104134
if d.compatibleMode {
105-
logger.Debug("running special metrics for compatibility mode")
106-
metrics = append(metrics, specialMetrics(d.ctx, client, m, logger)...)
135+
buildInfo, err := retrieveMongoDBBuildInfo(d.ctx, client, logger)
136+
if err != nil {
137+
logger.Errorf("cannot retrieve MongoDB buildInfo: %s", err)
138+
}
139+
140+
metrics = append(metrics, serverVersion(buildInfo))
107141

108-
if cem, err := cacheEvictedTotalMetric(m); err == nil {
109-
metrics = append(metrics, cem)
142+
if nodeType == typeArbiter {
143+
if hm := arbiterMetrics(d.ctx, client, logger); hm != nil {
144+
metrics = append(metrics, hm...)
145+
}
110146
}
111147

112-
nodeType, err := getNodeType(d.ctx, client)
113-
if err != nil {
114-
logger.WithFields(logrus.Fields{
115-
"component": "diagnosticDataCollector",
116-
}).Errorf("Cannot get node type to check if this is a mongos: %s", err)
117-
} else if nodeType == typeMongos {
118-
logger.Debug("running special metrics for mongos")
148+
if nodeType == typeMongos {
119149
metrics = append(metrics, mongosMetrics(d.ctx, client, logger)...)
120150
}
121151
}

exporter/diagnostic_data_collector_test.go

+78
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@ import (
2929
"github.com/prometheus/client_golang/prometheus"
3030
"github.com/prometheus/client_golang/prometheus/testutil"
3131
"github.com/sirupsen/logrus"
32+
logrustest "github.com/sirupsen/logrus/hooks/test"
3233
"github.com/stretchr/testify/assert"
3334
"github.com/stretchr/testify/require"
3435
"go.mongodb.org/mongo-driver/bson"
@@ -236,6 +237,83 @@ func TestAllDiagnosticDataCollectorMetrics(t *testing.T) {
236237
}
237238
}
238239

240+
//nolint:funlen
241+
func TestDiagnosticDataErrors(t *testing.T) {
242+
t.Parallel()
243+
type log struct {
244+
message string
245+
level uint32
246+
}
247+
248+
type testCase struct {
249+
name string
250+
containerName string
251+
expectedMessage string
252+
}
253+
254+
cases := []testCase{
255+
{
256+
name: "authenticated arbiter has warning about missing metrics",
257+
containerName: "mongo-2-arbiter",
258+
expectedMessage: "some metrics might be unavailable on arbiter nodes",
259+
},
260+
{
261+
name: "authenticated data node has no error in logs",
262+
containerName: "mongo-1-1",
263+
expectedMessage: "",
264+
},
265+
{
266+
name: "unauthenticated arbiter has warning about missing metrics",
267+
containerName: "mongo-1-arbiter",
268+
expectedMessage: "some metrics might be unavailable on arbiter nodes",
269+
},
270+
}
271+
272+
for _, tc := range cases {
273+
tc := tc
274+
t.Run(tc.name, func(t *testing.T) {
275+
t.Parallel()
276+
ctx, cancel := context.WithTimeout(context.Background(), 3*time.Second)
277+
defer cancel()
278+
279+
port, err := tu.PortForContainer(tc.containerName)
280+
require.NoError(t, err)
281+
client := tu.TestClient(ctx, port, t)
282+
283+
logger, hook := logrustest.NewNullLogger()
284+
ti := newTopologyInfo(ctx, client, logger)
285+
c := newDiagnosticDataCollector(ctx, client, logger, true, ti)
286+
287+
reg := prometheus.NewRegistry()
288+
err = reg.Register(c)
289+
require.NoError(t, err)
290+
_ = helpers.CollectMetrics(c)
291+
292+
var errorLogs []log
293+
for _, entry := range hook.Entries {
294+
if entry.Level == logrus.ErrorLevel || entry.Level == logrus.WarnLevel {
295+
errorLogs = append(errorLogs, log{
296+
message: entry.Message,
297+
level: uint32(entry.Level),
298+
})
299+
}
300+
}
301+
302+
if tc.expectedMessage == "" {
303+
assert.Empty(t, errorLogs)
304+
} else {
305+
require.NotEmpty(t, errorLogs)
306+
assert.True(
307+
t,
308+
strings.HasPrefix(hook.LastEntry().Message, tc.expectedMessage),
309+
"'%s' has no prefix: '%s'",
310+
hook.LastEntry().Message,
311+
tc.expectedMessage)
312+
}
313+
})
314+
}
315+
}
316+
239317
func TestContextTimeout(t *testing.T) {
240318
ctx := context.Background()
241319

0 commit comments

Comments
 (0)