diff --git a/changelog/23.0/23.0.0/summary.md b/changelog/23.0/23.0.0/summary.md
new file mode 100644
index 00000000000..b48297f52af
--- /dev/null
+++ b/changelog/23.0/23.0.0/summary.md
@@ -0,0 +1,14 @@
+## Summary
+
+### Table of Contents
+- **[Minor Changes](#minor-changes)**
+ - **[VTTablet](#minor-changes-vttablet)**
+ - [CLI Flags](#flags-vttablet)
+
+## Minor Changes
+
+### VTTablet
+
+#### CLI Flags
+
+- `skip-user-metrics` flag if enabled, replaces the username label with "UserLabelDisabled" to prevent metric explosion in environments with many unique users.
\ No newline at end of file
diff --git a/changelog/23.0/README.md b/changelog/23.0/README.md
new file mode 100644
index 00000000000..42a151fd40c
--- /dev/null
+++ b/changelog/23.0/README.md
@@ -0,0 +1,2 @@
+## v23.0
+* **[23.0.0](23.0.0)**
diff --git a/changelog/README.md b/changelog/README.md
index 7cade673d26..b5cfdedcc8c 100644
--- a/changelog/README.md
+++ b/changelog/README.md
@@ -1,4 +1,5 @@
## Releases
+* [23.0](23.0)
* [22.0](22.0)
* [21.0](21.0)
* [20.0](20.0)
diff --git a/go/flags/endtoend/vtcombo.txt b/go/flags/endtoend/vtcombo.txt
index 9bb706b42b7..cf382598cae 100644
--- a/go/flags/endtoend/vtcombo.txt
+++ b/go/flags/endtoend/vtcombo.txt
@@ -326,6 +326,7 @@ Flags:
--serving_state_grace_period duration how long to pause after broadcasting health to vtgate, before enforcing a new serving state
--shard_sync_retry_delay duration delay between retries of updates to keep the tablet and its shard record in sync (default 30s)
--shutdown_grace_period duration how long to wait for queries and transactions to complete during graceful shutdown. (default 3s)
+ --skip-user-metrics If true, user based stats are not recorded.
--sql-max-length-errors int truncate queries in error logs to the given length (default unlimited)
--sql-max-length-ui int truncate queries in debug UIs to the given length (default 512) (default 512)
--srv_topo_cache_refresh duration how frequently to refresh the topology for cached entries (default 1s)
diff --git a/go/flags/endtoend/vttablet.txt b/go/flags/endtoend/vttablet.txt
index 04eb16edc25..973ee500558 100644
--- a/go/flags/endtoend/vttablet.txt
+++ b/go/flags/endtoend/vttablet.txt
@@ -327,6 +327,7 @@ Flags:
--serving_state_grace_period duration how long to pause after broadcasting health to vtgate, before enforcing a new serving state
--shard_sync_retry_delay duration delay between retries of updates to keep the tablet and its shard record in sync (default 30s)
--shutdown_grace_period duration how long to wait for queries and transactions to complete during graceful shutdown. (default 3s)
+ --skip-user-metrics If true, user based stats are not recorded.
--sql-max-length-errors int truncate queries in error logs to the given length (default unlimited)
--sql-max-length-ui int truncate queries in debug UIs to the given length (default 512) (default 512)
--srv_topo_cache_refresh duration how frequently to refresh the topology for cached entries (default 1s)
diff --git a/go/vt/tableacl/acl/acl.go b/go/vt/tableacl/acl/acl.go
index abff749384c..7180e2c00f7 100644
--- a/go/vt/tableacl/acl/acl.go
+++ b/go/vt/tableacl/acl/acl.go
@@ -47,3 +47,12 @@ type AcceptAllACL struct{}
func (acl AcceptAllACL) IsMember(principal *querypb.VTGateCallerID) bool {
return true
}
+
+type ACLState int8
+
+const (
+ ACLUnknown ACLState = iota
+ ACLAllow
+ ACLDenied
+ ACLPseudoDenied
+)
diff --git a/go/vt/vttablet/endtoend/transaction_test.go b/go/vt/vttablet/endtoend/transaction_test.go
index 5e5889ad671..50d9b06ffbd 100644
--- a/go/vt/vttablet/endtoend/transaction_test.go
+++ b/go/vt/vttablet/endtoend/transaction_test.go
@@ -846,3 +846,81 @@ func TestUnresolvedTransactionsOrdering(t *testing.T) {
assert.Equal(t, want[i].Participants, transaction.Participants)
}
}
+
+// TestSkipUserMetrics tests the SkipUserMetrics flag in the config that disables user label in the metrics.
+func TestSkipUserMetrics(t *testing.T) {
+ client := framework.NewClient()
+ query := "select * from vitess_test"
+
+ runQueries := func() {
+ // non-tx execute
+ _, err := client.Execute(query, nil)
+ require.NoError(t, err)
+
+ // tx execute
+ _, err = client.BeginExecute(query, nil, nil)
+ require.NoError(t, err)
+ require.NoError(t, client.Commit())
+ }
+
+ // Initial test with user metrics enabled
+ vstart := framework.DebugVars()
+ runQueries()
+
+ expectedDiffs := []struct {
+ tag string
+ diff int
+ }{{ // not dependent on user
+ tag: "Transactions/TotalCount", diff: 1,
+ }, { // not dependent on user
+ tag: "Transactions/Histograms/commit/Count", diff: 1,
+ }, { // dependent on user
+ tag: "TableACLAllowed/vitess_test.vitess_test.Select.dev", diff: 2,
+ }, { // user metric enabled so this should be zero.
+ tag: "TableACLAllowed/vitess_test.vitess_test.Select.UserLabelDisabled", diff: 0,
+ }, { // dependent on user
+ tag: "UserTableQueryCount/vitess_test.dev.Execute", diff: 2,
+ }, { // user metric enabled so this should be zero.
+ tag: "UserTableQueryCount/vitess_test.UserLabelDisabled.Execute", diff: 0,
+ }, { // dependent on user
+ tag: "UserTransactionCount/dev.commit", diff: 1,
+ }}
+ vend := framework.DebugVars()
+ for _, expected := range expectedDiffs {
+ compareIntDiff(t, vend, expected.tag, vstart, expected.diff)
+ }
+
+ // Enable SkipUserMetrics and re-run tests
+ framework.Server.Config().SkipUserMetrics = true
+ defer func() {
+ framework.Server.Config().SkipUserMetrics = false
+ }()
+ vstart = framework.DebugVars()
+ runQueries()
+
+ expectedDiffs = []struct {
+ tag string
+ diff int
+ }{{ // not dependent on user
+ tag: "Transactions/TotalCount", diff: 1,
+ }, { // not dependent on user
+ tag: "Transactions/Histograms/commit/Count", diff: 1,
+ }, { // dependent on user - should be zero now
+ tag: "TableACLAllowed/vitess_test.vitess_test.Select.dev", diff: 0,
+ }, { // user metric disabled so this should be non-zero.
+ tag: "TableACLAllowed/vitess_test.vitess_test.Select.UserLabelDisabled", diff: 2,
+ }, { // dependent on user - should be zero now
+ tag: "UserTableQueryCount/vitess_test.dev.Execute", diff: 0,
+ }, { // user metric disabled so this should be non-zero.
+ tag: "UserTableQueryCount/vitess_test.UserLabelDisabled.Execute", diff: 2,
+ }, { // dependent on user
+ tag: "UserTransactionCount/dev.commit", diff: 0,
+ }, { // no need to publish this as "Transactions" histogram already captures this.
+ tag: "UserTransactionCount/UserLabelDisabled.commit", diff: 0,
+ }}
+ vend = framework.DebugVars()
+ for _, expected := range expectedDiffs {
+ compareIntDiff(t, vend, expected.tag, vstart, expected.diff)
+ }
+
+}
diff --git a/go/vt/vttablet/tabletserver/dt_executor_test.go b/go/vt/vttablet/tabletserver/dt_executor_test.go
index f83d7ff9006..ec2418e16e3 100644
--- a/go/vt/vttablet/tabletserver/dt_executor_test.go
+++ b/go/vt/vttablet/tabletserver/dt_executor_test.go
@@ -31,12 +31,11 @@ import (
"github.com/stretchr/testify/require"
"google.golang.org/protobuf/proto"
- "vitess.io/vitess/go/streamlog"
-
"vitess.io/vitess/go/event/syslogger"
"vitess.io/vitess/go/mysql/fakesqldb"
"vitess.io/vitess/go/mysql/sqlerror"
"vitess.io/vitess/go/sqltypes"
+ "vitess.io/vitess/go/streamlog"
querypb "vitess.io/vitess/go/vt/proto/query"
topodatapb "vitess.io/vitess/go/vt/proto/topodata"
"vitess.io/vitess/go/vt/vtenv"
@@ -58,7 +57,7 @@ func TestTxExecutorEmptyPrepare(t *testing.T) {
// taint the connection.
sc, err := tsv.te.txPool.GetAndLock(txid, "taint")
require.NoError(t, err)
- sc.Taint(ctx, nil)
+ sc.Taint(ctx, tsv.te.reservedConnStats)
sc.Unlock()
err = txe.Prepare(txid, "aa")
@@ -80,7 +79,7 @@ func TestExecutorPrepareFailure(t *testing.T) {
// taint the connection.
sc, err := tsv.te.txPool.GetAndLock(txid, "taint")
require.NoError(t, err)
- sc.Taint(ctx, nil)
+ sc.Taint(ctx, tsv.te.reservedConnStats)
sc.Unlock()
// try 2pc commit of Metadata Manager.
@@ -374,7 +373,7 @@ func TestExecutorStartCommitFailure(t *testing.T) {
// taint the connection.
sc, err := tsv.te.txPool.GetAndLock(txid, "taint")
require.NoError(t, err)
- sc.Taint(ctx, nil)
+ sc.Taint(ctx, tsv.te.reservedConnStats)
sc.Unlock()
// add rollback state update expectation
diff --git a/go/vt/vttablet/tabletserver/query_executor.go b/go/vt/vttablet/tabletserver/query_executor.go
index 2a7d2049ba9..382cd9e58f2 100644
--- a/go/vt/vttablet/tabletserver/query_executor.go
+++ b/go/vt/vttablet/tabletserver/query_executor.go
@@ -40,6 +40,7 @@ import (
"vitess.io/vitess/go/vt/schema"
"vitess.io/vitess/go/vt/sqlparser"
"vitess.io/vitess/go/vt/tableacl"
+ "vitess.io/vitess/go/vt/tableacl/acl"
"vitess.io/vitess/go/vt/vterrors"
"vitess.io/vitess/go/vt/vtgate/evalengine"
"vitess.io/vitess/go/vt/vttablet/tabletserver/connpool"
@@ -69,9 +70,10 @@ type QueryExecutor struct {
}
const (
- streamRowsSize = 256
- resetLastIDQuery = "select last_insert_id(18446744073709547416)"
- resetLastIDValue = 18446744073709547416
+ streamRowsSize = 256
+ resetLastIDQuery = "select last_insert_id(18446744073709547416)"
+ resetLastIDValue = 18446744073709547416
+ userLabelDisabled = "UserLabelDisabled"
)
var (
@@ -530,10 +532,14 @@ func (qre *QueryExecutor) checkPermissions() error {
}
func (qre *QueryExecutor) checkAccess(authorized *tableacl.ACLResult, tableName string, callerID *querypb.VTGateCallerID) error {
- statsKey := []string{tableName, authorized.GroupName, qre.plan.PlanID.String(), callerID.Username}
+ var aclState acl.ACLState
+ defer func() {
+ statsKey := qre.generateACLStatsKey(tableName, authorized, callerID)
+ qre.recordACLStats(statsKey, aclState)
+ }()
if !authorized.IsMember(callerID) {
if qre.tsv.qe.enableTableACLDryRun {
- qre.tsv.Stats().TableaclPseudoDenied.Add(statsKey, 1)
+ aclState = acl.ACLPseudoDenied
return nil
}
@@ -547,17 +553,37 @@ func (qre *QueryExecutor) checkAccess(authorized *tableacl.ACLResult, tableName
if len(callerID.Groups) > 0 {
groupStr = fmt.Sprintf(", in groups [%s],", strings.Join(callerID.Groups, ", "))
}
+ aclState = acl.ACLDenied
errStr := fmt.Sprintf("%s command denied to user '%s'%s for table '%s' (ACL check error)", qre.plan.PlanID.String(), callerID.Username, groupStr, tableName)
- qre.tsv.Stats().TableaclDenied.Add(statsKey, 1)
qre.tsv.qe.accessCheckerLogger.Infof("%s", errStr)
return vterrors.Errorf(vtrpcpb.Code_PERMISSION_DENIED, "%s", errStr)
}
return nil
}
- qre.tsv.Stats().TableaclAllowed.Add(statsKey, 1)
+ aclState = acl.ACLAllow
return nil
}
+func (qre *QueryExecutor) generateACLStatsKey(tableName string, authorized *tableacl.ACLResult, callerID *querypb.VTGateCallerID) []string {
+ if qre.tsv.Config().SkipUserMetrics {
+ return []string{tableName, authorized.GroupName, qre.plan.PlanID.String(), userLabelDisabled}
+ }
+ return []string{tableName, authorized.GroupName, qre.plan.PlanID.String(), callerID.Username}
+}
+
+func (qre *QueryExecutor) recordACLStats(key []string, aclState acl.ACLState) {
+ switch aclState {
+ case acl.ACLAllow:
+ qre.tsv.Stats().TableaclAllowed.Add(key, 1)
+ case acl.ACLDenied:
+ qre.tsv.Stats().TableaclDenied.Add(key, 1)
+ case acl.ACLPseudoDenied:
+ qre.tsv.Stats().TableaclPseudoDenied.Add(key, 1)
+ case acl.ACLUnknown:
+ // nothing to record here.
+ }
+}
+
func (qre *QueryExecutor) execDDL(conn *StatefulConnection) (result *sqltypes.Result, err error) {
// Let's see if this is a normal DDL statement or an Online DDL statement.
// An Online DDL statement is identified by /*vt+ .. */ comment with expected directives, like uuid etc.
@@ -1274,9 +1300,14 @@ func (qre *QueryExecutor) execStreamSQL(conn *connpool.PooledConn, isTransaction
}
func (qre *QueryExecutor) recordUserQuery(queryType string, duration int64) {
- username := callerid.GetPrincipal(callerid.EffectiveCallerIDFromContext(qre.ctx))
- if username == "" {
- username = callerid.GetUsername(callerid.ImmediateCallerIDFromContext(qre.ctx))
+ var username string
+ if qre.tsv.config.SkipUserMetrics {
+ username = userLabelDisabled
+ } else {
+ username = callerid.GetPrincipal(callerid.EffectiveCallerIDFromContext(qre.ctx))
+ if username == "" {
+ username = callerid.GetUsername(callerid.ImmediateCallerIDFromContext(qre.ctx))
+ }
}
tableName := qre.plan.TableName().String()
qre.tsv.Stats().UserTableQueryCount.Add([]string{tableName, username, queryType}, 1)
diff --git a/go/vt/vttablet/tabletserver/stateful_connection.go b/go/vt/vttablet/tabletserver/stateful_connection.go
index 10fc763984f..d64d2fefe9c 100644
--- a/go/vt/vttablet/tabletserver/stateful_connection.go
+++ b/go/vt/vttablet/tabletserver/stateful_connection.go
@@ -179,7 +179,7 @@ func (sc *StatefulConnection) ReleaseString(reason string) {
}
sc.dbConn.Recycle()
sc.dbConn = nil
- sc.logReservedConn()
+ sc.logReservedConn(reason)
}
// Renew the existing connection with new connection id.
@@ -260,7 +260,11 @@ func (sc *StatefulConnection) Taint(ctx context.Context, stats *servenv.TimingsW
Stats: stats,
}
sc.dbConn.Taint()
- sc.Stats().UserActiveReservedCount.Add(sc.getUsername(), 1)
+ if sc.env.Config().SkipUserMetrics {
+ sc.Stats().UserActiveReservedCount.Add(userLabelDisabled, 1)
+ } else {
+ sc.Stats().UserActiveReservedCount.Add(sc.getUsername(), 1)
+ }
return nil
}
@@ -282,9 +286,11 @@ func (sc *StatefulConnection) LogTransaction(reason tx.ReleaseReason) {
username = callerid.GetUsername(sc.txProps.ImmediateCaller)
}
duration := sc.txProps.EndTime.Sub(sc.txProps.StartTime)
- sc.Stats().UserTransactionCount.Add([]string{username, reason.Name()}, 1)
- sc.Stats().UserTransactionTimesNs.Add([]string{username, reason.Name()}, int64(duration))
sc.txProps.Stats.Add(reason.Name(), duration)
+ if !sc.env.Config().SkipUserMetrics {
+ sc.Stats().UserTransactionCount.Add([]string{username, reason.Name()}, 1)
+ sc.Stats().UserTransactionTimesNs.Add([]string{username, reason.Name()}, int64(duration))
+ }
tabletenv.TxLogger.Send(sc)
}
@@ -294,15 +300,19 @@ func (sc *StatefulConnection) SetTimeout(timeout time.Duration) {
}
// logReservedConn logs reserved connection related stats.
-func (sc *StatefulConnection) logReservedConn() {
+func (sc *StatefulConnection) logReservedConn(reason string) {
if sc.reservedProps == nil {
return // Nothing to log as this connection is not reserved.
}
- duration := time.Since(sc.reservedProps.StartTime)
- username := sc.getUsername()
- sc.Stats().UserActiveReservedCount.Add(username, -1)
- sc.Stats().UserReservedCount.Add(username, 1)
- sc.Stats().UserReservedTimesNs.Add(username, int64(duration))
+ sc.reservedProps.Stats.Record(reason, sc.reservedProps.StartTime)
+ if sc.env.Config().SkipUserMetrics {
+ sc.Stats().UserActiveReservedCount.Add(userLabelDisabled, -1)
+ } else {
+ username := sc.getUsername()
+ sc.Stats().UserActiveReservedCount.Add(username, -1)
+ sc.Stats().UserReservedCount.Add(username, 1)
+ sc.Stats().UserReservedTimesNs.Add(username, int64(time.Since(sc.reservedProps.StartTime)))
+ }
}
func (sc *StatefulConnection) getUsername() string {
diff --git a/go/vt/vttablet/tabletserver/stateful_connection_pool_test.go b/go/vt/vttablet/tabletserver/stateful_connection_pool_test.go
index b93c822cfdc..cb7b9190abf 100644
--- a/go/vt/vttablet/tabletserver/stateful_connection_pool_test.go
+++ b/go/vt/vttablet/tabletserver/stateful_connection_pool_test.go
@@ -27,6 +27,7 @@ import (
"vitess.io/vitess/go/sqltypes"
"vitess.io/vitess/go/vt/dbconfigs"
querypb "vitess.io/vitess/go/vt/proto/query"
+ "vitess.io/vitess/go/vt/servenv"
"vitess.io/vitess/go/vt/vttablet/tabletserver/tx"
)
@@ -96,24 +97,25 @@ func TestStatefulPoolShutdownNonTx(t *testing.T) {
pool := newActivePool()
params := dbconfigs.New(db.ConnParams())
pool.Open(params, params, params)
+ rcStats := servenv.NewExporter("TestStatefulPoolShutdownNonTx", "").NewTimings("rconn", "test1", "test2")
// conn1 non-tx, not in use.
conn1, err := pool.NewConn(ctx, &querypb.ExecuteOptions{}, nil)
require.NoError(t, err)
- conn1.Taint(ctx, nil)
+ conn1.Taint(ctx, rcStats)
conn1.Unlock()
// conn2 tx, not in use.
conn2, err := pool.NewConn(ctx, &querypb.ExecuteOptions{}, nil)
require.NoError(t, err)
- conn2.Taint(ctx, nil)
+ conn2.Taint(ctx, rcStats)
conn2.txProps = &tx.Properties{}
conn2.Unlock()
// conn3 non-tx, in use.
conn3, err := pool.NewConn(ctx, &querypb.ExecuteOptions{}, nil)
require.NoError(t, err)
- conn3.Taint(ctx, nil)
+ conn3.Taint(ctx, rcStats)
// After ShutdownNonTx, conn1 should be closed, but not conn3.
pool.ShutdownNonTx()
diff --git a/go/vt/vttablet/tabletserver/tabletenv/config.go b/go/vt/vttablet/tabletserver/tabletenv/config.go
index e4ca2bfc96a..b52157b1452 100644
--- a/go/vt/vttablet/tabletserver/tabletenv/config.go
+++ b/go/vt/vttablet/tabletserver/tabletenv/config.go
@@ -216,6 +216,7 @@ func registerTabletEnvFlags(fs *pflag.FlagSet) {
fs.BoolVar(¤tConfig.EnableViews, "queryserver-enable-views", false, "Enable views support in vttablet.")
fs.BoolVar(¤tConfig.EnablePerWorkloadTableMetrics, "enable-per-workload-table-metrics", defaultConfig.EnablePerWorkloadTableMetrics, "If true, query counts and query error metrics include a label that identifies the workload")
+ fs.BoolVar(¤tConfig.SkipUserMetrics, "skip-user-metrics", defaultConfig.SkipUserMetrics, "If true, user based stats are not recorded.")
fs.BoolVar(¤tConfig.Unmanaged, "unmanaged", false, "Indicates an unmanaged tablet, i.e. using an external mysql-compatible database")
}
@@ -370,6 +371,7 @@ type TabletConfig struct {
EnableViews bool `json:"-"`
EnablePerWorkloadTableMetrics bool `json:"-"`
+ SkipUserMetrics bool `json:"-"`
}
func (cfg *TabletConfig) MarshalJSON() ([]byte, error) {
diff --git a/misc/git/hooks/golangci-lint b/misc/git/hooks/golangci-lint
index 10fc03f0505..fa81f5b52a3 100755
--- a/misc/git/hooks/golangci-lint
+++ b/misc/git/hooks/golangci-lint
@@ -52,7 +52,7 @@ else
if ! version_greater_or_equal "$INSTALLED_VERSION" "${REQUIRED_VERSION#v}"; then
echo "golangci-lint version $INSTALLED_VERSION found, but $REQUIRED_VERSION or newer is required."
echo "Installing version $REQUIRED_VERSION..."
- go install github.com/golangci/golangci-lint/cmd/golangci-lint@$REQUIRED_VERSION
+ go install github.com/golangci/golangci-lint/v2/cmd/golangci-lint@$REQUIRED_VERSION
fi
fi