Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 14 additions & 0 deletions changelog/23.0/23.0.0/summary.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
## Summary

### Table of Contents
- **[Minor Changes](#minor-changes)**
- **[VTTablet](#minor-changes-vttablet)**
- [CLI Flags](#flags-vttablet)

## <a id="minor-changes"/>Minor Changes</a>

### <a id="minor-changes-vttablet"/>VTTablet</a>

#### <a id="flags-vttablet"/>CLI Flags</a>

- `skip-user-metrics` flag if enabled, replaces the username label with "UserLabelDisabled" to prevent metric explosion in environments with many unique users.
2 changes: 2 additions & 0 deletions changelog/23.0/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
## v23.0
* **[23.0.0](23.0.0)**
1 change: 1 addition & 0 deletions changelog/README.md
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
## Releases
* [23.0](23.0)
* [22.0](22.0)
* [21.0](21.0)
* [20.0](20.0)
Expand Down
1 change: 1 addition & 0 deletions go/flags/endtoend/vtcombo.txt
Original file line number Diff line number Diff line change
Expand Up @@ -326,6 +326,7 @@ Flags:
--serving_state_grace_period duration how long to pause after broadcasting health to vtgate, before enforcing a new serving state
--shard_sync_retry_delay duration delay between retries of updates to keep the tablet and its shard record in sync (default 30s)
--shutdown_grace_period duration how long to wait for queries and transactions to complete during graceful shutdown. (default 3s)
--skip-user-metrics If true, user based stats are not recorded.
--sql-max-length-errors int truncate queries in error logs to the given length (default unlimited)
--sql-max-length-ui int truncate queries in debug UIs to the given length (default 512) (default 512)
--srv_topo_cache_refresh duration how frequently to refresh the topology for cached entries (default 1s)
Expand Down
1 change: 1 addition & 0 deletions go/flags/endtoend/vttablet.txt
Original file line number Diff line number Diff line change
Expand Up @@ -327,6 +327,7 @@ Flags:
--serving_state_grace_period duration how long to pause after broadcasting health to vtgate, before enforcing a new serving state
--shard_sync_retry_delay duration delay between retries of updates to keep the tablet and its shard record in sync (default 30s)
--shutdown_grace_period duration how long to wait for queries and transactions to complete during graceful shutdown. (default 3s)
--skip-user-metrics If true, user based stats are not recorded.
--sql-max-length-errors int truncate queries in error logs to the given length (default unlimited)
--sql-max-length-ui int truncate queries in debug UIs to the given length (default 512) (default 512)
--srv_topo_cache_refresh duration how frequently to refresh the topology for cached entries (default 1s)
Expand Down
9 changes: 9 additions & 0 deletions go/vt/tableacl/acl/acl.go
Original file line number Diff line number Diff line change
Expand Up @@ -47,3 +47,12 @@ type AcceptAllACL struct{}
func (acl AcceptAllACL) IsMember(principal *querypb.VTGateCallerID) bool {
return true
}

type ACLState int8

const (
ACLUnknown ACLState = iota
ACLAllow
ACLDenied
ACLPseudoDenied
)
78 changes: 78 additions & 0 deletions go/vt/vttablet/endtoend/transaction_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -846,3 +846,81 @@ func TestUnresolvedTransactionsOrdering(t *testing.T) {
assert.Equal(t, want[i].Participants, transaction.Participants)
}
}

// TestSkipUserMetrics tests the SkipUserMetrics flag in the config that disables user label in the metrics.
func TestSkipUserMetrics(t *testing.T) {
client := framework.NewClient()
query := "select * from vitess_test"

runQueries := func() {
// non-tx execute
_, err := client.Execute(query, nil)
require.NoError(t, err)

// tx execute
_, err = client.BeginExecute(query, nil, nil)
require.NoError(t, err)
require.NoError(t, client.Commit())
}

// Initial test with user metrics enabled
vstart := framework.DebugVars()
runQueries()

expectedDiffs := []struct {
tag string
diff int
}{{ // not dependent on user
tag: "Transactions/TotalCount", diff: 1,
}, { // not dependent on user
tag: "Transactions/Histograms/commit/Count", diff: 1,
}, { // dependent on user
tag: "TableACLAllowed/vitess_test.vitess_test.Select.dev", diff: 2,
}, { // user metric enabled so this should be zero.
tag: "TableACLAllowed/vitess_test.vitess_test.Select.UserLabelDisabled", diff: 0,
}, { // dependent on user
tag: "UserTableQueryCount/vitess_test.dev.Execute", diff: 2,
}, { // user metric enabled so this should be zero.
tag: "UserTableQueryCount/vitess_test.UserLabelDisabled.Execute", diff: 0,
}, { // dependent on user
tag: "UserTransactionCount/dev.commit", diff: 1,
}}
vend := framework.DebugVars()
for _, expected := range expectedDiffs {
compareIntDiff(t, vend, expected.tag, vstart, expected.diff)
}

// Enable SkipUserMetrics and re-run tests
framework.Server.Config().SkipUserMetrics = true
defer func() {
framework.Server.Config().SkipUserMetrics = false
}()
vstart = framework.DebugVars()
runQueries()

expectedDiffs = []struct {
tag string
diff int
}{{ // not dependent on user
tag: "Transactions/TotalCount", diff: 1,
}, { // not dependent on user
tag: "Transactions/Histograms/commit/Count", diff: 1,
}, { // dependent on user - should be zero now
tag: "TableACLAllowed/vitess_test.vitess_test.Select.dev", diff: 0,
}, { // user metric disabled so this should be non-zero.
tag: "TableACLAllowed/vitess_test.vitess_test.Select.UserLabelDisabled", diff: 2,
}, { // dependent on user - should be zero now
tag: "UserTableQueryCount/vitess_test.dev.Execute", diff: 0,
}, { // user metric disabled so this should be non-zero.
tag: "UserTableQueryCount/vitess_test.UserLabelDisabled.Execute", diff: 2,
}, { // dependent on user
tag: "UserTransactionCount/dev.commit", diff: 0,
}, { // no need to publish this as "Transactions" histogram already captures this.
tag: "UserTransactionCount/UserLabelDisabled.commit", diff: 0,
}}
vend = framework.DebugVars()
for _, expected := range expectedDiffs {
compareIntDiff(t, vend, expected.tag, vstart, expected.diff)
}

}
9 changes: 4 additions & 5 deletions go/vt/vttablet/tabletserver/dt_executor_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,12 +31,11 @@ import (
"github.com/stretchr/testify/require"
"google.golang.org/protobuf/proto"

"vitess.io/vitess/go/streamlog"

"vitess.io/vitess/go/event/syslogger"
"vitess.io/vitess/go/mysql/fakesqldb"
"vitess.io/vitess/go/mysql/sqlerror"
"vitess.io/vitess/go/sqltypes"
"vitess.io/vitess/go/streamlog"
querypb "vitess.io/vitess/go/vt/proto/query"
topodatapb "vitess.io/vitess/go/vt/proto/topodata"
"vitess.io/vitess/go/vt/vtenv"
Expand All @@ -58,7 +57,7 @@ func TestTxExecutorEmptyPrepare(t *testing.T) {
// taint the connection.
sc, err := tsv.te.txPool.GetAndLock(txid, "taint")
require.NoError(t, err)
sc.Taint(ctx, nil)
sc.Taint(ctx, tsv.te.reservedConnStats)
sc.Unlock()

err = txe.Prepare(txid, "aa")
Expand All @@ -80,7 +79,7 @@ func TestExecutorPrepareFailure(t *testing.T) {
// taint the connection.
sc, err := tsv.te.txPool.GetAndLock(txid, "taint")
require.NoError(t, err)
sc.Taint(ctx, nil)
sc.Taint(ctx, tsv.te.reservedConnStats)
sc.Unlock()

// try 2pc commit of Metadata Manager.
Expand Down Expand Up @@ -374,7 +373,7 @@ func TestExecutorStartCommitFailure(t *testing.T) {
// taint the connection.
sc, err := tsv.te.txPool.GetAndLock(txid, "taint")
require.NoError(t, err)
sc.Taint(ctx, nil)
sc.Taint(ctx, tsv.te.reservedConnStats)
sc.Unlock()

// add rollback state update expectation
Expand Down
51 changes: 41 additions & 10 deletions go/vt/vttablet/tabletserver/query_executor.go
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ import (
"vitess.io/vitess/go/vt/schema"
"vitess.io/vitess/go/vt/sqlparser"
"vitess.io/vitess/go/vt/tableacl"
"vitess.io/vitess/go/vt/tableacl/acl"
"vitess.io/vitess/go/vt/vterrors"
"vitess.io/vitess/go/vt/vtgate/evalengine"
"vitess.io/vitess/go/vt/vttablet/tabletserver/connpool"
Expand Down Expand Up @@ -69,9 +70,10 @@ type QueryExecutor struct {
}

const (
streamRowsSize = 256
resetLastIDQuery = "select last_insert_id(18446744073709547416)"
resetLastIDValue = 18446744073709547416
streamRowsSize = 256
resetLastIDQuery = "select last_insert_id(18446744073709547416)"
resetLastIDValue = 18446744073709547416
userLabelDisabled = "UserLabelDisabled"
)

var (
Expand Down Expand Up @@ -530,10 +532,14 @@ func (qre *QueryExecutor) checkPermissions() error {
}

func (qre *QueryExecutor) checkAccess(authorized *tableacl.ACLResult, tableName string, callerID *querypb.VTGateCallerID) error {
statsKey := []string{tableName, authorized.GroupName, qre.plan.PlanID.String(), callerID.Username}
var aclState acl.ACLState
defer func() {
statsKey := qre.generateACLStatsKey(tableName, authorized, callerID)
qre.recordACLStats(statsKey, aclState)
}()
if !authorized.IsMember(callerID) {
if qre.tsv.qe.enableTableACLDryRun {
qre.tsv.Stats().TableaclPseudoDenied.Add(statsKey, 1)
aclState = acl.ACLPseudoDenied
return nil
}

Expand All @@ -547,17 +553,37 @@ func (qre *QueryExecutor) checkAccess(authorized *tableacl.ACLResult, tableName
if len(callerID.Groups) > 0 {
groupStr = fmt.Sprintf(", in groups [%s],", strings.Join(callerID.Groups, ", "))
}
aclState = acl.ACLDenied
errStr := fmt.Sprintf("%s command denied to user '%s'%s for table '%s' (ACL check error)", qre.plan.PlanID.String(), callerID.Username, groupStr, tableName)
qre.tsv.Stats().TableaclDenied.Add(statsKey, 1)
qre.tsv.qe.accessCheckerLogger.Infof("%s", errStr)
return vterrors.Errorf(vtrpcpb.Code_PERMISSION_DENIED, "%s", errStr)
}
return nil
}
qre.tsv.Stats().TableaclAllowed.Add(statsKey, 1)
aclState = acl.ACLAllow
return nil
}

func (qre *QueryExecutor) generateACLStatsKey(tableName string, authorized *tableacl.ACLResult, callerID *querypb.VTGateCallerID) []string {
if qre.tsv.Config().SkipUserMetrics {
return []string{tableName, authorized.GroupName, qre.plan.PlanID.String(), userLabelDisabled}
}
return []string{tableName, authorized.GroupName, qre.plan.PlanID.String(), callerID.Username}
}

func (qre *QueryExecutor) recordACLStats(key []string, aclState acl.ACLState) {
switch aclState {
case acl.ACLAllow:
qre.tsv.Stats().TableaclAllowed.Add(key, 1)
case acl.ACLDenied:
qre.tsv.Stats().TableaclDenied.Add(key, 1)
case acl.ACLPseudoDenied:
qre.tsv.Stats().TableaclPseudoDenied.Add(key, 1)
case acl.ACLUnknown:
// nothing to record here.
}
}

func (qre *QueryExecutor) execDDL(conn *StatefulConnection) (result *sqltypes.Result, err error) {
// Let's see if this is a normal DDL statement or an Online DDL statement.
// An Online DDL statement is identified by /*vt+ .. */ comment with expected directives, like uuid etc.
Expand Down Expand Up @@ -1274,9 +1300,14 @@ func (qre *QueryExecutor) execStreamSQL(conn *connpool.PooledConn, isTransaction
}

func (qre *QueryExecutor) recordUserQuery(queryType string, duration int64) {
username := callerid.GetPrincipal(callerid.EffectiveCallerIDFromContext(qre.ctx))
if username == "" {
username = callerid.GetUsername(callerid.ImmediateCallerIDFromContext(qre.ctx))
var username string
if qre.tsv.config.SkipUserMetrics {
username = userLabelDisabled
} else {
username = callerid.GetPrincipal(callerid.EffectiveCallerIDFromContext(qre.ctx))
if username == "" {
username = callerid.GetUsername(callerid.ImmediateCallerIDFromContext(qre.ctx))
}
}
tableName := qre.plan.TableName().String()
qre.tsv.Stats().UserTableQueryCount.Add([]string{tableName, username, queryType}, 1)
Expand Down
30 changes: 20 additions & 10 deletions go/vt/vttablet/tabletserver/stateful_connection.go
Original file line number Diff line number Diff line change
Expand Up @@ -179,7 +179,7 @@ func (sc *StatefulConnection) ReleaseString(reason string) {
}
sc.dbConn.Recycle()
sc.dbConn = nil
sc.logReservedConn()
sc.logReservedConn(reason)
}

// Renew the existing connection with new connection id.
Expand Down Expand Up @@ -260,7 +260,11 @@ func (sc *StatefulConnection) Taint(ctx context.Context, stats *servenv.TimingsW
Stats: stats,
}
sc.dbConn.Taint()
sc.Stats().UserActiveReservedCount.Add(sc.getUsername(), 1)
if sc.env.Config().SkipUserMetrics {
sc.Stats().UserActiveReservedCount.Add(userLabelDisabled, 1)
} else {
sc.Stats().UserActiveReservedCount.Add(sc.getUsername(), 1)
}
return nil
}

Expand All @@ -282,9 +286,11 @@ func (sc *StatefulConnection) LogTransaction(reason tx.ReleaseReason) {
username = callerid.GetUsername(sc.txProps.ImmediateCaller)
}
duration := sc.txProps.EndTime.Sub(sc.txProps.StartTime)
sc.Stats().UserTransactionCount.Add([]string{username, reason.Name()}, 1)
sc.Stats().UserTransactionTimesNs.Add([]string{username, reason.Name()}, int64(duration))
sc.txProps.Stats.Add(reason.Name(), duration)
if !sc.env.Config().SkipUserMetrics {
sc.Stats().UserTransactionCount.Add([]string{username, reason.Name()}, 1)
sc.Stats().UserTransactionTimesNs.Add([]string{username, reason.Name()}, int64(duration))
}
tabletenv.TxLogger.Send(sc)
}

Expand All @@ -294,15 +300,19 @@ func (sc *StatefulConnection) SetTimeout(timeout time.Duration) {
}

// logReservedConn logs reserved connection related stats.
func (sc *StatefulConnection) logReservedConn() {
func (sc *StatefulConnection) logReservedConn(reason string) {
if sc.reservedProps == nil {
return // Nothing to log as this connection is not reserved.
}
duration := time.Since(sc.reservedProps.StartTime)
username := sc.getUsername()
sc.Stats().UserActiveReservedCount.Add(username, -1)
sc.Stats().UserReservedCount.Add(username, 1)
sc.Stats().UserReservedTimesNs.Add(username, int64(duration))
sc.reservedProps.Stats.Record(reason, sc.reservedProps.StartTime)
if sc.env.Config().SkipUserMetrics {
sc.Stats().UserActiveReservedCount.Add(userLabelDisabled, -1)
} else {
username := sc.getUsername()
sc.Stats().UserActiveReservedCount.Add(username, -1)
sc.Stats().UserReservedCount.Add(username, 1)
sc.Stats().UserReservedTimesNs.Add(username, int64(time.Since(sc.reservedProps.StartTime)))
}
}

func (sc *StatefulConnection) getUsername() string {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ import (
"vitess.io/vitess/go/sqltypes"
"vitess.io/vitess/go/vt/dbconfigs"
querypb "vitess.io/vitess/go/vt/proto/query"
"vitess.io/vitess/go/vt/servenv"
"vitess.io/vitess/go/vt/vttablet/tabletserver/tx"
)

Expand Down Expand Up @@ -96,24 +97,25 @@ func TestStatefulPoolShutdownNonTx(t *testing.T) {
pool := newActivePool()
params := dbconfigs.New(db.ConnParams())
pool.Open(params, params, params)
rcStats := servenv.NewExporter("TestStatefulPoolShutdownNonTx", "").NewTimings("rconn", "test1", "test2")

// conn1 non-tx, not in use.
conn1, err := pool.NewConn(ctx, &querypb.ExecuteOptions{}, nil)
require.NoError(t, err)
conn1.Taint(ctx, nil)
conn1.Taint(ctx, rcStats)
conn1.Unlock()

// conn2 tx, not in use.
conn2, err := pool.NewConn(ctx, &querypb.ExecuteOptions{}, nil)
require.NoError(t, err)
conn2.Taint(ctx, nil)
conn2.Taint(ctx, rcStats)
conn2.txProps = &tx.Properties{}
conn2.Unlock()

// conn3 non-tx, in use.
conn3, err := pool.NewConn(ctx, &querypb.ExecuteOptions{}, nil)
require.NoError(t, err)
conn3.Taint(ctx, nil)
conn3.Taint(ctx, rcStats)

// After ShutdownNonTx, conn1 should be closed, but not conn3.
pool.ShutdownNonTx()
Expand Down
2 changes: 2 additions & 0 deletions go/vt/vttablet/tabletserver/tabletenv/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -216,6 +216,7 @@ func registerTabletEnvFlags(fs *pflag.FlagSet) {
fs.BoolVar(&currentConfig.EnableViews, "queryserver-enable-views", false, "Enable views support in vttablet.")

fs.BoolVar(&currentConfig.EnablePerWorkloadTableMetrics, "enable-per-workload-table-metrics", defaultConfig.EnablePerWorkloadTableMetrics, "If true, query counts and query error metrics include a label that identifies the workload")
fs.BoolVar(&currentConfig.SkipUserMetrics, "skip-user-metrics", defaultConfig.SkipUserMetrics, "If true, user based stats are not recorded.")

fs.BoolVar(&currentConfig.Unmanaged, "unmanaged", false, "Indicates an unmanaged tablet, i.e. using an external mysql-compatible database")
}
Expand Down Expand Up @@ -370,6 +371,7 @@ type TabletConfig struct {
EnableViews bool `json:"-"`

EnablePerWorkloadTableMetrics bool `json:"-"`
SkipUserMetrics bool `json:"-"`
}

func (cfg *TabletConfig) MarshalJSON() ([]byte, error) {
Expand Down
2 changes: 1 addition & 1 deletion misc/git/hooks/golangci-lint
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ else
if ! version_greater_or_equal "$INSTALLED_VERSION" "${REQUIRED_VERSION#v}"; then
echo "golangci-lint version $INSTALLED_VERSION found, but $REQUIRED_VERSION or newer is required."
echo "Installing version $REQUIRED_VERSION..."
go install github.com/golangci/golangci-lint/cmd/golangci-lint@$REQUIRED_VERSION
go install github.com/golangci/golangci-lint/v2/cmd/golangci-lint@$REQUIRED_VERSION
fi
fi

Expand Down
Loading