diff --git a/go/Godeps/LICENSES b/go/Godeps/LICENSES index 4080cf5fed6..36616a15ce1 100644 --- a/go/Godeps/LICENSES +++ b/go/Godeps/LICENSES @@ -16816,39 +16816,6 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. = LICENSE 3565fbf999a10a748647f3a2f7ff9f5dfcf1af7502a30f860ef0bf98 = ================================================================================ -================================================================================ -= gopkg.in/errgo.v2 licensed under: = - -Copyright © 2013, Roger Peppe -All rights reserved. - -Redistribution and use in source and binary forms, with or without modification, -are permitted provided that the following conditions are met: - - * Redistributions of source code must retain the above copyright notice, - this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above copyright notice, - this list of conditions and the following disclaimer in the documentation - and/or other materials provided with the distribution. - * Neither the name of this project nor the names of its contributors - may be used to endorse or promote products derived from this software - without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT -OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED -TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -= LICENSE fdb54eb3c3cf061a91aac42ab8e6578c3c69de803c2becb0d86810a5 = -================================================================================ - ================================================================================ = gopkg.in/go-jose/go-jose.v2 licensed under: = diff --git a/go/cmd/dolt/commands/archive.go b/go/cmd/dolt/commands/archive.go index 2c839ca25c7..e4a525fc4cf 100644 --- a/go/cmd/dolt/commands/archive.go +++ b/go/cmd/dolt/commands/archive.go @@ -310,8 +310,14 @@ func relateCommitToParentChunks(ctx context.Context, commit hash.Hash, groupings from, to, err := delta.GetRowData(ctx) - f := durable.ProllyMapFromIndex(from) - t := durable.ProllyMapFromIndex(to) + f, err := durable.ProllyMapFromIndex(from) + if err != nil { + return err + } + t, err := durable.ProllyMapFromIndex(to) + if err != nil { + return err + } if f.Node().Level() != t.Node().Level() { continue diff --git a/go/cmd/dolt/commands/engine/sqlengine.go b/go/cmd/dolt/commands/engine/sqlengine.go index 31c2ec43049..ae65d9293f6 100644 --- a/go/cmd/dolt/commands/engine/sqlengine.go +++ b/go/cmd/dolt/commands/engine/sqlengine.go @@ -16,7 +16,6 @@ package engine import ( "context" - "fmt" "os" "strconv" "strings" @@ -45,7 +44,6 @@ import ( "github.com/dolthub/dolt/go/libraries/doltcore/sqle/dsess" "github.com/dolthub/dolt/go/libraries/doltcore/sqle/kvexec" "github.com/dolthub/dolt/go/libraries/doltcore/sqle/mysql_file_handler" - 
"github.com/dolthub/dolt/go/libraries/doltcore/sqle/statsnoms" "github.com/dolthub/dolt/go/libraries/doltcore/sqle/statspro" "github.com/dolthub/dolt/go/libraries/doltcore/sqle/writer" "github.com/dolthub/dolt/go/libraries/utils/config" @@ -85,6 +83,7 @@ type SqlEngineConfig struct { AutoGCController *dsqle.AutoGCController BinlogReplicaController binlogreplication.BinlogReplicaController EventSchedulerStatus eventscheduler.SchedulerStatus + StatsController sql.StatsProvider } // NewSqlEngine returns a SqlEngine @@ -201,9 +200,6 @@ func NewSqlEngine( "authentication_dolt_jwt": NewAuthenticateDoltJWTPlugin(config.JwksConfig), }) - statsPro := statspro.NewProvider(pro, statsnoms.NewNomsStatsFactory(mrEnv.RemoteDialProvider())) - engine.Analyzer.Catalog.StatsProvider = statsPro - if config.AutoGCController != nil { err = config.AutoGCController.RunBackgroundThread(bThreads, sqlEngine.NewDefaultContext) if err != nil { @@ -216,8 +212,15 @@ func NewSqlEngine( dprocedures.UseSessionAwareSafepointController = true } + _, enabled, _ := sql.SystemVariables.GetGlobal(dsess.DoltStatsEnabled) + if enabled.(int8) == 1 { + config.StatsController = statspro.NewStatsController(logrus.StandardLogger(), mrEnv.GetEnv(mrEnv.GetFirstDatabase())) + } else { + config.StatsController = statspro.StatsNoop{} + } + engine.Analyzer.ExecBuilder = rowexec.NewOverrideBuilder(kvexec.Builder{}) - sessFactory := doltSessionFactory(pro, statsPro, mrEnv.Config(), bcController, gcSafepointController, config.Autocommit) + sessFactory := doltSessionFactory(pro, config.StatsController, mrEnv.Config(), bcController, gcSafepointController, config.Autocommit) sqlEngine.provider = pro sqlEngine.contextFactory = sqlContextFactory sqlEngine.dsessFactory = sessFactory @@ -236,8 +239,28 @@ func NewSqlEngine( // configuring stats depends on sessionBuilder // sessionBuilder needs ref to statsProv - if err = statsPro.Configure(ctx, sqlEngine.NewDefaultContext, bThreads, dbs); err != nil { - fmt.Fprintln(cli.CliErr, 
err) + if sc, ok := config.StatsController.(*statspro.StatsController); ok { + _, memOnly, _ := sql.SystemVariables.GetGlobal(dsess.DoltStatsMemoryOnly) + sc.SetMemOnly(memOnly.(int8) == 1) + + pro.InitDatabaseHooks = append(pro.InitDatabaseHooks, statspro.NewInitDatabaseHook(sc)) + pro.DropDatabaseHooks = append(pro.DropDatabaseHooks, statspro.NewDropDatabaseHook(sc)) + + var sqlDbs []sql.Database + for _, db := range dbs { + sqlDbs = append(sqlDbs, db) + } + + err = sc.Init(ctx, pro, sqlEngine.NewDefaultContext, bThreads, sqlDbs) + if err != nil { + return nil, err + } + + if _, paused, _ := sql.SystemVariables.GetGlobal(dsess.DoltStatsPaused); paused.(int8) == 0 { + if err = sc.Restart(); err != nil { + return nil, err + } + } } // Load MySQL Db information diff --git a/go/cmd/dolt/commands/sqlserver/server.go b/go/cmd/dolt/commands/sqlserver/server.go index 435915690af..2059db5ed6f 100644 --- a/go/cmd/dolt/commands/sqlserver/server.go +++ b/go/cmd/dolt/commands/sqlserver/server.go @@ -303,17 +303,9 @@ func ConfigureServices( var sqlEngine *engine.SqlEngine InitSqlEngine := &svcs.AnonService{ InitF: func(ctx context.Context) (err error) { - if statsOn, err := mrEnv.Config().GetString(env.SqlServerGlobalsPrefix + "." + dsess.DoltStatsAutoRefreshEnabled); err != nil { - // Auto-stats is off by default for every command except - // sql-server. Unless the config specifies a specific - // behavior, enable server stats collection. - sql.SystemVariables.SetGlobal(dsess.DoltStatsAutoRefreshEnabled, 1) - } else if statsOn != "0" { - // do not bootstrap if auto-stats enabled - } else if _, err := mrEnv.Config().GetString(env.SqlServerGlobalsPrefix + "." + dsess.DoltStatsBootstrapEnabled); err != nil { - // If we've disabled stats collection and config does not - // specify bootstrap behavior, enable bootstrapping. - sql.SystemVariables.SetGlobal(dsess.DoltStatsBootstrapEnabled, 1) + if _, err := mrEnv.Config().GetString(env.SqlServerGlobalsPrefix + "." 
+ dsess.DoltStatsPaused); err != nil { + // unless otherwise specified, run stats writer alongside server + sql.SystemVariables.SetGlobal(dsess.DoltStatsPaused, 0) } sqlEngine, err = engine.NewSqlEngine( ctx, diff --git a/go/cmd/dolt/commands/sqlserver/server_test.go b/go/cmd/dolt/commands/sqlserver/server_test.go index c97411a907d..9a1d7ce9216 100644 --- a/go/cmd/dolt/commands/sqlserver/server_test.go +++ b/go/cmd/dolt/commands/sqlserver/server_test.go @@ -15,6 +15,7 @@ package sqlserver import ( + "fmt" "net/http" "os" "path/filepath" @@ -184,11 +185,6 @@ func TestServerBadArgs(t *testing.T) { func TestServerGoodParams(t *testing.T) { ctx := context.Background() - env, err := sqle.CreateEnvWithSeedData() - require.NoError(t, err) - defer func() { - assert.NoError(t, env.DoltDB(ctx).Close()) - }() tests := []servercfg.ServerConfig{ DefaultCommandLineServerConfig(), @@ -210,6 +206,11 @@ func TestServerGoodParams(t *testing.T) { for _, test := range tests { t.Run(servercfg.ConfigInfo(test), func(t *testing.T) { + env, err := sqle.CreateEnvWithSeedData() + require.NoError(t, err) + defer func() { + assert.NoError(t, env.DoltDB(ctx).Close()) + }() sc := svcs.NewController() go func(config servercfg.ServerConfig, sc *svcs.Controller) { _, _ = Serve(context.Background(), &Config{ @@ -219,7 +220,7 @@ func TestServerGoodParams(t *testing.T) { DoltEnv: env, }) }(test, sc) - err := sc.WaitForStart() + err = sc.WaitForStart() require.NoError(t, err) conn, err := dbr.Open("mysql", servercfg.ConnectionString(test, "dbname"), nil) require.NoError(t, err) @@ -228,6 +229,7 @@ func TestServerGoodParams(t *testing.T) { sc.Stop() err = sc.WaitForStop() assert.NoError(t, err) + fmt.Println("stop server") }) } } diff --git a/go/go.mod b/go/go.mod index 4eafd0bf51e..c17664311ef 100644 --- a/go/go.mod +++ b/go/go.mod @@ -61,7 +61,7 @@ require ( github.com/creasty/defaults v1.6.0 github.com/dolthub/aws-sdk-go-ini-parser v0.0.0-20250305001723-2821c37f6c12 
github.com/dolthub/flatbuffers/v23 v23.3.3-dh.2 - github.com/dolthub/go-mysql-server v0.19.1-0.20250320042421-9a6edfcfab0d + github.com/dolthub/go-mysql-server v0.19.1-0.20250320173422-cce3ea1590af github.com/dolthub/gozstd v0.0.0-20240423170813-23a2903bca63 github.com/esote/minmaxheap v1.0.0 github.com/goccy/go-json v0.10.2 @@ -94,7 +94,6 @@ require ( golang.org/x/exp v0.0.0-20230522175609-2e198f4a06a1 golang.org/x/text v0.22.0 gonum.org/v1/plot v0.11.0 - gopkg.in/errgo.v2 v2.1.0 gopkg.in/go-jose/go-jose.v2 v2.6.3 gopkg.in/yaml.v3 v3.0.1 ) diff --git a/go/go.sum b/go/go.sum index e8c57135381..53b7c897536 100644 --- a/go/go.sum +++ b/go/go.sum @@ -221,8 +221,8 @@ github.com/dolthub/fslock v0.0.3 h1:iLMpUIvJKMKm92+N1fmHVdxJP5NdyDK5bK7z7Ba2s2U= github.com/dolthub/fslock v0.0.3/go.mod h1:QWql+P17oAAMLnL4HGB5tiovtDuAjdDTPbuqx7bYfa0= github.com/dolthub/go-icu-regex v0.0.0-20250319212010-451ea8d003fa h1:NFbzJ4wjWRz32nz2EimbrHpRx1Xt6k+IaR8N+j4x62k= github.com/dolthub/go-icu-regex v0.0.0-20250319212010-451ea8d003fa/go.mod h1:ylU4XjUpsMcvl/BKeRRMXSH7e7WBrPXdSLvnRJYrxEA= -github.com/dolthub/go-mysql-server v0.19.1-0.20250320042421-9a6edfcfab0d h1:Ra9hv9fvJkSvjihPmtQB4EMGhq9qNp08gUI/mRmF9no= -github.com/dolthub/go-mysql-server v0.19.1-0.20250320042421-9a6edfcfab0d/go.mod h1:9itIc5jYYDRxmchFmegPaLaqdf4XWYX6nua5HhrajgA= +github.com/dolthub/go-mysql-server v0.19.1-0.20250320173422-cce3ea1590af h1:ozgYo2hKV6uQqLxZTS+QElHTaZ8mMiKOln25jZI1gVc= +github.com/dolthub/go-mysql-server v0.19.1-0.20250320173422-cce3ea1590af/go.mod h1:9itIc5jYYDRxmchFmegPaLaqdf4XWYX6nua5HhrajgA= github.com/dolthub/gozstd v0.0.0-20240423170813-23a2903bca63 h1:OAsXLAPL4du6tfbBgK0xXHZkOlos63RdKYS3Sgw/dfI= github.com/dolthub/gozstd v0.0.0-20240423170813-23a2903bca63/go.mod h1:lV7lUeuDhH5thVGDCKXbatwKy2KW80L4rMT46n+Y2/Q= github.com/dolthub/ishell v0.0.0-20240701202509-2b217167d718 h1:lT7hE5k+0nkBdj/1UOSFwjWpNxf+LCApbRHgnCA17XE= @@ -1186,7 +1186,6 @@ gopkg.in/check.v1 v1.0.0-20200902074654-038fdea0a05b/go.mod 
h1:Co6ibVJAznAaIkqp8 gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk= gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q= gopkg.in/cheggaaa/pb.v1 v1.0.25/go.mod h1:V/YB90LKu/1FcN3WVnfiiE5oMCibMjukxqG/qStrOgw= -gopkg.in/errgo.v2 v2.1.0 h1:0vLT13EuvQ0hNvakwLuFZ/jYrLp5F3kcWHXdRggjCE8= gopkg.in/errgo.v2 v2.1.0/go.mod h1:hNsd1EY+bozCKY1Ytp96fpM3vjJbqLJn88ws8XvfDNI= gopkg.in/fsnotify.v1 v1.4.7/go.mod h1:Tz8NjZHkW78fSQdbUxIjBTcgA1z1m8ZHf0WmKUhAMys= gopkg.in/gcfg.v1 v1.2.3/go.mod h1:yesOnuUOFQAhST5vPY4nbZsb/huCgGGXlipJsBn0b3o= diff --git a/go/go.work.sum b/go/go.work.sum index 154da430063..7a6b5fc0f32 100644 --- a/go/go.work.sum +++ b/go/go.work.sum @@ -386,6 +386,12 @@ github.com/dgrijalva/jwt-go v3.2.0+incompatible h1:7qlOGliEKZXTDg6OTjfoBKDXWrumC github.com/dimchansky/utfbom v1.1.1/go.mod h1:SxdoEBH5qIqFocHMyGOXVAybYJdr71b1Q/j0mACtrfE= github.com/dolthub/go-mysql-server v0.19.1-0.20250228215144-f8da474ecd9f h1:lwQH9jVmSiPg1DFMYB9rWyyJTMPMoBpGrYRsOGOD/hA= github.com/dolthub/go-mysql-server v0.19.1-0.20250228215144-f8da474ecd9f/go.mod h1:JTlrabhq5TJqvlL+J3NKlm0EzTHQQugUAH6yAxWi4Ww= +github.com/dolthub/go-mysql-server v0.19.1-0.20250305230031-14a57e076a0a h1:lemFIUt0NCKIeX7vnU2yKF8UIgc0DT8zIoEUn7oy+60= +github.com/dolthub/go-mysql-server v0.19.1-0.20250305230031-14a57e076a0a/go.mod h1:yr+Vv47/YLOKMgiEY+QxHTlbIVpTuiVtkEZ5l+xruY4= +github.com/dolthub/maphash v0.0.0-20221220182448-74e1e1ea1577 h1:SegEguMxToBn045KRHLIUlF2/jR7Y2qD6fF+3tdOfvI= +github.com/dolthub/maphash v0.0.0-20221220182448-74e1e1ea1577/go.mod h1:gkg4Ch4CdCDu5h6PMriVLawB7koZ+5ijb9puGMV50a4= +github.com/dolthub/swiss v0.1.0 h1:EaGQct3AqeP/MjASHLiH6i4TAmgbG/c4rA6a1bzCOPc= +github.com/dolthub/swiss v0.1.0/go.mod h1:BeucyB08Vb1G9tumVN3Vp/pyY4AMUnr9p7Rz7wJ7kAQ= github.com/dolthub/vitess v0.0.0-20241104125316-860772ba6683 h1:2/RJeUfNAXS7mbBnEr9C36htiCJHk5XldDPzhxtEsME= github.com/dolthub/vitess 
v0.0.0-20241104125316-860772ba6683/go.mod h1:uBvlRluuL+SbEWTCZ68o0xvsdYZER3CEG/35INdzfJM= github.com/dolthub/vitess v0.0.0-20241231200706-18992bb25fdc/go.mod h1:1gQZs/byeHLMSul3Lvl3MzioMtOW1je79QYGyi2fd70= @@ -651,6 +657,8 @@ github.com/tencentcloud/tencentcloud-sdk-go/tencentcloud/sts v1.0.588/go.mod h1: github.com/tencentcloud/tencentcloud-sdk-go/tencentcloud/tag v1.0.233/go.mod h1:sX14+NSvMjOhNFaMtP2aDy6Bss8PyFXij21gpY6+DAs= github.com/tencentyun/cos-go-sdk-v5 v0.7.42/go.mod h1:LUFnaqRmGk6pEHOaRmdn2dCZR2j0cSsM5xowWFPTPao= github.com/thanhpk/randstr v1.0.4/go.mod h1:M/H2P1eNLZzlDwAzpkkkUvoyNNMbzRGhESZuEQk3r0U= +github.com/thepudds/swisstable v0.0.0-20221011152303-9c77dc657777 h1:5u+6YWU2faS+Sr/x8j9yalMpSDUkatNOZWXV3wMUCGQ= +github.com/thepudds/swisstable v0.0.0-20221011152303-9c77dc657777/go.mod h1:4af3KxEsswy6aTzsTcwa8QZUSh4V+80oHdp1QX9uJHA= github.com/thlib/go-timezone-local v0.0.0-20210907160436-ef149e42d28e/go.mod h1:/Tnicc6m/lsJE0irFMA0LfIwTBo4QP7A8IfyIv4zZKI= github.com/tmc/grpc-websocket-proxy v0.0.0-20170815181823-89b8d40f7ca8 h1:ndzgwNDnKIqyCvHTXaCqh9KlOWKvBry6nuXMJmonVsE= github.com/tombuildsstuff/giovanni v0.15.1/go.mod h1:0TZugJPEtqzPlMpuJHYfXY6Dq2uLPrXf98D2XQSxNbA= @@ -740,6 +748,7 @@ google.golang.org/protobuf v1.27.1/go.mod h1:9q0QmTI4eRPtz6boOQmLYwt+qCgq0jsYwAQ google.golang.org/protobuf v1.28.0/go.mod h1:HV8QOd/L58Z+nl8r43ehVNZIU/HEI6OcFqwMG9pJV4I= gopkg.in/alecthomas/kingpin.v2 v2.2.6 h1:jMFz6MfLP0/4fUyZle81rXUoxOBFi19VUFKVDOQfozc= gopkg.in/cheggaaa/pb.v1 v1.0.25 h1:Ev7yu1/f6+d+b3pi5vPdRPc6nNtP1umSfcWiEfRqv6I= +gopkg.in/errgo.v2 v2.1.0 h1:0vLT13EuvQ0hNvakwLuFZ/jYrLp5F3kcWHXdRggjCE8= gopkg.in/fsnotify.v1 v1.4.7 h1:xOHLXZwVvI9hhs+cLKq5+I5onOuwQLhQwiu63xxlHs4= gopkg.in/gcfg.v1 v1.2.3 h1:m8OOJ4ccYHnx2f4gQwpno8nAX5OGOh7RLaaz0pj3Ogs= gopkg.in/inf.v0 v0.9.1/go.mod h1:cWUDdTG/fYaXco+Dcufb5Vnc6Gp2YChqWtbxRZE0mXw= diff --git a/go/libraries/doltcore/diff/diff_stat.go b/go/libraries/doltcore/diff/diff_stat.go index dc358ca6bb3..051396fc1cf 100644 --- 
a/go/libraries/doltcore/diff/diff_stat.go +++ b/go/libraries/doltcore/diff/diff_stat.go @@ -105,11 +105,16 @@ func diffProllyTrees(ctx context.Context, ch chan DiffStatProgress, keyless bool var f, t prolly.Map if from != nil { - f = durable.ProllyMapFromIndex(from) + f, err = durable.ProllyMapFromIndex(from) + if err != nil { + return err + } } if to != nil { - t = durable.ProllyMapFromIndex(to) - + t, err = durable.ProllyMapFromIndex(to) + if err != nil { + return err + } } _, fVD := f.Descriptors() diff --git a/go/libraries/doltcore/doltdb/doltdb.go b/go/libraries/doltcore/doltdb/doltdb.go index d9e8597b9b5..ff9bac1a815 100644 --- a/go/libraries/doltcore/doltdb/doltdb.go +++ b/go/libraries/doltcore/doltdb/doltdb.go @@ -2110,8 +2110,8 @@ func (ddb *DoltDB) AddStash(ctx context.Context, head *Commit, stash RootValue, return err } -func (ddb *DoltDB) SetStatisics(ctx context.Context, branch string, addr hash.Hash) error { - statsDs, err := ddb.db.GetDataset(ctx, ref.NewStatsRef(branch).String()) +func (ddb *DoltDB) SetStatistics(ctx context.Context, branch string, addr hash.Hash) error { + statsDs, err := ddb.db.GetDataset(ctx, ref.NewStatsRef().String()) if err != nil { return err } @@ -2119,8 +2119,8 @@ func (ddb *DoltDB) SetStatisics(ctx context.Context, branch string, addr hash.Ha return err } -func (ddb *DoltDB) DropStatisics(ctx context.Context, branch string) error { - statsDs, err := ddb.db.GetDataset(ctx, ref.NewStatsRef(branch).String()) +func (ddb *DoltDB) DropStatisics(ctx context.Context) error { + statsDs, err := ddb.db.GetDataset(ctx, ref.NewStatsRef().String()) _, err = ddb.db.Delete(ctx, statsDs, "") if err != nil { @@ -2132,8 +2132,8 @@ func (ddb *DoltDB) DropStatisics(ctx context.Context, branch string) error { var ErrNoStatistics = errors.New("no statistics found") // GetStatistics returns the value of the singleton ref.StatsRef for this database -func (ddb *DoltDB) GetStatistics(ctx context.Context, branch string) (prolly.Map, error) { - ds, 
err := ddb.db.GetDataset(ctx, ref.NewStatsRef(branch).String()) +func (ddb *DoltDB) GetStatistics(ctx context.Context) (prolly.Map, error) { + ds, err := ddb.db.GetDataset(ctx, ref.NewStatsRef().String()) if err != nil { return prolly.Map{}, err } diff --git a/go/libraries/doltcore/doltdb/durable/index.go b/go/libraries/doltcore/doltdb/durable/index.go index db5385f1351..8a775f12b7a 100644 --- a/go/libraries/doltcore/doltdb/durable/index.go +++ b/go/libraries/doltcore/doltdb/durable/index.go @@ -273,8 +273,13 @@ type prollyIndex struct { } // ProllyMapFromIndex unwraps the Index and returns the underlying prolly.Map. -func ProllyMapFromIndex(i Index) prolly.Map { - return i.(prollyIndex).index +func ProllyMapFromIndex(i Index) (prolly.Map, error) { + switch i := i.(type) { + case prollyIndex: + return i.index, nil + default: + return prolly.Map{}, fmt.Errorf("expected prollyIndex, found: %T", i) + } } // xxx: don't use this, temporary fix waiting for bigger @@ -369,7 +374,10 @@ func (i prollyIndex) AddColumnToRows(ctx context.Context, newCol string, newSche } // If not, then we have to iterate over this table's rows and update all the offsets for the new column - rowMap := ProllyMapFromIndex(i) + rowMap, err := ProllyMapFromIndex(i) + if err != nil { + return nil, err + } mutator := rowMap.Mutate() iter, err := mutator.IterAll(ctx) diff --git a/go/libraries/doltcore/merge/fulltext_rebuild.go b/go/libraries/doltcore/merge/fulltext_rebuild.go index e1cf674a19a..93529a2fa0c 100644 --- a/go/libraries/doltcore/merge/fulltext_rebuild.go +++ b/go/libraries/doltcore/merge/fulltext_rebuild.go @@ -295,7 +295,10 @@ func createRowIterForTable(ctx *sql.Context, t *doltdb.Table, sch schema.Schema) if err != nil { return nil, err } - rows := durable.ProllyMapFromIndex(rowData) + rows, err := durable.ProllyMapFromIndex(rowData) + if err != nil { + return nil, err + } rowCount, err := rows.Count() if err != nil { return nil, err diff --git 
a/go/libraries/doltcore/merge/fulltext_table.go b/go/libraries/doltcore/merge/fulltext_table.go index 3e85e343a91..40897215f3f 100644 --- a/go/libraries/doltcore/merge/fulltext_table.go +++ b/go/libraries/doltcore/merge/fulltext_table.go @@ -145,7 +145,10 @@ func (table *fulltextTable) ApplyToTable(ctx *sql.Context) (*doltdb.Table, error if err != nil { return nil, err } - m := durable.ProllyMapFromIndex(idx) + m, err := durable.ProllyMapFromIndex(idx) + if err != nil { + return nil, err + } keyDesc, valDesc := m.Descriptors() keyMap, valMap := ordinalMappingsFromSchema(table.SqlSch, table.Sch) mut := m.Mutate() diff --git a/go/libraries/doltcore/merge/keyless_integration_test.go b/go/libraries/doltcore/merge/keyless_integration_test.go index f4766b79f1d..75666a93f17 100644 --- a/go/libraries/doltcore/merge/keyless_integration_test.go +++ b/go/libraries/doltcore/merge/keyless_integration_test.go @@ -403,7 +403,7 @@ func assertNomsConflicts(t *testing.T, ctx context.Context, tbl *doltdb.Table, e func mustGetRowValueFromTable(t *testing.T, ctx context.Context, tbl *doltdb.Table, key val.Tuple) val.Tuple { idx, err := tbl.GetRowData(ctx) require.NoError(t, err) - m := durable.ProllyMapFromIndex(idx) + m, _ := durable.ProllyMapFromIndex(idx) var value val.Tuple err = m.Get(ctx, key, func(_, v val.Tuple) error { @@ -438,7 +438,7 @@ func assertKeylessRows(t *testing.T, ctx context.Context, tbl *doltdb.Table, exp func assertKeylessProllyRows(t *testing.T, ctx context.Context, tbl *doltdb.Table, expected []keylessEntry) { idx, err := tbl.GetRowData(ctx) require.NoError(t, err) - m := durable.ProllyMapFromIndex(idx) + m, _ := durable.ProllyMapFromIndex(idx) expectedSet := mustHash128Set(expected...) 
diff --git a/go/libraries/doltcore/merge/merge_prolly_indexes.go b/go/libraries/doltcore/merge/merge_prolly_indexes.go index 416e14d25db..974f930e169 100644 --- a/go/libraries/doltcore/merge/merge_prolly_indexes.go +++ b/go/libraries/doltcore/merge/merge_prolly_indexes.go @@ -50,7 +50,10 @@ func mergeProllySecondaryIndexes( return nil, err } - mergedM := durable.ProllyMapFromIndex(finalRows) + mergedM, err := durable.ProllyMapFromIndex(finalRows) + if err != nil { + return nil, err + } tryGetIdx := func(sch schema.Schema, iS durable.IndexSet, indexName string) (prolly.Map, bool, error) { ok := sch.Indexes().Contains(indexName) @@ -59,7 +62,10 @@ func mergeProllySecondaryIndexes( if err != nil { return prolly.Map{}, false, err } - m := durable.ProllyMapFromIndex(idx) + m, err := durable.ProllyMapFromIndex(idx) + if err != nil { + return prolly.Map{}, false, err + } return m, true, nil } return prolly.Map{}, false, nil diff --git a/go/libraries/doltcore/merge/merge_prolly_rows.go b/go/libraries/doltcore/merge/merge_prolly_rows.go index 9a8371deffe..49c90a8389b 100644 --- a/go/libraries/doltcore/merge/merge_prolly_rows.go +++ b/go/libraries/doltcore/merge/merge_prolly_rows.go @@ -73,7 +73,10 @@ func mergeProllyTable( if err != nil { return nil, nil, err } - leftRows := durable.ProllyMapFromIndex(lr) + leftRows, err := durable.ProllyMapFromIndex(lr) + if err != nil { + return nil, nil, err + } valueMerger := newValueMerger(mergedSch, tm.leftSch, tm.rightSch, tm.ancSch, leftRows.Pool(), tm.ns) if !valueMerger.leftMapping.IsIdentityMapping() { @@ -130,7 +133,11 @@ func mergeProllyTableData(ctx *sql.Context, tm *TableMerger, finalSch schema.Sch if err != nil { return nil, nil, err } - leftEditor := durable.ProllyMapFromIndex(lr).Rewriter(finalSch.GetKeyDescriptor(ns), finalSch.GetValueDescriptor(ns)) + lIdx, err := durable.ProllyMapFromIndex(lr) + if err != nil { + return nil, nil, err + } + leftEditor := lIdx.Rewriter(finalSch.GetKeyDescriptor(ns), 
finalSch.GetValueDescriptor(ns)) ai, err := mergeTbl.GetArtifacts(ctx) if err != nil { @@ -331,19 +338,27 @@ func threeWayDiffer(ctx context.Context, tm *TableMerger, valueMerger *valueMerg if err != nil { return nil, err } - leftRows := durable.ProllyMapFromIndex(lr) + leftRows, err := durable.ProllyMapFromIndex(lr) + if err != nil { + return nil, err + } rr, err := tm.rightTbl.GetRowData(ctx) if err != nil { return nil, err } - rightRows := durable.ProllyMapFromIndex(rr) - + rightRows, err := durable.ProllyMapFromIndex(rr) + if err != nil { + return nil, err + } ar, err := tm.ancTbl.GetRowData(ctx) if err != nil { return nil, err } - ancRows := durable.ProllyMapFromIndex(ar) + ancRows, err := durable.ProllyMapFromIndex(ar) + if err != nil { + return nil, err + } return tree.NewThreeWayDiffer( ctx, @@ -534,7 +549,10 @@ func newUniqValidator(ctx *sql.Context, sch schema.Schema, tm *TableMerger, vm * if err != nil { return uniqValidator{}, err } - clustered := durable.ProllyMapFromIndex(rows) + clustered, err := durable.ProllyMapFromIndex(rows) + if err != nil { + return uniqValidator{}, err + } indexes, err := tm.leftTbl.GetIndexSet(ctx) if err != nil { @@ -552,7 +570,10 @@ func newUniqValidator(ctx *sql.Context, sch schema.Schema, tm *TableMerger, vm * if err != nil { return uniqValidator{}, err } - secondary := durable.ProllyMapFromIndex(idx) + secondary, err := durable.ProllyMapFromIndex(idx) + if err != nil { + return uniqValidator{}, err + } u, err := newUniqIndex(ctx, sch, tm.name.Name, def, clustered, secondary) if err != nil { diff --git a/go/libraries/doltcore/merge/merge_test.go b/go/libraries/doltcore/merge/merge_test.go index 347bf27223a..c1b9d429fc2 100644 --- a/go/libraries/doltcore/merge/merge_test.go +++ b/go/libraries/doltcore/merge/merge_test.go @@ -332,14 +332,18 @@ func TestMergeCommits(t *testing.T) { artifacts := durable.ProllyMapFromArtifactIndex(artIdx) MustEqualArtifactMap(t, expectedArtifacts, artifacts) - MustEqualProlly(t, tableName, 
durable.ProllyMapFromIndex(expectedRows), durable.ProllyMapFromIndex(mergedRows)) + idx1, _ := durable.ProllyMapFromIndex(expectedRows) + idx2, _ := durable.ProllyMapFromIndex(mergedRows) + MustEqualProlly(t, tableName, idx1, idx2) for _, index := range sch.Indexes().AllIndexes() { mergedIndexRows, err := merged.table.GetIndexRowData(ctx, index.Name()) require.NoError(t, err) expectedIndexRows, err := expected.GetIndexRowData(ctx, index.Name()) require.NoError(t, err) - MustEqualProlly(t, index.Name(), durable.ProllyMapFromIndex(expectedIndexRows), durable.ProllyMapFromIndex(mergedIndexRows)) + idx1, _ := durable.ProllyMapFromIndex(expectedIndexRows) + idx2, _ := durable.ProllyMapFromIndex(mergedIndexRows) + MustEqualProlly(t, index.Name(), idx1, idx2) } h, err := merged.table.HashOf() @@ -635,7 +639,7 @@ func rebuildAllProllyIndexes(ctx *sql.Context, tbl *doltdb.Table) (*doltdb.Table if err != nil { return nil, err } - primary := durable.ProllyMapFromIndex(tableRowData) + primary, _ := durable.ProllyMapFromIndex(tableRowData) for _, index := range sch.Indexes().AllIndexes() { rebuiltIndexRowData, err := creation.BuildSecondaryProllyIndex(ctx, tbl.ValueReadWriter(), tbl.NodeStore(), sch, tableName, index, primary) diff --git a/go/libraries/doltcore/merge/mutable_secondary_index.go b/go/libraries/doltcore/merge/mutable_secondary_index.go index cf72af8ddeb..23b46a6cfe2 100644 --- a/go/libraries/doltcore/merge/mutable_secondary_index.go +++ b/go/libraries/doltcore/merge/mutable_secondary_index.go @@ -35,7 +35,10 @@ func GetMutableSecondaryIdxs(ctx *sql.Context, ourSch, sch schema.Schema, tableN if err != nil { return nil, err } - m := durable.ProllyMapFromIndex(idx) + m, err := durable.ProllyMapFromIndex(idx) + if err != nil { + return nil, err + } mods[i], err = NewMutableSecondaryIdx(ctx, m, ourSch, sch, tableName, index) if err != nil { return nil, err @@ -68,7 +71,10 @@ func GetMutableSecondaryIdxsWithPending(ctx *sql.Context, ns tree.NodeStore, our if err != nil 
{ return nil, err } - m := durable.ProllyMapFromIndex(idx) + m, err := durable.ProllyMapFromIndex(idx) + if err != nil { + return nil, err + } // If the schema has changed, don't reuse the index. // TODO: This isn't technically required, but correctly handling updating secondary indexes when only some diff --git a/go/libraries/doltcore/merge/violations_fk.go b/go/libraries/doltcore/merge/violations_fk.go index d0ce2585067..1fd758941bf 100644 --- a/go/libraries/doltcore/merge/violations_fk.go +++ b/go/libraries/doltcore/merge/violations_fk.go @@ -361,7 +361,10 @@ func parentFkConstraintViolations( return nomsParentFkConstraintViolations(ctx, vr, foreignKey, postParent, postChild, preParent.Schema, m, receiver) } if preParent.IndexData == nil || postParent.Schema.GetPKCols().Size() == 0 || preParent.Schema.GetPKCols().Size() == 0 { - m := durable.ProllyMapFromIndex(preParentRowData) + m, err := durable.ProllyMapFromIndex(preParentRowData) + if err != nil { + return err + } return prollyParentPriDiffFkConstraintViolations(ctx, foreignKey, postParent, postChild, m, receiver) } empty, err := preParentRowData.Empty() @@ -377,7 +380,10 @@ func parentFkConstraintViolations( } else { idx = preParent.IndexData } - m := durable.ProllyMapFromIndex(idx) + m, err := durable.ProllyMapFromIndex(idx) + if err != nil { + return err + } return prollyParentSecDiffFkConstraintViolations(ctx, foreignKey, postParent, postChild, m, receiver) } @@ -396,7 +402,10 @@ func childFkConstraintViolations( return nomsChildFkConstraintViolations(ctx, vr, foreignKey, postParent, postChild, preChild.Schema, m, receiver) } if preChild.IndexData == nil || postChild.Schema.GetPKCols().Size() == 0 || preChild.Schema.GetPKCols().Size() == 0 { - m := durable.ProllyMapFromIndex(preChildRowData) + m, err := durable.ProllyMapFromIndex(preChildRowData) + if err != nil { + return err + } return prollyChildPriDiffFkConstraintViolations(ctx, foreignKey, postParent, postChild, m, receiver) } empty, err := 
preChildRowData.Empty() @@ -412,7 +421,10 @@ func childFkConstraintViolations( } else { idx = preChild.IndexData } - m := durable.ProllyMapFromIndex(idx) + m, err := durable.ProllyMapFromIndex(idx) + if err != nil { + return err + } return prollyChildSecDiffFkConstraintViolations(ctx, foreignKey, postParent, postChild, m, receiver) } diff --git a/go/libraries/doltcore/merge/violations_fk_prolly.go b/go/libraries/doltcore/merge/violations_fk_prolly.go index 60d99e83f6b..769e20fc20e 100644 --- a/go/libraries/doltcore/merge/violations_fk_prolly.go +++ b/go/libraries/doltcore/merge/violations_fk_prolly.go @@ -38,19 +38,29 @@ func prollyParentSecDiffFkConstraintViolations( postParent, postChild *constraintViolationsLoadedTable, preParentSecIdx prolly.Map, receiver FKViolationReceiver) error { - - postParentRowData := durable.ProllyMapFromIndex(postParent.RowData) - postParentSecIdx := durable.ProllyMapFromIndex(postParent.IndexData) - childSecIdx := durable.ProllyMapFromIndex(postChild.IndexData) + postParentRowData, err := durable.ProllyMapFromIndex(postParent.RowData) + if err != nil { + return err + } + postParentSecIdx, err := durable.ProllyMapFromIndex(postParent.IndexData) + if err != nil { + return err + } + childSecIdx, err := durable.ProllyMapFromIndex(postChild.IndexData) + if err != nil { + return err + } parentSecKD, _ := postParentSecIdx.Descriptors() parentPrefixKD := parentSecKD.PrefixDesc(len(foreignKey.TableColumns)) partialKB := val.NewTupleBuilder(parentPrefixKD) - childPriIdx := durable.ProllyMapFromIndex(postChild.RowData) + childPriIdx, err := durable.ProllyMapFromIndex(postChild.RowData) + if err != nil { + return err + } childPriKD, _ := childPriIdx.Descriptors() - var err error // TODO: Determine whether we should surface every row as a diff when the map's value descriptor has changed. 
considerAllRowsModified := false err = prolly.DiffMaps(ctx, preParentSecIdx, postParentSecIdx, considerAllRowsModified, func(ctx context.Context, diff tree.Diff) error { @@ -95,20 +105,32 @@ func prollyParentPriDiffFkConstraintViolations( postParent, postChild *constraintViolationsLoadedTable, preParentRowData prolly.Map, receiver FKViolationReceiver) error { - postParentRowData := durable.ProllyMapFromIndex(postParent.RowData) - postParentIndexData := durable.ProllyMapFromIndex(postParent.IndexData) + postParentRowData, err := durable.ProllyMapFromIndex(postParent.RowData) + if err != nil { + return err + } + postParentIndexData, err := durable.ProllyMapFromIndex(postParent.IndexData) + if err != nil { + return err + } idxDesc, _ := postParentIndexData.Descriptors() partialDesc := idxDesc.PrefixDesc(len(foreignKey.TableColumns)) partialKB := val.NewTupleBuilder(partialDesc) - childPriIdx := durable.ProllyMapFromIndex(postChild.RowData) - childScndryIdx := durable.ProllyMapFromIndex(postChild.IndexData) + childPriIdx, err := durable.ProllyMapFromIndex(postChild.RowData) + if err != nil { + return err + } + childScndryIdx, err := durable.ProllyMapFromIndex(postChild.IndexData) + if err != nil { + return err + } primaryKD, _ := childPriIdx.Descriptors() // TODO: Determine whether we should surface every row as a diff when the map's value descriptor has changed. 
considerAllRowsModified := false - err := prolly.DiffMaps(ctx, preParentRowData, postParentRowData, considerAllRowsModified, func(ctx context.Context, diff tree.Diff) error { + err = prolly.DiffMaps(ctx, preParentRowData, postParentRowData, considerAllRowsModified, func(ctx context.Context, diff tree.Diff) error { switch diff.Type { case tree.RemovedDiff, tree.ModifiedDiff: partialKey, hadNulls := makePartialKey(partialKB, foreignKey.ReferencedTableColumns, postParent.Index, postParent.Schema, val.Tuple(diff.Key), val.Tuple(diff.From), preParentRowData.Pool()) @@ -159,8 +181,14 @@ func prollyChildPriDiffFkConstraintViolations( postParent, postChild *constraintViolationsLoadedTable, preChildRowData prolly.Map, receiver FKViolationReceiver) error { - postChildRowData := durable.ProllyMapFromIndex(postChild.RowData) - parentScndryIdx := durable.ProllyMapFromIndex(postParent.IndexData) + postChildRowData, err := durable.ProllyMapFromIndex(postChild.RowData) + if err != nil { + return err + } + parentScndryIdx, err := durable.ProllyMapFromIndex(postParent.IndexData) + if err != nil { + return err + } idxDesc, _ := parentScndryIdx.Descriptors() partialDesc := idxDesc.PrefixDesc(len(foreignKey.TableColumns)) @@ -168,7 +196,7 @@ func prollyChildPriDiffFkConstraintViolations( // TODO: Determine whether we should surface every row as a diff when the map's value descriptor has changed. 
considerAllRowsModified := false - err := prolly.DiffMaps(ctx, preChildRowData, postChildRowData, considerAllRowsModified, func(ctx context.Context, diff tree.Diff) error { + err = prolly.DiffMaps(ctx, preChildRowData, postChildRowData, considerAllRowsModified, func(ctx context.Context, diff tree.Diff) error { switch diff.Type { case tree.AddedDiff, tree.ModifiedDiff: k, v := val.Tuple(diff.Key), val.Tuple(diff.To) @@ -207,9 +235,18 @@ func prollyChildSecDiffFkConstraintViolations( postParent, postChild *constraintViolationsLoadedTable, preChildSecIdx prolly.Map, receiver FKViolationReceiver) error { - postChildRowData := durable.ProllyMapFromIndex(postChild.RowData) - postChildSecIdx := durable.ProllyMapFromIndex(postChild.IndexData) - parentSecIdx := durable.ProllyMapFromIndex(postParent.IndexData) + postChildRowData, err := durable.ProllyMapFromIndex(postChild.RowData) + if err != nil { + return err + } + postChildSecIdx, err := durable.ProllyMapFromIndex(postChild.IndexData) + if err != nil { + return err + } + parentSecIdx, err := durable.ProllyMapFromIndex(postParent.IndexData) + if err != nil { + return err + } parentSecIdxDesc, _ := parentSecIdx.Descriptors() prefixDesc := parentSecIdxDesc.PrefixDesc(len(foreignKey.TableColumns)) @@ -218,7 +255,7 @@ func prollyChildSecDiffFkConstraintViolations( // TODO: Determine whether we should surface every row as a diff when the map's value descriptor has changed. 
considerAllRowsModified := false - err := prolly.DiffMaps(ctx, preChildSecIdx, postChildSecIdx, considerAllRowsModified, func(ctx context.Context, diff tree.Diff) error { + err = prolly.DiffMaps(ctx, preChildSecIdx, postChildSecIdx, considerAllRowsModified, func(ctx context.Context, diff tree.Diff) error { switch diff.Type { case tree.AddedDiff, tree.ModifiedDiff: k := val.Tuple(diff.Key) diff --git a/go/libraries/doltcore/migrate/transform.go b/go/libraries/doltcore/migrate/transform.go index b7ad1b358a6..d9737717cde 100644 --- a/go/libraries/doltcore/migrate/transform.go +++ b/go/libraries/doltcore/migrate/transform.go @@ -405,7 +405,10 @@ func migrateTable(ctx context.Context, newSch schema.Schema, oldParentTbl, oldTb if err != nil { return nil, err } - newParentRows := durable.ProllyMapFromIndex(idx) + newParentRows, err := durable.ProllyMapFromIndex(idx) + if err != nil { + return nil, err + } oldParentSet, err := oldParentTbl.GetIndexSet(ctx) if err != nil { @@ -582,7 +585,10 @@ func migrateIndexSet( if err != nil { return nil, err } - newParent := durable.ProllyMapFromIndex(idx) + newParent, err := durable.ProllyMapFromIndex(idx) + if err != nil { + return nil, err + } newIdx, err := migrateIndex(ctx, def.Schema(), oldParent, old, newParent, ns) if err != nil { diff --git a/go/libraries/doltcore/ref/ref.go b/go/libraries/doltcore/ref/ref.go index be856926f55..502ef416ff5 100644 --- a/go/libraries/doltcore/ref/ref.go +++ b/go/libraries/doltcore/ref/ref.go @@ -205,7 +205,7 @@ func Parse(str string) (DoltRef, error) { } if prefix := PrefixForType(StatsRefType); strings.HasPrefix(str, prefix) { - return NewStatsRef(str[len(prefix):]), nil + return NewStatsRef(), nil } if prefix := PrefixForType(TupleRefType); strings.HasPrefix(str, prefix) { diff --git a/go/libraries/doltcore/ref/stats_ref.go b/go/libraries/doltcore/ref/stats_ref.go index 7f957ae05bb..18cfe95814f 100644 --- a/go/libraries/doltcore/ref/stats_ref.go +++ b/go/libraries/doltcore/ref/stats_ref.go @@ 
-20,9 +20,11 @@ type StatsRef struct { var _ DoltRef = StatsRef{} +const statsBranch = "main" + // NewStatsRef creates a reference to a statistic dataset head. -func NewStatsRef(branch string) StatsRef { - return StatsRef{branch} +func NewStatsRef() StatsRef { + return StatsRef{statsBranch} } // GetType will return StatsRefType diff --git a/go/libraries/doltcore/remotestorage/internal/reliable/chan.go b/go/libraries/doltcore/remotestorage/internal/reliable/chan.go index 8beeb5ea61a..c975e7e52f9 100644 --- a/go/libraries/doltcore/remotestorage/internal/reliable/chan.go +++ b/go/libraries/doltcore/remotestorage/internal/reliable/chan.go @@ -15,7 +15,7 @@ package reliable import ( - "github.com/dolthub/dolt/go/libraries/doltcore/remotestorage/internal/circular" + "github.com/dolthub/dolt/go/libraries/utils/circular" ) // A reliable.Chan is a type of channel transformer which can be used to build diff --git a/go/libraries/doltcore/schema/statistic.go b/go/libraries/doltcore/schema/statistic.go index 1879951e10b..88215a7443a 100644 --- a/go/libraries/doltcore/schema/statistic.go +++ b/go/libraries/doltcore/schema/statistic.go @@ -24,12 +24,12 @@ import ( const StatsVersion int64 = 1 const ( - StatsQualifierColName = "qualifier" StatsDbColName = "database_name" StatsTableColName = "table_name" StatsIndexColName = "index_name" - StatsPositionColName = "position" + StatsBranchName = "branch" StatsCommitHashColName = "commit_hash" + StatsPrefixLenName = "prefix_len" StatsRowCountColName = "row_count" StatsDistinctCountColName = "distinct_count" StatsNullCountColName = "null_count" @@ -42,7 +42,7 @@ const ( StatsMcv2ColName = "mcv2" StatsMcv3ColName = "mcv3" StatsMcv4ColName = "mcv4" - StatsMcvCountsColName = "mcvCounts" + StatsMcvCountsColName = "mcv_counts" StatsVersionColName = "version" ) @@ -52,6 +52,7 @@ const ( StatsIndexTag StatsPositionTag StatsVersionTag + StatsPrefixLenTag StatsCommitHashTag StatsRowCountTag StatsDistinctCountTag @@ -71,9 +72,9 @@ const ( func 
StatsTableSqlSchema(dbName string) sql.PrimaryKeySchema { return sql.PrimaryKeySchema{ Schema: sql.Schema{ - &sql.Column{Name: StatsDbColName, Type: types.Text, PrimaryKey: true, DatabaseSource: dbName}, - &sql.Column{Name: StatsTableColName, Type: types.Text, PrimaryKey: true, DatabaseSource: dbName}, - &sql.Column{Name: StatsIndexColName, Type: types.Text, PrimaryKey: true, DatabaseSource: dbName}, + &sql.Column{Name: StatsDbColName, Type: types.Text, DatabaseSource: dbName}, + &sql.Column{Name: StatsTableColName, Type: types.Text, DatabaseSource: dbName}, + &sql.Column{Name: StatsIndexColName, Type: types.Text, DatabaseSource: dbName}, &sql.Column{Name: StatsRowCountColName, Type: types.Int64, DatabaseSource: dbName}, &sql.Column{Name: StatsDistinctCountColName, Type: types.Int64, DatabaseSource: dbName}, &sql.Column{Name: StatsNullCountColName, Type: types.Int64, DatabaseSource: dbName}, @@ -88,7 +89,6 @@ func StatsTableSqlSchema(dbName string) sql.PrimaryKeySchema { &sql.Column{Name: StatsMcv4ColName, Type: types.Text, DatabaseSource: dbName}, &sql.Column{Name: StatsMcvCountsColName, Type: types.Text, DatabaseSource: dbName}, }, - PkOrdinals: []int{0, 1}, } } @@ -96,20 +96,14 @@ var StatsTableDoltSchema = StatsTableDoltSchemaGen() func StatsTableDoltSchemaGen() Schema { colColl := NewColCollection( - NewColumn(StatsDbColName, StatsDbTag, stypes.StringKind, true, NotNullConstraint{}), - NewColumn(StatsTableColName, StatsTableTag, stypes.StringKind, true, NotNullConstraint{}), - NewColumn(StatsIndexColName, StatsIndexTag, stypes.StringKind, true, NotNullConstraint{}), - NewColumn(StatsPositionColName, StatsPositionTag, stypes.IntKind, true, NotNullConstraint{}), + NewColumn(StatsPrefixLenName, StatsPrefixLenTag, stypes.IntKind, true, NotNullConstraint{}), + NewColumn(StatsCommitHashColName, StatsCommitHashTag, stypes.StringKind, true, NotNullConstraint{}), NewColumn(StatsVersionColName, StatsVersionTag, stypes.IntKind, false, NotNullConstraint{}), - 
NewColumn(StatsCommitHashColName, StatsCommitHashTag, stypes.StringKind, false, NotNullConstraint{}), NewColumn(StatsRowCountColName, StatsRowCountTag, stypes.IntKind, false, NotNullConstraint{}), NewColumn(StatsDistinctCountColName, StatsDistinctCountTag, stypes.IntKind, false, NotNullConstraint{}), NewColumn(StatsNullCountColName, StatsNullCountTag, stypes.IntKind, false, NotNullConstraint{}), - NewColumn(StatsColumnsColName, StatsColumnsTag, stypes.StringKind, false, NotNullConstraint{}), - NewColumn(StatsTypesColName, StatsTypesTag, stypes.StringKind, false, NotNullConstraint{}), NewColumn(StatsUpperBoundColName, StatsUpperBoundTag, stypes.StringKind, false, NotNullConstraint{}), NewColumn(StatsUpperBoundCntColName, StatsUpperBoundCntTag, stypes.IntKind, false, NotNullConstraint{}), - NewColumn(StatsCreatedAtColName, StatsCreatedAtTag, stypes.TimestampKind, false, NotNullConstraint{}), NewColumn(StatsMcv1ColName, StatsMcv1Tag, stypes.StringKind, false), NewColumn(StatsMcv2ColName, StatsMcv2Tag, stypes.StringKind, false), NewColumn(StatsMcv3ColName, StatsMcv3Tag, stypes.StringKind, false), diff --git a/go/libraries/doltcore/sqle/binlogreplication/binlog_producer.go b/go/libraries/doltcore/sqle/binlogreplication/binlog_producer.go index a17a9fedbbc..c39eea497fb 100644 --- a/go/libraries/doltcore/sqle/binlogreplication/binlog_producer.go +++ b/go/libraries/doltcore/sqle/binlogreplication/binlog_producer.go @@ -377,10 +377,16 @@ func (b *binlogProducer) createRowEvents(ctx *sql.Context, tableDeltas []diff.Ta var fromMap, toMap prolly.Map if fromRowData != nil { - fromMap = durable.ProllyMapFromIndex(fromRowData) + fromMap, err = durable.ProllyMapFromIndex(fromRowData) + if err != nil { + return nil, err + } } if toRowData != nil { - toMap = durable.ProllyMapFromIndex(toRowData) + toMap, err = durable.ProllyMapFromIndex(toRowData) + if err != nil { + return nil, err + } } sch, err := tableDelta.ToTable.GetSchema(ctx) diff --git 
a/go/libraries/doltcore/sqle/clusterdb/database.go b/go/libraries/doltcore/sqle/clusterdb/database.go index dd741a9a205..4577d2f3c4d 100644 --- a/go/libraries/doltcore/sqle/clusterdb/database.go +++ b/go/libraries/doltcore/sqle/clusterdb/database.go @@ -162,6 +162,10 @@ func (db database) RequestedName() string { return db.Name() } +func (db database) AliasedName() string { + return db.Name() +} + type noopRepoStateWriter struct{} var _ env.RepoStateWriter = noopRepoStateWriter{} diff --git a/go/libraries/doltcore/sqle/database.go b/go/libraries/doltcore/sqle/database.go index ddbf1382304..eb48597c59f 100644 --- a/go/libraries/doltcore/sqle/database.go +++ b/go/libraries/doltcore/sqle/database.go @@ -694,6 +694,9 @@ func (db Database) getTableInsensitive(ctx *sql.Context, head *doltdb.Commit, ds if err != nil { return nil, false, err } + if branch == "" { + branch = db.Revision() + } dt, found = dtables.NewStatisticsTable(ctx, db.Name(), db.schemaName, branch, tables), true case doltdb.ProceduresTableName: found = true diff --git a/go/libraries/doltcore/sqle/database_provider.go b/go/libraries/doltcore/sqle/database_provider.go index 0aa88b1d95e..1fa0163302c 100644 --- a/go/libraries/doltcore/sqle/database_provider.go +++ b/go/libraries/doltcore/sqle/database_provider.go @@ -985,7 +985,7 @@ func (p *DoltDatabaseProvider) databaseForRevision(ctx *sql.Context, revisionQua } } - db, err := revisionDbForBranch(ctx, srcDb, resolvedRevSpec, requestedName) + db, err := RevisionDbForBranch(ctx, srcDb, resolvedRevSpec, requestedName) // preserve original user case in the case of not found if sql.ErrDatabaseNotFound.Is(err) { return nil, false, sql.ErrDatabaseNotFound.New(revisionQualifiedName) @@ -1526,8 +1526,8 @@ func isTag(ctx context.Context, db dsess.SqlDatabase, tagName string) (string, b return "", false, nil } -// revisionDbForBranch returns a new database that is tied to the branch named by revSpec -func revisionDbForBranch(ctx context.Context, srcDb 
dsess.SqlDatabase, revSpec string, requestedName string) (dsess.SqlDatabase, error) { +// RevisionDbForBranch returns a new database that is tied to the branch named by revSpec +func RevisionDbForBranch(ctx context.Context, srcDb dsess.SqlDatabase, revSpec string, requestedName string) (dsess.SqlDatabase, error) { static := staticRepoState{ branch: ref.NewBranchRef(revSpec), RepoStateWriter: srcDb.DbData().Rsw, diff --git a/go/libraries/doltcore/sqle/dprocedures/dolt_conflicts_resolve.go b/go/libraries/doltcore/sqle/dprocedures/dolt_conflicts_resolve.go index ddcdcc1ea00..7773bbbada1 100644 --- a/go/libraries/doltcore/sqle/dprocedures/dolt_conflicts_resolve.go +++ b/go/libraries/doltcore/sqle/dprocedures/dolt_conflicts_resolve.go @@ -73,7 +73,11 @@ func getProllyRowMaps(ctx *sql.Context, vrw types.ValueReadWriter, ns tree.NodeS return prolly.Map{}, err } - return durable.ProllyMapFromIndex(idx), nil + pm, err := durable.ProllyMapFromIndex(idx) + if err != nil { + return prolly.Map{}, err + } + return pm, nil } func resolveProllyConflicts(ctx *sql.Context, tbl *doltdb.Table, tblName string, ourSch, sch schema.Schema) (*doltdb.Table, error) { @@ -94,7 +98,10 @@ func resolveProllyConflicts(ctx *sql.Context, tbl *doltdb.Table, tblName string, if err != nil { return nil, err } - ourMap := durable.ProllyMapFromIndex(ourIdx) + ourMap, err := durable.ProllyMapFromIndex(ourIdx) + if err != nil { + return nil, err + } mutMap := ourMap.Mutate() // get mutable secondary indexes diff --git a/go/libraries/doltcore/sqle/dprocedures/init.go b/go/libraries/doltcore/sqle/dprocedures/init.go index 499d4209886..f36f10b3cd3 100644 --- a/go/libraries/doltcore/sqle/dprocedures/init.go +++ b/go/libraries/doltcore/sqle/dprocedures/init.go @@ -47,12 +47,15 @@ var DoltProcedures = []sql.ExternalStoredProcedureDetails{ {Name: "dolt_tag", Schema: int64Schema("status"), Function: doltTag}, {Name: "dolt_verify_constraints", Schema: int64Schema("violations"), Function: doltVerifyConstraints}, - 
{Name: "dolt_stats_drop", Schema: statsFuncSchema, Function: statsFunc(statsDrop)}, {Name: "dolt_stats_restart", Schema: statsFuncSchema, Function: statsFunc(statsRestart)}, {Name: "dolt_stats_stop", Schema: statsFuncSchema, Function: statsFunc(statsStop)}, - {Name: "dolt_stats_status", Schema: statsFuncSchema, Function: statsFunc(statsStatus)}, - {Name: "dolt_stats_prune", Schema: statsFuncSchema, Function: statsFunc(statsPrune)}, + {Name: "dolt_stats_info", Schema: statsFuncSchema, Function: statsFunc(statsInfo)}, {Name: "dolt_stats_purge", Schema: statsFuncSchema, Function: statsFunc(statsPurge)}, + {Name: "dolt_stats_wait", Schema: statsFuncSchema, Function: statsFunc(statsWait)}, + {Name: "dolt_stats_flush", Schema: statsFuncSchema, Function: statsFunc(statsFlush)}, + {Name: "dolt_stats_once", Schema: statsFuncSchema, Function: statsFunc(statsOnce)}, + {Name: "dolt_stats_gc", Schema: statsFuncSchema, Function: statsFunc(statsGc)}, + {Name: "dolt_stats_timers", Schema: statsFuncSchema, Function: statsFunc(statsTimers)}, } // stringSchema returns a non-nullable schema with all columns as LONGTEXT. 
diff --git a/go/libraries/doltcore/sqle/dprocedures/stats_funcs.go b/go/libraries/doltcore/sqle/dprocedures/stats_funcs.go index 139bec5e5d2..96f46508c91 100644 --- a/go/libraries/doltcore/sqle/dprocedures/stats_funcs.go +++ b/go/libraries/doltcore/sqle/dprocedures/stats_funcs.go @@ -15,14 +15,14 @@ package dprocedures import ( + "context" + "encoding/json" "fmt" - "strings" + "strconv" "github.com/dolthub/go-mysql-server/sql" gmstypes "github.com/dolthub/go-mysql-server/sql/types" - "github.com/dolthub/dolt/go/libraries/doltcore/env" - "github.com/dolthub/dolt/go/libraries/doltcore/ref" "github.com/dolthub/dolt/go/libraries/doltcore/sqle/dsess" ) @@ -34,9 +34,16 @@ var statsFuncSchema = []*sql.Column{ }, } -func statsFunc(fn func(ctx *sql.Context) (interface{}, error)) func(ctx *sql.Context, args ...string) (sql.RowIter, error) { - return func(ctx *sql.Context, args ...string) (sql.RowIter, error) { - res, err := fn(ctx) +const OkResult = "Ok" + +func statsFunc(fn func(ctx *sql.Context, args ...string) (interface{}, error)) func(ctx *sql.Context, args ...string) (sql.RowIter, error) { + return func(ctx *sql.Context, args ...string) (iter sql.RowIter, err error) { + defer func() { + if r := recover(); r != nil { + err = fmt.Errorf("stats function unexpectedly panicked: %s", r) + } + }() + res, err := fn(ctx, args...) if err != nil { return nil, err } @@ -44,124 +51,211 @@ func statsFunc(fn func(ctx *sql.Context) (interface{}, error)) func(ctx *sql.Con } } -// AutoRefreshStatsProvider is a sql.StatsProvider that exposes hooks for +// StatsInfo gives a summary of the current coordinator stats. 
+type StatsInfo struct { + DbCnt int `json:"dbCnt"` + Active bool `json:"active"` + StorageBucketCnt int `json:"storageBucketCnt"` + CachedBucketCnt int `json:"cachedBucketCnt"` + CachedBoundCnt int `json:"cachedBoundCnt"` + CachedTemplateCnt int `json:"cachedTemplateCnt"` + StatCnt int `json:"statCnt"` + GcCnt int `json:"gcCnt,omitempty"` + GenCnt int `json:"genCnt,omitempty"` + Backing string `json:"backing"` +} + +// ToJson returns stats info as a json string. Use the |short| +// flag to exclude cycle counters. +func (si StatsInfo) ToJson(short bool) string { + if short { + si.GcCnt = 0 + si.GenCnt = 0 + } + jsonData, err := json.Marshal(si) + if err != nil { + return "" + } + return string(jsonData) +} + +// ExtendedStatsProvider is a sql.StatsProvider that exposes hooks for // observing and manipulating background database auto refresh threads. -type AutoRefreshStatsProvider interface { +type ExtendedStatsProvider interface { sql.StatsProvider - CancelRefreshThread(string) - StartRefreshThread(*sql.Context, dsess.DoltDatabaseProvider, string, *env.DoltEnv, dsess.SqlDatabase) error - ThreadStatus(string) string - Prune(ctx *sql.Context) error + // Restart starts a new stats thread, finalizes any active thread + Restart() error + // Stop finalizes stats thread if active + Stop() + // Info returns summary statistics about the current coordinator state + Info(ctx context.Context) (StatsInfo, error) + // Purge wipes the memory and storage state, and pauses stats collection Purge(ctx *sql.Context) error + // WaitForSync blocks until the stats state includes changes + // from the current session + WaitForSync(ctx context.Context) error + // Gc forces the next stats cycle to perform a GC. Block until + // the GC lands. + Gc(ctx *sql.Context) error + // WaitForFlush blocks until the next cycle finishes and flushes + // buckets to disk. + WaitForFlush(ctx *sql.Context) error + // CollectOnce performs a stats update in-thread. 
This will contend + // with background collection and most useful in a non-server context. + CollectOnce(ctx context.Context) (string, error) + // SetTimers is an access point for editing the statistics + // delay timer. This will block if the scheduler is not running. + SetTimers(int64, int64) } type BranchStatsProvider interface { DropBranchDbStats(ctx *sql.Context, branch, db string, flush bool) error } -// statsRestart tries to stop and then start a refresh thread -func statsRestart(ctx *sql.Context) (interface{}, error) { +// statsRestart cancels any ongoing update thread and starts a new worker +func statsRestart(ctx *sql.Context, _ ...string) (interface{}, error) { dSess := dsess.DSessFromSess(ctx.Session) statsPro := dSess.StatsProvider() - dbName := strings.ToLower(ctx.GetCurrentDatabase()) - if afp, ok := statsPro.(AutoRefreshStatsProvider); ok { - pro := dSess.Provider() - newFs, err := pro.FileSystemForDatabase(dbName) - if err != nil { - return nil, fmt.Errorf("failed to restart stats collection: %w", err) + if afp, ok := statsPro.(ExtendedStatsProvider); ok { + if err := afp.Restart(); err != nil { + return nil, err } - dEnv := env.Load(ctx, env.GetCurrentUserHomeDir, newFs, pro.DbFactoryUrl(), "TODO") + return OkResult, nil + } + return nil, fmt.Errorf("provider does not implement ExtendedStatsProvider") +} - sqlDb, ok := pro.BaseDatabase(ctx, dbName) - if !ok { - return nil, fmt.Errorf("failed to restart stats collection: database not found: %s", dbName) +// statsInfo returns a coordinator state summary +func statsInfo(ctx *sql.Context, args ...string) (interface{}, error) { + dSess := dsess.DSessFromSess(ctx.Session) + pro := dSess.StatsProvider() + if afp, ok := pro.(ExtendedStatsProvider); ok { + var short bool + if len(args) > 0 && (args[0] == "-s" || args[0] == "--short") { + short = true } - - afp.CancelRefreshThread(dbName) - - err = afp.StartRefreshThread(ctx, pro, dbName, dEnv, sqlDb) + info, err := afp.Info(ctx) if err != nil { - return 
nil, fmt.Errorf("failed to restart collection: %w", err) + return nil, err } - return fmt.Sprintf("restarted stats collection: %s", ref.StatsRef{}.String()), nil + return info.ToJson(short), nil } - return nil, fmt.Errorf("provider does not implement AutoRefreshStatsProvider") + return nil, fmt.Errorf("provider does not implement ExtendedStatsProvider") } -// statsStatus returns the last update for a stats thread -func statsStatus(ctx *sql.Context) (interface{}, error) { +// statsWait blocks until the stats worker executes two full loops +// of instructions. The second loop will include the most recent +// committed session as of this function's execution. +func statsWait(ctx *sql.Context, _ ...string) (interface{}, error) { dSess := dsess.DSessFromSess(ctx.Session) - dbName := strings.ToLower(ctx.GetCurrentDatabase()) pro := dSess.StatsProvider() - if afp, ok := pro.(AutoRefreshStatsProvider); ok { - return afp.ThreadStatus(dbName), nil + if afp, ok := pro.(ExtendedStatsProvider); ok { + if err := afp.WaitForSync(ctx); err != nil { + return nil, err + } + return OkResult, nil } - return nil, fmt.Errorf("provider does not implement AutoRefreshStatsProvider") + return nil, fmt.Errorf("provider does not implement ExtendedStatsProvider") } -// statsStop cancels a refresh thread -func statsStop(ctx *sql.Context) (interface{}, error) { +// statsOnce runs a one-off worker update. This is mostly used for +// testing and grabbing statistics while in the shell. Servers +// should use `dolt_stats_wait` to avoid contending with the +// background thread. 
+func statsOnce(ctx *sql.Context, _ ...string) (interface{}, error) { dSess := dsess.DSessFromSess(ctx.Session) - statsPro := dSess.StatsProvider() - dbName := strings.ToLower(ctx.GetCurrentDatabase()) - - if afp, ok := statsPro.(AutoRefreshStatsProvider); ok { - afp.CancelRefreshThread(dbName) - return fmt.Sprintf("stopped thread: %s", dbName), nil + pro := dSess.StatsProvider() + if afp, ok := pro.(ExtendedStatsProvider); ok { + str, err := afp.CollectOnce(ctx) + if err != nil { + return nil, err + } + return str, nil } - return nil, fmt.Errorf("provider does not implement AutoRefreshStatsProvider") + return nil, fmt.Errorf("provider does not implement ExtendedStatsProvider") } -// statsDrop deletes the stats ref -func statsDrop(ctx *sql.Context) (interface{}, error) { +// statsFlush waits for the next stats flush to storage. +func statsFlush(ctx *sql.Context, _ ...string) (interface{}, error) { dSess := dsess.DSessFromSess(ctx.Session) pro := dSess.StatsProvider() - dbName := strings.ToLower(ctx.GetCurrentDatabase()) - - branch, err := dSess.GetBranch() - if err != nil { - return nil, fmt.Errorf("failed to drop stats: %w", err) + if afp, ok := pro.(ExtendedStatsProvider); ok { + if err := afp.WaitForFlush(ctx); err != nil { + return nil, err + } + return OkResult, nil } + return nil, fmt.Errorf("provider does not implement ExtendedStatsProvider") +} - if afp, ok := pro.(AutoRefreshStatsProvider); ok { - // currently unsafe to drop stats while running refresh - afp.CancelRefreshThread(dbName) - } - if bsp, ok := pro.(BranchStatsProvider); ok { - err := bsp.DropBranchDbStats(ctx, branch, dbName, true) - if err != nil { - return nil, fmt.Errorf("failed to drop stats: %w", err) +// statsGc sets the |doGc| flag and waits until a worker +// performs an update/GC. 
+func statsGc(ctx *sql.Context, _ ...string) (interface{}, error) { + dSess := dsess.DSessFromSess(ctx.Session) + pro := dSess.StatsProvider() + if afp, ok := pro.(ExtendedStatsProvider); ok { + if err := afp.Gc(ctx); err != nil { + return nil, err } + return OkResult, nil } + return nil, fmt.Errorf("provider does not implement ExtendedStatsProvider") +} - return fmt.Sprintf("deleted stats ref for %s", dbName), nil +// statsStop flushes the job queue and leaves the stats provider +// in a paused state. +func statsStop(ctx *sql.Context, _ ...string) (interface{}, error) { + dSess := dsess.DSessFromSess(ctx.Session) + statsPro := dSess.StatsProvider() + + if afp, ok := statsPro.(ExtendedStatsProvider); ok { + afp.Stop() + return OkResult, nil + } + return nil, fmt.Errorf("provider does not implement ExtendedStatsProvider") } -// statsPrune replaces the current disk contents with only the currently -// tracked in memory statistics. -func statsPrune(ctx *sql.Context) (interface{}, error) { +// statsPurge flushes the job queue, deletes the current caches +// and storage targets, re-initializes the tracked database +// states, and returns with stats collection paused. +func statsPurge(ctx *sql.Context, _ ...string) (interface{}, error) { dSess := dsess.DSessFromSess(ctx.Session) - pro, ok := dSess.StatsProvider().(AutoRefreshStatsProvider) + pro, ok := dSess.StatsProvider().(ExtendedStatsProvider) if !ok { return nil, fmt.Errorf("stats not persisted, cannot purge") } - if err := pro.Prune(ctx); err != nil { - return "failed to prune stats databases", err + + pro.Stop() + + if err := pro.Purge(ctx); err != nil { + return "failed to purge stats", err } - return "pruned all stats databases", nil + + return OkResult, nil } -// statsPurge removes the stats database from disk -func statsPurge(ctx *sql.Context) (interface{}, error) { +// statsTimers updates the stats timers, which go into effect immediately. 
+func statsTimers(ctx *sql.Context, args ...string) (interface{}, error) { dSess := dsess.DSessFromSess(ctx.Session) - pro, ok := dSess.StatsProvider().(AutoRefreshStatsProvider) - if !ok { - return nil, fmt.Errorf("stats not persisted, cannot purge") + statsPro := dSess.StatsProvider() + + if len(args) != 2 { + return nil, fmt.Errorf("expected timer arguments (ns): (job, gc)") } - if err := pro.Purge(ctx); err != nil { - return "failed to purged databases", err + job, err := strconv.ParseInt(args[0], 10, 64) + if err != nil { + return nil, fmt.Errorf("interval timer must be positive intergers") + } + gc, err := strconv.ParseInt(args[1], 10, 64) + if err != nil { + return nil, fmt.Errorf("interval timer must be positive intergers") + } + + if afp, ok := statsPro.(ExtendedStatsProvider); ok { + afp.SetTimers(job, gc) + return OkResult, nil } - return "purged all database stats", nil + return nil, fmt.Errorf("provider does not implement ExtendedStatsProvider") } diff --git a/go/libraries/doltcore/sqle/dsess/autoincrement_tracker.go b/go/libraries/doltcore/sqle/dsess/autoincrement_tracker.go index 18aba69e957..0fc5afc1640 100644 --- a/go/libraries/doltcore/sqle/dsess/autoincrement_tracker.go +++ b/go/libraries/doltcore/sqle/dsess/autoincrement_tracker.go @@ -327,7 +327,10 @@ func (a *AutoIncrementTracker) deepSet(ctx *sql.Context, tableName string, table func getMaxIndexValue(ctx context.Context, indexData durable.Index) (uint64, error) { if types.IsFormat_DOLT(indexData.Format()) { - idx := durable.ProllyMapFromIndex(indexData) + idx, err := durable.ProllyMapFromIndex(indexData) + if err != nil { + return 0, err + } iter, err := idx.IterAllReverse(ctx) if err != nil { diff --git a/go/libraries/doltcore/sqle/dsess/session_db_provider.go b/go/libraries/doltcore/sqle/dsess/session_db_provider.go index 3d4969bb114..05e72971747 100644 --- a/go/libraries/doltcore/sqle/dsess/session_db_provider.go +++ b/go/libraries/doltcore/sqle/dsess/session_db_provider.go @@ -122,6 
+122,7 @@ type SqlDatabase interface { sql.Database sql.SchemaDatabase sql.DatabaseSchema + sql.AliasedDatabase SessionDatabase RevisionDatabase diff --git a/go/libraries/doltcore/sqle/dsess/variables.go b/go/libraries/doltcore/sqle/dsess/variables.go index 848ed2218ec..eb604d19b87 100644 --- a/go/libraries/doltcore/sqle/dsess/variables.go +++ b/go/libraries/doltcore/sqle/dsess/variables.go @@ -59,12 +59,13 @@ const ( DoltClusterRoleEpochVariable = "dolt_cluster_role_epoch" DoltClusterAckWritesTimeoutSecs = "dolt_cluster_ack_writes_timeout_secs" - DoltStatsAutoRefreshEnabled = "dolt_stats_auto_refresh_enabled" - DoltStatsBootstrapEnabled = "dolt_stats_bootstrap_enabled" - DoltStatsAutoRefreshThreshold = "dolt_stats_auto_refresh_threshold" - DoltStatsAutoRefreshInterval = "dolt_stats_auto_refresh_interval" - DoltStatsMemoryOnly = "dolt_stats_memory_only" - DoltStatsBranches = "dolt_stats_branches" + DoltStatsEnabled = "dolt_stats_enabled" + DoltStatsPaused = "dolt_stats_paused" + DoltStatsMemoryOnly = "dolt_stats_memory_only" + DoltStatsBranches = "dolt_stats_branches" + DoltStatsJobInterval = "dolt_stats_job_interval" + DoltStatsGCInterval = "dolt_stats_gc_interval" + DoltStatsGCEnabled = "dolt_stats_gc_enabled" ) const URLTemplateDatabasePlaceholder = "{database}" diff --git a/go/libraries/doltcore/sqle/dtables/conflicts_tables_prolly.go b/go/libraries/doltcore/sqle/dtables/conflicts_tables_prolly.go index 9c1bb1bdcea..f535f466be5 100644 --- a/go/libraries/doltcore/sqle/dtables/conflicts_tables_prolly.go +++ b/go/libraries/doltcore/sqle/dtables/conflicts_tables_prolly.go @@ -154,7 +154,10 @@ func newProllyConflictRowIter(ctx *sql.Context, ct ProllyConflictsTable) (*proll if err != nil { return nil, err } - ourRows := durable.ProllyMapFromIndex(idx) + ourRows, err := durable.ProllyMapFromIndex(idx) + if err != nil { + return nil, err + } itr, err := ct.artM.IterAllConflicts(ctx) if err != nil { @@ -424,7 +427,11 @@ func (itr *prollyConflictRowIter) 
loadTableMaps(ctx *sql.Context, baseHash, thei return err } - itr.baseRows = durable.ProllyMapFromIndex(idx) + itr.baseRows, err = durable.ProllyMapFromIndex(idx) + if err != nil { + return err + } + itr.baseHash = baseHash } @@ -446,7 +453,10 @@ func (itr *prollyConflictRowIter) loadTableMaps(ctx *sql.Context, baseHash, thei if err != nil { return err } - itr.theirRows = durable.ProllyMapFromIndex(idx) + itr.theirRows, err = durable.ProllyMapFromIndex(idx) + if err != nil { + return err + } itr.theirHash = theirHash } diff --git a/go/libraries/doltcore/sqle/dtables/diff_iter.go b/go/libraries/doltcore/sqle/dtables/diff_iter.go index 464c7813751..e3c1bdec1bc 100644 --- a/go/libraries/doltcore/sqle/dtables/diff_iter.go +++ b/go/libraries/doltcore/sqle/dtables/diff_iter.go @@ -251,7 +251,10 @@ func newProllyDiffIter(ctx *sql.Context, dp DiffPartition, targetFromSchema, tar if err != nil { return prollyDiffIter{}, err } - from = durable.ProllyMapFromIndex(idx) + from, err = durable.ProllyMapFromIndex(idx) + if err != nil { + return prollyDiffIter{}, err + } if fsch, err = dp.from.GetSchema(ctx); err != nil { return prollyDiffIter{}, err } @@ -263,7 +266,10 @@ func newProllyDiffIter(ctx *sql.Context, dp DiffPartition, targetFromSchema, tar if err != nil { return prollyDiffIter{}, err } - to = durable.ProllyMapFromIndex(idx) + to, err = durable.ProllyMapFromIndex(idx) + if err != nil { + return prollyDiffIter{}, err + } if tsch, err = dp.to.GetSchema(ctx); err != nil { return prollyDiffIter{}, err } diff --git a/go/libraries/doltcore/sqle/dtables/query_catalog_table.go b/go/libraries/doltcore/sqle/dtables/query_catalog_table.go index a3d3da20220..f019289970c 100644 --- a/go/libraries/doltcore/sqle/dtables/query_catalog_table.go +++ b/go/libraries/doltcore/sqle/dtables/query_catalog_table.go @@ -236,7 +236,10 @@ func newQueryCatalogEntryProlly(ctx context.Context, tbl *doltdb.Table, id, name if err != nil { return SavedQuery{}, nil, err } - m := 
durable.ProllyMapFromIndex(idx) + m, err := durable.ProllyMapFromIndex(idx) + if err != nil { + return SavedQuery{}, nil, err + } existingSQ, err := retrieveFromQueryCatalogProlly(ctx, tbl, id) if err != nil && !ErrQueryNotFound.Is(err) { @@ -312,7 +315,11 @@ func retrieveFromQueryCatalogProlly(ctx context.Context, tbl *doltdb.Table, id s return SavedQuery{}, err } - m := durable.ProllyMapFromIndex(idx) + m, err := durable.ProllyMapFromIndex(idx) + if err != nil { + return SavedQuery{}, err + } + kb := val.NewTupleBuilder(catalogKd) kb.PutString(0, id) k := kb.Build(m.Pool()) diff --git a/go/libraries/doltcore/sqle/dtables/statistics_table.go b/go/libraries/doltcore/sqle/dtables/statistics_table.go index fda463e7e49..f73cfaf192b 100644 --- a/go/libraries/doltcore/sqle/dtables/statistics_table.go +++ b/go/libraries/doltcore/sqle/dtables/statistics_table.go @@ -68,7 +68,7 @@ func (st *StatisticsTable) DataLength(ctx *sql.Context) (uint64, error) { } type BranchStatsProvider interface { - GetTableDoltStats(ctx *sql.Context, branch, db, schema, table string) ([]sql.Statistic, error) + GetTableDoltStats(ctx *sql.Context, branch, db, schema, table string) ([]*stats.Statistic, error) } // RowCount implements sql.StatisticsTable @@ -119,14 +119,19 @@ func (st *StatisticsTable) Partitions(*sql.Context) (sql.PartitionIter, error) { // PartitionRows is a sql.Table interface function that gets a row iterator for a partition func (st *StatisticsTable) PartitionRows(ctx *sql.Context, _ sql.Partition) (sql.RowIter, error) { dSess := dsess.DSessFromSess(ctx.Session) - statsPro := dSess.StatsProvider().(BranchStatsProvider) + statsPro, ok := dSess.StatsProvider().(BranchStatsProvider) + if !ok { + return sql.RowsToRowIter(), nil + } var dStats []sql.Statistic for _, table := range st.tableNames { dbStats, err := statsPro.GetTableDoltStats(ctx, st.branch, st.dbName, st.schemaName, table) if err != nil { return nil, err } - dStats = append(dStats, dbStats...) 
+ for _, s := range dbStats { + dStats = append(dStats, s) + } } return stats.NewStatsIter(ctx, dStats...) } diff --git a/go/libraries/doltcore/sqle/dtables/workspace_table.go b/go/libraries/doltcore/sqle/dtables/workspace_table.go index 9ecba12d6d6..681c78e0b8b 100644 --- a/go/libraries/doltcore/sqle/dtables/workspace_table.go +++ b/go/libraries/doltcore/sqle/dtables/workspace_table.go @@ -825,7 +825,10 @@ func newWorkspaceDiffIter(ctx *sql.Context, wp WorkspacePartition) (workspaceDif if err != nil { return workspaceDiffIter{}, err } - base = durable.ProllyMapFromIndex(idx) + base, err = durable.ProllyMapFromIndex(idx) + if err != nil { + return workspaceDiffIter{}, err + } } if wp.staging != nil { @@ -833,7 +836,10 @@ func newWorkspaceDiffIter(ctx *sql.Context, wp WorkspacePartition) (workspaceDif if err != nil { return workspaceDiffIter{}, err } - staging = durable.ProllyMapFromIndex(idx) + staging, err = durable.ProllyMapFromIndex(idx) + if err != nil { + return workspaceDiffIter{}, err + } } if wp.working != nil { @@ -841,7 +847,10 @@ func newWorkspaceDiffIter(ctx *sql.Context, wp WorkspacePartition) (workspaceDif if err != nil { return workspaceDiffIter{}, err } - working = durable.ProllyMapFromIndex(idx) + working, err = durable.ProllyMapFromIndex(idx) + if err != nil { + return workspaceDiffIter{}, err + } } var nodeStore tree.NodeStore diff --git a/go/libraries/doltcore/sqle/enginetest/dolt_engine_test.go b/go/libraries/doltcore/sqle/enginetest/dolt_engine_test.go index e86bdb16105..20e8231b6b1 100644 --- a/go/libraries/doltcore/sqle/enginetest/dolt_engine_test.go +++ b/go/libraries/doltcore/sqle/enginetest/dolt_engine_test.go @@ -392,16 +392,12 @@ func TestQueryPlans(t *testing.T) { } func TestIntegrationQueryPlans(t *testing.T) { - harness := newDoltEnginetestHarness(t).WithConfigureStats(true) + harness := newDoltEnginetestHarness(t) defer harness.Close() enginetest.TestIntegrationPlans(t, harness) } func TestDoltDiffQueryPlans(t *testing.T) { - if 
!types.IsFormat_DOLT(types.Format_Default) { - t.Skip("only new format support system table indexing") - } - harness := newDoltEnginetestHarness(t).WithParallelism(2) // want Exchange nodes RunDoltDiffQueryPlansTest(t, harness) } @@ -608,7 +604,7 @@ func TestScripts(t *testing.T) { if types.IsFormat_DOLT(types.Format_Default) { skipped = append(skipped, newFormatSkippedScripts...) } - h := newDoltHarness(t).WithSkippedQueries(skipped) + h := newDoltHarness(t).WithSkippedQueries(skipped).WithConfigureStats(true) defer h.Close() enginetest.TestScripts(t, h) } @@ -685,20 +681,13 @@ func TestDoltUserPrivileges(t *testing.T) { } func TestJoinOps(t *testing.T) { - if types.IsFormat_LD(types.Format_Default) { - t.Skip("DOLT_LD keyless indexes are not sorted") - } - h := newDoltHarness(t) defer h.Close() enginetest.TestJoinOps(t, h, enginetest.DefaultJoinOpTests) } func TestJoinPlanning(t *testing.T) { - if types.IsFormat_LD(types.Format_Default) { - t.Skip("DOLT_LD keyless indexes are not sorted") - } - h := newDoltEnginetestHarness(t).WithConfigureStats(true) + h := newDoltEnginetestHarness(t) defer h.Close() enginetest.TestJoinPlanning(t, h) } @@ -706,7 +695,6 @@ func TestJoinPlanning(t *testing.T) { func TestJoinQueries(t *testing.T) { h := newDoltHarness(t) defer h.Close() - enginetest.TestJoinQueries(t, h) } func TestJoinQueriesPrepared(t *testing.T) { @@ -1458,11 +1446,6 @@ func TestStatBranchTests(t *testing.T) { RunStatBranchTests(t, harness) } -func TestStatsFunctions(t *testing.T) { - harness := newDoltEnginetestHarness(t) - RunStatsFunctionsTest(t, harness) -} - func TestDiffTableFunction(t *testing.T) { harness := newDoltEnginetestHarness(t) RunDiffTableFunctionTests(t, harness) @@ -1669,11 +1652,6 @@ func TestStatsStorage(t *testing.T) { RunStatsStorageTests(t, h) } -func TestStatsIOWithoutReload(t *testing.T) { - h := newDoltEnginetestHarness(t) - RunStatsIOTestsWithoutReload(t, h) -} - func TestJoinStats(t *testing.T) { h := newDoltEnginetestHarness(t) 
RunJoinStatsTests(t, h) @@ -1744,7 +1722,7 @@ func TestScriptsPrepared(t *testing.T) { skipped = append(skipped, newFormatSkippedScripts...) } skipPreparedTests(t) - h := newDoltHarness(t).WithSkippedQueries(skipped) + h := newDoltHarness(t).WithSkippedQueries(skipped).WithConfigureStats(true) defer h.Close() enginetest.TestScriptsPrepared(t, h) } @@ -1945,6 +1923,10 @@ func TestCreateDatabaseErrorCleansUp(t *testing.T) { // (2) auto refresh threads, and (3) manual ANALYZE statements. // todo: the dolt_stat functions should be concurrency tested func TestStatsAutoRefreshConcurrency(t *testing.T) { + if runtime.GOOS == "windows" && os.Getenv("CI") != "" { + t.Skip("Racy on Windows CI.") + } + // create engine harness := newDoltHarness(t) harness.Setup(setup.MydbData) @@ -1959,21 +1941,16 @@ func TestStatsAutoRefreshConcurrency(t *testing.T) { // Setting an interval of 0 and a threshold of 0 will result // in the stats being updated after every operation - intervalSec := time.Duration(0) - thresholdf64 := 0. 
- bThreads := sql.NewBackgroundThreads() - branches := []string{"main"} - statsProv := engine.EngineAnalyzer().Catalog.StatsProvider.(*statspro.Provider) + statsProv := engine.EngineAnalyzer().Catalog.StatsProvider.(*statspro.StatsController) // it is important to use new sessions for this test, to avoid working root conflicts readCtx := enginetest.NewSession(harness) writeCtx := enginetest.NewSession(harness) - refreshCtx := enginetest.NewSession(harness) - newCtx := func(context.Context) (*sql.Context, error) { - return refreshCtx, nil - } - err := statsProv.InitAutoRefreshWithParams(newCtx, sqlDb.Name(), bThreads, intervalSec, thresholdf64, branches) + fs, err := engine.EngineAnalyzer().Catalog.DbProvider.(*sqle.DoltDatabaseProvider).FileSystemForDatabase(sqlDb.AliasedName()) + require.NoError(t, err) + + err = statsProv.AddFs(readCtx, sqlDb, fs, true) require.NoError(t, err) execQ := func(ctx *sql.Context, q string, id int, tag string) { diff --git a/go/libraries/doltcore/sqle/enginetest/dolt_engine_tests.go b/go/libraries/doltcore/sqle/enginetest/dolt_engine_tests.go index efd221635f4..c536103d689 100755 --- a/go/libraries/doltcore/sqle/enginetest/dolt_engine_tests.go +++ b/go/libraries/doltcore/sqle/enginetest/dolt_engine_tests.go @@ -234,41 +234,8 @@ func RunVersionedQueriesTest(t *testing.T, h DoltEnginetestHarness) { } func RunQueryTestPlans(t *testing.T, harness DoltEnginetestHarness) { - // Dolt supports partial keys, so the index matched is different for some plans - // TODO: Fix these differences by implementing partial key matching in the memory tables, or the engine itself - skipped := []string{ - "SELECT pk,pk1,pk2 FROM one_pk LEFT JOIN two_pk ON pk=pk1", - "SELECT pk,pk1,pk2 FROM one_pk JOIN two_pk ON pk=pk1", - "SELECT one_pk.c5,pk1,pk2 FROM one_pk JOIN two_pk ON pk=pk1 ORDER BY 1,2,3", - "SELECT opk.c5,pk1,pk2 FROM one_pk opk JOIN two_pk tpk ON opk.pk=tpk.pk1 ORDER BY 1,2,3", - "SELECT opk.c5,pk1,pk2 FROM one_pk opk JOIN two_pk tpk ON pk=pk1 
ORDER BY 1,2,3", - "SELECT pk,pk1,pk2 FROM one_pk LEFT JOIN two_pk ON pk=pk1 ORDER BY 1,2,3", - "SELECT pk,pk1,pk2 FROM one_pk t1, two_pk t2 WHERE pk=1 AND pk2=1 AND pk1=1 ORDER BY 1,2", - } - // Parallelism introduces Exchange nodes into the query plans, so disable. - // TODO: exchange nodes should really only be part of the explain plan under certain debug settings - harness = harness.NewHarness(t).WithSkippedQueries(skipped).WithConfigureStats(true) - if !types.IsFormat_DOLT(types.Format_Default) { - // only new format supports reverse IndexTableAccess - reverseIndexSkip := []string{ - "SELECT * FROM one_pk ORDER BY pk", - "SELECT * FROM two_pk ORDER BY pk1, pk2", - "SELECT * FROM two_pk ORDER BY pk1", - "SELECT pk1 AS one, pk2 AS two FROM two_pk ORDER BY pk1, pk2", - "SELECT pk1 AS one, pk2 AS two FROM two_pk ORDER BY one, two", - "SELECT i FROM (SELECT i FROM mytable ORDER BY i DESC LIMIT 1) sq WHERE i = 3", - "SELECT i FROM (SELECT i FROM (SELECT i FROM mytable ORDER BY DES LIMIT 1) sql1)sql2 WHERE i = 3", - "SELECT s,i FROM mytable order by i DESC", - "SELECT s,i FROM mytable as a order by i DESC", - "SELECT pk1, pk2 FROM two_pk order by pk1 asc, pk2 asc", - "SELECT pk1, pk2 FROM two_pk order by pk1 desc, pk2 desc", - "SELECT i FROM (SELECT i FROM (SELECT i FROM mytable ORDER BY i DESC LIMIT 1) sq1) sq2 WHERE i = 3", - } - harness = harness.WithSkippedQueries(reverseIndexSkip) - } - + harness = harness.NewHarness(t) defer harness.Close() - sql.SystemVariables.SetGlobal(dsess.DoltStatsBootstrapEnabled, 0) enginetest.TestQueryPlans(t, harness, queries.PlanTests) } @@ -1165,21 +1132,6 @@ func mustNewEngine(t *testing.T, h enginetest.Harness) enginetest.QueryEngine { return e } -func RunStatsFunctionsTest(t *testing.T, harness DoltEnginetestHarness) { - defer harness.Close() - for _, test := range StatProcTests { - t.Run(test.Name, func(t *testing.T) { - // reset engine so provider statistics are clean - harness = harness.NewHarness(t).WithConfigureStats(true) - 
harness.Setup(setup.MydbData) - harness.SkipSetupCommit() - e := mustNewEngine(t, harness) - defer e.Close() - enginetest.TestScriptWithEngine(t, e, harness, test) - }) - } -} - func RunDiffTableFunctionTests(t *testing.T, harness DoltEnginetestHarness) { for _, test := range DiffTableFunctionScriptTests { t.Run(test.Name, func(t *testing.T) { @@ -1559,30 +1511,15 @@ func RunStatsHistogramTests(t *testing.T, h DoltEnginetestHarness) { } func RunStatsStorageTests(t *testing.T, h DoltEnginetestHarness) { - for _, script := range append(DoltStatsStorageTests, DoltHistogramTests...) { + for _, script := range DoltHistogramTests { func() { h = h.NewHarness(t).WithConfigureStats(true) - defer h.Close() e := mustNewEngine(t, h) if enginetest.IsServerEngine(e) { return } defer e.Close() - TestProviderReloadScriptWithEngine(t, e, h, script) - }() - } -} - -func RunStatsIOTestsWithoutReload(t *testing.T, h DoltEnginetestHarness) { - for _, script := range append(DoltStatsStorageTests, DoltHistogramTests...) 
{ - func() { - h = h.NewHarness(t).WithConfigureStats(true) defer h.Close() - e := mustNewEngine(t, h) - if enginetest.IsServerEngine(e) { - return - } - defer e.Close() enginetest.TestScriptWithEngine(t, e, h, script) }() } diff --git a/go/libraries/doltcore/sqle/enginetest/dolt_harness.go b/go/libraries/doltcore/sqle/enginetest/dolt_harness.go index c599c61da79..d9b42b7643d 100644 --- a/go/libraries/doltcore/sqle/enginetest/dolt_harness.go +++ b/go/libraries/doltcore/sqle/enginetest/dolt_harness.go @@ -20,6 +20,7 @@ import ( "runtime" "strings" "testing" + "time" gms "github.com/dolthub/go-mysql-server" "github.com/dolthub/go-mysql-server/enginetest" @@ -28,6 +29,7 @@ import ( "github.com/dolthub/go-mysql-server/sql" "github.com/dolthub/go-mysql-server/sql/mysql_db" "github.com/dolthub/go-mysql-server/sql/rowexec" + "github.com/sirupsen/logrus" "github.com/stretchr/testify/require" "github.com/dolthub/dolt/go/libraries/doltcore/branch_control" @@ -36,7 +38,6 @@ import ( "github.com/dolthub/dolt/go/libraries/doltcore/sqle" "github.com/dolthub/dolt/go/libraries/doltcore/sqle/dsess" "github.com/dolthub/dolt/go/libraries/doltcore/sqle/kvexec" - "github.com/dolthub/dolt/go/libraries/doltcore/sqle/statsnoms" "github.com/dolthub/dolt/go/libraries/doltcore/sqle/statspro" "github.com/dolthub/dolt/go/libraries/doltcore/sqle/writer" "github.com/dolthub/dolt/go/libraries/utils/filesys" @@ -46,7 +47,7 @@ import ( type DoltHarness struct { t *testing.T provider dsess.DoltDatabaseProvider - statsPro sql.StatsProvider + statsPro *statspro.StatsController multiRepoEnv *env.MultiRepoEnv session *dsess.DoltSession branchControl *branch_control.Controller @@ -59,6 +60,7 @@ type DoltHarness struct { setupDbs map[string]struct{} skipSetupCommit bool configureStats bool + statsThreads *sql.BackgroundThreads useLocalFilesystem bool setupTestProcedures bool } @@ -242,12 +244,19 @@ func (d *DoltHarness) NewEngine(t *testing.T) (enginetest.QueryEngine, error) { } doltProvider, ok := 
pro.(*sqle.DoltDatabaseProvider) require.True(t, ok) + d.provider = doltProvider d.gcSafepointController = dsess.NewGCSafepointController() - statsProv := statspro.NewProvider(d.provider.(*sqle.DoltDatabaseProvider), statsnoms.NewNomsStatsFactory(d.multiRepoEnv.RemoteDialProvider())) - d.statsPro = statsProv + bThreads := sql.NewBackgroundThreads() + + ctxGen := func(ctx context.Context) (*sql.Context, error) { + client := sql.Client{Address: "localhost", User: "root"} + return sql.NewContext(context.Background(), sql.WithSession(d.newSessionWithClient(client))), nil + } + statsPro := statspro.NewStatsController(logrus.StandardLogger(), d.multiRepoEnv.GetEnv(d.multiRepoEnv.GetFirstDatabase())) + d.statsPro = statsPro var err error d.session, err = dsess.NewDoltSession(enginetest.NewBaseSession(), d.provider, d.multiRepoEnv.Config(), d.branchControl, d.statsPro, writer.NewWriteSession, d.gcSafepointController) @@ -262,6 +271,7 @@ func (d *DoltHarness) NewEngine(t *testing.T) (enginetest.QueryEngine, error) { sqlCtx := enginetest.NewContext(d) databases := pro.AllDatabases(sqlCtx) + d.setupDbs = make(map[string]struct{}) var dbs []string for _, db := range databases { @@ -281,41 +291,45 @@ func (d *DoltHarness) NewEngine(t *testing.T) (enginetest.QueryEngine, error) { require.NoError(t, err) } - if d.configureStats { - bThreads := sql.NewBackgroundThreads() - e = e.WithBackgroundThreads(bThreads) - - dSess := dsess.DSessFromSess(sqlCtx.Session) - dbCache := dSess.DatabaseCache(sqlCtx) + e = e.WithBackgroundThreads(bThreads) - dsessDbs := make([]dsess.SqlDatabase, len(dbs)) - for i, dbName := range dbs { - dsessDbs[i], _ = dbCache.GetCachedRevisionDb(fmt.Sprintf("%s/main", dbName), dbName) + // xxx: stats threads can't be tied to single test cycle, + // this is only OK for enginetests + statsThreads := sql.NewBackgroundThreads() + if d.configureStats { + err = statsPro.Init(ctx, doltProvider, ctxGen, statsThreads, databases) + if err != nil { + return nil, err } + 
statsPro.SetTimers(int64(1*time.Nanosecond), int64(1*time.Second)) - ctxFact := func(context.Context) (*sql.Context, error) { - sess := d.newSessionWithClient(sql.Client{Address: "localhost", User: "root"}) - return sql.NewContext(context.Background(), sql.WithSession(sess)), nil - } - if err = statsProv.Configure(sqlCtx, ctxFact, bThreads, dsessDbs); err != nil { + err = statsPro.Restart() + if err != nil { return nil, err } statsOnlyQueries := filterStatsOnlyQueries(d.setupData) e, err = enginetest.RunSetupScripts(sqlCtx, e, statsOnlyQueries, d.SupportsNativeIndexCreation()) + if err != nil { + return nil, err + } + + finalizeStatsAfterSetup := []setup.SetupScript{{"call dolt_stats_wait()"}} + e, err = enginetest.RunSetupScripts(sqlCtx, d.engine, finalizeStatsAfterSetup, d.SupportsNativeIndexCreation()) + require.NoError(t, err) } return e, nil } // Reset the mysql DB table to a clean state for this new engine + ctx := enginetest.NewContext(d) + d.engine.Analyzer.Catalog.MySQLDb = mysql_db.CreateEmptyMySQLDb() d.engine.Analyzer.Catalog.MySQLDb.AddRootAccount() - d.engine.Analyzer.Catalog.StatsProvider = statspro.NewProvider(d.provider.(*sqle.DoltDatabaseProvider), statsnoms.NewNomsStatsFactory(d.multiRepoEnv.RemoteDialProvider())) - var err error - sqlCtx := enginetest.NewContext(d) - e, err := enginetest.RunSetupScripts(sqlCtx, d.engine, d.resetScripts(), d.SupportsNativeIndexCreation()) + e, err := enginetest.RunSetupScripts(ctx, d.engine, d.resetScripts(), d.SupportsNativeIndexCreation()) + require.NoError(t, err) // Get a fresh session after running setup scripts, since some setup scripts can change the session state d.session, err = dsess.NewDoltSession(enginetest.NewBaseSession(), d.provider, d.multiRepoEnv.Config(), d.branchControl, d.statsPro, writer.NewWriteSession, nil) @@ -430,7 +444,6 @@ func (d *DoltHarness) NewDatabases(names ...string) []sql.Database { doltProvider, ok := pro.(*sqle.DoltDatabaseProvider) require.True(d.t, ok) d.provider = 
doltProvider - d.statsPro = statspro.NewProvider(doltProvider, statsnoms.NewNomsStatsFactory(d.multiRepoEnv.RemoteDialProvider())) var err error d.session, err = dsess.NewDoltSession(enginetest.NewBaseSession(), doltProvider, d.multiRepoEnv.Config(), d.branchControl, d.statsPro, writer.NewWriteSession, nil) @@ -502,7 +515,6 @@ func (d *DoltHarness) NewDatabaseProvider() sql.MutableDatabaseProvider { func (d *DoltHarness) Close() { d.closeProvider() - sql.SystemVariables.SetGlobal(dsess.DoltStatsAutoRefreshEnabled, int8(0)) } func (d *DoltHarness) closeProvider() { diff --git a/go/libraries/doltcore/sqle/enginetest/stats_queries.go b/go/libraries/doltcore/sqle/enginetest/stats_queries.go index a5616e37f8e..33a3c66625c 100644 --- a/go/libraries/doltcore/sqle/enginetest/stats_queries.go +++ b/go/libraries/doltcore/sqle/enginetest/stats_queries.go @@ -17,184 +17,218 @@ package enginetest import ( "fmt" "strings" - "testing" - gms "github.com/dolthub/go-mysql-server" - "github.com/dolthub/go-mysql-server/enginetest" "github.com/dolthub/go-mysql-server/enginetest/queries" "github.com/dolthub/go-mysql-server/sql" - "github.com/dolthub/go-mysql-server/sql/types" - "github.com/stretchr/testify/require" "github.com/dolthub/dolt/go/libraries/doltcore/schema" - "github.com/dolthub/dolt/go/libraries/doltcore/sqle/statspro" ) // fillerVarchar pushes the tree into level 3 var fillerVarchar = strings.Repeat("x", 500) var DoltHistogramTests = []queries.ScriptTest{ + //{ + // Name: "mcv checking", + // SetUpScript: []string{ + // "CREATE table xy (x bigint primary key, y int, z varchar(500), key(y,z));", + // "insert into xy values (0,0,'a'), (1,0,'a'), (2,0,'a'), (3,0,'a'), (4,1,'a'), (5,2,'a')", + // "analyze table xy", + // }, + // Assertions: []queries.ScriptTestAssertion{ + // { + // Query: " SELECT mcv_cnt from information_schema.column_statistics join json_table(histogram, '$.statistic.buckets[*]' COLUMNS(mcv_cnt JSON path '$.mcv_counts')) as dt where table_name = 'xy' and 
column_name = 'y,z'", + // Expected: []sql.Row{ + // {types.JSONDocument{Val: []interface{}{ + // float64(4), + // }}}, + // }, + // }, + // { + // Query: " SELECT mcv from information_schema.column_statistics join json_table(histogram, '$.statistic.buckets[*]' COLUMNS(mcv JSON path '$.mcvs[*]')) as dt where table_name = 'xy' and column_name = 'y,z'", + // Expected: []sql.Row{ + // {types.JSONDocument{Val: []interface{}{ + // []interface{}{float64(0), "a"}, + // }}}, + // }, + // }, + // { + // Query: " SELECT x,z from information_schema.column_statistics join json_table(histogram, '$.statistic.buckets[*]' COLUMNS(x bigint path '$.upper_bound[0]', z text path '$.upper_bound[1]')) as dt where table_name = 'xy' and column_name = 'y,z'", + // Expected: []sql.Row{ + // {2, "a"}, + // }, + // }, + // }, + //}, + //{ + // Name: "int pk", + // SetUpScript: []string{ + // "CREATE table xy (x bigint primary key, y varchar(500));", + // fmt.Sprintf("insert into xy select x, '%s' from (with recursive inputs(x) as (select 1 union select x+1 from inputs where x < 10000) select * from inputs) dt", fillerVarchar), + // fmt.Sprintf("insert into xy select x, '%s' from (with recursive inputs(x) as (select 10001 union select x+1 from inputs where x < 20000) select * from inputs) dt", fillerVarchar), + // fmt.Sprintf("insert into xy select x, '%s' from (with recursive inputs(x) as (select 20001 union select x+1 from inputs where x < 30000) select * from inputs) dt", fillerVarchar), + // "analyze table xy", + // }, + // Assertions: []queries.ScriptTestAssertion{ + // { + // Query: "SELECT json_length(json_extract(histogram, \"$.statistic.buckets\")) from information_schema.column_statistics where column_name = 'x'", + // Expected: []sql.Row{{32}}, + // }, + // { + // Query: " SELECT sum(cnt) from information_schema.column_statistics join json_table(histogram, '$.statistic.buckets[*]' COLUMNS(cnt int path '$.row_count')) as dt where table_name = 'xy' and column_name = 'x'", + // 
Expected: []sql.Row{{float64(30000)}}, + // }, + // { + // Query: " SELECT sum(cnt) from information_schema.column_statistics join json_table(histogram, '$.statistic.buckets[*]' COLUMNS(cnt int path '$.null_count')) as dt where table_name = 'xy' and column_name = 'x'", + // Expected: []sql.Row{{float64(0)}}, + // }, + // { + // Query: " SELECT sum(cnt) from information_schema.column_statistics join json_table(histogram, '$.statistic.buckets[*]' COLUMNS(cnt int path '$.distinct_count')) as dt where table_name = 'xy' and column_name = 'x'", + // Expected: []sql.Row{{float64(30000)}}, + // }, + // { + // Query: " SELECT max(bound_cnt) from information_schema.column_statistics join json_table(histogram, '$.statistic.buckets[*]' COLUMNS(bound_cnt int path '$.bound_count')) as dt where table_name = 'xy' and column_name = 'x'", + // Expected: []sql.Row{{int64(1)}}, + // }, + // }, + //}, + //{ + // Name: "nulls distinct across chunk boundary", + // SetUpScript: []string{ + // "CREATE table xy (x bigint primary key, y varchar(500), z bigint, key(z));", + // fmt.Sprintf("insert into xy select x, '%s', x from (with recursive inputs(x) as (select 1 union select x+1 from inputs where x < 200) select * from inputs) dt", fillerVarchar), + // fmt.Sprintf("insert into xy select x, '%s', NULL from (with recursive inputs(x) as (select 201 union select x+1 from inputs where x < 400) select * from inputs) dt", fillerVarchar), + // "analyze table xy", + // }, + // Assertions: []queries.ScriptTestAssertion{ + // { + // Query: "call dolt_stats_wait()", + // }, + // { + // Query: "SELECT json_length(json_extract(histogram, \"$.statistic.buckets\")) from information_schema.column_statistics where column_name = 'z'", + // Expected: []sql.Row{{2}}, + // }, + // { + // // bucket boundary duplication + // Query: "SELECT json_value(histogram, \"$.statistic.distinct_count\", 'signed') from information_schema.column_statistics where column_name = 'z'", + // Expected: []sql.Row{{202}}, + // }, + 
// { + // Query: " SELECT sum(cnt) from information_schema.column_statistics join json_table(histogram, '$.statistic.buckets[*]' COLUMNS(cnt int path '$.row_count')) as dt where table_name = 'xy' and column_name = 'z'", + // Expected: []sql.Row{{float64(400)}}, + // }, + // { + // Query: " SELECT sum(cnt) from information_schema.column_statistics join json_table(histogram, '$.statistic.buckets[*]' COLUMNS(cnt int path '$.null_count')) as dt where table_name = 'xy' and column_name = 'z'", + // Expected: []sql.Row{{float64(200)}}, + // }, + // { + // // chunk border double count + // Query: " SELECT sum(cnt) from information_schema.column_statistics join json_table(histogram, '$.statistic.buckets[*]' COLUMNS(cnt int path '$.distinct_count')) as dt where table_name = 'xy' and column_name = 'z'", + // Expected: []sql.Row{{float64(202)}}, + // }, + // { + // // max bound count is an all nulls chunk + // Query: " SELECT max(bound_cnt) from information_schema.column_statistics join json_table(histogram, '$.statistic.buckets[*]' COLUMNS(bound_cnt int path '$.bound_count')) as dt where table_name = 'xy' and column_name = 'z'", + // Expected: []sql.Row{{int64(183)}}, + // }, + // }, + //}, + //{ + // Name: "int index", + // SetUpScript: []string{ + // "CREATE table xy (x bigint primary key, y varchar(500), z bigint, key(z));", + // fmt.Sprintf("insert into xy select x, '%s', x from (with recursive inputs(x) as (select 1 union select x+1 from inputs where x < 10000) select * from inputs) dt", fillerVarchar), + // fmt.Sprintf("insert into xy select x, '%s', x from (with recursive inputs(x) as (select 10001 union select x+1 from inputs where x < 20000) select * from inputs) dt", fillerVarchar), + // fmt.Sprintf("insert into xy select x, '%s', NULL from (with recursive inputs(x) as (select 20001 union select x+1 from inputs where x < 30000) select * from inputs) dt", fillerVarchar), + // "analyze table xy", + // }, + // Assertions: []queries.ScriptTestAssertion{ + // { + // 
Query: "SELECT json_length(json_extract(histogram, \"$.statistic.buckets\")) from information_schema.column_statistics where column_name = 'z'", + // Expected: []sql.Row{{152}}, + // }, + // { + // Query: " SELECT sum(cnt) from information_schema.column_statistics join json_table(histogram, '$.statistic.buckets[*]' COLUMNS(cnt int path '$.row_count')) as dt where table_name = 'xy' and column_name = 'z'", + // Expected: []sql.Row{{float64(30000)}}, + // }, + // { + // Query: " SELECT sum(cnt) from information_schema.column_statistics join json_table(histogram, '$.statistic.buckets[*]' COLUMNS(cnt int path '$.null_count')) as dt where table_name = 'xy' and column_name = 'z'", + // Expected: []sql.Row{{float64(10000)}}, + // }, + // { + // // border NULL double count + // Query: " SELECT sum(cnt) from information_schema.column_statistics join json_table(histogram, '$.statistic.buckets[*]' COLUMNS(cnt int path '$.distinct_count')) as dt where table_name = 'xy' and column_name = 'z'", + // Expected: []sql.Row{{float64(20036)}}, + // }, + // { + // // max bound count is nulls chunk + // Query: " SELECT max(bound_cnt) from information_schema.column_statistics join json_table(histogram, '$.statistic.buckets[*]' COLUMNS(bound_cnt int path '$.bound_count')) as dt where table_name = 'xy' and column_name = 'z'", + // Expected: []sql.Row{{int64(440)}}, + // }, + // }, + //}, { - Name: "mcv checking", + Name: "multiint index", SetUpScript: []string{ - "CREATE table xy (x bigint primary key, y int, z varchar(500), key(y,z));", - "insert into xy values (0,0,'a'), (1,0,'a'), (2,0,'a'), (3,0,'a'), (4,1,'a'), (5,2,'a')", - "analyze table xy", + "CREATE table xy (x bigint primary key, y varchar(500), z bigint, key(x, z));", + fmt.Sprintf("insert into xy select x, '%s', x+1 from (with recursive inputs(x) as (select 1 union select x+1 from inputs where x < 10000) select * from inputs) dt", fillerVarchar), + fmt.Sprintf("insert into xy select x, '%s', x+1 from (with recursive inputs(x) 
as (select 10001 union select x+1 from inputs where x < 20000) select * from inputs) dt", fillerVarchar), + fmt.Sprintf("insert into xy select x, '%s', NULL from (with recursive inputs(x) as (select 20001 union select x+1 from inputs where x < 30000) select * from inputs) dt", fillerVarchar), }, Assertions: []queries.ScriptTestAssertion{ { - Query: " SELECT mcv_cnt from information_schema.column_statistics join json_table(histogram, '$.statistic.buckets[*]' COLUMNS(mcv_cnt JSON path '$.mcv_counts')) as dt where table_name = 'xy' and column_name = 'y,z'", - Expected: []sql.Row{ - {types.JSONDocument{Val: []interface{}{ - float64(4), - }}}, - }, - }, - { - Query: " SELECT mcv from information_schema.column_statistics join json_table(histogram, '$.statistic.buckets[*]' COLUMNS(mcv JSON path '$.mcvs[*]')) as dt where table_name = 'xy' and column_name = 'y,z'", - Expected: []sql.Row{ - {types.JSONDocument{Val: []interface{}{ - []interface{}{float64(0), "a"}, - }}}, - }, - }, - { - Query: " SELECT x,z from information_schema.column_statistics join json_table(histogram, '$.statistic.buckets[*]' COLUMNS(x bigint path '$.upper_bound[0]', z text path '$.upper_bound[1]')) as dt where table_name = 'xy' and column_name = 'y,z'", - Expected: []sql.Row{ - {2, "a"}, - }, + Query: "call dolt_stats_wait()", }, - }, - }, - { - Name: "int pk", - SetUpScript: []string{ - "CREATE table xy (x bigint primary key, y varchar(500));", - fmt.Sprintf("insert into xy select x, '%s' from (with recursive inputs(x) as (select 1 union select x+1 from inputs where x < 10000) select * from inputs) dt", fillerVarchar), - fmt.Sprintf("insert into xy select x, '%s' from (with recursive inputs(x) as (select 10001 union select x+1 from inputs where x < 20000) select * from inputs) dt", fillerVarchar), - fmt.Sprintf("insert into xy select x, '%s' from (with recursive inputs(x) as (select 20001 union select x+1 from inputs where x < 30000) select * from inputs) dt", fillerVarchar), - "analyze table xy", - 
}, - Assertions: []queries.ScriptTestAssertion{ { - Query: "SELECT json_length(json_extract(histogram, \"$.statistic.buckets\")) from information_schema.column_statistics where column_name = 'x'", - Expected: []sql.Row{{32}}, + Query: "SELECT json_length(json_extract(histogram, \"$.statistic.buckets\")) from information_schema.column_statistics where column_name = 'x,z'", + Expected: []sql.Row{{155}}, }, { - Query: " SELECT sum(cnt) from information_schema.column_statistics join json_table(histogram, '$.statistic.buckets[*]' COLUMNS(cnt int path '$.row_count')) as dt where table_name = 'xy' and column_name = 'x'", + Query: " SELECT sum(cnt) from information_schema.column_statistics join json_table(histogram, '$.statistic.buckets[*]' COLUMNS(cnt int path '$.row_count')) as dt where table_name = 'xy' and column_name = 'x,z'", Expected: []sql.Row{{float64(30000)}}, }, { - Query: " SELECT sum(cnt) from information_schema.column_statistics join json_table(histogram, '$.statistic.buckets[*]' COLUMNS(cnt int path '$.null_count')) as dt where table_name = 'xy' and column_name = 'x'", - Expected: []sql.Row{{float64(0)}}, + Query: " SELECT sum(cnt) from information_schema.column_statistics join json_table(histogram, '$.statistic.buckets[*]' COLUMNS(cnt int path '$.null_count')) as dt where table_name = 'xy' and column_name = 'x,z'", + Expected: []sql.Row{{float64(10000)}}, }, { - Query: " SELECT sum(cnt) from information_schema.column_statistics join json_table(histogram, '$.statistic.buckets[*]' COLUMNS(cnt int path '$.distinct_count')) as dt where table_name = 'xy' and column_name = 'x'", + Query: " SELECT sum(cnt) from information_schema.column_statistics join json_table(histogram, '$.statistic.buckets[*]' COLUMNS(cnt int path '$.distinct_count')) as dt where table_name = 'xy' and column_name = 'x,z'", Expected: []sql.Row{{float64(30000)}}, }, { - Query: " SELECT max(bound_cnt) from information_schema.column_statistics join json_table(histogram, '$.statistic.buckets[*]' 
COLUMNS(bound_cnt int path '$.bound_count')) as dt where table_name = 'xy' and column_name = 'x'", + // max bound count is nulls chunk + Query: " SELECT max(bound_cnt) from information_schema.column_statistics join json_table(histogram, '$.statistic.buckets[*]' COLUMNS(bound_cnt int path '$.bound_count')) as dt where table_name = 'xy' and column_name = 'x,z'", Expected: []sql.Row{{int64(1)}}, }, }, }, { - Name: "nulls distinct across chunk boundary", + Name: "multiint index small", SetUpScript: []string{ - "CREATE table xy (x bigint primary key, y varchar(500), z bigint, key(z));", - fmt.Sprintf("insert into xy select x, '%s', x from (with recursive inputs(x) as (select 1 union select x+1 from inputs where x < 200) select * from inputs) dt", fillerVarchar), - fmt.Sprintf("insert into xy select x, '%s', NULL from (with recursive inputs(x) as (select 201 union select x+1 from inputs where x < 400) select * from inputs) dt", fillerVarchar), - "analyze table xy", - }, - Assertions: []queries.ScriptTestAssertion{ - { - Query: "SELECT json_length(json_extract(histogram, \"$.statistic.buckets\")) from information_schema.column_statistics where column_name = 'z'", - Expected: []sql.Row{{2}}, - }, - { - // bucket boundary duplication - Query: "SELECT json_value(histogram, \"$.statistic.distinct_count\", 'signed') from information_schema.column_statistics where column_name = 'z'", - Expected: []sql.Row{{202}}, - }, - { - Query: " SELECT sum(cnt) from information_schema.column_statistics join json_table(histogram, '$.statistic.buckets[*]' COLUMNS(cnt int path '$.row_count')) as dt where table_name = 'xy' and column_name = 'z'", - Expected: []sql.Row{{float64(400)}}, - }, - { - Query: " SELECT sum(cnt) from information_schema.column_statistics join json_table(histogram, '$.statistic.buckets[*]' COLUMNS(cnt int path '$.null_count')) as dt where table_name = 'xy' and column_name = 'z'", - Expected: []sql.Row{{float64(200)}}, - }, - { - // chunk border double count - Query: " 
SELECT sum(cnt) from information_schema.column_statistics join json_table(histogram, '$.statistic.buckets[*]' COLUMNS(cnt int path '$.distinct_count')) as dt where table_name = 'xy' and column_name = 'z'", - Expected: []sql.Row{{float64(202)}}, - }, - { - // max bound count is an all nulls chunk - Query: " SELECT max(bound_cnt) from information_schema.column_statistics join json_table(histogram, '$.statistic.buckets[*]' COLUMNS(bound_cnt int path '$.bound_count')) as dt where table_name = 'xy' and column_name = 'z'", - Expected: []sql.Row{{int64(183)}}, - }, - }, - }, - { - Name: "int index", - SetUpScript: []string{ - "CREATE table xy (x bigint primary key, y varchar(500), z bigint, key(z));", - fmt.Sprintf("insert into xy select x, '%s', x from (with recursive inputs(x) as (select 1 union select x+1 from inputs where x < 10000) select * from inputs) dt", fillerVarchar), - fmt.Sprintf("insert into xy select x, '%s', x from (with recursive inputs(x) as (select 10001 union select x+1 from inputs where x < 20000) select * from inputs) dt", fillerVarchar), - fmt.Sprintf("insert into xy select x, '%s', NULL from (with recursive inputs(x) as (select 20001 union select x+1 from inputs where x < 30000) select * from inputs) dt", fillerVarchar), - "analyze table xy", + "CREATE table xy (x bigint primary key, y varchar(500), z bigint, key(x, z));", + fmt.Sprintf("insert into xy select x, '%s', x+1 from (with recursive inputs(x) as (select 1 union select x+1 from inputs where x < 2) select * from inputs) dt", fillerVarchar), + fmt.Sprintf("insert into xy select x, '%s', x+1 from (with recursive inputs(x) as (select 3 union select x+1 from inputs where x < 4) select * from inputs) dt", fillerVarchar), + fmt.Sprintf("insert into xy select x, '%s', NULL from (with recursive inputs(x) as (select 5 union select x+1 from inputs where x < 6) select * from inputs) dt", fillerVarchar), }, Assertions: []queries.ScriptTestAssertion{ { - Query: "SELECT 
json_length(json_extract(histogram, \"$.statistic.buckets\")) from information_schema.column_statistics where column_name = 'z'", - Expected: []sql.Row{{152}}, - }, - { - Query: " SELECT sum(cnt) from information_schema.column_statistics join json_table(histogram, '$.statistic.buckets[*]' COLUMNS(cnt int path '$.row_count')) as dt where table_name = 'xy' and column_name = 'z'", - Expected: []sql.Row{{float64(30000)}}, - }, - { - Query: " SELECT sum(cnt) from information_schema.column_statistics join json_table(histogram, '$.statistic.buckets[*]' COLUMNS(cnt int path '$.null_count')) as dt where table_name = 'xy' and column_name = 'z'", - Expected: []sql.Row{{float64(10000)}}, + Query: "call dolt_stats_wait()", }, - { - // border NULL double count - Query: " SELECT sum(cnt) from information_schema.column_statistics join json_table(histogram, '$.statistic.buckets[*]' COLUMNS(cnt int path '$.distinct_count')) as dt where table_name = 'xy' and column_name = 'z'", - Expected: []sql.Row{{float64(20036)}}, - }, - { - // max bound count is nulls chunk - Query: " SELECT max(bound_cnt) from information_schema.column_statistics join json_table(histogram, '$.statistic.buckets[*]' COLUMNS(bound_cnt int path '$.bound_count')) as dt where table_name = 'xy' and column_name = 'z'", - Expected: []sql.Row{{int64(440)}}, - }, - }, - }, - { - Name: "multiint index", - SetUpScript: []string{ - "CREATE table xy (x bigint primary key, y varchar(500), z bigint, key(x, z));", - fmt.Sprintf("insert into xy select x, '%s', x+1 from (with recursive inputs(x) as (select 1 union select x+1 from inputs where x < 10000) select * from inputs) dt", fillerVarchar), - fmt.Sprintf("insert into xy select x, '%s', x+1 from (with recursive inputs(x) as (select 10001 union select x+1 from inputs where x < 20000) select * from inputs) dt", fillerVarchar), - fmt.Sprintf("insert into xy select x, '%s', NULL from (with recursive inputs(x) as (select 20001 union select x+1 from inputs where x < 30000) select * 
from inputs) dt", fillerVarchar), - "analyze table xy", - }, - Assertions: []queries.ScriptTestAssertion{ { Query: "SELECT json_length(json_extract(histogram, \"$.statistic.buckets\")) from information_schema.column_statistics where column_name = 'x,z'", - Expected: []sql.Row{{155}}, + Expected: []sql.Row{{1}}, }, { Query: " SELECT sum(cnt) from information_schema.column_statistics join json_table(histogram, '$.statistic.buckets[*]' COLUMNS(cnt int path '$.row_count')) as dt where table_name = 'xy' and column_name = 'x,z'", - Expected: []sql.Row{{float64(30000)}}, + Expected: []sql.Row{{float64(6)}}, }, { Query: " SELECT sum(cnt) from information_schema.column_statistics join json_table(histogram, '$.statistic.buckets[*]' COLUMNS(cnt int path '$.null_count')) as dt where table_name = 'xy' and column_name = 'x,z'", - Expected: []sql.Row{{float64(10000)}}, + Expected: []sql.Row{{float64(2)}}, }, { Query: " SELECT sum(cnt) from information_schema.column_statistics join json_table(histogram, '$.statistic.buckets[*]' COLUMNS(cnt int path '$.distinct_count')) as dt where table_name = 'xy' and column_name = 'x,z'", - Expected: []sql.Row{{float64(30000)}}, + Expected: []sql.Row{{float64(6)}}, }, { // max bound count is nulls chunk @@ -211,7 +245,10 @@ var DoltHistogramTests = []queries.ScriptTest{ }, Assertions: []queries.ScriptTestAssertion{ { - Query: " SELECT column_name from information_schema.column_statistics", + Query: "call dolt_stats_purge()", + }, + { + Query: "SELECT column_name from information_schema.column_statistics", Expected: []sql.Row{}, }, { @@ -535,8 +572,6 @@ var DoltStatsStorageTests = []queries.ScriptTest{ { Name: "incremental stats deletes auto", SetUpScript: []string{ - "set @@PERSIST.dolt_stats_auto_refresh_interval = 0;", - "set @@PERSIST.dolt_stats_auto_refresh_threshold = 0;", "CREATE table xy (x bigint primary key, y int, z varchar(500), key(y,z));", "insert into xy select x, 1, 1 from (with recursive inputs(x) as (select 4 union select x+1 
from inputs where x < 1000) select * from inputs) dt;", "analyze table xy", @@ -550,10 +585,7 @@ var DoltStatsStorageTests = []queries.ScriptTest{ Query: "delete from xy where x > 500", }, { - Query: "call dolt_stats_restart()", - }, - { - Query: "select sleep(.1)", + Query: "analyze table xy", }, { Query: "select count(*) from dolt_statistics group by table_name, index_name", @@ -565,8 +597,6 @@ var DoltStatsStorageTests = []queries.ScriptTest{ // https://github.com/dolthub/dolt/issues/8504 Name: "alter index column type", SetUpScript: []string{ - "set @@PERSIST.dolt_stats_auto_refresh_interval = 0;", - "set @@PERSIST.dolt_stats_auto_refresh_threshold = 0;", "CREATE table xy (x bigint primary key, y varchar(16))", "insert into xy values (0,'0'), (1,'1'), (2,'2')", "analyze table xy", @@ -594,78 +624,9 @@ var DoltStatsStorageTests = []queries.ScriptTest{ }, }, }, - { - Name: "differentiate table cases", - SetUpScript: []string{ - "set @@PERSIST.dolt_stats_auto_refresh_interval = 0;", - "set @@PERSIST.dolt_stats_auto_refresh_threshold = 0;", - "set @@PERSIST.dolt_stats_branches ='main'", - "CREATE table XY (x bigint primary key, y varchar(16))", - "insert into XY values (0,'0'), (1,'1'), (2,'2')", - "analyze table XY", - }, - Assertions: []queries.ScriptTestAssertion{ - { - Query: "select table_name, upper_bound from dolt_statistics", - Expected: []sql.Row{{"xy", "2"}}, - }, - }, - }, - { - Name: "deleted table loads OK", - SetUpScript: []string{ - "set @@PERSIST.dolt_stats_auto_refresh_interval = 0;", - "set @@PERSIST.dolt_stats_auto_refresh_threshold = 0;", - "set @@PERSIST.dolt_stats_branches ='main'", - "CREATE table xy (x bigint primary key, y varchar(16))", - "insert into xy values (0,'0'), (1,'1'), (2,'2')", - "analyze table xy", - "CREATE table uv (u bigint primary key, v varchar(16))", - "insert into uv values (0,'0'), (1,'1'), (2,'2')", - "analyze table uv", - "drop table uv", - }, - Assertions: []queries.ScriptTestAssertion{ - { - Query: "select 
table_name, upper_bound from dolt_statistics", - Expected: []sql.Row{{"xy", "2"}}, - }, - }, - }, - { - Name: "differentiate branch names", - SetUpScript: []string{ - "set @@PERSIST.dolt_stats_auto_refresh_interval = 0;", - "set @@PERSIST.dolt_stats_auto_refresh_threshold = 0;", - "set @@PERSIST.dolt_stats_branches ='main,feat'", - "CREATE table xy (x bigint primary key, y varchar(16))", - "insert into xy values (0,'0'), (1,'1'), (2,'2')", - "analyze table xy", - "call dolt_checkout('-b', 'feat')", - "CREATE table xy (x varchar(16) primary key, y bigint, z bigint)", - "insert into xy values (3,'3',3)", - "analyze table xy", - "call dolt_checkout('main')", - }, - Assertions: []queries.ScriptTestAssertion{ - { - Query: "select table_name, upper_bound from dolt_statistics", - Expected: []sql.Row{{"xy", "2"}}, - }, - { - Query: "call dolt_checkout('feat')", - }, - { - Query: "select table_name, upper_bound from dolt_statistics", - Expected: []sql.Row{{"xy", "3"}}, - }, - }, - }, { Name: "drop primary key", SetUpScript: []string{ - "set @@PERSIST.dolt_stats_auto_refresh_interval = 0;", - "set @@PERSIST.dolt_stats_auto_refresh_threshold = 0;", "CREATE table xy (x bigint primary key, y varchar(16))", "insert into xy values (0,'0'), (1,'1'), (2,'2')", "analyze table xy", @@ -682,10 +643,7 @@ var DoltStatsStorageTests = []queries.ScriptTest{ Query: "insert into xy values ('3', '3')", }, { - Query: "call dolt_stats_restart()", - }, - { - Query: "select sleep(.2)", + Query: "analyze table xy", }, { Query: "select count(*) from dolt_statistics group by table_name, index_name", @@ -699,9 +657,6 @@ var StatBranchTests = []queries.ScriptTest{ { Name: "multi branch stats", SetUpScript: []string{ - "set @@PERSIST.dolt_stats_auto_refresh_interval = 0;", - "set @@PERSIST.dolt_stats_auto_refresh_threshold = 0;", - "set @@PERSIST.dolt_stats_branches = 'main,feat';", "CREATE table xy (x bigint primary key, y int, z varchar(500), key(y,z));", "insert into xy values (0,0,'a'), (1,0,'a'), 
(2,0,'a'), (3,0,'a'), (4,1,'a'), (5,2,'a')", "call dolt_commit('-Am', 'xy')", @@ -713,10 +668,7 @@ var StatBranchTests = []queries.ScriptTest{ }, Assertions: []queries.ScriptTestAssertion{ { - Query: "call dolt_stats_restart()", - }, - { - Query: "select sleep(.1)", + Query: "call dolt_stats_wait()", }, { Query: "select table_name, index_name, row_count from dolt_statistics", @@ -751,7 +703,7 @@ var StatBranchTests = []queries.ScriptTest{ Query: "call dolt_commit('-am', 'cm')", }, { - Query: "select sleep(.1)", + Query: "call dolt_stats_wait()", }, { Query: "select table_name, index_name, row_count from dolt_statistics as of 'feat'", @@ -769,30 +721,6 @@ var StatBranchTests = []queries.ScriptTest{ {"xy", "y", uint64(6)}, }, }, - { - Query: "call dolt_checkout('feat')", - }, - { - Query: "call dolt_stats_stop()", - }, - { - Query: "select sleep(.1)", - }, - { - Query: "call dolt_stats_drop()", - }, - { - Query: "select table_name, index_name, row_count from dolt_statistics as of 'feat'", - Expected: []sql.Row{}, - }, - { - // we dropped 'feat', not 'main' - Query: "select table_name, index_name, row_count from dolt_statistics as of 'main'", - Expected: []sql.Row{ - {"xy", "primary", uint64(6)}, - {"xy", "y", uint64(6)}, - }, - }, }, }, { @@ -812,302 +740,3 @@ var StatBranchTests = []queries.ScriptTest{ }, }, } - -var StatProcTests = []queries.ScriptTest{ - { - Name: "deleting stats removes information_schema access point", - SetUpScript: []string{ - "CREATE table xy (x bigint primary key, y int, z varchar(500), key(y,z));", - "insert into xy values (0,0,0)", - }, - Assertions: []queries.ScriptTestAssertion{ - { - Query: "analyze table xy", - }, - { - Query: "select count(*) from information_schema.column_statistics", - Expected: []sql.Row{{2}}, - }, - { - Query: "call dolt_stats_drop()", - }, - { - Query: "select count(*) from information_schema.column_statistics", - Expected: []sql.Row{{0}}, - }, - }, - }, - { - Name: "restart empty stats panic", - SetUpScript: 
[]string{ - "CREATE table xy (x bigint primary key, y int, z varchar(500), key(y,z));", - }, - Assertions: []queries.ScriptTestAssertion{ - { - Query: "analyze table xy", - }, - { - Query: "select count(*) from dolt_statistics", - Expected: []sql.Row{{0}}, - }, - { - Query: "set @@GLOBAL.dolt_stats_auto_refresh_threshold = 0", - Expected: []sql.Row{{}}, - }, - { - Query: "set @@GLOBAL.dolt_stats_auto_refresh_interval = 0", - Expected: []sql.Row{{}}, - }, - { - // don't panic - Query: "call dolt_stats_restart()", - }, - { - Query: "select sleep(.1)", - }, - { - Query: "insert into xy values (0,0,0)", - }, - { - Query: "select sleep(.1)", - }, - { - Query: "select count(*) from dolt_statistics", - Expected: []sql.Row{{2}}, - }, - }, - }, - { - Name: "basic start, status, stop loop", - SetUpScript: []string{ - "CREATE table xy (x bigint primary key, y int, z varchar(500), key(y,z));", - "insert into xy values (0,0,'a'), (2,0,'a'), (4,1,'a'), (6,2,'a')", - }, - Assertions: []queries.ScriptTestAssertion{ - { - Query: "select count(*) from dolt_statistics", - Expected: []sql.Row{{0}}, - }, - { - Query: "call dolt_stats_status()", - Expected: []sql.Row{{"no active stats thread"}}, - }, - // set refresh interval arbitrarily high to avoid updating when we restart - { - Query: "set @@PERSIST.dolt_stats_auto_refresh_interval = 100000;", - Expected: []sql.Row{{}}, - }, - { - Query: "set @@PERSIST.dolt_stats_auto_refresh_threshold = 0", - Expected: []sql.Row{{}}, - }, - { - Query: "call dolt_stats_restart()", - }, - { - Query: "call dolt_stats_status()", - Expected: []sql.Row{{"restarted thread: mydb"}}, - }, - { - Query: "set @@PERSIST.dolt_stats_auto_refresh_interval = 0;", - Expected: []sql.Row{{}}, - }, - // new restart picks up 0-interval, will start refreshing immediately - { - Query: "call dolt_stats_restart()", - }, - { - Query: "select sleep(.1)", - }, - { - Query: "call dolt_stats_status()", - Expected: []sql.Row{{"refreshed mydb"}}, - }, - { - Query: "select count(*) 
from dolt_statistics", - Expected: []sql.Row{{2}}, - }, - // kill refresh thread - { - Query: "call dolt_stats_stop()", - }, - { - Query: "call dolt_stats_status()", - Expected: []sql.Row{{"cancelled thread: mydb"}}, - }, - // insert without refresh thread will not update stats - { - Query: "insert into xy values (1,0,'a'), (3,0,'a'), (5,2,'a'), (7,1,'a')", - }, - { - Query: "select sleep(.1)", - }, - { - Query: "call dolt_stats_status()", - Expected: []sql.Row{{"cancelled thread: mydb"}}, - }, - // manual analyze will update stats - { - Query: "analyze table xy", - Expected: []sql.Row{{"xy", "analyze", "status", "OK"}}, - }, - { - Query: "call dolt_stats_status()", - Expected: []sql.Row{{"refreshed mydb"}}, - }, - { - Query: "select count(*) from dolt_statistics", - Expected: []sql.Row{{2}}, - }, - // kill refresh thread and delete stats ref - { - Query: "call dolt_stats_drop()", - }, - { - Query: "call dolt_stats_status()", - Expected: []sql.Row{{"dropped"}}, - }, - { - Query: "select count(*) from dolt_statistics", - Expected: []sql.Row{{0}}, - }, - }, - }, - { - Name: "test purge", - SetUpScript: []string{ - "set @@PERSIST.dolt_stats_auto_refresh_enabled = 0;", - "CREATE table xy (x bigint primary key, y int, z varchar(500), key(y,z));", - "insert into xy values (1, 1, 'a'), (2,1,'a'), (3,1,'a'), (4,2,'b'), (5,2,'b'), (6,3,'c');", - "analyze table xy", - }, - Assertions: []queries.ScriptTestAssertion{ - { - Query: "select count(*) as cnt from dolt_statistics group by table_name, index_name order by cnt", - Expected: []sql.Row{{1}, {1}}, - }, - { - Query: "call dolt_stats_purge()", - }, - { - Query: "select count(*) from dolt_statistics;", - Expected: []sql.Row{{0}}, - }, - }, - }, - { - Name: "test prune", - SetUpScript: []string{ - "set @@PERSIST.dolt_stats_auto_refresh_enabled = 0;", - "CREATE table xy (x bigint primary key, y int, z varchar(500), key(y,z));", - "insert into xy values (1, 1, 'a'), (2,1,'a'), (3,1,'a'), (4,2,'b'), (5,2,'b'), (6,3,'c');", - 
"analyze table xy", - }, - Assertions: []queries.ScriptTestAssertion{ - { - Query: "select count(*) as cnt from dolt_statistics group by table_name, index_name order by cnt", - Expected: []sql.Row{{1}, {1}}, - }, - { - Query: "call dolt_stats_prune()", - }, - { - Query: "select count(*) from dolt_statistics;", - Expected: []sql.Row{{2}}, - }, - }, - }, -} - -// TestProviderReloadScriptWithEngine runs the test script given with the engine provided. -func TestProviderReloadScriptWithEngine(t *testing.T, e enginetest.QueryEngine, harness enginetest.Harness, script queries.ScriptTest) { - ctx := enginetest.NewContext(harness) - err := enginetest.CreateNewConnectionForServerEngine(ctx, e) - require.NoError(t, err, nil) - - t.Run(script.Name, func(t *testing.T) { - for _, statement := range script.SetUpScript { - if sh, ok := harness.(enginetest.SkippingHarness); ok { - if sh.SkipQueryTest(statement) { - t.Skip() - } - } - ctx = ctx.WithQuery(statement) - enginetest.RunQueryWithContext(t, e, harness, ctx, statement) - } - - assertions := script.Assertions - if len(assertions) == 0 { - assertions = []queries.ScriptTestAssertion{ - { - Query: script.Query, - Expected: script.Expected, - ExpectedErr: script.ExpectedErr, - ExpectedIndexes: script.ExpectedIndexes, - }, - } - } - - { - // reload provider, get disk stats - eng, ok := e.(*gms.Engine) - if !ok { - t.Errorf("expected *gms.Engine but found: %T", e) - } - - branches := eng.Analyzer.Catalog.StatsProvider.(*statspro.Provider).TrackedBranches("mydb") - brCopy := make([]string, len(branches)) - copy(brCopy, branches) - err := eng.Analyzer.Catalog.StatsProvider.DropDbStats(ctx, "mydb", false) - require.NoError(t, err) - for _, branch := range brCopy { - err = eng.Analyzer.Catalog.StatsProvider.(*statspro.Provider).LoadStats(ctx, "mydb", branch) - require.NoError(t, err) - } - } - - for _, assertion := range assertions { - t.Run(assertion.Query, func(t *testing.T) { - if assertion.NewSession { - th, ok := 
harness.(enginetest.TransactionHarness) - require.True(t, ok, "ScriptTestAssertion requested a NewSession, "+ - "but harness doesn't implement TransactionHarness") - ctx = th.NewSession() - } - - if sh, ok := harness.(enginetest.SkippingHarness); ok && sh.SkipQueryTest(assertion.Query) { - t.Skip() - } - if assertion.Skip { - t.Skip() - } - - if assertion.ExpectedErr != nil { - enginetest.AssertErr(t, e, harness, assertion.Query, nil, assertion.ExpectedErr) - } else if assertion.ExpectedErrStr != "" { - enginetest.AssertErrWithCtx(t, e, harness, ctx, assertion.Query, nil, nil, assertion.ExpectedErrStr) - } else if assertion.ExpectedWarning != 0 { - enginetest.AssertWarningAndTestQuery(t, e, nil, harness, assertion.Query, - assertion.Expected, nil, assertion.ExpectedWarning, assertion.ExpectedWarningsCount, - assertion.ExpectedWarningMessageSubstring, assertion.SkipResultsCheck) - } else if assertion.SkipResultsCheck { - enginetest.RunQueryWithContext(t, e, harness, nil, assertion.Query) - } else if assertion.CheckIndexedAccess { - enginetest.TestQueryWithIndexCheck(t, ctx, e, harness, assertion.Query, assertion.Expected, assertion.ExpectedColumns, assertion.Bindings) - } else { - var expected = assertion.Expected - if enginetest.IsServerEngine(e) && assertion.SkipResultCheckOnServerEngine { - // TODO: remove this check in the future - expected = nil - } - enginetest.TestQueryWithContext(t, ctx, e, harness, assertion.Query, expected, assertion.ExpectedColumns, assertion.Bindings, nil) - } - }) - } - }) -} - -func mustNewStatQual(s string) sql.StatQualifier { - qual, _ := sql.NewQualifierFromString(s) - return qual -} diff --git a/go/libraries/doltcore/sqle/index/index_reader.go b/go/libraries/doltcore/sqle/index/index_reader.go index e048a548b9c..7fca917bf24 100644 --- a/go/libraries/doltcore/sqle/index/index_reader.go +++ b/go/libraries/doltcore/sqle/index/index_reader.go @@ -292,7 +292,7 @@ type IndexScanBuilder interface { // NewSecondaryIter returns an object 
used to perform secondary lookups // for index joins. - NewSecondaryIter(strict bool, cnt int, nullSafe []bool) SecondaryLookupIterGen + NewSecondaryIter(strict bool, cnt int, nullSafe []bool) (SecondaryLookupIterGen, error) // Key returns the table root for caching purposes Key() doltdb.DataCacheKey @@ -395,7 +395,10 @@ func newNonCoveringLookupBuilder(s *durableIndexState, b *baseIndexImplBuilder) "primary index passed, but only secondary indexes are supported") } - primary := durable.ProllyMapFromIndex(s.Primary) + primary, err := durable.ProllyMapFromIndex(s.Primary) + if err != nil { + return nil, err + } priKd, _ := primary.Descriptors() tbBld := val.NewTupleBuilder(priKd) pkMap := OrdinalMappingFromIndex(b.idx) @@ -452,7 +455,7 @@ func (ib *baseIndexImplBuilder) NewRangeMapIter(_ context.Context, _ prolly.Rang panic("cannot call NewMapIter on baseIndexImplBuilder") } -func (ib *baseIndexImplBuilder) NewSecondaryIter(strict bool, cnt int, nullSafe []bool) SecondaryLookupIterGen { +func (ib *baseIndexImplBuilder) NewSecondaryIter(strict bool, cnt int, nullSafe []bool) (SecondaryLookupIterGen, error) { panic("cannot call NewSecondaryIter on baseIndexImplBuilder") } @@ -628,11 +631,11 @@ func (ib *coveringIndexImplBuilder) NewPartitionRowIter(ctx *sql.Context, part s } // NewSecondaryIter implements IndexScanBuilder -func (ib *coveringIndexImplBuilder) NewSecondaryIter(strict bool, cnt int, nullSafe []bool) SecondaryLookupIterGen { +func (ib *coveringIndexImplBuilder) NewSecondaryIter(strict bool, cnt int, nullSafe []bool) (SecondaryLookupIterGen, error) { if strict { - return &covStrictSecondaryLookupGen{m: ib.sec, prefixDesc: ib.secKd.PrefixDesc(cnt), index: ib.idx} + return &covStrictSecondaryLookupGen{m: ib.sec, prefixDesc: ib.secKd.PrefixDesc(cnt), index: ib.idx}, nil } else { - return &covLaxSecondaryLookupGen{m: ib.sec, prefixDesc: ib.secKd.PrefixDesc(cnt), index: ib.idx, nullSafe: nullSafe} + return &covLaxSecondaryLookupGen{m: ib.sec, prefixDesc: 
ib.secKd.PrefixDesc(cnt), index: ib.idx, nullSafe: nullSafe}, nil } } @@ -735,11 +738,11 @@ func (ib *nonCoveringIndexImplBuilder) NewPartitionRowIter(ctx *sql.Context, par }, nil } -func (ib *nonCoveringIndexImplBuilder) NewSecondaryIter(strict bool, cnt int, nullSafe []bool) SecondaryLookupIterGen { +func (ib *nonCoveringIndexImplBuilder) NewSecondaryIter(strict bool, cnt int, nullSafe []bool) (SecondaryLookupIterGen, error) { if strict { - return &nonCovStrictSecondaryLookupGen{pri: ib.pri, sec: ib.sec, pkMap: ib.pkMap, pkBld: ib.pkBld, sch: ib.idx.tableSch, prefixDesc: ib.secKd.PrefixDesc(cnt)} + return &nonCovStrictSecondaryLookupGen{pri: ib.pri, sec: ib.sec, pkMap: ib.pkMap, pkBld: ib.pkBld, sch: ib.idx.tableSch, prefixDesc: ib.secKd.PrefixDesc(cnt)}, nil } else { - return &nonCovLaxSecondaryLookupGen{pri: ib.pri, sec: ib.sec, pkMap: ib.pkMap, pkBld: ib.pkBld, sch: ib.idx.tableSch, prefixDesc: ib.secKd.PrefixDesc(cnt), nullSafe: nullSafe} + return &nonCovLaxSecondaryLookupGen{pri: ib.pri, sec: ib.sec, pkMap: ib.pkMap, pkBld: ib.pkBld, sch: ib.idx.tableSch, prefixDesc: ib.secKd.PrefixDesc(cnt), nullSafe: nullSafe}, nil } } @@ -766,12 +769,18 @@ func (ib *keylessIndexImplBuilder) OutputSchema() schema.Schema { func (ib *keylessIndexImplBuilder) NewRangeMapIter(ctx context.Context, r prolly.Range, reverse bool) (prolly.MapIter, error) { rows := ib.s.Primary dsecondary := ib.s.Secondary - secondary := durable.ProllyMapFromIndex(dsecondary) + secondary, err := durable.ProllyMapFromIndex(dsecondary) + if err != nil { + return nil, err + } indexIter, err := secondary.IterRange(ctx, r) if err != nil { return nil, err } - clustered := durable.ProllyMapFromIndex(rows) + clustered, err := durable.ProllyMapFromIndex(rows) + if err != nil { + return nil, err + } keyDesc := clustered.KeyDesc() indexMap := OrdinalMappingFromIndex(ib.idx) @@ -832,12 +841,18 @@ func (ib *keylessIndexImplBuilder) NewPartitionRowIter(ctx *sql.Context, part sq return 
newProllyKeylessIndexIter(ctx, ib.idx, prollyRange, doltgresRange, ib.sch, ib.projections, ib.s.Primary, ib.s.Secondary, reverse) } -func (ib *keylessIndexImplBuilder) NewSecondaryIter(strict bool, cnt int, nullSafe []bool) SecondaryLookupIterGen { - pri := durable.ProllyMapFromIndex(ib.s.Primary) +func (ib *keylessIndexImplBuilder) NewSecondaryIter(strict bool, cnt int, nullSafe []bool) (SecondaryLookupIterGen, error) { + pri, err := durable.ProllyMapFromIndex(ib.s.Primary) + if err != nil { + return nil, err + } pkDesc, _ := pri.Descriptors() pkBld := val.NewTupleBuilder(pkDesc) - secondary := durable.ProllyMapFromIndex(ib.s.Secondary) + secondary, err := durable.ProllyMapFromIndex(ib.s.Secondary) + if err != nil { + return nil, err + } return &keylessSecondaryLookupGen{ pri: pri, @@ -846,7 +861,7 @@ func (ib *keylessIndexImplBuilder) NewSecondaryIter(strict bool, cnt int, nullSa pkMap: OrdinalMappingFromIndex(ib.idx), pkBld: pkBld, prefixDesc: secondary.KeyDesc().PrefixDesc(cnt), - } + }, nil } type nomsIndexImplBuilder struct { @@ -870,7 +885,7 @@ func (ib *nomsIndexImplBuilder) NewRangeMapIter(ctx context.Context, r prolly.Ra panic("cannot call NewMapIter on *nomsIndexImplBuilder") } -func (ib *nomsIndexImplBuilder) NewSecondaryIter(strict bool, cnt int, nullSafe []bool) SecondaryLookupIterGen { +func (ib *nomsIndexImplBuilder) NewSecondaryIter(strict bool, cnt int, nullSafe []bool) (SecondaryLookupIterGen, error) { panic("cannot call NewSecondaryIter on *nomsIndexImplBuilder") } diff --git a/go/libraries/doltcore/sqle/index/prolly_index_iter.go b/go/libraries/doltcore/sqle/index/prolly_index_iter.go index e3c302d79d2..9b5d59a23f5 100644 --- a/go/libraries/doltcore/sqle/index/prolly_index_iter.go +++ b/go/libraries/doltcore/sqle/index/prolly_index_iter.go @@ -59,13 +59,20 @@ func newProllyIndexIter( projections []uint64, dprimary, dsecondary durable.Index, ) (prollyIndexIter, error) { - secondary := durable.ProllyMapFromIndex(dsecondary) + secondary, err := 
durable.ProllyMapFromIndex(dsecondary) + if err != nil { + return prollyIndexIter{}, err + } + indexIter, err := secondary.IterRange(ctx, rng) if err != nil { return prollyIndexIter{}, err } - primary := durable.ProllyMapFromIndex(dprimary) + primary, err := durable.ProllyMapFromIndex(dprimary) + if err != nil { + return prollyIndexIter{}, err + } kd, _ := primary.Descriptors() pkBld := val.NewTupleBuilder(kd) pkMap := OrdinalMappingFromIndex(idx) @@ -183,7 +190,10 @@ func newProllyCoveringIndexIter( projections []uint64, indexdata durable.Index, ) (prollyCoveringIndexIter, error) { - secondary := durable.ProllyMapFromIndex(indexdata) + secondary, err := durable.ProllyMapFromIndex(indexdata) + if err != nil { + return prollyCoveringIndexIter{}, err + } indexIter, err := secondary.IterRange(ctx, rng) if err != nil { return prollyCoveringIndexIter{}, err @@ -293,9 +303,11 @@ type prollyKeylessIndexIter struct { var _ sql.RowIter = prollyKeylessIndexIter{} func newProllyKeylessIndexIter(ctx *sql.Context, idx DoltIndex, rng prolly.Range, doltgresRange *DoltgresRange, pkSch sql.PrimaryKeySchema, projections []uint64, rows, dsecondary durable.Index, reverse bool) (prollyKeylessIndexIter, error) { - secondary := durable.ProllyMapFromIndex(dsecondary) + secondary, err := durable.ProllyMapFromIndex(dsecondary) + if err != nil { + return prollyKeylessIndexIter{}, err + } var indexIter prolly.MapIter - var err error if doltgresRange == nil { if reverse { indexIter, err = secondary.IterRangeReverse(ctx, rng) @@ -312,7 +324,10 @@ func newProllyKeylessIndexIter(ctx *sql.Context, idx DoltIndex, rng prolly.Range } } - clustered := durable.ProllyMapFromIndex(rows) + clustered, err := durable.ProllyMapFromIndex(rows) + if err != nil { + return prollyKeylessIndexIter{}, err + } keyDesc, valDesc := clustered.Descriptors() indexMap := OrdinalMappingFromIndex(idx) keyBld := val.NewTupleBuilder(keyDesc) diff --git a/go/libraries/doltcore/sqle/kvexec/builder.go 
b/go/libraries/doltcore/sqle/kvexec/builder.go index aeec12679c2..ce47db07f35 100644 --- a/go/libraries/doltcore/sqle/kvexec/builder.go +++ b/go/libraries/doltcore/sqle/kvexec/builder.go @@ -364,7 +364,10 @@ func getSourceKv(ctx *sql.Context, n sql.Node, isSrc bool) (prolly.Map, prolly.M if rowData.Format() != types.Format_DOLT { return prolly.Map{}, prolly.Map{}, nil, nil, nil, nil, nil, nil, nil } - priMap = durable.ProllyMapFromIndex(rowData) + priMap, err = durable.ProllyMapFromIndex(rowData) + if err != nil { + return prolly.Map{}, prolly.Map{}, nil, nil, nil, nil, nil, nil, err + } priSch = lb.OutputSchema() @@ -384,7 +387,7 @@ func getSourceKv(ctx *sql.Context, n sql.Node, isSrc bool) (prolly.Map, prolly.M return prolly.Map{}, prolly.Map{}, nil, nil, nil, nil, nil, nil, err } } else { - dstIter = lb.NewSecondaryIter(n.IsStrictLookup(), len(n.Expressions()), n.NullMask()) + dstIter, _ = lb.NewSecondaryIter(n.IsStrictLookup(), len(n.Expressions()), n.NullMask()) } case *plan.ResolvedTable: @@ -414,7 +417,10 @@ func getSourceKv(ctx *sql.Context, n sql.Node, isSrc bool) (prolly.Map, prolly.M if err != nil { return prolly.Map{}, prolly.Map{}, nil, nil, nil, nil, nil, nil, err } - priMap = durable.ProllyMapFromIndex(priIndex) + priMap, err = durable.ProllyMapFromIndex(priIndex) + if err != nil { + return prolly.Map{}, prolly.Map{}, nil, nil, nil, nil, nil, nil, err + } secMap = priMap srcIter, err = priMap.IterAll(ctx) @@ -535,7 +541,10 @@ func getMergeKv(ctx *sql.Context, n sql.Node) (mergeState, error) { if err != nil { return ms, err } - ms.idxMap = durable.ProllyMapFromIndex(secIdx) + ms.idxMap, err = durable.ProllyMapFromIndex(secIdx) + if err != nil { + return mergeState{}, err + } table, err = doltTable.DoltTable(ctx) if err != nil { return ms, err @@ -560,7 +569,10 @@ func getMergeKv(ctx *sql.Context, n sql.Node) (mergeState, error) { if err != nil { return ms, err } - ms.idxMap = durable.ProllyMapFromIndex(priIndex) + ms.idxMap, err = 
durable.ProllyMapFromIndex(priIndex) + if err != nil { + return mergeState{}, err + } secIterGen = index.NewKeylessIndexImplBuilder(priIndex, secIdx, idx) } else { secIterGen = index.NewSecondaryIterGen(ms.idxMap) @@ -584,7 +596,10 @@ func getMergeKv(ctx *sql.Context, n sql.Node) (mergeState, error) { return ms, err } - priMap := durable.ProllyMapFromIndex(priIndex) + priMap, err := durable.ProllyMapFromIndex(priIndex) + if err != nil { + return ms, err + } pkMap := index.OrdinalMappingFromIndex(idx) priKd, _ := priMap.Descriptors() pkBld := val.NewTupleBuilder(priKd) diff --git a/go/libraries/doltcore/sqle/logictest/dolt/doltharness.go b/go/libraries/doltcore/sqle/logictest/dolt/doltharness.go index b1096cd9c88..1f8c3f95ef9 100644 --- a/go/libraries/doltcore/sqle/logictest/dolt/doltharness.go +++ b/go/libraries/doltcore/sqle/logictest/dolt/doltharness.go @@ -33,7 +33,6 @@ import ( "github.com/dolthub/dolt/go/libraries/doltcore/env" dsql "github.com/dolthub/dolt/go/libraries/doltcore/sqle" "github.com/dolthub/dolt/go/libraries/doltcore/sqle/dsess" - "github.com/dolthub/dolt/go/libraries/doltcore/sqle/statsnoms" "github.com/dolthub/dolt/go/libraries/doltcore/sqle/statspro" "github.com/dolthub/dolt/go/libraries/doltcore/table/editor" "github.com/dolthub/dolt/go/libraries/utils/filesys" @@ -144,11 +143,10 @@ func innerInit(h *DoltHarness, dEnv *env.DoltEnv) error { return err } - statsPro := statspro.NewProvider(pro.(*dsql.DoltDatabaseProvider), statsnoms.NewNomsStatsFactory(env.NewGRPCDialProviderFromDoltEnv(dEnv))) gcSafepointController := dsess.NewGCSafepointController() config, _ := dEnv.Config.GetConfig(env.GlobalConfig) - sqlCtx := dsql.NewTestSQLCtxWithProvider(ctx, pro, config, statsPro, gcSafepointController) + sqlCtx := dsql.NewTestSQLCtxWithProvider(ctx, pro, config, statspro.StatsNoop{}, gcSafepointController) h.sess = sqlCtx.Session.(*dsess.DoltSession) dbs := h.engine.Analyzer.Catalog.AllDatabases(sqlCtx) diff --git a/go/libraries/doltcore/sqle/rows.go 
b/go/libraries/doltcore/sqle/rows.go index 430418f0ccb..c0679df7226 100644 --- a/go/libraries/doltcore/sqle/rows.go +++ b/go/libraries/doltcore/sqle/rows.go @@ -183,7 +183,11 @@ func ProllyRowIterFromPartition( projections []uint64, partition doltTablePartition, ) (sql.RowIter, error) { - rows := durable.ProllyMapFromIndex(partition.rowData) + rows, err := durable.ProllyMapFromIndex(partition.rowData) + if err != nil { + return nil, err + } + c, err := rows.Count() if err != nil { return nil, err @@ -243,7 +247,10 @@ func DoltTablePartitionToRowIter(ctx *sql.Context, name string, table *doltdb.Ta } if types.IsFormat_DOLT(data.Format()) { - idx := durable.ProllyMapFromIndex(data) + idx, err := durable.ProllyMapFromIndex(data) + if err != nil { + return nil, nil, err + } c, err := idx.Count() if err != nil { return nil, nil, err diff --git a/go/libraries/doltcore/sqle/sqlddl_test.go b/go/libraries/doltcore/sqle/sqlddl_test.go index 8b1a27ab19b..329fd5f298f 100644 --- a/go/libraries/doltcore/sqle/sqlddl_test.go +++ b/go/libraries/doltcore/sqle/sqlddl_test.go @@ -1127,6 +1127,7 @@ func newTestEngine(ctx context.Context, dEnv *env.DoltEnv) (*gms.Engine, *sql.Co IsServerLocked: false, }), sqlCtx } + func TestIndexOverwrite(t *testing.T) { ctx := context.Background() dEnv := dtestutils.CreateTestEnv() diff --git a/go/libraries/doltcore/sqle/statsnoms/database.go b/go/libraries/doltcore/sqle/statsnoms/database.go deleted file mode 100644 index 527842b1d48..00000000000 --- a/go/libraries/doltcore/sqle/statsnoms/database.go +++ /dev/null @@ -1,489 +0,0 @@ -// Copyright 2024 Dolthub, Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package statsnoms - -import ( - "context" - "errors" - "fmt" - "path" - "strings" - "sync" - - "github.com/dolthub/go-mysql-server/sql" - - "github.com/dolthub/dolt/go/libraries/doltcore/dbfactory" - "github.com/dolthub/dolt/go/libraries/doltcore/doltdb" - "github.com/dolthub/dolt/go/libraries/doltcore/env" - "github.com/dolthub/dolt/go/libraries/doltcore/schema" - "github.com/dolthub/dolt/go/libraries/doltcore/sqle" - "github.com/dolthub/dolt/go/libraries/doltcore/sqle/dsess" - "github.com/dolthub/dolt/go/libraries/doltcore/sqle/statspro" - "github.com/dolthub/dolt/go/libraries/doltcore/table/editor" - "github.com/dolthub/dolt/go/libraries/utils/earl" - "github.com/dolthub/dolt/go/libraries/utils/filesys" - "github.com/dolthub/dolt/go/store/datas" - "github.com/dolthub/dolt/go/store/hash" - "github.com/dolthub/dolt/go/store/prolly" - "github.com/dolthub/dolt/go/store/types" -) - -func NewNomsStatsFactory(dialPro dbfactory.GRPCDialProvider) *NomsStatsFactory { - return &NomsStatsFactory{dialPro: dialPro} -} - -type NomsStatsFactory struct { - dialPro dbfactory.GRPCDialProvider -} - -var _ statspro.StatsFactory = NomsStatsFactory{} - -func (sf NomsStatsFactory) Init(ctx *sql.Context, sourceDb dsess.SqlDatabase, prov *sqle.DoltDatabaseProvider, fs filesys.Filesys, hdp env.HomeDirProvider) (statspro.Database, error) { - params := make(map[string]interface{}) - params[dbfactory.GRPCDialProviderParam] = sf.dialPro - - var urlPath string - u, err := earl.Parse(prov.DbFactoryUrl()) - if u.Scheme == dbfactory.MemScheme { - urlPath = 
path.Join(prov.DbFactoryUrl(), dbfactory.DoltDataDir) - } else if u.Scheme == dbfactory.FileScheme { - urlPath = doltdb.LocalDirDoltDB - } - - statsFs, err := fs.WithWorkingDir(dbfactory.DoltStatsDir) - if err != nil { - return nil, err - } - - var dEnv *env.DoltEnv - exists, isDir := statsFs.Exists("") - if !exists { - err := statsFs.MkDirs("") - if err != nil { - return nil, fmt.Errorf("unable to make directory '%s', cause: %s", dbfactory.DoltStatsDir, err.Error()) - } - - dEnv = env.Load(context.Background(), hdp, statsFs, urlPath, "test") - sess := dsess.DSessFromSess(ctx.Session) - err = dEnv.InitRepo(ctx, types.Format_Default, sess.Username(), sess.Email(), prov.DefaultBranch()) - if err != nil { - return nil, err - } - } else if !isDir { - return nil, fmt.Errorf("file exists where the dolt stats directory should be") - } else { - dEnv = env.LoadWithoutDB(ctx, hdp, statsFs, "", "") - } - - dEnv.LoadDoltDBWithParams(ctx, types.Format_Default, urlPath, statsFs, params) - - deaf := dEnv.DbEaFactory(ctx) - - tmpDir, err := dEnv.TempTableFilesDir() - if err != nil { - return nil, err - } - opts := editor.Options{ - Deaf: deaf, - Tempdir: tmpDir, - } - statsDb, err := sqle.NewDatabase(ctx, "stats", dEnv.DbData(ctx), opts) - if err != nil { - return nil, err - } - return NewNomsStats(sourceDb, statsDb), nil -} - -func NewNomsStats(sourceDb, statsDb dsess.SqlDatabase) *NomsStatsDatabase { - return &NomsStatsDatabase{mu: &sync.Mutex{}, destDb: statsDb, sourceDb: sourceDb} -} - -type dbStats map[sql.StatQualifier]*statspro.DoltStats - -type NomsStatsDatabase struct { - mu *sync.Mutex - destDb dsess.SqlDatabase - sourceDb dsess.SqlDatabase - stats []dbStats - branches []string - tableHashes []map[string]hash.Hash - schemaHashes []map[string]hash.Hash - dirty []*prolly.MutableMap -} - -var _ statspro.Database = (*NomsStatsDatabase)(nil) - -func (n *NomsStatsDatabase) Close() error { - return n.destDb.DbData().Ddb.Close() -} - -func (n *NomsStatsDatabase) Branches() 
[]string { - return n.branches -} - -func (n *NomsStatsDatabase) LoadBranchStats(ctx *sql.Context, branch string) error { - branchQDbName := statspro.BranchQualifiedDatabase(n.sourceDb.Name(), branch) - - dSess := dsess.DSessFromSess(ctx.Session) - sqlDb, err := dSess.Provider().Database(ctx, branchQDbName) - if err != nil { - ctx.GetLogger().Debugf("statistics load: branch not found: %s; `call dolt_stats_prune()` to delete stale statistics", branch) - return nil - } - branchQDb, ok := sqlDb.(dsess.SqlDatabase) - if !ok { - return fmt.Errorf("branch/database not found: %s", branchQDbName) - } - - if ok, err := n.SchemaChange(ctx, branch, branchQDb); err != nil { - return err - } else if ok { - ctx.GetLogger().Debugf("statistics load: detected schema change incompatility, purging %s/%s", branch, n.sourceDb.Name()) - if err := n.DeleteBranchStats(ctx, branch, true); err != nil { - return err - } - } - - statsMap, err := n.destDb.DbData().Ddb.GetStatistics(ctx, branch) - if errors.Is(err, doltdb.ErrNoStatistics) { - return n.trackBranch(ctx, branch) - } else if errors.Is(err, datas.ErrNoBranchStats) { - return n.trackBranch(ctx, branch) - } else if err != nil { - return err - } - if cnt, err := statsMap.Count(); err != nil { - return err - } else if cnt == 0 { - return n.trackBranch(ctx, branch) - } - - doltStats, err := loadStats(ctx, branchQDb, statsMap) - if err != nil { - return err - } - n.branches = append(n.branches, branch) - n.stats = append(n.stats, doltStats) - n.dirty = append(n.dirty, nil) - n.tableHashes = append(n.tableHashes, make(map[string]hash.Hash)) - n.schemaHashes = append(n.schemaHashes, make(map[string]hash.Hash)) - return nil -} - -func (n *NomsStatsDatabase) SchemaChange(ctx *sql.Context, branch string, branchQDb dsess.SqlDatabase) (bool, error) { - root, err := branchQDb.GetRoot(ctx) - if err != nil { - return false, err - } - tables, err := branchQDb.GetTableNames(ctx) - if err != nil { - return false, err - } - - var keys []string - var 
schHashes []hash.Hash - for _, tableName := range tables { - table, ok, err := root.GetTable(ctx, doltdb.TableName{Name: tableName}) - if err != nil { - return false, err - } - if !ok { - return false, nil - } - curHash, err := table.GetSchemaHash(ctx) - if err != nil { - return false, err - } - - keys = append(keys, n.schemaTupleKey(branch, tableName)) - schHashes = append(schHashes, curHash) - } - - ddb := n.destDb.DbData().Ddb - var schemaChange bool - for i, key := range keys { - curHash := schHashes[i] - if val, ok, err := ddb.GetTuple(ctx, key); err != nil { - return false, err - } else if ok { - oldHash := hash.Parse(string(val)) - if !ok || !oldHash.Equal(curHash) { - schemaChange = true - break - } - } - } - if schemaChange { - for _, key := range keys { - ddb.DeleteTuple(ctx, key) - } - return true, nil - } - return false, nil -} - -func (n *NomsStatsDatabase) getBranchStats(branch string) dbStats { - for i, b := range n.branches { - if strings.EqualFold(b, branch) { - return n.stats[i] - } - } - return nil -} - -func (n *NomsStatsDatabase) GetStat(branch string, qual sql.StatQualifier) (*statspro.DoltStats, bool) { - n.mu.Lock() - defer n.mu.Unlock() - stats := n.getBranchStats(branch) - ret, ok := stats[qual] - return ret, ok -} - -func (n *NomsStatsDatabase) ListStatQuals(branch string) []sql.StatQualifier { - n.mu.Lock() - defer n.mu.Unlock() - stats := n.getBranchStats(branch) - var ret []sql.StatQualifier - for qual, _ := range stats { - ret = append(ret, qual) - } - return ret -} - -func (n *NomsStatsDatabase) setStat(ctx context.Context, branch string, qual sql.StatQualifier, stats *statspro.DoltStats) error { - var statsMap *prolly.MutableMap - for i, b := range n.branches { - if strings.EqualFold(branch, b) { - n.stats[i][qual] = stats - if n.dirty[i] == nil { - if err := n.initMutable(ctx, i); err != nil { - return err - } - } - statsMap = n.dirty[i] - } - } - if statsMap == nil { - if err := n.trackBranch(ctx, branch); err != nil { - return 
err - } - statsMap = n.dirty[len(n.branches)-1] - n.stats[len(n.branches)-1][qual] = stats - } - - return n.replaceStats(ctx, statsMap, stats) -} -func (n *NomsStatsDatabase) SetStat(ctx context.Context, branch string, qual sql.StatQualifier, stats *statspro.DoltStats) error { - n.mu.Lock() - defer n.mu.Unlock() - - return n.setStat(ctx, branch, qual, stats) -} - -func (n *NomsStatsDatabase) trackBranch(ctx context.Context, branch string) error { - n.branches = append(n.branches, branch) - n.stats = append(n.stats, make(dbStats)) - n.tableHashes = append(n.tableHashes, make(map[string]hash.Hash)) - n.schemaHashes = append(n.schemaHashes, make(map[string]hash.Hash)) - - ns := n.destDb.DbData().Ddb.NodeStore() - kd, vd := schema.StatsTableDoltSchema.GetMapDescriptors(ns) - newMap, err := prolly.NewMapFromTuples(ctx, ns, kd, vd) - if err != nil { - return err - } - n.dirty = append(n.dirty, newMap.Mutate()) - return n.destDb.DbData().Ddb.SetStatisics(ctx, branch, newMap.HashOf()) -} - -func (n *NomsStatsDatabase) initMutable(ctx context.Context, i int) error { - statsMap, err := n.destDb.DbData().Ddb.GetStatistics(ctx, n.branches[i]) - if err != nil { - return err - } - n.dirty[i] = statsMap.Mutate() - return nil -} - -func (n *NomsStatsDatabase) DeleteStats(ctx *sql.Context, branch string, quals ...sql.StatQualifier) { - n.mu.Lock() - defer n.mu.Unlock() - - for i, b := range n.branches { - if strings.EqualFold(b, branch) { - for _, qual := range quals { - ctx.GetLogger().Debugf("statistics refresh: deleting index statistics: %s/%s", branch, qual) - delete(n.stats[i], qual) - } - } - } -} - -func (n *NomsStatsDatabase) DeleteBranchStats(ctx *sql.Context, branch string, flush bool) error { - n.mu.Lock() - defer n.mu.Unlock() - - ctx.GetLogger().Debugf("statistics refresh: deleting branch statistics: %s", branch) - - for i, b := range n.branches { - if strings.EqualFold(b, branch) { - n.branches = append(n.branches[:i], n.branches[i+1:]...) 
- n.dirty = append(n.dirty[:i], n.dirty[i+1:]...) - n.stats = append(n.stats[:i], n.stats[i+1:]...) - n.tableHashes = append(n.tableHashes[:i], n.tableHashes[i+1:]...) - n.schemaHashes = append(n.schemaHashes[:i], n.schemaHashes[i+1:]...) - } - } - if flush { - return n.destDb.DbData().Ddb.DropStatisics(ctx, branch) - } - return nil -} - -func (n *NomsStatsDatabase) ReplaceChunks(ctx context.Context, branch string, qual sql.StatQualifier, targetHashes []hash.Hash, dropChunks, newChunks []sql.HistogramBucket) error { - n.mu.Lock() - defer n.mu.Unlock() - - var dbStat dbStats - for i, b := range n.branches { - if strings.EqualFold(b, branch) { - // naive merge the new with old - dbStat = n.stats[i] - } - } - - if dbStat == nil { - if err := n.trackBranch(ctx, branch); err != nil { - return err - } - dbStat = n.stats[len(n.branches)-1] - } - - if _, ok := dbStat[qual]; ok { - oldChunks := dbStat[qual].Hist - targetBuckets, err := statspro.MergeNewChunks(targetHashes, oldChunks, newChunks) - if err != nil { - return err - } - newStat, err := dbStat[qual].WithHistogram(targetBuckets) - if err != nil { - return err - } - dbStat[qual] = newStat.(*statspro.DoltStats) - } else { - dbStat[qual] = statspro.NewDoltStats() - } - dbStat[qual].Chunks = targetHashes - dbStat[qual].UpdateActive() - - // let |n.SetStats| update memory and disk - return n.setStat(ctx, branch, qual, dbStat[qual]) -} - -func (n *NomsStatsDatabase) Flush(ctx context.Context, branch string) error { - n.mu.Lock() - defer n.mu.Unlock() - - for i, b := range n.branches { - if strings.EqualFold(b, branch) { - if n.dirty[i] != nil { - flushedMap, err := n.dirty[i].Map(ctx) - if err != nil { - return err - } - n.dirty[i] = nil - if err := n.destDb.DbData().Ddb.SetStatisics(ctx, branch, flushedMap.HashOf()); err != nil { - return err - } - return nil - } - } - } - return nil -} - -func (n *NomsStatsDatabase) GetTableHash(branch, tableName string) hash.Hash { - n.mu.Lock() - defer n.mu.Unlock() - for i, b := 
range n.branches { - if strings.EqualFold(branch, b) { - return n.tableHashes[i][tableName] - } - } - return hash.Hash{} -} - -func (n *NomsStatsDatabase) SetTableHash(branch, tableName string, h hash.Hash) { - n.mu.Lock() - defer n.mu.Unlock() - for i, b := range n.branches { - if strings.EqualFold(branch, b) { - n.tableHashes[i][tableName] = h - break - } - } -} - -func (n *NomsStatsDatabase) GetSchemaHash(ctx context.Context, branch, tableName string) (hash.Hash, error) { - n.mu.Lock() - defer n.mu.Unlock() - for i, b := range n.branches { - if strings.EqualFold(branch, b) { - return n.schemaHashes[i][tableName], nil - } - if val, ok, err := n.destDb.DbData().Ddb.GetTuple(ctx, n.schemaTupleKey(branch, tableName)); ok { - if err != nil { - return hash.Hash{}, err - } - h := hash.Parse(string(val)) - n.schemaHashes[i][tableName] = h - return h, nil - } else if err != nil { - return hash.Hash{}, err - } - break - } - return hash.Hash{}, nil -} - -func (n *NomsStatsDatabase) schemaTupleKey(branch, tableName string) string { - return n.sourceDb.Name() + "/" + branch + "/" + tableName -} - -func (n *NomsStatsDatabase) SetSchemaHash(ctx context.Context, branch, tableName string, h hash.Hash) error { - n.mu.Lock() - defer n.mu.Unlock() - branchIdx := -1 - for i, b := range n.branches { - if strings.EqualFold(branch, b) { - branchIdx = i - break - } - } - if branchIdx < 0 { - branchIdx = len(n.branches) - if err := n.trackBranch(ctx, branch); err != nil { - return err - } - } - - n.schemaHashes[branchIdx][tableName] = h - key := n.schemaTupleKey(branch, tableName) - if err := n.destDb.DbData().Ddb.DeleteTuple(ctx, key); err != doltdb.ErrTupleNotFound { - return err - } - - return n.destDb.DbData().Ddb.SetTuple(ctx, key, []byte(h.String())) -} diff --git a/go/libraries/doltcore/sqle/statsnoms/iter.go b/go/libraries/doltcore/sqle/statsnoms/iter.go deleted file mode 100644 index 59b9456eed6..00000000000 --- a/go/libraries/doltcore/sqle/statsnoms/iter.go +++ /dev/null @@ 
-1,176 +0,0 @@ -// Copyright 2024 Dolthub, Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package statsnoms - -import ( - "fmt" - "strings" - "time" - - "github.com/dolthub/go-mysql-server/sql" - "github.com/dolthub/go-mysql-server/sql/planbuilder" - "gopkg.in/errgo.v2/errors" - - "github.com/dolthub/dolt/go/libraries/doltcore/schema" - "github.com/dolthub/dolt/go/store/hash" - "github.com/dolthub/dolt/go/store/prolly" - "github.com/dolthub/dolt/go/store/prolly/tree" - "github.com/dolthub/dolt/go/store/val" -) - -var ErrIncompatibleVersion = errors.New("client stats version mismatch") - -func NewStatsIter(ctx *sql.Context, schemaName string, m prolly.Map) (*statsIter, error) { - iter, err := m.IterAll(ctx) - if err != nil { - return nil, err - } - kd, vd := m.Descriptors() - keyBuilder := val.NewTupleBuilder(kd) - valueBuilder := val.NewTupleBuilder(vd) - ns := m.NodeStore() - - return &statsIter{ - iter: iter, - kb: keyBuilder, - vb: valueBuilder, - ns: ns, - schemaName: schemaName, - planb: planbuilder.New(ctx, nil, nil, nil), - }, nil -} - -// statsIter reads histogram buckets into string-compatible types. -// Values that are SQL rows should be converted with statsIter.ParseRow. -// todo: make a JSON compatible container for sql.Row w/ types so that we -// can eagerly convert to sql.Row without sacrificing string printing. 
-type statsIter struct { - iter prolly.MapIter - kb, vb *val.TupleBuilder - ns tree.NodeStore - planb *planbuilder.Builder - currentQual string - schemaName string - currentTypes []sql.Type -} - -var _ sql.RowIter = (*statsIter)(nil) - -func (s *statsIter) Next(ctx *sql.Context) (sql.Row, error) { - k, v, err := s.iter.Next(ctx) - if err != nil { - return nil, err - } - - // deserialize K, V - version, err := tree.GetField(ctx, s.vb.Desc, 0, v, s.ns) - if err != nil { - return nil, err - } - if version != schema.StatsVersion { - return nil, fmt.Errorf("%w: write version %d does not match read version %d", ErrIncompatibleVersion, version, schema.StatsVersion) - } - - var row sql.Row - for i := 0; i < s.kb.Desc.Count(); i++ { - f, err := tree.GetField(ctx, s.kb.Desc, i, k, s.ns) - if err != nil { - return nil, err - } - row = append(row, f) - } - - for i := 0; i < s.vb.Desc.Count(); i++ { - f, err := tree.GetField(ctx, s.vb.Desc, i, v, s.ns) - if err != nil { - return nil, err - } - row = append(row, f) - } - - dbName := row[schema.StatsDbTag].(string) - tableName := row[schema.StatsTableTag].(string) - indexName := row[schema.StatsIndexTag].(string) - position := row[schema.StatsPositionTag].(int64) - _ = row[schema.StatsVersionTag] - commit := hash.Parse(row[schema.StatsCommitHashTag].(string)) - rowCount := row[schema.StatsRowCountTag].(int64) - distinctCount := row[schema.StatsDistinctCountTag].(int64) - nullCount := row[schema.StatsNullCountTag].(int64) - columnsStr := row[schema.StatsColumnsTag].(string) - typesStr := row[schema.StatsTypesTag].(string) - upperBoundStr := row[schema.StatsUpperBoundTag].(string) - upperBoundCnt := row[schema.StatsUpperBoundCntTag].(int64) - createdAt := row[schema.StatsCreatedAtTag].(time.Time) - - typs := strings.Split(typesStr, "\n") - for i, t := range typs { - typs[i] = strings.TrimSpace(t) - } - - qual := sql.NewStatQualifier(dbName, s.schemaName, tableName, indexName) - if curQual := qual.String(); 
!strings.EqualFold(curQual, s.currentQual) { - s.currentQual = curQual - s.currentTypes, err = parseTypeStrings(typs) - if err != nil { - return nil, err - } - } - - mcvCountsStr := row[schema.StatsMcvCountsTag].(string) - - numMcvs := schema.StatsMcvCountsTag - schema.StatsMcv1Tag - mcvs := make([]string, numMcvs) - for i, v := range row[schema.StatsMcv1Tag:schema.StatsMcvCountsTag] { - if v != nil { - mcvs[i] = v.(string) - } - } - - return sql.Row{ - dbName, - tableName, - indexName, - int(position), - version, - commit.String(), - uint64(rowCount), - uint64(distinctCount), - uint64(nullCount), - columnsStr, - typesStr, - upperBoundStr, - uint64(upperBoundCnt), - createdAt, - mcvs[0], mcvs[1], mcvs[2], mcvs[3], - mcvCountsStr, - }, nil -} - -func (s *statsIter) ParseRow(rowStr string) (sql.Row, error) { - var row sql.Row - for i, v := range strings.Split(rowStr, ",") { - val, _, err := s.currentTypes[i].Convert(v) - if err != nil { - return nil, err - } - row = append(row, val) - } - return row, nil -} - -func (s *statsIter) Close(context *sql.Context) error { - return nil -} diff --git a/go/libraries/doltcore/sqle/statsnoms/load.go b/go/libraries/doltcore/sqle/statsnoms/load.go deleted file mode 100644 index 72051260260..00000000000 --- a/go/libraries/doltcore/sqle/statsnoms/load.go +++ /dev/null @@ -1,308 +0,0 @@ -// Copyright 2024 Dolthub, Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -package statsnoms - -import ( - "errors" - "fmt" - "io" - "strconv" - "strings" - "time" - - "github.com/dolthub/go-mysql-server/sql" - "github.com/dolthub/go-mysql-server/sql/planbuilder" - "github.com/dolthub/go-mysql-server/sql/stats" - - "github.com/dolthub/dolt/go/libraries/doltcore/doltdb" - "github.com/dolthub/dolt/go/libraries/doltcore/doltdb/durable" - "github.com/dolthub/dolt/go/libraries/doltcore/schema" - "github.com/dolthub/dolt/go/libraries/doltcore/sqle/dsess" - "github.com/dolthub/dolt/go/libraries/doltcore/sqle/statspro" - "github.com/dolthub/dolt/go/store/hash" - "github.com/dolthub/dolt/go/store/prolly" - "github.com/dolthub/dolt/go/store/prolly/tree" - "github.com/dolthub/dolt/go/store/val" -) - -func loadStats(ctx *sql.Context, db dsess.SqlDatabase, m prolly.Map) (map[sql.StatQualifier]*statspro.DoltStats, error) { - qualToStats := make(map[sql.StatQualifier]*statspro.DoltStats) - schemaName := db.SchemaName() - iter, err := NewStatsIter(ctx, schemaName, m) - if err != nil { - return nil, err - } - currentStat := statspro.NewDoltStats() - invalidTables := make(map[string]bool) - for { - row, err := iter.Next(ctx) - if errors.Is(err, io.EOF) { - break - } else if err != nil { - return nil, err - } - - // deserialize K, V - dbName := row[schema.StatsDbTag].(string) - tableName := row[schema.StatsTableTag].(string) - indexName := row[schema.StatsIndexTag].(string) - _ = row[schema.StatsVersionTag] - commit := hash.Parse(row[schema.StatsCommitHashTag].(string)) - rowCount := row[schema.StatsRowCountTag].(uint64) - distinctCount := row[schema.StatsDistinctCountTag].(uint64) - nullCount := row[schema.StatsNullCountTag].(uint64) - columns := strings.Split(row[schema.StatsColumnsTag].(string), ",") - typesStr := row[schema.StatsTypesTag].(string) - boundRowStr := row[schema.StatsUpperBoundTag].(string) - upperBoundCnt := row[schema.StatsUpperBoundCntTag].(uint64) - createdAt := row[schema.StatsCreatedAtTag].(time.Time) - - typs := 
strings.Split(typesStr, "\n") - for i, t := range typs { - typs[i] = strings.TrimSpace(t) - } - - qual := sql.NewStatQualifier(dbName, schemaName, tableName, indexName) - if _, ok := invalidTables[tableName]; ok { - continue - } - - if currentStat.Statistic.Qual.String() != qual.String() { - if !currentStat.Statistic.Qual.Empty() { - currentStat.UpdateActive() - qualToStats[currentStat.Statistic.Qual] = currentStat - } - - currentStat = statspro.NewDoltStats() - - tab, ok, err := db.GetTableInsensitive(ctx, qual.Table()) - if ok { - currentStat.Statistic.Qual = qual - currentStat.Statistic.Cols = columns - currentStat.Statistic.LowerBnd, currentStat.Tb, currentStat.Statistic.Fds, currentStat.Statistic.Colset, err = loadRefdProps(ctx, db, tab, currentStat.Statistic.Qual, len(currentStat.Columns())) - if err != nil { - return nil, err - } - } else if !ok { - ctx.GetLogger().Debugf("stats load: table previously collected is missing from root: %s", tableName) - invalidTables[qual.Table()] = true - continue - } else if err != nil { - return nil, err - } - } - - numMcvs := schema.StatsMcvCountsTag - schema.StatsMcv1Tag - - mcvCountsStr := strings.Split(row[schema.StatsMcvCountsTag].(string), ",") - mcvCnts := make([]uint64, numMcvs) - for i, v := range mcvCountsStr { - if v == "" { - continue - } - val, err := strconv.Atoi(v) - if err != nil { - return nil, err - } - mcvCnts[i] = uint64(val) - } - - mcvs := make([]sql.Row, numMcvs) - for i, v := range row[schema.StatsMcv1Tag:schema.StatsMcvCountsTag] { - if v != nil && v != "" { - row, err := DecodeRow(ctx, m.NodeStore(), v.(string), currentStat.Tb) - if err != nil { - return nil, err - } - mcvs[i] = row - } - } - - for i, v := range mcvCnts { - if v == 0 { - mcvs = mcvs[:i] - mcvCnts = mcvCnts[:i] - break - } - } - - if currentStat.Statistic.Hist == nil { - currentStat.Statistic.Typs, err = parseTypeStrings(typs) - if err != nil { - return nil, err - } - currentStat.Statistic.Qual = qual - } - - boundRow, err := 
DecodeRow(ctx, m.NodeStore(), boundRowStr, currentStat.Tb) - if err != nil { - return nil, err - } - - bucket := statspro.DoltBucket{ - Chunk: commit, - Created: createdAt, - Bucket: &stats.Bucket{ - RowCnt: uint64(rowCount), - DistinctCnt: uint64(distinctCount), - NullCnt: uint64(nullCount), - McvVals: mcvs, - McvsCnt: mcvCnts, - BoundCnt: upperBoundCnt, - BoundVal: boundRow, - }, - } - - currentStat.Hist = append(currentStat.Hist, bucket) - currentStat.Statistic.RowCnt += uint64(rowCount) - currentStat.Statistic.DistinctCnt += uint64(distinctCount) - currentStat.Statistic.NullCnt += uint64(rowCount) - if currentStat.Statistic.Created.Before(createdAt) { - currentStat.Statistic.Created = createdAt - } - } - if !currentStat.Qualifier().Empty() { - currentStat.UpdateActive() - qualToStats[currentStat.Statistic.Qual] = currentStat - } - return qualToStats, nil -} - -func parseTypeStrings(typs []string) ([]sql.Type, error) { - var ret []sql.Type - for _, typ := range typs { - ct, err := planbuilder.ParseColumnTypeString(typ) - if err != nil { - return nil, err - } - ret = append(ret, ct) - } - return ret, nil -} - -func loadRefdProps(ctx *sql.Context, db dsess.SqlDatabase, sqlTable sql.Table, qual sql.StatQualifier, cols int) (sql.Row, *val.TupleBuilder, *sql.FuncDepSet, sql.ColSet, error) { - root, err := db.GetRoot(ctx) - if err != nil { - return nil, nil, nil, sql.ColSet{}, err - } - - iat, ok := sqlTable.(sql.IndexAddressable) - if !ok { - return nil, nil, nil, sql.ColSet{}, nil - } - - indexes, err := iat.GetIndexes(ctx) - if err != nil { - return nil, nil, nil, sql.ColSet{}, err - } - - var sqlIdx sql.Index - for _, i := range indexes { - if strings.EqualFold(i.ID(), qual.Index()) { - sqlIdx = i - break - } - } - - if sqlIdx == nil { - return nil, nil, nil, sql.ColSet{}, fmt.Errorf("%w: index not found: '%s'", statspro.ErrFailedToLoad, qual.Index()) - } - - fds, colset, err := stats.IndexFds(qual.Table(), sqlTable.Schema(), sqlIdx) - if err != nil { - return 
nil, nil, nil, sql.ColSet{}, err - } - table, ok, err := root.GetTable(ctx, doltdb.TableName{Name: sqlTable.Name()}) - if !ok { - return nil, nil, nil, sql.ColSet{}, sql.ErrTableNotFound.New(qual.Table()) - } - if err != nil { - return nil, nil, nil, sql.ColSet{}, err - } - - var idx durable.Index - if qual.Index() == "primary" { - idx, err = table.GetRowData(ctx) - } else { - idx, err = table.GetIndexRowData(ctx, qual.Index()) - } - if err != nil { - return nil, nil, nil, sql.ColSet{}, err - } - - prollyMap := durable.ProllyMapFromIndex(idx) - keyBuilder := val.NewTupleBuilder(prollyMap.KeyDesc().PrefixDesc(cols)) - buffPool := prollyMap.NodeStore().Pool() - - if cnt, err := prollyMap.Count(); err != nil { - return nil, nil, nil, sql.ColSet{}, err - } else if cnt == 0 { - return nil, keyBuilder, nil, sql.ColSet{}, nil - } - firstIter, err := prollyMap.IterOrdinalRange(ctx, 0, 1) - if err != nil { - return nil, nil, nil, sql.ColSet{}, err - } - keyBytes, _, err := firstIter.Next(ctx) - if err != nil { - return nil, nil, nil, sql.ColSet{}, err - } - for i := range keyBuilder.Desc.Types { - keyBuilder.PutRaw(i, keyBytes.GetField(i)) - } - - firstKey := keyBuilder.Build(buffPool) - firstRow := make(sql.Row, keyBuilder.Desc.Count()) - for i := 0; i < keyBuilder.Desc.Count(); i++ { - firstRow[i], err = tree.GetField(ctx, prollyMap.KeyDesc(), i, firstKey, prollyMap.NodeStore()) - if err != nil { - return nil, nil, nil, sql.ColSet{}, err - } - } - return firstRow, keyBuilder, fds, colset, nil -} - -func loadFuncDeps(ctx *sql.Context, db dsess.SqlDatabase, qual sql.StatQualifier) (*sql.FuncDepSet, sql.ColSet, error) { - tab, ok, err := db.GetTableInsensitive(ctx, qual.Table()) - if err != nil { - return nil, sql.ColSet{}, err - } else if !ok { - return nil, sql.ColSet{}, fmt.Errorf("%w: table not found: '%s'", statspro.ErrFailedToLoad, qual.Table()) - } - - iat, ok := tab.(sql.IndexAddressable) - if !ok { - return nil, sql.ColSet{}, fmt.Errorf("%w: table does not have 
indexes: '%s'", statspro.ErrFailedToLoad, qual.Table()) - } - - indexes, err := iat.GetIndexes(ctx) - if err != nil { - return nil, sql.ColSet{}, err - } - - var idx sql.Index - for _, i := range indexes { - if strings.EqualFold(i.ID(), qual.Index()) { - idx = i - break - } - } - - if idx == nil { - return nil, sql.ColSet{}, fmt.Errorf("%w: index not found: '%s'", statspro.ErrFailedToLoad, qual.Index()) - } - - return stats.IndexFds(qual.Table(), tab.Schema(), idx) -} diff --git a/go/libraries/doltcore/sqle/statsnoms/write.go b/go/libraries/doltcore/sqle/statsnoms/write.go deleted file mode 100644 index b97f87f673d..00000000000 --- a/go/libraries/doltcore/sqle/statsnoms/write.go +++ /dev/null @@ -1,181 +0,0 @@ -// Copyright 2024 Dolthub, Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package statsnoms - -import ( - "context" - "errors" - "io" - "strings" - - "github.com/dolthub/go-mysql-server/sql" - "github.com/dolthub/go-mysql-server/sql/stats" - "github.com/dolthub/go-mysql-server/sql/types" - - "github.com/dolthub/dolt/go/libraries/doltcore/schema" - "github.com/dolthub/dolt/go/libraries/doltcore/sqle/statspro" - "github.com/dolthub/dolt/go/store/prolly" - "github.com/dolthub/dolt/go/store/prolly/tree" - "github.com/dolthub/dolt/go/store/val" -) - -// About ~200 20 byte address fit in a ~4k chunk. Chunk sizes -// are approximate, but certainly shouldn't reach the square -// of the expected size. 
-const maxBucketFanout = 200 * 200 - -var mcvsTypes = []sql.Type{types.Int64, types.Int64, types.Int64} - -func (n *NomsStatsDatabase) replaceStats(ctx context.Context, statsMap *prolly.MutableMap, dStats *statspro.DoltStats) error { - if err := deleteIndexRows(ctx, statsMap, dStats); err != nil { - return err - } - return putIndexRows(ctx, statsMap, dStats) -} - -func deleteIndexRows(ctx context.Context, statsMap *prolly.MutableMap, dStats *statspro.DoltStats) error { - if ctx.Err() != nil { - return ctx.Err() - } - sch := schema.StatsTableDoltSchema - kd, _ := sch.GetMapDescriptors(statsMap.NodeStore()) - - keyBuilder := val.NewTupleBuilder(kd) - - qual := dStats.Qualifier() - pool := statsMap.NodeStore().Pool() - - // delete previous entries for this index -> (db, table, index, pos) - keyBuilder.PutString(0, qual.Database) - keyBuilder.PutString(1, qual.Table()) - keyBuilder.PutString(2, qual.Index()) - keyBuilder.PutInt64(3, 0) - firstKey := keyBuilder.Build(pool) - keyBuilder.PutString(0, qual.Database) - keyBuilder.PutString(1, qual.Table()) - keyBuilder.PutString(2, qual.Index()) - keyBuilder.PutInt64(3, maxBucketFanout+1) - maxKey := keyBuilder.Build(pool) - - // there is a limit on the number of buckets for a given index, iter - // will terminate before maxBucketFanout - iter, err := statsMap.IterKeyRange(ctx, firstKey, maxKey) - if err != nil { - return err - } - - for { - k, _, err := iter.Next(ctx) - if errors.Is(err, io.EOF) { - break - } else if err != nil { - return err - } - err = statsMap.Put(ctx, k, nil) - if err != nil { - return err - } - } - return nil -} - -func putIndexRows(ctx context.Context, statsMap *prolly.MutableMap, dStats *statspro.DoltStats) error { - if ctx.Err() != nil { - return ctx.Err() - } - sch := schema.StatsTableDoltSchema - kd, vd := sch.GetMapDescriptors(statsMap.NodeStore()) - - keyBuilder := val.NewTupleBuilder(kd) - valueBuilder := val.NewTupleBuilder(vd) - - qual := dStats.Qualifier() - pool := 
statsMap.NodeStore().Pool() - - // now add new buckets - typesB := strings.Builder{} - sep := "" - for _, t := range dStats.Statistic.Typs { - typesB.WriteString(sep + t.String()) - sep = "\n" - } - typesStr := typesB.String() - - var pos int64 - for _, h := range dStats.Hist { - keyBuilder.PutString(0, qual.Database) - keyBuilder.PutString(1, qual.Tab) - keyBuilder.PutString(2, qual.Idx) - keyBuilder.PutInt64(3, pos) - - valueBuilder.PutInt64(0, schema.StatsVersion) - valueBuilder.PutString(1, statspro.DoltBucketChunk(h).String()) - valueBuilder.PutInt64(2, int64(h.RowCount())) - valueBuilder.PutInt64(3, int64(h.DistinctCount())) - valueBuilder.PutInt64(4, int64(h.NullCount())) - valueBuilder.PutString(5, strings.Join(dStats.Columns(), ",")) - valueBuilder.PutString(6, typesStr) - boundRow, err := EncodeRow(ctx, statsMap.NodeStore(), h.UpperBound(), dStats.Tb) - if err != nil { - return err - } - valueBuilder.PutString(7, string(boundRow)) - valueBuilder.PutInt64(8, int64(h.BoundCount())) - valueBuilder.PutDatetime(9, statspro.DoltBucketCreated(h)) - for i, r := range h.Mcvs() { - mcvRow, err := EncodeRow(ctx, statsMap.NodeStore(), r, dStats.Tb) - if err != nil { - return err - } - valueBuilder.PutString(10+i, string(mcvRow)) - } - var mcvCntsRow sql.Row - for _, v := range h.McvCounts() { - mcvCntsRow = append(mcvCntsRow, int(v)) - } - valueBuilder.PutString(14, stats.StringifyKey(mcvCntsRow, mcvsTypes)) - - key := keyBuilder.Build(pool) - value := valueBuilder.Build(pool) - statsMap.Put(ctx, key, value) - pos++ - } - return nil -} - -func EncodeRow(ctx context.Context, ns tree.NodeStore, r sql.Row, tb *val.TupleBuilder) ([]byte, error) { - for i, v := range r { - if v == nil { - continue - } - if err := tree.PutField(ctx, ns, tb, i, v); err != nil { - return nil, err - } - } - return tb.Build(ns.Pool()), nil -} - -func DecodeRow(ctx context.Context, ns tree.NodeStore, s string, tb *val.TupleBuilder) (sql.Row, error) { - tup := []byte(s) - r := make(sql.Row, 
tb.Desc.Count()) - var err error - for i, _ := range r { - r[i], err = tree.GetField(ctx, tb.Desc, i, tup, ns) - if err != nil { - return nil, err - } - } - return r, nil -} diff --git a/go/libraries/doltcore/sqle/statspro/analyze.go b/go/libraries/doltcore/sqle/statspro/analyze.go deleted file mode 100644 index 45efb4ab05c..00000000000 --- a/go/libraries/doltcore/sqle/statspro/analyze.go +++ /dev/null @@ -1,351 +0,0 @@ -// Copyright 2024 Dolthub, Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -package statspro - -import ( - "fmt" - "strings" - - "github.com/dolthub/go-mysql-server/sql" - - "github.com/dolthub/dolt/go/libraries/doltcore/doltdb" - "github.com/dolthub/dolt/go/libraries/doltcore/doltdb/durable" - "github.com/dolthub/dolt/go/libraries/doltcore/env" - "github.com/dolthub/dolt/go/libraries/doltcore/sqle" - "github.com/dolthub/dolt/go/libraries/doltcore/sqle/dsess" - "github.com/dolthub/dolt/go/store/hash" - "github.com/dolthub/dolt/go/store/prolly/tree" -) - -const ( - boostrapRowLimit = 2e6 -) - -func (p *Provider) RefreshTableStats(ctx *sql.Context, table sql.Table, db string) error { - dSess := dsess.DSessFromSess(ctx.Session) - branch, err := dSess.GetBranch() - if err != nil { - return err - } - return p.RefreshTableStatsWithBranch(ctx, table, db, branch) -} - -func (p *Provider) BootstrapDatabaseStats(ctx *sql.Context, db string) error { - dSess := dsess.DSessFromSess(ctx.Session) - branches := p.getStatsBranches(ctx) - var rows uint64 - for _, branch := range branches { - sqlDb, err := dSess.Provider().Database(ctx, BranchQualifiedDatabase(db, branch)) - if err != nil { - if sql.ErrDatabaseNotFound.Is(err) { - // default branch is not valid - continue - } - return err - } - tables, err := sqlDb.GetTableNames(ctx) - if err != nil { - return err - } - for _, table := range tables { - sqlTable, _, err := GetLatestTable(ctx, table, sqlDb) - if err != nil { - return err - } - - if st, ok := sqlTable.(sql.StatisticsTable); ok { - cnt, ok, err := st.RowCount(ctx) - if ok && err == nil { - rows += cnt - } - } - if rows >= boostrapRowLimit { - return fmt.Errorf("stats bootstrap aborted because %s exceeds the default row limit; manually run \"ANALYZE \" or \"call dolt_stats_restart()\" to collect statistics", db) - } - - if err := p.RefreshTableStatsWithBranch(ctx, sqlTable, db, branch); err != nil { - return err - } - } - } - return nil -} - -func (p *Provider) RefreshTableStatsWithBranch(ctx *sql.Context, table sql.Table, db string, branch 
string) error { - if !p.TryLockForUpdate(branch, db, table.Name()) { - return fmt.Errorf("already updating statistics") - } - defer p.UnlockTable(branch, db, table.Name()) - - dSess := dsess.DSessFromSess(ctx.Session) - - sqlDb, err := dSess.Provider().Database(ctx, BranchQualifiedDatabase(db, branch)) - if err != nil { - return err - } - - // lock only after accessing DatabaseProvider - - tableName := strings.ToLower(table.Name()) - dbName := strings.ToLower(db) - var schemaName string - if schTab, ok := table.(sql.DatabaseSchemaTable); ok { - schemaName = strings.ToLower(schTab.DatabaseSchema().SchemaName()) - } - - iat, ok := table.(sql.IndexAddressableTable) - if !ok { - return nil - } - indexes, err := iat.GetIndexes(ctx) - if err != nil { - return err - } - - // it's important to update WORKING session references every call - sqlTable, dTab, err := GetLatestTable(ctx, tableName, sqlDb) - if err != nil { - return err - } - - statDb, ok := p.getStatDb(dbName) - if !ok { - // if the stats database does not exist, initialize one - fs, err := p.pro.FileSystemForDatabase(dbName) - if err != nil { - return err - } - sourceDb, ok := p.pro.BaseDatabase(ctx, dbName) - if !ok { - return sql.ErrDatabaseNotFound.New(dbName) - } - statDb, err = p.sf.Init(ctx, sourceDb, p.pro, fs, env.GetCurrentUserHomeDir) - if err != nil { - ctx.Warn(0, "%s", err.Error()) - return nil - } - p.setStatDb(dbName, statDb) - } - - schHash, err := dTab.GetSchemaHash(ctx) - if err != nil { - return err - } - - if oldSchHash, err := statDb.GetSchemaHash(ctx, branch, tableName); oldSchHash.IsEmpty() { - if err := statDb.SetSchemaHash(ctx, branch, tableName, schHash); err != nil { - return fmt.Errorf("set schema hash error: %w", err) - } - } else if oldSchHash != schHash { - ctx.GetLogger().Debugf("statistics refresh: detected table schema change: %s,%s/%s", dbName, table, branch) - if err := statDb.SetSchemaHash(ctx, branch, tableName, schHash); err != nil { - return err - } - - stats, err := 
p.GetTableDoltStats(ctx, branch, dbName, schemaName, tableName) - if err != nil { - return err - } - for _, stat := range stats { - statDb.DeleteStats(ctx, branch, stat.Qualifier()) - } - } else if err != nil { - return err - } - - tablePrefix := fmt.Sprintf("%s.", tableName) - var idxMetas []indexMeta - for _, idx := range indexes { - cols := make([]string, len(idx.Expressions())) - for i, c := range idx.Expressions() { - cols[i] = strings.TrimPrefix(strings.ToLower(c), tablePrefix) - } - - qual := sql.NewStatQualifier(db, schemaName, table.Name(), strings.ToLower(idx.ID())) - curStat, ok := statDb.GetStat(branch, qual) - if !ok { - curStat = NewDoltStats() - curStat.Statistic.Qual = qual - } - idxMeta, ok, err := newIdxMeta(ctx, curStat, dTab, idx, cols) - if err != nil { - return err - } - if ok { - idxMetas = append(idxMetas, idxMeta) - } - } - - newTableStats, err := createNewStatsBuckets(ctx, sqlTable, dTab, indexes, idxMetas) - if err != nil { - return err - } - - // merge new chunks with preexisting chunks - for _, idxMeta := range idxMetas { - stat, ok := newTableStats[idxMeta.qual] - if !ok { - continue - } - targetChunks, err := MergeNewChunks(idxMeta.allAddrs, idxMeta.keepChunks, stat.Hist) - if err != nil { - return err - } - if targetChunks == nil { - // empty table - continue - } - stat.SetChunks(idxMeta.allAddrs) - stat.Hist = targetChunks - stat.UpdateActive() - if err := statDb.SetStat(ctx, branch, idxMeta.qual, stat); err != nil { - return err - } - } - - p.UpdateStatus(dbName, fmt.Sprintf("refreshed %s", dbName)) - return statDb.Flush(ctx, branch) -} - -// BranchQualifiedDatabase returns a branch qualified database. If the database -// is already branch suffixed no duplication is applied. 
-func BranchQualifiedDatabase(db, branch string) string { - suffix := fmt.Sprintf("/%s", branch) - if !strings.HasSuffix(db, suffix) { - return fmt.Sprintf("%s%s", db, suffix) - } - return db -} - -// GetLatestTable will get the WORKING root table for the current database/branch -func GetLatestTable(ctx *sql.Context, tableName string, sqlDb sql.Database) (sql.Table, *doltdb.Table, error) { - var db sqle.Database - switch d := sqlDb.(type) { - case sqle.Database: - db = d - case sqle.ReadReplicaDatabase: - db = d.Database - default: - return nil, nil, fmt.Errorf("expected sqle.Database, found %T", sqlDb) - } - sqlTable, ok, err := db.GetTableInsensitive(ctx, tableName) - if err != nil { - return nil, nil, err - } - if !ok { - return nil, nil, fmt.Errorf("statistics refresh error: table not found %s", tableName) - } - - var dTab *doltdb.Table - switch t := sqlTable.(type) { - case *sqle.AlterableDoltTable: - dTab, err = t.DoltTable.DoltTable(ctx) - case *sqle.WritableDoltTable: - dTab, err = t.DoltTable.DoltTable(ctx) - case *sqle.DoltTable: - dTab, err = t.DoltTable(ctx) - default: - err = fmt.Errorf("failed to unwrap dolt table from type: %T", sqlTable) - } - if err != nil { - return nil, nil, err - } - return sqlTable, dTab, nil -} - -func newIdxMeta(ctx *sql.Context, curStats *DoltStats, doltTable *doltdb.Table, sqlIndex sql.Index, cols []string) (indexMeta, bool, error) { - var idx durable.Index - var err error - if strings.EqualFold(sqlIndex.ID(), "PRIMARY") { - idx, err = doltTable.GetRowData(ctx) - } else { - idx, err = doltTable.GetIndexRowData(ctx, sqlIndex.ID()) - } - if err != nil { - return indexMeta{}, false, err - } - - prollyMap, ok := durable.MaybeProllyMapFromIndex(idx) - if !ok { - return indexMeta{}, false, nil - } - - if cnt, err := prollyMap.Count(); err != nil { - return indexMeta{}, false, err - } else if cnt == 0 { - return indexMeta{ - qual: curStats.Statistic.Qual, - cols: cols, - }, true, nil - } - - // get newest histogram target level 
hashes - levelNodes, err := tree.GetHistogramLevel(ctx, prollyMap.Tuples(), bucketLowCnt) - if err != nil { - return indexMeta{}, false, err - } - - var addrs []hash.Hash - var keepChunks []sql.HistogramBucket - var missingAddrs float64 - var missingChunks []tree.Node - var missingOffsets []updateOrdinal - var offset uint64 - - for _, n := range levelNodes { - // Compare the previous histogram chunks to the newest tree chunks. - // Partition the newest chunks into 1) preserved or 2) missing. - // Missing chunks will need to be scanned on a stats update, so - // track the (start, end) ordinal offsets to simplify the read iter. - treeCnt, err := n.TreeCount() - if err != nil { - return indexMeta{}, false, err - } - - addrs = append(addrs, n.HashOf()) - if bucketIdx, ok := curStats.Active[n.HashOf()]; !ok { - missingChunks = append(missingChunks, n) - missingOffsets = append(missingOffsets, updateOrdinal{offset, offset + uint64(treeCnt)}) - missingAddrs++ - } else { - keepChunks = append(keepChunks, curStats.Hist[bucketIdx]) - } - offset += uint64(treeCnt) - } - - var dropChunks []sql.HistogramBucket - for _, h := range curStats.Chunks { - var match bool - for _, b := range keepChunks { - if DoltBucketChunk(b) == h { - match = true - break - } - } - if !match { - dropChunks = append(dropChunks, curStats.Hist[curStats.Active[h]]) - } - } - - return indexMeta{ - qual: curStats.Statistic.Qual, - cols: cols, - newNodes: missingChunks, - updateOrdinals: missingOffsets, - keepChunks: keepChunks, - dropChunks: dropChunks, - allAddrs: addrs, - }, true, nil -} diff --git a/go/libraries/doltcore/sqle/statspro/auto_refresh.go b/go/libraries/doltcore/sqle/statspro/auto_refresh.go deleted file mode 100644 index df4ed852857..00000000000 --- a/go/libraries/doltcore/sqle/statspro/auto_refresh.go +++ /dev/null @@ -1,296 +0,0 @@ -// Copyright 2024 Dolthub, Inc. 
-// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package statspro - -import ( - "context" - "errors" - "fmt" - "strings" - "time" - - "github.com/dolthub/go-mysql-server/sql" - types2 "github.com/dolthub/go-mysql-server/sql/types" - - "github.com/dolthub/dolt/go/libraries/doltcore/sqle/dsess" -) - -const asyncAutoRefreshStats = "async_auto_refresh_stats" - -func (p *Provider) InitAutoRefresh(ctxFactory func(ctx context.Context) (*sql.Context, error), dbName string, bThreads *sql.BackgroundThreads) error { - _, threshold, _ := sql.SystemVariables.GetGlobal(dsess.DoltStatsAutoRefreshThreshold) - _, interval, _ := sql.SystemVariables.GetGlobal(dsess.DoltStatsAutoRefreshInterval) - interval64, _, _ := types2.Int64.Convert(interval) - intervalSec := time.Second * time.Duration(interval64.(int64)) - thresholdf64 := threshold.(float64) - - ctx, err := ctxFactory(context.Background()) - if err != nil { - return err - } - - branches := p.getStatsBranches(ctx) - - return p.InitAutoRefreshWithParams(ctxFactory, dbName, bThreads, intervalSec, thresholdf64, branches) -} - -func (p *Provider) InitAutoRefreshWithParams(ctxFactory func(ctx context.Context) (*sql.Context, error), dbName string, bThreads *sql.BackgroundThreads, checkInterval time.Duration, updateThresh float64, branches []string) error { - // this is only called after initial statistics are finished loading - // launch a thread that periodically checks freshness - - p.mu.Lock() - defer p.mu.Unlock() - - 
dropDbCtx, dbStatsCancel := context.WithCancel(context.Background()) - p.autoCtxCancelers[dbName] = dbStatsCancel - - return bThreads.Add(fmt.Sprintf("%s_%s", asyncAutoRefreshStats, dbName), func(ctx context.Context) { - ticker := time.NewTicker(checkInterval + time.Nanosecond) - for { - select { - case <-ctx.Done(): - ticker.Stop() - return - case <-ticker.C: - select { - case <-dropDbCtx.Done(): - ticker.Stop() - return - default: - } - - err := func() error { - sqlCtx, err := ctxFactory(ctx) - if err != nil { - return err - } - defer sql.SessionEnd(sqlCtx.Session) - sql.SessionCommandBegin(sqlCtx.Session) - defer sql.SessionCommandEnd(sqlCtx.Session) - - dSess := dsess.DSessFromSess(sqlCtx.Session) - - ddb, ok := dSess.GetDoltDB(sqlCtx, dbName) - if !ok { - sqlCtx.GetLogger().Debugf("statistics refresh error: database not found %s", dbName) - return errors.New("database not found") - } - for _, branch := range branches { - if br, ok, err := ddb.HasBranch(sqlCtx, branch); ok { - sqlCtx.GetLogger().Debugf("starting statistics refresh check for '%s': %s", dbName, time.Now().String()) - // update WORKING session references - sqlDb, err := dSess.Provider().Database(sqlCtx, BranchQualifiedDatabase(dbName, branch)) - if err != nil { - sqlCtx.GetLogger().Debugf("statistics refresh error: %s", err.Error()) - return err - } - - if err := p.checkRefresh(sqlCtx, sqlDb, dbName, br, updateThresh); err != nil { - sqlCtx.GetLogger().Debugf("statistics refresh error: %s", err.Error()) - return err - } - } else if err != nil { - sqlCtx.GetLogger().Debugf("statistics refresh error: branch check error %s", err.Error()) - } else { - sqlCtx.GetLogger().Debugf("statistics refresh error: branch not found %s", br) - } - } - return nil - }() - if err != nil { - return - } - } - } - }) -} - -func (p *Provider) checkRefresh(ctx *sql.Context, sqlDb sql.Database, dbName, branch string, updateThresh float64) error { - if !p.TryLockForUpdate(branch, dbName, "") { - return fmt.Errorf("database 
already being updated: %s/%s", branch, dbName) - } - defer p.UnlockTable(branch, dbName, "") - - // Iterate all dbs, tables, indexes. Each db will collect - // []indexMeta above refresh threshold. We read and process those - // chunks' statistics. We merge updated chunks with precomputed - // chunks. The full set of statistics for each database lands - // 1) in the provider's most recent set of database statistics, and - // 2) on disk in the database's statistics ref'd prolly.Map. - statDb, ok := p.getStatDb(dbName) - if !ok { - return sql.ErrDatabaseNotFound.New(dbName) - } - - var deletedStats []sql.StatQualifier - qualExists := make(map[sql.StatQualifier]bool) - tableExistsAndSkipped := make(map[string]bool) - - tables, err := sqlDb.GetTableNames(ctx) - if err != nil { - return err - } - - for _, table := range tables { - if !p.TryLockForUpdate(branch, dbName, table) { - ctx.GetLogger().Debugf("statistics refresh: table is already being updated: %s/%s.%s", branch, dbName, table) - return fmt.Errorf("table already being updated: %s", table) - } - defer p.UnlockTable(branch, dbName, table) - - sqlTable, dTab, err := GetLatestTable(ctx, table, sqlDb) - if err != nil { - return err - } - - tableHash, err := dTab.GetRowDataHash(ctx) - if err != nil { - return err - } - - if statDb.GetTableHash(branch, table) == tableHash { - // no data changes since last check - tableExistsAndSkipped[table] = true - ctx.GetLogger().Debugf("statistics refresh: table hash unchanged since last check: %s", tableHash) - continue - } else { - ctx.GetLogger().Debugf("statistics refresh: new table hash: %s", tableHash) - } - - schHash, err := dTab.GetSchemaHash(ctx) - if err != nil { - return err - } - - var schemaName string - if schTab, ok := sqlTable.(sql.DatabaseSchemaTable); ok { - schemaName = strings.ToLower(schTab.DatabaseSchema().SchemaName()) - } - - if oldSchHash, err := statDb.GetSchemaHash(ctx, branch, table); oldSchHash.IsEmpty() { - if err := statDb.SetSchemaHash(ctx, branch, 
table, schHash); err != nil { - return err - } - } else if oldSchHash != schHash { - ctx.GetLogger().Debugf("statistics refresh: detected table schema change: %s,%s/%s", dbName, table, branch) - if err := statDb.SetSchemaHash(ctx, branch, table, schHash); err != nil { - return err - } - stats, err := p.GetTableDoltStats(ctx, branch, dbName, schemaName, table) - if err != nil { - return err - } - for _, stat := range stats { - statDb.DeleteStats(ctx, branch, stat.Qualifier()) - } - } else if err != nil { - return err - } - - iat, ok := sqlTable.(sql.IndexAddressableTable) - if !ok { - return fmt.Errorf("table does not support indexes %s", table) - } - - indexes, err := iat.GetIndexes(ctx) - if err != nil { - return err - } - - // collect indexes and ranges to be updated - var idxMetas []indexMeta - for _, index := range indexes { - qual := sql.NewStatQualifier(dbName, schemaName, table, strings.ToLower(index.ID())) - qualExists[qual] = true - curStat, ok := statDb.GetStat(branch, qual) - if !ok { - curStat = NewDoltStats() - curStat.Statistic.Qual = qual - - cols := make([]string, len(index.Expressions())) - tablePrefix := fmt.Sprintf("%s.", table) - for i, c := range index.Expressions() { - cols[i] = strings.TrimPrefix(strings.ToLower(c), tablePrefix) - } - curStat.Statistic.Cols = cols - } - ctx.GetLogger().Debugf("statistics refresh index: %s", qual.String()) - - updateMeta, ok, err := newIdxMeta(ctx, curStat, dTab, index, curStat.Columns()) - if err != nil { - ctx.GetLogger().Debugf("statistics refresh error: %s", err.Error()) - continue - } - if !ok { - continue - } - curCnt := float64(len(curStat.Active)) - updateCnt := float64(len(updateMeta.newNodes)) - deleteCnt := float64(len(curStat.Active) - len(updateMeta.keepChunks)) - ctx.GetLogger().Debugf("statistics current: %d, new: %d, delete: %d", int(curCnt), int(updateCnt), int(deleteCnt)) - - if curCnt == 0 || (deleteCnt+updateCnt)/curCnt > updateThresh { - if curCnt == 0 && updateCnt == 0 { - continue - } - 
ctx.GetLogger().Debugf("statistics updating: %s", updateMeta.qual) - // mark index for updating - idxMetas = append(idxMetas, updateMeta) - // update latest hash if we haven't already - statDb.SetTableHash(branch, table, tableHash) - } - } - - // get new buckets for index chunks to update - newTableStats, err := createNewStatsBuckets(ctx, sqlTable, dTab, indexes, idxMetas) - if err != nil { - return err - } - - // merge new chunks with preexisting chunks - for _, updateMeta := range idxMetas { - stat := newTableStats[updateMeta.qual] - if stat != nil { - var err error - if _, ok := statDb.GetStat(branch, updateMeta.qual); !ok { - err = statDb.SetStat(ctx, branch, updateMeta.qual, stat) - } else { - err = statDb.ReplaceChunks(ctx, branch, updateMeta.qual, updateMeta.allAddrs, updateMeta.dropChunks, stat.Hist) - } - if err != nil { - return err - } - p.UpdateStatus(dbName, fmt.Sprintf("refreshed %s", dbName)) - } - } - } - - for _, q := range statDb.ListStatQuals(branch) { - // table or index delete leaves hole in stats - // this is separate from threshold check - if !tableExistsAndSkipped[q.Table()] && !qualExists[q] { - // only delete stats we've verified are deleted - deletedStats = append(deletedStats, q) - } - } - - statDb.DeleteStats(ctx, branch, deletedStats...) - - if err := statDb.Flush(ctx, branch); err != nil { - return err - } - - return nil -} diff --git a/go/libraries/doltcore/sqle/statspro/update.go b/go/libraries/doltcore/sqle/statspro/bucket_builder.go similarity index 51% rename from go/libraries/doltcore/sqle/statspro/update.go rename to go/libraries/doltcore/sqle/statspro/bucket_builder.go index 225d79b6bbf..940f7b5716b 100644 --- a/go/libraries/doltcore/sqle/statspro/update.go +++ b/go/libraries/doltcore/sqle/statspro/bucket_builder.go @@ -1,4 +1,4 @@ -// Copyright 2023 Dolthub, Inc. +// Copyright 2023-2025 Dolthub, Inc. 
// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -17,19 +17,11 @@ package statspro import ( "container/heap" "context" - "errors" - "fmt" - "io" "sort" - "strings" - "time" "github.com/dolthub/go-mysql-server/sql" "github.com/dolthub/go-mysql-server/sql/stats" - "github.com/dolthub/dolt/go/libraries/doltcore/doltdb" - "github.com/dolthub/dolt/go/libraries/doltcore/doltdb/durable" - "github.com/dolthub/dolt/go/store/hash" "github.com/dolthub/dolt/go/store/prolly" "github.com/dolthub/dolt/go/store/prolly/tree" "github.com/dolthub/dolt/go/store/val" @@ -40,156 +32,7 @@ const ( mcvCnt = 3 ) -// createNewStatsBuckets builds histograms for a list of index statistic metadata. -// We only read chunk ranges indicated by |indexMeta.updateOrdinals|. If -// the returned buckets are a subset of the index the caller is responsible -// for reconciling the difference. -func createNewStatsBuckets(ctx *sql.Context, sqlTable sql.Table, dTab *doltdb.Table, indexes []sql.Index, idxMetas []indexMeta) (map[sql.StatQualifier]*DoltStats, error) { - nameToIdx := make(map[string]sql.Index) - for _, idx := range indexes { - nameToIdx[strings.ToLower(idx.ID())] = idx - } - - ret := make(map[sql.StatQualifier]*DoltStats) - - for _, meta := range idxMetas { - sqlIdx := nameToIdx[strings.ToLower(meta.qual.Index())] - if sqlIdx.IsSpatial() || sqlIdx.IsFullText() || sqlIdx.IsGenerated() || sqlIdx.IsVector() { - continue - } - var idx durable.Index - var err error - if strings.EqualFold(meta.qual.Index(), "PRIMARY") { - idx, err = dTab.GetRowData(ctx) - } else { - idx, err = dTab.GetIndexRowData(ctx, meta.qual.Index()) - } - if err != nil { - return nil, err - } - - prollyMap := durable.ProllyMapFromIndex(idx) - keyBuilder := val.NewTupleBuilder(prollyMap.KeyDesc()) - - fds, colSet, err := stats.IndexFds(meta.qual.Table(), sqlTable.Schema(), sqlIdx) - if err != nil { - return nil, err - } - - var types 
[]sql.Type - for _, cet := range nameToIdx[strings.ToLower(meta.qual.Index())].ColumnExpressionTypes() { - types = append(types, cet.Type) - } - - if cnt, err := prollyMap.Count(); err != nil { - return nil, err - } else if cnt == 0 { - // table is empty - ret[meta.qual] = NewDoltStats() - ret[meta.qual].Statistic.Created = time.Now() - ret[meta.qual].Statistic.Cols = meta.cols - ret[meta.qual].Statistic.Typs = types - ret[meta.qual].Statistic.Qual = meta.qual - - ret[meta.qual].Statistic.Fds = fds - ret[meta.qual].Statistic.Colset = colSet - ret[meta.qual].Tb = val.NewTupleBuilder(prollyMap.KeyDesc().PrefixDesc(len(meta.cols))) - - continue - } - - firstRow, err := firstRowForIndex(ctx, prollyMap, keyBuilder, len(meta.cols)) - if err != nil { - return nil, err - } - - updater := newBucketBuilder(meta.qual, len(meta.cols), prollyMap.KeyDesc()) - ret[meta.qual] = NewDoltStats() - ret[meta.qual].Chunks = meta.allAddrs - ret[meta.qual].Statistic.Created = time.Now() - ret[meta.qual].Statistic.Cols = meta.cols - ret[meta.qual].Statistic.Typs = types - ret[meta.qual].Statistic.Qual = meta.qual - ret[meta.qual].Tb = val.NewTupleBuilder(prollyMap.KeyDesc().PrefixDesc(len(meta.cols))) - - var start, stop uint64 - // read leaf rows for each bucket - for i, chunk := range meta.newNodes { - // each node is a bucket - updater.newBucket() - - // we read exclusive range [node first key, next node first key) - start, stop = meta.updateOrdinals[i].start, meta.updateOrdinals[i].stop - iter, err := prollyMap.IterOrdinalRange(ctx, start, stop) - if err != nil { - return nil, err - } - for { - // stats key will be a prefix of the index key - keyBytes, _, err := iter.Next(ctx) - if errors.Is(err, io.EOF) { - break - } else if err != nil { - return nil, err - } - // build full key - for i := range keyBuilder.Desc.Types { - keyBuilder.PutRaw(i, keyBytes.GetField(i)) - } - - updater.add(ctx, keyBuilder.BuildPrefixNoRecycle(prollyMap.Pool(), updater.prefixLen)) - keyBuilder.Recycle() - } - 
- // finalize the aggregation - bucket, err := updater.finalize(ctx, prollyMap.NodeStore()) - if err != nil { - return nil, err - } - bucket.Chunk = chunk.HashOf() - ret[updater.qual].Hist = append(ret[updater.qual].Hist, bucket) - } - - ret[updater.qual].Statistic.DistinctCnt = uint64(updater.globalDistinct) - ret[updater.qual].Statistic.RowCnt = uint64(updater.globalCount) - ret[updater.qual].Statistic.LowerBnd = firstRow - ret[updater.qual].Statistic.Fds = fds - ret[updater.qual].Statistic.Colset = colSet - ret[updater.qual].UpdateActive() - } - return ret, nil -} - -// MergeNewChunks combines a set of old and new chunks to create -// the desired target histogram. Undefined behavior if a |targetHash| -// does not exist in either |oldChunks| or |newChunks|. -func MergeNewChunks(inputHashes []hash.Hash, oldChunks, newChunks []sql.HistogramBucket) ([]sql.HistogramBucket, error) { - hashToPos := make(map[hash.Hash]int, len(inputHashes)) - for i, h := range inputHashes { - hashToPos[h] = i - } - - var cnt int - targetBuckets := make([]sql.HistogramBucket, len(inputHashes)) - for _, c := range oldChunks { - if idx, ok := hashToPos[DoltBucketChunk(c)]; ok { - cnt++ - targetBuckets[idx] = c - } - } - for _, c := range newChunks { - if idx, ok := hashToPos[DoltBucketChunk(c)]; ok && targetBuckets[idx] == nil { - cnt++ - targetBuckets[idx] = c - } - } - if cnt != len(inputHashes) { - return nil, fmt.Errorf("encountered invalid statistic chunks") - } - return targetBuckets, nil -} - -func firstRowForIndex(ctx *sql.Context, prollyMap prolly.Map, keyBuilder *val.TupleBuilder, prefixLen int) (sql.Row, error) { +func firstRowForIndex(ctx *sql.Context, idxLen int, prollyMap prolly.Map, keyBuilder *val.TupleBuilder) (sql.Row, error) { if cnt, err := prollyMap.Count(); err != nil { return nil, err } else if cnt == 0 { @@ -211,9 +54,9 @@ func firstRowForIndex(ctx *sql.Context, prollyMap prolly.Map, keyBuilder *val.Tu keyBuilder.PutRaw(i, keyBytes.GetField(i)) } - firstKey := 
keyBuilder.BuildPrefixNoRecycle(buffPool, prefixLen) - firstRow := make(sql.Row, prefixLen) - for i := 0; i < prefixLen; i++ { + firstKey := keyBuilder.Build(buffPool) + firstRow := make(sql.Row, idxLen) + for i := range firstRow { firstRow[i], err = tree.GetField(ctx, prollyMap.KeyDesc(), i, firstKey, prollyMap.NodeStore()) if err != nil { return nil, err @@ -269,7 +112,7 @@ func (u *bucketBuilder) newBucket() { // finalize converts the current aggregation stats into a histogram bucket, // which includes deserializing most common value tuples into sql.Rows. -func (u *bucketBuilder) finalize(ctx context.Context, ns tree.NodeStore) (DoltBucket, error) { +func (u *bucketBuilder) finalize(ctx context.Context, ns tree.NodeStore) (*stats.Bucket, error) { // update MCV in case we've ended on a run of many identical keys u.updateMcv() @@ -279,27 +122,25 @@ func (u *bucketBuilder) finalize(ctx context.Context, ns tree.NodeStore) (DoltBu // convert the MCV tuples into SQL rows (most efficient to only do this once) mcvRows, err := u.mcvs.Values(ctx, u.tupleDesc, ns, u.prefixLen) if err != nil { - return DoltBucket{}, err + return nil, err } upperBound := make(sql.Row, u.prefixLen) if u.currentKey != nil { for i := 0; i < u.prefixLen; i++ { upperBound[i], err = tree.GetField(ctx, u.tupleDesc, i, u.currentKey, ns) if err != nil { - return DoltBucket{}, err + return nil, err } } } - return DoltBucket{ - Bucket: &stats.Bucket{ - RowCnt: uint64(u.count), - DistinctCnt: uint64(u.distinct), - BoundCnt: uint64(u.currentCnt), - McvVals: mcvRows, - McvsCnt: u.mcvs.Counts(), - BoundVal: upperBound, - NullCnt: uint64(u.nulls), - }, + return &stats.Bucket{ + RowCnt: uint64(u.count), + DistinctCnt: uint64(u.distinct), + BoundCnt: uint64(u.currentCnt), + McvVals: mcvRows, + McvsCnt: u.mcvs.Counts(), + BoundVal: upperBound, + NullCnt: uint64(u.nulls), }, nil } diff --git a/go/libraries/doltcore/sqle/statspro/update_test.go b/go/libraries/doltcore/sqle/statspro/bucket_builder_test.go 
similarity index 92% rename from go/libraries/doltcore/sqle/statspro/update_test.go rename to go/libraries/doltcore/sqle/statspro/bucket_builder_test.go index b599dfb390b..0e4daaf8500 100644 --- a/go/libraries/doltcore/sqle/statspro/update_test.go +++ b/go/libraries/doltcore/sqle/statspro/bucket_builder_test.go @@ -1,4 +1,4 @@ -// Copyright 2023 Dolthub, Inc. +// Copyright 2023-2025 Dolthub, Inc. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -61,27 +61,27 @@ func TestBucketBuilder(t *testing.T) { name string keys []sql.Row keyDesc val.TupleDesc - bucket DoltBucket + bucket *stats.Bucket }{ { name: "ints", keys: []sql.Row{{1}, {1}, {1}, {2}, {2}, {2}, {2}, {3}, {3}, {3}, {4}, {4}, {4}, {5}, {5}}, keyDesc: val.NewTupleDescriptor(val.Type{Enc: val.Int64Enc, Nullable: false}), - bucket: DoltBucket{Bucket: &stats.Bucket{ + bucket: &stats.Bucket{ RowCnt: 15, DistinctCnt: 5, McvVals: []sql.Row{}, McvsCnt: []uint64{}, BoundVal: sql.Row{int64(5)}, BoundCnt: 2, - }}, + }, }, { // technically nulls should be at beginning name: "ints with middle nulls", keys: []sql.Row{{1}, {1}, {1}, {2}, {2}, {2}, {2}, {nil}, {nil}, {nil}, {3}, {4}, {4}, {4}, {5}, {5}}, keyDesc: val.NewTupleDescriptor(val.Type{Enc: val.Int64Enc, Nullable: true}), - bucket: DoltBucket{Bucket: &stats.Bucket{ + bucket: &stats.Bucket{ RowCnt: 16, DistinctCnt: 6, NullCnt: 3, @@ -89,13 +89,13 @@ func TestBucketBuilder(t *testing.T) { McvsCnt: []uint64{}, BoundVal: sql.Row{int64(5)}, BoundCnt: 2, - }}, + }, }, { name: "ints with beginning nulls", keys: []sql.Row{{nil}, {nil}, {1}, {2}, {2}, {2}, {2}, {3}, {3}, {3}, {4}, {4}, {4}, {5}, {5}}, keyDesc: val.NewTupleDescriptor(val.Type{Enc: val.Int64Enc, Nullable: true}), - bucket: DoltBucket{Bucket: &stats.Bucket{ + bucket: &stats.Bucket{ RowCnt: 15, DistinctCnt: 6, NullCnt: 2, @@ -103,86 +103,86 @@ func TestBucketBuilder(t *testing.T) { McvsCnt: []uint64{}, BoundVal: 
sql.Row{int64(5)}, BoundCnt: 2, - }}, + }, }, { name: "more ints", keys: []sql.Row{{1}, {1}, {1}, {2}, {2}, {2}, {2}, {3}, {3}, {3}, {4}, {4}, {4}, {5}, {5}, {5}, {5}, {6}, {6}, {6}, {6}, {7}}, keyDesc: val.NewTupleDescriptor(val.Type{Enc: val.Int64Enc, Nullable: false}), - bucket: DoltBucket{Bucket: &stats.Bucket{ + bucket: &stats.Bucket{ RowCnt: 22, DistinctCnt: 7, BoundCnt: 1, McvVals: []sql.Row{}, McvsCnt: []uint64{}, BoundVal: sql.Row{int64(7)}, - }}, + }, }, { name: "2-ints", keys: []sql.Row{{1, 1}, {1, 1}, {1, 2}, {2, 1}, {2, 2}, {2, 3}, {2, 3}, {3, 1}, {3, 2}, {3, 3}, {4, 1}, {4, 1}, {4, 1}, {5, 1}, {5, 2}}, keyDesc: val.NewTupleDescriptor(val.Type{Enc: val.Int64Enc, Nullable: false}, val.Type{Enc: val.Int64Enc, Nullable: false}), - bucket: DoltBucket{Bucket: &stats.Bucket{ + bucket: &stats.Bucket{ RowCnt: 15, DistinctCnt: 11, McvVals: []sql.Row{{int64(4), int64(1)}}, McvsCnt: []uint64{3}, BoundVal: sql.Row{int64(5), int64(2)}, BoundCnt: 1, - }}, + }, }, { name: "2-ints with nulls", keys: []sql.Row{{nil, 1}, {1, nil}, {1, 2}, {2, nil}, {2, 2}}, keyDesc: val.NewTupleDescriptor(val.Type{Enc: val.Int64Enc, Nullable: true}, val.Type{Enc: val.Int64Enc, Nullable: true}), - bucket: DoltBucket{Bucket: &stats.Bucket{ + bucket: &stats.Bucket{ RowCnt: 5, DistinctCnt: 5, NullCnt: 3, McvVals: []sql.Row{}, McvsCnt: []uint64{}, BoundVal: sql.Row{int64(2), int64(2)}, - BoundCnt: 1}, + BoundCnt: 1, }, }, { name: "varchars", keys: []sql.Row{{"a"}, {"b"}, {"c"}, {"d"}, {"e"}, {"e"}, {"f"}, {"g"}, {"g"}, {"g"}, {"h"}, {"h"}, {"h"}, {"i"}, {"i"}}, keyDesc: val.NewTupleDescriptor(val.Type{Enc: val.StringEnc, Nullable: false}), - bucket: DoltBucket{Bucket: &stats.Bucket{ + bucket: &stats.Bucket{ RowCnt: 15, DistinctCnt: 9, McvVals: []sql.Row{}, McvsCnt: []uint64{}, BoundVal: sql.Row{"i"}, BoundCnt: 2, - }}, + }, }, { name: "varchar-ints", keys: []sql.Row{{"a", 1}, {"b", 1}, {"c", 1}, {"d", 1}, {"e", 1}, {"e", 2}, {"f", 1}, {"g", 1}, {"g", 2}, {"g", 2}, {"h", 1}, {"h", 1}, {"h", 
2}, {"i", 1}, {"i", 1}}, keyDesc: val.NewTupleDescriptor(val.Type{Enc: val.StringEnc, Nullable: false}, val.Type{Enc: val.Int64Enc, Nullable: false}), - bucket: DoltBucket{Bucket: &stats.Bucket{ + bucket: &stats.Bucket{ RowCnt: 15, DistinctCnt: 12, McvVals: []sql.Row{}, McvsCnt: []uint64{}, BoundVal: sql.Row{"i", int64(1)}, BoundCnt: 2, - }}, + }, }, { name: "mcvs", keys: []sql.Row{{1}, {2}, {3}, {4}, {5}, {6}, {7}, {7}, {7}, {7}, {8}, {9}, {10}, {10}, {10}, {11}, {12}, {13}, {14}, {15}, {20}, {21}, {22}}, keyDesc: val.NewTupleDescriptor(val.Type{Enc: val.Int64Enc, Nullable: false}), - bucket: DoltBucket{Bucket: &stats.Bucket{ + bucket: &stats.Bucket{ RowCnt: 23, DistinctCnt: 18, McvVals: []sql.Row{{int64(10)}, {int64(7)}}, McvsCnt: []uint64{3, 4}, BoundVal: sql.Row{int64(22)}, BoundCnt: 1, - }}, + }, }, } diff --git a/go/libraries/doltcore/sqle/statspro/configure.go b/go/libraries/doltcore/sqle/statspro/configure.go deleted file mode 100644 index 02f15c38e02..00000000000 --- a/go/libraries/doltcore/sqle/statspro/configure.go +++ /dev/null @@ -1,161 +0,0 @@ -// Copyright 2024 Dolthub, Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -package statspro - -import ( - "context" - "fmt" - "strings" - "time" - - "github.com/dolthub/go-mysql-server/sql" - types2 "github.com/dolthub/go-mysql-server/sql/types" - - "github.com/dolthub/dolt/go/libraries/doltcore/env" - "github.com/dolthub/dolt/go/libraries/doltcore/sqle" - "github.com/dolthub/dolt/go/libraries/doltcore/sqle/dsess" - "github.com/dolthub/dolt/go/libraries/utils/filesys" -) - -var helpMsg = "call dolt_stats_purge() to reset statistics" - -func (p *Provider) Configure(ctx context.Context, ctxFactory func(ctx context.Context) (*sql.Context, error), bThreads *sql.BackgroundThreads, dbs []dsess.SqlDatabase) error { - p.SetStarter(NewStatsInitDatabaseHook(p, ctxFactory, bThreads)) - - if _, disabled, _ := sql.SystemVariables.GetGlobal(dsess.DoltStatsMemoryOnly); disabled == int8(1) { - return nil - } - - loadCtx, err := ctxFactory(ctx) - if err != nil { - return err - } - defer sql.SessionEnd(loadCtx.Session) - sql.SessionCommandBegin(loadCtx.Session) - defer sql.SessionCommandEnd(loadCtx.Session) - - branches := p.getStatsBranches(loadCtx) - - var autoEnabled bool - var startupEnabled bool - var intervalSec time.Duration - var thresholdf64 float64 - if _, enabled, _ := sql.SystemVariables.GetGlobal(dsess.DoltStatsAutoRefreshEnabled); enabled == int8(1) { - autoEnabled = true - _, threshold, _ := sql.SystemVariables.GetGlobal(dsess.DoltStatsAutoRefreshThreshold) - _, interval, _ := sql.SystemVariables.GetGlobal(dsess.DoltStatsAutoRefreshInterval) - interval64, _, _ := types2.Int64.Convert(interval) - intervalSec = time.Second * time.Duration(interval64.(int64)) - thresholdf64 = threshold.(float64) - - p.pro.InitDatabaseHooks = append(p.pro.InitDatabaseHooks, NewStatsInitDatabaseHook(p, ctxFactory, bThreads)) - p.pro.DropDatabaseHooks = append([]sqle.DropDatabaseHook{NewStatsDropDatabaseHook(p)}, p.pro.DropDatabaseHooks...) 
- } else if _, startupStats, _ := sql.SystemVariables.GetGlobal(dsess.DoltStatsBootstrapEnabled); startupStats == int8(1) { - startupEnabled = true - } - - eg, ctx := loadCtx.NewErrgroup() - for _, db := range dbs { - // copy closure variables - db := db - eg.Go(func() (err error) { - defer func() { - if r := recover(); r != nil { - if str, ok := r.(fmt.Stringer); ok { - err = fmt.Errorf("%w: %s", ErrFailedToLoad, str.String()) - } else { - err = fmt.Errorf("%w: %v", ErrFailedToLoad, r) - } - return - } - }() - - fs, err := p.pro.FileSystemForDatabase(db.Name()) - if err != nil { - return err - } - - if p.Load(loadCtx, fs, db, branches); err != nil { - return err - } - if autoEnabled { - return p.InitAutoRefreshWithParams(ctxFactory, db.Name(), bThreads, intervalSec, thresholdf64, branches) - } else if startupEnabled { - if err := p.BootstrapDatabaseStats(loadCtx, db.Name()); err != nil { - return err - } - } - return nil - }) - } - return eg.Wait() -} - -// getStatsBranches returns the set of branches whose statistics are tracked. -// The order of precedence is (1) global variable, (2) session current branch, -// (3) engine default branch. -func (p *Provider) getStatsBranches(ctx *sql.Context) []string { - dSess := dsess.DSessFromSess(ctx.Session) - var branches []string - if _, bs, _ := sql.SystemVariables.GetGlobal(dsess.DoltStatsBranches); bs == "" { - defaultBranch, _ := dSess.GetBranch() - if defaultBranch != "" { - branches = append(branches, defaultBranch) - } - } else { - for _, branch := range strings.Split(bs.(string), ",") { - branches = append(branches, strings.TrimSpace(branch)) - } - } - - if branches == nil { - branches = append(branches, p.pro.DefaultBranch()) - } - return branches -} - -func (p *Provider) LoadStats(ctx *sql.Context, db, branch string) error { - if statDb, ok := p.getStatDb(db); ok { - return statDb.LoadBranchStats(ctx, branch) - } - return nil -} - -// Load scans the statistics tables, populating the |stats| attribute. 
-// Statistics are not available for reading until we've finished loading. -func (p *Provider) Load(ctx *sql.Context, fs filesys.Filesys, db dsess.SqlDatabase, branches []string) { - // |statPath| is either file://./stat or mem://stat - statsDb, err := p.sf.Init(ctx, db, p.pro, fs, env.GetCurrentUserHomeDir) - if err != nil { - ctx.GetLogger().Errorf("initialize stats failure for %s: %s; %s\n", db.Name(), err.Error(), helpMsg) - return - } - - for _, branch := range branches { - if err = statsDb.LoadBranchStats(ctx, branch); err != nil { - // if branch name is invalid, continue loading rest - // TODO: differentiate bad branch name from other errors - ctx.GetLogger().Errorf("load stats init failure for %s: %s; %s\n", db.Name(), err.Error(), helpMsg) - continue - } - if err := statsDb.Flush(ctx, branch); err != nil { - ctx.GetLogger().Errorf("load stats flush failure for %s: %s; %s\n", db.Name(), err.Error(), helpMsg) - continue - } - } - - p.setStatDb(strings.ToLower(db.Name()), statsDb) - return -} diff --git a/go/libraries/doltcore/sqle/statspro/controller.go b/go/libraries/doltcore/sqle/statspro/controller.go new file mode 100644 index 00000000000..c70a38c8e18 --- /dev/null +++ b/go/libraries/doltcore/sqle/statspro/controller.go @@ -0,0 +1,630 @@ +// Copyright 2025 Dolthub, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package statspro + +import ( + "context" + "encoding/json" + "errors" + "fmt" + "log" + "path" + "path/filepath" + "strings" + "sync" + "sync/atomic" + "time" + + "github.com/dolthub/go-mysql-server/sql" + "github.com/dolthub/go-mysql-server/sql/stats" + "github.com/sirupsen/logrus" + + "github.com/dolthub/dolt/go/cmd/dolt/doltversion" + "github.com/dolthub/dolt/go/libraries/doltcore/dbfactory" + "github.com/dolthub/dolt/go/libraries/doltcore/doltdb" + "github.com/dolthub/dolt/go/libraries/doltcore/env" + "github.com/dolthub/dolt/go/libraries/doltcore/sqle" + "github.com/dolthub/dolt/go/libraries/doltcore/sqle/dprocedures" + "github.com/dolthub/dolt/go/libraries/doltcore/sqle/dsess" + "github.com/dolthub/dolt/go/libraries/doltcore/sqle/statspro/jobqueue" + "github.com/dolthub/dolt/go/libraries/doltcore/table/editor" + "github.com/dolthub/dolt/go/libraries/utils/earl" + "github.com/dolthub/dolt/go/libraries/utils/filesys" + "github.com/dolthub/dolt/go/store/hash" + "github.com/dolthub/dolt/go/store/types" + "github.com/dolthub/dolt/go/store/val" +) + +var _ sql.StatsProvider = (*StatsController)(nil) + +type ctxFactory func(ctx context.Context) (*sql.Context, error) + +type tableIndexesKey struct { + db string + branch string + table string + schema string +} + +func (k tableIndexesKey) String() string { + if k.table != "" { + return k.schema + "/" + k.db + "/" + k.branch + "/" + k.table + } + return k.db + "/" + k.branch + "/" + k.table +} + +type StatsController struct { + logger *logrus.Logger + pro *sqle.DoltDatabaseProvider + bgThreads *sql.BackgroundThreads + statsBackingDb filesys.Filesys + hdpEnv *env.DoltEnv + + dbFs map[string]filesys.Filesys + + // ctxGen lets us fetch the most recent working root + ctxGen ctxFactory + + sq *jobqueue.SerialQueue + + activeCtxCancel context.CancelFunc + listeners []listener + + JobInterval time.Duration + gcInterval time.Duration + memOnly bool + enableGc bool + doGc bool + Debug bool + closed chan struct{} + + // kv is 
a content-addressed cache of histogram objects: + // buckets, first bounds, and schema-specific statistic + // templates. + kv StatsKv + // Stats tracks table statistics accessible to sessions. + Stats *rootStats + // mu protects all shared object access + mu sync.Mutex + // genCnt is used to atomically swap Stats, same behavior + // as last-writer wins + genCnt atomic.Uint64 + gcCnt int +} + +type rootStats struct { + hashes map[tableIndexesKey]hash.Hash + stats map[tableIndexesKey][]*stats.Statistic + DbCnt int `json:"dbCnt"` + BucketWrites int `json:"bucketWrites"` + TablesProcessed int `json:"tablesProcessed"` + TablesSkipped int `json:"tablesSkipped"` +} + +func newRootStats() *rootStats { + return &rootStats{ + hashes: make(map[tableIndexesKey]hash.Hash), + stats: make(map[tableIndexesKey][]*stats.Statistic), + } +} + +func (rs *rootStats) String() string { + str, _ := json.Marshal(rs) + return string(str) +} + +func NewStatsController(logger *logrus.Logger, dEnv *env.DoltEnv) *StatsController { + sq := jobqueue.NewSerialQueue().WithErrorCb(func(err error) { + logger.Error(err) + }) + + return &StatsController{ + mu: sync.Mutex{}, + logger: logger, + JobInterval: 500 * time.Millisecond, + gcInterval: 24 * time.Hour, + sq: sq, + Stats: newRootStats(), + dbFs: make(map[string]filesys.Filesys), + closed: make(chan struct{}), + kv: NewMemStats(), + hdpEnv: dEnv, + genCnt: atomic.Uint64{}, + } +} + +func (sc *StatsController) SetBackgroundThreads(bgThreads *sql.BackgroundThreads) { + sc.bgThreads = bgThreads +} + +func (sc *StatsController) SetMemOnly(v bool) { + sc.mu.Lock() + defer sc.mu.Unlock() + sc.memOnly = v +} + +func (sc *StatsController) SetEnableGc(v bool) { + sc.mu.Lock() + defer sc.mu.Unlock() + sc.enableGc = v +} + +func (sc *StatsController) setDoGc(force bool) { + sc.mu.Lock() + defer sc.mu.Unlock() + if sc.enableGc || force { + sc.doGc = true + } +} + +func (sc *StatsController) gcIsSet() bool { + sc.mu.Lock() + defer sc.mu.Unlock() + return 
sc.doGc +} + +// SetTimers can only be called after Init +func (sc *StatsController) SetTimers(job, gc int64) { + sc.mu.Lock() + defer sc.mu.Unlock() + sc.sq.NewRateLimit(time.Duration(max(1, job))) + sc.gcInterval = time.Duration(gc) +} + +func (sc *StatsController) AddFs(ctx *sql.Context, db dsess.SqlDatabase, fs filesys.Filesys, rotateOk bool) error { + sc.mu.Lock() + defer sc.mu.Unlock() + + firstDb := len(sc.dbFs) == 0 + sc.dbFs[db.AliasedName()] = fs + if rotateOk && firstDb { + return sc.lockedRotateStorage(ctx) + } + return nil +} + +func (sc *StatsController) Info(ctx context.Context) (dprocedures.StatsInfo, error) { + sc.mu.Lock() + defer sc.mu.Unlock() + + // don't use protected access / deadlock + cachedBucketCnt := sc.kv.Len() + storageCnt, err := sc.kv.Flush(ctx) + if err != nil { + return dprocedures.StatsInfo{}, err + } + + var cachedBoundCnt int + var cachedTemplateCnt int + var backing string + switch kv := sc.kv.(type) { + case *memStats: + cachedBoundCnt = len(kv.bounds) + cachedTemplateCnt = len(kv.templates) + backing = "memory" + case *prollyStats: + cachedBoundCnt = len(kv.mem.bounds) + cachedTemplateCnt = len(kv.mem.templates) + backing, _ = sc.statsBackingDb.Abs("") + } + return dprocedures.StatsInfo{ + DbCnt: sc.Stats.DbCnt, + Active: sc.activeCtxCancel != nil, + CachedBucketCnt: cachedBucketCnt, + StorageBucketCnt: storageCnt, + CachedBoundCnt: cachedBoundCnt, + CachedTemplateCnt: cachedTemplateCnt, + StatCnt: len(sc.Stats.stats), + GenCnt: int(sc.genCnt.Load()), + GcCnt: sc.gcCnt, + Backing: filepath.Base(backing), + }, nil +} + +func (sc *StatsController) descError(d string, err error) { + if errors.Is(err, context.Canceled) { + return + } + if sc.Debug { + log.Println("stats error: ", err.Error()) + } + b := strings.Builder{} + b.WriteString("stats error;") + if d != "" { + b.WriteString("; " + d) + } + if err != nil { + b.WriteString("; " + err.Error()) + } + sc.logger.Debug(b.String()) +} + +func (sc *StatsController) 
GetTableStats(ctx *sql.Context, db string, table sql.Table) ([]sql.Statistic, error) { + key, err := sc.statsKey(ctx, db, table.Name()) + if err != nil { + return nil, err + } + sc.mu.Lock() + defer sc.mu.Unlock() + if sc.Stats == nil { + return nil, nil + } + st := sc.Stats.stats[key] + var ret []sql.Statistic + for _, s := range st { + ret = append(ret, s) + } + return ret, nil +} + +func (sc *StatsController) AnalyzeTable(ctx *sql.Context, table sql.Table, dbName string) (err error) { + dSess := dsess.DSessFromSess(ctx.Session) + + var branch string + if strings.Contains(dbName, "/") { + parts := strings.Split(dbName, "/") + if len(parts) == 2 { + dbName = parts[0] + branch = parts[1] + } + } + if branch == "" { + var err error + branch, err = dSess.GetBranch() + if err != nil { + return err + } + + if branch == "" { + branch = env.DefaultInitBranch + } + } + + db, err := sc.pro.Database(ctx, dbName) + sqlDb, err := sqle.RevisionDbForBranch(ctx, db.(dsess.SqlDatabase), branch, branch+"/"+dbName) + if err != nil { + return err + } + + newStats := newRootStats() + err = sc.updateTable(ctx, newStats, table.Name(), sqlDb, nil) + if err != nil { + return err + } + + sc.mu.Lock() + for k, v := range newStats.stats { + sc.Stats.stats[k] = v + sc.Stats.hashes[k] = newStats.hashes[k] + } + sc.mu.Unlock() + + return err +} + +func (sc *StatsController) SetStats(ctx *sql.Context, s sql.Statistic) error { + sc.mu.Lock() + defer sc.mu.Unlock() + ss, ok := s.(*stats.Statistic) + if !ok { + return fmt.Errorf("expected *stats.Statistics, found %T", s) + } + key, err := sc.statsKey(ctx, ss.Qualifier().Db(), ss.Qualifier().Table()) + if err != nil { + return err + } + + // not efficient, but this is only used for testing + var newStats []*stats.Statistic + for _, ss := range sc.Stats.stats[key] { + if !strings.EqualFold(ss.Qualifier().Index(), s.Qualifier().Index()) { + newStats = append(newStats, ss) + } + } + newStats = append(newStats, ss) + sc.Stats.stats[key] = newStats + 
return nil +} + +func (sc *StatsController) GetStats(ctx *sql.Context, qual sql.StatQualifier, cols []string) (sql.Statistic, bool) { + sc.mu.Lock() + defer sc.mu.Unlock() + key, err := sc.statsKey(ctx, qual.Database, qual.Table()) + if err != nil { + return nil, false + } + for _, s := range sc.Stats.stats[key] { + if strings.EqualFold(s.Qualifier().Index(), qual.Index()) { + return s, true + } + } + return nil, false +} + +func (sc *StatsController) GetTableDoltStats(ctx *sql.Context, branch, db, schema, table string) ([]*stats.Statistic, error) { + key := tableIndexesKey{ + db: strings.ToLower(db), + branch: strings.ToLower(branch), + table: strings.ToLower(table), + schema: strings.ToLower(schema), + } + sc.mu.Lock() + defer sc.mu.Unlock() + if sc.Stats == nil { + return nil, nil + } + return sc.Stats.stats[key], nil +} + +func (sc *StatsController) DropStats(ctx *sql.Context, qual sql.StatQualifier, cols []string) error { + key, err := sc.statsKey(ctx, qual.Database, qual.Table()) + if err != nil { + return err + } + sc.mu.Lock() + defer sc.mu.Unlock() + delete(sc.Stats.stats, key) + return nil +} + +func (sc *StatsController) DropDbStats(ctx *sql.Context, dbName string, flush bool) error { + sc.mu.Lock() + defer sc.mu.Unlock() + + dbFs := sc.dbFs[dbName] + delete(sc.dbFs, dbName) + if sc.statsBackingDb == dbFs { + // don't wait to see if the thread context is invalidated + func() { + sc.mu.Unlock() + sc.Restart() + defer sc.mu.Lock() + }() + if err := sc.lockedRotateStorage(ctx); err != nil { + return err + } + } + + var deleteKeys []tableIndexesKey + for k, _ := range sc.Stats.stats { + if strings.EqualFold(dbName, k.db) { + deleteKeys = append(deleteKeys, k) + } + } + for _, k := range deleteKeys { + delete(sc.Stats.stats, k) + } + return nil +} + +func (sc *StatsController) statsKey(ctx *sql.Context, dbName, table string) (tableIndexesKey, error) { + dSess := dsess.DSessFromSess(ctx.Session) + branch, err := dSess.GetBranch() + if err != nil { + return 
tableIndexesKey{}, err + } + key := tableIndexesKey{ + db: strings.ToLower(dbName), + branch: strings.ToLower(branch), + table: strings.ToLower(table), + } + return key, nil +} + +func (sc *StatsController) RowCount(ctx *sql.Context, dbName string, table sql.Table) (uint64, error) { + key, err := sc.statsKey(ctx, dbName, table.Name()) + if err != nil { + return 0, err + } + sc.mu.Lock() + defer sc.mu.Unlock() + for _, s := range sc.Stats.stats[key] { + if strings.EqualFold(s.Qualifier().Index(), "PRIMARY") { + return s.RowCnt, nil + } + } + return 0, nil +} + +func (sc *StatsController) DataLength(ctx *sql.Context, dbName string, table sql.Table) (uint64, error) { + key, err := sc.statsKey(ctx, dbName, table.Name()) + if err != nil { + return 0, err + } + sc.mu.Lock() + defer sc.mu.Unlock() + for _, s := range sc.Stats.stats[key] { + if strings.EqualFold(s.Qualifier().Index(), "PRIMARY") { + return s.RowCnt, nil + } + } + return 0, nil +} + +func (sc *StatsController) Purge(ctx *sql.Context) error { + genStart := sc.genCnt.Load() + newKv := NewMemStats() + newKv.gcGen = genStart + newStats := newRootStats() + if ok, err := sc.trySwapStats(ctx, genStart, newStats, newKv); !ok { + return fmt.Errorf("failed to purge stats") + } else if err != nil { + return err + } + return nil +} + +func (sc *StatsController) rotateStorage(ctx context.Context) error { + sc.mu.Lock() + defer sc.mu.Unlock() + return sc.lockedRotateStorage(ctx) +} + +func (sc *StatsController) lockedRotateStorage(ctx context.Context) error { + if sc.memOnly { + return nil + } + if sc.statsBackingDb != nil { + if err := sc.rm(sc.statsBackingDb); err != nil { + return err + } + } + + var mem *memStats + switch kv := sc.kv.(type) { + case *prollyStats: + mem = kv.mem + case *memStats: + mem = kv + default: + mem = NewMemStats() + } + + if len(sc.dbFs) == 0 { + sc.kv = mem + sc.statsBackingDb = nil + return nil + } + + var newStorageTarget filesys.Filesys + for _, dbFs := range sc.dbFs { + newStorageTarget 
= dbFs + if newStorageTarget == sc.statsBackingDb { + // prefer continuity when possible + break + } + } + + if err := sc.rm(newStorageTarget); err != nil { + return err + } + + sqlCtx, err := sc.ctxGen(ctx) + if err != nil { + return err + } + defer sql.SessionEnd(sqlCtx.Session) + sql.SessionCommandBegin(sqlCtx.Session) + defer sql.SessionCommandEnd(sqlCtx.Session) + + newKv, err := sc.initStorage(sqlCtx, newStorageTarget) + if err != nil { + return err + } + + newKv.mem = mem + sc.kv = newKv + sc.statsBackingDb = newStorageTarget + return nil +} + +func (sc *StatsController) rm(fs filesys.Filesys) error { + statsFs, err := fs.WithWorkingDir(dbfactory.DoltStatsDir) + if err != nil { + return err + } + + if ok, _ := statsFs.Exists(""); ok { + if err := statsFs.Delete("", true); err != nil { + return err + } + } + + dropDbLoc, err := statsFs.Abs("") + if err != nil { + return err + } + + //log.Println("rm", dropDbLoc) + + if err = dbfactory.DeleteFromSingletonCache(filepath.ToSlash(dropDbLoc + "/.dolt/noms")); err != nil { + return err + } + return nil +} + +func (sc *StatsController) initStorage(ctx context.Context, fs filesys.Filesys) (*prollyStats, error) { + if sc.hdpEnv == nil { + return nil, fmt.Errorf("cannot initialize *prollKv, missing homeDirProvider") + } + params := make(map[string]interface{}) + params[dbfactory.GRPCDialProviderParam] = env.NewGRPCDialProviderFromDoltEnv(sc.hdpEnv) + + var urlPath string + u, err := earl.Parse(sc.pro.DbFactoryUrl()) + if u.Scheme == dbfactory.MemScheme { + urlPath = path.Join(sc.pro.DbFactoryUrl(), dbfactory.DoltDataDir) + } else if u.Scheme == dbfactory.FileScheme { + urlPath = doltdb.LocalDirDoltDB + } + + statsFs, err := fs.WithWorkingDir(dbfactory.DoltStatsDir) + if err != nil { + return nil, err + } + + var dEnv *env.DoltEnv + exists, isDir := statsFs.Exists("") + if !exists { + err := statsFs.MkDirs("") + if err != nil { + return nil, fmt.Errorf("unable to make directory '%s', cause: %s", dbfactory.DoltStatsDir, 
err.Error()) + } + + dEnv = env.Load(ctx, sc.hdpEnv.GetUserHomeDir, statsFs, urlPath, "test") + err = dEnv.InitRepo(ctx, types.Format_Default, "stats", "stats@stats.com", env.DefaultInitBranch) + if err != nil { + return nil, err + } + } else if !isDir { + return nil, fmt.Errorf("file exists where the dolt stats directory should be") + } else { + dEnv = env.LoadWithoutDB(ctx, sc.hdpEnv.GetUserHomeDir, statsFs, "", doltversion.Version) + } + + if err := dEnv.LoadDoltDBWithParams(ctx, types.Format_Default, urlPath, statsFs, params); err != nil { + return nil, err + } + + deaf := dEnv.DbEaFactory(ctx) + + tmpDir, err := dEnv.TempTableFilesDir() + if err != nil { + return nil, err + } + opts := editor.Options{ + Deaf: deaf, + Tempdir: tmpDir, + } + + statsDb, err := sqle.NewDatabase(ctx, "stats", dEnv.DbData(ctx), opts) + if err != nil { + return nil, err + } + m, err := dEnv.DbData(ctx).Ddb.GetStatistics(ctx) + if err == nil { + // use preexisting map + kd, vd := m.Descriptors() + return &prollyStats{ + mu: sync.Mutex{}, + destDb: statsDb, + kb: val.NewTupleBuilder(kd), + vb: val.NewTupleBuilder(vd), + m: m.Mutate(), + mem: NewMemStats(), + }, nil + } + return NewProllyStats(ctx, statsDb) +} diff --git a/go/libraries/doltcore/sqle/statspro/doc.go b/go/libraries/doltcore/sqle/statspro/doc.go new file mode 100644 index 00000000000..54e4cc82a05 --- /dev/null +++ b/go/libraries/doltcore/sqle/statspro/doc.go @@ -0,0 +1,78 @@ +// Copyright 2025 Dolthub, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +package statspro + +// Package statspro provides a queue that manages table statistics +// management and access. +// +// At any given time there is one work generating thread, one scheduling +// thread, and one execution thread. +// +// The worker loop fetches the most recent session root, +// reads all of its databases/tables/ indexes, collects statistics +// for those objects, and updates the shared statistics state. Every +// cycle replaces the shared state. +// +// Work is delegated to the scheduler thread, which serializes +// issuer jobs with concurrent async requests, and rate limits sending +// jobs to the execution thread. The execution thread completes +// function callbacks. +// +// GC occurs within an update cycle. Through a cycle GC populates an +// in-memory cache with the complete and exclusive set of values of +// the new shared statistics object. Both are atomically swapped using +// a generation counter (which may or may not be necessary, but is one +// of several guards against surprising concurrent changes). +// +// Concurrent issuer threads are further restrained with a context list +// that at most one thread owns. There are two contexts, one for the +// thread and another for the specific update cycle. Listeners (like wait) +// use the second context to follow update cycles. Concurrent restarts +// cancel and replace the previous owner's contexts with their own. Atomic +// shared state swaps are likewise guarded on the issuer's context +// integrity. +// +// All stats are persisted within a single database in the `.dolt/stats` +// folder separate from user data. If there are multiple databases, +// one is selected by random as the storage target. If during +// initialization multiple databases have stats, one will be chosen +// by random as the target. 
If a database changes between server +// restarts, the storage stats will be useless but not impair regular +// operations because storage is only ever a best-effort +// content-addressed persistence layer; buckets will be regenerated if +// they are missing. If the database acting as a storage target is +// deleted, we swap the cache and write to a new storage target. +// +// The main data structures: +// - Table statistics map, that returns a list of table index statistics +// for a specific branch, database, and table name. +// - Object caches: +// - Bucket cache: Chunk addressed hash map. All provider histogram +// references point to objects in the bucket cache. Backed by a +// best-effort on-disk prolly.Map to make restarts faster. +// - Template cache: Table-schema/index addressed stats.Statistics object +// for a specific index. +// - Bound cache: Chunk addressed first row for an index histogram. +// +// The stats lifecycle can be controlled with: +// - dolt_stats_stop: clear queue and disable thread +// - dolt_stats_restart: clear queue, refresh queue, start thread +// - dolt_stats_purge: clear queue, refresh queue, clear cache, +// disable thread +// - dolt_stats_once: collect statistics once, ex: in sql-shell +// - dolt_stats_wait: block on a full queue cycle +// - dolt_stats_gc: block waiting for a GC signal +// - dolt_stats_flush: block waiting for a flush signal +// diff --git a/go/libraries/doltcore/sqle/statspro/dolt_stats.go b/go/libraries/doltcore/sqle/statspro/dolt_stats.go deleted file mode 100644 index 4c5d43250c9..00000000000 --- a/go/libraries/doltcore/sqle/statspro/dolt_stats.go +++ /dev/null @@ -1,290 +0,0 @@ -// Copyright 2024 Dolthub, Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package statspro - -import ( - "context" - "fmt" - "sync" - "time" - - "github.com/dolthub/go-mysql-server/sql" - "github.com/dolthub/go-mysql-server/sql/stats" - - "github.com/dolthub/dolt/go/store/hash" - "github.com/dolthub/dolt/go/store/val" -) - -type DoltStats struct { - Statistic *stats.Statistic - mu *sync.Mutex - // Chunks is a list of addresses for the histogram fanout level - Chunks []hash.Hash - // Active maps a chunk/bucket address to its position in - // the histogram. 1-indexed to differentiate from an empty - // field on disk - Active map[hash.Hash]int - Hist sql.Histogram - Tb *val.TupleBuilder -} - -func (s *DoltStats) Clone(_ context.Context) sql.JSONWrapper { - return s -} - -var _ sql.Statistic = (*DoltStats)(nil) - -func (s *DoltStats) SetChunks(h []hash.Hash) { - s.mu.Lock() - defer s.mu.Unlock() - s.Chunks = h -} - -func (s *DoltStats) WithColSet(set sql.ColSet) sql.Statistic { - ret := *s - ret.Statistic = ret.Statistic.WithColSet(set).(*stats.Statistic) - return &ret -} - -func (s *DoltStats) WithFuncDeps(set *sql.FuncDepSet) sql.Statistic { - ret := *s - ret.Statistic = ret.Statistic.WithFuncDeps(set).(*stats.Statistic) - return &ret -} - -func (s *DoltStats) WithDistinctCount(u uint64) sql.Statistic { - ret := *s - ret.Statistic = ret.Statistic.WithDistinctCount(u).(*stats.Statistic) - return &ret -} - -func (s *DoltStats) WithRowCount(u uint64) sql.Statistic { - ret := *s - ret.Statistic = ret.Statistic.WithRowCount(u).(*stats.Statistic) - return &ret -} - -func (s *DoltStats) WithNullCount(u uint64) sql.Statistic { 
- ret := *s - ret.Statistic = ret.Statistic.WithNullCount(u).(*stats.Statistic) - return &ret -} - -func (s *DoltStats) WithAvgSize(u uint64) sql.Statistic { - ret := *s - ret.Statistic = ret.Statistic.WithAvgSize(u).(*stats.Statistic) - return &ret -} - -func (s *DoltStats) WithLowerBound(row sql.Row) sql.Statistic { - ret := *s - ret.Statistic = ret.Statistic.WithLowerBound(row).(*stats.Statistic) - return &ret -} - -func (s *DoltStats) RowCount() uint64 { - return s.Statistic.RowCount() -} - -func (s *DoltStats) DistinctCount() uint64 { - return s.Statistic.DistinctCount() -} - -func (s *DoltStats) NullCount() uint64 { - return s.Statistic.NullCount() - -} - -func (s *DoltStats) AvgSize() uint64 { - return s.Statistic.AvgSize() - -} - -func (s *DoltStats) CreatedAt() time.Time { - return s.Statistic.CreatedAt() - -} - -func (s *DoltStats) Columns() []string { - return s.Statistic.Columns() -} - -func (s *DoltStats) Types() []sql.Type { - return s.Statistic.Types() -} - -func (s *DoltStats) Qualifier() sql.StatQualifier { - return s.Statistic.Qualifier() -} - -func (s *DoltStats) IndexClass() sql.IndexClass { - return s.Statistic.IndexClass() -} - -func (s *DoltStats) FuncDeps() *sql.FuncDepSet { - return s.Statistic.FuncDeps() -} - -func (s *DoltStats) ColSet() sql.ColSet { - return s.Statistic.ColSet() -} - -func (s *DoltStats) LowerBound() sql.Row { - return s.Statistic.LowerBound() -} - -func NewDoltStats() *DoltStats { - return &DoltStats{mu: &sync.Mutex{}, Active: make(map[hash.Hash]int), Statistic: &stats.Statistic{}} -} - -func (s *DoltStats) ToInterface() (interface{}, error) { - statVal, err := s.Statistic.ToInterface() - if err != nil { - return nil, err - } - ret := statVal.(map[string]interface{}) - - var hist sql.Histogram - for _, b := range s.Hist { - hist = append(hist, b) - } - histVal, err := hist.ToInterface() - if err != nil { - return nil, err - } - ret["statistic"].(map[string]interface{})["buckets"] = histVal - return ret, nil -} - -func 
(s *DoltStats) WithHistogram(h sql.Histogram) (sql.Statistic, error) { - s.mu.Lock() - defer s.mu.Unlock() - ret := *s - ret.Hist = nil - for _, b := range h { - doltB, ok := b.(DoltBucket) - if !ok { - return nil, fmt.Errorf("invalid bucket type: %T, %s", b, h.DebugString()) - } - ret.Hist = append(ret.Hist, doltB) - } - return &ret, nil -} - -func (s *DoltStats) Histogram() sql.Histogram { - s.mu.Lock() - defer s.mu.Unlock() - return s.Hist -} - -func DoltStatsFromSql(stat sql.Statistic) (*DoltStats, error) { - hist, err := DoltHistFromSql(stat.Histogram(), stat.Types()) - if err != nil { - return nil, err - } - ret := &DoltStats{ - mu: &sync.Mutex{}, - Hist: hist, - Statistic: stats.NewStatistic(stat.RowCount(), stat.DistinctCount(), stat.NullCount(), stat.AvgSize(), stat.CreatedAt(), stat.Qualifier(), stat.Columns(), stat.Types(), nil, stat.IndexClass(), stat.LowerBound()), - Active: make(map[hash.Hash]int), - } - ret.Statistic.Fds = stat.FuncDeps() - ret.Statistic.Colset = stat.ColSet() - return ret, nil -} - -func (s *DoltStats) UpdateActive() { - s.mu.Lock() - defer s.mu.Unlock() - newActive := make(map[hash.Hash]int) - for i, hash := range s.Chunks { - newActive[hash] = i - } - s.Active = newActive -} - -type DoltHistogram []DoltBucket - -type DoltBucket struct { - Bucket *stats.Bucket - Chunk hash.Hash - Created time.Time -} - -func (d DoltBucket) RowCount() uint64 { - return d.Bucket.RowCount() -} - -func (d DoltBucket) DistinctCount() uint64 { - return d.Bucket.DistinctCount() -} - -func (d DoltBucket) NullCount() uint64 { - return d.Bucket.NullCount() -} - -func (d DoltBucket) BoundCount() uint64 { - return d.Bucket.BoundCount() -} - -func (d DoltBucket) UpperBound() sql.Row { - return d.Bucket.UpperBound() -} - -func (d DoltBucket) McvCounts() []uint64 { - return d.Bucket.McvCounts() -} - -func (d DoltBucket) Mcvs() []sql.Row { - return d.Bucket.Mcvs() -} - -func DoltBucketChunk(b sql.HistogramBucket) hash.Hash { - return b.(DoltBucket).Chunk -} - 
-func DoltBucketCreated(b sql.HistogramBucket) time.Time { - return b.(DoltBucket).Created -} - -var _ sql.HistogramBucket = (*DoltBucket)(nil) - -func DoltHistFromSql(hist sql.Histogram, types []sql.Type) (sql.Histogram, error) { - ret := make(sql.Histogram, len(hist)) - var err error - for i, b := range hist { - upperBound := make(sql.Row, len(b.UpperBound())) - for i, v := range b.UpperBound() { - upperBound[i], _, err = types[i].Convert(v) - if err != nil { - return nil, fmt.Errorf("failed to convert %v to type %s", v, types[i].String()) - } - } - mcvs := make([]sql.Row, len(b.Mcvs())) - for i, mcv := range b.Mcvs() { - for _, v := range mcv { - conv, _, err := types[i].Convert(v) - if err != nil { - return nil, fmt.Errorf("failed to convert %v to type %s", v, types[i].String()) - } - mcvs[i] = append(mcvs[i], conv) - } - } - ret[i] = DoltBucket{ - Bucket: stats.NewHistogramBucket(b.RowCount(), b.DistinctCount(), b.NullCount(), b.BoundCount(), upperBound, b.McvCounts(), mcvs).(*stats.Bucket), - } - } - return ret, nil -} diff --git a/go/libraries/doltcore/sqle/statspro/initdbhook.go b/go/libraries/doltcore/sqle/statspro/initdbhook.go index 8e11408ea59..9996a077f81 100644 --- a/go/libraries/doltcore/sqle/statspro/initdbhook.go +++ b/go/libraries/doltcore/sqle/statspro/initdbhook.go @@ -1,4 +1,4 @@ -// Copyright 2024 Dolthub, Inc. +// Copyright 2025 Dolthub, Inc. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
@@ -15,10 +15,6 @@ package statspro import ( - "context" - "fmt" - "strings" - "github.com/dolthub/go-mysql-server/sql" "github.com/dolthub/dolt/go/libraries/doltcore/env" @@ -26,67 +22,33 @@ import ( "github.com/dolthub/dolt/go/libraries/doltcore/sqle/dsess" ) -func NewStatsInitDatabaseHook( - statsProv *Provider, - ctxFactory func(ctx context.Context) (*sql.Context, error), - bThreads *sql.BackgroundThreads, -) sqle.InitDatabaseHook { +func NewInitDatabaseHook(sc *StatsController) sqle.InitDatabaseHook { return func( ctx *sql.Context, - pro *sqle.DoltDatabaseProvider, + _ *sqle.DoltDatabaseProvider, name string, denv *env.DoltEnv, db dsess.SqlDatabase, ) error { - dbName := strings.ToLower(db.Name()) - if statsDb, ok := statsProv.getStatDb(dbName); !ok { - statsDb, err := statsProv.sf.Init(ctx, db, statsProv.pro, denv.FS, env.GetCurrentUserHomeDir) - if err != nil { - ctx.GetLogger().Debugf("statistics load error: %s", err.Error()) - return nil - } - statsProv.setStatDb(dbName, statsDb) - } else { - dSess := dsess.DSessFromSess(ctx.Session) - for _, br := range statsDb.Branches() { - branchQDbName := BranchQualifiedDatabase(dbName, br) - sqlDb, err := dSess.Provider().Database(ctx, branchQDbName) - if err != nil { - ctx.GetLogger().Logger.Errorf("branch not found: %s", br) - continue - } - branchQDb, ok := sqlDb.(dsess.SqlDatabase) - if !ok { - return fmt.Errorf("branch/database not found: %s", branchQDbName) - } - - if ok, err := statsDb.SchemaChange(ctx, br, branchQDb); err != nil { - return err - } else if ok { - if err := statsDb.DeleteBranchStats(ctx, br, true); err != nil { - return err - } - } - } - ctx.GetLogger().Debugf("statistics init error: preexisting stats db: %s", dbName) + if sc.hdpEnv == nil { + sc.mu.Lock() + sc.hdpEnv = denv + sc.mu.Unlock() + } + sqlDb, ok := db.(sqle.Database) + if !ok { + return nil } - ctx.GetLogger().Debugf("statistics refresh: initialize %s", name) - return statsProv.InitAutoRefresh(ctxFactory, name, bThreads) + + // call 
should only fail if backpressure in secondary queue + return sc.AddFs(ctx, sqlDb, denv.FS, true) } } -func NewStatsDropDatabaseHook(statsProv *Provider) sqle.DropDatabaseHook { +func NewDropDatabaseHook(sc *StatsController) sqle.DropDatabaseHook { return func(ctx *sql.Context, name string) { - statsProv.CancelRefreshThread(name) - if err := statsProv.DropDbStats(ctx, name, false); err != nil { + if err := sc.DropDbStats(ctx, name, false); err != nil { ctx.GetLogger().Debugf("failed to close stats database: %s", err) } - - if db, ok := statsProv.getStatDb(name); ok { - if err := db.Close(); err != nil { - ctx.GetLogger().Debugf("failed to close stats database: %s", err) - } - delete(statsProv.statDbs, name) - } } } diff --git a/go/libraries/doltcore/sqle/statspro/interface.go b/go/libraries/doltcore/sqle/statspro/interface.go deleted file mode 100644 index 5a423466f91..00000000000 --- a/go/libraries/doltcore/sqle/statspro/interface.go +++ /dev/null @@ -1,75 +0,0 @@ -// Copyright 2024 Dolthub, Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -package statspro - -import ( - "context" - - "github.com/dolthub/go-mysql-server/sql" - - "github.com/dolthub/dolt/go/libraries/doltcore/env" - "github.com/dolthub/dolt/go/libraries/doltcore/sqle" - "github.com/dolthub/dolt/go/libraries/doltcore/sqle/dsess" - "github.com/dolthub/dolt/go/libraries/utils/filesys" - "github.com/dolthub/dolt/go/store/hash" -) - -// Database is a backing store for a collection of DoltStats. -// Each stats database tracks a user database, with multiple -// branches potentially each having their own statistics. -type Database interface { - // ListStatQuals returns the list of index statistics for a branch. - ListStatQuals(branch string) []sql.StatQualifier - // LoadBranchStats starts tracking a specific branch's statistics. - LoadBranchStats(ctx *sql.Context, branch string) error - // DeleteBranchStats removes references to in memory index statistics. - // If |flush| is true delete the data from storage. - DeleteBranchStats(ctx *sql.Context, branch string, flush bool) error - // GetStat returns a branch's index statistics. - GetStat(branch string, qual sql.StatQualifier) (*DoltStats, bool) - //SetStat bulk replaces the statistic, deleting any previous version - SetStat(ctx context.Context, branch string, qual sql.StatQualifier, stats *DoltStats) error - //DeleteStats deletes a list of index statistics. - DeleteStats(ctx *sql.Context, branch string, quals ...sql.StatQualifier) - // ReplaceChunks is an update interface that lets a stats implementation - // decide how to edit stats for a stats refresh. - ReplaceChunks(ctx context.Context, branch string, qual sql.StatQualifier, targetHashes []hash.Hash, dropChunks, newChunks []sql.HistogramBucket) error - // Flush instructs the database to sync any partial state to disk - Flush(ctx context.Context, branch string) error - // Close finalizes any file references. 
- Close() error - // SetTableHash updates the most recently tracked table stats table hash - SetTableHash(branch, tableName string, h hash.Hash) - // GetTableHash returns the most recently tracked table stats table hash - GetTableHash(branch, tableName string) hash.Hash - // SetSchemaHash updates the most recently stored table stat's schema hash - SetSchemaHash(ctx context.Context, branch, tableName string, h hash.Hash) error - // GetSchemaHash returns the schema hash for the latest stored statistics - GetSchemaHash(ctx context.Context, branch, tableName string) (hash.Hash, error) - // Branches returns the set of branches with tracked statistics databases - Branches() []string - // SchemaChange returns false if any table schema in the session - // root is incompatible with the latest schema used to create a stored - // set of statistics. - SchemaChange(ctx *sql.Context, branch string, branchQdb dsess.SqlDatabase) (bool, error) -} - -// StatsFactory instances construct statistic databases. -type StatsFactory interface { - // Init gets a reference to the stats database for a dolt database - // rooted at the given filesystem. It will create the database if - // it does not exist. - Init(ctx *sql.Context, sourceDb dsess.SqlDatabase, prov *sqle.DoltDatabaseProvider, fs filesys.Filesys, hdp env.HomeDirProvider) (Database, error) -} diff --git a/go/libraries/doltcore/sqle/statspro/jobqueue/serialqueue.go b/go/libraries/doltcore/sqle/statspro/jobqueue/serialqueue.go new file mode 100644 index 00000000000..92da633ff5a --- /dev/null +++ b/go/libraries/doltcore/sqle/statspro/jobqueue/serialqueue.go @@ -0,0 +1,410 @@ +// Copyright 2025 Dolthub, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package jobqueue + +import ( + "context" + "errors" + "fmt" + "sync" + "sync/atomic" + "time" + + "github.com/dolthub/dolt/go/libraries/utils/circular" +) + +// A SerialQueue is a job queue which runs one job at a time. Jobs are +// run in the order they are submitted, with the exception that every +// interrupt job is run before any normal priority job. +// +// A SerialQueue can be paused, in which case it will accept new +// submissions, but will not run them until it is started again. +// +// A SerialQueue can be purged, which deletes any pending jobs from +// it. +// +// A SerialQueue can be stopped, in which case it will not accept new +// submissions and no pending work will be run. Stopping a queue does +// not purge it, but it is easy for a caller to stop and purge the +// queue. +// +// A stopped or paused SerialQueue can be started, which will cause it +// to start running submitted jobs again, including any unpurged jobs +// which were pending when it was stopped or paused. +// +// A SerialQueue runs background threads to coordinate its +// behavior. These background threads are launched with a `Context` +// supplied to its |Run| method. If that `Context` ever becomes +// `Done`, the SerialQueue termainally enters a completed state. +// +// In general, jobs running on the queue should not block indefinitely +// and should be very careful about any synchronization. It is safe +// for jobs within the queue to call DoAsync, InterruptAsync, Stop, +// Pause, Purge and Start on the queue itself. 
It is a deadlock for a +// job within the queue to perform a DoSync or InterruptSync on the +// queue itself, although that deadlock may be resolved if the +// provided |ctx| ends up |Done|. +type SerialQueue struct { + running atomic.Bool + + // If the queue is terminally completed, this will be closed. + // Submissions to the queue scheduler select on this channel + // to return errors if the scheduler is no longer accepting + // work. + completed chan struct{} + + runnerCh chan work + schedCh chan schedReq + errCb func(error) +} + +// |work| represents work to be run on the runner goroutine. +type work struct { + // The function to call. + f func() error + // The channel to close after the work is run. + done chan struct{} + // Update worker rate + newRate time.Duration +} + +type schedState int + +const ( + // When scheduler is running, it is willing to accept new work + // and to give work to the work thread. + schedState_Running schedState = iota + // When scheduler is paused, it is willing to accept new work + // but it does not give work to the work thread. + schedState_Paused + // When scheduler is stopped, it does not accept new work + // and it does not give work to the work thread. + schedState_Stopped +) + +type schedReqType int + +const ( + schedReqType_Enqueue schedReqType = iota + schedReqType_Purge + schedReqType_Start + schedReqType_Pause + schedReqType_Stop +) + +type schedPriority int + +const ( + schedPriority_Normal schedPriority = iota + schedPriority_High +) + +// Incoming message for the scheduler thread. +type schedReq struct { + reqType schedReqType + // Always set, the scheduler's response is + // sent through this channel. The send + // must never block. 
+ resp chan schedResp + // Set when |reqType| is Enqueue + pri schedPriority + // Set when |reqType| is Enqueue + work work +} + +type schedResp struct { + err error +} + +var ErrStoppedQueue = errors.New("stopped queue: cannot submit work to a stopped queue.") +var ErrCompletedQueue = errors.New("completed queue: the queue is no longer running.") + +// Create a new serial queue. All of the methods on the returned +// SerialQueue block indefinitely until its |Run| method is called. +func NewSerialQueue() *SerialQueue { + return &SerialQueue{ + completed: make(chan struct{}), + runnerCh: make(chan work), + schedCh: make(chan schedReq), + } +} +func (s *SerialQueue) WithErrorCb(errCb func(error)) *SerialQueue { + s.errCb = errCb + return s +} + +// Run the serial queue's background threads with this |ctx|. If the +// |ctx| ever becomes |Done|, the queue enters a terminal completed +// state. It is an error to call this function more than once. +func (s *SerialQueue) Run(ctx context.Context) { + if !s.running.CompareAndSwap(false, true) { + panic("Cannot run a SerialQueue more than once.") + } + defer close(s.completed) + var wg sync.WaitGroup + wg.Add(2) + go func() { + defer wg.Done() + s.runScheduler(ctx) + }() + go func() { + defer wg.Done() + s.runRunner(ctx) + }() + wg.Wait() +} + +// Start the queue. The queue can be in any state, including already started. +func (s *SerialQueue) Start() error { + return s.makeReq(schedReq{ + reqType: schedReqType_Start, + resp: make(chan schedResp, 1), + }) +} + +// Pause the queue. The queue can be in any state, including already +// paused. Note that pausing the queue does not block on any +// currently running job to complete. 
A pattern to pause the queue +// with a guarantee that nothing is currently running is: +// +// s.InterruptSync(context.Background(), func() { s.Pause() }) +func (s *SerialQueue) Pause() error { + return s.makeReq(schedReq{ + reqType: schedReqType_Pause, + resp: make(chan schedResp, 1), + }) +} + +// Stop the queue. The queue can be in any state, including already +// stopped. Note that stopping the queue does not block on any +// currently running job to complete. +func (s *SerialQueue) Stop() error { + return s.makeReq(schedReq{ + reqType: schedReqType_Stop, + resp: make(chan schedResp, 1), + }) +} + +// Purge the queue. All pending jobs will be dropped. +func (s *SerialQueue) Purge() error { + return s.makeReq(schedReq{ + reqType: schedReqType_Purge, + resp: make(chan schedResp, 1), + }) +} + +func (s *SerialQueue) NewRateLimit(rate time.Duration) error { + return s.makeReq(schedReq{ + reqType: schedReqType_Enqueue, + pri: schedPriority_High, + work: work{ + f: func() error { return nil }, + done: make(chan struct{}), + newRate: rate, + }, + resp: make(chan schedResp, 1), + }) +} + +// Run a high priority job on the SerialQueue, blocking for its completion. +// If done against a Paused queue, this could block indefinitely. The +// block for completion is gated on the |ctx|. +func (s *SerialQueue) InterruptSync(ctx context.Context, f func() error) error { + w, err := s.submitWork(schedPriority_High, f) + if err != nil { + return err + } + select { + case <-w.done: + return nil + case <-ctx.Done(): + return context.Cause(ctx) + case <-s.completed: + return ErrCompletedQueue + } +} + +// Run a normal priority job on the SerialQueue, blocking for its completion. +// When done against a paused queue, this can block indefinitely. 
+func (s *SerialQueue) DoSync(ctx context.Context, f func() error) error { + w, err := s.submitWork(schedPriority_Normal, f) + if err != nil { + return err + } + select { + case <-w.done: + return nil + case <-ctx.Done(): + return context.Cause(ctx) + case <-s.completed: + return ErrCompletedQueue + } +} + +// Run a high priority job asynchronously on the queue. Returns once the +// job is accepted. +func (s *SerialQueue) InterruptAsync(f func() error) error { + _, err := s.submitWork(schedPriority_High, f) + if err != nil { + return err + } + return nil +} + +// Run a normal priority job asynchronously on the queue. Returns once the +// job is accepted. +func (s *SerialQueue) DoAsync(f func() error) error { + _, err := s.submitWork(schedPriority_Normal, f) + if err != nil { + return err + } + return nil +} + +// Helper function to submit work. Returns the work submitted, if it +// was successful, and an error otherwise. +func (s *SerialQueue) submitWork(pri schedPriority, f func() error) (work, error) { + w := work{ + f: f, + done: make(chan struct{}), + } + err := s.makeReq(schedReq{ + reqType: schedReqType_Enqueue, + pri: pri, + work: w, + resp: make(chan schedResp, 1), + }) + if err != nil { + return work{}, err + } + return w, nil +} + +func (s *SerialQueue) makeReq(req schedReq) error { + select { + case s.schedCh <- req: + resp := <-req.resp + return resp.err + case <-s.completed: + return ErrCompletedQueue + } +} + +// Read off the input channels and maintain queues of pending work. +// Deliver that work to the runner channel if it is desired. 
+func (s *SerialQueue) runScheduler(ctx context.Context) { + state := schedState_Running + normalQ := circular.NewBuff[work](16) + highQ := circular.NewBuff[work](16) + for { + var sendWorkCh chan work + var sendWork work + var sentWorkCallback func() + + if state == schedState_Running { + if highQ.Len() > 0 { + sendWorkCh = s.runnerCh + sendWork = highQ.Front() + sentWorkCallback = highQ.Pop + } else if normalQ.Len() > 0 { + sendWorkCh = s.runnerCh + sendWork = normalQ.Front() + sentWorkCallback = normalQ.Pop + } + } + + select { + case msg := <-s.schedCh: + switch msg.reqType { + case schedReqType_Enqueue: + if state == schedState_Stopped { + msg.resp <- schedResp{ + err: ErrStoppedQueue, + } + } else { + if msg.pri == schedPriority_High { + highQ.Push(msg.work) + } else { + normalQ.Push(msg.work) + } + msg.resp <- schedResp{ + err: nil, + } + } + case schedReqType_Purge: + highQ = circular.NewBuff[work](highQ.Cap()) + normalQ = circular.NewBuff[work](normalQ.Cap()) + msg.resp <- schedResp{ + err: nil, + } + case schedReqType_Start: + state = schedState_Running + msg.resp <- schedResp{ + err: nil, + } + case schedReqType_Pause: + state = schedState_Paused + msg.resp <- schedResp{ + err: nil, + } + case schedReqType_Stop: + state = schedState_Stopped + msg.resp <- schedResp{ + err: nil, + } + } + case sendWorkCh <- sendWork: + // Pop from queue the work came from. + sentWorkCallback() + case <-ctx.Done(): + return + } + } +} + +// Read off the runner channel and run the submitted work. 
+func (s *SerialQueue) runRunner(ctx context.Context) { + ticker := time.NewTicker(1) + for { + select { + case w := <-s.runnerCh: + if w.newRate > 0 { + ticker.Reset(w.newRate) + } + + // do not run jobs more frequently than the ticker rate + select { + case <-ticker.C: + case <-ctx.Done(): + } + + func() { + var err error + defer func() { + if r := recover(); r != nil { + err = fmt.Errorf("serialQueue panicked running work: %s", r) + } + if err != nil { + s.errCb(err) + } + }() + err = w.f() + }() + close(w.done) + case <-ctx.Done(): + return + } + } +} diff --git a/go/libraries/doltcore/sqle/statspro/jobqueue/serialqueue_test.go b/go/libraries/doltcore/sqle/statspro/jobqueue/serialqueue_test.go new file mode 100644 index 00000000000..f318bb3722c --- /dev/null +++ b/go/libraries/doltcore/sqle/statspro/jobqueue/serialqueue_test.go @@ -0,0 +1,361 @@ +// Copyright 2025 Dolthub, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package jobqueue + +import ( + "context" + "os" + "runtime" + "sync" + "testing" + "time" + + "github.com/stretchr/testify/assert" +) + +func TestSerialQueue(t *testing.T) { + if runtime.GOOS == "windows" && os.Getenv("CI") != "" { + t.Skip("Racy on Windows CI") + } + t.Run("CanceledRunContext", func(t *testing.T) { + ctx, cancel := context.WithCancel(context.Background()) + cancel() + queue := NewSerialQueue() + // This should return. + queue.Run(ctx) + // Now all methods should return ErrCompletedQueue. 
+ assert.ErrorIs(t, queue.Start(), ErrCompletedQueue) + assert.ErrorIs(t, queue.Pause(), ErrCompletedQueue) + assert.ErrorIs(t, queue.Stop(), ErrCompletedQueue) + assert.ErrorIs(t, queue.DoSync(context.Background(), func() error { return nil }), ErrCompletedQueue) + assert.ErrorIs(t, queue.DoAsync(func() error { return nil }), ErrCompletedQueue) + assert.ErrorIs(t, queue.InterruptSync(context.Background(), func() error { return nil }), ErrCompletedQueue) + assert.ErrorIs(t, queue.InterruptAsync(func() error { return nil }), ErrCompletedQueue) + }) + t.Run("StartsRunning", func(t *testing.T) { + ctx, cancel := context.WithCancel(context.Background()) + queue := NewSerialQueue() + var wg sync.WaitGroup + wg.Add(1) + go func() error { + defer wg.Done() + queue.Run(ctx) + return nil + }() + var ran bool + err := queue.DoSync(context.Background(), func() error { + ran = true + return nil + }) + assert.NoError(t, err) + assert.True(t, ran, "the sync task ran.") + cancel() + wg.Wait() + }) + t.Run("StoppedQueueReturnsError", func(t *testing.T) { + ctx, cancel := context.WithCancel(context.Background()) + queue := NewSerialQueue() + var wg sync.WaitGroup + wg.Add(1) + go func() error { + defer wg.Done() + queue.Run(ctx) + return nil + }() + assert.NoError(t, queue.Stop()) + err := queue.DoSync(context.Background(), func() error { return nil }) + assert.ErrorIs(t, err, ErrStoppedQueue) + cancel() + wg.Wait() + }) + t.Run("PausedQueueDoesNotRun", func(t *testing.T) { + ctx, cancel := context.WithCancel(context.Background()) + queue := NewSerialQueue() + var wg sync.WaitGroup + wg.Add(1) + go func() error { + defer wg.Done() + queue.Run(ctx) + return nil + }() + assert.NoError(t, queue.Pause()) + var ran bool + for i := 0; i < 16; i++ { + err := queue.DoAsync(func() error { + ran = true + return nil + }) + assert.NoError(t, err) + } + cancel() + wg.Wait() + assert.False(t, ran, "work did not run on the paused queue.") + }) + t.Run("StartingPausedQueueRunsIt", func(t 
*testing.T) { + ctx, cancel := context.WithCancel(context.Background()) + queue := NewSerialQueue() + var wg sync.WaitGroup + wg.Add(1) + go func() error { + defer wg.Done() + queue.Run(ctx) + return nil + }() + assert.NoError(t, queue.Pause()) + var ran bool + for i := 0; i < 16; i++ { + err := queue.DoAsync(func() error { + ran = true + return nil + }) + assert.NoError(t, err) + } + assert.NoError(t, queue.Start()) + err := queue.DoSync(context.Background(), func() error { return nil }) + assert.NoError(t, err) + assert.True(t, ran, "work ran after the paused queue was started.") + cancel() + wg.Wait() + }) + t.Run("InterruptWorkRunsFirst", func(t *testing.T) { + ctx, cancel := context.WithCancel(context.Background()) + queue := NewSerialQueue() + var wg sync.WaitGroup + wg.Add(1) + go func() error { + defer wg.Done() + queue.Run(ctx) + return nil + }() + assert.NoError(t, queue.Pause()) + var cnt int + queue.DoAsync(func() error { + assert.Equal(t, cnt, 2) + cnt += 1 + return nil + }) + queue.DoAsync(func() error { + assert.Equal(t, cnt, 3) + cnt += 1 + return nil + }) + queue.InterruptAsync(func() error { + assert.Equal(t, cnt, 0) + cnt += 1 + return nil + }) + queue.InterruptAsync(func() error { + assert.Equal(t, cnt, 1) + cnt += 1 + return nil + }) + assert.NoError(t, queue.Start()) + assert.NoError(t, queue.DoSync(context.Background(), func() error { return nil })) + assert.Equal(t, cnt, 4) + cancel() + wg.Wait() + }) + t.Run("StopFromQueue", func(t *testing.T) { + ctx, cancel := context.WithCancel(context.Background()) + queue := NewSerialQueue() + var wg sync.WaitGroup + wg.Add(1) + go func() error { + defer wg.Done() + queue.Run(ctx) + return nil + }() + // block until queue is running + assert.NoError(t, queue.DoSync(ctx, func() error { + return nil + })) + var cnt int + for i := 0; i < 16; i++ { + // Some of these calls may error, since the queue + // will be stopped asynchronously. 
+ queue.DoAsync(func() error { + cnt += 1 + assert.NoError(t, queue.Stop()) + return nil + }) + } + assert.Equal(t, cnt, 1) + cancel() + wg.Wait() + }) + t.Run("PauseFromQueue", func(t *testing.T) { + ctx, cancel := context.WithCancel(context.Background()) + queue := NewSerialQueue() + var wg sync.WaitGroup + wg.Add(1) + go func() error { + defer wg.Done() + queue.Run(ctx) + return nil + }() + // block until queue is running + assert.NoError(t, queue.DoSync(ctx, func() error { + return nil + })) + + done := make(chan struct{}) + for i := 0; i < 16; i++ { + err := queue.DoAsync(func() error { + close(done) + assert.NoError(t, queue.Pause()) + return nil + }) + assert.NoError(t, err) + } + <-done + cancel() + wg.Wait() + }) + t.Run("PurgeFromQueue", func(t *testing.T) { + ctx, cancel := context.WithCancel(context.Background()) + queue := NewSerialQueue() + var wg sync.WaitGroup + wg.Add(1) + + go func() error { + defer wg.Done() + queue.Run(ctx) + return nil + }() + + assert.NoError(t, queue.Pause()) + var cnt int + didRun := make(chan struct{}) + for i := 0; i < 16; i++ { + err := queue.DoAsync(func() error { + cnt += 1 + assert.NoError(t, queue.Purge()) + close(didRun) + return nil + }) + assert.NoError(t, err) + } + assert.NoError(t, queue.Start()) + <-didRun + assert.NoError(t, queue.DoSync(context.Background(), func() error { return nil })) + assert.Equal(t, cnt, 1) + cancel() + wg.Wait() + }) + t.Run("DoSyncInQueueDeadlockWithContext", func(t *testing.T) { + ctx, cancel := context.WithCancel(context.Background()) + queue := NewSerialQueue() + start := make(chan struct{}) + + var wg sync.WaitGroup + wg.Add(1) + go func() error { + defer wg.Done() + close(start) + queue.Run(ctx) + return nil + }() + <-start + var cnt int + err := queue.DoSync(context.Background(), func() error { + cnt += 1 + ctx, cancel := context.WithTimeout(context.Background(), 100*time.Millisecond) + defer cancel() + err := queue.DoSync(ctx, func() error { + cnt += 1 + return nil + }) + 
assert.ErrorIs(t, err, context.DeadlineExceeded) + return nil + }) + assert.NoError(t, err) + assert.NoError(t, queue.DoSync(context.Background(), func() error { return nil })) + // Both tasks eventually ran... + assert.Equal(t, cnt, 2) + cancel() + wg.Wait() + }) + t.Run("SyncReturnsErrCompletedQueueAfterWorkAccepted", func(t *testing.T) { + ctx, cancel := context.WithCancel(context.Background()) + queue := NewSerialQueue() + start := make(chan struct{}) + var wg sync.WaitGroup + wg.Add(1) + go func() error { + defer wg.Done() + close(start) + queue.Run(ctx) + return nil + }() + <-start + queue.Pause() + var err error + var ran bool + wg.Add(1) + go func() error { + defer wg.Done() + err = queue.InterruptSync(context.Background(), func() error { + ran = true + return nil + }) + return nil + }() + wg.Add(1) + go func() error { + defer wg.Done() + time.Sleep(100 * time.Millisecond) + queue.Stop() + return nil + }() + cancel() + wg.Wait() + assert.ErrorIs(t, err, ErrCompletedQueue) + assert.False(t, ran, "the interrupt task never ran.") + }) + t.Run("RateLimitWorkThroughput", func(t *testing.T) { + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + queue := NewSerialQueue() + running := make(chan struct{}) + go func() { + close(running) + queue.Run(ctx) + }() + <-running + + // first will run because timeout > job rate + ran := false + subCtx, cancel2 := context.WithTimeout(ctx, 5*time.Millisecond) + defer cancel2() + err := queue.DoSync(subCtx, func() error { + ran = true + return nil + }) + assert.NoError(t, err) + assert.True(t, ran, "the interrupt task never ran.") + + // second timeout < jobrate, will fail + queue.NewRateLimit(10 * time.Millisecond) + ran = false + subCtx, cancel3 := context.WithTimeout(ctx, 5*time.Millisecond) + defer cancel3() + err = queue.DoSync(subCtx, func() error { + ran = true + return nil + }) + assert.ErrorIs(t, err, context.DeadlineExceeded) + assert.False(t, ran, "the interrupt task never ran.") + }) +} diff 
--git a/go/libraries/doltcore/sqle/statspro/listener.go b/go/libraries/doltcore/sqle/statspro/listener.go new file mode 100644 index 00000000000..d20426ce74e --- /dev/null +++ b/go/libraries/doltcore/sqle/statspro/listener.go @@ -0,0 +1,259 @@ +// Copyright 2025 Dolthub, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package statspro + +import ( + "context" + "fmt" + "time" + + "github.com/dolthub/go-mysql-server/sql" + + "github.com/dolthub/dolt/go/libraries/doltcore/dbfactory" + "github.com/dolthub/dolt/go/libraries/doltcore/sqle" + "github.com/dolthub/dolt/go/libraries/doltcore/sqle/dsess" +) + +var ErrStatsIssuerPaused = fmt.Errorf("stats issuer is paused") + +type listenerEvent uint16 + +const ( + leUnknown = listenerEvent(iota) + leSwap listenerEvent = 1 << 0 + leStop listenerEvent = 1 << 1 + leGc listenerEvent = 1 << 2 + leFlush listenerEvent = 1 << 3 +) + +func (sc *StatsController) signalListener(s listenerEvent) { + keep := 0 + for i, l := range sc.listeners { + if (l.target|leStop)&s > 0 { + l.c <- s + close(l.c) + } else { + sc.listeners[keep] = sc.listeners[i] + keep++ + } + } + sc.listeners = sc.listeners[:keep] +} + +func (sc *StatsController) newThreadCtx(ctx context.Context) context.Context { + sc.mu.Lock() + defer sc.mu.Unlock() + + newCtx, cancel := context.WithCancel(ctx) + if sc.activeCtxCancel != nil { + sc.activeCtxCancel() + } + sc.signalListener(leStop) + sc.activeCtxCancel = cancel + return newCtx +} + +type listener 
struct { + target listenerEvent + c chan listenerEvent +} + +func (sc *StatsController) addListener(e listenerEvent) (chan listenerEvent, error) { + sc.mu.Lock() + defer sc.mu.Unlock() + if sc.activeCtxCancel == nil { + return nil, ErrStatsIssuerPaused + } + l := listener{target: e, c: make(chan listenerEvent, 1)} + sc.listeners = append(sc.listeners, l) + return l.c, nil +} + +func (sc *StatsController) Stop() { + // xxx: do not pause |sq|, analyze jobs still need to run + sc.mu.Lock() + defer sc.mu.Unlock() + if sc.activeCtxCancel != nil { + sc.activeCtxCancel() + sc.activeCtxCancel = nil + } + sc.signalListener(leStop) + return +} + +// RefreshFromSysVars reads the environment variables and updates controller +// parameters. If the queue is not started this will hang. +func (sc *StatsController) RefreshFromSysVars() { + _, memOnly, _ := sql.SystemVariables.GetGlobal(dsess.DoltStatsMemoryOnly) + sc.SetMemOnly(memOnly.(int8) == 1) + + _, gcEnabled, _ := sql.SystemVariables.GetGlobal(dsess.DoltStatsGCEnabled) + sc.SetEnableGc(gcEnabled.(int8) == 1) + + typ, jobI, _ := sql.SystemVariables.GetGlobal(dsess.DoltStatsJobInterval) + _, gcI, _ := sql.SystemVariables.GetGlobal(dsess.DoltStatsGCInterval) + + jobInterval, _, _ := typ.GetType().Convert(jobI) + gcInterval, _, _ := typ.GetType().Convert(gcI) + + sc.SetTimers( + jobInterval.(int64)*int64(time.Millisecond), + gcInterval.(int64)*int64(time.Millisecond), + ) +} + +func (sc *StatsController) Restart() error { + select { + case <-sc.closed: + return fmt.Errorf("StatsController is closed") + default: + } + + sc.sq.Start() + sc.RefreshFromSysVars() + + done := make(chan struct{}) + if err := sc.bgThreads.Add("stats_worker", func(ctx context.Context) { + ctx = sc.newThreadCtx(ctx) + close(done) + err := sc.runWorker(ctx) + if err != nil { + sc.logger.Errorf("stats stopped: %s", err.Error()) + } + }); err != nil { + return err + } + // only return after latestCtx updated + <-done + return nil +} + +func (sc 
*StatsController) RunQueue() { + if err := sc.bgThreads.Add("stats_scheduler", sc.sq.Run); err != nil { + sc.descError("start scheduler", err) + } + // block on queue starting + sc.sq.DoSync(context.Background(), func() error { return nil }) + return +} + +// Init should only be called once +func (sc *StatsController) Init(ctx context.Context, pro *sqle.DoltDatabaseProvider, ctxGen ctxFactory, bthreads *sql.BackgroundThreads, dbs []sql.Database) error { + sc.pro = pro + sc.ctxGen = ctxGen + sc.bgThreads = bthreads + + sc.RunQueue() + sqlCtx, err := sc.ctxGen(ctx) + if err != nil { + return err + } + defer sql.SessionEnd(sqlCtx.Session) + sql.SessionCommandBegin(sqlCtx.Session) + defer sql.SessionCommandEnd(sqlCtx.Session) + + for i, db := range dbs { + if db, ok := db.(sqle.Database); ok { // exclude read replica dbs + fs, err := sc.pro.FileSystemForDatabase(db.AliasedName()) + if err != nil { + return err + } + if err := sc.AddFs(sqlCtx, db, fs, false); err != nil { + return err + } + if i > 0 || sc.memOnly { + continue + } + // attempt to access previously written stats + statsFs, err := fs.WithWorkingDir(dbfactory.DoltStatsDir) + if err != nil { + return err + } + + exists, isDir := statsFs.Exists("") + if exists && isDir { + newKv, err := sc.initStorage(ctx, fs) + if err == nil { + sc.kv = newKv + sc.statsBackingDb = fs + continue + } else { + path, _ := statsFs.Abs("") + sc.descError("failed to reboot stats from: "+path, err) + } + } + + // otherwise wipe and create new stats dir + if err := sc.lockedRotateStorage(ctx); err != nil { + return err + } + } + } + return nil +} + +func (sc *StatsController) waitForSignal(ctx context.Context, signal listenerEvent, cnt int) (err error) { + for cnt > 0 { + var l chan listenerEvent + l, err = sc.addListener(signal) + if err != nil { + return err + } + + select { + case <-ctx.Done(): + return context.Cause(ctx) + case <-l: + cnt-- + } + } + return nil +} + +func (sc *StatsController) WaitForSync(ctx context.Context) 
(err error) { + // wait for 2 cycles because first completion is usually a stale context + return sc.waitForSignal(ctx, leSwap, 2) +} + +func (sc *StatsController) WaitForFlush(ctx *sql.Context) error { + sc.mu.Lock() + memOnly := sc.memOnly + sc.mu.Unlock() + if memOnly { + return fmt.Errorf("memory only statistics will not flush") + } + return sc.waitForSignal(ctx, leFlush, 1) +} + +func (sc *StatsController) Gc(ctx *sql.Context) error { + sc.setDoGc(true) + return sc.waitForSignal(ctx, leGc, 1) +} + +func (sc *StatsController) Close() { + sc.mu.Lock() + defer sc.mu.Unlock() + if sc.activeCtxCancel != nil { + sc.activeCtxCancel() + sc.activeCtxCancel = nil + sc.sq.InterruptAsync(func() error { + return sc.sq.Stop() + }) + } + sc.signalListener(leStop) + + close(sc.closed) + return +} diff --git a/go/libraries/doltcore/sqle/statspro/listener_test.go b/go/libraries/doltcore/sqle/statspro/listener_test.go new file mode 100644 index 00000000000..792f1c75124 --- /dev/null +++ b/go/libraries/doltcore/sqle/statspro/listener_test.go @@ -0,0 +1,250 @@ +// Copyright 2025 Dolthub, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package statspro + +import ( + "context" + "sync" + "testing" + "time" + + "github.com/dolthub/go-mysql-server/sql" + "github.com/stretchr/testify/require" + "golang.org/x/sync/errgroup" +) + +func TestListening(t *testing.T) { + bthreads := sql.NewBackgroundThreads() + defer bthreads.Shutdown() + t.Run("ClosedDoesNotStart", func(t *testing.T) { + sc := newStatsCoord(bthreads) + sc.Close() + require.Error(t, sc.Restart()) + require.Nil(t, sc.activeCtxCancel) + }) + t.Run("IsStoppable", func(t *testing.T) { + sc := newStatsCoord(bthreads) + eg := errgroup.Group{} + ctx := sc.newThreadCtx(context.Background()) + eg.Go(func() error { + return sc.runWorker(ctx) + }) + + require.NotNil(t, sc.activeCtxCancel) + + l, err := sc.addListener(leSwap) + require.NoError(t, err) + <-l + select { + case <-ctx.Done(): + t.Fatal("expected latest thread ctx to be active") + default: + } + sc.Stop() + <-ctx.Done() + require.ErrorIs(t, eg.Wait(), context.Canceled) + }) + t.Run("StopsAreIdempotent", func(t *testing.T) { + sc := newStatsCoord(bthreads) + eg := errgroup.Group{} + ctx := sc.newThreadCtx(context.Background()) + eg.Go(func() error { + return sc.runWorker(ctx) + }) + + sc.Stop() + sc.Stop() + sc.Stop() + sc.Stop() + <-ctx.Done() + require.ErrorIs(t, eg.Wait(), context.Canceled) + }) + t.Run("IsRestartable", func(t *testing.T) { + sc := newStatsCoord(bthreads) + eg := errgroup.Group{} + ctx1 := sc.newThreadCtx(context.Background()) + eg.Go(func() error { + return sc.runWorker(ctx1) + }) + + ctx2 := sc.newThreadCtx(context.Background()) + eg.Go(func() error { + return sc.runWorker(ctx2) + }) + + ctx3 := sc.newThreadCtx(context.Background()) + eg.Go(func() error { + return sc.runWorker(ctx3) + }) + + <-ctx1.Done() + <-ctx2.Done() + sc.Stop() + <-ctx3.Done() + require.ErrorIs(t, eg.Wait(), context.Canceled) + }) + t.Run("ConcurrentStartStopsAreOk", func(t *testing.T) { + sc := newStatsCoord(bthreads) + wg := sync.WaitGroup{} + wg.Add(2) + go func() { + defer wg.Done() + for 
range 20 { + require.NoError(t, sc.Restart()) + l, err := sc.addListener(leSwap) + if err != nil { + require.ErrorIs(t, err, ErrStatsIssuerPaused) + continue + } + select { + case <-l: + } + } + }() + go func() { + defer wg.Done() + for range 20 { + sc.Stop() + l, err := sc.addListener(leSwap) + if err != nil { + require.ErrorIs(t, err, ErrStatsIssuerPaused) + continue + } + select { + case <-l: + case <-time.Tick(10 * time.Millisecond): + print() + } + } + }() + wg.Wait() + }) + t.Run("ListenForSwap", func(t *testing.T) { + sc := newStatsCoord(bthreads) + require.NoError(t, sc.Restart()) + l, err := sc.addListener(leSwap) + require.NoError(t, err) + select { + case e := <-l: + require.True(t, (leSwap&e) > 0, "expected success or gc signal") + } + }) + t.Run("ListenForStop", func(t *testing.T) { + sc := newStatsCoord(bthreads) + require.NoError(t, sc.Restart()) + var l chan listenerEvent + err := sc.sq.DoSync(context.Background(), func() error { + // do this in serial queue to make sure we don't race + // with swap + var err error + require.NoError(t, err) + l, err = sc.addListener(leUnknown) + require.NoError(t, err) + sc.Stop() + return nil + }) + require.NoError(t, err) + select { + case e := <-l: + require.Equal(t, e, leStop) + default: + t.Fatal("expected listener to recv stop") + } + }) + t.Run("ListenerFailsIfStopped", func(t *testing.T) { + sc := newStatsCoord(bthreads) + require.NoError(t, sc.Restart()) + sc.Stop() + _, err := sc.addListener(leUnknown) + require.ErrorIs(t, err, ErrStatsIssuerPaused) + }) + t.Run("ListenerFailsIfClosed", func(t *testing.T) { + sc := newStatsCoord(bthreads) + sc.Close() + require.Error(t, sc.Restart()) + _, err := sc.addListener(leUnknown) + require.ErrorIs(t, err, ErrStatsIssuerPaused) + }) + t.Run("WaitBlocksOnStatsCollection", func(t *testing.T) { + sqlCtx, sqlEng, sc := emptySetup(t, bthreads, true, true) + require.NoError(t, executeQuery(sqlCtx, sqlEng, "create table xy (x int primary key, y int)")) + require.NoError(t, 
sc.Restart()) + done := make(chan struct{}) + wg := sync.WaitGroup{} + wg.Add(2) + err := sc.sq.DoAsync(func() error { + defer wg.Done() + <-done + return nil + }) + require.NoError(t, err) + go func() { + defer wg.Done() + defer close(done) + ctx, cancel := context.WithTimeout(context.Background(), 100*time.Millisecond) + defer cancel() + err := sc.waitForSignal(ctx, leSwap, 1) + require.ErrorIs(t, err, context.DeadlineExceeded) + }() + wg.Wait() + }) + t.Run("WaitReturnsIfStoppedBefore", func(t *testing.T) { + sqlCtx, sqlEng, sc := emptySetup(t, bthreads, true, true) + require.NoError(t, executeQuery(sqlCtx, sqlEng, "create table xy (x int primary key, y int)")) + require.NoError(t, sc.Restart()) + done := make(chan struct{}) + wg := sync.WaitGroup{} + wg.Add(2) + err := sc.sq.DoAsync(func() error { + defer wg.Done() + <-done + return nil + }) + require.NoError(t, err) + go func() { + defer wg.Done() + defer close(done) + sc.Stop() + ctx, cancel := context.WithTimeout(context.Background(), 100*time.Millisecond) + defer cancel() + err := sc.waitForSignal(ctx, leSwap, 1) + require.ErrorIs(t, err, ErrStatsIssuerPaused) + }() + wg.Wait() + }) + t.Run("WaitHangsUntilCycleCompletes", func(t *testing.T) { + sqlCtx, sqlEng, sc := emptySetup(t, bthreads, true, true) + require.NoError(t, executeQuery(sqlCtx, sqlEng, "create table xy (x int primary key, y int)")) + require.NoError(t, sc.Restart()) + done := make(chan struct{}) + wg := sync.WaitGroup{} + wg.Add(2) + err := sc.sq.DoAsync(func() error { + defer wg.Done() + <-done + return nil + }) + require.NoError(t, err) + go func() { + defer wg.Done() + ctx, cancel := context.WithTimeout(context.Background(), 100*time.Millisecond) + defer cancel() + err := sc.waitForSignal(ctx, leSwap, 1) + require.NoError(t, err) + }() + close(done) + wg.Wait() + }) +} diff --git a/go/libraries/doltcore/sqle/statspro/noop_controller.go b/go/libraries/doltcore/sqle/statspro/noop_controller.go new file mode 100644 index 
00000000000..3aa36dc4db6 --- /dev/null +++ b/go/libraries/doltcore/sqle/statspro/noop_controller.go @@ -0,0 +1,86 @@ +// Copyright 2025 Dolthub, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package statspro + +import ( + "github.com/dolthub/go-mysql-server/sql" + + "github.com/dolthub/dolt/go/libraries/doltcore/env" + "github.com/dolthub/dolt/go/libraries/doltcore/sqle/dsess" +) + +type StatsNoop struct{} + +func (s StatsNoop) GetTableStats(ctx *sql.Context, db string, table sql.Table) ([]sql.Statistic, error) { + return nil, nil +} + +func (s StatsNoop) AnalyzeTable(ctx *sql.Context, table sql.Table, db string) error { + return nil +} + +func (s StatsNoop) SetStats(ctx *sql.Context, stats sql.Statistic) error { + return nil +} + +func (s StatsNoop) GetStats(ctx *sql.Context, qual sql.StatQualifier, cols []string) (sql.Statistic, bool) { + return nil, false +} + +func (s StatsNoop) DropStats(ctx *sql.Context, qual sql.StatQualifier, cols []string) error { + return nil +} + +func (s StatsNoop) DropDbStats(ctx *sql.Context, db string, flush bool) error { + return nil +} + +func (s StatsNoop) RowCount(ctx *sql.Context, db string, table sql.Table) (uint64, error) { + return 0, nil +} + +func (s StatsNoop) DataLength(ctx *sql.Context, db string, table sql.Table) (uint64, error) { + return 0, nil +} + +func (s StatsNoop) CancelRefreshThread(string) { + return +} + +func (s StatsNoop) StartRefreshThread(*sql.Context, dsess.DoltDatabaseProvider, string, 
*env.DoltEnv, dsess.SqlDatabase) error { + return nil +} + +func (s StatsNoop) ThreadStatus(string) string { + return "stats disabled" +} + +func (s StatsNoop) Prune(ctx *sql.Context) error { + return nil +} + +func (s StatsNoop) Purge(ctx *sql.Context) error { + return nil +} + +func (s StatsNoop) WaitForSync(ctx *sql.Context) error { + return nil +} + +func (s StatsNoop) CollectOnce(ctx *sql.Context) (string, error) { + return "", nil +} + +var _ sql.StatsProvider = StatsNoop{} diff --git a/go/libraries/doltcore/sqle/statspro/script_test.go b/go/libraries/doltcore/sqle/statspro/script_test.go new file mode 100644 index 00000000000..eaf72ef4f9b --- /dev/null +++ b/go/libraries/doltcore/sqle/statspro/script_test.go @@ -0,0 +1,731 @@ +// Copyright 2025 Dolthub, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package statspro + +import ( + "encoding/json" + "log" + "strconv" + "testing" + + "github.com/dolthub/go-mysql-server/sql" + "github.com/stretchr/testify/require" + + "github.com/dolthub/dolt/go/libraries/doltcore/sqle/dprocedures" +) + +type scriptTest struct { + name string + setup []string + assertions []assertion +} + +type assertion struct { + query string + res []sql.Row + err string +} + +func TestStatScripts(t *testing.T) { + threads := sql.NewBackgroundThreads() + defer threads.Shutdown() + + scripts := []scriptTest{ + { + name: "track updates", + setup: []string{ + "create table xy (x int primary key, y varchar(16), key (y,x))", + "insert into xy values (0,'zero'), (1, 'one')", + }, + assertions: []assertion{ + { + query: "select database_name, table_name, index_name from dolt_statistics order by index_name", + res: []sql.Row{{"mydb", "xy", "primary"}, {"mydb", "xy", "y"}}, + }, + { + query: "insert into xy select x, 1 from (with recursive inputs(x) as (select 4 union select x+1 from inputs where x < 1000) select * from inputs) dt;", + }, + { + query: "call dolt_stats_wait()", + }, + { + query: "select count(*) from dolt_statistics", + res: []sql.Row{{int64(9)}}, + }, + { + query: "update xy set y = 2 where x between 100 and 800", + }, + { + query: "call dolt_stats_wait()", + }, + { + query: "select count(*) from dolt_statistics", + res: []sql.Row{{int64(9)}}, + }, + }, + }, + { + name: "track deletes", + setup: []string{ + "create table xy (x int primary key, y varchar(16), key (y,x))", + "insert into xy values (0,'zero'), (1, 'one')", + }, + assertions: []assertion{ + { + query: "select database_name, table_name, index_name from dolt_statistics order by index_name", + res: []sql.Row{{"mydb", "xy", "primary"}, {"mydb", "xy", "y"}}, + }, + { + query: "insert into xy select x, 1 from (with recursive inputs(x) as (select 4 union select x+1 from inputs where x < 1000) select * from inputs) dt;", + }, + { + query: "call dolt_stats_wait()", + }, + { + 
query: "select count(*) from dolt_statistics", + res: []sql.Row{{int64(9)}}, + }, + { + query: "delete from xy where x > 600", + }, + { + query: "call dolt_stats_wait()", + }, + { + query: "select count(*) from dolt_statistics", + res: []sql.Row{{int64(5)}}, + }, + }, + }, + { + name: "ddl table", + setup: []string{ + "create table xy (x int primary key, y varchar(16), key (y,x))", + "insert into xy values (0,'0'), (1,'0'), (2,'0')", + }, + assertions: []assertion{ + { + query: "select database_name, table_name, index_name from dolt_statistics order by index_name", + res: []sql.Row{{"mydb", "xy", "primary"}, {"mydb", "xy", "y"}}, + }, + { + query: "select count(*) from dolt_statistics", + res: []sql.Row{{int64(2)}}, + }, + { + query: "truncate table xy", + }, + { + query: "call dolt_stats_wait()", + }, + { + query: "select count(*) from dolt_statistics", + res: []sql.Row{{int64(0)}}, + }, + { + query: "insert into xy values (0,'0'), (1,'0'), (2,'0')", + }, + { + query: "call dolt_stats_wait()", + }, + { + query: "select count(*) from dolt_statistics", + res: []sql.Row{{int64(2)}}, + }, + { + query: "drop table xy", + }, + { + query: "call dolt_stats_wait()", + }, + { + query: "select count(*) from dolt_statistics", + res: []sql.Row{{int64(0)}}, + }, + }, + }, + { + name: "ddl index", + setup: []string{ + "create table xy (x int primary key, y varchar(16), key (y,x))", + "insert into xy values (0,'0'), (1,'0'), (2,'0')", + }, + assertions: []assertion{ + { + query: "select database_name, table_name, index_name from dolt_statistics order by index_name", + res: []sql.Row{{"mydb", "xy", "primary"}, {"mydb", "xy", "y"}}, + }, + { + query: "select count(*) from dolt_statistics", + res: []sql.Row{{int64(2)}}, + }, + { + query: "alter table xy drop index y", + }, + { + query: "call dolt_stats_wait()", + }, + { + query: "select count(*) from dolt_statistics", + res: []sql.Row{{int64(1)}}, + }, + { + query: "alter table xy add index yx (y,x)", + }, + { + query: "call 
dolt_stats_wait()", + }, + { + query: "select count(*) from dolt_statistics", + res: []sql.Row{{int64(2)}}, + }, + { + query: "select types, upper_bound from dolt_statistics where index_name = 'yx'", + res: []sql.Row{{"varchar(16),int", "0,2"}}, + }, + { + query: "alter table xy modify column y int", + }, + { + query: "call dolt_stats_wait()", + }, + { + query: "select types, upper_bound from dolt_statistics where index_name = 'yx'", + res: []sql.Row{{"int,int", "0,2"}}, + }, + { + query: "select count(*) from dolt_statistics", + res: []sql.Row{{int64(2)}}, + }, + }, + }, + { + name: "mcv counts", + setup: []string{ + "create table xy (x int primary key, y int, key (y,x))", + "alter table xy add index y2 (y)", + "alter table xy add index x2 (x,y)", + "insert into xy values (0,0), (1,0), (2,0), (3,0), (4,0), (5,0), (6,1), (7,1), (8,1), (9,1),(10,3),(11,4),(12,5),(13,6),(14,7),(15,8),(16,9),(17,10),(18,11)", + }, + assertions: []assertion{ + { + query: "select mcv1, mcv2, mcv_counts from dolt_statistics where index_name = 'y2'", + res: []sql.Row{{"1", "0", "4,6"}}, + }, + { + query: "select mcv_counts from dolt_statistics where index_name = 'y'", + res: []sql.Row{{""}}, + }, + { + query: "select mcv_counts from dolt_statistics where index_name = 'x2'", + res: []sql.Row{{""}}, + }, + }, + }, + { + name: "vector index", + setup: []string{ + "create table xy (x int primary key, y json, vector key(y))", + "insert into xy values (0, '0'), (1, '1'), (2, '2'), (3, NULL), (4, NULL)", + }, + assertions: []assertion{ + { + query: "select database_name, table_name, index_name from dolt_statistics order by index_name", + res: []sql.Row{{"mydb", "xy", "primary"}}, + }, + { + query: "call dolt_stats_info('--short')", + res: []sql.Row{ + {dprocedures.StatsInfo{ + DbCnt: 1, + Backing: "mydb", + Active: true, + StorageBucketCnt: 1, + CachedBucketCnt: 1, + CachedBoundCnt: 1, + CachedTemplateCnt: 1, + StatCnt: 1, + }}, + }, + }, + }, + }, + { + name: "generated index", + setup: 
[]string{ + "create table t (pk int primary key, c0 int, c1 int as (c0) virtual, index idx(c1))", + "insert into t (pk, c0) values (0,0), (1,1), (2,2), (3,NULL), (4,NULL)", + }, + assertions: []assertion{ + { + query: "select database_name, table_name, index_name from dolt_statistics order by index_name", + res: []sql.Row{{"mydb", "t", "idx"}, {"mydb", "t", "primary"}}, + }, + { + query: "call dolt_stats_info('--short')", + res: []sql.Row{ + {dprocedures.StatsInfo{ + DbCnt: 1, + Backing: "mydb", + Active: true, + StorageBucketCnt: 2, + CachedBucketCnt: 2, + CachedBoundCnt: 2, + CachedTemplateCnt: 2, + StatCnt: 1, + }}, + }, + }, + }, + }, + { + name: "keyless index", + setup: []string{ + "create table t (c1 int, c2 int, index (c2))", + "insert into t values (0,0), (1,1), (2,2), (3,NULL), (4,NULL)", + }, + assertions: []assertion{ + { + query: "select database_name, table_name, index_name from dolt_statistics order by index_name", + res: []sql.Row{{"mydb", "t", "c2"}}, + }, + { + query: "call dolt_stats_info('--short')", + res: []sql.Row{ + {dprocedures.StatsInfo{ + DbCnt: 1, + Backing: "mydb", + Active: true, + StorageBucketCnt: 1, + CachedBucketCnt: 1, + CachedBoundCnt: 1, + CachedTemplateCnt: 1, + StatCnt: 1, + }}, + }, + }, + }, + }, + { + name: "caps testing", + setup: []string{ + "create table XY (x int primary key, Y int, key Yx (Y,x))", + "alter table xy add index y2 (y)", + "insert into xy values (0,0), (1,0), (2,0)", + }, + assertions: []assertion{ + { + query: "select database_name, table_name, index_name from dolt_statistics order by index_name", + res: []sql.Row{{"mydb", "xy", "primary"}, {"mydb", "xy", "y2"}, {"mydb", "xy", "yx"}}, + }, + { + query: "select count(*) from dolt_statistics", + res: []sql.Row{{int64(3)}}, + }, + { + query: "insert into xy select x, 1 from (with recursive inputs(x) as (select 4 union select x+1 from inputs where x < 1000) select * from inputs) dt;", + }, + { + query: "call dolt_stats_wait()", + }, + { + query: "select 
count(*) from dolt_statistics", + res: []sql.Row{{int64(12)}}, + }, + { + query: "delete from xy where x > 500", + }, + { + query: "call dolt_stats_wait()", + }, + { + query: "select count(*) from dolt_statistics", + res: []sql.Row{{int64(6)}}, + }, + }, + }, + { + name: "database ddl", + setup: []string{ + "create table mydb.xy (x int primary key, y int, key (y,x))", + "insert into xy values (0,0), (1,0), (2,0)", + "create database repo2", + "create table repo2.xy (x int primary key, y int, key (y,x))", + "insert into repo2.xy values (0,0), (1,0), (2,0)", + "create table repo2.ab (a int primary key, b int, key (b,a))", + "insert into repo2.ab values (0,0), (1,0), (2,0)", + }, + assertions: []assertion{ + { + query: "select database_name, table_name, index_name from dolt_statistics order by index_name", + res: []sql.Row{ + {"mydb", "xy", "primary"}, {"mydb", "xy", "y"}, + }, + }, + { + query: "select count(*) from dolt_statistics", + res: []sql.Row{{int64(2)}}, + }, + { + query: "select database_name, table_name, index_name from repo2.dolt_statistics order by index_name", + res: []sql.Row{ + {"repo2", "ab", "b"}, {"repo2", "ab", "primary"}, + {"repo2", "xy", "primary"}, {"repo2", "xy", "y"}, + }, + }, + { + query: "use repo2", + }, + { + query: "select database_name, table_name, index_name from dolt_statistics order by index_name", + res: []sql.Row{ + {"repo2", "ab", "b"}, {"repo2", "ab", "primary"}, + {"repo2", "xy", "primary"}, {"repo2", "xy", "y"}, + }, + }, + { + query: "select count(*) from dolt_statistics", + res: []sql.Row{{int64(4)}}, + }, + { + query: "insert into repo2.xy select x, 1 from (with recursive inputs(x) as (select 4 union select x+1 from inputs where x < 1000) select * from inputs) dt;", + }, + { + query: "call dolt_stats_wait()", + }, + { + query: "select count(*) from dolt_statistics", + res: []sql.Row{{int64(10)}}, + }, + { + query: "drop database repo2", + }, + { + query: "call dolt_stats_wait()", + }, + { + query: "use mydb", + }, + { + 
query: "select count(*) from dolt_statistics", + res: []sql.Row{{int64(2)}}, + }, + }, + }, + { + name: "recreate table without index", + setup: []string{ + "create table xy (x int primary key, y int, key (y,x))", + "insert into xy values (0,0), (1,0), (2,0)", + }, + assertions: []assertion{ + { + query: "select count(*) from dolt_statistics", + res: []sql.Row{{int64(2)}}, + }, + { + query: "drop table xy", + }, + { + query: "create table xy (x int primary key, y int)", + }, + { + query: "call dolt_stats_wait()", + }, + { + query: "select count(*) from dolt_statistics", + res: []sql.Row{{int64(0)}}, + }, + }, + }, + { + name: "stats info", + setup: []string{ + "create table xy (x int primary key, y int, key (y,x))", + "insert into xy values (0,0), (1,0), (2,0)", + "call dolt_add('-A')", + "call dolt_commit('-m', 'create xy')", + "call dolt_checkout('-b', 'feat')", + "call dolt_checkout('main')", + }, + assertions: []assertion{ + { + query: "call dolt_stats_info('--short')", + res: []sql.Row{ + {dprocedures.StatsInfo{ + DbCnt: 2, + Backing: "mydb", + Active: true, + StorageBucketCnt: 2, + CachedBucketCnt: 2, + CachedBoundCnt: 2, + CachedTemplateCnt: 2, + StatCnt: 2, + }}, + }, + }, + { + query: "call dolt_checkout('feat')", + }, + { + query: "drop table xy", + }, + { + query: "call dolt_stats_wait()", + }, + { + query: "call dolt_stats_info('--short')", + res: []sql.Row{ + {dprocedures.StatsInfo{ + DbCnt: 2, + Backing: "mydb", + Active: true, + StorageBucketCnt: 2, + CachedBucketCnt: 2, + CachedBoundCnt: 2, + CachedTemplateCnt: 2, + StatCnt: 1, + }, + }}, + }, + { + query: "call dolt_checkout('main')", + }, + { + query: "call dolt_branch('-D', 'feat')", + }, + { + query: "call dolt_stats_wait()", + }, + { + query: "call dolt_stats_info('--short')", + res: []sql.Row{ + {dprocedures.StatsInfo{ + DbCnt: 1, + Backing: "mydb", + Active: true, + StorageBucketCnt: 2, + CachedBucketCnt: 2, + CachedBoundCnt: 2, + CachedTemplateCnt: 2, + StatCnt: 1, + }, + }}, + }, + }, + }, 
+ { + name: "stats stop/start", + setup: []string{ + "create table xy (x int primary key, y int, key (y,x))", + "insert into xy values (0,0), (1,0), (2,0)", + "call dolt_add('-A')", + "call dolt_commit('-m', 'create xy')", + "call dolt_checkout('-b', 'feat')", + "call dolt_checkout('main')", + }, + assertions: []assertion{ + { + query: "call dolt_stats_info('--short')", + res: []sql.Row{ + {dprocedures.StatsInfo{ + DbCnt: 2, + Backing: "mydb", + Active: true, + StorageBucketCnt: 2, + CachedBucketCnt: 2, + CachedBoundCnt: 2, + CachedTemplateCnt: 2, + StatCnt: 2, + }, + }}, + }, + { + query: "call dolt_stats_stop()", + }, + { + query: "call dolt_stats_info('--short')", + res: []sql.Row{ + {dprocedures.StatsInfo{ + DbCnt: 2, + Backing: "mydb", + Active: false, + StorageBucketCnt: 2, + CachedBucketCnt: 2, + CachedBoundCnt: 2, + CachedTemplateCnt: 2, + StatCnt: 2, + }, + }}, + }, + { + query: "call dolt_stats_restart()", + }, + { + query: "call dolt_stats_info('--short')", + res: []sql.Row{ + {dprocedures.StatsInfo{ + DbCnt: 2, + Backing: "mydb", + Active: true, + StorageBucketCnt: 2, + CachedBucketCnt: 2, + CachedBoundCnt: 2, + CachedTemplateCnt: 2, + StatCnt: 2, + }, + }}, + }, + }, + }, + { + name: "stats purge", + setup: []string{ + "create table xy (x int primary key, y int, key (y,x))", + "insert into xy values (0,0), (1,0), (2,0)", + "call dolt_add('-A')", + "call dolt_commit('-m', 'create xy')", + "call dolt_checkout('-b', 'feat')", + "call dolt_checkout('main')", + "insert into xy values (3,0)", + "call dolt_checkout('feat')", + "insert into xy values (3,0)", + }, + assertions: []assertion{ + { + query: "call dolt_stats_purge()", + }, + { + query: "call dolt_stats_info('--short')", + res: []sql.Row{ + {dprocedures.StatsInfo{ + DbCnt: 0, + Backing: "mydb", + Active: false, + StorageBucketCnt: 0, + CachedBucketCnt: 0, + CachedBoundCnt: 0, + CachedTemplateCnt: 0, + StatCnt: 0, + }, + }}, + }, + { + query: "call dolt_stats_restart()", + }, + { + query: "call 
dolt_stats_wait()", + }, + { + query: "call dolt_stats_info('--short')", + res: []sql.Row{ + {dprocedures.StatsInfo{ + DbCnt: 2, + Backing: "mydb", + Active: true, + StorageBucketCnt: 2, + CachedBucketCnt: 2, + CachedBoundCnt: 2, + CachedTemplateCnt: 2, + StatCnt: 2, + }, + }}, + }, + }, + }, + { + name: "null bounds", + setup: []string{ + "create table xy (x int primary key, y int, key (y))", + "insert into xy values (0,NULL), (1,0), (2,0)", + "CREATE table xyz (x bigint primary key, y varchar(500), z bigint, key(x, z));", + "insert into xyz values (0,0,NULL), (1,1,0), (2,2,0)", + }, + assertions: []assertion{ + { + query: "call dolt_stats_info('--short')", + res: []sql.Row{{dprocedures.StatsInfo{ + DbCnt: 1, + Active: true, + StorageBucketCnt: 4, + CachedBucketCnt: 4, + CachedBoundCnt: 4, + CachedTemplateCnt: 4, + StatCnt: 2, + Backing: "mydb", + }}}, + }, + { + query: "select index_name, null_count from dolt_statistics", + res: []sql.Row{{"primary", uint64(0)}, {"y", uint64(1)}, {"primary", uint64(0)}, {"x", uint64(1)}}, + }, + }, + }, + } + + for _, tt := range scripts { + t.Run(tt.name, func(t *testing.T) { + bthreads := sql.NewBackgroundThreads() + ctx, sqlEng, sc := emptySetup(t, bthreads, false, false) + + defer sqlEng.Close() + + require.NoError(t, sc.Restart()) + + //sc.Debug = true + + for _, s := range tt.setup { + require.NoError(t, executeQuery(ctx, sqlEng, s)) + } + + require.NoError(t, executeQuery(ctx, sqlEng, "call dolt_stats_wait()")) + require.NoError(t, executeQuery(ctx, sqlEng, "call dolt_stats_flush()")) + + for i, a := range tt.assertions { + if sc.Debug { + log.Println(a.query) + } + rows, err := executeQueryResults(ctx, sqlEng, a.query) + if a.err != "" { + require.Equal(t, a.err, err.Error()) + } else { + require.NoError(t, err) + } + if a.res != nil { + cmp, exp := normalize(rows, a.res) + require.Equal(t, exp, cmp, "query no "+strconv.Itoa(i)+" failed: "+a.query) + } + } + }) + } +} + +func normalize(cmp, exp []sql.Row) ([]sql.Row, 
[]sql.Row) { + for i, r := range exp { + for j, v := range r { + if _, ok := v.(dprocedures.StatsInfo); ok { + if strSi, ok := cmp[i][j].(string); ok { + si := dprocedures.StatsInfo{} + if err := json.Unmarshal([]byte(strSi), &si); err != nil { + log.Fatal(err) + } + si.GenCnt = 0 + cmp[i][j] = si + } + } + } + } + return cmp, exp +} diff --git a/go/libraries/doltcore/sqle/statspro/stats_kv.go b/go/libraries/doltcore/sqle/statspro/stats_kv.go new file mode 100644 index 00000000000..254cc748fbe --- /dev/null +++ b/go/libraries/doltcore/sqle/statspro/stats_kv.go @@ -0,0 +1,550 @@ +// Copyright 2025 Dolthub, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package statspro + +import ( + "context" + "encoding/binary" + "errors" + "fmt" + "strconv" + "strings" + "sync" + + "github.com/dolthub/go-mysql-server/sql" + "github.com/dolthub/go-mysql-server/sql/stats" + "github.com/dolthub/go-mysql-server/sql/types" + + "github.com/dolthub/dolt/go/libraries/doltcore/schema" + "github.com/dolthub/dolt/go/libraries/doltcore/sqle/dsess" + "github.com/dolthub/dolt/go/store/hash" + "github.com/dolthub/dolt/go/store/prolly" + "github.com/dolthub/dolt/go/store/prolly/tree" + "github.com/dolthub/dolt/go/store/val" +) + +var ErrIncompatibleVersion = errors.New("client stats version mismatch") + +type StatsKv interface { + PutBucket(ctx context.Context, h hash.Hash, b *stats.Bucket, tupB *val.TupleBuilder) error + GetBucket(ctx context.Context, h hash.Hash, tupB *val.TupleBuilder) (*stats.Bucket, bool, error) + GetTemplate(key templateCacheKey) (stats.Statistic, bool) + PutTemplate(key templateCacheKey, stat stats.Statistic) + GetBound(h hash.Hash, len int) (sql.Row, bool) + PutBound(h hash.Hash, r sql.Row, l int) + Flush(ctx context.Context) (int, error) + Len() int + GcGen() uint64 +} + +var _ StatsKv = (*prollyStats)(nil) +var _ StatsKv = (*memStats)(nil) +var _ StatsKv = (*StatsController)(nil) + +func NewMemStats() *memStats { + return &memStats{ + mu: sync.Mutex{}, + buckets: make(map[bucketKey]*stats.Bucket), + templates: make(map[templateCacheKey]stats.Statistic), + bounds: make(map[bucketKey]sql.Row), + gcFlusher: make(map[*val.TupleBuilder][]bucketKey), + } +} + +type memStats struct { + mu sync.Mutex + gcGen uint64 + + buckets map[bucketKey]*stats.Bucket + templates map[templateCacheKey]stats.Statistic + bounds map[bucketKey]sql.Row + + // gcFlusher tracks state require to lazily swap from + // a *memStats to *prollyStats + gcFlusher map[*val.TupleBuilder][]bucketKey +} + +func (m *memStats) StorageCnt(context.Context) (int, error) { + return 0, nil +} + +func (m *memStats) GetTemplate(key templateCacheKey) 
(stats.Statistic, bool) { + m.mu.Lock() + defer m.mu.Unlock() + t, ok := m.templates[key] + if !ok { + return stats.Statistic{}, false + } + return t, true +} + +func (m *memStats) PutTemplate(key templateCacheKey, stat stats.Statistic) { + m.mu.Lock() + defer m.mu.Unlock() + m.templates[key] = stat +} + +type bucketKey [22]byte + +func getBucketKey(h hash.Hash, l int) bucketKey { + var k bucketKey + copy(k[:hash.ByteLen], h[:]) + binary.BigEndian.PutUint16(k[hash.ByteLen:], uint16(l)) + return k +} + +func (m *memStats) GetBound(h hash.Hash, l int) (sql.Row, bool) { + m.mu.Lock() + defer m.mu.Unlock() + k := getBucketKey(h, l) + r, ok := m.bounds[k] + if !ok { + return nil, false + } + return r, true +} + +func (m *memStats) PutBound(h hash.Hash, r sql.Row, l int) { + m.mu.Lock() + defer m.mu.Unlock() + k := getBucketKey(h, l) + m.bounds[k] = r +} + +func (m *memStats) GcMark(from StatsKv, nodes []tree.Node, buckets []*stats.Bucket, idxLen int, tb *val.TupleBuilder) bool { + if from.GcGen() > m.GcGen() { + return false + } + + m.mu.Lock() + defer m.mu.Unlock() + + for i, b := range buckets { + h := nodes[i].HashOf() + k := getBucketKey(h, idxLen) + if i == 0 { + m.bounds[k], _ = from.GetBound(h, idxLen) + } + m.buckets[k] = b + m.gcFlusher[tb] = append(m.gcFlusher[tb], k) + } + return true +} + +func (m *memStats) GcGen() uint64 { + m.mu.Lock() + defer m.mu.Unlock() + return m.gcGen +} + +func (m *memStats) Len() int { + m.mu.Lock() + defer m.mu.Unlock() + return len(m.buckets) +} + +func (m *memStats) PutBucket(_ context.Context, h hash.Hash, b *stats.Bucket, _ *val.TupleBuilder) error { + m.mu.Lock() + defer m.mu.Unlock() + k := getBucketKey(h, len(b.BoundVal)) + m.buckets[k] = b + return nil +} + +func (m *memStats) GetBucket(_ context.Context, h hash.Hash, tupB *val.TupleBuilder) (*stats.Bucket, bool, error) { + m.mu.Lock() + defer m.mu.Unlock() + if h.IsEmpty() { + return nil, false, nil + } + k := getBucketKey(h, tupB.Desc.Count()) + b, ok := m.buckets[k] + 
return b, ok, nil +} + +func (m *memStats) Flush(_ context.Context) (int, error) { + m.mu.Lock() + defer m.mu.Unlock() + if m.gcFlusher != nil { + m.gcFlusher = nil + } + return 0, nil +} + +func NewProllyStats(ctx context.Context, destDb dsess.SqlDatabase) (*prollyStats, error) { + sch := schema.StatsTableDoltSchema + kd, vd := sch.GetMapDescriptors(nil) + + keyBuilder := val.NewTupleBuilder(kd) + valueBuilder := val.NewTupleBuilder(vd) + newMap, err := prolly.NewMapFromTuples(ctx, destDb.DbData().Ddb.NodeStore(), kd, vd) + if err != nil { + return nil, err + } + + return &prollyStats{ + mu: sync.Mutex{}, + destDb: destDb, + kb: keyBuilder, + vb: valueBuilder, + m: newMap.Mutate(), + mem: NewMemStats(), + }, nil +} + +type prollyStats struct { + mu sync.Mutex + destDb dsess.SqlDatabase + kb, vb *val.TupleBuilder + m *prolly.MutableMap + newM *prolly.MutableMap + mem *memStats +} + +func (p *prollyStats) Len() int { + return p.mem.Len() +} + +func (p *prollyStats) GetTemplate(key templateCacheKey) (stats.Statistic, bool) { + return p.mem.GetTemplate(key) +} + +func (p *prollyStats) PutTemplate(key templateCacheKey, stat stats.Statistic) { + p.mem.PutTemplate(key, stat) +} + +func (p *prollyStats) GetBound(h hash.Hash, l int) (sql.Row, bool) { + return p.mem.GetBound(h, l) +} + +func (p *prollyStats) PutBound(h hash.Hash, r sql.Row, l int) { + p.mem.PutBound(h, r, l) +} + +func (p *prollyStats) PutBucket(ctx context.Context, h hash.Hash, b *stats.Bucket, tupB *val.TupleBuilder) error { + if err := p.mem.PutBucket(ctx, h, b, tupB); err != nil { + return err + } + + k, err := p.encodeHash(h, tupB.Desc.Count()) + if err != nil { + return err + } + v, err := p.encodeBucket(ctx, b, tupB) + if err != nil { + return err + } + + p.mu.Lock() + defer p.mu.Unlock() + return p.m.Put(ctx, k, v) +} + +func (p *prollyStats) GetBucket(ctx context.Context, h hash.Hash, tupB *val.TupleBuilder) (*stats.Bucket, bool, error) { + if h.IsEmpty() { + return nil, false, nil + } + b, ok, err 
:= p.mem.GetBucket(ctx, h, tupB) + if err != nil { + return nil, false, err + } + if ok { + return b, true, nil + } + + // missing bucket and not GC'ing, try disk + k, err := p.encodeHash(h, tupB.Desc.Count()) + if err != nil { + return nil, false, err + } + + var v val.Tuple + err = p.m.Get(ctx, k, func(key val.Tuple, value val.Tuple) error { + if key != nil { + ok = true + v = value + } + return nil + }) + if !ok || err != nil { + return nil, false, err + } + + b, err = p.decodeBucketTuple(ctx, v, tupB) + if err != nil { + return nil, false, err + } + + p.mem.PutBucket(ctx, h, b, tupB) + return b, true, nil +} + +func (p *prollyStats) GcGen() uint64 { + return p.mem.GcGen() +} + +func (p *prollyStats) LoadFromMem(ctx context.Context) error { + p.mem.mu.Lock() + defer p.mem.mu.Unlock() + for tb, keys := range p.mem.gcFlusher { + for _, key := range keys { + b, ok := p.mem.buckets[key] + if !ok { + return fmt.Errorf("memory KV inconsistent, missing bucket for: %s", key) + } + tupK, kErr := p.encodeHash(hash.New(key[:hash.ByteLen]), tb.Desc.Count()) + tupV, vErr := p.encodeBucket(ctx, b, tb) + if err := errors.Join(kErr, vErr); err != nil { + return err + } + if err := p.m.Put(ctx, tupK, tupV); err != nil { + return err + } + } + } + p.mem.gcFlusher = nil + return nil +} + +func (p *prollyStats) Flush(ctx context.Context) (int, error) { + if err := p.LoadFromMem(ctx); err != nil { + return 0, err + } + + p.mu.Lock() + defer p.mu.Unlock() + + flushedMap, err := p.m.Map(ctx) + if err != nil { + return 0, err + } + if err := p.destDb.DbData().Ddb.SetStatistics(ctx, "main", flushedMap.HashOf()); err != nil { + return 0, err + } + + p.m = flushedMap.Mutate() + + cnt, err := flushedMap.Count() + return cnt, err +} + +func (p *prollyStats) encodeHash(h hash.Hash, len int) (val.Tuple, error) { + p.mu.Lock() + defer p.mu.Unlock() + p.kb.PutInt64(0, int64(len)) + if err := p.kb.PutString(1, h.String()); err != nil { + return nil, err + } + return p.kb.Build(p.m.NodeStore().Pool()), nil +} + +func (p
*prollyStats) decodeHashTuple(v val.Tuple) (int, hash.Hash, error) { + l, ok := p.kb.Desc.GetInt64(0, v) + hStr, ok := p.kb.Desc.GetString(1, v) + if !ok { + return 0, hash.Hash{}, fmt.Errorf("unexpected null hash") + } + return int(l), hash.Parse(hStr), nil +} + +func (p *prollyStats) decodeBucketTuple(ctx context.Context, v val.Tuple, tupB *val.TupleBuilder) (*stats.Bucket, error) { + var row []interface{} + for i := 0; i < p.vb.Desc.Count(); i++ { + f, err := tree.GetField(ctx, p.vb.Desc, i, v, p.m.NodeStore()) + if err != nil { + return nil, err + } + row = append(row, f) + } + + version := row[0] + if version != schema.StatsVersion { + return nil, fmt.Errorf("%w: write version %d does not match read version %d", ErrIncompatibleVersion, version, schema.StatsVersion) + } + rowCount := row[1].(int64) + distinctCount := row[2].(int64) + nullCount := row[3].(int64) + boundRowStr := row[4].(string) + upperBoundCnt := row[5].(int64) + mcvCountsStr := row[10].(string) + + boundRow, err := DecodeRow(ctx, p.m.NodeStore(), boundRowStr, tupB) + if err != nil { + return nil, err + } + + var mcvCnts []uint64 + if len(mcvCountsStr) > 0 { + for _, c := range strings.Split(mcvCountsStr, ",") { + cnt, err := strconv.ParseInt(c, 10, 64) + if err != nil { + return nil, err + } + mcvCnts = append(mcvCnts, uint64(cnt)) + } + } + + mcvs := make([]sql.Row, len(mcvCnts)) + for i, v := range row[6 : 6+len(mcvCnts)] { + if v != nil && v != "" { + row, err := DecodeRow(ctx, p.m.NodeStore(), v.(string), tupB) + if err != nil { + return nil, err + } + mcvs[i] = row + } + } + + return &stats.Bucket{ + RowCnt: uint64(rowCount), + DistinctCnt: uint64(distinctCount), + NullCnt: uint64(nullCount), + McvsCnt: mcvCnts, + BoundCnt: uint64(upperBoundCnt), + BoundVal: boundRow, + McvVals: mcvs, + }, nil +} + +var mcvTypes = []sql.Type{types.Int16, types.Int16, types.Int16, types.Int16} + +func (p *prollyStats) encodeBucket(ctx context.Context, b *stats.Bucket, tupB *val.TupleBuilder) (val.Tuple, 
error) { + p.mu.Lock() + defer p.mu.Unlock() + + p.vb.PutInt64(0, schema.StatsVersion) + p.vb.PutInt64(1, int64(b.RowCount())) + p.vb.PutInt64(2, int64(b.DistinctCount())) + p.vb.PutInt64(3, int64(b.NullCount())) + boundRow, err := EncodeRow(ctx, p.m.NodeStore(), b.UpperBound(), tupB) + if err != nil { + return nil, err + } + p.vb.PutString(4, string(boundRow)) + p.vb.PutInt64(5, int64(b.BoundCount())) + for i, r := range b.Mcvs() { + mcvRow, err := EncodeRow(ctx, p.m.NodeStore(), r, tupB) + if err != nil { + return nil, err + } + p.vb.PutString(6+i, string(mcvRow)) + } + var mcvCntsRow sql.Row + for _, v := range b.McvCounts() { + mcvCntsRow = append(mcvCntsRow, int(v)) + } + p.vb.PutString(10, stats.StringifyKey(mcvCntsRow, mcvTypes[:len(mcvCntsRow)])) + + return p.vb.Build(p.m.NodeStore().Pool()), nil +} + +func (p *prollyStats) NewEmpty(ctx context.Context) (StatsKv, error) { + kd, vd := schema.StatsTableDoltSchema.GetMapDescriptors(nil) + newMap, err := prolly.NewMapFromTuples(ctx, p.destDb.DbData().Ddb.NodeStore(), kd, vd) + if err != nil { + return nil, err + } + m := newMap.Mutate() + return &prollyStats{m: m, destDb: p.destDb, kb: p.kb, vb: p.vb, mem: NewMemStats()}, nil +} + +func EncodeRow(ctx context.Context, ns tree.NodeStore, r sql.Row, tb *val.TupleBuilder) ([]byte, error) { + for i := range tb.Desc.Count() { + v := r[i] + if v == nil { + continue + } + if err := tree.PutField(ctx, ns, tb, i, v); err != nil { + return nil, err + } + } + return tb.Build(ns.Pool()), nil +} + +func DecodeRow(ctx context.Context, ns tree.NodeStore, s string, tb *val.TupleBuilder) (sql.Row, error) { + tup := []byte(s) + r := make(sql.Row, tb.Desc.Count()) + var err error + for i := range r { + r[i], err = tree.GetField(ctx, tb.Desc, i, tup, ns) + if err != nil { + return nil, err + } + } + return r, nil +} + +func (sc *StatsController) PutBucket(ctx context.Context, h hash.Hash, b *stats.Bucket, tupB *val.TupleBuilder) error { + sc.mu.Lock() + defer sc.mu.Unlock() + return
sc.kv.PutBucket(ctx, h, b, tupB) +} + +func (sc *StatsController) GetBucket(ctx context.Context, h hash.Hash, tupB *val.TupleBuilder) (*stats.Bucket, bool, error) { + sc.mu.Lock() + defer sc.mu.Unlock() + return sc.kv.GetBucket(ctx, h, tupB) +} + +func (sc *StatsController) GetTemplate(key templateCacheKey) (stats.Statistic, bool) { + sc.mu.Lock() + defer sc.mu.Unlock() + return sc.kv.GetTemplate(key) +} + +func (sc *StatsController) PutTemplate(key templateCacheKey, stat stats.Statistic) { + sc.mu.Lock() + defer sc.mu.Unlock() + sc.kv.PutTemplate(key, stat) +} + +func (sc *StatsController) GetBound(h hash.Hash, len int) (sql.Row, bool) { + sc.mu.Lock() + defer sc.mu.Unlock() + return sc.kv.GetBound(h, len) +} + +func (sc *StatsController) PutBound(h hash.Hash, r sql.Row, l int) { + sc.mu.Lock() + defer sc.mu.Unlock() + sc.kv.PutBound(h, r, l) +} + +func (sc *StatsController) Flush(ctx context.Context) (int, error) { + sqlCtx, err := sc.ctxGen(ctx) + if err != nil { + return 0, err + } + defer sql.SessionEnd(sqlCtx.Session) + sql.SessionCommandBegin(sqlCtx.Session) + defer sql.SessionCommandEnd(sqlCtx.Session) + + sc.mu.Lock() + defer sc.mu.Unlock() + defer sc.signalListener(leFlush) + return sc.kv.Flush(sqlCtx) +} + +func (sc *StatsController) Len() int { + sc.mu.Lock() + defer sc.mu.Unlock() + return sc.kv.Len() +} + +func (sc *StatsController) GcGen() uint64 { + sc.mu.Lock() + defer sc.mu.Unlock() + return sc.kv.GcGen() +} diff --git a/go/libraries/doltcore/sqle/statspro/stats_kv_test.go b/go/libraries/doltcore/sqle/statspro/stats_kv_test.go new file mode 100644 index 00000000000..0a55b6ce28f --- /dev/null +++ b/go/libraries/doltcore/sqle/statspro/stats_kv_test.go @@ -0,0 +1,200 @@ +// Copyright 2025 Dolthub, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package statspro + +import ( + "context" + "strings" + "testing" + + "github.com/dolthub/go-mysql-server/sql" + "github.com/dolthub/go-mysql-server/sql/stats" + "github.com/stretchr/testify/require" + + "github.com/dolthub/dolt/go/libraries/doltcore/dtestutils" + "github.com/dolthub/dolt/go/libraries/doltcore/sqle/dsess" + "github.com/dolthub/dolt/go/store/chunks" + "github.com/dolthub/dolt/go/store/hash" + "github.com/dolthub/dolt/go/store/prolly/message" + "github.com/dolthub/dolt/go/store/prolly/tree" + "github.com/dolthub/dolt/go/store/types" + "github.com/dolthub/dolt/go/store/val" +) + +func TestProllyKv(t *testing.T) { + threads := sql.NewBackgroundThreads() + prollyKv := newTestProllyKv(t, threads) + + h := hash.Parse(strings.Repeat("a", hash.StringLen)) + h2 := hash.Parse(strings.Repeat("b", hash.StringLen)) + k := getBucketKey(h, 2) + + tupB := val.NewTupleBuilder(val.NewTupleDescriptor( + val.Type{Enc: val.Int64Enc, Nullable: true}, + val.Type{Enc: val.StringEnc, Nullable: true}, + )) + + t.Run("TestBoundsRoundTrip", func(t *testing.T) { + exp := sql.Row{1, 1} + prollyKv.PutBound(h, exp, 2) + cmp, ok := prollyKv.GetBound(h, 2) + require.True(t, ok) + require.Equal(t, exp, cmp) + + _, ok = prollyKv.GetBound(h2, 2) + require.False(t, ok) + }) + + t.Run("TestTemplatesRoundTrip", func(t *testing.T) { + exp := stats.Statistic{RowCnt: 50, Qual: sql.StatQualifier{Database: "mydb", Tab: "xy"}} + key := templateCacheKey{ + h: h, + idxName: "PRIMARY", + } + prollyKv.PutTemplate(key, exp) + cmp, ok := prollyKv.GetTemplate(key) + require.True(t, 
ok) + require.Equal(t, exp, cmp) + + key2 := templateCacheKey{ + h: h2, + idxName: "PRIMARY", + } + _, ok = prollyKv.GetTemplate(key2) + require.False(t, ok) + }) + t.Run("TestBucketsRoundTrip", func(t *testing.T) { + exp := stats.NewHistogramBucket(15, 7, 3, 4, sql.Row{int64(1), "one"}, []uint64{5, 4, 3, 1}, []sql.Row{{int64(5), "six"}, {int64(4), "three"}, {int64(3), "seven"}, {int64(1), "one"}}).(*stats.Bucket) + err := prollyKv.PutBucket(context.Background(), h, exp, tupB) + require.NoError(t, err) + cmp, ok, err := prollyKv.GetBucket(context.Background(), h, tupB) + require.NoError(t, err) + require.True(t, ok) + require.Equal(t, exp, cmp) + + // delete from memory, should pull from disk when |tupB| supplied + delete(prollyKv.mem.buckets, k) + + cmp, ok, err = prollyKv.GetBucket(context.Background(), h, tupB) + require.NoError(t, err) + require.True(t, ok) + require.Equal(t, exp.RowCnt, cmp.RowCnt) + require.Equal(t, exp.DistinctCnt, cmp.DistinctCnt) + require.Equal(t, exp.NullCnt, cmp.NullCnt) + require.Equal(t, exp.McvsCnt, cmp.McvsCnt) + require.Equal(t, exp.McvVals[0], cmp.McvVals[0]) + require.Equal(t, exp.McvVals[1], cmp.McvVals[1]) + require.Equal(t, exp.McvVals[2], cmp.McvVals[2]) + require.Equal(t, exp.McvVals[3], cmp.McvVals[3]) + require.Equal(t, exp.BoundVal, cmp.BoundVal) + require.Equal(t, exp.BoundCnt, cmp.BoundCnt) + }) + t.Run("TestNilMcvsRoundTrip", func(t *testing.T) { + exp := stats.NewHistogramBucket(15, 7, 3, 4, sql.Row{int64(1), "one"}, []uint64{5, 4}, []sql.Row{{int64(5), "six"}, {int64(4), "three"}}).(*stats.Bucket) + err := prollyKv.PutBucket(context.Background(), h, exp, tupB) + + delete(prollyKv.mem.buckets, k) + + cmp, ok, err := prollyKv.GetBucket(context.Background(), h, tupB) + require.NoError(t, err) + require.True(t, ok) + require.Equal(t, exp.RowCnt, cmp.RowCnt) + require.Equal(t, exp.DistinctCnt, cmp.DistinctCnt) + require.Equal(t, exp.NullCnt, cmp.NullCnt) + require.Equal(t, exp.McvsCnt, cmp.McvsCnt) + require.Equal(t, 
len(exp.McvVals), len(cmp.McvVals)) + require.Equal(t, exp.McvVals[0], cmp.McvVals[0]) + require.Equal(t, exp.McvVals[1], cmp.McvVals[1]) + require.Equal(t, exp.BoundVal, cmp.BoundVal) + require.Equal(t, exp.BoundCnt, cmp.BoundCnt) + }) + t.Run("TestGcGenBlocking", func(t *testing.T) { + to := NewMemStats() + from := NewMemStats() + from.gcGen = 1 + require.False(t, to.GcMark(from, nil, nil, 0, nil)) + }) + t.Run("TestGcMarkFlush", func(t *testing.T) { + ctx := context.Background() + bthreads := sql.NewBackgroundThreads() + defer bthreads.Shutdown() + prev := NewMemStats() + nodes1, bucks1 := testNodes(t, 10, 1) + nodes2, bucks2 := testNodes(t, 10, 2) + nodes3, bucks3 := testNodes(t, 10, 3) + for i := range nodes1 { + require.NoError(t, prev.PutBucket(ctx, nodes1[i].HashOf(), bucks1[i], tupB)) + } + for i := range nodes2 { + require.NoError(t, prev.PutBucket(ctx, nodes2[i].HashOf(), bucks2[i], tupB)) + } + for i := range nodes3 { + require.NoError(t, prev.PutBucket(ctx, nodes3[i].HashOf(), bucks3[i], tupB)) + } + + require.Equal(t, 30, prev.Len()) + + to := NewMemStats() + require.True(t, to.GcMark(prev, nodes1, bucks1, 2, tupB)) + require.True(t, to.GcMark(prev, nodes2, bucks2, 2, tupB)) + + require.Equal(t, 1, len(to.gcFlusher)) + require.Equal(t, 20, len(to.gcFlusher[tupB])) + require.Equal(t, 20, to.Len()) + + kv := newTestProllyKv(t, bthreads) + kv.mem = to + cnt, err := kv.Flush(ctx) + require.NoError(t, err) + require.Equal(t, 20, cnt) + }) +} + +func newTestProllyKv(t *testing.T, threads *sql.BackgroundThreads) *prollyStats { + dEnv := dtestutils.CreateTestEnv() + + sqlEng, ctx := newTestEngine(context.Background(), dEnv, threads) + ctx.Session.SetClient(sql.Client{ + User: "billy boy", + Address: "bigbillie@fake.horse", + }) + require.NoError(t, executeQuery(ctx, sqlEng, "create database mydb")) + require.NoError(t, executeQuery(ctx, sqlEng, "use mydb")) + + startDbs := sqlEng.Analyzer.Catalog.DbProvider.AllDatabases(ctx) + + kv, err := NewProllyStats(ctx, 
startDbs[0].(dsess.SqlDatabase)) + require.NoError(t, err) + + return kv +} + +func testNodes(t *testing.T, cnt int, seed uint8) ([]tree.Node, []*stats.Bucket) { + ts := &chunks.TestStorage{} + ns := tree.NewNodeStore(ts.NewViewWithFormat(types.Format_DOLT.VersionString())) + s := message.NewBlobSerializer(ns.Pool()) + + var nodes []tree.Node + var buckets []*stats.Bucket + for i := range cnt { + vals := [][]byte{{uint8(i), seed, 1, 1}} + msg := s.Serialize([][]byte{{0}}, vals, []uint64{1}, 0) + node, _, err := tree.NodeFromBytes(msg) + require.NoError(t, err) + nodes = append(nodes, node) + buckets = append(buckets, &stats.Bucket{RowCnt: uint64(i), BoundVal: sql.Row{i, "col2"}}) + } + return nodes, buckets +} diff --git a/go/libraries/doltcore/sqle/statspro/stats_provider.go b/go/libraries/doltcore/sqle/statspro/stats_provider.go deleted file mode 100644 index 573e20b638a..00000000000 --- a/go/libraries/doltcore/sqle/statspro/stats_provider.go +++ /dev/null @@ -1,535 +0,0 @@ -// Copyright 2023 Dolthub, Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -package statspro - -import ( - "context" - "errors" - "fmt" - "path/filepath" - "strings" - "sync" - - "github.com/dolthub/go-mysql-server/sql" - - "github.com/dolthub/dolt/go/libraries/doltcore/dbfactory" - "github.com/dolthub/dolt/go/libraries/doltcore/env" - "github.com/dolthub/dolt/go/libraries/doltcore/sqle" - "github.com/dolthub/dolt/go/libraries/doltcore/sqle/dsess" - "github.com/dolthub/dolt/go/store/hash" - "github.com/dolthub/dolt/go/store/prolly/tree" -) - -var ErrFailedToLoad = errors.New("failed to load statistics") - -type indexMeta struct { - qual sql.StatQualifier - cols []string - newNodes []tree.Node - // updateOrdinals are [start, stop] tuples for each update chunk - updateOrdinals []updateOrdinal - keepChunks []sql.HistogramBucket - dropChunks []sql.HistogramBucket - allAddrs []hash.Hash -} - -type updateOrdinal struct { - start, stop uint64 -} - -func NewProvider(pro *sqle.DoltDatabaseProvider, sf StatsFactory) *Provider { - return &Provider{ - pro: pro, - sf: sf, - mu: &sync.Mutex{}, - statDbs: make(map[string]Database), - autoCtxCancelers: make(map[string]context.CancelFunc), - analyzeCtxCancelers: make(map[string]context.CancelFunc), - status: make(map[string]string), - lockedTables: make(map[string]bool), - } -} - -// Provider is the engine interface for reading and writing index statistics. -// Each database has its own statistics table that all tables/indexes in a db -// share. 
-type Provider struct { - mu *sync.Mutex - pro *sqle.DoltDatabaseProvider - sf StatsFactory - statDbs map[string]Database - autoCtxCancelers map[string]context.CancelFunc - analyzeCtxCancelers map[string]context.CancelFunc - starter sqle.InitDatabaseHook - status map[string]string - lockedTables map[string]bool -} - -// each database has one statistics table that is a collection of the -// table stats in the database -type dbToStats struct { - mu *sync.Mutex - dbName string - stats map[sql.StatQualifier]*DoltStats - statsDatabase Database - latestTableHashes map[string]hash.Hash -} - -func newDbStats(dbName string) *dbToStats { - return &dbToStats{ - mu: &sync.Mutex{}, - dbName: dbName, - stats: make(map[sql.StatQualifier]*DoltStats), - latestTableHashes: make(map[string]hash.Hash), - } -} - -var _ sql.StatsProvider = (*Provider)(nil) - -func (p *Provider) Close() error { - var lastErr error - for _, db := range p.statDbs { - if err := db.Close(); err != nil { - lastErr = err - } - } - return lastErr -} - -func (p *Provider) TryLockForUpdate(branch, db, table string) bool { - p.mu.Lock() - defer p.mu.Unlock() - lockId := fmt.Sprintf("%s.%s.%s", branch, db, table) - if ok := p.lockedTables[lockId]; ok { - return false - } - p.lockedTables[lockId] = true - return true -} - -func (p *Provider) UnlockTable(branch, db, table string) { - p.mu.Lock() - defer p.mu.Unlock() - lockId := fmt.Sprintf("%s.%s.%s", branch, db, table) - p.lockedTables[lockId] = false - return -} - -func (p *Provider) StartRefreshThread(ctx *sql.Context, pro dsess.DoltDatabaseProvider, name string, env *env.DoltEnv, db dsess.SqlDatabase) error { - err := p.starter(ctx, pro.(*sqle.DoltDatabaseProvider), name, env, db) - - if err != nil { - p.UpdateStatus(name, fmt.Sprintf("error restarting thread %s: %s", name, err.Error())) - return err - } - p.UpdateStatus(name, fmt.Sprintf("restarted thread: %s", name)) - return nil -} - -func (p *Provider) SetStarter(hook sqle.InitDatabaseHook) { - p.starter = 
hook -} - -func (p *Provider) CancelRefreshThread(dbName string) { - p.mu.Lock() - if cancel, ok := p.autoCtxCancelers[dbName]; ok { - cancel() - } - p.mu.Unlock() - p.UpdateStatus(dbName, fmt.Sprintf("cancelled thread: %s", dbName)) - -} - -func (p *Provider) ThreadStatus(dbName string) string { - p.mu.Lock() - defer p.mu.Unlock() - - if msg, ok := p.status[dbName]; ok { - return msg - } - return "no active stats thread" -} - -func (p *Provider) TrackedBranches(dbName string) []string { - db, ok := p.getStatDb(dbName) - if !ok { - return nil - } - return db.Branches() - -} - -func (p *Provider) GetTableStats(ctx *sql.Context, db string, table sql.Table) ([]sql.Statistic, error) { - dSess := dsess.DSessFromSess(ctx.Session) - branch, err := dSess.GetBranch() - if err != nil { - return nil, nil - } - - var schemaName string - if schTab, ok := table.(sql.DatabaseSchemaTable); ok { - schemaName = strings.ToLower(schTab.DatabaseSchema().SchemaName()) - } - - return p.GetTableDoltStats(ctx, branch, db, schemaName, table.Name()) -} - -func (p *Provider) GetTableDoltStats(ctx *sql.Context, branch, db, schema, table string) ([]sql.Statistic, error) { - statDb, ok := p.getStatDb(db) - if !ok || statDb == nil { - return nil, nil - } - - if branch == "" { - dSess := dsess.DSessFromSess(ctx.Session) - var err error - branch, err = dSess.GetBranch() - if err != nil { - return nil, nil - } - } - - var ret []sql.Statistic - for _, qual := range statDb.ListStatQuals(branch) { - if strings.EqualFold(db, qual.Database) && strings.EqualFold(schema, qual.Sch) && strings.EqualFold(table, qual.Tab) { - stat, _ := statDb.GetStat(branch, qual) - ret = append(ret, stat) - } - } - - return ret, nil -} - -func (p *Provider) setStatDb(name string, db Database) { - p.mu.Lock() - defer p.mu.Unlock() - p.statDbs[name] = db -} - -func (p *Provider) getStatDb(name string) (Database, bool) { - p.mu.Lock() - defer p.mu.Unlock() - statDb, ok := p.statDbs[strings.ToLower(name)] - return statDb, ok -} 
- -func (p *Provider) deleteStatDb(name string) { - p.mu.Lock() - defer p.mu.Unlock() - delete(p.statDbs, strings.ToLower(name)) -} - -func (p *Provider) SetStats(ctx *sql.Context, s sql.Statistic) error { - statDb, ok := p.getStatDb(s.Qualifier().Db()) - if !ok { - return nil - } - - dSess := dsess.DSessFromSess(ctx.Session) - branch, err := dSess.GetBranch() - if err != nil { - return nil - } - - doltStat, err := DoltStatsFromSql(s) - if err != nil { - return err - } - - p.UpdateStatus(s.Qualifier().Db(), fmt.Sprintf("refreshed %s", s.Qualifier().Db())) - - return statDb.SetStat(ctx, branch, s.Qualifier(), doltStat) -} - -func (p *Provider) getQualStats(ctx *sql.Context, qual sql.StatQualifier) (*DoltStats, bool) { - statDb, ok := p.getStatDb(qual.Db()) - if !ok { - return nil, false - } - - dSess := dsess.DSessFromSess(ctx.Session) - branch, err := dSess.GetBranch() - if err != nil { - return nil, false - } - - return statDb.GetStat(branch, qual) -} - -func (p *Provider) GetStats(ctx *sql.Context, qual sql.StatQualifier, _ []string) (sql.Statistic, bool) { - stat, ok := p.getQualStats(ctx, qual) - if !ok { - return nil, false - } - return stat, true -} - -func (p *Provider) DropBranchDbStats(ctx *sql.Context, branch, db string, flush bool) error { - statDb, ok := p.getStatDb(db) - if !ok { - return nil - } - - p.mu.Lock() - defer p.mu.Unlock() - - p.status[db] = "dropped" - - return statDb.DeleteBranchStats(ctx, branch, flush) -} - -func (p *Provider) DropDbStats(ctx *sql.Context, db string, flush bool) error { - statDb, ok := p.getStatDb(db) - if !ok { - return nil - } - for _, branch := range statDb.Branches() { - // remove provider access - p.DropBranchDbStats(ctx, branch, db, flush) - } - - if flush { - p.deleteStatDb(db) - } - - return nil -} - -func (p *Provider) DropStats(ctx *sql.Context, qual sql.StatQualifier, _ []string) error { - statDb, ok := p.getStatDb(qual.Db()) - if !ok { - return nil - } - - dSess := dsess.DSessFromSess(ctx.Session) - branch, 
err := dSess.GetBranch() - if err != nil { - return nil - } - - if _, ok := statDb.GetStat(branch, qual); ok { - statDb.DeleteStats(ctx, branch, qual) - p.UpdateStatus(qual.Db(), fmt.Sprintf("dropped statisic: %s", qual.String())) - } - - return nil -} - -func (p *Provider) UpdateStatus(db string, msg string) { - p.mu.Lock() - defer p.mu.Unlock() - - p.status[db] = msg -} - -func (p *Provider) RowCount(ctx *sql.Context, db string, table sql.Table) (uint64, error) { - statDb, ok := p.getStatDb(db) - if !ok { - return 0, sql.ErrDatabaseNotFound.New(db) - } - - dSess := dsess.DSessFromSess(ctx.Session) - branch, err := dSess.GetBranch() - if err != nil { - return 0, err - } - - var schemaName string - if schTab, ok := table.(sql.DatabaseSchemaTable); ok { - schemaName = strings.ToLower(schTab.DatabaseSchema().SchemaName()) - } - - priStats, ok := statDb.GetStat(branch, sql.NewStatQualifier(db, schemaName, table.Name(), "primary")) - if !ok { - return 0, nil - } - - return priStats.RowCount(), nil -} - -func (p *Provider) DataLength(ctx *sql.Context, db string, table sql.Table) (uint64, error) { - statDb, ok := p.getStatDb(db) - if !ok { - return 0, sql.ErrDatabaseNotFound.New(db) - } - - dSess := dsess.DSessFromSess(ctx.Session) - branch, err := dSess.GetBranch() - if err != nil { - return 0, err - } - - var schemaName string - if schTab, ok := table.(sql.DatabaseSchemaTable); ok { - schemaName = strings.ToLower(schTab.DatabaseSchema().SchemaName()) - } - - priStats, ok := statDb.GetStat(branch, sql.NewStatQualifier(db, schemaName, table.Name(), "primary")) - if !ok { - return 0, nil - } - - return priStats.AvgSize(), nil -} - -func (p *Provider) Prune(ctx *sql.Context) error { - dSess := dsess.DSessFromSess(ctx.Session) - - for _, sqlDb := range p.pro.DoltDatabases() { - dbName := strings.ToLower(sqlDb.Name()) - sqlDb, ok, err := dSess.Provider().SessionDatabase(ctx, dbName) - if err != nil { - return err - } - if !ok { - continue - } - statDb, ok := 
p.getStatDb(dbName) - if !ok { - continue - } - - // Canceling refresh thread prevents background thread from - // making progress. Prune should succeed. - p.CancelRefreshThread(dbName) - - tables, err := sqlDb.GetTableNames(ctx) - if err != nil { - return err - } - - for _, branch := range statDb.Branches() { - err := func() error { - // function closure ensures safe defers - var stats []sql.Statistic - for _, t := range tables { - // XXX: avoid races with ANALYZE with the table locks. - // Either concurrent purge or analyze (or both) will fail. - if !p.TryLockForUpdate(branch, dbName, t) { - p.mu.Lock() - fmt.Println(p.lockedTables) - p.mu.Unlock() - return fmt.Errorf("concurrent statistics update and prune; retry prune when update is finished") - } - defer p.UnlockTable(branch, dbName, t) - - tableStats, err := p.GetTableDoltStats(ctx, branch, dbName, sqlDb.SchemaName(), t) - if err != nil { - return err - } - stats = append(stats, tableStats...) - } - - if err := p.DropBranchDbStats(ctx, branch, dbName, true); err != nil { - return err - } - - for _, s := range stats { - ds, ok := s.(*DoltStats) - if !ok { - return fmt.Errorf("unexpected statistics type found: %T", s) - } - if err := statDb.SetStat(ctx, branch, ds.Qualifier(), ds); err != nil { - return err - } - } - if err := statDb.Flush(ctx, branch); err != nil { - return err - } - return nil - }() - if err != nil { - return err - } - } - } - return nil -} - -func (p *Provider) Purge(ctx *sql.Context) error { - for _, sqlDb := range p.pro.DoltDatabases() { - dbName := strings.ToLower(sqlDb.Name()) - - tables, err := sqlDb.GetTableNames(ctx) - if err != nil { - return err - } - - var branches []string - db, ok := p.getStatDb(dbName) - if ok { - // Canceling refresh thread prevents background thread from - // making progress. Purge should succeed. 
- p.CancelRefreshThread(dbName) - - branches = db.Branches() - for _, branch := range branches { - err := func() error { - for _, t := range tables { - // XXX: avoid races with ANALYZE with the table locks. - // Either concurrent purge or analyze (or both) will fail. - if !p.TryLockForUpdate(branch, dbName, t) { - return fmt.Errorf("concurrent statistics update and prune; retry purge when update is finished") - } - defer p.UnlockTable(branch, dbName, t) - } - - err := p.DropBranchDbStats(ctx, branch, dbName, true) - if err != nil { - return fmt.Errorf("failed to drop stats: %w", err) - } - return nil - }() - if err != nil { - return err - } - } - } - - // if the database's failed to load, we still want to delete the folder - - fs, err := p.pro.FileSystemForDatabase(dbName) - if err != nil { - return err - } - - //remove from filesystem - statsFs, err := fs.WithWorkingDir(dbfactory.DoltStatsDir) - if err != nil { - return err - } - - if ok, _ := statsFs.Exists(""); ok { - if err := statsFs.Delete("", true); err != nil { - return err - } - } - - dropDbLoc, err := statsFs.Abs("") - if err != nil { - return err - } - - if err = dbfactory.DeleteFromSingletonCache(filepath.ToSlash(dropDbLoc + "/.dolt/noms")); err != nil { - return err - } - if len(branches) == 0 { - // if stats db was invalid on startup, recreate from baseline - branches = p.getStatsBranches(ctx) - } - p.Load(ctx, fs, sqlDb, branches) - } - return nil -} diff --git a/go/libraries/doltcore/sqle/statspro/worker.go b/go/libraries/doltcore/sqle/statspro/worker.go new file mode 100644 index 00000000000..8d6b87d8129 --- /dev/null +++ b/go/libraries/doltcore/sqle/statspro/worker.go @@ -0,0 +1,639 @@ +// Copyright 2025 Dolthub, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package statspro + +import ( + "context" + "errors" + "fmt" + "io" + "log" + "strings" + "time" + + "github.com/dolthub/go-mysql-server/sql" + "github.com/dolthub/go-mysql-server/sql/stats" + + "github.com/dolthub/dolt/go/libraries/doltcore/doltdb" + "github.com/dolthub/dolt/go/libraries/doltcore/doltdb/durable" + "github.com/dolthub/dolt/go/libraries/doltcore/ref" + "github.com/dolthub/dolt/go/libraries/doltcore/sqle" + "github.com/dolthub/dolt/go/libraries/doltcore/sqle/dsess" + "github.com/dolthub/dolt/go/store/hash" + "github.com/dolthub/dolt/go/store/prolly" + "github.com/dolthub/dolt/go/store/prolly/tree" + "github.com/dolthub/dolt/go/store/val" +) + +const collectBatchSize = 20 + +func (sc *StatsController) CollectOnce(ctx context.Context) (string, error) { + genStart := sc.genCnt.Load() + newStats, err := sc.newStatsForRoot(ctx, nil) + if errors.Is(err, context.Canceled) { + return "", nil + } else if err != nil { + return "", err + } + if ok, err := sc.trySwapStats(ctx, genStart, newStats, nil); err != nil || !ok { + return "", err + } + return newStats.String(), nil +} + +func (sc *StatsController) runWorker(ctx context.Context) (err error) { + var gcKv *memStats + var newStats *rootStats + gcTicker := time.NewTicker(sc.gcInterval) + for { + // This loops tries to update stats as long as context + // is active. Thread contexts governs who "owns" the update + // process. The generation counters ensure atomic swapping. 
+ + gcKv = nil + genStart := sc.genCnt.Load() + + select { + case <-gcTicker.C: + sc.setDoGc(false) + default: + } + + if sc.gcIsSet() { + gcKv = NewMemStats() + gcKv.gcGen = genStart + } + + newStats, err = sc.newStatsForRoot(ctx, gcKv) + if errors.Is(err, context.Canceled) { + return nil + } else if err != nil { + sc.descError("", err) + } + + if ok, err := sc.trySwapStats(ctx, genStart, newStats, gcKv); err != nil { + if !ok { + sc.descError("failed to swap stats", err) + } else { + sc.descError("swapped stats with flush failure", err) + } + } + + select { + case <-ctx.Done(): + // is double check necessary? + return context.Cause(ctx) + default: + } + + } +} + +func (sc *StatsController) trySwapStats(ctx context.Context, prevGen uint64, newStats *rootStats, gcKv *memStats) (ok bool, err error) { + if newStats == nil { + return false, fmt.Errorf("attempted to place a nil stats object") + } + sc.mu.Lock() + defer sc.mu.Unlock() + + if ctx.Err() != nil { + // final ctx check in critical section, avoid races on + // stats after calling stop + return false, context.Cause(ctx) + } + + signal := leSwap + defer func() { + if ok { + sc.logger.Debugf("stats successful swap: %s\n", newStats.String()) + sc.signalListener(signal) + } + }() + + if sc.genCnt.CompareAndSwap(prevGen, prevGen+1) { + // Replace stats and new Kv if no replacements happened + // in-between. + sc.Stats = newStats + if gcKv != nil { + signal |= leGc + // The new KV has all buckets for the latest root stats, + // background job will to swap the disk location and put + // entries into a prolly tree. 
+ if prevGen != gcKv.GcGen() { + err = fmt.Errorf("gc gen didn't match update gen") + return + } + sc.doGc = false + sc.gcCnt++ + sc.kv = gcKv + ok = true + if !sc.memOnly { + func() { + sc.mu.Unlock() + defer sc.mu.Lock() + if err := sc.sq.DoSync(ctx, func() error { + return sc.rotateStorage(ctx) + }); err != nil { + sc.descError("", err) + } + }() + } + } + // Flush new changes to disk, unlocked + if !sc.memOnly { + func() { + sc.mu.Unlock() + defer sc.mu.Lock() + if err := sc.sq.DoSync(ctx, func() error { + _, err := sc.Flush(ctx) + return err + }); err != nil { + sc.descError("", err) + } + }() + } + signal = signal | leFlush + return true, nil + } + return false, nil +} + +func (sc *StatsController) newStatsForRoot(baseCtx context.Context, gcKv *memStats) (newStats *rootStats, err error) { + defer func() { + if r := recover(); r != nil { + err = fmt.Errorf("issuer panicked running work: %s", r) + } + if err != nil { + sc.descError("stats update interrupted", err) + } + }() + + ctx, err := sc.ctxGen(baseCtx) + if err != nil { + return nil, err + } + + defer sql.SessionEnd(ctx.Session) + + dSess := dsess.DSessFromSess(ctx.Session) + var dbs []sql.Database + func() { + sql.SessionCommandBegin(ctx.Session) + defer sql.SessionCommandEnd(ctx.Session) + dbs = dSess.Provider().AllDatabases(ctx) + }() + newStats = newRootStats() + for _, db := range dbs { + sqlDb, ok := db.(sqle.Database) + if !ok { + continue + } + + var branches []ref.DoltRef + if err := sc.sq.DoSync(ctx, func() error { + sql.SessionCommandBegin(ctx.Session) + defer sql.SessionCommandEnd(ctx.Session) + ddb, ok := dSess.GetDoltDB(ctx, db.Name()) + if !ok { + return fmt.Errorf("get dolt db dolt database not found %s", db.Name()) + } + var err error // races with outer err + branches, err = ddb.GetBranches(ctx) + return err + }); err != nil { + return nil, err + } + + for _, br := range branches { + // this call avoids the chunkstore + sqlDb, err := sqle.RevisionDbForBranch(ctx, db.(dsess.SqlDatabase), 
br.GetPath(), br.GetPath()+"/"+sqlDb.AliasedName()) + if err != nil { + sc.descError("revisionForBranch", err) + continue + } + + var schDbs []sql.DatabaseSchema + if err := sc.sq.DoSync(ctx, func() error { + sql.SessionCommandBegin(ctx.Session) + defer sql.SessionCommandEnd(ctx.Session) + schDbs, err = sqlDb.AllSchemas(ctx) + return err + }); err != nil { + sc.descError("getDatabaseSchemas", err) + continue + } + + for _, sqlDb := range schDbs { + switch sqlDb.SchemaName() { + case "dolt", "information_schema", "pg_catalog": + continue + } + var tableNames []string + if err := sc.sq.DoSync(ctx, func() error { + sql.SessionCommandBegin(ctx.Session) + defer sql.SessionCommandEnd(ctx.Session) + tableNames, err = sqlDb.GetTableNames(ctx) + return err + }); err != nil { + sc.descError("getTableNames", err) + continue + } + + newStats.DbCnt++ + + for _, tableName := range tableNames { + err := sc.updateTable(ctx, newStats, tableName, sqlDb.(dsess.SqlDatabase), gcKv) + if err != nil { + return nil, err + } + } + } + } + } + + return newStats, nil +} + +func (sc *StatsController) preexistingStats(k tableIndexesKey, h hash.Hash) ([]*stats.Statistic, bool) { + sc.mu.Lock() + defer sc.mu.Unlock() + if sc.Stats.hashes[k].Equal(h) { + return sc.Stats.stats[k], true + } + return nil, false +} + +func (sc *StatsController) finalizeHistogram(template stats.Statistic, buckets []*stats.Bucket, firstBound sql.Row) *stats.Statistic { + template.LowerBnd = firstBound + for _, b := range buckets { + // accumulate counts + template.RowCnt += b.RowCnt + template.DistinctCnt += b.DistinctCnt + template.NullCnt += b.NullCnt + template.Hist = append(template.Hist, b) + } + return &template +} + +func (sc *StatsController) collectIndexNodes(ctx *sql.Context, prollyMap prolly.Map, idxLen int, nodes []tree.Node) ([]*stats.Bucket, sql.Row, int, error) { + updater := newBucketBuilder(sql.StatQualifier{}, idxLen, prollyMap.KeyDesc()) + keyBuilder := 
val.NewTupleBuilder(prollyMap.KeyDesc().PrefixDesc(idxLen)) + + firstNodeHash := nodes[0].HashOf() + lowerBound, ok := sc.kv.GetBound(firstNodeHash, idxLen) + if !ok { + sc.sq.DoSync(ctx, func() error { + sql.SessionCommandBegin(ctx.Session) + defer sql.SessionCommandEnd(ctx.Session) + var err error + lowerBound, err = firstRowForIndex(ctx, idxLen, prollyMap, keyBuilder) + if err != nil { + return fmt.Errorf("get histogram bucket for node; %w", err) + } + if sc.Debug { + log.Printf("put bound: %s: %v\n", firstNodeHash.String()[:5], lowerBound) + } + + sc.kv.PutBound(firstNodeHash, lowerBound, idxLen) + return nil + }) + } + + var writes int + var offset uint64 + for i := 0; i < len(nodes); { + err := sc.sq.DoSync(ctx, func() error { + sql.SessionCommandBegin(ctx.Session) + defer sql.SessionCommandEnd(ctx.Session) + + newWrites := 0 + for i < len(nodes) && newWrites < collectBatchSize { + n := nodes[i] + i++ + + treeCnt, err := n.TreeCount() + if err != nil { + return err + } + start, stop := offset, offset+uint64(treeCnt) + offset = stop + + if _, ok, err := sc.GetBucket(ctx, n.HashOf(), keyBuilder); err != nil { + return err + } else if ok { + continue + } + + writes++ + newWrites++ + + updater.newBucket() + + // we read exclusive range [node first key, next node first key) + iter, err := prollyMap.IterOrdinalRange(ctx, start, stop) + if err != nil { + return err + } + for { + // stats key will be a prefix of the index key + keyBytes, _, err := iter.Next(ctx) + if errors.Is(err, io.EOF) { + break + } else if err != nil { + return err + } + // build full key + for i := range keyBuilder.Desc.Types { + keyBuilder.PutRaw(i, keyBytes.GetField(i)) + } + + updater.add(ctx, keyBuilder.BuildPrefixNoRecycle(prollyMap.Pool(), updater.prefixLen)) + keyBuilder.Recycle() + } + + // finalize the aggregation + newBucket, err := updater.finalize(ctx, prollyMap.NodeStore()) + if err != nil { + return err + } + if err := sc.PutBucket(ctx, n.HashOf(), newBucket, keyBuilder); err != 
nil { + return err + } + } + return nil + }) + if err != nil { + return nil, nil, 0, err + } + } + + var buckets []*stats.Bucket + for _, n := range nodes { + newBucket, ok, err := sc.GetBucket(ctx, n.HashOf(), keyBuilder) + if err != nil || !ok { + sc.descError(fmt.Sprintf("missing histogram bucket for node %s", n.HashOf().String()[:5]), err) + return nil, nil, 0, err + } + buckets = append(buckets, newBucket) + } + + return buckets, lowerBound, writes, nil +} + +func (sc *StatsController) updateTable(ctx *sql.Context, newStats *rootStats, tableName string, sqlDb dsess.SqlDatabase, gcKv *memStats) error { + var err error + var sqlTable *sqle.DoltTable + var dTab *doltdb.Table + if err := sc.sq.DoSync(ctx, func() error { + sql.SessionCommandBegin(ctx.Session) + defer sql.SessionCommandEnd(ctx.Session) + sqlTable, dTab, err = GetLatestTable(ctx, tableName, sqlDb) + return err + }); err != nil { + return err + } + + schemaName := sqlTable.DatabaseSchema().SchemaName() + + tableKey := tableIndexesKey{ + db: strings.ToLower(sqlDb.AliasedName()), + branch: strings.ToLower(sqlDb.Revision()), + table: strings.ToLower(tableName), + schema: strings.ToLower(schemaName), + } + + tableHash, err := dTab.HashOf() + if err != nil { + return err + } + if gcKv == nil { + if stats, ok := sc.preexistingStats(tableKey, tableHash); ok { + newStats.stats[tableKey] = stats + newStats.hashes[tableKey] = tableHash + newStats.TablesSkipped++ + return nil + } + } + + var indexes []sql.Index + if err := sc.sq.DoSync(ctx, func() error { + sql.SessionCommandBegin(ctx.Session) + defer sql.SessionCommandEnd(ctx.Session) + indexes, err = sqlTable.GetIndexes(ctx) + return err + }); err != nil { + return err + } + + var newTableStats []*stats.Statistic + for _, sqlIdx := range indexes { + if sqlIdx.IsSpatial() || sqlIdx.IsFullText() || sqlIdx.IsGenerated() || sqlIdx.IsVector() { + continue + } + var idx durable.Index + var err error + var prollyMap prolly.Map + func() { + 
sql.SessionCommandBegin(ctx.Session) + defer sql.SessionCommandEnd(ctx.Session) + if strings.EqualFold(sqlIdx.ID(), "PRIMARY") { + idx, err = dTab.GetRowData(ctx) + } else { + idx, err = dTab.GetIndexRowData(ctx, sqlIdx.ID()) + } + if err == nil { + prollyMap, err = durable.ProllyMapFromIndex(idx) + } + }() + if err != nil { + sc.descError("GetRowData", err) + continue + } + + var template stats.Statistic + if err := sc.sq.DoSync(ctx, func() error { + sql.SessionCommandBegin(ctx.Session) + defer sql.SessionCommandEnd(ctx.Session) + _, template, err = sc.getTemplate(ctx, sqlTable, sqlIdx) + if err != nil { + return fmt.Errorf("stats collection failed to generate a statistic template: %s.%s.%s:%T; %s", sqlDb.RevisionQualifiedName(), tableName, sqlIdx, sqlIdx, err.Error()) + } + return nil + }); err != nil { + return err + } else if template.Fds.Empty() { + return fmt.Errorf("failed to creat template for %s/%s/%s/%s", sqlDb.Revision(), sqlDb.AliasedName(), tableName, sqlIdx.ID()) + } + + template.Qual.Database = sqlDb.AliasedName() + + idxLen := len(sqlIdx.Expressions()) + + var levelNodes []tree.Node + if err = sc.sq.DoSync(ctx, func() error { + sql.SessionCommandBegin(ctx.Session) + defer sql.SessionCommandEnd(ctx.Session) + levelNodes, err = tree.GetHistogramLevel(ctx, prollyMap.Tuples(), bucketLowCnt) + if err != nil { + sc.descError("get level", err) + } + return err + }); err != nil { + return err + } + var buckets []*stats.Bucket + var firstBound sql.Row + if len(levelNodes) > 0 { + var writes int + buckets, firstBound, writes, err = sc.collectIndexNodes(ctx, prollyMap, idxLen, levelNodes) + if err != nil { + sc.descError("", err) + continue + } + newStats.BucketWrites += writes + } + + newTableStats = append(newTableStats, sc.finalizeHistogram(template, buckets, firstBound)) + + if gcKv != nil { + keyBuilder := val.NewTupleBuilder(prollyMap.KeyDesc().PrefixDesc(idxLen)) + if !gcKv.GcMark(sc.kv, levelNodes, buckets, idxLen, keyBuilder) { + return fmt.Errorf("GC 
interrupted updated") + } + if err := func() error { + sql.SessionCommandBegin(ctx.Session) + defer sql.SessionCommandEnd(ctx.Session) + schHash, _, err := sqlTable.IndexCacheKey(ctx) + if err != nil { + return err + } + key := templateCacheKey{h: schHash.Hash, idxName: sqlIdx.ID()} + if t, ok := sc.GetTemplate(key); ok { + gcKv.PutTemplate(key, t) + } + return nil + }(); err != nil { + return err + } + } + } + newStats.stats[tableKey] = newTableStats + newStats.hashes[tableKey] = tableHash + newStats.TablesProcessed++ + return nil +} + +// GetLatestTable will get the WORKING root table for the current database/branch +func GetLatestTable(ctx *sql.Context, tableName string, sqlDb sql.Database) (*sqle.DoltTable, *doltdb.Table, error) { + var db sqle.Database + switch d := sqlDb.(type) { + case sqle.Database: + db = d + case sqle.ReadReplicaDatabase: + db = d.Database + default: + return nil, nil, fmt.Errorf("expected sqle.Database, found %T", sqlDb) + } + sqlTable, ok, err := db.GetTableInsensitive(ctx, tableName) + if err != nil { + return nil, nil, err + } + if !ok { + return nil, nil, fmt.Errorf("statistics refresh error: table not found %s", tableName) + } + + var dTab *doltdb.Table + var sqleTable *sqle.DoltTable + switch t := sqlTable.(type) { + case *sqle.AlterableDoltTable: + sqleTable = t.DoltTable + dTab, err = t.DoltTable.DoltTable(ctx) + case *sqle.WritableDoltTable: + sqleTable = t.DoltTable + dTab, err = t.DoltTable.DoltTable(ctx) + case *sqle.DoltTable: + sqleTable = t + dTab, err = t.DoltTable(ctx) + default: + err = fmt.Errorf("failed to unwrap dolt table from type: %T", sqlTable) + } + if err != nil { + return nil, nil, err + } + return sqleTable, dTab, nil +} + +type templateCacheKey struct { + h hash.Hash + idxName string +} + +func (k templateCacheKey) String() string { + return k.idxName + "/" + k.h.String()[:5] +} + +func (sc *StatsController) getTemplate(ctx *sql.Context, sqlTable *sqle.DoltTable, sqlIdx sql.Index) (templateCacheKey, 
stats.Statistic, error) { + schHash, _, err := sqlTable.IndexCacheKey(ctx) + if err != nil { + return templateCacheKey{}, stats.Statistic{}, err + } + key := templateCacheKey{h: schHash.Hash, idxName: sqlIdx.ID()} + if template, ok := sc.GetTemplate(key); ok { + return key, template, nil + } + fds, colset, err := stats.IndexFds(strings.ToLower(sqlTable.Name()), sqlTable.Schema(), sqlIdx) + if err != nil { + return templateCacheKey{}, stats.Statistic{}, err + } + + var class sql.IndexClass + switch { + case sqlIdx.IsSpatial(): + class = sql.IndexClassSpatial + case sqlIdx.IsFullText(): + class = sql.IndexClassFulltext + default: + class = sql.IndexClassDefault + } + + var types []sql.Type + for _, cet := range sqlIdx.ColumnExpressionTypes() { + types = append(types, cet.Type) + } + + // xxx: the lower here is load bearing, index comparison + // expects the expressions to be stripped of table name. + tablePrefix := strings.ToLower(sqlTable.Name()) + "." + cols := make([]string, len(sqlIdx.Expressions())) + for i, c := range sqlIdx.Expressions() { + cols[i] = strings.TrimPrefix(strings.ToLower(c), tablePrefix) + } + + template := stats.Statistic{ + Qual: sql.NewStatQualifier("", "", sqlTable.Name(), sqlIdx.ID()), + Cols: cols, + Typs: types, + IdxClass: uint8(class), + Fds: fds, + Colset: colset, + } + + // We put template twice, once for schema changes with no data + // changes (here), and once when we put chunks to avoid GC dropping + // templates before the finalize job. + sc.PutTemplate(key, template) + + return key, template, nil +} diff --git a/go/libraries/doltcore/sqle/statspro/worker_test.go b/go/libraries/doltcore/sqle/statspro/worker_test.go new file mode 100644 index 00000000000..42c27031edf --- /dev/null +++ b/go/libraries/doltcore/sqle/statspro/worker_test.go @@ -0,0 +1,1073 @@ +// Copyright 2025 Dolthub, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package statspro + +import ( + "context" + "fmt" + "io" + "log" + "os" + "path/filepath" + "strconv" + "strings" + "sync" + "testing" + "time" + + gms "github.com/dolthub/go-mysql-server" + "github.com/dolthub/go-mysql-server/sql" + "github.com/dolthub/go-mysql-server/sql/analyzer" + "github.com/sirupsen/logrus" + "github.com/stretchr/testify/require" + + "github.com/dolthub/dolt/go/libraries/doltcore/branch_control" + "github.com/dolthub/dolt/go/libraries/doltcore/dtestutils" + "github.com/dolthub/dolt/go/libraries/doltcore/env" + "github.com/dolthub/dolt/go/libraries/doltcore/ref" + "github.com/dolthub/dolt/go/libraries/doltcore/sqle" + "github.com/dolthub/dolt/go/libraries/doltcore/sqle/dsess" + "github.com/dolthub/dolt/go/libraries/doltcore/sqle/writer" +) + +func TestScheduleLoop(t *testing.T) { + threads := sql.NewBackgroundThreads() + defer threads.Shutdown() + ctx, sqlEng, sc := defaultSetup(t, threads, true, false) + + { + // add more data + b := strings.Repeat("b", 100) + require.NoError(t, executeQuery(ctx, sqlEng, "create table ab (a int primary key, b varchar(100), key (b,a))")) + abIns := strings.Builder{} + abIns.WriteString("insert into ab values") + for i := range 200 { + if i > 0 { + abIns.WriteString(", ") + } + abIns.WriteString(fmt.Sprintf("(%d, '%s')", i, b)) + } + require.NoError(t, executeQuery(ctx, sqlEng, abIns.String())) + + require.NoError(t, executeQuery(ctx, sqlEng, "call dolt_stats_restart()")) + require.NoError(t, executeQuery(ctx, sqlEng, "call dolt_stats_wait()")) + require.NoError(t, executeQuery(ctx, sqlEng, 
"call dolt_stats_stop()")) + + // 4 old + 2*7 new ab + kv := sc.kv.(*memStats) + require.Equal(t, 18, len(kv.buckets)) + require.Equal(t, 4, len(kv.bounds)) + require.Equal(t, 4, len(kv.templates)) + require.Equal(t, 2, len(sc.Stats.stats)) + stat := sc.Stats.stats[tableIndexesKey{"mydb", "main", "ab", ""}] + require.Equal(t, 7, len(stat[0].Hist)) + require.Equal(t, 7, len(stat[1].Hist)) + } + + require.NoError(t, executeQuery(ctx, sqlEng, "drop table xy")) + + //doGcCycle(t, ctx, sc) + require.NoError(t, executeQuery(ctx, sqlEng, "call dolt_stats_restart()")) + require.NoError(t, executeQuery(ctx, sqlEng, "call dolt_stats_gc()")) + require.NoError(t, executeQuery(ctx, sqlEng, "call dolt_stats_wait()")) + require.NoError(t, executeQuery(ctx, sqlEng, "call dolt_stats_stop()")) + + kv := sc.kv.(*memStats) + require.Equal(t, 14, len(kv.buckets)) + require.Equal(t, 2, len(kv.bounds)) + require.Equal(t, 2, len(kv.templates)) + require.Equal(t, 1, len(sc.Stats.stats)) + stat := sc.Stats.stats[tableIndexesKey{"mydb", "main", "ab", ""}] + require.Equal(t, 2, len(stat)) + require.Equal(t, 7, len(stat[0].Hist)) + require.Equal(t, 7, len(stat[1].Hist)) +} + +func TestAnalyze(t *testing.T) { + threads := sql.NewBackgroundThreads() + defer threads.Shutdown() + ctx, sqlEng, sc := defaultSetup(t, threads, true, false) + + require.NoError(t, executeQuery(ctx, sqlEng, "insert into xy values (-1,-1)")) + + //require.NoError(t, executeQuery(ctx, sqlEng, "call dolt_stats_restart()")) + require.NoError(t, executeQuery(ctx, sqlEng, "analyze table xy")) + //require.NoError(t, executeQuery(ctx, sqlEng, "call dolt_stats_wait()")) + //require.NoError(t, executeQuery(ctx, sqlEng, "call dolt_stats_stop()")) + + si, err := sc.Info(ctx) + require.NoError(t, err) + kv := sc.kv.(*memStats) + require.Equal(t, 0, si.GcCnt) + require.Equal(t, 1, si.DbCnt) + require.Equal(t, false, si.Active) + require.Equal(t, 6, len(kv.buckets)) + require.Equal(t, 4, len(kv.bounds)) + require.Equal(t, 2, 
len(kv.templates)) + require.Equal(t, 1, len(sc.Stats.stats)) + for _, tableStats := range sc.Stats.stats { + require.Equal(t, 2, len(tableStats)) + } +} + +func TestModifyColumn(t *testing.T) { + threads := sql.NewBackgroundThreads() + defer threads.Shutdown() + ctx, sqlEng, sc := defaultSetup(t, threads, true, false) + sc.enableGc = false + { + runBlock(t, ctx, sqlEng, "alter table xy modify column y bigint") + + kv := sc.kv.(*memStats) + require.Equal(t, 10, len(kv.buckets)) + require.Equal(t, 4, len(kv.bounds)) + require.Equal(t, 4, len(kv.templates)) + require.Equal(t, 1, len(sc.Stats.stats)) + stat := sc.Stats.stats[tableIndexesKey{"mydb", "main", "xy", ""}] + require.Equal(t, 4, len(stat[0].Hist)) + require.Equal(t, 2, len(stat[1].Hist)) + + runBlock(t, ctx, sqlEng, "call dolt_stats_gc()") + require.Equal(t, 6, sc.Len()) + } +} + +func TestAddColumn(t *testing.T) { + threads := sql.NewBackgroundThreads() + defer threads.Shutdown() + ctx, sqlEng, sc := defaultSetup(t, threads, true, false) + sc.enableGc = false + + runBlock(t, ctx, sqlEng, + "alter table xy add column z int", + ) + + kv := sc.kv.(*memStats) + require.Equal(t, 4, len(kv.buckets)) + require.Equal(t, 2, len(kv.bounds)) + require.Equal(t, 4, len(kv.templates)) // +2 for new schema + require.Equal(t, 1, len(sc.Stats.stats)) + stat := sc.Stats.stats[tableIndexesKey{"mydb", "main", "xy", ""}] + require.Equal(t, 2, len(stat[0].Hist)) + require.Equal(t, 2, len(stat[1].Hist)) +} + +func TestDropIndex(t *testing.T) { + threads := sql.NewBackgroundThreads() + defer threads.Shutdown() + ctx, sqlEng, sc := defaultSetup(t, threads, true, false) + sc.enableGc = false + + runBlock(t, ctx, sqlEng, + "alter table xy drop index y", + ) + + kv := sc.kv.(*memStats) + require.Equal(t, 4, len(kv.buckets)) + require.Equal(t, 2, len(kv.bounds)) + require.Equal(t, 3, len(kv.templates)) + require.Equal(t, 1, len(sc.Stats.stats)) + stat := sc.Stats.stats[tableIndexesKey{"mydb", "main", "xy", ""}] + require.Equal(t, 1, 
len(stat)) + require.Equal(t, 2, len(stat[0].Hist)) + + runBlock(t, ctx, sqlEng, "call dolt_stats_gc()") + + kv = sc.kv.(*memStats) + require.Equal(t, 2, len(kv.buckets)) + require.Equal(t, 1, len(kv.bounds)) + require.Equal(t, 1, len(kv.templates)) + require.Equal(t, 1, len(sc.Stats.stats)) + stat = sc.Stats.stats[tableIndexesKey{"mydb", "main", "xy", ""}] + require.Equal(t, 1, len(stat)) + require.Equal(t, 2, len(stat[0].Hist)) +} + +func TestDropTable(t *testing.T) { + threads := sql.NewBackgroundThreads() + defer threads.Shutdown() + ctx, sqlEng, sc := defaultSetup(t, threads, true, false) + sc.enableGc = false + + runBlock(t, ctx, sqlEng, + "create table ab (a int primary key, b int)", + "insert into ab values (0,0)", + "drop table xy", + ) + + kv := sc.kv.(*memStats) + require.Equal(t, 5, len(kv.buckets)) + require.Equal(t, 3, len(kv.bounds)) + require.Equal(t, 3, len(kv.templates)) + require.Equal(t, 1, len(sc.Stats.stats)) + stat := sc.Stats.stats[tableIndexesKey{"mydb", "main", "ab", ""}] + require.Equal(t, 1, len(stat)) + require.Equal(t, 1, len(stat[0].Hist)) + + runBlock(t, ctx, sqlEng, "call dolt_stats_gc()") + + kv = sc.kv.(*memStats) + require.Equal(t, 1, len(kv.buckets)) + require.Equal(t, 1, len(kv.bounds)) + require.Equal(t, 1, len(kv.templates)) + require.Equal(t, 1, len(sc.Stats.stats)) + stat = sc.Stats.stats[tableIndexesKey{"mydb", "main", "ab", ""}] + require.Equal(t, 1, len(stat)) + require.Equal(t, 1, len(stat[0].Hist)) +} + +func TestDeleteAboveBoundary(t *testing.T) { + threads := sql.NewBackgroundThreads() + defer threads.Shutdown() + ctx, sqlEng, sc := defaultSetup(t, threads, true, false) + sc.enableGc = false + + runBlock(t, ctx, sqlEng, + "alter table xy drop index y", + "delete from xy where x > 498", + "call dolt_stats_wait()", + ) + + kv := sc.kv.(*memStats) + require.Equal(t, 5, len(kv.buckets)) // 1 for new chunk + require.Equal(t, 2, len(kv.bounds)) + require.Equal(t, 3, len(kv.templates)) // +1 for schema change + 
require.Equal(t, 1, len(sc.Stats.stats)) + stat := sc.Stats.stats[tableIndexesKey{db: "mydb", branch: "main", table: "xy"}] + require.Equal(t, 2, len(stat[0].Hist)) + + runBlock(t, ctx, sqlEng, "call dolt_stats_gc()") + + require.Equal(t, 2, sc.Len()) +} + +func TestDeleteBelowBoundary(t *testing.T) { + threads := sql.NewBackgroundThreads() + defer threads.Shutdown() + ctx, sqlEng, sc := defaultSetup(t, threads, true, false) + sc.enableGc = false + + runBlock(t, ctx, sqlEng, + "alter table xy drop index y", + "delete from xy where x > 410", + "call dolt_stats_wait()", + ) + + kv := sc.kv.(*memStats) + + require.Equal(t, 5, len(kv.buckets)) // +1 rewrite partial chunk + require.Equal(t, 3, len(kv.bounds)) // +1 rewrite first chunk + require.Equal(t, 3, len(kv.templates)) + require.Equal(t, 1, len(sc.Stats.stats)) + stat := sc.Stats.stats[tableIndexesKey{db: "mydb", branch: "main", table: "xy"}] + require.Equal(t, 1, len(stat[0].Hist)) + + runBlock(t, ctx, sqlEng, "call dolt_stats_gc()") + + require.Equal(t, 1, sc.Len()) + +} + +func TestDeleteOnBoundary(t *testing.T) { + threads := sql.NewBackgroundThreads() + defer threads.Shutdown() + ctx, sqlEng, sc := defaultSetup(t, threads, true, false) + sc.enableGc = false + + runBlock(t, ctx, sqlEng, + "alter table xy drop index y", + // PRIMARY boundary chunk -> rewrite y_idx's second + "delete from xy where x > 414", + ) + + kv := sc.kv.(*memStats) + require.Equal(t, 4, len(kv.buckets)) + require.Equal(t, 2, len(kv.bounds)) + require.Equal(t, 3, len(kv.templates)) // +1 schema change + require.Equal(t, 1, len(sc.Stats.stats)) + stat := sc.Stats.stats[tableIndexesKey{db: "mydb", branch: "main", table: "xy"}] + require.Equal(t, 1, len(stat[0].Hist)) + + runBlock(t, ctx, sqlEng, "call dolt_stats_gc()") + + require.Equal(t, 1, sc.Len()) +} + +func TestAddDropDatabases(t *testing.T) { + threads := sql.NewBackgroundThreads() + defer threads.Shutdown() + ctx, sqlEng, sc := defaultSetup(t, threads, true, false) + sc.enableGc = 
false + + { + runBlock(t, ctx, sqlEng, + "create database otherdb", + "use otherdb", + "create table t (i int primary key)", + "insert into t values (0), (1)", + "call dolt_stats_wait()", + ) + + // xy and t + kv := sc.kv.(*memStats) + require.Equal(t, 5, len(kv.buckets)) + require.Equal(t, 3, len(kv.bounds)) + require.Equal(t, 3, len(kv.templates)) + require.Equal(t, 2, len(sc.Stats.stats)) + stat := sc.Stats.stats[tableIndexesKey{db: "otherdb", branch: "main", table: "t"}] + require.Equal(t, 1, len(stat)) + } + + { + runBlock(t, ctx, sqlEng, "drop database otherdb") + _, ok := sc.Stats.stats[tableIndexesKey{db: "otherdb", branch: "main", table: "t"}] + require.False(t, ok) + } +} + +func TestGC(t *testing.T) { + threads := sql.NewBackgroundThreads() + defer threads.Shutdown() + ctx, sqlEng, sc := defaultSetup(t, threads, true, false) + + { + runBlock(t, ctx, sqlEng, + "create database otherdb", + "use otherdb", + "create table t (i int primary key)", + "insert into t values (0), (1)", + + "create database thirddb", + "use thirddb", + "create table s (i int primary key, j int, key (j))", + "insert into s values (0,0), (1,1), (2,2)", + ) + + kv := sc.kv.(*memStats) + require.Equal(t, 3, sc.Stats.DbCnt) + + runBlock(t, ctx, sqlEng, + "drop database otherdb", + "alter table s drop index j", + "call dolt_stats_gc()", + ) + + // test for cleanup + require.Equal(t, sc.Stats.DbCnt, 2) + + kv = sc.kv.(*memStats) + require.Equal(t, 5, len(kv.buckets)) + require.Equal(t, 3, len(kv.bounds)) + require.Equal(t, 3, len(kv.templates)) + require.Equal(t, 2, len(sc.Stats.stats)) + } +} + +func TestBranches(t *testing.T) { + threads := sql.NewBackgroundThreads() + defer threads.Shutdown() + ctx, sqlEng, sc := defaultSetup(t, threads, true, false) + sc.enableGc = true + { + runBlock(t, ctx, sqlEng, + "call dolt_commit('-Am', 'add xy')", + "create database otherdb", + "use otherdb", + "create table t (i int primary key)", + "insert into t values (0), (1)", + "call dolt_commit('-Am', 
'add t')", + + "create database thirddb", + "use thirddb", + "create table s (i int primary key, j int, key (j))", + "insert into s values (0,0), (1,1), (2,2)", + "call dolt_commit('-Am', 'add s')", + ) + + require.Equal(t, sc.Stats.DbCnt, 3) + + stat, ok := sc.Stats.stats[tableIndexesKey{"otherdb", "feat2", "t", ""}] + require.False(t, ok) + stat, ok = sc.Stats.stats[tableIndexesKey{"otherdb", "feat3", "t", ""}] + require.False(t, ok) + stat, ok = sc.Stats.stats[tableIndexesKey{"otherdb", "main", "t", ""}] + require.Equal(t, 1, len(stat)) + stat = sc.Stats.stats[tableIndexesKey{"thirddb", "main", "s", ""}] + require.Equal(t, 2, len(stat)) + + runBlock(t, ctx, sqlEng, + "use mydb", + "call dolt_checkout('-b', 'feat1')", + + "use otherdb", + "call dolt_checkout('-b', 'feat2')", + "insert into t values (2), (3)", + "call dolt_commit('-Am', 'insert into t')", + "call dolt_checkout('-b', 'feat3')", + "drop table t", + "call dolt_commit('-Am', 'drop t')", + + "use thirddb", + "call dolt_checkout('-b', 'feat1')", + "alter table s drop index j", + "call dolt_commit('-Am', 'drop index j')", + ) + // mydb: main, feat1 + // otherdb: main, feat2, feat3 + // thirddb: main, feat1 + require.Equal(t, sc.Stats.DbCnt, 7) + + stat, ok = sc.Stats.stats[tableIndexesKey{"mydb", "feat1", "xy", ""}] + require.True(t, ok) + require.Equal(t, 2, len(stat)) + stat, ok = sc.Stats.stats[tableIndexesKey{"otherdb", "feat2", "t", ""}] + require.True(t, ok) + require.Equal(t, 1, len(stat)) + stat, ok = sc.Stats.stats[tableIndexesKey{"otherdb", "feat3", "t", ""}] + require.False(t, ok) + stat, ok = sc.Stats.stats[tableIndexesKey{"thirddb", "feat1", "s", ""}] + require.True(t, ok) + require.Equal(t, 1, len(stat)) + + // mydb: 4 shared + // otherdb: 1 + 1 + // thirddb: 2 + shared + kv := sc.kv.(*memStats) + require.Equal(t, 4+2+2, len(kv.buckets)) + require.Equal(t, 2+(1+1)+2, len(kv.bounds)) + require.Equal(t, 2+1+(2+1), len(kv.templates)) + require.Equal(t, 7-1, len(sc.Stats.stats)) + + runBlock(t, 
ctx, sqlEng, + "drop database otherdb", + ) + // mydb: main, feat1 + // thirddb: main, feat1 + require.Equal(t, 4, sc.Stats.DbCnt) + + stat, ok = sc.Stats.stats[tableIndexesKey{"otherdb", "feat2", "t", ""}] + require.False(t, ok) + stat, ok = sc.Stats.stats[tableIndexesKey{"otherdb", "main", "t", ""}] + require.False(t, ok) + + runBlock(t, ctx, sqlEng, + "use mydb", + "call dolt_checkout('main')", + "call dolt_branch('-D', 'feat1')", + ) + // mydb: main + // thirddb: main, feat1 + require.Equal(t, sc.Stats.DbCnt, 3) + + stat, ok = sc.Stats.stats[tableIndexesKey{"mydb", "feat1", "xy", ""}] + require.False(t, ok) + stat, ok = sc.Stats.stats[tableIndexesKey{"mydb", "main", "xy", ""}] + require.True(t, ok) + + runBlock(t, ctx, sqlEng, "call dolt_stats_gc()") + + // 3 dbs remaining, mydb/main, thirddb/feat1, thirddb/main + kv = sc.kv.(*memStats) + require.Equal(t, 4+2, len(kv.buckets)) + require.Equal(t, 4, len(kv.bounds)) + require.Equal(t, 5, len(kv.templates)) + require.Equal(t, 3, len(sc.Stats.stats)) + } +} + +func runBlock(t *testing.T, ctx *sql.Context, sqlEng *gms.Engine, qs ...string) { + require.NoError(t, executeQuery(ctx, sqlEng, "call dolt_stats_restart()")) + for _, q := range qs { + require.NoError(t, executeQuery(ctx, sqlEng, q)) + } + require.NoError(t, executeQuery(ctx, sqlEng, "call dolt_stats_wait()")) + require.NoError(t, executeQuery(ctx, sqlEng, "call dolt_stats_stop()")) +} + +func TestBucketCounting(t *testing.T) { + threads := sql.NewBackgroundThreads() + defer threads.Shutdown() + ctx, sqlEng, sc := defaultSetup(t, threads, true, false) + sc.enableGc = false + + // add more data + b := strings.Repeat("b", 100) + require.NoError(t, executeQuery(ctx, sqlEng, "create table ab (a int primary key, b varchar(100), key (b,a))")) + abIns := strings.Builder{} + abIns.WriteString("insert into ab values") + for i := range 200 { + if i > 0 { + abIns.WriteString(", ") + } + abIns.WriteString(fmt.Sprintf("(%d, '%s')", i, b)) + } + runBlock(t, ctx, sqlEng, 
abIns.String()) + + // 4 old + 2*7 new ab + kv := sc.kv.(*memStats) + require.Equal(t, 18, len(kv.buckets)) + require.Equal(t, 2, len(sc.Stats.stats)) + + runBlock(t, ctx, sqlEng, + "create table cd (c int primary key, d varchar(200), key (d,c))", + "insert into cd select a,b from ab", + ) + + // no new buckets + kv = sc.kv.(*memStats) + require.Equal(t, 18, len(kv.buckets)) + require.Equal(t, 3, len(sc.Stats.stats)) +} + +func TestDropOnlyDb(t *testing.T) { + threads := sql.NewBackgroundThreads() + defer threads.Shutdown() + ctx, sqlEng, sc := defaultSetup(t, threads, false, false) + require.NoError(t, sc.Restart()) + + _, ok := sc.kv.(*prollyStats) + require.True(t, ok) + statsPath, err := sc.statsBackingDb.Abs("") + require.NoError(t, err) + require.Equal(t, "mydb", filepath.Base(statsPath)) + + // what happens when we drop the only database? swap to memory? + // add first database, switch to prolly? + runBlock(t, ctx, sqlEng, "drop database mydb") + + sc.Stop() + + // empty memory KV + _, ok = sc.kv.(*memStats) + require.True(t, ok) + require.Equal(t, nil, sc.statsBackingDb) + + require.NoError(t, executeQuery(ctx, sqlEng, "create database otherdb")) + + // empty prollyKv + _, ok = sc.kv.(*prollyStats) + require.True(t, ok) + statsPath, err = sc.statsBackingDb.Abs("") + require.NoError(t, err) + require.Equal(t, "otherdb", filepath.Base(statsPath)) +} + +func TestRotateBackingDb(t *testing.T) { + threads := sql.NewBackgroundThreads() + defer threads.Shutdown() + ctx, sqlEng, sc := defaultSetup(t, threads, false, false) + + runBlock(t, ctx, sqlEng, "create database backupdb", + "use backupdb", + "create table xy (x int primary key, y int)", + "insert into xy values (0,0), (1,1), (2,2)", + ) + + require.Equal(t, 5, sc.kv.Len()) + require.Equal(t, 2, len(sc.Stats.stats)) + + runBlock(t, ctx, sqlEng, "drop database mydb") + + _, ok := sc.kv.(*prollyStats) + require.True(t, ok) + statsPath, err := sc.statsBackingDb.Abs("") + require.NoError(t, err) + 
require.Equal(t, "backupdb", filepath.Base(statsPath)) + + // lost the backing storage, previous in-memory moves into new kv + require.Equal(t, 5, sc.kv.Len()) + require.Equal(t, 1, len(sc.Stats.stats)) + +} + +func TestPanic(t *testing.T) { + threads := sql.NewBackgroundThreads() + defer threads.Shutdown() + ctx, sqlEng, sc := emptySetup(t, threads, false, true) + sc.SetEnableGc(true) + + require.NoError(t, sc.Restart()) + + sc.sq.DoSync(ctx, func() error { + panic("test panic") + }) + + require.NoError(t, executeQuery(ctx, sqlEng, "call dolt_stats_wait()")) +} + +func TestMemoryOnly(t *testing.T) { + threads := sql.NewBackgroundThreads() + defer threads.Shutdown() + ctx, sqlEng, sc := emptySetup(t, threads, true, true) + sc.SetEnableGc(false) + + require.NoError(t, sc.Restart()) + + runBlock(t, ctx, sqlEng, "create database otherdb", + "create table xy (x int primary key, y int)", + "insert into xy values (0,0), (1,1), (2,2)", + "call dolt_stats_wait()", + ) + + _, ok := sc.kv.(*memStats) + require.True(t, ok, "expected *memStats") +} + +func newStatsCoord(bthreads *sql.BackgroundThreads) *StatsController { + dEnv := dtestutils.CreateTestEnv() + sqlEng, ctx := newTestEngine(context.Background(), dEnv, bthreads) + ctx.Session.SetClient(sql.Client{ + User: "billy boy", + Address: "bigbillie@fake.horse", + }) + + sql.SystemVariables.AssignValues(map[string]interface{}{ + dsess.DoltStatsGCInterval: 100, + dsess.DoltStatsJobInterval: 1, + }) + + return sqlEng.Analyzer.Catalog.StatsProvider.(*StatsController) +} + +func emptySetup(t *testing.T, threads *sql.BackgroundThreads, memOnly bool, gcEnabled bool) (*sql.Context, *gms.Engine, *StatsController) { + dEnv := dtestutils.CreateTestEnv() + sqlEng, ctx := newTestEngine(context.Background(), dEnv, threads) + ctx.Session.SetClient(sql.Client{ + User: "billy boy", + Address: "bigbillie@fake.horse", + }) + + sql.SystemVariables.AssignValues(map[string]interface{}{ + dsess.DoltStatsGCInterval: 100, + 
dsess.DoltStatsJobInterval: 1, + }) + if memOnly { + sql.SystemVariables.AssignValues(map[string]interface{}{ + dsess.DoltStatsMemoryOnly: int8(1), + }) + } else { + sql.SystemVariables.AssignValues(map[string]interface{}{ + dsess.DoltStatsMemoryOnly: int8(0), + }) + } + if gcEnabled { + sql.SystemVariables.AssignValues(map[string]interface{}{ + dsess.DoltStatsGCEnabled: int8(1), + }) + } else { + sql.SystemVariables.AssignValues(map[string]interface{}{ + dsess.DoltStatsGCEnabled: int8(0), + }) + } + + sc := sqlEng.Analyzer.Catalog.StatsProvider.(*StatsController) + sc.SetEnableGc(false) + sc.JobInterval = time.Nanosecond + + require.NoError(t, sc.Restart()) + + ctx, _ = sc.ctxGen(ctx) + ctx.Session.SetClient(sql.Client{ + User: "billy boy", + Address: "bigbillie@fake.horse", + }) + require.NoError(t, executeQuery(ctx, sqlEng, "create database mydb")) + require.NoError(t, executeQuery(ctx, sqlEng, "use mydb")) + + require.NoError(t, executeQuery(ctx, sqlEng, "call dolt_stats_wait()")) + sc.Stop() + + var sqlDbs []sqle.Database + for _, db := range sqlEng.Analyzer.Catalog.DbProvider.AllDatabases(ctx) { + if sqlDb, ok := db.(sqle.Database); ok { + branch := ref.NewBranchRef("main") + db, err := sqle.RevisionDbForBranch(ctx, sqlDb, branch.GetPath(), branch.GetPath()+"/"+sqlDb.AliasedName()) + require.NoError(t, err) + sqlDbs = append(sqlDbs, db.(sqle.Database)) + } + } + + if memOnly { + statsKv := NewMemStats() + sc.kv = statsKv + } + + return ctx, sqlEng, sc +} + +func defaultSetup(t *testing.T, threads *sql.BackgroundThreads, memOnly bool, gcEnabled bool) (*sql.Context, *gms.Engine, *StatsController) { + ctx, sqlEng, sc := emptySetup(t, threads, memOnly, gcEnabled) + //sc.Debug = true + + require.NoError(t, executeQuery(ctx, sqlEng, "create table xy (x int primary key, y int, key (y,x))")) + + xyIns := strings.Builder{} + xyIns.WriteString("insert into xy values") + for i := range 500 { + if i > 0 { + xyIns.WriteString(", ") + } + 
xyIns.WriteString(fmt.Sprintf("(%d, %d)", i, i%25)) + } + require.NoError(t, executeQuery(ctx, sqlEng, xyIns.String())) + + require.NoError(t, executeQuery(ctx, sqlEng, "call dolt_stats_restart()")) + require.NoError(t, executeQuery(ctx, sqlEng, "call dolt_stats_wait()")) + require.NoError(t, executeQuery(ctx, sqlEng, "call dolt_stats_stop()")) + + var kv *memStats + switch s := sc.kv.(type) { + case *memStats: + kv = s + case *prollyStats: + kv = s.mem + } + require.Equal(t, 4, len(kv.buckets)) + require.Equal(t, 2, len(kv.bounds)) + require.Equal(t, 2, len(kv.templates)) + require.Equal(t, 1, len(sc.Stats.stats)) + for _, tableStats := range sc.Stats.stats { + require.Equal(t, 2, len(tableStats)) + } + + switch s := sc.kv.(type) { + case *memStats: + kv = s + case *prollyStats: + kv = s.mem + } + require.Equal(t, 4, len(kv.buckets)) + require.Equal(t, 2, len(kv.bounds)) + require.Equal(t, 2, len(kv.templates)) + require.Equal(t, 1, len(sc.Stats.stats)) + for _, tableStats := range sc.Stats.stats { + require.Equal(t, 2, len(tableStats)) + } + + return ctx, sqlEng, sc +} + +func executeQuery(ctx *sql.Context, eng *gms.Engine, query string) error { + _, iter, _, err := eng.Query(ctx, query) + if err != nil { + return err + } + for { + _, err = iter.Next(ctx) + if err == io.EOF { + break + } + if err != nil { + return err + } + } + return iter.Close(ctx) // tx commit +} + +func executeQueryResults(ctx *sql.Context, eng *gms.Engine, query string) ([]sql.Row, error) { + _, iter, _, err := eng.Query(ctx, query) + if err != nil { + return nil, err + } + var ret []sql.Row + for { + r, err := iter.Next(ctx) + if err == io.EOF { + break + } + if err != nil { + return nil, err + } + ret = append(ret, r) + } + return ret, iter.Close(ctx) // tx commit +} + +func newTestEngine(ctx context.Context, dEnv *env.DoltEnv, threads *sql.BackgroundThreads) (*gms.Engine, *sql.Context) { + pro, err := sqle.NewDoltDatabaseProviderWithDatabases("main", dEnv.FS, nil, nil) + if err != nil { + 
panic(err) + } + + mrEnv, err := env.MultiEnvForDirectory(ctx, dEnv.Config.WriteableConfig(), dEnv.FS, dEnv.Version, dEnv) + if err != nil { + panic(err) + } + + sc := NewStatsController(logrus.StandardLogger(), dEnv) + + gcSafepointController := dsess.NewGCSafepointController() + + doltSession, err := dsess.NewDoltSession(sql.NewBaseSession(), pro, dEnv.Config.WriteableConfig(), branch_control.CreateDefaultController(ctx), sc, writer.NewWriteSession, gcSafepointController) + if err != nil { + panic(err) + } + + sqlCtx := sql.NewContext(ctx, sql.WithSession(doltSession)) + sqlCtx.SetCurrentDatabase(mrEnv.GetFirstDatabase()) + + ctxGen := func(ctx context.Context) (*sql.Context, error) { + doltSession, err := dsess.NewDoltSession(sql.NewBaseSession(), pro, dEnv.Config.WriteableConfig(), branch_control.CreateDefaultController(ctx), sc, writer.NewWriteSession, gcSafepointController) + if err != nil { + return nil, err + } + return sql.NewContext(ctx, sql.WithSession(doltSession)), nil + } + + pro.InitDatabaseHooks = append(pro.InitDatabaseHooks, NewInitDatabaseHook(sc)) + pro.DropDatabaseHooks = append(pro.DropDatabaseHooks, NewDropDatabaseHook(sc)) + + sqlEng := gms.New(analyzer.NewBuilder(pro).Build(), &gms.Config{ + IsReadOnly: false, + IsServerLocked: false, + }) + + if err := sc.Init(sqlCtx, pro, ctxGen, threads, pro.AllDatabases(sqlCtx)); err != nil { + log.Fatal(err) + } + sqlEng.Analyzer.Catalog.StatsProvider = sc + return sqlEng, sqlCtx +} + +func TestStatsGcConcurrency(t *testing.T) { + threads := sql.NewBackgroundThreads() + defer threads.Shutdown() + ctx, sqlEng, sc := emptySetup(t, threads, false, true) + sc.SetEnableGc(true) + sc.JobInterval = 1 * time.Nanosecond + sc.gcInterval = 100 * time.Nanosecond + require.NoError(t, sc.Restart()) + + addDb := func(ctx *sql.Context, dbName string) { + require.NoError(t, executeQuery(ctx, sqlEng, "create database "+dbName)) + } + + addData := func(ctx *sql.Context, dbName string, i int) { + //log.Println("add ", 
dbName) + require.NoError(t, executeQuery(ctx, sqlEng, "use "+dbName)) + require.NoError(t, executeQuery(ctx, sqlEng, "create table xy (x int primary key, y int)")) + require.NoError(t, executeQuery(ctx, sqlEng, "insert into xy values (0,0),(1,1),(2,2),(3,3),(4,4),(5,5), (6,"+strconv.Itoa(i)+")")) + } + + dropDb := func(dropCtx *sql.Context, dbName string) { + //log.Println("drop ", dbName) + require.NoError(t, executeQuery(ctx, sqlEng, "use mydb")) + require.NoError(t, executeQuery(ctx, sqlEng, "drop database "+dbName)) + } + + // it is important to use new sessions for this test, to avoid working root conflicts + addCtx, _ := sc.ctxGen(context.Background()) + writeCtx, _ := sc.ctxGen(context.Background()) + dropCtx, _ := sc.ctxGen(context.Background()) + + iters := 100 + if os.Getenv("CI") != "" { + iters = 20 + } + dbs := make(chan string, iters) + + { + wg := sync.WaitGroup{} + wg.Add(2) + + addCnt := 0 + go func() { + for i := range iters { + addCnt++ + dbName := "db" + strconv.Itoa(i) + addDb(addCtx, dbName) + addData(writeCtx, dbName, i) + dbs <- dbName + } + close(dbs) + wg.Done() + }() + + dropCnt := 0 + go func() { + i := 0 + for db := range dbs { + if i%2 == 0 { + time.Sleep(50 * time.Millisecond) + dropCnt++ + dropDb(dropCtx, db) + } + i++ + } + wg.Done() + }() + + wg.Wait() + + require.NoError(t, executeQuery(ctx, sqlEng, "call dolt_stats_wait()")) + require.NoError(t, executeQuery(ctx, sqlEng, "call dolt_stats_gc()")) + + sc.Stop() + + // 101 dbs, 100 with stats (not main) + require.Equal(t, iters/2, len(sc.Stats.stats)) + //require.NoError(t, sc.ValidateState(ctx)) + require.Equal(t, iters/2, sc.kv.Len()) + } +} + +func TestStatsBranchConcurrency(t *testing.T) { + threads := sql.NewBackgroundThreads() + defer threads.Shutdown() + ctx, sqlEng, sc := emptySetup(t, threads, false, false) + + sc.JobInterval = 1 + sc.gcInterval = time.Hour + require.NoError(t, sc.Restart()) + + addBranch := func(ctx *sql.Context, i int) { + branchName := "branch" + 
strconv.Itoa(i) + require.NoError(t, executeQuery(ctx, sqlEng, "use mydb")) + require.NoError(t, executeQuery(ctx, sqlEng, "call dolt_checkout('main')")) + require.NoError(t, executeQuery(ctx, sqlEng, "call dolt_checkout('-b', '"+branchName+"')")) + } + + addData := func(ctx *sql.Context, i int) { + branchName := "branch" + strconv.Itoa(i) + require.NoError(t, executeQuery(ctx, sqlEng, "use mydb")) + require.NoError(t, executeQuery(ctx, sqlEng, "call dolt_checkout('"+branchName+"')")) + require.NoError(t, executeQuery(ctx, sqlEng, "create table xy (x int primary key, y int)")) + require.NoError(t, executeQuery(ctx, sqlEng, "insert into xy values (0,0),(1,1),(2,2),(3,3),(4,4),(5,5), (6,"+strconv.Itoa(i)+")")) + //require.NoError(t, executeQuery(ctx, sqlEng, "call dolt_stats_wait()")) + require.NoError(t, executeQuery(ctx, sqlEng, "call dolt_stats_wait()")) + } + + dropBranch := func(dropCtx *sql.Context, branchName string) { + //log.Println("delete branch: ", branchName) + require.NoError(t, executeQuery(ctx, sqlEng, "use mydb")) + del := "call dolt_branch('-d', '" + branchName + "')" + require.NoError(t, executeQuery(ctx, sqlEng, del)) + } + + // it is important to use new sessions for this test, to avoid working root conflicts + addCtx, _ := sc.ctxGen(context.Background()) + dropCtx, _ := sc.ctxGen(context.Background()) + + iters := 100 + if os.Getenv("CI") != "" { + iters = 20 + } + { + branches := make(chan string, iters) + + wg := sync.WaitGroup{} + wg.Add(2) + + go func() { + for i := range iters { + addBranch(addCtx, i) + addData(addCtx, i) + branches <- "branch" + strconv.Itoa(i) + } + close(branches) + wg.Done() + }() + + go func() { + i := 0 + for br := range branches { + if i%2 == 0 { + dropBranch(dropCtx, br) + time.Sleep(50 * time.Microsecond) + } + i++ + } + wg.Done() + }() + + wg.Wait() + + err := executeQuery(ctx, sqlEng, "call dolt_stats_wait()") + require.NoError(t, err) + + err = executeQuery(ctx, sqlEng, "call dolt_stats_gc()") + 
require.NoError(t, err) + sc.Stop() + + // at the end we should still have |iters/2| databases + require.Equal(t, iters/2, len(sc.Stats.stats)) + //require.NoError(t, sc.ValidateState(ctx)) + require.Equal(t, iters/2, sc.kv.Len()) + } +} + +func TestStatsCacheGrowth(t *testing.T) { + threads := sql.NewBackgroundThreads() + defer threads.Shutdown() + ctx, sqlEng, sc := emptySetup(t, threads, false, true) + sc.SetEnableGc(true) + + sc.JobInterval = 1 + sc.gcInterval = time.Hour + require.NoError(t, sc.Restart()) + + addBranch := func(ctx *sql.Context, i int) { + branchName := "branch" + strconv.Itoa(i) + require.NoError(t, executeQuery(ctx, sqlEng, "use mydb")) + require.NoError(t, executeQuery(ctx, sqlEng, "call dolt_checkout('main')")) + require.NoError(t, executeQuery(ctx, sqlEng, "call dolt_checkout('-b', '"+branchName+"')")) + } + + addData := func(ctx *sql.Context, i int) { + branchName := "branch" + strconv.Itoa(i) + require.NoError(t, executeQuery(ctx, sqlEng, "use mydb")) + require.NoError(t, executeQuery(ctx, sqlEng, "call dolt_checkout('"+branchName+"')")) + require.NoError(t, executeQuery(ctx, sqlEng, "create table xy (x int primary key, y int)")) + require.NoError(t, executeQuery(ctx, sqlEng, "insert into xy values (0,0),(1,1),(2,2),(3,3),(4,4),(5,5), (6,"+strconv.Itoa(i)+")")) + + } + + iters := 2000 + if os.Getenv("CI") != "" { + iters = 20 + } + { + branches := make(chan string, iters) + + go func() { + addCtx, _ := sc.ctxGen(context.Background()) + for i := range iters { + addBranch(addCtx, i) + addData(addCtx, i) + branches <- "branch" + strconv.Itoa(i) + if i%500 == 0 { + log.Println("branches: ", strconv.Itoa(i)) + require.NoError(t, executeQuery(addCtx, sqlEng, "call dolt_stats_wait()")) + } + } + close(branches) + }() + + i := 0 + for _ = range branches { + i++ + } + + require.NoError(t, executeQuery(ctx, sqlEng, "call dolt_stats_wait()")) + require.NoError(t, executeQuery(ctx, sqlEng, "call dolt_stats_gc()")) + + sc.Stop() + + // at the end we 
should still have |iters/2| databases + require.Equal(t, iters, len(sc.Stats.stats)) + //require.NoError(t, sc.ValidateState(ctx)) + require.Equal(t, iters, sc.kv.Len()) + } +} diff --git a/go/libraries/doltcore/sqle/system_variables.go b/go/libraries/doltcore/sqle/system_variables.go index 99e6c2f5a9b..c7777dd18cd 100644 --- a/go/libraries/doltcore/sqle/system_variables.go +++ b/go/libraries/doltcore/sqle/system_variables.go @@ -16,6 +16,7 @@ package sqle import ( "math" + "time" "github.com/dolthub/go-mysql-server/sql" "github.com/dolthub/go-mysql-server/sql/types" @@ -219,18 +220,18 @@ var DoltSystemVariables = []sql.SystemVariable{ Default: int8(1), }, &sql.MysqlSystemVariable{ - Name: dsess.DoltStatsAutoRefreshEnabled, + Name: dsess.DoltStatsEnabled, Dynamic: true, Scope: sql.GetMysqlScope(sql.SystemVariableScope_Global), - Type: types.NewSystemBoolType(dsess.DoltStatsAutoRefreshEnabled), - Default: int8(0), + Type: types.NewSystemBoolType(dsess.DoltStatsEnabled), + Default: int8(1), }, &sql.MysqlSystemVariable{ - Name: dsess.DoltStatsBootstrapEnabled, + Name: dsess.DoltStatsPaused, Dynamic: true, Scope: sql.GetMysqlScope(sql.SystemVariableScope_Global), - Type: types.NewSystemBoolType(dsess.DoltStatsBootstrapEnabled), - Default: int8(0), + Type: types.NewSystemBoolType(dsess.DoltStatsPaused), + Default: int8(1), }, &sql.MysqlSystemVariable{ Name: dsess.DoltStatsMemoryOnly, @@ -240,18 +241,25 @@ var DoltSystemVariables = []sql.SystemVariable{ Default: int8(0), }, &sql.MysqlSystemVariable{ - Name: dsess.DoltStatsAutoRefreshThreshold, + Name: dsess.DoltStatsJobInterval, Dynamic: true, Scope: sql.GetMysqlScope(sql.SystemVariableScope_Global), - Type: types.NewSystemDoubleType(dsess.DoltStatsAutoRefreshThreshold, 0, 10), - Default: float64(.5), + Type: types.NewSystemIntType(dsess.DoltStatsJobInterval, 0, math.MaxInt, false), + Default: int64(30 * time.Millisecond / time.Millisecond), }, &sql.MysqlSystemVariable{ - Name: dsess.DoltStatsAutoRefreshInterval, + Name: 
dsess.DoltStatsGCInterval, Dynamic: true, Scope: sql.GetMysqlScope(sql.SystemVariableScope_Global), - Type: types.NewSystemIntType(dsess.DoltStatsAutoRefreshInterval, 0, math.MaxInt, false), - Default: 600, + Type: types.NewSystemIntType(dsess.DoltStatsGCInterval, 0, math.MaxInt, false), + Default: int64(time.Hour / time.Millisecond), + }, + &sql.MysqlSystemVariable{ + Name: dsess.DoltStatsGCEnabled, + Dynamic: true, + Scope: sql.GetMysqlScope(sql.SystemVariableScope_Global), + Type: types.NewSystemBoolType(dsess.DoltStatsGCEnabled), + Default: int8(1), }, &sql.MysqlSystemVariable{ Name: dsess.DoltStatsBranches, @@ -446,39 +454,46 @@ func AddDoltSystemVariables() { Default: int8(0), }, &sql.MysqlSystemVariable{ - Name: dsess.DoltStatsAutoRefreshEnabled, + Name: dsess.DoltStatsEnabled, Dynamic: true, Scope: sql.GetMysqlScope(sql.SystemVariableScope_Global), - Type: types.NewSystemBoolType(dsess.DoltStatsAutoRefreshEnabled), - Default: int8(0), + Type: types.NewSystemBoolType(dsess.DoltStatsEnabled), + Default: int8(1), }, &sql.MysqlSystemVariable{ - Name: dsess.DoltStatsBootstrapEnabled, + Name: dsess.DoltStatsPaused, Dynamic: true, Scope: sql.GetMysqlScope(sql.SystemVariableScope_Global), - Type: types.NewSystemBoolType(dsess.DoltStatsBootstrapEnabled), - Default: int8(0), + Type: types.NewSystemBoolType(dsess.DoltStatsPaused), + Default: int8(1), }, &sql.MysqlSystemVariable{ - Name: dsess.DoltStatsMemoryOnly, + Name: dsess.DoltStatsGCInterval, Dynamic: true, Scope: sql.GetMysqlScope(sql.SystemVariableScope_Global), - Type: types.NewSystemBoolType(dsess.DoltStatsMemoryOnly), - Default: int8(0), + Type: types.NewSystemIntType(dsess.DoltStatsGCInterval, 0, math.MaxInt, false), + Default: int64(time.Hour / time.Millisecond), + }, + &sql.MysqlSystemVariable{ + Name: dsess.DoltStatsGCEnabled, + Dynamic: true, + Scope: sql.GetMysqlScope(sql.SystemVariableScope_Global), + Type: types.NewSystemBoolType(dsess.DoltStatsGCEnabled), + Default: int8(1), }, 
&sql.MysqlSystemVariable{ - Name: dsess.DoltStatsAutoRefreshThreshold, + Name: dsess.DoltStatsJobInterval, Dynamic: true, Scope: sql.GetMysqlScope(sql.SystemVariableScope_Global), - Type: types.NewSystemDoubleType(dsess.DoltStatsAutoRefreshThreshold, 0, 10), - Default: float64(.5), + Type: types.NewSystemIntType(dsess.DoltStatsJobInterval, 0, math.MaxInt, false), + Default: int64(30 * time.Millisecond / time.Millisecond), }, &sql.MysqlSystemVariable{ - Name: dsess.DoltStatsAutoRefreshInterval, + Name: dsess.DoltStatsMemoryOnly, Dynamic: true, Scope: sql.GetMysqlScope(sql.SystemVariableScope_Global), - Type: types.NewSystemIntType(dsess.DoltStatsAutoRefreshInterval, 0, math.MaxInt, false), - Default: 120, + Type: types.NewSystemBoolType(dsess.DoltStatsMemoryOnly), + Default: int8(0), }, &sql.MysqlSystemVariable{ Name: dsess.DoltStatsBranches, diff --git a/go/libraries/doltcore/sqle/tables.go b/go/libraries/doltcore/sqle/tables.go index bd189a6295e..bf18408c27c 100644 --- a/go/libraries/doltcore/sqle/tables.go +++ b/go/libraries/doltcore/sqle/tables.go @@ -129,12 +129,12 @@ func (t *DoltTable) LookupForExpressions(ctx *sql.Context, exprs ...sql.Expressi return sql.IndexLookup{}, nil, nil, false, nil } - dbState, ok, err := sess.LookupDbState(ctx, t.db.Name()) + dbState, ok, err := sess.LookupDbState(ctx, t.db.AliasedName()) if err != nil { return sql.IndexLookup{}, nil, nil, false, nil } if !ok { - return sql.IndexLookup{}, nil, nil, false, fmt.Errorf("no state for database %s", t.db.Name()) + return sql.IndexLookup{}, nil, nil, false, fmt.Errorf("no state for database %s", t.db.AliasedName()) } var lookupCols []expression.LookupColumn diff --git a/go/libraries/doltcore/sqle/testutil.go b/go/libraries/doltcore/sqle/testutil.go index f961123e46b..11d35169906 100644 --- a/go/libraries/doltcore/sqle/testutil.go +++ b/go/libraries/doltcore/sqle/testutil.go @@ -517,7 +517,10 @@ func SqlRowsFromDurableIndex(idx durable.Index, sch schema.Schema) ([]sql.Row, e ctx := 
context.Background() var sqlRows []sql.Row if types.Format_Default == types.Format_DOLT { - rowData := durable.ProllyMapFromIndex(idx) + rowData, err := durable.ProllyMapFromIndex(idx) + if err != nil { + return nil, err + } kd, vd := rowData.Descriptors() iter, err := rowData.IterAll(ctx) if err != nil { diff --git a/go/libraries/doltcore/sqle/user_space_database.go b/go/libraries/doltcore/sqle/user_space_database.go index e54c03b7eb3..c3689e13a61 100644 --- a/go/libraries/doltcore/sqle/user_space_database.go +++ b/go/libraries/doltcore/sqle/user_space_database.go @@ -141,6 +141,10 @@ func (db *UserSpaceDatabase) RequestedName() string { return db.Name() } +func (db *UserSpaceDatabase) AliasedName() string { + return db.Name() +} + func (db *UserSpaceDatabase) GetSchema(ctx *sql.Context, schemaName string) (sql.DatabaseSchema, bool, error) { panic(fmt.Sprintf("GetSchema is not implemented for database %T", db)) } diff --git a/go/libraries/doltcore/sqle/writer/prolly_index_writer.go b/go/libraries/doltcore/sqle/writer/prolly_index_writer.go index 6e2ede1011b..7c67b30a286 100644 --- a/go/libraries/doltcore/sqle/writer/prolly_index_writer.go +++ b/go/libraries/doltcore/sqle/writer/prolly_index_writer.go @@ -36,7 +36,10 @@ func getPrimaryProllyWriter(ctx context.Context, t *doltdb.Table, schState *dses return prollyIndexWriter{}, err } - m := durable.ProllyMapFromIndex(idx) + m, err := durable.ProllyMapFromIndex(idx) + if err != nil { + return prollyIndexWriter{}, err + } keyDesc, valDesc := m.Descriptors() @@ -55,7 +58,10 @@ func getPrimaryKeylessProllyWriter(ctx context.Context, t *doltdb.Table, schStat return prollyKeylessWriter{}, err } - m := durable.ProllyMapFromIndex(idx) + m, err := durable.ProllyMapFromIndex(idx) + if err != nil { + return prollyKeylessWriter{}, err + } keyDesc, valDesc := m.Descriptors() diff --git a/go/libraries/doltcore/sqle/writer/prolly_table_writer.go b/go/libraries/doltcore/sqle/writer/prolly_table_writer.go index 
d7f05c14532..f63044fe251 100644 --- a/go/libraries/doltcore/sqle/writer/prolly_table_writer.go +++ b/go/libraries/doltcore/sqle/writer/prolly_table_writer.go @@ -116,7 +116,10 @@ func getSecondaryKeylessProllyWriters(ctx context.Context, t *doltdb.Table, schS if err != nil { return nil, err } - m := durable.ProllyMapFromIndex(idxRows) + m, err := durable.ProllyMapFromIndex(idxRows) + if err != nil { + return nil, err + } keyDesc, _ := m.Descriptors() diff --git a/go/libraries/doltcore/table/editor/creation/external_build_index.go b/go/libraries/doltcore/table/editor/creation/external_build_index.go index 07faf56c101..f279ffa01bd 100644 --- a/go/libraries/doltcore/table/editor/creation/external_build_index.go +++ b/go/libraries/doltcore/table/editor/creation/external_build_index.go @@ -102,7 +102,10 @@ func BuildProllyIndexExternal(ctx *sql.Context, vrw types.ValueReadWriter, ns tr defer it.Close() empty, err := durable.NewEmptyIndexFromTableSchema(ctx, vrw, ns, idx, sch) - secondary := durable.ProllyMapFromIndex(empty) + secondary, err := durable.ProllyMapFromIndex(empty) + if err != nil { + return nil, err + } tupIter := &tupleIterWithCb{iter: it, prefixDesc: prefixDesc, uniqCb: uniqCb} ret, err := prolly.MutateMapWithTupleIter(ctx, secondary, tupIter) diff --git a/go/libraries/doltcore/table/editor/creation/index.go b/go/libraries/doltcore/table/editor/creation/index.go index 489aee993a5..10019d5a8ee 100644 --- a/go/libraries/doltcore/table/editor/creation/index.go +++ b/go/libraries/doltcore/table/editor/creation/index.go @@ -150,7 +150,11 @@ func BuildSecondaryIndex(ctx *sql.Context, tbl *doltdb.Table, idx schema.Index, if err != nil { return nil, err } - primary := durable.ProllyMapFromIndex(m) + primary, err := durable.ProllyMapFromIndex(m) + if err != nil { + return nil, err + } + return BuildSecondaryProllyIndex(ctx, tbl.ValueReadWriter(), tbl.NodeStore(), sch, tableName, idx, primary) default: @@ -218,7 +222,10 @@ func BuildUniqueProllyIndex( if err != nil 
{ return nil, err } - secondary := durable.ProllyMapFromIndex(empty) + secondary, err := durable.ProllyMapFromIndex(empty) + if err != nil { + return nil, err + } iter, err := primary.IterAll(ctx) if err != nil { diff --git a/go/libraries/doltcore/remotestorage/internal/circular/buff.go b/go/libraries/utils/circular/buff.go similarity index 90% rename from go/libraries/doltcore/remotestorage/internal/circular/buff.go rename to go/libraries/utils/circular/buff.go index 2a5ba8866d1..36632a88085 100644 --- a/go/libraries/doltcore/remotestorage/internal/circular/buff.go +++ b/go/libraries/utils/circular/buff.go @@ -34,12 +34,20 @@ func (b *Buff[T]) Len() int { return b.len } +func (b *Buff[T]) Cap() int { + return cap(b.arr) +} + func (b *Buff[T]) At(i int) T { + return *b.at(i) +} + +func (b *Buff[T]) at(i int) *T { if i >= b.Len() { panic("At on Buff too small") } j := (b.front + i) % len(b.arr) - return b.arr[j] + return &b.arr[j] } func (b *Buff[T]) Front() T { @@ -50,6 +58,9 @@ func (b *Buff[T]) Pop() { if b.Len() == 0 { panic("Pop empty Buff") } + // Don't leak entries... + var empty T + *b.at(0) = empty b.front = (b.front + 1) % len(b.arr) b.len -= 1 } diff --git a/go/libraries/doltcore/remotestorage/internal/circular/buff_test.go b/go/libraries/utils/circular/buff_test.go similarity index 100% rename from go/libraries/doltcore/remotestorage/internal/circular/buff_test.go rename to go/libraries/utils/circular/buff_test.go diff --git a/go/libraries/utils/valctx/valctx.go b/go/libraries/utils/valctx/valctx.go new file mode 100644 index 00000000000..c7b8431e0c3 --- /dev/null +++ b/go/libraries/utils/valctx/valctx.go @@ -0,0 +1,54 @@ +// Copyright 2025 Dolthub, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package valctx + +import ( + "context" +) + +var enabled bool + +// Globally enables context validation for the process. If this is not +// called, then the other functions in this package are noops. +func EnableContextValidation() { + enabled = true +} + +type ctxKey int + +var validationKey ctxKey + +func WithContextValidation(ctx context.Context) context.Context { + if !enabled { + return ctx + } + return context.WithValue(ctx, validationKey, new(Validation)) +} + +type Validation func() + +func SetContextValidation(ctx context.Context, validation Validation) { + if !enabled { + return + } + *ctx.Value(validationKey).(*Validation) = validation +} + +func ValidateContext(ctx context.Context) { + if !enabled { + return + } + (*ctx.Value(validationKey).(*Validation))() +} diff --git a/go/performance/utils/benchmark_runner/sysbench.go b/go/performance/utils/benchmark_runner/sysbench.go index 5953368b5b2..02e637b4920 100644 --- a/go/performance/utils/benchmark_runner/sysbench.go +++ b/go/performance/utils/benchmark_runner/sysbench.go @@ -21,9 +21,6 @@ import ( "os/exec" "path/filepath" "strings" - "time" - - "github.com/jmoiron/sqlx" "github.com/google/uuid" ) @@ -149,10 +146,6 @@ func (t *sysbenchTesterImpl) Test(ctx context.Context) (*Result, error) { return nil, err } - if err := t.collectStats(ctx); err != nil { - return nil, err - } - fmt.Println("Running test", t.test.GetName()) rs, err := t.run(ctx) @@ -162,76 +155,3 @@ func (t *sysbenchTesterImpl) Test(ctx context.Context) (*Result, error) { return rs, nil } - -func (t *sysbenchTesterImpl) 
collectStats(ctx context.Context) error { - if strings.Contains(t.serverConfig.GetServerExec(), "dolt") && !strings.Contains(t.serverConfig.GetServerExec(), "doltgres") { - db, err := sqlx.Open("mysql", fmt.Sprintf("root:@tcp(%s:%d)/test", t.serverConfig.GetHost(), t.serverConfig.GetPort())) - if err != nil { - return err - } - return collectStats(ctx, db) - } - return nil -} - -func collectStats(ctx context.Context, db *sqlx.DB) error { - c, err := db.Connx(ctx) - if err != nil { - return err - } - - { - // configuration, restart, and check needs to be in the same session - tx, err := c.BeginTxx(ctx, nil) - if err != nil { - return err - } - - if _, err := tx.Exec("set @@GLOBAL.dolt_stats_auto_refresh_enabled = 1;"); err != nil { - return err - } - if _, err := tx.Exec("set @@GLOBAL.dolt_stats_auto_refresh_interval = 0;"); err != nil { - return err - } - if _, err := tx.Exec("set @@PERSIST.dolt_stats_auto_refresh_interval = 0;"); err != nil { - return err - } - if _, err := tx.Exec("set @@PERSIST.dolt_stats_auto_refresh_enabled = 1;"); err != nil { - return err - } - if _, err := tx.Exec("call dolt_stats_restart();"); err != nil { - return err - } - - rows := map[string]interface{}{"cnt": 0} - tick := time.NewTicker(5 * time.Second) - for { - if rows["cnt"] != 0 { - fmt.Printf("collected %d histogram buckets\n", rows["cnt"]) - break - } - select { - case <-tick.C: - res, err := tx.Queryx("select count(*) as cnt from dolt_statistics;") - if err != nil { - return err - } - if !res.Next() { - return fmt.Errorf("failed to set statistics") - } - if err := res.MapScan(rows); err != nil { - return err - } - if err := res.Close(); err != nil { - return err - } - } - } - } - - if _, err := c.QueryContext(ctx, "call dolt_stats_stop();"); err != nil { - return err - } - - return nil -} diff --git a/go/performance/utils/benchmark_runner/tpcc.go b/go/performance/utils/benchmark_runner/tpcc.go index 4c7f01a2444..be265e6b568 100644 --- 
a/go/performance/utils/benchmark_runner/tpcc.go +++ b/go/performance/utils/benchmark_runner/tpcc.go @@ -20,9 +20,6 @@ import ( "os" "os/exec" "path/filepath" - "strings" - - "github.com/jmoiron/sqlx" ) type tpccTesterImpl struct { @@ -54,17 +51,6 @@ func (t *tpccTesterImpl) outputToResult(output []byte) (*Result, error) { return OutputToResult(output, t.serverConfig.GetServerType(), t.serverConfig.GetVersion(), t.test.GetName(), t.test.GetId(), t.suiteId, t.config.GetRuntimeOs(), t.config.GetRuntimeGoArch(), t.serverParams, t.test.GetParamsToSlice(), nil, false) } -func (t *tpccTesterImpl) collectStats(ctx context.Context) error { - if strings.Contains(t.serverConfig.GetServerExec(), "dolt") && !strings.Contains(t.serverConfig.GetServerExec(), "doltgres") { - db, err := sqlx.Open("mysql", fmt.Sprintf("root:@tcp(%s:%d)/sbt", t.serverConfig.GetHost(), t.serverConfig.GetPort())) - if err != nil { - return err - } - return collectStats(ctx, db) - } - return nil -} - func (t *tpccTesterImpl) prepare(ctx context.Context) error { args := t.test.GetPrepareArgs(t.serverConfig) cmd := exec.CommandContext(ctx, t.tpccCommand, args...) 
@@ -119,10 +105,6 @@ func (t *tpccTesterImpl) Test(ctx context.Context) (*Result, error) { return nil, err } - if err := t.collectStats(ctx); err != nil { - return nil, err - } - fmt.Println("Running test", t.test.GetName()) rs, err := t.run(ctx) diff --git a/go/store/prolly/tree/mutator.go b/go/store/prolly/tree/mutator.go index 824f3013744..08c03d819ee 100644 --- a/go/store/prolly/tree/mutator.go +++ b/go/store/prolly/tree/mutator.go @@ -132,7 +132,7 @@ func ApplyMutations[K ~[]byte, O Ordering[K], S message.Serializer]( prev := newKey newKey, newValue = edits.NextMutation(ctx) if newKey != nil { - assertTrue(order.Compare(ctx, K(newKey), K(prev)) > 0, "expected sorted edits") + assertTrue(order.Compare(ctx, K(newKey), K(prev)) > 0, "expected sorted edits: %v, %v", prev, newKey) } } diff --git a/go/store/prolly/tree/node_cursor.go b/go/store/prolly/tree/node_cursor.go index f1dfbe2c128..7a9e1518a6a 100644 --- a/go/store/prolly/tree/node_cursor.go +++ b/go/store/prolly/tree/node_cursor.go @@ -629,8 +629,8 @@ func fetchChild(ctx context.Context, ns NodeStore, ref hash.Hash) (Node, error) return ns.Read(ctx, ref) } -func assertTrue(b bool, msg string) { +func assertTrue(b bool, msg string, args ...any) { if !b { - panic("assertion failed: " + msg) + panic(fmt.Sprintf("assertion failed: "+msg, args...)) } } diff --git a/go/store/prolly/tree/stats.go b/go/store/prolly/tree/stats.go index 9bc488af4e9..d0a40cf7e01 100644 --- a/go/store/prolly/tree/stats.go +++ b/go/store/prolly/tree/stats.go @@ -141,6 +141,11 @@ func GetChunksAtLevel[K, V ~[]byte, O Ordering[K]](ctx context.Context, m Static // GetHistogramLevel returns the highest internal level of the tree that has // more than |low| addresses. 
func GetHistogramLevel[K, V ~[]byte, O Ordering[K]](ctx context.Context, m StaticMap[K, V, O], low int) ([]Node, error) { + if cnt, err := m.Count(); err != nil { + return nil, err + } else if cnt == 0 { + return nil, nil + } currentLevel := []Node{m.Root} level := m.Root.Level() for len(currentLevel) < low && level > 0 { diff --git a/go/store/val/tuple_builder.go b/go/store/val/tuple_builder.go index f92bc8ce1cb..18b4801eca1 100644 --- a/go/store/val/tuple_builder.go +++ b/go/store/val/tuple_builder.go @@ -15,6 +15,7 @@ package val import ( + "strconv" "time" "github.com/dolthub/go-mysql-server/sql/analyzer/analyzererrors" @@ -77,7 +78,7 @@ func NewTupleBuilder(desc TupleDesc) *TupleBuilder { func (tb *TupleBuilder) Build(pool pool.BuffPool) (tup Tuple) { for i, typ := range tb.Desc.Types { if !typ.Nullable && tb.fields[i] == nil { - panic("cannot write NULL to non-NULL field") + panic("cannot write NULL to non-NULL field: " + strconv.Itoa(i)) } } return tb.BuildPermissive(pool) diff --git a/go/store/val/tuple_descriptor.go b/go/store/val/tuple_descriptor.go index 980a6c91a2a..aea531b3768 100644 --- a/go/store/val/tuple_descriptor.go +++ b/go/store/val/tuple_descriptor.go @@ -636,11 +636,11 @@ func (td TupleDesc) formatValue(ctx context.Context, enc Encoding, i int, value case Hash128Enc: return hex.EncodeToString(value) case BytesAddrEnc: - return hex.EncodeToString(value) + return hash.New(value).String() case StringAddrEnc: - return hex.EncodeToString(value) + return hash.New(value).String() case CommitAddrEnc: - return hex.EncodeToString(value) + return hash.New(value).String() case CellEnc: return hex.EncodeToString(value) case ExtendedEnc: diff --git a/integration-tests/bats/stats.bats b/integration-tests/bats/stats.bats index 7cc4c4bf9f2..620e8323da3 100644 --- a/integration-tests/bats/stats.bats +++ b/integration-tests/bats/stats.bats @@ -22,12 +22,15 @@ SQL cd $TMPDIRS/repo2 dolt init + dolt sql -q "SET @@PERSIST.dolt_stats_job_interval = 100" dolt sql 
<50% of rows - dolt sql -q "delete from xy where x > 600" +call dolt_add('-A'); +call dolt_commit('-m', 'main branch'); - sleep 1 +-- mirror main +call dolt_checkout('-b', 'feat1'); +call dolt_checkout('-b', 'feat2'); - run dolt sql -r csv -q "select count(*) from dolt_statistics" - [ "$status" -eq 0 ] - [ "${lines[1]}" = "4" ] -} - -@test "stats: dolt_state_purge cli" { - cd repo2 - - dolt sql -q "insert into xy values (0,0), (1,0), (2,0)" - - # setting variables doesn't hang or error - dolt sql -q "SET @@persist.dolt_stats_auto_refresh_enabled = 0;" +create database other; +use other; +create table ot (i int primary key); +insert into ot values (0), (1), (2); - dolt sql -q "analyze table xy" - #start_sql_server - - #sleep 1 +call dolt_stats_wait(); +call dolt_stats_info('--short'); +SQL - run dolt sql -r csv -q "select count(*) from dolt_statistics" + # starting point + # dbs: repo2/[main, feat1, feat2], other/main + # stats: repo2:[xy,ab,toDelete]*3, other:[ot]*1 + run dolt sql -r csv -q "call dolt_stats_info('--short');" [ "$status" -eq 0 ] - [ "${lines[1]}" = "2" ] - - dolt sql -q "call dolt_stats_purge()" + [[ "$output" =~ '"{""dbCnt"":4,""active"":true,""storageBucketCnt"":6,""cachedBucketCnt"":6,""cachedBoundCnt"":6,""cachedTemplateCnt"":6,""statCnt"":10,""backing"":""repo2""}"' ]] || false - run dolt sql -r csv -q "select count(*) from dolt_statistics" + # clear invalid xy + dolt sql -q "call dolt_stats_gc()" + dolt sql -q "call dolt_stats_info('--short')" + run dolt sql -r csv -q "call dolt_stats_info('--short')" [ "$status" -eq 0 ] - [ "${lines[1]}" = "0" ] -} - -@test "stats: dolt_state_purge server" { - cd repo2 - - dolt sql -q "insert into xy values (0,0), (1,0), (2,0)" - - # setting variables doesn't hang or error - dolt sql -q "SET @@persist.dolt_stats_auto_refresh_enabled = 0;" + [[ "$output" =~ 
'"{""dbCnt"":4,""active"":true,""storageBucketCnt"":4,""cachedBucketCnt"":4,""cachedBoundCnt"":4,""cachedTemplateCnt"":6,""statCnt"":10,""backing"":""repo2""}"' ]] || false - start_sql_server - - sleep 1 - - dolt sql -q "analyze table xy" - - run dolt sql -r csv -q "select count(*) from dolt_statistics" + # remove toDelete table from 2/3 branches and gc + dolt sql -q "use repo2; call dolt_checkout('feat1'); drop table toDelete" + dolt sql -q "use repo2; call dolt_checkout('main'); drop table toDelete" + dolt sql -q "call dolt_stats_gc()" + dolt sql -q "call dolt_stats_info('--short')" + run dolt sql -r csv -q "call dolt_stats_info('--short')" [ "$status" -eq 0 ] - [ "${lines[1]}" = "2" ] - - dolt sql -q "call dolt_stats_purge()" + [[ "$output" =~ '"{""dbCnt"":4,""active"":true,""storageBucketCnt"":4,""cachedBucketCnt"":4,""cachedBoundCnt"":4,""cachedTemplateCnt"":6,""statCnt"":8,""backing"":""repo2""}"' ]] || false - run dolt sql -r csv -q "select count(*) from dolt_statistics" + # remove branch stats and gc + dolt sql -q "use repo2; call dolt_branch('-D', 'feat1', 'feat2')" + dolt sql -q "call dolt_stats_wait()" + dolt sql -q "call dolt_stats_gc()" + dolt sql -q "call dolt_stats_info('--short')" + run dolt sql -r csv -q "call dolt_stats_info('--short')" [ "$status" -eq 0 ] - [ "${lines[1]}" = "0" ] + [[ "$output" =~ '"{""dbCnt"":2,""active"":true,""storageBucketCnt"":3,""cachedBucketCnt"":3,""cachedBoundCnt"":3,""cachedTemplateCnt"":5,""statCnt"":3,""backing"":""repo2""}"' ]] || false - dolt sql -q "analyze table xy" - - run dolt sql -r csv -q "select count(*) from dolt_statistics" + # delete whole db and gc + dolt sql -q "drop database other;" + dolt sql -q "call dolt_stats_wait()" + dolt sql -q "call dolt_stats_gc()" + dolt sql -r csv -q "call dolt_stats_info('--short')" + run dolt sql -r csv -q "call dolt_stats_info('--short')" [ "$status" -eq 0 ] - [ "${lines[1]}" = "2" ] - - stop_sql_server + [[ "$output" =~ 
'"{""dbCnt"":1,""active"":true,""storageBucketCnt"":2,""cachedBucketCnt"":2,""cachedBoundCnt"":2,""cachedTemplateCnt"":4,""statCnt"":2,""backing"":""repo2""}"' ]] || false } -@test "stats: dolt_state_prune cli" { - cd repo2 - - dolt sql -q "insert into xy values (0,0), (1,0), (2,0)" +@test "stats: delete database clean swap" { + # only user-triggered GC's + dolt sql -q "SET @@PERSIST.dolt_stats_gc_enabled = 0" - # setting variables doesn't hang or error - dolt sql -q "SET @@persist.dolt_stats_auto_refresh_enabled = 0;" + # don't start server in repo2, the shell->server access + # breaks when you delete the primary database + start_sql_server - dolt sql -q "analyze table xy" - #start_sql_server + dolt sql -r csv < data.py -import random -import os - -rows = 2*1000*1000+1 - -def main(): - f = open("data.csv","w+") - f.write("id,hostname\n") - - for i in range(rows): - hostname = random.getrandbits(100) - f.write(f"{i},{hostname}\n") - if i % (500*1000) == 0: - print("row :", i) - f.flush() - - f.close() +@test "stats: restart in shell doesn't drop db, issue#8345" { + cd repo2 -if __name__ == "__main__": - main() + dolt sql -q "insert into xy values (0,0), (1,1), (2,2), (3,3), (4,4)" + dolt sql -q "insert into ab values (0,0), (1,1), (2,2), (3,3), (4,4)" + run dolt sql -r csv <