Skip to content

Commit

Permalink
Use budgeted count for aggregations (#128)
Browse files Browse the repository at this point in the history
This PR introduces a significant optimisation of aggregations (counts). 

It takes advantage of the fact that a Postgres query plan can tell you the cost of a query up-front, along with a rough estimate of the count based on indexes. All count queries now have a "budget", defaulting to 5,000. If the budget is exceeded according to the query plan, then the estimate will be returned (and the UI will display an estimate symbol `~` next to the associated count), otherwise the query will be executed and an exact count will be returned.

The accuracy of the estimate seems to be within 10-20% of the exact count in most cases. Accuracy depends on the selected filter criteria and on what is being counted — I have noticed bigger discrepancies in some scenarios — but overall it seems like an acceptable trade-off.

The background cache warmer has been removed and aggregations are now real time again (the cache warmer was at best a short term mitigation while I figured out a better solution). The cache TTL has been reduced to 10 minutes. It was previously increased to allow the cache warmer to be run less frequently.

There are also some adjustments to the indexes that improve performance and the accuracy of estimations. For large indexes the migration may take a while to run: in my tests on 12 million torrents it took 15 minutes.
  • Loading branch information
mgdigital authored Feb 6, 2024
1 parent 48acef4 commit e471dd2
Show file tree
Hide file tree
Showing 50 changed files with 1,583 additions and 1,072 deletions.
10 changes: 10 additions & 0 deletions graphql/fragments/TorrentContentSearchResult.graphql
Original file line number Diff line number Diff line change
Expand Up @@ -5,52 +5,62 @@ fragment TorrentContentSearchResult on TorrentContentSearchResult {
...TorrentContent
}
totalCount
totalCountIsEstimate
hasNextPage
aggregations {
contentType {
value
label
count
isEstimate
}
torrentSource {
value
label
count
isEstimate
}
torrentTag {
value
label
count
isEstimate
}
torrentFileType {
value
label
count
isEstimate
}
language {
value
label
count
isEstimate
}
genre {
value
label
count
isEstimate
}
releaseYear {
value
label
count
isEstimate
}
videoResolution {
value
label
count
isEstimate
}
videoSource {
value
label
count
isEstimate
}
}
}
11 changes: 11 additions & 0 deletions graphql/schema/search.graphqls
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ input SearchQueryInput {
"""
hasNextPage: Boolean
cached: Boolean
aggregationBudget: Float
}

input ContentTypeFacetInput {
Expand Down Expand Up @@ -75,54 +76,63 @@ type ContentTypeAgg {
value: ContentType
label: String!
count: Int!
isEstimate: Boolean!
}

"""A search-result count bucketed by torrent source."""
type TorrentSourceAgg {
  value: String!
  label: String!
  count: Int!
  # true when the aggregation budget was exceeded and this count is a
  # query-planner estimate rather than an exact count
  isEstimate: Boolean!
}

"""A search-result count bucketed by torrent tag."""
type TorrentTagAgg {
  value: String!
  label: String!
  count: Int!
  # true when this count is a planner estimate (budget exceeded)
  isEstimate: Boolean!
}

"""A search-result count bucketed by torrent file type."""
type TorrentFileTypeAgg {
  value: FileType!
  label: String!
  count: Int!
  # true when this count is a planner estimate (budget exceeded)
  isEstimate: Boolean!
}

"""A search-result count bucketed by language."""
type LanguageAgg {
  value: Language!
  label: String!
  count: Int!
  # true when this count is a planner estimate (budget exceeded)
  isEstimate: Boolean!
}

"""A search-result count bucketed by genre."""
type GenreAgg {
  value: String!
  label: String!
  count: Int!
  # true when this count is a planner estimate (budget exceeded)
  isEstimate: Boolean!
}

"""A search-result count bucketed by release year."""
type ReleaseYearAgg {
  value: Year
  label: String!
  count: Int!
  # true when this count is a planner estimate (budget exceeded)
  isEstimate: Boolean!
}

"""A search-result count bucketed by video resolution."""
type VideoResolutionAgg {
  value: VideoResolution
  label: String!
  count: Int!
  # true when this count is a planner estimate (budget exceeded)
  isEstimate: Boolean!
}

"""A search-result count bucketed by video source."""
type VideoSourceAgg {
  value: VideoSource
  label: String!
  count: Int!
  # true when this count is a planner estimate (budget exceeded)
  isEstimate: Boolean!
}

type TorrentContentAggregations {
Expand All @@ -139,6 +149,7 @@ type TorrentContentAggregations {

type TorrentContentSearchResult {
totalCount: Int!
totalCountIsEstimate: Boolean!
"""
hasNextPage is true if there are more results to fetch
"""
Expand Down
2 changes: 1 addition & 1 deletion internal/database/cache/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ func NewDefaultConfig() Config {
// if I can get time to understand the problem better I may open an issue in https://github.com/go-gorm/caches, though they
// don't seem very responsive to issues, hence why bitmagnet uses a forked version of this library...
EaserEnabled: false,
Ttl: time.Minute * 60,
Ttl: time.Minute * 10,
MaxKeys: 1000,
}
}
24 changes: 24 additions & 0 deletions internal/database/dao/budgeted_count.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
package dao

import (
"gorm.io/gorm"
)

// ToSQL renders the query currently held by db as a SQL string without
// executing it, by performing a dry-run Find into an empty destination.
func ToSQL(db *gorm.DB) string {
	dryRun := func(tx *gorm.DB) *gorm.DB {
		var dest []interface{}
		return tx.Find(&dest)
	}
	return db.ToSQL(dryRun)
}

// BudgetedCountResult holds the outcome of a budgeted count query, as
// returned by the budgeted_count() Postgres function.
type BudgetedCountResult struct {
	// Count is the row count; when BudgetExceeded is true it is the query
	// planner's estimate rather than an exact count.
	Count int64
	// Cost is the planner's estimated cost of running the exact count.
	Cost float64
	// BudgetExceeded reports whether the estimated cost exceeded the
	// supplied budget, in which case Count is an estimate.
	BudgetExceeded bool
}

// BudgetedCount runs the query held by db through the budgeted_count()
// Postgres function: if the planner's cost estimate for the exact count
// exceeds budget, an estimated count is returned instead of an exact one.
func BudgetedCount(db *gorm.DB, budget float64) (BudgetedCountResult, error) {
	var result BudgetedCountResult
	// Render the pending query to SQL so it can be passed to the function.
	sql := ToSQL(db)
	row := db.Raw("SELECT count, cost, budget_exceeded from budgeted_count(?, ?)", sql, budget).Row()
	if err := row.Scan(&result.Count, &result.Cost, &result.BudgetExceeded); err != nil {
		return result, err
	}
	return result, nil
}
6 changes: 3 additions & 3 deletions internal/database/dao/content.gen.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

15 changes: 15 additions & 0 deletions internal/database/dao/torrent_contents.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
package dao

import (
"fmt"
"gorm.io/gorm/callbacks"
)

// CountEstimate is an unfinished stub: it renders the SQL for the current
// query but always returns (0, nil).
//
// NOTE(review): the fmt.Printf below writes the built query and args to
// stdout on every call — this looks like leftover debug output and should be
// removed or routed through the application logger before use.
// NOTE(review): the rendered query and args are otherwise discarded;
// presumably this is groundwork for a planner-based count estimate — confirm
// intent before relying on this method.
func (t torrentContent) CountEstimate() (int64, error) {
	db := t.UnderlyingDB()
	// Build the SELECT without executing it, populating Statement.SQL/Vars.
	callbacks.BuildQuerySQL(db)
	query := db.Statement.SQL.String()
	args := db.Statement.Vars
	fmt.Printf("query: %s, args: %v", query, args)
	return 0, nil
}
3 changes: 0 additions & 3 deletions internal/database/databasefx/module.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@ import (
"github.com/bitmagnet-io/bitmagnet/internal/database/migrations"
"github.com/bitmagnet-io/bitmagnet/internal/database/postgres"
"github.com/bitmagnet-io/bitmagnet/internal/database/search"
"github.com/bitmagnet-io/bitmagnet/internal/database/search/warmer"
"go.uber.org/fx"
)

Expand All @@ -18,7 +17,6 @@ func New() fx.Option {
"database",
configfx.NewConfigModule[postgres.Config]("postgres", postgres.NewDefaultConfig()),
configfx.NewConfigModule[cache.Config]("gorm_cache", cache.NewDefaultConfig()),
configfx.NewConfigModule[warmer.Config]("search_warmer", warmer.NewDefaultConfig()),
fx.Provide(
cache.NewInMemoryCacher,
cache.NewPlugin,
Expand All @@ -28,7 +26,6 @@ func New() fx.Option {
migrations.New,
postgres.New,
search.New,
warmer.New,
),
fx.Decorate(
cache.NewDecorator,
Expand Down
1 change: 1 addition & 0 deletions internal/database/gen/gen.go
Original file line number Diff line number Diff line change
Expand Up @@ -254,6 +254,7 @@ func BuildGenerator(db *gorm.DB) *gen.Generator {
gen.FieldType("release_date", "Date"),
gen.FieldGenType("release_date", "Time"),
gen.FieldType("release_year", "Year"),
gen.FieldGenType("release_year", "Uint16"),
gen.FieldType("original_language", "NullLanguage"),
gen.FieldType("popularity", "NullFloat32"),
gen.FieldType("vote_average", "NullFloat32"),
Expand Down
2 changes: 1 addition & 1 deletion internal/database/migrations/logger.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ func (l gooseLogger) Println(v ...interface{}) {

func (l gooseLogger) Printf(format string, v ...interface{}) {
fn := l.l.Debugf
if strings.HasPrefix(format, "goose: successfully migrated") {
if strings.HasPrefix(format, "goose: successfully migrated") || strings.HasPrefix(format, "goose: no migrations to run") {
fn = l.l.Infof
}
fn(strings.TrimSpace(format), v...)
Expand Down
12 changes: 8 additions & 4 deletions internal/database/migrations/migrator.go
Original file line number Diff line number Diff line change
Expand Up @@ -33,16 +33,18 @@ func New(p Params) Result {
if err != nil {
return nil, err
}
initGoose(p.Logger)
logger := p.Logger.Named("migrator")
initGoose(logger)
return &migrator{
db: db,
db: db,
logger: logger,
}, nil
}),
}
}

func initGoose(logger *zap.SugaredLogger) {
goose.SetLogger(gooseLogger{logger.Named("migrator")})
goose.SetLogger(gooseLogger{logger})
goose.SetBaseFS(migrationssql.FS)
err := goose.SetDialect("postgres")
if err != nil {
Expand All @@ -58,10 +60,12 @@ type Migrator interface {
}

// migrator applies goose database migrations. It is constructed by New,
// which supplies the database handle and a named logger.
type migrator struct {
	// db is the raw database handle that migrations run against.
	db *sql.DB
	// logger reports migration progress (named "migrator" by New).
	logger *zap.SugaredLogger
}

// Up logs that a migration check is starting, then applies all pending
// goose migrations, returning any error from goose.
func (m *migrator) Up(ctx context.Context) error {
	m.logger.Info("checking and applying migrations...")
	err := goose.UpContext(ctx, m.db, ".")
	return err
}

Expand Down
Loading

0 comments on commit e471dd2

Please sign in to comment.