Skip to content

Commit

Permalink
feat: add bloat metrics by statistic
Browse files Browse the repository at this point in the history
  • Loading branch information
snakeice committed Jan 19, 2024
1 parent 0efbaca commit d8769e0
Show file tree
Hide file tree
Showing 4 changed files with 231 additions and 56 deletions.
197 changes: 197 additions & 0 deletions pkg/db/bloat_queries.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,197 @@
package db

const (
qryTableBloatPgstattuple = `
select
ceil((size - free_space - dead_tuple_len) * 100 / fillfactor / bs) as effective_page_count,
greatest(round(
(100 * (1 - (100 - free_percent - dead_tuple_percent) / fillfactor))::numeric, 2
),0) as free_percent,
greatest(ceil(size - (size - free_space - dead_tuple_len) * 100 / fillfactor), 0) as free_space
from (
select
current_setting('block_size')::integer as bs,
pg_catalog.pg_relation_size(pg_catalog.pg_class.oid) as size,
coalesce(
(
select (
regexp_matches(
reloptions::text, e'.*fillfactor=(\\\\d+).*'))[1]),
'100')::real as fillfactor,
pgst.*
from pg_catalog.pg_class
cross join
%q.pgstattuple(
(quote_ident($1) || '.' || quote_ident($2))) as pgst
where pg_catalog.pg_class.oid = (quote_ident($1) || '.' || quote_ident($2))::regclass
) as sq limit 1;`

qryTableBloatStatistical = `
SELECT CASE WHEN tblpages - est_tblpages_ff > 0
THEN (tblpages-est_tblpages_ff)*bs
ELSE 0
END AS free_space,
CASE WHEN tblpages - est_tblpages_ff > 0
THEN 100 * (tblpages - est_tblpages_ff)/tblpages::float
ELSE 0
END AS free_percent
FROM (
SELECT ceil( reltuples / ( (bs-page_hdr)/tpl_size ) ) + ceil( toasttuples / 4 ) AS est_tblpages,
ceil( reltuples / ( (bs-page_hdr)*fillfactor/(tpl_size*100) ) ) + ceil( toasttuples / 4 ) AS est_tblpages_ff,
tblpages, fillfactor, bs, tblid, schemaname, tblname, heappages, toastpages, is_na
-- , tpl_hdr_size, tpl_data_size, pgstattuple(tblid) AS pst -- (DEBUG INFO)
FROM (
SELECT
( 4 + tpl_hdr_size + tpl_data_size + (2*ma)
- CASE WHEN tpl_hdr_size%ma = 0 THEN ma ELSE tpl_hdr_size%ma END
- CASE WHEN ceil(tpl_data_size)::int%ma = 0 THEN ma ELSE ceil(tpl_data_size)::int%ma END
) AS tpl_size, bs - page_hdr AS size_per_block, (heappages + toastpages) AS tblpages, heappages,
toastpages, reltuples, toasttuples, bs, page_hdr, tblid, schemaname, tblname, fillfactor, is_na
-- , tpl_hdr_size, tpl_data_size
FROM (
SELECT
tbl.oid AS tblid, ns.nspname AS schemaname, tbl.relname AS tblname, tbl.reltuples,
tbl.relpages AS heappages, coalesce(toast.relpages, 0) AS toastpages,
coalesce(toast.reltuples, 0) AS toasttuples,
coalesce(substring(
array_to_string(tbl.reloptions, ' ')
FROM 'fillfactor=([0-9]+)')::smallint, 100) AS fillfactor,
current_setting('block_size')::numeric AS bs,
CASE WHEN version()~'mingw32' OR version()~'64-bit|x86_64|ppc64|ia64|amd64' THEN 8 ELSE 4 END AS ma,
24 AS page_hdr,
23 + CASE WHEN MAX(coalesce(s.null_frac,0)) > 0 THEN ( 7 + count(s.attname) ) / 8 ELSE 0::int END
+ CASE WHEN bool_or(att.attname = 'oid' and att.attnum < 0) THEN 4 ELSE 0 END AS tpl_hdr_size,
sum( (1-coalesce(s.null_frac, 0)) * coalesce(s.avg_width, 0) ) AS tpl_data_size,
bool_or(att.atttypid = 'pg_catalog.name'::regtype)
OR sum(CASE WHEN att.attnum > 0 THEN 1 ELSE 0 END) <> count(s.attname) AS is_na
FROM pg_attribute AS att
JOIN pg_class AS tbl ON att.attrelid = tbl.oid
JOIN pg_namespace AS ns ON ns.oid = tbl.relnamespace
LEFT JOIN pg_stats AS s ON s.schemaname=ns.nspname
AND s.tablename = tbl.relname AND s.inherited=false AND s.attname=att.attname
LEFT JOIN pg_class AS toast ON tbl.reltoastrelid = toast.oid
WHERE NOT att.attisdropped
AND tbl.relkind in ('r','m')
AND ns.nspname = $1
AND tbl.relname = $2
GROUP BY 1,2,3,4,5,6,7,8,9,10
ORDER BY 2,3
) AS s
) AS s2
) AS s3
ORDER BY ((tblpages-est_tblpages)*bs)::bigint desc, schemaname, tblname;
`

qryIndexBloatPgstattuple = `
SELECT
CASE
WHEN avg_leaf_density = 'NaN' THEN 0
ELSE
round(
(100 * (1 - avg_leaf_density / fillfactor))::numeric, 2
)
END AS free_percent,
CASE
WHEN avg_leaf_density = 'NaN' THEN 0
ELSE
ceil(
index_size * (1 - avg_leaf_density / fillfactor)
)
END AS free_space
FROM (
SELECT
coalesce(
(
SELECT (
regexp_matches(
reloptions::text, E'.*fillfactor=(\\\\d+).*'))[1]),
'90')::real AS fillfactor,
pgsi.*
FROM pg_catalog.pg_class
CROSS JOIN %q.pgstatindex(
quote_ident($1) || '.' || quote_ident($2)) AS pgsi
WHERE pg_catalog.pg_class.oid = (quote_ident($1) || '.' || quote_ident($2))::regclass
) AS oq`

qryIndexBloatStatistical = `SELECT CASE WHEN relpages > est_pages_ff
THEN bs*(relpages-est_pages_ff)::bigint
ELSE 0::bigint
END AS free_space,
ROUND((100 * (relpages-est_pages_ff)::float / relpages)::numeric , 2) AS free_percent
FROM (
SELECT coalesce(1 +
ceil(reltuples/floor((bs-pageopqdata-pagehdr)/(4+nulldatahdrwidth)::float)), 0 -- ItemIdData size + computed avg size of a tuple (nulldatahdrwidth)
) AS est_pages,
coalesce(1 +
ceil(reltuples/floor((bs-pageopqdata-pagehdr)*fillfactor/(100*(4+nulldatahdrwidth)::float))), 0
) AS est_pages_ff,
bs, nspname, tblname, idxname, relpages, fillfactor, is_na
-- , pgstatindex(idxoid) AS pst, index_tuple_hdr_bm, maxalign, pagehdr, nulldatawidth, nulldatahdrwidth, reltuples -- (DEBUG INFO)
FROM (
SELECT maxalign, bs, nspname, tblname, idxname, reltuples, relpages, idxoid, fillfactor,
( index_tuple_hdr_bm +
maxalign - CASE -- Add padding to the index tuple header to align on MAXALIGN
WHEN index_tuple_hdr_bm%maxalign = 0 THEN maxalign
ELSE index_tuple_hdr_bm%maxalign
END
+ nulldatawidth + maxalign - CASE -- Add padding to the data to align on MAXALIGN
WHEN nulldatawidth = 0 THEN 0
WHEN nulldatawidth::integer%maxalign = 0 THEN maxalign
ELSE nulldatawidth::integer%maxalign
END
)::numeric AS nulldatahdrwidth, pagehdr, pageopqdata, is_na
-- , index_tuple_hdr_bm, nulldatawidth -- (DEBUG INFO)
FROM (
SELECT n.nspname, ct.relname AS tblname, i.idxname, i.reltuples, i.relpages,
i.idxoid, i.fillfactor, current_setting('block_size')::numeric AS bs,
CASE -- MAXALIGN: 4 on 32bits, 8 on 64bits (and mingw32 ?)
WHEN version() ~ 'mingw32' OR version() ~ '64-bit|x86_64|ppc64|ia64|amd64' THEN 8
ELSE 4
END AS maxalign,
/* per page header, fixed size: 20 for 7.X, 24 for others */
24 AS pagehdr,
/* per page btree opaque data */
16 AS pageopqdata,
/* per tuple header: add IndexAttributeBitMapData if some cols are null-able */
CASE WHEN max(coalesce(s.stanullfrac,0)) = 0
THEN 2 -- IndexTupleData size
ELSE 2 + (( 32 + 8 - 1 ) / 8) -- IndexTupleData size + IndexAttributeBitMapData size ( max num filed per index + 8 - 1 /8)
END AS index_tuple_hdr_bm,
/* data len: we remove null values save space using it fractionnal part from stats */
sum( (1-coalesce(s.stanullfrac, 0)) * coalesce(s.stawidth, 1024)) AS nulldatawidth,
max( CASE WHEN a.atttypid = 'pg_catalog.name'::regtype THEN 1 ELSE 0 END ) > 0 AS is_na
FROM (
SELECT idxname, reltuples, relpages, tbloid, idxoid, fillfactor,
CASE WHEN indkey[i]=0 THEN idxoid ELSE tbloid END AS att_rel,
CASE WHEN indkey[i]=0 THEN i ELSE indkey[i] END AS att_pos
FROM (
SELECT idxname, reltuples, relpages, tbloid, idxoid, fillfactor, indkey, generate_series(1,indnatts) AS i
FROM (
SELECT ci.relname AS idxname, ci.reltuples, ci.relpages, i.indrelid AS tbloid,
i.indexrelid AS idxoid,
coalesce(substring(
array_to_string(ci.reloptions, ' ')
from 'fillfactor=([0-9]+)')::smallint, 90) AS fillfactor,
i.indnatts,
string_to_array(textin(int2vectorout(i.indkey)),' ')::int[] AS indkey
FROM pg_index i
JOIN pg_class ci ON ci.oid=i.indexrelid
WHERE ci.relam=(SELECT oid FROM pg_am WHERE amname = 'btree')
AND ci.relpages > 0
AND ci.relnamespace = ($1)::regnamespace
AND ci.relname = $2
) AS idx_data
) AS idx_data_cross
) i
JOIN pg_attribute a ON a.attrelid = i.att_rel
AND a.attnum = i.att_pos
JOIN pg_statistic s ON s.starelid = i.att_rel
AND s.staattnum = i.att_pos
JOIN pg_class ct ON ct.oid = i.tbloid
JOIN pg_namespace n ON ct.relnamespace = n.oid
GROUP BY 1,2,3,4,5,6,7,8,9,10
) AS rows_data_stats
) AS rows_hdr_pdg_stats
) AS relation_stats;
`
)
37 changes: 7 additions & 30 deletions pkg/db/index.go
Original file line number Diff line number Diff line change
Expand Up @@ -460,36 +460,13 @@ func (pg *PgConnection) getIndexBloatStats(ctx context.Context, schema, index st
if pgstatstupleSchema, err := pg.GetPgStatTupleSchema(ctx); err != nil {
return nil, fmt.Errorf("error getting pgstattuple schema: %w", err)
} else {
qry := fmt.Sprintf(`
SELECT
CASE
WHEN avg_leaf_density = 'NaN' THEN 0
ELSE
round(
(100 * (1 - avg_leaf_density / fillfactor))::numeric, 2
)
END AS free_percent,
CASE
WHEN avg_leaf_density = 'NaN' THEN 0
ELSE
ceil(
index_size * (1 - avg_leaf_density / fillfactor)
)
END AS free_space
FROM (
SELECT
coalesce(
(
SELECT (
regexp_matches(
reloptions::text, E'.*fillfactor=(\\\\d+).*'))[1]),
'90')::real AS fillfactor,
pgsi.*
FROM pg_catalog.pg_class
CROSS JOIN %q.pgstatindex(
quote_ident($1) || '.' || quote_ident($2)) AS pgsi
WHERE pg_catalog.pg_class.oid = (quote_ident($1) || '.' || quote_ident($2))::regclass
) AS oq`, pgstatstupleSchema)
var qry string
switch params.BLOAT_METRIC_SOURCE {
case params.BLOAT_METRIC_STATISTICAL:
qry = qryIndexBloatStatistical
default:
qry = fmt.Sprintf(qryIndexBloatPgstattuple, pgstatstupleSchema)
}

var result IndexBloatStats
err := pgxscan.Get(ctx, pg.Conn, &result, qry, schema, index)
Expand Down
40 changes: 15 additions & 25 deletions pkg/db/stats.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ import (
"context"
"fmt"

"github.com/electric-saw/pg-defrag/pkg/params"
"github.com/georgysavva/scany/v2/pgxscan"
)

Expand Down Expand Up @@ -45,34 +46,23 @@ func (pg *PgConnection) GetBloatStats(ctx context.Context, schema, table string)
if err != nil {
return nil, fmt.Errorf("Failed to get pg_stat_tuple schema: %v", err)
}
qry := fmt.Sprintf(`
select
ceil((size - free_space - dead_tuple_len) * 100 / fillfactor / bs) as effective_page_count,
greatest(round(
(100 * (1 - (100 - free_percent - dead_tuple_percent) / fillfactor))::numeric, 2
),0) as free_percent,
greatest(ceil(size - (size - free_space - dead_tuple_len) * 100 / fillfactor), 0) as free_space
from (
select
current_setting('block_size')::integer as bs,
pg_catalog.pg_relation_size(pg_catalog.pg_class.oid) as size,
coalesce(
(
select (
regexp_matches(
reloptions::text, e'.*fillfactor=(\\\\d+).*'))[1]),
'100')::real as fillfactor,
pgst.*
from pg_catalog.pg_class
cross join
%q.pgstattuple(
(quote_ident($1) || '.' || quote_ident($2))) as pgst
where pg_catalog.pg_class.oid = (quote_ident($1) || '.' || quote_ident($2))::regclass
) as sq limit 1;`, pgStatTupleSchema)

qry := fmt.Sprintf(qryTableBloatPgstattuple, pgStatTupleSchema)

var stats PgBloatStats
err = pgxscan.Get(ctx, pg.Conn, &stats, qry, schema, table)
return &stats, err
if err != nil {
return nil, fmt.Errorf("Failed to get bloat stats: %v", err)
}

if params.BLOAT_METRIC_SOURCE == params.BLOAT_METRIC_STATISTICAL {
qry = qryTableBloatStatistical
err = pgxscan.Get(ctx, pg.Conn, &stats, qry, schema, table)
if err != nil {
return nil, fmt.Errorf("Failed to get bloat stats: %v", err)
}
}
return &stats, nil
}

func (s *PgSizeStats) Copy() *PgSizeStats {
Expand Down
13 changes: 12 additions & 1 deletion pkg/params/params.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,17 @@ import (
"github.com/sirupsen/logrus"
)

type maxPagesPerRoundFunc func(int64) int64
type (
maxPagesPerRoundFunc func(int64) int64
BloatMetricType int
)

const (
BLOAT_METRIC_PGSTATTUPLE BloatMetricType = iota
BLOAT_METRIC_STATISTICAL

BLOAT_METRIC_DEFAULT = BLOAT_METRIC_PGSTATTUPLE
)

var (
MINIMAL_COMPACT_PAGES int64 = envOrDefaultInt64("MINIMAL_COMPACT_PAGES", 10)
Expand All @@ -22,6 +32,7 @@ var (
PROGRESS_REPORT_PERIOD time.Duration = 60 * time.Second
DELAY_RATIO float64 = envOrDefaultfloat64("DELAY_RATIO", 2)
MAX_DELAY time.Duration = time.Second
BLOAT_METRIC_SOURCE = BLOAT_METRIC_DEFAULT

MAX_RETRY_COUNT int = 10
)
Expand Down

0 comments on commit d8769e0

Please sign in to comment.