Skip to content

Commit

Permalink
Performance 2020 queries (#1091)
Browse files Browse the repository at this point in the history
* Selecting LH performance score. WIP

* Selecting LH performance score. WIP

* Selecting LH performance score. WIP

* Delta for LH5 and LH6 performance scores

* Delta for LH5 and LH6 performance scores

* Delta for LH5 and LH6 performance scores

* web vitals aggregated

* Web Vitals query suggested in the discussion

* cwv per device, ect and country

* offline origins

* webvitals distribution by device

* lcp score by month

* Removed unused query

* review updates

* query name updated

* performance observer

* Distribution of LH5 and LH6 performance scores on mobile

* FIX: Using abs() to calculate average change between LH5 and LH6

* Updated tables and dates to use August data

* Update sql/2020/09_Performance/lh5_performance_score_distribution.sql

Formatting

Co-authored-by: Rick Viscomi <[email protected]>

* Update sql/2020/09_Performance/lh5_performance_score_distribution.sql

Added "AS"

Co-authored-by: Rick Viscomi <[email protected]>

* Removed unused variables, formatting and using September table in queries.

* Formatting

* updated dates

* median function update

* * Using percentage to compare performance scores between LH5 and LH6.
* Using last year dataset to show distribution of LH performance score

* Using raw distribution for performance score and percentiles for LH5 vs LH6 scores comparison

* review updates + lighthouse performance audits

* year series fix

* fix median precision

* Update sql/2020/09_Performance/lighthouse_performace_audits.sql

Co-authored-by: Barry Pollard <[email protected]>

* Update sql/2020/09_Performance/web_vitals_distribution_by_device.sql

Co-authored-by: Rick Viscomi <[email protected]>

* use August data from crux tables

* Update sql/2020/09_Performance/lh6_vs_lh5_performance_score_01.sql

Co-authored-by: Rick Viscomi <[email protected]>

* Update sql/2020/09_Performance/lh6_vs_lh5_performance_score_02.sql

Co-authored-by: Rick Viscomi <[email protected]>

* Update sql/2020/09_Performance/lh6_vs_lh5_performance_score_01.sql

Co-authored-by: Rick Viscomi <[email protected]>

* Update sql/2020/09_Performance/lh6_vs_lh5_performance_score_01.sql

Co-authored-by: Rick Viscomi <[email protected]>

* added year comparison

* linting

Co-authored-by: [email protected] <Dmitry Pokidov>
Co-authored-by: Dmitry Pokidov <[email protected]>
Co-authored-by: Rick Viscomi <[email protected]>
Co-authored-by: Barry Pollard <[email protected]>
  • Loading branch information
4 people committed Oct 12, 2020
1 parent 2815281 commit 66b014d
Show file tree
Hide file tree
Showing 12 changed files with 919 additions and 0 deletions.
14 changes: 14 additions & 0 deletions sql/2020/09_Performance/lh5_performance_score_distribution.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
#standardSQL
# Distribution of LH5 performance score.
# JSON_EXTRACT_SCALAR returns a STRING, so order by a numeric cast to avoid
# lexicographic ordering of the score values. SAFE_CAST keeps rows with a
# missing/non-numeric score (they sort first as NULL) instead of erroring.

SELECT
  JSON_EXTRACT_SCALAR(report, '$.categories.performance.score') AS score,
  COUNT(0) AS freq,
  SUM(COUNT(0)) OVER () AS total,
  COUNT(0) / SUM(COUNT(0)) OVER () AS pct
FROM
  `httparchive.lighthouse.2019_07_01_mobile`
GROUP BY
  score
ORDER BY
  SAFE_CAST(score AS FLOAT64)
14 changes: 14 additions & 0 deletions sql/2020/09_Performance/lh6_performance_score_distribution.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
#standardSQL
# Distribution of LH6 performance score.
# JSON_EXTRACT_SCALAR returns a STRING, so order by a numeric cast to avoid
# lexicographic ordering of the score values. SAFE_CAST keeps rows with a
# missing/non-numeric score (they sort first as NULL) instead of erroring.

SELECT
  JSON_EXTRACT_SCALAR(report, '$.categories.performance.score') AS score,
  COUNT(0) AS freq,
  SUM(COUNT(0)) OVER () AS total,
  COUNT(0) / SUM(COUNT(0)) OVER () AS pct
FROM
  `httparchive.lighthouse.2020_09_01_mobile`
GROUP BY
  score
ORDER BY
  SAFE_CAST(score AS FLOAT64)
21 changes: 21 additions & 0 deletions sql/2020/09_Performance/lh6_vs_lh5_performance_score_01.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
#standardSQL
# Calculates percentile for delta of LH5 and LH6 performance score for mobile

# Per-URL difference between the LH6 (Sept 2020) and LH5 (July 2019) scores.
WITH score_deltas AS (
  SELECT
    CAST(JSON_EXTRACT(lh6.report, '$.categories.performance.score') AS NUMERIC) -
      CAST(JSON_EXTRACT(lh5.report, '$.categories.performance.score') AS NUMERIC) AS perf_score_delta
  FROM
    `httparchive.lighthouse.2020_09_01_mobile` AS lh6
  INNER JOIN
    `httparchive.lighthouse.2019_07_01_mobile` AS lh5
  ON
    lh5.url = lh6.url
)

SELECT
  percentile,
  APPROX_QUANTILES(perf_score_delta, 1000)[OFFSET(percentile * 10)] AS perf_score_delta
FROM
  score_deltas,
  UNNEST([0, 10, 25, 50, 75, 90, 100]) AS percentile
GROUP BY
  percentile
ORDER BY
  percentile
30 changes: 30 additions & 0 deletions sql/2020/09_Performance/lh6_vs_lh5_performance_score_02.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
#standardSQL

# Calculates percentage of sites where the performance score changed low (<= 0.1),
# medium (0.1-0.3) or big (> 0.3) between LH 5 and 6 versions.
# ABS() is applied to the delta so that the buckets measure the magnitude of the
# change regardless of direction; without it, every score that *dropped* would
# fall into the "small change" bucket (e.g. -0.5 <= 0.1).

WITH score_deltas AS (
  # Per-URL absolute score change between LH6 (Sept 2020) and LH5 (July 2019).
  SELECT
    ABS(
      CAST(JSON_EXTRACT(lh6.report, '$.categories.performance.score') AS NUMERIC) -
      CAST(JSON_EXTRACT(lh5.report, '$.categories.performance.score') AS NUMERIC)
    ) AS perf_score_delta
  FROM
    `httparchive.lighthouse.2020_09_01_mobile` AS lh6
  INNER JOIN
    `httparchive.lighthouse.2019_07_01_mobile` AS lh5
  ON
    lh5.url = lh6.url
),

change_buckets AS (
  SELECT
    COUNTIF(perf_score_delta <= 0.1) AS small_change,
    COUNTIF(perf_score_delta > 0.1 AND perf_score_delta <= 0.3) AS mid_change,
    COUNTIF(perf_score_delta > 0.3) AS big_change
  FROM
    score_deltas
)

SELECT
  SAFE_DIVIDE(small_change, small_change + mid_change + big_change) AS small,
  SAFE_DIVIDE(mid_change, small_change + mid_change + big_change) AS avg,
  SAFE_DIVIDE(big_change, small_change + mid_change + big_change) AS big
FROM
  change_buckets
42 changes: 42 additions & 0 deletions sql/2020/09_Performance/lighthouse_performace_audits.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
#standardSQL
# Get summary of all lighthouse scores for a category
# Note scores, weightings, groups and descriptions may be off in mixed months
# when new versions of Lighthouse roll out

CREATE TEMPORARY FUNCTION getAudits(report STRING, category STRING)
RETURNS ARRAY<STRUCT<id STRING, weight INT64, audit_group STRING, title STRING, description STRING, score INT64>> LANGUAGE js AS '''
var $ = JSON.parse(report);
var auditrefs = $.categories[category].auditRefs;
var audits = $.audits;
$ = null;  // release the parsed report early to reduce UDF memory pressure
var results = [];
for (let auditref of auditrefs) {  // `let` avoids an implicit global
  results.push({
    id: auditref.id,
    weight: auditref.weight,
    audit_group: auditref.group,
    title: audits[auditref.id].title,  // was declared in the STRUCT but never populated
    description: audits[auditref.id].description,
    score: audits[auditref.id].score
  });
}
return results;
''';

SELECT
  audits.id AS id,
  COUNTIF(audits.score > 0) AS num_pages,
  COUNT(0) AS total,
  COUNTIF(audits.score IS NOT NULL) AS total_applicable,
  SAFE_DIVIDE(COUNTIF(audits.score > 0), COUNTIF(audits.score IS NOT NULL)) AS pct,
  APPROX_QUANTILES(audits.weight, 100)[OFFSET(50)] AS median_weight,
  MAX(audits.audit_group) AS audit_group,
  MAX(audits.description) AS description
FROM
  `httparchive.lighthouse.2020_09_01_mobile`,
  UNNEST(getAudits(report, "performance")) AS audits
WHERE
  LENGTH(report) < 20000000 # necessary to avoid out of memory issues. Excludes 16 very large results
GROUP BY
  audits.id
ORDER BY
  median_weight DESC,
  id
16 changes: 16 additions & 0 deletions sql/2020/09_Performance/median_lcp_score_by_month.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
#standardSQL
# Median of Largest contentful paint 75% score by month
# (median across origins of each origin's p75 LCP, per month and device)

SELECT
  date,
  device,
  APPROX_QUANTILES(p75_lcp, 1000 RESPECT NULLS)[OFFSET(500)] AS median_p75_lcp
FROM
  `chrome-ux-report.materialized.device_summary`
WHERE
  device IN ('desktop', 'phone') AND
  date >= '2019-09-01' AND
  date <= '2020-08-01'
GROUP BY
  date,
  device
16 changes: 16 additions & 0 deletions sql/2020/09_Performance/offline_origins.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
#standardSQL
# Offline origins

SELECT
total_origins,
offline_origins,
offline_origins/total_origins AS pct_offline_origins
FROM (
SELECT
COUNT(DISTINCT origin) AS total_origins,
COUNT(DISTINCT IF(offlineDensity > 0, origin, NULL)) AS offline_origins,
FROM
`chrome-ux-report.materialized.metrics_summary`
WHERE
date = '2020-08-01'
)
13 changes: 13 additions & 0 deletions sql/2020/09_Performance/performance_observer.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
#standardSQL
# Percent of pages using Performance observer
# (pre-aggregated Blink feature counters for the August 2020 crawl)

SELECT
  client,
  num_urls,
  total_urls,
  pct_urls
FROM
  `httparchive.blink_features.usage`
WHERE
  feature = 'PerformanceObserverForWindow' AND
  yyyymmdd = '20200801'
154 changes: 154 additions & 0 deletions sql/2020/09_Performance/web_vitals_by_country.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,154 @@
#standardSQL
# Core WebVitals by country

# An origin "passes" a metric when at least 75% of its experiences are good.
# SAFE_DIVIDE returns NULL (not TRUE) when the origin has no data for the metric.
CREATE TEMP FUNCTION IS_GOOD (good FLOAT64, needs_improvement FLOAT64, poor FLOAT64) RETURNS BOOL AS (
SAFE_DIVIDE(good, (good + needs_improvement + poor)) >= 0.75
);

# An origin is "poor" on a metric when at least 25% of its experiences are poor.
CREATE TEMP FUNCTION IS_POOR (good FLOAT64, needs_improvement FLOAT64, poor FLOAT64) RETURNS BOOL AS (
SAFE_DIVIDE(poor, (good + needs_improvement + poor)) >= 0.25
);

# "Needs improvement": neither good nor poor by the thresholds above.
CREATE TEMP FUNCTION IS_NI (good FLOAT64, needs_improvement FLOAT64, poor FLOAT64) RETURNS BOOL AS (
NOT IS_GOOD(good, needs_improvement, poor) AND
NOT IS_POOR(good, needs_improvement, poor)
);

# TRUE when the origin has any data at all for the metric; used as the
# denominator filter so origins without data are excluded from percentages.
CREATE TEMP FUNCTION IS_NON_ZERO (good FLOAT64, needs_improvement FLOAT64, poor FLOAT64) RETURNS BOOL AS (
good + needs_improvement + poor > 0
);

WITH base AS (
  # Per-origin shares of good / needs-improvement / poor experiences for each
  # metric, normalized so the three shares of a metric sum to 1 per origin.
  SELECT
    origin,
    country_code,

    SUM(fast_fid) / SUM(fast_fid + avg_fid + slow_fid) AS fast_fid,
    SUM(avg_fid) / SUM(fast_fid + avg_fid + slow_fid) AS avg_fid,
    SUM(slow_fid) / SUM(fast_fid + avg_fid + slow_fid) AS slow_fid,

    SUM(fast_lcp) / SUM(fast_lcp + avg_lcp + slow_lcp) AS fast_lcp,
    SUM(avg_lcp) / SUM(fast_lcp + avg_lcp + slow_lcp) AS avg_lcp,
    SUM(slow_lcp) / SUM(fast_lcp + avg_lcp + slow_lcp) AS slow_lcp,

    SUM(small_cls) / SUM(small_cls + medium_cls + large_cls) AS small_cls,
    SUM(medium_cls) / SUM(small_cls + medium_cls + large_cls) AS medium_cls,
    SUM(large_cls) / SUM(small_cls + medium_cls + large_cls) AS large_cls,

    SUM(fast_fcp) / SUM(fast_fcp + avg_fcp + slow_fcp) AS fast_fcp,
    SUM(avg_fcp) / SUM(fast_fcp + avg_fcp + slow_fcp) AS avg_fcp,
    SUM(slow_fcp) / SUM(fast_fcp + avg_fcp + slow_fcp) AS slow_fcp,

    SUM(fast_ttfb) / SUM(fast_ttfb + avg_ttfb + slow_ttfb) AS fast_ttfb,
    SUM(avg_ttfb) / SUM(fast_ttfb + avg_ttfb + slow_ttfb) AS avg_ttfb,
    SUM(slow_ttfb) / SUM(fast_ttfb + avg_ttfb + slow_ttfb) AS slow_ttfb
  FROM
    `chrome-ux-report.materialized.country_summary`
  WHERE
    yyyymm = 202008
  GROUP BY
    origin,
    country_code
)

SELECT
  `chrome-ux-report`.experimental.GET_COUNTRY(country_code) AS country,

  COUNT(DISTINCT origin) AS total_origins,

  # Share of origins passing all three Core Web Vitals, out of origins with
  # data for all three metrics.
  SAFE_DIVIDE(
    COUNT(DISTINCT IF(
      IS_GOOD(fast_fid, avg_fid, slow_fid) AND
      IS_GOOD(fast_lcp, avg_lcp, slow_lcp) AND
      IS_GOOD(small_cls, medium_cls, large_cls), origin, NULL)),
    COUNT(DISTINCT IF(
      IS_NON_ZERO(fast_fid, avg_fid, slow_fid) AND
      IS_NON_ZERO(fast_lcp, avg_lcp, slow_lcp) AND
      IS_NON_ZERO(small_cls, medium_cls, large_cls), origin, NULL))) AS pct_cwv_good,

  # Per-metric good / needs-improvement / poor shares; denominators only count
  # origins that have data for that metric.
  SAFE_DIVIDE(
    COUNT(DISTINCT IF(
      IS_GOOD(fast_lcp, avg_lcp, slow_lcp), origin, NULL)),
    COUNT(DISTINCT IF(
      IS_NON_ZERO(fast_lcp, avg_lcp, slow_lcp), origin, NULL))) AS pct_lcp_good,
  SAFE_DIVIDE(
    COUNT(DISTINCT IF(
      IS_NI(fast_lcp, avg_lcp, slow_lcp), origin, NULL)),
    COUNT(DISTINCT IF(
      IS_NON_ZERO(fast_lcp, avg_lcp, slow_lcp), origin, NULL))) AS pct_lcp_ni,
  SAFE_DIVIDE(
    COUNT(DISTINCT IF(
      IS_POOR(fast_lcp, avg_lcp, slow_lcp), origin, NULL)),
    COUNT(DISTINCT IF(
      IS_NON_ZERO(fast_lcp, avg_lcp, slow_lcp), origin, NULL))) AS pct_lcp_poor,

  SAFE_DIVIDE(
    COUNT(DISTINCT IF(
      IS_GOOD(fast_fid, avg_fid, slow_fid), origin, NULL)),
    COUNT(DISTINCT IF(
      IS_NON_ZERO(fast_fid, avg_fid, slow_fid), origin, NULL))) AS pct_fid_good,
  SAFE_DIVIDE(
    COUNT(DISTINCT IF(
      IS_NI(fast_fid, avg_fid, slow_fid), origin, NULL)),
    COUNT(DISTINCT IF(
      IS_NON_ZERO(fast_fid, avg_fid, slow_fid), origin, NULL))) AS pct_fid_ni,
  SAFE_DIVIDE(
    COUNT(DISTINCT IF(
      IS_POOR(fast_fid, avg_fid, slow_fid), origin, NULL)),
    COUNT(DISTINCT IF(
      IS_NON_ZERO(fast_fid, avg_fid, slow_fid), origin, NULL))) AS pct_fid_poor,

  SAFE_DIVIDE(
    COUNT(DISTINCT IF(
      IS_GOOD(small_cls, medium_cls, large_cls), origin, NULL)),
    COUNT(DISTINCT IF(
      IS_NON_ZERO(small_cls, medium_cls, large_cls), origin, NULL))) AS pct_cls_good,
  SAFE_DIVIDE(
    COUNT(DISTINCT IF(
      IS_NI(small_cls, medium_cls, large_cls), origin, NULL)),
    COUNT(DISTINCT IF(
      IS_NON_ZERO(small_cls, medium_cls, large_cls), origin, NULL))) AS pct_cls_ni,
  SAFE_DIVIDE(
    COUNT(DISTINCT IF(
      IS_POOR(small_cls, medium_cls, large_cls), origin, NULL)),
    COUNT(DISTINCT IF(
      IS_NON_ZERO(small_cls, medium_cls, large_cls), origin, NULL))) AS pct_cls_poor,

  SAFE_DIVIDE(
    COUNT(DISTINCT IF(
      IS_GOOD(fast_fcp, avg_fcp, slow_fcp), origin, NULL)),
    COUNT(DISTINCT IF(
      IS_NON_ZERO(fast_fcp, avg_fcp, slow_fcp), origin, NULL))) AS pct_fcp_good,
  SAFE_DIVIDE(
    COUNT(DISTINCT IF(
      IS_NI(fast_fcp, avg_fcp, slow_fcp), origin, NULL)),
    COUNT(DISTINCT IF(
      IS_NON_ZERO(fast_fcp, avg_fcp, slow_fcp), origin, NULL))) AS pct_fcp_ni,
  SAFE_DIVIDE(
    COUNT(DISTINCT IF(
      IS_POOR(fast_fcp, avg_fcp, slow_fcp), origin, NULL)),
    COUNT(DISTINCT IF(
      IS_NON_ZERO(fast_fcp, avg_fcp, slow_fcp), origin, NULL))) AS pct_fcp_poor,

  SAFE_DIVIDE(
    COUNT(DISTINCT IF(
      IS_GOOD(fast_ttfb, avg_ttfb, slow_ttfb), origin, NULL)),
    COUNT(DISTINCT IF(
      IS_NON_ZERO(fast_ttfb, avg_ttfb, slow_ttfb), origin, NULL))) AS pct_ttfb_good,
  SAFE_DIVIDE(
    COUNT(DISTINCT IF(
      IS_NI(fast_ttfb, avg_ttfb, slow_ttfb), origin, NULL)),
    COUNT(DISTINCT IF(
      IS_NON_ZERO(fast_ttfb, avg_ttfb, slow_ttfb), origin, NULL))) AS pct_ttfb_ni,
  SAFE_DIVIDE(
    COUNT(DISTINCT IF(
      IS_POOR(fast_ttfb, avg_ttfb, slow_ttfb), origin, NULL)),
    COUNT(DISTINCT IF(
      IS_NON_ZERO(fast_ttfb, avg_ttfb, slow_ttfb), origin, NULL))) AS pct_ttfb_poor
FROM
  base
GROUP BY
  country
Loading

0 comments on commit 66b014d

Please sign in to comment.