Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

CMS 2020 queries #1087

Merged
merged 9 commits into from
Sep 8, 2020
Merged
Show file tree
Hide file tree
Changes from 8 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions sql/2020/15_CMS/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# CMS 2020

Related materials:

- [Planning doc](https://docs.google.com/document/d/1XL87z5MOk7QDLhpFsaMWT3DaIXdOQV32qs57noGwDVQ/edit?usp=sharing)
- [Results spreadsheet](https://docs.google.com/spreadsheets/d/1vTf459CcCbBuYeGvgo-RSidppR62SfM-VTkW-dfS3K4/edit?usp=sharing)
33 changes: 33 additions & 0 deletions sql/2020/15_CMS/adoption_of_image_formats_in_cmss.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
#standardSQL
# Adoption of image formats in CMSs
SELECT
client,
format,
COUNT(0) AS freq,
SUM(COUNT(0)) OVER (PARTITION BY client) AS total,
COUNT(0) / SUM(COUNT(0)) OVER (PARTITION BY client) AS pct
FROM (
SELECT
client,
format,
page
FROM
`httparchive.almanac.requests`
WHERE
date = '2020-08-01' AND
type = 'image')
JOIN (
SELECT
_TABLE_SUFFIX AS client,
url AS page
FROM
`httparchive.technologies.2020_08_01_*`
WHERE
category = 'CMS')
USING
(client, page)
GROUP BY
client,
format
ORDER BY
pct DESC
52 changes: 52 additions & 0 deletions sql/2020/15_CMS/cms_adoption_compared_to_2019.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
#standardSQL
# CMS adoptions, compared to 2019
SELECT
_TABLE_SUFFIX AS client,
2020 AS year,
COUNT(0) AS freq,
total,
COUNT(0) / total AS pct
FROM
`httparchive.technologies.2020_08_01_*`
JOIN (
SELECT
_TABLE_SUFFIX,
COUNT(0) AS total
FROM
`httparchive.summary_pages.2020_08_01_*`
GROUP BY
_TABLE_SUFFIX)
USING
(_TABLE_SUFFIX)
WHERE
category = 'CMS'
GROUP BY
client,
total
UNION ALL
SELECT
_TABLE_SUFFIX AS client,
2019 AS year,
COUNT(0) AS freq,
total,
COUNT(0) / total AS pct
FROM
`httparchive.technologies.2019_07_01_*`
JOIN (
SELECT
_TABLE_SUFFIX,
COUNT(0) AS total
FROM
`httparchive.summary_pages.2019_07_01_*`
GROUP BY
_TABLE_SUFFIX)
USING
(_TABLE_SUFFIX)
WHERE
category = 'CMS'
GROUP BY
client,
total
ORDER BY
year DESC,
pct DESC
43 changes: 43 additions & 0 deletions sql/2020/15_CMS/core_web_vitals_distribution.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
#standardSQL
# Core Web Vitals distribution by CMS
#
# Note that this is an unweighted average of all sites per CMS.
# Performance of sites with millions of visitors as weighted the same as small sites.
SELECT
client,
cms,
COUNT(DISTINCT origin) AS origins,

SUM(fast_lcp) / (SUM(fast_lcp) + SUM(avg_lcp) + SUM(slow_lcp)) AS good_lcp,
SUM(avg_lcp) / (SUM(fast_lcp) + SUM(avg_lcp) + SUM(slow_lcp)) AS ni_lcp,
SUM(slow_lcp) / (SUM(fast_lcp) + SUM(avg_lcp) + SUM(slow_lcp)) AS poor_lcp,

SUM(fast_fid) / (SUM(fast_fid) + SUM(avg_fid) + SUM(slow_fid)) AS good_fid,
SUM(avg_fid) / (SUM(fast_fid) + SUM(avg_fid) + SUM(slow_fid)) AS ni_fid,
SUM(slow_fid) / (SUM(fast_fid) + SUM(avg_fid) + SUM(slow_fid)) AS poor_fid,

SUM(small_cls) / (SUM(small_cls) + SUM(medium_cls) + SUM(large_cls)) AS good_cls,
SUM(medium_cls) / (SUM(small_cls) + SUM(medium_cls) + SUM(large_cls)) AS ni_cls,
SUM(large_cls) / (SUM(small_cls) + SUM(medium_cls) + SUM(large_cls)) AS poor_cls,
FROM
`chrome-ux-report.materialized.device_summary`
JOIN (
SELECT
_TABLE_SUFFIX AS client,
url,
app AS cms
FROM
`httparchive.technologies.2019_08_01_*`
GregBrimble marked this conversation as resolved.
Show resolved Hide resolved
WHERE
category = 'CMS')
ON
CONCAT(origin, '/') = url AND
IF(device = 'desktop', 'desktop', 'mobile') = client
WHERE
# The CrUX 202008 dataset is not available until September 8.
date = '2020-07-01'
GROUP BY
client,
cms
ORDER BY
origins DESC
62 changes: 62 additions & 0 deletions sql/2020/15_CMS/core_web_vitals_passing.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
#standardSQL
# Core Web Vitals performance by CMS
CREATE TEMP FUNCTION IS_GOOD (good FLOAT64, needs_improvement FLOAT64, poor FLOAT64) RETURNS BOOL AS (
good / (good + needs_improvement + poor) >= 0.75
);

CREATE TEMP FUNCTION IS_NON_ZERO (good FLOAT64, needs_improvement FLOAT64, poor FLOAT64) RETURNS BOOL AS (
good + needs_improvement + poor > 0
);


SELECT
client,
cms,
COUNT(DISTINCT origin) AS origins,
# Origins with good LCP divided by origins with any LCP.
SAFE_DIVIDE(
COUNT(DISTINCT IF(IS_GOOD(fast_lcp, avg_lcp, slow_lcp), origin, NULL)),
COUNT(DISTINCT IF(IS_NON_ZERO(fast_lcp, avg_lcp, slow_lcp), origin, NULL))) AS pct_good_lcp,

# Origins with good FID divided by origins with any FID.
SAFE_DIVIDE(
COUNT(DISTINCT IF(IS_GOOD(fast_fid, avg_fid, slow_fid), origin, NULL)),
COUNT(DISTINCT IF(IS_NON_ZERO(fast_fid, avg_fid, slow_fid), origin, NULL))) AS pct_good_fid,

# Origins with good CLS divided by origins with any CLS.
SAFE_DIVIDE(
COUNT(DISTINCT IF(IS_GOOD(small_cls, medium_cls, large_cls), origin, NULL)),
COUNT(DISTINCT IF(IS_NON_ZERO(small_cls, medium_cls, large_cls), origin, NULL))) AS pct_good_cls,

# Origins with good LCP, FID, and CLS dividied by origins with any LCP, FID, and CLS.
SAFE_DIVIDE(
COUNT(DISTINCT IF(
IS_GOOD(fast_lcp, avg_lcp, slow_lcp) AND
IS_GOOD(fast_fid, avg_fid, slow_fid) AND
IS_GOOD(small_cls, medium_cls, large_cls), origin, NULL)),
COUNT(DISTINCT IF(
IS_NON_ZERO(fast_lcp, avg_lcp, slow_lcp) AND
IS_NON_ZERO(fast_fid, avg_fid, slow_fid) AND
IS_NON_ZERO(small_cls, medium_cls, large_cls), origin, NULL))) AS pct_good_cwv
FROM
`chrome-ux-report.materialized.device_summary`
JOIN (
SELECT
_TABLE_SUFFIX AS client,
url,
app AS cms
FROM
`httparchive.technologies.2019_08_01_*`
GregBrimble marked this conversation as resolved.
Show resolved Hide resolved
WHERE
category = 'CMS')
ON
CONCAT(origin, '/') = url AND
IF(device = 'desktop', 'desktop', 'mobile') = client
WHERE
# The CrUX 202008 dataset is not available until September 8.
date = '2020-07-01'
GROUP BY
client,
cms
ORDER BY
origins DESC
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
#standardSQL
# Distribution of CMS page kilobytes per resource type
SELECT
percentile,
client,
type,
APPROX_QUANTILES(requests, 1000)[OFFSET(percentile * 10)] AS requests,
ROUND(APPROX_QUANTILES(bytes, 1000)[OFFSET(percentile * 10)] / 1024, 2) AS kbytes
FROM (
SELECT
client,
type,
COUNT(0) AS requests,
SUM(respSize) AS bytes
FROM (
SELECT
client,
page,
type,
respSize
FROM
`httparchive.almanac.requests`
WHERE
date = '2020-08-01')
JOIN (
SELECT
_TABLE_SUFFIX AS client,
url AS page
FROM
`httparchive.technologies.2020_08_01_*`
WHERE
category = 'CMS')
USING
(client, page)
GROUP BY
client,
type,
page),
UNNEST([10, 25, 50, 75, 90]) AS percentile
GROUP BY
percentile,
client,
type
ORDER BY
percentile,
client,
kbytes DESC
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
#standardSQL
# Distribution of page weight, requests, and co2 grams per CMS web page
# https://gitlab.com/wholegrain/carbon-api-2-0/-/blob/b498ec3bb239536d3612c5f3d758f46e0d2431a6/includes/carbonapi.php
CREATE TEMP FUNCTION
GREEN(url STRING) AS (FALSE); -- TODO: Investigate fetching from Green Web Foundation
CREATE TEMP FUNCTION
adjustDataTransfer(val INT64) AS (val * 0.75 + 0.02 * val * 0.25);
CREATE TEMP FUNCTION
energyConsumption(bytes FLOAT64) AS (bytes * 1.805 / 1073741824);
CREATE TEMP FUNCTION
getCo2Grid(energy FLOAT64) AS (energy * 475);
CREATE TEMP FUNCTION
getCo2Renewable(energy FLOAT64) AS (energy * 0.1008 * 33.4 + energy * 0.8992 * 475);
CREATE TEMP FUNCTION
CO2(url STRING, bytes INT64) AS (
IF
(GREEN(url),
getCo2Renewable(energyConsumption(adjustDataTransfer(bytes))),
getCo2Grid(energyConsumption(adjustDataTransfer(bytes)))));

SELECT
percentile,
client,
APPROX_QUANTILES(requests, 1000)[OFFSET(percentile * 10)] AS requests,
ROUND(APPROX_QUANTILES(bytes, 1000)[OFFSET(percentile * 10)] / 1024 / 1024, 2) AS mbytes,
APPROX_QUANTILES(co2grams, 1000)[OFFSET(percentile * 10)] AS co2grams
FROM (
SELECT
_TABLE_SUFFIX AS client,
reqTotal AS requests,
bytesTotal AS bytes,
CO2(url, bytesTotal) AS co2grams
FROM
`httparchive.summary_pages.2020_08_01_*`
JOIN (
SELECT
_TABLE_SUFFIX,
url
FROM
`httparchive.technologies.2020_08_01_*`
WHERE
category = 'CMS')
USING
(_TABLE_SUFFIX, url)),
UNNEST([10, 25, 50, 75, 90]) AS percentile
GROUP BY
percentile,
client
ORDER BY
percentile,
client
22 changes: 22 additions & 0 deletions sql/2020/15_CMS/image_bytes_on_cmss.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
#standardSQL
# Image bytes on CMSs
SELECT
percentile,
_TABLE_SUFFIX AS client,
APPROX_QUANTILES(reqImg, 1000)[OFFSET(percentile * 10)] AS image_count,
APPROX_QUANTILES(bytesImg, 1000)[OFFSET(percentile * 10)] / 1024 AS image_kbytes
FROM
`httparchive.summary_pages.2020_08_01_*`
JOIN
`httparchive.technologies.2020_08_01_*`
USING
(_TABLE_SUFFIX, url),
UNNEST([10, 25, 50, 75, 90]) AS percentile
WHERE
category = 'CMS'
GROUP BY
percentile,
client
ORDER BY
percentile,
client
21 changes: 21 additions & 0 deletions sql/2020/15_CMS/median_lighthouse_score.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
#standardSQL
# Lighthouse category scores per CMS
SELECT
app AS cms,
COUNT(DISTINCT url) AS freq,
# See https://github.com/HTTPArchive/almanac.httparchive.org/pull/1087#issuecomment-684983795
#APPROX_QUANTILES(CAST(JSON_EXTRACT_SCALAR(report, '$.categories.performance.score') AS NUMERIC), 1000)[OFFSET(500)] AS median_performance,
APPROX_QUANTILES(CAST(JSON_EXTRACT_SCALAR(report, '$.categories.accessibility.score') AS NUMERIC), 1000)[OFFSET(500)] AS median_accessibility,
APPROX_QUANTILES(CAST(JSON_EXTRACT_SCALAR(report, '$.categories.pwa.score') AS NUMERIC), 1000)[OFFSET(500)] AS median_pwa
FROM
`httparchive.lighthouse.2020_08_01_mobile`
JOIN
`httparchive.technologies.2020_08_01_mobile`
USING
(url)
WHERE
category = 'CMS'
GROUP BY
cms
ORDER BY
freq DESC
Loading