-
-
Notifications
You must be signed in to change notification settings - Fork 181
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Sustainability 2024: Queries (#3736)
* Update unminified_css bytes query * Update unused_css_bytes query * Update unused_js_bytes query * Update unminified_js__bytes query * Update cache_header_usage query * Update cdn_adoption query * Update cms_bytes_per_type query * Update ssg_bytes_per_type query * Update ecommerce_bytes_per_type query * Add use of prefers_dark_mode query * Update script usage query * Update stylesheet_count query * Updated for new CO2 calculation * Update ecommerce with new co2 calc * Update Cms with new co2 calc * Add green hosting query * Create favicons.sql Adding based on Laurent Devernay comment in Slack. * Add green third party query * Is root page updates * Update stylesheet count query with root_page filter * Filter root page for comparability * Create query_run_size.sql Adding in a query to track the size of the query. * Update query_run_size.sql Updating the docs * Create global_emissions_per_page.sql, page_byte_pre_type.sql, responsive_images.sql, text_compression.sql * Fix linter issues for recently added SQL queries * run sqlfluff fix * add video_autoplay_values.sql, video_preload_values.sql * fix linter errors * Remove Tablesample mistake * add 2022 queries * Apply suggestions from code review --------- Co-authored-by: Mike Gifford <[email protected]> Co-authored-by: Burak Güneli <[email protected]> Co-authored-by: Rafael Bonalume Lebre <[email protected]> Co-authored-by: Barry Pollard <[email protected]>
- Loading branch information
1 parent
324d22b
commit 6f4be9c
Showing
26 changed files
with
1,942 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,50 @@ | ||
#standardSQL
# The distribution of cache header adoption on websites by client.
#
# Works at request granularity: every row of `httparchive.all.requests` for the
# 2024-06-01 crawl is classified by whether its response carries a non-blank
# Expires header, a non-blank Cache-Control header, and a numeric max-age
# directive. The classification is then counted and expressed as a share of
# all requests per client.

WITH request_headers AS (
  -- Pull each response header out of the JSON summary exactly once.
  SELECT
    client,
    JSON_EXTRACT_SCALAR(summary, '$.resp_expires') AS resp_expires,
    JSON_EXTRACT_SCALAR(summary, '$.resp_cache_control') AS resp_cache_control
  FROM
    `httparchive.all.requests`
  WHERE
    date = '2024-06-01'
),

cache_flags AS (
  SELECT
    client,

    -- A header counts as "used" only when it is present and not just whitespace.
    COALESCE(TRIM(resp_expires), '') != '' AS uses_expires,
    COALESCE(TRIM(resp_cache_control), '') != '' AS uses_cache_control,

    -- REGEXP_CONTAINS returns NULL for a missing header; COUNTIF would skip it
    -- anyway, but coalescing keeps this flag a plain TRUE/FALSE.
    COALESCE(
      REGEXP_CONTAINS(resp_cache_control, r'(?i)max-age\s*=\s*[0-9]+'),
      FALSE
    ) AS uses_max_age
  FROM
    request_headers
)

SELECT
  client,
  COUNT(0) AS total_requests,

  -- Absolute request counts.
  COUNTIF(uses_cache_control) AS total_using_cache_control,
  COUNTIF(uses_max_age) AS total_using_max_age,
  COUNTIF(uses_expires) AS total_using_expires,
  COUNTIF(uses_max_age AND uses_expires) AS total_using_max_age_and_expires,
  COUNTIF(uses_cache_control AND uses_expires) AS total_using_both_cc_and_expires,
  -- Alias spelling ("neither ... and") differs from its pct_ counterpart below;
  -- it is kept as-is so existing consumers of this column are not broken.
  COUNTIF(NOT uses_cache_control AND NOT uses_expires) AS total_using_neither_cc_and_expires,
  COUNTIF(uses_cache_control AND NOT uses_expires) AS total_using_only_cache_control,
  COUNTIF(NOT uses_cache_control AND uses_expires) AS total_using_only_expires,

  -- The same counts as a fraction of all requests for the client.
  COUNTIF(uses_cache_control) / COUNT(0) AS pct_cache_control,
  COUNTIF(uses_max_age) / COUNT(0) AS pct_using_max_age,
  COUNTIF(uses_expires) / COUNT(0) AS pct_using_expires,
  COUNTIF(uses_max_age AND uses_expires) / COUNT(0) AS pct_using_max_age_and_expires,
  COUNTIF(uses_cache_control AND uses_expires) / COUNT(0) AS pct_using_both_cc_and_expires,
  COUNTIF(NOT uses_cache_control AND NOT uses_expires) / COUNT(0) AS pct_using_neither_cc_nor_expires,
  COUNTIF(uses_cache_control AND NOT uses_expires) / COUNT(0) AS pct_using_only_cache_control,
  COUNTIF(NOT uses_cache_control AND uses_expires) / COUNT(0) AS pct_using_only_expires
FROM
  cache_flags
GROUP BY
  client
ORDER BY
  client;
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,31 @@ | ||
#standardSQL
# The distribution of CDN adoption on websites by client.
#
# `total` is the number of root pages per client in the 2024-06-01 crawl.
# The `$.cdn` summary field is split on ', ' and unnested, so a page that lists
# several CDNs contributes one row to each of them; `pct` values for a client
# therefore need not sum to 1.

WITH client_cdns AS (
  -- One row per client: page count plus every CDN name seen on its root pages.
  SELECT
    client,
    COUNT(0) AS total,
    ARRAY_CONCAT_AGG(SPLIT(JSON_EXTRACT_SCALAR(summary, '$.cdn'), ', ')) AS cdn_list
  FROM
    `httparchive.all.pages`
  WHERE
    date = '2024-06-01' AND
    is_root_page = TRUE
  GROUP BY
    client
)

SELECT
  client,
  -- An empty string in the list means the page reported no CDN.
  IF(cdn_name = '', 'No CDN', cdn_name) AS cdn,
  COUNT(0) AS freq,
  total,
  COUNT(0) / total AS pct
FROM
  client_cdns
CROSS JOIN
  -- The unnested element gets its own name so that `cdn` below can only mean
  -- the output column, not the raw array element.
  UNNEST(cdn_list) AS cdn_name
GROUP BY
  client,
  cdn,
  total
ORDER BY
  pct DESC,
  client,
  cdn;
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,162 @@ | ||
#standardSQL
# Median resource weights by CMS
#
# For each root page in the 2024-06-01 crawl that has a technology in the 'CMS'
# category, converts the page's transferred bytes into estimated emissions and
# reports per-client, per-CMS medians of size and emissions, overall and per
# resource type (HTML, JS, CSS, images, fonts).

# Declare variables to calculate the carbon emissions of one byte
# Source: https://sustainablewebdesign.org/calculating-digital-emissions/
# The implementation below does not make the assumptions about returning visitors or caching that are present in the Sustainable Web Design model.
# NOTE(review): units are not stated in this file. Presumably grid_intensity is
# gCO2e per kWh and the six segment factors are kWh per GB - confirm against
# the source above.

DECLARE grid_intensity NUMERIC DEFAULT 494;
DECLARE embodied_emissions_data_centers NUMERIC DEFAULT 0.012;
DECLARE embodied_emissions_network NUMERIC DEFAULT 0.013;
DECLARE embodied_emissions_user_devices NUMERIC DEFAULT 0.081;
DECLARE operational_emissions_data_centers NUMERIC DEFAULT 0.055;
DECLARE operational_emissions_network NUMERIC DEFAULT 0.059;
DECLARE operational_emissions_user_devices NUMERIC DEFAULT 0.080;

WITH cms_bytes AS (
  -- One row per (page, CMS technology) pair, with every byte counter cast once.
  -- A page that matches more than one CMS technology yields one row per match.
  SELECT
    client,
    page,
    tech.technology AS cms,
    CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) AS bytes_total,
    CAST(JSON_VALUE(summary, '$.bytesHtml') AS INT64) AS bytes_html,
    CAST(JSON_VALUE(summary, '$.bytesJS') AS INT64) AS bytes_js,
    CAST(JSON_VALUE(summary, '$.bytesCss') AS INT64) AS bytes_css,
    CAST(JSON_VALUE(summary, '$.bytesImg') AS INT64) AS bytes_img,
    CAST(JSON_VALUE(summary, '$.bytesFont') AS INT64) AS bytes_font
  FROM
    `httparchive.all.pages`,
    UNNEST(technologies) AS tech
  WHERE
    date = '2024-06-01' AND
    is_root_page = TRUE AND
    'CMS' IN UNNEST(tech.categories)
),

cms_weights AS (
  -- Adds the two terms every emissions formula below is built from.
  SELECT
    client,
    page,
    cms,
    bytes_total,
    bytes_html,
    bytes_js,
    bytes_css,
    bytes_img,
    bytes_font,

    -- Page weight in GB (1024-based).
    bytes_total / 1024 / 1024 / 1024 AS total_gb,

    -- Page weight in GB times the sum of all six segment factors; the
    -- resource-specific emissions are a byte-share of this value.
    (bytes_total / 1024 / 1024 / 1024) * (
      operational_emissions_data_centers * grid_intensity +
      operational_emissions_network * grid_intensity +
      operational_emissions_user_devices * grid_intensity +
      embodied_emissions_data_centers * grid_intensity +
      embodied_emissions_network * grid_intensity +
      embodied_emissions_user_devices * grid_intensity
    ) AS all_segments_emissions
  FROM
    cms_bytes
),

cms_data AS (
  SELECT
    client,
    page,
    cms,
    bytes_total / 1024 AS total_kb,

    -- Operational emissions calculations
    total_gb * operational_emissions_data_centers * grid_intensity AS op_emissions_dc,
    total_gb * operational_emissions_network * grid_intensity AS op_emissions_networks,
    total_gb * operational_emissions_user_devices * grid_intensity AS op_emissions_devices,

    -- Embodied emissions calculations
    total_gb * embodied_emissions_data_centers * grid_intensity AS em_emissions_dc,
    total_gb * embodied_emissions_network * grid_intensity AS em_emissions_networks,
    total_gb * embodied_emissions_user_devices * grid_intensity AS em_emissions_devices,

    -- Total emissions (operational + embodied)
    (
      total_gb * operational_emissions_data_centers * grid_intensity +
      total_gb * operational_emissions_network * grid_intensity +
      total_gb * operational_emissions_user_devices * grid_intensity
    ) AS total_operational_emissions,

    (
      total_gb * embodied_emissions_data_centers * grid_intensity +
      total_gb * embodied_emissions_network * grid_intensity +
      total_gb * embodied_emissions_user_devices * grid_intensity
    ) AS total_embodied_emissions,

    (
      total_gb * operational_emissions_data_centers * grid_intensity +
      total_gb * operational_emissions_network * grid_intensity +
      total_gb * operational_emissions_user_devices * grid_intensity +
      total_gb * embodied_emissions_data_centers * grid_intensity +
      total_gb * embodied_emissions_network * grid_intensity +
      total_gb * embodied_emissions_user_devices * grid_intensity
    ) AS total_emissions,

    -- Proportions of each resource type relative to total bytes.
    -- SAFE_DIVIDE returns NULL instead of raising when bytes_total is 0.
    SAFE_DIVIDE(bytes_html, bytes_total) AS html_proportion,
    SAFE_DIVIDE(bytes_js, bytes_total) AS js_proportion,
    SAFE_DIVIDE(bytes_css, bytes_total) AS css_proportion,
    SAFE_DIVIDE(bytes_img, bytes_total) AS img_proportion,
    SAFE_DIVIDE(bytes_font, bytes_total) AS font_proportion,

    -- Resource-specific emissions: byte share of the page's total emissions
    SAFE_DIVIDE(bytes_html, bytes_total) * all_segments_emissions AS total_html_emissions,
    SAFE_DIVIDE(bytes_js, bytes_total) * all_segments_emissions AS total_js_emissions,
    SAFE_DIVIDE(bytes_css, bytes_total) * all_segments_emissions AS total_css_emissions,
    SAFE_DIVIDE(bytes_img, bytes_total) * all_segments_emissions AS total_img_emissions,
    SAFE_DIVIDE(bytes_font, bytes_total) * all_segments_emissions AS total_font_emissions,

    -- Resource-specific size in KB
    bytes_html / 1024 AS html_kb,
    bytes_js / 1024 AS js_kb,
    bytes_css / 1024 AS css_kb,
    bytes_img / 1024 AS img_kb,
    bytes_font / 1024 AS font_kb
  FROM
    cms_weights
)

SELECT
  client,
  cms,
  -- Counts (page, CMS) rows produced by the technologies unnest.
  COUNT(0) AS pages,

  -- Median resource weights and emissions
  APPROX_QUANTILES(total_kb, 1000)[OFFSET(500)] AS median_total_kb,
  APPROX_QUANTILES(total_operational_emissions, 1000)[OFFSET(500)] AS median_operational_emissions,
  APPROX_QUANTILES(total_embodied_emissions, 1000)[OFFSET(500)] AS median_embodied_emissions,
  APPROX_QUANTILES(total_emissions, 1000)[OFFSET(500)] AS median_total_emissions,

  -- Resource-specific medians
  APPROX_QUANTILES(html_kb, 1000)[OFFSET(500)] AS median_html_kb,
  APPROX_QUANTILES(total_html_emissions, 1000)[OFFSET(500)] AS median_total_html_emissions,
  APPROX_QUANTILES(js_kb, 1000)[OFFSET(500)] AS median_js_kb,
  APPROX_QUANTILES(total_js_emissions, 1000)[OFFSET(500)] AS median_total_js_emissions,
  APPROX_QUANTILES(css_kb, 1000)[OFFSET(500)] AS median_css_kb,
  APPROX_QUANTILES(total_css_emissions, 1000)[OFFSET(500)] AS median_total_css_emissions,
  APPROX_QUANTILES(img_kb, 1000)[OFFSET(500)] AS median_img_kb,
  APPROX_QUANTILES(total_img_emissions, 1000)[OFFSET(500)] AS median_total_img_emissions,
  APPROX_QUANTILES(font_kb, 1000)[OFFSET(500)] AS median_font_kb,
  APPROX_QUANTILES(total_font_emissions, 1000)[OFFSET(500)] AS median_total_font_emissions
FROM
  cms_data
GROUP BY
  client,
  cms
ORDER BY
  pages DESC,
  cms,
  client;
Oops, something went wrong.