Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Sustainability 2024: Queries #3736

Merged
merged 34 commits into from
Nov 4, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
34 commits
Select commit Hold shift + click to select a range
ff2cac1
Update unminified_css bytes query
Falafelqueen Aug 17, 2024
c570acc
Update unused_css_bytes query
Falafelqueen Aug 17, 2024
bf9bde7
Update unused_js_bytes query
Falafelqueen Aug 17, 2024
21473a9
Update unminified_js__bytes query
Falafelqueen Aug 17, 2024
2c1677e
Update cache_header_usage query
Falafelqueen Aug 18, 2024
0ba1225
Update cdn_adoption query
Falafelqueen Aug 18, 2024
2cdc682
Update cms_bytes_per_type query
Falafelqueen Aug 18, 2024
8c0cc7e
Update ssg_bytes_per_type query
Falafelqueen Aug 18, 2024
66a153c
Update ecommerce_bytes_per_type query
Falafelqueen Aug 18, 2024
4296add
Add use of prefers_dark_mode query
Falafelqueen Aug 18, 2024
19ebd9c
Update script usage query
Falafelqueen Aug 18, 2024
32478dd
Update stylesheet_count query
Falafelqueen Aug 18, 2024
2972c97
Updated for new CO2 calculation
Falafelqueen Sep 2, 2024
a1cb58f
Update ecommerce with new co2 calc
Falafelqueen Sep 8, 2024
371d046
Update Cms with new co2 calc
Falafelqueen Sep 8, 2024
ea5e04a
Add green hosting query
Falafelqueen Sep 8, 2024
caecf09
Create favicons.sql
mgifford Sep 10, 2024
f63da72
Add green third party query
Falafelqueen Sep 14, 2024
5bbbe20
Is root page updates
Falafelqueen Sep 15, 2024
a0f84d8
Update stylesheet count query with root_page filter
Falafelqueen Sep 15, 2024
173c472
Filter root page for comparability
Falafelqueen Sep 15, 2024
8589bb6
Merge branch 'sustainability-sql-2024' of github.com:Falafelqueen/alm…
Falafelqueen Sep 15, 2024
0385404
Create query_run_size.sql
mgifford Oct 11, 2024
6a7e526
Update query_run_size.sql
mgifford Oct 11, 2024
499b26a
Create global_emissions_per_page.sql, page_byte_pre_type.sql, respons…
burakguneli Oct 23, 2024
46132e4
Fix linter issues for recently added SQL queries
burakguneli Oct 23, 2024
74afb2c
run sqlfluff fix
burakguneli Oct 23, 2024
f3f7371
add video_autoplay_values.sql, video_preload_values.sql
burakguneli Oct 24, 2024
f3902b9
fix linter errors
burakguneli Oct 24, 2024
132a394
Merge branch 'sustainability-sql-2024' of github.com:Falafelqueen/alm…
Falafelqueen Sep 15, 2024
22fda76
Merge branch 'sustainability-sql-2024' of github.com:Falafelqueen/alm…
Falafelqueen Oct 24, 2024
6ebbe6e
Remove Tablesample mistake
Falafelqueen Oct 24, 2024
8831ce2
add 2022 queries
lebreRafael Oct 25, 2024
5645d8e
Apply suggestions from code review
tunetheweb Oct 31, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
50 changes: 50 additions & 0 deletions sql/2024/sustainability/cache_header_usage.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
#standardSQL
# The distribution of cache header adoption on websites by client.

SELECT
client,
COUNT(0) AS total_requests,

COUNTIF(uses_cache_control) AS total_using_cache_control,
COUNTIF(uses_max_age) AS total_using_max_age,
COUNTIF(uses_expires) AS total_using_expires,
COUNTIF(uses_max_age AND uses_expires) AS total_using_max_age_and_expires,
COUNTIF(uses_cache_control AND uses_expires) AS total_using_both_cc_and_expires,
COUNTIF(NOT uses_cache_control AND NOT uses_expires) AS total_using_neither_cc_and_expires,
COUNTIF(uses_cache_control AND NOT uses_expires) AS total_using_only_cache_control,
COUNTIF(NOT uses_cache_control AND uses_expires) AS total_using_only_expires,

COUNTIF(uses_cache_control) / COUNT(0) AS pct_cache_control,
COUNTIF(uses_max_age) / COUNT(0) AS pct_using_max_age,
COUNTIF(uses_expires) / COUNT(0) AS pct_using_expires,
COUNTIF(uses_max_age AND uses_expires) / COUNT(0) AS pct_using_max_age_and_expires,
COUNTIF(uses_cache_control AND uses_expires) / COUNT(0) AS pct_using_both_cc_and_expires,
COUNTIF(NOT uses_cache_control AND NOT uses_expires) / COUNT(0) AS pct_using_neither_cc_nor_expires,
COUNTIF(uses_cache_control AND NOT uses_expires) / COUNT(0) AS pct_using_only_cache_control,
COUNTIF(NOT uses_cache_control AND uses_expires) / COUNT(0) AS pct_using_only_expires

FROM (
SELECT
client,

JSON_EXTRACT_SCALAR(summary, '$.resp_expires') IS NOT NULL AND TRIM(JSON_EXTRACT_SCALAR(summary, '$.resp_expires')) != '' AS uses_expires,
JSON_EXTRACT_SCALAR(summary, '$.resp_cache_control') IS NOT NULL AND TRIM(JSON_EXTRACT_SCALAR(summary, '$.resp_cache_control')) != '' AS uses_cache_control,
REGEXP_CONTAINS(JSON_EXTRACT_SCALAR(summary, '$.resp_cache_control'), r'(?i)max-age\s*=\s*[0-9]+') AS uses_max_age,

JSON_EXTRACT_SCALAR(summary, '$.resp_etag') IS NULL OR TRIM(JSON_EXTRACT_SCALAR(summary, '$.resp_etag')) = '' AS uses_no_etag,
JSON_EXTRACT_SCALAR(summary, '$.resp_etag') IS NOT NULL AND TRIM(JSON_EXTRACT_SCALAR(summary, '$.resp_etag')) != '' AS uses_etag,
JSON_EXTRACT_SCALAR(summary, '$.resp_last_modified') IS NOT NULL AND TRIM(JSON_EXTRACT_SCALAR(summary, '$.resp_last_modified')) != '' AS uses_last_modified,

REGEXP_CONTAINS(TRIM(JSON_EXTRACT_SCALAR(summary, '$.resp_etag')), '^W/".*"') AS uses_weak_etag,
REGEXP_CONTAINS(TRIM(JSON_EXTRACT_SCALAR(summary, '$.resp_etag')), '^".*"') AS uses_strong_etag

FROM
`httparchive.all.requests`
WHERE
date = '2024-06-01'
)

GROUP BY
client
ORDER BY
client;
31 changes: 31 additions & 0 deletions sql/2024/sustainability/cdn_adoption.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
#standardSQL
# The distribution of CDN adoption on websites by client.

SELECT
client,
IF(cdn = '', 'No CDN', cdn) AS cdn,
COUNT(0) AS freq,
total,
COUNT(0) / total AS pct
FROM (
SELECT
client,
COUNT(0) AS total,
ARRAY_CONCAT_AGG(SPLIT(JSON_EXTRACT_SCALAR(summary, '$.cdn'), ', ')) AS cdn_list
FROM
`httparchive.all.pages`
WHERE
date = '2024-06-01' AND
is_root_page = TRUE
GROUP BY
client
),
UNNEST(cdn_list) AS cdn
GROUP BY
client,
cdn,
total
ORDER BY
pct DESC,
client,
cdn;
162 changes: 162 additions & 0 deletions sql/2024/sustainability/cms_bytes_per_type.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,162 @@
#standardSQL
# Median resource weights by CMS

# Declare variables to calculate the carbon emissions of one byte
# Source: https://sustainablewebdesign.org/calculating-digital-emissions/
# The implementation below does not make the assumptions about returning visitors or caching that are present in the Sustainable Web Design model.

DECLARE grid_intensity NUMERIC DEFAULT 494;
DECLARE embodied_emissions_data_centers NUMERIC DEFAULT 0.012;
DECLARE embodied_emissions_network NUMERIC DEFAULT 0.013;
DECLARE embodied_emissions_user_devices NUMERIC DEFAULT 0.081;
DECLARE operational_emissions_data_centers NUMERIC DEFAULT 0.055;
DECLARE operational_emissions_network NUMERIC DEFAULT 0.059;
DECLARE operational_emissions_user_devices NUMERIC DEFAULT 0.080;

WITH cms_data AS (
SELECT
client,
page,
tech.technology AS cms,
CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) / 1024 AS total_kb,

-- Operational emissions calculations
(CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) / 1024 / 1024 / 1024) * operational_emissions_data_centers * grid_intensity AS op_emissions_dc,
(CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) / 1024 / 1024 / 1024) * operational_emissions_network * grid_intensity AS op_emissions_networks,
(CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) / 1024 / 1024 / 1024) * operational_emissions_user_devices * grid_intensity AS op_emissions_devices,

-- Embodied emissions calculations
(CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) / 1024 / 1024 / 1024) * embodied_emissions_data_centers * grid_intensity AS em_emissions_dc,
(CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) / 1024 / 1024 / 1024) * embodied_emissions_network * grid_intensity AS em_emissions_networks,
(CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) / 1024 / 1024 / 1024) * embodied_emissions_user_devices * grid_intensity AS em_emissions_devices,

-- Total emissions (operational + embodied)
(
(CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) / 1024 / 1024 / 1024) * operational_emissions_data_centers * grid_intensity +
(CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) / 1024 / 1024 / 1024) * operational_emissions_network * grid_intensity +
(CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) / 1024 / 1024 / 1024) * operational_emissions_user_devices * grid_intensity
) AS total_operational_emissions,

(
(CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) / 1024 / 1024 / 1024) * embodied_emissions_data_centers * grid_intensity +
(CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) / 1024 / 1024 / 1024) * embodied_emissions_network * grid_intensity +
(CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) / 1024 / 1024 / 1024) * embodied_emissions_user_devices * grid_intensity
) AS total_embodied_emissions,

(
(CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) / 1024 / 1024 / 1024) * operational_emissions_data_centers * grid_intensity +
(CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) / 1024 / 1024 / 1024) * operational_emissions_network * grid_intensity +
(CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) / 1024 / 1024 / 1024) * operational_emissions_user_devices * grid_intensity +
(CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) / 1024 / 1024 / 1024) * embodied_emissions_data_centers * grid_intensity +
(CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) / 1024 / 1024 / 1024) * embodied_emissions_network * grid_intensity +
(CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) / 1024 / 1024 / 1024) * embodied_emissions_user_devices * grid_intensity
) AS total_emissions,

-- Proportions of each resource type relative to total bytes
CAST(JSON_VALUE(summary, '$.bytesHtml') AS INT64) / CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) AS html_proportion,
CAST(JSON_VALUE(summary, '$.bytesJS') AS INT64) / CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) AS js_proportion,
CAST(JSON_VALUE(summary, '$.bytesCss') AS INT64) / CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) AS css_proportion,
CAST(JSON_VALUE(summary, '$.bytesImg') AS INT64) / CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) AS img_proportion,
CAST(JSON_VALUE(summary, '$.bytesFont') AS INT64) / CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) AS font_proportion,

-- Resource-specific emissions calculations
(SAFE_DIVIDE(CAST(JSON_VALUE(summary, '$.bytesHtml') AS INT64), CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64)) * (
(CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) / 1024 / 1024 / 1024) * (
operational_emissions_data_centers * grid_intensity +
operational_emissions_network * grid_intensity +
operational_emissions_user_devices * grid_intensity +
embodied_emissions_data_centers * grid_intensity +
embodied_emissions_network * grid_intensity +
embodied_emissions_user_devices * grid_intensity
)
)) AS total_html_emissions,

(SAFE_DIVIDE(CAST(JSON_VALUE(summary, '$.bytesJS') AS INT64), CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64)) * (
(CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) / 1024 / 1024 / 1024) * (
operational_emissions_data_centers * grid_intensity +
operational_emissions_network * grid_intensity +
operational_emissions_user_devices * grid_intensity +
embodied_emissions_data_centers * grid_intensity +
embodied_emissions_network * grid_intensity +
embodied_emissions_user_devices * grid_intensity
)
)) AS total_js_emissions,

(SAFE_DIVIDE(CAST(JSON_VALUE(summary, '$.bytesCss') AS INT64), CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64)) * (
(CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) / 1024 / 1024 / 1024) * (
operational_emissions_data_centers * grid_intensity +
operational_emissions_network * grid_intensity +
operational_emissions_user_devices * grid_intensity +
embodied_emissions_data_centers * grid_intensity +
embodied_emissions_network * grid_intensity +
embodied_emissions_user_devices * grid_intensity
)
)) AS total_css_emissions,

(SAFE_DIVIDE(CAST(JSON_VALUE(summary, '$.bytesImg') AS INT64), CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64)) * (
(CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) / 1024 / 1024 / 1024) * (
operational_emissions_data_centers * grid_intensity +
operational_emissions_network * grid_intensity +
operational_emissions_user_devices * grid_intensity +
embodied_emissions_data_centers * grid_intensity +
embodied_emissions_network * grid_intensity +
embodied_emissions_user_devices * grid_intensity
)
)) AS total_img_emissions,

(SAFE_DIVIDE(CAST(JSON_VALUE(summary, '$.bytesFont') AS INT64), CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64)) * (
(CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) / 1024 / 1024 / 1024) * (
operational_emissions_data_centers * grid_intensity +
operational_emissions_network * grid_intensity +
operational_emissions_user_devices * grid_intensity +
embodied_emissions_data_centers * grid_intensity +
embodied_emissions_network * grid_intensity +
embodied_emissions_user_devices * grid_intensity
)
)) AS total_font_emissions,

-- Resource-specific size in KB
CAST(JSON_VALUE(summary, '$.bytesHtml') AS INT64) / 1024 AS html_kb,
CAST(JSON_VALUE(summary, '$.bytesJS') AS INT64) / 1024 AS js_kb,
CAST(JSON_VALUE(summary, '$.bytesCss') AS INT64) / 1024 AS css_kb,
CAST(JSON_VALUE(summary, '$.bytesImg') AS INT64) / 1024 AS img_kb,
CAST(JSON_VALUE(summary, '$.bytesFont') AS INT64) / 1024 AS font_kb
FROM
`httparchive.all.pages`,
UNNEST(technologies) AS tech
WHERE
date = '2024-06-01' AND
is_root_page = TRUE AND
'CMS' IN UNNEST(tech.categories)
)

SELECT
client,
cms,
COUNT(0) AS pages,
-- Median resource weights and emissions
APPROX_QUANTILES(total_kb, 1000)[OFFSET(500)] AS median_total_kb,
APPROX_QUANTILES(total_operational_emissions, 1000)[OFFSET(500)] AS median_operational_emissions,
APPROX_QUANTILES(total_embodied_emissions, 1000)[OFFSET(500)] AS median_embodied_emissions,
APPROX_QUANTILES(total_emissions, 1000)[OFFSET(500)] AS median_total_emissions,

-- Resource-specific medians
APPROX_QUANTILES(html_kb, 1000)[OFFSET(500)] AS median_html_kb,
APPROX_QUANTILES(total_html_emissions, 1000)[OFFSET(500)] AS median_total_html_emissions,
APPROX_QUANTILES(js_kb, 1000)[OFFSET(500)] AS median_js_kb,
APPROX_QUANTILES(total_js_emissions, 1000)[OFFSET(500)] AS median_total_js_emissions,
APPROX_QUANTILES(css_kb, 1000)[OFFSET(500)] AS median_css_kb,
APPROX_QUANTILES(total_css_emissions, 1000)[OFFSET(500)] AS median_total_css_emissions,
APPROX_QUANTILES(img_kb, 1000)[OFFSET(500)] AS median_img_kb,
APPROX_QUANTILES(total_img_emissions, 1000)[OFFSET(500)] AS median_total_img_emissions,
APPROX_QUANTILES(font_kb, 1000)[OFFSET(500)] AS median_font_kb,
APPROX_QUANTILES(total_font_emissions, 1000)[OFFSET(500)] AS median_total_font_emissions
FROM
cms_data
GROUP BY
client,
cms
ORDER BY
pages DESC,
cms,
client;
Loading