From f292777112e1b08265993f343134b186fbc52632 Mon Sep 17 00:00:00 2001 From: Greg Brimble Date: Mon, 27 Jul 2020 12:26:18 +0100 Subject: [PATCH] WIP: Start on the 2020 CMS Queries --- .../adoption_of_image_formats_in_cmss.sql | 28 +++++++++ .../15_CMS/cms_adoption_compared_to_2019.sql | 52 ++++++++++++++++ ...f_cms_page_kilobytes_per_resource_type.sql | 44 +++++++++++++ ...equests_and_co2_grams_per_cms_web_page.sql | 62 +++++++++++++++++++ sql/2020/15_CMS/image_bytes_on_cmss.sql | 27 ++++++++ ...third_party_bytes_and_requests_on_cmss.sql | 47 ++++++++++++++ .../top_cms_platforms_compared_to_2019.sql | 56 +++++++++++++++++ 7 files changed, 316 insertions(+) create mode 100644 sql/2020/15_CMS/adoption_of_image_formats_in_cmss.sql create mode 100644 sql/2020/15_CMS/cms_adoption_compared_to_2019.sql create mode 100644 sql/2020/15_CMS/distribution_of_cms_page_kilobytes_per_resource_type.sql create mode 100644 sql/2020/15_CMS/distribution_of_page_weight_requests_and_co2_grams_per_cms_web_page.sql create mode 100644 sql/2020/15_CMS/image_bytes_on_cmss.sql create mode 100644 sql/2020/15_CMS/third_party_bytes_and_requests_on_cmss.sql create mode 100644 sql/2020/15_CMS/top_cms_platforms_compared_to_2019.sql diff --git a/sql/2020/15_CMS/adoption_of_image_formats_in_cmss.sql b/sql/2020/15_CMS/adoption_of_image_formats_in_cmss.sql new file mode 100644 index 00000000000..dea5e957c5c --- /dev/null +++ b/sql/2020/15_CMS/adoption_of_image_formats_in_cmss.sql @@ -0,0 +1,28 @@ +#standardSQL + # Adoption of image formats in CMSs +SELECT + client, + format, + COUNT(0) AS freq, + SUM(COUNT(0)) OVER (PARTITION BY client) AS total, + ROUND(COUNT(0) * 100 / SUM(COUNT(0)) OVER (PARTITION BY client), 2) AS pct +FROM + `httparchive.almanac.summary_requests` +JOIN ( + SELECT + _TABLE_SUFFIX AS client, + url AS page + FROM + `httparchive.technologies.2020_07_01_*` + WHERE + category = 'CMS') +USING + (client, + page) +WHERE + type = 'image' +GROUP BY + client, + format +ORDER BY + freq / total DESC \ No newline at end of file diff --git a/sql/2020/15_CMS/cms_adoption_compared_to_2019.sql b/sql/2020/15_CMS/cms_adoption_compared_to_2019.sql new file mode 100644 index 00000000000..40327eed9ef --- /dev/null +++ b/sql/2020/15_CMS/cms_adoption_compared_to_2019.sql @@ -0,0 +1,52 @@ +#standardSQL + # CMS adoptions, compared to 2019 +SELECT + _TABLE_SUFFIX AS client, + 2020 AS year, + COUNT(0) AS freq, + total, + ROUND(COUNT(0) * 100 / total, 2) AS pct +FROM + `httparchive.technologies.2020_07_01_*` +JOIN ( + SELECT + _TABLE_SUFFIX, + COUNT(0) AS total + FROM + `httparchive.summary_pages.2020_07_01_*` + GROUP BY + _TABLE_SUFFIX) +USING + (_TABLE_SUFFIX) +WHERE + category = 'CMS' +GROUP BY + client, + total +UNION ALL +SELECT + _TABLE_SUFFIX AS client, + 2019 AS year, + COUNT(0) AS freq, + total, + ROUND(COUNT(0) * 100 / total, 2) AS pct +FROM + `httparchive.technologies.2019_07_01_*` +JOIN ( + SELECT + _TABLE_SUFFIX, + COUNT(0) AS total + FROM + `httparchive.summary_pages.2019_07_01_*` + GROUP BY + _TABLE_SUFFIX) +USING + (_TABLE_SUFFIX) +WHERE + category = 'CMS' +GROUP BY + client, + total +ORDER BY + year DESC, + freq / total DESC \ No newline at end of file diff --git a/sql/2020/15_CMS/distribution_of_cms_page_kilobytes_per_resource_type.sql b/sql/2020/15_CMS/distribution_of_cms_page_kilobytes_per_resource_type.sql new file mode 100644 index 00000000000..95a6cfe3ecd --- /dev/null +++ b/sql/2020/15_CMS/distribution_of_cms_page_kilobytes_per_resource_type.sql @@ -0,0 +1,44 @@ +#standardSQL + # Distribution of CMS page kilobytes per resource type +SELECT + percentile, + client, + type, + APPROX_QUANTILES(requests, 1000)[ +OFFSET + (percentile * 10)] AS requests, + ROUND(APPROX_QUANTILES(bytes, 1000)[ + OFFSET + (percentile * 10)] / 1024, 2) AS kbytes +FROM ( + SELECT + client, + type, + COUNT(0) AS requests, + SUM(respSize) AS bytes + FROM + `httparchive.almanac.summary_requests` + JOIN ( + SELECT + _TABLE_SUFFIX AS client, + url AS page + FROM + `httparchive.technologies.2020_07_01_*` + WHERE + category = 'CMS') + USING + (client, + page) + GROUP BY + client, + type, + page), + UNNEST([10, 25, 50, 75, 90]) AS percentile +GROUP BY + percentile, + client, + type +ORDER BY + percentile, + client, + kbytes DESC \ No newline at end of file diff --git a/sql/2020/15_CMS/distribution_of_page_weight_requests_and_co2_grams_per_cms_web_page.sql b/sql/2020/15_CMS/distribution_of_page_weight_requests_and_co2_grams_per_cms_web_page.sql new file mode 100644 index 00000000000..93e38a1c361 --- /dev/null +++ b/sql/2020/15_CMS/distribution_of_page_weight_requests_and_co2_grams_per_cms_web_page.sql @@ -0,0 +1,62 @@ +#standardSQL + # Distribution of page weight, requests, and co2 grams per CMS web page + -- https://gitlab.com/wholegrain/carbon-api-2-0/-/blob/b498ec3bb239536d3612c5f3d758f46e0d2431a6/includes/carbonapi.php +CREATE TEMP FUNCTION + GREEN(url STRING) AS (FALSE); -- TODO: Investigate fetching from Green Web Foundation +CREATE TEMP FUNCTION + adjustDataTransfer(val INT64) AS (val * 0.75 + 0.02 * val * 0.25); +CREATE TEMP FUNCTION + energyConsumption(bytes FLOAT64) AS (bytes * 1.805 / 1073741824); +CREATE TEMP FUNCTION + getCo2Grid(energy FLOAT64) AS (energy * 475); +CREATE TEMP FUNCTION + getCo2Renewable(energy FLOAT64) AS (energy * 0.1008 * 33.4 + energy * 0.8992 * 475); +CREATE TEMP FUNCTION + CO2(url STRING, + bytes INT64) AS ( + IF + (GREEN(url), + getCo2Renewable(energyConsumption(adjustDataTransfer(bytes))), + getCo2Grid(energyConsumption(adjustDataTransfer(bytes))))); +SELECT + percentile, + client, + APPROX_QUANTILES(requests, 1000)[ +OFFSET + (percentile * 10)] AS requests, + ROUND(APPROX_QUANTILES(bytes, 1000)[ + OFFSET + (percentile * 10)] / 1024 / 1024, 2) AS mbytes, + APPROX_QUANTILES(co2grams, 1000)[ +OFFSET + (percentile * 10)] AS co2grams +FROM ( + SELECT + client, + COUNT(0) AS requests, + SUM(respSize) AS bytes, + CO2(page, + SUM(respSize)) AS co2grams + FROM + `httparchive.almanac.summary_requests` + JOIN ( + SELECT + _TABLE_SUFFIX AS client, + url AS page + FROM + `httparchive.technologies.2020_07_01_*` + WHERE + category = 'CMS') + USING + (client, + page) + GROUP BY + client, + page), + UNNEST([10, 25, 50, 75, 90]) AS percentile +GROUP BY + percentile, + client +ORDER BY + percentile, + client; \ No newline at end of file diff --git a/sql/2020/15_CMS/image_bytes_on_cmss.sql b/sql/2020/15_CMS/image_bytes_on_cmss.sql new file mode 100644 index 00000000000..ad2eafc33be --- /dev/null +++ b/sql/2020/15_CMS/image_bytes_on_cmss.sql @@ -0,0 +1,27 @@ +#standardSQL + # Image bytes on CMSs +SELECT + percentile, + _TABLE_SUFFIX AS client, + APPROX_QUANTILES(reqImg, 1000)[ +OFFSET + (percentile * 10)] AS image_count, + ROUND(APPROX_QUANTILES(bytesImg, 1000)[ + OFFSET + (percentile * 10)] / 1024, 2) AS image_kbytes +FROM + `httparchive.summary_pages.2020_07_01_*` +JOIN + `httparchive.technologies.2020_07_01_*` +USING + (_TABLE_SUFFIX, + url), + UNNEST([10, 25, 50, 75, 90]) AS percentile +WHERE + category = 'CMS' +GROUP BY + percentile, + client +ORDER BY + percentile, + client \ No newline at end of file diff --git a/sql/2020/15_CMS/third_party_bytes_and_requests_on_cmss.sql b/sql/2020/15_CMS/third_party_bytes_and_requests_on_cmss.sql new file mode 100644 index 00000000000..6db1ffd5162 --- /dev/null +++ b/sql/2020/15_CMS/third_party_bytes_and_requests_on_cmss.sql @@ -0,0 +1,47 @@ +#standardSQL + # Third party bytes and requests on CMSs +SELECT + percentile, + client, + APPROX_QUANTILES(requests, 1000)[ +OFFSET + (percentile * 10)] AS requests, + ROUND(APPROX_QUANTILES(bytes, 1000)[ + OFFSET + (percentile * 10)] / 1024, 2) AS kbytes +FROM ( + SELECT + client, + COUNT(0) AS requests, + SUM(respSize) AS bytes + FROM + `httparchive.almanac.summary_requests` + JOIN ( + SELECT + _TABLE_SUFFIX AS client, + url AS page + FROM + `httparchive.technologies.2020_07_01_*` + WHERE + category = 'CMS') + USING + (client, + page) + WHERE + NET.HOST(url) IN ( + SELECT + domain + FROM + `httparchive.almanac.third_parties` + WHERE + category != 'hosting') + GROUP BY + client, + page), + UNNEST([10, 25, 50, 75, 90]) AS percentile +GROUP BY + percentile, + client +ORDER BY + percentile, + client \ No newline at end of file diff --git a/sql/2020/15_CMS/top_cms_platforms_compared_to_2019.sql b/sql/2020/15_CMS/top_cms_platforms_compared_to_2019.sql new file mode 100644 index 00000000000..13f21c1d825 --- /dev/null +++ b/sql/2020/15_CMS/top_cms_platforms_compared_to_2019.sql @@ -0,0 +1,56 @@ +#standardSQL + # Top CMS platforms, compared to 2019 +SELECT + _TABLE_SUFFIX AS client, + 2020 AS year, + app AS cms, + COUNT(0) AS freq, + total, + ROUND(COUNT(0) * 100 / total, 2) AS pct +FROM + `httparchive.technologies.2020_07_01_*` +JOIN ( + SELECT + _TABLE_SUFFIX, + COUNT(0) AS total + FROM + `httparchive.summary_pages.2020_07_01_*` + GROUP BY + _TABLE_SUFFIX) +USING + (_TABLE_SUFFIX) +WHERE + category = 'CMS' +GROUP BY + client, + total, + cms +UNION ALL +SELECT + _TABLE_SUFFIX AS client, + 2019 AS year, + app AS cms, + COUNT(0) AS freq, + total, + ROUND(COUNT(0) * 100 / total, 2) AS pct +FROM + `httparchive.technologies.2019_07_01_*` +JOIN ( + SELECT + _TABLE_SUFFIX, + COUNT(0) AS total + FROM + `httparchive.summary_pages.2019_07_01_*` + GROUP BY + _TABLE_SUFFIX) +USING + (_TABLE_SUFFIX) +WHERE + category = 'CMS' +GROUP BY + client, + total, + cms +ORDER BY + year DESC, + freq DESC \ No newline at end of file