Skip to content

Commit

Permalink
WIP: Start on the 2020 CMS Queries
Browse files Browse the repository at this point in the history
  • Loading branch information
GregBrimble committed Jul 27, 2020
1 parent daf55e8 commit f292777
Show file tree
Hide file tree
Showing 7 changed files with 316 additions and 0 deletions.
28 changes: 28 additions & 0 deletions sql/2020/15_CMS/adoption_of_image_formats_in_cmss.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
#standardSQL
# Adoption of image formats in CMSs
SELECT
client,
format,
COUNT(0) AS freq,
SUM(COUNT(0)) OVER (PARTITION BY client) AS total,
ROUND(COUNT(0) * 100 / SUM(COUNT(0)) OVER (PARTITION BY client), 2) AS pct
FROM
`httparchive.almanac.summary_requests`
JOIN (
SELECT
_TABLE_SUFFIX AS client,
url AS page
FROM
`httparchive.technologies.2020_07_01_*`
WHERE
category = 'CMS')
USING
(client,
page)
WHERE
type = 'image'
GROUP BY
client,
format
ORDER BY
freq / total DESC
52 changes: 52 additions & 0 deletions sql/2020/15_CMS/cms_adoption_compared_to_2019.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
#standardSQL
# CMS adoptions, compared to 2019
SELECT
_TABLE_SUFFIX AS client,
2020 AS year,
COUNT(0) AS freq,
total,
ROUND(COUNT(0) * 100 / total, 2) AS pct
FROM
`httparchive.technologies.2020_07_01_*`
JOIN (
SELECT
_TABLE_SUFFIX,
COUNT(0) AS total
FROM
`httparchive.summary_pages.2020_07_01_*`
GROUP BY
_TABLE_SUFFIX)
USING
(_TABLE_SUFFIX)
WHERE
category = 'CMS'
GROUP BY
client,
total
UNION ALL
SELECT
_TABLE_SUFFIX AS client,
2019 AS year,
COUNT(0) AS freq,
total,
ROUND(COUNT(0) * 100 / total, 2) AS pct
FROM
`httparchive.technologies.2019_07_01_*`
JOIN (
SELECT
_TABLE_SUFFIX,
COUNT(0) AS total
FROM
`httparchive.summary_pages.2019_07_01_*`
GROUP BY
_TABLE_SUFFIX)
USING
(_TABLE_SUFFIX)
WHERE
category = 'CMS'
GROUP BY
client,
total
ORDER BY
year DESC,
freq / total DESC
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
#standardSQL
# Distribution of CMS page kilobytes per resource type
SELECT
percentile,
client,
type,
APPROX_QUANTILES(requests, 1000)[
OFFSET
(percentile * 10)] AS requests,
ROUND(APPROX_QUANTILES(bytes, 1000)[
OFFSET
(percentile * 10)] / 1024, 2) AS kbytes
FROM (
SELECT
client,
type,
COUNT(0) AS requests,
SUM(respSize) AS bytes
FROM
`httparchive.almanac.summary_requests`
JOIN (
SELECT
_TABLE_SUFFIX AS client,
url AS page
FROM
`httparchive.technologies.2020_07_01_*`
WHERE
category = 'CMS')
USING
(client,
page)
GROUP BY
client,
type,
page),
UNNEST([10, 25, 50, 75, 90]) AS percentile
GROUP BY
percentile,
client,
type
ORDER BY
percentile,
client,
kbytes DESC
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
#standardSQL
# Distribution of page weight, requests, and co2 grams per CMS web page
-- https://gitlab.com/wholegrain/carbon-api-2-0/-/blob/b498ec3bb239536d3612c5f3d758f46e0d2431a6/includes/carbonapi.php
CREATE TEMP FUNCTION
GREEN(url STRING) AS (FALSE); -- TODO: Investigate fetching from Green Web Foundation
CREATE TEMP FUNCTION
adjustDataTransfer(val INT64) AS (val * 0.75 + 0.02 * val * 0.25);
CREATE TEMP FUNCTION
energyConsumption(bytes FLOAT64) AS (bytes * 1.805 / 1073741824);
CREATE TEMP FUNCTION
getCo2Grid(energy FLOAT64) AS (energy * 475);
CREATE TEMP FUNCTION
getCo2Renewable(energy FLOAT64) AS (energy * 0.1008 * 33.4 + energy * 0.8992 * 475);
CREATE TEMP FUNCTION
CO2(url STRING,
bytes INT64) AS (
IF
(GREEN(url),
getCo2Renewable(energyConsumption(adjustDataTransfer(bytes))),
getCo2Grid(energyConsumption(adjustDataTransfer(bytes)))));
SELECT
percentile,
client,
APPROX_QUANTILES(requests, 1000)[
OFFSET
(percentile * 10)] AS requests,
ROUND(APPROX_QUANTILES(bytes, 1000)[
OFFSET
(percentile * 10)] / 1024 / 1024, 2) AS mbytes,
APPROX_QUANTILES(co2grams, 1000)[
OFFSET
(percentile * 10)] AS co2grams
FROM (
SELECT
client,
COUNT(0) AS requests,
SUM(respSize) AS bytes,
CO2(page,
SUM(respSize)) AS co2grams
FROM
`httparchive.almanac.summary_requests`
JOIN (
SELECT
_TABLE_SUFFIX AS client,
url AS page
FROM
`httparchive.technologies.2020_07_01_*`
WHERE
category = 'CMS')
USING
(client,
page)
GROUP BY
client,
page),
UNNEST([10, 25, 50, 75, 90]) AS percentile
GROUP BY
percentile,
client
ORDER BY
percentile,
client;
27 changes: 27 additions & 0 deletions sql/2020/15_CMS/image_bytes_on_cmss.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
#standardSQL
# Image bytes on CMSs
SELECT
percentile,
_TABLE_SUFFIX AS client,
APPROX_QUANTILES(reqImg, 1000)[
OFFSET
(percentile * 10)] AS image_count,
ROUND(APPROX_QUANTILES(bytesImg, 1000)[
OFFSET
(percentile * 10)] / 1024, 2) AS image_kbytes
FROM
`httparchive.summary_pages.2020_07_01_*`
JOIN
`httparchive.technologies.2020_07_01_*`
USING
(_TABLE_SUFFIX,
url),
UNNEST([10, 25, 50, 75, 90]) AS percentile
WHERE
category = 'CMS'
GROUP BY
percentile,
client
ORDER BY
percentile,
client
47 changes: 47 additions & 0 deletions sql/2020/15_CMS/third_party_bytes_and_requests_on_cmss.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
#standardSQL
# Third party bytes and requests on CMSs
SELECT
percentile,
client,
APPROX_QUANTILES(requests, 1000)[
OFFSET
(percentile * 10)] AS requests,
ROUND(APPROX_QUANTILES(bytes, 1000)[
OFFSET
(percentile * 10)] / 1024, 2) AS kbytes
FROM (
SELECT
client,
COUNT(0) AS requests,
SUM(respSize) AS bytes
FROM
`httparchive.almanac.summary_requests`
JOIN (
SELECT
_TABLE_SUFFIX AS client,
url AS page
FROM
`httparchive.technologies.2020_07_01_*`
WHERE
category = 'CMS')
USING
(client,
page)
WHERE
NET.HOST(url) IN (
SELECT
domain
FROM
`httparchive.almanac.third_parties`
WHERE
category != 'hosting')
GROUP BY
client,
page),
UNNEST([10, 25, 50, 75, 90]) AS percentile
GROUP BY
percentile,
client
ORDER BY
percentile,
client
56 changes: 56 additions & 0 deletions sql/2020/15_CMS/top_cms_platforms_compared_to_2019.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
#standardSQL
# Top CMS platforms, compared to 2019
SELECT
_TABLE_SUFFIX AS client,
2020 AS year,
app AS cms,
COUNT(0) AS freq,
total,
ROUND(COUNT(0) * 100 / total, 2) AS pct
FROM
`httparchive.technologies.2020_07_01_*`
JOIN (
SELECT
_TABLE_SUFFIX,
COUNT(0) AS total
FROM
`httparchive.summary_pages.2020_07_01_*`
GROUP BY
_TABLE_SUFFIX)
USING
(_TABLE_SUFFIX)
WHERE
category = 'CMS'
GROUP BY
client,
total,
cms
UNION ALL
SELECT
_TABLE_SUFFIX AS client,
2019 AS year,
app AS cms,
COUNT(0) AS freq,
total,
ROUND(COUNT(0) * 100 / total, 2) AS pct
FROM
`httparchive.technologies.2019_07_01_*`
JOIN (
SELECT
_TABLE_SUFFIX,
COUNT(0) AS total
FROM
`httparchive.summary_pages.2019_07_01_*`
GROUP BY
_TABLE_SUFFIX)
USING
(_TABLE_SUFFIX)
WHERE
category = 'CMS'
GROUP BY
client,
total,
cms
ORDER BY
year DESC,
freq DESC

0 comments on commit f292777

Please sign in to comment.