Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Jamstack 2020 queries #1228

Merged
merged 20 commits into from
Oct 12, 2020
Merged
Show file tree
Hide file tree
Changes from 7 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
64 changes: 64 additions & 0 deletions sql/2020/17_JAMstack/cdn_core_web_vitals_distribution.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
#standardSQL
# Core Web Vitals distribution by CDN
#
# Note that this is an unweighted average of all sites per SSG.
# Performance of sites with millions of visitors as weighted the same as small sites.
SELECT
client,
CDN,
COUNT(DISTINCT origin) AS origins,
SUM(fast_lcp) / (SUM(fast_lcp) + SUM(avg_lcp) + SUM(slow_lcp)) AS good_lcp,
Copy link
Member

@rviscomi rviscomi Sep 19, 2020

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

(Throughout) It might not matter but this is simpler and consistent with other CWV queries

Suggested change
SUM(fast_lcp) / (SUM(fast_lcp) + SUM(avg_lcp) + SUM(slow_lcp)) AS good_lcp,
SUM(fast_lcp) / SUM(fast_lcp + avg_lcp + slow_lcp) AS good_lcp,

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This comment applies throughout the query

SUM(avg_lcp) / (SUM(fast_lcp) + SUM(avg_lcp) + SUM(slow_lcp)) AS ni_lcp,
SUM(slow_lcp) / (SUM(fast_lcp) + SUM(avg_lcp) + SUM(slow_lcp)) AS poor_lcp,

SUM(fast_fid) / (SUM(fast_fid) + SUM(avg_fid) + SUM(slow_fid)) AS good_fid,
SUM(avg_fid) / (SUM(fast_fid) + SUM(avg_fid) + SUM(slow_fid)) AS ni_fid,
SUM(slow_fid) / (SUM(fast_fid) + SUM(avg_fid) + SUM(slow_fid)) AS poor_fid,

SUM(small_cls) / (SUM(small_cls) + SUM(medium_cls) + SUM(large_cls)) AS good_cls,
SUM(medium_cls) / (SUM(small_cls) + SUM(medium_cls) + SUM(large_cls)) AS ni_cls,
SUM(large_cls) / (SUM(small_cls) + SUM(medium_cls) + SUM(large_cls)) AS poor_cls,
FROM
`chrome-ux-report.materialized.device_summary`
JOIN (
SELECT
CASE
WHEN REGEXP_EXTRACT(LOWER(CONCAT(respOtherHeaders, resp_x_powered_by, resp_via, resp_server)), '(x-github-request)') = 'x-github-request' THEN 'GitHub'
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

(Throughout)

Suggested change
WHEN REGEXP_EXTRACT(LOWER(CONCAT(respOtherHeaders, resp_x_powered_by, resp_via, resp_server)), '(x-github-request)') = 'x-github-request' THEN 'GitHub'
WHEN REGEXP_CONTAINS(CONCAT(respOtherHeaders, resp_x_powered_by, resp_via, resp_server), '(?i)x-github-request') THEN 'GitHub'

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This comment applies throughout the query

WHEN REGEXP_EXTRACT(LOWER(CONCAT(respOtherHeaders, resp_x_powered_by, resp_via, resp_server)), '(netlify)') = 'netlify' THEN 'Netlify'
WHEN _cdn_provider = 'Microsoft Azure' THEN 'Azure'
WHEN _cdn_provider = 'Vercel' THEN 'Vercel'
WHEN _cdn_provider = 'Amazon CloudFront' THEN 'AWS'
WHEN _cdn_provider = 'Akamai' THEN 'Akamai'
WHEN _cdn_provider = 'Cloudflare' THEN 'Cloudflare'
ELSE NULL
END AS CDN,
_TABLE_SUFFIX as client,
url,
app
FROM
`httparchive.summary_requests.2020_08_01_*`
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

(Throughout) Do you also need a WHERE firstHtml clause here so that you're only looking at requests for pages that are served with these headers, not any misc 3P requests?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

👍

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nit: can you take a look at the whitespace related to this change? firstHtml is appearing misaligned with the rest of the query.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should be aligned

JOIN (
SELECT
_TABLE_SUFFIX,
app,
url
FROM
`httparchive.technologies.2020_08_01_*`
WHERE
LOWER(category) = "static site generator" OR
app = "Next.js"
)
USING (url, _TABLE_SUFFIX)
)
ON
CONCAT(origin, '/') = url AND
IF(device = 'desktop', 'desktop', 'mobile') = client
WHERE
# The CrUX 202008 dataset is not available until September 8.
date = '2020-08-01' AND
CDN IS NOT NULL
GROUP BY
CDN,
client
ORDER BY
origins DESC
84 changes: 84 additions & 0 deletions sql/2020/17_JAMstack/cdn_core_web_vitals_passing.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
#standardSQL
# Core Web Vitals performance by CMS
CREATE TEMP FUNCTION IS_GOOD (good FLOAT64, needs_improvement FLOAT64, poor FLOAT64) RETURNS BOOL AS (
good / (good + needs_improvement + poor) >= 0.75
);

CREATE TEMP FUNCTION IS_NON_ZERO (good FLOAT64, needs_improvement FLOAT64, poor FLOAT64) RETURNS BOOL AS (
good + needs_improvement + poor > 0
);


SELECT
CDN,
client,
COUNT(DISTINCT origin) AS origins,
# Origins with good LCP divided by origins with any LCP.
SAFE_DIVIDE(
COUNT(DISTINCT IF(IS_GOOD(fast_lcp, avg_lcp, slow_lcp), origin, NULL)),
COUNT(DISTINCT IF(IS_NON_ZERO(fast_lcp, avg_lcp, slow_lcp), origin, NULL))) AS pct_good_lcp,

# Origins with good FID divided by origins with any FID.
SAFE_DIVIDE(
COUNT(DISTINCT IF(IS_GOOD(fast_fid, avg_fid, slow_fid), origin, NULL)),
COUNT(DISTINCT IF(IS_NON_ZERO(fast_fid, avg_fid, slow_fid), origin, NULL))) AS pct_good_fid,

# Origins with good CLS divided by origins with any CLS.
SAFE_DIVIDE(
COUNT(DISTINCT IF(IS_GOOD(small_cls, medium_cls, large_cls), origin, NULL)),
COUNT(DISTINCT IF(IS_NON_ZERO(small_cls, medium_cls, large_cls), origin, NULL))) AS pct_good_cls,

# Origins with good LCP, FID, and CLS dividied by origins with any LCP, FID, and CLS.
SAFE_DIVIDE(
COUNT(DISTINCT IF(
IS_GOOD(fast_lcp, avg_lcp, slow_lcp) AND
IS_GOOD(fast_fid, avg_fid, slow_fid) AND
IS_GOOD(small_cls, medium_cls, large_cls), origin, NULL)),
COUNT(DISTINCT IF(
IS_NON_ZERO(fast_lcp, avg_lcp, slow_lcp) AND
IS_NON_ZERO(fast_fid, avg_fid, slow_fid) AND
IS_NON_ZERO(small_cls, medium_cls, large_cls), origin, NULL))) AS pct_good_cwv
FROM
`chrome-ux-report.materialized.device_summary`
JOIN (
SELECT
CASE
WHEN REGEXP_EXTRACT(LOWER(CONCAT(respOtherHeaders, resp_x_powered_by, resp_via, resp_server)), '(x-github-request)') = 'x-github-request' THEN 'GitHub'
WHEN REGEXP_EXTRACT(LOWER(CONCAT(respOtherHeaders, resp_x_powered_by, resp_via, resp_server)), '(netlify)') = 'netlify' THEN 'Netlify'
WHEN _cdn_provider = 'Microsoft Azure' THEN 'Azure'
WHEN _cdn_provider = 'Vercel' THEN 'Vercel'
WHEN _cdn_provider = 'Amazon CloudFront' THEN 'AWS'
WHEN _cdn_provider = 'Akamai' THEN 'Akamai'
WHEN _cdn_provider = 'Cloudflare' THEN 'Cloudflare'
ELSE NULL
END AS CDN,
_TABLE_SUFFIX as client,
url,
app
FROM
`httparchive.summary_requests.2020_08_01_*`
JOIN (
SELECT
_TABLE_SUFFIX,
app,
url
FROM
`httparchive.technologies.2020_08_01_*`
WHERE
LOWER(category) = "static site generator" OR
app = "Next.js"
)
USING (url, _TABLE_SUFFIX)
)
ON
CONCAT(origin, '/') = url AND
IF(device = 'desktop', 'desktop', 'mobile') = client
WHERE
# The CrUX 202008 dataset is not available until September 8.
date = '2020-08-01' AND
CDN IS NOT NULL
GROUP BY
CDN,
client
ORDER BY
origins DESC
54 changes: 54 additions & 0 deletions sql/2020/17_JAMstack/ssg_adoption_compared_to_2019.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
#standardSQL
# SSG adoptions, compared to 2019
SELECT
_TABLE_SUFFIX AS client,
2020 AS year,
COUNT(0) AS freq,
total,
COUNT(0) / total AS pct
FROM
`httparchive.technologies.2020_08_01_*`
JOIN (
SELECT
_TABLE_SUFFIX,
COUNT(0) AS total
FROM
`httparchive.summary_pages.2020_08_01_*`
GROUP BY
_TABLE_SUFFIX)
USING
(_TABLE_SUFFIX)
WHERE
LOWER(category) = "static site generator" OR
app = "Next.js"
GROUP BY
client,
total
UNION ALL
SELECT
_TABLE_SUFFIX AS client,
2019 AS year,
COUNT(0) AS freq,
total,
COUNT(0) / total AS pct
FROM
`httparchive.technologies.2019_07_01_*`
JOIN (
SELECT
_TABLE_SUFFIX,
COUNT(0) AS total
FROM
`httparchive.summary_pages.2019_07_01_*`
GROUP BY
_TABLE_SUFFIX)
USING
(_TABLE_SUFFIX)
WHERE
LOWER(category) = "static site generator" OR
app = "Next.js"
GROUP BY
client,
total
ORDER BY
year DESC,
pct DESC
64 changes: 64 additions & 0 deletions sql/2020/17_JAMstack/ssg_core_web_vitals_distribution.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
#standardSQL
# Core Web Vitals distribution by SSG
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is this identical to cdn_core_web_vitals_distribution.sql?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yup, I merged them into 1 query.

#
# Note that this is an unweighted average of all sites per SSG.
# Performance of sites with millions of visitors as weighted the same as small sites.
SELECT
app,
client,
COUNT(DISTINCT origin) AS origins,
SUM(fast_lcp) / (SUM(fast_lcp) + SUM(avg_lcp) + SUM(slow_lcp)) AS good_lcp,
SUM(avg_lcp) / (SUM(fast_lcp) + SUM(avg_lcp) + SUM(slow_lcp)) AS ni_lcp,
SUM(slow_lcp) / (SUM(fast_lcp) + SUM(avg_lcp) + SUM(slow_lcp)) AS poor_lcp,

SUM(fast_fid) / (SUM(fast_fid) + SUM(avg_fid) + SUM(slow_fid)) AS good_fid,
SUM(avg_fid) / (SUM(fast_fid) + SUM(avg_fid) + SUM(slow_fid)) AS ni_fid,
SUM(slow_fid) / (SUM(fast_fid) + SUM(avg_fid) + SUM(slow_fid)) AS poor_fid,

SUM(small_cls) / (SUM(small_cls) + SUM(medium_cls) + SUM(large_cls)) AS good_cls,
SUM(medium_cls) / (SUM(small_cls) + SUM(medium_cls) + SUM(large_cls)) AS ni_cls,
SUM(large_cls) / (SUM(small_cls) + SUM(medium_cls) + SUM(large_cls)) AS poor_cls,
FROM
`chrome-ux-report.materialized.device_summary`
JOIN (
SELECT
CASE
WHEN REGEXP_EXTRACT(LOWER(CONCAT(respOtherHeaders, resp_x_powered_by, resp_via, resp_server)), '(x-github-request)') = 'x-github-request' THEN 'GitHub'
WHEN REGEXP_EXTRACT(LOWER(CONCAT(respOtherHeaders, resp_x_powered_by, resp_via, resp_server)), '(netlify)') = 'netlify' THEN 'Netlify'
WHEN _cdn_provider = 'Microsoft Azure' THEN 'Azure'
WHEN _cdn_provider = 'Vercel' THEN 'Vercel'
WHEN _cdn_provider = 'Amazon CloudFront' THEN 'AWS'
WHEN _cdn_provider = 'Akamai' THEN 'Akamai'
WHEN _cdn_provider = 'Cloudflare' THEN 'Cloudflare'
ELSE NULL
END AS CDN,
_TABLE_SUFFIX as client,
url,
app
FROM
`httparchive.summary_requests.2020_08_01_*`
JOIN (
SELECT
_TABLE_SUFFIX,
app,
url
FROM
`httparchive.technologies.2020_08_01_*`
WHERE
LOWER(category) = "static site generator" OR
app = "Next.js"
)
USING (url, _TABLE_SUFFIX)
)
ON
CONCAT(origin, '/') = url AND
IF(device = 'desktop', 'desktop', 'mobile') = client
WHERE
# The CrUX 202008 dataset is not available until September 8.
date = '2020-08-01' AND
CDN IS NOT NULL
GROUP BY
app,
client
ORDER BY
origins DESC
84 changes: 84 additions & 0 deletions sql/2020/17_JAMstack/ssg_core_web_vitals_passing.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
#standardSQL
# Core Web Vitals performance by CMS
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is this the same as cdn_core_web_vitals_passing.sql? I see that it's querying 2020_07, but not sure if that's intentional.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should be 2020_08, thanks

CREATE TEMP FUNCTION IS_GOOD (good FLOAT64, needs_improvement FLOAT64, poor FLOAT64) RETURNS BOOL AS (
good / (good + needs_improvement + poor) >= 0.75
);

CREATE TEMP FUNCTION IS_NON_ZERO (good FLOAT64, needs_improvement FLOAT64, poor FLOAT64) RETURNS BOOL AS (
good + needs_improvement + poor > 0
);


SELECT
app,
client,
COUNT(DISTINCT origin) AS origins,
# Origins with good LCP divided by origins with any LCP.
SAFE_DIVIDE(
COUNT(DISTINCT IF(IS_GOOD(fast_lcp, avg_lcp, slow_lcp), origin, NULL)),
COUNT(DISTINCT IF(IS_NON_ZERO(fast_lcp, avg_lcp, slow_lcp), origin, NULL))) AS pct_good_lcp,

# Origins with good FID divided by origins with any FID.
SAFE_DIVIDE(
COUNT(DISTINCT IF(IS_GOOD(fast_fid, avg_fid, slow_fid), origin, NULL)),
COUNT(DISTINCT IF(IS_NON_ZERO(fast_fid, avg_fid, slow_fid), origin, NULL))) AS pct_good_fid,

# Origins with good CLS divided by origins with any CLS.
SAFE_DIVIDE(
COUNT(DISTINCT IF(IS_GOOD(small_cls, medium_cls, large_cls), origin, NULL)),
COUNT(DISTINCT IF(IS_NON_ZERO(small_cls, medium_cls, large_cls), origin, NULL))) AS pct_good_cls,

# Origins with good LCP, FID, and CLS dividied by origins with any LCP, FID, and CLS.
SAFE_DIVIDE(
COUNT(DISTINCT IF(
IS_GOOD(fast_lcp, avg_lcp, slow_lcp) AND
IS_GOOD(fast_fid, avg_fid, slow_fid) AND
IS_GOOD(small_cls, medium_cls, large_cls), origin, NULL)),
COUNT(DISTINCT IF(
IS_NON_ZERO(fast_lcp, avg_lcp, slow_lcp) AND
IS_NON_ZERO(fast_fid, avg_fid, slow_fid) AND
IS_NON_ZERO(small_cls, medium_cls, large_cls), origin, NULL))) AS pct_good_cwv
FROM
`chrome-ux-report.materialized.device_summary`
JOIN (
SELECT
CASE
WHEN REGEXP_EXTRACT(LOWER(CONCAT(respOtherHeaders, resp_x_powered_by, resp_via, resp_server)), '(x-github-request)') = 'x-github-request' THEN 'GitHub'
WHEN REGEXP_EXTRACT(LOWER(CONCAT(respOtherHeaders, resp_x_powered_by, resp_via, resp_server)), '(netlify)') = 'netlify' THEN 'Netlify'
WHEN _cdn_provider = 'Microsoft Azure' THEN 'Azure'
WHEN _cdn_provider = 'Vercel' THEN 'Vercel'
WHEN _cdn_provider = 'Amazon CloudFront' THEN 'AWS'
WHEN _cdn_provider = 'Akamai' THEN 'Akamai'
WHEN _cdn_provider = 'Cloudflare' THEN 'Cloudflare'
ELSE NULL
END AS CDN,
_TABLE_SUFFIX as client,
url,
app
FROM
`httparchive.summary_requests.2020_07_01_*`
JOIN (
SELECT
_TABLE_SUFFIX,
app,
url
FROM
`httparchive.technologies.2020_07_01_*`
WHERE
LOWER(category) = "static site generator" OR
app = "Next.js"
)
USING (url, _TABLE_SUFFIX)
)
ON
CONCAT(origin, '/') = url AND
IF(device = 'desktop', 'desktop', 'mobile') = client
WHERE
# The CrUX 202008 dataset is not available until September 8.
date = '2020-07-01' AND
CDN IS NOT NULL
GROUP BY
app,
client
ORDER BY
origins DESC
Loading