Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

PWA 2020 queries #1283

Merged
merged 11 commits into from
Sep 28, 2020
7 changes: 7 additions & 0 deletions sql/2020/14_PWA/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# PWA 2020 Queries

Related materials:

- [Chapter Issue](https://github.com/HTTPArchive/almanac.httparchive.org/issues/909)
- [Planning doc](https://docs.google.com/document/d/1JjjwHNjEsF6fl4WNod6mOj-58IIDzg-el3wh36mV3nM/edit?usp=sharing)
- [Results spreadsheet](https://docs.google.com/spreadsheets/d/1AOqCkb5EggnE8BngpxvxGj_fCfssT9sZ0ElCQKp4pOw/edit#gid=2077755325)
19 changes: 19 additions & 0 deletions sql/2020/14_PWA/beforeinstallprompt_usage.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
#standardSQL
# beforeinstallprompt usage - based on 2019/14_06.sql
tunetheweb marked this conversation as resolved.
Show resolved Hide resolved
SELECT
client,
COUNT(DISTINCT page) AS freq,
total,
COUNT(DISTINCT page) / total AS pct
FROM
`httparchive.almanac.summary_response_bodies`
JOIN
(SELECT _TABLE_SUFFIX AS client, COUNT(0) AS total FROM `httparchive.summary_pages.2020_08_01_*` GROUP BY _TABLE_SUFFIX)
USING (client),
UNNEST(REGEXP_EXTRACT_ALL(body, 'beforeinstallprompt'))
WHERE
date = '2020-08-01' AND
(firstHtml OR type = 'script')
GROUP BY
client,
total
29 changes: 29 additions & 0 deletions sql/2020/14_PWA/dist_sw_payload_sizes.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
#standardSQL
# Distribution of SW payload sizes - based on 2019/14_03b.sql
SELECT
date,
tunetheweb marked this conversation as resolved.
Show resolved Hide resolved
percentile,
client,
APPROX_QUANTILES(respSize, 1000)[OFFSET(percentile * 10)] AS bytes
FROM
(SELECT DISTINCT
date,
client,
page,
url
FROM
`httparchive.almanac.service_workers`)
JOIN
`httparchive.almanac.requests`
USING (date, client, page, url),
UNNEST([10, 25, 50, 75, 90]) AS percentile
WHERE
date = '2020-08-01'
GROUP BY
date,
percentile,
client
ORDER BY
date,
percentile,
client
41 changes: 41 additions & 0 deletions sql/2020/14_PWA/lighthouse_pwa_audits.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
#standardSQL
# Get summary of all lighthouse scores for a category
tunetheweb marked this conversation as resolved.
Show resolved Hide resolved
# Note scores, weightings, groups and descriptions may be off in mixed months when new versions of Lighthouse roles out

CREATE TEMPORARY FUNCTION getAudits(report STRING, category STRING)
RETURNS ARRAY<STRUCT<id STRING, weight INT64, audit_group STRING, title STRING, description STRING, score INT64>> LANGUAGE js AS '''
var $ = JSON.parse(report);
var auditrefs = $.categories[category].auditRefs;
var audits = $.audits;
$ = null;
var results = [];
for (auditref of auditrefs) {
results.push({
id: auditref.id,
weight: auditref.weight,
audit_group: auditref.group,
description: audits[auditref.id].description,
score: audits[auditref.id].score
});
}
return results;
''';

SELECT
audits.id,
COUNTIF(audits.score > 0) AS num_pages,
COUNT(0) AS total,
COUNTIF(audits.score > 0) / COUNT(0) AS pct,
MAX(audits.weight) AS weight,
MAX(audits.audit_group) AS audit_group,
MAX(audits.description) AS description
FROM
`httparchive.lighthouse.2020_08_01_mobile`,
UNNEST(getAudits(report, "pwa")) AS audits
WHERE
LENGTH(report) < 20000000 # necessary to avoid out of memory issues. Excludes 16 very large results
GROUP BY
audits.id
ORDER BY
weight DESC,
audit.id
34 changes: 34 additions & 0 deletions sql/2020/14_PWA/lighthouse_pwa_score.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
#standardSQL
# Percentiles of lighthouse pwa score
# Based on 07_24: Percentiles of lighthouse performance score
# This metric comes from Lighthouse only and is only available in mobile in HTTP Archive dataset
SELECT
'2019_07_01' AS date,
percentile,
APPROX_QUANTILES(score, 1000)[OFFSET(percentile * 10)] AS score
FROM (
SELECT
CAST(JSON_EXTRACT(report, '$.categories.pwa.score') AS NUMERIC) AS score
FROM
`httparchive.lighthouse.2019_07_01_mobile`),
UNNEST([10, 25, 50, 75, 90]) AS percentile
GROUP BY
date,
percentile
UNION ALL
SELECT
'2020_08_01' AS date,
percentile,
APPROX_QUANTILES(score, 1000)[OFFSET(percentile * 10)] AS score
FROM (
SELECT
CAST(JSON_EXTRACT(report, '$.categories.pwa.score') AS NUMERIC) AS score
FROM
`httparchive.lighthouse.2020_08_01_mobile`),
UNNEST([10, 25, 50, 75, 90]) AS percentile
GROUP BY
date,
percentile
ORDER BY
date,
percentile
45 changes: 45 additions & 0 deletions sql/2020/14_PWA/manifests_and_service_workers.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
#standardSQL
# Counting Manifests and Service Workers
# Currently showing both years but should change to just current year in future
SELECT
date,
client,
manifests / total AS manifests_pct,
serviceworkers / total AS serviceworkers_pct,
either / total AS either_pct,
both / total AS both_pct,
total
FROM
(SELECT
date,
client,
COUNT(DISTINCT m.page) AS manifests,
COUNT(DISTINCT sw.page) AS serviceworkers,
COUNT(DISTINCT IFNULL(m.page, sw.page)) AS either,
COUNT(DISTINCT (m.page || sw.page)) AS both
FROM
`httparchive.almanac.manifests` m
FULL OUTER JOIN
`httparchive.almanac.service_workers` sw
USING
(date, client, page)
GROUP BY
date,
client
)
JOIN
(SELECT
date,
client,
count(DISTINCT page) AS total
FROM
`httparchive.almanac.summary_requests`
GROUP BY
date,
client
)
USING
(date, client)
ORDER BY
date,
client
32 changes: 32 additions & 0 deletions sql/2020/14_PWA/manifests_not_json_parsable.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
#standardSQL
# Manifests that are not JSON parsable - based on 2019/14_04b.sql
CREATE TEMPORARY FUNCTION canParseManifest(manifest STRING)
RETURNS BOOLEAN LANGUAGE js AS '''
try {
JSON.parse(manifest);
return true;
} catch (e) {
return false;
}
''';

SELECT
client,
canParseManifest(body) AS can_parse,
tunetheweb marked this conversation as resolved.
Show resolved Hide resolved
COUNT(DISTINCT page) AS freq,
SUM(COUNT(DISTINCT page)) OVER (PARTITION BY client) AS total,
COUNT(DISTINCT page) / SUM(COUNT(DISTINCT page)) OVER (PARTITION BY client) AS pct
FROM
(SELECT DISTINCT
client,
page,
body
FROM
`httparchive.almanac.manifests`
WHERE
date = '2020-08-01')
GROUP BY
client,
can_parse
ORDER BY
freq DESC
36 changes: 36 additions & 0 deletions sql/2020/14_PWA/manifests_not_json_parsable_sw.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
#standardSQL
# Manifests that are not JSON parsable for service worker pages - based on 2019/14_04b.sql
CREATE TEMPORARY FUNCTION canParseManifest(manifest STRING)
RETURNS BOOLEAN LANGUAGE js AS '''
try {
JSON.parse(manifest);
return true;
} catch (e) {
return false;
}
''';

SELECT
client,
canParseManifest(body) AS can_parse,
tunetheweb marked this conversation as resolved.
Show resolved Hide resolved
COUNT(DISTINCT page) AS freq,
SUM(COUNT(DISTINCT page)) OVER (PARTITION BY client) AS total,
COUNT(DISTINCT page) / SUM(COUNT(DISTINCT page)) OVER (PARTITION BY client) AS pct
FROM
(SELECT DISTINCT
m.client,
m.page,
m.body
FROM
`httparchive.almanac.manifests` m
JOIN
`httparchive.almanac.service_workers` sw
USING
(date, client, page)
WHERE
date = '2020-08-01')
GROUP BY
client,
can_parse
ORDER BY
freq DESC
35 changes: 35 additions & 0 deletions sql/2020/14_PWA/manifests_preferring_native_apps.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
#standardSQL
# % manifests preferring native apps - based on 2019/14_04e.sql
CREATE TEMPORARY FUNCTION prefersNative(manifest STRING)
RETURNS BOOLEAN LANGUAGE js AS '''
try {
var $ = JSON.parse(manifest);
return $.prefer_related_applications == true && $.related_applications.length > 0;
} catch (e) {
return null;
}
''';

SELECT
client,
prefersNative(body) AS prefers_native,
COUNT(0) AS freq,
SUM(COUNT(0)) OVER (PARTITION BY client) AS total,
COUNT(0) / SUM(COUNT(0)) OVER (PARTITION BY client) AS pct
FROM
(SELECT DISTINCT
client,
page,
body
FROM
`httparchive.almanac.manifests`
WHERE
date = '2020-08-01')
GROUP BY
client,
prefers_native
HAVING
prefers_native IS NOT NULL
ORDER BY
client,
prefers_native
39 changes: 39 additions & 0 deletions sql/2020/14_PWA/manifests_preferring_native_apps_sw.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
#standardSQL
# % manifests preferring native apps where service worker is used - based on 2019/14_04e.sql
CREATE TEMPORARY FUNCTION prefersNative(manifest STRING)
RETURNS BOOLEAN LANGUAGE js AS '''
try {
var $ = JSON.parse(manifest);
return $.prefer_related_applications == true && $.related_applications.length > 0;
} catch (e) {
return null;
}
''';

SELECT
client,
prefersNative(body) AS prefers_native,
COUNT(0) AS freq,
SUM(COUNT(0)) OVER (PARTITION BY client) AS total,
COUNT(0) / SUM(COUNT(0)) OVER (PARTITION BY client) AS pct
FROM
(SELECT DISTINCT
m.client,
m.page,
m.body
FROM
`httparchive.almanac.manifests` m
JOIN
`httparchive.almanac.service_workers` sw
USING
(date, client, page)
WHERE
date = '2020-08-01')
GROUP BY
client,
prefers_native
HAVING
prefers_native IS NOT NULL
ORDER BY
client,
prefers_native
55 changes: 55 additions & 0 deletions sql/2020/14_PWA/popular_pwa_libraries.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
#standardSQL
# Popular PWA libraries based on unique ImportScript values
# Use popular_pwa_libraries_helper.sql to find libraries to count
SELECT
date,
client,
COUNT(0) AS total,
COUNTIF(LOWER(body) LIKE '%importscript%') AS uses_importscript,
COUNTIF(LOWER(body) LIKE '%workbox%') AS workbox,
COUNTIF(LOWER(body) LIKE '%sw-toolbox%') AS sw_toolbox,
COUNTIF(LOWER(body) LIKE '%firebase%') AS firebase,
COUNTIF(LOWER(body) LIKE '%onesignalsdk%') AS OneSignalSDK,
COUNTIF(LOWER(body) LIKE '%najva%') AS najva,
COUNTIF(LOWER(body) LIKE '%upush%') AS upush,
COUNTIF(LOWER(body) LIKE '%cache-polyfill%') AS cache_polyfill,
COUNTIF(LOWER(body) LIKE '%analytics-helper%') AS analytics_helper,
COUNTIF(LOWER(body) LIKE '%importscript%' AND
LOWER(body) NOT LIKE '%workbox%' AND
LOWER(body) NOT LIKE '%sw-toolbox%' AND
LOWER(body) NOT LIKE '%firebase%' AND
LOWER(body) NOT LIKE '%onesignalsdk%' AND
LOWER(body) NOT LIKE '%najva%' AND
LOWER(body) NOT LIKE '%upush%' AND
LOWER(body) NOT LIKE '%cache-polyfill%' AND
LOWER(body) NOT LIKE '%analytics-helper%') AS importscript_nolib,
COUNTIF(LOWER(body) NOT LIKE '%importscript%' AND
LOWER(body) NOT LIKE '%workbox%' AND
LOWER(body) NOT LIKE '%sw-toolbox%' AND
LOWER(body) NOT LIKE '%firebase%' AND
LOWER(body) NOT LIKE '%onesignalsdk%' AND
LOWER(body) NOT LIKE '%najva%' AND
LOWER(body) NOT LIKE '%upush%' AND
LOWER(body) NOT LIKE '%cache-polyfill.js%' AND
LOWER(body) NOT LIKE '%analytics-helper.js%') AS none_of_the_above
FROM
(
SELECT DISTINCT
date,
client,
page,
body
FROM
`httparchive.almanac.service_workers`
GROUP BY
date,
client,
page,
body
)
GROUP BY
date,
client
ORDER BY
date,
client
Loading