diff --git a/sql/util/README.md b/sql/util/README.md index 241675d774e..dd651581415 100644 --- a/sql/util/README.md +++ b/sql/util/README.md @@ -2,7 +2,7 @@ This directory contains utilities for managing the Web Almanac dataset on BigQuery. -## [summary_requests.sql](./summary_requests.sql) +## [requests.sql](./requests.sql) This query generates summary metadata about each request from its JSON-encoded HAR object. For every Web Almanac crawl (eg 2019_07_01 and 2020_08_01) this query should be run once and configured to have its results appended to the `almanac.requests` table. This table is useful for Web Almanac analysis because it combines the metadata of the request with the HAR payload, more easily enabling queries that segment requests by resource type (script, style, image) and base HTML page. diff --git a/sql/util/summary_requests.sql b/sql/util/requests.sql similarity index 99% rename from sql/util/summary_requests.sql rename to sql/util/requests.sql index e1e1a04d7a3..e6d317440b5 100644 --- a/sql/util/summary_requests.sql +++ b/sql/util/requests.sql @@ -64,6 +64,7 @@ LANGUAGE js AS """ return 'other'; } function getFormat(prettyType, mimeType, ext) { + ext = ext.toLowerCase(); if (prettyType == 'image') { for (type of ['jpg', 'png', 'gif', 'webp', 'svg', 'ico']) { if (mimeType.includes(type) || ext == type) {