From 265976cb0b150af23988001c93c98d569a64b705 Mon Sep 17 00:00:00 2001 From: Rick Viscomi Date: Wed, 2 Sep 2020 00:35:26 -0400 Subject: [PATCH] fix empty image formats (#1262) --- sql/util/README.md | 2 +- sql/util/{summary_requests.sql => requests.sql} | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) rename sql/util/{summary_requests.sql => requests.sql} (99%) diff --git a/sql/util/README.md b/sql/util/README.md index 241675d774e..dd651581415 100644 --- a/sql/util/README.md +++ b/sql/util/README.md @@ -2,7 +2,7 @@ This directory contains utilities for managing the Web Almanac dataset on BigQuery. -## [summary_requests.sql](./summary_requests.sql) +## [requests.sql](./requests.sql) This query generates summary metadata about each request from its JSON-encoded HAR object. For every Web Almanac crawl (eg 2019_07_01 and 2020_08_01) this query should be run once and configured to have its results appended to the `almanac.requests` table. This table is useful for Web Almanac analysis because it combines the metadata of the request with the HAR payload, more easily enabling queries that segment requests by resource type (script, style, image) and base HTML page. diff --git a/sql/util/summary_requests.sql b/sql/util/requests.sql similarity index 99% rename from sql/util/summary_requests.sql rename to sql/util/requests.sql index e1e1a04d7a3..e6d317440b5 100644 --- a/sql/util/summary_requests.sql +++ b/sql/util/requests.sql @@ -64,6 +64,7 @@ LANGUAGE js AS """ return 'other'; } function getFormat(prettyType, mimeType, ext) { + ext = ext.toLowerCase(); if (prettyType == 'image') { for (type of ['jpg', 'png', 'gif', 'webp', 'svg', 'ico']) { if (mimeType.includes(type) || ext == type) {