From f87af38211411532adc9b5b9270e14c2393701fd Mon Sep 17 00:00:00 2001
From: Kevin Farrugia
Date: Tue, 26 Oct 2021 11:15:34 +0200
Subject: [PATCH 1/4] Updated naming convention and grouping on console log preload queries

---
Notes:
    Reviewer notes (ignored by `git am`; not part of the commit message).

    * getResourceHints(payload) parses the page payload, then its `_almanac`
      JSON, and returns one boolean per hint name telling whether any
      link node has that `rel` (compared lower-cased). If parsing throws,
      every flag is returned as false.
    * The three consoleLog_* queries bucket pages per client by the number
      of matching console messages and report freq / total / pct per bucket.
    * Result columns are renamed from camelCase to snake_case
      (num_incorrect_crossorigin, num_incorrect_type, num_unused_preload).
    * consoleLog_incorrect_crossorigin.sql filters on
      `hints.preload IS NOT NULL` in this patch. The UDF returns true/false
      on both of its paths, so this predicate does not appear to exclude
      pages without a preload hint; patch 2/4 changes it to `hints.preload`.
    * NOTE(review): the `RETURNS STRUCT<...>` / `RETURNS ARRAY<STRUCT<...>>`
      type lists below were rebuilt from the keys and value types that the
      JavaScript bodies produce - confirm against the repository.

 .../consoleLog_incorrect_crossorigin.sql      | 48 +++++++++++++++++
 ...consoleLog_incorrect_crossorigin_type.sql} |  0
 .../consoleLog_incorrect_type.sql             | 29 +++++++++--
 .../consoleLog_missing_crossorigin.sql        | 27 ----------
 .../consoleLog_unused_preload.sql             | 29 +++++++++--
 ...reload_imagesizes_imagesrcset_adoption.sql | 51 +++++++++++++++++++
 6 files changed, 149 insertions(+), 35 deletions(-)
 create mode 100644 sql/2021/resource-hints/consoleLog_incorrect_crossorigin.sql
 rename sql/2021/resource-hints/{consoleLog_missing_crossorigin_type.sql => consoleLog_incorrect_crossorigin_type.sql} (100%)
 delete mode 100644 sql/2021/resource-hints/consoleLog_missing_crossorigin.sql
 create mode 100644 sql/2021/resource-hints/preload_imagesizes_imagesrcset_adoption.sql

diff --git a/sql/2021/resource-hints/consoleLog_incorrect_crossorigin.sql b/sql/2021/resource-hints/consoleLog_incorrect_crossorigin.sql
new file mode 100644
index 00000000000..3d5edf20abf
--- /dev/null
+++ b/sql/2021/resource-hints/consoleLog_incorrect_crossorigin.sql
@@ -0,0 +1,48 @@
+#standardSQL
+# returns the number of pages using preload tags without the required crossorigin attribute
+
+CREATE TEMPORARY FUNCTION getResourceHints(payload STRING)
+RETURNS STRUCT<preload BOOLEAN, prefetch BOOLEAN, preconnect BOOLEAN, prerender BOOLEAN, `dns-prefetch` BOOLEAN, modulepreload BOOLEAN>
+LANGUAGE js AS '''
+var hints = ['preload', 'prefetch', 'preconnect', 'prerender', 'dns-prefetch', 'modulepreload'];
+try {
+  var $ = JSON.parse(payload);
+  var almanac = JSON.parse($._almanac);
+  return hints.reduce((results, hint) => {
+    results[hint] = !!almanac['link-nodes'].nodes.find(link => link.rel.toLowerCase() == hint);
+    return results;
+  }, {});
+} catch (e) {
+  return hints.reduce((results, hint) => {
+    results[hint] = false;
+    return results;
+  }, {});
+}
+''' ;
+
+SELECT
+  client,
+  ARRAY_LENGTH(value) AS num_incorrect_crossorigin,
+  COUNT(0) AS freq,
+  SUM(COUNT(0)) OVER (PARTITION BY client) AS total,
+  COUNT(0) / SUM(COUNT(0)) OVER (PARTITION BY client) AS pct
+FROM (
+  SELECT
+    client,
+    REGEXP_EXTRACT_ALL(consoleLog, r'A preload for (.+?) is found, but is not used because the request credentials mode does not match') AS value
+  FROM (
+    SELECT
+      _TABLE_SUFFIX AS client,
+      JSON_EXTRACT(payload, "$._consoleLog") AS consoleLog,
+      getResourceHints(payload) AS hints
+    FROM
+      `httparchive.pages.2021_07_01_*`
+  )
+  WHERE hints.preload IS NOT NULL
+)
+GROUP BY
+  client,
+  num_incorrect_crossorigin
+ORDER BY
+  client,
+  freq DESC
diff --git a/sql/2021/resource-hints/consoleLog_missing_crossorigin_type.sql b/sql/2021/resource-hints/consoleLog_incorrect_crossorigin_type.sql
similarity index 100%
rename from sql/2021/resource-hints/consoleLog_missing_crossorigin_type.sql
rename to sql/2021/resource-hints/consoleLog_incorrect_crossorigin_type.sql
diff --git a/sql/2021/resource-hints/consoleLog_incorrect_type.sql b/sql/2021/resource-hints/consoleLog_incorrect_type.sql
index df7bf159d82..e8dfcc7a403 100644
--- a/sql/2021/resource-hints/consoleLog_incorrect_type.sql
+++ b/sql/2021/resource-hints/consoleLog_incorrect_type.sql
@@ -1,9 +1,28 @@
 #standardSQL
 # returns the number of pages which preload a resource of the incorrect script type
 
+CREATE TEMPORARY FUNCTION getResourceHints(payload STRING)
+RETURNS STRUCT<preload BOOLEAN, prefetch BOOLEAN, preconnect BOOLEAN, prerender BOOLEAN, `dns-prefetch` BOOLEAN, modulepreload BOOLEAN>
+LANGUAGE js AS '''
+var hints = ['preload', 'prefetch', 'preconnect', 'prerender', 'dns-prefetch', 'modulepreload'];
+try {
+  var $ = JSON.parse(payload);
+  var almanac = JSON.parse($._almanac);
+  return hints.reduce((results, hint) => {
+    results[hint] = !!almanac['link-nodes'].nodes.find(link => link.rel.toLowerCase() == hint);
+    return results;
+  }, {});
+} catch (e) {
+  return hints.reduce((results, hint) => {
+    results[hint] = false;
+    return results;
+  }, {});
+}
+''' ;
+
 SELECT
   client,
-  ARRAY_LENGTH(value) AS numOfIncorrectType,
+  ARRAY_LENGTH(value) AS num_incorrect_type,
   COUNT(0) AS freq,
   SUM(COUNT(0)) OVER (PARTITION BY client) AS total,
   COUNT(0) / SUM(COUNT(0)) OVER (PARTITION BY client) AS pct
@@ -14,14 +33,16 @@ FROM (
   FROM (
     SELECT
       _TABLE_SUFFIX AS client,
-      JSON_EXTRACT(payload, "$._consoleLog") AS consoleLog
+      JSON_EXTRACT(payload, "$._consoleLog") AS consoleLog,
+      getResourceHints(payload) AS hints
     FROM
       `httparchive.pages.2021_07_01_*`
   )
+  WHERE hints.preload
 )
 GROUP BY
   client,
-  numOfIncorrectType
+  num_incorrect_type
 ORDER BY
   client,
-  numOfIncorrectType
+  num_incorrect_type
diff --git a/sql/2021/resource-hints/consoleLog_missing_crossorigin.sql b/sql/2021/resource-hints/consoleLog_missing_crossorigin.sql
deleted file mode 100644
index 6a2996e737d..00000000000
--- a/sql/2021/resource-hints/consoleLog_missing_crossorigin.sql
+++ /dev/null
@@ -1,27 +0,0 @@
-#standardSQL
-# returns the number of pages using preload tags without the required crossorigin attribute
-
-SELECT
-  client,
-  ARRAY_LENGTH(value) AS numOfMissingCrossorigin,
-  COUNT(0) AS freq,
-  SUM(COUNT(0)) OVER (PARTITION BY client) AS total,
-  COUNT(0) / SUM(COUNT(0)) OVER (PARTITION BY client) AS pct
-FROM (
-  SELECT
-    client,
-    REGEXP_EXTRACT_ALL(consoleLog, r'A preload for (.+?) is found, but is not used because the request credentials mode does not match') AS value
-  FROM (
-    SELECT
-      _TABLE_SUFFIX AS client,
-      JSON_EXTRACT(payload, "$._consoleLog") AS consoleLog
-    FROM
-      `httparchive.pages.2021_07_01_*`
-  )
-)
-GROUP BY
-  client,
-  numOfMissingCrossorigin
-ORDER BY
-  client,
-  freq DESC
diff --git a/sql/2021/resource-hints/consoleLog_unused_preload.sql b/sql/2021/resource-hints/consoleLog_unused_preload.sql
index 247fcd901be..9c19d960dd1 100644
--- a/sql/2021/resource-hints/consoleLog_unused_preload.sql
+++ b/sql/2021/resource-hints/consoleLog_unused_preload.sql
@@ -1,9 +1,28 @@
 #standardSQL
 # returns the number of unused preloaded resources
 
+CREATE TEMPORARY FUNCTION getResourceHints(payload STRING)
+RETURNS STRUCT<preload BOOLEAN, prefetch BOOLEAN, preconnect BOOLEAN, prerender BOOLEAN, `dns-prefetch` BOOLEAN, modulepreload BOOLEAN>
+LANGUAGE js AS '''
+var hints = ['preload', 'prefetch', 'preconnect', 'prerender', 'dns-prefetch', 'modulepreload'];
+try {
+  var $ = JSON.parse(payload);
+  var almanac = JSON.parse($._almanac);
+  return hints.reduce((results, hint) => {
+    results[hint] = !!almanac['link-nodes'].nodes.find(link => link.rel.toLowerCase() == hint);
+    return results;
+  }, {});
+} catch (e) {
+  return hints.reduce((results, hint) => {
+    results[hint] = false;
+    return results;
+  }, {});
+}
+''' ;
+
 SELECT
   client,
-  ARRAY_LENGTH(value) AS numOfUnusedPreloads,
+  ARRAY_LENGTH(value) AS num_unused_preload,
   COUNT(0) AS freq,
   SUM(COUNT(0)) OVER (PARTITION BY client) AS total,
   COUNT(0) / SUM(COUNT(0)) OVER (PARTITION BY client) AS pct
@@ -14,14 +33,16 @@ FROM (
   FROM (
     SELECT
       _TABLE_SUFFIX AS client,
-      JSON_EXTRACT(payload, "$._consoleLog") AS consoleLog
+      JSON_EXTRACT(payload, "$._consoleLog") AS consoleLog,
+      getResourceHints(payload) AS hints
     FROM
       `httparchive.pages.2021_07_01_*`
   )
+  WHERE hints.preload
 )
 GROUP BY
   client,
-  numOfUnusedPreloads
+  num_unused_preload
 ORDER BY
   client,
-  numOfUnusedPreloads
+  num_unused_preload
diff --git a/sql/2021/resource-hints/preload_imagesizes_imagesrcset_adoption.sql b/sql/2021/resource-hints/preload_imagesizes_imagesrcset_adoption.sql
new file mode 100644
index 00000000000..466d80d1467
--- /dev/null
+++ b/sql/2021/resource-hints/preload_imagesizes_imagesrcset_adoption.sql
@@ -0,0 +1,51 @@
+#standardSQL
+# Attribute popularity for imagesrcset and imagesizes on rel="preload"
+
+CREATE TEMPORARY FUNCTION getResourceHintAttrs(payload STRING)
+RETURNS ARRAY<STRUCT<name STRING, attribute STRING, value STRING>>
+LANGUAGE js AS '''
+var hints = new Set(['preload']);
+var attributes = ['imagesrcset', 'imagesizes'];
+try {
+  var $ = JSON.parse(payload);
+  var almanac = JSON.parse($._almanac);
+  return almanac['link-nodes'].nodes.reduce((results, link) => {
+    var hint = link.rel.toLowerCase();
+    if (!hints.has(hint)) {
+      return results;
+    }
+    attributes.forEach(attribute => {
+      var value = link[attribute];
+      results.push({
+        name: hint,
+        attribute: attribute,
+        // Support empty strings.
+        value: typeof value == 'string' ? value : null
+      });
+    });
+    return results;
+  }, []);
+} catch (e) {
+  return [];
+}
+''' ;
+
+SELECT
+  _TABLE_SUFFIX AS client,
+  hint.name AS name,
+  hint.attribute AS attribute,
+  COUNTIF(hint.value IS NOT NULL) AS freq,
+  SUM(COUNT(0)) OVER (PARTITION BY _TABLE_SUFFIX, hint.name, hint.attribute) AS total,
+  COUNTIF(hint.value IS NOT NULL) / SUM(COUNT(0)) OVER (PARTITION BY _TABLE_SUFFIX, hint.name, hint.attribute) AS pct
+FROM
+  `httparchive.pages.2021_07_01_*`,
+  UNNEST(getResourceHintAttrs(payload)) AS hint
+GROUP BY
+  client,
+  name,
+  attribute
+ORDER BY
+  client,
+  name,
+  attribute,
+  pct DESC

From 8e73a223818759d4055dda2ba1be2f6aab0bad04 Mon Sep 17 00:00:00 2001
From: Kevin Farrugia
Date: Tue, 26 Oct 2021 12:02:14 +0200
Subject: [PATCH 2/4] Minor correction on filtering incorrect crossorigin

---
Notes:
    Reviewer notes (ignored by `git am`; not part of the commit message).

    * Replaces `hints.preload IS NOT NULL` with the boolean `hints.preload`,
      matching the filter already used by consoleLog_incorrect_type.sql and
      consoleLog_unused_preload.sql in patch 1/4.

 sql/2021/resource-hints/consoleLog_incorrect_crossorigin.sql | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sql/2021/resource-hints/consoleLog_incorrect_crossorigin.sql b/sql/2021/resource-hints/consoleLog_incorrect_crossorigin.sql
index 3d5edf20abf..3ceed42f666 100644
--- a/sql/2021/resource-hints/consoleLog_incorrect_crossorigin.sql
+++ b/sql/2021/resource-hints/consoleLog_incorrect_crossorigin.sql
@@ -38,7 +38,7 @@ FROM (
     FROM
       `httparchive.pages.2021_07_01_*`
   )
-  WHERE hints.preload IS NOT NULL
+  WHERE hints.preload
 )
 GROUP BY
   client,

From 236ab01374a08cb066ffe96bc6ef8d57f8419b82 Mon Sep 17 00:00:00 2001
From: Kevin Farrugia
Date: Tue, 26 Oct 2021 23:53:42 +0200
Subject: [PATCH 3/4] Updated with code review feedback

---
Notes:
    Reviewer notes (ignored by `git am`; not part of the commit message).

    * Drops the PARTITION BY from the window, so `total` becomes the row
      count summed over every (client, name, attribute) group in the result.
      Patch 4/4 changes this window again.

 .../preload_imagesizes_imagesrcset_adoption.sql               | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/sql/2021/resource-hints/preload_imagesizes_imagesrcset_adoption.sql b/sql/2021/resource-hints/preload_imagesizes_imagesrcset_adoption.sql
index 466d80d1467..efc3123e099 100644
--- a/sql/2021/resource-hints/preload_imagesizes_imagesrcset_adoption.sql
+++ b/sql/2021/resource-hints/preload_imagesizes_imagesrcset_adoption.sql
@@ -35,8 +35,8 @@ SELECT
   hint.name AS name,
   hint.attribute AS attribute,
   COUNTIF(hint.value IS NOT NULL) AS freq,
-  SUM(COUNT(0)) OVER (PARTITION BY _TABLE_SUFFIX, hint.name, hint.attribute) AS total,
-  COUNTIF(hint.value IS NOT NULL) / SUM(COUNT(0)) OVER (PARTITION BY _TABLE_SUFFIX, hint.name, hint.attribute) AS pct
+  SUM(COUNT(0)) OVER () AS total,
+  COUNTIF(hint.value IS NOT NULL) / SUM(COUNT(0)) OVER () AS pct
 FROM
   `httparchive.pages.2021_07_01_*`,
   UNNEST(getResourceHintAttrs(payload)) AS hint

From 7aa5a4e853ce64c211fb31218d545427cd99f756 Mon Sep 17 00:00:00 2001
From: Kevin Farrugia
Date: Wed, 27 Oct 2021 10:55:58 +0200
Subject: [PATCH 4/4] Updated preload imagesizes and imagesrcset query to use total number of preloads as denominator

---
Notes:
    Reviewer notes (ignored by `git am`; not part of the commit message).

    * `total` is now summed per (client, hint name), so the imagesrcset and
      imagesizes rows of one client share a single denominator.
    * NOTE(review): getResourceHintAttrs emits one row per attribute for
      every preload link, and this partition sums the rows of both
      attributes. The denominator therefore looks like twice the number of
      preload links rather than the number of preloads - confirm whether
      `pct` should be computed against half of this total.

 .../preload_imagesizes_imagesrcset_adoption.sql               | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/sql/2021/resource-hints/preload_imagesizes_imagesrcset_adoption.sql b/sql/2021/resource-hints/preload_imagesizes_imagesrcset_adoption.sql
index efc3123e099..2895fe8f485 100644
--- a/sql/2021/resource-hints/preload_imagesizes_imagesrcset_adoption.sql
+++ b/sql/2021/resource-hints/preload_imagesizes_imagesrcset_adoption.sql
@@ -35,8 +35,8 @@ SELECT
   hint.name AS name,
   hint.attribute AS attribute,
   COUNTIF(hint.value IS NOT NULL) AS freq,
-  SUM(COUNT(0)) OVER () AS total,
-  COUNTIF(hint.value IS NOT NULL) / SUM(COUNT(0)) OVER () AS pct
+  SUM(COUNT(0)) OVER (PARTITION BY _TABLE_SUFFIX, hint.name) AS total,
+  COUNTIF(hint.value IS NOT NULL) / SUM(COUNT(0)) OVER (PARTITION BY _TABLE_SUFFIX, hint.name) AS pct
 FROM
   `httparchive.pages.2021_07_01_*`,
   UNNEST(getResourceHintAttrs(payload)) AS hint