diff --git a/sql/2020/01_CSS/all_features.sql b/sql/2020/01_CSS/all_features.sql new file mode 100644 index 00000000000..5d9aad56b54 --- /dev/null +++ b/sql/2020/01_CSS/all_features.sql @@ -0,0 +1,12 @@ +SELECT + client, + feature, + num_urls AS freq, + total_urls AS total, + pct_urls AS pct_pages +FROM + `httparchive.blink_features.usage` +WHERE + yyyymmdd = '20200801' +ORDER BY + pct_pages DESC diff --git a/sql/2020/01_CSS/all_functions.sql b/sql/2020/01_CSS/all_functions.sql new file mode 100644 index 00000000000..a9c214d91e4 --- /dev/null +++ b/sql/2020/01_CSS/all_functions.sql @@ -0,0 +1,54 @@ +#standardSQL +CREATE TEMPORARY FUNCTION getProperties(css STRING) RETURNS ARRAY LANGUAGE js AS ''' +try { + function compute() { + let ret = {}; + + walkDeclarations(ast, ({property, value}) => { + if (value.length > 1000 || !value.includes("(") || !value.includes(")")) { + return; + } + + for (let {name} of extractFunctionCalls(value)) { + incrementByKey(ret, name); + } + }); + + return sortObject(ret); + + } + + let ast = JSON.parse(css); + let props = compute(ast); + return Object.entries(props).flatMap(([prop, freq]) => { + return Array(freq).fill(prop); + }); +} +catch (e) { + return []; +} +''' +OPTIONS (library="gs://httparchive/lib/css-utils.js"); + +SELECT + * +FROM ( + SELECT + client, + prop, + COUNT(DISTINCT page) AS pages, + COUNT(0) AS freq, + SUM(COUNT(0)) OVER (PARTITION BY client) AS total, + COUNT(0) / SUM(COUNT(0)) OVER (PARTITION BY client) AS pct + FROM + `httparchive.almanac.parsed_css`, + UNNEST(getProperties(css)) AS prop + WHERE + date = '2020-08-01' + GROUP BY + client, + prop) +WHERE + pages >= 1000 +ORDER BY + pct DESC diff --git a/sql/2020/01_CSS/all_properties.sql b/sql/2020/01_CSS/all_properties.sql new file mode 100644 index 00000000000..32f1bfe4027 --- /dev/null +++ b/sql/2020/01_CSS/all_properties.sql @@ -0,0 +1,51 @@ +#standardSQL +CREATE TEMPORARY FUNCTION getProperties(css STRING) RETURNS ARRAY LANGUAGE js AS ''' +try { + function 
compute(ast) { + let ret = {}; + + walkDeclarations(ast, ({property, value}) => { + if (!property.startsWith("--")) { // Custom props are case sensitive + property = property.toLowerCase(); + } + + incrementByKey(ret, property); + }); + + return sortObject(ret); + } + + let ast = JSON.parse(css); + let props = compute(ast); + return Object.entries(props).flatMap(([prop, freq]) => { + return Array(freq).fill(prop); + }); +} +catch (e) { + return []; +} +''' +OPTIONS (library="gs://httparchive/lib/css-utils.js"); + +SELECT + * +FROM ( + SELECT + client, + prop, + COUNT(DISTINCT page) AS pages, + COUNT(0) AS freq, + SUM(COUNT(0)) OVER (PARTITION BY client) AS total, + COUNT(0) / SUM(COUNT(0)) OVER (PARTITION BY client) AS pct + FROM + `httparchive.almanac.parsed_css`, + UNNEST(getProperties(css)) AS prop + WHERE + date = '2020-08-01' + GROUP BY + client, + prop) +WHERE + pages >= 1000 +ORDER BY + pct DESC diff --git a/sql/2020/01_CSS/color_arg_comma.sql b/sql/2020/01_CSS/color_arg_comma.sql index a9efa02d767..4f50d8995ee 100644 --- a/sql/2020/01_CSS/color_arg_comma.sql +++ b/sql/2020/01_CSS/color_arg_comma.sql @@ -89,6 +89,7 @@ try { } walkDeclarations(ast, ({property, value}) => { + if (value.length > 1000) return; usage.hex[3] += countMatches(value, /#[a-f0-9]{3}\b/gi); usage.hex[4] += countMatches(value, /#[a-f0-9]{4}\b/gi); usage.hex[6] += countMatches(value, /#[a-f0-9]{6}\b/gi); @@ -163,8 +164,6 @@ FROM ( FROM `httparchive.almanac.parsed_css` WHERE - date = '2020-08-01' AND - # Limit the size of the CSS to avoid OOM crashes. 
- LENGTH(css) < 0.1 * 1024 * 1024) + date = '2020-08-01') GROUP BY client \ No newline at end of file diff --git a/sql/2020/01_CSS/color_formats.sql b/sql/2020/01_CSS/color_formats.sql index 15ec578a6ad..71cf16e23be 100644 --- a/sql/2020/01_CSS/color_formats.sql +++ b/sql/2020/01_CSS/color_formats.sql @@ -89,6 +89,7 @@ try { } walkDeclarations(ast, ({property, value}) => { + if (value.length > 1000) return; // First remove url() references to avoid them mucking the results for (let f of extractFunctionCalls(value, {names: "url"})) { let [start, end] = f.pos; @@ -184,9 +185,7 @@ FROM ( `httparchive.almanac.parsed_css`, UNNEST(getColorFormats(css)) AS format WHERE - date = '2020-08-01' AND - # Limit the size of the CSS to avoid OOM crashes. - LENGTH(css) < 0.1 * 1024 * 1024) + date = '2020-08-01') GROUP BY client, format diff --git a/sql/2020/01_CSS/color_functions.sql b/sql/2020/01_CSS/color_functions.sql index f7935dba3de..d853aeaa0e9 100644 --- a/sql/2020/01_CSS/color_functions.sql +++ b/sql/2020/01_CSS/color_functions.sql @@ -89,6 +89,7 @@ try { } walkDeclarations(ast, ({property, value}) => { + if (value.length > 1000) return; usage.hex[3] += countMatches(value, /#[a-f0-9]{3}\b/gi); usage.hex[4] += countMatches(value, /#[a-f0-9]{4}\b/gi); usage.hex[6] += countMatches(value, /#[a-f0-9]{6}\b/gi); @@ -166,8 +167,6 @@ FROM ( UNNEST(getColorFunctions(css)) AS function WHERE date = '2020-08-01' AND - # Limit the size of the CSS to avoid OOM crashes. 
- LENGTH(css) < 0.1 * 1024 * 1024 AND function IS NOT NULL) JOIN ( SELECT diff --git a/sql/2020/01_CSS/color_keywords.sql b/sql/2020/01_CSS/color_keywords.sql index 79b74da214f..a390bbfac5f 100644 --- a/sql/2020/01_CSS/color_keywords.sql +++ b/sql/2020/01_CSS/color_keywords.sql @@ -89,6 +89,7 @@ try { } walkDeclarations(ast, ({property, value}) => { + if (value.length > 1000) return; // First remove url() references to avoid them mucking the results for (let f of extractFunctionCalls(value, {names: "url"})) { let [start, end] = f.pos; @@ -174,9 +175,7 @@ FROM ( `httparchive.almanac.parsed_css`, UNNEST(getColorKeywords(css)) AS keyword WHERE - date = '2020-08-01' AND - # Limit the size of the CSS to avoid OOM crashes. - LENGTH(css) < 0.1 * 1024 * 1024) + date = '2020-08-01') GROUP BY client, keyword diff --git a/sql/2020/01_CSS/color_keywords_no_system_casefold.sql b/sql/2020/01_CSS/color_keywords_no_system_casefold.sql index 22c9f1343fb..67bd12475f0 100644 --- a/sql/2020/01_CSS/color_keywords_no_system_casefold.sql +++ b/sql/2020/01_CSS/color_keywords_no_system_casefold.sql @@ -89,6 +89,7 @@ try { } walkDeclarations(ast, ({property, value}) => { + if (value.length > 1000) return; // First remove url() references to avoid them mucking the results for (let f of extractFunctionCalls(value, {names: "url"})) { let [start, end] = f.pos; @@ -174,9 +175,7 @@ FROM ( `httparchive.almanac.parsed_css`, UNNEST(getColorKeywords(css)) AS keyword WHERE - date = '2020-08-01' AND - # Limit the size of the CSS to avoid OOM crashes. 
- LENGTH(css) < 0.1 * 1024 * 1024) + date = '2020-08-01') GROUP BY client, keyword diff --git a/sql/2020/01_CSS/color_p3.sql b/sql/2020/01_CSS/color_p3.sql index 9d6d5902f2d..8eea33ee87a 100644 --- a/sql/2020/01_CSS/color_p3.sql +++ b/sql/2020/01_CSS/color_p3.sql @@ -89,6 +89,7 @@ try { } walkDeclarations(ast, ({property, value}) => { + if (value.length > 1000) return; // First remove url() references to avoid them mucking the results for (let f of extractFunctionCalls(value, {names: "url"})) { let [start, end] = f.pos; @@ -173,9 +174,7 @@ FROM ( `httparchive.almanac.parsed_css`, UNNEST(getP3Usage(css)) AS p3 WHERE - date = '2020-08-01' AND - # Limit the size of the CSS to avoid OOM crashes. - LENGTH(css) < 0.1 * 1024 * 1024) + date = '2020-08-01') GROUP BY client, p3 diff --git a/sql/2020/01_CSS/color_spaces.sql b/sql/2020/01_CSS/color_spaces.sql index 33a517bd544..45685a939b6 100644 --- a/sql/2020/01_CSS/color_spaces.sql +++ b/sql/2020/01_CSS/color_spaces.sql @@ -89,6 +89,7 @@ try { } walkDeclarations(ast, ({property, value}) => { + if (value.length > 1000) return; // First remove url() references to avoid them mucking the results for (let f of extractFunctionCalls(value, {names: "url"})) { let [start, end] = f.pos; @@ -172,9 +173,7 @@ FROM ( `httparchive.almanac.parsed_css`, UNNEST(getColorSpaces(css)) AS color_space WHERE - date = '2020-08-01' AND - # Limit the size of the CSS to avoid OOM crashes. 
- LENGTH(css) < 0.1 * 1024 * 1024) + date = '2020-08-01') GROUP BY client, color_space diff --git a/sql/2020/01_CSS/gradient_adoption.sql b/sql/2020/01_CSS/gradient_adoption.sql new file mode 100644 index 00000000000..aabd7d5ddbe --- /dev/null +++ b/sql/2020/01_CSS/gradient_adoption.sql @@ -0,0 +1,175 @@ +#standardSQL +CREATE TEMPORARY FUNCTION getGradientAdoption(css STRING) +RETURNS ARRAY LANGUAGE js AS ''' +try { + function compute(ast) { + let ret = { + functions: {}, // usage by gradient function + properties: {}, // usage by property + max_stops: 0, + max_stops_gradient: [], + two_positions: 0, + hints: 0, + hard_stops: 0 + }; + let stopCount = []; + + const keywords = [ + "aliceblue", "antiquewhite", "aqua", "aquamarine", "azure", "beige", "bisque", "black", "blanchedalmond", "blue", "blueviolet", "brown", "burlywood", "cadetblue", "chartreuse", + "chocolate", "coral", "cornflowerblue", "cornsilk", "crimson", "cyan", "darkblue", "darkcyan", "darkgoldenrod", "darkgray", "darkgreen", "darkgrey", "darkkhaki", "darkmagenta", + "darkolivegreen", "darkorange", "darkorchid", "darkred", "darksalmon", "darkseagreen", "darkslateblue", "darkslategray", "darkslategrey", "darkturquoise", "darkviolet", + "deeppink", "deepskyblue", "dimgray", "dimgrey", "dodgerblue", "firebrick", "floralwhite", "forestgreen", "fuchsia", "gainsboro", "ghostwhite", "gold", "goldenrod", "gray", + "green", "greenyellow", "grey", "honeydew", "hotpink", "indianred", "indigo", "ivory", "khaki", "lavender", "lavenderblush", "lawngreen", "lemonchiffon", "lightblue", "lightcoral", + "lightcyan", "lightgoldenrodyellow", "lightgray", "lightgreen", "lightgrey", "lightpink", "lightsalmon", "lightseagreen", "lightskyblue", "lightslategray", "lightslategrey", + "lightsteelblue", "lightyellow", "lime", "limegreen", "linen", "magenta", "maroon", "mediumaquamarine", "mediumblue", "mediumorchid", "mediumpurple", "mediumseagreen", + "mediumslateblue", "mediumspringgreen", "mediumturquoise", "mediumvioletred", 
"midnightblue", "mintcream", "mistyrose", "moccasin", "navajowhite", "navy", "oldlace", + "olive", "olivedrab", "orange", "orangered", "orchid", "palegoldenrod", "palegreen", "paleturquoise", "palevioletred", "papayawhip", "peachpuff", "peru", "pink", "plum", + "powderblue", "purple", "rebeccapurple", "red", "rosybrown", "royalblue", "saddlebrown", "salmon", "sandybrown", "seagreen", "seashell", "sienna", "silver", "skyblue", + "slateblue", "slategray", "slategrey", "snow", "springgreen", "steelblue", "tan", "teal", "thistle", "tomato", "turquoise", "violet", "wheat", "white", "whitesmoke", + "yellow", "yellowgreen", "transparent", "currentcolor", + "ActiveBorder", "ActiveCaption", "AppWorkspace", "Background", "ButtonFace", "ButtonHighlight", "ButtonShadow", "ButtonText", "CaptionText", + "GrayText", "Highlight", "HighlightText", "InactiveBorder", "InactiveCaption", "InactiveCaptionText", "InfoBackground", "InfoText", + "Menu", "MenuText", "Scrollbar", "ThreeDDarkShadow", "ThreeDFace", "ThreeDHighlight", "ThreeDLightShadow", "ThreeDShadow", "Window", "WindowFrame", "WindowText" + ]; + const keywordRegex = RegExp(`\\b(? { + if (value.length > 1000) return; + for (let gradient of extractFunctionCalls(value, {names: /-gradient$/})) { + + let {name, args} = gradient; + incrementByKey(ret.functions, name); + + incrementByKey(ret.properties, property.indexOf("--") === 0? 
"--*" : property); + + // Light color stop parsing + + // Collapse nested function calls into empty function calls + for (let i=0, lastIndex; (i = args.indexOf("(", lastIndex + 1)) > -1; ) { + let a = parsel.gobbleParens(args, i); + args = args.substring(0, i) + "()" + args.substring(i + a.length); + lastIndex = i; + } + + let stops = args.split(/\\s*,\\s*/); + + // Remove first arg if it's params and not a color stop + if (/^(at|to|from)\\s|ellipse|circle|(?:farthest|closest)-(?:side|corner)|[\\d.]+(deg|grad|rad|turn)/.test(stops[0])) { + stops.shift(); + } + + stopCount.push(stops.length); + + if (ret.max_stops < stops.length) { + ret.max_stops = stops.length; + ret.max_stops_gradient = []; + } + + if (ret.max_stops === stops.length) { + ret.max_stops_gradient.push(value.substring(...gradient.pos)); + } + + // The rest will fail if we have variables with fallbacks in the args so let's just skip those altogether for now + if (/\\bvar\\(/.test(args)) { + continue; + } + + // Separate color and position(s) + stops = stops.map(s => { + if (/\\s/.test(s)) { + // Even though the spec doesn't mandate an order, all browsers implement the older grammar + // with the position after the color, so placing the position before the color must be extremely rare. + let parts = s.split(/\\s+/); + return {color: parts[0], pos: parts.slice(1)}; + } + + // We only have one thing, is it a color or a position? + if (/#[a-f0-9]+|(?:rgba?|hsla?|color)\\(/.test(s) || keywordRegex.test(s)) { + keywordRegex.lastIndex = 0; + return {color: s}; + } + + return {pos: s}; + }); + + for (let i=0; i 1) { + ret.two_positions++; + } + + if (!s.color) { + // No color, it must be a hint + ret.hints++; + continue; + } + + let prev = stops[i - 1]; + + // Calculate hard stops + if (prev && prev.pos && s.pos && !s.pos.join("").includes("calc()")) { + let pos = s.pos[0]; + let prevPos = prev.pos[prev.pos.length === 1? 
0 : 1]; + + if (parseFloat(pos) === 0 || pos === prevPos) { + ret.hard_stops++; + } + } + } + } + }, { + properties: /^--|-image$|^background$|^content$/ + }); + + // Calculate average and max number of stops + stopCount = stopCount.sort((a, b) => b - a); + ret.avg_stops = stopCount.reduce((a, c) => a + c, 0) / stopCount.length; + + let mi = (stopCount.length - 1) / 2; + ret.median_stops = stopCount.length % 2? stopCount[mi] : (stopCount[Math.floor(mi)] + stopCount[Math.ceil(mi)]) / 2; + ret.stop_count = stopCount; + + return ret; + } + + const ast = JSON.parse(css); + let gradient = compute(ast); + return Object.keys(gradient.properties); +} catch (e) { + return []; +} +''' +OPTIONS (library="gs://httparchive/lib/css-utils.js"); + +SELECT + client, + COUNT(DISTINCT page) AS pages, + total, + COUNT(DISTINCT page) / total AS pct +FROM ( + SELECT DISTINCT + client, + page, + property + FROM + `httparchive.almanac.parsed_css`, + UNNEST(getGradientAdoption(css)) AS property + WHERE + date = '2020-08-01' AND + property IS NOT NULL) +JOIN ( + SELECT + _TABLE_SUFFIX AS client, + COUNT(0) AS total + FROM + `httparchive.summary_pages.2020_08_01_*` + GROUP BY + client) +USING + (client) +GROUP BY + client, + total +ORDER BY + pct DESC \ No newline at end of file diff --git a/sql/2020/01_CSS/gradient_bg_properties.sql b/sql/2020/01_CSS/gradient_bg_properties.sql index a5eb94311b8..f09390b66d7 100644 --- a/sql/2020/01_CSS/gradient_bg_properties.sql +++ b/sql/2020/01_CSS/gradient_bg_properties.sql @@ -34,6 +34,7 @@ try { const keywordRegex = RegExp(`\\b(? { + if (value.length > 1000) return; for (let gradient of extractFunctionCalls(value, {names: /-gradient$/})) { let {name, args} = gradient; @@ -157,8 +158,6 @@ FROM ( UNNEST(getGradientUsageBeyondBg(css)) AS property WHERE date = '2020-08-01' AND - # Limit the size of the CSS to avoid OOM crashes. 
- LENGTH(css) < 0.1 * 1024 * 1024 AND property IS NOT NULL) JOIN ( SELECT diff --git a/sql/2020/01_CSS/gradient_functions.sql b/sql/2020/01_CSS/gradient_functions.sql index 49e7a235515..151941fa420 100644 --- a/sql/2020/01_CSS/gradient_functions.sql +++ b/sql/2020/01_CSS/gradient_functions.sql @@ -34,6 +34,7 @@ try { const keywordRegex = RegExp(`\\b(? { + if (value.length > 1000) return; for (let gradient of extractFunctionCalls(value, {names: /-gradient$/})) { let {name, args} = gradient; @@ -157,8 +158,6 @@ FROM ( UNNEST(getGradientFunctions(css)) AS function WHERE date = '2020-08-01' AND - # Limit the size of the CSS to avoid OOM crashes. - LENGTH(css) < 0.1 * 1024 * 1024 AND function IS NOT NULL) JOIN ( SELECT diff --git a/sql/2020/01_CSS/gradient_hard_stops.sql b/sql/2020/01_CSS/gradient_hard_stops.sql index 0e465105101..3a2b7a893df 100644 --- a/sql/2020/01_CSS/gradient_hard_stops.sql +++ b/sql/2020/01_CSS/gradient_hard_stops.sql @@ -33,6 +33,7 @@ try { const keywordRegex = RegExp(`\\\\b(? { + if (value.length > 1000) return; for (let gradient of extractFunctionCalls(value, {names: /-gradient$/})) { let {name, args} = gradient; @@ -167,9 +168,7 @@ FROM ( FROM `httparchive.almanac.parsed_css` WHERE - date = '2020-08-01' AND - # Limit the size of the CSS to avoid OOM crashes. - LENGTH(css) < 0.1 * 1024 * 1024 + date = '2020-08-01' GROUP BY client, page) diff --git a/sql/2020/01_CSS/gradient_hints.sql b/sql/2020/01_CSS/gradient_hints.sql index 3ddb050c30c..18f3b612f4f 100644 --- a/sql/2020/01_CSS/gradient_hints.sql +++ b/sql/2020/01_CSS/gradient_hints.sql @@ -33,6 +33,7 @@ try { const keywordRegex = RegExp(`\\\\b(? { + if (value.length > 1000) return; for (let gradient of extractFunctionCalls(value, {names: /-gradient$/})) { let {name, args} = gradient; @@ -167,9 +168,7 @@ FROM ( FROM `httparchive.almanac.parsed_css` WHERE - date = '2020-08-01' AND - # Limit the size of the CSS to avoid OOM crashes. 
- LENGTH(css) < 0.1 * 1024 * 1024 + date = '2020-08-01' GROUP BY client, page) diff --git a/sql/2020/01_CSS/gradient_max_stops.sql b/sql/2020/01_CSS/gradient_max_stops.sql index 12c0093c88f..f327699fd7e 100644 --- a/sql/2020/01_CSS/gradient_max_stops.sql +++ b/sql/2020/01_CSS/gradient_max_stops.sql @@ -33,6 +33,7 @@ try { const keywordRegex = RegExp(`\\\\b(? { + if (value.length > 1000) return; for (let gradient of extractFunctionCalls(value, {names: /-gradient$/})) { let {name, args} = gradient; @@ -175,9 +176,7 @@ FROM ( FROM `httparchive.almanac.parsed_css` WHERE - date = '2020-08-01' AND - # Limit the size of the CSS to avoid OOM crashes. - LENGTH(css) < 0.1 * 1024 * 1024 + date = '2020-08-01' GROUP BY client, page), diff --git a/sql/2020/01_CSS/gradient_properties.sql b/sql/2020/01_CSS/gradient_properties.sql index 2fd313f4b8c..393dbc3ce57 100644 --- a/sql/2020/01_CSS/gradient_properties.sql +++ b/sql/2020/01_CSS/gradient_properties.sql @@ -34,6 +34,7 @@ try { const keywordRegex = RegExp(`\\b(? { + if (value.length > 1000) return; for (let gradient of extractFunctionCalls(value, {names: /-gradient$/})) { let {name, args} = gradient; @@ -157,8 +158,6 @@ FROM ( UNNEST(getGradientUsageBeyondBg(css)) AS property WHERE date = '2020-08-01' AND - # Limit the size of the CSS to avoid OOM crashes. - LENGTH(css) < 0.1 * 1024 * 1024 AND property IS NOT NULL) JOIN ( SELECT diff --git a/sql/2020/01_CSS/gradient_stops.sql b/sql/2020/01_CSS/gradient_stops.sql index b413389aaf4..61ff8985cce 100644 --- a/sql/2020/01_CSS/gradient_stops.sql +++ b/sql/2020/01_CSS/gradient_stops.sql @@ -33,6 +33,7 @@ try { const keywordRegex = RegExp(`\\\\b(? { + if (value.length > 1000) return; for (let gradient of extractFunctionCalls(value, {names: /-gradient$/})) { let {name, args} = gradient; @@ -167,9 +168,7 @@ FROM ( `httparchive.almanac.parsed_css`, UNNEST(getColorStops(css)) AS color_stops WHERE - date = '2020-08-01' AND - # Limit the size of the CSS to avoid OOM crashes. 
- LENGTH(css) < 0.1 * 1024 * 1024 + date = '2020-08-01' GROUP BY client, page), diff --git a/sql/2020/01_CSS/gradient_stops_per_gradient.sql b/sql/2020/01_CSS/gradient_stops_per_gradient.sql index a508f128648..7bfbf727ec2 100644 --- a/sql/2020/01_CSS/gradient_stops_per_gradient.sql +++ b/sql/2020/01_CSS/gradient_stops_per_gradient.sql @@ -33,6 +33,7 @@ try { const keywordRegex = RegExp(`\\\\b(? { + if (value.length > 1000) return; for (let gradient of extractFunctionCalls(value, {names: /-gradient$/})) { let {name, args} = gradient; @@ -163,9 +164,7 @@ FROM UNNEST(getColorStops(css)) AS color_stops, UNNEST([10, 25, 50, 75, 90]) AS percentile WHERE - date = '2020-08-01' AND - # Limit the size of the CSS to avoid OOM crashes. - LENGTH(css) < 0.1 * 1024 * 1024 + date = '2020-08-01' GROUP BY percentile, client diff --git a/sql/2020/01_CSS/keyword_totals.sql b/sql/2020/01_CSS/keyword_totals.sql new file mode 100644 index 00000000000..a5911ff5ab9 --- /dev/null +++ b/sql/2020/01_CSS/keyword_totals.sql @@ -0,0 +1,74 @@ +#standardSQL +CREATE TEMPORARY FUNCTION getGlobalKeywords(css STRING) RETURNS +ARRAY> LANGUAGE js AS ''' +try { + function compute(ast) { + let ret = {}; + + walkDeclarations(ast, ({property, value}) => { + let key = value; + + ret[value] = ret[value] || {}; + + incrementByKey(ret[value], "total"); + incrementByKey(ret[value], property); + }, { + values: ["inherit", "initial", "unset", "revert"] + }); + + for (let keyword in ret) { + ret[keyword] = sortObject(ret[keyword]); + } + + return ret; + } + var ast = JSON.parse(css); + var kw = compute(ast); + return Object.entries(kw).flatMap(([keyword, properties]) => { + return Object.entries(properties).map(([property, freq]) => { + return {property, keyword, freq}; + }); + }); +} catch (e) { + return []; +} +''' +OPTIONS (library="gs://httparchive/lib/css-utils.js"); + +SELECT + *, + pages / total_pages AS pct_pages +FROM ( + SELECT + client, + kw.keyword, + kw.property, + SUM(kw.freq) AS freq, + 
SUM(SUM(IF(kw.property = 'total', 0, kw.freq))) OVER (PARTITION BY client, kw.keyword) AS total, + SUM(kw.freq) / SUM(SUM(IF(kw.property = 'total', 0, kw.freq))) OVER (PARTITION BY client, kw.keyword) AS pct, + COUNT(DISTINCT page) AS pages + FROM + `httparchive.almanac.parsed_css`, + UNNEST(getGlobalKeywords(css)) AS kw + WHERE + date = '2020-08-01' + GROUP BY + client, + keyword, + property) +JOIN ( + SELECT + _TABLE_SUFFIX AS client, + COUNT(0) AS total_pages + FROM + `httparchive.summary_pages.2020_08_01_*` + GROUP BY + client) +USING + (client) +WHERE + pct >= 0.01 +ORDER BY + client, + keyword, + pct DESC diff --git a/sql/2020/01_CSS/meta_longhand_first_distribution.sql b/sql/2020/01_CSS/meta_longhand_first_distribution.sql index 69f555f7d14..89fc23cbbb1 100644 --- a/sql/2020/01_CSS/meta_longhand_first_distribution.sql +++ b/sql/2020/01_CSS/meta_longhand_first_distribution.sql @@ -449,22 +449,17 @@ SELECT FROM ( SELECT client, - page, - SUM(property.freq) AS freq_longhand_first + property.freq AS freq_longhand_first FROM `httparchive.almanac.parsed_css`, UNNEST(getLonghandFirstProperties(css)) AS property WHERE - date = '2020-08-01' - GROUP BY - client, - page), + date = '2020-08-01' AND + property.freq > 0), UNNEST([10, 25, 50, 75, 90, 100]) AS percentile -WHERE - freq_longhand_first > 0 GROUP BY percentile, client ORDER BY percentile, - client \ No newline at end of file + client diff --git a/sql/2020/01_CSS/meta_shorthand_first_pages.sql b/sql/2020/01_CSS/meta_shorthand_first_pages.sql new file mode 100644 index 00000000000..9dad8e42f7e --- /dev/null +++ b/sql/2020/01_CSS/meta_shorthand_first_pages.sql @@ -0,0 +1,465 @@ +#standardSQL +CREATE TEMPORARY FUNCTION getShorthandFirstProperties(css STRING) RETURNS +ARRAY> LANGUAGE js AS ''' +try { + function compute(ast) { + let ret = { + shorthands: {}, + longhands: {}, + longhands_before_shorthands: {}, + shorthands_before_longhands: {}, + values: {} + }; + + const shorthands = { + "animation": [ + 
"animation-duration", + "animation-timing-function", + "animation-delay", + "animation-iteration-count", + "animation-direction", + "animation-fill-mode", + "animation-play-state", + "animation-name" + ], + "background": [ + "background-image", + "background-position-x", + "background-position-y", + "background-size", + "background-repeat-x", + "background-repeat-y", + "background-attachment", + "background-origin", + "background-clip", + "background-color" + ], + "background-position": [ + "background-position-x", + "background-position-y" + ], + "background-repeat": [ + "background-repeat-x", + "background-repeat-y" + ], + "border": [ + "border-top-color", + "border-top-style", + "border-top-width", + "border-right-color", + "border-right-style", + "border-right-width", + "border-bottom-color", + "border-bottom-style", + "border-bottom-width", + "border-left-color", + "border-left-style", + "border-left-width", + "border-image-source", + "border-image-slice", + "border-image-width", + "border-image-outset", + "border-image-repeat" + ], + "border-block": [ + "border-block-start-color", + "border-block-start-style", + "border-block-start-width", + "border-block-end-color", + "border-block-end-style", + "border-block-end-width" + ], + "border-block-color": [ + "border-block-start-color", + "border-block-end-color" + ], + "border-block-end": [ + "border-block-end-width", + "border-block-end-style", + "border-block-end-color" + ], + "border-block-start": [ + "border-block-start-width", + "border-block-start-style", + "border-block-start-color" + ], + "border-block-style": [ + "border-block-start-style", + "border-block-end-style" + ], + "border-block-width": [ + "border-block-start-width", + "border-block-end-width" + ], + "border-bottom": [ + "border-bottom-width", + "border-bottom-style", + "border-bottom-color" + ], + "border-color": [ + "border-top-color", + "border-right-color", + "border-bottom-color", + "border-left-color" + ], + "border-image": [ + 
"border-image-source", + "border-image-slice", + "border-image-width", + "border-image-outset", + "border-image-repeat" + ], + "border-inline": [ + "border-inline-start-color", + "border-inline-start-style", + "border-inline-start-width", + "border-inline-end-color", + "border-inline-end-style", + "border-inline-end-width" + ], + "border-inline-color": [ + "border-inline-start-color", + "border-inline-end-color" + ], + "border-inline-end": [ + "border-inline-end-width", + "border-inline-end-style", + "border-inline-end-color" + ], + "border-inline-start": [ + "border-inline-start-width", + "border-inline-start-style", + "border-inline-start-color" + ], + "border-inline-style": [ + "border-inline-start-style", + "border-inline-end-style" + ], + "border-inline-width": [ + "border-inline-start-width", + "border-inline-end-width" + ], + "border-left": [ + "border-left-width", + "border-left-style", + "border-left-color" + ], + "border-radius": [ + "border-top-left-radius", + "border-top-right-radius", + "border-bottom-right-radius", + "border-bottom-left-radius" + ], + "border-right": [ + "border-right-width", + "border-right-style", + "border-right-color" + ], + "border-style": [ + "border-top-style", + "border-right-style", + "border-bottom-style", + "border-left-style" + ], + "border-top": [ + "border-top-width", + "border-top-style", + "border-top-color" + ], + "border-width": [ + "border-top-width", + "border-right-width", + "border-bottom-width", + "border-left-width" + ], + "column-rule": [ + "column-rule-width", + "column-rule-style", + "column-rule-color" + ], + "columns": [ + "column-width", + "column-count" + ], + "flex": [ + "flex-grow", + "flex-shrink", + "flex-basis" + ], + "flex-flow": [ + "flex-direction", + "flex-wrap" + ], + "font": [ + "font-style", + "font-variant-ligatures", + "font-variant-caps", + "font-variant-numeric", + "font-variant-east-asian", + "font-weight", + "font-stretch", + "font-size", + "line-height", + "font-family" + ], + 
"font-variant": [ + "font-variant-ligatures", + "font-variant-caps", + "font-variant-numeric", + "font-variant-east-asian" + ], + "gap": [ + "row-gap", + "column-gap" + ], + "grid": [ + "grid-template-rows", + "grid-template-columns", + "grid-template-areas", + "grid-auto-flow", + "grid-auto-rows", + "grid-auto-columns" + ], + "grid-area": [ + "grid-row-start", + "grid-column-start", + "grid-row-end", + "grid-column-end" + ], + "grid-column": [ + "grid-column-start", + "grid-column-end" + ], + "grid-gap": [ + "row-gap", + "column-gap" + ], + "grid-row": [ + "grid-row-start", + "grid-row-end" + ], + "grid-template": [ + "grid-template-rows", + "grid-template-columns", + "grid-template-areas" + ], + "inset": [ + "top", + "right", + "bottom", + "left" + ], + "inset-block": [ + "inset-block-start", + "inset-block-end" + ], + "inset-inline": [ + "inset-inline-start", + "inset-inline-end" + ], + "list-style": [ + "list-style-position", + "list-style-image", + "list-style-type" + ], + "margin": [ + "margin-top", + "margin-right", + "margin-bottom", + "margin-left" + ], + "margin-block": [ + "margin-block-start", + "margin-block-end" + ], + "margin-inline": [ + "margin-inline-start", + "margin-inline-end" + ], + "marker": [ + "marker-start", + "marker-mid", + "marker-end" + ], + "offset": [ + "offset-position", + "offset-path", + "offset-distance", + "offset-rotate", + "offset-anchor" + ], + "outline": [ + "outline-color", + "outline-style", + "outline-width" + ], + "overflow": [ + "overflow-x", + "overflow-y" + ], + "overscroll-behavior": [ + "overscroll-behavior-x", + "overscroll-behavior-y" + ], + "padding": [ + "padding-top", + "padding-right", + "padding-bottom", + "padding-left" + ], + "padding-block": [ + "padding-block-start", + "padding-block-end" + ], + "padding-inline": [ + "padding-inline-start", + "padding-inline-end" + ], + "place-content": [ + "align-content", + "justify-content" + ], + "place-items": [ + "align-items", + "justify-items" + ], + "place-self": 
[ + "align-self", + "justify-self" + ], + "scroll-margin": [ + "scroll-margin-top", + "scroll-margin-right", + "scroll-margin-bottom", + "scroll-margin-left" + ], + "scroll-margin-block": [ + "scroll-margin-block-start", + "scroll-margin-block-end" + ], + "scroll-margin-inline": [ + "scroll-margin-inline-start", + "scroll-margin-inline-end" + ], + "scroll-padding": [ + "scroll-padding-top", + "scroll-padding-right", + "scroll-padding-bottom", + "scroll-padding-left" + ], + "scroll-padding-block": [ + "scroll-padding-block-start", + "scroll-padding-block-end" + ], + "scroll-padding-inline": [ + "scroll-padding-inline-start", + "scroll-padding-inline-end" + ], + "text-decoration": [ + "text-decoration-line", + "text-decoration-style", + "text-decoration-color" + ], + "transition": [ + "transition-property", + "transition-duration", + "transition-timing-function", + "transition-delay" + ] + }; + + let longhands = {}; + for (let shorthand in shorthands) { + for (let longhand of shorthands[shorthand]) { + longhands[longhand] = longhands[longhand] || new Set(); + longhands[longhand].add(shorthand); + } + + shorthands[shorthand] = new Set(shorthands[shorthand]); + } + + walkRules(ast, rule => { + let seen = new Set(); + + for (let d of rule.declarations) { + let {property, value} = d; + + if (property in shorthands) { + incrementByKey(ret.shorthands, property); + + // Have we seen any of its longhands in this rule? 
+ let seenLonghands = [...shorthands[property]].filter(p => seen.has(p)); + if (seenLonghands.length > 0) { + incrementByKey(ret.longhands_before_shorthands, property); + } + + // If value is simple enough (no functions, strings, repetitions etc), count number of values + if (!matches(value, [/[("',]/, "inherit", "initial", "unset", "revert"])) { + let count = value.split(/\\s+|\\s*\\/\\s*/).length; + ret.values[property] = ret.values[property] || {}; + incrementByKey(ret.values[property], count); + } + + seen.add(property); + } + + if (property in longhands) { + incrementByKey(ret.longhands, property); + + // Have we seen any of its shorthands in this rule? + let seenLonghands = [...longhands[property]].filter(p => seen.has(p)); + if (seenLonghands.length > 0) { + incrementByKey(ret.shorthands_before_longhands, property); + } + + seen.add(property); + } + } + }, {type: "rule"}); + + for (let key in ret) { + if (key !== "values") { + ret[key].total = sumObject(ret[key]); + } + + ret[key] = sortObject(ret[key]); + } + + return ret; + } + + var ast = JSON.parse(css); + var props = compute(ast); + + return Object.entries(props.shorthands_before_longhands).filter(([property]) => { + return property != 'total'; + }).map(([property, freq]) => { + return {property, freq}; + }); +} catch (e) { + return []; +} +''' +OPTIONS (library="gs://httparchive/lib/css-utils.js"); + +SELECT + client, + COUNTIF(freq_shorthand_first > 0) AS pages, + COUNT(0) AS total, + COUNTIF(freq_shorthand_first > 0) / COUNT(0) AS pct +FROM ( + SELECT + client, + page, + SUM(property.freq) AS freq_shorthand_first + FROM + `httparchive.almanac.parsed_css` + LEFT JOIN + UNNEST(getShorthandFirstProperties(css)) AS property + WHERE + date = '2020-08-01' + GROUP BY + client, + page) +GROUP BY + client diff --git a/sql/2020/01_CSS/repetition.sql b/sql/2020/01_CSS/repetition.sql new file mode 100644 index 00000000000..6165a32ce4f --- /dev/null +++ b/sql/2020/01_CSS/repetition.sql @@ -0,0 +1,63 @@ 
+#standardSQL
+# Per client and percentile: how many declarations walkDeclarations finds on a
+# page (total), how many distinct "property: value" pairs those are (unique),
+# and the unique / total ratio.
+# Note: unique is counted inside each parsed_css row and then summed per page,
+# so a declaration repeated in two rows of the same page counts once per row.
+
+# getDeclarationCounts(css): css is JSON text; it is parsed and handed to
+# walkDeclarations from css-utils.js.
+# Returns STRUCT(total, unique), or NULL when anything in the body throws.
+CREATE TEMPORARY FUNCTION getDeclarationCounts(css STRING)
+RETURNS STRUCT<
+  total NUMERIC,
+  unique NUMERIC
+> LANGUAGE js AS '''
+try {
+  // Counts every declaration and the distinct "property: value" pairs.
+  // The parsed object is an explicit parameter so this helper does not depend
+  // on an outer binding that is only declared after it.
+  function compute(ast) {
+    const ret = {total: 0};
+    const unique = new Set();
+
+    walkDeclarations(ast, ({property, value}) => {
+      // Custom props are case sensitive; every other property is lowercased.
+      // The value is used verbatim (no normalization).
+      const name = property.startsWith("--") ? property : property.toLowerCase();
+
+      ret.total++;
+      unique.add(`${name}: ${value}`);
+    });
+
+    ret.unique = unique.size;
+
+    return ret;
+  }
+
+  const ast = JSON.parse(css);
+  return compute(ast);
+} catch (e) {
+  // Best effort: a row that fails to parse or walk yields NULL instead of
+  // aborting the query; SUM and IGNORE NULLS below skip it.
+  return null;
+}
+'''
+OPTIONS (library="gs://httparchive/lib/css-utils.js");
+
+SELECT
+  percentile,
+  client,
+  # 1000 quantile buckets, so percentile p sits at offset p * 10.
+  APPROX_QUANTILES(total, 1000 IGNORE NULLS)[OFFSET(percentile * 10)] AS total,
+  APPROX_QUANTILES(unique, 1000 IGNORE NULLS)[OFFSET(percentile * 10)] AS unique,
+  APPROX_QUANTILES(SAFE_DIVIDE(unique, total), 1000 IGNORE NULLS)[OFFSET(percentile * 10)] AS unique_ratio
+FROM (
+  # One row per client and page: counts summed over that page's parsed_css rows.
+  SELECT
+    client,
+    SUM(info.total) AS total,
+    SUM(info.unique) AS unique
+  FROM (
+    SELECT
+      client,
+      page,
+      getDeclarationCounts(css) AS info
+    FROM
+      `httparchive.almanac.parsed_css`
+    WHERE
+      date = '2020-08-01')
+  GROUP BY
+    client,
+    page),
+  UNNEST([10, 25, 50, 75, 90, 95, 100]) AS percentile
+GROUP BY
+  percentile,
+  client
+ORDER BY
+  percentile,
+  client
diff --git a/sql/2020/01_CSS/sass_nesting.sql b/sql/2020/01_CSS/sass_nesting.sql index d20ad562415..63e0ed8739b 100644 --- a/sql/2020/01_CSS/sass_nesting.sql +++ b/sql/2020/01_CSS/sass_nesting.sql @@ -8,21 +8,24 @@ try { return []; } - return Object.entries(scss.scss.stats.nested).map(([nested, freq]) => { + let ret = scss.scss.stats.nested; + ret.total = sumObject(ret); + return Object.entries(ret).map(([nested, freq]) => { return {nested, freq}; }); } catch (e) { return []; } -'''; +''' +OPTIONS (library="gs://httparchive/lib/css-utils.js"); SELECT client, nested, COUNT(DISTINCT IF(freq > 0, page, NULL)) AS pages, SUM(freq) AS freq, - SUM(SUM(freq)) OVER
(PARTITION BY client) AS total, - SUM(freq) / SUM(SUM(freq)) OVER (PARTITION BY client) AS pct + SUM(SUM(freq)) OVER (PARTITION BY client) / 2 AS total, + SUM(freq) / (SUM(SUM(freq)) OVER (PARTITION BY client) / 2) AS pct FROM ( SELECT _TABLE_SUFFIX AS client, @@ -40,4 +43,4 @@ GROUP BY client, nested ORDER BY - pct DESC \ No newline at end of file + pct DESC diff --git a/sql/2020/01_CSS/vendor_prefix_summary.sql b/sql/2020/01_CSS/vendor_prefix_summary.sql new file mode 100644 index 00000000000..aa320b10707 --- /dev/null +++ b/sql/2020/01_CSS/vendor_prefix_summary.sql @@ -0,0 +1,117 @@ +#standardSQL +CREATE TEMPORARY FUNCTION getPrefixStats(css STRING) RETURNS ARRAY LANGUAGE js AS ''' +try { + function compute() { + let ret = { + total: {}, + pseudo_classes: {}, + pseudo_elements: {}, + properties: {}, + functions: {}, + keywords: {}, + media: {} + }; + + ret.total = Object.fromEntries(Object.keys(ret).map(k => [k, 0])); + + walkRules(ast, rule => { + // Prefixed pseudos + if (rule.selectors) { + let pseudos = rule.selectors.flatMap(r => r.match(/::?-[a-z]+-[\\w-]+/g) || []); + + for (let pseudo of pseudos) { + let type = "pseudo_" + (pseudo.indexOf("::") === 0? "elements" : "classes"); + incrementByKey(ret[type], pseudo); + ret.total[type]++; + } + } + + if (rule.declarations) { + walkDeclarations(rule, ({property, value}) => { + if (value.length > 1000) { + return; + } + + // Prefixed properties + if (/^-[a-z]+-.+/.test(property)) { + incrementByKey(ret.properties, property); + ret.total.properties++; + } + + // -prefix-function() + for (let call of extractFunctionCalls(value, {names: /^-[a-z]+-.+/})) { + incrementByKey(ret.functions, call.name); + ret.total.functions++; + } + + // Prefixed keywords + if (!matches(property, /(^|-)(transition(-property)?|animation(-name)?)$/)) { + for (let k of value.matchAll(/(? 
s.slice(1)); + + for (let feature of features) { + incrementByKey(ret.media, feature); + ret.total.media++; + } + } + } + }); + + ret.total.total = sumObject(ret.total); + + for (let type in ret) { + ret[type] = sortObject(ret[type]); + } + + return ret; + } + + + let ast = JSON.parse(css); + let stats = compute(ast).total; + return Object.entries(stats).flatMap(([prop, freq]) => { + return Array(freq).fill(prop); + }); +} +catch (e) { + return []; +} +''' +OPTIONS (library="gs://httparchive/lib/css-utils.js"); + +SELECT + * +FROM ( + SELECT + client, + prop, + COUNT(DISTINCT page) AS pages, + COUNT(0) AS freq, + SUM(COUNT(IF(prop = 'total', NULL, 0))) OVER (PARTITION BY client) AS total, + COUNT(IF(prop = 'total', NULL, 0)) / SUM(COUNT(IF(prop = 'total', NULL, 0))) OVER (PARTITION BY client) AS pct + FROM + `httparchive.almanac.parsed_css`, + UNNEST(getPrefixStats(css)) AS prop + WHERE + date = '2020-08-01' + GROUP BY + client, + prop) +WHERE + pages >= 1000 +ORDER BY + pct DESC diff --git a/sql/lib/css-utils.js b/sql/lib/css-utils.js index b3470fe3772..5ccb5e4e251 100644 --- a/sql/lib/css-utils.js +++ b/sql/lib/css-utils.js @@ -513,7 +513,6 @@ function removeFunctionCalls(value, test = {}) { for (let f of extractFunctionCalls(value, test)) { let [start, end] = f.pos; - console.log(start, end, offset); start -= offset; end -= offset;