-
-
Notifications
You must be signed in to change notification settings - Fork 183
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Third party 2021 query amends (#2404)
* Add limit to 50
* Linting fixes
* More ranking queries
* Fix blocking query and linting
* Update sql/2021/third-parties/third_parties_blocking_main_thread.sql
* Update sql/2021/third-parties/third_parties_blocking_main_thread.sql

Co-authored-by: Rick Viscomi <[email protected]>
- Loading branch information
1 parent
e80bb31
commit 33ad00f
Showing
20 changed files
with
371 additions
and
54 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
77 changes: 77 additions & 0 deletions
77
sql/2021/third-parties/number_of_third_parties_by_rank.sql
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,77 @@ | ||
#standardSQL
# Median number of distinct third parties per website, by rank grouping

# Requests from the 2021-07-01 summary_requests tables; client is the table
# suffix matched by the wildcard.
WITH requests AS (
  SELECT
    _TABLE_SUFFIX AS client,
    pageid AS page,
    url
  FROM
    `httparchive.summary_requests.2021_07_01_*`
),

# Pages from the 2021-07-01 summary_pages tables, with their rank column.
pages AS (
  SELECT
    _TABLE_SUFFIX AS client,
    pageid AS page,
    rank
  FROM
    `httparchive.summary_pages.2021_07_01_*`
),

# Third-party domains (excluding the 'hosting' category) whose host is requested
# by at least 50 distinct pages.
third_party AS (
  SELECT
    tp.domain,
    tp.category,
    COUNT(DISTINCT r.page) AS page_usage
  FROM
    `httparchive.almanac.third_parties` AS tp
  INNER JOIN
    requests AS r
  ON NET.HOST(r.url) = NET.HOST(tp.domain)
  WHERE
    tp.date = '2021-07-01' AND
    tp.category != 'hosting'
  GROUP BY
    tp.domain,
    tp.category
  HAVING
    page_usage >= 50
),

# Number of distinct third-party domains per page.
# The LEFT JOIN keeps requests that match no third party, so a page whose
# requests match none is counted as 0 rather than dropped.
base AS (
  SELECT
    client,
    page,
    rank,
    # DISTINCT so that several requests to the same third party count once;
    # a plain COUNT(domain) would count matching requests, not third parties.
    COUNT(DISTINCT domain) AS third_parties_per_page
  FROM
    requests
  LEFT JOIN
    third_party
  ON
    NET.HOST(requests.url) = NET.HOST(third_party.domain)
  INNER JOIN
    pages
  USING
    (client, page)
  GROUP BY
    client,
    page,
    rank
)

# Rank groupings are cumulative: a page contributes to every grouping whose
# threshold is greater than or equal to its rank.
# OFFSET(500) of 1000 approximate quantiles is the approximate median.
SELECT
  client,
  rank_grouping,
  APPROX_QUANTILES(third_parties_per_page, 1000)[OFFSET(500)] AS p50_third_parties_per_page
FROM
  base,
  UNNEST([1000, 10000, 100000, 1000000, 10000000]) AS rank_grouping
WHERE
  rank <= rank_grouping
GROUP BY
  client,
  rank_grouping
ORDER BY
  client,
  rank_grouping
83 changes: 83 additions & 0 deletions
83
sql/2021/third-parties/number_of_third_parties_by_rank_and_category.sql
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,83 @@ | ||
#standardSQL
# Median number of distinct third parties per website, by rank grouping and category

# Requests from the 2021-07-01 summary_requests tables; client is the table
# suffix matched by the wildcard.
WITH requests AS (
  SELECT
    _TABLE_SUFFIX AS client,
    pageid AS page,
    url
  FROM
    `httparchive.summary_requests.2021_07_01_*`
),

# Pages from the 2021-07-01 summary_pages tables, with their rank column.
pages AS (
  SELECT
    _TABLE_SUFFIX AS client,
    pageid AS page,
    rank
  FROM
    `httparchive.summary_pages.2021_07_01_*`
),

# Third-party domains (excluding the 'hosting' category) whose host is requested
# by at least 50 distinct pages.
third_party AS (
  SELECT
    tp.domain,
    tp.category,
    COUNT(DISTINCT r.page) AS page_usage
  FROM
    `httparchive.almanac.third_parties` AS tp
  INNER JOIN
    requests AS r
  ON NET.HOST(r.url) = NET.HOST(tp.domain)
  WHERE
    tp.date = '2021-07-01' AND
    tp.category != 'hosting'
  GROUP BY
    tp.domain,
    tp.category
  HAVING
    page_usage >= 50
),

# Number of distinct third-party domains per page and category.
# A page only gets a row for a category it actually requests, so the medians
# below are taken over pages that use that category.
# Because of the LEFT JOIN, any request that matches no third party lands in a
# NULL-category group whose count is 0; that group is carried into the output.
base AS (
  SELECT
    client,
    category,
    page,
    rank,
    # DISTINCT so that several requests to the same third party count once;
    # a plain COUNT(domain) would count matching requests, not third parties.
    COUNT(DISTINCT domain) AS third_parties_per_page
  FROM
    requests
  LEFT JOIN
    third_party
  ON
    NET.HOST(requests.url) = NET.HOST(third_party.domain)
  INNER JOIN
    pages
  USING
    (client, page)
  GROUP BY
    client,
    category,
    page,
    rank
)

# Rank groupings are cumulative: a page contributes to every grouping whose
# threshold is greater than or equal to its rank.
# OFFSET(500) of 1000 approximate quantiles is the approximate median.
SELECT
  client,
  category,
  rank_grouping,
  APPROX_QUANTILES(third_parties_per_page, 1000)[OFFSET(500)] AS p50_third_parties_per_page
FROM
  base,
  UNNEST([1000, 10000, 100000, 1000000, 10000000]) AS rank_grouping
WHERE
  rank <= rank_grouping
GROUP BY
  client,
  category,
  rank_grouping
ORDER BY
  client,
  category,
  rank_grouping
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.