-
Notifications
You must be signed in to change notification settings - Fork 17
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
* feat: onchain metrics by collection * feat: github metrics by collection
- Loading branch information
Showing
4 changed files
with
244 additions
and
5 deletions.
There are no files selected for viewing
101 changes: 101 additions & 0 deletions
101
dbt/models/marts/github_metrics/github_metrics_by_collection.sql
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,101 @@ | ||
{# | ||
Summary GitHub metrics for a collection: | ||
- first_commit_date: The date of the first commit to the collection | ||
- last_commit_date: The date of the last commit to the collection | ||
- repos: The number of repositories in the collection | ||
- stars: The number of stars the collection has | ||
- forks: The number of forks the collection has | ||
- contributors: The number of contributors to the collection | ||
- contributors_6_months: The number of contributors to the collection in the last 6 months | ||
- new_contributors_6_months: The number of new contributors to the collection in the last 6 months | ||
- avg_active_devs_6_months: The average number of active developers in the last 6 months | ||
- commits_6_months: The number of commits to the collection in the last 6 months | ||
- issues_opened_6_months: The number of issues opened in the collection in the last 6 months | ||
- issues_closed_6_months: The number of issues closed in the collection in the last 6 months | ||
- pull_requests_opened_6_months: The number of pull requests opened in the collection in the last 6 months | ||
- pull_requests_merged_6_months: The number of pull requests merged in the collection in the last 6 months | ||
#} | ||
|
||
-- CTE for calculating the first and last commit date for each collection, ignoring forked repos | ||
WITH collection_commit_dates AS ( | ||
SELECT | ||
pbc.collection_id, | ||
MIN(e.time) AS first_commit_date, | ||
MAX(e.time) AS last_commit_date | ||
FROM {{ ref('int_events_to_project') }} AS e | ||
JOIN {{ ref('stg_ossd__repositories_by_project') }} AS r ON e.project_id = r.project_id | ||
JOIN {{ ref('stg_ossd__projects_by_collection') }} AS pbc ON r.project_id = pbc.project_id | ||
WHERE e.event_type = 'COMMIT_CODE' | ||
AND r.is_fork = false | ||
GROUP BY pbc.collection_id | ||
), | ||
-- CTE for aggregating stars, forks, and repository counts by collection | ||
collection_stars_forks_repos AS ( | ||
SELECT | ||
pbc.collection_id, | ||
COUNT(DISTINCT r.id) AS repos, | ||
SUM(r.star_count) AS stars, | ||
SUM(r.fork_count) AS forks | ||
FROM {{ ref('stg_ossd__repositories_by_project') }} AS r | ||
JOIN {{ ref('stg_ossd__projects_by_collection') }} AS pbc ON r.project_id = pbc.project_id | ||
WHERE r.is_fork = false | ||
GROUP BY pbc.collection_id | ||
), | ||
-- CTE for calculating contributor counts and new contributors in the last 6 months at collection level | ||
collection_contributors AS ( | ||
SELECT | ||
pbc.collection_id, | ||
COUNT(DISTINCT d.from_id) AS contributors, | ||
COUNT(DISTINCT CASE WHEN d.bucket_month >= DATE_ADD(CURRENT_DATE(), INTERVAL -6 MONTH) THEN d.from_id END) AS contributors_6_months, | ||
COUNT(DISTINCT CASE WHEN d.bucket_month >= DATE_ADD(CURRENT_DATE(), INTERVAL -6 MONTH) AND d.user_segment_type IN ('FULL_TIME_DEV', 'PART_TIME_DEV') THEN CONCAT(d.from_id, '_', d.bucket_month) END) / 6 AS avg_active_devs_6_months, | ||
COUNT(DISTINCT CASE WHEN d.first_contribution_date >= DATE_ADD(CURRENT_DATE(), INTERVAL -6 MONTH) THEN d.from_id END) AS new_contributors_6_months | ||
FROM ( | ||
SELECT | ||
d.from_id, | ||
pbc.collection_id, | ||
d.bucket_month, | ||
d.user_segment_type, | ||
MIN(d.bucket_month) OVER (PARTITION BY d.from_id, pbc.collection_id) AS first_contribution_date | ||
FROM {{ ref('int_devs') }} AS d | ||
JOIN {{ ref('stg_ossd__projects_by_collection') }} AS pbc ON d.project_id = pbc.project_id | ||
) AS d | ||
GROUP BY pbc.collection_id | ||
), | ||
-- CTE for summarizing collection activity metrics over the past 6 months | ||
collection_activity AS ( | ||
SELECT | ||
pbc.collection_id, | ||
SUM(CASE WHEN e.event_type = 'COMMIT_CODE' THEN e.amount END) AS commits_6_months, | ||
SUM(CASE WHEN e.event_type = 'ISSUE_OPENED' THEN e.amount END) AS issues_opened_6_months, | ||
SUM(CASE WHEN e.event_type = 'ISSUE_CLOSED' THEN e.amount END) AS issues_closed_6_months, | ||
SUM(CASE WHEN e.event_type = 'PULL_REQUEST_OPENED' THEN e.amount END) AS pull_requests_opened_6_months, | ||
SUM(CASE WHEN e.event_type = 'PULL_REQUEST_MERGED' THEN e.amount END) AS pull_requests_merged_6_months | ||
FROM {{ ref('int_events_to_project') }} AS e | ||
JOIN {{ ref('stg_ossd__projects_by_collection') }} AS pbc ON e.project_id = pbc.project_id | ||
WHERE e.time >= DATE_ADD(CURRENT_DATE(), INTERVAL -6 MONTH) | ||
GROUP BY pbc.collection_id | ||
) | ||
|
||
-- Final query to join all the metrics together for collections | ||
SELECT | ||
c.collection_id, | ||
c.collection_name, | ||
ccd.first_commit_date, | ||
ccd.last_commit_date, | ||
csfr.repos, | ||
csfr.stars, | ||
csfr.forks, | ||
cc.contributors, | ||
cc.contributors_6_months, | ||
cc.new_contributors_6_months, | ||
cc.avg_active_devs_6_months, | ||
ca.commits_6_months, | ||
ca.issues_opened_6_months, | ||
ca.issues_closed_6_months, | ||
ca.pull_requests_opened_6_months, | ||
ca.pull_requests_merged_6_months | ||
FROM {{ ref('collections') }} AS c | ||
INNER JOIN collection_commit_dates AS ccd ON c.collection_id = ccd.collection_id | ||
INNER JOIN collection_stars_forks_repos AS csfr ON c.collection_id = csfr.collection_id | ||
INNER JOIN collection_contributors AS cc ON c.collection_id = cc.collection_id | ||
INNER JOIN collection_activity AS ca ON c.collection_id = ca.collection_id |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
138 changes: 138 additions & 0 deletions
138
dbt/models/marts/onchain_metrics/onchain_metrics_by_collection_arbitrum.sql
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,138 @@ | ||
{# | ||
Arbitrum Onchain Metrics | ||
Summary onchain metrics for a collection: | ||
- num_projects: The number of projects in the collection | ||
- num_contracts: The number of contracts in the collection | ||
- first_txn_date: The date of the first transaction to the collection | ||
- total_txns: The total number of transactions to the collection | ||
- total_l2_gas: The total L2 gas used by the collection | ||
- total_users: The number of unique users interacting with the collection | ||
- txns_6_months: The total number of transactions to the collection in the last 6 months | ||
- l2_gas_6_months: The total L2 gas used by the collection in the last 6 months | ||
- users_6_months: The number of unique users interacting with the collection in the last 6 months | ||
- new_users: The number of users interacting with the collection for the first time in the last 3 months | ||
- active_users: The number of active users interacting with the collection in the last 3 months | ||
- high_frequency_users: The number of users who have made 1000+ transactions with the collection in the last 3 months | ||
- more_active_users: The number of users who have made 10-999 transactions with the collection in the last 3 months | ||
- less_active_users: The number of users who have made 1-9 transactions with the collection in the last 3 months | ||
#} | ||
|
||
|
||
-- CTE for grabbing the onchain transaction data we care about, including project-collection mapping | ||
WITH txns AS ( | ||
SELECT | ||
pbc.collection_id, | ||
a.project_id, | ||
c.from_source_id AS from_id, | ||
DATE(TIMESTAMP_TRUNC(c.time, MONTH)) AS bucket_month, | ||
l2_gas, | ||
tx_count | ||
FROM {{ ref('stg_dune__arbitrum_contract_invocation') }} AS c | ||
JOIN {{ ref('stg_ossd__artifacts_by_project') }} AS a ON c.to_source_id = a.artifact_source_id | ||
JOIN {{ ref('stg_ossd__projects_by_collection') }} AS pbc ON a.project_id = pbc.project_id | ||
), | ||
|
||
-- CTEs for calculating all-time and 6-month collection metrics across all contracts | ||
metrics_all_time AS ( | ||
SELECT | ||
collection_id, | ||
COUNT(DISTINCT project_id) AS total_projects, | ||
MIN(bucket_month) AS first_txn_date, | ||
COUNT(DISTINCT from_id) AS total_users, | ||
SUM(l2_gas) AS total_l2_gas, | ||
SUM(tx_count) AS total_txns | ||
FROM txns | ||
GROUP BY collection_id | ||
), | ||
metrics_6_months AS ( | ||
SELECT | ||
collection_id, | ||
COUNT(DISTINCT from_id) AS users_6_months, | ||
SUM(l2_gas) AS l2_gas_6_months, | ||
SUM(tx_count) AS txns_6_months | ||
FROM txns | ||
WHERE bucket_month >= DATE_ADD(CURRENT_DATE(), INTERVAL -6 MONTH) | ||
GROUP BY collection_id | ||
), | ||
|
||
-- CTE for identifying new users to the collection in the last 3 months | ||
new_users AS ( | ||
SELECT | ||
collection_id, | ||
SUM(is_new_user) AS new_user_count | ||
FROM ( | ||
SELECT | ||
collection_id, | ||
from_id, | ||
CASE WHEN MIN(bucket_month) >= DATE_ADD(CURRENT_DATE(), INTERVAL -3 MONTH) THEN 1 ELSE 0 END AS is_new_user | ||
FROM txns | ||
GROUP BY collection_id, from_id | ||
) | ||
GROUP BY collection_id | ||
), | ||
|
||
-- CTEs for segmenting different types of active users based on txn volume at collection level | ||
user_txns_aggregated AS ( | ||
SELECT | ||
collection_id, | ||
from_id, | ||
SUM(tx_count) AS total_tx_count | ||
FROM txns | ||
WHERE bucket_month >= DATE_ADD(CURRENT_DATE(), INTERVAL -3 MONTH) | ||
GROUP BY collection_id, from_id | ||
), | ||
user_segments AS ( | ||
SELECT | ||
collection_id, | ||
COUNT(DISTINCT CASE WHEN user_segment = 'HIGH_FREQUENCY_USER' THEN from_id END) AS high_frequency_users, | ||
COUNT(DISTINCT CASE WHEN user_segment = 'MORE_ACTIVE_USER' THEN from_id END) AS more_active_users, | ||
COUNT(DISTINCT CASE WHEN user_segment = 'LESS_ACTIVE_USER' THEN from_id END) AS less_active_users | ||
FROM ( | ||
SELECT | ||
collection_id, | ||
from_id, | ||
CASE | ||
WHEN total_tx_count >= 1000 THEN 'HIGH_FREQUENCY_USER' | ||
WHEN total_tx_count >= 10 THEN 'MORE_ACTIVE_USER' | ||
ELSE 'LESS_ACTIVE_USER' | ||
END AS user_segment | ||
FROM user_txns_aggregated | ||
) | ||
GROUP BY collection_id | ||
), | ||
|
||
-- CTE to count the number of contracts deployed by projects in a collection | ||
contracts AS ( | ||
SELECT | ||
pbc.collection_id, | ||
COUNT(DISTINCT a.artifact_source_id) AS num_contracts | ||
FROM {{ ref('stg_ossd__artifacts_by_project') }} AS a | ||
JOIN {{ ref('stg_ossd__projects_by_collection') }} AS pbc ON a.project_id = pbc.project_id | ||
GROUP BY pbc.collection_id | ||
) | ||
|
||
-- Final query to join all the metrics together for collections | ||
SELECT | ||
c.collection_id, | ||
c.collection_name, | ||
co.num_contracts, | ||
ma.total_projects, | ||
ma.first_txn_date, | ||
ma.total_txns, | ||
ma.total_l2_gas, | ||
ma.total_users, | ||
m6.txns_6_months, | ||
m6.l2_gas_6_months, | ||
m6.users_6_months, | ||
nu.new_user_count AS new_users, | ||
(us.high_frequency_users + us.more_active_users + us.less_active_users) AS active_users, | ||
us.high_frequency_users, | ||
us.more_active_users, | ||
us.less_active_users | ||
|
||
FROM {{ ref('collections') }} AS c | ||
INNER JOIN metrics_all_time AS ma ON c.collection_id = ma.collection_id | ||
INNER JOIN metrics_6_months AS m6 on c.collection_id = m6.collection_id | ||
INNER JOIN new_users AS nu on c.collection_id = nu.collection_id | ||
INNER JOIN user_segments AS us on c.collection_id = us.collection_id | ||
INNER JOIN contracts AS co on c.collection_id = co.collection_id |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters