-
Notifications
You must be signed in to change notification settings - Fork 2
/
growth_accounting_and_ltv.sql
284 lines (274 loc) · 9.88 KB
/
growth_accounting_and_ltv.sql
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
-- This is a translated query which can be run on Snowflake. Originally written by Jonathan Hsu (Tribe Capital),
-- who shared the same analytical pattern to run in PostgreSQL, linked on this blog post:
-- https://tribecap.co/a-quantitative-approach-to-product-market-fit/
WITH dau as (
-- This part of the query can be pretty much anything.
-- The only requirement is that it have three columns:
-- dt, user_id, inc_amt
-- Where dt is a date and user_id is some unique identifier for a user.
-- Each dt-user_id pair should be unique in this table.
-- inc_amt represents the amount of value that this user created on dt.
-- The most common case is
-- inc_amt = incremental revenue from the user on dt
-- If you want to do L28 growth accounting, user inc_amt=1.
-- The version here derives everything from the tutorial.yammer_events
-- data set provided for free by Mode.
-- If you edit just this part to represent your data, the rest
-- of the query should run just fine.
-- The query here is a sample that works in the public Mode Analytics
-- tutorial.
select
[USER_ID] as user_id,
to_date([EVENT_DATE]) as dt,
SUM([SOME_ACTION]) as inc_amt
from [SOME DATA SOURCE]
group by 1,2
),
-- First, set up WAU and MAU tables for future use
wau as (
select
date_trunc('week', dt) as week,
user_id,
sum(inc_amt) as inc_amt
from dau
group by 1,2
),
mau as (
select
date_trunc('month',dt) as month,
user_id,
sum(inc_amt) as inc_amt
from dau
group by 1,2
),
-- This determines the cohort date of each user. In this case we are
-- deriving it from DAU data but you can feel free to replace it with
-- registration date if that's more appropriate.
first_dt as (
select
user_id,
min(dt) as first_dt,
date_trunc('week', min(dt)) as first_week,
date_trunc('month', min(dt)) as first_month
from dau
group by 1
),
mau_decorated as (
select
d.month,
d.user_id,
d.inc_amt,
f.first_month
from mau d
JOIN first_dt f on (d.user_id = f.user_id) and inc_amt > 0
),
-- This is MAU growth accounting. Note that this does not require any
-- information about inc_amt. As discussed in the articles, these
-- quantities satisfy some identities:
-- MAU(t) = retained(t) + new(t) + resurrected(t)
-- MAU(t - 1 month) = retained(t) + churned(t)
mau_growth_accounting as (
select
coalesce(tm.month, dateadd(month, 1, lm.month)) as month,
count(distinct tm.user_id) as mau,
count(distinct case when lm.user_id is not NULL then tm.user_id
else NULL end) as retained,
count(distinct case when tm.first_month = tm.month then tm.user_id
else NULL end) as new,
count(distinct case when tm.first_month != tm.month
and lm.user_id is NULL then tm.user_id else NULL end
) as resurrected,
-1*count(distinct case when tm.user_id is NULL then lm.user_id
else NULL end) as churned
from
mau_decorated tm
full outer join mau_decorated lm on (
tm.user_id = lm.user_id
and tm.month = dateadd(month, 1, lm.month)
)
group by 1
order by 1
),
-- This generates the familiar monthly cohort retention dataset.
mau_retention_by_cohort as (
select
first_month,
datediff('month', first_month, month) as months_since_first,
count(1) as active_users,
sum(inc_amt) as inc_amt
from mau_decorated
group by 1,2
order by 1,2
),
-- This is the MRR growth accounting (or growth accounting of whatever
-- value you put in inc_amt). These also satisfy some identities:
-- MRR(t) = retained(t) + new(t) + resurrected(t) + expansion(t)
-- MAU(t - 1 month) = retained(t) + churned(t) + contraction(t)
mrr_growth_accounting as (
select
coalesce(tm.month, dateadd(month,1,lm.month)) as month,
sum(tm.inc_amt) as rev,
sum(
case
when tm.user_id is not NULL and lm.user_id is not NULL
and tm.inc_amt >= lm.inc_amt then lm.inc_amt
when tm.user_id is not NULL and lm.user_id is not NULL
and tm.inc_amt < lm.inc_amt then tm.inc_amt
else 0
end
) as retained,
sum(
case when tm.first_month = tm.month then tm.inc_amt
else 0 end
) as new,
sum(
case when tm.month != tm.first_month and tm.user_id is not NULL
and lm.user_id is not NULL and tm.inc_amt > lm.inc_amt
and lm.inc_amt > 0 then tm.inc_amt - lm.inc_amt
else 0 end
) as expansion,
sum(
case when tm.user_id is not NULL
and (lm.user_id is NULL or lm.inc_amt = 0)
and tm.inc_amt > 0 and tm.first_month != tm.month
then tm.inc_amt
else 0 end
) as resurrected,
-1 * sum(
case
when tm.month != tm.first_month and tm.user_id is not NULL
and lm.user_id is not NULL
and tm.inc_amt < lm.inc_amt and tm.inc_amt > 0
then lm.inc_amt - tm.inc_amt
else 0 end
) as contraction,
-1 * sum(
case when lm.inc_amt > 0 and (tm.user_id is NULL or tm.inc_amt = 0)
then lm.inc_amt else 0 end
) as churned
from
mau_decorated tm
full outer join mau_decorated lm on (
tm.user_id = lm.user_id
and tm.month = dateadd(month,1,lm.month)
)
group by 1
order by 1
),
-- These next tables are to compute LTV via the cohorts_cumulative table.
-- The LTV here is being computed for weekly cohorts on weekly intervals.
-- The queries can be modified to compute it for cohorts of any size
-- on any time window frequency.
wau_decorated as (
select
week,
w.user_id,
w.inc_amt,
f.first_week
from wau w, first_dt f
where w.user_id = f.user_id
),
cohorts as (
select
first_week,
week as active_week,
datediff(week, first_week, week) as weeks_since_first,
count(distinct user_id) as users,
sum(inc_amt) as inc_amt
from wau_decorated
group by 1,2,3
order by 1,2
),
cohort_sizes as (
select
first_week,
users,
inc_amt
from cohorts
where weeks_since_first = 0
),
cohorts_cumulative as (
-- A semi-cartesian join accomplishes the cumulative behavior.
select
c1.first_week,
c1.active_week,
c1.weeks_since_first,
c1.users,
cs.users as cohort_num_users,
1.0 * c1.users/cs.users as retained_pctg,
c1.inc_amt,
sum(c2.inc_amt) as cum_amt,
1.0*sum(c2.inc_amt)/cs.users as cum_amt_per_user
from
cohorts c1,
cohorts c2,
cohort_sizes cs
where
c1.first_week = c2.first_week
and c2.weeks_since_first <= c1.weeks_since_first
and cs.first_week = c1.first_week
group by 1,2,3,4,5,6,7
order by 1,2
),
-- monthly cumulative cohorts
cohorts_m as (
select
first_month,
month as active_month,
datediff(month, first_month, month) as months_since_first,
count(distinct user_id) as users,
sum(inc_amt) as inc_amt
from mau_decorated
group by 1,2,3
order by 1,2
),
cohort_sizes_m as (
select
first_month,
users,
inc_amt
from cohorts_m
where months_since_first = 0
),
cohorts_cumulative_m as (
-- A semi-cartesian join accomplishes the cumulative behavior.
select
c1.first_month,
c1.active_month,
c1.months_since_first,
c1.users,
cs.users as cohort_num_users,
1.0 * c1.users/cs.users as retained_pctg,
c1.inc_amt,
sum(c2.inc_amt) as cum_amt,
1.0*sum(c2.inc_amt)/cs.users as cum_amt_per_user
from
cohorts_m c1,
cohorts_m c2,
cohort_sizes_m cs
where
c1.first_month = c2.first_month
and c2.months_since_first <= c1.months_since_first
and cs.first_month = c1.first_month
group by 1,2,3,4,5,6,7
order by 1,2
)
-- For MAU retention by cohort, useful for the standard retention heatmap
select * from mau_retention_by_cohort
-- For cumulative LTV data use this
select * from cohorts_cumulative
-- For cumulative LTV with monthly cohorts use this
select * from cohorts_cumulative_m
-- For DAU growth accuonting use this
select * from dau_growth_accounting
-- For MAU growth accuonting use this
select * from mau_growth_accounting
-- For MRR growth accuonting use this
select * from mrr_growth_accounting
-- For use as weekly input in the 8-ball tool use this
select
first_week as cohort_week,
active_week as activity_week,
users,
inc_amt as revenue
from cohorts_cumulative