Skip to content

Commit

Permalink
Add rake job to regenerate statistics
Browse files: browse the repository at this point in the history
Add rake job to regenerate all statistics.
  • Loading branch information
jayjay-w committed Jul 10, 2024
1 parent 4452383 commit 3c9a007
Show file tree
Hide file tree
Showing 2 changed files with 97 additions and 88 deletions.
77 changes: 52 additions & 25 deletions lib/tasks/data/statistics.rake
Original file line number Diff line number Diff line change
Expand Up @@ -28,11 +28,7 @@ namespace :check do

team = Team.find(team_id)
languages = team.get_languages.to_a
if bot.user == BotUser.smooch_user
platforms = bot.smooch_enabled_integrations.keys
else
platforms = Bot::Smooch::SUPPORTED_INTEGRATION_NAMES.keys
end
platforms = bot.user == BotUser.smooch_user ? bot.smooch_enabled_integrations.keys : Bot::Smooch::SUPPORTED_INTEGRATION_NAMES.keys

team_stats = Hash.new(0)
puts "[#{Time.now}] Generating month tipline statistics for team with ID #{team_id}. (#{index + 1} / #{team_ids.length})"
Expand Down Expand Up @@ -101,40 +97,72 @@ namespace :check do
raise Check::Statistics::IncompleteRunError.new("Failed to calculate #{errors.length} monthly team statistics") if errors.any?
end

# bundle exec rake check:data:regenerate_statistics[unique_newsletters_sent]
desc 'Regenerate specified historic statistic for all workspaces'
task :regenerate_statistics, [:stats_to_generate] => [:environment] do |_t, args|
# bundle exec rake check:data:regenerate_statistics[start_date]
desc 'Regenerate all historic statistics for all workspaces from a given start date'
task :regenerate_statistics, [:start_date] => [:environment] do |_t, args|
old_logger = ActiveRecord::Base.logger
ActiveRecord::Base.logger = nil

puts "[#{Time.now}] Attempting to regenerate keys: #{args.stats_to_generate}"
start_date = DateTime.parse(args.start_date) rescue nil
raise Check::Statistics::ArgumentError.new("Invalid or missing start_date argument") if start_date.nil?

puts "[#{Time.now}] Starting to regenerate all statistics from #{start_date}"
begin
# Give user help if they want it
supported_stats = %w(
conversations
average_messages_per_day
unique_users
returning_users
valid_new_requests
published_native_reports
published_imported_reports
requests_answered_with_report
reports_sent_to_users
unique_users_who_received_report
median_response_time
unique_newsletters_sent
new_newsletter_subscriptions
newsletter_cancellations
current_subscribers
)

# Make sure we have at least one valid argument
requested_stats = (args.stats_to_generate || '').split(',').map(&:strip)
valid_requested_stats = requested_stats.intersection(supported_stats)
unless valid_requested_stats.length > 0
raise Check::Statistics::ArgumentError.new("Argument '#{args.stats_to_generate}' is invalid. We currently support the following values passed a comma-separated list: #{supported_stats.join(',')}.")
end

puts "[#{Time.now}] Regenerating stats for the following keys: #{valid_requested_stats}. Total to update: #{MonthlyTeamStatistic.count}"

# Update all of the stats
puts "[#{Time.now}] Regenerating stats for the following keys: #{supported_stats}. Total to update: #{MonthlyTeamStatistic.where('start_date >= ?', start_date).count}"

# Define a mapping of statistics to their corresponding methods
stats_methods = {
'conversations' => :conversations,
'average_messages_per_day' => :average_messages_per_day,
'unique_users' => :unique_users,
'returning_users' => :returning_users,
'valid_new_requests' => :valid_new_requests,
'published_native_reports' => :published_native_reports,
'published_imported_reports' => :published_imported_reports,
'requests_answered_with_report' => :requests_answered_with_report,
'reports_sent_to_users' => :reports_sent_to_users,
'unique_users_who_received_report' => :unique_users_who_received_report,
'median_response_time' => :median_response_time,
'unique_newsletters_sent' => :number_of_newsletters_sent,
'new_newsletter_subscriptions' => :new_newsletter_subscriptions,
'newsletter_cancellations' => :newsletter_cancellations,
'current_subscribers' => :current_subscribers
}

# Update all of the stats from the start_date
total_successful = Hash.new(0)
MonthlyTeamStatistic.find_each do |monthly_stats|
MonthlyTeamStatistic.where('start_date >= ?', start_date).find_each do |monthly_stats|
team_id = monthly_stats.team_id
start_date = monthly_stats.start_date
end_date = monthly_stats.end_date
language = monthly_stats.language

begin
if valid_requested_stats.include?('unique_newsletters_sent')
monthly_stats.update!(unique_newsletters_sent: CheckStatistics.number_of_newsletters_sent(team_id, start_date, end_date, language))
total_successful[:unique_newsletters_sent] += 1
supported_stats.each do |stat|
method_name = stats_methods[stat]
next unless method_name

result = CheckStatistics.send(method_name, team_id, start_date, end_date, language)
monthly_stats.update!(stat => result)
total_successful[stat.to_sym] += 1
end
rescue StandardError => e
$stderr.puts "[#{Time.now}] Failed to update MonthlyTeamStatistic with ID #{monthly_stats.id}. Error: #{e}"
Expand All @@ -144,7 +172,6 @@ namespace :check do
puts "[#{Time.now}] Finished updating MonthlyTeamStatistics. Total updated: #{total_successful}"
rescue StandardError => e
$stderr.puts e
next
ensure
ActiveRecord::Base.logger = old_logger
end
Expand Down
108 changes: 45 additions & 63 deletions test/lib/tasks/statistics_test.rb
Original file line number Diff line number Diff line change
Expand Up @@ -337,9 +337,9 @@ def teardown

test "check:data:statistics allows generating conversations for months before april 1 2023, with argument" do
date = DateTime.new(2023,01,01)

create_project_media(user: BotUser.smooch_user, team: @tipline_team, created_at: date + 2.weeks)

CheckStatistics.stubs(:get_statistics).returns(
{
platform: 'whatsapp',
Expand All @@ -348,120 +348,102 @@ def teardown
end_date: date,
}
)

travel_to DateTime.new(2023,01,01)

out, err = capture_io do
# pass in ignore_convo_cutoff: true
Rake::Task['check:data:statistics'].invoke(true)
end
Rake::Task['check:data:statistics'].reenable


puts "Test output: #{out}"
puts "Test error: #{err}"

conversations = MonthlyTeamStatistic.where(team: @tipline_team).pluck(:conversations_24hr).uniq
assert_equal 1, conversations.count
assert !conversations.first.nil?
end

test "check:data:regenerate_statistics errors if only an unsupported argument is passed" do
test "check:data:regenerate_statistics errors if start_date argument is invalid" do
out, err = capture_io do
Rake::Task['check:data:regenerate_statistics'].invoke("foo")
Rake::Task['check:data:regenerate_statistics'].invoke("invalid_date")
end
Rake::Task['check:data:regenerate_statistics'].reenable

assert err.present?
assert_match /Invalid or missing start_date argument/, err
end

test "check:data:regenerate_statistics accepts arguments as comma separated list or string" do
out, err = capture_io do
Rake::Task['check:data:regenerate_statistics'].invoke("unique_newsletters_sent")
end
Rake::Task['check:data:regenerate_statistics'].reenable

assert err.blank?

out, err = capture_io do
Rake::Task['check:data:regenerate_statistics'].invoke("unique_newsletters_sent,foo")
end
Rake::Task['check:data:regenerate_statistics'].reenable

assert err.blank?
end

test "check:data:regenerate_statistics outputs supported arguments if no args provided" do
out, err = capture_io do
Rake::Task['check:data:regenerate_statistics'].invoke
end
Rake::Task['check:data:regenerate_statistics'].reenable

assert_match /unique_newsletters_sent/, err

out, err = capture_io do
Rake::Task['check:data:regenerate_statistics'].invoke
end
Rake::Task['check:data:regenerate_statistics'].reenable

assert_match /unique_newsletters_sent/, err
end

test "check:data:regenerate_statistics regenerates any monthly team statistics present in database for provided stat" do

test "check:data:regenerate_statistics regenerates stats from the provided start date" do
start_date = "2023-04-01"
previous_month_start = DateTime.new(2023,4,1,0,0,0)
previous_month_end = DateTime.new(2023,4,30,23,59,59)

other_workspace_with_stats = create_team

team_stat_one = create_monthly_team_statistic(team: @tipline_team, language: 'en', start_date: previous_month_start, end_date: previous_month_end)
team_stat_two = create_monthly_team_statistic(team: @tipline_team, language: 'es', start_date: @start_of_month, end_date: @current_date)
team_stat_three = create_monthly_team_statistic(team: other_workspace_with_stats, language: 'en', start_date: @start_of_month, end_date: @current_date)

CheckStatistics.stubs(:number_of_newsletters_sent).with(@tipline_team.id, team_stat_one.start_date, team_stat_one.end_date, 'en').returns(100)
CheckStatistics.expects(:number_of_newsletters_sent).with(@tipline_team.id, team_stat_two.start_date, team_stat_two.end_date, 'es').returns(300)
CheckStatistics.expects(:number_of_newsletters_sent).with(other_workspace_with_stats.id, team_stat_three.start_date, team_stat_three.end_date, 'en').returns(400)
travel_to @current_date

out, err = capture_io do
Rake::Task['check:data:regenerate_statistics'].invoke("unique_newsletters_sent")
Rake::Task['check:data:regenerate_statistics'].invoke(start_date)
end
Rake::Task['check:data:regenerate_statistics'].reenable

puts "Test output: #{out}"
puts "Test error: #{err}"

assert err.blank?

# en, previous month
stats_one = MonthlyTeamStatistic.first
stats_one = MonthlyTeamStatistic.find_by(team: @tipline_team, language: 'en', start_date: previous_month_start)
assert_equal @tipline_team.id, stats_one.team_id
assert_equal (@current_date - 1.month).beginning_of_month.to_i, stats_one.start_date.to_i
assert_equal (@current_date - 1.month).end_of_month.to_i, stats_one.end_date.to_i
assert_equal previous_month_start.to_i, stats_one.start_date.to_i
assert_equal previous_month_end.to_i, stats_one.end_date.to_i
assert_equal 'en', stats_one.language
assert_equal 100, stats_one.unique_newsletters_sent

# es, current month
stats_two = MonthlyTeamStatistic.second
stats_two = MonthlyTeamStatistic.find_by(team: @tipline_team, language: 'es', start_date: @start_of_month)
assert_equal @tipline_team.id, stats_two.team_id
assert_equal @start_of_month.to_i, stats_two.start_date.to_i
assert_equal @current_date.to_i, stats_two.end_date.to_i
assert_equal 'es', stats_two.language
assert_equal 300, stats_two.unique_newsletters_sent

# second workspace - es, current month
stats_three = MonthlyTeamStatistic.third
# second workspace - en, current month
stats_three = MonthlyTeamStatistic.find_by(team: other_workspace_with_stats, language: 'en', start_date: @start_of_month)
assert_equal other_workspace_with_stats.id, stats_three.team_id
assert_equal @start_of_month.to_i, stats_three.start_date.to_i
assert_equal @current_date.to_i, stats_three.end_date.to_i
assert_equal 'en', stats_three.language
assert_equal 400, stats_three.unique_newsletters_sent
end

test "check:data:regenerate_statistics doesn't explode if tipline has been disabled, and sets newsletters to nil" do
start_date = "2023-04-01"
random_team = create_team
create_monthly_team_statistic(team: random_team, language: 'es', start_date: @start_of_month, end_date: @current_date)

travel_to @current_date

out, err = capture_io do
Rake::Task['check:data:regenerate_statistics'].invoke("unique_newsletters_sent")
Rake::Task['check:data:regenerate_statistics'].invoke(start_date)
end
Rake::Task['check:data:regenerate_statistics'].reenable

puts "Test output: #{out}"
puts "Test error: #{err}"

assert err.blank?

stats_one = MonthlyTeamStatistic.first
assert_nil stats_one.unique_newsletters_sent
end
end
end

0 comments on commit 3c9a007

Please sign in to comment.