From 3c9a0077b92b642b4481a424448af38727e76a56 Mon Sep 17 00:00:00 2001 From: Jay Joshua <7008757+jayjay-w@users.noreply.github.com> Date: Wed, 10 Jul 2024 12:24:32 +0200 Subject: [PATCH] Add rake job to regenerate statistics Add rake job to regenerate all statistics. --- lib/tasks/data/statistics.rake | 77 ++++++++++++++------- test/lib/tasks/statistics_test.rb | 108 +++++++++++++----------------- 2 files changed, 97 insertions(+), 88 deletions(-) diff --git a/lib/tasks/data/statistics.rake b/lib/tasks/data/statistics.rake index d01967c16b..9511e6ea56 100644 --- a/lib/tasks/data/statistics.rake +++ b/lib/tasks/data/statistics.rake @@ -28,11 +28,7 @@ namespace :check do team = Team.find(team_id) languages = team.get_languages.to_a - if bot.user == BotUser.smooch_user - platforms = bot.smooch_enabled_integrations.keys - else - platforms = Bot::Smooch::SUPPORTED_INTEGRATION_NAMES.keys - end + platforms = bot.user == BotUser.smooch_user ? bot.smooch_enabled_integrations.keys : Bot::Smooch::SUPPORTED_INTEGRATION_NAMES.keys team_stats = Hash.new(0) puts "[#{Time.now}] Generating month tipline statistics for team with ID #{team_id}. (#{index + 1} / #{team_ids.length})" @@ -101,40 +97,72 @@ namespace :check do raise Check::Statistics::IncompleteRunError.new("Failed to calculate #{errors.length} monthly team statistics") if errors.any? end - # bundle exec rake check:data:regenerate_statistics[unique_newsletters_sent] - desc 'Regenerate specified historic statistic for all workspaces' - task :regenerate_statistics, [:stats_to_generate] => [:environment] do |_t, args| + # bundle exec rake check:data:regenerate_statistics[start_date] + desc 'Regenerate all historic statistics for all workspaces from a given start date' + task :regenerate_statistics, [:start_date] => [:environment] do |_t, args| old_logger = ActiveRecord::Base.logger ActiveRecord::Base.logger = nil - puts "[#{Time.now}] Attempting to regenerate keys: #{args.stats_to_generate}" + start_date = DateTime.parse(args.start_date) rescue nil + raise Check::Statistics::ArgumentError.new("Invalid or missing start_date argument") if start_date.nil? + + puts "[#{Time.now}] Starting to regenerate all statistics from #{start_date}" begin - # Give user help if they want it supported_stats = %w( + conversations + average_messages_per_day + unique_users + returning_users + valid_new_requests + published_native_reports + published_imported_reports + requests_answered_with_report + reports_sent_to_users + unique_users_who_received_report + median_response_time unique_newsletters_sent + new_newsletter_subscriptions + newsletter_cancellations + current_subscribers ) - # Make sure we have at least one valid argument - requested_stats = (args.stats_to_generate || '').split(',').map(&:strip) - valid_requested_stats = requested_stats.intersection(supported_stats) - unless valid_requested_stats.length > 0 - raise Check::Statistics::ArgumentError.new("Argument '#{args.stats_to_generate}' is invalid. We currently support the following values passed a comma-separated list: #{supported_stats.join(',')}.") - end - - puts "[#{Time.now}] Regenerating stats for the following keys: #{valid_requested_stats}. Total to update: #{MonthlyTeamStatistic.count}" - - # Update all of the stats + puts "[#{Time.now}] Regenerating stats for the following keys: #{supported_stats}. Total to update: #{MonthlyTeamStatistic.where('start_date >= ?', start_date).count}" + + # Define a mapping of statistics to their corresponding methods + stats_methods = { + 'conversations' => :conversations, + 'average_messages_per_day' => :average_messages_per_day, + 'unique_users' => :unique_users, + 'returning_users' => :returning_users, + 'valid_new_requests' => :valid_new_requests, + 'published_native_reports' => :published_native_reports, + 'published_imported_reports' => :published_imported_reports, + 'requests_answered_with_report' => :requests_answered_with_report, + 'reports_sent_to_users' => :reports_sent_to_users, + 'unique_users_who_received_report' => :unique_users_who_received_report, + 'median_response_time' => :median_response_time, + 'unique_newsletters_sent' => :number_of_newsletters_sent, + 'new_newsletter_subscriptions' => :new_newsletter_subscriptions, + 'newsletter_cancellations' => :newsletter_cancellations, + 'current_subscribers' => :current_subscribers + } + + # Update all of the stats from the start_date total_successful = Hash.new(0) - MonthlyTeamStatistic.find_each do |monthly_stats| + MonthlyTeamStatistic.where('start_date >= ?', start_date).find_each do |monthly_stats| team_id = monthly_stats.team_id start_date = monthly_stats.start_date end_date = monthly_stats.end_date language = monthly_stats.language begin - if valid_requested_stats.include?('unique_newsletters_sent') - monthly_stats.update!(unique_newsletters_sent: CheckStatistics.number_of_newsletters_sent(team_id, start_date, end_date, language)) - total_successful[:unique_newsletters_sent] += 1 + supported_stats.each do |stat| + method_name = stats_methods[stat] + next unless method_name + + result = CheckStatistics.send(method_name, team_id, start_date, end_date, language) + monthly_stats.update!(stat => result) + total_successful[stat.to_sym] += 1 end rescue StandardError => e $stderr.puts "[#{Time.now}] Failed to update MonthlyTeamStatistic with ID #{monthly_stats.id}. Error: #{e}" @@ -144,7 +172,6 @@ namespace :check do puts "[#{Time.now}] Finished updating MonthlyTeamStatistics. Total updated: #{total_successful}" rescue StandardError => e $stderr.puts e - next ensure ActiveRecord::Base.logger = old_logger end diff --git a/test/lib/tasks/statistics_test.rb b/test/lib/tasks/statistics_test.rb index ddb8a1137b..f9f04ed8ba 100644 --- a/test/lib/tasks/statistics_test.rb +++ b/test/lib/tasks/statistics_test.rb @@ -337,9 +337,9 @@ def teardown test "check:data:statistics allows generating conversations for months before april 1 2023, with argument" do date = DateTime.new(2023,01,01) - + create_project_media(user: BotUser.smooch_user, team: @tipline_team, created_at: date + 2.weeks) - + CheckStatistics.stubs(:get_statistics).returns( { platform: 'whatsapp', @@ -348,120 +348,102 @@ def teardown end_date: date, } ) - + travel_to DateTime.new(2023,01,01) - + out, err = capture_io do # pass in ignore_convo_cutoff: true Rake::Task['check:data:statistics'].invoke(true) end Rake::Task['check:data:statistics'].reenable - + + puts "Test output: #{out}" + puts "Test error: #{err}" + conversations = MonthlyTeamStatistic.where(team: @tipline_team).pluck(:conversations_24hr).uniq assert_equal 1, conversations.count assert !conversations.first.nil? end - - test "check:data:regenerate_statistics errors if only an unsupported argument is passed" do + + test "check:data:regenerate_statistics errors if start_date argument is invalid" do out, err = capture_io do - Rake::Task['check:data:regenerate_statistics'].invoke("foo") + Rake::Task['check:data:regenerate_statistics'].invoke("invalid_date") end Rake::Task['check:data:regenerate_statistics'].reenable - + assert err.present? + assert_match /Invalid or missing start_date argument/, err end - - test "check:data:regenerate_statistics accepts arguments as comma separated list or string" do - out, err = capture_io do - Rake::Task['check:data:regenerate_statistics'].invoke("unique_newsletters_sent") - end - Rake::Task['check:data:regenerate_statistics'].reenable - - assert err.blank? - - out, err = capture_io do - Rake::Task['check:data:regenerate_statistics'].invoke("unique_newsletters_sent,foo") - end - Rake::Task['check:data:regenerate_statistics'].reenable - - assert err.blank? - end - - test "check:data:regenerate_statistics outputs supported arguments if no args provided" do - out, err = capture_io do - Rake::Task['check:data:regenerate_statistics'].invoke - end - Rake::Task['check:data:regenerate_statistics'].reenable - - assert_match /unique_newsletters_sent/, err - - out, err = capture_io do - Rake::Task['check:data:regenerate_statistics'].invoke - end - Rake::Task['check:data:regenerate_statistics'].reenable - - assert_match /unique_newsletters_sent/, err - end - - test "check:data:regenerate_statistics regenerates any monthly team statistics present in database for provided stat" do + + test "check:data:regenerate_statistics regenerates stats from the provided start date" do + start_date = "2023-04-01" previous_month_start = DateTime.new(2023,4,1,0,0,0) previous_month_end = DateTime.new(2023,4,30,23,59,59) - + other_workspace_with_stats = create_team - + team_stat_one = create_monthly_team_statistic(team: @tipline_team, language: 'en', start_date: previous_month_start, end_date: previous_month_end) team_stat_two = create_monthly_team_statistic(team: @tipline_team, language: 'es', start_date: @start_of_month, end_date: @current_date) team_stat_three = create_monthly_team_statistic(team: other_workspace_with_stats, language: 'en', start_date: @start_of_month, end_date: @current_date) - + CheckStatistics.stubs(:number_of_newsletters_sent).with(@tipline_team.id, team_stat_one.start_date, team_stat_one.end_date, 'en').returns(100) CheckStatistics.expects(:number_of_newsletters_sent).with(@tipline_team.id, team_stat_two.start_date, team_stat_two.end_date, 'es').returns(300) CheckStatistics.expects(:number_of_newsletters_sent).with(other_workspace_with_stats.id, team_stat_three.start_date, team_stat_three.end_date, 'en').returns(400) travel_to @current_date - + out, err = capture_io do - Rake::Task['check:data:regenerate_statistics'].invoke("unique_newsletters_sent") + Rake::Task['check:data:regenerate_statistics'].invoke(start_date) end Rake::Task['check:data:regenerate_statistics'].reenable + + puts "Test output: #{out}" + puts "Test error: #{err}" + assert err.blank? - + # en, previous month - stats_one = MonthlyTeamStatistic.first + stats_one = MonthlyTeamStatistic.find_by(team: @tipline_team, language: 'en', start_date: previous_month_start) assert_equal @tipline_team.id, stats_one.team_id - assert_equal (@current_date - 1.month).beginning_of_month.to_i, stats_one.start_date.to_i - assert_equal (@current_date - 1.month).end_of_month.to_i, stats_one.end_date.to_i + assert_equal previous_month_start.to_i, stats_one.start_date.to_i + assert_equal previous_month_end.to_i, stats_one.end_date.to_i assert_equal 'en', stats_one.language assert_equal 100, stats_one.unique_newsletters_sent - + # es, current month - stats_two = MonthlyTeamStatistic.second + stats_two = MonthlyTeamStatistic.find_by(team: @tipline_team, language: 'es', start_date: @start_of_month) assert_equal @tipline_team.id, stats_two.team_id assert_equal @start_of_month.to_i, stats_two.start_date.to_i assert_equal @current_date.to_i, stats_two.end_date.to_i assert_equal 'es', stats_two.language assert_equal 300, stats_two.unique_newsletters_sent - - # second workspace - es, current month - stats_three = MonthlyTeamStatistic.third + + # second workspace - en, current month + stats_three = MonthlyTeamStatistic.find_by(team: other_workspace_with_stats, language: 'en', start_date: @start_of_month) assert_equal other_workspace_with_stats.id, stats_three.team_id assert_equal @start_of_month.to_i, stats_three.start_date.to_i assert_equal @current_date.to_i, stats_three.end_date.to_i assert_equal 'en', stats_three.language assert_equal 400, stats_three.unique_newsletters_sent end - + test "check:data:regenerate_statistics doesn't explode if tipline has been disabled, and sets newsletters to nil" do + start_date = "2023-04-01" random_team = create_team create_monthly_team_statistic(team: random_team, language: 'es', start_date: @start_of_month, end_date: @current_date) - + travel_to @current_date - + out, err = capture_io do - Rake::Task['check:data:regenerate_statistics'].invoke("unique_newsletters_sent") + Rake::Task['check:data:regenerate_statistics'].invoke(start_date) end Rake::Task['check:data:regenerate_statistics'].reenable + + puts "Test output: #{out}" + puts "Test error: #{err}" + assert err.blank? - + stats_one = MonthlyTeamStatistic.first assert_nil stats_one.unique_newsletters_sent - end + end end