From a37eab3071eb0acefc7d7df54a536fcc23a935f3 Mon Sep 17 00:00:00 2001
From: Christian Bruckmayer
Date: Wed, 24 Jul 2024 10:32:56 +0100
Subject: [PATCH] Report worker crashes to summary command

When a worker exits because of an uncaught application error, store the
error message in the build's `worker-errors` Redis hash, keyed by the
worker id, with the configured Redis TTL.

The build status reporter prints every recorded crash and no longer
treats the build as successful while any crash is recorded. A worker
removes its own entry when it resets its counters.

---
 ruby/lib/ci/queue/build_record.rb                |  8 ++++++++
 ruby/lib/ci/queue/redis/build_record.rb          | 15 +++++++++++++++
 ruby/lib/ci/queue/redis/worker.rb                |  4 ++++
 ruby/lib/minitest/queue.rb                       |  1 +
 ruby/lib/minitest/queue/build_status_reporter.rb |  9 ++++++++-
 ruby/lib/minitest/queue/runner.rb                |  1 +
 ruby/test/integration/minitest_redis_test.rb     | 16 ++++++++++++++++
 7 files changed, 53 insertions(+), 1 deletion(-)

diff --git a/ruby/lib/ci/queue/build_record.rb b/ruby/lib/ci/queue/build_record.rb
index 1efb9c83..99ebf047 100644
--- a/ruby/lib/ci/queue/build_record.rb
+++ b/ruby/lib/ci/queue/build_record.rb
@@ -36,6 +36,14 @@ def reset_stats(stat_names)
         stat_names.each { |s| stats.delete(s) }
       end
 
+      def report_worker_error(_); end
+
+      def reset_worker_error; end
+
+      def worker_errors
+        {}
+      end
+
       private
 
       attr_reader :stats
diff --git a/ruby/lib/ci/queue/redis/build_record.rb b/ruby/lib/ci/queue/redis/build_record.rb
index fdbb2eb1..8876f6fa 100644
--- a/ruby/lib/ci/queue/redis/build_record.rb
+++ b/ruby/lib/ci/queue/redis/build_record.rb
@@ -17,6 +17,21 @@ def queue_exhausted?
           @queue.exhausted?
         end
 
+        def report_worker_error(error)
+          redis.pipelined do |pipeline|
+            pipeline.hset(key('worker-errors'), config.worker_id, error.message)
+            pipeline.expire(key('worker-errors'), config.redis_ttl)
+          end
+        end
+
+        def worker_errors
+          redis.hgetall(key('worker-errors'))
+        end
+
+        def reset_worker_error
+          redis.hdel(key('worker-errors'), config.worker_id)
+        end
+
         def failed_tests
           redis.hkeys(key('error-reports'))
         end
diff --git a/ruby/lib/ci/queue/redis/worker.rb b/ruby/lib/ci/queue/redis/worker.rb
index ff312563..efcfa4e1 100644
--- a/ruby/lib/ci/queue/redis/worker.rb
+++ b/ruby/lib/ci/queue/redis/worker.rb
@@ -95,6 +95,10 @@ def build
           @build ||= CI::Queue::Redis::BuildRecord.new(self, redis, config)
         end
 
+        def report_worker_error(error)
+          build.report_worker_error(error)
+        end
+
         def acknowledge(test)
           test_key = test.id
           raise_on_mismatching_test(test_key)
diff --git a/ruby/lib/minitest/queue.rb b/ruby/lib/minitest/queue.rb
index f87468d7..8450ec99 100644
--- a/ruby/lib/minitest/queue.rb
+++ b/ruby/lib/minitest/queue.rb
@@ -268,6 +268,7 @@ def run_from_queue(reporter, *)
       exit!(41)
     rescue => error
       reopen_previous_step
+      queue.report_worker_error(error)
       puts red("This worker exited because of an uncaught application error:")
       puts red("#{error.class}: #{error.message}")
       exit!(42)
diff --git a/ruby/lib/minitest/queue/build_status_reporter.rb b/ruby/lib/minitest/queue/build_status_reporter.rb
index f677fc8c..3ae6146c 100644
--- a/ruby/lib/minitest/queue/build_status_reporter.rb
+++ b/ruby/lib/minitest/queue/build_status_reporter.rb
@@ -36,11 +36,18 @@ def report
           puts ""
         end
 
+        build.worker_errors.to_a.sort.each do |worker_id, error|
+          puts yellow("Worker #{worker_id} crashed")
+          puts error
+          puts ""
+        end
+
         errors.empty?
       end
 
       def success?
-        build.error_reports.empty?
+        build.error_reports.empty? &&
+          build.worker_errors.empty?
       end
 
       def record(*)
diff --git a/ruby/lib/minitest/queue/runner.rb b/ruby/lib/minitest/queue/runner.rb
index 8d0faa52..daf53f6a 100644
--- a/ruby/lib/minitest/queue/runner.rb
+++ b/ruby/lib/minitest/queue/runner.rb
@@ -345,6 +345,7 @@ def run_tests_in_fork(queue)
 
       def reset_counters
         queue.build.reset_stats(BuildStatusRecorder::COUNTERS)
+        queue.build.reset_worker_error
       end
 
       def populate_queue
diff --git a/ruby/test/integration/minitest_redis_test.rb b/ruby/test/integration/minitest_redis_test.rb
index 32268c4e..0f49bffd 100644
--- a/ruby/test/integration/minitest_redis_test.rb
+++ b/ruby/test/integration/minitest_redis_test.rb
@@ -952,6 +952,22 @@ def test_application_error
       end
 
       assert_equal 42, $?.exitstatus
+
+      out, err = capture_subprocess_io do
+        system(
+          @exe, 'report',
+          '--queue', @redis_url,
+          '--build', '1',
+          '--timeout', '1',
+          '--heartbeat',
+          chdir: 'test/fixtures/',
+        )
+      end
+
+      assert_includes out, "Worker 1 crashed"
+      assert_includes out, "Some error in the test framework"
+
+      assert_equal 1, $?.exitstatus
     end
 
     private