From 9595a47f9810342396d01eb2bec9f06ffc5dd3d4 Mon Sep 17 00:00:00 2001 From: Evgeni Golov Date: Tue, 28 May 2024 12:10:00 +0200 Subject: [PATCH] stop workers during online backup this ensures more consistent on-disk data --- .../checks/pulpcore/no_running_tasks.rb | 22 ++++++++ definitions/features/dynflow_sidekiq.rb | 4 ++ definitions/features/pulpcore.rb | 24 +++++++++ .../procedures/pulpcore/wait_for_tasks.rb | 15 ++++++ definitions/scenarios/backup.rb | 38 +++++++++---- test/definitions/scenarios/backup_test.rb | 53 +++++++++++++++++-- 6 files changed, 142 insertions(+), 14 deletions(-) create mode 100644 definitions/checks/pulpcore/no_running_tasks.rb create mode 100644 definitions/procedures/pulpcore/wait_for_tasks.rb diff --git a/definitions/checks/pulpcore/no_running_tasks.rb b/definitions/checks/pulpcore/no_running_tasks.rb new file mode 100644 index 000000000..8a4133515 --- /dev/null +++ b/definitions/checks/pulpcore/no_running_tasks.rb @@ -0,0 +1,22 @@ +module Checks::Pulpcore + class NoRunningTasks < ForemanMaintain::Check + metadata do + for_feature :pulpcore + description 'Check for running pulpcore tasks' + tags :pre_upgrade + end + + def run + tasks = feature(:pulpcore).running_tasks + assert(tasks.empty?, failure_message(tasks.length), + :next_steps => [Procedures::Pulpcore::WaitForTasks.new]) + end + + private + + def failure_message(task_count) + "There are #{task_count} active task(s) in the system." \ + "\nPlease wait for these to complete." + end + end +end diff --git a/definitions/features/dynflow_sidekiq.rb b/definitions/features/dynflow_sidekiq.rb index ba4e3e302..64e3cce5d 100644 --- a/definitions/features/dynflow_sidekiq.rb +++ b/definitions/features/dynflow_sidekiq.rb @@ -20,6 +20,10 @@ def services end end + def workers + services.reject { |s| s.name.end_with?('@orchestrator') } + end + private def instance_priority(instance) diff --git a/definitions/features/pulpcore.rb b/definitions/features/pulpcore.rb index 111fe3718..0b58a584b 100644 --- a/definitions/features/pulpcore.rb +++ b/definitions/features/pulpcore.rb @@ -3,10 +3,34 @@ class Features::Pulpcore < ForemanMaintain::Feature include ForemanMaintain::Concerns::PulpCommon + TIMEOUT_FOR_TASKS_STATUS = 300 + RETRY_INTERVAL_FOR_TASKS_STATE = 10 + metadata do label :pulpcore end + def cli(args) + parse_json(execute("pulp --format json #{args}")) + end + + def running_tasks + cli('task list --state-in running --state-in canceling') + end + + def wait_for_tasks(spinner) + Timeout.timeout(TIMEOUT_FOR_TASKS_STATUS) do + while (task_count = running_tasks.length) != 0 + puts "\nThere are #{task_count} tasks." + spinner.update "Waiting #{RETRY_INTERVAL_FOR_TASKS_STATE} seconds before retry." + sleep RETRY_INTERVAL_FOR_TASKS_STATE + end + end + rescue Timeout::Error => e + logger.error e.message + puts "\nTimeout: #{e.message}. Try again." + end + def services redis_services = feature(:redis) ? feature(:redis).services : [] diff --git a/definitions/procedures/pulpcore/wait_for_tasks.rb b/definitions/procedures/pulpcore/wait_for_tasks.rb new file mode 100644 index 000000000..6bfc895c0 --- /dev/null +++ b/definitions/procedures/pulpcore/wait_for_tasks.rb @@ -0,0 +1,15 @@ +module Procedures::Pulpcore + class WaitForTasks < ForemanMaintain::Procedure + metadata do + for_feature :pulpcore + description 'Fetch tasks status and wait till they finish' + advanced_run false + end + + def run + with_spinner("waiting for tasks to finish") do |spinner| + feature(:foreman_tasks).wait_for_tasks(spinner) + end + end + end +end diff --git a/definitions/scenarios/backup.rb b/definitions/scenarios/backup.rb index effa24430..248854731 100644 --- a/definitions/scenarios/backup.rb +++ b/definitions/scenarios/backup.rb @@ -1,5 +1,20 @@ module ForemanMaintain::Scenarios - class Backup < ForemanMaintain::Scenario + class BackupBase < ForemanMaintain::Scenario + private + + def strategy + context.get(:strategy) + end + + def online_worker_services + services = [] + services += feature(:dynflow_sidekiq).workers if feature(:dynflow_sidekiq) + services += feature(:pulpcore).configured_workers if feature(:pulpcore) + services + end + end + + class Backup < BackupBase metadata do description 'Backup' manual_detection @@ -18,6 +33,8 @@ class Backup < ForemanMaintain::Scenario def compose check_valid_strategy + add_step(Checks::ForemanTasks::NotRunning) + add_step(Checks::Pulpcore::NoRunningTasks) safety_confirmation add_step_with_context(Procedures::Backup::AccessibilityConfirmation) if strategy == :offline add_step_with_context(Procedures::Backup::PrepareDirectory) @@ -106,6 +123,9 @@ def include_dumps end def add_online_backup_steps + services = online_worker_services + add_step(Procedures::Service::Stop.new(:only => services)) unless services.empty? + add_step_with_context(Procedures::Backup::ConfigFiles, :ignore_changed_files => true, :online_backup => true) add_step_with_context(Procedures::Backup::Pulp, :ensure_unchanged => true) @@ -114,10 +134,8 @@ def add_online_backup_steps Procedures::Backup::Online::ForemanDB, Procedures::Backup::Online::PulpcoreDB ) - end - def strategy - context.get(:strategy) + add_step(Procedures::Service::Start.new(:only => services)) unless services.empty? end def include_db_dumps? @@ -125,7 +143,7 @@ def include_db_dumps? end end - class BackupRescueCleanup < ForemanMaintain::Scenario + class BackupRescueCleanup < BackupBase metadata do description 'Failed backup cleanup' manual_detection @@ -140,6 +158,10 @@ def compose add_step_with_context(Procedures::Service::Start) add_steps_with_context(find_procedures(:maintenance_mode_off)) end + if strategy == :online + services = online_worker_services + add_step(Procedures::Service::Start.new(:only => services)) unless services.empty? + end add_step_with_context(Procedures::Backup::Clean) end @@ -149,11 +171,5 @@ def set_context_mapping context.map(:preserve_dir, Procedures::Backup::Clean => :preserve_dir) end - - private - - def strategy - context.get(:strategy) - end end end diff --git a/test/definitions/scenarios/backup_test.rb b/test/definitions/scenarios/backup_test.rb index 5952d53eb..3351ecddf 100644 --- a/test/definitions/scenarios/backup_test.rb +++ b/test/definitions/scenarios/backup_test.rb @@ -10,6 +10,22 @@ module Scenarios db.any_instance.stubs(:local?).returns(true) end end + + assume_feature_present(:pulpcore) do |feature| + feature.any_instance.stubs(:configured_workers).returns([existing_pulpcore_worker]) + end + + assume_feature_present(:dynflow_sidekiq) do |feature| + feature.any_instance.stubs(:workers).returns([existing_dynflow_worker]) + end + end + + let(:existing_pulpcore_worker) { existing_system_service('pulpcore-worker@1', 10) } + let(:existing_dynflow_worker) { existing_system_service('dynflow-sidekiq@worker-1', 10) } + + let(:checks) do + [Checks::ForemanTasks::NotRunning, + Checks::Pulpcore::NoRunningTasks] end describe 'offline' do @@ -18,6 +34,9 @@ module Scenarios end it 'composes all steps' do + checks.each do |check| + assert_scenario_has_step(scenario, check) + end assert_scenario_has_step(scenario, Procedures::Backup::AccessibilityConfirmation) assert_scenario_has_step(scenario, Procedures::Backup::PrepareDirectory) assert_scenario_has_step(scenario, Procedures::Backup::Metadata) @@ -42,17 +61,28 @@ module Scenarios end it 'composes all steps' do + checks.each do |check| + assert_scenario_has_step(scenario, check) + end assert_scenario_has_step(scenario, Procedures::Backup::Online::SafetyConfirmation) refute_scenario_has_step(scenario, Procedures::Backup::AccessibilityConfirmation) assert_scenario_has_step(scenario, Procedures::Backup::PrepareDirectory) assert_scenario_has_step(scenario, Procedures::Backup::Metadata) - refute_scenario_has_step(scenario, Procedures::Service::Stop) + assert_scenario_has_step(scenario, Procedures::Service::Stop) do |step| + assert_includes(step.common_options[:only], existing_pulpcore_worker) + assert_includes(step.common_options[:only], existing_dynflow_worker) + assert_equal(step.common_options[:only].length, 2) + end assert_scenario_has_step(scenario, Procedures::Backup::ConfigFiles) assert_scenario_has_step(scenario, Procedures::Backup::Pulp) assert_scenario_has_step(scenario, Procedures::Backup::Online::CandlepinDB) assert_scenario_has_step(scenario, Procedures::Backup::Online::ForemanDB) assert_scenario_has_step(scenario, Procedures::Backup::Online::PulpcoreDB) - refute_scenario_has_step(scenario, Procedures::Service::Start) + assert_scenario_has_step(scenario, Procedures::Service::Start) do |step| + assert_includes(step.common_options[:only], existing_pulpcore_worker) + assert_includes(step.common_options[:only], existing_dynflow_worker) + assert_equal(step.common_options[:only].length, 2) + end assert_scenario_has_step(scenario, Procedures::Backup::CompressData) end end @@ -61,6 +91,19 @@ module Scenarios describe ForemanMaintain::Scenarios::BackupRescueCleanup do include DefinitionsTestHelper + before do + assume_feature_present(:pulpcore) do |feature| + feature.any_instance.stubs(:configured_workers).returns([existing_pulpcore_worker]) + end + + assume_feature_present(:dynflow_sidekiq) do |feature| + feature.any_instance.stubs(:workers).returns([existing_dynflow_worker]) + end + end + + let(:existing_pulpcore_worker) { existing_system_service('pulpcore-worker@1', 10) } + let(:existing_dynflow_worker) { existing_system_service('dynflow-sidekiq@worker-1', 10) } + describe 'offline' do let(:scenario) do ForemanMaintain::Scenarios::BackupRescueCleanup.new(:backup_dir => '.', @@ -80,7 +123,11 @@ module Scenarios end it 'composes all steps' do - refute_scenario_has_step(scenario, Procedures::Service::Start) + assert_scenario_has_step(scenario, Procedures::Service::Start) do |step| + assert_includes(step.common_options[:only], existing_pulpcore_worker) + assert_includes(step.common_options[:only], existing_dynflow_worker) + assert_equal(step.common_options[:only].length, 2) + end assert_scenario_has_step(scenario, Procedures::Backup::Clean) end end