Skip to content

Commit

Permalink
stop workers during online backup
Browse files Browse the repository at this point in the history
this ensures more consistent on-disk data
  • Loading branch information
evgeni committed Jun 14, 2024
1 parent 1c9e873 commit 5059588
Show file tree
Hide file tree
Showing 10 changed files with 302 additions and 36 deletions.
23 changes: 18 additions & 5 deletions definitions/checks/foreman_tasks/not_running.rb
Original file line number Diff line number Diff line change
Expand Up @@ -6,17 +6,17 @@ class NotRunning < ForemanMaintain::Check
tags :pre_upgrade
after :foreman_tasks_not_paused
before :check_old_foreman_tasks
param :wait_for_tasks,
'Wait for tasks to finish or fail directly',
:required => false,
:default => true
end

def run
task_count = feature(:foreman_tasks).running_tasks_count
assert(task_count == 0,
failure_message(task_count),
:next_steps =>
[Procedures::ForemanTasks::FetchTasksStatus.new(:state => 'running'),
Procedures::ForemanTasks::UiInvestigate.new(
'search_query' => search_query_for_running_tasks
)])
:next_steps => calculate_next_steps)
end

private
Expand All @@ -30,5 +30,18 @@ def failure_message(task_count)
"There are #{task_count} active task(s) in the system." \
"\nPlease wait for these to complete or cancel them from the Monitor tab."
end

# Builds the list of follow-up procedures offered when the check fails.
#
# Nothing is offered unless @wait_for_tasks is truthy. Otherwise the
# running tasks status is fetched, and the UI investigation step is
# added on top of that only when assumeyes? is false.
#
# @return [Array] procedure instances, possibly empty
def calculate_next_steps
  return [] unless @wait_for_tasks

  fetch_status = Procedures::ForemanTasks::FetchTasksStatus.new(:state => 'running')
  return [fetch_status] if assumeyes?

  investigate = Procedures::ForemanTasks::UiInvestigate.new(
    'search_query' => search_query_for_running_tasks
  )
  [fetch_status, investigate]
end
end
end
32 changes: 32 additions & 0 deletions definitions/checks/pulpcore/no_running_tasks.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
module Checks::Pulpcore
  # Check that fails while the pulpcore feature reports running tasks.
  class NoRunningTasks < ForemanMaintain::Check
    metadata do
      for_feature :pulpcore
      description 'Check for running pulpcore tasks'
      tags :pre_upgrade
      param :wait_for_tasks,
            'Wait for tasks to finish or fail directly',
            :required => false
    end

    # Asserts that the list of running pulpcore tasks is empty; on failure
    # the message carries the task count and the computed next steps.
    def run
      active_tasks = feature(:pulpcore).running_tasks
      nothing_running = active_tasks.empty?
      assert(nothing_running,
             failure_message(active_tasks.length),
             :next_steps => calculate_next_steps)
    end

    private

    # @param task_count [Integer] number of tasks still active
    # @return [String] two-line message shown when the check fails
    def failure_message(task_count)
      "There are #{task_count} active task(s) in the system." \
        "\nPlease wait for these to complete."
    end

    # Offers the waiting procedure only when @wait_for_tasks is truthy.
    #
    # @return [Array] zero or one procedure instance
    def calculate_next_steps
      return [] unless @wait_for_tasks

      [Procedures::Pulpcore::WaitForTasks.new]
    end
  end
end
4 changes: 4 additions & 0 deletions definitions/features/dynflow_sidekiq.rb
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,10 @@ def services
end
end

# Subset of #services whose name does not end with '@orchestrator'.
#
# @return [Array] the non-orchestrator service objects
def workers
  services.select do |candidate|
    !candidate.name.end_with?('@orchestrator')
  end
end

private

def instance_priority(instance)
Expand Down
24 changes: 24 additions & 0 deletions definitions/features/pulpcore.rb
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,34 @@
class Features::Pulpcore < ForemanMaintain::Feature
include ForemanMaintain::Concerns::PulpCommon

# Default value, in seconds, handed to Timeout.timeout by #wait_for_tasks.
TIMEOUT_FOR_TASKS_STATUS = 300
# Number of seconds #wait_for_tasks sleeps between two polls of the running tasks.
RETRY_INTERVAL_FOR_TASKS_STATE = 10

metadata do
label :pulpcore
end

# Runs the `pulp` command line client with JSON output requested and
# hands whatever it printed to parse_json.
#
# @param args [String] arguments appended verbatim to the pulp command
# @return the value produced by parse_json
def cli(args)
  raw_output = execute("pulp --format json #{args}")
  parse_json(raw_output)
end

# Asks the pulp CLI for the tasks that are in the 'running' or
# 'canceling' state.
#
# @return the parsed task list as returned by #cli
def running_tasks
  task_query = 'task list --state-in running --state-in canceling'
  cli(task_query)
end

# Polls #running_tasks until none are left, pausing
# RETRY_INTERVAL_FOR_TASKS_STATE seconds between polls and reporting
# progress on stdout and through the given spinner.
#
# A Timeout::Error raised while waiting is not propagated: it is logged
# and a hint to try again is printed instead.
#
# @param spinner [#update] receives the "waiting" message on every retry
# @param timeout_for_tasks_status [Integer] limit passed to Timeout.timeout
# @return [nil]
def wait_for_tasks(spinner, timeout_for_tasks_status = TIMEOUT_FOR_TASKS_STATUS)
  Timeout.timeout(timeout_for_tasks_status) do
    until (pending = running_tasks.length).zero?
      puts "\nThere are #{pending} tasks."
      spinner.update "Waiting #{RETRY_INTERVAL_FOR_TASKS_STATE} seconds before retry."
      sleep RETRY_INTERVAL_FOR_TASKS_STATE
    end
  end
rescue Timeout::Error => timeout_error
  logger.error(timeout_error.message)
  puts "\nTimeout: #{timeout_error.message}. Try again."
end

def services
redis_services = feature(:redis) ? feature(:redis).services : []

Expand Down
15 changes: 15 additions & 0 deletions definitions/procedures/pulpcore/wait_for_tasks.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
module Procedures::Pulpcore
  # Procedure that delegates to the pulpcore feature's wait_for_tasks
  # while a spinner is displayed.
  class WaitForTasks < ForemanMaintain::Procedure
    metadata do
      for_feature :pulpcore
      description 'Fetch tasks status and wait till they finish'
      advanced_run false
    end

    # Opens a spinner and passes it to the pulpcore feature, which reports
    # its progress through it.
    def run
      with_spinner('waiting for tasks to finish') { |spinner| feature(:pulpcore).wait_for_tasks(spinner) }
    end
  end
end
43 changes: 32 additions & 11 deletions definitions/scenarios/backup.rb
Original file line number Diff line number Diff line change
@@ -1,5 +1,24 @@
module ForemanMaintain::Scenarios
class Backup < ForemanMaintain::Scenario
# Common private helpers shared by the backup scenarios.
class BackupBase < ForemanMaintain::Scenario
  private

  # @return the :strategy value stored in the scenario context
  def strategy
    context.get(:strategy)
  end

  # @return [Boolean] whether :wait_for_tasks is set to a truthy value in the context
  def wait_for_tasks
    context.get(:wait_for_tasks) ? true : false
  end

  # Gathers the dynflow sidekiq workers and the configured pulpcore
  # workers, skipping whichever feature is not present.
  #
  # @return [Array] a new array of service objects, possibly empty
  def online_worker_services
    collected = []
    dynflow = feature(:dynflow_sidekiq)
    collected.concat(dynflow.workers) if dynflow
    pulpcore = feature(:pulpcore)
    collected.concat(pulpcore.configured_workers) if pulpcore
    collected
  end
end

class Backup < BackupBase
metadata do
description 'Backup'
manual_detection
Expand All @@ -14,10 +33,13 @@ class Backup < ForemanMaintain::Scenario
param :proxy_features, 'List of proxy features to backup (default: all)', :array => true
param :skip_pulp_content, 'Skip Pulp content during backup'
param :tar_volume_size, 'Size of tar volume (indicates splitting)'
param :wait_for_tasks, 'Wait for running tasks to complete instead of aborting'
end

def compose
check_valid_strategy
add_step(Checks::ForemanTasks::NotRunning.new(:wait_for_tasks => wait_for_tasks))
add_step(Checks::Pulpcore::NoRunningTasks.new(:wait_for_tasks => wait_for_tasks))
safety_confirmation
add_step_with_context(Procedures::Backup::AccessibilityConfirmation) if strategy == :offline
add_step_with_context(Procedures::Backup::PrepareDirectory)
Expand Down Expand Up @@ -106,6 +128,9 @@ def include_dumps
end

def add_online_backup_steps
services = online_worker_services
add_step(Procedures::Service::Stop.new(:only => services)) unless services.empty?

add_step_with_context(Procedures::Backup::ConfigFiles, :ignore_changed_files => true,
:online_backup => true)
add_step_with_context(Procedures::Backup::Pulp, :ensure_unchanged => true)
Expand All @@ -114,18 +139,16 @@ def add_online_backup_steps
Procedures::Backup::Online::ForemanDB,
Procedures::Backup::Online::PulpcoreDB
)
end

def strategy
context.get(:strategy)
add_step(Procedures::Service::Start.new(:only => services)) unless services.empty?
end

# @return [Boolean] whether :include_db_dumps is set to a truthy value in the context
def include_db_dumps?
  context.get(:include_db_dumps) ? true : false
end
end

class BackupRescueCleanup < ForemanMaintain::Scenario
class BackupRescueCleanup < BackupBase
metadata do
description 'Failed backup cleanup'
manual_detection
Expand All @@ -140,6 +163,10 @@ def compose
add_step_with_context(Procedures::Service::Start)
add_steps_with_context(find_procedures(:maintenance_mode_off))
end
if strategy == :online
services = online_worker_services
add_step(Procedures::Service::Start.new(:only => services)) unless services.empty?
end
add_step_with_context(Procedures::Backup::Clean)
end

Expand All @@ -149,11 +176,5 @@ def set_context_mapping
context.map(:preserve_dir,
Procedures::Backup::Clean => :preserve_dir)
end

private

def strategy
context.get(:strategy)
end
end
end
2 changes: 2 additions & 0 deletions lib/foreman_maintain/cli/backup_command.rb
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@ def common_backup_options
option '--features', 'FEATURES',
"#{proxy_name} features to include in the backup. " \
'Valid features are tftp, dns, dhcp, openscap, and all.', :multivalued => true
option '--wait-for-tasks', :flag, 'Wait for running tasks to complete instead of aborting'
end
# rubocop:enable Metrics/MethodLength

Expand Down Expand Up @@ -89,6 +90,7 @@ def backup_scenario(options, strategy)
:tar_volume_size => split_pulp_tar,
:skip_pulp_content => skip_pulp_content?,
:incremental_dir => incremental,
:wait_for_tasks => wait_for_tasks?,
}.merge(options))
end

Expand Down
58 changes: 41 additions & 17 deletions test/definitions/checks/foreman_tasks/not_running_test.rb
Original file line number Diff line number Diff line change
Expand Up @@ -3,25 +3,49 @@
describe Checks::ForemanTasks::NotRunning do
include DefinitionsTestHelper

subject do
Checks::ForemanTasks::NotRunning.new
end
context 'with default params' do
subject do
Checks::ForemanTasks::NotRunning.new
end

it 'passes when not active tasks are present' do
assume_feature_present(:foreman_tasks, :running_tasks_count => 0)
result = run_check(subject)
assert result.success?, 'Check expected to succeed'
end

it 'passes when not active tasks are present' do
assume_feature_present(:foreman_tasks, :running_tasks_count => 0)
result = run_check(subject)
assert result.success?, 'Check expected to succeed'
it 'fails when running/paused tasks are present' do
assume_feature_present(:foreman_tasks, :running_tasks_count => 5)
result = run_check(subject)
assert result.fail?, 'Check expected to fail'
msg = 'There are 5 active task(s) in the system.'
msg += "\nPlease wait for these to complete or cancel them from the Monitor tab."
assert_match msg, result.output
assert_equal [Procedures::ForemanTasks::FetchTasksStatus,
Procedures::ForemanTasks::UiInvestigate],
subject.next_steps.map(&:class)
end
end

it 'fails when running/paused tasks are present' do
assume_feature_present(:foreman_tasks, :running_tasks_count => 5)
result = run_check(subject)
assert result.fail?, 'Check expected to fail'
msg = 'There are 5 active task(s) in the system.'
msg += "\nPlease wait for these to complete or cancel them from the Monitor tab."
assert_match msg, result.output
assert_equal [Procedures::ForemanTasks::FetchTasksStatus,
Procedures::ForemanTasks::UiInvestigate],
subject.next_steps.map(&:class)
context 'with wait_for_tasks=>false' do
subject do
Checks::ForemanTasks::NotRunning.new(:wait_for_tasks => false)
end

it 'passes when not active tasks are present' do
assume_feature_present(:foreman_tasks, :running_tasks_count => 0)
result = run_check(subject)
assert result.success?, 'Check expected to succeed'
end

it 'fails when running/paused tasks are present' do
assume_feature_present(:foreman_tasks, :running_tasks_count => 5)
result = run_check(subject)
assert result.fail?, 'Check expected to fail'
msg = 'There are 5 active task(s) in the system.'
msg += "\nPlease wait for these to complete or cancel them from the Monitor tab."
assert_match msg, result.output
assert_empty subject.next_steps.map(&:class)
end
end
end
50 changes: 50 additions & 0 deletions test/definitions/checks/pulpcore/no_running_tasks_test.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
require 'test_helper'

# Specs for the pulpcore "no running tasks" check. Each context stubs the
# pulpcore feature's running_tasks and verifies the check result, its
# failure output and the next steps it offers.
describe Checks::Pulpcore::NoRunningTasks do
  include DefinitionsTestHelper

  # Without the wait_for_tasks param no follow-up procedure is offered.
  context 'with default params' do
    subject do
      Checks::Pulpcore::NoRunningTasks.new
    end

    it 'passes when no active tasks are present' do
      assume_feature_present(:pulpcore, :running_tasks => [])
      result = run_check(subject)
      assert result.success?, 'Check expected to succeed'
    end

    it 'fails when running tasks are present' do
      assume_feature_present(:pulpcore, :running_tasks => ['a_task'])
      result = run_check(subject)
      assert result.fail?, 'Check expected to fail'
      msg = 'There are 1 active task(s) in the system.' \
            "\nPlease wait for these to complete."
      assert_match msg, result.output
      assert_empty subject.next_steps.map(&:class)
    end
  end

  # With wait_for_tasks enabled the waiting procedure is offered on failure.
  context 'with wait_for_tasks=>true' do
    subject do
      Checks::Pulpcore::NoRunningTasks.new(:wait_for_tasks => true)
    end

    it 'passes when no active tasks are present' do
      assume_feature_present(:pulpcore, :running_tasks => [])
      result = run_check(subject)
      assert result.success?, 'Check expected to succeed'
    end

    it 'fails when running tasks are present' do
      assume_feature_present(:pulpcore, :running_tasks => ['a_task'])
      result = run_check(subject)
      assert result.fail?, 'Check expected to fail'
      msg = 'There are 1 active task(s) in the system.' \
            "\nPlease wait for these to complete."
      assert_match msg, result.output
      assert_equal [Procedures::Pulpcore::WaitForTasks],
                   subject.next_steps.map(&:class)
    end
  end
end
Loading

0 comments on commit 5059588

Please sign in to comment.