From 2d55105735f7f2bc3a2ca2070cdf3d16d6b7a70d Mon Sep 17 00:00:00 2001 From: Adam Grare Date: Thu, 2 Nov 2023 11:24:44 -0400 Subject: [PATCH] Ensure workers and miq_worker rows match --- .../worker_management/kubernetes.rb | 7 ++--- .../miq_server/worker_management/monitor.rb | 12 ++++++++ .../miq_server/worker_management/process.rb | 29 +++++++++++++++++-- .../miq_server/worker_management/systemd.rb | 6 ++++ 4 files changed, 48 insertions(+), 6 deletions(-) diff --git a/app/models/miq_server/worker_management/kubernetes.rb b/app/models/miq_server/worker_management/kubernetes.rb index 3aff19f85ce8..6140bbefb02f 100644 --- a/app/models/miq_server/worker_management/kubernetes.rb +++ b/app/models/miq_server/worker_management/kubernetes.rb @@ -12,10 +12,6 @@ def sync_from_system # we only have to sync the list of pods and deployments once ensure_kube_monitors_started if my_server_is_primary? - # Before syncing the workers check for any orphaned worker rows that don't have - # a current pod and delete them - cleanup_orphaned_worker_rows - # Update worker deployments with updated settings such as cpu/memory limits sync_deployment_settings end @@ -50,6 +46,9 @@ def cleanup_orphaned_worker_rows end end + def cleanup_orphaned_workers + end + def cleanup_failed_workers super diff --git a/app/models/miq_server/worker_management/monitor.rb b/app/models/miq_server/worker_management/monitor.rb index ee8a374a0c4f..a9e123a72b7f 100644 --- a/app/models/miq_server/worker_management/monitor.rb +++ b/app/models/miq_server/worker_management/monitor.rb @@ -18,6 +18,10 @@ def monitor_workers # Cache a list of the native objects backing the miq_workers (e.g.: pods, services, or processes) sync_from_system + cleanup_orphaned_worker_rows + + cleanup_orphaned_workers + sync_monitor # Sync the workers after sync'ing the child worker settings @@ -48,6 +52,14 @@ def sync_workers end end + def cleanup_orphaned_worker_rows + raise NotImplementedError, "cleanup_orphaned_worker_rows must be implemented in a subclass" + end + + def cleanup_orphaned_workers + raise NotImplementedError, "cleanup_orphaned_workers must be implemented in a subclass" + end + def cleanup_failed_workers check_pending_stop clean_worker_records diff --git a/app/models/miq_server/worker_management/process.rb b/app/models/miq_server/worker_management/process.rb index 6c36e163f4e5..01e98c337c8a 100644 --- a/app/models/miq_server/worker_management/process.rb +++ b/app/models/miq_server/worker_management/process.rb @@ -1,13 +1,30 @@ class MiqServer::WorkerManagement::Process < MiqServer::WorkerManagement def sync_from_system require "sys/proctable" - self.miq_processes = Sys::ProcTable.ps.select { |proc| proc.ppid == my_server.pid } + @miq_processes_by_pid = Sys::ProcTable.ps.select { |proc| proc.ppid == my_server.pid }.index_by(&:pid) end def sync_starting_workers MiqWorker.find_all_starting.to_a end + def cleanup_orphaned_worker_rows + orphaned_rows = miq_workers.where.not(:pid => miq_pids) + return if orphaned_rows.empty? + + _log.warn("Removing orphaned worker rows without corresponding processes: #{orphaned_rows.collect(&:pid).inspect}") + orphaned_rows.destroy_all + end + + def cleanup_orphaned_workers + worker_pids = miq_workers.pluck(:pid) + orphaned_workers = miq_processes.reject { |process| worker_pids.include?(process.pid) } + return if orphaned_workers.empty? + + _log.warn("Removing orphaned processes without corresponding worker rows: #{orphaned_workers.collect(&:pid).inspect}") + orphaned_workers.each { |process| Process.kill(9, process.pid) } + end + def monitor_workers super @@ -70,5 +87,13 @@ def validate_worker(worker) private - attr_accessor :miq_processes + attr_reader :miq_processes_by_pid + + def miq_processes + miq_processes_by_pid.values + end + + def miq_pids + miq_processes_by_pid.keys + end end diff --git a/app/models/miq_server/worker_management/systemd.rb b/app/models/miq_server/worker_management/systemd.rb index 41cd20b68910..a1b553c27461 100644 --- a/app/models/miq_server/worker_management/systemd.rb +++ b/app/models/miq_server/worker_management/systemd.rb @@ -20,6 +20,12 @@ def sync_starting_workers starting end + def cleanup_orphaned_worker_rows + end + + def cleanup_orphaned_workers + end + def cleanup_failed_workers super