From ee37b000770b0e667d17534c5ba8da413b80716c Mon Sep 17 00:00:00 2001 From: Kazunori INOUE Date: Wed, 26 May 2021 17:58:32 +0900 Subject: [PATCH] Support starting corosync-notifyd at cluster startup (draft) --- pcs/cli/common/lib_wrapper.py | 2 + pcs/cli/routing/cluster.py | 2 + pcs/cluster.py | 15 ++++++- pcs/common/reports/codes.py | 2 + pcs/common/reports/messages.py | 29 ++++++++++++ pcs/lib/commands/cluster.py | 63 ++++++++++++++++++++++++++ pcs/lib/commands/status.py | 1 + pcs/lib/communication/cluster.py | 76 +++++++++++++++++++++++++++++++- pcs/settings.py.in | 1 + pcs/utils.py | 12 +++++ pcsd/remote.rb | 48 ++++++++++++++++++++ pcsd/settings.rb.in | 2 + 12 files changed, 251 insertions(+), 2 deletions(-) diff --git a/pcs/cli/common/lib_wrapper.py b/pcs/cli/common/lib_wrapper.py index 2a431dfaa..854532821 100644 --- a/pcs/cli/common/lib_wrapper.py +++ b/pcs/cli/common/lib_wrapper.py @@ -194,6 +194,8 @@ def load_module(env, middleware_factory, name): "setup_local": cluster.setup_local, "update_link": cluster.update_link, "verify": cluster.verify, + "enable_corosync_notifyd": cluster.enable_corosync_notifyd, + "disable_corosync_notifyd": cluster.disable_corosync_notifyd, }, ) diff --git a/pcs/cli/routing/cluster.py b/pcs/cli/routing/cluster.py index a9ef530d2..1d4228081 100644 --- a/pcs/cli/routing/cluster.py +++ b/pcs/cli/routing/cluster.py @@ -107,6 +107,8 @@ "unstandby": lambda lib, argv, modifiers: raise_command_replaced( "pcs node unstandby" ), + "enable-corosync-notifyd": cluster.corosync_notifyd_enable_cmd, + "disable-corosync-notifyd": cluster.corosync_notifyd_disable_cmd, }, ["cluster"], ) diff --git a/pcs/cluster.py b/pcs/cluster.py index d4a37b45d..ad592aaa6 100644 --- a/pcs/cluster.py +++ b/pcs/cluster.py @@ -262,6 +262,8 @@ def start_cluster(argv): service_list = ["corosync"] if utils.need_to_handle_qdevice_service(): service_list.append("corosync-qdevice") + if utils.get_enable_corosync_notifyd() == "yes": + 
service_list.append("corosync-notifyd") service_list.append("pacemaker") for service in service_list: output, retval = utils.start_service(service) @@ -703,6 +705,8 @@ def stop_cluster_corosync(): service_list = [] if utils.need_to_handle_qdevice_service(): service_list.append("corosync-qdevice") + if utils.get_enable_corosync_notifyd() == "yes": + service_list.append("corosync-notifyd") service_list.append("corosync") for service in service_list: output, retval = utils.stop_service(service) @@ -746,6 +750,7 @@ def kill_local_cluster_services(): "gfs_controld", # Corosync daemons "corosync-qdevice", + "corosync-notifyd", "corosync", ] return utils.run([settings.killall_executable, "-9"] + all_cluster_daemons) @@ -1341,7 +1346,7 @@ def cluster_destroy(lib, argv, modifiers): destroy_cluster(corosync_nodes) else: print("Shutting down pacemaker/corosync services...") - for service in ["pacemaker", "corosync-qdevice", "corosync"]: + for service in ["pacemaker", "corosync-qdevice", "corosync-notifyd", "corosync"]: # Returns an error if a service is not running. It is safe to # ignore it since we want it not to be running anyways. 
def corosync_notifyd_disable_cmd(lib, argv, modifiers):
    """
    Disable starting corosync-notifyd together with the cluster.

    Options: no options

    lib -- library wrapper; its cluster.disable_corosync_notifyd is called
    argv -- remaining command-line arguments, handed to the library as the
        node selection (an empty list lets the library pick the nodes)
    modifiers -- parsed command-line options
    """
    # The command takes no options; reject any that were supplied instead of
    # silently ignoring them.
    modifiers.ensure_only_supported()
    lib.cluster.disable_corosync_notifyd(argv)


def corosync_notifyd_enable_cmd(lib, argv, modifiers):
    """
    Enable starting corosync-notifyd together with the cluster.

    Options: no options

    lib -- library wrapper; its cluster.enable_corosync_notifyd is called
    argv -- remaining command-line arguments, handed to the library as the
        node selection (an empty list lets the library pick the nodes)
    modifiers -- parsed command-line options
    """
    # The command takes no options; reject any that were supplied instead of
    # silently ignoring them.
    modifiers.ensure_only_supported()
    lib.cluster.enable_corosync_notifyd(argv)
def _set_corosync_notifyd_enabled(env, node, enabled):
    """
    Send a PCS config carrying ENABLE_COROSYNC_NOTIFYD to cluster nodes.

    env -- library environment
    node -- names of the nodes to update; when empty or None, every node
        named in corosync.conf is used
    enabled -- True sets the option to "yes", False sets it to "no"

    Raises LibraryError when reading the node names produced an error, e.g.
    when corosync.conf defines no nodes.
    """
    # Function-scope import so this block does not depend on the module's
    # import list.
    from pcs.lib.errors import LibraryError

    corosync_conf = env.get_corosync_conf()
    node_list, report_list = get_existing_nodes_names(corosync_conf)
    if not node_list:
        report_list.append(
            ReportItem.error(reports.messages.CorosyncConfigNoNodesDefined())
        )
    # Report what was collected and stop on errors; previously the list was
    # built and then dropped, so these problems never reached the user.
    if env.report_processor.report_list(report_list).has_errors:
        raise LibraryError()

    target_list = env.get_node_target_factory().get_target_list(
        node if node else node_list,
    )

    com_cmd = GetOnlineTargets(env.report_processor)
    com_cmd.set_targets(target_list)
    online_targets = run_and_raise(env.get_node_communicator(), com_cmd)

    # Every node gets the same text, so build it once.
    # NOTE(review): the text holds only this one key; if the receiving side
    # replaces the whole file, other settings stored there are lost - confirm.
    config_text = dict_to_environment_file(
        {"ENABLE_COROSYNC_NOTIFYD": "yes" if enabled else "no"}
    )
    com_cmd = SetPcsConfig(env.report_processor)
    for target in online_targets:
        com_cmd.add_request(target, config_text)
    run_and_raise(env.get_node_communicator(), com_cmd)


def enable_corosync_notifyd(env, node=None):
    """
    Set ENABLE_COROSYNC_NOTIFYD to "yes" in the PCS config of online nodes.

    env -- library environment
    node -- names of the nodes to update; all nodes from corosync.conf when
        not specified
    """
    _set_corosync_notifyd_enabled(env, node, enabled=True)


def disable_corosync_notifyd(env, node=None):
    """
    Set ENABLE_COROSYNC_NOTIFYD to "no" in the PCS config of online nodes.

    env -- library environment
    node -- names of the nodes to update; all nodes from corosync.conf when
        not specified
    """
    _set_corosync_notifyd_enabled(env, node, enabled=False)
class GetPcsConfig(AllSameDataMixin, AllAtOnceStrategyMixin, RunRemotelyBase):
    """
    Ask every target for its PCS config via "remote/get_pcs_config".

    on_complete returns a list of {"node": label, "config": value} dicts with
    one entry per target; "config" is the parsed response of a node that
    answered successfully and None for any other node.
    """

    def __init__(self, report_processor):
        super().__init__(report_processor)
        self._config_list = []
        self._successful_target_list = []

    def _get_request_data(self):
        return RequestData("remote/get_pcs_config")

    def _process_response(self, response):
        failure_report = response_to_report_item(
            response, severity=reports.ReportItemSeverity.WARNING
        )
        node_label = response.request.target.label

        if failure_report is None:
            # Success: keep the parsed config and remember the node so that
            # on_complete does not add a None entry for it.
            parsed_config = environment_file_to_dict(response.data)
            self._config_list.append(
                {"node": node_label, "config": parsed_config}
            )
            self._successful_target_list.append(node_label)
            return

        # Failure: the detailed report is emitted only when the node could
        # not be connected to; the generic warning is emitted in every case.
        if not response.was_connected:
            self._report(failure_report)
        # NOTE(review): confirm reports.messages.UnableToGetPcsConfig exists;
        # it is not among the message classes visible alongside this code.
        self._report(
            ReportItem.warning(
                reports.messages.UnableToGetPcsConfig(node_label, "")
            )
        )

    def on_complete(self):
        # Targets without a successful response still get an entry, with no
        # config attached.
        self._config_list.extend(
            {"node": target.label, "config": None}
            for target in self._target_list
            if target.label not in self._successful_target_list
        )
        return self._config_list
"@PCMK_USER@" pacemaker_gname = "@PCMK_GROUP@" diff --git a/pcs/utils.py b/pcs/utils.py index 09848d620..732aab5ee 100644 --- a/pcs/utils.py +++ b/pcs/utils.py @@ -76,6 +76,8 @@ timeout_to_seconds as get_timeout_seconds, validate_id, ) +from pcs.lib.communication.nodes import GetOnlineTargets +import configparser # pylint: disable=invalid-name # pylint: disable=too-many-branches @@ -2899,3 +2901,13 @@ def get_token_from_file(file_name: str) -> str: except OSError as e: err(f"Unable to read file '{file_name}': {e}", exit_after_error=False) raise SystemExit(1) from e + +def get_enable_corosync_notifyd(): + try: + with open(settings.pcs_config, "r", encoding="utf-8") as f: + config_str = "[dummy]\n" + f.read() + config = configparser.ConfigParser() + config.read_string(config_str) + except IOError as e: + err("Unable to read %s: %s" % (settings.pcs_config, e.strerror)) + return config["dummy"]["ENABLE_COROSYNC_NOTIFYD"] diff --git a/pcsd/remote.rb b/pcsd/remote.rb index 96f0f3fc0..c2637abc3 100644 --- a/pcsd/remote.rb +++ b/pcsd/remote.rb @@ -72,6 +72,8 @@ def remote(params, request, auth_user) :sbd_enable => method(:sbd_enable), :remove_stonith_watchdog_timeout=> method(:remove_stonith_watchdog_timeout), :set_stonith_watchdog_timeout_to_zero => method(:set_stonith_watchdog_timeout_to_zero), + :set_pcs_config => method(:set_pcs_config), + :get_pcs_config => method(:get_pcs_config), # lib api: # /api/v1/sbd-enable-sbd/v1 :remote_enable_sbd => method(:remote_enable_sbd), @@ -3077,3 +3079,49 @@ def remove_nodes_from_cib(params, request, auth_user) return 400, "Invalid input data format: #{e.message}" end end + +def set_pcs_config(param, request, auth_user) + unless allowed_for_local_cluster(auth_user, Permissions::WRITE) + return 403, 'Permission denied' + end + config = param[:config] + unless config + return [400, 'Parameter "config" required'] + end + + file = nil + begin + file = File.open(PCS_CONFIG, 'w') + file.flock(File::LOCK_EX) + file.write(config) + rescue => 
# Return the content of the PCS configuration file.
#
# Requires READ permission for the local cluster; responds with 403 otherwise.
# The file is read under a shared lock. On success the response is
# [200, <file content>]; any exception raised while opening, locking or
# reading the file is turned into a pcsd_error response.
def get_pcs_config(param, request, auth_user)
  unless allowed_for_local_cluster(auth_user, Permissions::READ)
    return 403, 'Permission denied'
  end

  begin
    # The block form closes the file when the block exits, which also drops
    # the lock taken inside it.
    content = File.open(PCS_CONFIG, 'r') do |config_file|
      config_file.flock(File::LOCK_SH)
      config_file.read
    end
  rescue => e
    return pcsd_error("Unable to get PCS configuration: #{e}")
  end

  return [200, content]
end