From 80a586a879ab454142260443c3fbb521996b2218 Mon Sep 17 00:00:00 2001 From: Kavitha Ramalingam Date: Mon, 21 Oct 2024 21:13:46 +0530 Subject: [PATCH 1/3] GNOI Cold Reboot --- dockers/docker-framework/Dockerfile.j2 | 34 ++++++++++++++ dockers/docker-framework/framework.sh | 4 ++ dockers/docker-framework/start.sh | 5 +++ dockers/docker-framework/supervisord.conf | 54 +++++++++++++++++++++++ 4 files changed, 97 insertions(+) create mode 100644 dockers/docker-framework/Dockerfile.j2 create mode 100755 dockers/docker-framework/framework.sh create mode 100755 dockers/docker-framework/start.sh create mode 100644 dockers/docker-framework/supervisord.conf diff --git a/dockers/docker-framework/Dockerfile.j2 b/dockers/docker-framework/Dockerfile.j2 new file mode 100644 index 000000000000..c9c71d402e84 --- /dev/null +++ b/dockers/docker-framework/Dockerfile.j2 @@ -0,0 +1,34 @@ +{% from "dockers/dockerfile-macros.j2" import install_debian_packages, install_python_wheels, copy_files %} +FROM docker-config-engine-bullseye-{{DOCKER_USERNAME}}:{{DOCKER_USERTAG}} + +ARG docker_container_name +RUN [ -f /etc/rsyslog.conf ] && sed -ri "s/%syslogtag%/$docker_container_name#%syslogtag%/;" /etc/rsyslog.conf + +## Make apt-get non-interactive +ENV DEBIAN_FRONTEND=noninteractive + +RUN apt-get update && \ + apt-get install -f -y \ + libdbus-1-3 \ + libdbus-c++-1-0v5 + +{% if docker_framework_debs.strip() -%} +# Copy locally-built Debian package dependencies +{{ copy_files("debs/", docker_framework_debs.split(' '), "/debs/") }} + +# Install locally-built Debian packages and implicitly install their dependencies +{{ install_debian_packages(docker_framework_debs.split(' ')) }} +{%- endif %} + +RUN apt-get clean -y && \ + apt-get autoclean - && \ + apt-get autoremove -y && \ + rm -rf /debs /var/lib/apt/lists/* /tmp/* ~/.cache/ + +COPY ["start.sh", "/usr/bin/"] +COPY ["supervisord.conf", "/etc/supervisor/conf.d/"] +COPY ["files/supervisor-proc-exit-listener", "/usr/bin"] +# COPY ["git_commits", "/usr"] + + +ENTRYPOINT ["/usr/local/bin/supervisord"] diff --git a/dockers/docker-framework/framework.sh b/dockers/docker-framework/framework.sh new file mode 100755 index 000000000000..68e72417fd2a --- /dev/null +++ b/dockers/docker-framework/framework.sh @@ -0,0 +1,4 @@ +#!/usr/bin/env bash + + +exec /usr/local/bin/framework --logtostderr diff --git a/dockers/docker-framework/start.sh b/dockers/docker-framework/start.sh new file mode 100755 index 000000000000..1235a4e5a671 --- /dev/null +++ b/dockers/docker-framework/start.sh @@ -0,0 +1,5 @@ +#!/usr/bin/env bash + + +mkdir -p /var/sonic +echo "# Config files managed by sonic-config-engine" > /var/sonic/config_status diff --git a/dockers/docker-framework/supervisord.conf b/dockers/docker-framework/supervisord.conf new file mode 100644 index 000000000000..05ca0f4bc272 --- /dev/null +++ b/dockers/docker-framework/supervisord.conf @@ -0,0 +1,54 @@ +[supervisord] +logfile_maxbytes=1MB +logfile_backups=2 +loglevel=warn +nodaemon=true + +[eventlistener:dependent-startup] +command=python3 -m supervisord_dependent_startup --log-level warn +autostart=true +autorestart=unexpected +stdout_logfile=syslog +stderr_logfile=syslog +startretries=0 +exitcodes=0,3 +events=PROCESS_STATE +buffer_size=50 + +[eventlistener:supervisor-proc-exit-listener] +command=/usr/bin/supervisor-proc-exit-listener --container-name framework +events=PROCESS_STATE_EXITED +autostart=true +autorestart=unexpected +stdout_logfile=syslog +stderr_logfile=syslog + +[program:rsyslogd] +command=/usr/sbin/rsyslogd -n -iNONE +priority=1 +autostart=false +autorestart=unexpected +stdout_logfile=syslog +stderr_logfile=syslog +dependent_startup=true + +[program:start] +command=/usr/bin/start.sh +priority=2 +autostart=false +autorestart=false +startsecs=0 +stdout_logfile=syslog +stderr_logfile=syslog +dependent_startup=true +dependent_startup_wait_for=rsyslogd:running + +[program:rebootbackend] +command=/usr/bin/rebootbackend +priority=3 +autostart=false +autorestart=true +stdout_logfile=syslog +stderr_logfile=syslog +dependent_startup=true +dependent_startup_wait_for=start:exited From 68416e9f54360f6ed1caa8ddfd7f72e59224cf0b Mon Sep 17 00:00:00 2001 From: Kavitha Ramalingam Date: Tue, 22 Oct 2024 19:43:49 +0530 Subject: [PATCH 2/3] Integrated Rebootbackend changes --- .../rebootbackend/gnoi_reboot.xml | 30 ++ .../rebootbackend/gnoi_reboot_dbus.h | 65 +++++ .../rebootbackend/init_thread.h | 136 +++++++++ .../rebootbackend/interfaces.cpp | 72 +++++ .../rebootbackend/interfaces.h | 39 +++ .../rebootbackend/reboot_common.h | 15 + .../rebootbackend/reboot_interfaces.h | 20 ++ .../rebootbackend/reboot_thread.cpp | 262 +++++++++++++++++ .../rebootbackend/reboot_thread.h | 217 ++++++++++++++ .../rebootbackend/rebootbackend.cpp | 10 + .../rebootbackend/rebootbe.cpp | 269 ++++++++++++++++++ src/sonic-framework/rebootbackend/rebootbe.h | 94 ++++++ 12 files changed, 1229 insertions(+) create mode 100644 src/sonic-framework/rebootbackend/gnoi_reboot.xml create mode 100644 src/sonic-framework/rebootbackend/gnoi_reboot_dbus.h create mode 100644 src/sonic-framework/rebootbackend/init_thread.h create mode 100644 src/sonic-framework/rebootbackend/interfaces.cpp create mode 100644 src/sonic-framework/rebootbackend/interfaces.h create mode 100644 src/sonic-framework/rebootbackend/reboot_common.h create mode 100644 src/sonic-framework/rebootbackend/reboot_interfaces.h create mode 100644 src/sonic-framework/rebootbackend/reboot_thread.cpp create mode 100644 src/sonic-framework/rebootbackend/reboot_thread.h create mode 100644 src/sonic-framework/rebootbackend/rebootbackend.cpp create mode 100644 src/sonic-framework/rebootbackend/rebootbe.cpp create mode 100644 src/sonic-framework/rebootbackend/rebootbe.h diff --git a/src/sonic-framework/rebootbackend/gnoi_reboot.xml b/src/sonic-framework/rebootbackend/gnoi_reboot.xml new file mode 100644 index 000000000000..63af1963db4b --- /dev/null +++ b/src/sonic-framework/rebootbackend/gnoi_reboot.xml @@ -0,0 +1,30 @@ + + + + + + + + + + + + + + + + + + + + + diff --git a/src/sonic-framework/rebootbackend/gnoi_reboot_dbus.h b/src/sonic-framework/rebootbackend/gnoi_reboot_dbus.h new file mode 100644 index 000000000000..c0fbfe6a897a --- /dev/null +++ b/src/sonic-framework/rebootbackend/gnoi_reboot_dbus.h @@ -0,0 +1,65 @@ + +/* + * This file was automatically generated by dbusxx-xml2cpp; DO NOT EDIT! + */ + +#ifndef __dbusxx__rebootbackend_gnoi_reboot_dbus_h__PROXY_MARSHAL_H +#define __dbusxx__rebootbackend_gnoi_reboot_dbus_h__PROXY_MARSHAL_H + +#include + +#include + +namespace org { +namespace SONiC { +namespace HostService { + +class gnoi_reboot_proxy : public ::DBus::InterfaceProxy { + public: + gnoi_reboot_proxy() + : ::DBus::InterfaceProxy("org.SONiC.HostService.gnoi_reboot") {} + + public: + /* properties exported by this interface */ + public: + /* methods exported by this interface, + * this functions will invoke the corresponding methods on the remote objects + */ + void issue_reboot(const std::vector& options, int32_t& argout0, + std::string& argout1) { + ::DBus::CallMessage call; + ::DBus::MessageIter wi = call.writer(); + + wi << options; + call.member("issue_reboot"); + ::DBus::Message ret = invoke_method(call); + ::DBus::MessageIter ri = ret.reader(); + + ri >> argout0; + ri >> argout1; + } + + void get_reboot_status(int32_t& argout0, std::string& argout1) { + ::DBus::CallMessage call; + call.member("get_reboot_status"); + ::DBus::Message ret = invoke_method(call); + ::DBus::MessageIter ri = ret.reader(); + + ri >> argout0; + ri >> argout1; + } + + public: + /* signal handlers for this interface + */ + + private: + /* unmarshalers (to unpack the DBus message before calling the actual signal + * handler) + */ +}; + +} // namespace HostService +} // namespace SONiC +} // namespace org +#endif //__dbusxx__rebootbackend_gnoi_reboot_dbus_h__PROXY_MARSHAL_H diff --git a/src/sonic-framework/rebootbackend/init_thread.h b/src/sonic-framework/rebootbackend/init_thread.h new file mode 100644 index 000000000000..50c27d6c8c34 --- /dev/null +++ b/src/sonic-framework/rebootbackend/init_thread.h @@ -0,0 +1,136 @@ +#pragma once + +#include +#include +#include +#include + +#include "dbconnector.h" +#include "notificationproducer.h" +#include "reboot_common.h" +#include "reboot_interfaces.h" +#include "redis_utils.h" +#include "select.h" +#include "selectableevent.h" +#include "selectabletimer.h" +#include "subscriberstatetable.h" +#include "system/system.pb.h" + +namespace rebootbackend { + +// Holds a thread safe representation of the InitThread internal state. +// Thread-safe: the expectation is one thread will write and multiple threads +// will read. +class InitThreadStatus { + public: + enum ThreadStatus { + NOT_STARTED = 0, + PENDING = 1, + WAITING_FOR_REGISTRATION = 2, + WAITING_FOR_RECONCILIATION = 3, + WAITING_FOR_STATE_VERIFICATION = 4, + WAITING_FOR_UNFREEZE = 5, + FINALIZE = 6, + DONE = 7, + ERROR = 8, + }; + + enum ErrorCondition { + NO_ERROR = 0, + UNKNOWN = 1, + INTERNAL_ERROR = 2, + REGISTRATION_FAILED = 3, + RECONCILIATION_FAILED = 4, + STATE_VERIFICATION_FAILED = 5, + UNFREEZE_FAILED = 6, + DETECTED_CRITICAL_STATE = 7, + }; + + struct DetailedStatus { + gnoi::system::RebootStatusResponse thread_state; + InitThreadStatus::ThreadStatus detailed_thread_status = + InitThreadStatus::ThreadStatus::NOT_STARTED; + InitThreadStatus::ErrorCondition detailed_thread_error_condition = + InitThreadStatus::ErrorCondition::NO_ERROR; + }; + + InitThreadStatus() { + m_status.detailed_thread_status = ThreadStatus::NOT_STARTED; + m_status.detailed_thread_error_condition = ErrorCondition::NO_ERROR; + + m_status.thread_state.set_active(false); + m_status.thread_state.set_method(gnoi::system::RebootMethod::COLD); + m_status.thread_state.mutable_status()->set_status( + gnoi::system::RebootStatus_Status::RebootStatus_Status_STATUS_SUCCESS); + m_status.thread_state.mutable_status()->set_message(""); + } + + void set_start_status() { + const std::lock_guard lock(m_mutex); + m_status.detailed_thread_status = ThreadStatus::PENDING; + m_status.detailed_thread_error_condition = ErrorCondition::NO_ERROR; + + m_status.thread_state.set_active(true); + m_status.thread_state.set_method(gnoi::system::RebootMethod::NSF); + m_status.thread_state.mutable_status()->set_status( + gnoi::system::RebootStatus_Status::RebootStatus_Status_STATUS_UNKNOWN); + m_status.thread_state.mutable_status()->set_message(""); + } + + bool get_active(void) { + const std::lock_guard lock(m_mutex); + return m_status.thread_state.active(); + } + + void set_detailed_thread_status(ThreadStatus new_status) { + const std::lock_guard lock(m_mutex); + if (m_status.thread_state.active()) { + m_status.detailed_thread_status = new_status; + } + } + + void set_success() { + const std::lock_guard lock(m_mutex); + if (m_status.thread_state.active()) { + m_status.detailed_thread_status = ThreadStatus::DONE; + m_status.thread_state.mutable_status()->set_status( + gnoi::system::RebootStatus_Status:: + RebootStatus_Status_STATUS_SUCCESS); + } + } + + void set_error(ErrorCondition error_condition, + const std::string &error_message) { + const std::lock_guard lock(m_mutex); + if (m_status.thread_state.active()) { + m_status.detailed_thread_status = ThreadStatus::ERROR; + m_status.detailed_thread_error_condition = error_condition; + m_status.thread_state.mutable_status()->set_status( + gnoi::system::RebootStatus_Status:: + RebootStatus_Status_STATUS_FAILURE); + m_status.thread_state.mutable_status()->set_message(error_message); + } + } + + void set_inactive() { + const std::lock_guard lock(m_mutex); + m_status.thread_state.set_active(false); + } + + DetailedStatus get_detailed_thread_status() { + const std::lock_guard lock(m_mutex); + return m_status; + } + + gnoi::system::RebootStatusResponse get_response() { + const std::lock_guard lock(m_mutex); + return m_status.thread_state; + } + + private: + std::mutex m_mutex; + DetailedStatus m_status; +}; + + +} // namespace rebootbackend diff --git a/src/sonic-framework/rebootbackend/interfaces.cpp b/src/sonic-framework/rebootbackend/interfaces.cpp new file mode 100644 index 000000000000..b74ecb99079e --- /dev/null +++ b/src/sonic-framework/rebootbackend/interfaces.cpp @@ -0,0 +1,72 @@ +#include "interfaces.h" + +#include // DBus + +#include "reboot_interfaces.h" + +constexpr char kRebootBusName[] = "org.SONiC.HostService.gnoi_reboot"; +constexpr char kRebootPath[] = "/org/SONiC/HostService/gnoi_reboot"; + +constexpr char kContainerShutdownBusName[] = + "org.SONiC.HostService.gnoi_container_shutdown"; +constexpr char kContainerShutdownPath[] = + "/org/SONiC/HostService/gnoi_container_shutdown"; + +// DBus::BusDispatcher dispatcher; +DBus::Connection& HostServiceDbus::getConnection(void) { + static DBus::Connection* connPtr = nullptr; + if (connPtr == nullptr) { + static DBus::BusDispatcher dispatcher; + DBus::default_dispatcher = &dispatcher; + + static DBus::Connection conn = DBus::Connection::SystemBus(); + connPtr = &conn; + } + return *connPtr; +} + +DbusInterface::DbusResponse HostServiceDbus::Reboot( + const std::string& jsonRebootRequest) { + int32_t status; + + GnoiDbusReboot reboot_client(getConnection(), kRebootBusName, kRebootPath); + std::string retString; + std::vector options; + options.push_back(jsonRebootRequest); + try { + reboot_client.issue_reboot(options, status, retString); + } catch (DBus::Error& ex) { + return DbusResponse{ + DbusStatus::DBUS_FAIL, + "HostServiceDbus::Reboot: failed to call reboot host service"}; + } + + // gnoi_reboot.py returns 0 for success, 1 for failure + if (status == 0) { + // Successful reboot response is an empty string. + return DbusResponse{DbusStatus::DBUS_SUCCESS, ""}; + } + return DbusResponse{DbusStatus::DBUS_FAIL, retString}; +} + +DbusInterface::DbusResponse HostServiceDbus::RebootStatus( + const std::string& jsonStatusRequest) { + GnoiDbusReboot reboot_client(getConnection(), kRebootBusName, kRebootPath); + int32_t status; + std::string retString; + + try { + reboot_client.get_reboot_status(status, retString); + } catch (DBus::Error& ex) { + return DbusResponse{ + DbusStatus::DBUS_FAIL, + "HostServiceDbus::RebootStatus: failed to call reboot status " + "host service"}; + } + + // gnoi_reboot.py returns 0 for success, 1 for failure + if (status == 0) { + return DbusResponse{DbusStatus::DBUS_SUCCESS, retString}; + } + return DbusResponse{DbusStatus::DBUS_FAIL, retString}; +} diff --git a/src/sonic-framework/rebootbackend/interfaces.h b/src/sonic-framework/rebootbackend/interfaces.h new file mode 100644 index 000000000000..10382af1483f --- /dev/null +++ b/src/sonic-framework/rebootbackend/interfaces.h @@ -0,0 +1,39 @@ +#pragma once +#include + +#include + +#include "gnoi_reboot_dbus.h" // auto generated gnoi_reboot_proxy +#include "reboot_interfaces.h" + +/* Reboot is a request to the reboot sonic host service to request a reboot +from the platform. This takes as an argument a string based json formatted +Reboot request from +system.proto.’https://github.com/openconfig/gnoi/blob/73a1e7675c5f963e7810bd3828203f2758eb47e8/system/system.proto#L107 +*/ + +class GnoiDbusReboot : public org::SONiC::HostService::gnoi_reboot_proxy, + public DBus::IntrospectableProxy, + public DBus::ObjectProxy { + public: + GnoiDbusReboot(DBus::Connection& connection, const char* dbus_bus_name_p, + const char* dbus_obj_name_p) + : DBus::ObjectProxy(connection, dbus_obj_name_p, dbus_bus_name_p) {} +}; + +/* DbusResponse consists of STATUS: success/fail: i.e. was the dbus request +successful DbusResponse.json_string: string based json formatted RebootResponse +defined here: +https://github.com/openconfig/gnoi/blob/73a1e7675c5f963e7810bd3828203f2758eb47e8/system/system.proto#L119 +*/ + +class HostServiceDbus : public DbusInterface { + public: + DbusInterface::DbusResponse Reboot( + const std::string& json_reboot_request) override; + DbusInterface::DbusResponse RebootStatus( + const std::string& json_status_request) override; + + private: + static DBus::Connection& getConnection(void); +}; diff --git a/src/sonic-framework/rebootbackend/reboot_common.h b/src/sonic-framework/rebootbackend/reboot_common.h new file mode 100644 index 000000000000..b2f6848c43f4 --- /dev/null +++ b/src/sonic-framework/rebootbackend/reboot_common.h @@ -0,0 +1,15 @@ +#pragma once + +#include + +#include "status_code_util.h" + +namespace rebootbackend { + +extern bool sigterm_requested; +struct NotificationResponse { + swss::StatusCode status; + std::string json_string; +}; + +} // namespace rebootbackend diff --git a/src/sonic-framework/rebootbackend/reboot_interfaces.h b/src/sonic-framework/rebootbackend/reboot_interfaces.h new file mode 100644 index 000000000000..599652fa5347 --- /dev/null +++ b/src/sonic-framework/rebootbackend/reboot_interfaces.h @@ -0,0 +1,20 @@ +#pragma once + +#include + +class DbusInterface { + public: + enum class DbusStatus { + DBUS_SUCCESS, + DBUS_FAIL, + }; + + struct DbusResponse { + DbusStatus status; + std::string json_string; + }; + + virtual ~DbusInterface() = default; + virtual DbusResponse Reboot(const std::string& jsonRebootRequest) = 0; + virtual DbusResponse RebootStatus(const std::string& jsonStatusRequest) = 0; +}; diff --git a/src/sonic-framework/rebootbackend/reboot_thread.cpp b/src/sonic-framework/rebootbackend/reboot_thread.cpp new file mode 100644 index 000000000000..81c43327a467 --- /dev/null +++ b/src/sonic-framework/rebootbackend/reboot_thread.cpp @@ -0,0 +1,262 @@ +#include "reboot_thread.h" + +#include + +#include + +#include "dbconnector.h" +#include "logger.h" +#include "notificationproducer.h" +#include "reboot_common.h" +#include "reboot_interfaces.h" +#include "select.h" +#include "selectableevent.h" +#include "selectabletimer.h" +#include "subscriberstatetable.h" +#include "system/system.pb.h" +#include "timestamp.h" + +namespace rebootbackend { + +using namespace ::gnoi::system; +using steady_clock = std::chrono::steady_clock; +using Progress = ::rebootbackend::RebootThread::Progress; +namespace gpu = ::google::protobuf::util; + +bool sigterm_requested = false; + +RebootThread::RebootThread(DbusInterface &dbus_interface, + swss::SelectableEvent &m_finished) + : m_db("STATE_DB", 0), + m_finished(m_finished), + m_dbus_interface(dbus_interface) {} + +void RebootThread::Stop(void) { + SWSS_LOG_ENTER(); + // Notify reboot thread that stop has been requested. + m_stop.notify(); +} + +bool RebootThread::Join(void) { + SWSS_LOG_ENTER(); + + if (!m_thread.joinable()) { + SWSS_LOG_ERROR("RebootThread::Join called, but not joinable"); + return false; + } + + try { + m_thread.join(); + m_status.set_inactive(); + return true; + } catch (const std::system_error &e) { + SWSS_LOG_ERROR("Exception calling join: %s", e.what()); + return false; + } +} + +RebootStatusResponse RebootThread::GetResponse(void) { + return m_status.get_response(); +} + +bool RebootThread::HasRun() { return m_status.get_reboot_count() > 0; } + +Progress RebootThread::platform_reboot_select(swss::Select &s, + swss::SelectableTimer &l_timer) { + SWSS_LOG_ENTER(); + + while (true) { + swss::Selectable *sel; + int select_ret; + select_ret = s.select(&sel); + + if (select_ret == swss::Select::ERROR) { + SWSS_LOG_NOTICE("Error: %s!", strerror(errno)); + } else if (select_ret == swss::Select::OBJECT) { + if (sel == &m_stop) { + // SIGTERM expected after platform reboot request + SWSS_LOG_NOTICE( + "m_stop rx'd (SIGTERM) while waiting for platform reboot"); + return Progress::EXIT_EARLY; + } else if (sel == &l_timer) { + return Progress::PROCEED; + } + } + } +} + +Progress RebootThread::wait_for_platform_reboot(swss::Select &s) { + SWSS_LOG_ENTER(); + + // Sleep for a long time: 260 seconds. + // During this time platform should kill us as part of reboot. + swss::SelectableTimer l_timer( + timespec{.tv_sec = m_reboot_timeout, .tv_nsec = 0}); + s.addSelectable(&l_timer); + + l_timer.start(); + + Progress progress = platform_reboot_select(s, l_timer); + + l_timer.stop(); + s.removeSelectable(&l_timer); + return progress; +} + +void RebootThread::do_reboot(void) { + SWSS_LOG_ENTER(); + + swss::Select s; + s.addSelectable(&m_stop); + + // Check if stop was requested before Selectable was setup + if (sigterm_requested) { + SWSS_LOG_ERROR("sigterm_requested was raised, exiting"); + return; + } + + if (m_request.method() == RebootMethod::COLD) { + do_cold_reboot(s); + } else { + // This shouldn't be possible. Reference check_start_preconditions() + SWSS_LOG_ERROR("Received unrecognized method type = %s", + RebootMethod_Name(m_request.method()).c_str()); + } +} + +RebootThread::Progress RebootThread::send_dbus_reboot_request() { + SWSS_LOG_ENTER(); + SWSS_LOG_NOTICE("Sending reboot request to platform"); + + std::string json_string; + gpu::Status status = gpu::MessageToJsonString(m_request, &json_string); + if (!status.ok()) { + std::string error_string = "unable to convert reboot protobuf to json: " + + status.message().as_string(); + log_error_and_set_non_retry_failure(error_string); + return Progress::EXIT_EARLY; + } + + // Send the reboot request to the reboot host service via dbus. + DbusInterface::DbusResponse dbus_response = + m_dbus_interface.Reboot(json_string); + + if (dbus_response.status == DbusInterface::DbusStatus::DBUS_FAIL) { + log_error_and_set_non_retry_failure(dbus_response.json_string); + return Progress::EXIT_EARLY; + } + return Progress::PROCEED; +} + +void RebootThread::do_cold_reboot(swss::Select &s) { + SWSS_LOG_ENTER(); + SWSS_LOG_NOTICE("Sending cold reboot request to platform"); + if (send_dbus_reboot_request() == Progress::EXIT_EARLY) { + return; + } + + // Wait for platform to reboot. If we return, reboot failed. + if (wait_for_platform_reboot(s) == Progress::EXIT_EARLY) { + return; + } + + // We shouldn't be here. Platform reboot should've killed us. + log_error_and_set_non_retry_failure("platform failed to reboot"); + + return; +} + +void RebootThread::reboot_thread(void) { + SWSS_LOG_ENTER(); + + do_reboot(); + + // Notify calling thread that reboot thread has exited. + // Calling thread will call Join(): join and set thread status to inactive. + m_finished.notify(); +} + +bool RebootThread::check_start_preconditions(const RebootRequest &request, + NotificationResponse &response) { + // We have to join a previous executing thread before restarting. + // Active is cleared in Join. + if (m_status.get_active()) { + response.json_string = "RebootThread: can't Start while active"; + response.status = swss::StatusCode::SWSS_RC_IN_USE; + } else if (request.method() != RebootMethod::COLD && + request.method() != RebootMethod::WARM) { + response.json_string = "RebootThread: Start rx'd unsupported method"; + response.status = swss::StatusCode::SWSS_RC_INVALID_PARAM; + } else if (request.delay() != 0) { + response.json_string = "RebootThread: delayed start not supported"; + response.status = swss::StatusCode::SWSS_RC_INVALID_PARAM; + } + + if (response.status == swss::StatusCode::SWSS_RC_SUCCESS) { + return true; + } + + SWSS_LOG_ERROR("%s", response.json_string.c_str()); + // Log the reboot request contents. + gpu::Status status; + std::string json_request; + status = gpu::MessageToJsonString(request, &json_request); + if (status.ok()) { + SWSS_LOG_ERROR("check_start_preconditions: RebootRequest = %s", + json_request.c_str()); + } else { + SWSS_LOG_ERROR( + "check_start_preconditions: error calling MessageToJsonString"); + } + return false; +} + +NotificationResponse RebootThread::Start(const RebootRequest &request) { + SWSS_LOG_ENTER(); + + NotificationResponse response = {.status = swss::StatusCode::SWSS_RC_SUCCESS, + .json_string = ""}; + + // Confirm we're not running, method is supported and we're not delayed. + if (!check_start_preconditions(request, response)) { + // Errors logged in check_start_preconditions. + return response; + } + + m_request = request; + + // From this point errors will be reported via RebootStatusRequest. + m_status.set_start_status(request.method(), request.message()); + + try { + m_thread = std::thread(&RebootThread::reboot_thread, this); + } catch (const std::system_error &e) { + std::string error_string = "Exception launching reboot thread: "; + error_string += e.what(); + log_error_and_set_failure_as_retriable(error_string); + + // Notify calling thread that thread has finished. + // Calling thread MUST call Join, which will join and clear active bit. + m_finished.notify(); + } + return response; +} + +void RebootThread::log_error_and_set_non_retry_failure( + const std::string error_string) { + SWSS_LOG_ENTER(); + SWSS_LOG_ERROR("%s", error_string.c_str()); + m_status.set_completed_status( + RebootStatus_Status::RebootStatus_Status_STATUS_FAILURE, error_string); +} + +void RebootThread::log_error_and_set_failure_as_retriable( + const std::string error_string) { + SWSS_LOG_ENTER(); + SWSS_LOG_ERROR("%s", error_string.c_str()); + m_status.set_completed_status( + RebootStatus_Status::RebootStatus_Status_STATUS_RETRIABLE_FAILURE, + error_string); +} + +} // namespace rebootbackend diff --git a/src/sonic-framework/rebootbackend/reboot_thread.h b/src/sonic-framework/rebootbackend/reboot_thread.h new file mode 100644 index 000000000000..46fded6bc38e --- /dev/null +++ b/src/sonic-framework/rebootbackend/reboot_thread.h @@ -0,0 +1,217 @@ +#pragma once + +#include +#include +#include + +#include "dbconnector.h" +#include "notificationproducer.h" +#include "reboot_common.h" +#include "reboot_interfaces.h" +#include "select.h" +#include "selectableevent.h" +#include "selectabletimer.h" +#include "subscriberstatetable.h" +#include "system/system.pb.h" + +namespace rebootbackend { + +// Hold/manage the contents of a RebootStatusResponse as defined +// in system.proto +// Thread-safe: expectation is one thread will write and multiple +// threads can read. +class ThreadStatus { + public: + ThreadStatus() { + m_proto_status.set_active(false); + + // Reason for reboot as specified in message from a RebootRequest. + // This is "message" in RebootRequest. + m_proto_status.set_reason(""); + + // Number of reboots since active. + m_proto_status.set_count(0); + + // RebootMethod is type of of reboot: cold, warm, fast from a + // RebootRequest + m_proto_status.set_method(gnoi::system::RebootMethod::UNKNOWN); + + // Status can be UNKNOWN, SUCCESS, RETRIABLE_FAILURE or FAILURE. + m_proto_status.mutable_status()->set_status( + gnoi::system::RebootStatus_Status::RebootStatus_Status_STATUS_UNKNOWN); + + // In the event of error: message is human readable error explanation. + m_proto_status.mutable_status()->set_message(""); + } + + void set_start_status(const gnoi::system::RebootMethod &method, + const std::string &reason) { + m_mutex.lock(); + + m_proto_status.set_active(true); + m_proto_status.set_reason(reason); + m_proto_status.set_count(m_proto_status.count() + 1); + m_proto_status.set_method(method); + m_proto_status.mutable_status()->set_status( + gnoi::system::RebootStatus_Status::RebootStatus_Status_STATUS_UNKNOWN); + m_proto_status.mutable_status()->set_message(""); + + // set when to time reboot starts + std::chrono::nanoseconds ns = + std::chrono::system_clock::now().time_since_epoch(); + m_proto_status.set_when(ns.count()); + + m_mutex.unlock(); + } + + bool get_active(void) { + m_mutex.lock(); + bool ret = m_proto_status.active(); + m_mutex.unlock(); + return ret; + } + + void set_completed_status(const gnoi::system::RebootStatus_Status &status, + const std::string &message) { + m_mutex.lock(); + + // Status should only be updated while reboot is active + if (m_proto_status.active()) { + m_proto_status.mutable_status()->set_status(status); + m_proto_status.mutable_status()->set_message(message); + } + + m_mutex.unlock(); + } + + void set_inactive(void) { + m_mutex.lock(); + m_proto_status.set_active(false); + m_mutex.unlock(); + } + + int get_reboot_count() { + const std::lock_guard lock(m_mutex); + return m_proto_status.count(); + } + + gnoi::system::RebootStatus_Status get_last_reboot_status(void) { + gnoi::system::RebootStatusResponse response = get_response(); + return response.status().status(); + } + + gnoi::system::RebootStatusResponse get_response(void) { + m_mutex.lock(); + // make a copy + gnoi::system::RebootStatusResponse lstatus = m_proto_status; + m_mutex.unlock(); + + if (lstatus.active()) { + // RebootStatus isn't applicable if we're active + lstatus.mutable_status()->set_status( + gnoi::system::RebootStatus_Status:: + RebootStatus_Status_STATUS_UNKNOWN); + lstatus.mutable_status()->set_message(""); + } else { + // When is only valid while we're active (since delayed + // start isn't supported). Value is set when reboot begins. + lstatus.set_when(0); + } + + return lstatus; + } + + private: + std::mutex m_mutex; + gnoi::system::RebootStatusResponse m_proto_status; +}; + +// RebootThread performs reboot actions leading up to a platform +// request to reboot. +// thread-compatible: expectation is Stop, Start and Join will be +// called from the same thread. +class RebootThread { + public: + enum class Status { SUCCESS, FAILURE, KEEP_WAITING }; + enum class Progress { PROCEED, EXIT_EARLY }; + + // interface: dbus reboot host service access + // m_finished: let launching task know thread has finished + RebootThread(DbusInterface &dbus_interface, + swss::SelectableEvent &m_finished); + + NotificationResponse Start(const gnoi::system::RebootRequest &request); + + // Request thread stop/exit. Only used when platform is shutting down + // all containers/processes. + void Stop(void); + + // Called by launching task after notification sent to m_finished. + bool Join(void); + + // Return Status of last reboot attempt + gnoi::system::RebootStatusResponse GetResponse(); + + // Returns true if the RebootThread has been started since the last reboot, + // and false otherwise. + bool HasRun(); + + private: + void reboot_thread(void); + void do_reboot(void); + Progress send_dbus_reboot_request(); + void do_cold_reboot(swss::Select &s); + + // Inner loop select handler to wait for platform reboot. + // wait for timeout + // wait for a stop request (sigterm) + // Returns: + // EXIT_EARLY: an issue occurred that stops WARM + // PROCEED: if reboot timeout expired + Progress platform_reboot_select(swss::Select &s, + swss::SelectableTimer &l_timer); + + // Wait for platform to reboot while waiting for possible stop + // Returns: + // EXIT_EARLY: an issue occurred that stops WARM + // PROCEED: if reboot timeout expired + Progress wait_for_platform_reboot(swss::Select &s); + + // Log error string, set status to RebootStatus_Status_STATUS_FAILURE + // Set status message to error_string. + void log_error_and_set_non_retry_failure(const std::string error_string); + + // Log error string, set status to + // RebootStatus_Status_STATUS_RETRIABLE_FAILURE Set status message to + // error_string. + void log_error_and_set_failure_as_retriable(const std::string error_string); + + // Request is input only. + // Response is ouput only. + // Return true if preconditions met, false otherwise. + bool check_start_preconditions(const gnoi::system::RebootRequest &request, + NotificationResponse &response); + std::thread m_thread; + + // Signal m_finished to let main thread know weve completed. + // Main thread should call Join. + swss::SelectableEvent &m_finished; + + // m_stop signalled by main thread on sigterm: cleanup and exit. + swss::SelectableEvent m_stop; + DbusInterface &m_dbus_interface; + swss::DBConnector m_db; + ThreadStatus m_status; + gnoi::system::RebootRequest m_request; + + // Wait for system to reboot: allow unit test to shorten. + // TODO: there is a plan to make these timer values + // available in CONFIG_DB + static constexpr uint32_t kRebootTime = 260; + long m_reboot_timeout = kRebootTime; + + friend class RebootBETestWithoutStop; + friend class RebootThreadTest; +}; + +} // namespace rebootbackend diff --git a/src/sonic-framework/rebootbackend/rebootbackend.cpp b/src/sonic-framework/rebootbackend/rebootbackend.cpp new file mode 100644 index 000000000000..d0962f63869b --- /dev/null +++ b/src/sonic-framework/rebootbackend/rebootbackend.cpp @@ -0,0 +1,10 @@ +#include "interfaces.h" +#include "reboot_interfaces.h" +#include "rebootbe.h" + +int main(int argc, char** argv) { + HostServiceDbus dbus_interface; + ::rebootbackend::RebootBE rebootbe(dbus_interface); + rebootbe.Start(); + return 0; +} diff --git a/src/sonic-framework/rebootbackend/rebootbe.cpp b/src/sonic-framework/rebootbackend/rebootbe.cpp new file mode 100644 index 000000000000..ce607036c4d7 --- /dev/null +++ b/src/sonic-framework/rebootbackend/rebootbe.cpp @@ -0,0 +1,269 @@ +#include "rebootbe.h" + +#include +#include + +#include +#include +#include + +#include "logger.h" +#include "notificationconsumer.h" +#include "notificationproducer.h" +#include "reboot_common.h" +#include "reboot_interfaces.h" +#include "select.h" +#include "status_code_util.h" + +namespace rebootbackend { + +namespace gpu = ::google::protobuf::util; + +RebootBE::RebootBE(DbusInterface &dbus_interface) + : m_db("STATE_DB", 0), + m_RebootResponse(&m_db, REBOOT_RESPONSE_NOTIFICATION_CHANNEL), + m_NotificationConsumer(&m_db, REBOOT_REQUEST_NOTIFICATION_CHANNEL), + m_dbus(dbus_interface), + m_RebootThread(dbus_interface, m_RebootThreadFinished) { + swss::Logger::linkToDbNative("rebootbackend"); +} + +RebootBE::RebManagerStatus RebootBE::GetCurrentStatus() { + const std::lock_guard lock(m_StatusMutex); + return m_CurrentStatus; +} + +void RebootBE::SetCurrentStatus(RebManagerStatus newStatus) { + const std::lock_guard lock(m_StatusMutex); + m_CurrentStatus = newStatus; +} + +void RebootBE::Start() { + SWSS_LOG_ENTER(); + SWSS_LOG_NOTICE("--- Starting rebootbackend ---"); + + swss::Select s; + s.addSelectable(&m_NotificationConsumer); + s.addSelectable(&m_Done); + s.addSelectable(&m_RebootThreadFinished); + + SWSS_LOG_NOTICE("RebootBE entering operational loop"); + while (true) { + swss::Selectable *sel; + int ret; + + ret = s.select(&sel); + if (ret == swss::Select::ERROR) { + SWSS_LOG_NOTICE("Error: %s!", strerror(errno)); + } else if (ret == swss::Select::OBJECT) { + if (sel == &m_NotificationConsumer) { + DoTask(m_NotificationConsumer); + } else if (sel == &m_RebootThreadFinished) { + HandleRebootFinish(); + } else if (sel == &m_Done) { + HandleDone(); + break; + } + } + } + return; +} + +void RebootBE::Stop() { + SWSS_LOG_ENTER(); + m_Done.notify(); + return; +} + +bool RebootBE::RetrieveNotificationData( + swss::NotificationConsumer &consumer, + RebootBE::NotificationRequest &request) { + SWSS_LOG_ENTER(); + + request.op = ""; + request.retString = ""; + + std::string data; + std::vector values; + consumer.pop(request.op, data, values); + + for (auto &fv : values) { + if (DATA_TUPLE_KEY == fvField(fv)) { + request.retString = fvValue(fv); + return true; + } + } + return false; +} + +// Send a response on the Reboot_Response_Channel notification channel.. +// Key is one of: Reboot, RebootStatus, or CancelReboot +// code is swss::StatusCode, hopefully SWSS_RC_SUCCESS. +// message is json formatted RebootResponse, RebootStatusResponse +// or CancelRebootResponse as defined in system.proto +void RebootBE::SendNotificationResponse(const std::string key, + const swss::StatusCode code, + const std::string message) { + SWSS_LOG_ENTER(); + + std::vector ret_values; + ret_values.push_back(swss::FieldValueTuple(DATA_TUPLE_KEY, message)); + + m_RebootResponse.send(key, swss::statusCodeToStr(code), ret_values); +} + +NotificationResponse RebootBE::HandleRebootRequest( + const std::string &jsonRebootRequest) { + using namespace gpu; + + SWSS_LOG_ENTER(); + + // On success an emtpy string is returned. RebootResponse in system.proto + // is an empty proto. + NotificationResponse response = {.status = swss::StatusCode::SWSS_RC_SUCCESS, + .json_string = ""}; + + gnoi::system::RebootRequest request; + Status status = gpu::JsonStringToMessage(jsonRebootRequest, &request); + + if (!status.ok()) { + std::string error_string = + "unable to convert json to rebootRequest protobuf: " + + status.message().as_string(); + SWSS_LOG_ERROR("%s", error_string.c_str()); + SWSS_LOG_ERROR("json = |%s|", jsonRebootRequest.c_str()); + response.status = swss::StatusCode::SWSS_RC_INTERNAL, + response.json_string = error_string; + return response; + } + + if (!RebootAllowed(request.method())) { + response.status = swss::StatusCode::SWSS_RC_IN_USE; + response.json_string = + "Reboot not allowed at this time. Reboot or " + "post-warmboot in progress"; + SWSS_LOG_WARN("%s", response.json_string.c_str()); + return response; + } + + SWSS_LOG_NOTICE("Forwarding request to RebootThread: %s", + request.DebugString().c_str()); + response = m_RebootThread.Start(request); + if (response.status == swss::StatusCode::SWSS_RC_SUCCESS) { + if (request.method() == gnoi::system::RebootMethod::COLD) { + SetCurrentStatus(RebManagerStatus::COLD_REBOOT_IN_PROGRESS); + } else if (request.method() == gnoi::system::RebootMethod::WARM) { + SetCurrentStatus(RebManagerStatus::WARM_REBOOT_IN_PROGRESS); + } + } + return response; +} + +bool RebootBE::RebootAllowed(const gnoi::system::RebootMethod rebMethod) { + RebManagerStatus current_status = GetCurrentStatus(); + switch (current_status) { + case RebManagerStatus::COLD_REBOOT_IN_PROGRESS: + case RebManagerStatus::WARM_REBOOT_IN_PROGRESS: { + return false; + } + case RebManagerStatus::WARM_INIT_WAIT: { + return rebMethod == gnoi::system::RebootMethod::COLD; + } + case RebManagerStatus::IDLE: { + return true; + } + default: { + return true; + } + } +} + +NotificationResponse RebootBE::HandleStatusRequest( + const std::string &jsonStatusRequest) { + SWSS_LOG_ENTER(); + + gnoi::system::RebootStatusResponse reboot_response = + m_RebootThread.GetResponse(); + + std::string json_reboot_response_string; + google::protobuf::util::Status status = + gpu::MessageToJsonString(reboot_response, &json_reboot_response_string); + + NotificationResponse response; + if (status.ok()) { + response.status = swss::StatusCode::SWSS_RC_SUCCESS; + response.json_string = json_reboot_response_string; + } else { + std::string error_string = + "unable to convert reboot status response protobuf to json: " + + status.message().as_string(); + SWSS_LOG_ERROR("%s", error_string.c_str()); + response.status = swss::StatusCode::SWSS_RC_INTERNAL; + response.json_string = error_string; + } + + return response; +} + +NotificationResponse RebootBE::HandleCancelRequest( + const std::string &jsonCancelRequest) { + SWSS_LOG_ENTER(); + + NotificationResponse response; + + // CancelReboot isn't supported: not needed until/unless delayed support + // is added: return unimplemented. + response.status = swss::StatusCode::SWSS_RC_UNIMPLEMENTED; + response.json_string = "Cancel reboot isn't supported"; + SWSS_LOG_WARN("%s", response.json_string.c_str()); + return response; +} + +void RebootBE::DoTask(swss::NotificationConsumer &consumer) { + SWSS_LOG_ENTER(); + + NotificationResponse response; + RebootBE::NotificationRequest request; + + if (!RetrieveNotificationData(consumer, request)) { + // Response is simple string (not json) on error. + response.json_string = + "MESSAGE not present in reboot notification request message, op = " + + request.op; + SWSS_LOG_ERROR("%s", response.json_string.c_str()); + response.status = swss::StatusCode::SWSS_RC_INVALID_PARAM; + } else if (request.op == REBOOT_KEY) { + response = HandleRebootRequest(request.retString); + } else if (request.op == REBOOT_STATUS_KEY) { + response = HandleStatusRequest(request.retString); + } else if (request.op == CANCEL_REBOOT_KEY) { + response = HandleCancelRequest(request.retString); + } else { + // Response is simple string (not json) on error. + response.json_string = + "Unrecognized op in reboot request, op = " + request.op; + SWSS_LOG_ERROR("%s", response.json_string.c_str()); + response.status = swss::StatusCode::SWSS_RC_INVALID_PARAM; + } + SendNotificationResponse(request.op, response.status, response.json_string); +} + +void RebootBE::HandleRebootFinish() { + SWSS_LOG_ENTER(); + SWSS_LOG_WARN( + "Receieved notification that reboot has finished. This probably means " + "something is wrong"); + m_RebootThread.Join(); + SetCurrentStatus(RebManagerStatus::IDLE); +} + +void RebootBE::HandleDone() { + SWSS_LOG_INFO("RebootBE received signal to stop"); + + if (m_RebootThread.GetResponse().active()) { + m_RebootThread.Stop(); + m_RebootThread.Join(); + } +} + +} // namespace rebootbackend diff --git a/src/sonic-framework/rebootbackend/rebootbe.h b/src/sonic-framework/rebootbackend/rebootbe.h new file mode 100644 index 000000000000..b24db5e7a20b --- /dev/null +++ b/src/sonic-framework/rebootbackend/rebootbe.h @@ -0,0 +1,94 @@ +#pragma once +#include "dbconnector.h" +#include "notificationconsumer.h" +#include "notificationproducer.h" +#include "reboot_common.h" +#include "reboot_interfaces.h" +#include "reboot_thread.h" +#include "selectableevent.h" +#include "status_code_util.h" + +namespace rebootbackend { + +constexpr char REBOOT_REQUEST_NOTIFICATION_CHANNEL[] = "Reboot_Request_Channel"; +constexpr char REBOOT_RESPONSE_NOTIFICATION_CHANNEL[] = + "Reboot_Response_Channel"; +constexpr char REBOOT_KEY[] = "Reboot"; +constexpr char REBOOT_STATUS_KEY[] = "RebootStatus"; +constexpr char CANCEL_REBOOT_KEY[] = "CancelReboot"; +constexpr char DATA_TUPLE_KEY[] = "MESSAGE"; + +class RebootBE { + public: + enum class RebManagerStatus { + WARM_INIT_WAIT, + IDLE, + COLD_REBOOT_IN_PROGRESS, + WARM_REBOOT_IN_PROGRESS + }; + + struct NotificationRequest { + std::string op; + std::string retString; + }; + + RebootBE(DbusInterface &interface); + + // To get the current reboot status. + RebManagerStatus GetCurrentStatus(); + + // Checks for the notification and takes appropriate action. + void Start(); + + // Notifies completion status of reboot. + void Stop(); + + private: + std::mutex m_StatusMutex; + RebManagerStatus m_CurrentStatus = RebManagerStatus::IDLE; + swss::SelectableEvent m_Done; + + swss::DBConnector m_db; + swss::NotificationProducer m_RebootResponse; + swss::NotificationConsumer m_NotificationConsumer; + + DbusInterface &m_dbus; + + // Signalled by reboot thread when thread completes. + swss::SelectableEvent m_RebootThreadFinished; + RebootThread m_RebootThread; + + void SetCurrentStatus(RebManagerStatus newStatus); + + // Reboot_Request_Channel notifications should all contain {"MESSAGE" : Data} + // in the notification Data field. + // Return true if "MESSAGE" is found, false otherwise. + // Set message_value to the Data string if found, "" otherwise. + // consumer is input: this is the consumer from which we pop + // reboot/cancel/status requests. + // request is output: this the request recevied from consumer + bool RetrieveNotificationData(swss::NotificationConsumer &consumer, + NotificationRequest &request); + NotificationResponse HandleRebootRequest( + const std::string &jsonRebootRequest); + NotificationResponse HandleStatusRequest( + const std::string &jsonStatusRequest); + NotificationResponse HandleCancelRequest( + const std::string &jsonCancelRequest); + void SendNotificationResponse(const std::string key, + const swss::StatusCode code, + const std::string message); + + // Returns true if a reboot is allowed at this time given the current + // warm manager state and reboot type, and false otherwise. + bool RebootAllowed(const gnoi::system::RebootMethod rebMethod); + + void DoTask(swss::NotificationConsumer &consumer); + + void HandleRebootFinish(); + void HandleDone(); + + friend class RebootBETestWithoutStop; +}; + +} // namespace rebootbackend From de106e024b78119ed6efa1ac372515562d60f83e Mon Sep 17 00:00:00 2001 From: Kavitha Ramalingam Date: Wed, 23 Oct 2024 17:25:43 +0530 Subject: [PATCH 3/3] gNOI Cold Reboot - Integrated tests --- .../tests/mock_reboot_interfaces.h | 18 + .../tests/reboot_thread_test.cpp | 288 ++++++++++++++ src/sonic-framework/tests/rebootbe_test.cpp | 376 ++++++++++++++++++ src/sonic-framework/tests/test_main.cpp | 7 + 4 files changed, 689 insertions(+) create mode 100644 src/sonic-framework/tests/mock_reboot_interfaces.h create mode 100644 src/sonic-framework/tests/reboot_thread_test.cpp create mode 100644 src/sonic-framework/tests/rebootbe_test.cpp create mode 100644 src/sonic-framework/tests/test_main.cpp diff --git a/src/sonic-framework/tests/mock_reboot_interfaces.h b/src/sonic-framework/tests/mock_reboot_interfaces.h new file mode 100644 index 000000000000..f6833b42c0b0 --- /dev/null +++ b/src/sonic-framework/tests/mock_reboot_interfaces.h @@ -0,0 +1,18 @@ +#pragma once +#include + +#include "reboot_interfaces.h" +#include "selectableevent.h" +#include "system/system.pb.h" + +namespace rebootbackend { + +class MockDbusInterface : public DbusInterface { + public: + MOCK_METHOD(DbusInterface::DbusResponse, Reboot, (const std::string &), + (override)); + MOCK_METHOD(DbusInterface::DbusResponse, RebootStatus, (const std::string &), + (override)); +}; + +} // namespace rebootbackend diff --git a/src/sonic-framework/tests/reboot_thread_test.cpp b/src/sonic-framework/tests/reboot_thread_test.cpp new file mode 100644 index 000000000000..69afc18c327e --- /dev/null +++ b/src/sonic-framework/tests/reboot_thread_test.cpp @@ -0,0 +1,288 @@ +#include "reboot_thread.h" + +#include +#include +#include +#include + +#include +#include +#include + +#include "mock_reboot_interfaces.h" +#include "reboot_common.h" +#include "reboot_interfaces.h" +#include "select.h" +#include "selectableevent.h" +#include "status_code_util.h" +#include "system/system.pb.h" +#include "timestamp.h" + +namespace rebootbackend { + + +// using namespace gnoi::system; +namespace gpu = ::google::protobuf::util; +using Progress = ::rebootbackend::RebootThread::Progress; +using RebootThread = ::rebootbackend::RebootThread; +using ::testing::_; +using ::testing::ExplainMatchResult; +using ::testing::HasSubstr; +using ::testing::NiceMock; +using ::testing::Return; +using ::testing::StrEq; +using ::testing::StrictMock; + +MATCHER_P2(IsStatus, status, message, "") { + return (arg.status().status() == status && + ExplainMatchResult(message, arg.status().message(), result_listener)); +} + +class RebootStatusTest : public ::testing::Test { + protected: + RebootStatusTest() : m_status() {} + ThreadStatus m_status; +}; + +TEST_F(RebootStatusTest, TestInit) { + gnoi::system::RebootStatusResponse response = m_status.get_response(); + + EXPECT_FALSE(response.active()); + EXPECT_THAT(response.reason(), StrEq("")); + EXPECT_EQ(response.count(), 0); + EXPECT_EQ(response.method(), gnoi::system::RebootMethod::UNKNOWN); + EXPECT_EQ( + response.status().status(), + gnoi::system::RebootStatus_Status::RebootStatus_Status_STATUS_UNKNOWN); + EXPECT_THAT(response.status().message(), StrEq("")); + + EXPECT_FALSE(m_status.get_active()); +} + +TEST_F(RebootStatusTest, TestGetStatus) { + std::chrono::nanoseconds curr_ns = + std::chrono::high_resolution_clock::now().time_since_epoch(); + + m_status.set_start_status(gnoi::system::RebootMethod::COLD, "reboot because"); + + gnoi::system::RebootStatusResponse response = m_status.get_response(); + EXPECT_EQ( + response.status().status(), + gnoi::system::RebootStatus_Status::RebootStatus_Status_STATUS_UNKNOWN); + + m_status.set_completed_status( + gnoi::system::RebootStatus_Status::RebootStatus_Status_STATUS_SUCCESS, + "anything"); + + response = m_status.get_response(); + + // message should be empty while reboot is active + EXPECT_THAT(response.status().message(), StrEq("")); + + uint64_t reboot_ns = response.when(); + EXPECT_TRUE(reboot_ns > (uint64_t)curr_ns.count()); + + m_status.set_inactive(); + response = m_status.get_response(); + EXPECT_THAT(response.status().message(), StrEq("anything")); + EXPECT_EQ( + response.status().status(), + gnoi::system::RebootStatus_Status::RebootStatus_Status_STATUS_SUCCESS); + EXPECT_EQ(0, response.when()); +} + +class RebootThreadTest : public ::testing::Test { + protected: + RebootThreadTest() + : m_dbus_interface(), + m_db("STATE_DB", 0), + m_config_db("CONFIG_DB", 0), + m_reboot_thread(m_dbus_interface, m_finished) { + sigterm_requested = false; + } + + void overwrite_reboot_timeout(uint32_t timeout_seconds) { + m_reboot_thread.m_reboot_timeout = timeout_seconds; + } + + gnoi::system::RebootStatusResponse get_response(void) { + return m_reboot_thread.m_status.get_response(); + } + + void set_start_status(const gnoi::system::RebootMethod &method, + const std::string &reason) { + return m_reboot_thread.m_status.set_start_status(method, reason); + } + + void set_completed_status(const gnoi::system::RebootStatus_Status &status, + const std::string &message) { + return m_reboot_thread.m_status.set_completed_status(status, message); + } + + void force_inactive(void) { return m_reboot_thread.m_status.set_inactive(); } + + void force_active(void) { return m_reboot_thread.m_status.set_inactive(); } + + void do_reboot(void) { return m_reboot_thread.do_reboot(); } + + void wait_for_finish(swss::Select &s, swss::SelectableEvent &finished, + long timeout_seconds) { + swss::Selectable *sel; + + int ret = s.select(&sel, timeout_seconds * 1000); + EXPECT_EQ(ret, swss::Select::OBJECT); + EXPECT_EQ(sel, &finished); + } + + Progress wait_for_platform_reboot(swss::Select &s) { + return m_reboot_thread.wait_for_platform_reboot(s); + } + + swss::SelectableEvent &return_m_stop_reference() { + return m_reboot_thread.m_stop; + } + + swss::DBConnector m_db; + swss::DBConnector m_config_db; + NiceMock m_dbus_interface; + swss::SelectableEvent m_finished; + RebootThread m_reboot_thread; +}; + +MATCHER_P2(Status, status, message, "") { + return (arg.status().status() == status && arg.status().message() == message); +} + +TEST_F(RebootThreadTest, TestStop) { + EXPECT_CALL(m_dbus_interface, Reboot(_)) + .Times(1) + .WillOnce(Return(DbusInterface::DbusResponse{ + DbusInterface::DbusStatus::DBUS_SUCCESS, ""})); + gnoi::system::RebootRequest request; + request.set_method(gnoi::system::RebootMethod::COLD); + overwrite_reboot_timeout(2); + m_reboot_thread.Start(request); + m_reboot_thread.Stop(); + m_reboot_thread.Join(); + gnoi::system::RebootStatusResponse response = m_reboot_thread.GetResponse(); + EXPECT_THAT( + response, + IsStatus( + gnoi::system::RebootStatus_Status::RebootStatus_Status_STATUS_UNKNOWN, + "")); +} + +TEST_F(RebootThreadTest, TestCleanExit) { + EXPECT_CALL(m_dbus_interface, Reboot(_)) + .Times(1) + .WillOnce(Return(DbusInterface::DbusResponse{ + DbusInterface::DbusStatus::DBUS_SUCCESS, ""})); + + overwrite_reboot_timeout(1); + + swss::Select s; + s.addSelectable(&m_finished); + + gnoi::system::RebootRequest request; + request.set_method(gnoi::system::RebootMethod::COLD); + request.set_message("time to reboot"); + m_reboot_thread.Start(request); + wait_for_finish(s, m_finished, 5); + + // Status should be active until we call join + gnoi::system::RebootStatusResponse response = get_response(); + EXPECT_TRUE(response.active()); + EXPECT_THAT(response.reason(), StrEq("time to reboot")); + EXPECT_EQ(response.count(), 1); + + EXPECT_THAT(response.status().message(), StrEq("")); + + m_reboot_thread.Join(); + + response = get_response(); + EXPECT_FALSE(response.active()); + EXPECT_THAT(response.status().message(), StrEq("platform failed to reboot")); +} + +TEST_F(RebootThreadTest, TestJoinWithoutStart) { + bool ret = m_reboot_thread.Join(); + EXPECT_FALSE(ret); +} + +// Call Start a second time while first thread is still executing. +TEST_F(RebootThreadTest, TestStartWhileRunning) { + EXPECT_CALL(m_dbus_interface, Reboot(_)) + .Times(1) + .WillOnce(Return(DbusInterface::DbusResponse{ + DbusInterface::DbusStatus::DBUS_SUCCESS, ""})); + + overwrite_reboot_timeout(2); + + gnoi::system::RebootRequest request; + request.set_method(gnoi::system::RebootMethod::COLD); + request.set_message("time to reboot"); + m_reboot_thread.Start(request); + + // First thread is still running ... + NotificationResponse response = m_reboot_thread.Start(request); + EXPECT_EQ(response.status, swss::StatusCode::SWSS_RC_IN_USE); + EXPECT_THAT(response.json_string, + StrEq("RebootThread: can't Start while active")); + + bool ret = m_reboot_thread.Join(); + EXPECT_TRUE(ret); +} + +// Call Start a second time after first thread completed +// but before first thread was joined. +// Second start should fail. +TEST_F(RebootThreadTest, TestStartWithoutJoin) { + EXPECT_CALL(m_dbus_interface, Reboot(_)) + .Times(1) + .WillOnce(Return(DbusInterface::DbusResponse{ + DbusInterface::DbusStatus::DBUS_SUCCESS, ""})); + + overwrite_reboot_timeout(1); + + swss::Select s; + s.addSelectable(&m_finished); + + gnoi::system::RebootRequest request; + request.set_method(gnoi::system::RebootMethod::COLD); + request.set_message("time to reboot"); + m_reboot_thread.Start(request); + wait_for_finish(s, m_finished, 3); + + // First thread has stopped: we need to join before + // restart will succeed + NotificationResponse response = m_reboot_thread.Start(request); + EXPECT_EQ(response.status, swss::StatusCode::SWSS_RC_IN_USE); + + // This should join the first start. + bool ret = m_reboot_thread.Join(); + EXPECT_TRUE(ret); +} + +TEST_F(RebootThreadTest, TestUnsupportedRebootType) { + gnoi::system::RebootRequest request; + request.set_method(gnoi::system::RebootMethod::POWERDOWN); + + NotificationResponse response = m_reboot_thread.Start(request); + EXPECT_EQ(response.status, swss::StatusCode::SWSS_RC_INVALID_PARAM); + EXPECT_EQ(response.json_string, + "RebootThread: Start rx'd unsupported method"); +} + +TEST_F(RebootThreadTest, TestInvalidMethodfDoReboot) { + set_start_status(gnoi::system::RebootMethod::POWERUP, "time to reboot"); + do_reboot(); + force_inactive(); + gnoi::system::RebootStatusResponse response = m_reboot_thread.GetResponse(); + EXPECT_THAT( + response, + IsStatus( + gnoi::system::RebootStatus_Status::RebootStatus_Status_STATUS_UNKNOWN, + "")); +} + +} // namespace rebootbackend diff --git a/src/sonic-framework/tests/rebootbe_test.cpp b/src/sonic-framework/tests/rebootbe_test.cpp new file mode 100644 index 000000000000..699e008b3d08 --- /dev/null +++ b/src/sonic-framework/tests/rebootbe_test.cpp @@ -0,0 +1,376 @@ +#include "rebootbe.h" + +#include +#include +#include +#include + +#include +#include +#include +#include + +#include "absl/time/time.h" +#include "mock_reboot_interfaces.h" +#include "reboot_common.h" +#include "select.h" +#include "status_code_util.h" +#include "system/system.pb.h" +#include "timestamp.h" + +namespace rebootbackend { + +#define ONE_SECOND (1) +#define TWO_SECONDS (2) +#define TENTH_SECOND_MS (100) +#define SELECT_TIMEOUT_250_MS (250) + +namespace gpu = ::google::protobuf::util; +using namespace gnoi::system; + +using ::testing::_; +using ::testing::AllOf; +using ::testing::AtLeast; +using ::testing::ExplainMatchResult; +using ::testing::HasSubstr; +using ::testing::InSequence; +using ::testing::Invoke; +using ::testing::NiceMock; +using ::testing::Return; +using ::testing::StrEq; +using ::testing::StrictMock; + +MATCHER_P2(IsStatus, status, message, "") { + return (arg.status().status() == status && + ExplainMatchResult(message, arg.status().message(), result_listener)); +} + +MATCHER_P3(ActiveCountMethod, active, count, method, "") { + return (arg.active() == active && arg.count() == (uint32_t)count && + arg.method() == method); +} + +class RebootBETestWithoutStop : public ::testing::Test { + protected: + RebootBETestWithoutStop() + : m_dbus_interface(), + m_db("STATE_DB", 0), + m_config_db("CONFIG_DB", 0), + m_rebootbeRequestChannel(&m_db, REBOOT_REQUEST_NOTIFICATION_CHANNEL), + m_rebootbeReponseChannel(&m_db, REBOOT_RESPONSE_NOTIFICATION_CHANNEL), + m_rebootbe(m_dbus_interface) { + sigterm_requested = false; + + m_s.addSelectable(&m_rebootbeReponseChannel); + + // Make the tests log to stdout, instead of syslog. + swss::Table logging_table(&m_config_db, CFG_LOGGER_TABLE_NAME); + logging_table.hset("rebootbackend", swss::DAEMON_LOGOUTPUT, "STDOUT"); + swss::Logger::restartLogger(); + } + virtual ~RebootBETestWithoutStop() = default; + + void start_rebootbe() { + m_rebootbe_thread = + std::make_unique(&RebootBE::Start, &m_rebootbe); + } + + void set_mock_defaults() { + ON_CALL(m_dbus_interface, Reboot(_)) + .WillByDefault(Return(DbusInterface::DbusResponse{ + DbusInterface::DbusStatus::DBUS_SUCCESS, ""})); + } + + void overwrite_reboot_timeout(uint32_t timeout_seconds) { + m_rebootbe.m_RebootThread.m_reboot_timeout = timeout_seconds; + } + + void send_stop_reboot_thread() { m_rebootbe.m_RebootThread.Stop(); } + + void SendRebootRequest(const std::string &op, const std::string &data, + const std::string &field, const std::string &value) { + std::vector values; + values.push_back(swss::FieldValueTuple{field, value}); + + m_rebootbeRequestChannel.send(op, data, values); + } + + void SendRebootViaProto(RebootRequest &request) { + std::string json_string; + gpu::MessageToJsonString(request, &json_string); + + SendRebootRequest("Reboot", "StatusCode", DATA_TUPLE_KEY, json_string); + } + + void SendRebootStatusRequest(void) { + SendRebootRequest("RebootStatus", "StatusCode", DATA_TUPLE_KEY, + "json status request"); + } + + void start_reboot_via_rpc( + RebootRequest &request, + swss::StatusCode expected_result = swss::StatusCode::SWSS_RC_SUCCESS) { + SendRebootViaProto(request); + while (true) { + int ret; + swss::Selectable *sel; + ret = m_s.select(&sel, SELECT_TIMEOUT_250_MS); + if (ret != swss::Select::OBJECT) continue; + if (sel != &m_rebootbeReponseChannel) continue; + break; + } + std::string op, data; + std::vector ret_values; + m_rebootbeReponseChannel.pop(op, data, ret_values); + + EXPECT_THAT(op, StrEq("Reboot")); + EXPECT_THAT(data, StrEq(swss::statusCodeToStr(expected_result))); + } + + gnoi::system::RebootStatusResponse do_reboot_status_rpc() { + SendRebootStatusRequest(); + while (true) { + int ret; + swss::Selectable *sel; + ret = m_s.select(&sel, SELECT_TIMEOUT_250_MS); + if (ret != swss::Select::OBJECT) continue; + if (sel != &m_rebootbeReponseChannel) continue; + break; + } + std::string op, data; + std::vector ret_values; + m_rebootbeReponseChannel.pop(op, data, ret_values); + + EXPECT_THAT(op, StrEq("RebootStatus")); + EXPECT_EQ(data, swss::statusCodeToStr(swss::StatusCode::SWSS_RC_SUCCESS)); + + std::string json_response; + for (auto &fv : ret_values) { + if (DATA_TUPLE_KEY == fvField(fv)) { + json_response = fvValue(fv); + } + } + gnoi::system::RebootStatusResponse response; + gpu::JsonStringToMessage(json_response, &response); + return response; + } + + void GetNotificationResponse(swss::NotificationConsumer &consumer, + std::string &op, std::string &data, + std::vector &values) { + swss::Select s; + s.addSelectable(&consumer); + swss::Selectable *sel; + s.select(&sel, SELECT_TIMEOUT_250_MS); + + consumer.pop(op, data, values); + } + + NotificationResponse handle_reboot_request(std::string &json_request) { + return m_rebootbe.HandleRebootRequest(json_request); + } + + // Mock interfaces. + NiceMock m_dbus_interface; + + // DB connectors + swss::DBConnector m_db; + swss::DBConnector m_config_db; + + // Reboot thread signaling. + swss::NotificationProducer m_rebootbeRequestChannel; + swss::Select m_s; + swss::NotificationConsumer m_rebootbeReponseChannel; + + // Module under test. + std::unique_ptr m_rebootbe_thread; + RebootBE m_rebootbe; +}; + +class RebootBETest : public RebootBETestWithoutStop { + protected: + ~RebootBETest() { + m_rebootbe.Stop(); + m_rebootbe_thread->join(); + } +}; + +// Test fixture to skip through the startup sequence into the main loop. +// Param indicates if RebootBE should be initialized into a state where the +// system came up in warmboot. +class RebootBEAutoStartTest : public RebootBETest, + public ::testing::WithParamInterface { + protected: + RebootBEAutoStartTest() { + + start_rebootbe(); + + std::this_thread::sleep_for(std::chrono::milliseconds(50)); + EXPECT_EQ(m_rebootbe.GetCurrentStatus(), RebootBE::RebManagerStatus::IDLE); + } +}; + +// Normal operation testing. +TEST_P(RebootBEAutoStartTest, NonExistentMessage) { + swss::NotificationConsumer consumer(&m_db, + REBOOT_RESPONSE_NOTIFICATION_CHANNEL); + + // No "MESSAGE" in field/values + SendRebootRequest("Reboot", "StatusCode", "field1", "field1_value"); + EXPECT_EQ(m_rebootbe.GetCurrentStatus(), RebootBE::RebManagerStatus::IDLE); + + std::string op, data; + std::vector ret_values; + GetNotificationResponse(consumer, op, data, ret_values); + + EXPECT_THAT(op, StrEq("Reboot")); + EXPECT_THAT( + data, + StrEq(swss::statusCodeToStr(swss::StatusCode::SWSS_RC_INVALID_PARAM))); +} + +TEST_P(RebootBEAutoStartTest, TestCancelReboot) { + swss::NotificationConsumer consumer(&m_db, + REBOOT_RESPONSE_NOTIFICATION_CHANNEL); + + SendRebootRequest("CancelReboot", "StatusCode", DATA_TUPLE_KEY, + "json cancelreboot request"); + EXPECT_EQ(m_rebootbe.GetCurrentStatus(), RebootBE::RebManagerStatus::IDLE); + + std::string op, data; + std::vector ret_values; + GetNotificationResponse(consumer, op, data, ret_values); + + EXPECT_THAT(op, StrEq("CancelReboot")); + EXPECT_THAT( + data, + StrEq(swss::statusCodeToStr(swss::StatusCode::SWSS_RC_UNIMPLEMENTED))); +} + +TEST_P(RebootBEAutoStartTest, TestUnrecognizedOP) { + swss::NotificationConsumer consumer(&m_db, + REBOOT_RESPONSE_NOTIFICATION_CHANNEL); + + SendRebootRequest("NonOp", "StatusCode", DATA_TUPLE_KEY, "invalid op code"); + EXPECT_EQ(m_rebootbe.GetCurrentStatus(), RebootBE::RebManagerStatus::IDLE); + + std::string op, data; + std::vector ret_values; + GetNotificationResponse(consumer, op, data, ret_values); + + EXPECT_THAT(op, StrEq("NonOp")); + EXPECT_THAT( + data, + StrEq(swss::statusCodeToStr(swss::StatusCode::SWSS_RC_INVALID_PARAM))); +} + +TEST_P(RebootBEAutoStartTest, TestColdRebootDbusToCompletion) { + DbusInterface::DbusResponse dbus_response{ + DbusInterface::DbusStatus::DBUS_SUCCESS, ""}; + EXPECT_CALL(m_dbus_interface, Reboot(_)) + .Times(3) + .WillRepeatedly(Return(dbus_response)); + + overwrite_reboot_timeout(1); + RebootRequest request; + request.set_method(RebootMethod::COLD); + start_reboot_via_rpc(request); + + std::this_thread::sleep_for(std::chrono::milliseconds(TENTH_SECOND_MS)); + EXPECT_EQ(m_rebootbe.GetCurrentStatus(), + RebootBE::RebManagerStatus::COLD_REBOOT_IN_PROGRESS); + sleep(TWO_SECONDS); + + EXPECT_EQ(m_rebootbe.GetCurrentStatus(), RebootBE::RebManagerStatus::IDLE); + gnoi::system::RebootStatusResponse response = do_reboot_status_rpc(); + EXPECT_THAT(response, ActiveCountMethod(false, 1, RebootMethod::COLD)); + EXPECT_THAT(response, + IsStatus(RebootStatus_Status::RebootStatus_Status_STATUS_FAILURE, + "platform failed to reboot")); + + start_reboot_via_rpc(request); + sleep(TWO_SECONDS); + + start_reboot_via_rpc(request); + sleep(TWO_SECONDS); + + response = do_reboot_status_rpc(); + // Verifiy count is 3 after three reboot attempts. + EXPECT_THAT(response, ActiveCountMethod(false, 3, RebootMethod::COLD)); + EXPECT_THAT(response, + IsStatus(RebootStatus_Status::RebootStatus_Status_STATUS_FAILURE, + "platform failed to reboot")); +} + +TEST_P(RebootBEAutoStartTest, TestColdBootSigterm) { + sigterm_requested = true; + set_mock_defaults(); + overwrite_reboot_timeout(1); + + RebootRequest request; + request.set_method(RebootMethod::COLD); + start_reboot_via_rpc(request); + + sleep(ONE_SECOND); + + EXPECT_EQ(m_rebootbe.GetCurrentStatus(), RebootBE::RebManagerStatus::IDLE); + gnoi::system::RebootStatusResponse second_resp = do_reboot_status_rpc(); + EXPECT_THAT(second_resp, ActiveCountMethod(false, 1, RebootMethod::COLD)); + EXPECT_THAT( + second_resp, + IsStatus(RebootStatus_Status::RebootStatus_Status_STATUS_UNKNOWN, "")); +} + +TEST_P(RebootBEAutoStartTest, TestColdBootDbusError) { + // Return FAIL from dbus reboot call. + DbusInterface::DbusResponse dbus_response{ + DbusInterface::DbusStatus::DBUS_FAIL, "dbus reboot failed"}; + EXPECT_CALL(m_dbus_interface, Reboot(_)) + .Times(1) + .WillOnce(Return(dbus_response)); + + RebootRequest request; + request.set_method(RebootMethod::COLD); + start_reboot_via_rpc(request); + + sleep(TWO_SECONDS); + + EXPECT_EQ(m_rebootbe.GetCurrentStatus(), RebootBE::RebManagerStatus::IDLE); + gnoi::system::RebootStatusResponse second_resp = do_reboot_status_rpc(); + EXPECT_THAT(second_resp, ActiveCountMethod(false, 1, RebootMethod::COLD)); + EXPECT_THAT(second_resp, + IsStatus(RebootStatus_Status::RebootStatus_Status_STATUS_FAILURE, + "dbus reboot failed")); +} + +TEST_P(RebootBEAutoStartTest, TestStopDuringColdBoot) { + set_mock_defaults(); + + RebootRequest request; + request.set_method(RebootMethod::COLD); + start_reboot_via_rpc(request); + std::this_thread::sleep_for(std::chrono::milliseconds(TENTH_SECOND_MS)); + EXPECT_EQ(m_rebootbe.GetCurrentStatus(), + RebootBE::RebManagerStatus::COLD_REBOOT_IN_PROGRESS); + + send_stop_reboot_thread(); + std::this_thread::sleep_for(std::chrono::milliseconds(TENTH_SECOND_MS)); + EXPECT_EQ(m_rebootbe.GetCurrentStatus(), RebootBE::RebManagerStatus::IDLE); + + gnoi::system::RebootStatusResponse response = do_reboot_status_rpc(); + EXPECT_THAT(response, ActiveCountMethod(false, 1, RebootMethod::COLD)); + EXPECT_THAT( + response, + IsStatus(RebootStatus_Status::RebootStatus_Status_STATUS_UNKNOWN, "")); +} + +TEST_P(RebootBEAutoStartTest, TestInvalidJsonRebootRequest) { + std::string json_request = "abcd"; + NotificationResponse response = handle_reboot_request(json_request); + EXPECT_EQ(swss::StatusCode::SWSS_RC_INTERNAL, response.status); +} + +INSTANTIATE_TEST_SUITE_P(TestWithStartupWarmbootEnabledState, + RebootBEAutoStartTest, testing::Values(true, false)); + +} // namespace rebootbackend diff --git a/src/sonic-framework/tests/test_main.cpp b/src/sonic-framework/tests/test_main.cpp new file mode 100644 index 000000000000..693d88f181ed --- /dev/null +++ b/src/sonic-framework/tests/test_main.cpp @@ -0,0 +1,7 @@ + +#include "gtest/gtest.h" + +int main(int argc, char* argv[]) { + testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); +} \ No newline at end of file