From d899ebefafcd1e7f611854ac27c961ab0f529f2b Mon Sep 17 00:00:00 2001 From: Kamikaza731 Date: Mon, 9 Dec 2024 21:35:56 +0100 Subject: [PATCH] Fix for process_api_not_working function where the API response would return none object type and mess up with the function --- pyproject.toml | 2 +- tnom/check_apis.py | 6 ++++-- tnom/main.py | 39 ++++++++++++++++++++++++++++++++------- tnom/query/api_queries.py | 26 ++++++++++++++++++-------- 4 files changed, 55 insertions(+), 18 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 2852fd8..744cebb 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "tnom" -version = "0.4.0" +version = "0.4.1" description = "The Nibiru Oracle Monitoring is a tool for monitoring signer wallet for Nibiru Oracle." authors = ["Kamikaza731"] readme = "README.md" diff --git a/tnom/check_apis.py b/tnom/check_apis.py index 9f82e96..12c38af 100644 --- a/tnom/check_apis.py +++ b/tnom/check_apis.py @@ -26,10 +26,12 @@ async def check_apis(load_config: dict[str, Any]) -> list[str]: responses = await asyncio.gather(*tasks, return_exceptions=True) # Fully functional APIs online_apis_with_data = [(api, response) for api, response in zip( - loaded_apis, responses) if not isinstance(response, Exception)] + loaded_apis, responses) if not isinstance(response, Exception) + and response is not None] # Unhealthy APIs unhealthy_apis = [api for api, response in zip( - loaded_apis, responses) if isinstance(response, Exception)] + loaded_apis, responses) if isinstance(response, Exception) + or response is None] if not online_apis_with_data: logging.warning("No healthy APIs found") diff --git a/tnom/main.py b/tnom/main.py index b8891f5..4f7ba2f 100644 --- a/tnom/main.py +++ b/tnom/main.py @@ -301,8 +301,31 @@ async def process_api_not_working( if no_healthy_apis is True: api_consecutive_misses += 1 elif no_healthy_apis is False: - api_consecutive_misses = 0 self.alert_sent["healthy_api_missing"] = False + # if the API was 
not working properly for more than 3 minutes and + # it started working again it should send an info alert that + # everything is back to normal + if self.api_consecutive_misses >= API_CONS_MISS_THRESHOLD: + summary = "Alert: API working again!" + level = "info" + alert_details = { + "api_consecutive_misses": api_consecutive_misses, + "alert_level": "info", + } + if self.alert_yml.get("pagerduty_alerts") is True: + alerts.pagerduty_alert_trigger( + self.alert_yml["pagerduty_routing_key"], + alert_details, + summary, + level, + ) + if self.alert_yml.get("telegram_alerts") is True: + await alerts.telegram_alert_trigger( + self.alert_yml["telegram_bot_token"], + alert_details, + self.alert_yml["telegram_chat_id"], + ) + api_consecutive_misses = 0 self.api_consecutive_misses = api_consecutive_misses @@ -332,8 +355,8 @@ async def process_api_not_working( database_handler.overwrite_single_field( self.database_path, epoch, - "api_consecutive_misses", - api_consecutive_misses, + "api_cons_miss", + self.api_consecutive_misses, ) def setup_argument_parser() -> argparse.ArgumentParser: @@ -359,7 +382,7 @@ def setup_argument_parser() -> argparse.ArgumentParser: "--config-path", type=str, help="Path to the config YAML file\n" - f"Default: {working_dir}/config.yml", + f"Default always looks to the current dir: {working_dir}/config.yml", default=working_dir / "config.yml", required=False, ) @@ -368,7 +391,7 @@ def setup_argument_parser() -> argparse.ArgumentParser: "--alert-path", type=str, help="Path to the alert YAML file\n" - f"Default: {working_dir}/alert.yml", + f"Default always looks to the current dir: {working_dir}/alert.yml", default=working_dir / "alert.yml", required=False, ) @@ -376,7 +399,7 @@ def setup_argument_parser() -> argparse.ArgumentParser: parser.add_argument( "--version", action="version", - version="v0.4.0", + version="v0.4.1", ) return parser @@ -410,6 +433,7 @@ async def main() -> None: # Initialize and check the database try:
init_and_check_db(working_dir) + # if it exists see if schema is ok database_handler.check_and_update_database_schema(database_path) except Exception as e: logging.exception("Failed to initialize database: %s", e) # noqa: TRY401 @@ -449,7 +473,8 @@ async def monitoring_loop() -> None: healthy_apis = await check_apis(config_yml) while not healthy_apis: logging.error("Failed to check APIs") - latest_epoch = database_handler.read_last_recorded_epoch(database_path) + latest_epoch = ( + database_handler.read_last_recorded_epoch(database_path)) await monitoring_system.process_api_not_working( latest_epoch, no_healthy_apis=True) # stop the script here and start from while True again until there diff --git a/tnom/query/api_queries.py b/tnom/query/api_queries.py index 5a114e8..89b9833 100644 --- a/tnom/query/api_queries.py +++ b/tnom/query/api_queries.py @@ -259,7 +259,7 @@ async def check_token_in_wallet( async def check_latest_block( api: str, session: aiohttp.ClientSession, -) -> tuple[int, str] | None: +) -> tuple[int, str]: """Check the latest block height of the chain. 
Args: @@ -267,8 +267,11 @@ async def check_latest_block( session (aiohttp.ClientSession): The aiohttp client session Returns: - tuple[int, str] | None: A tuple of the latest block height and timestamp - if successful, otherwise None + tuple[int, str]: A tuple of the latest block height and timestamp + + Raises: + aiohttp.ClientError: If there's an HTTP error + ValueError: If the response status is not OK or if the data is invalid """ try: @@ -281,9 +284,16 @@ async def check_latest_block( block_height = int(json_data["block"]["header"]["height"]) timestamp = json_data["block"]["header"]["time"] return (block_height, timestamp) - logging.error("Failed to collect latest block %s", response.status) - return None - except aiohttp.ContentTypeError as e: - logging.exception("Failed to collect latest block", exc_info=e) - return None + msg = f"API request failed with status {response.status}" + raise ValueError(msg) + + except aiohttp.ContentTypeError as e: + logging.error("Content type error while collecting latest block: %s", str(e)) + raise + except (KeyError, ValueError) as e: + logging.error("Invalid data received from API: %s", str(e)) + raise + except Exception as e: + logging.error("Unexpected error while collecting latest block: %s", str(e)) + raise