From f94fb0103b019a5e49f1ceed72133158a3bb5751 Mon Sep 17 00:00:00 2001 From: Manuel Fuchs Date: Wed, 6 Nov 2024 16:46:59 +0100 Subject: [PATCH 01/29] Add initial structure for clickhouse connector --- config.yaml | 8 ++ src/logcollector/batch_handler.py | 2 +- src/logcollector/collector.py | 4 +- src/monitoring/__init__.py | 0 src/monitoring/clickhouse_batch.py | 85 +++++++++++++ src/monitoring/clickhouse_connector.py | 118 ++++++++++++++++++ .../create_tables/failed_dns_loglines.sql | 8 ++ src/monitoring/create_tables/server_logs.sql | 7 ++ .../create_tables/server_logs_timestamps.sql | 7 ++ tests/test_batch_handler.py | 68 ++++------ tests/test_collector.py | 32 ++--- 11 files changed, 279 insertions(+), 60 deletions(-) create mode 100644 src/monitoring/__init__.py create mode 100644 src/monitoring/clickhouse_batch.py create mode 100644 src/monitoring/clickhouse_connector.py create mode 100644 src/monitoring/create_tables/failed_dns_loglines.sql create mode 100644 src/monitoring/create_tables/server_logs.sql create mode 100644 src/monitoring/create_tables/server_logs_timestamps.sql diff --git a/config.yaml b/config.yaml index 4a5b593..fea486e 100644 --- a/config.yaml +++ b/config.yaml @@ -65,6 +65,11 @@ pipeline: base_url: https://heibox.uni-heidelberg.de/d/0d5cbcbe16cd46a58021/ threshold: 0.5 + monitoring: + clickhouse_connector: + batch_size: 10000 + batch_timeout: 2.0 + environment: timestamp_format: "%Y-%m-%dT%H:%M:%S.%fZ" kafka_brokers: @@ -78,3 +83,6 @@ environment: hostname: 172.27.0.8 port_in: 9998 port_out: 9999 + monitoring: + clickhouse_server: + hostname: localhost diff --git a/src/logcollector/batch_handler.py b/src/logcollector/batch_handler.py index 34b86ca..18f6ba0 100644 --- a/src/logcollector/batch_handler.py +++ b/src/logcollector/batch_handler.py @@ -254,7 +254,7 @@ def get_stored_keys(self) -> set: return keys_set.copy() -class CollectorKafkaBatchSender: +class BufferedBatchSender: """ Adds messages to the :class:`BufferedBatch` and sends them after a timer ran out or the respective batch is full. """ diff --git a/src/logcollector/collector.py b/src/logcollector/collector.py index e07f51c..3f291f0 100644 --- a/src/logcollector/collector.py +++ b/src/logcollector/collector.py @@ -7,7 +7,7 @@ sys.path.append(os.getcwd()) from src.base.logline_handler import LoglineHandler from src.base import utils -from src.logcollector.batch_handler import CollectorKafkaBatchSender +from src.logcollector.batch_handler import BufferedBatchSender from src.base.log_config import get_logger logger = get_logger("log_collection.collector") @@ -44,7 +44,7 @@ def __init__(self): logger.debug( f"Calling CollectorKafkaBatchSender(transactional_id='collector')..." 
) - self.batch_handler = CollectorKafkaBatchSender() + self.batch_handler = BufferedBatchSender() logger.debug("Calling LoglineHandler()...") self.logline_handler = LoglineHandler() logger.debug("Initialized LogCollector.") diff --git a/src/monitoring/__init__.py b/src/monitoring/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/monitoring/clickhouse_batch.py b/src/monitoring/clickhouse_batch.py new file mode 100644 index 0000000..cc199cc --- /dev/null +++ b/src/monitoring/clickhouse_batch.py @@ -0,0 +1,85 @@ +import os +import sys +from threading import Timer + +sys.path.append(os.getcwd()) +from src.base.log_config import get_logger +from src.base.utils import setup_config + +logger = get_logger() + +CONFIG = setup_config() +CLICKHOUSE_HOSTNAME = CONFIG["environment"]["monitoring"]["clickhouse_server"][ + "hostname" +] +BATCH_SIZE = CONFIG["pipeline"]["monitoring"]["clickhouse_connector"]["batch_size"] +BATCH_TIMEOUT = CONFIG["pipeline"]["monitoring"]["clickhouse_connector"][ + "batch_timeout" +] + + +class ClickHouseBatchSender: + def __init__(self, kafka_topic: str, table_name: str, column_names: list[str]): + self.kafka_topic = kafka_topic + self.table_name = table_name + self.column_names = column_names + + self.max_batch_size = BATCH_SIZE + self.batch_timeout = BATCH_TIMEOUT + + self.timer = None + self.batch = [] + # self.client = clickhouse_connect.get_client( + # host=CLICKHOUSE_HOSTNAME, + # ) + + def __del__(self): + self.insert_all() + + def add(self, data: list[str] | list[list[str]]): + def _add_element(element): + if len(element) != len(self.column_names): + raise ValueError( + "Number of elements in the insert does not match the number of columns" + ) + + self.batch.append(element) + + if any(isinstance(e, list) for e in data): + for e in data: + _add_element(e) + else: + _add_element(data) + + if len(self.batch) >= self.max_batch_size: + self.insert_all() + elif not self.timer: + self._start_timer() + + def insert_all(self): + if self.batch: + # self.client.insert( + # self.table_name, + # self.batch, + # self.column_names, + # ) + logger.info( + f""" + self.client.insert( + {self.table_name=}, + {self.batch=}, + {self.column_names=}, + ) + """ + ) + self.batch = [] + + if self.timer: + self.timer.cancel() + + def _start_timer(self): + if self.timer: + self.timer.cancel() + + self.timer = Timer(BATCH_TIMEOUT, self.insert_all) + self.timer.start() diff --git a/src/monitoring/clickhouse_connector.py b/src/monitoring/clickhouse_connector.py new file mode 100644 index 0000000..cba3b33 --- /dev/null +++ b/src/monitoring/clickhouse_connector.py @@ -0,0 +1,118 @@ +import datetime +import os +import sys +import uuid + +import clickhouse_connect + +sys.path.append(os.getcwd()) +from src.monitoring.clickhouse_batch import ClickHouseBatchSender +from src.base.kafka_handler import KafkaConsumeHandler +from src.base.log_config import get_logger +from src.base.utils import setup_config + +logger = get_logger() + +CONFIG = setup_config() +CLICKHOUSE_HOSTNAME = CONFIG["environment"]["monitoring"]["clickhouse_server"][ + "hostname" +] +CREATE_TABLES_DIRECTORY = "create_tables" # TODO: Get from config + + +class ClickHouseConnector: + def __init__(self, table_name: str, column_names: list[str]): + self._table_name = table_name + self._column_names = column_names + self._topic = f"clickhouse_{table_name}" + + self._kafka_consumer = KafkaConsumeHandler(self._topic) + self._batch_sender = ClickHouseBatchSender( + kafka_topic=self._topic, + table_name=self._table_name, + 
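column_names=self._column_names,
+            # A rough usage sketch of the ClickHouseBatchSender above, assuming a
+            # hypothetical BATCH_SIZE of 2 (the real limits come from config.yaml):
+            #
+            #   sender = ClickHouseBatchSender("clickhouse_server_logs", "server_logs",
+            #                                  ["message_id", "timestamp_in", "message_text"])
+            #   sender.add([uuid.uuid4(), datetime.datetime.now(), "first"])   # below limit: timeout timer starts
+            #   sender.add([uuid.uuid4(), datetime.datetime.now(), "second"])  # limit reached: insert_all() flushes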
+        )
+
+    def prepare_tables(self):
+        def _load_contents(file_name: str) -> str:
+            with open(file_name, "r") as file:
+                return file.read()
+
+        for filename in os.listdir(CREATE_TABLES_DIRECTORY):
+            if filename.endswith(".sql"):
+                file_path = os.path.join(CREATE_TABLES_DIRECTORY, filename)
+                sql_content = _load_contents(file_path)
+
+                with clickhouse_connect.get_client(host=CLICKHOUSE_HOSTNAME) as client:
+                    try:
+                        client.command(sql_content)
+                    except Exception as e:
+                        logger.critical("Error in CREATE TABLE statement")
+                        raise e
+
+    def _add_to_batch(self, data):
+        self._batch_sender.add(data)
+
+
+class ServerLogsConnector(ClickHouseConnector):
+    def __init__(self):
+        column_names = [
+            "message_id",
+            "timestamp_in",
+            "message_text",
+        ]
+
+        super().__init__("server_logs", column_names)
+
+    def insert(
+        self, message_text: str, timestamp_in: datetime.datetime | None = None
+    ) -> uuid.UUID:
+        message_id = uuid.uuid4()
+
+        if not timestamp_in:
+            timestamp_in = datetime.datetime.now()
+
+        timestamp_in = timestamp_in.strftime("%Y-%m-%d %H:%M:%S.%f")
+
+        self._add_to_batch([message_id, timestamp_in, message_text])
+
+        return message_id
+
+
+class ServerLogsTimestampsConnector(ClickHouseConnector):
+    def __init__(self):
+        column_names = [
+            "message_id",
+            "event",
+            "event_timestamp",
+        ]
+
+        super().__init__("server_logs_timestamps", column_names)
+
+    def insert(self, message_id: uuid.UUID, event: str, event_timestamp) -> uuid.UUID:
+        # TODO: Implement logic
+
+        return message_id
+
+
+class FailedDNSLoglinesConnector(ClickHouseConnector):
+    def __init__(self):
+        super().__init__("failed_dns_loglines")
+
+        self.column_names = [
+            "message_text",
+            "timestamp_in",
+            "timestamp_failed",
+            "reason_for_failure",
+        ]
+
+    def insert(
+        self,
+        message_text: str,
+        timestamp_in,
+        timestamp_failed,
+        reason_for_failure: str | None = None,
+    ):
+        # TODO: Implement logic
+
+        pass
diff --git a/src/monitoring/create_tables/failed_dns_loglines.sql b/src/monitoring/create_tables/failed_dns_loglines.sql
new file mode 100644
index 0000000..6da6f5c
--- /dev/null
+++ b/src/monitoring/create_tables/failed_dns_loglines.sql
@@ -0,0 +1,8 @@
+CREATE TABLE IF NOT EXISTS failed_dns_loglines (
+    message_text String NOT NULL,
+    timestamp_in DateTime64(6) NOT NULL,
+    timestamp_failed DateTime64(6) NOT NULL,
+    reason_for_failure String
+)
+ENGINE = MergeTree
+PRIMARY KEY(message_id);
diff --git a/src/monitoring/create_tables/server_logs.sql b/src/monitoring/create_tables/server_logs.sql
new file mode 100644
index 0000000..b191d83
--- /dev/null
+++ b/src/monitoring/create_tables/server_logs.sql
@@ -0,0 +1,7 @@
+CREATE TABLE IF NOT EXISTS server_logs (
+    message_id UUID NOT NULL,
+    timestamp_in DateTime64(6) NOT NULL,
+    message_text String NOT NULL
+)
+ENGINE = MergeTree
+PRIMARY KEY(message_id);
diff --git a/src/monitoring/create_tables/server_logs_timestamps.sql b/src/monitoring/create_tables/server_logs_timestamps.sql
new file mode 100644
index 0000000..7a6c58c
--- /dev/null
+++ b/src/monitoring/create_tables/server_logs_timestamps.sql
@@ -0,0 +1,7 @@
+CREATE TABLE IF NOT EXISTS server_logs_timestamps (
+    message_id UUID NOT NULL,
+    event String NOT NULL,
+    event_timestamp DateTime64(6) NOT NULL
+)
+ENGINE = MergeTree
+PRIMARY KEY(message_id);
diff --git a/tests/test_batch_handler.py b/tests/test_batch_handler.py
index b1bd893..a830e44 100644
--- a/tests/test_batch_handler.py
+++ b/tests/test_batch_handler.py
@@ -1,7 +1,7 @@
 import unittest
 from unittest.mock import patch, MagicMock
 
-from 
src.logcollector.batch_handler import CollectorKafkaBatchSender +from src.logcollector.batch_handler import BufferedBatchSender class TestInit(unittest.TestCase): @@ -15,7 +15,7 @@ def test_init_with_buffer(self, mock_kafka_produce_handler, mock_buffered_batch) mock_buffered_batch.return_value = mock_batch_instance # Act - sut = CollectorKafkaBatchSender() + sut = BufferedBatchSender() # Assert self.assertEqual("Prefilter", sut.topic) @@ -36,11 +36,9 @@ class TestAddMessage(unittest.TestCase): @patch("src.logcollector.batch_handler.logger") @patch("src.logcollector.batch_handler.BATCH_SIZE", 1000) @patch("src.logcollector.batch_handler.KafkaProduceHandler") - @patch("src.logcollector.batch_handler.CollectorKafkaBatchSender._reset_timer") + @patch("src.logcollector.batch_handler.BufferedBatchSender._reset_timer") @patch("src.logcollector.batch_handler.BufferedBatch.get_number_of_messages") - @patch( - "src.logcollector.batch_handler.CollectorKafkaBatchSender._send_batch_for_key" - ) + @patch("src.logcollector.batch_handler.BufferedBatchSender._send_batch_for_key") def test_add_message_normal( self, mock_send_batch, @@ -57,7 +55,7 @@ def test_add_message_normal( key = "test_key" message = "test_message" - sut = CollectorKafkaBatchSender() + sut = BufferedBatchSender() sut.timer = MagicMock() # Act @@ -71,9 +69,7 @@ def test_add_message_normal( @patch("src.logcollector.batch_handler.logger") @patch("src.logcollector.batch_handler.BATCH_SIZE", 100) @patch("src.logcollector.batch_handler.KafkaProduceHandler") - @patch( - "src.logcollector.batch_handler.CollectorKafkaBatchSender._send_batch_for_key" - ) + @patch("src.logcollector.batch_handler.BufferedBatchSender._send_batch_for_key") def test_add_message_full_messages( self, mock_send_batch, mock_produce_handler, mock_logger ): @@ -83,7 +79,7 @@ def test_add_message_full_messages( key = "test_key" - sut = CollectorKafkaBatchSender() + sut = BufferedBatchSender() sut.timer = MagicMock() # Act @@ -98,9 +94,7 @@ def test_add_message_full_messages( @patch("src.logcollector.batch_handler.logger") @patch("src.logcollector.batch_handler.BATCH_SIZE", 100) @patch("src.logcollector.batch_handler.KafkaProduceHandler") - @patch( - "src.logcollector.batch_handler.CollectorKafkaBatchSender._send_batch_for_key" - ) + @patch("src.logcollector.batch_handler.BufferedBatchSender._send_batch_for_key") def test_add_message_full_messages_with_different_keys( self, mock_send_batch, mock_produce_handler, mock_logger ): @@ -111,7 +105,7 @@ def test_add_message_full_messages_with_different_keys( key = "test_key" other_key = "other_key" - sut = CollectorKafkaBatchSender() + sut = BufferedBatchSender() sut.timer = MagicMock() # Act @@ -130,7 +124,7 @@ def test_add_message_full_messages_with_different_keys( @patch("src.logcollector.batch_handler.logger") @patch("src.logcollector.batch_handler.BATCH_SIZE", 100) @patch("src.logcollector.batch_handler.KafkaProduceHandler") - @patch("src.logcollector.batch_handler.CollectorKafkaBatchSender._reset_timer") + @patch("src.logcollector.batch_handler.BufferedBatchSender._reset_timer") def test_add_message_no_timer( self, mock_reset_timer, mock_produce_handler, mock_logger ): @@ -138,7 +132,7 @@ def test_add_message_no_timer( mock_produce_handler_instance = MagicMock() mock_produce_handler.return_value = mock_produce_handler_instance - sut = CollectorKafkaBatchSender() + sut = BufferedBatchSender() sut.timer = None # Act @@ -151,9 +145,7 @@ def test_add_message_no_timer( class TestSendAllBatches(unittest.TestCase): 
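    # The cases below drive _send_all_batches() through the possible buffer
    # states: two active keys, a single key, and no keys, plus the
    # reset_timer=True variant that restarts the flush timer afterwards.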
@patch("src.logcollector.batch_handler.logger") @patch("src.logcollector.batch_handler.KafkaProduceHandler") - @patch( - "src.logcollector.batch_handler.CollectorKafkaBatchSender._send_batch_for_key" - ) + @patch("src.logcollector.batch_handler.BufferedBatchSender._send_batch_for_key") @patch("src.logcollector.batch_handler.BufferedBatch") def test_send_all_batches_with_existing_keys( self, @@ -169,7 +161,7 @@ def test_send_all_batches_with_existing_keys( mock_send_batch_instance = MagicMock() mock_send_batch.return_value = mock_send_batch_instance - sut = CollectorKafkaBatchSender() + sut = BufferedBatchSender() # Act sut._send_all_batches(reset_timer=False) @@ -180,9 +172,7 @@ def test_send_all_batches_with_existing_keys( self.assertEqual(mock_send_batch.call_count, 2) @patch("src.logcollector.batch_handler.KafkaProduceHandler") - @patch( - "src.logcollector.batch_handler.CollectorKafkaBatchSender._send_batch_for_key" - ) + @patch("src.logcollector.batch_handler.BufferedBatchSender._send_batch_for_key") @patch("src.logcollector.batch_handler.BufferedBatch") def test_send_all_batches_with_one_key( self, mock_buffered_batch, mock_send_batch, mock_kafka_produce_handler @@ -194,7 +184,7 @@ def test_send_all_batches_with_one_key( mock_send_batch_instance = MagicMock() mock_send_batch.return_value = mock_send_batch_instance - sut = CollectorKafkaBatchSender() + sut = BufferedBatchSender() # Act sut._send_all_batches(reset_timer=False) @@ -204,10 +194,8 @@ def test_send_all_batches_with_one_key( @patch("src.logcollector.batch_handler.logger") @patch("src.logcollector.batch_handler.KafkaProduceHandler") - @patch( - "src.logcollector.batch_handler.CollectorKafkaBatchSender._send_batch_for_key" - ) - @patch("src.logcollector.batch_handler.CollectorKafkaBatchSender._reset_timer") + @patch("src.logcollector.batch_handler.BufferedBatchSender._send_batch_for_key") + @patch("src.logcollector.batch_handler.BufferedBatchSender._reset_timer") @patch("src.logcollector.batch_handler.BufferedBatch") def test_send_all_batches_with_existing_keys_and_reset_timer( self, @@ -224,7 +212,7 @@ def test_send_all_batches_with_existing_keys_and_reset_timer( mock_send_batch_instance = MagicMock() mock_send_batch.return_value = mock_send_batch_instance - sut = CollectorKafkaBatchSender() + sut = BufferedBatchSender() # Act sut._send_all_batches(reset_timer=True) @@ -236,9 +224,7 @@ def test_send_all_batches_with_existing_keys_and_reset_timer( self.assertEqual(mock_send_batch.call_count, 2) @patch("src.logcollector.batch_handler.KafkaProduceHandler") - @patch( - "src.logcollector.batch_handler.CollectorKafkaBatchSender._send_batch_for_key" - ) + @patch("src.logcollector.batch_handler.BufferedBatchSender._send_batch_for_key") @patch("src.logcollector.batch_handler.BufferedBatch") def test_send_all_batches_with_no_keys( self, mock_buffered_batch, mock_send_batch, mock_kafka_produce_handler @@ -250,7 +236,7 @@ def test_send_all_batches_with_no_keys( mock_send_batch_instance = MagicMock() mock_send_batch.return_value = mock_send_batch_instance - sut = CollectorKafkaBatchSender() + sut = BufferedBatchSender() # Act sut._send_all_batches(reset_timer=False) @@ -261,7 +247,7 @@ def test_send_all_batches_with_no_keys( class TestSendBatchForKey(unittest.TestCase): @patch("src.logcollector.batch_handler.KafkaProduceHandler") - @patch.object(CollectorKafkaBatchSender, "_send_data_packet") + @patch.object(BufferedBatchSender, "_send_data_packet") @patch("src.logcollector.batch_handler.BufferedBatch") def 
test_send_batch_for_key_success( self, mock_batch, mock_send_data_packet, mock_produce_handler @@ -271,7 +257,7 @@ def test_send_batch_for_key_success( mock_batch.return_value = mock_batch_instance mock_batch_instance.complete_batch.return_value = "mock_data_packet" - sut = CollectorKafkaBatchSender() + sut = BufferedBatchSender() key = "test_key" # Act @@ -282,7 +268,7 @@ def test_send_batch_for_key_success( mock_send_data_packet.assert_called_once_with(key, "mock_data_packet") @patch("src.logcollector.batch_handler.KafkaProduceHandler") - @patch.object(CollectorKafkaBatchSender, "_send_data_packet") + @patch.object(BufferedBatchSender, "_send_data_packet") @patch("src.logcollector.batch_handler.BufferedBatch") def test_send_batch_for_key_value_error( self, mock_batch, mock_send_data_packet, mock_produce_handler @@ -292,7 +278,7 @@ def test_send_batch_for_key_value_error( mock_batch.return_value = mock_batch_instance mock_batch_instance.complete_batch.side_effect = ValueError("Mock exception") - sut = CollectorKafkaBatchSender() + sut = BufferedBatchSender() key = "test_key" # Act @@ -318,7 +304,7 @@ def test_send_data_packet(self, mock_produce_handler): "data": "test_data", } - sut = CollectorKafkaBatchSender() + sut = BufferedBatchSender() # Act sut._send_data_packet(key, data) @@ -342,7 +328,7 @@ def test_reset_timer_with_existing_timer(self, mock_timer, mock_produce_handler) mock_produce_handler_instance = MagicMock() mock_produce_handler.return_value = mock_produce_handler_instance - sut = CollectorKafkaBatchSender() + sut = BufferedBatchSender() sut.timer = mock_timer_instance sut._send_all_batches = MagicMock() @@ -364,7 +350,7 @@ def test_reset_timer_without_existing_timer(self, mock_timer, mock_produce_handl mock_produce_handler_instance = MagicMock() mock_produce_handler.return_value = mock_produce_handler_instance - sut = CollectorKafkaBatchSender() + sut = BufferedBatchSender() sut._send_all_batches = MagicMock() # Act diff --git a/tests/test_collector.py b/tests/test_collector.py index 897c581..163bf7a 100644 --- a/tests/test_collector.py +++ b/tests/test_collector.py @@ -12,7 +12,7 @@ class TestInit(unittest.TestCase): @patch("src.logcollector.collector.LOGSERVER_HOSTNAME", "127.0.0.1") @patch("src.logcollector.collector.LOGSERVER_SENDING_PORT", 9999) - @patch("src.logcollector.collector.CollectorKafkaBatchSender") + @patch("src.logcollector.collector.BufferedBatchSender") @patch("src.logcollector.collector.LoglineHandler") def test_valid_init_ipv4(self, mock_logline_handler, mock_batch_handler): mock_batch_handler_instance = MagicMock() @@ -36,7 +36,7 @@ def test_valid_init_ipv4(self, mock_logline_handler, mock_batch_handler): @patch("src.logcollector.collector.LOGSERVER_HOSTNAME", "fe80::1") @patch("src.logcollector.collector.LOGSERVER_SENDING_PORT", 8989) - @patch("src.logcollector.collector.CollectorKafkaBatchSender") + @patch("src.logcollector.collector.BufferedBatchSender") @patch("src.logcollector.collector.LoglineHandler") def test_valid_init_ipv6(self, mock_logline_handler, mock_batch_handler): mock_batch_handler_instance = MagicMock() @@ -73,7 +73,7 @@ def test_invalid_init_with_invalid_port(self): class TestFetchLogline(unittest.TestCase): @patch("src.logcollector.collector.logger") - @patch("src.logcollector.collector.CollectorKafkaBatchSender") + @patch("src.logcollector.collector.BufferedBatchSender") @patch("socket.socket") def test_fetch_logline_successful( self, mock_socket, mock_batch_handler, mock_logger @@ -94,7 +94,7 @@ def test_fetch_logline_successful( 
self.assertEqual("fake messages", sut.logline) @patch("src.logcollector.collector.logger") - @patch("src.logcollector.collector.CollectorKafkaBatchSender") + @patch("src.logcollector.collector.BufferedBatchSender") @patch("socket.socket") def test_fetch_logline_no_data_on_server( self, mock_socket, mock_batch_handler, mock_logger @@ -115,7 +115,7 @@ def test_fetch_logline_no_data_on_server( self.assertIsNone(sut.logline) @patch("src.logcollector.collector.logger") - @patch("src.logcollector.collector.CollectorKafkaBatchSender") + @patch("src.logcollector.collector.BufferedBatchSender") @patch("socket.socket") def test_fetch_logline_connection_error( self, mock_socket, mock_batch_handler, mock_logger @@ -138,7 +138,7 @@ def test_fetch_logline_connection_error( class TestGetSubnetId(unittest.TestCase): @patch("src.logcollector.collector.IPV4_PREFIX_LENGTH", 24) - @patch("src.logcollector.collector.CollectorKafkaBatchSender") + @patch("src.logcollector.collector.BufferedBatchSender") @patch("src.logcollector.collector.LoglineHandler") def test_get_subnet_id_ipv4(self, mock_logline_handler, mock_batch_handler): # Arrange @@ -153,7 +153,7 @@ def test_get_subnet_id_ipv4(self, mock_logline_handler, mock_batch_handler): self.assertEqual(expected_result, result) @patch("src.logcollector.collector.IPV4_PREFIX_LENGTH", 24) - @patch("src.logcollector.collector.CollectorKafkaBatchSender") + @patch("src.logcollector.collector.BufferedBatchSender") @patch("src.logcollector.collector.LoglineHandler") def test_get_subnet_id_ipv4_zero(self, mock_logline_handler, mock_batch_handler): # Arrange @@ -168,7 +168,7 @@ def test_get_subnet_id_ipv4_zero(self, mock_logline_handler, mock_batch_handler) self.assertEqual(expected_result, result) @patch("src.logcollector.collector.IPV4_PREFIX_LENGTH", 23) - @patch("src.logcollector.collector.CollectorKafkaBatchSender") + @patch("src.logcollector.collector.BufferedBatchSender") @patch("src.logcollector.collector.LoglineHandler") def test_get_subnet_id_ipv4_max(self, mock_logline_handler, mock_batch_handler): # Arrange @@ -183,7 +183,7 @@ def test_get_subnet_id_ipv4_max(self, mock_logline_handler, mock_batch_handler): self.assertEqual(expected_result, result) @patch("src.logcollector.collector.IPV6_PREFIX_LENGTH", 64) - @patch("src.logcollector.collector.CollectorKafkaBatchSender") + @patch("src.logcollector.collector.BufferedBatchSender") @patch("src.logcollector.collector.LoglineHandler") def test_get_subnet_id_ipv6(self, mock_logline_handler, mock_batch_handler): # Arrange @@ -198,7 +198,7 @@ def test_get_subnet_id_ipv6(self, mock_logline_handler, mock_batch_handler): self.assertEqual(expected_result, result) @patch("src.logcollector.collector.IPV6_PREFIX_LENGTH", 64) - @patch("src.logcollector.collector.CollectorKafkaBatchSender") + @patch("src.logcollector.collector.BufferedBatchSender") @patch("src.logcollector.collector.LoglineHandler") def test_get_subnet_id_ipv6_zero(self, mock_logline_handler, mock_batch_handler): # Arrange @@ -213,7 +213,7 @@ def test_get_subnet_id_ipv6_zero(self, mock_logline_handler, mock_batch_handler) self.assertEqual(expected_result, result) @patch("src.logcollector.collector.IPV6_PREFIX_LENGTH", 48) - @patch("src.logcollector.collector.CollectorKafkaBatchSender") + @patch("src.logcollector.collector.BufferedBatchSender") @patch("src.logcollector.collector.LoglineHandler") def test_get_subnet_id_ipv6_max(self, mock_logline_handler, mock_batch_handler): # Arrange @@ -229,7 +229,7 @@ def test_get_subnet_id_ipv6_max(self, mock_logline_handler, 
mock_batch_handler): @patch("src.logcollector.collector.IPV4_PREFIX_LENGTH", 24) @patch("src.logcollector.collector.IPV6_PREFIX_LENGTH", 48) - @patch("src.logcollector.collector.CollectorKafkaBatchSender") + @patch("src.logcollector.collector.BufferedBatchSender") @patch("src.logcollector.collector.LoglineHandler") def test_get_subnet_id_unsupported_type( self, mock_logline_handler, mock_batch_handler @@ -245,7 +245,7 @@ def test_get_subnet_id_unsupported_type( @patch("src.logcollector.collector.IPV4_PREFIX_LENGTH", 24) @patch("src.logcollector.collector.IPV6_PREFIX_LENGTH", 48) - @patch("src.logcollector.collector.CollectorKafkaBatchSender") + @patch("src.logcollector.collector.BufferedBatchSender") @patch("src.logcollector.collector.LoglineHandler") def test_get_subnet_id_none(self, mock_logline_handler, mock_batch_handler): # Arrange @@ -262,7 +262,7 @@ class TestAddLoglineToBatch(unittest.TestCase): @patch("src.logcollector.collector.logger") @patch("src.logcollector.collector.IPV4_PREFIX_LENGTH", 22) @patch("src.base.utils.normalize_ipv4_address") - @patch("src.logcollector.collector.CollectorKafkaBatchSender") + @patch("src.logcollector.collector.BufferedBatchSender") @patch("src.logcollector.collector.LoglineHandler") def test_add_to_batch_with_data( self, mock_logline_handler, mock_batch_handler, mock_normalize, mock_logger @@ -301,7 +301,7 @@ def test_add_to_batch_with_data( "192.168.0.0_22", expected_message ) - @patch("src.logcollector.collector.CollectorKafkaBatchSender") + @patch("src.logcollector.collector.BufferedBatchSender") @patch("src.logcollector.collector.LoglineHandler") def test_add_to_batch_without_data(self, mock_logline_handler, mock_batch_handler): mock_batch_handler_instance = MagicMock() @@ -317,7 +317,7 @@ def test_add_to_batch_without_data(self, mock_logline_handler, mock_batch_handle class TestClearLogline(unittest.TestCase): - @patch("src.logcollector.collector.CollectorKafkaBatchSender") + @patch("src.logcollector.collector.BufferedBatchSender") def test_clear_logline(self, mock_batch_handler): mock_batch_handler_instance = MagicMock() mock_batch_handler.return_value = mock_batch_handler_instance From 9c8a1786d4cccc84514d01edfaf544bcd6a24f7e Mon Sep 17 00:00:00 2001 From: Manuel Fuchs Date: Wed, 6 Nov 2024 19:37:06 +0100 Subject: [PATCH 02/29] Implement tables server_logs_timestamps and failed_dns_loglines --- src/monitoring/clickhouse_connector.py | 39 ++++++++++++------- .../create_tables/failed_dns_loglines.sql | 4 +- 2 files changed, 27 insertions(+), 16 deletions(-) diff --git a/src/monitoring/clickhouse_connector.py b/src/monitoring/clickhouse_connector.py index cba3b33..b31595d 100644 --- a/src/monitoring/clickhouse_connector.py +++ b/src/monitoring/clickhouse_connector.py @@ -67,15 +67,12 @@ def __init__(self): def insert( self, message_text: str, timestamp_in: datetime.datetime | None = None ) -> uuid.UUID: - message_id = uuid.uuid4() - if not timestamp_in: timestamp_in = datetime.datetime.now() - timestamp_in = timestamp_in.strftime("%Y-%m-%d %H:%M:%S.%f") + message_id = uuid.uuid4() self._add_to_batch([message_id, timestamp_in, message_text]) - return message_id @@ -89,30 +86,44 @@ def __init__(self): super().__init__("server_logs_timestamps", column_names) - def insert(self, message_id: uuid.UUID, event: str, event_timestamp) -> uuid.UUID: - # TODO: Implement logic + def insert( + self, + message_id: uuid.UUID, + event: str, + event_timestamp: datetime.datetime | None = None, + ) -> uuid.UUID: + if not event_timestamp: + event_timestamp = 
datetime.datetime.now()
+        event_timestamp = event_timestamp.strftime("%Y-%m-%d %H:%M:%S.%f")
+
+        self._add_to_batch([message_id, event, event_timestamp])
         return message_id
 
 
 class FailedDNSLoglinesConnector(ClickHouseConnector):
     def __init__(self):
-        super().__init__("failed_dns_loglines")
-
-        self.column_names = [
+        column_names = [
             "message_text",
             "timestamp_in",
             "timestamp_failed",
             "reason_for_failure",
         ]
 
+        super().__init__("failed_dns_loglines", column_names)
+
     def insert(
         self,
         message_text: str,
-        timestamp_in,
-        timestamp_failed,
+        timestamp_in: datetime.datetime,
+        timestamp_failed: datetime.datetime | None = None,
         reason_for_failure: str | None = None,
-    ):
-        # TODO: Implement logic
+    ) -> None:
+        if not timestamp_failed:
+            timestamp_failed = datetime.datetime.now()
 
-        pass
+        timestamp_in = timestamp_in.strftime("%Y-%m-%d %H:%M:%S.%f")
+        timestamp_failed = timestamp_failed.strftime("%Y-%m-%d %H:%M:%S.%f")
+
+        self._add_to_batch(
+            [message_text, timestamp_in, timestamp_failed, reason_for_failure]
+        )
diff --git a/src/monitoring/create_tables/failed_dns_loglines.sql b/src/monitoring/create_tables/failed_dns_loglines.sql
index 6da6f5c..c959f1e 100644
--- a/src/monitoring/create_tables/failed_dns_loglines.sql
+++ b/src/monitoring/create_tables/failed_dns_loglines.sql
@@ -2,7 +2,7 @@ CREATE TABLE IF NOT EXISTS failed_dns_loglines (
     message_text String NOT NULL,
     timestamp_in DateTime64(6) NOT NULL,
     timestamp_failed DateTime64(6) NOT NULL,
-    reason_for_failure String
+    reason_for_failure Nullable(String)
 )
 ENGINE = MergeTree
-PRIMARY KEY(message_id);
+PRIMARY KEY(message_text, timestamp_in);
From b71b7781ed5805439ef4a74e360ea01227fbc123 Mon Sep 17 00:00:00 2001
From: Manuel Fuchs
Date: Wed, 6 Nov 2024 20:01:06 +0100
Subject: [PATCH 03/29] Implement tables logline_to_batches and dns_loglines

---
 src/monitoring/clickhouse_connector.py        | 58 ++++++++++++++++++-
 src/monitoring/create_tables/dns_loglines.sql | 11 ++++
 .../create_tables/logline_to_batches.sql      |  6 ++
 3 files changed, 73 insertions(+), 2 deletions(-)
 create mode 100644 src/monitoring/create_tables/dns_loglines.sql
 create mode 100644 src/monitoring/create_tables/logline_to_batches.sql

diff --git a/src/monitoring/clickhouse_connector.py b/src/monitoring/clickhouse_connector.py
index b31595d..e56282d 100644
--- a/src/monitoring/clickhouse_connector.py
+++ b/src/monitoring/clickhouse_connector.py
@@ -91,13 +91,12 @@ def insert(
         message_id: uuid.UUID,
         event: str,
         event_timestamp: datetime.datetime | None = None,
-    ) -> uuid.UUID:
+    ):
         if not event_timestamp:
             event_timestamp = datetime.datetime.now()
         event_timestamp = event_timestamp.strftime("%Y-%m-%d %H:%M:%S.%f")
 
         self._add_to_batch([message_id, event, event_timestamp])
-        return message_id
 
 
 class FailedDNSLoglinesConnector(ClickHouseConnector):
@@ -127,3 +126,58 @@ def insert(
         self._add_to_batch(
             [message_text, timestamp_in, timestamp_failed, reason_for_failure]
         )
+
+
+class LoglineToBatchesConnector(ClickHouseConnector):
+    def __init__(self):
+        column_names = [
+            "logline_id",
+            "batch_id",
+        ]
+
+        super().__init__("logline_to_batches", column_names)
+
+    def insert(self, logline_id: uuid.UUID, batch_id: uuid.UUID):
+        self._add_to_batch([logline_id, batch_id])
+
+
+class DNSLoglinesConnector(ClickHouseConnector):
+    def __init__(self):
+        column_names = [
+            "logline_id",
+            "subnet_id",
+            "timestamp",
+            "status_code",
+            "client_ip",
+            "record_type",
+            "additional_fields",
+        ]
+
+        super().__init__("dns_loglines", column_names)
+
+    def insert(
+        self,
+        subnet_id: str,
+        timestamp: datetime.datetime,
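+        # The parameters that follow mirror the dns_loglines schema below;
+        # additional_fields is presumably meant to carry any remaining logline
+        # fields in serialized form (it maps to a Nullable(String) column).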
+ status_code: str, + client_ip: str, + record_type: str, + additional_fields: str | None = None, + ) -> uuid.UUID: + if not timestamp: + timestamp = datetime.datetime.now() + timestamp = timestamp.strftime("%Y-%m-%d %H:%M:%S.%f") + + logline_id = uuid.uuid4() + self._add_to_batch( + [ + logline_id, + subnet_id, + timestamp, + status_code, + client_ip, + record_type, + additional_fields, + ] + ) + return logline_id diff --git a/src/monitoring/create_tables/dns_loglines.sql b/src/monitoring/create_tables/dns_loglines.sql new file mode 100644 index 0000000..c3468f7 --- /dev/null +++ b/src/monitoring/create_tables/dns_loglines.sql @@ -0,0 +1,11 @@ +CREATE TABLE IF NOT EXISTS dns_loglines ( + logline_id UUID NOT NULL, + subnet_id String NOT NULL, + timestamp DateTime64(6) NOT NULL, + status_code String NOT NULL, + client_ip String NOT NULL, + record_type String NOT NULL, + additional_fields Nullable(String) +) +ENGINE = MergeTree +PRIMARY KEY (logline_id); diff --git a/src/monitoring/create_tables/logline_to_batches.sql b/src/monitoring/create_tables/logline_to_batches.sql new file mode 100644 index 0000000..41d4348 --- /dev/null +++ b/src/monitoring/create_tables/logline_to_batches.sql @@ -0,0 +1,6 @@ +CREATE TABLE IF NOT EXISTS logline_to_batches ( + logline_id UUID NOT NULL, + batch_id UUID NOT NULL +) +ENGINE = MergeTree +PRIMARY KEY (logline_id); From 9a18b3c7a1c53ef4793b54f3b219abaa63599c1d Mon Sep 17 00:00:00 2001 From: Manuel Fuchs Date: Thu, 7 Nov 2024 09:47:42 +0100 Subject: [PATCH 04/29] Implement all tables --- src/monitoring/clickhouse_connector.py | 110 +++++++++++++++++- src/monitoring/create_tables/batch_status.sql | 7 ++ .../create_tables/batch_timestamps.sql | 9 ++ .../create_tables/logline_status.sql | 7 ++ .../create_tables/logline_timestamps.sql | 8 ++ 5 files changed, 140 insertions(+), 1 deletion(-) create mode 100644 src/monitoring/create_tables/batch_status.sql create mode 100644 src/monitoring/create_tables/batch_timestamps.sql create mode 100644 src/monitoring/create_tables/logline_status.sql create mode 100644 src/monitoring/create_tables/logline_timestamps.sql diff --git a/src/monitoring/clickhouse_connector.py b/src/monitoring/clickhouse_connector.py index e56282d..b2ea6db 100644 --- a/src/monitoring/clickhouse_connector.py +++ b/src/monitoring/clickhouse_connector.py @@ -138,7 +138,12 @@ def __init__(self): super().__init__("logline_to_batches", column_names) def insert(self, logline_id: uuid.UUID, batch_id: uuid.UUID): - self._add_to_batch([logline_id, batch_id]) + self._add_to_batch( + [ + logline_id, + batch_id, + ] + ) class DNSLoglinesConnector(ClickHouseConnector): @@ -181,3 +186,106 @@ def insert( ] ) return logline_id + + +class LoglineStatusConnector(ClickHouseConnector): + def __init__(self): + column_names = [ + "logline_id", + "status", + "exit_at_stage", + ] + + super().__init__("logline_status", column_names) + + def insert( + self, logline_id: uuid.UUID, status: str, exit_at_stage: str | None = None + ): + self._add_to_batch( + [ + logline_id, + status, + exit_at_stage, + ] + ) + + +class LoglineTimestampsConnector(ClickHouseConnector): + def __init__(self): + column_names = [ + "logline_id", + "stage", + "status", + "timestamp", + ] + + super().__init__("logline_timestamps", column_names) + + def insert( + self, + logline_id: uuid.UUID, + stage: str, + status: str, + timestamp: datetime.datetime, + ) -> None: + self._add_to_batch( + [ + logline_id, + stage, + status, + timestamp, + ] + ) + + +class BatchStatusConnector(ClickHouseConnector): + def 
__init__(self): + column_names = [ + "batch_id", + "status", + "exit_at_stage", + ] + + super().__init__("batch_status", column_names) + + def insert( + self, batch_id: uuid.UUID, status: str, exit_at_stage: str | None = None + ): + self._add_to_batch( + [ + batch_id, + status, + exit_at_stage, + ] + ) + + +class BatchTimestampsConnector(ClickHouseConnector): + def __init__(self): + column_names = [ + "batch_id", + "stage", + "status", + "timestamp", + "message_count", + ] + + super().__init__("batch_timestamps", column_names) + + def insert( + self, + batch_id: uuid.UUID, + stage: str, + status: str, + timestamp: datetime.datetime, + message_count: int, + ) -> None: + self._add_to_batch( + [ + batch_id, + stage, + status, + timestamp, + message_count, + ] + ) diff --git a/src/monitoring/create_tables/batch_status.sql b/src/monitoring/create_tables/batch_status.sql new file mode 100644 index 0000000..3f515b9 --- /dev/null +++ b/src/monitoring/create_tables/batch_status.sql @@ -0,0 +1,7 @@ +CREATE TABLE IF NOT EXISTS batch_status ( + batch_id UUID NOT NULL, + status String NOT NULL, + exit_at_stage Nullable(String) +) +ENGINE = MergeTree +PRIMARY KEY (batch_id); diff --git a/src/monitoring/create_tables/batch_timestamps.sql b/src/monitoring/create_tables/batch_timestamps.sql new file mode 100644 index 0000000..60c9207 --- /dev/null +++ b/src/monitoring/create_tables/batch_timestamps.sql @@ -0,0 +1,9 @@ +CREATE TABLE IF NOT EXISTS batch_timestamps ( + batch_id UUID NOT NULL, + stage String NOT NULL, + status String NOT NULL, + timestamp DateTime64(6) NOT NULL, + message_count UInt64 +) +ENGINE = MergeTree +PRIMARY KEY (batch_id); diff --git a/src/monitoring/create_tables/logline_status.sql b/src/monitoring/create_tables/logline_status.sql new file mode 100644 index 0000000..cdeb6c2 --- /dev/null +++ b/src/monitoring/create_tables/logline_status.sql @@ -0,0 +1,7 @@ +CREATE TABLE IF NOT EXISTS logline_status ( + logline_id UUID NOT NULL, + status String NOT NULL, + exit_at_stage Nullable(String) +) +ENGINE = MergeTree +PRIMARY KEY (logline_id); diff --git a/src/monitoring/create_tables/logline_timestamps.sql b/src/monitoring/create_tables/logline_timestamps.sql new file mode 100644 index 0000000..4ff9887 --- /dev/null +++ b/src/monitoring/create_tables/logline_timestamps.sql @@ -0,0 +1,8 @@ +CREATE TABLE IF NOT EXISTS logline_timestamps ( + logline_id UUID NOT NULL, + stage String NOT NULL, + status String NOT NULL, + timestamp DateTime64(6) NOT NULL +) +ENGINE = MergeTree +PRIMARY KEY (logline_id); From 881416020f0eba060f70cf7f99bf53da387f42d2 Mon Sep 17 00:00:00 2001 From: Manuel Fuchs Date: Thu, 7 Nov 2024 10:06:50 +0100 Subject: [PATCH 05/29] Update clickhouse_connector.py --- src/monitoring/clickhouse_connector.py | 34 +++++++++++++++++++++----- 1 file changed, 28 insertions(+), 6 deletions(-) diff --git a/src/monitoring/clickhouse_connector.py b/src/monitoring/clickhouse_connector.py index b2ea6db..3c063cb 100644 --- a/src/monitoring/clickhouse_connector.py +++ b/src/monitoring/clickhouse_connector.py @@ -33,7 +33,7 @@ def __init__(self, table_name: str, column_names: list[str]): column_names=self._column_names, ) - def prepare_tables(self): + def prepare_all_tables(self): def _load_contents(file_name: str) -> str: with open(file_name, "r") as file: return file.read() @@ -50,6 +50,22 @@ def _load_contents(file_name: str) -> str: logger.critical("Error in CREATE TABLE statement") raise e + def prepare_table(self): + def _load_contents(file_name: str) -> str: + with open(file_name, "r") as 
file: + return file.read() + + filename = self._table_name + ".sql" + file_path = os.path.join(CREATE_TABLES_DIRECTORY, filename) + sql_content = _load_contents(file_path) + + with clickhouse_connect.get_client(host=CLICKHOUSE_HOSTNAME) as client: + try: + client.command(sql_content) + except Exception as e: + logger.critical("Error in CREATE TABLE statement") + raise e + def _add_to_batch(self, data): self._batch_sender.add(data) @@ -169,11 +185,9 @@ def insert( record_type: str, additional_fields: str | None = None, ) -> uuid.UUID: - if not timestamp: - timestamp = datetime.datetime.now() timestamp = timestamp.strftime("%Y-%m-%d %H:%M:%S.%f") - logline_id = uuid.uuid4() + self._add_to_batch( [ logline_id, @@ -226,8 +240,12 @@ def insert( logline_id: uuid.UUID, stage: str, status: str, - timestamp: datetime.datetime, + timestamp: datetime.datetime = None, ) -> None: + if not timestamp: + timestamp = datetime.datetime.now() + timestamp = timestamp.strftime("%Y-%m-%d %H:%M:%S.%f") + self._add_to_batch( [ logline_id, @@ -277,9 +295,13 @@ def insert( batch_id: uuid.UUID, stage: str, status: str, - timestamp: datetime.datetime, message_count: int, + timestamp: datetime.datetime = None, ) -> None: + if not timestamp: + timestamp = datetime.datetime.now() + timestamp = timestamp.strftime("%Y-%m-%d %H:%M:%S.%f") + self._add_to_batch( [ batch_id, From 4cdf723a7ad1855cd9f33ddd75c11f65b1e927e5 Mon Sep 17 00:00:00 2001 From: Manuel Fuchs Date: Thu, 7 Nov 2024 10:49:58 +0100 Subject: [PATCH 06/29] Fix bugs --- src/monitoring/clickhouse_batch.py | 18 ++++++++++-------- src/monitoring/clickhouse_connector.py | 9 --------- .../create_tables/batch_timestamps.sql | 2 +- 3 files changed, 11 insertions(+), 18 deletions(-) diff --git a/src/monitoring/clickhouse_batch.py b/src/monitoring/clickhouse_batch.py index cc199cc..99ab507 100644 --- a/src/monitoring/clickhouse_batch.py +++ b/src/monitoring/clickhouse_batch.py @@ -2,6 +2,8 @@ import sys from threading import Timer +import clickhouse_connect + sys.path.append(os.getcwd()) from src.base.log_config import get_logger from src.base.utils import setup_config @@ -29,9 +31,9 @@ def __init__(self, kafka_topic: str, table_name: str, column_names: list[str]): self.timer = None self.batch = [] - # self.client = clickhouse_connect.get_client( - # host=CLICKHOUSE_HOSTNAME, - # ) + self._client = clickhouse_connect.get_client( + host=CLICKHOUSE_HOSTNAME, + ) def __del__(self): self.insert_all() @@ -58,11 +60,11 @@ def _add_element(element): def insert_all(self): if self.batch: - # self.client.insert( - # self.table_name, - # self.batch, - # self.column_names, - # ) + self._client.insert( + self.table_name, + self.batch, + self.column_names, + ) logger.info( f""" self.client.insert( diff --git a/src/monitoring/clickhouse_connector.py b/src/monitoring/clickhouse_connector.py index 3c063cb..37b3613 100644 --- a/src/monitoring/clickhouse_connector.py +++ b/src/monitoring/clickhouse_connector.py @@ -85,7 +85,6 @@ def insert( ) -> uuid.UUID: if not timestamp_in: timestamp_in = datetime.datetime.now() - timestamp_in = timestamp_in.strftime("%Y-%m-%d %H:%M:%S.%f") message_id = uuid.uuid4() self._add_to_batch([message_id, timestamp_in, message_text]) @@ -110,7 +109,6 @@ def insert( ): if not event_timestamp: event_timestamp = datetime.datetime.now() - event_timestamp = event_timestamp.strftime("%Y-%m-%d %H:%M:%S.%f") self._add_to_batch([message_id, event, event_timestamp]) @@ -136,9 +134,6 @@ def insert( if not timestamp_failed: timestamp_failed = datetime.datetime.now() - 
timestamp_in = timestamp_in.strftime("%Y-%m-%d %H:%M:%S.%f") - timestamp_failed = timestamp_failed.strftime("%Y-%m-%d %H:%M:%S.%f") - self._add_to_batch( [message_text, timestamp_in, timestamp_failed, reason_for_failure] ) @@ -185,7 +180,6 @@ def insert( record_type: str, additional_fields: str | None = None, ) -> uuid.UUID: - timestamp = timestamp.strftime("%Y-%m-%d %H:%M:%S.%f") logline_id = uuid.uuid4() self._add_to_batch( @@ -244,7 +238,6 @@ def insert( ) -> None: if not timestamp: timestamp = datetime.datetime.now() - timestamp = timestamp.strftime("%Y-%m-%d %H:%M:%S.%f") self._add_to_batch( [ @@ -300,8 +293,6 @@ def insert( ) -> None: if not timestamp: timestamp = datetime.datetime.now() - timestamp = timestamp.strftime("%Y-%m-%d %H:%M:%S.%f") - self._add_to_batch( [ batch_id, diff --git a/src/monitoring/create_tables/batch_timestamps.sql b/src/monitoring/create_tables/batch_timestamps.sql index 60c9207..c0e7a1c 100644 --- a/src/monitoring/create_tables/batch_timestamps.sql +++ b/src/monitoring/create_tables/batch_timestamps.sql @@ -3,7 +3,7 @@ CREATE TABLE IF NOT EXISTS batch_timestamps ( stage String NOT NULL, status String NOT NULL, timestamp DateTime64(6) NOT NULL, - message_count UInt64 + message_count UInt32 ) ENGINE = MergeTree PRIMARY KEY (batch_id); From 99cc5aad2fa938360de0f24b010b7ce8eb6bece5 Mon Sep 17 00:00:00 2001 From: Manuel Fuchs Date: Thu, 7 Nov 2024 13:53:30 +0100 Subject: [PATCH 07/29] Make the functionality run via Docker --- config.yaml | 2 +- docker/dockerfiles/Dockerfile.logserver | 4 +++- requirements/requirements.monitoring.txt | 1 + src/logserver/server.py | 20 +++++++++++++++++--- src/monitoring/clickhouse_connector.py | 2 +- 5 files changed, 23 insertions(+), 6 deletions(-) create mode 100644 requirements/requirements.monitoring.txt diff --git a/config.yaml b/config.yaml index fea486e..bb92b76 100644 --- a/config.yaml +++ b/config.yaml @@ -85,4 +85,4 @@ environment: port_out: 9999 monitoring: clickhouse_server: - hostname: localhost + hostname: 172.27.0.11 diff --git a/docker/dockerfiles/Dockerfile.logserver b/docker/dockerfiles/Dockerfile.logserver index e85df13..c89476e 100644 --- a/docker/dockerfiles/Dockerfile.logserver +++ b/docker/dockerfiles/Dockerfile.logserver @@ -5,10 +5,12 @@ ENV PYTHONDONTWRITEBYTECODE=1 WORKDIR /usr/src/app COPY requirements/requirements.logserver.txt ./ -RUN pip --disable-pip-version-check install --no-cache-dir --no-compile -r requirements.logserver.txt +COPY requirements/requirements.monitoring.txt ./ +RUN pip --disable-pip-version-check install --no-cache-dir --no-compile -r requirements.logserver.txt -r requirements.monitoring.txt COPY src/base ./src/base COPY src/logserver ./src/logserver +COPY src/monitoring ./src/monitoring COPY config.yaml . 
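# server.py now imports src.monitoring for the ClickHouse connector, so the
# monitoring sources and their extra requirements have to ship with this
# image as well (hence the two COPY lines and the extended pip install above).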
RUN rm -rf /root/.cache diff --git a/requirements/requirements.monitoring.txt b/requirements/requirements.monitoring.txt new file mode 100644 index 0000000..cb97149 --- /dev/null +++ b/requirements/requirements.monitoring.txt @@ -0,0 +1 @@ +clickhouse_connect~=0.8.3 diff --git a/src/logserver/server.py b/src/logserver/server.py index 50028a7..099d0ed 100644 --- a/src/logserver/server.py +++ b/src/logserver/server.py @@ -6,6 +6,7 @@ import aiofiles sys.path.append(os.getcwd()) +from src.monitoring.clickhouse_connector import ServerLogsConnector from src.base.kafka_handler import KafkaConsumeHandler from src.base.utils import setup_config from src.base import utils @@ -51,6 +52,16 @@ def __init__(self) -> None: self.data_queue = queue.Queue() self.kafka_consume_handler = KafkaConsumeHandler(topic=LISTEN_ON_TOPIC) + self.server_logs = ServerLogsConnector() + self.server_logs.prepare_table() + + async def store_message(self, message): + self.data_queue.put(message) + + self.server_logs.insert( + message_text=message, + ) + async def open(self) -> None: """ Opens both ports for sending and receiving and starts reading from the input file as well as listening for @@ -146,7 +157,8 @@ async def handle_kafka_inputs(self) -> None: None, self.kafka_consume_handler.consume ) logger.info(f"Received message via Kafka:\n ⤷ {value}") - self.data_queue.put(value) + task = self.store_message(value) + await task async def async_follow(self, file: str = READ_FROM_FILE) -> None: """ @@ -173,7 +185,8 @@ async def async_follow(self, file: str = READ_FROM_FILE) -> None: continue logger.info(f"Extracted message from file:\n ⤷ {cleaned_line}") - self.data_queue.put(cleaned_line) + task = self.store_message(cleaned_line) + await task async def handle_send_logline(self, reader, writer) -> None: """ @@ -230,7 +243,8 @@ async def receive_logline(self, reader) -> None: break received_message = data.decode().strip() logger.info(f"Received message:\n ⤷ {received_message}") - self.data_queue.put(received_message) + task = self.store_message(received_message) + await task except asyncio.exceptions.IncompleteReadError as e: logger.warning(f"Ignoring message: No separator symbol found: {e}") break diff --git a/src/monitoring/clickhouse_connector.py b/src/monitoring/clickhouse_connector.py index 37b3613..26edea5 100644 --- a/src/monitoring/clickhouse_connector.py +++ b/src/monitoring/clickhouse_connector.py @@ -17,7 +17,7 @@ CLICKHOUSE_HOSTNAME = CONFIG["environment"]["monitoring"]["clickhouse_server"][ "hostname" ] -CREATE_TABLES_DIRECTORY = "create_tables" # TODO: Get from config +CREATE_TABLES_DIRECTORY = "src/monitoring/create_tables" # TODO: Get from config class ClickHouseConnector: From 6fca4d21e7a9caaab163e48d94672ed4c0f69197 Mon Sep 17 00:00:00 2001 From: Manuel Fuchs Date: Sun, 10 Nov 2024 15:25:45 +0100 Subject: [PATCH 08/29] Add monitoring_agent and update tests --- docker/docker-compose.yml | 20 ++++++ docker/dockerfiles/Dockerfile.monitoring | 16 +++++ requirements/requirements.detector.txt | 1 - requirements/requirements.monitoring.txt | 4 ++ src/base/kafka_handler.py | 29 ++++---- src/detector/detector.py | 2 +- src/inspector/inspector.py | 2 +- src/logserver/server.py | 15 ++-- src/monitoring/clickhouse_batch.py | 3 +- src/monitoring/clickhouse_connector.py | 35 ++++----- src/monitoring/clickhouse_kafka_sender.py | 18 +++++ src/monitoring/monitoring_agent.py | 74 +++++++++++++++++++ src/prefilter/prefilter.py | 2 +- tests/test_detector.py | 2 +- tests/test_inspector.py | 2 +- 
tests/test_kafka_consume_handler.py | 10 +-- tests/test_prefilter.py | 2 +- tests/test_server.py | 87 ++++++++++++++++------- 18 files changed, 241 insertions(+), 83 deletions(-) create mode 100644 docker/dockerfiles/Dockerfile.monitoring create mode 100644 src/monitoring/clickhouse_kafka_sender.py create mode 100644 src/monitoring/monitoring_agent.py diff --git a/docker/docker-compose.yml b/docker/docker-compose.yml index 82993c7..87c2a27 100644 --- a/docker/docker-compose.yml +++ b/docker/docker-compose.yml @@ -49,6 +49,7 @@ services: condition: service_healthy ports: - 9998:9998 + - 9999:9999 networks: heidgaf: ipv4_address: 172.27.0.8 @@ -186,6 +187,25 @@ services: timeout: 5s retries: 3 + monitoring_agent: + build: + context: .. + dockerfile: docker/dockerfiles/Dockerfile.monitoring + network: host + restart: "unless-stopped" + depends_on: + kafka1: + condition: service_healthy + kafka2: + condition: service_healthy + kafka3: + condition: service_healthy + clickhouse-server: + condition: service_healthy + networks: + heidgaf: + ipv4_address: 172.27.0.12 + networks: heidgaf: driver: bridge diff --git a/docker/dockerfiles/Dockerfile.monitoring b/docker/dockerfiles/Dockerfile.monitoring new file mode 100644 index 0000000..cf181a7 --- /dev/null +++ b/docker/dockerfiles/Dockerfile.monitoring @@ -0,0 +1,16 @@ +FROM python:3.11-slim-bookworm + +ENV PYTHONDONTWRITEBYTECODE=1 + +WORKDIR /usr/src/app + +COPY requirements/requirements.monitoring.txt ./ +RUN pip --disable-pip-version-check install --no-cache-dir --no-compile -r requirements.monitoring.txt + +COPY src/base ./src/base +COPY src/monitoring ./src/monitoring +COPY config.yaml . + +RUN rm -rf /root/.cache + +CMD [ "python", "src/monitoring/monitoring_agent.py"] diff --git a/requirements/requirements.detector.txt b/requirements/requirements.detector.txt index 7487f70..eeec3d7 100644 --- a/requirements/requirements.detector.txt +++ b/requirements/requirements.detector.txt @@ -3,6 +3,5 @@ scikit-learn~=1.5.2 requests colorlog~=6.8.2 PyYAML~=6.0.1 -colorlog~=6.8.2 confluent-kafka~=2.4.0 marshmallow_dataclass~=8.7.1 diff --git a/requirements/requirements.monitoring.txt b/requirements/requirements.monitoring.txt index cb97149..a2e9d98 100644 --- a/requirements/requirements.monitoring.txt +++ b/requirements/requirements.monitoring.txt @@ -1 +1,5 @@ clickhouse_connect~=0.8.3 +confluent-kafka~=2.4.0 +marshmallow_dataclass~=8.7.1 +colorlog~=6.8.2 +PyYAML~=6.0.1 diff --git a/src/base/kafka_handler.py b/src/base/kafka_handler.py index fdbeacc..59dc188 100644 --- a/src/base/kafka_handler.py +++ b/src/base/kafka_handler.py @@ -209,15 +209,15 @@ class KafkaConsumeHandler(KafkaHandler): Also uses the Write-Exactly-Once-Semantics which requires handling and committing transactions. """ - def __init__(self, topic: str) -> None: + def __init__(self, topics: str | list[str]) -> None: """ Args: - topic (str): Topic name to consume from + topics (str | list[str]): Topic name(s) to consume from Raises: KafkaException: During construction of Consumer or assignment of topic. """ - logger.debug(f"Initializing KafkaConsumeHandler ({topic=})...") + logger.debug(f"Initializing KafkaConsumeHandler ({topics=})...") super().__init__() conf = { @@ -229,18 +229,21 @@ def __init__(self, topic: str) -> None: } logger.debug(f"Set {conf=}.") + if isinstance(topics, str): + topics = [topics] + self.batch_schema = marshmallow_dataclass.class_schema(Batch)() try: logger.debug("Calling Consumer(conf)...") self.consumer = Consumer(conf) - logger.debug(f"Consumer set. 
Assigning topic {topic}...") - self.consumer.assign([TopicPartition(topic, 0)]) + logger.debug(f"Consumer set. Assigning topics {topics}...") + self.consumer.assign([TopicPartition(topic, 0) for topic in topics]) except KafkaException as e: logger.error(f"Consumer initialization failed: {e}") raise e - logger.debug(f"Initialized KafkaConsumeHandler ({topic=}).") + logger.debug(f"Initialized KafkaConsumeHandler ({topics=}).") def __del__(self) -> None: """ @@ -251,7 +254,7 @@ def __del__(self) -> None: self.consumer.close() logger.debug("KafkaConsumeHandler deleted.") - def consume(self) -> tuple[str | None, str | None]: + def consume(self) -> tuple[str | None, str | None, str | None]: """ Consumes available messages from the Broker(s) in the specified topic. Decodes the data and returns a tuple of key and data of the message. Blocks and waits if no data is available. @@ -288,13 +291,15 @@ def consume(self) -> tuple[str | None, str | None]: key = msg.key().decode("utf-8") if msg.key() else None value = msg.value().decode("utf-8") if msg.value() else None - logger.debug(f"Received message: {key=}, {value=}") + topic = msg.topic() if msg.topic() else None + + logger.debug(f"Received message: {key=}, {value=}, {topic=}") logger.debug("Committing transaction for message on Consumer...") self.consumer.commit(msg) logger.debug( - f"Transaction committed. Successfully consumed messages. Returning [{key=}, {value=}]..." + f"Transaction committed. Successfully consumed messages. Returning [{key=}, {value=}, {topic=}]..." ) - return key, value + return key, value, topic except KeyboardInterrupt: logger.info("Shutting down KafkaConsumeHandler...") raise KeyboardInterrupt @@ -316,7 +321,7 @@ def consume_and_return_json_data(self) -> tuple[None | str, dict]: KeyboardInterrupt: Execution interrupted by user """ try: - key, value = self.consume() + key, value, topic = self.consume() if not key and not value: logger.debug("No data returned.") @@ -356,7 +361,7 @@ def consume_and_return_object(self) -> tuple[None | str, Batch]: KeyboardInterrupt: Execution interrupted by user """ try: - key, value = self.consume() + key, value, topic = self.consume() if not key and not value: logger.debug("No data returned.") diff --git a/src/detector/detector.py b/src/detector/detector.py index 3b7de11..5b20175 100644 --- a/src/detector/detector.py +++ b/src/detector/detector.py @@ -53,7 +53,7 @@ def __init__(self) -> None: logger.debug(f"Initializing Detector...") logger.debug(f"Calling KafkaConsumeHandler(topic='Detector')...") - self.kafka_consume_handler = KafkaConsumeHandler(topic="Detector") + self.kafka_consume_handler = KafkaConsumeHandler(topics="Detector") self.model = self._get_model() diff --git a/src/inspector/inspector.py b/src/inspector/inspector.py index a989743..fa89baf 100644 --- a/src/inspector/inspector.py +++ b/src/inspector/inspector.py @@ -71,7 +71,7 @@ def __init__(self) -> None: logger.debug(f"Initializing Inspector...") logger.debug(f"Calling KafkaConsumeHandler(topic='Inspect')...") - self.kafka_consume_handler = KafkaConsumeHandler(topic="Inspect") + self.kafka_consume_handler = KafkaConsumeHandler(topics="Inspect") logger.debug(f"Calling KafkaProduceHandler(transactional_id='Inspect')...") self.kafka_produce_handler = KafkaProduceHandler(transactional_id="inspect") logger.debug(f"Initialized Inspector.") diff --git a/src/logserver/server.py b/src/logserver/server.py index 099d0ed..f3b9d76 100644 --- a/src/logserver/server.py +++ b/src/logserver/server.py @@ -1,12 +1,14 @@ import asyncio +import 
datetime import os import queue import sys +import uuid import aiofiles sys.path.append(os.getcwd()) -from src.monitoring.clickhouse_connector import ServerLogsConnector +from src.monitoring.clickhouse_kafka_sender import ClickHouseKafkaSender from src.base.kafka_handler import KafkaConsumeHandler from src.base.utils import setup_config from src.base import utils @@ -50,17 +52,14 @@ def __init__(self) -> None: self.socket = None self.number_of_connections = 0 self.data_queue = queue.Queue() - self.kafka_consume_handler = KafkaConsumeHandler(topic=LISTEN_ON_TOPIC) + self.kafka_consume_handler = KafkaConsumeHandler(topics=LISTEN_ON_TOPIC) - self.server_logs = ServerLogsConnector() - self.server_logs.prepare_table() + self.server_logs = ClickHouseKafkaSender(table_name="server_logs") async def store_message(self, message): self.data_queue.put(message) - self.server_logs.insert( - message_text=message, - ) + self.server_logs.insert([uuid.uuid4(), message, datetime.datetime.now()]) async def open(self) -> None: """ @@ -153,7 +152,7 @@ async def handle_kafka_inputs(self) -> None: loop = asyncio.get_running_loop() while True: - key, value = await loop.run_in_executor( + key, value, topic = await loop.run_in_executor( None, self.kafka_consume_handler.consume ) logger.info(f"Received message via Kafka:\n ⤷ {value}") diff --git a/src/monitoring/clickhouse_batch.py b/src/monitoring/clickhouse_batch.py index 99ab507..1b41381 100644 --- a/src/monitoring/clickhouse_batch.py +++ b/src/monitoring/clickhouse_batch.py @@ -21,8 +21,7 @@ class ClickHouseBatchSender: - def __init__(self, kafka_topic: str, table_name: str, column_names: list[str]): - self.kafka_topic = kafka_topic + def __init__(self, table_name: str, column_names: list[str]): self.table_name = table_name self.column_names = column_names diff --git a/src/monitoring/clickhouse_connector.py b/src/monitoring/clickhouse_connector.py index 26edea5..2065eba 100644 --- a/src/monitoring/clickhouse_connector.py +++ b/src/monitoring/clickhouse_connector.py @@ -2,12 +2,12 @@ import os import sys import uuid +from abc import abstractmethod import clickhouse_connect sys.path.append(os.getcwd()) from src.monitoring.clickhouse_batch import ClickHouseBatchSender -from src.base.kafka_handler import KafkaConsumeHandler from src.base.log_config import get_logger from src.base.utils import setup_config @@ -24,32 +24,12 @@ class ClickHouseConnector: def __init__(self, table_name: str, column_names: list[str]): self._table_name = table_name self._column_names = column_names - self._topic = f"clickhouse_{table_name}" - self._kafka_consumer = KafkaConsumeHandler(self._topic) self._batch_sender = ClickHouseBatchSender( - kafka_topic=self._topic, table_name=self._table_name, column_names=self._column_names, ) - def prepare_all_tables(self): - def _load_contents(file_name: str) -> str: - with open(file_name, "r") as file: - return file.read() - - for filename in os.listdir(CREATE_TABLES_DIRECTORY): - if filename.endswith(".sql"): - file_path = os.path.join(CREATE_TABLES_DIRECTORY, filename) - sql_content = _load_contents(file_path) - - with clickhouse_connect.get_client(host=CLICKHOUSE_HOSTNAME) as client: - try: - client.command(sql_content) - except Exception as e: - logger.critical("Error in CREATE TABLE statement") - raise e - def prepare_table(self): def _load_contents(file_name: str) -> str: with open(file_name, "r") as file: @@ -69,6 +49,10 @@ def _load_contents(file_name: str) -> str: def _add_to_batch(self, data): self._batch_sender.add(data) + @abstractmethod + 
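+    # Each concrete connector below overrides insert() with a table-specific
+    # signature and forwards one row per call to _add_to_batch().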
 
 class ServerLogsConnector(ClickHouseConnector):
     def __init__(self):
@@ -81,12 +65,18 @@ def __init__(self):
         super().__init__("server_logs", column_names)
 
     def insert(
-        self, message_text: str, timestamp_in: datetime.datetime | None = None
+        self,
+        # parameter order mirrors the positional payload sent by LogServer.store_message
+        message_id: uuid.UUID | None = None,
+        message_text: str = "",
+        timestamp_in: datetime.datetime | None = None,
     ) -> uuid.UUID:
+        if not message_id:
+            message_id = uuid.uuid4()
+
         if not timestamp_in:
             timestamp_in = datetime.datetime.now()
 
-        message_id = uuid.uuid4()
         self._add_to_batch([message_id, timestamp_in, message_text])
         return message_id
 
diff --git a/src/monitoring/clickhouse_kafka_sender.py b/src/monitoring/clickhouse_kafka_sender.py
new file mode 100644
index 0000000..8961697
--- /dev/null
+++ b/src/monitoring/clickhouse_kafka_sender.py
@@ -0,0 +1,18 @@
+import json
+import os
+import sys
+
+sys.path.append(os.getcwd())
+from src.base.kafka_handler import KafkaProduceHandler
+
+
+class ClickHouseKafkaSender:
+    def __init__(self, table_name: str):
+        self.table_name = table_name
+        self.kafka_producer = KafkaProduceHandler(transactional_id="clickhouse")
+
+    def insert(self, data: list):
+        self.kafka_producer.send(
+            topic=f"clickhouse_{self.table_name}",
+            data=json.dumps(data),
+        )
diff --git a/src/monitoring/monitoring_agent.py b/src/monitoring/monitoring_agent.py
new file mode 100644
index 0000000..23b49e8
--- /dev/null
+++ b/src/monitoring/monitoring_agent.py
@@ -0,0 +1,74 @@
+import asyncio
+import json
+import os
+import sys
+
+sys.path.append(os.getcwd())
+from src.monitoring.clickhouse_connector import *
+from src.base.kafka_handler import KafkaConsumeHandler
+from src.base.log_config import get_logger
+from src.base.utils import setup_config
+
+logger = get_logger()
+
+CONFIG = setup_config()
+CREATE_TABLES_DIRECTORY = "src/monitoring/create_tables"  # TODO: Get from config
+
+
+def prepare_all_tables():
+    def _load_contents(file_name: str) -> str:
+        with open(file_name, "r") as file:
+            return file.read()
+
+    for filename in os.listdir(CREATE_TABLES_DIRECTORY):
+        if filename.endswith(".sql"):
+            file_path = os.path.join(CREATE_TABLES_DIRECTORY, filename)
+            sql_content = _load_contents(file_path)
+
+            with clickhouse_connect.get_client(host=CLICKHOUSE_HOSTNAME) as client:
+                try:
+                    client.command(sql_content)
+                except Exception as e:
+                    logger.critical("Error in CREATE TABLE statement")
+                    raise e
+
+
+class MonitoringAgent:
+    def __init__(self):
+        self.connectors = {
+            "server_logs": ServerLogsConnector(),
+            "server_logs_timestamps": ServerLogsTimestampsConnector(),
+            "failed_dns_loglines": FailedDNSLoglinesConnector(),
+            "logline_to_batches": LoglineToBatchesConnector(),
+            "dns_loglines": DNSLoglinesConnector(),
+            "logline_status": LoglineStatusConnector(),
+            "logline_timestamps": LoglineTimestampsConnector(),
+            "batch_status": BatchStatusConnector(),
+            "batch_timestamps": BatchTimestampsConnector(),
+        }
+
+        self.topics = [f"clickhouse_{table_name}" for table_name in self.connectors]
+        self.kafka_consumer = KafkaConsumeHandler(self.topics)
+
+    async def start(self):
+        prepare_all_tables()
+        loop = asyncio.get_running_loop()
+
+        try:
+            while True:
+                key, value, topic = await loop.run_in_executor(
+                    None, self.kafka_consumer.consume
+                )
+                logger.info(f"Received message via Kafka:\n ⤷ {value}")
+
+                data = json.loads(value)
+                # insert() is synchronous; route by the table name encoded in the topic
+                self.connectors[topic.removeprefix("clickhouse_")].insert(*data)
+        except KeyboardInterrupt:
+            logger.info("Stop consuming...")
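For context, a brief sketch of the intended end-to-end flow, assuming the ClickHouseKafkaSender and the clickhouse_<table> topic scheme introduced above; the log message here is invented for illustration. A pipeline component enqueues a monitoring row via Kafka, and MonitoringAgent routes it to the matching connector, so the positional payload must follow that connector's insert() signature:

    import datetime
    import uuid

    from src.monitoring.clickhouse_kafka_sender import ClickHouseKafkaSender

    server_logs = ClickHouseKafkaSender(table_name="server_logs")

    # payload order follows ServerLogsConnector.insert(message_id, message_text, timestamp_in);
    # values are pre-stringified because json.dumps cannot serialize UUID/datetime objects
    server_logs.insert(
        [str(uuid.uuid4()), "connection accepted", datetime.datetime.now().isoformat()]
    )

+
+
+if __name__ == "__main__":
+    logger.info("Starting 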
Monitoring Agent...") + clickhouse_consumer = MonitoringAgent() + asyncio.run(clickhouse_consumer.start()) diff --git a/src/prefilter/prefilter.py b/src/prefilter/prefilter.py index 3fa2424..6743d26 100644 --- a/src/prefilter/prefilter.py +++ b/src/prefilter/prefilter.py @@ -34,7 +34,7 @@ def __init__(self): logger.debug(f"Calling KafkaProduceHandler(transactional_id='prefilter')...") self.kafka_produce_handler = KafkaProduceHandler(transactional_id="prefilter") logger.debug(f"Calling KafkaConsumeHandler(topic='Prefilter')...") - self.kafka_consume_handler = KafkaConsumeHandler(topic="Prefilter") + self.kafka_consume_handler = KafkaConsumeHandler(topics="Prefilter") logger.debug("Initialized Prefilter.") def get_and_fill_data(self) -> None: diff --git a/tests/test_detector.py b/tests/test_detector.py index b3fb72b..8097658 100644 --- a/tests/test_detector.py +++ b/tests/test_detector.py @@ -141,7 +141,7 @@ def test_init(self, mock_kafka_consume_handler, mock_logger): self.assertEqual([], sut.messages) self.assertEqual(mock_kafka_consume_handler_instance, sut.kafka_consume_handler) - mock_kafka_consume_handler.assert_called_once_with(topic="Detector") + mock_kafka_consume_handler.assert_called_once_with(topics="Detector") class TestGetData(unittest.TestCase): diff --git a/tests/test_inspector.py b/tests/test_inspector.py index 37d49a7..106ca66 100644 --- a/tests/test_inspector.py +++ b/tests/test_inspector.py @@ -46,7 +46,7 @@ def test_init(self, mock_kafka_consume_handler, mock_produce_handler): self.assertEqual([], sut.messages) self.assertEqual(mock_kafka_consume_handler_instance, sut.kafka_consume_handler) - mock_kafka_consume_handler.assert_called_once_with(topic="Inspect") + mock_kafka_consume_handler.assert_called_once_with(topics="Inspect") class TestGetData(unittest.TestCase): diff --git a/tests/test_kafka_consume_handler.py b/tests/test_kafka_consume_handler.py index a94ddcc..d6751f6 100644 --- a/tests/test_kafka_consume_handler.py +++ b/tests/test_kafka_consume_handler.py @@ -40,7 +40,7 @@ def test_init(self, mock_consumer): "enable.partition.eof": True, } - sut = KafkaConsumeHandler(topic="test_topic") + sut = KafkaConsumeHandler(topics="test_topic") self.assertEqual(mock_consumer_instance, sut.consumer) @@ -81,7 +81,7 @@ def test_init_fail(self, mock_consumer, mock_logger): with patch.object(mock_consumer_instance, "assign", side_effect=KafkaException): with self.assertRaises(KafkaException): - sut = KafkaConsumeHandler(topic="test_topic") + sut = KafkaConsumeHandler(topics="test_topic") self.assertEqual(mock_consumer_instance, sut.consumer) @@ -114,7 +114,7 @@ def test_del_with_existing_consumer(self, mock_consumer): mock_consumer_instance = MagicMock() mock_consumer.return_value = mock_consumer_instance - sut = KafkaConsumeHandler(topic="test_topic") + sut = KafkaConsumeHandler(topics="test_topic") sut.consumer = mock_consumer_instance # Act @@ -147,7 +147,7 @@ def test_del_with_existing_consumer(self, mock_consumer): mock_consumer_instance = MagicMock() mock_consumer.return_value = mock_consumer_instance - sut = KafkaConsumeHandler(topic="test_topic") + sut = KafkaConsumeHandler(topics="test_topic") sut.consumer = None # Act @@ -181,7 +181,7 @@ def test_dict(self, mock_consumer): mock_consumer_instance = MagicMock() mock_consumer.return_value = mock_consumer_instance - sut = KafkaConsumeHandler(topic="test_topic") + sut = KafkaConsumeHandler(topics="test_topic") self.assertTrue(sut._is_dicts([{}, {}])) diff --git a/tests/test_prefilter.py b/tests/test_prefilter.py index 
81484d5..b5bd29a 100644 --- a/tests/test_prefilter.py +++ b/tests/test_prefilter.py @@ -26,7 +26,7 @@ def test_valid_init( self.assertIsNotNone(sut.logline_handler) mock_produce_handler.assert_called_once_with(transactional_id="prefilter") - mock_consume_handler.assert_called_once_with(topic="Prefilter") + mock_consume_handler.assert_called_once_with(topics="Prefilter") mock_logline_handler.assert_called_once() diff --git a/tests/test_server.py b/tests/test_server.py index 4ff3098..570060f 100644 --- a/tests/test_server.py +++ b/tests/test_server.py @@ -20,7 +20,8 @@ class TestInit(unittest.TestCase): @patch("src.logserver.server.PORT_OUT", 8888) @patch("src.logserver.server.LISTEN_ON_TOPIC", "test_topic") @patch("src.logserver.server.KafkaConsumeHandler") - def test_valid_init_ipv4(self, mock_kafka_consume_handler): + @patch("src.logserver.server.ClickHouseKafkaSender") + def test_valid_init_ipv4(self, mock_kafka_sender, mock_kafka_consume_handler): mock_kafka_consume_handler_instance = MagicMock() mock_kafka_consume_handler.return_value = mock_kafka_consume_handler_instance @@ -31,14 +32,15 @@ def test_valid_init_ipv4(self, mock_kafka_consume_handler): self.assertTrue(sut.data_queue.empty()) self.assertEqual(0, sut.number_of_connections) self.assertEqual(mock_kafka_consume_handler_instance, sut.kafka_consume_handler) - mock_kafka_consume_handler.assert_called_once_with(topic="test_topic") + mock_kafka_consume_handler.assert_called_once_with(topics="test_topic") @patch("src.logserver.server.HOSTNAME", "fe80::1") @patch("src.logserver.server.PORT_IN", 7777) @patch("src.logserver.server.PORT_OUT", 8888) @patch("src.logserver.server.LISTEN_ON_TOPIC", "test_topic") @patch("src.logserver.server.KafkaConsumeHandler") - def test_valid_init_ipv6(self, mock_kafka_consume_handler): + @patch("src.logserver.server.ClickHouseKafkaSender") + def test_valid_init_ipv6(self, mock_kafka_sender, mock_kafka_consume_handler): mock_kafka_consume_handler_instance = MagicMock() mock_kafka_consume_handler.return_value = mock_kafka_consume_handler_instance @@ -49,7 +51,7 @@ def test_valid_init_ipv6(self, mock_kafka_consume_handler): self.assertTrue(sut.data_queue.empty()) self.assertEqual(0, sut.number_of_connections) self.assertEqual(mock_kafka_consume_handler_instance, sut.kafka_consume_handler) - mock_kafka_consume_handler.assert_called_once_with(topic="test_topic") + mock_kafka_consume_handler.assert_called_once_with(topics="test_topic") @patch("src.logserver.server.HOSTNAME", "256.256.256.256") @patch("src.logserver.server.PORT_IN", 7777) @@ -74,8 +76,14 @@ class TestOpen(unittest.IsolatedAsyncioTestCase): @patch("src.logserver.server.LogServer.handle_kafka_inputs") @patch("src.logserver.server.LogServer.async_follow") @patch("src.logserver.server.KafkaConsumeHandler") + @patch("src.logserver.server.ClickHouseKafkaSender") async def test_open( - self, mock_kafka_consume_handler, mock_follow, mock_handle_kafka, mock_logger + self, + mock_kafka_sender, + mock_kafka_consume_handler, + mock_follow, + mock_handle_kafka, + mock_logger, ): # Arrange sut = LogServer() @@ -114,7 +122,8 @@ async def test_open( @patch("src.logserver.server.HOSTNAME", "127.0.0.1") @patch("src.logserver.server.PORT_IN", 1234) @patch("src.logserver.server.PORT_OUT", 5678) - async def test_open_keyboard_interrupt(self, mock_logger): + @patch("src.logserver.server.ClickHouseKafkaSender") + async def test_open_keyboard_interrupt(self, mock_kafka_sender, mock_logger): # Arrange sut = LogServer() @@ -140,7 +149,8 @@ async def 
test_open_keyboard_interrupt(self, mock_logger): class TestHandleConnection(unittest.IsolatedAsyncioTestCase): - async def test_handle_connection_sending(self): + @patch("src.logserver.server.ClickHouseKafkaSender") + async def test_handle_connection_sending(self, mock_kafka_sender): server_instance = LogServer() server_instance.send_logline = AsyncMock() server_instance.get_next_logline = MagicMock(return_value="test logline") @@ -156,7 +166,8 @@ async def test_handle_connection_sending(self): writer.wait_closed.assert_awaited_once() self.assertEqual(0, server_instance.number_of_connections) - async def test_handle_connection_receiving(self): + @patch("src.logserver.server.ClickHouseKafkaSender") + async def test_handle_connection_receiving(self, mock_kafka_sender): server_instance = LogServer() server_instance.receive_logline = AsyncMock() @@ -171,7 +182,8 @@ async def test_handle_connection_receiving(self): writer.wait_closed.assert_awaited_once() self.assertEqual(0, server_instance.number_of_connections) - async def test_handle_connection_rejected(self): + @patch("src.logserver.server.ClickHouseKafkaSender") + async def test_handle_connection_rejected(self, mock_kafka_sender): server_instance = LogServer() server_instance.number_of_connections = 5 @@ -185,7 +197,10 @@ async def test_handle_connection_rejected(self): writer.wait_closed.assert_awaited_once() self.assertEqual(5, server_instance.number_of_connections) - async def test_handle_connection_increases_and_decreases_connections(self): + @patch("src.logserver.server.ClickHouseKafkaSender") + async def test_handle_connection_increases_and_decreases_connections( + self, mock_kafka_sender + ): server_instance = LogServer() server_instance.send_logline = AsyncMock() server_instance.get_next_logline = MagicMock(return_value="test logline") @@ -199,7 +214,8 @@ async def test_handle_connection_increases_and_decreases_connections(self): self.assertEqual(3, server_instance.number_of_connections) - async def test_handle_connection_cancelled_error(self): + @patch("src.logserver.server.ClickHouseKafkaSender") + async def test_handle_connection_cancelled_error(self, mock_kafka_sender): server_instance = LogServer() server_instance.send_logline = AsyncMock(side_effect=asyncio.CancelledError) server_instance.get_next_logline = MagicMock(return_value="test logline") @@ -217,7 +233,10 @@ async def test_handle_connection_cancelled_error(self): @patch("src.logserver.server.logger") @patch("src.logserver.server.MAX_NUMBER_OF_CONNECTIONS", 7) - async def test_handle_connection_rejects_additional_connections(self, mock_logger): + @patch("src.logserver.server.ClickHouseKafkaSender") + async def test_handle_connection_rejects_additional_connections( + self, mock_kafka_sender, mock_logger + ): server_instance = LogServer() server_instance.number_of_connections = 7 @@ -233,7 +252,8 @@ async def test_handle_connection_rejects_additional_connections(self, mock_logge class TestHandleKafkaInputs(unittest.IsolatedAsyncioTestCase): - async def asyncSetUp(self): + @patch("src.logserver.server.ClickHouseKafkaSender") + async def asyncSetUp(self, mock_kafka_sender): self.sut = LogServer() self.sut.kafka_consume_handler = AsyncMock() self.sut.data_queue = MagicMock() @@ -243,10 +263,14 @@ async def asyncSetUp(self): async def test_handle_kafka_inputs(self, mock_get_running_loop, mock_logger): mock_loop = AsyncMock() mock_get_running_loop.return_value = mock_loop - self.sut.kafka_consume_handler.consume.return_value = ("key1", "value1") + 
self.sut.kafka_consume_handler.consume.return_value = ( + "key1", + "value1", + "topic1", + ) mock_loop.run_in_executor.side_effect = [ - ("key1", "value1"), + ("key1", "value1", "topic1"), asyncio.CancelledError(), ] @@ -257,7 +281,8 @@ async def test_handle_kafka_inputs(self, mock_get_running_loop, mock_logger): class TestAsyncFollow(unittest.IsolatedAsyncioTestCase): - def setUp(self): + @patch("src.logserver.server.ClickHouseKafkaSender") + def setUp(self, mock_kafka_sender): self.sut = LogServer() self.sut.kafka_consume_handler = AsyncMock() self.sut.data_queue = MagicMock() @@ -294,7 +319,8 @@ async def test_async_follow(self, mock_logger): class TestHandleSendLogline(unittest.IsolatedAsyncioTestCase): - async def test_handle_send_logline(self): + @patch("src.logserver.server.ClickHouseKafkaSender") + async def test_handle_send_logline(self, mock_kafka_sender): server_instance = LogServer() server_instance.handle_connection = AsyncMock() @@ -307,7 +333,8 @@ async def test_handle_send_logline(self): class TestHandleReceiveLogline(unittest.IsolatedAsyncioTestCase): - async def test_handle_receive_logline(self): + @patch("src.logserver.server.ClickHouseKafkaSender") + async def test_handle_receive_logline(self, mock_kafka_sender): server_instance = LogServer() server_instance.handle_connection = AsyncMock() @@ -323,7 +350,8 @@ async def test_handle_receive_logline(self): class TestSendLogline(unittest.IsolatedAsyncioTestCase): @patch("src.logserver.server.logger") - async def test_send_logline_with_logline(self, mock_logger): + @patch("src.logserver.server.ClickHouseKafkaSender") + async def test_send_logline_with_logline(self, mock_kafka_sender, mock_logger): server_instance = LogServer() writer = AsyncMock() logline = "Test logline" @@ -333,7 +361,8 @@ async def test_send_logline_with_logline(self, mock_logger): writer.write.assert_called_once_with(logline.encode("utf-8")) writer.drain.assert_called_once() - async def test_send_logline_no_logline(self): + @patch("src.logserver.server.ClickHouseKafkaSender") + async def test_send_logline_no_logline(self, mock_kafka_sender): server_instance = LogServer() writer = AsyncMock() logline = "" @@ -346,7 +375,8 @@ async def test_send_logline_no_logline(self): class TestReceiveLogline(unittest.IsolatedAsyncioTestCase): @patch("src.logserver.server.logger") - async def test_receive_logline(self, mock_logger): + @patch("src.logserver.server.ClickHouseKafkaSender") + async def test_receive_logline(self, mock_kafka_sender, mock_logger): reader = AsyncMock() data_queue = MagicMock() server_instance = LogServer() @@ -365,7 +395,8 @@ async def test_receive_logline(self, mock_logger): self.assertEqual(data_queue.put.call_count, 2) @patch("src.logserver.server.logger") - async def test_receive_without_separator(self, mock_logger): + @patch("src.logserver.server.ClickHouseKafkaSender") + async def test_receive_without_separator(self, mock_kafka_sender, mock_logger): reader = AsyncMock() data_queue = MagicMock() server_instance = LogServer() @@ -379,7 +410,8 @@ async def test_receive_without_separator(self, mock_logger): asyncio.create_task(server_instance.receive_logline(reader)) @patch("src.logserver.server.logger") - async def test_receive_too_long(self, mock_logger): + @patch("src.logserver.server.ClickHouseKafkaSender") + async def test_receive_too_long(self, mock_kafka_sender, mock_logger): reader = AsyncMock() data_queue = MagicMock() server_instance = LogServer() @@ -391,7 +423,8 @@ async def test_receive_too_long(self, mock_logger): 
asyncio.create_task(server_instance.receive_logline(reader)) @patch("src.logserver.server.logger") - async def test_receive_raise_other_exception(self, mock_logger): + @patch("src.logserver.server.ClickHouseKafkaSender") + async def test_receive_raise_other_exception(self, mock_kafka_sender, mock_logger): reader = AsyncMock() data_queue = MagicMock() server_instance = LogServer() @@ -405,7 +438,8 @@ async def test_receive_raise_other_exception(self, mock_logger): class TestGetNextLogline(unittest.TestCase): - def test_valid(self): + @patch("src.logserver.server.ClickHouseKafkaSender") + def test_valid(self, mock_kafka_sender): server_instance = LogServer() server_instance.data_queue.put("Element 1") server_instance.data_queue.put("Element 2") @@ -413,7 +447,8 @@ def test_valid(self): self.assertEqual("Element 1", server_instance.get_next_logline()) self.assertEqual("Element 2", server_instance.get_next_logline()) - def test_valid_from_empty_queue(self): + @patch("src.logserver.server.ClickHouseKafkaSender") + def test_valid_from_empty_queue(self, mock_kafka_sender): server_instance = LogServer() self.assertIsNone(server_instance.get_next_logline()) From 85161fd0a767e3f453ac1c639b691c77797093c9 Mon Sep 17 00:00:00 2001 From: Manuel Fuchs Date: Tue, 12 Nov 2024 20:29:51 +0100 Subject: [PATCH 09/29] Simplify Kafka Handlers and Log Server --- src/base/kafka_handler.py | 287 ++++++------ src/detector/detector.py | 6 +- src/inspector/inspector.py | 12 +- src/logcollector/batch_handler.py | 8 +- src/logserver/server.py | 241 ++-------- src/monitoring/clickhouse_kafka_sender.py | 9 +- src/prefilter/prefilter.py | 10 +- tests/test_batch_handler.py | 28 +- tests/test_detector.py | 36 +- ...est_exactly_once_kafka_consume_handler.py} | 12 +- ...est_exactly_once_kafka_produce_handler.py} | 38 +- tests/test_inspector.py | 136 +++--- tests/test_prefilter.py | 52 +-- tests/test_server.py | 410 +++--------------- 14 files changed, 437 insertions(+), 848 deletions(-) rename tests/{test_kafka_consume_handler.py => test_exactly_once_kafka_consume_handler.py} (92%) rename tests/{test_kafka_produce_handler.py => test_exactly_once_kafka_produce_handler.py} (88%) diff --git a/src/base/kafka_handler.py b/src/base/kafka_handler.py index 59dc188..a9f291c 100644 --- a/src/base/kafka_handler.py +++ b/src/base/kafka_handler.py @@ -5,10 +5,10 @@ """ import ast -import json import os import sys import time +from abc import abstractmethod import marshmallow_dataclass from confluent_kafka import ( @@ -50,71 +50,80 @@ class KafkaMessageFetchException(Exception): class KafkaHandler: """ - Wraps and adds up on the Kafka functionality. :class:`KafkaHandler` serves as base class for further implementation - in its inheriting classes. Base class only specifies the initialization. + Base class for all Kafka wrappers. Only specifies the initialization. """ def __init__(self) -> None: """ - Initializes the brokers used in further tasks. + Initializes the broker configuration. """ - logger.debug(f"Initializing KafkaHandler...") self.consumer = None - self.brokers = ",".join( [f"{broker['hostname']}:{broker['port']}" for broker in KAFKA_BROKERS] ) - logger.debug(f"Retrieved {self.brokers=}.") - logger.debug(f"Initialized KafkaHandler.") class KafkaProduceHandler(KafkaHandler): """ - Wraps the Kafka Producer functionality of producing data into the Broker(s) using a topic and a - ``transactional_id`` specified in initialization. Also uses the Write-Exactly-Once-Semantics which requires - handling and committing transactions. 
The topic and data are specified in the method call of :meth:`send()`. + Base class for Kafka Producer wrappers. """ def __init__(self, transactional_id: str): - """ - Args: - transactional_id (str): ID of the transaction - - Raises: - KafkaException: During initialization of Producer or its transactions - """ - logger.debug(f"Initializing KafkaProduceHandler ({transactional_id=})...") super().__init__() - self.batch_schema = marshmallow_dataclass.class_schema(Batch)() - conf = { "bootstrap.servers": self.brokers, "transactional.id": transactional_id, } - logger.debug(f"Set {conf=}.") - try: - logger.debug("Calling Producer(conf)...") - self.producer = Producer(conf) - logger.debug("Producer set. Initializing transactions...") - self.producer.init_transactions() - logger.debug("Transactions initialized.") - except KafkaException as e: - logger.error(f"Producer initialization failed: {e}") - raise + self.producer = Producer(conf) - logger.debug(f"Initialized KafkaProduceHandler ({transactional_id=}).") + @abstractmethod + def produce(self, *args, **kwargs): + """ + Encodes the given data for transport and sends it on the specified topic. + """ + pass def __del__(self) -> None: + self.producer.flush() + + +class SimpleKafkaProduceHandler(KafkaProduceHandler): + """ + Simple wrapper for the Kafka Producer without Write-Exactly-Once semantics. + """ + + def produce(self, topic: str, data: str, key: None | str = None) -> None: """ - Flushes the producer to securely delete the instance. + Encodes the given data for transport and sends it on the specified topic. + + Args: + topic (str): Topic to send the data with + data (str): Data to be sent + key (str): Key to send the data with """ - logger.debug("Closing KafkaProduceHandler...") - self.producer.flush() - logger.debug("Closed KafkaProduceHandler.") + if not data: + return + + self.producer.produce( + topic=topic, + key=key, + value=data.encode("utf-8"), + callback=kafka_delivery_report, + ) - def send(self, topic: str, data: str, key: None | str = None) -> None: + +class ExactlyOnceKafkaProduceHandler(KafkaProduceHandler): + """ + Wrapper for the Kafka Producer with Write-Exactly-Once semantics. + """ + + def __init__(self, transactional_id: str): + super().__init__(transactional_id) + self.producer.init_transactions() + + def produce(self, topic: str, data: str, key: None | str = None) -> None: """ Encodes the given data for transport and sends it with the specified topic. @@ -127,39 +136,24 @@ def send(self, topic: str, data: str, key: None | str = None) -> None: Exception: During :meth:`commit_transaction_with_retry()` or Producer's ``produce()``. Aborts transaction then. """ - logger.debug(f"Starting to send data to Producer...") - logger.debug(f"({topic=}, {data=})") if not data: - logger.debug("No data. Nothing to send. Returning...") return - logger.debug("Beginning transaction...") self.producer.begin_transaction() - logger.debug("Successfully began transaction.") try: - logger.debug(f"Calling Producer for producing {topic=}, key=None...") self.producer.produce( topic=topic, key=key, value=data.encode("utf-8"), callback=kafka_delivery_report, ) - logger.debug( - "Producer.produce() successfully called. Committing transaction..." 
- ) + self.commit_transaction_with_retry() - logger.debug(f"Transaction committed.") - logger.debug(f"({data=})") except Exception as e: - logger.error(f"Transaction failed: {e}") - logger.error(f"({data=})") self.producer.abort_transaction() logger.error("Transaction aborted.") raise - logger.debug("Data sent to Producer.") - logger.debug(f"({data=})") - def commit_transaction_with_retry( self, max_retries: int = 3, retry_interval_ms: int = 1000 ) -> None: @@ -171,9 +165,6 @@ def commit_transaction_with_retry( max_retries (int): Maximum number of retries retry_interval_ms (int): Interval between retries in ms """ - logger.debug( - f"Committing transaction with up to {max_retries} retries ({retry_interval_ms=})..." - ) committed = False retry_count = 0 @@ -195,29 +186,15 @@ def commit_transaction_with_retry( raise e if not committed: - logger.error("Transaction could not be committed.") raise RuntimeError("Failed to commit transaction after retries.") - logger.debug( - f"Successfully committed transaction after {retry_count} retry/retries." - ) - class KafkaConsumeHandler(KafkaHandler): """ - Wraps and adds up on the Kafka Consumer functionality of consuming data from the Broker(s) in a specified topic. - Also uses the Write-Exactly-Once-Semantics which requires handling and committing transactions. + Base class for Kafka Consumer wrappers. """ def __init__(self, topics: str | list[str]) -> None: - """ - Args: - topics (str | list[str]): Topic name(s) to consume from - - Raises: - KafkaException: During construction of Consumer or assignment of topic. - """ - logger.debug(f"Initializing KafkaConsumeHandler ({topics=})...") super().__init__() conf = { @@ -227,57 +204,82 @@ def __init__(self, topics: str | list[str]) -> None: "auto.offset.reset": "earliest", "enable.partition.eof": True, } - logger.debug(f"Set {conf=}.") if isinstance(topics, str): topics = [topics] - self.batch_schema = marshmallow_dataclass.class_schema(Batch)() + self.consumer = Consumer(conf) + self.consumer.assign([TopicPartition(topic, 0) for topic in topics]) + + @abstractmethod + def consume(self, *args, **kwargs): + """ + Consumes available messages on the specified topic and decodes it. + """ + pass + + def consume_as_json(self) -> tuple[None | str, dict]: + """ + Consumes available messages on the specified topic. Decodes the data and returns the contents in JSON format. + Blocks and waits if no data is available. + + Returns: + Consumed data in JSON format + Raises: + ValueError: Invalid data format + KafkaMessageFetchException: Error during message fetching/consuming + KeyboardInterrupt: Execution interrupted by user + """ try: - logger.debug("Calling Consumer(conf)...") - self.consumer = Consumer(conf) - logger.debug(f"Consumer set. Assigning topics {topics}...") - self.consumer.assign([TopicPartition(topic, 0) for topic in topics]) - except KafkaException as e: - logger.error(f"Consumer initialization failed: {e}") - raise e + key, value, topic = self.consume() + + if not key and not value: + return None, {} + except KafkaMessageFetchException: + raise + except KeyboardInterrupt: + raise + + eval_data = ast.literal_eval(value) - logger.debug(f"Initialized KafkaConsumeHandler ({topics=}).") + if isinstance(eval_data, dict): + return key, eval_data + else: + raise ValueError("Unknown data format") def __del__(self) -> None: - """ - Deletes the instance. Closes the running Kafka Consumer if it exists. 
- """ - logger.debug("Deleting KafkaConsumeHandler...") if self.consumer: self.consumer.close() - logger.debug("KafkaConsumeHandler deleted.") + + +class SimpleKafkaConsumeHandler(KafkaConsumeHandler): + """ + Simple wrapper for the Kafka Consumer without Write-Exactly-Once semantics. + """ def consume(self) -> tuple[str | None, str | None, str | None]: """ - Consumes available messages from the Broker(s) in the specified topic. Decodes the data and returns a tuple - of key and data of the message. Blocks and waits if no data is available. + Consumes available messages on the specified topic. Decodes the data and returns a tuple + of key, data and topic of the message. Blocks and waits if no data is available. Returns: - Either ``[None,None]`` if empty data was retrieved from the Broker(s) or ``[key,value]`` as tuple + Either ``[None,None,None]`` if empty data was retrieved or ``[key,value,topic]`` as tuple of strings of the consumed data. Raises: KeyboardInterrupt: Execution interrupted by user Exception: Error during consuming """ - logger.debug("Starting to consume messages...") - empty_data_retrieved = False + try: while True: - logger.debug("Polling available messages...") msg = self.consumer.poll(timeout=1.0) if msg is None: if not empty_data_retrieved: - logger.info("Waiting for messages to be produced...") + logger.info("Waiting for messages...") empty_data_retrieved = True continue @@ -289,71 +291,86 @@ def consume(self) -> tuple[str | None, str | None, str | None]: logger.error(f"Consumer error: {msg.error()}") raise + # unpack message key = msg.key().decode("utf-8") if msg.key() else None value = msg.value().decode("utf-8") if msg.value() else None topic = msg.topic() if msg.topic() else None - logger.debug(f"Received message: {key=}, {value=}, {topic=}") - logger.debug("Committing transaction for message on Consumer...") - self.consumer.commit(msg) - logger.debug( - f"Transaction committed. Successfully consumed messages. Returning [{key=}, {value=}, {topic=}]..." - ) return key, value, topic except KeyboardInterrupt: - logger.info("Shutting down KafkaConsumeHandler...") + logger.info("Stopping KafkaConsumeHandler...") raise KeyboardInterrupt except Exception as e: - logger.error(f"Error in KafkaConsumeHandler: {e}") raise - def consume_and_return_json_data(self) -> tuple[None | str, dict]: + +class ExactlyOnceKafkaConsumeHandler(KafkaConsumeHandler): + """ + Wrapper for the Kafka Consumer with Write-Exactly-Once semantics. + """ + + def __init__(self, topics: str | list[str]) -> None: + self.batch_schema = marshmallow_dataclass.class_schema(Batch)() + super().__init__(topics) + + def consume(self) -> tuple[str | None, str | None, str | None]: """ - Calls the :meth:`consume()` method and waits for it to return data. Loads the data and converts it to a JSON - object. Returns the JSON data. + Consumes available messages on the specified topic. Decodes the data and returns a tuple + of key, data and topic of the message. Blocks and waits if no data is available. Returns: - Consumed data in JSON format + Either ``[None,None,None]`` if empty data was retrieved or ``[key,value,topic]`` as tuple + of strings of the consumed data. 
Raises: - ValueError: Invalid data format - KafkaMessageFetchException: Error during message fetching/consuming KeyboardInterrupt: Execution interrupted by user + Exception: Error during consuming """ + empty_data_retrieved = False + try: - key, value, topic = self.consume() + while True: + msg = self.consumer.poll(timeout=1.0) - if not key and not value: - logger.debug("No data returned.") - return None, {} - except KafkaMessageFetchException as e: - logger.debug(e) - raise - except KeyboardInterrupt: - raise + if msg is None: + if not empty_data_retrieved: + logger.info("Waiting for messages...") - logger.debug("Loading JSON values from received data...") - json_from_message = json.loads(value) - logger.debug(f"{json_from_message=}") - eval_data = ast.literal_eval(value) + empty_data_retrieved = True + continue - if isinstance(eval_data, dict): - logger.debug("Loaded available data. Returning it...") - return key, eval_data - else: - logger.error("Unknown data format.") - raise ValueError + if msg.error(): + if msg.error().code() == KafkaError._PARTITION_EOF: + continue + else: + logger.error(f"Consumer error: {msg.error()}") + raise + + # unpack message + key = msg.key().decode("utf-8") if msg.key() else None + value = msg.value().decode("utf-8") if msg.value() else None + topic = msg.topic() if msg.topic() else None + + self.consumer.commit(msg) - def _is_dicts(self, obj): + return key, value, topic + except KeyboardInterrupt: + logger.info("Shutting down KafkaConsumeHandler...") + raise KeyboardInterrupt + except Exception as e: + raise + + @staticmethod + def _is_dicts(obj): return isinstance(obj, list) and all(isinstance(item, dict) for item in obj) - def consume_and_return_object(self) -> tuple[None | str, Batch]: + def consume_as_object(self) -> tuple[None | str, Batch]: """ - Calls the :meth:`consume()` method and waits for it to return data. Loads the data and converts it to a Batch + Consumes available messages on the specified topic. Decodes the data and converts it to a Batch object. Returns the Batch object. Returns: - Consumed data in Batch object + Consumed data as Batch object Raises: ValueError: Invalid data format @@ -364,7 +381,6 @@ def consume_and_return_object(self) -> tuple[None | str, Batch]: key, value, topic = self.consume() if not key and not value: - logger.debug("No data returned.") return None, {} except KafkaMessageFetchException as e: logger.warning(e) @@ -372,12 +388,7 @@ def consume_and_return_object(self) -> tuple[None | str, Batch]: except KeyboardInterrupt: raise - logger.debug("Loading JSON values from received data...") - json_from_message = json.loads(value) - logger.debug(f"{json_from_message=}") - eval_data: dict = ast.literal_eval(value) - logger.debug("Check if data is a list of dicts") if self._is_dicts(eval_data.get("data")): eval_data["data"] = eval_data.get("data") @@ -389,8 +400,6 @@ def consume_and_return_object(self) -> tuple[None | str, Batch]: eval_data: Batch = self.batch_schema.load(eval_data) if isinstance(eval_data, Batch): - logger.debug("Loaded available data. 
Returning it...") return key, eval_data else: - logger.error("Unknown data format.") - raise ValueError + raise ValueError("Unknown data format.") diff --git a/src/detector/detector.py b/src/detector/detector.py index 5b20175..1334448 100644 --- a/src/detector/detector.py +++ b/src/detector/detector.py @@ -13,7 +13,7 @@ sys.path.append(os.getcwd()) from src.base.utils import setup_config from src.base.kafka_handler import ( - KafkaConsumeHandler, + ExactlyOnceKafkaConsumeHandler, KafkaMessageFetchException, ) from src.base.log_config import get_logger @@ -53,7 +53,7 @@ def __init__(self) -> None: logger.debug(f"Initializing Detector...") logger.debug(f"Calling KafkaConsumeHandler(topic='Detector')...") - self.kafka_consume_handler = KafkaConsumeHandler(topics="Detector") + self.kafka_consume_handler = ExactlyOnceKafkaConsumeHandler(topics="Detector") self.model = self._get_model() @@ -70,7 +70,7 @@ def get_and_fill_data(self) -> None: logger.debug( "Detector is not busy: Calling KafkaConsumeHandler to consume new JSON messages..." ) - key, data = self.kafka_consume_handler.consume_and_return_object() + key, data = self.kafka_consume_handler.consume_as_object() if data: self.begin_timestamp = data.begin_timestamp diff --git a/src/inspector/inspector.py b/src/inspector/inspector.py index fa89baf..338caf4 100644 --- a/src/inspector/inspector.py +++ b/src/inspector/inspector.py @@ -11,9 +11,9 @@ sys.path.append(os.getcwd()) from src.base.utils import setup_config from src.base.kafka_handler import ( - KafkaConsumeHandler, + ExactlyOnceKafkaConsumeHandler, + ExactlyOnceKafkaProduceHandler, KafkaMessageFetchException, - KafkaProduceHandler, ) from src.base.log_config import get_logger @@ -71,9 +71,11 @@ def __init__(self) -> None: logger.debug(f"Initializing Inspector...") logger.debug(f"Calling KafkaConsumeHandler(topic='Inspect')...") - self.kafka_consume_handler = KafkaConsumeHandler(topics="Inspect") + self.kafka_consume_handler = ExactlyOnceKafkaConsumeHandler(topics="Inspect") logger.debug(f"Calling KafkaProduceHandler(transactional_id='Inspect')...") - self.kafka_produce_handler = KafkaProduceHandler(transactional_id="inspect") + self.kafka_produce_handler = ExactlyOnceKafkaProduceHandler( + transactional_id="inspect" + ) logger.debug(f"Initialized Inspector.") def get_and_fill_data(self) -> None: @@ -89,7 +91,7 @@ def get_and_fill_data(self) -> None: logger.debug( "Inspector is not busy: Calling KafkaConsumeHandler to consume new JSON messages..." 
) - key, data = self.kafka_consume_handler.consume_and_return_object() + key, data = self.kafka_consume_handler.consume_as_object() if data: self.begin_timestamp = data.begin_timestamp diff --git a/src/logcollector/batch_handler.py b/src/logcollector/batch_handler.py index 18f6ba0..f45c2f5 100644 --- a/src/logcollector/batch_handler.py +++ b/src/logcollector/batch_handler.py @@ -4,7 +4,7 @@ from datetime import datetime from threading import Timer -from src.base.kafka_handler import KafkaProduceHandler +from src.base.kafka_handler import ExactlyOnceKafkaProduceHandler from src.base.utils import setup_config sys.path.append(os.getcwd()) @@ -265,7 +265,9 @@ def __init__(self): self.timer = None logger.debug(f"Calling KafkaProduceHandler(transactional_id='collector')...") - self.kafka_produce_handler = KafkaProduceHandler(transactional_id="collector") + self.kafka_produce_handler = ExactlyOnceKafkaProduceHandler( + transactional_id="collector" + ) logger.debug(f"Initialized KafkaBatchSender.") def __del__(self): @@ -360,7 +362,7 @@ def _send_batch_for_key(self, key: str) -> None: def _send_data_packet(self, key: str, data: dict) -> None: logger.debug("Sending data to KafkaProduceHandler...") logger.debug(f"{data=}") - self.kafka_produce_handler.send( + self.kafka_produce_handler.produce( topic=self.topic, data=json.dumps(data), key=key, diff --git a/src/logserver/server.py b/src/logserver/server.py index f3b9d76..9a37db6 100644 --- a/src/logserver/server.py +++ b/src/logserver/server.py @@ -1,15 +1,12 @@ import asyncio -import datetime import os -import queue import sys -import uuid +from asyncio import Lock import aiofiles sys.path.append(os.getcwd()) -from src.monitoring.clickhouse_kafka_sender import ClickHouseKafkaSender -from src.base.kafka_handler import KafkaConsumeHandler +from src.base.kafka_handler import SimpleKafkaConsumeHandler, SimpleKafkaProduceHandler from src.base.utils import setup_config from src.base import utils from src.base.log_config import get_logger @@ -18,136 +15,60 @@ CONFIG = setup_config() HOSTNAME = CONFIG["environment"]["logserver"]["hostname"] -PORT_IN = CONFIG["environment"]["logserver"]["port_in"] -PORT_OUT = CONFIG["environment"]["logserver"]["port_out"] -MAX_NUMBER_OF_CONNECTIONS = CONFIG["pipeline"]["log_storage"]["logserver"][ - "max_number_of_connections" -] LISTEN_ON_TOPIC = CONFIG["pipeline"]["log_storage"]["logserver"]["input_kafka_topic"] +SEND_TO_TOPIC = "logserver_to_collector" # TODO: Change READ_FROM_FILE = CONFIG["pipeline"]["log_storage"]["logserver"]["input_file"] class LogServer: """ - Server for receiving, storing and sending single log lines. Opens a port for receiving messages, listens for - messages via Kafka and reads newly added lines from an input file. To retrieve a message from the server, - other modules can connect to its outgoing/sending port. The server will then send its oldest message as a response. + Receives and sends single log lines. Listens for messages via Kafka and reads newly added lines from an input + file. """ def __init__(self) -> None: - logger.debug("Initializing LogServer...") self.host = None - self.port_out = None - self.port_in = None - - logger.debug("Validating host name...") self.host = utils.validate_host(HOSTNAME) - logger.debug("Host name is valid. Validating ingoing port...") - self.port_in = utils.validate_port(PORT_IN) - logger.debug("Ingoing port is valid. 
Validating outgoing port...") - self.port_out = utils.validate_port(PORT_OUT) - logger.debug("Outgoing port is valid.") - logger.debug("Initialized LogServer.") - - self.socket = None - self.number_of_connections = 0 - self.data_queue = queue.Queue() - self.kafka_consume_handler = KafkaConsumeHandler(topics=LISTEN_ON_TOPIC) - - self.server_logs = ClickHouseKafkaSender(table_name="server_logs") - async def store_message(self, message): - self.data_queue.put(message) + self.lock = Lock() - self.server_logs.insert([uuid.uuid4(), message, datetime.datetime.now()]) + self.kafka_consume_handler = SimpleKafkaConsumeHandler(topics=LISTEN_ON_TOPIC) + self.kafka_produce_handler = SimpleKafkaProduceHandler( + transactional_id="TODO: Change" + ) - async def open(self) -> None: + async def start(self) -> None: """ - Opens both ports for sending and receiving and starts reading from the input file as well as listening for - messages via Kafka. Can be stopped via a ``KeyboardInterrupt``. + Starts fetching messages from Kafka and from the input file. """ - logger.debug("Opening LogServer sockets...") - logger.debug(f"Creating the sending socket on port {self.port_out}...") - send_server = await asyncio.start_server( - self.handle_send_logline, str(self.host), self.port_out - ) - logger.debug(f"Creating the receiving socket on port {self.port_in}...") - receive_server = await asyncio.start_server( - self.handle_receive_logline, str(self.host), self.port_in - ) logger.info( - "LogServer is running:\n" - f" ⤷ receiving on {self.host}:{self.port_in} and Kafka topic '{LISTEN_ON_TOPIC}'\n" - f" ⤷ sending on {self.host}:{self.port_out}" + "LogServer started:\n" + f" ⤷ receiving on Kafka topic '{LISTEN_ON_TOPIC}'\n" + f" ⤷ receiving from input file '{READ_FROM_FILE}'\n" + f" ⤷ sending on Kafka topic 'TODO'" ) try: await asyncio.gather( - send_server.serve_forever(), - receive_server.serve_forever(), - self.handle_kafka_inputs(), - self.async_follow(), + self.fetch_from_kafka(), + self.fetch_from_file(), ) except KeyboardInterrupt: - logger.debug("Stop serving...") - finally: - send_server.close() - receive_server.close() - await asyncio.gather( - send_server.wait_closed(), receive_server.wait_closed() - ) - logger.debug("Both sockets closed.") + logger.info("LogServer stopped.") - async def handle_connection(self, reader, writer, sending: bool) -> None: + async def send(self, message: str) -> None: """ - Handles new incoming connection attempts. If the maximum number of possible connections is not yet reached, the - connection is approved and the log line is sent or received, depending on the calling method. If the number is - reached, a warning message will be printed and no connection gets established. + Sends a received message using Kafka. Args: - reader: Responsible for reading incoming data - writer: Responsible for writing outgoing data - sending (bool): Sending if True, receiving otherwise + message (str): Message to be sent """ - logger.debug(f"Handling connection with {sending=}...") - if self.number_of_connections < MAX_NUMBER_OF_CONNECTIONS: - logger.debug( - f"Adding connection to {self.number_of_connections}/{MAX_NUMBER_OF_CONNECTIONS}) open " - f"connections..." - ) - self.number_of_connections += 1 - client_address = writer.get_extra_info("peername") - logger.debug(f"Connection from {client_address} accepted") - - try: - if sending: - logger.debug( - "Sending active: Calling send_logline for next available logline..." 
- ) - await self.send_logline(writer, self.get_next_logline()) - else: - logger.debug("Receiving: Calling receive_logline...") - await self.receive_logline(reader) - except asyncio.CancelledError: - logger.debug("Handling cancelled.") - pass - finally: - writer.close() - await writer.wait_closed() - self.number_of_connections -= 1 - logger.debug(f"Connection to {client_address} closed.") - else: - client_address = writer.get_extra_info("peername") - logger.warning( - f"Client connection to {client_address} denied. Max number of connections reached!" - ) - writer.close() - await writer.wait_closed() + async with self.lock: + self.kafka_produce_handler.produce(topic=SEND_TO_TOPIC, data=message) - async def handle_kafka_inputs(self) -> None: + async def fetch_from_kafka(self) -> None: """ - Starts a loop to continuously listen on the configured Kafka topic. If a message is consumed, it is added - to the data queue. + Starts a loop to continuously listen on the configured Kafka topic. If a message is consumed, it is sent. """ loop = asyncio.get_running_loop() @@ -155,128 +76,44 @@ async def handle_kafka_inputs(self) -> None: key, value, topic = await loop.run_in_executor( None, self.kafka_consume_handler.consume ) - logger.info(f"Received message via Kafka:\n ⤷ {value}") - task = self.store_message(value) - await task - async def async_follow(self, file: str = READ_FROM_FILE) -> None: + logger.debug(f"From Kafka: '{value}'") + await self.send(value) + + async def fetch_from_file(self, file: str = READ_FROM_FILE) -> None: """ Continuously checks for new lines at the end of the input file. If one or multiple new lines are found, any - empty lines are removed and the remaining lines added to the data queue. + empty lines are removed and the remaining lines are sent individually. Args: - file (str): File to be read as string + file (str): Filename of the file to be read """ async with aiofiles.open(file, mode="r") as file: - # jump to end of file - await file.seek(0, 2) + await file.seek(0, 2) # jump to end of file while True: lines = await file.readlines() + if not lines: await asyncio.sleep(0.1) continue for line in lines: - # remove empty lines - cleaned_line = line.strip() + cleaned_line = line.strip() # remove empty lines + if not cleaned_line: continue - logger.info(f"Extracted message from file:\n ⤷ {cleaned_line}") - task = self.store_message(cleaned_line) - await task - - async def handle_send_logline(self, reader, writer) -> None: - """ - Handles the sending of a logline by calling :meth:`handle_connection` with ``sending=True``. - - Args: - reader: Responsible for reading incoming data - writer: Responsible for writing outgoing data - """ - logger.debug("Calling handle_connection with sending=True...") - await self.handle_connection(reader, writer, True) - - async def handle_receive_logline(self, reader, writer) -> None: - """ - Handles the receiving of a logline by calling :meth:`handle_connection` with ``sending=False``. - - Args: - reader: Responsible for reading incoming data - writer: Responsible for writing outgoing data - """ - logger.debug("Calling handle_connection with sending=False...") - await self.handle_connection(reader, writer, False) - - @staticmethod - async def send_logline(writer, logline) -> None: - """ - Sends the given log line encoded as UTF-8 to the connected component. 
- - Args: - writer: Responsible for writing outgoing data - logline: Logline to be sent - """ - if logline: - logger.debug(f"Sending {logline=}...") - writer.write(logline.encode("utf-8")) - await writer.drain() - logger.info(f"Sent message:\n ⤷ {logline}") - return - - logger.debug("No logline available") - - async def receive_logline(self, reader) -> None: - """ - Receives one or multiple log lines encoded as UTF-8 separated by and ending with separator '\n' from the - connected component and adds it or them to the data queue. Message must end with separator symbol. - - Args: - reader: Responsible for reading incoming data - """ - while True: - try: - data = await reader.readuntil(separator=b"\n") - if not data: - break - received_message = data.decode().strip() - logger.info(f"Received message:\n ⤷ {received_message}") - task = self.store_message(received_message) - await task - except asyncio.exceptions.IncompleteReadError as e: - logger.warning(f"Ignoring message: No separator symbol found: {e}") - break - except asyncio.LimitOverrunError: - logger.error(f"Message size exceeded, separator symbol not found") - break - except Exception as e: - logger.error(f"Unexpected error: {e}") - raise - - def get_next_logline(self) -> str | None: - """ - Returns and removes the oldest log line in the data queue. - - Returns: - Oldest log line in the data queue. - """ - logger.debug("Getting next available logline...") - if not self.data_queue.empty(): - logger.debug("Returning logline...") - return self.data_queue.get() - return None + logger.debug(f"From file: '{cleaned_line}'") + await self.send(cleaned_line) def main() -> None: """ Creates the :class:`LogServer` instance and starts it. """ - logger.info("Starting LogServer...") server_instance = LogServer() - logger.debug("LogServer started. 
Opening sockets...") - - asyncio.run(server_instance.open()) + asyncio.run(server_instance.start()) if __name__ == "__main__": # pragma: no cover diff --git a/src/monitoring/clickhouse_kafka_sender.py b/src/monitoring/clickhouse_kafka_sender.py index 8961697..9991362 100644 --- a/src/monitoring/clickhouse_kafka_sender.py +++ b/src/monitoring/clickhouse_kafka_sender.py @@ -3,16 +3,19 @@ import sys sys.path.append(os.getcwd()) -from src.base.kafka_handler import KafkaProduceHandler +from src.base.kafka_handler import SimpleKafkaProduceHandler +from src.base.log_config import get_logger + +logger = get_logger() class ClickHouseKafkaSender: def __init__(self, table_name: str): self.table_name = table_name - self.kafka_producer = KafkaProduceHandler(transactional_id="clickhouse") + self.kafka_producer = SimpleKafkaProduceHandler(transactional_id="clickhouse") def insert(self, data: list): - self.kafka_producer.send( + self.kafka_producer.produce( topic=f"clickhouse_{self.table_name}", data=json.dumps(data), ) diff --git a/src/prefilter/prefilter.py b/src/prefilter/prefilter.py index 6743d26..e96667e 100644 --- a/src/prefilter/prefilter.py +++ b/src/prefilter/prefilter.py @@ -6,9 +6,9 @@ sys.path.append(os.getcwd()) from src.base.logline_handler import LoglineHandler from src.base.kafka_handler import ( - KafkaConsumeHandler, + ExactlyOnceKafkaConsumeHandler, + ExactlyOnceKafkaProduceHandler, KafkaMessageFetchException, - KafkaProduceHandler, ) from src.base.log_config import get_logger @@ -32,9 +32,11 @@ def __init__(self): logger.debug(f"Calling LoglineHandler()...") self.logline_handler = LoglineHandler() logger.debug(f"Calling KafkaProduceHandler(transactional_id='prefilter')...") - self.kafka_produce_handler = KafkaProduceHandler(transactional_id="prefilter") + self.kafka_produce_handler = ExactlyOnceKafkaProduceHandler( + transactional_id="prefilter" + ) logger.debug(f"Calling KafkaConsumeHandler(topic='Prefilter')...") - self.kafka_consume_handler = KafkaConsumeHandler(topics="Prefilter") + self.kafka_consume_handler = ExactlyOnceKafkaConsumeHandler(topics="Prefilter") logger.debug("Initialized Prefilter.") def get_and_fill_data(self) -> None: diff --git a/tests/test_batch_handler.py b/tests/test_batch_handler.py index a830e44..a4ea8b3 100644 --- a/tests/test_batch_handler.py +++ b/tests/test_batch_handler.py @@ -6,7 +6,7 @@ class TestInit(unittest.TestCase): @patch("src.logcollector.batch_handler.BufferedBatch") - @patch("src.logcollector.batch_handler.KafkaProduceHandler") + @patch("src.logcollector.batch_handler.ExactlyOnceKafkaProduceHandler") def test_init_with_buffer(self, mock_kafka_produce_handler, mock_buffered_batch): # Arrange mock_handler_instance = MagicMock() @@ -35,7 +35,7 @@ class TestDel(unittest.TestCase): class TestAddMessage(unittest.TestCase): @patch("src.logcollector.batch_handler.logger") @patch("src.logcollector.batch_handler.BATCH_SIZE", 1000) - @patch("src.logcollector.batch_handler.KafkaProduceHandler") + @patch("src.logcollector.batch_handler.ExactlyOnceKafkaProduceHandler") @patch("src.logcollector.batch_handler.BufferedBatchSender._reset_timer") @patch("src.logcollector.batch_handler.BufferedBatch.get_number_of_messages") @patch("src.logcollector.batch_handler.BufferedBatchSender._send_batch_for_key") @@ -68,7 +68,7 @@ def test_add_message_normal( @patch("src.logcollector.batch_handler.logger") @patch("src.logcollector.batch_handler.BATCH_SIZE", 100) - @patch("src.logcollector.batch_handler.KafkaProduceHandler") + 
@patch("src.logcollector.batch_handler.ExactlyOnceKafkaProduceHandler") @patch("src.logcollector.batch_handler.BufferedBatchSender._send_batch_for_key") def test_add_message_full_messages( self, mock_send_batch, mock_produce_handler, mock_logger @@ -93,7 +93,7 @@ def test_add_message_full_messages( @patch("src.logcollector.batch_handler.logger") @patch("src.logcollector.batch_handler.BATCH_SIZE", 100) - @patch("src.logcollector.batch_handler.KafkaProduceHandler") + @patch("src.logcollector.batch_handler.ExactlyOnceKafkaProduceHandler") @patch("src.logcollector.batch_handler.BufferedBatchSender._send_batch_for_key") def test_add_message_full_messages_with_different_keys( self, mock_send_batch, mock_produce_handler, mock_logger @@ -123,7 +123,7 @@ def test_add_message_full_messages_with_different_keys( @patch("src.logcollector.batch_handler.logger") @patch("src.logcollector.batch_handler.BATCH_SIZE", 100) - @patch("src.logcollector.batch_handler.KafkaProduceHandler") + @patch("src.logcollector.batch_handler.ExactlyOnceKafkaProduceHandler") @patch("src.logcollector.batch_handler.BufferedBatchSender._reset_timer") def test_add_message_no_timer( self, mock_reset_timer, mock_produce_handler, mock_logger @@ -144,7 +144,7 @@ def test_add_message_no_timer( class TestSendAllBatches(unittest.TestCase): @patch("src.logcollector.batch_handler.logger") - @patch("src.logcollector.batch_handler.KafkaProduceHandler") + @patch("src.logcollector.batch_handler.ExactlyOnceKafkaProduceHandler") @patch("src.logcollector.batch_handler.BufferedBatchSender._send_batch_for_key") @patch("src.logcollector.batch_handler.BufferedBatch") def test_send_all_batches_with_existing_keys( @@ -171,7 +171,7 @@ def test_send_all_batches_with_existing_keys( mock_send_batch.assert_any_call("key_2") self.assertEqual(mock_send_batch.call_count, 2) - @patch("src.logcollector.batch_handler.KafkaProduceHandler") + @patch("src.logcollector.batch_handler.ExactlyOnceKafkaProduceHandler") @patch("src.logcollector.batch_handler.BufferedBatchSender._send_batch_for_key") @patch("src.logcollector.batch_handler.BufferedBatch") def test_send_all_batches_with_one_key( @@ -193,7 +193,7 @@ def test_send_all_batches_with_one_key( self.assertEqual(mock_send_batch.call_count, 0) @patch("src.logcollector.batch_handler.logger") - @patch("src.logcollector.batch_handler.KafkaProduceHandler") + @patch("src.logcollector.batch_handler.ExactlyOnceKafkaProduceHandler") @patch("src.logcollector.batch_handler.BufferedBatchSender._send_batch_for_key") @patch("src.logcollector.batch_handler.BufferedBatchSender._reset_timer") @patch("src.logcollector.batch_handler.BufferedBatch") @@ -223,7 +223,7 @@ def test_send_all_batches_with_existing_keys_and_reset_timer( mock_reset_timer.assert_called_once() self.assertEqual(mock_send_batch.call_count, 2) - @patch("src.logcollector.batch_handler.KafkaProduceHandler") + @patch("src.logcollector.batch_handler.ExactlyOnceKafkaProduceHandler") @patch("src.logcollector.batch_handler.BufferedBatchSender._send_batch_for_key") @patch("src.logcollector.batch_handler.BufferedBatch") def test_send_all_batches_with_no_keys( @@ -246,7 +246,7 @@ def test_send_all_batches_with_no_keys( class TestSendBatchForKey(unittest.TestCase): - @patch("src.logcollector.batch_handler.KafkaProduceHandler") + @patch("src.logcollector.batch_handler.ExactlyOnceKafkaProduceHandler") @patch.object(BufferedBatchSender, "_send_data_packet") @patch("src.logcollector.batch_handler.BufferedBatch") def test_send_batch_for_key_success( @@ -267,7 +267,7 @@ def 
test_send_batch_for_key_success( mock_batch_instance.complete_batch.assert_called_once_with(key) mock_send_data_packet.assert_called_once_with(key, "mock_data_packet") - @patch("src.logcollector.batch_handler.KafkaProduceHandler") + @patch("src.logcollector.batch_handler.ExactlyOnceKafkaProduceHandler") @patch.object(BufferedBatchSender, "_send_data_packet") @patch("src.logcollector.batch_handler.BufferedBatch") def test_send_batch_for_key_value_error( @@ -290,7 +290,7 @@ def test_send_batch_for_key_value_error( class TestSendDataPacket(unittest.TestCase): - @patch("src.logcollector.batch_handler.KafkaProduceHandler") + @patch("src.logcollector.batch_handler.ExactlyOnceKafkaProduceHandler") def test_send_data_packet(self, mock_produce_handler): # Arrange mock_produce_handler_instance = MagicMock() @@ -319,7 +319,7 @@ def test_send_data_packet(self, mock_produce_handler): class TestResetTimer(unittest.TestCase): @patch("src.logcollector.batch_handler.BATCH_TIMEOUT", 5.9) - @patch("src.logcollector.batch_handler.KafkaProduceHandler") + @patch("src.logcollector.batch_handler.ExactlyOnceKafkaProduceHandler") @patch("src.logcollector.batch_handler.Timer") def test_reset_timer_with_existing_timer(self, mock_timer, mock_produce_handler): # Arrange @@ -343,7 +343,7 @@ def test_reset_timer_with_existing_timer(self, mock_timer, mock_produce_handler) sut.timer.start.assert_called_once() @patch("src.logcollector.batch_handler.BATCH_TIMEOUT", 4.6) - @patch("src.logcollector.batch_handler.KafkaProduceHandler") + @patch("src.logcollector.batch_handler.ExactlyOnceKafkaProduceHandler") @patch("src.logcollector.batch_handler.Timer") def test_reset_timer_without_existing_timer(self, mock_timer, mock_produce_handler): # Arrange diff --git a/tests/test_detector.py b/tests/test_detector.py index 8097658..6fc6bb9 100644 --- a/tests/test_detector.py +++ b/tests/test_detector.py @@ -11,7 +11,7 @@ class TestSha256Sum(unittest.TestCase): - @patch("src.detector.detector.KafkaConsumeHandler") + @patch("src.detector.detector.ExactlyOnceKafkaConsumeHandler") def test_sha256_empty_file(self, mock_kafka_consume_handler): mock_kafka_consume_handler_instance = MagicMock() mock_kafka_consume_handler.return_value = mock_kafka_consume_handler_instance @@ -21,7 +21,7 @@ def test_sha256_empty_file(self, mock_kafka_consume_handler): with self.assertRaises(FileNotFoundError): sut._sha256sum("") - @patch("src.detector.detector.KafkaConsumeHandler") + @patch("src.detector.detector.ExactlyOnceKafkaConsumeHandler") def test_sha256_not_existing_file(self, mock_kafka_consume_handler): mock_kafka_consume_handler_instance = MagicMock() mock_kafka_consume_handler.return_value = mock_kafka_consume_handler_instance @@ -47,7 +47,7 @@ def setUp(self): "src.detector.detector.MODEL_BASE_URL", "https://heibox.uni-heidelberg.de/d/0d5cbcbe16cd46a58021/", ) - @patch("src.detector.detector.KafkaConsumeHandler") + @patch("src.detector.detector.ExactlyOnceKafkaConsumeHandler") def test_get_model(self, mock_kafka_consume_handler): mock_kafka_consume_handler_instance = MagicMock() mock_kafka_consume_handler.return_value = mock_kafka_consume_handler_instance @@ -71,7 +71,7 @@ def setUp(self): "src.detector.detector.MODEL_BASE_URL", "https://heibox.uni-heidelberg.de/d/0d5cbcbe16cd46a58021/", ) - @patch("src.detector.detector.KafkaConsumeHandler") + @patch("src.detector.detector.ExactlyOnceKafkaConsumeHandler") def test_get_model(self, mock_kafka_consume_handler): mock_kafka_consume_handler_instance = MagicMock() mock_kafka_consume_handler.return_value = 
mock_kafka_consume_handler_instance @@ -87,7 +87,7 @@ def test_get_model(self, mock_kafka_consume_handler): "src.detector.detector.MODEL_BASE_URL", "https://heibox.uni-heidelberg.de/d/0d5cbcbe16cd46a58021/", ) - @patch("src.detector.detector.KafkaConsumeHandler") + @patch("src.detector.detector.ExactlyOnceKafkaConsumeHandler") def test_get_model_not_existing(self, mock_kafka_consume_handler): mock_kafka_consume_handler_instance = MagicMock() mock_kafka_consume_handler.return_value = mock_kafka_consume_handler_instance @@ -104,7 +104,7 @@ def test_get_model_not_existing(self, mock_kafka_consume_handler): "src.detector.detector.MODEL_BASE_URL", "https://heibox.uni-heidelberg.de/d/0d5cbcbe16cd46a58021/", ) - @patch("src.detector.detector.KafkaConsumeHandler") + @patch("src.detector.detector.ExactlyOnceKafkaConsumeHandler") def test_get_model_not_existing(self, mock_kafka_consume_handler): mock_kafka_consume_handler_instance = MagicMock() mock_kafka_consume_handler.return_value = mock_kafka_consume_handler_instance @@ -121,7 +121,7 @@ def test_get_model_not_existing(self, mock_kafka_consume_handler): "src.detector.detector.MODEL_BASE_URL", "https://heibox.uni-heidelberg.de/d/WRONG/", ) - @patch("src.detector.detector.KafkaConsumeHandler") + @patch("src.detector.detector.ExactlyOnceKafkaConsumeHandler") def test_get_model_not_existing(self, mock_kafka_consume_handler): mock_kafka_consume_handler_instance = MagicMock() mock_kafka_consume_handler.return_value = mock_kafka_consume_handler_instance @@ -132,7 +132,7 @@ def test_get_model_not_existing(self, mock_kafka_consume_handler): class TestInit(unittest.TestCase): @patch("src.detector.detector.logger") - @patch("src.detector.detector.KafkaConsumeHandler") + @patch("src.detector.detector.ExactlyOnceKafkaConsumeHandler") def test_init(self, mock_kafka_consume_handler, mock_logger): mock_kafka_consume_handler_instance = MagicMock() mock_kafka_consume_handler.return_value = mock_kafka_consume_handler_instance @@ -146,7 +146,7 @@ def test_init(self, mock_kafka_consume_handler, mock_logger): class TestGetData(unittest.TestCase): @patch("src.detector.detector.logger") - @patch("src.detector.detector.KafkaConsumeHandler") + @patch("src.detector.detector.ExactlyOnceKafkaConsumeHandler") def test_get_data_without_return_data( self, mock_kafka_consume_handler, mock_logger ): @@ -158,7 +158,7 @@ def test_get_data_without_return_data( mock_kafka_consume_handler_instance = MagicMock() mock_kafka_consume_handler.return_value = mock_kafka_consume_handler_instance - mock_kafka_consume_handler_instance.consume_and_return_object.return_value = ( + mock_kafka_consume_handler_instance.consume_as_object.return_value = ( "test", test_batch, ) @@ -169,7 +169,7 @@ def test_get_data_without_return_data( self.assertEqual([], sut.messages) @patch("src.detector.detector.logger") - @patch("src.detector.detector.KafkaConsumeHandler") + @patch("src.detector.detector.ExactlyOnceKafkaConsumeHandler") def test_get_data_with_return_data(self, mock_kafka_consume_handler, mock_logger): begin = datetime.now() end = begin + timedelta(0, 3) @@ -181,7 +181,7 @@ def test_get_data_with_return_data(self, mock_kafka_consume_handler, mock_logger mock_kafka_consume_handler_instance = MagicMock() mock_kafka_consume_handler.return_value = mock_kafka_consume_handler_instance - mock_kafka_consume_handler_instance.consume_and_return_object.return_value = ( + mock_kafka_consume_handler_instance.consume_as_object.return_value = ( "192.168.1.0/24", test_batch, ) @@ -195,7 +195,7 @@ def 
test_get_data_with_return_data(self, mock_kafka_consume_handler, mock_logger self.assertEqual([{"test": "test_message_2"}], sut.messages) @patch("src.detector.detector.logger") - @patch("src.detector.detector.KafkaConsumeHandler") + @patch("src.detector.detector.ExactlyOnceKafkaConsumeHandler") def test_get_data_while_busy(self, mock_kafka_consume_handler, mock_logger): begin = datetime.now() end = begin + timedelta(0, 3) @@ -234,7 +234,7 @@ def setUp(self): "src.detector.detector.MODEL_BASE_URL", "https://heibox.uni-heidelberg.de/d/0d5cbcbe16cd46a58021/", ) - @patch("src.detector.detector.KafkaConsumeHandler") + @patch("src.detector.detector.ExactlyOnceKafkaConsumeHandler") def test_save_warning(self, mock_kafka_consume_handler): mock_kafka_consume_handler_instance = MagicMock() mock_kafka_consume_handler.return_value = mock_kafka_consume_handler_instance @@ -271,7 +271,7 @@ def test_save_warning(self, mock_kafka_consume_handler): "src.detector.detector.MODEL_BASE_URL", "https://heibox.uni-heidelberg.de/d/0d5cbcbe16cd46a58021/", ) - @patch("src.detector.detector.KafkaConsumeHandler") + @patch("src.detector.detector.ExactlyOnceKafkaConsumeHandler") def test_save_empty_warning(self, mock_kafka_consume_handler): mock_kafka_consume_handler_instance = MagicMock() mock_kafka_consume_handler.return_value = mock_kafka_consume_handler_instance @@ -293,7 +293,7 @@ def test_save_empty_warning(self, mock_kafka_consume_handler): "src.detector.detector.MODEL_BASE_URL", "https://heibox.uni-heidelberg.de/d/0d5cbcbe16cd46a58021/", ) - @patch("src.detector.detector.KafkaConsumeHandler") + @patch("src.detector.detector.ExactlyOnceKafkaConsumeHandler") def test_save_warning_error(self, mock_kafka_consume_handler): mock_kafka_consume_handler_instance = MagicMock() mock_kafka_consume_handler.return_value = mock_kafka_consume_handler_instance @@ -318,7 +318,7 @@ def setUp(self): self.addCleanup(patcher.stop) @patch("src.detector.detector.logger") - @patch("src.detector.detector.KafkaConsumeHandler") + @patch("src.detector.detector.ExactlyOnceKafkaConsumeHandler") def test_clear_data_without_existing_data( self, mock_kafka_consume_handler, mock_logger ): @@ -340,7 +340,7 @@ def test_clear_data_without_existing_data( self.assertEqual([], sut.messages) @patch("src.detector.detector.logger") - @patch("src.detector.detector.KafkaConsumeHandler") + @patch("src.detector.detector.ExactlyOnceKafkaConsumeHandler") def test_clear_data_with_existing_data( self, mock_kafka_consume_handler, mock_logger ): diff --git a/tests/test_kafka_consume_handler.py b/tests/test_exactly_once_kafka_consume_handler.py similarity index 92% rename from tests/test_kafka_consume_handler.py rename to tests/test_exactly_once_kafka_consume_handler.py index d6751f6..6ef017d 100644 --- a/tests/test_kafka_consume_handler.py +++ b/tests/test_exactly_once_kafka_consume_handler.py @@ -3,7 +3,7 @@ from confluent_kafka import KafkaException -from src.base.kafka_handler import KafkaConsumeHandler +from src.base.kafka_handler import ExactlyOnceKafkaConsumeHandler CONSUMER_GROUP_ID = "test_group_id" @@ -40,7 +40,7 @@ def test_init(self, mock_consumer): "enable.partition.eof": True, } - sut = KafkaConsumeHandler(topics="test_topic") + sut = ExactlyOnceKafkaConsumeHandler(topics="test_topic") self.assertEqual(mock_consumer_instance, sut.consumer) @@ -81,7 +81,7 @@ def test_init_fail(self, mock_consumer, mock_logger): with patch.object(mock_consumer_instance, "assign", side_effect=KafkaException): with self.assertRaises(KafkaException): - sut = 
KafkaConsumeHandler(topics="test_topic") + sut = ExactlyOnceKafkaConsumeHandler(topics="test_topic") self.assertEqual(mock_consumer_instance, sut.consumer) @@ -114,7 +114,7 @@ def test_del_with_existing_consumer(self, mock_consumer): mock_consumer_instance = MagicMock() mock_consumer.return_value = mock_consumer_instance - sut = KafkaConsumeHandler(topics="test_topic") + sut = ExactlyOnceKafkaConsumeHandler(topics="test_topic") sut.consumer = mock_consumer_instance # Act @@ -147,7 +147,7 @@ def test_del_with_existing_consumer(self, mock_consumer): mock_consumer_instance = MagicMock() mock_consumer.return_value = mock_consumer_instance - sut = KafkaConsumeHandler(topics="test_topic") + sut = ExactlyOnceKafkaConsumeHandler(topics="test_topic") sut.consumer = None # Act @@ -181,7 +181,7 @@ def test_dict(self, mock_consumer): mock_consumer_instance = MagicMock() mock_consumer.return_value = mock_consumer_instance - sut = KafkaConsumeHandler(topics="test_topic") + sut = ExactlyOnceKafkaConsumeHandler(topics="test_topic") self.assertTrue(sut._is_dicts([{}, {}])) diff --git a/tests/test_kafka_produce_handler.py b/tests/test_exactly_once_kafka_produce_handler.py similarity index 88% rename from tests/test_kafka_produce_handler.py rename to tests/test_exactly_once_kafka_produce_handler.py index 8bb5985..8c58970 100644 --- a/tests/test_kafka_produce_handler.py +++ b/tests/test_exactly_once_kafka_produce_handler.py @@ -3,7 +3,7 @@ from confluent_kafka import KafkaException -from src.base.kafka_handler import KafkaProduceHandler +from src.base.kafka_handler import ExactlyOnceKafkaProduceHandler class TestInit(unittest.TestCase): @@ -34,7 +34,7 @@ def test_init(self, mock_producer): "transactional.id": "test_transactional_id", } - sut = KafkaProduceHandler(transactional_id="test_transactional_id") + sut = ExactlyOnceKafkaProduceHandler(transactional_id="test_transactional_id") self.assertIsNone(sut.consumer) self.assertEqual(mock_producer_instance, sut.producer) @@ -74,7 +74,9 @@ def test_init_fail(self, mock_producer, mock_logger): mock_producer_instance, "init_transactions", side_effect=KafkaException ): with self.assertRaises(KafkaException): - sut = KafkaProduceHandler(transactional_id="test_transactional_id") + sut = ExactlyOnceKafkaProduceHandler( + transactional_id="test_transactional_id" + ) mock_producer.assert_called_once_with(expected_conf) mock_producer_instance.init_transactions.assert_called_once() @@ -99,7 +101,9 @@ class TestSend(unittest.TestCase): ], ) @patch("src.base.kafka_handler.Producer") - @patch("src.base.kafka_handler.KafkaProduceHandler.commit_transaction_with_retry") + @patch( + "src.base.kafka_handler.ExactlyOnceKafkaProduceHandler.commit_transaction_with_retry" + ) @patch("src.base.kafka_handler.kafka_delivery_report") def test_send_with_data( self, @@ -110,8 +114,8 @@ def test_send_with_data( mock_producer_instance = MagicMock() mock_producer.return_value = mock_producer_instance - sut = KafkaProduceHandler(transactional_id="test_transactional_id") - sut.send("test_topic", "test_data", key=None) + sut = ExactlyOnceKafkaProduceHandler(transactional_id="test_transactional_id") + sut.produce("test_topic", "test_data", key=None) mock_producer_instance.produce.assert_called_once_with( topic="test_topic", @@ -141,8 +145,8 @@ def test_send_with_data( ) @patch("src.base.kafka_handler.Producer") def test_send_with_empty_data_string(self, mock_producer): - sut = KafkaProduceHandler(transactional_id="test_transactional_id") - sut.send("test_topic", "", None) + sut = 
ExactlyOnceKafkaProduceHandler(transactional_id="test_transactional_id") + sut.produce("test_topic", "", None) mock_producer.begin_transaction.assert_not_called() mock_producer.produce.assert_not_called() @@ -168,7 +172,9 @@ def test_send_with_empty_data_string(self, mock_producer): ) @patch("src.base.kafka_handler.Producer") @patch("src.base.kafka_handler.kafka_delivery_report") - @patch("src.base.kafka_handler.KafkaProduceHandler.commit_transaction_with_retry") + @patch( + "src.base.kafka_handler.ExactlyOnceKafkaProduceHandler.commit_transaction_with_retry" + ) def test_send_fail( self, mock_commit_transaction_with_retry, @@ -180,10 +186,10 @@ def test_send_fail( mock_producer.return_value = mock_producer_instance mock_commit_transaction_with_retry.side_effect = Exception - sut = KafkaProduceHandler(transactional_id="test_transactional_id") + sut = ExactlyOnceKafkaProduceHandler(transactional_id="test_transactional_id") with self.assertRaises(Exception): - sut.send("test_topic", "test_data", key=None) + sut.produce("test_topic", "test_data", key=None) mock_producer_instance.produce.assert_called_once_with( topic="test_topic", @@ -222,7 +228,7 @@ def test_commit_successful(self, mock_sleep, mock_producer): mock_producer.return_value = mock_producer_instance mock_producer.commit_transaction.return_value = None - sut = KafkaProduceHandler(transactional_id="test_transactional_id") + sut = ExactlyOnceKafkaProduceHandler(transactional_id="test_transactional_id") sut.commit_transaction_with_retry() mock_producer_instance.commit_transaction.assert_called_once() @@ -257,7 +263,7 @@ def test_commit_retries_then_successful(self, mock_sleep, mock_producer): None, ] - sut = KafkaProduceHandler(transactional_id="test_transactional_id") + sut = ExactlyOnceKafkaProduceHandler(transactional_id="test_transactional_id") sut.commit_transaction_with_retry() self.assertEqual(mock_producer_instance.commit_transaction.call_count, 2) @@ -290,7 +296,7 @@ def test_commit_retries_and_fails(self, mock_sleep, mock_producer, mock_logger): "Conflicting commit_transaction API call is already in progress" ) - sut = KafkaProduceHandler(transactional_id="test_transactional_id") + sut = ExactlyOnceKafkaProduceHandler(transactional_id="test_transactional_id") with self.assertRaises(RuntimeError) as context: sut.commit_transaction_with_retry() @@ -326,7 +332,7 @@ def test_commit_fails_with_other_exception(self, mock_sleep, mock_producer): "Some other error" ) - sut = KafkaProduceHandler(transactional_id="test_transactional_id") + sut = ExactlyOnceKafkaProduceHandler(transactional_id="test_transactional_id") with self.assertRaises(KafkaException) as context: sut.commit_transaction_with_retry() @@ -358,7 +364,7 @@ def test_del(self, mock_producer): mock_producer_instance = MagicMock() mock_producer.return_value = mock_producer_instance - sut = KafkaProduceHandler(transactional_id="test_transactional_id") + sut = ExactlyOnceKafkaProduceHandler(transactional_id="test_transactional_id") del sut mock_producer_instance.flush.assert_called_once() diff --git a/tests/test_inspector.py b/tests/test_inspector.py index 106ca66..2bd4669 100644 --- a/tests/test_inspector.py +++ b/tests/test_inspector.py @@ -34,8 +34,8 @@ def get_batch(data): class TestInit(unittest.TestCase): - @patch("src.inspector.inspector.KafkaProduceHandler") - @patch("src.inspector.inspector.KafkaConsumeHandler") + @patch("src.inspector.inspector.ExactlyOnceKafkaProduceHandler") + @patch("src.inspector.inspector.ExactlyOnceKafkaConsumeHandler") def test_init(self, 
mock_kafka_consume_handler, mock_produce_handler): mock_kafka_consume_handler_instance = MagicMock() mock_kafka_consume_handler.return_value = mock_kafka_consume_handler_instance @@ -51,14 +51,14 @@ def test_init(self, mock_kafka_consume_handler, mock_produce_handler): class TestGetData(unittest.TestCase): @patch("src.inspector.inspector.logger") - @patch("src.inspector.inspector.KafkaProduceHandler") - @patch("src.inspector.inspector.KafkaConsumeHandler") + @patch("src.inspector.inspector.ExactlyOnceKafkaProduceHandler") + @patch("src.inspector.inspector.ExactlyOnceKafkaConsumeHandler") def test_get_data_without_return_data( self, mock_kafka_consume_handler, mock_produce_handler, mock_logger ): mock_kafka_consume_handler_instance = MagicMock() mock_kafka_consume_handler.return_value = mock_kafka_consume_handler_instance - mock_kafka_consume_handler_instance.consume_and_return_object.return_value = ( + mock_kafka_consume_handler_instance.consume_as_object.return_value = ( None, get_batch(None), ) @@ -71,15 +71,15 @@ def test_get_data_without_return_data( self.assertEqual([], sut.messages) @patch("src.inspector.inspector.logger") - @patch("src.inspector.inspector.KafkaProduceHandler") - @patch("src.inspector.inspector.KafkaConsumeHandler") + @patch("src.inspector.inspector.ExactlyOnceKafkaProduceHandler") + @patch("src.inspector.inspector.ExactlyOnceKafkaConsumeHandler") def test_get_data_with_return_data( self, mock_kafka_consume_handler, mock_produce_handler, mock_logger ): test_batch = get_batch([{"test": "test_message_1"}, {"test": "test_message_2"}]) mock_kafka_consume_handler_instance = MagicMock() mock_kafka_consume_handler.return_value = mock_kafka_consume_handler_instance - mock_kafka_consume_handler_instance.consume_and_return_object.return_value = ( + mock_kafka_consume_handler_instance.consume_as_object.return_value = ( None, test_batch, ) @@ -97,8 +97,8 @@ def test_get_data_with_return_data( ) @patch("src.inspector.inspector.logger") - @patch("src.inspector.inspector.KafkaProduceHandler") - @patch("src.inspector.inspector.KafkaConsumeHandler") + @patch("src.inspector.inspector.ExactlyOnceKafkaProduceHandler") + @patch("src.inspector.inspector.ExactlyOnceKafkaConsumeHandler") def test_get_data_with_no_return_data( self, mock_kafka_consume_handler, mock_produce_handler, mock_logger ): @@ -106,7 +106,7 @@ def test_get_data_with_no_return_data( end = None mock_kafka_consume_handler_instance = MagicMock() mock_kafka_consume_handler.return_value = mock_kafka_consume_handler_instance - mock_kafka_consume_handler_instance.consume_and_return_object.return_value = ( + mock_kafka_consume_handler_instance.consume_as_object.return_value = ( None, None, ) @@ -121,8 +121,8 @@ def test_get_data_with_no_return_data( self.assertEqual(end, sut.end_timestamp) self.assertEqual([], sut.messages) - @patch("src.inspector.inspector.KafkaProduceHandler") - @patch("src.inspector.inspector.KafkaConsumeHandler") + @patch("src.inspector.inspector.ExactlyOnceKafkaProduceHandler") + @patch("src.inspector.inspector.ExactlyOnceKafkaConsumeHandler") def test_get_data_while_busy( self, mock_kafka_consume_handler, mock_produce_handler ): @@ -147,8 +147,8 @@ def test_get_data_while_busy( class TestClearData(unittest.TestCase): - @patch("src.inspector.inspector.KafkaProduceHandler") - @patch("src.inspector.inspector.KafkaConsumeHandler") + @patch("src.inspector.inspector.ExactlyOnceKafkaProduceHandler") + @patch("src.inspector.inspector.ExactlyOnceKafkaConsumeHandler") def test_clear_data_without_existing_data( 
self, mock_kafka_consume_handler, mock_produce_handler ): @@ -168,8 +168,8 @@ def test_clear_data_without_existing_data( self.assertEqual([], sut.messages) - @patch("src.inspector.inspector.KafkaProduceHandler") - @patch("src.inspector.inspector.KafkaConsumeHandler") + @patch("src.inspector.inspector.ExactlyOnceKafkaProduceHandler") + @patch("src.inspector.inspector.ExactlyOnceKafkaConsumeHandler") def test_clear_data_with_existing_data( self, mock_kafka_consume_handler, mock_produce_handler ): @@ -196,8 +196,8 @@ def test_clear_data_with_existing_data( class TestDataFunction(unittest.TestCase): - @patch("src.inspector.inspector.KafkaProduceHandler") - @patch("src.inspector.inspector.KafkaConsumeHandler") + @patch("src.inspector.inspector.ExactlyOnceKafkaProduceHandler") + @patch("src.inspector.inspector.ExactlyOnceKafkaConsumeHandler") @patch("src.inspector.inspector.TIME_TYPE", "ms") @patch("src.inspector.inspector.TIME_RANGE", 1) def test_count_errors(self, mock_kafka_consume_handler, mock_produce_handler): @@ -219,8 +219,8 @@ def test_count_errors(self, mock_kafka_consume_handler, mock_produce_handler): sut._count_errors(messages, begin_timestamp, end_timestamp), ) - @patch("src.inspector.inspector.KafkaProduceHandler") - @patch("src.inspector.inspector.KafkaConsumeHandler") + @patch("src.inspector.inspector.ExactlyOnceKafkaProduceHandler") + @patch("src.inspector.inspector.ExactlyOnceKafkaConsumeHandler") @patch("src.inspector.inspector.TIME_TYPE", "ms") @patch("src.inspector.inspector.TIME_RANGE", 1) def test_mean_packet_size(self, mock_kafka_consume_handler, mock_produce_handler): @@ -242,8 +242,8 @@ def test_mean_packet_size(self, mock_kafka_consume_handler, mock_produce_handler sut._mean_packet_size(messages, begin_timestamp, end_timestamp), ) - @patch("src.inspector.inspector.KafkaProduceHandler") - @patch("src.inspector.inspector.KafkaConsumeHandler") + @patch("src.inspector.inspector.ExactlyOnceKafkaProduceHandler") + @patch("src.inspector.inspector.ExactlyOnceKafkaConsumeHandler") def test_count_errors_empty_messages( self, mock_kafka_consume_handler, mock_produce_handler ): @@ -264,8 +264,8 @@ def test_count_errors_empty_messages( sut._count_errors([], begin_timestamp, end_timestamp), ) - @patch("src.inspector.inspector.KafkaProduceHandler") - @patch("src.inspector.inspector.KafkaConsumeHandler") + @patch("src.inspector.inspector.ExactlyOnceKafkaProduceHandler") + @patch("src.inspector.inspector.ExactlyOnceKafkaConsumeHandler") def test_mean_packet_size_empty_messages( self, mock_kafka_consume_handler, mock_produce_handler ): @@ -289,8 +289,8 @@ def test_mean_packet_size_empty_messages( class TestInspectFunction(unittest.TestCase): @patch("src.inspector.inspector.logger") - @patch("src.inspector.inspector.KafkaProduceHandler") - @patch("src.inspector.inspector.KafkaConsumeHandler") + @patch("src.inspector.inspector.ExactlyOnceKafkaProduceHandler") + @patch("src.inspector.inspector.ExactlyOnceKafkaConsumeHandler") @patch( "src.inspector.inspector.MODELS", None, @@ -300,7 +300,7 @@ def test_inspect_none_models( ): mock_kafka_consume_handler_instance = MagicMock() mock_kafka_consume_handler.return_value = mock_kafka_consume_handler_instance - mock_kafka_consume_handler_instance.consume_and_return_object.return_value = ( + mock_kafka_consume_handler_instance.consume_as_object.return_value = ( "test", get_batch(None), ) @@ -312,8 +312,8 @@ def test_inspect_none_models( sut.inspect() @patch("src.inspector.inspector.logger") - @patch("src.inspector.inspector.KafkaProduceHandler") - 
@patch("src.inspector.inspector.KafkaConsumeHandler") + @patch("src.inspector.inspector.ExactlyOnceKafkaProduceHandler") + @patch("src.inspector.inspector.ExactlyOnceKafkaConsumeHandler") @patch( "src.inspector.inspector.MODELS", "", @@ -323,7 +323,7 @@ def test_inspect_empy_models( ): mock_kafka_consume_handler_instance = MagicMock() mock_kafka_consume_handler.return_value = mock_kafka_consume_handler_instance - mock_kafka_consume_handler_instance.consume_and_return_object.return_value = ( + mock_kafka_consume_handler_instance.consume_as_object.return_value = ( "test", get_batch(None), ) @@ -335,8 +335,8 @@ def test_inspect_empy_models( sut.inspect() @patch("src.inspector.inspector.logger") - @patch("src.inspector.inspector.KafkaProduceHandler") - @patch("src.inspector.inspector.KafkaConsumeHandler") + @patch("src.inspector.inspector.ExactlyOnceKafkaProduceHandler") + @patch("src.inspector.inspector.ExactlyOnceKafkaConsumeHandler") @patch( "src.inspector.inspector.MODELS", [{"model": "ZScoreDetector", "module": "streamad.model", "model_args": {}}], @@ -356,7 +356,7 @@ def test_inspect_univariate( test_batch.data = [data] mock_kafka_consume_handler_instance = MagicMock() mock_kafka_consume_handler.return_value = mock_kafka_consume_handler_instance - mock_kafka_consume_handler_instance.consume_and_return_object.return_value = ( + mock_kafka_consume_handler_instance.consume_as_object.return_value = ( "test", test_batch, ) @@ -369,8 +369,8 @@ def test_inspect_univariate( self.assertEqual([0, 0], sut.anomalies) @patch("src.inspector.inspector.logger") - @patch("src.inspector.inspector.KafkaProduceHandler") - @patch("src.inspector.inspector.KafkaConsumeHandler") + @patch("src.inspector.inspector.ExactlyOnceKafkaProduceHandler") + @patch("src.inspector.inspector.ExactlyOnceKafkaConsumeHandler") @patch( "src.inspector.inspector.MODELS", [ @@ -396,7 +396,7 @@ def test_inspect_univariate( test_batch.data = [data] mock_kafka_consume_handler_instance = MagicMock() mock_kafka_consume_handler.return_value = mock_kafka_consume_handler_instance - mock_kafka_consume_handler_instance.consume_and_return_object.return_value = ( + mock_kafka_consume_handler_instance.consume_as_object.return_value = ( "test", test_batch, ) @@ -409,8 +409,8 @@ def test_inspect_univariate( self.assertNotEqual([None, None], sut.anomalies) @patch("src.inspector.inspector.logger") - @patch("src.inspector.inspector.KafkaProduceHandler") - @patch("src.inspector.inspector.KafkaConsumeHandler") + @patch("src.inspector.inspector.ExactlyOnceKafkaProduceHandler") + @patch("src.inspector.inspector.ExactlyOnceKafkaConsumeHandler") @patch( "src.inspector.inspector.MODELS", [ @@ -433,7 +433,7 @@ def test_inspect_univariate_two_models( test_batch.data = [data] mock_kafka_consume_handler_instance = MagicMock() mock_kafka_consume_handler.return_value = mock_kafka_consume_handler_instance - mock_kafka_consume_handler_instance.consume_and_return_object.return_value = ( + mock_kafka_consume_handler_instance.consume_as_object.return_value = ( "test", test_batch, ) @@ -447,8 +447,8 @@ def test_inspect_univariate_two_models( self.assertTrue(isinstance(sut.model, ZScoreDetector)) @patch("src.inspector.inspector.logger") - @patch("src.inspector.inspector.KafkaProduceHandler") - @patch("src.inspector.inspector.KafkaConsumeHandler") + @patch("src.inspector.inspector.ExactlyOnceKafkaProduceHandler") + @patch("src.inspector.inspector.ExactlyOnceKafkaConsumeHandler") @patch( "src.inspector.inspector.MODELS", [{"model": "RShashDetector", "module": 
"streamad.model", "model_args": {}}], @@ -467,7 +467,7 @@ def test_inspect_multivariate( test_batch.data = [data] mock_kafka_consume_handler_instance = MagicMock() mock_kafka_consume_handler.return_value = mock_kafka_consume_handler_instance - mock_kafka_consume_handler_instance.consume_and_return_object.return_value = ( + mock_kafka_consume_handler_instance.consume_as_object.return_value = ( "test", test_batch, ) @@ -480,8 +480,8 @@ def test_inspect_multivariate( self.assertEqual([0, 0], sut.anomalies) @patch("src.inspector.inspector.logger") - @patch("src.inspector.inspector.KafkaProduceHandler") - @patch("src.inspector.inspector.KafkaConsumeHandler") + @patch("src.inspector.inspector.ExactlyOnceKafkaProduceHandler") + @patch("src.inspector.inspector.ExactlyOnceKafkaConsumeHandler") @patch( "src.inspector.inspector.MODELS", [ @@ -506,7 +506,7 @@ def test_inspect_multivariate_window_len( test_batch.data = [data] mock_kafka_consume_handler_instance = MagicMock() mock_kafka_consume_handler.return_value = mock_kafka_consume_handler_instance - mock_kafka_consume_handler_instance.consume_and_return_object.return_value = ( + mock_kafka_consume_handler_instance.consume_as_object.return_value = ( "test", test_batch, ) @@ -519,8 +519,8 @@ def test_inspect_multivariate_window_len( self.assertNotEqual([None, None], sut.anomalies) @patch("src.inspector.inspector.logger") - @patch("src.inspector.inspector.KafkaProduceHandler") - @patch("src.inspector.inspector.KafkaConsumeHandler") + @patch("src.inspector.inspector.ExactlyOnceKafkaProduceHandler") + @patch("src.inspector.inspector.ExactlyOnceKafkaConsumeHandler") @patch( "src.inspector.inspector.MODELS", [ @@ -542,7 +542,7 @@ def test_inspect_multivariate_two_models( test_batch.data = [data] mock_kafka_consume_handler_instance = MagicMock() mock_kafka_consume_handler.return_value = mock_kafka_consume_handler_instance - mock_kafka_consume_handler_instance.consume_and_return_object.return_value = ( + mock_kafka_consume_handler_instance.consume_as_object.return_value = ( "test", test_batch, ) @@ -556,8 +556,8 @@ def test_inspect_multivariate_two_models( self.assertTrue(isinstance(sut.model, RShashDetector)) @patch("src.inspector.inspector.logger") - @patch("src.inspector.inspector.KafkaProduceHandler") - @patch("src.inspector.inspector.KafkaConsumeHandler") + @patch("src.inspector.inspector.ExactlyOnceKafkaProduceHandler") + @patch("src.inspector.inspector.ExactlyOnceKafkaConsumeHandler") @patch( "src.inspector.inspector.MODELS", [ @@ -587,7 +587,7 @@ def test_inspect_ensemble( test_batch.data = [data] mock_kafka_consume_handler_instance = MagicMock() mock_kafka_consume_handler.return_value = mock_kafka_consume_handler_instance - mock_kafka_consume_handler_instance.consume_and_return_object.return_value = ( + mock_kafka_consume_handler_instance.consume_as_object.return_value = ( "test", test_batch, ) @@ -600,8 +600,8 @@ def test_inspect_ensemble( self.assertEqual([0, 0], sut.anomalies) @patch("src.inspector.inspector.logger") - @patch("src.inspector.inspector.KafkaProduceHandler") - @patch("src.inspector.inspector.KafkaConsumeHandler") + @patch("src.inspector.inspector.ExactlyOnceKafkaProduceHandler") + @patch("src.inspector.inspector.ExactlyOnceKafkaConsumeHandler") @patch( "src.inspector.inspector.MODELS", [ @@ -639,7 +639,7 @@ def test_inspect_ensemble_window_len( test_batch.data = [data] mock_kafka_consume_handler_instance = MagicMock() mock_kafka_consume_handler.return_value = mock_kafka_consume_handler_instance - 
mock_kafka_consume_handler_instance.consume_and_return_object.return_value = ( + mock_kafka_consume_handler_instance.consume_as_object.return_value = ( "test", test_batch, ) @@ -652,8 +652,8 @@ def test_inspect_ensemble_window_len( self.assertNotEqual([None, None], sut.anomalies) @patch("src.inspector.inspector.logger") - @patch("src.inspector.inspector.KafkaProduceHandler") - @patch("src.inspector.inspector.KafkaConsumeHandler") + @patch("src.inspector.inspector.ExactlyOnceKafkaProduceHandler") + @patch("src.inspector.inspector.ExactlyOnceKafkaConsumeHandler") @patch( "src.inspector.inspector.MODELS", [ @@ -683,7 +683,7 @@ def test_inspect_ensemble_invalid( test_batch.data = [data] mock_kafka_consume_handler_instance = MagicMock() mock_kafka_consume_handler.return_value = mock_kafka_consume_handler_instance - mock_kafka_consume_handler_instance.consume_and_return_object.return_value = ( + mock_kafka_consume_handler_instance.consume_as_object.return_value = ( "test", test_batch, ) @@ -696,8 +696,8 @@ def test_inspect_ensemble_invalid( sut.inspect() @patch("src.inspector.inspector.logger") - @patch("src.inspector.inspector.KafkaProduceHandler") - @patch("src.inspector.inspector.KafkaConsumeHandler") + @patch("src.inspector.inspector.ExactlyOnceKafkaProduceHandler") + @patch("src.inspector.inspector.ExactlyOnceKafkaConsumeHandler") @patch( "src.inspector.inspector.MODELS", [{"model": "INVALID", "module": "streamad.model"}], @@ -715,8 +715,8 @@ def test_invalid_model_univariate( sut.inspect() @patch("src.inspector.inspector.logger") - @patch("src.inspector.inspector.KafkaProduceHandler") - @patch("src.inspector.inspector.KafkaConsumeHandler") + @patch("src.inspector.inspector.ExactlyOnceKafkaProduceHandler") + @patch("src.inspector.inspector.ExactlyOnceKafkaConsumeHandler") @patch( "src.inspector.inspector.MODELS", [{"model": "INVALID", "module": "streamad.model"}], @@ -735,8 +735,8 @@ def test_invalid_model_multivariate( sut.inspect() @patch("src.inspector.inspector.logger") - @patch("src.inspector.inspector.KafkaProduceHandler") - @patch("src.inspector.inspector.KafkaConsumeHandler") + @patch("src.inspector.inspector.ExactlyOnceKafkaProduceHandler") + @patch("src.inspector.inspector.ExactlyOnceKafkaConsumeHandler") @patch( "src.inspector.inspector.ENSEMBLE", {"model": "INVALID", "module": "streamad.process"}, @@ -754,8 +754,8 @@ def test_invalid_model_ensemble( with self.assertRaises(NotImplementedError): sut.inspect() - @patch("src.inspector.inspector.KafkaProduceHandler") - @patch("src.inspector.inspector.KafkaConsumeHandler") + @patch("src.inspector.inspector.ExactlyOnceKafkaProduceHandler") + @patch("src.inspector.inspector.ExactlyOnceKafkaConsumeHandler") @patch("src.inspector.inspector.MODE", "INVALID") def test_invalid_mode(self, mock_kafka_consume_handler, mock_produce_handler): mock_kafka_consume_handler_instance = MagicMock() @@ -769,8 +769,8 @@ def test_invalid_mode(self, mock_kafka_consume_handler, mock_produce_handler): class TestSend(unittest.TestCase): - @patch("src.inspector.inspector.KafkaProduceHandler") - @patch("src.inspector.inspector.KafkaConsumeHandler") + @patch("src.inspector.inspector.ExactlyOnceKafkaProduceHandler") + @patch("src.inspector.inspector.ExactlyOnceKafkaConsumeHandler") @patch("src.inspector.inspector.SCORE_THRESHOLD", 0.1) @patch("src.inspector.inspector.ANOMALY_THRESHOLD", 0.01) def test_send(self, mock_kafka_consume_handler, mock_produce_handler): diff --git a/tests/test_prefilter.py b/tests/test_prefilter.py index b5bd29a..dc84782 100644 --- 
a/tests/test_prefilter.py +++ b/tests/test_prefilter.py @@ -8,8 +8,8 @@ class TestInit(unittest.TestCase): @patch("src.prefilter.prefilter.LoglineHandler") - @patch("src.prefilter.prefilter.KafkaConsumeHandler") - @patch("src.prefilter.prefilter.KafkaProduceHandler") + @patch("src.prefilter.prefilter.ExactlyOnceKafkaConsumeHandler") + @patch("src.prefilter.prefilter.ExactlyOnceKafkaProduceHandler") def test_valid_init( self, mock_produce_handler, mock_consume_handler, mock_logline_handler ): @@ -33,8 +33,8 @@ def test_valid_init( class TestGetAndFillData(unittest.TestCase): @patch("src.prefilter.prefilter.logger") @patch("src.prefilter.prefilter.LoglineHandler") - @patch("src.prefilter.prefilter.KafkaConsumeHandler") - @patch("src.prefilter.prefilter.KafkaProduceHandler") + @patch("src.prefilter.prefilter.ExactlyOnceKafkaConsumeHandler") + @patch("src.prefilter.prefilter.ExactlyOnceKafkaProduceHandler") def test_get_data_without_new_data( self, mock_produce_handler, @@ -62,8 +62,8 @@ def test_get_data_without_new_data( @patch("src.prefilter.prefilter.logger") @patch("src.prefilter.prefilter.LoglineHandler") - @patch("src.prefilter.prefilter.KafkaConsumeHandler") - @patch("src.prefilter.prefilter.KafkaProduceHandler") + @patch("src.prefilter.prefilter.ExactlyOnceKafkaConsumeHandler") + @patch("src.prefilter.prefilter.ExactlyOnceKafkaProduceHandler") def test_get_data_with_new_data( self, mock_produce_handler, @@ -95,8 +95,8 @@ def test_get_data_with_new_data( @patch("src.prefilter.prefilter.logger") @patch("src.prefilter.prefilter.LoglineHandler") - @patch("src.prefilter.prefilter.KafkaConsumeHandler") - @patch("src.prefilter.prefilter.KafkaProduceHandler") + @patch("src.prefilter.prefilter.ExactlyOnceKafkaConsumeHandler") + @patch("src.prefilter.prefilter.ExactlyOnceKafkaProduceHandler") def test_get_data_with_existing_data( self, mock_batch_handler, @@ -131,8 +131,8 @@ def test_get_data_with_existing_data( class TestFilterByError(unittest.TestCase): @patch("src.prefilter.prefilter.logger") @patch("src.prefilter.prefilter.LoglineHandler") - @patch("src.prefilter.prefilter.KafkaConsumeHandler") - @patch("src.prefilter.prefilter.KafkaProduceHandler") + @patch("src.prefilter.prefilter.ExactlyOnceKafkaConsumeHandler") + @patch("src.prefilter.prefilter.ExactlyOnceKafkaProduceHandler") def test_filter_by_error_empty_data( self, mock_produce_handler, @@ -149,8 +149,8 @@ def test_filter_by_error_empty_data( @patch("src.prefilter.prefilter.logger") @patch("src.prefilter.prefilter.LoglineHandler") - @patch("src.prefilter.prefilter.KafkaConsumeHandler") - @patch("src.prefilter.prefilter.KafkaProduceHandler") + @patch("src.prefilter.prefilter.ExactlyOnceKafkaConsumeHandler") + @patch("src.prefilter.prefilter.ExactlyOnceKafkaProduceHandler") def test_filter_by_error_with_data_no_error_types( self, mock_produce_handler, @@ -205,8 +205,8 @@ def test_filter_by_error_with_data_no_error_types( @patch("src.prefilter.prefilter.logger") @patch("src.prefilter.prefilter.LoglineHandler") - @patch("src.prefilter.prefilter.KafkaConsumeHandler") - @patch("src.prefilter.prefilter.KafkaProduceHandler") + @patch("src.prefilter.prefilter.ExactlyOnceKafkaConsumeHandler") + @patch("src.prefilter.prefilter.ExactlyOnceKafkaProduceHandler") def test_filter_by_error_with_data_one_error_type( self, mock_produce_handler, @@ -261,8 +261,8 @@ def test_filter_by_error_with_data_one_error_type( @patch("src.prefilter.prefilter.logger") @patch("src.prefilter.prefilter.LoglineHandler") - 
@patch("src.prefilter.prefilter.KafkaConsumeHandler") - @patch("src.prefilter.prefilter.KafkaProduceHandler") + @patch("src.prefilter.prefilter.ExactlyOnceKafkaConsumeHandler") + @patch("src.prefilter.prefilter.ExactlyOnceKafkaProduceHandler") def test_filter_by_error_with_data_two_error_types( self, mock_produce_handler, @@ -319,8 +319,8 @@ def test_filter_by_error_with_data_two_error_types( class TestSendFilteredData(unittest.TestCase): @patch("src.prefilter.prefilter.logger") @patch("src.prefilter.prefilter.LoglineHandler") - @patch("src.prefilter.prefilter.KafkaConsumeHandler") - @patch("src.prefilter.prefilter.KafkaProduceHandler") + @patch("src.prefilter.prefilter.ExactlyOnceKafkaConsumeHandler") + @patch("src.prefilter.prefilter.ExactlyOnceKafkaProduceHandler") def test_send_with_data( self, mock_produce_handler, @@ -377,8 +377,8 @@ def test_send_with_data( @patch("src.prefilter.prefilter.logger") @patch("src.prefilter.prefilter.LoglineHandler") - @patch("src.prefilter.prefilter.KafkaConsumeHandler") - @patch("src.prefilter.prefilter.KafkaProduceHandler") + @patch("src.prefilter.prefilter.ExactlyOnceKafkaConsumeHandler") + @patch("src.prefilter.prefilter.ExactlyOnceKafkaProduceHandler") def test_send_without_filtered_data_with_unfiltered_data( self, mock_produce_handler, @@ -399,8 +399,8 @@ def test_send_without_filtered_data_with_unfiltered_data( mock_produce_handler.add_message.assert_not_called() @patch("src.prefilter.prefilter.LoglineHandler") - @patch("src.prefilter.prefilter.KafkaConsumeHandler") - @patch("src.prefilter.prefilter.KafkaProduceHandler") + @patch("src.prefilter.prefilter.ExactlyOnceKafkaConsumeHandler") + @patch("src.prefilter.prefilter.ExactlyOnceKafkaProduceHandler") def test_send_without_data( self, mock_produce_handler, mock_consume_handler, mock_logline_handler ): @@ -418,8 +418,8 @@ def test_send_without_data( class TestClearData(unittest.TestCase): @patch("src.prefilter.prefilter.LoglineHandler") - @patch("src.prefilter.prefilter.KafkaConsumeHandler") - @patch("src.prefilter.prefilter.KafkaProduceHandler") + @patch("src.prefilter.prefilter.ExactlyOnceKafkaConsumeHandler") + @patch("src.prefilter.prefilter.ExactlyOnceKafkaProduceHandler") def test_clear_data_with_data( self, mock_produce_handler, mock_consume_handler, mock_logline_handler ): @@ -453,8 +453,8 @@ def test_clear_data_with_data( self.assertEqual([], sut.filtered_data) @patch("src.prefilter.prefilter.LoglineHandler") - @patch("src.prefilter.prefilter.KafkaConsumeHandler") - @patch("src.prefilter.prefilter.KafkaProduceHandler") + @patch("src.prefilter.prefilter.ExactlyOnceKafkaConsumeHandler") + @patch("src.prefilter.prefilter.ExactlyOnceKafkaProduceHandler") def test_clear_data_without_data( self, mock_produce_handler, mock_consume_handler, mock_logline_handler ): diff --git a/tests/test_server.py b/tests/test_server.py index 570060f..212db38 100644 --- a/tests/test_server.py +++ b/tests/test_server.py @@ -16,248 +16,105 @@ class TestInit(unittest.TestCase): @patch("src.logserver.server.HOSTNAME", "127.0.0.1") - @patch("src.logserver.server.PORT_IN", 7777) - @patch("src.logserver.server.PORT_OUT", 8888) @patch("src.logserver.server.LISTEN_ON_TOPIC", "test_topic") - @patch("src.logserver.server.KafkaConsumeHandler") - @patch("src.logserver.server.ClickHouseKafkaSender") - def test_valid_init_ipv4(self, mock_kafka_sender, mock_kafka_consume_handler): + @patch("src.logserver.server.Lock") + @patch("src.logserver.server.SimpleKafkaProduceHandler") + 
@patch("src.logserver.server.SimpleKafkaConsumeHandler") + def test_valid_init_ipv4( + self, mock_kafka_consume_handler, mock_kafka_produce_handler, mock_lock + ): mock_kafka_consume_handler_instance = MagicMock() + mock_kafka_produce_handler_instance = MagicMock() + mock_lock_instance = MagicMock() + + mock_kafka_produce_handler.return_value = mock_kafka_produce_handler_instance mock_kafka_consume_handler.return_value = mock_kafka_consume_handler_instance + mock_lock.return_value = mock_lock_instance sut = LogServer() self.assertEqual(IPv4Address("127.0.0.1"), sut.host) - self.assertEqual(7777, sut.port_in) - self.assertEqual(8888, sut.port_out) - self.assertTrue(sut.data_queue.empty()) - self.assertEqual(0, sut.number_of_connections) + self.assertEqual(mock_lock_instance, sut.lock) self.assertEqual(mock_kafka_consume_handler_instance, sut.kafka_consume_handler) + self.assertEqual(mock_kafka_produce_handler_instance, sut.kafka_produce_handler) mock_kafka_consume_handler.assert_called_once_with(topics="test_topic") @patch("src.logserver.server.HOSTNAME", "fe80::1") - @patch("src.logserver.server.PORT_IN", 7777) - @patch("src.logserver.server.PORT_OUT", 8888) @patch("src.logserver.server.LISTEN_ON_TOPIC", "test_topic") - @patch("src.logserver.server.KafkaConsumeHandler") - @patch("src.logserver.server.ClickHouseKafkaSender") - def test_valid_init_ipv6(self, mock_kafka_sender, mock_kafka_consume_handler): + @patch("src.logserver.server.Lock") + @patch("src.logserver.server.SimpleKafkaProduceHandler") + @patch("src.logserver.server.SimpleKafkaConsumeHandler") + def test_valid_init_ipv6( + self, mock_kafka_consume_handler, mock_kafka_produce_handler, mock_lock + ): mock_kafka_consume_handler_instance = MagicMock() + mock_kafka_produce_handler_instance = MagicMock() + mock_lock_instance = MagicMock() + + mock_kafka_produce_handler.return_value = mock_kafka_produce_handler_instance mock_kafka_consume_handler.return_value = mock_kafka_consume_handler_instance + mock_lock.return_value = mock_lock_instance sut = LogServer() self.assertEqual(IPv6Address("fe80::1"), sut.host) - self.assertEqual(7777, sut.port_in) - self.assertEqual(8888, sut.port_out) - self.assertTrue(sut.data_queue.empty()) - self.assertEqual(0, sut.number_of_connections) + self.assertEqual(mock_lock_instance, sut.lock) self.assertEqual(mock_kafka_consume_handler_instance, sut.kafka_consume_handler) + self.assertEqual(mock_kafka_produce_handler_instance, sut.kafka_produce_handler) mock_kafka_consume_handler.assert_called_once_with(topics="test_topic") @patch("src.logserver.server.HOSTNAME", "256.256.256.256") - @patch("src.logserver.server.PORT_IN", 7777) - @patch("src.logserver.server.PORT_OUT", 8888) @patch("src.logserver.server.LISTEN_ON_TOPIC", "test_topic") - @patch("src.logserver.server.KafkaConsumeHandler") - def test_invalid_init_with_invalid_host(self, mock_kafka_consume_handler): + @patch("src.logserver.server.Lock") + @patch("src.logserver.server.SimpleKafkaProduceHandler") + @patch("src.logserver.server.SimpleKafkaConsumeHandler") + def test_invalid_init_with_invalid_host( + self, mock_kafka_consume_handler, mock_kafka_produce_handler, mock_lock + ): mock_kafka_consume_handler_instance = MagicMock() + mock_kafka_produce_handler_instance = MagicMock() + mock_lock_instance = MagicMock() + + mock_kafka_produce_handler.return_value = mock_kafka_produce_handler_instance mock_kafka_consume_handler.return_value = mock_kafka_consume_handler_instance + mock_lock.return_value = mock_lock_instance with 
self.assertRaises(ValueError): LogServer() mock_kafka_consume_handler.assert_not_called() + mock_kafka_produce_handler.assert_not_called() class TestOpen(unittest.IsolatedAsyncioTestCase): - @patch("src.logserver.server.logger") @patch("src.logserver.server.HOSTNAME", "127.0.0.1") - @patch("src.logserver.server.PORT_IN", 1234) - @patch("src.logserver.server.PORT_OUT", 5678) - @patch("src.logserver.server.LogServer.handle_kafka_inputs") - @patch("src.logserver.server.LogServer.async_follow") - @patch("src.logserver.server.KafkaConsumeHandler") - @patch("src.logserver.server.ClickHouseKafkaSender") + @patch("src.logserver.server.logger") + @patch("src.logserver.server.LogServer.fetch_from_kafka") + @patch("src.logserver.server.LogServer.fetch_from_file") + @patch("src.logserver.server.SimpleKafkaConsumeHandler") async def test_open( self, - mock_kafka_sender, mock_kafka_consume_handler, - mock_follow, - mock_handle_kafka, + mock_fetch_from_file, + mock_fetch_from_kafka, mock_logger, ): # Arrange sut = LogServer() - with patch("asyncio.start_server", new_callable=AsyncMock) as mock_start_server: - mock_send_server = MagicMock() - mock_receive_server = MagicMock() - - mock_start_server.side_effect = [mock_send_server, mock_receive_server] - - mock_send_server.serve_forever = AsyncMock() - mock_receive_server.serve_forever = AsyncMock() - mock_send_server.wait_closed = AsyncMock() - mock_receive_server.wait_closed = AsyncMock() - - # Act - await sut.open() - - # Assert - mock_start_server.assert_any_call( - sut.handle_send_logline, "127.0.0.1", 5678 - ) - mock_start_server.assert_any_call( - sut.handle_receive_logline, "127.0.0.1", 1234 - ) - mock_send_server.serve_forever.assert_awaited_once() - mock_receive_server.serve_forever.assert_awaited_once() - mock_send_server.close.assert_called_once() - mock_receive_server.close.assert_called_once() - mock_send_server.wait_closed.assert_awaited_once() - mock_receive_server.wait_closed.assert_awaited_once() - mock_handle_kafka.assert_called_once() - mock_follow.assert_called_once() - - @patch("src.logserver.server.logger") - @patch("src.logserver.server.HOSTNAME", "127.0.0.1") - @patch("src.logserver.server.PORT_IN", 1234) - @patch("src.logserver.server.PORT_OUT", 5678) - @patch("src.logserver.server.ClickHouseKafkaSender") - async def test_open_keyboard_interrupt(self, mock_kafka_sender, mock_logger): - # Arrange - sut = LogServer() - - with patch("asyncio.start_server", new_callable=AsyncMock) as mock_start_server: - mock_send_server = MagicMock() - mock_receive_server = MagicMock() - - mock_start_server.side_effect = [mock_send_server, mock_receive_server] - - mock_send_server.serve_forever.side_effect = KeyboardInterrupt - mock_receive_server.serve_forever = AsyncMock() - mock_send_server.wait_closed = AsyncMock() - mock_receive_server.wait_closed = AsyncMock() - - # Act & Assert - await sut.open() - - # Additional Assertions - mock_send_server.close.assert_called_once() - mock_receive_server.close.assert_called_once() - mock_send_server.wait_closed.assert_awaited_once() - mock_receive_server.wait_closed.assert_awaited_once() - - -class TestHandleConnection(unittest.IsolatedAsyncioTestCase): - @patch("src.logserver.server.ClickHouseKafkaSender") - async def test_handle_connection_sending(self, mock_kafka_sender): - server_instance = LogServer() - server_instance.send_logline = AsyncMock() - server_instance.get_next_logline = MagicMock(return_value="test logline") - - reader = AsyncMock() - writer = AsyncMock() - writer.get_extra_info = 
MagicMock(return_value="test_address") - - await server_instance.handle_connection(reader, writer, sending=True) - - server_instance.send_logline.assert_awaited_once_with(writer, "test logline") - writer.close.assert_called_once() - writer.wait_closed.assert_awaited_once() - self.assertEqual(0, server_instance.number_of_connections) - - @patch("src.logserver.server.ClickHouseKafkaSender") - async def test_handle_connection_receiving(self, mock_kafka_sender): - server_instance = LogServer() - server_instance.receive_logline = AsyncMock() - - reader = AsyncMock() - writer = AsyncMock() - writer.get_extra_info = MagicMock(return_value="test_address") - - await server_instance.handle_connection(reader, writer, sending=False) - - server_instance.receive_logline.assert_awaited_once_with(reader) - writer.close.assert_called_once() - writer.wait_closed.assert_awaited_once() - self.assertEqual(0, server_instance.number_of_connections) - - @patch("src.logserver.server.ClickHouseKafkaSender") - async def test_handle_connection_rejected(self, mock_kafka_sender): - server_instance = LogServer() - server_instance.number_of_connections = 5 - - reader = AsyncMock() - writer = AsyncMock() - writer.get_extra_info = MagicMock(return_value="test_address") - - await server_instance.handle_connection(reader, writer, sending=True) - - writer.close.assert_called_once() - writer.wait_closed.assert_awaited_once() - self.assertEqual(5, server_instance.number_of_connections) - - @patch("src.logserver.server.ClickHouseKafkaSender") - async def test_handle_connection_increases_and_decreases_connections( - self, mock_kafka_sender - ): - server_instance = LogServer() - server_instance.send_logline = AsyncMock() - server_instance.get_next_logline = MagicMock(return_value="test logline") - server_instance.number_of_connections = 3 - - reader = AsyncMock() - writer = AsyncMock() - writer.get_extra_info = MagicMock(return_value="test_address") - - await server_instance.handle_connection(reader, writer, sending=True) - - self.assertEqual(3, server_instance.number_of_connections) - - @patch("src.logserver.server.ClickHouseKafkaSender") - async def test_handle_connection_cancelled_error(self, mock_kafka_sender): - server_instance = LogServer() - server_instance.send_logline = AsyncMock(side_effect=asyncio.CancelledError) - server_instance.get_next_logline = MagicMock(return_value="test logline") - - reader = AsyncMock() - writer = AsyncMock() - writer.get_extra_info = MagicMock(return_value="test_address") - - await server_instance.handle_connection(reader, writer, sending=True) - - server_instance.send_logline.assert_awaited_once_with(writer, "test logline") - writer.close.assert_called_once() - writer.wait_closed.assert_awaited_once() - self.assertEqual(0, server_instance.number_of_connections) - - @patch("src.logserver.server.logger") - @patch("src.logserver.server.MAX_NUMBER_OF_CONNECTIONS", 7) - @patch("src.logserver.server.ClickHouseKafkaSender") - async def test_handle_connection_rejects_additional_connections( - self, mock_kafka_sender, mock_logger - ): - server_instance = LogServer() - server_instance.number_of_connections = 7 - - reader = AsyncMock() - writer = AsyncMock() - writer.get_extra_info = MagicMock(return_value="test_address") - - await server_instance.handle_connection(reader, writer, sending=True) + # Act + await sut.start() - writer.close.assert_called_once() - writer.wait_closed.assert_awaited_once() - self.assertEqual(7, server_instance.number_of_connections) + # Assert + 
mock_fetch_from_kafka.assert_called_once() + mock_fetch_from_file.assert_called_once() -class TestHandleKafkaInputs(unittest.IsolatedAsyncioTestCase): - @patch("src.logserver.server.ClickHouseKafkaSender") - async def asyncSetUp(self, mock_kafka_sender): +class TestFetchFromKafka(unittest.IsolatedAsyncioTestCase): + async def asyncSetUp(self): self.sut = LogServer() self.sut.kafka_consume_handler = AsyncMock() - self.sut.data_queue = MagicMock() + self.sut.kafka_produce_handler = AsyncMock() + @patch("src.logserver.server.SEND_TO_TOPIC", "test_topic") @patch("src.logserver.server.logger") @patch("asyncio.get_running_loop") async def test_handle_kafka_inputs(self, mock_get_running_loop, mock_logger): @@ -275,20 +132,22 @@ async def test_handle_kafka_inputs(self, mock_get_running_loop, mock_logger): ] with self.assertRaises(asyncio.CancelledError): - await self.sut.handle_kafka_inputs() + await self.sut.fetch_from_kafka() - self.sut.data_queue.put.assert_called_once_with("value1") + self.sut.kafka_produce_handler.produce.assert_called_once_with( + topic="test_topic", data="value1" + ) -class TestAsyncFollow(unittest.IsolatedAsyncioTestCase): - @patch("src.logserver.server.ClickHouseKafkaSender") - def setUp(self, mock_kafka_sender): +class TestFetchFromFile(unittest.IsolatedAsyncioTestCase): + def setUp(self): self.sut = LogServer() self.sut.kafka_consume_handler = AsyncMock() - self.sut.data_queue = MagicMock() + self.sut.kafka_produce_handler = AsyncMock() + @patch("src.logserver.server.SEND_TO_TOPIC", "test_topic") @patch("src.logserver.server.logger") - async def test_async_follow(self, mock_logger): + async def test_fetch_from_file(self, mock_logger): with tempfile.NamedTemporaryFile( delete=False, mode="w+", newline="" ) as temp_file: @@ -297,7 +156,7 @@ async def test_async_follow(self, mock_logger): temp_file.flush() try: - task = asyncio.create_task(self.sut.async_follow(temp_file_path)) + task = asyncio.create_task(self.sut.fetch_from_file(temp_file_path)) await asyncio.sleep(0.2) @@ -314,144 +173,13 @@ async def test_async_follow(self, mock_logger): finally: os.remove(temp_file_path) - self.sut.data_queue.put.assert_any_call("Test line 3") - self.sut.data_queue.put.assert_any_call("Test line 4") - - -class TestHandleSendLogline(unittest.IsolatedAsyncioTestCase): - @patch("src.logserver.server.ClickHouseKafkaSender") - async def test_handle_send_logline(self, mock_kafka_sender): - server_instance = LogServer() - server_instance.handle_connection = AsyncMock() - - reader = AsyncMock() - writer = AsyncMock() - - await server_instance.handle_send_logline(reader, writer) - - server_instance.handle_connection.assert_awaited_once_with(reader, writer, True) - - -class TestHandleReceiveLogline(unittest.IsolatedAsyncioTestCase): - @patch("src.logserver.server.ClickHouseKafkaSender") - async def test_handle_receive_logline(self, mock_kafka_sender): - server_instance = LogServer() - server_instance.handle_connection = AsyncMock() - - reader = AsyncMock() - writer = AsyncMock() - - await server_instance.handle_receive_logline(reader, writer) - - server_instance.handle_connection.assert_awaited_once_with( - reader, writer, False - ) - - -class TestSendLogline(unittest.IsolatedAsyncioTestCase): - @patch("src.logserver.server.logger") - @patch("src.logserver.server.ClickHouseKafkaSender") - async def test_send_logline_with_logline(self, mock_kafka_sender, mock_logger): - server_instance = LogServer() - writer = AsyncMock() - logline = "Test logline" - - await server_instance.send_logline(writer, 
logline) - - writer.write.assert_called_once_with(logline.encode("utf-8")) - writer.drain.assert_called_once() - - @patch("src.logserver.server.ClickHouseKafkaSender") - async def test_send_logline_no_logline(self, mock_kafka_sender): - server_instance = LogServer() - writer = AsyncMock() - logline = "" - - await server_instance.send_logline(writer, logline) - - writer.write.assert_not_called() - writer.drain.assert_not_called() - - -class TestReceiveLogline(unittest.IsolatedAsyncioTestCase): - @patch("src.logserver.server.logger") - @patch("src.logserver.server.ClickHouseKafkaSender") - async def test_receive_logline(self, mock_kafka_sender, mock_logger): - reader = AsyncMock() - data_queue = MagicMock() - server_instance = LogServer() - server_instance.data_queue = data_queue - - reader.readuntil = AsyncMock( - side_effect=[b"Test message 1\n", b"Test message 2\n", b""] + self.sut.kafka_produce_handler.produce.assert_any_call( + topic="test_topic", data="Test line 3" ) - - receive_task = asyncio.create_task(server_instance.receive_logline(reader)) - await receive_task - - data_queue.put.assert_any_call("Test message 1") - data_queue.put.assert_any_call("Test message 2") - - self.assertEqual(data_queue.put.call_count, 2) - - @patch("src.logserver.server.logger") - @patch("src.logserver.server.ClickHouseKafkaSender") - async def test_receive_without_separator(self, mock_kafka_sender, mock_logger): - reader = AsyncMock() - data_queue = MagicMock() - server_instance = LogServer() - server_instance.data_queue = data_queue - - reader.readuntil = AsyncMock( - side_effect=asyncio.exceptions.IncompleteReadError(b"", 100) + self.sut.kafka_produce_handler.produce.assert_any_call( + topic="test_topic", data="Test line 4" ) - # noinspection PyAsyncCall - asyncio.create_task(server_instance.receive_logline(reader)) - - @patch("src.logserver.server.logger") - @patch("src.logserver.server.ClickHouseKafkaSender") - async def test_receive_too_long(self, mock_kafka_sender, mock_logger): - reader = AsyncMock() - data_queue = MagicMock() - server_instance = LogServer() - server_instance.data_queue = data_queue - - reader.readuntil = AsyncMock(side_effect=asyncio.LimitOverrunError("", 1)) - - # noinspection PyAsyncCall - asyncio.create_task(server_instance.receive_logline(reader)) - - @patch("src.logserver.server.logger") - @patch("src.logserver.server.ClickHouseKafkaSender") - async def test_receive_raise_other_exception(self, mock_kafka_sender, mock_logger): - reader = AsyncMock() - data_queue = MagicMock() - server_instance = LogServer() - server_instance.data_queue = data_queue - - reader.readuntil = AsyncMock(side_effect=ValueError("Something went wrong")) - - with self.assertRaises(ValueError): - task = asyncio.create_task(server_instance.receive_logline(reader)) - await task - - -class TestGetNextLogline(unittest.TestCase): - @patch("src.logserver.server.ClickHouseKafkaSender") - def test_valid(self, mock_kafka_sender): - server_instance = LogServer() - server_instance.data_queue.put("Element 1") - server_instance.data_queue.put("Element 2") - - self.assertEqual("Element 1", server_instance.get_next_logline()) - self.assertEqual("Element 2", server_instance.get_next_logline()) - - @patch("src.logserver.server.ClickHouseKafkaSender") - def test_valid_from_empty_queue(self, mock_kafka_sender): - server_instance = LogServer() - self.assertIsNone(server_instance.get_next_logline()) - class TestMainFunction(unittest.TestCase): @patch("src.logserver.server.logger") @@ -467,8 +195,8 @@ def test_main(self, 
mock_log_server_class, mock_asyncio_run, mock_logger):
 
         # Assert
         mock_log_server_class.assert_called_once()
-        mock_server_instance.open.assert_called_once()
-        mock_asyncio_run.assert_called_once_with(mock_server_instance.open())
+        mock_server_instance.start.assert_called_once()
+        mock_asyncio_run.assert_called_once_with(mock_server_instance.start())
 
 
 if __name__ == "__main__":

From 57862a598b5299b0b9f276e74e075abbacbb8af9 Mon Sep 17 00:00:00 2001
From: Manuel Fuchs
Date: Wed, 13 Nov 2024 10:17:49 +0100
Subject: [PATCH 10/29] Simplify Collector

---
 docker/docker-compose.yml     |   3 -
 src/logcollector/collector.py | 217 ++++++++----------
 src/logserver/server.py       |   1 +
 tests/test_batch_handler.py   |   2 +-
 tests/test_collector.py       | 406 ++++++++++++++++------------------
 tests/test_server.py          |  18 +-
 6 files changed, 287 insertions(+), 360 deletions(-)
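Note on the subnet-ID batching this patch relies on: the collector maps each
client IP to the subnet it belongs to and batches loglines per subnet, so
downstream anomaly detection sees one client network per batch. A minimal,
self-contained sketch of that idea, assuming the ID is the network address
truncated to the configured prefix length (the prefix values and the helper
name below are illustrative, not the exact code added in this patch):

    import ipaddress

    IPV4_PREFIX_LENGTH = 24  # illustrative; the real value comes from config.yaml
    IPV6_PREFIX_LENGTH = 64  # illustrative; the real value comes from config.yaml

    def subnet_id(address: str) -> str:
        ip = ipaddress.ip_address(address)
        prefix_length = IPV4_PREFIX_LENGTH if ip.version == 4 else IPV6_PREFIX_LENGTH
        # Drop the host bits so every client in the same subnet gets the same ID.
        network = ipaddress.ip_network(f"{ip}/{prefix_length}", strict=False)
        return f"{network.network_address}_{prefix_length}"

    assert subnet_id("192.168.1.42") == "192.168.1.0_24"
    assert subnet_id("2001:db8::1") == "2001:db8::_64"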
""" - def __init__(self): - logger.debug("Initializing LogCollector...") - self.log_server = {} - self.logline = None - - self.log_server["host"] = utils.validate_host(LOGSERVER_HOSTNAME) - logger.debug(f"LogServer host was set to {self.log_server['host']}.") - - self.log_server["port"] = utils.validate_port(LOGSERVER_SENDING_PORT) - logger.debug(f"LogServer outgoing port was set to {self.log_server['port']}.") - - logger.debug( - f"Calling CollectorKafkaBatchSender(transactional_id='collector')..." - ) + def __init__(self) -> None: + self.lock = Lock() + self.loglines = queue.Queue() self.batch_handler = BufferedBatchSender() - logger.debug("Calling LoglineHandler()...") self.logline_handler = LoglineHandler() - logger.debug("Initialized LogCollector.") + self.kafka_consume_handler = SimpleKafkaConsumeHandler(topics=LISTEN_ON_TOPIC) + + async def start(self) -> None: + """ + Starts fetching messages from Kafka and sending them to the Prefilter. + """ + logger.info( + "LogCollector started:\n" + f" ⤷ receiving on Kafka topic '{LISTEN_ON_TOPIC}'\n" + f" ⤷ sending on Kafka topic 'TODO'" + ) + + try: + await asyncio.gather( + self.fetch(), + self.send(), + ) + except KeyboardInterrupt: + logger.info("LogCollector stopped.") - def fetch_logline(self) -> None: + async def fetch(self) -> None: + """ + Starts a loop to continuously listen on the configured Kafka topic. If a message is consumed, it is + decoded and stored. """ - Connects to the :class:`LogServer` and fetches a logline. If logline is available, it is decoded and stored. + loop = asyncio.get_running_loop() + + while True: + key, value, topic = await loop.run_in_executor( + None, self.kafka_consume_handler.consume + ) + + logger.debug(f"From Kafka: '{value}'") + await self.store(value) - Raises: - ConnectionError: Connection to :class:`LogServer` cannot be established. + async def send(self) -> None: + """ + Continuously sends the next logline in JSON format to the BatchSender, where it is stored in + a temporary batch before being sent to the Prefilter. Adds a subnet ID to the message, that it retrieves + from the client's IP address. """ - logger.debug("Fetching new logline from LogServer...") try: - with socket.socket( - socket.AF_INET, socket.SOCK_STREAM - ) as self.client_socket: - logger.debug( - f"Trying to connect to LogServer ({self.log_server['host']}:{self.log_server['port']})..." + while True: + if not self.loglines.empty(): + logline = self.loglines.get() + fields = ( + self.logline_handler.validate_logline_and_get_fields_as_json( + logline + ) + ) + subnet_id = self.get_subnet_id( + ipaddress.ip_address(fields.get("client_ip")) + ) + + self.batch_handler.add_message(subnet_id, json.dumps(fields)) + except KeyboardInterrupt: + while not self.loglines.empty(): + logline = self.loglines.get() + fields = self.logline_handler.validate_logline_and_get_fields_as_json( + logline ) - self.client_socket.connect( - (str(self.log_server.get("host")), self.log_server.get("port")) + subnet_id = self.get_subnet_id( + ipaddress.ip_address(fields.get("client_ip")) ) - logger.debug("Connected to LogServer. 
Retrieving data...") - data = self.client_socket.recv( - 1024 - ) # loglines are at most ~150 bytes long + self.batch_handler.add_message(subnet_id, json.dumps(fields)) - if not data: - logger.debug("No data available on LogServer.") - return + logger.info("Stopped LogCollector.") - self.logline = data.decode("utf-8") - logger.info(f"Received message:\n ⤷ {self.logline}") - except ConnectionError: - logger.error( - f"Could not connect to LogServer ({self.log_server['host']}:{self.log_server['port']})." - ) - raise + async def store(self, message: str): + """ + Stores the given message temporarily. + + Args: + message (str): Message to be stored + """ + async with self.lock: + self.loglines.put(message) @staticmethod def get_subnet_id(address: ipaddress.IPv4Address | ipaddress.IPv6Address) -> str: """ + Returns the subnet ID of an IP address. + Args: address (ipaddress.IPv4Address | ipaddress.IPv6Address): IP address to get the subnet ID for @@ -107,87 +140,13 @@ def get_subnet_id(address: ipaddress.IPv4Address | ipaddress.IPv6Address) -> str return f"{normalized_ip_address}_{prefix_length}" - def add_logline_to_batch(self) -> None: - """ - Sends the validated logline in JSON format to :class:`CollectorKafkaBatchSender`, where it is stored in - a temporary batch before being sent to topic ``Prefilter``. Adds a subnet_id to the message, that it retrieves - from the client's IP address. - """ - logger.debug("Adding logline to batch...") - if not self.logline: - raise ValueError("Failed to add logline to batch: No logline.") - - log_data = self.logline_handler.validate_logline_and_get_fields_as_json( - self.logline - ) - - logger.debug("Calling KafkaBatchSender to add message...") - subnet_id = self.get_subnet_id(ipaddress.ip_address(log_data.get("client_ip"))) - - self.batch_handler.add_message(subnet_id, json.dumps(log_data)) - - logger.info( - "Added message to the batch.\n" - f" ⤷ The subnet_id {subnet_id} batch currently stores " - f"{self.batch_handler.batch.get_number_of_messages(subnet_id)} of {BATCH_SIZE} messages." - ) - logger.debug(f"{log_data=}") - logger.debug(f"{json.dumps(log_data)=}") - def clear_logline(self) -> None: - """ - Clears all information regarding the stored logline. Afterward, instance can load the next logline. - """ - logger.debug("Clearing current logline...") - self.logline = None - logger.debug("Cleared logline.") - - -def main(one_iteration: bool = False) -> None: +def main() -> None: """ - Creates the :class:`LogCollector` instance. Starts a loop that continuously fetches a logline, validates and - extracts its information and adds it to the batch if valid. - - Args: - one_iteration (bool): For testing purposes: stops loop after one iteration - - Raises: - KeyboardInterrupt: Execution interrupted by user. Closes down the :class:`LogCollector` instance. + Creates the :class:`LogCollector` instance and starts it. """ - logger.info("Starting LogCollector...") - collector = LogCollector() - logger.info( - "LogCollector started.\n" - " ⤷ Fetching loglines from LogServer...\n" - " ⤷ Data will be sent when the respective batch is full or the global timer runs out." 
- ) - - iterations = 0 - - while True: - if one_iteration and iterations > 0: - break - iterations += 1 - - try: - logger.debug("Before fetching logline") - collector.fetch_logline() - logger.debug("After fetching logline") - - logger.debug("Before adding logline to batch") - collector.add_logline_to_batch() - logger.debug("After adding logline to batch") - except ValueError as err: - logger.debug("Incorrect logline: Waiting for next logline...") - logger.debug(f"{err=}") - except KeyboardInterrupt: - logger.info("Closing down LogCollector...") - collector.clear_logline() - logger.info("LogCollector closed down.") - break - finally: - logger.debug("Closing down LogCollector...") - collector.clear_logline() + collector_instance = LogCollector() + asyncio.run(collector_instance.start()) if __name__ == "__main__": # pragma: no cover diff --git a/src/logserver/server.py b/src/logserver/server.py index 9a37db6..e2757b6 100644 --- a/src/logserver/server.py +++ b/src/logserver/server.py @@ -64,6 +64,7 @@ async def send(self, message: str) -> None: message (str): Message to be sent """ async with self.lock: + logger.debug("Sending...") self.kafka_produce_handler.produce(topic=SEND_TO_TOPIC, data=message) async def fetch_from_kafka(self) -> None: diff --git a/tests/test_batch_handler.py b/tests/test_batch_handler.py index a4ea8b3..aa59f1a 100644 --- a/tests/test_batch_handler.py +++ b/tests/test_batch_handler.py @@ -310,7 +310,7 @@ def test_send_data_packet(self, mock_produce_handler): sut._send_data_packet(key, data) # Assert - mock_produce_handler_instance.send.assert_called_once_with( + mock_produce_handler_instance.produce.assert_called_once_with( topic="Prefilter", data='{"begin_timestamp": "test_begin", "end_timestamp": "test_end", "data": "test_data"}', key=key, diff --git a/tests/test_collector.py b/tests/test_collector.py index 163bf7a..c7eba2f 100644 --- a/tests/test_collector.py +++ b/tests/test_collector.py @@ -1,146 +1,225 @@ +import asyncio import ipaddress import unittest -from ipaddress import IPv4Address, IPv6Address -from unittest.mock import MagicMock, patch +from unittest.mock import MagicMock, patch, AsyncMock -from src.logcollector.collector import LogCollector, main - -LOG_SERVER_IP_ADDR = "172.27.0.8" -LOG_SERVER_PORT = 9999 +from src.logcollector.collector import LogCollector class TestInit(unittest.TestCase): - @patch("src.logcollector.collector.LOGSERVER_HOSTNAME", "127.0.0.1") - @patch("src.logcollector.collector.LOGSERVER_SENDING_PORT", 9999) + @patch("src.logcollector.collector.LISTEN_ON_TOPIC", "test_topic") + @patch("src.logcollector.collector.SimpleKafkaConsumeHandler") @patch("src.logcollector.collector.BufferedBatchSender") @patch("src.logcollector.collector.LoglineHandler") - def test_valid_init_ipv4(self, mock_logline_handler, mock_batch_handler): + def test_valid_init( + self, mock_logline_handler, mock_batch_handler, mock_kafka_handler + ): mock_batch_handler_instance = MagicMock() - mock_batch_handler.return_value = mock_batch_handler_instance mock_logline_handler_instance = MagicMock() + mock_kafka_handler_instance = MagicMock() + mock_batch_handler.return_value = mock_batch_handler_instance mock_logline_handler.return_value = mock_logline_handler_instance - - host = "127.0.0.1" - port = 9999 + mock_kafka_handler.return_value = mock_kafka_handler_instance sut = LogCollector() - self.assertEqual(IPv4Address(host), sut.log_server.get("host")) - self.assertEqual(port, sut.log_server.get("port")) - self.assertIsNone(sut.logline) 
self.assertEqual(mock_batch_handler_instance, sut.batch_handler) self.assertEqual(mock_logline_handler_instance, sut.logline_handler) + self.assertEqual(mock_kafka_handler_instance, sut.kafka_consume_handler) mock_batch_handler.assert_called_once() mock_logline_handler.assert_called_once() + mock_kafka_handler.assert_called_once_with(topics="test_topic") + - @patch("src.logcollector.collector.LOGSERVER_HOSTNAME", "fe80::1") - @patch("src.logcollector.collector.LOGSERVER_SENDING_PORT", 8989) +class TestStart(unittest.IsolatedAsyncioTestCase): + @patch("src.logcollector.collector.logger") + @patch("src.logcollector.collector.LogCollector.send") + @patch("src.logcollector.collector.LogCollector.fetch") + @patch("src.logcollector.collector.SimpleKafkaConsumeHandler") @patch("src.logcollector.collector.BufferedBatchSender") @patch("src.logcollector.collector.LoglineHandler") - def test_valid_init_ipv6(self, mock_logline_handler, mock_batch_handler): - mock_batch_handler_instance = MagicMock() - mock_batch_handler.return_value = mock_batch_handler_instance - mock_logline_handler_instance = MagicMock() - mock_logline_handler.return_value = mock_logline_handler_instance + async def test_start( + self, + mock_logline_handler, + mock_batch_handler, + mock_kafka_consume_handler, + mock_fetch, + mock_send, + mock_logger, + ): + # Arrange + sut = LogCollector() - host = "fe80::1" - port = 8989 + # Act + await sut.start() - sut = LogCollector() + # Assert + mock_send.assert_called_once() + mock_fetch.assert_called_once() - self.assertEqual(IPv6Address(host), sut.log_server.get("host")) - self.assertEqual(port, sut.log_server.get("port")) - self.assertIsNone(sut.logline) - self.assertEqual(mock_batch_handler_instance, sut.batch_handler) - self.assertEqual(mock_logline_handler_instance, sut.logline_handler) - mock_batch_handler.assert_called_once() - mock_logline_handler.assert_called_once() +class TestFetch(unittest.IsolatedAsyncioTestCase): + @patch("src.logcollector.collector.LoglineHandler") + @patch("src.logcollector.collector.BufferedBatchSender") + @patch("src.logcollector.collector.SimpleKafkaConsumeHandler") + async def asyncSetUp( + self, mock_kafka_handler, mock_batch_sender, mock_logline_handler + ): + self.sut = LogCollector() + self.sut.kafka_consume_handler = AsyncMock() - @patch("src.logcollector.collector.LOGSERVER_HOSTNAME", "256.256.256.256") - @patch("src.logcollector.collector.LOGSERVER_SENDING_PORT", 9999) - def test_invalid_init_with_invalid_host(self): - with self.assertRaises(ValueError): - LogCollector() + @patch("src.logcollector.collector.LogCollector.store") + @patch("src.logcollector.collector.logger") + @patch("asyncio.get_running_loop") + async def test_handle_kafka_inputs( + self, mock_get_running_loop, mock_logger, mock_store + ): + mock_store_instance = AsyncMock() + mock_store.return_value = mock_store_instance + mock_loop = AsyncMock() + mock_get_running_loop.return_value = mock_loop + self.sut.kafka_consume_handler.consume.return_value = ( + "key1", + "value1", + "topic1", + ) - @patch("src.logcollector.collector.LOGSERVER_HOSTNAME", "127.0.0.1") - @patch("src.logcollector.collector.LOGSERVER_SENDING_PORT", 70000) - def test_invalid_init_with_invalid_port(self): - with self.assertRaises(ValueError): - LogCollector() + mock_loop.run_in_executor.side_effect = [ + ("key1", "value1", "topic1"), + asyncio.CancelledError(), + ] + with self.assertRaises(asyncio.CancelledError): + await self.sut.fetch() -class TestFetchLogline(unittest.TestCase): + 
mock_store.assert_called_once_with("value1") + + +class TestSend(unittest.IsolatedAsyncioTestCase): @patch("src.logcollector.collector.logger") + @patch("src.logcollector.collector.IPV4_PREFIX_LENGTH", 22) + @patch("src.logcollector.collector.SimpleKafkaConsumeHandler") @patch("src.logcollector.collector.BufferedBatchSender") - @patch("socket.socket") - def test_fetch_logline_successful( - self, mock_socket, mock_batch_handler, mock_logger + @patch("src.logcollector.collector.LoglineHandler") + async def test_send_with_one_logline( + self, mock_logline_handler, mock_batch_handler, mock_kafka_handler, mock_logger ): - mock_socket_instance = mock_socket.return_value.__enter__.return_value - mock_socket_instance.connect.return_value = None - mock_socket_instance.recv.side_effect = ["fake messages".encode("utf-8"), b""] + # Arrange mock_batch_handler_instance = MagicMock() + mock_logline_handler_instance = MagicMock() mock_batch_handler.return_value = mock_batch_handler_instance + mock_batch_handler_instance.add_message.side_effect = [ + KeyboardInterrupt, + ] + mock_logline_handler.return_value = mock_logline_handler_instance + + mock_logline_handler_instance.validate_logline_and_get_fields_as_json.return_value = { + "timestamp": "2024-05-21T08:31:28.119Z", + "status": "NOERROR", + "client_ip": "192.168.0.105", + "dns_ip": "8.8.8.8", + "host_domain_name": "www.heidelberg-botanik.de", + "record_type": "A", + "response_ip": "b937:2f2e:2c1c:82a:33ad:9e59:ceb9:8e1", + "size": "150b", + } + expected_message = ( + '{"timestamp": "2024-05-21T08:31:28.119Z", "status": "NOERROR", "client_ip": ' + '"192.168.0.105", "dns_ip": "8.8.8.8", "host_domain_name": "www.heidelberg-botanik.de", ' + '"record_type": "A", "response_ip": "b937:2f2e:2c1c:82a:33ad:9e59:ceb9:8e1", ' + '"size": "150b"}' + ) + input_logline = ( + "2024-05-21T08:31:28.119Z NOERROR 192.168.0.105 8.8.8.8 www.heidelberg-botanik.de A " + "b937:2f2e:2c1c:82a:33ad:9e59:ceb9:8e1 150b" + ) sut = LogCollector() - sut.fetch_logline() + await sut.store(input_logline) - mock_socket_instance.connect.assert_called_with( - (LOG_SERVER_IP_ADDR, LOG_SERVER_PORT) + # Act + await sut.send() + + mock_batch_handler_instance.add_message.assert_called_once_with( + "192.168.0.0_22", expected_message ) - mock_socket_instance.recv.assert_called_with(1024) - self.assertEqual("fake messages", sut.logline) @patch("src.logcollector.collector.logger") + @patch("src.logcollector.collector.IPV4_PREFIX_LENGTH", 22) + @patch("src.logcollector.collector.SimpleKafkaConsumeHandler") @patch("src.logcollector.collector.BufferedBatchSender") - @patch("socket.socket") - def test_fetch_logline_no_data_on_server( - self, mock_socket, mock_batch_handler, mock_logger + @patch("src.logcollector.collector.LoglineHandler") + async def test_send_keyboard_interrupt( + self, mock_logline_handler, mock_batch_handler, mock_kafka_handler, mock_logger ): - mock_socket_instance = mock_socket.return_value.__enter__.return_value - mock_socket_instance.connect.return_value = None - mock_socket_instance.recv.side_effect = ["".encode("utf-8"), b""] + # Arrange mock_batch_handler_instance = MagicMock() + mock_logline_handler_instance = MagicMock() mock_batch_handler.return_value = mock_batch_handler_instance + mock_batch_handler_instance.add_message.side_effect = [ + None, + KeyboardInterrupt, + None, + None, # KeyboardInterrupt between the messages + ] + mock_logline_handler.return_value = mock_logline_handler_instance + + 
mock_logline_handler_instance.validate_logline_and_get_fields_as_json.return_value = { + "timestamp": "2024-05-21T08:31:28.119Z", + "status": "NOERROR", + "client_ip": "192.168.0.105", + "dns_ip": "8.8.8.8", + "host_domain_name": "www.heidelberg-botanik.de", + "record_type": "A", + "response_ip": "b937:2f2e:2c1c:82a:33ad:9e59:ceb9:8e1", + "size": "150b", + } + input_logline = ( + "2024-05-21T08:31:28.119Z NOERROR 192.168.0.105 8.8.8.8 www.heidelberg-botanik.de A " + "b937:2f2e:2c1c:82a:33ad:9e59:ceb9:8e1 150b" + ) sut = LogCollector() - sut.fetch_logline() + await sut.store(input_logline) + await sut.store(input_logline) + await sut.store(input_logline) + await sut.store(input_logline) - mock_socket_instance.connect.assert_called_with( - (LOG_SERVER_IP_ADDR, LOG_SERVER_PORT) - ) - mock_socket_instance.recv.assert_called_with(1024) - self.assertIsNone(sut.logline) + # Act + await sut.send() - @patch("src.logcollector.collector.logger") + # Assert + self.assertEqual(4, mock_batch_handler_instance.add_message.call_count) + + +class TestStore(unittest.IsolatedAsyncioTestCase): + @patch("src.logcollector.collector.SimpleKafkaConsumeHandler") @patch("src.logcollector.collector.BufferedBatchSender") - @patch("socket.socket") - def test_fetch_logline_connection_error( - self, mock_socket, mock_batch_handler, mock_logger + @patch("src.logcollector.collector.LoglineHandler") + async def test_store( + self, mock_logline_handler, mock_batch_handler, mock_kafka_consume_handler ): - mock_socket_instance = mock_socket.return_value.__enter__.return_value - mock_socket_instance.connect.side_effect = ConnectionError("Unable to connect") - mock_batch_handler_instance = MagicMock() - mock_batch_handler.return_value = mock_batch_handler_instance - + # Arrange sut = LogCollector() + self.assertTrue(sut.loglines.empty()) - with self.assertRaises(ConnectionError): - sut.fetch_logline() + # Act + await sut.store("test_message") - mock_socket_instance.connect.assert_called_with( - (LOG_SERVER_IP_ADDR, LOG_SERVER_PORT) - ) - self.assertIsNone(sut.logline) + # Assert + self.assertEqual("test_message", sut.loglines.get()) + self.assertTrue(sut.loglines.empty()) class TestGetSubnetId(unittest.TestCase): @patch("src.logcollector.collector.IPV4_PREFIX_LENGTH", 24) + @patch("src.logcollector.collector.SimpleKafkaConsumeHandler") @patch("src.logcollector.collector.BufferedBatchSender") @patch("src.logcollector.collector.LoglineHandler") - def test_get_subnet_id_ipv4(self, mock_logline_handler, mock_batch_handler): + def test_get_subnet_id_ipv4( + self, mock_logline_handler, mock_batch_handler, mock_kafka_handler + ): # Arrange test_address = ipaddress.IPv4Address("192.168.1.1") expected_result = f"192.168.1.0_24" @@ -153,9 +232,12 @@ def test_get_subnet_id_ipv4(self, mock_logline_handler, mock_batch_handler): self.assertEqual(expected_result, result) @patch("src.logcollector.collector.IPV4_PREFIX_LENGTH", 24) + @patch("src.logcollector.collector.SimpleKafkaConsumeHandler") @patch("src.logcollector.collector.BufferedBatchSender") @patch("src.logcollector.collector.LoglineHandler") - def test_get_subnet_id_ipv4_zero(self, mock_logline_handler, mock_batch_handler): + def test_get_subnet_id_ipv4_zero( + self, mock_logline_handler, mock_batch_handler, mock_kafka_handler + ): # Arrange test_address = ipaddress.IPv4Address("0.0.0.0") expected_result = f"0.0.0.0_24" @@ -168,9 +250,12 @@ def test_get_subnet_id_ipv4_zero(self, mock_logline_handler, mock_batch_handler) self.assertEqual(expected_result, result) 
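
The expected values in these subnet tests can be reproduced with Python's `ipaddress` module alone. Below is a minimal standalone sketch of the derivation the tests exercise; `subnet_id_sketch` is a hypothetical helper, not the project's method, and the prefix lengths of 24 (IPv4) and 64 (IPv6) are assumptions matching the values patched in above:

import ipaddress

def subnet_id_sketch(address, ipv4_prefix_length=24, ipv6_prefix_length=64):
    # Pick the prefix length by address family, mirroring the patched config values.
    if isinstance(address, ipaddress.IPv4Address):
        prefix_length = ipv4_prefix_length
    elif isinstance(address, ipaddress.IPv6Address):
        prefix_length = ipv6_prefix_length
    else:
        raise ValueError("Unsupported address type")

    # strict=False masks the host bits instead of raising an error.
    net = ipaddress.ip_network((address, prefix_length), strict=False)
    return f"{net.network_address}_{prefix_length}"

assert subnet_id_sketch(ipaddress.IPv4Address("192.168.1.1")) == "192.168.1.0_24"
assert subnet_id_sketch(ipaddress.IPv6Address("2001:db8:85a3:1234:5678:8a2e:370:7334")) == "2001:db8:85a3:1234::_64"
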
@patch("src.logcollector.collector.IPV4_PREFIX_LENGTH", 23) + @patch("src.logcollector.collector.SimpleKafkaConsumeHandler") @patch("src.logcollector.collector.BufferedBatchSender") @patch("src.logcollector.collector.LoglineHandler") - def test_get_subnet_id_ipv4_max(self, mock_logline_handler, mock_batch_handler): + def test_get_subnet_id_ipv4_max( + self, mock_logline_handler, mock_batch_handler, mock_kafka_handler + ): # Arrange test_address = ipaddress.IPv4Address("255.255.255.255") expected_result = f"255.255.254.0_23" @@ -183,9 +268,12 @@ def test_get_subnet_id_ipv4_max(self, mock_logline_handler, mock_batch_handler): self.assertEqual(expected_result, result) @patch("src.logcollector.collector.IPV6_PREFIX_LENGTH", 64) + @patch("src.logcollector.collector.SimpleKafkaConsumeHandler") @patch("src.logcollector.collector.BufferedBatchSender") @patch("src.logcollector.collector.LoglineHandler") - def test_get_subnet_id_ipv6(self, mock_logline_handler, mock_batch_handler): + def test_get_subnet_id_ipv6( + self, mock_logline_handler, mock_batch_handler, mock_kafka_handler + ): # Arrange test_address = ipaddress.IPv6Address("2001:db8:85a3:1234:5678:8a2e:0370:7334") expected_result = f"2001:db8:85a3:1234::_64" @@ -198,9 +286,12 @@ def test_get_subnet_id_ipv6(self, mock_logline_handler, mock_batch_handler): self.assertEqual(expected_result, result) @patch("src.logcollector.collector.IPV6_PREFIX_LENGTH", 64) + @patch("src.logcollector.collector.SimpleKafkaConsumeHandler") @patch("src.logcollector.collector.BufferedBatchSender") @patch("src.logcollector.collector.LoglineHandler") - def test_get_subnet_id_ipv6_zero(self, mock_logline_handler, mock_batch_handler): + def test_get_subnet_id_ipv6_zero( + self, mock_logline_handler, mock_batch_handler, mock_kafka_handler + ): # Arrange test_address = ipaddress.IPv6Address("::") expected_result = f"::_64" @@ -213,9 +304,12 @@ def test_get_subnet_id_ipv6_zero(self, mock_logline_handler, mock_batch_handler) self.assertEqual(expected_result, result) @patch("src.logcollector.collector.IPV6_PREFIX_LENGTH", 48) + @patch("src.logcollector.collector.SimpleKafkaConsumeHandler") @patch("src.logcollector.collector.BufferedBatchSender") @patch("src.logcollector.collector.LoglineHandler") - def test_get_subnet_id_ipv6_max(self, mock_logline_handler, mock_batch_handler): + def test_get_subnet_id_ipv6_max( + self, mock_logline_handler, mock_batch_handler, mock_kafka_handler + ): # Arrange test_address = ipaddress.IPv6Address("ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff") expected_result = f"ffff:ffff:ffff::_48" @@ -229,10 +323,11 @@ def test_get_subnet_id_ipv6_max(self, mock_logline_handler, mock_batch_handler): @patch("src.logcollector.collector.IPV4_PREFIX_LENGTH", 24) @patch("src.logcollector.collector.IPV6_PREFIX_LENGTH", 48) + @patch("src.logcollector.collector.SimpleKafkaConsumeHandler") @patch("src.logcollector.collector.BufferedBatchSender") @patch("src.logcollector.collector.LoglineHandler") def test_get_subnet_id_unsupported_type( - self, mock_logline_handler, mock_batch_handler + self, mock_logline_handler, mock_batch_handler, mock_kafka_handler ): # Arrange test_address = "192.168.1.1" # String instead of IPv4Address or IPv6Address @@ -245,9 +340,12 @@ def test_get_subnet_id_unsupported_type( @patch("src.logcollector.collector.IPV4_PREFIX_LENGTH", 24) @patch("src.logcollector.collector.IPV6_PREFIX_LENGTH", 48) + @patch("src.logcollector.collector.SimpleKafkaConsumeHandler") @patch("src.logcollector.collector.BufferedBatchSender") 
@patch("src.logcollector.collector.LoglineHandler") - def test_get_subnet_id_none(self, mock_logline_handler, mock_batch_handler): + def test_get_subnet_id_none( + self, mock_logline_handler, mock_batch_handler, mock_kafka_handler + ): # Arrange test_address = None sut = LogCollector() @@ -258,133 +356,5 @@ def test_get_subnet_id_none(self, mock_logline_handler, mock_batch_handler): sut.get_subnet_id(test_address) -class TestAddLoglineToBatch(unittest.TestCase): - @patch("src.logcollector.collector.logger") - @patch("src.logcollector.collector.IPV4_PREFIX_LENGTH", 22) - @patch("src.base.utils.normalize_ipv4_address") - @patch("src.logcollector.collector.BufferedBatchSender") - @patch("src.logcollector.collector.LoglineHandler") - def test_add_to_batch_with_data( - self, mock_logline_handler, mock_batch_handler, mock_normalize, mock_logger - ): - mock_batch_handler_instance = MagicMock() - mock_logline_handler_instance = MagicMock() - mock_batch_handler.return_value = mock_batch_handler_instance - mock_logline_handler.return_value = mock_logline_handler_instance - mock_logline_handler_instance.validate_logline_and_get_fields_as_json.return_value = { - "timestamp": "2024-05-21T08:31:28.119Z", - "status": "NOERROR", - "client_ip": "192.168.0.105", - "dns_ip": "8.8.8.8", - "host_domain_name": "www.heidelberg-botanik.de", - "record_type": "A", - "response_ip": "b937:2f2e:2c1c:82a:33ad:9e59:ceb9:8e1", - "size": "150b", - } - mock_normalize.return_value = ("192.168.0.0", 22) - - expected_message = ( - '{"timestamp": "2024-05-21T08:31:28.119Z", "status": "NOERROR", "client_ip": ' - '"192.168.0.105", "dns_ip": "8.8.8.8", "host_domain_name": "www.heidelberg-botanik.de", ' - '"record_type": "A", "response_ip": "b937:2f2e:2c1c:82a:33ad:9e59:ceb9:8e1", ' - '"size": "150b"}' - ) - - sut = LogCollector() - sut.logline = ( - "2024-05-21T08:31:28.119Z NOERROR 192.168.0.105 8.8.8.8 www.heidelberg-botanik.de A " - "b937:2f2e:2c1c:82a:33ad:9e59:ceb9:8e1 150b" - ) - sut.add_logline_to_batch() - - mock_batch_handler_instance.add_message.assert_called_once_with( - "192.168.0.0_22", expected_message - ) - - @patch("src.logcollector.collector.BufferedBatchSender") - @patch("src.logcollector.collector.LoglineHandler") - def test_add_to_batch_without_data(self, mock_logline_handler, mock_batch_handler): - mock_batch_handler_instance = MagicMock() - mock_batch_handler.return_value = mock_batch_handler_instance - - sut = LogCollector() - sut.logline = None - - with self.assertRaises(ValueError): - sut.add_logline_to_batch() - - mock_batch_handler.add_message.assert_not_called() - - -class TestClearLogline(unittest.TestCase): - @patch("src.logcollector.collector.BufferedBatchSender") - def test_clear_logline(self, mock_batch_handler): - mock_batch_handler_instance = MagicMock() - mock_batch_handler.return_value = mock_batch_handler_instance - - sut = LogCollector() - sut.logline = ( - "2024-05-21T08:31:28.119Z NOERROR 192.168.0.105 8.8.8.8 " - "www.heidelberg-botanik.de A b937:2f2e:2c1c:82a:33ad:9e59:ceb9:8e1 150b" - ) - sut.clear_logline() - - self.assertIsNone(sut.logline) - self.assertEqual(IPv4Address(LOG_SERVER_IP_ADDR), sut.log_server["host"]) - self.assertEqual(LOG_SERVER_PORT, sut.log_server["port"]) - - -class TestMainFunction(unittest.TestCase): - @patch("src.logcollector.collector.logger.info", MagicMock) - @patch("src.logcollector.collector.LogCollector") - def test_main_loop_execution(self, mock_log_collector): - # Arrange - mock_collector_instance = mock_log_collector.return_value - - 
mock_collector_instance.fetch_logline = MagicMock() - mock_collector_instance.add_logline_to_batch = MagicMock() - mock_collector_instance.clear_logline = MagicMock() - - # Act - main(one_iteration=True) - - # Assert - self.assertTrue(mock_collector_instance.fetch_logline.called) - self.assertTrue(mock_collector_instance.add_logline_to_batch.called) - self.assertTrue(mock_collector_instance.clear_logline.called) - - @patch("src.logcollector.collector.logger") - @patch("src.logcollector.collector.LogCollector") - def test_main_value_error_handling(self, mock_log_collector, mock_logger): - # Arrange - mock_collector_instance = mock_log_collector.return_value - - # Act - with patch.object( - mock_collector_instance, - "fetch_logline", - side_effect=ValueError("Simulated ValueError"), - ): - main(one_iteration=True) - - # Assert - self.assertTrue(mock_collector_instance.clear_logline.called) - self.assertTrue(mock_collector_instance.loop_exited) - - @patch("src.logcollector.collector.logger") - @patch("src.logcollector.collector.LogCollector") - def test_main_keyboard_interrupt(self, mock_log_collector, mock_logger): - # Arrange - mock_collector_instance = mock_log_collector.return_value - mock_collector_instance.fetch_logline.side_effect = KeyboardInterrupt - - # Act - main() - - # Assert - self.assertTrue(mock_collector_instance.clear_logline.called) - self.assertTrue(mock_collector_instance.loop_exited) - - if __name__ == "__main__": unittest.main() diff --git a/tests/test_server.py b/tests/test_server.py index 212db38..22a42c4 100644 --- a/tests/test_server.py +++ b/tests/test_server.py @@ -10,8 +10,6 @@ from src.logserver.server import LogServer, main LOG_SERVER_IP_ADDR = "192.168.0.1" -LOG_SERVER_PORT_IN = 9998 -LOG_SERVER_PORT_OUT = 9999 class TestInit(unittest.TestCase): @@ -84,13 +82,13 @@ def test_invalid_init_with_invalid_host( mock_kafka_produce_handler.assert_not_called() -class TestOpen(unittest.IsolatedAsyncioTestCase): +class TestStart(unittest.IsolatedAsyncioTestCase): @patch("src.logserver.server.HOSTNAME", "127.0.0.1") @patch("src.logserver.server.logger") @patch("src.logserver.server.LogServer.fetch_from_kafka") @patch("src.logserver.server.LogServer.fetch_from_file") @patch("src.logserver.server.SimpleKafkaConsumeHandler") - async def test_open( + async def test_start( self, mock_kafka_consume_handler, mock_fetch_from_file, @@ -114,10 +112,14 @@ async def asyncSetUp(self): self.sut.kafka_consume_handler = AsyncMock() self.sut.kafka_produce_handler = AsyncMock() - @patch("src.logserver.server.SEND_TO_TOPIC", "test_topic") + @patch("src.logserver.server.LogServer.send") @patch("src.logserver.server.logger") @patch("asyncio.get_running_loop") - async def test_handle_kafka_inputs(self, mock_get_running_loop, mock_logger): + async def test_handle_kafka_inputs( + self, mock_get_running_loop, mock_logger, mock_send + ): + mock_send_instance = AsyncMock() + mock_send.return_value = mock_send_instance mock_loop = AsyncMock() mock_get_running_loop.return_value = mock_loop self.sut.kafka_consume_handler.consume.return_value = ( @@ -134,9 +136,7 @@ async def test_handle_kafka_inputs(self, mock_get_running_loop, mock_logger): with self.assertRaises(asyncio.CancelledError): await self.sut.fetch_from_kafka() - self.sut.kafka_produce_handler.produce.assert_called_once_with( - topic="test_topic", data="value1" - ) + mock_send.assert_called_once_with("value1") class TestFetchFromFile(unittest.IsolatedAsyncioTestCase): From 353868538431fd3626b2ba024ec5f8f154d57ce0 Mon Sep 17 00:00:00 2001 
From: Manuel Fuchs Date: Wed, 13 Nov 2024 15:17:20 +0100 Subject: [PATCH 11/29] Fix Kafka handlers, LogServer and Collector --- src/base/kafka_handler.py | 49 ++++++++++++++++++++++++++--------- src/logcollector/collector.py | 23 ++++++++-------- src/logserver/server.py | 21 +++++++-------- 3 files changed, 58 insertions(+), 35 deletions(-) diff --git a/src/base/kafka_handler.py b/src/base/kafka_handler.py index a9f291c..705dfed 100644 --- a/src/base/kafka_handler.py +++ b/src/base/kafka_handler.py @@ -58,9 +58,6 @@ def __init__(self) -> None: Initializes the broker configuration. """ self.consumer = None - self.brokers = ",".join( - [f"{broker['hostname']}:{broker['port']}" for broker in KAFKA_BROKERS] - ) class KafkaProduceHandler(KafkaHandler): @@ -68,14 +65,8 @@ class KafkaProduceHandler(KafkaHandler): Base class for Kafka Producer wrappers. """ - def __init__(self, transactional_id: str): + def __init__(self, conf): super().__init__() - - conf = { - "bootstrap.servers": self.brokers, - "transactional.id": transactional_id, - } - self.producer = Producer(conf) @abstractmethod @@ -94,6 +85,19 @@ class SimpleKafkaProduceHandler(KafkaProduceHandler): Simple wrapper for the Kafka Producer without Write-Exactly-Once semantics. """ + def __init__(self): + self.brokers = ",".join( + [f"{broker['hostname']}:{broker['port']}" for broker in KAFKA_BROKERS] + ) + + conf = { + "bootstrap.servers": self.brokers, + "enable.idempotence": False, + "acks": "1", + } + + super().__init__(conf) + def produce(self, topic: str, data: str, key: None | str = None) -> None: """ Encodes the given data for transport and sends it on the specified topic. @@ -113,6 +117,8 @@ def produce(self, topic: str, data: str, key: None | str = None) -> None: callback=kafka_delivery_report, ) + self.producer.flush() + class ExactlyOnceKafkaProduceHandler(KafkaProduceHandler): """ @@ -120,7 +126,17 @@ class ExactlyOnceKafkaProduceHandler(KafkaProduceHandler): """ def __init__(self, transactional_id: str): - super().__init__(transactional_id) + self.brokers = ",".join( + [f"{broker['hostname']}:{broker['port']}" for broker in KAFKA_BROKERS] + ) + + conf = { + "bootstrap.servers": self.brokers, + "transactional.id": transactional_id, + "enable.idempotence": True, + } + + super().__init__(conf) self.producer.init_transactions() def produce(self, topic: str, data: str, key: None | str = None) -> None: @@ -139,6 +155,8 @@ def produce(self, topic: str, data: str, key: None | str = None) -> None: if not data: return + self.producer.flush() + self.producer.begin_transaction() try: self.producer.produce( @@ -197,6 +215,10 @@ class KafkaConsumeHandler(KafkaHandler): def __init__(self, topics: str | list[str]) -> None: super().__init__() + self.brokers = ",".join( + [f"{broker['hostname']}:{broker['port']}" for broker in KAFKA_BROKERS] + ) + conf = { "bootstrap.servers": self.brokers, "group.id": CONSUMER_GROUP_ID, @@ -258,6 +280,9 @@ class SimpleKafkaConsumeHandler(KafkaConsumeHandler): Simple wrapper for the Kafka Consumer without Write-Exactly-Once semantics. """ + def __init__(self, topics): + super().__init__(topics) + def consume(self) -> tuple[str | None, str | None, str | None]: """ Consumes available messages on the specified topic. 
Decodes the data and returns a tuple @@ -357,7 +382,7 @@ def consume(self) -> tuple[str | None, str | None, str | None]: except KeyboardInterrupt: logger.info("Shutting down KafkaConsumeHandler...") raise KeyboardInterrupt - except Exception as e: + except Exception: raise @staticmethod diff --git a/src/logcollector/collector.py b/src/logcollector/collector.py index a8d65a5..3826940 100644 --- a/src/logcollector/collector.py +++ b/src/logcollector/collector.py @@ -2,12 +2,11 @@ import ipaddress import json import os -import queue import sys -from asyncio import Lock +from multiprocessing import Lock sys.path.append(os.getcwd()) -from src.base.kafka_handler import SimpleKafkaConsumeHandler +from src.base.kafka_handler import ExactlyOnceKafkaConsumeHandler from src.base.logline_handler import LoglineHandler from src.base import utils from src.logcollector.batch_handler import BufferedBatchSender @@ -34,14 +33,14 @@ class LogCollector: def __init__(self) -> None: self.lock = Lock() - self.loglines = queue.Queue() + self.loglines = asyncio.Queue() self.batch_handler = BufferedBatchSender() self.logline_handler = LoglineHandler() - self.kafka_consume_handler = SimpleKafkaConsumeHandler(topics=LISTEN_ON_TOPIC) + self.kafka_consume_handler = ExactlyOnceKafkaConsumeHandler(LISTEN_ON_TOPIC) async def start(self) -> None: """ - Starts fetching messages from Kafka and sending them to the Prefilter. + Starts fetching messages from Kafka and sending them to the :class:`Prefilter`. """ logger.info( "LogCollector started:\n" @@ -68,8 +67,8 @@ async def fetch(self) -> None: key, value, topic = await loop.run_in_executor( None, self.kafka_consume_handler.consume ) - logger.debug(f"From Kafka: '{value}'") + await self.store(value) async def send(self) -> None: @@ -81,7 +80,7 @@ async def send(self) -> None: try: while True: if not self.loglines.empty(): - logline = self.loglines.get() + logline = await self.loglines.get() fields = ( self.logline_handler.validate_logline_and_get_fields_as_json( logline @@ -92,9 +91,12 @@ async def send(self) -> None: ) self.batch_handler.add_message(subnet_id, json.dumps(fields)) + logger.debug(f"Sent: '{logline}'") + else: + await asyncio.sleep(0.1) except KeyboardInterrupt: while not self.loglines.empty(): - logline = self.loglines.get() + logline = await self.loglines.get() fields = self.logline_handler.validate_logline_and_get_fields_as_json( logline ) @@ -113,8 +115,7 @@ async def store(self, message: str): Args: message (str): Message to be stored """ - async with self.lock: - self.loglines.put(message) + await self.loglines.put(message) @staticmethod def get_subnet_id(address: ipaddress.IPv4Address | ipaddress.IPv6Address) -> str: diff --git a/src/logserver/server.py b/src/logserver/server.py index e2757b6..8a72db8 100644 --- a/src/logserver/server.py +++ b/src/logserver/server.py @@ -1,12 +1,14 @@ import asyncio import os import sys -from asyncio import Lock import aiofiles sys.path.append(os.getcwd()) -from src.base.kafka_handler import SimpleKafkaConsumeHandler, SimpleKafkaProduceHandler +from src.base.kafka_handler import ( + SimpleKafkaConsumeHandler, + ExactlyOnceKafkaProduceHandler, +) from src.base.utils import setup_config from src.base import utils from src.base.log_config import get_logger @@ -30,12 +32,8 @@ def __init__(self) -> None: self.host = None self.host = utils.validate_host(HOSTNAME) - self.lock = Lock() - - self.kafka_consume_handler = SimpleKafkaConsumeHandler(topics=LISTEN_ON_TOPIC) - self.kafka_produce_handler = SimpleKafkaProduceHandler( - 
transactional_id="TODO: Change" - ) + self.kafka_consume_handler = SimpleKafkaConsumeHandler(LISTEN_ON_TOPIC) + self.kafka_produce_handler = ExactlyOnceKafkaProduceHandler("LogServer") async def start(self) -> None: """ @@ -63,9 +61,8 @@ async def send(self, message: str) -> None: Args: message (str): Message to be sent """ - async with self.lock: - logger.debug("Sending...") - self.kafka_produce_handler.produce(topic=SEND_TO_TOPIC, data=message) + self.kafka_produce_handler.produce(topic=SEND_TO_TOPIC, data=message) + logger.debug(f"Sent: '{message}'") async def fetch_from_kafka(self) -> None: """ @@ -77,8 +74,8 @@ async def fetch_from_kafka(self) -> None: key, value, topic = await loop.run_in_executor( None, self.kafka_consume_handler.consume ) - logger.debug(f"From Kafka: '{value}'") + await self.send(value) async def fetch_from_file(self, file: str = READ_FROM_FILE) -> None: From 729fc55d348801b99d2394f54423663372c452c1 Mon Sep 17 00:00:00 2001 From: Manuel Fuchs Date: Wed, 13 Nov 2024 18:29:28 +0100 Subject: [PATCH 12/29] Add topics to config.yaml --- config.yaml | 12 +++++----- src/base/utils.py | 29 ++++++++++++++++++++++++ src/detector/detector.py | 11 +++++---- src/inspector/inspector.py | 30 ++++++++++++++++--------- src/logcollector/collector.py | 17 +++++++------- src/logserver/server.py | 38 ++++++++++++++++++------------- src/prefilter/prefilter.py | 42 ++++++++++++++++++++++------------- 7 files changed, 121 insertions(+), 58 deletions(-) diff --git a/config.yaml b/config.yaml index bb92b76..8d596ba 100644 --- a/config.yaml +++ b/config.yaml @@ -18,7 +18,6 @@ logging: pipeline: log_storage: logserver: - input_kafka_topic: "LogServer" input_file: "/opt/file.txt" max_number_of_connections: 1000 @@ -79,10 +78,13 @@ environment: port: 8098 - hostname: 172.27.0.5 port: 8099 - logserver: - hostname: 172.27.0.8 - port_in: 9998 - port_out: 9999 + kafka_topics: + pipeline: + logserver_in: "pipeline.logserver_in" + logserver_to_collector: "pipeline.logserver_to_collector" + batch_sender_to_prefilter: "pipeline.batch_sender_to_prefilter" + prefilter_to_inspector: "pipeline.prefilter_to_inspector" + inspector_to_detector: "pipeline.inspector_to_detector" monitoring: clickhouse_server: hostname: 172.27.0.11 diff --git a/src/base/utils.py b/src/base/utils.py index c4bd938..1d70547 100644 --- a/src/base/utils.py +++ b/src/base/utils.py @@ -4,6 +4,7 @@ import yaml from confluent_kafka import KafkaError, Message +from confluent_kafka.admin import AdminClient sys.path.append(os.getcwd()) from src.base.log_config import get_logger @@ -141,3 +142,31 @@ def normalize_ipv6_address( net = ipaddress.IPv6Network((address, prefix_length), strict=False) return net.network_address, prefix_length + + +def generate_unique_transactional_id(base_name: str, bootstrap_servers: str) -> str: + """ + Checks if the given name is already a transactional ID. If so, it a number is added to make it unique. 
+ + Args: + base_name (str): Name of the transactional ID to be checked + bootstrap_servers (str): Kafka brokers as string in the form `'host1:port1,host2:port2'` + + Returns: + Unique transactional ID using the base_name + """ + admin_client = AdminClient({"bootstrap.servers": bootstrap_servers}) + existing_ids = set() + + consumer_groups = admin_client.list_groups(timeout=10) + + for group in consumer_groups: + existing_ids.add(group.id) + + transactional_id = base_name + counter = 1 + while transactional_id in existing_ids: + transactional_id = f"{base_name}-{counter}" + counter += 1 + + return transactional_id diff --git a/src/detector/detector.py b/src/detector/detector.py index 1334448..cceb9cc 100644 --- a/src/detector/detector.py +++ b/src/detector/detector.py @@ -5,8 +5,8 @@ import sys import tempfile -import numpy as np import math +import numpy as np import requests from numpy import median @@ -18,7 +18,8 @@ ) from src.base.log_config import get_logger -logger = get_logger("data_analysis.detector") +module_name = "data_analysis.detector" +logger = get_logger(module_name) BUF_SIZE = 65536 # let's read stuff in 64kb chunks! @@ -27,6 +28,9 @@ CHECKSUM = config["pipeline"]["data_analysis"]["detector"]["checksum"] MODEL_BASE_URL = config["pipeline"]["data_analysis"]["detector"]["base_url"] THRESHOLD = config["pipeline"]["data_analysis"]["detector"]["threshold"] +CONSUME_TOPIC = config["environment"]["kafka_topics"]["pipeline"][ + "inspector_to_detector" +] class WrongChecksum(Exception): # pragma: no cover @@ -52,8 +56,7 @@ def __init__(self) -> None: ) logger.debug(f"Initializing Detector...") - logger.debug(f"Calling KafkaConsumeHandler(topic='Detector')...") - self.kafka_consume_handler = ExactlyOnceKafkaConsumeHandler(topics="Detector") + self.kafka_consume_handler = ExactlyOnceKafkaConsumeHandler(CONSUME_TOPIC) self.model = self._get_model() diff --git a/src/inspector/inspector.py b/src/inspector/inspector.py index 338caf4..26f2ff6 100644 --- a/src/inspector/inspector.py +++ b/src/inspector/inspector.py @@ -15,9 +15,11 @@ ExactlyOnceKafkaProduceHandler, KafkaMessageFetchException, ) +from src.base.utils import generate_unique_transactional_id from src.base.log_config import get_logger -logger = get_logger("data_inspection.inspector") +module_name = "data_inspection.inspector" +logger = get_logger(module_name) config = setup_config() MODE = config["pipeline"]["data_inspection"]["inspector"]["mode"] @@ -30,7 +32,18 @@ TIME_TYPE = config["pipeline"]["data_inspection"]["inspector"]["time_type"] TIME_RANGE = config["pipeline"]["data_inspection"]["inspector"]["time_range"] TIMESTAMP_FORMAT = config["environment"]["timestamp_format"] - +CONSUME_TOPIC = config["environment"]["kafka_topics"]["pipeline"][ + "prefilter_to_inspector" +] +PRODUCE_TOPIC = config["environment"]["kafka_topics"]["pipeline"][ + "inspector_to_detector" +] +KAFKA_BROKERS = ",".join( + [ + f"{broker['hostname']}:{broker['port']}" + for broker in config["environment"]["kafka_brokers"] + ] +) VALID_UNIVARIATE_MODELS = [ "KNNDetector", @@ -41,7 +54,7 @@ "MadDetector", "SArimaDetector", ] -VALID_MULTIVARIATE_MODLS = [ +VALID_MULTIVARIATE_MODELS = [ "xStreamDetector", "RShashDetector", "HSTreeDetector", @@ -70,12 +83,9 @@ def __init__(self) -> None: self.anomalies = [] logger.debug(f"Initializing Inspector...") - logger.debug(f"Calling KafkaConsumeHandler(topic='Inspect')...") - self.kafka_consume_handler = ExactlyOnceKafkaConsumeHandler(topics="Inspect") - logger.debug(f"Calling 
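+# The topic names are the dotted identifiers defined in config.yaml above,
+# e.g. "pipeline.prefilter_to_inspector" and "pipeline.inspector_to_detector".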
KafkaProduceHandler(transactional_id='Inspect')...") - self.kafka_produce_handler = ExactlyOnceKafkaProduceHandler( - transactional_id="inspect" - ) + self.kafka_consume_handler = ExactlyOnceKafkaConsumeHandler(CONSUME_TOPIC) + transactional_id = generate_unique_transactional_id(module_name, KAFKA_BROKERS) + self.kafka_produce_handler = ExactlyOnceKafkaProduceHandler(transactional_id) logger.debug(f"Initialized Inspector.") def get_and_fill_data(self) -> None: @@ -291,7 +301,7 @@ def inspect(self): def _inspect_multivariate(self, model: str): logger.debug(f"Load Model: {model['model']} from {model['module']}.") - if not model["model"] in VALID_MULTIVARIATE_MODLS: + if not model["model"] in VALID_MULTIVARIATE_MODELS: logger.error(f"Model {model} is not a valid univariate model.") raise NotImplementedError(f"Model {model} is not a valid univariate model.") diff --git a/src/logcollector/collector.py b/src/logcollector/collector.py index 3826940..92a6418 100644 --- a/src/logcollector/collector.py +++ b/src/logcollector/collector.py @@ -14,15 +14,17 @@ logger = get_logger("log_collection.collector") -CONFIG = utils.setup_config() -IPV4_PREFIX_LENGTH = CONFIG["pipeline"]["log_collection"]["batch_handler"]["subnet_id"][ +config = utils.setup_config() +IPV4_PREFIX_LENGTH = config["pipeline"]["log_collection"]["batch_handler"]["subnet_id"][ "ipv4_prefix_length" ] -IPV6_PREFIX_LENGTH = CONFIG["pipeline"]["log_collection"]["batch_handler"]["subnet_id"][ +IPV6_PREFIX_LENGTH = config["pipeline"]["log_collection"]["batch_handler"]["subnet_id"][ "ipv6_prefix_length" ] -BATCH_SIZE = CONFIG["pipeline"]["log_collection"]["batch_handler"]["batch_size"] -LISTEN_ON_TOPIC = "logserver_to_collector" # TODO: Change +BATCH_SIZE = config["pipeline"]["log_collection"]["batch_handler"]["batch_size"] +CONSUME_TOPIC = config["environment"]["kafka_topics"]["pipeline"][ + "logserver_to_collector" +] class LogCollector: @@ -36,7 +38,7 @@ def __init__(self) -> None: self.loglines = asyncio.Queue() self.batch_handler = BufferedBatchSender() self.logline_handler = LoglineHandler() - self.kafka_consume_handler = ExactlyOnceKafkaConsumeHandler(LISTEN_ON_TOPIC) + self.kafka_consume_handler = ExactlyOnceKafkaConsumeHandler(CONSUME_TOPIC) async def start(self) -> None: """ @@ -44,8 +46,7 @@ async def start(self) -> None: """ logger.info( "LogCollector started:\n" - f" ⤷ receiving on Kafka topic '{LISTEN_ON_TOPIC}'\n" - f" ⤷ sending on Kafka topic 'TODO'" + f" ⤷ receiving on Kafka topic '{CONSUME_TOPIC}'" ) try: diff --git a/src/logserver/server.py b/src/logserver/server.py index 8a72db8..30bfecf 100644 --- a/src/logserver/server.py +++ b/src/logserver/server.py @@ -9,17 +9,25 @@ SimpleKafkaConsumeHandler, ExactlyOnceKafkaProduceHandler, ) +from src.base.utils import generate_unique_transactional_id from src.base.utils import setup_config -from src.base import utils from src.base.log_config import get_logger -logger = get_logger("log_storage.logserver") - -CONFIG = setup_config() -HOSTNAME = CONFIG["environment"]["logserver"]["hostname"] -LISTEN_ON_TOPIC = CONFIG["pipeline"]["log_storage"]["logserver"]["input_kafka_topic"] -SEND_TO_TOPIC = "logserver_to_collector" # TODO: Change -READ_FROM_FILE = CONFIG["pipeline"]["log_storage"]["logserver"]["input_file"] +module_name = "log_storage.logserver" +logger = get_logger(module_name) + +config = setup_config() +CONSUME_TOPIC = config["environment"]["kafka_topics"]["pipeline"]["logserver_in"] +PRODUCE_TOPIC = config["environment"]["kafka_topics"]["pipeline"][ + "logserver_to_collector" 
+] +READ_FROM_FILE = config["pipeline"]["log_storage"]["logserver"]["input_file"] +KAFKA_BROKERS = ",".join( + [ + f"{broker['hostname']}:{broker['port']}" + for broker in config["environment"]["kafka_brokers"] + ] +) class LogServer: @@ -29,11 +37,9 @@ class LogServer: """ def __init__(self) -> None: - self.host = None - self.host = utils.validate_host(HOSTNAME) - - self.kafka_consume_handler = SimpleKafkaConsumeHandler(LISTEN_ON_TOPIC) - self.kafka_produce_handler = ExactlyOnceKafkaProduceHandler("LogServer") + self.kafka_consume_handler = SimpleKafkaConsumeHandler(CONSUME_TOPIC) + transactional_id = generate_unique_transactional_id(module_name, KAFKA_BROKERS) + self.kafka_produce_handler = ExactlyOnceKafkaProduceHandler(transactional_id) async def start(self) -> None: """ @@ -41,9 +47,9 @@ async def start(self) -> None: """ logger.info( "LogServer started:\n" - f" ⤷ receiving on Kafka topic '{LISTEN_ON_TOPIC}'\n" + f" ⤷ receiving on Kafka topic '{CONSUME_TOPIC}'\n" f" ⤷ receiving from input file '{READ_FROM_FILE}'\n" - f" ⤷ sending on Kafka topic 'TODO'" + f" ⤷ sending on Kafka topic '{PRODUCE_TOPIC}'" ) try: @@ -61,7 +67,7 @@ async def send(self, message: str) -> None: Args: message (str): Message to be sent """ - self.kafka_produce_handler.produce(topic=SEND_TO_TOPIC, data=message) + self.kafka_produce_handler.produce(topic=PRODUCE_TOPIC, data=message) logger.debug(f"Sent: '{message}'") async def fetch_from_kafka(self) -> None: diff --git a/src/prefilter/prefilter.py b/src/prefilter/prefilter.py index e96667e..785c04d 100644 --- a/src/prefilter/prefilter.py +++ b/src/prefilter/prefilter.py @@ -10,9 +10,26 @@ ExactlyOnceKafkaProduceHandler, KafkaMessageFetchException, ) +from src.base.utils import generate_unique_transactional_id from src.base.log_config import get_logger - -logger = get_logger("log_filtering.prefilter") +from src.base.utils import setup_config + +module_name = "log_filtering.prefilter" +logger = get_logger(module_name) + +config = setup_config() +CONSUME_TOPIC = config["environment"]["kafka_topics"]["pipeline"][ + "batch_sender_to_prefilter" +] +PRODUCE_TOPIC = config["environment"]["kafka_topics"]["pipeline"][ + "prefilter_to_inspector" +] +KAFKA_BROKERS = ",".join( + [ + f"{broker['hostname']}:{broker['port']}" + for broker in config["environment"]["kafka_brokers"] + ] +) class Prefilter: @@ -22,22 +39,17 @@ class Prefilter: """ def __init__(self): - logger.debug(f"Initializing Prefilter...") self.begin_timestamp = None self.end_timestamp = None + self.subnet_id = None + self.unfiltered_data = [] self.filtered_data = [] - self.subnet_id = None - logger.debug(f"Calling LoglineHandler()...") self.logline_handler = LoglineHandler() - logger.debug(f"Calling KafkaProduceHandler(transactional_id='prefilter')...") - self.kafka_produce_handler = ExactlyOnceKafkaProduceHandler( - transactional_id="prefilter" - ) - logger.debug(f"Calling KafkaConsumeHandler(topic='Prefilter')...") - self.kafka_consume_handler = ExactlyOnceKafkaConsumeHandler(topics="Prefilter") - logger.debug("Initialized Prefilter.") + self.kafka_consume_handler = ExactlyOnceKafkaConsumeHandler(CONSUME_TOPIC) + transactional_id = generate_unique_transactional_id(module_name, KAFKA_BROKERS) + self.kafka_produce_handler = ExactlyOnceKafkaProduceHandler(transactional_id) def get_and_fill_data(self) -> None: """ @@ -51,7 +63,7 @@ def get_and_fill_data(self) -> None: logger.debug("Cleared existing data.") logger.debug("Calling KafkaConsumeHandler for consuming JSON data...") - key, data = 
self.kafka_consume_handler.consume_and_return_json_data() + key, data = self.kafka_consume_handler.consume_as_json() self.subnet_id = key if data: @@ -109,8 +121,8 @@ def send_filtered_data(self): } logger.debug("Calling KafkaProduceHandler...") logger.debug(f"{data_to_send=}") - self.kafka_produce_handler.send( - topic="Inspect", + self.kafka_produce_handler.produce( + topic=PRODUCE_TOPIC, data=json.dumps(data_to_send), key=self.subnet_id, ) From 1ae6d90b2bc647a61f57479366022e5d0d3726e5 Mon Sep 17 00:00:00 2001 From: Manuel Fuchs Date: Wed, 13 Nov 2024 18:33:41 +0100 Subject: [PATCH 13/29] Update .gitignore --- .gitignore | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index c812c1d..f750cb3 100644 --- a/.gitignore +++ b/.gitignore @@ -3,6 +3,7 @@ __pycache__/ *.py[cod] *$py.class *.pkl + # C extensions *.so *.html @@ -320,6 +321,8 @@ dmypy.json cython_debug/ # End of https://www.toptal.com/developers/gitignore/api/python,jupyternotebooks + +# Others /docs/api/ !/docs/api/index.rst -/pipeline_prototype/ +/docker/default.txt From 411b9ae4d592f0a9264d017ccda9825e68c361be Mon Sep 17 00:00:00 2001 From: Manuel Fuchs Date: Wed, 13 Nov 2024 19:07:31 +0100 Subject: [PATCH 14/29] Update tests --- src/base/utils.py | 32 +++--- tests/test_collector.py | 34 +++--- tests/test_detector.py | 3 +- ...test_exactly_once_kafka_produce_handler.py | 4 +- tests/test_inspector.py | 3 +- tests/test_kafka_handler.py | 21 ---- tests/test_prefilter.py | 22 ++-- tests/test_server.py | 107 +++++------------- 8 files changed, 82 insertions(+), 144 deletions(-) diff --git a/src/base/utils.py b/src/base/utils.py index 1d70547..26f6f04 100644 --- a/src/base/utils.py +++ b/src/base/utils.py @@ -4,7 +4,6 @@ import yaml from confluent_kafka import KafkaError, Message -from confluent_kafka.admin import AdminClient sys.path.append(os.getcwd()) from src.base.log_config import get_logger @@ -155,18 +154,19 @@ def generate_unique_transactional_id(base_name: str, bootstrap_servers: str) -> Returns: Unique transactional ID using the base_name """ - admin_client = AdminClient({"bootstrap.servers": bootstrap_servers}) - existing_ids = set() - - consumer_groups = admin_client.list_groups(timeout=10) - - for group in consumer_groups: - existing_ids.add(group.id) - - transactional_id = base_name - counter = 1 - while transactional_id in existing_ids: - transactional_id = f"{base_name}-{counter}" - counter += 1 - - return transactional_id + # TODO: Test and activate + # admin_client = AdminClient({"bootstrap.servers": bootstrap_servers}) + # existing_ids = set() + # + # consumer_groups = admin_client.list_consumer_groups() + # + # for group in consumer_groups: + # existing_ids.add(group.id) + # + # transactional_id = base_name + # counter = 1 + # while transactional_id in existing_ids: + # transactional_id = f"{base_name}-{counter}" + # counter += 1 + + return base_name diff --git a/tests/test_collector.py b/tests/test_collector.py index c7eba2f..b2b5d26 100644 --- a/tests/test_collector.py +++ b/tests/test_collector.py @@ -7,8 +7,8 @@ class TestInit(unittest.TestCase): - @patch("src.logcollector.collector.LISTEN_ON_TOPIC", "test_topic") - @patch("src.logcollector.collector.SimpleKafkaConsumeHandler") + @patch("src.logcollector.collector.CONSUME_TOPIC", "test_topic") + @patch("src.logcollector.collector.ExactlyOnceKafkaConsumeHandler") @patch("src.logcollector.collector.BufferedBatchSender") @patch("src.logcollector.collector.LoglineHandler") def test_valid_init( @@ -29,14 +29,14 @@ def 
test_valid_init( mock_batch_handler.assert_called_once() mock_logline_handler.assert_called_once() - mock_kafka_handler.assert_called_once_with(topics="test_topic") + mock_kafka_handler.assert_called_once_with("test_topic") class TestStart(unittest.IsolatedAsyncioTestCase): @patch("src.logcollector.collector.logger") @patch("src.logcollector.collector.LogCollector.send") @patch("src.logcollector.collector.LogCollector.fetch") - @patch("src.logcollector.collector.SimpleKafkaConsumeHandler") + @patch("src.logcollector.collector.ExactlyOnceKafkaConsumeHandler") @patch("src.logcollector.collector.BufferedBatchSender") @patch("src.logcollector.collector.LoglineHandler") async def test_start( @@ -62,7 +62,7 @@ async def test_start( class TestFetch(unittest.IsolatedAsyncioTestCase): @patch("src.logcollector.collector.LoglineHandler") @patch("src.logcollector.collector.BufferedBatchSender") - @patch("src.logcollector.collector.SimpleKafkaConsumeHandler") + @patch("src.logcollector.collector.ExactlyOnceKafkaConsumeHandler") async def asyncSetUp( self, mock_kafka_handler, mock_batch_sender, mock_logline_handler ): @@ -99,7 +99,7 @@ async def test_handle_kafka_inputs( class TestSend(unittest.IsolatedAsyncioTestCase): @patch("src.logcollector.collector.logger") @patch("src.logcollector.collector.IPV4_PREFIX_LENGTH", 22) - @patch("src.logcollector.collector.SimpleKafkaConsumeHandler") + @patch("src.logcollector.collector.ExactlyOnceKafkaConsumeHandler") @patch("src.logcollector.collector.BufferedBatchSender") @patch("src.logcollector.collector.LoglineHandler") async def test_send_with_one_logline( @@ -147,7 +147,7 @@ async def test_send_with_one_logline( @patch("src.logcollector.collector.logger") @patch("src.logcollector.collector.IPV4_PREFIX_LENGTH", 22) - @patch("src.logcollector.collector.SimpleKafkaConsumeHandler") + @patch("src.logcollector.collector.ExactlyOnceKafkaConsumeHandler") @patch("src.logcollector.collector.BufferedBatchSender") @patch("src.logcollector.collector.LoglineHandler") async def test_send_keyboard_interrupt( @@ -194,7 +194,7 @@ async def test_send_keyboard_interrupt( class TestStore(unittest.IsolatedAsyncioTestCase): - @patch("src.logcollector.collector.SimpleKafkaConsumeHandler") + @patch("src.logcollector.collector.ExactlyOnceKafkaConsumeHandler") @patch("src.logcollector.collector.BufferedBatchSender") @patch("src.logcollector.collector.LoglineHandler") async def test_store( @@ -208,13 +208,13 @@ async def test_store( await sut.store("test_message") # Assert - self.assertEqual("test_message", sut.loglines.get()) + self.assertEqual("test_message", await sut.loglines.get()) self.assertTrue(sut.loglines.empty()) class TestGetSubnetId(unittest.TestCase): @patch("src.logcollector.collector.IPV4_PREFIX_LENGTH", 24) - @patch("src.logcollector.collector.SimpleKafkaConsumeHandler") + @patch("src.logcollector.collector.ExactlyOnceKafkaConsumeHandler") @patch("src.logcollector.collector.BufferedBatchSender") @patch("src.logcollector.collector.LoglineHandler") def test_get_subnet_id_ipv4( @@ -232,7 +232,7 @@ def test_get_subnet_id_ipv4( self.assertEqual(expected_result, result) @patch("src.logcollector.collector.IPV4_PREFIX_LENGTH", 24) - @patch("src.logcollector.collector.SimpleKafkaConsumeHandler") + @patch("src.logcollector.collector.ExactlyOnceKafkaConsumeHandler") @patch("src.logcollector.collector.BufferedBatchSender") @patch("src.logcollector.collector.LoglineHandler") def test_get_subnet_id_ipv4_zero( @@ -250,7 +250,7 @@ def test_get_subnet_id_ipv4_zero( 
self.assertEqual(expected_result, result) @patch("src.logcollector.collector.IPV4_PREFIX_LENGTH", 23) - @patch("src.logcollector.collector.SimpleKafkaConsumeHandler") + @patch("src.logcollector.collector.ExactlyOnceKafkaConsumeHandler") @patch("src.logcollector.collector.BufferedBatchSender") @patch("src.logcollector.collector.LoglineHandler") def test_get_subnet_id_ipv4_max( @@ -268,7 +268,7 @@ def test_get_subnet_id_ipv4_max( self.assertEqual(expected_result, result) @patch("src.logcollector.collector.IPV6_PREFIX_LENGTH", 64) - @patch("src.logcollector.collector.SimpleKafkaConsumeHandler") + @patch("src.logcollector.collector.ExactlyOnceKafkaConsumeHandler") @patch("src.logcollector.collector.BufferedBatchSender") @patch("src.logcollector.collector.LoglineHandler") def test_get_subnet_id_ipv6( @@ -286,7 +286,7 @@ def test_get_subnet_id_ipv6( self.assertEqual(expected_result, result) @patch("src.logcollector.collector.IPV6_PREFIX_LENGTH", 64) - @patch("src.logcollector.collector.SimpleKafkaConsumeHandler") + @patch("src.logcollector.collector.ExactlyOnceKafkaConsumeHandler") @patch("src.logcollector.collector.BufferedBatchSender") @patch("src.logcollector.collector.LoglineHandler") def test_get_subnet_id_ipv6_zero( @@ -304,7 +304,7 @@ def test_get_subnet_id_ipv6_zero( self.assertEqual(expected_result, result) @patch("src.logcollector.collector.IPV6_PREFIX_LENGTH", 48) - @patch("src.logcollector.collector.SimpleKafkaConsumeHandler") + @patch("src.logcollector.collector.ExactlyOnceKafkaConsumeHandler") @patch("src.logcollector.collector.BufferedBatchSender") @patch("src.logcollector.collector.LoglineHandler") def test_get_subnet_id_ipv6_max( @@ -323,7 +323,7 @@ def test_get_subnet_id_ipv6_max( @patch("src.logcollector.collector.IPV4_PREFIX_LENGTH", 24) @patch("src.logcollector.collector.IPV6_PREFIX_LENGTH", 48) - @patch("src.logcollector.collector.SimpleKafkaConsumeHandler") + @patch("src.logcollector.collector.ExactlyOnceKafkaConsumeHandler") @patch("src.logcollector.collector.BufferedBatchSender") @patch("src.logcollector.collector.LoglineHandler") def test_get_subnet_id_unsupported_type( @@ -340,7 +340,7 @@ def test_get_subnet_id_unsupported_type( @patch("src.logcollector.collector.IPV4_PREFIX_LENGTH", 24) @patch("src.logcollector.collector.IPV6_PREFIX_LENGTH", 48) - @patch("src.logcollector.collector.SimpleKafkaConsumeHandler") + @patch("src.logcollector.collector.ExactlyOnceKafkaConsumeHandler") @patch("src.logcollector.collector.BufferedBatchSender") @patch("src.logcollector.collector.LoglineHandler") def test_get_subnet_id_none( diff --git a/tests/test_detector.py b/tests/test_detector.py index 6fc6bb9..be7c487 100644 --- a/tests/test_detector.py +++ b/tests/test_detector.py @@ -131,6 +131,7 @@ def test_get_model_not_existing(self, mock_kafka_consume_handler): class TestInit(unittest.TestCase): + @patch("src.detector.detector.CONSUME_TOPIC", "test_topic") @patch("src.detector.detector.logger") @patch("src.detector.detector.ExactlyOnceKafkaConsumeHandler") def test_init(self, mock_kafka_consume_handler, mock_logger): @@ -141,7 +142,7 @@ def test_init(self, mock_kafka_consume_handler, mock_logger): self.assertEqual([], sut.messages) self.assertEqual(mock_kafka_consume_handler_instance, sut.kafka_consume_handler) - mock_kafka_consume_handler.assert_called_once_with(topics="Detector") + mock_kafka_consume_handler.assert_called_once_with("test_topic") class TestGetData(unittest.TestCase): diff --git a/tests/test_exactly_once_kafka_produce_handler.py 
b/tests/test_exactly_once_kafka_produce_handler.py index 8c58970..57d0b8e 100644 --- a/tests/test_exactly_once_kafka_produce_handler.py +++ b/tests/test_exactly_once_kafka_produce_handler.py @@ -32,9 +32,10 @@ def test_init(self, mock_producer): expected_conf = { "bootstrap.servers": "127.0.0.1:9999,127.0.0.2:9998,127.0.0.3:9997", "transactional.id": "test_transactional_id", + "enable.idempotence": True, } - sut = ExactlyOnceKafkaProduceHandler(transactional_id="test_transactional_id") + sut = ExactlyOnceKafkaProduceHandler("test_transactional_id") self.assertIsNone(sut.consumer) self.assertEqual(mock_producer_instance, sut.producer) @@ -68,6 +69,7 @@ def test_init_fail(self, mock_producer, mock_logger): expected_conf = { "bootstrap.servers": "127.0.0.1:9999,127.0.0.2:9998,127.0.0.3:9997", "transactional.id": "test_transactional_id", + "enable.idempotence": True, } with patch.object( diff --git a/tests/test_inspector.py b/tests/test_inspector.py index 2bd4669..5c5f7c7 100644 --- a/tests/test_inspector.py +++ b/tests/test_inspector.py @@ -34,6 +34,7 @@ def get_batch(data): class TestInit(unittest.TestCase): + @patch("src.inspector.inspector.CONSUME_TOPIC", "test_topic") @patch("src.inspector.inspector.ExactlyOnceKafkaProduceHandler") @patch("src.inspector.inspector.ExactlyOnceKafkaConsumeHandler") def test_init(self, mock_kafka_consume_handler, mock_produce_handler): @@ -46,7 +47,7 @@ def test_init(self, mock_kafka_consume_handler, mock_produce_handler): self.assertEqual([], sut.messages) self.assertEqual(mock_kafka_consume_handler_instance, sut.kafka_consume_handler) - mock_kafka_consume_handler.assert_called_once_with(topics="Inspect") + mock_kafka_consume_handler.assert_called_once_with("test_topic") class TestGetData(unittest.TestCase): diff --git a/tests/test_kafka_handler.py b/tests/test_kafka_handler.py index 1561434..db69638 100644 --- a/tests/test_kafka_handler.py +++ b/tests/test_kafka_handler.py @@ -1,34 +1,13 @@ import unittest -from unittest.mock import patch from src.base.kafka_handler import KafkaHandler class TestInit(unittest.TestCase): - @patch( - "src.base.kafka_handler.KAFKA_BROKERS", - [ - { - "hostname": "127.0.0.1", - "port": 9999, - }, - { - "hostname": "127.0.0.2", - "port": 9998, - }, - { - "hostname": "127.0.0.3", - "port": 9997, - }, - ], - ) def test_init(self): - expected_brokers = "127.0.0.1:9999,127.0.0.2:9998,127.0.0.3:9997" - sut = KafkaHandler() self.assertIsNone(sut.consumer) - self.assertEqual(expected_brokers, sut.brokers) if __name__ == "__main__": diff --git a/tests/test_prefilter.py b/tests/test_prefilter.py index dc84782..04fa6cc 100644 --- a/tests/test_prefilter.py +++ b/tests/test_prefilter.py @@ -7,6 +7,7 @@ class TestInit(unittest.TestCase): + @patch("src.prefilter.prefilter.CONSUME_TOPIC", "test_topic") @patch("src.prefilter.prefilter.LoglineHandler") @patch("src.prefilter.prefilter.ExactlyOnceKafkaConsumeHandler") @patch("src.prefilter.prefilter.ExactlyOnceKafkaProduceHandler") @@ -25,8 +26,8 @@ def test_valid_init( self.assertIsNotNone(sut.kafka_consume_handler) self.assertIsNotNone(sut.logline_handler) - mock_produce_handler.assert_called_once_with(transactional_id="prefilter") - mock_consume_handler.assert_called_once_with(topics="Prefilter") + mock_produce_handler.assert_called_once() + mock_consume_handler.assert_called_once_with("test_topic") mock_logline_handler.assert_called_once() @@ -46,7 +47,7 @@ def test_get_data_without_new_data( mock_produce_handler.return_value = mock_produce_handler_instance mock_consume_handler_instance = 
MagicMock() mock_consume_handler.return_value = mock_consume_handler_instance - mock_consume_handler_instance.consume_and_return_json_data.return_value = ( + mock_consume_handler_instance.consume_as_json.return_value = ( None, {}, ) @@ -58,7 +59,7 @@ def test_get_data_without_new_data( self.assertEqual([], sut.filtered_data) self.assertEqual(None, sut.subnet_id) - mock_consume_handler_instance.consume_and_return_json_data.assert_called_once() + mock_consume_handler_instance.consume_as_json.assert_called_once() @patch("src.prefilter.prefilter.logger") @patch("src.prefilter.prefilter.LoglineHandler") @@ -75,7 +76,7 @@ def test_get_data_with_new_data( mock_produce_handler.return_value = mock_produce_handler_instance mock_consume_handler_instance = MagicMock() mock_consume_handler.return_value = mock_consume_handler_instance - mock_consume_handler_instance.consume_and_return_json_data.return_value = ( + mock_consume_handler_instance.consume_as_json.return_value = ( "127.0.0.0/24", { "begin_timestamp": "2024-05-21T08:31:28.119Z", @@ -91,7 +92,7 @@ def test_get_data_with_new_data( self.assertEqual([], sut.filtered_data) self.assertEqual("127.0.0.0/24", sut.subnet_id) - mock_consume_handler_instance.consume_and_return_json_data.assert_called_once() + mock_consume_handler_instance.consume_as_json.assert_called_once() @patch("src.prefilter.prefilter.logger") @patch("src.prefilter.prefilter.LoglineHandler") @@ -108,7 +109,7 @@ def test_get_data_with_existing_data( mock_batch_handler.return_value = mock_batch_handler_instance mock_consume_handler_instance = MagicMock() mock_consume_handler.return_value = mock_consume_handler_instance - mock_consume_handler_instance.consume_and_return_json_data.return_value = ( + mock_consume_handler_instance.consume_as_json.return_value = ( "127.0.0.0/24", { "begin_timestamp": "2024-05-21T08:31:28.119Z", @@ -125,7 +126,7 @@ def test_get_data_with_existing_data( self.assertEqual([], sut.filtered_data) self.assertEqual("127.0.0.0/24", sut.subnet_id) - mock_consume_handler_instance.consume_and_return_json_data.assert_called_once() + mock_consume_handler_instance.consume_as_json.assert_called_once() class TestFilterByError(unittest.TestCase): @@ -317,6 +318,7 @@ def test_filter_by_error_with_data_two_error_types( class TestSendFilteredData(unittest.TestCase): + @patch("src.prefilter.prefilter.PRODUCE_TOPIC", "test_topic") @patch("src.prefilter.prefilter.logger") @patch("src.prefilter.prefilter.LoglineHandler") @patch("src.prefilter.prefilter.ExactlyOnceKafkaConsumeHandler") @@ -369,8 +371,8 @@ def test_send_with_data( ) sut.send_filtered_data() - mock_produce_handler_instance.send.assert_called_once_with( - topic="Inspect", + mock_produce_handler_instance.produce.assert_called_once_with( + topic="test_topic", data=expected_message, key="192.168.1.0_24", ) diff --git a/tests/test_server.py b/tests/test_server.py index 22a42c4..acdbfa4 100644 --- a/tests/test_server.py +++ b/tests/test_server.py @@ -2,7 +2,6 @@ import os import tempfile import unittest -from ipaddress import IPv4Address, IPv6Address from unittest.mock import AsyncMock, MagicMock, patch import aiofiles @@ -13,83 +12,31 @@ class TestInit(unittest.TestCase): - @patch("src.logserver.server.HOSTNAME", "127.0.0.1") - @patch("src.logserver.server.LISTEN_ON_TOPIC", "test_topic") - @patch("src.logserver.server.Lock") - @patch("src.logserver.server.SimpleKafkaProduceHandler") + @patch("src.logserver.server.CONSUME_TOPIC", "test_topic") + @patch("src.logserver.server.ExactlyOnceKafkaProduceHandler") 
@patch("src.logserver.server.SimpleKafkaConsumeHandler") - def test_valid_init_ipv4( - self, mock_kafka_consume_handler, mock_kafka_produce_handler, mock_lock - ): + def test_valid_init(self, mock_kafka_consume_handler, mock_kafka_produce_handler): mock_kafka_consume_handler_instance = MagicMock() mock_kafka_produce_handler_instance = MagicMock() - mock_lock_instance = MagicMock() mock_kafka_produce_handler.return_value = mock_kafka_produce_handler_instance mock_kafka_consume_handler.return_value = mock_kafka_consume_handler_instance - mock_lock.return_value = mock_lock_instance sut = LogServer() - self.assertEqual(IPv4Address("127.0.0.1"), sut.host) - self.assertEqual(mock_lock_instance, sut.lock) self.assertEqual(mock_kafka_consume_handler_instance, sut.kafka_consume_handler) self.assertEqual(mock_kafka_produce_handler_instance, sut.kafka_produce_handler) - mock_kafka_consume_handler.assert_called_once_with(topics="test_topic") - - @patch("src.logserver.server.HOSTNAME", "fe80::1") - @patch("src.logserver.server.LISTEN_ON_TOPIC", "test_topic") - @patch("src.logserver.server.Lock") - @patch("src.logserver.server.SimpleKafkaProduceHandler") - @patch("src.logserver.server.SimpleKafkaConsumeHandler") - def test_valid_init_ipv6( - self, mock_kafka_consume_handler, mock_kafka_produce_handler, mock_lock - ): - mock_kafka_consume_handler_instance = MagicMock() - mock_kafka_produce_handler_instance = MagicMock() - mock_lock_instance = MagicMock() - - mock_kafka_produce_handler.return_value = mock_kafka_produce_handler_instance - mock_kafka_consume_handler.return_value = mock_kafka_consume_handler_instance - mock_lock.return_value = mock_lock_instance - - sut = LogServer() - self.assertEqual(IPv6Address("fe80::1"), sut.host) - self.assertEqual(mock_lock_instance, sut.lock) - self.assertEqual(mock_kafka_consume_handler_instance, sut.kafka_consume_handler) - self.assertEqual(mock_kafka_produce_handler_instance, sut.kafka_produce_handler) - mock_kafka_consume_handler.assert_called_once_with(topics="test_topic") - - @patch("src.logserver.server.HOSTNAME", "256.256.256.256") - @patch("src.logserver.server.LISTEN_ON_TOPIC", "test_topic") - @patch("src.logserver.server.Lock") - @patch("src.logserver.server.SimpleKafkaProduceHandler") - @patch("src.logserver.server.SimpleKafkaConsumeHandler") - def test_invalid_init_with_invalid_host( - self, mock_kafka_consume_handler, mock_kafka_produce_handler, mock_lock - ): - mock_kafka_consume_handler_instance = MagicMock() - mock_kafka_produce_handler_instance = MagicMock() - mock_lock_instance = MagicMock() - - mock_kafka_produce_handler.return_value = mock_kafka_produce_handler_instance - mock_kafka_consume_handler.return_value = mock_kafka_consume_handler_instance - mock_lock.return_value = mock_lock_instance - - with self.assertRaises(ValueError): - LogServer() - - mock_kafka_consume_handler.assert_not_called() - mock_kafka_produce_handler.assert_not_called() + mock_kafka_consume_handler.assert_called_once_with("test_topic") class TestStart(unittest.IsolatedAsyncioTestCase): - @patch("src.logserver.server.HOSTNAME", "127.0.0.1") @patch("src.logserver.server.logger") @patch("src.logserver.server.LogServer.fetch_from_kafka") @patch("src.logserver.server.LogServer.fetch_from_file") @patch("src.logserver.server.SimpleKafkaConsumeHandler") + @patch("src.logserver.server.ExactlyOnceKafkaProduceHandler") async def test_start( self, + mock_kafka_produce_handler, mock_kafka_consume_handler, mock_fetch_from_file, mock_fetch_from_kafka, @@ -107,17 +54,21 @@ async def 
test_start( class TestFetchFromKafka(unittest.IsolatedAsyncioTestCase): - async def asyncSetUp(self): - self.sut = LogServer() - self.sut.kafka_consume_handler = AsyncMock() - self.sut.kafka_produce_handler = AsyncMock() - + @patch("src.logserver.server.ExactlyOnceKafkaProduceHandler") + @patch("src.logserver.server.SimpleKafkaConsumeHandler") @patch("src.logserver.server.LogServer.send") @patch("src.logserver.server.logger") @patch("asyncio.get_running_loop") async def test_handle_kafka_inputs( - self, mock_get_running_loop, mock_logger, mock_send + self, + mock_get_running_loop, + mock_logger, + mock_send, + mock_kafka_consume, + mock_kafka_produce, ): + self.sut = LogServer() + mock_send_instance = AsyncMock() mock_send.return_value = mock_send_instance mock_loop = AsyncMock() @@ -140,14 +91,20 @@ async def test_handle_kafka_inputs( class TestFetchFromFile(unittest.IsolatedAsyncioTestCase): - def setUp(self): - self.sut = LogServer() - self.sut.kafka_consume_handler = AsyncMock() - self.sut.kafka_produce_handler = AsyncMock() - @patch("src.logserver.server.SEND_TO_TOPIC", "test_topic") + @patch("src.logserver.server.ExactlyOnceKafkaProduceHandler") + @patch("src.logserver.server.SimpleKafkaConsumeHandler") + @patch("src.logserver.server.PRODUCE_TOPIC", "test_topic") + @patch("src.logserver.server.LogServer.send") @patch("src.logserver.server.logger") - async def test_fetch_from_file(self, mock_logger): + async def test_fetch_from_file( + self, mock_logger, mock_send, mock_kafka_consume, mock_kafka_produce + ): + self.sut = LogServer() + + mock_send_instance = AsyncMock() + mock_send.return_value = mock_send_instance + with tempfile.NamedTemporaryFile( delete=False, mode="w+", newline="" ) as temp_file: @@ -173,12 +130,8 @@ async def test_fetch_from_file(self, mock_logger): finally: os.remove(temp_file_path) - self.sut.kafka_produce_handler.produce.assert_any_call( - topic="test_topic", data="Test line 3" - ) - self.sut.kafka_produce_handler.produce.assert_any_call( - topic="test_topic", data="Test line 4" - ) + mock_send.assert_any_call("Test line 3") + mock_send.assert_any_call("Test line 4") class TestMainFunction(unittest.TestCase): From df4ef4b87649757a0d5dd93ec5bc0561a90c3bc4 Mon Sep 17 00:00:00 2001 From: Manuel Fuchs Date: Fri, 15 Nov 2024 15:43:24 +0100 Subject: [PATCH 15/29] Fix missing timer deletion in clickhouse_batch.py --- src/monitoring/clickhouse_batch.py | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/src/monitoring/clickhouse_batch.py b/src/monitoring/clickhouse_batch.py index 1b41381..fb87795 100644 --- a/src/monitoring/clickhouse_batch.py +++ b/src/monitoring/clickhouse_batch.py @@ -62,22 +62,18 @@ def insert_all(self): self._client.insert( self.table_name, self.batch, - self.column_names, + column_names=self.column_names, ) - logger.info( - f""" - self.client.insert( - {self.table_name=}, - {self.batch=}, - {self.column_names=}, - ) - """ + logger.debug( + f"Inserted {self.table_name=},{self.batch=},{self.column_names=}" ) self.batch = [] if self.timer: self.timer.cancel() + self.timer = None + def _start_timer(self): if self.timer: self.timer.cancel() From f1cd162ee17de40c8acb5b88f3313b97a487c212 Mon Sep 17 00:00:00 2001 From: Manuel Fuchs Date: Fri, 15 Nov 2024 15:47:05 +0100 Subject: [PATCH 16/29] Allow inserts via monitoring_agent.py --- src/base/kafka_handler.py | 7 +- src/logcollector/collector.py | 9 +-- src/logserver/server.py | 22 +++++-- src/monitoring/clickhouse_connector.py | 64 +++++++++++++++---- 
src/monitoring/clickhouse_kafka_sender.py | 6 +- .../create_tables/failed_dns_loglines.sql | 2 +- src/monitoring/monitoring_agent.py | 22 ++++--- 7 files changed, 93 insertions(+), 39 deletions(-) diff --git a/src/base/kafka_handler.py b/src/base/kafka_handler.py index 705dfed..c14a69a 100644 --- a/src/base/kafka_handler.py +++ b/src/base/kafka_handler.py @@ -110,6 +110,7 @@ def produce(self, topic: str, data: str, key: None | str = None) -> None: if not data: return + self.producer.flush() self.producer.produce( topic=topic, key=key, @@ -117,8 +118,6 @@ def produce(self, topic: str, data: str, key: None | str = None) -> None: callback=kafka_delivery_report, ) - self.producer.flush() - class ExactlyOnceKafkaProduceHandler(KafkaProduceHandler): """ @@ -156,8 +155,8 @@ def produce(self, topic: str, data: str, key: None | str = None) -> None: return self.producer.flush() - self.producer.begin_transaction() + try: self.producer.produce( topic=topic, @@ -167,7 +166,7 @@ def produce(self, topic: str, data: str, key: None | str = None) -> None: ) self.commit_transaction_with_retry() - except Exception as e: + except Exception: self.producer.abort_transaction() logger.error("Transaction aborted.") raise diff --git a/src/logcollector/collector.py b/src/logcollector/collector.py index 92a6418..a9cb35e 100644 --- a/src/logcollector/collector.py +++ b/src/logcollector/collector.py @@ -34,7 +34,6 @@ class LogCollector: """ def __init__(self) -> None: - self.lock = Lock() self.loglines = asyncio.Queue() self.batch_handler = BufferedBatchSender() self.logline_handler = LoglineHandler() @@ -82,11 +81,13 @@ async def send(self) -> None: while True: if not self.loglines.empty(): logline = await self.loglines.get() - fields = ( - self.logline_handler.validate_logline_and_get_fields_as_json( + try: + fields = self.logline_handler.validate_logline_and_get_fields_as_json( logline ) - ) + except ValueError: + continue + subnet_id = self.get_subnet_id( ipaddress.ip_address(fields.get("client_ip")) ) diff --git a/src/logserver/server.py b/src/logserver/server.py index 30bfecf..7e869df 100644 --- a/src/logserver/server.py +++ b/src/logserver/server.py @@ -1,6 +1,8 @@ import asyncio +import datetime import os import sys +import uuid import aiofiles @@ -9,6 +11,7 @@ SimpleKafkaConsumeHandler, ExactlyOnceKafkaProduceHandler, ) +from src.monitoring.clickhouse_kafka_sender import ClickHouseKafkaSender from src.base.utils import generate_unique_transactional_id from src.base.utils import setup_config from src.base.log_config import get_logger @@ -37,9 +40,11 @@ class LogServer: """ def __init__(self) -> None: - self.kafka_consume_handler = SimpleKafkaConsumeHandler(CONSUME_TOPIC) transactional_id = generate_unique_transactional_id(module_name, KAFKA_BROKERS) + + self.kafka_consume_handler = SimpleKafkaConsumeHandler(CONSUME_TOPIC) self.kafka_produce_handler = ExactlyOnceKafkaProduceHandler(transactional_id) + self.server_logs = ClickHouseKafkaSender("server_logs") async def start(self) -> None: """ @@ -60,7 +65,7 @@ async def start(self) -> None: except KeyboardInterrupt: logger.info("LogServer stopped.") - async def send(self, message: str) -> None: + def send(self, message: str) -> None: """ Sends a received message using Kafka. 
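(For orientation, the hunks in this file wire the log server into the monitoring write path this series establishes: ClickHouseKafkaSender.insert() serializes a row dict to JSON and produces it to the per-table topic "clickhouse_<table_name>"; the MonitoringAgent consumes those topics and hands each row to the matching connector, which batches it into ClickHouse. A minimal sketch of that round trip, using the classes from this series as they stand at this patch -- the two-line connector mapping at the end is illustrative, not code from the patch:

    import json

    from src.monitoring.clickhouse_connector import ServerLogsConnector
    from src.monitoring.clickhouse_kafka_sender import ClickHouseKafkaSender

    # Producer side: fire-and-forget insert; the dict is serialized with
    # json.dumps(..., default=str) and produced to "clickhouse_server_logs".
    server_logs = ClickHouseKafkaSender("server_logs")
    server_logs.insert(dict(message_text="example log line"))

    # Consumer side (MonitoringAgent): map the topic back to its table name
    # and let the connector batch the row into ClickHouse.
    connectors = {"server_logs": ServerLogsConnector()}  # illustrative mapping
    topic, value = "clickhouse_server_logs", '{"message_text": "example log line"}'
    connectors[topic.replace("clickhouse_", "")].insert(**json.loads(value))

)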
@@ -68,6 +73,15 @@ async def send(self, message: str) -> None: message (str): Message to be sent """ self.kafka_produce_handler.produce(topic=PRODUCE_TOPIC, data=message) + + self.server_logs.insert( + dict( + message_id=uuid.uuid4(), + timestamp_in=datetime.datetime.now(), + message_text=message, + ) + ) + logger.debug(f"Sent: '{message}'") async def fetch_from_kafka(self) -> None: @@ -82,7 +96,7 @@ async def fetch_from_kafka(self) -> None: ) logger.debug(f"From Kafka: '{value}'") - await self.send(value) + self.send(value) async def fetch_from_file(self, file: str = READ_FROM_FILE) -> None: """ @@ -109,7 +123,7 @@ async def fetch_from_file(self, file: str = READ_FROM_FILE) -> None: continue logger.debug(f"From file: '{cleaned_line}'") - await self.send(cleaned_line) + self.send(cleaned_line) def main() -> None: diff --git a/src/monitoring/clickhouse_connector.py b/src/monitoring/clickhouse_connector.py index 2065eba..5bb56c6 100644 --- a/src/monitoring/clickhouse_connector.py +++ b/src/monitoring/clickhouse_connector.py @@ -67,15 +67,22 @@ def __init__(self): def insert( self, message_text: str, - message_id: uuid.UUID = None, - timestamp_in: datetime.datetime | None = None, + message_id: None | str | uuid.UUID = None, + timestamp_in: str | datetime.datetime | None = None, ) -> uuid.UUID: + # TODO: Switch to Marshmallow if not message_id: message_id = uuid.uuid4() + if isinstance(message_id, str): + message_id = uuid.UUID(message_id) + if not timestamp_in: timestamp_in = datetime.datetime.now() + if isinstance(timestamp_in, str): + timestamp_in = datetime.datetime.fromisoformat(timestamp_in) + self._add_to_batch([message_id, timestamp_in, message_text]) return message_id @@ -92,10 +99,13 @@ def __init__(self): def insert( self, - message_id: uuid.UUID, + message_id: str | uuid.UUID, event: str, - event_timestamp: datetime.datetime | None = None, + event_timestamp: str | datetime.datetime | None = None, ): + if isinstance(message_id, str): + message_id = uuid.UUID(message_id) + if not event_timestamp: event_timestamp = datetime.datetime.now() @@ -116,8 +126,8 @@ def __init__(self): def insert( self, message_text: str, - timestamp_in: datetime.datetime, - timestamp_failed: datetime.datetime | None = None, + timestamp_in: str | datetime.datetime, + timestamp_failed: str | datetime.datetime | None = None, reason_for_failure: str | None = None, ) -> None: if not timestamp_failed: @@ -137,7 +147,14 @@ def __init__(self): super().__init__("logline_to_batches", column_names) - def insert(self, logline_id: uuid.UUID, batch_id: uuid.UUID): + def insert( + self, + logline_id: str | uuid.UUID, + batch_id: str | uuid.UUID, + ): + if isinstance(logline_id, str): + logline_id = uuid.UUID(logline_id) + self._add_to_batch( [ logline_id, @@ -163,7 +180,7 @@ def __init__(self): def insert( self, subnet_id: str, - timestamp: datetime.datetime, + timestamp: str | datetime.datetime, status_code: str, client_ip: str, record_type: str, @@ -196,8 +213,14 @@ def __init__(self): super().__init__("logline_status", column_names) def insert( - self, logline_id: uuid.UUID, status: str, exit_at_stage: str | None = None + self, + logline_id: str | uuid.UUID, + status: str, + exit_at_stage: str | None = None, ): + if isinstance(logline_id, str): + logline_id = uuid.UUID(logline_id) + self._add_to_batch( [ logline_id, @@ -220,11 +243,14 @@ def __init__(self): def insert( self, - logline_id: uuid.UUID, + logline_id: str | uuid.UUID, stage: str, status: str, - timestamp: datetime.datetime = None, + timestamp: str | 
datetime.datetime = None, ) -> None: + if isinstance(logline_id, str): + logline_id = uuid.UUID(logline_id) + if not timestamp: timestamp = datetime.datetime.now() @@ -249,8 +275,14 @@ def __init__(self): super().__init__("batch_status", column_names) def insert( - self, batch_id: uuid.UUID, status: str, exit_at_stage: str | None = None + self, + batch_id: str | uuid.UUID, + status: str, + exit_at_stage: str | None = None, ): + if isinstance(batch_id, str): + batch_id = uuid.UUID(batch_id) + self._add_to_batch( [ batch_id, @@ -274,14 +306,18 @@ def __init__(self): def insert( self, - batch_id: uuid.UUID, + batch_id: str | uuid.UUID, stage: str, status: str, message_count: int, - timestamp: datetime.datetime = None, + timestamp: str | datetime.datetime = None, ) -> None: + if isinstance(batch_id, str): + batch_id = uuid.UUID(batch_id) + if not timestamp: timestamp = datetime.datetime.now() + self._add_to_batch( [ batch_id, diff --git a/src/monitoring/clickhouse_kafka_sender.py b/src/monitoring/clickhouse_kafka_sender.py index 9991362..ef4338c 100644 --- a/src/monitoring/clickhouse_kafka_sender.py +++ b/src/monitoring/clickhouse_kafka_sender.py @@ -12,10 +12,10 @@ class ClickHouseKafkaSender: def __init__(self, table_name: str): self.table_name = table_name - self.kafka_producer = SimpleKafkaProduceHandler(transactional_id="clickhouse") + self.kafka_producer = SimpleKafkaProduceHandler() - def insert(self, data: list): + def insert(self, data: dict): self.kafka_producer.produce( topic=f"clickhouse_{self.table_name}", - data=json.dumps(data), + data=json.dumps(data, default=str), ) diff --git a/src/monitoring/create_tables/failed_dns_loglines.sql b/src/monitoring/create_tables/failed_dns_loglines.sql index c959f1e..846f6cd 100644 --- a/src/monitoring/create_tables/failed_dns_loglines.sql +++ b/src/monitoring/create_tables/failed_dns_loglines.sql @@ -1,4 +1,4 @@ -CREATE TABLE IF NOT EXISTS server_logs ( +CREATE TABLE IF NOT EXISTS failed_dns_loglines ( message_text String NOT NULL, timestamp_in DateTime64(6) NOT NULL, timestamp_failed DateTime64(6) NOT NULL, diff --git a/src/monitoring/monitoring_agent.py b/src/monitoring/monitoring_agent.py index 23b49e8..050032d 100644 --- a/src/monitoring/monitoring_agent.py +++ b/src/monitoring/monitoring_agent.py @@ -5,7 +5,7 @@ sys.path.append(os.getcwd()) from src.monitoring.clickhouse_connector import * -from src.base.kafka_handler import KafkaConsumeHandler +from src.base.kafka_handler import SimpleKafkaConsumeHandler from src.base.log_config import get_logger from src.base.utils import setup_config @@ -48,10 +48,9 @@ def __init__(self): } self.topics = [f"clickhouse_{table_name}" for table_name in self.connectors] - self.kafka_consumer = KafkaConsumeHandler(self.topics) + self.kafka_consumer = SimpleKafkaConsumeHandler(self.topics) async def start(self): - prepare_all_tables() loop = asyncio.get_running_loop() try: @@ -59,16 +58,21 @@ async def start(self): key, value, topic = await loop.run_in_executor( None, self.kafka_consumer.consume ) - logger.info(f"Received message via Kafka:\n ⤷ {value}") + logger.debug(f"From Kafka: {value}") data = json.loads(value) - task = self.connectors[value].insert(**data) - await task + table_name = topic.replace("clickhouse_", "") + + self.connectors[table_name].insert(**data) except KeyboardInterrupt: - logger.info("Stop consuming...") + logger.info("Stopped MonitoringAgent.") -if __name__ == "__main__": - logger.info("Starting Monitoring Agent...") +def main(): + prepare_all_tables() clickhouse_consumer = 
MonitoringAgent() asyncio.run(clickhouse_consumer.start()) + + +if __name__ == "__main__": + main() From bbffd2a96c89b089f4f383f5aaab9dfdea03cc96 Mon Sep 17 00:00:00 2001 From: Manuel Fuchs Date: Fri, 15 Nov 2024 16:06:57 +0100 Subject: [PATCH 17/29] Update .gitignore --- .gitignore | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.gitignore b/.gitignore index f750cb3..edfbd36 100644 --- a/.gitignore +++ b/.gitignore @@ -323,6 +323,6 @@ cython_debug/ # End of https://www.toptal.com/developers/gitignore/api/python,jupyternotebooks # Others -/docs/api/ +docs/api/ !/docs/api/index.rst -/docker/default.txt +docker/default.txt From 3ffa1ae0ef9b599e3ddfd4b69808562d92363fcc Mon Sep 17 00:00:00 2001 From: Manuel Fuchs Date: Fri, 15 Nov 2024 16:13:15 +0100 Subject: [PATCH 18/29] Update tests for collector and server --- .gitignore | 1 - setup.cfg | 1 + src/base/log_config.py | 6 ++---- src/logcollector/collector.py | 1 - tests/test_collector.py | 19 ++++++++++++++++++- tests/test_server.py | 15 +++++++-------- 6 files changed, 28 insertions(+), 15 deletions(-) diff --git a/.gitignore b/.gitignore index edfbd36..227a00a 100644 --- a/.gitignore +++ b/.gitignore @@ -325,4 +325,3 @@ cython_debug/ # Others docs/api/ !/docs/api/index.rst -docker/default.txt diff --git a/setup.cfg b/setup.cfg index dad0ea2..d9961f2 100644 --- a/setup.cfg +++ b/setup.cfg @@ -6,6 +6,7 @@ omit = tests/* version.py src/train/* + *sandbox.py [coverage:report] show_missing = True diff --git a/src/base/log_config.py b/src/base/log_config.py index 895e2d9..3179efc 100644 --- a/src/base/log_config.py +++ b/src/base/log_config.py @@ -77,10 +77,8 @@ def get_logger(module_name: str = "base") -> logging.Logger: if logger.hasHandlers(): logger.handlers.clear() - # Prevent multiple handler additions - if not logger.handlers: - handler = CustomHandler() - logger.addHandler(handler) + handler = CustomHandler() + logger.addHandler(handler) # Default to base debug setting debug_enabled = config["logging"]["base"]["debug"] diff --git a/src/logcollector/collector.py b/src/logcollector/collector.py index a9cb35e..0c1430d 100644 --- a/src/logcollector/collector.py +++ b/src/logcollector/collector.py @@ -3,7 +3,6 @@ import json import os import sys -from multiprocessing import Lock sys.path.append(os.getcwd()) from src.base.kafka_handler import ExactlyOnceKafkaConsumeHandler diff --git a/tests/test_collector.py b/tests/test_collector.py index b2b5d26..dd9782d 100644 --- a/tests/test_collector.py +++ b/tests/test_collector.py @@ -3,7 +3,7 @@ import unittest from unittest.mock import MagicMock, patch, AsyncMock -from src.logcollector.collector import LogCollector +from src.logcollector.collector import LogCollector, main class TestInit(unittest.TestCase): @@ -356,5 +356,22 @@ def test_get_subnet_id_none( sut.get_subnet_id(test_address) +class TestMain(unittest.TestCase): + @patch("src.logcollector.collector.logger") + @patch("src.logcollector.collector.LogCollector") + @patch("asyncio.run") + def test_main(self, mock_asyncio_run, mock_instance, mock_logger): + # Arrange + mock_instance_obj = MagicMock() + mock_instance.return_value = mock_instance_obj + + # Act + main() + + # Assert + mock_instance.assert_called_once() + mock_asyncio_run.assert_called_once_with(mock_instance_obj.start()) + + if __name__ == "__main__": unittest.main() diff --git a/tests/test_server.py b/tests/test_server.py index acdbfa4..2b7c648 100644 --- a/tests/test_server.py +++ b/tests/test_server.py @@ -134,22 +134,21 @@ async def test_fetch_from_file( 
mock_send.assert_any_call("Test line 4") -class TestMainFunction(unittest.TestCase): +class TestMain(unittest.TestCase): @patch("src.logserver.server.logger") - @patch("src.logserver.server.asyncio.run") @patch("src.logserver.server.LogServer") - def test_main(self, mock_log_server_class, mock_asyncio_run, mock_logger): + @patch("asyncio.run") + def test_main(self, mock_asyncio_run, mock_instance, mock_logger): # Arrange - mock_server_instance = MagicMock() - mock_log_server_class.return_value = mock_server_instance + mock_instance_obj = MagicMock() + mock_instance.return_value = mock_instance_obj # Act main() # Assert - mock_log_server_class.assert_called_once() - mock_server_instance.start.assert_called_once() - mock_asyncio_run.assert_called_once_with(mock_server_instance.start()) + mock_instance.assert_called_once() + mock_asyncio_run.assert_called_once_with(mock_instance_obj.start()) if __name__ == "__main__": From a206b9e7117ea95f0fd12efce4908e7054555b7e Mon Sep 17 00:00:00 2001 From: Manuel Fuchs Date: Fri, 15 Nov 2024 16:19:36 +0100 Subject: [PATCH 19/29] Move clickhouse_kafka_sender.py to src/base --- src/{monitoring => base}/clickhouse_kafka_sender.py | 0 src/logserver/server.py | 2 +- 2 files changed, 1 insertion(+), 1 deletion(-) rename src/{monitoring => base}/clickhouse_kafka_sender.py (100%) diff --git a/src/monitoring/clickhouse_kafka_sender.py b/src/base/clickhouse_kafka_sender.py similarity index 100% rename from src/monitoring/clickhouse_kafka_sender.py rename to src/base/clickhouse_kafka_sender.py diff --git a/src/logserver/server.py b/src/logserver/server.py index 7e869df..94f0b8c 100644 --- a/src/logserver/server.py +++ b/src/logserver/server.py @@ -11,7 +11,7 @@ SimpleKafkaConsumeHandler, ExactlyOnceKafkaProduceHandler, ) -from src.monitoring.clickhouse_kafka_sender import ClickHouseKafkaSender +from src.base.clickhouse_kafka_sender import ClickHouseKafkaSender from src.base.utils import generate_unique_transactional_id from src.base.utils import setup_config from src.base.log_config import get_logger From b7558383da1bf858dc94acaaaad94f04c75c5cf6 Mon Sep 17 00:00:00 2001 From: Manuel Fuchs Date: Sun, 17 Nov 2024 19:41:21 +0100 Subject: [PATCH 20/29] Update tests --- src/monitoring/clickhouse_connector.py | 16 + src/monitoring/monitoring_agent.py | 2 +- tests/test_clickhouse_connector.py | 786 +++++++++++++++++++++++++ tests/test_clickhouse_kafka_sender.py | 43 ++ tests/test_collector.py | 132 ++++- tests/test_server.py | 75 ++- 6 files changed, 1032 insertions(+), 22 deletions(-) create mode 100644 tests/test_clickhouse_connector.py create mode 100644 tests/test_clickhouse_kafka_sender.py diff --git a/src/monitoring/clickhouse_connector.py b/src/monitoring/clickhouse_connector.py index 5bb56c6..cb4d2c9 100644 --- a/src/monitoring/clickhouse_connector.py +++ b/src/monitoring/clickhouse_connector.py @@ -109,6 +109,9 @@ def insert( if not event_timestamp: event_timestamp = datetime.datetime.now() + if isinstance(event_timestamp, str): + event_timestamp = datetime.datetime.fromisoformat(event_timestamp) + self._add_to_batch([message_id, event, event_timestamp]) @@ -133,6 +136,11 @@ def insert( if not timestamp_failed: timestamp_failed = datetime.datetime.now() + if isinstance(timestamp_in, str): + timestamp_in = datetime.datetime.fromisoformat(timestamp_in) + if isinstance(timestamp_failed, str): + timestamp_failed = datetime.datetime.fromisoformat(timestamp_failed) + self._add_to_batch( [message_text, timestamp_in, timestamp_failed, reason_for_failure] ) @@ -154,6 
+162,8 @@ def insert( ): if isinstance(logline_id, str): logline_id = uuid.UUID(logline_id) + if isinstance(batch_id, str): + batch_id = uuid.UUID(batch_id) self._add_to_batch( [ @@ -254,6 +264,9 @@ def insert( if not timestamp: timestamp = datetime.datetime.now() + if isinstance(timestamp, str): + timestamp = datetime.datetime.fromisoformat(timestamp) + self._add_to_batch( [ logline_id, @@ -318,6 +331,9 @@ def insert( if not timestamp: timestamp = datetime.datetime.now() + if isinstance(timestamp, str): + timestamp = datetime.datetime.fromisoformat(timestamp) + self._add_to_batch( [ batch_id, diff --git a/src/monitoring/monitoring_agent.py b/src/monitoring/monitoring_agent.py index 050032d..8ea6c09 100644 --- a/src/monitoring/monitoring_agent.py +++ b/src/monitoring/monitoring_agent.py @@ -74,5 +74,5 @@ def main(): asyncio.run(clickhouse_consumer.start()) -if __name__ == "__main__": +if __name__ == "__main__": # pragma: no cover main() diff --git a/tests/test_clickhouse_connector.py b/tests/test_clickhouse_connector.py new file mode 100644 index 0000000..0953b6d --- /dev/null +++ b/tests/test_clickhouse_connector.py @@ -0,0 +1,786 @@ +import json +import unittest +from unittest.mock import patch, MagicMock, mock_open + +from src.monitoring.clickhouse_connector import * + + +class TestClickHouseConnector(unittest.TestCase): + @patch("src.monitoring.clickhouse_connector.ClickHouseBatchSender") + def test_init(self, mock_clickhouse_batch_sender): + # Arrange + mock_clickhouse_batch_sender_instance = MagicMock() + mock_clickhouse_batch_sender.return_value = ( + mock_clickhouse_batch_sender_instance + ) + + table_name = "test_table" + column_names = ["col_1", "col_2", "col_3"] + + # Act + sut = ClickHouseConnector(table_name, column_names) + + # Assert + self.assertEqual(table_name, sut._table_name) + self.assertEqual(column_names, sut._column_names) + self.assertEqual(mock_clickhouse_batch_sender_instance, sut._batch_sender) + + mock_clickhouse_batch_sender.assert_called_once_with( + table_name=table_name, + column_names=column_names, + ) + + @patch("src.monitoring.clickhouse_connector.os.path.join") + @patch( + "src.monitoring.clickhouse_connector.open", + new_callable=mock_open, + read_data="CREATE TABLE test;", + ) + @patch("src.monitoring.clickhouse_connector.clickhouse_connect.get_client") + def test_prepare_table_success( + self, mock_get_client, mock_open_file, mock_path_join + ): + # Arrange + mock_client = MagicMock() + mock_get_client.return_value.__enter__.return_value = mock_client + mock_path_join.return_value = "/fake/path/test_table.sql" + + sut = ClickHouseConnector("test_table", ["col_1", "col_2", "col_3"]) + + # Act + sut.prepare_table() + + # Assert + mock_open_file.assert_called_once_with("/fake/path/test_table.sql", "r") + mock_client.command.assert_called_once_with("CREATE TABLE test;") + + @patch("src.monitoring.clickhouse_connector.os.path.join") + @patch( + "src.monitoring.clickhouse_connector.open", + new_callable=mock_open, + read_data="CREATE TABLE test;", + ) + @patch("src.monitoring.clickhouse_connector.clickhouse_connect.get_client") + @patch("src.monitoring.clickhouse_connector.logger") + def test_prepare_table_failure( + self, mock_logger, mock_get_client, mock_open_file, mock_path_join + ): + mock_client = MagicMock() + mock_get_client.return_value.__enter__.return_value = mock_client + mock_path_join.return_value = "/fake/path/test_table.sql" + mock_client.command.side_effect = Exception("Test exception") + + sut = ClickHouseConnector("test_table", ["col_1", 
"col_2", "col_3"]) + + with self.assertRaises(Exception): + sut.prepare_table() + + @patch("src.monitoring.clickhouse_connector.ClickHouseBatchSender") + def test_add_to_batch(self, mock_clickhouse_batch_sender): + # Arrange + mock_clickhouse_batch_sender_instance = MagicMock() + mock_clickhouse_batch_sender.return_value = ( + mock_clickhouse_batch_sender_instance + ) + + sut = ClickHouseConnector("test_table", ["col_1", "col_2", "col_3"]) + + # Act + sut._add_to_batch("test_data") + + # Assert + mock_clickhouse_batch_sender_instance.add.assert_called_once_with("test_data") + + @patch("src.monitoring.clickhouse_connector.ClickHouseBatchSender") + def test_insert(self, mock_clickhouse_batch_sender): + # Arrange + sut = ClickHouseConnector("test_table", ["col_1", "col_2", "col_3"]) + + # Act + sut.insert("test_data") + + +class TestServerLogsConnector(unittest.TestCase): + @patch("src.monitoring.clickhouse_connector.ClickHouseBatchSender") + def test_init(self, mock_clickhouse_batch_sender): + # Arrange + mock_clickhouse_batch_sender_instance = MagicMock() + mock_clickhouse_batch_sender.return_value = ( + mock_clickhouse_batch_sender_instance + ) + + expected_table_name = "server_logs" + expected_column_names = [ + "message_id", + "timestamp_in", + "message_text", + ] + + # Act + sut = ServerLogsConnector() + + # Assert + self.assertEqual(expected_table_name, sut._table_name) + self.assertEqual(expected_column_names, sut._column_names) + self.assertEqual(mock_clickhouse_batch_sender_instance, sut._batch_sender) + + mock_clickhouse_batch_sender.assert_called_once_with( + table_name=expected_table_name, + column_names=expected_column_names, + ) + + @patch("src.monitoring.clickhouse_connector.ClickHouseBatchSender") + def test_insert_all_given(self, mock_clickhouse_batch_sender): + # Arrange + message_text = "test_message_text" + message_id = "7299539b-6215-4f6b-b39f-69335aafbeff" + timestamp_in = "2034-12-13 12:34:12.132412" + + sut = ServerLogsConnector() + + with patch.object(sut, "_add_to_batch", MagicMock()) as mock_add_to_batch: + # Act + returned_value = sut.insert( + message_text=message_text, + message_id=message_id, + timestamp_in=timestamp_in, + ) + + # Assert + mock_add_to_batch.assert_called_once_with( + [ + uuid.UUID("7299539b-6215-4f6b-b39f-69335aafbeff"), + datetime.datetime(2034, 12, 13, 12, 34, 12, 132412), + "test_message_text", + ] + ) + self.assertEqual( + uuid.UUID("7299539b-6215-4f6b-b39f-69335aafbeff"), returned_value + ) + + @patch("src.monitoring.clickhouse_connector.ClickHouseBatchSender") + def test_insert_none_given(self, mock_clickhouse_batch_sender): + # Arrange + sut = ServerLogsConnector() + + with patch.object(sut, "_add_to_batch", MagicMock()) as mock_add_to_batch: + # Act + sut.insert("test_message_text") + + # Assert + mock_add_to_batch.assert_called_once() + + +class TestServerLogsTimestampsConnector(unittest.TestCase): + @patch("src.monitoring.clickhouse_connector.ClickHouseBatchSender") + def test_init(self, mock_clickhouse_batch_sender): + # Arrange + mock_clickhouse_batch_sender_instance = MagicMock() + mock_clickhouse_batch_sender.return_value = ( + mock_clickhouse_batch_sender_instance + ) + + expected_table_name = "server_logs_timestamps" + expected_column_names = [ + "message_id", + "event", + "event_timestamp", + ] + + # Act + sut = ServerLogsTimestampsConnector() + + # Assert + self.assertEqual(expected_table_name, sut._table_name) + self.assertEqual(expected_column_names, sut._column_names) + 
self.assertEqual(mock_clickhouse_batch_sender_instance, sut._batch_sender) + + mock_clickhouse_batch_sender.assert_called_once_with( + table_name=expected_table_name, + column_names=expected_column_names, + ) + + @patch("src.monitoring.clickhouse_connector.ClickHouseBatchSender") + def test_insert_all_given(self, mock_clickhouse_batch_sender): + # Arrange + message_id = "7299539b-6215-4f6b-b39f-69335aafbeff" + event = "test_event" + event_timestamp = "2034-12-13 12:34:12.132412" + + sut = ServerLogsTimestampsConnector() + + with patch.object(sut, "_add_to_batch", MagicMock()) as mock_add_to_batch: + # Act + sut.insert( + message_id=message_id, + event=event, + event_timestamp=event_timestamp, + ) + + # Assert + mock_add_to_batch.assert_called_once_with( + [ + uuid.UUID("7299539b-6215-4f6b-b39f-69335aafbeff"), + "test_event", + datetime.datetime(2034, 12, 13, 12, 34, 12, 132412), + ] + ) + + @patch("src.monitoring.clickhouse_connector.ClickHouseBatchSender") + def test_insert_none_given(self, mock_clickhouse_batch_sender): + # Arrange + sut = ServerLogsTimestampsConnector() + + with patch.object(sut, "_add_to_batch", MagicMock()) as mock_add_to_batch: + # Act + sut.insert( + uuid.UUID("7299539b-6215-4f6b-b39f-69335aafbeff"), + "test_event", + ) + + # Assert + mock_add_to_batch.assert_called_once() + + +class TestFailedDNSLoglinesConnector(unittest.TestCase): + @patch("src.monitoring.clickhouse_connector.ClickHouseBatchSender") + def test_init(self, mock_clickhouse_batch_sender): + # Arrange + mock_clickhouse_batch_sender_instance = MagicMock() + mock_clickhouse_batch_sender.return_value = ( + mock_clickhouse_batch_sender_instance + ) + + expected_table_name = "failed_dns_loglines" + expected_column_names = [ + "message_text", + "timestamp_in", + "timestamp_failed", + "reason_for_failure", + ] + + # Act + sut = FailedDNSLoglinesConnector() + + # Assert + self.assertEqual(expected_table_name, sut._table_name) + self.assertEqual(expected_column_names, sut._column_names) + self.assertEqual(mock_clickhouse_batch_sender_instance, sut._batch_sender) + + mock_clickhouse_batch_sender.assert_called_once_with( + table_name=expected_table_name, + column_names=expected_column_names, + ) + + @patch("src.monitoring.clickhouse_connector.ClickHouseBatchSender") + def test_insert_all_given(self, mock_clickhouse_batch_sender): + # Arrange + message_text = "test_message_text" + timestamp_in = "2034-12-13 12:34:12.132412" + timestamp_failed = "2034-12-13 12:35:35.542635" + reason_for_failure = "Wrong client_ip field" + + sut = FailedDNSLoglinesConnector() + + with patch.object(sut, "_add_to_batch", MagicMock()) as mock_add_to_batch: + # Act + sut.insert( + message_text=message_text, + timestamp_in=timestamp_in, + timestamp_failed=timestamp_failed, + reason_for_failure=reason_for_failure, + ) + + # Assert + mock_add_to_batch.assert_called_once_with( + [ + "test_message_text", + datetime.datetime(2034, 12, 13, 12, 34, 12, 132412), + datetime.datetime(2034, 12, 13, 12, 35, 35, 542635), + "Wrong client_ip field", + ] + ) + + @patch("src.monitoring.clickhouse_connector.ClickHouseBatchSender") + def test_insert_none_given(self, mock_clickhouse_batch_sender): + # Arrange + message_text = "test_message_text" + timestamp_in = "2034-12-13 12:34:12.132412" + + sut = FailedDNSLoglinesConnector() + + with patch.object(sut, "_add_to_batch", MagicMock()) as mock_add_to_batch: + # Act + sut.insert( + message_text=message_text, + timestamp_in=datetime.datetime(2034, 12, 13, 12, 34, 12, 132412), + ) + + # Assert + 
mock_add_to_batch.assert_called_once() + + +class TestLoglineToBatchesConnector(unittest.TestCase): + @patch("src.monitoring.clickhouse_connector.ClickHouseBatchSender") + def test_init(self, mock_clickhouse_batch_sender): + # Arrange + mock_clickhouse_batch_sender_instance = MagicMock() + mock_clickhouse_batch_sender.return_value = ( + mock_clickhouse_batch_sender_instance + ) + + expected_table_name = "logline_to_batches" + expected_column_names = [ + "logline_id", + "batch_id", + ] + + # Act + sut = LoglineToBatchesConnector() + + # Assert + self.assertEqual(expected_table_name, sut._table_name) + self.assertEqual(expected_column_names, sut._column_names) + self.assertEqual(mock_clickhouse_batch_sender_instance, sut._batch_sender) + + mock_clickhouse_batch_sender.assert_called_once_with( + table_name=expected_table_name, + column_names=expected_column_names, + ) + + @patch("src.monitoring.clickhouse_connector.ClickHouseBatchSender") + def test_insert_all_given_as_str(self, mock_clickhouse_batch_sender): + # Arrange + logline_id = "7299539b-6215-4f6b-b39f-69335aafbeff" + batch_id = "1f855c43-8a75-4b53-b6cd-4a13b89312d6" + + sut = LoglineToBatchesConnector() + + with patch.object(sut, "_add_to_batch", MagicMock()) as mock_add_to_batch: + # Act + sut.insert( + logline_id=logline_id, + batch_id=batch_id, + ) + + # Assert + mock_add_to_batch.assert_called_once_with( + [ + uuid.UUID("7299539b-6215-4f6b-b39f-69335aafbeff"), + uuid.UUID("1f855c43-8a75-4b53-b6cd-4a13b89312d6"), + ] + ) + + @patch("src.monitoring.clickhouse_connector.ClickHouseBatchSender") + def test_insert_all_given_as_uuid(self, mock_clickhouse_batch_sender): + # Arrange + logline_id = uuid.UUID("7299539b-6215-4f6b-b39f-69335aafbeff") + batch_id = uuid.UUID("1f855c43-8a75-4b53-b6cd-4a13b89312d6") + + sut = LoglineToBatchesConnector() + + with patch.object(sut, "_add_to_batch", MagicMock()) as mock_add_to_batch: + # Act + sut.insert( + logline_id=logline_id, + batch_id=batch_id, + ) + + # Assert + mock_add_to_batch.assert_called_once_with( + [ + uuid.UUID("7299539b-6215-4f6b-b39f-69335aafbeff"), + uuid.UUID("1f855c43-8a75-4b53-b6cd-4a13b89312d6"), + ] + ) + + +class TestDNSLoglinesConnector(unittest.TestCase): + @patch("src.monitoring.clickhouse_connector.ClickHouseBatchSender") + def test_init(self, mock_clickhouse_batch_sender): + # Arrange + mock_clickhouse_batch_sender_instance = MagicMock() + mock_clickhouse_batch_sender.return_value = ( + mock_clickhouse_batch_sender_instance + ) + + expected_table_name = "dns_loglines" + expected_column_names = [ + "logline_id", + "subnet_id", + "timestamp", + "status_code", + "client_ip", + "record_type", + "additional_fields", + ] + + # Act + sut = DNSLoglinesConnector() + + # Assert + self.assertEqual(expected_table_name, sut._table_name) + self.assertEqual(expected_column_names, sut._column_names) + self.assertEqual(mock_clickhouse_batch_sender_instance, sut._batch_sender) + + mock_clickhouse_batch_sender.assert_called_once_with( + table_name=expected_table_name, + column_names=expected_column_names, + ) + + @patch("src.monitoring.clickhouse_connector.ClickHouseBatchSender") + def test_insert_all_given(self, mock_clickhouse_batch_sender): + # Arrange + subnet_id = "127.0.0.0_24" + timestamp = "2034-12-13 12:34:12.132412" + status_code = "NXDOMAIN" + client_ip = "127.0.0.1" + record_type = "A" + additional_fields = json.dumps(dict(test="some_field")) + + sut = DNSLoglinesConnector() + + with patch.object(sut, "_add_to_batch", MagicMock()) as mock_add_to_batch: + # Act + returned_value 
= sut.insert( + subnet_id=subnet_id, + timestamp=timestamp, + status_code=status_code, + client_ip=client_ip, + record_type=record_type, + additional_fields=additional_fields, + ) + + # Assert + mock_add_to_batch.assert_called_once() + self.assertTrue(isinstance(returned_value, uuid.UUID)) + + +class TestLoglineStatusConnector(unittest.TestCase): + @patch("src.monitoring.clickhouse_connector.ClickHouseBatchSender") + def test_init(self, mock_clickhouse_batch_sender): + # Arrange + mock_clickhouse_batch_sender_instance = MagicMock() + mock_clickhouse_batch_sender.return_value = ( + mock_clickhouse_batch_sender_instance + ) + + expected_table_name = "logline_status" + expected_column_names = [ + "logline_id", + "status", + "exit_at_stage", + ] + + # Act + sut = LoglineStatusConnector() + + # Assert + self.assertEqual(expected_table_name, sut._table_name) + self.assertEqual(expected_column_names, sut._column_names) + self.assertEqual(mock_clickhouse_batch_sender_instance, sut._batch_sender) + + mock_clickhouse_batch_sender.assert_called_once_with( + table_name=expected_table_name, + column_names=expected_column_names, + ) + + @patch("src.monitoring.clickhouse_connector.ClickHouseBatchSender") + def test_insert_all_given(self, mock_clickhouse_batch_sender): + # Arrange + logline_id = "7299539b-6215-4f6b-b39f-69335aafbeff" + status = "inactive" + exit_at_stage = "prefilter" + + sut = LoglineStatusConnector() + + with patch.object(sut, "_add_to_batch", MagicMock()) as mock_add_to_batch: + # Act + sut.insert( + logline_id=logline_id, + status=status, + exit_at_stage=exit_at_stage, + ) + + # Assert + mock_add_to_batch.assert_called_once_with( + [ + uuid.UUID("7299539b-6215-4f6b-b39f-69335aafbeff"), + "inactive", + "prefilter", + ] + ) + + @patch("src.monitoring.clickhouse_connector.ClickHouseBatchSender") + def test_insert_none_given(self, mock_clickhouse_batch_sender): + # Arrange + logline_id = uuid.UUID("7299539b-6215-4f6b-b39f-69335aafbeff") + status = "inactive" + + sut = LoglineStatusConnector() + + with patch.object(sut, "_add_to_batch", MagicMock()) as mock_add_to_batch: + # Act + sut.insert( + logline_id=logline_id, + status=status, + ) + + # Assert + mock_add_to_batch.assert_called_once_with( + [ + uuid.UUID("7299539b-6215-4f6b-b39f-69335aafbeff"), + "inactive", + None, + ] + ) + + +class TestLoglineTimestampsConnector(unittest.TestCase): + @patch("src.monitoring.clickhouse_connector.ClickHouseBatchSender") + def test_init(self, mock_clickhouse_batch_sender): + # Arrange + mock_clickhouse_batch_sender_instance = MagicMock() + mock_clickhouse_batch_sender.return_value = ( + mock_clickhouse_batch_sender_instance + ) + + expected_table_name = "logline_timestamps" + expected_column_names = [ + "logline_id", + "stage", + "status", + "timestamp", + ] + + # Act + sut = LoglineTimestampsConnector() + + # Assert + self.assertEqual(expected_table_name, sut._table_name) + self.assertEqual(expected_column_names, sut._column_names) + self.assertEqual(mock_clickhouse_batch_sender_instance, sut._batch_sender) + + mock_clickhouse_batch_sender.assert_called_once_with( + table_name=expected_table_name, + column_names=expected_column_names, + ) + + @patch("src.monitoring.clickhouse_connector.ClickHouseBatchSender") + def test_insert_all_given(self, mock_clickhouse_batch_sender): + # Arrange + logline_id = "7299539b-6215-4f6b-b39f-69335aafbeff" + stage = "prefilter" + status = "prefilter_out" + timestamp = "2034-12-13 12:35:35.542635" + + sut = LoglineTimestampsConnector() + + with patch.object(sut, 
"_add_to_batch", MagicMock()) as mock_add_to_batch: + # Act + sut.insert( + logline_id=logline_id, + stage=stage, + status=status, + timestamp=timestamp, + ) + + # Assert + mock_add_to_batch.assert_called_once_with( + [ + uuid.UUID("7299539b-6215-4f6b-b39f-69335aafbeff"), + "prefilter", + "prefilter_out", + datetime.datetime(2034, 12, 13, 12, 35, 35, 542635), + ] + ) + + @patch("src.monitoring.clickhouse_connector.ClickHouseBatchSender") + def test_insert_none_given(self, mock_clickhouse_batch_sender): + # Arrange + logline_id = uuid.UUID("7299539b-6215-4f6b-b39f-69335aafbeff") + stage = "prefilter" + status = "prefilter_out" + + sut = LoglineTimestampsConnector() + + with patch.object(sut, "_add_to_batch", MagicMock()) as mock_add_to_batch: + # Act + sut.insert( + logline_id=logline_id, + stage=stage, + status=status, + ) + + # Assert + mock_add_to_batch.assert_called_once() + + +class TestBatchStatusConnector(unittest.TestCase): + @patch("src.monitoring.clickhouse_connector.ClickHouseBatchSender") + def test_init(self, mock_clickhouse_batch_sender): + # Arrange + mock_clickhouse_batch_sender_instance = MagicMock() + mock_clickhouse_batch_sender.return_value = ( + mock_clickhouse_batch_sender_instance + ) + + expected_table_name = "batch_status" + expected_column_names = [ + "batch_id", + "status", + "exit_at_stage", + ] + + # Act + sut = BatchStatusConnector() + + # Assert + self.assertEqual(expected_table_name, sut._table_name) + self.assertEqual(expected_column_names, sut._column_names) + self.assertEqual(mock_clickhouse_batch_sender_instance, sut._batch_sender) + + mock_clickhouse_batch_sender.assert_called_once_with( + table_name=expected_table_name, + column_names=expected_column_names, + ) + + @patch("src.monitoring.clickhouse_connector.ClickHouseBatchSender") + def test_insert_all_given(self, mock_clickhouse_batch_sender): + # Arrange + batch_id = "7299539b-6215-4f6b-b39f-69335aafbeff" + status = "inactive" + exit_at_stage = "prefilter" + + sut = BatchStatusConnector() + + with patch.object(sut, "_add_to_batch", MagicMock()) as mock_add_to_batch: + # Act + sut.insert( + batch_id=batch_id, + status=status, + exit_at_stage=exit_at_stage, + ) + + # Assert + mock_add_to_batch.assert_called_once_with( + [ + uuid.UUID("7299539b-6215-4f6b-b39f-69335aafbeff"), + "inactive", + "prefilter", + ] + ) + + @patch("src.monitoring.clickhouse_connector.ClickHouseBatchSender") + def test_insert_none_given(self, mock_clickhouse_batch_sender): + # Arrange + batch_id = uuid.UUID("7299539b-6215-4f6b-b39f-69335aafbeff") + status = "inactive" + + sut = BatchStatusConnector() + + with patch.object(sut, "_add_to_batch", MagicMock()) as mock_add_to_batch: + # Act + sut.insert( + batch_id=batch_id, + status=status, + ) + + # Assert + mock_add_to_batch.assert_called_once_with( + [ + uuid.UUID("7299539b-6215-4f6b-b39f-69335aafbeff"), + "inactive", + None, + ] + ) + + +class TestBatchTimestampsConnector(unittest.TestCase): + @patch("src.monitoring.clickhouse_connector.ClickHouseBatchSender") + def test_init(self, mock_clickhouse_batch_sender): + # Arrange + mock_clickhouse_batch_sender_instance = MagicMock() + mock_clickhouse_batch_sender.return_value = ( + mock_clickhouse_batch_sender_instance + ) + + expected_table_name = "batch_timestamps" + expected_column_names = [ + "batch_id", + "stage", + "status", + "timestamp", + "message_count", + ] + + # Act + sut = BatchTimestampsConnector() + + # Assert + self.assertEqual(expected_table_name, sut._table_name) + self.assertEqual(expected_column_names, 
sut._column_names) + self.assertEqual(mock_clickhouse_batch_sender_instance, sut._batch_sender) + + mock_clickhouse_batch_sender.assert_called_once_with( + table_name=expected_table_name, + column_names=expected_column_names, + ) + + @patch("src.monitoring.clickhouse_connector.ClickHouseBatchSender") + def test_insert_all_given(self, mock_clickhouse_batch_sender): + # Arrange + batch_id = "7299539b-6215-4f6b-b39f-69335aafbeff" + stage = "prefilter" + status = "prefilter_out" + timestamp = "2034-12-13 12:35:35.542635" + message_count = 456 + + sut = BatchTimestampsConnector() + + with patch.object(sut, "_add_to_batch", MagicMock()) as mock_add_to_batch: + # Act + sut.insert( + batch_id=batch_id, + stage=stage, + status=status, + timestamp=timestamp, + message_count=message_count, + ) + + # Assert + mock_add_to_batch.assert_called_once_with( + [ + uuid.UUID("7299539b-6215-4f6b-b39f-69335aafbeff"), + "prefilter", + "prefilter_out", + datetime.datetime(2034, 12, 13, 12, 35, 35, 542635), + 456, + ] + ) + + @patch("src.monitoring.clickhouse_connector.ClickHouseBatchSender") + def test_insert_none_given(self, mock_clickhouse_batch_sender): + # Arrange + batch_id = uuid.UUID("7299539b-6215-4f6b-b39f-69335aafbeff") + stage = "prefilter" + status = "prefilter_out" + message_count = 456 + + sut = BatchTimestampsConnector() + + with patch.object(sut, "_add_to_batch", MagicMock()) as mock_add_to_batch: + # Act + sut.insert( + batch_id=batch_id, + stage=stage, + status=status, + message_count=message_count, + ) + + # Assert + mock_add_to_batch.assert_called_once() + + +if __name__ == "__main__": + unittest.main() diff --git a/tests/test_clickhouse_kafka_sender.py b/tests/test_clickhouse_kafka_sender.py new file mode 100644 index 0000000..0aacd1b --- /dev/null +++ b/tests/test_clickhouse_kafka_sender.py @@ -0,0 +1,43 @@ +import unittest +from unittest.mock import patch + +from src.base.clickhouse_kafka_sender import ClickHouseKafkaSender + + +class TestInit(unittest.TestCase): + @patch("src.base.clickhouse_kafka_sender.SimpleKafkaProduceHandler") + def test_init(self, mock_produce_handler): + # Arrange + table_name = "test_table" + mock_produce_handler_instance = mock_produce_handler + mock_produce_handler.return_value = mock_produce_handler_instance + + # Act + sut = ClickHouseKafkaSender(table_name) + + # Assert + self.assertEqual(table_name, sut.table_name) + self.assertEqual(mock_produce_handler_instance, sut.kafka_producer) + mock_produce_handler.assert_called_once() + + +class TestInsert(unittest.TestCase): + @patch("src.base.clickhouse_kafka_sender.SimpleKafkaProduceHandler") + def test_insert(self, mock_produce_handler): + # Arrange + mock_produce_handler_instance = mock_produce_handler + mock_produce_handler.return_value = mock_produce_handler_instance + sut = ClickHouseKafkaSender("test_table") + + # Act + sut.insert({"test_key": "test_value"}) + + # Assert + mock_produce_handler_instance.produce.assert_called_once_with( + topic="clickhouse_test_table", + data='{"test_key": "test_value"}', + ) + + +if __name__ == "__main__": + unittest.main() diff --git a/tests/test_collector.py b/tests/test_collector.py index dd9782d..e09cbef 100644 --- a/tests/test_collector.py +++ b/tests/test_collector.py @@ -34,29 +34,54 @@ def test_valid_init( class TestStart(unittest.IsolatedAsyncioTestCase): @patch("src.logcollector.collector.logger") - @patch("src.logcollector.collector.LogCollector.send") - @patch("src.logcollector.collector.LogCollector.fetch") 
@patch("src.logcollector.collector.ExactlyOnceKafkaConsumeHandler") @patch("src.logcollector.collector.BufferedBatchSender") @patch("src.logcollector.collector.LoglineHandler") - async def test_start( + def setUp( self, mock_logline_handler, mock_batch_handler, mock_kafka_consume_handler, - mock_fetch, - mock_send, mock_logger, ): - # Arrange - sut = LogCollector() - - # Act - await sut.start() + self.sut = LogCollector() - # Assert - mock_send.assert_called_once() - mock_fetch.assert_called_once() + async def test_start_successful_execution(self): + # Arrange + self.sut.fetch = AsyncMock() + self.sut.send = AsyncMock() + + async def mock_gather(*args, **kwargs): + return None + + with patch( + "src.logcollector.collector.asyncio.gather", side_effect=mock_gather + ) as mock: + # Act + await self.sut.start() + + # Assert + mock.assert_called_once() + self.sut.fetch.assert_called_once() + self.sut.send.assert_called_once() + + # TODO: Update + # async def test_start_handles_keyboard_interrupt(self): + # # Arrange + # self.sut.fetch = AsyncMock() + # self.sut.send = AsyncMock() + # + # async def mock_gather(*args, **kwargs): + # raise KeyboardInterrupt + # + # with (patch('src.logcollector.collector.asyncio.gather', side_effect=mock_gather) as mock): + # # Act + # await self.sut.start() + # + # # Assert + # mock.assert_called_once() + # self.sut.fetch.assert_called_once() + # self.sut.send.assert_called_once() class TestFetch(unittest.IsolatedAsyncioTestCase): @@ -192,6 +217,87 @@ async def test_send_keyboard_interrupt( # Assert self.assertEqual(4, mock_batch_handler_instance.add_message.call_count) + @patch("src.logcollector.collector.logger") + @patch("src.logcollector.collector.IPV4_PREFIX_LENGTH", 22) + @patch("src.logcollector.collector.ExactlyOnceKafkaConsumeHandler") + @patch("src.logcollector.collector.BufferedBatchSender") + @patch("src.logcollector.collector.LoglineHandler") + @patch("src.logcollector.collector.asyncio.Queue") + async def test_send_empty( + self, + mock_queue, + mock_logline_handler, + mock_batch_handler, + mock_kafka_handler, + mock_logger, + ): + # Arrange + mock_queue_instance = MagicMock() + mock_queue.return_value = mock_queue_instance + mock_queue_instance.empty.side_effect = [True, KeyboardInterrupt, True] + mock_batch_handler_instance = MagicMock() + mock_batch_handler.return_value = mock_batch_handler_instance + + sut = LogCollector() + sut.loglines = mock_queue_instance + + # Act + await sut.send() + + # Assert + mock_queue_instance.add_message.assert_not_called() + + @patch("src.logcollector.collector.logger") + @patch("src.logcollector.collector.IPV4_PREFIX_LENGTH", 22) + @patch("src.logcollector.collector.ExactlyOnceKafkaConsumeHandler") + @patch("src.logcollector.collector.BufferedBatchSender") + @patch("src.logcollector.collector.LoglineHandler") + async def test_send_value_error( + self, mock_logline_handler, mock_batch_handler, mock_kafka_handler, mock_logger + ): + # Arrange + mock_batch_handler_instance = MagicMock() + mock_logline_handler_instance = MagicMock() + mock_batch_handler.return_value = mock_batch_handler_instance + mock_logline_handler.return_value = mock_logline_handler_instance + + mock_logline_handler_instance.validate_logline_and_get_fields_as_json.side_effect = [ + ValueError, + { + "timestamp": "2024-05-21T08:31:28.119Z", + "status": "NOERROR", + "client_ip": "192.168.0.105", + "dns_ip": "8.8.8.8", + "host_domain_name": "www.heidelberg-botanik.de", + "record_type": "A", + "response_ip": "b937:2f2e:2c1c:82a:33ad:9e59:ceb9:8e1", + 
"size": "150b", + }, + KeyboardInterrupt, + ] + expected_message = ( + '{"timestamp": "2024-05-21T08:31:28.119Z", "status": "NOERROR", "client_ip": ' + '"192.168.0.105", "dns_ip": "8.8.8.8", "host_domain_name": "www.heidelberg-botanik.de", ' + '"record_type": "A", "response_ip": "b937:2f2e:2c1c:82a:33ad:9e59:ceb9:8e1", ' + '"size": "150b"}' + ) + input_logline = ( + "2024-05-21T08:31:28.119Z NOERROR 192.168.0.105 8.8.8.8 www.heidelberg-botanik.de A " + "b937:2f2e:2c1c:82a:33ad:9e59:ceb9:8e1 150b" + ) + + sut = LogCollector() + await sut.store(input_logline) + await sut.store(input_logline) + await sut.store(input_logline) + + # Act + await sut.send() + + mock_batch_handler_instance.add_message.assert_called_once_with( + "192.168.0.0_22", expected_message + ) + class TestStore(unittest.IsolatedAsyncioTestCase): @patch("src.logcollector.collector.ExactlyOnceKafkaConsumeHandler") diff --git a/tests/test_server.py b/tests/test_server.py index 2b7c648..a8b19c8 100644 --- a/tests/test_server.py +++ b/tests/test_server.py @@ -13,44 +13,103 @@ class TestInit(unittest.TestCase): @patch("src.logserver.server.CONSUME_TOPIC", "test_topic") + @patch("src.logserver.server.ClickHouseKafkaSender") @patch("src.logserver.server.ExactlyOnceKafkaProduceHandler") @patch("src.logserver.server.SimpleKafkaConsumeHandler") - def test_valid_init(self, mock_kafka_consume_handler, mock_kafka_produce_handler): + def test_valid_init( + self, mock_kafka_consume_handler, mock_kafka_produce_handler, mock_server_logs + ): mock_kafka_consume_handler_instance = MagicMock() mock_kafka_produce_handler_instance = MagicMock() + mock_server_logs_instance = MagicMock() mock_kafka_produce_handler.return_value = mock_kafka_produce_handler_instance mock_kafka_consume_handler.return_value = mock_kafka_consume_handler_instance + mock_server_logs.return_value = mock_server_logs_instance sut = LogServer() self.assertEqual(mock_kafka_consume_handler_instance, sut.kafka_consume_handler) self.assertEqual(mock_kafka_produce_handler_instance, sut.kafka_produce_handler) + self.assertEqual(mock_server_logs_instance, sut.server_logs) mock_kafka_consume_handler.assert_called_once_with("test_topic") + mock_server_logs.assert_called_once_with("server_logs") class TestStart(unittest.IsolatedAsyncioTestCase): @patch("src.logserver.server.logger") - @patch("src.logserver.server.LogServer.fetch_from_kafka") - @patch("src.logserver.server.LogServer.fetch_from_file") @patch("src.logserver.server.SimpleKafkaConsumeHandler") @patch("src.logserver.server.ExactlyOnceKafkaProduceHandler") - async def test_start( + def setUp( self, mock_kafka_produce_handler, mock_kafka_consume_handler, + mock_logger, + ): + self.sut = LogServer() + + @patch("src.logserver.server.LogServer.fetch_from_kafka") + @patch("src.logserver.server.LogServer.fetch_from_file") + async def test_start( + self, mock_fetch_from_file, mock_fetch_from_kafka, - mock_logger, + ): + # Act + await self.sut.start() + + # Assert + mock_fetch_from_kafka.assert_called_once() + mock_fetch_from_file.assert_called_once() + + # TODO: Update + # @patch("src.logserver.server.LogServer.fetch_from_kafka") + # @patch("src.logserver.server.LogServer.fetch_from_file") + # async def test_start_handles_keyboard_interrupt( + # self, + # mock_fetch_from_file, + # mock_fetch_from_kafka, + # ): + # # Arrange + # async def mock_gather(*args, **kwargs): + # raise KeyboardInterrupt + # + # with (patch('src.logserver.server.asyncio.gather', side_effect=mock_gather) as mock): + # # Act + # await self.sut.start() + # + # # 
Assert + # mock.assert_called_once() + # mock_fetch_from_kafka.assert_called_once() + # mock_fetch_from_file.assert_called_once() + + +class TestSend(unittest.TestCase): + @patch("src.logserver.server.PRODUCE_TOPIC", "test_topic") + @patch("src.logserver.server.ClickHouseKafkaSender") + @patch("src.logserver.server.ExactlyOnceKafkaProduceHandler") + def test_send( + self, + mock_produce_handler, + mock_server_logs, ): # Arrange + mock_server_logs_instance = MagicMock() + mock_server_logs.return_value = mock_server_logs_instance + mock_kafka_produce_handler_instance = MagicMock() + mock_produce_handler.return_value = mock_kafka_produce_handler_instance + + message = "test_message" sut = LogServer() # Act - await sut.start() + sut.send(message) # Assert - mock_fetch_from_kafka.assert_called_once() - mock_fetch_from_file.assert_called_once() + mock_kafka_produce_handler_instance.produce.assert_called_once_with( + topic="test_topic", + data=message, + ) + mock_server_logs_instance.insert.assert_called_once() class TestFetchFromKafka(unittest.IsolatedAsyncioTestCase): From 367347bab217bd1a635c956e1f0a99cc0184df0c Mon Sep 17 00:00:00 2001 From: Manuel Fuchs Date: Wed, 20 Nov 2024 11:21:42 +0100 Subject: [PATCH 21/29] Remove unrelated files --- config.yaml | 5 - docker/docker-compose.yml | 33 +- docker/dockerfiles/Dockerfile.logserver | 4 +- docker/dockerfiles/Dockerfile.monitoring | 16 - requirements/requirements.monitoring.txt | 5 - src/base/clickhouse_kafka_sender.py | 21 - src/logserver/server.py | 13 - src/monitoring/__init__.py | 0 src/monitoring/clickhouse_batch.py | 82 -- src/monitoring/clickhouse_connector.py | 345 -------- src/monitoring/create_tables/batch_status.sql | 7 - .../create_tables/batch_timestamps.sql | 9 - src/monitoring/create_tables/dns_loglines.sql | 11 - .../create_tables/failed_dns_loglines.sql | 8 - .../create_tables/logline_status.sql | 7 - .../create_tables/logline_timestamps.sql | 8 - .../create_tables/logline_to_batches.sql | 6 - src/monitoring/create_tables/server_logs.sql | 7 - .../create_tables/server_logs_timestamps.sql | 7 - src/monitoring/monitoring_agent.py | 78 -- tests/test_clickhouse_connector.py | 786 ------------------ tests/test_clickhouse_kafka_sender.py | 43 - tests/test_server.py | 14 +- 23 files changed, 9 insertions(+), 1506 deletions(-) delete mode 100644 docker/dockerfiles/Dockerfile.monitoring delete mode 100644 requirements/requirements.monitoring.txt delete mode 100644 src/base/clickhouse_kafka_sender.py delete mode 100644 src/monitoring/__init__.py delete mode 100644 src/monitoring/clickhouse_batch.py delete mode 100644 src/monitoring/clickhouse_connector.py delete mode 100644 src/monitoring/create_tables/batch_status.sql delete mode 100644 src/monitoring/create_tables/batch_timestamps.sql delete mode 100644 src/monitoring/create_tables/dns_loglines.sql delete mode 100644 src/monitoring/create_tables/failed_dns_loglines.sql delete mode 100644 src/monitoring/create_tables/logline_status.sql delete mode 100644 src/monitoring/create_tables/logline_timestamps.sql delete mode 100644 src/monitoring/create_tables/logline_to_batches.sql delete mode 100644 src/monitoring/create_tables/server_logs.sql delete mode 100644 src/monitoring/create_tables/server_logs_timestamps.sql delete mode 100644 src/monitoring/monitoring_agent.py delete mode 100644 tests/test_clickhouse_connector.py delete mode 100644 tests/test_clickhouse_kafka_sender.py diff --git a/config.yaml b/config.yaml index 8d596ba..05c8c4f 100644 --- a/config.yaml +++ b/config.yaml @@ -64,11 
+64,6 @@ pipeline: base_url: https://heibox.uni-heidelberg.de/d/0d5cbcbe16cd46a58021/ threshold: 0.5 - monitoring: - clickhouse_connector: - batch_size: 10000 - batch_timeout: 2.0 - environment: timestamp_format: "%Y-%m-%dT%H:%M:%S.%fZ" kafka_brokers: diff --git a/docker/docker-compose.yml b/docker/docker-compose.yml index 213458e..44d907d 100644 --- a/docker/docker-compose.yml +++ b/docker/docker-compose.yml @@ -160,13 +160,13 @@ services: limits: cpus: '2' memory: 512m - reservations: - cpus: '1' - memory: 256m - devices: - - driver: nvidia - count: 1 # alternatively, use `count: all` for all GPUs - capabilities: [gpu] + # reservations: + # cpus: '1' + # memory: 256m + # devices: + # - driver: nvidia + # count: 1 # alternatively, use `count: all` for all GPUs + # capabilities: [gpu] clickhouse-server: image: clickhouse/clickhouse-server:24.3.12.75-alpine @@ -184,25 +184,6 @@ services: timeout: 5s retries: 3 - monitoring_agent: - build: - context: .. - dockerfile: docker/dockerfiles/Dockerfile.monitoring - network: host - restart: "unless-stopped" - depends_on: - kafka1: - condition: service_healthy - kafka2: - condition: service_healthy - kafka3: - condition: service_healthy - clickhouse-server: - condition: service_healthy - networks: - heidgaf: - ipv4_address: 172.27.0.12 - networks: heidgaf: driver: bridge diff --git a/docker/dockerfiles/Dockerfile.logserver b/docker/dockerfiles/Dockerfile.logserver index c89476e..705627b 100644 --- a/docker/dockerfiles/Dockerfile.logserver +++ b/docker/dockerfiles/Dockerfile.logserver @@ -5,12 +5,10 @@ ENV PYTHONDONTWRITEBYTECODE=1 WORKDIR /usr/src/app COPY requirements/requirements.logserver.txt ./ -COPY requirements/requirements.monitoring.txt ./ -RUN pip --disable-pip-version-check install --no-cache-dir --no-compile -r requirements.logserver.txt -r requirements.monitoring.txt +RUN pip --disable-pip-version-check install --no-cache-dir --no-compile -r requirements.logserver.txt COPY src/base ./src/base COPY src/logserver ./src/logserver -COPY src/monitoring ./src/monitoring COPY config.yaml . RUN rm -rf /root/.cache diff --git a/docker/dockerfiles/Dockerfile.monitoring b/docker/dockerfiles/Dockerfile.monitoring deleted file mode 100644 index cf181a7..0000000 --- a/docker/dockerfiles/Dockerfile.monitoring +++ /dev/null @@ -1,16 +0,0 @@ -FROM python:3.11-slim-bookworm - -ENV PYTHONDONTWRITEBYTECODE=1 - -WORKDIR /usr/src/app - -COPY requirements/requirements.monitoring.txt ./ -RUN pip --disable-pip-version-check install --no-cache-dir --no-compile -r requirements.monitoring.txt - -COPY src/base ./src/base -COPY src/monitoring ./src/monitoring -COPY config.yaml . 
- -RUN rm -rf /root/.cache - -CMD [ "python", "src/monitoring/monitoring_agent.py"] diff --git a/requirements/requirements.monitoring.txt b/requirements/requirements.monitoring.txt deleted file mode 100644 index a2e9d98..0000000 --- a/requirements/requirements.monitoring.txt +++ /dev/null @@ -1,5 +0,0 @@ -clickhouse_connect~=0.8.3 -confluent-kafka~=2.4.0 -marshmallow_dataclass~=8.7.1 -colorlog~=6.8.2 -PyYAML~=6.0.1 diff --git a/src/base/clickhouse_kafka_sender.py b/src/base/clickhouse_kafka_sender.py deleted file mode 100644 index ef4338c..0000000 --- a/src/base/clickhouse_kafka_sender.py +++ /dev/null @@ -1,21 +0,0 @@ -import json -import os -import sys - -sys.path.append(os.getcwd()) -from src.base.kafka_handler import SimpleKafkaProduceHandler -from src.base.log_config import get_logger - -logger = get_logger() - - -class ClickHouseKafkaSender: - def __init__(self, table_name: str): - self.table_name = table_name - self.kafka_producer = SimpleKafkaProduceHandler() - - def insert(self, data: dict): - self.kafka_producer.produce( - topic=f"clickhouse_{self.table_name}", - data=json.dumps(data, default=str), - ) diff --git a/src/logserver/server.py b/src/logserver/server.py index 94f0b8c..17964f4 100644 --- a/src/logserver/server.py +++ b/src/logserver/server.py @@ -1,8 +1,6 @@ import asyncio -import datetime import os import sys -import uuid import aiofiles @@ -11,7 +9,6 @@ SimpleKafkaConsumeHandler, ExactlyOnceKafkaProduceHandler, ) -from src.base.clickhouse_kafka_sender import ClickHouseKafkaSender from src.base.utils import generate_unique_transactional_id from src.base.utils import setup_config from src.base.log_config import get_logger @@ -44,7 +41,6 @@ def __init__(self) -> None: self.kafka_consume_handler = SimpleKafkaConsumeHandler(CONSUME_TOPIC) self.kafka_produce_handler = ExactlyOnceKafkaProduceHandler(transactional_id) - self.server_logs = ClickHouseKafkaSender("server_logs") async def start(self) -> None: """ @@ -73,15 +69,6 @@ def send(self, message: str) -> None: message (str): Message to be sent """ self.kafka_produce_handler.produce(topic=PRODUCE_TOPIC, data=message) - - self.server_logs.insert( - dict( - message_id=uuid.uuid4(), - timestamp_in=datetime.datetime.now(), - message_text=message, - ) - ) - logger.debug(f"Sent: '{message}'") async def fetch_from_kafka(self) -> None: diff --git a/src/monitoring/__init__.py b/src/monitoring/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/src/monitoring/clickhouse_batch.py b/src/monitoring/clickhouse_batch.py deleted file mode 100644 index fb87795..0000000 --- a/src/monitoring/clickhouse_batch.py +++ /dev/null @@ -1,82 +0,0 @@ -import os -import sys -from threading import Timer - -import clickhouse_connect - -sys.path.append(os.getcwd()) -from src.base.log_config import get_logger -from src.base.utils import setup_config - -logger = get_logger() - -CONFIG = setup_config() -CLICKHOUSE_HOSTNAME = CONFIG["environment"]["monitoring"]["clickhouse_server"][ - "hostname" -] -BATCH_SIZE = CONFIG["pipeline"]["monitoring"]["clickhouse_connector"]["batch_size"] -BATCH_TIMEOUT = CONFIG["pipeline"]["monitoring"]["clickhouse_connector"][ - "batch_timeout" -] - - -class ClickHouseBatchSender: - def __init__(self, table_name: str, column_names: list[str]): - self.table_name = table_name - self.column_names = column_names - - self.max_batch_size = BATCH_SIZE - self.batch_timeout = BATCH_TIMEOUT - - self.timer = None - self.batch = [] - self._client = clickhouse_connect.get_client( - host=CLICKHOUSE_HOSTNAME, - ) - - def 
__del__(self): - self.insert_all() - - def add(self, data: list[str] | list[list[str]]): - def _add_element(element): - if len(element) != len(self.column_names): - raise ValueError( - "Number of elements in the insert does not match the number of columns" - ) - - self.batch.append(element) - - if any(isinstance(e, list) for e in data): - for e in data: - _add_element(e) - else: - _add_element(data) - - if len(self.batch) >= self.max_batch_size: - self.insert_all() - elif not self.timer: - self._start_timer() - - def insert_all(self): - if self.batch: - self._client.insert( - self.table_name, - self.batch, - column_names=self.column_names, - ) - logger.debug( - f"Inserted {self.table_name=},{self.batch=},{self.column_names=}" - ) - self.batch = [] - - if self.timer: - self.timer.cancel() - - self.timer = None - - def _start_timer(self): - if self.timer: - self.timer.cancel() - - self.timer = Timer(BATCH_TIMEOUT, self.insert_all) - self.timer.start() diff --git a/src/monitoring/clickhouse_connector.py b/src/monitoring/clickhouse_connector.py deleted file mode 100644 index cb4d2c9..0000000 --- a/src/monitoring/clickhouse_connector.py +++ /dev/null @@ -1,345 +0,0 @@ -import datetime -import os -import sys -import uuid -from abc import abstractmethod - -import clickhouse_connect - -sys.path.append(os.getcwd()) -from src.monitoring.clickhouse_batch import ClickHouseBatchSender -from src.base.log_config import get_logger -from src.base.utils import setup_config - -logger = get_logger() - -CONFIG = setup_config() -CLICKHOUSE_HOSTNAME = CONFIG["environment"]["monitoring"]["clickhouse_server"][ - "hostname" -] -CREATE_TABLES_DIRECTORY = "src/monitoring/create_tables" # TODO: Get from config - - -class ClickHouseConnector: - def __init__(self, table_name: str, column_names: list[str]): - self._table_name = table_name - self._column_names = column_names - - self._batch_sender = ClickHouseBatchSender( - table_name=self._table_name, - column_names=self._column_names, - ) - - def prepare_table(self): - def _load_contents(file_name: str) -> str: - with open(file_name, "r") as file: - return file.read() - - filename = self._table_name + ".sql" - file_path = os.path.join(CREATE_TABLES_DIRECTORY, filename) - sql_content = _load_contents(file_path) - - with clickhouse_connect.get_client(host=CLICKHOUSE_HOSTNAME) as client: - try: - client.command(sql_content) - except Exception as e: - logger.critical("Error in CREATE TABLE statement") - raise e - - def _add_to_batch(self, data): - self._batch_sender.add(data) - - @abstractmethod - def insert(self, *args, **kwargs): - pass - - -class ServerLogsConnector(ClickHouseConnector): - def __init__(self): - column_names = [ - "message_id", - "timestamp_in", - "message_text", - ] - - super().__init__("server_logs", column_names) - - def insert( - self, - message_text: str, - message_id: None | str | uuid.UUID = None, - timestamp_in: str | datetime.datetime | None = None, - ) -> uuid.UUID: - # TODO: Switch to Marshmallow - if not message_id: - message_id = uuid.uuid4() - - if isinstance(message_id, str): - message_id = uuid.UUID(message_id) - - if not timestamp_in: - timestamp_in = datetime.datetime.now() - - if isinstance(timestamp_in, str): - timestamp_in = datetime.datetime.fromisoformat(timestamp_in) - - self._add_to_batch([message_id, timestamp_in, message_text]) - return message_id - - -class ServerLogsTimestampsConnector(ClickHouseConnector): - def __init__(self): - column_names = [ - "message_id", - "event", - "event_timestamp", - ] - - 
super().__init__("server_logs_timestamps", column_names) - - def insert( - self, - message_id: str | uuid.UUID, - event: str, - event_timestamp: str | datetime.datetime | None = None, - ): - if isinstance(message_id, str): - message_id = uuid.UUID(message_id) - - if not event_timestamp: - event_timestamp = datetime.datetime.now() - - if isinstance(event_timestamp, str): - event_timestamp = datetime.datetime.fromisoformat(event_timestamp) - - self._add_to_batch([message_id, event, event_timestamp]) - - -class FailedDNSLoglinesConnector(ClickHouseConnector): - def __init__(self): - column_names = [ - "message_text", - "timestamp_in", - "timestamp_failed", - "reason_for_failure", - ] - - super().__init__("failed_dns_loglines", column_names) - - def insert( - self, - message_text: str, - timestamp_in: str | datetime.datetime, - timestamp_failed: str | datetime.datetime | None = None, - reason_for_failure: str | None = None, - ) -> None: - if not timestamp_failed: - timestamp_failed = datetime.datetime.now() - - if isinstance(timestamp_in, str): - timestamp_in = datetime.datetime.fromisoformat(timestamp_in) - if isinstance(timestamp_failed, str): - timestamp_failed = datetime.datetime.fromisoformat(timestamp_failed) - - self._add_to_batch( - [message_text, timestamp_in, timestamp_failed, reason_for_failure] - ) - - -class LoglineToBatchesConnector(ClickHouseConnector): - def __init__(self): - column_names = [ - "logline_id", - "batch_id", - ] - - super().__init__("logline_to_batches", column_names) - - def insert( - self, - logline_id: str | uuid.UUID, - batch_id: str | uuid.UUID, - ): - if isinstance(logline_id, str): - logline_id = uuid.UUID(logline_id) - if isinstance(batch_id, str): - batch_id = uuid.UUID(batch_id) - - self._add_to_batch( - [ - logline_id, - batch_id, - ] - ) - - -class DNSLoglinesConnector(ClickHouseConnector): - def __init__(self): - column_names = [ - "logline_id", - "subnet_id", - "timestamp", - "status_code", - "client_ip", - "record_type", - "additional_fields", - ] - - super().__init__("dns_loglines", column_names) - - def insert( - self, - subnet_id: str, - timestamp: str | datetime.datetime, - status_code: str, - client_ip: str, - record_type: str, - additional_fields: str | None = None, - ) -> uuid.UUID: - logline_id = uuid.uuid4() - - self._add_to_batch( - [ - logline_id, - subnet_id, - timestamp, - status_code, - client_ip, - record_type, - additional_fields, - ] - ) - return logline_id - - -class LoglineStatusConnector(ClickHouseConnector): - def __init__(self): - column_names = [ - "logline_id", - "status", - "exit_at_stage", - ] - - super().__init__("logline_status", column_names) - - def insert( - self, - logline_id: str | uuid.UUID, - status: str, - exit_at_stage: str | None = None, - ): - if isinstance(logline_id, str): - logline_id = uuid.UUID(logline_id) - - self._add_to_batch( - [ - logline_id, - status, - exit_at_stage, - ] - ) - - -class LoglineTimestampsConnector(ClickHouseConnector): - def __init__(self): - column_names = [ - "logline_id", - "stage", - "status", - "timestamp", - ] - - super().__init__("logline_timestamps", column_names) - - def insert( - self, - logline_id: str | uuid.UUID, - stage: str, - status: str, - timestamp: str | datetime.datetime = None, - ) -> None: - if isinstance(logline_id, str): - logline_id = uuid.UUID(logline_id) - - if not timestamp: - timestamp = datetime.datetime.now() - - if isinstance(timestamp, str): - timestamp = datetime.datetime.fromisoformat(timestamp) - - self._add_to_batch( - [ - logline_id, - stage, - 
status, - timestamp, - ] - ) - - -class BatchStatusConnector(ClickHouseConnector): - def __init__(self): - column_names = [ - "batch_id", - "status", - "exit_at_stage", - ] - - super().__init__("batch_status", column_names) - - def insert( - self, - batch_id: str | uuid.UUID, - status: str, - exit_at_stage: str | None = None, - ): - if isinstance(batch_id, str): - batch_id = uuid.UUID(batch_id) - - self._add_to_batch( - [ - batch_id, - status, - exit_at_stage, - ] - ) - - -class BatchTimestampsConnector(ClickHouseConnector): - def __init__(self): - column_names = [ - "batch_id", - "stage", - "status", - "timestamp", - "message_count", - ] - - super().__init__("batch_timestamps", column_names) - - def insert( - self, - batch_id: str | uuid.UUID, - stage: str, - status: str, - message_count: int, - timestamp: str | datetime.datetime = None, - ) -> None: - if isinstance(batch_id, str): - batch_id = uuid.UUID(batch_id) - - if not timestamp: - timestamp = datetime.datetime.now() - - if isinstance(timestamp, str): - timestamp = datetime.datetime.fromisoformat(timestamp) - - self._add_to_batch( - [ - batch_id, - stage, - status, - timestamp, - message_count, - ] - ) diff --git a/src/monitoring/create_tables/batch_status.sql b/src/monitoring/create_tables/batch_status.sql deleted file mode 100644 index 3f515b9..0000000 --- a/src/monitoring/create_tables/batch_status.sql +++ /dev/null @@ -1,7 +0,0 @@ -CREATE TABLE IF NOT EXISTS batch_status ( - batch_id UUID NOT NULL, - status String NOT NULL, - exit_at_stage Nullable(String) -) -ENGINE = MergeTree -PRIMARY KEY (batch_id); diff --git a/src/monitoring/create_tables/batch_timestamps.sql b/src/monitoring/create_tables/batch_timestamps.sql deleted file mode 100644 index c0e7a1c..0000000 --- a/src/monitoring/create_tables/batch_timestamps.sql +++ /dev/null @@ -1,9 +0,0 @@ -CREATE TABLE IF NOT EXISTS batch_timestamps ( - batch_id UUID NOT NULL, - stage String NOT NULL, - status String NOT NULL, - timestamp DateTime64(6) NOT NULL, - message_count UInt32 -) -ENGINE = MergeTree -PRIMARY KEY (batch_id); diff --git a/src/monitoring/create_tables/dns_loglines.sql b/src/monitoring/create_tables/dns_loglines.sql deleted file mode 100644 index c3468f7..0000000 --- a/src/monitoring/create_tables/dns_loglines.sql +++ /dev/null @@ -1,11 +0,0 @@ -CREATE TABLE IF NOT EXISTS dns_loglines ( - logline_id UUID NOT NULL, - subnet_id String NOT NULL, - timestamp DateTime64(6) NOT NULL, - status_code String NOT NULL, - client_ip String NOT NULL, - record_type String NOT NULL, - additional_fields Nullable(String) -) -ENGINE = MergeTree -PRIMARY KEY (logline_id); diff --git a/src/monitoring/create_tables/failed_dns_loglines.sql b/src/monitoring/create_tables/failed_dns_loglines.sql deleted file mode 100644 index 846f6cd..0000000 --- a/src/monitoring/create_tables/failed_dns_loglines.sql +++ /dev/null @@ -1,8 +0,0 @@ -CREATE TABLE IF NOT EXISTS failed_dns_loglines ( - message_text String NOT NULL, - timestamp_in DateTime64(6) NOT NULL, - timestamp_failed DateTime64(6) NOT NULL, - reason_for_failure Nullable(String) -) -ENGINE = MergeTree -PRIMARY KEY(message_text, timestamp_in); diff --git a/src/monitoring/create_tables/logline_status.sql b/src/monitoring/create_tables/logline_status.sql deleted file mode 100644 index cdeb6c2..0000000 --- a/src/monitoring/create_tables/logline_status.sql +++ /dev/null @@ -1,7 +0,0 @@ -CREATE TABLE IF NOT EXISTS logline_status ( - logline_id UUID NOT NULL, - status String NOT NULL, - exit_at_stage Nullable(String) -) -ENGINE = MergeTree -PRIMARY 
KEY (logline_id); diff --git a/src/monitoring/create_tables/logline_timestamps.sql b/src/monitoring/create_tables/logline_timestamps.sql deleted file mode 100644 index 4ff9887..0000000 --- a/src/monitoring/create_tables/logline_timestamps.sql +++ /dev/null @@ -1,8 +0,0 @@ -CREATE TABLE IF NOT EXISTS logline_timestamps ( - logline_id UUID NOT NULL, - stage String NOT NULL, - status String NOT NULL, - timestamp DateTime64(6) NOT NULL -) -ENGINE = MergeTree -PRIMARY KEY (logline_id); diff --git a/src/monitoring/create_tables/logline_to_batches.sql b/src/monitoring/create_tables/logline_to_batches.sql deleted file mode 100644 index 41d4348..0000000 --- a/src/monitoring/create_tables/logline_to_batches.sql +++ /dev/null @@ -1,6 +0,0 @@ -CREATE TABLE IF NOT EXISTS logline_to_batches ( - logline_id UUID NOT NULL, - batch_id UUID NOT NULL -) -ENGINE = MergeTree -PRIMARY KEY (logline_id); diff --git a/src/monitoring/create_tables/server_logs.sql b/src/monitoring/create_tables/server_logs.sql deleted file mode 100644 index b191d83..0000000 --- a/src/monitoring/create_tables/server_logs.sql +++ /dev/null @@ -1,7 +0,0 @@ -CREATE TABLE IF NOT EXISTS server_logs ( - message_id UUID NOT NULL, - timestamp_in DateTime64(6) NOT NULL, - message_text String NOT NULL -) -ENGINE = MergeTree -PRIMARY KEY(message_id); diff --git a/src/monitoring/create_tables/server_logs_timestamps.sql b/src/monitoring/create_tables/server_logs_timestamps.sql deleted file mode 100644 index 7a6c58c..0000000 --- a/src/monitoring/create_tables/server_logs_timestamps.sql +++ /dev/null @@ -1,7 +0,0 @@ -CREATE TABLE IF NOT EXISTS server_logs_timestamps ( - message_id UUID NOT NULL, - event String NOT NULL, - event_timestamp DateTime64(6) NOT NULL -) -ENGINE = MergeTree -PRIMARY KEY(message_id); diff --git a/src/monitoring/monitoring_agent.py b/src/monitoring/monitoring_agent.py deleted file mode 100644 index 8ea6c09..0000000 --- a/src/monitoring/monitoring_agent.py +++ /dev/null @@ -1,78 +0,0 @@ -import asyncio -import json -import os -import sys - -sys.path.append(os.getcwd()) -from src.monitoring.clickhouse_connector import * -from src.base.kafka_handler import SimpleKafkaConsumeHandler -from src.base.log_config import get_logger -from src.base.utils import setup_config - -logger = get_logger() - -CONFIG = setup_config() -CREATE_TABLES_DIRECTORY = "src/monitoring/create_tables" # TODO: Get from config - - -def prepare_all_tables(): - def _load_contents(file_name: str) -> str: - with open(file_name, "r") as file: - return file.read() - - for filename in os.listdir(CREATE_TABLES_DIRECTORY): - if filename.endswith(".sql"): - file_path = os.path.join(CREATE_TABLES_DIRECTORY, filename) - sql_content = _load_contents(file_path) - - with clickhouse_connect.get_client(host=CLICKHOUSE_HOSTNAME) as client: - try: - client.command(sql_content) - except Exception as e: - logger.critical("Error in CREATE TABLE statement") - raise e - - -class MonitoringAgent: - def __init__(self): - self.connectors = { - "server_logs": ServerLogsConnector(), - "server_logs_timestamps": ServerLogsTimestampsConnector(), - "failed_dns_loglines": FailedDNSLoglinesConnector(), - "logline_to_batches": LoglineToBatchesConnector(), - "dns_loglines": DNSLoglinesConnector(), - "logline_status": LoglineStatusConnector(), - "logline_timestamps": LoglineTimestampsConnector(), - "batch_status": BatchStatusConnector(), - "batch_timestamps": BatchTimestampsConnector(), - } - - self.topics = [f"clickhouse_{table_name}" for table_name in self.connectors] - self.kafka_consumer = 
SimpleKafkaConsumeHandler(self.topics) - - async def start(self): - loop = asyncio.get_running_loop() - - try: - while True: - key, value, topic = await loop.run_in_executor( - None, self.kafka_consumer.consume - ) - logger.debug(f"From Kafka: {value}") - - data = json.loads(value) - table_name = topic.replace("clickhouse_", "") - - self.connectors[table_name].insert(**data) - except KeyboardInterrupt: - logger.info("Stopped MonitoringAgent.") - - -def main(): - prepare_all_tables() - clickhouse_consumer = MonitoringAgent() - asyncio.run(clickhouse_consumer.start()) - - -if __name__ == "__main__": # pragma: no cover - main() diff --git a/tests/test_clickhouse_connector.py b/tests/test_clickhouse_connector.py deleted file mode 100644 index 0953b6d..0000000 --- a/tests/test_clickhouse_connector.py +++ /dev/null @@ -1,786 +0,0 @@ -import json -import unittest -from unittest.mock import patch, MagicMock, mock_open - -from src.monitoring.clickhouse_connector import * - - -class TestClickHouseConnector(unittest.TestCase): - @patch("src.monitoring.clickhouse_connector.ClickHouseBatchSender") - def test_init(self, mock_clickhouse_batch_sender): - # Arrange - mock_clickhouse_batch_sender_instance = MagicMock() - mock_clickhouse_batch_sender.return_value = ( - mock_clickhouse_batch_sender_instance - ) - - table_name = "test_table" - column_names = ["col_1", "col_2", "col_3"] - - # Act - sut = ClickHouseConnector(table_name, column_names) - - # Assert - self.assertEqual(table_name, sut._table_name) - self.assertEqual(column_names, sut._column_names) - self.assertEqual(mock_clickhouse_batch_sender_instance, sut._batch_sender) - - mock_clickhouse_batch_sender.assert_called_once_with( - table_name=table_name, - column_names=column_names, - ) - - @patch("src.monitoring.clickhouse_connector.os.path.join") - @patch( - "src.monitoring.clickhouse_connector.open", - new_callable=mock_open, - read_data="CREATE TABLE test;", - ) - @patch("src.monitoring.clickhouse_connector.clickhouse_connect.get_client") - def test_prepare_table_success( - self, mock_get_client, mock_open_file, mock_path_join - ): - # Arrange - mock_client = MagicMock() - mock_get_client.return_value.__enter__.return_value = mock_client - mock_path_join.return_value = "/fake/path/test_table.sql" - - sut = ClickHouseConnector("test_table", ["col_1", "col_2", "col_3"]) - - # Act - sut.prepare_table() - - # Assert - mock_open_file.assert_called_once_with("/fake/path/test_table.sql", "r") - mock_client.command.assert_called_once_with("CREATE TABLE test;") - - @patch("src.monitoring.clickhouse_connector.os.path.join") - @patch( - "src.monitoring.clickhouse_connector.open", - new_callable=mock_open, - read_data="CREATE TABLE test;", - ) - @patch("src.monitoring.clickhouse_connector.clickhouse_connect.get_client") - @patch("src.monitoring.clickhouse_connector.logger") - def test_prepare_table_failure( - self, mock_logger, mock_get_client, mock_open_file, mock_path_join - ): - mock_client = MagicMock() - mock_get_client.return_value.__enter__.return_value = mock_client - mock_path_join.return_value = "/fake/path/test_table.sql" - mock_client.command.side_effect = Exception("Test exception") - - sut = ClickHouseConnector("test_table", ["col_1", "col_2", "col_3"]) - - with self.assertRaises(Exception): - sut.prepare_table() - - @patch("src.monitoring.clickhouse_connector.ClickHouseBatchSender") - def test_add_to_batch(self, mock_clickhouse_batch_sender): - # Arrange - mock_clickhouse_batch_sender_instance = MagicMock() - 
mock_clickhouse_batch_sender.return_value = ( - mock_clickhouse_batch_sender_instance - ) - - sut = ClickHouseConnector("test_table", ["col_1", "col_2", "col_3"]) - - # Act - sut._add_to_batch("test_data") - - # Assert - mock_clickhouse_batch_sender_instance.add.assert_called_once_with("test_data") - - @patch("src.monitoring.clickhouse_connector.ClickHouseBatchSender") - def test_insert(self, mock_clickhouse_batch_sender): - # Arrange - sut = ClickHouseConnector("test_table", ["col_1", "col_2", "col_3"]) - - # Act - sut.insert("test_data") - - -class TestServerLogsConnector(unittest.TestCase): - @patch("src.monitoring.clickhouse_connector.ClickHouseBatchSender") - def test_init(self, mock_clickhouse_batch_sender): - # Arrange - mock_clickhouse_batch_sender_instance = MagicMock() - mock_clickhouse_batch_sender.return_value = ( - mock_clickhouse_batch_sender_instance - ) - - expected_table_name = "server_logs" - expected_column_names = [ - "message_id", - "timestamp_in", - "message_text", - ] - - # Act - sut = ServerLogsConnector() - - # Assert - self.assertEqual(expected_table_name, sut._table_name) - self.assertEqual(expected_column_names, sut._column_names) - self.assertEqual(mock_clickhouse_batch_sender_instance, sut._batch_sender) - - mock_clickhouse_batch_sender.assert_called_once_with( - table_name=expected_table_name, - column_names=expected_column_names, - ) - - @patch("src.monitoring.clickhouse_connector.ClickHouseBatchSender") - def test_insert_all_given(self, mock_clickhouse_batch_sender): - # Arrange - message_text = "test_message_text" - message_id = "7299539b-6215-4f6b-b39f-69335aafbeff" - timestamp_in = "2034-12-13 12:34:12.132412" - - sut = ServerLogsConnector() - - with patch.object(sut, "_add_to_batch", MagicMock()) as mock_add_to_batch: - # Act - returned_value = sut.insert( - message_text=message_text, - message_id=message_id, - timestamp_in=timestamp_in, - ) - - # Assert - mock_add_to_batch.assert_called_once_with( - [ - uuid.UUID("7299539b-6215-4f6b-b39f-69335aafbeff"), - datetime.datetime(2034, 12, 13, 12, 34, 12, 132412), - "test_message_text", - ] - ) - self.assertEqual( - uuid.UUID("7299539b-6215-4f6b-b39f-69335aafbeff"), returned_value - ) - - @patch("src.monitoring.clickhouse_connector.ClickHouseBatchSender") - def test_insert_none_given(self, mock_clickhouse_batch_sender): - # Arrange - sut = ServerLogsConnector() - - with patch.object(sut, "_add_to_batch", MagicMock()) as mock_add_to_batch: - # Act - sut.insert("test_message_text") - - # Assert - mock_add_to_batch.assert_called_once() - - -class TestServerLogsTimestampsConnector(unittest.TestCase): - @patch("src.monitoring.clickhouse_connector.ClickHouseBatchSender") - def test_init(self, mock_clickhouse_batch_sender): - # Arrange - mock_clickhouse_batch_sender_instance = MagicMock() - mock_clickhouse_batch_sender.return_value = ( - mock_clickhouse_batch_sender_instance - ) - - expected_table_name = "server_logs_timestamps" - expected_column_names = [ - "message_id", - "event", - "event_timestamp", - ] - - # Act - sut = ServerLogsTimestampsConnector() - - # Assert - self.assertEqual(expected_table_name, sut._table_name) - self.assertEqual(expected_column_names, sut._column_names) - self.assertEqual(mock_clickhouse_batch_sender_instance, sut._batch_sender) - - mock_clickhouse_batch_sender.assert_called_once_with( - table_name=expected_table_name, - column_names=expected_column_names, - ) - - @patch("src.monitoring.clickhouse_connector.ClickHouseBatchSender") - def test_insert_all_given(self, 
mock_clickhouse_batch_sender): - # Arrange - message_id = "7299539b-6215-4f6b-b39f-69335aafbeff" - event = "test_event" - event_timestamp = "2034-12-13 12:34:12.132412" - - sut = ServerLogsTimestampsConnector() - - with patch.object(sut, "_add_to_batch", MagicMock()) as mock_add_to_batch: - # Act - sut.insert( - message_id=message_id, - event=event, - event_timestamp=event_timestamp, - ) - - # Assert - mock_add_to_batch.assert_called_once_with( - [ - uuid.UUID("7299539b-6215-4f6b-b39f-69335aafbeff"), - "test_event", - datetime.datetime(2034, 12, 13, 12, 34, 12, 132412), - ] - ) - - @patch("src.monitoring.clickhouse_connector.ClickHouseBatchSender") - def test_insert_none_given(self, mock_clickhouse_batch_sender): - # Arrange - sut = ServerLogsTimestampsConnector() - - with patch.object(sut, "_add_to_batch", MagicMock()) as mock_add_to_batch: - # Act - sut.insert( - uuid.UUID("7299539b-6215-4f6b-b39f-69335aafbeff"), - "test_event", - ) - - # Assert - mock_add_to_batch.assert_called_once() - - -class TestFailedDNSLoglinesConnector(unittest.TestCase): - @patch("src.monitoring.clickhouse_connector.ClickHouseBatchSender") - def test_init(self, mock_clickhouse_batch_sender): - # Arrange - mock_clickhouse_batch_sender_instance = MagicMock() - mock_clickhouse_batch_sender.return_value = ( - mock_clickhouse_batch_sender_instance - ) - - expected_table_name = "failed_dns_loglines" - expected_column_names = [ - "message_text", - "timestamp_in", - "timestamp_failed", - "reason_for_failure", - ] - - # Act - sut = FailedDNSLoglinesConnector() - - # Assert - self.assertEqual(expected_table_name, sut._table_name) - self.assertEqual(expected_column_names, sut._column_names) - self.assertEqual(mock_clickhouse_batch_sender_instance, sut._batch_sender) - - mock_clickhouse_batch_sender.assert_called_once_with( - table_name=expected_table_name, - column_names=expected_column_names, - ) - - @patch("src.monitoring.clickhouse_connector.ClickHouseBatchSender") - def test_insert_all_given(self, mock_clickhouse_batch_sender): - # Arrange - message_text = "test_message_text" - timestamp_in = "2034-12-13 12:34:12.132412" - timestamp_failed = "2034-12-13 12:35:35.542635" - reason_for_failure = "Wrong client_ip field" - - sut = FailedDNSLoglinesConnector() - - with patch.object(sut, "_add_to_batch", MagicMock()) as mock_add_to_batch: - # Act - sut.insert( - message_text=message_text, - timestamp_in=timestamp_in, - timestamp_failed=timestamp_failed, - reason_for_failure=reason_for_failure, - ) - - # Assert - mock_add_to_batch.assert_called_once_with( - [ - "test_message_text", - datetime.datetime(2034, 12, 13, 12, 34, 12, 132412), - datetime.datetime(2034, 12, 13, 12, 35, 35, 542635), - "Wrong client_ip field", - ] - ) - - @patch("src.monitoring.clickhouse_connector.ClickHouseBatchSender") - def test_insert_none_given(self, mock_clickhouse_batch_sender): - # Arrange - message_text = "test_message_text" - timestamp_in = "2034-12-13 12:34:12.132412" - - sut = FailedDNSLoglinesConnector() - - with patch.object(sut, "_add_to_batch", MagicMock()) as mock_add_to_batch: - # Act - sut.insert( - message_text=message_text, - timestamp_in=datetime.datetime(2034, 12, 13, 12, 34, 12, 132412), - ) - - # Assert - mock_add_to_batch.assert_called_once() - - -class TestLoglineToBatchesConnector(unittest.TestCase): - @patch("src.monitoring.clickhouse_connector.ClickHouseBatchSender") - def test_init(self, mock_clickhouse_batch_sender): - # Arrange - mock_clickhouse_batch_sender_instance = MagicMock() - 
mock_clickhouse_batch_sender.return_value = ( - mock_clickhouse_batch_sender_instance - ) - - expected_table_name = "logline_to_batches" - expected_column_names = [ - "logline_id", - "batch_id", - ] - - # Act - sut = LoglineToBatchesConnector() - - # Assert - self.assertEqual(expected_table_name, sut._table_name) - self.assertEqual(expected_column_names, sut._column_names) - self.assertEqual(mock_clickhouse_batch_sender_instance, sut._batch_sender) - - mock_clickhouse_batch_sender.assert_called_once_with( - table_name=expected_table_name, - column_names=expected_column_names, - ) - - @patch("src.monitoring.clickhouse_connector.ClickHouseBatchSender") - def test_insert_all_given_as_str(self, mock_clickhouse_batch_sender): - # Arrange - logline_id = "7299539b-6215-4f6b-b39f-69335aafbeff" - batch_id = "1f855c43-8a75-4b53-b6cd-4a13b89312d6" - - sut = LoglineToBatchesConnector() - - with patch.object(sut, "_add_to_batch", MagicMock()) as mock_add_to_batch: - # Act - sut.insert( - logline_id=logline_id, - batch_id=batch_id, - ) - - # Assert - mock_add_to_batch.assert_called_once_with( - [ - uuid.UUID("7299539b-6215-4f6b-b39f-69335aafbeff"), - uuid.UUID("1f855c43-8a75-4b53-b6cd-4a13b89312d6"), - ] - ) - - @patch("src.monitoring.clickhouse_connector.ClickHouseBatchSender") - def test_insert_all_given_as_uuid(self, mock_clickhouse_batch_sender): - # Arrange - logline_id = uuid.UUID("7299539b-6215-4f6b-b39f-69335aafbeff") - batch_id = uuid.UUID("1f855c43-8a75-4b53-b6cd-4a13b89312d6") - - sut = LoglineToBatchesConnector() - - with patch.object(sut, "_add_to_batch", MagicMock()) as mock_add_to_batch: - # Act - sut.insert( - logline_id=logline_id, - batch_id=batch_id, - ) - - # Assert - mock_add_to_batch.assert_called_once_with( - [ - uuid.UUID("7299539b-6215-4f6b-b39f-69335aafbeff"), - uuid.UUID("1f855c43-8a75-4b53-b6cd-4a13b89312d6"), - ] - ) - - -class TestDNSLoglinesConnector(unittest.TestCase): - @patch("src.monitoring.clickhouse_connector.ClickHouseBatchSender") - def test_init(self, mock_clickhouse_batch_sender): - # Arrange - mock_clickhouse_batch_sender_instance = MagicMock() - mock_clickhouse_batch_sender.return_value = ( - mock_clickhouse_batch_sender_instance - ) - - expected_table_name = "dns_loglines" - expected_column_names = [ - "logline_id", - "subnet_id", - "timestamp", - "status_code", - "client_ip", - "record_type", - "additional_fields", - ] - - # Act - sut = DNSLoglinesConnector() - - # Assert - self.assertEqual(expected_table_name, sut._table_name) - self.assertEqual(expected_column_names, sut._column_names) - self.assertEqual(mock_clickhouse_batch_sender_instance, sut._batch_sender) - - mock_clickhouse_batch_sender.assert_called_once_with( - table_name=expected_table_name, - column_names=expected_column_names, - ) - - @patch("src.monitoring.clickhouse_connector.ClickHouseBatchSender") - def test_insert_all_given(self, mock_clickhouse_batch_sender): - # Arrange - subnet_id = "127.0.0.0_24" - timestamp = "2034-12-13 12:34:12.132412" - status_code = "NXDOMAIN" - client_ip = "127.0.0.1" - record_type = "A" - additional_fields = json.dumps(dict(test="some_field")) - - sut = DNSLoglinesConnector() - - with patch.object(sut, "_add_to_batch", MagicMock()) as mock_add_to_batch: - # Act - returned_value = sut.insert( - subnet_id=subnet_id, - timestamp=timestamp, - status_code=status_code, - client_ip=client_ip, - record_type=record_type, - additional_fields=additional_fields, - ) - - # Assert - mock_add_to_batch.assert_called_once() - self.assertTrue(isinstance(returned_value, uuid.UUID)) 
- - -class TestLoglineStatusConnector(unittest.TestCase): - @patch("src.monitoring.clickhouse_connector.ClickHouseBatchSender") - def test_init(self, mock_clickhouse_batch_sender): - # Arrange - mock_clickhouse_batch_sender_instance = MagicMock() - mock_clickhouse_batch_sender.return_value = ( - mock_clickhouse_batch_sender_instance - ) - - expected_table_name = "logline_status" - expected_column_names = [ - "logline_id", - "status", - "exit_at_stage", - ] - - # Act - sut = LoglineStatusConnector() - - # Assert - self.assertEqual(expected_table_name, sut._table_name) - self.assertEqual(expected_column_names, sut._column_names) - self.assertEqual(mock_clickhouse_batch_sender_instance, sut._batch_sender) - - mock_clickhouse_batch_sender.assert_called_once_with( - table_name=expected_table_name, - column_names=expected_column_names, - ) - - @patch("src.monitoring.clickhouse_connector.ClickHouseBatchSender") - def test_insert_all_given(self, mock_clickhouse_batch_sender): - # Arrange - logline_id = "7299539b-6215-4f6b-b39f-69335aafbeff" - status = "inactive" - exit_at_stage = "prefilter" - - sut = LoglineStatusConnector() - - with patch.object(sut, "_add_to_batch", MagicMock()) as mock_add_to_batch: - # Act - sut.insert( - logline_id=logline_id, - status=status, - exit_at_stage=exit_at_stage, - ) - - # Assert - mock_add_to_batch.assert_called_once_with( - [ - uuid.UUID("7299539b-6215-4f6b-b39f-69335aafbeff"), - "inactive", - "prefilter", - ] - ) - - @patch("src.monitoring.clickhouse_connector.ClickHouseBatchSender") - def test_insert_none_given(self, mock_clickhouse_batch_sender): - # Arrange - logline_id = uuid.UUID("7299539b-6215-4f6b-b39f-69335aafbeff") - status = "inactive" - - sut = LoglineStatusConnector() - - with patch.object(sut, "_add_to_batch", MagicMock()) as mock_add_to_batch: - # Act - sut.insert( - logline_id=logline_id, - status=status, - ) - - # Assert - mock_add_to_batch.assert_called_once_with( - [ - uuid.UUID("7299539b-6215-4f6b-b39f-69335aafbeff"), - "inactive", - None, - ] - ) - - -class TestLoglineTimestampsConnector(unittest.TestCase): - @patch("src.monitoring.clickhouse_connector.ClickHouseBatchSender") - def test_init(self, mock_clickhouse_batch_sender): - # Arrange - mock_clickhouse_batch_sender_instance = MagicMock() - mock_clickhouse_batch_sender.return_value = ( - mock_clickhouse_batch_sender_instance - ) - - expected_table_name = "logline_timestamps" - expected_column_names = [ - "logline_id", - "stage", - "status", - "timestamp", - ] - - # Act - sut = LoglineTimestampsConnector() - - # Assert - self.assertEqual(expected_table_name, sut._table_name) - self.assertEqual(expected_column_names, sut._column_names) - self.assertEqual(mock_clickhouse_batch_sender_instance, sut._batch_sender) - - mock_clickhouse_batch_sender.assert_called_once_with( - table_name=expected_table_name, - column_names=expected_column_names, - ) - - @patch("src.monitoring.clickhouse_connector.ClickHouseBatchSender") - def test_insert_all_given(self, mock_clickhouse_batch_sender): - # Arrange - logline_id = "7299539b-6215-4f6b-b39f-69335aafbeff" - stage = "prefilter" - status = "prefilter_out" - timestamp = "2034-12-13 12:35:35.542635" - - sut = LoglineTimestampsConnector() - - with patch.object(sut, "_add_to_batch", MagicMock()) as mock_add_to_batch: - # Act - sut.insert( - logline_id=logline_id, - stage=stage, - status=status, - timestamp=timestamp, - ) - - # Assert - mock_add_to_batch.assert_called_once_with( - [ - uuid.UUID("7299539b-6215-4f6b-b39f-69335aafbeff"), - "prefilter", - 
"prefilter_out", - datetime.datetime(2034, 12, 13, 12, 35, 35, 542635), - ] - ) - - @patch("src.monitoring.clickhouse_connector.ClickHouseBatchSender") - def test_insert_none_given(self, mock_clickhouse_batch_sender): - # Arrange - logline_id = uuid.UUID("7299539b-6215-4f6b-b39f-69335aafbeff") - stage = "prefilter" - status = "prefilter_out" - - sut = LoglineTimestampsConnector() - - with patch.object(sut, "_add_to_batch", MagicMock()) as mock_add_to_batch: - # Act - sut.insert( - logline_id=logline_id, - stage=stage, - status=status, - ) - - # Assert - mock_add_to_batch.assert_called_once() - - -class TestBatchStatusConnector(unittest.TestCase): - @patch("src.monitoring.clickhouse_connector.ClickHouseBatchSender") - def test_init(self, mock_clickhouse_batch_sender): - # Arrange - mock_clickhouse_batch_sender_instance = MagicMock() - mock_clickhouse_batch_sender.return_value = ( - mock_clickhouse_batch_sender_instance - ) - - expected_table_name = "batch_status" - expected_column_names = [ - "batch_id", - "status", - "exit_at_stage", - ] - - # Act - sut = BatchStatusConnector() - - # Assert - self.assertEqual(expected_table_name, sut._table_name) - self.assertEqual(expected_column_names, sut._column_names) - self.assertEqual(mock_clickhouse_batch_sender_instance, sut._batch_sender) - - mock_clickhouse_batch_sender.assert_called_once_with( - table_name=expected_table_name, - column_names=expected_column_names, - ) - - @patch("src.monitoring.clickhouse_connector.ClickHouseBatchSender") - def test_insert_all_given(self, mock_clickhouse_batch_sender): - # Arrange - batch_id = "7299539b-6215-4f6b-b39f-69335aafbeff" - status = "inactive" - exit_at_stage = "prefilter" - - sut = BatchStatusConnector() - - with patch.object(sut, "_add_to_batch", MagicMock()) as mock_add_to_batch: - # Act - sut.insert( - batch_id=batch_id, - status=status, - exit_at_stage=exit_at_stage, - ) - - # Assert - mock_add_to_batch.assert_called_once_with( - [ - uuid.UUID("7299539b-6215-4f6b-b39f-69335aafbeff"), - "inactive", - "prefilter", - ] - ) - - @patch("src.monitoring.clickhouse_connector.ClickHouseBatchSender") - def test_insert_none_given(self, mock_clickhouse_batch_sender): - # Arrange - batch_id = uuid.UUID("7299539b-6215-4f6b-b39f-69335aafbeff") - status = "inactive" - - sut = BatchStatusConnector() - - with patch.object(sut, "_add_to_batch", MagicMock()) as mock_add_to_batch: - # Act - sut.insert( - batch_id=batch_id, - status=status, - ) - - # Assert - mock_add_to_batch.assert_called_once_with( - [ - uuid.UUID("7299539b-6215-4f6b-b39f-69335aafbeff"), - "inactive", - None, - ] - ) - - -class TestBatchTimestampsConnector(unittest.TestCase): - @patch("src.monitoring.clickhouse_connector.ClickHouseBatchSender") - def test_init(self, mock_clickhouse_batch_sender): - # Arrange - mock_clickhouse_batch_sender_instance = MagicMock() - mock_clickhouse_batch_sender.return_value = ( - mock_clickhouse_batch_sender_instance - ) - - expected_table_name = "batch_timestamps" - expected_column_names = [ - "batch_id", - "stage", - "status", - "timestamp", - "message_count", - ] - - # Act - sut = BatchTimestampsConnector() - - # Assert - self.assertEqual(expected_table_name, sut._table_name) - self.assertEqual(expected_column_names, sut._column_names) - self.assertEqual(mock_clickhouse_batch_sender_instance, sut._batch_sender) - - mock_clickhouse_batch_sender.assert_called_once_with( - table_name=expected_table_name, - column_names=expected_column_names, - ) - - @patch("src.monitoring.clickhouse_connector.ClickHouseBatchSender") - 
def test_insert_all_given(self, mock_clickhouse_batch_sender): - # Arrange - batch_id = "7299539b-6215-4f6b-b39f-69335aafbeff" - stage = "prefilter" - status = "prefilter_out" - timestamp = "2034-12-13 12:35:35.542635" - message_count = 456 - - sut = BatchTimestampsConnector() - - with patch.object(sut, "_add_to_batch", MagicMock()) as mock_add_to_batch: - # Act - sut.insert( - batch_id=batch_id, - stage=stage, - status=status, - timestamp=timestamp, - message_count=message_count, - ) - - # Assert - mock_add_to_batch.assert_called_once_with( - [ - uuid.UUID("7299539b-6215-4f6b-b39f-69335aafbeff"), - "prefilter", - "prefilter_out", - datetime.datetime(2034, 12, 13, 12, 35, 35, 542635), - 456, - ] - ) - - @patch("src.monitoring.clickhouse_connector.ClickHouseBatchSender") - def test_insert_none_given(self, mock_clickhouse_batch_sender): - # Arrange - batch_id = uuid.UUID("7299539b-6215-4f6b-b39f-69335aafbeff") - stage = "prefilter" - status = "prefilter_out" - message_count = 456 - - sut = BatchTimestampsConnector() - - with patch.object(sut, "_add_to_batch", MagicMock()) as mock_add_to_batch: - # Act - sut.insert( - batch_id=batch_id, - stage=stage, - status=status, - message_count=message_count, - ) - - # Assert - mock_add_to_batch.assert_called_once() - - -if __name__ == "__main__": - unittest.main() diff --git a/tests/test_clickhouse_kafka_sender.py b/tests/test_clickhouse_kafka_sender.py deleted file mode 100644 index 0aacd1b..0000000 --- a/tests/test_clickhouse_kafka_sender.py +++ /dev/null @@ -1,43 +0,0 @@ -import unittest -from unittest.mock import patch - -from src.base.clickhouse_kafka_sender import ClickHouseKafkaSender - - -class TestInit(unittest.TestCase): - @patch("src.base.clickhouse_kafka_sender.SimpleKafkaProduceHandler") - def test_init(self, mock_produce_handler): - # Arrange - table_name = "test_table" - mock_produce_handler_instance = mock_produce_handler - mock_produce_handler.return_value = mock_produce_handler_instance - - # Act - sut = ClickHouseKafkaSender(table_name) - - # Assert - self.assertEqual(table_name, sut.table_name) - self.assertEqual(mock_produce_handler_instance, sut.kafka_producer) - mock_produce_handler.assert_called_once() - - -class TestInsert(unittest.TestCase): - @patch("src.base.clickhouse_kafka_sender.SimpleKafkaProduceHandler") - def test_insert(self, mock_produce_handler): - # Arrange - mock_produce_handler_instance = mock_produce_handler - mock_produce_handler.return_value = mock_produce_handler_instance - sut = ClickHouseKafkaSender("test_table") - - # Act - sut.insert({"test_key": "test_value"}) - - # Assert - mock_produce_handler_instance.produce.assert_called_once_with( - topic="clickhouse_test_table", - data='{"test_key": "test_value"}', - ) - - -if __name__ == "__main__": - unittest.main() diff --git a/tests/test_server.py b/tests/test_server.py index a8b19c8..9b60125 100644 --- a/tests/test_server.py +++ b/tests/test_server.py @@ -13,26 +13,19 @@ class TestInit(unittest.TestCase): @patch("src.logserver.server.CONSUME_TOPIC", "test_topic") - @patch("src.logserver.server.ClickHouseKafkaSender") @patch("src.logserver.server.ExactlyOnceKafkaProduceHandler") @patch("src.logserver.server.SimpleKafkaConsumeHandler") - def test_valid_init( - self, mock_kafka_consume_handler, mock_kafka_produce_handler, mock_server_logs - ): + def test_valid_init(self, mock_kafka_consume_handler, mock_kafka_produce_handler): mock_kafka_consume_handler_instance = MagicMock() mock_kafka_produce_handler_instance = MagicMock() - mock_server_logs_instance = 
MagicMock()
         mock_kafka_produce_handler.return_value = mock_kafka_produce_handler_instance
         mock_kafka_consume_handler.return_value = mock_kafka_consume_handler_instance
-        mock_server_logs.return_value = mock_server_logs_instance
 
         sut = LogServer()
 
         self.assertEqual(mock_kafka_consume_handler_instance, sut.kafka_consume_handler)
         self.assertEqual(mock_kafka_produce_handler_instance, sut.kafka_produce_handler)
-        self.assertEqual(mock_server_logs_instance, sut.server_logs)
 
         mock_kafka_consume_handler.assert_called_once_with("test_topic")
-        mock_server_logs.assert_called_once_with("server_logs")
 
 
 class TestStart(unittest.IsolatedAsyncioTestCase):
@@ -85,16 +78,12 @@ async def test_start(
 
 class TestSend(unittest.TestCase):
     @patch("src.logserver.server.PRODUCE_TOPIC", "test_topic")
-    @patch("src.logserver.server.ClickHouseKafkaSender")
     @patch("src.logserver.server.ExactlyOnceKafkaProduceHandler")
     def test_send(
         self,
         mock_produce_handler,
-        mock_server_logs,
     ):
         # Arrange
-        mock_server_logs_instance = MagicMock()
-        mock_server_logs.return_value = mock_server_logs_instance
         mock_kafka_produce_handler_instance = MagicMock()
         mock_produce_handler.return_value = mock_kafka_produce_handler_instance
 
@@ -109,7 +98,6 @@ def test_send(
             topic="test_topic",
             data=message,
         )
-        mock_server_logs_instance.insert.assert_called_once()
 
 
 class TestFetchFromKafka(unittest.IsolatedAsyncioTestCase):

From 22d42956f7f5f65b9fee4f97b197dc6bf5692aa3 Mon Sep 17 00:00:00 2001
From: Manuel Fuchs
Date: Wed, 20 Nov 2024 11:23:39 +0100
Subject: [PATCH 22/29] Remove unrelated entry in config.yaml

---
 config.yaml | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/config.yaml b/config.yaml
index 05c8c4f..66abb3f 100644
--- a/config.yaml
+++ b/config.yaml
@@ -80,6 +80,3 @@ environment:
     batch_sender_to_prefilter: "pipeline.batch_sender_to_prefilter"
     prefilter_to_inspector: "pipeline.prefilter_to_inspector"
     inspector_to_detector: "pipeline.inspector_to_detector"
-  monitoring:
-    clickhouse_server:
-      hostname: 172.27.0.11

From 1979bfb23856eed4a0b15549979f5db16ad92031 Mon Sep 17 00:00:00 2001
From: Manuel Fuchs
Date: Wed, 20 Nov 2024 11:24:40 +0100
Subject: [PATCH 23/29] Add default.txt entries

---
 docker/default.txt | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/docker/default.txt b/docker/default.txt
index e69de29..08214bc 100644
--- a/docker/default.txt
+++ b/docker/default.txt
@@ -0,0 +1,12 @@
+2024-11-07T11:05:30.811Z NXDOMAIN 192.168.3.78 10.10.0.9 random-ip.de A 13.31.39.36 117b
+2024-11-15T15:25:42.974Z NXDOMAIN 192.168.3.63 10.10.0.10 random-ip PR 5904:471c:9109:bc67:20a8:de4c:eb8:e24b 223b
+2024-11-15T15:25:42.974Z NXDOMAIN 192.168.2.37 10.10.0.6 random-ip AAAA 46.5.230.163 168b
+2024-11-15T15:25:42.974Z NXDOMAIN 192.168.2.113 10.10.0.4 random-ip A 1b9e:7056:4b11:709:9b34:5137:742:c836 82b
+2024-11-15T15:25:42.973Z NOERROR 192.168.2.212 10.10.0.9 random-ip AAAA 33a1:3169:2131:92ae:d0b4:e174:dd85:8ff6 176b
+2024-11-15T15:25:42.973Z NOERROR 192.168.2.212 10.10.0.9 random-ip AAAA 33a1:3169:2131:92ae:d0b4:e174:dd85:8ff6 176b
+2024-11-07T11:05:30.811Z NXDOMAIN 192.168.3.78 10.10.0.9 random-ip.de A 13.31.39.36 117b
+2024-11-15T15:25:42.974Z NXDOMAIN 192.168.3.63 10.10.0.10 random-ip PR 5904:471c:9109:bc67:20a8:de4c:eb8:e24b 223b
+2024-11-15T15:25:42.974Z NXDOMAIN 192.168.2.37 10.10.0.6 random-ip AAAA 46.5.230.163 168b
+2024-11-15T15:25:42.974Z NXDOMAIN 192.168.2.113 10.10.0.4 random-ip A 1b9e:7056:4b11:709:9b34:5137:742:c836 82b
+2024-11-15T15:25:42.973Z NOERROR 192.168.2.212 10.10.0.9 random-ip AAAA 
33a1:3169:2131:92ae:d0b4:e174:dd85:8ff6 176b +2024-11-15T15:25:42.973Z NOERROR 192.168.2.212 10.10.0.9 random-ip AAAA 33a1:3169:2131:92ae:d0b4:e174:dd85:8ff6 176b From 6ab4ce8e4441edaced9dcfb3ee4a7b65f1c10df5 Mon Sep 17 00:00:00 2001 From: Manuel Fuchs Date: Wed, 20 Nov 2024 11:24:40 +0100 Subject: [PATCH 24/29] Uncomment docker-compose.yml field --- docker/docker-compose.yml | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/docker/docker-compose.yml b/docker/docker-compose.yml index 44d907d..2440e4b 100644 --- a/docker/docker-compose.yml +++ b/docker/docker-compose.yml @@ -160,13 +160,13 @@ services: limits: cpus: '2' memory: 512m - # reservations: - # cpus: '1' - # memory: 256m - # devices: - # - driver: nvidia - # count: 1 # alternatively, use `count: all` for all GPUs - # capabilities: [gpu] + reservations: + cpus: '1' + memory: 256m + devices: + - driver: nvidia + count: 1 # alternatively, use `count: all` for all GPUs + capabilities: [gpu] clickhouse-server: image: clickhouse/clickhouse-server:24.3.12.75-alpine From 64f1d180a06f2430f35d888bde1be23b02aa91fd Mon Sep 17 00:00:00 2001 From: Manuel Fuchs Date: Wed, 20 Nov 2024 11:26:21 +0100 Subject: [PATCH 25/29] Remove default.txt entries --- docker/default.txt | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/docker/default.txt b/docker/default.txt index 08214bc..e69de29 100644 --- a/docker/default.txt +++ b/docker/default.txt @@ -1,12 +0,0 @@ -2024-11-07T11:05:30.811Z NXDOMAIN 192.168.3.78 10.10.0.9 random-ip.de A 13.31.39.36 117b -2024-11-15T15:25:42.974Z NXDOMAIN 192.168.3.63 10.10.0.10 random-ip PR 5904:471c:9109:bc67:20a8:de4c:eb8:e24b 223b -2024-11-15T15:25:42.974Z NXDOMAIN 192.168.2.37 10.10.0.6 random-ip AAAA 46.5.230.163 168b -2024-11-15T15:25:42.974Z NXDOMAIN 192.168.2.113 10.10.0.4 random-ip A 1b9e:7056:4b11:709:9b34:5137:742:c836 82b -2024-11-15T15:25:42.973Z NOERROR 192.168.2.212 10.10.0.9 random-ip AAAA 33a1:3169:2131:92ae:d0b4:e174:dd85:8ff6 176b -2024-11-15T15:25:42.973Z NOERROR 192.168.2.212 10.10.0.9 random-ip AAAA 33a1:3169:2131:92ae:d0b4:e174:dd85:8ff6 176b -2024-11-07T11:05:30.811Z NXDOMAIN 192.168.3.78 10.10.0.9 random-ip.de A 13.31.39.36 117b -2024-11-15T15:25:42.974Z NXDOMAIN 192.168.3.63 10.10.0.10 random-ip PR 5904:471c:9109:bc67:20a8:de4c:eb8:e24b 223b -2024-11-15T15:25:42.974Z NXDOMAIN 192.168.2.37 10.10.0.6 random-ip AAAA 46.5.230.163 168b -2024-11-15T15:25:42.974Z NXDOMAIN 192.168.2.113 10.10.0.4 random-ip A 1b9e:7056:4b11:709:9b34:5137:742:c836 82b -2024-11-15T15:25:42.973Z NOERROR 192.168.2.212 10.10.0.9 random-ip AAAA 33a1:3169:2131:92ae:d0b4:e174:dd85:8ff6 176b -2024-11-15T15:25:42.973Z NOERROR 192.168.2.212 10.10.0.9 random-ip AAAA 33a1:3169:2131:92ae:d0b4:e174:dd85:8ff6 176b From f6fbabe8c5a007e7b67d176d23739e347a29e399 Mon Sep 17 00:00:00 2001 From: Manuel Fuchs Date: Wed, 20 Nov 2024 11:27:33 +0100 Subject: [PATCH 26/29] Update reservations field in docker-compose.yml --- docker/docker-compose.yml | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/docker/docker-compose.yml b/docker/docker-compose.yml index 2440e4b..f428797 100644 --- a/docker/docker-compose.yml +++ b/docker/docker-compose.yml @@ -160,13 +160,13 @@ services: limits: cpus: '2' memory: 512m - reservations: - cpus: '1' - memory: 256m - devices: - - driver: nvidia - count: 1 # alternatively, use `count: all` for all GPUs - capabilities: [gpu] + reservations: + cpus: '1' + memory: 256m + devices: + - driver: nvidia + count: 1 # alternatively, use `count: all` for 
all GPUs + capabilities: [ gpu ] clickhouse-server: image: clickhouse/clickhouse-server:24.3.12.75-alpine From da31bfba0119ac7a691ee853a17eaf6a62537377 Mon Sep 17 00:00:00 2001 From: Manuel Fuchs Date: Sat, 23 Nov 2024 15:11:23 +0100 Subject: [PATCH 27/29] Update tests --- src/base/kafka_handler.py | 6 +- src/logcollector/collector.py | 10 ++- src/logserver/server.py | 13 ++- tests/test_collector.py | 35 ++++---- tests/test_kafka_consume_handler.py | 121 ++++++++++++++++++++++++++++ tests/test_server.py | 41 +++++----- 6 files changed, 181 insertions(+), 45 deletions(-) create mode 100644 tests/test_kafka_consume_handler.py diff --git a/src/base/kafka_handler.py b/src/base/kafka_handler.py index c14a69a..9b01fac 100644 --- a/src/base/kafka_handler.py +++ b/src/base/kafka_handler.py @@ -74,7 +74,7 @@ def produce(self, *args, **kwargs): """ Encodes the given data for transport and sends it on the specified topic. """ - pass + raise NotImplementedError def __del__(self) -> None: self.producer.flush() @@ -237,7 +237,7 @@ def consume(self, *args, **kwargs): """ Consumes available messages on the specified topic and decodes it. """ - pass + raise NotImplementedError def consume_as_json(self) -> tuple[None | str, dict]: """ @@ -324,7 +324,7 @@ def consume(self) -> tuple[str | None, str | None, str | None]: except KeyboardInterrupt: logger.info("Stopping KafkaConsumeHandler...") raise KeyboardInterrupt - except Exception as e: + except Exception: raise diff --git a/src/logcollector/collector.py b/src/logcollector/collector.py index 0c1430d..5af98b7 100644 --- a/src/logcollector/collector.py +++ b/src/logcollector/collector.py @@ -47,12 +47,18 @@ async def start(self) -> None: f" ⤷ receiving on Kafka topic '{CONSUME_TOPIC}'" ) + task_fetch = asyncio.Task(self.fetch()) + task_send = asyncio.Task(self.send()) + try: await asyncio.gather( - self.fetch(), - self.send(), + task_fetch, + task_send, ) except KeyboardInterrupt: + task_fetch.cancel() + task_send.cancel() + logger.info("LogCollector stopped.") async def fetch(self) -> None: diff --git a/src/logserver/server.py b/src/logserver/server.py index 17964f4..59fc9e7 100644 --- a/src/logserver/server.py +++ b/src/logserver/server.py @@ -53,12 +53,19 @@ async def start(self) -> None: f" ⤷ sending on Kafka topic '{PRODUCE_TOPIC}'" ) + task_fetch_kafka = asyncio.Task(self.fetch_from_kafka()) + task_fetch_file = asyncio.Task(self.fetch_from_file()) + try: - await asyncio.gather( - self.fetch_from_kafka(), - self.fetch_from_file(), + task = asyncio.gather( + task_fetch_kafka, + task_fetch_file, ) + await task except KeyboardInterrupt: + task_fetch_kafka.cancel() + task_fetch_file.cancel() + logger.info("LogServer stopped.") def send(self, message: str) -> None: diff --git a/tests/test_collector.py b/tests/test_collector.py index e09cbef..033a4ce 100644 --- a/tests/test_collector.py +++ b/tests/test_collector.py @@ -65,23 +65,24 @@ async def mock_gather(*args, **kwargs): self.sut.fetch.assert_called_once() self.sut.send.assert_called_once() - # TODO: Update - # async def test_start_handles_keyboard_interrupt(self): - # # Arrange - # self.sut.fetch = AsyncMock() - # self.sut.send = AsyncMock() - # - # async def mock_gather(*args, **kwargs): - # raise KeyboardInterrupt - # - # with (patch('src.logcollector.collector.asyncio.gather', side_effect=mock_gather) as mock): - # # Act - # await self.sut.start() - # - # # Assert - # mock.assert_called_once() - # self.sut.fetch.assert_called_once() - # self.sut.send.assert_called_once() + async def 
test_start_handles_keyboard_interrupt(self): + # Arrange + self.sut.fetch = AsyncMock() + self.sut.send = AsyncMock() + + async def mock_gather(*args, **kwargs): + raise KeyboardInterrupt + + with patch( + "src.logcollector.collector.asyncio.gather", side_effect=mock_gather + ) as mock: + # Act + await self.sut.start() + + # Assert + mock.assert_called_once() + self.sut.fetch.assert_called_once() + self.sut.send.assert_called_once() class TestFetch(unittest.IsolatedAsyncioTestCase): diff --git a/tests/test_kafka_consume_handler.py b/tests/test_kafka_consume_handler.py new file mode 100644 index 0000000..86ed43c --- /dev/null +++ b/tests/test_kafka_consume_handler.py @@ -0,0 +1,121 @@ +import unittest +from unittest.mock import patch, MagicMock + +from src.base.kafka_handler import KafkaConsumeHandler + + +class TestInit(unittest.TestCase): + @patch("src.base.kafka_handler.CONSUMER_GROUP_ID", "test_group_id") + @patch( + "src.base.kafka_handler.KAFKA_BROKERS", + [ + { + "hostname": "127.0.0.1", + "port": 9999, + }, + { + "hostname": "127.0.0.2", + "port": 9998, + }, + { + "hostname": "127.0.0.3", + "port": 9997, + }, + ], + ) + @patch("src.base.kafka_handler.Consumer") + def test_init_successful(self, mock_consumer): + # Arrange + mock_consumer_instance = MagicMock() + mock_consumer.return_value = mock_consumer_instance + + expected_conf = { + "bootstrap.servers": "127.0.0.1:9999,127.0.0.2:9998,127.0.0.3:9997", + "group.id": "test_group_id", + "enable.auto.commit": False, + "auto.offset.reset": "earliest", + "enable.partition.eof": True, + } + + # Act + sut = KafkaConsumeHandler(topics="test_topic") + + # Assert + self.assertEqual(mock_consumer_instance, sut.consumer) + + mock_consumer.assert_called_once_with(expected_conf) + mock_consumer_instance.assign.assert_called_once() + + @patch("src.base.kafka_handler.CONSUMER_GROUP_ID", "test_group_id") + @patch( + "src.base.kafka_handler.KAFKA_BROKERS", + [ + { + "hostname": "127.0.0.1", + "port": 9999, + }, + { + "hostname": "127.0.0.2", + "port": 9998, + }, + { + "hostname": "127.0.0.3", + "port": 9997, + }, + ], + ) + @patch("src.base.kafka_handler.Consumer") + def test_init_successful_with_list(self, mock_consumer): + # Arrange + mock_consumer_instance = MagicMock() + mock_consumer.return_value = mock_consumer_instance + + expected_conf = { + "bootstrap.servers": "127.0.0.1:9999,127.0.0.2:9998,127.0.0.3:9997", + "group.id": "test_group_id", + "enable.auto.commit": False, + "auto.offset.reset": "earliest", + "enable.partition.eof": True, + } + + # Act + sut = KafkaConsumeHandler(topics=["test_topic_1", "test_topic_2"]) + + # Assert + self.assertEqual(mock_consumer_instance, sut.consumer) + + mock_consumer.assert_called_once_with(expected_conf) + mock_consumer_instance.assign.assert_called_once() + + +class TestConsume(unittest.TestCase): + @patch("src.base.kafka_handler.CONSUMER_GROUP_ID", "test_group_id") + @patch( + "src.base.kafka_handler.KAFKA_BROKERS", + [ + { + "hostname": "127.0.0.1", + "port": 9999, + }, + { + "hostname": "127.0.0.2", + "port": 9998, + }, + { + "hostname": "127.0.0.3", + "port": 9997, + }, + ], + ) + @patch("src.base.kafka_handler.Consumer") + def test_not_implemented(self, mock_consumer): + # Arrange + sut = KafkaConsumeHandler(topics="test_topic") + + # Act and Assert + with self.assertRaises(NotImplementedError): + sut.consume() + + +if __name__ == "__main__": + unittest.main() diff --git a/tests/test_server.py b/tests/test_server.py index 9b60125..06ee245 100644 --- a/tests/test_server.py +++ b/tests/test_server.py 
@@ -54,26 +54,27 @@ async def test_start( mock_fetch_from_kafka.assert_called_once() mock_fetch_from_file.assert_called_once() - # TODO: Update - # @patch("src.logserver.server.LogServer.fetch_from_kafka") - # @patch("src.logserver.server.LogServer.fetch_from_file") - # async def test_start_handles_keyboard_interrupt( - # self, - # mock_fetch_from_file, - # mock_fetch_from_kafka, - # ): - # # Arrange - # async def mock_gather(*args, **kwargs): - # raise KeyboardInterrupt - # - # with (patch('src.logserver.server.asyncio.gather', side_effect=mock_gather) as mock): - # # Act - # await self.sut.start() - # - # # Assert - # mock.assert_called_once() - # mock_fetch_from_kafka.assert_called_once() - # mock_fetch_from_file.assert_called_once() + @patch("src.logserver.server.LogServer.fetch_from_kafka") + @patch("src.logserver.server.LogServer.fetch_from_file") + async def test_start_handles_keyboard_interrupt( + self, + mock_fetch_from_file, + mock_fetch_from_kafka, + ): + # Arrange + async def mock_gather(*args, **kwargs): + raise KeyboardInterrupt + + with patch( + "src.logserver.server.asyncio.gather", side_effect=mock_gather + ) as mock: + # Act + await self.sut.start() + + # Assert + mock.assert_called_once() + mock_fetch_from_kafka.assert_called_once() + mock_fetch_from_file.assert_called_once() class TestSend(unittest.TestCase): From 21117206a53fcd7103d734b02f5d9ad4f77acdf0 Mon Sep 17 00:00:00 2001 From: Manuel Fuchs Date: Sat, 23 Nov 2024 18:36:50 +0100 Subject: [PATCH 28/29] Update tests --- src/base/kafka_handler.py | 57 ++--- ...test_exactly_once_kafka_consume_handler.py | 229 +++++++++++++++++- tests/test_kafka_consume_handler.py | 101 +++++++- tests/test_kafka_produce_handler.py | 52 ++++ tests/test_simple_kafka_consume_handler.py | 146 +++++++++++ tests/test_simple_kafka_produce_handler.py | 85 +++++++ 6 files changed, 621 insertions(+), 49 deletions(-) create mode 100644 tests/test_kafka_produce_handler.py create mode 100644 tests/test_simple_kafka_consume_handler.py create mode 100644 tests/test_simple_kafka_produce_handler.py diff --git a/src/base/kafka_handler.py b/src/base/kafka_handler.py index 9b01fac..430eaf2 100644 --- a/src/base/kafka_handler.py +++ b/src/base/kafka_handler.py @@ -249,24 +249,20 @@ def consume_as_json(self) -> tuple[None | str, dict]: Raises: ValueError: Invalid data format - KafkaMessageFetchException: Error during message fetching/consuming - KeyboardInterrupt: Execution interrupted by user """ - try: - key, value, topic = self.consume() + key, value, topic = self.consume() - if not key and not value: - return None, {} - except KafkaMessageFetchException: - raise - except KeyboardInterrupt: - raise + if not key and not value: + return None, {} - eval_data = ast.literal_eval(value) + try: + eval_data = ast.literal_eval(value) - if isinstance(eval_data, dict): - return key, eval_data - else: + if isinstance(eval_data, dict): + return key, eval_data + else: + raise + except Exception: raise ValueError("Unknown data format") def __del__(self) -> None: @@ -290,10 +286,6 @@ def consume(self) -> tuple[str | None, str | None, str | None]: Returns: Either ``[None,None,None]`` if empty data was retrieved or ``[key,value,topic]`` as tuple of strings of the consumed data. 
- - Raises: - KeyboardInterrupt: Execution interrupted by user - Exception: Error during consuming """ empty_data_retrieved = False @@ -313,7 +305,7 @@ def consume(self) -> tuple[str | None, str | None, str | None]: continue else: logger.error(f"Consumer error: {msg.error()}") - raise + raise ValueError("Message is invalid") # unpack message key = msg.key().decode("utf-8") if msg.key() else None @@ -323,9 +315,6 @@ def consume(self) -> tuple[str | None, str | None, str | None]: return key, value, topic except KeyboardInterrupt: logger.info("Stopping KafkaConsumeHandler...") - raise KeyboardInterrupt - except Exception: - raise class ExactlyOnceKafkaConsumeHandler(KafkaConsumeHandler): @@ -345,10 +334,6 @@ def consume(self) -> tuple[str | None, str | None, str | None]: Returns: Either ``[None,None,None]`` if empty data was retrieved or ``[key,value,topic]`` as tuple of strings of the consumed data. - - Raises: - KeyboardInterrupt: Execution interrupted by user - Exception: Error during consuming """ empty_data_retrieved = False @@ -368,7 +353,7 @@ def consume(self) -> tuple[str | None, str | None, str | None]: continue else: logger.error(f"Consumer error: {msg.error()}") - raise + raise ValueError("Message is invalid") # unpack message key = msg.key().decode("utf-8") if msg.key() else None @@ -380,9 +365,6 @@ def consume(self) -> tuple[str | None, str | None, str | None]: return key, value, topic except KeyboardInterrupt: logger.info("Shutting down KafkaConsumeHandler...") - raise KeyboardInterrupt - except Exception: - raise @staticmethod def _is_dicts(obj): @@ -398,19 +380,12 @@ def consume_as_object(self) -> tuple[None | str, Batch]: Raises: ValueError: Invalid data format - KafkaMessageFetchException: Error during message fetching/consuming - KeyboardInterrupt: Execution interrupted by user """ - try: - key, value, topic = self.consume() + key, value, topic = self.consume() - if not key and not value: - return None, {} - except KafkaMessageFetchException as e: - logger.warning(e) - raise - except KeyboardInterrupt: - raise + if not key and not value: + # TODO: Change return value to fit the type, maybe switch to raise + return None, {} eval_data: dict = ast.literal_eval(value) diff --git a/tests/test_exactly_once_kafka_consume_handler.py b/tests/test_exactly_once_kafka_consume_handler.py index 6ef017d..901b94a 100644 --- a/tests/test_exactly_once_kafka_consume_handler.py +++ b/tests/test_exactly_once_kafka_consume_handler.py @@ -1,8 +1,11 @@ +import datetime +import json import unittest -from unittest.mock import MagicMock, patch +from unittest.mock import patch, Mock -from confluent_kafka import KafkaException +from confluent_kafka import KafkaException, KafkaError +from src.base import Batch from src.base.kafka_handler import ExactlyOnceKafkaConsumeHandler CONSUMER_GROUP_ID = "test_group_id" @@ -29,7 +32,7 @@ class TestInit(unittest.TestCase): ) @patch("src.base.kafka_handler.Consumer") def test_init(self, mock_consumer): - mock_consumer_instance = MagicMock() + mock_consumer_instance = Mock() mock_consumer.return_value = mock_consumer_instance expected_conf = { @@ -68,7 +71,7 @@ def test_init(self, mock_consumer): ) @patch("src.base.kafka_handler.Consumer") def test_init_fail(self, mock_consumer, mock_logger): - mock_consumer_instance = MagicMock() + mock_consumer_instance = Mock() mock_consumer.return_value = mock_consumer_instance expected_conf = { @@ -89,6 +92,101 @@ def test_init_fail(self, mock_consumer, mock_logger): mock_consumer_instance.assign.assert_called_once() +class 
TestConsume(unittest.TestCase): + @patch("src.base.kafka_handler.CONSUMER_GROUP_ID", "test_group_id") + @patch( + "src.base.kafka_handler.KAFKA_BROKERS", + [ + { + "hostname": "127.0.0.1", + "port": 9999, + }, + { + "hostname": "127.0.0.2", + "port": 9998, + }, + { + "hostname": "127.0.0.3", + "port": 9997, + }, + ], + ) + @patch("src.base.kafka_handler.Consumer") + def setUp(self, mock_consumer): + self.mock_consumer = mock_consumer + self.topics = ["test_topic_1", "test_topic_2"] + self.sut = ExactlyOnceKafkaConsumeHandler(self.topics) + + def test_no_messages_polling(self): + self.sut.consumer.poll.side_effect = [None, None, None, StopIteration] + + result = None + try: + result = self.sut.consume() + except StopIteration: + pass + + self.assertIsNone(result) + + def test_consumer_error_partition_eof(self): + eof_error = Mock() + eof_error.code.return_value = KafkaError._PARTITION_EOF + + msg = Mock() + msg.error.return_value = eof_error + self.sut.consumer.poll.side_effect = [msg, StopIteration] + + result = None + try: + result = self.sut.consume() + except StopIteration: + pass + + self.assertIsNone(result) + + def test_consumer_raises_other_error(self): + other_error = Mock() + other_error.code.return_value = KafkaError._ALL_BROKERS_DOWN + + msg = Mock() + msg.error.return_value = other_error + + self.sut.consumer.poll.side_effect = [msg] + + with self.assertRaises(Exception): + self.sut.consume() + + def test_message_processing(self): + key = "test_key" + value = "test_value" + topic = "test_topic" + + msg = Mock() + msg.key.return_value = key.encode("utf-8") + msg.value.return_value = value.encode("utf-8") + msg.topic.return_value = topic + msg.error.return_value = None + + self.sut.consumer.poll.side_effect = [msg, StopIteration] + self.sut.consumer.commit = Mock() + + result = None + try: + result = self.sut.consume() + except StopIteration: + pass + + self.sut.consumer.commit.assert_called_once() + self.assertEqual((key, value, topic), result) + + def test_consumer_raises_keyboard_interrupt(self): + self.sut.consumer.poll.side_effect = [KeyboardInterrupt] + + self.sut.consume() + + self.assertTrue(True) + + class TestDel(unittest.TestCase): @patch("src.base.kafka_handler.CONSUMER_GROUP_ID", "test_group_id") @patch( @@ -111,7 +209,7 @@ class TestDel(unittest.TestCase): @patch("src.base.kafka_handler.Consumer") def test_del_with_existing_consumer(self, mock_consumer): # Arrange - mock_consumer_instance = MagicMock() + mock_consumer_instance = Mock() mock_consumer.return_value = mock_consumer_instance sut = ExactlyOnceKafkaConsumeHandler(topics="test_topic") @@ -144,7 +242,7 @@ def test_del_with_existing_consumer(self, mock_consumer): @patch("src.base.kafka_handler.Consumer") def test_del_with_existing_consumer(self, mock_consumer): # Arrange - mock_consumer_instance = MagicMock() + mock_consumer_instance = Mock() mock_consumer.return_value = mock_consumer_instance sut = ExactlyOnceKafkaConsumeHandler(topics="test_topic") @@ -178,12 +276,129 @@ class TestDict(unittest.TestCase): ) @patch("src.base.kafka_handler.Consumer") def test_dict(self, mock_consumer): - mock_consumer_instance = MagicMock() + mock_consumer_instance = Mock() mock_consumer.return_value = mock_consumer_instance sut = ExactlyOnceKafkaConsumeHandler(topics="test_topic") self.assertTrue(sut._is_dicts([{}, {}])) +class TestConsumeAsObject(unittest.TestCase): + @patch("src.base.kafka_handler.CONSUMER_GROUP_ID", "test_group_id") + @patch( + "src.base.kafka_handler.KAFKA_BROKERS", + [ + { + "hostname": "127.0.0.1", + 
"port": 9999, + }, + { + "hostname": "127.0.0.2", + "port": 9998, + }, + { + "hostname": "127.0.0.3", + "port": 9997, + }, + ], + ) + @patch("src.base.kafka_handler.Consumer") + def setUp(self, mock_consumer): + self.sut = ExactlyOnceKafkaConsumeHandler(topics="test_topic") + self.sut.batch_schema = Mock() + self.sut.batch_schema.load = Mock( + return_value=Batch(datetime.datetime.now(), datetime.datetime.now()) + ) + + def test_consume_as_object_no_key_no_value(self): + with patch( + "src.base.kafka_handler.ExactlyOnceKafkaConsumeHandler.consume" + ) as mock_consume: + mock_consume.return_value = [None, None, None] + + result = self.sut.consume_as_object() + + self.assertEqual(result, (None, {})) + + def test_consume_as_object_valid_data(self): + key = "valid_key" + value = json.dumps({"data": [{"field1": "value1", "field2": "value2"}]}) + topic = "test_topic" + batch_data = [{"field1": "value1", "field2": "value2"}] + self.sut.batch_schema.load.return_value = Batch( + datetime.datetime.now(), + datetime.datetime.now(), + batch_data, + ) + + with patch( + "src.base.kafka_handler.ExactlyOnceKafkaConsumeHandler.consume" + ) as mock_consume: + mock_consume.return_value = [key, value, topic] + + result = self.sut.consume_as_object() + + self.assertEqual(result[0], key) + self.assertIsInstance(result[1], Batch) + + def test_consume_as_object_valid_data_with_inner_strings(self): + key = "valid_key" + value = json.dumps( + { + "data": [ + '{"field1": "value1", "field2": "value2"}', + '{"field3": "value3", "field4": "value4"}', + ] + } + ) + topic = "test_topic" + batch_data = [{"field1": "value1", "field2": "value2"}] + self.sut.batch_schema.load.return_value = Batch( + datetime.datetime.now(), + datetime.datetime.now(), + batch_data, + ) + + with patch( + "src.base.kafka_handler.ExactlyOnceKafkaConsumeHandler.consume" + ) as mock_consume: + mock_consume.return_value = [key, value, topic] + + result = self.sut.consume_as_object() + + self.assertEqual(result[0], key) + self.assertIsInstance(result[1], Batch) + + def test_consume_as_object_invalid_data(self): + key = "invalid_key" + value = json.dumps( + {"data": {"field1": "value1", "field2": "value2"}} # invalid format + ) + topic = "test_topic" + + with patch( + "src.base.kafka_handler.ExactlyOnceKafkaConsumeHandler.consume" + ) as mock_consume: + mock_consume.return_value = [key, value, topic] + + with self.assertRaises(ValueError): + self.sut.consume_as_object() + + def test_consume_as_object_invalid_batch(self): + key = "valid_key" + value = json.dumps({"data": [{"field1": "value1", "field2": "value2"}]}) + topic = "test_topic" + + self.sut.batch_schema.load.return_value = None + + with patch( + "src.base.kafka_handler.ExactlyOnceKafkaConsumeHandler.consume" + ) as mock_consume: + mock_consume.return_value = [key, value, topic] + + with self.assertRaises(ValueError): + self.sut.consume_as_object() + + if __name__ == "__main__": unittest.main() diff --git a/tests/test_kafka_consume_handler.py b/tests/test_kafka_consume_handler.py index 86ed43c..48d3ced 100644 --- a/tests/test_kafka_consume_handler.py +++ b/tests/test_kafka_consume_handler.py @@ -1,7 +1,8 @@ +import json import unittest from unittest.mock import patch, MagicMock -from src.base.kafka_handler import KafkaConsumeHandler +from src.base.kafka_handler import KafkaConsumeHandler, KafkaMessageFetchException class TestInit(unittest.TestCase): @@ -117,5 +118,103 @@ def test_not_implemented(self, mock_consumer): sut.consume() +class TestConsumeAsJSON(unittest.TestCase): + 
@patch("src.base.kafka_handler.CONSUMER_GROUP_ID", "test_group_id") + @patch( + "src.base.kafka_handler.KAFKA_BROKERS", + [ + { + "hostname": "127.0.0.1", + "port": 9999, + }, + { + "hostname": "127.0.0.2", + "port": 9998, + }, + { + "hostname": "127.0.0.3", + "port": 9997, + }, + ], + ) + @patch("src.base.kafka_handler.Consumer") + def setUp(self, mock_consumer): + self.sut = KafkaConsumeHandler(topics="test_topic") + + def test_successful(self): + with patch( + "src.base.kafka_handler.KafkaConsumeHandler.consume" + ) as mock_consume: + # Arrange + mock_consume.return_value = [ + "test_key", + json.dumps(dict(test_value=123)), + "test_topic", + ] + + # Act + returned_values = self.sut.consume_as_json() + + # Assert + self.assertEqual(("test_key", dict(test_value=123)), returned_values) + + def test_wrong_data_format(self): + with patch( + "src.base.kafka_handler.KafkaConsumeHandler.consume" + ) as mock_consume: + # Arrange + mock_consume.return_value = ["test_key", "wrong_format", "test_topic"] + + # Act and Assert + with self.assertRaises(ValueError): + self.sut.consume_as_json() + + def test_wrong_data_format_list(self): + with patch( + "src.base.kafka_handler.KafkaConsumeHandler.consume" + ) as mock_consume: + # Arrange + mock_consume.return_value = [ + "test_key", + json.dumps([1, 2, 3]), + "test_topic", + ] + + # Act and Assert + with self.assertRaises(ValueError): + self.sut.consume_as_json() + + def test_kafka_message_fetch_exception(self): + with patch( + "src.base.kafka_handler.KafkaConsumeHandler.consume", + side_effect=KafkaMessageFetchException, + ): + # Act and Assert + with self.assertRaises(KafkaMessageFetchException): + self.sut.consume_as_json() + + def test_keyboard_interrupt(self): + with patch( + "src.base.kafka_handler.KafkaConsumeHandler.consume", + side_effect=KeyboardInterrupt, + ): + # Act and Assert + with self.assertRaises(KeyboardInterrupt): + self.sut.consume_as_json() + + def test_kafka_message_else(self): + with patch( + "src.base.kafka_handler.KafkaConsumeHandler.consume" + ) as mock_consume: + # Arrange + mock_consume.return_value = [None, None, "test_topic"] + + # Act + returned_values = self.sut.consume_as_json() + + # Assert + self.assertEqual((None, {}), returned_values) + + if __name__ == "__main__": unittest.main() diff --git a/tests/test_kafka_produce_handler.py b/tests/test_kafka_produce_handler.py new file mode 100644 index 0000000..859bfab --- /dev/null +++ b/tests/test_kafka_produce_handler.py @@ -0,0 +1,52 @@ +import unittest +from unittest.mock import patch, Mock + +from src.base.kafka_handler import KafkaProduceHandler + + +class TestInit(unittest.TestCase): + def test_successful(self): + # Arrange + conf = "test_conf" + + # Act + with patch("src.base.kafka_handler.Producer") as mock_producer: + mock_producer_instance = Mock() + mock_producer.return_value = mock_producer_instance + + sut = KafkaProduceHandler(conf) + + # Assert + self.assertEqual(None, sut.consumer) + self.assertEqual(mock_producer_instance, sut.producer) + mock_producer.assert_called_once_with(conf) + + +class TestProduce(unittest.TestCase): + @patch("src.base.kafka_handler.Producer") + def test_not_implemented(self, mock_producer): + # Arrange + sut = KafkaProduceHandler("test_conf") + + # Act and Assert + with self.assertRaises(NotImplementedError): + sut.produce() + + +class TestDel(unittest.TestCase): + @patch("src.base.kafka_handler.Producer") + def test_not_implemented(self, mock_producer): + # Arrange + mock_producer_instance = Mock() + mock_producer.return_value = 
mock_producer_instance + sut = KafkaProduceHandler("test_conf") + + # Act + del sut + + # Assert + mock_producer_instance.flush.assert_called_once() + + +if __name__ == "__main__": + unittest.main() diff --git a/tests/test_simple_kafka_consume_handler.py b/tests/test_simple_kafka_consume_handler.py new file mode 100644 index 0000000..f69179c --- /dev/null +++ b/tests/test_simple_kafka_consume_handler.py @@ -0,0 +1,146 @@ +import unittest +from unittest.mock import patch, Mock + +from confluent_kafka import KafkaError + +from src.base.kafka_handler import SimpleKafkaConsumeHandler + + +class TestInit(unittest.TestCase): + @patch("src.base.kafka_handler.CONSUMER_GROUP_ID", "test_group_id") + @patch( + "src.base.kafka_handler.KAFKA_BROKERS", + [ + { + "hostname": "127.0.0.1", + "port": 9999, + }, + { + "hostname": "127.0.0.2", + "port": 9998, + }, + { + "hostname": "127.0.0.3", + "port": 9997, + }, + ], + ) + @patch("src.base.kafka_handler.Consumer") + def test_init_successful(self, mock_consumer): + # Arrange + mock_consumer_instance = Mock() + mock_consumer.return_value = mock_consumer_instance + + expected_conf = { + "bootstrap.servers": "127.0.0.1:9999,127.0.0.2:9998,127.0.0.3:9997", + "group.id": "test_group_id", + "enable.auto.commit": False, + "auto.offset.reset": "earliest", + "enable.partition.eof": True, + } + + # Act + sut = SimpleKafkaConsumeHandler(topics="test_topic") + + # Assert + self.assertEqual(mock_consumer_instance, sut.consumer) + + mock_consumer.assert_called_once_with(expected_conf) + mock_consumer_instance.assign.assert_called_once() + + +class TestConsume(unittest.TestCase): + @patch("src.base.kafka_handler.CONSUMER_GROUP_ID", "test_group_id") + @patch( + "src.base.kafka_handler.KAFKA_BROKERS", + [ + { + "hostname": "127.0.0.1", + "port": 9999, + }, + { + "hostname": "127.0.0.2", + "port": 9998, + }, + { + "hostname": "127.0.0.3", + "port": 9997, + }, + ], + ) + @patch("src.base.kafka_handler.Consumer") + def setUp(self, mock_consumer): + self.mock_consumer = mock_consumer + self.topics = ["test_topic_1", "test_topic_2"] + self.sut = SimpleKafkaConsumeHandler(self.topics) + + def test_no_messages_polling(self): + self.sut.consumer.poll.side_effect = [None, None, None, StopIteration] + + result = None + try: + result = self.sut.consume() + except StopIteration: + pass + + self.assertIsNone(result) + + def test_consumer_error_partition_eof(self): + eof_error = Mock() + eof_error.code.return_value = KafkaError._PARTITION_EOF + + msg = Mock() + msg.error.return_value = eof_error + self.sut.consumer.poll.side_effect = [msg, StopIteration] + + result = None + try: + result = self.sut.consume() + except StopIteration: + pass + + self.assertIsNone(result) + + def test_consumer_raises_other_error(self): + other_error = Mock() + other_error.code.return_value = KafkaError._ALL_BROKERS_DOWN + + msg = Mock() + msg.error.return_value = other_error + + self.sut.consumer.poll.side_effect = [msg] + + with self.assertRaises(Exception): + self.sut.consume() + + def test_message_processing(self): + key = "test_key" + value = "test_value" + topic = "test_topic" + + msg = Mock() + msg.key.return_value = key.encode("utf-8") + msg.value.return_value = value.encode("utf-8") + msg.topic.return_value = topic + msg.error.return_value = None + + self.sut.consumer.poll.side_effect = [msg, StopIteration] + + result = None + try: + result = self.sut.consume() + except StopIteration: + pass + + self.assertEqual((key, value, topic), result) + + def test_consumer_raises_keyboard_interrupt(self): + 
self.sut.consumer.poll.side_effect = [KeyboardInterrupt] + + self.sut.consume() + + self.assertTrue(True) + + +if __name__ == "__main__": + unittest.main() diff --git a/tests/test_simple_kafka_produce_handler.py b/tests/test_simple_kafka_produce_handler.py new file mode 100644 index 0000000..8faed81 --- /dev/null +++ b/tests/test_simple_kafka_produce_handler.py @@ -0,0 +1,85 @@ +import unittest +from unittest.mock import patch, Mock + +from src.base.kafka_handler import SimpleKafkaProduceHandler +from src.base.utils import kafka_delivery_report + + +class TestInit(unittest.TestCase): + @patch( + "src.base.kafka_handler.KAFKA_BROKERS", + [ + { + "hostname": "127.0.0.1", + "port": 9999, + }, + { + "hostname": "127.0.0.2", + "port": 9998, + }, + { + "hostname": "127.0.0.3", + "port": 9997, + }, + ], + ) + def test_successful(self): + # Arrange + expected_conf = { + "bootstrap.servers": "127.0.0.1:9999,127.0.0.2:9998,127.0.0.3:9997", + "enable.idempotence": False, + "acks": "1", + } + + # Act + with patch("src.base.kafka_handler.Producer") as mock_producer: + mock_producer_instance = Mock() + mock_producer.return_value = mock_producer_instance + + sut = SimpleKafkaProduceHandler() + + # Assert + self.assertEqual("127.0.0.1:9999,127.0.0.2:9998,127.0.0.3:9997", sut.brokers) + self.assertIsNone(sut.consumer) + mock_producer.assert_called_once_with(expected_conf) + + +class TestProduce(unittest.TestCase): + def test_with_data(self): + with patch("src.base.kafka_handler.Producer") as mock_producer: + # Arrange + mock_producer_instance = Mock() + mock_producer.return_value = mock_producer_instance + + sut = SimpleKafkaProduceHandler() + + # Act + sut.produce("test_topic", "test_data") + + # Assert + mock_producer_instance.flush.assert_called_once() + mock_producer_instance.produce.assert_called_once_with( + topic="test_topic", + key=None, + value="test_data".encode("utf-8"), + callback=kafka_delivery_report, + ) + + def test_without_data(self): + with patch("src.base.kafka_handler.Producer") as mock_producer: + # Arrange + mock_producer_instance = Mock() + mock_producer.return_value = mock_producer_instance + + sut = SimpleKafkaProduceHandler() + + # Act + sut.produce("test_topic", "") + + # Assert + mock_producer_instance.flush.assert_not_called() + mock_producer_instance.produce.assert_not_called() + + +if __name__ == "__main__": + unittest.main() From f7f842ecc547a04f219d4feda44811cd3a8d9d59 Mon Sep 17 00:00:00 2001 From: Manuel Fuchs Date: Sun, 24 Nov 2024 22:07:31 +0100 Subject: [PATCH 29/29] Fix wrong produce topic of batch_handler.py --- src/logcollector/batch_handler.py | 5 ++++- tests/test_batch_handler.py | 6 ++++-- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/src/logcollector/batch_handler.py b/src/logcollector/batch_handler.py index f45c2f5..7509d79 100644 --- a/src/logcollector/batch_handler.py +++ b/src/logcollector/batch_handler.py @@ -15,6 +15,9 @@ config = setup_config() BATCH_SIZE = config["pipeline"]["log_collection"]["batch_handler"]["batch_size"] BATCH_TIMEOUT = config["pipeline"]["log_collection"]["batch_handler"]["batch_timeout"] +PRODUCE_TOPIC = config["environment"]["kafka_topics"]["pipeline"][ + "batch_sender_to_prefilter" +] class BufferedBatch: @@ -260,7 +263,7 @@ class BufferedBatchSender: """ def __init__(self): - self.topic = "Prefilter" + self.topic = PRODUCE_TOPIC self.batch = BufferedBatch() self.timer = None diff --git a/tests/test_batch_handler.py b/tests/test_batch_handler.py index aa59f1a..bd69c0d 100644 --- a/tests/test_batch_handler.py +++ 
b/tests/test_batch_handler.py @@ -5,6 +5,7 @@ class TestInit(unittest.TestCase): + @patch("src.logcollector.batch_handler.PRODUCE_TOPIC", "test_topic") @patch("src.logcollector.batch_handler.BufferedBatch") @patch("src.logcollector.batch_handler.ExactlyOnceKafkaProduceHandler") def test_init_with_buffer(self, mock_kafka_produce_handler, mock_buffered_batch): @@ -18,7 +19,7 @@ def test_init_with_buffer(self, mock_kafka_produce_handler, mock_buffered_batch) sut = BufferedBatchSender() # Assert - self.assertEqual("Prefilter", sut.topic) + self.assertEqual("test_topic", sut.topic) self.assertEqual(mock_batch_instance, sut.batch) self.assertIsNone(sut.timer) self.assertEqual(mock_handler_instance, sut.kafka_produce_handler) @@ -290,6 +291,7 @@ def test_send_batch_for_key_value_error( class TestSendDataPacket(unittest.TestCase): + @patch("src.logcollector.batch_handler.PRODUCE_TOPIC", "test_topic") @patch("src.logcollector.batch_handler.ExactlyOnceKafkaProduceHandler") def test_send_data_packet(self, mock_produce_handler): # Arrange @@ -311,7 +313,7 @@ def test_send_data_packet(self, mock_produce_handler): # Assert mock_produce_handler_instance.produce.assert_called_once_with( - topic="Prefilter", + topic="test_topic", data='{"begin_timestamp": "test_begin", "end_timestamp": "test_end", "data": "test_data"}', key=key, )
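
PATCH 27 rewrites LogServer.start() and LogCollector.start() so the coroutines are wrapped in asyncio.Task objects before gather(): once gather() raises, the except branch still holds handles it can cancel, which bare coroutine arguments would not provide. A minimal, self-contained sketch of that pattern, with stand-in coroutines in place of the real Kafka loops:

import asyncio


async def fetch() -> None:
    while True:  # stand-in for the Kafka consume loop
        await asyncio.sleep(1)


async def send() -> None:
    while True:  # stand-in for the Kafka produce loop
        await asyncio.sleep(1)


async def start() -> None:
    task_fetch = asyncio.Task(fetch())
    task_send = asyncio.Task(send())

    try:
        await asyncio.gather(task_fetch, task_send)
    except KeyboardInterrupt:
        # Keeping the Task handles is what makes this cleanup possible;
        # passing fetch() and send() directly to gather() would leave
        # nothing to cancel once gather() raises.
        task_fetch.cancel()
        task_send.cancel()

This also explains the test setup: a real Ctrl-C rarely surfaces inside the loop, so the restored tests patch asyncio.gather with side_effect=KeyboardInterrupt to force this branch.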
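
PATCH 28 tightens the polling loops in kafka_handler.py: a broker-reported error other than _PARTITION_EOF now raises ValueError("Message is invalid") instead of a bare raise, and KeyboardInterrupt is logged rather than re-raised. Condensed to its core and assuming confluent_kafka (the real code lives in the handler classes' consume() methods), the loop looks like this sketch:

from confluent_kafka import Consumer, KafkaError


def consume_one(consumer: Consumer) -> tuple[str | None, str | None, str | None]:
    while True:
        msg = consumer.poll(timeout=1.0)

        if msg is None:
            continue  # nothing available yet, poll again

        if msg.error():
            if msg.error().code() == KafkaError._PARTITION_EOF:
                continue  # end of partition is expected, not a failure
            raise ValueError("Message is invalid")

        key = msg.key().decode("utf-8") if msg.key() else None
        value = msg.value().decode("utf-8") if msg.value() else None
        return key, value, msg.topic()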
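
The reworked consume_as_json() funnels every malformed payload into a single ValueError, which is what the new TestConsumeAsJSON pins down: a non-literal string fails, a JSON list fails, only a dict passes. A sketch of the control flow, where parse_payload is a stand-in for the method on KafkaConsumeHandler and an explicit TypeError replaces the patch's bare raise:

import ast


def parse_payload(key: str | None, value: str | None) -> tuple[str | None, dict]:
    if not key and not value:
        return None, {}

    try:
        # literal_eval accepts Python literals, which covers the
        # JSON-encoded dicts the pipeline actually sends
        eval_data = ast.literal_eval(value)
        if not isinstance(eval_data, dict):
            raise TypeError("payload is not a dict")
        return key, eval_data
    except Exception:
        raise ValueError("Unknown data format")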
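
A pattern worth noting across the new test files: the blocking poll loops are driven by handing the mocked poll() a side_effect list. Mock returns the items one call at a time and raises any exception class or instance it reaches, so a trailing StopIteration aborts the loop if the code under test polls once too often. A minimal illustration with hypothetical names, independent of the real handler classes:

import unittest
from unittest.mock import Mock


def drain(poller) -> str | None:
    """Poll until a message arrives, mirroring the consume() loop shape."""
    while True:
        msg = poller.poll(timeout=1.0)
        if msg is None:
            continue
        return msg


class TestDrain(unittest.TestCase):
    def test_returns_first_message(self):
        poller = Mock()
        # Two empty polls, then a message; the trailing StopIteration
        # would be raised only if drain() kept polling past the payload.
        poller.poll.side_effect = [None, None, "payload", StopIteration]

        self.assertEqual("payload", drain(poller))


if __name__ == "__main__":
    unittest.main()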
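
PATCH 29 replaces the hard-coded "Prefilter" topic with the value configured under environment.kafka_topics.pipeline.batch_sender_to_prefilter. A standalone sketch of that lookup, assuming PyYAML and the config.yaml layout shown in PATCH 22; the repository resolves the file through src.base.utils.setup_config(), so the direct yaml read here is a simplification:

import yaml


def load_produce_topic(path: str = "config.yaml") -> str:
    """Resolve the batch sender's produce topic from the pipeline config."""
    with open(path) as f:
        config = yaml.safe_load(f)

    return config["environment"]["kafka_topics"]["pipeline"][
        "batch_sender_to_prefilter"
    ]

Because the module-level constant is bound at import time, the updated tests patch src.logcollector.batch_handler.PRODUCE_TOPIC to "test_topic" instead of asserting the old literal "Prefilter".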