From 451a332835e38302d8dd6c5548b589aa81f3cd38 Mon Sep 17 00:00:00 2001 From: Vadim Semenov Date: Tue, 29 Oct 2024 23:46:51 +0000 Subject: [PATCH] Multiple improvements in stability and accuracy --- .env | 11 +- LICENSE | 56 +-- README.md | 134 +++++--- client.py | 148 ++++++-- diagnostic.py | 325 ++++++++++++++++++ ....yml => docker-compose.legacy.allinone.txt | 0 docker-compose.yml | 2 - logstash/pipeline/logstash.conf | 186 ++++------ watcher/watcher-template/config.yml | 12 +- 9 files changed, 640 insertions(+), 234 deletions(-) create mode 100644 diagnostic.py rename docker-compose.legacy.allinone.yml => docker-compose.legacy.allinone.txt (100%) diff --git a/.env b/.env index 3a41d44..d57db1b 100644 --- a/.env +++ b/.env @@ -1,12 +1,12 @@ WATCHER_NAME=demo-watcher -LOGSTASH_OSS_VERSION=7.17.0 +LOGSTASH_OSS_VERSION=7.17.21 ELASTIC_IP=127.0.0.1 ELASTIC_PORT=9200 ELASTIC_USER_LOGIN=elastic ELASTIC_USER_PASS=changeme FRR_HOST=127.0.0.1 #cb FRR_HOST=172.20.20.11 -FRR_PORT=2806 +FRR_PORT=2608 #cb FRR_PORT=65001 FRR_PASSWORD=zebra FRR_SECRET=zebra @@ -30,10 +30,11 @@ EXPORT_TO_MONGO_BOOL=True ############################# # If ELK stack is not available or is not installed, comment the line below or set to False ############################# -EXPORT_TO_ELASTICSEARCH_BOOL=False -EXPORT_TO_ZABBIX_BOOL=False +# EXPORT_TO_ELASTICSEARCH_BOOL=False +# EXPORT_TO_ZABBIX_BOOL=False ############################# # If export logs to messagers is not needed, comment the line below or set to False ############################# EXPORT_TO_WEBHOOK_URL_BOOL=False -#WEBHOOK_URL=http://127.0.0.1/webhook \ No newline at end of file +WEBHOOK_URL=http://127.0.0.1/webhook +SSH_COMMAND_READ_TIMEOUT=60 \ No newline at end of file diff --git a/LICENSE b/LICENSE index 261eeb9..a5967aa 100644 --- a/LICENSE +++ b/LICENSE @@ -1,9 +1,13 @@ - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - + License + Version 1.0, January 2024 + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - + + This license is 99% compliant with Apache License 2.0, January 2004 + (http://www.apache.org/licenses/). The single difference between this + and Apache License 2.0 is that it doesn't include permissions to sell, + offer to sell without getting a commercial license from the Licensor. + 1. Definitions. "License" shall mean the terms and conditions for use, reproduction, @@ -65,16 +69,17 @@ 2. Grant of Copyright License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. + worldwide, non-exclusive, no-charge, royalty-free, copyright license + to reproduce for evaluation purposes, prepare Derivative Works of for + evaluation purposes, publicly display, publicly perform. This license + doesn't grant a permission to sell, offer to sell the Work. Such permission + has to be granted by the Licensor via any form of electronic, or + written communication sent by the Licensor or its representatives. 3. Grant of Patent License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, + worldwide, non-exclusive, no-charge, royalty-free, patent license to + use for evaluation purposes, where such license applies only to those patent claims licensable by such Contributor that are necessarily infringed by their Contribution(s) alone or by combination of their Contribution(s) @@ -173,29 +178,4 @@ incurred by, or claims asserted against, such Contributor by reason of your accepting any such warranty or additional liability. - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "[]" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. - - Copyright [yyyy] [name of copyright owner] - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. + END OF TERMS AND CONDITIONS \ No newline at end of file diff --git a/README.md b/README.md index c77c4c2..b010f51 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,7 @@ # IS-IS Topology Watcher IS-IS Watcher is a monitoring tool of IS-IS topology changes for network engineers. It works via passively listening to IS-IS control plane messages through a specially established IS-IS adjacency between IS-IS Watcher and one of the network device. The tool logs IS-IS events and/or export by Logstash to **Elastic Stack (ELK)**, **Zabbix**, **WebHooks** and **Topolograph** monitoring dashboard for keeping the history of events, alerting, instant notification. By encapsulating the solution's elements in containers, it becomes exceptionally quick to start. The only thing is needed to configure manually is GRE tunnel setup on the Linux host. +> **Note** +> Upvote in [issues/1](https://github.com/Vadims06/isiswatcher/issues/1) if you are interested in tracking IS-IS topology changes via BGP-LS. ## IS-IS Watcher detects the following network events: * IS-IS neighbor adjacency Up/Down * IS-IS link cost changes @@ -74,6 +76,10 @@ HTTP POST messages can be easily accepted by messengers, which allows to get ins #### Containerlab Containerlab's topology is placed under **containerlab** folder. Local `README` includes instructions how to run it. IS-IS topology changes are printed by Watcher in a text file only. ![IS-IS watcher containerlab](./containerlab/frr01/container_lab.drawio.png) +``` +./containerlab/frr01/prepare.sh +sudo clab deploy --topo ./containerlab/frr01/frr01.clab.yml +``` ## How to connect IS-IS watcher to real network @@ -83,6 +89,8 @@ Table below shows different options of possible setups, starting from the bare m | 1 | Bare minimum. Containerlab | 0 | + | - | - | - | | 2 | 1. Local Topolograph
2. local compose file with ELK **disabled** (commented) | 2 | + | + | + | - | | 3 | 1. Local Topolograph
2. local compose file with ELK **enabled** | 3 | + | + | + | + | + +#### Setup №2. Text logs + timeline of network changes on Topolograph 1. Choose a Linux host with Docker installed 2. Setup Topolograph * launch your own Topolograph on docker using [topolograph-docker](https://github.com/Vadims06/topolograph-docker) @@ -98,13 +106,18 @@ Set variables in `.env` file: 3. Setup ELK (skip it, it's only needed for setup № 3) * if you already have ELK instance running, so remember `ELASTIC_IP` for filling env file later and uncomment Elastic config here `isiswatcher/logstash/pipeline/logstash.conf`. Currently additional manual configuration is needed for creation Index Templates, because the demo script doesn't accept the certificate of ELK. It's needed to have one in case of security setting enabled. Required mapping for the Index Template is in `isiswatcher/logstash/index_template/create.py`. Fill free to edit such a script for your needs. -* if not - boot up a new ELK from [docker-elk](https://github.com/deviantony/docker-elk) compose. For demo purporse set license of ELK as basic and turn off security. The setting are in `docker-elk/elasticsearch/config/elasticsearch.yml` - ``` - xpack.license.self_generated.type: basic - xpack.security.enabled: false - ``` - > **Note about having Elastic config commented** +To create Index Templates, run: +``` +sudo docker run -it --rm --env-file=./.env -v ./logstash/index_template/create.py:/home/watcher/watcher/create.py vadims06/isis-watcher:latest python3 ./create.py +``` +* if not - boot up a new ELK from [docker-elk](https://github.com/deviantony/docker-elk) compose. For demo purporse set license of ELK as basic and turn off security. The setting are in docker-elk/elasticsearch/config/elasticsearch.yml +``` +xpack.license.self_generated.type: basic +xpack.security.enabled: false +``` +> **Note about having Elastic config commented** > When the Elastic output plugin fails to connect to the ELK host, it blocks all other outputs and ignores "EXPORT_TO_ELASTICSEARCH_BOOL" value from env file. Regardless of EXPORT_TO_ELASTICSEARCH_BOOL being False, it tries to connect to Elastic host. The solution - uncomment this portion of config in case of having running ELK. + 4. Setup IS-IS Watcher ```bash git clone https://github.com/Vadims06/isiswatcher.git @@ -113,8 +126,23 @@ cd isiswatcher Generate configuration files `isis-watcher:v1.1` includes a client for generating configurations for each Watcher for each IS-IS area. To generate individual settings - run the client with `--action add_watcher` ``` -sudo docker run -it --rm --user $UID -v ./:/home/watcher/watcher/ -v /etc/passwd:/etc/passwd:ro -v /etc/group:/etc/group:ro vadims06/isis-watcher:v1.1 python3 ./client.py --action add_watcher +sudo docker run -it --rm --user $UID -v ./:/home/watcher/watcher/ -v /etc/passwd:/etc/passwd:ro -v /etc/group:/etc/group:ro vadims06/isis-watcher:latest python3 ./client.py --action add_watcher ``` +The output: +``` ++---------------------------+ +| Watcher Host | +-------------------+ +| +------------+ | | Network device | +| | netns FRR | | | | +| | Tunnel [4] | | Tunnel [4] | +| | gre1 [3]TunnelIP----+-----------------------+[2]TunnelIP | +| | eth1------+-vhost1 | +-----+ | IS-IS area num [5]| +| | | Host IP[6]+-------+ LAN |--------[1]Device IP | +| | | | +-----+ | | +| +------------+ | | | +| | +-------------------+ ++---------------------------+ +``` The script will create: 1. a folder under `watcher` folder with FRR configuration under `router` folder 2. a containerlab configuration file with network settings @@ -198,45 +226,45 @@ To check XDP logs, run sudo cat /sys/kernel/debug/tracing/trace_pipe ``` ## Troubleshooting -This is a quick set of checks in case of absence of events on IS-IS Monitoring page. IS-IS Watcher consists of three services: IS-ISd/FRR [1] -> Watcher [2] -> Logstash [3] -> Topolograph & ELK & Zabbix & WebHooks. -1. Check if FRR tracks IS-IS changes, run the following command: -``` -docker exec -it frr cat /var/log/frr/isisd.log -``` -you should see logs similar to [this](https://github.com/Vadims06/ospfwatcher/blob/d8366508abc51627c7f9a2ce6e47b7f23e420f1e/watcher/tests/test25.txt) -If the log file is empty, check adjancency on Watcher: -``` -sudo docker exec -it watcher-gre-router vtysh -show isis neighbor -``` -if there is no IS-IS neighbor, ping remote end of GRE tunnel from the Watcher. At the same time, make tcpdump on watcher's interface and check counters of iptables -``` -sudo iptables -nvL -t filter --line-numbers -sudo iptables -nvL -t nat --line-numbers -``` -Clear connections of GRE tunnel -``` -sudo conntrack -D -p 47 -``` -Check ICMP ping packets on Watcher's host and on network device. -2. Check if Watcher parses changes: -``` -docker exec -it watcher cat /home/watcher/watcher/logs/watcher.log -``` -You should see tracked changes of your network, i.e. here we see that `10.0.0.0/29` network went up at `2023-10-27T07:50:24Z` on `10.10.1.4` router. -``` -2023-10-27T07:50:24Z,demo-watcher,network,10.0.0.0/29,up,10.10.1.4,28Oct2023_01h10m02s_7_hosts_ospfwatcher -``` -3. Check that messages are sent: - 1. Uncomment `DEBUG_BOOL="True"` in `.env` and check logs `docker logs logstash` and do: - - wait for the next event in your network - - change a cost of you stub network, return it back and see this event in this logs - - simulate network changes - ``` - docker exec -it watcher /bin/bash - echo "2023-10-27T07:50:24Z,demo-watcher,network,10.0.0.0/29,up,10.10.1.4,28Oct2023_01h10m02s_7_hosts_ospfwatcher" >> /home/watcher/watcher/logs/watcher.log - ``` - 2. Connect to mongoDB and check logs: +##### Symptoms +Networks changes are not tracked. Log file `./watcher/logs/watcher...log` is empty. + +##### Steps: +1. Run diagnostic script. It will check **IS-IS Watcher** <-> **Network device** connection (iptables, packets from FRR/network device) + + ``` + sudo docker run -it --rm -v ./:/home/watcher/watcher/ --cap-add=NET_ADMIN -u root --network host vadims06/isis-watcher:latest python3 ./client.py --action diagnostic --watcher_num + ``` +2. Login on FRR, check adjancency: + ``` + sudo docker exec -it watcher-gre-router vtysh + show isis neighbor + ``` + if there is no IS-IS neighbor, ping remote end of GRE tunnel from the Watcher. At the same time, make tcpdump on watcher's interface and check counters of iptables + ``` + sudo iptables -nvL -t filter --line-numbers + sudo iptables -nvL -t nat --line-numbers + ``` + Clear connections of GRE tunnel + ``` + sudo conntrack -D -p 47 + ``` + Check ICMP ping packets on Watcher's host and on network device. +##### Symptoms +Dashboard page is blank. Events are not present on OSPF/IS-IS Monitoring page. +##### Steps: +IS-IS Watcher consists of three services: IS-ISd/FRR [1] -> Watcher [2] -> Logstash [3] -> Topolograph & ELK & Zabbix & WebHooks. +1. Check if FRR tracks IS-IS changes in `./watcher/logs/watcher...log` file (previous case) +You should see tracked changes of your network, i.e. + ``` + 2024-10-08T22:54:54Z,watcher1,1,network,4ffe::192:168:145:4/127,changed,old_cost:44,new_cost:4,0200.1001.0004,,49.0002,12345,internal,0 + ``` +2. Check that logstash container from [docker-compose.yml](./docker-compose.yml) is running via `docker ps` command. + + 1. Uncomment `DEBUG_BOOL="True"` in `.env` and start continuous logs `docker logs -f logstash`. + 2. Copy and paste the log from the first step in watcher's log file `./watcher/logs/watcher#-gre#-isis.isis.log`. `docker logs -f logstash` should print the output. If not - check logstash container. + +3. Check if logs are in Topolograph's DB. Connect to mongoDB and run: ``` docker exec -it mongo /bin/bash ``` @@ -255,7 +283,7 @@ You should see tracked changes of your network, i.e. here we see that `10.0.0.0/ ### Versions #### FRR -FRR 8 perfectly logs any IS-IS LSPs, but doesn't establish IS-IS adjacency over GRE because of internal packet filtering. The filter has been updated to permit IS-IS over GRE through Pull request [#12979](https://github.com/FRRouting/frr/pull/12979) and is only supported in FRR 9 versions. However, this version only logs a partial IS-IS LSP ([#Issue 15654](https://github.com/FRRouting/frr/issues/15654)) and is incompatible with the Watcher. Therefore, a custom FRR build based on FRR 8.x version with an altered internal filter for GRE packets is required. `vadims06/frr:v8.5.4_isis_over_gre` is prepaired and tested to work IS-IS over GRE tunnel. If you need another 8.x version or want to build your own FRR image - follow the instructions mentioned below. +FRR 8 perfectly logs any IS-IS LSPs, but doesn't establish IS-IS adjacency over GRE because of internal packet filtering. The filter has been updated to permit IS-IS over GRE through Pull request [#12979](https://github.com/FRRouting/frr/pull/12979) and is only supported in FRR 9 versions. However, this version has wrong LSDB output (mix LSPID with hostnames) and is incompatible with the Watcher. Therefore, a custom FRR build based on FRR 8.x version with an altered internal filter for GRE packets is required. `vadims06/frr:v8.5.4_isis_over_gre` is prepaired and tested to work IS-IS over GRE tunnel. If you need another 8.x version or want to build your own FRR image - follow the instructions mentioned below. ##### How to prepare FRR instance (optional) 1. clone FRR and choose any FRR 8.x branch ``` @@ -275,7 +303,17 @@ FRR 8 perfectly logs any IS-IS LSPs, but doesn't establish IS-IS adjacency over 4. Inspect your new FRR image name using `docker image ls` and replace `router/image` by your own image name in `isiswatcher/watcher/watcher-template/config.yml` ### Minimum Logstash version - 7.17.0, this version includes bug fix of [issues_281](https://github.com/logstash-plugins/logstash-input-file/issues/281), [issues_5115](https://github.com/elastic/logstash/issues/5115) + 7.17.21, this version includes bug fix of [issues_281](https://github.com/logstash-plugins/logstash-input-file/issues/281), [issues_5115](https://github.com/elastic/logstash/issues/5115) + +### Topolograph suite +* OSPF Watcher [link](https://github.com/Vadims06/ospfwatcher) +* IS-IS Watcher [link](https://github.com/Vadims06/isiswatcher) +* Topolograph [link](https://github.com/Vadims06/topolograph) +* Topolograph in docker [link](https://github.com/Vadims06/topolograph-docker) + +### Community & feedback +* https://t.me/topolograph +* admin at topolograph.com ### License The functionality was tested using Basic ELK license. diff --git a/client.py b/client.py index 83373a3..c594a67 100755 --- a/client.py +++ b/client.py @@ -1,10 +1,12 @@ import argparse +import diagnostic import ipaddress import shutil from ruamel.yaml import YAML from jinja2 import Environment, FileSystemLoader from io import StringIO -import os +import os, re +import sys import enum ruamel_yaml_default_mode = YAML() @@ -14,32 +16,43 @@ class ACTIONS(enum.Enum): ADD_WATCHER = "add_watcher" STOP_WATCHER = "stop_watcher" GET_STATUS = "get_status" + DIAGNOSTIC = "diagnostic" class WATCHER_CONFIG: P2P_VETH_SUPERNET_W_MASK = "169.254.0.0/16" WATCHER_ROOT_FOLDER = "watcher" WATCHER_TEMPLATE_FOLDER_NAME = "watcher-template" - WATCHER_TEMPLATE_CONFIG_FILE = "config.yml" + WATCHER_CONFIG_FILE = "config.yml" ROUTER_NODE_NAME = "router" - ROUTER_ISIS_SYSTEMID = "49.{area_num}.{watcher_num}.{gre_num}.1111.00" + ROUTER_ISIS_SYSTEMID = "{area_num}.{watcher_num}.{gre_num}.1111.00" WATCHER_NODE_NAME = "isis-watcher" ISIS_FILTER_NODE_NAME = "receive_only_filter" ISIS_FILTER_NODE_IMAGE = "vadims06/isis-filter-xdp:latest" - PROTOCOL = "isis" - def __init__(self, watcher_num): + + def __init__(self, watcher_num, protocol="isis"): self.watcher_num = watcher_num # default self.gre_tunnel_network_device_ip = "" self.gre_tunnel_ip_w_mask_network_device = "" self.gre_tunnel_ip_w_mask_watcher = "" self.gre_tunnel_number = 0 - self.isis_area_num = 1 + self.isis_area_num = "" self.host_interface_device_ip = "" + self.protocol = protocol + self.asn = 0 + self.organisation_name = "" + self.watcher_name = "" def gen_next_free_number(self): """ Each Watcher installation has own sequense number starting from 1 """ - return len( self.get_existed_watchers() ) + 1 + numbers = [int(folder_name.split('-')[0][-1]) for folder_name in WATCHER_CONFIG.get_existed_watchers() if '-' in folder_name] + expected_numbers = set(range(1, max(numbers) + 1)) + if set(expected_numbers) == set(numbers): + next_number = len(numbers) + 1 + else: + next_number = next(iter(expected_numbers - set(numbers))) + return next_number @staticmethod def get_existed_watchers(): @@ -47,10 +60,29 @@ def get_existed_watchers(): watcher_root_folder_path = os.path.join(os.getcwd(), WATCHER_CONFIG.WATCHER_ROOT_FOLDER) return [file for file in os.listdir(watcher_root_folder_path) if os.path.isdir(os.path.join(watcher_root_folder_path, file)) and file.startswith("watcher") and not file.endswith("template")] + def import_from(self, watcher_num): + """ + Browse a folder directory and find a folder with watcher num. Parse GRE tunnel + """ + # watcher1-gre1025-ospf + watcher_re = re.compile("(?P[a-zA-Z]+)(?P\d+)-gre(?P\d+)(-(?P[a-zA-Z]+))?") + for file in self.get_existed_watchers(): + watcher_match = watcher_re.match(file) + if watcher_match and watcher_match.groupdict().get("watcher_num", "") == str(watcher_num): + # these two attributes are needed to build paths + self.protocol = watcher_match.groupdict().get("proto") if watcher_match.groupdict().get("proto") else self.protocol + self.gre_tunnel_number = int(watcher_match.groupdict().get("gre_num", 0)) + for label, value in self.watcher_config_file_yml.get('topology', {}).get('defaults', {}).get('labels', {}).items(): + setattr(self, label, value) + break + else: + raise ValueError(f"Watcher{watcher_num} was not found") + @property def p2p_veth_network_obj(self): + """ ISIS p2p subnet assigment is top down: start from the end (255) to the start (0) in order not to overlap with OSPF """ p2p_super_network_obj = ipaddress.ip_network(self.P2P_VETH_SUPERNET_W_MASK) - return self.get_nth_elem_from_iter(p2p_super_network_obj.subnets(new_prefix=24), self.watcher_num + 1) + return list(p2p_super_network_obj.subnets(new_prefix=24))[256 - self.watcher_num] @property def p2p_veth_watcher_ip_obj(self): @@ -78,7 +110,16 @@ def p2p_veth_host_ip_w_mask(self): @property def host_veth(self): - return f"vhost{self.gre_tunnel_number}" + """ Add organisation name at name of interface to allow different interfaces with the same GRE num """ + linux_ip_link_peer_max_len = 15 + vhost_inf_name = f"vhost{self.gre_tunnel_number}" + organisation_name_short = self.organisation_name[:linux_ip_link_peer_max_len - (len(vhost_inf_name)+1)] # 1 for dash + self._host_veth = f"{organisation_name_short}-{vhost_inf_name}" if organisation_name_short else vhost_inf_name + return self._host_veth + + @host_veth.setter + def host_veth(self, value_from_yaml_import): + self._host_veth = value_from_yaml_import @property def watcher_root_folder_path(self): @@ -86,11 +127,11 @@ def watcher_root_folder_path(self): @property def watcher_folder_name(self): - return f"watcher{self.watcher_num}-gre{self.gre_tunnel_number}" + return f"watcher{self.watcher_num}-gre{self.gre_tunnel_number}-{self.protocol}" @property def watcher_log_file_name(self): - return f"{self.watcher_folder_name}.{self.PROTOCOL}.log" + return f"{self.watcher_folder_name}.{self.protocol}.log" @property def watcher_folder_path(self): @@ -108,10 +149,21 @@ def router_template_path(self): def router_folder_path(self): return os.path.join(self.watcher_folder_path, self.ROUTER_NODE_NAME) + @property + def watcher_config_file_path(self): + return os.path.join(self.watcher_folder_path, self.WATCHER_CONFIG_FILE) + + @property + def watcher_config_file_yml(self) -> dict: + if os.path.exists(self.watcher_config_file_path): + with open(self.watcher_config_file_path) as f: + return ruamel_yaml_default_mode.load(f) + return {} + @property def watcher_config_template_yml(self): watcher_template_path = os.path.join(self.watcher_root_folder_path, self.WATCHER_TEMPLATE_FOLDER_NAME) - with open(os.path.join(watcher_template_path, self.WATCHER_TEMPLATE_CONFIG_FILE)) as f: + with open(os.path.join(watcher_template_path, self.WATCHER_CONFIG_FILE)) as f: return ruamel_yaml_default_mode.load(f) @property @@ -140,6 +192,12 @@ def do_check_ip(ip_address_w_mask): except: return "" + @staticmethod + def do_check_area_num(area_num): + """ 49.xxxx """ + area_match = re.match('^49\.\d{4}$', area_num) + return area_match.group(0) if area_match else "" + @staticmethod def _get_digit_net_mask(ip_address_w_mask): return ipaddress.ip_interface(ip_address_w_mask).network.prefixlen @@ -160,6 +218,8 @@ def create_folder_with_settings(self): os.mkdir(self.watcher_folder_path) # isis-watcher folder watcher_logs_folder_path = os.path.join(self.watcher_root_folder_path, "logs") + if not os.path.exists(watcher_logs_folder_path): + os.mkdir(watcher_logs_folder_path) #os.mkdir(isis_watcher_folder_path) shutil.copyfile( src=os.path.join(self.isis_watcher_template_path, "watcher.log"), @@ -168,12 +228,11 @@ def create_folder_with_settings(self): os.chmod(os.path.join(watcher_logs_folder_path, self.watcher_log_file_name), 0o755) # router folder inside watcher os.mkdir(self.router_folder_path) - for file_name in ["daemons", "isisd.log"]: + for file_name in ["daemons"]: shutil.copyfile( src=os.path.join(self.router_template_path, file_name), dst=os.path.join(self.router_folder_path, file_name) ) - os.chmod(os.path.join(self.router_folder_path, "isisd.log"), 0o777) # Config generation env = Environment( loader=FileSystemLoader(self.router_template_path) @@ -198,11 +257,26 @@ def create_folder_with_settings(self): # containerlab config watcher_config_yml = self.watcher_config_template_yml watcher_config_yml["name"] = self.watcher_folder_name + # remember user input for further user, i.e diagnostic + watcher_config_yml['topology']['defaults'].setdefault('labels', {}).update({'gre_num': int(self.gre_tunnel_number)}) + watcher_config_yml['topology']['defaults']['labels'].update({'gre_tunnel_network_device_ip': self.gre_tunnel_network_device_ip}) + watcher_config_yml['topology']['defaults']['labels'].update({'gre_tunnel_ip_w_mask_network_device': self.gre_tunnel_ip_w_mask_network_device}) + watcher_config_yml['topology']['defaults']['labels'].update({'gre_tunnel_ip_w_mask_watcher': self.gre_tunnel_ip_w_mask_watcher}) + watcher_config_yml['topology']['defaults']['labels'].update({'area_num': self.isis_area_num}) + watcher_config_yml['topology']['defaults']['labels'].update({'asn': self.asn}) + watcher_config_yml['topology']['defaults']['labels'].update({'organisation_name': self.organisation_name}) + watcher_config_yml['topology']['defaults']['labels'].update({'watcher_name': self.watcher_name}) + # Config watcher_config_yml['topology']['nodes']['h1']['exec'] = self.exec_cmds() watcher_config_yml['topology']['links'] = [{'endpoints': [f'{self.ROUTER_NODE_NAME}:veth1', f'host:{self.host_veth}']}] # Watcher watcher_config_yml['topology']['nodes'][self.WATCHER_NODE_NAME]['network-mode'] = f"container:{self.ROUTER_NODE_NAME}" watcher_config_yml['topology']['nodes'][self.WATCHER_NODE_NAME]['binds'].append(f"../logs/{self.watcher_log_file_name}:/home/watcher/watcher/logs/watcher.log") + watcher_config_yml['topology']['nodes'][self.WATCHER_NODE_NAME].update({'env': {'ASN': self.asn}}) + watcher_config_yml['topology']['nodes'][self.WATCHER_NODE_NAME]['env'].update({'WATCHER_NAME': self.watcher_name}) + watcher_config_yml['topology']['nodes'][self.WATCHER_NODE_NAME]['env'].update({'AREA_NUM': self.isis_area_num}) + watcher_config_yml['topology']['nodes'][self.WATCHER_NODE_NAME]['env'].update({'WATCHER_INTERFACE': "veth1"}) + watcher_config_yml['topology']['nodes'][self.WATCHER_NODE_NAME]['env'].update({'WATCHER_LOGFILE': "/home/watcher/watcher/logs/watcher.log"}) # IS-IS XDP filter, listen only watcher_config_yml['topology']['nodes'][self.ISIS_FILTER_NODE_NAME]['image'] = self.ISIS_FILTER_NODE_IMAGE watcher_config_yml['topology']['nodes'][self.ISIS_FILTER_NODE_NAME]['network-mode'] = "host" @@ -223,8 +297,8 @@ def do_add_watcher_prechecks(self): def do_print_banner(): print(""" +---------------------------+ -| Watcher Host | +-------------------+ -| +------------+ | | Network device | +| Watcher Host | +-------------------+ +| +------------+ | | Network device | | | netns FRR | | | | | | Tunnel [4] | | Tunnel [4] | | | gre1 [3]TunnelIP----+-----------------------+[2]TunnelIP | @@ -270,10 +344,19 @@ def add_watcher_dialog(self): print("Please provide any positive number") self.gre_tunnel_number = "" # ISIS settings - self.isis_area_num = input("[5]IS-IS area number: ") + while not self.isis_area_num: + self.isis_area_num = self.do_check_area_num(input("[5]IS-IS area number [49.xxxx]: ")) # Host interface name for NAT while not self.host_interface_device_ip: self.host_interface_device_ip = self.do_check_ip(input("[6]Watcher host IP address: ")) + # Tags + self.asn = input("AS number, where IS-IS is configured: ") + if not self.asn and not self.asn.isdigit(): + self.asn = 0 + self.organisation_name = str(input("Organisation name: ")).lower() + self.watcher_name = str(input("watcher name: ")).lower().replace(" ", "-") + if not self.watcher_name: + self.watcher_name = "isiswatcher-demo" def exec_cmds(self): return [ @@ -282,10 +365,10 @@ def exec_cmds(self): f'ip address add {self.p2p_veth_host_ip_w_mask} dev {self.host_veth}', f'ip netns exec {self.netns_name} ip tunnel add gre1 mode gre local {str(self.p2p_veth_watcher_ip_obj)} remote {self.gre_tunnel_network_device_ip}', f'ip netns exec {self.netns_name} ip address add {self.gre_tunnel_ip_w_mask_watcher} dev gre1', - f'sudo iptables -t nat -A POSTROUTING -p gre -s {self.p2p_veth_watcher_ip} -d {self.gre_tunnel_network_device_ip} -j SNAT --to-source {self.host_interface_device_ip}', - f'sudo iptables -t nat -A PREROUTING -p gre -s {self.gre_tunnel_network_device_ip} -d {self.host_interface_device_ip} -j DNAT --to-destination {self.p2p_veth_watcher_ip}', - f'sudo iptables -t filter -A FORWARD -p gre -s {self.p2p_veth_watcher_ip} -d {self.gre_tunnel_network_device_ip} -i {self.host_veth} -j ACCEPT', - f'sudo iptables -t filter -A FORWARD -p gre -s {self.gre_tunnel_network_device_ip} -j ACCEPT', # do not set output interface + f'bash -c \'RULE="-t nat -p gre -s {self.p2p_veth_watcher_ip} -d {self.gre_tunnel_network_device_ip} -j SNAT --to-source {self.host_interface_device_ip}"; sudo iptables -C POSTROUTING $$RULE &> /dev/null && echo "Rule exists in iptables." || sudo iptables -A POSTROUTING $$RULE\'', + f'bash -c \'RULE="-t nat -p gre -s {self.gre_tunnel_network_device_ip} -d {self.host_interface_device_ip} -j DNAT --to-destination {self.p2p_veth_watcher_ip}"; sudo iptables -C PREROUTING $$RULE &> /dev/null && echo "Rule exists in iptables." || sudo iptables -A PREROUTING $$RULE\'', + f'bash -c \'RULE="-t filter -p gre -s {self.p2p_veth_watcher_ip} -d {self.gre_tunnel_network_device_ip} -i {self.host_veth} -j ACCEPT"; sudo iptables -C FORWARD $$RULE &> /dev/null && echo "Rule exists in iptables." || sudo iptables -A FORWARD $$RULE\'', + f'bash -c \'RULE="-t filter -p gre -s {self.gre_tunnel_network_device_ip} -j ACCEPT"; sudo iptables -C FORWARD $$RULE &> /dev/null && echo "Rule exists in iptables." || sudo iptables -A FORWARD $$RULE\'', f'sudo ip netns exec {self.netns_name} ip link set mtu 1600 dev veth1', # for xdp # enable GRE after applying XDP filter #f'sudo ip netns exec {self.netns_name} ip link set up dev gre1', @@ -322,6 +405,25 @@ def get_status(self): # TODO add IS-IS neighborship status raise NotImplementedError("Not implemented yet. Please run manually `sudo docker ps -f label=clab-node-name=router`") + def diagnostic(self): + print(f"Diagnostic connection is started") + self.import_from(watcher_num=args.watcher_num) + diag_watcher_host = diagnostic.WATCHER_HOST( + if_names=[self.host_veth], + watcher_internal_ip=self.p2p_veth_watcher_ip, + network_device_ip=self.gre_tunnel_network_device_ip + ) + diag_watcher_host.does_conntrack_exist_for_gre() + # print(f"Please wait {diag_watcher_host.DUMP_FILTER_TIMEOUT} sec") + diag_watcher_host.run() + if not diagnostic.IPTABLES_NAT_FOR_REMOTE_NETWORK_DEVICE_UNIQUE.check(self.gre_tunnel_network_device_ip): + sys.exit() + if diag_watcher_host.is_watcher_alive: + diagnostic.IPTABLES_FRR_NETNS_FORWARD_TO_NETWORK_DEVICE_BEFORE_NAT.check(self.gre_tunnel_network_device_ip) + if diag_watcher_host.is_network_device_alive: + diagnostic.IPTABLES_REMOTE_NETWORK_DEVICE_FORWARD_TO_FRR_NETNS.check(self.gre_tunnel_network_device_ip) + diagnostic.IPTABLES_REMOTE_NETWORK_DEVICE_NAT_TO_FRR_NETNS.check(self.gre_tunnel_network_device_ip) + if __name__ == '__main__': @@ -329,10 +431,10 @@ def get_status(self): description="Provisioning Watcher instances for tracking IS-IS topology changes" ) parser.add_argument( - "--action", required=True, help="Options: add_watcher, stop_watcher, get_status" + "--action", required=True, help="Options: add_watcher, stop_watcher, get_status, diagnostic" ) parser.add_argument( - "--watcher_num", required=False, default=0, help="Number of watcher" + "--watcher_num", required=False, default=0, type=int, help="Number of watcher" ) args = parser.parse_args() diff --git a/diagnostic.py b/diagnostic.py new file mode 100644 index 0000000..f0f340f --- /dev/null +++ b/diagnostic.py @@ -0,0 +1,325 @@ +from abc import abstractmethod +from scapy.all import AsyncSniffer, IP, SndRcvList, PacketList, sniff +from scapy.config import conf +import netns +import time +import logging +import sys + +logging.basicConfig(stream=sys.stdout, level=logging.DEBUG) +log = logging.getLogger(__name__) + + +class BASE: + + DUMP_FILTER_GRE = "proto gre" + DUMP_FILTER_TIMEOUT = 10 + def __init__(self, if_names, nsname="") -> None: + # variable to sniff packets + self.if_names = if_names if isinstance(if_names, list) else [if_names] + self.nsname = nsname + self.packets: PacketList = [] + + @property + def sniffer(self): + if hasattr(self, "_sniffer"): + return self._sniffer + if self.nsname: + self.change_netns(self.nsname) + self._sniffer = AsyncSniffer(iface=self.if_names, filter=self.DUMP_FILTER_GRE) + return self._sniffer + + @staticmethod + def change_netns(nsname) -> None: + with netns.NetNS(nsname=nsname): + conf.ifaces.reload() # Reload interface list + conf.route.resync() # Reload IPv4 routes + + def do_print_progress_bar(self, timeout=10): + import time + import sys + for i in range(1, timeout+1): + sys.stdout.write('\r') + # the exact output you're looking for: + sys.stdout.write("[%-10s] %dsec" % ('='*i, i)) + sys.stdout.flush() + time.sleep(1) + sys.stdout.write('\n\r') + + def run(self, nsname="") -> None: + if nsname: + self.change_netns(nsname) + log.info(f"Start listening {self.if_names} interfaces") + self.sniffer.start() + # time.sleep(self.DUMP_FILTER_TIMEOUT) + self.do_print_progress_bar(self.DUMP_FILTER_TIMEOUT) + self.sniffer.stop() + self.packets = self.sniffer.results + + @abstractmethod + def is_watcher_alive(self): + pass + + @abstractmethod + def is_network_device_alive(self): + pass + +class WATCHER_NS(BASE): + + WATCHER_IP = "169.254.{watcher_num}.1" + def __init__(self, if_names, nsname, watcher_internal_ip, network_device_ip) -> None: + self.watcher_internal_ip = watcher_internal_ip + self.network_device_ip = network_device_ip + super().__init__(if_names, nsname) + + @property + def is_watcher_alive(self): + for pkt in self.packets: + if pkt[IP].src == self.watcher_internal_ip: + return True + return False + + @property + def is_network_device_alive(self): + for pkt in self.packets: + if pkt[IP].src == self.network_device_ip: + return True + return False + + +class WATCHER_HOST(BASE): + """ + 13:49:31.853767 IP 169.254.2.2 > 192.168.1.35: GREv0, length 72: IP 10.10.25.33 > 224.0.0.5: OSPFv2, Hello, length 48 + 13:49:32.853323 IP 192.168.1.35 > 169.254.2.2: GREv0, length 72: IP 10.10.25.35 > 224.0.0.5: OSPFv2, Hello, length 48 + """ + def __init__(self, if_names, watcher_internal_ip, network_device_ip) -> None: + self.watcher_internal_ip = watcher_internal_ip + self.network_device_ip = network_device_ip + super().__init__(if_names) + + @property + def is_watcher_alive(self): + for pkt in self.packets: + try: + if pkt[IP].src == self.watcher_internal_ip: + log.info("Watcher is alive") + return True + except IndexError: + # Layer IP not found + pass + log.critical( + """FRR watcher doesn't send IS-IS hellos over GRE. Please make sure that: + 1.FRR is running\n + 2.GRE1 is included into IS-IS process `sudo docker exec -it vtysh` + """) + return False + + @property + def is_network_device_alive(self): + for pkt in self.packets: + try: + if pkt[IP].src == self.network_device_ip: + log.info("Network device is alive") + return True + except IndexError: + # Layer IP not found + pass + log.critical( + """Network device doesn't send IS-IS hellos over GRE. Please make sure that: + 1.Network device has GRE interface configured \n + 2.Network device can reach Watcher's host + """) + return False + + def report(self): + if not self.is_watcher_alive: + log.critical( + """FRR watcher doesn't send IS-IS hellos over GRE. Please make sure that: + 1.FRR is running\n + 2.GRE1 is included into IS-IS process `sudo docker exec -it vtysh` + """) + if not self.is_network_device_alive: + log.critical( + """Network device doesn't send IS-IS hellos over GRE. Please make sure that: + 1.Network device has GRE interface configured \n + 2.Network device can reach Watcher's host + """) + if self.is_watcher_alive and self.is_network_device_alive: + log.info("Watcher and Network device have reachability") + +class IPTABLE_ENTRY_IP: + def __init__(self, ip:str) -> None: + import ipaddress + try: + self.ip = ipaddress.ip_interface(ip) + except ValueError: + self.ip = "" + def __repr__(self): + return str(self.ip) + def __eq__(self, other): + return str(self.ip) == other + +class IPTABLES_NAT_FOR_REMOTE_NETWORK_DEVICE_EXIST: + + ASSERT_MSG = """ + Duplicated settings for the same device IP are found. + It's possible to create GRE tunnel for a single Watchet - Network device pair only. + """ + + @staticmethod + def check(network_device_ip): + try: + import iptc + except Exception as e: + print(f"Iptables checks are ignored") + return True + existed_nat_records_ll = [] + for nat_table_row in iptc.easy.dump_chain('nat', 'PREROUTING', ipv6=False): + if nat_table_row.get('src', '') != IPTABLE_ENTRY_IP(network_device_ip): + continue + existed_nat_records_ll.append(nat_table_row) + if len(existed_nat_records_ll) == 0: + log.info("NAT doesn't have settings for such remote network device. Good to add it.") + return True + log.critical(IPTABLES_NAT_FOR_REMOTE_NETWORK_DEVICE_EXIST.ASSERT_MSG + + f"""Watcher's host has already {len(existed_nat_records_ll)} NAT records for {network_device_ip}. + To remove them, run `sudo iptables -nvL -t nat --line-numbers` and sudo iptables -t nat -D PREROUTING """ + ) + return False + +class IPTABLES_FORWARD_FOR_REMOTE_NETWORK_DEVICE_EXIST: + + ASSERT_MSG = """ + Duplicated settings for the same device IP are found. + It's possible to create GRE tunnel for a single Watchet - Network device pair only. + """ + @staticmethod + def check(network_device_ip): + try: + import iptc + except Exception as e: + print(f"Iptables checks are ignored") + return True + existed_records_ll = [] + for filter_table_row in iptc.easy.dump_chain('filter', 'FORWARD', ipv6=False): + if filter_table_row.get('src', '') != IPTABLE_ENTRY_IP(network_device_ip): + continue + existed_records_ll.append(filter_table_row) + if len(existed_records_ll) == 0: + log.info("Iptables -t filter doesn't have settings for such remote network device. Good to add it.") + return True + log.critical( IPTABLES_FORWARD_FOR_REMOTE_NETWORK_DEVICE_EXIST.ASSERT_MSG + + f"""Watcher's host has already {len(existed_records_ll)} filter records for {network_device_ip}. + To remove them, run `sudo iptables -nvL -t filter --line-numbers` and sudo iptables -t filter -D FORWARD """ + ) + return False + + +class IPTABLES_REMOTE_NETWORK_DEVICE_NAT_TO_FRR_NETNS: + """ + sudo iptables -nv -t nat -L PREROUTING --line-numbers | grep 192.168.1.35 + num pkts bytes target prot opt in out source destination + 2 1311 115K DNAT 47 -- * * 192.168.1.35 192.168.1.33 to:169.254.2.2 + 3 0 0 DNAT 47 -- * * 192.168.1.35 192.168.1.33 to:169.254.2.2 + """ + + ASSERT_MSG = """ + Check if GRE packets sent by remote network device reaches Watcher host and redirected to FRR netns + ! nat table counters are only incremented for the first packet of every connection. Then uses conntable + If False, it means: + * Network device doesn't sent packets: + * GRE is not configured on network device or in Down state or Watcher's host is not available. + Use ping to check that GRE works. + * GRE is not added into IGP process + * If you have such option - dump outgoing packets from network device + sudo tcpdump -i proto gre and dst -n + """ + + def bash_cmd(network_device_ip): + return f"sudo iptables -nv -t nat -L PREROUTING --line-numbers | grep {network_device_ip}" + + @staticmethod + def check(network_device_ip): + try: + import iptc + except Exception as e: + print(f"Iptables checks are ignored, please use {IPTABLES_REMOTE_NETWORK_DEVICE_NAT_TO_FRR_NETNS.bash_cmd(network_device_ip)}") + return True + for nat_table_row in iptc.easy.dump_chain('nat', 'PREROUTING', ipv6=False): + #for nat_table_row in nat_table['PREROUTING']: + if nat_table_row.get('src', '') != IPTABLE_ENTRY_IP(network_device_ip): + continue + pkts, bytes = nat_table_row.get('counters', (0, 0)) + if pkts > 0: + log.info("NAT is working for remote network device.") + return True + log.critical("Remote network device doesn't send IGP packets" + IPTABLES_REMOTE_NETWORK_DEVICE_NAT_TO_FRR_NETNS.ASSERT_MSG) + return False + + +class IPTABLES_REMOTE_NETWORK_DEVICE_FORWARD_TO_FRR_NETNS: + """ + sudo iptables -nv -t filter -L FORWARD --line-numbers | grep 192.168.1.35 + Chain FORWARD (policy DROP 0 packets, 0 bytes) + num pkts bytes target prot opt in out source destination + 12 928 85344 ACCEPT 47 -- * * 192.168.1.35 0.0.0.0/0 + 13 1074 97216 ACCEPT 47 -- vhost1025 * 169.254.2.2 192.168.1.35 + """ + ASSERT_MSG = """ + Check if GRE packets sent by watcher's FRR from FRR's netns reach host's namespace. + If False, it means: + * Network device doesn't sent packets: + * GRE is not configured on network device or in Down state or Watcher's host is not available. + Use ping to check that GRE works. + * GRE is not added into IGP process + * If you have such option - dump outgoing packets from network device + sudo tcpdump -i proto gre and dst -n + """ + + def bash_cmd(network_device_ip): + return f"sudo iptables -nv -t filter -L FORWARD --line-numbers | grep {network_device_ip}" + + @staticmethod + def check(network_device_ip): + try: + import iptc + except Exception as e: + print(f"Iptables checks are ignored, please use {IPTABLES_REMOTE_NETWORK_DEVICE_FORWARD_TO_FRR_NETNS.bash_cmd(network_device_ip)}") + return True + for filter_table_row in iptc.easy.dump_chain('filter', 'FORWARD', ipv6=False): + if filter_table_row.get('src', '') != IPTABLE_ENTRY_IP(network_device_ip): + continue + pkts, bytes = filter_table_row.get('counters', (0, 0)) + if pkts > 0: + log.info("Remote network device sends IGP packets and iptables allows them.") + return True + log.critical("Remote network device doesn't send IGP packets" + IPTABLES_REMOTE_NETWORK_DEVICE_FORWARD_TO_FRR_NETNS.ASSERT_MSG) + return False + +class IPTABLES_FRR_NETNS_FORWARD_TO_NETWORK_DEVICE_BEFORE_NAT: + + ASSERT_MSG = """Check if GRE packets sent by watcher's FRR from FRR's netns reaches host's namespace. + If False, it means: + * IGP protocol is not enabled on Watcher's FRR + * GRE1 is not enabled in FRR's netns. use `sudo ip netns exec watcher#-gre#- 0: + log.info("Watcher's FRR sends IGP packets and iptables allows them.") + return True + log.critical("Watcher's FRR doesn't send IGP packets" + IPTABLES_FRR_NETNS_FORWARD_TO_NETWORK_DEVICE_BEFORE_NAT.ASSERT_MSG) + return False diff --git a/docker-compose.legacy.allinone.yml b/docker-compose.legacy.allinone.txt similarity index 100% rename from docker-compose.legacy.allinone.yml rename to docker-compose.legacy.allinone.txt diff --git a/docker-compose.yml b/docker-compose.yml index 4f133a7..1419182 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -1,5 +1,3 @@ -version: '3.2' - services: logstash: build: diff --git a/logstash/pipeline/logstash.conf b/logstash/pipeline/logstash.conf index 3d6737a..82c73b4 100644 --- a/logstash/pipeline/logstash.conf +++ b/logstash/pipeline/logstash.conf @@ -1,137 +1,99 @@ input { file { - path => "/home/watcher/watcher/logs/watcher*.log" + path => "/home/watcher/watcher/logs/watcher*.isis.log" id => "watcher" } } filter { if "changed" in [message] { - if "p2p" in [message] or "transit" in [message] { - # 2024-02-05T17:42:33Z,demo-watcher,1,metric,0100.1001.0003,changed,old_cost:10,new_cost:11,0100.1001.0002,p2p,0100.1001.0003, - # 2023-08-07T10:44:38Z,demo-watcher,1,metric,10.1.123.24,changed,old_cost:10,new_cost:777,10.1.123.24,transit,10.1.123.24_10.1.1.2_10.1.123.23,01Jan2023_00h00m00s_7_hosts + if "metric" in [message] { + # 2024-10-08T22:55:32Z,watcher1,1,metric,0200.1001.0004,changed,old_cost:5,new_cost:-1,0200.1001.0005,10Oct2024_00h00m00s_7_hosts,49.0002,12345 + dissect { - mapping => { "message" => "%{watcher_time},%{watcher_name},%{level_number},%{event_name},%{event_object},%{event_status},old_cost:%{old_cost},new_cost:%{new_cost},%{event_detected_by},%{subnet_type},%{shared_subnet_remote_neighbors_ids},%{graph_time}"} + mapping => { "message" => "%{watcher_time},%{watcher_name},%{level_number},%{event_name},%{event_object},%{event_status},old_cost:%{old_cost},new_cost:%{new_cost},%{event_detected_by},%{graph_time},%{area_num},%{asn}"} } - if [@metadata][mongo_id] { + if "-1" == [new_cost] { mutate { - update => { "[@metadata][elasticsearch_index]" => "watcher-costs-changes" - "[@metadata][mongo_id]" => "output_mongo_cost" - "[@metadata][mongo_collection_name]" => "cost_change" - "[@metadata][zabbix_host]" => "isis_link_cost_change" - "[@metadata][zabbix_server_host]" => "${ZABBIX_HOST:127.0.0.1}" - "[@metadata][z_object_item_name]" => "isis_link_cost_change" - "[@metadata][z_item_value]" => "IS-IS L%{level_number} link cost changed between:%{event_object}-%{shared_subnet_remote_neighbors_ids}, old:%{old_cost}, new:%{new_cost}, detected by:%{event_detected_by}" - "[@metadata][webhook_item_value]" => "IS-IS L%{level_number} link cost changed between:%{event_object}-%{shared_subnet_remote_neighbors_ids}, old:%{old_cost}, new:%{new_cost}, detected by:%{event_detected_by}" } } - } else { - mutate { - add_field => { "[@metadata][elasticsearch_index]" => "watcher-costs-changes" - "[@metadata][mongo_id]" => "output_mongo_cost" - "[@metadata][mongo_collection_name]" => "cost_change" - "[@metadata][zabbix_host]" => "isis_link_cost_change" - "[@metadata][zabbix_server_host]" => "${ZABBIX_HOST:127.0.0.1}" - "[@metadata][z_object_item_name]" => "isis_link_cost_change" - "[@metadata][z_item_value]" => "IS-IS L%{level_number} link cost changed between:%{event_object}-%{shared_subnet_remote_neighbors_ids}, old:%{old_cost}, new:%{new_cost}, detected by:%{event_detected_by}" - "[@metadata][webhook_item_value]" => "IS-IS L%{level_number} link cost changed between:%{event_object}-%{shared_subnet_remote_neighbors_ids}, old:%{old_cost}, new:%{new_cost}, detected by:%{event_detected_by}" } } - } - } - else { - # 2023-01-01T00:00:00Z,demo-watcher,1,metric,10.1.14.0/24,changed,old_cost:10,new_cost:123,10.1.1.4,stub,10.1.1.4,01Jan2023_00h00m00s_7_hosts - dissect { - mapping => { "message" => "%{watcher_time},%{watcher_name},%{level_number},%{event_name},%{event_object},%{event_status},old_cost:%{old_cost},new_cost:%{new_cost},%{event_detected_by},%{subnet_type},%{shared_subnet_remote_neighbors_ids},%{graph_time}"} - } - if [@metadata][mongo_id] { + add_field => { + "[@metadata][elasticsearch_index]" => "watcher-updown-events" + "[@metadata][mongo_id]" => "output_mongo_neighbors" + "[@metadata][mongo_collection_name]" => "adj_change" + "[@metadata][zabbix_host]" => "isis_neighbor_up_down" + "[@metadata][z_object_item_name]" => "isis_neighbor_up_down" + + "[@metadata][z_item_value]" => "IS-IS L%{level_number} down between %{event_object}-%{event_detected_by}, cost:%{new_cost}, detected by:%{event_detected_by}" + "[@metadata][webhook_item_value]" => "IS-IS L%{level_number} down between %{event_object}-%{event_detected_by}, cost:%{new_cost}, detected by:%{event_detected_by}" }} + } else if "-1" == [old_cost] { mutate { - update => { "[@metadata][elasticsearch_index]" => "watcher-costs-changes" - "[@metadata][mongo_id]" => "output_mongo_cost" - "[@metadata][mongo_collection_name]" => "cost_change" - "[@metadata][zabbix_host]" => "isis_network_cost_change" - "[@metadata][zabbix_server_host]" => "${ZABBIX_HOST:127.0.0.1}" - "[@metadata][z_object_item_name]" => "isis_stub_network_cost_change" - "[@metadata][z_item_value]" => "IS-IS L%{level_number} network cost changed:%{event_object}, old:%{old_cost}, new:%{new_cost}, detected by:%{event_detected_by}" - "[@metadata][webhook_item_value]" => "IS-IS L%{level_number} network cost changed:%{event_object}, old:%{old_cost}, new:%{new_cost}, detected by:%{event_detected_by}" } } + add_field => { + "[@metadata][elasticsearch_index]" => "watcher-updown-events" + "[@metadata][mongo_id]" => "output_mongo_neighbors" + "[@metadata][mongo_collection_name]" => "adj_change" + "[@metadata][zabbix_host]" => "isis_neighbor_up_down" + "[@metadata][z_object_item_name]" => "isis_neighbor_up_down" + + "[@metadata][z_item_value]" => "IS-IS L%{level_number} up between %{event_object}-%{event_detected_by}, cost:%{new_cost}, detected by:%{event_detected_by}" + "[@metadata][webhook_item_value]" => "IS-IS L%{level_number} up between %{event_object}-%{event_detected_by}, cost:%{new_cost}, detected by:%{event_detected_by}" }} } else { mutate { - add_field => { "[@metadata][elasticsearch_index]" => "watcher-costs-changes" - "[@metadata][mongo_id]" => "output_mongo_cost" - "[@metadata][mongo_collection_name]" => "cost_change" - "[@metadata][zabbix_host]" => "isis_network_cost_change" - "[@metadata][zabbix_server_host]" => "${ZABBIX_HOST:127.0.0.1}" - "[@metadata][z_object_item_name]" => "isis_stub_network_cost_change" - "[@metadata][z_item_value]" => "IS-IS L%{level_number} network cost changed:%{event_object}, old:%{old_cost}, new:%{new_cost}, detected by:%{event_detected_by}" - "[@metadata][webhook_item_value]" => "IS-IS L%{level_number} network cost changed:%{event_object}, old:%{old_cost}, new:%{new_cost}, detected by:%{event_detected_by}" } } + add_field => { + "[@metadata][elasticsearch_index]" => "watcher-costs-changes" + "[@metadata][mongo_id]" => "output_mongo_cost" + "[@metadata][mongo_collection_name]" => "cost_change" + "[@metadata][zabbix_host]" => "isis_link_cost_change" + "[@metadata][z_item_value]" => "IS-IS L%{level_number} link cost changed between:%{event_object}-%{event_detected_by}, old:%{old_cost}, new:%{new_cost}, detected by:%{event_detected_by}" + "[@metadata][webhook_item_value]" => "IS-IS L%{level_number} link cost changed between:%{event_object}-%{event_detected_by}, old:%{old_cost}, new:%{new_cost}, detected by:%{event_detected_by}" } } } } - } - else { - # Up/Down event for IS-IS neighbors and subnets - if "network" in [message] { - # Up/Down subnets + else if "network" in [message] { + # 2024-08-31T12:56:51Z,watcher1,1,network,10.10.10.1/32,changed,old_cost:10,new_cost:-1,0200.1025.0002,10Oct2024_00h00m00s_7_hosts,49.0002,12345,internal,0 dissect { - # 2023-01-01T00:00:00ZZ,demo-watcher,1,network,172.17.1.0/24,down,10.1.1.4,01Jan2023_00h00m00s_7_hosts - mapping => { "message" => "%{watcher_time},%{watcher_name},%{level_number},%{event_name},%{event_object},%{event_status},%{event_detected_by},%{graph_time}"} - } - if [@metadata][mongo_id] { - mutate { - update => { - "[@metadata][elasticsearch_index]" => "watcher-updown-events" - "[@metadata][mongo_id]" => "output_mongo_neighbors" - "[@metadata][mongo_collection_name]" => "adj_change" - "[@metadata][zabbix_host]" => "isis_network_up_down" - "[@metadata][zabbix_server_host]" => "${ZABBIX_HOST:127.0.0.1}" - "[@metadata][z_object_item_name]" => "isis_network_up_down" - "[@metadata][z_item_value]" => "IS-IS %{event_name}:%{event_object} %{event_status}, detected by:%{event_detected_by}" - "[@metadata][webhook_item_value]" => "IS-IS %{event_name}:%{event_object} %{event_status}, detected by:%{event_detected_by}" - } - } + mapping => { "message" => "%{watcher_time},%{watcher_name},%{level_number},%{event_name},%{event_object},%{event_status},old_cost:%{old_cost},new_cost:%{new_cost},%{event_detected_by},%{graph_time},%{area_num},%{asn},%{subnet_type},%{int_ext_subtype}"} } - else { + if "-1" == [new_cost] { mutate { add_field => { - "[@metadata][elasticsearch_index]" => "watcher-updown-events" - "[@metadata][mongo_id]" => "output_mongo_neighbors" - "[@metadata][mongo_collection_name]" => "adj_change" - "[@metadata][zabbix_host]" => "isis_network_up_down" - "[@metadata][zabbix_server_host]" => "${ZABBIX_HOST:127.0.0.1}" - "[@metadata][z_object_item_name]" => "isis_network_up_down" - "[@metadata][z_item_value]" => "IS-IS L%{level_number} %{event_name}:%{event_object} %{event_status}, detected by:%{event_detected_by}" - "[@metadata][webhook_item_value]" => "IS-IS L%{level_number} %{event_name}:%{event_object} %{event_status}, detected by:%{event_detected_by}" - } - } - } - } - else { - dissect { - # 2023-01-01T00:00:00ZZ,demo-watcher,1,host,10.1.1.2,down,10.1.1.4,01Jan2023_00h00m00s_7_hosts - mapping => { "message" => "%{watcher_time},%{watcher_name},%{level_number},%{event_name},%{event_object},%{event_status},%{event_detected_by},%{graph_time}"} - } - if [@metadata][mongo_id] { + "[@metadata][elasticsearch_index]" => "watcher-updown-events" + "[@metadata][mongo_id]" => "output_mongo_neighbors" + "[@metadata][mongo_collection_name]" => "adj_change" + "[@metadata][zabbix_host]" => "isis_network_up_down" + "[@metadata][z_object_item_name]" => "isis_network_up_down" + "[@metadata][z_item_value]" => "IS-IS L%{level_number} %{subnet_type} %{event_object} network down, cost:%{old_cost}, detected by:%{event_detected_by}" + "[@metadata][webhook_item_value]" => "IS-IS L%{level_number} %{subnet_type} %{event_object} network down, cost:%{old_cost}, detected by:%{event_detected_by}" } } + } else if "-1" == [old_cost] { mutate { - update => { - "[@metadata][elasticsearch_index]" => "watcher-updown-events" - "[@metadata][mongo_id]" => "output_mongo_neighbors" - "[@metadata][mongo_collection_name]" => "adj_change" - "[@metadata][zabbix_host]" => "isis_neighbor_up_down" - "[@metadata][zabbix_server_host]" => "${ZABBIX_HOST:127.0.0.1}" - "[@metadata][z_object_item_name]" => "isis_neighbor_up_down" - "[@metadata][z_item_value]" => "IS-IS %{event_name}:%{event_object} %{event_status}, detected by:%{event_detected_by}" - "[@metadata][webhook_item_value]" => "IS-IS %{event_name}:%{event_object} %{event_status}, detected by:%{event_detected_by}" - } - } + add_field => { + "[@metadata][elasticsearch_index]" => "watcher-updown-events" + "[@metadata][mongo_id]" => "output_mongo_neighbors" + "[@metadata][mongo_collection_name]" => "adj_change" + "[@metadata][zabbix_host]" => "isis_network_up_down" + "[@metadata][z_object_item_name]" => "isis_network_up_down" + "[@metadata][z_item_value]" => "IS-IS L%{level_number} %{subnet_type} %{event_object} network up, cost:%{new_cost}, detected by:%{event_detected_by}" + "[@metadata][webhook_item_value]" => "IS-IS L%{level_number} %{subnet_type} %{event_object} network up, cost:%{new_cost}, detected by:%{event_detected_by}" } } } else { mutate { - add_field => { - "[@metadata][elasticsearch_index]" => "watcher-updown-events" - "[@metadata][mongo_id]" => "output_mongo_neighbors" - "[@metadata][mongo_collection_name]" => "adj_change" - "[@metadata][zabbix_host]" => "isis_neighbor_up_down" - "[@metadata][zabbix_server_host]" => "${ZABBIX_HOST:127.0.0.1}" - "[@metadata][z_object_item_name]" => "isis_neighbor_up_down" - "[@metadata][z_item_value]" => "IS-IS L%{level_number} %{event_name}:%{event_object} %{event_status}, detected by:%{event_detected_by}" - "[@metadata][webhook_item_value]" => "IS-IS L%{level_number} %{event_name}:%{event_object} %{event_status}, detected by:%{event_detected_by}" - } - } - } + add_field => { + "[@metadata][elasticsearch_index]" => "watcher-costs-changes" + "[@metadata][mongo_id]" => "output_mongo_cost" + "[@metadata][mongo_collection_name]" => "cost_change" + "[@metadata][zabbix_host]" => "isis_network_cost_change" + "[@metadata][z_object_item_name]" => "isis_stub_network_cost_change" + "[@metadata][z_item_value]" => "IS-IS L%{level_number} %{subnet_type} network cost changed:%{event_object}, old:%{old_cost}, new:%{new_cost}, detected by:%{event_detected_by}" + "[@metadata][webhook_item_value]" => "IS-IS L%{level_number} %{subnet_type} network cost changed:%{event_object}, old:%{old_cost}, new:%{new_cost}, detected by:%{event_detected_by}" } } + } } + } else { + drop {} + } + # Up/Down event for OSPF neighbors and subnets covered in changed event section! + # 2024-10-08T22:47:39Z,watcher1,2,network,4ffe:10::5:0:0:d8/127,up,0200.1001.0005,,49.0002,12345 + # 2024-10-08T22:55:36Z,watcher1,2,host,0200.1001.0004,down,0200.1001.0005,,49.0002,12345 + date { + match => [ "watcher_time", "ISO8601" ] + target => "watcher_time_iso8601" + } + mutate { + remove_field => ["@version", "message"] } } output { @@ -143,6 +105,8 @@ output { } } if "${EXPORT_TO_MONGO_BOOL:False}" == "True" { + # If EXPORT_TO_MONGO_BOOL is False - it doesn't block the pipeline # + # If EXPORT_TO_MONGO_BOOL is True and MongoDB is not available - block the pipeline # mongodb { id => "%{[@metadata][mongo_id]}" collection => "%{[@metadata][mongo_collection_name]}" diff --git a/watcher/watcher-template/config.yml b/watcher/watcher-template/config.yml index f1ad25c..fe1782b 100644 --- a/watcher/watcher-template/config.yml +++ b/watcher/watcher-template/config.yml @@ -2,21 +2,20 @@ name: watcher2-tun1025 prefix: __lab-name topology: + defaults: + env-files: + - ../../.env nodes: isis-watcher: kind: linux image: vadims06/isis-watcher:latest network-mode: container:router - startup-delay: 10 + startup-delay: 60 env: WATCHER_LOGFILE: "/home/watcher/watcher/logs/watcher.log" # Watcher -> Export. default is local dir logs/watcher.log ISISD_DUMP_FILE_DIR: "/var/log/frr/isisd.log" - TEST_MODE: "False" - FRR_HOST: 127.0.0.1 - FRR_PORT: "2608" binds: - - router/isisd.log:/var/log/frr/isisd.log # FRR -> Watcher - # - ../logs/watcher1-gre1011.log:/home/watcher/watcher/logs/watcher.log # Watcher -> Export + - ../logs/:/home/watcher/watcher/logs/ stages: create: wait-for: @@ -26,7 +25,6 @@ topology: kind: linux image: vadims06/frr:v8.5.4_isis_over_gre # quay.io/frrouting/frr:9.1.0, 9.0.0, 9.0.1, 9.0.2, 9.1.0 IS-IS works over GRE, but doesn't dump LSP https://github.com/FRRouting/frr/issues/15654 for tracking. IS-IS doesn't work over GRE 8.5.4. binds: - - router/isisd.log:/var/log/frr/isisd.log - router/daemons:/etc/frr/daemons - router/frr.conf:/etc/frr/frr.conf - router/vtysh.conf:/etc/frr/vtysh.conf