From 1ce8ac424695339036163745cae2f1951c8d686c Mon Sep 17 00:00:00 2001 From: Nathan Gotz <775979+nlgotz@users.noreply.github.com> Date: Tue, 12 Mar 2024 10:14:43 -0500 Subject: [PATCH 1/2] feat: Add Global Cloud XChange Parser (#274) * feat: Add Global Cloud XChange Parser * chore: fix test failures * fix: add parser tests for global cloud xchange * fix: use Impact Enum --- README.md | 1 + circuit_maintenance_parser/__init__.py | 19 ++-- .../parsers/globalcloudxchange.py | 86 +++++++++++++++++++ circuit_maintenance_parser/provider.py | 34 +++++--- .../globalcloudxchange1.eml | 65 ++++++++++++++ ...lobalcloudxchange1_html_parser_result.json | 11 +++ .../globalcloudxchange1_result.json | 17 ++++ .../globalcloudxchange1_subject.eml | 1 + ...alcloudxchange1_subject_parser_result.json | 9 ++ tests/unit/test_e2e.py | 30 +++++-- tests/unit/test_parsers.py | 18 +++- 11 files changed, 259 insertions(+), 32 deletions(-) create mode 100644 circuit_maintenance_parser/parsers/globalcloudxchange.py create mode 100644 tests/unit/data/globalcloudxchange/globalcloudxchange1.eml create mode 100644 tests/unit/data/globalcloudxchange/globalcloudxchange1_html_parser_result.json create mode 100644 tests/unit/data/globalcloudxchange/globalcloudxchange1_result.json create mode 100644 tests/unit/data/globalcloudxchange/globalcloudxchange1_subject.eml create mode 100644 tests/unit/data/globalcloudxchange/globalcloudxchange1_subject_parser_result.json diff --git a/README.md b/README.md index e9cb7249..0cc79400 100644 --- a/README.md +++ b/README.md @@ -75,6 +75,7 @@ By default, there is a `GenericProvider` that supports a `SimpleProcessor` using - Equinix - EXA (formerly GTT) - HGC +- Global Cloud Xchange - Google - Lumen - Megaport diff --git a/circuit_maintenance_parser/__init__.py b/circuit_maintenance_parser/__init__.py index f0a73af7..de1b4b9f 100644 --- a/circuit_maintenance_parser/__init__.py +++ b/circuit_maintenance_parser/__init__.py @@ -1,28 +1,30 @@ """Circuit-maintenance-parser 
init.""" -from typing import Type, Optional + +from typing import Optional, Type from .data import NotificationData -from .output import Maintenance from .errors import NonexistentProviderError, ProviderError +from .output import Maintenance from .provider import ( - GenericProvider, - AquaComms, - Arelion, AWS, BSO, + GTT, + HGC, + NTT, + AquaComms, + Arelion, Cogent, Colt, CrownCastle, Equinix, EUNetworks, - GTT, + GenericProvider, + GlobalCloudXchange, Google, - HGC, Lumen, Megaport, Momentum, Netflix, - NTT, PacketFabric, Seaborn, Sparkle, @@ -44,6 +46,7 @@ CrownCastle, Equinix, EUNetworks, + GlobalCloudXchange, Google, GTT, HGC, diff --git a/circuit_maintenance_parser/parsers/globalcloudxchange.py b/circuit_maintenance_parser/parsers/globalcloudxchange.py new file mode 100644 index 00000000..c768f225 --- /dev/null +++ b/circuit_maintenance_parser/parsers/globalcloudxchange.py @@ -0,0 +1,86 @@ +"""Circuit Maintenance Parser for Global Cloud Xchange Email Notifications.""" + +import re +from datetime import datetime +from typing import Any, Dict, List + +from bs4.element import ResultSet # type: ignore + +from circuit_maintenance_parser.output import Impact +from circuit_maintenance_parser.parser import EmailSubjectParser, Html, Status + + +class HtmlParserGcx1(Html): + """Custom Parser for HTML portion of Global Cloud Xchange circuit maintenance notifications.""" + + def parse_html(self, soup: ResultSet) -> List[Dict]: + """Parse a Global Cloud Xchange circuit maintenance email. + + Args: + soup (ResultSet): beautiful soup object containing the html portion of an email. + + Returns: + List[Dict]: A one-element list holding the data dict containing circuit maintenance data. 
+ """ + data: Dict[str, Any] = {"circuits": []} + + for div in soup.find_all("div"): + for pstring in div.strings: + search = re.search("Dear (.*),", pstring) + if search: + data["account"] = search.group(1) + + # Find Circuits + for table in soup.find_all("table"): + for row in table.find_all("tr"): + cols = row.find_all("td") + if len(cols) == 2 and "Service ID" not in cols[0].text: + impact = Impact.OUTAGE + if "at risk" in cols[1].text.lower(): + impact = Impact.REDUCED_REDUNDANCY + + data["circuits"].append({"circuit_id": cols[0].text, "impact": impact}) + + return [data] + + +class SubjectParserGcx1(EmailSubjectParser): + """Parse the subject of a Global Cloud Xchange circuit maintenance email. The subject contains the maintenance ID and status.""" + + def parse_subject(self, subject: str) -> List[Dict]: + """Parse the Global Cloud Xchange Email subject for maintenance ID, start/end times, summary and status. + + Args: + subject (str): subject of email + e.g. 'PE2024020844407 | Emergency | Service Advisory Notice | Span Loss Rectification | 12-Feb-2024 09:00 (GMT) - 12-Feb-2024 17:00 (GMT)'. + + + Returns: + List[Dict]: A one-element list holding the data object with the status and, when the subject matches the expected layout, the maintenance ID, start, end and summary fields. 
+ """ + data = {} + search = re.search( + r"^([A-Z0-9]+) \| (\w+) \| ([\w\s]+) \| ([\w\s]+) \| (\d+-[A-Za-z]{3}-\d{4} \d{2}:\d{2}) \(GMT\) - (\d+-[A-Za-z]{3}-\d{4} \d{2}:\d{2}) \(GMT\)$", + subject, + ) + if search: + data["maintenance_id"] = search.group(1) + date_format = date_format = "%d-%b-%Y %H:%M" + data["start"] = self.dt2ts(datetime.strptime(search.group(5), date_format)) + data["end"] = self.dt2ts(datetime.strptime(search.group(6), date_format)) + data["summary"] = search.group(4) + + if "completed" in subject.lower(): + data["status"] = Status.COMPLETED + elif "rescheduled" in subject.lower(): + data["status"] = Status.RE_SCHEDULED + elif "scheduled" in subject.lower() or "reminder" in subject.lower() or "notice" in subject.lower(): + data["status"] = Status.CONFIRMED + elif "cancelled" in subject.lower(): + data["status"] = Status.CANCELLED + else: + # Some Global Cloud Xchange notifications don't clearly state a status in their subject. + # From inspection of examples, it looks like "Confirmed" would be the most appropriate in this case. 
+ data["status"] = Status.CONFIRMED + + return [data] diff --git a/circuit_maintenance_parser/provider.py b/circuit_maintenance_parser/provider.py index ed1877d4..df6ef7f2 100644 --- a/circuit_maintenance_parser/provider.py +++ b/circuit_maintenance_parser/provider.py @@ -1,37 +1,35 @@ """Definition of Provider class as the entry point to the library.""" + import logging import os import re import traceback +from typing import Dict, Iterable, List -from typing import Iterable, List, Dict import chardet - from pydantic import BaseModel, PrivateAttr -from circuit_maintenance_parser.utils import rgetattr - -from circuit_maintenance_parser.output import Maintenance +from circuit_maintenance_parser.constants import EMAIL_HEADER_SUBJECT from circuit_maintenance_parser.data import NotificationData -from circuit_maintenance_parser.parser import ICal, EmailDateParser from circuit_maintenance_parser.errors import ProcessorError, ProviderError -from circuit_maintenance_parser.processor import CombinedProcessor, SimpleProcessor, GenericProcessor -from circuit_maintenance_parser.constants import EMAIL_HEADER_SUBJECT - +from circuit_maintenance_parser.output import Maintenance +from circuit_maintenance_parser.parser import EmailDateParser, ICal from circuit_maintenance_parser.parsers.aquacomms import HtmlParserAquaComms1, SubjectParserAquaComms1 from circuit_maintenance_parser.parsers.aws import SubjectParserAWS1, TextParserAWS1 from circuit_maintenance_parser.parsers.bso import HtmlParserBSO1 -from circuit_maintenance_parser.parsers.cogent import HtmlParserCogent1, TextParserCogent1, SubjectParserCogent1 +from circuit_maintenance_parser.parsers.cogent import HtmlParserCogent1, SubjectParserCogent1, TextParserCogent1 from circuit_maintenance_parser.parsers.colt import CsvParserColt1, SubjectParserColt1, SubjectParserColt2 from circuit_maintenance_parser.parsers.crowncastle import HtmlParserCrownCastle1 from circuit_maintenance_parser.parsers.equinix import HtmlParserEquinix, 
SubjectParserEquinix -from circuit_maintenance_parser.parsers.gtt import HtmlParserGTT1 +from circuit_maintenance_parser.parsers.globalcloudxchange import HtmlParserGcx1, SubjectParserGcx1 from circuit_maintenance_parser.parsers.google import HtmlParserGoogle1 +from circuit_maintenance_parser.parsers.gtt import HtmlParserGTT1 from circuit_maintenance_parser.parsers.hgc import HtmlParserHGC1, HtmlParserHGC2, SubjectParserHGC1 from circuit_maintenance_parser.parsers.lumen import HtmlParserLumen1 from circuit_maintenance_parser.parsers.megaport import HtmlParserMegaport1 from circuit_maintenance_parser.parsers.momentum import HtmlParserMomentum1, SubjectParserMomentum1 from circuit_maintenance_parser.parsers.netflix import TextParserNetflix1 +from circuit_maintenance_parser.parsers.openai import OpenAIParser from circuit_maintenance_parser.parsers.seaborn import ( HtmlParserSeaborn1, HtmlParserSeaborn2, @@ -43,7 +41,8 @@ from circuit_maintenance_parser.parsers.turkcell import HtmlParserTurkcell1 from circuit_maintenance_parser.parsers.verizon import HtmlParserVerizon1 from circuit_maintenance_parser.parsers.zayo import HtmlParserZayo1, SubjectParserZayo1 -from circuit_maintenance_parser.parsers.openai import OpenAIParser +from circuit_maintenance_parser.processor import CombinedProcessor, GenericProcessor, SimpleProcessor +from circuit_maintenance_parser.utils import rgetattr logger = logging.getLogger(__name__) @@ -282,6 +281,17 @@ class EUNetworks(GenericProvider): _default_organizer = "noc@eunetworks.com" +class GlobalCloudXchange(GenericProvider): + """Global Cloud Xchange provider custom class.""" + + _processors: List[GenericProcessor] = PrivateAttr( + [ + CombinedProcessor(data_parsers=[EmailDateParser, SubjectParserGcx1, HtmlParserGcx1]), + ] + ) + _default_organizer = PrivateAttr("Gnoc@globalcloudxchange.com") + + class Google(GenericProvider): """Google provider custom class.""" diff --git a/tests/unit/data/globalcloudxchange/globalcloudxchange1.eml 
b/tests/unit/data/globalcloudxchange/globalcloudxchange1.eml new file mode 100644 index 00000000..f2142910 --- /dev/null +++ b/tests/unit/data/globalcloudxchange/globalcloudxchange1.eml @@ -0,0 +1,65 @@ +MIME-Version: 1.0 +From: Gnoc@globalcloudxchange.com +To: email@examplecompany.com +Reply-To: change@gcxworld.com +Date: 8 Feb 2024 17:09:33 +0000 +Subject: PE2024020844407 | Emergency | Service Advisory Notice | + Span Loss Rectification | 12-Feb-2024 09:00 (GMT) - 12-Feb-2024 17:00 (GMT) +Content-Type: text/html; charset="UTF-8" +Content-Transfer-Encoding: quoted-printable + + + +
Service ID | Impact |
RGWLS31171 | At Risk |