Merge pull request #223 from networktocode/Release_v2.2.3

Release v2.2.3
networktocode · Mar 21, 2023 · ab0782c · ab0782c
2 parents 5713a77 + 228600a
commit ab0782c
Show file tree

Hide file tree

Showing 24 changed files with 1,764 additions and 78 deletions.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -1,5 +1,18 @@
 # Changelog
 
+## v2.2.3 - 2023-03-21
+
+### Changed
+
+- #216 - Allow multiple Lumen maintenance windows to be parsed
+- #212 - Updated documentation: Contribution section
+- #210 - Ability to parse multiple maintenance windows from Zayo
+- #190 - Update Telstra for new notification format
+
+### Fixed
+
+- #222 - Fix e2e tests when combining data from multiple maintenances
+
 ## v2.2.2 - 2023-01-27
 
 ### Changed

diff --git a/README.md b/README.md
@@ -312,6 +312,63 @@ The project is following Network to Code software development guidelines and is
    - The `Provider` also supports the definition of a `_include_filter` and a `_exclude_filter` to limit the notifications that are actually processed, avoiding false positive errors for notifications that are not relevant.
 4. Update the `unit/test_e2e.py` with the new provider, providing some data to test and validate the final `Maintenances` created.
 5. **Expose the new `Provider` class** updating the map `SUPPORTED_PROVIDERS` in `circuit_maintenance_parser/__init__.py` to officially expose the `Provider`.
+6. You can run some tests here to verify that your new unit tests do not cause issues with existing tests, and in general they work as expected. You can do this by running `pytest --log-cli-level=DEBUG --capture=tee-sys`. You can narrow down the tests that you want to execute with the `-k` flag. If successful, your results should look similar to the following:
+
+```
+-> % pytest --log-cli-level=DEBUG --capture=tee-sys -k test_parsers
+...omitted debug logs...
+====================================================== 99 passed, 174 deselected, 17 warnings in 10.35s ======================================================
+```
+7. Run some final CI tests locally to ensure that there are no linting/formatting issues with your changes. You should look to get a code score of 10/10. See the example below: `invoke tests --local`
+
+```
+-> % invoke tests --local
+LOCAL - Running command black --check --diff .
+All done! ✨ 🍰 ✨
+41 files would be left unchanged.
+LOCAL - Running command flake8 .
+LOCAL - Running command find . -name "*.py" | xargs pylint
+************* Module tasks
+tasks.py:4:0: W0402: Uses of a deprecated module 'distutils.util' (deprecated-module)
+
+--------------------------------------------------------------------
+Your code has been rated at 10.00/10 (previous run: 10.00/10, +0.00)
+```
+
+### How to debug circuit-maintenance-parser library locally
+
+1. `poetry install` updates the library and its dependencies locally.
+2. `circuit-maintenance-parser` is now built with your recent local changes.
+
+If you were to add loggers or debuggers to one of the classes:
+
+```python
+class HtmlParserZayo1(Html):
+    def parse_bs(self, btags: ResultSet, data: dict):
+        """Parse B tag."""
+        raise Exception('Debugging exception')
+```
+
+After running `poetry install`:
+
+```
+-> % circuit-maintenance-parser --data-file ~/Downloads/zayo.eml --data-type email --provider-type zayo
+Provider processing failed: Failed creating Maintenance notification for Zayo.
+Details:
+- Processor CombinedProcessor from Zayo failed due to: Debugging exception
+```
+
+> Note: `invoke build` will result in an error due to no Dockerfile. This is expected as the library runs simple pytest testing without a container.
+
+```
+-> % invoke build
+Building image circuit-maintenance-parser:2.2.2-py3.8
+#1 [internal] load build definition from Dockerfile
+#1 transferring dockerfile: 2B done
+#1 DONE 0.0s
+WARNING: failed to get git remote url: fatal: No remote configured to list refs from.
+ERROR: failed to solve: rpc error: code = Unknown desc = failed to solve with frontend dockerfile.v0: failed to read dockerfile: open /var/lib/docker/tmp/buildkit-mount1243547759/Dockerfile: no such file or directory
+```
 
 ## Questions
 

diff --git a/circuit_maintenance_parser/parsers/lumen.py b/circuit_maintenance_parser/parsers/lumen.py
@@ -2,6 +2,7 @@
 import logging
 from typing import Dict
 
+from copy import deepcopy
 from dateutil import parser
 import bs4  # type: ignore
 from bs4.element import ResultSet  # type: ignore
@@ -19,10 +20,22 @@ class HtmlParserLumen1(Html):
 
     def parse_html(self, soup):
         """Execute parsing."""
+        maintenances = []
         data = {}
         self.parse_spans(soup.find_all("span"), data)
         self.parse_tables(soup.find_all("table"), data)
-        return [data]
+
+        # Iterates over multiple windows and duplicates other maintenance info to a new dictionary while also updating start and end times for the specific window.
+        for window in data["windows"]:
+            maintenance = deepcopy(data)
+            maintenance["start"], maintenance["end"] = window
+            del maintenance["windows"]
+            maintenances.append(maintenance)
+
+        # Deleting the key after we are finished checking for multiple windows and duplicating data.
+        del data["windows"]
+
+        return maintenances
 
     def parse_spans(self, spans: ResultSet, data: Dict):
         """Parse Span tag."""
@@ -56,8 +69,11 @@ def parse_spans(self, spans: ResultSet, data: Dict):
                                 data["stamp"] = self.dt2ts(stamp)
                             break
 
-    def parse_tables(self, tables: ResultSet, data: Dict):
+    def parse_tables(self, tables: ResultSet, data: Dict):  # pylint: disable=too-many-locals
         """Parse Table tag."""
+        # Initialise multiple windows list that will be used in parse_html
+        data["windows"] = []
+
         circuits = []
         for table in tables:
             cells = table.find_all("td")
@@ -68,9 +84,10 @@ def parse_tables(self, tables: ResultSet, data: Dict):
                 for idx in range(num_columns, len(cells), num_columns):
                     if "GMT" in cells[idx].string and "GMT" in cells[idx + 1].string:
                         start = parser.parse(cells[idx].string.split(" GMT")[0])
-                        data["start"] = self.dt2ts(start)
+                        start_ts = self.dt2ts(start)
                         end = parser.parse(cells[idx + 1].string.split(" GMT")[0])
-                        data["end"] = self.dt2ts(end)
+                        end_ts = self.dt2ts(end)
+                        data["windows"].append((start_ts, end_ts))
                         break
 
             elif cells[0].string == "Customer Name":

diff --git a/circuit_maintenance_parser/parsers/telstra.py b/circuit_maintenance_parser/parsers/telstra.py
@@ -1,12 +1,13 @@
 """Telstra parser."""
 import logging
 from typing import Dict, List
-
+import re
 from dateutil import parser
 from bs4.element import ResultSet  # type: ignore
 
 from circuit_maintenance_parser.parser import Html, Impact, CircuitImpact, Status
 
+
 # pylint: disable=too-many-branches
 
 
@@ -73,3 +74,91 @@ def parse_tables(self, tables: ResultSet, data: Dict):  # pylint: disable=too-ma
                         # First sentence contains 'Maintenance Details:' so we skip it
                         data["summary"] = ". ".join(sentences[1:])
             break
+
+
+class HtmlParserTelstra2(Html):
+    """Notifications Parser for Telstra notifications."""
+
+    def parse_html(self, soup):
+        """Execute parsing."""
+        data = {}
+        self.parse_tables(soup.find_all("table"), data)
+        return [data]
+
+    def add_maintenance_data(self, table: ResultSet, data: Dict):
+        """Populate data dict."""
+        for strong_element in table.find_all("strong"):
+            if not strong_element.string:
+                continue
+            strong_text = strong_element.string.strip()
+            strong_sibling = strong_element.next_sibling.next_sibling
+            if strong_text == "Reference number":
+                data["maintenance_id"] = strong_sibling.string.strip()
+            elif strong_text == "Start time":
+                text_start = strong_sibling.string
+                regex = re.search(r"\d{2}\s[a-zA-Z]{3}\s\d{4}\s\d{2}[:]\d{2}[:]\d{2}", text_start)
+                if regex is not None:
+                    start = parser.parse(regex.group())
+                    data["start"] = self.dt2ts(start)
+                else:
+                    data["start"] = "Not defined"
+            elif strong_text == "End time":
+                text_end = strong_sibling.string
+                regex = re.search(r"\d{2}\s[a-zA-Z]{3}\s\d{4}\s\d{2}[:]\d{2}[:]\d{2}", text_end)
+                if regex is not None:
+                    end = parser.parse(regex.group())
+                    data["end"] = self.dt2ts(end)
+                else:
+                    data["end"] = "is not defined"
+            elif strong_text == "Service/s under maintenance":
+                data["circuits"] = []
+                # TODO: This split is just an assumption of the multiple service, to be checked with more samples
+                impacted_circuits = strong_sibling.text.split(", ")
+                for circuit_id in impacted_circuits:
+                    data["circuits"].append(CircuitImpact(impact=Impact("OUTAGE"), circuit_id=circuit_id.strip()))
+            elif strong_text == "Maintenance details":
+                sentences: List[str] = []
+                for element in strong_element.next_elements:
+                    if element.string == "Reference number":
+                        break
+                    if element.string and element.string not in ["\n", "", "\xa0"] + sentences:
+                        sentences.append(element.string)
+                if sentences:
+                    # First sentence contains 'Maintenance Details' so we skip it
+                    data["summary"] = ". ".join(sentences[1:])
+
+    def parse_tables(self, tables: ResultSet, data: Dict):  # pylint: disable=too-many-locals
+        """Parse Table tag."""
+        for table in tables:
+            for p_element in table.find_all("p"):
+                # TODO: We should find a more consistent way to parse the status of a maintenance note
+                p_text = p_element.text.lower()
+                if "attention" in p_text:
+                    regex = re.search("[^attention ].*", p_text.strip())
+                    if regex is not None:
+                        data["account"] = regex.group()
+                    else:
+                        data["account"] = "not Found"
+            for span_element in table.find_all("span"):
+                span_text = span_element.text.lower()
+                if "planned maintenance to our network infrastructure" in span_text:
+                    data["status"] = Status("CONFIRMED")
+                elif "emergency maintenance to our network infrastructure" in span_text:
+                    data["status"] = Status("CONFIRMED")
+                elif "has been rescheduled" in span_text:
+                    data["status"] = Status("RE-SCHEDULED")
+                elif "has been completed successfully" in span_text:
+                    data["status"] = Status("COMPLETED")
+                elif (
+                    "did not proceed" in span_text
+                    or "has been withdrawn" in span_text
+                    or "has been cancelled" in span_text
+                ):
+                    data["status"] = Status("CANCELLED")
+                elif "was unsuccessful" in span_text:
+                    data["status"] = Status("CANCELLED")
+                else:
+                    continue
+                break
+            self.add_maintenance_data(table, data)
+            break
diff --git a/circuit_maintenance_parser/parsers/zayo.py b/circuit_maintenance_parser/parsers/zayo.py
@@ -1,6 +1,7 @@
 """Zayo parser."""
 import logging
 import re
+from copy import deepcopy
 from typing import Dict
 
 import bs4  # type: ignore
@@ -44,21 +45,30 @@ class HtmlParserZayo1(Html):
 
     def parse_html(self, soup):
         """Execute parsing."""
+        maintenances = []
         data = {}
         self.parse_bs(soup.find_all("b"), data)
         self.parse_tables(soup.find_all("table"), data)
 
-        if data:
-            if "status" not in data:
-                text = soup.get_text()
-                if "will be commencing momentarily" in text:
-                    data["status"] = Status("IN-PROCESS")
-                elif "has been completed" in text or "has closed" in text:
-                    data["status"] = Status("COMPLETED")
-                elif "has rescheduled" in text:
-                    data["status"] = Status("RE-SCHEDULED")
+        if not data:
+            return [{}]
 
-        return [data]
+        if "status" not in data:
+            text = soup.get_text()
+            if "will be commencing momentarily" in text:
+                data["status"] = Status("IN-PROCESS")
+            elif "has been completed" in text or "has closed" in text:
+                data["status"] = Status("COMPLETED")
+            elif "has rescheduled" in text:
+                data["status"] = Status("RE-SCHEDULED")
+
+        for maintenance_window in data.get("windows", []):
+            maintenance = deepcopy(data)
+            maintenance["start"], maintenance["end"] = maintenance_window
+            del maintenance["windows"]
+            maintenances.append(maintenance)
+
+        return maintenances
 
     def parse_bs(self, btags: ResultSet, data: dict):
         """Parse B tag."""
@@ -71,41 +81,23 @@ def parse_bs(self, btags: ResultSet, data: dict):
                         data["status"] = Status("CONFIRMED")
                     elif "has cancelled" in line.text.lower():
                         data["status"] = Status("CANCELLED")
-                # Some Zayo notifications may include multiple activity dates.
-                # For lack of a better way to handle this, we consolidate these into a single extended activity range.
-                #
-                # For example, given:
-                #
-                # 1st Activity Date
-                # 01-Nov-2021 00:01 to 01-Nov-2021 05:00 ( Mountain )
-                # 01-Nov-2021 06:01 to 01-Nov-2021 11:00 ( GMT )
-                #
-                # 2nd Activity Date
-                # 02-Nov-2021 00:01 to 02-Nov-2021 05:00 ( Mountain )
-                # 02-Nov-2021 06:01 to 02-Nov-2021 11:00 ( GMT )
-                #
-                # 3rd Activity Date
-                # 03-Nov-2021 00:01 to 03-Nov-2021 05:00 ( Mountain )
-                # 03-Nov-2021 06:01 to 03-Nov-2021 11:00 ( GMT )
-                #
-                # our end result would be (start: "01-Nov-2021 06:01", end: "03-Nov-2021 11:00")
                 elif "activity date" in line.text.lower():
                     logger.info("Found 'activity date': %s", line.text)
+
+                    if "windows" not in data:
+                        data["windows"] = []
+
                     for sibling in line.next_siblings:
                         text = sibling.text if isinstance(sibling, bs4.element.Tag) else sibling
                         logger.debug("Checking for GMT date/timestamp in sibling: %s", text)
+
                         if "( GMT )" in text:
                             window = self.clean_line(sibling).strip("( GMT )").split(" to ")
                             start = parser.parse(window.pop(0))
-                            start_ts = self.dt2ts(start)
-                            # Keep the earliest of any listed start times
-                            if "start" not in data or data["start"] > start_ts:
-                                data["start"] = start_ts
                             end = parser.parse(window.pop(0))
+                            start_ts = self.dt2ts(start)
                             end_ts = self.dt2ts(end)
-                            # Keep the latest of any listed end times
-                            if "end" not in data or data["end"] < end_ts:
-                                data["end"] = end_ts
+                            data["windows"].append((start_ts, end_ts))
                             break
                 elif line.text.lower().strip().startswith("reason for maintenance:"):
                     data["summary"] = self.clean_line(line.next_sibling)
@@ -148,13 +140,15 @@ def parse_tables(self, tables: ResultSet, data: Dict):
                     "Customer Circuit ID",
                 ],
             )
+
             if all(table_headers != expected_headers for expected_headers in expected_headers_ref):
                 logger.warning("Table headers are not as expected: %s", head_row)
                 continue
 
             data_rows = table.find_all("td")
             if len(data_rows) % 5 != 0:
                 raise AssertionError("Table format is not correct")
+
             number_of_circuits = int(len(data_rows) / 5)
             for idx in range(number_of_circuits):
                 data_circuit = {}
@@ -165,5 +159,6 @@ def parse_tables(self, tables: ResultSet, data: Dict):
                 elif "no expected impact" in impact.lower():
                     data_circuit["impact"] = Impact("NO-IMPACT")
                 circuits.append(CircuitImpact(**data_circuit))
+
         if circuits:
             data["circuits"] = circuits
diff --git a/circuit_maintenance_parser/provider.py b/circuit_maintenance_parser/provider.py
@@ -35,7 +35,7 @@
     SubjectParserSeaborn2,
 )
 from circuit_maintenance_parser.parsers.sparkle import HtmlParserSparkle1
-from circuit_maintenance_parser.parsers.telstra import HtmlParserTelstra1
+from circuit_maintenance_parser.parsers.telstra import HtmlParserTelstra1, HtmlParserTelstra2
 from circuit_maintenance_parser.parsers.turkcell import HtmlParserTurkcell1
 from circuit_maintenance_parser.parsers.verizon import HtmlParserVerizon1
 from circuit_maintenance_parser.parsers.zayo import HtmlParserZayo1, SubjectParserZayo1
@@ -330,6 +330,7 @@ class Telstra(GenericProvider):
 
     _processors: List[GenericProcessor] = [
         SimpleProcessor(data_parsers=[ICal]),
+        CombinedProcessor(data_parsers=[EmailDateParser, HtmlParserTelstra2]),
         CombinedProcessor(data_parsers=[EmailDateParser, HtmlParserTelstra1]),
     ]
     _default_organizer = "[email protected]"

diff --git a/pyproject.toml b/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "circuit-maintenance-parser"
-version = "2.2.2"
+version = "2.2.3"
 description = "Python library to parse Circuit Maintenance notifications and return a structured data back"
 authors = ["Network to Code <[email protected]>"]
 license = "Apache-2.0"