
Ianhelle/warning fixes 2024 02 11 (#752)
* Updates for new AML defaults

* AML Tools updates:
- removing unneeded functionality
- adding warnings for unsupported kernel types

* Fixing aml_tools tests for Python version

* Adding method to check auth_methods in aml_tools

* Fix to prevent QueryProvider from trying to load non-query yaml

* Adding fix for handling of random yaml files being read as query files.

* Fix if Azure.auth_methods key does not exist

* Creating mypy warning suppressions and fixes.

* Two more mypy issues in vtlookupv3 and proc_tree_build_mde
ianhelle authored Feb 12, 2024
1 parent 63f96f3 commit d1c0912
Showing 67 changed files with 317 additions and 212 deletions.
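
Note on the recurring pattern: most hunks in this commit silence mypy false positives rather than change behavior. As a minimal sketch (not from the commit; the helper function is hypothetical), these are the two suppression forms it uses:

# Module-level form (as added to sessionize.py below): this directive
# at the top of a file turns off type checking for the whole module.
# mypy: ignore-errors

import pandas as pd


def column_as_list(data: pd.DataFrame, column: str) -> list:
    """Hypothetical helper: return a DataFrame column as a plain list."""
    # Per-line form: mypy cannot infer the element type returned by
    # .values.tolist(), so only this one line is exempted.
    return data[column].values.tolist()  # type: ignore

The module-level directive is the blunt instrument; the per-line comment is preferred where only a single expression defeats the type checker.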
3 changes: 2 additions & 1 deletion msticpy/_version.py
@@ -1,2 +1,3 @@
"""Version file."""
VERSION = "2.9.0"

VERSION = "2.10.0"
2 changes: 1 addition & 1 deletion msticpy/analysis/anomalous_sequence/anomalous.py
@@ -60,7 +60,7 @@ def score_sessions(
         raise MsticpyException(f'"{session_column}" should be a column in the `data`')
 
     sessions_df = data.copy()
-    sessions = sessions_df[session_column].values.tolist()
+    sessions = sessions_df[session_column].values.tolist()  # type: ignore
 
     model = Model(sessions=sessions)
     model.train()
2 changes: 2 additions & 0 deletions msticpy/analysis/anomalous_sequence/sessionize.py
@@ -11,6 +11,8 @@
 import pandas as pd
 from pandas.core.dtypes.dtypes import DatetimeTZDtype
 
+# mypy: ignore-errors
+
 
 def sessionize_data(
     data: pd.DataFrame,
6 changes: 3 additions & 3 deletions msticpy/analysis/eventcluster.py
@@ -688,7 +688,7 @@ def plot_cluster( # noqa: C901, MC0001

     # pylint: disable=no-member
     # Spectral color map does exist
-    colors = [cm.Spectral(each) for each in np.linspace(0, 1, len(unique_labels))]
+    colors = [cm.Spectral(each) for each in np.linspace(0, 1, len(unique_labels))]  # type: ignore
     # Number of clusters in labels, ignoring noise if present.
     n_clusters_ = len(set(labels)) - (1 if -1 in labels else 0)
     n_noise_ = list(labels).count(-1)
@@ -747,8 +747,8 @@ def plot_cluster( # noqa: C901, MC0001
     except IndexError:
         pass
 
-    plt.xlabel(xlabel)
-    plt.ylabel(ylabel)
+    plt.xlabel(xlabel)  # type: ignore
+    plt.ylabel(ylabel)  # type: ignore
     plt.title(f"Estimated number of clusters: {n_clusters_}")
     plt.show()
     return plt
8 changes: 4 additions & 4 deletions msticpy/analysis/outliers.py
@@ -126,7 +126,7 @@ def plot_outlier_results(

     plt.title(plt_title)
     # pylint: disable=no-member
-    plt.contourf(xx, yy, z, cmap=plt.cm.Blues_r)
+    plt.contourf(xx, yy, z, cmap=plt.cm.Blues_r)  # type: ignore
 
     b1 = plt.scatter(x[:, 0], x[:, 1], c="white", s=20, edgecolor="k")
     b2 = plt.scatter(x_predict[:, 0], x_predict[:, 1], c="green", s=40, edgecolor="k")
@@ -142,8 +142,8 @@

     plt.xlim((xp_min_x, xp_max_x))
     plt.ylim((xp_min_y, xp_max_y))
-    plt.xlabel(feature_columns[0])
-    plt.ylabel(feature_columns[1])
+    plt.xlabel(feature_columns[0])  # type: ignore
+    plt.ylabel(feature_columns[1])  # type: ignore
 
     plt.legend(
         [b1, b2, c],
@@ -178,7 +178,7 @@ def remove_common_items(data: pd.DataFrame, columns: List[str]) -> pd.DataFrame:
     # pylint: disable=cell-var-from-loop
     for col in columns:
         filtered_df = filtered_df.filter(
-            lambda x: (x[col].std() == 0 and x[col].count() > 10)
+            lambda x: (x[col].std() == 0 and x[col].count() > 10)  # type: ignore
         )
 
     return filtered_df
6 changes: 3 additions & 3 deletions msticpy/analysis/polling_detection.py
@@ -202,15 +202,15 @@ def detect_polling(
         end = max(ts_col)
 
         if not groupby:
-            p_value, freq, interval = self._detect_polling_arr(ts_col, start, end)
+            p_value, freq, interval = self._detect_polling_arr(ts_col, start, end)  # type: ignore
 
             self.data["p_value"] = p_value
             self.data["dominant_frequency"] = freq
             self.data["dominant_interval"] = interval
         else:
             grouped_results = self.data.groupby(groupby).apply(
-                lambda x: self._detect_polling_arr(
-                    x[time_column], min(x[time_column]), max(x[time_column])
+                lambda x: self._detect_polling_arr(  # type: ignore
+                    x[time_column], min(x[time_column]), max(x[time_column])  # type: ignore
                 )
             )

4 changes: 2 additions & 2 deletions msticpy/analysis/syslog_utils.py
@@ -58,7 +58,7 @@ def create_host_record(
         Details of the host data collected
 
     """
-    host_entity = Host(src_event=syslog_df.iloc[0])
+    host_entity = Host(src_event=syslog_df.iloc[0])  # type: ignore
     # Produce list of processes on the host that are not
     # part of a 'standard' linux distro
     _apps = syslog_df["ProcessName"].unique().tolist()
@@ -186,7 +186,7 @@ def cluster_syslog_logons_df(logon_events: pd.DataFrame) -> pd.DataFrame:
         if ses_start <= ses_close_time and ses_opened != 0:
             ses_opened += 1
             continue
-        if ses_end < ses_start:
+        if ses_end < ses_start:  # type: ignore
             ses_closed += 1
             continue
         users.append(user)
4 changes: 2 additions & 2 deletions msticpy/analysis/timeseries.py
@@ -360,8 +360,8 @@ def extract_anomaly_periods(
         if not end_period:
             # If we're not already in an anomaly period
             # create start/end for a new one
-            start_period = time - pd.Timedelta(period)
-            end_period = time + pd.Timedelta(period)
+            start_period = time - pd.Timedelta(period)  # type: ignore
+            end_period = time + pd.Timedelta(period)  # type: ignore
             periods[start_period] = end_period
         elif (time - end_period) <= pd.Timedelta(
             period
12 changes: 9 additions & 3 deletions msticpy/auth/cloud_mappings_offline.py
@@ -42,7 +42,9 @@
"sqlManagement": "https://management.core.windows.net:8443/",
"microsoftGraphResourceId": "https://graph.microsoft.com/",
"appInsightsResourceId": "https://api.applicationinsights.io",
"appInsightsTelemetryChannelResourceId": "https://dc.applicationinsights.azure.com/v2/track",
"appInsightsTelemetryChannelResourceId": (
"https://dc.applicationinsights.azure.com/v2/track"
),
"attestationResourceId": "https://attest.azure.net",
"synapseAnalyticsResourceId": "https://dev.azuresynapse.net",
"logAnalyticsResourceId": "https://api.loganalytics.io",
@@ -81,7 +83,9 @@
"sqlManagement": "https://management.core.usgovcloudapi.net:8443",
"microsoftGraphResourceId": "https://graph.microsoft.us/",
"appInsightsResourceId": "https://api.applicationinsights.us",
"appInsightsTelemetryChannelResourceId": "https://dc.applicationinsights.us/v2/track",
"appInsightsTelemetryChannelResourceId": (
"https://dc.applicationinsights.us/v2/track"
),
"synapseAnalyticsResourceId": "https://dev.azuresynapse.usgovcloudapi.net",
"logAnalyticsResourceId": "https://api.loganalytics.us",
"ossrDbmsResourceId": "https://ossrdbms-aad.database.usgovcloudapi.net",
@@ -117,7 +121,9 @@
"sqlManagement": "https://management.core.chinacloudapi.cn:8443",
"microsoftGraphResourceId": "https://microsoftgraph.chinacloudapi.cn",
"appInsightsResourceId": "https://api.applicationinsights.azure.cn",
"appInsightsTelemetryChannelResourceId": "https://dc.applicationinsights.azure.cn/v2/track",
"appInsightsTelemetryChannelResourceId": (
"https://dc.applicationinsights.azure.cn/v2/track"
),
"synapseAnalyticsResourceId": "https://dev.azuresynapse.azure.cn",
"logAnalyticsResourceId": "https://api.loganalytics.azure.cn",
"ossrDbmsResourceId": "https://ossrdbms-aad.database.chinacloudapi.cn",
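
The three hunks above are formatting-only: each over-long URL literal is wrapped in parentheses so it can sit on its own line, and the resulting value is unchanged. A quick illustration (hypothetical dict, key taken from the diff):

endpoints = {
    # Wrapping the value in parentheses lets the long literal move to
    # its own line without changing the string itself.
    "appInsightsTelemetryChannelResourceId": (
        "https://dc.applicationinsights.azure.com/v2/track"
    ),
}
assert (
    endpoints["appInsightsTelemetryChannelResourceId"]
    == "https://dc.applicationinsights.azure.com/v2/track"
)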
2 changes: 1 addition & 1 deletion msticpy/common/data_utils.py
@@ -42,7 +42,7 @@ def ensure_df_datetimes(
"""
if not columns:
columns = list(data.filter(regex=".*[Tt]ime.*").columns)
columns = list(data.filter(regex=".*[Tt]ime.*").columns) # type: ignore
if isinstance(columns, str):
columns = [columns]
col_map = {
13 changes: 7 additions & 6 deletions msticpy/common/pkg_config.py
@@ -15,12 +15,12 @@
 import contextlib
 import numbers
 import os
+from importlib.resources import path
 from importlib.util import find_spec
 from pathlib import Path
 from typing import Any, Callable, Dict, Optional, Union
 
 import httpx
-import pkg_resources
 import yaml
 from yaml.error import YAMLError

@@ -218,7 +218,7 @@ def _del_config(setting_path: str, settings_dict) -> Any:
     return current_value
 
 
-def _read_config_file(config_file: str) -> Dict[str, Any]:
+def _read_config_file(config_file: Union[str, Path]) -> Dict[str, Any]:
     """
     Read a yaml config definition file.
 
@@ -270,10 +270,11 @@ def _override_config(base_config: Dict[str, Any], new_config: Dict[str, Any]):

 def _get_default_config():
     """Return the package default config file."""
-    conf_file = None
+    config_path = None
     package = "msticpy"
     try:
-        conf_file = pkg_resources.resource_filename(package, _CONFIG_FILE)
+        with path(package, _CONFIG_FILE) as config_path:
+            return _read_config_file(config_path) if config_path else {}
     except ModuleNotFoundError as mod_err:
         # if all else fails we try to find the package default config somewhere
         # in the package tree - we use the first one we find
@@ -284,8 +285,8 @@ def _get_default_config():
"msticpy package may be corrupted.",
title=f"Package {_CONFIG_FILE} missing.",
) from mod_err
conf_file = next(iter(pkg_root.glob(f"**/{_CONFIG_FILE}")))
return _read_config_file(conf_file) if conf_file else {}
config_path = next(iter(pkg_root.glob(f"**/{_CONFIG_FILE}")))
return _read_config_file(config_path) if config_path else {}


def _get_custom_config():
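
The substantive change in pkg_config.py replaces the deprecated pkg_resources.resource_filename with importlib.resources.path from the standard library. Unlike resource_filename, path is a context manager: it yields a pathlib.Path that is only guaranteed to exist inside the with block (the file may be a temporary extraction for zipped packages), which is why the return moved inside the with. A rough sketch, assuming the package config file is named msticpyconfig.yaml:

from importlib.resources import path

# Deprecated setuptools API the commit removes:
#   import pkg_resources
#   conf_file = pkg_resources.resource_filename("msticpy", "msticpyconfig.yaml")

# Standard-library replacement: the yielded Path is only valid inside
# the "with" block (assumes msticpy and its config file are installed).
with path("msticpy", "msticpyconfig.yaml") as config_path:
    config_text = config_path.read_text(encoding="utf-8")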
7 changes: 4 additions & 3 deletions msticpy/config/ce_azure.py
@@ -46,8 +46,9 @@ class CEAzure(CESimpleSettings):
     This will override the cloud and its associated Authority and API endpoint URLs.
 
     """
+    # fmt: off
     _HELP_URI = {
-        "MSTICPy Package Configuration": (
-            "https://msticpy.readthedocs.io/en/latest/getting_started/msticpyconfig.html"
-        )
+        "MSTICPy Package Configuration":
+            "https://msticpy.readthedocs.io/en/latest/getting_started/msticpyconfig.html"
     }
+    # fmt: on
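
The fmt: off / fmt: on pair tells the Black formatter to leave the enclosed lines exactly as written, so the hand-wrapped _HELP_URI value survives future format passes. The same idea with a hypothetical table:

# fmt: off
# Black leaves this block untouched, preserving the manual alignment
# of the hypothetical lookup table below.
STATUS_LABELS = {
    0:    "ok",
    1:    "warning",
    2:    "error",
}
# fmt: on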
4 changes: 1 addition & 3 deletions msticpy/config/ce_data_providers.py
@@ -29,9 +29,7 @@ class CEDataProviders(CEProviders):
"Data Providers": (
"https://msticpy.readthedocs.io/en/latest/" + "DataAcquisition.html"
),
"Spunk": (
"https://msticpy.readthedocs.io/en/latest/data_acquisition/SplunkProvider.html"
),
"Spunk": "https://msticpy.readthedocs.io/en/latest/data_acquisition/SplunkProvider.html",
"Sumologic": (
"https://github.com/microsoft/msticpy/blob/main/docs/notebooks/"
"Sumologic-DataConnector.ipynb"
6 changes: 3 additions & 3 deletions msticpy/config/ce_provider_base.py
@@ -145,9 +145,9 @@ def _populate_edit_ctrls(
             prov_name=control_name or self._prov_ctrl_name,
             mp_controls=self.mp_controls,
             conf_path=self._COMP_PATH,
-            prov_instance_name=self._select_prov_instance_name
-            if not new_provider
-            else "",
+            prov_instance_name=(
+                self._select_prov_instance_name if not new_provider else ""
+            ),
         )
         self.edit_frame.children = [self.edit_ctrls]

8 changes: 5 additions & 3 deletions msticpy/context/azure/azure_data.py
@@ -671,9 +671,11 @@ def get_network_details(
                     id=network_id,
                     private_ip=ip_addr.private_ip_address,
                     private_ip_allocation=str(ip_addr.private_ip_allocation_method),
-                    public_ip=ip_addr.public_ip_address.ip_address
-                    if ip_addr.public_ip_address
-                    else None,
+                    public_ip=(
+                        ip_addr.public_ip_address.ip_address
+                        if ip_addr.public_ip_address
+                        else None
+                    ),
                     public_ip_allocation=(
                         ip_addr.public_ip_address.public_ip_allocation_method
                         if ip_addr.public_ip_address
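
Several hunks here (azure_data.py above, plus sentinel_core.py, sentinel_dynamic_summary_types.py, and sentinel_ti.py below) apply the same mechanical rewrite: a multi-line conditional expression used as a keyword argument or dict value gets its own enclosing parentheses, matching how newer Black releases group such expressions. A self-contained before/after sketch with hypothetical objects:

from types import SimpleNamespace


def public_ip_of(ip_addr) -> dict:
    """Show the new parenthesized wrapping of a conditional expression."""
    return {
        # Before: the conditional hung off the key across continuation lines:
        #     "public_ip": ip_addr.public_ip_address.ip_address
        #     if ip_addr.public_ip_address
        #     else None,
        # After: parentheses group the whole conditional as one value.
        "public_ip": (
            ip_addr.public_ip_address.ip_address
            if ip_addr.public_ip_address
            else None
        ),
    }


nic = SimpleNamespace(public_ip_address=SimpleNamespace(ip_address="203.0.113.7"))
print(public_ip_of(nic))  # {'public_ip': '203.0.113.7'}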
5 changes: 3 additions & 2 deletions msticpy/context/azure/sentinel_core.py
@@ -321,8 +321,9 @@ def set_default_workspace(
             res_id_parts = parse_resource_id(ws_res_id)
             self.workspace_config = WorkspaceConfig.from_settings(
                 {
-                    "WorkspaceName": self._default_workspace
-                    or res_id_parts["workspace_name"],
+                    "WorkspaceName": (
+                        self._default_workspace or res_id_parts["workspace_name"]
+                    ),
                     "SubscriptionId": res_id_parts["subscription_id"],
                     "ResourceGroup": res_id_parts["resource_group"],
                 }
9 changes: 5 additions & 4 deletions msticpy/context/azure/sentinel_dynamic_summary_types.py
@@ -265,9 +265,9 @@ def from_json(cls, data: Union[Dict[str, Any], str]) -> "DynamicSummary":
             ) from json_err
         for raw_item in raw_content:
             summary_item_props = {
-                _API_TO_CLS_MAP.get(name, name): pd.to_datetime(value)
-                if name == "eventTimeUTC"
-                else value
+                _API_TO_CLS_MAP.get(name, name): (
+                    pd.to_datetime(value) if name == "eventTimeUTC" else value
+                )
                 for name, value in raw_item.items()
             }
             summary_items.append(DynamicSummaryItem(**summary_item_props))
@@ -462,7 +462,8 @@ def _(
         self.summary_items.append(
             DynamicSummaryItem(
                 packed_content={
-                    key: _convert_data_types(value) for key, value in row.items()
+                    key: _convert_data_types(value)  # type: ignore
+                    for key, value in row.items()  # type: ignore
                 },
                 **summary_params,
                 **kwargs,  # pass remaining kwargs as summary item properties
8 changes: 5 additions & 3 deletions msticpy/context/azure/sentinel_ti.py
@@ -419,9 +419,11 @@ def query_indicators(self, **kwargs) -> pd.DataFrame:
 def _build_additional_indicator_items(**kwargs) -> dict:
     """Add in additional data items for indicators."""
     data_items = {
-        "validFrom": kwargs["valid_from"].isoformat()
-        if "valid_from" in kwargs
-        else datetime.now().isoformat()
+        "validFrom": (
+            kwargs["valid_from"].isoformat()
+            if "valid_from" in kwargs
+            else datetime.now().isoformat()
+        )
     }
     for item, value in kwargs.items():
         if item in _INDICATOR_ITEMS:
4 changes: 2 additions & 2 deletions msticpy/context/azure/sentinel_watchlists.py
@@ -204,12 +204,12 @@ def add_watchlist_item(
         current_df, item_series = current_items_values.align(
             pd.Series(new_item), axis=1, copy=False  # type: ignore
         )
-        if (current_df == item_series).all(axis=1).any() and overwrite:
+        if (current_df == item_series).all(axis=1).any() and overwrite:  # type: ignore
            watchlist_id = current_items[
                 current_items.isin(list(new_item.values())).any(axis=1)
             ]["properties.watchlistItemId"].iloc[0]
             # If not in watchlist already generate new ID
-        elif not (current_df == item_series).all(axis=1).any():
+        elif not (current_df == item_series).all(axis=1).any():  # type: ignore
             watchlist_id = str(uuid4())
         else:
             raise MsticpyUserError(
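
The watchlist hunk only adds suppressions, but the align-and-compare idiom it annotates is easy to misread: aligning the existing items with the new item (as a Series) lets a row-wise equality test detect an exact duplicate. A standalone sketch with made-up data:

import pandas as pd

current_items = pd.DataFrame(
    [
        {"IpAddress": "10.0.0.1", "User": "alice"},
        {"IpAddress": "10.0.0.2", "User": "bob"},
    ]
)
new_item = {"IpAddress": "10.0.0.2", "User": "bob"}

# Align the frame and the new item on column labels (axis=1) so they
# can be compared element-wise.
current_df, item_series = current_items.align(pd.Series(new_item), axis=1)

# True if any existing row matches the new item in every column.
print((current_df == item_series).all(axis=1).any())  # True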
1 change: 1 addition & 0 deletions msticpy/context/contextproviders/__init__.py
@@ -1,4 +1,5 @@
"""Context Providers sub-package."""

from typing import Dict, Tuple

from ..._version import VERSION
