
Ianhelle/warning fixes 2024 02 11 (#752)
* Updates for new AML defaults

* AML Tools updates:
- removing unneeded functionality
- adding warnings for unsupported kernel types

* Fixing aml_tools tests for Python version

* Adding method to check auth_methods in aml_tools

* Fix to prevent QueryProvider from trying to load non-query yaml

* Adding fix for handling of random yaml files being read as query files.

* Fix if Azure.auth_methods key does not exist

* Creating mypy warning suppressions and fixes.

* Two more mypy issues in vtlookupv3 and proc_tree_build_mde
ianhelle authored Feb 12, 2024
1 parent 63f96f3 commit d1c0912
Showing 67 changed files with 317 additions and 212 deletions.
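
Note on the recurring pattern: most hunks in this commit silence mypy false positives rather than change behavior. As a minimal sketch (not from the commit; the helper function is hypothetical), these are the two suppression forms it uses:

# Module-level form (as added to sessionize.py below): this directive
# at the top of a file turns off type checking for the whole module.
# mypy: ignore-errors

import pandas as pd


def column_as_list(data: pd.DataFrame, column: str) -> list:
    """Hypothetical helper: return a DataFrame column as a plain list."""
    # Per-line form: mypy cannot infer the element type returned by
    # .values.tolist(), so only this one line is exempted.
    return data[column].values.tolist()  # type: ignore

The module-level directive is the blunt instrument; the per-line comment is preferred where only a single expression defeats the type checker.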
3 changes: 2 additions & 1 deletion msticpy/_version.py
@@ -1,2 +1,3 @@
"""Version file."""
VERSION = "2.9.0"

VERSION = "2.10.0"
2 changes: 1 addition & 1 deletion msticpy/analysis/anomalous_sequence/anomalous.py
@@ -60,7 +60,7 @@ def score_sessions(
         raise MsticpyException(f'"{session_column}" should be a column in the `data`')
 
     sessions_df = data.copy()
-    sessions = sessions_df[session_column].values.tolist()
+    sessions = sessions_df[session_column].values.tolist()  # type: ignore
 
     model = Model(sessions=sessions)
     model.train()
2 changes: 2 additions & 0 deletions msticpy/analysis/anomalous_sequence/sessionize.py
@@ -11,6 +11,8 @@
 import pandas as pd
 from pandas.core.dtypes.dtypes import DatetimeTZDtype
 
+# mypy: ignore-errors
+
 
 def sessionize_data(
     data: pd.DataFrame,
6 changes: 3 additions & 3 deletions msticpy/analysis/eventcluster.py
@@ -688,7 +688,7 @@ def plot_cluster( # noqa: C901, MC0001

     # pylint: disable=no-member
     # Spectral color map does exist
-    colors = [cm.Spectral(each) for each in np.linspace(0, 1, len(unique_labels))]
+    colors = [cm.Spectral(each) for each in np.linspace(0, 1, len(unique_labels))]  # type: ignore
     # Number of clusters in labels, ignoring noise if present.
     n_clusters_ = len(set(labels)) - (1 if -1 in labels else 0)
     n_noise_ = list(labels).count(-1)
@@ -747,8 +747,8 @@ def plot_cluster( # noqa: C901, MC0001
     except IndexError:
         pass
 
-    plt.xlabel(xlabel)
-    plt.ylabel(ylabel)
+    plt.xlabel(xlabel)  # type: ignore
+    plt.ylabel(ylabel)  # type: ignore
     plt.title(f"Estimated number of clusters: {n_clusters_}")
     plt.show()
     return plt
8 changes: 4 additions & 4 deletions msticpy/analysis/outliers.py
@@ -126,7 +126,7 @@ def plot_outlier_results(

     plt.title(plt_title)
     # pylint: disable=no-member
-    plt.contourf(xx, yy, z, cmap=plt.cm.Blues_r)
+    plt.contourf(xx, yy, z, cmap=plt.cm.Blues_r)  # type: ignore
 
     b1 = plt.scatter(x[:, 0], x[:, 1], c="white", s=20, edgecolor="k")
     b2 = plt.scatter(x_predict[:, 0], x_predict[:, 1], c="green", s=40, edgecolor="k")
@@ -142,8 +142,8 @@

     plt.xlim((xp_min_x, xp_max_x))
     plt.ylim((xp_min_y, xp_max_y))
-    plt.xlabel(feature_columns[0])
-    plt.ylabel(feature_columns[1])
+    plt.xlabel(feature_columns[0])  # type: ignore
+    plt.ylabel(feature_columns[1])  # type: ignore
 
     plt.legend(
         [b1, b2, c],
@@ -178,7 +178,7 @@ def remove_common_items(data: pd.DataFrame, columns: List[str]) -> pd.DataFrame:
     # pylint: disable=cell-var-from-loop
     for col in columns:
         filtered_df = filtered_df.filter(
-            lambda x: (x[col].std() == 0 and x[col].count() > 10)
+            lambda x: (x[col].std() == 0 and x[col].count() > 10)  # type: ignore
         )
 
     return filtered_df
6 changes: 3 additions & 3 deletions msticpy/analysis/polling_detection.py
@@ -202,15 +202,15 @@ def detect_polling(
         end = max(ts_col)
 
         if not groupby:
-            p_value, freq, interval = self._detect_polling_arr(ts_col, start, end)
+            p_value, freq, interval = self._detect_polling_arr(ts_col, start, end)  # type: ignore
 
             self.data["p_value"] = p_value
             self.data["dominant_frequency"] = freq
             self.data["dominant_interval"] = interval
         else:
             grouped_results = self.data.groupby(groupby).apply(
-                lambda x: self._detect_polling_arr(
-                    x[time_column], min(x[time_column]), max(x[time_column])
+                lambda x: self._detect_polling_arr(  # type: ignore
+                    x[time_column], min(x[time_column]), max(x[time_column])  # type: ignore
                 )
             )

4 changes: 2 additions & 2 deletions msticpy/analysis/syslog_utils.py
@@ -58,7 +58,7 @@ def create_host_record(
         Details of the host data collected
 
     """
-    host_entity = Host(src_event=syslog_df.iloc[0])
+    host_entity = Host(src_event=syslog_df.iloc[0])  # type: ignore
     # Produce list of processes on the host that are not
     # part of a 'standard' linux distro
     _apps = syslog_df["ProcessName"].unique().tolist()
@@ -186,7 +186,7 @@ def cluster_syslog_logons_df(logon_events: pd.DataFrame) -> pd.DataFrame:
         if ses_start <= ses_close_time and ses_opened != 0:
             ses_opened += 1
             continue
-        if ses_end < ses_start:
+        if ses_end < ses_start:  # type: ignore
             ses_closed += 1
             continue
         users.append(user)
4 changes: 2 additions & 2 deletions msticpy/analysis/timeseries.py
@@ -360,8 +360,8 @@ def extract_anomaly_periods(
         if not end_period:
             # If we're not already in an anomaly period
             # create start/end for a new one
-            start_period = time - pd.Timedelta(period)
-            end_period = time + pd.Timedelta(period)
+            start_period = time - pd.Timedelta(period)  # type: ignore
+            end_period = time + pd.Timedelta(period)  # type: ignore
             periods[start_period] = end_period
         elif (time - end_period) <= pd.Timedelta(
             period
12 changes: 9 additions & 3 deletions msticpy/auth/cloud_mappings_offline.py
@@ -42,7 +42,9 @@
"sqlManagement": "https://management.core.windows.net:8443/",
"microsoftGraphResourceId": "https://graph.microsoft.com/",
"appInsightsResourceId": "https://api.applicationinsights.io",
"appInsightsTelemetryChannelResourceId": "https://dc.applicationinsights.azure.com/v2/track",
"appInsightsTelemetryChannelResourceId": (
"https://dc.applicationinsights.azure.com/v2/track"
),
"attestationResourceId": "https://attest.azure.net",
"synapseAnalyticsResourceId": "https://dev.azuresynapse.net",
"logAnalyticsResourceId": "https://api.loganalytics.io",
@@ -81,7 +83,9 @@
"sqlManagement": "https://management.core.usgovcloudapi.net:8443",
"microsoftGraphResourceId": "https://graph.microsoft.us/",
"appInsightsResourceId": "https://api.applicationinsights.us",
"appInsightsTelemetryChannelResourceId": "https://dc.applicationinsights.us/v2/track",
"appInsightsTelemetryChannelResourceId": (
"https://dc.applicationinsights.us/v2/track"
),
"synapseAnalyticsResourceId": "https://dev.azuresynapse.usgovcloudapi.net",
"logAnalyticsResourceId": "https://api.loganalytics.us",
"ossrDbmsResourceId": "https://ossrdbms-aad.database.usgovcloudapi.net",
@@ -117,7 +121,9 @@
"sqlManagement": "https://management.core.chinacloudapi.cn:8443",
"microsoftGraphResourceId": "https://microsoftgraph.chinacloudapi.cn",
"appInsightsResourceId": "https://api.applicationinsights.azure.cn",
"appInsightsTelemetryChannelResourceId": "https://dc.applicationinsights.azure.cn/v2/track",
"appInsightsTelemetryChannelResourceId": (
"https://dc.applicationinsights.azure.cn/v2/track"
),
"synapseAnalyticsResourceId": "https://dev.azuresynapse.azure.cn",
"logAnalyticsResourceId": "https://api.loganalytics.azure.cn",
"ossrDbmsResourceId": "https://ossrdbms-aad.database.chinacloudapi.cn",
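
The three hunks above are formatting-only: each over-long URL literal is wrapped in parentheses so it can sit on its own line, and the resulting value is unchanged. A quick illustration (hypothetical dict, key taken from the diff):

endpoints = {
    # Wrapping the value in parentheses lets the long literal move to
    # its own line without changing the string itself.
    "appInsightsTelemetryChannelResourceId": (
        "https://dc.applicationinsights.azure.com/v2/track"
    ),
}
assert (
    endpoints["appInsightsTelemetryChannelResourceId"]
    == "https://dc.applicationinsights.azure.com/v2/track"
)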
2 changes: 1 addition & 1 deletion msticpy/common/data_utils.py
@@ -42,7 +42,7 @@ def ensure_df_datetimes(
"""
if not columns:
columns = list(data.filter(regex=".*[Tt]ime.*").columns)
columns = list(data.filter(regex=".*[Tt]ime.*").columns) # type: ignore
if isinstance(columns, str):
columns = [columns]
col_map = {
13 changes: 7 additions & 6 deletions msticpy/common/pkg_config.py
@@ -15,12 +15,12 @@
 import contextlib
 import numbers
 import os
+from importlib.resources import path
 from importlib.util import find_spec
 from pathlib import Path
 from typing import Any, Callable, Dict, Optional, Union
 
 import httpx
-import pkg_resources
 import yaml
 from yaml.error import YAMLError

@@ -218,7 +218,7 @@ def _del_config(setting_path: str, settings_dict) -> Any:
     return current_value
 
 
-def _read_config_file(config_file: str) -> Dict[str, Any]:
+def _read_config_file(config_file: Union[str, Path]) -> Dict[str, Any]:
     """
     Read a yaml config definition file.
 
@@ -270,10 +270,11 @@ def _override_config(base_config: Dict[str, Any], new_config: Dict[str, Any]):

 def _get_default_config():
     """Return the package default config file."""
-    conf_file = None
+    config_path = None
     package = "msticpy"
     try:
-        conf_file = pkg_resources.resource_filename(package, _CONFIG_FILE)
+        with path(package, _CONFIG_FILE) as config_path:
+            return _read_config_file(config_path) if config_path else {}
     except ModuleNotFoundError as mod_err:
         # if all else fails we try to find the package default config somewhere
         # in the package tree - we use the first one we find
@@ -284,8 +285,8 @@ def _get_default_config():
"msticpy package may be corrupted.",
title=f"Package {_CONFIG_FILE} missing.",
) from mod_err
conf_file = next(iter(pkg_root.glob(f"**/{_CONFIG_FILE}")))
return _read_config_file(conf_file) if conf_file else {}
config_path = next(iter(pkg_root.glob(f"**/{_CONFIG_FILE}")))
return _read_config_file(config_path) if config_path else {}


def _get_custom_config():
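
The substantive change in pkg_config.py replaces the deprecated pkg_resources.resource_filename with importlib.resources.path from the standard library. Unlike resource_filename, path is a context manager: it yields a pathlib.Path that is only guaranteed to exist inside the with block (the file may be a temporary extraction for zipped packages), which is why the return moved inside the with. A rough sketch, assuming the package config file is named msticpyconfig.yaml:

from importlib.resources import path

# Deprecated setuptools API the commit removes:
#   import pkg_resources
#   conf_file = pkg_resources.resource_filename("msticpy", "msticpyconfig.yaml")

# Standard-library replacement: the yielded Path is only valid inside
# the "with" block (assumes msticpy and its config file are installed).
with path("msticpy", "msticpyconfig.yaml") as config_path:
    config_text = config_path.read_text(encoding="utf-8")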
7 changes: 4 additions & 3 deletions msticpy/config/ce_azure.py
@@ -46,8 +46,9 @@ class CEAzure(CESimpleSettings):
     This will override the cloud and its associated Authority and API endpoint URLs.
 
     """
+    # fmt: off
     _HELP_URI = {
-        "MSTICPy Package Configuration": (
-            "https://msticpy.readthedocs.io/en/latest/getting_started/msticpyconfig.html"
-        )
+        "MSTICPy Package Configuration":
+            "https://msticpy.readthedocs.io/en/latest/getting_started/msticpyconfig.html"
     }
+    # fmt: on
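
The fmt: off / fmt: on pair tells the Black formatter to leave the enclosed lines exactly as written, so the hand-wrapped _HELP_URI value survives future format passes. The same idea with a hypothetical table:

# fmt: off
# Black leaves this block untouched, preserving the manual alignment
# of the hypothetical lookup table below.
STATUS_LABELS = {
    0:    "ok",
    1:    "warning",
    2:    "error",
}
# fmt: on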
4 changes: 1 addition & 3 deletions msticpy/config/ce_data_providers.py
@@ -29,9 +29,7 @@ class CEDataProviders(CEProviders):
"Data Providers": (
"https://msticpy.readthedocs.io/en/latest/" + "DataAcquisition.html"
),
"Spunk": (
"https://msticpy.readthedocs.io/en/latest/data_acquisition/SplunkProvider.html"
),
"Spunk": "https://msticpy.readthedocs.io/en/latest/data_acquisition/SplunkProvider.html",
"Sumologic": (
"https://github.com/microsoft/msticpy/blob/main/docs/notebooks/"
"Sumologic-DataConnector.ipynb"
6 changes: 3 additions & 3 deletions msticpy/config/ce_provider_base.py
@@ -145,9 +145,9 @@ def _populate_edit_ctrls(
             prov_name=control_name or self._prov_ctrl_name,
             mp_controls=self.mp_controls,
             conf_path=self._COMP_PATH,
-            prov_instance_name=self._select_prov_instance_name
-            if not new_provider
-            else "",
+            prov_instance_name=(
+                self._select_prov_instance_name if not new_provider else ""
+            ),
         )
         self.edit_frame.children = [self.edit_ctrls]

8 changes: 5 additions & 3 deletions msticpy/context/azure/azure_data.py
@@ -671,9 +671,11 @@ def get_network_details(
                     id=network_id,
                     private_ip=ip_addr.private_ip_address,
                     private_ip_allocation=str(ip_addr.private_ip_allocation_method),
-                    public_ip=ip_addr.public_ip_address.ip_address
-                    if ip_addr.public_ip_address
-                    else None,
+                    public_ip=(
+                        ip_addr.public_ip_address.ip_address
+                        if ip_addr.public_ip_address
+                        else None
+                    ),
                     public_ip_allocation=(
                         ip_addr.public_ip_address.public_ip_allocation_method
                         if ip_addr.public_ip_address
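
Several hunks here (azure_data.py above, plus sentinel_core.py, sentinel_dynamic_summary_types.py, and sentinel_ti.py below) apply the same mechanical rewrite: a multi-line conditional expression used as a keyword argument or dict value gets its own enclosing parentheses, matching how newer Black releases group such expressions. A self-contained before/after sketch with hypothetical objects:

from types import SimpleNamespace


def public_ip_of(ip_addr) -> dict:
    """Show the new parenthesized wrapping of a conditional expression."""
    return {
        # Before: the conditional hung off the key across continuation lines:
        #     "public_ip": ip_addr.public_ip_address.ip_address
        #     if ip_addr.public_ip_address
        #     else None,
        # After: parentheses group the whole conditional as one value.
        "public_ip": (
            ip_addr.public_ip_address.ip_address
            if ip_addr.public_ip_address
            else None
        ),
    }


nic = SimpleNamespace(public_ip_address=SimpleNamespace(ip_address="203.0.113.7"))
print(public_ip_of(nic))  # {'public_ip': '203.0.113.7'}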
5 changes: 3 additions & 2 deletions msticpy/context/azure/sentinel_core.py
@@ -321,8 +321,9 @@ def set_default_workspace(
             res_id_parts = parse_resource_id(ws_res_id)
             self.workspace_config = WorkspaceConfig.from_settings(
                 {
-                    "WorkspaceName": self._default_workspace
-                    or res_id_parts["workspace_name"],
+                    "WorkspaceName": (
+                        self._default_workspace or res_id_parts["workspace_name"]
+                    ),
                     "SubscriptionId": res_id_parts["subscription_id"],
                     "ResourceGroup": res_id_parts["resource_group"],
                 }
9 changes: 5 additions & 4 deletions msticpy/context/azure/sentinel_dynamic_summary_types.py
@@ -265,9 +265,9 @@ def from_json(cls, data: Union[Dict[str, Any], str]) -> "DynamicSummary":
             ) from json_err
         for raw_item in raw_content:
             summary_item_props = {
-                _API_TO_CLS_MAP.get(name, name): pd.to_datetime(value)
-                if name == "eventTimeUTC"
-                else value
+                _API_TO_CLS_MAP.get(name, name): (
+                    pd.to_datetime(value) if name == "eventTimeUTC" else value
+                )
                 for name, value in raw_item.items()
             }
             summary_items.append(DynamicSummaryItem(**summary_item_props))
@@ -462,7 +462,8 @@ def _(
         self.summary_items.append(
             DynamicSummaryItem(
                 packed_content={
-                    key: _convert_data_types(value) for key, value in row.items()
+                    key: _convert_data_types(value)  # type: ignore
+                    for key, value in row.items()  # type: ignore
                 },
                 **summary_params,
                 **kwargs,  # pass remaining kwargs as summary item properties
8 changes: 5 additions & 3 deletions msticpy/context/azure/sentinel_ti.py
@@ -419,9 +419,11 @@ def query_indicators(self, **kwargs) -> pd.DataFrame:
 def _build_additional_indicator_items(**kwargs) -> dict:
     """Add in additional data items for indicators."""
     data_items = {
-        "validFrom": kwargs["valid_from"].isoformat()
-        if "valid_from" in kwargs
-        else datetime.now().isoformat()
+        "validFrom": (
+            kwargs["valid_from"].isoformat()
+            if "valid_from" in kwargs
+            else datetime.now().isoformat()
+        )
     }
     for item, value in kwargs.items():
         if item in _INDICATOR_ITEMS:
4 changes: 2 additions & 2 deletions msticpy/context/azure/sentinel_watchlists.py
@@ -204,12 +204,12 @@ def add_watchlist_item(
         current_df, item_series = current_items_values.align(
             pd.Series(new_item), axis=1, copy=False  # type: ignore
         )
-        if (current_df == item_series).all(axis=1).any() and overwrite:
+        if (current_df == item_series).all(axis=1).any() and overwrite:  # type: ignore
            watchlist_id = current_items[
                 current_items.isin(list(new_item.values())).any(axis=1)
             ]["properties.watchlistItemId"].iloc[0]
             # If not in watchlist already generate new ID
-        elif not (current_df == item_series).all(axis=1).any():
+        elif not (current_df == item_series).all(axis=1).any():  # type: ignore
             watchlist_id = str(uuid4())
         else:
             raise MsticpyUserError(
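
The watchlist hunk only adds suppressions, but the align-and-compare idiom it annotates is easy to misread: aligning the existing items with the new item (as a Series) lets a row-wise equality test detect an exact duplicate. A standalone sketch with made-up data:

import pandas as pd

current_items = pd.DataFrame(
    [
        {"IpAddress": "10.0.0.1", "User": "alice"},
        {"IpAddress": "10.0.0.2", "User": "bob"},
    ]
)
new_item = {"IpAddress": "10.0.0.2", "User": "bob"}

# Align the frame and the new item on column labels (axis=1) so they
# can be compared element-wise.
current_df, item_series = current_items.align(pd.Series(new_item), axis=1)

# True if any existing row matches the new item in every column.
print((current_df == item_series).all(axis=1).any())  # True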
1 change: 1 addition & 0 deletions msticpy/context/contextproviders/__init__.py
@@ -1,4 +1,5 @@
"""Context Providers sub-package."""

from typing import Dict, Tuple

from ..._version import VERSION
