From bbaf162f37c054e1144a5251060bc45f1a0d2f86 Mon Sep 17 00:00:00 2001 From: Igor Infingardi Date: Wed, 16 Oct 2024 23:13:30 -0300 Subject: [PATCH 1/3] Add plotFilteredData function with options to filter NaN and zero-value columns --- bibmon/_bibmon_tools.py | 74 ++++++++++++++++++++++++++++++++++++++++- 1 file changed, 73 insertions(+), 1 deletion(-) diff --git a/bibmon/_bibmon_tools.py b/bibmon/_bibmon_tools.py index fd598d9..b2829d3 100644 --- a/bibmon/_bibmon_tools.py +++ b/bibmon/_bibmon_tools.py @@ -692,4 +692,76 @@ def comparative_table (models, X_train, X_validation, X_test, return_tables.append(times_df) - return return_tables \ No newline at end of file + return return_tables + +############################################################################## + +def plotFilteredData(data, color, title, notWantedTags = [], removeNaNColumns = True, removeZeroColumns = True): + """ + Filters and plots data from a dictionary, concatenating the values into a single DataFrame. + Optionally removes columns that contain only NaN values or only zeros. The remaining data is + plotted with each column displayed in individual vertically stacked subplots, sharing the same X-axis. + + Parameters + ---------- + data: dict + A dictionary containing the data. The keys represent categories or filenames, and + the values are pandas DataFrames. + color: string + The color to be used for the plot lines. + title: string + The main title for the plot. + notWantedTags: list, optional + A list of column names (tags) to be excluded from the plots. Default is an empty list. + removeNaNColumns: bool, optional + If True, columns with only NaN values will be removed from the data before plotting. Default is True. + removeZeroColumns: bool, optional + If True, columns that contain only zeros will be removed from the data before plotting. Default is True. + + Returns + ---------- + filteredByData: pandas.DataFrame + The processed and filtered data that was used in the visualization. + tags: list + A list of column names (tags) that were plotted. + """ + + archivesKeys = data.keys() + + filteredByData = pd.concat([data[key] for key in archivesKeys]) # Concatenate data + filteredByData = filteredByData.apply(pd.to_numeric, errors='coerce') + + if removeNaNColumns: + filteredByData = filteredByData.dropna(axis=1, how='all') # Remove all-NaN columns + + if removeZeroColumns: + filteredByData = filteredByData.loc[:, (filteredByData != 0).any(axis=0)] # Remove all-zero columns + + tags = list(filteredByData.keys()) + tags = [key for key in tags if key not in notWantedTags] # Remove unwanted tags + + fig, ax = plt.subplots(len(tags), 1, figsize=(18, 10), sharex=True) + fig.suptitle(f"{title}", fontsize=16) + + for i, tag in enumerate(tags): + tagData = filteredByData[tag].values + ax[i].plot(tagData, c=color, linewidth=0.8) + ax[i].set_ylabel(tag, rotation=0, fontsize=14) + ax[i].set_yticks([]) + + # Clean up unnecessary borders + ax[i].spines["top"].set_visible(False) + ax[i].spines["right"].set_visible(False) + ax[i].spines["left"].set_visible(False) + + if i < len(tags) - 1: # Hide X-axis for all but the last plot + ax[i].set_xticks([]) + ax[i].spines["bottom"].set_visible(False) + ax[i].xaxis.set_ticks_position('none') + else: + # Rotate X-axis labels for readability + for label in ax[i].get_xticklabels(): + label.set_rotation(45) + label.set_ha('right') + + return filteredByData, tags From 22506d0b972851cb2873d24eda57d95829f24104 Mon Sep 17 00:00:00 2001 From: Igor Infingardi Date: Thu, 17 Oct 2024 23:41:03 -0300 Subject: [PATCH 2/3] fix: fixing filter_and_plot_data --- bibmon/_bibmon_tools.py | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/bibmon/_bibmon_tools.py b/bibmon/_bibmon_tools.py index b2829d3..506555c 100644 --- a/bibmon/_bibmon_tools.py +++ b/bibmon/_bibmon_tools.py @@ -696,7 +696,7 @@ def comparative_table (models, X_train, X_validation, X_test, ############################################################################## -def plotFilteredData(data, color, title, notWantedTags = [], removeNaNColumns = True, removeZeroColumns = True): +def filter_and_plot_data(data, color, title, not_wanted_tags = [], remove_NaN_columns = True, remove_zero_columns = True): """ Filters and plots data from a dictionary, concatenating the values into a single DataFrame. Optionally removes columns that contain only NaN values or only zeros. The remaining data is @@ -711,11 +711,11 @@ def plotFilteredData(data, color, title, notWantedTags = [], removeNaNColumns = The color to be used for the plot lines. title: string The main title for the plot. - notWantedTags: list, optional + not_wanted_tags: list, optional A list of column names (tags) to be excluded from the plots. Default is an empty list. - removeNaNColumns: bool, optional + remove_NaN_columns: bool, optional If True, columns with only NaN values will be removed from the data before plotting. Default is True. - removeZeroColumns: bool, optional + remove_zero_columns: bool, optional If True, columns that contain only zeros will be removed from the data before plotting. Default is True. Returns @@ -726,25 +726,25 @@ def plotFilteredData(data, color, title, notWantedTags = [], removeNaNColumns = A list of column names (tags) that were plotted. """ - archivesKeys = data.keys() + archives_keys = data.keys() - filteredByData = pd.concat([data[key] for key in archivesKeys]) # Concatenate data - filteredByData = filteredByData.apply(pd.to_numeric, errors='coerce') + filtered_by_data = pd.concat([data[key] for key in archives_keys]) # Concatenate data + filtered_by_data = filtered_by_data.apply(pd.to_numeric, errors='coerce') - if removeNaNColumns: - filteredByData = filteredByData.dropna(axis=1, how='all') # Remove all-NaN columns + if remove_NaN_columns: + filtered_by_data = filtered_by_data.dropna(axis=1, how='all') # Remove all-NaN columns - if removeZeroColumns: - filteredByData = filteredByData.loc[:, (filteredByData != 0).any(axis=0)] # Remove all-zero columns + if remove_zero_columns: + filtered_by_data = filtered_by_data.loc[:, (filtered_by_data != 0).any(axis=0)] # Remove all-zero columns - tags = list(filteredByData.keys()) - tags = [key for key in tags if key not in notWantedTags] # Remove unwanted tags + tags = list(filtered_by_data.keys()) + tags = [key for key in tags if key not in not_wanted_tags] # Remove unwanted tags fig, ax = plt.subplots(len(tags), 1, figsize=(18, 10), sharex=True) fig.suptitle(f"{title}", fontsize=16) for i, tag in enumerate(tags): - tagData = filteredByData[tag].values + tagData = filtered_by_data[tag].values ax[i].plot(tagData, c=color, linewidth=0.8) ax[i].set_ylabel(tag, rotation=0, fontsize=14) ax[i].set_yticks([]) @@ -764,4 +764,4 @@ def plotFilteredData(data, color, title, notWantedTags = [], removeNaNColumns = label.set_rotation(45) label.set_ha('right') - return filteredByData, tags + return filtered_by_data, tags From d5c313bdac37a7e05ffc5d1b5305a0eb3a17de3c Mon Sep 17 00:00:00 2001 From: Igor Infingardi Date: Fri, 18 Oct 2024 22:13:05 -0300 Subject: [PATCH 3/3] feat: filter_and_plot_data --- bibmon/_bibmon_tools.py | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/bibmon/_bibmon_tools.py b/bibmon/_bibmon_tools.py index 506555c..ceca675 100644 --- a/bibmon/_bibmon_tools.py +++ b/bibmon/_bibmon_tools.py @@ -725,43 +725,43 @@ def filter_and_plot_data(data, color, title, not_wanted_tags = [], remove_NaN_co tags: list A list of column names (tags) that were plotted. """ + from matplotlib.ticker import MaxNLocator - archives_keys = data.keys() + filtered_by_data = data.apply(pd.to_numeric, errors='coerce') - filtered_by_data = pd.concat([data[key] for key in archives_keys]) # Concatenate data - filtered_by_data = filtered_by_data.apply(pd.to_numeric, errors='coerce') - if remove_NaN_columns: - filtered_by_data = filtered_by_data.dropna(axis=1, how='all') # Remove all-NaN columns + filtered_by_data = filtered_by_data.dropna(axis=1, how='all') if remove_zero_columns: - filtered_by_data = filtered_by_data.loc[:, (filtered_by_data != 0).any(axis=0)] # Remove all-zero columns + filtered_by_data = filtered_by_data.loc[:, (filtered_by_data != 0).any(axis=0)] tags = list(filtered_by_data.keys()) - tags = [key for key in tags if key not in not_wanted_tags] # Remove unwanted tags + tags = [key for key in tags if key not in not_wanted_tags] fig, ax = plt.subplots(len(tags), 1, figsize=(18, 10), sharex=True) fig.suptitle(f"{title}", fontsize=16) + + timestamp_label = filtered_by_data.index for i, tag in enumerate(tags): tagData = filtered_by_data[tag].values ax[i].plot(tagData, c=color, linewidth=0.8) - ax[i].set_ylabel(tag, rotation=0, fontsize=14) + ax[i].set_ylabel(tag, rotation=0, fontsize=14,labelpad=100) ax[i].set_yticks([]) - - # Clean up unnecessary borders + ax[i].spines["top"].set_visible(False) ax[i].spines["right"].set_visible(False) ax[i].spines["left"].set_visible(False) + + ax[i].yaxis.set_major_locator(MaxNLocator(nbins=3)) - if i < len(tags) - 1: # Hide X-axis for all but the last plot + if i < len(tags) - 1: ax[i].set_xticks([]) ax[i].spines["bottom"].set_visible(False) ax[i].xaxis.set_ticks_position('none') else: - # Rotate X-axis labels for readability - for label in ax[i].get_xticklabels(): - label.set_rotation(45) - label.set_ha('right') + ax[i].set_xlabel('Time', fontsize=14) + ax[i].xaxis.set_major_locator(MaxNLocator(nbins=5)) + ax[i].set_xticklabels(timestamp_label.strftime('%Y-%m-%d %H:%M:%S'), rotation=0, ha='right') return filtered_by_data, tags