Skip to content

Commit

Permalink
update performance tracker
Browse files — browse the repository at this point in the history
Add the summed size of all files
  • Loading branch information
bturkus committed Jun 24, 2024
1 parent d2c67c1 commit 011a419
Showing 1 changed file with 11 additions and 2 deletions.
13 changes: 11 additions & 2 deletions ami_scripts/digitization_performance_tracker.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -6,7 +6,7 @@
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from hurry.filesize import size
from hurry.filesize import size, si
import numpy as np
import datetime
from matplotlib.backends.backend_pdf import PdfPages
Expand Down Expand Up @@ -109,9 +109,12 @@ def display_monthly_output_by_operator(df, args, fiscal=False):
year_column = 'fiscal_year' if fiscal else 'calendar_year'
current_year = get_fiscal_year(datetime.datetime.now()) if fiscal else datetime.datetime.now().year

# Filter data based on the historical flag
# Always define df_filtered. If not historical, filter by current year.
if not args.historical:
df_filtered = df[df[year_column] == current_year]
df_pm = df_pm[df_pm[year_column] == current_year]
else:
df_filtered = df # Use the whole dataset if historical is True

# Grouping data by operator and month, and aggregating unique IDs and average duration
output_by_operator = df_pm.groupby(['digitizer.operator.lastName', 'month']).agg({
Expand All @@ -137,7 +140,13 @@ def display_monthly_output_by_operator(df, args, fiscal=False):
output_by_operator_summed['formatted_avg_duration'] = pd.to_timedelta(output_by_operator_summed['technical.durationMilli.measure'], unit='ms').dt.components.apply(
lambda x: f"{int(x['hours']):02}:{int(x['minutes']):02}:{int(x['seconds']):02}", axis=1)

# Convert fileSize to numeric and compute the total
df_filtered['technical.fileSize.measure'] = pd.to_numeric(df_filtered['technical.fileSize.measure'], errors='coerce')
total_file_size = df_filtered['technical.fileSize.measure'].sum()

print(output_by_operator_summed)
print('\nTotal file size from all records: {}'.format(size(total_file_size, system=si)))


# Visualize data
sns.set_style("whitegrid")
Expand Down

0 comments on commit 011a419

Please sign in to comment.