Skip to content

Commit

Permalink
Merge pull request #197 from radical-cybertools/fix/df_merge
Browse files Browse the repository at this point in the history
who needs pandas anyway?
  • Loading branch information
andre-merzky authored Jun 27, 2024
2 parents 00aaa00 + 8adea0c commit 0f5c194
Showing 1 changed file with 25 additions and 16 deletions.
41 changes: 25 additions & 16 deletions src/radical/analytics/utils/plot.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,12 @@

import os
import sys
import glob
import functools

import pandas as pd
import matplotlib as mpl
import pandas as pd
import numpy as np
import matplotlib as mpl

import radical.utils as ru


Expand Down Expand Up @@ -75,7 +77,7 @@ def get_mplstyle(name):
# ------------------------------------------------------------------------------
#
def stack_transitions(series, tresource, to_stack):
'''Creates data frames for each metric and combines them into one data frame
'''Creates time series for each metric and combines them into one data frame
for alignment. Since transitions obviously happen at arbitrary times, the
timestamps for metric A may see no transitions for metric B. When using a
combined timeline, we end up with NaN entries for some metrics on most
Expand Down Expand Up @@ -104,20 +106,27 @@ def stack_transitions(series, tresource, to_stack):
that point in time.
'''

dfs = [pd.DataFrame(series[tresource][m], columns=['time', m])
for m in series[tresource]]
# find the global time line
glob_times = set()
for m,df in series[tresource].items():
for t,_ in df:
glob_times.add(t)

glob_times = sorted(glob_times)

# create a timeline for each metric, set missing values as NaN
tlines = dict()
tlines['time'] = glob_times
for m in series[tresource]:
tlines[m] = [np.nan] * len(glob_times)

# merge them into one data frame, creating a common time-line
merged = functools.reduce(lambda left, right:
pd.merge(left, right,
left_on='time',
right_on='time',
how='outer'), dfs)
# sort the global time line
merged.sort_values(by='time', inplace=True)
for t,v in series[tresource][m]:
t_idx = glob_times.index(t)
tlines[m][t_idx] = v

# fill in missing values (carry over previous ones)
merged.fillna(method='ffill', inplace=True)
# create dataframe and fill all NaN values with the previous valid value
merged = pd.DataFrame(tlines)
merged.ffill(inplace=True)

# stacked plotting and area filling don't play well together in matplotlib.
# Instead we use normal (unstacked) plot routines and fill in between, we
Expand Down

0 comments on commit 0f5c194

Please sign in to comment.