Commit
Showing 12 changed files with 366 additions and 0 deletions.
Binary file not shown.
@@ -0,0 +1,120 @@
import pandas as pd
from data_validation import clean_dataframe, clean_os_dataframe
import streamlit as st
import chainladder as cl
from datetime import datetime
from streamlit_extras.app_logo import add_logo

st.set_page_config(layout='wide', page_title='Gralix Actuarial Reserving Interface', page_icon='Gralix Circle.ico')

# Re-save session state so values persist across page switches; the uploader's
# own widget key is skipped because widget keys cannot be re-assigned.
for k, v in st.session_state.items():
    if k != 'upload':
        st.session_state[k] = v


def logo():
    add_logo("rsz_gralix2.png", height=150)


logo()


@st.cache_data
def convert_df(data_frame):
    # Cache the conversion to prevent computation on every rerun
    return data_frame.to_csv().encode('utf-8')


@st.cache_data
def read_data(data):
    return pd.read_csv(data)


@st.cache_data
def find_index(file_list, match_string):
    # Position of the first uploaded file whose name contains match_string;
    # implicitly returns None when nothing matches.
    for i, file in enumerate(file_list):
        file_name = file.name
        if match_string in file_name:
            return i


# Data to be uploaded - Add to session state
files = ['claims', 'case', 'premium']

for file in files:
    if file not in st.session_state:
        st.session_state[file] = None

min_date = datetime(2000, 1, 1)
max_date = datetime(2030, 12, 31)


uploaded_files = st.sidebar.file_uploader(label='Upload Files',
                                          help='Upload the separate files containing Paid/Incurred Claims Data, '
                                               'Case Reserves Data and Premium Data. Files MUST be ".csv"',
                                          accept_multiple_files=True, key='upload')

if len(st.session_state['upload']) > 0:

    # find_index returns None when a file is missing, so the indexing below
    # raises TypeError and that file is simply skipped.
    try:
        claims = st.session_state['upload'][find_index(st.session_state['upload'], 'Claims Data')]
        claims_df = read_data(claims)
        clean_df, error_df = clean_dataframe(claims_df)
        st.session_state['claims'] = clean_df
        st.session_state['errors'] = error_df
    except (TypeError, ValueError):
        pass

    try:
        case = st.session_state['upload'][find_index(st.session_state['upload'], 'Outstanding')]
        case_res_df = read_data(case)
        case_df, case_error = clean_os_dataframe(case_res_df)
        for col in case_df.columns:
            if 'Unnamed' in col:
                case_df = case_df.drop(col, axis=1)
        st.session_state['case'] = case_df

    except (TypeError, ValueError):
        pass

    try:
        premium = st.session_state['upload'][find_index(st.session_state['upload'], 'Premium')]
        premium_df = read_data(premium)
        premium_df = premium_df.dropna()
        premium_df = premium_df.set_index(keys='Year', drop=True)

        st.session_state['premium'] = premium_df

    except (TypeError, ValueError):
        pass

claims_data = st.session_state.get('claims')
errors_df = st.session_state.get('errors')
case_data = st.session_state.get('case')
premium_data = st.session_state.get('premium')

if claims_data is not None:
    st.markdown("**USABLE DATA: Clean File**")
    st.dataframe(claims_data)
    clean_csv = convert_df(claims_data)
    download = st.sidebar.download_button(label="Download Clean File",
                                          data=clean_csv,
                                          file_name='Clean.csv',
                                          mime='text/csv')
if errors_df is not None:
    st.markdown("**UNUSABLE DATA: Error File**")
    st.dataframe(errors_df)
    errors_csv = convert_df(errors_df)
    download = st.sidebar.download_button(label="Download Errors File",
                                          data=errors_csv,
                                          file_name='Errors.csv',
                                          mime='text/csv')

if case_data is not None:
    st.markdown("**CASE RESERVES DATA**")
    st.dataframe(case_data)

if premium_data is not None:
    st.markdown("**PREMIUM DATA**")
    st.dataframe(premium_data)
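A minimal sketch of the file matching this page relies on: uploads are picked out purely by substring ('Claims Data', 'Outstanding', 'Premium'), and a missing file surfaces as a TypeError when the resulting None is used as an index. The file names below are made up, and types.SimpleNamespace stands in for Streamlit's UploadedFile objects:

from types import SimpleNamespace


def find_index(file_list, match_string):
    # Same logic as above, reproduced so the sketch runs standalone.
    for i, file in enumerate(file_list):
        if match_string in file.name:
            return i


uploads = [SimpleNamespace(name='Motor Claims Data.csv'),
           SimpleNamespace(name='Outstanding Q4.csv')]

assert find_index(uploads, 'Claims Data') == 0
assert find_index(uploads, 'Outstanding') == 1
assert find_index(uploads, 'Premium') is None   # uploads[None] would raise TypeError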
@@ -0,0 +1,84 @@
import pandas as pd
import chainladder as cl
import warnings

warnings.filterwarnings(action='ignore', category=UserWarning)


def create_triangles(data, start_date, end_date):
    dff = data[(data['LOSS DATE'] >= start_date) & (data['PAID DATE'] <= end_date)]
    triangles = cl.Triangle(
        data=dff,
        origin='LOSS DATE',
        development='PAID DATE',
        columns=['GROSS AMOUNT', 'NET AMOUNT'],
        index=['MAIN CLASS', 'ADDITIONAL SEGMENTATION'],
        cumulative=False
    )
    return triangles


def create_os_triangles(data, start_date, end_date):
    dff = data[(data['LOSS DATE'] >= start_date) & (data['OUTSTANDING DATE'] <= end_date)]
    triangles = cl.Triangle(
        data=dff,
        origin='LOSS DATE',
        development='OUTSTANDING DATE',
        columns=['GROSS AMOUNT', 'NET AMOUNT'],
        index=['MAIN CLASS', 'ADDITIONAL SEGMENTATION'],
        cumulative=False
    )
    return triangles


def tri_size(triangle):
    # Number of origin periods: chainladder triangles are 4D
    # (index x columns x origin x development).
    size = triangle.shape[2]
    return size


def pure_ibnr(ibnr_triangle, os_triangle):
    total = []
    dates = []
    zeroized = []
    if tri_size(ibnr_triangle) == tri_size(os_triangle):
        for i in range(tri_size(ibnr_triangle)):
            # Label each origin period according to the triangle's grain
            # (yearly, quarterly or monthly).
            if ibnr_triangle.origin_grain == 'Y':
                dates.append(ibnr_triangle.origin[i].strftime('%F'))
            elif ibnr_triangle.origin_grain == 'Q':
                dates.append(ibnr_triangle.origin[i].strftime('%F-Q%q'))
            elif ibnr_triangle.origin_grain == 'M':
                dates.append(ibnr_triangle.origin[i].strftime('%F-%m'))
            else:
                dates.append(ibnr_triangle.origin[i].strftime('%F-Q%q'))

            # NaN != NaN, so this detects a missing case reserve: the whole
            # IBNR emerges; otherwise subtract the outstanding amount.
            if os_triangle.iat[0, 0, i, 0] != os_triangle.iat[0, 0, i, 0]:
                emerging = ibnr_triangle.iat[0, 0, i, 0]
            else:
                emerging = ibnr_triangle.iat[0, 0, i, 0] - os_triangle.iat[0, 0, i, 0]

            total.append(emerging)
            # Zeroized IBNR floors negative (and NaN) emerging amounts at zero.
            if emerging >= 0:
                zeroized.append(emerging)
            else:
                zeroized.append(0)

    pure_ibnr_dict = {'Pure IBNR': total}
    pure_ibnr_df = pd.DataFrame(pure_ibnr_dict, index=dates)

    total_row_pure = pd.DataFrame(
        {'Pure IBNR': [pure_ibnr_df['Pure IBNR'].sum()]}, index=['TOTAL'])
    pure_ibnr_df = pd.concat([pure_ibnr_df, total_row_pure])

    zeroized_dict = {'Zeroized Pure IBNR': zeroized}
    zeroized_df = pd.DataFrame(zeroized_dict, index=dates)

    total_row_zeroized = pd.DataFrame(
        {'Zeroized Pure IBNR': [zeroized_df['Zeroized Pure IBNR'].sum()]}, index=['TOTAL'])
    zeroized_df = pd.concat([zeroized_df, total_row_zeroized])

    return pure_ibnr_df, zeroized_df
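A minimal sketch of how these helpers could be chained, assuming cleaned claims and case-reserve frames in the column layout above; the Chainladder model choice and the use of the latest outstanding diagonal are illustrative assumptions, not something this module prescribes:

import chainladder as cl

# claims_df / case_df: cleaned frames from data_validation's cleaners
tri = create_triangles(claims_df, '2018-01-01', '2023-12-31')
paid = tri['GROSS AMOUNT'].sum().incr_to_cum()        # aggregate segments, then cumulate

os_tri = create_os_triangles(case_df, '2018-01-01', '2023-12-31')
latest_os = os_tri['GROSS AMOUNT'].sum().incr_to_cum().latest_diagonal

ibnr = cl.Chainladder().fit(paid).ibnr_               # IBNR by origin period
pure_df, zeroized_df = pure_ibnr(ibnr, latest_os)     # net of case reserves, plus zeroized view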
@@ -0,0 +1,155 @@
import re
import io
import pandas as pd
import base64
from datetime import datetime, timedelta
# import streamlit as st


def parse_content(contents, filename):
    # contents is a base64 data-URL string: "<content type>,<base64 payload>"
    print("Received contents:", contents)
    content_type, content_string = contents.split(',')
    decoded = base64.b64decode(content_string)
    if '.csv' in filename:
        df = pd.read_csv(io.StringIO(decoded.decode('utf-8')))
        return df.to_dict('records')
    elif '.xls' in filename:
        df = pd.read_excel(io.BytesIO(decoded))
        return df.to_dict('records')


def decode_image(image_file):
    with open(image_file, 'rb') as f:
        encoded = base64.b64encode(f.read())
    return f"data:image/png;base64,{encoded.decode()}"


# def clean_dates(df: pd.DataFrame, old_column: str, new_column: str) -> pd.DataFrame:
#     """Cleans a date column with mixed types and unifies format. Creates a new column for the dates"""
#     df[new_column] = pd.to_datetime(df[old_column], errors="coerce", dayfirst=True, format="%d/%m/%Y")  # try date coercion
#     # Coerce date if given in day count format
#     mask = pd.to_numeric(df[old_column], errors="coerce").notna()
#     df.loc[mask, new_column] = pd.to_datetime(df[old_column][mask].astype(float), errors="coerce", unit="D", origin="1899-12-30")
#     return df


def clean_dates(date_str):
    original_value = date_str

    day_first_formats = [
        # Day-first
        '%d-%m-%Y',
        '%d/%m/%Y',
        '%d-%m-%Y %H:%M:%S',
        '%d/%m/%Y %H:%M:%S',
        '%d-%m-%y',
        '%d-%m-%y %H:%M:%S',
        '%d/%m/%y',
        '%d/%m/%y %H:%M:%S'
    ]

    month_first_formats = [
        '%m-%d-%Y',
        '%m/%d/%Y',
        '%m-%d-%y',
        '%m-%d-%Y %H:%M:%S',
        '%m/%d/%Y %H:%M:%S',
        '%m-%d-%y %H:%M:%S',
    ]

    other_formats = [
        '%Y-%m-%d',
        '%Y-%m-%d %H:%M:%S',
        '%d-%b-%y',
        '%d-%b-%Y',
        '%m/%d/%y',
        '%m/%d/%Y',
        '%B %d %Y',
        '%B %d %Y %H:%M:%S'
    ]

    try:
        # Try to parse an Excel-style serial date (days since 1899-12-30)
        excel_date = float(date_str)
        date = datetime(1899, 12, 30) + timedelta(days=excel_date)
        return date.strftime('%Y-%m-%d')
    except ValueError:
        pass

    for date_format in day_first_formats:
        try:
            return datetime.strptime(date_str, date_format).strftime('%Y-%m-%d')
        except ValueError:
            pass

    for date_format in month_first_formats:
        try:
            return datetime.strptime(date_str, date_format).strftime('%Y-%m-%d')
        except ValueError:
            pass

    for date_format in other_formats:
        try:
            return datetime.strptime(date_str, date_format).strftime('%Y-%m-%d')
        except ValueError:
            pass

    # Nothing parsed: return the original string so the validation step can
    # flag the row as an error.
    return original_value


def clean_amounts(amount_str):
    if amount_str == 'nan' or amount_str.strip() == '':
        return 0.0
    try:
        if re.match(r'^\(.+\d\)$', amount_str):  # amount enclosed in parentheses is negative
            return float('-' + re.sub(r'[^\d./-]', '', amount_str))
        else:
            amount = re.sub(r'[^\d./-]', '', amount_str)  # keep only digits, '.', '/' and '-'
            try:
                return float(amount)  # convert amount to float
            except ValueError:
                return amount_str
    except ValueError:
        return amount_str


# @st.cache_data
def clean_dataframe(my_df):
    # Drop pandas' auto-generated 'Unnamed: n' columns from ragged CSVs
    for col in my_df.columns:
        if 'Unnamed' in col:
            my_df = my_df.drop(col, axis=1)
    for col in my_df.columns:
        my_df[col] = my_df[col].astype(str)
        if 'DATE' in col:
            my_df[col] = my_df[col].apply(clean_dates)
        if 'AMOUNT' in col:
            my_df[col] = my_df[col].apply(clean_amounts)
    date_cols = my_df.filter(like='DATE')
    amount_cols = my_df.filter(like='AMOUNT')
    # Quarantine rows with impossible date ordering, unparseable dates or
    # non-numeric amounts
    inconsistent_date = my_df[(my_df['LOSS DATE'] > my_df['PAID DATE']) | (my_df['LOSS DATE'] > my_df['REPORTED DATE'])]
    bad_dates = my_df[date_cols.apply(pd.to_datetime, errors='coerce', dayfirst=False).isna().any(axis=1)]
    bad_amounts = my_df[amount_cols.apply(pd.to_numeric, errors='coerce').isna().any(axis=1)]
    error_df = pd.concat([bad_dates, bad_amounts, inconsistent_date])
    # REFERENCE maps back to the source spreadsheet row (header + 1-based rows)
    error_df['REFERENCE'] = error_df.index + 2
    error_df = error_df.drop_duplicates().sort_index()
    clean_df = (my_df.drop(error_df.index, axis=0)).reset_index(drop=True)
    return clean_df, error_df


# @st.cache_data
def clean_os_dataframe(my_df):
    # Same pipeline as clean_dataframe, but validates against the
    # OUTSTANDING DATE instead of the PAID DATE
    for col in my_df.columns:
        if 'Unnamed' in col:
            my_df = my_df.drop(col, axis=1)
    for col in my_df.columns:
        my_df[col] = my_df[col].astype(str)
        if 'DATE' in col:
            my_df[col] = my_df[col].apply(clean_dates)
        if 'AMOUNT' in col:
            my_df[col] = my_df[col].apply(clean_amounts)
    date_cols = my_df.filter(like='DATE')
    amount_cols = my_df.filter(like='AMOUNT')
    inconsistent_date = my_df[(my_df['LOSS DATE'] > my_df['OUTSTANDING DATE']) | (my_df['LOSS DATE'] > my_df['REPORTED DATE'])]
    bad_dates = my_df[date_cols.apply(pd.to_datetime, errors='coerce', dayfirst=False).isna().any(axis=1)]
    bad_amounts = my_df[amount_cols.apply(pd.to_numeric, errors='coerce').isna().any(axis=1)]
    error_df = pd.concat([bad_dates, bad_amounts, inconsistent_date])
    error_df['REFERENCE'] = error_df.index + 2
    error_df = error_df.drop_duplicates().sort_index()
    clean_df = (my_df.drop(error_df.index, axis=0)).reset_index(drop=True)
    return clean_df, error_df
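A minimal sketch of the cleaning round-trip on made-up rows, showing what lands in each returned frame:

import pandas as pd

raw = pd.DataFrame({
    'LOSS DATE': ['01/02/2021', '44230', 'not a date'],        # day-first, Excel serial, junk
    'REPORTED DATE': ['05/02/2021', '12/02/2021', '01/01/2021'],
    'PAID DATE': ['15/03/2021', '10/03/2021', '01/01/2021'],
    'GROSS AMOUNT': ['1,200.50', '(300)', 'abc'],              # separators, parenthesised negative, junk
    'NET AMOUNT': ['1,000', '(250)', '0'],
})

clean_df, error_df = clean_dataframe(raw)
# Rows 0 and 1 survive ('44230' becomes '2021-02-03', '(300)' becomes -300.0);
# row 2 is quarantined with REFERENCE = 4, its row number in the source sheet.
print(clean_df)
print(error_df[['REFERENCE', 'LOSS DATE', 'GROSS AMOUNT']])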
Binary file not shown.
Binary file not shown.
@@ -0,0 +1,7 @@
import sys
from streamlit.web import cli as stcli


if __name__ == '__main__':
    # Launch the app through Streamlit's CLI; note the hard-coded,
    # machine-specific path to Home.py.
    sys.argv = ["streamlit", "run", "C:/Users/mhang/Documents/IBNR APP/Home.py"]
    sys.exit(stcli.main())