-
Notifications
You must be signed in to change notification settings - Fork 0
/
LeaderFollowerFrequencyAnalysis.py
119 lines (72 loc) · 4.44 KB
/
LeaderFollowerFrequencyAnalysis.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
from CyclicityAnalysis import *
from FetchPrices import *
import os
import pandas as pd
from joblib import Parallel, delayed
import itertools
class LeaderFollowerFrequencyAnalysis:
    """Count how often each ordered (leader, follower) pair is in the daily top N.

    For every calendar day between ``start_date`` and ``end_date`` the class
    obtains the top-N leader/follower pairs from ``CyclicityAnalysis`` run on
    that day's intraday prices, optionally caching each day's result as a CSV
    file in ``save_directory``. The per-day results are then aggregated into

    * ``strong_leader_follower_dates_df`` -- one row per distinct pair with the
      list of days on which it appeared, and
    * ``strong_frequency_counts_df`` -- a square matrix (rows are leaders,
      columns are followers) of how many days each pair appeared.

    NOTE(review): ``today``, ``FetchPrices``, ``CyclicityAnalysis`` and ``px``
    are not defined in this file; they are presumably supplied by the star
    imports at the top of the module -- confirm.
    """

    def __init__(self, abbrevs, stock_type, start_date='2021-01-01', end_date=today, save_directory=None):
        """Store the configuration and eagerly build both summary tables.

        Args:
            abbrevs: Ticker abbreviations; passed to ``FetchPrices`` and used
                as the row/column labels of the frequency matrix.
            stock_type: Passed through to ``FetchPrices`` unchanged.
            start_date: First day of the analysed range.
            end_date: Last day of the analysed range.
            save_directory: Directory for the per-day CSV cache, or ``None``
                to disable caching. It is created if it does not exist.
        """
        self.abbrevs = abbrevs
        self.stock_type = stock_type
        self.start_date = start_date
        self.end_date = end_date
        self.save_directory = save_directory
        if self.save_directory is not None:
            # exist_ok tolerates a pre-existing directory without hiding real
            # failures (e.g. permissions) that would break caching later on.
            os.makedirs(self.save_directory, exist_ok=True)
        self.strong_leader_follower_dates_df = self.get_strong_leader_follower_dates_df(N=10, update=False)
        # Derive the matrix from the table computed above instead of running
        # the whole per-day analysis a second time.
        self.strong_frequency_counts_df = self._build_frequency_counts_df(
            self.strong_leader_follower_dates_df
        )

    def get_topN_intraday_leader_follower_pairs_df(self, date=today, N=10, update=False):
        """Return one day's top-N leader/follower pairs, using the CSV cache if possible.

        Args:
            date: Day to analyse. It is also the stem of the cache file name
                and the index label of every returned row.
            N: Number of pairs requested from
                ``CyclicityAnalysis.get_topN_leader_follower_pairs``.
            update: Recompute even when a cached CSV exists. Recomputation is
                always forced when there is no cached CSV or when ``date``
                equals ``today``.

        Returns:
            A DataFrame with columns ``Leader`` and ``Follower``. If the
            computation fails, the cached frame is returned when one exists,
            otherwise an empty frame with those two columns.
        """
        file_path = None
        if self.save_directory is not None:
            file_path = os.path.join(self.save_directory, "{}.csv".format(date))

        cached_df = None
        if file_path is not None and os.path.exists(file_path):
            cached_df = pd.read_csv(file_path, index_col=0)

        if cached_df is None or date == today:
            update = True
        if not update:
            return cached_df

        try:
            intraday_prices_df = FetchPrices(self.abbrevs, self.stock_type).fetch_intraday_prices_df(
                date=date, preprocessed=True, parallelize=False
            )
            intradayCA = CyclicityAnalysis(intraday_prices_df)
            topN_intraday_leader_follower_pairs = intradayCA.get_topN_leader_follower_pairs(N)
            topN_intraday_leader_follower_pairs_df = pd.DataFrame(
                topN_intraday_leader_follower_pairs, columns=['Leader', 'Follower']
            )
            # Size the index from the rows actually returned: a day with fewer
            # than N pairs must not be discarded because of a length mismatch.
            topN_intraday_leader_follower_pairs_df.index = pd.Index(
                [date] * len(topN_intraday_leader_follower_pairs_df), name='Date'
            )
        except Exception:
            # Best effort: a day that cannot be computed contributes no pairs,
            # but a failed refresh must not wipe out a previously cached result.
            if cached_df is not None:
                topN_intraday_leader_follower_pairs_df = cached_df
            else:
                topN_intraday_leader_follower_pairs_df = pd.DataFrame(columns=['Leader', 'Follower'])

        if file_path is not None:
            topN_intraday_leader_follower_pairs_df.to_csv(file_path)
        return topN_intraday_leader_follower_pairs_df

    def get_strong_leader_follower_dates_df(self, N=10, update=False):
        """Aggregate the per-day top-N pairs over the configured date range.

        Args:
            N: Forwarded to ``get_topN_intraday_leader_follower_pairs_df``.
            update: Forwarded to ``get_topN_intraday_leader_follower_pairs_df``.

        Returns:
            A DataFrame with one row per distinct (Leader, Follower) pair, in
            order of first appearance, and the columns ``Leader``,
            ``Follower``, ``Strong Leader Follower Dates`` (list of the days
            on which the pair appeared) and
            ``Number of Strong Leader Follower Dates`` (length of that list).
            The frame is empty when the date range is empty.
        """
        date_range = pd.date_range(self.start_date, self.end_date, freq='D').strftime("%Y-%m-%d")

        # (leader, follower) -> days on which the pair appeared. A dict keeps
        # insertion order, so pairs stay in order of first appearance.
        dates_by_pair = {}
        if len(date_range) > 0:
            # NOTE(review): one thread per calendar day, as before; a long
            # range therefore starts a large number of threads.
            topN_intraday_leader_follower_pairs_dfs = Parallel(
                n_jobs=len(date_range), verbose=0, prefer='threads'
            )(
                delayed(self.get_topN_intraday_leader_follower_pairs_df)(date, N, update)
                for date in date_range
            )
            concatenated_df = pd.concat(topN_intraday_leader_follower_pairs_dfs, axis=0)
            # Single pass over all rows instead of re-filtering the whole
            # table once per distinct pair.
            for date, leader, follower in zip(
                concatenated_df.index, concatenated_df['Leader'], concatenated_df['Follower']
            ):
                dates_by_pair.setdefault((leader, follower), []).append(date)

        strong_dates_lists = list(dates_by_pair.values())
        strong_leader_follower_dates_df = pd.DataFrame(list(dates_by_pair), columns=['Leader', 'Follower'])
        strong_leader_follower_dates_df['Strong Leader Follower Dates'] = strong_dates_lists
        strong_leader_follower_dates_df['Number of Strong Leader Follower Dates'] = [
            len(dates) for dates in strong_dates_lists
        ]
        return strong_leader_follower_dates_df

    def get_strong_frequency_counts_df(self, N=10, update=False):
        """Recompute the per-pair dates table and return the frequency matrix.

        Args:
            N: Forwarded to ``get_strong_leader_follower_dates_df``.
            update: Forwarded to ``get_strong_leader_follower_dates_df``.

        Returns:
            A square integer DataFrame indexed and labelled by ``abbrevs``;
            the cell at (leader, follower) is the number of days on which
            that ordered pair appeared, or 0 if it never did.
        """
        strong_leader_follower_dates_df = self.get_strong_leader_follower_dates_df(N=N, update=update)
        return self._build_frequency_counts_df(strong_leader_follower_dates_df)

    def _build_frequency_counts_df(self, strong_leader_follower_dates_df):
        """Turn a per-pair dates table into the leader-by-follower count matrix."""
        count_by_pair = dict(zip(
            zip(strong_leader_follower_dates_df['Leader'], strong_leader_follower_dates_df['Follower']),
            strong_leader_follower_dates_df['Number of Strong Leader Follower Dates'],
        ))
        abbrevs = list(self.abbrevs)
        # Only pairs made of known abbreviations are reported; every other
        # cell, including pairs that never appeared, is 0.
        counts = [
            [int(count_by_pair.get((leader, follower), 0)) for follower in abbrevs]
            for leader in abbrevs
        ]
        return pd.DataFrame(counts, index=abbrevs, columns=abbrevs, dtype='int64')

    def plot_strong_frequency_counts_df(self, title='Leader Follower Frequency Matrix', color_label='Number of Strong Leader Follower Dates', color_continuous_scale='viridis'):
        """Show ``strong_frequency_counts_df`` as a heat map.

        Args:
            title: Figure title.
            color_label: Label used for the colour dimension.
            color_continuous_scale: Colour scale name passed to ``px.imshow``.

        NOTE(review): ``px`` is not imported in this file; presumably it is
        plotly.express made available through a star import -- confirm.
        """
        fig = px.imshow(
            self.strong_frequency_counts_df,
            title=title,
            labels=dict(color=color_label),
            x=self.strong_frequency_counts_df.columns,
            y=self.strong_frequency_counts_df.index,
            color_continuous_scale=color_continuous_scale,
        )
        fig.show()