-
Notifications
You must be signed in to change notification settings - Fork 0
/
main.py
84 lines (75 loc) · 2.67 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
import pytz
import yfinance
import requests
import threading
import pandas as pd
from datetime import datetime
from bs4 import BeautifulSoup
def get_sp500_tickers():
    """Return the S&P 500 ticker symbols scraped from Wikipedia.

    Downloads the "List of S&P 500 companies" article, takes the first
    ``<table>`` element on the page and returns the values of its
    ``Symbol`` column.

    Returns:
        list: The entries of the ``Symbol`` column, in table order.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with an HTTP error status.
        IndexError: If the page contains no ``<table>`` element.
    """
    # Imported locally so this block does not depend on a file-level import.
    from io import StringIO

    res = requests.get(
        "https://en.wikipedia.org/wiki/List_of_S%26P_500_companies",
        timeout=30,  # never block forever on a stalled connection
    )
    # Fail loudly on 4xx/5xx instead of trying to parse an error page.
    res.raise_for_status()

    # Name the parser explicitly so the result does not depend on which
    # optional parsers happen to be installed.
    soup = BeautifulSoup(res.content, "html.parser")
    table = soup.find_all("table")[0]

    # read_html wants a path/URL or file-like object; passing literal HTML
    # as a plain string is deprecated in recent pandas releases.
    frames = pd.read_html(StringIO(str(table)))
    return list(frames[0]["Symbol"])
# NOTE(review): this fetches the ticker list as soon as the module runs. The
# name `tickers` is reassigned by the script statements at the end of this
# file, so this call looks redundant — confirm before removing it.
tickers = get_sp500_tickers()
def get_history(ticker, period_start, period_end, granularity="1d", tries=0):
    """Download adjusted OHLCV history for one ticker via yfinance.

    Args:
        ticker: Symbol passed to ``yfinance.Ticker``.
        period_start: Passed as ``start=`` to ``Ticker.history``.
        period_end: Passed as ``end=`` to ``Ticker.history``.
        granularity: Bar interval passed as ``interval=`` (default ``"1d"``).
        tries: Number of failed attempts so far. Used by the retry recursion;
            callers normally leave it at 0.

    Returns:
        pandas.DataFrame: Indexed by ``datetime`` (converted to UTC) with the
        columns ``open``, ``high``, ``low``, ``close`` and ``volume``, plus
        any other columns yfinance returned besides ``Dividends`` and
        ``Stock Splits``. An empty DataFrame is returned when the download
        keeps failing or yields no rows.
    """
    try:
        df = yfinance.Ticker(ticker).history(
            start=period_start,
            end=period_end,
            interval=granularity,
            auto_adjust=True,
        ).reset_index()
    except Exception:
        # Best-effort download: retry until `tries` reaches 5, then give up
        # and report "no data" instead of propagating the error.
        if tries < 5:
            return get_history(
                ticker, period_start, period_end, granularity, tries + 1
            )
        return pd.DataFrame()

    if df.empty:
        return pd.DataFrame()

    df = df.rename(columns={
        # yfinance names the timestamp column "Date" for daily-or-coarser
        # bars and "Datetime" for intraday bars; accept both so that
        # non-daily granularities do not fail on the lookup below.
        "Date": "datetime",
        "Datetime": "datetime",
        "Open": "open",
        "High": "high",
        "Low": "low",
        "Close": "close",
        "Volume": "volume",
    })

    # NOTE(review): tz_convert requires timezone-aware timestamps — confirm
    # the installed yfinance version returns them.
    df["datetime"] = df["datetime"].dt.tz_convert(pytz.utc)

    # errors="ignore": do not fail if either column is missing from the
    # frame yfinance returned.
    df = df.drop(columns=["Dividends", "Stock Splits"], errors="ignore")
    return df.set_index("datetime", drop=True)
# Frame layout: index = datetime (UTC); columns = open, high, low, close, volume
def get_histories(tickers, period_starts, period_ends, granularity="1d"):
    """Fetch price histories for several tickers, one thread per ticker.

    Args:
        tickers: Sequence of symbols.
        period_starts: Sequence of start values, indexed in step with
            ``tickers``.
        period_ends: Sequence of end values, indexed in step with
            ``tickers``.
        granularity: Interval forwarded to ``get_history`` (default ``"1d"``).

    Returns:
        tuple: ``(tickers, dfs)`` — two lists of equal length holding only
        the tickers whose download produced a non-empty DataFrame, and those
        DataFrames in the same order.
    """
    # Pre-sized so every worker writes to its own slot by index.
    dfs = [None] * len(tickers)

    def _helper(i):
        print(tickers[i])
        dfs[i] = get_history(
            tickers[i],
            period_starts[i],
            period_ends[i],
            granularity=granularity,
        )

    threads = [
        threading.Thread(target=_helper, args=(i,))
        for i in range(len(tickers))
    ]
    for thread in threads:
        thread.start()
    for thread in threads:
        thread.join()

    # A worker that raised leaves its slot as None; treat that like "no data"
    # rather than crashing on `None.empty`. Filtering the zipped pairs in one
    # pass keeps tickers and frames aligned.
    kept = [
        (ticker, df)
        for ticker, df in zip(tickers, dfs)
        if df is not None and not df.empty
    ]
    kept_tickers = [ticker for ticker, _ in kept]
    kept_dfs = [df for _, df in kept]
    return kept_tickers, kept_dfs
def get_ticker_dfs(start, end):
    """Return ``(tickers, ticker_dfs)``, from cache or a fresh download.

    First tries to load a ``(tickers, ticker_dfs)`` pair from
    ``"dataset.obj"`` via ``utils.load_pickle``. If that raises for any
    reason, the S&P 500 ticker list is fetched, daily histories for
    ``start``..``end`` are downloaded for every ticker, and the result is
    stored with ``utils.save_pickle`` before being returned.

    Note that ``start`` and ``end`` are only used on the download path; a
    successfully loaded cache is returned as-is.

    Args:
        start: Start of the range, applied to every ticker.
        end: End of the range, applied to every ticker.

    Returns:
        tuple: ``(tickers, ticker_dfs)`` where, on the download path,
        ``ticker_dfs`` maps each ticker to its DataFrame.
    """
    # NOTE(review): load_pickle/save_pickle are project helpers not visible
    # here; if they wrap `pickle`, only load cache files from trusted sources.
    from utils import load_pickle, save_pickle

    try:
        tickers, ticker_dfs = load_pickle("dataset.obj")
    except Exception:
        # Any failure to load or unpack the cache falls back to a download.
        symbols = get_sp500_tickers()
        count = len(symbols)
        tickers, frames = get_histories(
            symbols, [start] * count, [end] * count, granularity="1d"
        )
        ticker_dfs = dict(zip(tickers, frames))
        save_pickle("dataset.obj", (tickers, ticker_dfs))
    return tickers, ticker_dfs
# Script section: request data from 2010-01-01 (UTC) up to the current time
# and print whatever get_ticker_dfs returns as the per-ticker frames.
period_start = datetime(year=2010, month=1, day=1, tzinfo=pytz.utc)
period_end = datetime.now(tz=pytz.utc)
tickers, ticker_dfs = get_ticker_dfs(period_start, period_end)
print(ticker_dfs)