Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

update-pandas for version 2.2.2 #13

Open
wants to merge 3 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -102,3 +102,9 @@ venv.bak/

# mypy
.mypy_cache/

# test file
test.py
test.ipynb
test?.py
test?.ipynb
25 changes: 18 additions & 7 deletions jqfactor_analyzer/analyze.py
Original file line number Diff line number Diff line change
Expand Up @@ -477,7 +477,7 @@ def calc_mean_information_coefficient(self, group_adjust=False, by_group=False,
- False: 不分行业计算 IC
by_time:
- 'Y': 按年求均值
- 'M': 按月求均值
- 'ME': 按月求均值
- None: 对所有日期求均值
method:
- 'rank': 用秩相关系数计算IC值
Expand Down Expand Up @@ -608,7 +608,12 @@ def _calc_ic_mean_n_day_lag(self, n, group_adjust=False, by_group=False, method=
group_adjust=group_adjust, by_group=by_group,
method=method
)
return ac.mean(level=('group' if by_group else None))
if by_group:
_mean = ac.groupby(level='group').mean()
else:
_mean = ac.mean()
# return ac.mean(level=('group' if by_group else None))
return _mean

def calc_ic_mean_n_days_lag(self, n=10, group_adjust=False, by_group=False, method=None):
"""滞后 0 - n 天因子收益信息比率(IC)的均值
Expand All @@ -627,9 +632,15 @@ def calc_ic_mean_n_days_lag(self, n=10, group_adjust=False, by_group=False, meth
- 'rank': 用秩相关系数计算IC值
- 'normal': 用普通相关系数计算IC值
"""
ic_mean = [self.calc_factor_information_coefficient(
group_adjust=group_adjust, by_group=by_group, method=method,
).mean(level=('group' if by_group else None))]
df_coef = self.calc_factor_information_coefficient(group_adjust=group_adjust, by_group=by_group, method=method)
if by_group:
_mean = df_coef.groupby(level='group').mean()
else:
_mean = df_coef.mean()
ic_mean = [_mean]
# ic_mean = [self.calc_factor_information_coefficient(
# group_adjust=group_adjust, by_group=by_group, method=method,
# ).mean(level=('group' if by_group else None))]

for lag in range(1, n + 1):
ic_mean.append(self._calc_ic_mean_n_day_lag(
Expand Down Expand Up @@ -854,7 +865,7 @@ def ic_by_group(self):
def ic_monthly(self):
ic_monthly = self.calc_mean_information_coefficient(group_adjust=False,
by_group=False,
by_time="M").copy()
by_time="ME").copy()
ic_monthly.index = ic_monthly.index.map(lambda x: x.strftime('%Y-%m'))
return ic_monthly

Expand Down Expand Up @@ -1165,7 +1176,7 @@ def plot_monthly_ic_heatmap(self, group_adjust=False):
- False: 不使用行业中性收益
"""
ic_monthly = self.calc_mean_information_coefficient(
group_adjust=group_adjust, by_group=False, by_time="M"
group_adjust=group_adjust, by_group=False, by_time="ME"
)
pl.plot_monthly_ic_heatmap(ic_monthly)

Expand Down
35 changes: 24 additions & 11 deletions jqfactor_analyzer/data.py
Original file line number Diff line number Diff line change
Expand Up @@ -187,6 +187,9 @@ def _get_price(self, securities, start_date=None, end_date=None, count=None,

def get_prices(self, securities, start_date=None, end_date=None,
period=None):
'''
调用jqdatasdk.get_price函数,并转换成因子格式,{index:day,column:code,values:price}
'''
if period is not None:
trade_days = self._get_trade_days(start_date=end_date)
if len(trade_days):
Expand All @@ -202,18 +205,18 @@ def get_prices(self, securities, start_date=None, end_date=None,
def _get_industry(self, securities, start_date, end_date,
industry='jq_l1'):
trade_days = self._get_trade_days(start_date, end_date)
industries = map(partial(self.api.get_industry, securities), trade_days)


day_ind_dict = {day:self.api.get_industry(securities,day) for day in trade_days}
industries = {
d: {
s: ind.get(s).get(industry, dict()).get('industry_name', 'NA')
for s in securities
}
for d, ind in zip(trade_days, industries)
}
return pd.DataFrame(industries).T.sort_index()
day:{code:ind.get(industry, dict()).get('industry_name', 'NA') for code,ind in ind_dict.items()}
for day,ind_dict in day_ind_dict.items()}
df_ind = pd.DataFrame(industries).T.sort_index()
return df_ind

def get_groupby(self, securities, start_date, end_date):
'''
get industry func for every code and day
'''
return self._get_industry(securities=securities,
start_date=start_date, end_date=end_date,
industry=self.industry)
Expand Down Expand Up @@ -279,6 +282,15 @@ def _get_average_weights(self, securities, start_date, end_date):
return {sec: 1.0 for sec in securities}

def get_weights(self, securities, start_date, end_date):
'''
计算各分位收益时, 每只股票权重, 默认为 'avg'
- 'avg': 等权重
- 'mktcap': 按总市值加权
- 'ln_mktcap': 按总市值的对数加权
- 'cmktcap': 按流通市值加权
- 'ln_cmktcap': 按流通市值的对数加权
'''

start_date = date2str(start_date)
end_date = date2str(end_date)

Expand All @@ -295,8 +307,9 @@ def get_weights(self, securities, start_date, end_date):
else:
raise ValueError('invalid weight_method')

return weight_api(securities=securities, start_date=start_date,
end_date=end_date)

weights = weight_api(securities=securities, start_date=start_date,end_date=end_date)
return weights

@property
def apis(self):
Expand Down
64 changes: 34 additions & 30 deletions jqfactor_analyzer/performance.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ def src_ic(group):
grouper.append('group')

with np.errstate(divide='ignore', invalid='ignore'):
ic = factor_data.groupby(grouper).apply(src_ic)
ic = factor_data.groupby(grouper,group_keys=False).apply(src_ic)

return ic

Expand Down Expand Up @@ -98,7 +98,7 @@ def mean_information_coefficient(
ic = ic.mean()

else:
ic = (ic.reset_index().set_index('date').groupby(grouper).mean())
ic = (ic.reset_index().set_index('date').groupby(grouper,group_keys=False).mean())

return ic

Expand Down Expand Up @@ -137,17 +137,17 @@ def to_weights(group, is_long_short):
if group_adjust:
grouper.append('group')

weights = factor_data.groupby(grouper)['factor'] \
weights = factor_data.groupby(grouper,group_keys=False)['factor'] \
.apply(to_weights, demeaned)

if group_adjust:
weights = weights.groupby(level='date').apply(to_weights, False)
weights = weights.groupby(level='date',group_keys=False).apply(to_weights, False)

weighted_returns = \
factor_data[get_forward_returns_columns(factor_data.columns)] \
.multiply(weights, axis=0)

returns = weighted_returns.groupby(level='date').sum()
returns = weighted_returns.groupby(level='date',group_keys=False).sum()

return returns

Expand Down Expand Up @@ -178,7 +178,7 @@ def factor_alpha_beta(factor_data, demeaned=True, group_adjust=False):

returns = factor_returns(factor_data, demeaned, group_adjust)

universe_ret = factor_data.groupby(level='date')[
universe_ret = factor_data.groupby(level='date',group_keys=False)[
get_forward_returns_columns(factor_data.columns)] \
.mean().loc[returns.index]

Expand Down Expand Up @@ -233,8 +233,7 @@ def cumulative_returns(returns, period):
def split_portfolio(ret, period):
return pd.DataFrame(np.diag(ret))

sub_portfolios = returns.groupby(
np.arange(len(returns.index)) // period, axis=0
sub_portfolios = returns.groupby(np.arange(len(returns.index)) // period, axis=0,group_keys=False
).apply(split_portfolio, period)
sub_portfolios.index = returns.index

Expand Down Expand Up @@ -264,22 +263,27 @@ def weighted_mean_return(factor_data, grouper):
"""计算(年化)加权平均/标准差"""
forward_returns_columns = get_forward_returns_columns(factor_data.columns)

def agg(values, weights):
count = len(values)
average = np.average(values, weights=weights, axis=0)

def agg(df):
count = df.shape[0]
average = np.average(df.iloc[:,:-1], weights=df.iloc[:,-1], axis=0)
# Fast and numerically precise
variance = np.average(
(values - average)**2, weights=weights, axis=0
variance = np.average((df.iloc[:,:-1] - average)**2, weights=df.iloc[:,-1], axis=0
) * count / max((count - 1), 1)
return pd.Series(
[average, np.sqrt(variance), count], index=['mean', 'std', 'count']
)

group_stats = factor_data.groupby(grouper)[
forward_returns_columns.append(pd.Index(['weights']))] \
.apply(lambda x: x[forward_returns_columns].apply(
agg, weights=x['weights'].fillna(0.0).values
))

# return format
_col_list = df.columns.to_list()[:-1]
ser_mean = pd.Series(average, index=_col_list, name='mean')
ser_std = pd.Series(np.sqrt(variance), index=_col_list,name='std')
ser_count = pd.Series(count, index=_col_list,name='count')
df_agg = pd.concat([ser_mean,ser_std, ser_count],axis=1).T
return df_agg

factor_data2 = factor_data.copy()
factor_data2['weights'] = factor_data2['weights'].fillna(0)
col_list = forward_returns_columns.to_list()+['weights']
group_stats = factor_data2.groupby(grouper)[col_list].apply(agg)


mean_ret = group_stats.xs('mean', level=-1)

Expand Down Expand Up @@ -404,7 +408,7 @@ def quantile_turnover(quantile_factor, quantile, period=1):

quant_names = quantile_factor[quantile_factor == quantile]
quant_name_sets = quant_names.groupby(
level=['date']
level=['date'],group_keys=False
).apply(lambda x: set(x.index.get_level_values('asset')))
new_names = (quant_name_sets - quant_name_sets.shift(period)).dropna()
quant_turnover = new_names.apply(lambda x: len(x)) / quant_name_sets.apply(
Expand Down Expand Up @@ -437,7 +441,7 @@ def factor_autocorrelation(factor_data, period=1, rank=True):
grouper = [factor_data.index.get_level_values('date')]

if rank:
ranks = factor_data.groupby(grouper)[['factor']].rank()
ranks = factor_data.groupby(grouper,group_keys=False)[['factor']].rank()
else:
ranks = factor_data[['factor']]
asset_factor_rank = ranks.reset_index().pivot(
Expand Down Expand Up @@ -509,7 +513,7 @@ def average_cumulative_return(q_fact, demean_by):

returns_bygroup = []

for group, g_data in factor_data.groupby('group'):
for group, g_data in factor_data.groupby('group',group_keys=True):
g_fq = g_data['factor_quantile']
if group_adjust:
demean_by = g_fq # demeans at group level
Expand All @@ -521,7 +525,7 @@ def average_cumulative_return(q_fact, demean_by):
# Align cumulative return from different dates to the same index
# then compute mean and std
#
avgcumret = g_fq.groupby(g_fq).apply(
avgcumret = g_fq.groupby(g_fq,group_keys=True).apply(
average_cumulative_return, demean_by
)
avgcumret['group'] = group
Expand All @@ -534,9 +538,9 @@ def average_cumulative_return(q_fact, demean_by):

if group_adjust:
all_returns = []
for group, g_data in factor_data.groupby('group'):
for group, g_data in factor_data.groupby('group',group_keys=True):
g_fq = g_data['factor_quantile']
avgcumret = g_fq.groupby(g_fq).apply(cumulative_return, g_fq)
avgcumret = g_fq.groupby(g_fq,group_keys=True).apply(cumulative_return, g_fq)
all_returns.append(avgcumret)
q_returns = pd.concat(all_returns, axis=1)
q_returns = pd.DataFrame(
Expand All @@ -548,7 +552,7 @@ def average_cumulative_return(q_fact, demean_by):
return q_returns.unstack(level=1).stack(level=0)
elif demeaned:
fq = factor_data['factor_quantile']
return fq.groupby(fq).apply(average_cumulative_return, fq)
return fq.groupby(fq,group_keys=True).apply(average_cumulative_return, fq)
else:
fq = factor_data['factor_quantile']
return fq.groupby(fq).apply(average_cumulative_return, None)
return fq.groupby(fq, group_keys=True).apply(average_cumulative_return, None)
Loading