From dd87ff443cb0863943fed8a6af66857eb6c48072 Mon Sep 17 00:00:00 2001
From: sunshe35
Date: Tue, 11 Jun 2024 17:19:28 +0800
Subject: [PATCH 1/3] update pandas for version 2.2.2

---
 .gitignore                       |  6 +++
 jqfactor_analyzer/analyze.py     |  6 +--
 jqfactor_analyzer/performance.py | 64 +++++++++++++++++---------------
 jqfactor_analyzer/plotting.py    | 33 ++++++++--------
 jqfactor_analyzer/prepare.py     | 11 +++---
 tests/test_performance.py        |  3 +-
 6 files changed, 64 insertions(+), 59 deletions(-)

diff --git a/.gitignore b/.gitignore
index 894a44c..c8ee46d 100644
--- a/.gitignore
+++ b/.gitignore
@@ -102,3 +102,9 @@ venv.bak/
 
 # mypy
 .mypy_cache/
+
+# test file
+test.py
+test.ipynb
+test?.py
+test?.ipynb
\ No newline at end of file

diff --git a/jqfactor_analyzer/analyze.py b/jqfactor_analyzer/analyze.py
index c6d683b..b55cc3b 100644
--- a/jqfactor_analyzer/analyze.py
+++ b/jqfactor_analyzer/analyze.py
@@ -477,7 +477,7 @@ def calc_mean_information_coefficient(self, group_adjust=False, by_group=False,
         - False: compute IC without industry grouping
         by_time:
         - 'Y': mean by year
-        - 'M': mean by month
+        - 'ME': mean by month
         - None: mean over all dates
         method:
         - 'rank': compute IC with the rank (Spearman) correlation
@@ -854,7 +854,7 @@ def ic_by_group(self):
     def ic_monthly(self):
         ic_monthly = self.calc_mean_information_coefficient(group_adjust=False,
                                                             by_group=False,
-                                                            by_time="M").copy()
+                                                            by_time="ME").copy()
         ic_monthly.index = ic_monthly.index.map(lambda x: x.strftime('%Y-%m'))
         return ic_monthly
@@ -1165,7 +1165,7 @@ def plot_monthly_ic_heatmap(self, group_adjust=False):
         - False: do not use industry-neutral returns
         """
         ic_monthly = self.calc_mean_information_coefficient(
-            group_adjust=group_adjust, by_group=False, by_time="M"
+            group_adjust=group_adjust, by_group=False, by_time="ME"
         )
         pl.plot_monthly_ic_heatmap(ic_monthly)
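Reviewer note: the three analyze.py hunks above are all one pandas change -- since pandas 2.2 the month-end frequency alias is 'ME' and the old 'M' spelling is deprecated. A minimal, self-contained sketch of the new alias (toy data, not part of this patch):

    import numpy as np
    import pandas as pd

    idx = pd.date_range('2024-01-01', periods=90, freq='D')
    ic = pd.Series(np.random.randn(90), index=idx)

    # pandas < 2.2 spelling, now deprecated:  ic.resample('M').mean()
    monthly_ic = ic.resample('ME').mean()  # month-end alias, pandas >= 2.2
    print(monthly_ic)

The same alias works anywhere a time grouper is built, e.g. pd.Grouper(freq='ME'), which is presumably what the by_time argument feeds into here.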
diff --git a/jqfactor_analyzer/performance.py b/jqfactor_analyzer/performance.py
index 3db1108..f0b05b8 100644
--- a/jqfactor_analyzer/performance.py
+++ b/jqfactor_analyzer/performance.py
@@ -49,7 +49,7 @@ def src_ic(group):
         grouper.append('group')
 
     with np.errstate(divide='ignore', invalid='ignore'):
-        ic = factor_data.groupby(grouper).apply(src_ic)
+        ic = factor_data.groupby(grouper, group_keys=False).apply(src_ic)
 
     return ic
@@ -98,7 +98,7 @@ def mean_information_coefficient(
         ic = ic.mean()
 
     else:
-        ic = (ic.reset_index().set_index('date').groupby(grouper).mean())
+        ic = (ic.reset_index().set_index('date').groupby(grouper, group_keys=False).mean())
 
     return ic
@@ -137,17 +137,17 @@ def to_weights(group, is_long_short):
     if group_adjust:
         grouper.append('group')
 
-    weights = factor_data.groupby(grouper)['factor'] \
+    weights = factor_data.groupby(grouper, group_keys=False)['factor'] \
         .apply(to_weights, demeaned)
 
     if group_adjust:
-        weights = weights.groupby(level='date').apply(to_weights, False)
+        weights = weights.groupby(level='date', group_keys=False).apply(to_weights, False)
 
     weighted_returns = \
         factor_data[get_forward_returns_columns(factor_data.columns)] \
        .multiply(weights, axis=0)
 
-    returns = weighted_returns.groupby(level='date').sum()
+    returns = weighted_returns.groupby(level='date', group_keys=False).sum()
 
     return returns
@@ -178,7 +178,7 @@ def factor_alpha_beta(factor_data, demeaned=True, group_adjust=False):
 
     returns = factor_returns(factor_data, demeaned, group_adjust)
 
-    universe_ret = factor_data.groupby(level='date')[
+    universe_ret = factor_data.groupby(level='date', group_keys=False)[
         get_forward_returns_columns(factor_data.columns)] \
         .mean().loc[returns.index]
@@ -233,8 +233,7 @@ def cumulative_returns(returns, period):
     def split_portfolio(ret, period):
         return pd.DataFrame(np.diag(ret))
 
-    sub_portfolios = returns.groupby(
-        np.arange(len(returns.index)) // period, axis=0
+    sub_portfolios = returns.groupby(np.arange(len(returns.index)) // period, axis=0, group_keys=False
     ).apply(split_portfolio, period)
     sub_portfolios.index = returns.index
@@ -264,22 +263,27 @@ def weighted_mean_return(factor_data, grouper):
    """Compute the (annualized) weighted mean / standard deviation"""
     forward_returns_columns = get_forward_returns_columns(factor_data.columns)
 
-    def agg(values, weights):
-        count = len(values)
-        average = np.average(values, weights=weights, axis=0)
+    def agg(df):
+        count = df.shape[0]
+        average = np.average(df.iloc[:, :-1], weights=df.iloc[:, -1], axis=0)
         # Fast and numerically precise
-        variance = np.average(
-            (values - average)**2, weights=weights, axis=0
+        variance = np.average(
+            (df.iloc[:, :-1] - average)**2, weights=df.iloc[:, -1], axis=0
         ) * count / max((count - 1), 1)
-        return pd.Series(
-            [average, np.sqrt(variance), count], index=['mean', 'std', 'count']
-        )
-
-    group_stats = factor_data.groupby(grouper)[
-        forward_returns_columns.append(pd.Index(['weights']))] \
-        .apply(lambda x: x[forward_returns_columns].apply(
-            agg, weights=x['weights'].fillna(0.0).values
-        ))
+
+        # return format
+        _col_list = df.columns.to_list()[:-1]
+        ser_mean = pd.Series(average, index=_col_list, name='mean')
+        ser_std = pd.Series(np.sqrt(variance), index=_col_list, name='std')
+        ser_count = pd.Series(count, index=_col_list, name='count')
+        df_agg = pd.concat([ser_mean, ser_std, ser_count], axis=1).T
+        return df_agg
+
+    factor_data2 = factor_data.copy()
+    factor_data2['weights'] = factor_data2['weights'].fillna(0)
+    col_list = forward_returns_columns.to_list() + ['weights']
+    group_stats = factor_data2.groupby(grouper)[col_list].apply(agg)
 
     mean_ret = group_stats.xs('mean', level=-1)
@@ -404,7 +408,7 @@ def quantile_turnover(quantile_factor, quantile, period=1):
 
     quant_names = quantile_factor[quantile_factor == quantile]
     quant_name_sets = quant_names.groupby(
-        level=['date']
+        level=['date'], group_keys=False
     ).apply(lambda x: set(x.index.get_level_values('asset')))
     new_names = (quant_name_sets - quant_name_sets.shift(period)).dropna()
     quant_turnover = new_names.apply(lambda x: len(x)) / quant_name_sets.apply(
@@ -437,7 +441,7 @@ def factor_autocorrelation(factor_data, period=1, rank=True):
     grouper = [factor_data.index.get_level_values('date')]
 
     if rank:
-        ranks = factor_data.groupby(grouper)[['factor']].rank()
+        ranks = factor_data.groupby(grouper, group_keys=False)[['factor']].rank()
     else:
         ranks = factor_data[['factor']]
     asset_factor_rank = ranks.reset_index().pivot(
@@ -509,7 +513,7 @@ def average_cumulative_return(q_fact, demean_by):
 
     returns_bygroup = []
 
-    for group, g_data in factor_data.groupby('group'):
+    for group, g_data in factor_data.groupby('group', group_keys=True):
         g_fq = g_data['factor_quantile']
         if group_adjust:
             demean_by = g_fq  # demeans at group level
@@ -521,7 +525,7 @@
         # Align cumulative return from different dates to the same index
         # then compute mean and std
         #
-        avgcumret = g_fq.groupby(g_fq).apply(
+        avgcumret = g_fq.groupby(g_fq, group_keys=True).apply(
             average_cumulative_return, demean_by
         )
         avgcumret['group'] = group
@@ -534,9 +538,9 @@
 
     if group_adjust:
         all_returns = []
-        for group, g_data in factor_data.groupby('group'):
+        for group, g_data in factor_data.groupby('group', group_keys=True):
             g_fq = g_data['factor_quantile']
-            avgcumret = g_fq.groupby(g_fq).apply(cumulative_return, g_fq)
+            avgcumret = g_fq.groupby(g_fq, group_keys=True).apply(cumulative_return, g_fq)
             all_returns.append(avgcumret)
         q_returns = pd.concat(all_returns, axis=1)
         q_returns = pd.DataFrame(
@@ -548,7 +552,7 @@
         return q_returns.unstack(level=1).stack(level=0)
     elif demeaned:
         fq = factor_data['factor_quantile']
-        return fq.groupby(fq).apply(average_cumulative_return, fq)
+        return fq.groupby(fq, group_keys=True).apply(average_cumulative_return, fq)
     else:
         fq = factor_data['factor_quantile']
-        return fq.groupby(fq).apply(average_cumulative_return, None)
+        return fq.groupby(fq, group_keys=True).apply(average_cumulative_return, None)
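Reviewer note: nearly every hunk in performance.py pins group_keys explicitly because pandas 2.0 changed the behavior of groupby(...).apply -- when the applied function returns a like-indexed object, the group label is now prepended as an extra index level unless group_keys=False. A toy sketch of the difference (illustrative names only):

    import pandas as pd

    s = pd.Series([1.0, 2.0, 3.0, 4.0],
                  index=pd.Index(['a', 'a', 'b', 'b'], name='grp'))

    def demean(g):
        return g - g.mean()  # returns a Series with the group's own index

    kept = s.groupby(level='grp', group_keys=True).apply(demean)
    flat = s.groupby(level='grp', group_keys=False).apply(demean)
    print(kept.index.nlevels, flat.index.nlevels)  # 2 vs. 1

That is why the weight/return paths above pass group_keys=False (they want the old flat index back), while the average_cumulative_return paths pass group_keys=True (the quantile label is wanted as an index level).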
diff --git a/jqfactor_analyzer/plotting.py b/jqfactor_analyzer/plotting.py
index d7135f0..18bdc22 100644
--- a/jqfactor_analyzer/plotting.py
+++ b/jqfactor_analyzer/plotting.py
@@ -26,8 +26,7 @@
 def plot_returns_table(alpha_beta, mean_ret_quantile, mean_ret_spread_quantile):
-    returns_table = pd.DataFrame()
-    returns_table = returns_table.append(alpha_beta)
+    returns_table = alpha_beta.copy()
     returns_table.loc["Mean Period Wise Return Top Quantile (bps)"] = \
         mean_ret_quantile.iloc[-1] * DECIMAL_TO_BPS
     returns_table.loc["Mean Period Wise Return Bottom Quantile (bps)"] = \
@@ -42,11 +41,11 @@ def plot_returns_table(alpha_beta, mean_ret_quantile, mean_ret_spread_quantile):
 def plot_turnover_table(autocorrelation_data, quantile_turnover):
     turnover_table = pd.DataFrame()
     for period in sorted(quantile_turnover.keys()):
-        for quantile, p_data in quantile_turnover[period].iteritems():
+        for quantile, p_data in quantile_turnover[period].items():
             turnover_table.loc["Quantile {} Mean Turnover ".format(quantile),
                                "{}".format(period)] = p_data.mean()
     auto_corr = pd.DataFrame()
-    for period, p_data in autocorrelation_data.iteritems():
+    for period, p_data in autocorrelation_data.items():
         auto_corr.loc["Mean Factor Rank Autocorrelation", "{}"
                       .format(period)] = p_data.mean()
@@ -71,8 +70,7 @@ def plot_information_table(ic_data):
 
 def plot_quantile_statistics_table(factor_data):
-    quantile_stats = factor_data.groupby('factor_quantile') \
-        .agg(['min', 'max', 'mean', 'std', 'count'])['factor']
+    quantile_stats = factor_data.groupby('factor_quantile', group_keys=False)['factor'].agg(['min', 'max', 'mean', 'std', 'count'])
     quantile_stats['count %'] = quantile_stats['count'] \
         / quantile_stats['count'].sum() * 100.
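Reviewer note: the plotting hunks above handle two outright removals in pandas 2.0, DataFrame.append and Series.iteritems. A short sketch of the replacements (toy values, hypothetical frame):

    import pandas as pd

    alpha_beta = pd.DataFrame({'period_1': [0.02, 1.10]},
                              index=['Ann. alpha', 'beta'])

    # pre-2.0: returns_table = pd.DataFrame().append(alpha_beta)
    returns_table = alpha_beta.copy()        # the patch's replacement here
    stacked = pd.concat([alpha_beta, alpha_beta])  # general append substitute

    s = pd.Series({'period_1': 0.03, 'period_5': 0.05})
    # pre-2.0: for name, val in s.iteritems(): ...
    for name, val in s.items():
        print(name, val)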
@@ -91,7 +89,7 @@ def plot_ic_ts(ic, ax=None):
         ax = np.asarray([ax]).flatten()
 
     ymin, ymax = (None, None)
-    for a, (period, ic) in zip(ax, ic.iteritems()):
+    for a, (period, ic) in zip(ax, ic.items()):
         period_num = period.replace('period_', '')
         ic.plot(alpha=0.7, ax=a, lw=0.7, color='steelblue')
         rolling_mean(
@@ -143,7 +141,7 @@ def plot_ic_hist(ic, ax=None):
         f, ax = plt.subplots(v_spaces, 3, figsize=(18, v_spaces * 6))
         ax = ax.flatten()
 
-    for a, (period, ic) in zip(ax, ic.iteritems()):
+    for a, (period, ic) in zip(ax, ic.items()):
         period_num = period.replace('period_', '')
         sns.distplot(ic.replace(np.nan, 0.), norm_hist=True, ax=a)
         a.set_xlim([-1, 1])
@@ -190,7 +188,7 @@ def plot_ic_qq(ic, theoretical_dist=stats.norm, ax=None):
     else:
         dist_name = ICQQ.get("CUSTOM")
 
-    for a, (period, ic) in zip(ax, ic.iteritems()):
+    for a, (period, ic) in zip(ax, ic.items()):
         period_num = period.replace('period_', '')
         qqplot(
             ic.replace(np.nan, 0.).values,
@@ -254,7 +252,7 @@ def plot_quantile_returns_bar(
     )
     ax = ax.flatten()
 
-    for a, (sc, cor) in zip(ax, mean_ret_by_q.groupby(level='group')):
+    for a, (sc, cor) in zip(ax, mean_ret_by_q.groupby(level='group', group_keys=False)):
         (
             cor.xs(sc, level='group').multiply(DECIMAL_TO_BPS).plot(
                 kind='bar', title=sc, ax=a
@@ -352,8 +350,7 @@ def plot_mean_quantile_returns_spread_time_series(
         ax = [None for a in mean_returns_spread.columns]
         ymin, ymax = (None, None)
 
-        for (i, a), (name, fr_column
-             ) in zip(enumerate(ax), mean_returns_spread.iteritems()):
+        for (i, a), (name, fr_column) in zip(enumerate(ax), mean_returns_spread.items()):
             stdn = None if std_err is None else std_err[name]
             a = plot_mean_quantile_returns_spread_time_series(
                 fr_column, std_err=stdn, bandwidth=bandwidth, ax=a
@@ -504,7 +501,7 @@ def plot_monthly_ic_heatmap(mean_monthly_ic, ax=None):
         [new_index_year, new_index_month], names=["year", "month"]
     )
 
-    for a, (period, ic) in zip(ax, mean_monthly_ic.iteritems()):
+    for a, (period, ic) in zip(ax, mean_monthly_ic.items()):
         periods_num = period.replace('period_', '')
 
         sns.heatmap(
@@ -630,7 +627,7 @@ def plot_quantile_average_cumulative_return(
         ax = ax.flatten()
 
     for i, (quantile, q_ret) in enumerate(
-        avg_cumulative_returns.groupby(level='factor_quantile')
+        avg_cumulative_returns.groupby(level='factor_quantile', group_keys=False)
     ):
 
         mean = q_ret.loc[(quantile, 'mean')]
@@ -659,7 +656,7 @@
         f, ax = plt.subplots(1, 1, figsize=(18, 6))
 
     for i, (quantile, q_ret) in enumerate(
-        avg_cumulative_returns.groupby(level='factor_quantile')
+        avg_cumulative_returns.groupby(level='factor_quantile', group_keys=False)
     ):
 
         mean = q_ret.loc[(quantile, 'mean')]
@@ -702,7 +699,7 @@ def plot_events_distribution(events, num_days=5, full_dates=None, ax=None):
     grouper_label = group.drop_duplicates()
     grouper = group.reindex(events.index.get_level_values('date'))
 
-    count = events.groupby(grouper.values).count()
+    count = events.groupby(grouper.values, group_keys=False).count()
     count = count.reindex(grouper_label.values, fill_value=0)
     count.index = grouper_label.index.map(lambda x: x.strftime('%Y-%m-%d'))
     count.plot(kind="bar", grid=False, ax=ax)
@@ -742,7 +739,7 @@ def plot_missing_events_distribution(
     if full_dates is None:
         full_dates = events.index.get_level_values('date').unique()
 
-    daily_count = events.groupby(level='date').count()
+    daily_count = events.groupby(level='date', group_keys=False).count()
     most_common_count = np.argmax(np.bincount(daily_count))
     daily_missing = daily_count / most_common_count - 1
     daily_missing = daily_missing.reindex(full_dates, fill_value=-1.0)
@@ -750,7 +747,7 @@
     grouper = pd.Series(range(len(full_dates)), index=full_dates) // num_days
     grouper_label = grouper.drop_duplicates()
 
-    missing = daily_missing.groupby(grouper.values).mean()
+    missing = daily_missing.groupby(grouper.values, group_keys=False).mean()
     missing = missing.reindex(grouper_label.values, fill_value=-1.0)
     missing.index = grouper_label.index.map(lambda x: x.strftime('%Y-%m-%d'))
     missing.plot(kind="bar", grid=False, ax=ax)
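Reviewer note: a tiny sketch of the level-based groupby iteration the quantile plots above rely on; the ('factor_quantile', stat) index layout mirrors the library's, the numbers are made up:

    import numpy as np
    import pandas as pd

    idx = pd.MultiIndex.from_product(
        [[1, 2], ['mean', 'std']], names=['factor_quantile', None])
    acr = pd.DataFrame(np.arange(12.).reshape(4, 3), index=idx)

    for quantile, q_ret in acr.groupby(level='factor_quantile',
                                       group_keys=False):
        mean = q_ret.loc[(quantile, 'mean')]  # one stat row per quantile
        print(quantile, mean.values)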
diff --git a/jqfactor_analyzer/prepare.py b/jqfactor_analyzer/prepare.py
index 3cbf3f8..813fe73 100644
--- a/jqfactor_analyzer/prepare.py
+++ b/jqfactor_analyzer/prepare.py
@@ -84,7 +84,7 @@ def quantile_calc(x, _quantiles, _bins, _zero_aware, _no_raise):
            raise ValueError('binning_by_group can only be True when a groupby parameter is provided')
         grouper.append('group')
 
-    factor_quantile = factor_data.groupby(grouper)['factor'] \
+    factor_quantile = factor_data.groupby(grouper, group_keys=False)['factor'] \
         .apply(quantile_calc, quantiles, bins, zero_aware, no_raise)
     factor_quantile.name = 'factor_quantile'
@@ -168,7 +168,7 @@ def demean_forward_returns(factor_data, grouper=None):
 
     cols = get_forward_returns_columns(factor_data.columns)
     factor_data[cols] = factor_data.groupby(
-        grouper, as_index=False
+        grouper, as_index=False, group_keys=False
     )[cols.append(pd.Index(['weights']))].apply(
         lambda x: x[cols].subtract(
             np.average(x[cols], axis=0, weights=x['weights'].fillna(0.0).values),
@@ -300,7 +300,7 @@ def get_clean_factor(factor,
     if 'weights' in merged_data.columns:
         merged_data['weights'] = merged_data.set_index(
             'factor_quantile', append=True
-        ).groupby(level=['date', 'factor_quantile'])['weights'].apply(
+        ).groupby(level=['date', 'factor_quantile'], group_keys=False)['weights'].apply(
             lambda s: s.divide(s.sum())
         ).reset_index('factor_quantile', drop=True)
@@ -410,7 +410,7 @@ def common_start_returns(
 
     all_returns = []
 
-    for timestamp, df in factor.groupby(level='date'):
+    for timestamp, df in factor.groupby(level='date', group_keys=False):
 
         equities = df.index.get_level_values('asset')
@@ -428,8 +428,7 @@
             .index.get_level_values('asset')
         equities_slice |= set(demean_equities)
 
-        series = returns.loc[returns.
-                             index[starting_index:ending_index], equities_slice]
+        series = returns.loc[returns.index[starting_index:ending_index], list(equities_slice)]
         series.index = range(
             starting_index - day_zero_index, ending_index - day_zero_index
         )
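Reviewer note: the demean_forward_returns hunk above keeps the weighted demeaning intact; a self-contained sketch of that np.average pattern (toy numbers):

    import numpy as np
    import pandas as pd

    df = pd.DataFrame({'period_1': [0.01, 0.03, 0.02],
                       'weights':  [0.50, 0.25, 0.25]})
    cols = ['period_1']

    demeaned = df[cols].subtract(
        np.average(df[cols], axis=0,
                   weights=df['weights'].fillna(0.0).values),
        axis=1)
    print(demeaned)  # the weighted mean 0.0175 is removed from each row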
diff --git a/tests/test_performance.py b/tests/test_performance.py
index 2384205..ebf5f98 100644
--- a/tests/test_performance.py
+++ b/tests/test_performance.py
@@ -76,8 +76,7 @@ def test_information_coefficient(factor_data,
      dr,
      [-1., -1.]),
     (factor_data, [1, 2, 3, 4, 4, 3, 2, 1], False, False, 'W',
      pd.DatetimeIndex(['2015-01-04'], name='date', freq='W-SUN'),
      [1.]),
-    (factor_data, [1, 2, 3, 4, 4, 3, 2, 1], False, True, None,
-     pd.Int64Index([1, 2], name='group'), [1., 1.]),
+    (factor_data, [1, 2, 3, 4, 4, 3, 2, 1], False, True, None, pd.Index([1, 2], name='group', dtype='int64'), [1., 1.]),
     (factor_data, [1, 2, 3, 4, 4, 3, 2, 1], False, True, 'W',
      pd.MultiIndex.from_product(
          [pd.DatetimeIndex(['2015-01-04'], name='date', freq='W-SUN'),

From 24c50102bcb48eb610d8d30360b61b59e67e3f32 Mon Sep 17 00:00:00 2001
From: sunshe35
Date: Sat, 15 Jun 2024 22:23:28 +0800
Subject: [PATCH 2/3] Fix other incompatibility bugs
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 jqfactor_analyzer/analyze.py | 19 +++++++++++++++----
 jqfactor_analyzer/prepare.py | 10 ++++------
 2 files changed, 19 insertions(+), 10 deletions(-)

diff --git a/jqfactor_analyzer/analyze.py b/jqfactor_analyzer/analyze.py
index b55cc3b..0a77a5c 100644
--- a/jqfactor_analyzer/analyze.py
+++ b/jqfactor_analyzer/analyze.py
@@ -608,7 +608,12 @@ def _calc_ic_mean_n_day_lag(self, n, group_adjust=False, by_group=False, method=
             group_adjust=group_adjust, by_group=by_group, method=method
         )
-        return ac.mean(level=('group' if by_group else None))
+        if by_group:
+            _mean = ac.groupby(level='group').mean()
+        else:
+            _mean = ac.mean()
+        # return ac.mean(level=('group' if by_group else None))
+        return _mean
 
     def calc_ic_mean_n_days_lag(self, n=10, group_adjust=False, by_group=False, method=None):
         """Mean factor information coefficient (IC) lagged 0 - n days
@@ -627,9 +632,15 @@ def calc_ic_mean_n_days_lag(self, n=10, group_adjust=False, by_group=False, meth
         - 'rank': compute IC with the rank (Spearman) correlation
         - 'normal': compute IC with the ordinary (Pearson) correlation
         """
-        ic_mean = [self.calc_factor_information_coefficient(
-            group_adjust=group_adjust, by_group=by_group, method=method,
-        ).mean(level=('group' if by_group else None))]
+        df_coef = self.calc_factor_information_coefficient(group_adjust=group_adjust, by_group=by_group, method=method)
+        if by_group:
+            _mean = df_coef.groupby(level='group').mean()
+        else:
+            _mean = df_coef.mean()
+        ic_mean = [_mean]
+        # ic_mean = [self.calc_factor_information_coefficient(
+        #     group_adjust=group_adjust, by_group=by_group, method=method,
+        # ).mean(level=('group' if by_group else None))]
 
         for lag in range(1, n + 1):
             ic_mean.append(self._calc_ic_mean_n_day_lag(
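Reviewer note: the analyze.py hunks above replace Series.mean(level=...), which pandas 2.0 removed, with an explicit groupby on the index level. Toy sketch:

    import pandas as pd

    idx = pd.MultiIndex.from_product(
        [pd.to_datetime(['2015-01-01', '2015-01-02']), ['g1', 'g2']],
        names=['date', 'group'])
    ic = pd.Series([0.1, 0.2, 0.3, 0.4], index=idx)

    # pre-2.0: ic.mean(level='group')
    print(ic.groupby(level='group').mean())  # g1: 0.2, g2: 0.3

The same release removed the specialized integer index classes, hence pd.Index([1, 2], name='group', dtype='int64') instead of pd.Int64Index in the test fixture above.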
diff --git a/jqfactor_analyzer/prepare.py b/jqfactor_analyzer/prepare.py
index 813fe73..228fadc 100644
--- a/jqfactor_analyzer/prepare.py
+++ b/jqfactor_analyzer/prepare.py
@@ -58,12 +58,10 @@ def quantize_factor(
     def quantile_calc(x, _quantiles, _bins, _zero_aware, _no_raise):
         try:
             if _quantiles is not None and _bins is None and not _zero_aware:
-                return pd.qcut(x, _quantiles, labels=False) + 1
+                return pd.qcut(x, _quantiles, labels=False, duplicates='drop') + 1
             elif _quantiles is not None and _bins is None and _zero_aware:
-                pos_quantiles = pd.qcut(x[x >= 0], _quantiles // 2,
-                                        labels=False) + _quantiles // 2 + 1
-                neg_quantiles = pd.qcut(x[x < 0], _quantiles // 2,
-                                        labels=False) + 1
+                pos_quantiles = pd.qcut(x[x >= 0], _quantiles // 2, labels=False, duplicates='drop') + _quantiles // 2 + 1
+                neg_quantiles = pd.qcut(x[x < 0], _quantiles // 2, labels=False, duplicates='drop') + 1
                 return pd.concat([pos_quantiles, neg_quantiles]).sort_index()
             elif _bins is not None and _quantiles is None and not _zero_aware:
                 return pd.cut(x, _bins, labels=False) + 1
@@ -117,7 +115,7 @@ def compute_forward_returns(factor,
     """
 
     factor_dateindex = factor.index.levels[0]
-    factor_dateindex = factor_dateindex.intersection(prices.index)
+    factor_dateindex = pd.to_datetime(factor_dateindex).intersection(prices.index)
 
     if len(factor_dateindex) == 0:
         raise ValueError("Factor and prices indices don't match: make sure "

From 721029ed3c88141e391a0e8d7fc3a299bf2bb68c Mon Sep 17 00:00:00 2001
From: sunshe35
Date: Wed, 19 Jun 2024 17:00:17 +0800
Subject: [PATCH 3/3] Update the get_industry logic
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 jqfactor_analyzer/data.py     | 35 ++++++++++++++++++++++-----------
 jqfactor_analyzer/plotting.py |  3 ++-
 2 files changed, 26 insertions(+), 12 deletions(-)

diff --git a/jqfactor_analyzer/data.py b/jqfactor_analyzer/data.py
index ba1a02f..184054c 100644
--- a/jqfactor_analyzer/data.py
+++ b/jqfactor_analyzer/data.py
@@ -187,6 +187,9 @@ def _get_price(self, securities, start_date=None, end_date=None, count=None,
 
     def get_prices(self, securities, start_date=None, end_date=None,
                    period=None):
+        '''
+        Call jqdatasdk.get_price and reshape the result into factor format: {index: day, column: code, values: price}
+        '''
         if period is not None:
             trade_days = self._get_trade_days(start_date=end_date)
             if len(trade_days):
@@ -202,18 +205,18 @@ def get_prices(self, securities, start_date=None, end_date=None,
     def _get_industry(self, securities, start_date, end_date,
                       industry='jq_l1'):
         trade_days = self._get_trade_days(start_date, end_date)
-        industries = map(partial(self.api.get_industry, securities), trade_days)
-
+
+        day_ind_dict = {day: self.api.get_industry(securities, day) for day in trade_days}
         industries = {
-            d: {
-                s: ind.get(s).get(industry, dict()).get('industry_name', 'NA')
-                for s in securities
-            }
-            for d, ind in zip(trade_days, industries)
-        }
-        return pd.DataFrame(industries).T.sort_index()
+            day: {code: ind.get(industry, dict()).get('industry_name', 'NA') for code, ind in ind_dict.items()}
+            for day, ind_dict in day_ind_dict.items()}
+        df_ind = pd.DataFrame(industries).T.sort_index()
+        return df_ind
 
     def get_groupby(self, securities, start_date, end_date):
+        '''
+        Get the industry classification for every code and day
+        '''
         return self._get_industry(securities=securities,
                                   start_date=start_date, end_date=end_date,
                                   industry=self.industry)
@@ -279,6 +282,15 @@ def _get_average_weights(self, securities, start_date, end_date):
         return {sec: 1.0 for sec in securities}
 
     def get_weights(self, securities, start_date, end_date):
+        '''
+        Per-stock weights used when computing quantile returns; default 'avg'
+        - 'avg': equal weight
+        - 'mktcap': weight by total market cap
+        - 'ln_mktcap': weight by log of total market cap
+        - 'cmktcap': weight by circulating market cap
+        - 'ln_cmktcap': weight by log of circulating market cap
+        '''
+
         start_date = date2str(start_date)
         end_date = date2str(end_date)
 
@@ -295,8 +307,9 @@ def get_weights(self, securities, start_date, end_date):
         else:
             raise ValueError('invalid weight_method')
 
-        return weight_api(securities=securities, start_date=start_date,
-                          end_date=end_date)
+
+        weights = weight_api(securities=securities, start_date=start_date, end_date=end_date)
+        return weights
 
     @property
     def apis(self):
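Reviewer note: duplicates='drop' changes qcut's failure mode on heavily tied factor values -- instead of raising on non-unique bin edges it merges them, yielding fewer quantiles. A toy illustration:

    import pandas as pd

    x = pd.Series([0.0, 0.0, 0.0, 0.0, 1.0, 2.0])
    # pd.qcut(x, 4, labels=False)  # raises: 'Bin edges must be unique'
    q = pd.qcut(x, 4, labels=False, duplicates='drop') + 1
    print(q.tolist())  # [1, 1, 1, 1, 2, 2] -- only two bins survive

Worth flagging in review: with duplicates='drop' the number of quantile labels becomes data-dependent, so callers expecting exactly _quantiles groups may see fewer.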
diff --git a/jqfactor_analyzer/plotting.py b/jqfactor_analyzer/plotting.py
index 18bdc22..f7e2c05 100644
--- a/jqfactor_analyzer/plotting.py
+++ b/jqfactor_analyzer/plotting.py
@@ -143,7 +143,8 @@ def plot_ic_hist(ic, ax=None):
 
     for a, (period, ic) in zip(ax, ic.items()):
         period_num = period.replace('period_', '')
-        sns.distplot(ic.replace(np.nan, 0.), norm_hist=True, ax=a)
+        # sns.distplot(ic.replace(np.nan, 0.), norm_hist=True, ax=a)
+        sns.histplot(ic.replace(np.nan, 0.), kde=True, ax=a, stat="density")
         a.set_xlim([-1, 1])
         a.set(title=ICHIST.get("TITLE") % period_num, xlabel='IC')
         a.text(