From dd87ff443cb0863943fed8a6af66857eb6c48072 Mon Sep 17 00:00:00 2001
From: sunshe35
Date: Tue, 11 Jun 2024 17:19:28 +0800
Subject: [PATCH 1/3] update pandas for version 2.2.2

---
 .gitignore                       |  6 +++
 jqfactor_analyzer/analyze.py     |  6 +--
 jqfactor_analyzer/performance.py | 64 +++++++++++++++++---------------
 jqfactor_analyzer/plotting.py    | 33 ++++++++--------
 jqfactor_analyzer/prepare.py     | 11 +++---
 tests/test_performance.py        |  3 +-
 6 files changed, 64 insertions(+), 59 deletions(-)

diff --git a/.gitignore b/.gitignore
index 894a44c..c8ee46d 100644
--- a/.gitignore
+++ b/.gitignore
@@ -102,3 +102,9 @@ venv.bak/
 
 # mypy
 .mypy_cache/
+
+# test file
+test.py
+test.ipynb
+test?.py
+test?.ipynb
\ No newline at end of file

diff --git a/jqfactor_analyzer/analyze.py b/jqfactor_analyzer/analyze.py
index c6d683b..b55cc3b 100644
--- a/jqfactor_analyzer/analyze.py
+++ b/jqfactor_analyzer/analyze.py
@@ -477,7 +477,7 @@ def calc_mean_information_coefficient(self, group_adjust=False, by_group=False,
         - False: compute IC without industry grouping
         by_time:
         - 'Y': mean by year
-        - 'M': mean by month
+        - 'ME': mean by month
         - None: mean over all dates
         method:
         - 'rank': compute IC with the rank (Spearman) correlation
@@ -854,7 +854,7 @@ def ic_by_group(self):
     def ic_monthly(self):
         ic_monthly = self.calc_mean_information_coefficient(group_adjust=False,
                                                             by_group=False,
-                                                            by_time="M").copy()
+                                                            by_time="ME").copy()
         ic_monthly.index = ic_monthly.index.map(lambda x: x.strftime('%Y-%m'))
         return ic_monthly
@@ -1165,7 +1165,7 @@ def plot_monthly_ic_heatmap(self, group_adjust=False):
         - False: do not use industry-neutral returns
         """
         ic_monthly = self.calc_mean_information_coefficient(
-            group_adjust=group_adjust, by_group=False, by_time="M"
+            group_adjust=group_adjust, by_group=False, by_time="ME"
         )
         pl.plot_monthly_ic_heatmap(ic_monthly)
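Reviewer note: the three analyze.py hunks above are all one pandas change -- since pandas 2.2 the month-end frequency alias is 'ME' and the old 'M' spelling is deprecated. A minimal, self-contained sketch of the new alias (toy data, not part of this patch):

    import numpy as np
    import pandas as pd

    idx = pd.date_range('2024-01-01', periods=90, freq='D')
    ic = pd.Series(np.random.randn(90), index=idx)

    # pandas < 2.2 spelling, now deprecated:  ic.resample('M').mean()
    monthly_ic = ic.resample('ME').mean()  # month-end alias, pandas >= 2.2
    print(monthly_ic)

The same alias works anywhere a time grouper is built, e.g. pd.Grouper(freq='ME'), which is presumably what the by_time argument feeds into here.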
diff --git a/jqfactor_analyzer/performance.py b/jqfactor_analyzer/performance.py
index 3db1108..f0b05b8 100644
--- a/jqfactor_analyzer/performance.py
+++ b/jqfactor_analyzer/performance.py
@@ -49,7 +49,7 @@ def src_ic(group):
         grouper.append('group')
 
     with np.errstate(divide='ignore', invalid='ignore'):
-        ic = factor_data.groupby(grouper).apply(src_ic)
+        ic = factor_data.groupby(grouper, group_keys=False).apply(src_ic)
 
     return ic
@@ -98,7 +98,7 @@ def mean_information_coefficient(
         ic = ic.mean()
 
     else:
-        ic = (ic.reset_index().set_index('date').groupby(grouper).mean())
+        ic = (ic.reset_index().set_index('date').groupby(grouper, group_keys=False).mean())
 
     return ic
@@ -137,17 +137,17 @@ def to_weights(group, is_long_short):
     if group_adjust:
         grouper.append('group')
 
-    weights = factor_data.groupby(grouper)['factor'] \
+    weights = factor_data.groupby(grouper, group_keys=False)['factor'] \
         .apply(to_weights, demeaned)
 
     if group_adjust:
-        weights = weights.groupby(level='date').apply(to_weights, False)
+        weights = weights.groupby(level='date', group_keys=False).apply(to_weights, False)
 
     weighted_returns = \
         factor_data[get_forward_returns_columns(factor_data.columns)] \
        .multiply(weights, axis=0)
 
-    returns = weighted_returns.groupby(level='date').sum()
+    returns = weighted_returns.groupby(level='date', group_keys=False).sum()
 
     return returns
@@ -178,7 +178,7 @@ def factor_alpha_beta(factor_data, demeaned=True, group_adjust=False):
 
     returns = factor_returns(factor_data, demeaned, group_adjust)
 
-    universe_ret = factor_data.groupby(level='date')[
+    universe_ret = factor_data.groupby(level='date', group_keys=False)[
         get_forward_returns_columns(factor_data.columns)] \
         .mean().loc[returns.index]
@@ -233,8 +233,7 @@ def cumulative_returns(returns, period):
     def split_portfolio(ret, period):
         return pd.DataFrame(np.diag(ret))
 
-    sub_portfolios = returns.groupby(
-        np.arange(len(returns.index)) // period, axis=0
+    sub_portfolios = returns.groupby(np.arange(len(returns.index)) // period, axis=0, group_keys=False
     ).apply(split_portfolio, period)
     sub_portfolios.index = returns.index
@@ -264,22 +263,27 @@ def weighted_mean_return(factor_data, grouper):
    """Compute the (annualized) weighted mean / standard deviation"""
     forward_returns_columns = get_forward_returns_columns(factor_data.columns)
 
-    def agg(values, weights):
-        count = len(values)
-        average = np.average(values, weights=weights, axis=0)
+    def agg(df):
+        count = df.shape[0]
+        average = np.average(df.iloc[:, :-1], weights=df.iloc[:, -1], axis=0)
         # Fast and numerically precise
-        variance = np.average(
-            (values - average)**2, weights=weights, axis=0
+        variance = np.average(
+            (df.iloc[:, :-1] - average)**2, weights=df.iloc[:, -1], axis=0
         ) * count / max((count - 1), 1)
-        return pd.Series(
-            [average, np.sqrt(variance), count], index=['mean', 'std', 'count']
-        )
-
-    group_stats = factor_data.groupby(grouper)[
-        forward_returns_columns.append(pd.Index(['weights']))] \
-        .apply(lambda x: x[forward_returns_columns].apply(
-            agg, weights=x['weights'].fillna(0.0).values
-        ))
+
+        # return format
+        _col_list = df.columns.to_list()[:-1]
+        ser_mean = pd.Series(average, index=_col_list, name='mean')
+        ser_std = pd.Series(np.sqrt(variance), index=_col_list, name='std')
+        ser_count = pd.Series(count, index=_col_list, name='count')
+        df_agg = pd.concat([ser_mean, ser_std, ser_count], axis=1).T
+        return df_agg
+
+    factor_data2 = factor_data.copy()
+    factor_data2['weights'] = factor_data2['weights'].fillna(0)
+    col_list = forward_returns_columns.to_list() + ['weights']
+    group_stats = factor_data2.groupby(grouper)[col_list].apply(agg)
 
     mean_ret = group_stats.xs('mean', level=-1)
@@ -404,7 +408,7 @@ def quantile_turnover(quantile_factor, quantile, period=1):
 
     quant_names = quantile_factor[quantile_factor == quantile]
     quant_name_sets = quant_names.groupby(
-        level=['date']
+        level=['date'], group_keys=False
     ).apply(lambda x: set(x.index.get_level_values('asset')))
     new_names = (quant_name_sets - quant_name_sets.shift(period)).dropna()
     quant_turnover = new_names.apply(lambda x: len(x)) / quant_name_sets.apply(
@@ -437,7 +441,7 @@ def factor_autocorrelation(factor_data, period=1, rank=True):
     grouper = [factor_data.index.get_level_values('date')]
 
     if rank:
-        ranks = factor_data.groupby(grouper)[['factor']].rank()
+        ranks = factor_data.groupby(grouper, group_keys=False)[['factor']].rank()
     else:
         ranks = factor_data[['factor']]
     asset_factor_rank = ranks.reset_index().pivot(
@@ -509,7 +513,7 @@ def average_cumulative_return(q_fact, demean_by):
 
     returns_bygroup = []
 
-    for group, g_data in factor_data.groupby('group'):
+    for group, g_data in factor_data.groupby('group', group_keys=True):
         g_fq = g_data['factor_quantile']
         if group_adjust:
             demean_by = g_fq  # demeans at group level
@@ -521,7 +525,7 @@
         # Align cumulative return from different dates to the same index
         # then compute mean and std
         #
-        avgcumret = g_fq.groupby(g_fq).apply(
+        avgcumret = g_fq.groupby(g_fq, group_keys=True).apply(
             average_cumulative_return, demean_by
         )
         avgcumret['group'] = group
@@ -534,9 +538,9 @@
 
     if group_adjust:
         all_returns = []
-        for group, g_data in factor_data.groupby('group'):
+        for group, g_data in factor_data.groupby('group', group_keys=True):
             g_fq = g_data['factor_quantile']
-            avgcumret = g_fq.groupby(g_fq).apply(cumulative_return, g_fq)
+            avgcumret = g_fq.groupby(g_fq, group_keys=True).apply(cumulative_return, g_fq)
             all_returns.append(avgcumret)
         q_returns = pd.concat(all_returns, axis=1)
         q_returns = pd.DataFrame(
@@ -548,7 +552,7 @@
         return q_returns.unstack(level=1).stack(level=0)
     elif demeaned:
         fq = factor_data['factor_quantile']
-        return fq.groupby(fq).apply(average_cumulative_return, fq)
+        return fq.groupby(fq, group_keys=True).apply(average_cumulative_return, fq)
     else:
         fq = factor_data['factor_quantile']
-        return fq.groupby(fq).apply(average_cumulative_return, None)
+        return fq.groupby(fq, group_keys=True).apply(average_cumulative_return, None)
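Reviewer note: nearly every hunk in performance.py pins group_keys explicitly because pandas 2.0 changed the behavior of groupby(...).apply -- when the applied function returns a like-indexed object, the group label is now prepended as an extra index level unless group_keys=False. A toy sketch of the difference (illustrative names only):

    import pandas as pd

    s = pd.Series([1.0, 2.0, 3.0, 4.0],
                  index=pd.Index(['a', 'a', 'b', 'b'], name='grp'))

    def demean(g):
        return g - g.mean()  # returns a Series with the group's own index

    kept = s.groupby(level='grp', group_keys=True).apply(demean)
    flat = s.groupby(level='grp', group_keys=False).apply(demean)
    print(kept.index.nlevels, flat.index.nlevels)  # 2 vs. 1

That is why the weight/return paths above pass group_keys=False (they want the old flat index back), while the average_cumulative_return paths pass group_keys=True (the quantile label is wanted as an index level).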
diff --git a/jqfactor_analyzer/plotting.py b/jqfactor_analyzer/plotting.py
index d7135f0..18bdc22 100644
--- a/jqfactor_analyzer/plotting.py
+++ b/jqfactor_analyzer/plotting.py
@@ -26,8 +26,7 @@
 def plot_returns_table(alpha_beta, mean_ret_quantile, mean_ret_spread_quantile):
-    returns_table = pd.DataFrame()
-    returns_table = returns_table.append(alpha_beta)
+    returns_table = alpha_beta.copy()
     returns_table.loc["Mean Period Wise Return Top Quantile (bps)"] = \
         mean_ret_quantile.iloc[-1] * DECIMAL_TO_BPS
     returns_table.loc["Mean Period Wise Return Bottom Quantile (bps)"] = \
@@ -42,11 +41,11 @@ def plot_returns_table(alpha_beta, mean_ret_quantile, mean_ret_spread_quantile):
 def plot_turnover_table(autocorrelation_data, quantile_turnover):
     turnover_table = pd.DataFrame()
     for period in sorted(quantile_turnover.keys()):
-        for quantile, p_data in quantile_turnover[period].iteritems():
+        for quantile, p_data in quantile_turnover[period].items():
             turnover_table.loc["Quantile {} Mean Turnover ".format(quantile),
                                "{}".format(period)] = p_data.mean()
     auto_corr = pd.DataFrame()
-    for period, p_data in autocorrelation_data.iteritems():
+    for period, p_data in autocorrelation_data.items():
         auto_corr.loc["Mean Factor Rank Autocorrelation", "{}"
                       .format(period)] = p_data.mean()
@@ -71,8 +70,7 @@ def plot_information_table(ic_data):
 
 def plot_quantile_statistics_table(factor_data):
-    quantile_stats = factor_data.groupby('factor_quantile') \
-        .agg(['min', 'max', 'mean', 'std', 'count'])['factor']
+    quantile_stats = factor_data.groupby('factor_quantile', group_keys=False)['factor'].agg(['min', 'max', 'mean', 'std', 'count'])
     quantile_stats['count %'] = quantile_stats['count'] \
         / quantile_stats['count'].sum() * 100.
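Reviewer note: the plotting hunks above handle two outright removals in pandas 2.0, DataFrame.append and Series.iteritems. A short sketch of the replacements (toy values, hypothetical frame):

    import pandas as pd

    alpha_beta = pd.DataFrame({'period_1': [0.02, 1.10]},
                              index=['Ann. alpha', 'beta'])

    # pre-2.0: returns_table = pd.DataFrame().append(alpha_beta)
    returns_table = alpha_beta.copy()        # the patch's replacement here
    stacked = pd.concat([alpha_beta, alpha_beta])  # general append substitute

    s = pd.Series({'period_1': 0.03, 'period_5': 0.05})
    # pre-2.0: for name, val in s.iteritems(): ...
    for name, val in s.items():
        print(name, val)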
@@ -91,7 +89,7 @@ def plot_ic_ts(ic, ax=None):
         ax = np.asarray([ax]).flatten()
 
     ymin, ymax = (None, None)
-    for a, (period, ic) in zip(ax, ic.iteritems()):
+    for a, (period, ic) in zip(ax, ic.items()):
         period_num = period.replace('period_', '')
         ic.plot(alpha=0.7, ax=a, lw=0.7, color='steelblue')
         rolling_mean(
@@ -143,7 +141,7 @@ def plot_ic_hist(ic, ax=None):
         f, ax = plt.subplots(v_spaces, 3, figsize=(18, v_spaces * 6))
         ax = ax.flatten()
 
-    for a, (period, ic) in zip(ax, ic.iteritems()):
+    for a, (period, ic) in zip(ax, ic.items()):
         period_num = period.replace('period_', '')
         sns.distplot(ic.replace(np.nan, 0.), norm_hist=True, ax=a)
         a.set_xlim([-1, 1])
@@ -190,7 +188,7 @@ def plot_ic_qq(ic, theoretical_dist=stats.norm, ax=None):
     else:
         dist_name = ICQQ.get("CUSTOM")
 
-    for a, (period, ic) in zip(ax, ic.iteritems()):
+    for a, (period, ic) in zip(ax, ic.items()):
         period_num = period.replace('period_', '')
         qqplot(
             ic.replace(np.nan, 0.).values,
@@ -254,7 +252,7 @@ def plot_quantile_returns_bar(
     )
     ax = ax.flatten()
 
-    for a, (sc, cor) in zip(ax, mean_ret_by_q.groupby(level='group')):
+    for a, (sc, cor) in zip(ax, mean_ret_by_q.groupby(level='group', group_keys=False)):
         (
             cor.xs(sc, level='group').multiply(DECIMAL_TO_BPS).plot(
                 kind='bar', title=sc, ax=a
@@ -352,8 +350,7 @@ def plot_mean_quantile_returns_spread_time_series(
         ax = [None for a in mean_returns_spread.columns]
         ymin, ymax = (None, None)
 
-        for (i, a), (name, fr_column
-             ) in zip(enumerate(ax), mean_returns_spread.iteritems()):
+        for (i, a), (name, fr_column) in zip(enumerate(ax), mean_returns_spread.items()):
             stdn = None if std_err is None else std_err[name]
             a = plot_mean_quantile_returns_spread_time_series(
                 fr_column, std_err=stdn, bandwidth=bandwidth, ax=a
@@ -504,7 +501,7 @@ def plot_monthly_ic_heatmap(mean_monthly_ic, ax=None):
         [new_index_year, new_index_month], names=["year", "month"]
     )
 
-    for a, (period, ic) in zip(ax, mean_monthly_ic.iteritems()):
+    for a, (period, ic) in zip(ax, mean_monthly_ic.items()):
         periods_num = period.replace('period_', '')
 
         sns.heatmap(
@@ -630,7 +627,7 @@ def plot_quantile_average_cumulative_return(
         ax = ax.flatten()
 
     for i, (quantile, q_ret) in enumerate(
-        avg_cumulative_returns.groupby(level='factor_quantile')
+        avg_cumulative_returns.groupby(level='factor_quantile', group_keys=False)
     ):
 
         mean = q_ret.loc[(quantile, 'mean')]
@@ -659,7 +656,7 @@
         f, ax = plt.subplots(1, 1, figsize=(18, 6))
 
     for i, (quantile, q_ret) in enumerate(
-        avg_cumulative_returns.groupby(level='factor_quantile')
+        avg_cumulative_returns.groupby(level='factor_quantile', group_keys=False)
     ):
 
         mean = q_ret.loc[(quantile, 'mean')]
@@ -702,7 +699,7 @@ def plot_events_distribution(events, num_days=5, full_dates=None, ax=None):
     grouper_label = group.drop_duplicates()
     grouper = group.reindex(events.index.get_level_values('date'))
 
-    count = events.groupby(grouper.values).count()
+    count = events.groupby(grouper.values, group_keys=False).count()
     count = count.reindex(grouper_label.values, fill_value=0)
     count.index = grouper_label.index.map(lambda x: x.strftime('%Y-%m-%d'))
     count.plot(kind="bar", grid=False, ax=ax)
@@ -742,7 +739,7 @@ def plot_missing_events_distribution(
     if full_dates is None:
         full_dates = events.index.get_level_values('date').unique()
 
-    daily_count = events.groupby(level='date').count()
+    daily_count = events.groupby(level='date', group_keys=False).count()
     most_common_count = np.argmax(np.bincount(daily_count))
     daily_missing = daily_count / most_common_count - 1
     daily_missing = daily_missing.reindex(full_dates, fill_value=-1.0)
@@ -750,7 +747,7 @@
     grouper = pd.Series(range(len(full_dates)), index=full_dates) // num_days
     grouper_label = grouper.drop_duplicates()
 
-    missing = daily_missing.groupby(grouper.values).mean()
+    missing = daily_missing.groupby(grouper.values, group_keys=False).mean()
     missing = missing.reindex(grouper_label.values, fill_value=-1.0)
     missing.index = grouper_label.index.map(lambda x: x.strftime('%Y-%m-%d'))
     missing.plot(kind="bar", grid=False, ax=ax)
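Reviewer note: a tiny sketch of the level-based groupby iteration the quantile plots above rely on; the ('factor_quantile', stat) index layout mirrors the library's, the numbers are made up:

    import numpy as np
    import pandas as pd

    idx = pd.MultiIndex.from_product(
        [[1, 2], ['mean', 'std']], names=['factor_quantile', None])
    acr = pd.DataFrame(np.arange(12.).reshape(4, 3), index=idx)

    for quantile, q_ret in acr.groupby(level='factor_quantile',
                                       group_keys=False):
        mean = q_ret.loc[(quantile, 'mean')]  # one stat row per quantile
        print(quantile, mean.values)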
diff --git a/jqfactor_analyzer/prepare.py b/jqfactor_analyzer/prepare.py
index 3cbf3f8..813fe73 100644
--- a/jqfactor_analyzer/prepare.py
+++ b/jqfactor_analyzer/prepare.py
@@ -84,7 +84,7 @@ def quantile_calc(x, _quantiles, _bins, _zero_aware, _no_raise):
            raise ValueError('binning_by_group can only be True when a groupby parameter is provided')
         grouper.append('group')
 
-    factor_quantile = factor_data.groupby(grouper)['factor'] \
+    factor_quantile = factor_data.groupby(grouper, group_keys=False)['factor'] \
         .apply(quantile_calc, quantiles, bins, zero_aware, no_raise)
     factor_quantile.name = 'factor_quantile'
@@ -168,7 +168,7 @@ def demean_forward_returns(factor_data, grouper=None):
 
     cols = get_forward_returns_columns(factor_data.columns)
     factor_data[cols] = factor_data.groupby(
-        grouper, as_index=False
+        grouper, as_index=False, group_keys=False
     )[cols.append(pd.Index(['weights']))].apply(
         lambda x: x[cols].subtract(
             np.average(x[cols], axis=0, weights=x['weights'].fillna(0.0).values),
@@ -300,7 +300,7 @@ def get_clean_factor(factor,
     if 'weights' in merged_data.columns:
         merged_data['weights'] = merged_data.set_index(
             'factor_quantile', append=True
-        ).groupby(level=['date', 'factor_quantile'])['weights'].apply(
+        ).groupby(level=['date', 'factor_quantile'], group_keys=False)['weights'].apply(
             lambda s: s.divide(s.sum())
         ).reset_index('factor_quantile', drop=True)
@@ -410,7 +410,7 @@ def common_start_returns(
 
     all_returns = []
 
-    for timestamp, df in factor.groupby(level='date'):
+    for timestamp, df in factor.groupby(level='date', group_keys=False):
 
         equities = df.index.get_level_values('asset')
@@ -428,8 +428,7 @@
             .index.get_level_values('asset')
         equities_slice |= set(demean_equities)
 
-        series = returns.loc[returns.
-                             index[starting_index:ending_index], equities_slice]
+        series = returns.loc[returns.index[starting_index:ending_index], list(equities_slice)]
         series.index = range(
             starting_index - day_zero_index, ending_index - day_zero_index
         )
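Reviewer note: the demean_forward_returns hunk above keeps the weighted demeaning intact; a self-contained sketch of that np.average pattern (toy numbers):

    import numpy as np
    import pandas as pd

    df = pd.DataFrame({'period_1': [0.01, 0.03, 0.02],
                       'weights':  [0.50, 0.25, 0.25]})
    cols = ['period_1']

    demeaned = df[cols].subtract(
        np.average(df[cols], axis=0,
                   weights=df['weights'].fillna(0.0).values),
        axis=1)
    print(demeaned)  # the weighted mean 0.0175 is removed from each row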
diff --git a/tests/test_performance.py b/tests/test_performance.py
index 2384205..ebf5f98 100644
--- a/tests/test_performance.py
+++ b/tests/test_performance.py
@@ -76,8 +76,7 @@ def test_information_coefficient(factor_data,
      dr,
      [-1., -1.]),
     (factor_data, [1, 2, 3, 4, 4, 3, 2, 1], False, False, 'W',
      pd.DatetimeIndex(['2015-01-04'], name='date', freq='W-SUN'),
      [1.]),
-    (factor_data, [1, 2, 3, 4, 4, 3, 2, 1], False, True, None,
-     pd.Int64Index([1, 2], name='group'), [1., 1.]),
+    (factor_data, [1, 2, 3, 4, 4, 3, 2, 1], False, True, None, pd.Index([1, 2], name='group', dtype='int64'), [1., 1.]),
     (factor_data, [1, 2, 3, 4, 4, 3, 2, 1], False, True, 'W',
      pd.MultiIndex.from_product(
          [pd.DatetimeIndex(['2015-01-04'], name='date', freq='W-SUN'),

From 24c50102bcb48eb610d8d30360b61b59e67e3f32 Mon Sep 17 00:00:00 2001
From: sunshe35
Date: Sat, 15 Jun 2024 22:23:28 +0800
Subject: [PATCH 2/3] Fix other incompatibility bugs
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 jqfactor_analyzer/analyze.py | 19 +++++++++++++++----
 jqfactor_analyzer/prepare.py | 10 ++++------
 2 files changed, 19 insertions(+), 10 deletions(-)

diff --git a/jqfactor_analyzer/analyze.py b/jqfactor_analyzer/analyze.py
index b55cc3b..0a77a5c 100644
--- a/jqfactor_analyzer/analyze.py
+++ b/jqfactor_analyzer/analyze.py
@@ -608,7 +608,12 @@ def _calc_ic_mean_n_day_lag(self, n, group_adjust=False, by_group=False, method=
             group_adjust=group_adjust, by_group=by_group, method=method
         )
-        return ac.mean(level=('group' if by_group else None))
+        if by_group:
+            _mean = ac.groupby(level='group').mean()
+        else:
+            _mean = ac.mean()
+        # return ac.mean(level=('group' if by_group else None))
+        return _mean
 
     def calc_ic_mean_n_days_lag(self, n=10, group_adjust=False, by_group=False, method=None):
         """Mean factor information coefficient (IC) lagged 0 - n days
@@ -627,9 +632,15 @@ def calc_ic_mean_n_days_lag(self, n=10, group_adjust=False, by_group=False, meth
         - 'rank': compute IC with the rank (Spearman) correlation
         - 'normal': compute IC with the ordinary (Pearson) correlation
         """
-        ic_mean = [self.calc_factor_information_coefficient(
-            group_adjust=group_adjust, by_group=by_group, method=method,
-        ).mean(level=('group' if by_group else None))]
+        df_coef = self.calc_factor_information_coefficient(group_adjust=group_adjust, by_group=by_group, method=method)
+        if by_group:
+            _mean = df_coef.groupby(level='group').mean()
+        else:
+            _mean = df_coef.mean()
+        ic_mean = [_mean]
+        # ic_mean = [self.calc_factor_information_coefficient(
+        #     group_adjust=group_adjust, by_group=by_group, method=method,
+        # ).mean(level=('group' if by_group else None))]
 
         for lag in range(1, n + 1):
             ic_mean.append(self._calc_ic_mean_n_day_lag(
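Reviewer note: the analyze.py hunks above replace Series.mean(level=...), which pandas 2.0 removed, with an explicit groupby on the index level. Toy sketch:

    import pandas as pd

    idx = pd.MultiIndex.from_product(
        [pd.to_datetime(['2015-01-01', '2015-01-02']), ['g1', 'g2']],
        names=['date', 'group'])
    ic = pd.Series([0.1, 0.2, 0.3, 0.4], index=idx)

    # pre-2.0: ic.mean(level='group')
    print(ic.groupby(level='group').mean())  # g1: 0.2, g2: 0.3

The same release removed the specialized integer index classes, hence pd.Index([1, 2], name='group', dtype='int64') instead of pd.Int64Index in the test fixture above.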
diff --git a/jqfactor_analyzer/prepare.py b/jqfactor_analyzer/prepare.py
index 813fe73..228fadc 100644
--- a/jqfactor_analyzer/prepare.py
+++ b/jqfactor_analyzer/prepare.py
@@ -58,12 +58,10 @@ def quantize_factor(
     def quantile_calc(x, _quantiles, _bins, _zero_aware, _no_raise):
         try:
             if _quantiles is not None and _bins is None and not _zero_aware:
-                return pd.qcut(x, _quantiles, labels=False) + 1
+                return pd.qcut(x, _quantiles, labels=False, duplicates='drop') + 1
             elif _quantiles is not None and _bins is None and _zero_aware:
-                pos_quantiles = pd.qcut(x[x >= 0], _quantiles // 2,
-                                        labels=False) + _quantiles // 2 + 1
-                neg_quantiles = pd.qcut(x[x < 0], _quantiles // 2,
-                                        labels=False) + 1
+                pos_quantiles = pd.qcut(x[x >= 0], _quantiles // 2, labels=False, duplicates='drop') + _quantiles // 2 + 1
+                neg_quantiles = pd.qcut(x[x < 0], _quantiles // 2, labels=False, duplicates='drop') + 1
                 return pd.concat([pos_quantiles, neg_quantiles]).sort_index()
             elif _bins is not None and _quantiles is None and not _zero_aware:
                 return pd.cut(x, _bins, labels=False) + 1
@@ -117,7 +115,7 @@ def compute_forward_returns(factor,
     """
 
     factor_dateindex = factor.index.levels[0]
-    factor_dateindex = factor_dateindex.intersection(prices.index)
+    factor_dateindex = pd.to_datetime(factor_dateindex).intersection(prices.index)
 
     if len(factor_dateindex) == 0:
         raise ValueError("Factor and prices indices don't match: make sure "

From 721029ed3c88141e391a0e8d7fc3a299bf2bb68c Mon Sep 17 00:00:00 2001
From: sunshe35
Date: Wed, 19 Jun 2024 17:00:17 +0800
Subject: [PATCH 3/3] Update the get_industry logic
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 jqfactor_analyzer/data.py     | 35 ++++++++++++++++++++++-----------
 jqfactor_analyzer/plotting.py |  3 ++-
 2 files changed, 26 insertions(+), 12 deletions(-)

diff --git a/jqfactor_analyzer/data.py b/jqfactor_analyzer/data.py
index ba1a02f..184054c 100644
--- a/jqfactor_analyzer/data.py
+++ b/jqfactor_analyzer/data.py
@@ -187,6 +187,9 @@ def _get_price(self, securities, start_date=None, end_date=None, count=None,
 
     def get_prices(self, securities, start_date=None, end_date=None,
                    period=None):
+        '''
+        Call jqdatasdk.get_price and reshape the result into factor format: {index: day, column: code, values: price}
+        '''
         if period is not None:
             trade_days = self._get_trade_days(start_date=end_date)
             if len(trade_days):
@@ -202,18 +205,18 @@ def get_prices(self, securities, start_date=None, end_date=None,
     def _get_industry(self, securities, start_date, end_date,
                       industry='jq_l1'):
         trade_days = self._get_trade_days(start_date, end_date)
-        industries = map(partial(self.api.get_industry, securities), trade_days)
-
+
+        day_ind_dict = {day: self.api.get_industry(securities, day) for day in trade_days}
         industries = {
-            d: {
-                s: ind.get(s).get(industry, dict()).get('industry_name', 'NA')
-                for s in securities
-            }
-            for d, ind in zip(trade_days, industries)
-        }
-        return pd.DataFrame(industries).T.sort_index()
+            day: {code: ind.get(industry, dict()).get('industry_name', 'NA') for code, ind in ind_dict.items()}
+            for day, ind_dict in day_ind_dict.items()}
+        df_ind = pd.DataFrame(industries).T.sort_index()
+        return df_ind
 
     def get_groupby(self, securities, start_date, end_date):
+        '''
+        Get the industry classification for every code and day
+        '''
         return self._get_industry(securities=securities,
                                   start_date=start_date, end_date=end_date,
                                   industry=self.industry)
@@ -279,6 +282,15 @@ def _get_average_weights(self, securities, start_date, end_date):
         return {sec: 1.0 for sec in securities}
 
     def get_weights(self, securities, start_date, end_date):
+        '''
+        Per-stock weights used when computing quantile returns; default 'avg'
+        - 'avg': equal weight
+        - 'mktcap': weight by total market cap
+        - 'ln_mktcap': weight by log of total market cap
+        - 'cmktcap': weight by circulating market cap
+        - 'ln_cmktcap': weight by log of circulating market cap
+        '''
+
         start_date = date2str(start_date)
         end_date = date2str(end_date)
 
@@ -295,8 +307,9 @@ def get_weights(self, securities, start_date, end_date):
         else:
             raise ValueError('invalid weight_method')
 
-        return weight_api(securities=securities, start_date=start_date,
-                          end_date=end_date)
+
+        weights = weight_api(securities=securities, start_date=start_date, end_date=end_date)
+        return weights
 
     @property
     def apis(self):
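Reviewer note: duplicates='drop' changes qcut's failure mode on heavily tied factor values -- instead of raising on non-unique bin edges it merges them, yielding fewer quantiles. A toy illustration:

    import pandas as pd

    x = pd.Series([0.0, 0.0, 0.0, 0.0, 1.0, 2.0])
    # pd.qcut(x, 4, labels=False)  # raises: 'Bin edges must be unique'
    q = pd.qcut(x, 4, labels=False, duplicates='drop') + 1
    print(q.tolist())  # [1, 1, 1, 1, 2, 2] -- only two bins survive

Worth flagging in review: with duplicates='drop' the number of quantile labels becomes data-dependent, so callers expecting exactly _quantiles groups may see fewer.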
diff --git a/jqfactor_analyzer/plotting.py b/jqfactor_analyzer/plotting.py
index 18bdc22..f7e2c05 100644
--- a/jqfactor_analyzer/plotting.py
+++ b/jqfactor_analyzer/plotting.py
@@ -143,7 +143,8 @@ def plot_ic_hist(ic, ax=None):
 
     for a, (period, ic) in zip(ax, ic.items()):
         period_num = period.replace('period_', '')
-        sns.distplot(ic.replace(np.nan, 0.), norm_hist=True, ax=a)
+        # sns.distplot(ic.replace(np.nan, 0.), norm_hist=True, ax=a)
+        sns.histplot(ic.replace(np.nan, 0.), kde=True, ax=a, stat="density")
         a.set_xlim([-1, 1])
         a.set(title=ICHIST.get("TITLE") % period_num, xlabel='IC')
         a.text(