From ef5f01ab703d41315ab8becb5a0b7f22000dbd9c Mon Sep 17 00:00:00 2001 From: Tim Mensinger Date: Sun, 3 Mar 2024 19:26:45 +0100 Subject: [PATCH 01/19] Call .map() if pandas version is 2.1.0 or greater --- src/estimagic/config.py | 19 +++++++++++--- src/estimagic/optimization/optimize_result.py | 4 +-- src/estimagic/utilities.py | 26 +++++++++++++++++++ .../visualization/estimation_table.py | 22 +++++++++------- 4 files changed, 55 insertions(+), 16 deletions(-) diff --git a/src/estimagic/config.py b/src/estimagic/config.py index dd0fcde1c..efa004724 100644 --- a/src/estimagic/config.py +++ b/src/estimagic/config.py @@ -1,4 +1,6 @@ from pathlib import Path +import pandas as pd +from packaging import version import plotly.express as px @@ -19,9 +21,9 @@ CRITERION_PENALTY_SLOPE = 0.1 CRITERION_PENALTY_CONSTANT = 100 -# ===================================================================================== +# ====================================================================================== # Check Available Packages -# ===================================================================================== +# ====================================================================================== try: from petsc4py import PETSc # noqa: F401 @@ -103,9 +105,18 @@ IS_NUMBA_INSTALLED = True -# ================================================================================= +# ====================================================================================== +# Check if pandas version is newer or equal to version 2.1.0 +# ====================================================================================== + +IS_PANDAS_VERSION_NEWER_OR_EQUAL_TO_2_1_0 = version.parse( + pd.__version__ +) >= version.parse("2.1.0") + + +# ====================================================================================== # Dashboard Defaults -# ================================================================================= +# 
====================================================================================== Y_RANGE_PADDING = 0.05 Y_RANGE_PADDING_UNITS = "absolute" diff --git a/src/estimagic/optimization/optimize_result.py b/src/estimagic/optimization/optimize_result.py index e991f5ebb..7ff2c18c6 100644 --- a/src/estimagic/optimization/optimize_result.py +++ b/src/estimagic/optimization/optimize_result.py @@ -4,7 +4,7 @@ import numpy as np import pandas as pd -from estimagic.utilities import to_pickle +from estimagic.utilities import to_pickle, pandas_df_map @dataclass @@ -128,7 +128,7 @@ def _format_convergence_report(report, algorithm): report = pd.DataFrame.from_dict(report) columns = ["one_step", "five_steps"] - table = report[columns].applymap(_format_float).astype(str) + table = pandas_df_map(report[columns], _format_float).astype(str) for col in "one_step", "five_steps": table[col] = table[col] + _create_stars(report[col]) diff --git a/src/estimagic/utilities.py b/src/estimagic/utilities.py index 4d4d119b3..81f45a0be 100644 --- a/src/estimagic/utilities.py +++ b/src/estimagic/utilities.py @@ -6,6 +6,7 @@ import numpy as np import pandas as pd from scipy.linalg import ldl, qr +from estimagic.config import IS_PANDAS_VERSION_NEWER_OR_EQUAL_TO_2_1_0 with warnings.catch_warnings(): warnings.simplefilter("ignore", category=UserWarning) @@ -321,3 +322,28 @@ def get_rng(seed): else: raise TypeError("seed type must be in {None, int, numpy.random.Generator}.") return rng + + +def pandas_df_map(df, func, na_action=None, **kwargs): + """Apply a function to a Dataframe elementwise. + + pandas has depricated the .applymap() function with version 2.1.0. This function + calls either .map() (if pandas version is greater or equal to 2.1.0) or .applymap() + (if pandas version is smaller than 2.1.0). + + Args: + df (pd.DataFrame): A pandas DataFrame. + func (callable): Python function, returns a single value from a single value. 
+ na_action (str): If 'ignore', propagate NaN values, without passing them to + func. If None, pass NaN values to func. Default is None. + **kwargs: Additional keyword arguments to pass as keywords arguments to func. + + Returns: + pd.DataFrame: Transformed DataFrame. + + """ + if IS_PANDAS_VERSION_NEWER_OR_EQUAL_TO_2_1_0: + out = df.map(func, na_action=na_action, **kwargs) + else: + out = df.applymap(func, na_action=na_action, **kwargs) + return out diff --git a/src/estimagic/visualization/estimation_table.py b/src/estimagic/visualization/estimation_table.py index 48d611c7e..fb5297f87 100644 --- a/src/estimagic/visualization/estimation_table.py +++ b/src/estimagic/visualization/estimation_table.py @@ -3,6 +3,7 @@ from functools import partial from pathlib import Path from warnings import warn +from estimagic.utilities import pandas_df_map import numpy as np import pandas as pd @@ -305,7 +306,7 @@ def render_latex( ci_in_body = False if ci_in_body: - body.loc[("",)] = body.loc[("",)].applymap("{{{}}}".format).values + body.loc[("",)] = pandas_df_map(body.loc[("",)], "{{{}}}".format).values if body.columns.nlevels > 1: column_groups = body.columns.get_level_values(0) else: @@ -1383,22 +1384,23 @@ def _apply_number_format(df_raw, number_format, format_integers): if isinstance(processed_format, (list, tuple)): df_formatted = df_raw.copy(deep=True).astype("float") for formatter in processed_format[:-1]: - df_formatted = df_formatted.applymap(formatter.format).astype("float") - df_formatted = df_formatted.astype("float").applymap( - processed_format[-1].format + df_formatted = pandas_df_map(df_formatted, formatter.format).astype("float") + df_formatted = pandas_df_map( + df_formatted.astype("float"), processed_format[-1].format ) elif isinstance(processed_format, str): - df_formatted = df_raw.astype("str").applymap( - partial(_format_non_scientific_numbers, format_string=processed_format) + df_formatted = pandas_df_map( + df_raw.astype("str"), + 
partial(_format_non_scientific_numbers, format_string=processed_format), ) elif callable(processed_format): - df_formatted = df_raw.applymap(processed_format) + df_formatted = pandas_df_map(df_raw, processed_format) # Don't format integers: set to original value if not format_integers: - integer_locs = df_raw.applymap(_is_integer) - df_formatted[integer_locs] = ( - df_raw[integer_locs].astype(float).applymap("{:.0f}".format) + integer_locs = pandas_df_map(df_raw, _is_integer) + df_formatted[integer_locs] = pandas_df_map( + df_raw[integer_locs].astype(float), "{:.0f}".format ) return df_formatted From 5b0be837aaeff876addb8bb42696e8f6e5a646af Mon Sep 17 00:00:00 2001 From: Tim Mensinger Date: Mon, 4 Mar 2024 09:41:26 +0100 Subject: [PATCH 02/19] Add compat.py module --- src/estimagic/compat.py | 33 +++++++++++++++++++ src/estimagic/optimization/optimize_result.py | 3 +- src/estimagic/utilities.py | 26 --------------- .../visualization/estimation_table.py | 2 +- 4 files changed, 36 insertions(+), 28 deletions(-) create mode 100644 src/estimagic/compat.py diff --git a/src/estimagic/compat.py b/src/estimagic/compat.py new file mode 100644 index 000000000..c73970332 --- /dev/null +++ b/src/estimagic/compat.py @@ -0,0 +1,33 @@ +"""Compatibility module. + +Contains wrapper functions to handle compatibility issues between different versions of +external libraries. + +""" + +from estimagic.config import IS_PANDAS_VERSION_NEWER_OR_EQUAL_TO_2_1_0 + + +def pandas_df_map(df, func, na_action=None, **kwargs): + """Apply a function to a Dataframe elementwise. + + pandas has depricated the .applymap() function with version 2.1.0. This function + calls either .map() (if pandas version is greater or equal to 2.1.0) or .applymap() + (if pandas version is smaller than 2.1.0). + + Args: + df (pd.DataFrame): A pandas DataFrame. + func (callable): Python function, returns a single value from a single value. 
+ na_action (str): If 'ignore', propagate NaN values, without passing them to + func. If None, pass NaN values to func. Default is None. + **kwargs: Additional keyword arguments to pass as keywords arguments to func. + + Returns: + pd.DataFrame: Transformed DataFrame. + + """ + if IS_PANDAS_VERSION_NEWER_OR_EQUAL_TO_2_1_0: + out = df.map(func, na_action=na_action, **kwargs) + else: + out = df.applymap(func, na_action=na_action, **kwargs) + return out diff --git a/src/estimagic/optimization/optimize_result.py b/src/estimagic/optimization/optimize_result.py index 7ff2c18c6..ff2204d87 100644 --- a/src/estimagic/optimization/optimize_result.py +++ b/src/estimagic/optimization/optimize_result.py @@ -4,7 +4,8 @@ import numpy as np import pandas as pd -from estimagic.utilities import to_pickle, pandas_df_map +from estimagic.utilities import to_pickle +from estimagic.compat import pandas_df_map @dataclass diff --git a/src/estimagic/utilities.py b/src/estimagic/utilities.py index 81f45a0be..4d4d119b3 100644 --- a/src/estimagic/utilities.py +++ b/src/estimagic/utilities.py @@ -6,7 +6,6 @@ import numpy as np import pandas as pd from scipy.linalg import ldl, qr -from estimagic.config import IS_PANDAS_VERSION_NEWER_OR_EQUAL_TO_2_1_0 with warnings.catch_warnings(): warnings.simplefilter("ignore", category=UserWarning) @@ -322,28 +321,3 @@ def get_rng(seed): else: raise TypeError("seed type must be in {None, int, numpy.random.Generator}.") return rng - - -def pandas_df_map(df, func, na_action=None, **kwargs): - """Apply a function to a Dataframe elementwise. - - pandas has depricated the .applymap() function with version 2.1.0. This function - calls either .map() (if pandas version is greater or equal to 2.1.0) or .applymap() - (if pandas version is smaller than 2.1.0). - - Args: - df (pd.DataFrame): A pandas DataFrame. - func (callable): Python function, returns a single value from a single value. 
- na_action (str): If 'ignore', propagate NaN values, without passing them to - func. If None, pass NaN values to func. Default is None. - **kwargs: Additional keyword arguments to pass as keywords arguments to func. - - Returns: - pd.DataFrame: Transformed DataFrame. - - """ - if IS_PANDAS_VERSION_NEWER_OR_EQUAL_TO_2_1_0: - out = df.map(func, na_action=na_action, **kwargs) - else: - out = df.applymap(func, na_action=na_action, **kwargs) - return out diff --git a/src/estimagic/visualization/estimation_table.py b/src/estimagic/visualization/estimation_table.py index fb5297f87..19d24c887 100644 --- a/src/estimagic/visualization/estimation_table.py +++ b/src/estimagic/visualization/estimation_table.py @@ -3,7 +3,7 @@ from functools import partial from pathlib import Path from warnings import warn -from estimagic.utilities import pandas_df_map +from estimagic.compat import pandas_df_map import numpy as np import pandas as pd From de9a346720901b40aa50b42c7621bc3b013e29e8 Mon Sep 17 00:00:00 2001 From: Tim Mensinger Date: Mon, 4 Mar 2024 10:00:20 +0100 Subject: [PATCH 03/19] Ignore deprecation warning about MemoizeJac, raised in cyipopt --- pyproject.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/pyproject.toml b/pyproject.toml index 5a0d5721f..59e53025f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -81,6 +81,7 @@ filterwarnings = [ "ignore:Method .ptp is deprecated and will be removed in a future version. Use numpy.ptp instead.", "ignore:In a future version of pandas all arguments of concat except for the argument 'objs' will be keyword-only", "ignore:Please use `MemoizeJac` from the `scipy.optimize` namespace", + "ignore:DeprecationWarning: `scipy.optimize.optimize.MemoizeJac` is deprecated", "ignore:Some algorithms did not converge. 
Their walltime has been set to a very high value instead of infinity because Timedeltas do notsupport infinite values", "ignore:In a future version, the Index constructor will not infer numeric dtypes when passed object-dtype sequences", "ignore:distutils Version classes are deprecated. Use packaging.version instead", From 5fc72cb37ee381ac10eaedb9bc14eab458fdf6bc Mon Sep 17 00:00:00 2001 From: Tim Mensinger Date: Mon, 4 Mar 2024 10:08:47 +0100 Subject: [PATCH 04/19] Use np.prod instead of np.product --- src/estimagic/parameters/block_trees.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/estimagic/parameters/block_trees.py b/src/estimagic/parameters/block_trees.py index 526fd276b..b75095e80 100644 --- a/src/estimagic/parameters/block_trees.py +++ b/src/estimagic/parameters/block_trees.py @@ -37,8 +37,8 @@ def matrix_to_block_tree(matrix, outer_tree, inner_tree): shapes_outer = [np.shape(a) for a in flat_outer_np] shapes_inner = [np.shape(a) for a in flat_inner_np] - block_bounds_outer = np.cumsum([int(np.product(s)) for s in shapes_outer[:-1]]) - block_bounds_inner = np.cumsum([int(np.product(s)) for s in shapes_inner[:-1]]) + block_bounds_outer = np.cumsum([int(np.prod(s)) for s in shapes_outer[:-1]]) + block_bounds_inner = np.cumsum([int(np.prod(s)) for s in shapes_inner[:-1]]) blocks = [] for leaf_outer, s1, submat in zip( @@ -94,8 +94,8 @@ def hessian_to_block_tree(hessian, f_tree, params_tree): shapes_f = [np.shape(a) for a in flat_f_np] shapes_p = [np.shape(a) for a in flat_p_np] - block_bounds_f = np.cumsum([int(np.product(s)) for s in shapes_f[:-1]]) - block_bounds_p = np.cumsum([int(np.product(s)) for s in shapes_p[:-1]]) + block_bounds_f = np.cumsum([int(np.prod(s)) for s in shapes_f[:-1]]) + block_bounds_p = np.cumsum([int(np.prod(s)) for s in shapes_p[:-1]]) sub_block_trees = [] for s0, subarr in zip(shapes_f, np.split(hessian, block_bounds_f, axis=0)): From ce60facb0ace6c0d168a6c1a1f37e0d3f05c286c Mon Sep 17 00:00:00 
2001 From: Tim Mensinger Date: Mon, 4 Mar 2024 10:16:21 +0100 Subject: [PATCH 05/19] Fix faulty ignore of deprecation warning --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 59e53025f..1452c137f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -81,7 +81,7 @@ filterwarnings = [ "ignore:Method .ptp is deprecated and will be removed in a future version. Use numpy.ptp instead.", "ignore:In a future version of pandas all arguments of concat except for the argument 'objs' will be keyword-only", "ignore:Please use `MemoizeJac` from the `scipy.optimize` namespace", - "ignore:DeprecationWarning: `scipy.optimize.optimize.MemoizeJac` is deprecated", + "ignore:`scipy.optimize.optimize.MemoizeJac` is deprecated", "ignore:Some algorithms did not converge. Their walltime has been set to a very high value instead of infinity because Timedeltas do notsupport infinite values", "ignore:In a future version, the Index constructor will not infer numeric dtypes when passed object-dtype sequences", "ignore:distutils Version classes are deprecated. 
Use packaging.version instead", From 4ce3e40cc2d0271d4a7d3256fcf8a8ee35248be7 Mon Sep 17 00:00:00 2001 From: Tim Mensinger Date: Mon, 4 Mar 2024 10:43:16 +0100 Subject: [PATCH 06/19] Call unique() and value_counts() on Series and not list --- src/estimagic/parameters/parameter_groups.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/estimagic/parameters/parameter_groups.py b/src/estimagic/parameters/parameter_groups.py index cba3cb536..567a8b895 100644 --- a/src/estimagic/parameters/parameter_groups.py +++ b/src/estimagic/parameters/parameter_groups.py @@ -35,10 +35,10 @@ def get_params_groups_and_short_names(params, free_mask, max_group_size=8): names.append(name) # if every parameter has its own group, they should all actually be in one group - if len(pd.unique(groups)) == len(groups): + if len(pd.Series(groups).unique()) == len(groups): groups = ["Parameters"] * len(groups) - counts = pd.value_counts(groups) + counts = pd.Series(groups).value_counts() to_be_split = counts[counts > max_group_size] for group_name, n_occurrences in to_be_split.items(): split_group_names = _split_long_group( From f18dbfceaca652190717b9bef28aec8dc2a93a27 Mon Sep 17 00:00:00 2001 From: Tim Mensinger Date: Mon, 4 Mar 2024 11:06:50 +0100 Subject: [PATCH 07/19] Do not pass maxiter option to scipy truncated newton --- src/estimagic/optimization/scipy_optimizers.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/estimagic/optimization/scipy_optimizers.py b/src/estimagic/optimization/scipy_optimizers.py index 661956f27..143e5d561 100644 --- a/src/estimagic/optimization/scipy_optimizers.py +++ b/src/estimagic/optimization/scipy_optimizers.py @@ -355,7 +355,6 @@ def scipy_truncated_newton( upper_bounds, *, stopping_max_criterion_evaluations=STOPPING_MAX_CRITERION_EVALUATIONS, - stopping_max_iterations=STOPPING_MAX_ITERATIONS, convergence_absolute_criterion_tolerance=CONVERGENCE_ABSOLUTE_CRITERION_TOLERANCE, 
convergence_absolute_params_tolerance=CONVERGENCE_ABSOLUTE_PARAMS_TOLERANCE, convergence_absolute_gradient_tolerance=CONVERGENCE_ABSOLUTE_GRADIENT_TOLERANCE, @@ -381,7 +380,6 @@ def scipy_truncated_newton( "xtol": convergence_absolute_params_tolerance, "gtol": convergence_absolute_gradient_tolerance, "maxfun": stopping_max_criterion_evaluations, - "maxiter": stopping_max_iterations, "maxCGit": max_hess_evaluations_per_iteration, "stepmx": max_step_for_line_search, "minfev": func_min_estimate, From d3d07cbab3e5ffe46d73cf4cc9ba5ef8b8a077c2 Mon Sep 17 00:00:00 2001 From: Tim Mensinger Date: Mon, 4 Mar 2024 11:07:07 +0100 Subject: [PATCH 08/19] Ignore deprecation of array to scalar conversion --- pyproject.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/pyproject.toml b/pyproject.toml index 1452c137f..ad7f7a157 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -92,6 +92,7 @@ filterwarnings = [ "ignore:Widget.widget_types is deprecated", "ignore:Widget.widgets is deprecated", "ignore:Parallelization together with", + "ignore:Conversion of an array with ndim > 0 to a scalar is deprecated", ] addopts = ["--doctest-modules"] markers = [ From 3c072c390299c2066f74006e7e288f59395e3924 Mon Sep 17 00:00:00 2001 From: Tim Mensinger Date: Mon, 4 Mar 2024 11:28:12 +0100 Subject: [PATCH 09/19] Use ffill() instead of fillna(method=ffill) --- src/estimagic/visualization/deviation_plot.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/estimagic/visualization/deviation_plot.py b/src/estimagic/visualization/deviation_plot.py index f7ac205d7..5756ba3ef 100644 --- a/src/estimagic/visualization/deviation_plot.py +++ b/src/estimagic/visualization/deviation_plot.py @@ -68,7 +68,7 @@ def deviation_plot( names=["problem", "algorithm", runtime_measure], ) ) - .fillna(method="ffill") + .ffill() .reset_index() ) average_deviations = ( From 7af5cb6d43fe5ad3ae26eeb8d12247bdb707c2b4 Mon Sep 17 00:00:00 2001 From: Tim Mensinger Date: Mon, 4 Mar 2024 11:41:20 +0100 
Subject: [PATCH 10/19] Use replace with categorical columns properly --- tests/inference/test_bootstrap.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/inference/test_bootstrap.py b/tests/inference/test_bootstrap.py index ab17ef937..726a7d72a 100644 --- a/tests/inference/test_bootstrap.py +++ b/tests/inference/test_bootstrap.py @@ -65,9 +65,9 @@ def expected(): def seaborn_example(): out = {} - df = sns.load_dataset("exercise", index_col=0) + raw = sns.load_dataset("exercise", index_col=0) replacements = {"1 min": 1, "15 min": 15, "30 min": 30} - df = df.replace({"time": replacements}) + df = raw.assign(time=raw.time.cat.rename_categories(replacements).astype(int)) df["constant"] = 1 lower_ci = pd.Series([90.709236, 0.151193], index=["constant", "time"]) From 7345c056519fadb588d9319935f3d6799e9da63c Mon Sep 17 00:00:00 2001 From: Tim Mensinger Date: Mon, 4 Mar 2024 12:12:30 +0100 Subject: [PATCH 11/19] Remove usage of DataFrameGroupBy.grouper --- src/estimagic/parameters/consolidate_constraints.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/estimagic/parameters/consolidate_constraints.py b/src/estimagic/parameters/consolidate_constraints.py index 875edd2c6..f71ab1cb9 100644 --- a/src/estimagic/parameters/consolidate_constraints.py +++ b/src/estimagic/parameters/consolidate_constraints.py @@ -592,7 +592,7 @@ def _drop_redundant_linear_constraints(weights, rhs): new_rhs (pd.DataFrame) """ - weights["dupl_group"] = weights.groupby(list(weights.columns)).grouper.group_info[0] + weights["dupl_group"] = weights.groupby(list(weights.columns)).ngroup() rhs["dupl_group"] = weights["dupl_group"] weights.set_index("dupl_group", inplace=True) From 07a8dd7d5c8f030a37c30d5154f554801056e9bc Mon Sep 17 00:00:00 2001 From: Tim Mensinger Date: Mon, 4 Mar 2024 12:20:56 +0100 Subject: [PATCH 12/19] Convert dataframe to float before assigning np.inf values --- src/estimagic/visualization/profile_plot.py | 7 +++---- 
tests/visualization/test_profile_plot.py | 12 ++++++------ 2 files changed, 9 insertions(+), 10 deletions(-) diff --git a/src/estimagic/visualization/profile_plot.py b/src/estimagic/visualization/profile_plot.py index 992b16e3f..be3fb87a3 100644 --- a/src/estimagic/visualization/profile_plot.py +++ b/src/estimagic/visualization/profile_plot.py @@ -160,13 +160,12 @@ def create_solution_times(df, runtime_measure, converged_info, return_tidy=True) problem, algorithm and runtime_measure. The values are either the number of evaluations or the walltime each algorithm needed to achieve the desired precision. If the desired precision was not achieved the value is - set to np.inf (for n_evaluations) or 7000 days (for walltime since there - no infinite value is allowed). + set to np.inf. """ solution_times = df.groupby(["problem", "algorithm"])[runtime_measure].max() - solution_times = solution_times.unstack() - solution_times[~converged_info] = np.inf + solution_times = solution_times.unstack().astype(float) + solution_times = solution_times.where(converged_info, other=np.inf) if not return_tidy: solution_times = solution_times.stack().reset_index() diff --git a/tests/visualization/test_profile_plot.py b/tests/visualization/test_profile_plot.py index 2d3a7fabb..ff1cc393a 100644 --- a/tests/visualization/test_profile_plot.py +++ b/tests/visualization/test_profile_plot.py @@ -57,8 +57,8 @@ def test_create_solution_times_n_evaluations(): ) expected = pd.DataFrame( { - "algo1": [1, 5], - "algo2": [3, np.inf], + "algo1": [1.0, 5], + "algo2": [3.0, np.inf], }, index=pd.Index(["prob1", "prob2"], name="problem"), ) @@ -95,8 +95,8 @@ def test_create_solution_times_n_batches(): ) expected = pd.DataFrame( { - "algo1": [1, 1], - "algo2": [2, np.inf], + "algo1": [1.0, 1], + "algo2": [2.0, np.inf], }, index=pd.Index(["prob1", "prob2"], name="problem"), ) @@ -131,8 +131,8 @@ def test_create_solution_times_walltime(): ) expected = pd.DataFrame( { - "algo1": [1, 5], - "algo2": [3, np.inf], 
+ "algo1": [1.0, 5], + "algo2": [3.0, np.inf], }, index=pd.Index(["prob1", "prob2"], name="problem"), ) From 4c17ecadbe49c248abe4081693dc1b6864ac3ed6 Mon Sep 17 00:00:00 2001 From: Tim Mensinger Date: Mon, 4 Mar 2024 12:50:47 +0100 Subject: [PATCH 13/19] Add action to CI that runs certain tests with pandas 1 --- .envs/testenv-pandas.yml | 32 ++++++++++++++++++++++++++++++++ .github/workflows/main.yml | 31 +++++++++++++++++++++++++++++++ 2 files changed, 63 insertions(+) create mode 100644 .envs/testenv-pandas.yml diff --git a/.envs/testenv-pandas.yml b/.envs/testenv-pandas.yml new file mode 100644 index 000000000..f8b7c6a28 --- /dev/null +++ b/.envs/testenv-pandas.yml @@ -0,0 +1,32 @@ +--- +name: estimagic +channels: + - conda-forge + - nodefaults +dependencies: + - jax + - pygmo + - nlopt # dev, tests + - pip # dev, tests, docs + - pytest # dev, tests + - pytest-cov # tests + - pytest-xdist # dev, tests + - statsmodels # dev, tests + - bokeh<=2.4.3 # run, tests + - click # run, tests + - cloudpickle # run, tests + - joblib # run, tests + - numpy>=1.17.0 # run, tests + - pandas<2.0.0 # run, tests + - plotly # run, tests + - pybaum >= 0.1.2 # run, tests + - scipy>=1.2.1 # run, tests + - sqlalchemy # run, tests + - tranquilo>=0.0.4 # dev, tests + - pip: # dev, tests, docs + - DFO-LS # dev, tests + - Py-BOBYQA # dev, tests + - fides==0.7.4 # dev, tests + - kaleido # dev, tests + - simoptlib==1.0.1 # dev, tests + - -e ../ diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index cae534bd7..b3e4f244e 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -72,6 +72,37 @@ jobs: run: | micromamba activate estimagic pytest -m "not slow and not jax" + run-tests-with-old-pandas: + name: Run tests for ${{ matrix.os}} on ${{ matrix.python-version }} with pandas 1 + runs-on: ${{ matrix.os }} + strategy: + fail-fast: false + matrix: + os: + - ubuntu-latest + - macos-latest + - windows-latest + python-version: + - '3.9' + - '3.10' + - '3.11' + 
steps: + - uses: actions/checkout@v3 + - name: create build environment + uses: mamba-org/provision-with-micromamba@main + with: + environment-file: ./.envs/testenv-pandas.yml + environment-name: estimagic + cache-env: true + extra-specs: | + python=${{ matrix.python-version }} + - name: run pytest + shell: bash -l {0} + run: | + micromamba activate estimagic + pytest tests/visualization + pytest tests/parameters + pytest tests/inference code-in-docs: name: Run code snippets in documentation runs-on: ubuntu-latest From bc880628423a37af71cd3640c2c4d1685a5ec649 Mon Sep 17 00:00:00 2001 From: Tim Mensinger Date: Mon, 4 Mar 2024 13:19:15 +0100 Subject: [PATCH 14/19] Remove unused packages from pandas test environment --- .envs/testenv-pandas.yml | 8 -------- 1 file changed, 8 deletions(-) diff --git a/.envs/testenv-pandas.yml b/.envs/testenv-pandas.yml index f8b7c6a28..31c7c6a30 100644 --- a/.envs/testenv-pandas.yml +++ b/.envs/testenv-pandas.yml @@ -4,8 +4,6 @@ channels: - conda-forge - nodefaults dependencies: - - jax - - pygmo - nlopt # dev, tests - pip # dev, tests, docs - pytest # dev, tests @@ -22,11 +20,5 @@ dependencies: - pybaum >= 0.1.2 # run, tests - scipy>=1.2.1 # run, tests - sqlalchemy # run, tests - - tranquilo>=0.0.4 # dev, tests - pip: # dev, tests, docs - - DFO-LS # dev, tests - - Py-BOBYQA # dev, tests - - fides==0.7.4 # dev, tests - - kaleido # dev, tests - - simoptlib==1.0.1 # dev, tests - -e ../ From f326a5b73219720b38e0c2322d7c4bbd4e7ca112 Mon Sep 17 00:00:00 2001 From: Tim Mensinger Date: Mon, 4 Mar 2024 13:46:02 +0100 Subject: [PATCH 15/19] Update environments --- .envs/testenv-linux.yml | 1 + .envs/testenv-others.yml | 1 + .envs/testenv-pandas.yml | 9 ++++++++- .envs/update_envs.py | 10 +++++++++- environment.yml | 1 + 5 files changed, 20 insertions(+), 2 deletions(-) diff --git a/.envs/testenv-linux.yml b/.envs/testenv-linux.yml index a4cd42372..44957d9d5 100644 --- a/.envs/testenv-linux.yml +++ b/.envs/testenv-linux.yml @@ -23,6 +23,7 @@ 
dependencies: - scipy>=1.2.1 # run, tests - sqlalchemy # run, tests - tranquilo>=0.0.4 # dev, tests + - seaborn # dev, tests - pip: # dev, tests, docs - DFO-LS # dev, tests - Py-BOBYQA # dev, tests diff --git a/.envs/testenv-others.yml b/.envs/testenv-others.yml index b2c78c5c0..4d3159408 100644 --- a/.envs/testenv-others.yml +++ b/.envs/testenv-others.yml @@ -22,6 +22,7 @@ dependencies: - scipy>=1.2.1 # run, tests - sqlalchemy # run, tests - tranquilo>=0.0.4 # dev, tests + - seaborn # dev, tests - pip: # dev, tests, docs - DFO-LS # dev, tests - Py-BOBYQA # dev, tests diff --git a/.envs/testenv-pandas.yml b/.envs/testenv-pandas.yml index 31c7c6a30..e98692d46 100644 --- a/.envs/testenv-pandas.yml +++ b/.envs/testenv-pandas.yml @@ -4,6 +4,7 @@ channels: - conda-forge - nodefaults dependencies: + - pandas<2.0.0 - nlopt # dev, tests - pip # dev, tests, docs - pytest # dev, tests @@ -15,10 +16,16 @@ dependencies: - cloudpickle # run, tests - joblib # run, tests - numpy>=1.17.0 # run, tests - - pandas<2.0.0 # run, tests - plotly # run, tests - pybaum >= 0.1.2 # run, tests - scipy>=1.2.1 # run, tests - sqlalchemy # run, tests + - tranquilo>=0.0.4 # dev, tests + - seaborn # dev, tests - pip: # dev, tests, docs + - DFO-LS # dev, tests + - Py-BOBYQA # dev, tests + - fides==0.7.4 # dev, tests + - kaleido # dev, tests + - simoptlib==1.0.1 # dev, tests - -e ../ diff --git a/.envs/update_envs.py b/.envs/update_envs.py index 2d9a3bf07..f0eff0d7a 100644 --- a/.envs/update_envs.py +++ b/.envs/update_envs.py @@ -34,13 +34,21 @@ def main(): test_env_others = deepcopy(test_env) test_env_others.insert(_insert_idx, " - cyipopt<=1.2.0") + ## test environment for pandas version 1 + test_env_pandas = deepcopy(test_env) + test_env_pandas = [line for line in test_env_pandas if "pandas" not in line] + test_env_pandas.insert(_insert_idx, " - pandas<2.0.0") + # create docs testing environment docs_env = [line for line in lines if _keep_line(line, "docs")] docs_env.append(" - -e ../") # add 
local installation # write environments - for name, env in zip(["linux", "others"], [test_env_linux, test_env_others]): + for name, env in zip( + ["linux", "others", "pandas"], + [test_env_linux, test_env_others, test_env_pandas], + ): # Specify newline to avoid wrong line endings on Windows. # See: https://stackoverflow.com/a/69869641 Path(f".envs/testenv-{name}.yml").write_text( diff --git a/environment.yml b/environment.yml index 4506cb1dd..d908fce43 100644 --- a/environment.yml +++ b/environment.yml @@ -35,6 +35,7 @@ dependencies: - sphinx-panels # docs - sphinxcontrib-bibtex # docs - tranquilo>=0.0.4 # dev, tests + - seaborn # dev, tests - pip: # dev, tests, docs - DFO-LS # dev, tests - Py-BOBYQA # dev, tests From 606dddf56e2f4fc4fad3971b846d26042eb26d42 Mon Sep 17 00:00:00 2001 From: Tim Mensinger Date: Tue, 5 Mar 2024 17:37:28 +0100 Subject: [PATCH 16/19] Update Python version matrix for CI tests --- .github/workflows/main.yml | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index b3e4f244e..086ddbb3d 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -24,6 +24,7 @@ jobs: - '3.9' - '3.10' - '3.11' + - '3.12' steps: - uses: actions/checkout@v3 - name: create build environment @@ -57,6 +58,7 @@ jobs: - '3.9' - '3.10' - '3.11' + - '3.12' steps: - uses: actions/checkout@v3 - name: create build environment @@ -80,11 +82,7 @@ jobs: matrix: os: - ubuntu-latest - - macos-latest - - windows-latest python-version: - - '3.9' - - '3.10' - '3.11' steps: - uses: actions/checkout@v3 From 0eac8440fc667bc53df9d71c8d0797eeaf104510 Mon Sep 17 00:00:00 2001 From: Tim Mensinger Date: Tue, 5 Mar 2024 18:43:45 +0100 Subject: [PATCH 17/19] Implement requested changes from review --- .github/workflows/main.yml | 1 - src/estimagic/compat.py | 2 +- src/estimagic/optimization/optimize_result.py | 4 ++-- src/estimagic/parameters/parameter_groups.py | 2 +- 
src/estimagic/visualization/estimation_table.py | 16 ++++++++-------- 5 files changed, 12 insertions(+), 13 deletions(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 086ddbb3d..c7914d284 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -58,7 +58,6 @@ jobs: - '3.9' - '3.10' - '3.11' - - '3.12' steps: - uses: actions/checkout@v3 - name: create build environment diff --git a/src/estimagic/compat.py b/src/estimagic/compat.py index c73970332..8bd69a71c 100644 --- a/src/estimagic/compat.py +++ b/src/estimagic/compat.py @@ -8,7 +8,7 @@ from estimagic.config import IS_PANDAS_VERSION_NEWER_OR_EQUAL_TO_2_1_0 -def pandas_df_map(df, func, na_action=None, **kwargs): +def pd_df_map(df, func, na_action=None, **kwargs): """Apply a function to a Dataframe elementwise. pandas has depricated the .applymap() function with version 2.1.0. This function diff --git a/src/estimagic/optimization/optimize_result.py b/src/estimagic/optimization/optimize_result.py index ff2204d87..5b120d623 100644 --- a/src/estimagic/optimization/optimize_result.py +++ b/src/estimagic/optimization/optimize_result.py @@ -5,7 +5,7 @@ import pandas as pd from estimagic.utilities import to_pickle -from estimagic.compat import pandas_df_map +from estimagic.compat import pd_df_map @dataclass @@ -129,7 +129,7 @@ def _format_convergence_report(report, algorithm): report = pd.DataFrame.from_dict(report) columns = ["one_step", "five_steps"] - table = pandas_df_map(report[columns], _format_float).astype(str) + table = pd_df_map(report[columns], _format_float).astype(str) for col in "one_step", "five_steps": table[col] = table[col] + _create_stars(report[col]) diff --git a/src/estimagic/parameters/parameter_groups.py b/src/estimagic/parameters/parameter_groups.py index 567a8b895..75d840965 100644 --- a/src/estimagic/parameters/parameter_groups.py +++ b/src/estimagic/parameters/parameter_groups.py @@ -35,7 +35,7 @@ def get_params_groups_and_short_names(params, 
free_mask, max_group_size=8): names.append(name) # if every parameter has its own group, they should all actually be in one group - if len(pd.Series(groups).unique()) == len(groups): + if len(set(groups)) == len(groups): groups = ["Parameters"] * len(groups) counts = pd.Series(groups).value_counts() diff --git a/src/estimagic/visualization/estimation_table.py b/src/estimagic/visualization/estimation_table.py index 19d24c887..39dcdd6a3 100644 --- a/src/estimagic/visualization/estimation_table.py +++ b/src/estimagic/visualization/estimation_table.py @@ -3,7 +3,7 @@ from functools import partial from pathlib import Path from warnings import warn -from estimagic.compat import pandas_df_map +from estimagic.compat import pd_df_map import numpy as np import pandas as pd @@ -306,7 +306,7 @@ def render_latex( ci_in_body = False if ci_in_body: - body.loc[("",)] = pandas_df_map(body.loc[("",)], "{{{}}}".format).values + body.loc[("",)] = pd_df_map(body.loc[("",)], "{{{}}}".format).values if body.columns.nlevels > 1: column_groups = body.columns.get_level_values(0) else: @@ -1384,22 +1384,22 @@ def _apply_number_format(df_raw, number_format, format_integers): if isinstance(processed_format, (list, tuple)): df_formatted = df_raw.copy(deep=True).astype("float") for formatter in processed_format[:-1]: - df_formatted = pandas_df_map(df_formatted, formatter.format).astype("float") - df_formatted = pandas_df_map( + df_formatted = pd_df_map(df_formatted, formatter.format).astype("float") + df_formatted = pd_df_map( df_formatted.astype("float"), processed_format[-1].format ) elif isinstance(processed_format, str): - df_formatted = pandas_df_map( + df_formatted = pd_df_map( df_raw.astype("str"), partial(_format_non_scientific_numbers, format_string=processed_format), ) elif callable(processed_format): - df_formatted = pandas_df_map(df_raw, processed_format) + df_formatted = pd_df_map(df_raw, processed_format) # Don't format integers: set to original value if not format_integers: - 
integer_locs = pandas_df_map(df_raw, _is_integer) - df_formatted[integer_locs] = pandas_df_map( + integer_locs = pd_df_map(df_raw, _is_integer) + df_formatted[integer_locs] = pd_df_map( df_raw[integer_locs].astype(float), "{:.0f}".format ) return df_formatted From df15ada6db5d1b4c1459e8f0edffc51b8c14406d Mon Sep 17 00:00:00 2001 From: Tim Mensinger Date: Wed, 6 Mar 2024 11:04:16 +0100 Subject: [PATCH 18/19] Describe why we convert to float --- src/estimagic/visualization/profile_plot.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/estimagic/visualization/profile_plot.py b/src/estimagic/visualization/profile_plot.py index be3fb87a3..7c46a0ef8 100644 --- a/src/estimagic/visualization/profile_plot.py +++ b/src/estimagic/visualization/profile_plot.py @@ -164,8 +164,9 @@ def create_solution_times(df, runtime_measure, converged_info, return_tidy=True) """ solution_times = df.groupby(["problem", "algorithm"])[runtime_measure].max() - solution_times = solution_times.unstack().astype(float) - solution_times = solution_times.where(converged_info, other=np.inf) + solution_times = solution_times.unstack() + # We convert the dtype to float to support the use of np.inf + solution_times = solution_times.astype(float).where(converged_info, other=np.inf) if not return_tidy: solution_times = solution_times.stack().reset_index() From 50c2c24bf0bd68db0f7fdfa15503d128a9ed473e Mon Sep 17 00:00:00 2001 From: Tim Mensinger Date: Wed, 6 Mar 2024 15:41:43 +0100 Subject: [PATCH 19/19] Add comment to main.yml --- .github/workflows/main.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index c7914d284..9ba44f4a9 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -74,6 +74,9 @@ jobs: micromamba activate estimagic pytest -m "not slow and not jax" run-tests-with-old-pandas: + # This job is only for testing if estimagic works with older pandas versions, as + # many pandas functions 
we use will be deprecated in pandas 3. estimagic's behavior + # for older versions is handled in src/estimagic/compat.py. name: Run tests for ${{ matrix.os}} on ${{ matrix.python-version }} with pandas 1 runs-on: ${{ matrix.os }} strategy: