Take care of deprecation warnings from dependencies #482

Merged 21 commits on Mar 6, 2024
Commits
ef5f01a  Call .map() if pandas version is 2.1.0 or greater (timmens, Mar 3, 2024)
5b0be83  Add compat.py module (timmens, Mar 4, 2024)
de9a346  Ignore deprecation warning about MemoizeJac, raised in cyipopt (timmens, Mar 4, 2024)
5fc72cb  Use np.prod instead of np.product (timmens, Mar 4, 2024)
ce60fac  Fix faulty ignore of deprecation warning (timmens, Mar 4, 2024)
4ce3e40  Call unique() and value_counts() on Series and not list (timmens, Mar 4, 2024)
f18dbfc  Do not pass maxiter option to scipy truncated newton (timmens, Mar 4, 2024)
d3d07cb  Ignore deprecation of array to scalar conversion (timmens, Mar 4, 2024)
3c072c3  Use ffill() instead of fillna(method=ffill) (timmens, Mar 4, 2024)
7af5cb6  Use replace with categorical columns properly (timmens, Mar 4, 2024)
7345c05  Remove usage of DataFrameGroupBy.grouper (timmens, Mar 4, 2024)
07a8dd7  Convert dataframe to float before assigning np.inf values (timmens, Mar 4, 2024)
4c17eca  Add action to CI that runs certain tests with pandas 1 (timmens, Mar 4, 2024)
bc88062  Remove unused packages from pandas test environment (timmens, Mar 4, 2024)
f326a5b  Update environments (timmens, Mar 4, 2024)
606dddf  Update Python version matrix for CI tests (timmens, Mar 5, 2024)
598c7d3  Merge branch 'main' into deprecation-warnings (timmens, Mar 5, 2024)
0eac844  Implement requested changes from review (timmens, Mar 5, 2024)
bb0d0e3  Merge branch 'deprecation-warnings' of https://github.com/OpenSourceE… (timmens, Mar 5, 2024)
df15ada  Describe why we convert to float (timmens, Mar 6, 2024)
50c2c24  Add comment to main.yml (timmens, Mar 6, 2024)
1 change: 1 addition & 0 deletions .envs/testenv-linux.yml
@@ -23,6 +23,7 @@ dependencies:
- scipy>=1.2.1 # run, tests
- sqlalchemy # run, tests
- tranquilo>=0.0.4 # dev, tests
- seaborn # dev, tests
- pip: # dev, tests, docs
- DFO-LS # dev, tests
- Py-BOBYQA # dev, tests
1 change: 1 addition & 0 deletions .envs/testenv-others.yml
@@ -22,6 +22,7 @@ dependencies:
- scipy>=1.2.1 # run, tests
- sqlalchemy # run, tests
- tranquilo>=0.0.4 # dev, tests
- seaborn # dev, tests
- pip: # dev, tests, docs
- DFO-LS # dev, tests
- Py-BOBYQA # dev, tests
31 changes: 31 additions & 0 deletions .envs/testenv-pandas.yml
@@ -0,0 +1,31 @@
---
name: estimagic
channels:
- conda-forge
- nodefaults
dependencies:
- pandas<2.0.0
- nlopt # dev, tests
- pip # dev, tests, docs
- pytest # dev, tests
- pytest-cov # tests
- pytest-xdist # dev, tests
- statsmodels # dev, tests
- bokeh<=2.4.3 # run, tests
- click # run, tests
- cloudpickle # run, tests
- joblib # run, tests
- numpy>=1.17.0 # run, tests
- plotly # run, tests
- pybaum >= 0.1.2 # run, tests
- scipy>=1.2.1 # run, tests
- sqlalchemy # run, tests
- tranquilo>=0.0.4 # dev, tests
- seaborn # dev, tests
- pip: # dev, tests, docs
- DFO-LS # dev, tests
- Py-BOBYQA # dev, tests
- fides==0.7.4 # dev, tests
- kaleido # dev, tests
- simoptlib==1.0.1 # dev, tests
- -e ../
10 changes: 9 additions & 1 deletion .envs/update_envs.py
@@ -34,13 +34,21 @@ def main():
test_env_others = deepcopy(test_env)
test_env_others.insert(_insert_idx, " - cyipopt<=1.2.0")

## test environment for pandas version 1
test_env_pandas = deepcopy(test_env)
test_env_pandas = [line for line in test_env_pandas if "pandas" not in line]
test_env_pandas.insert(_insert_idx, " - pandas<2.0.0")

# create docs testing environment

docs_env = [line for line in lines if _keep_line(line, "docs")]
docs_env.append(" - -e ../") # add local installation

# write environments
for name, env in zip(["linux", "others"], [test_env_linux, test_env_others]):
for name, env in zip(
["linux", "others", "pandas"],
[test_env_linux, test_env_others, test_env_pandas],
):
# Specify newline to avoid wrong line endings on Windows.
# See: https://stackoverflow.com/a/69869641
Path(f".envs/testenv-{name}.yml").write_text(
31 changes: 31 additions & 0 deletions .github/workflows/main.yml
@@ -24,6 +24,7 @@ jobs:
- '3.9'
- '3.10'
- '3.11'
- '3.12'
steps:
- uses: actions/checkout@v3
- name: create build environment
@@ -72,6 +73,36 @@
run: |
micromamba activate estimagic
pytest -m "not slow and not jax"
run-tests-with-old-pandas:
# This job is only for testing if estimagic works with older pandas versions, as
# many pandas functions we use will be deprecated in pandas 3. estimagic's behavior
# for older versions is handled in src/estimagic/compat.py.
name: Run tests for ${{ matrix.os}} on ${{ matrix.python-version }} with pandas 1
runs-on: ${{ matrix.os }}
strategy:
fail-fast: false
matrix:
os:
- ubuntu-latest
python-version:
- '3.11'
steps:
- uses: actions/checkout@v3
- name: create build environment
uses: mamba-org/provision-with-micromamba@main
with:
environment-file: ./.envs/testenv-pandas.yml
environment-name: estimagic
cache-env: true
extra-specs: |
python=${{ matrix.python-version }}
- name: run pytest
shell: bash -l {0}
run: |
micromamba activate estimagic
pytest tests/visualization
pytest tests/parameters
pytest tests/inference
code-in-docs:
name: Run code snippets in documentation
runs-on: ubuntu-latest
1 change: 1 addition & 0 deletions environment.yml
@@ -35,6 +35,7 @@ dependencies:
- sphinx-panels # docs
- sphinxcontrib-bibtex # docs
- tranquilo>=0.0.4 # dev, tests
- seaborn # dev, tests
- pip: # dev, tests, docs
- DFO-LS # dev, tests
- Py-BOBYQA # dev, tests
2 changes: 2 additions & 0 deletions pyproject.toml
@@ -81,6 +81,7 @@ filterwarnings = [
"ignore:Method .ptp is deprecated and will be removed in a future version. Use numpy.ptp instead.",
"ignore:In a future version of pandas all arguments of concat except for the argument 'objs' will be keyword-only",
"ignore:Please use `MemoizeJac` from the `scipy.optimize` namespace",
"ignore:`scipy.optimize.optimize.MemoizeJac` is deprecated",
"ignore:Some algorithms did not converge. Their walltime has been set to a very high value instead of infinity because Timedeltas do notsupport infinite values",
"ignore:In a future version, the Index constructor will not infer numeric dtypes when passed object-dtype sequences",
"ignore:distutils Version classes are deprecated. Use packaging.version instead",
@@ -91,6 +92,7 @@ filterwarnings = [
"ignore:Widget.widget_types is deprecated",
"ignore:Widget.widgets is deprecated",
"ignore:Parallelization together with",
"ignore:Conversion of an array with ndim > 0 to a scalar is deprecated",
]
addopts = ["--doctest-modules"]
markers = [
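The entries above are pytest `filterwarnings` patterns: an action, a colon, and a regular expression matched against the start of the warning message. Roughly equivalent to the following sketch (illustrative only, not pytest's actual machinery):

```python
import warnings

# Ignore any warning whose message starts with the given pattern
# (the pattern is interpreted as a regular expression).
warnings.filterwarnings(
    "ignore",
    message="Conversion of an array with ndim > 0 to a scalar is deprecated",
)
```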
33 changes: 33 additions & 0 deletions src/estimagic/compat.py
@@ -0,0 +1,33 @@
"""Compatibility module.

Contains wrapper functions to handle compatibility issues between different versions of
external libraries.

"""

from estimagic.config import IS_PANDAS_VERSION_NEWER_OR_EQUAL_TO_2_1_0


def pd_df_map(df, func, na_action=None, **kwargs):
"""Apply a function to a Dataframe elementwise.

pandas has depricated the .applymap() function with version 2.1.0. This function
calls either .map() (if pandas version is greater or equal to 2.1.0) or .applymap()
(if pandas version is smaller than 2.1.0).

Args:
df (pd.DataFrame): A pandas DataFrame.
func (callable): Python function, returns a single value from a single value.
na_action (str): If 'ignore', propagate NaN values, without passing them to
func. If None, pass NaN values to func. Default is None.
**kwargs: Additional keyword arguments to pass to func.

Returns:
pd.DataFrame: Transformed DataFrame.

"""
if IS_PANDAS_VERSION_NEWER_OR_EQUAL_TO_2_1_0:
out = df.map(func, na_action=na_action, **kwargs)
else:
out = df.applymap(func, na_action=na_action, **kwargs)

return out
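A brief usage sketch of the wrapper defined above (the DataFrame and formatter here are made up for illustration):

```python
import pandas as pd

from estimagic.compat import pd_df_map

df = pd.DataFrame({"a": [1.0, 2.0], "b": [3.0, 4.0]})
formatted = pd_df_map(df, "{:.2f}".format)
# On pandas >= 2.1.0 this dispatches to DataFrame.map, on older versions to
# DataFrame.applymap, so no deprecation warning is raised on either version.
```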
19 changes: 15 additions & 4 deletions src/estimagic/config.py
@@ -1,4 +1,6 @@
from pathlib import Path
import pandas as pd
from packaging import version

import plotly.express as px

@@ -19,9 +21,9 @@
CRITERION_PENALTY_SLOPE = 0.1
CRITERION_PENALTY_CONSTANT = 100

# =====================================================================================
# ======================================================================================
# Check Available Packages
# =====================================================================================
# ======================================================================================

try:
from petsc4py import PETSc # noqa: F401
@@ -103,9 +105,18 @@
IS_NUMBA_INSTALLED = True


# =================================================================================
# ======================================================================================
# Check if pandas version is newer or equal to version 2.1.0
# ======================================================================================

IS_PANDAS_VERSION_NEWER_OR_EQUAL_TO_2_1_0 = version.parse(
pd.__version__
) >= version.parse("2.1.0")


# ======================================================================================
# Dashboard Defaults
# =================================================================================
# ======================================================================================

Y_RANGE_PADDING = 0.05
Y_RANGE_PADDING_UNITS = "absolute"
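The flag is computed with `packaging.version` rather than a plain string comparison because version strings do not sort correctly as text. A small sketch of the difference (version numbers chosen only for illustration):

```python
from packaging import version

# Lexicographic comparison gets multi-digit components wrong ...
assert ("2.9.0" >= "2.10.0") is True  # wrong answer
# ... while parsed versions compare numerically.
assert (version.parse("2.9.0") >= version.parse("2.10.0")) is False
```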
3 changes: 2 additions & 1 deletion src/estimagic/optimization/optimize_result.py
@@ -5,6 +5,7 @@
import pandas as pd

from estimagic.utilities import to_pickle
from estimagic.compat import pd_df_map


@dataclass
@@ -128,7 +129,7 @@ def _format_convergence_report(report, algorithm):
report = pd.DataFrame.from_dict(report)
columns = ["one_step", "five_steps"]

table = report[columns].applymap(_format_float).astype(str)
table = pd_df_map(report[columns], _format_float).astype(str)

for col in "one_step", "five_steps":
table[col] = table[col] + _create_stars(report[col])
2 changes: 0 additions & 2 deletions src/estimagic/optimization/scipy_optimizers.py
@@ -355,7 +355,6 @@ def scipy_truncated_newton(
upper_bounds,
*,
stopping_max_criterion_evaluations=STOPPING_MAX_CRITERION_EVALUATIONS,
stopping_max_iterations=STOPPING_MAX_ITERATIONS,
convergence_absolute_criterion_tolerance=CONVERGENCE_ABSOLUTE_CRITERION_TOLERANCE,
convergence_absolute_params_tolerance=CONVERGENCE_ABSOLUTE_PARAMS_TOLERANCE,
convergence_absolute_gradient_tolerance=CONVERGENCE_ABSOLUTE_GRADIENT_TOLERANCE,
@@ -381,7 +380,6 @@ def scipy_truncated_newton(
"xtol": convergence_absolute_params_tolerance,
"gtol": convergence_absolute_gradient_tolerance,
"maxfun": stopping_max_criterion_evaluations,
"maxiter": stopping_max_iterations,
"maxCGit": max_hess_evaluations_per_iteration,
"stepmx": max_step_for_line_search,
"minfev": func_min_estimate,
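For context, the `maxiter` entry was dropped because newer SciPy releases warn when the truncated-Newton (TNC) method receives it and expect `maxfun` instead; this reading is inferred from the commit message, not from SciPy's changelog. A minimal sketch of a warning-free call:

```python
from scipy.optimize import minimize

res = minimize(
    lambda x: (x**2).sum(),
    x0=[1.0, 2.0],
    method="TNC",
    # Limit function evaluations via maxfun; passing maxiter here is what
    # triggered the deprecation warning this commit avoids.
    options={"maxfun": 100},
)
```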
8 changes: 4 additions & 4 deletions src/estimagic/parameters/block_trees.py
@@ -37,8 +37,8 @@ def matrix_to_block_tree(matrix, outer_tree, inner_tree):
shapes_outer = [np.shape(a) for a in flat_outer_np]
shapes_inner = [np.shape(a) for a in flat_inner_np]

block_bounds_outer = np.cumsum([int(np.product(s)) for s in shapes_outer[:-1]])
block_bounds_inner = np.cumsum([int(np.product(s)) for s in shapes_inner[:-1]])
block_bounds_outer = np.cumsum([int(np.prod(s)) for s in shapes_outer[:-1]])
block_bounds_inner = np.cumsum([int(np.prod(s)) for s in shapes_inner[:-1]])

blocks = []
for leaf_outer, s1, submat in zip(
@@ -94,8 +94,8 @@ def hessian_to_block_tree(hessian, f_tree, params_tree):
shapes_f = [np.shape(a) for a in flat_f_np]
shapes_p = [np.shape(a) for a in flat_p_np]

block_bounds_f = np.cumsum([int(np.product(s)) for s in shapes_f[:-1]])
block_bounds_p = np.cumsum([int(np.product(s)) for s in shapes_p[:-1]])
block_bounds_f = np.cumsum([int(np.prod(s)) for s in shapes_f[:-1]])
block_bounds_p = np.cumsum([int(np.prod(s)) for s in shapes_p[:-1]])

sub_block_trees = []
for s0, subarr in zip(shapes_f, np.split(hessian, block_bounds_f, axis=0)):
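`np.product` is a deprecated alias of `np.prod` (removed in NumPy 2.0), so the swap is behavior-preserving. A quick check with a made-up shape:

```python
import numpy as np

shape = (3, 4)
# Number of elements implied by the shape; identical to what np.product returned.
assert int(np.prod(shape)) == 12
```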
2 changes: 1 addition & 1 deletion src/estimagic/parameters/consolidate_constraints.py
@@ -592,7 +592,7 @@ def _drop_redundant_linear_constraints(weights, rhs):
new_rhs (pd.DataFrame)

"""
weights["dupl_group"] = weights.groupby(list(weights.columns)).grouper.group_info[0]
weights["dupl_group"] = weights.groupby(list(weights.columns)).ngroup()
rhs["dupl_group"] = weights["dupl_group"]
weights.set_index("dupl_group", inplace=True)

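pandas is deprecating direct access to `DataFrameGroupBy.grouper`; `ngroup()` yields the same per-row group labels through public API. A small sketch with toy data (not the real constraint weights):

```python
import pandas as pd

weights = pd.DataFrame({"a": [1, 1, 2], "b": [3, 3, 4]})
# One integer id per row, shared by rows that fall in the same group.
group_ids = weights.groupby(list(weights.columns)).ngroup()
assert group_ids.tolist() == [0, 0, 1]
```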
4 changes: 2 additions & 2 deletions src/estimagic/parameters/parameter_groups.py
@@ -35,10 +35,10 @@ def get_params_groups_and_short_names(params, free_mask, max_group_size=8):
names.append(name)

# if every parameter has its own group, they should all actually be in one group
if len(pd.unique(groups)) == len(groups):
if len(set(groups)) == len(groups):
groups = ["Parameters"] * len(groups)

counts = pd.value_counts(groups)
counts = pd.Series(groups).value_counts()
to_be_split = counts[counts > max_group_size]
for group_name, n_occurrences in to_be_split.items():
split_group_names = _split_long_group(
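Recent pandas deprecates calling the module-level helpers `pd.unique` and `pd.value_counts` on plain Python lists; using `set` and `Series.value_counts` keeps the same results. A sketch with a toy group list:

```python
import pandas as pd

groups = ["a", "a", "b"]
counts = pd.Series(groups).value_counts()
# Same counts the deprecated pd.value_counts(groups) used to return: a -> 2, b -> 1.
assert counts["a"] == 2 and counts["b"] == 1
```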
2 changes: 1 addition & 1 deletion src/estimagic/visualization/deviation_plot.py
@@ -68,7 +68,7 @@ def deviation_plot(
names=["problem", "algorithm", runtime_measure],
)
)
.fillna(method="ffill")
.ffill()
.reset_index()
)
average_deviations = (
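`fillna(method="ffill")` is deprecated in recent pandas in favor of the dedicated `ffill()` method, which fills each gap with the last preceding value. A tiny check with made-up data:

```python
import numpy as np
import pandas as pd

s = pd.Series([1.0, np.nan, 3.0])
# Forward fill: the NaN at position 1 is replaced by the previous value.
assert s.ffill().tolist() == [1.0, 1.0, 3.0]
```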
22 changes: 12 additions & 10 deletions src/estimagic/visualization/estimation_table.py
@@ -3,6 +3,7 @@
from functools import partial
from pathlib import Path
from warnings import warn
from estimagic.compat import pd_df_map

import numpy as np
import pandas as pd
@@ -305,7 +306,7 @@ def render_latex(
ci_in_body = False

if ci_in_body:
body.loc[("",)] = body.loc[("",)].applymap("{{{}}}".format).values
body.loc[("",)] = pd_df_map(body.loc[("",)], "{{{}}}".format).values
if body.columns.nlevels > 1:
column_groups = body.columns.get_level_values(0)
else:
@@ -1383,22 +1384,23 @@ def _apply_number_format(df_raw, number_format, format_integers):
if isinstance(processed_format, (list, tuple)):
df_formatted = df_raw.copy(deep=True).astype("float")
for formatter in processed_format[:-1]:
df_formatted = df_formatted.applymap(formatter.format).astype("float")
df_formatted = df_formatted.astype("float").applymap(
processed_format[-1].format
df_formatted = pd_df_map(df_formatted, formatter.format).astype("float")
df_formatted = pd_df_map(
df_formatted.astype("float"), processed_format[-1].format
)
elif isinstance(processed_format, str):
df_formatted = df_raw.astype("str").applymap(
partial(_format_non_scientific_numbers, format_string=processed_format)
df_formatted = pd_df_map(
df_raw.astype("str"),
partial(_format_non_scientific_numbers, format_string=processed_format),
)
elif callable(processed_format):
df_formatted = df_raw.applymap(processed_format)
df_formatted = pd_df_map(df_raw, processed_format)

# Don't format integers: set to original value
if not format_integers:
integer_locs = df_raw.applymap(_is_integer)
df_formatted[integer_locs] = (
df_raw[integer_locs].astype(float).applymap("{:.0f}".format)
integer_locs = pd_df_map(df_raw, _is_integer)
df_formatted[integer_locs] = pd_df_map(
df_raw[integer_locs].astype(float), "{:.0f}".format
)
return df_formatted

6 changes: 3 additions & 3 deletions src/estimagic/visualization/profile_plot.py
@@ -160,13 +160,13 @@ def create_solution_times(df, runtime_measure, converged_info, return_tidy=True)
problem, algorithm and runtime_measure. The values are either the number
of evaluations or the walltime each algorithm needed to achieve the
desired precision. If the desired precision was not achieved the value is
set to np.inf (for n_evaluations) or 7000 days (for walltime since there
no infinite value is allowed).
set to np.inf.

"""
solution_times = df.groupby(["problem", "algorithm"])[runtime_measure].max()
solution_times = solution_times.unstack()
solution_times[~converged_info] = np.inf
# We convert the dtype to float to support the use of np.inf
solution_times = solution_times.astype(float).where(converged_info, other=np.inf)

if not return_tidy:
solution_times = solution_times.stack().reset_index()
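Assigning `np.inf` into an integer-typed frame triggers pandas 2.x warnings about silent upcasting, hence the explicit float conversion followed by `where`. A minimal sketch with hypothetical data:

```python
import numpy as np
import pandas as pd

solution_times = pd.DataFrame({"algo": [10, 20]})  # integer dtype
converged_info = pd.DataFrame({"algo": [True, False]})
# Convert to float first, then mark non-converged entries as infinite.
result = solution_times.astype(float).where(converged_info, other=np.inf)
# result["algo"] is [10.0, inf] and no upcasting warning is raised.
```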
4 changes: 2 additions & 2 deletions tests/inference/test_bootstrap.py
@@ -65,9 +65,9 @@ def expected():
def seaborn_example():
out = {}

df = sns.load_dataset("exercise", index_col=0)
raw = sns.load_dataset("exercise", index_col=0)
replacements = {"1 min": 1, "15 min": 15, "30 min": 30}
df = df.replace({"time": replacements})
df = raw.assign(time=raw.time.cat.rename_categories(replacements).astype(int))
df["constant"] = 1

lower_ci = pd.Series([90.709236, 0.151193], index=["constant", "time"])
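Recent pandas deprecates the old behavior of `.replace` on categorical columns, so the fixture now renames the categories directly and then casts. A sketch mirroring the change (the data here is made up, not the seaborn exercise dataset):

```python
import pandas as pd

raw = pd.DataFrame({"time": pd.Categorical(["1 min", "15 min", "30 min"])})
replacements = {"1 min": 1, "15 min": 15, "30 min": 30}
# Rename the categories themselves, then cast the column to plain integers.
df = raw.assign(time=raw.time.cat.rename_categories(replacements).astype(int))
assert df["time"].tolist() == [1, 15, 30]
```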