Commit
Merge pull request #52 from NNPDF/refactor_workflow
Refactor benchmark workflow
alecandido authored Oct 28, 2022
2 parents 144993d + 1cd3c83 commit 85ca767
Showing 21 changed files with 380 additions and 10,033 deletions.
42 changes: 18 additions & 24 deletions .github/workflows/bench.yml
@@ -8,43 +8,27 @@ jobs:
runs-on: ubuntu-latest

container:
image: ghcr.io/n3pdf/bench-evol:latest
image: ghcr.io/n3pdf/lhapdf:v2
credentials:
username: ${{ github.repository_owner }}
password: ${{ secrets.GITHUB_TOKEN }}
password: ${{ github.token }}

steps:
- uses: actions/checkout@v2
with:
# tags needed for dynamic versioning
fetch-depth: 0
- name: Install and configure Poetry
run: |
curl -sSL https://install.python-poetry.org | python3 -
# update path for this and the other steps
export PATH=$HOME/.local/bin:$PATH
echo "$HOME/.local/bin" >> $GITHUB_PATH
# prevent poetry environments
# in order to access container pre-installed packages
poetry config virtualenvs.create false
# log all configurations
poetry config --list
- name: Cache Poetry virtualenv
uses: actions/cache@v2
id: cache
uses: snok/install-poetry@v1
with:
path: ~/.virtualenvs
key: ${{ runner.os }}-py-${{ steps.setup-python.outputs.python-version }}-poetry-${{ hashFiles('poetry.lock') }}
- name: Install version management tool
run: |
# same poetry env
PIP="$(head -n1 $(which poetry) | cut -c 3-) -m pip"
${PIP} install poetry-dynamic-versioning
virtualenvs-create: false
installer-parallel: true
- name: Install dependencies
run: poetry install --no-interaction --no-root --with test ${{ inputs.poetry-extras }}
if: steps.cache.outputs.cache-hit != 'true'
- name: Install project
run: poetry install --no-interaction
# it is required to repeat extras, otherwise they will be removed from
# the environment
run: poetry install --no-interaction ${{ inputs.poetry-extras }}
- name: Install task runner
run: pip install poethepoet
- name: Lint with pylint
@@ -53,6 +37,16 @@ jobs:
poe lint
# For warnings, always return zero instead
poe lint-warnings
- name: Get data files
id: cache-data-files
uses: actions/cache@v3
with:
path: data_files
key: data_files-v2
- name: Download data files
if: steps.cache-data-files.outputs.cache-hit != 'true'
run: |
sh download_test_data.sh
- name: Test with pytest
run: |
poe bench
4 changes: 4 additions & 0 deletions .gitignore
@@ -129,3 +129,7 @@ dmypy.json

# Pyre type checker
.pyre/

# Benchmark files
benchmarks/data_files
benchmarks/fakepdfs
6 changes: 4 additions & 2 deletions benchmarks/bench_checks.py
@@ -15,12 +15,14 @@ def benchmark_check_grid_and_eko_compatible(test_files):
wrong_grid = pineappl.grid.Grid.read(
test_files / "data/grids/208/NUTEV_CC_NU_FE_SIGMARED.pineappl.lz4"
)
ekoop = eko.output.Output.load_tar(
ekoop = eko.output.legacy.load_tar(
test_files / "data/ekos/208/HERA_CC_318GEV_EM_SIGMARED.tar"
)
with pytest.raises(ValueError):
pineko.check.check_grid_and_eko_compatible(wrong_grid, ekoop, 1.0)
pineko.check.check_grid_and_eko_compatible(grid, ekoop, 1.0)
ekoop.xgrid_reshape(targetgrid=[0.0001, 0.001, 0.1, 0.5, 1.0])
eko.output.manipulate.xgrid_reshape(
ekoop, targetgrid=eko.interpolation.XGrid([0.0001, 0.001, 0.1, 0.5, 1.0])
)
with pytest.raises(ValueError):
pineko.check.check_grid_and_eko_compatible(grid, ekoop, 1.0)
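For reference, a minimal sketch of the eko calls this hunk migrates to: the legacy tarball loader and the module-level xgrid_reshape. It assumes the eko version targeted by this PR, and the data paths are the benchmark test files, used here purely for illustration:

# Illustrative sketch, not part of the diff: load an eko operator with the
# legacy loader, check it against a pineappl grid, then reshape its x-grid.
import eko.interpolation
import eko.output.legacy
import eko.output.manipulate
import pineappl
import pineko.check

grid = pineappl.grid.Grid.read(
    "data/grids/208/HERA_CC_318GEV_EM_SIGMARED.pineappl.lz4"
)
ekoop = eko.output.legacy.load_tar("data/ekos/208/HERA_CC_318GEV_EM_SIGMARED.tar")

# raises ValueError when grid and eko are not compatible, as the benchmark asserts
pineko.check.check_grid_and_eko_compatible(grid, ekoop, 1.0)

# interpolate the operator onto a new target x-grid, as done in the benchmark above
eko.output.manipulate.xgrid_reshape(
    ekoop, targetgrid=eko.interpolation.XGrid([0.0001, 0.001, 0.1, 0.5, 1.0])
)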
11 changes: 9 additions & 2 deletions benchmarks/bench_cli.py
@@ -1,6 +1,5 @@
import pathlib

import pytest
from click.testing import CliRunner

from pineko.cli._base import command
@@ -59,10 +58,18 @@ def benchmark_opcard_cli(tmp_path, test_files):
test_files / "data/grids/208/LHCB_DY_13TEV_DIMUON.pineappl.lz4"
)
default_card_path = pathlib.Path(test_files / "data/operator_cards/_template.yaml")
thcard_path = pathlib.Path(test_files / "data" / "theory_cards" / "208.yaml")
target_path = pathlib.Path(tmp_path / "test_ope_card.yaml")
runner = CliRunner()
result = runner.invoke(
command, ["opcard", str(grid_path), str(default_card_path), str(target_path)]
command,
[
"opcard",
str(grid_path),
str(default_card_path),
str(thcard_path),
str(target_path),
],
)
assert "Success" in result.output

71 changes: 65 additions & 6 deletions benchmarks/bench_evolve.py
@@ -1,43 +1,102 @@
import pathlib

import eko
import eko.compatibility
import eko.output.legacy
import numpy as np
import pineappl
import pytest
import yaml
from eko import couplings as sc

import pineko
import pineko.evolve
import pineko.theory_card


def benchmark_write_operator_card_from_file(tmp_path, test_files):
def benchmark_write_operator_card_from_file(tmp_path, test_files, test_configs):
pine_path = test_files / "data/grids/208/HERA_CC_318GEV_EM_SIGMARED.pineappl.lz4"
default_path = test_files / "data/operator_cards/_template.yaml"
target_path = pathlib.Path(tmp_path / "test_operator.yaml")
x_grid, q2_grid = pineko.evolve.write_operator_card_from_file(
pine_path, default_path, target_path, 1.0

# Load the theory card
tcard_path = pineko.theory_card.path(208)

x_grid, _q2_grid = pineko.evolve.write_operator_card_from_file(
pine_path, default_path, target_path, 1.0, tcard_path
)

# Load the operator card
myopcard = yaml.safe_load(target_path.read_text(encoding="utf-8"))
# Check if it contains all the information for eko
assert np.allclose(myopcard["rotations"]["xgrid"], x_grid)
assert np.allclose(myopcard["rotations"]["targetgrid"], x_grid)
assert np.allclose(myopcard["rotations"]["inputgrid"], x_grid)
assert np.allclose(myopcard["rotations"]["inputpids"], pineko.evolve.DEFAULT_PIDS)
assert np.allclose(myopcard["rotations"]["targetpids"], pineko.evolve.DEFAULT_PIDS)

wrong_pine_path = test_files / "data/grids/208/HERA_CC_318GEV_EM_wrong.pineappl.lz4"
with pytest.raises(FileNotFoundError):
x_grid, q2_grid = pineko.evolve.write_operator_card_from_file(
wrong_pine_path, default_path, target_path, 1.0
_ = pineko.evolve.write_operator_card_from_file(
wrong_pine_path, default_path, target_path, 1.0, tcard_path
)


def benchmark_dglap(tmp_path, test_files, test_configs):
pine_path = test_files / "data/grids/208/HERA_CC_318GEV_EM_SIGMARED.pineappl.lz4"
default_path = test_files / "data/operator_cards/_template.yaml"
target_path = pathlib.Path(tmp_path / "test_operator.yaml")

theory_id = 208
# In order to check if the operator card is enough for eko, let's compute the eko
tcard = eko.compatibility.update_theory(pineko.theory_card.load(theory_id))
if "ModSV" not in tcard:
tcard["ModSV"] = "expanded"

pineko.evolve.write_operator_card_from_file(
pine_path, default_path, target_path, 1.0, pineko.theory_card.path(theory_id)
)

# Load the opcard
myopcard = yaml.safe_load(target_path.read_text(encoding="utf-8"))

# I need smaller x and q grids in order to compute a small eko
small_x_grid = np.geomspace(1e-3, 1.0, 5)
small_q2_grid = [100.0]
myopcard["xgrid"] = small_x_grid
myopcard["targetgrid"] = small_x_grid
myopcard["inputgrid"] = small_x_grid
myopcard["Q2grid"] = small_q2_grid

# upgrade cards layout
newtcard, newocard = eko.compatibility.update(tcard, myopcard)

# we are only interested in checking the configuration
# instantiating a runner is mostly sufficient
# TODO: speed up this step, and run a full run_dglap
_ = eko.runner.Runner(theory_card=newtcard, operators_card=newocard)


def benchmark_evolve_grid(tmp_path, lhapdf_path, test_files, test_pdf):
pine_path = test_files / "data/grids/208/HERA_CC_318GEV_EM_SIGMARED.pineappl.lz4"
pinegrid = pineappl.grid.Grid.read(pine_path)
eko_path = test_files / "data/ekos/208/HERA_CC_318GEV_EM_SIGMARED.tar"
eko_op = eko.output.Output.load_tar(eko_path)
eko_op = eko.output.legacy.load_tar(eko_path)
target_path = pathlib.Path(tmp_path / "test_fktable.pineappl.lz4")
max_as = 1
max_al = 0
base_configs = pineko.configs.load(test_files)
pineko.configs.configs = pineko.configs.defaults(base_configs)
tcard = pineko.theory_card.load(208)
new_tcard = eko.compatibility.update_theory(tcard)
astrong = sc.Couplings.from_dict(new_tcard)
assumptions = pineko.theory_card.construct_assumptions(tcard)
with lhapdf_path(test_pdf):
pineko.evolve.evolve_grid(
pinegrid,
eko_op,
target_path,
astrong,
max_as,
max_al,
1.0,
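For reference, a minimal sketch of the new signatures exercised in this file: write_operator_card_from_file now also receives the theory-card path, and the strong coupling is built from the compatibility-updated theory card. It assumes pineko.configs.configs has already been populated (as the test_configs fixture does) and reuses the benchmark data paths for illustration:

# Illustrative sketch, not part of the diff, following the calls shown above.
import pathlib

import eko.compatibility
from eko import couplings as sc

import pineko.evolve
import pineko.theory_card

theory_id = 208

# the theory card is now an explicit input of the operator-card writer
tcard_path = pineko.theory_card.path(theory_id)
x_grid, q2_grid = pineko.evolve.write_operator_card_from_file(
    pathlib.Path("data/grids/208/HERA_CC_318GEV_EM_SIGMARED.pineappl.lz4"),
    pathlib.Path("data/operator_cards/_template.yaml"),
    pathlib.Path("test_operator.yaml"),
    1.0,
    tcard_path,
)

# the strong coupling passed to evolve_grid is built from the updated theory card
tcard = pineko.theory_card.load(theory_id)
new_tcard = eko.compatibility.update_theory(tcard)
astrong = sc.Couplings.from_dict(new_tcard)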
23 changes: 18 additions & 5 deletions benchmarks/bench_theory.py
@@ -2,6 +2,9 @@
import pathlib

import pineko
import pineko.configs
import pineko.theory
import pineko.theory_card

theory_obj = pineko.theory.TheoryBuilder(208, ["LHCB_Z_13TEV_DIMUON"])
theory_obj_hera = pineko.theory.TheoryBuilder(208, ["HERACOMBCCEM"])
@@ -71,12 +74,15 @@ def benchmark_inherit_ekos(test_files):
folder_path.rmdir()


def benchmark_opcard(test_files):
def benchmark_opcard(test_files, test_configs):
th_path = pineko.theory_card.path(208)

grid_name = "LHCB_DY_13TEV_DIMUON"
theory_obj.opcard(
grid_name,
pathlib.Path(test_files / "data/grids/208/LHCB_DY_13TEV_DIMUON.pineappl.lz4"),
1.0,
th_path,
)
op_path = pathlib.Path(
test_files / theory_obj.operator_cards_path / "LHCB_DY_13TEV_DIMUON.yaml"
@@ -85,14 +91,17 @@ def benchmark_opcard(test_files):
grid_name,
pathlib.Path(test_files / "data/grids/208/LHCB_DY_13TEV_DIMUON.pineappl.lz4"),
1.0,
th_path,
)
if os.path.exists(op_path):
os.remove(op_path)
else:
raise ValueError("operator card not found")


def benchmark_eko(test_files):
def benchmark_eko(test_files, test_configs):
th_path = pineko.theory_card.path(208)

grid_name = "LHCB_DY_13TEV_DIMUON"
grid_path = pathlib.Path(theory_obj.grids_path() / (grid_name + ".pineappl.lz4"))
base_configs = pineko.configs.load(test_files)
@@ -103,7 +112,7 @@
"208-LHCB_DY_13TEV_DIMUON.log",
["208-LHCB_DY_13TEV_DIMUON.log"],
)
theory_obj.opcard(grid_name, pathlib.Path(test_files / grid_path), 1.0)
theory_obj.opcard(grid_name, pathlib.Path(test_files / grid_path), 1.0, th_path)

theory_obj.eko(grid_name, grid_path, tcard)

@@ -135,7 +144,9 @@ def benchmark_activate_logging(test_files):
raise ValueError("log file not found")


def benchmark_fk(test_files):
def benchmark_fk(test_files, test_configs):
th_path = pineko.theory_card.path(208)

grid_name = "HERA_CC_318GEV_EM_SIGMARED"
grid_path = pathlib.Path(
theory_obj_hera.grids_path() / (grid_name + ".pineappl.lz4")
@@ -148,7 +159,9 @@
"208-HERA_CC_318GEV_EM_SIGMARED.log",
["208-HERA_CC_318GEV_EM_SIGMARED.log"],
)
theory_obj_hera.opcard(grid_name, pathlib.Path(test_files / grid_path), 1.0)
theory_obj_hera.opcard(
grid_name, pathlib.Path(test_files / grid_path), 1.0, th_path
)

theory_obj_hera.fk(grid_name, grid_path, tcard, pdf=None)
# test overwrite function
14 changes: 8 additions & 6 deletions benchmarks/conftest.py
@@ -4,18 +4,20 @@
import pytest

import pineko
import pineko.configs


@pytest.fixture
def test_configs(test_files):
config_path = pineko.configs.detect(test_files)
base_configs = pineko.configs.load(config_path)
return base_configs
def test_files():
return pathlib.Path(__file__).parents[0] / "data_files/"


@pytest.fixture
def test_files():
return pathlib.Path(__file__).parents[0] / "data_files/"
def test_configs(test_files):
config_path = pineko.configs.detect(test_files)
base_configs = pineko.configs.load(config_path)
pineko.configs.configs = pineko.configs.defaults(base_configs)
return pineko.configs.configs


@pytest.fixture
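The fixture reordering above also changes what test_configs provides: it now loads the defaults and publishes them as the global pineko.configs.configs before returning them. A minimal sketch of a benchmark consuming the two fixtures (the benchmark name is hypothetical, for illustration only):

# Illustrative sketch with a hypothetical benchmark, not part of the diff.
import pineko.configs
import pineko.theory_card


def benchmark_uses_configs(test_files, test_configs):
    # the fixture has already populated the global configuration
    assert test_configs is pineko.configs.configs
    # so theory-card paths can be resolved without further setup
    th_path = pineko.theory_card.path(208)
    assert th_path is not None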
31 changes: 0 additions & 31 deletions benchmarks/fakepdfs/NNPDF40_nlo_as_01180/NNPDF40_nlo_as_01180.info

This file was deleted.
