
Use the NNPDF data instead of yamldb #159

Merged
14 commits, Mar 26, 2024
4 changes: 2 additions & 2 deletions .github/workflows/bench.yml
@@ -24,11 +24,11 @@ jobs:
virtualenvs-create: false
installer-parallel: true
- name: Install dependencies
-run: poetry install --no-interaction --no-root --with test ${{ inputs.poetry-extras }}
+run: poetry install --no-interaction --no-root --with test -E nnpdf
- name: Install project
# it is required to repeat extras, otherwise they will be removed from
# the environment
-run: poetry install --no-interaction ${{ inputs.poetry-extras }}
+run: poetry install --no-interaction -E nnpdf
- name: Install task runner
run: pip install poethepoet
- name: Lint with pylint
3 changes: 2 additions & 1 deletion .github/workflows/unittests.yml
@@ -6,8 +6,9 @@ jobs:
test:
strategy:
matrix:
-python-version: ["3.8", "3.9", "3.10"]
+python-version: ["3.9", "3.10", "3.11", "3.12"]

uses: NNPDF/workflows/.github/workflows/python-poetry-tests.yml@v2
with:
python-version: ${{ matrix.python-version }}
poetry-extras: "-E nnpdf"
23 changes: 19 additions & 4 deletions docs/source/overview/prerequisites.rst
@@ -12,9 +12,11 @@ This is a standard example:

::

+[general]
+nnpdf = true
+
[paths]
# inputs
-ymldb = "data/yamldb"
grids = "data/grids"
theory_cards = "data/theory_cards"
operator_card_template_name = "_template.yaml"
@@ -30,10 +32,23 @@

All the relevant inputs are described below. The command ``pineko scaffold new`` will generate all necessary folders.

*ymldb*
-------
nnpdf
-----
The key ``nnpdf`` tells ``pineko`` to use the data files shipped with NNPDF to map datasets to FK tables.
If this key is set, a working installation of ``nnpdf`` needs to be available as well,
i.e., ``pineko`` should be installed with the ``nnpdf`` extra (``pip install pineko[nnpdf]``).

Alternatively, it is possible to leave this key unset (or set it to false) and instead
provide a path to a folder of ``yaml`` files containing such a dataset-FK mapping.
If a custom database of mappings is to be used, the path to the folder containing
these files needs to be provided explicitly:

::

[paths]
ymldb = "data/yamldb"

-You need all files of the *ymldb* [2]_ which define the mapping from datasets to FK tables.
+This *ymldb* [2]_ defines the mapping from datasets to FK tables.
An actual (rather simple) example is the following:

::
3,199 changes: 2,295 additions & 904 deletions poetry.lock

Large diffs are not rendered by default.

16 changes: 10 additions & 6 deletions pyproject.toml
@@ -11,6 +11,7 @@ authors = [
"Alessandro Candido <[email protected]>",
"Andrea Barontini <[email protected]>",
"Felix Hekhorn <[email protected]>",
+"Juan Cruz-Martinez <[email protected]>",
]
classifiers = [
"Programming Language :: Python",
@@ -23,15 +24,17 @@ repository = "https://github.com/N3PDF/pineko"
packages = [{ include = "pineko", from = "src" }]

[tool.poetry.dependencies]
-python = ">=3.8,<3.12"
-eko = "^0.14.0"
-pineappl = "^0.6.2"
+python = ">=3.9,<3.13"
+eko = "^0.14.2"
+pineappl = ">0.7.0"
PyYAML = "^6.0"
numpy = "^1.21.0"
-pandas = "^1.4.1"
+pandas = "^2.1"
rich = "^12.5.1"
click = "^8.0.4"
tomli = "^2.0.1"
+nnpdf = { git = "https://github.com/NNPDF/nnpdf", optional = true}
+lhapdf-management = { version = "^0.5", optional = true }

[tool.poetry.group.docs]
optional = true
@@ -48,15 +51,16 @@ optional = true
pytest = "^7.1.3"
pytest-cov = "^4.0.0"
pytest-env = "^0.6.2"
-pylint = "^2.11.1"
-banana-hep = "^0.6.11"
+pylint = "^3.1.0"
+banana-hep = "^0.6.13"

[tool.poetry.group.dev.dependencies]
pdbpp = "^0.10.3"
ipython = "^8.0"

[tool.poetry.extras]
docs = ["sphinx", "sphinx-rtd-theme", "sphinxcontrib-bibtex"]
+nnpdf = ["nnpdf", "lhapdf-management"]

[tool.poetry.scripts]
pineko = "pineko:command"
18 changes: 16 additions & 2 deletions src/pineko/configs.py
@@ -13,7 +13,6 @@
"Holds loaded configurations"

NEEDED_KEYS = [
-    "ymldb",
    "operator_cards",
    "grids",
    "operator_card_template_name",
@@ -23,6 +22,7 @@
]

NEEDED_FILES = ["operator_card_template_name"]
+GENERIC_OPTIONS = "general"


def defaults(base_configs):
@@ -59,8 +59,22 @@ def enhance_paths(configs_):
    configs_ : dict
        configuration
    """
    required_keys = list(NEEDED_KEYS)
    # Check that one of nnpdf / ymldb is given
    generic_options = configs_.get(GENERIC_OPTIONS, {})
    if generic_options.get("nnpdf", False):
        # Fail as soon as possible
        try:
            import validphys
        except ModuleNotFoundError:
            raise ModuleNotFoundError(
                "Cannot use `nnpdf=True` without a valid installation of NNPDF"
            )
    else:
        required_keys.append("ymldb")

    # required keys without default
-    for key in NEEDED_KEYS:
+    for key in required_keys:
        if key not in configs_["paths"]:
            raise ValueError(f"Configuration is missing a 'paths.{key}' key")
        if key in NEEDED_FILES:
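The key-selection logic of this hunk can be sketched in isolation: the `general.nnpdf` flag decides whether a local `ymldb` path must be configured. A minimal sketch, where `required_paths` is a hypothetical helper while `GENERIC_OPTIONS` and the key names mirror the diff:

```python
# Sketch of the selection logic in `enhance_paths`: when `general.nnpdf`
# is false (or absent), the `ymldb` path becomes a mandatory config key.
GENERIC_OPTIONS = "general"
NEEDED_KEYS = ["operator_cards", "grids", "operator_card_template_name"]


def required_paths(configs_):
    """Return the path keys that must appear under `[paths]`."""
    required = list(NEEDED_KEYS)
    if not configs_.get(GENERIC_OPTIONS, {}).get("nnpdf", False):
        # no NNPDF data files available -> a local yamldb is required
        required.append("ymldb")
    return required
```

In the real function the NNPDF branch additionally fails fast if `validphys` cannot be imported, so a misconfigured environment is caught before any work is done.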
7 changes: 6 additions & 1 deletion src/pineko/fonll.py
@@ -12,6 +12,7 @@
import yaml

from . import configs, parser, theory_card
+from .utils import read_grids_from_nnpdf

logger = logging.getLogger(__name__)

@@ -228,8 +229,12 @@ def assembly_combined_fk(
    else:
        tcard["DAMPPOWERb"] = 0
        tcard["DAMPPOWERc"] = 0

    # Getting the paths to the grids
-    grids_name = grids_names(configs.configs["paths"]["ymldb"] / f"{dataset}.yaml")
+    grids_name = read_grids_from_nnpdf(dataset, configs.configs)
+    if grids_name is None:
+        grids_name = grids_names(configs.configs["paths"]["ymldb"] / f"{dataset}.yaml")

    for grid in grids_name:
        # Checking if it already exists
        new_fk_path = configs.configs["paths"]["fktables"] / str(theoryid) / grid
19 changes: 13 additions & 6 deletions src/pineko/theory.py
@@ -18,6 +18,7 @@
from eko.runner.managed import solve

from . import check, configs, evolve, parser, scale_variations, theory_card
+from .utils import read_grids_from_nnpdf

logger = logging.getLogger(__name__)

@@ -116,12 +117,18 @@ def load_grids(self, ds):
        grids : dict
            mapping basename to path
        """
-        paths = configs.configs["paths"]
-        _info, grids = parser.get_yaml_information(
-            paths["ymldb"] / f"{ds}.yaml", self.grids_path()
-        )
-        # the list is still nested, so flatten
-        grids = [grid for opgrids in grids for grid in opgrids]
+        # Take fktable information from NNPDF
+        raw_grids = read_grids_from_nnpdf(ds, configs.configs)
+        if raw_grids is not None:
+            grids = [self.grids_path() / i for i in raw_grids]
+        else:
+            paths = configs.configs["paths"]
+            _info, raw_grids = parser.get_yaml_information(
+                paths["ymldb"] / f"{ds}.yaml", self.grids_path()
+            )
+            # the list is still nested, so flatten
+            grids = [grid for opgrids in raw_grids for grid in opgrids]

        # then turn into a map name -> path
        grids = {grid.stem.rsplit(".", 1)[0]: grid for grid in grids}
        return grids
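Independently of which branch produced the paths, the final mapping strips the double `.pineappl.lz4` extension to recover the bare grid name. A small standalone sketch of that step (the `grids_by_name` helper name is illustrative):

```python
from pathlib import Path


def grids_by_name(paths):
    """Map bare grid names to their paths.

    `Path.stem` drops the trailing `.lz4`; the `rsplit` then removes the
    remaining `.pineappl` part, leaving only the grid name as the key.
    """
    return {p.stem.rsplit(".", 1)[0]: p for p in paths}
```

For instance, a path ending in `LHCB_DY_8TEV.pineappl.lz4` is keyed as `LHCB_DY_8TEV`.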
35 changes: 35 additions & 0 deletions src/pineko/utils.py
@@ -0,0 +1,35 @@
"""Shared utilities for pineko.

Common tools typically used by several pineko functions.
"""

from .configs import GENERIC_OPTIONS


def read_grids_from_nnpdf(dataset_name, configs=None):
"""Read the list of fktables given a dataset name.

If NNPDF is not available, returns None.

Parameters
----------
dataset_name: str
configs: dict
dictionary of configuration options
if None it it assumed that the NNPDF version is required
"""
if configs is not None:
if not configs.get(GENERIC_OPTIONS, {}).get("nnpdf", False):
return None

# Import NNPDF only if we really want it!
from nnpdf_data import legacy_to_new_map
from validphys.commondataparser import EXT
from validphys.loader import Loader

# We only need the metadata, so this should be enough
dataset_name, variant = legacy_to_new_map(dataset_name)
cd = Loader().check_commondata(dataset_name, variant=variant)
fks = cd.metadata.theory.FK_tables
# Return it flat
return [f"{i}.{EXT}" for operand in fks for i in operand]
16 changes: 16 additions & 0 deletions tests/test_utils.py
@@ -0,0 +1,16 @@
import pytest

from pineko import utils


@pytest.mark.parametrize(
    "dsname", ["HERA_NC_318GEV_EAVG_CHARM-SIGMARED", "ATLAS_DY_7TEV_46FB_CC"]
)
def test_nnpdf_grids(dsname):
    """Checks that the grids can be read out from the NNPDF theory metadata."""
    grids = utils.read_grids_from_nnpdf(dsname)
    # Check that we get _something_
    assert len(grids) > 0
    # And that they look like pineappl grids
    for grid_name in grids:
        assert grid_name.endswith(".pineappl.lz4")