NNPDF · siranipour · Apr 27, 2021 · Apr 14, 2021 · Apr 14, 2021 · Apr 17, 2021
diff --git a/doc/sphinx/source/vp/dataspecification.rst b/doc/sphinx/source/vp/dataspecification.rst
@@ -458,9 +458,12 @@ results in the table or plot will have been collected over ``fits`` with
 .. warning::
   Whilst it is possible to specify ``data_input: {from_: fitinputcontext}``
   directly in the runcard, it is highly recommended **not** to do this where
-  possible. Instead take either ``dataset_inputs`` or ``experiments``
-  directly ``from_: fit`` depending on whether the fit uses new or old data
-  specification respectively. (See below for a detailed explanation).
+  possible. Instead take ``dataset_inputs`` directly ``from_: fit``,
+  irrespective of whether the fit uses the new or old data specification: the
+  conversion from the old style data specification is handled internally
+  using :py:func:`validphys.utils.experiments_to_dataset_inputs` in
+  conjunction with :py:meth:`validphys.core.FitSpec.as_input`. (See below for
+  a detailed explanation).
 
 Currently the ``pseudodata`` and ``chi2grids`` modules have not been updated to
 use ``dataset_inputs`` and so require ``experiments`` to be specified in the

diff --git a/validphys2/src/validphys/comparefittemplates/comparecard.yaml b/validphys2/src/validphys/comparefittemplates/comparecard.yaml
@@ -107,12 +107,11 @@ positivity:
 description:
   from_: fit
 
+dataset_inputs:
+  from_: fit
+
 dataspecs:
-  # WARNING: do not blindly copy and paste this: it can overwrite the datasets
-  # for any actions which rely on grouping datasets.
-  - data_input:
-      from_: fitinputcontext
-    theoryid:
+  - theoryid:
       from_: current
     pdf:
       from_: current
@@ -121,11 +120,7 @@ dataspecs:
     speclabel:
       from_: current
 
-  # WARNING: do not blindly copy and paste this: it can overwrite the datasets
-  # for any actions which rely on grouping datasets.
-  - data_input:
-      from_: fitinputcontext
-    theoryid:
+  - theoryid:
       from_: reference
     pdf:
       from_: reference

diff --git a/validphys2/src/validphys/core.py b/validphys2/src/validphys/core.py
@@ -33,6 +33,7 @@
 from validphys import lhaindex, filters
 from validphys.tableloader import parse_exp_mat
 from validphys.theorydbutils import fetch_theory
+from validphys.utils import experiments_to_dataset_inputs
 
 log = logging.getLogger(__name__)
 
@@ -623,6 +624,12 @@ def as_input(self):
         except (yaml.YAMLError, FileNotFoundError) as e:
             raise AsInputError(str(e)) from e
         d['pdf'] = {'id': self.name, 'label': self.label}
+
+        if 'experiments' in d:
+            # Flatten old style experiments to dataset_inputs
+            dataset_inputs = experiments_to_dataset_inputs(d['experiments'])
+            d['dataset_inputs'] = dataset_inputs
+
         return d
 
     def __str__(self):

diff --git a/validphys2/src/validphys/tests/test_effexponents.py b/validphys2/src/validphys/tests/test_effexponents.py
@@ -24,6 +24,16 @@ def test_next_runcard():
     ite2_runcard = l.check_fit(FIT_ITERATED).as_input()
     ite2_runcard.pop("pdf")  # Removing the PDF key, it's an artefact of as_input
 
+    # We do this check in case FIT_ITERATED is changed to a new style fit in
+    # the future. By checking that both namespaces are present, we ensure
+    # "dataset_inputs" was added to the fit namespace by the as_input method as
+    # opposed to actually being present in the fit runcard.
+    if "experiments" in ite2_runcard and 'dataset_inputs' in ite2_runcard:
+        # dataset_inputs was added to the as_input for backwards compatibility
+        # of the old style fits and wasn't actually present in the fit runcard
+        # just like "pdf" above.
+        ite2_runcard.pop('dataset_inputs')
+
     predicted_ite2_runcard = yaml.safe_load(
         API.iterated_runcard_yaml(fit=FIT)
     )

diff --git a/validphys2/src/validphys/utils.py b/validphys2/src/validphys/utils.py
@@ -78,6 +78,30 @@ def tempfile_cleaner(root, exit_func, exc, prefix=None, **kwargs):
         # e.g shutil.rmtree, shutil.move etc
         exit_func(tempdir, **kwargs)
 
+
+def experiments_to_dataset_inputs(experiments_list):
+    """Flatten old style experiment inputs into a flat ``dataset_inputs`` list:
+    each experiment's ``datasets`` items are appended, in order, to a new list.
+
+    Examples
+    --------
+    >>> from validphys.api import API
+    >>> from validphys.utils import experiments_to_dataset_inputs
+    >>> fit = API.fit(fit='NNPDF31_nnlo_as_0118_1000')
+    >>> experiments = fit.as_input()['experiments']
+    >>> dataset_inputs = experiments_to_dataset_inputs(experiments)
+    >>> dataset_inputs[:3]
+    [{'dataset': 'NMCPD', 'frac': 0.5},
+     {'dataset': 'NMC', 'frac': 0.5},
+     {'dataset': 'SLACP', 'frac': 0.5}]
+    """
+    dataset_inputs = []
+    for experiment in experiments_list:
+        dataset_inputs.extend(experiment['datasets'])
+
+    return dataset_inputs
+
+
 def split_by(it, crit):
     """Split ``it`` in two lists, the first is such that ``crit`` evaluates to
     True and the second such it doesn't. Crit can be either a function or an