From ef015b2eff5d3612145e65963fab234f8a01bff6 Mon Sep 17 00:00:00 2001 From: toonhasenack Date: Wed, 18 Oct 2023 14:23:51 +0200 Subject: [PATCH 01/10] Initial F1 generator ready. --- data/kfactors/generate.py | 128 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 128 insertions(+) create mode 100644 data/kfactors/generate.py diff --git a/data/kfactors/generate.py b/data/kfactors/generate.py new file mode 100644 index 00000000..d10d1d23 --- /dev/null +++ b/data/kfactors/generate.py @@ -0,0 +1,128 @@ +import lhapdf +import pineappl +from glob import glob +from datetime import datetime as dt +import numpy as np +import os +import argparse + + +def get_gpaths(folder): + """ + Get a list of paths to PineAPPL grids in the specified folder. + + Args: + folder (str): The folder path where PineAPPL grids are located. + + Returns: + pdf_name (str): The name of the PDF dataset. + gpaths (list): List of paths to PineAPPL grid files. + """ + paths = glob(folder + "/*F1*") # Find grids with "_F1" in the filename + gpaths = [] + for p in paths: + gpaths.append(glob(p + "/*.pineappl.lz4")[0]) + print(f"Found {len(gpaths)} pineapple grids.") + return pdf_name, gpaths + + +def get_prediction(gpath, pdf_name): + """ + Get predictions by convoluting a PineAPPL grid with a LHAPDF PDF. + + Args: + gpath (str): Path to the PineAPPL grid file. + pdf_name (str): The name of the LHAPDF dataset. + + Returns: + prediction (numpy.ndarray): Computed predictions. 
+ """ + # Load the PineAPPL grid + grid = pineappl.grid.Grid.read(gpath) + + # Load the LHAPDF + pdf = lhapdf.mkPDF(pdf_name) + + # Perform the convolution + prediction = grid.convolute_with_one( + 2212, # Proton target + pdf.xfxQ2, # The PDF callable pdf.xfxQ2(pid, x, Q2) -> xfx + pdf.alphasQ2, # The alpha_s callable pdf.alpha_s(Q2) -> alpha_s + ) + + # Compute the k-factor (1 / F1) + prediction = 1 / prediction + + return prediction + + +def save_data( + data, + dataset_name="", + author_name="", + theory_name="", + output_name="results", +): + """ + Save computed data to a file with metadata. + + Args: + data (numpy.ndarray): Computed data. + dataset_name (str): Name of the dataset. + author_name (str): Name of the author. + theory_name (str): Name of the theory. + output_name (str): Output folder name. + """ + strf_data = "" + for i in range(data.shape[0]): + strf_data += f"{data[i]} 0.0\n" + + date = dt.now().date() + string = ( + f""" +******************************************************************************** +SetName: {dataset_name} +Author: {author_name} +Date: {date} +CodesUsed: https://github.com/NNPDF/yadism +TheoryInput: {theory_name} +PDFset: NNPDF40_nnlo_pch_as_01180 +Warnings: F1 normalization for {dataset_name} +******************************************************************************** +""" + + strf_data + ) + + os.makedirs(output_name, exist_ok=True) + with open(output_name + f"/CF_QCD_{dataset_name}.dat", "w") as file: + file.write(string) + + +# Create an argument parser +parser = argparse.ArgumentParser() +parser.add_argument("pdf", help="The name of the PDF dataset of LHAPDF") +parser.add_argument("folder", help="The folder name of the F1 pineapple grids") +parser.add_argument("--author", default="A.J. 
Hasenack") +parser.add_argument("--theory", default="theory_800") +parser.add_argument("--output", default="results") +args = parser.parse_args() + +# Extract command line arguments +pdf_name = args.pdf +folder_name = args.folder +author = args.author +theory = args.theory +output = args.output + +# Get PineAPPL grid paths and PDF name +pdf_name, gpaths = get_gpaths(folder_name) + +# Process each PineAPPL grid +for gpath in gpaths: + dataset_name = os.path.splitext( + os.path.splitext(os.path.basename(os.path.normpath(gpath)))[0] + )[0] + + # Get predictions and save data + data = get_prediction(gpath, pdf_name) + save_data(data, dataset_name, author, theory, output) From 7e2d356249d8e6d0e9a2abca149c1fda077aa768 Mon Sep 17 00:00:00 2001 From: toonhasenack Date: Fri, 20 Oct 2023 18:42:40 +0200 Subject: [PATCH 02/10] Adjustment for compatibility of output with Pineko --- data/kfactors/generate.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/data/kfactors/generate.py b/data/kfactors/generate.py index d10d1d23..9bf16c51 100644 --- a/data/kfactors/generate.py +++ b/data/kfactors/generate.py @@ -79,8 +79,7 @@ def save_data( date = dt.now().date() string = ( - f""" -******************************************************************************** + f"""******************************************************************************** SetName: {dataset_name} Author: {author_name} Date: {date} @@ -94,7 +93,9 @@ def save_data( ) os.makedirs(output_name, exist_ok=True) - with open(output_name + f"/CF_QCD_{dataset_name}.dat", "w") as file: + with open( + output_name + f"/CF_QCD_{dataset_name}.dat".replace("F1", "G1"), "w" + ) as file: file.write(string) From 044e1a4b9f3dee2aba8a345c893e2b286baaf2e8 Mon Sep 17 00:00:00 2001 From: toonhasenack Date: Mon, 23 Oct 2023 13:03:34 +0200 Subject: [PATCH 03/10] Adjusted docstring, arg types and some minor stuff --- data/kfactors/generate.py | 82 ++++++++++++++++++++++++--------------- 1 file changed, 51 
insertions(+), 31 deletions(-) diff --git a/data/kfactors/generate.py b/data/kfactors/generate.py index 9bf16c51..c8a27932 100644 --- a/data/kfactors/generate.py +++ b/data/kfactors/generate.py @@ -5,18 +5,24 @@ import numpy as np import os import argparse +from typing import List, Tuple -def get_gpaths(folder): +def get_gpaths(folder: str) -> Tuple[str, List[str]]: """ Get a list of paths to PineAPPL grids in the specified folder. - Args: - folder (str): The folder path where PineAPPL grids are located. - - Returns: - pdf_name (str): The name of the PDF dataset. - gpaths (list): List of paths to PineAPPL grid files. + Parameters + ---------- + folder : str + The folder path where PineAPPL grids are located. + + Returns + ------- + pdf_name : str + The name of the PDF dataset. + gpaths : List[str] + List of paths to PineAPPL grid files. """ paths = glob(folder + "/*F1*") # Find grids with "_F1" in the filename gpaths = [] @@ -26,16 +32,21 @@ def get_gpaths(folder): return pdf_name, gpaths -def get_prediction(gpath, pdf_name): +def get_prediction(gpath: str, pdf_name: str) -> np.ndarray: """ Get predictions by convoluting a PineAPPL grid with a LHAPDF PDF. - Args: - gpath (str): Path to the PineAPPL grid file. - pdf_name (str): The name of the LHAPDF dataset. - - Returns: - prediction (numpy.ndarray): Computed predictions. + Parameters + ---------- + gpath : str + Path to the PineAPPL grid file. + pdf_name : str + The name of the LHAPDF dataset. + + Returns + ------- + prediction : np.ndarray + Computed predictions. """ # Load the PineAPPL grid grid = pineappl.grid.Grid.read(gpath) @@ -57,21 +68,28 @@ def get_prediction(gpath, pdf_name): def save_data( - data, - dataset_name="", - author_name="", - theory_name="", - output_name="results", + data: np.ndarray, + dataset_name: str, + pdf_name: str, + author_name: str, + theory_name: str, + output_name: str = "results", ): """ Save computed data to a file with metadata. - Args: - data (numpy.ndarray): Computed data. 
- dataset_name (str): Name of the dataset. - author_name (str): Name of the author. - theory_name (str): Name of the theory. - output_name (str): Output folder name. + Parameters + ---------- + data : np.ndarray + Computed data. + dataset_name : str + Name of the dataset. + author_name : str + Name of the author. + theory_name : str + Name of the theory. + output_name : str, optional + Output folder name, default is "results". """ strf_data = "" for i in range(data.shape[0]): @@ -85,7 +103,7 @@ def save_data( Date: {date} CodesUsed: https://github.com/NNPDF/yadism TheoryInput: {theory_name} -PDFset: NNPDF40_nnlo_pch_as_01180 +PDFset: {pdf_name} Warnings: F1 normalization for {dataset_name} ******************************************************************************** """ @@ -94,7 +112,7 @@ def save_data( os.makedirs(output_name, exist_ok=True) with open( - output_name + f"/CF_QCD_{dataset_name}.dat".replace("F1", "G1"), "w" + output_name + f"/CF_NRM_{dataset_name}.dat".replace("F1", "G1"), "w" ) as file: file.write(string) @@ -103,9 +121,11 @@ def save_data( parser = argparse.ArgumentParser() parser.add_argument("pdf", help="The name of the PDF dataset of LHAPDF") parser.add_argument("folder", help="The folder name of the F1 pineapple grids") -parser.add_argument("--author", default="A.J. Hasenack") -parser.add_argument("--theory", default="theory_800") -parser.add_argument("--output", default="results") +parser.add_argument("--author", help="The name of the author", default="A.J. 
Hasenack") +parser.add_argument( + "--theory", help="The theory used, formatted as 'theory_'+int", default="theory_800" +) +parser.add_argument("--output", help="The name of the output folder", default="results") args = parser.parse_args() # Extract command line arguments @@ -126,4 +146,4 @@ def save_data( # Get predictions and save data data = get_prediction(gpath, pdf_name) - save_data(data, dataset_name, author, theory, output) + save_data(data, dataset_name, pdf_name, author, theory, output) From 0d43d7b36f1d940c4d3b69e0f6f93dc94c0d1d97 Mon Sep 17 00:00:00 2001 From: toonhasenack Date: Tue, 21 Nov 2023 16:08:22 +0100 Subject: [PATCH 04/10] Added the g1 and corrected proton/neutron case --- data/kfactors/generate_g1.py | 107 ++++++++++++++++++++++ data/kfactors/generate_g1f1.py | 160 +++++++++++++++++++++++++++++++++ 2 files changed, 267 insertions(+) create mode 100644 data/kfactors/generate_g1.py create mode 100644 data/kfactors/generate_g1f1.py diff --git a/data/kfactors/generate_g1.py b/data/kfactors/generate_g1.py new file mode 100644 index 00000000..38262ee2 --- /dev/null +++ b/data/kfactors/generate_g1.py @@ -0,0 +1,107 @@ +import lhapdf +import pineappl +import yaml +from glob import glob +from datetime import datetime as dt +import numpy as np +import os +import argparse +from typing import List, Tuple + + +def get_prediction(folder: str, pdf_name: str) -> np.ndarray: + """ + Get predictions by convoluting a PineAPPL grid with a LHAPDF PDF. + + Parameters + ---------- + folder: str + Path to the kinematics.yaml grid file. + pdf_name : str + The name of the LHAPDF dataset. + + Returns + ------- + prediction : np.ndarray + Computed predictions. 
+ """ + + with open(folder + "/kinematics.yaml", "r") as file: + data = yaml.safe_load(file) + + bins = data["bins"] + prediction = np.zeros(len(bins)) + for i, bin in enumerate(bins): + prediction[i] = 1 / (2 * bin["x"]["mid"]) + + return prediction + + +def save_data( + data: np.ndarray, + dataset_name: str, + author_name: str, + theory_name: str, + output_name: str = "results", +): + """ + Save computed data to a file with metadata. + + Parameters + ---------- + data : np.ndarray + Computed data. + dataset_name : str + Name of the dataset. + author_name : str + Name of the author. + theory_name : str + Name of the theory. + output_name : str, optional + Output folder name, default is "results". + """ + strf_data = "" + for i in range(data.shape[0]): + strf_data += f"{data[i]} 0.0\n" + + date = dt.now().date() + string = ( + f"""******************************************************************************** +SetName: {dataset_name} +Author: {author_name} +Date: {date} +CodesUsed: https://github.com/NNPDF/yadism +TheoryInput: {theory_name} +Warnings: 1/2x normalization for {dataset_name} +******************************************************************************** +""" + + strf_data + ) + + os.makedirs(output_name, exist_ok=True) + dataset_name = list(dataset_name) + dataset_name[-2] = "G" + dataset_name = "".join(dataset_name) + with open(output_name + f"/CF_NRM_{dataset_name}.dat", "w") as file: + file.write(string) + + +# Create an argument parser +parser = argparse.ArgumentParser() +parser.add_argument("folder", help="The folder name of the F1 pineapple grids") +parser.add_argument("--author", help="The name of the author", default="A.J. 
Hasenack") +parser.add_argument( + "--theory", help="The theory used, formatted as 'theory_'+int", default="theory_800" +) +parser.add_argument("--output", help="The name of the output folder", default="results") +args = parser.parse_args() + +# Extract command line arguments +folder_name = args.folder +author = args.author +theory = args.theory +output = args.output + +# Get predictions and save data +data = get_prediction(folder_name) +save_data(data, folder_name, author, theory, output) diff --git a/data/kfactors/generate_g1f1.py b/data/kfactors/generate_g1f1.py new file mode 100644 index 00000000..8cedce67 --- /dev/null +++ b/data/kfactors/generate_g1f1.py @@ -0,0 +1,160 @@ +import lhapdf +import pineappl +from glob import glob +from datetime import datetime as dt +import numpy as np +import os +import argparse +from typing import List, Tuple + + +def get_gpaths(folder: str) -> Tuple[str, List[str]]: + """ + Get a list of paths to PineAPPL grids in the specified folder. + + Parameters + ---------- + folder : str + The folder path where PineAPPL grids are located. + + Returns + ------- + pdf_name : str + The name of the PDF dataset. + gpaths : List[str] + List of paths to PineAPPL grid files. + """ + paths = glob(folder + "/*F1*") # Find grids with "_F1" in the filename + gpaths = [] + for p in paths: + gpaths.append(glob(p + "/*.pineappl.lz4")[0]) + print(f"Found {len(gpaths)} pineapple grids.") + return gpaths + + +def get_prediction(gpath: str, pdf_name: str, target: str) -> np.ndarray: + """ + Get predictions by convoluting a PineAPPL grid with a LHAPDF PDF. + + Parameters + ---------- + gpath : str + Path to the PineAPPL grid file. + pdf_name : str + The name of the LHAPDF dataset. + + Returns + ------- + prediction : np.ndarray + Computed predictions. 
+ """ + # Load the PineAPPL grid + grid = pineappl.grid.Grid.read(gpath) + + # Load the LHAPDF + pdf = lhapdf.mkPDF(pdf_name) + + # Make case distinction for target cases + if target == "proton": + nr = 2212 + elif target == "neutron": + nr = 2112 + else: + pass + + # Perform the convolution + prediction = grid.convolute_with_one( + nr, # Type of target + pdf.xfxQ2, # The PDF callable pdf.xfxQ2(pid, x, Q2) -> xfx + pdf.alphasQ2, # The alpha_s callable pdf.alpha_s(Q2) -> alpha_s + ) + + # Compute the k-factor (1 / F1) + prediction = 1 / prediction + + return prediction + + +def save_data( + data: np.ndarray, + dataset_name: str, + pdf_name: str, + author_name: str, + theory_name: str, + output_name: str = "results", +): + """ + Save computed data to a file with metadata. + + Parameters + ---------- + data : np.ndarray + Computed data. + dataset_name : str + Name of the dataset. + author_name : str + Name of the author. + theory_name : str + Name of the theory. + output_name : str, optional + Output folder name, default is "results". 
+ """ + strf_data = "" + for i in range(data.shape[0]): + strf_data += f"{data[i]} 0.0\n" + + date = dt.now().date() + string = ( + f"""******************************************************************************** +SetName: {dataset_name} +Author: {author_name} +Date: {date} +CodesUsed: https://github.com/NNPDF/yadism +TheoryInput: {theory_name} +PDFset: {pdf_name} +Warnings: F1 normalization for {dataset_name} +******************************************************************************** +""" + + strf_data + ) + + os.makedirs(output_name, exist_ok=True) + dataset_name = list(dataset_name) + dataset_name[-2] = "G" + dataset_name = "".join(dataset_name) + with open(output_name + f"/CF_NRM_{dataset_name}.dat", "w") as file: + file.write(string) + + +# Create an argument parser +parser = argparse.ArgumentParser() +parser.add_argument("pdf", help="The name of the PDF dataset of LHAPDF") +parser.add_argument("folder", help="The folder name of the F1 pineapple grids") +parser.add_argument("target", help="Add the target type") +parser.add_argument("--author", help="The name of the author", default="A.J. 
Hasenack") +parser.add_argument( + "--theory", help="The theory used, formatted as 'theory_'+int", default="theory_800" +) +parser.add_argument("--output", help="The name of the output folder", default="results") +args = parser.parse_args() + +# Extract command line arguments +pdf_name = args.pdf +folder_name = args.folder +target = args.target +author = args.author +theory = args.theory +output = args.output + +# Get PineAPPL grid paths and PDF name +gpaths = get_gpaths(folder_name) + +# Process each PineAPPL grid +for gpath in gpaths: + dataset_name = os.path.splitext( + os.path.splitext(os.path.basename(os.path.normpath(gpath)))[0] + )[0] + + # Get predictions and save data + data = get_prediction(gpath, pdf_name, target) + save_data(data, dataset_name, pdf_name, author, theory, output) From 359b37dafbd6f07d415764a62562a597cf123cd5 Mon Sep 17 00:00:00 2001 From: toonhasenack Date: Mon, 11 Dec 2023 14:57:49 +0100 Subject: [PATCH 05/10] g1f1 polarized flag fix and kfactors --- data/kfactors/generate.py | 149 --------------------------------- data/kfactors/generate_g1.py | 15 ++-- data/kfactors/generate_g1f1.py | 37 +++----- src/pineko/scale_variations.py | 2 + 4 files changed, 23 insertions(+), 180 deletions(-) delete mode 100644 data/kfactors/generate.py diff --git a/data/kfactors/generate.py b/data/kfactors/generate.py deleted file mode 100644 index c8a27932..00000000 --- a/data/kfactors/generate.py +++ /dev/null @@ -1,149 +0,0 @@ -import lhapdf -import pineappl -from glob import glob -from datetime import datetime as dt -import numpy as np -import os -import argparse -from typing import List, Tuple - - -def get_gpaths(folder: str) -> Tuple[str, List[str]]: - """ - Get a list of paths to PineAPPL grids in the specified folder. - - Parameters - ---------- - folder : str - The folder path where PineAPPL grids are located. - - Returns - ------- - pdf_name : str - The name of the PDF dataset. - gpaths : List[str] - List of paths to PineAPPL grid files. 
- """ - paths = glob(folder + "/*F1*") # Find grids with "_F1" in the filename - gpaths = [] - for p in paths: - gpaths.append(glob(p + "/*.pineappl.lz4")[0]) - print(f"Found {len(gpaths)} pineapple grids.") - return pdf_name, gpaths - - -def get_prediction(gpath: str, pdf_name: str) -> np.ndarray: - """ - Get predictions by convoluting a PineAPPL grid with a LHAPDF PDF. - - Parameters - ---------- - gpath : str - Path to the PineAPPL grid file. - pdf_name : str - The name of the LHAPDF dataset. - - Returns - ------- - prediction : np.ndarray - Computed predictions. - """ - # Load the PineAPPL grid - grid = pineappl.grid.Grid.read(gpath) - - # Load the LHAPDF - pdf = lhapdf.mkPDF(pdf_name) - - # Perform the convolution - prediction = grid.convolute_with_one( - 2212, # Proton target - pdf.xfxQ2, # The PDF callable pdf.xfxQ2(pid, x, Q2) -> xfx - pdf.alphasQ2, # The alpha_s callable pdf.alpha_s(Q2) -> alpha_s - ) - - # Compute the k-factor (1 / F1) - prediction = 1 / prediction - - return prediction - - -def save_data( - data: np.ndarray, - dataset_name: str, - pdf_name: str, - author_name: str, - theory_name: str, - output_name: str = "results", -): - """ - Save computed data to a file with metadata. - - Parameters - ---------- - data : np.ndarray - Computed data. - dataset_name : str - Name of the dataset. - author_name : str - Name of the author. - theory_name : str - Name of the theory. - output_name : str, optional - Output folder name, default is "results". 
- """ - strf_data = "" - for i in range(data.shape[0]): - strf_data += f"{data[i]} 0.0\n" - - date = dt.now().date() - string = ( - f"""******************************************************************************** -SetName: {dataset_name} -Author: {author_name} -Date: {date} -CodesUsed: https://github.com/NNPDF/yadism -TheoryInput: {theory_name} -PDFset: {pdf_name} -Warnings: F1 normalization for {dataset_name} -******************************************************************************** -""" - + strf_data - ) - - os.makedirs(output_name, exist_ok=True) - with open( - output_name + f"/CF_NRM_{dataset_name}.dat".replace("F1", "G1"), "w" - ) as file: - file.write(string) - - -# Create an argument parser -parser = argparse.ArgumentParser() -parser.add_argument("pdf", help="The name of the PDF dataset of LHAPDF") -parser.add_argument("folder", help="The folder name of the F1 pineapple grids") -parser.add_argument("--author", help="The name of the author", default="A.J. Hasenack") -parser.add_argument( - "--theory", help="The theory used, formatted as 'theory_'+int", default="theory_800" -) -parser.add_argument("--output", help="The name of the output folder", default="results") -args = parser.parse_args() - -# Extract command line arguments -pdf_name = args.pdf -folder_name = args.folder -author = args.author -theory = args.theory -output = args.output - -# Get PineAPPL grid paths and PDF name -pdf_name, gpaths = get_gpaths(folder_name) - -# Process each PineAPPL grid -for gpath in gpaths: - dataset_name = os.path.splitext( - os.path.splitext(os.path.basename(os.path.normpath(gpath)))[0] - )[0] - - # Get predictions and save data - data = get_prediction(gpath, pdf_name) - save_data(data, dataset_name, pdf_name, author, theory, output) diff --git a/data/kfactors/generate_g1.py b/data/kfactors/generate_g1.py index 38262ee2..17099814 100644 --- a/data/kfactors/generate_g1.py +++ b/data/kfactors/generate_g1.py @@ -9,7 +9,7 @@ from typing import List, Tuple -def 
get_prediction(folder: str, pdf_name: str) -> np.ndarray: +def get_prediction(folder: str) -> np.ndarray: """ Get predictions by convoluting a PineAPPL grid with a LHAPDF PDF. @@ -79,16 +79,13 @@ def save_data( ) os.makedirs(output_name, exist_ok=True) - dataset_name = list(dataset_name) - dataset_name[-2] = "G" - dataset_name = "".join(dataset_name) - with open(output_name + f"/CF_NRM_{dataset_name}.dat", "w") as file: + with open(output_name + f"/CF_NRM_{dataset_name}_G1.dat", "w") as file: file.write(string) # Create an argument parser parser = argparse.ArgumentParser() -parser.add_argument("folder", help="The folder name of the F1 pineapple grids") +parser.add_argument("folder", help="The folder name of the commondata set") parser.add_argument("--author", help="The name of the author", default="A.J. Hasenack") parser.add_argument( "--theory", help="The theory used, formatted as 'theory_'+int", default="theory_800" @@ -102,6 +99,10 @@ def save_data( theory = args.theory output = args.output +dataset_name = os.path.splitext( + os.path.splitext(os.path.basename(os.path.normpath(folder_name)))[0] +)[0] + # Get predictions and save data data = get_prediction(folder_name) -save_data(data, folder_name, author, theory, output) +save_data(data, dataset_name, author, theory, output) diff --git a/data/kfactors/generate_g1f1.py b/data/kfactors/generate_g1f1.py index 8cedce67..d53e4359 100644 --- a/data/kfactors/generate_g1f1.py +++ b/data/kfactors/generate_g1f1.py @@ -8,7 +8,7 @@ from typing import List, Tuple -def get_gpaths(folder: str) -> Tuple[str, List[str]]: +def get_gpaths(folder: str, data: str) -> Tuple[str, List[str]]: """ Get a list of paths to PineAPPL grids in the specified folder. @@ -16,6 +16,8 @@ def get_gpaths(folder: str) -> Tuple[str, List[str]]: ---------- folder : str The folder path where PineAPPL grids are located. + data : str + Name of the commondata set. 
Returns ------- @@ -24,7 +26,7 @@ def get_gpaths(folder: str) -> Tuple[str, List[str]]: gpaths : List[str] List of paths to PineAPPL grid files. """ - paths = glob(folder + "/*F1*") # Find grids with "_F1" in the filename + paths = glob(folder + f"/*{data}*F1*") # Find the grids gpaths = [] for p in paths: gpaths.append(glob(p + "/*.pineappl.lz4")[0]) @@ -32,7 +34,7 @@ def get_gpaths(folder: str) -> Tuple[str, List[str]]: return gpaths -def get_prediction(gpath: str, pdf_name: str, target: str) -> np.ndarray: +def get_prediction(gpath: str, pdf_name: str) -> np.ndarray: """ Get predictions by convoluting a PineAPPL grid with a LHAPDF PDF. @@ -54,13 +56,8 @@ def get_prediction(gpath: str, pdf_name: str, target: str) -> np.ndarray: # Load the LHAPDF pdf = lhapdf.mkPDF(pdf_name) - # Make case distinction for target cases - if target == "proton": - nr = 2212 - elif target == "neutron": - nr = 2112 - else: - pass + # Proton reference number + nr = 2212 # Perform the convolution prediction = grid.convolute_with_one( @@ -117,11 +114,7 @@ def save_data( """ + strf_data ) - - os.makedirs(output_name, exist_ok=True) - dataset_name = list(dataset_name) - dataset_name[-2] = "G" - dataset_name = "".join(dataset_name) + dataset_name += "_G1" with open(output_name + f"/CF_NRM_{dataset_name}.dat", "w") as file: file.write(string) @@ -130,7 +123,7 @@ def save_data( parser = argparse.ArgumentParser() parser.add_argument("pdf", help="The name of the PDF dataset of LHAPDF") parser.add_argument("folder", help="The folder name of the F1 pineapple grids") -parser.add_argument("target", help="Add the target type") +parser.add_argument("data", help="Name of the commondata set") parser.add_argument("--author", help="The name of the author", default="A.J. 
Hasenack") parser.add_argument( "--theory", help="The theory used, formatted as 'theory_'+int", default="theory_800" @@ -141,20 +134,16 @@ def save_data( # Extract command line arguments pdf_name = args.pdf folder_name = args.folder -target = args.target +data = args.data author = args.author theory = args.theory output = args.output # Get PineAPPL grid paths and PDF name -gpaths = get_gpaths(folder_name) +gpaths = get_gpaths(folder_name, data) # Process each PineAPPL grid for gpath in gpaths: - dataset_name = os.path.splitext( - os.path.splitext(os.path.basename(os.path.normpath(gpath)))[0] - )[0] - # Get predictions and save data - data = get_prediction(gpath, pdf_name, target) - save_data(data, dataset_name, pdf_name, author, theory, output) + df = get_prediction(gpath, pdf_name) + save_data(df, data, pdf_name, author, theory, output) diff --git a/src/pineko/scale_variations.py b/src/pineko/scale_variations.py index adea5518..9839b746 100644 --- a/src/pineko/scale_variations.py +++ b/src/pineko/scale_variations.py @@ -224,6 +224,8 @@ def construct_and_dump_order_exists_grid(ori_grid, to_construct_order): remap_obj = pineappl.bin.BinRemapper(norma, limits) new_grid.set_remapper(remap_obj) new_grid.set_key_value("initial_state_2", ori_grid.key_values()["initial_state_2"]) + if "polarized" in ori_grid.key_values(): + new_grid.set_key_value("polarized", ori_grid.key_values()["polarized"]) return new_grid From fa75aebd336014706476c800210710f89e647c2b Mon Sep 17 00:00:00 2001 From: toonhasenack Date: Tue, 20 Feb 2024 09:48:29 +0100 Subject: [PATCH 06/10] automation good --- data/kfactors/generate_g1f1.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/data/kfactors/generate_g1f1.py b/data/kfactors/generate_g1f1.py index d53e4359..757e70e9 100644 --- a/data/kfactors/generate_g1f1.py +++ b/data/kfactors/generate_g1f1.py @@ -8,7 +8,7 @@ from typing import List, Tuple -def get_gpaths(folder: str, data: str) -> Tuple[str, List[str]]: +def 
get_gpaths(folder: str, data: str, theory: str) -> Tuple[str, List[str]]: """ Get a list of paths to PineAPPL grids in the specified folder. @@ -26,7 +26,7 @@ def get_gpaths(folder: str, data: str) -> Tuple[str, List[str]]: gpaths : List[str] List of paths to PineAPPL grid files. """ - paths = glob(folder + f"/*{data}*F1*") # Find the grids + paths = glob(folder + f"/{theory}*{data}*F1*") # Find the grids gpaths = [] for p in paths: gpaths.append(glob(p + "/*.pineappl.lz4")[0]) @@ -140,7 +140,7 @@ def save_data( output = args.output # Get PineAPPL grid paths and PDF name -gpaths = get_gpaths(folder_name, data) +gpaths = get_gpaths(folder_name, data, theory) # Process each PineAPPL grid for gpath in gpaths: From e62dbbef671a16d63f55f411db21076af94c14bf Mon Sep 17 00:00:00 2001 From: toonhasenack Date: Tue, 20 Feb 2024 10:22:52 +0100 Subject: [PATCH 07/10] theory key for g1f1 --- src/pineko/scale_variations.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/pineko/scale_variations.py b/src/pineko/scale_variations.py index ac4ffe46..e291ffc8 100644 --- a/src/pineko/scale_variations.py +++ b/src/pineko/scale_variations.py @@ -243,6 +243,8 @@ def construct_and_dump_order_exists_grid(ori_grid, to_construct_order): new_grid.set_key_value("initial_state_2", ori_grid.key_values()["initial_state_2"]) if "polarized" in ori_grid.key_values(): new_grid.set_key_value("polarized", ori_grid.key_values()["polarized"]) + if "theory" in ori_grid.key_values(): + new_grid.set_key_value("theory", ori_grid.key_values()["theory"]) return new_grid From 2c8d26063cb5791b9c4a8f4c40750d4d20214b4d Mon Sep 17 00:00:00 2001 From: toonhasenack Date: Tue, 12 Mar 2024 17:33:12 +0100 Subject: [PATCH 08/10] initial push for ALL normalization --- data/kfactors/generate_ALL.py | 112 ++++++++++++++++++++++++++++++++++ 1 file changed, 112 insertions(+) create mode 100644 data/kfactors/generate_ALL.py diff --git a/data/kfactors/generate_ALL.py b/data/kfactors/generate_ALL.py new file mode 100644 
index 00000000..427837b9 --- /dev/null +++ b/data/kfactors/generate_ALL.py @@ -0,0 +1,112 @@ +import lhapdf +import pineappl +import yaml +from glob import glob +from datetime import datetime as dt +import numpy as np +import os +import argparse +from typing import List, Tuple + + +def get_prediction(folder: str) -> np.ndarray: + """ + Get predictions by convoluting a PineAPPL grid with a LHAPDF PDF. + + Parameters + ---------- + folder: str + Path to the kinematics.yaml grid file. + pdf_name : str + The name of the LHAPDF dataset. + + Returns + ------- + prediction : np.ndarray + Computed predictions. + """ + + with open(folder + "/kinematics.yaml", "r") as file: + data = yaml.safe_load(file) + + bins = data["bins"] + prediction = np.zeros(len(bins)) + for i, bin in enumerate(bins): + prediction[i] = ( + bin["y"]["mid"] + * (2 - bin["y"]["mid"]) + / (bin["y"]["mid"] ** 2 + 2 * (1 - bin["y"]["mid"])) + ) + + return prediction + + +def save_data( + data: np.ndarray, + dataset_name: str, + author_name: str, + theory_name: str, + output_name: str = "results", +): + """ + Save computed data to a file with metadata. + + Parameters + ---------- + data : np.ndarray + Computed data. + dataset_name : str + Name of the dataset. + author_name : str + Name of the author. + theory_name : str + Name of the theory. + output_name : str, optional + Output folder name, default is "results". 
+ """ + strf_data = "" + for i in range(data.shape[0]): + strf_data += f"{data[i]} 0.0\n" + + date = dt.now().date() + string = ( + f"""******************************************************************************** +SetName: {dataset_name} +Author: {author_name} +Date: {date} +CodesUsed: https://github.com/NNPDF/yadism +TheoryInput: {theory_name} +Warnings: D(y) normalization for {dataset_name} +******************************************************************************** +""" + + strf_data + ) + + os.makedirs(output_name, exist_ok=True) + with open(output_name + f"/CF_NRM_{dataset_name}_G1.dat", "w") as file: + file.write(string) + + +# Create an argument parser +parser = argparse.ArgumentParser() +parser.add_argument("folder", help="The folder name of the commondata set") +parser.add_argument("--author", help="The name of the author", default="A.J. Hasenack") +parser.add_argument( + "--theory", help="The theory used, formatted as 'theory_'+int", default="theory_800" +) +parser.add_argument("--output", help="The name of the output folder", default="results") +args = parser.parse_args() + +# Extract command line arguments +folder_name = args.folder +author = args.author +theory = args.theory +output = args.output + +dataset_name = os.path.splitext( + os.path.splitext(os.path.basename(os.path.normpath(folder_name)))[0] +)[0] + +# Get predictions and save data +data = get_prediction(folder_name) +save_data(data, dataset_name, author, theory, output) From 57b3b5e5924c8fc9ba2a36012af8d72678a3a54d Mon Sep 17 00:00:00 2001 From: toonhasenack Date: Tue, 19 Mar 2024 12:38:44 +0100 Subject: [PATCH 09/10] g1f1 correctly adopted for nFONLL --- data/kfactors/generate_g1f1.py | 20 +++++++------------- 1 file changed, 7 insertions(+), 13 deletions(-) diff --git a/data/kfactors/generate_g1f1.py b/data/kfactors/generate_g1f1.py index 757e70e9..5e233565 100644 --- a/data/kfactors/generate_g1f1.py +++ b/data/kfactors/generate_g1f1.py @@ -8,7 +8,7 @@ from typing import List, 
Tuple -def get_gpaths(folder: str, data: str, theory: str) -> Tuple[str, List[str]]: +def get_file(folder: str, data: str, theory: str) -> Tuple[str, List[str]]: """ Get a list of paths to PineAPPL grids in the specified folder. @@ -26,12 +26,8 @@ def get_gpaths(folder: str, data: str, theory: str) -> Tuple[str, List[str]]: gpaths : List[str] List of paths to PineAPPL grid files. """ - paths = glob(folder + f"/{theory}*{data}*F1*") # Find the grids - gpaths = [] - for p in paths: - gpaths.append(glob(p + "/*.pineappl.lz4")[0]) - print(f"Found {len(gpaths)} pineapple grids.") - return gpaths + file = glob(folder + f"/{data}_F1.pineappl.lz4")[0] + return file def get_prediction(gpath: str, pdf_name: str) -> np.ndarray: @@ -140,10 +136,8 @@ def save_data( output = args.output # Get PineAPPL grid paths and PDF name -gpaths = get_gpaths(folder_name, data, theory) +file = get_file(folder_name, data, theory) -# Process each PineAPPL grid -for gpath in gpaths: - # Get predictions and save data - df = get_prediction(gpath, pdf_name) - save_data(df, data, pdf_name, author, theory, output) +# Get predictions and save data +df = get_prediction(file, pdf_name) +save_data(df, data, pdf_name, author, theory, output) From a18ff15ede3a7b64454971ece896d2277894dc0f Mon Sep 17 00:00:00 2001 From: toonhasenack Date: Thu, 21 Mar 2024 10:51:06 +0100 Subject: [PATCH 10/10] 1/2xF1 for ALL too --- data/kfactors/generate_ALL.py | 63 +++++++++++++++++++++++++++++++---- 1 file changed, 56 insertions(+), 7 deletions(-) diff --git a/data/kfactors/generate_ALL.py b/data/kfactors/generate_ALL.py index 427837b9..6a7d13ad 100644 --- a/data/kfactors/generate_ALL.py +++ b/data/kfactors/generate_ALL.py @@ -9,16 +9,41 @@ from typing import List, Tuple -def get_prediction(folder: str) -> np.ndarray: +def get_file(folder: str, data: str, theory: str) -> Tuple[str, List[str]]: + """ + Get a list of paths to PineAPPL grids in the specified folder. 
+ + Parameters + ---------- + folder : str + The folder path where PineAPPL grids are located. + data : str + Name of the commondata set. + + Returns + ------- + pdf_name : str + The name of the PDF dataset. + gpaths : List[str] + List of paths to PineAPPL grid files. + """ + file = glob(folder + f"/{data}_F1.pineappl.lz4")[0] + return file + + +def get_prediction(file: str, pdf_name: str, folder: str) -> np.ndarray: """ Get predictions by convoluting a PineAPPL grid with a LHAPDF PDF. Parameters ---------- - folder: str - Path to the kinematics.yaml grid file. + + file : str + Path to the PineAPPL grid file. pdf_name : str The name of the LHAPDF dataset. + folder: str + Path to the kinematics.yaml grid file. Returns ------- @@ -26,6 +51,22 @@ def get_prediction(folder: str) -> np.ndarray: Computed predictions. """ + # Load the PineAPPL grid + grid = pineappl.grid.Grid.read(file) + + # Load the LHAPDF + pdf = lhapdf.mkPDF(pdf_name) + + # Proton reference number + nr = 2212 + + # Perform the convolution + convolution = grid.convolute_with_one( + nr, # Type of target + pdf.xfxQ2, # The PDF callable pdf.xfxQ2(pid, x, Q2) -> xfx + pdf.alphasQ2, # The alpha_s callable pdf.alpha_s(Q2) -> alpha_s + ) + with open(folder + "/kinematics.yaml", "r") as file: data = yaml.safe_load(file) @@ -37,6 +78,7 @@ def get_prediction(folder: str) -> np.ndarray: * (2 - bin["y"]["mid"]) / (bin["y"]["mid"] ** 2 + 2 * (1 - bin["y"]["mid"])) ) + prediction[i] = prediction[i] / convolution[i] return prediction @@ -76,7 +118,7 @@ def save_data( Date: {date} CodesUsed: https://github.com/NNPDF/yadism TheoryInput: {theory_name} -Warnings: D(y) normalization for {dataset_name} +Warnings: D(y)/2xF1 normalization for {dataset_name} ******************************************************************************** """ + strf_data @@ -89,6 +131,9 @@ def save_data( # Create an argument parser parser = argparse.ArgumentParser() +parser.add_argument("pdf", help="The name of the PDF dataset of LHAPDF") 
+parser.add_argument("gpath", help="The folder name of the F1 PineAPPL grids")
+parser.add_argument("data", help="Name of the commondata set")
 parser.add_argument("folder", help="The folder name of the commondata set")
 parser.add_argument("--author", help="The name of the author", default="A.J. Hasenack")
 parser.add_argument(
@@ -98,15 +143,19 @@ def save_data(
 args = parser.parse_args()
 
 # Extract command line arguments
-folder_name = args.folder
+pdf = args.pdf
+gpath = args.gpath
+data = args.data
+folder = args.folder
 author = args.author
 theory = args.theory
 output = args.output
 
 dataset_name = os.path.splitext(
-    os.path.splitext(os.path.basename(os.path.normpath(folder_name)))[0]
+    os.path.splitext(os.path.basename(os.path.normpath(folder)))[0]
 )[0]
 
 # Get predictions and save data
-data = get_prediction(folder_name)
+file = get_file(gpath, data, theory)
+data = get_prediction(file, pdf, folder)
 save_data(data, dataset_name, author, theory, output)