From 28ac750ce61e224a99a048b150e40b7cffdc2e07 Mon Sep 17 00:00:00 2001 From: Radonirinaunimi Date: Thu, 11 Mar 2021 22:15:33 +0100 Subject: [PATCH 01/11] gans should fetch the PDF name from compressor card --- runcards/ganpdfs.yml | 6 ------ 1 file changed, 6 deletions(-) diff --git a/runcards/ganpdfs.yml b/runcards/ganpdfs.yml index ac1e1bd..ac56170 100644 --- a/runcards/ganpdfs.yml +++ b/runcards/ganpdfs.yml @@ -1,8 +1,3 @@ -############################################################################################# -# Input PDF # -############################################################################################# -pdf: NNPDF40_nnlo_as_0118_1000 - ############################################################################################# # PDF Grids: # # --------- # @@ -70,4 +65,3 @@ nd_steps : 4 # Number of steps to train ng_steps : 3 # Number of steps to train the Generator for one training run batch_size : 70 # Batch size per epoch in terms of percentage epochs : 1000 # Number of epochs -pdf: NNPDF40_nnlo_as_0118_1000 \ No newline at end of file From 5429600cbde16d1fd9616e246a77192469506ffb Mon Sep 17 00:00:00 2001 From: Radonirinaunimi Date: Thu, 11 Mar 2021 22:15:58 +0100 Subject: [PATCH 02/11] Automatically run adiabatic if existing_enhanced is True --- src/pycompressor/compressing.py | 216 +++++++++++++++++--------------- 1 file changed, 115 insertions(+), 101 deletions(-) diff --git a/src/pycompressor/compressing.py b/src/pycompressor/compressing.py index 8cd962d..e7a7f9a 100644 --- a/src/pycompressor/compressing.py +++ b/src/pycompressor/compressing.py @@ -63,20 +63,6 @@ def check_validity(pdfsetting, compressed, gans, est_dic): f" {members} members if enhancing is not active.") -@make_argcheck -def check_adiabaticity(pdfsetting, gans, compressed): - """ Check whether we are in an adiabatic optimization and if so if it can be performed """ - pdf_name = pdfsetting["pdf"] - if pdfsetting.get("existing_enhanced") and not gans.get("enhanced"): - 
adiabatic_result = f"{pdf_name}/compress_{pdf_name}_{compressed}_output.dat" - if not pathlib.Path(adiabatic_result).exists(): - raise CheckError( - "Adiabatic optimization needs to be ran first with existing_enhanced: False" - f"\nMissing the file: {adiabatic_result}" - ) - - -@check_adiabaticity @check_validity def compressing(pdfsetting, compressed, minimizer, est_dic, gans): """ @@ -94,7 +80,7 @@ def compressing(pdfsetting, compressed, minimizer, est_dic, gans): """ pdf = str(pdfsetting["pdf"]) - enhanced_already_exists = pdfsetting.get("existing_enhanced", False) + enhd_exists = pdfsetting.get("existing_enhanced", False) if gans["enhance"]: from pycompressor.postgans import postgans @@ -121,95 +107,123 @@ def compressing(pdfsetting, compressed, minimizer, est_dic, gans): postgans(str(pdf), outfolder, nbgen) splash() - # Set seed rndgen = Generator(PCG64(seed=0)) - console.print("\n• Load PDF sets & Printing Summary:", style="bold blue") xgrid = XGrid().build_xgrid() # Load Prior Sets prior = PdfSet(pdf, xgrid, Q0, NF).build_pdf() rndindex = rndgen.choice(prior.shape[0], compressed, replace=False) - # Load Enhanced Sets - if enhanced_already_exists: - try: - postgan = pdf + "_enhanced" - final_result = {"pdfset_name": postgan} - enhanced = PdfSet(postgan, xgrid, Q0, NF).build_pdf() - except RuntimeError as excp: - raise LoadingEnhancedError(f"{excp}") - nb_iter, ref_estimators = 100000, None - init_index = np.array(extract_index(pdf, compressed)) - else: - final_result = {"pdfset_name": pdf} - nb_iter, ref_estimators = 15000, None - init_index, enhanced = rndindex, prior - - # Create output folder - outrslt = postgan if enhanced_already_exists else pdf - folder = pathlib.Path().absolute() / outrslt - folder.mkdir(exist_ok=True) - # Create output folder for ERF stats - out_folder = pathlib.Path().absolute() / "erfs_output" - out_folder.mkdir(exist_ok=True) - - # Output Summary - table = Table(show_header=True, header_style="bold magenta") - 
table.add_column("Parameters", justify="left", width=24) - table.add_column("Description", justify="left", width=50) - table.add_row("PDF set name", f"{pdf}") - table.add_row("Size of Prior", f"{prior.shape[0] - 1} replicas") - if enhanced_already_exists: - table.add_row("Size of enhanced", f"{enhanced.shape[0] - 1} replicas") - table.add_row("Size of compression", f"{compressed} replicas") - table.add_row("Input energy Q0", f"{Q0} GeV") - table.add_row( - "x-grid size", - f"{xgrid.shape[0]} points, x=({xgrid[0]:.4e}, {xgrid[-1]:.4e})" - ) - table.add_row("Minimizer", f"{minimizer}") - console.print(table) - - # Init. Compressor class - comp = Compress( - prior, - enhanced, - est_dic, - compressed, - init_index, - ref_estimators, - out_folder, - rndgen - ) - # Start compression depending on the Evolution Strategy - erf_list = [] - console.print("\n• Compressing MC PDF replicas:", style="bold blue") - if minimizer == "genetic": - # Run compressor using GA - with trange(nb_iter) as iter_range: - for _ in iter_range: - iter_range.set_description("Compression") - erf, index = comp.genetic_algorithm(nb_mut=5) - erf_list.append(erf) - iter_range.set_postfix(ERF=erf) - elif minimizer == "cma": - # Run compressor using CMA - erf, index = comp.cma_algorithm(std_dev=0.8) - else: - raise ValueError(f"{minimizer} is not a valid minimizer.") - - # Prepare output file - final_result["ERFs"] = erf_list - final_result["index"] = index.tolist() - outfile = open(f"{outrslt}/compress_{pdf}_{compressed}_output.dat", "w") - outfile.write(json.dumps(final_result, indent=2)) - outfile.close() - # Fetching ERF and construct reduced PDF grid - console.print(f"\n• Final ERF: [bold red]{erf}.", style="bold red") - - # Compute final ERFs for the final choosen replicas - final_err_func = comp.final_erfs(index) - serfile = open(f"{out_folder}/erf_reduced.dat", "a+") - serfile.write(f"{compressed}:") - serfile.write(json.dumps(final_err_func)) - serfile.write("\n") - serfile.close() + + outname 
= [pdf] + final_result = [{"pdfset_name": pdf}] + nb_iter, ref_estimators = [15000], [None] + init_index, enhanced = [rndindex], [prior] + + # Methodological iterations + mtd_iteration = 2 if enhd_exists else 1 + + for cmtype in range(mtd_iteration): + # necessary to get the same normalization + rndgen = Generator(PCG64(seed=0)) + _ = rndgen.choice(prior.shape[0], compressed, replace=False) + # reference log + if cmtype==0: + console.print( + "Standard compression using Input set", + style="bold green underline" + ) + elif cmtype==1: + console.print( + "Adiabatic compression using Enhanced set", + style="bold green underline" + ) + + # Create output folder + outrslt = outname[cmtype] + folder = pathlib.Path().absolute() / outrslt + folder.mkdir(exist_ok=True) + # Create output folder for ERF stats + out_folder = pathlib.Path().absolute() / "erfs_output" + out_folder.mkdir(exist_ok=True) + + # Output Summary + console.print("\n• Compression Summary:", style="bold blue") + table = Table(show_header=True, header_style="bold magenta") + table.add_column("Parameters", justify="left", width=24) + table.add_column("Description", justify="left", width=50) + table.add_row("PDF set name", f"{pdf}") + table.add_row("Size of Prior", f"{prior.shape[0] - 1} replicas") + if cmtype!=0 and enhd_exists: + table.add_row( + "Size of enhanced", + f"{enhanced[1].shape[0] - 1} replicas" + ) + table.add_row("Size of compression", f"{compressed} replicas") + table.add_row("Input energy Q0", f"{Q0} GeV") + table.add_row( + "x-grid size", + f"{xgrid.shape[0]} points, x=({xgrid[0]:.4e}, {xgrid[-1]:.4e})" + ) + table.add_row("Minimizer", f"{minimizer}") + console.print(table) + + # Init. 
Compressor class + comp = Compress( + prior, + enhanced[cmtype], + est_dic, + compressed, + init_index[cmtype], + ref_estimators[cmtype], + out_folder, + rndgen + ) + # Start compression depending on the Evolution Strategy + erf_list = [] + console.print("\n• Compressing MC PDF replicas:", style="bold blue") + if minimizer == "genetic": + # Run compressor using GA + with trange(nb_iter[cmtype]) as iter_range: + for _ in iter_range: + iter_range.set_description("Compression") + erf, index = comp.genetic_algorithm(nb_mut=5) + erf_list.append(erf) + iter_range.set_postfix(ERF=erf) + elif minimizer == "cma": + # Run compressor using CMA + erf, index = comp.cma_algorithm(std_dev=0.8) + else: + raise ValueError(f"{minimizer} is not a valid minimizer.") + + # Prepare output file + final_result[cmtype]["ERFs"] = erf_list + final_result[cmtype]["index"] = index.tolist() + outfile = open(f"{outrslt}/compress_{pdf}_{compressed}_output.dat", "w") + outfile.write(json.dumps(final_result[cmtype], indent=2)) + outfile.close() + # Fetching ERF and construct reduced PDF grid + console.print(f"\n• Final ERF: {erf}.", style="bold blue") + + if (cmtype!=0 and enhd_exists) or (cmtype==0 and not enhd_exists): + # Compute final ERFs for the final choosen replicas + final_err_func = comp.final_erfs(index) + serfile = open(f"{out_folder}/erf_reduced.dat", "a+") + serfile.write(f"{compressed}:") + serfile.write(json.dumps(final_err_func)) + serfile.write("\n") + serfile.close() + + # Load Enhanced Sets + if cmtype==0 and enhd_exists: + try: + postgan = pdf + "_enhanced" + outname.append(postgan) + final_result.append({"pdfset_name": postgan}) + enhncd = PdfSet(postgan, xgrid, Q0, NF).build_pdf() + enhanced.append(enhncd) + except RuntimeError as excp: + raise LoadingEnhancedError(f"{excp}") + nb_iter.append(100000) + ref_estimators.append(None) + pre_index = np.array(extract_index(pdf, compressed)) + init_index.append(pre_index) From cbb42be5ea17cfddd2ad259b7d5ce90207ce8d11 Mon Sep 17 
00:00:00 2001 From: Tanjona Rabemananjara Date: Thu, 18 Mar 2021 08:14:14 +0100 Subject: [PATCH 03/11] simplify readme --- README.md | 54 ++++++++++++++++++++++++------------------------------ 1 file changed, 24 insertions(+), 30 deletions(-) diff --git a/README.md b/README.md index 1973b35..c4e23f7 100644 --- a/README.md +++ b/README.md @@ -1,53 +1,47 @@ ![pytest](https://github.com/N3PDF/pycompressor/workflows/pytest/badge.svg) [![documentation](https://github.com/N3PDF/pycompressor/workflows/docs/badge.svg)](https://n3pdf.github.io/pycompressor/) -### pycompressor +## pycompressor -Fast and efficient python implementation of PDF set **compressor** (https://arxiv.org/abs/1504.06469). +Fast and efficient python implementation of PDF **compression** (https://arxiv.org/abs/1504.06469). -#### New features - -Additional new features have been added to the following python package. The two main features are: -- **Covariance Matrix Adaptation-Evlotion strategy (CMA-ES):** in addition to the Genetic -Algorithm (GA), there is now the possibility to choose as a minimizer the CMA. The choice -of minimizer can be defined in the `runcard.yml` file. -- **Generative Adversarial Strategy (GANs):** this is a standalone python [package](https://github.com/N3PDF/ganpdfs/tree/master) -that can enhance the statistics of the prior PDF replicas before compression by generating -synthetic replicas. For more details, refer to the [documentation](https://n3pdf.github.io/ganpdfs/) -(still has to be done). In a similar way, in order to trigger the enhancement, one just has to set -the value of `enhance` in the runcard to be `True`. Setting this value to `False` will just run the -standard compression. The GANs also requires extra-parameters (as shown in the example -[runcard.yml](https://github.com/N3PDF/pycompressor/blob/master/runcard.yml)) that defines -the structure of the networks. 
- -#### Installation +### How to install To install `pyCompressor`, just type: ```bash -python setup.py install -``` -or if you are a developer: -```bash -python setup.py develop +python setup.py install # or python setup.py develop (if you want development mode) ``` -#### How to use +### How to use + +#### Standard compression -The input parameters that define the compression is contained in a YAML file. To run -the `pycompressor` code, just type the following: +The input parameters that define the compression is contained in a YAML file. To run the standard compression, +use the reference [runcard](https://github.com/N3PDF/pycompressor/blob/master/runcards/runcard.yml) as it is by +just replacing the entry of the `pdf` key with the name of the PDF set, then run the following: ```bash pycomp runcards/runcard.yml [--threads NUMB_THREADS] ``` -A detailed instruction on how to set the different parameters in the runcard can be found here. -#### Generating compressed PDF set & post-analysis +#### Using GAN and/or Compressing from an enhanced set + +Although it is advised to run the [ganpdfs](https://github.com/N3PDF/ganpdfs) independently, it is possible +to generate enhanced PDF replicas within the `pycompressor`. To do so, just set the entry `enhance` in the +runcard to `True` and specify the total number of replicas (prior+synthetics). + +Finally, in order to perform a compression with an enhanced set, set the entry `existing_enhanced` to `True`. + +A detailed instruction on how to set the different parameters in the runcard can be found +[here](https://n3pdf.github.io/pycompressor/howto/howto.html). + +### Generating compressed PDF set & post-analysis The code will create a folder named after the prior PDF sets. To generate the compressed PDF grid, run the following command: ```bash get-grid -i /compressed___output.dat ``` -Note that if the compression is done from an enhanced set, the output folder will be append by `_enhanced`. 
+Note that if the compression is done from an enhanced set, the output folder will be appended by `_enhanced`. Finally, in order to generate ERF plots, enter in the `erfs_output` directory and run the following: ```bash @@ -56,7 +50,7 @@ validate --random erf_randomized.dat --reduced erf_reduced.dat This script can also plot the ERF validation from the old compressor code by adding the flag `--format ccomp`. -#### Warning +### Warning This package cannot be installed with python 3.9 yet due to the numba dependency. This will be resolved soon according to [#6579](https://github.com/numba/numba/pull/6579). From f9c62b23921edd00579c76b38d5dde60841d14ca Mon Sep 17 00:00:00 2001 From: Tanjona Rabemananjara Date: Thu, 18 Mar 2021 09:07:37 +0100 Subject: [PATCH 04/11] fex typos in readme Co-authored-by: Juacrumar --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index c4e23f7..2877f0f 100644 --- a/README.md +++ b/README.md @@ -16,7 +16,7 @@ python setup.py install # or python setup.py develop (if you want development mo #### Standard compression -The input parameters that define the compression is contained in a YAML file. To run the standard compression, +The input parameters that define the compression are contained in a YAML file. 
To run the standard compression, use the reference [runcard](https://github.com/N3PDF/pycompressor/blob/master/runcards/runcard.yml) as it is by just replacing the entry of the `pdf` key with the name of the PDF set, then run the following: ```bash From d1acad0fc7850a6aad04671c1de884fe12e5304f Mon Sep 17 00:00:00 2001 From: Tanjona Rabemananjara Date: Thu, 18 Mar 2021 09:07:51 +0100 Subject: [PATCH 05/11] more typos fix Co-authored-by: Juacrumar --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 2877f0f..36602a9 100644 --- a/README.md +++ b/README.md @@ -18,7 +18,7 @@ python setup.py install # or python setup.py develop (if you want development mo The input parameters that define the compression are contained in a YAML file. To run the standard compression, use the reference [runcard](https://github.com/N3PDF/pycompressor/blob/master/runcards/runcard.yml) as it is by -just replacing the entry of the `pdf` key with the name of the PDF set, then run the following: +just replacing the entry of the `pdf` key with the name of the PDF set to be compressed, then run the following: ```bash pycomp runcards/runcard.yml [--threads NUMB_THREADS] ``` From 2f864d32240d513078db1d7c6820f4f87cab3eca Mon Sep 17 00:00:00 2001 From: Tanjona Rabemananjara Date: Thu, 18 Mar 2021 09:08:17 +0100 Subject: [PATCH 06/11] fix formatting Co-authored-by: Juacrumar --- src/pycompressor/compressing.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/pycompressor/compressing.py b/src/pycompressor/compressing.py index e7a7f9a..bcdae22 100644 --- a/src/pycompressor/compressing.py +++ b/src/pycompressor/compressing.py @@ -116,7 +116,8 @@ def compressing(pdfsetting, compressed, minimizer, est_dic, gans): outname = [pdf] final_result = [{"pdfset_name": pdf}] - nb_iter, ref_estimators = [15000], [None] + nb_iter =[15000] + ref_estimators = [None] init_index, enhanced = [rndindex], [prior] # Methodological iterations From 
2e41372d5cd1e1ef3674324d5b68a5f4db568aae Mon Sep 17 00:00:00 2001 From: Tanjona Rabemananjara Date: Thu, 18 Mar 2021 09:08:31 +0100 Subject: [PATCH 07/11] more formatting fix Co-authored-by: Juacrumar --- src/pycompressor/compressing.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/pycompressor/compressing.py b/src/pycompressor/compressing.py index bcdae22..2b8e0a8 100644 --- a/src/pycompressor/compressing.py +++ b/src/pycompressor/compressing.py @@ -118,7 +118,8 @@ def compressing(pdfsetting, compressed, minimizer, est_dic, gans): final_result = [{"pdfset_name": pdf}] nb_iter =[15000] ref_estimators = [None] - init_index, enhanced = [rndindex], [prior] + init_index = [rndindex] + enhanced = [prior] # Methodological iterations mtd_iteration = 2 if enhd_exists else 1 From fb581cd93bf1bcc609ab51ef26b4e625664be84c Mon Sep 17 00:00:00 2001 From: Tanjona R Date: Thu, 11 Mar 2021 21:53:18 +0100 Subject: [PATCH 08/11] Add comment on adiabatic minization --- doc/source/howto/howto.rst | 23 +++++++++++++++++++++-- 1 file changed, 21 insertions(+), 2 deletions(-) diff --git a/doc/source/howto/howto.rst b/doc/source/howto/howto.rst index c58f6cc..f5fa671 100644 --- a/doc/source/howto/howto.rst +++ b/doc/source/howto/howto.rst @@ -24,7 +24,7 @@ One of the keys for the ``gan`` entry is a ``runcard`` which gets passed to the For details on how to set the parameters for the GAN, have a look `here `_. -An example of run card is shown below: +An example of an input card is shown below: .. code-block:: yaml @@ -75,7 +75,14 @@ An example of run card is shown below: total_replicas: 3000 -If ``enhance`` is set to `True`, the code will first enhance the statistic the prior using GANs. + +Running GANs within pyCompressor +-------------------------------- + + +Although it is advised to run the `ganpdfs` code independently, it is possible to call it +within the `pyCompressor` code by setting ``enhance`` to `True` in the runcard. 
In this +scenario, the code will first enhance the statistics of the prior using GANs. Once the generation of the extra-replicas is finished, the output grids are evolved using `evolven3fit `_. Then, the :mod:`pyCompressor.postgans` module (in a similar fashion as postfit) creates a @@ -123,6 +130,18 @@ If ``enhance`` is instead set to `False`, the folder will just simply be: +Adiabatic minimization +---------------------- + + +Since compressing from an enhanced set could be difficult due to the limitation of the minimization +algorithm, it is possible to perform an adiabatic minimization by setting ``existing_enhanced`` to +`True` in the runcard. In this case, the minimization is performed in two steps: (1) a standard +compression of the prior, (2) a compression using the enhanced set but using as a starting point +the space in which the best from the standard compression was generated. + + + PDF grid and Validation plot ============================ From 203fa6884345a21c73cf0f9cfc9b607b18b9fe5b Mon Sep 17 00:00:00 2001 From: Radonirinaunimi Date: Thu, 18 Mar 2021 09:39:22 +0100 Subject: [PATCH 09/11] =?UTF-8?q?Bump=20version:=201.0.0-dev=20=E2=86=92?= =?UTF-8?q?=201.1.0-dev?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .bumpversion.cfg | 2 +- setup.py | 2 +- src/pycompressor/__init__.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.bumpversion.cfg b/.bumpversion.cfg index d47b8d3..1da0675 100644 --- a/.bumpversion.cfg +++ b/.bumpversion.cfg @@ -1,7 +1,7 @@ [bumpversion] commit = True tag = True -current_version = 1.0.0-dev +current_version = 1.1.0-dev parse = (?P\d+)\.(?P\d+)\.(?P\d+)(\-(?P[a-z]+))? 
serialize = {major}.{minor}.{patch}-{release} diff --git a/setup.py b/setup.py index 869fcf0..baadee2 100644 --- a/setup.py +++ b/setup.py @@ -34,7 +34,7 @@ setup( name=PACKAGE, - version='1.0.0-dev', + version='1.1.0-dev', description="PDF Compression", long_description=LONG_DESCRIPTION, long_description_content_type="text/markdown", diff --git a/src/pycompressor/__init__.py b/src/pycompressor/__init__.py index 6c7936a..98e28b8 100644 --- a/src/pycompressor/__init__.py +++ b/src/pycompressor/__init__.py @@ -1 +1 @@ -__version__ = "1.0.0-dev" +__version__ = "1.1.0-dev" From 151c4664b35c1905090a9a52b6914c28ad7902c6 Mon Sep 17 00:00:00 2001 From: Tanjona R Date: Fri, 19 Mar 2021 14:28:13 +0100 Subject: [PATCH 10/11] add charms to grid --- src/pycompressor/compressing.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/pycompressor/compressing.py b/src/pycompressor/compressing.py index 2b8e0a8..3458d4f 100644 --- a/src/pycompressor/compressing.py +++ b/src/pycompressor/compressing.py @@ -24,7 +24,7 @@ # Initial scale (in GeV) Q0 = 1 # Total number of flavour to 2nf+1=7 -NF = 3 +NF = 4 class LoadingEnhancedError(Exception): From 80957bd42111390244a106bc4594b013311bd8cc Mon Sep 17 00:00:00 2001 From: Tanjona R Date: Fri, 19 Mar 2021 14:59:57 +0100 Subject: [PATCH 11/11] rebase master --- src/pycompressor/compressing.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/pycompressor/compressing.py b/src/pycompressor/compressing.py index 3458d4f..099b717 100644 --- a/src/pycompressor/compressing.py +++ b/src/pycompressor/compressing.py @@ -23,7 +23,7 @@ # Initial scale (in GeV) Q0 = 1 -# Total number of flavour to 2nf+1=7 +# Total number of flavour to 2nf+1=9 NF = 4