Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improve compression of enhanced #40

Open
wants to merge 5 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions runcards/runcard.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,13 @@
# PDF Set #
###################################################
pdfsetting:
pdf: NNPDF40_nnlo_as_0118_1000
pdf: 210219-02-rs-nnpdf40-1000
existing_enhanced: False

###################################################
# Size of compressed PDF replicas #
###################################################
compressed: 500
compressed: 100

###################################################
# Choice of Minimizer #
Expand Down
40 changes: 31 additions & 9 deletions src/pycompressor/compressing.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,14 +15,18 @@
from pycompressor.pdfgrid import XGrid
from pycompressor.pdfgrid import PdfSet
from pycompressor.compressor import Compress
from pycompressor.utils import map_index
from pycompressor.utils import extract_index
from pycompressor.utils import preprocess_enhanced
from pycompressor.utils import restore_permutation
from pycompressor.estimators import ALLOWED_ESTIMATORS


console = Console()
log = logging.getLogger(__name__)

# Initial scale (in GeV)
Q0 = 1
Q0 = 1.65
# Total number of flavour to 2nf+1=7
NF = 4

Expand All @@ -37,11 +41,11 @@ def splash():

style = Style(color="blue")
logo = Table(show_header=True, header_style="bold blue", style=style)
logo.add_column("𝖕𝖞𝕮𝖔𝖒𝖕𝖗𝖊𝖘𝖘𝖔𝖗", justify="center", width=60)
logo.add_column("𝖕𝖞𝕮𝖔𝖒𝖕𝖗𝖊𝖘𝖘𝖔𝖗", justify="center", width=76)
logo.add_row("[bold blue]Fast python compressor for PDF replicas.")
logo.add_row("[bold blue]https://n3pdf.github.io/pycompressor/")
logo.add_row("[bold blue]© N3PDF 2021")
logo.add_row("[bold blue]Authors: Stefano Carrazza, Juan E. Cruz-Martinez, Tanjona R. Rabemananjara")
logo.add_row("[bold blue]Authors: Stefano Carrazza, Juan M. Cruz-Martinez, Tanjona R. Rabemananjara")
console.print(logo)


Expand All @@ -67,7 +71,7 @@ def check_validity(pdfsetting, compressed, gans, est_dic):
def check_adiabaticity(pdfsetting, gans, compressed):
""" Check whether we are in an adiabatic optimization and if so if it can be performed """
pdf_name = pdfsetting["pdf"]
if pdfsetting.get("existing_enhanced") and not gans.get("enhanced"):
if pdfsetting.get("existing_enhanced") and not gans.get("enhanced"):
adiabatic_result = f"{pdf_name}/compress_{pdf_name}_{compressed}_output.dat"
if not pathlib.Path(adiabatic_result).exists():
raise CheckError(
Expand Down Expand Up @@ -121,7 +125,6 @@ def compressing(pdfsetting, compressed, minimizer, est_dic, gans):
postgans(str(pdf), outfolder, nbgen)

splash()
# Set seed
rndgen = Generator(PCG64(seed=0))

console.print("\n• Load PDF sets & Printing Summary:", style="bold blue")
Expand All @@ -134,15 +137,29 @@ def compressing(pdfsetting, compressed, minimizer, est_dic, gans):
try:
postgan = pdf + "_enhanced"
final_result = {"pdfset_name": postgan}
enhanced = PdfSet(postgan, xgrid, Q0, NF).build_pdf()
enhcd_grid = PdfSet(postgan, xgrid, Q0, NF).build_pdf()
processed, pindex, counts = preprocess_enhanced(enhcd_grid)
# Shuffled the enhanced PDF grid and save the shuffling
# index in order to restore it later.
shuffled_index = rndgen.choice(
processed.shape[0],
processed.shape[0],
replace=False
)
enhanced = processed[shuffled_index]
except RuntimeError as excp:
raise LoadingEnhancedError(f"{excp}")
nb_iter, ref_estimators = 100000, None
init_index = np.array(extract_index(pdf, compressed))
extr_index = np.array(extract_index(pdf, compressed))
map_pindex = map_index(pindex, extr_index)
init_index = map_index(shuffled_index, map_pindex)
assert extr_index.shape[0] == init_index.shape[0]
else:
final_result = {"pdfset_name": pdf}
nb_iter, ref_estimators = 15000, None
init_index, enhanced = rndindex, prior
# reset seeds
rndgen = Generator(PCG64(seed=1))

# Create output folder
outrslt = postgan if enhanced_already_exists else pdf
Expand All @@ -159,7 +176,7 @@ def compressing(pdfsetting, compressed, minimizer, est_dic, gans):
table.add_row("PDF set name", f"{pdf}")
table.add_row("Size of Prior", f"{prior.shape[0] - 1} replicas")
if enhanced_already_exists:
table.add_row("Size of enhanced", f"{enhanced.shape[0] - 1} replicas")
table.add_row("Size of enhanced", f"{enhcd_grid.shape[0] - 1} replicas")
table.add_row("Size of compression", f"{compressed} replicas")
table.add_row("Input energy Q0", f"{Q0} GeV")
table.add_row(
Expand Down Expand Up @@ -196,6 +213,10 @@ def compressing(pdfsetting, compressed, minimizer, est_dic, gans):
erf, index = comp.cma_algorithm(std_dev=0.8)
else:
raise ValueError(f"{minimizer} is not a valid minimizer.")
# Restore the shuffled index back in case of compression from
# an enhanced set
if enhanced_already_exists:
index = restore_permutation(index, shuffled_index, pindex)

# Prepare output file
final_result["ERFs"] = erf_list
Expand All @@ -207,7 +228,8 @@ def compressing(pdfsetting, compressed, minimizer, est_dic, gans):
console.print(f"\n• Final ERF: [bold red]{erf}.", style="bold red")

# Compute final ERFs for the final choosen replicas
final_err_func = comp.final_erfs(index)
samples = enhcd_grid if enhanced_already_exists else enhanced
final_err_func = comp.final_erfs(samples, index)
serfile = open(f"{out_folder}/erf_reduced.dat", "a+")
serfile.write(f"{compressed}:")
serfile.write(json.dumps(final_err_func))
Expand Down
4 changes: 2 additions & 2 deletions src/pycompressor/compressor.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,7 @@ def all_error_function(self, index):
erf_res = self.err_func.compute_all_erf(reduc_rep)
return erf_res

def final_erfs(self, index):
def final_erfs(self, enhanced, index):
"""Compute the final ERF after minimization.

Parameters
Expand All @@ -96,7 +96,7 @@ def final_erfs(self, index):
Dictionary containing the list of estimators and their respective
values.
"""
selected_replicas = self.enhanced[index]
selected_replicas = enhanced[index]
erfs = self.err_func.compute_all_erf(selected_replicas)
return erfs

Expand Down
2 changes: 1 addition & 1 deletion src/pycompressor/errfunction.py
Original file line number Diff line number Diff line change
Expand Up @@ -276,7 +276,7 @@ class ErfComputation:
Number of trials
"""

def __init__(self, prior, est_dic, nreduc, folder, rndgen, trials=1000, norm=True):
def __init__(self, prior, est_dic, nreduc, folder, rndgen, trials=10000, norm=True):
self.prior = prior
self.est_dic = est_dic
# Compute estimators for PRIOR replicas
Expand Down
71 changes: 71 additions & 0 deletions src/pycompressor/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,77 @@
log = logging.getLogger(__name__)


def preprocess_enhanced(enhanced, dec_check=15):
    """Remove duplicate replicas from an enhanced PDF grid.

    Rows are compared after rounding to ``dec_check`` decimal places, so
    replicas that differ only below that precision are treated as
    duplicates. Note that the returned grid contains the *rounded*
    values, not the original ones.

    Parameters
    ----------
    enhanced: np.array(float)
        enhanced PDF grid
    dec_check: int
        number of decimal places used when comparing rows for equality

    Returns
    -------
    tuple(np.array, np.array, np.array)
        the de-duplicated (rounded) grid, the indices in the original
        grid of the rows that were kept, and the number of times each
        kept row occurred.
    """
    deduped, kept_index, multiplicity = np.unique(
        np.round(enhanced, dec_check),
        axis=0,
        return_index=True,
        return_counts=True,
    )
    return deduped, kept_index, multiplicity


def map_index(refarr, arr):
    """Map each element of `arr` to the position at which it occurs
    in `refarr`.

    Parameters
    ----------
    refarr: np.array(int)
        one dimensional reference array of integers with size M; if a
        value occurs several times, the last occurrence wins
    arr: np.array(int)
        one dimensional array of integers with size N, every element of
        which must occur in `refarr`

    Returns
    -------
    np.array(int)
        one dimensional array of integers with size N holding, for each
        element of `arr`, its index in `refarr`

    Raises
    ------
    KeyError
        if an element of `arr` does not occur in `refarr`
    """
    # Build the value -> position lookup once instead of scanning
    # `refarr` for every query.
    positions = {value: pos for pos, value in enumerate(refarr)}
    # np.fromiter handles the empty-input case (np.vectorize raises on
    # size-0 input) and raises KeyError on unknown values instead of
    # silently producing None entries.
    return np.fromiter((positions[v] for v in arr), dtype=int, count=len(arr))


def restore_permutation(index, shuffle, preprocess):
    """Undo the mapping of indices due to the pre-processing and the
    shuffling of the enhanced grid.

    The selected `index` refers to rows of the shuffled, de-duplicated
    grid; composing the two index arrays translates it back into row
    indices of the original enhanced grid.

    Parameters
    ----------
    index: np.array(int)
        indices selected by the compression (into the shuffled grid)
    shuffle: np.array(int)
        permutation that was applied to the pre-processed grid
    preprocess: np.array(int)
        indices of the rows kept by the pre-processing step

    Returns
    -------
    np.array(int)
        indices into the original enhanced grid
    """
    # First undo the shuffling, then map back through the rows kept by
    # the duplicate-removal step.
    return preprocess[shuffle[index]]


def remap_index(index, shuffled):
new_idx = []
for idx in index:
Expand Down