Skip to content

Commit

Permalink
Merge pull request #23 from denehoffman/reading-root
Browse files Browse the repository at this point in the history
Read ROOT files and clean up vectors
  • Loading branch information
denehoffman authored Dec 4, 2024
2 parents 09f11bf + 27a61a1 commit a3face0
Show file tree
Hide file tree
Showing 12 changed files with 314 additions and 204 deletions.
11 changes: 6 additions & 5 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,8 @@ crate-type = ["cdylib", "rlib"]
indexmap = "2.6.0"
num = "0.4.3"
nalgebra = "0.33.2"
arrow = "53.2.0"
parquet = "53.2.0"
arrow = "53.3.0"
parquet = "53.3.0"
factorial = "0.4.0"
parking_lot = "0.12.3"
dyn-clone = "1.0.17"
Expand All @@ -36,18 +36,19 @@ pyo3 = { version = "0.22.6", optional = true, features = [
] }
numpy = { version = "0.22.1", optional = true, features = ["nalgebra"] }
ganesh = "0.13.1"
thiserror = "2.0.0"
thiserror = "2.0.3"
shellexpand = "3.1.0"
accurate = "0.4.1"
serde = "1.0.214"
serde-pickle = "1.1.1"
serde = "1.0.215"
serde-pickle = "1.2.0"
bincode = "1.3.3"

[dev-dependencies]
approx = "0.5.1"
criterion = { version = "2.7.2", package = "codspeed-criterion-compat", features = [
"html_reports",
] }
num_cpus = "1.16.0"

[[bench]]
name = "kmatrix_benchmark"
Expand Down
44 changes: 30 additions & 14 deletions benches/kmatrix_benchmark.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
use std::time::Duration;

use criterion::{black_box, criterion_group, criterion_main, BatchSize, Criterion};
use criterion::{black_box, criterion_group, criterion_main, BatchSize, BenchmarkId, Criterion};
use laddu::{
amplitudes::{
constant,
Expand All @@ -19,6 +19,10 @@ use laddu::{
};
use rand::{distributions::Uniform, prelude::*};

#[cfg(feature = "rayon")]
use rayon::ThreadPoolBuilder;

#[cfg(feature = "rayon")]
fn kmatrix_nll_benchmark(c: &mut Criterion) {
let ds_data = open("benches/bench.parquet").unwrap();
let ds_mc = open("benches/bench.parquet").unwrap();
Expand Down Expand Up @@ -140,20 +144,32 @@ fn kmatrix_nll_benchmark(c: &mut Criterion) {
let neg_im = (&s0n * z00n.imag()).norm_sqr();
let model = pos_re + pos_im + neg_re + neg_im;
let nll = NLL::new(&manager, &model, &ds_data, &ds_mc);
let mut rng = rand::thread_rng();
let range = Uniform::new(-100.0, 100.0);
c.bench_function("kmatrix benchmark (nll)", |b| {
b.iter_batched(
|| {
let p: Vec<Float> = (0..nll.parameters().len())
.map(|_| rng.sample(range))
.collect();
p
let mut group = c.benchmark_group("K-Matrix NLL Performance");
for threads in 1..=num_cpus::get() {
let pool = ThreadPoolBuilder::new()
.num_threads(threads)
.build()
.unwrap();
group.bench_with_input(
BenchmarkId::from_parameter(threads),
&threads,
|b, &_threads| {
let mut rng = rand::thread_rng();
let range = Uniform::new(-100.0, 100.0);
b.iter_batched(
|| {
let p: Vec<Float> = (0..nll.parameters().len())
.map(|_| rng.sample(range))
.collect();
p
},
|p| pool.install(|| black_box(nll.evaluate(&p))),
BatchSize::SmallInput,
)
},
|p| black_box(nll.evaluate(&p)),
BatchSize::SmallInput,
)
});
);
}
group.finish();
}

criterion_group! {
Expand Down
83 changes: 56 additions & 27 deletions python/laddu/__init__.py
Original file line number Diff line number Diff line change
@@ -1,19 +1,27 @@
from __future__ import annotations

from abc import ABCMeta, abstractmethod
from typing import TYPE_CHECKING

import numpy as np

from laddu.amplitudes import Manager, constant, parameter
from laddu.amplitudes.breit_wigner import BreitWigner
from laddu.amplitudes.common import ComplexScalar, PolarComplexScalar, Scalar
from laddu.amplitudes.ylm import Ylm
from laddu.amplitudes.zlm import Zlm
from laddu.convert import convert_from_amptools
from laddu.data import BinnedDataset, Dataset, open
from laddu.convert import convert_from_amptools, read_root_file
from laddu.data import BinnedDataset, Dataset, Event, open # noqa: A004
from laddu.likelihoods import NLL, LikelihoodManager, Status
from laddu.utils.variables import Angles, CosTheta, Mandelstam, Mass, Phi, PolAngle, Polarization, PolMagnitude
from laddu.utils.vectors import Vector3, Vector4

from . import amplitudes, convert, data, likelihoods, utils
from .laddu import version

if TYPE_CHECKING:
from pathlib import Path

__version__ = version()


Expand All @@ -23,38 +31,59 @@ def callback(self, step: int, status: Status) -> tuple[Status, bool]:
pass


def open_amptools(
path: str | Path,
tree: str = "kin",
*,
pol_in_beam: bool = False,
pol_angle: float | None = None,
pol_magnitude: float | None = None,
num_entries: int | None = None,
) -> Dataset:
pol_angle_rad = pol_angle * np.pi / 180 if pol_angle else None
p4s_list, eps_list, weight_list = read_root_file(path, tree, pol_in_beam, pol_angle_rad, pol_magnitude, num_entries)
return Dataset(
[
Event([Vector4.from_array(p4) for p4 in p4s], [Vector3.from_array(eps_vec) for eps_vec in eps], weight)
for p4s, eps, weight in zip(p4s_list, eps_list, weight_list)
]
)


__all__ = [
"__version__",
"convert",
"convert_from_amptools",
"Dataset",
"open",
"NLL",
"Angles",
"BinnedDataset",
"utils",
"data",
"amplitudes",
"likelihoods",
"Vector3",
"Vector4",
"BreitWigner",
"ComplexScalar",
"CosTheta",
"Phi",
"Angles",
"PolMagnitude",
"PolAngle",
"Polarization",
"Dataset",
"Event",
"LikelihoodManager",
"Manager",
"Mandelstam",
"Mass",
"Manager",
"LikelihoodManager",
"NLL",
"Status",
"Observer",
"parameter",
"constant",
"Scalar",
"ComplexScalar",
"Phi",
"PolAngle",
"PolMagnitude",
"PolarComplexScalar",
"Polarization",
"Scalar",
"Status",
"Vector3",
"Vector4",
"Ylm",
"Zlm",
"BreitWigner",
"__version__",
"amplitudes",
"constant",
"convert",
"convert_from_amptools",
"data",
"likelihoods",
"open",
"open_amptools",
"parameter",
"utils",
]
67 changes: 40 additions & 27 deletions python/laddu/__init__.pyi
Original file line number Diff line number Diff line change
@@ -1,56 +1,69 @@
from abc import ABCMeta, abstractmethod
from pathlib import Path

from laddu.amplitudes import Expression, Manager, constant, parameter
from laddu.amplitudes.breit_wigner import BreitWigner
from laddu.amplitudes.common import ComplexScalar, PolarComplexScalar, Scalar
from laddu.amplitudes.ylm import Ylm
from laddu.amplitudes.zlm import Zlm
from laddu.convert import convert_from_amptools
from laddu.data import BinnedDataset, Dataset, open
from laddu.data import BinnedDataset, Dataset, Event, open # noqa: A004
from laddu.likelihoods import NLL, LikelihoodManager, Status
from laddu.utils.variables import Angles, CosTheta, Mandelstam, Mass, Phi, PolAngle, Polarization, PolMagnitude
from laddu.utils.vectors import Vector3, Vector4

from . import amplitudes, convert, data, utils

__version__: str

class Observer(metaclass=ABCMeta):
@abstractmethod
def callback(self, step: int, status: Status) -> tuple[Status, bool]: ...

__version__: str
def open_amptools(
path: str | Path,
tree: str = "kin",
*,
pol_in_beam: bool = False,
pol_angle: float | None = None,
pol_magnitude: float | None = None,
num_entries: int | None = None,
) -> Dataset: ...

__all__ = [
"__version__",
"convert",
"convert_from_amptools",
"Dataset",
"open",
"NLL",
"Angles",
"BinnedDataset",
"utils",
"data",
"amplitudes",
"Vector3",
"Vector4",
"BreitWigner",
"ComplexScalar",
"CosTheta",
"Phi",
"Angles",
"PolMagnitude",
"PolAngle",
"Polarization",
"Dataset",
"Event",
"Expression",
"LikelihoodManager",
"Manager",
"Mandelstam",
"Mass",
"Manager",
"LikelihoodManager",
"NLL",
"Expression",
"Status",
"Observer",
"parameter",
"constant",
"Scalar",
"ComplexScalar",
"Phi",
"PolAngle",
"PolMagnitude",
"PolarComplexScalar",
"Polarization",
"Scalar",
"Status",
"Vector3",
"Vector4",
"Ylm",
"Zlm",
"BreitWigner",
"__version__",
"amplitudes",
"constant",
"convert",
"convert_from_amptools",
"data",
"open",
"open_amptools",
"parameter",
"utils",
]
35 changes: 18 additions & 17 deletions python/laddu/convert.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,9 @@
from loguru import logger


def read_root_file(input_file, tree_name, pol_in_beam, pol_angle, pol_magnitude, num_entries=None):
def read_root_file(
input_file, tree_name, pol_in_beam, pol_angle, pol_magnitude, num_entries=None
) -> tuple[np.ndarray, np.ndarray, np.ndarray]:
"""Read ROOT file and extract data with optional polarization from the beam."""
logger.info(f"Reading ROOT file: {input_file}")
tfile = uproot.open(input_file)
Expand All @@ -40,10 +42,10 @@ def read_root_file(input_file, tree_name, pol_in_beam, pol_angle, pol_magnitude,
Pz_final = np.array(list(tree["Pz_FinalState"].array(library="np", entry_stop=num_entries))) # pyright: ignore

# Handle beam four-vector: (nevents, 4)
p4_beam = np.stack([E_beam, Px_beam, Py_beam, Pz_beam], axis=-1)
p4_beam = np.stack([Px_beam, Py_beam, Pz_beam, E_beam], axis=-1)

# Handle final state four-vectors: (nevents, nparticles, 4)
p4_final = np.stack([E_final, Px_final, Py_final, Pz_final], axis=-1)
p4_final = np.stack([Px_final, Py_final, Pz_final, E_final], axis=-1)

# Check if EPS branch exists and update eps if needed
if "EPS" in tree:
Expand All @@ -58,8 +60,9 @@ def read_root_file(input_file, tree_name, pol_in_beam, pol_angle, pol_magnitude,
logger.info("Using beam's momentum for polarization (eps).")
eps = np.stack([Px_beam, Py_beam, Pz_beam], axis=-1)[:, np.newaxis]
# Reset beam momentum
p4_beam[:, 1:] = 0 # Set Px, Py to 0
p4_beam[:, 3] = E_beam # Set Pz = E for beam
p4_beam[:, 0] = 0 # Set Px to 0
p4_beam[:, 1] = 0 # Set Py to 0
p4_beam[:, 2] = E_beam # Set Pz = E for beam
elif pol_angle is not None and pol_magnitude is not None:
logger.info(f"Using input polarization angle ({pol_angle}) and magnitude ({pol_magnitude}).")
eps_x = pol_magnitude * np.cos(pol_angle) * np.ones_like(E_beam)
Expand All @@ -74,21 +77,21 @@ def read_root_file(input_file, tree_name, pol_in_beam, pol_angle, pol_magnitude,
logger.info("Concatenating beam and final state particles.")
p4s = np.concatenate([p4_beam[:, np.newaxis, :], p4_final], axis=1)

return p4s.astype(np.float32), weight, eps.astype(np.float32)
return p4s.astype(np.float32), eps.astype(np.float32), weight


def save_as_parquet(p4s, weight, eps, output_file):
def save_as_parquet(p4s, eps, weight, output_file):
"""Save the processed data into Parquet format."""
logger.info("Saving data to Parquet format.")

# Flatten the p4s and eps into individual columns
columns = {}
n_particles = p4s.shape[1]
for i in range(n_particles):
columns[f"p4_{i}_E"] = p4s[:, i, 0]
columns[f"p4_{i}_Px"] = p4s[:, i, 1]
columns[f"p4_{i}_Py"] = p4s[:, i, 2]
columns[f"p4_{i}_Pz"] = p4s[:, i, 3]
columns[f"p4_{i}_Px"] = p4s[:, i, 0]
columns[f"p4_{i}_Py"] = p4s[:, i, 1]
columns[f"p4_{i}_Pz"] = p4s[:, i, 2]
columns[f"p4_{i}_E"] = p4s[:, i, 3]

n_eps = eps.shape[1]
for i in range(n_eps):
Expand All @@ -110,12 +113,12 @@ def convert_from_amptools(
output_path: Path,
tree_name: str = "kin",
pol_in_beam: bool = False, # noqa: FBT001, FBT002
pol_angle_deg: float | None = None,
pol_angle: float | None = None,
pol_magnitude: float | None = None,
num_entries: int | None = None,
):
p4s, weight, eps = read_root_file(input_path, tree_name, pol_in_beam, pol_angle_deg, pol_magnitude, num_entries)
save_as_parquet(p4s, weight, eps, output_path)
p4s, eps, weight = read_root_file(input_path, tree_name, pol_in_beam, pol_angle, pol_magnitude, num_entries)
save_as_parquet(p4s, eps, weight, output_path)


def run():
Expand All @@ -136,6 +139,4 @@ def run():
df_read = pd.read_parquet(output_file)
print("Output Parquet File (head):") # noqa: T201
print(df_read.head()) # noqa: T201
print("Output Columns:") # noqa: T201
for column in df_read.columns:
print(column) # noqa: T201
print(f"Total Entries: {len(df_read)}") # noqa: T201
Loading

0 comments on commit a3face0

Please sign in to comment.