7.5. Benchmarking#

Tip

This notebook benchmarks JAX on a single CPU core. Compare with Julia results as reported in ComPWA/polarimetry#27. See also the Extended benchmark #68 discussion.

Note

This notebook uses only one run and one loop for %timeit, because JAX seems to cache its return values.

Hide code cell source

from __future__ import annotations

import logging
from collections import defaultdict

import numpy as np
import pandas as pd
import sympy as sp
from ampform_dpd.io import cached
from IPython.display import Markdown
from psutil import cpu_count

from polarimetry import formulate_polarimetry
from polarimetry.data import (
    create_data_transformer,
    generate_meshgrid_sample,
    generate_phasespace_sample,
)
from polarimetry.io import mute_jax_warnings
from polarimetry.lhcb import load_model_builder, load_model_parameters
from polarimetry.lhcb.particle import load_particles

LOGGER = logging.getLogger()
LOGGER.setLevel(logging.ERROR)
mute_jax_warnings()

model_choice = 0
model_file = "../../data/model-definitions.yaml"
particles = load_particles("../../data/particle-definitions.yaml")
amplitude_builder = load_model_builder(model_file, particles, model_choice)
imported_parameter_values = load_model_parameters(
    model_file, amplitude_builder.decay, model_choice, particles
)
reference_subsystem = 1
model = amplitude_builder.formulate(reference_subsystem)
model.parameter_defaults.update(imported_parameter_values)

timing_parametrized = defaultdict(dict)
timing_substituted = defaultdict(dict)

print("Physical cores:", cpu_count(logical=False))
print("Total cores:", cpu_count(logical=True))
Physical cores: 2
Total cores: 4
%%time
polarimetry_exprs = formulate_polarimetry(amplitude_builder, reference_subsystem)
unfolded_polarimetry_exprs = [
    cached.unfold(expr, model.amplitudes) for expr in polarimetry_exprs
]
unfolded_intensity_expr = cached.unfold(model)
CPU times: user 4.69 s, sys: 9.04 ms, total: 4.69 s
Wall time: 4.69 s

7.5.1. DataTransformer performance#

n_events = 100_000
phsp_sample = generate_phasespace_sample(model.decay, n_events, seed=0)
transformer = create_data_transformer(model)
%timeit -n1 -r1 transformer(phsp_sample)  # first run, so no cache and JIT-compilation
%timeit -n1 -r1 transformer(phsp_sample)  # second run with cache
%timeit -n1 -r1 transformer(phsp_sample)  # third run with cache
phsp_sample = transformer(phsp_sample)
random_point = {k: v[0] if len(v.shape) > 0 else v for k, v in phsp_sample.items()}
304 ms ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)
1.19 ms ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)
2.07 ms ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)
res = 54
grid_sample = generate_meshgrid_sample(model.decay, res)
%timeit -n1 -r1 transformer(grid_sample)  # first run, without cache, but already compiled
%timeit -n1 -r1 transformer(grid_sample)  # second run with cache
%timeit -n1 -r1 transformer(grid_sample)  # third run with cache
grid_sample = transformer(grid_sample)
296 ms ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)
359 μs ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)
131 μs ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)

7.5.2. Parametrized function#

Total number of mathematical operations:

  • \(\alpha_x\): 129,038

  • \(\alpha_y\): 129,042

  • \(\alpha_z\): 129,038

  • \(I_\mathrm{tot}\): 42,334

%%time
parametrized_polarimetry_funcs = [
    cached.lambdify(expr, model.parameter_defaults)
    for expr in unfolded_polarimetry_exprs
]
parametrized_intensity_func = cached.lambdify(
    unfolded_intensity_expr, model.parameter_defaults
)
CPU times: user 5.05 s, sys: 49.9 ms, total: 5.1 s
Wall time: 5.1 s
rng = np.random.default_rng(seed=0)
original_parameters = dict(parametrized_intensity_func.parameters)
modified_parameters = {
    k: rng.uniform(0.9, 1.1) * v
    for k, v in parametrized_intensity_func.parameters.items()
}

7.5.2.1. One data point#

7.5.2.1.1. JIT-compilation#

%%timeit -n1 -r1 -q -o
array = parametrized_intensity_func(random_point)
<TimeitResult : 690 ms ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)>
%%timeit -n1 -r1 -q -o
array = parametrized_polarimetry_funcs[0](random_point)
array = parametrized_polarimetry_funcs[1](random_point)
array = parametrized_polarimetry_funcs[2](random_point)
<TimeitResult : 3.13 s ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)>

7.5.2.1.2. Compiled performance#

%%timeit -n1 -r1 -q -o
array = parametrized_intensity_func(random_point)
<TimeitResult : 591 μs ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)>
%%timeit -n1 -r1 -q -o
array = parametrized_polarimetry_funcs[0](random_point)
array = parametrized_polarimetry_funcs[1](random_point)
array = parametrized_polarimetry_funcs[2](random_point)
<TimeitResult : 1.31 ms ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)>

7.5.2.2. 54x54 grid sample#

7.5.2.2.1. Compiled but uncached#

%%timeit -n1 -r1 -q -o
array = parametrized_intensity_func(grid_sample)
<TimeitResult : 206 ms ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)>
%%timeit -n1 -r1 -q -o
array = parametrized_polarimetry_funcs[0](grid_sample)
array = parametrized_polarimetry_funcs[1](grid_sample)
array = parametrized_polarimetry_funcs[2](grid_sample)
<TimeitResult : 797 ms ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)>

7.5.2.2.2. Second run with cache#

%%timeit -n1 -r1 -q -o
array = parametrized_intensity_func(grid_sample)
<TimeitResult : 591 μs ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)>
%%timeit -n1 -r1 -q -o
array = parametrized_polarimetry_funcs[0](grid_sample)
array = parametrized_polarimetry_funcs[1](grid_sample)
array = parametrized_polarimetry_funcs[2](grid_sample)
<TimeitResult : 1.24 ms ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)>

7.5.2.3. 100.000 event phase space sample#

7.5.2.3.1. Compiled but uncached#

%%timeit -n1 -r1 -q -o
array = parametrized_intensity_func(phsp_sample)
<TimeitResult : 205 ms ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)>
%%timeit -n1 -r1 -q -o
array = parametrized_polarimetry_funcs[0](phsp_sample)
array = parametrized_polarimetry_funcs[1](phsp_sample)
array = parametrized_polarimetry_funcs[2](phsp_sample)
<TimeitResult : 880 ms ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)>

7.5.2.3.2. Second run with cache#

%%timeit -n1 -r1 -q -o
array = parametrized_intensity_func(phsp_sample)
<TimeitResult : 3.37 ms ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)>
%%timeit -n1 -r1 -q -o
array = parametrized_polarimetry_funcs[0](phsp_sample)
array = parametrized_polarimetry_funcs[1](phsp_sample)
array = parametrized_polarimetry_funcs[2](phsp_sample)
<TimeitResult : 3.68 ms ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)>

7.5.2.4. Recompilation after parameter modification#

parametrized_intensity_func.update_parameters(modified_parameters)
for func in parametrized_polarimetry_funcs:
    func.update_parameters(modified_parameters)

7.5.2.4.1. Compiled but uncached#

%%timeit -n1 -r1 -q -o
array = parametrized_intensity_func(phsp_sample)
<TimeitResult : 257 ms ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)>
%%timeit -n1 -r1 -q -o
array = parametrized_polarimetry_funcs[0](phsp_sample)
array = parametrized_polarimetry_funcs[1](phsp_sample)
array = parametrized_polarimetry_funcs[2](phsp_sample)
<TimeitResult : 851 ms ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)>

7.5.2.4.2. Second run with cache#

%%timeit -n1 -r1 -q -o
array = parametrized_intensity_func(phsp_sample)
<TimeitResult : 609 μs ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)>
%%timeit -n1 -r1 -q -o
array = parametrized_polarimetry_funcs[0](phsp_sample)
array = parametrized_polarimetry_funcs[1](phsp_sample)
array = parametrized_polarimetry_funcs[2](phsp_sample)
<TimeitResult : 1.47 ms ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)>
parametrized_intensity_func.update_parameters(original_parameters)
for func in parametrized_polarimetry_funcs:
    func.update_parameters(original_parameters)

7.5.3. All parameters substituted#

subs_polarimetry_exprs = [
    cached.xreplace(expr, model.parameter_defaults)
    for expr in unfolded_polarimetry_exprs
]
subs_intensity_expr = cached.xreplace(unfolded_intensity_expr, model.parameter_defaults)

Number of mathematical operations after substituting all parameters:

  • \(\alpha_x\): 29,312

  • \(\alpha_y\): 29,316

  • \(\alpha_z\): 29,312

  • \(I_\mathrm{tot}\): 9,544

%%time
polarimetry_funcs = [cached.lambdify(expr) for expr in subs_polarimetry_exprs]
intensity_func = cached.lambdify(subs_intensity_expr)
CPU times: user 2.73 s, sys: 84 ms, total: 2.81 s
Wall time: 2.81 s

7.5.3.1. One data point#

7.5.3.1.1. JIT-compilation#

%%timeit -n1 -r1 -q -o
array = intensity_func(random_point)
<TimeitResult : 488 ms ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)>
%%timeit -n1 -r1 -q -o
array = polarimetry_funcs[0](random_point)
array = polarimetry_funcs[1](random_point)
array = polarimetry_funcs[2](random_point)
<TimeitResult : 2.39 s ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)>

7.5.3.1.2. Compiled performance#

%%timeit -n1 -r1 -q -o
array = intensity_func(random_point)
<TimeitResult : 141 μs ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)>
%%timeit -n1 -r1 -q -o
array = polarimetry_funcs[0](random_point)
array = polarimetry_funcs[1](random_point)
array = polarimetry_funcs[2](random_point)
<TimeitResult : 402 μs ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)>

7.5.3.2. 54x54 grid sample#

7.5.3.2.1. Compiled but uncached#

%%timeit -n1 -r1 -q -o
array = intensity_func(grid_sample)
<TimeitResult : 129 ms ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)>
%%timeit -n1 -r1 -q -o
array = polarimetry_funcs[0](grid_sample)
array = polarimetry_funcs[1](grid_sample)
array = polarimetry_funcs[2](grid_sample)
<TimeitResult : 563 ms ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)>

7.5.3.2.2. Second run with cache#

%%timeit -n1 -r1 -q -o
array = intensity_func(grid_sample)
<TimeitResult : 804 μs ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)>
%%timeit -n1 -r1 -q -o
array = polarimetry_funcs[0](grid_sample)
array = polarimetry_funcs[1](grid_sample)
array = polarimetry_funcs[2](grid_sample)
<TimeitResult : 350 μs ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)>

7.5.3.3. 100.000 event phase space sample#

7.5.3.3.1. Compiled but uncached#

%%timeit -n1 -r1 -q -o
array = intensity_func(phsp_sample)
<TimeitResult : 122 ms ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)>
%%timeit -n1 -r1 -q -o
array = polarimetry_funcs[0](phsp_sample)
array = polarimetry_funcs[1](phsp_sample)
array = polarimetry_funcs[2](phsp_sample)
<TimeitResult : 596 ms ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)>

7.5.3.3.2. Second run with cache#

%%timeit -n1 -r1 -q -o
array = intensity_func(phsp_sample)
<TimeitResult : 141 μs ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)>
%%timeit -n1 -r1 -q -o
array = polarimetry_funcs[0](phsp_sample)
array = polarimetry_funcs[1](phsp_sample)
array = polarimetry_funcs[2](phsp_sample)
<TimeitResult : 154 μs ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)>

7.5.4. Summary#

Hide code cell source

def collect_sorted_row_title() -> list[str]:
    row_titles = {}
    row_titles.update(timing_parametrized["intensity"])
    row_titles.update(timing_parametrized["polarimetry"])
    row_titles.update(timing_substituted["intensity"])
    row_titles.update(timing_substituted["polarimetry"])
    return list(row_titles)


def remove_loop_info(timing) -> str:
    if timing is None:
        return ""
    pattern = " ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)"
    return str(timing).replace(pattern, "")


row_titles = collect_sorted_row_title()
values = [
    (
        remove_loop_info(timing_parametrized["intensity"].get(row)),
        remove_loop_info(timing_parametrized["polarimetry"].get(row)),
        remove_loop_info(timing_substituted["intensity"].get(row)),
        remove_loop_info(timing_substituted["polarimetry"].get(row)),
    )
    for row in row_titles
]
columns = pd.MultiIndex.from_tuples(
    [
        ("parametrized", "I"),
        ("parametrized", "ɑ"),
        ("substituted", "I"),
        ("substituted", "ɑ"),
    ],
)
df = pd.DataFrame(values, index=row_titles, columns=columns)
df.style.set_table_styles([
    dict(selector="th", props=[("text-align", "left")]),
    dict(selector="td", props=[("text-align", "left")]),
])
  parametrized substituted
  I ɑ I ɑ
random point (compilation) 690 ms 3.13 s 488 ms 2.39 s
random point (cached) 591 μs 1.31 ms 141 μs 402 μs
54x54 grid 206 ms 797 ms 129 ms 563 ms
54x54 grid (cached) 591 μs 1.24 ms 804 μs 350 μs
100,000 phsp 205 ms 880 ms 122 ms 596 ms
100,000 phsp (cached) 3.37 ms 3.68 ms 141 μs 154 μs
modified 100,000 phsp 257 ms 851 ms
modified 100,000 phsp (cached) 609 μs 1.47 ms