"""Compute a grid and compare using a given PDF."""
import logging
import pathlib
import sys
import time
import click
import rich
import yaml
from .. import configs, info, install, log, table, tools
from ..external import mg5
from ._base import command
logger = logging.getLogger(__name__)
@command.command("run")
@click.argument("pinecard")
@click.argument("theory-path", type=click.Path(exists=True))
@click.option(
    "--pdf",
    help="PDF to compare the original results to the grid",
    default="NNPDF40MC_nnlo_as_01180_qed",
)
@click.option("--dry", is_flag=True, help="Don't execute the underlying code")
@click.option(
    "--finalize",
    type=click.Path(exists=True),
    help="Run the postprocess step given a runfolder",
)
def subcommand(pinecard, theory_path, pdf, dry, finalize=None):
    """Compute the grids as defined in the given pinecard.

    Given a PINECARD and a THEORY-PATH, pinefarm will execute the
    appropriate external program to generate the grids.

    The given PDF will be used to compare the original results (from the
    generator) with PineAPPL interpolation - this checks any interpolation
    issues.

    Setting the DRY flag prevents the generator from actually running.

    Note: not all external programs can be automatically run by pinefarm,
    in those cases only the relevant run files will be generated.
    Pinefarm provides a ``--finalize`` flag to wrap up the grid and add
    relevant metadata.

    \f

    Parameters
    ----------
    pinecard: str
        pinecard name, or path to a pinecard
    theory_path: str
        path to a theory card (YAML file)
    pdf: str
        pdf name
    dry: bool
        run only the preparation step
    finalize: str
        path to the runfolder in which to run the post processing step

    Raises
    ------
    FileNotFoundError
        if the pinecard cannot be found
    ValueError
        if a runfolder (``<name>-<integer>``) is given in place of a pinecard
    UnboundLocalError
        if ``info.label`` cannot resolve the dataset
    """
    # Check whether pinecard is a path. If it is, override the configuration.
    if (pinpath := pathlib.Path(pinecard)).exists():
        # If this pinecard is not in the runcards folder, warn the user but let it continue
        if pinpath.parent.absolute() != (
            rcards := configs.configs["paths"]["runcards"]
        ):
            logger.warning(
                f"The pinecard ({pinecard}) is not in the runcards ({rcards}) folder, overriding config."
            )
            configs.configs["paths"]["runcards"] = pinpath.parent
        pinecard = pinpath
    # Otherwise, use the configuration to fill the path
    else:
        pinecard = configs.configs["paths"]["runcards"] / pinecard

    # Check for existence
    if not pinecard.exists():
        raise FileNotFoundError(f"The pinecard {pinecard} cannot be found")

    if finalize is not None:
        finalize = pathlib.Path(finalize)

    # read theory card from file
    with open(theory_path, encoding="utf-8") as f:
        theory_card = yaml.safe_load(f)

    # Fix (possible) problems with CKM matrix loading
    if isinstance(theory_card.get("CKM"), str):
        theory_card["CKM"] = [float(i) for i in theory_card["CKM"].split()]

    # _in principle_ the pinecard is just the name, but a path should also be accepted
    dataset = pinecard.name

    # Check for old, unsupported behaviour
    if "-" in dataset:
        # NB: originally, instead of a finalize flag, pinefarm would decide whether to run
        # in "postprocessing" mode according to whether the input was a pinecard or a runfolder
        # This behaviour was not documented and now errors out
        dataset_raw, timestamp = dataset.rsplit("-", 1)
        try:
            # Check whether the timestamp is really an integer
            int(timestamp)
        except ValueError:
            # Not an integer suffix: a regular pinecard name containing a dash
            pass
        else:
            # The error is raised in the ``else`` clause so that it is not
            # swallowed by the ``except ValueError`` used for the integer check
            raise ValueError(
                f"""Using a runfolder as a pinecard. This behaviour is no longer supported.
Please, run instead using the --finalize flag.
~$ pinefarm run {dataset_raw} --finalize {dataset}
"""
            )

    rich.print(dataset)

    try:
        datainfo = info.label(dataset)
    except UnboundLocalError as e:
        raise UnboundLocalError(f"Runcard {dataset} could not be found") from e

    rich.print(f"Computing [{datainfo.color}]{dataset}[/]...")
    runner = datainfo.external(dataset, theory_card, pdf, output_folder=finalize)
    install_reqs(runner, pdf)

    # Run the preparation step of the runner (if any)
    if finalize is None:
        runner_stop = runner.preparation()

        # Stop here either on user request (--dry) or when the runner asks for it
        if dry or runner_stop:
            rich.print(
                f"""Running in dry mode, exiting now.
The preparation step can be found in:
{runner.dest}"""
            )
            sys.exit(0)
    ###### <this part will eventually go to -prepare->

    run_dataset(runner)
[docs]
def install_reqs(runner, pdf):
    """Install requirements.

    Initializes the installation prefix and environment, runs the installer
    of the runner, and finally configures LHAPDF and installs the requested
    PDF set. The elapsed time is reported at the end.

    Parameters
    ----------
    runner : interface.External
        runner instance
    pdf : str
        pdf name
    """
    t0 = time.perf_counter()

    install.init_prefix()
    install.update_environ()
    runner.install()

    # install chosen PDF set
    install.lhapdf_conf(pdf)

    # lhapdf_management determine paths at import time, so it is important to
    # late import it, in particular after environ has been updated by `install.lhapdf_conf`
    import lhapdf_management  # pylint: disable=import-error,import-outside-toplevel

    try:
        lhapdf_management.pdf_update()
    except PermissionError as err:
        # survive even if it's not possible to write 'pdfsets.index',
        # but leave a trace of it instead of failing silently
        logger.warning(
            "Could not update the LHAPDF 'pdfsets.index' (%s), continuing.", err
        )

    lhapdf_management.pdf_install(pdf)

    tools.print_time(t0, "Installation")
[docs]
def run_dataset(runner):
    """Execute runner and apply common post process.

    The runner is executed (unless it already carries a timestamp, i.e. an
    output folder was specified), the PineAPPL grid is generated, and the
    comparison table between the grid convolution and the runner results is
    printed. Finally versions are annotated and the runner post-processing
    is applied.

    Parameters
    ----------
    runner : interface.External
        runner instance
    """
    t0 = time.perf_counter()

    # ``log.Tee`` is given ``errors.log`` with stderr enabled and stdout disabled
    with log.Tee(runner.dest / "errors.log", stdout=False, stderr=True):
        # if output folder specified, do not rerun
        if runner.timestamp is None:
            runner.run()
        # collect results in the output pineappl grid
        runner.generate_pineappl()

    # Report the elapsed time only once the calculation is actually done
    tools.print_time(t0, "Grid calculation")

    table.print_table(
        table.convolute_grid(
            runner.grid, runner.pdf, integrated=isinstance(runner, mg5.Mg5)
        ),
        runner.results(),
        runner.dest,
    )

    # TODO: annotate_version should be a post-processing step
    # however at the moment only works in 1-grid cases
    runner.annotate_versions()
    runner.postprocess()
    print(f"Output stored in {runner.dest}")