"""Abstract interface."""
import abc
import base64
import os
import pathlib
import shutil
import subprocess
import tempfile
import pineappl
from .. import __version__, configs, install, tools
[docs]
class External(abc.ABC):
"""Interface class for external providers.
Parameters
----------
name : str
dataset name
theory : dict
theory dictionary
pdf : str
PDF name
timestamp : str
timestamp of already generated output folder
output_folder : pathlib.Path
path of the already generated output folder
"""
kind = None
def __init__(
self, name, theory, pdf, timestamp=None, runcards_path=None, output_folder=None
):
self.name = name
self.theory = theory
self.pdf = pdf
self.timestamp = timestamp
if runcards_path is None:
self._runcards_path = configs.configs["paths"]["runcards"]
else:
self._runcards_path = pathlib.Path(runcards_path)
if timestamp is None and output_folder is None:
self.dest = tools.create_output_folder(self.name, self.theory["ID"])
elif timestamp is None:
# If an output_folder is present, it takes precedence with respect to the timestamp
self.dest = output_folder
self.timestamp = output_folder.as_posix().split("-")[-1]
if (
not self.grid.exists()
and self.grid.with_suffix(".pineappl.lz4").exists()
):
tools.decompress(self.grid.with_suffix(".pineappl.lz4"))
else:
self.dest = configs.configs["paths"]["results"] / (
str(theory["ID"]) + "-" + self.name + "-" + self.timestamp
)
if not self.grid.exists():
tools.decompress(self.grid.with_suffix(".pineappl.lz4"))
@property
def source(self):
"""Runcard base directory."""
return self._runcards_path / self.name
@property
def grid(self):
"""Target PineAPPL grid name."""
return self.dest / f"{self.name}.pineappl"
@property
def gridtmp(self):
"""Intermediate PineAPPL grid name."""
return self.dest / f"{self.name}.pineappl.tmp"
[docs]
def update_with_tmp(self, output_grid=None):
"""Move intermediate grid to final position."""
if output_grid is None:
output_grid = self.grid
shutil.move(str(self.gridtmp), str(output_grid))
[docs]
@staticmethod
def install():
"""Install all needed programs."""
# Everybody needs LHAPDF unless explicitly skipped
_ = install.lhapdf()
[docs]
def preparation(self):
"""Run the preparation method of the runner."""
return False
[docs]
@abc.abstractmethod
def run(self):
"""Execute the program."""
[docs]
@abc.abstractmethod
def generate_pineappl(self):
"""Generate PineAPPL grid and extract output.
Returns
-------
str
output of ``pineappl convolute`` on the generate grid and selected
:attr:`pdf`
"""
[docs]
@abc.abstractmethod
def results(self):
"""Results as computed by the program.
Returns
-------
pandas.DataFrame
standardized dataframe with results (containing ``result``,
``error``, ``sv_min``, and ``sv_max`` columns)
"""
[docs]
@abc.abstractmethod
def collect_versions(self) -> dict:
"""Collect necessary version informations.
Returns
-------
dict
program - version mapping related to programs specific to a single
runner (common ones are already abstracted)
"""
[docs]
def load_pinecard(self) -> str:
"""Load directory as b64encoded .tar.gz file."""
# shutils wants to create a true file, so we go through a temp dir
with tempfile.TemporaryDirectory() as tmpdirname:
p = pathlib.Path(tmpdirname) / "pinecard"
shutil.make_archive(p, format="gztar", root_dir=self.source)
with open(p.with_suffix(".tar.gz"), "rb") as fd:
return base64.b64encode(fd.read()).decode("ascii")
[docs]
def annotate_versions(self):
"""Add version informations as meta data."""
results_log = self.dest / "results.log"
versions = self.collect_versions()
# the pinefarm version will also pin pineappl_py version and all the
# other python dependencies versions
versions["pinefarm"] = __version__
versions["pinecard"] = self.load_pinecard()
versions["pineappl"] = pineappl.version
entries = {}
entries.update(versions)
entries["results_pdf"] = self.pdf
tools.update_grid_metadata(
self.grid, self.gridtmp, entries, {"results": results_log}
)
self.update_with_tmp()
[docs]
def postprocess(self):
"""Postprocess grid(s).
First run the postrun.sh script (if present),
then apply metadata to all grids present in the folder.
The following environment variables will be populated for the
underlying scripts to use:
GRID: if only one grid is available, path to the grid
PINECARD: path to the pinecard folder
"""
if self.grid.exists():
os.environ["GRID"] = str(self.grid)
grids = [self.grid]
else:
grids = list(self.dest.glob("*.pineappl*"))
if not grids:
raise ValueError("Tried to run postprocessing in a folder with no grids?")
os.environ["PINECARD"] = self.source.as_posix()
# apply postrun, if present and executable
postrun = self.source / "postrun.sh"
if postrun.exists():
if os.access(postrun, os.X_OK):
shutil.copy2(self.source / "postrun.sh", self.dest)
subprocess.run("./postrun.sh", cwd=self.dest, check=True)
else:
raise ValueError(f"Postrun file present but not executable: {postrun}")
# Add the metadata to *every single grid in the folder*
# some of these might be just intermediate, apply it anyway
metadata = self.source / "metadata.txt"
entries = {}
if metadata.exists():
for line in metadata.read_text().splitlines():
k, v = line.split("=")
entries[k] = v
for ext in ["*.pineappl.lz4", "*.pineappl"]:
for grid in self.dest.glob(ext):
tools.update_grid_metadata(grid, self.gridtmp, entries)
self.update_with_tmp(grid)
# compress if we have a single grid
if self.grid.exists():
compressed_path = tools.compress(self.grid)
if compressed_path.exists():
self.grid.unlink()