Source code for gammapy.makers.reduce

# Licensed under a 3-clause BSD style license - see LICENSE.rst
import logging
from astropy.coordinates import Angle
import gammapy.utils.parallel as parallel
from gammapy.datasets import Datasets, MapDataset, MapDatasetOnOff, SpectrumDataset
from .core import Maker
from .safe import SafeMaskMaker

log = logging.getLogger(__name__)


__all__ = [
    "DatasetsMaker",
]


class DatasetsMaker(Maker, parallel.ParallelMixin):
    """Run makers in a chain.

    Parameters
    ----------
    makers : list of `Maker` objects
        Makers.
    stack_datasets : bool, optional
        If True, stack into the reference dataset (see `run` method arguments).
        Default is True.
    n_jobs : int, optional
        Number of processes to run in parallel.
        Default is one, unless `~gammapy.utils.parallel.N_JOBS_DEFAULT` was modified.
    cutout_mode : {'trim', 'partial', 'strict'}
        Used only to cutout the reference `MapDataset` around each processed observation.
        Mode is an option of `~astropy.nddata.utils.Cutout2D`, see there for details.
        Default is "trim".
    cutout_width : tuple of `~astropy.coordinates.Angle`, optional
        Angular sizes of the region in (lon, lat), in that specific order.
        If only one value is passed, a square region is extracted.
        If None, no cutout is applied, unless the list of makers includes
        a `SafeMaskMaker` with the offset-max method defined; in that case
        `cutout_width` is set to twice `offset_max`. Default is None.
    parallel_backend : {'multiprocessing', 'ray'}, optional
        Which backend to use for multiprocessing. Default is None.
    """

    tag = "DatasetsMaker"

    def __init__(
        self,
        makers,
        stack_datasets=True,
        n_jobs=None,
        cutout_mode="trim",
        cutout_width=None,
        parallel_backend=None,
    ):
        self.log = logging.getLogger(__name__)
        self.makers = makers
        self.cutout_mode = cutout_mode

        if cutout_width is not None:
            cutout_width = Angle(cutout_width)

        self.cutout_width = cutout_width
        self._apply_cutout = True

        if self.cutout_width is None:
            if self.offset_max is None:
                self._apply_cutout = False
            else:
                # Fall back to twice the offset-max cut of the SafeMaskMaker
                self.cutout_width = 2 * self.offset_max

        self.n_jobs = n_jobs
        self.parallel_backend = parallel_backend
        self.stack_datasets = stack_datasets

        self._datasets = []
        self._error = False

    @property
    def offset_max(self):
        maker = self.safe_mask_maker
        if maker is not None and hasattr(maker, "offset_max"):
            return maker.offset_max

    @property
    def safe_mask_maker(self):
        for m in self.makers:
            if isinstance(m, SafeMaskMaker):
                return m
    def make_dataset(self, dataset, observation):
        """Make a single dataset.

        Parameters
        ----------
        dataset : `~gammapy.datasets.MapDataset`
            Reference dataset.
        observation : `Observation`
            Observation.

        Returns
        -------
        dataset : `~gammapy.datasets.MapDataset`
            Dataset processed by the chain of makers.
        """
        if self._apply_cutout:
            cutout_kwargs = {
                "position": observation.get_pointing_icrs(observation.tmid).galactic,
                "width": self.cutout_width,
                "mode": self.cutout_mode,
            }
            dataset_obs = dataset.cutout(**cutout_kwargs)
        else:
            dataset_obs = dataset.copy()

        if dataset.models is not None:
            models = dataset.models.copy()
            models.reassign(dataset.name, dataset_obs.name)
            dataset_obs.models = models

        log.info(f"Computing dataset for observation {observation.obs_id}")

        for maker in self.makers:
            log.info(f"Running {maker.tag}")
            dataset_obs = maker.run(dataset=dataset_obs, observation=observation)

        return dataset_obs
    def callback(self, dataset):
        if self.stack_datasets:
            if type(self._dataset) is MapDataset and type(dataset) is MapDatasetOnOff:
                # An ON-OFF dataset cannot be stacked onto a plain MapDataset,
                # so convert it first
                dataset = dataset.to_map_dataset(name=dataset.name)
            self._dataset.stack(dataset)
        else:
            self._datasets.append(dataset)
    def error_callback(self, dataset):
        # A parallel run can fail with a memory error whose message is not
        # explicit; record the failure here and raise a RuntimeError in `run`
        self._error = True
    def run(self, dataset, observations, datasets=None):
        """Run data reduction.

        Parameters
        ----------
        dataset : `~gammapy.datasets.MapDataset`
            Reference dataset (used only for stacking if datasets are provided).
        observations : `Observations`
            Observations.
        datasets : `~gammapy.datasets.Datasets`
            Base datasets. If provided, its length must match the number of
            observations.

        Returns
        -------
        datasets : `~gammapy.datasets.Datasets`
            Datasets.
        """
        if isinstance(dataset, MapDataset):
            # Also valid for a SpectrumDataset, as it inherits from MapDataset
            self._dataset = dataset
        else:
            raise TypeError("Invalid reference dataset.")

        if isinstance(dataset, SpectrumDataset):
            self._apply_cutout = False

        if datasets is not None:
            self._apply_cutout = False
        else:
            datasets = len(observations) * [dataset]

        n_jobs = min(self.n_jobs, len(observations))
        parallel.run_multiprocessing(
            self.make_dataset,
            zip(datasets, observations),
            backend=self.parallel_backend,
            pool_kwargs=dict(processes=n_jobs),
            method="apply_async",
            method_kwargs=dict(
                callback=self.callback,
                error_callback=self.error_callback,
            ),
            task_name="Data reduction",
        )

        if self._error:
            raise RuntimeError("Execution of a sub-process failed")

        if self.stack_datasets:
            return Datasets([self._dataset])

        # apply_async callbacks may complete out of order: restore the order
        # of the input observations before returning
        lookup = {
            d.meta_table["OBS_ID"][0]: idx for idx, d in enumerate(self._datasets)
        }
        return Datasets([self._datasets[lookup[obs.obs_id]] for obs in observations])
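
A minimal usage sketch (not part of the module above): it chains a `MapDatasetMaker`, a `SafeMaskMaker` and a `FoVBackgroundMaker` through `DatasetsMaker`, using only public Gammapy API. The data path and observation IDs refer to the H.E.S.S. DL3 DR1 tutorial dataset and are placeholders for your own data.

    from gammapy.data import DataStore
    from gammapy.datasets import MapDataset
    from gammapy.makers import (
        DatasetsMaker,
        FoVBackgroundMaker,
        MapDatasetMaker,
        SafeMaskMaker,
    )
    from gammapy.maps import MapAxis, WcsGeom

    data_store = DataStore.from_dir("$GAMMAPY_DATA/hess-dl3-dr1")
    observations = data_store.get_observations([23523, 23526])

    # Reference geometry and dataset; per-observation datasets are cutouts of it
    energy_axis = MapAxis.from_energy_bounds("1 TeV", "10 TeV", nbin=5)
    geom = WcsGeom.create(
        skydir=(83.63, 22.01),
        frame="icrs",
        width=(5, 5),
        binsz=0.02,
        axes=[energy_axis],
    )
    reference = MapDataset.create(geom)

    makers = [
        MapDatasetMaker(),
        SafeMaskMaker(methods=["offset-max"], offset_max="2.5 deg"),
        FoVBackgroundMaker(method="scale"),
    ]

    # cutout_width can be omitted: the chain contains a SafeMaskMaker with
    # offset_max defined, so it defaults to 2 * offset_max (see __init__ above)
    datasets_maker = DatasetsMaker(makers, stack_datasets=True, n_jobs=4)
    datasets = datasets_maker.run(reference, observations)

With `stack_datasets=True` the result is a single stacked dataset; with `stack_datasets=False`, `run` returns one dataset per observation, in the order of the input observations.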