Source code for gammapy.scripts.analysis

# Licensed under a 3-clause BSD style license - see LICENSE.rst
"""Session class driving the high-level interface API"""
import copy
import logging
from collections import defaultdict
from pathlib import Path
from astropy import units as u
from astropy.coordinates import Angle, SkyCoord
from regions import CircleSkyRegion
import jsonschema
import yaml
from gammapy.cube import MapDataset, MapMakerObs
from gammapy.cube.fit import BINSZ_IRF
from gammapy.data import DataStore, ObservationTable
from gammapy.maps import Map, MapAxis, WcsGeom
from gammapy.modeling import Datasets, Fit
from gammapy.modeling.models import SkyModels
from gammapy.modeling.serialize import dict_to_models
from gammapy.spectrum import (
    FluxPointsDataset,
    FluxPointsEstimator,
    ReflectedRegionsBackgroundEstimator,
    SpectrumExtraction,
)
from gammapy.utils.scripts import make_path, read_yaml

__all__ = ["Analysis", "AnalysisConfig"]

log = logging.getLogger(__name__)
CONFIG_PATH = Path(__file__).resolve().parent / "config"
SCHEMA_FILE = CONFIG_PATH / "schema.yaml"
DOCS_FILE = CONFIG_PATH / "docs.yaml"

ANALYSIS_TEMPLATES = {
    "basic": "template-basic.yaml",
    "1d": "template-1d.yaml",
    "3d": "template-3d.yaml",
}


class Analysis:
    """Config-driven high-level analysis interface.

    By default it is initialized with the configuration parameters and values
    declared in an internal configuration schema YAML file. The user may also
    provide configuration parameters as a nested dictionary at instantiation;
    in that case these parameters overwrite the default values present in the
    configuration file. For more info see :ref:`HLI`.

    Parameters
    ----------
    config : dict or `AnalysisConfig`
        Configuration options following `AnalysisConfig` schema
    """

    def __init__(self, config=None):
        if isinstance(config, dict):
            self._config = AnalysisConfig(config)
        elif isinstance(config, AnalysisConfig):
            self._config = config
        else:
            raise ValueError("Dict or `AnalysisConfig` object required.")

        self._set_logging()
        self.observations = None
        self.background_estimator = None
        self.datasets = None
        self.extraction = None
        self.model = None
        self.fit = None
        self.fit_result = None
        self.flux_points = None

    @property
    def config(self):
        """Analysis configuration (`AnalysisConfig`)"""
        return self._config

    @property
    def settings(self):
        """Configuration settings for the analysis session."""
        return self.config.settings

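    # Usage sketch (illustrative comment only, not executed): the constructor
    # accepts either a nested configuration dict following the AnalysisConfig
    # schema or an AnalysisConfig instance. The datastore path below is a
    # placeholder.
    #
    #     config = {"observations": {"datastore": "$GAMMAPY_DATA/hess-dl3-dr1"}}
    #     analysis = Analysis(config)                  # dict wrapped into AnalysisConfig
    #     analysis = Analysis(AnalysisConfig(config))  # or pass a config object directly
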
    def get_observations(self):
        """Fetch observations from the data store according to criteria defined in the configuration."""
        self.config.validate()
        log.info("Fetching observations.")
        datastore_path = make_path(self.settings["observations"]["datastore"])
        if datastore_path.is_file():
            datastore = DataStore().from_file(datastore_path)
        elif datastore_path.is_dir():
            datastore = DataStore().from_dir(datastore_path)
        else:
            raise FileNotFoundError(f"Datastore {datastore_path} not found.")
        ids = set()
        selection = dict()
        for criteria in self.settings["observations"]["filters"]:
            selected_obs = ObservationTable()

            # TODO: Reduce significantly the code.
            # This block would be handled by datastore.obs_table.select_observations
            selection["type"] = criteria["filter_type"]
            for key, val in criteria.items():
                if key in ["lon", "lat", "radius", "border"]:
                    val = Angle(val)
                selection[key] = val
            if selection["type"] == "angle_box":
                selection["type"] = "par_box"
                selection["value_range"] = Angle(criteria["value_range"])
            if selection["type"] == "sky_circle" or selection["type"].endswith("_box"):
                selected_obs = datastore.obs_table.select_observations(selection)
            if selection["type"] == "par_value":
                mask = (
                    datastore.obs_table[criteria["variable"]]
                    == criteria["value_param"]
                )
                selected_obs = datastore.obs_table[mask]
            if selection["type"] == "ids":
                obs_list = datastore.get_observations(criteria["obs_ids"])
                selected_obs["OBS_ID"] = [obs.obs_id for obs in obs_list.list]
            if selection["type"] == "all":
                obs_list = datastore.get_observations()
                selected_obs["OBS_ID"] = [obs.obs_id for obs in obs_list.list]

            if len(selected_obs):
                if "exclude" in criteria and criteria["exclude"]:
                    ids.difference_update(selected_obs["OBS_ID"].tolist())
                else:
                    ids.update(selected_obs["OBS_ID"].tolist())
        self.observations = datastore.get_observations(ids, skip_missing=True)
        for obs in self.observations.list:
            log.info(obs)

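    # Illustrative comment: a minimal "observations" section that this method can
    # consume, assuming the schema in config/schema.yaml; the path and obs ids are
    # placeholders. A "sky_circle" or "*_box" filter is forwarded to
    # datastore.obs_table.select_observations, while an "ids" filter fetches the
    # listed observation ids directly.
    #
    #     config = {
    #         "observations": {
    #             "datastore": "$GAMMAPY_DATA/hess-dl3-dr1",
    #             "filters": [{"filter_type": "ids", "obs_ids": [23523, 23526]}],
    #         }
    #     }
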
    def get_datasets(self):
        """Produce reduced datasets."""
        if not self._validate_reduction_settings():
            return False
        if self.settings["datasets"]["dataset-type"] == "SpectrumDatasetOnOff":
            self._spectrum_extraction()
        elif self.settings["datasets"]["dataset-type"] == "MapDataset":
            self._map_making()
        else:
            # TODO raise error?
            log.info("Data reduction method not available.")
            return False

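    # Illustrative comment: the "datasets" keys read by the data reduction code,
    # with placeholder values. "dataset-type" selects between 1D spectrum
    # extraction ("SpectrumDatasetOnOff") and 3D map making ("MapDataset");
    # "stack-datasets" controls whether per-observation datasets are stacked.
    #
    #     "datasets": {
    #         "dataset-type": "SpectrumDatasetOnOff",  # or "MapDataset" for 3D
    #         "stack-datasets": True,
    #     }
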
    def set_model(self, model=None, filename=""):
        """Read the model from dict or filename and attach it to datasets.

        Parameters
        ----------
        model : dict or string
            Dictionary or string in YAML format with the serialized model.
        filename : string
            Name of the model YAML file describing the model.
        """
        if not self._validate_set_model():
            return False
        log.info("Reading model.")
        if isinstance(model, str):
            model = yaml.safe_load(model)
        if model:
            self.model = SkyModels(dict_to_models(model))
        elif filename:
            filepath = make_path(filename)
            self.model = SkyModels.from_yaml(filepath)
        else:
            return False
        # TODO: Deal with multiple components
        for dataset in self.datasets.datasets:
            if isinstance(dataset, MapDataset):
                dataset.model = self.model
            else:
                if len(self.model.skymodels) > 1:
                    raise ValueError(
                        "Can only fit a single spectral model at one time."
                    )
                dataset.model = self.model.skymodels[0].spectral_model
        log.info(self.model)

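    # Illustrative comment: the model can be supplied as a dict or YAML string in
    # the serialization format understood by dict_to_models, or read from a model
    # file on disk; "model.yaml" below is a placeholder filename.
    #
    #     analysis.set_model(filename="model.yaml")
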
    def run_fit(self, optimize_opts=None):
        """Fit the reduced datasets to the model."""
        if not self._validate_fitting_settings():
            return False

        for ds in self.datasets.datasets:
            # TODO: fit_range handled in jsonschema validation class
            if "fit_range" in self.settings["fit"]:
                e_min = u.Quantity(self.settings["fit"]["fit_range"]["min"])
                e_max = u.Quantity(self.settings["fit"]["fit_range"]["max"])
                if isinstance(ds, MapDataset):
                    ds.mask_fit = ds.counts.geom.energy_mask(e_min, e_max)
                else:
                    ds.mask_fit = ds.counts.energy_mask(e_min, e_max)

        log.info("Fitting reduced datasets.")
        self.fit = Fit(self.datasets)
        self.fit_result = self.fit.run(optimize_opts=optimize_opts)
        log.info(self.fit_result)

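    # Illustrative comment: an optional energy range for the fit can be set in the
    # "fit" section; the bounds are parsed as astropy quantities. Values below are
    # placeholders.
    #
    #     "fit": {"fit_range": {"min": "1 TeV", "max": "10 TeV"}}
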
    def get_flux_points(self, source="source"):
        """Calculate flux points for a specific model component.

        Parameters
        ----------
        source : string
            Name of the model component for which to calculate the flux points.
        """
        if not self._validate_fp_settings():
            return False

        # TODO: add "source" to config
        log.info("Calculating flux points.")
        axis_params = self.settings["flux-points"]["fp_binning"]
        e_edges = MapAxis.from_bounds(**axis_params).edges
        flux_point_estimator = FluxPointsEstimator(
            e_edges=e_edges, datasets=self.datasets, source=source
        )
        fp = flux_point_estimator.run()
        fp.table["is_ul"] = fp.table["ts"] < 4
        model = self.model[source].spectral_model.copy()
        self.flux_points = FluxPointsDataset(data=fp, model=model)
        cols = ["e_ref", "ref_flux", "dnde", "dnde_ul", "dnde_err", "is_ul"]
        log.info("\n{}".format(self.flux_points.data.table[cols]))

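    # Illustrative comment: a typical session chains the public steps in this
    # order, since each step validates the products of the previous one. The
    # "fp_binning" keywords are forwarded to MapAxis.from_bounds; all values and
    # filenames below are placeholders.
    #
    #     "flux-points": {
    #         "fp_binning": {"lo_bnd": 1, "hi_bnd": 10, "nbin": 5, "unit": "TeV", "interp": "log"}
    #     }
    #
    #     analysis.get_observations()
    #     analysis.get_datasets()
    #     analysis.set_model(filename="model.yaml")
    #     analysis.run_fit()
    #     analysis.get_flux_points(source="source")
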
    @staticmethod
    def _create_geometry(params):
        """Create the geometry."""
        # TODO: handled in jsonschema validation class
        geom_params = copy.deepcopy(params)
        axes = []
        for axis_params in geom_params.get("axes", []):
            ax = MapAxis.from_bounds(**axis_params)
            axes.append(ax)
        geom_params["axes"] = axes
        geom_params["skydir"] = tuple(geom_params["skydir"])
        return WcsGeom.create(**geom_params)

    def _map_making(self):
        """Make maps and datasets for 3d analysis."""
        log.info("Creating geometry.")
        geom = self._create_geometry(self.settings["datasets"]["geom"])
        if "geom-irf" in self.settings["datasets"]:
            geom_irf = self._create_geometry(self.settings["datasets"]["geom-irf"])
        else:
            geom_irf = geom.to_binsz(binsz=BINSZ_IRF)
        offset_max = Angle(self.settings["datasets"]["offset-max"])
        stack_datasets = self.settings["datasets"]["stack-datasets"]

        log.info("Creating datasets.")
        if stack_datasets:
            stacked = MapDataset.create(geom=geom, geom_irf=geom_irf, name="stacked")
            for obs in self.observations:
                dataset = self._get_dataset(obs, geom, geom_irf, offset_max)
                stacked.stack(dataset)
            self._extract_irf_kernels(stacked)
            datasets = [stacked]
        else:
            datasets = []
            for obs in self.observations:
                dataset = self._get_dataset(obs, geom, geom_irf, offset_max)
                self._extract_irf_kernels(dataset)
                datasets.append(dataset)

        self.datasets = Datasets(datasets)

    @staticmethod
    def _get_dataset(obs, geom, geom_irf, offset_max):
        position, width = obs.pointing_radec, 2 * offset_max
        geom_cutout = geom.cutout(position=position, width=width)
        geom_irf_cutout = geom_irf.cutout(position=position, width=width)
        maker = MapMakerObs(
            observation=obs,
            geom=geom_cutout,
            geom_true=geom_irf_cutout,
            offset_max=offset_max,
        )
        return maker.run()

    def _extract_irf_kernels(self, dataset):
        # TODO: remove hard-coded default value
        max_radius = self.settings["datasets"].get("psf-kernel-radius", "0.6 deg")
        # TODO: handle IRF maps in fit
        geom = dataset.counts.geom
        geom_irf = dataset.exposure.geom
        position = geom.center_skydir
        geom_psf = geom.to_image().to_cube(geom_irf.axes)
        dataset.psf = dataset.psf.get_psf_kernel(
            position=position, geom=geom_psf, max_radius=max_radius
        )
        e_reco = geom.get_axis_by_name("energy").edges
        dataset.edisp = dataset.edisp.get_energy_dispersion(
            position=position, e_reco=e_reco
        )

    def _set_logging(self):
        """Set logging parameters for API."""
        logging.basicConfig(**self.settings["general"]["logging"])
        log.info(
            "Setting logging config: {!r}".format(self.settings["general"]["logging"])
        )

    def _spectrum_extraction(self):
        """Run all steps for the spectrum extraction."""
        region = self.settings["datasets"]["geom"]["region"]
        log.info("Reducing spectrum datasets.")
        on_lon = Angle(region["center"][0])
        on_lat = Angle(region["center"][1])
        on_center = SkyCoord(on_lon, on_lat, frame=region["frame"])
        on_region = CircleSkyRegion(on_center, Angle(region["radius"]))
        background_params = {"on_region": on_region}
        background = self.settings["datasets"]["background"]
        if "exclusion_mask" in background:
            map_hdu = {}
            filename = background["exclusion_mask"]["filename"]
            if "hdu" in background["exclusion_mask"]:
                map_hdu = {"hdu": background["exclusion_mask"]["hdu"]}
            exclusion_region = Map.read(filename, **map_hdu)
            background_params["exclusion_mask"] = exclusion_region
        if background["background_estimator"] == "reflected":
            self.background_estimator = ReflectedRegionsBackgroundEstimator(
                observations=self.observations, **background_params
            )
            self.background_estimator.run()
        else:
            # TODO: raise error?
log.info("Background estimation only for reflected regions method.") extraction_params = {} if "containment_correction" in self.settings["datasets"]: extraction_params["containment_correction"] = self.settings["datasets"][ "containment_correction" ] params = self.settings["datasets"]["geom"]["axes"][0] e_reco = MapAxis.from_bounds(**params).edges extraction_params["e_reco"] = e_reco extraction_params["e_true"] = None self.extraction = SpectrumExtraction( observations=self.observations, bkg_estimate=self.background_estimator.result, **extraction_params, ) self.extraction.run() self.datasets = Datasets(self.extraction.spectrum_observations) if self.settings["datasets"]["stack-datasets"]: stacked = self.datasets.stack_reduce() stacked.name = "stacked" self.datasets = Datasets([stacked]) def _validate_reduction_settings(self): """Validate settings before proceeding to data reduction.""" if self.observations and len(self.observations): self.config.validate() return True else: log.info("No observations selected.") log.info("Data reduction cannot be done.") return False def _validate_set_model(self): if self.datasets and self.datasets.datasets: self.config.validate() return True else: log.info("No datasets reduced.") return False def _validate_fitting_settings(self): """Validate settings before proceeding to fit 1D.""" if not self.model: log.info("No model fetched for datasets.") log.info("Fit cannot be done.") return False else: return True def _validate_fp_settings(self): """Validate settings before proceeding to flux points estimation.""" valid = True if self.fit: self.config.validate() else: log.info("No results available from fit.") valid = False if "flux-points" not in self.settings: log.info("No values declared for the energy bins.") valid = False elif "fp_binning" not in self.settings["flux-points"]: log.info("No values declared for the energy bins.") valid = False if not valid: log.info("Flux points calculation cannot be done.") return valid
class AnalysisConfig:
    """Analysis configuration.

    Parameters
    ----------
    config : dict
        Configuration parameters
    """

    def __init__(self, config=None, filename="config.yaml"):
        self.settings = {}
        self.template = ""
        if config is None:
            self.template = CONFIG_PATH / ANALYSIS_TEMPLATES["basic"]
        # add user settings
        self.update_settings(config, self.template)
        self.filename = Path(filename).name

    def __str__(self):
        """Display settings in pretty YAML format."""
        info = self.__class__.__name__ + "\n\n\t"
        data = yaml.dump(self.settings, sort_keys=False, indent=4)
        data = data.replace("\n", "\n\t")
        info += data
        return info.expandtabs(tabsize=4)

    def to_yaml(self, filename=None, overwrite=False):
        """Serialize config into a YAML formatted file.

        Parameters
        ----------
        filename : str, Path
            Configuration settings filename. Default is "config.yaml".
        overwrite : bool
            Whether to overwrite an existing file.
        """
        if filename is None:
            filename = self.filename
        self.filename = Path(filename).name
        path_file = Path(self.settings["general"]["outdir"]) / self.filename
        if path_file.exists() and not overwrite:
            raise IOError(f"File {filename} already exists.")
        path_file.write_text(yaml.dump(self.settings, sort_keys=False, indent=4))
        log.info(f"Configuration settings saved into {path_file}")

    @classmethod
    def from_yaml(cls, filename):
        """Read config from filename."""
        filename = make_path(filename)
        config = read_yaml(filename)
        return cls(config, filename=filename)

    @classmethod
    def from_template(cls, template="basic"):
        """Create AnalysisConfig from existing templates.

        Parameters
        ----------
        template : {"basic", "1d", "3d"}
            Built-in templates.

        Returns
        -------
        analysis : `AnalysisConfig`
            Analysis configuration instance.
        """
        filename = CONFIG_PATH / ANALYSIS_TEMPLATES[template]
        return cls.from_yaml(filename)

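    # Illustrative comment: template names are resolved against ANALYSIS_TEMPLATES,
    # so "basic", "1d" and "3d" are the valid choices. The section name passed to
    # help() below is a placeholder; available sections come from docs.yaml.
    #
    #     config = AnalysisConfig.from_template("1d")
    #     print(config)                # pretty YAML dump of the settings
    #     config.help("observations")  # print the commented docs for one section
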
    def help(self, section=""):
        """Print template configuration settings."""
        doc = self._get_doc_sections()
        for keyword in doc.keys():
            if section == "" or section == keyword:
                print(doc[keyword])

    def update_settings(self, config=None, filename=""):
        """Update settings with a config dictionary or values in a config file."""
        if filename:
            filepath = make_path(filename)
            config = read_yaml(filepath)
        if config is None:
            config = {}
        if isinstance(config, str):
            config = yaml.safe_load(config)
        if len(config):
            self._update_settings(config, self.settings)
        self.validate()

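    # Illustrative comment: settings can be updated from a nested dict, a YAML
    # string or a YAML file; nested keys are merged recursively into the existing
    # settings and then re-validated. The logging level value is a placeholder.
    #
    #     config.update_settings({"general": {"logging": {"level": "WARNING"}}})
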
    def validate(self):
        """Validate and/or fill initial config parameters against schema."""
        validator = _gp_units_validator
        try:
            jsonschema.validate(self.settings, read_yaml(SCHEMA_FILE), validator)
        except jsonschema.exceptions.ValidationError as ex:
            log.error("Error when validating configuration parameters against schema.")
            log.error(ex.message)

    @staticmethod
    def _get_doc_sections():
        """Returns dict with commented docs from docs file."""
        doc = defaultdict(str)
        with open(DOCS_FILE) as f:
            for line in filter(lambda line: not line.startswith("---"), f):
                line = line.strip("\n")
                if line.startswith("# Section: "):
                    keyword = line.replace("# Section: ", "")
                doc[keyword] += line + "\n"
        return doc

    def _update_settings(self, source, target):
        for key, val in source.items():
            if key not in target:
                target[key] = {}
            if not isinstance(val, dict) or val == {}:
                target[key] = val
            else:
                self._update_settings(val, target[key])

def is_quantity(instance):
    try:
        _ = u.Quantity(instance)
        return True
    except ValueError:
        return False


def _astropy_quantity(_, instance):
    """Check a number may also be an astropy quantity."""
    is_number = jsonschema.Draft7Validator.TYPE_CHECKER.is_type(instance, "number")
    return is_number or is_quantity(instance)


_type_checker = jsonschema.Draft7Validator.TYPE_CHECKER.redefine(
    "number", _astropy_quantity
)

_gp_units_validator = jsonschema.validators.extend(
    jsonschema.Draft7Validator, type_checker=_type_checker
)
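
# Illustrative comment: the extended validator redefines the "number" type so
# that astropy quantity strings validate against numeric schema entries. The
# schema fragment below is a made-up example, not the real schema.yaml.
#
#     schema = {"properties": {"radius": {"type": "number"}}}
#     jsonschema.validate({"radius": "0.3 deg"}, schema, _gp_units_validator)  # passes
#     jsonschema.validate({"radius": "0.3 deg"}, schema)  # raises ValidationError with the default validator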