# Source code for summit.experiment

__all__ = ["Experiment"]

from abc import ABC, abstractmethod
from summit.domain import Domain
from summit.utils.dataset import DataSet
from summit.utils.multiobjective import pareto_efficient
from summit.utils import jsonify_dict, unjsonify_dict

import pandas as pd
import numpy as np
import time
import logging

# Eleven-step red -> yellow -> blue palette used by the pareto plot, stored as
# an (11, 3) float array of RGB fractions (each 0-255 channel divided by 256).
# NOTE(review): values look like ColorBrewer's 11-class RdYlBu -- confirm.
COLORS = (
    np.array(
        [
            (165, 0, 38),
            (215, 48, 39),
            (244, 109, 67),
            (253, 174, 97),
            (254, 224, 144),
            (255, 255, 191),
            (224, 243, 248),
            (171, 217, 233),
            (116, 173, 209),
            (69, 117, 180),
            (49, 54, 149),
        ]
    )
    / 256
)


class Experiment(ABC):
    """Base class for experiments

    Parameters
    ----------

    domain: summit.domain.Domain
        The domain of the experiment
    **kwargs
        Only ``logger`` is read: a logger to use instead of the
        module-level ``logging.getLogger(__name__)``.

    Notes
    -----

    Developers that subclass `Experiment` need to implement
    `_run`, which runs the experiments.

    """

    def __init__(self, domain, **kwargs):
        self.logger = kwargs.get("logger", logging.getLogger(__name__))
        self._domain = domain
        # reset() creates the empty dataset, the extras list and the timer.
        self.reset()

    @property
    def domain(self):
        """The domain for the experiment"""
        return self._domain

    @property
    def data(self):
        """Dataset of all experiments run

        The stored dataset is re-indexed (0..n-1) on every access, because
        rows are appended with whatever index the results carried.
        """
        self._data = self._data.reset_index(drop=True)
        return self._data

    def run_experiments(self, conditions, computation_time=None, **kwargs):
        """Run the experiment(s) at the given conditions

        Parameters
        ----------

        conditions: summit.utils.dataset.Dataset
            A dataset with columns matching the variables in the domain
            of a experiment(s) to run.
        computation_time: float, optional
            The time used by the strategy in calculating the next experiments.
            By default, the time since the last call to run_experiment is used
            (or 0 if this is the first call since the last reset).
        **kwargs
            Forwarded unchanged to `_run` for every row of `conditions`.

        Returns
        -------
        The rows of the internal dataset that were added by this call
        (empty when `conditions` has no rows).

        """
        # Bookkeeping for time used by strategy when suggesting next experiment.
        # An explicitly supplied value always wins, including on the first call.
        if computation_time is not None:
            diff = computation_time
        elif self.prev_itr_time is not None:
            diff = time.time() - self.prev_itr_time
        else:
            diff = 0

        # Run experiments
        # TODO: Add an option to run these in parallel
        for _, condition in conditions.iterrows():
            start = time.time()
            res, extras = self._run(condition, **kwargs)
            experiment_time = time.time() - start
            # The result of one run becomes one appended row.
            res = DataSet(res).T
            self._data = pd.concat([self._data, res], axis=0)
            # NOTE(review): chained assignment; relies on the column lookup
            # returning a view of self._data -- confirm under pandas
            # copy-on-write.
            self._data["experiment_t"].iat[-1] = float(experiment_time)
            self._data["computation_t"].iat[-1] = float(diff)
            strategy = condition.get("strategy")
            if strategy is not None:
                self._data["strategy"].iat[-1] = strategy.values[0]
            self.extras.append(extras)
        self.prev_itr_time = time.time()

        n_run = len(conditions)
        if n_run == 0:
            # iloc[-0:] would select *every* row, so handle "nothing run"
            # explicitly.
            return self._data.iloc[0:0]
        return self._data.iloc[-n_run:]

    @abstractmethod
    def _run(self, conditions, **kwargs):
        """Run experiments at the specified conditions.

        `run_experiments` calls this once per row of the conditions it
        was given, passing that single row as `conditions`.

        Arguments
        ---------
        conditions: summit.utils.dataset.Dataset
            A dataset with columns matching the variables in the domain
            of a experiment(s) to run.

        Returns
        -------
        res, extras
            Should return a tuple where the first element is the
            DataSet with the conditions and results.  The second element
            is a dictionary with extra parameters to store about the run.
            The later can be an empty dictionary.
        """

        raise NotImplementedError(
            "_run must be implemented by subclasses of Experiment"
        )

    def reset(self):
        """Reset the experiment

        This will clear all data, the stored extras and the timestamp of
        the previous call to `run_experiments`.

        """
        self.prev_itr_time = None
        # One column per domain variable plus the bookkeeping columns,
        # which are flagged as metadata.
        columns = [var.name for var in self.domain.variables]
        md_columns = ["computation_t", "experiment_t", "strategy"]
        columns += md_columns
        self._data = DataSet(columns=columns, metadata_columns=md_columns)
        self.extras = []

    def to_dict(self, **experiment_params):
        """Serialize the class to a dictionary

        Subclasses can add a experiment_params dictionary
        key with custom parameters for the experiment

        Returns
        -------
        dict
            With keys ``domain``, ``name`` (the class name), ``data``,
            ``experiment_params`` and ``extras``.
        """
        extras = []

        # Exactly one serialized entry per stored extra, so the list stays
        # aligned with the experiments that produced it.
        for e in self.extras:
            if isinstance(e, dict):
                extras.append(jsonify_dict(e))
            elif isinstance(e, np.ndarray):
                extras.append(e.tolist())
            else:
                extras.append(e)

        return dict(
            domain=self.domain.to_dict(),
            name=str(self.__class__.__name__),
            data=self.data.to_dict(),
            experiment_params=experiment_params,
            extras=extras,
        )

    @classmethod
    def from_dict(cls, d):
        """Rebuild an experiment from the dictionary made by `to_dict`

        Parameters
        ----------
        d: dict
            Must contain ``domain`` and ``data``; ``experiment_params``
            and ``extras`` are optional.  ``experiment_params`` is passed
            to the constructor as keyword arguments.

        Notes
        -----
        Extras that are lists are restored as numpy arrays and extras that
        are dictionaries are passed through ``unjsonify_dict``.
        """
        domain = Domain.from_dict(d["domain"])
        experiment_params = d.get("experiment_params", {})
        exp = cls(domain=domain, **experiment_params)
        exp._data = DataSet.from_dict(d["data"])
        for e in d.get("extras", []):
            if isinstance(e, dict):
                exp.extras.append(unjsonify_dict(e))
            elif isinstance(e, list):
                exp.extras.append(np.array(e))
            else:
                exp.extras.append(e)
        return exp

    def pareto_plot(self, objectives=None, colorbar=False, ax=None):
        """Make a 2D pareto plot of the experiments thus far

        Parameters
        ----------
        objectives: array-like, optional
            List of names of objectives to plot.
            By default picks the first two objectives
        colorbar: bool, optional
            If True, color the points by their row index instead of
            drawing them in black. Defaults to False.
        ax: `matplotlib.pyplot.axes`, optional
            An existing axis to apply the plot to

        Returns
        -------
        if ax is None returns a tuple with the first component
        as the a new figure and the second component the axis

        if ax is a matplotlib axis, returns only the axis, or the tuple
        ``(ax, im)`` when `colorbar` is True, where ``im`` is the last
        scatter artist drawn (None if there was no data to draw)

        Raises
        ------
        ValueError
            If the number of objectives is not equal to two


        """
        import itertools

        import matplotlib.pyplot as plt
        from matplotlib.colors import ListedColormap

        if objectives is None:
            objectives = [v.name for v in self.domain.variables if v.is_objective]
            objectives = objectives[0:2]

        if len(objectives) != 2:
            raise ValueError("Can only plot 2 objectives")

        data = self._data[objectives].copy()

        # Handle minimize objectives: flip their sign so that the pareto
        # search can treat every objective as a maximization.
        for objective in objectives:
            if not self.domain[objective].maximize:
                data[objective] = -1.0 * data[objective]

        _, indices = pareto_efficient(data.to_numpy(), maximize=True)

        if ax is None:
            fig, ax = plt.subplots(1)
            return_fig = True
        else:
            return_fig = False

        # Stays None when there is nothing to draw, so the return below
        # never touches an unbound name.
        im = None

        # Plot all data
        all_data = self.data
        if len(all_data) > 0:
            strategy_col = all_data["strategy"]
            strategies = pd.unique(strategy_col)
            # Cycle so that every strategy is drawn; the first two keep the
            # markers they have always had.
            markers = itertools.cycle(["o", "x", "s", "^", "D", "v", "P", "*"])
            for strategy, marker in zip(strategies, markers):
                # NaN never compares equal to itself, so rows without a
                # recorded strategy need an explicit isna() mask.
                if pd.isna(strategy):
                    mask = strategy_col.isna()
                else:
                    mask = strategy_col == strategy
                strat_data = all_data[mask]
                scatter_kwargs = dict(marker=marker, s=100, label=strategy)
                if colorbar:
                    c = strat_data.index.values
                    scatter_kwargs.update(
                        c=c, cmap=ListedColormap(COLORS[: len(c)]), alpha=1
                    )
                else:
                    # A colormap is meaningless for a single fixed color.
                    scatter_kwargs.update(c="k", alpha=0.5)
                im = ax.scatter(
                    strat_data[objectives[0]],
                    strat_data[objectives[1]],
                    **scatter_kwargs,
                )

            # Sort data so get nice pareto plot
            self.pareto_data = all_data.iloc[indices].copy()
            self.pareto_data = self.pareto_data.sort_values(by=objectives[0])
            if len(self.pareto_data) > 2:
                ax.plot(
                    self.pareto_data[objectives[0]],
                    self.pareto_data[objectives[1]],
                    c=(165 / 256, 0, 38 / 256),
                    label="Pareto Front",
                    linewidth=3,
                )
            ax.set_xlabel(objectives[0])
            ax.set_ylabel(objectives[1])
            if return_fig and colorbar and im is not None:
                fig.colorbar(im)
            ax.tick_params(direction="in")
            ax.legend()

        # A freshly created figure is always returned as (fig, ax), matching
        # the documented contract, regardless of `colorbar`.
        if return_fig:
            return fig, ax
        if colorbar:
            return ax, im
        return ax


def add_metadata_columns(df, metadata_df):
    """Copy the metadata columns of `metadata_df` into `df`

    Every name listed in ``metadata_df.metadata_columns`` is written to
    `df` under the key ``(name, "METADATA")``.  `df` is modified in place
    and also returned.
    """
    for name in metadata_df.metadata_columns:
        target_key = (name, "METADATA")
        df[target_key] = metadata_df[name]
    return df