# Source code for summit.experiment

__all__ = ["Experiment"]

from abc import ABC, abstractmethod
from summit.domain import Domain
from summit.utils.dataset import DataSet
from summit.utils.multiobjective import pareto_efficient
from summit.utils import jsonify_dict, unjsonify_dict

import pandas as pd
import numpy as np
import time
import logging

    (165, 0, 38),
    (215, 48, 39),
    (244, 109, 67),
    (253, 174, 97),
    (254, 224, 144),
    (255, 255, 191),
    (224, 243, 248),
    (171, 217, 233),
    (116, 173, 209),
    (69, 117, 180),
    (49, 54, 149),
COLORS = np.array(COLORS) / 256

class Experiment(ABC):
    """Base class for experiments

    Parameters
    ----------
    domain: summit.domain.Domain
        The domain of the experiment
    **kwargs
        Optional keyword arguments. ``logger`` supplies the logger stored on
        the instance; by default ``logging.getLogger(__name__)`` is used.

    Notes
    -----
    Developers that subclass `Experiment` need to implement
    `_run`, which runs the experiments.

    """

    def __init__(self, domain, **kwargs):
        self.logger = kwargs.get("logger", logging.getLogger(__name__))
        self._domain = domain
        # reset() creates the empty data table, the extras list and the timer.
        self.reset()

    @property
    def domain(self):
        """The domain for the experiment"""
        return self._domain

    @property
    def data(self):
        """Dataset of all experiments run

        The stored dataset is re-indexed to a fresh 0..n-1 index every time
        this property is read.
        """
        self._data = self._data.reset_index(drop=True)
        return self._data

    def _strategy_time(self, computation_time):
        """Return the strategy computation time to record for a batch of runs.

        An explicitly supplied ``computation_time`` always wins. Otherwise the
        wall-clock time since the previous call to `run_experiments` is used,
        or 0 when there has been no previous call.
        """
        if computation_time is not None:
            return computation_time
        if self.prev_itr_time is not None:
            return time.time() - self.prev_itr_time
        return 0

    def run_experiments(self, conditions, computation_time=None, **kwargs):
        """Run the experiment(s) at the given conditions

        Parameters
        ----------
        conditions: summit.utils.dataset.Dataset
            A dataset with columns matching the variables in the domain
            of a experiment(s) to run.
        computation_time: float, optional
            The time used by the strategy in calculating the next experiments.
            By default, the time since the last call to run_experiment is used.
        **kwargs
            Forwarded unchanged to `_run` for every condition.

        Returns
        -------
        The rows of the experiment dataset added by this call (one per row of
        `conditions`); empty when `conditions` is empty.
        """
        # Bookkeeping for time used by strategy when suggesting next experiment
        diff = self._strategy_time(computation_time)

        # Run experiments
        # TODO: Add an option to run these in parallel
        for _, condition in conditions.iterrows():
            start = time.time()
            res, extras = self._run(condition, **kwargs)
            experiment_time = time.time() - start

            # The single result is transposed into a one-row dataset and
            # appended to the results collected so far.
            res = DataSet(res).T
            self._data = pd.concat([self._data, res], axis=0)

            # Fill in the metadata of the row that was just appended.
            self._data["experiment_t"].iat[-1] = float(experiment_time)
            self._data["computation_t"].iat[-1] = float(diff)
            strategy = condition.get("strategy")
            if strategy is not None:
                self._data["strategy"].iat[-1] = strategy.values[0]
            self.extras.append(extras)

        self.prev_itr_time = time.time()

        # Slice from an explicit start position: a plain ``-len(conditions)``
        # would select *all* rows when conditions is empty (since -0 == 0).
        first_new = max(len(self._data) - len(conditions), 0)
        return self._data.iloc[first_new:]

    @abstractmethod
    def _run(self, conditions, **kwargs):
        """Run experiments at the specified conditions.

        Arguments
        ---------
        conditions: summit.utils.dataset.Dataset
            A dataset with columns matching the variables in the domain
            of a experiment(s) to run.

        Returns
        -------
        res, extras
            Should return a tuple where the first element is the DataSet
            with the conditions and results. The second element is a
            dictionary with extra parameters to store about the run. The
            latter can be an empty dictionary.

        Raises
        ------
        NotImplementedError
            Always, when the base implementation is called.
        """
        raise NotImplementedError(
            "_run must be implemented by subclasses of Experiment"
        )

    def reset(self):
        """Reset the experiment

        This will clear all data.

        """
        self.prev_itr_time = None
        # One column per domain variable plus the bookkeeping metadata columns.
        columns = [var.name for var in self.domain.variables]
        md_columns = ["computation_t", "experiment_t", "strategy"]
        columns += md_columns
        self._data = DataSet(columns=columns, metadata_columns=md_columns)
        self.extras = []

    def to_dict(self, **experiment_params):
        """Serialize the class to a dictionary

        Subclasses can add a experiment_params dictionary key with custom
        parameters for the experiment

        Returns
        -------
        dict
            Dictionary with the keys ``domain``, ``name``, ``data``,
            ``experiment_params`` and ``extras``.
        """
        extras = []
        for e in self.extras:
            # Exactly one entry is emitted per stored extra.
            if isinstance(e, dict):
                extras.append(jsonify_dict(e))
            elif isinstance(e, np.ndarray):
                extras.append(e.tolist())
            else:
                extras.append(e)
        return dict(
            domain=self.domain.to_dict(),
            name=str(self.__class__.__name__),
            # from_dict reads this key back to restore the results table.
            data=self.data.to_dict(),
            experiment_params=experiment_params,
            extras=extras,
        )

    @classmethod
    def from_dict(cls, d):
        """Create an experiment from a dictionary

        Parameters
        ----------
        d: dict
            Dictionary with the keys ``domain``, ``data`` and ``extras`` and,
            optionally, ``experiment_params`` whose entries are passed as
            keyword arguments to the class constructor.

        Returns
        -------
        An instance of the class with its data and extras restored.
        """
        domain = Domain.from_dict(d["domain"])
        experiment_params = d.get("experiment_params", {})
        exp = cls(domain=domain, **experiment_params)
        exp._data = DataSet.from_dict(d["data"])
        for e in d["extras"]:
            if isinstance(e, dict):
                exp.extras.append(unjsonify_dict(e))
            elif isinstance(e, list):
                # Lists are turned back into numpy arrays.
                exp.extras.append(np.array(e))
            else:
                exp.extras.append(e)
        return exp

    def pareto_plot(self, objectives=None, colorbar=False, ax=None):
        """Make a 2D pareto plot of the experiments thus far

        Parameters
        ----------
        objectives: array-like, optional
            List of names of objectives to plot.
            By default picks the first two objectives
        colorbar: bool, optional
            If True, points are colored by their row index and, when a new
            figure is created, a colorbar is added to it. Defaults to False.
        ax: `matplotlib.pyplot.axes`, optional
            An existing axis to apply the plot to

        Returns
        -------
        if ax is None returns a tuple with the first component
        as the a new figure and the second component the axis

        if ax is a matplotlib axis, returns only the axis, or a tuple of the
        axis and the last scatter artist when `colorbar` is True (the artist
        is None when there was no data to plot)

        Raises
        ------
        ValueError
            If the number of objectives is not equal to two

        Notes
        -----
        The points on the pareto front are stored on the instance as
        ``pareto_data`` whenever there is data to plot.
        """
        from itertools import cycle

        import matplotlib.pyplot as plt
        from matplotlib.colors import ListedColormap

        if objectives is None:
            objectives = [v.name for v in self.domain.variables if v.is_objective]
            objectives = objectives[0:2]

        if len(objectives) != 2:
            raise ValueError("Can only plot 2 objectives")

        data = self._data[objectives].copy()

        # Handle minimize objectives: negate them so that everything can be
        # treated as a maximization when finding the pareto front.
        for objective in objectives:
            if not self.domain[objective].maximize:
                data[objective] = -1.0 * data[objective]

        _, indices = pareto_efficient(data.to_numpy(), maximize=True)

        return_fig = ax is None
        if return_fig:
            fig, ax = plt.subplots(1)

        # Stays None when nothing is plotted so later uses cannot hit an
        # unbound name.
        im = None

        # Plot all data
        if len(self.data) > 0:
            strategies = pd.unique(self.data["strategy"])
            # Markers are cycled so that no strategy is dropped from the plot;
            # the first two match the historical "o" / "x" assignment.
            markers = cycle(["o", "x", "s", "^", "D", "v", "P", "*"])
            for strategy, marker in zip(strategies, markers):
                strat_data = self.data[self.data["strategy"] == strategy]
                scatter_kwargs = dict(
                    alpha=1 if colorbar else 0.5,
                    marker=marker,
                    s=100,
                    label=strategy,
                )
                if colorbar:
                    # Color each point by its row index.
                    c = strat_data.index.values
                    scatter_kwargs["c"] = c
                    scatter_kwargs["cmap"] = ListedColormap(COLORS[: len(c)])
                else:
                    # A colormap has no effect on a single fixed color, so it
                    # is only passed in the colorbar case.
                    scatter_kwargs["c"] = "k"
                im = ax.scatter(
                    strat_data[objectives[0]],
                    strat_data[objectives[1]],
                    **scatter_kwargs,
                )

            # Sort data so get nice pareto plot
            self.pareto_data = self.data.iloc[indices].copy()
            self.pareto_data = self.pareto_data.sort_values(by=objectives[0])
            if len(self.pareto_data) > 2:
                ax.plot(
                    self.pareto_data[objectives[0]],
                    self.pareto_data[objectives[1]],
                    c=(165 / 256, 0, 38 / 256),
                    label="Pareto Front",
                    linewidth=3,
                )

        ax.set_xlabel(objectives[0])
        ax.set_ylabel(objectives[1])
        if return_fig and colorbar and im is not None:
            fig.colorbar(im)
        ax.tick_params(direction="in")
        ax.legend()

        # A newly created figure is always returned as (fig, ax), which keeps
        # the return shape callers already rely on.
        if return_fig:
            return fig, ax
        if colorbar:
            return ax, im
        return ax
def add_metadata_columns(df, metadata_df):
    """Copy the metadata columns of one dataset into another.

    Parameters
    ----------
    df
        The object to add columns to. It is modified in place; each column
        is stored under the key ``(column, "METADATA")``.
    metadata_df
        The object providing the values. Its ``metadata_columns`` attribute
        lists the columns to copy.

    Returns
    -------
    The same `df` object that was passed in.
    """
    for column in metadata_df.metadata_columns:
        df[(column, "METADATA")] = metadata_df[column]
    return df