__all__ = ["Experiment"]
from abc import ABC, abstractmethod
from summit.domain import Domain
from summit.utils.dataset import DataSet
from summit.utils.multiobjective import pareto_efficient
from summit.utils import jsonify_dict, unjsonify_dict
import pandas as pd
import numpy as np
import time
import logging
# Eleven RGB triples running from dark red through pale yellow to dark blue
# (looks like the ColorBrewer "RdYlBu" 11-class palette -- TODO confirm),
# scaled from 0-255 integers to floats by dividing by 256.
COLORS = (
    np.array(
        [
            (165, 0, 38),
            (215, 48, 39),
            (244, 109, 67),
            (253, 174, 97),
            (254, 224, 144),
            (255, 255, 191),
            (224, 243, 248),
            (171, 217, 233),
            (116, 173, 209),
            (69, 117, 180),
            (49, 54, 149),
        ]
    )
    / 256
)
class Experiment(ABC):
    """Base class for experiments

    Parameters
    ----------
    domain: summit.domain.Domain
        The domain of the experiment
    logger: logging.Logger, optional
        Passed as a keyword argument. Defaults to the module-level logger.

    Notes
    -----
    Developers that subclass `Experiment` need to implement
    `_run`, which runs the experiments.
    """

    def __init__(self, domain, **kwargs):
        self.logger = kwargs.get("logger", logging.getLogger(__name__))
        self._domain = domain
        self.reset()

    @property
    def domain(self):
        """The domain for the experiment"""
        return self._domain

    @property
    def data(self):
        """Dataset of all experiments run

        Accessing this property renumbers the rows of the stored data
        (``reset_index(drop=True)``) before returning it.
        """
        self._data = self._data.reset_index(drop=True)
        return self._data

    def _strategy_time(self, computation_time=None):
        """Return the time to book as strategy computation time.

        An explicitly supplied `computation_time` always wins. Otherwise the
        time elapsed since the previous call to `run_experiments` is used,
        or 0 when there has been no previous call.
        """
        if computation_time is not None:
            return computation_time
        if self.prev_itr_time is None:
            return 0
        return time.time() - self.prev_itr_time

    def run_experiments(self, conditions, computation_time=None, **kwargs):
        """Run the experiment(s) at the given conditions

        Parameters
        ----------
        conditions: summit.utils.dataset.Dataset
            A dataset with columns matching the variables in the domain
            of a experiment(s) to run.
        computation_time: float, optional
            The time used by the strategy in calculating the next experiments.
            By default, the time since the last call to run_experiment is used.
        **kwargs
            Forwarded unchanged to `_run`.

        Returns
        -------
        The rows of the experiment data appended by this call (the last
        ``len(conditions)`` rows; no rows when `conditions` is empty).
        """
        # Bookkeeping for time used by strategy when suggesting next experiment
        diff = self._strategy_time(computation_time)

        # Run experiments
        # TODO: Add an option to run these in parallel
        for _, condition in conditions.iterrows():
            start = time.time()
            res, extras = self._run(condition, **kwargs)
            experiment_time = time.time() - start
            res = DataSet(res).T
            self._data = pd.concat([self._data, res], axis=0)
            # NOTE(review): these are chained assignments; under pandas
            # Copy-on-Write they would not write through to self._data --
            # confirm against the supported pandas version.
            self._data["experiment_t"].iat[-1] = float(experiment_time)
            self._data["computation_t"].iat[-1] = float(diff)
            strategy = condition.get("strategy")
            if strategy is not None:
                self._data["strategy"].iat[-1] = strategy.values[0]
            self.extras.append(extras)
        self.prev_itr_time = time.time()

        # iloc[-0:] would select every row, so handle the empty case explicitly
        n_runs = len(conditions)
        if n_runs == 0:
            return self._data.iloc[0:0]
        return self._data.iloc[-n_runs:]

    @abstractmethod
    def _run(self, conditions, **kwargs):
        """Run experiments at the specified conditions.

        `run_experiments` calls this once per row of the conditions it is
        given, passing that single row as `conditions`.

        Arguments
        ---------
        conditions: summit.utils.dataset.Dataset
            A dataset with columns matching the variables in the domain
            of a experiment(s) to run.

        Returns
        -------
        res, extras
            Should return a tuple where the first element is the
            DataSet with the conditions and results. The second element
            is a dictionary with extra parameters to store about the run.
            The latter can be an empty dictionary.
        """
        raise NotImplementedError(
            "_run must be implemented by subclasses of Experiment"
        )

    def reset(self):
        """Reset the experiment

        This will clear all data, the stored extras and the timestamp of
        the previous call to `run_experiments`.
        """
        self.prev_itr_time = None
        md_columns = ["computation_t", "experiment_t", "strategy"]
        columns = [var.name for var in self.domain.variables] + md_columns
        self._data = DataSet(columns=columns, metadata_columns=md_columns)
        self.extras = []

    def to_dict(self, **experiment_params):
        """Serialize the class to a dictionary

        Subclasses can add a experiment_params dictionary
        key with custom parameters for the experiment

        Returns
        -------
        dict
            With keys ``domain``, ``name``, ``data``, ``experiment_params``
            and ``extras``.
        """
        extras = []
        for e in self.extras:
            # Exactly one entry per extra: the branches must be exclusive,
            # otherwise dict extras are stored twice.
            if isinstance(e, dict):
                extras.append(jsonify_dict(e))
            elif isinstance(e, np.ndarray):
                extras.append(e.tolist())
            else:
                extras.append(e)
        return dict(
            domain=self.domain.to_dict(),
            name=str(self.__class__.__name__),
            data=self.data.to_dict(),
            experiment_params=experiment_params,
            extras=extras,
        )

    @classmethod
    def from_dict(cls, d):
        """Create an experiment from a dictionary produced by `to_dict`

        Dict extras are passed through `unjsonify_dict`, list extras are
        converted to numpy arrays and anything else is kept as is.
        """
        domain = Domain.from_dict(d["domain"])
        experiment_params = d.get("experiment_params", {})
        exp = cls(domain=domain, **experiment_params)
        exp._data = DataSet.from_dict(d["data"])
        for e in d.get("extras", []):
            if isinstance(e, dict):
                exp.extras.append(unjsonify_dict(e))
            elif isinstance(e, list):
                exp.extras.append(np.array(e))
            else:
                exp.extras.append(e)
        return exp

    def pareto_plot(self, objectives=None, colorbar=False, ax=None):
        """Make a 2D pareto plot of the experiments thus far

        Parameters
        ----------
        objectives: array-like, optional
            List of names of objectives to plot.
            By default picks the first two objectives
        colorbar: bool, optional
            If True, colour the points by their row index and, when a new
            figure is created, add a colorbar. Defaults to False.
        ax: `matplotlib.pyplot.axes`, optional
            An existing axis to apply the plot to

        Returns
        -------
        if ax is None returns a tuple with the first component
        as the a new figure and the second component the axis

        if ax is a matplotlib axis, returns only the axis, or a tuple of
        the axis and the last scatter artist when colorbar is True

        Raises
        ------
        ValueError
            If the number of objectives is not equal to two
        """
        from itertools import cycle

        import matplotlib.pyplot as plt
        from matplotlib.colors import ListedColormap

        if objectives is None:
            objectives = [v.name for v in self.domain.variables if v.is_objective]
            objectives = objectives[0:2]
        if len(objectives) != 2:
            raise ValueError("Can only plot 2 objectives")

        data = self._data[objectives].copy()

        # Handle minimize objectives: negate them so everything is maximized
        for objective in objectives:
            if not self.domain[objective].maximize:
                data[objective] = -1.0 * data[objective]
        _, indices = pareto_efficient(data.to_numpy(), maximize=True)

        return_fig = ax is None
        if return_fig:
            fig, ax = plt.subplots(1)

        im = None  # stays None when there is nothing to scatter
        all_data = self.data
        if len(all_data) > 0:
            # Plot all data, one marker per strategy. The markers cycle so
            # that more than two strategies are still drawn.
            strategy_col = all_data["strategy"]
            markers = cycle(["o", "x", "s", "^", "v", "D", "P", "*"])
            for strategy, marker in zip(pd.unique(strategy_col), markers):
                # A missing strategy never compares equal to itself, so
                # select those rows with isna() instead of ==.
                missing = pd.isna(strategy)
                mask = strategy_col.isna() if missing else strategy_col == strategy
                strat_data = all_data[mask]
                if colorbar:
                    c = strat_data.index.values
                    color_kwargs = dict(
                        c=c, cmap=ListedColormap(COLORS[: len(c)]), alpha=1
                    )
                else:
                    # A colormap is meaningless for a single named colour
                    color_kwargs = dict(c="k", alpha=0.5)
                im = ax.scatter(
                    strat_data[objectives[0]],
                    strat_data[objectives[1]],
                    marker=marker,
                    s=100,
                    label=None if missing else strategy,
                    **color_kwargs,
                )

            # Sort data so get nice pareto plot
            pareto_data = all_data.iloc[indices].copy()
            self.pareto_data = pareto_data.sort_values(by=objectives[0])
            if len(self.pareto_data) > 2:
                ax.plot(
                    self.pareto_data[objectives[0]],
                    self.pareto_data[objectives[1]],
                    c=(165 / 256, 0, 38 / 256),
                    label="Pareto Front",
                    linewidth=3,
                )

        ax.set_xlabel(objectives[0])
        ax.set_ylabel(objectives[1])
        if return_fig and colorbar and im is not None:
            fig.colorbar(im)
        ax.tick_params(direction="in")
        # Only draw a legend when there is something labelled to show
        if ax.get_legend_handles_labels()[0]:
            ax.legend()

        if return_fig:
            return fig, ax
        if colorbar:
            return ax, im
        return ax
def add_metadata_columns(df, metadata_df):
    """Copy the metadata columns of one dataset onto another

    Parameters
    ----------
    df
        The object to add columns to. It is modified in place; each new
        column is stored under the key ``(column, "METADATA")``.
    metadata_df
        The source of the columns. Must expose a `metadata_columns`
        attribute listing the column names to copy and support lookup
        of each of those names with ``metadata_df[column]``.

    Returns
    -------
    The same `df` object that was passed in.
    """
    for column in metadata_df.metadata_columns:
        df[(column, "METADATA")] = metadata_df[column]
    return df