# Public API of this module: the names exported by a star-import.
__all__ = [
"Variable",
"ContinuousVariable",
"CategoricalVariable",
"Constraint",
"Domain",
"DomainError",
]
import json
import keyword
from abc import ABC, abstractmethod
from copy import deepcopy

import numpy as np

from summit.utils.dataset import DataSet
class Variable(ABC):
    """A base class for variables

    Parameters
    ----------
    name: str
        The name of the variable. Must be a string without spaces that is
        not a Python keyword.
    description: str
        A short description of the variable
    variable_type: str
        Label for the kind of variable. The subclasses in this module pass
        ``"continuous"`` or ``"categorical"``.
    is_objective: bool, optional
        If True, this variable is an output. Defaults to False (i.e., an input variable)
    maximize: bool, optional
        If True, the output will be maximized; if False, it will be minimized.
        Defaults to True.
    units: str, optional
        Units of the variable. Defaults to None.

    Attributes
    ----------
    name
    description
    variable_type
    is_objective
    maximize
    units

    Raises
    ------
    ValueError
        If `name` is not a valid variable name (see `_check_name`).
    """

    def __init__(self, name: str, description: str, variable_type: str, **kwargs):
        Variable._check_name(name)
        self._name = name
        self._description = description
        self._variable_type = variable_type
        # Optional settings are read from kwargs so subclasses can forward
        # their own keyword arguments untouched.
        self._is_objective = kwargs.get("is_objective", False)
        self._maximize = kwargs.get("maximize", True)
        self._units = kwargs.get("units", None)

    @property
    def name(self) -> str:
        """str: name of the variable"""
        return self._name

    @name.setter
    def name(self, value: str):
        # Renaming goes through the same validation as construction.
        Variable._check_name(value)
        self._name = value

    @property
    def description(self) -> str:
        """str: description of the variable"""
        return self._description

    @description.setter
    def description(self, value: str):
        self._description = value

    @property
    def variable_type(self) -> str:
        """str: label for the kind of variable passed at construction"""
        return self._variable_type

    @property
    def maximize(self) -> bool:
        """bool: True if the variable is to be maximized, False if minimized"""
        return self._maximize

    @property
    def is_objective(self) -> bool:
        """bool: True if the variable is an output (objective)"""
        return self._is_objective

    @property
    def units(self) -> str:
        """str or None: units of the variable"""
        return self._units

    def to_dict(self):
        """Return a dictionary representation of the variable

        Returns
        -------
        variable_dict: dict
            Dictionary with the keys ``type`` (the class name),
            ``is_objective``, ``maximize``, ``name``, ``description``
            and ``units``.
        """
        variable_dict = {
            "type": self.__class__.__name__,
            "is_objective": self._is_objective,
            "maximize": self.maximize,
            "name": self.name,
            "description": self.description,
            "units": self.units,
        }
        return variable_dict

    @staticmethod
    @abstractmethod
    def from_dict():
        """Build a variable from its dictionary representation

        Must be implemented by subclasses of Variable.
        """
        raise NotImplementedError("Must be implemented by subclasses of Variable")

    @staticmethod
    def _check_name(name: str):
        """Validate a variable name

        Parameters
        ----------
        name: str
            The candidate variable name

        Raises
        ------
        ValueError
            If `name` is not a string, contains a space or is a Python keyword.
        """
        # Check string
        if not isinstance(name, str):
            raise ValueError(
                f"""{name} is not a string. Variable names must be strings."""
            )

        # No spaces
        if " " in name:
            raise ValueError(
                f"""Error with variable name "{name}". Variable names cannot have spaces. Try replacing spaces with _ or -"""
            )

        # No python keywords. The interpreter's own list is used so that no
        # keyword (e.g. "and", "or", "not") is missed.
        if keyword.iskeyword(name):
            raise ValueError(
                f"Variable names cannot be python keywords ({name}). For a full list of python keywords, see https://www.w3schools.com/python/python_ref_keywords.asp"
            )

    def __repr__(self):
        return f"Variable(name={self.name}, description={self.description})"

    @abstractmethod
    def _html_table_rows(self):
        """Return the html table row(s) describing this variable"""
        pass

    def _make_html_table_rows(self, value):
        """Build one html table row for this variable

        Parameters
        ----------
        value
            Content of the "Values" column; it is formatted into the cell
            with an f-string.

        Returns
        -------
        row: str
            A ``<tr>`` element with name, type, description and value cells.
        """
        name_column = f"<td>{self.name}</td>"
        if self.is_objective:
            direction = "maximize" if self.maximize else "minimize"
            input_output = f"{direction} objective"
        else:
            input_output = "input"
        type_column = f"<td>{self.variable_type}, {input_output}</td>"
        description_column = f"<td>{self.description}</td>"
        values_column = f"<td>{value}</td>"
        return f"<tr>{name_column}{type_column}{description_column}{values_column}</tr>"
class ContinuousVariable(Variable):
    """Representation of a continuous variable

    Parameters
    ----------
    name: str
        The name of the variable
    description: str
        A short description of the variable
    bounds: list of float or int
        The lower and upper bounds (respectively) of the variable
    is_objective: bool, optional
        If True, this variable is an output. Defaults to False (i.e., an input variable)
    maximize: bool, optional
        If True, the output will be maximized; if False, it will be minimized.
        Defaults to True.
    units: str, optional
        Units of the variable. Defaults to None.

    Attributes
    ----------
    name
    description
    bounds
    lower_bound
    upper_bound

    Examples
    --------
    >>> var = ContinuousVariable('temperature', 'reaction temperature', [1, 100])
    """

    def __init__(self, name: str, description: str, bounds: list, **kwargs):
        super().__init__(name, description, "continuous", **kwargs)
        self._lower_bound = bounds[0]
        self._upper_bound = bounds[1]

    @property
    def bounds(self):
        """`numpy.ndarray`: Lower and upper bound of the variable"""
        return np.array([self.lower_bound, self.upper_bound])

    @property
    def lower_bound(self):
        """float or int: lower bound of the variable"""
        return self._lower_bound

    @property
    def upper_bound(self):
        """float or int: upper bound of the variable"""
        return self._upper_bound

    def _html_table_rows(self):
        """Return the html table row for this variable, showing its bounds"""
        return self._make_html_table_rows(f"[{self.lower_bound},{self.upper_bound}]")

    def to_dict(self):
        """Return a dictionary representation of the variable

        Returns
        -------
        variable_dict: dict
            The base class dictionary extended with a ``bounds`` entry
            holding the lower and upper bound cast to float.
        """
        variable_dict = super().to_dict()
        variable_dict.update(
            {"bounds": [float(self.lower_bound), float(self.upper_bound)]}
        )
        return variable_dict

    @staticmethod
    def from_dict(variable_dict):
        """Create a ContinuousVariable from a dictionary made by `to_dict`

        Parameters
        ----------
        variable_dict: dict
            Must contain ``name``, ``description``, ``bounds``,
            ``is_objective`` and ``maximize``. ``units`` is optional.

        Returns
        -------
        variable: ContinuousVariable
        """
        return ContinuousVariable(
            name=variable_dict["name"],
            description=variable_dict["description"],
            bounds=variable_dict["bounds"],
            is_objective=variable_dict["is_objective"],
            maximize=variable_dict["maximize"],
            # to_dict writes the units, so restore them for a lossless round
            # trip; .get keeps dictionaries without the key loadable.
            units=variable_dict.get("units"),
        )
class CategoricalVariable(Variable):
    """Representation of a categorical variable

    Categorical variables are discrete choices that do not have an ordering.
    Common examples are selections of catalysts, bases, or ligands.

    Each possible discrete choice is referred to as a level. These are added as a list
    using the `levels` keyword argument.

    When available, descriptors can be added to a categorical variable. These might be values
    such as the melting point, logP, etc. of each level of the categorical variable. These descriptors
    can significantly improve the speed of optimization and also make many more strategies compatible
    with categorical variables (i.e., all that work with continuous variables).

    Parameters
    ----------
    name : str
        The name of the variable
    description : str
        A short description of the variable
    levels : list of any serializable object, optional
        The potential values of the Categorical variable. When descriptors
        are passed, this can be left empty, and the levels will be inferred from
        the index of the descriptors DataSet.
    descriptors : :class:`~summit.utils.dataset.DataSet`, optional
        A DataSet where the keys correspond to the levels and the data
        columns are descriptors.
    is_objective : bool, optional
        If True, this variable is an output. Defaults to False (i.e., an input variable)
    maximize : bool, optional
        If True, the output will be maximized; if False, it will be minimized.
        Defaults to True.
    units : str, optional
        Units of the variable. Defaults to None.

    Attributes
    ----------
    name
    description
    levels
    ds : descriptors DataSet

    Raises
    ------
    ValueError
        When the levels are not unique, or when neither levels nor
        descriptors are passed
    TypeError
        When levels is not a list or descriptors is not a DataSet
    AssertionError
        When a level is not in the index of the descriptors DataSet

    Examples
    --------
    The simplest way to use a CategoricalVariable is without descriptors:

    >>> base = CategoricalVariable('base', 'Organic Base', levels=['DBU', 'BMTG', 'TEA'])

    When descriptors are available, they can be used directly without specifying the levels:

    >>> solvent_df = DataSet([[5, 81],[-93, 111]], index=['benzene', 'toluene'], columns=['melting_point', 'boiling_point'])
    >>> solvent = CategoricalVariable('solvent', 'solvent descriptors', descriptors=solvent_df)

    It is also possible to specify a subset of the descriptors as possible choices by passing both descriptors and levels.
    The levels must match the index of the descriptors DataSet.

    >>> solvent_df = DataSet([[5, 81],[-93, 111]], index=['benzene', 'toluene'], columns=['melting_point', 'boiling_point'])
    >>> solvent = CategoricalVariable('solvent', 'solvent descriptors', levels=['benzene', 'toluene'],descriptors=solvent_df)
    """

    def __init__(self, name, description, **kwargs):
        super().__init__(name, description, "categorical", **kwargs)

        # Get descriptors DataSet
        self.ds = kwargs.get("descriptors")
        if self.ds is not None and not isinstance(self.ds, DataSet):
            raise TypeError("descriptors must be a DataSet")

        levels = kwargs.get("levels")
        if levels is None:
            if self.ds is None:
                raise ValueError("Levels, descriptors or both must be passed.")
            # No levels passed but descriptors passed: use the whole index
            levels = self.ds.index.to_list()
        elif self.ds is not None:
            # Levels and descriptors passed: check they match. Raised
            # explicitly (rather than with an assert statement) so the check
            # also runs under ``python -O``; the exception type is kept for
            # backward compatibility.
            index = self.ds.index
            for level in levels:
                if level not in index:
                    raise AssertionError(
                        "Levels must be in the descriptors DataSet index."
                    )

        if not isinstance(levels, list):
            raise TypeError("Levels must be a list")

        # check that levels are unique
        if len(levels) != len(set(levels)):
            raise ValueError("Levels must have unique values.")

        # Store a copy so add_level/remove_level never mutate the list the
        # caller passed in (e.g. one shared through to_dict/from_dict).
        self._levels = list(levels)

    @property
    def levels(self) -> list:
        """list: Potential values of the discrete variable"""
        return self._levels

    @property
    def num_levels(self) -> int:
        """int: Number of levels"""
        return len(self._levels)

    @property
    def num_descriptors(self) -> int:
        """int or None: Number of descriptor columns, None without descriptors"""
        if self.ds is not None:
            return len(self.ds.data_columns)
        return None

    def add_level(self, level):
        """Add a level to the discrete variable

        Parameters
        ----------
        level
            Value to add to the levels of the discrete variable

        Raises
        ------
        ValueError
            If the level is already in the list of levels
        """
        if level in self._levels:
            raise ValueError("Levels must have unique values.")
        self._levels.append(level)

    def remove_level(self, level):
        """Remove a level from the discrete variable

        Parameters
        ----------
        level
            Level to remove from the discrete variable

        Raises
        ------
        ValueError
            If the level does not exist for the discrete variable
        """
        try:
            self._levels.remove(level)
        except ValueError as err:
            raise ValueError(f"Level {level} is not in the list of levels.") from err

    def to_dict(self):
        """Return a dictionary representation of the variable

        Returns
        -------
        variable_dict: dict
            The base class dictionary extended with ``levels`` and ``ds``
            (the descriptors as a dictionary, or None without descriptors).
        """
        variable_dict = super().to_dict()
        ds = self.ds.to_dict() if self.ds is not None else None
        variable_dict.update(dict(levels=self.levels, ds=ds))
        return variable_dict

    @staticmethod
    def from_dict(variable_dict):
        """Create a CategoricalVariable from a dictionary made by `to_dict`

        Parameters
        ----------
        variable_dict: dict
            Must contain ``name``, ``description``, ``levels``, ``ds`` and
            ``is_objective``. ``maximize`` and ``units`` are optional.

        Returns
        -------
        variable: CategoricalVariable
        """
        ds = variable_dict["ds"]
        ds = DataSet.from_dict(ds) if ds is not None else None
        return CategoricalVariable(
            name=variable_dict["name"],
            description=variable_dict["description"],
            levels=variable_dict["levels"],
            descriptors=ds,
            is_objective=variable_dict["is_objective"],
            # to_dict writes these two as well; restore them so a round trip
            # is lossless. The fallbacks are the constructor defaults.
            maximize=variable_dict.get("maximize", True),
            units=variable_dict.get("units"),
        )

    def _html_table_rows(self):
        """Return the html table row for this variable, showing the level count"""
        return self._make_html_table_rows(f"{self.num_levels} levels")
class Constraint:
    """A constraint for an optimization domain

    Constraints are written in the form ``lhs constraint_type 0``, so the
    right hand side is always zero.

    Parameters
    ----------
    lhs: str
        The left hand side of a constraint equation
    constraint_type: str
        The type of constraint. Must be <, <=, ==, > or >=. Default: "<="

    Raises
    ------
    ValueError
        If `constraint_type` is not one of the supported comparison operators

    Examples
    --------
    For example, x+y=3 should be rewritten as x+y-3=0 and therefore:

    >>> domain = Domain()
    >>> domain += Constraint(lhs="x+y-3", constraint_type="==")

    Or x+y<0 would be:

    >>> domain = Domain()
    >>> domain += Constraint(lhs="x+y", constraint_type="<")
    """

    def __init__(self, lhs, constraint_type="<="):
        self._lhs = lhs
        self._constraint_type = constraint_type
        supported = ("<", "<=", "==", ">", ">=")
        if constraint_type not in supported:
            raise ValueError("Constraint type must be <, <=, ==, > or >=")

    @property
    def lhs(self):
        """The left hand side of the constraint"""
        return self._lhs

    @property
    def constraint_type(self):
        """str: the comparison operator of the constraint"""
        return self._constraint_type

    def _html_table_rows(self):
        """Return the html table row describing this constraint"""
        # Cells in table order: name, type, description, value.
        cells = ("", "constraint", self.lhs, "")
        body = "".join(f"<td>{cell}</td>" for cell in cells)
        return "<tr>" + body + "</tr>"
class Domain:
    """Representation of the optimization domain

    Parameters
    ----------
    variables: :class:`~summit.domain.Variable` or list of :class:`~summit.domain.Variable` like objects, optional
        list of variable objects (i.e., `ContinuousVariable`, `CategoricalVariable`)
    constraints: :class:`~summit.domain.Constraint` or list of :class:`~summit.domain.Constraint` objects, optional
        list of constraints on the problem

    Attributes
    ----------
    variables
    constraints

    Raises
    ------
    TypeError
        If variables or constraints are not lists or a single instance of the object
    ValueError
        If variable names are not unique

    Examples
    --------
    >>> domain = Domain()
    >>> domain += ContinuousVariable('temperature', 'reaction temperature', [1, 100])
    """

    def __init__(self, variables=None, constraints=None):
        self._variables = self._coerce_to_list(
            variables,
            Variable,
            "variables must be Variable or list of Variable objects",
        )
        self._constraints = self._coerce_to_list(
            constraints,
            Constraint,
            "constraints must be Constraint or list of Constraint objects",
        )
        # NOTE: _raise_noncontinuous_outputs is not called here; only
        # __add__ rejects non-continuous objectives.
        self._raise_names_not_unique()

    @staticmethod
    def _coerce_to_list(items, item_type, message):
        """Return `items` as a new list of `item_type` instances

        None gives an empty list and a single instance is wrapped in a list.
        A list is copied, so the domain never shares its internal list with
        the caller or with another domain. Anything else raises
        ``TypeError(message)``.
        """
        if items is None:
            return []
        if isinstance(items, item_type):
            return [items]
        if not isinstance(items, list):
            raise TypeError(message)
        if not all(isinstance(item, item_type) for item in items):
            raise TypeError(message)
        return list(items)

    @property
    def variables(self):
        """[List[Type[Variable]]]: List of variables in the domain"""
        return self._variables

    @property
    def constraints(self):
        """[List[Constraint]]: List of constraints in the domain"""
        return self._constraints

    @property
    def input_variables(self):
        """[List[Type[Variable]]]: Variables that are not objectives"""
        return [v for v in self.variables if not v.is_objective]

    @property
    def output_variables(self):
        """[List[Type[Variable]]]: Variables that are objectives"""
        return [v for v in self.variables if v.is_objective]

    def num_categorical_variables(self):
        """Number of variables whose `variable_type` is "categorical"

        Both input and output variables are counted.
        """
        return sum(1 for v in self.variables if v.variable_type == "categorical")

    def get_categorical_combinations(self):
        """Get all combinations of categoricals using full factorial design

        Only categorical input variables are considered.

        Returns
        -------
        ds: DataSet
            A dataset containing the combinations of all categorical variables.
            It is built from a dictionary keyed by ``(variable name, "DATA")``.
        """
        categoricals = [
            v for v in self.input_variables if v.variable_type == "categorical"
        ]
        doe = fullfact([len(v.levels) for v in categoricals])
        combos = {}
        for column, v in enumerate(categoricals):
            # The design holds level indices as floats; cast before indexing.
            indices = doe[:, column].astype(int)
            combos[v.name, "DATA"] = [v.levels[j] for j in indices]
        return DataSet(combos)

    def _raise_noncontinuous_outputs(self):
        """Raise an error if the outputs are not continuous variables"""
        for v in self.output_variables:
            if v.variable_type != "continuous":
                raise DomainError("All output variables must be continuous")

    def _raise_names_not_unique(self):
        """Raise a ValueError if two variables share the same name"""
        # Compare the names, not the variable objects: distinct objects are
        # always distinct set members, whatever they are called.
        names = [v.name for v in self._variables]
        if len(set(names)) != len(names):
            raise ValueError("Variable names are not unique")

    def num_variables(self, include_outputs=False) -> int:
        """Number of variables in the domain

        Parameters
        ----------
        include_outputs: bool, optional
            If True include output variables in the count.
            Defaults to False.

        Returns
        -------
        num_variables: int
            Number of variables in the domain
        """
        return sum(1 for v in self.variables if include_outputs or not v.is_objective)

    def num_discrete_variables(self, include_outputs=False) -> int:
        """Deprecated; always raises NotImplementedError"""
        raise NotImplementedError(
            "num_discrete_variables has been deprecated due to the change of Discrete to Categorical variables"
        )

    def num_continuous_dimensions(
        self, include_descriptors=False, include_outputs=False
    ) -> int:
        """The number of continuous dimensions

        Parameters
        ----------
        include_descriptors : bool, optional
            If True, the number of descriptors columns are considered.
            Defaults to False.
        include_outputs : bool, optional
            If True include output variables in the count.
            Defaults to False.

        Returns
        -------
        num_dimensions: int
            One per continuous variable, plus the number of descriptors of
            each categorical variable when `include_descriptors` is True.
        """
        k = 0
        for v in self.variables:
            if v.is_objective and not include_outputs:
                continue
            if isinstance(v, ContinuousVariable):
                k += 1
            if isinstance(v, CategoricalVariable) and include_descriptors:
                # num_descriptors is None for variables without descriptors
                if v.num_descriptors is not None:
                    k += v.num_descriptors
        return k

    def to_dict(self):
        """Return a representation of the domain as a list of variable dictionaries

        Constraints are not included.
        """
        return [variable.to_dict() for variable in self.variables]

    def to_json(self):
        """Return a json representation of the domain"""
        return json.dumps(self.to_dict())

    @staticmethod
    def from_dict(domain_list):
        """Create a Domain from the list of dictionaries made by `to_dict`

        Parameters
        ----------
        domain_list: list of dict
            One dictionary per variable; the ``type`` entry selects the
            variable class.

        Returns
        -------
        domain: Domain

        Raises
        ------
        ValueError
            If a dictionary has an unsupported ``type``
        """
        variables = []
        for variable in domain_list:
            if variable["type"] == "ContinuousVariable":
                new_variable = ContinuousVariable.from_dict(variable)
            elif variable["type"] == "CategoricalVariable":
                new_variable = CategoricalVariable.from_dict(variable)
            else:
                raise ValueError(
                    f"Cannot load variable of type:{variable['type']}. Variable should be ContinuousVariable or CategoricalVariable"
                )
            variables.append(new_variable)
        return Domain(variables)

    def __add__(self, obj):
        """Return a new Domain with the variable or constraint `obj` appended

        Raises
        ------
        DomainError
            If `obj` is an objective that is not continuous
        RuntimeError
            If `obj` is neither a Variable nor a Constraint
        """
        # TODO: make this work with adding arrays of variable or constraints
        if isinstance(obj, Variable):
            if obj.is_objective and obj.variable_type != "continuous":
                raise DomainError("Output variables must be continuous")
            return Domain(
                variables=self._variables + [obj], constraints=self.constraints
            )
        elif isinstance(obj, Constraint):
            return Domain(
                variables=self.variables, constraints=self.constraints + [obj]
            )
        else:
            raise RuntimeError("Not a supported domain object.")

    def _repr_html_(self):
        """Build html string for table display in jupyter notebooks.

        Notes
        -----
        Adapted from https://github.com/GPflow/GPflowOpt/blob/master/gpflowopt/domain.py
        """
        columns = ["Name", "Type", "Description", "Values"]
        header_cells = "".join(
            "<td><b>{0}</b></td>".format(column) for column in columns
        )
        html = [
            "<table id='domain' width=100%>",
            # Table header
            "<tr>" + header_cells + "</tr>",
            # Variables and constraints
            self._html_table_rows(),
            "</table>",
        ]
        return "".join(html)

    def _html_table_rows(self):
        """Return the html rows of all variables followed by all constraints"""
        variables = "".join([v._html_table_rows() for v in self.variables])
        constraints = "".join([c._html_table_rows() for c in self.constraints])
        return f"{variables}{constraints}"

    def __getitem__(self, key):
        """Return the variable named `key`

        Raises
        ------
        ValueError
            If no variable in the domain has that name
        """
        for v in self.variables:
            if v.name == key:
                return v
        raise ValueError("Variable not in domain")

    def __setitem__(self, key, value):
        """Replace the variable named `key` with `value`, keeping its position

        Nothing happens if no variable in the domain is named `key`.
        """
        for i, v in enumerate(self.variables):
            if v.name == key:
                self._variables[i] = value

    def copy(self):
        """Return a deep copy of the domain"""
        return deepcopy(self)
class DomainError(Exception):
    """Error raised for an invalid optimization domain"""
def fullfact(levels):
    """
    Create a general full-factorial design

    Parameters
    ----------
    levels : array-like
        An array of integers that indicate the number of levels of each input
        design factor.

    Returns
    -------
    mat : 2d-array
        The design matrix with coded levels 0 to k-1 for a k-level factor.
        It has one row per combination and one column per factor; the first
        factor varies fastest. An empty `levels` gives an array of shape
        (1, 0).

    Notes
    ------
    This code is adapted from pydoe2: https://github.com/clicumu/pyDOE2/blob/master/pyDOE2/doe_factorial.py
    """
    levels = list(levels)
    n = len(levels)  # number of factors
    # number of trial conditions; int() because np.prod of an empty list is
    # the float 1.0, which np.zeros does not accept as a dimension
    nb_lines = int(np.prod(levels))
    H = np.zeros((nb_lines, n))

    level_repeat = 1  # how many consecutive rows each level of a factor spans
    for i, k in enumerate(levels):
        # One full cycle of factor i: every level held for level_repeat rows
        pattern = np.repeat(np.arange(k), level_repeat)
        level_repeat *= k
        # The cycle now has level_repeat rows; repeat it to fill the column.
        # A zero-level factor leaves the design without rows.
        reps = nb_lines // level_repeat if level_repeat else 0
        H[:, i] = np.tile(pattern, reps)
    return H