
Vocs

Bases: XoptBaseModel

Variables, Objectives, Constraints, and other Settings (VOCS) data structure to describe optimization problems.
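
Example (not part of the generated reference): a minimal construction sketch. Field values follow the model definition shown below; the "GREATER_THAN" constraint type is assumed to be a valid ConstraintEnum member.

from xopt.vocs import VOCS

vocs = VOCS(
    variables={"x1": [0.0, 1.0], "x2": [-1.0, 1.0]},   # name -> [min, max]
    objectives={"f": "MINIMIZE"},                      # MINIMIZE or MAXIMIZE
    constraints={"c": ["GREATER_THAN", 0.0]},          # [type, value]; value must be a float
    constants={"alpha": 3.0},                          # passed through to the evaluate function
    observables=["temperature"],                       # tracked alongside objectives/constraints
)

# The same structure round-trips through YAML.
vocs2 = VOCS.from_yaml(vocs.as_yaml())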

Source code in xopt/vocs.py
class VOCS(XoptBaseModel):
    """
    Variables, Objectives, Constraints, and other Settings (VOCS) data structure
    to describe optimization problems.
    """

    variables: Dict[str, conlist(float, min_length=2, max_length=2)] = Field(
        default={},
        description="input variable names with a list of minimum and maximum values",
    )
    constraints: Dict[
        str, conlist(Union[float, ConstraintEnum], min_length=2, max_length=2)
    ] = Field(
        default={},
        description="constraint names with a list of constraint type and value",
    )
    objectives: Dict[str, ObjectiveEnum] = Field(
        default={}, description="objective names with type of objective"
    )
    constants: Dict[str, Any] = Field(
        default={}, description="constant names and values passed to evaluate function"
    )
    observables: List[str] = Field(
        default=[],
        description="observation names tracked alongside objectives and constraints",
    )

    model_config = ConfigDict(
        validate_assignment=True, use_enum_values=True, extra="forbid"
    )

    @field_validator("constraints")
    def coorect_list_types(cls, v):
        """make sure that constraint list types are correct"""
        for _, item in v.items():
            if not isinstance(item[0], str):
                raise ValueError(
                    "constraint specification list must have the first "
                    "element as a string`"
                )

            if not isinstance(item[1], float):
                raise ValueError(
                    "constraint specification list must have the second "
                    "element as a float"
                )

        return v

    @classmethod
    def from_yaml(cls, yaml_text):
        loaded = yaml.safe_load(yaml_text)
        return cls(**loaded)

    def as_yaml(self):
        return yaml.dump(self.model_dump(), default_flow_style=None, sort_keys=False)

    @property
    def bounds(self):
        """
        Returns a bounds array (mins, maxs) of shape (2, n_variables)
        Arrays of lower and upper bounds can be extracted by:
            mins, maxs = vocs.bounds
        """
        return np.array([v for _, v in sorted(self.variables.items())]).T

    @property
    def variable_names(self):
        """Returns a sorted list of variable names"""
        return list(sorted(self.variables.keys()))

    @property
    def objective_names(self):
        """Returns a sorted list of objective names"""
        return list(sorted(self.objectives.keys()))

    @property
    def constraint_names(self):
        """Returns a sorted list of constraint names"""
        if self.constraints is None:
            return []
        return list(sorted(self.constraints.keys()))

    @property
    def observable_names(self):
        return sorted(self.observables)

    @property
    def output_names(self):
        """
        Returns a list of expected output keys:
            (objectives + constraints + observables)
        Each sub-list is sorted.
        """
        full_list = self.objective_names
        for ele in self.constraint_names:
            if ele not in full_list:
                full_list += [ele]

        for ele in self.observable_names:
            if ele not in full_list:
                full_list += [ele]

        return full_list

    @property
    def constant_names(self):
        """Returns a sorted list of constraint names"""
        if self.constants is None:
            return []
        return list(sorted(self.constants.keys()))

    @property
    def all_names(self):
        """Returns all vocs names (variables, constants, objectives, constraints)"""
        return self.variable_names + self.constant_names + self.output_names

    @property
    def n_variables(self):
        """Returns the number of variables"""
        return len(self.variables)

    @property
    def n_constants(self):
        """Returns the number of constants"""
        return len(self.constants)

    @property
    def n_inputs(self):
        """Returns the number of inputs (variables and constants)"""
        return self.n_variables + self.n_constants

    @property
    def n_objectives(self):
        """Returns the number of objectives"""
        return len(self.objectives)

    @property
    def n_constraints(self):
        """Returns the number of constraints"""
        return len(self.constraints)

    @property
    def n_observables(self):
        """Returns the number of constraints"""
        return len(self.observables)

    @property
    def n_outputs(self):
        """
        Returns the number of outputs
            len(objectives + constraints + observables)
        """
        return len(self.output_names)

    def random_inputs(
        self,
        n: int = None,
        custom_bounds: dict = None,
        include_constants: bool = True,
        seed: int = None,
    ) -> list[dict]:
        """
        Uniform sampling of the variables.

        Returns a list of input dicts.

        If include_constants is True, the vocs.constants are added to each dict.

        Optional:
            n (integer) to generate a list of n input points.
            seed (integer) to initialize the random number generator

        """
        inputs = {}
        if seed is None:
            rng_sample_function = np.random.random
        else:
            rng = np.random.default_rng(seed=seed)
            rng_sample_function = rng.random

        # get bounds
        # if custom_bounds is specified then they will be clipped inside
        # vocs variable bounds
        if custom_bounds is None:
            bounds = self.variables
        else:
            variable_bounds = pd.DataFrame(self.variables)
            custom_bounds = pd.DataFrame(custom_bounds)
            custom_bounds = custom_bounds.clip(
                variable_bounds.iloc[0], variable_bounds.iloc[1], axis=1
            )
            bounds = custom_bounds.to_dict()
            for k in bounds.keys():
                bounds[k] = [bounds[k][i] for i in range(2)]

        for key, val in bounds.items():  # No need to sort here
            a, b = val
            x = rng_sample_function(n)
            inputs[key] = x * a + (1 - x) * b

        # Constants
        if include_constants and self.constants is not None:
            inputs.update(self.constants)

        if n is None:
            return [inputs]
        else:
            return pd.DataFrame(inputs).to_dict("records")

    def convert_dataframe_to_inputs(
        self, data: pd.DataFrame, include_constants=True
    ) -> pd.DataFrame:
        """
        Extracts only inputs from a dataframe.
        This will add constants if `include_constants` is true.
        """
        # make sure that the df keys only contain vocs variables
        if not set(self.variable_names) == set(data.keys()):
            raise ValueError(
                "input dataframe column set must equal set of vocs variables"
            )

        # only keep the variables
        inner_copy = data.copy()

        # append constants if requested
        if include_constants:
            constants = self.constants
            if constants is not None:
                for name, val in constants.items():
                    inner_copy[name] = val

        return inner_copy

    def convert_numpy_to_inputs(
        self, inputs: np.ndarray, include_constants=True
    ) -> pd.DataFrame:
        """
        Convert a 2D numpy array into input data (a DataFrame) for evaluation.
        Assumes that the columns of the array correspond to
        `sorted(self.variables.keys())`.

        """
        df = pd.DataFrame(inputs, columns=self.variable_names)
        return self.convert_dataframe_to_inputs(df, include_constants)

    # Extract optimization data (in correct column order)
    def variable_data(
        self,
        data: Union[pd.DataFrame, List[Dict], List[Dict]],
        prefix: str = "variable_",
    ) -> pd.DataFrame:
        """
        Returns a dataframe containing variables according to `vocs.variables` in sorted
        order

        Parameters
        ----------
            data: DataFrame
                Data to be processed.
            prefix: str, optional
                Prefix added to column names.

        Returns
        -------
            result: DataFrame
                Processed Dataframe
        """
        return form_variable_data(self.variables, data, prefix=prefix)

    def objective_data(
        self,
        data: Union[pd.DataFrame, List[Dict], List[Dict]],
        prefix: str = "objective_",
        return_raw=False,
    ) -> pd.DataFrame:
        """
        Returns a dataframe containing objective data transformed according to
        `vocs.objectives` such that we always assume minimization.

        Parameters
        ----------
            data: DataFrame
                Data to be processed.
            prefix: str, optional
                Prefix added to column names.

        Returns
        -------
            result: DataFrame
                Processed Dataframe
        """
        return form_objective_data(self.objectives, data, prefix, return_raw)

    def constraint_data(
        self,
        data: Union[pd.DataFrame, List[Dict], List[Dict]],
        prefix: str = "constraint_",
    ) -> pd.DataFrame:
        """
        Returns a dataframe containing constraint data transformed according to
        `vocs.constraints` such that values that satisfy each constraint are negative.

        Parameters
        ----------
            data: DataFrame
                Data to be processed.
            prefix: str, optional
                Prefix added to column names.

        Returns
        -------
            result: DataFrame
                Processed Dataframe
        """
        return form_constraint_data(self.constraints, data, prefix)

    def observable_data(
        self,
        data: Union[pd.DataFrame, List[Dict], List[Dict]],
        prefix: str = "observable_",
    ) -> pd.DataFrame:
        """
        Returns a dataframe containing observable data

        Parameters
        ----------
            data: DataFrame
                Data to be processed.
            prefix: str, optional
                Prefix added to column names.

        Returns
        -------
            result: DataFrame
                Processed Dataframe
        """
        return form_observable_data(self.observable_names, data, prefix)

    def feasibility_data(
        self,
        data: Union[pd.DataFrame, List[Dict], List[Dict]],
        prefix: str = "feasible_",
    ) -> pd.DataFrame:
        """
        Returns a dataframe containing booleans denoting if a constraint is satisfied or
        not. Returned dataframe also contains a column `feasible` which denotes if
        all constraints are satisfied.

        Parameters
        ----------
            data: DataFrame
                Data to be processed.
            prefix: str, optional
                Prefix added to column names.

        Returns
        -------
            result: DataFrame
                Processed Dataframe
        """
        return form_feasibility_data(self.constraints, data, prefix)

    def normalize_inputs(self, input_points: pd.DataFrame) -> pd.DataFrame:
        """
        Normalize input data (transform data into the range [0,1]) based on the
        variable ranges defined in the VOCS.

        Parameters
        ----------
        input_points : pd.DataFrame
            A DataFrame containing input data to be normalized.

        Returns
        -------
        result : pd.DataFrame
            A DataFrame with input data in the range [0,1] corresponding to the
            specified variable ranges. Contains columns equal to the intersection
            between `input_points` and `vocs.variable_names`.

        Notes
        -----

        If the input DataFrame is empty or no variable information is available in
        the VOCS, an empty DataFrame is returned.

        """
        normed_data = {}
        for name in self.variable_names:
            if name in input_points.columns:
                width = self.variables[name][1] - self.variables[name][0]
                normed_data[name] = (
                    input_points[name] - self.variables[name][0]
                ) / width

        if len(normed_data):
            return pd.DataFrame(normed_data)
        else:
            return pd.DataFrame([])

    def denormalize_inputs(self, input_points: pd.DataFrame) -> pd.DataFrame:
        """
        Denormalize input data (transform data from the range [0,1]) based on the
        variable ranges defined in the VOCS.

        Parameters
        ----------
        input_points : pd.DataFrame
            A DataFrame containing normalized input data in the range [0,1].

        Returns
        -------
        result : pd.DataFrame
            A DataFrame with denormalized input data corresponding to the
            specified variable ranges. Contains columns equal to the intersection
            between `input_points` and `vocs.variable_names`.

        Notes
        -----

        If the input DataFrame is empty or no variable information is available in
        the VOCS, an empty DataFrame is returned.

        """
        denormed_data = {}
        for name in self.variable_names:
            if name in input_points.columns:
                width = self.variables[name][1] - self.variables[name][0]
                denormed_data[name] = (
                    input_points[name] * width + self.variables[name][0]
                )

        if len(denormed_data):
            return pd.DataFrame(denormed_data)
        else:
            return pd.DataFrame([])

    def validate_input_data(self, input_points: pd.DataFrame) -> None:
        """
        Validates input data. Raises an error if the input data does not satisfy
        requirements given by vocs.

        Parameters
        ----------
            input_points : DataFrame
                Input data to be validated.

        Returns
        -------
            None

        Raises
        ------
            ValueError: if input data does not satisfy requirements.
        """
        validate_input_data(self, input_points)

    def extract_data(self, data: pd.DataFrame, return_raw=False):
        """
        split dataframe into separate dataframes for variables, objectives and
        constraints based on vocs - objective data is transformed based on
        `vocs.objectives` properties

        Parameters
        ----------
            data: DataFrame
                Dataframe to be split
            return_raw : bool, optional
                If True, return untransformed objective data

        Returns
        -------
            variable_data : DataFrame
                Dataframe containing variable data
            objective_data : DataFrame
                Dataframe containing objective data
            constraint_data : DataFrame
                Dataframe containing constraint data
        """
        variable_data = self.variable_data(data, "")
        objective_data = self.objective_data(data, "", return_raw)
        constraint_data = self.constraint_data(data, "")
        return variable_data, objective_data, constraint_data

    def select_best(self, data: pd.DataFrame, n: int = 1):
        """
        get the best value and point for a given data set based on vocs
        - does not work for multi-objective problems
        - data that violates any constraints is ignored

        Parameters
        ----------
            data: DataFrame
                Dataframe to select best point from
            n: int, optional
                Number of best points to return

        Returns
        -------
            index: index of best point
            value: value of best point
            params: input parameters that give the best point
        """
        if self.n_objectives != 1:
            raise NotImplementedError(
                "cannot select best point when n_objectives is not 1"
            )

        feasible_data = self.feasibility_data(data)
        ascending_flag = {"MINIMIZE": True, "MAXIMIZE": False}
        obj = self.objectives[self.objective_names[0]]
        obj_name = self.objective_names[0]
        res = data[feasible_data["feasible"]].sort_values(
            obj_name, ascending=ascending_flag[obj]
        )[obj_name][:n]

        params = data.iloc[res.index.to_numpy()][self.variable_names].to_dict(
            orient="records"
        )[0]

        return res.index.to_numpy(), res.to_numpy(), params

    def cumulative_optimum(self, data: pd.DataFrame) -> pd.DataFrame:
        """
        Returns the cumulative optimum for the given DataFrame.

        Parameters
        ----------
        data: DataFrame
            Data for which the cumulative optimum shall be calculated.

        Returns
        -------
        DataFrame
            Cumulative optimum for the given DataFrame.

        """
        if not self.objectives:
            raise RuntimeError("No objectives defined.")
        if data.empty:
            return pd.DataFrame()
        obj_name = self.objective_names[0]
        obj = self.objectives[obj_name]
        get_opt = np.nanmax if obj == "MAXIMIZE" else np.nanmin
        feasible = self.feasibility_data(data)["feasible"]
        feasible_obj_values = [
            data[obj_name].values[i] if feasible[i] else np.nan
            for i in range(len(data))
        ]
        cumulative_optimum = np.array(
            [get_opt(feasible_obj_values[: i + 1]) for i in range(len(data))]
        )
        return pd.DataFrame({f"best_{obj_name}": cumulative_optimum}, index=data.index)

all_names property

Returns all vocs names (variables, constants, objectives, constraints, observables)

bounds property

Returns a bounds array (mins, maxs) of shape (2, n_variables) Arrays of lower and upper bounds can be extracted by: mins, maxs = vocs.bounds
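
Usage sketch (hypothetical two-variable VOCS): the array stacks the sorted [min, max] pairs column-wise, so unpacking the first axis yields the lower and upper bound vectors.

from xopt.vocs import VOCS

vocs = VOCS(variables={"x1": [0.0, 1.0], "x2": [-1.0, 2.0]})
mins, maxs = vocs.bounds   # mins -> array([ 0., -1.]), maxs -> array([1., 2.])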

constant_names property

Returns a sorted list of constant names

constraint_names property

Returns a sorted list of constraint names

n_constants property

Returns the number of constants

n_constraints property

Returns the number of constraints

n_inputs property

Returns the number of inputs (variables and constants)

n_objectives property

Returns the number of objectives

n_observables property

Returns the number of observables

n_outputs property

Returns the number of outputs len(objectives + constraints + observables)

n_variables property

Returns the number of variables

objective_names property

Returns a sorted list of objective names

output_names property

Returns a list of expected output keys

(objectives + constraints + observables)

Each sub-list is sorted.

variable_names property

Returns a sorted list of variable names

constraint_data(data, prefix='constraint_')

Returns a dataframe containing constraint data transformed according to vocs.constraints such that values that satisfy each constraint are negative.

Parameters

data: DataFrame
    Data to be processed.
prefix: str, optional
    Prefix added to column names.

Returns

result: DataFrame
    Processed Dataframe
Source code in xopt/vocs.py
def constraint_data(
    self,
    data: Union[pd.DataFrame, List[Dict], List[Dict]],
    prefix: str = "constraint_",
) -> pd.DataFrame:
    """
    Returns a dataframe containing constraint data transformed according to
    `vocs.constraints` such that values that satisfy each constraint are negative.

    Parameters
    ----------
        data: DataFrame
            Data to be processed.
        prefix: str, optional
            Prefix added to column names.

    Returns
    -------
        result: DataFrame
            Processed Dataframe
    """
    return form_constraint_data(self.constraints, data, prefix)
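
Usage sketch (hypothetical names; "GREATER_THAN" is assumed to be a valid ConstraintEnum member): the returned columns are prefixed, e.g. constraint_c, and rows that satisfy the constraint have negative values.

import pandas as pd
from xopt.vocs import VOCS

vocs = VOCS(
    variables={"x": [0.0, 1.0]},
    constraints={"c": ["GREATER_THAN", 0.0]},
)
data = pd.DataFrame({"x": [0.1, 0.9], "c": [0.5, -0.5]})

print(vocs.constraint_data(data))   # column "constraint_c"; negative where the constraint holds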

convert_dataframe_to_inputs(data, include_constants=True)

Extracts only inputs from a dataframe. This will add constants if include_constants is true.

Source code in xopt/vocs.py
def convert_dataframe_to_inputs(
    self, data: pd.DataFrame, include_constants=True
) -> pd.DataFrame:
    """
    Extracts only inputs from a dataframe.
    This will add constants if `include_constants` is true.
    """
    # make sure that the df keys only contain vocs variables
    if not set(self.variable_names) == set(data.keys()):
        raise ValueError(
            "input dataframe column set must equal set of vocs variables"
        )

    # only keep the variables
    inner_copy = data.copy()

    # append constants if requested
    if include_constants:
        constants = self.constants
        if constants is not None:
            for name, val in constants.items():
                inner_copy[name] = val

    return inner_copy
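
Usage sketch (hypothetical names): the DataFrame columns must match the vocs variable names exactly; constants are appended as additional columns.

import pandas as pd
from xopt.vocs import VOCS

vocs = VOCS(
    variables={"x1": [0.0, 1.0], "x2": [0.0, 1.0]},
    constants={"alpha": 3.0},
)
df = pd.DataFrame({"x1": [0.1, 0.2], "x2": [0.3, 0.4]})

inputs = vocs.convert_dataframe_to_inputs(df)   # columns: x1, x2, alpha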

convert_numpy_to_inputs(inputs, include_constants=True)

Convert a 2D numpy array into input data (a DataFrame) for evaluation. Assumes that the columns of the array correspond to sorted(self.variables.keys()).

Source code in xopt/vocs.py
def convert_numpy_to_inputs(
    self, inputs: np.ndarray, include_constants=True
) -> pd.DataFrame:
    """
    Convert a 2D numpy array into input data (a DataFrame) for evaluation.
    Assumes that the columns of the array correspond to
    `sorted(self.variables.keys())`.

    """
    df = pd.DataFrame(inputs, columns=self.variable_names)
    return self.convert_dataframe_to_inputs(df, include_constants)
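
Usage sketch (hypothetical names): array columns are interpreted in sorted variable-name order.

import numpy as np
from xopt.vocs import VOCS

vocs = VOCS(
    variables={"x1": [0.0, 1.0], "x2": [0.0, 1.0]},
    constants={"alpha": 3.0},
)
arr = np.array([[0.1, 0.3], [0.2, 0.4]])        # columns -> x1, x2

inputs = vocs.convert_numpy_to_inputs(arr)      # DataFrame with columns x1, x2, alpha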

coorect_list_types(v)

make sure that constraint list types are correct

Source code in xopt/vocs.py
@field_validator("constraints")
def coorect_list_types(cls, v):
    """make sure that constraint list types are correct"""
    for _, item in v.items():
        if not isinstance(item[0], str):
            raise ValueError(
                "constraint specification list must have the first "
                "element as a string`"
            )

        if not isinstance(item[1], float):
            raise ValueError(
                "constraint specification list must have the second "
                "element as a float"
            )

    return v

cumulative_optimum(data)

Returns the cumulative optimum for the given DataFrame.

Parameters

data: DataFrame Data for which the cumulative optimum shall be calculated.

Returns

DataFrame Cumulative optimum for the given DataFrame.

Source code in xopt/vocs.py
def cumulative_optimum(self, data: pd.DataFrame) -> pd.DataFrame:
    """
    Returns the cumulative optimum for the given DataFrame.

    Parameters
    ----------
    data: DataFrame
        Data for which the cumulative optimum shall be calculated.

    Returns
    -------
    DataFrame
        Cumulative optimum for the given DataFrame.

    """
    if not self.objectives:
        raise RuntimeError("No objectives defined.")
    if data.empty:
        return pd.DataFrame()
    obj_name = self.objective_names[0]
    obj = self.objectives[obj_name]
    get_opt = np.nanmax if obj == "MAXIMIZE" else np.nanmin
    feasible = self.feasibility_data(data)["feasible"]
    feasible_obj_values = [
        data[obj_name].values[i] if feasible[i] else np.nan
        for i in range(len(data))
    ]
    cumulative_optimum = np.array(
        [get_opt(feasible_obj_values[: i + 1]) for i in range(len(data))]
    )
    return pd.DataFrame({f"best_{obj_name}": cumulative_optimum}, index=data.index)
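
Usage sketch (hypothetical data; "LESS_THAN" is assumed to be a valid ConstraintEnum member): the result has one column named best_<objective>, holding the running optimum over feasible rows.

import pandas as pd
from xopt.vocs import VOCS

vocs = VOCS(
    variables={"x": [0.0, 1.0]},
    objectives={"f": "MINIMIZE"},
    constraints={"c": ["LESS_THAN", 1.0]},
)
data = pd.DataFrame({"x": [0.1, 0.2, 0.3], "f": [3.0, 1.0, 2.0], "c": [0.0, 0.0, 0.0]})

# All rows are feasible here, so this is the running minimum of f: 3.0, 1.0, 1.0
print(vocs.cumulative_optimum(data)["best_f"])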

denormalize_inputs(input_points)

Denormalize input data (transform data from the range [0,1]) based on the variable ranges defined in the VOCS.

Parameters

input_points : pd.DataFrame A DataFrame containing normalized input data in the range [0,1].

Returns

result : pd.DataFrame A DataFrame with denormalized input data corresponding to the specified variable ranges. Contains columns equal to the intersection between input_points and vocs.variable_names.

Notes

If the input DataFrame is empty or no variable information is available in the VOCS, an empty DataFrame is returned.

Source code in xopt/vocs.py
def denormalize_inputs(self, input_points: pd.DataFrame) -> pd.DataFrame:
    """
    Denormalize input data (transform data from the range [0,1]) based on the
    variable ranges defined in the VOCS.

    Parameters
    ----------
    input_points : pd.DataFrame
        A DataFrame containing normalized input data in the range [0,1].

    Returns
    -------
    result : pd.DataFrame
        A DataFrame with denormalized input data corresponding to the
        specified variable ranges. Contains columns equal to the intersection
        between `input_points` and `vocs.variable_names`.

    Notes
    -----

    If the input DataFrame is empty or no variable information is available in
    the VOCS, an empty DataFrame is returned.

    """
    denormed_data = {}
    for name in self.variable_names:
        if name in input_points.columns:
            width = self.variables[name][1] - self.variables[name][0]
            denormed_data[name] = (
                input_points[name] * width + self.variables[name][0]
            )

    if len(denormed_data):
        return pd.DataFrame(denormed_data)
    else:
        return pd.DataFrame([])

extract_data(data, return_raw=False)

split dataframe into separate dataframes for variables, objectives and constraints based on vocs - objective data is transformed based on vocs.objectives properties

Parameters

data: DataFrame
    Dataframe to be split
return_raw : bool, optional
    If True, return untransformed objective data

Returns

variable_data : DataFrame
    Dataframe containing variable data
objective_data : DataFrame
    Dataframe containing objective data
constraint_data : DataFrame
    Dataframe containing constraint data
Source code in xopt/vocs.py
def extract_data(self, data: pd.DataFrame, return_raw=False):
    """
    split dataframe into separate dataframes for variables, objectives and
    constraints based on vocs - objective data is transformed based on
    `vocs.objectives` properties

    Parameters
    ----------
        data: DataFrame
            Dataframe to be split
        return_raw : bool, optional
            If True, return untransformed objective data

    Returns
    -------
        variable_data : DataFrame
            Dataframe containing variable data
        objective_data : DataFrame
            Dataframe containing objective data
        constraint_data : DataFrame
            Dataframe containing constraint data
    """
    variable_data = self.variable_data(data, "")
    objective_data = self.objective_data(data, "", return_raw)
    constraint_data = self.constraint_data(data, "")
    return variable_data, objective_data, constraint_data
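
Usage sketch (hypothetical data): three frames with un-prefixed columns; objective values are transformed per vocs.objectives so that minimization is assumed, unless return_raw=True.

import pandas as pd
from xopt.vocs import VOCS

vocs = VOCS(
    variables={"x": [0.0, 1.0]},
    objectives={"f": "MAXIMIZE"},
    constraints={"c": ["LESS_THAN", 1.0]},
)
data = pd.DataFrame({"x": [0.1, 0.2], "f": [1.0, 2.0], "c": [0.0, 0.5]})

var_df, obj_df, con_df = vocs.extract_data(data)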

feasibility_data(data, prefix='feasible_')

Returns a dataframe containing booleans denoting if a constraint is satisfied or not. Returned dataframe also contains a column feasible which denotes if all constraints are satisfied.

Parameters

data: DataFrame
    Data to be processed.
prefix: str, optional
    Prefix added to column names.

Returns

result: DataFrame
    Processed Dataframe
Source code in xopt/vocs.py
def feasibility_data(
    self,
    data: Union[pd.DataFrame, List[Dict], List[Dict]],
    prefix: str = "feasible_",
) -> pd.DataFrame:
    """
    Returns a dataframe containing booleans denoting if a constraint is satisfied or
    not. Returned dataframe also contains a column `feasible` which denotes if
    all constraints are satisfied.

    Parameters
    ----------
        data: DataFrame
            Data to be processed.
        prefix: str, optional
            Prefix added to column names.

    Returns
    -------
        result: DataFrame
            Processed Dataframe
    """
    return form_feasibility_data(self.constraints, data, prefix)
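
Usage sketch (hypothetical data; "LESS_THAN" is assumed to be a valid ConstraintEnum member):

import pandas as pd
from xopt.vocs import VOCS

vocs = VOCS(
    variables={"x": [0.0, 1.0]},
    constraints={"c": ["LESS_THAN", 0.5]},
)
data = pd.DataFrame({"x": [0.1, 0.9], "c": [0.2, 0.9]})

fdata = vocs.feasibility_data(data)
# One boolean column per constraint (feasible_c) plus "feasible",
# which is True only where every constraint is satisfied.
print(fdata["feasible"])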

normalize_inputs(input_points)

Normalize input data (transform data into the range [0,1]) based on the variable ranges defined in the VOCS.

Parameters

input_points : pd.DataFrame A DataFrame containing input data to be normalized.

Returns

result : pd.DataFrame A DataFrame with input data in the range [0,1] corresponding to the specified variable ranges. Contains columns equal to the intersection between input_points and vocs.variable_names.

Notes

If the input DataFrame is empty or no variable information is available in the VOCS, an empty DataFrame is returned.

Source code in xopt/vocs.py
def normalize_inputs(self, input_points: pd.DataFrame) -> pd.DataFrame:
    """
    Normalize input data (transform data into the range [0,1]) based on the
    variable ranges defined in the VOCS.

    Parameters
    ----------
    input_points : pd.DataFrame
        A DataFrame containing input data to be normalized.

    Returns
    -------
    result : pd.DataFrame
        A DataFrame with input data in the range [0,1] corresponding to the
        specified variable ranges. Contains columns equal to the intersection
        between `input_points` and `vocs.variable_names`.

    Notes
    -----

    If the input DataFrame is empty or no variable information is available in
    the VOCS, an empty DataFrame is returned.

    """
    normed_data = {}
    for name in self.variable_names:
        if name in input_points.columns:
            width = self.variables[name][1] - self.variables[name][0]
            normed_data[name] = (
                input_points[name] - self.variables[name][0]
            ) / width

    if len(normed_data):
        return pd.DataFrame(normed_data)
    else:
        return pd.DataFrame([])
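
Usage sketch (hypothetical variables): normalize_inputs maps each variable onto [0, 1] using its VOCS range, and denormalize_inputs inverts the transformation.

import pandas as pd
from xopt.vocs import VOCS

vocs = VOCS(variables={"x1": [0.0, 2.0], "x2": [-1.0, 1.0]})
points = pd.DataFrame({"x1": [0.0, 1.0, 2.0], "x2": [-1.0, 0.0, 1.0]})

normed = vocs.normalize_inputs(points)       # values in [0, 1]
restored = vocs.denormalize_inputs(normed)   # recovers the original values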

objective_data(data, prefix='objective_', return_raw=False)

Returns a dataframe containing objective data transformed according to vocs.objectives such that we always assume minimization.

Parameters

data: DataFrame
    Data to be processed.
prefix: str, optional
    Prefix added to column names.

Returns

result: DataFrame
    Processed Dataframe
Source code in xopt/vocs.py
def objective_data(
    self,
    data: Union[pd.DataFrame, List[Dict], List[Dict]],
    prefix: str = "objective_",
    return_raw=False,
) -> pd.DataFrame:
    """
    Returns a dataframe containing objective data transformed according to
    `vocs.objectives` such that we always assume minimization.

    Parameters
    ----------
        data: DataFrame
            Data to be processed.
        prefix: str, optional
            Prefix added to column names.

    Returns
    -------
        result: DataFrame
            Processed Dataframe
    """
    return form_objective_data(self.objectives, data, prefix, return_raw)
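
Usage sketch (hypothetical data): the returned column is prefixed (objective_f) and transformed so that smaller values are better; pass return_raw=True to keep the untransformed values.

import pandas as pd
from xopt.vocs import VOCS

vocs = VOCS(variables={"x": [0.0, 1.0]}, objectives={"f": "MAXIMIZE"})
data = pd.DataFrame({"x": [0.1, 0.2], "f": [1.0, 2.0]})

print(vocs.objective_data(data))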

observable_data(data, prefix='observable_')

Returns a dataframe containing observable data

Parameters

data: DataFrame
    Data to be processed.
prefix: str, optional
    Prefix added to column names.

Returns

result: DataFrame
    Processed Dataframe
Source code in xopt/vocs.py
def observable_data(
    self,
    data: Union[pd.DataFrame, List[Dict], List[Dict]],
    prefix: str = "observable_",
) -> pd.DataFrame:
    """
    Returns a dataframe containing observable data

    Parameters
    ----------
        data: DataFrame
            Data to be processed.
        prefix: str, optional
            Prefix added to column names.

    Returns
    -------
        result: DataFrame
            Processed Dataframe
    """
    return form_observable_data(self.observable_names, data, prefix)

random_inputs(n=None, custom_bounds=None, include_constants=True, seed=None)

Uniform sampling of the variables.

Returns a list of input dicts.

If include_constants is True, the vocs.constants are added to each dict.

Optional

n (integer) to generate a list of n input points. seed (integer) to initialize the random number generator

Source code in xopt/vocs.py
def random_inputs(
    self,
    n: int = None,
    custom_bounds: dict = None,
    include_constants: bool = True,
    seed: int = None,
) -> list[dict]:
    """
    Uniform sampling of the variables.

    Returns a list of input dicts.

    If include_constants is True, the vocs.constants are added to each dict.

    Optional:
        n (integer) to generate a list of n input points.
        seed (integer) to initialize the random number generator

    """
    inputs = {}
    if seed is None:
        rng_sample_function = np.random.random
    else:
        rng = np.random.default_rng(seed=seed)
        rng_sample_function = rng.random

    # get bounds
    # if custom_bounds is specified then they will be clipped inside
    # vocs variable bounds
    if custom_bounds is None:
        bounds = self.variables
    else:
        variable_bounds = pd.DataFrame(self.variables)
        custom_bounds = pd.DataFrame(custom_bounds)
        custom_bounds = custom_bounds.clip(
            variable_bounds.iloc[0], variable_bounds.iloc[1], axis=1
        )
        bounds = custom_bounds.to_dict()
        for k in bounds.keys():
            bounds[k] = [bounds[k][i] for i in range(2)]

    for key, val in bounds.items():  # No need to sort here
        a, b = val
        x = rng_sample_function(n)
        inputs[key] = x * a + (1 - x) * b

    # Constants
    if include_constants and self.constants is not None:
        inputs.update(self.constants)

    if n is None:
        return [inputs]
    else:
        return pd.DataFrame(inputs).to_dict("records")
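
Usage sketch (hypothetical names): without n a single point is returned (as a one-element list); with n, a list of n dicts. custom_bounds are clipped to the vocs variable ranges.

from xopt.vocs import VOCS

vocs = VOCS(
    variables={"x1": [0.0, 1.0], "x2": [-1.0, 1.0]},
    constants={"alpha": 3.0},
)

single = vocs.random_inputs(seed=42)      # [{"x1": ..., "x2": ..., "alpha": 3.0}]
batch = vocs.random_inputs(n=5, seed=42)  # list of 5 such dicts

# Sample from a narrower region; bounds outside the vocs ranges are clipped.
narrow = vocs.random_inputs(n=5, custom_bounds={"x1": [0.2, 0.4], "x2": [-0.5, 0.5]})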

select_best(data, n=1)

get the best value and point for a given data set based on vocs - does not work for multi-objective problems - data that violates any constraints is ignored

Parameters

data: DataFrame
    Dataframe to select best point from
n: int, optional
    Number of best points to return

Returns

index: index of best point
value: value of best point
params: input parameters that give the best point
Source code in xopt/vocs.py
def select_best(self, data: pd.DataFrame, n: int = 1):
    """
    get the best value and point for a given data set based on vocs
    - does not work for multi-objective problems
    - data that violates any constraints is ignored

    Parameters
    ----------
        data: DataFrame
            Dataframe to select best point from
        n: int, optional
            Number of best points to return

    Returns
    -------
        index: index of best point
        value: value of best point
        params: input parameters that give the best point
    """
    if self.n_objectives != 1:
        raise NotImplementedError(
            "cannot select best point when n_objectives is not 1"
        )

    feasible_data = self.feasibility_data(data)
    ascending_flag = {"MINIMIZE": True, "MAXIMIZE": False}
    obj = self.objectives[self.objective_names[0]]
    obj_name = self.objective_names[0]
    res = data[feasible_data["feasible"]].sort_values(
        obj_name, ascending=ascending_flag[obj]
    )[obj_name][:n]

    params = data.iloc[res.index.to_numpy()][self.variable_names].to_dict(
        orient="records"
    )[0]

    return res.index.to_numpy(), res.to_numpy(), params
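
Usage sketch (hypothetical data; "LESS_THAN" is assumed to be a valid ConstraintEnum member): only feasible rows are considered, and the single objective decides the ordering.

import pandas as pd
from xopt.vocs import VOCS

vocs = VOCS(
    variables={"x": [0.0, 1.0]},
    objectives={"f": "MINIMIZE"},
    constraints={"c": ["LESS_THAN", 1.0]},
)
data = pd.DataFrame({"x": [0.1, 0.2, 0.3], "f": [3.0, 1.0, 2.0], "c": [0.0, 0.0, 2.0]})

idx, value, params = vocs.select_best(data)
# Expect idx -> [1], value -> [1.0], params -> {"x": 0.2};
# the last row is excluded because c = 2.0 violates the constraint.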

validate_input_data(input_points)

Validates input data. Raises an error if the input data does not satisfy requirements given by vocs.

Parameters

input_points : DataFrame
    Input data to be validated.

Returns

None

Raises

ValueError: if input data does not satisfy requirements.
Source code in xopt/vocs.py
def validate_input_data(self, input_points: pd.DataFrame) -> None:
    """
    Validates input data. Raises an error if the input data does not satisfy
    requirements given by vocs.

    Parameters
    ----------
        input_points : DataFrame
            Input data to be validated.

    Returns
    -------
        None

    Raises
    ------
        ValueError: if input data does not satisfy requirements.
    """
    validate_input_data(self, input_points)

variable_data(data, prefix='variable_')

Returns a dataframe containing variables according to vocs.variables in sorted order

Parameters

data: DataFrame
    Data to be processed.
prefix: str, optional
    Prefix added to column names.

Returns

result: DataFrame
    Processed Dataframe
Source code in xopt/vocs.py
def variable_data(
    self,
    data: Union[pd.DataFrame, List[Dict], List[Dict]],
    prefix: str = "variable_",
) -> pd.DataFrame:
    """
    Returns a dataframe containing variables according to `vocs.variables` in sorted
    order

    Parameters
    ----------
        data: DataFrame
            Data to be processed.
        prefix: str, optional
            Prefix added to column names.

    Returns
    -------
        result: DataFrame
            Processed Dataframe
    """
    return form_variable_data(self.variables, data, prefix=prefix)
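
Usage sketch (hypothetical data): only the vocs variables are kept, in sorted order, with the default column prefix.

import pandas as pd
from xopt.vocs import VOCS

vocs = VOCS(variables={"x2": [0.0, 1.0], "x1": [0.0, 1.0]})
data = pd.DataFrame({"x1": [0.1], "x2": [0.2], "f": [1.0]})

print(vocs.variable_data(data))   # columns: variable_x1, variable_x2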