Source code for pybayopt.bayopt_smac

import os
import time
import shutil
import numpy as np
import pandas as pd

from smac.configspace import ConfigurationSpace
from ConfigSpace.hyperparameters import CategoricalHyperparameter, \
    UniformFloatHyperparameter, UniformIntegerHyperparameter

from smac.scenario.scenario import Scenario
from smac.facade.smac_facade import SMAC

from .bayopt_base import BayoptBase


[docs]class SMACOPT(BayoptBase): """ Interface of SMAC (Bayesian Optimization). Parameters ---------- :type para_space: dict or list of dictionaries :param para_space: It has three types: Continuous: Specify `Type` as `continuous`, and include the keys of `Range` (a list with lower-upper elements pair) and `Wrapper`, a callable function for wrapping the values. Integer: Specify `Type` as `integer`, and include the keys of `Mapping` (a list with all the sortted integer elements). Categorical: Specify `Type` as `categorical`, and include the keys of `Mapping` (a list with all the possible categories). :type max_runs: int, optional, default=100 :param max_runs: The maximum number of trials to be evaluated. When this values is reached, then the algorithm will stop. :type estimator: estimator object :param estimator: This is assumed to implement the scikit-learn estimator interface. :type cv: cross-validation method, an sklearn object. :param cv: e.g., `StratifiedKFold` and KFold` is used. :type scoring: string, callable, list/tuple, dict or None, optional, default=None :param scoring: A sklearn type scoring function. If None, the estimator's default scorer (if available) is used. See the package `sklearn` for details. :type refit: boolean, or string, optional, default=True :param refit: It controls whether to refit an estimator using the best found parameters on the whole dataset. :type random_state: int, optional, default=0 :param random_state: The random seed for optimization. :type verbose: boolean, optional, default=False :param verbose: It controls whether the searching history will be printed. Examples ---------- >>> import numpy as np >>> from sklearn import svm >>> from sklearn import datasets >>> from sequd import SMACOPT >>> from sklearn.model_selection import KFold >>> iris = datasets.load_iris() >>> ParaSpace = {'C':{'Type': 'continuous', 'Range': [-6, 16], 'Wrapper': np.exp2}, 'gamma': {'Type': 'continuous', 'Range': [-16, 6], 'Wrapper': np.exp2}} >>> estimator = svm.SVC() >>> cv = KFold(n_splits=5, random_state=0, shuffle=True) >>> clf = SMACOPT(ParaSpace, max_runs=100, estimator=estimator, cv=cv, scoring=None, refit=None, random_state=0, verbose=False) >>> clf.fit(iris.data, iris.target) Attributes ---------- :vartype best_score\_: float :ivar best_score\_: The best average cv score among the evaluated trials. :vartype best_params\_: dict :ivar best_params\_: Parameters that reaches `best_score_`. :vartype best_estimator\_: sklearn estimator :ivar best_estimator\_: The estimator refitted based on the `best_params_`. Not available if estimator = None or `refit=False`. :vartype search_time_consumed\_: float :ivar search_time_consumed\_: Seconds used for whole searching procedure. :vartype refit_time\_: float :ivar refit_time\_: Seconds used for refitting the best model on the whole dataset. Not available if estimator=None or `refit=False`. """ def __init__(self, para_space, max_runs=100, estimator=None, cv=None, scoring=None, refit=True, random_state=0, verbose=False): super(SMACOPT, self).__init__(para_space, max_runs, verbose) self.cv = cv self.refit = refit self.scoring = scoring self.estimator = estimator self.random_state = random_state self.method = "SMAC" self.cs = ConfigurationSpace() for item, values in self.para_space.items(): if values['Type'] == "continuous": para = UniformFloatHyperparameter(item, values['Range'][0], values['Range'][1]) elif values['Type'] == "integer": para = UniformIntegerHyperparameter(item, min(values['Mapping']), max(values['Mapping'])) elif values['Type'] == "categorical": para = CategoricalHyperparameter(item, values['Mapping']) self.cs.add_hyperparameter(para) def obj_func(self, cfg): cfg = {k: cfg[k] for k in cfg} next_params = pd.DataFrame(cfg, columns=self.para_names, index=[0]) parameters = {} for item, values in self.para_space.items(): if (values['Type'] == "continuous"): parameters[item] = values['Wrapper'](float(next_params[item].iloc[0])) elif (values['Type'] == "integer"): parameters[item] = int(next_params[item].iloc[0]) elif (values['Type'] == "categorical"): parameters[item] = next_params[item].iloc[0] score = self.wrapper_func(parameters) logs_aug = parameters logs_aug.update({"score": score}) logs_aug = pd.DataFrame(logs_aug, index=[self.iteration]) self.logs = pd.concat([self.logs, logs_aug]).reset_index(drop=True) if self.verbose: self.pbar.update(1) self.iteration += 1 self.pbar.set_description("Iteration %d:" % self.iteration) self.pbar.set_postfix_str("Current Best Score = %.5f" % (self.logs.loc[:, "score"].max())) return -score def _run(self, wrapper_func): """ Main loop for searching the best hyperparameters. """ self.wrapper_func = wrapper_func file_dir = "./temp" + "/" + str(time.time()) + str(np.random.rand(1)[0]) + "/" if not os.path.exists(file_dir): os.makedirs(file_dir) scenario = Scenario({"run_obj": "quality", # we optimize quality (alternatively runtime) "runcount-limit": self.max_runs, # maximum function evaluations "cs": self.cs, # configuration space "deterministic": "true", "output_dir": file_dir, "abort_on_first_run_crash": False}) self.smac = SMAC(scenario=scenario, rng=np.random.seed(self.random_state), tae_runner=self.obj_func) self.smac.solver.intensifier.tae_runner.use_pynisher = False # turn off the limit for resources self.smac.optimize() shutil.rmtree(file_dir)