# 1. Bayesian optimization method

(Note: this is a general method — an idea.)
Bayesian optimization builds a surrogate function (a probability model) from the past evaluation results of the objective function and uses it to find the values that minimize the objective. Unlike random or grid search, the Bayesian method takes the previous evaluation results into account when choosing the next set of hyperparameters to try, so it avoids a lot of wasted work. Evaluating a set of hyperparameters is expensive, because the model has to be trained once with those hyperparameters, and many deep learning models take hours or even days to train and evaluate. Bayesian tuning therefore maintains a continuously updated probability model that reasons over past results to "focus" the search on promising hyperparameters.

# 2. Selection in Python

There are several Bayesian optimization libraries in Python, and they differ in the surrogate function they use for the objective function. In this article, we will use Hyperopt, which uses the Tree-structured Parzen Estimator (TPE). Other Python libraries include Spearmint (Gaussian process surrogate) and SMAC (random forest regression).

# 3. Four parts of optimization problem

The Bayesian optimization problem has four parts:

• Objective function: what we want to minimize. Here, the objective function is the loss of the machine learning model on the validation set when it is trained with a given set of hyperparameters.
• Domain space: the range of values to search for each hyperparameter.
• Optimization algorithm: the method used to construct the surrogate function and to select the next hyperparameter values to evaluate.
• Result history: stored results from objective function evaluation, including hyperparameters and losses on validation sets

# 4. Code demonstration

```python
import datetime
import numpy as np
import pandas as pd
import lightgbm as lgb
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score,f1_score
import matplotlib.pyplot as plt
from hyperopt import fmin,hp,Trials,space_eval,rand,tpe,anneal
import warnings
warnings.filterwarnings('ignore')

def printlog(info):
    """Print a timestamped banner line followed by a progress message.

    Args:
        info: Message text. ``'...'`` and two newlines are appended to it
            when it is printed.
    """
    nowtime = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
    # Banner: a blank line, 80 '=' characters, then the current local time.
    print("\n" + "==========" * 8 + "%s" % nowtime)
    print(info + '...\n\n')

#================================================================================
# 1. Load the data
#================================================================================
printlog("step1: reading data...")

# Build a single DataFrame from the scikit-learn breast-cancer dataset,
# replacing spaces in the feature names with underscores.
breast = datasets.load_breast_cancer()
feature_columns = [name.replace(' ', '_') for name in breast.feature_names]
df = pd.DataFrame(breast.data, columns=feature_columns)
df['label'] = breast.target

# These two features are truncated to integers and declared categorical
# when the LightGBM datasets are built below.
categorical_features = ['mean_radius', 'mean_texture']
for column in categorical_features:
    df[column] = df[column].apply(int)

# Split into training and held-out frames (default split, no fixed seed).
dftrain, dftest = train_test_split(df)

# Training dataset; the raw frame is kept (free_raw_data=False).
train_features = dftrain.drop(['label'], axis=1)
lgb_train = lgb.Dataset(
    train_features,
    label=dftrain['label'],
    categorical_feature=categorical_features,
    free_raw_data=False,
)

# Validation dataset, built with the training dataset as its reference.
valid_features = dftest.drop(['label'], axis=1)
lgb_valid = lgb.Dataset(
    valid_features,
    label=dftest['label'],
    categorical_feature=categorical_features,
    reference=lgb_train,
    free_raw_data=False,
)

#================================================================================
# 2. Search hyperparameters
#================================================================================
printlog("step2: searching parameters...")

# Boosting budget used both by the search objective and by the final training run.
early_stop_rounds = 5
boost_round = 10

# Baseline LightGBM parameters; values sampled during the search are
# layered on top of these.
params = dict(
    learning_rate=0.1,
    boosting_type='gbdt',  # alternatives: 'dart', 'rf'
    objective='binary',
    metric=['auc'],
    num_leaves=31,
    max_depth=6,
    min_data_in_leaf=5,
    min_gain_to_split=0,
    reg_alpha=0,
    reg_lambda=0,
    feature_fraction=0.9,
    bagging_fraction=0.8,
    bagging_freq=5,
    feature_pre_filter=False,
    verbose=-1,
)

# 1. Define the objective function
def loss(config):
    """Objective for hyperopt: negative validation F1 of one configuration.

    Args:
        config: Mapping of hyperparameter names to the values sampled for
            this trial. It is layered on top of the module-level ``params``.

    Returns:
        The negated F1 score on ``dftest``. hyperopt minimizes the
        objective, so a higher F1 yields a lower loss.
    """
    # Merge into a per-trial copy so sampled values never leak into the
    # shared defaults that later trials and the final training step read.
    trial_params = {**params, **config}
    # NOTE(review): `early_stopping_rounds` / `verbose_eval` are believed to
    # have been removed from `lgb.train` in LightGBM 4.0 (replaced by
    # callbacks) — confirm against the installed version.
    booster = lgb.train(trial_params,
                        lgb_train,
                        num_boost_round=boost_round,
                        valid_sets=(lgb_valid, lgb_train),
                        valid_names=('validate', 'train'),
                        early_stopping_rounds=early_stop_rounds,
                        verbose_eval=False)
    y_pred_test = booster.predict(dftest.drop('label', axis=1),
                                  num_iteration=booster.best_iteration)
    # Threshold the predicted probabilities at 0.5 to obtain class labels.
    val_score = f1_score(dftest['label'], y_pred_test > 0.5)

    return -val_score

# 2. Define the hyperparameter search space

# Learning rate is sampled log-uniformly between 0.001 and 0.5.
learning_rate_space = hp.loguniform("learning_rate", np.log(0.001), np.log(0.5))
boosting_type_space = hp.choice("boosting_type", ['gbdt', 'dart', 'rf'])
# Number of leaves is picked from the integers 15..127.
num_leaves_space = hp.choice("num_leaves", range(15, 128))

# The commented-out entries are optional dimensions; enable or disable
# them as needed to widen or narrow the search.
spaces = {
    "learning_rate": learning_rate_space,
    "boosting_type": boosting_type_space,
    "num_leaves": num_leaves_space,
    #"max_depth":hp.choice("max_depth",range(3,11)),
    #"min_data_in_leaf":hp.choice("min_data_in_leaf",range(1,50)),
    #"min_gain_to_split":hp.uniform("min_gain_to_split",0.0,1.0),
    #"reg_alpha": hp.uniform("reg_alpha", 0, 2),
    #"reg_lambda": hp.uniform("reg_lambda", 0, 2),
    #"feature_fraction":hp.uniform("feature_fraction",0.5,1.0),
    #"bagging_fraction":hp.uniform("bagging_fraction",0.5,1.0),
    #"bagging_freq":hp.choice("bagging_freq",range(1,20))
}

# 3. Run the hyperparameter search
# hyperopt supports the following search algorithms:
#   - Random search (hyperopt.rand.suggest)
#   - Simulated annealing (hyperopt.anneal.suggest)
#   - TPE (hyperopt.tpe.suggest; Tree-structured Parzen Estimator)

trials = Trials()
best = fmin(fn=loss, space=spaces, algo=tpe.suggest, max_evals=100, trials=trials)

# 4. Obtain the optimal parameters
# space_eval maps the point returned by fmin back onto the search space,
# yielding the concrete parameter values.
best_params = space_eval(spaces, best)
print("best_params = ", best_params)

# 5. Draw the search process
losses = [x["result"]["loss"] for x in trials.trials]
# Best loss seen so far at each step, computed in a single pass instead of
# re-scanning a growing prefix for every step.
minlosses = np.minimum.accumulate(losses)
steps = range(len(losses))

fig, ax = plt.subplots(figsize=(6, 3.7), dpi=144)
ax.scatter(x=steps, y=losses, alpha=0.3)
# The line is drawn on `ax` by ax.plot itself; no `axes=` keyword is needed.
ax.plot(steps, minlosses, color="red")
plt.xlabel("step")
plt.ylabel("loss")

#================================================================================
# 3. Train the model
#================================================================================
printlog("step3: training model...")

# Dict handed to LightGBM as `evals_result`; it is plotted in the evaluation step.
results = {}

# Overlay the best hyperparameters from the search onto the baseline parameters.
params.update(best_params)

train_options = {
    'num_boost_round': boost_round,
    'valid_sets': (lgb_valid, lgb_train),
    'valid_names': ('validate', 'train'),
    'early_stopping_rounds': early_stop_rounds,
    'evals_result': results,
    'verbose_eval': True,
}
gbm = lgb.train(params, lgb_train, **train_options)

#================================================================================
# 4. Evaluate the model
#================================================================================
printlog("step4: evaluating model ...")

# Predict on both frames using the booster's best iteration.
best_iter = gbm.best_iteration
train_inputs = dftrain.drop('label', axis=1)
test_inputs = dftest.drop('label', axis=1)
y_pred_train = gbm.predict(train_inputs, num_iteration=best_iter)
y_pred_test = gbm.predict(test_inputs, num_iteration=best_iter)

# Predictions are thresholded at 0.5 before computing F1.
train_score = f1_score(dftrain['label'], y_pred_train > 0.5)
val_score = f1_score(dftest['label'], y_pred_test > 0.5)

print('train f1_score: {:.5} '.format(train_score))
print('valid f1_score: {:.5} \n'.format(val_score))

# One figure for the recorded metric history, one for gain-based feature importance.
figure_options = dict(figsize=(6, 3.7), dpi=144)
fig2, ax2 = plt.subplots(**figure_options)
fig3, ax3 = plt.subplots(**figure_options)
lgb.plot_metric(results, ax=ax2)
lgb.plot_importance(gbm, importance_type="gain", ax=ax3)

#================================================================================
# 5. Save the model
#================================================================================
printlog("step5: saving model ...")

# Path of the saved model file. It is defined once so the printed path and
# the file actually written cannot drift apart.
model_dir = "gbm.model"
print("model_dir: %s"%model_dir)
# Only the iterations up to the booster's best iteration are saved.
gbm.save_model(model_dir,num_iteration=gbm.best_iteration)
printlog("task end...")

```

Posted on Wed, 27 Oct 2021 11:35:41 -0400 by Joe_Dean