refactor hyperband implementation
This commit is contained in:
parent
090c89a127
commit
605447440f
74
hyperband.py
74
hyperband.py
|
@ -3,13 +3,15 @@
|
||||||
# https://arxiv.org/pdf/1603.06560.pdf
|
# https://arxiv.org/pdf/1603.06560.pdf
|
||||||
import logging
|
import logging
|
||||||
import random
|
import random
|
||||||
from math import log, ceil
|
from math import ceil, log
|
||||||
from random import random as rng
|
from random import random as rng
|
||||||
from time import time, ctime
|
from time import ctime, time
|
||||||
|
|
||||||
import numpy as np
|
import numpy as np
|
||||||
|
from keras.callbacks import EarlyStopping
|
||||||
|
|
||||||
import models
|
import models
|
||||||
|
from main import create_model
|
||||||
|
|
||||||
logger = logging.getLogger('logger')
|
logger = logging.getLogger('logger')
|
||||||
|
|
||||||
|
@ -22,10 +24,10 @@ def sample_params(param_distribution: dict):
|
||||||
|
|
||||||
|
|
||||||
class Hyperband:
|
class Hyperband:
|
||||||
def __init__(self, param_distribution, X, y):
|
def __init__(self, param_distribution, X, y, max_iter=81):
|
||||||
self.get_params = lambda: sample_params(param_distribution)
|
self.get_params = lambda: sample_params(param_distribution)
|
||||||
|
|
||||||
self.max_iter = 81 # maximum iterations per configuration
|
self.max_iter = max_iter # maximum iterations per configuration
|
||||||
self.eta = 3 # defines configuration downsampling rate (default = 3)
|
self.eta = 3 # defines configuration downsampling rate (default = 3)
|
||||||
|
|
||||||
self.logeta = lambda x: log(x) / log(self.eta)
|
self.logeta = lambda x: log(x) / log(self.eta)
|
||||||
|
@ -39,57 +41,69 @@ class Hyperband:
|
||||||
|
|
||||||
self.X = X
|
self.X = X
|
||||||
self.y = y
|
self.y = y
|
||||||
|
|
||||||
def try_params(self, n_iterations, params):
|
def try_params(self, n_iterations, params):
|
||||||
n_iterations = int(round(n_iterations))
|
n_iterations = int(round(n_iterations))
|
||||||
embedding, model = models.get_models_by_params(params)
|
embedding, model, new_model = models.get_models_by_params(params)
|
||||||
|
|
||||||
|
model = create_model(model, params["output"])
|
||||||
|
new_model = create_model(new_model, params["output"])
|
||||||
|
|
||||||
|
if params["type"] in ("inter", "staggered"):
|
||||||
|
model = new_model
|
||||||
|
|
||||||
|
callbacks = [EarlyStopping(monitor='val_loss',
|
||||||
|
patience=5,
|
||||||
|
verbose=False)]
|
||||||
|
|
||||||
model.compile(optimizer='adam',
|
model.compile(optimizer='adam',
|
||||||
loss='categorical_crossentropy',
|
loss='binary_crossentropy',
|
||||||
metrics=['accuracy'])
|
metrics=['accuracy'])
|
||||||
|
|
||||||
history = model.fit(self.X,
|
history = model.fit(self.X,
|
||||||
self.y,
|
self.y,
|
||||||
batch_size=params["batch_size"],
|
batch_size=params["batch_size"],
|
||||||
epochs=n_iterations,
|
epochs=n_iterations,
|
||||||
|
callbacks=callbacks,
|
||||||
shuffle=True,
|
shuffle=True,
|
||||||
validation_split=0.2)
|
validation_split=0.4)
|
||||||
|
|
||||||
return {"loss": history.history['loss'][-1]}
|
|
||||||
|
|
||||||
|
return {"loss": history.history['val_loss'][-1], "early_stop": True}
|
||||||
|
|
||||||
# can be called multiple times
|
# can be called multiple times
|
||||||
def run(self, skip_last=0, dry_run=False):
|
def run(self, skip_last=0, dry_run=False):
|
||||||
|
|
||||||
for s in reversed(range(self.s_max + 1)):
|
for s in reversed(range(self.s_max + 1)):
|
||||||
|
|
||||||
# initial number of configurations
|
# initial number of configurations
|
||||||
n = int(ceil(self.B / self.max_iter / (s + 1) * self.eta ** s))
|
n = int(ceil(self.B / self.max_iter / (s + 1) * self.eta ** s))
|
||||||
|
|
||||||
# initial number of iterations per config
|
# initial number of iterations per config
|
||||||
r = self.max_iter * self.eta ** (-s)
|
r = self.max_iter * self.eta ** (-s)
|
||||||
|
|
||||||
# n random configurations
|
# n random configurations
|
||||||
T = [self.get_params() for _ in range(n)]
|
random_configs = [self.get_params() for _ in range(n)]
|
||||||
|
|
||||||
for i in range((s + 1) - int(skip_last)): # changed from s + 1
|
for i in range((s + 1) - int(skip_last)): # changed from s + 1
|
||||||
|
|
||||||
# Run each of the n configs for <iterations>
|
# Run each of the n configs for <iterations>
|
||||||
# and keep best (n_configs / eta) configurations
|
# and keep best (n_configs / eta) configurations
|
||||||
|
|
||||||
n_configs = n * self.eta ** (-i)
|
n_configs = n * self.eta ** (-i)
|
||||||
n_iterations = r * self.eta ** (i)
|
n_iterations = r * self.eta ** (i)
|
||||||
|
|
||||||
logger.info("\n*** {} configurations x {:.1f} iterations each".format(
|
logger.info("\n*** {} configurations x {:.1f} iterations each".format(
|
||||||
n_configs, n_iterations))
|
n_configs, n_iterations))
|
||||||
|
|
||||||
val_losses = []
|
val_losses = []
|
||||||
early_stops = []
|
early_stops = []
|
||||||
|
|
||||||
for t in T:
|
for t in random_configs:
|
||||||
|
|
||||||
self.counter += 1
|
self.counter += 1
|
||||||
logger.info("\n{} | {} | lowest loss so far: {:.4f} (run {})\n".format(
|
logger.info("\n{} | {} | lowest loss so far: {:.4f} (run {})\n".format(
|
||||||
self.counter, ctime(), self.best_loss, self.best_counter))
|
self.counter, ctime(), self.best_loss, self.best_counter))
|
||||||
|
|
||||||
start_time = time()
|
start_time = time()
|
||||||
|
|
||||||
if dry_run:
|
if dry_run:
|
||||||
|
@ -121,11 +135,11 @@ class Hyperband:
|
||||||
result['iterations'] = n_iterations
|
result['iterations'] = n_iterations
|
||||||
|
|
||||||
self.results.append(result)
|
self.results.append(result)
|
||||||
|
|
||||||
# select a number of best configurations for the next loop
|
# select a number of best configurations for the next loop
|
||||||
# filter out early stops, if any
|
# filter out early stops, if any
|
||||||
indices = np.argsort(val_losses)
|
indices = np.argsort(val_losses)
|
||||||
T = [T[i] for i in indices if not early_stops[i]]
|
random_configs = [random_configs[i] for i in indices if not early_stops[i]]
|
||||||
T = T[0:int(n_configs / self.eta)]
|
random_configs = random_configs[0:int(n_configs / self.eta)]
|
||||||
|
|
||||||
return self.results
|
return self.results
|
||||||
|
|
33
main.py
33
main.py
|
@ -1,4 +1,3 @@
|
||||||
import json
|
|
||||||
import logging
|
import logging
|
||||||
import os
|
import os
|
||||||
|
|
||||||
|
@ -100,33 +99,39 @@ def main_paul_best():
|
||||||
def main_hyperband():
|
def main_hyperband():
|
||||||
params = {
|
params = {
|
||||||
# static params
|
# static params
|
||||||
"type": ["paul"],
|
"type": [args.model_type],
|
||||||
|
"depth": [args.model_depth],
|
||||||
|
"output": [args.model_output],
|
||||||
"batch_size": [args.batch_size],
|
"batch_size": [args.batch_size],
|
||||||
"window_size": [10],
|
"window_size": [10],
|
||||||
"domain_length": [40],
|
|
||||||
"flow_features": [3],
|
"flow_features": [3],
|
||||||
"input_length": [40],
|
"input_length": [40],
|
||||||
# model params
|
# model params
|
||||||
"embedding_size": [8, 16, 32, 64, 128, 256],
|
"embedding_size": [2 ** x for x in range(3, 7)],
|
||||||
"filter_embedding": [8, 16, 32, 64, 128, 256],
|
"filter_embedding": [2 ** x for x in range(1, 10)],
|
||||||
"kernel_embedding": [1, 3, 5, 7, 9],
|
"kernel_embedding": [1, 3, 5, 7, 9],
|
||||||
"hidden_embedding": [8, 16, 32, 64, 128, 256],
|
"dense_embedding": [2 ** x for x in range(4, 10)],
|
||||||
"dropout": [0.5],
|
"dropout": [0.5],
|
||||||
"domain_features": [8, 16, 32, 64, 128, 256],
|
"filter_main": [2 ** x for x in range(1, 10)],
|
||||||
"filter_main": [8, 16, 32, 64, 128, 256],
|
"kernel_main": [1, 3, 5, 7, 9],
|
||||||
"kernels_main": [1, 3, 5, 7, 9],
|
"dense_main": [2 ** x for x in range(1, 12)],
|
||||||
"dense_main": [8, 16, 32, 64, 128, 256],
|
|
||||||
}
|
}
|
||||||
|
|
||||||
logger.info("create training dataset")
|
logger.info("create training dataset")
|
||||||
domain_tr, flow_tr, name_tr, client_tr, server_tr = dataset.load_or_generate_h5data(args.train_h5data,
|
domain_tr, flow_tr, name_tr, client_tr, server_windows_tr = dataset.load_or_generate_h5data(args.train_h5data,
|
||||||
args.train_data,
|
args.train_data,
|
||||||
args.domain_length, args.window)
|
args.domain_length,
|
||||||
|
args.window)
|
||||||
|
server_tr = np.max(server_windows_tr, axis=1)
|
||||||
|
|
||||||
|
if args.model_type in ("inter", "staggered"):
|
||||||
|
server_tr = np.expand_dims(server_windows_tr, 2)
|
||||||
|
|
||||||
hp = hyperband.Hyperband(params,
|
hp = hyperband.Hyperband(params,
|
||||||
[domain_tr, flow_tr],
|
[domain_tr, flow_tr],
|
||||||
[client_tr, server_tr])
|
[client_tr, server_tr])
|
||||||
results = hp.run()
|
results = hp.run()
|
||||||
json.dump(results, open("hyperband.json"))
|
joblib.dump(results, "hyperband.joblib")
|
||||||
|
|
||||||
|
|
||||||
def main_train(param=None):
|
def main_train(param=None):
|
||||||
|
|
|
@ -17,7 +17,6 @@ def get_models_by_params(params: dict):
|
||||||
dropout = params.get("dropout")
|
dropout = params.get("dropout")
|
||||||
# mainly prediction model
|
# mainly prediction model
|
||||||
flow_features = params.get("flow_features")
|
flow_features = params.get("flow_features")
|
||||||
domain_features = params.get("domain_features")
|
|
||||||
window_size = params.get("window_size")
|
window_size = params.get("window_size")
|
||||||
domain_length = params.get("domain_length")
|
domain_length = params.get("domain_length")
|
||||||
filter_main = params.get("filter_main")
|
filter_main = params.get("filter_main")
|
||||||
|
@ -36,10 +35,10 @@ def get_models_by_params(params: dict):
|
||||||
embedding_model = networks.get_embedding(embedding_size, input_length, filter_embedding, kernel_embedding,
|
embedding_model = networks.get_embedding(embedding_size, input_length, filter_embedding, kernel_embedding,
|
||||||
hidden_embedding, 0.5)
|
hidden_embedding, 0.5)
|
||||||
|
|
||||||
old_model = networks.get_model(0.25, flow_features, domain_features, window_size, domain_length,
|
old_model = networks.get_model(0.25, flow_features, hidden_embedding, window_size, domain_length,
|
||||||
filter_main, kernel_main, dense_dim, embedding_model, model_output)
|
filter_main, kernel_main, dense_dim, embedding_model, model_output)
|
||||||
|
|
||||||
new_model = networks.get_new_model(0.25, flow_features, domain_features, window_size, domain_length,
|
new_model = networks.get_new_model(0.25, flow_features, hidden_embedding, window_size, domain_length,
|
||||||
filter_main, kernel_main, dense_dim, embedding_model, model_output)
|
filter_main, kernel_main, dense_dim, embedding_model, model_output)
|
||||||
|
|
||||||
return embedding_model, old_model, new_model
|
return embedding_model, old_model, new_model
|
||||||
|
|
|
@ -0,0 +1,16 @@
|
||||||
|
#!/usr/bin/env bash
|
||||||
|
|
||||||
|
SRC=$1
|
||||||
|
DEST=$2
|
||||||
|
DATADIR=$3
|
||||||
|
INIT=$4
|
||||||
|
EPOCHS=$5
|
||||||
|
BS=128
|
||||||
|
|
||||||
|
for i in `ls -d $SRC*/`
|
||||||
|
do
|
||||||
|
echo "retrain model in ${i}"
|
||||||
|
name=$(basename $i)
|
||||||
|
python3 main.py --mode retrain --model_src ${i} --model_dest ${DEST}/${name} --init_epoch $INIT --epochs $EPOCHS --batch $BS --train ${DATADIR}
|
||||||
|
|
||||||
|
done
|
Loading…
Reference in New Issue