refactor hyperband implementation

This commit is contained in:
René Knaebel 2017-09-29 22:59:57 +02:00
parent 090c89a127
commit 605447440f
4 changed files with 81 additions and 47 deletions

View File

@ -3,13 +3,15 @@
# https://arxiv.org/pdf/1603.06560.pdf # https://arxiv.org/pdf/1603.06560.pdf
import logging import logging
import random import random
from math import log, ceil from math import ceil, log
from random import random as rng from random import random as rng
from time import time, ctime from time import ctime, time
import numpy as np import numpy as np
from keras.callbacks import EarlyStopping
import models import models
from main import create_model
logger = logging.getLogger('logger') logger = logging.getLogger('logger')
@ -22,10 +24,10 @@ def sample_params(param_distribution: dict):
class Hyperband: class Hyperband:
def __init__(self, param_distribution, X, y): def __init__(self, param_distribution, X, y, max_iter=81):
self.get_params = lambda: sample_params(param_distribution) self.get_params = lambda: sample_params(param_distribution)
self.max_iter = 81 # maximum iterations per configuration self.max_iter = max_iter # maximum iterations per configuration
self.eta = 3 # defines configuration downsampling rate (default = 3) self.eta = 3 # defines configuration downsampling rate (default = 3)
self.logeta = lambda x: log(x) / log(self.eta) self.logeta = lambda x: log(x) / log(self.eta)
@ -39,57 +41,69 @@ class Hyperband:
self.X = X self.X = X
self.y = y self.y = y
def try_params(self, n_iterations, params):
    """Train one sampled configuration and report its validation loss.

    Args:
        n_iterations: Epoch budget for this configuration; rounded to the
            nearest integer before use.
        params: Sampled hyper-parameter dict. This method reads the keys
            "type", "output" and "batch_size"; the whole dict is passed on
            to ``models.get_models_by_params``.

    Returns:
        dict with
            "loss": validation loss of the last epoch that was run, and
            "early_stop": True only if training was halted before the
            epoch budget was used up.
    """
    n_iterations = int(round(n_iterations))
    _embedding, model, new_model = models.get_models_by_params(params)

    # "inter" and "staggered" train the new architecture, every other type
    # trains the old one.  Pick first so only the network that is actually
    # trained gets wrapped by create_model.
    if params["type"] in ("inter", "staggered"):
        model = new_model
    model = create_model(model, params["output"])

    callbacks = [EarlyStopping(monitor='val_loss',
                               patience=5,
                               verbose=False)]

    model.compile(optimizer='adam',
                  loss='binary_crossentropy',
                  metrics=['accuracy'])

    history = model.fit(self.X,
                        self.y,
                        batch_size=params["batch_size"],
                        epochs=n_iterations,
                        callbacks=callbacks,
                        shuffle=True,
                        validation_split=0.4)

    # Report the real early-stop state instead of a constant True: run()
    # filters out configurations flagged as early-stopped, so a hard-coded
    # True would discard every configuration after each round.
    # Keras' EarlyStopping sets model.stop_training when it fires.
    stopped_early = bool(getattr(model, "stop_training", False))

    return {"loss": history.history['val_loss'][-1],
            "early_stop": stopped_early}
# can be called multiple times # can be called multiple times
def run(self, skip_last=0, dry_run=False): def run(self, skip_last=0, dry_run=False):
for s in reversed(range(self.s_max + 1)): for s in reversed(range(self.s_max + 1)):
# initial number of configurations # initial number of configurations
n = int(ceil(self.B / self.max_iter / (s + 1) * self.eta ** s)) n = int(ceil(self.B / self.max_iter / (s + 1) * self.eta ** s))
# initial number of iterations per config # initial number of iterations per config
r = self.max_iter * self.eta ** (-s) r = self.max_iter * self.eta ** (-s)
# n random configurations # n random configurations
T = [self.get_params() for _ in range(n)] random_configs = [self.get_params() for _ in range(n)]
for i in range((s + 1) - int(skip_last)): # changed from s + 1 for i in range((s + 1) - int(skip_last)): # changed from s + 1
# Run each of the n configs for <iterations> # Run each of the n configs for <iterations>
# and keep best (n_configs / eta) configurations # and keep best (n_configs / eta) configurations
n_configs = n * self.eta ** (-i) n_configs = n * self.eta ** (-i)
n_iterations = r * self.eta ** (i) n_iterations = r * self.eta ** (i)
logger.info("\n*** {} configurations x {:.1f} iterations each".format( logger.info("\n*** {} configurations x {:.1f} iterations each".format(
n_configs, n_iterations)) n_configs, n_iterations))
val_losses = [] val_losses = []
early_stops = [] early_stops = []
for t in T: for t in random_configs:
self.counter += 1 self.counter += 1
logger.info("\n{} | {} | lowest loss so far: {:.4f} (run {})\n".format( logger.info("\n{} | {} | lowest loss so far: {:.4f} (run {})\n".format(
self.counter, ctime(), self.best_loss, self.best_counter)) self.counter, ctime(), self.best_loss, self.best_counter))
start_time = time() start_time = time()
if dry_run: if dry_run:
@ -121,11 +135,11 @@ class Hyperband:
result['iterations'] = n_iterations result['iterations'] = n_iterations
self.results.append(result) self.results.append(result)
# select a number of best configurations for the next loop # select a number of best configurations for the next loop
# filter out early stops, if any # filter out early stops, if any
indices = np.argsort(val_losses) indices = np.argsort(val_losses)
T = [T[i] for i in indices if not early_stops[i]] random_configs = [random_configs[i] for i in indices if not early_stops[i]]
T = T[0:int(n_configs / self.eta)] random_configs = random_configs[0:int(n_configs / self.eta)]
return self.results return self.results

33
main.py
View File

@ -1,4 +1,3 @@
import json
import logging import logging
import os import os
@ -100,33 +99,39 @@ def main_paul_best():
def main_hyperband():
    """Run a Hyperband search over the model hyper-parameters.

    Builds the parameter distribution (fixed values taken from the command
    line arguments plus the searchable model sizes), loads the training
    data, runs ``hyperband.Hyperband`` on it and dumps the collected
    results to ``hyperband.joblib``.
    """

    def powers_of_two(first_exp, stop_exp):
        # 2 ** first_exp, ..., 2 ** (stop_exp - 1)
        return [2 ** exponent for exponent in range(first_exp, stop_exp)]

    odd_kernels = [1, 3, 5, 7, 9]

    # values that are fixed for the whole search (single-element lists)
    static_space = {
        "type": [args.model_type],
        "depth": [args.model_depth],
        "output": [args.model_output],
        "batch_size": [args.batch_size],
        "window_size": [10],
        "domain_length": [40],
        "flow_features": [3],
        "input_length": [40],
    }
    # values that are actually searched over
    model_space = {
        "embedding_size": powers_of_two(3, 7),
        "filter_embedding": powers_of_two(1, 10),
        "kernel_embedding": list(odd_kernels),
        "dense_embedding": powers_of_two(4, 10),
        "dropout": [0.5],
        "filter_main": powers_of_two(1, 10),
        "kernel_main": list(odd_kernels),
        "dense_main": powers_of_two(1, 12),
    }
    # merge while keeping the key order: static params first, model params after
    params = {**static_space, **model_space}

    logger.info("create training dataset")
    loaded = dataset.load_or_generate_h5data(args.train_h5data,
                                             args.train_data,
                                             args.domain_length,
                                             args.window)
    domain_tr, flow_tr, name_tr, client_tr, server_windows_tr = loaded

    # default target: maximum over axis 1 of the per-window server labels
    server_tr = np.max(server_windows_tr, axis=1)
    # "inter" / "staggered" models get the per-window labels with an extra
    # trailing axis instead
    if args.model_type in ("inter", "staggered"):
        server_tr = np.expand_dims(server_windows_tr, 2)

    inputs = [domain_tr, flow_tr]
    targets = [client_tr, server_tr]
    hp = hyperband.Hyperband(params, inputs, targets)
    results = hp.run()
    joblib.dump(results, "hyperband.joblib")
def main_train(param=None): def main_train(param=None):

View File

@ -17,7 +17,6 @@ def get_models_by_params(params: dict):
dropout = params.get("dropout") dropout = params.get("dropout")
# mainly prediction model # mainly prediction model
flow_features = params.get("flow_features") flow_features = params.get("flow_features")
domain_features = params.get("domain_features")
window_size = params.get("window_size") window_size = params.get("window_size")
domain_length = params.get("domain_length") domain_length = params.get("domain_length")
filter_main = params.get("filter_main") filter_main = params.get("filter_main")
@ -36,10 +35,10 @@ def get_models_by_params(params: dict):
embedding_model = networks.get_embedding(embedding_size, input_length, filter_embedding, kernel_embedding, embedding_model = networks.get_embedding(embedding_size, input_length, filter_embedding, kernel_embedding,
hidden_embedding, 0.5) hidden_embedding, 0.5)
old_model = networks.get_model(0.25, flow_features, domain_features, window_size, domain_length, old_model = networks.get_model(0.25, flow_features, hidden_embedding, window_size, domain_length,
filter_main, kernel_main, dense_dim, embedding_model, model_output) filter_main, kernel_main, dense_dim, embedding_model, model_output)
new_model = networks.get_new_model(0.25, flow_features, domain_features, window_size, domain_length, new_model = networks.get_new_model(0.25, flow_features, hidden_embedding, window_size, domain_length,
filter_main, kernel_main, dense_dim, embedding_model, model_output) filter_main, kernel_main, dense_dim, embedding_model, model_output)
return embedding_model, old_model, new_model return embedding_model, old_model, new_model

16
rerun_models.sh Normal file
View File

@ -0,0 +1,16 @@
#!/usr/bin/env bash
#
# Retrain every model directory whose path starts with SRC.
#
# Usage: rerun_models.sh SRC DEST DATADIR INIT EPOCHS
#   SRC      path prefix; every directory matching SRC*/ is retrained
#   DEST     directory that receives one sub-directory per retrained model
#   DATADIR  passed to main.py as --train
#   INIT     passed to main.py as --init_epoch
#   EPOCHS   passed to main.py as --epochs

if [ "$#" -lt 5 ]; then
    echo "usage: $0 SRC DEST DATADIR INIT EPOCHS" >&2
    exit 1
fi

SRC=$1
DEST=$2
DATADIR=$3
INIT=$4
EPOCHS=$5

BS=128

# Glob directly instead of parsing `ls` output, and quote every expansion so
# paths containing spaces or glob characters survive word splitting.
for i in "$SRC"*/
do
    # When nothing matches, the pattern stays literal; skip it.
    [ -d "$i" ] || continue

    echo "retrain model in ${i}"
    name=$(basename "$i")
    python3 main.py --mode retrain \
        --model_src "${i}" \
        --model_dest "${DEST}/${name}" \
        --init_epoch "$INIT" \
        --epochs "$EPOCHS" \
        --batch "$BS" \
        --train "${DATADIR}"
done