From 605447440f2e14aa585fa3c96a54b61056a53b86 Mon Sep 17 00:00:00 2001
From: René Knaebel
Date: Fri, 29 Sep 2017 22:59:57 +0200
Subject: [PATCH] refactor hyperband implementation

---
 hyperband.py       | 74 +++++++++++++++++++++++++++-------------------
 main.py            | 33 ++++++++++++---------
 models/__init__.py |  5 ++--
 rerun_models.sh    | 16 ++++++++++
 4 files changed, 81 insertions(+), 47 deletions(-)
 create mode 100644 rerun_models.sh

diff --git a/hyperband.py b/hyperband.py
index d01ce11..3a42417 100644
--- a/hyperband.py
+++ b/hyperband.py
@@ -3,13 +3,15 @@
 # https://arxiv.org/pdf/1603.06560.pdf
 import logging
 import random
-from math import log, ceil
+from math import ceil, log
 from random import random as rng
-from time import time, ctime
+from time import ctime, time
 
 import numpy as np
+from keras.callbacks import EarlyStopping
 
 import models
+from main import create_model
 
 logger = logging.getLogger('logger')
@@ -22,10 +24,10 @@ def sample_params(param_distribution: dict):
 
 
 class Hyperband:
-    def __init__(self, param_distribution, X, y):
+    def __init__(self, param_distribution, X, y, max_iter=81):
         self.get_params = lambda: sample_params(param_distribution)
 
-        self.max_iter = 81  # maximum iterations per configuration
+        self.max_iter = max_iter  # maximum iterations per configuration
         self.eta = 3  # defines configuration downsampling rate (default = 3)
         self.logeta = lambda x: log(x) / log(self.eta)
@@ -39,57 +41,69 @@ class Hyperband:
         self.X = X
         self.y = y
-
+
     def try_params(self, n_iterations, params):
         n_iterations = int(round(n_iterations))
 
-        embedding, model = models.get_models_by_params(params)
+        embedding, model, new_model = models.get_models_by_params(params)
+
+        model = create_model(model, params["output"])
+        new_model = create_model(new_model, params["output"])
+
+        if params["type"] in ("inter", "staggered"):
+            model = new_model
+
+        callbacks = [EarlyStopping(monitor='val_loss',
+                                   patience=5,
+                                   verbose=False)]
+
         model.compile(optimizer='adam',
-                      loss='categorical_crossentropy',
+                      loss='binary_crossentropy',
                       metrics=['accuracy'])
 
         history = model.fit(self.X, self.y,
                             batch_size=params["batch_size"],
                             epochs=n_iterations,
+                            callbacks=callbacks,
                             shuffle=True,
-                            validation_split=0.2)
-
-        return {"loss": history.history['loss'][-1]}
+                            validation_split=0.4)
+
+        # report the final validation loss and whether EarlyStopping fired,
+        # i.e. fewer epochs were recorded than requested
+        return {"loss": history.history['val_loss'][-1],
+                "early_stop": len(history.history['loss']) < n_iterations}
 
     # can be called multiple times
     def run(self, skip_last=0, dry_run=False):
-
+
         for s in reversed(range(self.s_max + 1)):
-
+
             # initial number of configurations
             n = int(ceil(self.B / self.max_iter / (s + 1) * self.eta ** s))
-
+
             # initial number of iterations per config
             r = self.max_iter * self.eta ** (-s)
-
+
             # n random configurations
-            T = [self.get_params() for _ in range(n)]
-
+            random_configs = [self.get_params() for _ in range(n)]
+
             for i in range((s + 1) - int(skip_last)):  # changed from s + 1
-
+
                 # Run each of the n configs for <n_iterations>
                 # and keep best (n_configs / eta) configurations
-
+
                 n_configs = n * self.eta ** (-i)
                 n_iterations = r * self.eta ** (i)
-
+
                 logger.info("\n*** {} configurations x {:.1f} iterations each".format(
-                    n_configs, n_iterations))
-
+                    n_configs, n_iterations))
+
                 val_losses = []
                 early_stops = []
-
-                for t in T:
-
+
+                for t in random_configs:
+
                     self.counter += 1
                     logger.info("\n{} | {} | lowest loss so far: {:.4f} (run {})\n".format(
-                        self.counter, ctime(), self.best_loss, self.best_counter))
-
+                        self.counter, ctime(), self.best_loss, self.best_counter))
+
                     start_time = time()
 
                     if dry_run:
@@ -121,11 +135,11 @@ class Hyperband:
                 result['iterations'] = n_iterations
 
                 self.results.append(result)
-
+
             # select a number of best configurations for the next loop
             # filter out early stops, if any
             indices = np.argsort(val_losses)
-            T = [T[i] for i in indices if not early_stops[i]]
-            T = T[0:int(n_configs / self.eta)]
-
+            random_configs = [random_configs[i] for i in indices if not early_stops[i]]
+            random_configs = random_configs[0:int(n_configs / self.eta)]
+
         return self.results
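For reference, the bracket arithmetic in run() follows the schedule from the Hyperband paper (Table 1 in arXiv:1603.06560). A standalone sketch of just that computation, using the patch defaults max_iter=81 and eta=3; it is illustrative only and not part of the patch:

# Standalone sketch of the bracket schedule computed in Hyperband.run(),
# using the defaults from the patch (max_iter=81, eta=3).
from math import ceil, log

max_iter = 81                          # maximum iterations per configuration
eta = 3                                # configuration downsampling rate
s_max = int(log(max_iter) / log(eta))  # number of brackets minus one (= 4)
B = (s_max + 1) * max_iter             # budget per bracket (= 405)

for s in reversed(range(s_max + 1)):
    n = int(ceil(B / max_iter / (s + 1) * eta ** s))  # initial configurations
    r = max_iter * eta ** (-s)                        # initial epochs per config
    print("bracket s={}: n={} configs, r={:g} epochs each".format(s, n, r))

# bracket s=4: n=81 configs, r=1 epochs each
# bracket s=3: n=34 configs, r=3 epochs each
# bracket s=2: n=15 configs, r=9 epochs each
# bracket s=1: n=8 configs, r=27 epochs each
# bracket s=0: n=5 configs, r=81 epochs each

Each bracket then repeatedly trains its configurations and keeps the best n_configs / eta of them, as in the successive-halving loop above.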
diff --git a/main.py b/main.py
index fbbbfab..6c49b35 100644
--- a/main.py
+++ b/main.py
@@ -1,4 +1,3 @@
-import json
 import logging
 import os
 
@@ -100,33 +99,39 @@ def main_paul_best():
 def main_hyperband():
     params = {
         # static params
-        "type": ["paul"],
+        "type": [args.model_type],
+        "depth": [args.model_depth],
+        "output": [args.model_output],
         "batch_size": [args.batch_size],
         "window_size": [10],
-        "domain_length": [40],
         "flow_features": [3],
         "input_length": [40],
         # model params
-        "embedding_size": [8, 16, 32, 64, 128, 256],
-        "filter_embedding": [8, 16, 32, 64, 128, 256],
+        "embedding_size": [2 ** x for x in range(3, 7)],
+        "filter_embedding": [2 ** x for x in range(1, 10)],
         "kernel_embedding": [1, 3, 5, 7, 9],
-        "hidden_embedding": [8, 16, 32, 64, 128, 256],
+        "dense_embedding": [2 ** x for x in range(4, 10)],
         "dropout": [0.5],
-        "domain_features": [8, 16, 32, 64, 128, 256],
-        "filter_main": [8, 16, 32, 64, 128, 256],
-        "kernels_main": [1, 3, 5, 7, 9],
-        "dense_main": [8, 16, 32, 64, 128, 256],
+        "filter_main": [2 ** x for x in range(1, 10)],
+        "kernel_main": [1, 3, 5, 7, 9],
+        "dense_main": [2 ** x for x in range(1, 12)],
     }
 
     logger.info("create training dataset")
-    domain_tr, flow_tr, name_tr, client_tr, server_tr = dataset.load_or_generate_h5data(args.train_h5data,
-                                                                                        args.train_data,
-                                                                                        args.domain_length, args.window)
+    domain_tr, flow_tr, name_tr, client_tr, server_windows_tr = dataset.load_or_generate_h5data(args.train_h5data,
+                                                                                                 args.train_data,
+                                                                                                 args.domain_length,
+                                                                                                 args.window)
+    server_tr = np.max(server_windows_tr, axis=1)
+
+    if args.model_type in ("inter", "staggered"):
+        server_tr = np.expand_dims(server_windows_tr, 2)
+
     hp = hyperband.Hyperband(params, [domain_tr, flow_tr], [client_tr, server_tr])
     results = hp.run()
 
-    json.dump(results, open("hyperband.json"))
+    joblib.dump(results, "hyperband.joblib")
 
 
 def main_train(param=None):
diff --git a/models/__init__.py b/models/__init__.py
index 1852a04..40e3f52 100644
--- a/models/__init__.py
+++ b/models/__init__.py
@@ -17,7 +17,6 @@ def get_models_by_params(params: dict):
     dropout = params.get("dropout")
     # mainly prediction model
     flow_features = params.get("flow_features")
-    domain_features = params.get("domain_features")
     window_size = params.get("window_size")
     domain_length = params.get("domain_length")
     filter_main = params.get("filter_main")
@@ -36,10 +35,10 @@ def get_models_by_params(params: dict):
     embedding_model = networks.get_embedding(embedding_size, input_length, filter_embedding, kernel_embedding,
                                              hidden_embedding, 0.5)
 
-    old_model = networks.get_model(0.25, flow_features, domain_features, window_size, domain_length,
+    old_model = networks.get_model(0.25, flow_features, hidden_embedding, window_size, domain_length,
                                    filter_main, kernel_main, dense_dim, embedding_model, model_output)
-    new_model = networks.get_new_model(0.25, flow_features, domain_features, window_size, domain_length,
+    new_model = networks.get_new_model(0.25, flow_features, hidden_embedding, window_size, domain_length,
                                        filter_main, kernel_main, dense_dim, embedding_model, model_output)
 
     return embedding_model, old_model, new_model
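The search space in main_hyperband() feeds sample_params, whose signature appears in the hyperband.py hunk but whose body does not. A minimal sketch of the sampling step it presumably performs, drawing one value per key uniformly at random; this body is an assumption for illustration, not the patch's actual implementation:

# Assumed behavior of sample_params: pick one value per hyperparameter,
# uniformly at random. The real body is not shown in this patch.
import random

def sample_params(param_distribution: dict) -> dict:
    return {key: random.choice(values) for key, values in param_distribution.items()}

space = {
    "filter_main": [2 ** x for x in range(1, 10)],  # 2, 4, ..., 512
    "kernel_main": [1, 3, 5, 7, 9],
    "dense_main": [2 ** x for x in range(1, 12)],   # 2, 4, ..., 2048
}
print(sample_params(space))
# e.g. {'filter_main': 64, 'kernel_main': 5, 'dense_main': 256}

Note that the grids switch from explicit lists like [8, 16, 32, 64, 128, 256] to power-of-two comprehensions, which widens the ranges for filter_main and dense_main.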
diff --git a/rerun_models.sh b/rerun_models.sh
new file mode 100644
index 0000000..8a84b61
--- /dev/null
+++ b/rerun_models.sh
@@ -0,0 +1,16 @@
+#!/usr/bin/env bash
+
+SRC=$1
+DEST=$2
+DATADIR=$3
+INIT=$4
+EPOCHS=$5
+BS=128
+
+# iterate over the model directories below SRC; a glob avoids parsing ls output
+for i in "$SRC"*/
+do
+    echo "retrain model in ${i}"
+    name=$(basename "$i")
+    python3 main.py --mode retrain --model_src "${i}" --model_dest "${DEST}/${name}" --init_epoch "$INIT" --epochs "$EPOCHS" --batch "$BS" --train "${DATADIR}"
+done
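A hypothetical invocation of the new script (all paths and epoch values below are made up for illustration), retraining every model directory under results/models/ for 20 epochs starting from epoch 0:

bash rerun_models.sh results/models/ results/retrained/ data/train_data 0 20

Positional arguments map to SRC, DEST, DATADIR, INIT, and EPOCHS in that order; the batch size is fixed at 128 inside the script.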