From be56112b3395a69f4ae608f3d228030107c49ed9 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ren=C3=A9=20Knaebel?=
Date: Fri, 7 Jul 2017 16:48:10 +0200
Subject: [PATCH] added params

---
 Makefile                 |   4 +-
 hyperband.py             | 176 +++++++++++++++++++++++++--------------
 main.py                  |  69 +++++++++++++--
 models/pauls_networks.py |  14 ++++
 4 files changed, 191 insertions(+), 72 deletions(-)

diff --git a/Makefile b/Makefile
index 3effa6a..579b107 100644
--- a/Makefile
+++ b/Makefile
@@ -1,3 +1,5 @@
 test:
-	python3 main.py --epochs 1 --batch 64 --train data/rk_data.csv.gz --test data/rk_data.csv.gz
+	python3 main.py --modes train --epochs 1 --batch 64 --train data/rk_data.csv.gz
+hyper:
+	python3 main.py --modes hyperband --epochs 1 --batch 64 --train data/rk_data.csv.gz
diff --git a/hyperband.py b/hyperband.py
index 4e925e9..641d4c6 100644
--- a/hyperband.py
+++ b/hyperband.py
@@ -1,76 +1,128 @@
 # -*- coding: utf-8 -*-
 # implementation of hyperband:
 # https://arxiv.org/pdf/1603.06560.pdf
+import random
+from math import log, ceil
+from random import random as rng
+from time import time, ctime
+
 import numpy as np
-
-def get_hyperparameter_configuration(configGenerator, n):
-    configurations = []
-    for i in np.arange(0, n, 1):
-        configurations.append(configGenerator())
-    return configurations
+import models
 
 
-def run_then_return_val_loss(config, r_i, modelGenerator, trainData, trainLabel,
-                             testData, testLabel):
-    # parameter
-    batch_size = 128
-    model = modelGenerator(config)
-    if model != None:
-        model.fit(x=trainData, y=trainLabel,
-                  epochs=int(r_i), shuffle=True, initial_epoch=0,
-                  batch_size=batch_size)
-        score = model.evaluate(testData, testLabel,
-                               batch_size=batch_size)
-        score = score[0]
-    else:
-        score = np.infty
-    return score
+def sample_params(param_distribution: dict):
+    p = {}
+    for key, val in param_distribution.items():
+        p[key] = random.choice(val)
+    return p
 
 
-def top_k(configurations, L, k):
-    outConfigs = []
-    sortIDs = np.argsort(np.array(L))
-    for i in np.arange(0, k, 1):
-        outConfigs.append(configurations[sortIDs[i]])
-    return outConfigs
+class Hyperband:
+    def __init__(self, param_distribution, X, y):
+        self.get_params = lambda: sample_params(param_distribution)
+        self.max_iter = 81  # maximum iterations per configuration
+        self.eta = 3  # defines configuration downsampling rate (default = 3)
 
-def hyperband(R, nu, modelGenerator,
-              configGenerator,
-              trainData, trainLabel,
-              testData, testLabel,
-              outputFile=''):
-    allLosses = []
-    allConfigs = []
-    # input
+        self.logeta = lambda x: log(x) / log(self.eta)
+        self.s_max = int(self.logeta(self.max_iter))
+        self.B = (self.s_max + 1) * self.max_iter
 
-    # initialization
-    s_max = np.floor(np.log(R) / np.log(nu))
-    B = (s_max + 1) * R
+        self.results = []  # list of dicts
+        self.counter = 0
+        self.best_loss = np.inf
+        self.best_counter = -1
 
-    for s in np.arange(s_max, -1, -1):
-        n = np.ceil(np.float(B) / np.float(R) * (np.float(np.power(nu, s)) / np.float(s + 1)))
-        r = np.float(R) * np.power(nu, -s)
-        configurations = get_hyperparameter_configuration(configGenerator, n)
-        for i in np.arange(0, s + 1, 1):
-            n_i = np.floor(np.float(n) * np.power(nu, -i))
-            r_i = np.float(r) * np.power(nu, i)
-            L = []
-            for config in configurations:
-                curLoss = run_then_return_val_loss(config, r_i, modelGenerator,
-                                                   trainData, trainLabel,
-                                                   testData, testLabel)
-                L.append(curLoss)
-                allLosses.append(curLoss)
-                allConfigs.append(config)
-                if outputFile != '':
-                    with open(outputFile, 'a') as myfile:
-                        myfile.write(str(config) + '\t' + str(curLoss) + \
-                                     '\t' + str(r_i) + '\n')
-            configurations = top_k(configurations, L, np.floor(np.float(n_i) / nu))
+        self.X = X
+        self.y = y
 
-            # print('n_i: ' + str(n_i))
-            # print('r_i: ' + str(r_i))
-    bestConfig = top_k(allConfigs, allLosses, 1)
-    return (bestConfig[0], allConfigs, allLosses)
+    def try_params(self, n_iterations, params):
+        n_iterations = int(round(n_iterations))
+        embedding, model = models.get_models_by_params(params)
+        model.compile(optimizer='adam',
+                      loss='categorical_crossentropy',
+                      metrics=['accuracy'])
+
+        history = model.fit(self.X,
+                            self.y,
+                            batch_size=params["batch_size"],
+                            epochs=n_iterations,
+                            shuffle=True,
+                            validation_split=0.2)
+
+        return {"loss": history.history['val_loss'][-1]}  # held-out loss from the validation split
+
+    # can be called multiple times
+    def run(self, skip_last=0, dry_run=False):
+
+        for s in reversed(range(self.s_max + 1)):
+
+            # initial number of configurations
+            n = int(ceil(self.B / self.max_iter / (s + 1) * self.eta ** s))
+
+            # initial number of iterations per config
+            r = self.max_iter * self.eta ** (-s)
+
+            # n random configurations
+            T = [self.get_params() for i in range(n)]
+
+            for i in range((s + 1) - int(skip_last)):  # changed from s + 1
+
+                # run each of the n configs for n_iterations
+                # and keep the best n_configs / eta configurations
+
+                n_configs = n * self.eta ** (-i)
+                n_iterations = r * self.eta ** (i)
+
+                print("\n*** {} configurations x {:.1f} iterations each".format(
+                    n_configs, n_iterations))
+
+                val_losses = []
+                early_stops = []
+
+                for t in T:
+
+                    self.counter += 1
+                    print("\n{} | {} | lowest loss so far: {:.4f} (run {})\n".format(
+                        self.counter, ctime(), self.best_loss, self.best_counter))
+
+                    start_time = time()
+
+                    if dry_run:
+                        result = {'loss': rng(), 'log_loss': rng(), 'auc': rng()}
+                    else:
+                        result = self.try_params(n_iterations, t)  # <---
+
+                    assert isinstance(result, dict)
+                    assert 'loss' in result
+
+                    seconds = int(round(time() - start_time))
+                    print("\n{} seconds.".format(seconds))
+
+                    loss = result['loss']
+                    val_losses.append(loss)
+
+                    early_stop = result.get('early_stop', False)
+                    early_stops.append(early_stop)
+
+                    # keeping track of the best result so far (for display only)
+                    # could do it by checking results each time, but hey
+                    if loss < self.best_loss:
+                        self.best_loss = loss
+                        self.best_counter = self.counter
+
+                    result['counter'] = self.counter
+                    result['seconds'] = seconds
+                    result['params'] = t
+                    result['iterations'] = n_iterations
+
+                    self.results.append(result)
+
+                # select a number of best configurations for the next loop
+                # filter out early stops, if any
+                indices = np.argsort(val_losses)
+                T = [T[i] for i in indices if not early_stops[i]]
+                T = T[0:int(n_configs / self.eta)]
+
+        return self.results
diff --git a/main.py b/main.py
index 9243124..5e751f0 100644
--- a/main.py
+++ b/main.py
@@ -3,11 +3,12 @@ import argparse
 from keras.utils import np_utils
 
 import dataset
+import hyperband
 import models
 
 parser = argparse.ArgumentParser()
 
-# parser.add_argument("--modes", action="store", dest="modes", nargs="+")
+parser.add_argument("--modes", action="store", dest="modes", nargs="+", default=[])
 
 parser.add_argument("--train", action="store", dest="train_data",
                     default="data/full_dataset.csv.tar.bz2")
@@ -24,9 +25,9 @@ parser.add_argument("--model", action="store", dest="model",
 # parser.add_argument("--pred", action="store", dest="pred",
 #                     default="")
 #
-# parser.add_argument("--type", action="store", dest="model_type",
-#                     default="simple_conv")
-#
+parser.add_argument("--type", action="store", dest="model_type",
+                    default="paul")
+
 parser.add_argument("--batch", action="store",
dest="batch_size", default=64, type=int) @@ -79,13 +80,52 @@ args = parser.parse_args() # session = tf.Session(config=config) +def main_hyperband(): + char_dict = dataset.get_character_dict() + user_flow_df = dataset.get_user_flow_data(args.train_data) + + params = { + # static params + "type": ["paul"], + "batch_size": [64], + "vocab_size": [len(char_dict) + 1], + "window_size": [10], + "domain_length": [40], + "flow_features": [3], + "input_length": [40], + # model params + "embedding_size": [16, 32, 64, 128, 256, 512], + "filter_embedding": [16, 32, 64, 128, 256, 512], + "kernel_embedding": [1, 3, 5, 7, 9], + "hidden_embedding": [16, 32, 64, 128, 256, 512], + "dropout": [0.5], + "domain_features": [16, 32, 64, 128, 256, 512], + "filter_main": [16, 32, 64, 128, 256, 512], + "kernels_main": [1, 3, 5, 7, 9], + "dense_main": [16, 32, 64, 128, 256, 512], + } + param = hyperband.sample_params(params) + print(param) + + print("create training dataset") + domain_tr, flow_tr, client_tr, server_tr = dataset.create_dataset_from_flows( + user_flow_df, char_dict, + max_len=args.domain_length, + window_size=args.window) + client_tr = np_utils.to_categorical(client_tr, 2) + server_tr = np_utils.to_categorical(server_tr, 2) + + hp = hyperband.Hyperband(params, [domain_tr, flow_tr], [client_tr, server_tr]) + hp.run() + + def main_train(): # parameter cnnDropout = 0.5 cnnHiddenDims = 512 kernel_size = 3 filters = 128 - network = models.pauls_networks + network = models.pauls_networks if args.model_type == "paul" else models.renes_networks char_dict = dataset.get_character_dict() user_flow_df = dataset.get_user_flow_data(args.train_data) @@ -94,6 +134,8 @@ def main_train(): domain_tr, flow_tr, client_tr, server_tr = dataset.create_dataset_from_flows( user_flow_df, char_dict, max_len=args.domain_length, window_size=args.window) + client_tr = np_utils.to_categorical(client_tr, 2) + server_tr = np_utils.to_categorical(server_tr, 2) shared_cnn = network.get_embedding(len(char_dict) + 1, args.embedding, args.domain_length, args.hidden_char_dims, kernel_size, args.domain_embedding, 0.5) @@ -105,11 +147,9 @@ def main_train(): model.summary() model.compile(optimizer='adam', - loss='binary_crossentropy', + loss='categorical_crossentropy', metrics=['accuracy']) - client_tr = np_utils.to_categorical(client_tr, 2) - server_tr = np_utils.to_categorical(server_tr, 2) model.fit([domain_tr, flow_tr], [client_tr, server_tr], batch_size=args.batch_size, @@ -117,6 +157,8 @@ def main_train(): shuffle=True, validation_split=0.2) + model.save(args.model) + def main_test(): char_dict = dataset.get_character_dict() @@ -154,7 +196,16 @@ def main_score(): def main(): - main_train() + if "train" in args.modes: + main_train() + if "hyperband" in args.modes: + main_hyperband() + if "test" in args.modes: + main_test() + if "fancy" in args.modes: + main_visualization() + if "score" in args.modes: + main_score() if __name__ == "__main__": diff --git a/models/pauls_networks.py b/models/pauls_networks.py index 837fc02..402f5da 100644 --- a/models/pauls_networks.py +++ b/models/pauls_networks.py @@ -2,6 +2,20 @@ import keras from keras.engine import Input, Model from keras.layers import Embedding, Conv1D, GlobalMaxPooling1D, Dense, Dropout, Activation, TimeDistributed +best_config = { + 'domain_features': 32, + 'drop_out': 0.5, + 'embedding_size': 64, + 'filter_main': 512, + 'flow_features': 3, + 'hidden_dims': 32, + 'filter_embedding': 32, + 'hidden_embedding': 32, + 'kernel_embedding': 8, + 'kernels_main': 8, + 'input_length': 40 +} + 
 def get_embedding(vocab_size, embedding_size, input_length, filters, kernel_size, hidden_dims, drop_out=0.5):
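
Note on the schedule: Hyperband.run() above walks brackets s = s_max..0; with
max_iter = 81 and eta = 3 that is five brackets, from 81 configs x 1 iteration
down to 5 configs x 81 iterations. Below is a minimal standalone sketch of
that bracket arithmetic, with a random dummy loss standing in for
try_params(); it mirrors the names in run() but is an illustration, not part
of the patch:

    from math import ceil, log
    from random import random

    max_iter, eta = 81, 3
    s_max = int(log(max_iter) / log(eta))  # 4
    B = (s_max + 1) * max_iter             # budget per bracket: 405

    for s in reversed(range(s_max + 1)):
        n = int(ceil(B / max_iter / (s + 1) * eta ** s))  # initial configs
        r = max_iter * eta ** (-s)                        # iterations per config
        losses = {c: random() for c in range(n)}          # dummy "training"
        for i in range(s + 1):
            n_configs = n * eta ** (-i)
            n_iterations = r * eta ** i
            print("s={} round={}: {:.0f} configs x {:.1f} iterations".format(
                s, i, n_configs, n_iterations))
            # keep the best n_configs / eta configurations, as run() does
            keep = sorted(losses, key=losses.get)[:int(n_configs / eta)]
            losses = {c: losses[c] for c in keep}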
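Likewise, sample_params() draws one value uniformly and independently per key,
so the lists in main_hyperband()'s params dict act as sampling distributions
rather than an exhaustive grid. A quick check with a toy distribution (the
keys and values here are made up for illustration):

    from hyperband import sample_params

    dist = {"embedding_size": [16, 32, 64], "dropout": [0.5]}
    print(sample_params(dist))  # e.g. {'embedding_size': 32, 'dropout': 0.5}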