# -*- coding: utf-8 -*-
# implementation of hyperband:
# https://arxiv.org/pdf/1603.06560.pdf
import numpy as np


def get_hyperparameter_configuration(configGenerator, n):
    """Draw ``n`` configurations by calling ``configGenerator`` repeatedly.

    Args:
        configGenerator: zero-argument callable returning one configuration.
        n: number of configurations to draw; may be a float (it is rounded
            up, matching the original ``np.arange(0, n, 1)`` element count).

    Returns:
        A list with one ``configGenerator()`` result per draw.
    """
    return [configGenerator() for _ in range(int(np.ceil(n)))]


def run_then_return_val_loss(config, r_i, modelGenerator, trainData, trainLabel,
                             testData, testLabel):
    """Build a model for ``config``, train it for ``r_i`` epochs and score it.

    Args:
        config: configuration handed to ``modelGenerator``.
        r_i: resource budget; truncated to an integer number of epochs.
        modelGenerator: callable mapping a configuration to a model exposing
            ``fit`` and ``evaluate`` (presumably a Keras model -- the keyword
            arguments used below follow that API), or ``None`` when the
            configuration cannot be built.
        trainData, trainLabel: passed to ``model.fit`` as ``x`` / ``y``.
        testData, testLabel: passed to ``model.evaluate``.

    Returns:
        The first entry of the ``evaluate`` result (or the result itself when
        it is a scalar); ``np.inf`` when ``modelGenerator`` returned ``None``.
    """
    batch_size = 128
    model = modelGenerator(config)
    if model is None:
        # An unbuildable configuration must never be selected as "best".
        # (np.infty was removed in NumPy 2.0; np.inf is the supported name.)
        return np.inf

    model.fit(x=trainData, y=trainLabel,
              epochs=int(r_i), shuffle=True, initial_epoch=0,
              batch_size=batch_size)
    score = model.evaluate(testData, testLabel, batch_size=batch_size)
    # evaluate() may return a bare scalar instead of a list of values.
    return score[0] if np.ndim(score) > 0 else score


def top_k(configurations, L, k):
    """Return the ``k`` configurations with the smallest losses.

    Args:
        configurations: sequence of configurations.
        L: losses, aligned index-by-index with ``configurations``.
        k: how many to keep; may be a float. Values larger than
            ``len(configurations)`` are clamped instead of raising.

    Returns:
        A list of configurations ordered by ascending loss.
    """
    # Cast to int: float indices (e.g. from np.floor) are rejected by NumPy.
    keep = max(0, min(int(np.ceil(k)), len(configurations)))
    order = np.argsort(np.array(L))
    return [configurations[int(idx)] for idx in order[:keep]]


def hyperband(R, nu, modelGenerator,
              configGenerator,
              trainData, trainLabel,
              testData, testLabel,
              outputFile=''):
    """Run the Hyperband search (https://arxiv.org/pdf/1603.06560.pdf).

    Args:
        R: maximum resource (epochs) given to a single configuration; >= 1.
        nu: down-sampling rate between successive-halving rounds; > 1.
        modelGenerator: see ``run_then_return_val_loss``.
        configGenerator: zero-argument callable sampling one configuration.
        trainData, trainLabel, testData, testLabel: forwarded unchanged to
            ``run_then_return_val_loss``.
        outputFile: optional path; when non-empty, one tab-separated line
            ``config<TAB>loss<TAB>r_i`` is appended per evaluation.

    Returns:
        ``(bestConfig, allConfigs, allLosses)`` where the two lists hold every
        evaluation in execution order and ``bestConfig`` has the lowest loss.

    Raises:
        ValueError: if ``R < 1`` or ``nu <= 1`` (no valid bracket exists).
    """
    if R < 1 or nu <= 1:
        raise ValueError("hyperband requires R >= 1 and nu > 1")

    allLosses = []
    allConfigs = []

    # Largest s with nu**s <= R. Computed by exact powers rather than
    # floor(log(R) / log(nu)), which under-counts on exact powers because of
    # floating-point rounding.
    s_max = 0
    while nu ** (s_max + 1) <= R:
        s_max += 1

    for s in range(s_max, -1, -1):
        # n = ceil(B / R * nu**s / (s + 1)) with B = (s_max + 1) * R, i.e.
        # B / R == s_max + 1; multiply before dividing to limit rounding.
        n = int(np.ceil(float((s_max + 1) * nu ** s) / (s + 1)))
        configurations = get_hyperparameter_configuration(configGenerator, n)

        for i in range(s + 1):
            # Divide by exact powers instead of multiplying by nu**-i: a
            # product such as 49 * 7**-2 can land just below 1.0 and would
            # then truncate to 0 epochs / 0 configurations.
            n_i = np.floor(n / float(nu ** i))
            r_i = float(R) / float(nu ** (s - i))

            losses = []
            for config in configurations:
                curLoss = run_then_return_val_loss(config, r_i, modelGenerator,
                                                   trainData, trainLabel,
                                                   testData, testLabel)
                losses.append(curLoss)
                allLosses.append(curLoss)
                allConfigs.append(config)
                if outputFile:
                    # Append per evaluation so partial results survive a crash.
                    with open(outputFile, 'a') as myfile:
                        myfile.write(str(config) + '\t' + str(curLoss) +
                                     '\t' + str(r_i) + '\n')

            configurations = top_k(configurations, losses,
                                   int(np.floor(n_i / nu)))

    bestConfig = top_k(allConfigs, allLosses, 1)
    return (bestConfig[0], allConfigs, allLosses)
from . import pauls_networks
from . import renes_networks


def get_models_by_params(params: dict):
    """Build the embedding model and the prediction model described by ``params``.

    ``params["type"] == "rene"`` selects ``renes_networks``; any other value
    (including a missing key) selects ``pauls_networks``. Every other setting
    is read with ``dict.get``, so a missing key is forwarded as ``None``.

    Returns:
        ``(embedding_model, predict_model)`` as produced by the selected
        module's ``get_embedding`` and ``get_model``.
    """
    if params.get("type") == "rene":
        networks = renes_networks
    else:
        networks = pauls_networks

    # Positional arguments of get_embedding, in call order.
    embedding_keys = (
        "vocab_size",
        "embedding_size",
        "input_length",
        "filter_embedding",
        "kernel_embedding",
        "hidden_embedding",
    )
    embedding_args = [params.get(key) for key in embedding_keys]
    embedding_model = networks.get_embedding(*embedding_args,
                                             drop_out=params.get("dropout"))

    # Positional arguments of get_model, in call order; the embedding model
    # is appended as the last argument.
    # NOTE(review): the keys "kernels_main" and "dense_main" are kept exactly
    # as in the original -- confirm they match what callers actually supply.
    main_keys = (
        "dropout",
        "flow_features",
        "domain_features",
        "window_size",
        "domain_length",
        "filter_main",
        "kernels_main",
        "dense_main",
    )
    main_args = [params.get(key) for key in main_keys]
    predict_model = networks.get_model(*main_args, embedding_model)

    return embedding_model, predict_model
= y = Input(shape=(input_length,)) y = Embedding(input_dim=vocab_size, output_dim=embedding_size)(y) - y = Conv1D(hidden_char_dims, kernel_size=5, activation='relu')(y) + y = Conv1D(filter_size, kernel_size=5, activation='relu')(y) y = MaxPool1D(pool_size=3, strides=1)(y) - y = Conv1D(hidden_char_dims, kernel_size=3, activation='relu')(y) + y = Conv1D(filter_size, kernel_size=3, activation='relu')(y) y = MaxPool1D(pool_size=3, strides=1)(y) - y = Conv1D(hidden_char_dims, kernel_size=3, activation='relu')(y) + y = Conv1D(filter_size, kernel_size=3, activation='relu')(y) y = GlobalMaxPooling1D()(y) y = Dropout(drop_out)(y) y = Dense(hidden_dims, activation="relu")(y) @@ -35,6 +35,7 @@ def get_model(cnnDropout, flow_features, domain_features, window_size, domain_le y = GlobalMaxPooling1D()(y) y = Dropout(cnnDropout)(y) y = Dense(dense_dim, activation='relu')(y) + y = Dense(dense_dim // 2, activation='relu')(y) y1 = Dense(2, activation='softmax', name="client")(y) y2 = Dense(2, activation='softmax', name="server")(y)