From fbe6d6a584ce168ca252f412bd65f7f3386a0867 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ren=C3=A9=20Knaebel?= Date: Sun, 17 Sep 2017 17:26:09 +0200 Subject: [PATCH] remove input shape of first conv layer in networks because unnecessary add selu activation to deeper network designs --- models/pauls_networks.py | 6 ++-- models/renes_networks.py | 68 ++++++++++++++++++++++++++++------------ 2 files changed, 50 insertions(+), 24 deletions(-) diff --git a/models/pauls_networks.py b/models/pauls_networks.py index 4f216db..79f527b 100644 --- a/models/pauls_networks.py +++ b/models/pauls_networks.py @@ -52,8 +52,7 @@ def get_model(cnnDropout, flow_features, domain_features, window_size, domain_le # CNN processing a small slides of flow windows y = Conv1D(cnn_dims, kernel_size, - activation='relu', - input_shape=(window_size, domain_features + flow_features))(merged) + activation='relu')(merged) # remove temporal dimension by global max pooling y = GlobalMaxPooling1D()(y) y = Dropout(cnnDropout)(y) @@ -78,8 +77,7 @@ def get_new_model(dropout, flow_features, domain_features, window_size, domain_l # CNN processing a small slides of flow windows y = Conv1D(cnn_dims, kernel_size, - activation='relu', - input_shape=(window_size, domain_features + flow_features))(merged) + activation='relu')(merged) # remove temporal dimension by global max pooling y = GlobalMaxPooling1D()(y) y = Dropout(dropout)(y) diff --git a/models/renes_networks.py b/models/renes_networks.py index a3e29f6..bff63b2 100644 --- a/models/renes_networks.py +++ b/models/renes_networks.py @@ -1,11 +1,26 @@ +from collections import namedtuple + import keras +from keras.activations import elu from keras.engine import Input, Model as KerasModel -from keras.layers import Embedding, Conv1D, GlobalMaxPooling1D, Dense, Dropout, TimeDistributed, MaxPool1D, \ - GlobalAveragePooling1D +from keras.layers import Conv1D, Dense, Dropout, Embedding, GlobalAveragePooling1D, GlobalMaxPooling1D, MaxPool1D, \ + TimeDistributed import dataset 
-from collections import namedtuple + +def selu(x): + """Scaled Exponential Linear Unit. (Klambauer et al., 2017) + # Arguments + x: A tensor or variable to compute the activation function for. + # References + - [Self-Normalizing Neural Networks](https://arxiv.org/abs/1706.02515) + # copied from keras.io + """ + alpha = 1.6732632423543772848170429916717 + scale = 1.0507009873554804934193349852946 + return scale * elu(x, alpha) + Model = namedtuple("Model", ["in_domains", "in_flows", "out_client", "out_server"]) @@ -13,9 +28,9 @@ Model = namedtuple("Model", ["in_domains", "in_flows", "out_client", "out_server def get_embedding(embedding_size, input_length, filter_size, kernel_size, hidden_dims, drop_out=0.5): x = y = Input(shape=(input_length,)) y = Embedding(input_dim=dataset.get_vocab_size(), output_dim=embedding_size)(y) - y = Conv1D(filter_size, kernel_size=5, activation='relu')(y) - y = Conv1D(filter_size, kernel_size=3, activation='relu')(y) - y = Conv1D(filter_size, kernel_size=3, activation='relu')(y) + y = Conv1D(filter_size, kernel_size=5, activation=selu)(y) + y = Conv1D(filter_size, kernel_size=3, activation=selu)(y) + y = Conv1D(filter_size, kernel_size=3, activation=selu)(y) y = GlobalAveragePooling1D()(y) y = Dense(hidden_dims, activation="relu")(y) return KerasModel(x, y) @@ -28,17 +43,17 @@ def get_model(cnnDropout, flow_features, domain_features, window_size, domain_le ipt_flows = Input(shape=(window_size, flow_features), name="ipt_flows") merged = keras.layers.concatenate([encoded, ipt_flows], -1) # CNN processing a small slides of flow windows - y = Conv1D(filters=cnn_dims, kernel_size=kernel_size, activation='relu', padding="same", + y = Conv1D(filters=cnn_dims, kernel_size=kernel_size, activation=selu, padding="same", input_shape=(window_size, domain_features + flow_features))(merged) y = MaxPool1D(pool_size=3, strides=1)(y) - y = Conv1D(filters=cnn_dims, kernel_size=kernel_size, activation='relu', padding="same")(y) + y = 
Conv1D(filters=cnn_dims, kernel_size=kernel_size, activation=selu, padding="same")(y) y = MaxPool1D(pool_size=3, strides=1)(y) - y = Conv1D(filters=cnn_dims, kernel_size=kernel_size, activation='relu', padding="same")(y) + y = Conv1D(filters=cnn_dims, kernel_size=kernel_size, activation=selu, padding="same")(y) # remove temporal dimension by global max pooling y = GlobalMaxPooling1D()(y) y = Dropout(cnnDropout)(y) - y = Dense(dense_dim, activation='relu')(y) - y = Dense(dense_dim // 2, activation='relu')(y) + y = Dense(dense_dim, activation=selu)(y) + y = Dense(dense_dim // 2, activation=selu)(y) out_client = Dense(1, activation='sigmoid', name="client")(y) out_server = Dense(1, activation='sigmoid', name="server")(y) @@ -49,22 +64,35 @@ def get_new_model(dropout, flow_features, domain_features, window_size, domain_l dense_dim, cnn, model_output="both"): ipt_domains = Input(shape=(window_size, domain_length), name="ipt_domains") ipt_flows = Input(shape=(window_size, flow_features), name="ipt_flows") - encoded = TimeDistributed(cnn)(ipt_domains) + encoded = TimeDistributed(cnn, name="domain_cnn")(ipt_domains) merged = keras.layers.concatenate([encoded, ipt_flows], -1) y = Dense(dense_dim, activation="relu")(merged) out_server = Dense(1, activation="sigmoid", name="server")(y) + merged = keras.layers.concatenate([merged, y], -1) # CNN processing a small slides of flow windows - y = Conv1D(filters=cnn_dims, kernel_size=kernel_size, activation='relu', padding="same", - input_shape=(window_size, domain_features + flow_features))(y) - y = MaxPool1D(pool_size=3, strides=1)(y) - y = Conv1D(filters=cnn_dims, kernel_size=kernel_size, activation='relu', padding="same")(y) - y = MaxPool1D(pool_size=3, strides=1)(y) - y = Conv1D(filters=cnn_dims, kernel_size=kernel_size, activation='relu', padding="same")(y) + y = Conv1D(filters=cnn_dims, + kernel_size=kernel_size, + activation=selu, + padding="same", + input_shape=(window_size, domain_features + flow_features))(merged) + y = 
MaxPool1D(pool_size=3, + strides=1)(y) + y = Conv1D(filters=cnn_dims, + kernel_size=kernel_size, + activation=selu, + padding="same")(y) + y = MaxPool1D(pool_size=3, + strides=1)(y) + y = Conv1D(filters=cnn_dims, + kernel_size=kernel_size, + activation=selu, + padding="same")(y) # remove temporal dimension by global max pooling y = GlobalMaxPooling1D()(y) y = Dropout(dropout)(y) - y = Dense(dense_dim, activation='relu')(y) - + y = Dense(dense_dim, + activation=selu, + name="dense_client")(y) out_client = Dense(1, activation='sigmoid', name="client")(y) return Model(ipt_domains, ipt_flows, out_client, out_server)