2017-09-10 18:06:40 +02:00
|
|
|
from collections import namedtuple
|
|
|
|
|
2017-07-05 18:10:22 +02:00
|
|
|
import keras
|
2017-09-07 14:24:55 +02:00
|
|
|
from keras.engine import Input, Model as KerasModel
|
2017-09-10 18:06:40 +02:00
|
|
|
from keras.layers import Activation, Conv1D, Dense, Dropout, Embedding, GlobalMaxPooling1D, TimeDistributed
|
2017-07-05 18:10:22 +02:00
|
|
|
|
2017-07-30 13:47:11 +02:00
|
|
|
import dataset
|
|
|
|
|
2017-09-07 14:24:55 +02:00
|
|
|
# Plain record bundling the tensors of one network: the two inputs (domains,
# flows) followed by the two outputs (client, server). It is distinct from the
# Keras model class, which this file imports under the name ``KerasModel``.
Model = namedtuple(
    "Model",
    ("in_domains", "in_flows", "out_client", "out_server"),
)
|
|
|
|
|
2017-07-07 16:48:10 +02:00
|
|
|
# Default hyper-parameter set.
#
# The original literal listed "flow_features" twice (same value both times);
# a repeated key in a dict literal is silently overwritten, so the redundant
# entry has been dropped. Keys, their order and their values are unchanged.
best_config = {
    "type": "paul",
    "batch_size": 64,
    # Key names below match parameters of get_model / get_new_model.
    "window_size": 10,
    "domain_length": 40,
    "flow_features": 3,
    # NOTE(review): both "dropout" and "drop_out" exist with the same value.
    # Both are kept because code outside this file may read either spelling.
    "dropout": 0.5,
    "domain_features": 32,
    "drop_out": 0.5,
    # NOTE(review): the remaining keys presumably feed get_embedding and the
    # main CNN (filter / kernel / dense sizes) -- confirm against the caller.
    "embedding_size": 64,
    "filter_main": 512,
    "dense_main": 32,
    "filter_embedding": 32,
    "hidden_embedding": 32,
    "kernel_embedding": 8,
    "kernels_main": 8,
    "input_length": 40,
}
|
|
|
|
|
2017-07-05 18:10:22 +02:00
|
|
|
|
2017-09-07 14:24:55 +02:00
|
|
|
def get_embedding(embedding_size, input_length, filter_size, kernel_size, hidden_dims, drop_out=0.5) -> KerasModel:
    """Build the encoder network that is applied to a single domain.

    Layer stack, in order: embedding -> ReLU 1D convolution -> global max
    pooling -> dropout -> dense -> ReLU activation.

    Args:
        embedding_size: Output dimension of the embedding layer.
        input_length: Length of the index sequence fed to the network.
        filter_size: Number of filters of the 1D convolution.
        kernel_size: Kernel width of the 1D convolution.
        hidden_dims: Number of units of the final dense layer.
        drop_out: Rate handed to the dropout layer.

    Returns:
        A Keras model mapping the input sequence to the activated dense output.
    """
    domain_input = Input(shape=(input_length,))

    # The embedding vocabulary size is supplied by the dataset module.
    embedded = Embedding(
        input_dim=dataset.get_vocab_size(),
        output_dim=embedding_size,
    )(domain_input)

    convolved = Conv1D(filters=filter_size, kernel_size=kernel_size, activation='relu')(embedded)
    pooled = GlobalMaxPooling1D()(convolved)
    regularized = Dropout(drop_out)(pooled)

    # Dense and its ReLU stay separate layers, exactly as originally defined.
    projected = Dense(hidden_dims)(regularized)
    features = Activation('relu')(projected)

    return KerasModel(domain_input, features)
|
2017-07-05 18:10:22 +02:00
|
|
|
|
|
|
|
|
|
|
|
def get_model(cnnDropout, flow_features, domain_features, window_size, domain_length, cnn_dims, kernel_size,
              dense_dim, cnn, model_output="both") -> Model:
    """Assemble the windowed network with a shared trunk and two sigmoid heads.

    ``cnn`` is applied to every step of the domain window through
    ``TimeDistributed``; its output is concatenated with the flow features on
    the last axis and passed through convolution -> global max pooling ->
    dropout -> dense. Both heads ("client" and "server") read that same dense
    output.

    Args:
        cnnDropout: Rate of the dropout layer after pooling.
        flow_features: Size of the last axis of the flow input.
        domain_features: Only used for the ``input_shape`` hint given to the
            convolution layer.
        window_size: Number of steps per window (second axis of both inputs).
        domain_length: Size of the last axis of the domain input.
        cnn_dims: Number of filters of the window convolution.
        kernel_size: Kernel width of the window convolution.
        dense_dim: Number of units of the dense layer feeding both heads.
        cnn: Layer/model wrapped in ``TimeDistributed`` to encode each domain.
        model_output: Not referenced by this function.

    Returns:
        ``Model`` namedtuple holding the two input tensors and the two output
        tensors (client, server).
    """
    domains_in = Input(shape=(window_size, domain_length), name="ipt_domains")
    domain_codes = TimeDistributed(cnn, name="domain_cnn")(domains_in)
    flows_in = Input(shape=(window_size, flow_features), name="ipt_flows")
    window = keras.layers.concatenate([domain_codes, flows_in], axis=-1)

    # Convolution over the steps of the flow window. The input_shape hint is
    # kept from the original although the layer is applied directly to `window`.
    window_conv = Conv1D(
        filters=cnn_dims,
        kernel_size=kernel_size,
        activation='relu',
        input_shape=(window_size, domain_features + flow_features),
    )
    hidden = window_conv(window)
    # Global max pooling removes the temporal dimension.
    hidden = GlobalMaxPooling1D()(hidden)
    hidden = Dropout(cnnDropout)(hidden)
    hidden = Dense(dense_dim, activation='relu')(hidden)

    client_head = Dense(1, activation='sigmoid', name="client")(hidden)
    server_head = Dense(1, activation='sigmoid', name="server")(hidden)

    return Model(
        in_domains=domains_in,
        in_flows=flows_in,
        out_client=client_head,
        out_server=server_head,
    )
|
2017-07-29 19:42:36 +02:00
|
|
|
|
|
|
|
|
|
|
|
def get_new_model(dropout, flow_features, domain_features, window_size, domain_length, cnn_dims, kernel_size,
                  dense_dim, cnn, model_output="both") -> Model:
    """Assemble the variant network whose server branch feeds the client branch.

    The per-step domain encoding (``cnn`` wrapped in ``TimeDistributed``) is
    concatenated with the flow features. A dense layer ("dense_server") on
    that tensor drives the "server" head. Its output is concatenated back onto
    the merged features, and the result goes through convolution -> global max
    pooling -> dropout -> dense ("dense_client") into the "client" head.

    Args:
        dropout: Rate of the dropout layer after pooling.
        flow_features: Size of the last axis of the flow input.
        domain_features: Not referenced by this function.
        window_size: Number of steps per window (second axis of both inputs).
        domain_length: Size of the last axis of the domain input.
        cnn_dims: Number of filters of the window convolution.
        kernel_size: Kernel width of the window convolution.
        dense_dim: Number of units of both dense layers.
        cnn: Layer/model wrapped in ``TimeDistributed`` to encode each domain.
        model_output: Not referenced by this function.

    Returns:
        ``Model`` namedtuple holding the two input tensors and the two output
        tensors (client, server).
    """
    domains_in = Input(shape=(window_size, domain_length), name="ipt_domains")
    flows_in = Input(shape=(window_size, flow_features), name="ipt_flows")
    domain_codes = TimeDistributed(cnn, name="domain_cnn")(domains_in)
    window = keras.layers.concatenate([domain_codes, flows_in], axis=-1)

    # Server branch: works on the un-pooled window tensor.
    # NOTE(review): this presumably yields one server prediction per window
    # step rather than one per window -- confirm against the training targets.
    server_hidden = Dense(dense_dim, activation="relu", name="dense_server")(window)
    server_head = Dense(1, activation="sigmoid", name="server")(server_hidden)

    # Client branch: sees the merged features plus the server branch's hidden
    # representation.
    client_window = keras.layers.concatenate([window, server_hidden], axis=-1)

    # Convolution over the steps of the flow window.
    hidden = Conv1D(filters=cnn_dims, kernel_size=kernel_size, activation='relu')(client_window)
    # Global max pooling removes the temporal dimension.
    hidden = GlobalMaxPooling1D()(hidden)
    hidden = Dropout(dropout)(hidden)
    hidden = Dense(dense_dim, activation='relu', name="dense_client")(hidden)

    client_head = Dense(1, activation='sigmoid', name="client")(hidden)

    return Model(
        in_domains=domains_in,
        in_flows=flows_in,
        out_client=client_head,
        out_server=server_head,
    )
|