add staggered model training for intermediate server prediction; refactor model return values

René Knaebel 2017-09-07 14:24:55 +02:00
parent 2080444fb7
commit 5bd8e41711
6 changed files with 92 additions and 70 deletions
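The new "staggered" training mode first fits the joint model on the server label only, then freezes the shared dense_server layer and continues training on the client label. A condensed sketch of that two-phase schedule as wired up in main.py below; callbacks, class weights, and checkpointing are omitted, and model, domain_tr, flow_tr, client_tr, server_tr, args.batch_size and args.epochs are taken from the surrounding training code:

    # Phase 1: optimize only the "server" head of the two-output model.
    model.compile(optimizer='adam',
                  loss='binary_crossentropy',
                  loss_weights={"client": 0.0, "server": 1.0},
                  metrics=['accuracy'])
    model.fit({"ipt_domains": domain_tr, "ipt_flows": flow_tr},
              {"client": client_tr, "server": server_tr},
              batch_size=args.batch_size, epochs=args.epochs,
              shuffle=True, validation_split=0.2)

    # Phase 2: freeze the server branch, recompile (the trainable flag only
    # takes effect after compiling again), and optimize only the "client" head.
    model.get_layer("dense_server").trainable = False
    model.compile(optimizer='adam',
                  loss='binary_crossentropy',
                  loss_weights={"client": 1.0, "server": 0.0},
                  metrics=['accuracy'])
    model.fit({"ipt_domains": domain_tr, "ipt_flows": flow_tr},
              {"client": client_tr, "server": server_tr},
              batch_size=args.batch_size, epochs=args.epochs,
              shuffle=True, validation_split=0.2)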


@@ -1,16 +1,19 @@
run:
python3 main.py --mode train --train data/rk_mini.csv.gz --model results/test1 --epochs 10 --depth small \
python3 main.py --mode train --train data/rk_mini.csv.gz --model results/test1 --epochs 2 --depth small \
--hidden_char_dims 16 --domain_embd 8 --batch 64 --balanced_weights --type final
python3 main.py --mode train --train data/rk_mini.csv.gz --model results/test2 --epochs 10 --depth small \
python3 main.py --mode train --train data/rk_mini.csv.gz --model results/test2 --epochs 2 --depth small \
--hidden_char_dims 16 --domain_embd 8 --batch 64 --balanced_weights --type inter
python3 main.py --mode train --train data/rk_mini.csv.gz --model results/test3 --epochs 10 --depth medium \
python3 main.py --mode train --train data/rk_mini.csv.gz --model results/test3 --epochs 2 --depth medium \
--hidden_char_dims 16 --domain_embd 8 --batch 64 --balanced_weights --type final
python3 main.py --mode train --train data/rk_mini.csv.gz --model results/test4 --epochs 10 --depth medium \
python3 main.py --mode train --train data/rk_mini.csv.gz --model results/test4 --epochs 2 --depth medium \
--hidden_char_dims 16 --domain_embd 8 --batch 64 --balanced_weights --type inter
python3 main.py --mode train --train data/rk_mini.csv.gz --model results/test5 --epochs 2 --depth small \
--hidden_char_dims 16 --domain_embd 8 --batch 64 --balanced_weights --type staggered
test:
python3 main.py --mode test --batch 128 --models results/test* --test data/rk_mini.csv.gz

main.py

@@ -6,7 +6,7 @@ import numpy as np
import pandas as pd
import tensorflow as tf
from keras.callbacks import ModelCheckpoint, CSVLogger, EarlyStopping
from keras.models import load_model
from keras.models import load_model, Model
import arguments
import dataset
@@ -123,11 +123,6 @@ def main_train(param=None):
args.train_data,
args.domain_length,
args.window)
if not param:
param = PARAMS
logger.info(f"Generator model with params: {param}")
embedding, model, new_model = models.get_models_by_params(param)
logger.info("define callbacks")
callbacks = []
callbacks.append(ModelCheckpoint(filepath=args.clf_model,
@@ -154,26 +149,45 @@ def main_train(param=None):
logger.info(f"select model: {args.model_type}")
if args.model_type == "staggered":
if not param:
param = PARAMS
logger.info(f"Generator model with params: {param}")
embedding, model, new_model = models.get_models_by_params(param)
if args.model_output == "both":
model = Model(inputs=[new_model.in_domains, new_model.in_flows],
outputs=(new_model.out_server, new_model.out_client))
else:
raise Exception("unknown model output")
server_tr = np.expand_dims(server_windows_tr, 2)
model = new_model
logger.info("compile and train model")
embedding.summary()
model.summary()
logger.info(model.get_config())
model.outputs
model.compile(optimizer='adam',
loss='binary_crossentropy',
loss_weights={"client": 0.0, "server": 1.0},
metrics=['accuracy'] + custom_metrics)
if args.model_output == "both":
labels = [client_tr, server_tr]
else:
raise ValueError("unknown model output")
model.fit({"ipt_domains": domain_tr, "ipt_flows": flow_tr},
{"client": client_tr, "server": server_tr},
batch_size=args.batch_size,
epochs=args.epochs,
shuffle=True,
validation_split=0.2,
class_weight=custom_class_weights)
model.fit([domain_tr, flow_tr],
labels,
model.get_layer("dense_server").trainable = False
model.compile(optimizer='adam',
loss='binary_crossentropy',
loss_weights={"client": 1.0, "server": 0.0},
metrics=['accuracy'] + custom_metrics)
model.summary()
model.fit({"ipt_domains": domain_tr, "ipt_flows": flow_tr},
{"client": client_tr, "server": server_tr},
batch_size=args.batch_size,
epochs=args.epochs,
callbacks=callbacks,
@@ -182,6 +196,21 @@ def main_train(param=None):
class_weight=custom_class_weights)
else:
if not param:
param = PARAMS
logger.info(f"Generator model with params: {param}")
embedding, model, new_model = models.get_models_by_params(param)
if args.model_output == "both":
model = Model(inputs=[model.in_domains, model.in_flows], outputs=(model.out_client, model.out_server))
new_model = Model(inputs=[new_model.in_domains, new_model.in_flows],
outputs=(new_model.out_client, new_model.out_server))
elif args.model_output == "client":
model = Model(inputs=[model.in_domains, model.in_flows], outputs=(model.out_client,))
new_model = Model(inputs=[new_model.in_domains, new_model.in_flows], outputs=(new_model.out_client,))
else:
raise Exception("unknown model output")
if args.model_type == "inter":
server_tr = np.expand_dims(server_windows_tr, 2)
model = new_model
@@ -301,9 +330,9 @@ def main_visualization():
visualize.plot_confusion_matrix(client_val, client_pred.flatten().round(),
"{}/client_cov.png".format(args.model_path),
normalize=False, title="Client Confusion Matrix")
# visualize.plot_confusion_matrix(server_val.argmax(1), server_pred.argmax(1),
# "{}/server_cov.png".format(args.model_path),
# normalize=False, title="Server Confusion Matrix")
visualize.plot_confusion_matrix(user_vals, user_preds.flatten().round(),
"{}/user_cov.png".format(args.model_path),
normalize=False, title="User Confusion Matrix")
logger.info("visualize embedding")
domain_encs, labels = dataset.load_or_generate_domains(args.test_data, args.domain_length)
domain_embedding = np.load(args.model_path + "/domain_embds.npy")


@@ -34,13 +34,13 @@ def get_models_by_params(params: dict):
embedding_model = networks.get_embedding(embedding_size, input_length, filter_embedding, kernel_embedding,
hidden_embedding, dropout)
predict_model = networks.get_model(dropout, flow_features, domain_features, window_size, domain_length,
filter_main, kernel_main, dense_dim, embedding_model, model_output)
old_model = networks.get_model(dropout, flow_features, domain_features, window_size, domain_length,
filter_main, kernel_main, dense_dim, embedding_model, model_output)
new_model = networks.get_new_model(dropout, flow_features, domain_features, window_size, domain_length,
filter_main, kernel_main, dense_dim, embedding_model, model_output)
return embedding_model, predict_model, new_model
return embedding_model, old_model, new_model
def get_metrics():


@@ -1,9 +1,13 @@
import keras
from keras.engine import Input, Model
from keras.engine import Input, Model as KerasModel
from keras.layers import Embedding, Conv1D, GlobalMaxPooling1D, Dense, Dropout, Activation, TimeDistributed
import dataset
from collections import namedtuple
Model = namedtuple("Model", ["in_domains", "in_flows", "out_client", "out_server"])
best_config = {
"type": "paul",
"batch_size": 64,
@@ -26,7 +30,7 @@ best_config = {
}
def get_embedding(embedding_size, input_length, filter_size, kernel_size, hidden_dims, drop_out=0.5):
def get_embedding(embedding_size, input_length, filter_size, kernel_size, hidden_dims, drop_out=0.5) -> KerasModel:
x = y = Input(shape=(input_length,))
y = Embedding(input_dim=dataset.get_vocab_size(), output_dim=embedding_size)(y)
y = Conv1D(filter_size, kernel_size, activation='relu')(y)
@@ -34,11 +38,11 @@ def get_embedding(embedding_size, input_length, filter_size, kernel_size, hidden
y = Dropout(drop_out)(y)
y = Dense(hidden_dims)(y)
y = Activation('relu')(y)
return Model(x, y)
return KerasModel(x, y)
def get_model(cnnDropout, flow_features, domain_features, window_size, domain_length, cnn_dims, kernel_size,
dense_dim, cnn, model_output="both"):
dense_dim, cnn, model_output="both") -> Model:
ipt_domains = Input(shape=(window_size, domain_length), name="ipt_domains")
encoded = TimeDistributed(cnn)(ipt_domains)
ipt_flows = Input(shape=(window_size, flow_features), name="ipt_flows")
@@ -52,40 +56,31 @@ def get_model(cnnDropout, flow_features, domain_features, window_size, domain_le
y = GlobalMaxPooling1D()(y)
y = Dropout(cnnDropout)(y)
y = Dense(dense_dim, activation='relu')(y)
y1 = Dense(1, activation='sigmoid', name="client")(y)
y2 = Dense(1, activation='sigmoid', name="server")(y)
out_client = Dense(1, activation='sigmoid', name="client")(y)
out_server = Dense(1, activation='sigmoid', name="server")(y)
if model_output == "both":
return Model(inputs=[ipt_domains, ipt_flows], outputs=(y1, y2))
elif model_output == "client":
return Model(inputs=[ipt_domains, ipt_flows], outputs=(y1,))
elif model_output == "server":
return Model(inputs=[ipt_domains, ipt_flows], outputs=(y2,))
return Model(ipt_domains, ipt_flows, out_client, out_server)
def get_new_model(dropout, flow_features, domain_features, window_size, domain_length, cnn_dims, kernel_size,
dense_dim, cnn, model_output="both"):
dense_dim, cnn, model_output="both") -> Model:
ipt_domains = Input(shape=(window_size, domain_length), name="ipt_domains")
ipt_flows = Input(shape=(window_size, flow_features), name="ipt_flows")
encoded = TimeDistributed(cnn)(ipt_domains)
merged = keras.layers.concatenate([encoded, ipt_flows], -1)
y = Dense(dense_dim, activation="relu")(merged)
y2 = Dense(1, activation="sigmoid", name="server")(y)
y = Dense(dense_dim, activation="relu", name="dense_server")(merged)
out_server = Dense(1, activation="sigmoid", name="server")(y)
merged = keras.layers.concatenate([merged, y], -1)
# CNN processing small slices of flow windows
y = Conv1D(cnn_dims,
kernel_size,
activation='relu',
input_shape=(window_size, domain_features + flow_features))(y)
input_shape=(window_size, domain_features + flow_features))(merged)
# remove temporal dimension by global max pooling
y = GlobalMaxPooling1D()(y)
y = Dropout(dropout)(y)
y = Dense(dense_dim, activation='relu')(y)
y = Dense(dense_dim, activation='relu', name="dense_client")(y)
y1 = Dense(1, activation='sigmoid', name="client")(y)
out_client = Dense(1, activation='sigmoid', name="client")(y)
if model_output == "both":
return Model(inputs=[ipt_domains, ipt_flows], outputs=(y1, y2))
elif model_output == "client":
return Model(inputs=[ipt_domains, ipt_flows], outputs=(y1,))
elif model_output == "server":
return Model(inputs=[ipt_domains, ipt_flows], outputs=(y2,))
return Model(ipt_domains, ipt_flows, out_client, out_server)
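Behind the "refactor model return values" part of the message: get_model() and get_new_model() no longer build Keras models themselves but return the lightweight Model namedtuple of input and output tensors defined at the top of this file, and the caller in main.py selects the outputs it needs and wraps them itself. A minimal sketch of that wrapping, mirroring the non-staggered branch in main.py (the names old_net, new_net and clf are illustrative, not from the diff):

    from keras.models import Model as KerasModel  # the functional Keras Model, aliased to avoid clashing with the namedtuple

    embedding, old_net, new_net = models.get_models_by_params(param)
    if args.model_output == "both":
        clf = KerasModel(inputs=[new_net.in_domains, new_net.in_flows],
                         outputs=[new_net.out_client, new_net.out_server])
    elif args.model_output == "client":
        clf = KerasModel(inputs=[new_net.in_domains, new_net.in_flows],
                         outputs=[new_net.out_client])
    else:
        raise Exception("unknown model output")

The same change explains the edited import at the top of both network modules: Model now names the namedtuple, so the Keras class is imported as KerasModel.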


@@ -1,10 +1,14 @@
import keras
from keras.engine import Input, Model
from keras.engine import Input, Model as KerasModel
from keras.layers import Embedding, Conv1D, GlobalMaxPooling1D, Dense, Dropout, TimeDistributed, MaxPool1D, \
GlobalAveragePooling1D
import dataset
from collections import namedtuple
Model = namedtuple("Model", ["in_domains", "in_flows", "out_client", "out_server"])
def get_embedding(embedding_size, input_length, filter_size, kernel_size, hidden_dims, drop_out=0.5):
x = y = Input(shape=(input_length,))
@@ -14,7 +18,7 @@ def get_embedding(embedding_size, input_length, filter_size, kernel_size, hidden
y = Conv1D(filter_size, kernel_size=3, activation='relu')(y)
y = GlobalAveragePooling1D()(y)
y = Dense(hidden_dims, activation="relu")(y)
return Model(x, y)
return KerasModel(x, y)
def get_model(cnnDropout, flow_features, domain_features, window_size, domain_length, cnn_dims, kernel_size,
@@ -35,15 +39,10 @@ def get_model(cnnDropout, flow_features, domain_features, window_size, domain_le
y = Dropout(cnnDropout)(y)
y = Dense(dense_dim, activation='relu')(y)
y = Dense(dense_dim // 2, activation='relu')(y)
y1 = Dense(1, activation='sigmoid', name="client")(y)
y2 = Dense(1, activation='sigmoid', name="server")(y)
out_client = Dense(1, activation='sigmoid', name="client")(y)
out_server = Dense(1, activation='sigmoid', name="server")(y)
if model_output == "both":
return Model(inputs=[ipt_domains, ipt_flows], outputs=(y1, y2))
elif model_output == "client":
return Model(inputs=[ipt_domains, ipt_flows], outputs=(y1,))
elif model_output == "server":
return Model(inputs=[ipt_domains, ipt_flows], outputs=(y2,))
return Model(ipt_domains, ipt_flows, out_client, out_server)
def get_new_model(dropout, flow_features, domain_features, window_size, domain_length, cnn_dims, kernel_size,
@@ -53,7 +52,7 @@ def get_new_model(dropout, flow_features, domain_features, window_size, domain_l
encoded = TimeDistributed(cnn)(ipt_domains)
merged = keras.layers.concatenate([encoded, ipt_flows], -1)
y = Dense(dense_dim, activation="relu")(merged)
y2 = Dense(1, activation="sigmoid", name="server")(y)
out_server = Dense(1, activation="sigmoid", name="server")(y)
# CNN processing small slices of flow windows
y = Conv1D(filters=cnn_dims, kernel_size=kernel_size, activation='relu', padding="same",
input_shape=(window_size, domain_features + flow_features))(y)
@@ -66,11 +65,7 @@ def get_new_model(dropout, flow_features, domain_features, window_size, domain_l
y = Dropout(dropout)(y)
y = Dense(dense_dim, activation='relu')(y)
y1 = Dense(1, activation='sigmoid', name="client")(y)
out_client = Dense(1, activation='sigmoid', name="client")(y)
return Model(ipt_domains, ipt_flows, out_client, out_server)
if model_output == "both":
return Model(inputs=[ipt_domains, ipt_flows], outputs=(y1, y2))
elif model_output == "client":
return Model(inputs=[ipt_domains, ipt_flows], outputs=(y1,))
elif model_output == "server":
return Model(inputs=[ipt_domains, ipt_flows], outputs=(y2,))

run.sh

@@ -7,7 +7,7 @@ DATADIR=$2
for output in client both
do
for depth in small medium
for depth in small
do
for mtype in inter final
do
@@ -29,7 +29,7 @@ do
done
done
for depth in small medium
for depth in small
do
python main.py --mode train \
--train ${DATADIR}/currentData.csv \
@@ -41,6 +41,6 @@ do
--batch 256 \
--balanced_weights \
--model_output both \
--type inter \
--type staggered \
--depth ${depth}
done