Add t-SNE embedding visualization (does not work with large datasets)

fix model loading with custom selu function
This commit is contained in:
René Knaebel 2017-09-22 10:01:12 +02:00
parent e2bf2dc90f
commit 607d74998c
4 changed files with 33 additions and 11 deletions

16
main.py
View File

@ -248,7 +248,7 @@ def main_test():
else: else:
results["server_pred"] = pred results["server_pred"] = pred
embd_model = load_model(model_args["embedding_model"]) embd_model = load_model(model_args["embedding_model"], custom_objects=models.get_metrics())
domain_embeddings = embd_model.predict(domain_encs, batch_size=args.batch_size, verbose=1) domain_embeddings = embd_model.predict(domain_encs, batch_size=args.batch_size, verbose=1)
results["domain_embds"] = domain_embeddings results["domain_embds"] = domain_embeddings
@ -333,7 +333,17 @@ def main_visualization():
logger.info("visualize embedding") logger.info("visualize embedding")
domain_encs, labels = dataset.load_or_generate_domains(args.test_data, args.domain_length) domain_encs, labels = dataset.load_or_generate_domains(args.test_data, args.domain_length)
domain_embedding = results["domain_embds"] domain_embedding = results["domain_embds"]
visualize.plot_embedding(domain_embedding, labels, path="{}/embd.png".format(args.model_path)) visualize.plot_embedding(domain_embedding, labels, path="{}/embd_svd.png".format(args.model_path), method="svd")
visualize.plot_embedding(domain_embedding, labels, path="{}/embd_tsne.png".format(args.model_path), method="tsne")
def plot_embedding():
    """Plot the stored domain embeddings with both reduction methods.

    Loads the saved predictions for ``args.model_path`` and the labels for
    ``args.test_data``, then calls ``visualize.plot_embedding`` twice: once
    with ``method="svd"`` and once with ``method="tsne"``, passing the output
    paths ``embd_svd.png`` and ``embd_tsne.png`` under the model directory.
    """
    logger.info("visualize embedding")
    results = dataset.load_predictions(args.model_path)
    # Only the labels are used below; the encoded domains are discarded.
    _, labels = dataset.load_or_generate_domains(args.test_data, args.domain_length)
    domain_embedding = results["domain_embds"]
    visualize.plot_embedding(domain_embedding, labels,
                             path="{}/embd_svd.png".format(args.model_path),
                             method="svd")
    # NOTE(review): visualize.plot_embedding prints
    # `explained_variance_ratio_` of whichever reducer it built; sklearn's
    # TSNE does not appear to expose that attribute, so this call may raise
    # AttributeError -- confirm against visualize.py.
    visualize.plot_embedding(domain_embedding, labels,
                             path="{}/embd_tsne.png".format(args.model_path),
                             method="tsne")
def main_visualize_all(): def main_visualize_all():
@ -409,6 +419,8 @@ def main():
main_visualization() main_visualization()
if "all_fancy" == args.mode: if "all_fancy" == args.mode:
main_visualize_all() main_visualize_all()
if "embd" == args.mode:
plot_embedding()
if "paul" == args.mode: if "paul" == args.mode:
main_paul_best() main_paul_best()

View File

@ -1,5 +1,6 @@
import keras.backend as K import keras.backend as K
from models.renes_networks import selu
from . import flat_2, pauls_networks, renes_networks from . import flat_2, pauls_networks, renes_networks
@ -24,11 +25,11 @@ def get_models_by_params(params: dict):
dense_dim = params.get("dense_main") dense_dim = params.get("dense_main")
model_output = params.get("model_output", "both") model_output = params.get("model_output", "both")
# create models # create models
if network_depth == "small": if network_depth == "flat1":
networks = pauls_networks networks = pauls_networks
elif network_depth == "flat": elif network_depth == "flat2":
networks = flat_2 networks = flat_2
elif network_depth == "medium": elif network_depth == "deep1":
networks = renes_networks networks = renes_networks
else: else:
raise Exception("network not found") raise Exception("network not found")
@ -49,6 +50,7 @@ def get_metrics():
("precision", precision), ("precision", precision),
("recall", recall), ("recall", recall),
("f1_score", f1_score), ("f1_score", f1_score),
("selu", selu)
]) ])

View File

@ -32,7 +32,7 @@ def get_embedding(embedding_size, input_length, filter_size, kernel_size, hidden
y = Conv1D(filter_size, kernel_size=3, activation=selu)(y) y = Conv1D(filter_size, kernel_size=3, activation=selu)(y)
y = Conv1D(filter_size, kernel_size=3, activation=selu)(y) y = Conv1D(filter_size, kernel_size=3, activation=selu)(y)
y = GlobalAveragePooling1D()(y) y = GlobalAveragePooling1D()(y)
y = Dense(hidden_dims, activation="relu")(y) y = Dense(hidden_dims, activation=selu)(y)
return KerasModel(x, y) return KerasModel(x, y)
@ -53,7 +53,7 @@ def get_model(cnnDropout, flow_features, domain_features, window_size, domain_le
y = GlobalMaxPooling1D()(y) y = GlobalMaxPooling1D()(y)
y = Dropout(cnnDropout)(y) y = Dropout(cnnDropout)(y)
y = Dense(dense_dim, activation=selu)(y) y = Dense(dense_dim, activation=selu)(y)
y = Dense(dense_dim // 2, activation=selu)(y) y = Dense(dense_dim, activation=selu)(y)
out_client = Dense(1, activation='sigmoid', name="client")(y) out_client = Dense(1, activation='sigmoid', name="client")(y)
out_server = Dense(1, activation='sigmoid', name="server")(y) out_server = Dense(1, activation='sigmoid', name="server")(y)
@ -67,6 +67,9 @@ def get_new_model(dropout, flow_features, domain_features, window_size, domain_l
encoded = TimeDistributed(cnn, name="domain_cnn")(ipt_domains) encoded = TimeDistributed(cnn, name="domain_cnn")(ipt_domains)
merged = keras.layers.concatenate([encoded, ipt_flows], -1) merged = keras.layers.concatenate([encoded, ipt_flows], -1)
y = Dense(dense_dim, activation=selu)(merged) y = Dense(dense_dim, activation=selu)(merged)
y = Dense(dense_dim,
activation="relu",
name="dense_server")(y)
out_server = Dense(1, activation="sigmoid", name="server")(y) out_server = Dense(1, activation="sigmoid", name="server")(y)
merged = keras.layers.concatenate([merged, y], -1) merged = keras.layers.concatenate([merged, y], -1)
# CNN processing a small slides of flow windows # CNN processing a small slides of flow windows
@ -90,6 +93,7 @@ def get_new_model(dropout, flow_features, domain_features, window_size, domain_l
# remove temporal dimension by global max pooling # remove temporal dimension by global max pooling
y = GlobalMaxPooling1D()(y) y = GlobalMaxPooling1D()(y)
y = Dropout(dropout)(y) y = Dropout(dropout)(y)
y = Dense(dense_dim, activation=selu)(y)
y = Dense(dense_dim, y = Dense(dense_dim,
activation=selu, activation=selu,
name="dense_client")(y) name="dense_client")(y)

View File

@ -3,6 +3,7 @@ import os
import matplotlib.pyplot as plt import matplotlib.pyplot as plt
import numpy as np import numpy as np
from sklearn.decomposition import TruncatedSVD from sklearn.decomposition import TruncatedSVD
from sklearn.manifold import TSNE
from sklearn.metrics import ( from sklearn.metrics import (
auc, classification_report, confusion_matrix, fbeta_score, precision_recall_curve, auc, classification_report, confusion_matrix, fbeta_score, precision_recall_curve,
roc_auc_score, roc_curve roc_auc_score, roc_curve
@ -155,10 +156,13 @@ def plot_training_curve(logs, key, path, dpi=600):
plt.close() plt.close()
def plot_embedding(domain_embedding, labels, path, dpi=600): def plot_embedding(domain_embedding, labels, path, dpi=600, method="svd"):
svd = TruncatedSVD(n_components=2) if method == "svd":
domain_reduced = svd.fit_transform(domain_embedding) red = TruncatedSVD(n_components=2)
print(svd.explained_variance_ratio_) elif method == "tsne":
red = TSNE(n_components=2, verbose=2)
domain_reduced = red.fit_transform(domain_embedding)
print(red.explained_variance_ratio_)
# use if draw subset of predictions # use if draw subset of predictions
# idx = np.random.choice(np.arange(len(domain_reduced)), 10000) # idx = np.random.choice(np.arange(len(domain_reduced)), 10000)
plt.scatter(domain_reduced[:, 0], plt.scatter(domain_reduced[:, 0],