Add t-SNE embedding visualization (does not work with big data)
Fix loading of saved models that use the custom selu activation function
This commit is contained in:
parent
e2bf2dc90f
commit
607d74998c
16
main.py
16
main.py
|
@ -248,7 +248,7 @@ def main_test():
|
||||||
else:
|
else:
|
||||||
results["server_pred"] = pred
|
results["server_pred"] = pred
|
||||||
|
|
||||||
embd_model = load_model(model_args["embedding_model"])
|
embd_model = load_model(model_args["embedding_model"], custom_objects=models.get_metrics())
|
||||||
domain_embeddings = embd_model.predict(domain_encs, batch_size=args.batch_size, verbose=1)
|
domain_embeddings = embd_model.predict(domain_encs, batch_size=args.batch_size, verbose=1)
|
||||||
results["domain_embds"] = domain_embeddings
|
results["domain_embds"] = domain_embeddings
|
||||||
|
|
||||||
|
@ -333,7 +333,17 @@ def main_visualization():
|
||||||
logger.info("visualize embedding")
|
logger.info("visualize embedding")
|
||||||
domain_encs, labels = dataset.load_or_generate_domains(args.test_data, args.domain_length)
|
domain_encs, labels = dataset.load_or_generate_domains(args.test_data, args.domain_length)
|
||||||
domain_embedding = results["domain_embds"]
|
domain_embedding = results["domain_embds"]
|
||||||
visualize.plot_embedding(domain_embedding, labels, path="{}/embd.png".format(args.model_path))
|
visualize.plot_embedding(domain_embedding, labels, path="{}/embd_svd.png".format(args.model_path), method="svd")
|
||||||
|
visualize.plot_embedding(domain_embedding, labels, path="{}/embd_tsne.png".format(args.model_path), method="tsne")
|
||||||
|
|
||||||
|
|
||||||
|
def plot_embedding():
|
||||||
|
logger.info("visualize embedding")
|
||||||
|
results = dataset.load_predictions(args.model_path)
|
||||||
|
domain_encs, labels = dataset.load_or_generate_domains(args.test_data, args.domain_length)
|
||||||
|
domain_embedding = results["domain_embds"]
|
||||||
|
visualize.plot_embedding(domain_embedding, labels, path="{}/embd_svd.png".format(args.model_path), method="svd")
|
||||||
|
visualize.plot_embedding(domain_embedding, labels, path="{}/embd_tsne.png".format(args.model_path), method="tsne")
|
||||||
|
|
||||||
|
|
||||||
def main_visualize_all():
|
def main_visualize_all():
|
||||||
|
@ -409,6 +419,8 @@ def main():
|
||||||
main_visualization()
|
main_visualization()
|
||||||
if "all_fancy" == args.mode:
|
if "all_fancy" == args.mode:
|
||||||
main_visualize_all()
|
main_visualize_all()
|
||||||
|
if "embd" == args.mode:
|
||||||
|
plot_embedding()
|
||||||
if "paul" == args.mode:
|
if "paul" == args.mode:
|
||||||
main_paul_best()
|
main_paul_best()
|
||||||
|
|
||||||
|
|
|
@ -1,5 +1,6 @@
|
||||||
import keras.backend as K
|
import keras.backend as K
|
||||||
|
|
||||||
|
from models.renes_networks import selu
|
||||||
from . import flat_2, pauls_networks, renes_networks
|
from . import flat_2, pauls_networks, renes_networks
|
||||||
|
|
||||||
|
|
||||||
|
@ -24,11 +25,11 @@ def get_models_by_params(params: dict):
|
||||||
dense_dim = params.get("dense_main")
|
dense_dim = params.get("dense_main")
|
||||||
model_output = params.get("model_output", "both")
|
model_output = params.get("model_output", "both")
|
||||||
# create models
|
# create models
|
||||||
if network_depth == "small":
|
if network_depth == "flat1":
|
||||||
networks = pauls_networks
|
networks = pauls_networks
|
||||||
elif network_depth == "flat":
|
elif network_depth == "flat2":
|
||||||
networks = flat_2
|
networks = flat_2
|
||||||
elif network_depth == "medium":
|
elif network_depth == "deep1":
|
||||||
networks = renes_networks
|
networks = renes_networks
|
||||||
else:
|
else:
|
||||||
raise Exception("network not found")
|
raise Exception("network not found")
|
||||||
|
@ -49,6 +50,7 @@ def get_metrics():
|
||||||
("precision", precision),
|
("precision", precision),
|
||||||
("recall", recall),
|
("recall", recall),
|
||||||
("f1_score", f1_score),
|
("f1_score", f1_score),
|
||||||
|
("selu", selu)
|
||||||
])
|
])
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -32,7 +32,7 @@ def get_embedding(embedding_size, input_length, filter_size, kernel_size, hidden
|
||||||
y = Conv1D(filter_size, kernel_size=3, activation=selu)(y)
|
y = Conv1D(filter_size, kernel_size=3, activation=selu)(y)
|
||||||
y = Conv1D(filter_size, kernel_size=3, activation=selu)(y)
|
y = Conv1D(filter_size, kernel_size=3, activation=selu)(y)
|
||||||
y = GlobalAveragePooling1D()(y)
|
y = GlobalAveragePooling1D()(y)
|
||||||
y = Dense(hidden_dims, activation="relu")(y)
|
y = Dense(hidden_dims, activation=selu)(y)
|
||||||
return KerasModel(x, y)
|
return KerasModel(x, y)
|
||||||
|
|
||||||
|
|
||||||
|
@ -53,7 +53,7 @@ def get_model(cnnDropout, flow_features, domain_features, window_size, domain_le
|
||||||
y = GlobalMaxPooling1D()(y)
|
y = GlobalMaxPooling1D()(y)
|
||||||
y = Dropout(cnnDropout)(y)
|
y = Dropout(cnnDropout)(y)
|
||||||
y = Dense(dense_dim, activation=selu)(y)
|
y = Dense(dense_dim, activation=selu)(y)
|
||||||
y = Dense(dense_dim // 2, activation=selu)(y)
|
y = Dense(dense_dim, activation=selu)(y)
|
||||||
out_client = Dense(1, activation='sigmoid', name="client")(y)
|
out_client = Dense(1, activation='sigmoid', name="client")(y)
|
||||||
out_server = Dense(1, activation='sigmoid', name="server")(y)
|
out_server = Dense(1, activation='sigmoid', name="server")(y)
|
||||||
|
|
||||||
|
@ -67,6 +67,9 @@ def get_new_model(dropout, flow_features, domain_features, window_size, domain_l
|
||||||
encoded = TimeDistributed(cnn, name="domain_cnn")(ipt_domains)
|
encoded = TimeDistributed(cnn, name="domain_cnn")(ipt_domains)
|
||||||
merged = keras.layers.concatenate([encoded, ipt_flows], -1)
|
merged = keras.layers.concatenate([encoded, ipt_flows], -1)
|
||||||
y = Dense(dense_dim, activation=selu)(merged)
|
y = Dense(dense_dim, activation=selu)(merged)
|
||||||
|
y = Dense(dense_dim,
|
||||||
|
activation="relu",
|
||||||
|
name="dense_server")(y)
|
||||||
out_server = Dense(1, activation="sigmoid", name="server")(y)
|
out_server = Dense(1, activation="sigmoid", name="server")(y)
|
||||||
merged = keras.layers.concatenate([merged, y], -1)
|
merged = keras.layers.concatenate([merged, y], -1)
|
||||||
# CNN processing a small slides of flow windows
|
# CNN processing a small slides of flow windows
|
||||||
|
@ -90,6 +93,7 @@ def get_new_model(dropout, flow_features, domain_features, window_size, domain_l
|
||||||
# remove temporal dimension by global max pooling
|
# remove temporal dimension by global max pooling
|
||||||
y = GlobalMaxPooling1D()(y)
|
y = GlobalMaxPooling1D()(y)
|
||||||
y = Dropout(dropout)(y)
|
y = Dropout(dropout)(y)
|
||||||
|
y = Dense(dense_dim, activation=selu)(y)
|
||||||
y = Dense(dense_dim,
|
y = Dense(dense_dim,
|
||||||
activation=selu,
|
activation=selu,
|
||||||
name="dense_client")(y)
|
name="dense_client")(y)
|
||||||
|
|
12
visualize.py
12
visualize.py
|
@ -3,6 +3,7 @@ import os
|
||||||
import matplotlib.pyplot as plt
|
import matplotlib.pyplot as plt
|
||||||
import numpy as np
|
import numpy as np
|
||||||
from sklearn.decomposition import TruncatedSVD
|
from sklearn.decomposition import TruncatedSVD
|
||||||
|
from sklearn.manifold import TSNE
|
||||||
from sklearn.metrics import (
|
from sklearn.metrics import (
|
||||||
auc, classification_report, confusion_matrix, fbeta_score, precision_recall_curve,
|
auc, classification_report, confusion_matrix, fbeta_score, precision_recall_curve,
|
||||||
roc_auc_score, roc_curve
|
roc_auc_score, roc_curve
|
||||||
|
@ -155,10 +156,13 @@ def plot_training_curve(logs, key, path, dpi=600):
|
||||||
plt.close()
|
plt.close()
|
||||||
|
|
||||||
|
|
||||||
def plot_embedding(domain_embedding, labels, path, dpi=600):
|
def plot_embedding(domain_embedding, labels, path, dpi=600, method="svd"):
|
||||||
svd = TruncatedSVD(n_components=2)
|
if method == "svd":
|
||||||
domain_reduced = svd.fit_transform(domain_embedding)
|
red = TruncatedSVD(n_components=2)
|
||||||
print(svd.explained_variance_ratio_)
|
elif method == "tsne":
|
||||||
|
red = TSNE(n_components=2, verbose=2)
|
||||||
|
domain_reduced = red.fit_transform(domain_embedding)
|
||||||
|
print(red.explained_variance_ratio_)
|
||||||
# use if draw subset of predictions
|
# use if draw subset of predictions
|
||||||
# idx = np.random.choice(np.arange(len(domain_reduced)), 10000)
|
# idx = np.random.choice(np.arange(len(domain_reduced)), 10000)
|
||||||
plt.scatter(domain_reduced[:, 0],
|
plt.scatter(domain_reduced[:, 0],
|
||||||
|
|
Loading…
Reference in New Issue