diff --git a/arguments.py b/arguments.py
index f7f0bdd..75fc866 100644
--- a/arguments.py
+++ b/arguments.py
@@ -113,6 +113,7 @@ def get_model_args(args):
 
 def parse():
     args = parser.parse_args()
+    args.model_name = os.path.split(os.path.normpath(args.model_path))[1]
     args.embedding_model = os.path.join(args.model_path, "embd.h5")
     args.clf_model = os.path.join(args.model_path, "clf.h5")
     args.train_log = os.path.join(args.model_path, "train.log.csv")
diff --git a/fancy.sh b/fancy.sh
index 00d571e..af8665f 100644
--- a/fancy.sh
+++ b/fancy.sh
@@ -3,11 +3,11 @@
 RESDIR=$1
 DATADIR=$2
 
-python3 main.py --mode fancy --batch 1024 --model ${RESDIR}/both_small_final --test ${DATADIR}/futureData.csv --model_output both
-python3 main.py --mode fancy --batch 1024 --model ${RESDIR}/both_small_inter --test ${DATADIR}/futureData.csv --model_output both
-python3 main.py --mode fancy --batch 1024 --model ${RESDIR}/both_small_staggered --test ${DATADIR}/futureData.csv --model_output both
-python3 main.py --mode fancy --batch 1024 --model ${RESDIR}/client_small_final --test ${DATADIR}/futureData.csv --model_output client
-python3 main.py --mode fancy --batch 1024 --model ${RESDIR}/client_small_inter --test ${DATADIR}/futureData.csv --model_output client
+python3 main.py --mode fancy --batch 1024 --model ${RESDIR}/both_final --test ${DATADIR}/futureData.csv --model_output both
+python3 main.py --mode fancy --batch 1024 --model ${RESDIR}/both_inter --test ${DATADIR}/futureData.csv --model_output both
+python3 main.py --mode fancy --batch 1024 --model ${RESDIR}/both_staggered --test ${DATADIR}/futureData.csv --model_output both
+python3 main.py --mode fancy --batch 1024 --model ${RESDIR}/client_final --test ${DATADIR}/futureData.csv --model_output client
+#python3 main.py --mode fancy --batch 1024 --model ${RESDIR}/client_inter --test ${DATADIR}/futureData.csv --model_output client
 
 #python3 main.py --mode fancy --batch 1024 --model ${RESDIR}/both_medium_final --test ${DATADIR}/futureData.csv --model_output both
 #python3 main.py --mode fancy --batch 1024 --model ${RESDIR}/both_medium_inter --test ${DATADIR}/futureData.csv --model_output both
diff --git a/main.py b/main.py
index 61ad0c4..dc85a85 100644
--- a/main.py
+++ b/main.py
@@ -5,7 +5,7 @@ import os
 import numpy as np
 import pandas as pd
 import tensorflow as tf
-from keras.callbacks import CSVLogger, EarlyStopping, ModelCheckpoint
+from keras.callbacks import CSVLogger, EarlyStopping, LambdaCallback, ModelCheckpoint
 from keras.models import Model, load_model
 
 import arguments
@@ -192,13 +192,16 @@ def main_train(param=None):
                       metrics=['accuracy'] + custom_metrics)
 
         model.summary()
+        callbacks.append(LambdaCallback(
+                on_epoch_end=lambda epoch, logs: embedding.save(args.embedding_model))
+        )
         model.fit({"ipt_domains": domain_tr, "ipt_flows": flow_tr},
                   {"client": client_tr, "server": server_tr},
                   batch_size=args.batch_size,
                   epochs=args.epochs,
                   callbacks=callbacks,
                   shuffle=True,
-                  validation_split=0.2,
+                  validation_split=0.3,
                   class_weight=custom_class_weights)
 
     else:
@@ -230,16 +233,17 @@ def main_train(param=None):
         else:
             raise ValueError("unknown model output")
 
+        callbacks.append(LambdaCallback(
+                on_epoch_end=lambda epoch, logs: embedding.save(args.embedding_model))
+        )
         model.fit([domain_tr, flow_tr],
                   labels,
                   batch_size=args.batch_size,
                   epochs=args.epochs,
                   callbacks=callbacks,
                   shuffle=True,
-                  validation_split=0.2,
+                  validation_split=0.3,
                   class_weight=custom_class_weights)
-    logger.info("save embedding")
-    embedding.save(args.embedding_model)
 
 
 def main_test():
@@ -313,36 +317,43 @@ def main_visualization():
     
     logger.info("plot pr curve")
     visualize.plot_clf()
-    visualize.plot_precision_recall(df.client_val.as_matrix(), df.client_pred.as_matrix(), args.model_path)
+    visualize.plot_precision_recall(df.client_val.as_matrix(), df.client_pred.as_matrix(), args.model_name)
     visualize.plot_precision_recall(df_paul.client_val.as_matrix(), df_paul.client_pred.as_matrix(), "paul")
     visualize.plot_legend()
     visualize.plot_save("{}/window_client_prc.png".format(args.model_path))
     
     logger.info("plot roc curve")
     visualize.plot_clf()
-    visualize.plot_roc_curve(df.client_val.as_matrix(), df.client_pred.as_matrix(), args.model_path)
+    visualize.plot_roc_curve(df.client_val.as_matrix(), df.client_pred.as_matrix(), args.model_name)
     visualize.plot_roc_curve(df_paul.client_val.as_matrix(), df_paul.client_pred.as_matrix(), "paul")
     visualize.plot_legend()
     visualize.plot_save("{}/window_client_roc.png".format(args.model_path))
     
     visualize.plot_clf()
-    visualize.plot_precision_recall(df_user.client_val.as_matrix(), df_user.client_pred.as_matrix(), args.model_path)
+    visualize.plot_precision_recall(df_user.client_val.as_matrix(), df_user.client_pred.as_matrix(), args.model_name)
     visualize.plot_precision_recall(df_paul_user.client_val.as_matrix(), df_paul_user.client_pred.as_matrix(), "paul")
     visualize.plot_legend()
     visualize.plot_save("{}/user_client_prc.png".format(args.model_path))
     
     visualize.plot_clf()
-    visualize.plot_roc_curve(df_user.client_val.as_matrix(), df_user.client_pred.as_matrix(), args.model_path)
+    visualize.plot_roc_curve(df_user.client_val.as_matrix(), df_user.client_pred.as_matrix(), args.model_name)
     visualize.plot_roc_curve(df_paul_user.client_val.as_matrix(), df_paul_user.client_pred.as_matrix(), "paul")
     visualize.plot_legend()
     visualize.plot_save("{}/user_client_roc.png".format(args.model_path))
-    
+    # confusion matrices with absolute counts (normalize=False)
     visualize.plot_confusion_matrix(df.client_val.as_matrix(), df.client_pred.as_matrix().round(),
                                     "{}/client_cov.png".format(args.model_path),
                                     normalize=False, title="Client Confusion Matrix")
     visualize.plot_confusion_matrix(df_user.client_val.as_matrix(), df_user.client_pred.as_matrix().round(),
                                     "{}/user_cov.png".format(args.model_path),
                                     normalize=False, title="User Confusion Matrix")
+    # the same confusion matrices, normalized (normalize=True)
+    visualize.plot_confusion_matrix(df.client_val.as_matrix(), df.client_pred.as_matrix().round(),
+                                    "{}/client_cov_norm.png".format(args.model_path),
+                                    normalize=True, title="Client Confusion Matrix")
+    visualize.plot_confusion_matrix(df_user.client_val.as_matrix(), df_user.client_pred.as_matrix().round(),
+                                    "{}/user_cov_norm.png".format(args.model_path),
+                                    normalize=True, title="User Confusion Matrix")
     logger.info("visualize embedding")
     domain_encs, labels = dataset.load_or_generate_domains(args.test_data, args.domain_length)
     domain_embedding = results["domain_embds"]
diff --git a/models/pauls_networks.py b/models/pauls_networks.py
index 88978bc..08b4747 100644
--- a/models/pauls_networks.py
+++ b/models/pauls_networks.py
@@ -36,12 +36,10 @@ def get_embedding(embedding_size, input_length, filter_size, kernel_size, hidden
     y = Embedding(input_dim=dataset.get_vocab_size(), output_dim=embedding_size)(y)
     y = Conv1D(filter_size,
                kernel_size,
-               kernel_regularizer=l2(0.01),
                activation='relu')(y)
     y = GlobalMaxPooling1D()(y)
     y = Dropout(drop_out)(y)
-    y = Dense(hidden_dims,
-              kernel_regularizer=l2(0.01))(y)
+    y = Dense(hidden_dims)(y)
     y = Activation('relu')(y)
     return KerasModel(x, y)
 
@@ -49,19 +47,18 @@ def get_embedding(embedding_size, input_length, filter_size, kernel_size, hidden
 def get_model(cnnDropout, flow_features, domain_features, window_size, domain_length, cnn_dims, kernel_size,
               dense_dim, cnn, model_output="both") -> Model:
     ipt_domains = Input(shape=(window_size, domain_length), name="ipt_domains")
-    encoded = TimeDistributed(cnn)(ipt_domains)
+    encoded = TimeDistributed(cnn, name="domain_cnn")(ipt_domains)
     ipt_flows = Input(shape=(window_size, flow_features), name="ipt_flows")
     merged = keras.layers.concatenate([encoded, ipt_flows], -1)
     # CNN processing a small slides of flow windows
     y = Conv1D(cnn_dims,
                kernel_size,
-               kernel_regularizer=l2(0.01),
                activation='relu',
                input_shape=(window_size, domain_features + flow_features))(merged)
     # remove temporal dimension by global max pooling
     y = GlobalMaxPooling1D()(y)
     y = Dropout(cnnDropout)(y)
-    y = Dense(dense_dim, kernel_regularizer=l2(0.01), activation='relu')(y)
+    y = Dense(dense_dim, kernel_regularizer=l2(0.1), activation='relu')(y)
     out_client = Dense(1, activation='sigmoid', name="client")(y)
     out_server = Dense(1, activation='sigmoid', name="server")(y)
 
@@ -72,10 +69,10 @@ def get_new_model(dropout, flow_features, domain_features, window_size, domain_l
                   dense_dim, cnn, model_output="both") -> Model:
     ipt_domains = Input(shape=(window_size, domain_length), name="ipt_domains")
     ipt_flows = Input(shape=(window_size, flow_features), name="ipt_flows")
-    encoded = TimeDistributed(cnn)(ipt_domains)
+    encoded = TimeDistributed(cnn, name="domain_cnn")(ipt_domains)
     merged = keras.layers.concatenate([encoded, ipt_flows], -1)
     y = Dense(dense_dim,
-              kernel_regularizer=l2(0.01),
+              kernel_regularizer=l2(0.1),
               activation="relu",
               name="dense_server")(merged)
     out_server = Dense(1, activation="sigmoid", name="server")(y)
@@ -83,14 +80,13 @@ def get_new_model(dropout, flow_features, domain_features, window_size, domain_l
     # CNN processing a small slides of flow windows
     y = Conv1D(cnn_dims,
                kernel_size,
-               kernel_regularizer=l2(0.01),
                activation='relu',
                input_shape=(window_size, domain_features + flow_features))(merged)
     # remove temporal dimension by global max pooling
     y = GlobalMaxPooling1D()(y)
     y = Dropout(dropout)(y)
     y = Dense(dense_dim,
-              kernel_regularizer=l2(0.01),
+              kernel_regularizer=l2(0.1),
               activation='relu',
               name="dense_client")(y)
 
diff --git a/run.sh b/run.sh
index 61df6d8..1b9226c 100644
--- a/run.sh
+++ b/run.sh
@@ -50,4 +50,4 @@ do
 done
 
 # python main.py --mode train --epochs 100 --embd 64 --filter_embd 128 --kernel_embd 5 --dense_embd 128 --domain_embd 32 --filter_main 32  --kernel_main 5 --dense_main 512 --batch 256 --balanced_weights --model_output ${output} --type ${mtype} --depth ${depth} --train ${DATADIR}/currentData.csv --model ${RESDIR}/${output}_${depth}_${mtype}
-# python main.py --mode train --epochs 100 --embd 64 --filter_embd 128 --kernel_embd 5 --dense_embd 128 --domain_embd 32 --filter_main 32  --kernel_main 5 --dense_main 512 --batch 256 --balanced_weights --model_output ${output} --type ${mtype} --depth ${depth} --train ${DATADIR}/currentData.csv --model ${RESDIR}/${output}_${depth}_${mtype}
+# python main.py --mode train --epochs 100 --embd 64 --filter_embd 128 --kernel_embd 5 --dense_embd 128 --domain_embd 32 --filter_main 32  --kernel_main 5 --dense_main 512 --batch 256 --balanced_weights --model_output client --type final --depth small --train /tmp/rk/data/currentData.csv --model /tmp/rk/results/paul3/client_final
diff --git a/visualize.py b/visualize.py
index ab143bb..28f7a63 100644
--- a/visualize.py
+++ b/visualize.py
@@ -95,6 +95,7 @@ def plot_roc_curve(mask, prediction, label=""):
 
 def plot_confusion_matrix(y_true, y_pred, path,
                           normalize=False,
+                          classes=("benign", "malicious"),
                           title='Confusion matrix',
                           cmap="Blues", dpi=600):
     """
@@ -103,7 +104,6 @@ def plot_confusion_matrix(y_true, y_pred, path,
     """
     plt.clf()
     cm = confusion_matrix(y_true, y_pred)
-    classes = [0, 1]
     plt.imshow(cm, interpolation='nearest', cmap=cmap)
     plt.title(title)
     plt.colorbar()
@@ -137,8 +137,8 @@ def plot_training_curve(logs, key, path, dpi=600):
     plt.plot(logs[f"{key}acc"], label="accuracy")
     plt.plot(logs[f"{key}f1_score"], label="f1_score")
 
-    plt.plot(logs[f"val_{key}acc"], label="accuracy")
-    plt.plot(logs[f"val_{key}f1_score"], label="val_f1_score")
+    plt.plot(logs[f"val_{key}acc"], label="val_accuracy")
+    # plt.plot(logs[f"val_{key}f1_score"], label="val_f1_score")
 
     plt.xlabel('epoch')
     plt.ylabel('percentage')