fix network props, add PCA to visualize main

This commit is contained in:
René Knaebel 2017-07-14 21:01:08 +02:00
parent 6b787792db
commit 336be37032
2 changed files with 31 additions and 8 deletions

33
main.py
View File

@ -7,6 +7,7 @@ import pandas as pd
import tensorflow as tf
from keras.callbacks import ModelCheckpoint, CSVLogger, EarlyStopping
from keras.models import load_model
from sklearn.decomposition import PCA
from sklearn.utils import class_weight
import arguments
@ -122,7 +123,7 @@ def main_train(param=None):
# parameter
p = {
"type": "paul",
"type": args.model_type,
"batch_size": 64,
"window_size": args.window,
"domain_length": args.domain_length,
@ -134,7 +135,8 @@ def main_train(param=None):
'embedding_size': args.embedding,
'filter_main': 128,
'flow_features': 3,
'dense_main': 512,
# 'dense_main': 512,
'dense_main': 128,
'filter_embedding': args.hidden_char_dims,
'hidden_embedding': args.domain_embedding,
'kernel_embedding': 3,
@ -167,6 +169,7 @@ def main_train(param=None):
if args.class_weights:
logger.info("class weights: compute custom weights")
custom_class_weights = get_custom_class_weights(client_tr, server_tr)
logger.info(custom_class_weights)
else:
logger.info("class weights: set default")
custom_class_weights = None
@ -197,11 +200,11 @@ def main_test():
def main_visualization():
_, _, client_val, server_val = load_or_generate_h5data(args.test_h5data, args.test_data,
args.domain_length, args.window)
domain_val, flow_val, client_val, server_val = load_or_generate_h5data(args.test_h5data, args.test_data,
args.domain_length, args.window)
logger.info("plot model")
model = load_model(args.clf_model, custom_objects=models.get_metrics())
visualize.plot_model(model, args.model_path + "model.png")
visualize.plot_model(model, os.path.join(args.model_path, "model.png"))
logger.info("plot training curve")
logs = pd.read_csv(args.train_log)
visualize.plot_training_curve(logs, "client", "{}/client_train.png".format(args.model_path))
@ -223,6 +226,26 @@ def main_visualization():
"{}/server_cov.png".format(args.model_path),
normalize=False, title="Server Confusion Matrix")
# embedding visualization
import matplotlib.pyplot as plt
model = load_model(args.embedding_model)
domains = np.reshape(domain_val, (12800, 40))
domain_embedding = model.predict(domains)
pca = PCA(n_components=2)
domain_reduced = pca.fit_transform(domain_embedding)
print(pca.explained_variance_ratio_)
clients = np.repeat(client_val, 10, axis=0)
clients = clients.argmax(1)
servers = np.repeat(server_val, 10, axis=0)
servers = servers.argmax(1)
plt.scatter(domain_reduced[:, 0], domain_reduced[:, 1], c=clients, cmap=plt.cm.bwr, s=2)
plt.show()
plt.scatter(domain_reduced[:, 0], domain_reduced[:, 1], c=servers, cmap=plt.cm.bwr, s=2)
plt.show()
def main_score():
# mask = dataset.load_mask_eval(args.data, args.test_image)

View File

@ -25,12 +25,12 @@ def get_model(cnnDropout, flow_features, domain_features, window_size, domain_le
ipt_flows = Input(shape=(window_size, flow_features), name="ipt_flows")
merged = keras.layers.concatenate([encoded, ipt_flows], -1)
# CNN processing small slices of the flow windows
y = Conv1D(filters=cnn_dims, kernel_size=kernel_size, activation='relu',
y = Conv1D(filters=cnn_dims, kernel_size=kernel_size, activation='relu', padding="same",
input_shape=(window_size, domain_features + flow_features))(merged)
y = MaxPool1D(pool_size=3, strides=1)(y)
y = Conv1D(filters=cnn_dims, kernel_size=kernel_size, activation='relu')(y)
y = Conv1D(filters=cnn_dims, kernel_size=kernel_size, activation='relu', padding="same")(y)
y = MaxPool1D(pool_size=3, strides=1)(y)
y = Conv1D(filters=cnn_dims, kernel_size=kernel_size, activation='relu')(y)
y = Conv1D(filters=cnn_dims, kernel_size=kernel_size, activation='relu', padding="same")(y)
# remove temporal dimension by global max pooling
y = GlobalMaxPooling1D()(y)
y = Dropout(cnnDropout)(y)