add bulk embedding visualization and deep1 network
This commit is contained in:
parent
33063f3081
commit
a686f147f0
@ -106,6 +106,7 @@ parser.add_argument("--out-prefix", action="store", dest="output_prefix",
|
||||
#
|
||||
parser.add_argument("--stop_early", action="store_true", dest="stop_early")
|
||||
parser.add_argument("--balanced_weights", action="store_true", dest="class_weights")
|
||||
parser.add_argument("--sample_weights", action="store_true", dest="sample_weights")
|
||||
parser.add_argument("--gpu", action="store_true", dest="gpu")
|
||||
parser.add_argument("--new_model", action="store_true", dest="new_model")
|
||||
|
||||
|
38
fancy.sh
38
fancy.sh
@ -1,24 +1,24 @@
|
||||
#!/usr/bin/env bash
#
# Run the per-model "fancy" evaluation for every run index in N1..N2, then the
# aggregated "all_fancy" and "beta" modes over all runs of each model family.
#
# Usage: fancy.sh N1 N2 RESDIR DATADIR
#   N1, N2   first and last run index (inclusive) appended to the model names
#   RESDIR   directory that holds the model directories (<family>_<index>)
#   DATADIR  value handed to main.py via --data

if [ "$#" -lt 4 ]; then
    echo "usage: $0 N1 N2 RESDIR DATADIR" >&2
    exit 1
fi

N1=$1
N2=$2
RESDIR=$3
DATADIR=$4

# Per-run evaluation of each model family.
for ((i = ${N1}; i <= ${N2}; i++))
do
    python3 main.py --mode fancy --batch 1024 --model "${RESDIR}/client_final_${i}" --data "${DATADIR}" --model_output client
    python3 main.py --mode fancy --batch 1024 --model "${RESDIR}/both_final_${i}" --data "${DATADIR}" --model_output both
    python3 main.py --mode fancy --batch 1024 --model "${RESDIR}/both_inter_${i}" --data "${DATADIR}" --model_output both
    python3 main.py --mode fancy --batch 1024 --model "${RESDIR}/both_staggered_${i}" --data "${DATADIR}" --model_output both
done

# Aggregated visualization over all runs of a family. The glob part stays
# unquoted on purpose so the shell expands it to one argument per run directory.
python3 main.py --mode all_fancy --batch 1024 --models "${RESDIR}"/client_final_*/ --data "${DATADIR}" --model_output client --out-prefix "${RESDIR}/client_final"
python3 main.py --mode all_fancy --batch 1024 --models "${RESDIR}"/both_final_*/ --data "${DATADIR}" --model_output both --out-prefix "${RESDIR}/both_final"
python3 main.py --mode all_fancy --batch 1024 --models "${RESDIR}"/both_inter_*/ --data "${DATADIR}" --model_output both --out-prefix "${RESDIR}/both_inter"
python3 main.py --mode all_fancy --batch 1024 --models "${RESDIR}"/both_staggered_*/ --data "${DATADIR}" --model_output both --out-prefix "${RESDIR}/both_staggered"

# Same families again in "beta" mode.
python3 main.py --mode beta --batch 1024 --models "${RESDIR}"/client_final_*/ --data "${DATADIR}" --model_output client --out-prefix "${RESDIR}/client_final"
python3 main.py --mode beta --batch 1024 --models "${RESDIR}"/both_final_*/ --data "${DATADIR}" --model_output both --out-prefix "${RESDIR}/both_final"
python3 main.py --mode beta --batch 1024 --models "${RESDIR}"/both_inter_*/ --data "${DATADIR}" --model_output both --out-prefix "${RESDIR}/both_inter"
python3 main.py --mode beta --batch 1024 --models "${RESDIR}"/both_staggered_*/ --data "${DATADIR}" --model_output both --out-prefix "${RESDIR}/both_staggered"
|
||||
|
52
main.py
52
main.py
@ -57,7 +57,7 @@ if args.gpu:
|
||||
PARAMS = {
|
||||
"type": args.model_type,
|
||||
"depth": args.model_depth,
|
||||
# "batch_size": 64,
|
||||
"batch_size": args.batch_size,
|
||||
"window_size": args.window,
|
||||
"domain_length": args.domain_length,
|
||||
"flow_features": 3,
|
||||
@ -72,7 +72,6 @@ PARAMS = {
|
||||
'filter_main': args.filter_main,
|
||||
'dense_main': args.dense_main,
|
||||
'kernel_main': args.kernel_main,
|
||||
'input_length': 40,
|
||||
'model_output': args.model_output
|
||||
}
|
||||
|
||||
@ -101,7 +100,6 @@ def main_hyperband():
|
||||
"window_size": [args.window],
|
||||
"flow_features": [3],
|
||||
"domain_length": [args.domain_length],
|
||||
'input_length': [40],
|
||||
# model params
|
||||
"embedding_size": [2 ** x for x in range(3, 7)],
|
||||
"filter_embedding": [2 ** x for x in range(1, 10)],
|
||||
@ -133,6 +131,10 @@ def main_hyperband():
|
||||
return results
|
||||
|
||||
|
||||
def train(parameters, features, labels):
    """Placeholder: currently performs no work and returns ``None``.

    All three arguments are accepted and ignored.
    """
    return None
|
||||
|
||||
|
||||
def main_train(param=None):
|
||||
logger.info(f"Create model path {args.model_path}")
|
||||
exists_or_make_path(args.model_path)
|
||||
@ -475,6 +477,47 @@ def main_visualize_all():
|
||||
vis(args.output_prefix, dfs, df_paul, "user", "roc")
|
||||
|
||||
|
||||
def main_visualize_all_embds():
    """Plot a 2-D SVD projection of the domain embeddings of every selected model.

    For each entry returned by ``get_model_args(args)`` the stored predictions
    are loaded and their ``"domain_embds"`` entry is reduced to two components
    with ``TruncatedSVD``. Each model gets one panel of a 5x4 subplot grid and
    the figure is saved as ``<args.output_prefix>_svd.png``.
    """
    import matplotlib.pyplot as plt
    from sklearn.decomposition import TruncatedSVD

    # The return value is not needed for plotting; the call is kept because the
    # original made it. NOTE(review): presumably it makes sure the raw h5 data
    # exists before predictions are read - confirm, otherwise drop the call.
    dataset.load_or_generate_raw_h5data(args.data, args.data, args.domain_length, args.window)

    def load_embeddings(path):
        # Only the embedding entry of the stored predictions is needed here.
        return dataset.load_predictions(path)["domain_embds"]

    embeddings = [(model_args["model_name"], load_embeddings(model_args["model_path"]))
                  for model_args in get_model_args(args)]

    plt.clf()

    def scatter_embedding(ax, domain_embedding, labels):
        reducer = TruncatedSVD(n_components=2)
        # Draw 5000 random rows (with replacement) so the scatter stays readable.
        idx = np.random.choice(np.arange(len(domain_embedding)), 5000)
        sampled_embedding = domain_embedding[idx]
        sampled_labels = labels[idx]
        reduced = reducer.fit_transform(sampled_embedding)
        ax.scatter(reduced[:, 0],
                   reduced[:, 1],
                   # Weight the two label columns by 1 and 2 so that every
                   # combination of the two flags maps to its own colour index.
                   c=(sampled_labels * (1, 2)).sum(1).astype(int),
                   cmap=plt.cm.plasma,
                   s=3,
                   alpha=0.1)

    _, labels = dataset.load_or_generate_domains(args.data, args.domain_length)

    fig, axes = plt.subplots(nrows=5, ncols=4)

    # zip() below stops at the shorter sequence; make dropped models visible
    # instead of silently omitting them from the figure.
    if len(embeddings) > axes.size:
        logger.warning(f"only the first {axes.size} of {len(embeddings)} models are plotted")

    for (model_name, embd), ax in zip(embeddings, axes.flat):
        logger.info(f"plot embedding for {model_name}")
        scatter_embedding(ax, embd, labels)

    # The original passed a stray ``600`` to str.format(), which ignored it.
    # NOTE(review): it was probably meant as a dpi argument for plot_save -
    # confirm the signature of visualize.plot_save before passing it there.
    visualize.plot_save("{}_svd.png".format(args.output_prefix))
|
||||
|
||||
import joblib
|
||||
|
||||
|
||||
@ -709,6 +752,7 @@ def vis_server():
|
||||
visualize.plot_legend()
|
||||
visualize.plot_save("results/server_model/windows_roc.pdf")
|
||||
|
||||
|
||||
def main():
|
||||
if "train" == args.mode:
|
||||
main_train()
|
||||
@ -730,6 +774,8 @@ def main():
|
||||
train_server_only()
|
||||
if "server_test" == args.mode:
|
||||
test_server_only()
|
||||
if "embedding" == args.mode:
|
||||
main_visualize_all_embds()
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
@ -1,5 +1,6 @@
|
||||
import keras.backend as K
|
||||
|
||||
from models import deep1
|
||||
from models.renes_networks import selu
|
||||
from . import flat_2, pauls_networks, renes_networks
|
||||
|
||||
@ -10,7 +11,6 @@ def get_models_by_params(params: dict):
|
||||
# network_type = params.get("type")
|
||||
network_depth = params.get("depth")
|
||||
embedding_size = params.get("embedding")
|
||||
input_length = params.get("input_length")
|
||||
filter_embedding = params.get("filter_embedding")
|
||||
kernel_embedding = params.get("kernel_embedding")
|
||||
hidden_embedding = params.get("dense_embedding")
|
||||
@ -32,7 +32,7 @@ def get_models_by_params(params: dict):
|
||||
networks = renes_networks
|
||||
else:
|
||||
raise Exception("network not found")
|
||||
embedding_model = networks.get_embedding(embedding_size, input_length, filter_embedding, kernel_embedding,
|
||||
embedding_model = networks.get_embedding(embedding_size, domain_length, filter_embedding, kernel_embedding,
|
||||
hidden_embedding, 0.5)
|
||||
|
||||
old_model = networks.get_model(0.25, flow_features, hidden_embedding, window_size, domain_length,
|
||||
@ -63,6 +63,8 @@ def get_server_model_by_params(params: dict):
|
||||
elif network_depth == "flat2":
|
||||
networks = flat_2
|
||||
elif network_depth == "deep1":
|
||||
networks = deep1
|
||||
elif network_depth == "deep2":
|
||||
networks = renes_networks
|
||||
else:
|
||||
raise Exception("network not found")
|
||||
|
70
models/deep1.py
Normal file
70
models/deep1.py
Normal file
@ -0,0 +1,70 @@
|
||||
from collections import namedtuple
|
||||
|
||||
import keras
|
||||
from keras.engine import Input, Model as KerasModel
|
||||
from keras.layers import Conv1D, Dense, Dropout, Embedding, GlobalAveragePooling1D, GlobalMaxPooling1D, TimeDistributed
|
||||
|
||||
import dataset
|
||||
|
||||
Model = namedtuple("Model", ["in_domains", "in_flows", "out_client", "out_server"])
|
||||
|
||||
|
||||
def get_embedding(embedding_size, input_length, filter_size, kernel_size, hidden_dims, drop_out=0.5):
    """Build the domain encoder: embedding, three 1-D convolutions, average pooling, dense.

    :param embedding_size: output dimension of the embedding layer
    :param input_length: length of the input sequence
    :param filter_size: number of filters of every convolution
    :param kernel_size: kernel width of the first convolution (the other two use 3)
    :param hidden_dims: width of the final dense layer, i.e. the size of the encoding
    :param drop_out: accepted for signature compatibility; not used by this network
    :return: Keras model mapping the input sequence to its dense encoding
    """
    domain_input = Input(shape=(input_length,))
    features = Embedding(input_dim=dataset.get_vocab_size(), output_dim=embedding_size)(domain_input)
    # The first convolution uses the caller's kernel width, the following two are fixed at 3.
    for width in (kernel_size, 3, 3):
        features = Conv1D(filter_size, kernel_size=width, activation="relu")(features)
    features = GlobalAveragePooling1D()(features)
    features = Dense(hidden_dims, activation="relu")(features)
    return KerasModel(domain_input, features)
|
||||
|
||||
|
||||
def get_model(cnnDropout, flow_features, domain_features, window_size, domain_length, cnn_dims, kernel_size,
              dense_dim, cnn, model_output="both"):
    """Wire up the window-level network with a client and a server output.

    The domain encoder ``cnn`` is applied to every step of the window, its
    output is concatenated with the flow features, convolved over the window,
    max-pooled, and passed through two dense layers. Both sigmoid outputs are
    computed from that same hidden representation.

    :param cnnDropout: dropout rate applied after the pooling
    :param flow_features: number of flow features per window step
    :param domain_features: encoding size, only used for the ``input_shape`` hint
    :param window_size: number of steps per window
    :param domain_length: length of each domain input
    :param cnn_dims: number of filters of the window convolution
    :param kernel_size: kernel width of the window convolution
    :param dense_dim: width of the two dense layers
    :param cnn: domain encoder model applied per window step
    :param model_output: accepted for signature compatibility; not used here
    :return: ``Model`` tuple (in_domains, in_flows, out_client, out_server)
    """
    domains_in = Input(shape=(window_size, domain_length), name="ipt_domains")
    domain_codes = TimeDistributed(cnn, name="domain_cnn")(domains_in)
    flows_in = Input(shape=(window_size, flow_features), name="ipt_flows")
    window = keras.layers.concatenate([domain_codes, flows_in], -1)

    # Convolution that looks at small slices of the flow window.
    window_conv = Conv1D(filters=cnn_dims, kernel_size=kernel_size, activation="relu", padding="same",
                         input_shape=(window_size, domain_features + flow_features))
    hidden = window_conv(window)
    # Global max pooling removes the temporal dimension.
    hidden = GlobalMaxPooling1D()(hidden)
    hidden = Dropout(cnnDropout)(hidden)
    for _ in range(2):
        hidden = Dense(dense_dim, activation="relu")(hidden)

    client_head = Dense(1, activation='sigmoid', name="client")(hidden)
    server_head = Dense(1, activation='sigmoid', name="server")(hidden)

    return Model(in_domains=domains_in, in_flows=flows_in, out_client=client_head, out_server=server_head)
|
||||
|
||||
|
||||
def get_new_model(dropout, flow_features, domain_features, window_size, domain_length, cnn_dims, kernel_size,
                  dense_dim, cnn, model_output="both"):
    """Wire up the staged network: server output first, client output on top of it.

    The server output is computed from two dense layers applied to the merged
    domain encoding and flow features, before any pooling over the window. The
    output of those dense layers is concatenated back onto the merged features
    and fed through the window convolution, pooling, dropout and two further
    dense layers to produce the client output.

    :param dropout: dropout rate applied after the pooling
    :param flow_features: number of flow features per window step
    :param domain_features: encoding size, only used for the ``input_shape`` hint
    :param window_size: number of steps per window
    :param domain_length: length of each domain input
    :param cnn_dims: number of filters of the window convolution
    :param kernel_size: kernel width of the window convolution
    :param dense_dim: width of every hidden dense layer
    :param cnn: domain encoder model applied per window step
    :param model_output: accepted for signature compatibility; not used here
    :return: ``Model`` tuple (in_domains, in_flows, out_client, out_server)
    """
    domains_in = Input(shape=(window_size, domain_length), name="ipt_domains")
    flows_in = Input(shape=(window_size, flow_features), name="ipt_flows")
    domain_codes = TimeDistributed(cnn, name="domain_cnn")(domains_in)
    window = keras.layers.concatenate([domain_codes, flows_in], -1)

    # Server branch: dense layers directly on the merged per-step features.
    server_hidden = Dense(dense_dim, activation="relu")(window)
    server_hidden = Dense(dense_dim, activation="relu", name="dense_server")(server_hidden)
    server_head = Dense(1, activation="sigmoid", name="server")(server_hidden)

    # Client branch sees the merged features plus the server branch's hidden output.
    client_input = keras.layers.concatenate([window, server_hidden], -1)
    # Convolution that looks at small slices of the flow window.
    # NOTE(review): the input_shape hint is unchanged from the original and does not
    # count the dense_server features that were just concatenated - confirm it is unused.
    window_conv = Conv1D(filters=cnn_dims, kernel_size=kernel_size, activation="relu", padding="same",
                         input_shape=(window_size, domain_features + flow_features))
    client_hidden = window_conv(client_input)
    # Global max pooling removes the temporal dimension.
    client_hidden = GlobalMaxPooling1D()(client_hidden)
    client_hidden = Dropout(dropout)(client_hidden)
    client_hidden = Dense(dense_dim, activation="relu")(client_hidden)
    client_hidden = Dense(dense_dim, activation="relu", name="dense_client")(client_hidden)
    client_head = Dense(1, activation='sigmoid', name="client")(client_hidden)

    return Model(in_domains=domains_in, in_flows=flows_in, out_client=client_head, out_server=server_head)
|
@ -95,6 +95,8 @@ def get_server_model(flow_features, domain_length, dense_dim, cnn):
|
||||
ipt_domains = Input(shape=(domain_length,), name="ipt_domains")
|
||||
ipt_flows = Input(shape=(flow_features,), name="ipt_flows")
|
||||
encoded = cnn(ipt_domains)
|
||||
cnn.name = "domain_cnn"
|
||||
|
||||
merged = keras.layers.concatenate([encoded, ipt_flows], -1)
|
||||
y = Dense(dense_dim,
|
||||
activation="relu",
|
||||
|
12
utils.py
12
utils.py
@ -13,16 +13,17 @@ def exists_or_make_path(p):
|
||||
|
||||
|
||||
def get_custom_class_weights(client, server):
    """Compute balanced class weights separately for the client and server labels.

    :param client: client label array
    :param server: server label array
    :return: dict with keys ``"client"`` and ``"server"``, each holding the
        result of ``compute_class_weight("balanced", ...)`` for the classes
        found by ``np.unique`` in the respective label array
    """
    # classes/y are passed by keyword: recent scikit-learn releases reject them
    # as positional arguments, and older releases accept the keyword form too.
    return {
        "client": class_weight.compute_class_weight("balanced", classes=np.unique(client), y=client),
        "server": class_weight.compute_class_weight("balanced", classes=np.unique(server), y=server),
    }
|
||||
|
||||
|
||||
def get_custom_sample_weights(client, server):
    """Compute balanced per-sample weights separately for the client and server labels.

    :param client: client label array
    :param server: server label array
    :return: dict with keys ``"client"`` and ``"server"``, each holding the
        result of ``compute_sample_weight("balanced", ...)`` for that label array
    """
    return {
        "client": class_weight.compute_sample_weight("balanced", client),
        "server": class_weight.compute_sample_weight("balanced", server),
    }
|
||||
|
||||
|
||||
def load_ordered_hyperband_results(path):
|
||||
@ -42,5 +43,4 @@ def load_model(path, custom_objects=None):
|
||||
except Exception:
|
||||
embd = clf.get_layer("domain_cnn")
|
||||
|
||||
|
||||
return embd, clf
|
||||
|
Loading…
Reference in New Issue
Block a user