add soft parameter sharing network
This commit is contained in:
parent
7b8dfcebbe
commit
b1f48c1895
17
hyperband.py
17
hyperband.py
@ -8,6 +8,7 @@ from random import random as rng
|
|||||||
from time import ctime, time
|
from time import ctime, time
|
||||||
|
|
||||||
import joblib
|
import joblib
|
||||||
|
import keras.backend as K
|
||||||
import numpy as np
|
import numpy as np
|
||||||
from keras.callbacks import EarlyStopping
|
from keras.callbacks import EarlyStopping
|
||||||
|
|
||||||
@ -47,14 +48,28 @@ class Hyperband:
|
|||||||
|
|
||||||
def try_params(self, n_iterations, params):
|
def try_params(self, n_iterations, params):
|
||||||
n_iterations = int(round(n_iterations))
|
n_iterations = int(round(n_iterations))
|
||||||
embedding, model, new_model, long_model = models.get_models_by_params(params)
|
embedding, model, new_model, long_model, soft_model = models.get_models_by_params(params)
|
||||||
|
|
||||||
if params["type"] in ("inter", "staggered"):
|
if params["type"] in ("inter", "staggered"):
|
||||||
model = new_model
|
model = new_model
|
||||||
if params["type"] == "long":
|
if params["type"] == "long":
|
||||||
model = long_model
|
model = long_model
|
||||||
|
if params["type"] == "soft":
|
||||||
|
model = soft_model
|
||||||
|
|
||||||
model = create_model(model, params["model_output"])
|
model = create_model(model, params["model_output"])
|
||||||
|
|
||||||
|
if params["type"] == "soft":
|
||||||
|
conv_server = model.get_layer("conv_server").trainable_weights
|
||||||
|
conv_client = model.get_layer("conv_client").trainable_weights
|
||||||
|
l1 = [0.001 * K.sum(K.abs(x - y)) for (x, y) in zip(conv_server, conv_client)]
|
||||||
|
model.add_loss(l1)
|
||||||
|
|
||||||
|
dense_server = model.get_layer("dense_server").trainable_weights
|
||||||
|
dense_client = model.get_layer("dense_client").trainable_weights
|
||||||
|
l2 = [0.001 * K.sum(K.abs(x - y)) for (x, y) in zip(dense_server, dense_client)]
|
||||||
|
model.add_loss(l2)
|
||||||
|
|
||||||
callbacks = [EarlyStopping(monitor='val_loss',
|
callbacks = [EarlyStopping(monitor='val_loss',
|
||||||
patience=5,
|
patience=5,
|
||||||
verbose=False)]
|
verbose=False)]
|
||||||
|
17
main.py
17
main.py
@ -3,6 +3,7 @@ import operator
|
|||||||
import os
|
import os
|
||||||
|
|
||||||
import joblib
|
import joblib
|
||||||
|
import keras.backend as K
|
||||||
import numpy as np
|
import numpy as np
|
||||||
import pandas as pd
|
import pandas as pd
|
||||||
import tensorflow as tf
|
import tensorflow as tf
|
||||||
@ -246,13 +247,27 @@ def main_train(param=None):
|
|||||||
custom_sample_weights = None
|
custom_sample_weights = None
|
||||||
|
|
||||||
logger.info(f"Generator model with params: {param}")
|
logger.info(f"Generator model with params: {param}")
|
||||||
embedding, model, new_model, long_model = models.get_models_by_params(param)
|
embedding, model, new_model, long_model, soft_model = models.get_models_by_params(param)
|
||||||
|
|
||||||
if args.model_type in ("inter", "staggered"):
|
if args.model_type in ("inter", "staggered"):
|
||||||
model = new_model
|
model = new_model
|
||||||
if args.model_type == "long":
|
if args.model_type == "long":
|
||||||
model = long_model
|
model = long_model
|
||||||
|
if args.model_type == "soft":
|
||||||
|
model = soft_model
|
||||||
|
|
||||||
model = create_model(model, args.model_output)
|
model = create_model(model, args.model_output)
|
||||||
|
|
||||||
|
if args.model_type == "soft":
|
||||||
|
conv_server = model.get_layer("conv_server").trainable_weights
|
||||||
|
conv_client = model.get_layer("conv_client").trainable_weights
|
||||||
|
l1 = [0.001 * K.sum(K.abs(x - y)) for (x, y) in zip(conv_server, conv_client)]
|
||||||
|
model.add_loss(l1)
|
||||||
|
|
||||||
|
dense_server = model.get_layer("dense_server").trainable_weights
|
||||||
|
dense_client = model.get_layer("dense_client").trainable_weights
|
||||||
|
l2 = [0.001 * K.sum(K.abs(x - y)) for (x, y) in zip(dense_server, dense_client)]
|
||||||
|
model.add_loss(l2)
|
||||||
|
|
||||||
features = {"ipt_domains": domain_tr.value, "ipt_flows": flow_tr.value}
|
features = {"ipt_domains": domain_tr.value, "ipt_flows": flow_tr.value}
|
||||||
if args.model_output == "both":
|
if args.model_output == "both":
|
||||||
|
@ -46,7 +46,10 @@ def get_models_by_params(params: dict):
|
|||||||
long = networks.get_new_model2(0.25, flow_features, hidden_embedding, window_size, domain_length,
|
long = networks.get_new_model2(0.25, flow_features, hidden_embedding, window_size, domain_length,
|
||||||
filter_main, kernel_main, dense_dim, embedding_model, model_output)
|
filter_main, kernel_main, dense_dim, embedding_model, model_output)
|
||||||
|
|
||||||
return embedding_model, final, inter, long
|
soft = networks.get_new_soft(0.25, flow_features, hidden_embedding, window_size, domain_length,
|
||||||
|
filter_main, kernel_main, dense_dim, embedding_model, model_output)
|
||||||
|
|
||||||
|
return embedding_model, final, inter, long, soft
|
||||||
|
|
||||||
|
|
||||||
def get_server_model_by_params(params: dict):
|
def get_server_model_by_params(params: dict):
|
||||||
|
@ -135,3 +135,49 @@ def get_new_model2(dropout, flow_features, domain_features, window_size, domain_
|
|||||||
out_client = Dense(1, activation='sigmoid', name="client")(y)
|
out_client = Dense(1, activation='sigmoid', name="client")(y)
|
||||||
|
|
||||||
return Model(ipt_domains, ipt_flows, out_client, out_server)
|
return Model(ipt_domains, ipt_flows, out_client, out_server)
|
||||||
|
|
||||||
|
|
||||||
|
import keras.backend as K
|
||||||
|
|
||||||
|
|
||||||
|
def get_new_soft(dropout, flow_features, domain_features, window_size, domain_length, cnn_dims, kernel_size,
                 dense_dim, cnn, model_output="both") -> Model:
    """Build the two-tower network used for soft parameter sharing.

    Both towers read the same merged representation (per-step domain
    encoding concatenated with the flow features) but own separate
    ``Conv1D`` and ``Dense`` layers. The layers are named
    ``conv_server``/``dense_server`` and ``conv_client``/``dense_client`` so
    that code outside this function can look them up by name (for example to
    add a penalty on the distance between the paired weights).

    Args:
        dropout: Dropout rate applied after global max pooling in each tower.
        flow_features: Size of the last axis of the ``ipt_flows`` input.
        domain_features: Accepted for signature parity with the sibling model
            builders; not read by this function.
        window_size: Length of the window axis of both inputs.
        domain_length: Size of the last axis of the ``ipt_domains`` input.
        cnn_dims: Number of filters of each tower's ``Conv1D``.
        kernel_size: Kernel size of each tower's ``Conv1D``.
        dense_dim: Units of each tower's hidden ``Dense`` layer.
        cnn: Model/layer applied to every window step of ``ipt_domains`` via
            ``TimeDistributed``.
        model_output: Accepted for signature parity; not read by this
            function.

    Returns:
        ``Model(ipt_domains, ipt_flows, out_client, out_server)``.
    """
    # Shared front end: encode each domain in the window, then append the
    # flow features along the last axis.
    ipt_domains = Input(shape=(window_size, domain_length), name="ipt_domains")
    ipt_flows = Input(shape=(window_size, flow_features), name="ipt_flows")
    encoded = TimeDistributed(cnn, name="domain_cnn")(ipt_domains)
    merged = keras.layers.concatenate([encoded, ipt_flows], -1)

    # NOTE: the server tower is built before the client tower on purpose;
    # keeping this order keeps the auto-generated names of the unnamed
    # pooling/dropout layers stable.

    # Server tower.
    y = Conv1D(cnn_dims,
               kernel_size,
               activation='relu', name="conv_server")(merged)
    # remove temporal dimension by global max pooling
    y = GlobalMaxPooling1D()(y)
    y = Dropout(dropout)(y)
    y = Dense(dense_dim,
              activation="relu",
              name="dense_server")(y)
    out_server = Dense(1, activation="sigmoid", name="server")(y)

    # Client tower: same architecture, separate weights, same merged input.
    y = Conv1D(cnn_dims,
               kernel_size,
               activation='relu', name="conv_client")(merged)
    # remove temporal dimension by global max pooling
    y = GlobalMaxPooling1D()(y)
    y = Dropout(dropout)(y)
    y = Dense(dense_dim,
              activation='relu',
              name="dense_client")(y)
    out_client = Dense(1, activation='sigmoid', name="client")(y)

    return Model(ipt_domains, ipt_flows, out_client, out_server)
|
||||||
|
Loading…
Reference in New Issue
Block a user