fix: replace X_tr with its components (domain_tr, flow_tr); apply the sample selection to the training inputs too

This commit is contained in:
René Knaebel 2017-07-05 18:37:29 +02:00
parent 7c05ef6a12
commit a70d1cb03a
2 changed files with 7 additions and 5 deletions

View File

@ -169,8 +169,8 @@ def create_dataset_from_lists(domains, features, vocab, max_len,
names.append(np.unique(features[i]['user_hash']))
servers.append(np.max(features[i]['serverLabel']))
trusted_hits.append(np.max(features[i]['trustedHits']))
X = [domain_features, flow_features]
return X, np.array(hits), np.array(names), np.array(servers), np.array(trusted_hits)
return (domain_features, flow_features,
np.array(hits), np.array(names), np.array(servers), np.array(trusted_hits))
def discretize_label(values, threshold):

View File

@ -92,7 +92,7 @@ def main():
user_flow_df = dataset.get_user_flow_data()
print("create training dataset")
(X_tr, hits_tr, names_tr, server_tr, trusted_hits_tr) = dataset.create_dataset_from_flows(
domain_tr, flow_tr, hits_tr, names_tr, server_tr, trusted_hits_tr = dataset.create_dataset_from_flows(
user_flow_df, char_dict,
max_len=args.domain_length, window_size=args.window)
# make client labels discrete with 4 different values
@ -102,7 +102,9 @@ def main():
pos_idx = np.where(client_labels == 1.0)[0]
neg_idx = np.where(client_labels == 0.0)[0]
idx = np.concatenate((pos_idx, neg_idx))
# select labels for prediction
# choose selected sample to train on
domain_tr = domain_tr[idx]
flow_tr = flow_tr[idx]
client_labels = client_labels[idx]
server_labels = server_tr[idx]
@ -121,7 +123,7 @@ def main():
client_labels = np_utils.to_categorical(client_labels, 2)
server_labels = np_utils.to_categorical(server_labels, 2)
model.fit(X_tr,
model.fit([domain_tr, flow_tr],
[client_labels, server_labels],
batch_size=args.batch_size,
epochs=args.epochs,