def get_all_flow_features(features, columns=("duration", "bytes_up", "bytes_down")):
    """Stack the selected flow columns of every item and log-scale them.

    Args:
        features: Iterable of tabular objects that support selection by a
            list of column labels (``item[[...]]``) -- presumably pandas
            DataFrames holding one window of flows each; confirm against
            the caller. Every item must yield the same shape after
            selection, otherwise ``np.stack`` cannot combine them.
        columns: Labels of the columns to extract from each item. The
            default keeps the original hard-coded selection.

    Returns:
        A numpy array with the per-item selections stacked along a new
        first axis and ``np.log1p`` applied element-wise.

    Raises:
        ValueError: If ``features`` is empty (``np.stack`` needs at least
            one array) or the selections have mismatching shapes.
    """
    # Materialize the labels once so tuples and other iterables are accepted
    # and every item is indexed with a plain list of labels.
    selected_columns = list(columns)
    flows = np.stack([flow[selected_columns] for flow in features])
    # log1p(x) = log(1 + x): compresses large durations / byte counts while
    # keeping zero-valued entries at exactly zero.
    return np.log1p(flows)