diff --git a/dataset.py b/dataset.py index 2f6408a..a52d9a6 100644 --- a/dataset.py +++ b/dataset.py @@ -108,12 +108,12 @@ def create_dataset_from_flows(user_flow_df, char_dict, max_len, window_size=10, for i, user_flow in enumerate(get_flow_per_user(user_flow_df)): (domain_windows, feature_windows) = get_user_chunks(user_flow, windowSize=window_size, - overlapping=True, + overlapping=False, maxLengthInSeconds=-1) domains += domain_windows features += feature_windows # TODO: remove later - if i >= 10: + if i >= 50: break print("create training dataset")