From 933eaae04a4cc097c547b78e092b4636c1a5eff9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ren=C3=A9=20Knaebel?= Date: Thu, 31 Aug 2017 13:49:33 +0200 Subject: [PATCH] change exception type in get_flow_per_user function and replace index to new range index --- dataset.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/dataset.py b/dataset.py index 80444cc..ccfc8e1 100644 --- a/dataset.py +++ b/dataset.py @@ -201,9 +201,9 @@ def get_user_flow_data(csv_file): "serverLabel": int, "trustedHits": int } - df = pd.read_csv(csv_file) + df = pd.read_csv(csv_file, index_col=False) df = df[list(types.keys())] - df.set_index(keys=['user_hash'], drop=False, inplace=True) + # df.set_index(keys=['user_hash'], drop=False, inplace=True) return df @@ -236,10 +236,10 @@ def load_or_generate_domains(train_data, domain_length): try: user_flow_df = pd.read_csv(fn) - except Exception: + except FileNotFoundError: char_dict = get_character_dict() user_flow_df = get_user_flow_data(train_data) - user_flow_df.reset_index(inplace=True) + # user_flow_df.reset_index(inplace=True) user_flow_df = user_flow_df[["domain", "serverLabel", "trustedHits", "virusTotalHits"]].dropna(axis=0, how="any") user_flow_df = user_flow_df.groupby(user_flow_df.domain).mean()