Change the exception type in the get_flow_per_user function and replace the index with a new range index

This commit is contained in:
René Knaebel 2017-08-31 13:49:33 +02:00
parent dceaf47211
commit 933eaae04a

View File

@ -201,9 +201,9 @@ def get_user_flow_data(csv_file):
"serverLabel": int, "serverLabel": int,
"trustedHits": int "trustedHits": int
} }
df = pd.read_csv(csv_file) df = pd.read_csv(csv_file, index_col=False)
df = df[list(types.keys())] df = df[list(types.keys())]
df.set_index(keys=['user_hash'], drop=False, inplace=True) # df.set_index(keys=['user_hash'], drop=False, inplace=True)
return df return df
@ -236,10 +236,10 @@ def load_or_generate_domains(train_data, domain_length):
try: try:
user_flow_df = pd.read_csv(fn) user_flow_df = pd.read_csv(fn)
except Exception: except FileNotFoundError:
char_dict = get_character_dict() char_dict = get_character_dict()
user_flow_df = get_user_flow_data(train_data) user_flow_df = get_user_flow_data(train_data)
user_flow_df.reset_index(inplace=True) # user_flow_df.reset_index(inplace=True)
user_flow_df = user_flow_df[["domain", "serverLabel", "trustedHits", "virusTotalHits"]].dropna(axis=0, user_flow_df = user_flow_df[["domain", "serverLabel", "trustedHits", "virusTotalHits"]].dropna(axis=0,
how="any") how="any")
user_flow_df = user_flow_df.groupby(user_flow_df.domain).mean() user_flow_df = user_flow_df.groupby(user_flow_df.domain).mean()