#!/usr/bin/python2
"""Flatten one multiple-task-learning joblib dump into a typed CSV file.

Usage: <script> NAME

Loads ``NAME.joblib`` from the training-data directory, concatenates the
frames stored under its ``"data"`` key, normalises column dtypes and writes
the result to ``/tmp/rk/data/NAME.csv``.
"""
import sys

import numpy as np
import pandas as pd

# Path templates; ``{}`` is replaced by the name given on the command line.
INPUT_TEMPLATE = "/mnt/projekte/pmlcluster/cisco/trainData/multipleTaskLearning/{}.joblib"
OUTPUT_TEMPLATE = "/tmp/rk/data/{}.csv"

# Columns parsed with ``pd.to_numeric``; unparseable entries become NaN.
NUMERIC_COLUMNS = ("serverLabel", "duration", "bytes_down", "bytes_up")


def prepare_frame(frames):
    """Concatenate *frames* and coerce the columns to their final dtypes.

    Args:
        frames: Iterable of DataFrames accepted by ``pd.concat``. Each must
            provide the columns ``serverLabel``, ``duration``, ``bytes_down``,
            ``bytes_up``, ``http_method``, ``virusTotalHits`` and
            ``trustedHits``.

    Returns:
        A new DataFrame. The previous index is kept as a regular column
        (added by ``reset_index``), and every row that contained a missing
        value before type coercion has been removed.
    """
    df = pd.concat(frames)
    df.reset_index(inplace=True)
    df.dropna(axis=0, how="any", inplace=True)

    # NOTE(review): coercion happens *after* dropna, so values that fail to
    # parse survive as NaN; for serverLabel such a NaN then becomes True in
    # the bool cast below. Confirm whether those rows should be dropped.
    for column in NUMERIC_COLUMNS:
        df[column] = pd.to_numeric(df[column], errors="coerce")

    df["http_method"] = df["http_method"].astype("category")
    # Builtin ``bool`` instead of ``np.bool``: the alias was deprecated in
    # NumPy 1.20 and removed in 1.24, while ``bool`` works on every version.
    df["serverLabel"] = df["serverLabel"].astype(bool)
    # NOTE(review): int8 only holds -128..127 -- confirm the hit counts fit.
    df["virusTotalHits"] = df["virusTotalHits"].astype(np.int8)
    df["trustedHits"] = df["trustedHits"].astype(np.int8)
    return df


def main(argv=None):
    """Convert the dump named by ``argv[1]``; return a process exit status.

    Args:
        argv: Argument vector including the program name. Defaults to
            ``sys.argv``. Arguments after the first are ignored.

    Returns:
        0 on success, 2 when the dump name is missing.
    """
    if argv is None:
        argv = sys.argv
    if len(argv) < 2:
        sys.stderr.write("usage: {} NAME\n".format(argv[0] if argv else "script"))
        return 2
    fn = argv[1]

    # Imported here so that prepare_frame stays importable without joblib.
    import joblib

    # SECURITY: joblib.load unpickles its input and can execute arbitrary
    # code; only use it on dumps from a trusted source.
    loaded = joblib.load(INPUT_TEMPLATE.format(fn))
    df = prepare_frame(loaded["data"])
    df.to_csv(OUTPUT_TEMPLATE.format(fn), encoding="utf-8")
    return 0


if __name__ == "__main__":
    sys.exit(main())