# Patch for scripts/make_csv_dataset.py (a Python 2 script, per its shebang).
#
# What the change does:
#   * imports pandas as `pd`;
#   * after loading the joblib file and taking its "data" entry as
#     `user_flows`, concatenates `user_flows` into one frame with pd.concat;
#   * calls reset_index(inplace=True) on the result;
#   * writes the frame as a gzip-compressed CSV to /tmp/rk/full_dataset.csv.gz.
#
# NOTE(review): the hunk header declares 6 old / 10 new lines, but only four
#   context lines and four added lines are visible in the source this was
#   restored from. The two blank context lines below were placed where a
#   script conventionally has them (after the shebang, after the imports) --
#   confirm against the original file before applying.
# NOTE(review): presumably `user_flows` is a list or dict of pandas objects,
#   since it is passed straight to pd.concat -- verify against the producer
#   of currentData.joblib.
# NOTE(review): to_csv is called without index=False, so pandas' default of
#   writing the index applies on top of whatever column(s) reset_index
#   created -- confirm the extra column is wanted.
# NOTE(review): nothing in this hunk creates /tmp/rk; the write presumably
#   relies on that directory already existing -- confirm.
diff --git a/scripts/make_csv_dataset.py b/scripts/make_csv_dataset.py
index 2f1b12b..bee7223 100644
--- a/scripts/make_csv_dataset.py
+++ b/scripts/make_csv_dataset.py
@@ -1,6 +1,10 @@
 #!/usr/bin/python2
 
 import joblib
+import pandas as pd
 
 datafile = joblib.load("/mnt/projekte/pmlcluster/cisco/trainData/multipleTaskLearning/currentData.joblib")
 user_flows = datafile["data"]
+df = pd.concat(user_flows)
+df.reset_index(inplace=True)
+df.to_csv("/tmp/rk/full_dataset.csv.gz", compression="gzip")