# -*- coding: utf-8 -*-
from tqdm import tqdm
import tensorflow as tf

config = tf.ConfigProto(log_device_placement=True)
config.gpu_options.per_process_gpu_memory_fraction = 0.5
config.gpu_options.allow_growth = True
session = tf.Session(config=config)

from pymongo import MongoClient
import joblib
import pickle
import numpy as np
import ciscoProcessing as ciscoProcessing
import stackedNeuralModels as stackedNeuralModels
from sklearn.metrics import precision_recall_curve
from sklearn.metrics import auc, roc_curve
import matplotlib.pyplot as plt
import keras
from keras import backend as K
from keras.models import Sequential, Model
from keras.layers import (Dense, Activation, LSTM, Embedding, Dropout, Conv1D,
                          GlobalMaxPooling1D, Reshape, Lambda, Input)
from keras.utils import np_utils

# Register the configured session with Keras so the GPU options above
# actually take effect; otherwise Keras creates its own default session.
K.set_session(session)

if __name__ == "__main__":
    # model and training parameters
    innerCNNFilters = 512
    innerCNNKernelSize = 2
    cnnDropout = 0.5
    cnnHiddenDims = 1024
    domainFeatures = 512
    flowFeatures = 3
    numCiscoFeatures = 30
    windowSize = 10
    maxLen = 40
    embeddingSize = 100
    kernel_size = 2
    drop_out = 0.5
    filters = 2
    hidden_dims = 100
    vocabSize = 40
    threshold = 3
    minFlowsPerUser = 10
    numEpochs = 100
    maxLengthInSeconds = -1
    timesNeg = -1

    trainDataPath = '/mnt/projekte/pmlcluster/cisco/trainData/equalClass/currentData.joblib'
    testDataPath = '/mnt/projekte/pmlcluster/cisco/trainData/equalClass/futureData.joblib'

    # character-to-ID mapping used to encode domain names
    if 'characterDict' not in locals():
        characterDictPath = 'trainData/characterIDDict.joblib'
        characterDict = joblib.load(characterDictPath)['characterIDDict']

    # load train and test data from joblib
    # created with createTrainDataMultipleTaskLearning.py
    if 'trainDFs' not in locals():
        tmpLoad = joblib.load(trainDataPath)
        trainDFs = tmpLoad['data']
    if 'testDFs' not in locals():
        tmpLoad = joblib.load(testDataPath)
        testDFs = tmpLoad['data']

    # shared character-level CNN applied to every domain in a window
    sharedCNNFun = stackedNeuralModels.getCNNWitoutLastLayerFunctional(
        len(characterDict) + 1, embeddingSize, maxLen, domainFeatures,
        kernel_size, domainFeatures, 0.5)

    # split each user's flows into fixed-size, non-overlapping windows
    domainLists = []
    dfLists = []
    for i in tqdm(np.arange(len(trainDFs)), miniters=10):
        (domainListsTmp, dfListsTmp) = stackedNeuralModels.getChunksFromUserDataFrame(
            trainDFs[i], windowSize=windowSize, overlapping=False,
            maxLengthInSeconds=maxLengthInSeconds)
        domainLists += domainListsTmp
        dfLists += dfListsTmp
        if i == 100:  # limit to the first users for faster experimentation
            break

    # note: despite the "test" prefix, this data is built from trainDFs
    # and is what gets fitted below
    (testData, testLabel, testHits, testNames) = stackedNeuralModels.createTrainData(
        domainLists=domainLists, dfLists=dfLists, charachterDict=characterDict,
        maxLen=maxLen, threshold=threshold,
        flagUseCiscoFeatures=False, urlSIPDIct=dict(),
        windowSize=windowSize)

    # keep only windows with a definite label (1.0 or 0.0)
    useIDs = np.where(testLabel == 1.0)[0]
    useIDs = np.concatenate([useIDs, np.where(testLabel == 0.0)[0]])
    testLabel = testLabel[useIDs]
    testHits = testHits[useIDs]
    testNames = testNames[useIDs]
    for i in range(len(testData)):
        testData[i] = testData[i][useIDs]

    # build the window-level model: per window position, one domain input
    # (character IDs) and one flow-feature input
    inputList = []
    encodedList = []
    numFeatures = flowFeatures
    for i in range(windowSize):
        inputList.append(Input(shape=(maxLen,)))
        encodedList.append(sharedCNNFun(inputList[-1]))  # shared domain model
        inputList.append(Input(shape=(numFeatures,)))

    # interleave each encoded domain with its corresponding flow features
    merge_layer_input = []
    for i in range(windowSize):
        merge_layer_input.append(encodedList[i])
        merge_layer_input.append(inputList[(2 * i) + 1])

    # concatenate all per-flow vectors and reshape to (windowSize, features)
    merged_vector = keras.layers.concatenate(merge_layer_input, axis=-1)
    reshape = Reshape((windowSize, domainFeatures + numFeatures))(merged_vector)

    # second CNN over the flows of a window
    cnn = Conv1D(filters, kernel_size, activation='relu',
                 input_shape=(windowSize, domainFeatures + numFeatures))(reshape)
    # max pooling over the window dimension
    maxPool = GlobalMaxPooling1D()(cnn)
    cnnDropoutLayer = Dropout(cnnDropout)(maxPool)  # avoid shadowing the cnnDropout rate
    cnnDense = Dense(cnnHiddenDims, activation='relu')(cnnDropoutLayer)
    cnnOutput = Dense(2, activation='softmax')(cnnDense)

    # define a trainable model linking the window inputs to the predictions
    model = Model(inputs=inputList, outputs=cnnOutput)
    model.compile(optimizer='adam',
                  loss='binary_crossentropy',
                  metrics=['accuracy'])

    epochNumber = 0
    trainLabel = np_utils.to_categorical(testLabel, 2)
    model.fit(x=testData, y=trainLabel,
              epochs=epochNumber + 1, shuffle=True,
              initial_epoch=epochNumber)
              # validation_data=(testData, trainLabel))
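
    # A minimal evaluation sketch along the lines suggested by the
    # precision_recall_curve / roc_curve / matplotlib imports above, which
    # are otherwise unused. It assumes `model`, `testData`, and `testLabel`
    # as defined above; the names `scores`, `fpr`, `tpr`, `precision`, and
    # `recall` are illustrative, not part of the original script.
    scores = model.predict(testData)[:, 1]  # probability of the positive class

    fpr, tpr, _ = roc_curve(testLabel, scores)
    precision, recall, _ = precision_recall_curve(testLabel, scores)
    print('ROC AUC: {:.4f}'.format(auc(fpr, tpr)))

    plt.figure()
    plt.plot(fpr, tpr, label='ROC (AUC = {:.4f})'.format(auc(fpr, tpr)))
    plt.plot(recall, precision, label='precision-recall')
    plt.xlabel('FPR / recall')
    plt.ylabel('TPR / precision')
    plt.legend()
    plt.show()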