From 87b927cdc903e2337eb654b957eaec1ca8292601 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ren=C3=A9=20Knaebel?= Date: Thu, 29 Jun 2017 09:19:36 +0200 Subject: [PATCH] reformat code --- cnnOnCnnParameterSelection.py | 122 +++++++++++++++------------------- 1 file changed, 53 insertions(+), 69 deletions(-) diff --git a/cnnOnCnnParameterSelection.py b/cnnOnCnnParameterSelection.py index 21a5f9d..5fe8fb4 100644 --- a/cnnOnCnnParameterSelection.py +++ b/cnnOnCnnParameterSelection.py @@ -1,43 +1,31 @@ # -*- coding: utf-8 -*- +import joblib +import keras +import numpy as np +import tensorflow as tf +from keras.layers import Dense, Dropout, Conv1D, GlobalMaxPooling1D, Reshape +from keras.layers import Input +from keras.models import Model +from keras.utils import np_utils from tqdm import tqdm -import tensorflow as tf +import stackedNeuralModels as stackedNeuralModels + config = tf.ConfigProto(log_device_placement=True) config.gpu_options.per_process_gpu_memory_fraction = 0.5 config.gpu_options.allow_growth = True session = tf.Session(config=config) -from pymongo import MongoClient -import joblib -import pickle -import numpy as np - -import ciscoProcessing as ciscoProcessing -import stackedNeuralModels as stackedNeuralModels - -from sklearn.metrics import precision_recall_curve -from sklearn.metrics import auc, roc_curve -import matplotlib.pyplot as plt - -import keras -from keras.models import Sequential -from keras.layers import Dense, Activation,LSTM,Embedding,Dropout,Conv1D, GlobalMaxPooling1D, Merge, Reshape, Lambda -from keras.layers import Convolution1D -from keras.layers import Input -from keras.models import Model -from keras.utils import np_utils - - -if __name__ == "__main__": +if __name__ == "__main__": # parameter innerCNNFilters = 512 innerCNNKernelSize = 2 cnnDropout = 0.5 cnnHiddenDims = 1024 - domainFeatures = 512 - flowFeatures = 3 - numCiscoFeatures=30 - windowSize = 10 + domainFeatures = 512 + flowFeatures = 3 + numCiscoFeatures = 30 + windowSize = 10 maxLen = 40 embeddingSize = 100 kernel_size = 2 @@ -50,82 +38,81 @@ if __name__ == "__main__": numEpochs = 100 maxLengthInSeconds = -1 timesNeg = -1 - - trainDataPath = '/mnt/projekte/pmlcluster/cisco/trainData/equalClass/currentData.joblib' - testDataPath = '/mnt/projekte/pmlcluster/cisco/trainData/equalClass/futureData.joblib' - + + trainDataPath = '/mnt/projekte/pmlcluster/cisco/trainData/equalClass/currentData.joblib' + testDataPath = '/mnt/projekte/pmlcluster/cisco/trainData/equalClass/futureData.joblib' + if 'characterDict' not in locals(): characterDictPath = 'trainData/characterIDDict.joblib' characterDict = joblib.load(characterDictPath)['characterIDDict'] - + # load train and test data from joblib # created with createTrainDataMultipleTaskLearning.py if 'trainDFs' not in locals(): tmpLoad = joblib.load(trainDataPath) trainDFs = tmpLoad['data'] - + if 'testDFs' not in locals(): tmpLoad = joblib.load(testDataPath) - - - sharedCNNFun = stackedNeuralModels.getCNNWitoutLastLayerFunctional(len(characterDict)+1,embeddingSize,maxLen,domainFeatures,kernel_size,domainFeatures,0.5) - + + sharedCNNFun = stackedNeuralModels.getCNNWitoutLastLayerFunctional(len(characterDict) + 1, embeddingSize, maxLen, + domainFeatures, kernel_size, domainFeatures, 0.5) + domainLists = [] - dfLists = [] + dfLists = [] for i in tqdm(np.arange(len(trainDFs)), miniters=10): - (domainListsTmp,dfListsTmp) = stackedNeuralModels.getChunksFromUserDataFrame(trainDFs[i], - windowSize=windowSize,overlapping=False,maxLengthInSeconds=maxLengthInSeconds) + (domainListsTmp, dfListsTmp) = stackedNeuralModels.getChunksFromUserDataFrame(trainDFs[i], + windowSize=windowSize, + overlapping=False, + maxLengthInSeconds=maxLengthInSeconds) domainLists += domainListsTmp dfLists += dfListsTmp if i == 100: break - - (testData,testLabel,testHits,testNames) = stackedNeuralModels.createTrainData( - domainLists=domainLists,dfLists=dfLists,charachterDict=characterDict, - maxLen=maxLen,threshold = threshold, - flagUseCiscoFeatures=False,urlSIPDIct=dict(), - windowSize=windowSize) - + + (testData, testLabel, testHits, testNames) = stackedNeuralModels.createTrainData( + domainLists=domainLists, dfLists=dfLists, charachterDict=characterDict, + maxLen=maxLen, threshold=threshold, + flagUseCiscoFeatures=False, urlSIPDIct=dict(), + windowSize=windowSize) + useIDs = np.where(testLabel == 1.0)[0] - useIDs = np.concatenate([useIDs,np.where(testLabel == 0.0)[0]]) - - + useIDs = np.concatenate([useIDs, np.where(testLabel == 0.0)[0]]) + testLabel = testLabel[useIDs] testHits = testHits[useIDs] testNames = testNames[useIDs] for i in range(len(testData)): testData[i] = testData[i][useIDs] - - + inputList = [] encodedList = [] numFeatures = flowFeatures for i in range(windowSize): inputList.append(Input(shape=(maxLen,))) - encodedList.append(sharedCNNFun(inputList[-1])) # add shared domain model + encodedList.append(sharedCNNFun(inputList[-1])) # add shared domain model inputList.append(Input(shape=(numFeatures,))) - + merge_layer_input = [] for i in range(windowSize): merge_layer_input.append(encodedList[i]) - merge_layer_input.append(inputList[(2*i)+1]) - - + merge_layer_input.append(inputList[(2 * i) + 1]) + # We can then concatenate the two vectors: merged_vector = keras.layers.concatenate(merge_layer_input, axis=-1) - reshape = Reshape((windowSize, domainFeatures+numFeatures))(merged_vector) + reshape = Reshape((windowSize, domainFeatures + numFeatures))(merged_vector) # add second cnn - + cnn = Conv1D(filters, kernel_size, activation='relu', - input_shape=(windowSize,domainFeatures+numFeatures))(reshape) + input_shape=(windowSize, domainFeatures + numFeatures))(reshape) # we use max pooling: maxPool = GlobalMaxPooling1D()(cnn) cnnDropout = Dropout(cnnDropout)(maxPool) - cnnDense = Dense(cnnHiddenDims,activation='relu')(cnnDropout) - cnnOutput = Dense(2,activation='softmax')(cnnDense) - + cnnDense = Dense(cnnHiddenDims, activation='relu')(cnnDropout) + cnnOutput = Dense(2, activation='softmax')(cnnDense) + # We define a trainable model linking the # tweet inputs to the predictions model = Model(inputs=inputList, outputs=cnnOutput) @@ -133,11 +120,8 @@ if __name__ == "__main__": loss='binary_crossentropy', metrics=['accuracy']) - - - epochNumber= 0 + epochNumber = 0 trainLabel = np_utils.to_categorical(testLabel, 2) model.fit(x=testData, y=trainLabel, - epochs=epochNumber + 1,shuffle=True,initial_epoch=epochNumber)#, - #validation_data=(testData,testLabel)) - \ No newline at end of file + epochs=epochNumber + 1, shuffle=True, initial_epoch=epochNumber) # , + # validation_data=(testData,testLabel))