reformat code
This commit is contained in:
parent
24d677e101
commit
87b927cdc9
@ -1,43 +1,31 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
import joblib
|
||||
import keras
|
||||
import numpy as np
|
||||
import tensorflow as tf
|
||||
from keras.layers import Dense, Dropout, Conv1D, GlobalMaxPooling1D, Reshape
|
||||
from keras.layers import Input
|
||||
from keras.models import Model
|
||||
from keras.utils import np_utils
|
||||
from tqdm import tqdm
|
||||
|
||||
import tensorflow as tf
|
||||
import stackedNeuralModels as stackedNeuralModels
|
||||
|
||||
config = tf.ConfigProto(log_device_placement=True)
|
||||
config.gpu_options.per_process_gpu_memory_fraction = 0.5
|
||||
config.gpu_options.allow_growth = True
|
||||
session = tf.Session(config=config)
|
||||
|
||||
from pymongo import MongoClient
|
||||
import joblib
|
||||
import pickle
|
||||
import numpy as np
|
||||
|
||||
import ciscoProcessing as ciscoProcessing
|
||||
import stackedNeuralModels as stackedNeuralModels
|
||||
|
||||
from sklearn.metrics import precision_recall_curve
|
||||
from sklearn.metrics import auc, roc_curve
|
||||
import matplotlib.pyplot as plt
|
||||
|
||||
import keras
|
||||
from keras.models import Sequential
|
||||
from keras.layers import Dense, Activation,LSTM,Embedding,Dropout,Conv1D, GlobalMaxPooling1D, Merge, Reshape, Lambda
|
||||
from keras.layers import Convolution1D
|
||||
from keras.layers import Input
|
||||
from keras.models import Model
|
||||
from keras.utils import np_utils
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
if __name__ == "__main__":
|
||||
# parameters
|
||||
innerCNNFilters = 512
|
||||
innerCNNKernelSize = 2
|
||||
cnnDropout = 0.5
|
||||
cnnHiddenDims = 1024
|
||||
domainFeatures = 512
|
||||
flowFeatures = 3
|
||||
numCiscoFeatures=30
|
||||
windowSize = 10
|
||||
domainFeatures = 512
|
||||
flowFeatures = 3
|
||||
numCiscoFeatures = 30
|
||||
windowSize = 10
|
||||
maxLen = 40
|
||||
embeddingSize = 100
|
||||
kernel_size = 2
|
||||
@ -50,82 +38,81 @@ if __name__ == "__main__":
|
||||
numEpochs = 100
|
||||
maxLengthInSeconds = -1
|
||||
timesNeg = -1
|
||||
|
||||
trainDataPath = '/mnt/projekte/pmlcluster/cisco/trainData/equalClass/currentData.joblib'
|
||||
testDataPath = '/mnt/projekte/pmlcluster/cisco/trainData/equalClass/futureData.joblib'
|
||||
|
||||
|
||||
trainDataPath = '/mnt/projekte/pmlcluster/cisco/trainData/equalClass/currentData.joblib'
|
||||
testDataPath = '/mnt/projekte/pmlcluster/cisco/trainData/equalClass/futureData.joblib'
|
||||
|
||||
if 'characterDict' not in locals():
|
||||
characterDictPath = 'trainData/characterIDDict.joblib'
|
||||
characterDict = joblib.load(characterDictPath)['characterIDDict']
|
||||
|
||||
|
||||
# load train and test data from joblib
|
||||
# created with createTrainDataMultipleTaskLearning.py
|
||||
if 'trainDFs' not in locals():
|
||||
tmpLoad = joblib.load(trainDataPath)
|
||||
trainDFs = tmpLoad['data']
|
||||
|
||||
|
||||
if 'testDFs' not in locals():
|
||||
tmpLoad = joblib.load(testDataPath)
|
||||
|
||||
|
||||
sharedCNNFun = stackedNeuralModels.getCNNWitoutLastLayerFunctional(len(characterDict)+1,embeddingSize,maxLen,domainFeatures,kernel_size,domainFeatures,0.5)
|
||||
|
||||
|
||||
sharedCNNFun = stackedNeuralModels.getCNNWitoutLastLayerFunctional(len(characterDict) + 1, embeddingSize, maxLen,
|
||||
domainFeatures, kernel_size, domainFeatures, 0.5)
|
||||
|
||||
domainLists = []
|
||||
dfLists = []
|
||||
dfLists = []
|
||||
for i in tqdm(np.arange(len(trainDFs)), miniters=10):
|
||||
(domainListsTmp,dfListsTmp) = stackedNeuralModels.getChunksFromUserDataFrame(trainDFs[i],
|
||||
windowSize=windowSize,overlapping=False,maxLengthInSeconds=maxLengthInSeconds)
|
||||
(domainListsTmp, dfListsTmp) = stackedNeuralModels.getChunksFromUserDataFrame(trainDFs[i],
|
||||
windowSize=windowSize,
|
||||
overlapping=False,
|
||||
maxLengthInSeconds=maxLengthInSeconds)
|
||||
domainLists += domainListsTmp
|
||||
dfLists += dfListsTmp
|
||||
if i == 100:
|
||||
break
|
||||
|
||||
(testData,testLabel,testHits,testNames) = stackedNeuralModels.createTrainData(
|
||||
domainLists=domainLists,dfLists=dfLists,charachterDict=characterDict,
|
||||
maxLen=maxLen,threshold = threshold,
|
||||
flagUseCiscoFeatures=False,urlSIPDIct=dict(),
|
||||
windowSize=windowSize)
|
||||
|
||||
|
||||
(testData, testLabel, testHits, testNames) = stackedNeuralModels.createTrainData(
|
||||
domainLists=domainLists, dfLists=dfLists, charachterDict=characterDict,
|
||||
maxLen=maxLen, threshold=threshold,
|
||||
flagUseCiscoFeatures=False, urlSIPDIct=dict(),
|
||||
windowSize=windowSize)
|
||||
|
||||
useIDs = np.where(testLabel == 1.0)[0]
|
||||
useIDs = np.concatenate([useIDs,np.where(testLabel == 0.0)[0]])
|
||||
|
||||
|
||||
useIDs = np.concatenate([useIDs, np.where(testLabel == 0.0)[0]])
|
||||
|
||||
testLabel = testLabel[useIDs]
|
||||
testHits = testHits[useIDs]
|
||||
testNames = testNames[useIDs]
|
||||
for i in range(len(testData)):
|
||||
testData[i] = testData[i][useIDs]
|
||||
|
||||
|
||||
|
||||
inputList = []
|
||||
encodedList = []
|
||||
numFeatures = flowFeatures
|
||||
for i in range(windowSize):
|
||||
inputList.append(Input(shape=(maxLen,)))
|
||||
encodedList.append(sharedCNNFun(inputList[-1])) # add shared domain model
|
||||
encodedList.append(sharedCNNFun(inputList[-1])) # add shared domain model
|
||||
inputList.append(Input(shape=(numFeatures,)))
|
||||
|
||||
|
||||
merge_layer_input = []
|
||||
for i in range(windowSize):
|
||||
merge_layer_input.append(encodedList[i])
|
||||
merge_layer_input.append(inputList[(2*i)+1])
|
||||
|
||||
|
||||
merge_layer_input.append(inputList[(2 * i) + 1])
|
||||
|
||||
# Concatenate, for every window position, the encoded domain vector and its flow-feature input:
|
||||
merged_vector = keras.layers.concatenate(merge_layer_input, axis=-1)
|
||||
reshape = Reshape((windowSize, domainFeatures+numFeatures))(merged_vector)
|
||||
reshape = Reshape((windowSize, domainFeatures + numFeatures))(merged_vector)
|
||||
# add second cnn
|
||||
|
||||
|
||||
cnn = Conv1D(filters,
|
||||
kernel_size,
|
||||
activation='relu',
|
||||
input_shape=(windowSize,domainFeatures+numFeatures))(reshape)
|
||||
input_shape=(windowSize, domainFeatures + numFeatures))(reshape)
|
||||
# we use max pooling:
|
||||
maxPool = GlobalMaxPooling1D()(cnn)
|
||||
cnnDropout = Dropout(cnnDropout)(maxPool)
|
||||
cnnDense = Dense(cnnHiddenDims,activation='relu')(cnnDropout)
|
||||
cnnOutput = Dense(2,activation='softmax')(cnnDense)
|
||||
|
||||
cnnDense = Dense(cnnHiddenDims, activation='relu')(cnnDropout)
|
||||
cnnOutput = Dense(2, activation='softmax')(cnnDense)
|
||||
|
||||
# We define a trainable model linking the
|
||||
# domain and flow-feature inputs to the predictions
|
||||
model = Model(inputs=inputList, outputs=cnnOutput)
|
||||
@ -133,11 +120,8 @@ if __name__ == "__main__":
|
||||
loss='binary_crossentropy',
|
||||
metrics=['accuracy'])
|
||||
|
||||
|
||||
|
||||
epochNumber= 0
|
||||
epochNumber = 0
|
||||
trainLabel = np_utils.to_categorical(testLabel, 2)
|
||||
model.fit(x=testData, y=trainLabel,
|
||||
epochs=epochNumber + 1,shuffle=True,initial_epoch=epochNumber)#,
|
||||
#validation_data=(testData,testLabel))
|
||||
|
||||
epochs=epochNumber + 1, shuffle=True, initial_epoch=epochNumber) # ,
|
||||
# validation_data=(testData,testLabel))
|
||||
|
Loading…
x
Reference in New Issue
Block a user