import os

import matplotlib.pyplot as plt
import numpy as np
from sklearn.decomposition import TruncatedSVD
from sklearn.manifold import TSNE
from sklearn.metrics import (
    auc,
    classification_report,
    confusion_matrix,
    fbeta_score,
    precision_recall_curve,
    roc_auc_score,
    roc_curve,
)


def scores(y_true):
    """Print threshold-0.5 metrics for stored predictions under ``results/``.

    Walks ``results/`` and, for every file whose name ends in ``npy`` and
    whose containing directory path ends in ``"1"``, loads the array,
    flattens it and prints precision, recall, accuracy, F1 and F0.5 computed
    against ``y_true`` with a fixed 0.5 decision threshold.

    Args:
        y_true: Ground-truth binary labels (1 = positive, 0 = negative). Any
            array-like is accepted; it is flattened so that it lines up with
            the flattened predictions.

    Note:
        When a denominator is zero (e.g. no positive predictions) NumPy
        yields ``nan`` for the affected metric instead of raising.
    """
    # Normalise once: a plain list would make `y_true == 1` a scalar False,
    # and an N-D array would not align with the flattened predictions.
    y_true = np.asarray(y_true).flatten()

    for path, _, fnames in os.walk("results/"):
        if not path.endswith("1"):
            continue
        for f in fnames:
            if not f.endswith("npy"):
                continue
            y_pred = np.load(os.path.join(path, f)).flatten()
            print(path)

            predicted_pos = y_pred >= 0.5
            predicted_neg = y_pred < 0.5
            tp = np.sum(np.logical_and(predicted_pos, y_true == 1))
            tn = np.sum(np.logical_and(predicted_neg, y_true == 0))
            fp = np.sum(np.logical_and(predicted_pos, y_true == 0))
            fn = np.sum(np.logical_and(predicted_neg, y_true == 1))

            precision = tp / (tp + fp)
            recall = tp / (tp + fn)
            accuracy = (tp + tn) / len(y_true)
            f1_score = 2 * (precision * recall) / (precision + recall)
            f05_score = ((1 + 0.5 ** 2) * (precision * recall)
                         / (0.5 ** 2 * precision + recall))

            print(" precision:", precision)
            print(" recall:", recall)
            print(" accuracy:", accuracy)
            print(" f1 score:", f1_score)
            print(" f0.5 score:", f05_score)


def plot_clf():
    """Clear the current matplotlib figure."""
    plt.clf()


def plot_save(path, dpi=300):
    """Resize the current figure to 18.5 x 10.5 inches, save it and close it.

    Args:
        path: Destination passed to ``Figure.savefig``.
        dpi: Resolution passed to ``Figure.savefig``.
    """
    fig = plt.gcf()
    fig.set_size_inches(18.5, 10.5)
    fig.savefig(path, dpi=dpi)
    plt.close()


def plot_legend():
    """Add a legend to the current axes."""
    plt.legend()


def plot_precision_recall(y, y_pred, label=""):
    """Draw one precision-recall curve on the current axes.

    The legend entry is ``label`` followed by the F1 score of the rounded
    predictions.

    Args:
        y: Ground-truth labels; flattened before use.
        y_pred: Predicted scores; flattened before use.
        label: Prefix for the legend entry.
    """
    y = y.flatten()
    y_pred = y_pred.flatten()
    precision, recall, _ = precision_recall_curve(y, y_pred)
    # `beta` must be passed by keyword: it is keyword-only in current
    # scikit-learn releases.
    score = fbeta_score(y, y_pred.round(), beta=1)

    plt.plot(recall, precision, '--', label=f"{label} - {score:5.4}")
    plt.xlabel('Recall')
    plt.ylabel('Precision')
    plt.ylim([0.0, 1.0])
    plt.xlim([0.0, 1.0])


def plot_pr_curves(y, y_preds, label=""):
    """Draw one precision-recall curve per prediction set on the current axes.

    Each legend entry is the index of the prediction set, then ``label``,
    then the F1 score of the rounded predictions.

    Args:
        y: Ground-truth labels shared by all prediction sets; flattened
            before use.
        y_preds: Iterable of prediction arrays; each is flattened before use.
        label: Text appended to the index in every legend entry.
    """
    # Flatten the ground truth once; it must not be rebound inside the loop.
    y = y.flatten()
    for idx, y_pred in enumerate(y_preds):
        y_pred = y_pred.flatten()
        precision, recall, _ = precision_recall_curve(y, y_pred)
        score = fbeta_score(y, y_pred.round(), beta=1)
        plt.plot(recall, precision, '--',
                 label=f"{idx}{label} - {score:5.4}")

    plt.xlabel('Recall')
    plt.ylabel('Precision')


def score_model(y, prediction):
    """Print a classification report and summary scores for one model.

    Prints the scikit-learn classification report for the rounded
    predictions, the area under the precision-recall curve, the ROC AUC and
    the F1 / F0.5 scores of the rounded predictions.

    Args:
        y: Ground-truth labels; flattened before use.
        prediction: Predicted scores; flattened before use.
    """
    y = y.flatten()
    y_pred = prediction.flatten()
    y_hard = y_pred.round()
    precision, recall, _ = precision_recall_curve(y, y_pred)

    print(classification_report(y, y_hard))
    print("Area under PR curve", auc(recall, precision))
    print("roc auc score", roc_auc_score(y, y_pred))
    # `beta` is keyword-only in current scikit-learn releases.
    print("F1 Score", fbeta_score(y, y_hard, beta=1))
    print("F0.5 Score", fbeta_score(y, y_hard, beta=0.5))


def plot_roc_curve(mask, prediction, label=""):
    """Draw a ROC curve with a log-scaled x-axis on the current axes.

    The legend entry is ``label`` followed by the area under the ROC curve.

    Args:
        mask: Ground-truth labels; flattened before use.
        prediction: Predicted scores; flattened before use.
        label: Prefix for the legend entry.
    """
    y = mask.flatten()
    y_pred = prediction.flatten()
    fpr, tpr, _ = roc_curve(y, y_pred)
    roc_auc = auc(fpr, tpr)

    plt.xscale('log')
    plt.plot(fpr, tpr, label=f"{label} - {roc_auc:5.4}")
    plt.ylim([0.0, 1.0])
    # NOTE(review): the x-axis is log-scaled, so a lower limit of 0.0 cannot
    # be honoured as written - confirm the intended left bound.
    plt.xlim([0.0, 1.0])
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')


def plot_confusion_matrix(y_true, y_pred, path,
                          normalize=False,
                          classes=("benign", "malicious"),
                          title='Confusion matrix',
                          cmap="Blues", dpi=600):
    """Print the confusion matrix, plot it and save the plot to ``path``.

    Args:
        y_true: Ground-truth labels passed to ``confusion_matrix``.
        y_pred: Predicted labels passed to ``confusion_matrix``.
        path: Destination passed to ``plt.savefig``.
        normalize: If true, divide every row by its sum before printing and
            plotting.
        classes: Tick labels for both axes.
        title: Plot title.
        cmap: Colormap passed to ``plt.imshow``.
        dpi: Resolution passed to ``plt.savefig``.
    """
    plt.clf()
    cm = confusion_matrix(y_true, y_pred)
    if normalize:
        cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]
        print("Normalized confusion matrix")
    else:
        print('Confusion matrix, without normalization')
    print(cm)

    plt.imshow(cm, interpolation='nearest', cmap=cmap)
    plt.title(title)
    plt.colorbar()
    tick_marks = np.arange(len(classes))
    plt.xticks(tick_marks, classes, rotation=45)
    plt.yticks(tick_marks, classes)

    # Cells above half of the maximum get white text, the rest black text.
    thresh = cm.max() / 2.
    for i, j in np.ndindex(cm.shape):
        plt.text(j, i, cm[i, j],
                 horizontalalignment="center",
                 color="white" if cm[i, j] > thresh else "black")

    plt.tight_layout()
    plt.ylabel('True label')
    plt.xlabel('Predicted label')
    plt.savefig(path, dpi=dpi)
    plt.close()


def plot_training_curve(logs, key, path, dpi=600):
    """Plot training accuracy/F1 and validation accuracy, then save to ``path``.

    Args:
        logs: Mapping indexed with ``f"{key}acc"``, ``f"{key}f1_score"`` and
            ``f"val_{key}acc"``; each value is plotted as one curve.
        key: Prefix inserted into the log keys.
        path: Destination passed to ``plt.savefig``.
        dpi: Resolution passed to ``plt.savefig``.
    """
    plt.clf()
    plt.plot(logs[f"{key}acc"], label="accuracy")
    plt.plot(logs[f"{key}f1_score"], label="f1_score")
    plt.plot(logs[f"val_{key}acc"], label="val_accuracy")
    # plt.plot(logs[f"val_{key}f1_score"], label="val_f1_score")
    plt.xlabel('epoch')
    plt.ylabel('percentage')
    plt.legend()
    plt.savefig(path, dpi=dpi)
    plt.close()


def plot_embedding(domain_embedding, labels, path, dpi=600, method="svd"):
    """Reduce an embedding to two dimensions, scatter-plot it and save it.

    Args:
        domain_embedding: Data passed to the reducer's ``fit_transform``.
        labels: Array multiplied by ``(1, 2)`` and summed over axis 1 to get
            one integer colour per point (presumably two indicator columns -
            verify against caller).
        path: Destination passed to ``plt.savefig``.
        dpi: Resolution passed to ``plt.savefig``.
        method: ``"svd"`` for ``TruncatedSVD`` or ``"tsne"`` for ``TSNE``.

    Raises:
        ValueError: If ``method`` is neither ``"svd"`` nor ``"tsne"``.
    """
    if method == "svd":
        red = TruncatedSVD(n_components=2)
    elif method == "tsne":
        red = TSNE(n_components=2, verbose=2)
    else:
        raise ValueError(
            f"unknown method {method!r}; expected 'svd' or 'tsne'")

    domain_reduced = red.fit_transform(domain_embedding)
    # Only the SVD reducer exposes explained_variance_ratio_; TSNE does not.
    if method == "svd":
        print(red.explained_variance_ratio_)

    # use if draw subset of predictions
    # idx = np.random.choice(np.arange(len(domain_reduced)), 10000)

    plt.scatter(domain_reduced[:, 0],
                domain_reduced[:, 1],
                c=(labels * (1, 2)).sum(1).astype(int),
                cmap=plt.cm.plasma,
                s=3, alpha=0.2)
    plt.colorbar()
    plt.savefig(path, dpi=dpi)


def plot_model_as(model, path):
    """Render ``model`` to an image file at ``path`` using Keras' ``plot_model``.

    Layer shapes and layer names are included in the rendering.

    Args:
        model: Model object passed to ``plot_model``.
        path: Output file passed as ``to_file``.
    """
    # Imported lazily so Keras is only needed when this function is called.
    from keras.utils.vis_utils import plot_model
    plot_model(model, to_file=path, show_shapes=True, show_layer_names=True)