#!/usr/bin/python
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
import scikitplot as skplt
from sklearn.preprocessing import label_binarize
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import auc, confusion_matrix
from sklearn.metrics import precision_recall_curve
from sklearn.metrics import average_precision_score
from sklearn.metrics import classification_report

# TF1-style command-line flags.
flags = tf.app.flags
flags.DEFINE_string('softmax', None, 'The softmax.npz file containing labels and probabilities')
flags.DEFINE_string('dinfo', None, 'The dinfo.npz file')
flags.DEFINE_integer('chunks', 4, 'The number of plots to produce')
FLAGS = flags.FLAGS


def plot_classification_report(classification_report, title='Classification report ', cmap='RdBu'):
    '''
    Plot scikit-learn classification report.
    Extension based on https://stackoverflow.com/a/31689645/395857
    '''
    lines = classification_report.split('\n')

    classes = []
    plotMat = []
    support = []
    class_names = []
    # Skip the header and trailing summary block; parse one row per class.
    for line in lines[2:(len(lines) - 2)]:
        t = line.strip().split()
        if len(t) < 2:
            continue
        # Skip the accuracy / averaged summary rows emitted by newer scikit-learn versions.
        if t[0] in ('accuracy', 'micro', 'macro', 'weighted', 'avg'):
            continue
        classes.append(t[0])
        v = [float(x) for x in t[1:len(t) - 1]]  # precision, recall, f1-score
        support.append(int(t[-1]))
        class_names.append(t[0])
        print(v)
        plotMat.append(v)

    print('plotMat: {0}'.format(plotMat))
    print('support: {0}'.format(support))

    xlabel = 'Metrics'
    ylabel = 'Classes'
    xticklabels = ['Precision', 'Recall', 'F1-score']
    yticklabels = ['{0} ({1})'.format(class_names[idx], sup) for idx, sup in enumerate(support)]
    figure_width = 25
    figure_height = len(class_names) + 7
    correct_orientation = False
    # `heatmap` comes from the Stack Overflow answer referenced above
    # (see the sketch at the end of this file if it is not defined elsewhere).
    heatmap(np.array(plotMat), title, xlabel, ylabel, xticklabels, yticklabels,
            figure_width, figure_height, correct_orientation, cmap=cmap)


softmax = np.load(FLAGS.softmax)
dinfo = np.load(FLAGS.dinfo)

class_names = dinfo['classes']
y_true = softmax['labels']
y_proba = softmax['predictions']
y_true_sparse = label_binarize(y_true, classes=np.unique(y_true))
y_pred = np.argmax(y_proba, axis=1)

cl_report = classification_report(y_true, y_pred, target_names=class_names,
                                  labels=np.arange(len(class_names)))
print(cl_report)

cm = confusion_matrix(y_true, y_pred, labels=np.arange(len(class_names)))
print(cm)


def top_wrong(cm, N=150):
    '''Return the (row, col) indices of the N largest cells of the confusion matrix.'''
    idx = np.argsort(cm.ravel())[-N:][::-1]  # single slicing: `[:-N-1:-1]`
    row_col = np.c_[np.unravel_index(idx, cm.shape)]
    return row_col


# Print the most frequent off-diagonal (i.e. misclassified) cells.
for idxs in top_wrong(cm):
    if idxs[0] != idxs[1]:
        print(class_names[idxs[0]], "\t", class_names[idxs[1]], "\t", cm[idxs[0], idxs[1]])

# Count false positives on the 'benign' class, and how many of them
# were predicted with a softmax score above 0.95.
benign_class = int(np.where(class_names == 'benign')[0][0])
benign_pages = np.where(y_true == benign_class)[0]

cnt = 0
cnt9 = 0
for benign_page in benign_pages:
    guess = y_pred[benign_page]
    if guess != benign_class:
        softmax_val = y_proba[benign_page][guess]
        cnt += 1
        if softmax_val > 0.95:
            print("B: " + class_names[guess] + "\t" + str(softmax_val))
            cnt9 += 1

print('We have ' + str(cnt9) + ' false-positives with softmax > 0.95 out of '
      + str(cnt) + '/' + str(benign_pages.size))
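
# NOTE: `plot_classification_report` above calls a `heatmap` helper that is not defined
# in this section; the original comes from the Stack Overflow answer linked in that
# function's docstring. If it is not provided elsewhere in this file, a minimal sketch
# with the same signature could look like the one below. The figure-size conversion and
# the output filename 'classification_report.png' are assumptions, not the original code.
def heatmap(AUC, title, xlabel, ylabel, xticklabels, yticklabels,
            figure_width=40, figure_height=20, correct_orientation=False, cmap='RdBu'):
    '''Render a matrix of scores as an annotated heatmap and save it to disk.'''
    # Treat width/height as centimetres, as in the referenced answer (assumption).
    fig, ax = plt.subplots(figsize=(figure_width / 2.54, figure_height / 2.54))
    c = ax.pcolor(AUC, edgecolors='k', linewidths=0.2, cmap=cmap)

    # Put the tick labels in the middle of each cell.
    ax.set_yticks(np.arange(AUC.shape[0]) + 0.5, minor=False)
    ax.set_xticks(np.arange(AUC.shape[1]) + 0.5, minor=False)
    ax.set_xticklabels(xticklabels, minor=False)
    ax.set_yticklabels(yticklabels, minor=False)
    ax.set_title(title)
    ax.set_xlabel(xlabel)
    ax.set_ylabel(ylabel)

    # Annotate every cell with its value.
    for y in range(AUC.shape[0]):
        for x in range(AUC.shape[1]):
            ax.text(x + 0.5, y + 0.5, '{:.2f}'.format(AUC[y, x]),
                    ha='center', va='center')

    if correct_orientation:
        # Mirror the y axis so the first class appears at the top.
        ax.invert_yaxis()
        ax.xaxis.tick_top()

    fig.colorbar(c)
    fig.tight_layout()
    fig.savefig('classification_report.png', dpi=200)
    plt.close(fig)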