import pandas as pd
import numpy as np
import seaborn as sns
%matplotlib inline
from matplotlib import pyplot as plt
import os
import pickle
from PIL import Image
from matplotlib.pyplot import imshow
from IPython.display import display
import sklearn
from sklearn.metrics import roc_curve
from sklearn.metrics import auc
from sklearn.model_selection import train_test_split
from sklearn.model_selection import StratifiedShuffleSplit
from sklearn.model_selection import StratifiedKFold
import cv2
import random
import keras
from keras import backend as K
from keras.utils import to_categorical
from keras.models import Sequential
from keras.layers import Reshape
from keras.layers import Dense, Dropout, Activation, Flatten
from keras.layers import Conv2D, Conv3D, MaxPooling2D, MaxPooling3D
from keras.preprocessing.image import ImageDataGenerator
from keras.callbacks import EarlyStopping
from sklearn import metrics as mt
from keras.models import load_model
from tensorflow.keras.regularizers import l2
filepath = '/Users/princendhlovu/Downloads/brain_tumor_dataset/'
nontumor = os.listdir(filepath + 'no' )
tumor = os.listdir(filepath + 'yes' )
no_std_path = filepath + 'no/'
yes_std_path = filepath + 'yes/'
tumor_images = []
non_tumor_images = []
target = []
h,w = (80,75)
# read the images into the respective arrays
def addImageToArray(label,std_path,imageFilePath,imageArrays):
for pic in imageFilePath:
image_array = cv2.imread(std_path + pic, cv2.IMREAD_GRAYSCALE)
# print(image_array.shape[0]/image_array.shape[1])
image_array = cv2.resize(image_array, (w,h))
imageArrays.append(image_array.flatten())
if(label == 'yes'):
target.append(1)
if(label == 'no'):
target.append(0)
non_tumor_images.clear()
tumor_images.clear()
target.clear()
addImageToArray('yes', yes_std_path,tumor,tumor_images)
addImageToArray('no', no_std_path,nontumor,non_tumor_images)
X_data = non_tumor_images + tumor_images
# print('frequencies',target.astype(np.int).value_counts())
X_data = np.array(X_data)
target = np.array(target)
rstate = np.random.get_state()
np.random.shuffle(X_data)
np.random.set_state(rstate)
np.random.shuffle(target)
print("Images readIn:", len(X_data))
unique, counts = np.unique(target, return_counts=True)
print(np.asarray((unique, counts)).T)
# Class distributions
labels_df = pd.DataFrame(data=target)
labels_df.hist()
plt.title("Class Distribution")
When radiologists are diagnosing patients who might have brain tumors, they have to minimize the risk of "missing" brain tumors which might give their clients false confidence causing them to abandon their medication and other necessary therapy sessions.In most cases if the tumor is unnoticed, it might be malignant and spread to other parts of the brain or body causing more damage. In this dataset were are trying to minimize the number of false negatives where the model declares an image tumor free when it is not as this is very detrimental to the present and future health or wellbeing of a person. We also do not want to diagnose people who are cancer free with tumors that they do not have, that is we want to minimize the number of false positives as they can cause healthy people to take medication that they do not need leading to health complications, substantial stress and even fatalities due to the irrelevant therapy and medication that they would be receiving. False positives are not desirable but the presence of false negatives has far wide reaching effects that cannot be mitigated and often times reduce the human lifespan by a considerable amount. Therefor were are going to use recall as an evaluation metric for our CNN model. Recall gives us a good picture of how good our model is at predicting true positives and reducing the number of false negatives and it is given by:
$$
\begin{align}
Recall = \frac{True Positives}{True Positives + False Negatives}
\end{align}
$$
We had a small dataset of 253 images and we used image transformations to increase to 1253 images. This data expansion helps the model to experience wide variations of image orientations that would make it good at generalizing to new data that it has not seen before.It is unlikely that our model will see images or scans that have been taken in a certain orientation so we are going to divide our data using the StratifiedKFold with 5 folds. The images would be shuffled and and distributed among the 5 folds to expose our model. This would help reduce the variances during the classification process.Then we would evaluate our model with an 80 - 20 split of training and testing respectively.
#create new sample data
from keras.preprocessing.image import ImageDataGenerator
datagen = ImageDataGenerator(featurewise_center=False,
samplewise_center=False,
featurewise_std_normalization=False, #adding to each pixel
samplewise_std_normalization=False, #adding to each picture
zca_whitening=False,
rotation_range=5, # used, Int. Degree range for random rotations.
width_shift_range=0.1, # used, Float (fraction of total width). Range for random horizontal shifts.
height_shift_range=0.1, # used, Float (fraction of total height). Range for random vertical shifts.
shear_range=0., # Float. Shear Intensity (Shear angle in counter-clockwise direction as radians)
zoom_range=0.,
channel_shift_range=0.,
fill_mode='nearest',
cval=0.,
horizontal_flip=True,
vertical_flip=True,
rescale=None)
# X_data = X_data/255 - 0.5
#expand dimensions
X_data = np.expand_dims(X_data.reshape((-1,h, w)), axis=3)
datagen.fit(X_data)
new_images = datagen.flow(X_data, target, batch_size=1)
print('new images',len(new_images))
print('original data',len(X_data))
print(X_data.shape)
# for tmp in new_images:
# imshow(tmp[0].squeeze(),cmap='bone')
# break
j = 0
for img in new_images:
if j == 1000:
break
j += 1
if j % 100 == 0:
print(f'Appended {j} images')
X_data = np.vstack((X_data,np.expand_dims(img[0][0].squeeze().reshape((-1,h,w)), axis=3)))
target = np.append(target,img[1][0])
classes = {0:"No Tumor", 1:"Tumor"}
# classes = {0:"Daisy", 1:"Dandelion", 2:"Rose", 3:"Sunflower", 4:"Tulip"}
# select random images to visualize
random.seed(1)
plt.figure(figsize=(2.0 * 7, 2.3 * 3))
plt.subplots_adjust(bottom=0, left=.01, right=.99, top=.90, hspace=.35)
random_sample = random.sample(range(0,X_data.shape[0]), k=21)
for n,i in enumerate(random_sample):
plt.subplot(3,7, n + 1)
imshow(X_data[i].reshape((h,w)))
plt.title(classes[target[i]], size=12)
plt.xticks(())
plt.yticks(())
# Class distributions
labels_df = pd.DataFrame(data=target)
labels_df.hist()
plt.title("Class Distribution")
unique, counts = np.unique(target, return_counts=True)
print(np.asarray((unique, counts)).T)
#We could not use the recall from the keras model so, i used this one from:
#https://datascience.stackexchange.com/questions/45165/how-to-get-accuracy-f1-precision-and-recall-for-a-keras-model
def recall_m(y_true, y_pred):
true_positives = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
possible_positives = K.sum(K.round(K.clip(y_true, 0, 1)))
recall = true_positives / (possible_positives + K.epsilon())
return recall
kfold_data = StratifiedKFold(n_splits=5, random_state=42, shuffle=True).split(X_data, target)
X_train, X_test, y_train, y_test = train_test_split(X_data, target, test_size= 0.20, random_state=42)
y_cat = to_categorical(target)
y_train_cat = to_categorical(y_train)
y_test_cat = to_categorical(y_test)
NUM_CLASSES = 2
%%time
# now lets use the LeNet architecture with batch norm
# We will also use ReLU where approriate and drop out
from tensorflow.keras.layers import Add, Input
from tensorflow.keras.layers import average, concatenate
from tensorflow.keras.models import Model
l2_lambda = 0.000001
input_holder = Input(shape=(h, w, 1))
# start with a conv layer
x = Conv2D(filters=32,
input_shape = (h,w,1),
kernel_size=(3,3),
kernel_initializer='he_uniform',
kernel_regularizer=l2(l2_lambda),
padding='same',
activation='relu',
data_format="channels_last")(input_holder)
x = MaxPooling2D(pool_size=(2, 2), data_format="channels_last")(x)
x = Conv2D(filters=32,
kernel_size=(3,3),
kernel_initializer='he_uniform',
kernel_regularizer=l2(l2_lambda),
padding='same',
activation='relu',
data_format="channels_last")(x)
x_split = MaxPooling2D(pool_size=(2, 2), data_format="channels_last")(x)
x = Conv2D(filters=64,
kernel_size=(1,1),
kernel_initializer='he_uniform',
kernel_regularizer=l2(l2_lambda),
padding='same',
activation='relu',
data_format="channels_last")(x_split)
x = Conv2D(filters=64,
kernel_size=(3,3),
kernel_initializer='he_uniform',
kernel_regularizer=l2(l2_lambda),
padding='same',
activation='relu',
data_format="channels_last")(x)
x = Conv2D(filters=32,
kernel_size=(1,1),
kernel_initializer='he_uniform',
kernel_regularizer=l2(l2_lambda),
padding='same',
activation='relu',
data_format="channels_last")(x)
# now add back in the split layer, x_split (residual added in)
x = Add()([x, x_split])
x = Activation("relu")(x)
x = MaxPooling2D(pool_size=(2, 2), data_format="channels_last")(x)
x = Flatten()(x)
x = Dropout(0.25)(x)
x = Dense(256)(x)
x = Activation("relu")(x)
x = Dropout(0.5)(x)
x = Dense(NUM_CLASSES)(x)
x = Activation('softmax')(x)
resnet1 = Model(inputs=input_holder,outputs=x)
resnet1.summary()
resnet1.compile(loss='categorical_crossentropy', # 'categorical_crossentropy' 'mean_squared_error'
optimizer='adam', # 'adadelta' 'rmsprop'
metrics=[recall_m])
cnn3 = []
for i, (train_data, test_data) in enumerate(kfold_data):
res_history1 = resnet1.fit(X_data[train_data], y_cat[train_data], batch_size=16,
epochs=50, verbose=1,
validation_data=(X_data[test_data],y_cat[test_data]),
callbacks=[EarlyStopping(monitor='val_loss', patience=4)]
)
cnn3.append(res_history1)
%matplotlib inline
plt.figure(figsize=(10,4))
plt.subplot(2,2,1)
plt.plot(res_history1.history['recall_m'])
plt.ylabel('Recall %')
plt.title('Training')
plt.subplot(2,2,2)
plt.plot(res_history1.history['val_recall_m'])
plt.title('Validation')
plt.subplot(2,2,3)
plt.plot(res_history1.history['loss'])
plt.ylabel('Training Loss')
plt.xlabel('epochs')
plt.subplot(2,2,4)
plt.plot(res_history1.history['val_loss'])
plt.xlabel('epochs')
With the following implementation, we will be using nadam as the optimizer
kfold_data = StratifiedKFold(n_splits=5, random_state=42, shuffle=True).split(X_data, target)
X_train, X_test, y_train, y_test = train_test_split(X_data, target, test_size= 0.20, random_state=42)
y_cat = to_categorical(target)
y_train_cat = to_categorical(y_train)
y_test_cat = to_categorical(y_test)
NUM_CLASSES = 2
%%time
input_holder = Input(shape=(h, w, 1))
# start with a conv layer
x = Conv2D(filters=32,
input_shape = (h,w,1),
kernel_size=(3,3),
kernel_initializer='he_uniform',
kernel_regularizer=l2(l2_lambda),
padding='same',
activation='relu',
data_format="channels_last")(input_holder)
x = MaxPooling2D(pool_size=(2, 2), data_format="channels_last")(x)
x = Conv2D(filters=32,
kernel_size=(3,3),
kernel_initializer='he_uniform',
kernel_regularizer=l2(l2_lambda),
padding='same',
activation='relu',
data_format="channels_last")(x)
x_split = MaxPooling2D(pool_size=(2, 2), data_format="channels_last")(x)
x = Conv2D(filters=64,
kernel_size=(1,1),
kernel_initializer='he_uniform',
kernel_regularizer=l2(l2_lambda),
padding='same',
activation='relu',
data_format="channels_last")(x_split)
x = Conv2D(filters=64,
kernel_size=(3,3),
kernel_initializer='he_uniform',
kernel_regularizer=l2(l2_lambda),
padding='same',
activation='relu',
data_format="channels_last")(x)
x = Conv2D(filters=32,
kernel_size=(1,1),
kernel_initializer='he_uniform',
kernel_regularizer=l2(l2_lambda),
padding='same',
activation='relu',
data_format="channels_last")(x)
# now add back in the split layer, x_split (residual added in)
x = Add()([x, x_split])
x = Activation("relu")(x)
x = MaxPooling2D(pool_size=(2, 2), data_format="channels_last")(x)
x = Flatten()(x)
x = Dropout(0.25)(x)
x = Dense(256)(x)
x = Activation("relu")(x)
x = Dropout(0.5)(x)
x = Dense(NUM_CLASSES)(x)
x = Activation('softmax')(x)
resnet2 = Model(inputs=input_holder,outputs=x)
resnet2.compile(loss='categorical_crossentropy', # 'categorical_crossentropy' 'mean_squared_error'
optimizer='nadam', # 'adadelta' 'rmsprop'
metrics=[recall_m])
cnn5 = []
for i, (train_data, test_data) in enumerate(kfold_data):
res_history2 = resnet2.fit(X_data[train_data], y_cat[train_data], batch_size=16,
epochs=50, verbose=1,
validation_data=(X_data[test_data],y_cat[test_data]),
callbacks=[EarlyStopping(monitor='val_loss', patience=4)]
)
cnn5.append(res_history2)
plt.figure(figsize=(10,4))
plt.subplot(2,2,1)
plt.plot(res_history2.history['recall_m'])
plt.ylabel('Recall %')
plt.title('Training')
plt.subplot(2,2,2)
plt.plot(res_history2.history['val_recall_m'])
plt.title('Validation')
plt.subplot(2,2,3)
plt.plot(res_history2.history['loss'])
plt.ylabel('Training Loss')
plt.xlabel('epochs')
plt.subplot(2,2,4)
plt.plot(res_history2.history['val_loss'])
plt.xlabel('epochs')
'First ResNet Architecture AUC & ROC'
y_pred_resnet = np.argmax(resnet1.predict(X_test), axis=1)
# np.argmax(alexnet.predict(X_test), axis=1)
#false positive and true positive rates using roc
fpr_resnet, tpr_resnet, thresholds_resnet = mt.roc_curve(np.argmax(y_test_cat, axis=1), y_pred_resnet)
#area under the curve
auc_resnet = auc(fpr_resnet, tpr_resnet)
'Second ResNet Architecture AUC & ROC'
y_pred_resnet2 = np.argmax(resnet2.predict(X_test), axis=1)
#false positive and true positive rates using roc
fpr_resnet1, tpr_resnet1, thresholds_resnet1 = mt.roc_curve(np.argmax(y_test_cat, axis=1), y_pred_resnet2)
#area under the curve
auc_resnet1 = auc(fpr_resnet1, tpr_resnet1)
plt.figure(figsize=(12,12))
#plot halfway line
plt.plot([0,1], [0,1], 'k--')
#plot for CNN
plt.plot(fpr_resnet, tpr_resnet,label='ResNet1 (area = {:.3f})'.format(auc_resnet))
#plot for CNN1
plt.plot(fpr_resnet1, tpr_resnet1,label='ResNet2 (area = {:.3f})'.format(auc_resnet1))
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('Resnet1 vs Resnet2')
plt.legend(loc='best')
plt.show()
Our first implementation of ResNet is closer to the top left of the graph and has a larger area under the curve (AUC). The AUC of the first implementation is 0.914, which is about 0.8 higher than the second implementation's AUC. Thus, we can conclude that the first ResNet is the better implementation.
print('0:',y_test_cat.shape)
yn = np.argmax(y_test_cat, axis=-1)
print('1:',yn.shape)
contingency = mcnemar_table(y_target=yn ,
y_model1=y_pred_resnet,
y_model2=y_pred_resnet2)
contingency
from mlxtend.plotting import checkerboard_plot
brd = checkerboard_plot(contingency,
figsize=(3, 3),
fmt='%d',
col_labels=['ResNet2 right', 'ResNet2 wrong'],
row_labels=['ResNet1 right', 'ResNet1 wrong'])
plt.show()
contingency2 = np.array(contingency)
chiSq, p = mcnemar(ary=contingency2, corrected=True)
print("Chi Squared: ", chiSq)
print("P-val: ", p)
At a significance level of 0.05, our p-value of 0.008 is less than the siginificance level. Therefor we can reject our null hypothesis for the 2 models. We can conclude that the models are significantly different and that the ResNet1 perfomes better than the ResNet2.
kfold_data = StratifiedKFold(n_splits=5, random_state=42, shuffle=True).split(X_data, target)
X_train, X_test, y_train, y_test = train_test_split(X_data, target, test_size= 0.20, random_state=42)
y_cat = to_categorical(target)
y_train_cat = to_categorical(y_train)
y_test_cat = to_categorical(y_test)
NUM_CLASSES = 2
# Xception style architecture
from tensorflow.keras.layers import SeparableConv2D
from tensorflow.keras.layers import BatchNormalization
from tensorflow.keras.layers import Add, Input
from tensorflow.keras.layers import average, concatenate
from tensorflow.keras.models import Model
l2_lambda = 0.000001
input_holder = Input(shape=(h, w, 1))
# start with a conv layer
x = Conv2D(filters=32,
input_shape = (h,w,1),
kernel_size=(3,3),
kernel_initializer='he_uniform',
kernel_regularizer=l2(l2_lambda),
padding='same',
activation='relu',
data_format="channels_last")(input_holder)
x = MaxPooling2D(pool_size=(2, 2), data_format="channels_last")(x)
x = Conv2D(filters=32,
kernel_size=(3,3),
kernel_initializer='he_uniform',
kernel_regularizer=l2(l2_lambda),
padding='same',
activation='relu',
data_format="channels_last")(x)
x_split = MaxPooling2D(pool_size=(2, 2), data_format="channels_last")(x)
x = SeparableConv2D(filters=32,
input_shape = (h,w,1),
kernel_size=(3,3),
kernel_initializer='he_uniform',
kernel_regularizer=l2(l2_lambda),
padding='same',
activation='relu',
depth_multiplier = 1, # controls output channels
data_format="channels_last")(x_split)
x_split = Add()([x, x_split])
x = SeparableConv2D(filters=32,
input_shape = (h,w,1),
kernel_size=(3,3),
kernel_initializer='he_uniform',
kernel_regularizer=l2(l2_lambda),
padding='same',
activation='relu',
depth_multiplier = 1, # controls output channels
data_format="channels_last")(x_split)
x_split = Add()([x, x_split])
x = Activation("relu")(x_split)
x = MaxPooling2D(pool_size=(2, 2), data_format="channels_last")(x)
x = Flatten()(x)
x = Dropout(0.25)(x)
x = Dense(256, activation="relu")(x)
x = Dropout(0.5)(x)
x = Dense(NUM_CLASSES,activation="softmax")(x)
xception1 = Model(inputs=input_holder,outputs=x)
xception1.summary()
# speed up by training by not using augmentation, perhaps there are faster ways??
xception1.compile(loss='categorical_crossentropy', # 'categorical_crossentropy' 'mean_squared_error'
optimizer='adam', # 'adadelta' 'rmsprop'
metrics=[recall_m])
cnn4 = []
for i, (train_data, test_data) in enumerate(kfold_data):
x_history1 = xception1.fit(X_data[train_data], y_cat[train_data], batch_size=16,
epochs=50, verbose=1,
validation_data=(X_data[test_data],y_cat[test_data]),
callbacks=[EarlyStopping(monitor='val_loss', patience=4)]
)
cnn4.append(x_history1)
plt.figure(figsize=(10,4))
plt.subplot(2,2,1)
plt.plot(x_history1.history['recall_m'])
plt.ylabel('Recall %')
plt.title('Training')
plt.subplot(2,2,2)
plt.plot(x_history1.history['val_recall_m'])
plt.title('Validation')
plt.subplot(2,2,3)
plt.plot(x_history1.history['loss'])
plt.ylabel('Training Loss')
plt.xlabel('epochs')
plt.subplot(2,2,4)
plt.plot(x_history1.history['val_loss'])
plt.xlabel('epochs')
With the following implementation, we will be using nadam as the optimizer
kfold_data = StratifiedKFold(n_splits=5, random_state=42, shuffle=True).split(X_data, target)
X_train, X_test, y_train, y_test = train_test_split(X_data, target, test_size= 0.20, random_state=42)
y_cat = to_categorical(target)
y_train_cat = to_categorical(y_train)
y_test_cat = to_categorical(y_test)
NUM_CLASSES = 2
# start with a conv layer
x = Conv2D(filters=32,
input_shape = (h,w,1),
kernel_size=(3,3),
kernel_initializer='he_uniform',
kernel_regularizer=l2(l2_lambda),
padding='same',
activation='relu',
data_format="channels_last")(input_holder)
x = MaxPooling2D(pool_size=(2, 2), data_format="channels_last")(x)
x = Conv2D(filters=32,
kernel_size=(3,3),
kernel_initializer='he_uniform',
kernel_regularizer=l2(l2_lambda),
padding='same',
activation='relu',
data_format="channels_last")(x)
x_split = MaxPooling2D(pool_size=(2, 2), data_format="channels_last")(x)
x = SeparableConv2D(filters=32,
input_shape = (h,w,1),
kernel_size=(3,3),
kernel_initializer='he_uniform',
kernel_regularizer=l2(l2_lambda),
padding='same',
activation='relu',
depth_multiplier = 1, # controls output channels
data_format="channels_last")(x_split)
x_split = Add()([x, x_split])
x = SeparableConv2D(filters=32,
input_shape = (h,w,1),
kernel_size=(3,3),
kernel_initializer='he_uniform',
kernel_regularizer=l2(l2_lambda),
padding='same',
activation='relu',
depth_multiplier = 1, # controls output channels
data_format="channels_last")(x_split)
x_split = Add()([x, x_split])
x = Activation("relu")(x_split)
x = MaxPooling2D(pool_size=(2, 2), data_format="channels_last")(x)
x = Flatten()(x)
x = Dropout(0.25)(x)
x = Dense(256, activation="relu")(x)
x = Dropout(0.5)(x)
x = Dense(NUM_CLASSES,activation="softmax")(x)
xception2 = Model(inputs=input_holder,outputs=x)
# speed up by training by not using augmentation, perhaps there are faster ways??
xception2.compile(loss='categorical_crossentropy', # 'categorical_crossentropy' 'mean_squared_error'
optimizer='nadam', # 'adadelta' 'rmsprop'
metrics=[recall_m])
cnn6 = []
for i, (train_data, test_data) in enumerate(kfold_data):
x_history2 = xception2.fit(X_data[train_data], y_cat[train_data], batch_size=8,
epochs=50, verbose=1,
validation_data=(X_data[test_data],y_cat[test_data]),
callbacks=[EarlyStopping(monitor='val_loss', patience=4)]
)
cnn6.append(x_history2)
plt.figure(figsize=(10,4))
plt.subplot(2,2,1)
plt.plot(x_history2.history['recall_m'])
plt.ylabel('Recall %')
plt.title('Training')
plt.subplot(2,2,2)
plt.plot(x_history2.history['val_recall_m'])
plt.title('Validation')
plt.subplot(2,2,3)
plt.plot(x_history2.history['loss'])
plt.ylabel('Training Loss')
plt.xlabel('epochs')
plt.subplot(2,2,4)
plt.plot(x_history2.history['val_loss'])
plt.xlabel('epochs')
'First Xception Architecture AUC & ROC'
y_pred_xception = np.argmax(xception1.predict(X_test), axis=1)
# np.argmax(alexnet.predict(X_test), axis=1)
#false positive and true positive rates using roc
fpr_xception, tpr_xception, thresholds_xception = mt.roc_curve(np.argmax(y_test_cat, axis=1), y_pred_xception)
#area under the curve
auc_xception = auc(fpr_xception, tpr_xception)
'Second Xception Architecture AUC & ROC'
y_pred_xception1 = np.argmax(xception2.predict(X_test), axis=1)
#false positive and true positive rates using roc
fpr_xception1, tpr_xception1, thresholds_xception1 = mt.roc_curve(np.argmax(y_test_cat, axis=1), y_pred_xception1)
#area under the curve
auc_xception1 = auc(fpr_xception1, tpr_xception1)
plt.figure(figsize=(12,12))
#plot halfway line
plt.plot([0,1], [0,1], 'k--')
#plot for CNN
plt.plot(fpr_xception, tpr_xception,label='xception1 (area = {:.3f})'.format(auc_xception))
#plot for CNN1
plt.plot(fpr_xception1, tpr_xception1,label='xception2 (area = {:.3f})'.format(auc_xception1))
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('xception1 vs xception2')
plt.legend(loc='best')
plt.show()
Our first implementation of the Xception architecture is better than the second implementation as shown by their AUC's. The first implementation has a AUC of 0.811, which is about 0.05 higher than the second implementation.
print('0:',y_test_cat.shape)
yn = np.argmax(y_test_cat, axis=-1)
print('1:',yn.shape)
contingency = mcnemar_table(y_target=yn ,
y_model1=y_pred_xception,
y_model2=y_pred_xception1)
contingency
from mlxtend.plotting import checkerboard_plot
brd = checkerboard_plot(contingency,
figsize=(3, 3),
fmt='%d',
col_labels=['Xception2 right', 'Xception2 wrong'],
row_labels=['Xception1 right', 'Xception1 wrong'])
plt.show()
contingency2 = np.array(contingency)
chiSq, p = mcnemar(ary=contingency2, corrected=True)
print("Chi Squared: ", chiSq)
print("P-val: ", p)
At a significance level of 0..05, our p-value of 0.21592493894013678 is greaeer than the siginificance level. Therefore we can accept our null hypothesis for the 2 models. We can conclude that these two models are similar.
from mlxtend.evaluate import mcnemar_table, mcnemar
print('0:',y_test_cat.shape)
yn = np.argmax(y_test_cat, axis=-1)
print('1:',yn.shape)
contingency = mcnemar_table(y_target=yn ,
y_model1=y_pred_resnet,
y_model2=y_pred_xception)
contingency
from mlxtend.plotting import checkerboard_plot
brd = checkerboard_plot(contingency,
figsize=(3, 3),
fmt='%d',
row_labels=['ResNet1 right', 'ResNet1 wrong'],
col_labels=['Xception1 right', 'Xception1 wrong'])
plt.show()
contingency2 = np.array(contingency)
chiSq, p = mcnemar(ary=contingency2, corrected=True)
print("Chi Squared: ", chiSq)
print("P-val: ", p)
At a significance level of 0.05, our p-value of 0.0007 is less than the siginificance level. Therefore we reject our null hypothesis for the 2 models. We can conclude that the models are significantly different and that the ResNet performs better than the Xception architecture.
flattened_train_data = np.asarray([x.flatten() for x in X_train ])
flattened_test_data = np.asarray([x.flatten() for x in X_test])
flattened_train_data.shape
mlp = Sequential()
mlp.add( Dense(input_dim=flattened_train_data.shape[1], units=30, activation='relu') )
mlp.add( Dense(units=15, activation='relu') )
mlp.add( Dense(2) )
mlp.add( Activation('softmax') )
mlp.compile(loss='mean_squared_error',
optimizer='rmsprop',
metrics=['Precision'])
mlp.fit(flattened_train_data, y_train_cat,
batch_size=32, epochs=150,
shuffle=True, verbose=0)
'ROC and AUC for RESNET'
y_pred_cnn = np.argmax(resnet1.predict(X_test), axis=1)
#false positve and true postive rates using roc
fpr_cnn, tpr_cnn, thresholds_cnn = roc_curve(np.argmax(y_test_cat,axis=1), y_pred_cnn)
#area under the curve
auc_cnn = auc(fpr_cnn, tpr_cnn)
'ROC and AUC for MLP'
y_pred_mlp = np.argmax(mlp.predict(flattened_test_data), axis=1)
#false positve and true postive rates using roc
fpr_mlp, tpr_mlp, thresholds_mlp = roc_curve(np.argmax(y_test_cat, axis=1), y_pred_mlp)
#area under the curve
auc_mlp = auc(fpr_mlp, tpr_mlp)
plt.figure(figsize=(12,12))
#plot halfway line
plt.plot([0,1], [0,1], 'k--')
#plot model 1 ROC
plt.plot(fpr_cnn, tpr_cnn, label='Resnet (area = {:.3f})'.format(auc_cnn))
#plot model 2 ROC
plt.plot(fpr_mlp, tpr_mlp, label='MLP (area = {:.3f})'.format(auc_mlp))
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('CNN vs MLP')
plt.legend(loc='best')
plt.show()
We can conclude that our ResNet implementation performed better than the Multi Layer Perceptron (mlp). The ROC curve of the ResNet approaches the top left of the graph, while the MLP stays along the diagonal. The ResNet's AUC is 0.914, which is much better than the MLP's AUC of 0.5
from mlxtend.evaluate import mcnemar_table, mcnemar
print('0:',y_test_cat.shape)
yn = np.argmax(y_test_cat, axis=-1)
print('1:',yn.shape)
print('2:',y_pred_cnn.shape)
print('3:',y_pred_mlp.shape )
contingency = mcnemar_table(y_target=yn ,
y_model1=y_pred_cnn,
y_model2=y_pred_mlp)
contingency
from mlxtend.plotting import checkerboard_plot
brd = checkerboard_plot(contingency,
figsize=(3, 3),
fmt='%d',
col_labels=['MLP right', 'MLP wrong'],
row_labels=['CNN right', 'CNN wrong'])
plt.show()
contingency2 = np.array(contingency)
chiSq, p = mcnemar(ary=contingency2, corrected=True)
print("Chi Squared: ", chiSq)
print("P-val: ", p)
At a significance level of 0..05, our p-value of 3.8-16 is less than the siginificance level. Therefor we can reject our null hypothesis for the 2 models. We can conclude that the models are significantly different and that the CNN perfomes better than the MLP.
We will be implementing another ResNet, but this time we will add back into the residual layer twice and then compare the performance to our first implementation of ResNet
kfold_data = StratifiedKFold(n_splits=5, random_state=42, shuffle=True).split(X_data, target)
X_train, X_test, y_train, y_test = train_test_split(X_data, target, test_size= 0.20, random_state=42)
y_cat = to_categorical(target)
y_train_cat = to_categorical(y_train)
y_test_cat = to_categorical(y_test)
NUM_CLASSES = 2
%%time
# start with a conv layer
x = Conv2D(filters=32,
input_shape = (h,w,1),
kernel_size=(3,3),
kernel_initializer='he_uniform',
kernel_regularizer=l2(l2_lambda),
padding='same',
activation='relu',
data_format="channels_last")(input_holder)
x = MaxPooling2D(pool_size=(2, 2), data_format="channels_last")(x)
x = Conv2D(filters=32,
kernel_size=(3,3),
kernel_initializer='he_uniform',
kernel_regularizer=l2(l2_lambda),
padding='same',
activation='relu',
data_format="channels_last")(x)
x_split = MaxPooling2D(pool_size=(2, 2), data_format="channels_last")(x)
x = Conv2D(filters=64,
kernel_size=(1,1),
kernel_initializer='he_uniform',
kernel_regularizer=l2(l2_lambda),
padding='same',
activation='relu',
data_format="channels_last")(x_split)
x = Conv2D(filters=64,
kernel_size=(3,3),
kernel_initializer='he_uniform',
kernel_regularizer=l2(l2_lambda),
padding='same',
activation='relu',
data_format="channels_last")(x)
x = Conv2D(filters=32,
kernel_size=(1,1),
kernel_initializer='he_uniform',
kernel_regularizer=l2(l2_lambda),
padding='same',
activation='relu',
data_format="channels_last")(x)
# now add back in the split layer, x_split (residual added in)
x = Add()([x, x_split])
x = Add()([x, x_split])
x = Activation("relu")(x)
x = MaxPooling2D(pool_size=(2, 2), data_format="channels_last")(x)
x = Flatten()(x)
x = Dropout(0.25)(x)
x = Dense(256)(x)
x = Activation("relu")(x)
x = Dropout(0.5)(x)
x = Dense(NUM_CLASSES)(x)
x = Activation('softmax')(x)
resnet3 = Model(inputs=input_holder,outputs=x)
resnet3.summary()
resnet3.compile(loss='categorical_crossentropy', # 'categorical_crossentropy' 'mean_squared_error'
optimizer='adam', # 'adadelta' 'rmsprop'
metrics=[recall_m])
cnn10 = []
for i, (train_data, test_data) in enumerate(kfold_data):
res_history3 = resnet3.fit(X_data[train_data], y_cat[train_data], batch_size=16,
epochs=50, verbose=1,
validation_data=(X_data[test_data],y_cat[test_data]),
callbacks=[EarlyStopping(monitor='val_loss', patience=4)]
)
cnn10.append(res_history3)
plt.figure(figsize=(10,4))
plt.subplot(2,2,1)
plt.plot(res_history3.history['recall_m'])
plt.ylabel('Recall %')
plt.title('Training')
plt.subplot(2,2,2)
plt.plot(res_history3.history['val_recall_m'])
plt.title('Validation')
plt.subplot(2,2,3)
plt.plot(res_history3.history['loss'])
plt.ylabel('Training Loss')
plt.xlabel('epochs')
plt.subplot(2,2,4)
plt.plot(res_history3.history['val_loss'])
plt.xlabel('epochs')
'First ResNet Architecture AUC & ROC'
y_pred_resnet = np.argmax(resnet1.predict(X_test), axis=1)
# np.argmax(alexnet.predict(X_test), axis=1)
#false positive and true positive rates using roc
fpr_resnet, tpr_resnet, thresholds_resnet = mt.roc_curve(np.argmax(y_test_cat, axis=1), y_pred_resnet)
#area under the curve
auc_resnet = auc(fpr_resnet, tpr_resnet)
'New ResNet Architecture AUC & ROC'
y_pred_resnet2 = np.argmax(resnet3.predict(X_test), axis=1)
#false positive and true positive rates using roc
fpr_resnet1, tpr_resnet1, thresholds_resnet1 = mt.roc_curve(np.argmax(y_test_cat, axis=1), y_pred_resnet2)
#area under the curve
auc_resnet1 = auc(fpr_resnet1, tpr_resnet1)
plt.figure(figsize=(12,12))
#plot halfway line
plt.plot([0,1], [0,1], 'k--')
#plot for CNN
plt.plot(fpr_resnet, tpr_resnet,label='ResNet1 (area = {:.3f})'.format(auc_resnet))
#plot for CNN1
plt.plot(fpr_resnet1, tpr_resnet1,label='ResNet_New (area = {:.3f})'.format(auc_resnet1))
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('Resnet1 vs Resnet_New')
plt.legend(loc='best')
plt.show()
As shown by the graph above, the new ResNet approaches the top left corner of the graph much faster than our original implementation. The new ResNet also has a larger AUC of 0.944, allowing us to conclude that it is a slighlty better implementation.
print('0:',y_test_cat.shape)
yn = np.argmax(y_test_cat, axis=-1)
print('1:',yn.shape)
contingency = mcnemar_table(y_target=yn ,
y_model1=y_pred_resnet,
y_model2=y_pred_resnet2)
contingency
brd = checkerboard_plot(contingency,
figsize=(3, 3),
fmt='%d',
col_labels=['ResNet_New right', 'ResNet_New wrong'],
row_labels=['ResNet1 right', 'ResNet1 wrong'])
plt.show()
contingency2 = np.array(contingency)
chiSq, p = mcnemar(ary=contingency2, corrected=True)
print("Chi Squared: ", chiSq)
print("P-val: ", p)
However, at a significance level of 0.05, our p-value of 1.0 is greater than the siginificance level. Therefor we can accept our null hypothesis for the 2 models. With a pvalue of 1, we can conclude that these models are almost no differences.