Run the cell below to import the required packages.
from keras.datasets import fashion_mnist
from keras.utils import to_categorical
import numpy as np
Data Loading
Run the cells below to load the data.
# load dataset
(trainX, trainy), (testX, testy) = fashion_mnist.load_data()
# load and prepare the train and test datasets
def load_dataset():
    # load dataset
    (trainX, trainY), (testX, testY) = fashion_mnist.load_data()
    # reshape dataset to have a single channel
    trainX = trainX.reshape((trainX.shape[0], 28, 28, 1))
    testX = testX.reshape((testX.shape[0], 28, 28, 1))
    # one-hot encode target values
    trainY = to_categorical(trainY)
    testY = to_categorical(testY)
    return trainX, trainY, testX, testY
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/train-labels-idx1-ubyte.gz
32768/29515 [==============================] - 0s 0us/step
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/train-images-idx3-ubyte.gz
26427392/26421880 [==============================] - 0s 0us/step
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/t10k-labels-idx1-ubyte.gz
8192/5148 [==============================] - 0s 0us/step
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/t10k-images-idx3-ubyte.gz
4423680/4422102 [==============================] - 0s 0us/step
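For reference, here is a minimal illustration (an addition, not a cell from the original notebook) of what to_categorical does to the integer labels:
from keras.utils import to_categorical

# label 2 out of 10 classes becomes a vector with a 1 at index 2
print(to_categorical([0, 2], num_classes=10))
# [[1. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
#  [0. 0. 1. 0. 0. 0. 0. 0. 0. 0.]]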
Subset Generation
Perform a data split with StratifiedShuffleSplit using the following parameters:
- test_size = 0.08
- random_state = seed
Then perform a train/test split with StratifiedShuffleSplit using the following parameters:
- test_size = 0.3
- random_state = seed
seed = 9
from sklearn.model_selection import StratifiedShuffleSplit

data_split = StratifiedShuffleSplit(test_size=0.08, random_state=seed)
for train_index, test_index in data_split.split(trainX, trainy):
    split_data_92, split_data_8 = trainX[train_index], trainX[test_index]
    split_label_92, split_label_8 = trainy[train_index], trainy[test_index]

# test_size=0.3 denotes that 30% of the dataset is used for testing.
train_test_split = StratifiedShuffleSplit(test_size=0.3, random_state=seed)
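Note that n_splits defaults to 10 in StratifiedShuffleSplit, so the loop above runs ten times and the variables keep only the last split. As a quick sanity check (a sketch added here, not part of the original notebook), stratification should leave the ten class proportions in the 8% subset roughly equal; the labels are still integers at this point, so np.bincount applies:
# each of the ten class frequencies should be close to 0.1
print(np.bincount(split_label_8) / split_label_8.size)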
Data Splitting
Print the shape of:
- train_data
- train_labels
- test_data
- test_labels
for train_index, test_index in train_test_split.split(split_data_8, split_label_8):
    train_data_70, test_data_30 = split_data_8[train_index], split_data_8[test_index]
    train_label_70, test_label_30 = split_label_8[train_index], split_label_8[test_index]

train_data = train_data_70     # assigning to variable train_data
train_labels = train_label_70  # assigning to variable train_labels
test_data = test_data_30
test_labels = test_label_30
print('train_data : ', train_data.shape)
print('train_labels : ', train_labels.shape)
print('test_data : ', test_data.shape)
print('test_labels : ', test_labels.shape)
train_data : (3360, 28, 28)
train_labels : (3360,)
test_data : (1440, 28, 28)
test_labels : (1440,)
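These shapes follow directly from the two splits: 8% of the 60,000 training images is 4,800, of which 70% (3,360) become train_data and 30% (1,440) become test_data.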
Normalization
Apply the mean for the data with the following parameters:
- axis = (0,1,2)
- keepdims = True
Apply the square root of the variance (i.e. the standard deviation) with the following parameters:
- axis = (0,1,2)
- ddof = 1
- keepdims = True
Print the shape of:
- train_data
- test_data
# definition of the normalization function
def normalize(data, eps=1e-8):
    data -= data.mean(axis=(0, 1, 2), keepdims=True)
    std = np.sqrt(data.var(axis=(0, 1, 2), ddof=1, keepdims=True))
    std[std < eps] = 1.
    data /= std
    return data
train_data = train_data.astype('float64')
test_data = test_data.astype('float64')
# calling the function
train_data = normalize(train_data)
test_data = normalize(test_data)
# prints the shape of train data and test data
print('train_data: ', train_data.shape)
print('test_data: ', test_data.shape)
train_data: (3360, 28, 28)
test_data: (1440, 28, 28)
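A quick check that normalization behaved as intended (a sketch added for illustration, not part of the original notebook): the global mean should now be near 0 and the standard deviation near 1.
print(train_data.mean(axis=(0, 1, 2)))                  # approximately 0
print(np.sqrt(train_data.var(axis=(0, 1, 2), ddof=1)))  # approximately 1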
ZCA Whitening
Print the shape of:
- train_data
- test_data
# Flatten each image into a 784-dimensional column vector
train_data_flat = train_data.reshape(train_data.shape[0], -1).T
test_data_flat = test_data.reshape(test_data.shape[0], -1).T
print('train_data_flat: ', train_data_flat.shape)
print('test_data_flat: ', test_data_flat.shape)
train_data_flat_t = train_data_flat.T
test_data_flat_t = test_data_flat.T
train_data_flat: (784, 3360)
test_data_flat: (784, 1440)
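The cell above only flattens the images; the whitening transform itself is not shown in this notebook. Below is a minimal sketch of ZCA whitening, assuming the (784, N) layout produced above; the variable names and the epsilon value 1e-2 are illustrative choices, not the original code.
# Center each pixel (row) across the training samples
mean = train_data_flat.mean(axis=1, keepdims=True)
train_centered = train_data_flat - mean
# Covariance of the 784 pixel features (np.cov centers internally)
sigma = np.cov(train_data_flat)                         # (784, 784)
# Eigendecomposition of the symmetric covariance matrix via SVD
U, S, _ = np.linalg.svd(sigma)
epsilon = 1e-2                                          # illustrative regularizer
# ZCA whitening matrix: U * diag(1/sqrt(S + eps)) * U^T
zca_matrix = U @ np.diag(1.0 / np.sqrt(S + epsilon)) @ U.T
# Apply the transform fitted on train data to both sets
train_data_zca = zca_matrix @ train_centered            # (784, 3360)
test_data_zca = zca_matrix @ (test_data_flat - mean)    # (784, 1440)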
Principal Component Analysis (PCA)
Keep n_components of train_data_pca as the size of the train_data columns and fit-transform with train_data_flat
Keep n_components of test_data_pca as the size of the test_data columns and fit-transform with test_data_flat
Print the shape of:
- train_data_pca
- test_data_pca
from sklearn.decomposition import PCA

# n_components specifies the number of components to keep
train_data_pca = PCA(n_components=train_data.shape[1]).fit_transform(train_data_flat)
test_data_pca = PCA(n_components=test_data.shape[1]).fit_transform(test_data_flat)
print(train_data_pca.shape)
print(test_data_pca.shape)
train_data_pca = train_data_pca.T
test_data_pca = test_data_pca.T
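To see how much of the total variance the 28 kept components capture, the fitted PCA estimator exposes explained_variance_ratio_. A short sketch (the name pca is introduced here for illustration, not taken from the original notebook):
pca = PCA(n_components=train_data.shape[1])
pca.fit(train_data_flat)
# fraction of the total variance retained by the 28 kept components
print(pca.explained_variance_ratio_.sum())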
Singular Value Decomposition (SVD)
Execute the below cells to perform Singular Value Decomposition
from skimage import color

def svdFeatures(input_data):
    svdArray_input_data = []
    size = input_data.shape[0]
    for i in range(0, size):
        img = color.rgb2gray(input_data[i])
        U, s, V = np.linalg.svd(img, full_matrices=False)
        S = [s[j] for j in range(28)]
        svdArray_input_data.append(S)
    svdMatrix_input_data = np.matrix(svdArray_input_data)
    return svdMatrix_input_data

# apply SVD to the train and test data
train_data_svd = svdFeatures(train_data)
test_data_svd = svdFeatures(test_data)
print(train_data_svd.shape)
print(test_data_svd.shape)
/usr/local/lib/python3.6/dist-packages/ipykernel_launcher.py:10: FutureWarning: The behavior of rgb2gray will change in scikit-image 0.19. Currently, rgb2gray allows 2D grayscale image to be passed as inputs and leaves them unmodified as outputs. Starting from version 0.19, 2D arrays will be treated as 1D images with 3 channels.
  # Remove the CWD from sys.path while we load stuff.
(3360, 28)
(1440, 28)
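Because the inputs here are already 2-D grayscale arrays, the rgb2gray call is effectively a no-op and is what triggers the FutureWarning above. A variant that skips the conversion (an alternative sketch, not the notebook's original code) behaves identically on this data:
# SVD directly on one 28x28 grayscale image; no colour conversion needed
U, s, V = np.linalg.svd(train_data[0], full_matrices=False)
print(s.shape)  # (28,) singular values for the image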
Support Vector Machine (SVM)
Initialize the SVM classifier with the following parameters:
- gamma = .001
- probability = True
Train the model with train_data_flat_t and train_labels.
Now predict the output with test_data_flat_t.
Evaluate the classifier with the score from test_data_flat_t and test_labels.
Print the predicted score.
from sklearn import svm

# Creating an SVM classifier
clf = svm.SVC(gamma=.001, probability=True)
# Model training with the flattened train data
train = clf.fit(train_data_flat_t, train_labels)
# Predicting labels for the flattened test data
predicted = clf.predict(test_data_flat_t)
# Mean accuracy on the test data
score = clf.score(test_data_flat_t, test_labels)
print("score", score)
with open('output.txt', 'w') as file:
    file.write(str(np.mean(score)))
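As an optional follow-up (an addition, not part of the original exercise), scikit-learn's classification_report shows per-class precision and recall for the predictions:
from sklearn.metrics import classification_report
print(classification_report(test_labels, predicted))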