machine learning – implementing MNIST classification with an FNN

import struct
import random
import numpy as np

'''
----- FNN configuration -----
input data: MNIST dataset, images of handwritten digits 0 to 9
output: prediction of the digits 0 to 9

input layer: 784 (from the 28 * 28 images)
hidden layer: 1024
output layer: 10 (one unit per digit 0~9)

activation function for hidden layer: ReLU
activation function for output layer: softmax

loss function: cross-entropy loss
optimizer: mini-batch gradient descent

'''
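
# For reference, the forward pass and loss this configuration implies
# (just a sketch of the math the code below is meant to implement):
#   z1 = W1 @ x + b1           # (1024,)  hidden pre-activation
#   a1 = relu(z1)              # (1024,)  hidden activation
#   z2 = W2 @ a1 + b2          # (10,)    output pre-activation
#   a2 = softmax(z2)           # (10,)    class probabilities
#   loss = -sum(y * log(a2))   # cross-entropy against the one-hot label y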


def read_MNIST_image(filename="train-images.idx3-ubyte"):
    # Parse the IDX image file: 4 big-endian header ints, then raw uint8 pixels
    with open(filename, 'rb') as file:
        magic_number = struct.unpack('>I', file.read(4))[0]
        image_number = struct.unpack('>I', file.read(4))[0]
        row = struct.unpack('>I', file.read(4))[0]
        col = struct.unpack('>I', file.read(4))[0]
        pixel_number = row*col
        print("Image: %d, Size: %dx%d"%(image_number,row,col))

        image_set = np.zeros((image_number,row,col))

        for idx in range(image_number):
            img = np.frombuffer(file.read(pixel_number), dtype=np.uint8, count=pixel_number)
            image_set[idx] = img.reshape((row,col))

    return image_set


def read_MNIST_label(filename="train-labels.idx1-ubyte"):
    # Parse the IDX label file: 2 big-endian header ints, then one uint8 per label
    with open(filename, 'rb') as file:
        magic_number = struct.unpack('>I', file.read(4))[0]
        label_number = struct.unpack('>I', file.read(4))[0]
        print("Label: %d"%label_number)

        label_set = np.zeros((label_number,1), dtype=np.uint8)

        for idx in range(label_number):
            label_set[idx] = struct.unpack('>B', file.read(1))[0]

    return label_set

cache = {}
grads = {}

def ReLU(array):  # max(x, 0): y=0 for x<0, y=x for x>0
    value = np.multiply(np.add(array, np.abs(array)), 0.5)
    return value


def D_ReLU(array):  # derivative of ReLU: y=0 for x<0, y=1 for x>0
    # computed as max(x,0) * 1/(x + 1e-7), which is ~1 for x>0 and 0 for x<0
    # (this division is numerically shaky for x near zero)
    value = np.multiply(np.add(array, np.abs(array)), 0.5)
    value = np.multiply(value, np.reciprocal(array + 1e-7))
    return value


def Onehot_encode(index, total):
    encode = np.zeros((1,total))
    encode[0,index] = 1
    return encode


def Softmax(array): #e^x/sum(e^x)
    array = array - np.max(array) # avoid overflow
    Output=np.exp(array)
    recipSum=np.reciprocal( np.sum(Output) )
    Output=np.multiply(Output,recipSum)
    return Output

all_image = read_MNIST_image()
all_label = read_MNIST_label()


# Train: 55000  Validate: 5000  Test: not included
# You can change the index split to do "cross-validation"
# eg: use all_image[:5000] and all_image[10000:60000] as train,
#     all_image[5000:10000] as the validation fold
# (note that "+" adds numpy arrays element-wise, so joining the two train
#  slices needs np.concatenate; see the sketch after the split below)

train_image = all_image[:55000]
train_label = all_label[:55000]

test_image = all_image[55000:]
test_label = all_label[55000:]
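
# (Sketch only, not used below) one cross-validation fold built with numpy;
# "+" on numpy arrays adds element-wise, so the two training slices have to
# be joined with np.concatenate (the cv_* names are just for illustration):
# cv_val_image   = all_image[5000:10000]
# cv_val_label   = all_label[5000:10000]
# cv_train_image = np.concatenate((all_image[:5000], all_image[10000:]), axis=0)
# cv_train_label = np.concatenate((all_label[:5000], all_label[10000:]), axis=0)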



# initialize NN weights, sampled from the standard normal distribution and scaled by 0.01
layer1W= np.random.randn(784,1024)*0.01
OutputW= np.random.randn(1024,10)*0.01
layer1b= np.random.randn(1024,1)*0.01
Outputb= np.random.randn(10,1)*0.01

params = {
            "W1": layer1W.T,
            "b1": layer1b,
            "W2": OutputW.T,
            "b2": Outputb
        }
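
# Resulting parameter shapes: W1 (1024, 784), b1 (1024, 1), W2 (10, 1024), b2 (10, 1)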

def feed_forward(x):
        '''
            y = σ(wX + b)
        '''
        cache["X"] = x # 784 * 128
        
        cache["Z1"] = np.matmul(params["W1"], cache["X"].T)
        cache["Z2"] = np.matmul(params["W1"], cache["X"].T)
        cache["A1"] = ReLU(cache["Z1"])
        cache["Z2"] = np.matmul(params["W2"], cache["A1"]) + params["b2"]
        cache["A2"] = Softmax(cache["Z2"])
        return cache["A2"]

def back_propagate(y, output):
        '''
            This is the backpropagation algorithm, which calculates the updates
            to the neural network's parameters.

            Note: There is a stability issue that causes warnings. It is
                  caused by the dot and multiply operations on the huge arrays.

                  RuntimeWarning: invalid value encountered in true_divide
                  RuntimeWarning: overflow encountered in exp
                  RuntimeWarning: overflow encountered in square
        '''
        current_batch_size = y.shape[0]
        
        dZ2 = output - y.T
        dW2 = (1./current_batch_size) * np.matmul(dZ2, cache["A1"].T)
        db2 = (1./current_batch_size) * np.sum(dZ2, axis=1, keepdims=True)

        dA1 = np.matmul(params["W2"].T, dZ2)
        dZ1 = dA1 * D_ReLU(cache["Z1"])
        dW1 = (1./current_batch_size) * np.matmul(dZ1, cache["X"])
        db1 = (1./current_batch_size) * np.sum(dZ1, axis=1, keepdims=True)

        grads = {"W1": dW1, "b1": db1, "W2": dW2, "b2": db2}
        return grads
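
# For reference, the gradients back_propagate computes (m = batch size, Y = one-hot
# labels); with softmax + cross-entropy the output-layer error simplifies to A2 - Y:
#   dZ2 = A2 - Y.T
#   dW2 = (1/m) * dZ2 @ A1.T         db2 = (1/m) * sum(dZ2, axis=1)
#   dZ1 = (W2.T @ dZ2) * relu'(Z1)
#   dW1 = (1/m) * dZ1 @ X            db1 = (1/m) * sum(dZ1, axis=1)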


# Iteration
max_iteration = 1000    # hyperparameter 
batch_size = 16       # hyperparameter 
learning_rate = 0.00001  # hyperparameter 

train_size = train_image.shape[0]
for it in range(max_iteration):
    #print("Iteration: %d"%it)

    # Get batch
    batch_index = np.random.choice(train_size, size=batch_size, replace=False)
    batchX = np.zeros((0,784)) # 16 * 784
    batchY = np.zeros((0,10))
    for idx in batch_index:
        batchX = np.vstack( (batchX, train_image[idx].flatten()) )
        batchY = np.vstack( (batchY, Onehot_encode(train_label[idx], 10)) )
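    # (Equivalent sketch) the batch could also be built without vstack, using
    # fancy indexing and np.eye for the one-hot labels:
    #   batchX = train_image[batch_index].reshape(batch_size, 784)
    #   batchY = np.eye(10)[train_label[batch_index].flatten()]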
    
    
    # Forward propagation
    ''' TODO: YOUR CODE '''
    output = feed_forward(batchX)
    
    # Backpropagation
    ''' TODO: YOUR CODE '''
    
    grad = back_propagate(batchY, output)
    
    for key in params:
        params[key] = params[key] - learning_rate * grad[key]

    

# Validate
test_size = test_image.shape[0]

testX = test_image.reshape(-1,784)
testY = np.zeros((test_size,10))


for num in range(test_size):
    testY[num] = Onehot_encode(test_label[num], 10)

# Forward propagation
''' TODO: YOUR CODE '''

result = feed_forward(testX)

print(testX)

output = np.random.randn(test_size,10)
''' Random guess: you can do better than this !! '''

# Result
predict = np.argmax(result, axis=0)  # result has shape (10, test_size): classes are along the rows
answer = np.argmax(testY, axis=1)

accuracy = np.sum(np.equal(predict, answer)) / test_size
print("Accuracy: %f "%accuracy)

I am practicing writing a program for MNIST with an FNN.

I learned to write it by following some online samples.

But I found that after the forward propagation, result = feed_forward(testX),

the printed output looks like this:

[[0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 ...
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]]

It seems like the test data doesn't go through the forward propagation,

but when I check inside feed_forward, the weights have changed.

So, I think that feed_forward isn't correct.
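
One quick check I can add after the forward propagation (just printing the shapes
and the softmax column sums of the variables above, to see what feed_forward really returns):

print(result.shape)                # with the layout above I expect (10, 5000)
print(np.sum(result, axis=0)[:5])  # each softmax column should sum to about 1
print(testX.shape)                 # (5000, 784)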
