Neural Network: CNN (Convolution)

zi,jl=โˆ‘mโˆ‘nai+m,j+nlโˆ’1(ฯ‰โ€ฒ)m,nl+bi,jlz^l_{i,j} = \sum_m \sum_n{a^{l-1}_{i+m,j+n} (\omega')^l_{m,n} + b^l_{i,j}}
Al=ฯƒ(Zl)=ฯƒ(Alโˆ’1โˆ—Wl+Bl)A^l = \sigma (Z^l) = \sigma ( A^{l-1} * W^l + B^l )

Convolution

K,Kโ€ฒโˆˆRMร—NK(m,n)=Kโ€ฒ(Mโˆ’1โˆ’m,Nโˆ’1โˆ’n)K, K' \isin \R^{M \times N} \quad K(m, n) = K'(M-1-m, N-1-n)
(Iโˆ—K)ij=โˆ‘mโˆ‘nI(i+m,j+n)K(Mโˆ’1โˆ’m,Nโˆ’1โˆ’n)=โˆ‘mโˆ‘nI(i+m,j+n)Kโ€ฒ(m,n)=(IโŠ—Kโ€ฒ)ij\begin{aligned} ( I * K )_{ij} &= \sum_m \sum_n {I(i+m, j+n)K(M-1-m, N-1-n)} \\ &= \sum_m \sum_n {I(i+m, j+n)K'(m, n)} &= (I \otimes K')_{ij} \end{aligned}
info

In most CNN descriptions, the pictures said to illustrate convolution actually depict the cross-correlation with the flipped kernel $K'$.

Kernel example

$$K = \begin{bmatrix}1 & 2 & 3 \\ 4 & 5 & 6 \\ 7 & 8 & 9\end{bmatrix} \quad K' = \begin{bmatrix}9 & 8 & 7 \\ 6 & 5 & 4 \\ 3 & 2 & 1\end{bmatrix}$$

Back-propagation

ฮดi,jlโ‰กโˆ‚Cโˆ‚zi,jl\delta^l_{i,j} \equiv \frac{\partial C}{\partial z^l_{i,j}}
ฮดi,jl=โˆ‚Cโˆ‚zi,jl=โˆ‘xโˆ‘yโˆ‚Cโˆ‚zx,yl+1โˆ‚zx,yl+1โˆ‚zi,jl=โˆ‘xโˆ‘yฮดx,yl+1(ฯ‰โ€ฒ)iโˆ’x,jโˆ’yl+1ฯƒโ€ฒ(zi,jl)=โˆ‘mโˆ‘nฮดiโˆ’m,jโˆ’nl+1(ฯ‰โ€ฒ)m,nl+1ฯƒโ€ฒ(zi,jl)\begin{aligned} \delta^l_{i,j} = \frac{\partial C}{\partial z^l_{i,j}} &= \sum_x \sum_y { \frac { \partial C } { \partial z^{l+1}_{x,y} } \frac { \partial z^{l+1}_{x,y} } { \partial z^l_{i,j} } } \\ &= \sum_x \sum_y { \delta^{l+1}_{x,y} (\omega')^{l+1}_{i-x,j-y} \sigma' (z^l_{i,j}) } \\ &= \sum_m \sum_n { \delta^{l+1}_{i-m,j-n} (\omega')^{l+1}_{m,n} \sigma' (z^l_{i,j}) } \end{aligned}
โˆ‚Cโˆ‚(ฯ‰โ€ฒ)m,nl=โˆ‘iโˆ‘jโˆ‚Cโˆ‚zi,jlโˆ‚zi,jlโˆ‚(ฯ‰โ€ฒ)m,nl=โˆ‘iโˆ‘jฮดi,jlai+m,j+nlโˆ’1\begin{aligned} \frac {\partial C} {\partial (\omega')^l_{m,n}} &= \sum_i \sum_j { \frac { \partial C } { \partial z^{l}_{i,j} } \frac { \partial z^{l}_{i,j} } { \partial ( \omega' )^l_{m,n} } } \\ &= \sum_i \sum_j { \delta^l_{i,j} a^{l-1}_{i+m, j+n} } \end{aligned}
โˆ‚Cโˆ‚bi,jl=โˆ‘xโˆ‘yโˆ‚Cโˆ‚zx,ylโˆ‚zx,ylโˆ‚bi,jl=ฮดi,jl\begin{aligned} \frac {\partial C} {\partial b^l_{i,j}} &= \sum_x \sum_y { \frac { \partial C } { \partial z^{l}_{x,y} } \frac { \partial z^{l}_{x,y} } { \partial b^l_{i,j} } } \\ & = \delta^l_{i,j} \end{aligned}

TensorFlow example

Training

hong_net_5.py
import tensorflow as tf
import numpy as np

mnist = tf.keras.datasets.mnist
"""
x: gray scale handwritten digits
y: label
x_train[0].shape == (28, 28)
y_train[0] == 5
len(x_train) == 60000
len(x_test) == 10000
"""
(x_train, y_train), (x_test, y_test) = mnist.load_data()
# Zero-pad each 28x28 image to the 32x32 input size this network expects.
x_train = np.pad(x_train, ((0, 0), (2, 2), (2, 2)), mode="constant")
x_test = np.pad(x_test, ((0, 0), (2, 2), (2, 2)), mode="constant")
# Add a trailing channel axis: (N, 32, 32) -> (N, 32, 32, 1).
x_train = x_train[..., np.newaxis]
x_test = x_test[..., np.newaxis]
# Cast to float32 and scale pixels from [0, 255] to [0, 1]; normalized
# inputs train far better than raw uint8 magnitudes with ReLU layers.
x_train = x_train.astype("float32") / 255.0
x_test = x_test.astype("float32") / 255.0
class HongNet5(tf.keras.Model):
    """LeNet-5-style CNN: three conv stages with average pooling, then a
    dense head producing a 10-way softmax over digit classes.

    Expected input shape: (batch_size, 32, 32, 1).
    """

    def __init__(self):
        super().__init__()
        # Conv2D parameters (from the Keras docs):
        #   filters: integer, the dimensionality of the output space.
        #   kernel_size: integer or tuple/list of 2 integers, the height
        #     and width of the 2D convolution window; a single integer
        #     applies to all spatial dimensions.
        #   activation: activation function to use.
        #   Input shape: (batch_size, rows, cols, channels)
        #   Output shape: (batch_size, rows, cols, filters)
        self.conv1 = tf.keras.layers.Conv2D(
            filters=6, kernel_size=5, activation="relu"
        )
        self.avgPool1 = tf.keras.layers.AveragePooling2D()
        self.conv2 = tf.keras.layers.Conv2D(
            filters=16, kernel_size=5, activation="relu"
        )
        self.avgPool2 = tf.keras.layers.AveragePooling2D()
        self.conv3 = tf.keras.layers.Conv2D(
            filters=120, kernel_size=5, activation="relu"
        )
        self.flatten = tf.keras.layers.Flatten()
        # Dense: units = positive integer, dimensionality of the output space.
        self.d1 = tf.keras.layers.Dense(84, activation="relu")
        self.d2 = tf.keras.layers.Dense(10, activation="softmax")

    def call(self, inputs):
        """Forward pass: (batch, 32, 32, 1) -> (batch, 10) probabilities."""
        x = self.conv1(inputs)
        x = self.avgPool1(x)
        x = self.conv2(x)
        x = self.avgPool2(x)
        x = self.conv3(x)
        x = self.flatten(x)
        x = self.d1(x)
        x = self.d2(x)
        return x
model = HongNet5()
model.compile(
    optimizer="adam",
    loss="sparse_categorical_crossentropy",
    metrics=["accuracy"],
)
model.fit(
    x_train, y_train, epochs=5, validation_data=(x_test, y_test),
)
model.summary()
score = model.evaluate(x_test, y_test, verbose=1)
print("Test loss:", score[0])
print("Test accuracy: {}%".format(score[1] * 100))
model.save("hong_net_5")
# Export to TFLite via a concrete function traced with a fixed batch of 1.
run_model = tf.function(lambda x: model(x))
concrete_func = run_model.get_concrete_function(
    tf.TensorSpec((1, *x_train.shape[1:]), x_train.dtype)
)
converter = tf.lite.TFLiteConverter.from_concrete_functions([concrete_func])
tflite_model = converter.convert()
# Context manager closes the file deterministically (the original
# open(...).write(...) leaked the handle until GC).
with open("converted_model.tflite", "wb") as f:
    f.write(tflite_model)
# Save the preprocessed test set for the standalone inference scripts.
np.save("x_test.npy", x_test)
np.save("y_test.npy", y_test)

Inference

inference.py
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt

x_test = np.load("x_test.npy")
y_test = np.load("y_test.npy")
model = tf.keras.models.load_model("hong_net_5")
# Show 5 random test digits with the model's prediction for each.
for _ in range(5):
    # Derive the bound from the data instead of hard-coding 10000.
    num = np.random.randint(len(x_test))
    img = x_test[num].reshape((32, 32))
    # 1-element slice keeps the batch dimension the model expects.
    ret = model.predict(x_test[num : num + 1])
    inference = np.argmax(ret)
    print("{}: {}".format(inference, inference == y_test[num]))
    plt.imshow(img)
    plt.show()
inference_tflite.py
import tflite_runtime.interpreter as tflite
import numpy as np
import matplotlib.pyplot as plt

x_test = np.load("x_test.npy")
y_test = np.load("y_test.npy")
interpreter = tflite.Interpreter(model_path="converted_model.tflite")
interpreter.allocate_tensors()
input_details = interpreter.get_input_details()
output_details = interpreter.get_output_details()
# Show 5 random test digits with the TFLite model's prediction for each.
for _ in range(5):
    # Derive the bound from the data instead of hard-coding 10000.
    num = np.random.randint(len(x_test))
    img = x_test[num].reshape((32, 32))
    # The converted model was traced with batch size 1, so feed a
    # 1-element slice that keeps the batch dimension.
    interpreter.set_tensor(input_details[0]["index"], x_test[num : num + 1])
    interpreter.invoke()
    output_data = interpreter.get_tensor(output_details[0]["index"])
    inference = np.argmax(output_data)
    print("{}: {}".format(inference, inference == y_test[num]))
    plt.imshow(img)
    plt.show()

Reference

Last updated on