Convolutional Neural Network Example

Build a convolutional neural network with TensorFlow v2.

This example uses a low-level approach to better understand all the mechanics behind building a convolutional neural network and the training process.

  • Author: Aymeric Damien
  • Project: https://github.com/aymericdamien/TensorFlow-Examples/

CNN Overview

MNIST Dataset Overview

This example uses the MNIST dataset of handwritten digits. The dataset contains 60,000 examples for training and 10,000 examples for testing. The digits have been size-normalized and centered in fixed-size images (28x28 pixels) with values ranging from 0 to 255.

In this example, each image will be converted to float32 and normalized to [0, 1].

More info: http://yann.lecun.com/exdb/mnist/

from __future__ import absolute_import, division, print_function

import tensorflow as tf
from tensorflow.keras import Model, layers
import numpy as np
# MNIST dataset parameters.
num_classes = 10 # total classes (digits 0-9).

# Training parameters.
learning_rate = 0.001
training_steps = 200
batch_size = 128
display_step = 10

# Network parameters.
conv1_filters = 32 # number of filters in the first conv layer.
conv2_filters = 64 # number of filters in the second conv layer.
fc1_units = 1024 # number of neurons in the first fully-connected layer.
# Prepare MNIST data.
from tensorflow.keras.datasets import mnist
(x_train, y_train), (x_test, y_test) = mnist.load_data()
# Convert to float32.
x_train, x_test = np.array(x_train, np.float32), np.array(x_test, np.float32)
# Normalize image values from [0, 255] to [0, 1].
x_train, x_test = x_train / 255., x_test / 255.
# Use tf.data API to shuffle and batch data.
train_data = tf.data.Dataset.from_tensor_slices((x_train, y_train))
train_data = train_data.repeat().shuffle(5000).batch(batch_size)
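
To confirm the pipeline delivers what the model expects, it can help to pull a single batch and inspect it. A minimal sanity check, assuming the code above has already run:

# Optional check: take one batch and print its shapes.
for batch_x, batch_y in train_data.take(1):
    print(batch_x.shape) # (128, 28, 28), float32 in [0, 1]
    print(batch_y.shape) # (128,)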
# Create TF Model.
class ConvNet(Model):
    # Set layers.
    def __init__(self):
        super(ConvNet, self).__init__()
        # Convolution layer with 32 filters and a kernel size of 5.
        self.conv1 = layers.Conv2D(conv1_filters, kernel_size=5, activation=tf.nn.relu)
        # Max pooling (down-sampling) with kernel size of 2 and stride of 2.
        self.maxpool1 = layers.MaxPool2D(2, strides=2)

        # Convolution layer with 64 filters and a kernel size of 3.
        self.conv2 = layers.Conv2D(conv2_filters, kernel_size=3, activation=tf.nn.relu)
        # Max pooling (down-sampling) with kernel size of 2 and stride of 2.
        self.maxpool2 = layers.MaxPool2D(2, strides=2)

        # Flatten the data to a 1-D vector for the fully-connected layer.
        self.flatten = layers.Flatten()

        # Fully-connected layer.
        self.fc1 = layers.Dense(fc1_units)
        # Apply dropout (if is_training is False, dropout is not applied).
        self.dropout = layers.Dropout(rate=0.5)

        # Output layer, class prediction.
        self.out = layers.Dense(num_classes)

    # Set forward pass.
    def call(self, x, is_training=False):
        x = tf.reshape(x, [-1, 28, 28, 1])
        x = self.conv1(x)
        x = self.maxpool1(x)
        x = self.conv2(x)
        x = self.maxpool2(x)
        x = self.flatten(x)
        x = self.fc1(x)
        x = self.dropout(x, training=is_training)
        x = self.out(x)
        if not is_training:
            # The tf cross-entropy function expects raw logits, so apply softmax only when not training.
            x = tf.nn.softmax(x)
        return x

# Build neural network model.
conv_net = ConvNet()
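
Keras Conv2D layers default to 'valid' padding, so each convolution shrinks the spatial dimensions: 28x28 becomes 24x24 after the 5x5 convolution, 12x12 after the first pooling, 10x10 after the 3x3 convolution, and 5x5 after the second pooling, giving 5*5*64 = 1600 flattened features. A sketch that traces these shapes through the untrained model:

# Trace tensor shapes layer by layer (uses the conv_net instance above).
x = tf.reshape(x_train[:1], [-1, 28, 28, 1])
x = conv_net.conv1(x)
print(x.shape) # (1, 24, 24, 32)
x = conv_net.maxpool1(x)
print(x.shape) # (1, 12, 12, 32)
x = conv_net.conv2(x)
print(x.shape) # (1, 10, 10, 64)
x = conv_net.maxpool2(x)
print(x.shape) # (1, 5, 5, 64)
x = conv_net.flatten(x)
print(x.shape) # (1, 1600)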
# Cross-entropy loss.
# Note that this will apply 'softmax' to the logits.
def cross_entropy_loss(x, y):
    # Convert labels to int64 for the tf cross-entropy function.
    y = tf.cast(y, tf.int64)
    # Apply softmax to the logits and compute cross-entropy.
    loss = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y, logits=x)
    # Average loss across the batch.
    return tf.reduce_mean(loss)
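
sparse_softmax_cross_entropy_with_logits fuses the softmax and the negative log-likelihood into one numerically stable op, which is why the model applies softmax only outside of training. A quick check against the manual formula, using made-up logits:

# Hypothetical logits for a 3-class problem; the true class is 0.
logits = tf.constant([[2.0, 1.0, 0.1]])
labels = tf.constant([0], dtype=tf.int64)
fused = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=labels, logits=logits)
manual = -tf.math.log(tf.nn.softmax(logits)[0, 0])
print(fused.numpy()[0], manual.numpy()) # both ~0.4170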

# Accuracy metric.
def accuracy(y_pred, y_true):
    # The predicted class is the index with the highest score in the prediction vector (i.e. argmax).
    correct_prediction = tf.equal(tf.argmax(y_pred, 1), tf.cast(y_true, tf.int64))
    return tf.reduce_mean(tf.cast(correct_prediction, tf.float32), axis=-1)
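
A quick check of the metric with toy values: with two samples and one correct argmax, the accuracy should be 0.5.

# Toy predictions: sample 0 predicts class 1 (correct), sample 1 predicts class 0 (wrong).
y_pred = tf.constant([[0.1, 0.9], [0.8, 0.2]])
y_true = tf.constant([1, 1])
print(accuracy(y_pred, y_true).numpy()) # 0.5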

# Adam optimizer.
optimizer = tf.optimizers.Adam(learning_rate)
# Optimization process.
def run_optimization(x, y):
    # Wrap computation inside a GradientTape for automatic differentiation.
    with tf.GradientTape() as g:
        # Forward pass.
        pred = conv_net(x, is_training=True)
        # Compute loss.
        loss = cross_entropy_loss(pred, y)

    # Variables to update, i.e. trainable variables.
    trainable_variables = conv_net.trainable_variables

    # Compute gradients.
    gradients = g.gradient(loss, trainable_variables)

    # Update W and b following gradients.
    optimizer.apply_gradients(zip(gradients, trainable_variables))
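
run_optimization executes eagerly, which keeps it easy to debug. For longer runs, the same step can optionally be compiled into a graph with tf.function; the sketch below (with the hypothetical name run_optimization_compiled) behaves identically but is typically faster after the first call:

# Optional: graph-compiled variant of the training step above.
@tf.function
def run_optimization_compiled(x, y):
    with tf.GradientTape() as g:
        pred = conv_net(x, is_training=True)
        loss = cross_entropy_loss(pred, y)
    gradients = g.gradient(loss, conv_net.trainable_variables)
    optimizer.apply_gradients(zip(gradients, conv_net.trainable_variables))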
# Run training for the given number of steps.
for step, (batch_x, batch_y) in enumerate(train_data.take(training_steps), 1):
    # Run the optimization to update W and b values.
    run_optimization(batch_x, batch_y)

    if step % display_step == 0:
        pred = conv_net(batch_x)
        loss = cross_entropy_loss(pred, batch_y)
        acc = accuracy(pred, batch_y)
        print("step: %i, loss: %f, accuracy: %f" % (step, loss, acc))

output:

step: 10, loss: 1.877234, accuracy: 0.789062
step: 20, loss: 1.609390, accuracy: 0.898438
step: 30, loss: 1.555458, accuracy: 0.960938
step: 40, loss: 1.588296, accuracy: 0.921875
step: 50, loss: 1.561057, accuracy: 0.929688
step: 60, loss: 1.539851, accuracy: 0.945312
step: 70, loss: 1.527458, accuracy: 0.976562
step: 80, loss: 1.526701, accuracy: 0.945312
step: 90, loss: 1.522610, accuracy: 0.968750
step: 100, loss: 1.514970, accuracy: 0.968750
step: 110, loss: 1.489902, accuracy: 0.976562
step: 120, loss: 1.520697, accuracy: 0.953125
step: 130, loss: 1.494326, accuracy: 0.968750
step: 140, loss: 1.501781, accuracy: 0.984375
step: 150, loss: 1.506588, accuracy: 0.976562
step: 160, loss: 1.493378, accuracy: 0.984375
step: 170, loss: 1.494006, accuracy: 0.984375
step: 180, loss: 1.509782, accuracy: 0.953125
step: 190, loss: 1.516123, accuracy: 0.953125
step: 200, loss: 1.515508, accuracy: 0.953125
# Test the model on the validation set.
pred = conv_net(x_test)
print("Test Accuracy: %f" % accuracy(pred, y_test))

output:

Test Accuracy: 0.977700
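
Running all 10,000 test images through the network in a single call builds large intermediate activations. On memory-constrained machines the evaluation can be batched instead; a minimal sketch (the unweighted mean over batches is a close approximation here, since only the last batch is smaller):

# Optional: evaluate the test set in batches to limit memory use.
test_data = tf.data.Dataset.from_tensor_slices((x_test, y_test)).batch(batch_size)
batch_accs = [accuracy(conv_net(bx), by) for bx, by in test_data]
print("Test Accuracy: %f" % np.mean(batch_accs))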
# Visualize predictions.
import matplotlib.pyplot as plt

# Predict 5 images from the validation set.
n_images = 5
test_images = x_test[:n_images]
predictions = conv_net(test_images)

# Display image and model prediction.
for i in range(n_images):
    plt.imshow(np.reshape(test_images[i], [28, 28]), cmap='gray')
    plt.show()
    print("Model prediction: %i" % np.argmax(predictions.numpy()[i]))

output:

Model prediction: 7
Model prediction: 2
Model prediction: 1
Model prediction: 0
Model prediction: 4