两层神经网络的类

In [33]:

1
2
3
4
import sys, os
sys.path.append(os.pardir)
from common.functions import *
from common.gradient import numerical_gradient

In [61]:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
class TwoLayerNet:
    """Two-layer fully connected network: affine -> sigmoid -> affine -> softmax.

    All learnable parameters are stored in ``self.params`` under the keys
    ``'W1'``, ``'b1'`` (first layer) and ``'W2'``, ``'b2'`` (second layer).
    The activation and loss helpers (``sigmoid``, ``softmax``,
    ``sigmoid_grad``, ``cross_entropy_error``) and the module-level
    ``numerical_gradient`` are expected to be in scope from the ``common``
    package imports.
    """

    def __init__(self, input_size, hidden_size, output_size, weight_init_std=0.01):
        """Create the parameter dictionary.

        Args:
            input_size: Number of input features.
            hidden_size: Number of hidden units.
            output_size: Number of output units.
            weight_init_std: Scale applied to the standard-normal samples
                used for the weight matrices.
        """
        # Initialize weights with scaled Gaussian noise and biases with zeros.
        self.params = {}
        self.params['W1'] = weight_init_std * np.random.randn(input_size, hidden_size)
        self.params['b1'] = np.zeros(hidden_size)
        self.params['W2'] = weight_init_std * np.random.randn(hidden_size, output_size)
        self.params['b2'] = np.zeros(output_size)

    def predict(self, x):
        """Run the forward pass on ``x`` and return the output of ``softmax``."""
        W1, W2 = self.params['W1'], self.params['W2']
        b1, b2 = self.params['b1'], self.params['b2']

        a1 = np.dot(x, W1) + b1
        z1 = sigmoid(a1)
        a2 = np.dot(z1, W2) + b2
        y = softmax(a2)

        return y

    def loss(self, x, t):
        """Return ``cross_entropy_error`` between the prediction for ``x`` and ``t``.

        Args:
            x: Input data.
            t: Supervised (label) data.
        """
        y = self.predict(x)
        return cross_entropy_error(y, t)

    def accuracy(self, x, t):
        """Return the fraction of rows of ``x`` whose predicted class matches ``t``.

        Both the prediction and ``t`` are reduced with ``argmax`` along axis 1,
        so ``t`` must be two-dimensional (e.g. one-hot encoded labels).
        """
        y = self.predict(x)
        y = np.argmax(y, axis=1)
        t = np.argmax(t, axis=1)

        accuracy = np.sum(y == t) / float(x.shape[0])
        return accuracy

    def numerical_gradient(self, x, t):
        """Compute the gradient of the loss for every parameter numerically.

        Args:
            x: Input data.
            t: Supervised (label) data.

        Returns:
            Dict with keys ``'W1'``, ``'b1'``, ``'W2'``, ``'b2'``; each value
            is whatever the module-level ``numerical_gradient`` helper
            returns for that parameter array.
        """
        # The lambda ignores its argument and always re-evaluates the loss
        # from self.params.
        # NOTE(review): this only works if the helper perturbs the array it
        # is given in place -- confirm against common.gradient.
        loss_W = lambda W: self.loss(x, t)

        # The loss already copes with single samples and batches, so each
        # parameter is simply differentiated on its own.
        # Inside a method the bare name resolves to the module-level helper,
        # not to this method.
        grads = {}
        grads['W1'] = numerical_gradient(loss_W, self.params['W1'])
        grads['b1'] = numerical_gradient(loss_W, self.params['b1'])
        # Fixed: this entry was previously written to 'W1', which clobbered
        # the first-layer gradient and left 'W2' missing.
        grads['W2'] = numerical_gradient(loss_W, self.params['W2'])
        grads['b2'] = numerical_gradient(loss_W, self.params['b2'])

        return grads

    def gradient(self, x, t):
        """Compute the gradient of the loss for every parameter by backpropagation.

        Args:
            x: Input data; ``x.shape[0]`` is used as the batch size, so a
                two-dimensional batch is required.
            t: Supervised (label) data with the same shape as the network
                output.

        Returns:
            Dict with keys ``'W1'``, ``'b1'``, ``'W2'``, ``'b2'`` holding
            arrays shaped like the corresponding parameters.
        """
        W1, W2 = self.params['W1'], self.params['W2']
        b1, b2 = self.params['b1'], self.params['b2']
        grads = {}

        batch_num = x.shape[0]

        # forward
        a1 = np.dot(x, W1) + b1
        z1 = sigmoid(a1)
        a2 = np.dot(z1, W2) + b2
        y = softmax(a2)

        # backward
        # NOTE(review): (y - t) / batch_num is the output-layer gradient for
        # softmax combined with a batch-averaged cross-entropy and one-hot t
        # -- confirm that cross_entropy_error averages over the batch.
        dy = (y - t) / batch_num
        grads['W2'] = np.dot(z1.T, dy)
        grads['b2'] = np.sum(dy, axis=0)

        # Gradient w.r.t. the hidden activation z1, then through the sigmoid
        # to the pre-activation a1.
        dz1 = np.dot(dy, W2.T)
        da1 = sigmoid_grad(a1) * dz1
        grads['W1'] = np.dot(x.T, da1)
        grads['b1'] = np.sum(da1, axis=0)

        return grads

mini-batch 实现

In [62]:

1
2
import numpy as np
from dataset.mnist import load_mnist

In [63]:

1
# Load the MNIST train/test splits via the project's load_mnist helper,
# requesting normalized inputs and one-hot encoded labels.
(x_train, t_train), (x_test, t_test) = load_mnist(
    normalize=True,
    one_hot_label=True,
)

In [64]:

1
2
3
4
5
6
7
8
9
10
11
# Hyperparameters.
# These are defined first because iter_per_epoch below depends on
# train_size and batch_size; computing it earlier raises NameError when the
# file is run top to bottom.
iters_num = 10000
train_size = x_train.shape[0]
batch_size = 100
learning_rate = 0.1

# Histories filled in by the training loop.
train_loss_list = []
train_acc_list = []
test_acc_list = []

# Number of iterations that make up one epoch (at least 1).
iter_per_epoch = max(train_size / batch_size, 1)

In [65]:

1
# Build the network: 784 inputs, 50 hidden units, 10 outputs.
network = TwoLayerNet(
    input_size=784,
    hidden_size=50,
    output_size=10,
)

In [66]:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
for i in range(iters_num):
    # Draw batch_size random indices from range(train_size) and slice out
    # the matching inputs and labels.
    batch_mask = np.random.choice(train_size, batch_size)
    x_batch, t_batch = x_train[batch_mask], t_train[batch_mask]

    # Compute the gradient by backpropagation.
    # network.numerical_gradient(x_batch, t_batch) is the alternative.
    grad = network.gradient(x_batch, t_batch)

    # Update the parameters in place with one gradient-descent step.
    for key in ('W1', 'b1', 'W2', 'b2'):
        network.params[key] -= learning_rate * grad[key]

    # Record the loss on the current mini-batch after the update.
    loss = network.loss(x_batch, t_batch)
    train_loss_list.append(loss)

    # Only evaluate accuracy on iterations that start a new epoch.
    if i % iter_per_epoch != 0:
        continue

    train_acc = network.accuracy(x_train, t_train)
    test_acc = network.accuracy(x_test, t_test)
    train_acc_list.append(train_acc)
    test_acc_list.append(test_acc)
    print("train acc, test acc | " + ", ".join((str(train_acc), str(test_acc))))

train acc, test acc | 0.10441666666666667, 0.1028
train acc, test acc | 0.7974333333333333, 0.8023
train acc, test acc | 0.87805, 0.8812
train acc, test acc | 0.8989333333333334, 0.9015
train acc, test acc | 0.90805, 0.9106
train acc, test acc | 0.9139166666666667, 0.9156
train acc, test acc | 0.9194166666666667, 0.9213
train acc, test acc | 0.9242833333333333, 0.9252
train acc, test acc | 0.9275666666666667, 0.9291
train acc, test acc | 0.9314833333333333, 0.9324
train acc, test acc | 0.9336333333333333, 0.9354
train acc, test acc | 0.93695, 0.9371
train acc, test acc | 0.9392333333333334, 0.9403
train acc, test acc | 0.9419, 0.9399
train acc, test acc | 0.94385, 0.9419
train acc, test acc | 0.9459333333333333, 0.9447
train acc, test acc | 0.9472, 0.9443

In [67]:

1
2
3
4
5
6
7
8
9
10
# Plot the recorded training and test accuracy against the epoch index.
markers = {'train': 'o', 'test': 's'}  # not referenced by the calls below
x = np.arange(len(train_acc_list))
plt.plot(x, train_acc_list, label='train acc')
plt.plot(x, test_acc_list, linestyle='--', label='test acc')
# Axis setup: fixed accuracy range plus axis labels.
plt.ylim(0, 1.0)
plt.xlabel("epochs")
plt.ylabel("accuracy")
plt.legend(loc='lower right')
plt.show()