Implementing Softmax Regression

Implementing softmax regression from scratch

The softmax formula: $$\mathrm{softmax}(X)_{ij} = \frac{e^{X_{ij}}}{\sum_k e^{X_{ik}}}$$
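As a quick sanity check (a minimal sketch, not part of the original code), applying the formula to a small matrix confirms that each row normalizes to a probability distribution:

import torch

X = torch.tensor([[1.0, 2.0, 3.0], [1.0, 1.0, 1.0]])
X_exp = torch.exp(X)
P = X_exp / X_exp.sum(dim=1, keepdim=True)  # row-wise normalization
print(P.sum(dim=1))  # tensor([1., 1.]) -- every row sums to 1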

import torch
from IPython import display
from d2l import torch as d2l

# Flatten each image into a vector of length 784; since the data has 10
# classes, the output dimension is 10
batch_size = 256
# fetch 256 images per minibatch
train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size)

num_inputs = 784
num_outputs = 10
W = torch.normal(mean=0,
                 std=0.01,
                 size=(num_inputs, num_outputs),
                 requires_grad=True)
b = torch.zeros(num_outputs, requires_grad=True)


def softmax(x):
    x_exp = torch.exp(x)
    partition = x_exp.sum(dim=1, keepdim=True)
    # the division returns a tensor directly
    return x_exp / partition  # broadcasting is applied here
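
# Note (an added remark, not in the original): exp() can overflow for large
# logits. A numerically stable sketch subtracts the row-wise maximum first,
# which leaves the result unchanged:
def stable_softmax(x):
    x = x - x.max(dim=1, keepdim=True).values
    x_exp = torch.exp(x)
    return x_exp / x_exp.sum(dim=1, keepdim=True)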


# Implement the softmax regression model
def net(x, w=W, b=b):
    return softmax(torch.matmul(x.reshape(-1, w.shape[0]), w) + b)

y = torch.tensor([0, 2, 2])
y_hat = torch.tensor([[0.1, 0.3, 0.6], [0.3, 0.2, 0.5], [0.1, 0.2, 0.6]])
# this indexing selects the three entries [0,0], [1,2], [2,2]
y_hat[[0, 1, 2], y]
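# An equivalent selection (an added aside, not in the original) via torch.gather:
y_hat.gather(1, y.reshape(-1, 1)).squeeze()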

# Define the cross-entropy loss
def cross_entropy(y_hat, y):
    return -torch.log(y_hat[range(len(y_hat)), y])

cross_entropy(y_hat, y)
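
# Cross-check (an added sketch, not in the original): the same per-sample
# losses via PyTorch's built-in negative log-likelihood on log-probabilities:
import torch.nn.functional as F
F.nll_loss(torch.log(y_hat), y, reduction='none')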


# Define a function that measures agreement between predicted and true classes


def accuracy(y_hat, y):
    """Compute the number of correct predictions"""
    if len(y_hat.shape) > 1 and y_hat.shape[1] > 1:  # y_hat is 2-D with more than one column
        # take the index of the largest element in each row as the predicted class
        y_hat = y_hat.argmax(dim=1)
    # cmp compares each row's predicted class with the true label
    cmp = y_hat.type(y.dtype) == y
    # True is converted to 1, False to 0
    return float(cmp.type(y.dtype).sum())


accuracy(y_hat, y) / len(y)
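# For the y_hat above, each row's argmax is class 2 while y = [0, 2, 2],
# so this evaluates to 2/3 (an added note, not in the original).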



# This class stores the number of correct predictions and the total number
# of predictions in its accumulated variables
class Accumulator:
    """Accumulate sums over n variables"""
    def __init__(self, n):
        self.data = [0.0] * n

    # accumulate the count of correct predictions and the total count
    def add(self, *args):
        self.data = [a + float(b) for a, b in zip(self.data, args)]

    def reset(self):
        self.data = [0.0] * len(self.data)

    def __getitem__(self, idx):
        return self.data[idx]
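
# Quick demo of Accumulator (an added sketch, not in the original):
m = Accumulator(2)
m.add(3, 10)  # 3 correct out of 10 samples
m.add(5, 10)  # 5 correct out of 10 samples
m[0] / m[1]   # 0.4 overall accuracy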



# Next, define a function that evaluates the accuracy of any net model
def evaluate_accuracy(net, data_iter):
    """Compute the accuracy of a model on the given dataset"""
    # isinstance() checks whether net is an instance of the given type, returning a bool
    if isinstance(net, torch.nn.Module):
        net.eval()  # switch to evaluation mode (affects layers such as dropout; it does not by itself disable gradient tracking)
    metric = Accumulator(2)  # number of correct predictions, total number of predictions
    # numel() returns the number of elements in a tensor
    for X, y in data_iter:
        metric.add(accuracy(net(X), y), y.numel())
    return metric[0] / metric[1]
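
# Added note: to actually skip gradient tracking during evaluation, the loop
# in evaluate_accuracy would be wrapped in torch.no_grad(); net.eval() alone
# does not disable autograd.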


def train_epoch_ch3(net, train_iter, loss, updater):
    if isinstance(net, torch.nn.Module):
        net.train()
    metric = Accumulator(3)
    for X, y in train_iter:
        # forward pass: compute predictions for X
        y_hat = net(X)
        l = loss(y_hat, y)
        if isinstance(updater, torch.optim.Optimizer):
            # clear the gradients
            updater.zero_grad()
            # compute gradients
            l.backward()
            # update the parameters
            updater.step()
            # accumulate loss, number of correct predictions, and sample count
            metric.add(float(l) * len(y), accuracy(y_hat, y), y.numel())
        else:  # otherwise use the custom updater defined below
            # compute gradients
            l.sum().backward()
            # update the parameters
            updater(X.shape[0])
            metric.add(float(l.sum()), accuracy(y_hat, y), y.numel())
    # return the average training loss and the training accuracy
    return metric[0] / metric[2], metric[1] / metric[2]



class Animator: #@save
    """Plot data in an animation"""
    def __init__(self, xlabel=None, ylabel=None, legend=None, xlim=None,
                 ylim=None, xscale='linear', yscale='linear',
                 fmts=('-', 'm--', 'g-.', 'r:'), nrows=1, ncols=1,
                 figsize=(6.5, 4.5)):
        # incrementally draw multiple lines
        if legend is None:
            legend = []
        d2l.use_svg_display()
        self.fig, self.axes = d2l.plt.subplots(nrows, ncols, figsize=figsize)
        if nrows * ncols == 1:
            self.axes = [self.axes, ]
        # use a lambda to capture the axis-configuration arguments
        self.config_axes = lambda: d2l.set_axes(
            self.axes[0], xlabel, ylabel, xlim, ylim, xscale, yscale, legend)
        self.X, self.Y, self.fmts = None, None, fmts

    def add(self, x, y):
        # add multiple data points to the chart
        if not hasattr(y, "__len__"):
            y = [y]
        n = len(y)
        if not hasattr(x, "__len__"):
            x = [x] * n
        if not self.X:
            self.X = [[] for _ in range(n)]
        if not self.Y:
            self.Y = [[] for _ in range(n)]
        for i, (a, b) in enumerate(zip(x, y)):
            if a is not None and b is not None:
                self.X[i].append(a)
                self.Y[i].append(b)
        # clear the axes once, replot every line, then refresh the display
        self.axes[0].cla()
        for x, y, fmt in zip(self.X, self.Y, self.fmts):
            self.axes[0].plot(x, y, fmt)
        self.config_axes()
        display.display(self.fig)
        display.clear_output(wait=True)



def train_ch3(net, train_iter, test_iter, loss, num_epochs, updater):
    """Run the full training loop
        num_epochs: number of epochs
    """
    animator = Animator(xlabel='epoch', xlim=[1, num_epochs], ylim=[0.3, 0.9],
                        legend=['train loss', 'train acc', 'test acc'])
    for epoch in range(num_epochs):
        train_metrics = train_epoch_ch3(net, train_iter, loss, updater)
        test_acc = evaluate_accuracy(net, test_iter)
        animator.add(epoch + 1, train_metrics + (test_acc,))
    train_loss, train_acc = train_metrics
    assert train_loss < 0.5, train_loss
    assert train_acc <= 1 and train_acc > 0.7, train_acc
    assert test_acc <= 1 and test_acc > 0.7, test_acc


lr = 0.1
# d2l.sgd updates the parameters with minibatch stochastic gradient descent
def updater(batch_size):
    return d2l.sgd([W, b], lr, batch_size)
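
# For reference, a sketch of minibatch SGD equivalent to what d2l.sgd does
# (an added aside; d2l.sgd itself is defined in the d2l book):
def sgd(params, lr, batch_size):
    with torch.no_grad():
        for param in params:
            param -= lr * param.grad / batch_size
            param.grad.zero_()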
num_epochs = 10
train_ch3(net, train_iter, test_iter, cross_entropy, num_epochs, updater)


def predict_ch3(net, test_iter, n=6): #@save
    """Predict labels (defined in Chapter 3)"""
    for X, y in test_iter:
        break
    trues = d2l.get_fashion_mnist_labels(y)
    preds = d2l.get_fashion_mnist_labels(net(X).argmax(axis=1))
    titles = [true +'\n' + pred for true, pred in zip(trues, preds)]
    d2l.show_images(
        X[0:n].reshape((n, 28, 28)), 1, n, titles=titles[0:n])
predict_ch3(net, test_iter)

Implementing softmax regression with PyTorch

"""
使用PyTorch实现sofamax回归
"""

import torch
from torch import nn
from d2l import torch as d2l
batch_size = 256
train_iter,test_iter = d2l.load_data_fashion_mnist(batch_size)


# PyTorch does not implicitly reshape the inputs, so we place a flatten
# layer before the linear layer to adjust the shape of the network input
net = nn.Sequential(nn.Flatten(), nn.Linear(784, 10))
def init_weights(m):
    """Initialize the weights"""
    if type(m) == nn.Linear:
        nn.init.normal_(m.weight, std=0.01)
net.apply(init_weights)


# cross-entropy loss
loss = nn.CrossEntropyLoss()

# stochastic gradient descent
trainer = torch.optim.SGD(net.parameters(), lr=0.1)

num_epochs = 10
from visdom import Visdom
import time

wind = Visdom(env=u"testV", use_incoming_socket=False)
# initialize the plotting window
wind.line([0.],  # y-coordinate of the first point
          [0.],  # x-coordinate of the first point
          win='train_loss',  # window name
          opts=dict(title='train_loss')  # chart title
          )
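
# Added note: this assumes a visdom server is already running
# (start one with `python -m visdom.server`); otherwise the Visdom()
# call above will fail to connect.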



def train_ch_test(net, train_iter, test_iter, loss, num_epochs, updater):
    """Run the full training loop
        num_epochs: number of epochs
    """
    for epoch in range(num_epochs):
        train_metrics = d2l.train_epoch_ch3(net, train_iter, loss, updater)
        test_acc = d2l.evaluate_accuracy(net, test_iter)
        train_loss, train_acc = train_metrics
        print(train_metrics)
        # plot the training loss with visdom
        wind.line([train_loss], [epoch + 1], win='train_loss', update='append')
        time.sleep(0.5)
    train_loss, train_acc = train_metrics
    assert train_loss < 0.5, train_loss
    assert train_acc <= 1 and train_acc > 0.7, train_acc
    assert test_acc <= 1 and test_acc > 0.7, test_acc


train_ch_test(net, train_iter, test_iter, loss, num_epochs, trainer)

Concise implementation

import torch
from torch import nn
from d2l import torch as d2l
batch_size = 256
train_iter,test_iter = d2l.load_data_fashion_mnist(batch_size)

# PyTorch does not implicitly reshape the inputs, so we place a flatten
# layer before the linear layer to adjust the shape of the network input
net = nn.Sequential(nn.Flatten(), nn.Linear(784, 10))
def init_weights(m):
    """Initialize the weights"""
    if type(m) == nn.Linear:
        nn.init.normal_(m.weight, std=0.01)
net.apply(init_weights)

loss = nn.CrossEntropyLoss()
trainer = torch.optim.SGD(net.parameters(), lr=0.1)
num_epochs = 10
d2l.train_ch3(net, train_iter, test_iter, loss, num_epochs, trainer)
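
One detail worth noting: nn.CrossEntropyLoss takes raw logits, because it fuses log-softmax with the negative log-likelihood internally, which is why the network above ends in a plain linear layer with no explicit softmax. A minimal check (an added sketch, not part of the original):

import torch
from torch import nn
import torch.nn.functional as F

logits = torch.randn(4, 10)
targets = torch.tensor([1, 0, 3, 9])
a = nn.CrossEntropyLoss()(logits, targets)
b = F.nll_loss(F.log_softmax(logits, dim=1), targets)
print(torch.allclose(a, b))  # True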

See also: http://www.xpshuai.cn/Pytorch%E5%AD%A6%E4%B9%A0-nn.html
