章三习题 | LEMON的博客

导入包

numpy是用Python进行科学计算的基本软件包。sklearn为数据挖掘和数据分析提供了简单高效的工具。matplotlib是一个用于在Python中绘制图表的库。testCases提供了一些测试例子来评估你的函数的正确性。planar_utils提供了在这个任务中使用的各种有用的函数。

import numpy as np
import matplotlib.pyplot as plt
from testCases import * 
import sklearn  
import sklearn.datasets
import sklearn.linear_model
from planar_utils import plot_decision_boundary, sigmoid, load_planar_dataset, load_extra_datasets

%matplotlib inline

np.random.seed(1) # 设置seed，使结果一致

导入“花卉数据集”

将一个“花朵”形状的二分类数据集加载到变量X和Y中。

# Load the planar "flower" dataset: X receives the inputs, Y the labels.
X, Y = load_planar_dataset()

使用matplotlib可视化数据集。

数据看起来像一朵由红色（标签y = 0）和蓝色（标签y = 1）数据点组成的“花朵”。你的目标是建立一个模型来拟合这些数据。

# Scatter the two input coordinates, coloured by label.
# The trailing semicolon suppresses the returned object's repr in a notebook.
plt.scatter(X[0, :], X[1, :], c=Y, s=40, cmap=plt.cm.Spectral);

mark

练习

你有多少个训练样例？此外，变量X和Y的shape分别是什么？shape属性通常用于获取数组的当前形状，但也可以通过为其赋值一个维度元组来改变数组的形状。

# Capture the shapes of the input matrix and the label matrix.
shape_X, shape_Y = X.shape, Y.shape
# The second axis of X is used as the number of training examples.
m = shape_X[1]

print ('The shape of X is: ' + str(shape_X))
print ('The shape of Y is: ' + str(shape_Y))
print ('I have m = %d training examples!' % (m))

简单的logistic逻辑回归

使用sklearn的内置函数来训练数据集上的logistic回归分类器。

# Baseline: cross-validated logistic regression from scikit-learn.
clf = sklearn.linear_model.LogisticRegressionCV()
# fit() expects samples in rows and a 1-D target vector, so transpose X and
# flatten Y (a column vector would only be flattened with a warning).
# The trailing semicolon suppresses the estimator repr in a notebook.
clf.fit(X.T, Y.ravel());

绘制逻辑回归的决策边界

# Draw the decision boundary learned by the logistic-regression baseline.
plot_decision_boundary(lambda x: clf.predict(x), X, Y)
plt.title("Logistic Regression")

打印精度

LR_predictions = clf.predict(X.T)
# Accuracy is the share of predictions that match the labels. For 0/1 labels
# this equals (Y.p + (1-Y).(1-p)) / Y.size, but np.mean returns a true scalar,
# so float() does not depend on converting a size-1 array with ndim > 0.
lr_accuracy = float(np.mean(LR_predictions == Y) * 100)
print ('逻辑回归的准确性：%d ' % lr_accuracy +
       '% ' + "（正确标记的数据点的百分比）")

mark 注：数据集不是线性可分的，所以逻辑回归表现不好。希望神经网络能做得更好。

神经网络模型

构建神经网络模型的一般方法

1. 定义神经网络结构（输入单元数、隐藏单元数等）。
2. 初始化模型的参数。
3. 循环：
   - 3.1 实现前向传播
   - 3.2 计算损失
   - 3.3 实现反向传播以获得梯度
   - 3.4 更新参数（梯度下降）

模型

定义神经网络结构

定义三个变量：

- n_x：输入层的大小
- n_h：隐藏层的大小（此处设为4）
- n_y：输出层的大小

# 分级功能
def layer_sizes(X, Y):
    """
    Derive the layer widths of the network from the data arrays.

    Arguments:
    X -- input array; the length of its first axis is the input layer size
    Y -- label array; the length of its first axis is the output layer size

    Returns:
    (n_x, n_h, n_y) -- input size, hidden size (always 4 here), output size
    """
    hidden_units = 4  # fixed hidden-layer width
    return (X.shape[0], hidden_units, Y.shape[0])

# Exercise layer_sizes() on the arrays supplied by the test-case helper.
X_assess, Y_assess = layer_sizes_test_case()
n_x, n_h, n_y = layer_sizes(X_assess, Y_assess)
print("The size of the input layer is: n_x = " + str(n_x))
print("The size of the hidden layer is: n_h = " + str(n_h))
print("The size of the output layer is: n_y = " + str(n_y))

预计输出 mark

初始化模型的参数

实现函数initialize_parameters()：

- 用随机值初始化权重矩阵。使用np.random.randn(a,b) * 0.01随机初始化形状为（a，b）的矩阵。
- 将偏置向量初始化为零。使用np.zeros((a,b))创建形状为（a，b）的全零矩阵。

def initialize_parameters(n_x, n_h, n_y):
    """
    Create the initial weights and biases of the two-layer network.

    Arguments:
    n_x -- size of the input layer
    n_h -- size of the hidden layer
    n_y -- size of the output layer

    Returns:
    parameters -- python dictionary containing:
                    W1 -- weight matrix of shape (n_h, n_x), small random values
                    b1 -- bias vector of shape (n_h, 1), zeros
                    W2 -- weight matrix of shape (n_y, n_h), small random values
                    b2 -- bias vector of shape (n_y, 1), zeros
    """
    # Reseed NumPy's global RNG so every call yields the same weights.
    np.random.seed(2)

    # The draw order (W1 first, then W2) determines the generated values.
    hidden_weights = np.random.randn(n_h, n_x) * 0.01
    output_weights = np.random.randn(n_y, n_h) * 0.01

    parameters = {"W1": hidden_weights,
                  "b1": np.zeros((n_h, 1)),
                  "W2": output_weights,
                  "b2": np.zeros((n_y, 1))}

    # Sanity-check every array against its expected shape.
    expected_shapes = (("W1", (n_h, n_x)),
                       ("b1", (n_h, 1)),
                       ("W2", (n_y, n_h)),
                       ("b2", (n_y, 1)))
    for key, shape in expected_shapes:
        assert (parameters[key].shape == shape)

    return parameters

# Exercise initialize_parameters() with the sizes from the test-case helper.
n_x, n_h, n_y = initialize_parameters_test_case()
parameters = initialize_parameters(n_x, n_h, n_y)

print("W1 = " + str(parameters["W1"]))
print("b1 = " + str(parameters["b1"]))
print("W2 = " + str(parameters["W2"]))
print("b2 = " + str(parameters["b2"]))

循环

前向传播

def forward_propagation(X, parameters):
    """
    Run one forward pass through the two-layer network.

    Arguments:
    X -- input data of size (n_x, m)
    parameters -- python dictionary holding "W1", "b1", "W2" and "b2"
                  (the output of the initialization function)

    Returns:
    A2 -- output of the second activation, computed by sigmoid()
    cache -- a dictionary containing "Z1", "A1", "Z2" and "A2"
    """
    # Hidden layer: affine map followed by tanh.
    hidden_linear = np.dot(parameters["W1"], X) + parameters["b1"]
    hidden_activation = np.tanh(hidden_linear)

    # Output layer: affine map followed by the imported sigmoid helper.
    output_linear = np.dot(parameters["W2"], hidden_activation) + parameters["b2"]
    output_activation = sigmoid(output_linear)

    # Exactly one output row is expected, with one column per column of X.
    assert(output_activation.shape == (1, X.shape[1]))

    cache = {"Z1": hidden_linear,
             "A1": hidden_activation,
             "Z2": output_linear,
             "A2": output_activation}

    return output_activation, cache

计算成本函数

#计算成本函数
def compute_cost(A2, Y, parameters):
    """
    Compute the cross-entropy cost averaged over the examples.

    Arguments:
    A2 -- output activations, same shape as Y
    Y -- label array; its second axis length is the number of examples
    parameters -- accepted for interface compatibility; not used here

    Returns:
    cost -- the averaged cross-entropy, passed through np.squeeze
    """
    example_count = Y.shape[1]

    # Element-wise log-likelihood of each label under the prediction.
    positive_term = np.multiply(Y, np.log(A2))
    negative_term = np.multiply((1 - Y), np.log(1 - A2))

    return np.squeeze(-np.sum(positive_term + negative_term) / example_count)

反向传播

def backward_propagation(parameters, cache, X, Y):
    """
    Compute the gradients of the cost for the two-layer network.

    Arguments:
    parameters -- python dictionary; only "W2" is read here
    cache -- python dictionary; "A1" and "A2" are read here
    X -- input data; its second axis length is the number of examples m
    Y -- labels, same shape as A2

    Returns:
    grads -- python dictionary with keys "dW1", "db1", "dW2" and "db2"
    """
    m = X.shape[1]

    # Only W2 is needed from the parameters; W1 does not enter the gradients.
    W2 = parameters["W2"]

    # Retrieve the activations A1 and A2 from the cache.
    A1 = cache["A1"]
    A2 = cache["A2"]

    # Output layer: (A2 - Y) is the error term for Z2.
    dZ2 = A2 - Y
    dW2 = np.dot(dZ2, A1.T) / m
    db2 = np.sum(dZ2, axis=1, keepdims=True) / m

    # Hidden layer: the derivative of tanh is g'(z) = 1 - g(z)^2, with g(z) = A1.
    dZ1 = np.dot(W2.T, dZ2) * (1 - np.power(A1, 2))
    dW1 = np.dot(dZ1, X.T) / m
    db1 = np.sum(dZ1, axis=1, keepdims=True) / m

    grads = {"dW1": dW1, "db1": db1, "dW2": dW2, "db2": db2}

    return grads

更新参数

def update_parameters(parameters, grads, learning_rate = 1.2):
    """
    Apply one gradient-descent step to every parameter.

    Arguments:
    parameters -- python dictionary holding "W1", "b1", "W2" and "b2"
    grads -- python dictionary holding "dW1", "db1", "dW2" and "db2"
    learning_rate -- step size multiplying each gradient

    Returns:
    parameters -- a new dictionary with the updated "W1", "b1", "W2", "b2";
                  the dictionary passed in is not modified
    """
    # Each gradient is stored under the parameter's name prefixed with "d".
    return {
        name: parameters[name] - learning_rate*grads["d" + name]
        for name in ("W1", "b1", "W2", "b2")
    }

建立神经网络模型

def nn_model(X, Y, n_h, num_iterations = 10000, print_cost = False, learning_rate = 1.2):
    """
    Train the two-layer network with batch gradient descent.

    Arguments:
    X -- input data; its first axis gives the input layer size
    Y -- labels; its first axis gives the output layer size
    n_h -- size of the hidden layer
    num_iterations -- number of gradient-descent iterations
    print_cost -- if True, print the cost every 1000 iterations
    learning_rate -- step size forwarded to update_parameters()

    Returns:
    parameters -- python dictionary with the learned "W1", "b1", "W2", "b2"
    """
    # Kept from the original for its effect on NumPy's global RNG; note that
    # initialize_parameters() in this file reseeds the RNG itself.
    np.random.seed(3)

    # The hidden size comes from the caller, so only n_x and n_y are taken
    # from layer_sizes().
    n_x, _, n_y = layer_sizes(X, Y)

    parameters = initialize_parameters(n_x, n_h, n_y)

    for i in range(num_iterations):
        # Forward pass, cost, backward pass, then one descent step.
        A2, cache = forward_propagation(X, parameters)
        cost = compute_cost(A2, Y, parameters)
        grads = backward_propagation(parameters, cache, X, Y)
        parameters = update_parameters(parameters, grads, learning_rate)

        if print_cost and i % 1000 == 0:
            print("循环%i次后的成本: %f" %(i, cost))

    return parameters

预测结果

使用您的模型通过构建predict（）来进行预测。使用前向传播来预测结果

def predict(parameters, X):
    """
    Classify every column of X with the trained network.

    Arguments:
    parameters -- python dictionary holding the network parameters
    X -- input data in the layout expected by forward_propagation()

    Returns:
    predictions -- boolean array, True where the output activation
                   exceeds the 0.5 threshold
    """
    # Only the output activation is needed; the cache is discarded.
    output_activation, _ = forward_propagation(X, parameters)
    return (output_activation > 0.5)

将数据输入神经网络模型

用具有 $n_h$ 个隐藏单元的单隐藏层测试你的模型。

# 建立一个n_h维隐藏层的模型
# Train a model with a hidden layer of size n_h
parameters = nn_model(X, Y, n_h = 4, num_iterations = 10000, print_cost=True)

# Plot the decision boundary
plot_decision_boundary(lambda x: predict(parameters, x.T), X, Y)
plt.title("Decision Boundary for hidden layer size " + str(4))

# Print the accuracy: the share of predictions that match the labels. For 0/1
# labels this equals (Y.p + (1-Y).(1-p)) / Y.size, but np.mean returns a true
# scalar, so float() does not depend on converting a (1, 1) array.
predictions = predict(parameters, X)
accuracy = float(np.mean(predictions == Y) * 100)
print ('准确度: %d' % accuracy + '%')

调整隐藏层大小（可选）

观察模型的各种隐藏层大小的不同行为

# Train one model per hidden-layer size and draw each decision boundary in
# its own subplot.
plt.figure(figsize=(16, 32))
hidden_layer_sizes = [1, 2, 3, 4, 5, 20, 50, 100]
for i, n_h in enumerate(hidden_layer_sizes):
    plt.subplot(5, 2, i+1)  # subplot indices start at 1
    plt.title('Hidden Layer of size %d' % n_h)
    parameters = nn_model(X, Y, n_h, num_iterations = 5000)
    plot_decision_boundary(lambda x: predict(parameters, x.T), X, Y)
    predictions = predict(parameters, X)
    # Share of matching labels in percent. np.mean returns a true scalar, so
    # float() does not depend on converting a (1, 1) array.
    accuracy = float(np.mean(predictions == Y) * 100)
    print ("节点数为{}时的分类准确度为 : {} %".format(n_h, accuracy))

较大的模型（具有更多的隐藏单元）能够更好地拟合训练集，直到最终最大的模型对数据过拟合。
最好的隐藏层大小似乎在n_h = 5左右。事实上，这个范围内的取值似乎能够很好地拟合数据，而且不会引起显著的过拟合。

代码

课程代码-GitHub

导入包