Using a Multiple Layer Network
We show how to build a multilayer neural network with TensorFlow.
### Low Birthrate Data:
#Columns Variable Abbreviation
#---------------------------------------------------------------------
# Low Birth Weight (0 = Birth Weight >= 2500g, LOW
# 1 = Birth Weight < 2500g)
# Age of the Mother in Years AGE
# Weight in Pounds at the Last Menstrual Period LWT
# Race (1 = White, 2 = Black, 3 = Other) RACE
# Smoking Status During Pregnancy (1 = Yes, 0 = No) SMOKE
# History of Premature Labor (0 = None 1 = One, etc.) PTL
# History of Hypertension (1 = Yes, 0 = No) HT
# Presence of Uterine Irritability (1 = Yes, 0 = No) UI
# Birth Weight in Grams BWT
#---------------------------------------------------------------------
The multilayer neural network we will create consists of three fully connected hidden layers, with 25, 10, and 3 nodes respectively.
import tensorflow as tf
import matplotlib.pyplot as plt
import csv
import os
import os.path
import random
import numpy as np
import requests
from tensorflow.python.framework import ops
# name of data file
birth_weight_file = 'birth_weight.csv'
# download data and create data file if file does not exist in current directory
if not os.path.exists(birth_weight_file):
    birthdata_url = 'https://github.com/nfmcclure/tensorflow_cookbook/raw/master/01_Introduction/07_Working_with_Data_Sources/birthweight_data/birthweight.dat'
    birth_file = requests.get(birthdata_url)
    birth_data = birth_file.text.split('\r\n')
    birth_header = birth_data[0].split('\t')
    birth_data = [[float(x) for x in y.split('\t') if len(x) >= 1] for y in birth_data[1:] if len(y) >= 1]
    with open(birth_weight_file, "w") as f:
        writer = csv.writer(f)
        writer.writerows([birth_header])
        writer.writerows(birth_data)
# read birth weight data into memory
birth_data = []
with open(birth_weight_file, newline='') as csvfile:
    csv_reader = csv.reader(csvfile)
    birth_header = next(csv_reader)
    for row in csv_reader:
        birth_data.append(row)
birth_data = [[float(x) for x in row] for row in birth_data]
# Extract y-target (birth weight)
y_vals = np.array([x[8:9] for x in birth_data])
# Filter for features of interest
cols_of_interest = ['AGE', 'LWT', 'RACE', 'SMOKE', 'PTL', 'HT', 'UI']
x_vals = np.array([[x[ix] for ix, feature in enumerate(birth_header) if feature in cols_of_interest] for x in birth_data])
# set batch size for training
batch_size = 10
# make results reproducible
seed = 3
np.random.seed(seed)
tf.random.set_seed(seed)
# Split data into train/test = 80%/20%
train_indices = np.random.choice(len(x_vals), round(len(x_vals)*0.8), replace=False)
test_indices = np.array(list(set(range(len(x_vals))) - set(train_indices)))
x_vals_train = x_vals[train_indices]
x_vals_test = x_vals[test_indices]
y_vals_train = y_vals[train_indices]
y_vals_test = y_vals[test_indices]
# Record training column max and min for scaling of non-training data
train_max = np.max(x_vals_train, axis=0)
train_min = np.min(x_vals_train, axis=0)
# Normalize by column (min-max norm to be between 0 and 1)
def normalize_cols(mat, max_vals, min_vals):
    return (mat - min_vals) / (max_vals - min_vals)
x_vals_train = np.nan_to_num(normalize_cols(x_vals_train, train_max, train_min))
x_vals_test = np.nan_to_num(normalize_cols(x_vals_test, train_max, train_min))
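As a quick sanity check (illustrative, not part of the original recipe), the scaled training columns should now lie in [0, 1]; the test columns may fall slightly outside that range because they are scaled with the training min and max.
# Illustrative sanity check: training features lie in [0, 1] after scaling;
# test features can fall slightly outside because the training min/max are reused.
assert x_vals_train.min() >= 0.0 and x_vals_train.max() <= 1.0
print('test feature range:', x_vals_test.min(), x_vals_test.max())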
# Define variable-creation functions (weights and biases)
def init_weight(shape, st_dev):
    weight = tf.Variable(tf.random.normal(shape, stddev=st_dev))
    return weight

def init_bias(shape, st_dev):
    bias = tf.Variable(tf.random.normal(shape, stddev=st_dev))
    return bias
# Define the model! We first create a function that builds a fully connected layer.
# Create a fully connected layer:
def fully_connected(input_layer, weights, biases):
    layer = tf.add(tf.matmul(input_layer, weights), biases)
    return tf.nn.relu(layer)
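As a minimal sketch (the dummy batch below is hypothetical), we can confirm that this helper maps a [batch, 7] input to [batch, n_nodes]:
# Illustrative shape check with a hypothetical dummy batch of 4 samples
dummy_x = tf.random.normal([4, 7])
dummy_w = init_weight(shape=[7, 25], st_dev=1.0)
dummy_b = init_bias(shape=[25], st_dev=10.0)
print(fully_connected(dummy_x, dummy_w, dummy_b).shape)  # (4, 25)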
# Initialize the variables and start the training loop.
learning_rate = 0.001
# Training loop
loss_vec = []
weight_1 = init_weight(shape=[7, 25], st_dev=1.0)
bias_1 = init_bias(shape=[25], st_dev=10.0)
weight_2 = init_weight(shape=[25, 10], st_dev=1.0)
bias_2 = init_bias(shape=[10], st_dev=10.0)
weight_3 = init_weight(shape=[10, 3], st_dev=1.0)
bias_3 = init_bias(shape=[3], st_dev=10.0)
weight_4 = init_weight(shape=[3, 1], st_dev=1.0)
bias_4 = init_bias(shape=[1], st_dev=1.0)
for i in range(3000):
    # Draw a random training batch
    rand_index = np.random.choice(len(x_vals_train), size=batch_size)
    rand_x = tf.cast(x_vals_train[rand_index], dtype=tf.float32)
    rand_y = tf.cast(y_vals_train[rand_index], dtype=tf.float32)
    with tf.GradientTape() as tape:
        #--------Create the first layer (25 hidden nodes)--------
        layer_1 = fully_connected(rand_x, weight_1, bias_1)
        #--------Create second layer (10 hidden nodes)--------
        layer_2 = fully_connected(layer_1, weight_2, bias_2)
        #--------Create third layer (3 hidden nodes)--------
        layer_3 = fully_connected(layer_2, weight_3, bias_3)
        #--------Create output layer (1 output value)--------
        final_output = fully_connected(layer_3, weight_4, bias_4)
        # Declare loss function (L1)
        loss = tf.reduce_mean(tf.abs(rand_y - final_output))
    # Manual gradient-descent update on every weight and bias
    variables = [weight_1, bias_1, weight_2, bias_2, weight_3, bias_3, weight_4, bias_4]
    grads = tape.gradient(loss, variables)
    for var, grad in zip(variables, grads):
        var.assign_sub(learning_rate * grad)
    loss_vec.append(loss.numpy())
    if (i + 1) % 25 == 0:
        print('Generation: ' + str(i + 1) + '. Loss = ' + str(loss.numpy()))
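With training done, the same forward pass can be reused on the held-out test set; a minimal sketch of the test L1 loss using the trained variables:
# Evaluate the trained network on the test set (L1 loss), reusing the layers
test_x = tf.cast(x_vals_test, dtype=tf.float32)
layer = fully_connected(test_x, weight_1, bias_1)
layer = fully_connected(layer, weight_2, bias_2)
layer = fully_connected(layer, weight_3, bias_3)
test_preds = fully_connected(layer, weight_4, bias_4)
test_l1 = tf.reduce_mean(tf.abs(tf.cast(y_vals_test, tf.float32) - test_preds))
print('Test L1 loss: ' + str(test_l1.numpy()))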
# Plot the loss function.
%matplotlib inline
# Plot loss (L1) over time
plt.plot(loss_vec, 'k-', label='Train Loss')
plt.title('Loss (L1) per Generation')
plt.legend(loc='upper right')
plt.xlabel('Generation')
plt.ylabel('Loss')
plt.show()
# Create variable definition
def init_variable(shape):
    return tf.Variable(tf.random.normal(shape=shape))
# Create a logistic layer definition
def logistic(input_layer, multiplication_weight, bias_weight, activation=True):
    linear_layer = tf.add(tf.matmul(input_layer, multiplication_weight), bias_weight)
    # We separate out the activation at the end because the loss function
    # will apply the final sigmoid internally
    if activation:
        return tf.nn.sigmoid(linear_layer)
    else:
        return linear_layer
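To illustrate the activation flag (the 1x7 dummy input below is hypothetical): with the default activation=True the output passes through a sigmoid, while activation=False returns the raw logit for the loss used below.
# Illustrative: sigmoid output vs. raw logit on a hypothetical 1x7 input
dummy_x = tf.random.normal([1, 7])
dummy_w, dummy_b = init_variable([7, 1]), init_variable([1])
print(logistic(dummy_x, dummy_w, dummy_b))                    # value in (0, 1)
print(logistic(dummy_x, dummy_w, dummy_b, activation=False))  # unbounded logit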
# Define the logistic network variables (7 inputs -> 14 -> 5 -> 1)
A1 = init_variable(shape=[7,14])
b1 = init_variable(shape=[14])
A2 = init_variable(shape=[14,5])
b2 = init_variable(shape=[5])
A3 = init_variable(shape=[5,1])
b3 = init_variable(shape=[1])
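Sigmoid cross-entropy expects a 0/1 target, so for this classifier we assume the intended label is the binary LOW indicator (the first column in the data description above) rather than the raw birth weight; it is split with the same train/test indices:
# Classification target: the binary LOW indicator (column 0 of the data)
y_binary = np.array([x[0:1] for x in birth_data])
y_binary_train = y_binary[train_indices]
y_binary_test = y_binary[test_indices]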
optimizer = tf.optimizers.SGD(learning_rate)
# Training loop
loss_vec = []
for i in range(3000):
    # Draw a random training batch with the binary LOW labels
    rand_index = np.random.choice(len(x_vals_train), size=batch_size)
    rand_x = tf.cast(x_vals_train[rand_index], dtype=tf.float32)
    rand_y = tf.cast(y_binary_train[rand_index], dtype=tf.float32)
    with tf.GradientTape() as tape:
        # First logistic layer (7 inputs to 14 hidden nodes)
        logistic_layer1 = logistic(rand_x, A1, b1)
        # Second logistic layer (14 hidden inputs to 5 hidden nodes)
        logistic_layer2 = logistic(logistic_layer1, A2, b2)
        # Final output layer (5 hidden nodes to 1 output), logits only
        final_output = logistic(logistic_layer2, A3, b3, activation=False)
        # Declare loss function (cross entropy on the logits)
        loss = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=final_output, labels=rand_y))
    grads = tape.gradient(loss, [A1, b1, A2, b2, A3, b3])
    optimizer.apply_gradients(zip(grads, [A1, b1, A2, b2, A3, b3]))
    loss_vec.append(loss.numpy())
    if (i + 1) % 150 == 0:
        print('Generation: ' + str(i + 1) + '. Loss = ' + str(loss.numpy()))
%matplotlib inline
# Plot loss over time
plt.plot(loss_vec, 'k-')
plt.title('Cross Entropy Loss per Generation')
plt.xlabel('Generation')
plt.ylabel('Cross Entropy Loss')
plt.show()
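Finally, a minimal sketch of how classification accuracy could be measured with the trained variables, thresholding the sigmoid output at 0.5 (the predict helper below is our own addition):
# Measure train/test accuracy by thresholding the sigmoid output at 0.5
def predict(x):
    layer1 = logistic(tf.cast(x, tf.float32), A1, b1)
    layer2 = logistic(layer1, A2, b2)
    logits = logistic(layer2, A3, b3, activation=False)
    return tf.cast(tf.nn.sigmoid(logits) > 0.5, tf.float32)

train_acc = np.mean(predict(x_vals_train).numpy() == y_binary_train)
test_acc = np.mean(predict(x_vals_test).numpy() == y_binary_test)
print('Train accuracy: ' + str(train_acc) + ', Test accuracy: ' + str(test_acc))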