Gradient descent is a first-order optimization method: it uses only the first derivative of the loss function and ignores all higher-order derivatives, so it knows nothing about the loss function's curvature. It can tell whether the loss is decreasing and how quickly, but it cannot distinguish whether the surface is flat, curving upward, or curving downward.
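As a minimal sketch of what "first-order" means in practice (the quadratic loss and learning rate here are purely illustrative and unrelated to the network below), a plain gradient-descent loop only ever queries the first derivative:

def f(x):
    # illustrative loss: a simple quadratic with its minimum at x = 3
    return (x - 3.0) ** 2

def grad(x):
    # first derivative only; no curvature (second-derivative) information is used
    return 2.0 * (x - 3.0)

x, lr = 0.0, 0.1
for step in range(100):
    x -= lr * grad(x)   # update rule: x <- x - lr * f'(x)
print(x, f(x))          # x converges toward the minimum at 3.0

A second-order method would additionally consult the second derivative to account for curvature; gradient descent has no access to that information and relies on the learning rate alone.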
network.py
import time

from keras.callbacks import EarlyStopping, ReduceLROnPlateau
from keras.layers import *
from keras.models import Model
from keras.optimizers import Adam

import processor
from path import MODEL_PATH
def one_obj(frame_l=32, joint_n=25, joint_d=3, words_size=500):
    # Two-stream CNN: one branch takes raw joint coordinates, the other takes
    # frame-to-frame joint differences; the branches are fused by concatenation.
    input_joints = Input(name='joints', shape=(frame_l, joint_n, joint_d))
    input_joints_diff = Input(name='joints_diff', shape=(frame_l, joint_n, joint_d))

    # Branch 1: raw joint positions
    x = Conv2D(filters=64, kernel_size=(1, 1), padding='same')(input_joints)
    x = BatchNormalization()(x)
    x = LeakyReLU()(x)
    x = Conv2D(filters=32, kernel_size=(3, 1), padding='same')(x)
    x = BatchNormalization()(x)
    x = LeakyReLU()(x)
    x = Permute((1, 3, 2))(x)
    x = Conv2D(filters=32, kernel_size=(3, 3), padding='same')(x)
    x = BatchNormalization()(x)
    x = LeakyReLU()(x)
    x = MaxPool2D(pool_size=(2, 2))(x)
    x = Conv2D(filters=64, kernel_size=(3, 3), padding='same')(x)
    x = LeakyReLU()(x)  # note: no BatchNormalization here, unlike the diff branch
    x = MaxPool2D(pool_size=(2, 2))(x)

    # Branch 2: joint differences between consecutive frames
    x_d = Conv2D(filters=64, kernel_size=(1, 1), padding='same')(input_joints_diff)
    x_d = BatchNormalization()(x_d)
    x_d = LeakyReLU()(x_d)
    x_d = Conv2D(filters=32, kernel_size=(3, 1), padding='same')(x_d)
    x_d = BatchNormalization()(x_d)
    x_d = LeakyReLU()(x_d)
    x_d = Permute((1, 3, 2))(x_d)
    x_d = Conv2D(filters=32, kernel_size=(3, 3), padding='same')(x_d)
    x_d = BatchNormalization()(x_d)
    x_d = LeakyReLU()(x_d)
    x_d = MaxPool2D(pool_size=(2, 2))(x_d)
    x_d = Conv2D(filters=64, kernel_size=(3, 3), padding='same')(x_d)
    x_d = BatchNormalization()(x_d)
    x_d = LeakyReLU()(x_d)
    x_d = MaxPool2D(pool_size=(2, 2))(x_d)

    # Fuse the two streams and classify into words_size classes
    x = concatenate([x, x_d], axis=-1)
    x = Conv2D(filters=128, kernel_size=(1, 1), padding='same')(x)
    x = BatchNormalization()(x)
    x = LeakyReLU()(x)
    x = MaxPool2D(pool_size=(2, 2))(x)
    x = Dropout(0.1)(x)
    x = Conv2D(filters=256, kernel_size=(1, 1), padding='same')(x)
    x = BatchNormalization()(x)
    x = LeakyReLU()(x)
    x = MaxPool2D(pool_size=(2, 2))(x)
    x = Dropout(0.1)(x)
    x = Flatten()(x)
    x = Dense(256)(x)
    x = LeakyReLU()(x)
    x = Dense(words_size, activation='softmax')(x)

    model = Model([input_joints, input_joints_diff], x)
    return model
class SLRNetwork:
    # Define the neural network
    def __init__(self, words_size, is_training=True):
        self.words_size = words_size
        self.is_training = is_training
        model = one_obj(processor.MAX_DEPTH, processor.NUM_KEYPOINT, 2, words_size)
        opt = Adam(0.005, epsilon=1e-8)
        model.compile(opt, 'categorical_crossentropy', metrics=['acc'])
        self.model = model
    def train_speech_to_text_network(self, epochs, batch_size, dataset, model_helper, cont_train=False):
        model = self.model
        # EarlyStopping is created but not wired into this manual training loop;
        # only the learning-rate scheduler is driven by hand below.
        early = EarlyStopping(monitor="loss", mode="min", patience=10)
        lr_change = ReduceLROnPlateau(monitor="loss", verbose=1, factor=0.2,
                                      patience=100, min_lr=1e-8, cooldown=100)
        if cont_train:
            model_helper.load_model(model, MODEL_PATH)
        lr_change.set_model(model)
        lr_change.on_train_begin()

        best_acc = 0.0
        for epoch in range(epochs):
            start = time.perf_counter()  # time.clock() was removed in Python 3.8
            x_train, y_train, _, _ = dataset.next_batch(batch_size, test_data=False)
            loss_batch = model.train_on_batch(x=x_train, y=y_train)
            lr_change.on_epoch_end(epoch, logs={"loss": loss_batch[0]})
            print(epoch, time.perf_counter() - start, loss_batch)

            # Evaluate every 10 epochs and keep the best-performing weights
            if epoch % 10 == 0:
                _, _, x_test, y_test = dataset.next_batch(batch_size, test_data=True, test_size=1024)
                val = model.evaluate(x_test, y_test, batch_size=batch_size)
                print('val data info', val)
                if val[1] > best_acc:
                    best_acc = val[1]
                    model_helper.save_model(model, MODEL_PATH)
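As a rough usage sketch (the Dataset and ModelHelper classes, their module names, and the epoch/batch values below are hypothetical stand-ins for whatever objects in this project provide next_batch, save_model, and load_model; only SLRNetwork itself comes from network.py):

# train.py (illustrative only)
from network import SLRNetwork
from my_dataset import Dataset            # hypothetical: must provide next_batch()
from my_model_helper import ModelHelper   # hypothetical: must provide save_model()/load_model()

dataset = Dataset()
model_helper = ModelHelper()

network = SLRNetwork(words_size=500)
network.model.summary()  # prints the two-stream architecture
network.train_speech_to_text_network(epochs=10000, batch_size=32,
                                      dataset=dataset, model_helper=model_helper)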