1.6 Privileged training argument in the call method
Some layers, in particular the BatchNormalization layer and the Dropout layer, behave differently during training and inference. For such layers, it is standard practice to expose a training (boolean) argument in the call method. By exposing this argument in call, you enable the built-in training and evaluation loops (such as fit) to use the layer correctly during both training and inference.
import tensorflow as tf
from tensorflow.keras import layers

class CustomDropout(layers.Layer):

    def __init__(self, rate, **kwargs):
        super(CustomDropout, self).__init__(**kwargs)
        self.rate = rate

    def call(self, inputs, training=None):
        # Apply dropout only during training; at inference time
        # (training=False or the default None) pass inputs through unchanged.
        if training:
            return tf.nn.dropout(inputs, rate=self.rate)
        return inputs
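As a quick usage sketch (the variable names here are illustrative), calling the layer with training=True applies dropout, while training=False returns the inputs unchanged:

layer = CustomDropout(rate=0.5)
data = tf.ones((2, 4))

# Training: entries are randomly zeroed, survivors scaled by 1 / (1 - rate).
print(layer(data, training=True))

# Inference: the inputs pass through unchanged.
print(layer(data, training=False))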
2. Building models
2.1 The Model class
In general, you use the Layer class to define inner computation blocks, such as the residual blocks in ResNet, and the Model class to define the outer model, that is, the network you will train. For instance, in a ResNet50 model, you would have several residual blocks written as subclassed layers, and a single Model encompassing the entire ResNet50 network. The Model class has the same API as Layer, with the following differences (illustrated in a short sketch after the ResNet example below):
It exposes built-in training, evaluation, and prediction loops (model.fit(), model.evaluate(), model.predict()).
It exposes the list of its inner layers, via the model.layers property.
It exposes saving and serialization APIs.
Effectively, the Layer class corresponds to what the literature calls a "layer" (as in "convolution layer" or "recurrent layer") or a "block" (as in "ResNet block" or "Inception block"), while the Model class corresponds to what the literature calls a "model" (as in "deep learning model") or a "network" (as in "deep neural network").
class ResNet(tf.keras.Model):

    def __init__(self, num_classes=10):
        super(ResNet, self).__init__()
        # ResNetBlock is assumed to be a subclassed layer defined elsewhere.
        self.block_1 = ResNetBlock()
        self.block_2 = ResNetBlock()
        self.global_pool = layers.GlobalAveragePooling2D()
        self.classifier = layers.Dense(num_classes)

    def call(self, inputs):
        x = self.block_1(inputs)
        x = self.block_2(x)
        x = self.global_pool(x)
        return self.classifier(x)

resnet = ResNet()
dataset = ...  # placeholder for a tf.data.Dataset of (images, labels)
resnet.fit(dataset, epochs=10)
resnet.save_weights(filepath)  # filepath is a placeholder
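A minimal sketch of the three differences listed above, assuming the resnet model has been built and trained as in the example (dataset and filepath remain placeholders):

# 1. Built-in evaluation and prediction loops.
loss = resnet.evaluate(dataset)
predictions = resnet.predict(dataset)

# 2. The list of inner layers.
print([layer.name for layer in resnet.layers])

# 3. Saving and serialization.
resnet.save_weights(filepath)
resnet.load_weights(filepath)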
2.2 Putting it all together: an end-to-end example
So far, we have learned the following:
A layer encapsulates state (created in __init__ or build) and some computation (defined in call).
Layers can be recursively nested to create new, bigger computation blocks.
Layers can create and track losses (typically, regularization losses).
The outer container, the thing you want to train, is a Model. A Model is just like a layer, but with added training and serialization utilities.
Let's put all of this together into an end-to-end example: we will implement a Variational AutoEncoder (VAE) and train it on MNIST digits. Our VAE will be a subclass of Model, built as a nested composition of layers that themselves subclass Layer. It will feature a regularization loss (the KL divergence).
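For reference, the KL term added in the code below is the closed-form KL divergence between the approximate posterior N(mu, sigma^2) and the standard normal prior, where mu = z_mean and log(sigma^2) = z_log_var:

$$D_{\mathrm{KL}}\big(\mathcal{N}(\mu,\sigma^2)\,\big\|\,\mathcal{N}(0,1)\big) = -\tfrac{1}{2}\left(1 + \log\sigma^2 - \mu^2 - \sigma^2\right)$$

Averaging this quantity with tf.reduce_mean gives exactly the kl_loss expression in the code.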
class Sampling(layers.Layer):
    """Uses (z_mean, z_log_var) to sample z, the vector encoding a digit."""

    def call(self, inputs):
        z_mean, z_log_var = inputs
        batch = tf.shape(z_mean)[0]
        dim = tf.shape(z_mean)[1]
        epsilon = tf.keras.backend.random_normal(shape=(batch, dim))
        return z_mean + tf.exp(0.5 * z_log_var) * epsilon


class Encoder(layers.Layer):
    """Maps MNIST digits to a triplet (z_mean, z_log_var, z)."""

    def __init__(self, latent_dim=32, intermediate_dim=64,
                 name='encoder', **kwargs):
        super(Encoder, self).__init__(name=name, **kwargs)
        self.dense_proj = layers.Dense(intermediate_dim, activation='relu')
        self.dense_mean = layers.Dense(latent_dim)
        self.dense_log_var = layers.Dense(latent_dim)
        self.sampling = Sampling()

    def call(self, inputs):
        x = self.dense_proj(inputs)
        z_mean = self.dense_mean(x)
        z_log_var = self.dense_log_var(x)
        z = self.sampling((z_mean, z_log_var))
        return z_mean, z_log_var, z


class Decoder(layers.Layer):
    """Converts z, the encoded digit vector, back into a readable digit."""

    def __init__(self, original_dim, intermediate_dim=64,
                 name='decoder', **kwargs):
        super(Decoder, self).__init__(name=name, **kwargs)
        self.dense_proj = layers.Dense(intermediate_dim, activation='relu')
        self.dense_output = layers.Dense(original_dim, activation='sigmoid')

    def call(self, inputs):
        x = self.dense_proj(inputs)
        return self.dense_output(x)


class VariationalAutoEncoder(tf.keras.Model):
    """Combines the encoder and decoder into an end-to-end model for training."""

    def __init__(self, original_dim, intermediate_dim=64, latent_dim=32,
                 name='autoencoder', **kwargs):
        super(VariationalAutoEncoder, self).__init__(name=name, **kwargs)
        self.original_dim = original_dim
        self.encoder = Encoder(latent_dim=latent_dim,
                               intermediate_dim=intermediate_dim)
        self.decoder = Decoder(original_dim,
                               intermediate_dim=intermediate_dim)

    def call(self, inputs):
        z_mean, z_log_var, z = self.encoder(inputs)
        reconstructed = self.decoder(z)
        # Add KL divergence regularization loss.
        kl_loss = - 0.5 * tf.reduce_mean(
            z_log_var - tf.square(z_mean) - tf.exp(z_log_var) + 1)
        self.add_loss(kl_loss)
        return reconstructed


original_dim = 784
vae = VariationalAutoEncoder(original_dim, 64, 32)

optimizer = tf.keras.optimizers.Adam(learning_rate=1e-3)
mse_loss_fn = tf.keras.losses.MeanSquaredError()
loss_metric = tf.keras.metrics.Mean()

(x_train, _), _ = tf.keras.datasets.mnist.load_data()
x_train = x_train.reshape(60000, 784).astype('float32') / 255

train_dataset = tf.data.Dataset.from_tensor_slices(x_train)
train_dataset = train_dataset.shuffle(buffer_size=1024).batch(64)

epochs = 3

# Iterate over epochs.
for epoch in range(epochs):
    print('Start of epoch %d' % (epoch,))

    # Iterate over the batches of the dataset.
    for step, x_batch_train in enumerate(train_dataset):
        with tf.GradientTape() as tape:
            reconstructed = vae(x_batch_train)
            # Compute reconstruction loss
            loss = mse_loss_fn(x_batch_train, reconstructed)
            loss += sum(vae.losses)  # Add KLD regularization loss

        grads = tape.gradient(loss, vae.trainable_weights)
        optimizer.apply_gradients(zip(grads, vae.trainable_weights))

        loss_metric(loss)

        if step % 100 == 0:
            print('step %s: mean loss = %s' % (step, loss_metric.result()))
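Note that since the VAE subclasses Model, it also inherits the built-in training loops, so instead of the custom loop above you could have trained it like this:

vae = VariationalAutoEncoder(784, 64, 32)

optimizer = tf.keras.optimizers.Adam(learning_rate=1e-3)
vae.compile(optimizer, loss=tf.keras.losses.MeanSquaredError())
vae.fit(x_train, x_train, epochs=3, batch_size=64)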
2.3 Beyond object-oriented development: the Functional API
Was this example too much object-oriented development for your taste? Models can also be built with the Functional API. Importantly, choosing one style or the other does not prevent you from leveraging components written in the other style: you can always mix and match.
For example, the Functional API version below reuses the same Sampling layer we defined in the example above.
original_dim = 784
intermediate_dim = 64
latent_dim = 32

# Define encoder model.
original_inputs = tf.keras.Input(shape=(original_dim,), name='encoder_input')
x = layers.Dense(intermediate_dim, activation='relu')(original_inputs)
z_mean = layers.Dense(latent_dim, name='z_mean')(x)
z_log_var = layers.Dense(latent_dim, name='z_log_var')(x)
z = Sampling()((z_mean, z_log_var))
encoder = tf.keras.Model(inputs=original_inputs, outputs=z, name='encoder')

# Define decoder model.
latent_inputs = tf.keras.Input(shape=(latent_dim,), name='z_sampling')
x = layers.Dense(intermediate_dim, activation='relu')(latent_inputs)
outputs = layers.Dense(original_dim, activation='sigmoid')(x)
decoder = tf.keras.Model(inputs=latent_inputs, outputs=outputs, name='decoder')

# Define VAE model.
outputs = decoder(z)
vae = tf.keras.Model(inputs=original_inputs, outputs=outputs, name='vae')

# Add KL divergence regularization loss.
kl_loss = - 0.5 * tf.reduce_mean(
    z_log_var - tf.square(z_mean) - tf.exp(z_log_var) + 1)
vae.add_loss(kl_loss)

# Train.
optimizer = tf.keras.optimizers.Adam(learning_rate=1e-3)
vae.compile(optimizer, loss=tf.keras.losses.MeanSquaredError())
vae.fit(x_train, x_train, epochs=3, batch_size=64)
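As a follow-up sketch (not part of the original guide): because the decoder is a standalone Model, after training you can generate new digits by decoding random latent vectors sampled from the prior.

import numpy as np

# Sample latent vectors from the standard normal prior and decode them.
random_z = np.random.normal(size=(5, latent_dim)).astype('float32')
generated = decoder.predict(random_z)  # shape (5, 784), pixel values in [0, 1]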
Note: These notes follow the official TensorFlow tutorial; they are mainly a translation and restructuring of the "Writing custom layers and models with Keras" guide.