风格迁移 Neural Transfer
# Desired size of the output image: use a small size when no GPU is available.
imsize = 512 if use_cuda else 128

# Default preprocessing: rescale the imported image, then turn it into a
# torch tensor.
loader = transforms.Compose([
    transforms.Scale(imsize),  # scale imported image
    transforms.ToTensor(),     # transform it into a torch tensor
])

# loader_new is meant to crop images of any size to one common size.
# NOTE(review): the transform list of loader_new was cut off in this file
# (the Compose([ call was never closed). The rescale + center-crop + ToTensor
# pipeline below is a reconstruction based on that description -- confirm.
loader_new = transforms.Compose([
    transforms.Scale(imsize),
    transforms.CenterCrop(imsize),
    transforms.ToTensor(),
])
def image_loader(image_name):
    """Open an image file and return it as a batched ``Variable``.

    The image is passed through the module-level ``loader`` transform and
    wrapped in a ``Variable``; a leading dimension of size one is then added
    so the result fits the network's batched input layout.
    """
    tensor = loader(Image.open(image_name))
    # Fake batch dimension required to fit the network's input dimensions.
    return Variable(tensor).unsqueeze(0)
# Load the style image first, then the content image, and cast both to the
# tensor type selected by ``dtype``.
style_img, content_img = (
    image_loader(path).type(dtype)
    for path in ("images/picasso.jpg", "images/dancing.jpg")
)

# Both images must have identical dimensions for the losses to be comparable.
assert style_img.size() == content_img.size(), (
    "we need to import style and content images of the same size"
)
导入的PIL图像的像素值范围是0-255,转化为torch.tensors的时候会变为0-1。注意:PyTorch中预训练好的网络是用0-1范围的tensor图像训练的。如果你将0-255范围的图像输入PyTorch的预训练网络,得到的特征图将没有意义。而Caffe中的预训练网络是用0-255范围的图像训练的,因此可以直接使用0-255的图像。
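The content loss is a function that takes as input the feature maps $F_{XL}$ at a layer $L$ of a network fed by the image $X$, and returns the weighted content distance $w_{CL} \cdot D_C^L(X, C)$ between this image and the content image. Hence, the weight $w_{CL}$ and the target content $F_{CL}$ are parameters of the function.
We implement this function as a torch module with a constructor that takes these parameters as input. The distance $\|F_{XL} - F_{CL}\|^2$ is the mean squared error between the two sets of feature maps, which can be computed using a criterion `nn.MSELoss`
stated as a third parameter.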
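We will add our content losses at each desired layer as additive modules of the neural network. That way, each time we feed the network with an input image $X$, all the content losses will be computed at the desired layers and, thanks to autograd, all the gradients will be computed. For that, we just need to make the `forward`
method of our module return its input: the module becomes a "transparent layer" of the neural network. The computed loss is saved as an attribute of the module.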
Finally, we define a fake `backward`
method that just calls the `backward` method of the `nn.MSELoss` loss
in order to reconstruct the gradient. This method returns the computed loss, which is useful when running the gradient descent in order to display the evolution of the style and content losses.
class ContentLoss(nn.Module):
    """Transparent layer that records a weighted content loss.

    ``forward`` returns its input unchanged, so the module can be inserted
    anywhere in a ``Sequential`` network without altering the activations.
    As a side effect it stores, in ``self.loss``, the mean squared error
    between the weighted input and the weighted target.

    Args:
        target: feature maps of the content image at this depth.
        weight: scalar applied to both the target and the input before the
            error is computed.
    """

    def __init__(self, target, weight):
        super(ContentLoss, self).__init__()
        # Detach the target from the graph used to dynamically compute the
        # gradient: it is a fixed value, not a variable to optimize.
        # Otherwise the forward method of the criterion will throw an error.
        self.target = target.detach() * weight
        self.weight = weight
        self.criterion = nn.MSELoss()

    def forward(self, input):
        """Store the content loss for ``input`` and return ``input`` as is."""
        self.loss = self.criterion(input * self.weight, self.target)
        self.output = input
        return self.output

    def backward(self, retain_graph=True):
        """Back-propagate the stored loss and return it.

        ``retain_graph`` is forwarded to the loss's own ``backward`` so that
        several loss layers sharing one graph can each be back-propagated.
        """
        self.loss.backward(retain_graph=retain_graph)
        return self.loss
class GramMatrix(nn.Module):
    """Compute the normalized gram matrix of a 4-D batch of feature maps."""

    def forward(self, input):
        """Return the gram product of the flattened maps over the element count.

        The four sizes of ``input`` are read as (batch, feature maps, height,
        width); the input is reshaped to ``(batch * maps, height * width)``
        before the product with its own transpose.
        """
        batch, maps, height, width = input.size()
        flat = input.view(batch * maps, height * width)
        gram = torch.mm(flat, flat.t())
        # 'Normalize' the gram matrix by the total number of elements.
        total = batch * maps * height * width
        return gram.div(total)
# The larger the feature-map dimension :math:`N`, the bigger the
# values of the gram matrix. Therefore, if we don't normalize by :math:`N`,
# the loss computed at the first layers (before pooling layers) will have
# much more importance during the gradient descent. We don't want that,
# since the most interesting style features are in the deepest layers!
# 风格损失模块和内容模块几乎是一样的,但我们需要将gramMatrix加到类中
# Then, the style loss module is implemented exactly the same way as the
# content loss module, but we have to add the ``gramMatrix`` as a
# parameter:
class StyleLoss(nn.Module):
    """Transparent layer that records a weighted style loss.

    ``forward`` returns a copy of its input. As a side effect it stores, in
    ``self.loss``, the mean squared error between the weighted gram matrix of
    the input and the weighted target gram matrix.

    Args:
        target: gram matrix of the style image's feature maps at this depth.
        weight: scalar applied to both the target and the input's gram
            matrix before the error is computed.
    """

    def __init__(self, target, weight):
        super(StyleLoss, self).__init__()
        # The target is a fixed value: detach it from the autograd graph.
        self.target = target.detach() * weight
        self.weight = weight
        self.gram = GramMatrix()
        self.criterion = nn.MSELoss()

    def forward(self, input):
        """Store the style loss for ``input`` and return a copy of ``input``."""
        self.output = input.clone()
        # Scale the gram matrix by the same weight that was applied to the
        # target, so the two sides of the error are on the same scale.
        self.G = self.gram(input) * self.weight
        self.loss = self.criterion(self.G, self.target)
        return self.output

    def backward(self, retain_graph=True):
        """Back-propagate the stored loss and return it.

        ``retain_graph`` is forwarded to the loss's own ``backward`` so that
        several loss layers sharing one graph can each be back-propagated.
        """
        self.loss.backward(retain_graph=retain_graph)
        return self.loss
# A ``Sequential`` module contains an ordered list of child modules. For
# instance, ``vgg19.features`` contains a sequence (Conv2d, ReLU,
# Maxpool2d, Conv2d, ReLU...) aligned in the right order of depth. As we
# said in *Content loss* section, we want to add our style and content
# loss modules as additive 'transparent' layers in our network, at desired
# depths. For that, we construct a new ``Sequential`` module, in which we
# are going to add modules from ``vgg19`` and our loss modules in the
# right order:
# 根据VGG19构造一个和VGG19结构类似的神经网络,其中包括设计好的内容损失层和风格损失层
# 这两个层在对于在网络中的训练作用为0,我们需要的是图像在经过时产生的损失值。
# Names of the layers after which the content / style losses are computed
# by default.
content_layers_default = ['conv_4']
style_layers_default = ['conv_{}'.format(depth) for depth in range(1, 6)]
def get_style_model_and_losses(cnn, style_img, content_img,
                               style_weight=1000, content_weight=1,
                               content_layers=None, style_layers=None):
    """Build a copy of ``cnn`` with content/style loss layers inserted.

    The layers of ``cnn`` are copied one by one into a new ``Sequential``
    module. Convolution layers are named ``conv_<i>``, ReLU layers
    ``relu_<i>`` and max-pooling layers ``pool_<i>``; ``i`` starts at 1 and
    is incremented after each ReLU. Whenever a conv or ReLU layer name is
    listed in ``content_layers`` / ``style_layers``, a ``ContentLoss`` /
    ``StyleLoss`` module is appended right after it, with its target computed
    by running the content / style image through the model built so far.

    Args:
        cnn: iterable of layers to copy (deep-copied, so it is not modified).
        style_img: image whose gram matrices are the style targets.
        content_img: image whose feature maps are the content targets.
        style_weight: weight given to every style loss.
        content_weight: weight given to every content loss.
        content_layers: layer names after which a content loss is added;
            defaults to ``content_layers_default``.
        style_layers: layer names after which a style loss is added;
            defaults to ``style_layers_default``.

    Returns:
        A ``(model, style_losses, content_losses)`` tuple, where the two
        lists hold the loss modules that were inserted into ``model``.
    """
    if content_layers is None:
        content_layers = content_layers_default
    if style_layers is None:
        style_layers = style_layers_default

    cnn = copy.deepcopy(cnn)

    # Lists giving iterable access to the inserted content/style losses.
    content_losses = []
    style_losses = []

    model = nn.Sequential()  # the new Sequential module network
    gram = GramMatrix()  # gram module used to compute the style targets

    # Move these modules to the GPU if possible.
    if use_cuda:
        model = model.cuda()
        gram = gram.cuda()

    def attach_losses(name, index):
        # Append the loss layers requested for the layer that was just added.
        if name in content_layers:
            target = model(content_img).clone()
            content_loss = ContentLoss(target, content_weight)
            model.add_module("content_loss_" + str(index), content_loss)
            content_losses.append(content_loss)
        if name in style_layers:
            target_feature = model(style_img).clone()
            target_feature_gram = gram(target_feature)
            style_loss = StyleLoss(target_feature_gram, style_weight)
            model.add_module("style_loss_" + str(index), style_loss)
            style_losses.append(style_loss)

    i = 1
    for layer in cnn:
        if isinstance(layer, nn.Conv2d):
            name = "conv_" + str(i)
            model.add_module(name, layer)
            attach_losses(name, i)
        if isinstance(layer, nn.ReLU):
            name = "relu_" + str(i)
            model.add_module(name, layer)
            attach_losses(name, i)
            # A ReLU closes the current conv/relu pair: move to the next index.
            i += 1
        if isinstance(layer, nn.MaxPool2d):
            name = "pool_" + str(i)
            model.add_module(name, layer)

    return model, style_losses, content_losses
# Input image
# ~~~~~~~~~~~
# For convenience the input image starts as a copy of the content image;
# a white-noise image could be created instead.
input_img = content_img.clone()
# if you want to use a white noise instead uncomment the below line:
# input_img = Variable(torch.randn(content_img.data.size())).type(dtype)
# add the original input image to the figure:
imshow(input_img.data, title='Input Image')
# 梯度下降
# ~~~~~~~~~~~~~~~~
# 这里我们使用L-BFGS算法来进行梯度下降,不同于训练一个网络,我们想要训练这个输入图片以降低内容/风格损失。我们就简单
# 创建一个python 的L-BFGS优化器,将输入图像当做变量来进行优化。但是optim.LBFGS()接受的第一个参数是一个Pytorch中包含需要进行梯度更新的Variable列表
# 我们的输入图像是一个Variable类型但不是计算树中的一部分。为了让这个函数知道输入图像这个Variable需要进行梯度计算。
# 一种可能的方法就是从输入图像中构造一个Parameter对象。然后我们只需要将其给了优化器的构造器即可。
def get_input_param_optimizer(input_img):
    """Wrap the input image in a ``Parameter`` and build its L-BFGS optimizer.

    Returns:
        An ``(input_param, optimizer)`` tuple, where ``input_param`` is an
        ``nn.Parameter`` built from ``input_img.data`` and ``optimizer`` is an
        ``optim.LBFGS`` instance whose only parameter is ``input_param``.
    """
    # Making the image a Parameter marks it as requiring a gradient.
    input_param = nn.Parameter(input_img.data)
    return input_param, optim.LBFGS([input_param])
# **Last step**: the loop of gradient descent. At each step, we must feed
# the network with the updated input in order to compute the new losses,
# we must run the ``backward`` methods of each loss to dynamically compute
# their gradients and perform the step of gradient descent. The optimizer
# requires as argument a "closure": a function that reevaluates the model
# and returns the loss.
# 最后一步:进行梯度下降的循环。每一步我们必须将更新后的数值输入到网络中去计算新的损失
# 在每个损失中我们用backward方法来计算他们的梯度然后进行梯度下降,优化器需要一个闭包(closure)函数来
# 重新评估模型并返回损失。
# However, there's a small catch. The optimized image may take its values
# between :math:`-\infty` and :math:`+\infty` instead of staying between 0
# and 1. In other words, the image might be well optimized and still have
# absurd values. In fact, we must perform an optimization under constraints
# in order to keep valid values in our input image. There is a simple
# solution: at each step, correct the image so that its values stay within
# the 0-1 interval.
def run_style_transfer(cnn, content_img, style_img, input_img, num_steps=300,
                       style_weight=1000, content_weight=1):
    """Run the style transfer.

    Builds the loss-instrumented model with ``get_style_model_and_losses``,
    wraps ``input_img`` in a parameter with ``get_input_param_optimizer`` and
    optimizes it until more than ``num_steps`` closure evaluations have run.

    Args:
        cnn: network whose layers are used to build the style model.
        content_img: image providing the content targets.
        style_img: image providing the style targets.
        input_img: starting image to optimize.
        num_steps: number of closure evaluations after which the loop stops.
        style_weight: weight given to every style loss.
        content_weight: weight given to every content loss.

    Returns:
        The data of the optimized input parameter, clamped to [0, 1].
    """
    print('Building the style transfer model..')
    model, style_losses, content_losses = get_style_model_and_losses(
        cnn, style_img, content_img, style_weight, content_weight)
    input_param, optimizer = get_input_param_optimizer(input_img)

    # One-element list so the closure can update the counter in place.
    run = [0]
    since = time.time()

    def closure():
        # correct the values of updated input image
        input_param.data.clamp_(0, 1)

        # Clear stale gradients, then run the image through the model so
        # every loss layer stores a fresh loss for the current image.
        optimizer.zero_grad()
        model(input_param)

        style_score = 0
        content_score = 0
        for sl in style_losses:
            style_score += sl.backward()
        for cl in content_losses:
            content_score += cl.backward()

        run[0] += 1
        if run[0] % 50 == 0:
            print("run {}:".format(run))
            # NOTE(review): ``.data[0]`` matches the Variable-style API used
            # in the rest of this file -- confirm against the installed
            # PyTorch version.
            print('Style Loss : {:4f} Content Loss: {:4f}'.format(
                style_score.data[0], content_score.data[0]))

        return style_score + content_score

    # The optimizer drives the closure; each step may evaluate it several
    # times, which is why the counter is advanced inside the closure.
    while run[0] <= num_steps:
        optimizer.step(closure)

    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(
        time_elapsed // 60, time_elapsed % 60))

    # a last correction...
    input_param.data.clamp_(0, 1)
    return input_param.data
1、A Neural Algorithm of Artistic Style https://arxiv.org/abs/1508.06576