4.2 放大招—生成美女图片
4.2.1 美女图片
# Scrape image-search results from Baidu and save them to disk.
import requests
import os
import urllib


class Spider_baidu_image():
    """Download thumbnail images from Baidu image search for a keyword.

    The keyword and the number of result pages (30 images per page) are
    read interactively from stdin.
    """

    def __init__(self):
        # JSON endpoint used behind Baidu's image-search page.
        self.url = 'http://image.baidu.com/search/acjson?'
        self.headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 '
                          '(KHTML, like Gecko) Chrome/69.0.3497.81 Safari/537.36'}
        # The image CDN checks the Referer header, so send a plausible
        # search-results URL.  NOTE: the original contained a mangled
        # "©right=0" (HTML-decoded "&copyright=0"); restored here.
        self.headers_image = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 '
                          '(KHTML, like Gecko) Chrome/69.0.3497.81 Safari/537.36',
            'Referer': 'http://image.baidu.com/search/index?tn=baiduimage&ipn=r'
                       '&ct=201326592&cl=2&lm=-1&st=-1&fm=result&fr=&sf=1'
                       '&fmq=1557124645631_R&pv=&ic=&nc=1&z=&hd=1&latest=0'
                       '&copyright=0&se=1&showtab=0&fb=0&width=&height=&face=0'
                       '&istype=2&ie=utf-8&sid=&word=%E8%83%A1%E6%AD%8C'}
        self.keyword = input("请输入搜索图片关键字:")
        self.paginator = int(input("请输入搜索页数,每页30张图片:"))

    def get_param(self):
        """Build one URL query string per result page.

        :return: list of query strings, one per requested page.
        """
        keyword = urllib.parse.quote(self.keyword)
        params = []
        for i in range(1, self.paginator + 1):
            # pn is the result offset; each page holds rn=30 images.
            # "&copyright=0" restored from the mangled "©right=0".
            params.append(
                'tn=resultjson_com&ipn=rj&ct=201326592&is=&fp=result'
                '&queryWord={}&cl=2&lm=-1&ie=utf-8&oe=utf-8&adpicid=&st=-1'
                '&z=&ic=&hd=1&latest=0&copyright=0&word={}&s=&se=&tab='
                '&width=&height=&face=0&istype=2&qc=&nc=1&fr=&expermode='
                '&force=&cg=star&pn={}&rn=30&gsm=78&1557125391211='.format(
                    keyword, keyword, 30 * i))
        return params

    def get_urls(self, params):
        """Append each query string to the base URL; return the full URLs."""
        return [self.url + param for param in params]

    def get_image_url(self, urls):
        """Fetch every results page and collect the thumbnail URLs.

        :param urls: full request URLs produced by get_urls.
        :return: list of thumbnail URLs (may contain None for odd entries).
        """
        image_url = []
        for url in urls:
            json_data = requests.get(url, headers=self.headers).json()
            json_data = json_data.get('data')
            for item in json_data:
                if item:  # the trailing element of 'data' is an empty dict
                    image_url.append(item.get('thumbURL'))
        return image_url

    def get_image(self, image_url):
        """Download every image into a folder named after the keyword.

        :param image_url: list of thumbnail URLs from get_image_url.
        """
        file_name = os.path.join(os.getcwd(), self.keyword)
        # Check and create the SAME path (the original tested self.keyword
        # but created file_name, which can disagree).
        if not os.path.exists(file_name):
            os.makedirs(file_name)
        for index, url in enumerate(image_url, start=1):
            # os.path.join keeps this portable (original hard-coded '\\').
            with open(os.path.join(file_name, '{}.jpg'.format(index)), 'wb') as f:
                f.write(requests.get(url, headers=self.headers_image).content)
            # index starts at 1, so only full pages trigger the report;
            # floor division prints an integer page number, not a float.
            if index % 30 == 0:
                print('{}第{}页下载完成'.format(self.keyword, index // 30))

    def __call__(self, *args, **kwargs):
        params = self.get_param()
        urls = self.get_urls(params)
        image_url = self.get_image_url(urls)
        self.get_image(image_url)


if __name__ == '__main__':
    spider = Spider_baidu_image()
    spider()
4.2.2 提取人脸
- 首先去 github.com/nagadomi/lbpcascade_animeface 下载 lbpcascade_animeface.xml 文件
- 将文件夹重命名为英文(我这里改为 girls),以解决 OpenCV 读取图片路径不能含中文的问题
最后将文件夹中的图片全部变为128*128
# 提取美女的面部图片 import cv2 import sys import numpy as np import os.path from glob import glob def detect(filename, cascade_file="lbpcascade_animeface.xml"): if not os.path.isfile(cascade_file): raise RuntimeError("%s: not found" % cascade_file) cascade = cv2.CascadeClassifier(cascade_file) image = cv2.imread(filename) #image = cv2.imdecode(np.fromfile(filename, dtype=np.uint8),0) gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY) gray = cv2.equalizeHist(gray) faces = cascade.detectMultiScale(gray, # detector options scaleFactor=1.1, minNeighbors=5, minSize=(48, 48)) for i, (x, y, w, h) in enumerate(faces): face = image[y: y + h, x:x + w, :] face = cv2.resize(face, (128,128)) save_filename = '%s-%d.jpg' % (os.path.basename(filename).split('.')[0], i) cv2.imwrite("faces/" + save_filename, face) if __name__ == '__main__': if os.path.exists('faces') is False: os.makedirs('faces') file_list = glob('girls/*.jpg') for filename in file_list: detect(filename) 复制代码
4.2.3 开始训练
这里是基于tensorflow 1.X写的DCGAN,当然如果你现在环境是2.X也无所谓,使用
import tensorflow.compat.v1 as tf tf.disable_v2_behavior() 复制代码
屏蔽掉2.X的代码部分就好
# Train a DCGAN that generates 128x128 face images.
# Written against the TF 1.x graph API, run through tf.compat.v1 on TF 2.x.
import os
import time
import tensorflow.compat.v1 as tf
tf.disable_v2_behavior()
import numpy as np
from glob import glob
import datetime
import random
from PIL import Image
import matplotlib.pyplot as plt
%matplotlib inline


# Generator network.
def generator(z, output_channel_dim, training):
    """Map noise z to a 128x128x3 image in [-1, 1] (tanh output).

    `reuse = not training` lets the sampling call reuse the weights
    created by the training call of this scope.
    NOTE(review): output_channel_dim is accepted but never used — the
    final layer hard-codes filters=3.
    """
    with tf.variable_scope("generator", reuse= not training):
        # z -> 8x8x1024 via a dense layer reshaped to a feature map.
        fully_connected = tf.layers.dense(z, 8*8*1024)
        fully_connected = tf.reshape(fully_connected, (-1, 8, 8, 1024))
        fully_connected = tf.nn.leaky_relu(fully_connected)
        # 8x8x1024 -> 16x16x512
        trans_conv1 = tf.layers.conv2d_transpose(inputs=fully_connected, filters=512, kernel_size=[5,5], strides=[2,2], padding="SAME", kernel_initializer=tf.truncated_normal_initializer(stddev=WEIGHT_INIT_STDDEV), name="trans_conv1")
        batch_trans_conv1 = tf.layers.batch_normalization(inputs = trans_conv1, training=training, epsilon=EPSILON, name="batch_trans_conv1")
        trans_conv1_out = tf.nn.leaky_relu(batch_trans_conv1, name="trans_conv1_out")
        # 16x16x512 -> 32x32x256
        trans_conv2 = tf.layers.conv2d_transpose(inputs=trans_conv1_out, filters=256, kernel_size=[5,5], strides=[2,2], padding="SAME", kernel_initializer=tf.truncated_normal_initializer(stddev=WEIGHT_INIT_STDDEV), name="trans_conv2")
        batch_trans_conv2 = tf.layers.batch_normalization(inputs = trans_conv2, training=training, epsilon=EPSILON, name="batch_trans_conv2")
        trans_conv2_out = tf.nn.leaky_relu(batch_trans_conv2, name="trans_conv2_out")
        # 32x32x256 -> 64x64x128
        trans_conv3 = tf.layers.conv2d_transpose(inputs=trans_conv2_out, filters=128, kernel_size=[5,5], strides=[2,2], padding="SAME", kernel_initializer=tf.truncated_normal_initializer(stddev=WEIGHT_INIT_STDDEV), name="trans_conv3")
        batch_trans_conv3 = tf.layers.batch_normalization(inputs = trans_conv3, training=training, epsilon=EPSILON, name="batch_trans_conv3")
        trans_conv3_out = tf.nn.leaky_relu(batch_trans_conv3, name="trans_conv3_out")
        # 64x64x128 -> 128x128x64
        trans_conv4 = tf.layers.conv2d_transpose(inputs=trans_conv3_out, filters=64, kernel_size=[5,5], strides=[2,2], padding="SAME", kernel_initializer=tf.truncated_normal_initializer(stddev=WEIGHT_INIT_STDDEV), name="trans_conv4")
        batch_trans_conv4 = tf.layers.batch_normalization(inputs = trans_conv4, training=training, epsilon=EPSILON, name="batch_trans_conv4")
        trans_conv4_out = tf.nn.leaky_relu(batch_trans_conv4, name="trans_conv4_out")
        # 128x128x64 -> 128x128x3 (stride 1 keeps spatial size)
        logits = tf.layers.conv2d_transpose(inputs=trans_conv4_out, filters=3, kernel_size=[5,5], strides=[1,1], padding="SAME", kernel_initializer=tf.truncated_normal_initializer(stddev=WEIGHT_INIT_STDDEV), name="logits")
        out = tf.tanh(logits, name="out")
        return out


# Discriminator network.
def discriminator(x, reuse):
    """Classify x (128x128x3) as real/fake; returns (sigmoid out, logits).

    NOTE(review): batch_normalization here always uses training=True,
    even during the fake-image pass — intentional in many DCGAN
    implementations, but worth confirming.
    """
    with tf.variable_scope("discriminator", reuse=reuse):
        # 128x128x3 -> 64x64x64
        conv1 = tf.layers.conv2d(inputs=x, filters=64, kernel_size=[5,5], strides=[2,2], padding="SAME", kernel_initializer=tf.truncated_normal_initializer(stddev=WEIGHT_INIT_STDDEV), name='conv1')
        batch_norm1 = tf.layers.batch_normalization(conv1, training=True, epsilon=EPSILON, name='batch_norm1')
        conv1_out = tf.nn.leaky_relu(batch_norm1, name="conv1_out")
        # 64x64x64 -> 32x32x128
        conv2 = tf.layers.conv2d(inputs=conv1_out, filters=128, kernel_size=[5, 5], strides=[2, 2], padding="SAME", kernel_initializer=tf.truncated_normal_initializer(stddev=WEIGHT_INIT_STDDEV), name='conv2')
        batch_norm2 = tf.layers.batch_normalization(conv2, training=True, epsilon=EPSILON, name='batch_norm2')
        conv2_out = tf.nn.leaky_relu(batch_norm2, name="conv2_out")
        # 32x32x128 -> 16x16x256
        conv3 = tf.layers.conv2d(inputs=conv2_out, filters=256, kernel_size=[5, 5], strides=[2, 2], padding="SAME", kernel_initializer=tf.truncated_normal_initializer(stddev=WEIGHT_INIT_STDDEV), name='conv3')
        batch_norm3 = tf.layers.batch_normalization(conv3, training=True, epsilon=EPSILON, name='batch_norm3')
        conv3_out = tf.nn.leaky_relu(batch_norm3, name="conv3_out")
        # 16x16x256 -> 16x16x512 (stride 1)
        conv4 = tf.layers.conv2d(inputs=conv3_out, filters=512, kernel_size=[5, 5], strides=[1, 1], padding="SAME", kernel_initializer=tf.truncated_normal_initializer(stddev=WEIGHT_INIT_STDDEV), name='conv4')
        batch_norm4 = tf.layers.batch_normalization(conv4, training=True, epsilon=EPSILON, name='batch_norm4')
        conv4_out = tf.nn.leaky_relu(batch_norm4, name="conv4_out")
        # 16x16x512 -> 8x8x1024
        conv5 = tf.layers.conv2d(inputs=conv4_out, filters=1024, kernel_size=[5, 5], strides=[2, 2], padding="SAME", kernel_initializer=tf.truncated_normal_initializer(stddev=WEIGHT_INIT_STDDEV), name='conv5')
        batch_norm5 = tf.layers.batch_normalization(conv5, training=True, epsilon=EPSILON, name='batch_norm5')
        conv5_out = tf.nn.leaky_relu(batch_norm5, name="conv5_out")
        # Flatten and score with a single logit.
        flatten = tf.reshape(conv5_out, (-1, 8*8*1024))
        logits = tf.layers.dense(inputs=flatten, units=1, activation=None)
        out = tf.sigmoid(logits)
        return out, logits


# Model losses.
def model_loss(input_real, input_z, output_channel_dim):
    """Build the GAN losses; returns (d_loss, g_loss).

    Stabilization tricks: Gaussian instance noise on the real images
    and one-sided label smoothing (real labels drawn in [0.9, 1.0]).
    """
    g_model = generator(input_z, output_channel_dim, True)
    # Add small random noise to real inputs to slow the discriminator.
    noisy_input_real = input_real + tf.random_normal(shape=tf.shape(input_real), mean=0.0, stddev=random.uniform(0.0, 0.1), dtype=tf.float32)
    d_model_real, d_logits_real = discriminator(noisy_input_real, reuse=False)
    d_model_fake, d_logits_fake = discriminator(g_model, reuse=True)
    d_loss_real = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=d_logits_real, labels=tf.ones_like(d_model_real)*random.uniform(0.9, 1.0)))
    d_loss_fake = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=d_logits_fake, labels=tf.zeros_like(d_model_fake)))
    d_loss = tf.reduce_mean(0.5 * (d_loss_real + d_loss_fake))
    # Generator wants fakes scored as real.
    g_loss = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=d_logits_fake, labels=tf.ones_like(d_model_fake)))
    return d_loss, g_loss


# Model optimizers.
def model_optimizers(d_loss, g_loss):
    """Create Adam train ops for discriminator and generator.

    Runs the generator's batch-norm UPDATE_OPS before each step.
    NOTE(review): learning rates come from the global constants
    LR_D / LR_G — the lr placeholders created in model_inputs (and fed
    in train()) are never wired into these optimizers.
    """
    t_vars = tf.trainable_variables()
    g_vars = [var for var in t_vars if var.name.startswith("generator")]
    d_vars = [var for var in t_vars if var.name.startswith("discriminator")]
    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    gen_updates = [op for op in update_ops if op.name.startswith('generator')]
    with tf.control_dependencies(gen_updates):
        d_train_opt = tf.train.AdamOptimizer(learning_rate=LR_D, beta1=BETA1).minimize(d_loss, var_list=d_vars)
        g_train_opt = tf.train.AdamOptimizer(learning_rate=LR_G, beta1=BETA1).minimize(g_loss, var_list=g_vars)
    return d_train_opt, g_train_opt


# Model input placeholders.
def model_inputs(real_dim, z_dim):
    """Return placeholders: real images, noise z, and two learning rates."""
    inputs_real = tf.placeholder(tf.float32, (None, *real_dim), name='inputs_real')
    inputs_z = tf.placeholder(tf.float32, (None, z_dim), name="input_z")
    learning_rate_G = tf.placeholder(tf.float32, name="lr_g")
    learning_rate_D = tf.placeholder(tf.float32, name="lr_d")
    return inputs_real, inputs_z, learning_rate_G, learning_rate_D


# Display and save sample images.
def show_samples(sample_images, name, epoch):
    """Plot sample images side by side and save them (per-image + grid PNGs)."""
    figure, axes = plt.subplots(1, len(sample_images), figsize = (IMAGE_SIZE, IMAGE_SIZE))
    for index, axis in enumerate(axes):
        axis.axis('off')
        image_array = sample_images[index]
        axis.imshow(image_array)
        image = Image.fromarray(image_array)
        image.save(name+"_"+str(epoch)+"_"+str(index)+".png")
    plt.savefig(name+"_"+str(epoch)+".png", bbox_inches='tight', pad_inches=0)
    plt.show()
    plt.close()


# Sample from the generator.
def test(sess, input_z, out_channel_dim, epoch):
    """Generate SAMPLES_TO_SHOW images from random z and display them."""
    example_z = np.random.uniform(-1, 1, size=[SAMPLES_TO_SHOW, input_z.get_shape().as_list()[-1]])
    samples = sess.run(generator(input_z, out_channel_dim, False), feed_dict={input_z: example_z})
    # Map tanh output [-1, 1] back to uint8 pixel values [0, 255].
    sample_images = [((sample + 1.0) * 127.5).astype(np.uint8) for sample in samples]
    show_samples(sample_images, OUTPUT_DIR + "samples", epoch)


# Per-epoch training summary.
def summarize_epoch(epoch, duration, sess, d_losses, g_losses, input_z, data_shape):
    """Print mean losses over the last epoch, plot loss curves, and sample."""
    minibatch_size = int(data_shape[0]//BATCH_SIZE)
    print("Epoch {}/{}".format(epoch, EPOCHS),
          "\nDuration: {:.5f}".format(duration),
          "\nD Loss: {:.5f}".format(np.mean(d_losses[-minibatch_size:])),
          "\nG Loss: {:.5f}".format(np.mean(g_losses[-minibatch_size:])))
    fig, ax = plt.subplots()
    plt.plot(d_losses, label='Discriminator', alpha=0.6)
    plt.plot(g_losses, label='Generator', alpha=0.6)
    plt.title("Losses")
    plt.legend()
    plt.savefig(OUTPUT_DIR + "losses_" + str(epoch) + ".png")
    plt.show()
    plt.close()
    test(sess, input_z, data_shape[3], epoch)


# Split the data set into normalized mini-batches.
def get_batches(data):
    """Return a list of BATCH_SIZE batches scaled to [-1, 1].

    Each image is randomly flipped left-right (50%) as augmentation.
    """
    batches = []
    for i in range(int(data.shape[0]//BATCH_SIZE)):
        batch = data[i * BATCH_SIZE:(i + 1) * BATCH_SIZE]
        augmented_images = []
        for img in batch:
            image = Image.fromarray(img)
            if random.choice([True, False]):
                image = image.transpose(Image.FLIP_LEFT_RIGHT)
            augmented_images.append(np.asarray(image))
        batch = np.asarray(augmented_images)
        # Pixel range [0, 255] -> [-1, 1] to match the generator's tanh.
        normalized_batch = (batch / 127.5) - 1.0
        batches.append(normalized_batch)
    return batches


# Training loop.
def train(get_batches, data_shape, checkpoint_to_load=None):
    """Train the DCGAN.

    :param get_batches: pre-built list of batches (shadows the helper
        function of the same name — it is iterated, not called).
    :param data_shape: shape tuple of the full image array (N, H, W, C).
    :param checkpoint_to_load: accepted but unused — no checkpointing
        is implemented.
    """
    input_images, input_z, lr_G, lr_D = model_inputs(data_shape[1:], NOISE_SIZE)
    d_loss, g_loss = model_loss(input_images, input_z, data_shape[3])
    d_opt, g_opt = model_optimizers(d_loss, g_loss)
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        d_losses = []
        g_losses = []
        for epoch in range(EPOCHS):
            start_time = time.time()
            for batch_images in get_batches:
                batch_z = np.random.uniform(-1, 1, size=(BATCH_SIZE, NOISE_SIZE))
                # NOTE(review): the lr feeds are ignored by the optimizers
                # (they use the LR_D / LR_G constants directly).
                _ = sess.run(d_opt, feed_dict={input_images: batch_images, input_z: batch_z, lr_D: LR_D})
                _ = sess.run(g_opt, feed_dict={input_images: batch_images, input_z: batch_z, lr_G: LR_G})
                d_losses.append(d_loss.eval({input_z: batch_z, input_images: batch_images}))
                g_losses.append(g_loss.eval({input_z: batch_z}))
            # Summarize (and sample images) every 30 epochs.
            if epoch%30==0:
                summarize_epoch(epoch, time.time()-start_time, sess, d_losses, g_losses, input_z, data_shape)


# Hyperparameters and paths.
INPUT_DATA_DIR = "./faces"      # cropped 128x128 faces from the previous step
OUTPUT_DIR = './newpics/'       # generated samples and loss plots
if not os.path.exists(OUTPUT_DIR):
    os.makedirs(OUTPUT_DIR)
IMAGE_SIZE = 128
NOISE_SIZE = 100
LR_D = 0.00003                  # discriminator learning rate
LR_G = 0.0003                   # generator learning rate (10x LR_D)
BATCH_SIZE = 64
EPOCHS = 300
BETA1 = 0.5
WEIGHT_INIT_STDDEV = 0.02
EPSILON = 0.00005
SAMPLES_TO_SHOW = 5

# Main: load the face crops, preview a few, then train.
input_images = np.asarray([np.asarray(Image.open(file).resize((IMAGE_SIZE, IMAGE_SIZE))) for file in glob(INPUT_DATA_DIR + '/*.jpg')])
print ("Input: " + str(input_images.shape))
np.random.shuffle(input_images)
sample_images = random.sample(list(input_images), SAMPLES_TO_SHOW)
show_samples(sample_images, OUTPUT_DIR + "inputs", 0)
with tf.Graph().as_default():
    train(get_batches(input_images), input_images.shape)
随机输出五张输入的图片
开始的loss
第15个epoch的loss以及生成的图片
这个时候女朋友已经初具雏形,继续训练,第30个epoch
轮廓更明显,但是整体还是很模糊。
生成的图片这哪里是女朋友,明明是阿姨,还带点异国风情的那种。。。 失败原因可能在于:
- 爬取的百度图片里面不全是美女,有卡通图片以及一些异常图片干扰,我没有对图片进行清理,下次可以用一下百度的 API,颜值没有 60 分的 pass 掉。
- 网络训练提前结束,笔记本训练属实不给力,慢而且烫,明天用工作站试试。
- DCGAN中生成器网络设计还可以优化。
这一次制作女朋友应该算是以失败告终,下次用其它网络再试试!毕竟这可比现实中找女朋友容易多了,呜呜呜