Introduction

This article introduces the principles of CGAN and ACGAN, which control what images are generated by introducing an extra condition, and implements them on top of DCGAN and WGAN.

How CGAN Works

A sample x can carry some attributes, or in other words conditions, denoted y.

For example, in MNIST each image corresponds to a digit from 0 to 9.
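With one_hot encoding, such a condition becomes a ten-dimensional indicator vector; a small illustration (not part of the original code):

import numpy as np

# one-hot condition vector for the digit 3 (illustrative)
y = np.eye(10)[3]
print(y)  # [0. 0. 0. 1. 0. 0. 0. 0. 0. 0.]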

The idea of CGAN (Conditional GAN) can be grasped from a single diagram:

The generator G produces fake samples from random noise z and the condition y, while the discriminator D receives both real and fake samples together with the condition y and judges whether a sample is a real sample that satisfies condition y.

The overall objective function is as follows:

\min_{G}\max_{D} V(D,G)=\mathbb{E}_{x\sim p_{data}}[\log D(x|y)] + \mathbb{E}_{z\sim p_z}[\log(1-D(G(z|y)))]

Implementation

We start with MNIST, making small modifications to the DCGAN code to obtain a CGAN.

Load the libraries:

# -*- coding: utf-8 -*-

import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import os, imageio
from tqdm import tqdm

Load the data, specifying one_hot=True (the TF 1.x tutorial module downloads MNIST automatically on first use):

from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets('MNIST_data', one_hot=True)

Define some constants, the network inputs, and a few helper functions; compared with DCGAN, y_label and y_noise are added:

batch_size = 100
z_dim = 100
WIDTH = 28
HEIGHT = 28
LABEL = 10

OUTPUT_DIR = 'samples'
if not os.path.exists(OUTPUT_DIR):
    os.mkdir(OUTPUT_DIR)

X = tf.placeholder(dtype=tf.float32, shape=[None, HEIGHT, WIDTH, 1], name='X')
y_label = tf.placeholder(dtype=tf.float32, shape=[None, HEIGHT, WIDTH, LABEL], name='y_label')
noise = tf.placeholder(dtype=tf.float32, shape=[None, z_dim], name='noise')
y_noise = tf.placeholder(dtype=tf.float32, shape=[None, LABEL], name='y_noise')
is_training = tf.placeholder(dtype=tf.bool, name='is_training')

def lrelu(x, leak=0.2):
    return tf.maximum(x, leak * x)

def sigmoid_cross_entropy_with_logits(x, y):
    return tf.nn.sigmoid_cross_entropy_with_logits(logits=x, labels=y)
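The y_label placeholder is four-dimensional because the condition is tiled over every spatial position and concatenated with the image along the channel axis; the training loop below builds exactly such a tensor. A quick sanity check of the broadcast (illustrative, not part of the model):

# tile a one-hot label into a HEIGHT x WIDTH x LABEL map, mirroring
# the yl construction in the training loop below
label = np.eye(LABEL)[[3]]                      # shape (1, 10): the digit 3
label_map = np.reshape(label, [1, 1, 1, LABEL]) * np.ones([1, HEIGHT, WIDTH, LABEL])
print(label_map.shape)                          # (1, 28, 28, 10)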

The discriminator:

def discriminator(image, label, reuse=None, is_training=is_training):
    momentum = 0.9
    with tf.variable_scope('discriminator', reuse=reuse):
        h0 = tf.concat([image, label], axis=3)
        h0 = lrelu(tf.layers.conv2d(h0, kernel_size=5, filters=64, strides=2, padding='same'))

        h1 = tf.layers.conv2d(h0, kernel_size=5, filters=128, strides=2, padding='same')
        h1 = lrelu(tf.contrib.layers.batch_norm(h1, is_training=is_training, decay=momentum))

        h2 = tf.layers.conv2d(h1, kernel_size=5, filters=256, strides=2, padding='same')
        h2 = lrelu(tf.contrib.layers.batch_norm(h2, is_training=is_training, decay=momentum))

        h3 = tf.layers.conv2d(h2, kernel_size=5, filters=512, strides=2, padding='same')
        h3 = lrelu(tf.contrib.layers.batch_norm(h3, is_training=is_training, decay=momentum))

        h4 = tf.contrib.layers.flatten(h3)
        h4 = tf.layers.dense(h4, units=1)
        return tf.nn.sigmoid(h4), h4

The generator:

def generator(z, label, is_training=is_training):
    momentum = 0.9
    with tf.variable_scope('generator', reuse=None):
        d = 3
        z = tf.concat([z, label], axis=1)
        h0 = tf.layers.dense(z, units=d * d * 512)
        h0 = tf.reshape(h0, shape=[-1, d, d, 512])
        h0 = tf.nn.relu(tf.contrib.layers.batch_norm(h0, is_training=is_training, decay=momentum))

        h1 = tf.layers.conv2d_transpose(h0, kernel_size=5, filters=256, strides=2, padding='same')
        h1 = tf.nn.relu(tf.contrib.layers.batch_norm(h1, is_training=is_training, decay=momentum))

        h2 = tf.layers.conv2d_transpose(h1, kernel_size=5, filters=128, strides=2, padding='same')
        h2 = tf.nn.relu(tf.contrib.layers.batch_norm(h2, is_training=is_training, decay=momentum))

        h3 = tf.layers.conv2d_transpose(h2, kernel_size=5, filters=64, strides=2, padding='same')
        h3 = tf.nn.relu(tf.contrib.layers.batch_norm(h3, is_training=is_training, decay=momentum))

        h4 = tf.layers.conv2d_transpose(h3, kernel_size=5, filters=1, strides=1, padding='valid', activation=tf.nn.tanh, name='g')
        return h4
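The spatial sizes work out to 28×28: the dense output is reshaped to 3×3, each stride-2 'same' transposed convolution doubles it, and the final stride-1 'valid' layer with kernel size 5 adds 4 pixels. A small helper to verify this (an illustrative sketch, not part of the model):

# output size of tf.layers.conv2d_transpose for 'same' and 'valid' padding
def deconv_out(size, kernel, stride, padding):
    if padding == 'same':
        return size * stride
    return (size - 1) * stride + kernel  # 'valid'

size = 3
for kernel, stride, padding in [(5, 2, 'same')] * 3 + [(5, 1, 'valid')]:
    size = deconv_out(size, kernel, stride, padding)
    print(size)  # 6, 12, 24, 28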

The loss functions. Note that instead of minimizing log(1 - D(G(z|y))) directly, the generator maximizes log D(G(z|y)) (the usual non-saturating trick), implemented as a sigmoid cross-entropy against all-ones labels:

g = generator(noise, y_noise)
d_real, d_real_logits = discriminator(X, y_label)
d_fake, d_fake_logits = discriminator(g, y_label, reuse=True)

vars_g = [var for var in tf.trainable_variables() if var.name.startswith('generator')]
vars_d = [var for var in tf.trainable_variables() if var.name.startswith('discriminator')]

loss_d_real = tf.reduce_mean(sigmoid_cross_entropy_with_logits(d_real_logits, tf.ones_like(d_real)))
loss_d_fake = tf.reduce_mean(sigmoid_cross_entropy_with_logits(d_fake_logits, tf.zeros_like(d_fake)))
loss_g = tf.reduce_mean(sigmoid_cross_entropy_with_logits(d_fake_logits, tf.ones_like(d_fake)))
loss_d = loss_d_real + loss_d_fake

The optimizers. Wrapping them in the batch-norm update ops ensures the moving statistics are refreshed during training:

update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
with tf.control_dependencies(update_ops):
    optimizer_d = tf.train.AdamOptimizer(learning_rate=0.0002, beta1=0.5).minimize(loss_d, var_list=vars_d)
    optimizer_g = tf.train.AdamOptimizer(learning_rate=0.0002, beta1=0.5).minimize(loss_g, var_list=vars_g)

A helper that stitches a batch of images into a montage:

def montage(images):
    if isinstance(images, list):
        images = np.array(images)
    img_h = images.shape[1]
    img_w = images.shape[2]
    n_plots = int(np.ceil(np.sqrt(images.shape[0])))
    m = np.ones((images.shape[1] * n_plots + n_plots + 1, images.shape[2] * n_plots + n_plots + 1)) * 0.5
    for i in range(n_plots):
        for j in range(n_plots):
            this_filter = i * n_plots + j
            if this_filter < images.shape[0]:
                this_img = images[this_filter]
                m[1 + i + i * img_h:1 + i + (i + 1) * img_h,
                  1 + j + j * img_w:1 + j + (j + 1) * img_w] = this_img
    return m
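As a quick illustration of the layout, 100 grayscale tiles become a 10×10 grid separated by one-pixel gray lines (a usage sketch, not part of the pipeline):

grid = montage([np.random.rand(28, 28) for _ in range(100)])
print(grid.shape)  # (291, 291): 10 tiles of 28 pixels plus 11 separator lines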

Train the model, feeding in the condition information. The fixed y_samples put digit i in row i of the preview grid, and the generator is updated twice for each discriminator update:

sess = tf.Session()
sess.run(tf.global_variables_initializer())
z_samples = np.random.uniform(-1.0, 1.0, [batch_size, z_dim]).astype(np.float32)
y_samples = np.zeros([batch_size, LABEL])
for i in range(LABEL):
    for j in range(LABEL):
        y_samples[i * LABEL + j, i] = 1
samples = []
loss = {'d': [], 'g': []}

for i in tqdm(range(60000)):
    n = np.random.uniform(-1.0, 1.0, [batch_size, z_dim]).astype(np.float32)
    batch, label = mnist.train.next_batch(batch_size=batch_size)
    batch = np.reshape(batch, [batch_size, HEIGHT, WIDTH, 1])
    batch = (batch - 0.5) * 2
    yn = np.copy(label)
    yl = np.reshape(label, [batch_size, 1, 1, LABEL])
    yl = yl * np.ones([batch_size, HEIGHT, WIDTH, LABEL])

    d_ls, g_ls = sess.run([loss_d, loss_g], feed_dict={X: batch, noise: n, y_label: yl, y_noise: yn, is_training: True})
    loss['d'].append(d_ls)
    loss['g'].append(g_ls)

    sess.run(optimizer_d, feed_dict={X: batch, noise: n, y_label: yl, y_noise: yn, is_training: True})
    sess.run(optimizer_g, feed_dict={X: batch, noise: n, y_label: yl, y_noise: yn, is_training: True})
    sess.run(optimizer_g, feed_dict={X: batch, noise: n, y_label: yl, y_noise: yn, is_training: True})

    if i % 1000 == 0:
        print(i, d_ls, g_ls)
        gen_imgs = sess.run(g, feed_dict={noise: z_samples, y_noise: y_samples, is_training: False})
        gen_imgs = (gen_imgs + 1) / 2
        imgs = [img[:, :, 0] for img in gen_imgs]
        gen_imgs = montage(imgs)
        plt.axis('off')
        plt.imshow(gen_imgs, cmap='gray')
        imageio.imsave(os.path.join(OUTPUT_DIR, 'sample_%d.jpg' % i), gen_imgs)
        plt.show()
        samples.append(gen_imgs)

plt.plot(loss['d'], label='Discriminator')
plt.plot(loss['g'], label='Generator')
plt.legend(loc='upper right')
plt.savefig('Loss.png')
plt.show()
imageio.mimsave(os.path.join(OUTPUT_DIR, 'samples.gif'), samples, fps=5)

The generated handwritten digits are shown below; the digits within each row are the same.

Save the model for later use; with global_step=60000 this writes checkpoint files named mnist_cgan-60000.*, which the standalone script below restores:

saver = tf.train.Saver()
saver.save(sess, './mnist_cgan', global_step=60000)

Use the model on a standalone machine to generate handwritten digit images:

# -*- coding: utf-8 -*-

import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt

batch_size = 100
z_dim = 100
LABEL = 10

def montage(images):
    if isinstance(images, list):
        images = np.array(images)
    img_h = images.shape[1]
    img_w = images.shape[2]
    n_plots = int(np.ceil(np.sqrt(images.shape[0])))
    m = np.ones((images.shape[1] * n_plots + n_plots + 1, images.shape[2] * n_plots + n_plots + 1)) * 0.5
    for i in range(n_plots):
        for j in range(n_plots):
            this_filter = i * n_plots + j
            if this_filter < images.shape[0]:
                this_img = images[this_filter]
                m[1 + i + i * img_h:1 + i + (i + 1) * img_h,
                  1 + j + j * img_w:1 + j + (j + 1) * img_w] = this_img
    return m

sess = tf.Session()
sess.run(tf.global_variables_initializer())

saver = tf.train.import_meta_graph('./mnist_cgan-60000.meta')
saver.restore(sess, tf.train.latest_checkpoint('./'))

graph = tf.get_default_graph()
g = graph.get_tensor_by_name('generator/g/Tanh:0')
noise = graph.get_tensor_by_name('noise:0')
y_noise = graph.get_tensor_by_name('y_noise:0')
is_training = graph.get_tensor_by_name('is_training:0')

n = np.random.uniform(-1.0, 1.0, [batch_size, z_dim]).astype(np.float32)
y_samples = np.zeros([batch_size, LABEL])
for i in range(LABEL):
    for j in range(LABEL):
        y_samples[i * LABEL + j, i] = 1
gen_imgs = sess.run(g, feed_dict={noise: n, y_noise: y_samples, is_training: False})
gen_imgs = (gen_imgs + 1) / 2
imgs = [img[:, :, 0] for img in gen_imgs]
gen_imgs = montage(imgs)
plt.axis('off')
plt.imshow(gen_imgs, cmap='gray')
plt.show()
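The tensor names used here follow from the variable scope and the name arguments chosen when the graph was built ('generator', name='g', and the final Tanh activation). If in doubt, the restored graph can be inspected, for instance:

# list the generator's operations to locate the output tensor (illustrative)
for op in graph.get_operations():
    if op.name.startswith('generator'):
        print(op.name)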

CelebA with Conditions

Having understood the principle and implementation of CGAN, let's try another dataset, for example CelebA, which we used before.

CelebA provides 40 binary attribute annotations for each image. Here we take Male (whether the person is male) as the condition and implement CGAN on top of WGAN (the code below uses the gradient-penalty variant, WGAN-GP).

Load the libraries:

# -*- coding: utf-8 -*-

import tensorflow as tf
import numpy as np
import os
import matplotlib.pyplot as plt
%matplotlib inline
from imageio import imread, imsave, mimsave
import cv2
import glob
from tqdm import tqdm

Load the images:

images = glob.glob('celeba/*.jpg')
print(len(images))

Read each image's Male tag:

tags = {}
target = 'Male'
with open('list_attr_celeba.txt', 'r') as fr:
    lines = fr.readlines()
    all_tags = lines[0].strip('\n').split()
    for i in range(1, len(lines)):
        line = lines[i].strip('\n').split()
        if int(line[all_tags.index(target) + 1]) == 1:
            tags[line[0]] = [1, 0]  # male
        else:
            tags[line[0]] = [0, 1]  # female
print(len(tags))
print(all_tags)
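One caveat: in the standard list_attr_celeba.txt the first line is the image count and the second line holds the 40 attribute names, whereas the code above reads the names from lines[0]; if your copy still contains the count line, use lines[1] instead. A quick check of the layout (illustrative):

# print the first lines to confirm where the attribute names are
with open('list_attr_celeba.txt', 'r') as fr:
    for line in fr.readlines()[:3]:
        print(line.strip()[:80])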

Define some constants, the network inputs, and helper functions:

batch_size = 100
z_dim = 100
WIDTH = 64
HEIGHT = 64
LABEL = 2
LAMBDA = 10
DIS_ITERS = 3 # 5

OUTPUT_DIR = 'samples'
if not os.path.exists(OUTPUT_DIR):
    os.mkdir(OUTPUT_DIR)

X = tf.placeholder(dtype=tf.float32, shape=[batch_size, HEIGHT, WIDTH, 3], name='X')
y_label = tf.placeholder(dtype=tf.float32, shape=[batch_size, HEIGHT, WIDTH, LABEL], name='y_label')
noise = tf.placeholder(dtype=tf.float32, shape=[batch_size, z_dim], name='noise')
y_noise = tf.placeholder(dtype=tf.float32, shape=[batch_size, LABEL], name='y_noise')
is_training = tf.placeholder(dtype=tf.bool, name='is_training')

def lrelu(x, leak=0.2):
    return tf.maximum(x, leak * x)

The discriminator. Note that batch normalization is omitted here: the gradient penalty used below is computed per sample, which does not combine well with batch norm:

def discriminator(image, label, reuse=None, is_training=is_training):
    momentum = 0.9
    with tf.variable_scope('discriminator', reuse=reuse):
        h0 = tf.concat([image, label], axis=3)
        h0 = lrelu(tf.layers.conv2d(h0, kernel_size=5, filters=64, strides=2, padding='same'))

        h1 = lrelu(tf.layers.conv2d(h0, kernel_size=5, filters=128, strides=2, padding='same'))

        h2 = lrelu(tf.layers.conv2d(h1, kernel_size=5, filters=256, strides=2, padding='same'))

        h3 = lrelu(tf.layers.conv2d(h2, kernel_size=5, filters=512, strides=2, padding='same'))

        h4 = tf.contrib.layers.flatten(h3)
        h4 = tf.layers.dense(h4, units=1)
        return h4

The generator; the feature map doubles from 4×4 up to 64×64 across the four stride-2 transposed convolutions:

def generator(z, label, is_training=is_training):
    momentum = 0.9
    with tf.variable_scope('generator', reuse=None):
        d = 4
        z = tf.concat([z, label], axis=1)
        h0 = tf.layers.dense(z, units=d * d * 512)
        h0 = tf.reshape(h0, shape=[-1, d, d, 512])
        h0 = tf.nn.relu(tf.contrib.layers.batch_norm(h0, is_training=is_training, decay=momentum))

        h1 = tf.layers.conv2d_transpose(h0, kernel_size=5, filters=256, strides=2, padding='same')
        h1 = tf.nn.relu(tf.contrib.layers.batch_norm(h1, is_training=is_training, decay=momentum))

        h2 = tf.layers.conv2d_transpose(h1, kernel_size=5, filters=128, strides=2, padding='same')
        h2 = tf.nn.relu(tf.contrib.layers.batch_norm(h2, is_training=is_training, decay=momentum))

        h3 = tf.layers.conv2d_transpose(h2, kernel_size=5, filters=64, strides=2, padding='same')
        h3 = tf.nn.relu(tf.contrib.layers.batch_norm(h3, is_training=is_training, decay=momentum))

        h4 = tf.layers.conv2d_transpose(h3, kernel_size=5, filters=3, strides=2, padding='same', activation=tf.nn.tanh, name='g')
        return h4

Define the loss functions: the WGAN critic losses, plus a gradient penalty on random interpolations between real and generated samples:

g = generator(noise, y_noise)
d_real = discriminator(X, y_label)
d_fake = discriminator(g, y_label, reuse=True)

loss_d_real = -tf.reduce_mean(d_real)
loss_d_fake = tf.reduce_mean(d_fake)
loss_g = -tf.reduce_mean(d_fake)
loss_d = loss_d_real + loss_d_fake

alpha = tf.random_uniform(shape=[batch_size, 1, 1, 1], minval=0., maxval=1.)
interpolates = alpha * X + (1 - alpha) * g
grad = tf.gradients(discriminator(interpolates, y_label, reuse=True), [interpolates])[0]
slop = tf.sqrt(tf.reduce_sum(tf.square(grad), axis=[1]))
gp = tf.reduce_mean((slop - 1.) ** 2)
loss_d += LAMBDA * gp

vars_g = [var for var in tf.trainable_variables() if var.name.startswith('generator')]
vars_d = [var for var in tf.trainable_variables() if var.name.startswith('discriminator')]
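Note that grad has shape [batch_size, HEIGHT, WIDTH, 3], so axis=[1] above takes the norm over rows only. The reference WGAN-GP formulation computes the gradient norm over all non-batch axes; a sketch of that variant (an alternative, not what the code above does):

# common WGAN-GP variant: gradient norm over all non-batch axes (illustrative)
slopes = tf.sqrt(tf.reduce_sum(tf.square(grad), axis=[1, 2, 3]))
gp_full = tf.reduce_mean((slopes - 1.) ** 2)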

Define the optimizers:

update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
with tf.control_dependencies(update_ops):
    optimizer_d = tf.train.AdamOptimizer(learning_rate=0.0002, beta1=0.5).minimize(loss_d, var_list=vars_d)
    optimizer_g = tf.train.AdamOptimizer(learning_rate=0.0002, beta1=0.5).minimize(loss_g, var_list=vars_g)

The montage function, extended to handle color images:

def montage(images):
    if isinstance(images, list):
        images = np.array(images)
    img_h = images.shape[1]
    img_w = images.shape[2]
    n_plots = int(np.ceil(np.sqrt(images.shape[0])))
    if len(images.shape) == 4 and images.shape[3] == 3:
        m = np.ones(
            (images.shape[1] * n_plots + n_plots + 1,
             images.shape[2] * n_plots + n_plots + 1, 3)) * 0.5
    elif len(images.shape) == 4 and images.shape[3] == 1:
        m = np.ones(
            (images.shape[1] * n_plots + n_plots + 1,
             images.shape[2] * n_plots + n_plots + 1, 1)) * 0.5
    elif len(images.shape) == 3:
        m = np.ones(
            (images.shape[1] * n_plots + n_plots + 1,
             images.shape[2] * n_plots + n_plots + 1)) * 0.5
    else:
        raise ValueError('Could not parse image shape of {}'.format(images.shape))
    for i in range(n_plots):
        for j in range(n_plots):
            this_filter = i * n_plots + j
            if this_filter < images.shape[0]:
                this_img = images[this_filter]
                m[1 + i + i * img_h:1 + i + (i + 1) * img_h,
                  1 + j + j * img_w:1 + j + (j + 1) * img_w] = this_img
    return m

Prepare the data: center-crop each image to a square, resize it to 64×64, and scale pixel values to [-1, 1] to match the generator's tanh output:

X_all = []
Y_all = []
for i in tqdm(range(len(images))):
    image = imread(images[i])
    h = image.shape[0]
    w = image.shape[1]
    if h > w:
        image = image[h // 2 - w // 2: h // 2 + w // 2, :, :]
    else:
        image = image[:, w // 2 - h // 2: w // 2 + h // 2, :]
    image = cv2.resize(image, (WIDTH, HEIGHT))
    image = (image / 255. - 0.5) * 2
    X_all.append(image)

    image_name = images[i][images[i].find('/') + 1:]
    Y_all.append(tags[image_name])

X_all = np.array(X_all)
Y_all = np.array(Y_all)
print(X_all.shape, Y_all.shape)

Take a look at a few samples:

for i in range(10):
    plt.imshow((X_all[i, :, :, :] + 1) / 2)
    plt.show()
    print(Y_all[i, :])

Define a function that produces random batches:

def get_random_batch():
    data_index = np.arange(X_all.shape[0])
    np.random.shuffle(data_index)
    data_index = data_index[:batch_size]
    X_batch = X_all[data_index, :, :, :]
    Y_batch = Y_all[data_index, :]
    yn = np.copy(Y_batch)
    yl = np.reshape(Y_batch, [batch_size, 1, 1, LABEL])
    yl = yl * np.ones([batch_size, HEIGHT, WIDTH, LABEL])

    return X_batch, yn, yl

Train the model. Each fixed evaluation noise vector appears twice, once with the male condition and once with the female one, and the discriminator is updated DIS_ITERS times per generator update:

sess = tf.Session()
sess.run(tf.global_variables_initializer())
zs = np.random.uniform(-1.0, 1.0, [batch_size // 2, z_dim]).astype(np.float32)
z_samples = []
y_samples = []
for i in range(batch_size // 2):
    z_samples.append(zs[i, :])
    y_samples.append([1, 0])
    z_samples.append(zs[i, :])
    y_samples.append([0, 1])
samples = []
loss = {'d': [], 'g': []}

for i in tqdm(range(60000)):
    for j in range(DIS_ITERS):
        n = np.random.uniform(-1.0, 1.0, [batch_size, z_dim]).astype(np.float32)
        X_batch, yn, yl = get_random_batch()
        _, d_ls = sess.run([optimizer_d, loss_d], feed_dict={X: X_batch, noise: n, y_label: yl, y_noise: yn, is_training: True})

    _, g_ls = sess.run([optimizer_g, loss_g], feed_dict={X: X_batch, noise: n, y_label: yl, y_noise: yn, is_training: True})

    loss['d'].append(d_ls)
    loss['g'].append(g_ls)

    if i % 500 == 0:
        print(i, d_ls, g_ls)
        gen_imgs = sess.run(g, feed_dict={noise: z_samples, y_noise: y_samples, is_training: False})
        gen_imgs = (gen_imgs + 1) / 2
        imgs = [img[:, :, :] for img in gen_imgs]
        gen_imgs = montage(imgs)
        plt.axis('off')
        plt.imshow(gen_imgs)
        imsave(os.path.join(OUTPUT_DIR, 'sample_%d.jpg' % i), gen_imgs)
        plt.show()
        samples.append(gen_imgs)

plt.plot(loss['d'], label='Discriminator')
plt.plot(loss['g'], label='Generator')
plt.legend(loc='upper right')
plt.savefig('Loss.png')
plt.show()
mimsave(os.path.join(OUTPUT_DIR, 'samples.gif'), samples, fps=10)

The results are shown below. Within each pair of images, the noise is identical but the condition differs: male on the left, female on the right.

Save the model:

saver = tf.train.Saver()
saver.save(sess, './celeba_cgan', global_step=60000)

Video course

深度有趣(一) - 網易雲課堂 (NetEase Cloud Classroom): study.163.com

