Clean the code & Add some features and annotations

Esse commit está contido em:
yunjey
2018-04-12 12:36:57 +09:00
commit c53e59871b
5 arquivos alterados com 574 adições e 794 exclusões
+59 -78
Ver Arquivo
@@ -1,111 +1,92 @@
from torch.utils import data
from torchvision import transforms as T
from torchvision.datasets import ImageFolder
from PIL import Image
import torch import torch
import os import os
import random import random
from torch.utils.data import Dataset
from torch.utils.data import DataLoader
from torchvision import transforms
from torchvision.datasets import ImageFolder
from PIL import Image
class CelebDataset(Dataset): class CelebA(data.Dataset):
def __init__(self, image_path, metadata_path, transform, mode): """Dataset class for the CelebA dataset."""
self.image_path = image_path
def __init__(self, image_dir, attr_path, selected_attrs, transform, mode):
"""Initialize and preprocess the CelebA dataset."""
self.image_dir = image_dir
self.attr_path = attr_path
self.selected_attrs = selected_attrs
self.transform = transform self.transform = transform
self.mode = mode self.mode = mode
self.lines = open(metadata_path, 'r').readlines() self.train_dataset = []
self.num_data = int(self.lines[0]) self.test_dataset = []
self.attr2idx = {} self.attr2idx = {}
self.idx2attr = {} self.idx2attr = {}
print ('Start preprocessing dataset..!')
random.seed(1234)
self.preprocess() self.preprocess()
print ('Finished preprocessing dataset..!')
if self.mode == 'train': if mode == 'train':
self.num_data = len(self.train_filenames) self.num_images = len(self.train_dataset)
elif self.mode == 'test': else:
self.num_data = len(self.test_filenames) self.num_images = len(self.test_dataset)
def preprocess(self): def preprocess(self):
attrs = self.lines[1].split() """Preprocess the CelebA attribute file."""
for i, attr in enumerate(attrs): lines = [line.rstrip() for line in open(self.attr_path, 'r')]
self.attr2idx[attr] = i all_attr_names = lines[1].split()
self.idx2attr[i] = attr for i, attr_name in enumerate(all_attr_names):
self.attr2idx[attr_name] = i
self.idx2attr[i] = attr_name
self.selected_attrs = ['Black_Hair', 'Blond_Hair', 'Brown_Hair', 'Male', 'Young'] lines = lines[2:]
self.train_filenames = [] random.seed(1234)
self.train_labels = [] random.shuffle(lines)
self.test_filenames = []
self.test_labels = []
lines = self.lines[2:]
random.shuffle(lines) # random shuffling
for i, line in enumerate(lines): for i, line in enumerate(lines):
split = line.split()
splits = line.split() filename = split[0]
filename = splits[0] values = split[1:]
values = splits[1:]
label = [] label = []
for idx, value in enumerate(values): for attr_name in self.selected_attrs:
attr = self.idx2attr[idx] idx = self.attr2idx[attr_name]
label.append(values[idx] == '1')
if attr in self.selected_attrs:
if value == '1':
label.append(1)
else:
label.append(0)
if (i+1) < 2000: if (i+1) < 2000:
self.test_filenames.append(filename) self.test_dataset.append([filename, label])
self.test_labels.append(label)
else: else:
self.train_filenames.append(filename) self.train_dataset.append([filename, label])
self.train_labels.append(label)
print('Finished preprocessing the CelebA dataset...')
def __getitem__(self, index): def __getitem__(self, index):
if self.mode == 'train': """Return one image and its corresponding attribute label."""
image = Image.open(os.path.join(self.image_path, self.train_filenames[index])) dataset = self.train_dataset if self.mode == 'train' else self.test_dataset
label = self.train_labels[index] filename, label = dataset[index]
elif self.mode in ['test']: image = Image.open(os.path.join(self.image_dir, filename))
image = Image.open(os.path.join(self.image_path, self.test_filenames[index]))
label = self.test_labels[index]
return self.transform(image), torch.FloatTensor(label) return self.transform(image), torch.FloatTensor(label)
def __len__(self): def __len__(self):
return self.num_data """Return the number of images."""
return self.num_images
def get_loader(image_path, metadata_path, crop_size, image_size, batch_size, dataset='CelebA', mode='train'): def get_loader(image_dir, attr_path, selected_attrs, crop_size=178, image_size=128,
"""Build and return data loader.""" batch_size=16, dataset='CelebA', mode='train', num_workers=1):
"""Build and return a data loader."""
transform = []
if mode == 'train': if mode == 'train':
transform = transforms.Compose([ transform.append(T.RandomHorizontalFlip())
transforms.CenterCrop(crop_size), transform.append(T.CenterCrop(crop_size))
transforms.Resize(image_size, interpolation=Image.ANTIALIAS), transform.append(T.Resize(image_size))
transforms.RandomHorizontalFlip(), transform.append(T.ToTensor())
transforms.ToTensor(), transform.append(T.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)))
transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))]) transform = T.Compose(transform)
else:
transform = transforms.Compose([
transforms.CenterCrop(crop_size),
transforms.Scale(image_size, interpolation=Image.ANTIALIAS),
transforms.ToTensor(),
transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])
if dataset == 'CelebA': if dataset == 'CelebA':
dataset = CelebDataset(image_path, metadata_path, transform, mode) dataset = CelebA(image_dir, attr_path, selected_attrs, transform, mode)
elif dataset == 'RaFD': elif dataset == 'RaFD':
dataset = ImageFolder(image_path, transform) dataset = ImageFolder(image_dir, transform)
shuffle = False data_loader = data.DataLoader(dataset=dataset,
if mode == 'train': batch_size=batch_size,
shuffle = True shuffle=(mode=='train'),
num_workers=num_workers)
data_loader = DataLoader(dataset=dataset,
batch_size=batch_size,
shuffle=shuffle)
return data_loader return data_loader
+5 -62
Ver Arquivo
@@ -1,71 +1,14 @@
# Code referenced from https://gist.github.com/gyglim/1f8dfb1b5c82627ae3efcfbbadb9f514
import tensorflow as tf import tensorflow as tf
import numpy as np
import scipy.misc
try:
from StringIO import StringIO # Python 2.7
except ImportError:
from io import BytesIO # Python 3.5+
class Logger(object): class Logger(object):
"""Tensorboard logger."""
def __init__(self, log_dir): def __init__(self, log_dir):
"""Create a summary writer logging to log_dir.""" """Initialize summary writer."""
self.writer = tf.summary.FileWriter(log_dir) self.writer = tf.summary.FileWriter(log_dir)
def scalar_summary(self, tag, value, step): def scalar_summary(self, tag, value, step):
"""Log a scalar variable.""" """Add scalar summary."""
summary = tf.Summary(value=[tf.Summary.Value(tag=tag, simple_value=value)]) summary = tf.Summary(value=[tf.Summary.Value(tag=tag, simple_value=value)])
self.writer.add_summary(summary, step) self.writer.add_summary(summary, step)
def image_summary(self, tag, images, step):
"""Log a list of images."""
img_summaries = []
for i, img in enumerate(images):
# Write the image to a string
try:
s = StringIO()
except:
s = BytesIO()
scipy.misc.toimage(img).save(s, format="png")
# Create an Image object
img_sum = tf.Summary.Image(encoded_image_string=s.getvalue(),
height=img.shape[0],
width=img.shape[1])
# Create a Summary value
img_summaries.append(tf.Summary.Value(tag='%s/%d' % (tag, i), image=img_sum))
# Create and write Summary
summary = tf.Summary(value=img_summaries)
self.writer.add_summary(summary, step)
def histo_summary(self, tag, values, step, bins=1000):
"""Log a histogram of the tensor of values."""
# Create a histogram using numpy
counts, bin_edges = np.histogram(values, bins=bins)
# Fill the fields of the histogram proto
hist = tf.HistogramProto()
hist.min = float(np.min(values))
hist.max = float(np.max(values))
hist.num = int(np.prod(values.shape))
hist.sum = float(np.sum(values))
hist.sum_squares = float(np.sum(values**2))
# Drop the start of the first bin
bin_edges = bin_edges[1:]
# Add bin edges and counts
for edge in bin_edges:
hist.bucket_limit.append(edge)
for c in counts:
hist.bucket.append(c)
# Create and write Summary
summary = tf.Summary(value=[tf.Summary.Value(tag=tag, histo=hist)])
self.writer.add_summary(summary, step)
self.writer.flush()
+65 -61
Ver Arquivo
@@ -9,32 +9,35 @@ def str2bool(v):
return v.lower() in ('true') return v.lower() in ('true')
def main(config): def main(config):
# For fast training # For fast training.
cudnn.benchmark = True cudnn.benchmark = True
# Create directories if not exist # Create directories if not exist.
if not os.path.exists(config.log_path): if not os.path.exists(config.log_dir):
os.makedirs(config.log_path) os.makedirs(config.log_dir)
if not os.path.exists(config.model_save_path): if not os.path.exists(config.model_save_dir):
os.makedirs(config.model_save_path) os.makedirs(config.model_save_dir)
if not os.path.exists(config.sample_path): if not os.path.exists(config.sample_dir):
os.makedirs(config.sample_path) os.makedirs(config.sample_dir)
if not os.path.exists(config.result_path): if not os.path.exists(config.result_dir):
os.makedirs(config.result_path) os.makedirs(config.result_dir)
# Data loader # Data loader.
celebA_loader = None celeba_loader = None
rafd_loader = None rafd_loader = None
if config.dataset in ['CelebA', 'Both']: if config.dataset in ['CelebA', 'Both']:
celebA_loader = get_loader(config.celebA_image_path, config.metadata_path, config.celebA_crop_size, celeba_loader = get_loader(config.celeba_image_dir, config.attr_path, config.selected_attrs,
config.image_size, config.batch_size, 'CelebA', config.mode) config.celeba_crop_size, config.image_size, config.batch_size,
'CelebA', config.mode, config.num_workers)
if config.dataset in ['RaFD', 'Both']: if config.dataset in ['RaFD', 'Both']:
rafd_loader = get_loader(config.rafd_image_path, None, config.rafd_crop_size, rafd_loader = get_loader(config.rafd_image_dir, None, None,
config.image_size, config.batch_size, 'RaFD', config.mode) config.rafd_crop_size, config.image_size, config.batch_size,
'RaFD', config.mode, config.num_workers)
# Solver # Solver for training and testing StarGAN.
solver = Solver(celebA_loader, rafd_loader, config) solver = Solver(celeba_loader, rafd_loader, config)
if config.mode == 'train': if config.mode == 'train':
if config.dataset in ['CelebA', 'RaFD']: if config.dataset in ['CelebA', 'RaFD']:
@@ -51,55 +54,56 @@ def main(config):
if __name__ == '__main__': if __name__ == '__main__':
parser = argparse.ArgumentParser() parser = argparse.ArgumentParser()
# Model hyper-parameters # Model configuration.
parser.add_argument('--c_dim', type=int, default=5) parser.add_argument('--c_dim', type=int, default=5, help='dimension of domain labels (1st dataset)')
parser.add_argument('--c2_dim', type=int, default=8) parser.add_argument('--c2_dim', type=int, default=8, help='dimension of domain labels (2nd dataset)')
parser.add_argument('--celebA_crop_size', type=int, default=178) parser.add_argument('--celeba_crop_size', type=int, default=178, help='crop size for the CelebA dataset')
parser.add_argument('--rafd_crop_size', type=int, default=256) parser.add_argument('--rafd_crop_size', type=int, default=256, help='crop size for the RaFD dataset')
parser.add_argument('--image_size', type=int, default=128) parser.add_argument('--image_size', type=int, default=128, help='image resolution')
parser.add_argument('--g_conv_dim', type=int, default=64) parser.add_argument('--g_conv_dim', type=int, default=64, help='number of conv filters in the first layer of G')
parser.add_argument('--d_conv_dim', type=int, default=64) parser.add_argument('--d_conv_dim', type=int, default=64, help='number of conv filters in the first layer of D')
parser.add_argument('--g_repeat_num', type=int, default=6) parser.add_argument('--g_repeat_num', type=int, default=6, help='number of residual blocks in G')
parser.add_argument('--d_repeat_num', type=int, default=6) parser.add_argument('--d_repeat_num', type=int, default=6, help='number of strided conv layers in D')
parser.add_argument('--g_lr', type=float, default=0.0001) parser.add_argument('--lambda_cls', type=float, default=1, help='weight for domain classification loss')
parser.add_argument('--d_lr', type=float, default=0.0001) parser.add_argument('--lambda_rec', type=float, default=10, help='weight for reconstruction loss')
parser.add_argument('--lambda_cls', type=float, default=1) parser.add_argument('--lambda_gp', type=float, default=10, help='weight for gradient penalty')
parser.add_argument('--lambda_rec', type=float, default=10)
parser.add_argument('--lambda_gp', type=float, default=10) # Training configuration.
parser.add_argument('--d_train_repeat', type=int, default=5)
# Training settings
parser.add_argument('--dataset', type=str, default='CelebA', choices=['CelebA', 'RaFD', 'Both']) parser.add_argument('--dataset', type=str, default='CelebA', choices=['CelebA', 'RaFD', 'Both'])
parser.add_argument('--num_epochs', type=int, default=20) parser.add_argument('--batch_size', type=int, default=16, help='mini-batch size')
parser.add_argument('--num_epochs_decay', type=int, default=10) parser.add_argument('--num_iters', type=int, default=200000, help='number of total iterations for training D')
parser.add_argument('--num_iters', type=int, default=200000) parser.add_argument('--num_iters_decay', type=int, default=100000, help='number of iterations for decaying lr')
parser.add_argument('--num_iters_decay', type=int, default=100000) parser.add_argument('--g_lr', type=float, default=0.0001, help='learning rate for G')
parser.add_argument('--batch_size', type=int, default=16) parser.add_argument('--d_lr', type=float, default=0.0001, help='learning rate for D')
parser.add_argument('--n_critic', type=int, default=5, help='number of D updates per each G update')
parser.add_argument('--beta1', type=float, default=0.5, help='beta1 for Adam optimizer')
parser.add_argument('--beta2', type=float, default=0.999, help='beta2 for Adam optimizer')
parser.add_argument('--resume_iters', type=int, default=None, help='resume training from this step')
parser.add_argument('--selected_attrs', '--list', nargs='+', help='selected attributes for the CelebA dataset',
default=['Black_Hair', 'Blond_Hair', 'Brown_Hair', 'Male', 'Young'])
# Test configuration.
parser.add_argument('--test_iters', type=int, default=200000, help='test model from this step')
# Miscellaneous.
parser.add_argument('--num_workers', type=int, default=1) parser.add_argument('--num_workers', type=int, default=1)
parser.add_argument('--beta1', type=float, default=0.5)
parser.add_argument('--beta2', type=float, default=0.999)
parser.add_argument('--pretrained_model', type=str, default=None)
# Test settings
parser.add_argument('--test_model', type=str, default='20_1000')
# Misc
parser.add_argument('--mode', type=str, default='train', choices=['train', 'test']) parser.add_argument('--mode', type=str, default='train', choices=['train', 'test'])
parser.add_argument('--use_tensorboard', type=str2bool, default=False) parser.add_argument('--use_tensorboard', type=str2bool, default=True)
# Path # Directories.
parser.add_argument('--celebA_image_path', type=str, default='./data/CelebA_nocrop/images') parser.add_argument('--celeba_image_dir', type=str, default='data/CelebA_nocrop/images')
parser.add_argument('--rafd_image_path', type=str, default='./data/RaFD/train') parser.add_argument('--attr_path', type=str, default='data/list_attr_celeba.txt')
parser.add_argument('--metadata_path', type=str, default='./data/list_attr_celeba.txt') parser.add_argument('--rafd_image_dir', type=str, default='data/RaFD/train')
parser.add_argument('--log_path', type=str, default='./stargan/logs') parser.add_argument('--log_dir', type=str, default='stargan/logs')
parser.add_argument('--model_save_path', type=str, default='./stargan/models') parser.add_argument('--model_save_dir', type=str, default='stargan/models')
parser.add_argument('--sample_path', type=str, default='./stargan/samples') parser.add_argument('--sample_dir', type=str, default='stargan/samples')
parser.add_argument('--result_path', type=str, default='./stargan/results') parser.add_argument('--result_dir', type=str, default='stargan/results')
# Step size # Step size.
parser.add_argument('--log_step', type=int, default=10) parser.add_argument('--log_step', type=int, default=10)
parser.add_argument('--sample_step', type=int, default=500) parser.add_argument('--sample_step', type=int, default=1000)
parser.add_argument('--model_save_step', type=int, default=1000) parser.add_argument('--model_save_step', type=int, default=10000)
parser.add_argument('--lr_update_step', type=int, default=1000)
config = parser.parse_args() config = parser.parse_args()
print(config) print(config)
+17 -18
Ver Arquivo
@@ -5,7 +5,7 @@ import numpy as np
class ResidualBlock(nn.Module): class ResidualBlock(nn.Module):
"""Residual Block.""" """Residual Block with instance normalization."""
def __init__(self, dim_in, dim_out): def __init__(self, dim_in, dim_out):
super(ResidualBlock, self).__init__() super(ResidualBlock, self).__init__()
self.main = nn.Sequential( self.main = nn.Sequential(
@@ -20,7 +20,7 @@ class ResidualBlock(nn.Module):
class Generator(nn.Module): class Generator(nn.Module):
"""Generator. Encoder-Decoder Architecture.""" """Generator network."""
def __init__(self, conv_dim=64, c_dim=5, repeat_num=6): def __init__(self, conv_dim=64, c_dim=5, repeat_num=6):
super(Generator, self).__init__() super(Generator, self).__init__()
@@ -29,7 +29,7 @@ class Generator(nn.Module):
layers.append(nn.InstanceNorm2d(conv_dim, affine=True)) layers.append(nn.InstanceNorm2d(conv_dim, affine=True))
layers.append(nn.ReLU(inplace=True)) layers.append(nn.ReLU(inplace=True))
# Down-Sampling # Down-sampling layers.
curr_dim = conv_dim curr_dim = conv_dim
for i in range(2): for i in range(2):
layers.append(nn.Conv2d(curr_dim, curr_dim*2, kernel_size=4, stride=2, padding=1, bias=False)) layers.append(nn.Conv2d(curr_dim, curr_dim*2, kernel_size=4, stride=2, padding=1, bias=False))
@@ -37,11 +37,11 @@ class Generator(nn.Module):
layers.append(nn.ReLU(inplace=True)) layers.append(nn.ReLU(inplace=True))
curr_dim = curr_dim * 2 curr_dim = curr_dim * 2
# Bottleneck # Bottleneck layers.
for i in range(repeat_num): for i in range(repeat_num):
layers.append(ResidualBlock(dim_in=curr_dim, dim_out=curr_dim)) layers.append(ResidualBlock(dim_in=curr_dim, dim_out=curr_dim))
# Up-Sampling # Up-sampling layers.
for i in range(2): for i in range(2):
layers.append(nn.ConvTranspose2d(curr_dim, curr_dim//2, kernel_size=4, stride=2, padding=1, bias=False)) layers.append(nn.ConvTranspose2d(curr_dim, curr_dim//2, kernel_size=4, stride=2, padding=1, bias=False))
layers.append(nn.InstanceNorm2d(curr_dim//2, affine=True)) layers.append(nn.InstanceNorm2d(curr_dim//2, affine=True))
@@ -53,35 +53,34 @@ class Generator(nn.Module):
self.main = nn.Sequential(*layers) self.main = nn.Sequential(*layers)
def forward(self, x, c): def forward(self, x, c):
# replicate spatially and concatenate domain information # Replicate spatially and concatenate domain information.
c = c.unsqueeze(2).unsqueeze(3) c = c.view(c.size(0), c.size(1), 1, 1)
c = c.expand(c.size(0), c.size(1), x.size(2), x.size(3)) c = c.repeat(1, 1, x.size(2), x.size(3))
x = torch.cat([x, c], dim=1) x = torch.cat([x, c], dim=1)
return self.main(x) return self.main(x)
class Discriminator(nn.Module): class Discriminator(nn.Module):
"""Discriminator. PatchGAN.""" """Discriminator network with PatchGAN."""
def __init__(self, image_size=128, conv_dim=64, c_dim=5, repeat_num=6): def __init__(self, image_size=128, conv_dim=64, c_dim=5, repeat_num=6):
super(Discriminator, self).__init__() super(Discriminator, self).__init__()
layers = [] layers = []
layers.append(nn.Conv2d(3, conv_dim, kernel_size=4, stride=2, padding=1)) layers.append(nn.Conv2d(3, conv_dim, kernel_size=4, stride=2, padding=1))
layers.append(nn.LeakyReLU(0.01, inplace=True)) layers.append(nn.LeakyReLU(0.01))
curr_dim = conv_dim curr_dim = conv_dim
for i in range(1, repeat_num): for i in range(1, repeat_num):
layers.append(nn.Conv2d(curr_dim, curr_dim*2, kernel_size=4, stride=2, padding=1)) layers.append(nn.Conv2d(curr_dim, curr_dim*2, kernel_size=4, stride=2, padding=1))
layers.append(nn.LeakyReLU(0.01, inplace=True)) layers.append(nn.LeakyReLU(0.01))
curr_dim = curr_dim * 2 curr_dim = curr_dim * 2
k_size = int(image_size / np.power(2, repeat_num)) kernel_size = int(image_size / np.power(2, repeat_num))
self.main = nn.Sequential(*layers) self.main = nn.Sequential(*layers)
self.conv1 = nn.Conv2d(curr_dim, 1, kernel_size=3, stride=1, padding=1, bias=False) self.conv1 = nn.Conv2d(curr_dim, 1, kernel_size=3, stride=1, padding=1, bias=False)
self.conv2 = nn.Conv2d(curr_dim, c_dim, kernel_size=k_size, bias=False) self.conv2 = nn.Conv2d(curr_dim, c_dim, kernel_size=kernel_size, bias=False)
def forward(self, x): def forward(self, x):
h = self.main(x) h = self.main(x)
out_real = self.conv1(h) out_src = self.conv1(h)
out_aux = self.conv2(h) out_cls = self.conv2(h)
return out_real.squeeze(), out_aux.squeeze() return out_src, out_cls.view(out_cls.size(0), out_cls.size(1))
+428 -575
Ver Arquivo
Diferenças do arquivo suprimidas por serem muito extensas Carregar Diff