conv3d_transpose in tf, th, and cntk (#7161)

* conv3d_transpose in tf and th

* fix _preprocess_deconv_output_shape error

* cntk conv3d_transpose

* conv3d_transpose test

* formatting

* cleanup tests

* fix incorrect axis ordering and docs

* fix incorrect axis ordering and docs

* deconv3d_output_shape to fix errors

* remove conv2d_transpose reference in theano backend

* remove kernel_size loop from test

* put depth first in test and add dim to invalid use case input

* formatting - removed extra line

* fix pep8

* remove extraneous args from tf conv3d_transpose function

* default val for data_format=None
Esse commit está contido em:
NC Cullen
2017-06-29 12:09:12 -04:00
commit de François Chollet
commit 6c2dea64fc
5 arquivos alterados com 426 adições e 0 exclusões
+35
Ver Arquivo
@@ -1386,6 +1386,41 @@ def conv3d(x, kernel, strides=(1, 1, 1), padding='valid',
return _postprocess_conv3d_output(x, data_format)
def conv3d_transpose(x, kernel, output_shape, strides=(1, 1, 1),
                     padding='valid', data_format=None):
    """3D deconvolution (i.e. transposed convolution), CNTK backend.

    # Arguments
        x: input tensor.
        kernel: kernel tensor.
        output_shape: output shape including the batch axis
            (the batch entry is dropped before calling CNTK).
        strides: strides tuple.
        padding: string, passed through `_preprocess_border_mode`.
        data_format: string, `"channels_last"` or `"channels_first"`.
            Defaults to `image_data_format()` when `None`.

    # Returns
        The result of `C.convolution_transpose`, passed through
        `_postprocess_conv3d_output`.

    # Raises
        ValueError: if `data_format` is neither `channels_last`
            nor `channels_first`.
    """
    if data_format is None:
        data_format = image_data_format()
    known_formats = {'channels_first', 'channels_last'}
    if data_format not in known_formats:
        raise ValueError('Unknown data_format ' + str(data_format))

    x = _preprocess_conv3d_input(x, data_format)
    kernel = _preprocess_conv3d_kernel(kernel, data_format)
    padding = _preprocess_border_mode(padding)
    # One extra leading stride entry in front of the three given strides.
    strides = (1,) + strides

    # cntk output_shape does not include batch axis
    output_shape = output_shape[1:]
    if data_format == 'channels_last':
        # Move the trailing (channel) entry to the front and shift the
        # three entries before it one position to the right.
        reordered = list(output_shape)
        (reordered[0],
         reordered[1],
         reordered[2],
         reordered[3]) = (output_shape[3],
                          output_shape[0],
                          output_shape[1],
                          output_shape[2])
        output_shape = tuple(reordered)

    # No automatic padding on the first axis; the same flag on the rest.
    auto_padding = [False, padding, padding, padding]
    x = C.convolution_transpose(
        kernel,
        x,
        strides,
        auto_padding=auto_padding,
        output_shape=output_shape)
    return _postprocess_conv3d_output(x, data_format)
def pool2d(x, pool_size, strides=(1, 1),
padding='valid', data_format=None,
pool_mode='max'):
+57
Ver Arquivo
@@ -2911,6 +2911,26 @@ def in_top_k(predictions, targets, k):
# CONVOLUTIONS
def _preprocess_deconv3d_output_shape(x, shape, data_format):
"""Get the output_shape for the 3D deconvolution.
# Arguments
x: input tensor.
shape: output shape.
data_format: string, `"channels_last"` or `"channels_first"`.
# Returns
The output shape.
"""
if data_format == 'channels_first':
shape = (shape[0], shape[2], shape[3], shape[4], shape[1])
if shape[0] is None:
shape = (tf.shape(x)[0], ) + tuple(shape[1:])
shape = tf.stack(list(shape))
return shape
def _preprocess_deconv_output_shape(x, shape, data_format):
"""Get the output_shape for the deconvolution.
@@ -3286,6 +3306,43 @@ def conv3d(x, kernel, strides=(1, 1, 1), padding='valid',
return _postprocess_conv3d_output(x, data_format)
def conv3d_transpose(x, kernel, output_shape, strides=(1, 1, 1),
                     padding='valid', data_format=None):
    """3D deconvolution (i.e. transposed convolution).

    # Arguments
        x: input tensor.
        kernel: kernel tensor.
        output_shape: 1D int tensor for the output shape
            (a tuple or list is also accepted and is stacked
            into a tensor).
        strides: strides tuple.
        padding: string, "same" or "valid".
        data_format: string, `"channels_last"` or `"channels_first"`.
            Whether to use Theano or TensorFlow data format
            for inputs/kernels/outputs.

    # Returns
        A tensor, result of transposed 3D convolution.

    # Raises
        ValueError: if `data_format` is neither `channels_last`
            nor `channels_first`.
    """
    if data_format is None:
        data_format = image_data_format()
    if data_format not in {'channels_first', 'channels_last'}:
        raise ValueError('Unknown data_format ' + str(data_format))

    x = _preprocess_conv3d_input(x, data_format)
    # Reorder the shape (and resolve a `None` batch entry) while it is
    # still a Python sequence. Stacking first would hide the `None`
    # from the helper and hand it to `tf.stack` instead.
    output_shape = _preprocess_deconv3d_output_shape(x, output_shape,
                                                     data_format)
    if isinstance(output_shape, (tuple, list)):
        output_shape = tf.stack(output_shape)
    padding = _preprocess_padding(padding)
    # Add a unit stride at both ends of the three given strides.
    strides = (1,) + strides + (1,)

    x = tf.nn.conv3d_transpose(x, kernel, output_shape, strides,
                               padding=padding)
    return _postprocess_conv3d_output(x, data_format)
def pool2d(x, pool_size, strides=(1, 1),
padding='valid', data_format=None,
pool_mode='max'):
+57
Ver Arquivo
@@ -1968,6 +1968,63 @@ def conv3d(x, kernel, strides=(1, 1, 1),
return conv_out
def conv3d_transpose(x, kernel, output_shape, strides=(1, 1, 1),
                     padding='valid', data_format=None):
    """3D deconvolution (transposed convolution).

    # Arguments
        x: input tensor.
        kernel: kernel tensor.
        output_shape: desired dimensions of output.
        strides: strides tuple.
        padding: string, "same" or "valid".
        data_format: "channels_last" or "channels_first".
            Whether to use Theano or TensorFlow data format
            in inputs/kernels/outputs.

    # Returns
        A tensor, result of transposed 3D convolution.

    # Raises
        ValueError: if `data_format` is neither `channels_last`
            nor `channels_first`, or if using an even kernel size
            with padding 'same'.
    """
    flip_filters = False
    if data_format is None:
        data_format = image_data_format()
    if data_format not in {'channels_first', 'channels_last'}:
        # `str()` keeps this a ValueError even for non-string values.
        raise ValueError('Unknown data_format ' + str(data_format))

    if data_format == 'channels_last':
        # Move the trailing channel entry right after the batch entry.
        output_shape = (output_shape[0],
                        output_shape[4],
                        output_shape[1],
                        output_shape[2],
                        output_shape[3])

    if hasattr(kernel, '_keras_shape'):
        kernel_shape = kernel._keras_shape
    else:
        # Will only work if `kernel` is a shared variable.
        kernel_shape = kernel.eval().shape

    # The first three entries of the kernel shape are checked, not only
    # the first one, so an even size on any of them is rejected.
    # NOTE(review): assumes the spatial sizes come first in
    # `kernel_shape`, as the original `kernel_shape[0]` check did.
    if padding == 'same' and any(k % 2 == 0 for k in kernel_shape[:3]):
        raise ValueError('In `Conv3DTranspose`, with padding mode `same`, '
                         'even kernel sizes are only supported with Tensorflow. '
                         'With Theano, set `kernel_size` to an odd number.')

    kernel_shape = _preprocess_conv3d_filter_shape(kernel_shape, data_format)

    x = _preprocess_conv3d_input(x, data_format)
    kernel = _preprocess_conv3d_kernel(kernel, data_format)

    th_padding = _preprocess_padding(padding)
    op = T.nnet.abstract_conv.AbstractConv3d_gradInputs(imshp=None,
                                                        kshp=kernel_shape,
                                                        subsample=strides,
                                                        border_mode=th_padding,
                                                        filter_flip=not flip_filters)
    # Only the entries after the first two are passed as the target shape.
    conv_out = op(kernel, x, output_shape[2:])
    conv_out = _postprocess_conv3d_output(conv_out, x, padding,
                                          kernel_shape, strides, data_format)
    return conv_out
def pool2d(x, pool_size, strides=(1, 1), padding='valid',
data_format=None, pool_mode='max'):
if data_format is None:
+232
Ver Arquivo
@@ -806,6 +806,237 @@ class Conv2DTranspose(Conv2D):
return config
class Conv3DTranspose(Conv3D):
    """Transposed convolution layer (sometimes called Deconvolution).

    The need for transposed convolutions generally arises
    from the desire to use a transformation going in the opposite direction
    of a normal convolution, i.e., from something that has the shape of the
    output of some convolution to something that has the shape of its input
    while maintaining a connectivity pattern that is compatible with
    said convolution.

    When using this layer as the first layer in a model,
    provide the keyword argument `input_shape`
    (tuple of integers, does not include the sample axis),
    e.g. `input_shape=(128, 128, 128, 3)` for a 128x128x128 volume with 3 channels
    if `data_format="channels_last"`.

    # Arguments
        filters: Integer, the dimensionality of the output space
            (i.e. the number of output filters in the convolution).
        kernel_size: An integer or tuple/list of 3 integers, specifying the
            depth, height and width of the 3D convolution window.
            Can be a single integer to specify the same value for
            all spatial dimensions.
        strides: An integer or tuple/list of 3 integers,
            specifying the strides of the convolution along the
            depth, height and width.
            Can be a single integer to specify the same value for
            all spatial dimensions.
        padding: one of `"valid"` or `"same"` (case-insensitive).
        data_format: A string,
            one of `channels_last` (default) or `channels_first`.
            The ordering of the dimensions in the inputs.
            `channels_last` corresponds to inputs with shape
            `(batch, depth, height, width, channels)` while `channels_first`
            corresponds to inputs with shape
            `(batch, channels, depth, height, width)`.
            It defaults to the `image_data_format` value found in your
            Keras config file at `~/.keras/keras.json`.
            If you never set it, then it will be "channels_last".
        dilation_rate: not an explicit argument of this layer.
            The transposed convolution in `call` does not read it
            and `get_config` drops it from the config.
        activation: Activation function to use
            (see [activations](../activations.md)).
            If you don't specify anything, no activation is applied
            (ie. "linear" activation: `a(x) = x`).
        use_bias: Boolean, whether the layer uses a bias vector.
        kernel_initializer: Initializer for the `kernel` weights matrix
            (see [initializers](../initializers.md)).
        bias_initializer: Initializer for the bias vector
            (see [initializers](../initializers.md)).
        kernel_regularizer: Regularizer function applied to
            the `kernel` weights matrix
            (see [regularizer](../regularizers.md)).
        bias_regularizer: Regularizer function applied to the bias vector
            (see [regularizer](../regularizers.md)).
        activity_regularizer: Regularizer function applied to
            the output of the layer (its "activation").
            (see [regularizer](../regularizers.md)).
        kernel_constraint: Constraint function applied to the kernel matrix
            (see [constraints](../constraints.md)).
        bias_constraint: Constraint function applied to the bias vector
            (see [constraints](../constraints.md)).

    # Input shape
        5D tensor with shape:
        `(batch, channels, depth, rows, cols)` if data_format='channels_first'
        or 5D tensor with shape:
        `(batch, depth, rows, cols, channels)` if data_format='channels_last'.

    # Output shape
        5D tensor with shape:
        `(batch, filters, new_depth, new_rows, new_cols)` if data_format='channels_first'
        or 5D tensor with shape:
        `(batch, new_depth, new_rows, new_cols, filters)` if data_format='channels_last'.
        `depth` and `rows` and `cols` values might have changed due to padding.

    # References
        - [A guide to convolution arithmetic for deep learning](https://arxiv.org/abs/1603.07285v1)
        - [Deconvolutional Networks](http://www.matthewzeiler.com/pubs/cvpr2010/cvpr2010.pdf)
    """

    def __init__(self, filters,
                 kernel_size,
                 strides=(1, 1, 1),
                 padding='valid',
                 data_format=None,
                 activation=None,
                 use_bias=True,
                 kernel_initializer='glorot_uniform',
                 bias_initializer='zeros',
                 kernel_regularizer=None,
                 bias_regularizer=None,
                 activity_regularizer=None,
                 kernel_constraint=None,
                 bias_constraint=None,
                 **kwargs):
        super(Conv3DTranspose, self).__init__(
            filters,
            kernel_size,
            strides=strides,
            padding=padding,
            data_format=data_format,
            activation=activation,
            use_bias=use_bias,
            kernel_initializer=kernel_initializer,
            bias_initializer=bias_initializer,
            kernel_regularizer=kernel_regularizer,
            bias_regularizer=bias_regularizer,
            activity_regularizer=activity_regularizer,
            kernel_constraint=kernel_constraint,
            bias_constraint=bias_constraint,
            **kwargs)
        self.input_spec = InputSpec(ndim=5)

    def build(self, input_shape):
        """Create the kernel (and optional bias) weights.

        # Arguments
            input_shape: shape tuple of the input; must have 5 entries
                and a defined channel dimension.

        # Raises
            ValueError: if `input_shape` does not have 5 entries or
                its channel dimension is `None`.
        """
        if len(input_shape) != 5:
            # Build one message string; a comma here would create a
            # two-argument exception instead.
            raise ValueError('Inputs should have rank 5; '
                             'Received input shape: ' + str(input_shape))
        if self.data_format == 'channels_first':
            channel_axis = 1
        else:
            channel_axis = -1
        if input_shape[channel_axis] is None:
            raise ValueError('The channel dimension of the inputs '
                             'should be defined. Found `None`.')
        input_dim = input_shape[channel_axis]
        # Kernel shape is the spatial sizes, then filters, then input channels.
        kernel_shape = self.kernel_size + (self.filters, input_dim)

        self.kernel = self.add_weight(shape=kernel_shape,
                                      initializer=self.kernel_initializer,
                                      name='kernel',
                                      regularizer=self.kernel_regularizer,
                                      constraint=self.kernel_constraint)
        if self.use_bias:
            self.bias = self.add_weight(shape=(self.filters,),
                                        initializer=self.bias_initializer,
                                        name='bias',
                                        regularizer=self.bias_regularizer,
                                        constraint=self.bias_constraint)
        else:
            self.bias = None
        # Set input spec.
        self.input_spec = InputSpec(ndim=5, axes={channel_axis: input_dim})
        self.built = True

    def call(self, inputs):
        """Apply the transposed convolution, bias and activation.

        # Arguments
            inputs: input tensor.

        # Returns
            The output tensor.
        """
        input_shape = K.shape(inputs)
        batch_size = input_shape[0]
        if self.data_format == 'channels_first':
            d_axis, h_axis, w_axis = 2, 3, 4
        else:
            d_axis, h_axis, w_axis = 1, 2, 3

        depth = input_shape[d_axis]
        height = input_shape[h_axis]
        width = input_shape[w_axis]

        kernel_d, kernel_h, kernel_w = self.kernel_size
        stride_d, stride_h, stride_w = self.strides

        # Infer the dynamic output shape:
        out_depth = conv_utils.deconv_length(depth,
                                             stride_d, kernel_d,
                                             self.padding)
        out_height = conv_utils.deconv_length(height,
                                              stride_h, kernel_h,
                                              self.padding)
        out_width = conv_utils.deconv_length(width,
                                             stride_w, kernel_w,
                                             self.padding)

        if self.data_format == 'channels_first':
            output_shape = (batch_size, self.filters, out_depth, out_height, out_width)
        else:
            output_shape = (batch_size, out_depth, out_height, out_width, self.filters)

        outputs = K.conv3d_transpose(inputs,
                                     self.kernel,
                                     output_shape,
                                     self.strides,
                                     padding=self.padding,
                                     data_format=self.data_format)

        # Test the boolean flag, not the bias tensor itself: the
        # truth value of a backend tensor is not a reliable check.
        if self.use_bias:
            outputs = K.bias_add(
                outputs,
                self.bias,
                data_format=self.data_format)

        if self.activation is not None:
            return self.activation(outputs)
        return outputs

    def compute_output_shape(self, input_shape):
        """Compute the static output shape for a given input shape.

        # Arguments
            input_shape: shape tuple of the input.

        # Returns
            The output shape tuple: the channel entry is set to
            `filters` and each spatial entry is passed through
            `conv_utils.deconv_length`.
        """
        output_shape = list(input_shape)
        if self.data_format == 'channels_first':
            c_axis, d_axis, h_axis, w_axis = 1, 2, 3, 4
        else:
            c_axis, d_axis, h_axis, w_axis = 4, 1, 2, 3

        kernel_d, kernel_h, kernel_w = self.kernel_size
        stride_d, stride_h, stride_w = self.strides

        output_shape[c_axis] = self.filters
        output_shape[d_axis] = conv_utils.deconv_length(output_shape[d_axis],
                                                        stride_d,
                                                        kernel_d,
                                                        self.padding)
        output_shape[h_axis] = conv_utils.deconv_length(output_shape[h_axis],
                                                        stride_h,
                                                        kernel_h,
                                                        self.padding)
        output_shape[w_axis] = conv_utils.deconv_length(output_shape[w_axis],
                                                        stride_w,
                                                        kernel_w,
                                                        self.padding)

        return tuple(output_shape)

    def get_config(self):
        """Return the parent's config without the `dilation_rate` entry."""
        config = super(Conv3DTranspose, self).get_config()
        config.pop('dilation_rate')
        return config
class SeparableConv2D(Conv2D):
"""Depthwise separable 2D convolution.
@@ -1891,6 +2122,7 @@ Convolution3D = Conv3D
SeparableConvolution2D = SeparableConv2D
# Alternative names for the transposed-convolution layers.
Convolution2DTranspose = Conv2DTranspose
Deconvolution2D = Deconv2D = Conv2DTranspose
Deconvolution3D = Deconv3D = Conv3DTranspose
# Legacy aliases
AtrousConv1D = AtrousConvolution1D
+45
Ver Arquivo
@@ -388,6 +388,51 @@ def test_convolution_3d():
stack_size))
# Exercises `convolutional.Conv3DTranspose` through `layer_test` and checks
# that an input shape with an undefined channel dimension is rejected.
# NOTE(review): this view has lost all indentation, so the nesting of the
# second `layer_test` call and of the `pytest.raises` block relative to the
# loops cannot be determined here; confirm against the real test file.
@keras_test
def test_conv3d_transpose():
filters = 2
stack_size = 3
num_depth = 7
num_row = 5
num_col = 6
# Sweep the padding modes in `_convolution_paddings`, two stride settings
# and both data formats.
for padding in _convolution_paddings:
for strides in [(1, 1, 1), (2, 2, 2)]:
for data_format in ['channels_first', 'channels_last']:
# The 'same' padding case is only run with unit strides.
if padding == 'same' and strides != (1, 1, 1):
continue
# NOTE(review): `input_shape` below puts `stack_size` last for both
# values of `data_format`; confirm that is intended for 'channels_first'.
layer_test(convolutional.Conv3DTranspose,
kwargs={'filters': filters,
'kernel_size': 3,
'padding': padding,
'strides': strides,
'data_format': data_format},
input_shape=(None, num_depth, num_row, num_col, stack_size),
fixed_batch_size=True)
# Same layer with regularizers and constraints set, 'channels_first' only.
layer_test(convolutional.Conv3DTranspose,
kwargs={'filters': filters,
'kernel_size': 3,
'padding': padding,
'data_format': 'channels_first',
'activation': None,
'kernel_regularizer': 'l2',
'bias_regularizer': 'l2',
'activity_regularizer': 'l2',
'kernel_constraint': 'max_norm',
'bias_constraint': 'max_norm',
'strides': strides},
input_shape=(None, stack_size, num_depth, num_row, num_col),
fixed_batch_size=True)
# Test invalid use case
# The last entry of `batch_input_shape` is None and no `data_format` is passed.
with pytest.raises(ValueError):
model = Sequential([convolutional.Conv3DTranspose(filters=filters,
kernel_size=3,
padding=padding,
batch_input_shape=(None, None, 5, None, None))])
@keras_test
def test_maxpooling_3d():
pool_size = (3, 3, 3)