conv3d_transpose in tf, th, and cntk (#7161)
* conv3d_transpose in tf and th * fix _preprocess_deconv_output_shape error * cntk conv3d_transpose * conv3d_transpose test * formatting * cleanup tests * fix incorrect axis ordering and docs * fix incorrect axis ordering and docs * deconv3d_output_shape to fix errors * remove conv2d_transpose reference in theano backend * remove kernel_size loop from test * put depth first in test and add dim to invalid use case input * formatting - removed extra line * fix pep8 * remove extraneous args from tf conv3d_transpose function * default val for data_format=None
Esse commit está contido em:
@@ -1386,6 +1386,41 @@ def conv3d(x, kernel, strides=(1, 1, 1), padding='valid',
|
||||
return _postprocess_conv3d_output(x, data_format)
|
||||
|
||||
|
||||
def conv3d_transpose(x, kernel, output_shape, strides=(1, 1, 1),
                     padding='valid', data_format=None):
    """3D deconvolution (i.e. transposed convolution), CNTK backend.

    # Arguments
        x: input tensor.
        kernel: kernel tensor.
        output_shape: output shape; its first entry (the batch axis)
            is dropped before being handed to CNTK.
        strides: strides tuple.
        padding: padding mode, converted by `_preprocess_border_mode`.
        data_format: string, `"channels_last"` or `"channels_first"`.
            Falls back to `image_data_format()` when `None`.

    # Returns
        The output of `C.convolution_transpose` after
        `_postprocess_conv3d_output`.

    # Raises
        ValueError: if `data_format` is neither `channels_last`
            nor `channels_first`.
    """
    if data_format is None:
        data_format = image_data_format()
    if data_format not in {'channels_first', 'channels_last'}:
        raise ValueError('Unknown data_format ' + str(data_format))

    x = _preprocess_conv3d_input(x, data_format)
    kernel = _preprocess_conv3d_kernel(kernel, data_format)
    auto_pad = _preprocess_border_mode(padding)
    cntk_strides = (1,) + strides

    # cntk output_shape does not include batch axis
    cntk_shape = output_shape[1:]
    # in keras2, need handle output shape in different format
    if data_format == 'channels_last':
        # Rotate the last of the four entries to the front.
        rotated = list(cntk_shape)
        rotated[0], rotated[1], rotated[2], rotated[3] = (
            cntk_shape[3], cntk_shape[0], cntk_shape[1], cntk_shape[2])
        cntk_shape = tuple(rotated)

    deconv = C.convolution_transpose(
        kernel,
        x,
        cntk_strides,
        auto_padding=[False, auto_pad, auto_pad, auto_pad],
        output_shape=cntk_shape)
    return _postprocess_conv3d_output(deconv, data_format)
|
||||
|
||||
|
||||
def pool2d(x, pool_size, strides=(1, 1),
|
||||
padding='valid', data_format=None,
|
||||
pool_mode='max'):
|
||||
|
||||
@@ -2911,6 +2911,26 @@ def in_top_k(predictions, targets, k):
|
||||
|
||||
# CONVOLUTIONS
|
||||
|
||||
def _preprocess_deconv3d_output_shape(x, shape, data_format):
    """Build the output-shape tensor for the 3D deconvolution.

    # Arguments
        x: input tensor; only read (through `tf.shape`) when the
            first entry of `shape` is `None`.
        shape: output shape.
        data_format: string, `"channels_last"` or `"channels_first"`.

    # Returns
        The output shape, packed into one tensor with `tf.stack`.
    """
    if data_format == 'channels_first':
        # Move entry 1 to the end: (0, 1, 2, 3, 4) -> (0, 2, 3, 4, 1).
        shape = tuple(shape[axis] for axis in (0, 2, 3, 4, 1))

    if shape[0] is None:
        # Leading entry unknown: use the leading dimension of `x`.
        leading = tf.shape(x)[0]
        shape = (leading,) + tuple(shape[1:])
    return tf.stack(list(shape))
|
||||
|
||||
|
||||
def _preprocess_deconv_output_shape(x, shape, data_format):
|
||||
"""Get the output_shape for the deconvolution.
|
||||
|
||||
@@ -3286,6 +3306,43 @@ def conv3d(x, kernel, strides=(1, 1, 1), padding='valid',
|
||||
return _postprocess_conv3d_output(x, data_format)
|
||||
|
||||
|
||||
def conv3d_transpose(x, kernel, output_shape, strides=(1, 1, 1),
                     padding='valid', data_format=None):
    """3D deconvolution (i.e. transposed convolution).

    # Arguments
        x: input tensor.
        kernel: kernel tensor.
        output_shape: 1D int tensor for the output shape
            (a tuple or list is stacked into a tensor first).
        strides: strides tuple.
        padding: string, "same" or "valid".
        data_format: string, `"channels_last"` or `"channels_first"`.
            Whether to use Theano or TensorFlow data format
            for inputs/kernels/outputs.
            Falls back to `image_data_format()` when `None`.

    # Returns
        A tensor, result of transposed 3D convolution.

    # Raises
        ValueError: if `data_format` is neither `channels_last`
            nor `channels_first`.
    """
    if data_format is None:
        data_format = image_data_format()
    if data_format not in {'channels_first', 'channels_last'}:
        raise ValueError('Unknown data_format ' + str(data_format))
    if isinstance(output_shape, (tuple, list)):
        output_shape = tf.stack(output_shape)

    x = _preprocess_conv3d_input(x, data_format)
    deconv_shape = _preprocess_deconv3d_output_shape(x, output_shape,
                                                     data_format)
    tf_padding = _preprocess_padding(padding)
    # Wrap the three given strides with a leading and a trailing 1.
    tf_strides = (1,) + strides + (1,)

    deconv = tf.nn.conv3d_transpose(x, kernel, deconv_shape, tf_strides,
                                    padding=tf_padding)
    return _postprocess_conv3d_output(deconv, data_format)
|
||||
|
||||
|
||||
def pool2d(x, pool_size, strides=(1, 1),
|
||||
padding='valid', data_format=None,
|
||||
pool_mode='max'):
|
||||
|
||||
@@ -1968,6 +1968,63 @@ def conv3d(x, kernel, strides=(1, 1, 1),
|
||||
return conv_out
|
||||
|
||||
|
||||
def conv3d_transpose(x, kernel, output_shape, strides=(1, 1, 1),
                     padding='valid', data_format=None):
    """3D deconvolution (transposed convolution).

    # Arguments
        x: input tensor.
        kernel: kernel tensor.
        output_shape: desired dimensions of output.
        strides: strides tuple.
        padding: string, "same" or "valid".
        data_format: "channels_last" or "channels_first".
            Whether to use Theano or TensorFlow data format
            in inputs/kernels/outputs.
            Falls back to `image_data_format()` when `None`.

    # Returns
        The result of the transposed 3D convolution, after
        `_postprocess_conv3d_output`.

    # Raises
        ValueError: if `data_format` is neither `channels_last`
            nor `channels_first`.
        ValueError: if using an even kernel size with padding 'same'.
    """
    flip_filters = False
    if data_format is None:
        data_format = image_data_format()
    if data_format not in {'channels_first', 'channels_last'}:
        # str() keeps this a ValueError even for non-string values,
        # matching the other backends.
        raise ValueError('Unknown data_format ' + str(data_format))

    if data_format == 'channels_last':
        # Move the last entry to position 1:
        # (0, 1, 2, 3, 4) -> (0, 4, 1, 2, 3).
        output_shape = (output_shape[0],
                        output_shape[4],
                        output_shape[1],
                        output_shape[2],
                        output_shape[3])

    if hasattr(kernel, '_keras_shape'):
        kernel_shape = kernel._keras_shape
    else:
        # Will only work if `kernel` is a shared variable.
        kernel_shape = kernel.eval().shape

    # NOTE(review): only the first kernel dimension is checked for
    # evenness here -- confirm whether all three should be.
    if padding == 'same' and kernel_shape[0] % 2 == 0:
        raise ValueError('In `Conv3DTranspose`, with padding mode `same`, '
                         'even kernel sizes are only supported with Tensorflow. '
                         'With Theano, set `kernel_size` to an odd number.')

    kernel_shape = _preprocess_conv3d_filter_shape(kernel_shape, data_format)

    x = _preprocess_conv3d_input(x, data_format)
    kernel = _preprocess_conv3d_kernel(kernel, data_format)

    th_padding = _preprocess_padding(padding)
    op = T.nnet.abstract_conv.AbstractConv3d_gradInputs(imshp=None,
                                                        kshp=kernel_shape,
                                                        subsample=strides,
                                                        border_mode=th_padding,
                                                        filter_flip=not flip_filters)
    # The op receives only the last three entries of the output shape.
    conv_out = op(kernel, x, output_shape[2:])
    conv_out = _postprocess_conv3d_output(conv_out, x, padding,
                                          kernel_shape, strides, data_format)
    return conv_out
|
||||
|
||||
|
||||
def pool2d(x, pool_size, strides=(1, 1), padding='valid',
|
||||
data_format=None, pool_mode='max'):
|
||||
if data_format is None:
|
||||
|
||||
@@ -806,6 +806,237 @@ class Conv2DTranspose(Conv2D):
|
||||
return config
|
||||
|
||||
|
||||
class Conv3DTranspose(Conv3D):
    """Transposed convolution layer (sometimes called Deconvolution).

    The need for transposed convolutions generally arises
    from the desire to use a transformation going in the opposite direction
    of a normal convolution, i.e., from something that has the shape of the
    output of some convolution to something that has the shape of its input
    while maintaining a connectivity pattern that is compatible with
    said convolution.

    When using this layer as the first layer in a model,
    provide the keyword argument `input_shape`
    (tuple of integers, does not include the sample axis),
    e.g. `input_shape=(128, 128, 128, 3)` for a 128x128x128 volume with 3 channels
    if `data_format="channels_last"`.

    # Arguments
        filters: Integer, the dimensionality of the output space
            (i.e. the number of output filters in the convolution).
        kernel_size: An integer or tuple/list of 3 integers, specifying the
            depth, height and width of the 3D convolution window.
            Can be a single integer to specify the same value for
            all spatial dimensions.
        strides: An integer or tuple/list of 3 integers,
            specifying the strides of the convolution
            along the depth, height and width.
            Can be a single integer to specify the same value for
            all spatial dimensions.
            Specifying any stride value != 1 is incompatible with specifying
            any `dilation_rate` value != 1.
        padding: one of `"valid"` or `"same"` (case-insensitive).
        data_format: A string,
            one of `channels_last` (default) or `channels_first`.
            The ordering of the dimensions in the inputs.
            `channels_last` corresponds to inputs with shape
            `(batch, depth, height, width, channels)` while `channels_first`
            corresponds to inputs with shape
            `(batch, channels, depth, height, width)`.
            It defaults to the `image_data_format` value found in your
            Keras config file at `~/.keras/keras.json`.
            If you never set it, then it will be "channels_last".
        dilation_rate: an integer or tuple/list of 3 integers, specifying
            the dilation rate to use for dilated convolution.
            Can be a single integer to specify the same value for
            all spatial dimensions.
            Currently, specifying any `dilation_rate` value != 1 is
            incompatible with specifying any stride value != 1.
        activation: Activation function to use
            (see [activations](../activations.md)).
            If you don't specify anything, no activation is applied
            (ie. "linear" activation: `a(x) = x`).
        use_bias: Boolean, whether the layer uses a bias vector.
        kernel_initializer: Initializer for the `kernel` weights matrix
            (see [initializers](../initializers.md)).
        bias_initializer: Initializer for the bias vector
            (see [initializers](../initializers.md)).
        kernel_regularizer: Regularizer function applied to
            the `kernel` weights matrix
            (see [regularizer](../regularizers.md)).
        bias_regularizer: Regularizer function applied to the bias vector
            (see [regularizer](../regularizers.md)).
        activity_regularizer: Regularizer function applied to
            the output of the layer (its "activation").
            (see [regularizer](../regularizers.md)).
        kernel_constraint: Constraint function applied to the kernel matrix
            (see [constraints](../constraints.md)).
        bias_constraint: Constraint function applied to the bias vector
            (see [constraints](../constraints.md)).

    # Input shape
        5D tensor with shape:
        `(batch, channels, depth, rows, cols)` if data_format='channels_first'
        or 5D tensor with shape:
        `(batch, depth, rows, cols, channels)` if data_format='channels_last'.

    # Output shape
        5D tensor with shape:
        `(batch, filters, new_depth, new_rows, new_cols)` if data_format='channels_first'
        or 5D tensor with shape:
        `(batch, new_depth, new_rows, new_cols, filters)` if data_format='channels_last'.
        `depth` and `rows` and `cols` values might have changed due to padding.

    # References
        - [A guide to convolution arithmetic for deep learning](https://arxiv.org/abs/1603.07285v1)
        - [Deconvolutional Networks](http://www.matthewzeiler.com/pubs/cvpr2010/cvpr2010.pdf)
    """

    def __init__(self, filters,
                 kernel_size,
                 strides=(1, 1, 1),
                 padding='valid',
                 data_format=None,
                 activation=None,
                 use_bias=True,
                 kernel_initializer='glorot_uniform',
                 bias_initializer='zeros',
                 kernel_regularizer=None,
                 bias_regularizer=None,
                 activity_regularizer=None,
                 kernel_constraint=None,
                 bias_constraint=None,
                 **kwargs):
        super(Conv3DTranspose, self).__init__(
            filters,
            kernel_size,
            strides=strides,
            padding=padding,
            data_format=data_format,
            activation=activation,
            use_bias=use_bias,
            kernel_initializer=kernel_initializer,
            bias_initializer=bias_initializer,
            kernel_regularizer=kernel_regularizer,
            bias_regularizer=bias_regularizer,
            activity_regularizer=activity_regularizer,
            kernel_constraint=kernel_constraint,
            bias_constraint=bias_constraint,
            **kwargs)
        self.input_spec = InputSpec(ndim=5)

    def build(self, input_shape):
        """Create the kernel (and optional bias) weights.

        # Arguments
            input_shape: shape tuple of the input; must have 5 entries
                and a defined channel dimension.

        # Raises
            ValueError: if `input_shape` does not have 5 entries or
                its channel dimension is `None`.
        """
        if len(input_shape) != 5:
            # Build one message string; a comma here would hand
            # ValueError two separate arguments.
            raise ValueError('Inputs should have rank 5'
                             '; Received input shape: ' + str(input_shape))
        if self.data_format == 'channels_first':
            channel_axis = 1
        else:
            channel_axis = -1
        if input_shape[channel_axis] is None:
            raise ValueError('The channel dimension of the inputs '
                             'should be defined. Found `None`.')
        input_dim = input_shape[channel_axis]
        # Transposed kernel layout: filters come before input channels.
        kernel_shape = self.kernel_size + (self.filters, input_dim)

        self.kernel = self.add_weight(shape=kernel_shape,
                                      initializer=self.kernel_initializer,
                                      name='kernel',
                                      regularizer=self.kernel_regularizer,
                                      constraint=self.kernel_constraint)
        if self.use_bias:
            self.bias = self.add_weight(shape=(self.filters,),
                                        initializer=self.bias_initializer,
                                        name='bias',
                                        regularizer=self.bias_regularizer,
                                        constraint=self.bias_constraint)
        else:
            self.bias = None
        # Set input spec.
        self.input_spec = InputSpec(ndim=5, axes={channel_axis: input_dim})
        self.built = True

    def call(self, inputs):
        """Apply the transposed convolution, bias and activation.

        # Arguments
            inputs: 5D input tensor.

        # Returns
            The output tensor.
        """
        input_shape = K.shape(inputs)
        batch_size = input_shape[0]
        if self.data_format == 'channels_first':
            d_axis, h_axis, w_axis = 2, 3, 4
        else:
            d_axis, h_axis, w_axis = 1, 2, 3

        depth = input_shape[d_axis]
        height = input_shape[h_axis]
        width = input_shape[w_axis]

        kernel_d, kernel_h, kernel_w = self.kernel_size
        stride_d, stride_h, stride_w = self.strides

        # Infer the dynamic output shape:
        out_depth = conv_utils.deconv_length(depth,
                                             stride_d, kernel_d,
                                             self.padding)
        out_height = conv_utils.deconv_length(height,
                                              stride_h, kernel_h,
                                              self.padding)
        out_width = conv_utils.deconv_length(width,
                                             stride_w, kernel_w,
                                             self.padding)

        if self.data_format == 'channels_first':
            output_shape = (batch_size, self.filters, out_depth, out_height, out_width)
        else:
            output_shape = (batch_size, out_depth, out_height, out_width, self.filters)

        outputs = K.conv3d_transpose(inputs,
                                     self.kernel,
                                     output_shape,
                                     self.strides,
                                     padding=self.padding,
                                     data_format=self.data_format)

        # Branch on the boolean flag, not on the truthiness of the
        # bias variable itself (`build` sets it to None without bias).
        if self.use_bias:
            outputs = K.bias_add(
                outputs,
                self.bias,
                data_format=self.data_format)

        if self.activation is not None:
            return self.activation(outputs)
        return outputs

    def compute_output_shape(self, input_shape):
        """Compute the static output shape for a given input shape.

        # Arguments
            input_shape: shape tuple of the input.

        # Returns
            Shape tuple with the channel entry set to `filters` and the
            depth/height/width entries mapped through
            `conv_utils.deconv_length`.
        """
        output_shape = list(input_shape)
        if self.data_format == 'channels_first':
            c_axis, d_axis, h_axis, w_axis = 1, 2, 3, 4
        else:
            c_axis, d_axis, h_axis, w_axis = 4, 1, 2, 3

        kernel_d, kernel_h, kernel_w = self.kernel_size
        stride_d, stride_h, stride_w = self.strides

        output_shape[c_axis] = self.filters
        output_shape[d_axis] = conv_utils.deconv_length(output_shape[d_axis],
                                                        stride_d,
                                                        kernel_d,
                                                        self.padding)
        output_shape[h_axis] = conv_utils.deconv_length(output_shape[h_axis],
                                                        stride_h,
                                                        kernel_h,
                                                        self.padding)
        output_shape[w_axis] = conv_utils.deconv_length(output_shape[w_axis],
                                                        stride_w,
                                                        kernel_w,
                                                        self.padding)

        return tuple(output_shape)

    def get_config(self):
        """Return the parent config without the `dilation_rate` entry."""
        config = super(Conv3DTranspose, self).get_config()
        # `call` never forwards a dilation rate to K.conv3d_transpose,
        # so the entry is dropped from the serialized config.
        config.pop('dilation_rate')
        return config
|
||||
|
||||
|
||||
class SeparableConv2D(Conv2D):
|
||||
"""Depthwise separable 2D convolution.
|
||||
|
||||
@@ -1891,6 +2122,7 @@ Convolution3D = Conv3D
|
||||
SeparableConvolution2D = SeparableConv2D
|
||||
Convolution2DTranspose = Conv2DTranspose
|
||||
Deconvolution2D = Deconv2D = Conv2DTranspose
|
||||
Deconvolution3D = Deconv3D = Conv3DTranspose
|
||||
|
||||
# Legacy aliases
|
||||
AtrousConv1D = AtrousConvolution1D
|
||||
|
||||
@@ -388,6 +388,51 @@ def test_convolution_3d():
|
||||
stack_size))
|
||||
|
||||
|
||||
@keras_test
def test_conv3d_transpose():
    """Run `layer_test` on Conv3DTranspose and check one invalid input."""
    filters, stack_size = 2, 3
    num_depth, num_row, num_col = 7, 5, 6
    unit_strides = (1, 1, 1)

    for padding in _convolution_paddings:
        for strides in [unit_strides, (2, 2, 2)]:
            for data_format in ['channels_first', 'channels_last']:
                # 'same' padding is only exercised with unit strides.
                if padding == 'same' and strides != unit_strides:
                    continue
                basic_kwargs = {'filters': filters,
                                'kernel_size': 3,
                                'padding': padding,
                                'strides': strides,
                                'data_format': data_format}
                layer_test(convolutional.Conv3DTranspose,
                           kwargs=basic_kwargs,
                           input_shape=(None, num_depth, num_row, num_col, stack_size),
                           fixed_batch_size=True)

    # NOTE(review): indentation is not recoverable from this view; the
    # two checks below are assumed to sit at function level (suggested
    # by the blank lines before them), reusing the last `padding` and
    # `strides` values left over from the loops above -- confirm.
    regularized_kwargs = {'filters': filters,
                          'kernel_size': 3,
                          'padding': padding,
                          'data_format': 'channels_first',
                          'activation': None,
                          'kernel_regularizer': 'l2',
                          'bias_regularizer': 'l2',
                          'activity_regularizer': 'l2',
                          'kernel_constraint': 'max_norm',
                          'bias_constraint': 'max_norm',
                          'strides': strides}
    layer_test(convolutional.Conv3DTranspose,
               kwargs=regularized_kwargs,
               input_shape=(None, stack_size, num_depth, num_row, num_col),
               fixed_batch_size=True)

    # Test invalid use case
    with pytest.raises(ValueError):
        model = Sequential([
            convolutional.Conv3DTranspose(
                filters=filters,
                kernel_size=3,
                padding=padding,
                batch_input_shape=(None, None, 5, None, None))])
|
||||
|
||||
|
||||
@keras_test
|
||||
def test_maxpooling_3d():
|
||||
pool_size = (3, 3, 3)
|
||||
|
||||
Referência em uma Nova Issue
Bloquear um usuário