conv3d_transpose in tf, th, and cntk (#7161)

* conv3d_transpose in tf and th * fix _preprocess_deconv_output_shape error * cntk conv3d_transpose * conv3d_transpose test * formatting * cleanup tests * fix incorrect axis ordering and docs * fix incorrect axis ordering and docs * deconv3d_output_shape to fix errors * remove conv2d_transpose reference in theano backend * remove kernel_size loop from test * put depth first in test and add dim to invalid use case input * formatting - removed extra line * fix pep8 * remove extraneous args from tf conv3d_transpose function * default val for data_format=None
2017-06-29 12:09:12 -04:00
commit 6c2dea64fc
@@ -1386,6 +1386,41 @@ def conv3d(x, kernel, strides=(1, 1, 1), padding='valid',
    return _postprocess_conv3d_output(x, data_format)


+def conv3d_transpose(x, kernel, output_shape, strides=(1, 1, 1),
+                     padding='valid', data_format=None):
+    """3D transposed convolution for the CNTK backend.
+
+    `output_shape` is given with the batch axis first; `data_format`
+    falls back to `image_data_format()` when it is None.
+
+    # Raises
+        ValueError: if `data_format` is not a known format.
+    """
+    if data_format is None:
+        data_format = image_data_format()
+    if data_format not in {'channels_first', 'channels_last'}:
+        raise ValueError('Unknown data_format ' + str(data_format))
+
+    x = _preprocess_conv3d_input(x, data_format)
+    kernel = _preprocess_conv3d_kernel(kernel, data_format)
+    auto_pad = _preprocess_border_mode(padding)
+    full_strides = (1,) + strides
+    # Drop the batch axis: CNTK's output_shape does not include it.
+    target_shape = output_shape[1:]
+    if data_format == 'channels_last':
+        # Move entry 3 (the last one in channels_last order) to the front.
+        reordered = list(target_shape)
+        reordered[:4] = [target_shape[3], target_shape[0],
+                         target_shape[1], target_shape[2]]
+        target_shape = tuple(reordered)
+
+    result = C.convolution_transpose(
+        kernel, x, full_strides,
+        auto_padding=[False, auto_pad, auto_pad, auto_pad],
+        output_shape=target_shape)
+    return _postprocess_conv3d_output(result, data_format)
+
+
 def pool2d(x, pool_size, strides=(1, 1),
           padding='valid', data_format=None,
           pool_mode='max'):
@@ -2911,6 +2911,26 @@ def in_top_k(predictions, targets, k):

 # CONVOLUTIONS

+def _preprocess_deconv3d_output_shape(x, shape, data_format):
+    """Prepare the `output_shape` argument of a 3D deconvolution.
+
+    # Arguments
+        x: input tensor; `tf.shape(x)[0]` replaces a `None` batch entry.
+        shape: output shape.
+        data_format: string, `"channels_last"` or `"channels_first"`.
+
+    # Returns
+        The output shape, with entry 1 moved last for `"channels_first"`
+        and stacked into a tensor when its first entry was `None`.
+    """
+    if data_format == 'channels_first':
+        shape = (shape[0],) + tuple(shape[i] for i in (2, 3, 4)) + (shape[1],)
+    if shape[0] is not None:
+        return shape
+    # First entry is None: take it from `tf.shape(x)` instead.
+    return tf.stack([tf.shape(x)[0]] + list(shape[1:]))
+
+
 def _preprocess_deconv_output_shape(x, shape, data_format):
    """Get the output_shape for the deconvolution.

@@ -3286,6 +3306,43 @@ def conv3d(x, kernel, strides=(1, 1, 1), padding='valid',
    return _postprocess_conv3d_output(x, data_format)


+def conv3d_transpose(x, kernel, output_shape, strides=(1, 1, 1),
+                     padding='valid', data_format=None):
+    """3D deconvolution (i.e. transposed convolution).
+
+    # Arguments
+        x: input tensor.
+        kernel: kernel tensor.
+        output_shape: 1D int tensor (or tuple/list) for the output shape.
+        strides: strides tuple.
+        padding: string, "same" or "valid".
+        data_format: string, `"channels_last"` or `"channels_first"`.
+            Defaults to `image_data_format()` when None.
+
+    # Returns
+        A tensor, result of transposed 3D convolution.
+
+    # Raises
+        ValueError: if `data_format` is neither `channels_last`
+            nor `channels_first`.
+    """
+    if data_format is None:
+        data_format = image_data_format()
+    if data_format not in {'channels_first', 'channels_last'}:
+        raise ValueError('Unknown data_format ' + str(data_format))
+    if isinstance(output_shape, (tuple, list)):
+        output_shape = tf.stack(output_shape)
+
+    inputs = _preprocess_conv3d_input(x, data_format)
+    target_shape = _preprocess_deconv3d_output_shape(inputs, output_shape,
+                                                     data_format)
+    tf_padding = _preprocess_padding(padding)
+    full_strides = (1,) + strides + (1,)
+    result = tf.nn.conv3d_transpose(inputs, kernel, target_shape,
+                                    full_strides, padding=tf_padding)
+    return _postprocess_conv3d_output(result, data_format)
+
+
 def pool2d(x, pool_size, strides=(1, 1),
           padding='valid', data_format=None,
           pool_mode='max'):
@@ -1968,6 +1968,63 @@ def conv3d(x, kernel, strides=(1, 1, 1),
    return conv_out


+def conv3d_transpose(x, kernel, output_shape, strides=(1, 1, 1),
+                     padding='valid', data_format=None):
+    """3D deconvolution (transposed convolution).
+
+    # Arguments
+        x: input tensor.
+        kernel: kernel tensor.
+        output_shape: desired dimensions of output.
+        strides: strides tuple.
+        padding: string, "same" or "valid".
+        data_format: "channels_last" or "channels_first".
+            Whether to use Theano or TensorFlow data format
+            in inputs/kernels/outputs.
+
+    # Returns
+        A tensor, result of the transposed 3D convolution.
+
+    # Raises
+        ValueError: if `data_format` is unknown, or if using an even
+            kernel size with padding 'same'.
+    """
+    flip_filters = False
+    if data_format is None:
+        data_format = image_data_format()
+    if data_format not in {'channels_first', 'channels_last'}:
+        raise ValueError('Unknown data_format ' + str(data_format))
+
+    if data_format == 'channels_last':
+        output_shape = (output_shape[0], output_shape[4],
+                        output_shape[1], output_shape[2], output_shape[3])
+
+    if hasattr(kernel, '_keras_shape'):
+        kernel_shape = kernel._keras_shape
+    else:
+        # Will only work if `kernel` is a shared variable.
+        kernel_shape = kernel.eval().shape
+
+    # NOTE(review): only `kernel_shape[0]` is checked for evenness.
+    if padding == 'same' and kernel_shape[0] % 2 == 0:
+        raise ValueError('In `Conv3DTranspose`, with padding mode `same`, '
+                         'even kernel sizes are only supported with Tensorflow. '
+                         'With Theano, set `kernel_size` to an odd number.')
+
+    kernel_shape = _preprocess_conv3d_filter_shape(kernel_shape, data_format)
+    x = _preprocess_conv3d_input(x, data_format)
+    kernel = _preprocess_conv3d_kernel(kernel, data_format)
+    th_padding = _preprocess_padding(padding)
+    op = T.nnet.abstract_conv.AbstractConv3d_gradInputs(imshp=None,
+                                                        kshp=kernel_shape,
+                                                        subsample=strides,
+                                                        border_mode=th_padding,
+                                                        filter_flip=not flip_filters)
+    conv_out = op(kernel, x, output_shape[2:])
+    return _postprocess_conv3d_output(conv_out, x, padding,
+                                      kernel_shape, strides, data_format)
+
+
 def pool2d(x, pool_size, strides=(1, 1), padding='valid',
           data_format=None, pool_mode='max'):
    if data_format is None:
@@ -806,6 +806,237 @@ class Conv2DTranspose(Conv2D):
        return config


+class Conv3DTranspose(Conv3D):
+    """Transposed convolution layer (sometimes called Deconvolution).
+
+    The need for transposed convolutions generally arises
+    from the desire to use a transformation going in the opposite direction
+    of a normal convolution, i.e., from something that has the shape of the
+    output of some convolution to something that has the shape of its input
+    while maintaining a connectivity pattern that is compatible with
+    said convolution.
+
+    When using this layer as the first layer in a model,
+    provide the keyword argument `input_shape`
+    (tuple of integers, does not include the sample axis),
+    e.g. `input_shape=(128, 128, 128, 3)` for a 128x128x128 volume with 3 channels
+    if `data_format="channels_last"`.
+
+    # Arguments
+        filters: Integer, the dimensionality of the output space
+            (i.e. the number of output filters in the convolution).
+        kernel_size: An integer or tuple/list of 3 integers, specifying the
+            depth, height and width of the 3D convolution window.
+            Can be a single integer to specify the same value for
+            all spatial dimensions.
+        strides: An integer or tuple/list of 3 integers,
+            specifying the strides of the convolution along the
+            depth, height and width.
+            Can be a single integer to specify the same value for
+            all spatial dimensions.
+        padding: one of `"valid"` or `"same"` (case-insensitive).
+        data_format: A string,
+            one of `channels_last` (default) or `channels_first`.
+            The ordering of the dimensions in the inputs.
+            `channels_last` corresponds to inputs with shape
+            `(batch, depth, height, width, channels)` while `channels_first`
+            corresponds to inputs with shape
+            `(batch, channels, depth, height, width)`.
+            It defaults to the `image_data_format` value found in your
+            Keras config file at `~/.keras/keras.json`.
+            If you never set it, then it will be "channels_last".
+        activation: Activation function to use
+            (see [activations](../activations.md)).
+            If you don't specify anything, no activation is applied
+            (ie. "linear" activation: `a(x) = x`).
+        use_bias: Boolean, whether the layer uses a bias vector.
+        kernel_initializer: Initializer for the `kernel` weights matrix
+            (see [initializers](../initializers.md)).
+        bias_initializer: Initializer for the bias vector
+            (see [initializers](../initializers.md)).
+        kernel_regularizer: Regularizer function applied to
+            the `kernel` weights matrix
+            (see [regularizer](../regularizers.md)).
+        bias_regularizer: Regularizer function applied to the bias vector
+            (see [regularizer](../regularizers.md)).
+        activity_regularizer: Regularizer function applied to
+            the output of the layer (its "activation").
+            (see [regularizer](../regularizers.md)).
+        kernel_constraint: Constraint function applied to the kernel matrix
+            (see [constraints](../constraints.md)).
+        bias_constraint: Constraint function applied to the bias vector
+            (see [constraints](../constraints.md)).
+
+    # Input shape
+        5D tensor with shape:
+        `(batch, channels, depth, rows, cols)` if data_format='channels_first'
+        or 5D tensor with shape:
+        `(batch, depth, rows, cols, channels)` if data_format='channels_last'.
+
+    # Output shape
+        5D tensor with shape:
+        `(batch, filters, new_depth, new_rows, new_cols)` if data_format='channels_first'
+        or 5D tensor with shape:
+        `(batch, new_depth, new_rows, new_cols, filters)` if data_format='channels_last'.
+        `depth` and `rows` and `cols` values might have changed due to padding.
+
+    # References
+        - [A guide to convolution arithmetic for deep learning](https://arxiv.org/abs/1603.07285v1)
+        - [Deconvolutional Networks](http://www.matthewzeiler.com/pubs/cvpr2010/cvpr2010.pdf)
+    """
+
+    def __init__(self, filters,
+                 kernel_size,
+                 strides=(1, 1, 1),
+                 padding='valid',
+                 data_format=None,
+                 activation=None,
+                 use_bias=True,
+                 kernel_initializer='glorot_uniform',
+                 bias_initializer='zeros',
+                 kernel_regularizer=None,
+                 bias_regularizer=None,
+                 activity_regularizer=None,
+                 kernel_constraint=None,
+                 bias_constraint=None,
+                 **kwargs):
+        super(Conv3DTranspose, self).__init__(
+            filters,
+            kernel_size,
+            strides=strides,
+            padding=padding,
+            data_format=data_format,
+            activation=activation,
+            use_bias=use_bias,
+            kernel_initializer=kernel_initializer,
+            bias_initializer=bias_initializer,
+            kernel_regularizer=kernel_regularizer,
+            bias_regularizer=bias_regularizer,
+            activity_regularizer=activity_regularizer,
+            kernel_constraint=kernel_constraint,
+            bias_constraint=bias_constraint,
+            **kwargs)
+        self.input_spec = InputSpec(ndim=5)
+
+    def build(self, input_shape):
+        """Create the kernel (and optional bias) weights for `input_shape`."""
+        if len(input_shape) != 5:
+            raise ValueError('Inputs should have rank ' +
+                             str(5) +
+                             '; Received input shape: ' + str(input_shape))
+        if self.data_format == 'channels_first':
+            channel_axis = 1
+        else:
+            channel_axis = -1
+        if input_shape[channel_axis] is None:
+            raise ValueError('The channel dimension of the inputs '
+                             'should be defined. Found `None`.')
+        input_dim = input_shape[channel_axis]
+        kernel_shape = self.kernel_size + (self.filters, input_dim)
+
+        self.kernel = self.add_weight(shape=kernel_shape,
+                                      initializer=self.kernel_initializer,
+                                      name='kernel',
+                                      regularizer=self.kernel_regularizer,
+                                      constraint=self.kernel_constraint)
+        if self.use_bias:
+            self.bias = self.add_weight(shape=(self.filters,),
+                                        initializer=self.bias_initializer,
+                                        name='bias',
+                                        regularizer=self.bias_regularizer,
+                                        constraint=self.bias_constraint)
+        else:
+            self.bias = None
+        # Set input spec.
+        self.input_spec = InputSpec(ndim=5, axes={channel_axis: input_dim})
+        self.built = True
+
+    def call(self, inputs):
+        """Apply the transposed convolution, then bias and activation."""
+        input_shape = K.shape(inputs)
+        batch_size = input_shape[0]
+        if self.data_format == 'channels_first':
+            d_axis, h_axis, w_axis = 2, 3, 4
+        else:
+            d_axis, h_axis, w_axis = 1, 2, 3
+
+        depth = input_shape[d_axis]
+        height = input_shape[h_axis]
+        width = input_shape[w_axis]
+
+        kernel_d, kernel_h, kernel_w = self.kernel_size
+        stride_d, stride_h, stride_w = self.strides
+
+        # Infer the dynamic output shape:
+        out_depth = conv_utils.deconv_length(depth,
+                                             stride_d, kernel_d,
+                                             self.padding)
+        out_height = conv_utils.deconv_length(height,
+                                              stride_h, kernel_h,
+                                              self.padding)
+        out_width = conv_utils.deconv_length(width,
+                                             stride_w, kernel_w,
+                                             self.padding)
+
+        if self.data_format == 'channels_first':
+            output_shape = (batch_size, self.filters, out_depth, out_height, out_width)
+        else:
+            output_shape = (batch_size, out_depth, out_height, out_width, self.filters)
+
+        outputs = K.conv3d_transpose(inputs,
+                                     self.kernel,
+                                     output_shape,
+                                     self.strides,
+                                     padding=self.padding,
+                                     data_format=self.data_format)
+
+        # `self.bias` is a weight (or None): test the flag rather than
+        # the truth value of a backend variable.
+        if self.use_bias:
+            outputs = K.bias_add(
+                outputs,
+                self.bias,
+                data_format=self.data_format)
+
+        if self.activation is not None:
+            return self.activation(outputs)
+        return outputs
+
+    def compute_output_shape(self, input_shape):
+        """Return the static output shape implied by `input_shape`."""
+        output_shape = list(input_shape)
+        if self.data_format == 'channels_first':
+            c_axis, d_axis, h_axis, w_axis = 1, 2, 3, 4
+        else:
+            c_axis, d_axis, h_axis, w_axis = 4, 1, 2, 3
+
+        kernel_d, kernel_h, kernel_w = self.kernel_size
+        stride_d, stride_h, stride_w = self.strides
+
+        output_shape[c_axis] = self.filters
+        output_shape[d_axis] = conv_utils.deconv_length(output_shape[d_axis],
+                                                        stride_d,
+                                                        kernel_d,
+                                                        self.padding)
+        output_shape[h_axis] = conv_utils.deconv_length(output_shape[h_axis],
+                                                        stride_h,
+                                                        kernel_h,
+                                                        self.padding)
+        output_shape[w_axis] = conv_utils.deconv_length(output_shape[w_axis],
+                                                        stride_w,
+                                                        kernel_w,
+                                                        self.padding)
+
+        return tuple(output_shape)
+
+    def get_config(self):
+        """Return the inherited config without `dilation_rate`."""
+        config = super(Conv3DTranspose, self).get_config()
+        # `call` never reads `dilation_rate`, so it is not serialized.
+        config.pop('dilation_rate')
+        return config
+
+
 class SeparableConv2D(Conv2D):
    """Depthwise separable 2D convolution.

@@ -1891,6 +2122,7 @@ Convolution3D = Conv3D
 SeparableConvolution2D = SeparableConv2D
 Convolution2DTranspose = Conv2DTranspose
 Deconvolution2D = Deconv2D = Conv2DTranspose
+Deconvolution3D = Deconv3D = Conv3DTranspose

 # Legacy aliases
 AtrousConv1D = AtrousConvolution1D
@@ -388,6 +388,51 @@ def test_convolution_3d():
                            stack_size))


+@keras_test
+def test_conv3d_transpose():
+    """Exercise Conv3DTranspose over paddings, strides and data formats."""
+    filters = 2
+    stack_size = 3
+    num_depth = 7
+    num_row = 5
+    num_col = 6
+
+    for padding in _convolution_paddings:
+        for strides in [(1, 1, 1), (2, 2, 2)]:
+            # Strided 'same' padding is not covered by this loop.
+            if padding == 'same' and strides != (1, 1, 1):
+                continue
+            for data_format in ['channels_first', 'channels_last']:
+                layer_test(convolutional.Conv3DTranspose,
+                           kwargs={'filters': filters,
+                                   'kernel_size': 3,
+                                   'padding': padding,
+                                   'strides': strides,
+                                   'data_format': data_format},
+                           input_shape=(None, num_depth, num_row, num_col, stack_size),
+                           fixed_batch_size=True)
+
+    # `padding` and `strides` keep the last values assigned by the loops.
+    layer_test(convolutional.Conv3DTranspose,
+               kwargs={'filters': filters, 'kernel_size': 3,
+                       'padding': padding, 'strides': strides,
+                       'data_format': 'channels_first', 'activation': None,
+                       'kernel_regularizer': 'l2', 'bias_regularizer': 'l2',
+                       'activity_regularizer': 'l2',
+                       'kernel_constraint': 'max_norm',
+                       'bias_constraint': 'max_norm'},
+               input_shape=(None, stack_size, num_depth, num_row, num_col),
+               fixed_batch_size=True)
+
+    # Test invalid use case
+    with pytest.raises(ValueError):
+        Sequential([convolutional.Conv3DTranspose(
+            filters=filters,
+            kernel_size=3,
+            padding=padding,
+            batch_input_shape=(None, None, 5, None, None))])
+
+
@keras_test
 def test_maxpooling_3d():
    pool_size = (3, 3, 3)