Prepare new PyPI release

Don't add another header line to CSV logger when appending to an existing file (#4426)
Sequential: Fix trainable arg (#4509)
2016-11-25 20:52:27 -08:00 · 2016-11-25 14:49:50 -08:00 · 2016-11-25 11:59:05 -08:00 · 2016-11-25 01:20:10 -08:00 · 2016-11-24 23:59:51 -08:00 · 2016-11-24 23:59:33 -08:00
@@ -49,9 +49,9 @@ install:

  # install TensorFlow
  - if [[ "$TRAVIS_PYTHON_VERSION" == "2.7" ]]; then
-      pip install https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-0.9.0-cp27-none-linux_x86_64.whl;
+      pip install https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-0.11.0-cp27-none-linux_x86_64.whl;
    elif [[ "$TRAVIS_PYTHON_VERSION" == "3.4" ]]; then
-      pip install https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-0.9.0-cp34-cp34m-linux_x86_64.whl;
+      pip install https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-0.11.0-cp34-cp34m-linux_x86_64.whl;
    fi
 # command to run tests
 script:
@@ -139,6 +139,7 @@ PAGES = [
            core.Dense,
            core.Activation,
            core.Dropout,
+            core.SpatialDropout1D,
            core.SpatialDropout2D,
            core.SpatialDropout3D,
            core.Flatten,
@@ -4,7 +4,7 @@ For simple, stateless custom operations, you are probably better off using `laye

 Here is the skeleton of a Keras layer. There are only three methods you need to implement:

- `build(input_shape)`: this is where you will define your weights. Trainable weights should be added to the list `self.trainable_weights`. Other attributes of note are: `self.non_trainable_weights` (list) and `self.updates` (list of update tuples (tensor, new_tensor)). For an example of how to use `non_trainable_weights` and `updates`, see the code for the `BatchNormalization` layer.
+- `build(input_shape)`: this is where you will define your weights. Trainable weights should be added to the list `self.trainable_weights`. Other attributes of note are: `self.non_trainable_weights` (list) and `self.updates` (list of update tuples (tensor, new_tensor)). For an example of how to use `non_trainable_weights` and `updates`, see the code for the `BatchNormalization` layer.  This method must set `self.built = True`, which can be done by calling `super([Layer], self).build()`.
 - `call(x)`: this is where the layer's logic lives. Unless you want your layer to support masking, you only have to care about the first argument passed to `call`: the input tensor.
 - `get_output_shape_for(input_shape)`: in case your layer modifies the shape of its input, you should specify here the shape transformation logic. This allows Keras to do automatic shape inference.

@@ -23,6 +23,7 @@ class MyLayer(Layer):
        initial_weight_value = np.random.random((input_dim, output_dim))
        self.W = K.variable(initial_weight_value)
        self.trainable_weights = [self.W]
+        super(MyLayer, self).build()  # be sure you call this somewhere!

    def call(self, x, mask=None):
        return K.dot(x, self.W)
@@ -31,4 +32,4 @@ class MyLayer(Layer):
        return (input_shape[0], self.output_dim)
 ```

-The existing Keras layers provide ample examples of how to implement almost anything. Never hesitate to read the source code!
+The existing Keras layers provide ample examples of how to implement almost anything. Never hesitate to read the source code!
@@ -18,6 +18,9 @@ Trains a simple deep CNN on the CIFAR10 small images dataset.
 [conv_filter_visualization.py](conv_filter_visualization.py)
 Visualization of the filters of VGG16, via gradient ascent in input space.

+[conv_lstm.py](conv_lstm.py)
+Demonstrates the use of a convolutional LSTM network.
+
 [deep_dream.py](deep_dream.py)
 Deep Dreams in Keras.

@@ -0,0 +1,142 @@
+""" This script demonstrates the use of a convolutional LSTM network.
+This network is used to predict the next frame of an artificially
+generated movie which contains moving squares.
+"""
+from keras.models import Sequential
+from keras.layers.convolutional import Convolution3D
+from keras.layers.convolutional_recurrent import ConvLSTM2D
+from keras.layers.normalization import BatchNormalization
+import numpy as np
+import pylab as plt
+
+# We create a layer which takes as input movies of shape
+# (n_frames, width, height, channels) and returns a movie
+# of identical shape.
+
+seq = Sequential()
+seq.add(ConvLSTM2D(nb_filter=40, nb_row=3, nb_col=3,
+                   input_shape=(None, 40, 40, 1),
+                   border_mode='same', return_sequences=True))
+seq.add(BatchNormalization())
+
+seq.add(ConvLSTM2D(nb_filter=40, nb_row=3, nb_col=3,
+                   border_mode='same', return_sequences=True))
+seq.add(BatchNormalization())
+
+seq.add(ConvLSTM2D(nb_filter=40, nb_row=3, nb_col=3,
+                   border_mode='same', return_sequences=True))
+seq.add(BatchNormalization())
+
+seq.add(ConvLSTM2D(nb_filter=40, nb_row=3, nb_col=3,
+                   border_mode='same', return_sequences=True))
+seq.add(BatchNormalization())
+
+seq.add(Convolution3D(nb_filter=1, kernel_dim1=1, kernel_dim2=3,
+                      kernel_dim3=3, activation='sigmoid',
+                      border_mode='same', dim_ordering='tf'))
+
+seq.compile(loss='binary_crossentropy', optimizer='adadelta')
+
+
+# Artificial data generation:
+# Generate movies with 3 to 7 moving squares inside.
+# The squares are 4x4 or 6x6 pixels (side 2*w with w in {2, 3}),
+# which move linearly over time.
+# For convenience we first create movies with bigger width and height (80x80)
+# and at the end we select a 40x40 window.
+
+def generate_movies(n_samples=1200, n_frames=15):
+    row = 80
+    col = 80
+    noisy_movies = np.zeros((n_samples, n_frames, row, col, 1), dtype=np.float)
+    shifted_movies = np.zeros((n_samples, n_frames, row, col, 1),
+                              dtype=np.float)
+
+    for i in range(n_samples):
+        # Add 3 to 7 moving squares
+        n = np.random.randint(3, 8)
+
+        for j in range(n):
+            # Initial position
+            xstart = np.random.randint(20, 60)
+            ystart = np.random.randint(20, 60)
+            # Direction of motion
+            directionx = np.random.randint(0, 3) - 1
+            directiony = np.random.randint(0, 3) - 1
+
+            # Size of the square
+            w = np.random.randint(2, 4)
+
+            for t in range(n_frames):
+                x_shift = xstart + directionx * t
+                y_shift = ystart + directiony * t
+                noisy_movies[i, t, x_shift - w: x_shift + w,
+                             y_shift - w: y_shift + w, 0] += 1
+
+                # Make it more robust by adding noise.
+                # The idea is that if during inference,
+                # the value of the pixel is not exactly one,
+                # we need to train the network to be robust and still
+                # consider it as a pixel belonging to a square.
+                if np.random.randint(0, 2):
+                    noise_f = (-1)**np.random.randint(0, 2)
+                    noisy_movies[i, t,
+                                 x_shift - w - 1: x_shift + w + 1,
+                                 y_shift - w - 1: y_shift + w + 1,
+                                 0] += noise_f * 0.1
+
+                # Shift the ground truth by 1
+                x_shift = xstart + directionx * (t + 1)
+                y_shift = ystart + directiony * (t + 1)
+                shifted_movies[i, t, x_shift - w: x_shift + w,
+                               y_shift - w: y_shift + w, 0] += 1
+
+    # Cut to a 40x40 window
+    noisy_movies = noisy_movies[::, ::, 20:60, 20:60, ::]
+    shifted_movies = shifted_movies[::, ::, 20:60, 20:60, ::]
+    noisy_movies[noisy_movies >= 1] = 1
+    shifted_movies[shifted_movies >= 1] = 1
+    return noisy_movies, shifted_movies
+
+# Train the network
+noisy_movies, shifted_movies = generate_movies(n_samples=1200)
+seq.fit(noisy_movies[:1000], shifted_movies[:1000], batch_size=10,
+        nb_epoch=300, validation_split=0.05)
+
+# Testing the network on one movie
+# feed it with the first 7 positions and then
+# predict the new positions
+which = 1004
+track = noisy_movies[which][:7, ::, ::, ::]
+
+for j in range(16):
+    new_pos = seq.predict(track[np.newaxis, ::, ::, ::, ::])
+    new = new_pos[::, -1, ::, ::, ::]
+    track = np.concatenate((track, new), axis=0)
+
+
+# And then compare the predictions
+# to the ground truth
+track2 = noisy_movies[which][::, ::, ::, ::]
+for i in range(15):
+    fig = plt.figure(figsize=(10, 5))
+
+    ax = fig.add_subplot(121)
+
+    if i >= 7:
+        ax.text(1, 3, 'Predictions !', fontsize=20, color='w')
+    else:
+        ax.text(1, 3, 'Inital trajectory', fontsize=20)
+
+    toplot = track[i, ::, ::, 0]
+
+    plt.imshow(toplot)
+    ax = fig.add_subplot(122)
+    plt.text(1, 3, 'Ground truth', fontsize=20)
+
+    toplot = track2[i, ::, ::, 0]
+    if i >= 2:
+        toplot = shifted_movies[which][i - 1, ::, ::, 0]
+
+    plt.imshow(toplot)
+    plt.savefig('%i_animate.png' % (i + 1))
@@ -0,0 +1,314 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+"""
+Train an Auxiliary Classifier Generative Adversarial Network (ACGAN) on the
+MNIST dataset. See https://arxiv.org/abs/1610.09585 for more details.
+
+You should start to see reasonable images after ~5 epochs, and good images
+by ~15 epochs. You should use a GPU, as the convolution-heavy operations are
+very slow on the CPU. Prefer the TensorFlow backend if you plan on iterating, as
+the compilation time can be a blocker using Theano.
+
+Timings:
+
+Hardware           | Backend | Time / Epoch
+-------------------------------------------
+ CPU               | TF      | 3 hrs
+ Titan X (maxwell) | TF      | 4 min
+ Titan X (maxwell) | TH      | 7 min
+
+Consult https://github.com/lukedeo/keras-acgan for more information and
+example output
+"""
+from __future__ import print_function
+
+from collections import defaultdict
+import cPickle as pickle
+from PIL import Image
+
+from six.moves import range
+
+import keras.backend as K
+from keras.datasets import mnist
+from keras.layers import Input, Dense, Reshape, Flatten, Embedding, merge, Dropout
+from keras.layers.advanced_activations import LeakyReLU
+from keras.layers.convolutional import UpSampling2D, Convolution2D
+from keras.models import Sequential, Model
+from keras.optimizers import Adam
+from keras.utils.generic_utils import Progbar
+import numpy as np
+
+np.random.seed(1337)
+
+K.set_image_dim_ordering('th')
+
+
+def build_generator(latent_size):
+    # we will map a pair of (z, L), where z is a latent vector and L is a
+    # label drawn from P_c, to image space (..., 1, 28, 28)
+    cnn = Sequential()
+
+    cnn.add(Dense(1024, input_dim=latent_size, activation='relu'))
+    cnn.add(Dense(128 * 7 * 7, activation='relu'))
+    cnn.add(Reshape((128, 7, 7)))
+
+    # upsample to (..., 14, 14)
+    cnn.add(UpSampling2D(size=(2, 2)))
+    cnn.add(Convolution2D(256, 5, 5, border_mode='same',
+                          activation='relu', init='glorot_normal'))
+
+    # upsample to (..., 28, 28)
+    cnn.add(UpSampling2D(size=(2, 2)))
+    cnn.add(Convolution2D(128, 5, 5, border_mode='same',
+                          activation='relu', init='glorot_normal'))
+
+    # take a channel axis reduction
+    cnn.add(Convolution2D(1, 2, 2, border_mode='same',
+                          activation='tanh', init='glorot_normal'))
+
+    # this is the z space commonly referred to in GAN papers
+    latent = Input(shape=(latent_size, ))
+
+    # this will be our label
+    image_class = Input(shape=(1,), dtype='int32')
+
+    # 10 classes in MNIST
+    cls = Flatten()(Embedding(10, latent_size,
+                              init='glorot_normal')(image_class))
+
+    # hadamard product between z-space and a class conditional embedding
+    h = merge([latent, cls], mode='mul')
+
+    fake_image = cnn(h)
+
+    return Model(input=[latent, image_class], output=fake_image)
+
+
+def build_discriminator():
+    # build a relatively standard conv net, with LeakyReLUs as suggested in
+    # the reference paper
+    cnn = Sequential()
+
+    cnn.add(Convolution2D(32, 3, 3, border_mode='same', subsample=(2, 2),
+                          input_shape=(1, 28, 28)))
+    cnn.add(LeakyReLU())
+    cnn.add(Dropout(0.3))
+
+    cnn.add(Convolution2D(64, 3, 3, border_mode='same', subsample=(1, 1)))
+    cnn.add(LeakyReLU())
+    cnn.add(Dropout(0.3))
+
+    cnn.add(Convolution2D(128, 3, 3, border_mode='same', subsample=(2, 2)))
+    cnn.add(LeakyReLU())
+    cnn.add(Dropout(0.3))
+
+    cnn.add(Convolution2D(256, 3, 3, border_mode='same', subsample=(1, 1)))
+    cnn.add(LeakyReLU())
+    cnn.add(Dropout(0.3))
+
+    cnn.add(Flatten())
+
+    image = Input(shape=(1, 28, 28))
+
+    features = cnn(image)
+
+    # first output (name=generation) is whether or not the discriminator
+    # thinks the image that is being shown is fake, and the second output
+    # (name=auxiliary) is the class that the discriminator thinks the image
+    # belongs to.
+    fake = Dense(1, activation='sigmoid', name='generation')(features)
+    aux = Dense(10, activation='softmax', name='auxiliary')(features)
+
+    return Model(input=image, output=[fake, aux])
+
+if __name__ == '__main__':
+
+    # batch and latent size taken from the paper
+    nb_epochs = 50
+    batch_size = 100
+    latent_size = 100
+
+    # Adam parameters suggested in https://arxiv.org/abs/1511.06434
+    adam_lr = 0.0002
+    adam_beta_1 = 0.5
+
+    # build the discriminator
+    discriminator = build_discriminator()
+    discriminator.compile(
+        optimizer=Adam(lr=adam_lr, beta_1=adam_beta_1),
+        loss=['binary_crossentropy', 'sparse_categorical_crossentropy']
+    )
+
+    # build the generator
+    generator = build_generator(latent_size)
+    generator.compile(optimizer=Adam(lr=adam_lr, beta_1=adam_beta_1),
+                      loss='binary_crossentropy')
+
+    latent = Input(shape=(latent_size, ))
+    image_class = Input(shape=(1,), dtype='int32')
+
+    # get a fake image
+    fake = generator([latent, image_class])
+
+    # we only want to be able to train generation for the combined model
+    discriminator.trainable = False
+    fake, aux = discriminator(fake)
+    combined = Model(input=[latent, image_class], output=[fake, aux])
+
+    combined.compile(
+        optimizer=Adam(lr=adam_lr, beta_1=adam_beta_1),
+        loss=['binary_crossentropy', 'sparse_categorical_crossentropy']
+    )
+
+    discriminator.trainable = True
+
+    # get our mnist data, and force it to be of shape (..., 1, 28, 28) with
+    # range [-1, 1]
+    (X_train, y_train), (X_test, y_test) = mnist.load_data()
+    X_train = (X_train.astype(np.float32) - 127.5) / 127.5
+    X_train = np.expand_dims(X_train, axis=1)
+
+    X_test = (X_test.astype(np.float32) - 127.5) / 127.5
+    X_test = np.expand_dims(X_test, axis=1)
+
+    nb_train, nb_test = X_train.shape[0], X_test.shape[0]
+
+    train_history = defaultdict(list)
+    test_history = defaultdict(list)
+
+    for epoch in range(nb_epochs):
+        print('Epoch {} of {}'.format(epoch + 1, nb_epochs))
+
+        nb_batches = int(X_train.shape[0] / batch_size)
+        progress_bar = Progbar(target=nb_batches)
+
+        epoch_gen_loss = []
+        epoch_disc_loss = []
+
+        for index in range(nb_batches):
+            progress_bar.update(index)
+            # generate a new batch of noise
+            noise = np.random.uniform(-1, 1, (batch_size, latent_size))
+
+            # get a batch of real images
+            image_batch = X_train[index * batch_size:(index + 1) * batch_size]
+            label_batch = y_train[index * batch_size:(index + 1) * batch_size]
+
+            # sample some labels from p_c
+            sampled_labels = np.random.randint(0, 10, batch_size)
+
+            # generate a batch of fake images, using the generated labels as a
+            # conditioner. We reshape the sampled labels to be
+            # (batch_size, 1) so that we can feed them into the embedding
+            # layer as a length one sequence
+            generated_images = generator.predict(
+                [noise, sampled_labels.reshape((-1, 1))], verbose=0)
+
+            X = np.concatenate((image_batch, generated_images))
+            y = np.array([1] * batch_size + [0] * batch_size)
+            aux_y = np.concatenate((label_batch, sampled_labels), axis=0)
+
+            # see if the discriminator can figure itself out...
+            epoch_disc_loss.append(discriminator.train_on_batch(X, [y, aux_y]))
+
+            # make new noise. we generate 2 * batch size here such that we have
+            # the generator optimize over an identical number of images as the
+            # discriminator
+            noise = np.random.uniform(-1, 1, (2 * batch_size, latent_size))
+            sampled_labels = np.random.randint(0, 10, 2 * batch_size)
+
+            # we want to fix the discriminator and let the generator train to
+            # trick it
+            discriminator.trainable = False
+
+            # For the generator, we want all the {fake, not-fake} labels to say
+            # not-fake
+            trick = np.ones(2 * batch_size)
+
+            epoch_gen_loss.append(combined.train_on_batch(
+                [noise, sampled_labels.reshape((-1, 1))], [trick, sampled_labels]))
+
+            discriminator.trainable = True
+
+        print('\nTesting for epoch {}:'.format(epoch + 1))
+
+        # evaluate the testing loss here
+
+        # generate a new batch of noise
+        noise = np.random.uniform(-1, 1, (nb_test, latent_size))
+
+        # sample some labels from p_c and generate images from them
+        sampled_labels = np.random.randint(0, 10, nb_test)
+        generated_images = generator.predict(
+            [noise, sampled_labels.reshape((-1, 1))], verbose=False)
+
+        X = np.concatenate((X_test, generated_images))
+        y = np.array([1] * nb_test + [0] * nb_test)
+        aux_y = np.concatenate((y_test, sampled_labels), axis=0)
+
+        # see if the discriminator can figure itself out...
+        discriminator_test_loss = discriminator.evaluate(
+            X, [y, aux_y], verbose=False)
+
+        discriminator_train_loss = np.mean(np.array(epoch_disc_loss), axis=0)
+
+        # make new noise
+        noise = np.random.uniform(-1, 1, (2 * nb_test, latent_size))
+        sampled_labels = np.random.randint(0, 10, 2 * nb_test)
+
+        trick = np.ones(2 * nb_test)
+
+        generator_test_loss = combined.evaluate(
+            [noise, sampled_labels.reshape((-1, 1))],
+            [trick, sampled_labels], verbose=False)
+
+        generator_train_loss = np.mean(np.array(epoch_gen_loss), axis=0)
+
+        # generate an epoch report on performance
+        train_history['generator'].append(generator_train_loss)
+        train_history['discriminator'].append(discriminator_train_loss)
+
+        test_history['generator'].append(generator_test_loss)
+        test_history['discriminator'].append(discriminator_test_loss)
+
+        print('{0:<22s} | {1:4s} | {2:15s} | {3:5s}'.format(
+            'component', *discriminator.metrics_names))
+        print('-' * 65)
+
+        ROW_FMT = '{0:<22s} | {1:<4.2f} | {2:<15.2f} | {3:<5.2f}'
+        print(ROW_FMT.format('generator (train)',
+                             *train_history['generator'][-1]))
+        print(ROW_FMT.format('generator (test)',
+                             *test_history['generator'][-1]))
+        print(ROW_FMT.format('discriminator (train)',
+                             *train_history['discriminator'][-1]))
+        print(ROW_FMT.format('discriminator (test)',
+                             *test_history['discriminator'][-1]))
+
+        # save weights every epoch
+        generator.save_weights(
+            'params_generator_epoch_{0:03d}.hdf5'.format(epoch), True)
+        discriminator.save_weights(
+            'params_discriminator_epoch_{0:03d}.hdf5'.format(epoch), True)
+
+        # generate some digits to display
+        noise = np.random.uniform(-1, 1, (100, latent_size))
+
+        sampled_labels = np.array([
+            [i] * 10 for i in range(10)
+        ]).reshape(-1, 1)
+
+        # get a batch to display
+        generated_images = generator.predict(
+            [noise, sampled_labels], verbose=0)
+
+        # arrange them into a grid
+        img = (np.concatenate([r.reshape(-1, 28)
+                               for r in np.split(generated_images, 10)
+                               ], axis=-1) * 127.5 + 127.5).astype(np.uint8)
+
+        Image.fromarray(img).save(
+            'plot_epoch_{0:03d}_generated.png'.format(epoch))
+
+    pickle.dump({'train': train_history, 'test': test_history},
+                open('acgan-history.pkl', 'wb'))
@@ -54,7 +54,6 @@ model.add(LSTM(50,
               return_sequences=True,
               stateful=True))
 model.add(LSTM(50,
-               batch_input_shape=(batch_size, tsteps, 1),
               return_sequences=False,
               stateful=True))
 model.add(Dense(1))
@@ -4,6 +4,7 @@ Reference: "Auto-Encoding Variational Bayes" https://arxiv.org/abs/1312.6114
 '''
 import numpy as np
 import matplotlib.pyplot as plt
+from scipy.stats import norm

 from keras.layers import Input, Dense, Lambda
 from keras.models import Model
@@ -16,7 +17,7 @@ original_dim = 784
 latent_dim = 2
 intermediate_dim = 256
 nb_epoch = 50
-epsilon_std = 0.01
+epsilon_std = 1.0

 x = Input(batch_shape=(batch_size, original_dim))
 h = Dense(intermediate_dim, activation='relu')(x)
@@ -82,9 +83,10 @@ generator = Model(decoder_input, _x_decoded_mean)
 n = 15  # figure with 15x15 digits
 digit_size = 28
 figure = np.zeros((digit_size * n, digit_size * n))
-# we will sample n points within [-15, 15] standard deviations
-grid_x = np.linspace(-15, 15, n)
-grid_y = np.linspace(-15, 15, n)
+# linearly spaced coordinates on the unit square were transformed through the inverse CDF (ppf) of the Gaussian
+# to produce values of the latent variables z, since the prior of the latent space is Gaussian
+grid_x = norm.ppf(np.linspace(0.05, 0.95, n))
+grid_y = norm.ppf(np.linspace(0.05, 0.95, n))

 for i, yi in enumerate(grid_x):
    for j, xi in enumerate(grid_y):
@@ -95,5 +97,5 @@ for i, yi in enumerate(grid_x):
               j * digit_size: (j + 1) * digit_size] = digit

 plt.figure(figsize=(10, 10))
-plt.imshow(figure)
+plt.imshow(figure, cmap='Greys_r')
 plt.show()
@@ -5,6 +5,7 @@ Reference: "Auto-Encoding Variational Bayes" https://arxiv.org/abs/1312.6114
 '''
 import numpy as np
 import matplotlib.pyplot as plt
+from scipy.stats import norm

 from keras.layers import Input, Dense, Lambda, Flatten, Reshape
 from keras.layers import Convolution2D, Deconvolution2D
@@ -27,7 +28,7 @@ else:
    original_img_size = (img_rows, img_cols, img_chns)
 latent_dim = 2
 intermediate_dim = 128
-epsilon_std = 0.01
+epsilon_std = 1.0
 nb_epoch = 5

 x = Input(batch_shape=(batch_size,) + original_img_size)
@@ -153,9 +154,10 @@ generator = Model(decoder_input, _x_decoded_mean_squash)
 n = 15  # figure with 15x15 digits
 digit_size = 28
 figure = np.zeros((digit_size * n, digit_size * n))
-# we will sample n points within [-15, 15] standard deviations
-grid_x = np.linspace(-15, 15, n)
-grid_y = np.linspace(-15, 15, n)
+# linearly spaced coordinates on the unit square were transformed through the inverse CDF (ppf) of the Gaussian
+# to produce values of the latent variables z, since the prior of the latent space is Gaussian
+grid_x = norm.ppf(np.linspace(0.05, 0.95, n))
+grid_y = norm.ppf(np.linspace(0.05, 0.95, n))

 for i, yi in enumerate(grid_x):
    for j, xi in enumerate(grid_y):
@@ -167,5 +169,5 @@ for i, yi in enumerate(grid_x):
               j * digit_size: (j + 1) * digit_size] = digit

 plt.figure(figsize=(10, 10))
-plt.imshow(figure)
+plt.imshow(figure, cmap='Greys_r')
 plt.show()
@@ -15,4 +15,4 @@ from . import objectives
 from . import optimizers
 from . import regularizers

-__version__ = '1.1.1'
+__version__ = '1.1.2'
@@ -1,5 +1,6 @@
 from __future__ import absolute_import
 from . import backend as K
+from .utils.generic_utils import get_from_module


 def softmax(x):
@@ -11,13 +12,15 @@ def softmax(x):
        s = K.sum(e, axis=-1, keepdims=True)
        return e / s
    else:
-        raise Exception('Cannot apply softmax to a tensor that is not 2D or 3D. ' +
-                        'Here, ndim=' + str(ndim))
+        raise ValueError('Cannot apply softmax to a tensor '
+                         'that is not 2D or 3D. '
+                         'Here, ndim=' + str(ndim))


 def elu(x, alpha=1.0):
    return K.elu(x, alpha)

+
 def softplus(x):
    return K.softplus(x)

@@ -43,13 +46,9 @@ def hard_sigmoid(x):


 def linear(x):
-    '''
-    The function returns the variable that is passed in, so all types work.
-    '''
    return x


-from .utils.generic_utils import get_from_module
 def get(identifier):
    if identifier is None:
        return linear
@@ -44,7 +44,8 @@ def decode_predictions(preds, top=5):
        CLASS_INDEX = json.load(open(fpath))
    results = []
    for pred in preds:
-        top_indices = np.argpartition(pred, -top)[-top:][::-1]
+        top_indices = pred.argsort()[-top:][::-1]
        result = [tuple(CLASS_INDEX[str(i)]) + (pred[i],) for i in top_indices]
+        result.sort(key=lambda x: x[2], reverse=True)
        results.append(result)
    return results
@@ -1,6 +1,8 @@
 import tensorflow as tf

 from tensorflow.python.training import moving_averages
+from tensorflow.python.ops import tensor_array_ops
+from tensorflow.python.ops import control_flow_ops
 try:
    from tensorflow.python.ops import ctc_ops as ctc
 except ImportError:
@@ -10,27 +12,41 @@ import numpy as np
 import os
 import copy
 import warnings
-from .common import _FLOATX, _EPSILON, _IMAGE_DIM_ORDERING, reset_uids
+from .common import _FLOATX, _EPSILON, image_dim_ordering, reset_uids
 py_all = all

 # INTERNAL UTILS

+# This is the default internal TF session used by Keras.
+# It can be set manually via `set_session(sess)`.
 _SESSION = None
-_LEARNING_PHASE = tf.placeholder(dtype='uint8', name='keras_learning_phase')  # 0 = test, 1 = train
+# This dictionary holds a mapping {graph: learning_phase}.
+# A learning phase is a bool tensor used to run Keras models in
+# either train mode (learning_phase == 1) or test mode (learning_phase == 0).
+_GRAPH_LEARNING_PHASES = {}
+# This boolean flag can be set to True to leave variable initialization
+# up to the user.
+# Change its value via `manual_variable_initialization(value)`.
 _MANUAL_VAR_INIT = False


 def clear_session():
+    '''Destroys the current TF graph and creates a new one.
+
+    Useful to avoid clutter from old models / layers.
+    '''
    global _SESSION
-    global _LEARNING_PHASE
+    global _GRAPH_LEARNING_PHASES
    tf.reset_default_graph()
    reset_uids()
    _SESSION = None
-    _LEARNING_PHASE = tf.placeholder(dtype='uint8', name='keras_learning_phase')
+    phase = tf.placeholder(dtype='bool', name='keras_learning_phase')
+    _GRAPH_LEARNING_PHASES[tf.get_default_graph()] = phase


 def manual_variable_initialization(value):
-    '''Whether variables should be initialized
+    '''Sets the manual variable initialization flag:
+    whether variables should be initialized
    as they are instantiated (default), or if
    the user should handle the initialization
    (e.g. via tf.initialize_all_variables()).
@@ -42,19 +58,27 @@ def manual_variable_initialization(value):
 def learning_phase():
    '''Returns the learning phase flag.

-    The learning phase flag is an integer tensor (0 = test, 1 = train)
+    The learning phase flag is a bool tensor (0 = test, 1 = train)
    to be passed as input to any Keras function
    that uses a different behavior at train time and test time.
    '''
-    return _LEARNING_PHASE
+    graph = tf.get_default_graph()
+    if graph not in _GRAPH_LEARNING_PHASES:
+        phase = tf.placeholder(dtype='bool',
+                               name='keras_learning_phase')
+        _GRAPH_LEARNING_PHASES[graph] = phase
+    return _GRAPH_LEARNING_PHASES[graph]


 def set_learning_phase(value):
-    global _LEARNING_PHASE
+    '''Sets the learning phase to a fixed value,
+    either 0 or 1 (integers).
+    '''
+    global _GRAPH_LEARNING_PHASES
    if value not in {0, 1}:
        raise ValueError('Expected learning phase to be '
                         '0 or 1.')
-    _LEARNING_PHASE = value
+    _GRAPH_LEARNING_PHASES[tf.get_default_graph()] = value


 def get_session():
@@ -72,15 +96,20 @@ def get_session():
    '''
    global _SESSION
    if tf.get_default_session() is not None:
-        return tf.get_default_session()
-    if _SESSION is None:
-        if not os.environ.get('OMP_NUM_THREADS'):
-            _SESSION = tf.Session(config=tf.ConfigProto(allow_soft_placement=True))
-        else:
-            nb_thread = int(os.environ.get('OMP_NUM_THREADS'))
-            _SESSION = tf.Session(config=tf.ConfigProto(intra_op_parallelism_threads=nb_thread,
-                                                        allow_soft_placement=True))
-    return _SESSION
+        session = tf.get_default_session()
+    else:
+        if _SESSION is None:
+            if not os.environ.get('OMP_NUM_THREADS'):
+                config = tf.ConfigProto(allow_soft_placement=True)
+            else:
+                nb_thread = int(os.environ.get('OMP_NUM_THREADS'))
+                config = tf.ConfigProto(intra_op_parallelism_threads=nb_thread,
+                                        allow_soft_placement=True)
+            _SESSION = tf.Session(config=config)
+        session = _SESSION
+    if not _MANUAL_VAR_INIT:
+        _initialize_variables()
+    return session


 def set_session(session):
@@ -144,30 +173,34 @@ def variable(value, dtype=_FLOATX, name=None):
    '''
    if hasattr(value, 'tocoo'):
        sparse_coo = value.tocoo()
-        indices = np.concatenate((np.expand_dims(sparse_coo.row, 1), np.expand_dims(sparse_coo.col, 1)), 1)
+        indices = np.concatenate((np.expand_dims(sparse_coo.row, 1),
+                                  np.expand_dims(sparse_coo.col, 1)), 1)
        # SparseTensor doesn't need initialization
-        return tf.SparseTensor(indices=indices, values=sparse_coo.data, shape=sparse_coo.shape)
-
-    v = tf.Variable(value, dtype=_convert_string_dtype(dtype), name=name)
-    if _MANUAL_VAR_INIT:
+        v = tf.SparseTensor(indices=indices, values=sparse_coo.data, shape=sparse_coo.shape)
+        v._dims = len(sparse_coo.shape)
        return v
-    if tf.get_default_graph() is get_session().graph:
-        try:
-            get_session().run(v.initializer)
-        except tf.errors.InvalidArgumentError:
-            warnings.warn('Could not automatically initialize variable, '
-                          'make sure you do it manually (e.g. via '
-                          '`tf.initialize_all_variables()`).')
-    else:
-        warnings.warn('The default TensorFlow graph is not the graph '
-                      'associated with the TensorFlow session currently '
-                      'registered with Keras, and as such Keras '
-                      'was not able to automatically initialize a variable. '
-                      'You should consider registering the proper session '
-                      'with Keras via `K.set_session(sess)`.')
+    v = tf.Variable(value, dtype=_convert_string_dtype(dtype), name=name)
    return v


+def _initialize_variables():
+    if hasattr(tf, 'global_variables'):
+        variables = tf.global_variables()
+    else:
+        variables = tf.all_variables()
+
+    uninitialized_variables = []
+    for v in variables:
+        if not hasattr(v, '_keras_initialized') or not v._keras_initialized:
+            uninitialized_variables.append(v)
+            v._keras_initialized = True
+    if uninitialized_variables:
+        sess = get_session()
+        if hasattr(tf, 'variables_initializer'):
+            sess.run(tf.variables_initializer(uninitialized_variables))
+        else:
+            sess.run(tf.initialize_variables(uninitialized_variables))
+
 def placeholder(shape=None, ndim=None, dtype=_FLOATX, sparse=False, name=None):
    '''Instantiates a placeholder.

@@ -187,8 +220,8 @@ def placeholder(shape=None, ndim=None, dtype=_FLOATX, sparse=False, name=None):
        if ndim:
            shape = tuple([None for _ in range(ndim)])
    if sparse:
-        tf_shape = tf.constant(np.array(list([0 for _ in range(len(shape))]), dtype=np.int64))
-        x = tf.sparse_placeholder(dtype, shape=tf_shape, name=name)
+        x = tf.sparse_placeholder(dtype, name=name)
+        x._dims = len(shape)
    else:
        x = tf.placeholder(dtype, shape=shape, name=name)
    x._keras_shape = shape
@@ -215,7 +248,7 @@ def ndim(x):
    '''Returns the number of axes in a tensor, as an integer.
    '''
    if is_sparse(x):
-        return int(x.shape.get_shape()[0])
+        return x._dims

    dims = x.get_shape()._dims
    if dims is not None:
@@ -241,7 +274,8 @@ def zeros(shape, dtype=_FLOATX, name=None):
    '''
    shape = tuple(map(int, shape))
    tf_dtype = _convert_string_dtype(dtype)
-    return variable(tf.constant_initializer(0., dtype=tf_dtype)(shape), dtype, name)
+    return variable(tf.constant_initializer(0., dtype=tf_dtype)(shape),
+                    dtype, name)


 def ones(shape, dtype=_FLOATX, name=None):
@@ -249,7 +283,8 @@ def ones(shape, dtype=_FLOATX, name=None):
    '''
    shape = tuple(map(int, shape))
    tf_dtype = _convert_string_dtype(dtype)
-    return variable(tf.constant_initializer(1., dtype=tf_dtype)(shape), dtype, name)
+    return variable(tf.constant_initializer(1., dtype=tf_dtype)(shape),
+                    dtype, name)


 def eye(size, dtype=_FLOATX, name=None):
@@ -748,14 +783,16 @@ def resize_images(X, height_factor, width_factor, dim_ordering):
        X = permute_dimensions(X, [0, 2, 3, 1])
        X = tf.image.resize_nearest_neighbor(X, new_shape)
        X = permute_dimensions(X, [0, 3, 1, 2])
-        X.set_shape((None, None, original_shape[2] * height_factor, original_shape[3] * width_factor))
+        X.set_shape((None, None, original_shape[2] * height_factor if original_shape[2] is not None else None,
+                    original_shape[3] * width_factor if original_shape[3] is not None else None))
        return X
    elif dim_ordering == 'tf':
        original_shape = int_shape(X)
        new_shape = tf.shape(X)[1:3]
        new_shape *= tf.constant(np.array([height_factor, width_factor]).astype('int32'))
        X = tf.image.resize_nearest_neighbor(X, new_shape)
-        X.set_shape((None, original_shape[1] * height_factor, original_shape[2] * width_factor, None))
+        X.set_shape((None, original_shape[1] * height_factor if original_shape[1] is not None else None,
+                    original_shape[2] * width_factor if original_shape[2] is not None else None, None))
        return X
    else:
        raise Exception('Invalid dim_ordering: ' + dim_ordering)
@@ -854,10 +891,15 @@ def asymmetric_temporal_padding(x, left_pad=1, right_pad=1):
    return tf.pad(x, pattern)


-def spatial_2d_padding(x, padding=(1, 1), dim_ordering=_IMAGE_DIM_ORDERING):
+def spatial_2d_padding(x, padding=(1, 1), dim_ordering='default'):
    '''Pads the 2nd and 3rd dimensions of a 4D tensor
    with "padding[0]" and "padding[1]" (resp.) zeros left and right.
    '''
+    if dim_ordering == 'default':
+        dim_ordering = image_dim_ordering()
+    if dim_ordering not in {'th', 'tf'}:
+        raise ValueError('Unknown dim_ordering ' + str(dim_ordering))
+
    if dim_ordering == 'th':
        pattern = [[0, 0], [0, 0],
                   [padding[0], padding[0]], [padding[1], padding[1]]]
@@ -868,10 +910,18 @@ def spatial_2d_padding(x, padding=(1, 1), dim_ordering=_IMAGE_DIM_ORDERING):
    return tf.pad(x, pattern)


-def asymmetric_spatial_2d_padding(x, top_pad=1, bottom_pad=1, left_pad=1, right_pad=1, dim_ordering=_IMAGE_DIM_ORDERING):
+def asymmetric_spatial_2d_padding(x, top_pad=1, bottom_pad=1,
+                                  left_pad=1, right_pad=1,
+                                  dim_ordering='default'):
    '''Pad the rows and columns of a 4D tensor
-    with "top_pad", "bottom_pad", "left_pad", "right_pad"  (resp.) zeros rows on top, bottom; cols on left, right.
+    with "top_pad", "bottom_pad", "left_pad", "right_pad" (resp.) zeros
+    rows on top, bottom; cols on left, right.
    '''
+    if dim_ordering == 'default':
+        dim_ordering = image_dim_ordering()
+    if dim_ordering not in {'th', 'tf'}:
+        raise ValueError('Unknown dim_ordering ' + str(dim_ordering))
+
    if dim_ordering == 'th':
        pattern = [[0, 0],
                   [0, 0],
@@ -885,13 +935,18 @@ def asymmetric_spatial_2d_padding(x, top_pad=1, bottom_pad=1, left_pad=1, right_
    return tf.pad(x, pattern)


-def spatial_3d_padding(x, padding=(1, 1, 1), dim_ordering=_IMAGE_DIM_ORDERING):
+def spatial_3d_padding(x, padding=(1, 1, 1), dim_ordering='default'):
    '''Pads 5D tensor with zeros for the depth, height, width dimension with
    "padding[0]", "padding[1]" and "padding[2]" (resp.) zeros left and right

    For 'tf' dim_ordering, the 2nd, 3rd and 4th dimension will be padded.
    For 'th' dim_ordering, the 3rd, 4th and 5th dimension will be padded.
    '''
+    if dim_ordering == 'default':
+        dim_ordering = image_dim_ordering()
+    if dim_ordering not in {'th', 'tf'}:
+        raise ValueError('Unknown dim_ordering ' + str(dim_ordering))
+
    if dim_ordering == 'th':
        pattern = [
            [0, 0],
@@ -1033,7 +1088,8 @@ class Function(object):
        for tensor, value in zip(self.inputs, inputs):
            if is_sparse(tensor):
                sparse_coo = value.tocoo()
-                indices = np.concatenate((np.expand_dims(sparse_coo.row, 1), np.expand_dims(sparse_coo.col, 1)), 1)
+                indices = np.concatenate((np.expand_dims(sparse_coo.row, 1),
+                                          np.expand_dims(sparse_coo.col, 1)), 1)
                value = (indices, sparse_coo.data, sparse_coo.shape)
            feed_dict[tensor] = value
        session = get_session()
@@ -1051,8 +1107,8 @@ def function(inputs, outputs, updates=[], **kwargs):
    '''
    if len(kwargs) > 0:
        msg = [
-            "Expected no kwargs, you passed %s" % len(kwargs),
-            "kwargs passed to function are ignored with Tensorflow backend"
+            'Expected no kwargs, you passed %s' % len(kwargs),
+            'kwargs passed to function are ignored with Tensorflow backend'
        ]
        warnings.warn('\n'.join(msg))
    return Function(inputs, outputs, updates=updates)
@@ -1121,6 +1177,13 @@ def rnn(step_function, inputs, initial_states,
    axes = [1, 0] + list(range(2, ndim))
    inputs = tf.transpose(inputs, (axes))

+    if mask is not None:
+        if mask.dtype != tf.bool:
+            mask = tf.cast(mask, tf.bool)
+        if len(mask.get_shape()) == ndim - 1:
+            mask = expand_dims(mask)
+        mask = tf.transpose(mask, axes)
+
    if constants is None:
        constants = []

@@ -1137,13 +1200,7 @@ def rnn(step_function, inputs, initial_states,
            input_list.reverse()

        if mask is not None:
-            # Transpose not supported by bool tensor types, hence round-trip to uint8.
-            mask = tf.cast(mask, tf.uint8)
-            if len(mask.get_shape()) == ndim - 1:
-                mask = expand_dims(mask)
-            mask = tf.cast(tf.transpose(mask, axes), tf.bool)
            mask_list = tf.unpack(mask)
-
            if go_backwards:
                mask_list.reverse()

@@ -1187,26 +1244,25 @@ def rnn(step_function, inputs, initial_states,
            outputs = tf.pack(successive_outputs)

    else:
-        from tensorflow.python.ops.rnn import _dynamic_rnn_loop
-
        if go_backwards:
            inputs = tf.reverse(inputs, [True] + [False] * (ndim - 1))

-        states = initial_states
-        nb_states = len(states)
-        if nb_states == 0:
-            # use dummy state, otherwise _dynamic_rnn_loop breaks
-            state = inputs[:, 0, :]
-            state_size = state.get_shape()[-1]
-        else:
-            state_size = int(states[0].get_shape()[-1])
-            if nb_states == 1:
-                state = states[0]
-            else:
-                state = tf.concat(1, states)
+        states = tuple(initial_states)
+
+        time_steps = tf.shape(inputs)[0]
+        output_ta = tensor_array_ops.TensorArray(
+            dtype=inputs.dtype,
+            size=time_steps,
+            tensor_array_name='output_ta')
+        input_ta = tensor_array_ops.TensorArray(
+            dtype=inputs.dtype,
+            size=time_steps,
+            tensor_array_name='input_ta')
+        input_ta = input_ta.unpack(inputs)
+        time = tf.constant(0, dtype='int32', name='time')

        if mask is not None:
-            if len(initial_states) == 0:
+            if len(states) == 0:
                raise ValueError('No initial states provided! '
                                 'When using masking in an RNN, you should '
                                 'provide initial states '
@@ -1216,84 +1272,44 @@ def rnn(step_function, inputs, initial_states,
            if go_backwards:
                mask = tf.reverse(mask, [True] + [False] * (ndim - 2))

-            # Transpose not supported by bool tensor types, hence round-trip to uint8.
-            mask = tf.cast(mask, tf.uint8)
-            if len(mask.get_shape()) == ndim - 1:
-                mask = expand_dims(mask)
-            mask = tf.transpose(mask, axes)
-            inputs = tf.concat(2, [tf.cast(mask, inputs.dtype), inputs])
+            mask_ta = tensor_array_ops.TensorArray(
+                dtype=tf.bool,
+                size=time_steps,
+                tensor_array_name='mask_ta')
+            mask_ta = mask_ta.unpack(mask)

-            def _step(input, state):
-                if nb_states > 1:
-                    states = []
-                    for i in range(nb_states):
-                        states.append(state[:, i * state_size: (i + 1) * state_size])
-                else:
-                    states = [state]
-                mask_t = tf.cast(input[:, 0], tf.bool)
-                input = input[:, 1:]
-                output, new_states = step_function(input, states + constants)
-
-                output = tf.select(mask_t, output, states[0])
-                new_states = [tf.select(mask_t, new_states[i], states[i]) for i in range(len(states))]
-
-                if len(new_states) == 1:
-                    new_state = new_states[0]
-                else:
-                    new_state = tf.concat(1, new_states)
-
-                return output, new_state
+            def _step(time, output_ta_t, *states):
+                current_input = input_ta.read(time)
+                mask_t = mask_ta.read(time)
+                output, new_states = step_function(current_input,
+                                                   tuple(states) +
+                                                   tuple(constants))
+                tiled_mask_t = tf.tile(mask_t, tf.pack([1, tf.shape(output)[1]]))
+                output = tf.select(tiled_mask_t, output, states[0])
+                new_states = [tf.select(tiled_mask_t, new_states[i], states[i]) for i in range(len(states))]
+                output_ta_t = output_ta_t.write(time, output)
+                return (time + 1, output_ta_t) + tuple(new_states)
        else:
-            def _step(input, state):
-                if nb_states > 1:
-                    states = []
-                    for i in range(nb_states):
-                        states.append(state[:, i * state_size: (i + 1) * state_size])
-                elif nb_states == 1:
-                    states = [state]
-                else:
-                    states = []
-                output, new_states = step_function(input, states + constants)
+            def _step(time, output_ta_t, *states):
+                current_input = input_ta.read(time)
+                output, new_states = step_function(current_input,
+                                                   tuple(states) +
+                                                   tuple(constants))
+                output_ta_t = output_ta_t.write(time, output)
+                return (time + 1, output_ta_t) + tuple(new_states)

-                if len(new_states) > 1:
-                    new_state = tf.concat(1, new_states)
-                elif len(new_states) == 1:
-                    new_state = new_states[0]
-                else:
-                    # return dummy state, otherwise _dynamic_rnn_loop breaks
-                    new_state = state
-                return output, new_state
-
-        _step.state_size = state_size * nb_states
-        # recover output size by calling _step on the first input
-        slice_begin = tf.pack([0] * ndim)
-        slice_size = tf.pack([1] + [-1] * (ndim - 1))
-        first_input = tf.slice(inputs, slice_begin, slice_size)
-        first_input = tf.squeeze(first_input, [0])
-        _step.output_size = int(_step(first_input, state)[0].get_shape()[-1])
-
-        (outputs, final_state) = _dynamic_rnn_loop(
-            _step,
-            inputs,
-            state,
+        final_outputs = control_flow_ops.while_loop(
+            cond=lambda time, *_: time < time_steps,
+            body=_step,
+            loop_vars=(time, output_ta) + states,
            parallel_iterations=32,
-            swap_memory=True,
-            sequence_length=None)
+            swap_memory=True)
+        last_time = final_outputs[0]
+        output_ta = final_outputs[1]
+        new_states = final_outputs[2:]

-        if nb_states > 1:
-            new_states = []
-            for i in range(nb_states):
-                new_states.append(final_state[:, i * state_size: (i + 1) * state_size])
-        elif nb_states == 1:
-            new_states = [final_state]
-        else:
-            new_states = []
-
-        # all this circus is to recover the last vector in the sequence.
-        slice_begin = tf.pack([tf.shape(outputs)[0] - 1] + [0] * (ndim - 1))
-        slice_size = tf.pack([1] + [-1] * (ndim - 1))
-        last_output = tf.slice(outputs, slice_begin, slice_size)
-        last_output = tf.squeeze(last_output, [0])
+        outputs = output_ta.pack()
+        last_output = output_ta.read(last_time - 1)

    axes = [1, 0] + list(range(2, len(outputs.get_shape())))
    outputs = tf.transpose(outputs, axes)
@@ -1301,7 +1317,8 @@ def rnn(step_function, inputs, initial_states,


 def _cond(condition, then_lambda, else_lambda):
-    '''Backwards compatible interface to tf.cond prior to public introduction.'''
+    '''Backwards compatible interface to tf.cond prior to public introduction.
+    '''
    try:
        cond_fn = tf.cond
    except AttributeError:
@@ -1311,7 +1328,8 @@ def _cond(condition, then_lambda, else_lambda):


 def switch(condition, then_expression, else_expression):
-    '''Switches between two operations depending on a scalar value (int or bool).
+    '''Switches between two operations
+    depending on a scalar value (int or bool).
    Note that both `then_expression` and `else_expression`
    should be symbolic tensors of the *same shape*.

@@ -1321,8 +1339,11 @@ def switch(condition, then_expression, else_expression):
        else_expression: TensorFlow operation.
    '''
    x_shape = copy.copy(then_expression.get_shape())
-    x = _cond(tf.cast(condition, 'bool'),
-              lambda: then_expression, lambda: else_expression)
+    if condition.dtype != tf.bool:
+        condition = tf.cast(condition, 'bool')
+    x = _cond(condition,
+              lambda: then_expression,
+              lambda: else_expression)
    x.set_shape(x_shape)
    return x

@@ -1331,15 +1352,13 @@ def in_train_phase(x, alt):
    '''Selects `x` in train phase, and `alt` otherwise.
    Note that `alt` should have the *same shape* as `x`.
    '''
-    if _LEARNING_PHASE is 1:
+    if learning_phase() is 1:
        return x
-    elif _LEARNING_PHASE is 0:
+    elif learning_phase() is 0:
        return alt
-    # else: assume learning phase is a placeholder.
-    x_shape = copy.copy(x.get_shape())
-    x = _cond(tf.cast(_LEARNING_PHASE, 'bool'), lambda: x, lambda: alt)
+    # else: assume learning phase is a placeholder tensor.
+    x = switch(learning_phase(), x, alt)
    x._uses_learning_phase = True
-    x.set_shape(x_shape)
    return x


@@ -1347,14 +1366,13 @@ def in_test_phase(x, alt):
    '''Selects `x` in test phase, and `alt` otherwise.
    Note that `alt` should have the *same shape* as `x`.
    '''
-    if _LEARNING_PHASE is 1:
+    if learning_phase() is 1:
        return alt
-    elif _LEARNING_PHASE is 0:
+    elif learning_phase() is 0:
        return x
-    x_shape = copy.copy(x.get_shape())
-    x = _cond(tf.cast(_LEARNING_PHASE, 'bool'), lambda: alt, lambda: x)
+    # else: assume learning phase is a placeholder tensor.
+    x = switch(learning_phase(), alt, x)
    x._uses_learning_phase = True
-    x.set_shape(x_shape)
    return x


@@ -1381,17 +1399,17 @@ def relu(x, alpha=0., max_value=None):


 def elu(x, alpha=1.):
-    """ Exponential linear unit
+    '''Exponential linear unit.

    # Arguments
        x: Tensor to compute the activation function for.
        alpha: scalar
-    """
+    '''
    res = tf.nn.elu(x)
    if alpha == 1:
        return res
    else:
-        return tf.select(x > 0, res, alpha*res)
+        return tf.select(x > 0, res, alpha * res)


 def softmax(x):
@@ -1407,6 +1425,8 @@ def softplus(x):


 def softsign(x):
+    '''Softsign of a tensor.
+    '''
    return tf.nn.softsign(x)


@@ -1516,8 +1536,9 @@ def l2_normalize(x, axis):
        axis = axis % len(x.get_shape())
    return tf.nn.l2_normalize(x, dim=axis)

+
 def in_top_k(predictions, targets, k):
-    '''Says whether the `targets` are in the top `k` `predictions`
+    '''Returns whether the `targets` are in the top `k` `predictions`

    # Arguments
        predictions: A tensor of shape batch_size x classes and type float32.
@@ -1615,8 +1636,29 @@ def _postprocess_conv3d_output(x, dim_ordering):
    return x


+def conv1d(x, kernel, stride=1, border_mode='valid',
+           image_shape=None, filter_shape=None):
+    '''1D convolution.
+
+    # Arguments
+        x: input tensor.
+        kernel: kernel tensor.
+        stride: stride integer.
+        border_mode: string, "same" or "valid".
+        image_shape: not used by this implementation.
+        filter_shape: not used by this implementation.
+
+    # Returns
+        The output of `tf.nn.conv1d`, cast back to float64
+        when `_FLOATX` is 'float64'.
+    '''
+    # pre-process dtype: the convolution itself is run on float32 tensors
+    if _FLOATX == 'float64':
+        x = tf.cast(x, 'float32')
+        kernel = tf.cast(kernel, 'float32')
+    padding = _preprocess_border_mode(border_mode)
+    x = tf.nn.conv1d(x, kernel, stride, padding=padding)
+    # post-process dtype: restore float64 for the caller
+    if _FLOATX == 'float64':
+        x = tf.cast(x, 'float64')
+    return x
+
+
 def conv2d(x, kernel, strides=(1, 1), border_mode='valid',
-           dim_ordering=_IMAGE_DIM_ORDERING,
+           dim_ordering='default',
           image_shape=None, filter_shape=None, filter_dilation=(1, 1)):
    '''2D convolution.

@@ -1628,8 +1670,10 @@ def conv2d(x, kernel, strides=(1, 1), border_mode='valid',
            Whether to use Theano or TensorFlow dimension ordering
            for inputs/kernels/outputs.
    '''
+    if dim_ordering == 'default':
+        dim_ordering = image_dim_ordering()
    if dim_ordering not in {'th', 'tf'}:
-        raise Exception('Unknown dim_ordering ' + str(dim_ordering))
+        raise ValueError('Unknown dim_ordering ' + str(dim_ordering))

    x = _preprocess_conv2d_input(x, dim_ordering)
    kernel = _preprocess_conv2d_kernel(kernel, dim_ordering)
@@ -1646,7 +1690,7 @@ def conv2d(x, kernel, strides=(1, 1), border_mode='valid',

 def deconv2d(x, kernel, output_shape, strides=(1, 1),
             border_mode='valid',
-             dim_ordering=_IMAGE_DIM_ORDERING,
+             dim_ordering='default',
             image_shape=None, filter_shape=None):
    '''2D deconvolution (i.e. transposed convolution).

@@ -1660,8 +1704,10 @@ def deconv2d(x, kernel, output_shape, strides=(1, 1),
            Whether to use Theano or TensorFlow dimension ordering
            for inputs/kernels/outputs.
    '''
+    if dim_ordering == 'default':
+        dim_ordering = image_dim_ordering()
    if dim_ordering not in {'th', 'tf'}:
-        raise Exception('Unknown dim_ordering ' + str(dim_ordering))
+        raise ValueError('Unknown dim_ordering ' + str(dim_ordering))

    x = _preprocess_conv2d_input(x, dim_ordering)
    output_shape = _preprocess_deconv_output_shape(output_shape, dim_ordering)
@@ -1677,10 +1723,12 @@ def deconv2d(x, kernel, output_shape, strides=(1, 1),

 def atrous_conv2d(x, kernel, rate=1,
                  border_mode='valid',
-                  dim_ordering=_IMAGE_DIM_ORDERING,
+                  dim_ordering='default',
                  image_shape=None, filter_shape=None):
+    if dim_ordering == 'default':
+        dim_ordering = image_dim_ordering()
    if dim_ordering not in {'th', 'tf'}:
-        raise Exception('Unknown dim_ordering ' + str(dim_ordering))
+        raise ValueError('Unknown dim_ordering ' + str(dim_ordering))
    if rate == 1:
        return conv2d(x, kernel, strides=(1, 1), border_mode=border_mode,
                      dim_ordering=dim_ordering)
@@ -1694,9 +1742,11 @@ def atrous_conv2d(x, kernel, rate=1,


 def separable_conv2d(x, depthwise_kernel, pointwise_kernel, strides=(1, 1),
-                     border_mode='valid', dim_ordering=_IMAGE_DIM_ORDERING):
+                     border_mode='valid', dim_ordering='default'):
+    if dim_ordering == 'default':
+        dim_ordering = image_dim_ordering()
    if dim_ordering not in {'th', 'tf'}:
-        raise Exception('Unknown dim_ordering ' + str(dim_ordering))
+        raise ValueError('Unknown dim_ordering ' + str(dim_ordering))

    x = _preprocess_conv2d_input(x, dim_ordering)
    depthwise_kernel = _preprocess_conv2d_kernel(depthwise_kernel,
@@ -1712,7 +1762,7 @@ def separable_conv2d(x, depthwise_kernel, pointwise_kernel, strides=(1, 1),


 def conv3d(x, kernel, strides=(1, 1, 1),
-           border_mode='valid', dim_ordering=_IMAGE_DIM_ORDERING,
+           border_mode='valid', dim_ordering='default',
           volume_shape=None, filter_shape=None):
    '''3D convolution.

@@ -1724,8 +1774,10 @@ def conv3d(x, kernel, strides=(1, 1, 1),
            Whether to use Theano or TensorFlow dimension ordering
            for inputs/kernels/outputs.
    '''
+    if dim_ordering == 'default':
+        dim_ordering = image_dim_ordering()
    if dim_ordering not in {'th', 'tf'}:
-        raise Exception('Unknown dim_ordering ' + str(dim_ordering))
+        raise ValueError('Unknown dim_ordering ' + str(dim_ordering))

    x = _preprocess_conv3d_input(x, dim_ordering)
    kernel = _preprocess_conv3d_kernel(kernel, dim_ordering)
@@ -1737,7 +1789,7 @@ def conv3d(x, kernel, strides=(1, 1, 1),


 def pool2d(x, pool_size, strides=(1, 1),
-           border_mode='valid', dim_ordering=_IMAGE_DIM_ORDERING,
+           border_mode='valid', dim_ordering='default',
           pool_mode='max'):
    '''2D Pooling.

@@ -1748,8 +1800,10 @@ def pool2d(x, pool_size, strides=(1, 1),
        dim_ordering: one of "th", "tf".
        pool_mode: one of "max", "avg".
    '''
+    if dim_ordering == 'default':
+        dim_ordering = image_dim_ordering()
    if dim_ordering not in {'th', 'tf'}:
-        raise Exception('Unknown dim_ordering ' + str(dim_ordering))
+        raise ValueError('Unknown dim_ordering ' + str(dim_ordering))

    padding = _preprocess_border_mode(border_mode)
    strides = (1,) + strides + (1,)
@@ -1768,7 +1822,7 @@ def pool2d(x, pool_size, strides=(1, 1),


 def pool3d(x, pool_size, strides=(1, 1, 1), border_mode='valid',
-           dim_ordering=_IMAGE_DIM_ORDERING, pool_mode='max'):
+           dim_ordering='default', pool_mode='max'):
    '''3D Pooling.

    # Arguments
@@ -1778,8 +1832,10 @@ def pool3d(x, pool_size, strides=(1, 1, 1), border_mode='valid',
        dim_ordering: one of "th", "tf".
        pool_mode: one of "max", "avg".
    '''
+    if dim_ordering == 'default':
+        dim_ordering = image_dim_ordering()
    if dim_ordering not in {'th', 'tf'}:
-        raise Exception('Unknown dim_ordering ' + str(dim_ordering))
+        raise ValueError('Unknown dim_ordering ' + str(dim_ordering))

    padding = _preprocess_border_mode(border_mode)
    strides = (1,) + strides + (1,)
@@ -1924,3 +1980,52 @@ def ctc_decode(y_pred, input_length, greedy=True, beam_width=100,
                     for st in decoded]

    return (decoded_dense, log_prob)
+
+
+# HIGHER-ORDER FUNCTIONS
+
+def map_fn(fn, elems, name=None):
+    '''Maps the function `fn` over the elements of `elems`.
+
+    Thin wrapper that forwards its arguments to `tf.map_fn`.
+
+    # Arguments
+        fn: Callable that will be called upon each element in elems
+        elems: tensor
+        name: A string name for the map node in the graph
+
+    # Returns
+        Tensor whose first dimension equals that of `elems`; the
+        remaining dimensions depend on `fn`.
+    '''
+    return tf.map_fn(fn, elems, name=name)
+
+
+def foldl(fn, elems, initializer=None, name=None):
+    '''Reduces `elems` using `fn` to combine them from left to right.
+
+    Thin wrapper that forwards its arguments to `tf.foldl`.
+
+    # Arguments
+        fn: Callable that will be called upon each element in elems and an
+            accumulator, for instance lambda acc, x: acc + x
+        elems: tensor
+        initializer: The first value used (elems[0] in case of None)
+        name: A string name for the foldl node in the graph
+
+    # Returns
+        Same type and shape as initializer
+    '''
+    return tf.foldl(fn, elems, initializer=initializer, name=name)
+
+
+def foldr(fn, elems, initializer=None, name=None):
+    '''Reduces `elems` using `fn` to combine them from right to left.
+
+    Thin wrapper that forwards its arguments to `tf.foldr`.
+
+    # Arguments
+        fn: Callable that will be called upon each element in elems and an
+            accumulator, for instance lambda acc, x: acc + x
+        elems: tensor
+        initializer: The first value used (elems[-1] in case of None)
+        name: A string name for the foldr node in the graph
+
+    # Returns
+        Same type and shape as initializer
+    '''
+    return tf.foldr(fn, elems, initializer=initializer, name=name)
@@ -14,7 +14,7 @@ except ImportError:
    from theano.sandbox.softsign import softsign as T_softsign
 import inspect
 import numpy as np
-from .common import _FLOATX, _EPSILON, _IMAGE_DIM_ORDERING
+from .common import _FLOATX, _EPSILON, image_dim_ordering
 py_all = all


@@ -35,6 +35,7 @@ def set_learning_phase(value):
                         '0 or 1.')
    _LEARNING_PHASE = value

+
 # VARIABLE MANIPULATION


@@ -88,7 +89,7 @@ def placeholder(shape=None, ndim=None, dtype=_FLOATX, sparse=False, name=None):


 def shape(x):
-    '''Return the shape of a tensor.
+    '''Returns the shape of a tensor.

    Warning: type returned will be different for
    Theano backend (Theano tensor type) and TF backend (TF TensorShape).
@@ -105,25 +106,25 @@ def dtype(x):


 def eval(x):
-    '''Run a graph.
+    '''Returns the value of a tensor.
    '''
    return to_dense(x).eval()


 def zeros(shape, dtype=_FLOATX, name=None):
-    '''Instantiate an all-zeros variable.
+    '''Instantiates an all-zeros variable.
    '''
    return variable(np.zeros(shape), dtype, name)


 def ones(shape, dtype=_FLOATX, name=None):
-    '''Instantiate an all-ones variable.
+    '''Instantiates an all-ones variable.
    '''
    return variable(np.ones(shape), dtype, name)


 def eye(size, dtype=_FLOATX, name=None):
-    '''Instantiate an identity matrix.
+    '''Instantiates an identity matrix.
    '''
    return variable(np.eye(size), dtype, name)

@@ -147,7 +148,7 @@ def random_normal_variable(shape, mean, scale, dtype=_FLOATX, name=None):


 def count_params(x):
-    '''Return number of scalars in a tensor.
+    '''Returns the number of scalars in a tensor.

    Return: numpy integer.
    '''
@@ -393,7 +394,7 @@ def cos(x):

 def normalize_batch_in_training(x, gamma, beta,
                                reduction_axes, epsilon=0.0001):
-    '''Compute mean and std for batch then apply batch_normalization on batch.
+    '''Computes mean and std for batch then applies batch_normalization on batch.
    '''
    dev = theano.config.device
    use_cudnn = ndim(x) < 5 and reduction_axes == [0, 2, 3] and (dev.startswith('cuda') or dev.startswith('gpu'))
@@ -616,10 +617,15 @@ def asymmetric_temporal_padding(x, left_pad=1, right_pad=1):
    return T.set_subtensor(output[:, left_pad:x.shape[1] + left_pad, :], x)


-def spatial_2d_padding(x, padding=(1, 1), dim_ordering=_IMAGE_DIM_ORDERING):
+def spatial_2d_padding(x, padding=(1, 1), dim_ordering='default'):
    '''Pad the 2nd and 3rd dimensions of a 4D tensor
    with "padding[0]" and "padding[1]" (resp.) zeros left and right.
    '''
+    if dim_ordering == 'default':
+        dim_ordering = image_dim_ordering()
+    if dim_ordering not in {'th', 'tf'}:
+        raise ValueError('Unknown dim_ordering ' + str(dim_ordering))
+
    input_shape = x.shape
    if dim_ordering == 'th':
        output_shape = (input_shape[0],
@@ -647,10 +653,18 @@ def spatial_2d_padding(x, padding=(1, 1), dim_ordering=_IMAGE_DIM_ORDERING):
    return T.set_subtensor(output[indices], x)


-def asymmetric_spatial_2d_padding(x, top_pad=1, bottom_pad=1, left_pad=1, right_pad=1, dim_ordering=_IMAGE_DIM_ORDERING):
+def asymmetric_spatial_2d_padding(x, top_pad=1, bottom_pad=1,
+                                  left_pad=1, right_pad=1,
+                                  dim_ordering='default'):
    '''Pad the rows and columns of a 4D tensor
-    with "top_pad", "bottom_pad", "left_pad", "right_pad"  (resp.) zeros rows on top, bottom; cols on left, right.
+    with "top_pad", "bottom_pad", "left_pad", "right_pad" (resp.) zeros
+    rows on top, bottom; cols on left, right.
    '''
+    if dim_ordering == 'default':
+        dim_ordering = image_dim_ordering()
+    if dim_ordering not in {'th', 'tf'}:
+        raise ValueError('Unknown dim_ordering ' + str(dim_ordering))
+
    input_shape = x.shape
    if dim_ordering == 'th':
        output_shape = (input_shape[0],
@@ -679,10 +693,15 @@ def asymmetric_spatial_2d_padding(x, top_pad=1, bottom_pad=1, left_pad=1, right_
    return T.set_subtensor(output[indices], x)


-def spatial_3d_padding(x, padding=(1, 1, 1), dim_ordering=_IMAGE_DIM_ORDERING):
+def spatial_3d_padding(x, padding=(1, 1, 1), dim_ordering='default'):
    '''Pad the 2nd, 3rd and 4th dimensions of a 5D tensor
    with "padding[0]", "padding[1]" and "padding[2]" (resp.) zeros left and right.
    '''
+    if dim_ordering == 'default':
+        dim_ordering = image_dim_ordering()
+    if dim_ordering not in {'th', 'tf'}:
+        raise ValueError('Unknown dim_ordering ' + str(dim_ordering))
+
    input_shape = x.shape
    if dim_ordering == 'th':
        output_shape = (input_shape[0],
@@ -1119,7 +1138,7 @@ def l2_normalize(x, axis):


 def in_top_k(predictions, targets, k):
-    '''Says whether the `targets` are in the top `k` `predictions`
+    '''Returns whether the `targets` are in the top `k` `predictions`

    # Arguments
        predictions: A tensor of shape batch_size x classes and type float32.
@@ -1182,6 +1201,8 @@ def _preprocess_border_mode(border_mode):
        th_border_mode = 'half'
    elif border_mode == 'valid':
        th_border_mode = 'valid'
+    elif border_mode == 'full':
+        th_border_mode = 'full'
    else:
        raise Exception('Border mode not supported: ' + str(border_mode))
    return th_border_mode
@@ -1275,8 +1296,20 @@ def _postprocess_conv3d_output(conv_out, x, border_mode, np_kernel, strides, dim
    return conv_out


+def conv1d(x, kernel, stride=1, border_mode='valid',
+           image_shape=None, filter_shape=None):
+    '''1D convolution (not implemented in this backend).
+
+    # Arguments
+        x: input tensor.
+        kernel: kernel tensor.
+        stride: stride integer.
+        border_mode: string, "same" or "valid".
+        image_shape: not used.
+        filter_shape: not used.
+
+    # Raises
+        NotImplementedError: always.
+    '''
+    raise NotImplementedError
+
+
 def conv2d(x, kernel, strides=(1, 1), border_mode='valid',
-           dim_ordering=_IMAGE_DIM_ORDERING, image_shape=None,
+           dim_ordering='default', image_shape=None,
           filter_shape=None, filter_dilation=(1, 1)):
    '''2D convolution.

@@ -1288,6 +1321,8 @@ def conv2d(x, kernel, strides=(1, 1), border_mode='valid',
            Whether to use Theano or TensorFlow dimension ordering
        in inputs/kernels/ouputs.
    '''
+    if dim_ordering == 'default':
+        dim_ordering = image_dim_ordering()
    if dim_ordering not in {'th', 'tf'}:
        raise Exception('Unknown dim_ordering ' + str(dim_ordering))

@@ -1320,7 +1355,7 @@ def conv2d(x, kernel, strides=(1, 1), border_mode='valid',

 def deconv2d(x, kernel, output_shape, strides=(1, 1),
             border_mode='valid',
-             dim_ordering=_IMAGE_DIM_ORDERING,
+             dim_ordering='default',
             image_shape=None, filter_shape=None):
    '''2D deconvolution (transposed convolution).

@@ -1334,6 +1369,8 @@ def deconv2d(x, kernel, output_shape, strides=(1, 1),
        in inputs/kernels/ouputs.
    '''
    flip_filters = False
+    if dim_ordering == 'default':
+        dim_ordering = image_dim_ordering()
    if dim_ordering not in {'th', 'tf'}:
        raise Exception('Unknown dim_ordering ' + str(dim_ordering))

@@ -1358,18 +1395,18 @@ def deconv2d(x, kernel, output_shape, strides=(1, 1),

 def atrous_conv2d(x, kernel, rate=1,
                  border_mode='valid',
-                  dim_ordering=_IMAGE_DIM_ORDERING,
+                  dim_ordering='default',
                  image_shape=None, filter_shape=None):
    raise NotImplementedError


 def separable_conv2d(x, depthwise_kernel, pointwise_kernel, strides=(1, 1),
-                     border_mode='valid', dim_ordering=_IMAGE_DIM_ORDERING):
+                     border_mode='valid', dim_ordering='default'):
    raise NotImplementedError


 def conv3d(x, kernel, strides=(1, 1, 1),
-           border_mode='valid', dim_ordering=_IMAGE_DIM_ORDERING,
+           border_mode='valid', dim_ordering='default',
           volume_shape=None, filter_shape=None,
           filter_dilation=(1, 1, 1)):
    '''3D convolution.
@@ -1382,6 +1419,8 @@ def conv3d(x, kernel, strides=(1, 1, 1),
            Whether to use Theano or TensorFlow dimension ordering
        in inputs/kernels/ouputs.
    '''
+    if dim_ordering == 'default':
+        dim_ordering = image_dim_ordering()
    if dim_ordering not in {'th', 'tf'}:
        raise Exception('Unknown dim_ordering ' + str(dim_ordering))

@@ -1415,12 +1454,14 @@ def conv3d(x, kernel, strides=(1, 1, 1),

 # TODO: remove this function when theano without AbstractConv3d is deprecated
 def _old_theano_conv3d(x, kernel, strides=(1, 1, 1),
-                       border_mode='valid', dim_ordering=_IMAGE_DIM_ORDERING,
+                       border_mode='valid', dim_ordering='default',
                       volume_shape=None, filter_shape=None):
    '''
    Run on cuDNN if available.
    border_mode: string, "same" or "valid".
    '''
+    if dim_ordering == 'default':
+        dim_ordering = image_dim_ordering()
    if dim_ordering not in {'th', 'tf'}:
        raise Exception('Unknown dim_ordering ' + str(dim_ordering))

@@ -1477,7 +1518,12 @@ def _old_theano_conv3d(x, kernel, strides=(1, 1, 1),


 def pool2d(x, pool_size, strides=(1, 1), border_mode='valid',
-           dim_ordering=_IMAGE_DIM_ORDERING, pool_mode='max'):
+           dim_ordering='default', pool_mode='max'):
+    if dim_ordering == 'default':
+        dim_ordering = image_dim_ordering()
+    if dim_ordering not in {'th', 'tf'}:
+        raise Exception('Unknown dim_ordering ' + str(dim_ordering))
+
    if border_mode == 'same':
        w_pad = pool_size[0] - 2 if pool_size[0] % 2 == 1 else pool_size[0] - 1
        h_pad = pool_size[1] - 2 if pool_size[1] % 2 == 1 else pool_size[1] - 1
@@ -1494,15 +1540,33 @@ def pool2d(x, pool_size, strides=(1, 1), border_mode='valid',
        x = x.dimshuffle((0, 3, 1, 2))

    if pool_mode == 'max':
-        pool_out = pool.pool_2d(x, ds=pool_size, st=strides,
-                                ignore_border=True,
-                                padding=padding,
-                                mode='max')
+        # TODO remove the old call once Theano older than 0.9.0dev4 is deprecated
+        try:
+            # new interface (introduced in 0.9.0dev4)
+            pool_out = pool.pool_2d(x, ws=pool_size, stride=strides,
+                                    ignore_border=True,
+                                    pad=padding,
+                                    mode='max')
+        except TypeError:
+            # old interface
+            pool_out = pool.pool_2d(x, ds=pool_size, st=strides,
+                                    ignore_border=True,
+                                    padding=padding,
+                                    mode='max')
    elif pool_mode == 'avg':
-        pool_out = pool.pool_2d(x, ds=pool_size, st=strides,
-                                ignore_border=True,
-                                padding=padding,
-                                mode='average_exc_pad')
+        # TODO remove the old call once Theano older than 0.9.0dev4 is deprecated
+        try:
+            # new interface (introduced in 0.9.0dev4)
+            pool_out = pool.pool_2d(x, ws=pool_size, stride=strides,
+                                    ignore_border=True,
+                                    pad=padding,
+                                    mode='average_exc_pad')
+        except TypeError:
+            # old interface
+            pool_out = pool.pool_2d(x, ds=pool_size, st=strides,
+                                    ignore_border=True,
+                                    padding=padding,
+                                    mode='average_exc_pad')
    else:
        raise Exception('Invalid pooling mode: ' + str(pool_mode))

@@ -1520,7 +1584,12 @@ def pool2d(x, pool_size, strides=(1, 1), border_mode='valid',


 def pool3d(x, pool_size, strides=(1, 1, 1), border_mode='valid',
-           dim_ordering=_IMAGE_DIM_ORDERING, pool_mode='max'):
+           dim_ordering='default', pool_mode='max'):
+    if dim_ordering == 'default':
+        dim_ordering = image_dim_ordering()
+    if dim_ordering not in {'th', 'tf'}:
+        raise Exception('Unknown dim_ordering ' + str(dim_ordering))
+
    # TODO: remove this if statement when Theano without pool_3d is deprecated
    #       (pool_3d was introduced after 0.9.0dev3)
    if not hasattr(T.signal.pool, 'pool_3d'):
@@ -1544,15 +1613,33 @@ def pool3d(x, pool_size, strides=(1, 1, 1), border_mode='valid',
        x = x.dimshuffle((0, 4, 1, 2, 3))

    if pool_mode == 'max':
-        pool_out = pool.pool_3d(x, ds=pool_size, st=strides,
-                                ignore_border=True,
-                                padding=padding,
-                                mode='max')
+        # TODO remove the old call once Theano older than 0.9.0dev4 is deprecated
+        try:
+            # new interface (introduced in 0.9.0dev4)
+            pool_out = pool.pool_3d(x, ws=pool_size, stride=strides,
+                                    ignore_border=True,
+                                    pad=padding,
+                                    mode='max')
+        except TypeError:
+            # old interface
+            pool_out = pool.pool_3d(x, ds=pool_size, st=strides,
+                                    ignore_border=True,
+                                    padding=padding,
+                                    mode='max')
    elif pool_mode == 'avg':
-        pool_out = pool.pool_3d(x, ds=pool_size, st=strides,
-                                ignore_border=True,
-                                padding=padding,
-                                mode='average_exc_pad')
+        # TODO remove the old call once Theano older than 0.9.0dev4 is deprecated
+        try:
+            # new interface (introduced in 0.9.0dev4)
+            pool_out = pool.pool_3d(x, ws=pool_size, stride=strides,
+                                    ignore_border=True,
+                                    pad=padding,
+                                    mode='average_exc_pad')
+        except TypeError:
+            # old interface
+            pool_out = pool.pool_3d(x, ds=pool_size, st=strides,
+                                    ignore_border=True,
+                                    padding=padding,
+                                    mode='average_exc_pad')
    else:
        raise Exception('Invalid pooling mode: ' + str(pool_mode))

@@ -1574,7 +1661,12 @@ def pool3d(x, pool_size, strides=(1, 1, 1), border_mode='valid',
 # TODO: remove this function when Theano without pool_3d is deprecated
 #       (pool_3d was introduced after 0.9.0dev3)
 def _old_theano_pool3d(x, pool_size, strides=(1, 1, 1), border_mode='valid',
-                       dim_ordering=_IMAGE_DIM_ORDERING, pool_mode='max'):
+                       dim_ordering='default', pool_mode='max'):
+    if dim_ordering == 'default':
+        dim_ordering = image_dim_ordering()
+    if dim_ordering not in {'th', 'tf'}:
+        raise Exception('Unknown dim_ordering ' + str(dim_ordering))
+
    if border_mode == 'same':
        # TODO: add implementation for border_mode="same"
        raise Exception('border_mode="same" not supported with Theano.')
@@ -1665,11 +1757,13 @@ def ctc_interleave_blanks(Y):
    Y_ = T.set_subtensor(Y_[T.arange(Y.shape[0]) * 2 + 1], Y)
    return Y_

+
 def ctc_create_skip_idxs(Y):
    skip_idxs = T.arange((Y.shape[0] - 3) // 2) * 2 + 1
    non_repeats = T.neq(Y[skip_idxs], Y[skip_idxs + 2])
    return skip_idxs[non_repeats.nonzero()]

+
 def ctc_update_log_p(skip_idxs, zeros, active, log_p_curr, log_p_prev):
    active_skip_idxs = skip_idxs[(skip_idxs < active).nonzero()]
    active_next = T.cast(T.minimum(
@@ -1695,11 +1789,11 @@ def ctc_update_log_p(skip_idxs, zeros, active, log_p_curr, log_p_prev):
    )
    return active_next, log_p_next

+
 def ctc_path_probs(predict, Y, alpha=1e-4):
    smoothed_predict = (1 - alpha) * predict[:, Y] + alpha * np.float32(1.) / Y.shape[0]
    L = T.log(smoothed_predict)
    zeros = T.zeros_like(L[0])
-    base = T.set_subtensor(zeros[:1], np.float32(1))
    log_first = zeros

    f_skip_idxs = ctc_create_skip_idxs(Y)
@@ -1718,12 +1812,14 @@ def ctc_path_probs(predict, Y, alpha=1e-4):
    log_probs = log_f_probs + log_b_probs[::-1, ::-1] - L
    return log_probs, mask

+
 def ctc_cost(predict, Y):
    log_probs, mask = ctc_path_probs(predict, ctc_interleave_blanks(Y))
    common_factor = T.max(log_probs)
    total_log_prob = T.log(T.sum(T.exp(log_probs - common_factor)[mask.nonzero()])) + common_factor
    return -total_log_prob

+
 # batchifies original CTC code
 def ctc_batch_cost(y_true, y_pred, input_length, label_length):
    '''Runs CTC loss algorithm on each batch element.
@@ -1748,10 +1844,75 @@ def ctc_batch_cost(y_true, y_pred, input_length, label_length):
        return ctc_cost(y_pred_step, y_true_step)

    ret, _ = theano.scan(
-        fn = ctc_step,
+        fn=ctc_step,
        outputs_info=None,
        sequences=[y_true, y_pred, input_length, label_length]
    )

    ret = ret.dimshuffle('x', 0)
    return ret
+
+
+# HIGHER-ORDER FUNCTIONS
+
+def map_fn(fn, elems, name=None):
+    '''Map the function fn over the elements elems and return the outputs.
+
+    # Arguments
+        fn: Callable that will be called upon each element in elems
+        elems: tensor, at least 2 dimensional
+        name: A string name for the map node in the graph
+
+    # Returns
+        Tensor whose first dimension equals that of elems and whose remaining
+        dimensions depend on fn
+    '''
+    return theano.map(fn, elems, name=name)[0]
+
+
+def foldl(fn, elems, initializer=None, name=None):
+    '''Reduce elems using fn to combine them from left to right.
+
+    # Arguments
+        fn: Callable that will be called upon each element in elems and an
+            accumulator, for instance lambda acc, x: acc + x
+        elems: tensor
+        initializer: The first value used (elems[0] in case of None)
+        name: A string name for the foldl node in the graph
+
+    # Returns
+        Same type and shape as initializer
+    '''
+    if initializer is None:
+        initializer = elems[0]
+        elems = elems[1:]
+
+    # We need to change the order of the arguments because theano accepts x as
+    # first parameter and accumulator as second
+    fn2 = lambda x, acc: fn(acc, x)
+
+    return theano.foldl(fn2, elems, initializer, name=name)[0]
+
+
+def foldr(fn, elems, initializer=None, name=None):
+    '''Reduce elems using fn to combine them from right to left.
+
+    # Arguments
+        fn: Callable that will be called upon each element in elems and an
+            accumulator, for instance lambda acc, x: acc + x
+        elems: tensor
+        initializer: The first value used (elems[-1] in case of None)
+        name: A string name for the foldr node in the graph
+
+    # Returns
+        Same type and shape as initializer
+    '''
+    if initializer is None:
+        initializer = elems[-1]
+        elems = elems[:-1]
+
+    # We need to change the order of the arguments because theano accepts x as
+    # first parameter and accumulator as second
+    fn2 = lambda x, acc: fn(acc, x)
+
+    return theano.foldr(fn2, elems, initializer, name=name)[0]
@@ -1,6 +1,7 @@
 from __future__ import absolute_import
 from __future__ import print_function

+import os
 import csv

 import numpy as np
@@ -337,6 +338,7 @@ class EarlyStopping(Callback):
        self.verbose = verbose
        self.min_delta = min_delta
        self.wait = 0
+        self.stopped_epoch = 0

        if mode not in ['auto', 'min', 'max']:
            warnings.warn('EarlyStopping mode %s is unknown, '
@@ -374,11 +376,14 @@ class EarlyStopping(Callback):
            self.wait = 0
        else:
            if self.wait >= self.patience:
-                if self.verbose > 0:
-                    print('Epoch %05d: early stopping' % (epoch))
+                self.stopped_epoch = epoch
                self.model.stop_training = True
            self.wait += 1

+    def on_train_end(self, logs={}):
+        if self.stopped_epoch > 0 and self.verbose > 0:
+            print('Epoch %05d: early stopping' % (self.stopped_epoch))
+

 class RemoteMonitor(Callback):
    '''Callback used to stream events to a server.
@@ -432,7 +437,11 @@ class LearningRateScheduler(Callback):
        assert hasattr(self.model.optimizer, 'lr'), \
            'Optimizer must have a "lr" attribute.'
        lr = self.schedule(epoch)
-        assert type(lr) == float, 'The output of the "schedule" function should be float.'
+
+        if not isinstance(lr, (float, np.float32, np.float64)):
+            raise ValueError('The output of the "schedule" function '
+                             'should be float.')
+
        K.set_value(self.model.optimizer.lr, lr)


@@ -675,10 +684,14 @@ class CSVLogger(Callback):
        self.append = append
        self.writer = None
        self.keys = None
+        self.append_header = True
        super(CSVLogger, self).__init__()

    def on_train_begin(self, logs={}):
        if self.append:
+            if os.path.exists(self.filename):
+                with open(self.filename) as f:
+                    self.append_header = len(f.readline()) == 0
            self.csv_file = open(self.filename, 'a')
        else:
            self.csv_file = open(self.filename, 'w')
@@ -694,7 +707,8 @@ class CSVLogger(Callback):
        if not self.writer:
            self.keys = sorted(logs.keys())
            self.writer = csv.DictWriter(self.csv_file, fieldnames=['epoch'] + self.keys)
-            self.writer.writeheader()
+            if self.append_header:
+                self.writer.writeheader()

        row_dict = OrderedDict({'epoch': epoch})
        row_dict.update((key, handle_value(logs[key])) for key in self.keys)
@@ -11,9 +11,10 @@ def load_batch(fpath, label_key='labels'):
    else:
        d = cPickle.load(f, encoding="bytes")
        # decode utf8
+        d_decoded = {}
        for k, v in d.items():
-            del(d[k])
-            d[k.decode("utf8")] = v
+            d_decoded[k.decode("utf8")] = v
+        d = d_decoded
    f.close()
    data = d["data"]
    labels = d[label_key]
@@ -186,13 +186,12 @@ def check_array_lengths(X, Y, W):


 def check_loss_and_target_compatibility(targets, losses, output_shapes):
-    assert len(targets) == len(losses) == len(output_shapes)
    key_losses = {'mean_square_error',
                  'binary_crossentropy',
                  'categorical_crossentropy'}
    for y, loss, shape in zip(targets, losses, output_shapes):
        if loss.__name__ == 'categorical_crossentropy':
-            if y.shape[1] == 1:
+            if y.shape[-1] == 1:
                raise Exception('You are passing a target array of shape ' + str(y.shape) +
                                ' while using as loss `categorical_crossentropy`. '
                                '`categorical_crossentropy` expects '
@@ -208,13 +207,15 @@ def check_loss_and_target_compatibility(targets, losses, output_shapes):
                                'Alternatively, you can use the loss function '
                                '`sparse_categorical_crossentropy` instead, '
                                'which does expect integer targets.')
-        if loss.__name__ in key_losses and shape[1] is not None and y.shape[1] != shape[1]:
-            raise Exception('A target array with shape ' + str(y.shape) +
-                            ' was passed for an output of shape ' + str(shape) +
-                            ' while using as loss `' + loss.__name__ + '`. '
-                            'This loss expects '
-                            'targets to have the same shape '
-                            'as the output.')
+        if loss.__name__ in key_losses:
+            for target_dim, out_dim in zip(y.shape[1:], shape[1:]):
+                if out_dim is not None and target_dim != out_dim:
+                    raise Exception('A target array with shape ' + str(y.shape) +
+                                    ' was passed for an output of shape ' + str(shape) +
+                                    ' while using as loss `' + loss.__name__ + '`. '
+                                    'This loss expects '
+                                    'targets to have the same shape '
+                                    'as the output.')


 def collect_metrics(metrics, output_names):
@@ -237,36 +238,6 @@ def collect_metrics(metrics, output_names):
                        str(metrics))


-def collect_trainable_weights(layer):
-    '''Collects all `trainable_weights` attributes,
-    excluding any sublayers where `trainable` is set the `False`.
-    '''
-    trainable = getattr(layer, 'trainable', True)
-    if not trainable:
-        return []
-    weights = []
-    if layer.__class__.__name__ == 'Sequential':
-        for sublayer in layer.flattened_layers:
-            weights += collect_trainable_weights(sublayer)
-    elif layer.__class__.__name__ == 'Model':
-        for sublayer in layer.layers:
-            weights += collect_trainable_weights(sublayer)
-    elif layer.__class__.__name__ == 'Graph':
-        for sublayer in layer._graph_nodes.values():
-            weights += collect_trainable_weights(sublayer)
-    else:
-        weights += layer.trainable_weights
-    # dedupe weights
-    weights = list(set(weights))
-    # TF variables have auto-generated the name, while Theano has auto-generated the auto_name variable. name in Theano is None
-    if weights:
-        if K.backend() == 'theano':
-            weights.sort(key=lambda x: x.auto_name)
-        else:
-            weights.sort(key=lambda x: x.name)
-    return weights
-
-
 def batch_shuffle(index_array, batch_size):
    '''This shuffles an array in a batch-wise fashion.
    Useful for shuffling HDF5 arrays
@@ -610,7 +581,10 @@ class Model(Container):
        for i in range(len(self.outputs)):
            shape = self.internal_output_shapes[i]
            name = self.output_names[i]
-            self.targets.append(K.placeholder(ndim=len(shape), name=name + '_target'))
+            self.targets.append(K.placeholder(ndim=len(shape),
+                                name=name + '_target',
+                                sparse=K.is_sparse(self.outputs[i]),
+                                dtype=K.dtype(self.outputs[i])))

        # prepare metrics
        self.metrics = metrics
@@ -698,7 +672,15 @@ class Model(Container):
        self.test_function = None
        self.predict_function = None

-        self._collected_trainable_weights = collect_trainable_weights(self)
+        # Collect trainable weights and sort them deterministically.
+        trainable_weights = self.trainable_weights
+        # Sort weights by name
+        if trainable_weights:
+            if K.backend() == 'theano':
+                trainable_weights.sort(key=lambda x: x.name if x.name else x.auto_name)
+            else:
+                trainable_weights.sort(key=lambda x: x.name)
+        self._collected_trainable_weights = trainable_weights

    def _make_train_function(self):
        if not hasattr(self, 'train_function'):
@@ -754,7 +736,7 @@ class Model(Container):
    def _fit_loop(self, f, ins, out_labels=[], batch_size=32,
                  nb_epoch=100, verbose=1, callbacks=[],
                  val_f=None, val_ins=None, shuffle=True,
-                  callback_metrics=[]):
+                  callback_metrics=[], initial_epoch=0):
        '''Abstract fit function for f(ins).
        Assume that f returns a list, labeled by out_labels.

@@ -774,6 +756,8 @@ class Model(Container):
                passed to the callbacks. They should be the
                concatenation of list the display names of the outputs of
                 `f` and the list of display names of the outputs of `f_val`.
+            initial_epoch: epoch at which to start training
+                (useful for resuming a previous training run)

        # Returns
            `History` object.
@@ -814,7 +798,7 @@ class Model(Container):
        callback_model.stop_training = False
        self.validation_data = val_ins

-        for epoch in range(nb_epoch):
+        for epoch in range(initial_epoch, nb_epoch):
            callbacks.on_epoch_begin(epoch)
            if shuffle == 'batch':
                index_array = batch_shuffle(index_array, batch_size)
@@ -1001,7 +985,7 @@ class Model(Container):

    def fit(self, x, y, batch_size=32, nb_epoch=10, verbose=1, callbacks=[],
            validation_split=0., validation_data=None, shuffle=True,
-            class_weight=None, sample_weight=None):
+            class_weight=None, sample_weight=None, initial_epoch=0):
        '''Trains the model for a fixed number of epochs (iterations on a dataset).

        # Arguments
@@ -1038,6 +1022,8 @@ class Model(Container):
                with shape (samples, sequence_length),
                to apply a different weight to every timestep of every sample.
                In this case you should make sure to specify sample_weight_mode="temporal" in compile().
+            initial_epoch: epoch at which to start training
+                (useful for resuming a previous training run)


        # Returns
@@ -1121,7 +1107,8 @@ class Model(Container):
                              batch_size=batch_size, nb_epoch=nb_epoch,
                              verbose=verbose, callbacks=callbacks,
                              val_f=val_f, val_ins=val_ins, shuffle=shuffle,
-                              callback_metrics=callback_metrics)
+                              callback_metrics=callback_metrics,
+                              initial_epoch=initial_epoch)

    def evaluate(self, x, y, batch_size=32, verbose=1, sample_weight=None):
        '''Returns the loss value and metrics values for the model
@@ -1297,7 +1284,8 @@ class Model(Container):
    def fit_generator(self, generator, samples_per_epoch, nb_epoch,
                      verbose=1, callbacks=[],
                      validation_data=None, nb_val_samples=None,
-                      class_weight={}, max_q_size=10, nb_worker=1, pickle_safe=False):
+                      class_weight={}, max_q_size=10, nb_worker=1, pickle_safe=False,
+                      initial_epoch=0):
        '''Fits the model on data generated batch-by-batch by
        a Python generator.
        The generator is run in parallel to the model, for efficiency.
@@ -1333,6 +1321,8 @@ class Model(Container):
                this implementation relies on multiprocessing, you should not pass
                non picklable arguments to the generator as they can't be passed
                easily to children processes.
+            initial_epoch: epoch at which to start training
+                (useful for resuming a previous training run)

        # Returns
            A `History` object.
@@ -1355,7 +1345,7 @@ class Model(Container):
        ```
        '''
        wait_time = 0.01  # in seconds
-        epoch = 0
+        epoch = initial_epoch

        do_validation = bool(validation_data)
        self._make_train_function()
@@ -1,6 +1,7 @@
 from __future__ import absolute_import
 import numpy as np
 from . import backend as K
+from .utils.generic_utils import get_from_module


 def get_fans(shape, dim_ordering='th'):
@@ -20,7 +21,7 @@ def get_fans(shape, dim_ordering='th'):
            fan_in = shape[-2] * receptive_field_size
            fan_out = shape[-1] * receptive_field_size
        else:
-            raise Exception('Invalid dim_ordering: ' + dim_ordering)
+            raise ValueError('Invalid dim_ordering: ' + dim_ordering)
    else:
        # no specific assumptions
        fan_in = np.sqrt(np.prod(shape))
@@ -101,7 +102,6 @@ def one(shape, name=None):
    return K.ones(shape, name=name)


-from .utils.generic_utils import get_from_module
 def get(identifier, **kwargs):
    return get_from_module(identifier, globals(),
                           'initialization', kwargs=kwargs)
@@ -10,3 +10,4 @@ from .embeddings import *
 from .noise import *
 from .advanced_activations import *
 from .wrappers import *
+from .convolutional_recurrent import *
@@ -47,7 +47,7 @@ class Convolution1D(Layer):
            If you don't specify anything, no activation is applied
            (ie. "linear" activation: a(x) = x).
        weights: list of numpy arrays to set as initial weights.
-        border_mode: 'valid' or 'same'.
+        border_mode: 'valid', 'same' or 'full'. ('full' requires the Theano backend.)
        subsample_length: factor by which to subsample output.
        W_regularizer: instance of [WeightRegularizer](../regularizers.md)
            (eg. L1 or L2 regularization), applied to the main weights matrix.
@@ -77,19 +77,18 @@ class Convolution1D(Layer):
        `steps` value might have changed due to padding.
    '''
    def __init__(self, nb_filter, filter_length,
-                 init='uniform', activation='linear', weights=None,
+                 init='glorot_uniform', activation=None, weights=None,
                 border_mode='valid', subsample_length=1,
                 W_regularizer=None, b_regularizer=None, activity_regularizer=None,
                 W_constraint=None, b_constraint=None,
                 bias=True, input_dim=None, input_length=None, **kwargs):

-        if border_mode not in {'valid', 'same'}:
+        if border_mode not in {'valid', 'same', 'full'}:
            raise Exception('Invalid border mode for Convolution1D:', border_mode)
        self.nb_filter = nb_filter
        self.filter_length = filter_length
        self.init = initializations.get(init, dim_ordering='th')
        self.activation = activations.get(activation)
-        assert border_mode in {'valid', 'same'}, 'border_mode must be in {valid, same}'
        self.border_mode = border_mode
        self.subsample_length = subsample_length

@@ -143,6 +142,7 @@ class Convolution1D(Layer):
        if self.initial_weights is not None:
            self.set_weights(self.initial_weights)
            del self.initial_weights
+        self.built = True

    def get_output_shape_for(self, input_shape):
        length = conv_output_length(input_shape[1],
@@ -218,7 +218,7 @@ class AtrousConvolution1D(Convolution1D):
            If you don't specify anything, no activation is applied
            (ie. "linear" activation: a(x) = x).
        weights: list of numpy arrays to set as initial weights.
-        border_mode: 'valid' or 'same'.
+        border_mode: 'valid', 'same' or 'full'. ('full' requires the Theano backend.)
        subsample_length: factor by which to subsample output.
        atrous_rate: Factor for kernel dilation. Also called filter_dilation
            elsewhere.
@@ -250,13 +250,13 @@ class AtrousConvolution1D(Convolution1D):
        `steps` value might have changed due to padding.
    '''
    def __init__(self, nb_filter, filter_length,
-                 init='uniform', activation='linear', weights=None,
+                 init='glorot_uniform', activation=None, weights=None,
                 border_mode='valid', subsample_length=1, atrous_rate=1,
                 W_regularizer=None, b_regularizer=None, activity_regularizer=None,
                 W_constraint=None, b_constraint=None,
                 bias=True, **kwargs):

-        if border_mode not in {'valid', 'same'}:
+        if border_mode not in {'valid', 'same', 'full'}:
            raise Exception('Invalid border mode for AtrousConv1D:', border_mode)

        self.atrous_rate = int(atrous_rate)
@@ -331,7 +331,7 @@ class Convolution2D(Layer):
            If you don't specify anything, no activation is applied
            (ie. "linear" activation: a(x) = x).
        weights: list of numpy arrays to set as initial weights.
-        border_mode: 'valid' or 'same'.
+        border_mode: 'valid', 'same' or 'full'. ('full' requires the Theano backend.)
        subsample: tuple of length 2. Factor by which to subsample output.
            Also called strides elsewhere.
        W_regularizer: instance of [WeightRegularizer](../regularizers.md)
@@ -366,21 +366,20 @@ class Convolution2D(Layer):
        `rows` and `cols` values might have changed due to padding.
    '''
    def __init__(self, nb_filter, nb_row, nb_col,
-                 init='glorot_uniform', activation='linear', weights=None,
+                 init='glorot_uniform', activation=None, weights=None,
                 border_mode='valid', subsample=(1, 1), dim_ordering='default',
                 W_regularizer=None, b_regularizer=None, activity_regularizer=None,
                 W_constraint=None, b_constraint=None,
                 bias=True, **kwargs):
        if dim_ordering == 'default':
            dim_ordering = K.image_dim_ordering()
-        if border_mode not in {'valid', 'same'}:
+        if border_mode not in {'valid', 'same', 'full'}:
            raise Exception('Invalid border mode for Convolution2D:', border_mode)
        self.nb_filter = nb_filter
        self.nb_row = nb_row
        self.nb_col = nb_col
        self.init = initializations.get(init, dim_ordering=dim_ordering)
        self.activation = activations.get(activation)
-        assert border_mode in {'valid', 'same'}, 'border_mode must be in {valid, same}'
        self.border_mode = border_mode
        self.subsample = tuple(subsample)
        assert dim_ordering in {'tf', 'th'}, 'dim_ordering must be in {tf, th}'
@@ -436,6 +435,7 @@ class Convolution2D(Layer):
        if self.initial_weights is not None:
            self.set_weights(self.initial_weights)
            del self.initial_weights
+        self.built = True

    def get_output_shape_for(self, input_shape):
        if self.dim_ordering == 'th':
@@ -570,7 +570,7 @@ class Deconvolution2D(Convolution2D):
            If you don't specify anything, no activation is applied
            (ie. "linear" activation: a(x) = x).
        weights: list of numpy arrays to set as initial weights.
-        border_mode: 'valid' or 'same'.
+        border_mode: 'valid', 'same' or 'full'. ('full' requires the Theano backend.)
        subsample: tuple of length 2. Factor by which to oversample output.
            Also called strides elsewhere.
        W_regularizer: instance of [WeightRegularizer](../regularizers.md)
@@ -609,7 +609,7 @@ class Deconvolution2D(Convolution2D):
        [3] [Deconvolutional Networks](http://www.matthewzeiler.com/pubs/cvpr2010/cvpr2010.pdf)
    '''
    def __init__(self, nb_filter, nb_row, nb_col, output_shape,
-                 init='glorot_uniform', activation='linear', weights=None,
+                 init='glorot_uniform', activation=None, weights=None,
                 border_mode='valid', subsample=(1, 1),
                 dim_ordering='default',
                 W_regularizer=None, b_regularizer=None, activity_regularizer=None,
@@ -617,7 +617,7 @@ class Deconvolution2D(Convolution2D):
                 bias=True, **kwargs):
        if dim_ordering == 'default':
            dim_ordering = K.image_dim_ordering()
-        if border_mode not in {'valid', 'same'}:
+        if border_mode not in {'valid', 'same', 'full'}:
            raise Exception('Invalid border mode for Deconvolution2D:', border_mode)

        self.output_shape_ = output_shape
@@ -665,7 +665,7 @@ class Deconvolution2D(Convolution2D):
        return output

    def get_config(self):
-        config = {'output_shape': self.output_shape}
+        config = {'output_shape': self.output_shape_}
        base_config = super(Deconvolution2D, self).get_config()
        return dict(list(base_config.items()) + list(config.items()))

@@ -703,7 +703,7 @@ class AtrousConvolution2D(Convolution2D):
            If you don't specify anything, no activation is applied
            (ie. "linear" activation: a(x) = x).
        weights: list of numpy arrays to set as initial weights.
-        border_mode: 'valid' or 'same'.
+        border_mode: 'valid', 'same' or 'full'. ('full' requires the Theano backend.)
        subsample: tuple of length 2. Factor by which to subsample output.
            Also called strides elsewhere.
        atrous_rate: tuple of length 2. Factor for kernel dilation.
@@ -742,7 +742,7 @@ class AtrousConvolution2D(Convolution2D):
        - [Multi-Scale Context Aggregation by Dilated Convolutions](https://arxiv.org/abs/1511.07122)
    '''
    def __init__(self, nb_filter, nb_row, nb_col,
-                 init='glorot_uniform', activation='linear', weights=None,
+                 init='glorot_uniform', activation=None, weights=None,
                 border_mode='valid', subsample=(1, 1),
                 atrous_rate=(1, 1), dim_ordering='default',
                 W_regularizer=None, b_regularizer=None, activity_regularizer=None,
@@ -751,7 +751,7 @@ class AtrousConvolution2D(Convolution2D):
        if dim_ordering == 'default':
            dim_ordering = K.image_dim_ordering()

-        if border_mode not in {'valid', 'same'}:
+        if border_mode not in {'valid', 'same', 'full'}:
            raise Exception('Invalid border mode for AtrousConv2D:', border_mode)

        self.atrous_rate = tuple(atrous_rate)
@@ -889,7 +889,7 @@ class SeparableConvolution2D(Layer):
        `rows` and `cols` values might have changed due to padding.
    '''
    def __init__(self, nb_filter, nb_row, nb_col,
-                 init='glorot_uniform', activation='linear', weights=None,
+                 init='glorot_uniform', activation=None, weights=None,
                 border_mode='valid', subsample=(1, 1),
                 depth_multiplier=1, dim_ordering='default',
                 depthwise_regularizer=None, pointwise_regularizer=None,
@@ -984,6 +984,7 @@ class SeparableConvolution2D(Layer):
        if self.initial_weights is not None:
            self.set_weights(self.initial_weights)
            del self.initial_weights
+        self.built = True

    def get_output_shape_for(self, input_shape):
        if self.dim_ordering == 'th':
@@ -1068,7 +1069,7 @@ class Convolution3D(Layer):
            If you don't specify anything, no activation is applied
            (ie. "linear" activation: a(x) = x).
        weights: list of Numpy arrays to set as initial weights.
-        border_mode: 'valid' or 'same'.
+        border_mode: 'valid', 'same' or 'full'. ('full' requires the Theano backend.)
        subsample: tuple of length 3. Factor by which to subsample output.
            Also called strides elsewhere.
            Note: 'subsample' is implemented by slicing the output of conv3d with strides=(1,1,1).
@@ -1104,7 +1105,7 @@ class Convolution3D(Layer):
    '''

    def __init__(self, nb_filter, kernel_dim1, kernel_dim2, kernel_dim3,
-                 init='glorot_uniform', activation='linear', weights=None,
+                 init='glorot_uniform', activation=None, weights=None,
                 border_mode='valid', subsample=(1, 1, 1), dim_ordering='default',
                 W_regularizer=None, b_regularizer=None, activity_regularizer=None,
                 W_constraint=None, b_constraint=None,
@@ -1112,7 +1113,7 @@ class Convolution3D(Layer):
        if dim_ordering == 'default':
            dim_ordering = K.image_dim_ordering()

-        if border_mode not in {'valid', 'same'}:
+        if border_mode not in {'valid', 'same', 'full'}:
            raise Exception('Invalid border mode for Convolution3D:', border_mode)
        self.nb_filter = nb_filter
        self.kernel_dim1 = kernel_dim1
@@ -1120,7 +1121,6 @@ class Convolution3D(Layer):
        self.kernel_dim3 = kernel_dim3
        self.init = initializations.get(init, dim_ordering=dim_ordering)
        self.activation = activations.get(activation)
-        assert border_mode in {'valid', 'same'}, 'border_mode must be in {valid, same}'
        self.border_mode = border_mode
        self.subsample = tuple(subsample)
        assert dim_ordering in {'tf', 'th'}, 'dim_ordering must be in {tf, th}'
@@ -1182,6 +1182,7 @@ class Convolution3D(Layer):
        if self.initial_weights is not None:
            self.set_weights(self.initial_weights)
            del self.initial_weights
+        self.built = True

    def get_output_shape_for(self, input_shape):
        if self.dim_ordering == 'th':
@@ -1667,6 +1668,7 @@ class Cropping1D(Layer):

    def build(self, input_shape):
        self.input_spec = [InputSpec(shape=input_shape)]
+        self.built = True

    def get_output_shape_for(self, input_shape):
        length = input_shape[1] - self.cropping[0] - self.cropping[1] if input_shape[1] is not None else None
@@ -1683,6 +1685,7 @@ class Cropping1D(Layer):
        base_config = super(Cropping1D, self).get_config()
        return dict(list(base_config.items()) + list(config.items()))

+
 class Cropping2D(Layer):
    '''Cropping layer for 2D input (e.g. picture).
    It crops along spatial dimensions, i.e. width and height.
@@ -1735,6 +1738,7 @@ class Cropping2D(Layer):

    def build(self, input_shape):
        self.input_spec = [InputSpec(shape=input_shape)]
+        self.built = True

    def get_output_shape_for(self, input_shape):
        if self.dim_ordering == 'th':
@@ -1768,6 +1772,7 @@ class Cropping2D(Layer):
        base_config = super(Cropping2D, self).get_config()
        return dict(list(base_config.items()) + list(config.items()))

+
 class Cropping3D(Layer):
    '''Cropping layer for 3D data (e.g. spatial or saptio-temporal).

@@ -1807,6 +1812,7 @@ class Cropping3D(Layer):

    def build(self, input_shape):
        self.input_spec = [InputSpec(shape=input_shape)]
+        self.built = True

    def get_output_shape_for(self, input_shape):
        if self.dim_ordering == 'th':
@@ -0,0 +1,516 @@
+from .. import backend as K
+from .. import activations, initializations, regularizers
+
+import numpy as np
+from ..engine import Layer, InputSpec
+from ..utils.np_utils import conv_output_length
+import warnings
+
+
+class ConvRecurrent2D(Layer):
+    '''Abstract base class for convolutional recurrent layers.
+    Do not use in a model -- it's not a functional layer!
+
+    ConvLSTM2D
+    follow the specifications of this class and accept
+    the keyword arguments listed below.
+
+    # Input shape
+        5D tensor with shape `(nb_samples, timesteps, channels, rows, cols)`.
+
+    # Output shape
+        - if `return_sequences`: 5D tensor with shape
+            `(nb_samples, timesteps, channels, rows, cols)`.
+        - else, 4D tensor with shape `(nb_samples, channels, rows, cols)`.
+
+    # Arguments
+        weights: list of numpy arrays to set as initial weights.
+            The list should have 3 elements, of shapes:
+            `[(input_dim, nb_filter), (nb_filter, nb_filter), (nb_filter,)]`.
+        return_sequences: Boolean. Whether to return the last output
+            in the output sequence, or the full sequence.
+        go_backwards: Boolean (default False).
+            If True, process the input sequence backwards.
+        stateful: Boolean (default False). If True, the last state
+            for each sample at index i in a batch will be used as initial
+            state for the sample of index i in the following batch.
+        nb_filter: Number of convolution filters to use.
+        nb_row: Number of rows in the convolution kernel.
+        nb_col: Number of columns in the convolution kernel.
+        input_shape: shape of the input. This argument
+            is required when using this layer as the first layer in a model.
+
+    # Masking
+        This layer supports masking for input data with a variable number
+        of timesteps. To introduce masks to your data,
+        use an [Embedding](embeddings.md) layer with the `mask_zero` parameter
+        set to `True`.
+        **Note:** for the time being, masking is only supported with Theano.
+
+    # TensorFlow warning
+        For the time being, when using the TensorFlow backend,
+        the number of timesteps used must be specified in your model.
+        Make sure to pass an `input_length` int argument to your
+        recurrent layer (if it comes first in your model),
+        or to pass a complete `input_shape` argument to the first layer
+        in your model otherwise.
+
+
+    # Note on using statefulness in RNNs
+        You can set RNN layers to be 'stateful', which means that the states
+        computed for the samples in one batch will be reused as initial states
+        for the samples in the next batch.
+        This assumes a one-to-one mapping between
+        samples in different successive batches.
+
+        To enable statefulness:
+            - specify `stateful=True` in the layer constructor.
+            - specify a fixed batch size for your model, by passing
+                a `batch_input_shape=(...)` to the first layer in your model.
+                This is the expected shape of your inputs *including the batch
+                size*.
+                It should be a tuple of integers, e.g. `(32, 10, 3, 40, 40)`.
+
+        To reset the states of your model, call `.reset_states()` on either
+        a specific layer, or on your entire model.
+    '''
+
+    def __init__(self, weights=None, nb_row=None, nb_col=None, nb_filter=None,
+                 return_sequences=False, go_backwards=False, stateful=False,
+                 dim_ordering=None, **kwargs):
+        '''Store the options shared by convolutional recurrent layers.
+
+        Every argument is kept as an attribute of the same name, except
+        `weights`, which is kept as `initial_weights`. Any remaining keyword
+        arguments are forwarded to the parent constructor.
+        '''
+        # Kernel geometry.
+        self.nb_filter = nb_filter
+        self.nb_row = nb_row
+        self.nb_col = nb_col
+        self.dim_ordering = dim_ordering
+
+        # Recurrence behaviour.
+        self.stateful = stateful
+        self.go_backwards = go_backwards
+        self.return_sequences = return_sequences
+
+        self.initial_weights = weights
+        # Only 5D inputs are accepted.
+        self.input_spec = [InputSpec(ndim=5)]
+
+        super(ConvRecurrent2D, self).__init__(**kwargs)
+
+    def compute_mask(self, input, mask):
+        '''Pass `mask` through when sequences are returned, else drop it.
+
+        # Returns
+            `mask` unchanged if `self.return_sequences` is truthy,
+            otherwise `None`.
+        '''
+        return mask if self.return_sequences else None
+
+    def get_output_shape_for(self, input_shape):
+        '''Compute the output shape produced for a given input shape.
+
+        Reads `self.border_mode` and `self.subsample`, which are not set by
+        `ConvRecurrent2D.__init__`; subclasses must define them.
+
+        # Arguments
+            input_shape: 5-element shape tuple. Rows and columns are read
+                from indices 3 and 4 for `dim_ordering='th'`, and from
+                indices 2 and 3 for `dim_ordering='tf'`.
+
+        # Returns
+            If `return_sequences`, a 5-tuple that keeps the first two
+            entries of `input_shape`; otherwise a 4-tuple that keeps only
+            the first. The filter axis comes first ('th') or last ('tf')
+            among the remaining three entries.
+
+        # Raises
+            Exception: if `dim_ordering` is neither 'th' nor 'tf'.
+        '''
+        if self.dim_ordering == 'th':
+            rows, cols = input_shape[3], input_shape[4]
+        elif self.dim_ordering == 'tf':
+            rows, cols = input_shape[2], input_shape[3]
+        else:
+            # str() keeps the intended error readable even when
+            # dim_ordering is None (the default of this base class);
+            # plain concatenation would raise a TypeError instead.
+            raise Exception('Invalid dim_ordering: ' + str(self.dim_ordering))
+
+        rows = conv_output_length(rows, self.nb_row,
+                                  self.border_mode, self.subsample[0])
+        cols = conv_output_length(cols, self.nb_col,
+                                  self.border_mode, self.subsample[1])
+
+        # dim_ordering was validated above, so only two layouts remain.
+        if self.dim_ordering == 'th':
+            frame_shape = (self.nb_filter, rows, cols)
+        else:
+            frame_shape = (rows, cols, self.nb_filter)
+
+        if self.return_sequences:
+            return (input_shape[0], input_shape[1]) + frame_shape
+        return (input_shape[0],) + frame_shape
+
+    def step(self, x, states):
+        '''Step function handed to `K.rnn` by `call`.
+
+        Not implemented in this base class; subclasses must override it.
+
+        # Raises
+            NotImplementedError: always.
+        '''
+        raise NotImplementedError
+
+    def get_constants(self, X, train=False):
+        '''Return the `constants` argument that `call` passes to `K.rnn`.
+
+        The base class has no constants and returns `None`; neither
+        argument is used here.
+        '''
+        return None
+
+    def get_initial_states(self, X):
+        '''Build the initial states used when the layer is not stateful.
+
+        `X` is zeroed and summed over axis 1, then convolved with an
+        all-zero kernel of shape `self.W_shape` using `self.border_mode`.
+        `self.conv_step` and `self.W_shape` are not defined in this class;
+        subclasses must provide them.
+
+        # Returns
+            A list holding the same resulting tensor twice.
+        '''
+        # Zeros shaped like the input, with axis 1 summed away.
+        zeros = K.sum(K.zeros_like(X), axis=1)
+        # Convolving with a zero kernel reuses conv_step to derive the state.
+        zero_state = self.conv_step(zeros, K.zeros(self.W_shape),
+                                    border_mode=self.border_mode)
+        return [zero_state, zero_state]
+
+    def preprocess_input(self, x):
+        '''Hook applied to the input before `call` hands it to `K.rnn`.
+
+        The base implementation returns `x` unchanged.
+        '''
+        return x
+
+    def call(self, x, mask=None):
+        '''Run the recurrence over `x` with `K.rnn`.
+
+        Initial states are `self.states` when the layer is stateful, and
+        the result of `get_initial_states(x)` otherwise. When stateful,
+        `self.updates` is rebuilt as pairs `(stored state, new state)`.
+
+        # Arguments
+            x: input tensor; must have 5 dimensions.
+            mask: forwarded unchanged to `K.rnn`.
+
+        # Returns
+            All outputs if `return_sequences`, else only the last output.
+        '''
+        assert K.ndim(x) == 5
+        input_shape = self.input_spec[0].shape
+
+        initial_states = (self.states if self.stateful
+                          else self.get_initial_states(x))
+        constants = self.get_constants(x)
+        preprocessed_input = self.preprocess_input(x)
+
+        # Unrolling is never requested.
+        last_output, outputs, states = K.rnn(self.step, preprocessed_input,
+                                             initial_states,
+                                             go_backwards=self.go_backwards,
+                                             mask=mask,
+                                             constants=constants,
+                                             unroll=False,
+                                             input_length=input_shape[1])
+        if self.stateful:
+            self.updates = []
+            for idx, new_state in enumerate(states):
+                self.updates.append((self.states[idx], new_state))
+
+        return outputs if self.return_sequences else last_output
+
+    def get_config(self):
+        '''Return the layer configuration as a dict.
+
+        Adds `return_sequences`, `go_backwards` and `stateful` to the
+        parent configuration, plus `batch_input_shape` when the layer is
+        stateful. Keys set here override same-named parent keys.
+        '''
+        config = dict(return_sequences=self.return_sequences,
+                      go_backwards=self.go_backwards,
+                      stateful=self.stateful)
+        if self.stateful:
+            config['batch_input_shape'] = self.input_spec[0].shape
+
+        merged = dict(super(ConvRecurrent2D, self).get_config())
+        merged.update(config)
+        return merged
+
+
+class ConvLSTM2D(ConvRecurrent2D):
+    '''Convolutional LSTM.
+
+    # Input shape
+        - if dim_ordering='th'
+            5D tensor with shape:
+            `(samples,time, channels, rows, cols)`
+        - if dim_ordering='tf'
+            5D tensor with shape:
+            `(samples,time, rows, cols, channels)`
+
+    # Output shape
+        - if `return_sequences`
+             - if dim_ordering='th'
+                5D tensor with shape:
+                `(samples, time, nb_filter, output_row, output_col)`
+             - if dim_ordering='tf'
+                5D tensor with shape:
+                `(samples, time, output_row, output_col, nb_filter)`
+        - else
+            - if dim_ordering ='th'
+                4D tensor with shape:
+                `(samples, nb_filter, output_row, output_col)`
+            - if dim_ordering='tf'
+                4D tensor with shape:
+                `(samples, output_row, output_col, nb_filter)`
+
+        where output_row and output_col depend on the shape of the filter,
+        the border_mode and the subsample.
+
+    # Arguments
+        nb_filter: Number of convolution filters to use.
+        nb_row: Number of rows in the convolution kernel.
+        nb_col: Number of columns in the convolution kernel.
+        border_mode: 'valid' or 'same'.
+        subsample: tuple of length 2. Factor by which to subsample output.
+            Also called strides elsewhere.
+        dim_ordering: 'tf' if the features (channels) are in the last
+            dimension, 'th' if they come right after the time dimension.
+        stateful: Boolean (default False). If True, the last state
+            for each sample at index i in a batch will be used as initial
+            state for the sample of index i in the following batch.
+        init: weight initialization function.
+            Can be the name of an existing function (str),
+            or a Theano function
+            (see: [initializations](../initializations.md)).
+        inner_init: initialization function of the inner cells.
+        forget_bias_init: initialization function for the bias of the
+            forget gate.
+            [Jozefowicz et al.](http://www.jmlr.org/proceedings/papers/v37/jozefowicz15.pdf)
+            recommend initializing with ones.
+        activation: activation function.
+            Can be the name of an existing function (str),
+            or a Theano function (see: [activations](../activations.md)).
+        inner_activation: activation function for the inner cells.
+
+    # References
+        - [Convolutional LSTM Network: A Machine Learning Approach for
+        Precipitation Nowcasting](http://arxiv.org/pdf/1506.04214v1.pdf)
+        The current implementation does not include the feedback loop on the
+        cells output
+    '''
+    def __init__(self, nb_filter, nb_row, nb_col,
+                 init='glorot_uniform', inner_init='orthogonal',
+                 forget_bias_init='one', activation='tanh',
+                 inner_activation='hard_sigmoid',
+                 dim_ordering='default',
+                 border_mode='valid', subsample=(1, 1),
+                 W_regularizer=None, U_regularizer=None, b_regularizer=None,
+                 dropout_W=0., dropout_U=0., **kwargs):
+        '''Resolve and store the layer options.
+
+        String identifiers for initializations, activations and
+        regularizers are resolved through the `get` function of their
+        modules. `nb_filter`, `nb_row`, `nb_col` and the resolved
+        `dim_ordering` are also forwarded to the parent constructor
+        through `kwargs`.
+
+        # Raises
+            ValueError: if the resolved `dim_ordering` is neither
+                'tf' nor 'th'.
+        '''
+        # 'default' defers to the backend-wide image dim ordering.
+        if dim_ordering == 'default':
+            dim_ordering = K.image_dim_ordering()
+        if dim_ordering not in {'tf', 'th'}:
+            raise ValueError('dim_ordering must be in {tf,th}', dim_ordering)
+        self.nb_filter = nb_filter
+        self.nb_row = nb_row
+        self.nb_col = nb_col
+        self.init = initializations.get(init)
+        self.inner_init = initializations.get(inner_init)
+        self.forget_bias_init = initializations.get(forget_bias_init)
+        self.activation = activations.get(activation)
+        self.inner_activation = activations.get(inner_activation)
+        self.border_mode = border_mode
+        # Normalised to a tuple, as the other convolution layers do, so a
+        # list argument behaves like a tuple when passed on as strides.
+        self.subsample = tuple(subsample)
+
+        if dim_ordering == 'th':
+            warnings.warn('Be carefull if used with convolution3D layers:\n'
+                          'th in convolution 3D corresponds to '
+                          '(samples, channels, conv_dim1, conv_dim2,'
+                          'conv_dim3)\n'
+                          'while for this network it corresponds to: '
+                          '(samples, time, channels, rows, cols)')
+        self.dim_ordering = dim_ordering
+
+        # The parent constructor takes these as keyword arguments.
+        kwargs['nb_filter'] = nb_filter
+        kwargs['nb_row'] = nb_row
+        kwargs['nb_col'] = nb_col
+        kwargs['dim_ordering'] = dim_ordering
+
+        self.W_regularizer = regularizers.get(W_regularizer)
+        self.U_regularizer = regularizers.get(U_regularizer)
+        self.b_regularizer = regularizers.get(b_regularizer)
+        self.dropout_W, self.dropout_U = dropout_W, dropout_U
+        # Any non-zero dropout rate marks the layer as using the
+        # learning phase.
+        if self.dropout_W or self.dropout_U:
+            self.uses_learning_phase = True
+
+        super(ConvLSTM2D, self).__init__(**kwargs)
+
+    def build(self, input_shape):
+        '''Create the weights of the layer for the given input shape.
+
+        Defines the kernel shapes `W_shape` (input-to-state) and `W_shape1`
+        (state-to-state), creates one `W`, `U` and `b` per gate
+        (i, f, c, o), registers them as trainable weights, attaches the
+        regularizers, and loads `initial_weights` when they were given.
+
+        # Arguments
+            input_shape: 5-element shape tuple. The number of input
+                channels is read from index 2 ('th') or index 4 ('tf').
+
+        # Raises
+            Exception: if `dim_ordering` is neither 'th' nor 'tf'.
+        '''
+        self.input_spec = [InputSpec(shape=input_shape)]
+
+        # Input-to-state kernel shape.
+        if self.dim_ordering == 'th':
+            stack_size = input_shape[2]
+            self.W_shape = (self.nb_filter, stack_size,
+                            self.nb_row, self.nb_col)
+        elif self.dim_ordering == 'tf':
+            stack_size = input_shape[4]
+            self.W_shape = (self.nb_row, self.nb_col,
+                            stack_size, self.nb_filter)
+        else:
+            raise Exception('Invalid dim_ordering: ' + self.dim_ordering)
+
+        # State-to-state kernel shape: nb_filter channels in and out.
+        if self.dim_ordering == 'th':
+            self.W_shape1 = (self.nb_filter, self.nb_filter,
+                             self.nb_row, self.nb_col)
+        elif self.dim_ordering == 'tf':
+            self.W_shape1 = (self.nb_row, self.nb_col,
+                             self.nb_filter, self.nb_filter)
+        else:
+            raise Exception('Invalid dim_ordering: ' + self.dim_ordering)
+
+        if self.stateful:
+            self.reset_states()
+        else:
+            # Not stateful: four placeholder slots, all None.
+            self.states = [None, None, None, None]
+
+        # Input gate.
+        self.W_i = self.init(self.W_shape, name='{}_W_i'.format(self.name))
+        self.U_i = self.inner_init(self.W_shape1,
+                                   name='{}_U_i'.format(self.name))
+        self.b_i = K.zeros((self.nb_filter,), name='{}_b_i'.format(self.name))
+
+        # Forget gate; its bias uses forget_bias_init instead of zeros.
+        self.W_f = self.init(self.W_shape, name='{}_W_f'.format(self.name))
+        self.U_f = self.inner_init(self.W_shape1,
+                                   name='{}_U_f'.format(self.name))
+        self.b_f = self.forget_bias_init((self.nb_filter,),
+                                         name='{}_b_f'.format(self.name))
+
+        # Cell candidate.
+        self.W_c = self.init(self.W_shape, name='{}_W_c'.format(self.name))
+        self.U_c = self.inner_init(self.W_shape1,
+                                   name='{}_U_c'.format(self.name))
+        self.b_c = K.zeros((self.nb_filter,), name='{}_b_c'.format(self.name))
+
+        # Output gate.
+        self.W_o = self.init(self.W_shape, name='{}_W_o'.format(self.name))
+        self.U_o = self.inner_init(self.W_shape1,
+                                   name='{}_U_o'.format(self.name))
+        self.b_o = K.zeros((self.nb_filter,), name='{}_b_o'.format(self.name))
+
+        # NOTE: the registration order here is i, c, f, o, which differs
+        # from the i, f, c, o order used for creation and concatenation.
+        self.trainable_weights = [self.W_i, self.U_i, self.b_i,
+                                  self.W_c, self.U_c, self.b_c,
+                                  self.W_f, self.U_f, self.b_f,
+                                  self.W_o, self.U_o, self.b_o]
+
+        # Concatenated views of the weights; used below as the
+        # regularizer targets.
+        self.W = K.concatenate([self.W_i, self.W_f, self.W_c, self.W_o])
+        self.U = K.concatenate([self.U_i, self.U_f, self.U_c, self.U_o])
+        self.b = K.concatenate([self.b_i, self.b_f, self.b_c, self.b_o])
+
+        self.regularizers = []
+        if self.W_regularizer:
+            self.W_regularizer.set_param(self.W)
+            self.regularizers.append(self.W_regularizer)
+        if self.U_regularizer:
+            self.U_regularizer.set_param(self.U)
+            self.regularizers.append(self.U_regularizer)
+        if self.b_regularizer:
+            self.b_regularizer.set_param(self.b)
+            self.regularizers.append(self.b_regularizer)
+
+        # Weights passed to the constructor are applied once, then dropped.
+        if self.initial_weights is not None:
+            self.set_weights(self.initial_weights)
+            del self.initial_weights
+        self.built = True
+
+    def reset_states(self):
+        '''Zero the two stored states of a stateful layer.
+
+        Existing state variables are overwritten with zeros; if the layer
+        has no `states` attribute yet, two zero variables are created.
+        The state shape is the batch size followed by the last three
+        entries of the layer's output shape.
+
+        # Raises
+            AssertionError: if the layer is not stateful.
+            Exception: if the batch size in the input shape is missing.
+        '''
+        assert self.stateful, 'Layer must be stateful.'
+        input_shape = self.input_spec[0].shape
+        output_shape = self.get_output_shape_for(input_shape)
+        batch_size = input_shape[0]
+        if not batch_size:
+            raise Exception('If a RNN is stateful, a complete ' +
+                            'input_shape must be provided ' +
+                            '(including batch size).')
+
+        # Skip the leading batch axis (and the time axis when present).
+        out_row, out_col, out_filter = (output_shape[2:]
+                                        if self.return_sequences
+                                        else output_shape[1:])
+        state_shape = (batch_size, out_row, out_col, out_filter)
+
+        if not hasattr(self, 'states'):
+            self.states = [K.zeros(state_shape), K.zeros(state_shape)]
+        else:
+            for idx in (0, 1):
+                K.set_value(self.states[idx], np.zeros(state_shape))
+
+    def conv_step(self, x, W, b=None, border_mode='valid'):
+        '''Convolve one input frame with `W` and optionally add a bias.
+
+        The convolution uses `self.subsample` as strides. The image shape
+        given to the backend is the layer input shape without its second
+        entry, and the filter shape is always `self.W_shape`.
+
+        # Arguments
+            x: tensor to convolve.
+            W: convolution kernel.
+            b: optional bias of `nb_filter` values, reshaped so that it
+                broadcasts along the filter axis; skipped when `None`.
+            border_mode: border mode forwarded to `K.conv2d`.
+
+        # Returns
+            The convolution output, with the bias added when given.
+
+        # Raises
+            Exception: if a bias is given and `dim_ordering` is neither
+                'th' nor 'tf'.
+        '''
+        input_shape = self.input_spec[0].shape
+
+        conv_out = K.conv2d(x, W, strides=self.subsample,
+                            border_mode=border_mode,
+                            dim_ordering=self.dim_ordering,
+                            image_shape=(input_shape[0],
+                                         input_shape[2],
+                                         input_shape[3],
+                                         input_shape[4]),
+                            filter_shape=self.W_shape)
+        # Test against None explicitly: `b` is a backend variable or
+        # tensor, and its truth value is not a reliable "bias given" check.
+        if b is not None:
+            if self.dim_ordering == 'th':
+                conv_out = conv_out + K.reshape(b, (1, self.nb_filter, 1, 1))
+            elif self.dim_ordering == 'tf':
+                conv_out = conv_out + K.reshape(b, (1, 1, 1, self.nb_filter))
+            else:
+                raise Exception('Invalid dim_ordering: ' + self.dim_ordering)
+
+        return conv_out
+
+    def conv_step_hidden(self, x, W, border_mode='valid'):
+        '''Convolve a hidden state with a state-to-state kernel.
+
+        Kept separate from `conv_step` because the image shape given to
+        the backend must be spelled out: here it is built from the layer
+        output shape rather than from the input shape. Strides are always
+        `(1, 1)` and the filter shape is `self.W_shape1`.
+
+        # Arguments
+            x: tensor to convolve.
+            W: convolution kernel.
+            border_mode: border mode forwarded to `K.conv2d`.
+
+        # Returns
+            The convolution output.
+        '''
+        input_shape = self.input_spec[0].shape
+        output_shape = self.get_output_shape_for(input_shape)
+        # Skip the leading batch axis (and the time axis when present).
+        trailing = output_shape[2:] if self.return_sequences else output_shape[1:]
+        out_row, out_col, out_filter = trailing
+        hidden_image_shape = (input_shape[0], out_row, out_col, out_filter)
+
+        return K.conv2d(x, W, strides=(1, 1),
+                        border_mode=border_mode,
+                        dim_ordering=self.dim_ordering,
+                        image_shape=hidden_image_shape,
+                        filter_shape=self.W_shape1)
+
+    def step(self, x, states):
+        '''Compute one LSTM step.
+
+        # Arguments
+            x: input for the current timestep.
+            states: list of four items: previous output, previous cell
+                state, then the two 4-element lists built by
+                `get_constants` (multipliers for the recurrent and for
+                the input convolutions, in that order).
+
+        # Returns
+            A pair `(h, [h, c])`: the new output and the new states.
+        '''
+        assert len(states) == 4
+        h_tm1, c_tm1, B_U, B_W = states
+
+        # Input contributions, one per gate in the order i, f, c, o.
+        input_kernels = [(self.W_i, self.b_i), (self.W_f, self.b_f),
+                         (self.W_c, self.b_c), (self.W_o, self.b_o)]
+        x_i, x_f, x_c, x_o = [
+            self.conv_step(x * B_W[gate], kernel, bias,
+                           border_mode=self.border_mode)
+            for gate, (kernel, bias) in enumerate(input_kernels)]
+
+        # Recurrent contributions map nb_filter to nb_filter channels;
+        # 'same' keeps the state shape stable from one step to the next.
+        recurrent_kernels = [self.U_i, self.U_f, self.U_c, self.U_o]
+        h_i, h_f, h_c, h_o = [
+            self.conv_step_hidden(h_tm1 * B_U[gate], kernel,
+                                  border_mode='same')
+            for gate, kernel in enumerate(recurrent_kernels)]
+
+        i = self.inner_activation(x_i + h_i)
+        f = self.inner_activation(x_f + h_f)
+        c = f * c_tm1 + i * self.activation(x_c + h_c)
+        o = self.inner_activation(x_o + h_o)
+        h = o * self.activation(c)
+
+        return h, [h, c]
+
+    def get_constants(self, x):
+        '''Build the per-gate multipliers consumed by `step`.
+
+        # Returns
+            A list of two 4-element lists: first the multipliers for the
+            recurrent convolutions (`dropout_U`), then those for the input
+            convolutions (`dropout_W`). When a rate lies strictly between
+            0 and 1, each entry is a dropout mask in the training phase
+            and a tensor of ones otherwise; for any other rate each entry
+            is the constant 1.
+        '''
+        def unit_constants():
+            return [K.cast_to_floatx(1.) for _ in range(4)]
+
+        def dropout_masks(all_ones, rate):
+            return [K.in_train_phase(K.dropout(all_ones, rate), all_ones)
+                    for _ in range(4)]
+
+        constants = []
+
+        if 0 < self.dropout_U < 1:
+            # Zero tensor passed through conv_step, then shifted to ones.
+            template = K.sum(K.zeros_like(x), axis=1)
+            template = self.conv_step(template, K.zeros(self.W_shape),
+                                      border_mode=self.border_mode)
+            constants.append(dropout_masks(template + 1, self.dropout_U))
+        else:
+            constants.append(unit_constants())
+
+        if 0 < self.dropout_W < 1:
+            # Zeros like the input with axis 1 summed away, shifted to ones.
+            template = K.sum(K.zeros_like(x), axis=1) + 1
+            constants.append(dropout_masks(template, self.dropout_W))
+        else:
+            constants.append(unit_constants())
+
+        return constants
+
+    def get_config(self):
+        '''Return the layer configuration as a dict.
+
+        Covers the constructor arguments of this class, including
+        `subsample`, the three regularizers and both dropout rates, so
+        that a layer rebuilt from the config keeps those settings.
+        Initializations and activations are stored by function name;
+        regularizers are stored as their own config, or `None` when unset.
+        Keys set here override same-named keys of the parent config.
+        '''
+        def regularizer_config(regularizer):
+            # Unset regularizers are stored as None.
+            return regularizer.get_config() if regularizer else None
+
+        config = {'nb_filter': self.nb_filter,
+                  'nb_row': self.nb_row,
+                  'nb_col': self.nb_col,
+                  'init': self.init.__name__,
+                  'inner_init': self.inner_init.__name__,
+                  'forget_bias_init': self.forget_bias_init.__name__,
+                  'activation': self.activation.__name__,
+                  'dim_ordering': self.dim_ordering,
+                  'border_mode': self.border_mode,
+                  'subsample': self.subsample,
+                  'inner_activation': self.inner_activation.__name__,
+                  'W_regularizer': regularizer_config(self.W_regularizer),
+                  'U_regularizer': regularizer_config(self.U_regularizer),
+                  'b_regularizer': regularizer_config(self.b_regularizer),
+                  'dropout_W': self.dropout_W,
+                  'dropout_U': self.dropout_U}
+        base_config = super(ConvLSTM2D, self).get_config()
+        return dict(list(base_config.items()) + list(config.items()))
@@ -96,6 +96,37 @@ class Dropout(Layer):
        return dict(list(base_config.items()) + list(config.items()))


+class SpatialDropout1D(Dropout):
+    '''Dropout variant that drops entire 1D feature maps rather than
+    individual elements. When adjacent frames within a feature map are
+    strongly correlated (as is normally the case in early convolution
+    layers), regular dropout does not regularize the activations and
+    merely results in an effective learning rate decrease. In that case
+    SpatialDropout1D helps promote independence between feature maps and
+    should be used instead.
+
+    # Arguments
+        p: float between 0 and 1. Fraction of the input units to drop.
+
+    # Input shape
+        3D tensor with shape:
+        `(samples, timesteps, channels)`
+
+    # Output shape
+        Same as input
+
+    # References
+        - [Efficient Object Localization Using Convolutional Networks](https://arxiv.org/pdf/1411.4280.pdf)
+    '''
+    def __init__(self, p, **kwargs):
+        super(SpatialDropout1D, self).__init__(p, **kwargs)
+
+    def _get_noise_shape(self, x):
+        # Size 1 on the middle axis; the first and last axes follow `x`.
+        shape = K.shape(x)
+        return (shape[0], 1, shape[2])
+
+
 class SpatialDropout2D(Dropout):
    '''This version performs the same function as Dropout, however it drops
    entire 2D feature maps instead of individual elements. If adjacent pixels
@@ -661,7 +692,8 @@ class Dense(Layer):
    # Output shape
        2D tensor with shape: `(nb_samples, output_dim)`.
    '''
-    def __init__(self, output_dim, init='glorot_uniform', activation='linear', weights=None,
+    def __init__(self, output_dim, init='glorot_uniform',
+                 activation=None, weights=None,
                 W_regularizer=None, b_regularizer=None, activity_regularizer=None,
                 W_constraint=None, b_constraint=None,
                 bias=True, input_dim=None, **kwargs):
@@ -722,6 +754,7 @@ class Dense(Layer):
        if self.initial_weights is not None:
            self.set_weights(self.initial_weights)
            del self.initial_weights
+        self.built = True

    def call(self, x, mask=None):
        output = K.dot(x, self.W)
@@ -890,6 +923,7 @@ class MaxoutDense(Layer):
        if self.initial_weights is not None:
            self.set_weights(self.initial_weights)
            del self.initial_weights
+        self.built = True

    def get_output_shape_for(self, input_shape):
        assert input_shape and len(input_shape) == 2
@@ -962,7 +996,7 @@ class Highway(Layer):
        - [Highway Networks](http://arxiv.org/pdf/1505.00387v2.pdf)
    '''
    def __init__(self, init='glorot_uniform', transform_bias=-2,
-                 activation='linear', weights=None,
+                 activation=None, weights=None,
                 W_regularizer=None, b_regularizer=None, activity_regularizer=None,
                 W_constraint=None, b_constraint=None,
                 bias=True, input_dim=None, **kwargs):
@@ -1027,6 +1061,7 @@ class Highway(Layer):
        if self.initial_weights is not None:
            self.set_weights(self.initial_weights)
            del self.initial_weights
+        self.built = True

    def call(self, x, mask=None):
        y = K.dot(x, self.W_carry)
@@ -1105,7 +1140,7 @@ class TimeDistributedDense(Layer):
    '''

    def __init__(self, output_dim,
-                 init='glorot_uniform', activation='linear', weights=None,
+                 init='glorot_uniform', activation=None, weights=None,
                 W_regularizer=None, b_regularizer=None, activity_regularizer=None,
                 W_constraint=None, b_constraint=None,
                 bias=True, input_dim=None, input_length=None, **kwargs):
@@ -1167,6 +1202,7 @@ class TimeDistributedDense(Layer):
        if self.initial_weights is not None:
            self.set_weights(self.initial_weights)
            del self.initial_weights
+        self.built = True

    def get_output_shape_for(self, input_shape):
        return (input_shape[0], input_shape[1], self.output_dim)
@@ -110,6 +110,7 @@ class Embedding(Layer):

        if self.initial_weights is not None:
            self.set_weights(self.initial_weights)
+        self.built = True

    def compute_mask(self, x, mask=None):
        if not self.mask_zero:
@@ -75,7 +75,7 @@ class LocallyConnected1D(Layer):
        `steps` value might have changed due to padding.
    '''
    def __init__(self, nb_filter, filter_length,
-                 init='uniform', activation='linear', weights=None,
+                 init='glorot_uniform', activation=None, weights=None,
                 border_mode='valid', subsample_length=1,
                 W_regularizer=None, b_regularizer=None, activity_regularizer=None,
                 W_constraint=None, b_constraint=None,
@@ -139,6 +139,7 @@ class LocallyConnected1D(Layer):
        if self.initial_weights is not None:
            self.set_weights(self.initial_weights)
            del self.initial_weights
+        self.built = True

    def get_output_shape_for(self, input_shape):
        length = conv_output_length(input_shape[1],
@@ -257,7 +258,7 @@ class LocallyConnected2D(Layer):
        `rows` and `cols` values might have changed due to padding.
    '''
    def __init__(self, nb_filter, nb_row, nb_col,
-                 init='glorot_uniform', activation='linear', weights=None,
+                 init='glorot_uniform', activation=None, weights=None,
                 border_mode='valid', subsample=(1, 1),
                 dim_ordering='default',
                 W_regularizer=None, b_regularizer=None, activity_regularizer=None,
@@ -333,6 +334,7 @@ class LocallyConnected2D(Layer):
        if self.initial_weights is not None:
            self.set_weights(self.initial_weights)
            del self.initial_weights
+        self.built = True

    def get_output_shape_for(self, input_shape):
        if self.dim_ordering == 'th':
@@ -104,7 +104,6 @@ class BatchNormalization(Layer):
            self.set_weights(self.initial_weights)
            del self.initial_weights
        self.built = True
-        self.called_with = None

    def call(self, x, mask=None):
        if self.mode == 0 or self.mode == 2:
@@ -122,23 +121,12 @@ class BatchNormalization(Layer):
                    epsilon=self.epsilon)
            else:
                # mode 0
-                if self.called_with not in {None, x}:
-                    raise Exception('You are attempting to share a '
-                                    'same `BatchNormalization` layer across '
-                                    'different data flows. '
-                                    'This is not possible. '
-                                    'You should use `mode=2` in '
-                                    '`BatchNormalization`, which has '
-                                    'a similar behavior but is shareable '
-                                    '(see docs for a description of '
-                                    'the behavior).')
-                self.called_with = x
                x_normed, mean, std = K.normalize_batch_in_training(
                    x, self.gamma, self.beta, reduction_axes,
                    epsilon=self.epsilon)

-                self.updates = [K.moving_average_update(self.running_mean, mean, self.momentum),
-                                K.moving_average_update(self.running_std, std, self.momentum)]
+                self.add_updates([K.moving_average_update(self.running_mean, mean, self.momentum),
+                                  K.moving_average_update(self.running_std, std, self.momentum)], x)

                if K.backend() == 'tensorflow' and sorted(reduction_axes) == range(K.ndim(x))[:-1]:
                    x_normed_running = K.batch_normalization(
@@ -168,11 +156,11 @@ class BatchNormalization(Layer):
        return x_normed

    def get_config(self):
-        config = {"epsilon": self.epsilon,
-                  "mode": self.mode,
-                  "axis": self.axis,
-                  "gamma_regularizer": self.gamma_regularizer.get_config() if self.gamma_regularizer else None,
-                  "beta_regularizer": self.beta_regularizer.get_config() if self.beta_regularizer else None,
-                  "momentum": self.momentum}
+        config = {'epsilon': self.epsilon,
+                  'mode': self.mode,
+                  'axis': self.axis,
+                  'gamma_regularizer': self.gamma_regularizer.get_config() if self.gamma_regularizer else None,
+                  'beta_regularizer': self.beta_regularizer.get_config() if self.beta_regularizer else None,
+                  'momentum': self.momentum}
        base_config = super(BatchNormalization, self).get_config()
        return dict(list(base_config.items()) + list(config.items()))
@@ -34,14 +34,12 @@ class _Pooling1D(Layer):
        raise NotImplementedError

    def call(self, x, mask=None):
-        x = K.expand_dims(x, -1)   # add dummy last dimension
-        x = K.permute_dimensions(x, (0, 2, 1, 3))
+        x = K.expand_dims(x, 2)   # add dummy dimension at axis 2
        output = self._pooling_function(inputs=x, pool_size=self.pool_size,
                                        strides=self.st,
                                        border_mode=self.border_mode,
-                                        dim_ordering='th')
-        output = K.permute_dimensions(output, (0, 2, 1, 3))
-        return K.squeeze(output, 3)  # remove dummy last dimension
+                                        dim_ordering='tf')
+        return K.squeeze(output, 2)  # remove dummy dimension at axis 2

    def get_config(self):
        config = {'stride': self.stride,
@@ -66,7 +64,6 @@ class MaxPooling1D(_Pooling1D):
            2 will halve the input.
            If None, it will default to `pool_length`.
        border_mode: 'valid' or 'same'.
-            Note: 'same' will only work with TensorFlow for the time being.
    '''

    def __init__(self, pool_length=2, stride=None,
@@ -89,7 +86,6 @@ class AveragePooling1D(_Pooling1D):
        stride: integer, or None. Stride value.
            If None, it will default to `pool_length`.
        border_mode: 'valid' or 'same'.
-            Note: 'same' will only work with TensorFlow for the time being.

    # Input shape
        3D tensor with shape: `(samples, steps, features)`.
@@ -181,7 +177,6 @@ class MaxPooling2D(_Pooling2D):
        strides: tuple of 2 integers, or None. Strides values.
            If None, it will default to `pool_size`.
        border_mode: 'valid' or 'same'.
-            Note: 'same' will only work with TensorFlow for the time being.
        dim_ordering: 'th' or 'tf'. In 'th' mode, the channels dimension
            (the depth) is at index 1, in 'tf' mode is it at index 3.
            It defaults to the `image_dim_ordering` value found in your
@@ -223,7 +218,6 @@ class AveragePooling2D(_Pooling2D):
        strides: tuple of 2 integers, or None. Strides values.
            If None, it will default to `pool_size`.
        border_mode: 'valid' or 'same'.
-            Note: 'same' will only work with TensorFlow for the time being.
        dim_ordering: 'th' or 'tf'. In 'th' mode, the channels dimension
            (the depth) is at index 1, in 'tf' mode is it at index 3.
            It defaults to the `image_dim_ordering` value found in your
@@ -199,6 +199,18 @@ class Recurrent(Layer):
        # note that the .build() method of subclasses MUST define
        # self.input_spec with a complete input shape.
        input_shape = self.input_spec[0].shape
+        if self.unroll and input_shape[1] is None:
+            raise ValueError('Cannot unroll a RNN if the '
+                             'time dimension is undefined. \n'
+                             '- If using a Sequential model, '
+                             'specify the time dimension by passing '
+                             'an `input_shape` or `batch_input_shape` '
+                             'argument to your first layer. If your '
+                             'first layer is an Embedding, you can '
+                             'also use the `input_length` argument.\n'
+                             '- If using the functional API, specify '
+                             'the time dimension by passing a `shape` '
+                             'or `batch_shape` argument to your Input layer.')
        if self.stateful:
            initial_states = self.states
        else:
@@ -214,9 +226,10 @@ class Recurrent(Layer):
                                             unroll=self.unroll,
                                             input_length=input_shape[1])
        if self.stateful:
-            self.updates = []
+            updates = []
            for i in range(len(states)):
-                self.updates.append((self.states[i], states[i]))
+                updates.append((self.states[i], states[i]))
+            self.add_updates(updates, x)

        if self.return_sequences:
            return outputs
@@ -229,7 +242,7 @@ class Recurrent(Layer):
                  'stateful': self.stateful,
                  'unroll': self.unroll,
                  'consume_less': self.consume_less}
-        if self.stateful:
+        if self.stateful and self.input_spec[0].shape:
            config['batch_input_shape'] = self.input_spec[0].shape
        else:
            config['input_dim'] = self.input_dim
@@ -313,13 +326,22 @@ class SimpleRNN(Recurrent):
        if self.initial_weights is not None:
            self.set_weights(self.initial_weights)
            del self.initial_weights
+        self.built = True

    def reset_states(self):
        assert self.stateful, 'Layer must be stateful.'
        input_shape = self.input_spec[0].shape
        if not input_shape[0]:
-            raise Exception('If a RNN is stateful, a complete ' +
-                            'input_shape must be provided (including batch size).')
+            raise Exception('If a RNN is stateful, it needs to know '
+                            'its batch size. Specify the batch size '
+                            'of your input tensors: \n'
+                            '- If using a Sequential model, '
+                            'specify the batch size by passing '
+                            'a `batch_input_shape` '
+                            'argument to your first layer.\n'
+                            '- If using the functional API, specify '
+                            'the batch size by passing a '
+                            '`batch_shape` argument to your Input layer.')
        if hasattr(self, 'states'):
            K.set_value(self.states[0],
                        np.zeros((input_shape[0], self.output_dim)))
@@ -363,7 +385,7 @@ class SimpleRNN(Recurrent):
            input_shape = self.input_spec[0].shape
            input_dim = input_shape[-1]
            ones = K.ones_like(K.reshape(x[:, 0, 0], (-1, 1)))
-            ones = K.tile(ones, (1, input_dim))
+            ones = K.tile(ones, (1, int(input_dim)))
            B_W = K.in_train_phase(K.dropout(ones, self.dropout_W), ones)
            constants.append(B_W)
        else:
@@ -495,6 +517,7 @@ class GRU(Recurrent):
        if self.initial_weights is not None:
            self.set_weights(self.initial_weights)
            del self.initial_weights
+        self.built = True

    def reset_states(self):
        assert self.stateful, 'Layer must be stateful.'
@@ -577,7 +600,7 @@ class GRU(Recurrent):
            input_shape = self.input_spec[0].shape
            input_dim = input_shape[-1]
            ones = K.ones_like(K.reshape(x[:, 0, 0], (-1, 1)))
-            ones = K.tile(ones, (1, input_dim))
+            ones = K.tile(ones, (1, int(input_dim)))
            B_W = [K.in_train_phase(K.dropout(ones, self.dropout_W), ones) for _ in range(3)]
            constants.append(B_W)
        else:
@@ -725,6 +748,7 @@ class LSTM(Recurrent):
        if self.initial_weights is not None:
            self.set_weights(self.initial_weights)
            del self.initial_weights
+        self.built = True

    def reset_states(self):
        assert self.stateful, 'Layer must be stateful.'
@@ -817,7 +841,7 @@ class LSTM(Recurrent):
            input_shape = self.input_spec[0].shape
            input_dim = input_shape[-1]
            ones = K.ones_like(K.reshape(x[:, 0, 0], (-1, 1)))
-            ones = K.tile(ones, (1, input_dim))
+            ones = K.tile(ones, (1, int(input_dim)))
            B_W = [K.in_train_phase(K.dropout(ones, self.dropout_W), ones) for _ in range(4)]
            constants.append(B_W)
        else:
@@ -112,23 +112,11 @@ class TimeDistributed(Wrapper):
            def step(x, states):
                output = self.layer.call(x)
                return output, []
-            input_length = input_shape[1]
-            if K.backend() == 'tensorflow' and len(input_shape) > 3:
-                if input_length is None:
-                    raise Exception('When using TensorFlow, you should define '
-                                    'explicitly the number of timesteps of '
-                                    'your sequences.\n'
-                                    'If your first layer is an Embedding, '
-                                    'make sure to pass it an "input_length" '
-                                    'argument. Otherwise, make sure '
-                                    'the first layer has '
-                                    'an "input_shape" or "batch_input_shape" '
-                                    'argument, including the time axis.')
-                unroll = True
-            else:
-                unroll = False
-            last_output, outputs, states = K.rnn(step, X,
-                                                 initial_states=[], input_length=input_length, unroll=unroll)
+
+            _, outputs, _ = K.rnn(step, X,
+                                  initial_states=[],
+                                  input_length=input_shape[1],
+                                  unroll=False)
            y = outputs
        else:
            # no batch size specified, therefore the layer will be able
@@ -1,777 +0,0 @@
-from collections import OrderedDict
-import warnings
-import copy
-
-from .. import backend as K
-from ..layers import InputLayer, Layer, Merge
-from ..engine.training import Model
-
-
-class Graph(Model):
-    '''Arbitrary connection graph.
-
-    THIS IS A LEGACY MODEL AND SHOULD NOT BE USED
-    except for backwards compatibility support.
-
-    For multi-inputs/multi-outputs models, or
-    models using shared layers, use the functional API instead.
-    '''
-
-    def __init__(self, name=None):
-        # model attributes
-        self.inbound_nodes = []
-        self.outbound_nodes = []
-        self.built = False
-        self.supports_masking = False
-
-        # legacy attributes (we prefix them with _graph_)
-        self._graph_namespace = set()  # strings
-        self._graph_nodes = OrderedDict()  # layer-like
-        self._graph_inputs = OrderedDict()  # layer-like
-        self._graph_outputs = OrderedDict()  # layer-like
-        self._graph_input_config = []  # dicts
-        self._graph_output_config = []  # dicts
-        self._graph_node_config = []  # dicts
-        self._graph_shared_nodes_names = []
-
-        if not name:
-            prefix = 'graph_'
-            name = prefix + str(K.get_uid(prefix))
-        self.name = name
-
-    def __call__(self, x, mask=None):
-        self.build()
-        return super(Graph, self).__call__(x, mask)
-
-    def build(self, input_shape=None):
-        # this will crash if the input/output layers have multiple nodes
-        # no plans to support that case since Graph is deprecated
-        input_tensors = [layer.output for layer in self._graph_inputs.values()]
-        output_tensors = [layer.output for layer in self._graph_outputs.values()]
-        # actually create the model
-        super(Graph, self).__init__(input_tensors,
-                                    output_tensors,
-                                    name=self.name)
-        self.built = True
-
-    def compile(self, optimizer, loss,
-                metrics=[],
-                sample_weight_modes=None,
-                loss_weights=None,
-                **kwargs):
-        '''Configures the learning process.
-
-        # Arguments
-            optimizer: str (name of optimizer) or optimizer object.
-                See [optimizers](optimizers.md).
-            loss: dictionary mapping the name(s) of the output(s) to
-                a loss function (string name of objective function or
-                objective function. See [objectives](objectives.md)).
-            metrics: list of str (name of metrics) or
-                list of metrics functions. See [metrics](metrics.md).
-            sample_weight_modes: optional dictionary mapping certain
-                output names to a sample weight mode ("temporal" and None
-                are the only supported modes). If you need to do
-                timestep-wise loss weighting on one of your graph outputs,
-                you will need to set the sample weight mode for this output
-                to "temporal".
-            loss_weights: dictionary you can pass to specify a weight
-                coefficient for each loss function (in a multi-output model).
-                If no loss weight is specified for an output,
-                the weight for this output's loss will be considered to be 1.
-            kwargs: for Theano backend, these are passed into K.function.
-                Ignored for Tensorflow backend.
-        '''
-        # create the underlying Model
-        if not self.built:
-            self.build()
-        super(Graph, self).compile(optimizer, loss,
-                                   metrics=metrics,
-                                   sample_weight_mode=sample_weight_modes,
-                                   loss_weights=loss_weights,
-                                   **kwargs)
-
-    def add_input(self, name, input_shape=None,
-                  batch_input_shape=None, dtype='float'):
-        '''Adds an input to the graph.
-
-        # Arguments:
-            name: string. The name of the new input.
-                Must be unique in the graph.
-            input_shape: a tuple of integers,
-                the expected shape of the input samples.
-                Does not include the batch size.
-            batch_input_shape: a tuple of integers,
-                the expected shape of the whole input batch,
-                including the batch size.
-            dtype: 'float', or 'int'.
-        '''
-        if name in self._graph_namespace:
-            raise Exception('Duplicate node identifier: ' + name)
-        self._graph_namespace.add(name)
-        self.built = False
-
-        if dtype[:3] == 'int':
-            dtype = 'int32'
-        elif dtype[:5] == 'float':
-            dtype = K.floatx()
-        else:
-            raise Exception('Uknown dtype (should be "int" or "float"): ' +
-                            str(dtype))
-
-        # create input layer
-        input_layer = InputLayer(input_shape=input_shape,
-                                 batch_input_shape=batch_input_shape,
-                                 name=name, input_dtype=dtype)
-        self._graph_inputs[name] = input_layer
-
-        # append input config to self._graph_input_config
-        config = {'name': name, 'dtype': dtype}
-        if batch_input_shape:
-            config['batch_input_shape'] = batch_input_shape
-        else:
-            config['input_shape'] = input_shape
-        self._graph_input_config.append(config)
-
-    def add_node(self, layer, name, input=None, inputs=[],
-                 merge_mode='concat', concat_axis=-1, dot_axes=-1,
-                 create_output=False):
-        '''Adds a node in the graph. It can be connected to multiple
-        inputs, which will first be merged into one tensor
-        according to the mode specified.
-
-        # Arguments
-            layer: the layer at the node.
-            name: name for the node.
-            input: when connecting the layer to a single input,
-                this is the name of the incoming node.
-            inputs: when connecting the layer to multiple inputs,
-                this is a list of names of incoming nodes.
-            merge_mode: one of {concat, sum, dot, ave, mul}
-            concat_axis: when `merge_mode=='concat'`, this is the
-                input concatenation axis.
-            dot_axes: when `merge_mode='dot'`,
-                this is the contraction axes specification;
-                see the `Merge` layer for details.
-            create_output: boolean. Set this to `True` if you want the output
-                of your node to be an output of the graph.
-        '''
-        if name in self._graph_namespace:
-            raise Exception('Duplicate node identifier: ' + name)
-        self._graph_namespace.add(name)
-        layer.name = name
-        self.built = False
-
-        if input:
-            if input not in self._graph_namespace:
-                raise Exception('Unknown node/input identifier: ' + input)
-            if input in self._graph_nodes:
-                layer.add_inbound_node(self._graph_nodes[input])
-            elif input in self._graph_inputs:
-                layer.add_inbound_node(self._graph_inputs[input])
-        if inputs:
-            to_merge = []
-            for n in inputs:
-                if n in self._graph_nodes:
-                    to_merge.append(self._graph_nodes[n])
-                elif n in self._graph_inputs:
-                    to_merge.append(self._graph_inputs[n])
-                else:
-                    raise Exception('Unknown identifier: ' + n)
-            merge = Merge(to_merge, mode=merge_mode,
-                          concat_axis=concat_axis, dot_axes=dot_axes,
-                          name='merge_inputs_for_' + name)
-            layer.add_inbound_node(merge)
-        self._graph_nodes[name] = layer
-        self._graph_node_config.append({'name': name,
-                                        'input': input,
-                                        'inputs': inputs,
-                                        'merge_mode': merge_mode,
-                                        'concat_axis': concat_axis,
-                                        'dot_axes': dot_axes,
-                                        'create_output': create_output})
-        if create_output:
-            self.add_output(name, input=name)
-
-    def add_shared_node(self, layer, name, inputs=[], merge_mode=None,
-                        concat_axis=-1, dot_axes=-1, outputs=[],
-                        create_output=False):
-        '''Used to share a same layer across multiple nodes.
-
-        Supposed, for instance, that you want to apply one same `Dense` layer
-        after two different nodes ('node_a' and 'node_b').
-        You can then add the dense layer as a shared node by calling:
-
-        ```python
-        model.add_shared_node(my_dense, name='shared_dense', inputs=['node_a', 'node_b'], ...)
-        ```
-
-        If you want access to the output of dense(node_a) and dense(node_b) separately,
-        you can add these outputs to the Graph by passing an `outputs` argument:
-
-        ```python
-        model.add_shared_node(my_dense, name='shared_dense', inputs=['node_a', 'node_b'],
-                              outputs=['dense_output_a', 'dense_outputs_b'])
-        ```
-
-        Otherwise you can merge these different outputs via `merge_mode`.
-        In that case you can access the merged output
-        under the identifier `name`.
-
-        # Arguments
-            layer: The layer to be shared across multiple inputs
-            name: Name of the shared node
-            inputs: List of names of input nodes
-            merge_mode: Same meaning as `merge_mode` argument of `add_node()`
-            concat_axis: Same meaning as `concat_axis` argument of `add_node()`
-            dot_axes: Same meaning as `dot_axes` argument of `add_node()`
-            outputs: Used when `merge_mode=None`. Names for the output nodes.
-            create_output: Same meaning as `create_output` argument of `add_node()`.
-        '''
-        if name in self._graph_namespace:
-            raise Exception('Duplicate node identifier: ' + name)
-        self._graph_namespace.add(name)
-        self.built = False
-
-        for o in outputs:
-            if o in self._graph_namespace:
-                raise Exception('Duplicate node identifier: ' + o)
-        if merge_mode:
-            if merge_mode not in {'sum', 'ave', 'mul', 'dot', 'cos', 'concat'}:
-                raise Exception('Invalid merge mode:', merge_mode)
-        input_layers = []
-        for i in range(len(inputs)):
-            input = inputs[i]
-            if input in self._graph_nodes:
-                n = self._graph_nodes[input]
-                input_layers.append(n)
-            elif input in self._graph_inputs:
-                n = self._graph_inputs[input]
-                input_layers.append(n)
-            else:
-                raise Exception('Unknown identifier: ' + input)
-
-        created_node_indices = []
-        for input_layer in input_layers:
-            created_node_indices.append(len(layer.inbound_nodes))
-            layer.add_inbound_node(input_layer)
-
-        if merge_mode:
-            layer.name = 'input_for_' + name
-            # collect all output nodes of layer and merge them into a single output
-            merge = Merge([layer for _ in range(len(inputs))],
-                          mode=merge_mode,
-                          concat_axis=concat_axis, dot_axes=dot_axes,
-                          node_indices=created_node_indices,
-                          name=name)
-            self._graph_nodes[name] = merge
-            if create_output:
-                self.add_output(name, input=name)
-        else:
-            layer.name = name
-            # create one new layer per output node of layer,
-            # and add them to the Graph with their own identifiers
-            if len(outputs) != len(inputs):
-                raise Exception('When using merge_mode=None, '
-                                'you should provide a list of '
-                                'output names (`output` argument) '
-                                'the same size as `input`.')
-            for i in range(len(outputs)):
-                output_layer_name = outputs[i]
-                output_layer = Layer(name=output_layer_name)
-                output_layer.add_inbound_node(layer, created_node_indices[i])
-                self._graph_namespace.add(output_layer_name)
-                self._graph_nodes[output_layer_name] = output_layer
-                if create_output:
-                    self.add_output(output_layer_name, input=output_layer_name)
-
-        self._graph_node_config.append({'name': name,
-                                        'layer': {
-                                            'config': layer.get_config(),
-                                            'class_name': layer.__class__.__name__,
-                                        },
-                                        'inputs': inputs,
-                                        'merge_mode': merge_mode,
-                                        'concat_axis': concat_axis,
-                                        'dot_axes': dot_axes,
-                                        'outputs': outputs,
-                                        'create_output': create_output if merge_mode else False})
-        self._graph_shared_nodes_names.append(name)
-
-    def add_output(self, name, input=None, inputs=[],
-                   merge_mode='concat', concat_axis=-1, dot_axes=-1):
-        '''Adds an output to the graph.
-
-        This output can merge several node outputs into a single output.
-
-        # Arguments
-            name: name of the output.
-            input: when connecting the layer to a single input,
-                this is the name of the incoming node.
-            inputs: when connecting the layer to multiple inputs,
-                this is a list of names of incoming nodes.
-            merge_mode: one of {concat, sum, dot, ave, mul}
-            concat_axis: when `merge_mode=='concat'`, this is the
-                input concatenation axis.
-            dot_axes: when `merge_mode='dot'`,
-                this is the contraction axes specification;
-                see the `Merge layer for details.
-        '''
-        if name not in self._graph_namespace:
-            self._graph_namespace.add(name)
-        if name in self._graph_outputs:
-            raise Exception('Duplicate output identifier:', name)
-        self.built = False
-
-        if input:
-            if input in self._graph_nodes:
-                layer = self._graph_nodes[input]
-            elif input in self._graph_inputs:
-                layer = self._graph_inputs[input]
-            else:
-                raise Exception('Unknown node/input identifier: ' + input)
-            if layer.name == name:
-                self._graph_outputs[name] = layer
-            else:
-                layer.name = name
-                self._graph_outputs[name] = layer
-        if inputs:
-            to_merge = []
-            for n in inputs:
-                if n not in self._graph_nodes:
-                    raise Exception('Unknown identifier: ' + n)
-                to_merge.append(self._graph_nodes[n])
-            merge = Merge(to_merge, mode=merge_mode,
-                          concat_axis=concat_axis, dot_axes=dot_axes,
-                          name=name)
-            self._graph_outputs[name] = merge
-
-        self._graph_output_config.append({'name': name,
-                                          'input': input,
-                                          'inputs': inputs,
-                                          'merge_mode': merge_mode,
-                                          'concat_axis': concat_axis,
-                                          'dot_axes': dot_axes})
-
-    def _get_x(self, data):
-        x = []
-        for key in self._graph_inputs.keys():
-            if key not in data:
-                raise Exception('Expected to be provided an array '
-                                '(in dict argument `data`) for input "' +
-                                key + '".')
-            x.append(data[key])
-        return x
-
-    def _get_y(self, data):
-        y = []
-        for key in self._graph_outputs.keys():
-            if key not in data:
-                raise Exception('Expected to be provided an array '
-                                '(in dict argument `data`) for output "' +
-                                key + '".')
-            y.append(data[key])
-        return y
-
-    def fit(self, data, batch_size=32, nb_epoch=10, verbose=1, callbacks=[],
-            validation_split=0., validation_data=None, shuffle=True,
-            class_weight=None, sample_weight=None, **kwargs):
-        '''Trains the model for a fixed number of epochs.
-
-        Returns a history object. Its `history` attribute is a record of
-        training loss values at successive epochs,
-        as well as validation loss values (if applicable).
-
-        # Arguments
-            data: dictionary mapping input names and outputs names to
-                appropriate Numpy arrays. All arrays should contain
-                the same number of samples.
-            batch_size: int. Number of samples per gradient update.
-            nb_epoch: int.
-            verbose: 0 for no logging to stdout,
-                1 for progress bar logging, 2 for one log line per epoch.
-            callbacks: `keras.callbacks.Callback` list. List of callbacks
-                to apply during training. See [callbacks](callbacks.md).
-            validation_split: float (0. < x < 1). Fraction of the data to
-                use as held-out validation data.
-            validation_data: dictionary mapping input names and outputs names
-                to appropriate Numpy arrays to be used as
-                held-out validation data.
-                All arrays should contain the same number of samples.
-                Will override validation_split.
-            shuffle: boolean. Whether to shuffle the samples at each epoch.
-            class_weight: dictionary mapping output names to
-                class weight dictionaries.
-            sample_weight: dictionary mapping output names to
-                numpy arrays of sample weights.
-        '''
-        if 'show_accuracy' in kwargs:
-            kwargs.pop('show_accuracy')
-            warnings.warn('The "show_accuracy" argument is deprecated, '
-                          'instead you should pass the "accuracy" metric to '
-                          'the model at compile time:\n'
-                          '`model.compile(optimizer, loss, '
-                          'metrics=["accuracy"])`')
-        if kwargs:
-            raise Exception('Received unknown keyword arguments: ' +
-                            str(kwargs))
-        x = self._get_x(data)
-        y = self._get_y(data)
-
-        if type(validation_data) is tuple:
-            raise Exception('Cannot used sample_weight with '
-                            'validation data with legacy Graph model. '
-                            'validation_data should be a dictionary.')
-        if validation_data:
-            val_x = self._get_x(validation_data)
-            val_y = self._get_y(validation_data)
-            validation_data = (val_x, val_y)
-        return super(Graph, self).fit(x, y,
-                                      batch_size=batch_size,
-                                      nb_epoch=nb_epoch,
-                                      verbose=verbose,
-                                      callbacks=callbacks,
-                                      validation_split=validation_split,
-                                      validation_data=validation_data,
-                                      shuffle=shuffle,
-                                      class_weight=class_weight,
-                                      sample_weight=sample_weight)
-
-    def evaluate(self, data, batch_size=128,
-                 verbose=0, sample_weight={}, **kwargs):
-        '''Computes the loss on some input data, batch by batch.
-
-        Returns the scalar test loss over the data,
-        or a list of metrics values (starting with the test loss)
-        if applicable.
-
-        Arguments: see `fit` method.
-        '''
-        if 'show_accuracy' in kwargs:
-            kwargs.pop('show_accuracy')
-            warnings.warn('The "show_accuracy" argument is deprecated, '
-                          'instead you should pass the "accuracy" metric to '
-                          'the model at compile time:\n'
-                          '`model.compile(optimizer, loss, '
-                          'metrics=["accuracy"])`')
-        if kwargs:
-            raise Exception('Received unknown keyword arguments: ' +
-                            str(kwargs))
-        x = self._get_x(data)
-        y = self._get_y(data)
-        return super(Graph, self).evaluate(x, y,
-                                           batch_size=batch_size,
-                                           verbose=verbose,
-                                           sample_weight=sample_weight)
-
-    def predict(self, data, batch_size=128, verbose=0):
-        '''Generates output predictions for the input samples
-        batch by batch.
-
-        Arguments: see `fit` method.
-        '''
-        x = self._get_x(data)
-        output_list = super(Graph, self).predict(x, batch_size=batch_size,
-                                                 verbose=verbose)
-        if not isinstance(output_list, list):
-            output_list = [output_list]
-        return dict(zip(self._graph_outputs, output_list))
-
-    def train_on_batch(self, data,
-                       class_weight={},
-                       sample_weight={}, **kwargs):
-        '''Single gradient update on a batch of samples.
-
-        Returns the scalar train loss over the data,
-        or a list of metrics values (starting with the test loss)
-        if applicable.
-
-        Arguments: see `fit` method.
-        '''
-        if 'accuracy' in kwargs:
-            kwargs.pop('accuracy')
-            warnings.warn('The "accuracy" argument is deprecated, '
-                          'instead you should pass the "accuracy" metric to '
-                          'the model at compile time:\n'
-                          '`model.compile(optimizer, loss, '
-                          'metrics=["accuracy"])`')
-        if kwargs:
-            raise Exception('Received unknown keyword arguments: ' +
-                            str(kwargs))
-        x = self._get_x(data)
-        y = self._get_y(data)
-        return super(Graph, self).train_on_batch(x, y,
-                                                 sample_weight=sample_weight,
-                                                 class_weight=class_weight)
-
-    def test_on_batch(self, data, sample_weight={}, **kwargs):
-        '''Test the network on a single batch of samples.
-
-        Returns the scalar test loss over the data,
-        or a list of metrics values (starting with the test loss)
-        if applicable.
-
-        Arguments: see `fit` method.
-        '''
-        if 'accuracy' in kwargs:
-            kwargs.pop('accuracy')
-            warnings.warn('The "accuracy" argument is deprecated, '
-                          'instead you should pass the "accuracy" metric to '
-                          'the model at compile time:\n'
-                          '`model.compile(optimizer, loss, '
-                          'metrics=["accuracy"])`')
-        if kwargs:
-            raise Exception('Received unknown keyword arguments: ' +
-                            str(kwargs))
-        x = self._get_x(data)
-        y = self._get_y(data)
-        return super(Graph, self).test_on_batch(x, y,
-                                                sample_weight=sample_weight)
-
-    def predict_on_batch(self, data):
-        output_list = super(Graph, self).predict_on_batch(data)
-        if not isinstance(output_list, list):
-            output_list = [output_list]
-        return dict(zip(self._graph_outputs, output_list))
-
-    def fit_generator(self, generator, samples_per_epoch, nb_epoch,
-                      verbose=1, callbacks=[],
-                      validation_data=None, nb_val_samples=None,
-                      class_weight={},
-                      max_q_size=10, nb_worker=1,
-                      pickle_safe=False, **kwargs):
-        '''Fits a model on data generated batch-by-batch by a Python generator.
-        The generator is run in parallel to the model, for efficiency.
-        For instance, this allows you to do real-time data augmentation
-        on images on CPU in parallel to training your model on GPU.
-
-        # Arguments
-            generator: a generator.
-                The output of the generator must be either a tuple
-                of dictionaries `(input_data, sample_weight)`
-                or a dictionary `input_data`
-                (mapping names of inputs and outputs to Numpy arrays).
-                All arrays should contain the same number of samples.
-                The generator is expected to loop over its data
-                indefinitely. An epoch finishes when `samples_per_epoch`
-                samples have been seen by the model.
-            samples_per_epoch: integer, number of samples to process before
-                going to the next epoch.
-            nb_epoch: integer, total number of iterations on the data.
-            verbose: verbosity mode, 0, 1, or 2.
-            callbacks: list of callbacks to be called during training.
-            validation_data: dictionary mapping input names and outputs names
-                to appropriate Numpy arrays to be used as
-                held-out validation data, or a generator yielding such
-                dictionaries. All arrays should contain the same number
-                of samples. If a generator, will be called until more than
-                `nb_val_samples` examples have been generated at the
-                end of every epoch. These examples will then be used
-                as the validation data.
-            nb_val_samples: number of samples to use from validation
-                generator at the end of every epoch.
-            class_weight: dictionary mapping class indices to a weight
-                for the class.
-
-        # Returns
-            A `History` object.
-
-        # Examples
-
-        ```python
-            def generate_arrays_from_file(path):
-                while 1:
-                    f = open(path)
-                    for line in f:
-                        # create Numpy arrays of input data
-                        # and labels, from each line in the file
-                        x1, x2, y = process_line(line)
-                        yield ({'input_1': x1, 'input_2': x2, 'output': y})
-                    f.close()
-
-            graph.fit_generator(generate_arrays_from_file('/my_file.txt'),
-                                samples_per_epoch=10000, nb_epoch=10)
-        ```
-        '''
-        if 'show_accuracy' in kwargs:
-            kwargs.pop('show_accuracy')
-            warnings.warn('The "show_accuracy" argument is deprecated, '
-                          'instead you should pass the "accuracy" metric to '
-                          'the model at compile time:\n'
-                          '`model.compile(optimizer, loss, '
-                          'metrics=["accuracy"])`')
-        if 'nb_val_worker' in kwargs:
-            kwargs.pop('nb_val_worker')
-            warnings.warn('The "nb_val_worker" argument is deprecated, '
-                          'please remove it from your code.')
-        if kwargs:
-            raise Exception('Received unknown keyword arguments: ' +
-                            str(kwargs))
-
-        self._train_on_batch = self.train_on_batch
-        self.train_on_batch = super(Graph, self).train_on_batch
-        self._evaluate = self.evaluate
-        self.evaluate = super(Graph, self).evaluate
-
-        if validation_data and type(validation_data) is tuple:
-            raise Exception('Cannot use sample_weight with '
-                            'validation_data in legacy Graph model.')
-        if validation_data and type(validation_data) is dict:
-            validation_data = (self._get_x(validation_data),
-                               self._get_y(validation_data))
-
-        original_generator = generator
-
-        def fixed_generator():
-            while 1:
-                data = next(original_generator)
-                if type(data) is tuple:
-                    data, sample_weight = data
-                    x = self._get_x(data)
-                    y = self._get_y(data)
-                    yield x, y, sample_weight
-                else:
-                    x = self._get_x(data)
-                    y = self._get_y(data)
-                    yield x, y
-
-        generator = fixed_generator()
-        history = super(Graph, self).fit_generator(generator,
-                                                   samples_per_epoch,
-                                                   nb_epoch,
-                                                   verbose=verbose,
-                                                   callbacks=callbacks,
-                                                   validation_data=validation_data,
-                                                   nb_val_samples=nb_val_samples,
-                                                   class_weight=class_weight,
-                                                   max_q_size=max_q_size,
-                                                   nb_worker=nb_worker,
-                                                   pickle_safe=pickle_safe)
-        self.train_on_batch = self._train_on_batch
-        self.evaluate = self._evaluate
-        return history
-
-    def evaluate_generator(self, generator, val_samples,
-                           verbose=1, max_q_size=10, nb_worker=1,
-                           pickle_safe=False, **kwargs):
-        '''Evaluates the model on a generator. The generator should
-        return the same kind of data with every yield as accepted
-        by `evaluate`.
-
-        If `show_accuracy`, it returns a tuple `(loss, accuracy)`,
-        otherwise it returns the loss value.
-
-        Arguments:
-            generator:
-                generator yielding dictionaries of the kind accepted
-                by `evaluate`, or tuples of such dictionaries and
-                associated dictionaries of sample weights.
-            val_samples:
-                total number of samples to generate from `generator`
-                to use in validation.
-
-            Other arguments are the same as for `fit`.
-        '''
-        if 'show_accuracy' in kwargs:
-            kwargs.pop('show_accuracy')
-            warnings.warn('The "show_accuracy" argument is deprecated, '
-                          'instead you should pass the "accuracy" metric to '
-                          'the model at compile time:\n'
-                          '`model.compile(optimizer, loss, '
-                          'metrics=["accuracy"])`')
-        if 'verbose' in kwargs:
-            kwargs.pop('verbose')
-            warnings.warn('The "verbose" argument is deprecated.')
-        if kwargs:
-            raise Exception('Received unknown keyword arguments: ' +
-                            str(kwargs))
-
-        self._test_on_batch = self.test_on_batch
-        self.test_on_batch = super(Graph, self).test_on_batch
-
-        original_generator = generator
-
-        def fixed_generator():
-            while 1:
-                data = next(original_generator)
-                if type(data) is tuple:
-                    data, sample_weight = data
-                    x = self._get_x(data)
-                    y = self._get_y(data)
-                    yield x, y, sample_weight
-                else:
-                    x = self._get_x(data)
-                    y = self._get_y(data)
-                    yield x, y
-
-        generator = fixed_generator()
-        history = super(Graph, self).evaluate_generator(generator,
-                                                        val_samples,
-                                                        max_q_size=max_q_size,
-                                                        nb_worker=nb_worker,
-                                                        pickle_safe=pickle_safe)
-        self.test_on_batch = self._test_on_batch
-        return history
-
-    # get_weights, set_weights: inherited
-    def get_config(self):
-        config = {'input_config': self._graph_input_config,
-                  'node_config': self._graph_node_config,
-                  'output_config': self._graph_output_config}
-        nodes = {}
-        for name, node in self._graph_nodes.items():
-            nodes[name] = {'class_name': node.__class__.__name__,
-                           'config': node.get_config()}
-            if name in self._graph_shared_nodes_names:
-                nodes[name]['shared'] = True
-        config['nodes'] = nodes
-        return copy.deepcopy(config)
-
-    @classmethod
-    def from_config(cls, config):
-        # TODO: test legacy support
-        from keras.utils.layer_utils import layer_from_config
-
-        def normalize_legacy_config(conf):
-            if 'class_name' not in conf:
-                class_name = conf['name']
-                name = conf.get('custom_name')
-                conf['name'] = name
-                new_config = {
-                    'class_name': class_name,
-                    'config': conf,
-                }
-                return new_config
-            return conf
-
-        graph = cls()
-        inputs = config.get('input_config')
-        for input in inputs:
-            graph.add_input(**input)
-
-        nodes = config.get('node_config')
-        for node in nodes:
-            layer_config = config['nodes'][node['name']]
-            layer_config = normalize_legacy_config(layer_config)
-            if 'layer' in node:
-                # for add_shared_node
-                node['layer'] = layer_from_config(node['layer'])
-            else:
-                layer = layer_from_config(layer_config)
-                node['layer'] = layer
-
-            node['create_output'] = False  # outputs will be added below
-            if layer_config.get('shared'):
-                graph.add_shared_node(**node)
-            else:
-                graph.add_node(**node)
-
-        outputs = config.get('output_config')
-        for output in outputs:
-            graph.add_output(**output)
-        return graph
-
-    def load_weights(self, fname):
-        if not self.built:
-            self.build()
-        super(Graph, self).load_weights(fname)
@@ -5,14 +5,14 @@ from .utils.generic_utils import get_from_module

 def binary_accuracy(y_true, y_pred):
    '''Calculates the mean accuracy rate across all predictions for binary
-    classification problems
+    classification problems.
    '''
    return K.mean(K.equal(y_true, K.round(y_pred)))


 def categorical_accuracy(y_true, y_pred):
    '''Calculates the mean accuracy rate across all predictions for
-    multiclass classification problems
+    multiclass classification problems.
    '''
    return K.mean(K.equal(K.argmax(y_true, axis=-1),
                  K.argmax(y_pred, axis=-1)))
@@ -20,7 +20,7 @@ def categorical_accuracy(y_true, y_pred):

 def sparse_categorical_accuracy(y_true, y_pred):
    '''Same as categorical_accuracy, but useful when the predictions are for
-    sparse targets
+    sparse targets.
    '''
    return K.mean(K.equal(K.max(y_true, axis=-1),
                          K.cast(K.argmax(y_pred, axis=-1), K.floatx())))
@@ -28,36 +28,36 @@ def sparse_categorical_accuracy(y_true, y_pred):

 def top_k_categorical_accuracy(y_true, y_pred, k=5):
    '''Calculates the top-k categorical accuracy rate, i.e. success when the
-    target class is within the top-k predictions provided
+    target class is within the top-k predictions provided.
    '''
    return K.mean(K.in_top_k(y_pred, K.argmax(y_true, axis=-1), k))


 def mean_squared_error(y_true, y_pred):
-    '''Calculates the mean squared error (mse) rate between predicted and target
-    values
+    '''Calculates the mean squared error (mse) rate
+    between predicted and target values.
    '''
    return K.mean(K.square(y_pred - y_true))


 def mean_absolute_error(y_true, y_pred):
-    '''Calculates the mean absolute error (mae) rate between predicted and target
-    values
+    '''Calculates the mean absolute error (mae) rate
+    between predicted and target values.
    '''
    return K.mean(K.abs(y_pred - y_true))


 def mean_absolute_percentage_error(y_true, y_pred):
-    '''Calculates the mean absolute percentage error (mape) rate between predicted
-    and target values
+    '''Calculates the mean absolute percentage error (mape) rate
+    between predicted and target values.
    '''
    diff = K.abs((y_true - y_pred) / K.clip(K.abs(y_true), K.epsilon(), np.inf))
    return 100. * K.mean(diff)


 def mean_squared_logarithmic_error(y_true, y_pred):
-    '''Calculates the mean squared logarithmic error (msle) rate between predicted
-    and target values
+    '''Calculates the mean squared logarithmic error (msle) rate
+    between predicted and target values.
    '''
    first_log = K.log(K.clip(y_pred, K.epsilon(), np.inf) + 1.)
    second_log = K.log(K.clip(y_true, K.epsilon(), np.inf) + 1.)
@@ -66,13 +66,13 @@ def mean_squared_logarithmic_error(y_true, y_pred):

 def hinge(y_true, y_pred):
    '''Calculates the hinge loss, which is defined as
-    `max(1 - y_true * y_pred, 0)`
+    `max(1 - y_true * y_pred, 0)`.
    '''
    return K.mean(K.maximum(1. - y_true * y_pred, 0.))


 def squared_hinge(y_true, y_pred):
-    '''Calculates the squared value of the hinge loss
+    '''Calculates the squared value of the hinge loss.
    '''
    return K.mean(K.square(K.maximum(1. - y_true * y_pred, 0.)))

@@ -104,7 +104,7 @@ def binary_crossentropy(y_true, y_pred):

 def kullback_leibler_divergence(y_true, y_pred):
    '''Calculates the Kullback-Leibler (KL) divergence between prediction
-    and target values
+    and target values.
    '''
    y_true = K.clip(y_true, K.epsilon(), 1)
    y_pred = K.clip(y_pred, K.epsilon(), 1)
@@ -148,11 +148,31 @@ def matthews_correlation(y_true, y_pred):
    return numerator / (denominator + K.epsilon())


-def fbeta_score(y_true, y_pred, beta=1):
-    '''Computes the F score, the weighted harmonic mean of precision and recall.
+def precision(y_true, y_pred):
+    '''Calculates the precision, a metric for multi-label classification of
+    how many selected items are relevant.
+    '''
+    true_positives = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
+    predicted_positives = K.sum(K.round(K.clip(y_pred, 0, 1)))
+    precision = true_positives / (predicted_positives + K.epsilon())
+    return precision

-    This is useful for multi-label classification where input samples can be
-    tagged with a set of labels. By only using accuracy (precision) a model
+
+def recall(y_true, y_pred):
+    '''Calculates the recall, a metric for multi-label classification of
+    how many relevant items are selected.
+    '''
+    true_positives = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
+    possible_positives = K.sum(K.round(K.clip(y_true, 0, 1)))
+    recall = true_positives / (possible_positives + K.epsilon())
+    return recall
+
+
+def fbeta_score(y_true, y_pred, beta=1):
+    '''Calculates the F score, the weighted harmonic mean of precision and recall.
+
+    This is useful for multi-label classification, where input samples can be
+    classified as sets of labels. By only using recall, a model
     would achieve a perfect score by simply assigning every class to every
     input. In order to avoid this, a metric should penalize incorrect class
     assignments as well (precision). The F-beta score (ranged from 0.0 to 1.0)
@@ -162,30 +182,25 @@ def fbeta_score(y_true, y_pred, beta=1):
     With beta = 1, this is equivalent to a F-measure. With beta < 1, precision
     (avoiding incorrect classes) becomes more important, and with beta > 1 the
     metric is instead weighted towards recall (finding every correct class).
-
    '''
    if beta < 0:
        raise ValueError('The lowest choosable beta is zero (only precision).')
-
-    # Count positive samples.
-    c1 = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
-    c2 = K.sum(K.round(K.clip(y_pred, 0, 1)))
-    c3 = K.sum(K.round(K.clip(y_true, 0, 1)))
-
-    # If there are no true samples, fix the F score at 0.
-    if c3 == 0:
+        
+    # If y_true has no positive labels, fix the F score at 0 like sklearn.
+    if K.sum(K.round(K.clip(y_true, 0, 1))) == 0:
        return 0

-    # How many selected items are relevant?
-    precision = c1 / c2
+    p = precision(y_true, y_pred)
+    r = recall(y_true, y_pred)
+    bb = beta ** 2
+    fbeta_score = (1 + bb) * (p * r) / (bb * p + r + K.epsilon())
+    return fbeta_score

-    # How many relevant items are selected?
-    recall = c1 / c3

-    # Weight precision and recall together as a single scalar.
-    beta2 = beta ** 2
-    f_score = (1 + beta2) * (precision * recall) / (beta2 * precision + recall)
-    return f_score
+def fmeasure(y_true, y_pred):
+    '''Calculates the f-measure, the harmonic mean of precision and recall.
+    '''
+    return fbeta_score(y_true, y_pred, beta=1)


 # aliases
@@ -194,6 +209,7 @@ mae = MAE = mean_absolute_error
 mape = MAPE = mean_absolute_percentage_error
 msle = MSLE = mean_squared_logarithmic_error
 cosine = cosine_proximity
+fscore = f1score = fmeasure


 def get(identifier):
@@ -6,11 +6,11 @@ import os
 import numpy as np

 from . import backend as K
+from . import optimizers
 from .utils.io_utils import ask_to_proceed_with_overwrite
 from .engine.training import Model
-from .engine.topology import get_source_inputs, Node, Layer
+from .engine.topology import get_source_inputs, Node, Layer, Merge
 from .optimizers import optimizer_from_config
-from .legacy.models import Graph


 def save_model(model, filepath, overwrite=True):
@@ -56,40 +56,52 @@ def save_model(model, filepath, overwrite=True):
    model.save_weights_to_hdf5_group(model_weights_group)

    if hasattr(model, 'optimizer'):
-        f.attrs['training_config'] = json.dumps({
-            'optimizer_config': {
-                'class_name': model.optimizer.__class__.__name__,
-                'config': model.optimizer.get_config()
-            },
-            'loss': model.loss,
-            'metrics': model.metrics,
-            'sample_weight_mode': model.sample_weight_mode,
-            'loss_weights': model.loss_weights,
-        }, default=get_json_type).encode('utf8')
+        if isinstance(model.optimizer, optimizers.TFOptimizer):
+            warnings.warn(
+                'TensorFlow optimizers do not '
+                'make it possible to access '
+                'optimizer attributes or optimizer state '
+                'after instantiation. '
+                'As a result, we cannot save the optimizer '
+                'as part of the model save file. '
+                'You will have to compile your model again after loading it. '
+                'Prefer using a Keras optimizer instead '
+                '(see keras.io/optimizers).')
+        else:
+            f.attrs['training_config'] = json.dumps({
+                'optimizer_config': {
+                    'class_name': model.optimizer.__class__.__name__,
+                    'config': model.optimizer.get_config()
+                },
+                'loss': model.loss,
+                'metrics': model.metrics,
+                'sample_weight_mode': model.sample_weight_mode,
+                'loss_weights': model.loss_weights,
+            }, default=get_json_type).encode('utf8')

-        # save optimizer weights
-        symbolic_weights = getattr(model.optimizer, 'weights')
-        if symbolic_weights:
-            optimizer_weights_group = f.create_group('optimizer_weights')
-            weight_values = K.batch_get_value(symbolic_weights)
-            weight_names = []
-            for i, (w, val) in enumerate(zip(symbolic_weights, weight_values)):
-                if hasattr(w, 'name') and w.name:
-                    name = str(w.name)
-                else:
-                    name = 'param_' + str(i)
-                weight_names.append(name.encode('utf8'))
-            optimizer_weights_group.attrs['weight_names'] = weight_names
-            for name, val in zip(weight_names, weight_values):
-                param_dset = optimizer_weights_group.create_dataset(
-                    name,
-                    val.shape,
-                    dtype=val.dtype)
-                if not val.shape:
-                    # scalar
-                    param_dset[()] = val
-                else:
-                    param_dset[:] = val
+            # save optimizer weights
+            symbolic_weights = getattr(model.optimizer, 'weights')
+            if symbolic_weights:
+                optimizer_weights_group = f.create_group('optimizer_weights')
+                weight_values = K.batch_get_value(symbolic_weights)
+                weight_names = []
+                for i, (w, val) in enumerate(zip(symbolic_weights, weight_values)):
+                    if hasattr(w, 'name') and w.name:
+                        name = str(w.name)
+                    else:
+                        name = 'param_' + str(i)
+                    weight_names.append(name.encode('utf8'))
+                optimizer_weights_group.attrs['weight_names'] = weight_names
+                for name, val in zip(weight_names, weight_values):
+                    param_dset = optimizer_weights_group.create_dataset(
+                        name,
+                        val.shape,
+                        dtype=val.dtype)
+                    if not val.shape:
+                        # scalar
+                        param_dset[()] = val
+                    else:
+                        param_dset[:] = val
    f.flush()
    f.close()

@@ -157,7 +169,7 @@ def load_model(filepath, custom_objects={}):
    # set optimizer weights
    if 'optimizer_weights' in f:
        # build train function (to get weight updates)
-        if model.__class__.__name__ == 'Sequential':
+        if isinstance(model, Sequential):
            model.model._make_train_function()
        else:
            model._make_train_function()
@@ -238,7 +250,7 @@ class Sequential(Model):
        self.model = None  # internal Model instance
        self.inputs = []  # tensors
        self.outputs = []  # tensors (length 1)
-        self.trainable = True
+        self._trainable = True

        # model attributes
        self.inbound_nodes = []
@@ -371,6 +383,7 @@ class Sequential(Model):
                            ' Add some layers first.')
        # actually create the model
        self.model = Model(self.inputs, self.outputs[0], name=self.name + '_model')
+        self.model.trainable = self.trainable

        # mirror model attributes
        self.supports_masking = self.model.supports_masking
@@ -405,7 +418,7 @@ class Sequential(Model):
            return self._flattened_layers
        layers = []
        if self.layers:
-            if self.layers[0].__class__.__name__ == 'Merge':
+            if isinstance(self.layers[0], Merge):
                merge = self.layers[0]
                for layer in merge.layers:
                    if hasattr(layer, 'flattened_layers'):
@@ -442,6 +455,16 @@ class Sequential(Model):
                             list(layer_dict.items()))
        return all_attrs

+    @property
+    def trainable(self):
+        return self._trainable  # backing attribute for this property
+
+    @trainable.setter
+    def trainable(self, value):
+        if self.model:  # mirror the flag onto the internal Model once it exists
+            self.model.trainable = value
+        self._trainable = value  # always remember the value locally as well
+
    @property
    def trainable_weights(self):
        if not self.trainable:
@@ -460,13 +483,15 @@ class Sequential(Model):

    @property
    def updates(self):
-        # support for legacy behavior
-        return self._gather_list_attr('updates')
+        return self.model.updates

    @property
    def state_updates(self):
        # support for legacy behavior
-        return self._gather_list_attr('state_updates')
+        return self.model.state_updates
+
+    def get_updates_for(self, inputs):  # NOTE(review): self.model starts as None - confirm callers build first
+        return self.model.get_updates_for(inputs)  # delegate to the internal Model

    @property
    def regularizers(self):
@@ -960,7 +985,7 @@ class Sequential(Model):
        as a Python list.
        '''
        config = []
-        if self.layers[0].__class__.__name__ == 'Merge':
+        if isinstance(self.layers[0], Merge):
            assert hasattr(self.layers[0], 'layers')
            layers = []
            for layer in self.layers[0].layers:
@@ -1,6 +1,7 @@
 from __future__ import absolute_import
 import numpy as np
 from . import backend as K
+from .utils.generic_utils import get_from_module


 def mean_squared_error(y_true, y_pred):
@@ -72,6 +73,6 @@ msle = MSLE = mean_squared_logarithmic_error
 kld = KLD = kullback_leibler_divergence
 cosine = cosine_proximity

-from .utils.generic_utils import get_from_module
+
 def get(identifier):
    return get_from_module(identifier, globals(), 'objective')
@@ -2,6 +2,7 @@ from __future__ import absolute_import
 from . import backend as K
 from .utils.generic_utils import get_from_module
 from six.moves import zip
+import warnings


 def clip_norm(g, c, n):
@@ -19,6 +20,7 @@ def optimizer_from_config(config, custom_objects={}):
        'adam': Adam,
        'adamax': Adamax,
        'nadam': Nadam,
+        'tfoptimizer': TFOptimizer,
    }
    class_name = config['class_name']
    if class_name in custom_objects:
@@ -53,14 +55,6 @@ class Optimizer(object):
        self.updates = []
        self.weights = []

-    def get_state(self):
-        return [K.get_value(u[0]) for u in self.updates]
-
-    def set_state(self, value_list):
-        assert len(self.updates) == len(value_list)
-        for u, v in zip(self.updates, value_list):
-            K.set_value(u[0], v)
-
    def get_updates(self, params, constraints, loss):
        raise NotImplementedError

@@ -570,6 +564,36 @@ class Nadam(Optimizer):
        return dict(list(base_config.items()) + list(config.items()))


+class TFOptimizer(Optimizer):
+    # Adapter exposing a TensorFlow optimizer through the Keras Optimizer API.
+    def __init__(self, optimizer):
+        self.optimizer = optimizer  # the wrapped optimizer object
+        self.iterations = K.variable(0.)  # handed to apply_gradients as global_step
+        self.updates = []
+
+    def get_updates(self, params, constraints, loss):
+        if constraints:  # any non-empty constraints argument is refused
+            raise ValueError('TF optimizers do not support '
+                             'weights constraints. Either remove '
+                             'all weights constraints in your model, '
+                             'or use a Keras optimizer.')
+        grads = self.optimizer.compute_gradients(loss, params)
+        opt_update = self.optimizer.apply_gradients(
+            grads, global_step=self.iterations)
+        self.updates.append(opt_update)  # appended on every call, so entries accumulate
+        return self.updates
+
+    @property
+    def weights(self):
+        raise NotImplementedError  # no weight access through the wrapper
+
+    def get_config(self):
+        raise NotImplementedError  # serialising the wrapper is not implemented
+
+    def from_config(self, config):
+        raise NotImplementedError  # restoring the wrapper is not implemented
+
+
 # aliases
 sgd = SGD
 rmsprop = RMSprop
@@ -581,5 +605,11 @@ nadam = Nadam


 def get(identifier, kwargs=None):
+    if K.backend() == 'tensorflow':
+        # Wrap TF optimizer instances
+        import tensorflow as tf
+        if isinstance(identifier, tf.train.Optimizer):
+            return TFOptimizer(identifier)
+    # Instantiate a Keras optimizer
    return get_from_module(identifier, globals(), 'optimizer',
                           instantiate=True, kwargs=kwargs)
@@ -3,7 +3,7 @@ from __future__ import print_function
 from .generic_utils import get_from_module
 from .np_utils import convert_kernel
 from ..layers import *
-from ..models import Model, Sequential, Graph
+from ..models import Model, Sequential
 from .. import backend as K


@@ -15,7 +15,7 @@ def layer_from_config(config, custom_objects={}):
            of custom (non-Keras) objects to class/functions

    # Returns
-        Layer instance (may be Model, Sequential, Graph, Layer...)
+        Layer instance (may be Model, Sequential, Layer...)
    '''
    # Insert custom layers into globals so they can
    # be accessed by `get_from_module`.
@@ -26,8 +26,6 @@ def layer_from_config(config, custom_objects={}):

    if class_name == 'Sequential':
        layer_class = Sequential
-    elif class_name == 'Graph':
-        layer_class = Graph
    elif class_name in ['Model', 'Container']:
        layer_class = Model
    else:
@@ -53,6 +51,8 @@ def print_summary(layers, relevant_nodes=None, line_length=100, positions=[.33,
    def print_row(fields, positions):
        line = ''
        for i in range(len(fields)):
+            if i > 0:
+                line = line[:-1] + ' '
            line += str(fields[i])
            line = line[:positions[i]]
            line += ' ' * (positions[i] - len(line))
@@ -122,21 +122,25 @@ def convert_kernel(kernel, dim_ordering='default'):
 def conv_output_length(input_length, filter_size, border_mode, stride, dilation=1):
    if input_length is None:
        return None
-    assert border_mode in {'same', 'valid'}
+    assert border_mode in {'same', 'valid', 'full'}
    dilated_filter_size = filter_size + (filter_size - 1) * (dilation - 1)
    if border_mode == 'same':
        output_length = input_length
    elif border_mode == 'valid':
        output_length = input_length - dilated_filter_size + 1
+    elif border_mode == 'full':
+        output_length = input_length + dilated_filter_size - 1
    return (output_length + stride - 1) // stride


 def conv_input_length(output_length, filter_size, border_mode, stride):
    if output_length is None:
        return None
-    assert border_mode in {'same', 'valid'}
+    assert border_mode in {'same', 'valid', 'full'}
    if border_mode == 'same':
        pad = filter_size // 2
    elif border_mode == 'valid':
        pad = 0
+    elif border_mode == 'full':
+        pad = filter_size - 1
    return (output_length - 1) * stride - 2 * pad + filter_size
@@ -1,6 +1,7 @@
 import os

 from ..layers.wrappers import Wrapper
+from ..models import Sequential

 try:
    # pydot-ng is a fork of pydot that is better maintained
@@ -19,7 +20,7 @@ def model_to_dot(model, show_shapes=False, show_layer_names=True):
    dot.set('concentrate', True)
    dot.set_node_defaults(shape='record')

-    if model.__class__.__name__ == 'Sequential':
+    if isinstance(model, Sequential):
        if not model.built:
            model.build()
        model = model.model
@@ -28,13 +29,14 @@ def model_to_dot(model, show_shapes=False, show_layer_names=True):
    # Create graph nodes.
    for layer in layers:
        layer_id = str(id(layer))
-        
+
        # Append a wrapped layer's label to node's label, if it exists.
        layer_name = layer.name
        class_name = layer.__class__.__name__
        if isinstance(layer, Wrapper):
            layer_name = '{}({})'.format(layer_name, layer.layer.name)
-            class_name = '{}({})'.format(class_name, layer.layer.__class__.__name__)
+            child_class_name = layer.layer.__class__.__name__
+            class_name = '{}({})'.format(class_name, child_class_name)

        # Create node's label.
        if show_layer_names:
@@ -3,12 +3,12 @@ from setuptools import find_packages


 setup(name='Keras',
-      version='1.1.1',
+      version='1.1.2',
      description='Deep Learning for Python',
      author='Francois Chollet',
      author_email='francois.chollet@gmail.com',
      url='https://github.com/fchollet/keras',
-      download_url='https://github.com/fchollet/keras/tarball/1.1.1',
+      download_url='https://github.com/fchollet/keras/tarball/1.1.2',
      license='MIT',
      install_requires=['theano', 'pyyaml', 'six'],
      extras_require={
@@ -881,6 +881,35 @@ class TestBackend(object):
            assert k_s_d.shape == k_d.shape
            assert_allclose(k_s_d, k_d, atol=1e-05)

+    def test_map(self):
+        x = np.random.rand(10, 3).astype(np.float32)
+        for K in [KTF, KTH]:  # repeat the check for each backend under test
+            kx = K.eval(K.map_fn(K.sum, x))  # K.sum mapped over the rows of x
+
+            assert (10,) == kx.shape  # one value per row
+            assert_allclose(x.sum(axis=1), kx, atol=1e-05)
+
+    def test_foldl(self):
+        x = np.random.rand(10, 3).astype(np.float32)
+        for K in [KTF, KTH]:  # repeat the check for each backend under test
+            kx = K.eval(K.foldl(lambda a, b: a+b, x))  # add the rows together
+
+            assert (3,) == kx.shape  # one value per column
+            assert_allclose(x.sum(axis=0), kx, atol=1e-05)
+
+    def test_foldr(self):
+        # Checks that foldr walks the array from right to left. Multiplying
+        # left to right first forms 1e-20 * 1e-20 = 1e-40, which is below the
+        # normal float32 range, while right to left the running product stays
+        # representable, so the foldr result (p2) must be the larger one.
+        x = np.array([1e-20, 1e-20, 10, 10, 10], dtype=np.float32)
+        for K in [KTF, KTH]:
+            p1 = K.eval(K.foldl(lambda a, b: a*b, x))
+            p2 = K.eval(K.foldr(lambda a, b: a*b, x))
+
+            assert p1 < p2
+            assert 9e-38 < p2 <= 1e-37
+

 if __name__ == '__main__':
    pytest.main([__file__])
@@ -4,10 +4,11 @@ from numpy.testing import assert_allclose

 from keras.layers import Dense, Dropout
 from keras.engine.topology import merge, Input
-from keras.engine.training import Model
+from keras.engine.training import Model, check_loss_and_target_compatibility
 from keras.models import Sequential
 from keras import backend as K
 from keras.utils.test_utils import keras_test
+from keras.callbacks import LambdaCallback


@keras_test
@@ -146,6 +147,28 @@ def test_model_methods():
                              [output_a_np, output_b_np])
    assert len(out) == 4

+    # test starting from non-zero initial epoch
+    trained_epochs = []
+
+    def on_epoch_begin(epoch, logs):
+        trained_epochs.append(epoch)
+    tracker_cb = LambdaCallback(on_epoch_begin=on_epoch_begin)
+    out = model.fit([input_a_np, input_b_np],
+                    [output_a_np, output_b_np], nb_epoch=5, batch_size=4,
+                    initial_epoch=2, callbacks=[tracker_cb])
+    assert trained_epochs == [2, 3, 4]
+
+    # test starting from non-zero initial epoch for generator too
+    trained_epochs = []
+
+    def gen_data(batch_sz):
+        while True:
+            yield ([np.random.random((batch_sz, 3)), np.random.random((batch_sz, 3))],
+                   [np.random.random((batch_sz, 4)), np.random.random((batch_sz, 3))])
+    out = model.fit_generator(gen_data(4), samples_per_epoch=10, nb_epoch=5,
+                              initial_epoch=2, callbacks=[tracker_cb])
+    assert trained_epochs == [2, 3, 4]
+
    # test with a custom metric function
    mse = lambda y_true, y_pred: K.mean(K.pow(y_true - y_pred, 2))

@@ -202,5 +225,30 @@ def test_trainable_argument():
    assert_allclose(out, out_2)


+@keras_test
+def test_check_not_failing():  # both calls below must complete without raising
+    a = np.random.random((2, 1, 3))
+    check_loss_and_target_compatibility([a], [K.categorical_crossentropy], [a.shape])
+    check_loss_and_target_compatibility([a], [K.categorical_crossentropy], [(2, None, 3)])
+
+
+@keras_test
+def test_check_last_is_one():  # a (2, 3, 1) target must be rejected
+    a = np.random.random((2, 3, 1))
+    with pytest.raises(Exception) as exc:
+        check_loss_and_target_compatibility([a], [K.categorical_crossentropy], [a.shape])
+    # match the raised exception's message, not the ExceptionInfo wrapper
+    assert "You are passing a target array" in str(exc.value)
+
+
+@keras_test
+def test_check_bad_shape():  # a (2, 3, 5) target against a (2, 3, 6) output must be rejected
+    a = np.random.random((2, 3, 5))
+    with pytest.raises(Exception) as exc:
+        check_loss_and_target_compatibility([a], [K.categorical_crossentropy], [(2, 3, 6)])
+    # match the raised exception's message, not the ExceptionInfo wrapper
+    assert "targets to have the same shape" in str(exc.value)
+
+
 if __name__ == '__main__':
    pytest.main([__file__])
@@ -8,6 +8,13 @@ from keras import backend as K
 from keras.layers import convolutional, pooling


+# TensorFlow does not support full convolution.
+if K._BACKEND == 'theano':
+    _convolution_border_modes = ['valid', 'same', 'full']
+else:
+    _convolution_border_modes = ['valid', 'same']
+
+
@keras_test
 def test_convolution_1d():
    nb_samples = 2
@@ -16,7 +23,7 @@ def test_convolution_1d():
    filter_length = 3
    nb_filter = 3

-    for border_mode in ['valid', 'same']:
+    for border_mode in _convolution_border_modes:
        for subsample_length in [1, 2]:
            if border_mode == 'same' and subsample_length != 1:
                continue
@@ -47,7 +54,7 @@ def test_atrous_conv_1d():
    filter_length = 3
    nb_filter = 3

-    for border_mode in ['valid', 'same']:
+    for border_mode in _convolution_border_modes:
        for subsample_length in [1, 2]:
            for atrous_rate in [1, 2]:
                if border_mode == 'same' and subsample_length != 1:
@@ -101,7 +108,7 @@ def test_convolution_2d():
    nb_row = 10
    nb_col = 6

-    for border_mode in ['valid', 'same']:
+    for border_mode in _convolution_border_modes:
        for subsample in [(1, 1), (2, 2)]:
            if border_mode == 'same' and subsample != (1, 1):
                continue
@@ -134,7 +141,7 @@ def test_deconvolution_2d():
    nb_row = 10
    nb_col = 6

-    for border_mode in ['valid', 'same']:
+    for border_mode in _convolution_border_modes:
        for subsample in [(1, 1), (2, 2)]:
            if border_mode == 'same' and subsample != (1, 1):
                continue
@@ -175,7 +182,7 @@ def test_atrous_conv_2d():
    nb_row = 10
    nb_col = 6

-    for border_mode in ['valid', 'same']:
+    for border_mode in _convolution_border_modes:
        for subsample in [(1, 1), (2, 2)]:
            for atrous_rate in [(1, 1), (2, 2)]:
                if border_mode == 'same' and subsample != (1, 1):
@@ -214,7 +221,7 @@ def test_separable_conv_2d():
    nb_row = 10
    nb_col = 6

-    for border_mode in ['valid', 'same']:
+    for border_mode in _convolution_border_modes:
        for subsample in [(1, 1), (2, 2)]:
            for multiplier in [1, 2]:
                if border_mode == 'same' and subsample != (1, 1):
@@ -322,7 +329,7 @@ def test_convolution_3d():
    input_len_dim2 = 11
    input_len_dim3 = 12

-    for border_mode in ['same', 'valid']:
+    for border_mode in _convolution_border_modes:
        for subsample in [(1, 1, 1), (2, 2, 2)]:
            if border_mode == 'same' and subsample != (1, 1, 1):
                continue
@@ -382,7 +389,8 @@ def test_zero_padding_1d():
    nb_samples = 2
    input_dim = 2
    nb_steps = 5
-    input = np.ones((nb_samples, nb_steps, input_dim))
+    shape = (nb_samples, nb_steps, input_dim)
+    input = np.ones(shape)

    # basic test
    layer_test(convolutional.ZeroPadding1D,
@@ -397,22 +405,22 @@ def test_zero_padding_1d():

    # correctness test
    layer = convolutional.ZeroPadding1D(padding=2)
-    layer.set_input(K.variable(input), shape=input.shape)
-
-    out = K.eval(layer.output)
+    layer.build(shape)
+    output = layer(K.variable(input))
+    np_output = K.eval(output)
    for offset in [0, 1, -1, -2]:
-        assert_allclose(out[:, offset, :], 0.)
-    assert_allclose(out[:, 2:-2, :], 1.)
+        assert_allclose(np_output[:, offset, :], 0.)
+    assert_allclose(np_output[:, 2:-2, :], 1.)

    layer = convolutional.ZeroPadding1D(padding=(1, 2))
-    layer.set_input(K.variable(input), shape=input.shape)
-
-    out = K.eval(layer.output)
+    layer.build(shape)
+    output = layer(K.variable(input))
+    np_output = K.eval(output)
    for left_offset in [0]:
-        assert_allclose(out[:, left_offset, :], 0.)
+        assert_allclose(np_output[:, left_offset, :], 0.)
    for right_offset in [-1, -2]:
-        assert_allclose(out[:, right_offset, :], 0.)
-    assert_allclose(out[:, 1:-2, :], 1.)
+        assert_allclose(np_output[:, right_offset, :], 0.)
+    assert_allclose(np_output[:, 1:-2, :], 1.)
    layer.get_config()


@@ -443,44 +451,44 @@ def test_zero_padding_2d():

    # correctness test
    layer = convolutional.ZeroPadding2D(padding=(2, 2))
-    layer.set_input(K.variable(input), shape=input.shape)
-
-    out = K.eval(layer.output)
+    layer.build(input.shape)
+    output = layer(K.variable(input))
+    np_output = K.eval(output)
    if dim_ordering == 'tf':
        for offset in [0, 1, -1, -2]:
-            assert_allclose(out[:, offset, :, :], 0.)
-            assert_allclose(out[:, :, offset, :], 0.)
-        assert_allclose(out[:, 2:-2, 2:-2, :], 1.)
+            assert_allclose(np_output[:, offset, :, :], 0.)
+            assert_allclose(np_output[:, :, offset, :], 0.)
+        assert_allclose(np_output[:, 2:-2, 2:-2, :], 1.)
    elif dim_ordering == 'th':
        for offset in [0, 1, -1, -2]:
-            assert_allclose(out[:, :, offset, :], 0.)
-            assert_allclose(out[:, :, :, offset], 0.)
-        assert_allclose(out[:, 2:-2, 2:-2, :], 1.)
+            assert_allclose(np_output[:, :, offset, :], 0.)
+            assert_allclose(np_output[:, :, :, offset], 0.)
+        assert_allclose(np_output[:, 2:-2, 2:-2, :], 1.)

    layer = convolutional.ZeroPadding2D(padding=(1, 2, 3, 4))
-    layer.set_input(K.variable(input), shape=input.shape)
-
-    out = K.eval(layer.output)
+    layer.build(input.shape)
+    output = layer(K.variable(input))
+    np_output = K.eval(output)
    if dim_ordering == 'tf':
        for top_offset in [0]:
-            assert_allclose(out[:, top_offset, :, :], 0.)
+            assert_allclose(np_output[:, top_offset, :, :], 0.)
        for bottom_offset in [-1, -2]:
-            assert_allclose(out[:, bottom_offset, :, :], 0.)
+            assert_allclose(np_output[:, bottom_offset, :, :], 0.)
        for left_offset in [0, 1, 2]:
-            assert_allclose(out[:, :, left_offset, :], 0.)
+            assert_allclose(np_output[:, :, left_offset, :], 0.)
        for right_offset in [-1, -2, -3, -4]:
-            assert_allclose(out[:, :, right_offset, :], 0.)
-        assert_allclose(out[:, 1:-2, 3:-4, :], 1.)
+            assert_allclose(np_output[:, :, right_offset, :], 0.)
+        assert_allclose(np_output[:, 1:-2, 3:-4, :], 1.)
    elif dim_ordering == 'th':
        for top_offset in [0]:
-            assert_allclose(out[:, :, top_offset, :], 0.)
+            assert_allclose(np_output[:, :, top_offset, :], 0.)
        for bottom_offset in [-1, -2]:
-            assert_allclose(out[:, :, bottom_offset, :], 0.)
+            assert_allclose(np_output[:, :, bottom_offset, :], 0.)
        for left_offset in [0, 1, 2]:
-            assert_allclose(out[:, :, :, left_offset], 0.)
+            assert_allclose(np_output[:, :, :, left_offset], 0.)
        for right_offset in [-1, -2, -3, -4]:
-            assert_allclose(out[:, :, :, right_offset], 0.)
-        assert_allclose(out[:, :, 1:-2, 3:-4], 1.)
+            assert_allclose(np_output[:, :, :, right_offset], 0.)
+        assert_allclose(np_output[:, :, 1:-2, 3:-4], 1.)
    layer.get_config()


@@ -502,13 +510,14 @@ def test_zero_padding_3d():

    # correctness test
    layer = convolutional.ZeroPadding3D(padding=(2, 2, 2))
-    layer.set_input(K.variable(input), shape=input.shape)
-    out = K.eval(layer.output)
+    layer.build(input.shape)
+    output = layer(K.variable(input))
+    np_output = K.eval(output)
    for offset in [0, 1, -1, -2]:
-        assert_allclose(out[:, offset, :, :, :], 0.)
-        assert_allclose(out[:, :, offset, :, :], 0.)
-        assert_allclose(out[:, :, :, offset, :], 0.)
-    assert_allclose(out[:, 2:-2, 2:-2, 2:-2, :], 1.)
+        assert_allclose(np_output[:, offset, :, :, :], 0.)
+        assert_allclose(np_output[:, :, offset, :, :], 0.)
+        assert_allclose(np_output[:, :, :, offset, :], 0.)
+    assert_allclose(np_output[:, 2:-2, 2:-2, 2:-2, :], 1.)
    layer.get_config()


@@ -539,15 +548,15 @@ def test_upsampling_2d():
                layer = convolutional.UpSampling2D(
                    size=(length_row, length_col),
                    dim_ordering=dim_ordering)
-                layer.set_input(K.variable(input), shape=input.shape)
-
-                out = K.eval(layer.output)
+                layer.build(input.shape)
+                output = layer(K.variable(input))
+                np_output = K.eval(output)
                if dim_ordering == 'th':
-                    assert out.shape[2] == length_row * input_nb_row
-                    assert out.shape[3] == length_col * input_nb_col
+                    assert np_output.shape[2] == length_row * input_nb_row
+                    assert np_output.shape[3] == length_col * input_nb_col
                else:  # tf
-                    assert out.shape[1] == length_row * input_nb_row
-                    assert out.shape[2] == length_col * input_nb_col
+                    assert np_output.shape[1] == length_row * input_nb_row
+                    assert np_output.shape[2] == length_col * input_nb_col

                # compare with numpy
                if dim_ordering == 'th':
@@ -557,7 +566,7 @@ def test_upsampling_2d():
                    expected_out = np.repeat(input, length_row, axis=1)
                    expected_out = np.repeat(expected_out, length_col, axis=2)

-                assert_allclose(out, expected_out)
+                assert_allclose(np_output, expected_out)


 def test_upsampling_3d():
@@ -580,17 +589,17 @@ def test_upsampling_3d():
                    layer = convolutional.UpSampling3D(
                        size=(length_dim1, length_dim2, length_dim3),
                        dim_ordering=dim_ordering)
-                    layer.set_input(K.variable(input), shape=input.shape)
-
-                    out = K.eval(layer.output)
+                    layer.build(input.shape)
+                    output = layer(K.variable(input))
+                    np_output = K.eval(output)
                    if dim_ordering == 'th':
-                        assert out.shape[2] == length_dim1 * input_len_dim1
-                        assert out.shape[3] == length_dim2 * input_len_dim2
-                        assert out.shape[4] == length_dim3 * input_len_dim3
+                        assert np_output.shape[2] == length_dim1 * input_len_dim1
+                        assert np_output.shape[3] == length_dim2 * input_len_dim2
+                        assert np_output.shape[4] == length_dim3 * input_len_dim3
                    else:  # tf
-                        assert out.shape[1] == length_dim1 * input_len_dim1
-                        assert out.shape[2] == length_dim2 * input_len_dim2
-                        assert out.shape[3] == length_dim3 * input_len_dim3
+                        assert np_output.shape[1] == length_dim1 * input_len_dim1
+                        assert np_output.shape[2] == length_dim2 * input_len_dim2
+                        assert np_output.shape[3] == length_dim3 * input_len_dim3

                    # compare with numpy
                    if dim_ordering == 'th':
@@ -602,7 +611,7 @@ def test_upsampling_3d():
                        expected_out = np.repeat(expected_out, length_dim2, axis=2)
                        expected_out = np.repeat(expected_out, length_dim3, axis=3)

-                    assert_allclose(out, expected_out)
+                    assert_allclose(np_output, expected_out)


@keras_test
@@ -626,32 +635,35 @@ def test_cropping_2d():
    dim_ordering = K.image_dim_ordering()

    if dim_ordering == 'th':
-        input = np.random.rand(nb_samples, stack_size, input_len_dim1, input_len_dim2)
+        input = np.random.rand(nb_samples, stack_size,
+                               input_len_dim1, input_len_dim2)
    else:
-        input = np.random.rand(nb_samples, input_len_dim1, input_len_dim2, stack_size)
+        input = np.random.rand(nb_samples,
+                               input_len_dim1, input_len_dim2,
+                               stack_size)
    # basic test
    layer_test(convolutional.Cropping2D,
               kwargs={'cropping': cropping,
                       'dim_ordering': dim_ordering},
               input_shape=input.shape)
    # correctness test
-    layer = convolutional.Cropping2D(cropping=cropping, dim_ordering=dim_ordering)
-    layer.set_input(K.variable(input), shape=input.shape)
-
-    out = K.eval(layer.output)
+    layer = convolutional.Cropping2D(cropping=cropping,
+                                     dim_ordering=dim_ordering)
+    layer.build(input.shape)
+    output = layer(K.variable(input))
+    np_output = K.eval(output)
    # compare with numpy
    if dim_ordering == 'th':
        expected_out = input[:,
                             :,
-                             cropping[0][0]:-cropping[0][1],
-                             cropping[1][0]:-cropping[1][1]]
+                             cropping[0][0]: -cropping[0][1],
+                             cropping[1][0]: -cropping[1][1]]
    else:
        expected_out = input[:,
-                             cropping[0][0]:-cropping[0][1],
-                             cropping[1][0]:-cropping[1][1],
+                             cropping[0][0]: -cropping[0][1],
+                             cropping[1][0]: -cropping[1][1],
                             :]
-
-    assert_allclose(out, expected_out)
+    assert_allclose(np_output, expected_out)


 def test_cropping_3d():
@@ -664,34 +676,37 @@ def test_cropping_3d():
    dim_ordering = K.image_dim_ordering()

    if dim_ordering == 'th':
-        input = np.random.rand(nb_samples, stack_size, input_len_dim1, input_len_dim2, input_len_dim3)
+        input = np.random.rand(nb_samples, stack_size,
+                               input_len_dim1, input_len_dim2, input_len_dim3)
    else:
-        input = np.random.rand(nb_samples, input_len_dim1, input_len_dim2, input_len_dim3, stack_size)
+        input = np.random.rand(nb_samples,
+                               input_len_dim1, input_len_dim2,
+                               input_len_dim3, stack_size)
    # basic test
    layer_test(convolutional.Cropping3D,
               kwargs={'cropping': cropping,
                       'dim_ordering': dim_ordering},
               input_shape=input.shape)
    # correctness test
-    layer = convolutional.Cropping3D(cropping=cropping, dim_ordering=dim_ordering)
-    layer.set_input(K.variable(input), shape=input.shape)
-
-    out = K.eval(layer.output)
+    layer = convolutional.Cropping3D(cropping=cropping,
+                                     dim_ordering=dim_ordering)
+    layer.build(input.shape)
+    output = layer(K.variable(input))
+    np_output = K.eval(output)
    # compare with numpy
    if dim_ordering == 'th':
        expected_out = input[:,
                             :,
-                             cropping[0][0]:-cropping[0][1],
-                             cropping[1][0]:-cropping[1][1],
-                             cropping[2][0]:-cropping[2][1]]
+                             cropping[0][0]: -cropping[0][1],
+                             cropping[1][0]: -cropping[1][1],
+                             cropping[2][0]: -cropping[2][1]]
    else:
        expected_out = input[:,
-                             cropping[0][0]:-cropping[0][1],
-                             cropping[1][0]:-cropping[1][1],
-                             cropping[2][0]:-cropping[2][1],
+                             cropping[0][0]: -cropping[0][1],
+                             cropping[1][0]: -cropping[1][1],
+                             cropping[2][0]: -cropping[2][1],
                             :]
-
-    assert_allclose(out, expected_out)
+    assert_allclose(np_output, expected_out)

 if __name__ == '__main__':
    pytest.main([__file__])
@@ -0,0 +1,130 @@
+import pytest
+import numpy as np
+from numpy.testing import assert_allclose
+
+from keras import backend as K
+from keras.models import Sequential
+from keras.layers import convolutional_recurrent
+from keras.utils.test_utils import layer_test
+from keras import regularizers
+
+
+def test_recurrent_convolutional():
+    nb_row = 3
+    nb_col = 3
+    nb_filter = 5
+    nb_samples = 2
+    input_channel = 2
+    input_nb_row = 5
+    input_nb_col = 5
+    sequence_len = 2
+    for dim_ordering in ['th', 'tf']:
+
+        if dim_ordering == 'th':
+            input = np.random.rand(nb_samples, sequence_len,
+                                   input_channel,
+                                   input_nb_row, input_nb_col)
+        else:  # tf
+            input = np.random.rand(nb_samples, sequence_len,
+                                   input_nb_row, input_nb_col,
+                                   input_channel)
+
+        for return_sequences in [True, False]:
+            # test for output shape:
+            output = layer_test(convolutional_recurrent.ConvLSTM2D,
+                                kwargs={'dim_ordering': dim_ordering,
+                                        'return_sequences': return_sequences,
+                                        'nb_filter': nb_filter,
+                                        'nb_row': nb_row,
+                                        'nb_col': nb_col,
+                                        'border_mode': "same"},
+                                input_shape=input.shape)
+
+            output_shape = [nb_samples, input_nb_row, input_nb_col]
+
+            if dim_ordering == 'th':
+                output_shape.insert(1, nb_filter)
+            else:
+                output_shape.insert(3, nb_filter)
+
+            if return_sequences:
+                output_shape.insert(1, sequence_len)
+
+            assert output.shape == tuple(output_shape)
+
+            # No need to check statefulness for both
+            if dim_ordering == 'th' or return_sequences:
+                continue
+
+            # Tests for statefulness
+            model = Sequential()
+            kwargs = {'dim_ordering': dim_ordering,
+                      'return_sequences': return_sequences,
+                      'nb_filter': nb_filter,
+                      'nb_row': nb_row,
+                      'nb_col': nb_col,
+                      'stateful': True,
+                      'batch_input_shape': input.shape,
+                      'border_mode': "same"}
+            layer = convolutional_recurrent.ConvLSTM2D(**kwargs)
+
+            model.add(layer)
+            model.compile(optimizer='sgd', loss='mse')
+            out1 = model.predict(np.ones_like(input))
+            assert(out1.shape == tuple(output_shape))
+
+            # train once so that the states change
+            model.train_on_batch(np.ones_like(input),
+                                 np.ones_like(output))
+            out2 = model.predict(np.ones_like(input))
+
+            # if the state is not reset, output should be different
+            assert(out1.max() != out2.max())
+
+            # check that output changes after states are reset
+            # (even though the model itself didn't change)
+            layer.reset_states()
+            out3 = model.predict(np.ones_like(input))
+            assert(out2.max() != out3.max())
+
+            # check that container-level reset_states() works
+            model.reset_states()
+            out4 = model.predict(np.ones_like(input))
+            assert_allclose(out3, out4, atol=1e-5)
+
+            # check that the call to `predict` updated the states
+            out5 = model.predict(np.ones_like(input))
+            assert(out4.max() != out5.max())
+
+            # check regularizers
+            kwargs = {'dim_ordering': dim_ordering,
+                      'return_sequences': return_sequences,
+                      'nb_filter': nb_filter,
+                      'nb_row': nb_row,
+                      'nb_col': nb_col,
+                      'stateful': True,
+                      'batch_input_shape': input.shape,
+                      'W_regularizer': regularizers.WeightRegularizer(l1=0.01),
+                      'U_regularizer': regularizers.WeightRegularizer(l1=0.01),
+                      'b_regularizer': 'l2',
+                      'border_mode': "same"}
+
+            layer = convolutional_recurrent.ConvLSTM2D(**kwargs)
+            layer.build(input.shape)
+            output = layer(K.variable(np.ones(input.shape)))
+            K.eval(output)
+
+            # check dropout
+            layer_test(convolutional_recurrent.ConvLSTM2D,
+                       kwargs={'dim_ordering': dim_ordering,
+                               'return_sequences': return_sequences,
+                               'nb_filter': nb_filter,
+                               'nb_row': nb_row,
+                               'nb_col': nb_col,
+                               'border_mode': "same",
+                               'dropout_W': 0.1,
+                               'dropout_U': 0.1},
+                       input_shape=input.shape)
+
+if __name__ == '__main__':
+    pytest.main([__file__])
@@ -15,7 +15,7 @@ def test_masking():

@keras_test
 def test_merge():
-    from keras.layers import Input, merge, Merge
+    from keras.layers import Input, merge, Merge, Masking
    from keras.models import Model

    # test modes: 'sum', 'mul', 'concat', 'ave', 'cos', 'dot'.
@@ -53,7 +53,8 @@ def test_merge():
    input_b = Input(shape=input_shapes[1][1:])
    merged = merge([input_a, input_b],
                   mode=lambda tup: K.concatenate([tup[0], tup[1]]),
-                   output_shape=lambda tup: (tup[0][:-1],) + (tup[0][-1] + tup[1][-1],))
+                   output_shape=lambda tup: tup[0][:-1] + (tup[0][-1] + tup[1][-1],))
+    model = Model([input_a, input_b], merged)
    expected_output_shape = model.get_output_shape_for(input_shapes)
    actual_output_shape = model.predict(inputs).shape
    assert expected_output_shape == actual_output_shape
@@ -65,17 +66,18 @@ def test_merge():
    # test function with output_shape function
    def fn_mode(tup):
        x, y = tup
-        return K.concatenate([x, y])
+        return K.concatenate([x, y], axis=1)

    def fn_output_shape(tup):
        s1, s2 = tup
-        return (s1[:-1],) + (s1[-1] + s2[-1],)
+        return (s1[0], s1[1] + s2[1]) + s1[2:]

    input_a = Input(shape=input_shapes[0][1:])
    input_b = Input(shape=input_shapes[1][1:])
    merged = merge([input_a, input_b],
                   mode=fn_mode,
                   output_shape=fn_output_shape)
+    model = Model([input_a, input_b], merged)
    expected_output_shape = model.get_output_shape_for(input_shapes)
    actual_output_shape = model.predict(inputs).shape
    assert expected_output_shape == actual_output_shape
@@ -84,6 +86,74 @@ def test_merge():
    model = Model.from_config(config)
    model.compile('rmsprop', 'mse')

+    # test function with output_mask function
+    # time dimension is required for masking
+    input_shapes = [(4, 3, 2), (4, 3, 2)]
+    inputs = [np.random.random(shape) for shape in input_shapes]
+
+    def fn_output_mask(tup):
+        x_mask, y_mask = tup
+        return K.concatenate([x_mask, y_mask])
+
+    input_a = Input(shape=input_shapes[0][1:])
+    input_b = Input(shape=input_shapes[1][1:])
+    a = Masking()(input_a)
+    b = Masking()(input_b)
+    merged = merge([a, b], mode=fn_mode, output_shape=fn_output_shape, output_mask=fn_output_mask)
+    model = Model([input_a, input_b], merged)
+    expected_output_shape = model.get_output_shape_for(input_shapes)
+    actual_output_shape = model.predict(inputs).shape
+    assert expected_output_shape == actual_output_shape
+
+    config = model.get_config()
+    model = Model.from_config(config)
+    model.compile('rmsprop', 'mse')
+
+    mask_inputs = (np.zeros(input_shapes[0][:-1]), np.ones(input_shapes[1][:-1]))
+    expected_mask_output = np.concatenate(mask_inputs, axis=-1)
+    mask_input_placeholders = [K.placeholder(shape=input_shape[:-1]) for input_shape in input_shapes]
+    mask_output = model.layers[-1]._output_mask(mask_input_placeholders)
+    assert np.all(K.function(mask_input_placeholders, [mask_output])(mask_inputs)[0] == expected_mask_output)
+
+    # test lambda with output_mask lambda
+    input_a = Input(shape=input_shapes[0][1:])
+    input_b = Input(shape=input_shapes[1][1:])
+    a = Masking()(input_a)
+    b = Masking()(input_b)
+    merged = merge([a, b], mode=lambda tup: K.concatenate([tup[0], tup[1]], axis=1),
+                   output_shape=lambda tup: (tup[0][0], tup[0][1] + tup[1][1]) + tup[0][2:],
+                   output_mask=lambda tup: K.concatenate([tup[0], tup[1]]))
+    model = Model([input_a, input_b], merged)
+    expected_output_shape = model.get_output_shape_for(input_shapes)
+    actual_output_shape = model.predict(inputs).shape
+    assert expected_output_shape == actual_output_shape
+
+    config = model.get_config()
+    model = Model.from_config(config)
+    model.compile('rmsprop', 'mse')
+
+    mask_output = model.layers[-1]._output_mask(mask_input_placeholders)
+    assert np.all(K.function(mask_input_placeholders, [mask_output])(mask_inputs)[0] == expected_mask_output)
+
+    # test with arguments
+    input_shapes = [(3, 2), (3, 2)]
+    inputs = [np.random.random(shape) for shape in input_shapes]
+
+    def fn_mode(tup, a, b):
+        x, y = tup
+        return x * a + y * b
+
+    input_a = Input(shape=input_shapes[0][1:])
+    input_b = Input(shape=input_shapes[1][1:])
+    merged = merge([input_a, input_b], mode=fn_mode, output_shape=lambda s: s[0], arguments={'a': 0.7, 'b': 0.3})
+    model = Model([input_a, input_b], merged)
+    output = model.predict(inputs)
+
+    config = model.get_config()
+    model = Model.from_config(config)
+
+    assert np.all(model.predict(inputs) == output)
+

@keras_test
 def test_merge_mask_2d():
@@ -153,6 +223,10 @@ def test_dropout():
               kwargs={'p': 0.5},
               input_shape=(3, 2))

+    layer_test(core.SpatialDropout1D,
+               kwargs={'p': 0.5},
+               input_shape=(2, 3, 4))
+
    layer_test(core.SpatialDropout2D,
               kwargs={'p': 0.5},
               input_shape=(2, 3, 4, 5))
@@ -212,6 +286,11 @@ def test_lambda():
               kwargs={'function': lambda x: x + 1},
               input_shape=(3, 2))

+    layer_test(Lambda,
+               kwargs={'function': lambda x, a, b: x * a + b,
+                       'arguments': {'a': 0.6, 'b': 0.4}},
+               input_shape=(3, 2))
+
    # test serialization with function
    def f(x):
        return x + 1
@@ -2,10 +2,10 @@ import pytest
 import numpy as np
 from numpy.testing import assert_allclose

-from keras.layers.core import Dense, Activation
+from keras.layers import Dense, Activation, Input
 from keras.utils.test_utils import layer_test, keras_test
 from keras.layers import normalization
-from keras.models import Sequential
+from keras.models import Sequential, Model
 from keras import backend as K

 input_1 = np.arange(10)
@@ -78,5 +78,33 @@ def test_batchnorm_mode_1():
            assert_allclose(K.eval(K.std(out)), 0.0, atol=1e-1)


+@keras_test
+def test_shared_batchnorm():
+    '''Test that a BN layer can be shared
+    across different data streams.
+    '''
+    # Test single layer reuse
+    bn = normalization.BatchNormalization(input_shape=(10,), mode=0)
+    x1 = Input(shape=(10,))
+    bn(x1)
+
+    x2 = Input(shape=(10,))
+    y2 = bn(x2)
+
+    x = np.random.normal(loc=5.0, scale=10.0, size=(2, 10))
+    model = Model(x2, y2)
+    assert len(model.updates) == 2
+    model.compile('sgd', 'mse')
+    model.train_on_batch(x, x)
+
+    # Test model-level reuse
+    x3 = Input(shape=(10,))
+    y3 = model(x3)
+    new_model = Model(x3, y3)
+    assert len(model.updates) == 2
+    new_model.compile('sgd', 'mse')
+    new_model.train_on_batch(x, x)
+
+
 if __name__ == '__main__':
    pytest.main([__file__])
@@ -129,9 +129,9 @@ def test_regularizer(layer_class):
                        U_regularizer=regularizers.WeightRegularizer(l1=0.01),
                        b_regularizer='l2')
    shape = (nb_samples, timesteps, embedding_dim)
-    layer.set_input(K.variable(np.ones(shape)),
-                    shape=shape)
-    K.eval(layer.output)
+    layer.build(shape)
+    output = layer(K.variable(np.ones(shape)))
+    K.eval(output)


@keras_test
@@ -140,15 +140,30 @@ def test_masking_layer():
    https://github.com/fchollet/keras/issues/1567

    '''
-    model = Sequential()
-    model.add(Masking(input_shape=(3, 4)))
-    model.add(recurrent.LSTM(output_dim=5, return_sequences=True))
-    model.compile(loss='categorical_crossentropy', optimizer='adam')
    I = np.random.random((6, 3, 4))
    V = np.abs(np.random.random((6, 3, 5)))
    V /= V.sum(axis=-1, keepdims=True)
+
+    model = Sequential()
+    model.add(Masking(input_shape=(3, 4)))
+    model.add(recurrent.LSTM(output_dim=5, return_sequences=True, unroll=False))
+    model.compile(loss='categorical_crossentropy', optimizer='adam')
    model.fit(I, V, nb_epoch=1, batch_size=100, verbose=1)

+    model = Sequential()
+    model.add(Masking(input_shape=(3, 4)))
+    model.add(recurrent.LSTM(output_dim=5, return_sequences=True, unroll=True))
+    model.compile(loss='categorical_crossentropy', optimizer='adam')
+    model.fit(I, V, nb_epoch=1, batch_size=100, verbose=1)
+
+
+@rnn_test
+def test_from_config(layer_class):
+    for stateful in (False, True):
+        l1 = layer_class(output_dim=1, stateful=stateful)
+        l2 = layer_class.from_config(l1.get_config())
+        assert l1.get_config() == l2.get_config()
+

 if __name__ == '__main__':
    pytest.main([__file__])
@@ -115,6 +115,13 @@ def test_Bidirectional():
        model.compile(loss='mse', optimizer='sgd')
        model.fit(x, y, nb_epoch=1, batch_size=1)

+        # Bidirectional and stateful
+        input = Input(batch_shape=(1, timesteps, dim))
+        output = wrappers.Bidirectional(rnn(output_dim, stateful=True), merge_mode=mode)(input)
+        model = Model(input, output)
+        model.compile(loss='mse', optimizer='sgd')
+        model.fit(x, y, nb_epoch=1, batch_size=1)
+

 if __name__ == '__main__':
    pytest.main([__file__])
@@ -9,7 +9,7 @@ from keras import optimizers
 np.random.seed(1337)

 from keras import callbacks
-from keras.models import Graph, Sequential
+from keras.models import Sequential
 from keras.layers.core import Dense
 from keras.utils.test_utils import get_test_data
 from keras import backend as K
@@ -186,13 +186,12 @@ def test_ReduceLROnPlateau():
    assert np.allclose(float(K.get_value(model.optimizer.lr)), 0.1, atol=K.epsilon())


-@pytest.mark.skipif((K._BACKEND != 'tensorflow'),
+@pytest.mark.skipif((K.backend() != 'tensorflow'),
                    reason="Requires tensorflow backend")
 def test_TensorBoard():
    import shutil
    import tensorflow as tf
    import keras.backend.tensorflow_backend as KTF
-    old_session = KTF.get_session()
    filepath = './logs'
    (X_train, y_train), (X_test, y_test) = get_test_data(nb_train=train_samples,
                                                         nb_test=test_samples,
@@ -224,92 +223,44 @@ def test_TensorBoard():
                yield {'X_vars': X_test, 'output': y_test}

    # case 1 Sequential
+    model = Sequential()
+    model.add(Dense(nb_hidden, input_dim=input_dim, activation='relu'))
+    model.add(Dense(nb_class, activation='softmax'))
+    model.compile(loss='categorical_crossentropy',
+                  optimizer='sgd',
+                  metrics=['accuracy'])

-    with tf.Graph().as_default():
-        session = tf.Session('')
-        KTF.set_session(session)
-        model = Sequential()
-        model.add(Dense(nb_hidden, input_dim=input_dim, activation='relu'))
-        model.add(Dense(nb_class, activation='softmax'))
-        model.compile(loss='categorical_crossentropy',
-                      optimizer='sgd',
-                      metrics=['accuracy'])
+    tsb = callbacks.TensorBoard(log_dir=filepath, histogram_freq=1)
+    cbks = [tsb]

-        tsb = callbacks.TensorBoard(log_dir=filepath, histogram_freq=1)
-        cbks = [tsb]
+    # fit with validation data
+    model.fit(X_train, y_train, batch_size=batch_size,
+              validation_data=(X_test, y_test), callbacks=cbks, nb_epoch=2)

-        # fit with validation data
-        model.fit(X_train, y_train, batch_size=batch_size,
-                  validation_data=(X_test, y_test), callbacks=cbks, nb_epoch=2)
+    # fit with validation data and accuracy
+    model.fit(X_train, y_train, batch_size=batch_size,
+              validation_data=(X_test, y_test), callbacks=cbks, nb_epoch=2)

-        # fit with validation data and accuracy
-        model.fit(X_train, y_train, batch_size=batch_size,
-                  validation_data=(X_test, y_test), callbacks=cbks, nb_epoch=2)
+    # fit generator with validation data
+    model.fit_generator(data_generator(True), len(X_train), nb_epoch=2,
+                        validation_data=(X_test, y_test),
+                        callbacks=cbks)

-        # fit generator with validation data
-        model.fit_generator(data_generator(True), len(X_train), nb_epoch=2,
-                            validation_data=(X_test, y_test),
-                            callbacks=cbks)
+    # fit generator without validation data
+    model.fit_generator(data_generator(True), len(X_train), nb_epoch=2,
+                        callbacks=cbks)

-        # fit generator without validation data
-        model.fit_generator(data_generator(True), len(X_train), nb_epoch=2,
-                            callbacks=cbks)
+    # fit generator with validation data and accuracy
+    model.fit_generator(data_generator(True), len(X_train), nb_epoch=2,
+                        validation_data=(X_test, y_test),
+                        callbacks=cbks)

-        # fit generator with validation data and accuracy
-        model.fit_generator(data_generator(True), len(X_train), nb_epoch=2,
-                            validation_data=(X_test, y_test),
-                            callbacks=cbks)
+    # fit generator without validation data and accuracy
+    model.fit_generator(data_generator(True), len(X_train), nb_epoch=2,
+                        callbacks=cbks)

-        # fit generator without validation data and accuracy
-        model.fit_generator(data_generator(True), len(X_train), nb_epoch=2,
-                            callbacks=cbks)
-
-        assert os.path.exists(filepath)
-        shutil.rmtree(filepath)
-
-    # case 2 Graph
-
-    with tf.Graph().as_default():
-        session = tf.Session('')
-        KTF.set_session(session)
-        model = Graph()
-        model.add_input(name='X_vars', input_shape=(input_dim,))
-
-        model.add_node(Dense(nb_hidden, activation="sigmoid"),
-                       name='Dense1', input='X_vars')
-        model.add_node(Dense(nb_class, activation="softmax"),
-                       name='last_dense',
-                       input='Dense1')
-        model.add_output(name='output', input='last_dense')
-        model.compile(optimizer='sgd', loss={'output': 'mse'})
-
-        tsb = callbacks.TensorBoard(log_dir=filepath, histogram_freq=1)
-        cbks = [tsb]
-
-        # fit with validation
-        model.fit({'X_vars': X_train, 'output': y_train},
-                  batch_size=batch_size,
-                  validation_data={'X_vars': X_test, 'output': y_test},
-                  callbacks=cbks, nb_epoch=2)
-
-        # fit wo validation
-        model.fit({'X_vars': X_train, 'output': y_train},
-                  batch_size=batch_size,
-                  callbacks=cbks, nb_epoch=2)
-
-        # fit generator with validation
-        model.fit_generator(data_generator_graph(True), 1000, nb_epoch=2,
-                            validation_data={'X_vars': X_test, 'output': y_test},
-                            callbacks=cbks)
-
-        # fit generator wo validation
-        model.fit_generator(data_generator_graph(True), 1000, nb_epoch=2,
-                            callbacks=cbks)
-
-        assert os.path.exists(filepath)
-        shutil.rmtree(filepath)
-
-    KTF.set_session(old_session)
+    assert os.path.exists(filepath)
+    shutil.rmtree(filepath)


 def test_LambdaCallback():
@@ -343,7 +294,7 @@ def test_LambdaCallback():
    assert not p.is_alive()


-@pytest.mark.skipif((K._BACKEND != 'tensorflow'),
+@pytest.mark.skipif((K.backend() != 'tensorflow'),
                    reason="Requires tensorflow backend")
 def test_TensorBoard_with_ReduceLROnPlateau():
    import shutil
@@ -46,14 +46,50 @@ def test_matthews_correlation():
    assert expected - epsilon <= actual <= expected + epsilon


+def test_precision():
+    y_true = K.variable(np.array([0, 1, 1, 1, 0, 0, 0, 1, 1, 1, 0, 1, 0, 0]))
+    y_pred = K.variable(np.array([1, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0]))
+
+    # Calculated using sklearn.metrics.precision_score
+    expected = 0.40000000000000002
+
+    actual = K.eval(metrics.precision(y_true, y_pred))
+    epsilon = 1e-05
+    assert expected - epsilon <= actual <= expected + epsilon
+
+
+def test_recall():
+    y_true = K.variable(np.array([0, 1, 1, 1, 0, 0, 0, 1, 1, 1, 0, 1, 0, 0]))
+    y_pred = K.variable(np.array([1, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0]))
+
+    # Calculated using sklearn.metrics.recall_score
+    expected = 0.2857142857142857
+
+    actual = K.eval(metrics.recall(y_true, y_pred))
+    epsilon = 1e-05
+    assert expected - epsilon <= actual <= expected + epsilon
+
+
 def test_fbeta_score():
    y_true = K.variable(np.array([0, 1, 1, 1, 0, 0, 0, 1, 1, 1, 0, 1, 0, 0]))
    y_pred = K.variable(np.array([1, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0]))

+    # Calculated using sklearn.metrics.fbeta_score
+    expected = 0.30303030303030304
+
+    actual = K.eval(metrics.fbeta_score(y_true, y_pred, beta=2))
+    epsilon = 1e-05
+    assert expected - epsilon <= actual <= expected + epsilon
+
+
+def test_fmeasure():
+    y_true = K.variable(np.array([0, 1, 1, 1, 0, 0, 0, 1, 1, 1, 0, 1, 0, 0]))
+    y_pred = K.variable(np.array([1, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0]))
+
    # Calculated using sklearn.metrics.f1_score
    expected = 0.33333333333333331

-    actual = K.eval(metrics.fbeta_score(y_true, y_pred))
+    actual = K.eval(metrics.fmeasure(y_true, y_pred))
    epsilon = 1e-05
    assert expected - epsilon <= actual <= expected + epsilon

@@ -0,0 +1,114 @@
+from __future__ import absolute_import
+from __future__ import print_function
+import pytest
+
+from keras.utils.test_utils import keras_test
+from keras.models import Model, Sequential
+from keras.layers import Dense, Input
+
+
+@keras_test
+def test_layer_trainability_switch():
+    # with constructor argument, in Sequential
+    model = Sequential()
+    model.add(Dense(2, trainable=False, input_dim=1))
+    assert model.trainable_weights == []
+
+    # by setting the `trainable` argument, in Sequential
+    model = Sequential()
+    layer = Dense(2, input_dim=1)
+    model.add(layer)
+    assert model.trainable_weights == layer.trainable_weights
+    layer.trainable = False
+    assert model.trainable_weights == []
+
+    # with constructor argument, in Model
+    x = Input(shape=(1,))
+    y = Dense(2, trainable=False)(x)
+    model = Model(x, y)
+    assert model.trainable_weights == []
+
+    # by setting the `trainable` argument, in Model
+    x = Input(shape=(1,))
+    layer = Dense(2)
+    y = layer(x)
+    model = Model(x, y)
+    assert model.trainable_weights == layer.trainable_weights
+    layer.trainable = False
+    assert model.trainable_weights == []
+
+
+@keras_test
+def test_model_trainability_switch():
+    # a non-trainable model has no trainable weights
+    x = Input(shape=(1,))
+    y = Dense(2)(x)
+    model = Model(x, y)
+    model.trainable = False
+    assert model.trainable_weights == []
+
+    # same for Sequential
+    model = Sequential()
+    model.add(Dense(2, input_dim=1))
+    model.trainable = False
+    assert model.trainable_weights == []
+
+
+@keras_test
+def test_nested_model_trainability():
+    # a Sequential inside a Model
+    inner_model = Sequential()
+    inner_model.add(Dense(2, input_dim=1))
+
+    x = Input(shape=(1,))
+    y = inner_model(x)
+    outer_model = Model(x, y)
+    assert outer_model.trainable_weights == inner_model.trainable_weights
+    inner_model.trainable = False
+    assert outer_model.trainable_weights == []
+    inner_model.trainable = True
+    inner_model.layers[-1].trainable = False
+    assert outer_model.trainable_weights == []
+
+    # a Sequential inside a Sequential
+    inner_model = Sequential()
+    inner_model.add(Dense(2, input_dim=1))
+    outer_model = Sequential()
+    outer_model.add(inner_model)
+    assert outer_model.trainable_weights == inner_model.trainable_weights
+    inner_model.trainable = False
+    assert outer_model.trainable_weights == []
+    inner_model.trainable = True
+    inner_model.layers[-1].trainable = False
+    assert outer_model.trainable_weights == []
+
+    # a Model inside a Model
+    x = Input(shape=(1,))
+    y = Dense(2)(x)
+    inner_model = Model(x, y)
+    x = Input(shape=(1,))
+    y = inner_model(x)
+    outer_model = Model(x, y)
+    assert outer_model.trainable_weights == inner_model.trainable_weights
+    inner_model.trainable = False
+    assert outer_model.trainable_weights == []
+    inner_model.trainable = True
+    inner_model.layers[-1].trainable = False
+    assert outer_model.trainable_weights == []
+
+    # a Model inside a Sequential
+    x = Input(shape=(1,))
+    y = Dense(2)(x)
+    inner_model = Model(x, y)
+    outer_model = Sequential()
+    outer_model.add(inner_model)
+    assert outer_model.trainable_weights == inner_model.trainable_weights
+    inner_model.trainable = False
+    assert outer_model.trainable_weights == []
+    inner_model.trainable = True
+    inner_model.layers[-1].trainable = False
+    assert outer_model.trainable_weights == []
+
+
+if __name__ == '__main__':
+    pytest.main([__file__])
Autor	SHA1	Mensagem	Data
Francois Chollet	2ddd2bd557	Prepare new PyPI release	2016-11-25 20:52:27 -08:00
Ken Chatfield	b2aebb30bf	Don't add another header line to CSV logger when appending to an existing file (#4426 )	2016-11-25 14:49:50 -08:00
Fariz Rahman	0a9c0ca461	Sequential : Fix trainable arg (#4509 )	2016-11-25 11:59:05 -08:00
fchollet	c0b32a9a04	Remove reference to legacy Graph model in tests.	2016-11-25 01:20:10 -08:00
fchollet	703d5a1298	Add dynamic trainability lightweight test	2016-11-24 23:59:51 -08:00
fchollet	c5cc96a4f4	Saner way to collect trainable weights	2016-11-24 23:59:33 -08:00
fchollet	de256cb5d5	Make sure ImageNet predictions are sorted	2016-11-24 23:30:00 -08:00
fchollet	ce814302ac	Remove support for legacy Graph model	2016-11-24 23:29:45 -08:00
Fariz Rahman	628bc6e03e	ACGAN: Remove unnecessary dimension in label input (#4501 )	2016-11-24 20:21:56 -08:00
Thomas Pinetz	dfb606bb19	Fix border_mode = same for pooling layers documentation. (#4341 )	2016-11-24 12:42:59 -08:00
Dontloo	88f3b3f75e	fixed variational autoencoder visualization for Gaussian latent space (#4423 )	2016-11-23 14:08:19 -08:00
Marzuk Kamal	773d4ce8cb	def fbeta_score(y_true, y_pred, beta=1) (#4492 ) set the default value of beta=1	2016-11-23 13:25:29 -08:00
Fariz Rahman	509d6d8235	Merge : Serialize output mask; Enable user arguments for callable mode (#4445 ) * Update topology.py * Update topology.py * Update topology.py * white space fix * indentation fix * add tests * fix all tests * add arguments arg to merge * space after period * add test with arguments * add test with arguments for lambda layer too * pep8 fixes * fix tf test * try fixing tf test; again * bug fix * finally	2016-11-23 13:24:54 -08:00
Taras Boiko	7bd5c862a2	Correctly check the output dimension for None instead of target (#4458 )	2016-11-23 13:21:48 -08:00
Angelos Katharopoulos	2878f60634	Add map, foldl, foldr to the backend (#4461 )	2016-11-23 13:21:13 -08:00
Luke de Oliveira	50fdb87888	adding mnist acgan example (#4475 )	2016-11-23 13:19:29 -08:00
Gijs van Tulder	dad7790ec3	Model summary: separate columns with a space. (#4469 )	2016-11-23 11:06:30 -08:00
Marzuk Kamal	709bc5e15a	tf.global_variables and tf.variables_initializer (#4490 ) tf.all_variables and tf.initialize_variables are replaced by tf.global_variables and tf.variables_initializer for the future version of tensorflow	2016-11-23 11:06:10 -08:00
Ken Chatfield	06cc6d7fea	Add initial epoch argument to fit functions (#4429 ) * Added initial_epoch argument to fit functions in trainer * Added unit test * PEP8 fixes	2016-11-19 21:51:57 -08:00
EdwardRaff	97484ec9c1	Finishing Colincsl's SpatialDropout1D (#4416 ) * Added SpatialDropout1D This is a straightforward modification of SpatialDropout2D but for 1D data. * Added SpatialDropout1D to docs * SpatialDropout1D test * Fixed indent issue * Combined TF and TH dimension conditions Use the same 1D dimensions for TensorFlow and Theano in SpatialDropout1D. * trailing whitespace * Removed dim_ordering variable * Removing dim_ordering values removing dim_ordering values as requested	2016-11-19 12:30:05 -08:00
Taras Boiko	6b04add932	Check all output dimensions for compatibility (#4420 )	2016-11-19 10:10:08 -08:00
Yu Kobayashi	04ea01f385	Bug fix of Bidirectional(LSTM(..., stateful=True)) (#4424 ) * Bug fix of Bidirectional(LSTM(..., stateful=True)) https://github.com/fchollet/keras/issues/4421 * Add Recurrent.from_config() test	2016-11-18 12:19:42 -08:00
Yu Kobayashi	8653060ae6	Update Travis TensorFlow to 0.11.0 (#4367 )	2016-11-17 09:55:39 -08:00
Francois Chollet	8df3effa5f	Merge branch 'shareable_bn'	2016-11-16 19:07:06 -08:00
Francois Chollet	771010f43b	Add shareable BN (per-datastream updates).	2016-11-16 19:06:46 -08:00
Carl Thomé	8d20bac7fa	Remove extraneous batch_input_shape (#4393 )	2016-11-16 18:59:03 -08:00
Francois Chollet	c4c4fac1ae	Make BN shareable (not yet working)	2016-11-15 05:16:40 -08:00
Francois Chollet	016d85c9e6	Minor style fixes	2016-11-14 15:09:58 -08:00
Francois Chollet	3ab29205fc	Merge branch 'master' of https://github.com/fchollet/keras	2016-11-14 15:08:04 -08:00
Francois Chollet	fdd150eb4d	Minor style fixes	2016-11-14 15:07:51 -08:00
Anton Chernyavski	789a2be8d9	Fix get_layer() by index (#4376 )	2016-11-14 09:47:27 -08:00
Francois Chollet	ae7ef37c1b	Merge branch 'master' of https://github.com/fchollet/keras	2016-11-09 20:57:43 -08:00
Francois Chollet	94fba3d8f0	Fix Theano tests	2016-11-09 20:57:30 -08:00
Yu Kobayashi	6ac9af0a5a	Fix the load_model() bug by sorting weights by names (#4338 )	2016-11-09 20:36:45 -08:00
Francois Chollet	e916f748db	Fix Theano tests	2016-11-09 20:33:42 -08:00
Francois Chollet	92e8a20761	Remove unused set_input method	2016-11-09 18:34:09 -08:00
Francois Chollet	cb3de665d1	Simplify tests	2016-11-09 18:01:19 -08:00
Francois Chollet	49a5cdf76d	Improve error message	2016-11-09 18:01:06 -08:00
Francois Chollet	08a090de43	Merge branch 'master' of https://github.com/fchollet/keras	2016-11-09 17:33:49 -08:00
Francois Chollet	fa3b17cd96	Minor code cleanup	2016-11-09 17:33:31 -08:00
Ken Chatfield	5266fdacf1	Bugfix to CIFAR pickle reading code in Python 3 (#4319 )	2016-11-09 17:14:36 -08:00
nagachika	b74c5953f0	Print EarlyStopping verbose message on_train_end. (#4332 ) The message print on_epoch_end would be overwritten by ProgbarLogger.	2016-11-09 16:35:22 -08:00
Yu Kobayashi	00e8d20eae	Theano tile() expects Python int, so casting from numpy.int32 to Python int. (#4330 )	2016-11-09 16:23:22 -08:00
Gijs van Tulder	e8e63e307e	Theano: try not to use the old pool_* interface. (#4321 )	2016-11-09 16:22:37 -08:00
Uwe Schmidt	7db6de848a	Fix for issue #3965 (#4333 ) * Fixes issue with resize_images and partially-definded tensors Disclaimer: I haven't tested this with `dim_ordering == 'th'` * PEP8 syntax	2016-11-09 16:21:37 -08:00
Matt Gardner	8360ef3a5a	Add documentation to set self.built = True in MyLayer.build() (#4315 ) * Added documentation to set self.built = True in MyLayer.build() * Update writing-your-own-keras-layers.md	2016-11-07 18:19:27 -08:00
Francois Chollet	d32b8fa4bd	Further code cleanup	2016-11-07 17:27:41 -08:00
Francois Chollet	c95c32e473	Improve docstrings	2016-11-07 15:36:57 -08:00
Francois Chollet	02fe371839	Merge branch 'master' of https://github.com/fchollet/keras	2016-11-07 12:46:54 -08:00
Francois Chollet	b7b7c2ea94	Normalize default argument values	2016-11-07 12:46:41 -08:00
Francois Chollet	105dd031dd	Documentation improvements	2016-11-07 12:46:18 -08:00
Joshua Loyal	4fa289166a	allow for learning rate dtypes returned by numpy (#4304 )	2016-11-07 10:33:11 -08:00
Carl Thomé	a8bbcf611f	ConvLSTM2D docstring spelling (#4306 ) * Spelling * "convolutionnal" spelling	2016-11-06 12:05:20 -08:00
Francois Chollet	d5030b1f8c	Add conv_lstm to examples/README	2016-11-05 15:30:33 -07:00
Francois Chollet	f127b2f81d	Merge branch 'imodpasteur-rebasedconvV1'	2016-11-05 13:46:02 -07:00
Francois Chollet	9d4087a1e9	Style fixes	2016-11-05 13:45:50 -07:00
Francois Chollet	fd326ddf1b	Merge branch 'rebasedconvV1' of https://github.com/imodpasteur/keras into imodpasteur-rebasedconvV1	2016-11-05 13:32:03 -07:00
Francois Chollet	7f42253f46	Add basic support for TF optimizers, part deux	2016-11-05 13:26:03 -07:00
Francois Chollet	18d7e5e6e4	Style fixes	2016-11-05 13:22:18 -07:00
Francois Chollet	6610880fd4	Merge branch 'master' of https://github.com/fchollet/keras	2016-11-05 13:21:38 -07:00
Arbona	11b73ae6b4	Tf dynamic	2016-11-04 21:20:30 +01:00
Carl Thomé	2b51317be8	Refactor F-score into precision and recall metrics (#4276 ) * Refactor f-score into precision and recall metrics * Docstring consistency * Add docstring for fmeasure * Added precision, recall, f-measure tests	2016-11-03 20:28:04 -07:00
Francois Chollet	650c2c8cf9	Add basic support for TF optimizers	2016-11-03 11:38:00 -07:00
Igor Macedo Quintanilha	49386e8da4	Bug fix when target is a SparseTensor. (#4200 ) * Bug fix when target is a SparseTensor. Check for sparsity when creating target placeholder. Remove shape argument when creating sparse placeholder. * Fixed ndim behavior for sparse tensor * Fix sparse variable instantiation. * Bug fix	2016-11-03 10:04:40 -07:00
Thang Bui	71494ffdbc	changed VAE sampling variance to 1 (#4211 ) * Update variational_autoencoder.py fixed sampling bug * Update variational_autoencoder_deconv.py fixed variance bug	2016-11-02 15:58:32 -07:00
Francois Chollet	a9b6bef062	Improve dynamic TF RNN implementation.	2016-11-02 11:51:29 -07:00
Francois Chollet	4840e435f7	Improve RNN error messages	2016-11-02 10:47:46 -07:00
Arbona	531147c877	Fix review	2016-11-02 12:08:31 +01:00
Francois Chollet	61c21ef9ee	Imagenet predictions sorting fix	2016-11-01 17:39:39 -07:00
Francois Chollet	058e54061b	Style fixes	2016-11-01 17:39:23 -07:00
Francois Chollet	32be731194	Some backend refactoring	2016-11-01 16:52:25 -07:00
Francois Chollet	9bf55395f1	Simplify 1D pooling implementation	2016-11-01 16:51:54 -07:00
Francois Chollet	114b82a212	Minor TF backend improvements	2016-11-01 15:26:01 -07:00
manelbaradad	7d143370d8	BUG: Deconvolution2D output shape not correctly referenced (#4251 )	2016-11-01 11:24:54 -07:00
Gijs van Tulder	bc6880fa34	Enable full convolution with the Theano backend. (#4250 )	2016-11-01 11:03:50 -07:00
Arbona	40fd415409	Changed name example	2016-10-27 10:46:54 +02:00
Arbona	8b11f13507	Changed name	2016-10-25 17:45:28 +02:00
Arbona	2c96373a41	remove another useless check	2016-10-13 21:30:01 +02:00
Arbona	731e1bb206	remove a useless check	2016-10-13 21:28:51 +02:00
Arbona	c1a72b3644	More test and fixed dropout	2016-10-13 20:58:01 +02:00
Arbona	0e7f3e04b0	pep fixed	2016-10-12 22:11:22 +02:00
Arbona	53552b1d6e	Various fix	2016-10-12 22:00:55 +02:00
Arbona	6b7421c448	Various fix	2016-10-09 10:46:04 +02:00
Arbona	1d0d79f61a	Various fix	2016-10-03 11:43:24 +02:00
Arbona	b5dddeb419	Removed notebook and added example in python	2016-10-03 10:45:53 +02:00
JM Arbona	a3697d097d	Added recurrent convolutionnal layer	2016-09-29 10:18:24 +02:00