Comparar commits
86 Commits
| Autor | SHA1 | Data | |
|---|---|---|---|
| 2ddd2bd557 | |||
| b2aebb30bf | |||
| 0a9c0ca461 | |||
| c0b32a9a04 | |||
| 703d5a1298 | |||
| c5cc96a4f4 | |||
| de256cb5d5 | |||
| ce814302ac | |||
| 628bc6e03e | |||
| dfb606bb19 | |||
| 88f3b3f75e | |||
| 773d4ce8cb | |||
| 509d6d8235 | |||
| 7bd5c862a2 | |||
| 2878f60634 | |||
| 50fdb87888 | |||
| dad7790ec3 | |||
| 709bc5e15a | |||
| 06cc6d7fea | |||
| 97484ec9c1 | |||
| 6b04add932 | |||
| 04ea01f385 | |||
| 8653060ae6 | |||
| 8df3effa5f | |||
| 771010f43b | |||
| 8d20bac7fa | |||
| c4c4fac1ae | |||
| 016d85c9e6 | |||
| 3ab29205fc | |||
| fdd150eb4d | |||
| 789a2be8d9 | |||
| ae7ef37c1b | |||
| 94fba3d8f0 | |||
| 6ac9af0a5a | |||
| e916f748db | |||
| 92e8a20761 | |||
| cb3de665d1 | |||
| 49a5cdf76d | |||
| 08a090de43 | |||
| fa3b17cd96 | |||
| 5266fdacf1 | |||
| b74c5953f0 | |||
| 00e8d20eae | |||
| e8e63e307e | |||
| 7db6de848a | |||
| 8360ef3a5a | |||
| d32b8fa4bd | |||
| c95c32e473 | |||
| 02fe371839 | |||
| b7b7c2ea94 | |||
| 105dd031dd | |||
| 4fa289166a | |||
| a8bbcf611f | |||
| d5030b1f8c | |||
| f127b2f81d | |||
| 9d4087a1e9 | |||
| fd326ddf1b | |||
| 7f42253f46 | |||
| 18d7e5e6e4 | |||
| 6610880fd4 | |||
| 11b73ae6b4 | |||
| 2b51317be8 | |||
| 650c2c8cf9 | |||
| 49386e8da4 | |||
| 71494ffdbc | |||
| a9b6bef062 | |||
| 4840e435f7 | |||
| 531147c877 | |||
| 61c21ef9ee | |||
| 058e54061b | |||
| 32be731194 | |||
| 9bf55395f1 | |||
| 114b82a212 | |||
| 7d143370d8 | |||
| bc6880fa34 | |||
| 40fd415409 | |||
| 8b11f13507 | |||
| 2c96373a41 | |||
| 731e1bb206 | |||
| c1a72b3644 | |||
| 0e7f3e04b0 | |||
| 53552b1d6e | |||
| 6b7421c448 | |||
| 1d0d79f61a | |||
| b5dddeb419 | |||
| a3697d097d |
+2
-2
@@ -49,9 +49,9 @@ install:
|
||||
|
||||
# install TensorFlow
|
||||
- if [[ "$TRAVIS_PYTHON_VERSION" == "2.7" ]]; then
|
||||
pip install https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-0.9.0-cp27-none-linux_x86_64.whl;
|
||||
pip install https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-0.11.0-cp27-none-linux_x86_64.whl;
|
||||
elif [[ "$TRAVIS_PYTHON_VERSION" == "3.4" ]]; then
|
||||
pip install https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-0.9.0-cp34-cp34m-linux_x86_64.whl;
|
||||
pip install https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-0.11.0-cp34-cp34m-linux_x86_64.whl;
|
||||
fi
|
||||
# command to run tests
|
||||
script:
|
||||
|
||||
@@ -139,6 +139,7 @@ PAGES = [
|
||||
core.Dense,
|
||||
core.Activation,
|
||||
core.Dropout,
|
||||
core.SpatialDropout1D,
|
||||
core.SpatialDropout2D,
|
||||
core.SpatialDropout3D,
|
||||
core.Flatten,
|
||||
|
||||
@@ -4,7 +4,7 @@ For simple, stateless custom operations, you are probably better off using `laye
|
||||
|
||||
Here is the skeleton of a Keras layer. There are only three methods you need to implement:
|
||||
|
||||
- `build(input_shape)`: this is where you will define your weights. Trainable weights should be added to the list `self.trainable_weights`. Other attributes of note are: `self.non_trainable_weights` (list) and `self.updates` (list of update tuples (tensor, new_tensor)). For an example of how to use `non_trainable_weights` and `updates`, see the code for the `BatchNormalization` layer.
|
||||
- `build(input_shape)`: this is where you will define your weights. Trainable weights should be added to the list `self.trainable_weights`. Other attributes of note are: `self.non_trainable_weights` (list) and `self.updates` (list of update tuples (tensor, new_tensor)). For an example of how to use `non_trainable_weights` and `updates`, see the code for the `BatchNormalization` layer. This method must set `self.built = True`, which can be done by calling `super([Layer], self).build()`.
|
||||
- `call(x)`: this is where the layer's logic lives. Unless you want your layer to support masking, you only have to care about the first argument passed to `call`: the input tensor.
|
||||
- `get_output_shape_for(input_shape)`: in case your layer modifies the shape of its input, you should specify here the shape transformation logic. This allows Keras to do automatic shape inference.
|
||||
|
||||
@@ -23,6 +23,7 @@ class MyLayer(Layer):
|
||||
initial_weight_value = np.random.random((input_dim, output_dim))
|
||||
self.W = K.variable(initial_weight_value)
|
||||
self.trainable_weights = [self.W]
|
||||
super(MyLayer, self).build() # be sure you call this somewhere!
|
||||
|
||||
def call(self, x, mask=None):
|
||||
return K.dot(x, self.W)
|
||||
@@ -31,4 +32,4 @@ class MyLayer(Layer):
|
||||
return (input_shape[0], self.output_dim)
|
||||
```
|
||||
|
||||
The existing Keras layers provide ample examples of how to implement almost anything. Never hesitate to read the source code!
|
||||
The existing Keras layers provide ample examples of how to implement almost anything. Never hesitate to read the source code!
|
||||
|
||||
@@ -18,6 +18,9 @@ Trains a simple deep CNN on the CIFAR10 small images dataset.
|
||||
[conv_filter_visualization.py](conv_filter_visualization.py)
|
||||
Visualization of the filters of VGG16, via gradient ascent in input space.
|
||||
|
||||
[conv_lstm.py](conv_lstm.py)
|
||||
Demonstrates the use of a convolutional LSTM network.
|
||||
|
||||
[deep_dream.py](deep_dream.py)
|
||||
Deep Dreams in Keras.
|
||||
|
||||
|
||||
@@ -0,0 +1,142 @@
|
||||
""" This script demonstrates the use of a convolutional LSTM network.
|
||||
This network is used to predict the next frame of an artificially
|
||||
generated movie which contains moving squares.
|
||||
"""
|
||||
from keras.models import Sequential
|
||||
from keras.layers.convolutional import Convolution3D
|
||||
from keras.layers.convolutional_recurrent import ConvLSTM2D
|
||||
from keras.layers.normalization import BatchNormalization
|
||||
import numpy as np
|
||||
import pylab as plt
|
||||
|
||||
# We create a layer which takes as input movies of shape
|
||||
# (n_frames, width, height, channels) and returns a movie
|
||||
# of identical shape.
|
||||
|
||||
seq = Sequential()
|
||||
seq.add(ConvLSTM2D(nb_filter=40, nb_row=3, nb_col=3,
|
||||
input_shape=(None, 40, 40, 1),
|
||||
border_mode='same', return_sequences=True))
|
||||
seq.add(BatchNormalization())
|
||||
|
||||
seq.add(ConvLSTM2D(nb_filter=40, nb_row=3, nb_col=3,
|
||||
border_mode='same', return_sequences=True))
|
||||
seq.add(BatchNormalization())
|
||||
|
||||
seq.add(ConvLSTM2D(nb_filter=40, nb_row=3, nb_col=3,
|
||||
border_mode='same', return_sequences=True))
|
||||
seq.add(BatchNormalization())
|
||||
|
||||
seq.add(ConvLSTM2D(nb_filter=40, nb_row=3, nb_col=3,
|
||||
border_mode='same', return_sequences=True))
|
||||
seq.add(BatchNormalization())
|
||||
|
||||
seq.add(Convolution3D(nb_filter=1, kernel_dim1=1, kernel_dim2=3,
|
||||
kernel_dim3=3, activation='sigmoid',
|
||||
border_mode='same', dim_ordering='tf'))
|
||||
|
||||
seq.compile(loss='binary_crossentropy', optimizer='adadelta')
|
||||
|
||||
|
||||
# Artificial data generation:
|
||||
# Generate movies with 3 to 7 moving squares inside.
|
||||
# The squares are of shape 4x4 or 6x6 pixels,
|
||||
# which move linearly over time.
|
||||
# For convenience we first create movies with bigger width and height (80x80)
|
||||
# and at the end we select a 40x40 window.
|
||||
|
||||
def generate_movies(n_samples=1200, n_frames=15):
    """Generate artificial movies of moving squares and next-frame targets.

    Each movie is drawn on an 80x80 canvas and cropped to the central
    40x40 window before being returned.

    # Arguments
        n_samples: number of movies to generate.
        n_frames: number of frames per movie.

    # Returns
        A tuple `(noisy_movies, shifted_movies)` of float arrays, each of
        shape `(n_samples, n_frames, 40, 40, 1)`. `noisy_movies` holds the
        input frames (squares plus optional +/-0.1 noise, values >= 1
        clipped to 1); `shifted_movies` holds the same squares advanced by
        one time step, with values in {0, 1}.
    """
    row = 80
    col = 80
    # Use the builtin `float`: the `np.float` alias was removed in
    # NumPy 1.24 and raises AttributeError there.
    noisy_movies = np.zeros((n_samples, n_frames, row, col, 1), dtype=float)
    shifted_movies = np.zeros((n_samples, n_frames, row, col, 1),
                              dtype=float)

    for i in range(n_samples):
        # Add 3 to 7 moving squares
        n = np.random.randint(3, 8)

        for _ in range(n):
            # Initial position
            xstart = np.random.randint(20, 60)
            ystart = np.random.randint(20, 60)
            # Direction of motion (-1, 0 or 1 along each axis)
            directionx = np.random.randint(0, 3) - 1
            directiony = np.random.randint(0, 3) - 1

            # Half-width of the square (the square spans 2 * w pixels)
            w = np.random.randint(2, 4)

            for t in range(n_frames):
                x_shift = xstart + directionx * t
                y_shift = ystart + directiony * t
                noisy_movies[i, t, x_shift - w: x_shift + w,
                             y_shift - w: y_shift + w, 0] += 1

                # Make it more robust by adding noise.
                # The idea is that if during inference,
                # the value of the pixel is not exactly one,
                # we need to train the network to be robust and still
                # consider it as a pixel belonging to a square.
                if np.random.randint(0, 2):
                    noise_f = (-1)**np.random.randint(0, 2)
                    noisy_movies[i, t,
                                 x_shift - w - 1: x_shift + w + 1,
                                 y_shift - w - 1: y_shift + w + 1,
                                 0] += noise_f * 0.1

                # Shift the ground truth by 1
                x_shift = xstart + directionx * (t + 1)
                y_shift = ystart + directiony * (t + 1)
                shifted_movies[i, t, x_shift - w: x_shift + w,
                               y_shift - w: y_shift + w, 0] += 1

    # Cut to a 40x40 window
    noisy_movies = noisy_movies[::, ::, 20:60, 20:60, ::]
    shifted_movies = shifted_movies[::, ::, 20:60, 20:60, ::]
    # Overlapping squares accumulate above 1; clip them back to 1.
    noisy_movies[noisy_movies >= 1] = 1
    shifted_movies[shifted_movies >= 1] = 1
    return noisy_movies, shifted_movies
|
||||
|
||||
# Train the network
|
||||
noisy_movies, shifted_movies = generate_movies(n_samples=1200)
|
||||
seq.fit(noisy_movies[:1000], shifted_movies[:1000], batch_size=10,
|
||||
nb_epoch=300, validation_split=0.05)
|
||||
|
||||
# Testing the network on one movie
|
||||
# feed it with the first 7 positions and then
|
||||
# predict the new positions
|
||||
which = 1004
|
||||
track = noisy_movies[which][:7, ::, ::, ::]
|
||||
|
||||
for j in range(16):
|
||||
new_pos = seq.predict(track[np.newaxis, ::, ::, ::, ::])
|
||||
new = new_pos[::, -1, ::, ::, ::]
|
||||
track = np.concatenate((track, new), axis=0)
|
||||
|
||||
|
||||
# And then compare the predictions
|
||||
# to the ground truth
|
||||
track2 = noisy_movies[which][::, ::, ::, ::]
|
||||
for i in range(15):
|
||||
fig = plt.figure(figsize=(10, 5))
|
||||
|
||||
ax = fig.add_subplot(121)
|
||||
|
||||
if i >= 7:
|
||||
ax.text(1, 3, 'Predictions !', fontsize=20, color='w')
|
||||
else:
|
||||
ax.text(1, 3, 'Inital trajectory', fontsize=20)
|
||||
|
||||
toplot = track[i, ::, ::, 0]
|
||||
|
||||
plt.imshow(toplot)
|
||||
ax = fig.add_subplot(122)
|
||||
plt.text(1, 3, 'Ground truth', fontsize=20)
|
||||
|
||||
toplot = track2[i, ::, ::, 0]
|
||||
if i >= 2:
|
||||
toplot = shifted_movies[which][i - 1, ::, ::, 0]
|
||||
|
||||
plt.imshow(toplot)
|
||||
plt.savefig('%i_animate.png' % (i + 1))
|
||||
@@ -0,0 +1,314 @@
|
||||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
Train an Auxiliary Classifier Generative Adversarial Network (ACGAN) on the
|
||||
MNIST dataset. See https://arxiv.org/abs/1610.09585 for more details.
|
||||
|
||||
You should start to see reasonable images after ~5 epochs, and good images
|
||||
by ~15 epochs. You should use a GPU, as the convolution-heavy operations are
|
||||
very slow on the CPU. Prefer the TensorFlow backend if you plan on iterating, as
|
||||
the compilation time can be a blocker using Theano.
|
||||
|
||||
Timings:
|
||||
|
||||
Hardware | Backend | Time / Epoch
|
||||
-------------------------------------------
|
||||
CPU | TF | 3 hrs
|
||||
Titan X (maxwell) | TF | 4 min
|
||||
Titan X (maxwell) | TH | 7 min
|
||||
|
||||
Consult https://github.com/lukedeo/keras-acgan for more information and
|
||||
example output
|
||||
"""
|
||||
from __future__ import print_function
|
||||
|
||||
from collections import defaultdict
|
||||
import cPickle as pickle
|
||||
from PIL import Image
|
||||
|
||||
from six.moves import range
|
||||
|
||||
import keras.backend as K
|
||||
from keras.datasets import mnist
|
||||
from keras.layers import Input, Dense, Reshape, Flatten, Embedding, merge, Dropout
|
||||
from keras.layers.advanced_activations import LeakyReLU
|
||||
from keras.layers.convolutional import UpSampling2D, Convolution2D
|
||||
from keras.models import Sequential, Model
|
||||
from keras.optimizers import Adam
|
||||
from keras.utils.generic_utils import Progbar
|
||||
import numpy as np
|
||||
|
||||
np.random.seed(1337)
|
||||
|
||||
K.set_image_dim_ordering('th')
|
||||
|
||||
|
||||
def build_generator(latent_size):
    """Build the generator half of the ACGAN.

    The generator maps a pair (z, L), where z is a latent vector of
    length `latent_size` and L is a label drawn from P_c, to image
    space (..., 1, 28, 28).

    # Arguments
        latent_size: length of the latent vector z.

    # Returns
        A `Model` taking `[latent, image_class]` and producing the
        generated image.
    """
    # Layers are instantiated and added in this exact order.
    stack = [
        Dense(1024, input_dim=latent_size, activation='relu'),
        Dense(128 * 7 * 7, activation='relu'),
        Reshape((128, 7, 7)),
        # upsample to (..., 14, 14)
        UpSampling2D(size=(2, 2)),
        Convolution2D(256, 5, 5, border_mode='same',
                      activation='relu', init='glorot_normal'),
        # upsample to (..., 28, 28)
        UpSampling2D(size=(2, 2)),
        Convolution2D(128, 5, 5, border_mode='same',
                      activation='relu', init='glorot_normal'),
        # take a channel axis reduction
        Convolution2D(1, 2, 2, border_mode='same',
                      activation='tanh', init='glorot_normal'),
    ]
    decoder = Sequential()
    for layer in stack:
        decoder.add(layer)

    # this is the z space commonly referred to in GAN papers
    latent = Input(shape=(latent_size, ))

    # this will be our label
    image_class = Input(shape=(1,), dtype='int32')

    # 10 classes in MNIST: embed the label, then flatten the
    # length-one sequence into a vector
    embedded_class = Embedding(10, latent_size,
                               init='glorot_normal')(image_class)
    cls = Flatten()(embedded_class)

    # hadamard product between z-space and a class conditional embedding
    conditioned = merge([latent, cls], mode='mul')

    fake_image = decoder(conditioned)

    return Model(input=[latent, image_class], output=fake_image)
|
||||
|
||||
|
||||
def build_discriminator():
    """Build the discriminator half of the ACGAN.

    A relatively standard conv net, with LeakyReLUs as suggested in the
    reference paper, applied to images of shape (1, 28, 28).

    # Returns
        A `Model` mapping an image to two outputs: `generation`
        (sigmoid, whether the discriminator thinks the image is fake)
        and `auxiliary` (10-way softmax, the class the discriminator
        thinks the image belongs to).
    """
    net = Sequential()

    # The first stage carries the input shape; the rest share one pattern.
    net.add(Convolution2D(32, 3, 3, border_mode='same', subsample=(2, 2),
                          input_shape=(1, 28, 28)))
    net.add(LeakyReLU())
    net.add(Dropout(0.3))

    for nb_filter, stride in ((64, (1, 1)), (128, (2, 2)), (256, (1, 1))):
        net.add(Convolution2D(nb_filter, 3, 3, border_mode='same',
                              subsample=stride))
        net.add(LeakyReLU())
        net.add(Dropout(0.3))

    net.add(Flatten())

    image = Input(shape=(1, 28, 28))
    features = net(image)

    # Two heads on the shared features: real/fake and digit class.
    fake = Dense(1, activation='sigmoid', name='generation')(features)
    aux = Dense(10, activation='softmax', name='auxiliary')(features)

    return Model(input=image, output=[fake, aux])
|
||||
|
||||
if __name__ == '__main__':
|
||||
|
||||
# batch and latent size taken from the paper
|
||||
nb_epochs = 50
|
||||
batch_size = 100
|
||||
latent_size = 100
|
||||
|
||||
# Adam parameters suggested in https://arxiv.org/abs/1511.06434
|
||||
adam_lr = 0.0002
|
||||
adam_beta_1 = 0.5
|
||||
|
||||
# build the discriminator
|
||||
discriminator = build_discriminator()
|
||||
discriminator.compile(
|
||||
optimizer=Adam(lr=adam_lr, beta_1=adam_beta_1),
|
||||
loss=['binary_crossentropy', 'sparse_categorical_crossentropy']
|
||||
)
|
||||
|
||||
# build the generator
|
||||
generator = build_generator(latent_size)
|
||||
generator.compile(optimizer=Adam(lr=adam_lr, beta_1=adam_beta_1),
|
||||
loss='binary_crossentropy')
|
||||
|
||||
latent = Input(shape=(latent_size, ))
|
||||
image_class = Input(shape=(1,), dtype='int32')
|
||||
|
||||
# get a fake image
|
||||
fake = generator([latent, image_class])
|
||||
|
||||
# we only want to be able to train generation for the combined model
|
||||
discriminator.trainable = False
|
||||
fake, aux = discriminator(fake)
|
||||
combined = Model(input=[latent, image_class], output=[fake, aux])
|
||||
|
||||
combined.compile(
|
||||
optimizer=Adam(lr=adam_lr, beta_1=adam_beta_1),
|
||||
loss=['binary_crossentropy', 'sparse_categorical_crossentropy']
|
||||
)
|
||||
|
||||
discriminator.trainable = True
|
||||
|
||||
# get our mnist data, and force it to be of shape (..., 1, 28, 28) with
|
||||
# range [-1, 1]
|
||||
(X_train, y_train), (X_test, y_test) = mnist.load_data()
|
||||
X_train = (X_train.astype(np.float32) - 127.5) / 127.5
|
||||
X_train = np.expand_dims(X_train, axis=1)
|
||||
|
||||
X_test = (X_test.astype(np.float32) - 127.5) / 127.5
|
||||
X_test = np.expand_dims(X_test, axis=1)
|
||||
|
||||
nb_train, nb_test = X_train.shape[0], X_test.shape[0]
|
||||
|
||||
train_history = defaultdict(list)
|
||||
test_history = defaultdict(list)
|
||||
|
||||
for epoch in range(nb_epochs):
|
||||
print('Epoch {} of {}'.format(epoch + 1, nb_epochs))
|
||||
|
||||
nb_batches = int(X_train.shape[0] / batch_size)
|
||||
progress_bar = Progbar(target=nb_batches)
|
||||
|
||||
epoch_gen_loss = []
|
||||
epoch_disc_loss = []
|
||||
|
||||
for index in range(nb_batches):
|
||||
progress_bar.update(index)
|
||||
# generate a new batch of noise
|
||||
noise = np.random.uniform(-1, 1, (batch_size, latent_size))
|
||||
|
||||
# get a batch of real images
|
||||
image_batch = X_train[index * batch_size:(index + 1) * batch_size]
|
||||
label_batch = y_train[index * batch_size:(index + 1) * batch_size]
|
||||
|
||||
# sample some labels from p_c
|
||||
sampled_labels = np.random.randint(0, 10, batch_size)
|
||||
|
||||
# generate a batch of fake images, using the generated labels as a
|
||||
# conditioner. We reshape the sampled labels to be
|
||||
# (batch_size, 1) so that we can feed them into the embedding
|
||||
# layer as a length one sequence
|
||||
generated_images = generator.predict(
|
||||
[noise, sampled_labels.reshape((-1, 1))], verbose=0)
|
||||
|
||||
X = np.concatenate((image_batch, generated_images))
|
||||
y = np.array([1] * batch_size + [0] * batch_size)
|
||||
aux_y = np.concatenate((label_batch, sampled_labels), axis=0)
|
||||
|
||||
# see if the discriminator can figure itself out...
|
||||
epoch_disc_loss.append(discriminator.train_on_batch(X, [y, aux_y]))
|
||||
|
||||
# make new noise. we generate 2 * batch size here such that we have
|
||||
# the generator optimize over an identical number of images as the
|
||||
# discriminator
|
||||
noise = np.random.uniform(-1, 1, (2 * batch_size, latent_size))
|
||||
sampled_labels = np.random.randint(0, 10, 2 * batch_size)
|
||||
|
||||
# we want to fix the discriminator and let the generator train to
|
||||
# trick it
|
||||
discriminator.trainable = False
|
||||
|
||||
# For the generator, we want all the {fake, not-fake} labels to say
|
||||
# not-fake
|
||||
trick = np.ones(2 * batch_size)
|
||||
|
||||
epoch_gen_loss.append(combined.train_on_batch(
|
||||
[noise, sampled_labels.reshape((-1, 1))], [trick, sampled_labels]))
|
||||
|
||||
discriminator.trainable = True
|
||||
|
||||
print('\nTesting for epoch {}:'.format(epoch + 1))
|
||||
|
||||
# evaluate the testing loss here
|
||||
|
||||
# generate a new batch of noise
|
||||
noise = np.random.uniform(-1, 1, (nb_test, latent_size))
|
||||
|
||||
# sample some labels from p_c and generate images from them
|
||||
sampled_labels = np.random.randint(0, 10, nb_test)
|
||||
generated_images = generator.predict(
|
||||
[noise, sampled_labels.reshape((-1, 1))], verbose=False)
|
||||
|
||||
X = np.concatenate((X_test, generated_images))
|
||||
y = np.array([1] * nb_test + [0] * nb_test)
|
||||
aux_y = np.concatenate((y_test, sampled_labels), axis=0)
|
||||
|
||||
# see if the discriminator can figure itself out...
|
||||
discriminator_test_loss = discriminator.evaluate(
|
||||
X, [y, aux_y], verbose=False)
|
||||
|
||||
discriminator_train_loss = np.mean(np.array(epoch_disc_loss), axis=0)
|
||||
|
||||
# make new noise
|
||||
noise = np.random.uniform(-1, 1, (2 * nb_test, latent_size))
|
||||
sampled_labels = np.random.randint(0, 10, 2 * nb_test)
|
||||
|
||||
trick = np.ones(2 * nb_test)
|
||||
|
||||
generator_test_loss = combined.evaluate(
|
||||
[noise, sampled_labels.reshape((-1, 1))],
|
||||
[trick, sampled_labels], verbose=False)
|
||||
|
||||
generator_train_loss = np.mean(np.array(epoch_gen_loss), axis=0)
|
||||
|
||||
# generate an epoch report on performance
|
||||
train_history['generator'].append(generator_train_loss)
|
||||
train_history['discriminator'].append(discriminator_train_loss)
|
||||
|
||||
test_history['generator'].append(generator_test_loss)
|
||||
test_history['discriminator'].append(discriminator_test_loss)
|
||||
|
||||
print('{0:<22s} | {1:4s} | {2:15s} | {3:5s}'.format(
|
||||
'component', *discriminator.metrics_names))
|
||||
print('-' * 65)
|
||||
|
||||
ROW_FMT = '{0:<22s} | {1:<4.2f} | {2:<15.2f} | {3:<5.2f}'
|
||||
print(ROW_FMT.format('generator (train)',
|
||||
*train_history['generator'][-1]))
|
||||
print(ROW_FMT.format('generator (test)',
|
||||
*test_history['generator'][-1]))
|
||||
print(ROW_FMT.format('discriminator (train)',
|
||||
*train_history['discriminator'][-1]))
|
||||
print(ROW_FMT.format('discriminator (test)',
|
||||
*test_history['discriminator'][-1]))
|
||||
|
||||
# save weights every epoch
|
||||
generator.save_weights(
|
||||
'params_generator_epoch_{0:03d}.hdf5'.format(epoch), True)
|
||||
discriminator.save_weights(
|
||||
'params_discriminator_epoch_{0:03d}.hdf5'.format(epoch), True)
|
||||
|
||||
# generate some digits to display
|
||||
noise = np.random.uniform(-1, 1, (100, latent_size))
|
||||
|
||||
sampled_labels = np.array([
|
||||
[i] * 10 for i in range(10)
|
||||
]).reshape(-1, 1)
|
||||
|
||||
# get a batch to display
|
||||
generated_images = generator.predict(
|
||||
[noise, sampled_labels], verbose=0)
|
||||
|
||||
# arrange them into a grid
|
||||
img = (np.concatenate([r.reshape(-1, 28)
|
||||
for r in np.split(generated_images, 10)
|
||||
], axis=-1) * 127.5 + 127.5).astype(np.uint8)
|
||||
|
||||
Image.fromarray(img).save(
|
||||
'plot_epoch_{0:03d}_generated.png'.format(epoch))
|
||||
|
||||
pickle.dump({'train': train_history, 'test': test_history},
|
||||
open('acgan-history.pkl', 'wb'))
|
||||
@@ -54,7 +54,6 @@ model.add(LSTM(50,
|
||||
return_sequences=True,
|
||||
stateful=True))
|
||||
model.add(LSTM(50,
|
||||
batch_input_shape=(batch_size, tsteps, 1),
|
||||
return_sequences=False,
|
||||
stateful=True))
|
||||
model.add(Dense(1))
|
||||
|
||||
@@ -4,6 +4,7 @@ Reference: "Auto-Encoding Variational Bayes" https://arxiv.org/abs/1312.6114
|
||||
'''
|
||||
import numpy as np
|
||||
import matplotlib.pyplot as plt
|
||||
from scipy.stats import norm
|
||||
|
||||
from keras.layers import Input, Dense, Lambda
|
||||
from keras.models import Model
|
||||
@@ -16,7 +17,7 @@ original_dim = 784
|
||||
latent_dim = 2
|
||||
intermediate_dim = 256
|
||||
nb_epoch = 50
|
||||
epsilon_std = 0.01
|
||||
epsilon_std = 1.0
|
||||
|
||||
x = Input(batch_shape=(batch_size, original_dim))
|
||||
h = Dense(intermediate_dim, activation='relu')(x)
|
||||
@@ -82,9 +83,10 @@ generator = Model(decoder_input, _x_decoded_mean)
|
||||
n = 15 # figure with 15x15 digits
|
||||
digit_size = 28
|
||||
figure = np.zeros((digit_size * n, digit_size * n))
|
||||
# we will sample n points within [-15, 15] standard deviations
|
||||
grid_x = np.linspace(-15, 15, n)
|
||||
grid_y = np.linspace(-15, 15, n)
|
||||
# linearly spaced coordinates on the unit square were transformed through the inverse CDF (ppf) of the Gaussian
|
||||
# to produce values of the latent variables z, since the prior of the latent space is Gaussian
|
||||
grid_x = norm.ppf(np.linspace(0.05, 0.95, n))
|
||||
grid_y = norm.ppf(np.linspace(0.05, 0.95, n))
|
||||
|
||||
for i, yi in enumerate(grid_x):
|
||||
for j, xi in enumerate(grid_y):
|
||||
@@ -95,5 +97,5 @@ for i, yi in enumerate(grid_x):
|
||||
j * digit_size: (j + 1) * digit_size] = digit
|
||||
|
||||
plt.figure(figsize=(10, 10))
|
||||
plt.imshow(figure)
|
||||
plt.imshow(figure, cmap='Greys_r')
|
||||
plt.show()
|
||||
|
||||
@@ -5,6 +5,7 @@ Reference: "Auto-Encoding Variational Bayes" https://arxiv.org/abs/1312.6114
|
||||
'''
|
||||
import numpy as np
|
||||
import matplotlib.pyplot as plt
|
||||
from scipy.stats import norm
|
||||
|
||||
from keras.layers import Input, Dense, Lambda, Flatten, Reshape
|
||||
from keras.layers import Convolution2D, Deconvolution2D
|
||||
@@ -27,7 +28,7 @@ else:
|
||||
original_img_size = (img_rows, img_cols, img_chns)
|
||||
latent_dim = 2
|
||||
intermediate_dim = 128
|
||||
epsilon_std = 0.01
|
||||
epsilon_std = 1.0
|
||||
nb_epoch = 5
|
||||
|
||||
x = Input(batch_shape=(batch_size,) + original_img_size)
|
||||
@@ -153,9 +154,10 @@ generator = Model(decoder_input, _x_decoded_mean_squash)
|
||||
n = 15 # figure with 15x15 digits
|
||||
digit_size = 28
|
||||
figure = np.zeros((digit_size * n, digit_size * n))
|
||||
# we will sample n points within [-15, 15] standard deviations
|
||||
grid_x = np.linspace(-15, 15, n)
|
||||
grid_y = np.linspace(-15, 15, n)
|
||||
# linearly spaced coordinates on the unit square were transformed through the inverse CDF (ppf) of the Gaussian
|
||||
# to produce values of the latent variables z, since the prior of the latent space is Gaussian
|
||||
grid_x = norm.ppf(np.linspace(0.05, 0.95, n))
|
||||
grid_y = norm.ppf(np.linspace(0.05, 0.95, n))
|
||||
|
||||
for i, yi in enumerate(grid_x):
|
||||
for j, xi in enumerate(grid_y):
|
||||
@@ -167,5 +169,5 @@ for i, yi in enumerate(grid_x):
|
||||
j * digit_size: (j + 1) * digit_size] = digit
|
||||
|
||||
plt.figure(figsize=(10, 10))
|
||||
plt.imshow(figure)
|
||||
plt.imshow(figure, cmap='Greys_r')
|
||||
plt.show()
|
||||
|
||||
+1
-1
@@ -15,4 +15,4 @@ from . import objectives
|
||||
from . import optimizers
|
||||
from . import regularizers
|
||||
|
||||
__version__ = '1.1.1'
|
||||
__version__ = '1.1.2'
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
from __future__ import absolute_import
|
||||
from . import backend as K
|
||||
from .utils.generic_utils import get_from_module
|
||||
|
||||
|
||||
def softmax(x):
|
||||
@@ -11,13 +12,15 @@ def softmax(x):
|
||||
s = K.sum(e, axis=-1, keepdims=True)
|
||||
return e / s
|
||||
else:
|
||||
raise Exception('Cannot apply softmax to a tensor that is not 2D or 3D. ' +
|
||||
'Here, ndim=' + str(ndim))
|
||||
raise ValueError('Cannot apply softmax to a tensor '
|
||||
'that is not 2D or 3D. '
|
||||
'Here, ndim=' + str(ndim))
|
||||
|
||||
|
||||
def elu(x, alpha=1.0):
|
||||
return K.elu(x, alpha)
|
||||
|
||||
|
||||
def softplus(x):
|
||||
return K.softplus(x)
|
||||
|
||||
@@ -43,13 +46,9 @@ def hard_sigmoid(x):
|
||||
|
||||
|
||||
def linear(x):
|
||||
'''
|
||||
The function returns the variable that is passed in, so all types work.
|
||||
'''
|
||||
return x
|
||||
|
||||
|
||||
from .utils.generic_utils import get_from_module
|
||||
def get(identifier):
|
||||
if identifier is None:
|
||||
return linear
|
||||
|
||||
@@ -44,7 +44,8 @@ def decode_predictions(preds, top=5):
|
||||
CLASS_INDEX = json.load(open(fpath))
|
||||
results = []
|
||||
for pred in preds:
|
||||
top_indices = np.argpartition(pred, -top)[-top:][::-1]
|
||||
top_indices = pred.argsort()[-top:][::-1]
|
||||
result = [tuple(CLASS_INDEX[str(i)]) + (pred[i],) for i in top_indices]
|
||||
result.sort(key=lambda x: x[2], reverse=True)
|
||||
results.append(result)
|
||||
return results
|
||||
|
||||
@@ -1,6 +1,8 @@
|
||||
import tensorflow as tf
|
||||
|
||||
from tensorflow.python.training import moving_averages
|
||||
from tensorflow.python.ops import tensor_array_ops
|
||||
from tensorflow.python.ops import control_flow_ops
|
||||
try:
|
||||
from tensorflow.python.ops import ctc_ops as ctc
|
||||
except ImportError:
|
||||
@@ -10,27 +12,41 @@ import numpy as np
|
||||
import os
|
||||
import copy
|
||||
import warnings
|
||||
from .common import _FLOATX, _EPSILON, _IMAGE_DIM_ORDERING, reset_uids
|
||||
from .common import _FLOATX, _EPSILON, image_dim_ordering, reset_uids
|
||||
py_all = all
|
||||
|
||||
# INTERNAL UTILS
|
||||
|
||||
# This is the default internal TF session used by Keras.
|
||||
# It can be set manually via `set_session(sess)`.
|
||||
_SESSION = None
|
||||
_LEARNING_PHASE = tf.placeholder(dtype='uint8', name='keras_learning_phase') # 0 = test, 1 = train
|
||||
# This dictionary holds a mapping {graph: learning_phase}.
|
||||
# A learning phase is a bool tensor used to run Keras models in
|
||||
# either train mode (learning_phase == 1) or test mode (learning_phase == 0).
|
||||
_GRAPH_LEARNING_PHASES = {}
|
||||
# This boolean flag can be set to True to leave variable initialization
|
||||
# up to the user.
|
||||
# Change its value via `manual_variable_initialization(value)`.
|
||||
_MANUAL_VAR_INIT = False
|
||||
|
||||
|
||||
def clear_session():
|
||||
'''Destroys the current TF graph and creates a new one.
|
||||
|
||||
Useful to avoid clutter from old models / layers.
|
||||
'''
|
||||
global _SESSION
|
||||
global _LEARNING_PHASE
|
||||
global _GRAPH_LEARNING_PHASES
|
||||
tf.reset_default_graph()
|
||||
reset_uids()
|
||||
_SESSION = None
|
||||
_LEARNING_PHASE = tf.placeholder(dtype='uint8', name='keras_learning_phase')
|
||||
phase = tf.placeholder(dtype='bool', name='keras_learning_phase')
|
||||
_GRAPH_LEARNING_PHASES[tf.get_default_graph()] = phase
|
||||
|
||||
|
||||
def manual_variable_initialization(value):
|
||||
'''Whether variables should be initialized
|
||||
'''Returns a boolean:
|
||||
whether variables should be initialized
|
||||
as they are instantiated (default), or if
|
||||
the user should handle the initialization
|
||||
(e.g. via tf.initialize_all_variables()).
|
||||
@@ -42,19 +58,27 @@ def manual_variable_initialization(value):
|
||||
def learning_phase():
|
||||
'''Returns the learning phase flag.
|
||||
|
||||
The learning phase flag is an integer tensor (0 = test, 1 = train)
|
||||
The learning phase flag is a bool tensor (0 = test, 1 = train)
|
||||
to be passed as input to any Keras function
|
||||
that uses a different behavior at train time and test time.
|
||||
'''
|
||||
return _LEARNING_PHASE
|
||||
graph = tf.get_default_graph()
|
||||
if graph not in _GRAPH_LEARNING_PHASES:
|
||||
phase = tf.placeholder(dtype='bool',
|
||||
name='keras_learning_phase')
|
||||
_GRAPH_LEARNING_PHASES[graph] = phase
|
||||
return _GRAPH_LEARNING_PHASES[graph]
|
||||
|
||||
|
||||
def set_learning_phase(value):
|
||||
global _LEARNING_PHASE
|
||||
'''Sets the learning phase to a fixed value,
|
||||
either 0 or 1 (integers).
|
||||
'''
|
||||
global _GRAPH_LEARNING_PHASES
|
||||
if value not in {0, 1}:
|
||||
raise ValueError('Expected learning phase to be '
|
||||
'0 or 1.')
|
||||
_LEARNING_PHASE = value
|
||||
_GRAPH_LEARNING_PHASES[tf.get_default_graph()] = value
|
||||
|
||||
|
||||
def get_session():
|
||||
@@ -72,15 +96,20 @@ def get_session():
|
||||
'''
|
||||
global _SESSION
|
||||
if tf.get_default_session() is not None:
|
||||
return tf.get_default_session()
|
||||
if _SESSION is None:
|
||||
if not os.environ.get('OMP_NUM_THREADS'):
|
||||
_SESSION = tf.Session(config=tf.ConfigProto(allow_soft_placement=True))
|
||||
else:
|
||||
nb_thread = int(os.environ.get('OMP_NUM_THREADS'))
|
||||
_SESSION = tf.Session(config=tf.ConfigProto(intra_op_parallelism_threads=nb_thread,
|
||||
allow_soft_placement=True))
|
||||
return _SESSION
|
||||
session = tf.get_default_session()
|
||||
else:
|
||||
if _SESSION is None:
|
||||
if not os.environ.get('OMP_NUM_THREADS'):
|
||||
config = tf.ConfigProto(allow_soft_placement=True)
|
||||
else:
|
||||
nb_thread = int(os.environ.get('OMP_NUM_THREADS'))
|
||||
config = tf.ConfigProto(intra_op_parallelism_threads=nb_thread,
|
||||
allow_soft_placement=True)
|
||||
_SESSION = tf.Session(config=config)
|
||||
session = _SESSION
|
||||
if not _MANUAL_VAR_INIT:
|
||||
_initialize_variables()
|
||||
return session
|
||||
|
||||
|
||||
def set_session(session):
|
||||
@@ -144,30 +173,34 @@ def variable(value, dtype=_FLOATX, name=None):
|
||||
'''
|
||||
if hasattr(value, 'tocoo'):
|
||||
sparse_coo = value.tocoo()
|
||||
indices = np.concatenate((np.expand_dims(sparse_coo.row, 1), np.expand_dims(sparse_coo.col, 1)), 1)
|
||||
indices = np.concatenate((np.expand_dims(sparse_coo.row, 1),
|
||||
np.expand_dims(sparse_coo.col, 1)), 1)
|
||||
# SparseTensor doesn't need initialization
|
||||
return tf.SparseTensor(indices=indices, values=sparse_coo.data, shape=sparse_coo.shape)
|
||||
|
||||
v = tf.Variable(value, dtype=_convert_string_dtype(dtype), name=name)
|
||||
if _MANUAL_VAR_INIT:
|
||||
v = tf.SparseTensor(indices=indices, values=sparse_coo.data, shape=sparse_coo.shape)
|
||||
v._dims = len(sparse_coo.shape)
|
||||
return v
|
||||
if tf.get_default_graph() is get_session().graph:
|
||||
try:
|
||||
get_session().run(v.initializer)
|
||||
except tf.errors.InvalidArgumentError:
|
||||
warnings.warn('Could not automatically initialize variable, '
|
||||
'make sure you do it manually (e.g. via '
|
||||
'`tf.initialize_all_variables()`).')
|
||||
else:
|
||||
warnings.warn('The default TensorFlow graph is not the graph '
|
||||
'associated with the TensorFlow session currently '
|
||||
'registered with Keras, and as such Keras '
|
||||
'was not able to automatically initialize a variable. '
|
||||
'You should consider registering the proper session '
|
||||
'with Keras via `K.set_session(sess)`.')
|
||||
v = tf.Variable(value, dtype=_convert_string_dtype(dtype), name=name)
|
||||
return v
|
||||
|
||||
|
||||
def _initialize_variables():
|
||||
if hasattr(tf, 'global_variables'):
|
||||
variables = tf.global_variables()
|
||||
else:
|
||||
variables = tf.all_variables()
|
||||
|
||||
uninitialized_variables = []
|
||||
for v in variables:
|
||||
if not hasattr(v, '_keras_initialized') or not v._keras_initialized:
|
||||
uninitialized_variables.append(v)
|
||||
v._keras_initialized = True
|
||||
if uninitialized_variables:
|
||||
sess = get_session()
|
||||
if hasattr(tf, 'variables_initializer'):
|
||||
sess.run(tf.variables_initializer(uninitialized_variables))
|
||||
else:
|
||||
sess.run(tf.initialize_variables(uninitialized_variables))
|
||||
|
||||
def placeholder(shape=None, ndim=None, dtype=_FLOATX, sparse=False, name=None):
|
||||
'''Instantiates a placeholder.
|
||||
|
||||
@@ -187,8 +220,8 @@ def placeholder(shape=None, ndim=None, dtype=_FLOATX, sparse=False, name=None):
|
||||
if ndim:
|
||||
shape = tuple([None for _ in range(ndim)])
|
||||
if sparse:
|
||||
tf_shape = tf.constant(np.array(list([0 for _ in range(len(shape))]), dtype=np.int64))
|
||||
x = tf.sparse_placeholder(dtype, shape=tf_shape, name=name)
|
||||
x = tf.sparse_placeholder(dtype, name=name)
|
||||
x._dims = len(shape)
|
||||
else:
|
||||
x = tf.placeholder(dtype, shape=shape, name=name)
|
||||
x._keras_shape = shape
|
||||
@@ -215,7 +248,7 @@ def ndim(x):
|
||||
'''Returns the number of axes in a tensor, as an integer.
|
||||
'''
|
||||
if is_sparse(x):
|
||||
return int(x.shape.get_shape()[0])
|
||||
return x._dims
|
||||
|
||||
dims = x.get_shape()._dims
|
||||
if dims is not None:
|
||||
@@ -241,7 +274,8 @@ def zeros(shape, dtype=_FLOATX, name=None):
|
||||
'''
|
||||
shape = tuple(map(int, shape))
|
||||
tf_dtype = _convert_string_dtype(dtype)
|
||||
return variable(tf.constant_initializer(0., dtype=tf_dtype)(shape), dtype, name)
|
||||
return variable(tf.constant_initializer(0., dtype=tf_dtype)(shape),
|
||||
dtype, name)
|
||||
|
||||
|
||||
def ones(shape, dtype=_FLOATX, name=None):
|
||||
@@ -249,7 +283,8 @@ def ones(shape, dtype=_FLOATX, name=None):
|
||||
'''
|
||||
shape = tuple(map(int, shape))
|
||||
tf_dtype = _convert_string_dtype(dtype)
|
||||
return variable(tf.constant_initializer(1., dtype=tf_dtype)(shape), dtype, name)
|
||||
return variable(tf.constant_initializer(1., dtype=tf_dtype)(shape),
|
||||
dtype, name)
|
||||
|
||||
|
||||
def eye(size, dtype=_FLOATX, name=None):
|
||||
@@ -748,14 +783,16 @@ def resize_images(X, height_factor, width_factor, dim_ordering):
|
||||
X = permute_dimensions(X, [0, 2, 3, 1])
|
||||
X = tf.image.resize_nearest_neighbor(X, new_shape)
|
||||
X = permute_dimensions(X, [0, 3, 1, 2])
|
||||
X.set_shape((None, None, original_shape[2] * height_factor, original_shape[3] * width_factor))
|
||||
X.set_shape((None, None, original_shape[2] * height_factor if original_shape[2] is not None else None,
|
||||
original_shape[3] * width_factor if original_shape[3] is not None else None))
|
||||
return X
|
||||
elif dim_ordering == 'tf':
|
||||
original_shape = int_shape(X)
|
||||
new_shape = tf.shape(X)[1:3]
|
||||
new_shape *= tf.constant(np.array([height_factor, width_factor]).astype('int32'))
|
||||
X = tf.image.resize_nearest_neighbor(X, new_shape)
|
||||
X.set_shape((None, original_shape[1] * height_factor, original_shape[2] * width_factor, None))
|
||||
X.set_shape((None, original_shape[1] * height_factor if original_shape[1] is not None else None,
|
||||
original_shape[2] * width_factor if original_shape[2] is not None else None, None))
|
||||
return X
|
||||
else:
|
||||
raise Exception('Invalid dim_ordering: ' + dim_ordering)
|
||||
@@ -854,10 +891,15 @@ def asymmetric_temporal_padding(x, left_pad=1, right_pad=1):
|
||||
return tf.pad(x, pattern)
|
||||
|
||||
|
||||
def spatial_2d_padding(x, padding=(1, 1), dim_ordering=_IMAGE_DIM_ORDERING):
|
||||
def spatial_2d_padding(x, padding=(1, 1), dim_ordering='default'):
|
||||
'''Pads the 2nd and 3rd dimensions of a 4D tensor
|
||||
with "padding[0]" and "padding[1]" (resp.) zeros left and right.
|
||||
'''
|
||||
if dim_ordering == 'default':
|
||||
dim_ordering = image_dim_ordering()
|
||||
if dim_ordering not in {'th', 'tf'}:
|
||||
raise ValueError('Unknown dim_ordering ' + str(dim_ordering))
|
||||
|
||||
if dim_ordering == 'th':
|
||||
pattern = [[0, 0], [0, 0],
|
||||
[padding[0], padding[0]], [padding[1], padding[1]]]
|
||||
@@ -868,10 +910,18 @@ def spatial_2d_padding(x, padding=(1, 1), dim_ordering=_IMAGE_DIM_ORDERING):
|
||||
return tf.pad(x, pattern)
|
||||
|
||||
|
||||
def asymmetric_spatial_2d_padding(x, top_pad=1, bottom_pad=1, left_pad=1, right_pad=1, dim_ordering=_IMAGE_DIM_ORDERING):
|
||||
def asymmetric_spatial_2d_padding(x, top_pad=1, bottom_pad=1,
|
||||
left_pad=1, right_pad=1,
|
||||
dim_ordering='default'):
|
||||
'''Pad the rows and columns of a 4D tensor
|
||||
with "top_pad", "bottom_pad", "left_pad", "right_pad" (resp.) zeros rows on top, bottom; cols on left, right.
|
||||
with "top_pad", "bottom_pad", "left_pad", "right_pad" (resp.) zeros
|
||||
rows on top, bottom; cols on left, right.
|
||||
'''
|
||||
if dim_ordering == 'default':
|
||||
dim_ordering = image_dim_ordering()
|
||||
if dim_ordering not in {'th', 'tf'}:
|
||||
raise ValueError('Unknown dim_ordering ' + str(dim_ordering))
|
||||
|
||||
if dim_ordering == 'th':
|
||||
pattern = [[0, 0],
|
||||
[0, 0],
|
||||
@@ -885,13 +935,18 @@ def asymmetric_spatial_2d_padding(x, top_pad=1, bottom_pad=1, left_pad=1, right_
|
||||
return tf.pad(x, pattern)
|
||||
|
||||
|
||||
def spatial_3d_padding(x, padding=(1, 1, 1), dim_ordering=_IMAGE_DIM_ORDERING):
|
||||
def spatial_3d_padding(x, padding=(1, 1, 1), dim_ordering='default'):
|
||||
'''Pads 5D tensor with zeros for the depth, height, width dimension with
|
||||
"padding[0]", "padding[1]" and "padding[2]" (resp.) zeros left and right
|
||||
|
||||
For 'tf' dim_ordering, the 2nd, 3rd and 4th dimension will be padded.
|
||||
For 'th' dim_ordering, the 3rd, 4th and 5th dimension will be padded.
|
||||
'''
|
||||
if dim_ordering == 'default':
|
||||
dim_ordering = image_dim_ordering()
|
||||
if dim_ordering not in {'th', 'tf'}:
|
||||
raise ValueError('Unknown dim_ordering ' + str(dim_ordering))
|
||||
|
||||
if dim_ordering == 'th':
|
||||
pattern = [
|
||||
[0, 0],
|
||||
@@ -1033,7 +1088,8 @@ class Function(object):
|
||||
for tensor, value in zip(self.inputs, inputs):
|
||||
if is_sparse(tensor):
|
||||
sparse_coo = value.tocoo()
|
||||
indices = np.concatenate((np.expand_dims(sparse_coo.row, 1), np.expand_dims(sparse_coo.col, 1)), 1)
|
||||
indices = np.concatenate((np.expand_dims(sparse_coo.row, 1),
|
||||
np.expand_dims(sparse_coo.col, 1)), 1)
|
||||
value = (indices, sparse_coo.data, sparse_coo.shape)
|
||||
feed_dict[tensor] = value
|
||||
session = get_session()
|
||||
@@ -1051,8 +1107,8 @@ def function(inputs, outputs, updates=[], **kwargs):
|
||||
'''
|
||||
if len(kwargs) > 0:
|
||||
msg = [
|
||||
"Expected no kwargs, you passed %s" % len(kwargs),
|
||||
"kwargs passed to function are ignored with Tensorflow backend"
|
||||
'Expected no kwargs, you passed %s' % len(kwargs),
|
||||
'kwargs passed to function are ignored with Tensorflow backend'
|
||||
]
|
||||
warnings.warn('\n'.join(msg))
|
||||
return Function(inputs, outputs, updates=updates)
|
||||
@@ -1121,6 +1177,13 @@ def rnn(step_function, inputs, initial_states,
|
||||
axes = [1, 0] + list(range(2, ndim))
|
||||
inputs = tf.transpose(inputs, (axes))
|
||||
|
||||
if mask is not None:
|
||||
if mask.dtype != tf.bool:
|
||||
mask = tf.cast(mask, tf.bool)
|
||||
if len(mask.get_shape()) == ndim - 1:
|
||||
mask = expand_dims(mask)
|
||||
mask = tf.transpose(mask, axes)
|
||||
|
||||
if constants is None:
|
||||
constants = []
|
||||
|
||||
@@ -1137,13 +1200,7 @@ def rnn(step_function, inputs, initial_states,
|
||||
input_list.reverse()
|
||||
|
||||
if mask is not None:
|
||||
# Transpose not supported by bool tensor types, hence round-trip to uint8.
|
||||
mask = tf.cast(mask, tf.uint8)
|
||||
if len(mask.get_shape()) == ndim - 1:
|
||||
mask = expand_dims(mask)
|
||||
mask = tf.cast(tf.transpose(mask, axes), tf.bool)
|
||||
mask_list = tf.unpack(mask)
|
||||
|
||||
if go_backwards:
|
||||
mask_list.reverse()
|
||||
|
||||
@@ -1187,26 +1244,25 @@ def rnn(step_function, inputs, initial_states,
|
||||
outputs = tf.pack(successive_outputs)
|
||||
|
||||
else:
|
||||
from tensorflow.python.ops.rnn import _dynamic_rnn_loop
|
||||
|
||||
if go_backwards:
|
||||
inputs = tf.reverse(inputs, [True] + [False] * (ndim - 1))
|
||||
|
||||
states = initial_states
|
||||
nb_states = len(states)
|
||||
if nb_states == 0:
|
||||
# use dummy state, otherwise _dynamic_rnn_loop breaks
|
||||
state = inputs[:, 0, :]
|
||||
state_size = state.get_shape()[-1]
|
||||
else:
|
||||
state_size = int(states[0].get_shape()[-1])
|
||||
if nb_states == 1:
|
||||
state = states[0]
|
||||
else:
|
||||
state = tf.concat(1, states)
|
||||
states = tuple(initial_states)
|
||||
|
||||
time_steps = tf.shape(inputs)[0]
|
||||
output_ta = tensor_array_ops.TensorArray(
|
||||
dtype=inputs.dtype,
|
||||
size=time_steps,
|
||||
tensor_array_name='output_ta')
|
||||
input_ta = tensor_array_ops.TensorArray(
|
||||
dtype=inputs.dtype,
|
||||
size=time_steps,
|
||||
tensor_array_name='input_ta')
|
||||
input_ta = input_ta.unpack(inputs)
|
||||
time = tf.constant(0, dtype='int32', name='time')
|
||||
|
||||
if mask is not None:
|
||||
if len(initial_states) == 0:
|
||||
if len(states) == 0:
|
||||
raise ValueError('No initial states provided! '
|
||||
'When using masking in an RNN, you should '
|
||||
'provide initial states '
|
||||
@@ -1216,84 +1272,44 @@ def rnn(step_function, inputs, initial_states,
|
||||
if go_backwards:
|
||||
mask = tf.reverse(mask, [True] + [False] * (ndim - 2))
|
||||
|
||||
# Transpose not supported by bool tensor types, hence round-trip to uint8.
|
||||
mask = tf.cast(mask, tf.uint8)
|
||||
if len(mask.get_shape()) == ndim - 1:
|
||||
mask = expand_dims(mask)
|
||||
mask = tf.transpose(mask, axes)
|
||||
inputs = tf.concat(2, [tf.cast(mask, inputs.dtype), inputs])
|
||||
mask_ta = tensor_array_ops.TensorArray(
|
||||
dtype=tf.bool,
|
||||
size=time_steps,
|
||||
tensor_array_name='mask_ta')
|
||||
mask_ta = mask_ta.unpack(mask)
|
||||
|
||||
def _step(input, state):
|
||||
if nb_states > 1:
|
||||
states = []
|
||||
for i in range(nb_states):
|
||||
states.append(state[:, i * state_size: (i + 1) * state_size])
|
||||
else:
|
||||
states = [state]
|
||||
mask_t = tf.cast(input[:, 0], tf.bool)
|
||||
input = input[:, 1:]
|
||||
output, new_states = step_function(input, states + constants)
|
||||
|
||||
output = tf.select(mask_t, output, states[0])
|
||||
new_states = [tf.select(mask_t, new_states[i], states[i]) for i in range(len(states))]
|
||||
|
||||
if len(new_states) == 1:
|
||||
new_state = new_states[0]
|
||||
else:
|
||||
new_state = tf.concat(1, new_states)
|
||||
|
||||
return output, new_state
|
||||
def _step(time, output_ta_t, *states):
|
||||
current_input = input_ta.read(time)
|
||||
mask_t = mask_ta.read(time)
|
||||
output, new_states = step_function(current_input,
|
||||
tuple(states) +
|
||||
tuple(constants))
|
||||
tiled_mask_t = tf.tile(mask_t, tf.pack([1, tf.shape(output)[1]]))
|
||||
output = tf.select(tiled_mask_t, output, states[0])
|
||||
new_states = [tf.select(tiled_mask_t, new_states[i], states[i]) for i in range(len(states))]
|
||||
output_ta_t = output_ta_t.write(time, output)
|
||||
return (time + 1, output_ta_t) + tuple(new_states)
|
||||
else:
|
||||
def _step(input, state):
|
||||
if nb_states > 1:
|
||||
states = []
|
||||
for i in range(nb_states):
|
||||
states.append(state[:, i * state_size: (i + 1) * state_size])
|
||||
elif nb_states == 1:
|
||||
states = [state]
|
||||
else:
|
||||
states = []
|
||||
output, new_states = step_function(input, states + constants)
|
||||
def _step(time, output_ta_t, *states):
|
||||
current_input = input_ta.read(time)
|
||||
output, new_states = step_function(current_input,
|
||||
tuple(states) +
|
||||
tuple(constants))
|
||||
output_ta_t = output_ta_t.write(time, output)
|
||||
return (time + 1, output_ta_t) + tuple(new_states)
|
||||
|
||||
if len(new_states) > 1:
|
||||
new_state = tf.concat(1, new_states)
|
||||
elif len(new_states) == 1:
|
||||
new_state = new_states[0]
|
||||
else:
|
||||
# return dummy state, otherwise _dynamic_rnn_loop breaks
|
||||
new_state = state
|
||||
return output, new_state
|
||||
|
||||
_step.state_size = state_size * nb_states
|
||||
# recover output size by calling _step on the first input
|
||||
slice_begin = tf.pack([0] * ndim)
|
||||
slice_size = tf.pack([1] + [-1] * (ndim - 1))
|
||||
first_input = tf.slice(inputs, slice_begin, slice_size)
|
||||
first_input = tf.squeeze(first_input, [0])
|
||||
_step.output_size = int(_step(first_input, state)[0].get_shape()[-1])
|
||||
|
||||
(outputs, final_state) = _dynamic_rnn_loop(
|
||||
_step,
|
||||
inputs,
|
||||
state,
|
||||
final_outputs = control_flow_ops.while_loop(
|
||||
cond=lambda time, *_: time < time_steps,
|
||||
body=_step,
|
||||
loop_vars=(time, output_ta) + states,
|
||||
parallel_iterations=32,
|
||||
swap_memory=True,
|
||||
sequence_length=None)
|
||||
swap_memory=True)
|
||||
last_time = final_outputs[0]
|
||||
output_ta = final_outputs[1]
|
||||
new_states = final_outputs[2:]
|
||||
|
||||
if nb_states > 1:
|
||||
new_states = []
|
||||
for i in range(nb_states):
|
||||
new_states.append(final_state[:, i * state_size: (i + 1) * state_size])
|
||||
elif nb_states == 1:
|
||||
new_states = [final_state]
|
||||
else:
|
||||
new_states = []
|
||||
|
||||
# all this circus is to recover the last vector in the sequence.
|
||||
slice_begin = tf.pack([tf.shape(outputs)[0] - 1] + [0] * (ndim - 1))
|
||||
slice_size = tf.pack([1] + [-1] * (ndim - 1))
|
||||
last_output = tf.slice(outputs, slice_begin, slice_size)
|
||||
last_output = tf.squeeze(last_output, [0])
|
||||
outputs = output_ta.pack()
|
||||
last_output = output_ta.read(last_time - 1)
|
||||
|
||||
axes = [1, 0] + list(range(2, len(outputs.get_shape())))
|
||||
outputs = tf.transpose(outputs, axes)
|
||||
@@ -1301,7 +1317,8 @@ def rnn(step_function, inputs, initial_states,
|
||||
|
||||
|
||||
def _cond(condition, then_lambda, else_lambda):
|
||||
'''Backwards compatible interface to tf.cond prior to public introduction.'''
|
||||
'''Backwards compatible interface to tf.cond prior to public introduction.
|
||||
'''
|
||||
try:
|
||||
cond_fn = tf.cond
|
||||
except AttributeError:
|
||||
@@ -1311,7 +1328,8 @@ def _cond(condition, then_lambda, else_lambda):
|
||||
|
||||
|
||||
def switch(condition, then_expression, else_expression):
|
||||
'''Switches between two operations depending on a scalar value (int or bool).
|
||||
'''Switches between two operations
|
||||
depending on a scalar value (int or bool).
|
||||
Note that both `then_expression` and `else_expression`
|
||||
should be symbolic tensors of the *same shape*.
|
||||
|
||||
@@ -1321,8 +1339,11 @@ def switch(condition, then_expression, else_expression):
|
||||
else_expression: TensorFlow operation.
|
||||
'''
|
||||
x_shape = copy.copy(then_expression.get_shape())
|
||||
x = _cond(tf.cast(condition, 'bool'),
|
||||
lambda: then_expression, lambda: else_expression)
|
||||
if condition.dtype != tf.bool:
|
||||
condition = tf.cast(condition, 'bool')
|
||||
x = _cond(condition,
|
||||
lambda: then_expression,
|
||||
lambda: else_expression)
|
||||
x.set_shape(x_shape)
|
||||
return x
|
||||
|
||||
@@ -1331,15 +1352,13 @@ def in_train_phase(x, alt):
|
||||
'''Selects `x` in train phase, and `alt` otherwise.
|
||||
Note that `alt` should have the *same shape* as `x`.
|
||||
'''
|
||||
if _LEARNING_PHASE is 1:
|
||||
if learning_phase() is 1:
|
||||
return x
|
||||
elif _LEARNING_PHASE is 0:
|
||||
elif learning_phase() is 0:
|
||||
return alt
|
||||
# else: assume learning phase is a placeholder.
|
||||
x_shape = copy.copy(x.get_shape())
|
||||
x = _cond(tf.cast(_LEARNING_PHASE, 'bool'), lambda: x, lambda: alt)
|
||||
# else: assume learning phase is a placeholder tensor.
|
||||
x = switch(learning_phase(), x, alt)
|
||||
x._uses_learning_phase = True
|
||||
x.set_shape(x_shape)
|
||||
return x
|
||||
|
||||
|
||||
@@ -1347,14 +1366,13 @@ def in_test_phase(x, alt):
|
||||
'''Selects `x` in test phase, and `alt` otherwise.
|
||||
Note that `alt` should have the *same shape* as `x`.
|
||||
'''
|
||||
if _LEARNING_PHASE is 1:
|
||||
if learning_phase() is 1:
|
||||
return alt
|
||||
elif _LEARNING_PHASE is 0:
|
||||
elif learning_phase() is 0:
|
||||
return x
|
||||
x_shape = copy.copy(x.get_shape())
|
||||
x = _cond(tf.cast(_LEARNING_PHASE, 'bool'), lambda: alt, lambda: x)
|
||||
# else: assume learning phase is a placeholder tensor.
|
||||
x = switch(learning_phase(), alt, x)
|
||||
x._uses_learning_phase = True
|
||||
x.set_shape(x_shape)
|
||||
return x
|
||||
|
||||
|
||||
@@ -1381,17 +1399,17 @@ def relu(x, alpha=0., max_value=None):
|
||||
|
||||
|
||||
def elu(x, alpha=1.):
|
||||
""" Exponential linear unit
|
||||
'''Exponential linear unit.
|
||||
|
||||
# Arguments
|
||||
x: Tensor to compute the activation function for.
|
||||
alpha: scalar
|
||||
"""
|
||||
'''
|
||||
res = tf.nn.elu(x)
|
||||
if alpha == 1:
|
||||
return res
|
||||
else:
|
||||
return tf.select(x > 0, res, alpha*res)
|
||||
return tf.select(x > 0, res, alpha * res)
|
||||
|
||||
|
||||
def softmax(x):
|
||||
@@ -1407,6 +1425,8 @@ def softplus(x):
|
||||
|
||||
|
||||
def softsign(x):
|
||||
'''Softsign of a tensor.
|
||||
'''
|
||||
return tf.nn.softsign(x)
|
||||
|
||||
|
||||
@@ -1516,8 +1536,9 @@ def l2_normalize(x, axis):
|
||||
axis = axis % len(x.get_shape())
|
||||
return tf.nn.l2_normalize(x, dim=axis)
|
||||
|
||||
|
||||
def in_top_k(predictions, targets, k):
|
||||
'''Says whether the `targets` are in the top `k` `predictions`
|
||||
'''Returns whether the `targets` are in the top `k` `predictions`
|
||||
|
||||
# Arguments
|
||||
predictions: A tensor of shape batch_size x classess and type float32.
|
||||
@@ -1615,8 +1636,29 @@ def _postprocess_conv3d_output(x, dim_ordering):
|
||||
return x
|
||||
|
||||
|
||||
def conv1d(x, kernel, stride=1, border_mode='valid',
|
||||
image_shape=None, filter_shape=None):
|
||||
'''1D convolution.
|
||||
|
||||
# Arguments
|
||||
kernel: kernel tensor.
|
||||
strides: stride integer.
|
||||
border_mode: string, "same" or "valid".
|
||||
'''
|
||||
# pre-process dtype
|
||||
if _FLOATX == 'float64':
|
||||
x = tf.cast(x, 'float32')
|
||||
kernel = tf.cast(kernel, 'float32')
|
||||
padding = _preprocess_border_mode(border_mode)
|
||||
x = tf.nn.conv1d(x, kernel, stride, padding=padding)
|
||||
# post-process dtype
|
||||
if _FLOATX == 'float64':
|
||||
x = tf.cast(x, 'float64')
|
||||
return x
|
||||
|
||||
|
||||
def conv2d(x, kernel, strides=(1, 1), border_mode='valid',
|
||||
dim_ordering=_IMAGE_DIM_ORDERING,
|
||||
dim_ordering='default',
|
||||
image_shape=None, filter_shape=None, filter_dilation=(1, 1)):
|
||||
'''2D convolution.
|
||||
|
||||
@@ -1628,8 +1670,10 @@ def conv2d(x, kernel, strides=(1, 1), border_mode='valid',
|
||||
Whether to use Theano or TensorFlow dimension ordering
|
||||
for inputs/kernels/ouputs.
|
||||
'''
|
||||
if dim_ordering == 'default':
|
||||
dim_ordering = image_dim_ordering()
|
||||
if dim_ordering not in {'th', 'tf'}:
|
||||
raise Exception('Unknown dim_ordering ' + str(dim_ordering))
|
||||
raise ValueError('Unknown dim_ordering ' + str(dim_ordering))
|
||||
|
||||
x = _preprocess_conv2d_input(x, dim_ordering)
|
||||
kernel = _preprocess_conv2d_kernel(kernel, dim_ordering)
|
||||
@@ -1646,7 +1690,7 @@ def conv2d(x, kernel, strides=(1, 1), border_mode='valid',
|
||||
|
||||
def deconv2d(x, kernel, output_shape, strides=(1, 1),
|
||||
border_mode='valid',
|
||||
dim_ordering=_IMAGE_DIM_ORDERING,
|
||||
dim_ordering='default',
|
||||
image_shape=None, filter_shape=None):
|
||||
'''2D deconvolution (i.e. transposed convolution).
|
||||
|
||||
@@ -1660,8 +1704,10 @@ def deconv2d(x, kernel, output_shape, strides=(1, 1),
|
||||
Whether to use Theano or TensorFlow dimension ordering
|
||||
for inputs/kernels/ouputs.
|
||||
'''
|
||||
if dim_ordering == 'default':
|
||||
dim_ordering = image_dim_ordering()
|
||||
if dim_ordering not in {'th', 'tf'}:
|
||||
raise Exception('Unknown dim_ordering ' + str(dim_ordering))
|
||||
raise ValueError('Unknown dim_ordering ' + str(dim_ordering))
|
||||
|
||||
x = _preprocess_conv2d_input(x, dim_ordering)
|
||||
output_shape = _preprocess_deconv_output_shape(output_shape, dim_ordering)
|
||||
@@ -1677,10 +1723,12 @@ def deconv2d(x, kernel, output_shape, strides=(1, 1),
|
||||
|
||||
def atrous_conv2d(x, kernel, rate=1,
|
||||
border_mode='valid',
|
||||
dim_ordering=_IMAGE_DIM_ORDERING,
|
||||
dim_ordering='default',
|
||||
image_shape=None, filter_shape=None):
|
||||
if dim_ordering == 'default':
|
||||
dim_ordering = image_dim_ordering()
|
||||
if dim_ordering not in {'th', 'tf'}:
|
||||
raise Exception('Unknown dim_ordering ' + str(dim_ordering))
|
||||
raise ValueError('Unknown dim_ordering ' + str(dim_ordering))
|
||||
if rate == 1:
|
||||
return conv2d(x, kernel, strides=(1, 1), border_mode=border_mode,
|
||||
dim_ordering=dim_ordering)
|
||||
@@ -1694,9 +1742,11 @@ def atrous_conv2d(x, kernel, rate=1,
|
||||
|
||||
|
||||
def separable_conv2d(x, depthwise_kernel, pointwise_kernel, strides=(1, 1),
|
||||
border_mode='valid', dim_ordering=_IMAGE_DIM_ORDERING):
|
||||
border_mode='valid', dim_ordering='default'):
|
||||
if dim_ordering == 'default':
|
||||
dim_ordering = image_dim_ordering()
|
||||
if dim_ordering not in {'th', 'tf'}:
|
||||
raise Exception('Unknown dim_ordering ' + str(dim_ordering))
|
||||
raise ValueError('Unknown dim_ordering ' + str(dim_ordering))
|
||||
|
||||
x = _preprocess_conv2d_input(x, dim_ordering)
|
||||
depthwise_kernel = _preprocess_conv2d_kernel(depthwise_kernel,
|
||||
@@ -1712,7 +1762,7 @@ def separable_conv2d(x, depthwise_kernel, pointwise_kernel, strides=(1, 1),
|
||||
|
||||
|
||||
def conv3d(x, kernel, strides=(1, 1, 1),
|
||||
border_mode='valid', dim_ordering=_IMAGE_DIM_ORDERING,
|
||||
border_mode='valid', dim_ordering='default',
|
||||
volume_shape=None, filter_shape=None):
|
||||
'''3D convolution.
|
||||
|
||||
@@ -1724,8 +1774,10 @@ def conv3d(x, kernel, strides=(1, 1, 1),
|
||||
Whether to use Theano or TensorFlow dimension ordering
|
||||
for inputs/kernels/ouputs.
|
||||
'''
|
||||
if dim_ordering == 'default':
|
||||
dim_ordering = image_dim_ordering()
|
||||
if dim_ordering not in {'th', 'tf'}:
|
||||
raise Exception('Unknown dim_ordering ' + str(dim_ordering))
|
||||
raise ValueError('Unknown dim_ordering ' + str(dim_ordering))
|
||||
|
||||
x = _preprocess_conv3d_input(x, dim_ordering)
|
||||
kernel = _preprocess_conv3d_kernel(kernel, dim_ordering)
|
||||
@@ -1737,7 +1789,7 @@ def conv3d(x, kernel, strides=(1, 1, 1),
|
||||
|
||||
|
||||
def pool2d(x, pool_size, strides=(1, 1),
|
||||
border_mode='valid', dim_ordering=_IMAGE_DIM_ORDERING,
|
||||
border_mode='valid', dim_ordering='default',
|
||||
pool_mode='max'):
|
||||
'''2D Pooling.
|
||||
|
||||
@@ -1748,8 +1800,10 @@ def pool2d(x, pool_size, strides=(1, 1),
|
||||
dim_ordering: one of "th", "tf".
|
||||
pool_mode: one of "max", "avg".
|
||||
'''
|
||||
if dim_ordering == 'default':
|
||||
dim_ordering = image_dim_ordering()
|
||||
if dim_ordering not in {'th', 'tf'}:
|
||||
raise Exception('Unknown dim_ordering ' + str(dim_ordering))
|
||||
raise ValueError('Unknown dim_ordering ' + str(dim_ordering))
|
||||
|
||||
padding = _preprocess_border_mode(border_mode)
|
||||
strides = (1,) + strides + (1,)
|
||||
@@ -1768,7 +1822,7 @@ def pool2d(x, pool_size, strides=(1, 1),
|
||||
|
||||
|
||||
def pool3d(x, pool_size, strides=(1, 1, 1), border_mode='valid',
|
||||
dim_ordering=_IMAGE_DIM_ORDERING, pool_mode='max'):
|
||||
dim_ordering='default', pool_mode='max'):
|
||||
'''3D Pooling.
|
||||
|
||||
# Arguments
|
||||
@@ -1778,8 +1832,10 @@ def pool3d(x, pool_size, strides=(1, 1, 1), border_mode='valid',
|
||||
dim_ordering: one of "th", "tf".
|
||||
pool_mode: one of "max", "avg".
|
||||
'''
|
||||
if dim_ordering == 'default':
|
||||
dim_ordering = image_dim_ordering()
|
||||
if dim_ordering not in {'th', 'tf'}:
|
||||
raise Exception('Unknown dim_ordering ' + str(dim_ordering))
|
||||
raise ValueError('Unknown dim_ordering ' + str(dim_ordering))
|
||||
|
||||
padding = _preprocess_border_mode(border_mode)
|
||||
strides = (1,) + strides + (1,)
|
||||
@@ -1924,3 +1980,52 @@ def ctc_decode(y_pred, input_length, greedy=True, beam_width=100,
|
||||
for st in decoded]
|
||||
|
||||
return (decoded_dense, log_prob)
|
||||
|
||||
|
||||
# HIGH ORDER FUNCTIONS
|
||||
|
||||
def map_fn(fn, elems, name=None):
|
||||
'''Map the function fn over the elements elems and return the outputs.
|
||||
|
||||
# Arguments
|
||||
fn: Callable that will be called upon each element in elems
|
||||
elems: tensor
|
||||
name: A string name for the map node in the graph
|
||||
|
||||
# Returns
|
||||
Tensor with first dimension equal to the elems and second depending on
|
||||
fn
|
||||
'''
|
||||
return tf.map_fn(fn, elems, name=name)
|
||||
|
||||
|
||||
def foldl(fn, elems, initializer=None, name=None):
|
||||
'''Reduce elems using fn to combine them from left to right.
|
||||
|
||||
# Arguments
|
||||
fn: Callable that will be called upon each element in elems and an
|
||||
accumulator, for instance lambda acc, x: acc + x
|
||||
elems: tensor
|
||||
initializer: The first value used (elems[0] in case of None)
|
||||
name: A string name for the foldl node in the graph
|
||||
|
||||
# Returns
|
||||
Same type and shape as initializer
|
||||
'''
|
||||
return tf.foldl(fn, elems, initializer=initializer, name=name)
|
||||
|
||||
|
||||
def foldr(fn, elems, initializer=None, name=None):
|
||||
'''Reduce elems using fn to combine them from right to left.
|
||||
|
||||
# Arguments
|
||||
fn: Callable that will be called upon each element in elems and an
|
||||
accumulator, for instance lambda acc, x: acc + x
|
||||
elems: tensor
|
||||
initializer: The first value used (elems[-1] in case of None)
|
||||
name: A string name for the foldr node in the graph
|
||||
|
||||
# Returns
|
||||
Same type and shape as initializer
|
||||
'''
|
||||
return tf.foldr(fn, elems, initializer=initializer, name=name)
|
||||
|
||||
@@ -14,7 +14,7 @@ except ImportError:
|
||||
from theano.sandbox.softsign import softsign as T_softsign
|
||||
import inspect
|
||||
import numpy as np
|
||||
from .common import _FLOATX, _EPSILON, _IMAGE_DIM_ORDERING
|
||||
from .common import _FLOATX, _EPSILON, image_dim_ordering
|
||||
py_all = all
|
||||
|
||||
|
||||
@@ -35,6 +35,7 @@ def set_learning_phase(value):
|
||||
'0 or 1.')
|
||||
_LEARNING_PHASE = value
|
||||
|
||||
|
||||
# VARIABLE MANIPULATION
|
||||
|
||||
|
||||
@@ -88,7 +89,7 @@ def placeholder(shape=None, ndim=None, dtype=_FLOATX, sparse=False, name=None):
|
||||
|
||||
|
||||
def shape(x):
|
||||
'''Return the shape of a tensor.
|
||||
'''Returns the shape of a tensor.
|
||||
|
||||
Warning: type returned will be different for
|
||||
Theano backend (Theano tensor type) and TF backend (TF TensorShape).
|
||||
@@ -105,25 +106,25 @@ def dtype(x):
|
||||
|
||||
|
||||
def eval(x):
|
||||
'''Run a graph.
|
||||
'''Returns the value of a tensor.
|
||||
'''
|
||||
return to_dense(x).eval()
|
||||
|
||||
|
||||
def zeros(shape, dtype=_FLOATX, name=None):
|
||||
'''Instantiate an all-zeros variable.
|
||||
'''Instantiates an all-zeros variable.
|
||||
'''
|
||||
return variable(np.zeros(shape), dtype, name)
|
||||
|
||||
|
||||
def ones(shape, dtype=_FLOATX, name=None):
|
||||
'''Instantiate an all-ones variable.
|
||||
'''Instantiates an all-ones variable.
|
||||
'''
|
||||
return variable(np.ones(shape), dtype, name)
|
||||
|
||||
|
||||
def eye(size, dtype=_FLOATX, name=None):
|
||||
'''Instantiate an identity matrix.
|
||||
'''Instantiates an identity matrix.
|
||||
'''
|
||||
return variable(np.eye(size), dtype, name)
|
||||
|
||||
@@ -147,7 +148,7 @@ def random_normal_variable(shape, mean, scale, dtype=_FLOATX, name=None):
|
||||
|
||||
|
||||
def count_params(x):
|
||||
'''Return number of scalars in a tensor.
|
||||
'''Returns the number of scalars in a tensor.
|
||||
|
||||
Return: numpy integer.
|
||||
'''
|
||||
@@ -393,7 +394,7 @@ def cos(x):
|
||||
|
||||
def normalize_batch_in_training(x, gamma, beta,
|
||||
reduction_axes, epsilon=0.0001):
|
||||
'''Compute mean and std for batch then apply batch_normalization on batch.
|
||||
'''Computes mean and std for batch then apply batch_normalization on batch.
|
||||
'''
|
||||
dev = theano.config.device
|
||||
use_cudnn = ndim(x) < 5 and reduction_axes == [0, 2, 3] and (dev.startswith('cuda') or dev.startswith('gpu'))
|
||||
@@ -616,10 +617,15 @@ def asymmetric_temporal_padding(x, left_pad=1, right_pad=1):
|
||||
return T.set_subtensor(output[:, left_pad:x.shape[1] + left_pad, :], x)
|
||||
|
||||
|
||||
def spatial_2d_padding(x, padding=(1, 1), dim_ordering=_IMAGE_DIM_ORDERING):
|
||||
def spatial_2d_padding(x, padding=(1, 1), dim_ordering='default'):
|
||||
'''Pad the 2nd and 3rd dimensions of a 4D tensor
|
||||
with "padding[0]" and "padding[1]" (resp.) zeros left and right.
|
||||
'''
|
||||
if dim_ordering == 'default':
|
||||
dim_ordering = image_dim_ordering()
|
||||
if dim_ordering not in {'th', 'tf'}:
|
||||
raise ValueError('Unknown dim_ordering ' + str(dim_ordering))
|
||||
|
||||
input_shape = x.shape
|
||||
if dim_ordering == 'th':
|
||||
output_shape = (input_shape[0],
|
||||
@@ -647,10 +653,18 @@ def spatial_2d_padding(x, padding=(1, 1), dim_ordering=_IMAGE_DIM_ORDERING):
|
||||
return T.set_subtensor(output[indices], x)
|
||||
|
||||
|
||||
def asymmetric_spatial_2d_padding(x, top_pad=1, bottom_pad=1, left_pad=1, right_pad=1, dim_ordering=_IMAGE_DIM_ORDERING):
|
||||
def asymmetric_spatial_2d_padding(x, top_pad=1, bottom_pad=1,
|
||||
left_pad=1, right_pad=1,
|
||||
dim_ordering='default'):
|
||||
'''Pad the rows and columns of a 4D tensor
|
||||
with "top_pad", "bottom_pad", "left_pad", "right_pad" (resp.) zeros rows on top, bottom; cols on left, right.
|
||||
with "top_pad", "bottom_pad", "left_pad", "right_pad" (resp.) zeros
|
||||
rows on top, bottom; cols on left, right.
|
||||
'''
|
||||
if dim_ordering == 'default':
|
||||
dim_ordering = image_dim_ordering()
|
||||
if dim_ordering not in {'th', 'tf'}:
|
||||
raise ValueError('Unknown dim_ordering ' + str(dim_ordering))
|
||||
|
||||
input_shape = x.shape
|
||||
if dim_ordering == 'th':
|
||||
output_shape = (input_shape[0],
|
||||
@@ -679,10 +693,15 @@ def asymmetric_spatial_2d_padding(x, top_pad=1, bottom_pad=1, left_pad=1, right_
|
||||
return T.set_subtensor(output[indices], x)
|
||||
|
||||
|
||||
def spatial_3d_padding(x, padding=(1, 1, 1), dim_ordering=_IMAGE_DIM_ORDERING):
|
||||
def spatial_3d_padding(x, padding=(1, 1, 1), dim_ordering='default'):
|
||||
'''Pad the 2nd, 3rd and 4th dimensions of a 5D tensor
|
||||
with "padding[0]", "padding[1]" and "padding[2]" (resp.) zeros left and right.
|
||||
'''
|
||||
if dim_ordering == 'default':
|
||||
dim_ordering = image_dim_ordering()
|
||||
if dim_ordering not in {'th', 'tf'}:
|
||||
raise ValueError('Unknown dim_ordering ' + str(dim_ordering))
|
||||
|
||||
input_shape = x.shape
|
||||
if dim_ordering == 'th':
|
||||
output_shape = (input_shape[0],
|
||||
@@ -1119,7 +1138,7 @@ def l2_normalize(x, axis):
|
||||
|
||||
|
||||
def in_top_k(predictions, targets, k):
|
||||
'''Says whether the `targets` are in the top `k` `predictions`
|
||||
'''Returns whether the `targets` are in the top `k` `predictions`
|
||||
|
||||
# Arguments
|
||||
predictions: A tensor of shape batch_size x classes and type float32.
|
||||
@@ -1182,6 +1201,8 @@ def _preprocess_border_mode(border_mode):
|
||||
th_border_mode = 'half'
|
||||
elif border_mode == 'valid':
|
||||
th_border_mode = 'valid'
|
||||
elif border_mode == 'full':
|
||||
th_border_mode = 'full'
|
||||
else:
|
||||
raise Exception('Border mode not supported: ' + str(border_mode))
|
||||
return th_border_mode
|
||||
@@ -1275,8 +1296,20 @@ def _postprocess_conv3d_output(conv_out, x, border_mode, np_kernel, strides, dim
|
||||
return conv_out
|
||||
|
||||
|
||||
def conv1d(x, kernel, stride=1, border_mode='valid',
           image_shape=None, filter_shape=None):
    '''1D convolution (not implemented here).

    # Arguments
        x: input to convolve (unused here).
        kernel: kernel tensor.
        stride: stride integer.
        border_mode: string, "same" or "valid".
        image_shape: unused here.
        filter_shape: unused here.

    # Raises
        NotImplementedError: always; the function has no implementation.
    '''
    raise NotImplementedError
|
||||
|
||||
|
||||
def conv2d(x, kernel, strides=(1, 1), border_mode='valid',
|
||||
dim_ordering=_IMAGE_DIM_ORDERING, image_shape=None,
|
||||
dim_ordering='default', image_shape=None,
|
||||
filter_shape=None, filter_dilation=(1, 1)):
|
||||
'''2D convolution.
|
||||
|
||||
@@ -1288,6 +1321,8 @@ def conv2d(x, kernel, strides=(1, 1), border_mode='valid',
|
||||
Whether to use Theano or TensorFlow dimension ordering
|
||||
in inputs/kernels/outputs.
|
||||
'''
|
||||
if dim_ordering == 'default':
|
||||
dim_ordering = image_dim_ordering()
|
||||
if dim_ordering not in {'th', 'tf'}:
|
||||
raise Exception('Unknown dim_ordering ' + str(dim_ordering))
|
||||
|
||||
@@ -1320,7 +1355,7 @@ def conv2d(x, kernel, strides=(1, 1), border_mode='valid',
|
||||
|
||||
def deconv2d(x, kernel, output_shape, strides=(1, 1),
|
||||
border_mode='valid',
|
||||
dim_ordering=_IMAGE_DIM_ORDERING,
|
||||
dim_ordering='default',
|
||||
image_shape=None, filter_shape=None):
|
||||
'''2D deconvolution (transposed convolution).
|
||||
|
||||
@@ -1334,6 +1369,8 @@ def deconv2d(x, kernel, output_shape, strides=(1, 1),
|
||||
in inputs/kernels/outputs.
|
||||
'''
|
||||
flip_filters = False
|
||||
if dim_ordering == 'default':
|
||||
dim_ordering = image_dim_ordering()
|
||||
if dim_ordering not in {'th', 'tf'}:
|
||||
raise Exception('Unknown dim_ordering ' + str(dim_ordering))
|
||||
|
||||
@@ -1358,18 +1395,18 @@ def deconv2d(x, kernel, output_shape, strides=(1, 1),
|
||||
|
||||
def atrous_conv2d(x, kernel, rate=1,
|
||||
border_mode='valid',
|
||||
dim_ordering=_IMAGE_DIM_ORDERING,
|
||||
dim_ordering='default',
|
||||
image_shape=None, filter_shape=None):
|
||||
raise NotImplementedError
|
||||
|
||||
|
||||
def separable_conv2d(x, depthwise_kernel, pointwise_kernel, strides=(1, 1),
|
||||
border_mode='valid', dim_ordering=_IMAGE_DIM_ORDERING):
|
||||
border_mode='valid', dim_ordering='default'):
|
||||
raise NotImplementedError
|
||||
|
||||
|
||||
def conv3d(x, kernel, strides=(1, 1, 1),
|
||||
border_mode='valid', dim_ordering=_IMAGE_DIM_ORDERING,
|
||||
border_mode='valid', dim_ordering='default',
|
||||
volume_shape=None, filter_shape=None,
|
||||
filter_dilation=(1, 1, 1)):
|
||||
'''3D convolution.
|
||||
@@ -1382,6 +1419,8 @@ def conv3d(x, kernel, strides=(1, 1, 1),
|
||||
Whether to use Theano or TensorFlow dimension ordering
|
||||
in inputs/kernels/outputs.
|
||||
'''
|
||||
if dim_ordering == 'default':
|
||||
dim_ordering = image_dim_ordering()
|
||||
if dim_ordering not in {'th', 'tf'}:
|
||||
raise Exception('Unknown dim_ordering ' + str(dim_ordering))
|
||||
|
||||
@@ -1415,12 +1454,14 @@ def conv3d(x, kernel, strides=(1, 1, 1),
|
||||
|
||||
# TODO: remove this function when theano without AbstractConv3d is deprecated
|
||||
def _old_theano_conv3d(x, kernel, strides=(1, 1, 1),
|
||||
border_mode='valid', dim_ordering=_IMAGE_DIM_ORDERING,
|
||||
border_mode='valid', dim_ordering='default',
|
||||
volume_shape=None, filter_shape=None):
|
||||
'''
|
||||
Run on cuDNN if available.
|
||||
border_mode: string, "same" or "valid".
|
||||
'''
|
||||
if dim_ordering == 'default':
|
||||
dim_ordering = image_dim_ordering()
|
||||
if dim_ordering not in {'th', 'tf'}:
|
||||
raise Exception('Unknown dim_ordering ' + str(dim_ordering))
|
||||
|
||||
@@ -1477,7 +1518,12 @@ def _old_theano_conv3d(x, kernel, strides=(1, 1, 1),
|
||||
|
||||
|
||||
def pool2d(x, pool_size, strides=(1, 1), border_mode='valid',
|
||||
dim_ordering=_IMAGE_DIM_ORDERING, pool_mode='max'):
|
||||
dim_ordering='default', pool_mode='max'):
|
||||
if dim_ordering == 'default':
|
||||
dim_ordering = image_dim_ordering()
|
||||
if dim_ordering not in {'th', 'tf'}:
|
||||
raise Exception('Unknown dim_ordering ' + str(dim_ordering))
|
||||
|
||||
if border_mode == 'same':
|
||||
w_pad = pool_size[0] - 2 if pool_size[0] % 2 == 1 else pool_size[0] - 1
|
||||
h_pad = pool_size[1] - 2 if pool_size[1] % 2 == 1 else pool_size[1] - 1
|
||||
@@ -1494,15 +1540,33 @@ def pool2d(x, pool_size, strides=(1, 1), border_mode='valid',
|
||||
x = x.dimshuffle((0, 3, 1, 2))
|
||||
|
||||
if pool_mode == 'max':
|
||||
pool_out = pool.pool_2d(x, ds=pool_size, st=strides,
|
||||
ignore_border=True,
|
||||
padding=padding,
|
||||
mode='max')
|
||||
# TODO remove the old call once Theano older than 0.9.0dev4 is deprecated
|
||||
try:
|
||||
# new interface (introduced in 0.9.0dev4)
|
||||
pool_out = pool.pool_2d(x, ws=pool_size, stride=strides,
|
||||
ignore_border=True,
|
||||
pad=padding,
|
||||
mode='max')
|
||||
except TypeError:
|
||||
# old interface
|
||||
pool_out = pool.pool_2d(x, ds=pool_size, st=strides,
|
||||
ignore_border=True,
|
||||
padding=padding,
|
||||
mode='max')
|
||||
elif pool_mode == 'avg':
|
||||
pool_out = pool.pool_2d(x, ds=pool_size, st=strides,
|
||||
ignore_border=True,
|
||||
padding=padding,
|
||||
mode='average_exc_pad')
|
||||
# TODO remove the old call once Theano older than 0.9.0dev4 is deprecated
|
||||
try:
|
||||
# new interface (introduced in 0.9.0dev4)
|
||||
pool_out = pool.pool_2d(x, ws=pool_size, stride=strides,
|
||||
ignore_border=True,
|
||||
pad=padding,
|
||||
mode='average_exc_pad')
|
||||
except TypeError:
|
||||
# old interface
|
||||
pool_out = pool.pool_2d(x, ds=pool_size, st=strides,
|
||||
ignore_border=True,
|
||||
padding=padding,
|
||||
mode='average_exc_pad')
|
||||
else:
|
||||
raise Exception('Invalid pooling mode: ' + str(pool_mode))
|
||||
|
||||
@@ -1520,7 +1584,12 @@ def pool2d(x, pool_size, strides=(1, 1), border_mode='valid',
|
||||
|
||||
|
||||
def pool3d(x, pool_size, strides=(1, 1, 1), border_mode='valid',
|
||||
dim_ordering=_IMAGE_DIM_ORDERING, pool_mode='max'):
|
||||
dim_ordering='default', pool_mode='max'):
|
||||
if dim_ordering == 'default':
|
||||
dim_ordering = image_dim_ordering()
|
||||
if dim_ordering not in {'th', 'tf'}:
|
||||
raise Exception('Unknown dim_ordering ' + str(dim_ordering))
|
||||
|
||||
# TODO: remove this if statement when Theano without pool_3d is deprecated
|
||||
# (pool_3d was introduced after 0.9.0dev3)
|
||||
if not hasattr(T.signal.pool, 'pool_3d'):
|
||||
@@ -1544,15 +1613,33 @@ def pool3d(x, pool_size, strides=(1, 1, 1), border_mode='valid',
|
||||
x = x.dimshuffle((0, 4, 1, 2, 3))
|
||||
|
||||
if pool_mode == 'max':
|
||||
pool_out = pool.pool_3d(x, ds=pool_size, st=strides,
|
||||
ignore_border=True,
|
||||
padding=padding,
|
||||
mode='max')
|
||||
# TODO remove the old call once Theano older than 0.9.0dev4 is deprecated
|
||||
try:
|
||||
# new interface (introduced in 0.9.0dev4)
|
||||
pool_out = pool.pool_3d(x, ws=pool_size, stride=strides,
|
||||
ignore_border=True,
|
||||
pad=padding,
|
||||
mode='max')
|
||||
except TypeError:
|
||||
# old interface
|
||||
pool_out = pool.pool_3d(x, ds=pool_size, st=strides,
|
||||
ignore_border=True,
|
||||
padding=padding,
|
||||
mode='max')
|
||||
elif pool_mode == 'avg':
|
||||
pool_out = pool.pool_3d(x, ds=pool_size, st=strides,
|
||||
ignore_border=True,
|
||||
padding=padding,
|
||||
mode='average_exc_pad')
|
||||
# TODO remove the old call once Theano older than 0.9.0dev4 is deprecated
|
||||
try:
|
||||
# new interface (introduced in 0.9.0dev4)
|
||||
pool_out = pool.pool_3d(x, ws=pool_size, stride=strides,
|
||||
ignore_border=True,
|
||||
pad=padding,
|
||||
mode='average_exc_pad')
|
||||
except TypeError:
|
||||
# old interface
|
||||
pool_out = pool.pool_3d(x, ds=pool_size, st=strides,
|
||||
ignore_border=True,
|
||||
padding=padding,
|
||||
mode='average_exc_pad')
|
||||
else:
|
||||
raise Exception('Invalid pooling mode: ' + str(pool_mode))
|
||||
|
||||
@@ -1574,7 +1661,12 @@ def pool3d(x, pool_size, strides=(1, 1, 1), border_mode='valid',
|
||||
# TODO: remove this function when Theano without pool_3d is deprecated
|
||||
# (pool_3d was introduced after 0.9.0dev3)
|
||||
def _old_theano_pool3d(x, pool_size, strides=(1, 1, 1), border_mode='valid',
|
||||
dim_ordering=_IMAGE_DIM_ORDERING, pool_mode='max'):
|
||||
dim_ordering='default', pool_mode='max'):
|
||||
if dim_ordering == 'default':
|
||||
dim_ordering = image_dim_ordering()
|
||||
if dim_ordering not in {'th', 'tf'}:
|
||||
raise Exception('Unknown dim_ordering ' + str(dim_ordering))
|
||||
|
||||
if border_mode == 'same':
|
||||
# TODO: add implementation for border_mode="same"
|
||||
raise Exception('border_mode="same" not supported with Theano.')
|
||||
@@ -1665,11 +1757,13 @@ def ctc_interleave_blanks(Y):
|
||||
Y_ = T.set_subtensor(Y_[T.arange(Y.shape[0]) * 2 + 1], Y)
|
||||
return Y_
|
||||
|
||||
|
||||
def ctc_create_skip_idxs(Y):
    '''Pick the odd positions of `Y` whose entry differs from the one
    two positions later.

    The candidate positions are 1, 3, 5, ... and there are
    `(Y.shape[0] - 3) // 2` of them; a candidate `i` is kept only when
    `Y[i] != Y[i + 2]`.
    '''
    n_candidates = (Y.shape[0] - 3) // 2
    candidates = T.arange(n_candidates) * 2 + 1
    differs = T.neq(Y[candidates], Y[candidates + 2])
    keep = differs.nonzero()
    return candidates[keep]
|
||||
|
||||
|
||||
def ctc_update_log_p(skip_idxs, zeros, active, log_p_curr, log_p_prev):
|
||||
active_skip_idxs = skip_idxs[(skip_idxs < active).nonzero()]
|
||||
active_next = T.cast(T.minimum(
|
||||
@@ -1695,11 +1789,11 @@ def ctc_update_log_p(skip_idxs, zeros, active, log_p_curr, log_p_prev):
|
||||
)
|
||||
return active_next, log_p_next
|
||||
|
||||
|
||||
def ctc_path_probs(predict, Y, alpha=1e-4):
|
||||
smoothed_predict = (1 - alpha) * predict[:, Y] + alpha * np.float32(1.) / Y.shape[0]
|
||||
L = T.log(smoothed_predict)
|
||||
zeros = T.zeros_like(L[0])
|
||||
base = T.set_subtensor(zeros[:1], np.float32(1))
|
||||
log_first = zeros
|
||||
|
||||
f_skip_idxs = ctc_create_skip_idxs(Y)
|
||||
@@ -1718,12 +1812,14 @@ def ctc_path_probs(predict, Y, alpha=1e-4):
|
||||
log_probs = log_f_probs + log_b_probs[::-1, ::-1] - L
|
||||
return log_probs, mask
|
||||
|
||||
|
||||
def ctc_cost(predict, Y):
    '''Negative log of the summed, masked path probabilities.

    `Y` is first passed through `ctc_interleave_blanks`, and the result
    is handed to `ctc_path_probs` together with `predict`. The masked
    log-probabilities are then combined as a log-sum-exp: the maximum is
    subtracted before exponentiating and added back after the log
    (presumably for numerical stability).
    '''
    blanked = ctc_interleave_blanks(Y)
    log_probs, mask = ctc_path_probs(predict, blanked)
    shift = T.max(log_probs)
    selected = T.exp(log_probs - shift)[mask.nonzero()]
    total_log_prob = T.log(T.sum(selected)) + shift
    return -total_log_prob
|
||||
|
||||
|
||||
# batchifies original CTC code
|
||||
def ctc_batch_cost(y_true, y_pred, input_length, label_length):
|
||||
'''Runs CTC loss algorithm on each batch element.
|
||||
@@ -1748,10 +1844,75 @@ def ctc_batch_cost(y_true, y_pred, input_length, label_length):
|
||||
return ctc_cost(y_pred_step, y_true_step)
|
||||
|
||||
ret, _ = theano.scan(
|
||||
fn = ctc_step,
|
||||
fn=ctc_step,
|
||||
outputs_info=None,
|
||||
sequences=[y_true, y_pred, input_length, label_length]
|
||||
)
|
||||
|
||||
ret = ret.dimshuffle('x', 0)
|
||||
return ret
|
||||
|
||||
|
||||
# HIGH ORDER FUNCTIONS
|
||||
|
||||
def map_fn(fn, elems, name=None):
    '''Apply `fn` to each element of `elems` and return the outputs.

    # Arguments
        fn: Callable that will be called upon each element in elems
        elems: tensor, at least 2 dimensional
        name: A string name for the map node in the graph

    # Returns
        Tensor with first dimension equal to the elems and second
        depending on fn
    '''
    # Only the first item of what `theano.map` returns is handed back.
    mapped = theano.map(fn, elems, name=name)
    return mapped[0]
|
||||
|
||||
|
||||
def foldl(fn, elems, initializer=None, name=None):
    '''Left fold: combine the entries of `elems` with `fn`, left to right.

    # Arguments
        fn: Callable taking `(accumulator, element)` and returning the
            new accumulator, for instance lambda acc, x: acc + x
        elems: tensor
        initializer: The first accumulator value. When None, `elems[0]`
            is used and the fold runs over `elems[1:]`.
        name: A string name for the foldl node in the graph

    # Returns
        Same type and shape as initializer
    '''
    if initializer is None:
        initializer, elems = elems[0], elems[1:]

    def step(x, acc):
        # Theano passes the element first and the accumulator second,
        # while `fn` expects the accumulator first.
        return fn(acc, x)

    return theano.foldl(step, elems, initializer, name=name)[0]
|
||||
|
||||
|
||||
def foldr(fn, elems, initializer=None, name=None):
    '''Right fold: combine the entries of `elems` with `fn`, right to left.

    # Arguments
        fn: Callable taking `(accumulator, element)` and returning the
            new accumulator, for instance lambda acc, x: acc + x
        elems: tensor
        initializer: The first accumulator value. When None, `elems[-1]`
            is used and the fold runs over `elems[:-1]`.
        name: A string name for the foldr node in the graph

    # Returns
        Same type and shape as initializer
    '''
    if initializer is None:
        initializer, elems = elems[-1], elems[:-1]

    def step(x, acc):
        # Theano passes the element first and the accumulator second,
        # while `fn` expects the accumulator first.
        return fn(acc, x)

    return theano.foldr(step, elems, initializer, name=name)[0]
|
||||
|
||||
+18
-4
@@ -1,6 +1,7 @@
|
||||
from __future__ import absolute_import
|
||||
from __future__ import print_function
|
||||
|
||||
import os
|
||||
import csv
|
||||
|
||||
import numpy as np
|
||||
@@ -337,6 +338,7 @@ class EarlyStopping(Callback):
|
||||
self.verbose = verbose
|
||||
self.min_delta = min_delta
|
||||
self.wait = 0
|
||||
self.stopped_epoch = 0
|
||||
|
||||
if mode not in ['auto', 'min', 'max']:
|
||||
warnings.warn('EarlyStopping mode %s is unknown, '
|
||||
@@ -374,11 +376,14 @@ class EarlyStopping(Callback):
|
||||
self.wait = 0
|
||||
else:
|
||||
if self.wait >= self.patience:
|
||||
if self.verbose > 0:
|
||||
print('Epoch %05d: early stopping' % (epoch))
|
||||
self.stopped_epoch = epoch
|
||||
self.model.stop_training = True
|
||||
self.wait += 1
|
||||
|
||||
def on_train_end(self, logs=None):
    '''Announce the epoch at which training was stopped early, if any.

    A message is printed only when `self.stopped_epoch` is greater than
    0 and `self.verbose` is greater than 0, so a `stopped_epoch` of 0
    produces no output.

    # Arguments
        logs: unused here. Defaults to None instead of a shared
            mutable `{}`.
    '''
    if self.stopped_epoch > 0 and self.verbose > 0:
        print('Epoch %05d: early stopping' % (self.stopped_epoch))
|
||||
|
||||
|
||||
class RemoteMonitor(Callback):
|
||||
'''Callback used to stream events to a server.
|
||||
@@ -432,7 +437,11 @@ class LearningRateScheduler(Callback):
|
||||
assert hasattr(self.model.optimizer, 'lr'), \
|
||||
'Optimizer must have a "lr" attribute.'
|
||||
lr = self.schedule(epoch)
|
||||
assert type(lr) == float, 'The output of the "schedule" function should be float.'
|
||||
|
||||
if not isinstance(lr, (float, np.float32, np.float64)):
|
||||
raise ValueError('The output of the "schedule" function '
|
||||
'should be float.')
|
||||
|
||||
K.set_value(self.model.optimizer.lr, lr)
|
||||
|
||||
|
||||
@@ -675,10 +684,14 @@ class CSVLogger(Callback):
|
||||
self.append = append
|
||||
self.writer = None
|
||||
self.keys = None
|
||||
self.append_header = True
|
||||
super(CSVLogger, self).__init__()
|
||||
|
||||
def on_train_begin(self, logs={}):
|
||||
if self.append:
|
||||
if os.path.exists(self.filename):
|
||||
with open(self.filename) as f:
|
||||
self.append_header = len(f.readline()) == 0
|
||||
self.csv_file = open(self.filename, 'a')
|
||||
else:
|
||||
self.csv_file = open(self.filename, 'w')
|
||||
@@ -694,7 +707,8 @@ class CSVLogger(Callback):
|
||||
if not self.writer:
|
||||
self.keys = sorted(logs.keys())
|
||||
self.writer = csv.DictWriter(self.csv_file, fieldnames=['epoch'] + self.keys)
|
||||
self.writer.writeheader()
|
||||
if self.append_header:
|
||||
self.writer.writeheader()
|
||||
|
||||
row_dict = OrderedDict({'epoch': epoch})
|
||||
row_dict.update((key, handle_value(logs[key])) for key in self.keys)
|
||||
|
||||
@@ -11,9 +11,10 @@ def load_batch(fpath, label_key='labels'):
|
||||
else:
|
||||
d = cPickle.load(f, encoding="bytes")
|
||||
# decode utf8
|
||||
d_decoded = {}
|
||||
for k, v in d.items():
|
||||
del(d[k])
|
||||
d[k.decode("utf8")] = v
|
||||
d_decoded[k.decode("utf8")] = v
|
||||
d = d_decoded
|
||||
f.close()
|
||||
data = d["data"]
|
||||
labels = d[label_key]
|
||||
|
||||
+366
-346
Diferenças do arquivo suprimidas por serem muito extensas
Carregar Diff
+37
-47
@@ -186,13 +186,12 @@ def check_array_lengths(X, Y, W):
|
||||
|
||||
|
||||
def check_loss_and_target_compatibility(targets, losses, output_shapes):
|
||||
assert len(targets) == len(losses) == len(output_shapes)
|
||||
key_losses = {'mean_square_error',
|
||||
'binary_crossentropy',
|
||||
'categorical_crossentropy'}
|
||||
for y, loss, shape in zip(targets, losses, output_shapes):
|
||||
if loss.__name__ == 'categorical_crossentropy':
|
||||
if y.shape[1] == 1:
|
||||
if y.shape[-1] == 1:
|
||||
raise Exception('You are passing a target array of shape ' + str(y.shape) +
|
||||
' while using as loss `categorical_crossentropy`. '
|
||||
'`categorical_crossentropy` expects '
|
||||
@@ -208,13 +207,15 @@ def check_loss_and_target_compatibility(targets, losses, output_shapes):
|
||||
'Alternatively, you can use the loss function '
|
||||
'`sparse_categorical_crossentropy` instead, '
|
||||
'which does expect integer targets.')
|
||||
if loss.__name__ in key_losses and shape[1] is not None and y.shape[1] != shape[1]:
|
||||
raise Exception('A target array with shape ' + str(y.shape) +
|
||||
' was passed for an output of shape ' + str(shape) +
|
||||
' while using as loss `' + loss.__name__ + '`. '
|
||||
'This loss expects '
|
||||
'targets to have the same shape '
|
||||
'as the output.')
|
||||
if loss.__name__ in key_losses:
|
||||
for target_dim, out_dim in zip(y.shape[1:], shape[1:]):
|
||||
if out_dim is not None and target_dim != out_dim:
|
||||
raise Exception('A target array with shape ' + str(y.shape) +
|
||||
' was passed for an output of shape ' + str(shape) +
|
||||
' while using as loss `' + loss.__name__ + '`. '
|
||||
'This loss expects '
|
||||
'targets to have the same shape '
|
||||
'as the output.')
|
||||
|
||||
|
||||
def collect_metrics(metrics, output_names):
|
||||
@@ -237,36 +238,6 @@ def collect_metrics(metrics, output_names):
|
||||
str(metrics))
|
||||
|
||||
|
||||
def collect_trainable_weights(layer):
    '''Gather the `trainable_weights` of `layer` and of its sublayers.

    A layer whose `trainable` attribute is falsy contributes nothing,
    and its sublayers are not visited. Containers are recognised by
    class name: `Sequential` (via `flattened_layers`), `Model` (via
    `layers`) and `Graph` (via `_graph_nodes.values()`); anything else
    contributes its own `trainable_weights`.

    # Returns
        A de-duplicated list of weights, sorted by `auto_name` on the
        Theano backend and by `name` otherwise.
    '''
    if not getattr(layer, 'trainable', True):
        return []

    kind = layer.__class__.__name__
    if kind == 'Sequential':
        children = layer.flattened_layers
    elif kind == 'Model':
        children = layer.layers
    elif kind == 'Graph':
        children = layer._graph_nodes.values()
    else:
        children = None

    collected = []
    if children is None:
        collected += layer.trainable_weights
    else:
        for child in children:
            collected += collect_trainable_weights(child)

    # dedupe weights
    collected = list(set(collected))
    # TF variables carry an auto-generated `name`; on Theano `name` is
    # None and the auto-generated identifier lives in `auto_name`.
    if collected:
        if K.backend() == 'theano':
            collected.sort(key=lambda w: w.auto_name)
        else:
            collected.sort(key=lambda w: w.name)
    return collected
|
||||
|
||||
|
||||
def batch_shuffle(index_array, batch_size):
|
||||
'''This shuffles an array in a batch-wise fashion.
|
||||
Useful for shuffling HDF5 arrays
|
||||
@@ -610,7 +581,10 @@ class Model(Container):
|
||||
for i in range(len(self.outputs)):
|
||||
shape = self.internal_output_shapes[i]
|
||||
name = self.output_names[i]
|
||||
self.targets.append(K.placeholder(ndim=len(shape), name=name + '_target'))
|
||||
self.targets.append(K.placeholder(ndim=len(shape),
|
||||
name=name + '_target',
|
||||
sparse=K.is_sparse(self.outputs[i]),
|
||||
dtype=K.dtype(self.outputs[i])))
|
||||
|
||||
# prepare metrics
|
||||
self.metrics = metrics
|
||||
@@ -698,7 +672,15 @@ class Model(Container):
|
||||
self.test_function = None
|
||||
self.predict_function = None
|
||||
|
||||
self._collected_trainable_weights = collect_trainable_weights(self)
|
||||
# collected trainable weights and sort them deterministically.
|
||||
trainable_weights = self.trainable_weights
|
||||
# Sort weights by name
|
||||
if trainable_weights:
|
||||
if K.backend() == 'theano':
|
||||
trainable_weights.sort(key=lambda x: x.name if x.name else x.auto_name)
|
||||
else:
|
||||
trainable_weights.sort(key=lambda x: x.name)
|
||||
self._collected_trainable_weights = trainable_weights
|
||||
|
||||
def _make_train_function(self):
|
||||
if not hasattr(self, 'train_function'):
|
||||
@@ -754,7 +736,7 @@ class Model(Container):
|
||||
def _fit_loop(self, f, ins, out_labels=[], batch_size=32,
|
||||
nb_epoch=100, verbose=1, callbacks=[],
|
||||
val_f=None, val_ins=None, shuffle=True,
|
||||
callback_metrics=[]):
|
||||
callback_metrics=[], initial_epoch=0):
|
||||
'''Abstract fit function for f(ins).
|
||||
Assume that f returns a list, labeled by out_labels.
|
||||
|
||||
@@ -774,6 +756,8 @@ class Model(Container):
|
||||
passed to the callbacks. They should be the
|
||||
concatenation of the list of display names of the outputs of
|
||||
`f` and the list of display names of the outputs of `f_val`.
|
||||
initial_epoch: epoch at which to start training
|
||||
(useful for resuming a previous training run)
|
||||
|
||||
# Returns
|
||||
`History` object.
|
||||
@@ -814,7 +798,7 @@ class Model(Container):
|
||||
callback_model.stop_training = False
|
||||
self.validation_data = val_ins
|
||||
|
||||
for epoch in range(nb_epoch):
|
||||
for epoch in range(initial_epoch, nb_epoch):
|
||||
callbacks.on_epoch_begin(epoch)
|
||||
if shuffle == 'batch':
|
||||
index_array = batch_shuffle(index_array, batch_size)
|
||||
@@ -1001,7 +985,7 @@ class Model(Container):
|
||||
|
||||
def fit(self, x, y, batch_size=32, nb_epoch=10, verbose=1, callbacks=[],
|
||||
validation_split=0., validation_data=None, shuffle=True,
|
||||
class_weight=None, sample_weight=None):
|
||||
class_weight=None, sample_weight=None, initial_epoch=0):
|
||||
'''Trains the model for a fixed number of epochs (iterations on a dataset).
|
||||
|
||||
# Arguments
|
||||
@@ -1038,6 +1022,8 @@ class Model(Container):
|
||||
with shape (samples, sequence_length),
|
||||
to apply a different weight to every timestep of every sample.
|
||||
In this case you should make sure to specify sample_weight_mode="temporal" in compile().
|
||||
initial_epoch: epoch at which to start training
|
||||
(useful for resuming a previous training run)
|
||||
|
||||
|
||||
# Returns
|
||||
@@ -1121,7 +1107,8 @@ class Model(Container):
|
||||
batch_size=batch_size, nb_epoch=nb_epoch,
|
||||
verbose=verbose, callbacks=callbacks,
|
||||
val_f=val_f, val_ins=val_ins, shuffle=shuffle,
|
||||
callback_metrics=callback_metrics)
|
||||
callback_metrics=callback_metrics,
|
||||
initial_epoch=initial_epoch)
|
||||
|
||||
def evaluate(self, x, y, batch_size=32, verbose=1, sample_weight=None):
|
||||
'''Returns the loss value and metrics values for the model
|
||||
@@ -1297,7 +1284,8 @@ class Model(Container):
|
||||
def fit_generator(self, generator, samples_per_epoch, nb_epoch,
|
||||
verbose=1, callbacks=[],
|
||||
validation_data=None, nb_val_samples=None,
|
||||
class_weight={}, max_q_size=10, nb_worker=1, pickle_safe=False):
|
||||
class_weight={}, max_q_size=10, nb_worker=1, pickle_safe=False,
|
||||
initial_epoch=0):
|
||||
'''Fits the model on data generated batch-by-batch by
|
||||
a Python generator.
|
||||
The generator is run in parallel to the model, for efficiency.
|
||||
@@ -1333,6 +1321,8 @@ class Model(Container):
|
||||
this implementation relies on multiprocessing, you should not pass
|
||||
non picklable arguments to the generator as they can't be passed
|
||||
easily to children processes.
|
||||
initial_epoch: epoch at which to start training
|
||||
(useful for resuming a previous training run)
|
||||
|
||||
# Returns
|
||||
A `History` object.
|
||||
@@ -1355,7 +1345,7 @@ class Model(Container):
|
||||
```
|
||||
'''
|
||||
wait_time = 0.01 # in seconds
|
||||
epoch = 0
|
||||
epoch = initial_epoch
|
||||
|
||||
do_validation = bool(validation_data)
|
||||
self._make_train_function()
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
from __future__ import absolute_import
|
||||
import numpy as np
|
||||
from . import backend as K
|
||||
from .utils.generic_utils import get_from_module
|
||||
|
||||
|
||||
def get_fans(shape, dim_ordering='th'):
|
||||
@@ -20,7 +21,7 @@ def get_fans(shape, dim_ordering='th'):
|
||||
fan_in = shape[-2] * receptive_field_size
|
||||
fan_out = shape[-1] * receptive_field_size
|
||||
else:
|
||||
raise Exception('Invalid dim_ordering: ' + dim_ordering)
|
||||
raise ValueError('Invalid dim_ordering: ' + dim_ordering)
|
||||
else:
|
||||
# no specific assumptions
|
||||
fan_in = np.sqrt(np.prod(shape))
|
||||
@@ -101,7 +102,6 @@ def one(shape, name=None):
|
||||
return K.ones(shape, name=name)
|
||||
|
||||
|
||||
from .utils.generic_utils import get_from_module
|
||||
def get(identifier, **kwargs):
    '''Look up `identifier` among this module's globals.

    The lookup is delegated to `get_from_module` with the label
    'initialization'; any extra keyword arguments are forwarded to it
    as `kwargs`.
    '''
    module_scope = globals()
    return get_from_module(identifier, module_scope,
                           'initialization', kwargs=kwargs)
|
||||
|
||||
@@ -10,3 +10,4 @@ from .embeddings import *
|
||||
from .noise import *
|
||||
from .advanced_activations import *
|
||||
from .wrappers import *
|
||||
from .convolutional_recurrent import *
|
||||
|
||||
@@ -47,7 +47,7 @@ class Convolution1D(Layer):
|
||||
If you don't specify anything, no activation is applied
|
||||
(ie. "linear" activation: a(x) = x).
|
||||
weights: list of numpy arrays to set as initial weights.
|
||||
border_mode: 'valid' or 'same'.
|
||||
border_mode: 'valid', 'same' or 'full'. ('full' requires the Theano backend.)
|
||||
subsample_length: factor by which to subsample output.
|
||||
W_regularizer: instance of [WeightRegularizer](../regularizers.md)
|
||||
(eg. L1 or L2 regularization), applied to the main weights matrix.
|
||||
@@ -77,19 +77,18 @@ class Convolution1D(Layer):
|
||||
`steps` value might have changed due to padding.
|
||||
'''
|
||||
def __init__(self, nb_filter, filter_length,
|
||||
init='uniform', activation='linear', weights=None,
|
||||
init='glorot_uniform', activation=None, weights=None,
|
||||
border_mode='valid', subsample_length=1,
|
||||
W_regularizer=None, b_regularizer=None, activity_regularizer=None,
|
||||
W_constraint=None, b_constraint=None,
|
||||
bias=True, input_dim=None, input_length=None, **kwargs):
|
||||
|
||||
if border_mode not in {'valid', 'same'}:
|
||||
if border_mode not in {'valid', 'same', 'full'}:
|
||||
raise Exception('Invalid border mode for Convolution1D:', border_mode)
|
||||
self.nb_filter = nb_filter
|
||||
self.filter_length = filter_length
|
||||
self.init = initializations.get(init, dim_ordering='th')
|
||||
self.activation = activations.get(activation)
|
||||
assert border_mode in {'valid', 'same'}, 'border_mode must be in {valid, same}'
|
||||
self.border_mode = border_mode
|
||||
self.subsample_length = subsample_length
|
||||
|
||||
@@ -143,6 +142,7 @@ class Convolution1D(Layer):
|
||||
if self.initial_weights is not None:
|
||||
self.set_weights(self.initial_weights)
|
||||
del self.initial_weights
|
||||
self.built = True
|
||||
|
||||
def get_output_shape_for(self, input_shape):
|
||||
length = conv_output_length(input_shape[1],
|
||||
@@ -218,7 +218,7 @@ class AtrousConvolution1D(Convolution1D):
|
||||
If you don't specify anything, no activation is applied
|
||||
(ie. "linear" activation: a(x) = x).
|
||||
weights: list of numpy arrays to set as initial weights.
|
||||
border_mode: 'valid' or 'same'.
|
||||
border_mode: 'valid', 'same' or 'full'. ('full' requires the Theano backend.)
|
||||
subsample_length: factor by which to subsample output.
|
||||
atrous_rate: Factor for kernel dilation. Also called filter_dilation
|
||||
elsewhere.
|
||||
@@ -250,13 +250,13 @@ class AtrousConvolution1D(Convolution1D):
|
||||
`steps` value might have changed due to padding.
|
||||
'''
|
||||
def __init__(self, nb_filter, filter_length,
|
||||
init='uniform', activation='linear', weights=None,
|
||||
init='glorot_uniform', activation=None, weights=None,
|
||||
border_mode='valid', subsample_length=1, atrous_rate=1,
|
||||
W_regularizer=None, b_regularizer=None, activity_regularizer=None,
|
||||
W_constraint=None, b_constraint=None,
|
||||
bias=True, **kwargs):
|
||||
|
||||
if border_mode not in {'valid', 'same'}:
|
||||
if border_mode not in {'valid', 'same', 'full'}:
|
||||
raise Exception('Invalid border mode for AtrousConv1D:', border_mode)
|
||||
|
||||
self.atrous_rate = int(atrous_rate)
|
||||
@@ -331,7 +331,7 @@ class Convolution2D(Layer):
|
||||
If you don't specify anything, no activation is applied
|
||||
(ie. "linear" activation: a(x) = x).
|
||||
weights: list of numpy arrays to set as initial weights.
|
||||
border_mode: 'valid' or 'same'.
|
||||
border_mode: 'valid', 'same' or 'full'. ('full' requires the Theano backend.)
|
||||
subsample: tuple of length 2. Factor by which to subsample output.
|
||||
Also called strides elsewhere.
|
||||
W_regularizer: instance of [WeightRegularizer](../regularizers.md)
|
||||
@@ -366,21 +366,20 @@ class Convolution2D(Layer):
|
||||
`rows` and `cols` values might have changed due to padding.
|
||||
'''
|
||||
def __init__(self, nb_filter, nb_row, nb_col,
|
||||
init='glorot_uniform', activation='linear', weights=None,
|
||||
init='glorot_uniform', activation=None, weights=None,
|
||||
border_mode='valid', subsample=(1, 1), dim_ordering='default',
|
||||
W_regularizer=None, b_regularizer=None, activity_regularizer=None,
|
||||
W_constraint=None, b_constraint=None,
|
||||
bias=True, **kwargs):
|
||||
if dim_ordering == 'default':
|
||||
dim_ordering = K.image_dim_ordering()
|
||||
if border_mode not in {'valid', 'same'}:
|
||||
if border_mode not in {'valid', 'same', 'full'}:
|
||||
raise Exception('Invalid border mode for Convolution2D:', border_mode)
|
||||
self.nb_filter = nb_filter
|
||||
self.nb_row = nb_row
|
||||
self.nb_col = nb_col
|
||||
self.init = initializations.get(init, dim_ordering=dim_ordering)
|
||||
self.activation = activations.get(activation)
|
||||
assert border_mode in {'valid', 'same'}, 'border_mode must be in {valid, same}'
|
||||
self.border_mode = border_mode
|
||||
self.subsample = tuple(subsample)
|
||||
assert dim_ordering in {'tf', 'th'}, 'dim_ordering must be in {tf, th}'
|
||||
@@ -436,6 +435,7 @@ class Convolution2D(Layer):
|
||||
if self.initial_weights is not None:
|
||||
self.set_weights(self.initial_weights)
|
||||
del self.initial_weights
|
||||
self.built = True
|
||||
|
||||
def get_output_shape_for(self, input_shape):
|
||||
if self.dim_ordering == 'th':
|
||||
@@ -570,7 +570,7 @@ class Deconvolution2D(Convolution2D):
|
||||
If you don't specify anything, no activation is applied
|
||||
(ie. "linear" activation: a(x) = x).
|
||||
weights: list of numpy arrays to set as initial weights.
|
||||
border_mode: 'valid' or 'same'.
|
||||
border_mode: 'valid', 'same' or 'full'. ('full' requires the Theano backend.)
|
||||
subsample: tuple of length 2. Factor by which to oversample output.
|
||||
Also called strides elsewhere.
|
||||
W_regularizer: instance of [WeightRegularizer](../regularizers.md)
|
||||
@@ -609,7 +609,7 @@ class Deconvolution2D(Convolution2D):
|
||||
[3] [Deconvolutional Networks](http://www.matthewzeiler.com/pubs/cvpr2010/cvpr2010.pdf)
|
||||
'''
|
||||
def __init__(self, nb_filter, nb_row, nb_col, output_shape,
|
||||
init='glorot_uniform', activation='linear', weights=None,
|
||||
init='glorot_uniform', activation=None, weights=None,
|
||||
border_mode='valid', subsample=(1, 1),
|
||||
dim_ordering='default',
|
||||
W_regularizer=None, b_regularizer=None, activity_regularizer=None,
|
||||
@@ -617,7 +617,7 @@ class Deconvolution2D(Convolution2D):
|
||||
bias=True, **kwargs):
|
||||
if dim_ordering == 'default':
|
||||
dim_ordering = K.image_dim_ordering()
|
||||
if border_mode not in {'valid', 'same'}:
|
||||
if border_mode not in {'valid', 'same', 'full'}:
|
||||
raise Exception('Invalid border mode for Deconvolution2D:', border_mode)
|
||||
|
||||
self.output_shape_ = output_shape
|
||||
@@ -665,7 +665,7 @@ class Deconvolution2D(Convolution2D):
|
||||
return output
|
||||
|
||||
def get_config(self):
|
||||
config = {'output_shape': self.output_shape}
|
||||
config = {'output_shape': self.output_shape_}
|
||||
base_config = super(Deconvolution2D, self).get_config()
|
||||
return dict(list(base_config.items()) + list(config.items()))
|
||||
|
||||
@@ -703,7 +703,7 @@ class AtrousConvolution2D(Convolution2D):
|
||||
If you don't specify anything, no activation is applied
|
||||
(ie. "linear" activation: a(x) = x).
|
||||
weights: list of numpy arrays to set as initial weights.
|
||||
border_mode: 'valid' or 'same'.
|
||||
border_mode: 'valid', 'same' or 'full'. ('full' requires the Theano backend.)
|
||||
subsample: tuple of length 2. Factor by which to subsample output.
|
||||
Also called strides elsewhere.
|
||||
atrous_rate: tuple of length 2. Factor for kernel dilation.
|
||||
@@ -742,7 +742,7 @@ class AtrousConvolution2D(Convolution2D):
|
||||
- [Multi-Scale Context Aggregation by Dilated Convolutions](https://arxiv.org/abs/1511.07122)
|
||||
'''
|
||||
def __init__(self, nb_filter, nb_row, nb_col,
|
||||
init='glorot_uniform', activation='linear', weights=None,
|
||||
init='glorot_uniform', activation=None, weights=None,
|
||||
border_mode='valid', subsample=(1, 1),
|
||||
atrous_rate=(1, 1), dim_ordering='default',
|
||||
W_regularizer=None, b_regularizer=None, activity_regularizer=None,
|
||||
@@ -751,7 +751,7 @@ class AtrousConvolution2D(Convolution2D):
|
||||
if dim_ordering == 'default':
|
||||
dim_ordering = K.image_dim_ordering()
|
||||
|
||||
if border_mode not in {'valid', 'same'}:
|
||||
if border_mode not in {'valid', 'same', 'full'}:
|
||||
raise Exception('Invalid border mode for AtrousConv2D:', border_mode)
|
||||
|
||||
self.atrous_rate = tuple(atrous_rate)
|
||||
@@ -889,7 +889,7 @@ class SeparableConvolution2D(Layer):
|
||||
`rows` and `cols` values might have changed due to padding.
|
||||
'''
|
||||
def __init__(self, nb_filter, nb_row, nb_col,
|
||||
init='glorot_uniform', activation='linear', weights=None,
|
||||
init='glorot_uniform', activation=None, weights=None,
|
||||
border_mode='valid', subsample=(1, 1),
|
||||
depth_multiplier=1, dim_ordering='default',
|
||||
depthwise_regularizer=None, pointwise_regularizer=None,
|
||||
@@ -984,6 +984,7 @@ class SeparableConvolution2D(Layer):
|
||||
if self.initial_weights is not None:
|
||||
self.set_weights(self.initial_weights)
|
||||
del self.initial_weights
|
||||
self.built = True
|
||||
|
||||
def get_output_shape_for(self, input_shape):
|
||||
if self.dim_ordering == 'th':
|
||||
@@ -1068,7 +1069,7 @@ class Convolution3D(Layer):
|
||||
If you don't specify anything, no activation is applied
|
||||
(ie. "linear" activation: a(x) = x).
|
||||
weights: list of Numpy arrays to set as initial weights.
|
||||
border_mode: 'valid' or 'same'.
|
||||
border_mode: 'valid', 'same' or 'full'. ('full' requires the Theano backend.)
|
||||
subsample: tuple of length 3. Factor by which to subsample output.
|
||||
Also called strides elsewhere.
|
||||
Note: 'subsample' is implemented by slicing the output of conv3d with strides=(1,1,1).
|
||||
@@ -1104,7 +1105,7 @@ class Convolution3D(Layer):
|
||||
'''
|
||||
|
||||
def __init__(self, nb_filter, kernel_dim1, kernel_dim2, kernel_dim3,
|
||||
init='glorot_uniform', activation='linear', weights=None,
|
||||
init='glorot_uniform', activation=None, weights=None,
|
||||
border_mode='valid', subsample=(1, 1, 1), dim_ordering='default',
|
||||
W_regularizer=None, b_regularizer=None, activity_regularizer=None,
|
||||
W_constraint=None, b_constraint=None,
|
||||
@@ -1112,7 +1113,7 @@ class Convolution3D(Layer):
|
||||
if dim_ordering == 'default':
|
||||
dim_ordering = K.image_dim_ordering()
|
||||
|
||||
if border_mode not in {'valid', 'same'}:
|
||||
if border_mode not in {'valid', 'same', 'full'}:
|
||||
raise Exception('Invalid border mode for Convolution3D:', border_mode)
|
||||
self.nb_filter = nb_filter
|
||||
self.kernel_dim1 = kernel_dim1
|
||||
@@ -1120,7 +1121,6 @@ class Convolution3D(Layer):
|
||||
self.kernel_dim3 = kernel_dim3
|
||||
self.init = initializations.get(init, dim_ordering=dim_ordering)
|
||||
self.activation = activations.get(activation)
|
||||
assert border_mode in {'valid', 'same'}, 'border_mode must be in {valid, same}'
|
||||
self.border_mode = border_mode
|
||||
self.subsample = tuple(subsample)
|
||||
assert dim_ordering in {'tf', 'th'}, 'dim_ordering must be in {tf, th}'
|
||||
@@ -1182,6 +1182,7 @@ class Convolution3D(Layer):
|
||||
if self.initial_weights is not None:
|
||||
self.set_weights(self.initial_weights)
|
||||
del self.initial_weights
|
||||
self.built = True
|
||||
|
||||
def get_output_shape_for(self, input_shape):
|
||||
if self.dim_ordering == 'th':
|
||||
@@ -1667,6 +1668,7 @@ class Cropping1D(Layer):
|
||||
|
||||
def build(self, input_shape):
|
||||
self.input_spec = [InputSpec(shape=input_shape)]
|
||||
self.built = True
|
||||
|
||||
def get_output_shape_for(self, input_shape):
|
||||
length = input_shape[1] - self.cropping[0] - self.cropping[1] if input_shape[1] is not None else None
|
||||
@@ -1683,6 +1685,7 @@ class Cropping1D(Layer):
|
||||
base_config = super(Cropping1D, self).get_config()
|
||||
return dict(list(base_config.items()) + list(config.items()))
|
||||
|
||||
|
||||
class Cropping2D(Layer):
|
||||
'''Cropping layer for 2D input (e.g. picture).
|
||||
It crops along spatial dimensions, i.e. width and height.
|
||||
@@ -1735,6 +1738,7 @@ class Cropping2D(Layer):
|
||||
|
||||
def build(self, input_shape):
|
||||
self.input_spec = [InputSpec(shape=input_shape)]
|
||||
self.built = True
|
||||
|
||||
def get_output_shape_for(self, input_shape):
|
||||
if self.dim_ordering == 'th':
|
||||
@@ -1768,6 +1772,7 @@ class Cropping2D(Layer):
|
||||
base_config = super(Cropping2D, self).get_config()
|
||||
return dict(list(base_config.items()) + list(config.items()))
|
||||
|
||||
|
||||
class Cropping3D(Layer):
|
||||
'''Cropping layer for 3D data (e.g. spatial or spatio-temporal).
|
||||
|
||||
@@ -1807,6 +1812,7 @@ class Cropping3D(Layer):
|
||||
|
||||
def build(self, input_shape):
|
||||
self.input_spec = [InputSpec(shape=input_shape)]
|
||||
self.built = True
|
||||
|
||||
def get_output_shape_for(self, input_shape):
|
||||
if self.dim_ordering == 'th':
|
||||
|
||||
@@ -0,0 +1,516 @@
|
||||
from .. import backend as K
|
||||
from .. import activations, initializations, regularizers
|
||||
|
||||
import numpy as np
|
||||
from ..engine import Layer, InputSpec
|
||||
from ..utils.np_utils import conv_output_length
|
||||
import warnings
|
||||
|
||||
|
||||
class ConvRecurrent2D(Layer):
    '''Abstract base class for convolutional recurrent layers.
    Do not use in a model -- it's not a functional layer!

    ConvLSTM2D follows the specifications of this class and accepts
    the keyword arguments listed below.

    The methods defined here rely on attributes and methods that a
    concrete subclass must provide: `border_mode`, `subsample`,
    `W_shape`, `conv_step` and `step`.

    # Input shape
        5D tensor with shape
        `(nb_samples, timesteps, channels, rows, cols)` if
        dim_ordering='th', or
        `(nb_samples, timesteps, rows, cols, channels)` if
        dim_ordering='tf'.

    # Output shape
        - if `return_sequences`: 5D tensor with shape
            `(nb_samples, timesteps, nb_filter, rows, cols)` ('th') or
            `(nb_samples, timesteps, rows, cols, nb_filter)` ('tf').
        - else, 4D tensor with shape
            `(nb_samples, nb_filter, rows, cols)` ('th') or
            `(nb_samples, rows, cols, nb_filter)` ('tf').

        `rows` and `cols` are recomputed from the kernel size,
        `border_mode` and `subsample`.

    # Arguments
        weights: list of numpy arrays to set as initial weights.
            It is only stored here (as `initial_weights`); applying it
            is left to the subclass.
        return_sequences: Boolean. Whether to return the last output
            in the output sequence, or the full sequence.
        go_backwards: Boolean (default False).
            If True, process the input sequence backwards.
        stateful: Boolean (default False). If True, the last state
            for each sample at index i in a batch will be used as initial
            state for the sample of index i in the following batch.
        nb_filter: Number of convolution filters to use.
        nb_row: Number of rows in the convolution kernel.
        nb_col: Number of columns in the convolution kernel.
        dim_ordering: 'th' or 'tf'. Any other value (including the
            default `None`) makes `get_output_shape_for` raise.
        input_shape: required when using this layer as the first layer
            in a model.

    # Masking
        This layer supports masking for input data with a variable number
        of timesteps. The mask received by `call` is forwarded to `K.rnn`.
        **Note:** for the time being, masking is only supported with Theano.

    # TensorFlow warning
        For the time being, when using the TensorFlow backend,
        the number of timesteps used must be specified in your model.
        Make sure to pass an `input_length` int argument to your
        recurrent layer (if it comes first in your model),
        or to pass a complete `input_shape` argument to the first layer
        in your model otherwise.

    # Note on using statefulness in RNNs
        You can set RNN layers to be 'stateful', which means that the states
        computed for the samples in one batch will be reused as initial states
        for the samples in the next batch.
        This assumes a one-to-one mapping between
        samples in different successive batches.

        To enable statefulness:
            - specify `stateful=True` in the layer constructor.
            - specify a fixed batch size for your model, by passing
                a `batch_input_shape=(...)` to the first layer in your model.
                This is the expected shape of your inputs *including the batch
                size*.
                It should be a tuple of 5 integers,
                e.g. `(32, 10, 40, 40, 3)`.

        To reset the states of your model, call `.reset_states()` on either
        a specific layer, or on your entire model.
    '''

    def __init__(self, weights=None, nb_row=None, nb_col=None, nb_filter=None,
                 return_sequences=False, go_backwards=False, stateful=False,
                 dim_ordering=None, **kwargs):
        self.return_sequences = return_sequences
        self.go_backwards = go_backwards
        self.stateful = stateful
        self.initial_weights = weights
        self.nb_row = nb_row
        self.nb_col = nb_col
        self.nb_filter = nb_filter
        self.dim_ordering = dim_ordering
        self.input_spec = [InputSpec(ndim=5)]

        super(ConvRecurrent2D, self).__init__(**kwargs)

    def compute_mask(self, input, mask):
        '''Propagate the mask only when the full sequence is returned.'''
        if self.return_sequences:
            return mask
        return None

    def get_output_shape_for(self, input_shape):
        '''Return the output shape for a 5D `input_shape`.

        # Raises
            Exception: if `dim_ordering` is neither 'th' nor 'tf'.
        '''
        if self.dim_ordering == 'th':
            rows, cols = input_shape[3], input_shape[4]
        elif self.dim_ordering == 'tf':
            rows, cols = input_shape[2], input_shape[3]
        else:
            # str() keeps the intended error when dim_ordering is None
            # (the constructor default) instead of failing with a
            # TypeError on the string concatenation.
            raise Exception('Invalid dim_ordering: ' +
                            str(self.dim_ordering))

        rows = conv_output_length(rows, self.nb_row,
                                  self.border_mode, self.subsample[0])
        cols = conv_output_length(cols, self.nb_col,
                                  self.border_mode, self.subsample[1])

        # dim_ordering was validated above, so only 'th'/'tf' remain.
        if self.dim_ordering == 'th':
            frame_shape = (self.nb_filter, rows, cols)
        else:
            frame_shape = (rows, cols, self.nb_filter)

        if self.return_sequences:
            return (input_shape[0], input_shape[1]) + frame_shape
        return (input_shape[0],) + frame_shape

    def step(self, x, states):
        '''Single recurrence step; must be implemented by subclasses.'''
        raise NotImplementedError

    def get_constants(self, X, train=False):
        '''Constants handed to `K.rnn`; none by default.'''
        return None

    def get_initial_states(self, X):
        '''Build two identical all-zero initial states from the input.'''
        # (samples, timesteps, row, col, filter)
        initial_state = K.zeros_like(X)
        # (samples, row, col, filter)
        initial_state = K.sum(initial_state, axis=1)
        # Convolving zeros with a zero kernel yields zeros that already
        # have the shape of the layer's per-timestep output.
        initial_state = self.conv_step(initial_state, K.zeros(self.W_shape),
                                       border_mode=self.border_mode)

        return [initial_state, initial_state]

    def preprocess_input(self, x):
        '''Hook applied to the input before `K.rnn`; identity here.'''
        return x

    def call(self, x, mask=None):
        '''Run the recurrence over the time axis of the 5D tensor `x`.

        Returns the full output sequence if `return_sequences` is set,
        otherwise only the last output.
        '''
        assert K.ndim(x) == 5
        input_shape = self.input_spec[0].shape

        if self.stateful:
            initial_states = self.states
        else:
            initial_states = self.get_initial_states(x)

        constants = self.get_constants(x)
        preprocessed_input = self.preprocess_input(x)

        last_output, outputs, states = K.rnn(self.step, preprocessed_input,
                                             initial_states,
                                             go_backwards=self.go_backwards,
                                             mask=mask,
                                             constants=constants,
                                             unroll=False,
                                             input_length=input_shape[1])
        if self.stateful:
            # carry the final states over to the next batch
            self.updates = [(old, new)
                            for old, new in zip(self.states, states)]

        if self.return_sequences:
            return outputs
        return last_output

    def get_config(self):
        '''Return the layer configuration as a dict.'''
        config = {'return_sequences': self.return_sequences,
                  'go_backwards': self.go_backwards,
                  'stateful': self.stateful}
        if self.stateful:
            config['batch_input_shape'] = self.input_spec[0].shape

        base_config = super(ConvRecurrent2D, self).get_config()
        return dict(list(base_config.items()) + list(config.items()))
|
||||
|
||||
|
||||
class ConvLSTM2D(ConvRecurrent2D):
    '''Convolutional LSTM.

    # Input shape
        - if dim_ordering='th'
            5D tensor with shape:
            `(samples, time, channels, rows, cols)`
        - if dim_ordering='tf'
            5D tensor with shape:
            `(samples, time, rows, cols, channels)`

    # Output shape
        - if `return_sequences`
             - if dim_ordering='th'
                5D tensor with shape:
                `(samples, time, nb_filter, output_row, output_col)`
             - if dim_ordering='tf'
                5D tensor with shape:
                `(samples, time, output_row, output_col, nb_filter)`
        - else
            - if dim_ordering='th'
                4D tensor with shape:
                `(samples, nb_filter, output_row, output_col)`
            - if dim_ordering='tf'
                4D tensor with shape:
                `(samples, output_row, output_col, nb_filter)`

        where output_row and output_col depend on the shape of the
        filter, the border_mode and the subsample factors.

    # Arguments
        nb_filter: Number of convolution filters to use.
        nb_row: Number of rows in the convolution kernel.
        nb_col: Number of columns in the convolution kernel.
        border_mode: 'valid' or 'same'. Only applies to the input
            convolutions; the recurrent (hidden-state) convolutions
            always use 'same'.
        subsample: tuple of length 2. Factor by which to subsample output.
            Also called strides elsewhere.
        dim_ordering: 'tf' if the features are in the last dimension,
            'th' if they follow the time dimension, or 'default' to use
            `K.image_dim_ordering()`.
        stateful: Boolean (default False). If True, the last state
            for each sample at index i in a batch will be used as initial
            state for the sample of index i in the following batch.
        init: weight initialization function.
            Can be the name of an existing function (str),
            or a Theano function
            (see: [initializations](../initializations.md)).
        inner_init: initialization function of the inner cells.
        forget_bias_init: initialization function for the bias of the
            forget gate.
            [Jozefowicz et al.](http://www.jmlr.org/proceedings/papers/v37/jozefowicz15.pdf)
            recommend initializing with ones.
        activation: activation function.
            Can be the name of an existing function (str),
            or a Theano function (see: [activations](../activations.md)).
        inner_activation: activation function for the inner cells.
        W_regularizer: regularizer (resolved through `regularizers.get`)
            applied to the concatenated input kernels.
        U_regularizer: same, for the concatenated recurrent kernels.
        b_regularizer: same, for the concatenated biases.
        dropout_W: float. Dropout is applied to the input of the input
            convolutions only when the value is strictly between 0 and 1.
        dropout_U: float. Same, for the hidden state fed to the
            recurrent convolutions.
        weights: (via kwargs) list of numpy arrays passed to
            `set_weights` at the end of `build` -- presumably in the
            order of `trainable_weights`; confirm against `Layer`.

    # References
        - [Convolutional LSTM Network: A Machine Learning Approach for
        Precipitation Nowcasting](http://arxiv.org/pdf/1506.04214v1.pdf)
        The current implementation does not include the feedback loop on the
        cells output
    '''
    def __init__(self, nb_filter, nb_row, nb_col,
                 init='glorot_uniform', inner_init='orthogonal',
                 forget_bias_init='one', activation='tanh',
                 inner_activation='hard_sigmoid',
                 dim_ordering='default',
                 border_mode='valid', subsample=(1, 1),
                 W_regularizer=None, U_regularizer=None, b_regularizer=None,
                 dropout_W=0., dropout_U=0., **kwargs):

        if dim_ordering == 'default':
            dim_ordering = K.image_dim_ordering()
        if dim_ordering not in {'tf', 'th'}:
            raise ValueError('dim_ordering must be in {tf,th}', dim_ordering)
        self.nb_filter = nb_filter
        self.nb_row = nb_row
        self.nb_col = nb_col
        self.init = initializations.get(init)
        self.inner_init = initializations.get(inner_init)
        self.forget_bias_init = initializations.get(forget_bias_init)
        self.activation = activations.get(activation)
        self.inner_activation = activations.get(inner_activation)
        self.border_mode = border_mode
        # Normalised to a tuple so a list (e.g. one coming back from a
        # serialized config) behaves the same as the default.
        self.subsample = tuple(subsample)

        if dim_ordering == 'th':
            warnings.warn('Be carefull if used with convolution3D layers:\n'
                          'th in convolution 3D corresponds to '
                          '(samples, channels, conv_dim1, conv_dim2,'
                          'conv_dim3)\n'
                          'while for this network it corresponds to: '
                          '(samples, time, channels, rows, cols)')
        self.dim_ordering = dim_ordering

        # The base class constructor stores these as well.
        kwargs['nb_filter'] = nb_filter
        kwargs['nb_row'] = nb_row
        kwargs['nb_col'] = nb_col
        kwargs['dim_ordering'] = dim_ordering

        self.W_regularizer = regularizers.get(W_regularizer)
        self.U_regularizer = regularizers.get(U_regularizer)
        self.b_regularizer = regularizers.get(b_regularizer)
        self.dropout_W, self.dropout_U = dropout_W, dropout_U
        if self.dropout_W or self.dropout_U:
            self.uses_learning_phase = True

        super(ConvLSTM2D, self).__init__(**kwargs)

    def build(self, input_shape):
        '''Create the kernels, biases, states and regularizers.

        # Arguments
            input_shape: 5D shape tuple; the channel count is read from
                index 2 ('th') or index 4 ('tf').
        '''
        self.input_spec = [InputSpec(shape=input_shape)]

        # W_shape: input -> filters kernel; W_shape1: filters -> filters
        # (recurrent) kernel.
        if self.dim_ordering == 'th':
            stack_size = input_shape[2]
            self.W_shape = (self.nb_filter, stack_size,
                            self.nb_row, self.nb_col)
            self.W_shape1 = (self.nb_filter, self.nb_filter,
                             self.nb_row, self.nb_col)
        elif self.dim_ordering == 'tf':
            stack_size = input_shape[4]
            self.W_shape = (self.nb_row, self.nb_col,
                            stack_size, self.nb_filter)
            self.W_shape1 = (self.nb_row, self.nb_col,
                             self.nb_filter, self.nb_filter)
        else:
            raise Exception('Invalid dim_ordering: ' + self.dim_ordering)

        if self.stateful:
            self.reset_states()
        else:
            # placeholder list; non-stateful runs compute their initial
            # states from the input in `call`
            self.states = [None, None, None, None]

        # input gate
        self.W_i = self.init(self.W_shape, name='{}_W_i'.format(self.name))
        self.U_i = self.inner_init(self.W_shape1,
                                   name='{}_U_i'.format(self.name))
        self.b_i = K.zeros((self.nb_filter,), name='{}_b_i'.format(self.name))

        # forget gate
        self.W_f = self.init(self.W_shape, name='{}_W_f'.format(self.name))
        self.U_f = self.inner_init(self.W_shape1,
                                   name='{}_U_f'.format(self.name))
        self.b_f = self.forget_bias_init((self.nb_filter,),
                                         name='{}_b_f'.format(self.name))

        # cell candidate
        self.W_c = self.init(self.W_shape, name='{}_W_c'.format(self.name))
        self.U_c = self.inner_init(self.W_shape1,
                                   name='{}_U_c'.format(self.name))
        self.b_c = K.zeros((self.nb_filter,), name='{}_b_c'.format(self.name))

        # output gate
        self.W_o = self.init(self.W_shape, name='{}_W_o'.format(self.name))
        self.U_o = self.inner_init(self.W_shape1,
                                   name='{}_U_o'.format(self.name))
        self.b_o = K.zeros((self.nb_filter,), name='{}_b_o'.format(self.name))

        # NOTE: the order here (i, c, f, o) differs from the
        # concatenation order below (i, f, c, o); it is kept as-is
        # because previously saved weight lists depend on it.
        self.trainable_weights = [self.W_i, self.U_i, self.b_i,
                                  self.W_c, self.U_c, self.b_c,
                                  self.W_f, self.U_f, self.b_f,
                                  self.W_o, self.U_o, self.b_o]

        # Concatenated views, used only as regularizer targets here.
        self.W = K.concatenate([self.W_i, self.W_f, self.W_c, self.W_o])
        self.U = K.concatenate([self.U_i, self.U_f, self.U_c, self.U_o])
        self.b = K.concatenate([self.b_i, self.b_f, self.b_c, self.b_o])

        self.regularizers = []
        if self.W_regularizer:
            self.W_regularizer.set_param(self.W)
            self.regularizers.append(self.W_regularizer)
        if self.U_regularizer:
            self.U_regularizer.set_param(self.U)
            self.regularizers.append(self.U_regularizer)
        if self.b_regularizer:
            self.b_regularizer.set_param(self.b)
            self.regularizers.append(self.b_regularizer)

        if self.initial_weights is not None:
            self.set_weights(self.initial_weights)
            del self.initial_weights
        self.built = True

    def _state_shape(self):
        '''Shape of one hidden/cell state, batch axis included.'''
        input_shape = self.input_spec[0].shape
        output_shape = self.get_output_shape_for(input_shape)
        # Skip the time axis when full sequences are returned. The three
        # remaining entries follow dim_ordering: (filter, row, col) for
        # 'th', (row, col, filter) for 'tf'.
        if self.return_sequences:
            dim1, dim2, dim3 = output_shape[2:]
        else:
            dim1, dim2, dim3 = output_shape[1:]
        return (input_shape[0], dim1, dim2, dim3)

    def reset_states(self):
        '''Zero the hidden and cell states of a stateful layer.

        # Raises
            Exception: if the batch size of the input shape is unknown.
        '''
        assert self.stateful, 'Layer must be stateful.'
        input_shape = self.input_spec[0].shape
        state_shape = self._state_shape()
        if not input_shape[0]:
            raise Exception('If a RNN is stateful, a complete ' +
                            'input_shape must be provided ' +
                            '(including batch size).')

        if hasattr(self, 'states'):
            # reuse the existing state variables
            K.set_value(self.states[0], np.zeros(state_shape))
            K.set_value(self.states[1], np.zeros(state_shape))
        else:
            self.states = [K.zeros(state_shape), K.zeros(state_shape)]

    def conv_step(self, x, W, b=None, border_mode='valid'):
        '''Convolve one timestep of input `x` with kernel `W`.

        # Arguments
            x: 4D tensor (the input with the time axis removed).
            W: kernel of shape `self.W_shape`.
            b: optional bias of length `nb_filter`, broadcast over the
                channel axis.
            border_mode: border mode passed to `K.conv2d`.
        '''
        input_shape = self.input_spec[0].shape

        conv_out = K.conv2d(x, W, strides=self.subsample,
                            border_mode=border_mode,
                            dim_ordering=self.dim_ordering,
                            image_shape=(input_shape[0],
                                         input_shape[2],
                                         input_shape[3],
                                         input_shape[4]),
                            filter_shape=self.W_shape)
        # `b` is a symbolic tensor: test for presence explicitly rather
        # than relying on its truth value.
        if b is not None:
            if self.dim_ordering == 'th':
                conv_out = conv_out + K.reshape(b, (1, self.nb_filter, 1, 1))
            elif self.dim_ordering == 'tf':
                conv_out = conv_out + K.reshape(b, (1, 1, 1, self.nb_filter))
            else:
                raise Exception('Invalid dim_ordering: ' + self.dim_ordering)

        return conv_out

    def conv_step_hidden(self, x, W, border_mode='valid'):
        '''Convolve the hidden state `x` with recurrent kernel `W`.

        Kept separate from `conv_step` because the image shape handed to
        `K.conv2d` is the output-space shape and the stride is always
        (1, 1).
        '''
        conv_out = K.conv2d(x, W, strides=(1, 1),
                            border_mode=border_mode,
                            dim_ordering=self.dim_ordering,
                            image_shape=self._state_shape(),
                            filter_shape=self.W_shape1)

        return conv_out

    def step(self, x, states):
        '''One LSTM step.

        # Arguments
            x: input for the current timestep.
            states: `[h_tm1, c_tm1, B_U, B_W]` -- previous hidden and
                cell states followed by the two dropout constants from
                `get_constants`.

        # Returns
            `(h, [h, c])`: the output and the new states.
        '''
        assert len(states) == 4
        h_tm1 = states[0]
        c_tm1 = states[1]
        B_U = states[2]
        B_W = states[3]

        x_i = self.conv_step(x * B_W[0], self.W_i, self.b_i,
                             border_mode=self.border_mode)
        x_f = self.conv_step(x * B_W[1], self.W_f, self.b_f,
                             border_mode=self.border_mode)
        x_c = self.conv_step(x * B_W[2], self.W_c, self.b_c,
                             border_mode=self.border_mode)
        x_o = self.conv_step(x * B_W[3], self.W_o, self.b_o,
                             border_mode=self.border_mode)

        # U : from nb_filter to nb_filter
        # 'same' because the state must keep its shape in the output space
        h_i = self.conv_step_hidden(h_tm1 * B_U[0], self.U_i,
                                    border_mode='same')
        h_f = self.conv_step_hidden(h_tm1 * B_U[1], self.U_f,
                                    border_mode='same')
        h_c = self.conv_step_hidden(h_tm1 * B_U[2], self.U_c,
                                    border_mode='same')
        h_o = self.conv_step_hidden(h_tm1 * B_U[3], self.U_o,
                                    border_mode='same')

        i = self.inner_activation(x_i + h_i)
        f = self.inner_activation(x_f + h_f)
        c = f * c_tm1 + i * self.activation(x_c + h_c)
        o = self.inner_activation(x_o + h_o)
        h = o * self.activation(c)

        return h, [h, c]

    def get_constants(self, x):
        '''Build the dropout constants `[B_U, B_W]`, one entry per gate.

        Each is a list of 4 dropout masks when the matching rate is
        strictly between 0 and 1, otherwise a list of 4 scalar ones.
        '''
        constants = []
        if 0 < self.dropout_U < 1:
            # zeros with the time axis summed away ...
            ones = K.zeros_like(x)
            ones = K.sum(ones, axis=1)
            # ... pushed through a zero-kernel convolution to obtain the
            # shape of the hidden state, then shifted to ones
            ones = self.conv_step(ones, K.zeros(self.W_shape),
                                  border_mode=self.border_mode)
            ones = ones + 1
            B_U = [K.in_train_phase(K.dropout(ones, self.dropout_U), ones)
                   for _ in range(4)]
            constants.append(B_U)
        else:
            constants.append([K.cast_to_floatx(1.) for _ in range(4)])

        if 0 < self.dropout_W < 1:
            # input-space mask: same shape as one timestep of the input
            ones = K.zeros_like(x)
            ones = K.sum(ones, axis=1)
            ones = ones + 1
            B_W = [K.in_train_phase(K.dropout(ones, self.dropout_W), ones)
                   for _ in range(4)]
            constants.append(B_W)
        else:
            constants.append([K.cast_to_floatx(1.) for _ in range(4)])
        return constants

    def get_config(self):
        '''Return the layer configuration as a dict.

        NOTE(review): the regularizers are not serialized here; confirm
        how the other recurrent layers export them before adding them.
        '''
        config = {'nb_filter': self.nb_filter,
                  'nb_row': self.nb_row,
                  'nb_col': self.nb_col,
                  'init': self.init.__name__,
                  'inner_init': self.inner_init.__name__,
                  'forget_bias_init': self.forget_bias_init.__name__,
                  'activation': self.activation.__name__,
                  'dim_ordering': self.dim_ordering,
                  'border_mode': self.border_mode,
                  'subsample': self.subsample,
                  'inner_activation': self.inner_activation.__name__,
                  'dropout_W': self.dropout_W,
                  'dropout_U': self.dropout_U}
        base_config = super(ConvLSTM2D, self).get_config()
        return dict(list(base_config.items()) + list(config.items()))
|
||||
+39
-3
@@ -96,6 +96,37 @@ class Dropout(Layer):
|
||||
return dict(list(base_config.items()) + list(config.items()))
|
||||
|
||||
|
||||
class SpatialDropout1D(Dropout):
    '''Dropout variant that drops whole 1D feature maps.

    It does the same job as Dropout, but removes entire feature maps
    rather than single elements. When neighbouring frames inside a
    feature map are strongly correlated (as is usually the case in early
    convolution layers), element-wise dropout fails to regularize the
    activations and merely lowers the effective learning rate.
    SpatialDropout1D promotes independence between feature maps in that
    situation and should be preferred.

    # Arguments
        p: float between 0 and 1. Fraction of the input units to drop.

    # Input shape
        3D tensor with shape:
        `(samples, timesteps, channels)`

    # Output shape
        Same as input

    # References
        - [Efficient Object Localization Using Convolutional Networks](https://arxiv.org/pdf/1411.4280.pdf)
    '''
    def __init__(self, p, **kwargs):
        super(SpatialDropout1D, self).__init__(p, **kwargs)

    def _get_noise_shape(self, x):
        # Size 1 on the timesteps axis: one noise value per
        # (sample, channel) pair.
        shape = K.shape(x)
        return (shape[0], 1, shape[2])
|
||||
|
||||
|
||||
class SpatialDropout2D(Dropout):
|
||||
'''This version performs the same function as Dropout, however it drops
|
||||
entire 2D feature maps instead of individual elements. If adjacent pixels
|
||||
@@ -661,7 +692,8 @@ class Dense(Layer):
|
||||
# Output shape
|
||||
2D tensor with shape: `(nb_samples, output_dim)`.
|
||||
'''
|
||||
def __init__(self, output_dim, init='glorot_uniform', activation='linear', weights=None,
|
||||
def __init__(self, output_dim, init='glorot_uniform',
|
||||
activation=None, weights=None,
|
||||
W_regularizer=None, b_regularizer=None, activity_regularizer=None,
|
||||
W_constraint=None, b_constraint=None,
|
||||
bias=True, input_dim=None, **kwargs):
|
||||
@@ -722,6 +754,7 @@ class Dense(Layer):
|
||||
if self.initial_weights is not None:
|
||||
self.set_weights(self.initial_weights)
|
||||
del self.initial_weights
|
||||
self.built = True
|
||||
|
||||
def call(self, x, mask=None):
|
||||
output = K.dot(x, self.W)
|
||||
@@ -890,6 +923,7 @@ class MaxoutDense(Layer):
|
||||
if self.initial_weights is not None:
|
||||
self.set_weights(self.initial_weights)
|
||||
del self.initial_weights
|
||||
self.built = True
|
||||
|
||||
def get_output_shape_for(self, input_shape):
|
||||
assert input_shape and len(input_shape) == 2
|
||||
@@ -962,7 +996,7 @@ class Highway(Layer):
|
||||
- [Highway Networks](http://arxiv.org/pdf/1505.00387v2.pdf)
|
||||
'''
|
||||
def __init__(self, init='glorot_uniform', transform_bias=-2,
|
||||
activation='linear', weights=None,
|
||||
activation=None, weights=None,
|
||||
W_regularizer=None, b_regularizer=None, activity_regularizer=None,
|
||||
W_constraint=None, b_constraint=None,
|
||||
bias=True, input_dim=None, **kwargs):
|
||||
@@ -1027,6 +1061,7 @@ class Highway(Layer):
|
||||
if self.initial_weights is not None:
|
||||
self.set_weights(self.initial_weights)
|
||||
del self.initial_weights
|
||||
self.built = True
|
||||
|
||||
def call(self, x, mask=None):
|
||||
y = K.dot(x, self.W_carry)
|
||||
@@ -1105,7 +1140,7 @@ class TimeDistributedDense(Layer):
|
||||
'''
|
||||
|
||||
def __init__(self, output_dim,
|
||||
init='glorot_uniform', activation='linear', weights=None,
|
||||
init='glorot_uniform', activation=None, weights=None,
|
||||
W_regularizer=None, b_regularizer=None, activity_regularizer=None,
|
||||
W_constraint=None, b_constraint=None,
|
||||
bias=True, input_dim=None, input_length=None, **kwargs):
|
||||
@@ -1167,6 +1202,7 @@ class TimeDistributedDense(Layer):
|
||||
if self.initial_weights is not None:
|
||||
self.set_weights(self.initial_weights)
|
||||
del self.initial_weights
|
||||
self.built = True
|
||||
|
||||
def get_output_shape_for(self, input_shape):
|
||||
return (input_shape[0], input_shape[1], self.output_dim)
|
||||
|
||||
@@ -110,6 +110,7 @@ class Embedding(Layer):
|
||||
|
||||
if self.initial_weights is not None:
|
||||
self.set_weights(self.initial_weights)
|
||||
self.built = True
|
||||
|
||||
def compute_mask(self, x, mask=None):
|
||||
if not self.mask_zero:
|
||||
|
||||
@@ -75,7 +75,7 @@ class LocallyConnected1D(Layer):
|
||||
`steps` value might have changed due to padding.
|
||||
'''
|
||||
def __init__(self, nb_filter, filter_length,
|
||||
init='uniform', activation='linear', weights=None,
|
||||
init='glorot_uniform', activation=None, weights=None,
|
||||
border_mode='valid', subsample_length=1,
|
||||
W_regularizer=None, b_regularizer=None, activity_regularizer=None,
|
||||
W_constraint=None, b_constraint=None,
|
||||
@@ -139,6 +139,7 @@ class LocallyConnected1D(Layer):
|
||||
if self.initial_weights is not None:
|
||||
self.set_weights(self.initial_weights)
|
||||
del self.initial_weights
|
||||
self.built = True
|
||||
|
||||
def get_output_shape_for(self, input_shape):
|
||||
length = conv_output_length(input_shape[1],
|
||||
@@ -257,7 +258,7 @@ class LocallyConnected2D(Layer):
|
||||
`rows` and `cols` values might have changed due to padding.
|
||||
'''
|
||||
def __init__(self, nb_filter, nb_row, nb_col,
|
||||
init='glorot_uniform', activation='linear', weights=None,
|
||||
init='glorot_uniform', activation=None, weights=None,
|
||||
border_mode='valid', subsample=(1, 1),
|
||||
dim_ordering='default',
|
||||
W_regularizer=None, b_regularizer=None, activity_regularizer=None,
|
||||
@@ -333,6 +334,7 @@ class LocallyConnected2D(Layer):
|
||||
if self.initial_weights is not None:
|
||||
self.set_weights(self.initial_weights)
|
||||
del self.initial_weights
|
||||
self.built = True
|
||||
|
||||
def get_output_shape_for(self, input_shape):
|
||||
if self.dim_ordering == 'th':
|
||||
|
||||
@@ -104,7 +104,6 @@ class BatchNormalization(Layer):
|
||||
self.set_weights(self.initial_weights)
|
||||
del self.initial_weights
|
||||
self.built = True
|
||||
self.called_with = None
|
||||
|
||||
def call(self, x, mask=None):
|
||||
if self.mode == 0 or self.mode == 2:
|
||||
@@ -122,23 +121,12 @@ class BatchNormalization(Layer):
|
||||
epsilon=self.epsilon)
|
||||
else:
|
||||
# mode 0
|
||||
if self.called_with not in {None, x}:
|
||||
raise Exception('You are attempting to share a '
|
||||
'same `BatchNormalization` layer across '
|
||||
'different data flows. '
|
||||
'This is not possible. '
|
||||
'You should use `mode=2` in '
|
||||
'`BatchNormalization`, which has '
|
||||
'a similar behavior but is shareable '
|
||||
'(see docs for a description of '
|
||||
'the behavior).')
|
||||
self.called_with = x
|
||||
x_normed, mean, std = K.normalize_batch_in_training(
|
||||
x, self.gamma, self.beta, reduction_axes,
|
||||
epsilon=self.epsilon)
|
||||
|
||||
self.updates = [K.moving_average_update(self.running_mean, mean, self.momentum),
|
||||
K.moving_average_update(self.running_std, std, self.momentum)]
|
||||
self.add_updates([K.moving_average_update(self.running_mean, mean, self.momentum),
|
||||
K.moving_average_update(self.running_std, std, self.momentum)], x)
|
||||
|
||||
if K.backend() == 'tensorflow' and sorted(reduction_axes) == range(K.ndim(x))[:-1]:
|
||||
x_normed_running = K.batch_normalization(
|
||||
@@ -168,11 +156,11 @@ class BatchNormalization(Layer):
|
||||
return x_normed
|
||||
|
||||
def get_config(self):
|
||||
config = {"epsilon": self.epsilon,
|
||||
"mode": self.mode,
|
||||
"axis": self.axis,
|
||||
"gamma_regularizer": self.gamma_regularizer.get_config() if self.gamma_regularizer else None,
|
||||
"beta_regularizer": self.beta_regularizer.get_config() if self.beta_regularizer else None,
|
||||
"momentum": self.momentum}
|
||||
config = {'epsilon': self.epsilon,
|
||||
'mode': self.mode,
|
||||
'axis': self.axis,
|
||||
'gamma_regularizer': self.gamma_regularizer.get_config() if self.gamma_regularizer else None,
|
||||
'beta_regularizer': self.beta_regularizer.get_config() if self.beta_regularizer else None,
|
||||
'momentum': self.momentum}
|
||||
base_config = super(BatchNormalization, self).get_config()
|
||||
return dict(list(base_config.items()) + list(config.items()))
|
||||
|
||||
@@ -34,14 +34,12 @@ class _Pooling1D(Layer):
|
||||
raise NotImplementedError
|
||||
|
||||
def call(self, x, mask=None):
|
||||
x = K.expand_dims(x, -1) # add dummy last dimension
|
||||
x = K.permute_dimensions(x, (0, 2, 1, 3))
|
||||
x = K.expand_dims(x, 2) # add dummy last dimension
|
||||
output = self._pooling_function(inputs=x, pool_size=self.pool_size,
|
||||
strides=self.st,
|
||||
border_mode=self.border_mode,
|
||||
dim_ordering='th')
|
||||
output = K.permute_dimensions(output, (0, 2, 1, 3))
|
||||
return K.squeeze(output, 3) # remove dummy last dimension
|
||||
dim_ordering='tf')
|
||||
return K.squeeze(output, 2) # remove dummy last dimension
|
||||
|
||||
def get_config(self):
|
||||
config = {'stride': self.stride,
|
||||
@@ -66,7 +64,6 @@ class MaxPooling1D(_Pooling1D):
|
||||
2 will halve the input.
|
||||
If None, it will default to `pool_length`.
|
||||
border_mode: 'valid' or 'same'.
|
||||
Note: 'same' will only work with TensorFlow for the time being.
|
||||
'''
|
||||
|
||||
def __init__(self, pool_length=2, stride=None,
|
||||
@@ -89,7 +86,6 @@ class AveragePooling1D(_Pooling1D):
|
||||
stride: integer, or None. Stride value.
|
||||
If None, it will default to `pool_length`.
|
||||
border_mode: 'valid' or 'same'.
|
||||
Note: 'same' will only work with TensorFlow for the time being.
|
||||
|
||||
# Input shape
|
||||
3D tensor with shape: `(samples, steps, features)`.
|
||||
@@ -181,7 +177,6 @@ class MaxPooling2D(_Pooling2D):
|
||||
strides: tuple of 2 integers, or None. Strides values.
|
||||
If None, it will default to `pool_size`.
|
||||
border_mode: 'valid' or 'same'.
|
||||
Note: 'same' will only work with TensorFlow for the time being.
|
||||
dim_ordering: 'th' or 'tf'. In 'th' mode, the channels dimension
|
||||
(the depth) is at index 1, in 'tf' mode is it at index 3.
|
||||
It defaults to the `image_dim_ordering` value found in your
|
||||
@@ -223,7 +218,6 @@ class AveragePooling2D(_Pooling2D):
|
||||
strides: tuple of 2 integers, or None. Strides values.
|
||||
If None, it will default to `pool_size`.
|
||||
border_mode: 'valid' or 'same'.
|
||||
Note: 'same' will only work with TensorFlow for the time being.
|
||||
dim_ordering: 'th' or 'tf'. In 'th' mode, the channels dimension
|
||||
(the depth) is at index 1, in 'tf' mode is it at index 3.
|
||||
It defaults to the `image_dim_ordering` value found in your
|
||||
|
||||
@@ -199,6 +199,18 @@ class Recurrent(Layer):
|
||||
# note that the .build() method of subclasses MUST define
|
||||
# self.input_spec with a complete input shape.
|
||||
input_shape = self.input_spec[0].shape
|
||||
if self.unroll and input_shape[1] is None:
|
||||
raise ValueError('Cannot unroll a RNN if the '
|
||||
'time dimension is undefined. \n'
|
||||
'- If using a Sequential model, '
|
||||
'specify the time dimension by passing '
|
||||
'an `input_shape` or `batch_input_shape` '
|
||||
'argument to your first layer. If your '
|
||||
'first layer is an Embedding, you can '
|
||||
'also use the `input_length` argument.\n'
|
||||
'- If using the functional API, specify '
|
||||
'the time dimension by passing a `shape` '
|
||||
'or `batch_shape` argument to your Input layer.')
|
||||
if self.stateful:
|
||||
initial_states = self.states
|
||||
else:
|
||||
@@ -214,9 +226,10 @@ class Recurrent(Layer):
|
||||
unroll=self.unroll,
|
||||
input_length=input_shape[1])
|
||||
if self.stateful:
|
||||
self.updates = []
|
||||
updates = []
|
||||
for i in range(len(states)):
|
||||
self.updates.append((self.states[i], states[i]))
|
||||
updates.append((self.states[i], states[i]))
|
||||
self.add_updates(updates, x)
|
||||
|
||||
if self.return_sequences:
|
||||
return outputs
|
||||
@@ -229,7 +242,7 @@ class Recurrent(Layer):
|
||||
'stateful': self.stateful,
|
||||
'unroll': self.unroll,
|
||||
'consume_less': self.consume_less}
|
||||
if self.stateful:
|
||||
if self.stateful and self.input_spec[0].shape:
|
||||
config['batch_input_shape'] = self.input_spec[0].shape
|
||||
else:
|
||||
config['input_dim'] = self.input_dim
|
||||
@@ -313,13 +326,22 @@ class SimpleRNN(Recurrent):
|
||||
if self.initial_weights is not None:
|
||||
self.set_weights(self.initial_weights)
|
||||
del self.initial_weights
|
||||
self.built = True
|
||||
|
||||
def reset_states(self):
|
||||
assert self.stateful, 'Layer must be stateful.'
|
||||
input_shape = self.input_spec[0].shape
|
||||
if not input_shape[0]:
|
||||
raise Exception('If a RNN is stateful, a complete ' +
|
||||
'input_shape must be provided (including batch size).')
|
||||
raise Exception('If a RNN is stateful, it needs to know '
|
||||
'its batch size. Specify the batch size '
|
||||
'of your input tensors: \n'
|
||||
'- If using a Sequential model, '
|
||||
'specify the batch size by passing '
|
||||
'a `batch_input_shape` '
|
||||
'argument to your first layer.\n'
|
||||
'- If using the functional API, specify '
|
||||
'the time dimension by passing a '
|
||||
'`batch_shape` argument to your Input layer.')
|
||||
if hasattr(self, 'states'):
|
||||
K.set_value(self.states[0],
|
||||
np.zeros((input_shape[0], self.output_dim)))
|
||||
@@ -363,7 +385,7 @@ class SimpleRNN(Recurrent):
|
||||
input_shape = self.input_spec[0].shape
|
||||
input_dim = input_shape[-1]
|
||||
ones = K.ones_like(K.reshape(x[:, 0, 0], (-1, 1)))
|
||||
ones = K.tile(ones, (1, input_dim))
|
||||
ones = K.tile(ones, (1, int(input_dim)))
|
||||
B_W = K.in_train_phase(K.dropout(ones, self.dropout_W), ones)
|
||||
constants.append(B_W)
|
||||
else:
|
||||
@@ -495,6 +517,7 @@ class GRU(Recurrent):
|
||||
if self.initial_weights is not None:
|
||||
self.set_weights(self.initial_weights)
|
||||
del self.initial_weights
|
||||
self.built = True
|
||||
|
||||
def reset_states(self):
|
||||
assert self.stateful, 'Layer must be stateful.'
|
||||
@@ -577,7 +600,7 @@ class GRU(Recurrent):
|
||||
input_shape = self.input_spec[0].shape
|
||||
input_dim = input_shape[-1]
|
||||
ones = K.ones_like(K.reshape(x[:, 0, 0], (-1, 1)))
|
||||
ones = K.tile(ones, (1, input_dim))
|
||||
ones = K.tile(ones, (1, int(input_dim)))
|
||||
B_W = [K.in_train_phase(K.dropout(ones, self.dropout_W), ones) for _ in range(3)]
|
||||
constants.append(B_W)
|
||||
else:
|
||||
@@ -725,6 +748,7 @@ class LSTM(Recurrent):
|
||||
if self.initial_weights is not None:
|
||||
self.set_weights(self.initial_weights)
|
||||
del self.initial_weights
|
||||
self.built = True
|
||||
|
||||
def reset_states(self):
|
||||
assert self.stateful, 'Layer must be stateful.'
|
||||
@@ -817,7 +841,7 @@ class LSTM(Recurrent):
|
||||
input_shape = self.input_spec[0].shape
|
||||
input_dim = input_shape[-1]
|
||||
ones = K.ones_like(K.reshape(x[:, 0, 0], (-1, 1)))
|
||||
ones = K.tile(ones, (1, input_dim))
|
||||
ones = K.tile(ones, (1, int(input_dim)))
|
||||
B_W = [K.in_train_phase(K.dropout(ones, self.dropout_W), ones) for _ in range(4)]
|
||||
constants.append(B_W)
|
||||
else:
|
||||
|
||||
@@ -112,23 +112,11 @@ class TimeDistributed(Wrapper):
|
||||
def step(x, states):
|
||||
output = self.layer.call(x)
|
||||
return output, []
|
||||
input_length = input_shape[1]
|
||||
if K.backend() == 'tensorflow' and len(input_shape) > 3:
|
||||
if input_length is None:
|
||||
raise Exception('When using TensorFlow, you should define '
|
||||
'explicitly the number of timesteps of '
|
||||
'your sequences.\n'
|
||||
'If your first layer is an Embedding, '
|
||||
'make sure to pass it an "input_length" '
|
||||
'argument. Otherwise, make sure '
|
||||
'the first layer has '
|
||||
'an "input_shape" or "batch_input_shape" '
|
||||
'argument, including the time axis.')
|
||||
unroll = True
|
||||
else:
|
||||
unroll = False
|
||||
last_output, outputs, states = K.rnn(step, X,
|
||||
initial_states=[], input_length=input_length, unroll=unroll)
|
||||
|
||||
_, outputs, _ = K.rnn(step, X,
|
||||
initial_states=[],
|
||||
input_length=input_shape[1],
|
||||
unroll=False)
|
||||
y = outputs
|
||||
else:
|
||||
# no batch size specified, therefore the layer will be able
|
||||
|
||||
@@ -1,777 +0,0 @@
|
||||
from collections import OrderedDict
|
||||
import warnings
|
||||
import copy
|
||||
|
||||
from .. import backend as K
|
||||
from ..layers import InputLayer, Layer, Merge
|
||||
from ..engine.training import Model
|
||||
|
||||
|
||||
class Graph(Model):
|
||||
'''Arbitrary connection graph.
|
||||
|
||||
THIS IS A LEGACY MODEL AND SHOULD NOT BE USED
|
||||
except for backwards compatibility support.
|
||||
|
||||
For multi-inputs/multi-outputs models, or
|
||||
models using shared layers, use the functional API instead.
|
||||
'''
|
||||
|
||||
def __init__(self, name=None):
    '''Sets up an empty graph with no inputs, nodes or outputs.

    # Arguments
        name: optional string identifier for the model. When falsy,
            a name of the form `graph_<uid>` is generated from
            `K.get_uid('graph_')`.
    '''
    # Attributes expected on any model.
    self.inbound_nodes, self.outbound_nodes = [], []
    self.built = self.supports_masking = False

    # Legacy bookkeeping; everything is prefixed with `_graph_`.
    self._graph_namespace = set()  # identifiers (strings) already taken
    # name -> layer-like object, kept in insertion order
    for registry in ('_graph_nodes', '_graph_inputs', '_graph_outputs'):
        setattr(self, registry, OrderedDict())
    # lists of config dicts, one entry per registered element
    for config_list in ('_graph_input_config',
                        '_graph_output_config',
                        '_graph_node_config'):
        setattr(self, config_list, [])
    self._graph_shared_nodes_names = []

    self.name = name if name else 'graph_' + str(K.get_uid('graph_'))
|
||||
|
||||
def __call__(self, x, mask=None):
    '''Rebuilds the underlying model, then delegates the call to it.

    `build()` is invoked unconditionally on every call, before `x` and
    `mask` are forwarded to the parent class.
    '''
    self.build()
    parent = super(Graph, self)
    return parent.__call__(x, mask)
|
||||
|
||||
def build(self, input_shape=None):
    '''Creates the underlying model from the registered inputs and outputs.

    The `output` tensor of every registered input layer and output layer
    is collected (in registration order) and handed to the parent
    constructor. `input_shape` is accepted but not used here.
    '''
    # Reading `.output` will crash if an input/output layer has multiple
    # nodes; there are no plans to support that case since Graph is
    # deprecated.
    model_inputs = []
    for input_layer in self._graph_inputs.values():
        model_inputs.append(input_layer.output)
    model_outputs = []
    for output_layer in self._graph_outputs.values():
        model_outputs.append(output_layer.output)

    # This is where the model actually gets created.
    super(Graph, self).__init__(model_inputs, model_outputs, name=self.name)
    self.built = True
|
||||
|
||||
def compile(self, optimizer, loss,
            metrics=None,
            sample_weight_modes=None,
            loss_weights=None,
            **kwargs):
    '''Configures the learning process.

    # Arguments
        optimizer: str (name of optimizer) or optimizer object.
            See [optimizers](optimizers.md).
        loss: dictionary mapping the name(s) of the output(s) to
            a loss function (string name of objective function or
            objective function. See [objectives](objectives.md)).
        metrics: list of str (name of metrics) or
            list of metrics functions. See [metrics](metrics.md).
            Defaults to no metrics.
        sample_weight_modes: optional dictionary mapping certain
            output names to a sample weight mode ("temporal" and None
            are the only supported modes). If you need to do
            timestep-wise loss weighting on one of your graph outputs,
            you will need to set the sample weight mode for this output
            to "temporal".
        loss_weights: dictionary you can pass to specify a weight
            coefficient for each loss function (in a multi-output model).
            If no loss weight is specified for an output,
            the weight for this output's loss will be considered to be 1.
        kwargs: for Theano backend, these are passed into K.function.
            Ignored for Tensorflow backend.
    '''
    # A fresh list per call: a `[]` default would be one object shared by
    # every call (and every Graph) that relies on the default.
    if metrics is None:
        metrics = []

    # create the underlying Model
    if not self.built:
        self.build()
    # The parent spells this argument `sample_weight_mode` (singular).
    super(Graph, self).compile(optimizer, loss,
                               metrics=metrics,
                               sample_weight_mode=sample_weight_modes,
                               loss_weights=loss_weights,
                               **kwargs)
|
||||
|
||||
def add_input(self, name, input_shape=None,
              batch_input_shape=None, dtype='float'):
    '''Adds an input to the graph.

    All validation happens before the graph is modified, so a call that
    raises leaves the graph (and its namespace) untouched.

    # Arguments:
        name: string. The name of the new input.
            Must be unique in the graph.
        input_shape: a tuple of integers,
            the expected shape of the input samples.
            Does not include the batch size.
        batch_input_shape: a tuple of integers,
            the expected shape of the whole input batch,
            including the batch size.
        dtype: 'float', or 'int'. Any string starting with 'int' maps
            to 'int32'; any string starting with 'float' maps to
            `K.floatx()`.

    # Raises
        Exception: if `name` is already used in the graph,
            or if `dtype` is neither an int nor a float type.
    '''
    if name in self._graph_namespace:
        raise Exception('Duplicate node identifier: ' + name)

    if dtype[:3] == 'int':
        dtype = 'int32'
    elif dtype[:5] == 'float':
        dtype = K.floatx()
    else:
        raise Exception('Unknown dtype (should be "int" or "float"): ' +
                        str(dtype))

    # create input layer
    input_layer = InputLayer(input_shape=input_shape,
                             batch_input_shape=batch_input_shape,
                             name=name, input_dtype=dtype)

    # Everything is valid: register the input.
    self._graph_namespace.add(name)
    self.built = False
    self._graph_inputs[name] = input_layer

    # append input config to self._graph_input_config;
    # only one of the two shape keys is recorded
    config = {'name': name, 'dtype': dtype}
    if batch_input_shape:
        config['batch_input_shape'] = batch_input_shape
    else:
        config['input_shape'] = input_shape
    self._graph_input_config.append(config)
|
||||
|
||||
def add_node(self, layer, name, input=None, inputs=None,
             merge_mode='concat', concat_axis=-1, dot_axes=-1,
             create_output=False):
    '''Adds a node in the graph. It can be connected to multiple
    inputs, which will first be merged into one tensor
    according to the mode specified.

    # Arguments
        layer: the layer at the node.
        name: name for the node.
        input: when connecting the layer to a single input,
            this is the name of the incoming node.
        inputs: when connecting the layer to multiple inputs,
            this is a list of names of incoming nodes.
            Defaults to an empty list.
        merge_mode: one of {concat, sum, dot, ave, mul}
        concat_axis: when `merge_mode=='concat'`, this is the
            input concatenation axis.
        dot_axes: when `merge_mode='dot'`,
            this is the contraction axes specification;
            see the `Merge` layer for details.
        create_output: boolean. Set this to `True` if you want the output
            of your node to be an output of the graph.

    # Raises
        Exception: if `name` is already used in the graph, or if
            `input` / an entry of `inputs` is not a known identifier.
    '''
    # A fresh list per call: the value ends up in the node config, so a
    # shared `[]` default would be aliased across every recorded config.
    if inputs is None:
        inputs = []

    if name in self._graph_namespace:
        raise Exception('Duplicate node identifier: ' + name)
    self._graph_namespace.add(name)
    layer.name = name
    self.built = False

    if input:
        if input not in self._graph_namespace:
            raise Exception('Unknown node/input identifier: ' + input)
        # NOTE(review): an identifier that is in the namespace but is
        # neither a node nor an input results in no connection being made.
        if input in self._graph_nodes:
            layer.add_inbound_node(self._graph_nodes[input])
        elif input in self._graph_inputs:
            layer.add_inbound_node(self._graph_inputs[input])
    if inputs:
        to_merge = []
        for incoming in inputs:
            if incoming in self._graph_nodes:
                to_merge.append(self._graph_nodes[incoming])
            elif incoming in self._graph_inputs:
                to_merge.append(self._graph_inputs[incoming])
            else:
                raise Exception('Unknown identifier: ' + incoming)
        merge = Merge(to_merge, mode=merge_mode,
                      concat_axis=concat_axis, dot_axes=dot_axes,
                      name='merge_inputs_for_' + name)
        layer.add_inbound_node(merge)

    self._graph_nodes[name] = layer
    self._graph_node_config.append({'name': name,
                                    'input': input,
                                    'inputs': inputs,
                                    'merge_mode': merge_mode,
                                    'concat_axis': concat_axis,
                                    'dot_axes': dot_axes,
                                    'create_output': create_output})
    if create_output:
        self.add_output(name, input=name)
|
||||
|
||||
def add_shared_node(self, layer, name, inputs=None, merge_mode=None,
                    concat_axis=-1, dot_axes=-1, outputs=None,
                    create_output=False):
    '''Used to share a same layer across multiple nodes.

    Suppose, for instance, that you want to apply one same `Dense` layer
    after two different nodes ('node_a' and 'node_b').
    You can then add the dense layer as a shared node by calling:

    ```python
    model.add_shared_node(my_dense, name='shared_dense', inputs=['node_a', 'node_b'], ...)
    ```

    If you want access to the output of dense(node_a) and dense(node_b) separately,
    you can add these outputs to the Graph by passing an `outputs` argument:

    ```python
    model.add_shared_node(my_dense, name='shared_dense', inputs=['node_a', 'node_b'],
                          outputs=['dense_output_a', 'dense_outputs_b'])
    ```

    Otherwise you can merge these different outputs via `merge_mode`.
    In that case you can access the merged output
    under the identifier `name`.

    # Arguments
        layer: The layer to be shared across multiple inputs
        name: Name of the shared node
        inputs: List of names of input nodes. Defaults to an empty list.
        merge_mode: Same meaning as `merge_mode` argument of `add_node()`
        concat_axis: Same meaning as `concat_axis` argument of `add_node()`
        dot_axes: Same meaning as `dot_axes` argument of `add_node()`
        outputs: Used when `merge_mode=None`. Names for the output nodes.
            Defaults to an empty list.
        create_output: Same meaning as `create_output` argument of `add_node()`.

    # Raises
        Exception: on a duplicate identifier (`name` or an entry of
            `outputs`), an invalid `merge_mode`, an unknown entry in
            `inputs`, or (with `merge_mode=None`) when `outputs` and
            `inputs` differ in length.
    '''
    # Fresh lists per call: both values are stored in the node config,
    # so shared `[]` defaults would be aliased across configs.
    if inputs is None:
        inputs = []
    if outputs is None:
        outputs = []

    if name in self._graph_namespace:
        raise Exception('Duplicate node identifier: ' + name)
    self._graph_namespace.add(name)
    self.built = False

    for output_name in outputs:
        if output_name in self._graph_namespace:
            raise Exception('Duplicate node identifier: ' + output_name)
    if merge_mode:
        if merge_mode not in {'sum', 'ave', 'mul', 'dot', 'cos', 'concat'}:
            raise Exception('Invalid merge mode: ' + str(merge_mode))

    # Resolve every input name to its layer before connecting anything.
    input_layers = []
    for input_name in inputs:
        if input_name in self._graph_nodes:
            input_layers.append(self._graph_nodes[input_name])
        elif input_name in self._graph_inputs:
            input_layers.append(self._graph_inputs[input_name])
        else:
            raise Exception('Unknown identifier: ' + input_name)

    # Connect the shared layer once per input, recording the index each
    # new inbound node will occupy (read just before it is appended).
    created_node_indices = []
    for input_layer in input_layers:
        created_node_indices.append(len(layer.inbound_nodes))
        layer.add_inbound_node(input_layer)

    if merge_mode:
        layer.name = 'input_for_' + name
        # collect all output nodes of layer and merge them into a single output
        merge = Merge([layer] * len(inputs),
                      mode=merge_mode,
                      concat_axis=concat_axis, dot_axes=dot_axes,
                      node_indices=created_node_indices,
                      name=name)
        self._graph_nodes[name] = merge
        if create_output:
            self.add_output(name, input=name)
    else:
        layer.name = name
        # create one new layer per output node of layer,
        # and add them to the Graph with their own identifiers
        if len(outputs) != len(inputs):
            raise Exception('When using merge_mode=None, '
                            'you should provide a list of '
                            'output names (`outputs` argument) '
                            'the same size as `inputs`.')
        for output_layer_name, node_index in zip(outputs,
                                                 created_node_indices):
            output_layer = Layer(name=output_layer_name)
            output_layer.add_inbound_node(layer, node_index)
            self._graph_namespace.add(output_layer_name)
            self._graph_nodes[output_layer_name] = output_layer
            if create_output:
                self.add_output(output_layer_name, input=output_layer_name)

    self._graph_node_config.append({'name': name,
                                    'layer': {
                                        'config': layer.get_config(),
                                        'class_name': layer.__class__.__name__,
                                    },
                                    'inputs': inputs,
                                    'merge_mode': merge_mode,
                                    'concat_axis': concat_axis,
                                    'dot_axes': dot_axes,
                                    'outputs': outputs,
                                    # without a merge, outputs were already
                                    # created per output layer above
                                    'create_output': create_output if merge_mode else False})
    self._graph_shared_nodes_names.append(name)
|
||||
|
||||
def add_output(self, name, input=None, inputs=None,
               merge_mode='concat', concat_axis=-1, dot_axes=-1):
    '''Adds an output to the graph.

    This output can merge several node outputs into a single output.

    # Arguments
        name: name of the output.
        input: when connecting the layer to a single input,
            this is the name of the incoming node.
        inputs: when connecting the layer to multiple inputs,
            this is a list of names of incoming nodes.
            Defaults to an empty list.
        merge_mode: one of {concat, sum, dot, ave, mul}
        concat_axis: when `merge_mode=='concat'`, this is the
            input concatenation axis.
        dot_axes: when `merge_mode='dot'`,
            this is the contraction axes specification;
            see the `Merge` layer for details.

    # Raises
        Exception: if an output called `name` already exists, or if
            `input` / an entry of `inputs` is not a known identifier.
    '''
    # A fresh list per call: the value is stored in the output config,
    # so a shared `[]` default would be aliased across configs.
    if inputs is None:
        inputs = []

    # An output may reuse the identifier of an existing node, so a name
    # already in the namespace is only an error if it is already an output.
    self._graph_namespace.add(name)
    if name in self._graph_outputs:
        raise Exception('Duplicate output identifier: ' + str(name))
    self.built = False

    if input:
        if input in self._graph_nodes:
            layer = self._graph_nodes[input]
        elif input in self._graph_inputs:
            layer = self._graph_inputs[input]
        else:
            raise Exception('Unknown node/input identifier: ' + input)
        # The source layer is renamed after the output it feeds.
        if layer.name != name:
            layer.name = name
        self._graph_outputs[name] = layer
    if inputs:
        # Only nodes (not graph inputs) can be merged into an output.
        for incoming in inputs:
            if incoming not in self._graph_nodes:
                raise Exception('Unknown identifier: ' + incoming)
        to_merge = [self._graph_nodes[incoming] for incoming in inputs]
        merge = Merge(to_merge, mode=merge_mode,
                      concat_axis=concat_axis, dot_axes=dot_axes,
                      name=name)
        self._graph_outputs[name] = merge

    self._graph_output_config.append({'name': name,
                                      'input': input,
                                      'inputs': inputs,
                                      'merge_mode': merge_mode,
                                      'concat_axis': concat_axis,
                                      'dot_axes': dot_axes})
|
||||
|
||||
def _get_x(self, data):
|
||||
x = []
|
||||
for key in self._graph_inputs.keys():
|
||||
if key not in data:
|
||||
raise Exception('Expected to be provided an array '
|
||||
'(in dict argument `data`) for input "' +
|
||||
key + '".')
|
||||
x.append(data[key])
|
||||
return x
|
||||
|
||||
def _get_y(self, data):
|
||||
y = []
|
||||
for key in self._graph_outputs.keys():
|
||||
if key not in data:
|
||||
raise Exception('Expected to be provided an array '
|
||||
'(in dict argument `data`) for output "' +
|
||||
key + '".')
|
||||
y.append(data[key])
|
||||
return y
|
||||
|
||||
def fit(self, data, batch_size=32, nb_epoch=10, verbose=1, callbacks=None,
        validation_split=0., validation_data=None, shuffle=True,
        class_weight=None, sample_weight=None, **kwargs):
    '''Trains the model for a fixed number of epochs.

    Returns a history object. Its `history` attribute is a record of
    training loss values at successive epochs,
    as well as validation loss values (if applicable).

    # Arguments
        data: dictionary mapping input names and outputs names to
            appropriate Numpy arrays. All arrays should contain
            the same number of samples.
        batch_size: int. Number of samples per gradient update.
        nb_epoch: int.
        verbose: 0 for no logging to stdout,
            1 for progress bar logging, 2 for one log line per epoch.
        callbacks: `keras.callbacks.Callback` list. List of callbacks
            to apply during training. See [callbacks](callbacks.md).
            Defaults to no callbacks.
        validation_split: float (0. < x < 1). Fraction of the data to
            use as held-out validation data.
        validation_data: dictionary mapping input names and outputs names
            to appropriate Numpy arrays to be used as
            held-out validation data.
            All arrays should contain the same number of samples.
            Will override validation_split.
        shuffle: boolean. Whether to shuffle the samples at each epoch.
        class_weight: dictionary mapping output names to
            class weight dictionaries.
        sample_weight: dictionary mapping output names to
            numpy arrays of sample weights.

    # Raises
        Exception: on unknown keyword arguments, when `validation_data`
            is a tuple, or when `data` / `validation_data` lacks an
            array for one of the graph inputs or outputs.
    '''
    # A fresh list per call instead of one `[]` shared by all calls.
    if callbacks is None:
        callbacks = []

    # `show_accuracy` is accepted only for backwards compatibility.
    if 'show_accuracy' in kwargs:
        kwargs.pop('show_accuracy')
        warnings.warn('The "show_accuracy" argument is deprecated, '
                      'instead you should pass the "accuracy" metric to '
                      'the model at compile time:\n'
                      '`model.compile(optimizer, loss, '
                      'metrics=["accuracy"])`')
    if kwargs:
        raise Exception('Received unknown keyword arguments: ' +
                        str(kwargs))
    x = self._get_x(data)
    y = self._get_y(data)

    if isinstance(validation_data, tuple):
        raise Exception('Cannot use sample_weight with '
                        'validation data with legacy Graph model. '
                        'validation_data should be a dictionary.')
    if validation_data:
        # Convert the name-keyed dictionary to the (inputs, targets)
        # pair passed on to the parent class.
        val_x = self._get_x(validation_data)
        val_y = self._get_y(validation_data)
        validation_data = (val_x, val_y)
    return super(Graph, self).fit(x, y,
                                  batch_size=batch_size,
                                  nb_epoch=nb_epoch,
                                  verbose=verbose,
                                  callbacks=callbacks,
                                  validation_split=validation_split,
                                  validation_data=validation_data,
                                  shuffle=shuffle,
                                  class_weight=class_weight,
                                  sample_weight=sample_weight)
|
||||
|
||||
def evaluate(self, data, batch_size=128,
             verbose=0, sample_weight=None, **kwargs):
    '''Computes the loss on some input data, batch by batch.

    Returns the scalar test loss over the data,
    or a list of metrics values (starting with the test loss)
    if applicable.

    Arguments: see `fit` method. `sample_weight` defaults to an
    empty dictionary.

    # Raises
        Exception: on unknown keyword arguments, or when `data` lacks
            an array for one of the graph inputs or outputs.
    '''
    # A fresh dict per call instead of one `{}` shared by all calls.
    if sample_weight is None:
        sample_weight = {}

    # `show_accuracy` is accepted only for backwards compatibility.
    if 'show_accuracy' in kwargs:
        kwargs.pop('show_accuracy')
        warnings.warn('The "show_accuracy" argument is deprecated, '
                      'instead you should pass the "accuracy" metric to '
                      'the model at compile time:\n'
                      '`model.compile(optimizer, loss, '
                      'metrics=["accuracy"])`')
    if kwargs:
        raise Exception('Received unknown keyword arguments: ' +
                        str(kwargs))
    x = self._get_x(data)
    y = self._get_y(data)
    return super(Graph, self).evaluate(x, y,
                                       batch_size=batch_size,
                                       verbose=verbose,
                                       sample_weight=sample_weight)
|
||||
|
||||
def predict(self, data, batch_size=128, verbose=0):
    '''Generates output predictions for the input samples
    batch by batch.

    Arguments: see `fit` method.

    # Returns
        A dictionary pairing the registered output names (in
        registration order) with the predictions returned by the
        parent class.
    '''
    inputs = self._get_x(data)
    predictions = super(Graph, self).predict(inputs,
                                             batch_size=batch_size,
                                             verbose=verbose)
    # A non-list result is wrapped so it can be zipped with the names.
    if isinstance(predictions, list):
        per_output = predictions
    else:
        per_output = [predictions]
    return dict(zip(self._graph_outputs, per_output))
|
||||
|
||||
def train_on_batch(self, data,
                   class_weight=None,
                   sample_weight=None, **kwargs):
    '''Single gradient update on a batch of samples.

    Returns the scalar training loss over the data,
    or a list of metrics values (starting with the training loss)
    if applicable.

    Arguments: see `fit` method. `class_weight` and `sample_weight`
    default to empty dictionaries.

    # Raises
        Exception: on unknown keyword arguments, or when `data` lacks
            an array for one of the graph inputs or outputs.
    '''
    # Fresh dicts per call instead of `{}` objects shared by all calls.
    if class_weight is None:
        class_weight = {}
    if sample_weight is None:
        sample_weight = {}

    # `accuracy` is accepted only for backwards compatibility.
    if 'accuracy' in kwargs:
        kwargs.pop('accuracy')
        warnings.warn('The "accuracy" argument is deprecated, '
                      'instead you should pass the "accuracy" metric to '
                      'the model at compile time:\n'
                      '`model.compile(optimizer, loss, '
                      'metrics=["accuracy"])`')
    if kwargs:
        raise Exception('Received unknown keyword arguments: ' +
                        str(kwargs))
    x = self._get_x(data)
    y = self._get_y(data)
    return super(Graph, self).train_on_batch(x, y,
                                             sample_weight=sample_weight,
                                             class_weight=class_weight)
|
||||
|
||||
def test_on_batch(self, data, sample_weight={}, **kwargs):
    '''Evaluates the network on a single batch of samples.

    # Arguments
        data: batch data; split into inputs and targets by
            `self._get_x` and `self._get_y`.
        sample_weight: forwarded to the parent `test_on_batch`.
        **kwargs: only the deprecated `accuracy` keyword is tolerated
            (it is discarded with a warning).

    # Returns
        Whatever the parent `test_on_batch` returns.

    # Raises
        Exception: if any keyword other than `accuracy` is passed.
    '''
    if 'accuracy' in kwargs:
        del kwargs['accuracy']
        deprecation_msg = ('The "accuracy" argument is deprecated, '
                           'instead you should pass the "accuracy" metric to '
                           'the model at compile time:\n'
                           '`model.compile(optimizer, loss, '
                           'metrics=["accuracy"])`')
        warnings.warn(deprecation_msg)
    if kwargs:
        # Anything still left was never a supported argument.
        raise Exception('Received unknown keyword arguments: ' + str(kwargs))

    inputs = self._get_x(data)
    targets = self._get_y(data)
    parent = super(Graph, self)
    return parent.test_on_batch(inputs, targets,
                                sample_weight=sample_weight)
|
||||
|
||||
def predict_on_batch(self, data):
    '''Returns predictions for a single batch of samples.

    # Arguments
        data: batch data, handed to the parent `predict_on_batch` as is.

    # Returns
        A dictionary pairing each entry of `self._graph_outputs`
        with the corresponding prediction.
    '''
    # NOTE(review): unlike `predict`, `data` is not routed through
    # `self._get_x` here -- confirm that this is intentional.
    predictions = super(Graph, self).predict_on_batch(data)
    outputs = predictions if isinstance(predictions, list) else [predictions]
    return dict(zip(self._graph_outputs, outputs))
|
||||
|
||||
def fit_generator(self, generator, samples_per_epoch, nb_epoch,
                  verbose=1, callbacks=[],
                  validation_data=None, nb_val_samples=None,
                  class_weight={},
                  max_q_size=10, nb_worker=1,
                  pickle_safe=False, **kwargs):
    '''Fits a model on data generated batch-by-batch by a Python generator.
    The generator is run in parallel to the model, for efficiency.
    For instance, this allows you to do real-time data augmentation
    on images on CPU in parallel to training your model on GPU.

    # Arguments
        generator: a generator.
            The output of the generator must be either a tuple
            of dictionaries `(input_data, sample_weight)`
            or a dictionary `input_data`
            (mapping names of inputs and outputs to Numpy arrays).
            All arrays should contain the same number of samples.
            The generator is expected to loop over its data
            indefinitely. An epoch finishes when `samples_per_epoch`
            samples have been seen by the model.
        samples_per_epoch: integer, number of samples to process before
            going to the next epoch.
        nb_epoch: integer, total number of iterations on the data.
        verbose: verbosity mode, 0, 1, or 2.
        callbacks: list of callbacks to be called during training.
        validation_data: dictionary mapping input names and outputs names
            to appropriate Numpy arrays to be used as
            held-out validation data, or a generator yielding such
            dictionaries. All arrays should contain the same number
            of samples. If a generator, will be called until more than
            `nb_val_samples` examples have been generated at the
            end of every epoch. These examples will then be used
            as the validation data.
            A tuple is rejected (sample weights are not supported
            for validation data in the legacy Graph model).
        nb_val_samples: number of samples to use from validation
            generator at the end of every epoch.
        class_weight: dictionary mapping class indices to a weight
            for the class.
        max_q_size, nb_worker, pickle_safe: forwarded unchanged to the
            parent `fit_generator`.
        **kwargs: only the deprecated `show_accuracy` and `nb_val_worker`
            keywords are tolerated (they are discarded with a warning).

    # Returns
        A `History` object.

    # Raises
        Exception: on unknown keyword arguments, or when
            `validation_data` is a tuple.

    # Examples

    ```python
        def generate_arrays_from_file(path):
            while 1:
                f = open(path)
                for line in f:
                    # create Numpy arrays of input data
                    # and labels, from each line in the file
                    x1, x2, y = process_line(line)
                    yield ({'input_1': x1, 'input_2': x2, 'output': y})
                f.close()

        graph.fit_generator(generate_arrays_from_file('/my_file.txt'),
                            samples_per_epoch=10000, nb_epoch=10)
    ```
    '''
    if 'show_accuracy' in kwargs:
        kwargs.pop('show_accuracy')
        warnings.warn('The "show_accuracy" argument is deprecated, '
                      'instead you should pass the "accuracy" metric to '
                      'the model at compile time:\n'
                      '`model.compile(optimizer, loss, '
                      'metrics=["accuracy"])`')
    if 'nb_val_worker' in kwargs:
        kwargs.pop('nb_val_worker')
        warnings.warn('The "nb_val_worker" argument is deprecated, '
                      'please remove it from your code.')
    if kwargs:
        raise Exception('Received unknown keyword arguments: ' +
                        str(kwargs))

    # The parent `fit_generator` is fed (x, y[, sample_weight]) tuples by
    # `fixed_generator` below, so the dictionary-based overrides of this
    # class are temporarily swapped for the parent implementations.
    self._train_on_batch = self.train_on_batch
    self._evaluate = self.evaluate
    try:
        self.train_on_batch = super(Graph, self).train_on_batch
        self.evaluate = super(Graph, self).evaluate

        if validation_data and type(validation_data) is tuple:
            raise Exception('Cannot use sample_weight with '
                            'validation_data in legacy Graph model.')
        if validation_data and type(validation_data) is dict:
            validation_data = (self._get_x(validation_data),
                               self._get_y(validation_data))

        original_generator = generator

        def fixed_generator():
            # Translate each yielded dictionary (optionally paired with
            # sample weights) into the tuple form the parent expects.
            while 1:
                data = next(original_generator)
                if type(data) is tuple:
                    data, sample_weight = data
                    x = self._get_x(data)
                    y = self._get_y(data)
                    yield x, y, sample_weight
                else:
                    x = self._get_x(data)
                    y = self._get_y(data)
                    yield x, y

        generator = fixed_generator()
        history = super(Graph, self).fit_generator(
            generator,
            samples_per_epoch,
            nb_epoch,
            verbose=verbose,
            callbacks=callbacks,
            validation_data=validation_data,
            nb_val_samples=nb_val_samples,
            class_weight=class_weight,
            max_q_size=max_q_size,
            nb_worker=nb_worker,
            pickle_safe=pickle_safe)
    finally:
        # Always restore the overrides, even when training fails;
        # otherwise the instance would stay patched.
        self.train_on_batch = self._train_on_batch
        self.evaluate = self._evaluate
    return history
|
||||
|
||||
def evaluate_generator(self, generator, val_samples,
                       verbose=1, max_q_size=10, nb_worker=1,
                       pickle_safe=False, **kwargs):
    '''Evaluates the model on a generator. The generator should
    return the same kind of data with every yield as accepted
    by `evaluate`.

    # Arguments
        generator:
            generator yielding dictionaries of the kind accepted
            by `evaluate`, or tuples of such dictionaries and
            associated dictionaries of sample weights.
        val_samples:
            total number of samples to generate from `generator`
            to use in validation.
        verbose: accepted for backwards compatibility; it is not
            forwarded to the parent `evaluate_generator`.
        max_q_size, nb_worker, pickle_safe: forwarded unchanged to the
            parent `evaluate_generator`.
        **kwargs: only the deprecated `show_accuracy` keyword is
            tolerated (it is discarded with a warning).

    # Returns
        Whatever the parent `evaluate_generator` returns.

    # Raises
        Exception: on unknown keyword arguments.
    '''
    if 'show_accuracy' in kwargs:
        kwargs.pop('show_accuracy')
        warnings.warn('The "show_accuracy" argument is deprecated, '
                      'instead you should pass the "accuracy" metric to '
                      'the model at compile time:\n'
                      '`model.compile(optimizer, loss, '
                      'metrics=["accuracy"])`')
    # `verbose` is a named parameter, so it can never show up in `kwargs`;
    # no separate check for it is needed here.
    if kwargs:
        raise Exception('Received unknown keyword arguments: ' +
                        str(kwargs))

    # The parent `evaluate_generator` is fed (x, y[, sample_weight]) tuples
    # by `fixed_generator` below, so the dictionary-based override of this
    # class is temporarily swapped for the parent implementation.
    self._test_on_batch = self.test_on_batch
    try:
        self.test_on_batch = super(Graph, self).test_on_batch

        original_generator = generator

        def fixed_generator():
            # Translate each yielded dictionary (optionally paired with
            # sample weights) into the tuple form the parent expects.
            while 1:
                data = next(original_generator)
                if type(data) is tuple:
                    data, sample_weight = data
                    x = self._get_x(data)
                    y = self._get_y(data)
                    yield x, y, sample_weight
                else:
                    x = self._get_x(data)
                    y = self._get_y(data)
                    yield x, y

        generator = fixed_generator()
        results = super(Graph, self).evaluate_generator(
            generator,
            val_samples,
            max_q_size=max_q_size,
            nb_worker=nb_worker,
            pickle_safe=pickle_safe)
    finally:
        # Always restore the override, even when evaluation fails;
        # otherwise the instance would stay patched.
        self.test_on_batch = self._test_on_batch
    return results
|
||||
|
||||
# get_weights, set_weights: inherited
|
||||
def get_config(self):
    '''Returns a deep copy of the graph configuration.

    # Returns
        A dictionary with the keys `input_config`, `node_config`,
        `output_config` and `nodes`. `nodes` maps each node name to its
        class name and config, plus `shared: True` for nodes listed in
        `self._graph_shared_nodes_names`.
    '''
    node_entries = {}
    for node_name, node in self._graph_nodes.items():
        entry = {'class_name': node.__class__.__name__,
                 'config': node.get_config()}
        if node_name in self._graph_shared_nodes_names:
            entry['shared'] = True
        node_entries[node_name] = entry

    # Deep-copied so callers cannot alter the graph's own bookkeeping.
    return copy.deepcopy({
        'input_config': self._graph_input_config,
        'node_config': self._graph_node_config,
        'output_config': self._graph_output_config,
        'nodes': node_entries,
    })
|
||||
|
||||
@classmethod
def from_config(cls, config):
    '''Builds a graph from a configuration dictionary.

    The passed `config` (including its nested node dictionaries) is
    left unmodified, so the same configuration can be reused.

    # Arguments
        config: dictionary with the keys `input_config`, `node_config`,
            `output_config` and `nodes`, as produced by `get_config`.

    # Returns
        A new instance of `cls` with inputs, nodes and outputs added.
    '''
    # TODO: test legacy support
    from keras.utils.layer_utils import layer_from_config

    def normalize_legacy_config(conf):
        # Legacy layer configs keep the class name under 'name' and the
        # layer name under 'custom_name'; convert them to the
        # {'class_name', 'config'} form without touching the original.
        if 'class_name' in conf:
            return conf
        class_name = conf['name']
        legacy_conf = dict(conf)
        legacy_conf['name'] = conf.get('custom_name')
        return {
            'class_name': class_name,
            'config': legacy_conf,
        }

    graph = cls()
    inputs = config.get('input_config')
    for input_config in inputs:
        graph.add_input(**input_config)

    nodes = config.get('node_config')
    for node_config in nodes:
        # Work on a shallow copy: the entries written below (layer
        # instance, create_output) must not leak into the caller's config.
        node = dict(node_config)
        layer_config = config['nodes'][node['name']]
        layer_config = normalize_legacy_config(layer_config)
        if 'layer' in node:
            # for add_shared_node
            node['layer'] = layer_from_config(node['layer'])
        else:
            node['layer'] = layer_from_config(layer_config)

        node['create_output'] = False  # outputs will be added below
        if layer_config.get('shared'):
            graph.add_shared_node(**node)
        else:
            graph.add_node(**node)

    outputs = config.get('output_config')
    for output_config in outputs:
        graph.add_output(**output_config)
    return graph
|
||||
|
||||
def load_weights(self, fname):
    '''Loads weights from `fname` through the parent `load_weights`.

    The graph is built first when `self.built` is falsy.
    '''
    already_built = self.built
    if not already_built:
        self.build()
    parent = super(Graph, self)
    parent.load_weights(fname)
|
||||
+52
-36
@@ -5,14 +5,14 @@ from .utils.generic_utils import get_from_module
|
||||
|
||||
def binary_accuracy(y_true, y_pred):
|
||||
'''Calculates the mean accuracy rate across all predictions for binary
|
||||
classification problems
|
||||
classification problems.
|
||||
'''
|
||||
return K.mean(K.equal(y_true, K.round(y_pred)))
|
||||
|
||||
|
||||
def categorical_accuracy(y_true, y_pred):
|
||||
'''Calculates the mean accuracy rate across all predictions for
|
||||
multiclass classification problems
|
||||
multiclass classification problems.
|
||||
'''
|
||||
return K.mean(K.equal(K.argmax(y_true, axis=-1),
|
||||
K.argmax(y_pred, axis=-1)))
|
||||
@@ -20,7 +20,7 @@ def categorical_accuracy(y_true, y_pred):
|
||||
|
||||
def sparse_categorical_accuracy(y_true, y_pred):
|
||||
'''Same as categorical_accuracy, but useful when the predictions are for
|
||||
sparse targets
|
||||
sparse targets.
|
||||
'''
|
||||
return K.mean(K.equal(K.max(y_true, axis=-1),
|
||||
K.cast(K.argmax(y_pred, axis=-1), K.floatx())))
|
||||
@@ -28,36 +28,36 @@ def sparse_categorical_accuracy(y_true, y_pred):
|
||||
|
||||
def top_k_categorical_accuracy(y_true, y_pred, k=5):
|
||||
'''Calculates the top-k categorical accuracy rate, i.e. success when the
|
||||
target class is within the top-k predictions provided
|
||||
target class is within the top-k predictions provided.
|
||||
'''
|
||||
return K.mean(K.in_top_k(y_pred, K.argmax(y_true, axis=-1), k))
|
||||
|
||||
|
||||
def mean_squared_error(y_true, y_pred):
|
||||
'''Calculates the mean squared error (mse) rate between predicted and target
|
||||
values
|
||||
'''Calculates the mean squared error (mse) rate
|
||||
between predicted and target values.
|
||||
'''
|
||||
return K.mean(K.square(y_pred - y_true))
|
||||
|
||||
|
||||
def mean_absolute_error(y_true, y_pred):
|
||||
'''Calculates the mean absolute error (mae) rate between predicted and target
|
||||
values
|
||||
'''Calculates the mean absolute error (mae) rate
|
||||
between predicted and target values.
|
||||
'''
|
||||
return K.mean(K.abs(y_pred - y_true))
|
||||
|
||||
|
||||
def mean_absolute_percentage_error(y_true, y_pred):
|
||||
'''Calculates the mean absolute percentage error (mape) rate between predicted
|
||||
and target values
|
||||
'''Calculates the mean absolute percentage error (mape) rate
|
||||
between predicted and target values.
|
||||
'''
|
||||
diff = K.abs((y_true - y_pred) / K.clip(K.abs(y_true), K.epsilon(), np.inf))
|
||||
return 100. * K.mean(diff)
|
||||
|
||||
|
||||
def mean_squared_logarithmic_error(y_true, y_pred):
|
||||
'''Calculates the mean squared logarithmic error (msle) rate between predicted
|
||||
and target values
|
||||
'''Calculates the mean squared logarithmic error (msle) rate
|
||||
between predicted and target values.
|
||||
'''
|
||||
first_log = K.log(K.clip(y_pred, K.epsilon(), np.inf) + 1.)
|
||||
second_log = K.log(K.clip(y_true, K.epsilon(), np.inf) + 1.)
|
||||
@@ -66,13 +66,13 @@ def mean_squared_logarithmic_error(y_true, y_pred):
|
||||
|
||||
def hinge(y_true, y_pred):
|
||||
'''Calculates the hinge loss, which is defined as
|
||||
`max(1 - y_true * y_pred, 0)`
|
||||
`max(1 - y_true * y_pred, 0)`.
|
||||
'''
|
||||
return K.mean(K.maximum(1. - y_true * y_pred, 0.))
|
||||
|
||||
|
||||
def squared_hinge(y_true, y_pred):
|
||||
'''Calculates the squared value of the hinge loss
|
||||
'''Calculates the squared value of the hinge loss.
|
||||
'''
|
||||
return K.mean(K.square(K.maximum(1. - y_true * y_pred, 0.)))
|
||||
|
||||
@@ -104,7 +104,7 @@ def binary_crossentropy(y_true, y_pred):
|
||||
|
||||
def kullback_leibler_divergence(y_true, y_pred):
|
||||
'''Calculates the Kullback-Leibler (KL) divergence between prediction
|
||||
and target values
|
||||
and target values.
|
||||
'''
|
||||
y_true = K.clip(y_true, K.epsilon(), 1)
|
||||
y_pred = K.clip(y_pred, K.epsilon(), 1)
|
||||
@@ -148,11 +148,31 @@ def matthews_correlation(y_true, y_pred):
|
||||
return numerator / (denominator + K.epsilon())
|
||||
|
||||
|
||||
def fbeta_score(y_true, y_pred, beta=1):
|
||||
'''Computes the F score, the weighted harmonic mean of precision and recall.
|
||||
def precision(y_true, y_pred):
|
||||
'''Calculates the precision, a metric for multi-label classification of
|
||||
how many selected items are relevant.
|
||||
'''
|
||||
true_positives = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
|
||||
predicted_positives = K.sum(K.round(K.clip(y_pred, 0, 1)))
|
||||
precision = true_positives / (predicted_positives + K.epsilon())
|
||||
return precision
|
||||
|
||||
This is useful for multi-label classification where input samples can be
|
||||
tagged with a set of labels. By only using accuracy (precision) a model
|
||||
|
||||
def recall(y_true, y_pred):
|
||||
'''Calculates the recall, a metric for multi-label classification of
|
||||
how many relevant items are selected.
|
||||
'''
|
||||
true_positives = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
|
||||
possible_positives = K.sum(K.round(K.clip(y_true, 0, 1)))
|
||||
recall = true_positives / (possible_positives + K.epsilon())
|
||||
return recall
|
||||
|
||||
|
||||
def fbeta_score(y_true, y_pred, beta=1):
|
||||
'''Calculates the F score, the weighted harmonic mean of precision and recall.
|
||||
|
||||
This is useful for multi-label classification, where input samples can be
|
||||
classified as sets of labels. By only using accuracy (precision) a model
|
||||
would achieve a perfect score by simply assigning every class to every
|
||||
input. In order to avoid this, a metric should penalize incorrect class
|
||||
assignments as well (recall). The F-beta score (ranged from 0.0 to 1.0)
|
||||
@@ -162,30 +182,25 @@ def fbeta_score(y_true, y_pred, beta=1):
|
||||
With beta = 1, this is equivalent to a F-measure. With beta < 1, assigning
|
||||
correct classes becomes more important, and with beta > 1 the metric is
|
||||
instead weighted towards penalizing incorrect class assignments.
|
||||
|
||||
'''
|
||||
if beta < 0:
|
||||
raise ValueError('The lowest choosable beta is zero (only precision).')
|
||||
|
||||
# Count positive samples.
|
||||
c1 = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
|
||||
c2 = K.sum(K.round(K.clip(y_pred, 0, 1)))
|
||||
c3 = K.sum(K.round(K.clip(y_true, 0, 1)))
|
||||
|
||||
# If there are no true samples, fix the F score at 0.
|
||||
if c3 == 0:
|
||||
|
||||
# If there are no true positives, fix the F score at 0 like sklearn.
|
||||
if K.sum(K.round(K.clip(y_true, 0, 1))) == 0:
|
||||
return 0
|
||||
|
||||
# How many selected items are relevant?
|
||||
precision = c1 / c2
|
||||
p = precision(y_true, y_pred)
|
||||
r = recall(y_true, y_pred)
|
||||
bb = beta ** 2
|
||||
fbeta_score = (1 + bb) * (p * r) / (bb * p + r + K.epsilon())
|
||||
return fbeta_score
|
||||
|
||||
# How many relevant items are selected?
|
||||
recall = c1 / c3
|
||||
|
||||
# Weight precision and recall together as a single scalar.
|
||||
beta2 = beta ** 2
|
||||
f_score = (1 + beta2) * (precision * recall) / (beta2 * precision + recall)
|
||||
return f_score
|
||||
def fmeasure(y_true, y_pred):
|
||||
'''Calculates the f-measure, the harmonic mean of precision and recall.
|
||||
'''
|
||||
return fbeta_score(y_true, y_pred, beta=1)
|
||||
|
||||
|
||||
# aliases
|
||||
@@ -194,6 +209,7 @@ mae = MAE = mean_absolute_error
|
||||
mape = MAPE = mean_absolute_percentage_error
|
||||
msle = MSLE = mean_squared_logarithmic_error
|
||||
cosine = cosine_proximity
|
||||
fscore = f1score = fmeasure
|
||||
|
||||
|
||||
def get(identifier):
|
||||
|
||||
+67
-42
@@ -6,11 +6,11 @@ import os
|
||||
import numpy as np
|
||||
|
||||
from . import backend as K
|
||||
from . import optimizers
|
||||
from .utils.io_utils import ask_to_proceed_with_overwrite
|
||||
from .engine.training import Model
|
||||
from .engine.topology import get_source_inputs, Node, Layer
|
||||
from .engine.topology import get_source_inputs, Node, Layer, Merge
|
||||
from .optimizers import optimizer_from_config
|
||||
from .legacy.models import Graph
|
||||
|
||||
|
||||
def save_model(model, filepath, overwrite=True):
|
||||
@@ -56,40 +56,52 @@ def save_model(model, filepath, overwrite=True):
|
||||
model.save_weights_to_hdf5_group(model_weights_group)
|
||||
|
||||
if hasattr(model, 'optimizer'):
|
||||
f.attrs['training_config'] = json.dumps({
|
||||
'optimizer_config': {
|
||||
'class_name': model.optimizer.__class__.__name__,
|
||||
'config': model.optimizer.get_config()
|
||||
},
|
||||
'loss': model.loss,
|
||||
'metrics': model.metrics,
|
||||
'sample_weight_mode': model.sample_weight_mode,
|
||||
'loss_weights': model.loss_weights,
|
||||
}, default=get_json_type).encode('utf8')
|
||||
if isinstance(model.optimizer, optimizers.TFOptimizer):
|
||||
warnings.warn(
|
||||
'TensorFlow optimizers do not '
|
||||
'make it possible to access '
|
||||
'optimizer attributes or optimizer state '
|
||||
'after instantiation. '
|
||||
'As a result, we cannot save the optimizer '
|
||||
'as part of the model save file.'
|
||||
'You will have to compile your model again after loading it. '
|
||||
'Prefer using a Keras optimizer instead '
|
||||
'(see keras.io/optimizers).')
|
||||
else:
|
||||
f.attrs['training_config'] = json.dumps({
|
||||
'optimizer_config': {
|
||||
'class_name': model.optimizer.__class__.__name__,
|
||||
'config': model.optimizer.get_config()
|
||||
},
|
||||
'loss': model.loss,
|
||||
'metrics': model.metrics,
|
||||
'sample_weight_mode': model.sample_weight_mode,
|
||||
'loss_weights': model.loss_weights,
|
||||
}, default=get_json_type).encode('utf8')
|
||||
|
||||
# save optimizer weights
|
||||
symbolic_weights = getattr(model.optimizer, 'weights')
|
||||
if symbolic_weights:
|
||||
optimizer_weights_group = f.create_group('optimizer_weights')
|
||||
weight_values = K.batch_get_value(symbolic_weights)
|
||||
weight_names = []
|
||||
for i, (w, val) in enumerate(zip(symbolic_weights, weight_values)):
|
||||
if hasattr(w, 'name') and w.name:
|
||||
name = str(w.name)
|
||||
else:
|
||||
name = 'param_' + str(i)
|
||||
weight_names.append(name.encode('utf8'))
|
||||
optimizer_weights_group.attrs['weight_names'] = weight_names
|
||||
for name, val in zip(weight_names, weight_values):
|
||||
param_dset = optimizer_weights_group.create_dataset(
|
||||
name,
|
||||
val.shape,
|
||||
dtype=val.dtype)
|
||||
if not val.shape:
|
||||
# scalar
|
||||
param_dset[()] = val
|
||||
else:
|
||||
param_dset[:] = val
|
||||
# save optimizer weights
|
||||
symbolic_weights = getattr(model.optimizer, 'weights')
|
||||
if symbolic_weights:
|
||||
optimizer_weights_group = f.create_group('optimizer_weights')
|
||||
weight_values = K.batch_get_value(symbolic_weights)
|
||||
weight_names = []
|
||||
for i, (w, val) in enumerate(zip(symbolic_weights, weight_values)):
|
||||
if hasattr(w, 'name') and w.name:
|
||||
name = str(w.name)
|
||||
else:
|
||||
name = 'param_' + str(i)
|
||||
weight_names.append(name.encode('utf8'))
|
||||
optimizer_weights_group.attrs['weight_names'] = weight_names
|
||||
for name, val in zip(weight_names, weight_values):
|
||||
param_dset = optimizer_weights_group.create_dataset(
|
||||
name,
|
||||
val.shape,
|
||||
dtype=val.dtype)
|
||||
if not val.shape:
|
||||
# scalar
|
||||
param_dset[()] = val
|
||||
else:
|
||||
param_dset[:] = val
|
||||
f.flush()
|
||||
f.close()
|
||||
|
||||
@@ -157,7 +169,7 @@ def load_model(filepath, custom_objects={}):
|
||||
# set optimizer weights
|
||||
if 'optimizer_weights' in f:
|
||||
# build train function (to get weight updates)
|
||||
if model.__class__.__name__ == 'Sequential':
|
||||
if isinstance(model, Sequential):
|
||||
model.model._make_train_function()
|
||||
else:
|
||||
model._make_train_function()
|
||||
@@ -238,7 +250,7 @@ class Sequential(Model):
|
||||
self.model = None # internal Model instance
|
||||
self.inputs = [] # tensors
|
||||
self.outputs = [] # tensors (length 1)
|
||||
self.trainable = True
|
||||
self._trainable = True
|
||||
|
||||
# model attributes
|
||||
self.inbound_nodes = []
|
||||
@@ -371,6 +383,7 @@ class Sequential(Model):
|
||||
' Add some layers first.')
|
||||
# actually create the model
|
||||
self.model = Model(self.inputs, self.outputs[0], name=self.name + '_model')
|
||||
self.model.trainable = self.trainable
|
||||
|
||||
# mirror model attributes
|
||||
self.supports_masking = self.model.supports_masking
|
||||
@@ -405,7 +418,7 @@ class Sequential(Model):
|
||||
return self._flattened_layers
|
||||
layers = []
|
||||
if self.layers:
|
||||
if self.layers[0].__class__.__name__ == 'Merge':
|
||||
if isinstance(self.layers[0], Merge):
|
||||
merge = self.layers[0]
|
||||
for layer in merge.layers:
|
||||
if hasattr(layer, 'flattened_layers'):
|
||||
@@ -442,6 +455,16 @@ class Sequential(Model):
|
||||
list(layer_dict.items()))
|
||||
return all_attrs
|
||||
|
||||
@property
|
||||
def trainable(self):
|
||||
return self._trainable
|
||||
|
||||
@trainable.setter
|
||||
def trainable(self, value):
|
||||
if self.model:
|
||||
self.model.trainable = value
|
||||
self._trainable = value
|
||||
|
||||
@property
|
||||
def trainable_weights(self):
|
||||
if not self.trainable:
|
||||
@@ -460,13 +483,15 @@ class Sequential(Model):
|
||||
|
||||
@property
|
||||
def updates(self):
|
||||
# support for legacy behavior
|
||||
return self._gather_list_attr('updates')
|
||||
return self.model.updates
|
||||
|
||||
@property
|
||||
def state_updates(self):
|
||||
# support for legacy behavior
|
||||
return self._gather_list_attr('state_updates')
|
||||
return self.model.state_updates
|
||||
|
||||
def get_updates_for(self, inputs):
|
||||
return self.model.get_updates_for(inputs)
|
||||
|
||||
@property
|
||||
def regularizers(self):
|
||||
@@ -960,7 +985,7 @@ class Sequential(Model):
|
||||
as a Python list.
|
||||
'''
|
||||
config = []
|
||||
if self.layers[0].__class__.__name__ == 'Merge':
|
||||
if isinstance(self.layers[0], Merge):
|
||||
assert hasattr(self.layers[0], 'layers')
|
||||
layers = []
|
||||
for layer in self.layers[0].layers:
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
from __future__ import absolute_import
|
||||
import numpy as np
|
||||
from . import backend as K
|
||||
from .utils.generic_utils import get_from_module
|
||||
|
||||
|
||||
def mean_squared_error(y_true, y_pred):
|
||||
@@ -72,6 +73,6 @@ msle = MSLE = mean_squared_logarithmic_error
|
||||
kld = KLD = kullback_leibler_divergence
|
||||
cosine = cosine_proximity
|
||||
|
||||
from .utils.generic_utils import get_from_module
|
||||
|
||||
def get(identifier):
|
||||
return get_from_module(identifier, globals(), 'objective')
|
||||
|
||||
+38
-8
@@ -2,6 +2,7 @@ from __future__ import absolute_import
|
||||
from . import backend as K
|
||||
from .utils.generic_utils import get_from_module
|
||||
from six.moves import zip
|
||||
import warnings
|
||||
|
||||
|
||||
def clip_norm(g, c, n):
|
||||
@@ -19,6 +20,7 @@ def optimizer_from_config(config, custom_objects={}):
|
||||
'adam': Adam,
|
||||
'adamax': Adamax,
|
||||
'nadam': Nadam,
|
||||
'tfoptimizer': TFOptimizer,
|
||||
}
|
||||
class_name = config['class_name']
|
||||
if class_name in custom_objects:
|
||||
@@ -53,14 +55,6 @@ class Optimizer(object):
|
||||
self.updates = []
|
||||
self.weights = []
|
||||
|
||||
def get_state(self):
|
||||
return [K.get_value(u[0]) for u in self.updates]
|
||||
|
||||
def set_state(self, value_list):
|
||||
assert len(self.updates) == len(value_list)
|
||||
for u, v in zip(self.updates, value_list):
|
||||
K.set_value(u[0], v)
|
||||
|
||||
def get_updates(self, params, constraints, loss):
|
||||
raise NotImplementedError
|
||||
|
||||
@@ -570,6 +564,36 @@ class Nadam(Optimizer):
|
||||
return dict(list(base_config.items()) + list(config.items()))
|
||||
|
||||
|
||||
class TFOptimizer(Optimizer):
|
||||
|
||||
def __init__(self, optimizer):
|
||||
self.optimizer = optimizer
|
||||
self.iterations = K.variable(0.)
|
||||
self.updates = []
|
||||
|
||||
def get_updates(self, params, constraints, loss):
|
||||
if constraints:
|
||||
raise ValueError('TF optimizers do not support '
|
||||
'weights constraints. Either remove '
|
||||
'all weights constraints in your model, '
|
||||
'or use a Keras optimizer.')
|
||||
grads = self.optimizer.compute_gradients(loss, params)
|
||||
opt_update = self.optimizer.apply_gradients(
|
||||
grads, global_step=self.iterations)
|
||||
self.updates.append(opt_update)
|
||||
return self.updates
|
||||
|
||||
@property
|
||||
def weights(self):
|
||||
raise NotImplementedError
|
||||
|
||||
def get_config(self):
|
||||
raise NotImplementedError
|
||||
|
||||
def from_config(self, config):
|
||||
raise NotImplementedError
|
||||
|
||||
|
||||
# aliases
|
||||
sgd = SGD
|
||||
rmsprop = RMSprop
|
||||
@@ -581,5 +605,11 @@ nadam = Nadam
|
||||
|
||||
|
||||
def get(identifier, kwargs=None):
|
||||
if K.backend() == 'tensorflow':
|
||||
# Wrap TF optimizer instances
|
||||
import tensorflow as tf
|
||||
if isinstance(identifier, tf.train.Optimizer):
|
||||
return TFOptimizer(identifier)
|
||||
# Instantiate a Keras optimizer
|
||||
return get_from_module(identifier, globals(), 'optimizer',
|
||||
instantiate=True, kwargs=kwargs)
|
||||
|
||||
@@ -3,7 +3,7 @@ from __future__ import print_function
|
||||
from .generic_utils import get_from_module
|
||||
from .np_utils import convert_kernel
|
||||
from ..layers import *
|
||||
from ..models import Model, Sequential, Graph
|
||||
from ..models import Model, Sequential
|
||||
from .. import backend as K
|
||||
|
||||
|
||||
@@ -15,7 +15,7 @@ def layer_from_config(config, custom_objects={}):
|
||||
of custom (non-Keras) objects to class/functions
|
||||
|
||||
# Returns
|
||||
Layer instance (may be Model, Sequential, Graph, Layer...)
|
||||
Layer instance (may be Model, Sequential, Layer...)
|
||||
'''
|
||||
# Insert custom layers into globals so they can
|
||||
# be accessed by `get_from_module`.
|
||||
@@ -26,8 +26,6 @@ def layer_from_config(config, custom_objects={}):
|
||||
|
||||
if class_name == 'Sequential':
|
||||
layer_class = Sequential
|
||||
elif class_name == 'Graph':
|
||||
layer_class = Graph
|
||||
elif class_name in ['Model', 'Container']:
|
||||
layer_class = Model
|
||||
else:
|
||||
@@ -53,6 +51,8 @@ def print_summary(layers, relevant_nodes=None, line_length=100, positions=[.33,
|
||||
def print_row(fields, positions):
|
||||
line = ''
|
||||
for i in range(len(fields)):
|
||||
if i > 0:
|
||||
line = line[:-1] + ' '
|
||||
line += str(fields[i])
|
||||
line = line[:positions[i]]
|
||||
line += ' ' * (positions[i] - len(line))
|
||||
|
||||
@@ -122,21 +122,25 @@ def convert_kernel(kernel, dim_ordering='default'):
|
||||
def conv_output_length(input_length, filter_size, border_mode, stride, dilation=1):
|
||||
if input_length is None:
|
||||
return None
|
||||
assert border_mode in {'same', 'valid'}
|
||||
assert border_mode in {'same', 'valid', 'full'}
|
||||
dilated_filter_size = filter_size + (filter_size - 1) * (dilation - 1)
|
||||
if border_mode == 'same':
|
||||
output_length = input_length
|
||||
elif border_mode == 'valid':
|
||||
output_length = input_length - dilated_filter_size + 1
|
||||
elif border_mode == 'full':
|
||||
output_length = input_length + dilated_filter_size - 1
|
||||
return (output_length + stride - 1) // stride
|
||||
|
||||
|
||||
def conv_input_length(output_length, filter_size, border_mode, stride):
|
||||
if output_length is None:
|
||||
return None
|
||||
assert border_mode in {'same', 'valid'}
|
||||
assert border_mode in {'same', 'valid', 'full'}
|
||||
if border_mode == 'same':
|
||||
pad = filter_size // 2
|
||||
elif border_mode == 'valid':
|
||||
pad = 0
|
||||
elif border_mode == 'full':
|
||||
pad = filter_size - 1
|
||||
return (output_length - 1) * stride - 2 * pad + filter_size
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
import os
|
||||
|
||||
from ..layers.wrappers import Wrapper
|
||||
from ..models import Sequential
|
||||
|
||||
try:
|
||||
# pydot-ng is a fork of pydot that is better maintained
|
||||
@@ -19,7 +20,7 @@ def model_to_dot(model, show_shapes=False, show_layer_names=True):
|
||||
dot.set('concentrate', True)
|
||||
dot.set_node_defaults(shape='record')
|
||||
|
||||
if model.__class__.__name__ == 'Sequential':
|
||||
if isinstance(model, Sequential):
|
||||
if not model.built:
|
||||
model.build()
|
||||
model = model.model
|
||||
@@ -28,13 +29,14 @@ def model_to_dot(model, show_shapes=False, show_layer_names=True):
|
||||
# Create graph nodes.
|
||||
for layer in layers:
|
||||
layer_id = str(id(layer))
|
||||
|
||||
|
||||
# Append a wrapped layer's label to node's label, if it exists.
|
||||
layer_name = layer.name
|
||||
class_name = layer.__class__.__name__
|
||||
if isinstance(layer, Wrapper):
|
||||
layer_name = '{}({})'.format(layer_name, layer.layer.name)
|
||||
class_name = '{}({})'.format(class_name, layer.layer.__class__.__name__)
|
||||
child_class_name = layer.layer.__class__.__name__
|
||||
class_name = '{}({})'.format(class_name, child_class_name)
|
||||
|
||||
# Create node's label.
|
||||
if show_layer_names:
|
||||
|
||||
+2
-2
@@ -3,12 +3,12 @@ from setuptools import find_packages
|
||||
|
||||
|
||||
setup(name='Keras',
|
||||
version='1.1.1',
|
||||
version='1.1.2',
|
||||
description='Deep Learning for Python',
|
||||
author='Francois Chollet',
|
||||
author_email='francois.chollet@gmail.com',
|
||||
url='https://github.com/fchollet/keras',
|
||||
download_url='https://github.com/fchollet/keras/tarball/1.1.1',
|
||||
download_url='https://github.com/fchollet/keras/tarball/1.1.2',
|
||||
license='MIT',
|
||||
install_requires=['theano', 'pyyaml', 'six'],
|
||||
extras_require={
|
||||
|
||||
@@ -881,6 +881,35 @@ class TestBackend(object):
|
||||
assert k_s_d.shape == k_d.shape
|
||||
assert_allclose(k_s_d, k_d, atol=1e-05)
|
||||
|
||||
def test_map(self):
|
||||
x = np.random.rand(10, 3).astype(np.float32)
|
||||
for K in [KTF, KTH]:
|
||||
kx = K.eval(K.map_fn(K.sum, x))
|
||||
|
||||
assert (10,) == kx.shape
|
||||
assert_allclose(x.sum(axis=1), kx, atol=1e-05)
|
||||
|
||||
def test_foldl(self):
|
||||
x = np.random.rand(10, 3).astype(np.float32)
|
||||
for K in [KTF, KTH]:
|
||||
kx = K.eval(K.foldl(lambda a, b: a+b, x))
|
||||
|
||||
assert (3,) == kx.shape
|
||||
assert_allclose(x.sum(axis=0), kx, atol=1e-05)
|
||||
|
||||
def test_foldr(self):
|
||||
# This test checks that foldr walks the array from right to left.
|
||||
# Folding left to right, the intermediate product 1e-20 * 1e-20 = 1e-40
|
||||
# is too small for float32's normal range, so it underflows; folding
|
||||
# right to left avoids that intermediate value and the result is larger.
|
||||
x = np.array([1e-20, 1e-20, 10, 10, 10], dtype=np.float32)
|
||||
for K in [KTF, KTH]:
|
||||
p1 = K.eval(K.foldl(lambda a, b: a*b, x))
|
||||
p2 = K.eval(K.foldr(lambda a, b: a*b, x))
|
||||
|
||||
assert p1 < p2
|
||||
assert 9e-38 < p2 <= 1e-37
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
pytest.main([__file__])
|
||||
|
||||
@@ -4,10 +4,11 @@ from numpy.testing import assert_allclose
|
||||
|
||||
from keras.layers import Dense, Dropout
|
||||
from keras.engine.topology import merge, Input
|
||||
from keras.engine.training import Model
|
||||
from keras.engine.training import Model, check_loss_and_target_compatibility
|
||||
from keras.models import Sequential
|
||||
from keras import backend as K
|
||||
from keras.utils.test_utils import keras_test
|
||||
from keras.callbacks import LambdaCallback
|
||||
|
||||
|
||||
@keras_test
|
||||
@@ -146,6 +147,28 @@ def test_model_methods():
|
||||
[output_a_np, output_b_np])
|
||||
assert len(out) == 4
|
||||
|
||||
# test starting from non-zero initial epoch
|
||||
trained_epochs = []
|
||||
|
||||
def on_epoch_begin(epoch, logs):
|
||||
trained_epochs.append(epoch)
|
||||
tracker_cb = LambdaCallback(on_epoch_begin=on_epoch_begin)
|
||||
out = model.fit([input_a_np, input_b_np],
|
||||
[output_a_np, output_b_np], nb_epoch=5, batch_size=4,
|
||||
initial_epoch=2, callbacks=[tracker_cb])
|
||||
assert trained_epochs == [2, 3, 4]
|
||||
|
||||
# test starting from non-zero initial epoch for generator too
|
||||
trained_epochs = []
|
||||
|
||||
def gen_data(batch_sz):
|
||||
while True:
|
||||
yield ([np.random.random((batch_sz, 3)), np.random.random((batch_sz, 3))],
|
||||
[np.random.random((batch_sz, 4)), np.random.random((batch_sz, 3))])
|
||||
out = model.fit_generator(gen_data(4), samples_per_epoch=10, nb_epoch=5,
|
||||
initial_epoch=2, callbacks=[tracker_cb])
|
||||
assert trained_epochs == [2, 3, 4]
|
||||
|
||||
# test with a custom metric function
|
||||
mse = lambda y_true, y_pred: K.mean(K.pow(y_true - y_pred, 2))
|
||||
|
||||
@@ -202,5 +225,30 @@ def test_trainable_argument():
|
||||
assert_allclose(out, out_2)
|
||||
|
||||
|
||||
@keras_test
|
||||
def test_check_not_failing():
|
||||
a = np.random.random((2, 1, 3))
|
||||
check_loss_and_target_compatibility([a], [K.categorical_crossentropy], [a.shape])
|
||||
check_loss_and_target_compatibility([a], [K.categorical_crossentropy], [(2, None, 3)])
|
||||
|
||||
|
||||
@keras_test
|
||||
def test_check_last_is_one():
|
||||
a = np.random.random((2, 3, 1))
|
||||
with pytest.raises(Exception) as exc:
|
||||
check_loss_and_target_compatibility([a], [K.categorical_crossentropy], [a.shape])
|
||||
|
||||
assert "You are passing a target array" in str(exc)
|
||||
|
||||
|
||||
@keras_test
|
||||
def test_check_bad_shape():
|
||||
a = np.random.random((2, 3, 5))
|
||||
with pytest.raises(Exception) as exc:
|
||||
check_loss_and_target_compatibility([a], [K.categorical_crossentropy], [(2, 3, 6)])
|
||||
|
||||
assert "targets to have the same shape" in str(exc)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
pytest.main([__file__])
|
||||
|
||||
@@ -8,6 +8,13 @@ from keras import backend as K
|
||||
from keras.layers import convolutional, pooling
|
||||
|
||||
|
||||
# TensorFlow does not support full convolution.
|
||||
if K._BACKEND == 'theano':
|
||||
_convolution_border_modes = ['valid', 'same', 'full']
|
||||
else:
|
||||
_convolution_border_modes = ['valid', 'same']
|
||||
|
||||
|
||||
@keras_test
|
||||
def test_convolution_1d():
|
||||
nb_samples = 2
|
||||
@@ -16,7 +23,7 @@ def test_convolution_1d():
|
||||
filter_length = 3
|
||||
nb_filter = 3
|
||||
|
||||
for border_mode in ['valid', 'same']:
|
||||
for border_mode in _convolution_border_modes:
|
||||
for subsample_length in [1, 2]:
|
||||
if border_mode == 'same' and subsample_length != 1:
|
||||
continue
|
||||
@@ -47,7 +54,7 @@ def test_atrous_conv_1d():
|
||||
filter_length = 3
|
||||
nb_filter = 3
|
||||
|
||||
for border_mode in ['valid', 'same']:
|
||||
for border_mode in _convolution_border_modes:
|
||||
for subsample_length in [1, 2]:
|
||||
for atrous_rate in [1, 2]:
|
||||
if border_mode == 'same' and subsample_length != 1:
|
||||
@@ -101,7 +108,7 @@ def test_convolution_2d():
|
||||
nb_row = 10
|
||||
nb_col = 6
|
||||
|
||||
for border_mode in ['valid', 'same']:
|
||||
for border_mode in _convolution_border_modes:
|
||||
for subsample in [(1, 1), (2, 2)]:
|
||||
if border_mode == 'same' and subsample != (1, 1):
|
||||
continue
|
||||
@@ -134,7 +141,7 @@ def test_deconvolution_2d():
|
||||
nb_row = 10
|
||||
nb_col = 6
|
||||
|
||||
for border_mode in ['valid', 'same']:
|
||||
for border_mode in _convolution_border_modes:
|
||||
for subsample in [(1, 1), (2, 2)]:
|
||||
if border_mode == 'same' and subsample != (1, 1):
|
||||
continue
|
||||
@@ -175,7 +182,7 @@ def test_atrous_conv_2d():
|
||||
nb_row = 10
|
||||
nb_col = 6
|
||||
|
||||
for border_mode in ['valid', 'same']:
|
||||
for border_mode in _convolution_border_modes:
|
||||
for subsample in [(1, 1), (2, 2)]:
|
||||
for atrous_rate in [(1, 1), (2, 2)]:
|
||||
if border_mode == 'same' and subsample != (1, 1):
|
||||
@@ -214,7 +221,7 @@ def test_separable_conv_2d():
|
||||
nb_row = 10
|
||||
nb_col = 6
|
||||
|
||||
for border_mode in ['valid', 'same']:
|
||||
for border_mode in _convolution_border_modes:
|
||||
for subsample in [(1, 1), (2, 2)]:
|
||||
for multiplier in [1, 2]:
|
||||
if border_mode == 'same' and subsample != (1, 1):
|
||||
@@ -322,7 +329,7 @@ def test_convolution_3d():
|
||||
input_len_dim2 = 11
|
||||
input_len_dim3 = 12
|
||||
|
||||
for border_mode in ['same', 'valid']:
|
||||
for border_mode in _convolution_border_modes:
|
||||
for subsample in [(1, 1, 1), (2, 2, 2)]:
|
||||
if border_mode == 'same' and subsample != (1, 1, 1):
|
||||
continue
|
||||
@@ -382,7 +389,8 @@ def test_zero_padding_1d():
|
||||
nb_samples = 2
|
||||
input_dim = 2
|
||||
nb_steps = 5
|
||||
input = np.ones((nb_samples, nb_steps, input_dim))
|
||||
shape = (nb_samples, nb_steps, input_dim)
|
||||
input = np.ones(shape)
|
||||
|
||||
# basic test
|
||||
layer_test(convolutional.ZeroPadding1D,
|
||||
@@ -397,22 +405,22 @@ def test_zero_padding_1d():
|
||||
|
||||
# correctness test
|
||||
layer = convolutional.ZeroPadding1D(padding=2)
|
||||
layer.set_input(K.variable(input), shape=input.shape)
|
||||
|
||||
out = K.eval(layer.output)
|
||||
layer.build(shape)
|
||||
output = layer(K.variable(input))
|
||||
np_output = K.eval(output)
|
||||
for offset in [0, 1, -1, -2]:
|
||||
assert_allclose(out[:, offset, :], 0.)
|
||||
assert_allclose(out[:, 2:-2, :], 1.)
|
||||
assert_allclose(np_output[:, offset, :], 0.)
|
||||
assert_allclose(np_output[:, 2:-2, :], 1.)
|
||||
|
||||
layer = convolutional.ZeroPadding1D(padding=(1, 2))
|
||||
layer.set_input(K.variable(input), shape=input.shape)
|
||||
|
||||
out = K.eval(layer.output)
|
||||
layer.build(shape)
|
||||
output = layer(K.variable(input))
|
||||
np_output = K.eval(output)
|
||||
for left_offset in [0]:
|
||||
assert_allclose(out[:, left_offset, :], 0.)
|
||||
assert_allclose(np_output[:, left_offset, :], 0.)
|
||||
for right_offset in [-1, -2]:
|
||||
assert_allclose(out[:, right_offset, :], 0.)
|
||||
assert_allclose(out[:, 1:-2, :], 1.)
|
||||
assert_allclose(np_output[:, right_offset, :], 0.)
|
||||
assert_allclose(np_output[:, 1:-2, :], 1.)
|
||||
layer.get_config()
|
||||
|
||||
|
||||
@@ -443,44 +451,44 @@ def test_zero_padding_2d():
|
||||
|
||||
# correctness test
|
||||
layer = convolutional.ZeroPadding2D(padding=(2, 2))
|
||||
layer.set_input(K.variable(input), shape=input.shape)
|
||||
|
||||
out = K.eval(layer.output)
|
||||
layer.build(input.shape)
|
||||
output = layer(K.variable(input))
|
||||
np_output = K.eval(output)
|
||||
if dim_ordering == 'tf':
|
||||
for offset in [0, 1, -1, -2]:
|
||||
assert_allclose(out[:, offset, :, :], 0.)
|
||||
assert_allclose(out[:, :, offset, :], 0.)
|
||||
assert_allclose(out[:, 2:-2, 2:-2, :], 1.)
|
||||
assert_allclose(np_output[:, offset, :, :], 0.)
|
||||
assert_allclose(np_output[:, :, offset, :], 0.)
|
||||
assert_allclose(np_output[:, 2:-2, 2:-2, :], 1.)
|
||||
elif dim_ordering == 'th':
|
||||
for offset in [0, 1, -1, -2]:
|
||||
assert_allclose(out[:, :, offset, :], 0.)
|
||||
assert_allclose(out[:, :, :, offset], 0.)
|
||||
assert_allclose(out[:, 2:-2, 2:-2, :], 1.)
|
||||
assert_allclose(np_output[:, :, offset, :], 0.)
|
||||
assert_allclose(np_output[:, :, :, offset], 0.)
|
||||
assert_allclose(np_output[:, 2:-2, 2:-2, :], 1.)
|
||||
|
||||
layer = convolutional.ZeroPadding2D(padding=(1, 2, 3, 4))
|
||||
layer.set_input(K.variable(input), shape=input.shape)
|
||||
|
||||
out = K.eval(layer.output)
|
||||
layer.build(input.shape)
|
||||
output = layer(K.variable(input))
|
||||
np_output = K.eval(output)
|
||||
if dim_ordering == 'tf':
|
||||
for top_offset in [0]:
|
||||
assert_allclose(out[:, top_offset, :, :], 0.)
|
||||
assert_allclose(np_output[:, top_offset, :, :], 0.)
|
||||
for bottom_offset in [-1, -2]:
|
||||
assert_allclose(out[:, bottom_offset, :, :], 0.)
|
||||
assert_allclose(np_output[:, bottom_offset, :, :], 0.)
|
||||
for left_offset in [0, 1, 2]:
|
||||
assert_allclose(out[:, :, left_offset, :], 0.)
|
||||
assert_allclose(np_output[:, :, left_offset, :], 0.)
|
||||
for right_offset in [-1, -2, -3, -4]:
|
||||
assert_allclose(out[:, :, right_offset, :], 0.)
|
||||
assert_allclose(out[:, 1:-2, 3:-4, :], 1.)
|
||||
assert_allclose(np_output[:, :, right_offset, :], 0.)
|
||||
assert_allclose(np_output[:, 1:-2, 3:-4, :], 1.)
|
||||
elif dim_ordering == 'th':
|
||||
for top_offset in [0]:
|
||||
assert_allclose(out[:, :, top_offset, :], 0.)
|
||||
assert_allclose(np_output[:, :, top_offset, :], 0.)
|
||||
for bottom_offset in [-1, -2]:
|
||||
assert_allclose(out[:, :, bottom_offset, :], 0.)
|
||||
assert_allclose(np_output[:, :, bottom_offset, :], 0.)
|
||||
for left_offset in [0, 1, 2]:
|
||||
assert_allclose(out[:, :, :, left_offset], 0.)
|
||||
assert_allclose(np_output[:, :, :, left_offset], 0.)
|
||||
for right_offset in [-1, -2, -3, -4]:
|
||||
assert_allclose(out[:, :, :, right_offset], 0.)
|
||||
assert_allclose(out[:, :, 1:-2, 3:-4], 1.)
|
||||
assert_allclose(np_output[:, :, :, right_offset], 0.)
|
||||
assert_allclose(np_output[:, :, 1:-2, 3:-4], 1.)
|
||||
layer.get_config()
|
||||
|
||||
|
||||
@@ -502,13 +510,14 @@ def test_zero_padding_3d():
|
||||
|
||||
# correctness test
|
||||
layer = convolutional.ZeroPadding3D(padding=(2, 2, 2))
|
||||
layer.set_input(K.variable(input), shape=input.shape)
|
||||
out = K.eval(layer.output)
|
||||
layer.build(input.shape)
|
||||
output = layer(K.variable(input))
|
||||
np_output = K.eval(output)
|
||||
for offset in [0, 1, -1, -2]:
|
||||
assert_allclose(out[:, offset, :, :, :], 0.)
|
||||
assert_allclose(out[:, :, offset, :, :], 0.)
|
||||
assert_allclose(out[:, :, :, offset, :], 0.)
|
||||
assert_allclose(out[:, 2:-2, 2:-2, 2:-2, :], 1.)
|
||||
assert_allclose(np_output[:, offset, :, :, :], 0.)
|
||||
assert_allclose(np_output[:, :, offset, :, :], 0.)
|
||||
assert_allclose(np_output[:, :, :, offset, :], 0.)
|
||||
assert_allclose(np_output[:, 2:-2, 2:-2, 2:-2, :], 1.)
|
||||
layer.get_config()
|
||||
|
||||
|
||||
@@ -539,15 +548,15 @@ def test_upsampling_2d():
|
||||
layer = convolutional.UpSampling2D(
|
||||
size=(length_row, length_col),
|
||||
dim_ordering=dim_ordering)
|
||||
layer.set_input(K.variable(input), shape=input.shape)
|
||||
|
||||
out = K.eval(layer.output)
|
||||
layer.build(input.shape)
|
||||
output = layer(K.variable(input))
|
||||
np_output = K.eval(output)
|
||||
if dim_ordering == 'th':
|
||||
assert out.shape[2] == length_row * input_nb_row
|
||||
assert out.shape[3] == length_col * input_nb_col
|
||||
assert np_output.shape[2] == length_row * input_nb_row
|
||||
assert np_output.shape[3] == length_col * input_nb_col
|
||||
else: # tf
|
||||
assert out.shape[1] == length_row * input_nb_row
|
||||
assert out.shape[2] == length_col * input_nb_col
|
||||
assert np_output.shape[1] == length_row * input_nb_row
|
||||
assert np_output.shape[2] == length_col * input_nb_col
|
||||
|
||||
# compare with numpy
|
||||
if dim_ordering == 'th':
|
||||
@@ -557,7 +566,7 @@ def test_upsampling_2d():
|
||||
expected_out = np.repeat(input, length_row, axis=1)
|
||||
expected_out = np.repeat(expected_out, length_col, axis=2)
|
||||
|
||||
assert_allclose(out, expected_out)
|
||||
assert_allclose(np_output, expected_out)
|
||||
|
||||
|
||||
def test_upsampling_3d():
|
||||
@@ -580,17 +589,17 @@ def test_upsampling_3d():
|
||||
layer = convolutional.UpSampling3D(
|
||||
size=(length_dim1, length_dim2, length_dim3),
|
||||
dim_ordering=dim_ordering)
|
||||
layer.set_input(K.variable(input), shape=input.shape)
|
||||
|
||||
out = K.eval(layer.output)
|
||||
layer.build(input.shape)
|
||||
output = layer(K.variable(input))
|
||||
np_output = K.eval(output)
|
||||
if dim_ordering == 'th':
|
||||
assert out.shape[2] == length_dim1 * input_len_dim1
|
||||
assert out.shape[3] == length_dim2 * input_len_dim2
|
||||
assert out.shape[4] == length_dim3 * input_len_dim3
|
||||
assert np_output.shape[2] == length_dim1 * input_len_dim1
|
||||
assert np_output.shape[3] == length_dim2 * input_len_dim2
|
||||
assert np_output.shape[4] == length_dim3 * input_len_dim3
|
||||
else: # tf
|
||||
assert out.shape[1] == length_dim1 * input_len_dim1
|
||||
assert out.shape[2] == length_dim2 * input_len_dim2
|
||||
assert out.shape[3] == length_dim3 * input_len_dim3
|
||||
assert np_output.shape[1] == length_dim1 * input_len_dim1
|
||||
assert np_output.shape[2] == length_dim2 * input_len_dim2
|
||||
assert np_output.shape[3] == length_dim3 * input_len_dim3
|
||||
|
||||
# compare with numpy
|
||||
if dim_ordering == 'th':
|
||||
@@ -602,7 +611,7 @@ def test_upsampling_3d():
|
||||
expected_out = np.repeat(expected_out, length_dim2, axis=2)
|
||||
expected_out = np.repeat(expected_out, length_dim3, axis=3)
|
||||
|
||||
assert_allclose(out, expected_out)
|
||||
assert_allclose(np_output, expected_out)
|
||||
|
||||
|
||||
@keras_test
|
||||
@@ -626,32 +635,35 @@ def test_cropping_2d():
|
||||
dim_ordering = K.image_dim_ordering()
|
||||
|
||||
if dim_ordering == 'th':
|
||||
input = np.random.rand(nb_samples, stack_size, input_len_dim1, input_len_dim2)
|
||||
input = np.random.rand(nb_samples, stack_size,
|
||||
input_len_dim1, input_len_dim2)
|
||||
else:
|
||||
input = np.random.rand(nb_samples, input_len_dim1, input_len_dim2, stack_size)
|
||||
input = np.random.rand(nb_samples,
|
||||
input_len_dim1, input_len_dim2,
|
||||
stack_size)
|
||||
# basic test
|
||||
layer_test(convolutional.Cropping2D,
|
||||
kwargs={'cropping': cropping,
|
||||
'dim_ordering': dim_ordering},
|
||||
input_shape=input.shape)
|
||||
# correctness test
|
||||
layer = convolutional.Cropping2D(cropping=cropping, dim_ordering=dim_ordering)
|
||||
layer.set_input(K.variable(input), shape=input.shape)
|
||||
|
||||
out = K.eval(layer.output)
|
||||
layer = convolutional.Cropping2D(cropping=cropping,
|
||||
dim_ordering=dim_ordering)
|
||||
layer.build(input.shape)
|
||||
output = layer(K.variable(input))
|
||||
np_output = K.eval(output)
|
||||
# compare with numpy
|
||||
if dim_ordering == 'th':
|
||||
expected_out = input[:,
|
||||
:,
|
||||
cropping[0][0]:-cropping[0][1],
|
||||
cropping[1][0]:-cropping[1][1]]
|
||||
cropping[0][0]: -cropping[0][1],
|
||||
cropping[1][0]: -cropping[1][1]]
|
||||
else:
|
||||
expected_out = input[:,
|
||||
cropping[0][0]:-cropping[0][1],
|
||||
cropping[1][0]:-cropping[1][1],
|
||||
cropping[0][0]: -cropping[0][1],
|
||||
cropping[1][0]: -cropping[1][1],
|
||||
:]
|
||||
|
||||
assert_allclose(out, expected_out)
|
||||
assert_allclose(np_output, expected_out)
|
||||
|
||||
|
||||
def test_cropping_3d():
|
||||
@@ -664,34 +676,37 @@ def test_cropping_3d():
|
||||
dim_ordering = K.image_dim_ordering()
|
||||
|
||||
if dim_ordering == 'th':
|
||||
input = np.random.rand(nb_samples, stack_size, input_len_dim1, input_len_dim2, input_len_dim3)
|
||||
input = np.random.rand(nb_samples, stack_size,
|
||||
input_len_dim1, input_len_dim2, input_len_dim3)
|
||||
else:
|
||||
input = np.random.rand(nb_samples, input_len_dim1, input_len_dim2, input_len_dim3, stack_size)
|
||||
input = np.random.rand(nb_samples,
|
||||
input_len_dim1, input_len_dim2,
|
||||
input_len_dim3, stack_size)
|
||||
# basic test
|
||||
layer_test(convolutional.Cropping3D,
|
||||
kwargs={'cropping': cropping,
|
||||
'dim_ordering': dim_ordering},
|
||||
input_shape=input.shape)
|
||||
# correctness test
|
||||
layer = convolutional.Cropping3D(cropping=cropping, dim_ordering=dim_ordering)
|
||||
layer.set_input(K.variable(input), shape=input.shape)
|
||||
|
||||
out = K.eval(layer.output)
|
||||
layer = convolutional.Cropping3D(cropping=cropping,
|
||||
dim_ordering=dim_ordering)
|
||||
layer.build(input.shape)
|
||||
output = layer(K.variable(input))
|
||||
np_output = K.eval(output)
|
||||
# compare with numpy
|
||||
if dim_ordering == 'th':
|
||||
expected_out = input[:,
|
||||
:,
|
||||
cropping[0][0]:-cropping[0][1],
|
||||
cropping[1][0]:-cropping[1][1],
|
||||
cropping[2][0]:-cropping[2][1]]
|
||||
cropping[0][0]: -cropping[0][1],
|
||||
cropping[1][0]: -cropping[1][1],
|
||||
cropping[2][0]: -cropping[2][1]]
|
||||
else:
|
||||
expected_out = input[:,
|
||||
cropping[0][0]:-cropping[0][1],
|
||||
cropping[1][0]:-cropping[1][1],
|
||||
cropping[2][0]:-cropping[2][1],
|
||||
cropping[0][0]: -cropping[0][1],
|
||||
cropping[1][0]: -cropping[1][1],
|
||||
cropping[2][0]: -cropping[2][1],
|
||||
:]
|
||||
|
||||
assert_allclose(out, expected_out)
|
||||
assert_allclose(np_output, expected_out)
|
||||
|
||||
if __name__ == '__main__':
|
||||
pytest.main([__file__])
|
||||
|
||||
@@ -0,0 +1,130 @@
|
||||
import pytest
|
||||
import numpy as np
|
||||
from numpy.testing import assert_allclose
|
||||
|
||||
from keras import backend as K
|
||||
from keras.models import Sequential
|
||||
from keras.layers import convolutional_recurrent
|
||||
from keras.utils.test_utils import layer_test
|
||||
from keras import regularizers
|
||||
|
||||
|
||||
def test_recurrent_convolutional():
|
||||
nb_row = 3
|
||||
nb_col = 3
|
||||
nb_filter = 5
|
||||
nb_samples = 2
|
||||
input_channel = 2
|
||||
input_nb_row = 5
|
||||
input_nb_col = 5
|
||||
sequence_len = 2
|
||||
for dim_ordering in ['th', 'tf']:
|
||||
|
||||
if dim_ordering == 'th':
|
||||
input = np.random.rand(nb_samples, sequence_len,
|
||||
input_channel,
|
||||
input_nb_row, input_nb_col)
|
||||
else: # tf
|
||||
input = np.random.rand(nb_samples, sequence_len,
|
||||
input_nb_row, input_nb_col,
|
||||
input_channel)
|
||||
|
||||
for return_sequences in [True, False]:
|
||||
# test for output shape:
|
||||
output = layer_test(convolutional_recurrent.ConvLSTM2D,
|
||||
kwargs={'dim_ordering': dim_ordering,
|
||||
'return_sequences': return_sequences,
|
||||
'nb_filter': nb_filter,
|
||||
'nb_row': nb_row,
|
||||
'nb_col': nb_col,
|
||||
'border_mode': "same"},
|
||||
input_shape=input.shape)
|
||||
|
||||
output_shape = [nb_samples, input_nb_row, input_nb_col]
|
||||
|
||||
if dim_ordering == 'th':
|
||||
output_shape.insert(1, nb_filter)
|
||||
else:
|
||||
output_shape.insert(3, nb_filter)
|
||||
|
||||
if return_sequences:
|
||||
output_shape.insert(1, sequence_len)
|
||||
|
||||
assert output.shape == tuple(output_shape)
|
||||
|
||||
# No need to check statefulness for both
|
||||
if dim_ordering == 'th' or return_sequences:
|
||||
continue
|
||||
|
||||
# Tests for statefulness
|
||||
model = Sequential()
|
||||
kwargs = {'dim_ordering': dim_ordering,
|
||||
'return_sequences': return_sequences,
|
||||
'nb_filter': nb_filter,
|
||||
'nb_row': nb_row,
|
||||
'nb_col': nb_col,
|
||||
'stateful': True,
|
||||
'batch_input_shape': input.shape,
|
||||
'border_mode': "same"}
|
||||
layer = convolutional_recurrent.ConvLSTM2D(**kwargs)
|
||||
|
||||
model.add(layer)
|
||||
model.compile(optimizer='sgd', loss='mse')
|
||||
out1 = model.predict(np.ones_like(input))
|
||||
assert(out1.shape == tuple(output_shape))
|
||||
|
||||
# train once so that the states change
|
||||
model.train_on_batch(np.ones_like(input),
|
||||
np.ones_like(output))
|
||||
out2 = model.predict(np.ones_like(input))
|
||||
|
||||
# if the state is not reset, output should be different
|
||||
assert(out1.max() != out2.max())
|
||||
|
||||
# check that output changes after states are reset
|
||||
# (even though the model itself didn't change)
|
||||
layer.reset_states()
|
||||
out3 = model.predict(np.ones_like(input))
|
||||
assert(out2.max() != out3.max())
|
||||
|
||||
# check that container-level reset_states() works
|
||||
model.reset_states()
|
||||
out4 = model.predict(np.ones_like(input))
|
||||
assert_allclose(out3, out4, atol=1e-5)
|
||||
|
||||
# check that the call to `predict` updated the states
|
||||
out5 = model.predict(np.ones_like(input))
|
||||
assert(out4.max() != out5.max())
|
||||
|
||||
# check regularizers
|
||||
kwargs = {'dim_ordering': dim_ordering,
|
||||
'return_sequences': return_sequences,
|
||||
'nb_filter': nb_filter,
|
||||
'nb_row': nb_row,
|
||||
'nb_col': nb_col,
|
||||
'stateful': True,
|
||||
'batch_input_shape': input.shape,
|
||||
'W_regularizer': regularizers.WeightRegularizer(l1=0.01),
|
||||
'U_regularizer': regularizers.WeightRegularizer(l1=0.01),
|
||||
'b_regularizer': 'l2',
|
||||
'border_mode': "same"}
|
||||
|
||||
layer = convolutional_recurrent.ConvLSTM2D(**kwargs)
|
||||
layer.build(input.shape)
|
||||
output = layer(K.variable(np.ones(input.shape)))
|
||||
K.eval(output)
|
||||
|
||||
# check dropout
|
||||
layer_test(convolutional_recurrent.ConvLSTM2D,
|
||||
kwargs={'dim_ordering': dim_ordering,
|
||||
'return_sequences': return_sequences,
|
||||
'nb_filter': nb_filter,
|
||||
'nb_row': nb_row,
|
||||
'nb_col': nb_col,
|
||||
'border_mode': "same",
|
||||
'dropout_W': 0.1,
|
||||
'dropout_U': 0.1},
|
||||
input_shape=input.shape)
|
||||
|
||||
if __name__ == '__main__':
|
||||
pytest.main([__file__])
|
||||
@@ -15,7 +15,7 @@ def test_masking():
|
||||
|
||||
@keras_test
|
||||
def test_merge():
|
||||
from keras.layers import Input, merge, Merge
|
||||
from keras.layers import Input, merge, Merge, Masking
|
||||
from keras.models import Model
|
||||
|
||||
# test modes: 'sum', 'mul', 'concat', 'ave', 'cos', 'dot'.
|
||||
@@ -53,7 +53,8 @@ def test_merge():
|
||||
input_b = Input(shape=input_shapes[1][1:])
|
||||
merged = merge([input_a, input_b],
|
||||
mode=lambda tup: K.concatenate([tup[0], tup[1]]),
|
||||
output_shape=lambda tup: (tup[0][:-1],) + (tup[0][-1] + tup[1][-1],))
|
||||
output_shape=lambda tup: tup[0][:-1] + (tup[0][-1] + tup[1][-1],))
|
||||
model = Model([input_a, input_b], merged)
|
||||
expected_output_shape = model.get_output_shape_for(input_shapes)
|
||||
actual_output_shape = model.predict(inputs).shape
|
||||
assert expected_output_shape == actual_output_shape
|
||||
@@ -65,17 +66,18 @@ def test_merge():
|
||||
# test function with output_shape function
|
||||
def fn_mode(tup):
|
||||
x, y = tup
|
||||
return K.concatenate([x, y])
|
||||
return K.concatenate([x, y], axis=1)
|
||||
|
||||
def fn_output_shape(tup):
|
||||
s1, s2 = tup
|
||||
return (s1[:-1],) + (s1[-1] + s2[-1],)
|
||||
return (s1[0], s1[1] + s2[1]) + s1[2:]
|
||||
|
||||
input_a = Input(shape=input_shapes[0][1:])
|
||||
input_b = Input(shape=input_shapes[1][1:])
|
||||
merged = merge([input_a, input_b],
|
||||
mode=fn_mode,
|
||||
output_shape=fn_output_shape)
|
||||
model = Model([input_a, input_b], merged)
|
||||
expected_output_shape = model.get_output_shape_for(input_shapes)
|
||||
actual_output_shape = model.predict(inputs).shape
|
||||
assert expected_output_shape == actual_output_shape
|
||||
@@ -84,6 +86,74 @@ def test_merge():
|
||||
model = Model.from_config(config)
|
||||
model.compile('rmsprop', 'mse')
|
||||
|
||||
# test function with output_mask function
|
||||
# time dimension is required for masking
|
||||
input_shapes = [(4, 3, 2), (4, 3, 2)]
|
||||
inputs = [np.random.random(shape) for shape in input_shapes]
|
||||
|
||||
def fn_output_mask(tup):
|
||||
x_mask, y_mask = tup
|
||||
return K.concatenate([x_mask, y_mask])
|
||||
|
||||
input_a = Input(shape=input_shapes[0][1:])
|
||||
input_b = Input(shape=input_shapes[1][1:])
|
||||
a = Masking()(input_a)
|
||||
b = Masking()(input_b)
|
||||
merged = merge([a, b], mode=fn_mode, output_shape=fn_output_shape, output_mask=fn_output_mask)
|
||||
model = Model([input_a, input_b], merged)
|
||||
expected_output_shape = model.get_output_shape_for(input_shapes)
|
||||
actual_output_shape = model.predict(inputs).shape
|
||||
assert expected_output_shape == actual_output_shape
|
||||
|
||||
config = model.get_config()
|
||||
model = Model.from_config(config)
|
||||
model.compile('rmsprop', 'mse')
|
||||
|
||||
mask_inputs = (np.zeros(input_shapes[0][:-1]), np.ones(input_shapes[1][:-1]))
|
||||
expected_mask_output = np.concatenate(mask_inputs, axis=-1)
|
||||
mask_input_placeholders = [K.placeholder(shape=input_shape[:-1]) for input_shape in input_shapes]
|
||||
mask_output = model.layers[-1]._output_mask(mask_input_placeholders)
|
||||
assert np.all(K.function(mask_input_placeholders, [mask_output])(mask_inputs)[0] == expected_mask_output)
|
||||
|
||||
# test lambda with output_mask lambda
|
||||
input_a = Input(shape=input_shapes[0][1:])
|
||||
input_b = Input(shape=input_shapes[1][1:])
|
||||
a = Masking()(input_a)
|
||||
b = Masking()(input_b)
|
||||
merged = merge([a, b], mode=lambda tup: K.concatenate([tup[0], tup[1]], axis=1),
|
||||
output_shape=lambda tup: (tup[0][0], tup[0][1] + tup[1][1]) + tup[0][2:],
|
||||
output_mask=lambda tup: K.concatenate([tup[0], tup[1]]))
|
||||
model = Model([input_a, input_b], merged)
|
||||
expected_output_shape = model.get_output_shape_for(input_shapes)
|
||||
actual_output_shape = model.predict(inputs).shape
|
||||
assert expected_output_shape == actual_output_shape
|
||||
|
||||
config = model.get_config()
|
||||
model = Model.from_config(config)
|
||||
model.compile('rmsprop', 'mse')
|
||||
|
||||
mask_output = model.layers[-1]._output_mask(mask_input_placeholders)
|
||||
assert np.all(K.function(mask_input_placeholders, [mask_output])(mask_inputs)[0] == expected_mask_output)
|
||||
|
||||
# test with arguments
|
||||
input_shapes = [(3, 2), (3, 2)]
|
||||
inputs = [np.random.random(shape) for shape in input_shapes]
|
||||
|
||||
def fn_mode(tup, a, b):
|
||||
x, y = tup
|
||||
return x * a + y * b
|
||||
|
||||
input_a = Input(shape=input_shapes[0][1:])
|
||||
input_b = Input(shape=input_shapes[1][1:])
|
||||
merged = merge([input_a, input_b], mode=fn_mode, output_shape=lambda s: s[0], arguments={'a': 0.7, 'b': 0.3})
|
||||
model = Model([input_a, input_b], merged)
|
||||
output = model.predict(inputs)
|
||||
|
||||
config = model.get_config()
|
||||
model = Model.from_config(config)
|
||||
|
||||
assert np.all(model.predict(inputs) == output)
|
||||
|
||||
|
||||
@keras_test
|
||||
def test_merge_mask_2d():
|
||||
@@ -153,6 +223,10 @@ def test_dropout():
|
||||
kwargs={'p': 0.5},
|
||||
input_shape=(3, 2))
|
||||
|
||||
layer_test(core.SpatialDropout1D,
|
||||
kwargs={'p': 0.5},
|
||||
input_shape=(2, 3, 4))
|
||||
|
||||
layer_test(core.SpatialDropout2D,
|
||||
kwargs={'p': 0.5},
|
||||
input_shape=(2, 3, 4, 5))
|
||||
@@ -212,6 +286,11 @@ def test_lambda():
|
||||
kwargs={'function': lambda x: x + 1},
|
||||
input_shape=(3, 2))
|
||||
|
||||
layer_test(Lambda,
|
||||
kwargs={'function': lambda x, a, b: x * a + b,
|
||||
'arguments': {'a': 0.6, 'b': 0.4}},
|
||||
input_shape=(3, 2))
|
||||
|
||||
# test serialization with function
|
||||
def f(x):
|
||||
return x + 1
|
||||
|
||||
@@ -2,10 +2,10 @@ import pytest
|
||||
import numpy as np
|
||||
from numpy.testing import assert_allclose
|
||||
|
||||
from keras.layers.core import Dense, Activation
|
||||
from keras.layers import Dense, Activation, Input
|
||||
from keras.utils.test_utils import layer_test, keras_test
|
||||
from keras.layers import normalization
|
||||
from keras.models import Sequential
|
||||
from keras.models import Sequential, Model
|
||||
from keras import backend as K
|
||||
|
||||
input_1 = np.arange(10)
|
||||
@@ -78,5 +78,33 @@ def test_batchnorm_mode_1():
|
||||
assert_allclose(K.eval(K.std(out)), 0.0, atol=1e-1)
|
||||
|
||||
|
||||
@keras_test
|
||||
def test_shared_batchnorm():
|
||||
'''Test that a BN layer can be shared
|
||||
across different data streams.
|
||||
'''
|
||||
# Test single layer reuse
|
||||
bn = normalization.BatchNormalization(input_shape=(10,), mode=0)
|
||||
x1 = Input(shape=(10,))
|
||||
bn(x1)
|
||||
|
||||
x2 = Input(shape=(10,))
|
||||
y2 = bn(x2)
|
||||
|
||||
x = np.random.normal(loc=5.0, scale=10.0, size=(2, 10))
|
||||
model = Model(x2, y2)
|
||||
assert len(model.updates) == 2
|
||||
model.compile('sgd', 'mse')
|
||||
model.train_on_batch(x, x)
|
||||
|
||||
# Test model-level reuse
|
||||
x3 = Input(shape=(10,))
|
||||
y3 = model(x3)
|
||||
new_model = Model(x3, y3)
|
||||
assert len(model.updates) == 2
|
||||
new_model.compile('sgd', 'mse')
|
||||
new_model.train_on_batch(x, x)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
pytest.main([__file__])
|
||||
|
||||
@@ -129,9 +129,9 @@ def test_regularizer(layer_class):
|
||||
U_regularizer=regularizers.WeightRegularizer(l1=0.01),
|
||||
b_regularizer='l2')
|
||||
shape = (nb_samples, timesteps, embedding_dim)
|
||||
layer.set_input(K.variable(np.ones(shape)),
|
||||
shape=shape)
|
||||
K.eval(layer.output)
|
||||
layer.build(shape)
|
||||
output = layer(K.variable(np.ones(shape)))
|
||||
K.eval(output)
|
||||
|
||||
|
||||
@keras_test
|
||||
@@ -140,15 +140,30 @@ def test_masking_layer():
|
||||
https://github.com/fchollet/keras/issues/1567
|
||||
|
||||
'''
|
||||
model = Sequential()
|
||||
model.add(Masking(input_shape=(3, 4)))
|
||||
model.add(recurrent.LSTM(output_dim=5, return_sequences=True))
|
||||
model.compile(loss='categorical_crossentropy', optimizer='adam')
|
||||
I = np.random.random((6, 3, 4))
|
||||
V = np.abs(np.random.random((6, 3, 5)))
|
||||
V /= V.sum(axis=-1, keepdims=True)
|
||||
|
||||
model = Sequential()
|
||||
model.add(Masking(input_shape=(3, 4)))
|
||||
model.add(recurrent.LSTM(output_dim=5, return_sequences=True, unroll=False))
|
||||
model.compile(loss='categorical_crossentropy', optimizer='adam')
|
||||
model.fit(I, V, nb_epoch=1, batch_size=100, verbose=1)
|
||||
|
||||
model = Sequential()
|
||||
model.add(Masking(input_shape=(3, 4)))
|
||||
model.add(recurrent.LSTM(output_dim=5, return_sequences=True, unroll=True))
|
||||
model.compile(loss='categorical_crossentropy', optimizer='adam')
|
||||
model.fit(I, V, nb_epoch=1, batch_size=100, verbose=1)
|
||||
|
||||
|
||||
@rnn_test
|
||||
def test_from_config(layer_class):
|
||||
for stateful in (False, True):
|
||||
l1 = layer_class(output_dim=1, stateful=stateful)
|
||||
l2 = layer_class.from_config(l1.get_config())
|
||||
assert l1.get_config() == l2.get_config()
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
pytest.main([__file__])
|
||||
|
||||
@@ -115,6 +115,13 @@ def test_Bidirectional():
|
||||
model.compile(loss='mse', optimizer='sgd')
|
||||
model.fit(x, y, nb_epoch=1, batch_size=1)
|
||||
|
||||
# Bidirectional and stateful
|
||||
input = Input(batch_shape=(1, timesteps, dim))
|
||||
output = wrappers.Bidirectional(rnn(output_dim, stateful=True), merge_mode=mode)(input)
|
||||
model = Model(input, output)
|
||||
model.compile(loss='mse', optimizer='sgd')
|
||||
model.fit(x, y, nb_epoch=1, batch_size=1)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
pytest.main([__file__])
|
||||
|
||||
@@ -9,7 +9,7 @@ from keras import optimizers
|
||||
np.random.seed(1337)
|
||||
|
||||
from keras import callbacks
|
||||
from keras.models import Graph, Sequential
|
||||
from keras.models import Sequential
|
||||
from keras.layers.core import Dense
|
||||
from keras.utils.test_utils import get_test_data
|
||||
from keras import backend as K
|
||||
@@ -186,13 +186,12 @@ def test_ReduceLROnPlateau():
|
||||
assert np.allclose(float(K.get_value(model.optimizer.lr)), 0.1, atol=K.epsilon())
|
||||
|
||||
|
||||
@pytest.mark.skipif((K._BACKEND != 'tensorflow'),
|
||||
@pytest.mark.skipif((K.backend() != 'tensorflow'),
|
||||
reason="Requires tensorflow backend")
|
||||
def test_TensorBoard():
|
||||
import shutil
|
||||
import tensorflow as tf
|
||||
import keras.backend.tensorflow_backend as KTF
|
||||
old_session = KTF.get_session()
|
||||
filepath = './logs'
|
||||
(X_train, y_train), (X_test, y_test) = get_test_data(nb_train=train_samples,
|
||||
nb_test=test_samples,
|
||||
@@ -224,92 +223,44 @@ def test_TensorBoard():
|
||||
yield {'X_vars': X_test, 'output': y_test}
|
||||
|
||||
# case 1 Sequential
|
||||
model = Sequential()
|
||||
model.add(Dense(nb_hidden, input_dim=input_dim, activation='relu'))
|
||||
model.add(Dense(nb_class, activation='softmax'))
|
||||
model.compile(loss='categorical_crossentropy',
|
||||
optimizer='sgd',
|
||||
metrics=['accuracy'])
|
||||
|
||||
with tf.Graph().as_default():
|
||||
session = tf.Session('')
|
||||
KTF.set_session(session)
|
||||
model = Sequential()
|
||||
model.add(Dense(nb_hidden, input_dim=input_dim, activation='relu'))
|
||||
model.add(Dense(nb_class, activation='softmax'))
|
||||
model.compile(loss='categorical_crossentropy',
|
||||
optimizer='sgd',
|
||||
metrics=['accuracy'])
|
||||
tsb = callbacks.TensorBoard(log_dir=filepath, histogram_freq=1)
|
||||
cbks = [tsb]
|
||||
|
||||
tsb = callbacks.TensorBoard(log_dir=filepath, histogram_freq=1)
|
||||
cbks = [tsb]
|
||||
# fit with validation data
|
||||
model.fit(X_train, y_train, batch_size=batch_size,
|
||||
validation_data=(X_test, y_test), callbacks=cbks, nb_epoch=2)
|
||||
|
||||
# fit with validation data
|
||||
model.fit(X_train, y_train, batch_size=batch_size,
|
||||
validation_data=(X_test, y_test), callbacks=cbks, nb_epoch=2)
|
||||
# fit with validation data and accuracy
|
||||
model.fit(X_train, y_train, batch_size=batch_size,
|
||||
validation_data=(X_test, y_test), callbacks=cbks, nb_epoch=2)
|
||||
|
||||
# fit with validation data and accuracy
|
||||
model.fit(X_train, y_train, batch_size=batch_size,
|
||||
validation_data=(X_test, y_test), callbacks=cbks, nb_epoch=2)
|
||||
# fit generator with validation data
|
||||
model.fit_generator(data_generator(True), len(X_train), nb_epoch=2,
|
||||
validation_data=(X_test, y_test),
|
||||
callbacks=cbks)
|
||||
|
||||
# fit generator with validation data
|
||||
model.fit_generator(data_generator(True), len(X_train), nb_epoch=2,
|
||||
validation_data=(X_test, y_test),
|
||||
callbacks=cbks)
|
||||
# fit generator without validation data
|
||||
model.fit_generator(data_generator(True), len(X_train), nb_epoch=2,
|
||||
callbacks=cbks)
|
||||
|
||||
# fit generator without validation data
|
||||
model.fit_generator(data_generator(True), len(X_train), nb_epoch=2,
|
||||
callbacks=cbks)
|
||||
# fit generator with validation data and accuracy
|
||||
model.fit_generator(data_generator(True), len(X_train), nb_epoch=2,
|
||||
validation_data=(X_test, y_test),
|
||||
callbacks=cbks)
|
||||
|
||||
# fit generator with validation data and accuracy
|
||||
model.fit_generator(data_generator(True), len(X_train), nb_epoch=2,
|
||||
validation_data=(X_test, y_test),
|
||||
callbacks=cbks)
|
||||
# fit generator without validation data and accuracy
|
||||
model.fit_generator(data_generator(True), len(X_train), nb_epoch=2,
|
||||
callbacks=cbks)
|
||||
|
||||
# fit generator without validation data and accuracy
|
||||
model.fit_generator(data_generator(True), len(X_train), nb_epoch=2,
|
||||
callbacks=cbks)
|
||||
|
||||
assert os.path.exists(filepath)
|
||||
shutil.rmtree(filepath)
|
||||
|
||||
# case 2 Graph
|
||||
|
||||
with tf.Graph().as_default():
|
||||
session = tf.Session('')
|
||||
KTF.set_session(session)
|
||||
model = Graph()
|
||||
model.add_input(name='X_vars', input_shape=(input_dim,))
|
||||
|
||||
model.add_node(Dense(nb_hidden, activation="sigmoid"),
|
||||
name='Dense1', input='X_vars')
|
||||
model.add_node(Dense(nb_class, activation="softmax"),
|
||||
name='last_dense',
|
||||
input='Dense1')
|
||||
model.add_output(name='output', input='last_dense')
|
||||
model.compile(optimizer='sgd', loss={'output': 'mse'})
|
||||
|
||||
tsb = callbacks.TensorBoard(log_dir=filepath, histogram_freq=1)
|
||||
cbks = [tsb]
|
||||
|
||||
# fit with validation
|
||||
model.fit({'X_vars': X_train, 'output': y_train},
|
||||
batch_size=batch_size,
|
||||
validation_data={'X_vars': X_test, 'output': y_test},
|
||||
callbacks=cbks, nb_epoch=2)
|
||||
|
||||
# fit wo validation
|
||||
model.fit({'X_vars': X_train, 'output': y_train},
|
||||
batch_size=batch_size,
|
||||
callbacks=cbks, nb_epoch=2)
|
||||
|
||||
# fit generator with validation
|
||||
model.fit_generator(data_generator_graph(True), 1000, nb_epoch=2,
|
||||
validation_data={'X_vars': X_test, 'output': y_test},
|
||||
callbacks=cbks)
|
||||
|
||||
# fit generator wo validation
|
||||
model.fit_generator(data_generator_graph(True), 1000, nb_epoch=2,
|
||||
callbacks=cbks)
|
||||
|
||||
assert os.path.exists(filepath)
|
||||
shutil.rmtree(filepath)
|
||||
|
||||
KTF.set_session(old_session)
|
||||
assert os.path.exists(filepath)
|
||||
shutil.rmtree(filepath)
|
||||
|
||||
|
||||
def test_LambdaCallback():
|
||||
@@ -343,7 +294,7 @@ def test_LambdaCallback():
|
||||
assert not p.is_alive()
|
||||
|
||||
|
||||
@pytest.mark.skipif((K._BACKEND != 'tensorflow'),
|
||||
@pytest.mark.skipif((K.backend() != 'tensorflow'),
|
||||
reason="Requires tensorflow backend")
|
||||
def test_TensorBoard_with_ReduceLROnPlateau():
|
||||
import shutil
|
||||
|
||||
@@ -46,14 +46,50 @@ def test_matthews_correlation():
|
||||
assert expected - epsilon <= actual <= expected + epsilon
|
||||
|
||||
|
||||
def test_precision():
|
||||
y_true = K.variable(np.array([0, 1, 1, 1, 0, 0, 0, 1, 1, 1, 0, 1, 0, 0]))
|
||||
y_pred = K.variable(np.array([1, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0]))
|
||||
|
||||
# Calculated using sklearn.metrics.precision_score
|
||||
expected = 0.40000000000000002
|
||||
|
||||
actual = K.eval(metrics.precision(y_true, y_pred))
|
||||
epsilon = 1e-05
|
||||
assert expected - epsilon <= actual <= expected + epsilon
|
||||
|
||||
|
||||
def test_recall():
|
||||
y_true = K.variable(np.array([0, 1, 1, 1, 0, 0, 0, 1, 1, 1, 0, 1, 0, 0]))
|
||||
y_pred = K.variable(np.array([1, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0]))
|
||||
|
||||
# Calculated using sklearn.metrics.recall_score
|
||||
expected = 0.2857142857142857
|
||||
|
||||
actual = K.eval(metrics.recall(y_true, y_pred))
|
||||
epsilon = 1e-05
|
||||
assert expected - epsilon <= actual <= expected + epsilon
|
||||
|
||||
|
||||
def test_fbeta_score():
|
||||
y_true = K.variable(np.array([0, 1, 1, 1, 0, 0, 0, 1, 1, 1, 0, 1, 0, 0]))
|
||||
y_pred = K.variable(np.array([1, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0]))
|
||||
|
||||
# Calculated using sklearn.metrics.fbeta_score
|
||||
expected = 0.30303030303030304
|
||||
|
||||
actual = K.eval(metrics.fbeta_score(y_true, y_pred, beta=2))
|
||||
epsilon = 1e-05
|
||||
assert expected - epsilon <= actual <= expected + epsilon
|
||||
|
||||
|
||||
def test_fmeasure():
|
||||
y_true = K.variable(np.array([0, 1, 1, 1, 0, 0, 0, 1, 1, 1, 0, 1, 0, 0]))
|
||||
y_pred = K.variable(np.array([1, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0]))
|
||||
|
||||
# Calculated using sklearn.metrics.f1_score
|
||||
expected = 0.33333333333333331
|
||||
|
||||
actual = K.eval(metrics.fbeta_score(y_true, y_pred))
|
||||
actual = K.eval(metrics.fmeasure(y_true, y_pred))
|
||||
epsilon = 1e-05
|
||||
assert expected - epsilon <= actual <= expected + epsilon
|
||||
|
||||
|
||||
@@ -0,0 +1,114 @@
|
||||
from __future__ import absolute_import
|
||||
from __future__ import print_function
|
||||
import pytest
|
||||
|
||||
from keras.utils.test_utils import keras_test
|
||||
from keras.models import Model, Sequential
|
||||
from keras.layers import Dense, Input
|
||||
|
||||
|
||||
@keras_test
|
||||
def test_layer_trainability_switch():
|
||||
# with constructor argument, in Sequential
|
||||
model = Sequential()
|
||||
model.add(Dense(2, trainable=False, input_dim=1))
|
||||
assert model.trainable_weights == []
|
||||
|
||||
# by setting the `trainable` argument, in Sequential
|
||||
model = Sequential()
|
||||
layer = Dense(2, input_dim=1)
|
||||
model.add(layer)
|
||||
assert model.trainable_weights == layer.trainable_weights
|
||||
layer.trainable = False
|
||||
assert model.trainable_weights == []
|
||||
|
||||
# with constructor argument, in Model
|
||||
x = Input(shape=(1,))
|
||||
y = Dense(2, trainable=False)(x)
|
||||
model = Model(x, y)
|
||||
assert model.trainable_weights == []
|
||||
|
||||
# by setting the `trainable` argument, in Model
|
||||
x = Input(shape=(1,))
|
||||
layer = Dense(2)
|
||||
y = layer(x)
|
||||
model = Model(x, y)
|
||||
assert model.trainable_weights == layer.trainable_weights
|
||||
layer.trainable = False
|
||||
assert model.trainable_weights == []
|
||||
|
||||
|
||||
@keras_test
|
||||
def test_model_trainability_switch():
|
||||
# a non-trainable model has no trainable weights
|
||||
x = Input(shape=(1,))
|
||||
y = Dense(2)(x)
|
||||
model = Model(x, y)
|
||||
model.trainable = False
|
||||
assert model.trainable_weights == []
|
||||
|
||||
# same for Sequential
|
||||
model = Sequential()
|
||||
model.add(Dense(2, input_dim=1))
|
||||
model.trainable = False
|
||||
assert model.trainable_weights == []
|
||||
|
||||
|
||||
@keras_test
|
||||
def test_nested_model_trainability():
|
||||
# a Sequential inside a Model
|
||||
inner_model = Sequential()
|
||||
inner_model.add(Dense(2, input_dim=1))
|
||||
|
||||
x = Input(shape=(1,))
|
||||
y = inner_model(x)
|
||||
outer_model = Model(x, y)
|
||||
assert outer_model.trainable_weights == inner_model.trainable_weights
|
||||
inner_model.trainable = False
|
||||
assert outer_model.trainable_weights == []
|
||||
inner_model.trainable = True
|
||||
inner_model.layers[-1].trainable = False
|
||||
assert outer_model.trainable_weights == []
|
||||
|
||||
# a Sequential inside a Sequential
|
||||
inner_model = Sequential()
|
||||
inner_model.add(Dense(2, input_dim=1))
|
||||
outer_model = Sequential()
|
||||
outer_model.add(inner_model)
|
||||
assert outer_model.trainable_weights == inner_model.trainable_weights
|
||||
inner_model.trainable = False
|
||||
assert outer_model.trainable_weights == []
|
||||
inner_model.trainable = True
|
||||
inner_model.layers[-1].trainable = False
|
||||
assert outer_model.trainable_weights == []
|
||||
|
||||
# a Model inside a Model
|
||||
x = Input(shape=(1,))
|
||||
y = Dense(2)(x)
|
||||
inner_model = Model(x, y)
|
||||
x = Input(shape=(1,))
|
||||
y = inner_model(x)
|
||||
outer_model = Model(x, y)
|
||||
assert outer_model.trainable_weights == inner_model.trainable_weights
|
||||
inner_model.trainable = False
|
||||
assert outer_model.trainable_weights == []
|
||||
inner_model.trainable = True
|
||||
inner_model.layers[-1].trainable = False
|
||||
assert outer_model.trainable_weights == []
|
||||
|
||||
# a Model inside a Sequential
|
||||
x = Input(shape=(1,))
|
||||
y = Dense(2)(x)
|
||||
inner_model = Model(x, y)
|
||||
outer_model = Sequential()
|
||||
outer_model.add(inner_model)
|
||||
assert outer_model.trainable_weights == inner_model.trainable_weights
|
||||
inner_model.trainable = False
|
||||
assert outer_model.trainable_weights == []
|
||||
inner_model.trainable = True
|
||||
inner_model.layers[-1].trainable = False
|
||||
assert outer_model.trainable_weights == []
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
pytest.main([__file__])
|
||||
Referência em uma Nova Issue
Bloquear um usuário