Comparar commits
43 Commits
| Autor | SHA1 | Data | |
|---|---|---|---|
| 8c2a573ebf | |||
| d7ff7cde92 | |||
| 15d0b0ea08 | |||
| 3695bc2db5 | |||
| a08995a90d | |||
| aea00258e7 | |||
| b581eb3f27 | |||
| 610ccba9f5 | |||
| d5ae6f32dd | |||
| 5308033936 | |||
| e2abb5ef2c | |||
| 1b11b4eeb6 | |||
| 39357b3045 | |||
| ed7a5a1418 | |||
| ae682a71f9 | |||
| 8327b37a0b | |||
| 973b5570aa | |||
| 7cb41fc5cc | |||
| 595d67ad7d | |||
| bb626c120e | |||
| ba8fefa8ec | |||
| 4b24f6d7b1 | |||
| 1c460e1e08 | |||
| 7b4e157356 | |||
| 5749f1b971 | |||
| 3c57aff85b | |||
| 18504bcc86 | |||
| d8864bfe48 | |||
| 078b20169b | |||
| 5f7e78df65 | |||
| fc470db7ab | |||
| f576f37801 | |||
| b74118a766 | |||
| 1c7a0248b9 | |||
| 36a829c20d | |||
| 33af75aa39 | |||
| 844420425e | |||
| da57a530f9 | |||
| 1f17013949 | |||
| f18899cb36 | |||
| 877f946e24 | |||
| a981a8c42c | |||
| ed365e94fd |
externo
+1
@@ -30,6 +30,7 @@ model.add(Activation(tanh))
|
||||
|
||||
- __softmax__: Softmax applied across the input's last dimension. Expects a shape of either `(nb_samples, nb_timesteps, nb_dims)` or `(nb_samples, nb_dims)`.
|
||||
- __softplus__
|
||||
- __softsign__
|
||||
- __relu__
|
||||
- __tanh__
|
||||
- __sigmoid__
|
||||
|
||||
+18
-4
@@ -20,7 +20,7 @@ Please cite Keras in your publications if it helps your research. Here is an exa
|
||||
|
||||
```
|
||||
@misc{chollet2015keras,
|
||||
author = {Chollet, François},
|
||||
author = {Chollet, Francois},
|
||||
title = {Keras},
|
||||
year = {2015},
|
||||
publisher = {GitHub},
|
||||
@@ -139,14 +139,28 @@ to pass the learning phase flag to your function:
|
||||
get_3rd_layer_output = K.function([model.layers[0].input, K.learning_phase()],
|
||||
[model.layers[3].output])
|
||||
|
||||
# output in train mode = 0
|
||||
# output in test mode = 0
|
||||
layer_output = get_3rd_layer_output([X, 0])[0]
|
||||
|
||||
# output in test mode = 1
|
||||
# output in train mode = 1
|
||||
layer_output = get_3rd_layer_output([X, 1])[0]
|
||||
```
|
||||
|
||||
Another more flexible way of getting output from intermediate layers is to use the [functional API](/getting-started/functional-api-guide).
|
||||
Another more flexible way of getting output from intermediate layers is to use the [functional API](/getting-started/functional-api-guide). For example, if you have created an autoencoder for MNIST:
|
||||
|
||||
```python
|
||||
inputs = Input(shape=(784,))
|
||||
encoded = Dense(32, activation='relu')(inputs)
|
||||
decoded = Dense(784)(encoded)
|
||||
model = Model(input=inputs, output=decoded)
|
||||
```
|
||||
|
||||
After compiling and training the model, you can get the output of the data from the encoder like this:
|
||||
|
||||
```python
|
||||
encoder = Model(input=inputs, output=encoded)
|
||||
X_encoded = encoder.predict(X)
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
|
||||
@@ -309,8 +309,8 @@ from keras.layers import merge, Convolution2D, Input
|
||||
|
||||
# input tensor for a 3-channel 256x256 image
|
||||
x = Input(shape=(3, 256, 256))
|
||||
# 3x3 conv with 16 output channels
|
||||
y = Convolution2D(16, 3, 3, border_mode='same')
|
||||
# 3x3 conv with 3 output channels (same as input channels)
|
||||
y = Convolution2D(3, 3, 3, border_mode='same')
|
||||
# this returns x + y.
|
||||
z = merge([x, y], mode='sum')
|
||||
```
|
||||
|
||||
@@ -0,0 +1,34 @@
|
||||
# Writing your own Keras layers
|
||||
|
||||
For simple, stateless custom operations, you are probably better off using `layers.core.Lambda` layers. But for any custom operation that has trainable weights, you should implement your own layer.
|
||||
|
||||
Here is the skeleton of a Keras layer. There are only three methods you need to implement:
|
||||
|
||||
- `build(input_shape)`: this is where you will define your weights. Trainable weights should be added to the list `self.trainable_weights`. Other attributes of note are: `self.non_trainable_weights` (list) and `self.updates` (list of update tuples (tensor, new_tensor)). For an example of how to use `non_trainable_weights` and `updates`, see the code for the `BatchNormalization` layer.
|
||||
- `call(x)`: this is where the layer's logic lives. Unless you want your layer to support masking, you only have to care about the first argument passed to `call`: the input tensor.
|
||||
- `get_output_shape_for(input_shape)`: in case your layer modifies the shape of its input, you should specify here the shape transformation logic. This allows Keras to do automatic shape inference.
|
||||
|
||||
```python
|
||||
from keras import backend as K
|
||||
from keras.engine.topology import Layer
|
||||
import numpy as np
|
||||
|
||||
class MyLayer(Layer):
|
||||
def __init__(self, output_dim, **kwargs):
|
||||
self.output_dim = output_dim
|
||||
super(MyLayer, self).__init__(**kwargs)
|
||||
|
||||
def build(self, input_shape):
|
||||
input_dim = input_shape[1]
|
||||
initial_weight_value = np.random.random((input_dim, self.output_dim))
|
||||
self.W = K.variable(initial_weight_value)
|
||||
self.trainable_weights = [self.W]
|
||||
|
||||
def call(self, x, mask=None):
|
||||
return K.dot(x, self.W)
|
||||
|
||||
def get_output_shape_for(self, input_shape):
|
||||
return (input_shape[0], self.output_dim)
|
||||
```
|
||||
|
||||
The existing Keras layers provide ample examples of how to implement almost anything. Never hesitate to read the source code!
|
||||
+6
-2
@@ -11,6 +11,10 @@ keras.preprocessing.image.ImageDataGenerator(featurewise_center=True,
|
||||
width_shift_range=0.,
|
||||
height_shift_range=0.,
|
||||
shear_range=0.,
|
||||
zoom_range=0.,
|
||||
channel_shift_range=0.,
|
||||
fill_mode='nearest',
|
||||
cval=0.,
|
||||
horizontal_flip=False,
|
||||
vertical_flip=False,
|
||||
dim_ordering='th')
|
||||
@@ -30,8 +34,8 @@ Generate batches of tensor image data with real-time data augmentation. The data
|
||||
- __shear_range__: Float. Shear Intensity (Shear angle in counter-clockwise direction as radians)
|
||||
- __zoom_range__: Float or [lower, upper]. Range for random zoom. If a float, `[lower, upper] = [1-zoom_range, 1+zoom_range]`.
|
||||
- __channel_shift_range__: Float. Range for random channel shifts.
|
||||
- __fill_mode__: One of {"constant", "nearest", "reflect" or "wrap"}.
|
||||
- __cval__: Float or Int. Value used for points outside the boundaries when `fill_mode` is "constant".
|
||||
- __fill_mode__: One of {"constant", "nearest", "reflect" or "wrap"}. Points outside the boundaries of the input are filled according to the given mode.
|
||||
- __cval__: Float or Int. Value used for points outside the boundaries when `fill_mode = "constant"`.
|
||||
- __horizontal_flip__: Boolean. Randomly flip inputs horizontally.
|
||||
- __vertical_flip__: Boolean. Randomly flip inputs vertically.
|
||||
- __dim_ordering__: One of {"th", "tf"}.
|
||||
|
||||
@@ -29,8 +29,7 @@ Five digits inverted:
|
||||
from __future__ import print_function
|
||||
from keras.models import Sequential
|
||||
from keras.engine.training import slice_X
|
||||
from keras.layers.core import Activation, TimeDistributedDense, RepeatVector
|
||||
from keras.layers import recurrent
|
||||
from keras.layers import Activation, TimeDistributedDense, RepeatVector, recurrent
|
||||
import numpy as np
|
||||
from six.moves import range
|
||||
|
||||
|
||||
@@ -12,7 +12,7 @@ backend (`K`), our code can run both on TensorFlow and Theano.
|
||||
|
||||
from __future__ import print_function
|
||||
from keras.models import Sequential
|
||||
from keras.layers.core import Dense, Dropout, Layer, Activation
|
||||
from keras.layers import Dense, Dropout, Layer, Activation
|
||||
from keras.datasets import mnist
|
||||
from keras import backend as K
|
||||
from keras.utils import np_utils
|
||||
|
||||
@@ -16,8 +16,8 @@ Time per epoch: 3s on CPU (core i7).
|
||||
from __future__ import print_function
|
||||
from keras.models import Sequential
|
||||
from keras.layers.embeddings import Embedding
|
||||
from keras.layers.core import Activation, Dense, Merge, Permute, Dropout
|
||||
from keras.layers.recurrent import LSTM
|
||||
from keras.layers import Activation, Dense, Merge, Permute, Dropout
|
||||
from keras.layers import LSTM
|
||||
from keras.utils.data_utils import get_file
|
||||
from keras.preprocessing.sequence import pad_sequences
|
||||
from functools import reduce
|
||||
|
||||
@@ -66,7 +66,7 @@ np.random.seed(1337) # for reproducibility
|
||||
|
||||
from keras.utils.data_utils import get_file
|
||||
from keras.layers.embeddings import Embedding
|
||||
from keras.layers.core import Dense, Merge, Dropout, RepeatVector
|
||||
from keras.layers import Dense, Merge, Dropout, RepeatVector
|
||||
from keras.layers import recurrent
|
||||
from keras.models import Sequential
|
||||
from keras.preprocessing.sequence import pad_sequences
|
||||
|
||||
@@ -15,8 +15,8 @@ from __future__ import print_function
|
||||
from keras.datasets import cifar10
|
||||
from keras.preprocessing.image import ImageDataGenerator
|
||||
from keras.models import Sequential
|
||||
from keras.layers.core import Dense, Dropout, Activation, Flatten
|
||||
from keras.layers.convolutional import Convolution2D, MaxPooling2D
|
||||
from keras.layers import Dense, Dropout, Activation, Flatten
|
||||
from keras.layers import Convolution2D, MaxPooling2D
|
||||
from keras.optimizers import SGD
|
||||
from keras.utils import np_utils
|
||||
|
||||
|
||||
@@ -24,7 +24,7 @@ import h5py
|
||||
import os
|
||||
|
||||
from keras.models import Sequential
|
||||
from keras.layers.convolutional import Convolution2D, ZeroPadding2D, MaxPooling2D
|
||||
from keras.layers import Convolution2D, ZeroPadding2D, MaxPooling2D
|
||||
from keras import backend as K
|
||||
|
||||
parser = argparse.ArgumentParser(description='Deep Dreams with Keras.')
|
||||
|
||||
@@ -12,9 +12,9 @@ np.random.seed(1337) # for reproducibility
|
||||
|
||||
from keras.preprocessing import sequence
|
||||
from keras.models import Sequential
|
||||
from keras.layers.core import Dense, Dropout, Activation, Lambda
|
||||
from keras.layers.embeddings import Embedding
|
||||
from keras.layers.convolutional import Convolution1D
|
||||
from keras.layers import Dense, Dropout, Activation, Lambda
|
||||
from keras.layers import Embedding
|
||||
from keras.layers import Convolution1D
|
||||
from keras.datasets import imdb
|
||||
from keras import backend as K
|
||||
|
||||
|
||||
@@ -9,10 +9,10 @@ np.random.seed(1337) # for reproducibility
|
||||
|
||||
from keras.preprocessing import sequence
|
||||
from keras.models import Sequential
|
||||
from keras.layers.core import Dense, Dropout, Activation
|
||||
from keras.layers.embeddings import Embedding
|
||||
from keras.layers.recurrent import LSTM, GRU, SimpleRNN
|
||||
from keras.layers.convolutional import Convolution1D, MaxPooling1D
|
||||
from keras.layers import Dense, Dropout, Activation
|
||||
from keras.layers import Embedding
|
||||
from keras.layers import LSTM, GRU, SimpleRNN
|
||||
from keras.layers import Convolution1D, MaxPooling1D
|
||||
from keras.datasets import imdb
|
||||
|
||||
|
||||
|
||||
@@ -19,9 +19,8 @@ np.random.seed(1337) # for reproducibility
|
||||
from keras.preprocessing import sequence
|
||||
from keras.utils import np_utils
|
||||
from keras.models import Sequential
|
||||
from keras.layers.core import Dense, Dropout, Activation
|
||||
from keras.layers.embeddings import Embedding
|
||||
from keras.layers.recurrent import LSTM, SimpleRNN, GRU
|
||||
from keras.layers import Dense, Dropout, Activation, Embedding
|
||||
from keras.layers import LSTM, SimpleRNN, GRU
|
||||
from keras.datasets import imdb
|
||||
|
||||
max_features = 20000
|
||||
|
||||
@@ -0,0 +1,83 @@
|
||||
'''Compare LSTM implementations on the IMDB sentiment classification task.
|
||||
|
||||
consume_less='cpu' preprocesses input to the LSTM which typically results in
|
||||
faster computations at the expense of increased peak memory usage as the
|
||||
preprocessed input must be kept in memory.
|
||||
|
||||
consume_less='mem' does away with the preprocessing, meaning that it might take
|
||||
a little longer, but should require less peak memory.
|
||||
|
||||
consume_less='gpu' concatenates the input, output and forget gate's weights
|
||||
into one, large matrix, resulting in faster computation time as the GPU can
|
||||
utilize more cores, at the expense of reduced regularization because the same
|
||||
dropout is shared across the gates.
|
||||
|
||||
Note that the relative performance of the different `consume_less` modes
|
||||
can vary depending on your device, your model and the size of your data.
|
||||
'''
|
||||
|
||||
import time
|
||||
import numpy as np
|
||||
import matplotlib.pyplot as plt
|
||||
|
||||
from keras.preprocessing import sequence
|
||||
from keras.models import Sequential
|
||||
from keras.layers import Embedding, Dense, LSTM
|
||||
from keras.datasets import imdb
|
||||
|
||||
max_features = 20000
|
||||
max_length = 80
|
||||
embedding_dim = 256
|
||||
batch_size = 128
|
||||
epochs = 10
|
||||
modes = ['cpu', 'mem', 'gpu']
|
||||
|
||||
print('Loading data...')
|
||||
(X_train, y_train), (X_test, y_test) = imdb.load_data(nb_words=max_features)
|
||||
X_train = sequence.pad_sequences(X_train, max_length)
|
||||
X_test = sequence.pad_sequences(X_test, max_length)
|
||||
|
||||
# Compile and train different models while measuring performance.
|
||||
results = []
|
||||
for mode in modes:
|
||||
print('Testing mode: consume_less="{}"'.format(mode))
|
||||
|
||||
model = Sequential()
|
||||
model.add(Embedding(max_features, embedding_dim, input_length=max_length, dropout=0.2))
|
||||
model.add(LSTM(embedding_dim, dropout_W=0.2, dropout_U=0.2, consume_less=mode))
|
||||
model.add(Dense(1, activation='sigmoid'))
|
||||
model.compile(loss='binary_crossentropy',
|
||||
optimizer='adam',
|
||||
metrics=['accuracy'])
|
||||
|
||||
start_time = time.time()
|
||||
history = model.fit(X_train, y_train,
|
||||
batch_size=batch_size,
|
||||
nb_epoch=epochs,
|
||||
validation_data=(X_test, y_test))
|
||||
average_time_per_epoch = (time.time() - start_time) / epochs
|
||||
|
||||
results.append((history, average_time_per_epoch))
|
||||
|
||||
# Compare models' accuracy, loss and elapsed time per epoch.
|
||||
plt.style.use('ggplot')
|
||||
ax1 = plt.subplot2grid((2, 2), (0, 0))
|
||||
ax1.set_title('Accuracy')
|
||||
ax1.set_ylabel('Validation Accuracy')
|
||||
ax1.set_xlabel('Epochs')
|
||||
ax2 = plt.subplot2grid((2, 2), (1, 0))
|
||||
ax2.set_title('Loss')
|
||||
ax2.set_ylabel('Validation Loss')
|
||||
ax2.set_xlabel('Epochs')
|
||||
ax3 = plt.subplot2grid((2, 2), (0, 1), rowspan=2)
|
||||
ax3.set_title('Time')
|
||||
ax3.set_ylabel('Seconds')
|
||||
for mode, result in zip(modes, results):
|
||||
ax1.plot(result[0].epoch, result[0].history['val_acc'], label=mode)
|
||||
ax2.plot(result[0].epoch, result[0].history['val_loss'], label=mode)
|
||||
ax1.legend()
|
||||
ax2.legend()
|
||||
ax3.bar(np.arange(len(results)), [x[1] for x in results],
|
||||
tick_label=modes, align='center')
|
||||
plt.tight_layout()
|
||||
plt.show()
|
||||
@@ -12,8 +12,8 @@ has at least ~100k characters. ~1M is better.
|
||||
|
||||
from __future__ import print_function
|
||||
from keras.models import Sequential
|
||||
from keras.layers.core import Dense, Activation, Dropout
|
||||
from keras.layers.recurrent import LSTM
|
||||
from keras.layers import Dense, Activation, Dropout
|
||||
from keras.layers import LSTM
|
||||
from keras.utils.data_utils import get_file
|
||||
import numpy as np
|
||||
import random
|
||||
|
||||
@@ -11,8 +11,8 @@ np.random.seed(1337) # for reproducibility
|
||||
|
||||
from keras.datasets import mnist
|
||||
from keras.models import Sequential
|
||||
from keras.layers.core import Dense, Dropout, Activation, Flatten
|
||||
from keras.layers.convolutional import Convolution2D, MaxPooling2D
|
||||
from keras.layers import Dense, Dropout, Activation, Flatten
|
||||
from keras.layers import Convolution2D, MaxPooling2D
|
||||
from keras.utils import np_utils
|
||||
|
||||
batch_size = 128
|
||||
|
||||
@@ -17,9 +17,9 @@ from __future__ import print_function
|
||||
|
||||
from keras.datasets import mnist
|
||||
from keras.models import Sequential
|
||||
from keras.layers.core import Dense, Activation
|
||||
from keras.layers import Dense, Activation
|
||||
from keras.layers import SimpleRNN
|
||||
from keras.initializations import normal, identity
|
||||
from keras.layers.recurrent import SimpleRNN
|
||||
from keras.optimizers import RMSprop
|
||||
from keras.utils import np_utils
|
||||
|
||||
|
||||
@@ -30,7 +30,7 @@ def euclidean_distance(vects):
|
||||
|
||||
def eucl_dist_output_shape(shapes):
|
||||
shape1, shape2 = shapes
|
||||
return shape1
|
||||
return (shape1[0], 1)
|
||||
|
||||
|
||||
def contrastive_loss(y_true, y_pred):
|
||||
|
||||
@@ -9,8 +9,8 @@ np.random.seed(1337) # for reproducibility
|
||||
|
||||
from keras.datasets import mnist
|
||||
from keras.models import Sequential
|
||||
from keras.layers.core import Dense, Dropout, Activation, Flatten
|
||||
from keras.layers.convolutional import Convolution2D, MaxPooling2D
|
||||
from keras.layers import Dense, Dropout, Activation, Flatten
|
||||
from keras.layers import Convolution2D, MaxPooling2D
|
||||
from keras.utils import np_utils
|
||||
from keras.wrappers.scikit_learn import KerasClassifier
|
||||
from sklearn.grid_search import GridSearchCV
|
||||
|
||||
@@ -19,8 +19,8 @@ np.random.seed(1337) # for reproducibility
|
||||
|
||||
from keras.datasets import mnist
|
||||
from keras.models import Sequential
|
||||
from keras.layers.core import Dense, Dropout, Activation, Flatten
|
||||
from keras.layers.convolutional import Convolution2D, MaxPooling2D
|
||||
from keras.layers import Dense, Dropout, Activation, Flatten
|
||||
from keras.layers import Convolution2D, MaxPooling2D
|
||||
from keras.utils import np_utils
|
||||
|
||||
|
||||
|
||||
@@ -58,7 +58,7 @@ import argparse
|
||||
import h5py
|
||||
|
||||
from keras.models import Sequential
|
||||
from keras.layers.convolutional import Convolution2D, ZeroPadding2D, MaxPooling2D
|
||||
from keras.layers import Convolution2D, ZeroPadding2D, MaxPooling2D
|
||||
from keras import backend as K
|
||||
|
||||
parser = argparse.ArgumentParser(description='Neural style transfer with Keras.')
|
||||
|
||||
@@ -8,8 +8,7 @@ np.random.seed(1337) # for reproducibility
|
||||
|
||||
from keras.datasets import reuters
|
||||
from keras.models import Sequential
|
||||
from keras.layers.core import Dense, Dropout, Activation
|
||||
from keras.layers.normalization import BatchNormalization
|
||||
from keras.layers import Dense, Dropout, Activation
|
||||
from keras.utils import np_utils
|
||||
from keras.preprocessing.text import Tokenizer
|
||||
|
||||
|
||||
@@ -5,8 +5,7 @@ from __future__ import print_function
|
||||
import numpy as np
|
||||
import matplotlib.pyplot as plt
|
||||
from keras.models import Sequential
|
||||
from keras.layers.core import Dense
|
||||
from keras.layers.recurrent import LSTM
|
||||
from keras.layers import Dense, LSTM
|
||||
|
||||
|
||||
# since we are using stateful rnn tsteps can be set to 1
|
||||
|
||||
@@ -0,0 +1,98 @@
|
||||
'''This script demonstrates how to build a variational autoencoder with Keras.
|
||||
|
||||
Reference: "Auto-Encoding Variational Bayes" https://arxiv.org/abs/1312.6114
|
||||
'''
|
||||
import numpy as np
|
||||
import matplotlib.pyplot as plt
|
||||
|
||||
from keras.layers import Input, Dense, Lambda
|
||||
from keras.models import Model
|
||||
from keras import backend as K
|
||||
from keras import objectives
|
||||
from keras.datasets import mnist
|
||||
|
||||
batch_size = 16
|
||||
original_dim = 784
|
||||
latent_dim = 2
|
||||
intermediate_dim = 128
|
||||
epsilon_std = 0.01
|
||||
nb_epoch = 40
|
||||
|
||||
x = Input(batch_shape=(batch_size, original_dim))
|
||||
h = Dense(intermediate_dim, activation='relu')(x)
|
||||
z_mean = Dense(latent_dim)(h)
|
||||
z_log_sigma = Dense(latent_dim)(h)
|
||||
|
||||
def sampling(args):
|
||||
z_mean, z_log_sigma = args
|
||||
epsilon = K.random_normal(shape=(batch_size, latent_dim),
|
||||
mean=0., std=epsilon_std)
|
||||
return z_mean + K.exp(z_log_sigma) * epsilon
|
||||
|
||||
# note that "output_shape" isn't necessary with the TensorFlow backend
|
||||
# so you could write `Lambda(sampling)([z_mean, z_log_sigma])`
|
||||
z = Lambda(sampling, output_shape=(latent_dim,))([z_mean, z_log_sigma])
|
||||
|
||||
# we instantiate these layers separately so as to reuse them later
|
||||
decoder_h = Dense(intermediate_dim, activation='relu')
|
||||
decoder_mean = Dense(original_dim, activation='sigmoid')
|
||||
h_decoded = decoder_h(z)
|
||||
x_decoded_mean = decoder_mean(h_decoded)
|
||||
|
||||
def vae_loss(x, x_decoded_mean):
|
||||
xent_loss = objectives.binary_crossentropy(x, x_decoded_mean)
|
||||
kl_loss = - 0.5 * K.mean(1 + z_log_sigma - K.square(z_mean) - K.exp(z_log_sigma), axis=-1)
|
||||
return xent_loss + kl_loss
|
||||
|
||||
vae = Model(x, x_decoded_mean)
|
||||
vae.compile(optimizer='rmsprop', loss=vae_loss)
|
||||
|
||||
# train the VAE on MNIST digits
|
||||
(x_train, y_train), (x_test, y_test) = mnist.load_data()
|
||||
|
||||
x_train = x_train.astype('float32') / 255.
|
||||
x_test = x_test.astype('float32') / 255.
|
||||
x_train = x_train.reshape((len(x_train), np.prod(x_train.shape[1:])))
|
||||
x_test = x_test.reshape((len(x_test), np.prod(x_test.shape[1:])))
|
||||
|
||||
vae.fit(x_train, x_train,
|
||||
shuffle=True,
|
||||
nb_epoch=nb_epoch,
|
||||
batch_size=batch_size,
|
||||
validation_data=(x_test, x_test))
|
||||
|
||||
# build a model to project inputs on the latent space
|
||||
encoder = Model(x, z_mean)
|
||||
|
||||
# display a 2D plot of the digit classes in the latent space
|
||||
x_test_encoded = encoder.predict(x_test, batch_size=batch_size)
|
||||
plt.figure(figsize=(6, 6))
|
||||
plt.scatter(x_test_encoded[:, 0], x_test_encoded[:, 1], c=y_test)
|
||||
plt.colorbar()
|
||||
plt.show()
|
||||
|
||||
# build a digit generator that can sample from the learned distribution
|
||||
decoder_input = Input(shape=(latent_dim,))
|
||||
_h_decoded = decoder_h(decoder_input)
|
||||
_x_decoded_mean = decoder_mean(_h_decoded)
|
||||
generator = Model(decoder_input, _x_decoded_mean)
|
||||
|
||||
# display a 2D manifold of the digits
|
||||
n = 15 # figure with 15x15 digits
|
||||
digit_size = 28
|
||||
figure = np.zeros((digit_size * n, digit_size * n))
|
||||
# we will sample n points within [-15, 15] standard deviations
|
||||
grid_x = np.linspace(-15, 15, n)
|
||||
grid_y = np.linspace(-15, 15, n)
|
||||
|
||||
for i, yi in enumerate(grid_x):
|
||||
for j, xi in enumerate(grid_y):
|
||||
z_sample = np.array([[xi, yi]]) * epsilon_std
|
||||
x_decoded = generator.predict(z_sample)
|
||||
digit = x_decoded[0].reshape(digit_size, digit_size)
|
||||
figure[i * digit_size: (i + 1) * digit_size,
|
||||
j * digit_size: (j + 1) * digit_size] = digit
|
||||
|
||||
plt.figure(figsize=(10, 10))
|
||||
plt.imshow(figure)
|
||||
plt.show()
|
||||
+2
-1
@@ -1,5 +1,4 @@
|
||||
from __future__ import absolute_import
|
||||
__version__ = '1.0.2'
|
||||
from . import backend
|
||||
from . import datasets
|
||||
from . import engine
|
||||
@@ -15,3 +14,5 @@ from . import models
|
||||
from . import objectives
|
||||
from . import optimizers
|
||||
from . import regularizers
|
||||
|
||||
__version__ = '1.0.3'
|
||||
|
||||
@@ -19,6 +19,10 @@ def softplus(x):
|
||||
return K.softplus(x)
|
||||
|
||||
|
||||
def softsign(x):
|
||||
return K.softsign(x)
|
||||
|
||||
|
||||
def relu(x, alpha=0., max_value=None):
|
||||
return K.relu(x, alpha=alpha, max_value=max_value)
|
||||
|
||||
|
||||
@@ -499,15 +499,21 @@ def resize_images(X, height_factor, width_factor, dim_ordering):
|
||||
positive integers.
|
||||
'''
|
||||
if dim_ordering == 'th':
|
||||
original_shape = int_shape(X)
|
||||
new_shape = tf.shape(X)[2:]
|
||||
new_shape *= tf.constant(np.array([height_factor, width_factor]).astype('int32'))
|
||||
X = permute_dimensions(X, [0, 2, 3, 1])
|
||||
X = tf.image.resize_nearest_neighbor(X, new_shape)
|
||||
return permute_dimensions(X, [0, 3, 1, 2])
|
||||
X = permute_dimensions(X, [0, 3, 1, 2])
|
||||
X.set_shape((None, None, original_shape[2] * height_factor, original_shape[3] * width_factor))
|
||||
return X
|
||||
elif dim_ordering == 'tf':
|
||||
original_shape = int_shape(X)
|
||||
new_shape = tf.shape(X)[1:3]
|
||||
new_shape *= tf.constant(np.array([height_factor, width_factor]).astype('int32'))
|
||||
return tf.image.resize_nearest_neighbor(X, new_shape)
|
||||
X = tf.image.resize_nearest_neighbor(X, new_shape)
|
||||
X.set_shape((None, original_shape[1] * height_factor, original_shape[2] * width_factor, None))
|
||||
return X
|
||||
else:
|
||||
raise Exception('Invalid dim_ordering: ' + dim_ordering)
|
||||
|
||||
@@ -539,6 +545,8 @@ def repeat(x, n):
|
||||
|
||||
|
||||
def tile(x, n):
|
||||
if not hasattr(n, 'shape') and not hasattr(n, '__len__'):
|
||||
n = [n]
|
||||
return tf.tile(x, n)
|
||||
|
||||
|
||||
@@ -602,6 +610,16 @@ def get_value(x):
|
||||
return x.eval(session=get_session())
|
||||
|
||||
|
||||
def batch_get_value(xs):
|
||||
'''Returns the value of more than one tensor variable,
|
||||
as a list of Numpy arrays.
|
||||
'''
|
||||
if xs:
|
||||
return get_session().run(xs)
|
||||
else:
|
||||
return []
|
||||
|
||||
|
||||
def set_value(x, value):
|
||||
'''Sets the value of a tensor variable,
|
||||
from a Numpy array.
|
||||
@@ -852,6 +870,10 @@ def softplus(x):
|
||||
return tf.nn.softplus(x)
|
||||
|
||||
|
||||
def softsign(x):
|
||||
return tf.nn.softsign(x)
|
||||
|
||||
|
||||
def categorical_crossentropy(output, target, from_logits=False):
|
||||
'''Categorical crossentropy between an output tensor
|
||||
and a target tensor, where the target is a tensor of the same
|
||||
|
||||
@@ -3,6 +3,10 @@ from theano import tensor as T
|
||||
from theano.sandbox.rng_mrg import MRG_RandomStreams as RandomStreams
|
||||
from theano.tensor.signal import pool
|
||||
from theano.tensor.nnet import conv3d2d
|
||||
try:
|
||||
from theano.tensor.nnet.nnet import softsign as T_softsign
|
||||
except ImportError:
|
||||
from theano.sandbox.softsign import softsign as T_softsign
|
||||
import inspect
|
||||
import numpy as np
|
||||
from .common import _FLOATX, _EPSILON
|
||||
@@ -483,6 +487,13 @@ def get_value(x):
|
||||
return x.get_value()
|
||||
|
||||
|
||||
def batch_get_value(xs):
|
||||
'''Returns the value of more than one tensor variable,
|
||||
as a list of Numpy arrays.
|
||||
'''
|
||||
return [get_value(x) for x in xs]
|
||||
|
||||
|
||||
def set_value(x, value):
|
||||
x.set_value(np.asarray(value, dtype=x.dtype))
|
||||
|
||||
@@ -725,6 +736,10 @@ def softplus(x):
|
||||
return T.nnet.softplus(x)
|
||||
|
||||
|
||||
def softsign(x):
|
||||
return T_softsign(x)
|
||||
|
||||
|
||||
def categorical_crossentropy(output, target, from_logits=False):
|
||||
if from_logits:
|
||||
output = T.nnet.softmax(output)
|
||||
|
||||
+15
-3
@@ -430,8 +430,11 @@ class TensorBoard(Callback):
|
||||
histogram_freq: frequency (in epochs) at which to compute activation
|
||||
histograms for the layers of the model. If set to 0,
|
||||
histograms won't be computed.
|
||||
write_graph: whether to visualize the graph in tensorboard. The log file can
|
||||
become quite large when write_graph is set to True.
|
||||
'''
|
||||
def __init__(self, log_dir='./logs', histogram_freq=0):
|
||||
|
||||
def __init__(self, log_dir='./logs', histogram_freq=0, write_graph=True):
|
||||
super(Callback, self).__init__()
|
||||
if K._BACKEND != 'tensorflow':
|
||||
raise Exception('TensorBoard callback only works '
|
||||
@@ -439,6 +442,7 @@ class TensorBoard(Callback):
|
||||
self.log_dir = log_dir
|
||||
self.histogram_freq = histogram_freq
|
||||
self.merged = None
|
||||
self.write_graph = write_graph
|
||||
|
||||
def _set_model(self, model):
|
||||
import tensorflow as tf
|
||||
@@ -457,8 +461,16 @@ class TensorBoard(Callback):
|
||||
tf.histogram_summary('{}_out'.format(layer),
|
||||
layer.output)
|
||||
self.merged = tf.merge_all_summaries()
|
||||
self.writer = tf.train.SummaryWriter(self.log_dir,
|
||||
self.sess.graph_def)
|
||||
if self.write_graph:
|
||||
tf_version = tuple(int(i) for i in tf.__version__.split('.'))
|
||||
if tf_version >= (0, 8, 0):
|
||||
self.writer = tf.train.SummaryWriter(self.log_dir,
|
||||
self.sess.graph)
|
||||
else:
|
||||
self.writer = tf.train.SummaryWriter(self.log_dir,
|
||||
self.sess.graph_def)
|
||||
else:
|
||||
self.writer = tf.train.SummaryWriter(self.log_dir)
|
||||
|
||||
def on_epoch_end(self, epoch, logs={}):
|
||||
import tensorflow as tf
|
||||
|
||||
@@ -2,7 +2,6 @@
|
||||
from __future__ import absolute_import
|
||||
import sys
|
||||
from six.moves import cPickle
|
||||
from six.moves import range
|
||||
|
||||
|
||||
def load_batch(fpath, label_key='labels'):
|
||||
|
||||
@@ -847,10 +847,11 @@ class Layer(object):
|
||||
if not params:
|
||||
return
|
||||
weight_value_tuples = []
|
||||
for p, w in zip(params, weights):
|
||||
if K.get_value(p).shape != w.shape:
|
||||
param_values = K.batch_get_value(params)
|
||||
for pv, p, w in zip(param_values, params, weights):
|
||||
if pv.shape != w.shape:
|
||||
raise Exception('Layer weight shape ' +
|
||||
str(K.get_value(p).shape) +
|
||||
str(pv.shape) +
|
||||
' not compatible with '
|
||||
'provided weight shape ' + str(w.shape))
|
||||
weight_value_tuples.append((p, w))
|
||||
@@ -861,10 +862,7 @@ class Layer(object):
|
||||
as a list of numpy arrays.
|
||||
'''
|
||||
params = self.trainable_weights + self.non_trainable_weights
|
||||
weights = []
|
||||
for p in params:
|
||||
weights.append(K.get_value(p))
|
||||
return weights
|
||||
return K.batch_get_value(params)
|
||||
|
||||
def get_config(self):
|
||||
'''Returns a Python dictionary (serializable)
|
||||
@@ -1266,7 +1264,7 @@ class Merge(Layer):
|
||||
self.add_inbound_node(layers, node_indices, tensor_indices)
|
||||
|
||||
outputs = self.inbound_nodes[-1].output_tensors
|
||||
return outputs[0] # merge only returns a single tensor
|
||||
return outputs[0] # merge only returns a single tensor
|
||||
else:
|
||||
return self.call(inputs, mask)
|
||||
|
||||
@@ -1300,8 +1298,6 @@ class Merge(Layer):
|
||||
break
|
||||
output_shape[self.concat_axis] += shape[self.concat_axis]
|
||||
return tuple(output_shape)
|
||||
elif self.mode == 'join':
|
||||
return None
|
||||
elif self.mode == 'dot':
|
||||
shape1 = list(input_shapes[0])
|
||||
shape2 = list(input_shapes[1])
|
||||
@@ -1402,7 +1398,7 @@ def merge(inputs, mode='sum', concat_axis=-1,
|
||||
|
||||
# Arguments
|
||||
mode: string or lambda/function. If string, must be one
|
||||
of: 'sum', 'mul', 'concat', 'ave', 'join', 'cos', 'dot'.
|
||||
of: 'sum', 'mul', 'concat', 'ave', 'cos', 'dot'.
|
||||
If lambda/function, it should take as input a list of tensors
|
||||
and return a single tensor.
|
||||
concat_axis: integer, axis to use in mode `concat`.
|
||||
@@ -2275,7 +2271,7 @@ class Container(Layer):
|
||||
for layer in flattened_layers:
|
||||
g = f.create_group(layer.name)
|
||||
symbolic_weights = layer.trainable_weights + layer.non_trainable_weights
|
||||
weight_values = layer.get_weights()
|
||||
weight_values = K.batch_get_value(symbolic_weights)
|
||||
weight_names = []
|
||||
for i, (w, val) in enumerate(zip(symbolic_weights, weight_values)):
|
||||
if hasattr(w, 'name') and w.name:
|
||||
|
||||
@@ -571,6 +571,10 @@ class Model(Container):
|
||||
name = self.output_names[i]
|
||||
self.targets.append(K.placeholder(ndim=len(shape), name=name + '_target'))
|
||||
|
||||
# prepare metrics
|
||||
self.metrics_names = ['loss']
|
||||
self.metrics = []
|
||||
|
||||
# compute total loss
|
||||
total_loss = None
|
||||
for i in range(len(self.outputs)):
|
||||
@@ -580,19 +584,20 @@ class Model(Container):
|
||||
sample_weight = sample_weights[i]
|
||||
mask = masks[i]
|
||||
loss_weight = loss_weights_list[i]
|
||||
output_loss = loss_weight * weighted_loss(y_true, y_pred,
|
||||
sample_weight, mask)
|
||||
output_loss = weighted_loss(y_true, y_pred,
|
||||
sample_weight, mask)
|
||||
if len(self.outputs) > 1:
|
||||
self.metrics.append(output_loss)
|
||||
self.metrics_names.append(self.output_names[i] + '_loss')
|
||||
if total_loss is None:
|
||||
total_loss = output_loss
|
||||
total_loss = loss_weight * output_loss
|
||||
else:
|
||||
total_loss += output_loss
|
||||
total_loss += loss_weight * output_loss
|
||||
|
||||
# add regularization penalties to the loss
|
||||
for r in self.regularizers:
|
||||
total_loss = r(total_loss)
|
||||
|
||||
# prepare metrics
|
||||
self.metrics_names = ['loss']
|
||||
self.metrics = []
|
||||
# list of same size as output_names.
|
||||
# contains tuples (metrics for output, names of metrics)
|
||||
nested_metrics = collect_metrics(metrics, self.output_names)
|
||||
@@ -681,7 +686,7 @@ class Model(Container):
|
||||
|
||||
def _make_predict_function(self):
|
||||
if not hasattr(self, 'predict_function'):
|
||||
raise Exception('You must compile your model before using it.')
|
||||
self.predict_function = None
|
||||
if self.predict_function is None:
|
||||
if self.uses_learning_phase:
|
||||
inputs = self.inputs + [K.learning_phase()]
|
||||
@@ -689,10 +694,11 @@ class Model(Container):
|
||||
inputs = self.inputs
|
||||
# returns network outputs. Does not update weights.
|
||||
# Does update the network states.
|
||||
kwargs = getattr(self, '_function_kwargs', {})
|
||||
self.predict_function = K.function(inputs,
|
||||
self.outputs,
|
||||
updates=self.state_updates,
|
||||
**self._function_kwargs)
|
||||
**kwargs)
|
||||
|
||||
def _fit_loop(self, f, ins, out_labels=[], batch_size=32,
|
||||
nb_epoch=100, verbose=1, callbacks=[],
|
||||
|
||||
@@ -65,6 +65,7 @@ class Convolution1D(Layer):
|
||||
(eg. maxnorm, nonneg), applied to the main weights matrix.
|
||||
b_constraint: instance of the [constraints](../constraints.md) module,
|
||||
applied to the bias.
|
||||
bias: whether to include a bias (i.e. make the layer affine rather than linear).
|
||||
input_dim: Number of channels/dimensions in the input.
|
||||
Either this argument or the keyword argument `input_shape` must be
|
||||
provided when using this layer as the first layer in a model.
|
||||
@@ -85,7 +86,7 @@ class Convolution1D(Layer):
|
||||
border_mode='valid', subsample_length=1,
|
||||
W_regularizer=None, b_regularizer=None, activity_regularizer=None,
|
||||
W_constraint=None, b_constraint=None,
|
||||
input_dim=None, input_length=None, **kwargs):
|
||||
bias=True, input_dim=None, input_length=None, **kwargs):
|
||||
|
||||
if border_mode not in {'valid', 'same'}:
|
||||
raise Exception('Invalid border mode for Convolution1D:', border_mode)
|
||||
@@ -106,6 +107,7 @@ class Convolution1D(Layer):
|
||||
self.W_constraint = constraints.get(W_constraint)
|
||||
self.b_constraint = constraints.get(b_constraint)
|
||||
|
||||
self.bias = bias
|
||||
self.input_spec = [InputSpec(ndim=3)]
|
||||
self.initial_weights = weights
|
||||
self.input_dim = input_dim
|
||||
@@ -118,15 +120,18 @@ class Convolution1D(Layer):
|
||||
input_dim = input_shape[2]
|
||||
self.W_shape = (self.nb_filter, input_dim, self.filter_length, 1)
|
||||
self.W = self.init(self.W_shape, name='{}_W'.format(self.name))
|
||||
self.b = K.zeros((self.nb_filter,), name='{}_b'.format(self.name))
|
||||
self.trainable_weights = [self.W, self.b]
|
||||
if self.bias:
|
||||
self.b = K.zeros((self.nb_filter,), name='{}_b'.format(self.name))
|
||||
self.trainable_weights = [self.W, self.b]
|
||||
else:
|
||||
self.trainable_weights = [self.W]
|
||||
self.regularizers = []
|
||||
|
||||
if self.W_regularizer:
|
||||
self.W_regularizer.set_param(self.W)
|
||||
self.regularizers.append(self.W_regularizer)
|
||||
|
||||
if self.b_regularizer:
|
||||
if self.bias and self.b_regularizer:
|
||||
self.b_regularizer.set_param(self.b)
|
||||
self.regularizers.append(self.b_regularizer)
|
||||
|
||||
@@ -137,7 +142,7 @@ class Convolution1D(Layer):
|
||||
self.constraints = {}
|
||||
if self.W_constraint:
|
||||
self.constraints[self.W] = self.W_constraint
|
||||
if self.b_constraint:
|
||||
if self.bias and self.b_constraint:
|
||||
self.constraints[self.b] = self.b_constraint
|
||||
|
||||
if self.initial_weights is not None:
|
||||
@@ -154,11 +159,11 @@ class Convolution1D(Layer):
|
||||
def call(self, x, mask=None):
|
||||
x = K.expand_dims(x, -1) # add a dimension of the right
|
||||
x = K.permute_dimensions(x, (0, 2, 1, 3))
|
||||
conv_out = K.conv2d(x, self.W, strides=self.subsample,
|
||||
border_mode=self.border_mode,
|
||||
dim_ordering='th')
|
||||
|
||||
output = conv_out + K.reshape(self.b, (1, self.nb_filter, 1, 1))
|
||||
output = K.conv2d(x, self.W, strides=self.subsample,
|
||||
border_mode=self.border_mode,
|
||||
dim_ordering='th')
|
||||
if self.bias:
|
||||
output += K.reshape(self.b, (1, self.nb_filter, 1, 1))
|
||||
output = self.activation(output)
|
||||
output = K.squeeze(output, 3) # remove the dummy 3rd dimension
|
||||
output = K.permute_dimensions(output, (0, 2, 1))
|
||||
@@ -176,6 +181,7 @@ class Convolution1D(Layer):
|
||||
'activity_regularizer': self.activity_regularizer.get_config() if self.activity_regularizer else None,
|
||||
'W_constraint': self.W_constraint.get_config() if self.W_constraint else None,
|
||||
'b_constraint': self.b_constraint.get_config() if self.b_constraint else None,
|
||||
'bias': self.bias,
|
||||
'input_dim': self.input_dim,
|
||||
'input_length': self.input_length}
|
||||
base_config = super(Convolution1D, self).get_config()
|
||||
@@ -232,6 +238,7 @@ class Convolution2D(Layer):
|
||||
applied to the bias.
|
||||
dim_ordering: 'th' or 'tf'. In 'th' mode, the channels dimension
|
||||
(the depth) is at index 1, in 'tf' mode it is at index 3.
|
||||
bias: whether to include a bias (i.e. make the layer affine rather than linear).
|
||||
|
||||
# Input shape
|
||||
4D tensor with shape:
|
||||
@@ -250,7 +257,8 @@ class Convolution2D(Layer):
|
||||
init='glorot_uniform', activation='linear', weights=None,
|
||||
border_mode='valid', subsample=(1, 1), dim_ordering='th',
|
||||
W_regularizer=None, b_regularizer=None, activity_regularizer=None,
|
||||
W_constraint=None, b_constraint=None, **kwargs):
|
||||
W_constraint=None, b_constraint=None,
|
||||
bias=True, **kwargs):
|
||||
|
||||
if border_mode not in {'valid', 'same'}:
|
||||
raise Exception('Invalid border mode for Convolution2D:', border_mode)
|
||||
@@ -272,6 +280,7 @@ class Convolution2D(Layer):
|
||||
self.W_constraint = constraints.get(W_constraint)
|
||||
self.b_constraint = constraints.get(b_constraint)
|
||||
|
||||
self.bias = bias
|
||||
self.input_spec = [InputSpec(ndim=4)]
|
||||
self.initial_weights = weights
|
||||
super(Convolution2D, self).__init__(**kwargs)
|
||||
@@ -286,15 +295,18 @@ class Convolution2D(Layer):
|
||||
else:
|
||||
raise Exception('Invalid dim_ordering: ' + self.dim_ordering)
|
||||
self.W = self.init(self.W_shape, name='{}_W'.format(self.name))
|
||||
self.b = K.zeros((self.nb_filter,), name='{}_b'.format(self.name))
|
||||
self.trainable_weights = [self.W, self.b]
|
||||
if self.bias:
|
||||
self.b = K.zeros((self.nb_filter,), name='{}_b'.format(self.name))
|
||||
self.trainable_weights = [self.W, self.b]
|
||||
else:
|
||||
self.trainable_weights = [self.W]
|
||||
self.regularizers = []
|
||||
|
||||
if self.W_regularizer:
|
||||
self.W_regularizer.set_param(self.W)
|
||||
self.regularizers.append(self.W_regularizer)
|
||||
|
||||
if self.b_regularizer:
|
||||
if self.bias and self.b_regularizer:
|
||||
self.b_regularizer.set_param(self.b)
|
||||
self.regularizers.append(self.b_regularizer)
|
||||
|
||||
@@ -305,7 +317,7 @@ class Convolution2D(Layer):
|
||||
self.constraints = {}
|
||||
if self.W_constraint:
|
||||
self.constraints[self.W] = self.W_constraint
|
||||
if self.b_constraint:
|
||||
if self.bias and self.b_constraint:
|
||||
self.constraints[self.b] = self.b_constraint
|
||||
|
||||
if self.initial_weights is not None:
|
||||
@@ -335,16 +347,17 @@ class Convolution2D(Layer):
|
||||
raise Exception('Invalid dim_ordering: ' + self.dim_ordering)
|
||||
|
||||
def call(self, x, mask=None):
|
||||
conv_out = K.conv2d(x, self.W, strides=self.subsample,
|
||||
border_mode=self.border_mode,
|
||||
dim_ordering=self.dim_ordering,
|
||||
filter_shape=self.W_shape)
|
||||
if self.dim_ordering == 'th':
|
||||
output = conv_out + K.reshape(self.b, (1, self.nb_filter, 1, 1))
|
||||
elif self.dim_ordering == 'tf':
|
||||
output = conv_out + K.reshape(self.b, (1, 1, 1, self.nb_filter))
|
||||
else:
|
||||
raise Exception('Invalid dim_ordering: ' + self.dim_ordering)
|
||||
output = K.conv2d(x, self.W, strides=self.subsample,
|
||||
border_mode=self.border_mode,
|
||||
dim_ordering=self.dim_ordering,
|
||||
filter_shape=self.W_shape)
|
||||
if self.bias:
|
||||
if self.dim_ordering == 'th':
|
||||
output += K.reshape(self.b, (1, self.nb_filter, 1, 1))
|
||||
elif self.dim_ordering == 'tf':
|
||||
output += K.reshape(self.b, (1, 1, 1, self.nb_filter))
|
||||
else:
|
||||
raise Exception('Invalid dim_ordering: ' + self.dim_ordering)
|
||||
output = self.activation(output)
|
||||
return output
|
||||
|
||||
@@ -361,7 +374,8 @@ class Convolution2D(Layer):
|
||||
'b_regularizer': self.b_regularizer.get_config() if self.b_regularizer else None,
|
||||
'activity_regularizer': self.activity_regularizer.get_config() if self.activity_regularizer else None,
|
||||
'W_constraint': self.W_constraint.get_config() if self.W_constraint else None,
|
||||
'b_constraint': self.b_constraint.get_config() if self.b_constraint else None}
|
||||
'b_constraint': self.b_constraint.get_config() if self.b_constraint else None,
|
||||
'bias': self.bias}
|
||||
base_config = super(Convolution2D, self).get_config()
|
||||
return dict(list(base_config.items()) + list(config.items()))
|
||||
|
||||
@@ -407,6 +421,7 @@ class Convolution3D(Layer):
|
||||
applied to the bias.
|
||||
dim_ordering: 'th' or 'tf'. In 'th' mode, the channels dimension
|
||||
(the depth) is at index 1, in 'tf' mode it is at index 4.
|
||||
bias: whether to include a bias (i.e. make the layer affine rather than linear).
|
||||
|
||||
# Input shape
|
||||
5D tensor with shape:
|
||||
@@ -426,7 +441,8 @@ class Convolution3D(Layer):
|
||||
init='glorot_uniform', activation='linear', weights=None,
|
||||
border_mode='valid', subsample=(1, 1, 1), dim_ordering='th',
|
||||
W_regularizer=None, b_regularizer=None, activity_regularizer=None,
|
||||
W_constraint=None, b_constraint=None, **kwargs):
|
||||
W_constraint=None, b_constraint=None,
|
||||
bias=True, **kwargs):
|
||||
if K._BACKEND != 'theano':
|
||||
raise Exception(self.__class__.__name__ +
|
||||
' is currently only working with Theano backend.')
|
||||
@@ -451,6 +467,7 @@ class Convolution3D(Layer):
|
||||
self.W_constraint = constraints.get(W_constraint)
|
||||
self.b_constraint = constraints.get(b_constraint)
|
||||
|
||||
self.bias = bias
|
||||
self.input_spec = [InputSpec(ndim=5)]
|
||||
self.initial_weights = weights
|
||||
super(Convolution3D, self).__init__(**kwargs)
|
||||
@@ -471,15 +488,18 @@ class Convolution3D(Layer):
|
||||
raise Exception('Invalid dim_ordering: ' + self.dim_ordering)
|
||||
|
||||
self.W = self.init(self.W_shape, name='{}_W'.format(self.name))
|
||||
self.b = K.zeros((self.nb_filter,), name='{}_b'.format(self.name))
|
||||
self.trainable_weights = [self.W, self.b]
|
||||
self.regularizers = []
|
||||
if self.bias:
|
||||
self.b = K.zeros((self.nb_filter,), name='{}_b'.format(self.name))
|
||||
self.trainable_weights = [self.W, self.b]
|
||||
else:
|
||||
self.trainable_weights = [self.W]
|
||||
|
||||
self.regularizers = []
|
||||
if self.W_regularizer:
|
||||
self.W_regularizer.set_param(self.W)
|
||||
self.regularizers.append(self.W_regularizer)
|
||||
|
||||
if self.b_regularizer:
|
||||
if self.bias and self.b_regularizer:
|
||||
self.b_regularizer.set_param(self.b)
|
||||
self.regularizers.append(self.b_regularizer)
|
||||
|
||||
@@ -490,7 +510,7 @@ class Convolution3D(Layer):
|
||||
self.constraints = {}
|
||||
if self.W_constraint:
|
||||
self.constraints[self.W] = self.W_constraint
|
||||
if self.b_constraint:
|
||||
if self.bias and self.b_constraint:
|
||||
self.constraints[self.b] = self.b_constraint
|
||||
|
||||
if self.initial_weights is not None:
|
||||
@@ -525,36 +545,37 @@ class Convolution3D(Layer):
|
||||
|
||||
def call(self, x, mask=None):
|
||||
input_shape = self.input_spec[0].shape
|
||||
conv_out = K.conv3d(x, self.W, strides=self.subsample,
|
||||
border_mode=self.border_mode,
|
||||
dim_ordering=self.dim_ordering,
|
||||
volume_shape=input_shape,
|
||||
filter_shape=self.W_shape)
|
||||
|
||||
if self.dim_ordering == 'th':
|
||||
output = conv_out + K.reshape(self.b, (1, self.nb_filter, 1, 1, 1))
|
||||
elif self.dim_ordering == 'tf':
|
||||
output = conv_out + K.reshape(self.b, (1, 1, 1, 1, self.nb_filter))
|
||||
else:
|
||||
raise Exception('Invalid dim_ordering: ' + self.dim_ordering)
|
||||
output = K.conv3d(x, self.W, strides=self.subsample,
|
||||
border_mode=self.border_mode,
|
||||
dim_ordering=self.dim_ordering,
|
||||
volume_shape=input_shape,
|
||||
filter_shape=self.W_shape)
|
||||
if self.bias:
|
||||
if self.dim_ordering == 'th':
|
||||
output += K.reshape(self.b, (1, self.nb_filter, 1, 1, 1))
|
||||
elif self.dim_ordering == 'tf':
|
||||
output += K.reshape(self.b, (1, 1, 1, 1, self.nb_filter))
|
||||
else:
|
||||
raise Exception('Invalid dim_ordering: ' + self.dim_ordering)
|
||||
output = self.activation(output)
|
||||
return output
|
||||
|
||||
def get_config(self):
|
||||
config = {"nb_filter": self.nb_filter,
|
||||
"kernel_dim1": self.kernel_dim1,
|
||||
"kernel_dim2": self.kernel_dim2,
|
||||
"kernel_dim3": self.kernel_dim3,
|
||||
"dim_ordering": self.dim_ordering,
|
||||
"init": self.init.__name__,
|
||||
"activation": self.activation.__name__,
|
||||
"border_mode": self.border_mode,
|
||||
"subsample": self.subsample,
|
||||
"W_regularizer": self.W_regularizer.get_config() if self.W_regularizer else None,
|
||||
"b_regularizer": self.b_regularizer.get_config() if self.b_regularizer else None,
|
||||
"activity_regularizer": self.activity_regularizer.get_config() if self.activity_regularizer else None,
|
||||
"W_constraint": self.W_constraint.get_config() if self.W_constraint else None,
|
||||
"b_constraint": self.b_constraint.get_config() if self.b_constraint else None}
|
||||
config = {'nb_filter': self.nb_filter,
|
||||
'kernel_dim1': self.kernel_dim1,
|
||||
'kernel_dim2': self.kernel_dim2,
|
||||
'kernel_dim3': self.kernel_dim3,
|
||||
'dim_ordering': self.dim_ordering,
|
||||
'init': self.init.__name__,
|
||||
'activation': self.activation.__name__,
|
||||
'border_mode': self.border_mode,
|
||||
'subsample': self.subsample,
|
||||
'W_regularizer': self.W_regularizer.get_config() if self.W_regularizer else None,
|
||||
'b_regularizer': self.b_regularizer.get_config() if self.b_regularizer else None,
|
||||
'activity_regularizer': self.activity_regularizer.get_config() if self.activity_regularizer else None,
|
||||
'W_constraint': self.W_constraint.get_config() if self.W_constraint else None,
|
||||
'b_constraint': self.b_constraint.get_config() if self.b_constraint else None,
|
||||
'bias': self.bias}
|
||||
base_config = super(Convolution3D, self).get_config()
|
||||
return dict(list(base_config.items()) + list(config.items()))
|
||||
|
||||
|
||||
+62
-35
@@ -550,12 +550,10 @@ class Dense(Layer):
|
||||
(eg. maxnorm, nonneg), applied to the main weights matrix.
|
||||
b_constraint: instance of the [constraints](../constraints.md) module,
|
||||
applied to the bias.
|
||||
bias: whether to include a bias (i.e. make the layer affine rather than linear).
|
||||
input_dim: dimensionality of the input (integer).
|
||||
This argument (or alternatively, the keyword argument `input_shape`)
|
||||
is required when using this layer as the first layer in a model.
|
||||
bias: boolean
|
||||
Default True;
|
||||
Setting it to False will remove the bias (b) from all calculations.
|
||||
|
||||
# Input shape
|
||||
2D tensor with shape: `(nb_samples, input_dim)`.
|
||||
@@ -565,7 +563,8 @@ class Dense(Layer):
|
||||
'''
|
||||
def __init__(self, output_dim, init='glorot_uniform', activation='linear', weights=None,
|
||||
W_regularizer=None, b_regularizer=None, activity_regularizer=None,
|
||||
W_constraint=None, b_constraint=None, input_dim=None, bias=True, **kwargs):
|
||||
W_constraint=None, b_constraint=None,
|
||||
bias=True, input_dim=None, **kwargs):
|
||||
self.init = initializations.get(init)
|
||||
self.activation = activations.get(activation)
|
||||
self.output_dim = output_dim
|
||||
@@ -606,7 +605,7 @@ class Dense(Layer):
|
||||
self.W_regularizer.set_param(self.W)
|
||||
self.regularizers.append(self.W_regularizer)
|
||||
|
||||
if self.b_regularizer and self.bias:
|
||||
if self.bias and self.b_regularizer:
|
||||
self.b_regularizer.set_param(self.b)
|
||||
self.regularizers.append(self.b_regularizer)
|
||||
|
||||
@@ -617,7 +616,7 @@ class Dense(Layer):
|
||||
self.constraints = {}
|
||||
if self.W_constraint:
|
||||
self.constraints[self.W] = self.W_constraint
|
||||
if self.b_constraint and self.bias:
|
||||
if self.bias and self.b_constraint:
|
||||
self.constraints[self.b] = self.b_constraint
|
||||
|
||||
if self.initial_weights is not None:
|
||||
@@ -643,8 +642,8 @@ class Dense(Layer):
|
||||
'activity_regularizer': self.activity_regularizer.get_config() if self.activity_regularizer else None,
|
||||
'W_constraint': self.W_constraint.get_config() if self.W_constraint else None,
|
||||
'b_constraint': self.b_constraint.get_config() if self.b_constraint else None,
|
||||
'input_dim': self.input_dim,
|
||||
'bias': self.bias}
|
||||
'bias': self.bias,
|
||||
'input_dim': self.input_dim}
|
||||
base_config = super(Dense, self).get_config()
|
||||
return dict(list(base_config.items()) + list(config.items()))
|
||||
|
||||
@@ -721,6 +720,7 @@ class MaxoutDense(Layer):
|
||||
(eg. maxnorm, nonneg), applied to the main weights matrix.
|
||||
b_constraint: instance of the [constraints](../constraints.md) module,
|
||||
applied to the bias.
|
||||
bias: whether to include a bias (i.e. make the layer affine rather than linear).
|
||||
input_dim: dimensionality of the input (integer).
|
||||
This argument (or alternatively, the keyword argument `input_shape`)
|
||||
is required when using this layer as the first layer in a model.
|
||||
@@ -737,7 +737,8 @@ class MaxoutDense(Layer):
|
||||
def __init__(self, output_dim, nb_feature=4,
|
||||
init='glorot_uniform', weights=None,
|
||||
W_regularizer=None, b_regularizer=None, activity_regularizer=None,
|
||||
W_constraint=None, b_constraint=None, input_dim=None, **kwargs):
|
||||
W_constraint=None, b_constraint=None,
|
||||
bias=True, input_dim=None, **kwargs):
|
||||
self.output_dim = output_dim
|
||||
self.nb_feature = nb_feature
|
||||
self.init = initializations.get(init)
|
||||
@@ -749,6 +750,7 @@ class MaxoutDense(Layer):
|
||||
self.W_constraint = constraints.get(W_constraint)
|
||||
self.b_constraint = constraints.get(b_constraint)
|
||||
|
||||
self.bias = bias
|
||||
self.initial_weights = weights
|
||||
self.input_spec = [InputSpec(ndim=2)]
|
||||
|
||||
@@ -764,17 +766,19 @@ class MaxoutDense(Layer):
|
||||
|
||||
self.W = self.init((self.nb_feature, input_dim, self.output_dim),
|
||||
name='{}_W'.format(self.name))
|
||||
self.b = K.zeros((self.nb_feature, self.output_dim),
|
||||
name='{}_b'.format(self.name))
|
||||
if self.bias:
|
||||
self.b = K.zeros((self.nb_feature, self.output_dim),
|
||||
name='{}_b'.format(self.name))
|
||||
self.trainable_weights = [self.W, self.b]
|
||||
else:
|
||||
self.trainable_weights = [self.W]
|
||||
|
||||
self.trainable_weights = [self.W, self.b]
|
||||
self.regularizers = []
|
||||
|
||||
if self.W_regularizer:
|
||||
self.W_regularizer.set_param(self.W)
|
||||
self.regularizers.append(self.W_regularizer)
|
||||
|
||||
if self.b_regularizer:
|
||||
if self.bias and self.b_regularizer:
|
||||
self.b_regularizer.set_param(self.b)
|
||||
self.regularizers.append(self.b_regularizer)
|
||||
|
||||
@@ -785,7 +789,7 @@ class MaxoutDense(Layer):
|
||||
self.constraints = {}
|
||||
if self.W_constraint:
|
||||
self.constraints[self.W] = self.W_constraint
|
||||
if self.b_constraint:
|
||||
if self.bias and self.b_constraint:
|
||||
self.constraints[self.b] = self.b_constraint
|
||||
|
||||
if self.initial_weights is not None:
|
||||
@@ -798,7 +802,10 @@ class MaxoutDense(Layer):
|
||||
|
||||
def call(self, x, mask=None):
|
||||
# no activation, this layer is only linear.
|
||||
output = K.max(K.dot(x, self.W) + self.b, axis=1)
|
||||
output = K.dot(x, self.W)
|
||||
if self.bias:
|
||||
output += self.b
|
||||
output = K.max(output, axis=1)
|
||||
return output
|
||||
|
||||
def get_config(self):
|
||||
@@ -810,6 +817,7 @@ class MaxoutDense(Layer):
|
||||
'activity_regularizer': self.activity_regularizer.get_config() if self.activity_regularizer else None,
|
||||
'W_constraint': self.W_constraint.get_config() if self.W_constraint else None,
|
||||
'b_constraint': self.b_constraint.get_config() if self.b_constraint else None,
|
||||
'bias': self.bias,
|
||||
'input_dim': self.input_dim}
|
||||
base_config = super(MaxoutDense, self).get_config()
|
||||
return dict(list(base_config.items()) + list(config.items()))
|
||||
@@ -844,6 +852,7 @@ class Highway(Layer):
|
||||
(eg. maxnorm, nonneg), applied to the main weights matrix.
|
||||
b_constraint: instance of the [constraints](../constraints.md) module,
|
||||
applied to the bias.
|
||||
bias: whether to include a bias (i.e. make the layer affine rather than linear).
|
||||
input_dim: dimensionality of the input (integer).
|
||||
This argument (or alternatively, the keyword argument `input_shape`)
|
||||
is required when using this layer as the first layer in a model.
|
||||
@@ -860,7 +869,8 @@ class Highway(Layer):
|
||||
def __init__(self, init='glorot_uniform', transform_bias=-2,
|
||||
activation='linear', weights=None,
|
||||
W_regularizer=None, b_regularizer=None, activity_regularizer=None,
|
||||
W_constraint=None, b_constraint=None, input_dim=None, **kwargs):
|
||||
W_constraint=None, b_constraint=None,
|
||||
bias=True, input_dim=None, **kwargs):
|
||||
self.init = initializations.get(init)
|
||||
self.transform_bias = transform_bias
|
||||
self.activation = activations.get(activation)
|
||||
@@ -872,6 +882,7 @@ class Highway(Layer):
|
||||
self.W_constraint = constraints.get(W_constraint)
|
||||
self.b_constraint = constraints.get(b_constraint)
|
||||
|
||||
self.bias = bias
|
||||
self.initial_weights = weights
|
||||
self.input_spec = [InputSpec(ndim=2)]
|
||||
|
||||
@@ -890,19 +901,21 @@ class Highway(Layer):
|
||||
self.W_carry = self.init((input_dim, input_dim),
|
||||
name='{}_W_carry'.format(self.name))
|
||||
|
||||
self.b = K.zeros((input_dim,), name='{}_b'.format(self.name))
|
||||
# initialize with a vector of values `transform_bias`
|
||||
self.b_carry = K.variable(np.ones((input_dim,)) * self.transform_bias,
|
||||
name='{}_b_carry'.format(self.name))
|
||||
|
||||
self.trainable_weights = [self.W, self.b, self.W_carry, self.b_carry]
|
||||
if self.bias:
|
||||
self.b = K.zeros((input_dim,), name='{}_b'.format(self.name))
|
||||
# initialize with a vector of values `transform_bias`
|
||||
self.b_carry = K.variable(np.ones((input_dim,)) * self.transform_bias,
|
||||
name='{}_b_carry'.format(self.name))
|
||||
self.trainable_weights = [self.W, self.b, self.W_carry, self.b_carry]
|
||||
else:
|
||||
self.trainable_weights = [self.W, self.W_carry]
|
||||
|
||||
self.regularizers = []
|
||||
if self.W_regularizer:
|
||||
self.W_regularizer.set_param(self.W)
|
||||
self.regularizers.append(self.W_regularizer)
|
||||
|
||||
if self.b_regularizer:
|
||||
if self.bias and self.b_regularizer:
|
||||
self.b_regularizer.set_param(self.b)
|
||||
self.regularizers.append(self.b_regularizer)
|
||||
|
||||
@@ -913,7 +926,7 @@ class Highway(Layer):
|
||||
self.constraints = {}
|
||||
if self.W_constraint:
|
||||
self.constraints[self.W] = self.W_constraint
|
||||
if self.b_constraint:
|
||||
if self.bias and self.b_constraint:
|
||||
self.constraints[self.b] = self.b_constraint
|
||||
|
||||
if self.initial_weights is not None:
|
||||
@@ -921,8 +934,14 @@ class Highway(Layer):
|
||||
del self.initial_weights
|
||||
|
||||
def call(self, x, mask=None):
|
||||
transform_weight = activations.sigmoid(K.dot(x, self.W_carry) + self.b_carry)
|
||||
act = self.activation(K.dot(x, self.W) + self.b)
|
||||
y = K.dot(x, self.W_carry)
|
||||
if self.bias:
|
||||
y += self.b_carry
|
||||
transform_weight = activations.sigmoid(y)
|
||||
y = K.dot(x, self.W)
|
||||
if self.bias:
|
||||
y += self.b
|
||||
act = self.activation(y)
|
||||
act *= transform_weight
|
||||
output = act + (1 - transform_weight) * x
|
||||
return output
|
||||
@@ -936,6 +955,7 @@ class Highway(Layer):
|
||||
'activity_regularizer': self.activity_regularizer.get_config() if self.activity_regularizer else None,
|
||||
'W_constraint': self.W_constraint.get_config() if self.W_constraint else None,
|
||||
'b_constraint': self.b_constraint.get_config() if self.b_constraint else None,
|
||||
'bias': self.bias,
|
||||
'input_dim': self.input_dim}
|
||||
base_config = super(Highway, self).get_config()
|
||||
return dict(list(base_config.items()) + list(config.items()))
|
||||
@@ -979,16 +999,19 @@ class TimeDistributedDense(Layer):
|
||||
(eg. maxnorm, nonneg), applied to the main weights matrix.
|
||||
b_constraint: instance of the [constraints](../constraints.md) module,
|
||||
applied to the bias.
|
||||
bias: whether to include a bias (i.e. make the layer affine rather than linear).
|
||||
input_dim: dimensionality of the input (integer).
|
||||
This argument (or alternatively, the keyword argument `input_shape`)
|
||||
is required when using this layer as the first layer in a model.
|
||||
input_length: length of inputs sequences
|
||||
(integer, or None for variable-length sequences).
|
||||
'''
|
||||
|
||||
def __init__(self, output_dim,
|
||||
init='glorot_uniform', activation='linear', weights=None,
|
||||
W_regularizer=None, b_regularizer=None, activity_regularizer=None,
|
||||
W_constraint=None, b_constraint=None,
|
||||
input_dim=None, input_length=None, **kwargs):
|
||||
bias=True, input_dim=None, input_length=None, **kwargs):
|
||||
warnings.warn('TimeDistributedDense is deprecated, '
|
||||
'please use TimeDistributed(Dense(...)) instead.')
|
||||
self.output_dim = output_dim
|
||||
@@ -1002,6 +1025,7 @@ class TimeDistributedDense(Layer):
|
||||
self.W_constraint = constraints.get(W_constraint)
|
||||
self.b_constraint = constraints.get(b_constraint)
|
||||
|
||||
self.bias = bias
|
||||
self.initial_weights = weights
|
||||
self.input_spec = [InputSpec(ndim=3)]
|
||||
self.supports_masking = True
|
||||
@@ -1019,17 +1043,17 @@ class TimeDistributedDense(Layer):
|
||||
|
||||
self.W = self.init((input_dim, self.output_dim),
|
||||
name='{}_W'.format(self.name))
|
||||
self.b = K.zeros((self.output_dim,),
|
||||
name='{}_b'.format(self.name))
|
||||
|
||||
self.trainable_weights = [self.W, self.b]
|
||||
if self.bias:
|
||||
self.b = K.zeros((self.output_dim,),
|
||||
name='{}_b'.format(self.name))
|
||||
self.trainable_weights = [self.W, self.b]
|
||||
self.regularizers = []
|
||||
|
||||
if self.W_regularizer:
|
||||
self.W_regularizer.set_param(self.W)
|
||||
self.regularizers.append(self.W_regularizer)
|
||||
|
||||
if self.b_regularizer:
|
||||
if self.bias and self.b_regularizer:
|
||||
self.b_regularizer.set_param(self.b)
|
||||
self.regularizers.append(self.b_regularizer)
|
||||
|
||||
@@ -1040,7 +1064,7 @@ class TimeDistributedDense(Layer):
|
||||
self.constraints = {}
|
||||
if self.W_constraint:
|
||||
self.constraints[self.W] = self.W_constraint
|
||||
if self.b_constraint:
|
||||
if self.bias and self.b_constraint:
|
||||
self.constraints[self.b] = self.b_constraint
|
||||
|
||||
if self.initial_weights is not None:
|
||||
@@ -1070,7 +1094,9 @@ class TimeDistributedDense(Layer):
|
||||
|
||||
# Squash samples and timesteps into a single axis
|
||||
x = K.reshape(x, (-1, input_shape[-1])) # (samples * timesteps, input_dim)
|
||||
y = K.dot(x, self.W) + self.b # (samples * timesteps, output_dim)
|
||||
y = K.dot(x, self.W) # (samples * timesteps, output_dim)
|
||||
if self.bias:
|
||||
y += self.b
|
||||
# We have to reshape Y to (samples, timesteps, output_dim)
|
||||
y = K.reshape(y, (-1, input_length, self.output_dim)) # (samples, timesteps, output_dim)
|
||||
y = self.activation(y)
|
||||
@@ -1085,6 +1111,7 @@ class TimeDistributedDense(Layer):
|
||||
'activity_regularizer': self.activity_regularizer.get_config() if self.activity_regularizer else None,
|
||||
'W_constraint': self.W_constraint.get_config() if self.W_constraint else None,
|
||||
'b_constraint': self.b_constraint.get_config() if self.b_constraint else None,
|
||||
'bias': self.bias,
|
||||
'input_dim': self.input_dim,
|
||||
'input_length': self.input_length}
|
||||
base_config = super(TimeDistributedDense, self).get_config()
|
||||
|
||||
@@ -17,7 +17,7 @@ class Embedding(Layer):
|
||||
model = Sequential()
|
||||
model.add(Embedding(1000, 64, input_length=10))
|
||||
# the model will take as input an integer matrix of size (batch, input_length).
|
||||
# the largest integer (i.e. word index) in the input should be no larger than 1000 (vocabulary size).
|
||||
# the largest integer (i.e. word index) in the input should be no larger than 999 (vocabulary size - 1).
|
||||
# now model.output_shape == (None, 10, 64), where None is the batch dimension.
|
||||
|
||||
input_array = np.random.randint(1000, size=(32, 10))
|
||||
@@ -28,7 +28,7 @@ class Embedding(Layer):
|
||||
```
|
||||
|
||||
# Arguments
|
||||
input_dim: int >= 0. Size of the vocabulary, ie.
|
||||
input_dim: int > 0. Size of the vocabulary, i.e.
|
||||
1 + maximum integer index occurring in the input data.
|
||||
output_dim: int >= 0. Dimension of the dense embedding.
|
||||
init: name of initialization function for the weights
|
||||
@@ -46,6 +46,8 @@ class Embedding(Layer):
|
||||
This is useful for [recurrent layers](recurrent.md) which may take
|
||||
variable length input. If this is `True` then all subsequent layers
|
||||
in the model need to support masking or an exception will be raised.
|
||||
If mask_zero is set to True, as a consequence, index 0 cannot be
|
||||
used in the vocabulary (input_dim should equal |vocabulary| + 1).
|
||||
input_length: Length of input sequences, when it is constant.
|
||||
This argument is required if you are going to connect
|
||||
`Flatten` then `Dense` layers upstream
|
||||
|
||||
@@ -47,7 +47,7 @@ class BatchNormalization(Layer):
|
||||
Same shape as input.
|
||||
|
||||
# References
|
||||
- [Batch Normalization: Accelerating Deep Network Training by Reducing Internal Covariate Shift](http://arxiv.org/pdf/1502.03167v3.pdf)
|
||||
- [Batch Normalization: Accelerating Deep Network Training by Reducing Internal Covariate Shift](http://jmlr.org/proceedings/papers/v37/ioffe15.html)
|
||||
'''
|
||||
def __init__(self, epsilon=1e-6, mode=0, axis=-1, momentum=0.9,
|
||||
weights=None, beta_init='zero', gamma_init='one', **kwargs):
|
||||
@@ -94,8 +94,8 @@ class BatchNormalization(Layer):
|
||||
std = K.mean(K.square(x - brodcast_mean) + self.epsilon, axis=reduction_axes)
|
||||
std = K.sqrt(std)
|
||||
brodcast_std = K.reshape(std, broadcast_shape)
|
||||
mean_update = self.momentum * self.running_mean + (1-self.momentum) * mean
|
||||
std_update = self.momentum * self.running_std + (1-self.momentum) * std
|
||||
mean_update = self.momentum * self.running_mean + (1 - self.momentum) * mean
|
||||
std_update = self.momentum * self.running_std + (1 - self.momentum) * std
|
||||
self.updates = [(self.running_mean, mean_update),
|
||||
(self.running_std, std_update)]
|
||||
x_normed = (x - brodcast_mean) / (brodcast_std + self.epsilon)
|
||||
|
||||
+198
-137
@@ -81,12 +81,20 @@ class Recurrent(Layer):
|
||||
is always unrolled, so this argument does not do anything.
|
||||
Unrolling can speed up an RNN, although it tends to be more memory-intensive.
|
||||
Unrolling is only suitable for short sequences.
|
||||
consume_less: one of "cpu", "mem". If set to "cpu", the RNN will use
|
||||
consume_less: one of "cpu", "mem", or "gpu" (LSTM/GRU only).
|
||||
If set to "cpu", the RNN will use
|
||||
an implementation that uses fewer, larger matrix products,
|
||||
thus running faster (at least on CPU) but consuming more memory.
|
||||
thus running faster on CPU but consuming more memory.
|
||||
|
||||
If set to "mem", the RNN will use more matrix products,
|
||||
but smaller ones, thus running slower (may actually be faster on GPU)
|
||||
while consuming less memory.
|
||||
|
||||
If set to "gpu" (LSTM/GRU only), the RNN will combine the input gate,
|
||||
the forget gate and the output gate into a single matrix,
|
||||
enabling more time-efficient parallelization on the GPU. Note: RNN
|
||||
dropout must be shared for all gates, resulting in a slightly
|
||||
reduced regularization.
|
||||
input_dim: dimensionality of the input (integer).
|
||||
This argument (or alternatively, the keyword argument `input_shape`)
|
||||
is required when using this layer as the first layer in a model.
|
||||
@@ -383,15 +391,15 @@ class SimpleRNN(Recurrent):
|
||||
return constants
|
||||
|
||||
def get_config(self):
|
||||
config = {"output_dim": self.output_dim,
|
||||
"init": self.init.__name__,
|
||||
"inner_init": self.inner_init.__name__,
|
||||
"activation": self.activation.__name__,
|
||||
"W_regularizer": self.W_regularizer.get_config() if self.W_regularizer else None,
|
||||
"U_regularizer": self.U_regularizer.get_config() if self.U_regularizer else None,
|
||||
"b_regularizer": self.b_regularizer.get_config() if self.b_regularizer else None,
|
||||
"dropout_W": self.dropout_W,
|
||||
"dropout_U": self.dropout_U}
|
||||
config = {'output_dim': self.output_dim,
|
||||
'init': self.init.__name__,
|
||||
'inner_init': self.inner_init.__name__,
|
||||
'activation': self.activation.__name__,
|
||||
'W_regularizer': self.W_regularizer.get_config() if self.W_regularizer else None,
|
||||
'U_regularizer': self.U_regularizer.get_config() if self.U_regularizer else None,
|
||||
'b_regularizer': self.b_regularizer.get_config() if self.b_regularizer else None,
|
||||
'dropout_W': self.dropout_W,
|
||||
'dropout_U': self.dropout_U}
|
||||
base_config = super(SimpleRNN, self).get_config()
|
||||
return dict(list(base_config.items()) + list(config.items()))
|
||||
|
||||
@@ -444,53 +452,66 @@ class GRU(Recurrent):
|
||||
|
||||
def build(self, input_shape):
|
||||
self.input_spec = [InputSpec(shape=input_shape)]
|
||||
input_dim = input_shape[2]
|
||||
self.input_dim = input_dim
|
||||
self.input_dim = input_shape[2]
|
||||
|
||||
self.W_z = self.init((input_dim, self.output_dim),
|
||||
name='{}_W_z'.format(self.name))
|
||||
self.U_z = self.inner_init((self.output_dim, self.output_dim),
|
||||
name='{}_U_z'.format(self.name))
|
||||
self.b_z = K.zeros((self.output_dim,), name='{}_b_z'.format(self.name))
|
||||
|
||||
self.W_r = self.init((input_dim, self.output_dim),
|
||||
name='{}_W_r'.format(self.name))
|
||||
self.U_r = self.inner_init((self.output_dim, self.output_dim),
|
||||
name='{}_U_r'.format(self.name))
|
||||
self.b_r = K.zeros((self.output_dim,), name='{}_b_r'.format(self.name))
|
||||
|
||||
self.W_h = self.init((input_dim, self.output_dim),
|
||||
name='{}_W_h'.format(self.name))
|
||||
self.U_h = self.inner_init((self.output_dim, self.output_dim),
|
||||
name='{}_U_h'.format(self.name))
|
||||
self.b_h = K.zeros((self.output_dim,), name='{}_b_h'.format(self.name))
|
||||
|
||||
self.regularizers = []
|
||||
if self.W_regularizer:
|
||||
self.W_regularizer.set_param(K.concatenate([self.W_z,
|
||||
self.W_r,
|
||||
self.W_h]))
|
||||
self.regularizers.append(self.W_regularizer)
|
||||
if self.U_regularizer:
|
||||
self.U_regularizer.set_param(K.concatenate([self.U_z,
|
||||
self.U_r,
|
||||
self.U_h]))
|
||||
self.regularizers.append(self.U_regularizer)
|
||||
if self.b_regularizer:
|
||||
self.b_regularizer.set_param(K.concatenate([self.b_z,
|
||||
self.b_r,
|
||||
self.b_h]))
|
||||
self.regularizers.append(self.b_regularizer)
|
||||
|
||||
self.trainable_weights = [self.W_z, self.U_z, self.b_z,
|
||||
self.W_r, self.U_r, self.b_r,
|
||||
self.W_h, self.U_h, self.b_h]
|
||||
if self.stateful:
|
||||
self.reset_states()
|
||||
else:
|
||||
# initial states: all-zero tensor of shape (output_dim)
|
||||
self.states = [None]
|
||||
|
||||
if self.consume_less == 'gpu':
|
||||
|
||||
self.W = self.init((self.input_dim, 3 * self.output_dim),
|
||||
name='{}_W'.format(self.name))
|
||||
self.U = self.inner_init((self.output_dim, 3 * self.output_dim),
|
||||
name='{}_U'.format(self.name))
|
||||
|
||||
self.b = K.variable(np.hstack((np.zeros(self.output_dim),
|
||||
np.zeros(self.output_dim),
|
||||
np.zeros(self.output_dim))),
|
||||
name='{}_b'.format(self.name))
|
||||
|
||||
self.trainable_weights = [self.W, self.U, self.b]
|
||||
else:
|
||||
|
||||
self.W_z = self.init((self.input_dim, self.output_dim),
|
||||
name='{}_W_z'.format(self.name))
|
||||
self.U_z = self.inner_init((self.output_dim, self.output_dim),
|
||||
name='{}_U_z'.format(self.name))
|
||||
self.b_z = K.zeros((self.output_dim,), name='{}_b_z'.format(self.name))
|
||||
|
||||
self.W_r = self.init((self.input_dim, self.output_dim),
|
||||
name='{}_W_r'.format(self.name))
|
||||
self.U_r = self.inner_init((self.output_dim, self.output_dim),
|
||||
name='{}_U_r'.format(self.name))
|
||||
self.b_r = K.zeros((self.output_dim,), name='{}_b_r'.format(self.name))
|
||||
|
||||
self.W_h = self.init((self.input_dim, self.output_dim),
|
||||
name='{}_W_h'.format(self.name))
|
||||
self.U_h = self.inner_init((self.output_dim, self.output_dim),
|
||||
name='{}_U_h'.format(self.name))
|
||||
self.b_h = K.zeros((self.output_dim,), name='{}_b_h'.format(self.name))
|
||||
|
||||
self.trainable_weights = [self.W_z, self.U_z, self.b_z,
|
||||
self.W_r, self.U_r, self.b_r,
|
||||
self.W_h, self.U_h, self.b_h]
|
||||
|
||||
self.W = K.concatenate([self.W_z, self.W_r, self.W_h])
|
||||
self.U = K.concatenate([self.U_z, self.U_r, self.U_h])
|
||||
self.b = K.concatenate([self.b_z, self.b_r, self.b_h])
|
||||
|
||||
self.regularizers = []
|
||||
if self.W_regularizer:
|
||||
self.W_regularizer.set_param(self.W)
|
||||
self.regularizers.append(self.W_regularizer)
|
||||
if self.U_regularizer:
|
||||
self.U_regularizer.set_param(self.U)
|
||||
self.regularizers.append(self.U_regularizer)
|
||||
if self.b_regularizer:
|
||||
self.b_regularizer.set_param(self.b)
|
||||
self.regularizers.append(self.b_regularizer)
|
||||
|
||||
if self.initial_weights is not None:
|
||||
self.set_weights(self.initial_weights)
|
||||
del self.initial_weights
|
||||
@@ -528,19 +549,37 @@ class GRU(Recurrent):
|
||||
B_U = states[1] # dropout matrices for recurrent units
|
||||
B_W = states[2]
|
||||
|
||||
if self.consume_less == 'cpu':
|
||||
x_z = x[:, :self.output_dim]
|
||||
x_r = x[:, self.output_dim: 2 * self.output_dim]
|
||||
x_h = x[:, 2 * self.output_dim:]
|
||||
if self.consume_less == 'gpu':
|
||||
|
||||
matrix_x = K.dot(x * B_W[0], self.W) + self.b
|
||||
matrix_inner = K.dot(h_tm1 * B_U[0], self.U[:, :2 * self.output_dim])
|
||||
|
||||
x_z = matrix_x[:, :self.output_dim]
|
||||
x_r = matrix_x[:, self.output_dim: 2 * self.output_dim]
|
||||
inner_z = matrix_inner[:, :self.output_dim]
|
||||
inner_r = matrix_inner[:, self.output_dim: 2 * self.output_dim]
|
||||
|
||||
z = self.inner_activation(x_z + inner_z)
|
||||
r = self.inner_activation(x_r + inner_r)
|
||||
|
||||
x_h = matrix_x[:, 2 * self.output_dim:]
|
||||
inner_h = K.dot(r * h_tm1 * B_U[0], self.U[:, 2 * self.output_dim:])
|
||||
hh = self.activation(x_h + inner_h)
|
||||
else:
|
||||
x_z = K.dot(x * B_W[0], self.W_z) + self.b_z
|
||||
x_r = K.dot(x * B_W[1], self.W_r) + self.b_r
|
||||
x_h = K.dot(x * B_W[2], self.W_h) + self.b_h
|
||||
if self.consume_less == 'cpu':
|
||||
x_z = x[:, :self.output_dim]
|
||||
x_r = x[:, self.output_dim: 2 * self.output_dim]
|
||||
x_h = x[:, 2 * self.output_dim:]
|
||||
elif self.consume_less == 'mem':
|
||||
x_z = K.dot(x * B_W[0], self.W_z) + self.b_z
|
||||
x_r = K.dot(x * B_W[1], self.W_r) + self.b_r
|
||||
x_h = K.dot(x * B_W[2], self.W_h) + self.b_h
|
||||
else:
|
||||
raise Exception('Unknown `consume_less` mode.')
|
||||
z = self.inner_activation(x_z + K.dot(h_tm1 * B_U[0], self.U_z))
|
||||
r = self.inner_activation(x_r + K.dot(h_tm1 * B_U[1], self.U_r))
|
||||
|
||||
z = self.inner_activation(x_z + K.dot(h_tm1 * B_U[0], self.U_z))
|
||||
r = self.inner_activation(x_r + K.dot(h_tm1 * B_U[1], self.U_r))
|
||||
|
||||
hh = self.activation(x_h + K.dot(r * h_tm1 * B_U[2], self.U_h))
|
||||
hh = self.activation(x_h + K.dot(r * h_tm1 * B_U[2], self.U_h))
|
||||
h = z * h_tm1 + (1 - z) * hh
|
||||
return h, [h]
|
||||
|
||||
@@ -566,16 +605,16 @@ class GRU(Recurrent):
|
||||
return constants
|
||||
|
||||
def get_config(self):
|
||||
config = {"output_dim": self.output_dim,
|
||||
"init": self.init.__name__,
|
||||
"inner_init": self.inner_init.__name__,
|
||||
"activation": self.activation.__name__,
|
||||
"inner_activation": self.inner_activation.__name__,
|
||||
"W_regularizer": self.W_regularizer.get_config() if self.W_regularizer else None,
|
||||
"U_regularizer": self.U_regularizer.get_config() if self.U_regularizer else None,
|
||||
"b_regularizer": self.b_regularizer.get_config() if self.b_regularizer else None,
|
||||
"dropout_W": self.dropout_W,
|
||||
"dropout_U": self.dropout_U}
|
||||
config = {'output_dim': self.output_dim,
|
||||
'init': self.init.__name__,
|
||||
'inner_init': self.inner_init.__name__,
|
||||
'activation': self.activation.__name__,
|
||||
'inner_activation': self.inner_activation.__name__,
|
||||
'W_regularizer': self.W_regularizer.get_config() if self.W_regularizer else None,
|
||||
'U_regularizer': self.U_regularizer.get_config() if self.U_regularizer else None,
|
||||
'b_regularizer': self.b_regularizer.get_config() if self.b_regularizer else None,
|
||||
'dropout_W': self.dropout_W,
|
||||
'dropout_U': self.dropout_U}
|
||||
base_config = super(GRU, self).get_config()
|
||||
return dict(list(base_config.items()) + list(config.items()))
|
||||
|
||||
@@ -637,8 +676,7 @@ class LSTM(Recurrent):
|
||||
|
||||
def build(self, input_shape):
|
||||
self.input_spec = [InputSpec(shape=input_shape)]
|
||||
input_dim = input_shape[2]
|
||||
self.input_dim = input_dim
|
||||
self.input_dim = input_shape[2]
|
||||
|
||||
if self.stateful:
|
||||
self.reset_states()
|
||||
@@ -646,56 +684,64 @@ class LSTM(Recurrent):
|
||||
# initial states: 2 all-zero tensors of shape (output_dim)
|
||||
self.states = [None, None]
|
||||
|
||||
self.W_i = self.init((input_dim, self.output_dim),
|
||||
name='{}_W_i'.format(self.name))
|
||||
self.U_i = self.inner_init((self.output_dim, self.output_dim),
|
||||
name='{}_U_i'.format(self.name))
|
||||
self.b_i = K.zeros((self.output_dim,), name='{}_b_i'.format(self.name))
|
||||
if self.consume_less == 'gpu':
|
||||
self.W = self.init((self.input_dim, 4 * self.output_dim),
|
||||
name='{}_W'.format(self.name))
|
||||
self.U = self.inner_init((self.output_dim, 4 * self.output_dim),
|
||||
name='{}_U'.format(self.name))
|
||||
|
||||
self.W_f = self.init((input_dim, self.output_dim),
|
||||
name='{}_W_f'.format(self.name))
|
||||
self.U_f = self.inner_init((self.output_dim, self.output_dim),
|
||||
name='{}_U_f'.format(self.name))
|
||||
self.b_f = self.forget_bias_init((self.output_dim,),
|
||||
name='{}_b_f'.format(self.name))
|
||||
self.b = K.variable(np.hstack((np.zeros(self.output_dim),
|
||||
K.get_value(self.forget_bias_init(self.output_dim)),
|
||||
np.zeros(self.output_dim),
|
||||
np.zeros(self.output_dim))),
|
||||
name='{}_b'.format(self.name))
|
||||
self.trainable_weights = [self.W, self.U, self.b]
|
||||
else:
|
||||
self.W_i = self.init((self.input_dim, self.output_dim),
|
||||
name='{}_W_i'.format(self.name))
|
||||
self.U_i = self.inner_init((self.output_dim, self.output_dim),
|
||||
name='{}_U_i'.format(self.name))
|
||||
self.b_i = K.zeros((self.output_dim,), name='{}_b_i'.format(self.name))
|
||||
|
||||
self.W_c = self.init((input_dim, self.output_dim),
|
||||
name='{}_W_c'.format(self.name))
|
||||
self.U_c = self.inner_init((self.output_dim, self.output_dim),
|
||||
name='{}_U_c'.format(self.name))
|
||||
self.b_c = K.zeros((self.output_dim,), name='{}_b_c'.format(self.name))
|
||||
self.W_f = self.init((self.input_dim, self.output_dim),
|
||||
name='{}_W_f'.format(self.name))
|
||||
self.U_f = self.inner_init((self.output_dim, self.output_dim),
|
||||
name='{}_U_f'.format(self.name))
|
||||
self.b_f = self.forget_bias_init((self.output_dim,),
|
||||
name='{}_b_f'.format(self.name))
|
||||
|
||||
self.W_o = self.init((input_dim, self.output_dim),
|
||||
name='{}_W_o'.format(self.name))
|
||||
self.U_o = self.inner_init((self.output_dim, self.output_dim),
|
||||
name='{}_U_o'.format(self.name))
|
||||
self.b_o = K.zeros((self.output_dim,), name='{}_b_o'.format(self.name))
|
||||
self.W_c = self.init((self.input_dim, self.output_dim),
|
||||
name='{}_W_c'.format(self.name))
|
||||
self.U_c = self.inner_init((self.output_dim, self.output_dim),
|
||||
name='{}_U_c'.format(self.name))
|
||||
self.b_c = K.zeros((self.output_dim,), name='{}_b_c'.format(self.name))
|
||||
|
||||
self.W_o = self.init((self.input_dim, self.output_dim),
|
||||
name='{}_W_o'.format(self.name))
|
||||
self.U_o = self.inner_init((self.output_dim, self.output_dim),
|
||||
name='{}_U_o'.format(self.name))
|
||||
self.b_o = K.zeros((self.output_dim,), name='{}_b_o'.format(self.name))
|
||||
|
||||
self.trainable_weights = [self.W_i, self.U_i, self.b_i,
|
||||
self.W_c, self.U_c, self.b_c,
|
||||
self.W_f, self.U_f, self.b_f,
|
||||
self.W_o, self.U_o, self.b_o]
|
||||
|
||||
self.W = K.concatenate([self.W_i, self.W_f, self.W_c, self.W_o])
|
||||
self.U = K.concatenate([self.U_i, self.U_f, self.U_c, self.U_o])
|
||||
self.b = K.concatenate([self.b_i, self.b_f, self.b_c, self.b_o])
|
||||
|
||||
self.regularizers = []
|
||||
if self.W_regularizer:
|
||||
self.W_regularizer.set_param(K.concatenate([self.W_i,
|
||||
self.W_f,
|
||||
self.W_c,
|
||||
self.W_o]))
|
||||
self.W_regularizer.set_param(self.W)
|
||||
self.regularizers.append(self.W_regularizer)
|
||||
if self.U_regularizer:
|
||||
self.U_regularizer.set_param(K.concatenate([self.U_i,
|
||||
self.U_f,
|
||||
self.U_c,
|
||||
self.U_o]))
|
||||
self.U_regularizer.set_param(self.U)
|
||||
self.regularizers.append(self.U_regularizer)
|
||||
if self.b_regularizer:
|
||||
self.b_regularizer.set_param(K.concatenate([self.b_i,
|
||||
self.b_f,
|
||||
self.b_c,
|
||||
self.b_o]))
|
||||
self.b_regularizer.set_param(self.b)
|
||||
self.regularizers.append(self.b_regularizer)
|
||||
|
||||
self.trainable_weights = [self.W_i, self.U_i, self.b_i,
|
||||
self.W_c, self.U_c, self.b_c,
|
||||
self.W_f, self.U_f, self.b_f,
|
||||
self.W_o, self.U_o, self.b_o]
|
||||
|
||||
if self.initial_weights is not None:
|
||||
self.set_weights(self.initial_weights)
|
||||
del self.initial_weights
|
||||
@@ -743,21 +789,36 @@ class LSTM(Recurrent):
|
||||
B_U = states[2]
|
||||
B_W = states[3]
|
||||
|
||||
if self.consume_less == 'cpu':
|
||||
x_i = x[:, :self.output_dim]
|
||||
x_f = x[:, self.output_dim: 2 * self.output_dim]
|
||||
x_c = x[:, 2 * self.output_dim: 3 * self.output_dim]
|
||||
x_o = x[:, 3 * self.output_dim:]
|
||||
else:
|
||||
x_i = K.dot(x * B_W[0], self.W_i) + self.b_i
|
||||
x_f = K.dot(x * B_W[1], self.W_f) + self.b_f
|
||||
x_c = K.dot(x * B_W[2], self.W_c) + self.b_c
|
||||
x_o = K.dot(x * B_W[3], self.W_o) + self.b_o
|
||||
if self.consume_less == 'gpu':
|
||||
z = K.dot(x * B_W[0], self.W) + K.dot(h_tm1 * B_U[0], self.U) + self.b
|
||||
|
||||
i = self.inner_activation(x_i + K.dot(h_tm1 * B_U[0], self.U_i))
|
||||
f = self.inner_activation(x_f + K.dot(h_tm1 * B_U[1], self.U_f))
|
||||
c = f * c_tm1 + i * self.activation(x_c + K.dot(h_tm1 * B_U[2], self.U_c))
|
||||
o = self.inner_activation(x_o + K.dot(h_tm1 * B_U[3], self.U_o))
|
||||
z0 = z[:, :self.output_dim]
|
||||
z1 = z[:, self.output_dim: 2 * self.output_dim]
|
||||
z2 = z[:, 2 * self.output_dim: 3 * self.output_dim]
|
||||
z3 = z[:, 3 * self.output_dim:]
|
||||
|
||||
i = self.inner_activation(z0)
|
||||
f = self.inner_activation(z1)
|
||||
c = f * c_tm1 + i * self.activation(z2)
|
||||
o = self.inner_activation(z3)
|
||||
else:
|
||||
if self.consume_less == 'cpu':
|
||||
x_i = x[:, :self.output_dim]
|
||||
x_f = x[:, self.output_dim: 2 * self.output_dim]
|
||||
x_c = x[:, 2 * self.output_dim: 3 * self.output_dim]
|
||||
x_o = x[:, 3 * self.output_dim:]
|
||||
elif self.consume_less == 'mem':
|
||||
x_i = K.dot(x * B_W[0], self.W_i) + self.b_i
|
||||
x_f = K.dot(x * B_W[1], self.W_f) + self.b_f
|
||||
x_c = K.dot(x * B_W[2], self.W_c) + self.b_c
|
||||
x_o = K.dot(x * B_W[3], self.W_o) + self.b_o
|
||||
else:
|
||||
raise Exception('Unknown `consume_less` mode.')
|
||||
|
||||
i = self.inner_activation(x_i + K.dot(h_tm1 * B_U[0], self.U_i))
|
||||
f = self.inner_activation(x_f + K.dot(h_tm1 * B_U[1], self.U_f))
|
||||
c = f * c_tm1 + i * self.activation(x_c + K.dot(h_tm1 * B_U[2], self.U_c))
|
||||
o = self.inner_activation(x_o + K.dot(h_tm1 * B_U[3], self.U_o))
|
||||
|
||||
h = o * self.activation(c)
|
||||
return h, [h, c]
|
||||
@@ -784,16 +845,16 @@ class LSTM(Recurrent):
|
||||
return constants
|
||||
|
||||
def get_config(self):
|
||||
config = {"output_dim": self.output_dim,
|
||||
"init": self.init.__name__,
|
||||
"inner_init": self.inner_init.__name__,
|
||||
"forget_bias_init": self.forget_bias_init.__name__,
|
||||
"activation": self.activation.__name__,
|
||||
"inner_activation": self.inner_activation.__name__,
|
||||
"W_regularizer": self.W_regularizer.get_config() if self.W_regularizer else None,
|
||||
"U_regularizer": self.U_regularizer.get_config() if self.U_regularizer else None,
|
||||
"b_regularizer": self.b_regularizer.get_config() if self.b_regularizer else None,
|
||||
"dropout_W": self.dropout_W,
|
||||
"dropout_U": self.dropout_U}
|
||||
config = {'output_dim': self.output_dim,
|
||||
'init': self.init.__name__,
|
||||
'inner_init': self.inner_init.__name__,
|
||||
'forget_bias_init': self.forget_bias_init.__name__,
|
||||
'activation': self.activation.__name__,
|
||||
'inner_activation': self.inner_activation.__name__,
|
||||
'W_regularizer': self.W_regularizer.get_config() if self.W_regularizer else None,
|
||||
'U_regularizer': self.U_regularizer.get_config() if self.U_regularizer else None,
|
||||
'b_regularizer': self.b_regularizer.get_config() if self.b_regularizer else None,
|
||||
'dropout_W': self.dropout_W,
|
||||
'dropout_U': self.dropout_U}
|
||||
base_config = super(LSTM, self).get_config()
|
||||
return dict(list(base_config.items()) + list(config.items()))
|
||||
|
||||
@@ -473,6 +473,8 @@ class Graph(Model):
|
||||
x = self._get_x(data)
|
||||
output_list = super(Graph, self).predict(x, batch_size=batch_size,
|
||||
verbose=verbose)
|
||||
if not isinstance(output_list, list):
|
||||
output_list = [output_list]
|
||||
return dict(zip(self._graph_outputs, output_list))
|
||||
|
||||
def train_on_batch(self, data,
|
||||
@@ -528,6 +530,8 @@ class Graph(Model):
|
||||
|
||||
def predict_on_batch(self, data):
|
||||
output_list = super(Graph, self).predict_on_batch(data)
|
||||
if not isinstance(output_list, list):
|
||||
output_list = [output_list]
|
||||
return dict(zip(self._graph_outputs, output_list))
|
||||
|
||||
def fit_generator(self, generator, samples_per_epoch, nb_epoch,
|
||||
|
||||
+6
-6
@@ -10,6 +10,10 @@ from .legacy.models import Graph
|
||||
|
||||
def model_from_config(config, custom_objects={}):
|
||||
from keras.utils.layer_utils import layer_from_config
|
||||
if isinstance(config, list):
|
||||
raise Exception('model_fom_config expects a dictionary.'
|
||||
'To load an old-style config use the appropiate'
|
||||
'`load_config` method on Sequential or Graph')
|
||||
return layer_from_config(config, custom_objects=custom_objects)
|
||||
|
||||
|
||||
@@ -452,7 +456,7 @@ class Sequential(Model):
|
||||
A Numpy array of predictions.
|
||||
'''
|
||||
if self.model is None:
|
||||
raise Exception('The model needs to be compiled before being used.')
|
||||
self.build()
|
||||
return self.model.predict(x, batch_size=batch_size, verbose=verbose)
|
||||
|
||||
def predict_on_batch(self, x):
|
||||
@@ -534,8 +538,6 @@ class Sequential(Model):
|
||||
# Returns
|
||||
A Numpy array of probability predictions.
|
||||
'''
|
||||
if self.model is None:
|
||||
raise Exception('The model needs to be compiled before being used.')
|
||||
preds = self.predict(x, batch_size, verbose)
|
||||
if preds.min() < 0. or preds.max() > 1.:
|
||||
warnings.warn('Network returning invalid probability values. '
|
||||
@@ -557,8 +559,6 @@ class Sequential(Model):
|
||||
# Returns
|
||||
A numpy array of class predictions.
|
||||
'''
|
||||
if self.model is None:
|
||||
raise Exception('The model needs to be compiled before being used.')
|
||||
proba = self.predict(x, batch_size=batch_size, verbose=verbose)
|
||||
if proba.shape[-1] > 1:
|
||||
return proba.argmax(axis=-1)
|
||||
@@ -703,7 +703,7 @@ class Sequential(Model):
|
||||
|
||||
def get_config(self):
|
||||
'''Returns the model configuration
|
||||
as a Python dictionary.
|
||||
as a Python list.
|
||||
'''
|
||||
config = []
|
||||
if self.layers[0].__class__.__name__ == 'Merge':
|
||||
|
||||
+54
-38
@@ -29,6 +29,11 @@ class Optimizer(object):
|
||||
when their absolute value exceeds this value.
|
||||
'''
|
||||
def __init__(self, **kwargs):
|
||||
allowed_kwargs = {'clipnorm', 'clipvalue'}
|
||||
for k in kwargs:
|
||||
if k not in allowed_kwargs:
|
||||
raise Exception('Unexpected keyword argument '
|
||||
'passed to optimizer: ' + str(k))
|
||||
self.__dict__.update(kwargs)
|
||||
self.updates = []
|
||||
self.weights = []
|
||||
@@ -89,7 +94,12 @@ class Optimizer(object):
|
||||
return weights
|
||||
|
||||
def get_config(self):
|
||||
return {"name": self.__class__.__name__}
|
||||
config = {'name': self.__class__.__name__}
|
||||
if hasattr(self, 'clipnorm'):
|
||||
config['clipnorm'] = self.clipnorm
|
||||
if hasattr(self, 'clipvalue'):
|
||||
config['clipvalue'] = self.clipvalue
|
||||
return config
|
||||
|
||||
|
||||
class SGD(Optimizer):
|
||||
@@ -102,8 +112,8 @@ class SGD(Optimizer):
|
||||
decay: float >= 0. Learning rate decay over each update.
|
||||
nesterov: boolean. Whether to apply Nesterov momentum.
|
||||
'''
|
||||
def __init__(self, lr=0.01, momentum=0., decay=0., nesterov=False,
|
||||
*args, **kwargs):
|
||||
def __init__(self, lr=0.01, momentum=0., decay=0.,
|
||||
nesterov=False, **kwargs):
|
||||
super(SGD, self).__init__(**kwargs)
|
||||
self.__dict__.update(locals())
|
||||
self.iterations = K.variable(0.)
|
||||
@@ -135,11 +145,12 @@ class SGD(Optimizer):
|
||||
return self.updates
|
||||
|
||||
def get_config(self):
|
||||
return {"name": self.__class__.__name__,
|
||||
"lr": float(K.get_value(self.lr)),
|
||||
"momentum": float(K.get_value(self.momentum)),
|
||||
"decay": float(K.get_value(self.decay)),
|
||||
"nesterov": self.nesterov}
|
||||
config = {'lr': float(K.get_value(self.lr)),
|
||||
'momentum': float(K.get_value(self.momentum)),
|
||||
'decay': float(K.get_value(self.decay)),
|
||||
'nesterov': self.nesterov}
|
||||
base_config = super(SGD, self).get_config()
|
||||
return dict(list(base_config.items()) + list(config.items()))
|
||||
|
||||
|
||||
class RMSprop(Optimizer):
|
||||
@@ -157,7 +168,7 @@ class RMSprop(Optimizer):
|
||||
rho: float >= 0.
|
||||
epsilon: float >= 0. Fuzz factor.
|
||||
'''
|
||||
def __init__(self, lr=0.001, rho=0.9, epsilon=1e-6, *args, **kwargs):
|
||||
def __init__(self, lr=0.001, rho=0.9, epsilon=1e-8, **kwargs):
|
||||
super(RMSprop, self).__init__(**kwargs)
|
||||
self.__dict__.update(locals())
|
||||
self.lr = K.variable(lr)
|
||||
@@ -173,7 +184,7 @@ class RMSprop(Optimizer):
|
||||
# update accumulator
|
||||
new_a = self.rho * a + (1. - self.rho) * K.square(g)
|
||||
self.updates.append((a, new_a))
|
||||
new_p = p - self.lr * g / K.sqrt(new_a + self.epsilon)
|
||||
new_p = p - self.lr * g / (K.sqrt(new_a) + self.epsilon)
|
||||
|
||||
# apply constraints
|
||||
if p in constraints:
|
||||
@@ -183,10 +194,11 @@ class RMSprop(Optimizer):
|
||||
return self.updates
|
||||
|
||||
def get_config(self):
|
||||
return {"name": self.__class__.__name__,
|
||||
"lr": float(K.get_value(self.lr)),
|
||||
"rho": float(K.get_value(self.rho)),
|
||||
"epsilon": self.epsilon}
|
||||
config = {'lr': float(K.get_value(self.lr)),
|
||||
'rho': float(K.get_value(self.rho)),
|
||||
'epsilon': self.epsilon}
|
||||
base_config = super(RMSprop, self).get_config()
|
||||
return dict(list(base_config.items()) + list(config.items()))
|
||||
|
||||
|
||||
class Adagrad(Optimizer):
|
||||
@@ -199,7 +211,7 @@ class Adagrad(Optimizer):
|
||||
lr: float >= 0. Learning rate.
|
||||
epsilon: float >= 0.
|
||||
'''
|
||||
def __init__(self, lr=0.01, epsilon=1e-6, *args, **kwargs):
|
||||
def __init__(self, lr=0.01, epsilon=1e-8, **kwargs):
|
||||
super(Adagrad, self).__init__(**kwargs)
|
||||
self.__dict__.update(locals())
|
||||
self.lr = K.variable(lr)
|
||||
@@ -213,7 +225,7 @@ class Adagrad(Optimizer):
|
||||
for p, g, a in zip(params, grads, self.weights):
|
||||
new_a = a + K.square(g) # update accumulator
|
||||
self.updates.append((a, new_a))
|
||||
new_p = p - self.lr * g / K.sqrt(new_a + self.epsilon)
|
||||
new_p = p - self.lr * g / (K.sqrt(new_a) + self.epsilon)
|
||||
# apply constraints
|
||||
if p in constraints:
|
||||
c = constraints[p]
|
||||
@@ -222,9 +234,10 @@ class Adagrad(Optimizer):
|
||||
return self.updates
|
||||
|
||||
def get_config(self):
|
||||
return {"name": self.__class__.__name__,
|
||||
"lr": float(K.get_value(self.lr)),
|
||||
"epsilon": self.epsilon}
|
||||
config = {'lr': float(K.get_value(self.lr)),
|
||||
'epsilon': self.epsilon}
|
||||
base_config = super(Adagrad, self).get_config()
|
||||
return dict(list(base_config.items()) + list(config.items()))
|
||||
|
||||
|
||||
class Adadelta(Optimizer):
|
||||
@@ -242,7 +255,7 @@ class Adadelta(Optimizer):
|
||||
# References
|
||||
- [Adadelta - an adaptive learning rate method](http://arxiv.org/abs/1212.5701)
|
||||
'''
|
||||
def __init__(self, lr=1.0, rho=0.95, epsilon=1e-6, *args, **kwargs):
|
||||
def __init__(self, lr=1.0, rho=0.95, epsilon=1e-8, **kwargs):
|
||||
super(Adadelta, self).__init__(**kwargs)
|
||||
self.__dict__.update(locals())
|
||||
self.lr = K.variable(lr)
|
||||
@@ -275,10 +288,11 @@ class Adadelta(Optimizer):
|
||||
return self.updates
|
||||
|
||||
def get_config(self):
|
||||
return {"name": self.__class__.__name__,
|
||||
"lr": float(K.get_value(self.lr)),
|
||||
"rho": self.rho,
|
||||
"epsilon": self.epsilon}
|
||||
config = {'lr': float(K.get_value(self.lr)),
|
||||
'rho': self.rho,
|
||||
'epsilon': self.epsilon}
|
||||
base_config = super(Adadelta, self).get_config()
|
||||
return dict(list(base_config.items()) + list(config.items()))
|
||||
|
||||
|
||||
class Adam(Optimizer):
|
||||
@@ -294,8 +308,8 @@ class Adam(Optimizer):
|
||||
# References
|
||||
- [Adam - A Method for Stochastic Optimization](http://arxiv.org/abs/1412.6980v8)
|
||||
'''
|
||||
def __init__(self, lr=0.001, beta_1=0.9, beta_2=0.999, epsilon=1e-8,
|
||||
*args, **kwargs):
|
||||
def __init__(self, lr=0.001, beta_1=0.9, beta_2=0.999,
|
||||
epsilon=1e-8, **kwargs):
|
||||
super(Adam, self).__init__(**kwargs)
|
||||
self.__dict__.update(locals())
|
||||
self.iterations = K.variable(0)
|
||||
@@ -331,11 +345,12 @@ class Adam(Optimizer):
|
||||
return self.updates
|
||||
|
||||
def get_config(self):
|
||||
return {"name": self.__class__.__name__,
|
||||
"lr": float(K.get_value(self.lr)),
|
||||
"beta_1": float(K.get_value(self.beta_1)),
|
||||
"beta_2": float(K.get_value(self.beta_2)),
|
||||
"epsilon": self.epsilon}
|
||||
config = {'lr': float(K.get_value(self.lr)),
|
||||
'beta_1': float(K.get_value(self.beta_1)),
|
||||
'beta_2': float(K.get_value(self.beta_2)),
|
||||
'epsilon': self.epsilon}
|
||||
base_config = super(Adam, self).get_config()
|
||||
return dict(list(base_config.items()) + list(config.items()))
|
||||
|
||||
|
||||
class Adamax(Optimizer):
|
||||
@@ -352,8 +367,8 @@ class Adamax(Optimizer):
|
||||
# References
|
||||
- [Adam - A Method for Stochastic Optimization](http://arxiv.org/abs/1412.6980v8)
|
||||
'''
|
||||
def __init__(self, lr=0.002, beta_1=0.9, beta_2=0.999, epsilon=1e-8,
|
||||
*args, **kwargs):
|
||||
def __init__(self, lr=0.002, beta_1=0.9, beta_2=0.999,
|
||||
epsilon=1e-8, **kwargs):
|
||||
super(Adamax, self).__init__(**kwargs)
|
||||
self.__dict__.update(locals())
|
||||
self.iterations = K.variable(0.)
|
||||
@@ -392,11 +407,12 @@ class Adamax(Optimizer):
|
||||
return self.updates
|
||||
|
||||
def get_config(self):
|
||||
return {"name": self.__class__.__name__,
|
||||
"lr": float(K.get_value(self.lr)),
|
||||
"beta_1": float(K.get_value(self.beta_1)),
|
||||
"beta_2": float(K.get_value(self.beta_2)),
|
||||
"epsilon": self.epsilon}
|
||||
config = {'lr': float(K.get_value(self.lr)),
|
||||
'beta_1': float(K.get_value(self.beta_1)),
|
||||
'beta_2': float(K.get_value(self.beta_2)),
|
||||
'epsilon': self.epsilon}
|
||||
base_config = super(Adamax, self).get_config()
|
||||
return dict(list(base_config.items()) + list(config.items()))
|
||||
|
||||
|
||||
# aliases
|
||||
|
||||
@@ -248,9 +248,12 @@ class ImageDataGenerator(object):
|
||||
self.batch_index = 0
|
||||
|
||||
def _flow_index(self, N, batch_size=32, shuffle=False, seed=None):
|
||||
# ensure self.batch_index is 0
|
||||
self.reset()
|
||||
|
||||
while 1:
|
||||
index_array = np.arange(N)
|
||||
if self.batch_index == 0:
|
||||
index_array = np.arange(N)
|
||||
if shuffle:
|
||||
if seed is not None:
|
||||
np.random.seed(seed + self.total_batches_seen)
|
||||
|
||||
@@ -59,9 +59,11 @@ class ActivityRegularizer(Regularizer):
|
||||
raise Exception('Need to call `set_layer` on '
|
||||
'ActivityRegularizer instance '
|
||||
'before calling the instance.')
|
||||
output = self.layer.output
|
||||
regularized_loss = loss + self.l1 * K.sum(K.mean(K.abs(output), axis=0))
|
||||
regularized_loss += self.l2 * K.sum(K.mean(K.square(output), axis=0))
|
||||
regularized_loss = loss
|
||||
for i in range(len(self.layer.inbound_nodes)):
|
||||
output = self.layer.get_output_at(i)
|
||||
regularized_loss += self.l1 * K.sum(K.mean(K.abs(output), axis=0))
|
||||
regularized_loss += self.l2 * K.sum(K.mean(K.square(output), axis=0))
|
||||
return K.in_train_phase(regularized_loss, loss)
|
||||
|
||||
def get_config(self):
|
||||
|
||||
@@ -2,7 +2,6 @@ from __future__ import absolute_import
|
||||
import copy
|
||||
import inspect
|
||||
import types
|
||||
import numpy as np
|
||||
|
||||
from ..utils.np_utils import to_categorical
|
||||
from ..models import Sequential
|
||||
|
||||
+2
-2
@@ -3,12 +3,12 @@ from setuptools import find_packages
|
||||
|
||||
|
||||
setup(name='Keras',
|
||||
version='1.0.2',
|
||||
version='1.0.3',
|
||||
description='Deep Learning for Python',
|
||||
author='Francois Chollet',
|
||||
author_email='francois.chollet@gmail.com',
|
||||
url='https://github.com/fchollet/keras',
|
||||
download_url='https://github.com/fchollet/keras/tarball/1.0.2',
|
||||
download_url='https://github.com/fchollet/keras/tarball/1.0.3',
|
||||
license='MIT',
|
||||
install_requires=['theano', 'pyyaml', 'six'],
|
||||
extras_require={
|
||||
|
||||
@@ -23,7 +23,7 @@ def test_temporal_classification():
|
||||
'''
|
||||
np.random.seed(1337)
|
||||
(X_train, y_train), (X_test, y_test) = get_test_data(nb_train=500,
|
||||
nb_test=200,
|
||||
nb_test=500,
|
||||
input_shape=(3, 5),
|
||||
classification=True,
|
||||
nb_class=2)
|
||||
@@ -35,12 +35,12 @@ def test_temporal_classification():
|
||||
input_shape=(X_train.shape[1], X_train.shape[2]),
|
||||
activation='softmax'))
|
||||
model.compile(loss='categorical_crossentropy',
|
||||
optimizer='adadelta',
|
||||
optimizer='adagrad',
|
||||
metrics=['accuracy'])
|
||||
history = model.fit(X_train, y_train, nb_epoch=5, batch_size=16,
|
||||
history = model.fit(X_train, y_train, nb_epoch=20, batch_size=32,
|
||||
validation_data=(X_test, y_test),
|
||||
verbose=0)
|
||||
assert(history.history['val_acc'][-1] > 0.9)
|
||||
assert(history.history['val_acc'][-1] >= 0.85)
|
||||
|
||||
|
||||
def test_temporal_regression():
|
||||
@@ -182,4 +182,5 @@ def test_masked_temporal():
|
||||
assert(np.abs(history.history['val_loss'][-1] - ground_truth) < 0.06)
|
||||
|
||||
if __name__ == '__main__':
|
||||
pytest.main([__file__])
|
||||
# pytest.main([__file__])
|
||||
test_temporal_classification()
|
||||
|
||||
@@ -91,6 +91,17 @@ class TestBackend(object):
|
||||
assert_allclose(np_rep, th_rep, atol=1e-05)
|
||||
assert_allclose(np_rep, tf_rep, atol=1e-05)
|
||||
|
||||
def test_tile(self):
|
||||
shape = (3, 4)
|
||||
arr = np.arange(np.prod(shape)).reshape(shape)
|
||||
arr_th = KTH.variable(arr)
|
||||
arr_tf = KTF.variable(arr)
|
||||
|
||||
n = (2, 1)
|
||||
th_rep = KTH.eval(KTH.tile(arr_th, n))
|
||||
tf_rep = KTF.eval(KTF.tile(arr_tf, n))
|
||||
assert_allclose(tf_rep, th_rep, atol=1e-05)
|
||||
|
||||
def test_value_manipulation(self):
|
||||
val = np.random.random((4, 2))
|
||||
xth = KTH.variable(val)
|
||||
|
||||
@@ -117,10 +117,10 @@ def test_model_methods():
|
||||
|
||||
out = model.train_on_batch([input_a_np, input_b_np],
|
||||
[output_a_np, output_b_np])
|
||||
assert len(out) == 3
|
||||
assert len(out) == 5
|
||||
out = model.test_on_batch([input_a_np, input_b_np],
|
||||
[output_a_np, output_b_np])
|
||||
assert len(out) == 3
|
||||
assert len(out) == 5
|
||||
|
||||
# this should also work
|
||||
model.compile(optimizer, loss, metrics={'dense_1': 'acc'},
|
||||
@@ -128,10 +128,10 @@ def test_model_methods():
|
||||
|
||||
out = model.train_on_batch([input_a_np, input_b_np],
|
||||
[output_a_np, output_b_np])
|
||||
assert len(out) == 2
|
||||
assert len(out) == 4
|
||||
out = model.test_on_batch([input_a_np, input_b_np],
|
||||
[output_a_np, output_b_np])
|
||||
assert len(out) == 2
|
||||
assert len(out) == 4
|
||||
|
||||
# and this as well
|
||||
model.compile(optimizer, loss, metrics={'dense_1': ['acc']},
|
||||
@@ -139,10 +139,10 @@ def test_model_methods():
|
||||
|
||||
out = model.train_on_batch([input_a_np, input_b_np],
|
||||
[output_a_np, output_b_np])
|
||||
assert len(out) == 2
|
||||
assert len(out) == 4
|
||||
out = model.test_on_batch([input_a_np, input_b_np],
|
||||
[output_a_np, output_b_np])
|
||||
assert len(out) == 2
|
||||
assert len(out) == 4
|
||||
|
||||
# test with a custom metric function
|
||||
mse = lambda y_true, y_pred: K.mean(K.pow(y_true - y_pred, 2))
|
||||
@@ -151,10 +151,10 @@ def test_model_methods():
|
||||
|
||||
out = model.train_on_batch([input_a_np, input_b_np],
|
||||
[output_a_np, output_b_np])
|
||||
assert len(out) == 3
|
||||
assert len(out) == 5
|
||||
out = model.test_on_batch([input_a_np, input_b_np],
|
||||
[output_a_np, output_b_np])
|
||||
assert len(out) == 3
|
||||
assert len(out) == 5
|
||||
|
||||
input_a_np = np.random.random((10, 3))
|
||||
input_b_np = np.random.random((10, 3))
|
||||
|
||||
@@ -32,6 +32,13 @@ def _runner(layer_class):
|
||||
'dropout_W': 0.1},
|
||||
input_shape=(3, 2, 3))
|
||||
|
||||
# check implementation modes
|
||||
for mode in ['cpu', 'mem', 'gpu']:
|
||||
layer_test(layer_class,
|
||||
kwargs={'output_dim': output_dim,
|
||||
'consume_less': mode},
|
||||
input_shape=(3, 2, 3))
|
||||
|
||||
# check statefulness
|
||||
model = Sequential()
|
||||
model.add(embeddings.Embedding(embedding_num, embedding_dim,
|
||||
|
||||
@@ -56,6 +56,22 @@ def test_softplus():
|
||||
assert_allclose(result, expected, rtol=1e-05)
|
||||
|
||||
|
||||
def test_softsign():
|
||||
'''
|
||||
Test using a reference softsign implementation
|
||||
'''
|
||||
def softsign(x):
|
||||
return np.divide(x, np.ones_like(x) + np.absolute(x))
|
||||
|
||||
x = K.placeholder(ndim=2)
|
||||
f = K.function([x], [activations.softsign(x)])
|
||||
test_values = get_standard_values()
|
||||
|
||||
result = f([test_values])[0]
|
||||
expected = softsign(test_values)
|
||||
assert_allclose(result, expected, rtol=1e-05)
|
||||
|
||||
|
||||
def test_sigmoid():
|
||||
'''
|
||||
Test using a numerically stable reference sigmoid implementation
|
||||
|
||||
Referência em uma Nova Issue
Bloquear um usuário