Comparar commits
130 Commits
| Autor | SHA1 | Data | |
|---|---|---|---|
| 8c2a573ebf | |||
| d7ff7cde92 | |||
| 15d0b0ea08 | |||
| 3695bc2db5 | |||
| a08995a90d | |||
| aea00258e7 | |||
| b581eb3f27 | |||
| 610ccba9f5 | |||
| d5ae6f32dd | |||
| 5308033936 | |||
| e2abb5ef2c | |||
| 1b11b4eeb6 | |||
| 39357b3045 | |||
| ed7a5a1418 | |||
| ae682a71f9 | |||
| 8327b37a0b | |||
| 973b5570aa | |||
| 7cb41fc5cc | |||
| 595d67ad7d | |||
| bb626c120e | |||
| ba8fefa8ec | |||
| 4b24f6d7b1 | |||
| 1c460e1e08 | |||
| 7b4e157356 | |||
| 5749f1b971 | |||
| 3c57aff85b | |||
| 18504bcc86 | |||
| d8864bfe48 | |||
| 078b20169b | |||
| 5f7e78df65 | |||
| fc470db7ab | |||
| f576f37801 | |||
| b74118a766 | |||
| 1c7a0248b9 | |||
| 36a829c20d | |||
| 33af75aa39 | |||
| 844420425e | |||
| da57a530f9 | |||
| 1f17013949 | |||
| f18899cb36 | |||
| 877f946e24 | |||
| a981a8c42c | |||
| 5467107fc9 | |||
| ad3107073b | |||
| 8d62f4da6c | |||
| 3779b8a008 | |||
| 6ec5e48969 | |||
| bfa5ca553d | |||
| c9f7d970e9 | |||
| f26ce6e236 | |||
| b001e36f18 | |||
| 9abb6ef723 | |||
| bfbdbb05bc | |||
| bd2bd51b5d | |||
| 4e547a31ed | |||
| de8d0defcd | |||
| 344437c491 | |||
| ed365e94fd | |||
| 5910278ca8 | |||
| 18841fa58d | |||
| 6fb4e0e441 | |||
| 39051ef3ca | |||
| 1f4084870b | |||
| 00e9d5b219 | |||
| 7f93747602 | |||
| a7156b8c27 | |||
| b1e47f7741 | |||
| 59f8d6ca22 | |||
| 5f4019d980 | |||
| f84389da08 | |||
| 63c1757df5 | |||
| d6ab850f45 | |||
| 61dd53e262 | |||
| 423a633b5b | |||
| 256d4ef71b | |||
| ad49962ba9 | |||
| 4680d70a78 | |||
| ee7f056779 | |||
| 66c8d7baf2 | |||
| 9f929999d1 | |||
| 24f96262ec | |||
| 0e6e7a41f4 | |||
| 5cac088d98 | |||
| 85f0448fee | |||
| 106c0b753a | |||
| c525e634dc | |||
| c398c0891b | |||
| 5ab48ac5d4 | |||
| ba29cd8e46 | |||
| b61235b77f | |||
| 0ed00e38f0 | |||
| 36eef0dd9a | |||
| 1904194c7a | |||
| 7ce144881a | |||
| 55159cf451 | |||
| 7a12fd0f85 | |||
| 9d60126661 | |||
| e341e73c6a | |||
| 5dad3786f6 | |||
| ed0cd2c60d | |||
| 2eea3a4c5d | |||
| 090a46763e | |||
| c4ed82cdf6 | |||
| b32248d615 | |||
| ca7437502b | |||
| fe9b797a46 | |||
| b8059aeaba | |||
| 85f80714c2 | |||
| 2cc9ebf28b | |||
| cb5d69c769 | |||
| 3b961a6b7b | |||
| cba3ea9d90 | |||
| 57ea065db7 | |||
| 4f5f88b9ba | |||
| c1c2b330a1 | |||
| 05e1d8e5f4 | |||
| 3cbca7bdba | |||
| 3e3c210f1d | |||
| cb65139aa8 | |||
| 1206120d10 | |||
| 80ebe80138 | |||
| fa1d6b478e | |||
| 345413fb8c | |||
| 66ebd2a843 | |||
| d50f469c09 | |||
| 26714bc635 | |||
| 30208ae08b | |||
| 6ea3188971 | |||
| 1db555a530 | |||
| 0772210dea |
@@ -57,6 +57,8 @@ install:
|
||||
script:
|
||||
# run keras backend init to initialize backend config
|
||||
- python -c "import keras.backend"
|
||||
# create dataset directory to avoid concurrent directory creation at runtime
|
||||
- mkdir ~/.keras/datasets
|
||||
# set up keras backend
|
||||
- sed -i -e 's/"backend":[[:space:]]*"[^"]*/"backend":\ "'$KERAS_BACKEND'/g' ~/.keras/keras.json;
|
||||
- echo -e "Running tests with the following config:\n$(cat ~/.keras/keras.json)"
|
||||
|
||||
+1
-1
@@ -38,7 +38,7 @@ Keras is compatible with: __Python 2.7-3.5__.
|
||||
|
||||
## Getting started: 30 seconds to Keras
|
||||
|
||||
The core data structure of Keras is a __model__, a way to organize layers. The main type of model is the [`Sequential`](http://keras.io/getting-started/sequential-model-guide) model, a linear stack of layers. For more complex architectures, you should use the [Keras function API](http://keras.io/getting-started/functional-api-guide).
|
||||
The core data structure of Keras is a __model__, a way to organize layers. The main type of model is the [`Sequential`](http://keras.io/getting-started/sequential-model-guide) model, a linear stack of layers. For more complex architectures, you should use the [Keras functional API](http://keras.io/getting-started/functional-api-guide).
|
||||
|
||||
Here's the `Sequential` model:
|
||||
|
||||
|
||||
+6
-2
@@ -53,10 +53,16 @@ Scikit-learn API
|
||||
|
||||
'''
|
||||
from __future__ import print_function
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
import inspect
|
||||
import os
|
||||
import shutil
|
||||
import sys
|
||||
if sys.version[0] == '2':
|
||||
reload(sys)
|
||||
sys.setdefaultencoding('utf8')
|
||||
|
||||
from keras.layers import convolutional
|
||||
from keras.layers import recurrent
|
||||
@@ -250,8 +256,6 @@ def get_function_signature(function, method=True):
|
||||
for a, v in kwargs:
|
||||
if type(v) == str:
|
||||
v = '\'' + v + '\''
|
||||
elif type(v) == unicode:
|
||||
v = 'u\'' + v + '\''
|
||||
st += str(a) + '=' + str(v) + ', '
|
||||
if kwargs or args:
|
||||
return st[:-2] + ')'
|
||||
|
||||
externo
+1
@@ -30,6 +30,7 @@ model.add(Activation(tanh))
|
||||
|
||||
- __softmax__: Softmax applied across inputs last dimension. Expects shape either `(nb_samples, nb_timesteps, nb_dims)` or `(nb_samples, nb_dims)`.
|
||||
- __softplus__
|
||||
- __softsign__
|
||||
- __relu__
|
||||
- __tanh__
|
||||
- __sigmoid__
|
||||
|
||||
+25
-4
@@ -20,7 +20,7 @@ Please cite Keras in your publications if it helps your research. Here is an exa
|
||||
|
||||
```
|
||||
@misc{chollet2015keras,
|
||||
author = {Chollet, François},
|
||||
author = {Chollet, Francois},
|
||||
title = {Keras},
|
||||
year = {2015},
|
||||
publisher = {GitHub},
|
||||
@@ -102,6 +102,11 @@ model = model_from_json(open('my_model_architecture.json').read())
|
||||
model.load_weights('my_model_weights.h5')
|
||||
```
|
||||
|
||||
Finally, before it can be used, the model shall be compiled.
|
||||
```python
|
||||
model.compile(optimizer='adagrad', loss='mse')
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### Why is the training loss much higher than the testing loss?
|
||||
@@ -134,13 +139,29 @@ to pass the learning phase flag to your function:
|
||||
get_3rd_layer_output = K.function([model.layers[0].input, K.learning_phase()],
|
||||
[model.layers[3].output])
|
||||
|
||||
# output in train mode
|
||||
layer_output = get_3rd_layer_output([X, 1])[0]
|
||||
# output in test mode = 0
|
||||
layer_output = get_3rd_layer_output([X, 0])[0]
|
||||
|
||||
# output in test mode
|
||||
# output in train mode = 1
|
||||
layer_output = get_3rd_layer_output([X, 1])[0]
|
||||
```
|
||||
|
||||
Another more flexible way of getting output from intermediate layers is to use the [functional API](/getting-started/functional-api-guide). For example, if you have created an autoencoder for MNIST:
|
||||
|
||||
```python
|
||||
inputs = Input(shape=(784,))
|
||||
encoded = Dense(32, activation='relu')(inputs)
|
||||
decoded = Dense(784)(encoded)
|
||||
model = Model(input=inputs, output=decoded)
|
||||
```
|
||||
|
||||
After compiling and training the model, you can get the output of the data from the encoder like this:
|
||||
|
||||
```python
|
||||
encoder = Model(input=inputs, output=encoded)
|
||||
X_encoded = encoder.predict(X)
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### How can I use Keras with datasets that don't fit in memory?
|
||||
|
||||
@@ -45,7 +45,7 @@ With the functional API, it is easy to re-use trained models: you can treat any
|
||||
|
||||
```python
|
||||
x = Input(shape=(784,))
|
||||
# this works, and returns the 10-way softmax we defined above.
|
||||
# this works, and returns the 10-way softmax we defined above.
|
||||
y = model(x)
|
||||
```
|
||||
|
||||
@@ -127,12 +127,12 @@ model = Model(input=[main_input, auxiliary_input], output=[main_loss, auxiliary_
|
||||
```
|
||||
|
||||
We compile the model and assign a weight of 0.2 to the auxiliary loss.
|
||||
To specify different `loss_weight` or `loss` for each different output, you can use a list or a dictionary.
|
||||
To specify different `loss_weights` or `loss` for each different output, you can use a list or a dictionary.
|
||||
Here we pass a single loss as the `loss` argument, so the same loss will be used on all outputs.
|
||||
|
||||
```python
|
||||
model.compile(optimizer='rmsprop', loss='binary_crossentropy',
|
||||
loss_weight=[1., 0.2])
|
||||
loss_weights=[1., 0.2])
|
||||
```
|
||||
|
||||
We can train the model by passing it lists of input arrays and target arrays:
|
||||
@@ -148,7 +148,7 @@ We could also have compiled the model via:
|
||||
```python
|
||||
model.compile(optimizer='rmsprop',
|
||||
loss={'main_output': 'binary_crossentropy', 'aux_output': 'binary_crossentropy'},
|
||||
loss_weight={'main_output': 1., 'aux_output': 0.2})
|
||||
loss_weights={'main_output': 1., 'aux_output': 0.2})
|
||||
|
||||
# and trained it via:
|
||||
model.fit({'main_input': headline_data, 'aux_input': additional_data},
|
||||
@@ -196,7 +196,7 @@ encoded_b = shared_lstm(tweet_b)
|
||||
merged_vector = merge([encoded_a, encoded_b], mode='concat', concat_axis=-1)
|
||||
|
||||
# and add a logistic regression on top
|
||||
predictions = Dense(1, activation='sigmoid')(merged_vector)
|
||||
predictions = Dense(1, activation='sigmoid')(merged_vector)
|
||||
|
||||
# we define a trainable model linking the
|
||||
# tweet inputs to the predictions
|
||||
@@ -309,8 +309,8 @@ from keras.layers import merge, Convolution2D, Input
|
||||
|
||||
# input tensor for a 3-channel 256x256 image
|
||||
x = Input(shape=(3, 256, 256))
|
||||
# 3x3 conv with 16 output channels
|
||||
y = Convolution2D(16, 3, 3, border_mode='same')
|
||||
# 3x3 conv with 3 output channels (same as input channels)
|
||||
y = Convolution2D(3, 3, 3, border_mode='same')
|
||||
# this returns x + y.
|
||||
z = merge([x, y], mode='sum')
|
||||
```
|
||||
|
||||
@@ -112,7 +112,7 @@ Now you know enough to be able to define *almost* any model with Keras. For comp
|
||||
Before training a model, you need to configure the learning process, which is done via the `compile` method. It receives three arguments:
|
||||
|
||||
- an optimizer. This could be the string identifier of an existing optimizer (such as `rmsprop` or `adagrad`), or an instance of the `Optimizer` class. See: [optimizers](/optimizers).
|
||||
- a loss function. This is the objective that the model will try to minimize. If can be the string identifier of an existing loss function (such as `categorical_crossentropy` or `mse`), or it can be an objective function. See: [objectives](/objectives).
|
||||
- a loss function. This is the objective that the model will try to minimize. It can be the string identifier of an existing loss function (such as `categorical_crossentropy` or `mse`), or it can be an objective function. See: [objectives](/objectives).
|
||||
- a list of metrics. For any classification problem you will want to set this to `metrics=['accuracy']`. A metric could be the string identifier of an existing metric (only `accuracy` is supported at this point), or a custom metric function.
|
||||
|
||||
```python
|
||||
@@ -538,4 +538,4 @@ y_val = np.random.random((100, nb_classes))
|
||||
decoder.fit([x_train_a, x_train_b], y_train,
|
||||
batch_size=64, nb_epoch=5,
|
||||
validation_data=([x_val_a, x_val_b], y_val))
|
||||
```
|
||||
```
|
||||
|
||||
@@ -0,0 +1,34 @@
|
||||
# Writing your own Keras layers
|
||||
|
||||
For simple, stateless custom operations, you are probably better off using `layers.core.Lambda` layers. But for any custom operation that has trainable weights, you should implement your own layer.
|
||||
|
||||
Here is the skeleton of a Keras layer. There are only three methods you need to implement:
|
||||
|
||||
- `build(input_shape)`: this is where you will define your weights. Trainable weights should be added to the list `self.trainable_weights`. Other attributes of note are: `self.non_trainable_weights` (list) and `self.updates` (list of update tuples (tensor, new_tensor)). For an example of how to use `non_trainable_weights` and `updates`, see the code for the `BatchNormalization` layer.
|
||||
- `call(x)`: this is where the layer's logic lives. Unless you want your layer to support masking, you only have to care about the first argument passed to `call`: the input tensor.
|
||||
- `get_output_shape_for(input_shape)`: in case your layer modifies the shape of its input, you should specify here the shape transformation logic. This allows Keras to do automatic shape inference.
|
||||
|
||||
```python
|
||||
from keras import backend as K
|
||||
from keras.engine.topology import Layer
|
||||
import numpy as np
|
||||
|
||||
class MyLayer(Layer):
|
||||
def __init__(self, output_dim, **kwargs):
|
||||
self.output_dim = output_dim
|
||||
super(MyLayer, self).__init__(**kwargs)
|
||||
|
||||
def build(self, input_shape):
|
||||
input_dim = input_shape[1]
|
||||
initial_weight_value = np.random.random((input_dim, self.output_dim))
|
||||
self.W = K.variable(initial_weight_value)
|
||||
self.trainable_weights = [self.W]
|
||||
|
||||
def call(self, x, mask=None):
|
||||
return K.dot(x, self.W)
|
||||
|
||||
def get_output_shape_for(self, input_shape):
|
||||
return (input_shape[0], self.output_dim)
|
||||
```
|
||||
|
||||
The existing Keras layers provide ample examples of how to implement almost anything. Never hesitate to read the source code!
|
||||
externo
+2
-2
@@ -11,7 +11,7 @@ b = Dense(32)(a)
|
||||
model = Model(input=a, output=b)
|
||||
```
|
||||
|
||||
This model will include all layers required in the computation of `a` given `b`.
|
||||
This model will include all layers required in the computation of `b` given `a`.
|
||||
|
||||
In the case of multi-input or multi-output models, you can use lists as well:
|
||||
|
||||
@@ -29,4 +29,4 @@ For a detailed introduction of what `Model` can do, read [this guide to the Kera
|
||||
|
||||
## Methods
|
||||
|
||||
{{autogenerated}}
|
||||
{{autogenerated}}
|
||||
|
||||
externo
+1
@@ -26,5 +26,6 @@ For a few examples of such functions, check out the [objectives source](https://
|
||||
- __hinge__
|
||||
- __binary_crossentropy__: Also known as logloss.
|
||||
- __categorical_crossentropy__: Also known as multiclass logloss. __Note__: using this objective requires that your labels are binary arrays of shape `(nb_samples, nb_classes)`.
|
||||
- __sparse_categorical_crossentropy__: As above but accepts sparse labels. __Note__: this objective still requires that your labels have the same number of dimensions as your outputs; you may need to add a length-1 dimension to the shape of your labels, e.g. with `np.expand_dims(y, -1)`.
|
||||
- __poisson__: mean of `(predictions - targets * log(predictions))`
|
||||
- __cosine_proximity__: the opposite (negative) of the mean cosine proximity between predictions and targets.
|
||||
|
||||
+13
-1
@@ -11,8 +11,13 @@ keras.preprocessing.image.ImageDataGenerator(featurewise_center=True,
|
||||
width_shift_range=0.,
|
||||
height_shift_range=0.,
|
||||
shear_range=0.,
|
||||
zoom_range=0.,
|
||||
channel_shift_range=0.,
|
||||
fill_mode='nearest',
|
||||
cval=0.,
|
||||
horizontal_flip=False,
|
||||
vertical_flip=False)
|
||||
vertical_flip=False,
|
||||
dim_ordering='th')
|
||||
```
|
||||
|
||||
Generate batches of tensor image data with real-time data augmentation. The data will be looped over (in batches) indefinitely.
|
||||
@@ -27,8 +32,15 @@ Generate batches of tensor image data with real-time data augmentation. The data
|
||||
- __width_shift_range__: Float (fraction of total width). Range for random horizontal shifts.
|
||||
- __height_shift_range__: Float (fraction of total height). Range for random vertical shifts.
|
||||
- __shear_range__: Float. Shear Intensity (Shear angle in counter-clockwise direction as radians)
|
||||
- __zoom_range__: Float or [lower, upper]. Range for random zoom. If a float, `[lower, upper] = [1-zoom_range, 1+zoom_range]`.
|
||||
- __channel_shift_range__: Float. Range for random channel shifts.
|
||||
- __fill_mode__: One of {"constant", "nearest", "reflect" or "wrap"}. Points outside the boundaries of the input are filled according to the given mode.
|
||||
- __cval__: Float or Int. Value used for points outside the boundaries when `fill_mode = "constant"`.
|
||||
- __horizontal_flip__: Boolean. Randomly flip inputs horizontally.
|
||||
- __vertical_flip__: Boolean. Randomly flip inputs vertically.
|
||||
- __dim_ordering__: One of {"th", "tf"}.
|
||||
"tf" mode means that the images should have shape `(samples, width, height, channels)`,
|
||||
"th" mode means that the images should have shape `(samples, channels, width, height)`.
|
||||
|
||||
- __Methods__:
|
||||
- __fit(X)__: Required if featurewise_center or featurewise_std_normalization or zca_whitening. Compute necessary quantities on some sample data.
|
||||
|
||||
@@ -29,8 +29,7 @@ Five digits inverted:
|
||||
from __future__ import print_function
|
||||
from keras.models import Sequential
|
||||
from keras.engine.training import slice_X
|
||||
from keras.layers.core import Activation, TimeDistributedDense, RepeatVector
|
||||
from keras.layers import recurrent
|
||||
from keras.layers import Activation, TimeDistributedDense, RepeatVector, recurrent
|
||||
import numpy as np
|
||||
from six.moves import range
|
||||
|
||||
|
||||
@@ -2,7 +2,7 @@
|
||||
|
||||
We build a custom activation layer called 'Antirectifier',
|
||||
which modifies the shape of the tensor that passes through it.
|
||||
We need to specify two methods: `output_shape` and `get_output`.
|
||||
We need to specify two methods: `get_output_shape_for` and `call`.
|
||||
|
||||
Note that the same result can also be achieved via a Lambda layer.
|
||||
|
||||
@@ -12,7 +12,7 @@ backend (`K`), our code can run both on TensorFlow and Theano.
|
||||
|
||||
from __future__ import print_function
|
||||
from keras.models import Sequential
|
||||
from keras.layers.core import Dense, Dropout, Layer, Activation
|
||||
from keras.layers import Dense, Dropout, Layer, Activation
|
||||
from keras.datasets import mnist
|
||||
from keras import backend as K
|
||||
from keras.utils import np_utils
|
||||
|
||||
@@ -16,8 +16,8 @@ Time per epoch: 3s on CPU (core i7).
|
||||
from __future__ import print_function
|
||||
from keras.models import Sequential
|
||||
from keras.layers.embeddings import Embedding
|
||||
from keras.layers.core import Activation, Dense, Merge, Permute, Dropout
|
||||
from keras.layers.recurrent import LSTM
|
||||
from keras.layers import Activation, Dense, Merge, Permute, Dropout
|
||||
from keras.layers import LSTM
|
||||
from keras.utils.data_utils import get_file
|
||||
from keras.preprocessing.sequence import pad_sequences
|
||||
from functools import reduce
|
||||
|
||||
@@ -66,7 +66,7 @@ np.random.seed(1337) # for reproducibility
|
||||
|
||||
from keras.utils.data_utils import get_file
|
||||
from keras.layers.embeddings import Embedding
|
||||
from keras.layers.core import Dense, Merge, Dropout, RepeatVector
|
||||
from keras.layers import Dense, Merge, Dropout, RepeatVector
|
||||
from keras.layers import recurrent
|
||||
from keras.models import Sequential
|
||||
from keras.preprocessing.sequence import pad_sequences
|
||||
|
||||
@@ -15,8 +15,8 @@ from __future__ import print_function
|
||||
from keras.datasets import cifar10
|
||||
from keras.preprocessing.image import ImageDataGenerator
|
||||
from keras.models import Sequential
|
||||
from keras.layers.core import Dense, Dropout, Activation, Flatten
|
||||
from keras.layers.convolutional import Convolution2D, MaxPooling2D
|
||||
from keras.layers import Dense, Dropout, Activation, Flatten
|
||||
from keras.layers import Convolution2D, MaxPooling2D
|
||||
from keras.optimizers import SGD
|
||||
from keras.utils import np_utils
|
||||
|
||||
|
||||
@@ -24,7 +24,7 @@ import h5py
|
||||
import os
|
||||
|
||||
from keras.models import Sequential
|
||||
from keras.layers.convolutional import Convolution2D, ZeroPadding2D, MaxPooling2D
|
||||
from keras.layers import Convolution2D, ZeroPadding2D, MaxPooling2D
|
||||
from keras import backend as K
|
||||
|
||||
parser = argparse.ArgumentParser(description='Deep Dreams with Keras.')
|
||||
|
||||
+22
-15
@@ -1,6 +1,9 @@
|
||||
'''This example demonstrates the use of Convolution1D for text classification.
|
||||
|
||||
Gets to 0.835 test accuracy after 2 epochs. 100s/epoch on K520 GPU.
|
||||
Gets to 0.88 test accuracy after 2 epochs.
|
||||
90s/epoch on Intel i5 2.4Ghz CPU.
|
||||
10s/epoch on Tesla K40 GPU.
|
||||
|
||||
'''
|
||||
|
||||
from __future__ import print_function
|
||||
@@ -9,17 +12,18 @@ np.random.seed(1337) # for reproducibility
|
||||
|
||||
from keras.preprocessing import sequence
|
||||
from keras.models import Sequential
|
||||
from keras.layers.core import Dense, Dropout, Activation, Flatten
|
||||
from keras.layers.embeddings import Embedding
|
||||
from keras.layers.convolutional import Convolution1D, MaxPooling1D
|
||||
from keras.layers import Dense, Dropout, Activation, Lambda
|
||||
from keras.layers import Embedding
|
||||
from keras.layers import Convolution1D
|
||||
from keras.datasets import imdb
|
||||
from keras import backend as K
|
||||
|
||||
|
||||
# set parameters:
|
||||
max_features = 5000
|
||||
maxlen = 100
|
||||
maxlen = 400
|
||||
batch_size = 32
|
||||
embedding_dims = 100
|
||||
embedding_dims = 50
|
||||
nb_filter = 250
|
||||
filter_length = 3
|
||||
hidden_dims = 250
|
||||
@@ -42,8 +46,10 @@ model = Sequential()
|
||||
|
||||
# we start off with an efficient embedding layer which maps
|
||||
# our vocab indices into embedding_dims dimensions
|
||||
model.add(Embedding(max_features, embedding_dims, input_length=maxlen))
|
||||
model.add(Dropout(0.25))
|
||||
model.add(Embedding(max_features,
|
||||
embedding_dims,
|
||||
input_length=maxlen,
|
||||
dropout=0.2))
|
||||
|
||||
# we add a Convolution1D, which will learn nb_filter
|
||||
# word group filters of size filter_length:
|
||||
@@ -52,16 +58,17 @@ model.add(Convolution1D(nb_filter=nb_filter,
|
||||
border_mode='valid',
|
||||
activation='relu',
|
||||
subsample_length=1))
|
||||
# we use standard max pooling (halving the output of the previous layer):
|
||||
model.add(MaxPooling1D(pool_length=2))
|
||||
|
||||
# We flatten the output of the conv layer,
|
||||
# so that we can add a vanilla dense layer:
|
||||
model.add(Flatten())
|
||||
# we use max over time pooling by defining a python function to use
|
||||
# in a Lambda layer
|
||||
def max_1d(X):
|
||||
return K.max(X, axis=1)
|
||||
|
||||
model.add(Lambda(max_1d, output_shape=(nb_filter,)))
|
||||
|
||||
# We add a vanilla hidden layer:
|
||||
model.add(Dense(hidden_dims))
|
||||
model.add(Dropout(0.25))
|
||||
model.add(Dropout(0.2))
|
||||
model.add(Activation('relu'))
|
||||
|
||||
# We project onto a single unit output layer, and squash it with a sigmoid:
|
||||
@@ -69,7 +76,7 @@ model.add(Dense(1))
|
||||
model.add(Activation('sigmoid'))
|
||||
|
||||
model.compile(loss='binary_crossentropy',
|
||||
optimizer='rmsprop',
|
||||
optimizer='adam',
|
||||
metrics=['accuracy'])
|
||||
model.fit(X_train, y_train,
|
||||
batch_size=batch_size,
|
||||
|
||||
@@ -9,10 +9,10 @@ np.random.seed(1337) # for reproducibility
|
||||
|
||||
from keras.preprocessing import sequence
|
||||
from keras.models import Sequential
|
||||
from keras.layers.core import Dense, Dropout, Activation
|
||||
from keras.layers.embeddings import Embedding
|
||||
from keras.layers.recurrent import LSTM, GRU, SimpleRNN
|
||||
from keras.layers.convolutional import Convolution1D, MaxPooling1D
|
||||
from keras.layers import Dense, Dropout, Activation
|
||||
from keras.layers import Embedding
|
||||
from keras.layers import LSTM, GRU, SimpleRNN
|
||||
from keras.layers import Convolution1D, MaxPooling1D
|
||||
from keras.datasets import imdb
|
||||
|
||||
|
||||
|
||||
@@ -19,9 +19,8 @@ np.random.seed(1337) # for reproducibility
|
||||
from keras.preprocessing import sequence
|
||||
from keras.utils import np_utils
|
||||
from keras.models import Sequential
|
||||
from keras.layers.core import Dense, Dropout, Activation
|
||||
from keras.layers.embeddings import Embedding
|
||||
from keras.layers.recurrent import LSTM, SimpleRNN, GRU
|
||||
from keras.layers import Dense, Dropout, Activation, Embedding
|
||||
from keras.layers import LSTM, SimpleRNN, GRU
|
||||
from keras.datasets import imdb
|
||||
|
||||
max_features = 20000
|
||||
|
||||
@@ -0,0 +1,290 @@
|
||||
'''This script demonstrates how to build the Inception v3 architecture
|
||||
using the Keras functional API.
|
||||
We are not actually training it here, for lack of appropriate data.
|
||||
|
||||
For more information about this architecture, see:
|
||||
|
||||
"Rethinking the Inception Architecture for Computer Vision"
|
||||
Christian Szegedy, Vincent Vanhoucke, Sergey Ioffe, Jonathon Shlens, Zbigniew Wojna
|
||||
http://arxiv.org/abs/1512.00567
|
||||
'''
|
||||
from keras.layers import Convolution2D, MaxPooling2D, AveragePooling2D
|
||||
from keras.layers import BatchNormalization, Flatten, Dense, Dropout
|
||||
from keras.layers import Input, merge
|
||||
from keras.models import Model
|
||||
from keras import regularizers
|
||||
|
||||
|
||||
# global constants
|
||||
NB_CLASS = 1000 # number of classes
|
||||
DIM_ORDERING = 'th' # 'th' (channels, width, height) or 'tf' (width, height, channels)
|
||||
WEIGHT_DECAY = 0. # L2 regularization factor
|
||||
USE_BN = False # whether to use batch normalization
|
||||
|
||||
|
||||
def conv2D_bn(x, nb_filter, nb_row, nb_col,
|
||||
border_mode='same', subsample=(1, 1),
|
||||
activation='relu', batch_norm=USE_BN,
|
||||
weight_decay=WEIGHT_DECAY, dim_ordering=DIM_ORDERING):
|
||||
'''Utility function to apply to a tensor a module conv + BN
|
||||
with optional weight decay (L2 weight regularization).
|
||||
'''
|
||||
if weight_decay:
|
||||
W_regularizer = regularizers.l2(weight_decay)
|
||||
b_regularizer = regularizers.l2(weight_decay)
|
||||
else:
|
||||
W_regularizer = None
|
||||
b_regularizer = None
|
||||
x = Convolution2D(nb_filter, nb_row, nb_col,
|
||||
subsample=subsample,
|
||||
activation=activation,
|
||||
border_mode=border_mode,
|
||||
W_regularizer=W_regularizer,
|
||||
b_regularizer=b_regularizer,
|
||||
dim_ordering=dim_ordering)(x)
|
||||
if batch_norm:
|
||||
x = BatchNormalization()(x)
|
||||
return x
|
||||
|
||||
# Define image input layer
|
||||
|
||||
if DIM_ORDERING == 'th':
|
||||
img_input = Input(shape=(3, 299, 299))
|
||||
CONCAT_AXIS = 1
|
||||
elif DIM_ORDERING == 'tf':
|
||||
img_input = Input(shape=(299, 299, 3))
|
||||
CONCAT_AXIS = 3
|
||||
else:
|
||||
raise Exception('Invalid dim ordering: ' + str(DIM_ORDERING))
|
||||
|
||||
# Entry module
|
||||
|
||||
x = conv2D_bn(img_input, 32, 3, 3, subsample=(2, 2), border_mode='valid')
|
||||
x = conv2D_bn(x, 32, 3, 3, border_mode='valid')
|
||||
x = conv2D_bn(x, 64, 3, 3)
|
||||
x = MaxPooling2D((3, 3), strides=(2, 2), dim_ordering=DIM_ORDERING)(x)
|
||||
|
||||
x = conv2D_bn(x, 80, 1, 1, border_mode='valid')
|
||||
x = conv2D_bn(x, 192, 3, 3, border_mode='valid')
|
||||
x = MaxPooling2D((3, 3), strides=(2, 2), dim_ordering=DIM_ORDERING)(x)
|
||||
|
||||
# mixed: 35 x 35 x 256
|
||||
|
||||
branch1x1 = conv2D_bn(x, 64, 1, 1)
|
||||
|
||||
branch5x5 = conv2D_bn(x, 48, 1, 1)
|
||||
branch5x5 = conv2D_bn(branch5x5, 64, 5, 5)
|
||||
|
||||
branch3x3dbl = conv2D_bn(x, 64, 1, 1)
|
||||
branch3x3dbl = conv2D_bn(branch3x3dbl, 96, 3, 3)
|
||||
branch3x3dbl = conv2D_bn(branch3x3dbl, 96, 3, 3)
|
||||
|
||||
branch_pool = AveragePooling2D((3, 3), strides=(1, 1), border_mode='same', dim_ordering=DIM_ORDERING)(x)
|
||||
branch_pool = conv2D_bn(branch_pool, 32, 1, 1)
|
||||
x = merge([branch1x1, branch5x5, branch3x3dbl, branch_pool], mode='concat', concat_axis=CONCAT_AXIS)
|
||||
|
||||
# mixed_1: 35 x 35 x 288
|
||||
|
||||
branch1x1 = conv2D_bn(x, 64, 1, 1)
|
||||
|
||||
branch5x5 = conv2D_bn(x, 48, 1, 1)
|
||||
branch5x5 = conv2D_bn(branch5x5, 64, 5, 5)
|
||||
|
||||
branch3x3dbl = conv2D_bn(x, 64, 1, 1)
|
||||
branch3x3dbl = conv2D_bn(branch3x3dbl, 96, 3, 3)
|
||||
branch3x3dbl = conv2D_bn(branch3x3dbl, 96, 3, 3)
|
||||
|
||||
branch_pool = AveragePooling2D((3, 3), strides=(1, 1), border_mode='same', dim_ordering=DIM_ORDERING)(x)
|
||||
branch_pool = conv2D_bn(branch_pool, 64, 1, 1)
|
||||
x = merge([branch1x1, branch5x5, branch3x3dbl, branch_pool], mode='concat', concat_axis=CONCAT_AXIS)
|
||||
|
||||
# mixed2: 35 x 35 x 288
|
||||
|
||||
branch1x1 = conv2D_bn(x, 64, 1, 1)
|
||||
|
||||
branch5x5 = conv2D_bn(x, 48, 1, 1)
|
||||
branch5x5 = conv2D_bn(branch5x5, 64, 5, 5)
|
||||
|
||||
branch3x3dbl = conv2D_bn(x, 64, 1, 1)
|
||||
branch3x3dbl = conv2D_bn(branch3x3dbl, 96, 3, 3)
|
||||
branch3x3dbl = conv2D_bn(branch3x3dbl, 96, 3, 3)
|
||||
|
||||
branch_pool = AveragePooling2D((3, 3), strides=(1, 1), border_mode='same', dim_ordering=DIM_ORDERING)(x)
|
||||
branch_pool = conv2D_bn(branch_pool, 64, 1, 1)
|
||||
x = merge([branch1x1, branch5x5, branch3x3dbl, branch_pool], mode='concat', concat_axis=CONCAT_AXIS)
|
||||
|
||||
# mixed3: 17 x 17 x 768
|
||||
|
||||
branch3x3 = conv2D_bn(x, 384, 3, 3, subsample=(2, 2), border_mode='valid')
|
||||
|
||||
branch3x3dbl = conv2D_bn(x, 64, 1, 1)
|
||||
branch3x3dbl = conv2D_bn(branch3x3dbl, 96, 3, 3)
|
||||
branch3x3dbl = conv2D_bn(branch3x3dbl, 96, 3, 3, subsample=(2, 2), border_mode='valid')
|
||||
|
||||
branch_pool = MaxPooling2D((3, 3), strides=(2, 2), dim_ordering=DIM_ORDERING)(x)
|
||||
x = merge([branch3x3, branch3x3dbl, branch_pool], mode='concat', concat_axis=CONCAT_AXIS)
|
||||
|
||||
# mixed4: 17 x 17 x 768
|
||||
|
||||
branch1x1 = conv2D_bn(x, 192, 1, 1)
|
||||
|
||||
branch7x7 = conv2D_bn(x, 128, 1, 1)
|
||||
branch7x7 = conv2D_bn(branch7x7, 128, 1, 7)
|
||||
branch7x7 = conv2D_bn(branch7x7, 192, 7, 1)
|
||||
|
||||
branch7x7dbl = conv2D_bn(x, 128, 1, 1)
|
||||
branch7x7dbl = conv2D_bn(branch7x7dbl, 128, 7, 1)
|
||||
branch7x7dbl = conv2D_bn(branch7x7dbl, 128, 1, 7)
|
||||
branch7x7dbl = conv2D_bn(branch7x7dbl, 128, 7, 1)
|
||||
branch7x7dbl = conv2D_bn(branch7x7dbl, 192, 1, 7)
|
||||
|
||||
branch_pool = AveragePooling2D((3, 3), strides=(1, 1), border_mode='same', dim_ordering=DIM_ORDERING)(x)
|
||||
branch_pool = conv2D_bn(branch_pool, 192, 1, 1)
|
||||
x = merge([branch1x1, branch7x7, branch7x7dbl, branch_pool], mode='concat', concat_axis=CONCAT_AXIS)
|
||||
|
||||
# mixed5: 17 x 17 x 768
|
||||
|
||||
branch1x1 = conv2D_bn(x, 192, 1, 1)
|
||||
|
||||
branch7x7 = conv2D_bn(x, 160, 1, 1)
|
||||
branch7x7 = conv2D_bn(branch7x7, 160, 1, 7)
|
||||
branch7x7 = conv2D_bn(branch7x7, 192, 7, 1)
|
||||
|
||||
branch7x7dbl = conv2D_bn(x, 160, 1, 1)
|
||||
branch7x7dbl = conv2D_bn(branch7x7dbl, 160, 7, 1)
|
||||
branch7x7dbl = conv2D_bn(branch7x7dbl, 160, 1, 7)
|
||||
branch7x7dbl = conv2D_bn(branch7x7dbl, 160, 7, 1)
|
||||
branch7x7dbl = conv2D_bn(branch7x7dbl, 192, 1, 7)
|
||||
|
||||
branch_pool = AveragePooling2D((3, 3), strides=(1, 1), border_mode='same', dim_ordering=DIM_ORDERING)(x)
|
||||
branch_pool = conv2D_bn(branch_pool, 192, 1, 1)
|
||||
x = merge([branch1x1, branch7x7, branch7x7dbl, branch_pool], mode='concat', concat_axis=CONCAT_AXIS)
|
||||
|
||||
# mixed5: 17 x 17 x 768
|
||||
|
||||
branch1x1 = conv2D_bn(x, 192, 1, 1)
|
||||
|
||||
branch7x7 = conv2D_bn(x, 160, 1, 1)
|
||||
branch7x7 = conv2D_bn(branch7x7, 160, 1, 7)
|
||||
branch7x7 = conv2D_bn(branch7x7, 192, 7, 1)
|
||||
|
||||
branch7x7dbl = conv2D_bn(x, 160, 1, 1)
|
||||
branch7x7dbl = conv2D_bn(branch7x7dbl, 160, 7, 1)
|
||||
branch7x7dbl = conv2D_bn(branch7x7dbl, 160, 1, 7)
|
||||
branch7x7dbl = conv2D_bn(branch7x7dbl, 160, 7, 1)
|
||||
branch7x7dbl = conv2D_bn(branch7x7dbl, 192, 1, 7)
|
||||
|
||||
branch_pool = AveragePooling2D((3, 3), strides=(1, 1), border_mode='same', dim_ordering=DIM_ORDERING)(x)
|
||||
branch_pool = conv2D_bn(branch_pool, 192, 1, 1)
|
||||
x = merge([branch1x1, branch7x7, branch7x7dbl, branch_pool], mode='concat', concat_axis=CONCAT_AXIS)
|
||||
|
||||
# mixed6: 17 x 17 x 768
|
||||
|
||||
branch1x1 = conv2D_bn(x, 192, 1, 1)
|
||||
|
||||
branch7x7 = conv2D_bn(x, 160, 1, 1)
|
||||
branch7x7 = conv2D_bn(branch7x7, 160, 1, 7)
|
||||
branch7x7 = conv2D_bn(branch7x7, 192, 7, 1)
|
||||
|
||||
branch7x7dbl = conv2D_bn(x, 160, 1, 1)
|
||||
branch7x7dbl = conv2D_bn(branch7x7dbl, 160, 7, 1)
|
||||
branch7x7dbl = conv2D_bn(branch7x7dbl, 192, 1, 7)
|
||||
branch7x7dbl = conv2D_bn(branch7x7dbl, 160, 7, 1)
|
||||
branch7x7dbl = conv2D_bn(branch7x7dbl, 192, 1, 7)
|
||||
|
||||
branch_pool = AveragePooling2D((3, 3), strides=(1, 1), border_mode='same', dim_ordering=DIM_ORDERING)(x)
|
||||
branch_pool = conv2D_bn(branch_pool, 192, 1, 1)
|
||||
x = merge([branch1x1, branch7x7, branch7x7dbl, branch_pool], mode='concat', concat_axis=CONCAT_AXIS)
|
||||
|
||||
# mixed7: 17 x 17 x 768
|
||||
|
||||
branch1x1 = conv2D_bn(x, 192, 1, 1)
|
||||
|
||||
branch7x7 = conv2D_bn(x, 192, 1, 1)
|
||||
branch7x7 = conv2D_bn(branch7x7, 192, 1, 7)
|
||||
branch7x7 = conv2D_bn(branch7x7, 192, 7, 1)
|
||||
|
||||
branch7x7dbl = conv2D_bn(x, 160, 1, 1)
|
||||
branch7x7dbl = conv2D_bn(branch7x7dbl, 192, 7, 1)
|
||||
branch7x7dbl = conv2D_bn(branch7x7dbl, 192, 1, 7)
|
||||
branch7x7dbl = conv2D_bn(branch7x7dbl, 192, 7, 1)
|
||||
branch7x7dbl = conv2D_bn(branch7x7dbl, 192, 1, 7)
|
||||
|
||||
branch_pool = AveragePooling2D((3, 3), strides=(1, 1), border_mode='same', dim_ordering=DIM_ORDERING)(x)
|
||||
branch_pool = conv2D_bn(branch_pool, 192, 1, 1)
|
||||
x = merge([branch1x1, branch7x7, branch7x7dbl, branch_pool], mode='concat', concat_axis=CONCAT_AXIS)
|
||||
|
||||
# Auxiliary head
|
||||
|
||||
aux_logits = AveragePooling2D((5, 5), strides=(3, 3), dim_ordering=DIM_ORDERING)(x)
|
||||
aux_logits = conv2D_bn(aux_logits, 128, 1, 1)
|
||||
aux_logits = conv2D_bn(aux_logits, 728, 5, 5, border_mode='valid')
|
||||
aux_logits = Flatten()(aux_logits)
|
||||
aux_preds = Dense(NB_CLASS, activation='softmax')(aux_logits)
|
||||
|
||||
# mixed8: 8 x 8 x 1280
|
||||
|
||||
branch3x3 = conv2D_bn(x, 192, 1, 1)
|
||||
branch3x3 = conv2D_bn(branch3x3, 320, 3, 3, subsample=(2, 2), border_mode='valid')
|
||||
|
||||
branch7x7x3 = conv2D_bn(x, 192, 1, 1)
|
||||
branch7x7x3 = conv2D_bn(branch7x7x3, 192, 1, 7)
|
||||
branch7x7x3 = conv2D_bn(branch7x7x3, 192, 7, 1)
|
||||
branch7x7x3 = conv2D_bn(branch7x7x3, 192, 3, 3, subsample=(2, 2), border_mode='valid')
|
||||
|
||||
branch_pool = AveragePooling2D((3, 3), strides=(2, 2), dim_ordering=DIM_ORDERING)(x)
|
||||
x = merge([branch3x3, branch7x7x3, branch_pool], mode='concat', concat_axis=CONCAT_AXIS)
|
||||
|
||||
# mixed9: 8 x 8 x 2048
|
||||
|
||||
branch1x1 = conv2D_bn(x, 320, 1, 1)
|
||||
|
||||
branch3x3 = conv2D_bn(x, 384, 1, 1)
|
||||
branch3x3_1 = conv2D_bn(branch3x3, 384, 1, 3)
|
||||
branch3x3_2 = conv2D_bn(branch3x3, 384, 3, 1)
|
||||
branch3x3 = merge([branch3x3_1, branch3x3_2], mode='concat', concat_axis=CONCAT_AXIS)
|
||||
|
||||
branch3x3dbl = conv2D_bn(x, 448, 1, 1)
|
||||
branch3x3dbl = conv2D_bn(branch3x3dbl, 384, 3, 3)
|
||||
branch3x3dbl_1 = conv2D_bn(branch3x3dbl, 384, 1, 3)
|
||||
branch3x3dbl_2 = conv2D_bn(branch3x3dbl, 384, 3, 1)
|
||||
branch3x3dbl = merge([branch3x3dbl_1, branch3x3dbl_2], mode='concat', concat_axis=CONCAT_AXIS)
|
||||
|
||||
branch_pool = AveragePooling2D((3, 3), strides=(1, 1), border_mode='same', dim_ordering=DIM_ORDERING)(x)
|
||||
branch_pool = conv2D_bn(branch_pool, 192, 1, 1)
|
||||
x = merge([branch1x1, branch3x3, branch3x3dbl, branch_pool], mode='concat', concat_axis=CONCAT_AXIS)
|
||||
|
||||
# mixed10: 8 x 8 x 2048
|
||||
|
||||
branch1x1 = conv2D_bn(x, 320, 1, 1)
|
||||
|
||||
branch3x3 = conv2D_bn(x, 384, 1, 1)
|
||||
branch3x3_1 = conv2D_bn(branch3x3, 384, 1, 3)
|
||||
branch3x3_2 = conv2D_bn(branch3x3, 384, 3, 1)
|
||||
branch3x3 = merge([branch3x3_1, branch3x3_2], mode='concat', concat_axis=CONCAT_AXIS)
|
||||
|
||||
branch3x3dbl = conv2D_bn(x, 448, 1, 1)
|
||||
branch3x3dbl = conv2D_bn(branch3x3dbl, 384, 3, 3)
|
||||
branch3x3dbl_1 = conv2D_bn(branch3x3dbl, 384, 1, 3)
|
||||
branch3x3dbl_2 = conv2D_bn(branch3x3dbl, 384, 3, 1)
|
||||
branch3x3dbl = merge([branch3x3dbl_1, branch3x3dbl_2], mode='concat', concat_axis=CONCAT_AXIS)
|
||||
|
||||
branch_pool = AveragePooling2D((3, 3), strides=(1, 1), border_mode='same', dim_ordering=DIM_ORDERING)(x)
|
||||
branch_pool = conv2D_bn(branch_pool, 192, 1, 1)
|
||||
x = merge([branch1x1, branch3x3, branch3x3dbl, branch_pool], mode='concat', concat_axis=CONCAT_AXIS)
|
||||
|
||||
# Final pooling and prediction
|
||||
|
||||
x = AveragePooling2D((8, 8), strides=(1, 1), dim_ordering=DIM_ORDERING)(x)
|
||||
x = Dropout(0.5)(x)
|
||||
x = Flatten()(x)
|
||||
preds = Dense(NB_CLASS, activation='softmax')(x)
|
||||
|
||||
# Define model
|
||||
|
||||
model = Model(input=img_input, output=[preds, aux_preds])
|
||||
model.compile('rmsprop', 'categorical_crossentropy')
|
||||
|
||||
# train via e.g. `model.fit(x_train, [y_train] * 2, batch_size=32, nb_epoch=100)`
|
||||
# Note that for a large dataset it would be preferable
|
||||
# to train using `fit_generator` (see Keras docs).
|
||||
@@ -0,0 +1,83 @@
|
||||
'''Compare LSTM implementations on the IMDB sentiment classification task.
|
||||
|
||||
consume_less='cpu' preprocesses input to the LSTM which typically results in
|
||||
faster computations at the expense of increased peak memory usage as the
|
||||
preprocessed input must be kept in memory.
|
||||
|
||||
consume_less='mem' does away with the preprocessing, meaning that it might take
|
||||
a little longer, but should require less peak memory.
|
||||
|
||||
consume_less='gpu' concatenates the input, output and forget gate's weights
|
||||
into one large matrix, resulting in faster computation time as the GPU can
|
||||
utilize more cores, at the expense of reduced regularization because the same
|
||||
dropout is shared across the gates.
|
||||
|
||||
Note that the relative performance of the different `consume_less` modes
|
||||
can vary depending on your device, your model and the size of your data.
|
||||
'''
|
||||
|
||||
import time
|
||||
import numpy as np
|
||||
import matplotlib.pyplot as plt
|
||||
|
||||
from keras.preprocessing import sequence
|
||||
from keras.models import Sequential
|
||||
from keras.layers import Embedding, Dense, LSTM
|
||||
from keras.datasets import imdb
|
||||
|
||||
max_features = 20000
|
||||
max_length = 80
|
||||
embedding_dim = 256
|
||||
batch_size = 128
|
||||
epochs = 10
|
||||
modes = ['cpu', 'mem', 'gpu']
|
||||
|
||||
print('Loading data...')
|
||||
(X_train, y_train), (X_test, y_test) = imdb.load_data(nb_words=max_features)
|
||||
X_train = sequence.pad_sequences(X_train, max_length)
|
||||
X_test = sequence.pad_sequences(X_test, max_length)
|
||||
|
||||
# Compile and train different models while measuring performance.
|
||||
results = []
|
||||
for mode in modes:
|
||||
print('Testing mode: consume_less="{}"'.format(mode))
|
||||
|
||||
model = Sequential()
|
||||
model.add(Embedding(max_features, embedding_dim, input_length=max_length, dropout=0.2))
|
||||
model.add(LSTM(embedding_dim, dropout_W=0.2, dropout_U=0.2, consume_less=mode))
|
||||
model.add(Dense(1, activation='sigmoid'))
|
||||
model.compile(loss='binary_crossentropy',
|
||||
optimizer='adam',
|
||||
metrics=['accuracy'])
|
||||
|
||||
start_time = time.time()
|
||||
history = model.fit(X_train, y_train,
|
||||
batch_size=batch_size,
|
||||
nb_epoch=epochs,
|
||||
validation_data=(X_test, y_test))
|
||||
average_time_per_epoch = (time.time() - start_time) / epochs
|
||||
|
||||
results.append((history, average_time_per_epoch))
|
||||
|
||||
# Compare models' accuracy, loss and elapsed time per epoch.
|
||||
plt.style.use('ggplot')
|
||||
ax1 = plt.subplot2grid((2, 2), (0, 0))
|
||||
ax1.set_title('Accuracy')
|
||||
ax1.set_ylabel('Validation Accuracy')
|
||||
ax1.set_xlabel('Epochs')
|
||||
ax2 = plt.subplot2grid((2, 2), (1, 0))
|
||||
ax2.set_title('Loss')
|
||||
ax2.set_ylabel('Validation Loss')
|
||||
ax2.set_xlabel('Epochs')
|
||||
ax3 = plt.subplot2grid((2, 2), (0, 1), rowspan=2)
|
||||
ax3.set_title('Time')
|
||||
ax3.set_ylabel('Seconds')
|
||||
for mode, result in zip(modes, results):
|
||||
ax1.plot(result[0].epoch, result[0].history['val_acc'], label=mode)
|
||||
ax2.plot(result[0].epoch, result[0].history['val_loss'], label=mode)
|
||||
ax1.legend()
|
||||
ax2.legend()
|
||||
ax3.bar(np.arange(len(results)), [x[1] for x in results],
|
||||
tick_label=modes, align='center')
|
||||
plt.tight_layout()
|
||||
plt.show()
|
||||
@@ -12,8 +12,8 @@ has at least ~100k characters. ~1M is better.
|
||||
|
||||
from __future__ import print_function
|
||||
from keras.models import Sequential
|
||||
from keras.layers.core import Dense, Activation, Dropout
|
||||
from keras.layers.recurrent import LSTM
|
||||
from keras.layers import Dense, Activation, Dropout
|
||||
from keras.layers import LSTM
|
||||
from keras.utils.data_utils import get_file
|
||||
import numpy as np
|
||||
import random
|
||||
|
||||
@@ -11,8 +11,8 @@ np.random.seed(1337) # for reproducibility
|
||||
|
||||
from keras.datasets import mnist
|
||||
from keras.models import Sequential
|
||||
from keras.layers.core import Dense, Dropout, Activation, Flatten
|
||||
from keras.layers.convolutional import Convolution2D, MaxPooling2D
|
||||
from keras.layers import Dense, Dropout, Activation, Flatten
|
||||
from keras.layers import Convolution2D, MaxPooling2D
|
||||
from keras.utils import np_utils
|
||||
|
||||
batch_size = 128
|
||||
|
||||
@@ -17,9 +17,9 @@ from __future__ import print_function
|
||||
|
||||
from keras.datasets import mnist
|
||||
from keras.models import Sequential
|
||||
from keras.layers.core import Dense, Activation
|
||||
from keras.layers import Dense, Activation
|
||||
from keras.layers import SimpleRNN
|
||||
from keras.initializations import normal, identity
|
||||
from keras.layers.recurrent import SimpleRNN
|
||||
from keras.optimizers import RMSprop
|
||||
from keras.utils import np_utils
|
||||
|
||||
|
||||
@@ -28,6 +28,11 @@ def euclidean_distance(vects):
|
||||
return K.sqrt(K.sum(K.square(x - y), axis=1, keepdims=True))
|
||||
|
||||
|
||||
def eucl_dist_output_shape(shapes):
    '''Output shape for the `euclidean_distance` Lambda layer.

    # Arguments
        shapes: a pair of input shapes (exactly two entries are unpacked).

    # Returns
        A 2-tuple: the first dimension of the first input shape,
        followed by 1.
    '''
    first_shape, _ = shapes
    leading_dim = first_shape[0]
    return (leading_dim, 1)
|
||||
|
||||
|
||||
def contrastive_loss(y_true, y_pred):
|
||||
'''Contrastive loss from Hadsell-et-al.'06
|
||||
http://yann.lecun.com/exdb/publis/pdf/hadsell-chopra-lecun-06.pdf
|
||||
@@ -103,7 +108,7 @@ input_b = Input(shape=(input_dim,))
|
||||
processed_a = base_network(input_a)
|
||||
processed_b = base_network(input_b)
|
||||
|
||||
distance = Lambda(euclidean_distance)([processed_a, processed_b])
|
||||
distance = Lambda(euclidean_distance, output_shape=eucl_dist_output_shape)([processed_a, processed_b])
|
||||
|
||||
model = Model(input=[input_a, input_b], output=distance)
|
||||
|
||||
|
||||
@@ -0,0 +1,94 @@
|
||||
'''Example of how to use sklearn wrapper
|
||||
|
||||
Builds simple CNN models on MNIST and uses sklearn's GridSearchCV to find best model
|
||||
'''
|
||||
|
||||
from __future__ import print_function
|
||||
import numpy as np
|
||||
np.random.seed(1337) # for reproducibility
|
||||
|
||||
from keras.datasets import mnist
|
||||
from keras.models import Sequential
|
||||
from keras.layers import Dense, Dropout, Activation, Flatten
|
||||
from keras.layers import Convolution2D, MaxPooling2D
|
||||
from keras.utils import np_utils
|
||||
from keras.wrappers.scikit_learn import KerasClassifier
|
||||
from sklearn.grid_search import GridSearchCV
|
||||
|
||||
|
||||
nb_classes = 10
|
||||
|
||||
# input image dimensions
|
||||
img_rows, img_cols = 28, 28
|
||||
|
||||
# load training data and do basic data normalization
|
||||
(X_train, y_train), (X_test, y_test) = mnist.load_data()
|
||||
X_train = X_train.reshape(X_train.shape[0], 1, img_rows, img_cols)
|
||||
X_test = X_test.reshape(X_test.shape[0], 1, img_rows, img_cols)
|
||||
X_train = X_train.astype('float32')
|
||||
X_test = X_test.astype('float32')
|
||||
X_train /= 255
|
||||
X_test /= 255
|
||||
|
||||
# convert class vectors to binary class matrices
|
||||
y_train = np_utils.to_categorical(y_train, nb_classes)
|
||||
y_test = np_utils.to_categorical(y_test, nb_classes)
|
||||
|
||||
def make_model(dense_layer_sizes, nb_filters, nb_conv, nb_pool):
|
||||
'''Creates model comprised of 2 convolutional layers followed by dense layers
|
||||
|
||||
dense_layer_sizes: List of layer sizes. This list has one number for each layer
|
||||
nb_filters: Number of convolutional filters in each convolutional layer
|
||||
nb_conv: Convolutional kernel size
|
||||
nb_pool: Size of pooling area for max pooling
|
||||
'''
|
||||
|
||||
model = Sequential()
|
||||
|
||||
model.add(Convolution2D(nb_filters, nb_conv, nb_conv,
|
||||
border_mode='valid',
|
||||
input_shape=(1, img_rows, img_cols)))
|
||||
model.add(Activation('relu'))
|
||||
model.add(Convolution2D(nb_filters, nb_conv, nb_conv))
|
||||
model.add(Activation('relu'))
|
||||
model.add(MaxPooling2D(pool_size=(nb_pool, nb_pool)))
|
||||
model.add(Dropout(0.25))
|
||||
|
||||
model.add(Flatten())
|
||||
for layer_size in dense_layer_sizes:
|
||||
model.add(Dense(layer_size))
|
||||
model.add(Activation('relu'))
|
||||
model.add(Dropout(0.5))
|
||||
model.add(Dense(nb_classes))
|
||||
model.add(Activation('softmax'))
|
||||
|
||||
model.compile(loss='categorical_crossentropy',
|
||||
optimizer='adadelta',
|
||||
metrics=['accuracy'])
|
||||
|
||||
return model
|
||||
|
||||
dense_size_candidates = [[32], [64], [32, 32], [64, 64]]
|
||||
my_classifier = KerasClassifier(make_model, batch_size=32)
|
||||
validator = GridSearchCV(my_classifier,
|
||||
param_grid={'dense_layer_sizes': dense_size_candidates,
|
||||
# nb_epoch is avail for tuning even when not
|
||||
# an argument to model building function
|
||||
'nb_epoch': [3, 6],
|
||||
'nb_filters': [8],
|
||||
'nb_conv': [3],
|
||||
'nb_pool': [2]},
|
||||
scoring='log_loss',
|
||||
n_jobs=1)
|
||||
validator.fit(X_train, y_train)
|
||||
|
||||
print('The parameters of the best model are: ')
|
||||
print(validator.best_params_)
|
||||
|
||||
# validator.best_estimator_ returns sklearn-wrapped version of best model.
|
||||
# validator.best_estimator_.model returns the (unwrapped) keras model
|
||||
best_model = validator.best_estimator_.model
|
||||
metric_names = best_model.metrics_names
|
||||
metric_values = best_model.evaluate(X_test, y_test)
|
||||
for metric, value in zip(metric_names, metric_values):
|
||||
print(metric, ': ', value)
|
||||
@@ -19,8 +19,8 @@ np.random.seed(1337) # for reproducibility
|
||||
|
||||
from keras.datasets import mnist
|
||||
from keras.models import Sequential
|
||||
from keras.layers.core import Dense, Dropout, Activation, Flatten
|
||||
from keras.layers.convolutional import Convolution2D, MaxPooling2D
|
||||
from keras.layers import Dense, Dropout, Activation, Flatten
|
||||
from keras.layers import Convolution2D, MaxPooling2D
|
||||
from keras.utils import np_utils
|
||||
|
||||
|
||||
|
||||
@@ -58,7 +58,7 @@ import argparse
|
||||
import h5py
|
||||
|
||||
from keras.models import Sequential
|
||||
from keras.layers.convolutional import Convolution2D, ZeroPadding2D, MaxPooling2D
|
||||
from keras.layers import Convolution2D, ZeroPadding2D, MaxPooling2D
|
||||
from keras import backend as K
|
||||
|
||||
parser = argparse.ArgumentParser(description='Neural style transfer with Keras.')
|
||||
|
||||
@@ -8,8 +8,7 @@ np.random.seed(1337) # for reproducibility
|
||||
|
||||
from keras.datasets import reuters
|
||||
from keras.models import Sequential
|
||||
from keras.layers.core import Dense, Dropout, Activation
|
||||
from keras.layers.normalization import BatchNormalization
|
||||
from keras.layers import Dense, Dropout, Activation
|
||||
from keras.utils import np_utils
|
||||
from keras.preprocessing.text import Tokenizer
|
||||
|
||||
|
||||
@@ -5,8 +5,7 @@ from __future__ import print_function
|
||||
import numpy as np
|
||||
import matplotlib.pyplot as plt
|
||||
from keras.models import Sequential
|
||||
from keras.layers.core import Dense
|
||||
from keras.layers.recurrent import LSTM
|
||||
from keras.layers import Dense, LSTM
|
||||
|
||||
|
||||
# since we are using stateful rnn tsteps can be set to 1
|
||||
|
||||
@@ -0,0 +1,98 @@
|
||||
'''This script demonstrates how to build a variational autoencoder with Keras.
|
||||
|
||||
Reference: "Auto-Encoding Variational Bayes" https://arxiv.org/abs/1312.6114
|
||||
'''
|
||||
import numpy as np
|
||||
import matplotlib.pyplot as plt
|
||||
|
||||
from keras.layers import Input, Dense, Lambda
|
||||
from keras.models import Model
|
||||
from keras import backend as K
|
||||
from keras import objectives
|
||||
from keras.datasets import mnist
|
||||
|
||||
batch_size = 16
|
||||
original_dim = 784
|
||||
latent_dim = 2
|
||||
intermediate_dim = 128
|
||||
epsilon_std = 0.01
|
||||
nb_epoch = 40
|
||||
|
||||
x = Input(batch_shape=(batch_size, original_dim))
|
||||
h = Dense(intermediate_dim, activation='relu')(x)
|
||||
z_mean = Dense(latent_dim)(h)
|
||||
z_log_sigma = Dense(latent_dim)(h)
|
||||
|
||||
def sampling(args):
    '''Draws a latent sample from the encoder outputs.

    # Arguments
        args: a pair `(z_mean, z_log_sigma)`.

    # Returns
        `z_mean + exp(z_log_sigma) * epsilon`, where `epsilon` is random
        normal noise of shape `(batch_size, latent_dim)` with mean 0 and
        standard deviation `epsilon_std` (all module-level settings).
    '''
    mean, log_sigma = args
    noise = K.random_normal(shape=(batch_size, latent_dim),
                            mean=0., std=epsilon_std)
    scaled_noise = K.exp(log_sigma) * noise
    return mean + scaled_noise
|
||||
|
||||
# note that "output_shape" isn't necessary with the TensorFlow backend
|
||||
# so you could write `Lambda(sampling)([z_mean, z_log_sigma])`
|
||||
z = Lambda(sampling, output_shape=(latent_dim,))([z_mean, z_log_sigma])
|
||||
|
||||
# we instantiate these layers separately so as to reuse them later
|
||||
decoder_h = Dense(intermediate_dim, activation='relu')
|
||||
decoder_mean = Dense(original_dim, activation='sigmoid')
|
||||
h_decoded = decoder_h(z)
|
||||
x_decoded_mean = decoder_mean(h_decoded)
|
||||
|
||||
def vae_loss(x, x_decoded_mean):
    '''VAE objective: reconstruction cross-entropy plus a KL regularizer.

    # Arguments
        x: the target (input) tensor.
        x_decoded_mean: the decoder's reconstruction of `x`.

    # Returns
        The sum of the binary cross-entropy between `x` and
        `x_decoded_mean` and the KL term computed from the module-level
        `z_mean` and `z_log_sigma` tensors.
    '''
    xent_loss = objectives.binary_crossentropy(x, x_decoded_mean)
    # `sampling` scales the noise by exp(z_log_sigma), i.e. z_log_sigma is a
    # log standard deviation. The closed-form Gaussian KL term is written in
    # terms of the log variance, which is 2 * z_log_sigma.
    z_log_var = 2 * z_log_sigma
    kl_loss = - 0.5 * K.mean(1 + z_log_var - K.square(z_mean) - K.exp(z_log_var), axis=-1)
    return xent_loss + kl_loss
|
||||
|
||||
vae = Model(x, x_decoded_mean)
|
||||
vae.compile(optimizer='rmsprop', loss=vae_loss)
|
||||
|
||||
# train the VAE on MNIST digits
|
||||
(x_train, y_train), (x_test, y_test) = mnist.load_data()
|
||||
|
||||
x_train = x_train.astype('float32') / 255.
|
||||
x_test = x_test.astype('float32') / 255.
|
||||
x_train = x_train.reshape((len(x_train), np.prod(x_train.shape[1:])))
|
||||
x_test = x_test.reshape((len(x_test), np.prod(x_test.shape[1:])))
|
||||
|
||||
vae.fit(x_train, x_train,
|
||||
shuffle=True,
|
||||
nb_epoch=nb_epoch,
|
||||
batch_size=batch_size,
|
||||
validation_data=(x_test, x_test))
|
||||
|
||||
# build a model to project inputs on the latent space
|
||||
encoder = Model(x, z_mean)
|
||||
|
||||
# display a 2D plot of the digit classes in the latent space
|
||||
x_test_encoded = encoder.predict(x_test, batch_size=batch_size)
|
||||
plt.figure(figsize=(6, 6))
|
||||
plt.scatter(x_test_encoded[:, 0], x_test_encoded[:, 1], c=y_test)
|
||||
plt.colorbar()
|
||||
plt.show()
|
||||
|
||||
# build a digit generator that can sample from the learned distribution
|
||||
decoder_input = Input(shape=(latent_dim,))
|
||||
_h_decoded = decoder_h(decoder_input)
|
||||
_x_decoded_mean = decoder_mean(_h_decoded)
|
||||
generator = Model(decoder_input, _x_decoded_mean)
|
||||
|
||||
# display a 2D manifold of the digits
|
||||
n = 15 # figure with 15x15 digits
|
||||
digit_size = 28
|
||||
figure = np.zeros((digit_size * n, digit_size * n))
|
||||
# we will sample n points within [-15, 15] standard deviations
|
||||
grid_x = np.linspace(-15, 15, n)
|
||||
grid_y = np.linspace(-15, 15, n)
|
||||
|
||||
for i, yi in enumerate(grid_x):
|
||||
for j, xi in enumerate(grid_y):
|
||||
z_sample = np.array([[xi, yi]]) * epsilon_std
|
||||
x_decoded = generator.predict(z_sample)
|
||||
digit = x_decoded[0].reshape(digit_size, digit_size)
|
||||
figure[i * digit_size: (i + 1) * digit_size,
|
||||
j * digit_size: (j + 1) * digit_size] = digit
|
||||
|
||||
plt.figure(figsize=(10, 10))
|
||||
plt.imshow(figure)
|
||||
plt.show()
|
||||
+18
-1
@@ -1 +1,18 @@
|
||||
__version__ = '1.0.0'
|
||||
from __future__ import absolute_import
|
||||
from . import backend
|
||||
from . import datasets
|
||||
from . import engine
|
||||
from . import layers
|
||||
from . import preprocessing
|
||||
from . import utils
|
||||
from . import wrappers
|
||||
from . import callbacks
|
||||
from . import constraints
|
||||
from . import initializations
|
||||
from . import metrics
|
||||
from . import models
|
||||
from . import objectives
|
||||
from . import optimizers
|
||||
from . import regularizers
|
||||
|
||||
__version__ = '1.0.3'
|
||||
|
||||
@@ -19,6 +19,10 @@ def softplus(x):
|
||||
return K.softplus(x)
|
||||
|
||||
|
||||
def softsign(x):
    '''Softsign activation, computed by the backend's `K.softsign`.
    '''
    return K.softsign(x)
|
||||
|
||||
|
||||
def relu(x, alpha=0., max_value=None):
|
||||
return K.relu(x, alpha=alpha, max_value=max_value)
|
||||
|
||||
|
||||
@@ -21,21 +21,39 @@ def learning_phase():
|
||||
return _LEARNING_PHASE
|
||||
|
||||
|
||||
def set_learning_phase(value):
    '''Sets the global learning phase to a TF constant built from `value`.

    Rebinds the module-level `_LEARNING_PHASE` to a new `tf.constant`
    named 'keras_learning_phase', replacing whatever it referenced before.
    '''
    global _LEARNING_PHASE
    _LEARNING_PHASE = tf.constant(value, name='keras_learning_phase')
|
||||
|
||||
|
||||
def get_session():
|
||||
'''Returns the TF session in use by the backend.
|
||||
'''Returns the TF session to be used by the backend.
|
||||
|
||||
If a default TensorFlow session is available, we will return it.
|
||||
|
||||
Else, we will return the global Keras session.
|
||||
|
||||
If no global Keras session exists at this point:
|
||||
we will create a new global session.
|
||||
|
||||
Note that you can manually set the global session
|
||||
via `K.set_session(sess)`.
|
||||
'''
|
||||
global _SESSION
|
||||
if tf.get_default_session() is not None:
|
||||
return tf.get_default_session()
|
||||
if _SESSION is None:
|
||||
if not os.environ.get('OMP_NUM_THREADS'):
|
||||
_SESSION = tf.Session(config=tf.ConfigProto(allow_soft_placement=True))
|
||||
else:
|
||||
nb_thread = int(os.environ.get('OMP_NUM_THREADS'))
|
||||
_SESSION = tf.Session(config=tf.ConfigProto(intra_op_parallelism_threads=nb_thread, allow_soft_placement=True))
|
||||
_SESSION = tf.Session(config=tf.ConfigProto(intra_op_parallelism_threads=nb_thread,
|
||||
allow_soft_placement=True))
|
||||
return _SESSION
|
||||
|
||||
|
||||
def set_session(session):
|
||||
'''Sets the TF session.
|
||||
'''Sets the global TF session.
|
||||
'''
|
||||
global _SESSION
|
||||
_SESSION = session
|
||||
@@ -55,7 +73,20 @@ def variable(value, dtype=_FLOATX, name=None):
|
||||
Tensor variable instance.
|
||||
'''
|
||||
v = tf.Variable(np.asarray(value, dtype=dtype), name=name)
|
||||
get_session().run(v.initializer)
|
||||
if tf.get_default_graph() is get_session().graph:
|
||||
try:
|
||||
get_session().run(v.initializer)
|
||||
except tf.errors.InvalidArgumentError:
|
||||
warnings.warn('Could not automatically initialize variable, '
|
||||
'make sure you do it manually (e.g. via '
|
||||
'`tf.initialize_all_variables()`).')
|
||||
else:
|
||||
warnings.warn('The default TensorFlow graph is not the graph '
|
||||
'associated with the TensorFlow session currently '
|
||||
'registered with Keras, and as such Keras '
|
||||
'was not able to automatically initialize a variable. '
|
||||
'You should consider registering the proper session '
|
||||
'with Keras via `K.set_session(sess)`.')
|
||||
return v
|
||||
|
||||
|
||||
@@ -131,6 +162,12 @@ def ones(shape, dtype=_FLOATX, name=None):
|
||||
return variable(np.ones(shape), dtype, name)
|
||||
|
||||
|
||||
def eye(size, dtype=_FLOATX, name=None):
    '''Instantiates an identity matrix as a backend variable.

    # Arguments
        size: passed to `np.eye`; the number of rows (and columns).
        dtype: dtype forwarded to `variable`.
        name: optional name forwarded to `variable`.

    # Returns
        The result of `variable` applied to `np.eye(size)`.
    '''
    return variable(np.eye(size), dtype, name)
|
||||
|
||||
|
||||
def zeros_like(x, name=None):
|
||||
'''Instantiates an all-zeros tensor
|
||||
of the same shape as another tensor.
|
||||
@@ -413,6 +450,18 @@ def minimum(x, y):
|
||||
return tf.minimum(x, y)
|
||||
|
||||
|
||||
def sin(x):
    '''Computes sin of x element-wise.

    Thin wrapper around `tf.sin`.
    '''
    return tf.sin(x)
|
||||
|
||||
|
||||
def cos(x):
    '''Computes cos of x element-wise.

    Thin wrapper around `tf.cos`.
    '''
    return tf.cos(x)
|
||||
|
||||
|
||||
# SHAPE OPERATIONS
|
||||
|
||||
def concatenate(tensors, axis=-1):
|
||||
@@ -450,15 +499,21 @@ def resize_images(X, height_factor, width_factor, dim_ordering):
|
||||
positive integers.
|
||||
'''
|
||||
if dim_ordering == 'th':
|
||||
original_shape = int_shape(X)
|
||||
new_shape = tf.shape(X)[2:]
|
||||
new_shape *= tf.constant(np.array([height_factor, width_factor]).astype('int32'))
|
||||
X = permute_dimensions(X, [0, 2, 3, 1])
|
||||
X = tf.image.resize_nearest_neighbor(X, new_shape)
|
||||
return permute_dimensions(X, [0, 3, 1, 2])
|
||||
X = permute_dimensions(X, [0, 3, 1, 2])
|
||||
X.set_shape((None, None, original_shape[2] * height_factor, original_shape[3] * width_factor))
|
||||
return X
|
||||
elif dim_ordering == 'tf':
|
||||
original_shape = int_shape(X)
|
||||
new_shape = tf.shape(X)[1:3]
|
||||
new_shape *= tf.constant(np.array([height_factor, width_factor]).astype('int32'))
|
||||
return tf.image.resize_nearest_neighbor(X, new_shape)
|
||||
X = tf.image.resize_nearest_neighbor(X, new_shape)
|
||||
X.set_shape((None, original_shape[1] * height_factor, original_shape[2] * width_factor, None))
|
||||
return X
|
||||
else:
|
||||
raise Exception('Invalid dim_ordering: ' + dim_ordering)
|
||||
|
||||
@@ -490,6 +545,8 @@ def repeat(x, n):
|
||||
|
||||
|
||||
def tile(x, n):
    '''Tiles `x` according to `n` via `tf.tile`.

    # Arguments
        x: tensor to tile.
        n: multiples handed to `tf.tile`. A value exposing neither a
            `shape` attribute nor `__len__` is wrapped in a one-element
            list first.
    '''
    is_sequence_like = hasattr(n, 'shape') or hasattr(n, '__len__')
    multiples = n if is_sequence_like else [n]
    return tf.tile(x, multiples)
|
||||
|
||||
|
||||
@@ -553,6 +610,16 @@ def get_value(x):
|
||||
return x.eval(session=get_session())
|
||||
|
||||
|
||||
def batch_get_value(xs):
    '''Fetches the values of several tensor variables in one session call.

    # Arguments
        xs: collection of tensor variables; a falsy value (e.g. an
            empty list) yields an empty list without touching the session.

    # Returns
        Whatever `get_session().run(xs)` returns, or `[]` when `xs`
        is falsy.
    '''
    if not xs:
        return []
    return get_session().run(xs)
|
||||
|
||||
|
||||
def set_value(x, value):
|
||||
'''Sets the value of a tensor variable,
|
||||
from a Numpy array.
|
||||
@@ -560,6 +627,17 @@ def set_value(x, value):
|
||||
tf.assign(x, np.asarray(value)).op.run(session=get_session())
|
||||
|
||||
|
||||
def batch_set_value(tuples):
    '''Assigns values to many tensor variables in a single session call.

    # Arguments
        tuples: a list of tuples `(tensor, value)`.
            `value` should be a Numpy array.
            Nothing is run when `tuples` is falsy.
    '''
    if not tuples:
        return
    assign_ops = []
    for tensor, new_value in tuples:
        assign_ops.append(tf.assign(tensor, np.asarray(new_value)))
    get_session().run(assign_ops)
|
||||
|
||||
# GRAPH MANIPULATION
|
||||
|
||||
class Function(object):
|
||||
@@ -792,6 +870,10 @@ def softplus(x):
|
||||
return tf.nn.softplus(x)
|
||||
|
||||
|
||||
def softsign(x):
    '''Softsign of a tensor, computed by `tf.nn.softsign`.
    '''
    return tf.nn.softsign(x)
|
||||
|
||||
|
||||
def categorical_crossentropy(output, target, from_logits=False):
|
||||
'''Categorical crossentropy between an output tensor
|
||||
and a target tensor, where the target is a tensor of the same
|
||||
|
||||
@@ -3,6 +3,10 @@ from theano import tensor as T
|
||||
from theano.sandbox.rng_mrg import MRG_RandomStreams as RandomStreams
|
||||
from theano.tensor.signal import pool
|
||||
from theano.tensor.nnet import conv3d2d
|
||||
try:
|
||||
from theano.tensor.nnet.nnet import softsign as T_softsign
|
||||
except ImportError:
|
||||
from theano.sandbox.softsign import softsign as T_softsign
|
||||
import inspect
|
||||
import numpy as np
|
||||
from .common import _FLOATX, _EPSILON
|
||||
@@ -79,6 +83,12 @@ def ones(shape, dtype=_FLOATX, name=None):
|
||||
return variable(np.ones(shape), dtype, name)
|
||||
|
||||
|
||||
def eye(size, dtype=_FLOATX, name=None):
    '''Instantiates an identity matrix as a backend variable.

    # Arguments
        size: passed to `np.eye`; the number of rows (and columns).
        dtype: dtype forwarded to `variable`.
        name: optional name forwarded to `variable`.

    # Returns
        The result of `variable` applied to `np.eye(size)`.
    '''
    return variable(np.eye(size), dtype, name)
|
||||
|
||||
|
||||
def ones_like(x):
|
||||
return T.ones_like(x)
|
||||
|
||||
@@ -259,6 +269,14 @@ def minimum(x, y):
|
||||
return T.minimum(x, y)
|
||||
|
||||
|
||||
def sin(x):
    '''Computes sin of x element-wise.

    Thin wrapper around `T.sin`.
    '''
    return T.sin(x)
|
||||
|
||||
|
||||
def cos(x):
    '''Computes cos of x element-wise.

    Thin wrapper around `T.cos`.
    '''
    return T.cos(x)
|
||||
|
||||
|
||||
# SHAPE OPERATIONS
|
||||
|
||||
def concatenate(tensors, axis=-1):
|
||||
@@ -469,10 +487,22 @@ def get_value(x):
|
||||
return x.get_value()
|
||||
|
||||
|
||||
def batch_get_value(xs):
    '''Fetches the values of several tensor variables.

    # Arguments
        xs: iterable of tensor variables.

    # Returns
        A list holding `get_value(x)` for each `x` in `xs`, in order.
    '''
    return list(map(get_value, xs))
|
||||
|
||||
|
||||
def set_value(x, value):
    '''Sets the value of a tensor variable.

    `value` is first converted to a Numpy array of the variable's own
    dtype (`x.dtype`), then stored with `x.set_value`.
    '''
    x.set_value(np.asarray(value, dtype=x.dtype))
|
||||
|
||||
|
||||
def batch_set_value(tuples):
    '''Sets the values of many tensor variables, one after another.

    # Arguments
        tuples: an iterable of pairs `(tensor, value)`. Each `value` is
            converted to a Numpy array of the tensor's dtype before
            being stored with the tensor's `set_value`.
    '''
    for pair in tuples:
        var, new_value = pair
        casted = np.asarray(new_value, dtype=var.dtype)
        var.set_value(casted)
|
||||
|
||||
|
||||
# GRAPH MANIPULATION
|
||||
|
||||
class Function(object):
|
||||
@@ -553,15 +583,15 @@ def rnn(step_function, inputs, initial_states,
|
||||
axes = [1, 0] + list(range(2, ndim))
|
||||
inputs = inputs.dimshuffle(axes)
|
||||
|
||||
if constants is None:
|
||||
constants = []
|
||||
|
||||
if mask is not None:
|
||||
if mask.ndim == ndim-1:
|
||||
mask = expand_dims(mask)
|
||||
assert mask.ndim == ndim
|
||||
mask = mask.dimshuffle(axes)
|
||||
|
||||
if constants is None:
|
||||
constants = []
|
||||
|
||||
if unroll:
|
||||
indices = list(range(input_length))
|
||||
if go_backwards:
|
||||
@@ -571,7 +601,7 @@ def rnn(step_function, inputs, initial_states,
|
||||
successive_states = []
|
||||
states = initial_states
|
||||
for i in indices:
|
||||
output, new_states = step_function(inputs[i], states)
|
||||
output, new_states = step_function(inputs[i], states + constants)
|
||||
|
||||
if len(successive_outputs) == 0:
|
||||
prev_output = zeros_like(output)
|
||||
@@ -630,7 +660,7 @@ def rnn(step_function, inputs, initial_states,
|
||||
successive_states = []
|
||||
states = initial_states
|
||||
for i in indices:
|
||||
output, states = step_function(inputs[i], states)
|
||||
output, states = step_function(inputs[i], states + constants)
|
||||
successive_outputs.append(output)
|
||||
successive_states.append(states)
|
||||
outputs = T.stack(*successive_outputs)
|
||||
@@ -706,6 +736,10 @@ def softplus(x):
|
||||
return T.nnet.softplus(x)
|
||||
|
||||
|
||||
def softsign(x):
    '''Softsign of a tensor.

    Delegates to Theano's softsign op, which this module imports under
    the name `T_softsign`.
    '''
    return T_softsign(x)
|
||||
|
||||
|
||||
def categorical_crossentropy(output, target, from_logits=False):
|
||||
if from_logits:
|
||||
output = T.nnet.softmax(output)
|
||||
@@ -1010,10 +1044,3 @@ def random_binomial(shape, p=0.0, dtype=_FLOATX, seed=None):
|
||||
seed = np.random.randint(10e6)
|
||||
rng = RandomStreams(seed=seed)
|
||||
return rng.binomial(shape, p=p, dtype=dtype)
|
||||
|
||||
'''
|
||||
more TODO:
|
||||
|
||||
tensordot -> soon to be introduced in TF
|
||||
batched_tensordot -> reimplement
|
||||
'''
|
||||
|
||||
+25
-8
@@ -60,8 +60,7 @@ class CallbackList(object):
|
||||
callback.on_batch_end(batch, logs)
|
||||
self._delta_ts_batch_end.append(time.time() - t_before_callbacks)
|
||||
delta_t_median = np.median(self._delta_ts_batch_end)
|
||||
if self._delta_t_batch > 0. and delta_t_median > 0.95 * \
|
||||
self._delta_t_batch and delta_t_median > 0.1:
|
||||
if self._delta_t_batch > 0. and (delta_t_median > 0.95 * self._delta_t_batch and delta_t_median > 0.1):
|
||||
warnings.warn('Method on_batch_end() is slow compared '
|
||||
'to the batch update (%f). Check your callbacks.'
|
||||
% delta_t_median)
|
||||
@@ -431,8 +430,11 @@ class TensorBoard(Callback):
|
||||
histogram_freq: frequency (in epochs) at which to compute activation
|
||||
histograms for the layers of the model. If set to 0,
|
||||
histograms won't be computed.
|
||||
write_graph: whether to visualize the graph in tensorboard. The log file can
|
||||
become quite large when write_graph is set to True.
|
||||
'''
|
||||
def __init__(self, log_dir='./logs', histogram_freq=0):
|
||||
|
||||
def __init__(self, log_dir='./logs', histogram_freq=0, write_graph=True):
|
||||
super(Callback, self).__init__()
|
||||
if K._BACKEND != 'tensorflow':
|
||||
raise Exception('TensorBoard callback only works '
|
||||
@@ -440,6 +442,7 @@ class TensorBoard(Callback):
|
||||
self.log_dir = log_dir
|
||||
self.histogram_freq = histogram_freq
|
||||
self.merged = None
|
||||
self.write_graph = write_graph
|
||||
|
||||
def _set_model(self, model):
|
||||
import tensorflow as tf
|
||||
@@ -447,7 +450,7 @@ class TensorBoard(Callback):
|
||||
|
||||
self.model = model
|
||||
self.sess = KTF.get_session()
|
||||
if self.histogram_freq and not self.merged:
|
||||
if self.histogram_freq and self.merged is None:
|
||||
layers = self.model.layers
|
||||
for layer in layers:
|
||||
if hasattr(layer, 'W'):
|
||||
@@ -458,8 +461,16 @@ class TensorBoard(Callback):
|
||||
tf.histogram_summary('{}_out'.format(layer),
|
||||
layer.output)
|
||||
self.merged = tf.merge_all_summaries()
|
||||
self.writer = tf.train.SummaryWriter(self.log_dir,
|
||||
self.sess.graph_def)
|
||||
if self.write_graph:
|
||||
tf_version = tuple(int(i) for i in tf.__version__.split('.'))
|
||||
if tf_version >= (0, 8, 0):
|
||||
self.writer = tf.train.SummaryWriter(self.log_dir,
|
||||
self.sess.graph)
|
||||
else:
|
||||
self.writer = tf.train.SummaryWriter(self.log_dir,
|
||||
self.sess.graph_def)
|
||||
else:
|
||||
self.writer = tf.train.SummaryWriter(self.log_dir)
|
||||
|
||||
def on_epoch_end(self, epoch, logs={}):
|
||||
import tensorflow as tf
|
||||
@@ -468,8 +479,14 @@ class TensorBoard(Callback):
|
||||
if epoch % self.histogram_freq == 0:
|
||||
# TODO: implement batched calls to sess.run
|
||||
# (current call will likely go OOM on GPU)
|
||||
feed_dict = dict(zip(self.model.inputs,
|
||||
self.model.validation_data))
|
||||
if self.model.uses_learning_phase:
|
||||
cut_v_data = len(self.model.inputs)
|
||||
val_data = self.model.validation_data[:cut_v_data] + [0]
|
||||
tensors = self.model.inputs + [K.learning_phase()]
|
||||
else:
|
||||
val_data = self.model.validation_data
|
||||
tensors = self.model.inputs
|
||||
feed_dict = dict(zip(tensors, val_data))
|
||||
result = self.sess.run([self.merged], feed_dict=feed_dict)
|
||||
summary_str = result[0]
|
||||
self.writer.add_summary(summary_str, epoch)
|
||||
|
||||
@@ -2,7 +2,6 @@
|
||||
from __future__ import absolute_import
|
||||
import sys
|
||||
from six.moves import cPickle
|
||||
from six.moves import range
|
||||
|
||||
|
||||
def load_batch(fpath, label_key='labels'):
|
||||
|
||||
+96
-43
@@ -844,23 +844,25 @@ class Layer(object):
|
||||
'" with a weight list of length ' + str(len(weights)) +
|
||||
', but the layer was expecting ' + str(len(params)) +
|
||||
' weights. Provided weights: ' + str(weights))
|
||||
for p, w in zip(params, weights):
|
||||
if K.get_value(p).shape != w.shape:
|
||||
if not params:
|
||||
return
|
||||
weight_value_tuples = []
|
||||
param_values = K.batch_get_value(params)
|
||||
for pv, p, w in zip(param_values, params, weights):
|
||||
if pv.shape != w.shape:
|
||||
raise Exception('Layer weight shape ' +
|
||||
str(K.get_value(p).shape) +
|
||||
str(pv.shape) +
|
||||
' not compatible with '
|
||||
'provided weight shape ' + str(w.shape))
|
||||
K.set_value(p, w)
|
||||
weight_value_tuples.append((p, w))
|
||||
K.batch_set_value(weight_value_tuples)
|
||||
|
||||
def get_weights(self):
|
||||
'''Returns the current weights of the layer,
|
||||
as a list of numpy arrays.
|
||||
'''
|
||||
params = self.trainable_weights + self.non_trainable_weights
|
||||
weights = []
|
||||
for p in params:
|
||||
weights.append(K.get_value(p))
|
||||
return weights
|
||||
return K.batch_get_value(params)
|
||||
|
||||
def get_config(self):
|
||||
'''Returns a Python dictionary (serializable)
|
||||
@@ -1124,7 +1126,8 @@ class Merge(Layer):
|
||||
if mode not in {'sum', 'mul', 'concat', 'ave', 'cos', 'dot'}:
|
||||
raise Exception('Invalid merge mode: ' + str(mode))
|
||||
if type(layers) not in {list, tuple} or len(layers) < 2:
|
||||
raise Exception('A Merge should only be applied to a list of layers. Not a list: ' + str(layers))
|
||||
raise Exception('A Merge should only be applied to a list of '
|
||||
'layers with at least 2 elements. Found: ' + str(layers))
|
||||
|
||||
if tensor_indices is None:
|
||||
tensor_indices = [None for _ in range(len(layers))]
|
||||
@@ -1165,7 +1168,7 @@ class Merge(Layer):
|
||||
raise Exception('Invalid format for dot_axes - list elements should be "int".')
|
||||
if shape1[dot_axes[0]] != shape2[dot_axes[1]]:
|
||||
raise Exception('Dimension incompatibility using dot mode: ' +
|
||||
'%s != %s. ' % (shape1[dot_axes[0]], shape2[dot_axes[1][i]]) +
|
||||
'%s != %s. ' % (shape1[dot_axes[0]], shape2[dot_axes[1]]) +
|
||||
'Layer shapes: %s, %s' % (shape1, shape2))
|
||||
elif mode == 'concat':
|
||||
reduced_inputs_shapes = [list(shape) for shape in input_shapes]
|
||||
@@ -1237,20 +1240,33 @@ class Merge(Layer):
|
||||
'please use ' +
|
||||
'the "merge" function instead: ' +
|
||||
'`merged_tensor = merge([tensor_1, tensor2])`.')
|
||||
layers = []
|
||||
node_indices = []
|
||||
tensor_indices = []
|
||||
|
||||
all_keras_tensors = True
|
||||
for x in inputs:
|
||||
layer, node_index, tensor_index = x._keras_history
|
||||
layers.append(layer)
|
||||
node_indices.append(node_index)
|
||||
tensor_indices.append(tensor_index)
|
||||
self._arguments_validation(layers, self.mode,
|
||||
self.concat_axis, self.dot_axes,
|
||||
self._output_shape,
|
||||
node_indices, tensor_indices)
|
||||
self.built = True
|
||||
self.add_inbound_node(layers, node_indices, tensor_indices)
|
||||
if not hasattr(x, '_keras_history'):
|
||||
all_keras_tensors = False
|
||||
break
|
||||
|
||||
if all_keras_tensors:
|
||||
layers = []
|
||||
node_indices = []
|
||||
tensor_indices = []
|
||||
for x in inputs:
|
||||
layer, node_index, tensor_index = x._keras_history
|
||||
layers.append(layer)
|
||||
node_indices.append(node_index)
|
||||
tensor_indices.append(tensor_index)
|
||||
self._arguments_validation(layers, self.mode,
|
||||
self.concat_axis, self.dot_axes,
|
||||
self._output_shape,
|
||||
node_indices, tensor_indices)
|
||||
self.built = True
|
||||
self.add_inbound_node(layers, node_indices, tensor_indices)
|
||||
|
||||
outputs = self.inbound_nodes[-1].output_tensors
|
||||
return outputs[0] # merge only returns a single tensor
|
||||
else:
|
||||
return self.call(inputs, mask)
|
||||
|
||||
def get_output_shape_for(self, input_shape):
|
||||
assert type(input_shape) is list # must have mutiple input shape tuples
|
||||
@@ -1259,6 +1275,8 @@ class Merge(Layer):
|
||||
if hasattr(self._output_shape, '__call__'):
|
||||
output_shape = self._output_shape(input_shape)
|
||||
return output_shape
|
||||
elif self._output_shape is not None:
|
||||
return self._output_shape
|
||||
else:
|
||||
# TODO: consider shape auto-inference with TF
|
||||
raise Exception('The Merge layer ' + self.name +
|
||||
@@ -1280,8 +1298,6 @@ class Merge(Layer):
|
||||
break
|
||||
output_shape[self.concat_axis] += shape[self.concat_axis]
|
||||
return tuple(output_shape)
|
||||
elif self.mode == 'join':
|
||||
return None
|
||||
elif self.mode == 'dot':
|
||||
shape1 = list(input_shapes[0])
|
||||
shape2 = list(input_shapes[1])
|
||||
@@ -1382,7 +1398,7 @@ def merge(inputs, mode='sum', concat_axis=-1,
|
||||
|
||||
# Arguments
|
||||
mode: string or lambda/function. If string, must be one
|
||||
of: 'sum', 'mul', 'concat', 'ave', 'join', 'cos', 'dot'.
|
||||
of: 'sum', 'mul', 'concat', 'ave', 'cos', 'dot'.
|
||||
If lambda/function, it should take as input a list of tensors
|
||||
and return a single tensor.
|
||||
concat_axis: integer, axis to use in mode `concat`.
|
||||
@@ -1399,20 +1415,35 @@ def merge(inputs, mode='sum', concat_axis=-1,
|
||||
to consider for merging
|
||||
(in case some input layer node returns multiple tensors).
|
||||
'''
|
||||
input_layers = []
|
||||
node_indices = []
|
||||
tensor_indices = []
|
||||
all_keras_tensors = True
|
||||
for x in inputs:
|
||||
assert hasattr(x, '_keras_history'), 'Input tensor to "merge" was not a Keras tensor: ' + str(x)
|
||||
input_layer, node_index, tensor_index = x._keras_history
|
||||
input_layers.append(input_layer)
|
||||
node_indices.append(node_index)
|
||||
tensor_indices.append(tensor_index)
|
||||
merge_layer = Merge(input_layers, mode=mode, concat_axis=concat_axis,
|
||||
dot_axes=dot_axes, output_shape=output_shape,
|
||||
node_indices=node_indices, tensor_indices=tensor_indices,
|
||||
name=name)
|
||||
return merge_layer.inbound_nodes[0].output_tensors[0]
|
||||
if not hasattr(x, '_keras_history'):
|
||||
all_keras_tensors = False
|
||||
break
|
||||
if all_keras_tensors:
|
||||
input_layers = []
|
||||
node_indices = []
|
||||
tensor_indices = []
|
||||
for x in inputs:
|
||||
input_layer, node_index, tensor_index = x._keras_history
|
||||
input_layers.append(input_layer)
|
||||
node_indices.append(node_index)
|
||||
tensor_indices.append(tensor_index)
|
||||
merge_layer = Merge(input_layers, mode=mode,
|
||||
concat_axis=concat_axis,
|
||||
dot_axes=dot_axes,
|
||||
output_shape=output_shape,
|
||||
node_indices=node_indices,
|
||||
tensor_indices=tensor_indices,
|
||||
name=name)
|
||||
return merge_layer.inbound_nodes[0].output_tensors[0]
|
||||
else:
|
||||
merge_layer = Merge(mode=mode,
|
||||
concat_axis=concat_axis,
|
||||
dot_axes=dot_axes,
|
||||
output_shape=output_shape,
|
||||
name=name)
|
||||
return merge_layer(inputs)
|
||||
|
||||
|
||||
class Container(Layer):
|
||||
@@ -1640,7 +1671,8 @@ class Container(Layer):
|
||||
layers_by_depth[depth] = []
|
||||
layers_by_depth[depth].append(layer)
|
||||
|
||||
depth_keys = list(nodes_by_depth.keys())
|
||||
# get sorted list of layer depths
|
||||
depth_keys = list(layers_by_depth.keys())
|
||||
depth_keys.sort(reverse=True)
|
||||
|
||||
# set self.layers and self.layers_by_depth
|
||||
@@ -1654,6 +1686,10 @@ class Container(Layer):
|
||||
self.layers = layers
|
||||
self.layers_by_depth = layers_by_depth
|
||||
|
||||
# get sorted list of node depths
|
||||
depth_keys = list(nodes_by_depth.keys())
|
||||
depth_keys.sort(reverse=True)
|
||||
|
||||
# check that all tensors required are computable.
|
||||
# computable_tensors: all tensors in the graph
|
||||
# that can be computed from the inputs provided
|
||||
@@ -2235,7 +2271,7 @@ class Container(Layer):
|
||||
for layer in flattened_layers:
|
||||
g = f.create_group(layer.name)
|
||||
symbolic_weights = layer.trainable_weights + layer.non_trainable_weights
|
||||
weight_values = layer.get_weights()
|
||||
weight_values = K.batch_get_value(symbolic_weights)
|
||||
weight_names = []
|
||||
for i, (w, val) in enumerate(zip(symbolic_weights, weight_values)):
|
||||
if hasattr(w, 'name') and w.name:
|
||||
@@ -2285,12 +2321,29 @@ class Container(Layer):
|
||||
' layers into a model with ' +
|
||||
str(len(flattened_layers)) + ' layers.')
|
||||
|
||||
# we batch weight value assignments in a single backend call
|
||||
# which provides a speedup in TensorFlow.
|
||||
weight_value_tuples = []
|
||||
for k, name in enumerate(layer_names):
|
||||
g = f[name]
|
||||
weight_names = [n.decode('utf8') for n in g.attrs['weight_names']]
|
||||
if len(weight_names):
|
||||
weights = [g[weight_name] for weight_name in weight_names]
|
||||
flattened_layers[k].set_weights(weights)
|
||||
weight_values = [g[weight_name] for weight_name in weight_names]
|
||||
layer = flattened_layers[k]
|
||||
symbolic_weights = layer.trainable_weights + layer.non_trainable_weights
|
||||
if len(weight_values) != len(symbolic_weights):
|
||||
raise Exception('Layer #' + str(k) +
|
||||
' (named "' + layer.name +
|
||||
'" in the current model) was found to '
|
||||
'correspond to layer ' + name +
|
||||
' in the save file. '
|
||||
'However the new layer ' + layer.name +
|
||||
' expects ' + str(len(symbolic_weights)) +
|
||||
' weights, but the saved weights have ' +
|
||||
str(len(weight_values)) +
|
||||
' elements.')
|
||||
weight_value_tuples += zip(symbolic_weights, weight_values)
|
||||
K.batch_set_value(weight_value_tuples)
|
||||
f.close()
|
||||
|
||||
def to_json(self, **kwargs):
|
||||
|
||||
+102
-46
@@ -31,8 +31,8 @@ def standardize_input_data(data, names, shapes=None, check_batch_dim=True,
|
||||
arrays = []
|
||||
for name in names:
|
||||
if name not in data:
|
||||
raise Exception('No data provided for input "' +
|
||||
name + '". Input data keys: ' +
|
||||
raise Exception('No data provided for "' +
|
||||
name + '". Need data for each key in: ' +
|
||||
str(data.keys()))
|
||||
arrays.append(data[name])
|
||||
elif type(data) is list:
|
||||
@@ -66,6 +66,12 @@ def standardize_input_data(data, names, shapes=None, check_batch_dim=True,
|
||||
': data should be a Numpy array, '
|
||||
'or list/dict of Numpy arrays. '
|
||||
'Found: ' + str(data)[:200] + '...')
|
||||
if len(names) != 1:
|
||||
# case: model expects multiple inputs but only received
|
||||
# a single Numpy array
|
||||
raise Exception('The model expects ' + str(len(names)) +
|
||||
' input arrays, but only received one array. '
|
||||
'Found: array with shape ' + str(data.shape))
|
||||
arrays = [data]
|
||||
|
||||
# make arrays at least 2D
|
||||
@@ -153,7 +159,7 @@ def check_array_lengths(X, Y, W):
|
||||
raise Exception('All input arrays (x) should have '
|
||||
'the same number of samples.')
|
||||
set_y = set(y_lengths)
|
||||
if len(set_x) != 1:
|
||||
if len(set_y) != 1:
|
||||
raise Exception('All target arrays (y) should have '
|
||||
'the same number of samples.')
|
||||
set_w = set(w_lengths)
|
||||
@@ -195,7 +201,7 @@ def check_loss_and_target_compatibility(targets, losses, output_shapes):
|
||||
'Alternatively, you can use the loss function '
|
||||
'`sparse_categorical_crossentropy` instead, '
|
||||
'which does expect integer targets.')
|
||||
if loss.__name__ in key_losses and y.shape[1] != shape[1]:
|
||||
if loss.__name__ in key_losses and shape[1] is not None and y.shape[1] != shape[1]:
|
||||
raise Exception('A target array with shape ' + str(y.shape) +
|
||||
' was passed for an output of shape ' + str(shape) +
|
||||
' while using as loss `' + loss.__name__ + '`. '
|
||||
@@ -224,6 +230,22 @@ def collect_metrics(metrics, output_names):
|
||||
str(metrics))
|
||||
|
||||
|
||||
def collect_trainable_weights(layer):
|
||||
trainable = getattr(layer, 'trainable', True)
|
||||
if not trainable:
|
||||
return []
|
||||
weights = []
|
||||
if layer.__class__.__name__ in ['Sequential', 'Model']:
|
||||
for sublayer in layer.layers:
|
||||
weights += collect_trainable_weights(sublayer)
|
||||
elif layer.__class__.__name__ == 'Graph':
|
||||
for sublayer in layer._graph_nodes.values():
|
||||
weights += collect_trainable_weights(sublayer)
|
||||
else:
|
||||
weights += layer.trainable_weights
|
||||
return weights
|
||||
|
||||
|
||||
def batch_shuffle(index_array, batch_size):
|
||||
'''This shuffles an array in a batch-wise fashion.
|
||||
Useful for shuffling HDF5 arrays
|
||||
@@ -369,7 +391,7 @@ def standardize_weights(y, sample_weight=None, class_weight=None,
|
||||
def generator_queue(generator, max_q_size=10,
|
||||
wait_time=0.05, nb_worker=1):
|
||||
'''Builds a threading queue out of a data generator.
|
||||
Used in `fit_generator`, `evaluate_generator`.
|
||||
Used in `fit_generator`, `evaluate_generator`, `predict_generator`.
|
||||
'''
|
||||
q = queue.Queue()
|
||||
_stop = threading.Event()
|
||||
@@ -549,6 +571,10 @@ class Model(Container):
|
||||
name = self.output_names[i]
|
||||
self.targets.append(K.placeholder(ndim=len(shape), name=name + '_target'))
|
||||
|
||||
# prepare metrics
|
||||
self.metrics_names = ['loss']
|
||||
self.metrics = []
|
||||
|
||||
# compute total loss
|
||||
total_loss = None
|
||||
for i in range(len(self.outputs)):
|
||||
@@ -558,19 +584,20 @@ class Model(Container):
|
||||
sample_weight = sample_weights[i]
|
||||
mask = masks[i]
|
||||
loss_weight = loss_weights_list[i]
|
||||
output_loss = loss_weight * weighted_loss(y_true, y_pred,
|
||||
sample_weight, mask)
|
||||
output_loss = weighted_loss(y_true, y_pred,
|
||||
sample_weight, mask)
|
||||
if len(self.outputs) > 1:
|
||||
self.metrics.append(output_loss)
|
||||
self.metrics_names.append(self.output_names[i] + '_loss')
|
||||
if total_loss is None:
|
||||
total_loss = output_loss
|
||||
total_loss = loss_weight * output_loss
|
||||
else:
|
||||
total_loss += output_loss
|
||||
total_loss += loss_weight * output_loss
|
||||
|
||||
# add regularization penalties to the loss
|
||||
for r in self.regularizers:
|
||||
total_loss = r(total_loss)
|
||||
|
||||
# prepare metrics
|
||||
self.metrics_names = ['loss']
|
||||
self.metrics = []
|
||||
# list of same size as output_names.
|
||||
# contains tuples (metrics for output, names of metrics)
|
||||
nested_metrics = collect_metrics(metrics, self.output_names)
|
||||
@@ -586,8 +613,12 @@ class Model(Container):
|
||||
if output_shape[-1] == 1:
|
||||
# case: binary accuracy
|
||||
self.metrics.append(metrics_module.binary_accuracy(y_true, y_pred))
|
||||
elif self.loss_functions[i] == objectives.sparse_categorical_crossentropy:
|
||||
# case: categorical accuracy with sparse targets
|
||||
self.metrics.append(
|
||||
metrics_module.sparse_categorical_accuracy(y_true, y_pred))
|
||||
else:
|
||||
# case: categorical accuracy
|
||||
# case: categorical accuracy with dense targets
|
||||
self.metrics.append(metrics_module.categorical_accuracy(y_true, y_pred))
|
||||
if len(self.output_names) == 1:
|
||||
self.metrics_names.append('acc')
|
||||
@@ -616,19 +647,18 @@ class Model(Container):
|
||||
self.predict_function = None
|
||||
|
||||
def _make_train_function(self):
|
||||
if not hasattr(self, 'train_function'):
|
||||
raise Exception('You must compile your model before using it.')
|
||||
if self.train_function is None:
|
||||
if self.uses_learning_phase:
|
||||
inputs = self.inputs + self.targets + self.sample_weights + [K.learning_phase()]
|
||||
else:
|
||||
inputs = self.inputs + self.targets + self.sample_weights
|
||||
|
||||
# dedupe trainable weights
|
||||
trainable_weights_set = set()
|
||||
# get trainable weights
|
||||
trainable_weights = []
|
||||
for w in self.trainable_weights:
|
||||
if w not in trainable_weights_set:
|
||||
trainable_weights_set.add(w)
|
||||
trainable_weights.append(w)
|
||||
for layer in self.layers:
|
||||
trainable_weights += collect_trainable_weights(layer)
|
||||
|
||||
training_updates = self.optimizer.get_updates(trainable_weights, self.constraints, self.total_loss)
|
||||
updates = self.updates + training_updates
|
||||
@@ -640,6 +670,8 @@ class Model(Container):
|
||||
**self._function_kwargs)
|
||||
|
||||
def _make_test_function(self):
|
||||
if not hasattr(self, 'test_function'):
|
||||
raise Exception('You must compile your model before using it.')
|
||||
if self.test_function is None:
|
||||
if self.uses_learning_phase:
|
||||
inputs = self.inputs + self.targets + self.sample_weights + [K.learning_phase()]
|
||||
@@ -653,6 +685,8 @@ class Model(Container):
|
||||
**self._function_kwargs)
|
||||
|
||||
def _make_predict_function(self):
|
||||
if not hasattr(self, 'predict_function'):
|
||||
self.predict_function = None
|
||||
if self.predict_function is None:
|
||||
if self.uses_learning_phase:
|
||||
inputs = self.inputs + [K.learning_phase()]
|
||||
@@ -660,10 +694,11 @@ class Model(Container):
|
||||
inputs = self.inputs
|
||||
# returns network outputs. Does not update weights.
|
||||
# Does update the network states.
|
||||
kwargs = getattr(self, '_function_kwargs', {})
|
||||
self.predict_function = K.function(inputs,
|
||||
self.outputs,
|
||||
updates=self.state_updates,
|
||||
**self._function_kwargs)
|
||||
**kwargs)
|
||||
|
||||
def _fit_loop(self, f, ins, out_labels=[], batch_size=32,
|
||||
nb_epoch=100, verbose=1, callbacks=[],
|
||||
@@ -692,8 +727,6 @@ class Model(Container):
|
||||
# Returns
|
||||
`History` object.
|
||||
'''
|
||||
self.training_data = ins
|
||||
self.validation_data = val_ins
|
||||
do_validation = False
|
||||
if val_f and val_ins:
|
||||
do_validation = True
|
||||
@@ -710,7 +743,14 @@ class Model(Container):
|
||||
callbacks += [cbks.ProgbarLogger()]
|
||||
callbacks = cbks.CallbackList(callbacks)
|
||||
|
||||
callbacks._set_model(self)
|
||||
# it's possible to callback a different model than self
|
||||
# (used by Sequential models)
|
||||
if hasattr(self, 'callback_model') and self.callback_model:
|
||||
callback_model = self.callback_model
|
||||
else:
|
||||
callback_model = self
|
||||
|
||||
callbacks._set_model(callback_model)
|
||||
callbacks._set_params({
|
||||
'batch_size': batch_size,
|
||||
'nb_epoch': nb_epoch,
|
||||
@@ -720,8 +760,9 @@ class Model(Container):
|
||||
'metrics': callback_metrics,
|
||||
})
|
||||
callbacks.on_train_begin()
|
||||
callback_model.stop_training = False
|
||||
self.validation_data = val_ins
|
||||
|
||||
self.stop_training = False
|
||||
for epoch in range(nb_epoch):
|
||||
callbacks.on_epoch_begin(epoch)
|
||||
if shuffle == 'batch':
|
||||
@@ -768,7 +809,7 @@ class Model(Container):
|
||||
for l, o in zip(out_labels, val_outs):
|
||||
epoch_logs['val_' + l] = o
|
||||
callbacks.on_epoch_end(epoch, epoch_logs)
|
||||
if self.stop_training:
|
||||
if callback_model.stop_training:
|
||||
break
|
||||
callbacks.on_train_end()
|
||||
return self.history
|
||||
@@ -950,14 +991,6 @@ class Model(Container):
|
||||
class_weight=class_weight,
|
||||
check_batch_dim=False,
|
||||
batch_size=batch_size)
|
||||
# prepare input arrays and training function
|
||||
if self.uses_learning_phase:
|
||||
ins = x + y + sample_weights + [1.]
|
||||
else:
|
||||
ins = x + y + sample_weights
|
||||
self._make_train_function()
|
||||
f = self.train_function
|
||||
|
||||
# prepare validation data
|
||||
if validation_data:
|
||||
do_validation = True
|
||||
@@ -996,6 +1029,14 @@ class Model(Container):
|
||||
val_f = None
|
||||
val_ins = None
|
||||
|
||||
# prepare input arrays and training function
|
||||
if self.uses_learning_phase:
|
||||
ins = x + y + sample_weights + [1.]
|
||||
else:
|
||||
ins = x + y + sample_weights
|
||||
self._make_train_function()
|
||||
f = self.train_function
|
||||
|
||||
# prepare display labels
|
||||
out_labels = self.metrics_names
|
||||
if do_validation:
|
||||
@@ -1184,7 +1225,7 @@ class Model(Container):
|
||||
def fit_generator(self, generator, samples_per_epoch, nb_epoch,
|
||||
verbose=1, callbacks=[],
|
||||
validation_data=None, nb_val_samples=None,
|
||||
class_weight={}):
|
||||
class_weight={}, max_q_size=10):
|
||||
'''Fits the model on data generated batch-by-batch by
|
||||
a Python generator.
|
||||
The generator is run in parallel to the model, for efficiency.
|
||||
@@ -1214,6 +1255,7 @@ class Model(Container):
|
||||
at the end of every epoch.
|
||||
class_weight: dictionary mapping class indices to a weight
|
||||
for the class.
|
||||
max_q_size: maximum size for the generator queue
|
||||
|
||||
# Returns
|
||||
A `History` object.
|
||||
@@ -1261,7 +1303,12 @@ class Model(Container):
|
||||
callbacks += [cbks.ProgbarLogger()]
|
||||
callbacks = cbks.CallbackList(callbacks)
|
||||
|
||||
callbacks._set_model(self)
|
||||
# it's possible to callback a different model than self:
|
||||
if hasattr(self, 'callback_model') and self.callback_model:
|
||||
callback_model = self.callback_model
|
||||
else:
|
||||
callback_model = self
|
||||
callbacks._set_model(callback_model)
|
||||
callbacks._set_params({
|
||||
'nb_epoch': nb_epoch,
|
||||
'nb_sample': samples_per_epoch,
|
||||
@@ -1287,9 +1334,9 @@ class Model(Container):
|
||||
self.validation_data = None
|
||||
|
||||
# start generator thread storing batches into a queue
|
||||
data_gen_queue, _stop = generator_queue(generator)
|
||||
data_gen_queue, _stop = generator_queue(generator, max_q_size=max_q_size)
|
||||
|
||||
self.stop_training = False
|
||||
callback_model.stop_training = False
|
||||
while epoch < nb_epoch:
|
||||
callbacks.on_epoch_begin(epoch)
|
||||
samples_seen = 0
|
||||
@@ -1322,6 +1369,8 @@ class Model(Container):
|
||||
batch_logs = {}
|
||||
if type(x) is list:
|
||||
batch_size = len(x[0])
|
||||
elif type(x) is dict:
|
||||
batch_size = len(list(x.values())[0])
|
||||
else:
|
||||
batch_size = len(x)
|
||||
batch_logs['batch'] = batch_index
|
||||
@@ -1334,7 +1383,7 @@ class Model(Container):
|
||||
class_weight=class_weight)
|
||||
except Exception as e:
|
||||
_stop.set()
|
||||
raise e
|
||||
raise
|
||||
|
||||
if type(outs) != list:
|
||||
outs = [outs]
|
||||
@@ -1358,7 +1407,8 @@ class Model(Container):
|
||||
if samples_seen >= samples_per_epoch and do_validation:
|
||||
if val_gen:
|
||||
val_outs = self.evaluate_generator(validation_data,
|
||||
nb_val_samples)
|
||||
nb_val_samples,
|
||||
max_q_size=max_q_size)
|
||||
else:
|
||||
# no need for try/except because
|
||||
# data has already been validated
|
||||
@@ -1373,14 +1423,14 @@ class Model(Container):
|
||||
|
||||
callbacks.on_epoch_end(epoch, epoch_logs)
|
||||
epoch += 1
|
||||
if self.stop_training:
|
||||
if callback_model.stop_training:
|
||||
break
|
||||
|
||||
_stop.set()
|
||||
callbacks.on_train_end()
|
||||
return self.history
|
||||
|
||||
def evaluate_generator(self, generator, val_samples):
|
||||
def evaluate_generator(self, generator, val_samples, max_q_size=10):
|
||||
'''Evaluates the model on a data generator. The generator should
|
||||
return the same kind of data as accepted by `test_on_batch`.
|
||||
|
||||
@@ -1391,6 +1441,7 @@ class Model(Container):
|
||||
val_samples:
|
||||
total number of samples to generate from `generator`
|
||||
before returning.
|
||||
max_q_size: maximum size for the generator queue
|
||||
|
||||
# Returns
|
||||
Scalar test loss (if the model has a single output and no metrics)
|
||||
@@ -1404,7 +1455,7 @@ class Model(Container):
|
||||
wait_time = 0.01
|
||||
all_outs = []
|
||||
weights = []
|
||||
data_gen_queue, _stop = generator_queue(generator)
|
||||
data_gen_queue, _stop = generator_queue(generator, max_q_size=max_q_size)
|
||||
|
||||
while processed_samples < val_samples:
|
||||
generator_output = None
|
||||
@@ -1434,10 +1485,12 @@ class Model(Container):
|
||||
outs = self.test_on_batch(x, y, sample_weight=sample_weight)
|
||||
except Exception as e:
|
||||
_stop.set()
|
||||
raise e
|
||||
raise
|
||||
|
||||
if type(x) is list:
|
||||
nb_samples = len(x[0])
|
||||
elif type(x) is dict:
|
||||
nb_samples = len(list(x.values())[0])
|
||||
else:
|
||||
nb_samples = len(x)
|
||||
all_outs.append(outs)
|
||||
@@ -1456,7 +1509,7 @@ class Model(Container):
|
||||
weights=weights))
|
||||
return averages
|
||||
|
||||
def predict_generator(self, generator, val_samples):
|
||||
def predict_generator(self, generator, val_samples, max_q_size=10):
|
||||
'''Generates predictions for the input samples from a data generator.
|
||||
The generator should return the same kind of data as accepted by
|
||||
`predict_on_batch`.
|
||||
@@ -1465,6 +1518,7 @@ class Model(Container):
|
||||
generator: generator yielding batches of input samples.
|
||||
val_samples: total number of samples to generate from `generator`
|
||||
before returning.
|
||||
max_q_size: maximum size for the generator queue
|
||||
|
||||
# Returns
|
||||
Numpy array(s) of predictions.
|
||||
@@ -1474,7 +1528,7 @@ class Model(Container):
|
||||
processed_samples = 0
|
||||
wait_time = 0.01
|
||||
all_outs = []
|
||||
data_gen_queue, _stop = generator_queue(generator)
|
||||
data_gen_queue, _stop = generator_queue(generator, max_q_size=max_q_size)
|
||||
|
||||
while processed_samples < val_samples:
|
||||
generator_output = None
|
||||
@@ -1503,10 +1557,12 @@ class Model(Container):
|
||||
outs = self.predict_on_batch(x)
|
||||
except Exception as e:
|
||||
_stop.set()
|
||||
raise e
|
||||
raise
|
||||
|
||||
if type(x) is list:
|
||||
nb_samples = len(x[0])
|
||||
elif type(x) is dict:
|
||||
nb_samples = len(list(x.values())[0])
|
||||
else:
|
||||
nb_samples = len(x)
|
||||
|
||||
|
||||
@@ -65,6 +65,7 @@ class Convolution1D(Layer):
|
||||
(eg. maxnorm, nonneg), applied to the main weights matrix.
|
||||
b_constraint: instance of the [constraints](../constraints.md) module,
|
||||
applied to the bias.
|
||||
bias: whether to include a bias (i.e. make the layer affine rather than linear).
|
||||
input_dim: Number of channels/dimensions in the input.
|
||||
Either this argument or the keyword argument `input_shape`must be
|
||||
provided when using this layer as the first layer in a model.
|
||||
@@ -85,7 +86,7 @@ class Convolution1D(Layer):
|
||||
border_mode='valid', subsample_length=1,
|
||||
W_regularizer=None, b_regularizer=None, activity_regularizer=None,
|
||||
W_constraint=None, b_constraint=None,
|
||||
input_dim=None, input_length=None, **kwargs):
|
||||
bias=True, input_dim=None, input_length=None, **kwargs):
|
||||
|
||||
if border_mode not in {'valid', 'same'}:
|
||||
raise Exception('Invalid border mode for Convolution1D:', border_mode)
|
||||
@@ -106,6 +107,7 @@ class Convolution1D(Layer):
|
||||
self.W_constraint = constraints.get(W_constraint)
|
||||
self.b_constraint = constraints.get(b_constraint)
|
||||
|
||||
self.bias = bias
|
||||
self.input_spec = [InputSpec(ndim=3)]
|
||||
self.initial_weights = weights
|
||||
self.input_dim = input_dim
|
||||
@@ -118,15 +120,18 @@ class Convolution1D(Layer):
|
||||
input_dim = input_shape[2]
|
||||
self.W_shape = (self.nb_filter, input_dim, self.filter_length, 1)
|
||||
self.W = self.init(self.W_shape, name='{}_W'.format(self.name))
|
||||
self.b = K.zeros((self.nb_filter,), name='{}_b'.format(self.name))
|
||||
self.trainable_weights = [self.W, self.b]
|
||||
if self.bias:
|
||||
self.b = K.zeros((self.nb_filter,), name='{}_b'.format(self.name))
|
||||
self.trainable_weights = [self.W, self.b]
|
||||
else:
|
||||
self.trainable_weights = [self.W]
|
||||
self.regularizers = []
|
||||
|
||||
if self.W_regularizer:
|
||||
self.W_regularizer.set_param(self.W)
|
||||
self.regularizers.append(self.W_regularizer)
|
||||
|
||||
if self.b_regularizer:
|
||||
if self.bias and self.b_regularizer:
|
||||
self.b_regularizer.set_param(self.b)
|
||||
self.regularizers.append(self.b_regularizer)
|
||||
|
||||
@@ -137,7 +142,7 @@ class Convolution1D(Layer):
|
||||
self.constraints = {}
|
||||
if self.W_constraint:
|
||||
self.constraints[self.W] = self.W_constraint
|
||||
if self.b_constraint:
|
||||
if self.bias and self.b_constraint:
|
||||
self.constraints[self.b] = self.b_constraint
|
||||
|
||||
if self.initial_weights is not None:
|
||||
@@ -154,11 +159,11 @@ class Convolution1D(Layer):
|
||||
def call(self, x, mask=None):
|
||||
x = K.expand_dims(x, -1) # add a dimension of the right
|
||||
x = K.permute_dimensions(x, (0, 2, 1, 3))
|
||||
conv_out = K.conv2d(x, self.W, strides=self.subsample,
|
||||
border_mode=self.border_mode,
|
||||
dim_ordering='th')
|
||||
|
||||
output = conv_out + K.reshape(self.b, (1, self.nb_filter, 1, 1))
|
||||
output = K.conv2d(x, self.W, strides=self.subsample,
|
||||
border_mode=self.border_mode,
|
||||
dim_ordering='th')
|
||||
if self.bias:
|
||||
output += K.reshape(self.b, (1, self.nb_filter, 1, 1))
|
||||
output = self.activation(output)
|
||||
output = K.squeeze(output, 3) # remove the dummy 3rd dimension
|
||||
output = K.permute_dimensions(output, (0, 2, 1))
|
||||
@@ -176,6 +181,7 @@ class Convolution1D(Layer):
|
||||
'activity_regularizer': self.activity_regularizer.get_config() if self.activity_regularizer else None,
|
||||
'W_constraint': self.W_constraint.get_config() if self.W_constraint else None,
|
||||
'b_constraint': self.b_constraint.get_config() if self.b_constraint else None,
|
||||
'bias': self.bias,
|
||||
'input_dim': self.input_dim,
|
||||
'input_length': self.input_length}
|
||||
base_config = super(Convolution1D, self).get_config()
|
||||
@@ -232,6 +238,7 @@ class Convolution2D(Layer):
|
||||
applied to the bias.
|
||||
dim_ordering: 'th' or 'tf'. In 'th' mode, the channels dimension
|
||||
(the depth) is at index 1, in 'tf' mode is it at index 3.
|
||||
bias: whether to include a bias (i.e. make the layer affine rather than linear).
|
||||
|
||||
# Input shape
|
||||
4D tensor with shape:
|
||||
@@ -250,7 +257,8 @@ class Convolution2D(Layer):
|
||||
init='glorot_uniform', activation='linear', weights=None,
|
||||
border_mode='valid', subsample=(1, 1), dim_ordering='th',
|
||||
W_regularizer=None, b_regularizer=None, activity_regularizer=None,
|
||||
W_constraint=None, b_constraint=None, **kwargs):
|
||||
W_constraint=None, b_constraint=None,
|
||||
bias=True, **kwargs):
|
||||
|
||||
if border_mode not in {'valid', 'same'}:
|
||||
raise Exception('Invalid border mode for Convolution2D:', border_mode)
|
||||
@@ -272,6 +280,7 @@ class Convolution2D(Layer):
|
||||
self.W_constraint = constraints.get(W_constraint)
|
||||
self.b_constraint = constraints.get(b_constraint)
|
||||
|
||||
self.bias = bias
|
||||
self.input_spec = [InputSpec(ndim=4)]
|
||||
self.initial_weights = weights
|
||||
super(Convolution2D, self).__init__(**kwargs)
|
||||
@@ -286,15 +295,18 @@ class Convolution2D(Layer):
|
||||
else:
|
||||
raise Exception('Invalid dim_ordering: ' + self.dim_ordering)
|
||||
self.W = self.init(self.W_shape, name='{}_W'.format(self.name))
|
||||
self.b = K.zeros((self.nb_filter,), name='{}_b'.format(self.name))
|
||||
self.trainable_weights = [self.W, self.b]
|
||||
if self.bias:
|
||||
self.b = K.zeros((self.nb_filter,), name='{}_b'.format(self.name))
|
||||
self.trainable_weights = [self.W, self.b]
|
||||
else:
|
||||
self.trainable_weights = [self.W]
|
||||
self.regularizers = []
|
||||
|
||||
if self.W_regularizer:
|
||||
self.W_regularizer.set_param(self.W)
|
||||
self.regularizers.append(self.W_regularizer)
|
||||
|
||||
if self.b_regularizer:
|
||||
if self.bias and self.b_regularizer:
|
||||
self.b_regularizer.set_param(self.b)
|
||||
self.regularizers.append(self.b_regularizer)
|
||||
|
||||
@@ -305,7 +317,7 @@ class Convolution2D(Layer):
|
||||
self.constraints = {}
|
||||
if self.W_constraint:
|
||||
self.constraints[self.W] = self.W_constraint
|
||||
if self.b_constraint:
|
||||
if self.bias and self.b_constraint:
|
||||
self.constraints[self.b] = self.b_constraint
|
||||
|
||||
if self.initial_weights is not None:
|
||||
@@ -335,16 +347,17 @@ class Convolution2D(Layer):
|
||||
raise Exception('Invalid dim_ordering: ' + self.dim_ordering)
|
||||
|
||||
def call(self, x, mask=None):
|
||||
conv_out = K.conv2d(x, self.W, strides=self.subsample,
|
||||
border_mode=self.border_mode,
|
||||
dim_ordering=self.dim_ordering,
|
||||
filter_shape=self.W_shape)
|
||||
if self.dim_ordering == 'th':
|
||||
output = conv_out + K.reshape(self.b, (1, self.nb_filter, 1, 1))
|
||||
elif self.dim_ordering == 'tf':
|
||||
output = conv_out + K.reshape(self.b, (1, 1, 1, self.nb_filter))
|
||||
else:
|
||||
raise Exception('Invalid dim_ordering: ' + self.dim_ordering)
|
||||
output = K.conv2d(x, self.W, strides=self.subsample,
|
||||
border_mode=self.border_mode,
|
||||
dim_ordering=self.dim_ordering,
|
||||
filter_shape=self.W_shape)
|
||||
if self.bias:
|
||||
if self.dim_ordering == 'th':
|
||||
output += K.reshape(self.b, (1, self.nb_filter, 1, 1))
|
||||
elif self.dim_ordering == 'tf':
|
||||
output += K.reshape(self.b, (1, 1, 1, self.nb_filter))
|
||||
else:
|
||||
raise Exception('Invalid dim_ordering: ' + self.dim_ordering)
|
||||
output = self.activation(output)
|
||||
return output
|
||||
|
||||
@@ -361,7 +374,8 @@ class Convolution2D(Layer):
|
||||
'b_regularizer': self.b_regularizer.get_config() if self.b_regularizer else None,
|
||||
'activity_regularizer': self.activity_regularizer.get_config() if self.activity_regularizer else None,
|
||||
'W_constraint': self.W_constraint.get_config() if self.W_constraint else None,
|
||||
'b_constraint': self.b_constraint.get_config() if self.b_constraint else None}
|
||||
'b_constraint': self.b_constraint.get_config() if self.b_constraint else None,
|
||||
'bias': self.bias}
|
||||
base_config = super(Convolution2D, self).get_config()
|
||||
return dict(list(base_config.items()) + list(config.items()))
|
||||
|
||||
@@ -407,6 +421,7 @@ class Convolution3D(Layer):
|
||||
applied to the bias.
|
||||
dim_ordering: 'th' or 'tf'. In 'th' mode, the channels dimension
|
||||
(the depth) is at index 1, in 'tf' mode it is at index 4.
|
||||
bias: whether to include a bias (i.e. make the layer affine rather than linear).
|
||||
|
||||
# Input shape
|
||||
5D tensor with shape:
|
||||
@@ -426,7 +441,8 @@ class Convolution3D(Layer):
|
||||
init='glorot_uniform', activation='linear', weights=None,
|
||||
border_mode='valid', subsample=(1, 1, 1), dim_ordering='th',
|
||||
W_regularizer=None, b_regularizer=None, activity_regularizer=None,
|
||||
W_constraint=None, b_constraint=None, **kwargs):
|
||||
W_constraint=None, b_constraint=None,
|
||||
bias=True, **kwargs):
|
||||
if K._BACKEND != 'theano':
|
||||
raise Exception(self.__class__.__name__ +
|
||||
' is currently only working with Theano backend.')
|
||||
@@ -451,6 +467,7 @@ class Convolution3D(Layer):
|
||||
self.W_constraint = constraints.get(W_constraint)
|
||||
self.b_constraint = constraints.get(b_constraint)
|
||||
|
||||
self.bias = bias
|
||||
self.input_spec = [InputSpec(ndim=5)]
|
||||
self.initial_weights = weights
|
||||
super(Convolution3D, self).__init__(**kwargs)
|
||||
@@ -471,15 +488,18 @@ class Convolution3D(Layer):
|
||||
raise Exception('Invalid dim_ordering: ' + self.dim_ordering)
|
||||
|
||||
self.W = self.init(self.W_shape, name='{}_W'.format(self.name))
|
||||
self.b = K.zeros((self.nb_filter,), name='{}_b'.format(self.name))
|
||||
self.trainable_weights = [self.W, self.b]
|
||||
self.regularizers = []
|
||||
if self.bias:
|
||||
self.b = K.zeros((self.nb_filter,), name='{}_b'.format(self.name))
|
||||
self.trainable_weights = [self.W, self.b]
|
||||
else:
|
||||
self.trainable_weights = [self.W]
|
||||
|
||||
self.regularizers = []
|
||||
if self.W_regularizer:
|
||||
self.W_regularizer.set_param(self.W)
|
||||
self.regularizers.append(self.W_regularizer)
|
||||
|
||||
if self.b_regularizer:
|
||||
if self.bias and self.b_regularizer:
|
||||
self.b_regularizer.set_param(self.b)
|
||||
self.regularizers.append(self.b_regularizer)
|
||||
|
||||
@@ -490,7 +510,7 @@ class Convolution3D(Layer):
|
||||
self.constraints = {}
|
||||
if self.W_constraint:
|
||||
self.constraints[self.W] = self.W_constraint
|
||||
if self.b_constraint:
|
||||
if self.bias and self.b_constraint:
|
||||
self.constraints[self.b] = self.b_constraint
|
||||
|
||||
if self.initial_weights is not None:
|
||||
@@ -525,36 +545,37 @@ class Convolution3D(Layer):
|
||||
|
||||
def call(self, x, mask=None):
|
||||
input_shape = self.input_spec[0].shape
|
||||
conv_out = K.conv3d(x, self.W, strides=self.subsample,
|
||||
border_mode=self.border_mode,
|
||||
dim_ordering=self.dim_ordering,
|
||||
volume_shape=input_shape,
|
||||
filter_shape=self.W_shape)
|
||||
|
||||
if self.dim_ordering == 'th':
|
||||
output = conv_out + K.reshape(self.b, (1, self.nb_filter, 1, 1, 1))
|
||||
elif self.dim_ordering == 'tf':
|
||||
output = conv_out + K.reshape(self.b, (1, 1, 1, 1, self.nb_filter))
|
||||
else:
|
||||
raise Exception('Invalid dim_ordering: ' + self.dim_ordering)
|
||||
output = K.conv3d(x, self.W, strides=self.subsample,
|
||||
border_mode=self.border_mode,
|
||||
dim_ordering=self.dim_ordering,
|
||||
volume_shape=input_shape,
|
||||
filter_shape=self.W_shape)
|
||||
if self.bias:
|
||||
if self.dim_ordering == 'th':
|
||||
output += K.reshape(self.b, (1, self.nb_filter, 1, 1, 1))
|
||||
elif self.dim_ordering == 'tf':
|
||||
output += K.reshape(self.b, (1, 1, 1, 1, self.nb_filter))
|
||||
else:
|
||||
raise Exception('Invalid dim_ordering: ' + self.dim_ordering)
|
||||
output = self.activation(output)
|
||||
return output
|
||||
|
||||
def get_config(self):
|
||||
config = {"nb_filter": self.nb_filter,
|
||||
"kernel_dim1": self.kernel_dim1,
|
||||
"kernel_dim2": self.kernel_dim2,
|
||||
"kernel_dim3": self.kernel_dim3,
|
||||
"dim_ordering": self.dim_ordering,
|
||||
"init": self.init.__name__,
|
||||
"activation": self.activation.__name__,
|
||||
"border_mode": self.border_mode,
|
||||
"subsample": self.subsample,
|
||||
"W_regularizer": self.W_regularizer.get_config() if self.W_regularizer else None,
|
||||
"b_regularizer": self.b_regularizer.get_config() if self.b_regularizer else None,
|
||||
"activity_regularizer": self.activity_regularizer.get_config() if self.activity_regularizer else None,
|
||||
"W_constraint": self.W_constraint.get_config() if self.W_constraint else None,
|
||||
"b_constraint": self.b_constraint.get_config() if self.b_constraint else None}
|
||||
config = {'nb_filter': self.nb_filter,
|
||||
'kernel_dim1': self.kernel_dim1,
|
||||
'kernel_dim2': self.kernel_dim2,
|
||||
'kernel_dim3': self.kernel_dim3,
|
||||
'dim_ordering': self.dim_ordering,
|
||||
'init': self.init.__name__,
|
||||
'activation': self.activation.__name__,
|
||||
'border_mode': self.border_mode,
|
||||
'subsample': self.subsample,
|
||||
'W_regularizer': self.W_regularizer.get_config() if self.W_regularizer else None,
|
||||
'b_regularizer': self.b_regularizer.get_config() if self.b_regularizer else None,
|
||||
'activity_regularizer': self.activity_regularizer.get_config() if self.activity_regularizer else None,
|
||||
'W_constraint': self.W_constraint.get_config() if self.W_constraint else None,
|
||||
'b_constraint': self.b_constraint.get_config() if self.b_constraint else None,
|
||||
'bias': self.bias}
|
||||
base_config = super(Convolution3D, self).get_config()
|
||||
return dict(list(base_config.items()) + list(config.items()))
|
||||
|
||||
|
||||
+79
-38
@@ -76,9 +76,10 @@ class Dropout(Layer):
|
||||
- [Dropout: A Simple Way to Prevent Neural Networks from Overfitting](http://www.cs.toronto.edu/~rsalakhu/papers/srivastava14a.pdf)
|
||||
'''
|
||||
def __init__(self, p, **kwargs):
|
||||
self.supports_masking = True
|
||||
self.uses_learning_phase = True
|
||||
self.p = p
|
||||
if 0. < self.p < 1.:
|
||||
self.uses_learning_phase = True
|
||||
self.supports_masking = True
|
||||
super(Dropout, self).__init__(**kwargs)
|
||||
|
||||
def call(self, x, mask=None):
|
||||
@@ -510,12 +511,14 @@ class Dense(Layer):
|
||||
|
||||
```python
|
||||
# as first layer in a sequential model:
|
||||
model = Sequential(Dense(32, input_dim=16))
|
||||
model = Sequential()
|
||||
model.add(Dense(32, input_dim=16))
|
||||
# now the model will take as input arrays of shape (*, 16)
|
||||
# and output arrays of shape (*, 32)
|
||||
|
||||
# this is equivalent to the above:
|
||||
model = Sequential(Dense(32, input_shape=(16,)))
|
||||
model = Sequential()
|
||||
model.add(Dense(32, input_shape=(16,)))
|
||||
|
||||
# after the first layer, you don't need to specify
|
||||
# the size of the input anymore:
|
||||
@@ -547,6 +550,7 @@ class Dense(Layer):
|
||||
(eg. maxnorm, nonneg), applied to the main weights matrix.
|
||||
b_constraint: instance of the [constraints](../constraints.md) module,
|
||||
applied to the bias.
|
||||
bias: whether to include a bias (i.e. make the layer affine rather than linear).
|
||||
input_dim: dimensionality of the input (integer).
|
||||
This argument (or alternatively, the keyword argument `input_shape`)
|
||||
is required when using this layer as the first layer in a model.
|
||||
@@ -559,7 +563,8 @@ class Dense(Layer):
|
||||
'''
|
||||
def __init__(self, output_dim, init='glorot_uniform', activation='linear', weights=None,
|
||||
W_regularizer=None, b_regularizer=None, activity_regularizer=None,
|
||||
W_constraint=None, b_constraint=None, input_dim=None, **kwargs):
|
||||
W_constraint=None, b_constraint=None,
|
||||
bias=True, input_dim=None, **kwargs):
|
||||
self.init = initializations.get(init)
|
||||
self.activation = activations.get(activation)
|
||||
self.output_dim = output_dim
|
||||
@@ -572,6 +577,7 @@ class Dense(Layer):
|
||||
self.W_constraint = constraints.get(W_constraint)
|
||||
self.b_constraint = constraints.get(b_constraint)
|
||||
|
||||
self.bias = bias
|
||||
self.initial_weights = weights
|
||||
self.input_spec = [InputSpec(ndim=2)]
|
||||
|
||||
@@ -587,16 +593,19 @@ class Dense(Layer):
|
||||
|
||||
self.W = self.init((input_dim, self.output_dim),
|
||||
name='{}_W'.format(self.name))
|
||||
self.b = K.zeros((self.output_dim,),
|
||||
name='{}_b'.format(self.name))
|
||||
self.trainable_weights = [self.W, self.b]
|
||||
if self.bias:
|
||||
self.b = K.zeros((self.output_dim,),
|
||||
name='{}_b'.format(self.name))
|
||||
self.trainable_weights = [self.W, self.b]
|
||||
else:
|
||||
self.trainable_weights = [self.W]
|
||||
|
||||
self.regularizers = []
|
||||
if self.W_regularizer:
|
||||
self.W_regularizer.set_param(self.W)
|
||||
self.regularizers.append(self.W_regularizer)
|
||||
|
||||
if self.b_regularizer:
|
||||
if self.bias and self.b_regularizer:
|
||||
self.b_regularizer.set_param(self.b)
|
||||
self.regularizers.append(self.b_regularizer)
|
||||
|
||||
@@ -607,7 +616,7 @@ class Dense(Layer):
|
||||
self.constraints = {}
|
||||
if self.W_constraint:
|
||||
self.constraints[self.W] = self.W_constraint
|
||||
if self.b_constraint:
|
||||
if self.bias and self.b_constraint:
|
||||
self.constraints[self.b] = self.b_constraint
|
||||
|
||||
if self.initial_weights is not None:
|
||||
@@ -615,7 +624,10 @@ class Dense(Layer):
|
||||
del self.initial_weights
|
||||
|
||||
def call(self, x, mask=None):
|
||||
return self.activation(K.dot(x, self.W) + self.b)
|
||||
output = K.dot(x, self.W)
|
||||
if self.bias:
|
||||
output += self.b
|
||||
return self.activation(output)
|
||||
|
||||
def get_output_shape_for(self, input_shape):
|
||||
assert input_shape and len(input_shape) == 2
|
||||
@@ -630,6 +642,7 @@ class Dense(Layer):
|
||||
'activity_regularizer': self.activity_regularizer.get_config() if self.activity_regularizer else None,
|
||||
'W_constraint': self.W_constraint.get_config() if self.W_constraint else None,
|
||||
'b_constraint': self.b_constraint.get_config() if self.b_constraint else None,
|
||||
'bias': self.bias,
|
||||
'input_dim': self.input_dim}
|
||||
base_config = super(Dense, self).get_config()
|
||||
return dict(list(base_config.items()) + list(config.items()))
|
||||
@@ -707,6 +720,7 @@ class MaxoutDense(Layer):
|
||||
(eg. maxnorm, nonneg), applied to the main weights matrix.
|
||||
b_constraint: instance of the [constraints](../constraints.md) module,
|
||||
applied to the bias.
|
||||
bias: whether to include a bias (i.e. make the layer affine rather than linear).
|
||||
input_dim: dimensionality of the input (integer).
|
||||
This argument (or alternatively, the keyword argument `input_shape`)
|
||||
is required when using this layer as the first layer in a model.
|
||||
@@ -723,7 +737,8 @@ class MaxoutDense(Layer):
|
||||
def __init__(self, output_dim, nb_feature=4,
|
||||
init='glorot_uniform', weights=None,
|
||||
W_regularizer=None, b_regularizer=None, activity_regularizer=None,
|
||||
W_constraint=None, b_constraint=None, input_dim=None, **kwargs):
|
||||
W_constraint=None, b_constraint=None,
|
||||
bias=True, input_dim=None, **kwargs):
|
||||
self.output_dim = output_dim
|
||||
self.nb_feature = nb_feature
|
||||
self.init = initializations.get(init)
|
||||
@@ -735,6 +750,7 @@ class MaxoutDense(Layer):
|
||||
self.W_constraint = constraints.get(W_constraint)
|
||||
self.b_constraint = constraints.get(b_constraint)
|
||||
|
||||
self.bias = bias
|
||||
self.initial_weights = weights
|
||||
self.input_spec = [InputSpec(ndim=2)]
|
||||
|
||||
@@ -750,17 +766,19 @@ class MaxoutDense(Layer):
|
||||
|
||||
self.W = self.init((self.nb_feature, input_dim, self.output_dim),
|
||||
name='{}_W'.format(self.name))
|
||||
self.b = K.zeros((self.nb_feature, self.output_dim),
|
||||
name='{}_b'.format(self.name))
|
||||
if self.bias:
|
||||
self.b = K.zeros((self.nb_feature, self.output_dim),
|
||||
name='{}_b'.format(self.name))
|
||||
self.trainable_weights = [self.W, self.b]
|
||||
else:
|
||||
self.trainable_weights = [self.W]
|
||||
|
||||
self.trainable_weights = [self.W, self.b]
|
||||
self.regularizers = []
|
||||
|
||||
if self.W_regularizer:
|
||||
self.W_regularizer.set_param(self.W)
|
||||
self.regularizers.append(self.W_regularizer)
|
||||
|
||||
if self.b_regularizer:
|
||||
if self.bias and self.b_regularizer:
|
||||
self.b_regularizer.set_param(self.b)
|
||||
self.regularizers.append(self.b_regularizer)
|
||||
|
||||
@@ -771,7 +789,7 @@ class MaxoutDense(Layer):
|
||||
self.constraints = {}
|
||||
if self.W_constraint:
|
||||
self.constraints[self.W] = self.W_constraint
|
||||
if self.b_constraint:
|
||||
if self.bias and self.b_constraint:
|
||||
self.constraints[self.b] = self.b_constraint
|
||||
|
||||
if self.initial_weights is not None:
|
||||
@@ -784,7 +802,10 @@ class MaxoutDense(Layer):
|
||||
|
||||
def call(self, x, mask=None):
|
||||
# no activation, this layer is only linear.
|
||||
output = K.max(K.dot(x, self.W) + self.b, axis=1)
|
||||
output = K.dot(x, self.W)
|
||||
if self.bias:
|
||||
output += self.b
|
||||
output = K.max(output, axis=1)
|
||||
return output
|
||||
|
||||
def get_config(self):
|
||||
@@ -796,6 +817,7 @@ class MaxoutDense(Layer):
|
||||
'activity_regularizer': self.activity_regularizer.get_config() if self.activity_regularizer else None,
|
||||
'W_constraint': self.W_constraint.get_config() if self.W_constraint else None,
|
||||
'b_constraint': self.b_constraint.get_config() if self.b_constraint else None,
|
||||
'bias': self.bias,
|
||||
'input_dim': self.input_dim}
|
||||
base_config = super(MaxoutDense, self).get_config()
|
||||
return dict(list(base_config.items()) + list(config.items()))
|
||||
@@ -830,6 +852,7 @@ class Highway(Layer):
|
||||
(eg. maxnorm, nonneg), applied to the main weights matrix.
|
||||
b_constraint: instance of the [constraints](../constraints.md) module,
|
||||
applied to the bias.
|
||||
bias: whether to include a bias (i.e. make the layer affine rather than linear).
|
||||
input_dim: dimensionality of the input (integer).
|
||||
This argument (or alternatively, the keyword argument `input_shape`)
|
||||
is required when using this layer as the first layer in a model.
|
||||
@@ -846,7 +869,8 @@ class Highway(Layer):
|
||||
def __init__(self, init='glorot_uniform', transform_bias=-2,
|
||||
activation='linear', weights=None,
|
||||
W_regularizer=None, b_regularizer=None, activity_regularizer=None,
|
||||
W_constraint=None, b_constraint=None, input_dim=None, **kwargs):
|
||||
W_constraint=None, b_constraint=None,
|
||||
bias=True, input_dim=None, **kwargs):
|
||||
self.init = initializations.get(init)
|
||||
self.transform_bias = transform_bias
|
||||
self.activation = activations.get(activation)
|
||||
@@ -858,6 +882,7 @@ class Highway(Layer):
|
||||
self.W_constraint = constraints.get(W_constraint)
|
||||
self.b_constraint = constraints.get(b_constraint)
|
||||
|
||||
self.bias = bias
|
||||
self.initial_weights = weights
|
||||
self.input_spec = [InputSpec(ndim=2)]
|
||||
|
||||
@@ -876,19 +901,21 @@ class Highway(Layer):
|
||||
self.W_carry = self.init((input_dim, input_dim),
|
||||
name='{}_W_carry'.format(self.name))
|
||||
|
||||
self.b = K.zeros((input_dim,), name='{}_b'.format(self.name))
|
||||
# initialize with a vector of values `transform_bias`
|
||||
self.b_carry = K.variable(np.ones((input_dim,)) * self.transform_bias,
|
||||
name='{}_b_carry'.format(self.name))
|
||||
|
||||
self.trainable_weights = [self.W, self.b, self.W_carry, self.b_carry]
|
||||
if self.bias:
|
||||
self.b = K.zeros((input_dim,), name='{}_b'.format(self.name))
|
||||
# initialize with a vector of values `transform_bias`
|
||||
self.b_carry = K.variable(np.ones((input_dim,)) * self.transform_bias,
|
||||
name='{}_b_carry'.format(self.name))
|
||||
self.trainable_weights = [self.W, self.b, self.W_carry, self.b_carry]
|
||||
else:
|
||||
self.trainable_weights = [self.W, self.W_carry]
|
||||
|
||||
self.regularizers = []
|
||||
if self.W_regularizer:
|
||||
self.W_regularizer.set_param(self.W)
|
||||
self.regularizers.append(self.W_regularizer)
|
||||
|
||||
if self.b_regularizer:
|
||||
if self.bias and self.b_regularizer:
|
||||
self.b_regularizer.set_param(self.b)
|
||||
self.regularizers.append(self.b_regularizer)
|
||||
|
||||
@@ -899,7 +926,7 @@ class Highway(Layer):
|
||||
self.constraints = {}
|
||||
if self.W_constraint:
|
||||
self.constraints[self.W] = self.W_constraint
|
||||
if self.b_constraint:
|
||||
if self.bias and self.b_constraint:
|
||||
self.constraints[self.b] = self.b_constraint
|
||||
|
||||
if self.initial_weights is not None:
|
||||
@@ -907,8 +934,14 @@ class Highway(Layer):
|
||||
del self.initial_weights
|
||||
|
||||
def call(self, x, mask=None):
|
||||
transform_weight = activations.sigmoid(K.dot(x, self.W_carry) + self.b_carry)
|
||||
act = self.activation(K.dot(x, self.W) + self.b)
|
||||
y = K.dot(x, self.W_carry)
|
||||
if self.bias:
|
||||
y += self.b_carry
|
||||
transform_weight = activations.sigmoid(y)
|
||||
y = K.dot(x, self.W)
|
||||
if self.bias:
|
||||
y += self.b
|
||||
act = self.activation(y)
|
||||
act *= transform_weight
|
||||
output = act + (1 - transform_weight) * x
|
||||
return output
|
||||
@@ -922,6 +955,7 @@ class Highway(Layer):
|
||||
'activity_regularizer': self.activity_regularizer.get_config() if self.activity_regularizer else None,
|
||||
'W_constraint': self.W_constraint.get_config() if self.W_constraint else None,
|
||||
'b_constraint': self.b_constraint.get_config() if self.b_constraint else None,
|
||||
'bias': self.bias,
|
||||
'input_dim': self.input_dim}
|
||||
base_config = super(Highway, self).get_config()
|
||||
return dict(list(base_config.items()) + list(config.items()))
|
||||
@@ -965,16 +999,19 @@ class TimeDistributedDense(Layer):
|
||||
(eg. maxnorm, nonneg), applied to the main weights matrix.
|
||||
b_constraint: instance of the [constraints](../constraints.md) module,
|
||||
applied to the bias.
|
||||
bias: whether to include a bias (i.e. make the layer affine rather than linear).
|
||||
input_dim: dimensionality of the input (integer).
|
||||
This argument (or alternatively, the keyword argument `input_shape`)
|
||||
is required when using this layer as the first layer in a model.
|
||||
input_length: length of input sequences
|
||||
(integer, or None for variable-length sequences).
|
||||
'''
|
||||
|
||||
def __init__(self, output_dim,
|
||||
init='glorot_uniform', activation='linear', weights=None,
|
||||
W_regularizer=None, b_regularizer=None, activity_regularizer=None,
|
||||
W_constraint=None, b_constraint=None,
|
||||
input_dim=None, input_length=None, **kwargs):
|
||||
bias=True, input_dim=None, input_length=None, **kwargs):
|
||||
warnings.warn('TimeDistributedDense is deprecated, '
|
||||
'please use TimeDistributed(Dense(...)) instead.')
|
||||
self.output_dim = output_dim
|
||||
@@ -988,6 +1025,7 @@ class TimeDistributedDense(Layer):
|
||||
self.W_constraint = constraints.get(W_constraint)
|
||||
self.b_constraint = constraints.get(b_constraint)
|
||||
|
||||
self.bias = bias
|
||||
self.initial_weights = weights
|
||||
self.input_spec = [InputSpec(ndim=3)]
|
||||
self.supports_masking = True
|
||||
@@ -1005,17 +1043,17 @@ class TimeDistributedDense(Layer):
|
||||
|
||||
self.W = self.init((input_dim, self.output_dim),
|
||||
name='{}_W'.format(self.name))
|
||||
self.b = K.zeros((self.output_dim,),
|
||||
name='{}_b'.format(self.name))
|
||||
|
||||
self.trainable_weights = [self.W, self.b]
|
||||
if self.bias:
|
||||
self.b = K.zeros((self.output_dim,),
|
||||
name='{}_b'.format(self.name))
|
||||
self.trainable_weights = [self.W, self.b]
|
||||
self.regularizers = []
|
||||
|
||||
if self.W_regularizer:
|
||||
self.W_regularizer.set_param(self.W)
|
||||
self.regularizers.append(self.W_regularizer)
|
||||
|
||||
if self.b_regularizer:
|
||||
if self.bias and self.b_regularizer:
|
||||
self.b_regularizer.set_param(self.b)
|
||||
self.regularizers.append(self.b_regularizer)
|
||||
|
||||
@@ -1026,7 +1064,7 @@ class TimeDistributedDense(Layer):
|
||||
self.constraints = {}
|
||||
if self.W_constraint:
|
||||
self.constraints[self.W] = self.W_constraint
|
||||
if self.b_constraint:
|
||||
if self.bias and self.b_constraint:
|
||||
self.constraints[self.b] = self.b_constraint
|
||||
|
||||
if self.initial_weights is not None:
|
||||
@@ -1056,7 +1094,9 @@ class TimeDistributedDense(Layer):
|
||||
|
||||
# Squash samples and timesteps into a single axis
|
||||
x = K.reshape(x, (-1, input_shape[-1])) # (samples * timesteps, input_dim)
|
||||
y = K.dot(x, self.W) + self.b # (samples * timesteps, output_dim)
|
||||
y = K.dot(x, self.W) # (samples * timesteps, output_dim)
|
||||
if self.bias:
|
||||
y += self.b
|
||||
# We have to reshape Y to (samples, timesteps, output_dim)
|
||||
y = K.reshape(y, (-1, input_length, self.output_dim)) # (samples, timesteps, output_dim)
|
||||
y = self.activation(y)
|
||||
@@ -1071,6 +1111,7 @@ class TimeDistributedDense(Layer):
|
||||
'activity_regularizer': self.activity_regularizer.get_config() if self.activity_regularizer else None,
|
||||
'W_constraint': self.W_constraint.get_config() if self.W_constraint else None,
|
||||
'b_constraint': self.b_constraint.get_config() if self.b_constraint else None,
|
||||
'bias': self.bias,
|
||||
'input_dim': self.input_dim,
|
||||
'input_length': self.input_length}
|
||||
base_config = super(TimeDistributedDense, self).get_config()
|
||||
|
||||
@@ -17,7 +17,7 @@ class Embedding(Layer):
|
||||
model = Sequential()
|
||||
model.add(Embedding(1000, 64, input_length=10))
|
||||
# the model will take as input an integer matrix of size (batch, input_length).
|
||||
# the largest integer (i.e. word index) in the input should be no larger than 1000 (vocabulary size).
|
||||
# the largest integer (i.e. word index) in the input should be no larger than 999 (vocabulary size).
|
||||
# now model.output_shape == (None, 10, 64), where None is the batch dimension.
|
||||
|
||||
input_array = np.random.randint(1000, size=(32, 10))
|
||||
@@ -28,7 +28,7 @@ class Embedding(Layer):
|
||||
```
|
||||
|
||||
# Arguments
|
||||
input_dim: int >= 0. Size of the vocabulary, ie.
|
||||
input_dim: int > 0. Size of the vocabulary, ie.
|
||||
1 + maximum integer index occurring in the input data.
|
||||
output_dim: int >= 0. Dimension of the dense embedding.
|
||||
init: name of initialization function for the weights
|
||||
@@ -46,6 +46,8 @@ class Embedding(Layer):
|
||||
This is useful for [recurrent layers](recurrent.md) which may take
|
||||
variable length input. If this is `True` then all subsequent layers
|
||||
in the model need to support masking or an exception will be raised.
|
||||
If mask_zero is set to True, as a consequence, index 0 cannot be
|
||||
used in the vocabulary (input_dim should equal |vocabulary| + 1).
|
||||
input_length: Length of input sequences, when it is constant.
|
||||
This argument is required if you are going to connect
|
||||
`Flatten` then `Dense` layers upstream
|
||||
@@ -77,7 +79,6 @@ class Embedding(Layer):
|
||||
self.dropout = dropout
|
||||
|
||||
self.W_constraint = constraints.get(W_constraint)
|
||||
self.constraints = [self.W_constraint]
|
||||
|
||||
self.W_regularizer = regularizers.get(W_regularizer)
|
||||
self.activity_regularizer = regularizers.get(activity_regularizer)
|
||||
@@ -93,6 +94,11 @@ class Embedding(Layer):
|
||||
self.W = self.init((self.input_dim, self.output_dim),
|
||||
name='{}_W'.format(self.name))
|
||||
self.trainable_weights = [self.W]
|
||||
|
||||
self.constraints = {}
|
||||
if self.W_constraint:
|
||||
self.constraints[self.W] = self.W_constraint
|
||||
|
||||
self.regularizers = []
|
||||
if self.W_regularizer:
|
||||
self.W_regularizer.set_param(self.W)
|
||||
@@ -112,7 +118,11 @@ class Embedding(Layer):
|
||||
return K.not_equal(x, 0)
|
||||
|
||||
def get_output_shape_for(self, input_shape):
|
||||
return (input_shape[0], self.input_length, self.output_dim)
|
||||
if not self.input_length:
|
||||
input_length = input_shape[1]
|
||||
else:
|
||||
input_length = self.input_length
|
||||
return (input_shape[0], input_length, self.output_dim)
|
||||
|
||||
def call(self, x, mask=None):
|
||||
if 0. < self.dropout < 1.:
|
||||
|
||||
@@ -47,7 +47,7 @@ class BatchNormalization(Layer):
|
||||
Same shape as input.
|
||||
|
||||
# References
|
||||
- [Batch Normalization: Accelerating Deep Network Training by Reducing Internal Covariate Shift](http://arxiv.org/pdf/1502.03167v3.pdf)
|
||||
- [Batch Normalization: Accelerating Deep Network Training by Reducing Internal Covariate Shift](http://jmlr.org/proceedings/papers/v37/ioffe15.html)
|
||||
'''
|
||||
def __init__(self, epsilon=1e-6, mode=0, axis=-1, momentum=0.9,
|
||||
weights=None, beta_init='zero', gamma_init='one', **kwargs):
|
||||
@@ -94,8 +94,8 @@ class BatchNormalization(Layer):
|
||||
std = K.mean(K.square(x - brodcast_mean) + self.epsilon, axis=reduction_axes)
|
||||
std = K.sqrt(std)
|
||||
brodcast_std = K.reshape(std, broadcast_shape)
|
||||
mean_update = self.momentum * self.running_mean + (1-self.momentum) * mean
|
||||
std_update = self.momentum * self.running_std + (1-self.momentum) * std
|
||||
mean_update = self.momentum * self.running_mean + (1 - self.momentum) * mean
|
||||
std_update = self.momentum * self.running_std + (1 - self.momentum) * std
|
||||
self.updates = [(self.running_mean, mean_update),
|
||||
(self.running_std, std_update)]
|
||||
x_normed = (x - brodcast_mean) / (brodcast_std + self.epsilon)
|
||||
|
||||
+209
-148
@@ -81,12 +81,20 @@ class Recurrent(Layer):
|
||||
is always unrolled, so this argument does not do anything.
|
||||
Unrolling can speed-up a RNN, although it tends to be more memory-intensive.
|
||||
Unrolling is only suitable for short sequences.
|
||||
consume_less: one of "cpu", "mem". If set to "cpu", the RNN will use
|
||||
consume_less: one of "cpu", "mem", or "gpu" (LSTM/GRU only).
|
||||
If set to "cpu", the RNN will use
|
||||
an implementation that uses fewer, larger matrix products,
|
||||
thus running faster (at least on CPU) but consuming more memory.
|
||||
thus running faster on CPU but consuming more memory.
|
||||
|
||||
If set to "mem", the RNN will use more matrix products,
|
||||
but smaller ones, thus running slower (may actually be faster on GPU)
|
||||
while consuming less memory.
|
||||
|
||||
If set to "gpu" (LSTM/GRU only), the RNN will combine the input gate,
|
||||
the forget gate and the output gate into a single matrix,
|
||||
enabling more time-efficient parallelization on the GPU. Note: RNN
|
||||
dropout must be shared for all gates, resulting in a slightly
|
||||
reduced regularization.
|
||||
input_dim: dimensionality of the input (integer).
|
||||
This argument (or alternatively, the keyword argument `input_shape`)
|
||||
is required when using this layer as the first layer in a model.
|
||||
@@ -196,7 +204,7 @@ class Recurrent(Layer):
|
||||
def call(self, x, mask=None):
|
||||
# input shape: (nb_samples, time (padded with zeros), input_dim)
|
||||
# note that the .build() method of subclasses MUST define
|
||||
# self.input_sepc with a complete input shape.
|
||||
# self.input_spec with a complete input shape.
|
||||
input_shape = self.input_spec[0].shape
|
||||
if K._BACKEND == 'tensorflow':
|
||||
if not input_shape[1]:
|
||||
@@ -383,15 +391,15 @@ class SimpleRNN(Recurrent):
|
||||
return constants
|
||||
|
||||
def get_config(self):
|
||||
config = {"output_dim": self.output_dim,
|
||||
"init": self.init.__name__,
|
||||
"inner_init": self.inner_init.__name__,
|
||||
"activation": self.activation.__name__,
|
||||
"W_regularizer": self.W_regularizer.get_config() if self.W_regularizer else None,
|
||||
"U_regularizer": self.U_regularizer.get_config() if self.U_regularizer else None,
|
||||
"b_regularizer": self.b_regularizer.get_config() if self.b_regularizer else None,
|
||||
"dropout_W": self.dropout_W,
|
||||
"dropout_U": self.dropout_U}
|
||||
config = {'output_dim': self.output_dim,
|
||||
'init': self.init.__name__,
|
||||
'inner_init': self.inner_init.__name__,
|
||||
'activation': self.activation.__name__,
|
||||
'W_regularizer': self.W_regularizer.get_config() if self.W_regularizer else None,
|
||||
'U_regularizer': self.U_regularizer.get_config() if self.U_regularizer else None,
|
||||
'b_regularizer': self.b_regularizer.get_config() if self.b_regularizer else None,
|
||||
'dropout_W': self.dropout_W,
|
||||
'dropout_U': self.dropout_U}
|
||||
base_config = super(SimpleRNN, self).get_config()
|
||||
return dict(list(base_config.items()) + list(config.items()))
|
||||
|
||||
@@ -444,53 +452,66 @@ class GRU(Recurrent):
|
||||
|
||||
def build(self, input_shape):
|
||||
self.input_spec = [InputSpec(shape=input_shape)]
|
||||
input_dim = input_shape[2]
|
||||
self.input_dim = input_dim
|
||||
self.input_dim = input_shape[2]
|
||||
|
||||
self.W_z = self.init((input_dim, self.output_dim),
|
||||
name='{}_W_z'.format(self.name))
|
||||
self.U_z = self.inner_init((self.output_dim, self.output_dim),
|
||||
name='{}_U_z'.format(self.name))
|
||||
self.b_z = K.zeros((self.output_dim,), name='{}_b_z'.format(self.name))
|
||||
|
||||
self.W_r = self.init((input_dim, self.output_dim),
|
||||
name='{}_W_r'.format(self.name))
|
||||
self.U_r = self.inner_init((self.output_dim, self.output_dim),
|
||||
name='{}_U_r'.format(self.name))
|
||||
self.b_r = K.zeros((self.output_dim,), name='{}_b_r'.format(self.name))
|
||||
|
||||
self.W_h = self.init((input_dim, self.output_dim),
|
||||
name='{}_W_h'.format(self.name))
|
||||
self.U_h = self.inner_init((self.output_dim, self.output_dim),
|
||||
name='{}_U_h'.format(self.name))
|
||||
self.b_h = K.zeros((self.output_dim,), name='{}_b_h'.format(self.name))
|
||||
|
||||
self.regularizers = []
|
||||
if self.W_regularizer:
|
||||
self.W_regularizer.set_param(K.concatenate([self.W_z,
|
||||
self.W_r,
|
||||
self.W_h]))
|
||||
self.regularizers.append(self.W_regularizer)
|
||||
if self.U_regularizer:
|
||||
self.U_regularizer.set_param(K.concatenate([self.U_z,
|
||||
self.U_r,
|
||||
self.U_h]))
|
||||
self.regularizers.append(self.U_regularizer)
|
||||
if self.b_regularizer:
|
||||
self.b_regularizer.set_param(K.concatenate([self.b_z,
|
||||
self.b_r,
|
||||
self.b_h]))
|
||||
self.regularizers.append(self.b_regularizer)
|
||||
|
||||
self.trainable_weights = [self.W_z, self.U_z, self.b_z,
|
||||
self.W_r, self.U_r, self.b_r,
|
||||
self.W_h, self.U_h, self.b_h]
|
||||
if self.stateful:
|
||||
self.reset_states()
|
||||
else:
|
||||
# initial states: all-zero tensor of shape (output_dim)
|
||||
self.states = [None]
|
||||
|
||||
if self.consume_less == 'gpu':
|
||||
|
||||
self.W = self.init((self.input_dim, 3 * self.output_dim),
|
||||
name='{}_W'.format(self.name))
|
||||
self.U = self.inner_init((self.output_dim, 3 * self.output_dim),
|
||||
name='{}_U'.format(self.name))
|
||||
|
||||
self.b = K.variable(np.hstack((np.zeros(self.output_dim),
|
||||
np.zeros(self.output_dim),
|
||||
np.zeros(self.output_dim))),
|
||||
name='{}_b'.format(self.name))
|
||||
|
||||
self.trainable_weights = [self.W, self.U, self.b]
|
||||
else:
|
||||
|
||||
self.W_z = self.init((self.input_dim, self.output_dim),
|
||||
name='{}_W_z'.format(self.name))
|
||||
self.U_z = self.inner_init((self.output_dim, self.output_dim),
|
||||
name='{}_U_z'.format(self.name))
|
||||
self.b_z = K.zeros((self.output_dim,), name='{}_b_z'.format(self.name))
|
||||
|
||||
self.W_r = self.init((self.input_dim, self.output_dim),
|
||||
name='{}_W_r'.format(self.name))
|
||||
self.U_r = self.inner_init((self.output_dim, self.output_dim),
|
||||
name='{}_U_r'.format(self.name))
|
||||
self.b_r = K.zeros((self.output_dim,), name='{}_b_r'.format(self.name))
|
||||
|
||||
self.W_h = self.init((self.input_dim, self.output_dim),
|
||||
name='{}_W_h'.format(self.name))
|
||||
self.U_h = self.inner_init((self.output_dim, self.output_dim),
|
||||
name='{}_U_h'.format(self.name))
|
||||
self.b_h = K.zeros((self.output_dim,), name='{}_b_h'.format(self.name))
|
||||
|
||||
self.trainable_weights = [self.W_z, self.U_z, self.b_z,
|
||||
self.W_r, self.U_r, self.b_r,
|
||||
self.W_h, self.U_h, self.b_h]
|
||||
|
||||
self.W = K.concatenate([self.W_z, self.W_r, self.W_h])
|
||||
self.U = K.concatenate([self.U_z, self.U_r, self.U_h])
|
||||
self.b = K.concatenate([self.b_z, self.b_r, self.b_h])
|
||||
|
||||
self.regularizers = []
|
||||
if self.W_regularizer:
|
||||
self.W_regularizer.set_param(self.W)
|
||||
self.regularizers.append(self.W_regularizer)
|
||||
if self.U_regularizer:
|
||||
self.U_regularizer.set_param(self.U)
|
||||
self.regularizers.append(self.U_regularizer)
|
||||
if self.b_regularizer:
|
||||
self.b_regularizer.set_param(self.b)
|
||||
self.regularizers.append(self.b_regularizer)
|
||||
|
||||
if self.initial_weights is not None:
|
||||
self.set_weights(self.initial_weights)
|
||||
del self.initial_weights
|
||||
@@ -528,19 +549,37 @@ class GRU(Recurrent):
|
||||
B_U = states[1] # dropout matrices for recurrent units
|
||||
B_W = states[2]
|
||||
|
||||
if self.consume_less == 'cpu':
|
||||
x_z = x[:, :self.output_dim]
|
||||
x_r = x[:, self.output_dim: 2 * self.output_dim]
|
||||
x_h = x[:, 2 * self.output_dim:]
|
||||
if self.consume_less == 'gpu':
|
||||
|
||||
matrix_x = K.dot(x * B_W[0], self.W) + self.b
|
||||
matrix_inner = K.dot(h_tm1 * B_U[0], self.U[:, :2 * self.output_dim])
|
||||
|
||||
x_z = matrix_x[:, :self.output_dim]
|
||||
x_r = matrix_x[:, self.output_dim: 2 * self.output_dim]
|
||||
inner_z = matrix_inner[:, :self.output_dim]
|
||||
inner_r = matrix_inner[:, self.output_dim: 2 * self.output_dim]
|
||||
|
||||
z = self.inner_activation(x_z + inner_z)
|
||||
r = self.inner_activation(x_r + inner_r)
|
||||
|
||||
x_h = matrix_x[:, 2 * self.output_dim:]
|
||||
inner_h = K.dot(r * h_tm1 * B_U[0], self.U[:, 2 * self.output_dim:])
|
||||
hh = self.activation(x_h + inner_h)
|
||||
else:
|
||||
x_z = K.dot(x * B_W[0], self.W_z) + self.b_z
|
||||
x_r = K.dot(x * B_W[1], self.W_r) + self.b_r
|
||||
x_h = K.dot(x * B_W[2], self.W_h) + self.b_h
|
||||
if self.consume_less == 'cpu':
|
||||
x_z = x[:, :self.output_dim]
|
||||
x_r = x[:, self.output_dim: 2 * self.output_dim]
|
||||
x_h = x[:, 2 * self.output_dim:]
|
||||
elif self.consume_less == 'mem':
|
||||
x_z = K.dot(x * B_W[0], self.W_z) + self.b_z
|
||||
x_r = K.dot(x * B_W[1], self.W_r) + self.b_r
|
||||
x_h = K.dot(x * B_W[2], self.W_h) + self.b_h
|
||||
else:
|
||||
raise Exception('Unknown `consume_less` mode.')
|
||||
z = self.inner_activation(x_z + K.dot(h_tm1 * B_U[0], self.U_z))
|
||||
r = self.inner_activation(x_r + K.dot(h_tm1 * B_U[1], self.U_r))
|
||||
|
||||
z = self.inner_activation(x_z + K.dot(h_tm1 * B_U[0], self.U_z))
|
||||
r = self.inner_activation(x_r + K.dot(h_tm1 * B_U[1], self.U_r))
|
||||
|
||||
hh = self.activation(x_h + K.dot(r * h_tm1 * B_U[2], self.U_h))
|
||||
hh = self.activation(x_h + K.dot(r * h_tm1 * B_U[2], self.U_h))
|
||||
h = z * h_tm1 + (1 - z) * hh
|
||||
return h, [h]
|
||||
|
||||
@@ -549,33 +588,33 @@ class GRU(Recurrent):
|
||||
if 0 < self.dropout_U < 1:
|
||||
ones = K.ones_like(K.reshape(x[:, 0, 0], (-1, 1)))
|
||||
ones = K.concatenate([ones] * self.output_dim, 1)
|
||||
B_U = [K.dropout(ones, self.dropout_U) for _ in range(3)]
|
||||
B_U = [K.in_train_phase(K.dropout(ones, self.dropout_U), ones) for _ in range(3)]
|
||||
constants.append(B_U)
|
||||
else:
|
||||
constants.append([K.cast_to_floatx(1.) for _ in range(4)])
|
||||
constants.append([K.cast_to_floatx(1.) for _ in range(3)])
|
||||
|
||||
if self.consume_less == 'cpu' and 0 < self.dropout_W < 1:
|
||||
if 0 < self.dropout_W < 1:
|
||||
input_shape = self.input_spec[0].shape
|
||||
input_dim = input_shape[-1]
|
||||
ones = K.ones_like(K.reshape(x[:, 0, 0], (-1, 1)))
|
||||
ones = K.concatenate([ones] * input_dim, 1)
|
||||
B_W = [K.dropout(ones, self.dropout_W) for _ in range(3)]
|
||||
B_W = [K.in_train_phase(K.dropout(ones, self.dropout_W), ones) for _ in range(3)]
|
||||
constants.append(B_W)
|
||||
else:
|
||||
constants.append([K.cast_to_floatx(1.) for _ in range(4)])
|
||||
constants.append([K.cast_to_floatx(1.) for _ in range(3)])
|
||||
return constants
|
||||
|
||||
def get_config(self):
|
||||
config = {"output_dim": self.output_dim,
|
||||
"init": self.init.__name__,
|
||||
"inner_init": self.inner_init.__name__,
|
||||
"activation": self.activation.__name__,
|
||||
"inner_activation": self.inner_activation.__name__,
|
||||
"W_regularizer": self.W_regularizer.get_config() if self.W_regularizer else None,
|
||||
"U_regularizer": self.U_regularizer.get_config() if self.U_regularizer else None,
|
||||
"b_regularizer": self.b_regularizer.get_config() if self.b_regularizer else None,
|
||||
"dropout_W": self.dropout_W,
|
||||
"dropout_U": self.dropout_U}
|
||||
config = {'output_dim': self.output_dim,
|
||||
'init': self.init.__name__,
|
||||
'inner_init': self.inner_init.__name__,
|
||||
'activation': self.activation.__name__,
|
||||
'inner_activation': self.inner_activation.__name__,
|
||||
'W_regularizer': self.W_regularizer.get_config() if self.W_regularizer else None,
|
||||
'U_regularizer': self.U_regularizer.get_config() if self.U_regularizer else None,
|
||||
'b_regularizer': self.b_regularizer.get_config() if self.b_regularizer else None,
|
||||
'dropout_W': self.dropout_W,
|
||||
'dropout_U': self.dropout_U}
|
||||
base_config = super(GRU, self).get_config()
|
||||
return dict(list(base_config.items()) + list(config.items()))
|
||||
|
||||
@@ -637,8 +676,7 @@ class LSTM(Recurrent):
|
||||
|
||||
def build(self, input_shape):
|
||||
self.input_spec = [InputSpec(shape=input_shape)]
|
||||
input_dim = input_shape[2]
|
||||
self.input_dim = input_dim
|
||||
self.input_dim = input_shape[2]
|
||||
|
||||
if self.stateful:
|
||||
self.reset_states()
|
||||
@@ -646,56 +684,64 @@ class LSTM(Recurrent):
|
||||
# initial states: 2 all-zero tensors of shape (output_dim)
|
||||
self.states = [None, None]
|
||||
|
||||
self.W_i = self.init((input_dim, self.output_dim),
|
||||
name='{}_W_i'.format(self.name))
|
||||
self.U_i = self.inner_init((self.output_dim, self.output_dim),
|
||||
name='{}_U_i'.format(self.name))
|
||||
self.b_i = K.zeros((self.output_dim,), name='{}_b_i'.format(self.name))
|
||||
if self.consume_less == 'gpu':
|
||||
self.W = self.init((self.input_dim, 4 * self.output_dim),
|
||||
name='{}_W'.format(self.name))
|
||||
self.U = self.inner_init((self.output_dim, 4 * self.output_dim),
|
||||
name='{}_U'.format(self.name))
|
||||
|
||||
self.W_f = self.init((input_dim, self.output_dim),
|
||||
name='{}_W_f'.format(self.name))
|
||||
self.U_f = self.inner_init((self.output_dim, self.output_dim),
|
||||
name='{}_U_f'.format(self.name))
|
||||
self.b_f = self.forget_bias_init((self.output_dim,),
|
||||
name='{}_b_f'.format(self.name))
|
||||
self.b = K.variable(np.hstack((np.zeros(self.output_dim),
|
||||
K.get_value(self.forget_bias_init(self.output_dim)),
|
||||
np.zeros(self.output_dim),
|
||||
np.zeros(self.output_dim))),
|
||||
name='{}_b'.format(self.name))
|
||||
self.trainable_weights = [self.W, self.U, self.b]
|
||||
else:
|
||||
self.W_i = self.init((self.input_dim, self.output_dim),
|
||||
name='{}_W_i'.format(self.name))
|
||||
self.U_i = self.inner_init((self.output_dim, self.output_dim),
|
||||
name='{}_U_i'.format(self.name))
|
||||
self.b_i = K.zeros((self.output_dim,), name='{}_b_i'.format(self.name))
|
||||
|
||||
self.W_c = self.init((input_dim, self.output_dim),
|
||||
name='{}_W_c'.format(self.name))
|
||||
self.U_c = self.inner_init((self.output_dim, self.output_dim),
|
||||
name='{}_U_c'.format(self.name))
|
||||
self.b_c = K.zeros((self.output_dim,), name='{}_b_c'.format(self.name))
|
||||
self.W_f = self.init((self.input_dim, self.output_dim),
|
||||
name='{}_W_f'.format(self.name))
|
||||
self.U_f = self.inner_init((self.output_dim, self.output_dim),
|
||||
name='{}_U_f'.format(self.name))
|
||||
self.b_f = self.forget_bias_init((self.output_dim,),
|
||||
name='{}_b_f'.format(self.name))
|
||||
|
||||
self.W_o = self.init((input_dim, self.output_dim),
|
||||
name='{}_W_o'.format(self.name))
|
||||
self.U_o = self.inner_init((self.output_dim, self.output_dim),
|
||||
name='{}_U_o'.format(self.name))
|
||||
self.b_o = K.zeros((self.output_dim,), name='{}_b_o'.format(self.name))
|
||||
self.W_c = self.init((self.input_dim, self.output_dim),
|
||||
name='{}_W_c'.format(self.name))
|
||||
self.U_c = self.inner_init((self.output_dim, self.output_dim),
|
||||
name='{}_U_c'.format(self.name))
|
||||
self.b_c = K.zeros((self.output_dim,), name='{}_b_c'.format(self.name))
|
||||
|
||||
self.W_o = self.init((self.input_dim, self.output_dim),
|
||||
name='{}_W_o'.format(self.name))
|
||||
self.U_o = self.inner_init((self.output_dim, self.output_dim),
|
||||
name='{}_U_o'.format(self.name))
|
||||
self.b_o = K.zeros((self.output_dim,), name='{}_b_o'.format(self.name))
|
||||
|
||||
self.trainable_weights = [self.W_i, self.U_i, self.b_i,
|
||||
self.W_c, self.U_c, self.b_c,
|
||||
self.W_f, self.U_f, self.b_f,
|
||||
self.W_o, self.U_o, self.b_o]
|
||||
|
||||
self.W = K.concatenate([self.W_i, self.W_f, self.W_c, self.W_o])
|
||||
self.U = K.concatenate([self.U_i, self.U_f, self.U_c, self.U_o])
|
||||
self.b = K.concatenate([self.b_i, self.b_f, self.b_c, self.b_o])
|
||||
|
||||
self.regularizers = []
|
||||
if self.W_regularizer:
|
||||
self.W_regularizer.set_param(K.concatenate([self.W_i,
|
||||
self.W_f,
|
||||
self.W_c,
|
||||
self.W_o]))
|
||||
self.W_regularizer.set_param(self.W)
|
||||
self.regularizers.append(self.W_regularizer)
|
||||
if self.U_regularizer:
|
||||
self.U_regularizer.set_param(K.concatenate([self.U_i,
|
||||
self.U_f,
|
||||
self.U_c,
|
||||
self.U_o]))
|
||||
self.U_regularizer.set_param(self.U)
|
||||
self.regularizers.append(self.U_regularizer)
|
||||
if self.b_regularizer:
|
||||
self.b_regularizer.set_param(K.concatenate([self.b_i,
|
||||
self.b_f,
|
||||
self.b_c,
|
||||
self.b_o]))
|
||||
self.b_regularizer.set_param(self.b)
|
||||
self.regularizers.append(self.b_regularizer)
|
||||
|
||||
self.trainable_weights = [self.W_i, self.U_i, self.b_i,
|
||||
self.W_c, self.U_c, self.b_c,
|
||||
self.W_f, self.U_f, self.b_f,
|
||||
self.W_o, self.U_o, self.b_o]
|
||||
|
||||
if self.initial_weights is not None:
|
||||
self.set_weights(self.initial_weights)
|
||||
del self.initial_weights
|
||||
@@ -715,9 +761,9 @@ class LSTM(Recurrent):
|
||||
self.states = [K.zeros((input_shape[0], self.output_dim)),
|
||||
K.zeros((input_shape[0], self.output_dim))]
|
||||
|
||||
def preprocess_input(self, x, train=False):
|
||||
def preprocess_input(self, x):
|
||||
if self.consume_less == 'cpu':
|
||||
if train and (0 < self.dropout_W < 1):
|
||||
if 0 < self.dropout_W < 1:
|
||||
dropout = self.dropout_W
|
||||
else:
|
||||
dropout = 0
|
||||
@@ -743,21 +789,36 @@ class LSTM(Recurrent):
|
||||
B_U = states[2]
|
||||
B_W = states[3]
|
||||
|
||||
if self.consume_less == 'cpu':
|
||||
x_i = x[:, :self.output_dim]
|
||||
x_f = x[:, self.output_dim: 2 * self.output_dim]
|
||||
x_c = x[:, 2 * self.output_dim: 3 * self.output_dim]
|
||||
x_o = x[:, 3 * self.output_dim:]
|
||||
else:
|
||||
x_i = K.dot(x * B_W[0], self.W_i) + self.b_i
|
||||
x_f = K.dot(x * B_W[1], self.W_f) + self.b_f
|
||||
x_c = K.dot(x * B_W[2], self.W_c) + self.b_c
|
||||
x_o = K.dot(x * B_W[3], self.W_o) + self.b_o
|
||||
if self.consume_less == 'gpu':
|
||||
z = K.dot(x * B_W[0], self.W) + K.dot(h_tm1 * B_U[0], self.U) + self.b
|
||||
|
||||
i = self.inner_activation(x_i + K.dot(h_tm1 * B_U[0], self.U_i))
|
||||
f = self.inner_activation(x_f + K.dot(h_tm1 * B_U[1], self.U_f))
|
||||
c = f * c_tm1 + i * self.activation(x_c + K.dot(h_tm1 * B_U[2], self.U_c))
|
||||
o = self.inner_activation(x_o + K.dot(h_tm1 * B_U[3], self.U_o))
|
||||
z0 = z[:, :self.output_dim]
|
||||
z1 = z[:, self.output_dim: 2 * self.output_dim]
|
||||
z2 = z[:, 2 * self.output_dim: 3 * self.output_dim]
|
||||
z3 = z[:, 3 * self.output_dim:]
|
||||
|
||||
i = self.inner_activation(z0)
|
||||
f = self.inner_activation(z1)
|
||||
c = f * c_tm1 + i * self.activation(z2)
|
||||
o = self.inner_activation(z3)
|
||||
else:
|
||||
if self.consume_less == 'cpu':
|
||||
x_i = x[:, :self.output_dim]
|
||||
x_f = x[:, self.output_dim: 2 * self.output_dim]
|
||||
x_c = x[:, 2 * self.output_dim: 3 * self.output_dim]
|
||||
x_o = x[:, 3 * self.output_dim:]
|
||||
elif self.consume_less == 'mem':
|
||||
x_i = K.dot(x * B_W[0], self.W_i) + self.b_i
|
||||
x_f = K.dot(x * B_W[1], self.W_f) + self.b_f
|
||||
x_c = K.dot(x * B_W[2], self.W_c) + self.b_c
|
||||
x_o = K.dot(x * B_W[3], self.W_o) + self.b_o
|
||||
else:
|
||||
raise Exception('Unknown `consume_less` mode.')
|
||||
|
||||
i = self.inner_activation(x_i + K.dot(h_tm1 * B_U[0], self.U_i))
|
||||
f = self.inner_activation(x_f + K.dot(h_tm1 * B_U[1], self.U_f))
|
||||
c = f * c_tm1 + i * self.activation(x_c + K.dot(h_tm1 * B_U[2], self.U_c))
|
||||
o = self.inner_activation(x_o + K.dot(h_tm1 * B_U[3], self.U_o))
|
||||
|
||||
h = o * self.activation(c)
|
||||
return h, [h, c]
|
||||
@@ -767,33 +828,33 @@ class LSTM(Recurrent):
|
||||
if 0 < self.dropout_U < 1:
|
||||
ones = K.ones_like(K.reshape(x[:, 0, 0], (-1, 1)))
|
||||
ones = K.concatenate([ones] * self.output_dim, 1)
|
||||
B_U = [K.dropout(ones, self.dropout_U) for _ in range(4)]
|
||||
B_U = [K.in_train_phase(K.dropout(ones, self.dropout_U), ones) for _ in range(4)]
|
||||
constants.append(B_U)
|
||||
else:
|
||||
constants.append([K.cast_to_floatx(1.) for _ in range(4)])
|
||||
|
||||
if self.consume_less == 'cpu' and 0 < self.dropout_W < 1:
|
||||
if 0 < self.dropout_W < 1:
|
||||
input_shape = self.input_spec[0].shape
|
||||
input_dim = input_shape[-1]
|
||||
ones = K.ones_like(K.reshape(x[:, 0, 0], (-1, 1)))
|
||||
ones = K.concatenate([ones] * input_dim, 1)
|
||||
B_W = [K.dropout(ones, self.dropout_W) for _ in range(4)]
|
||||
B_W = [K.in_train_phase(K.dropout(ones, self.dropout_W), ones) for _ in range(4)]
|
||||
constants.append(B_W)
|
||||
else:
|
||||
constants.append([K.cast_to_floatx(1.) for _ in range(4)])
|
||||
return constants
|
||||
|
||||
def get_config(self):
|
||||
config = {"output_dim": self.output_dim,
|
||||
"init": self.init.__name__,
|
||||
"inner_init": self.inner_init.__name__,
|
||||
"forget_bias_init": self.forget_bias_init.__name__,
|
||||
"activation": self.activation.__name__,
|
||||
"inner_activation": self.inner_activation.__name__,
|
||||
"W_regularizer": self.W_regularizer.get_config() if self.W_regularizer else None,
|
||||
"U_regularizer": self.U_regularizer.get_config() if self.U_regularizer else None,
|
||||
"b_regularizer": self.b_regularizer.get_config() if self.b_regularizer else None,
|
||||
"dropout_W": self.dropout_W,
|
||||
"dropout_U": self.dropout_U}
|
||||
config = {'output_dim': self.output_dim,
|
||||
'init': self.init.__name__,
|
||||
'inner_init': self.inner_init.__name__,
|
||||
'forget_bias_init': self.forget_bias_init.__name__,
|
||||
'activation': self.activation.__name__,
|
||||
'inner_activation': self.inner_activation.__name__,
|
||||
'W_regularizer': self.W_regularizer.get_config() if self.W_regularizer else None,
|
||||
'U_regularizer': self.U_regularizer.get_config() if self.U_regularizer else None,
|
||||
'b_regularizer': self.b_regularizer.get_config() if self.b_regularizer else None,
|
||||
'dropout_W': self.dropout_W,
|
||||
'dropout_U': self.dropout_U}
|
||||
base_config = super(LSTM, self).get_config()
|
||||
return dict(list(base_config.items()) + list(config.items()))
|
||||
|
||||
@@ -6,6 +6,7 @@ class Wrapper(Layer):
|
||||
|
||||
def __init__(self, layer, **kwargs):
|
||||
self.layer = layer
|
||||
self.uses_learning_phase = layer.uses_learning_phase
|
||||
super(Wrapper, self).__init__(**kwargs)
|
||||
|
||||
def build(self, input_shape=None):
|
||||
@@ -97,7 +98,9 @@ class TimeDistributed(Wrapper):
|
||||
'an "input_shape" or "batch_input_shape" '
|
||||
'argument, including the time axis.')
|
||||
child_input_shape = (input_shape[0],) + input_shape[2:]
|
||||
self.layer.build(child_input_shape)
|
||||
if not self.layer.built:
|
||||
self.layer.build(child_input_shape)
|
||||
self.layer.built = True
|
||||
super(TimeDistributed, self).build()
|
||||
|
||||
def get_output_shape_for(self, input_shape):
|
||||
@@ -121,11 +124,11 @@ class TimeDistributed(Wrapper):
|
||||
# no batch size specified, therefore the layer will be able
|
||||
# to process batches of any size
|
||||
# we can go with reshape-based implementation for performance
|
||||
X = K.reshape(X, (-1, ) + input_shape[2:]) # (nb_samples * timesteps, ...)
|
||||
y = self.layer.call(X) # (nb_samples * timesteps, ...)
|
||||
input_length = input_shape[1]
|
||||
if not input_length:
|
||||
input_length = K.shape(X)[1]
|
||||
X = K.reshape(X, (-1, ) + input_shape[2:]) # (nb_samples * timesteps, ...)
|
||||
y = self.layer.call(X) # (nb_samples * timesteps, ...)
|
||||
# (nb_samples, timesteps, ...)
|
||||
output_shape = self.get_output_shape_for(input_shape)
|
||||
y = K.reshape(y, (-1, input_length) + output_shape[2:])
|
||||
|
||||
@@ -473,6 +473,8 @@ class Graph(Model):
|
||||
x = self._get_x(data)
|
||||
output_list = super(Graph, self).predict(x, batch_size=batch_size,
|
||||
verbose=verbose)
|
||||
if not isinstance(output_list, list):
|
||||
output_list = [output_list]
|
||||
return dict(zip(self._graph_outputs, output_list))
|
||||
|
||||
def train_on_batch(self, data,
|
||||
@@ -528,12 +530,15 @@ class Graph(Model):
|
||||
|
||||
def predict_on_batch(self, data):
    '''Runs the parent `predict_on_batch` on `data` and returns the
    results as a dictionary keyed by `self._graph_outputs`.
    '''
    predictions = super(Graph, self).predict_on_batch(data)
    # the parent may hand back a bare value instead of a list;
    # wrap it so it can be zipped with the output names
    if not isinstance(predictions, list):
        predictions = [predictions]
    named_predictions = zip(self._graph_outputs, predictions)
    return dict(named_predictions)
|
||||
|
||||
def fit_generator(self, generator, samples_per_epoch, nb_epoch,
|
||||
verbose=1, callbacks=[],
|
||||
validation_data=None, nb_val_samples=None,
|
||||
class_weight={}, **kwargs):
|
||||
class_weight={},
|
||||
max_q_size=10, **kwargs):
|
||||
'''Fits a model on data generated batch-by-batch by a Python generator.
|
||||
The generator is run in parallel to the model, for efficiency.
|
||||
For instance, this allows you to do real-time data augmentation
|
||||
@@ -641,13 +646,14 @@ class Graph(Model):
|
||||
callbacks=callbacks,
|
||||
validation_data=validation_data,
|
||||
nb_val_samples=nb_val_samples,
|
||||
class_weight=class_weight)
|
||||
class_weight=class_weight,
|
||||
max_q_size=max_q_size)
|
||||
self.train_on_batch = self._train_on_batch
|
||||
self.evaluate = self._evaluate
|
||||
return history
|
||||
|
||||
def evaluate_generator(self, generator, val_samples,
|
||||
verbose=1, **kwargs):
|
||||
verbose=1, max_q_size=10, **kwargs):
|
||||
'''Evaluates the model on a generator. The generator should
|
||||
return the same kind of data with every yield as accepted
|
||||
by `evaluate`.
|
||||
@@ -700,7 +706,8 @@ class Graph(Model):
|
||||
|
||||
generator = fixed_generator()
|
||||
history = super(Graph, self).evaluate_generator(generator,
|
||||
val_samples)
|
||||
val_samples,
|
||||
max_q_size=max_q_size)
|
||||
self.test_on_batch = self._test_on_batch
|
||||
return history
|
||||
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
import numpy as np
|
||||
from . import backend as K
|
||||
|
||||
|
||||
@@ -8,3 +9,76 @@ def binary_accuracy(y_true, y_pred):
|
||||
def categorical_accuracy(y_true, y_pred):
    '''Mean of the element-wise equality between the last-axis argmax
    of `y_true` and the last-axis argmax of `y_pred`.
    '''
    true_classes = K.argmax(y_true, axis=-1)
    predicted_classes = K.argmax(y_pred, axis=-1)
    matches = K.equal(true_classes, predicted_classes)
    return K.mean(matches)
|
||||
|
||||
|
||||
def sparse_categorical_accuracy(y_true, y_pred):
    '''Mean of the element-wise equality between the last-axis max of
    `y_true` and the last-axis argmax of `y_pred` (cast to `K.floatx()`).
    '''
    labels = K.max(y_true, axis=-1)
    # argmax is cast to the backend float type before the comparison
    predicted_classes = K.cast(K.argmax(y_pred, axis=-1), K.floatx())
    matches = K.equal(labels, predicted_classes)
    return K.mean(matches)
|
||||
|
||||
|
||||
def mean_squared_error(y_true, y_pred):
    '''Mean of the squared difference `y_pred - y_true`.
    '''
    residual = y_pred - y_true
    return K.mean(K.square(residual))
|
||||
|
||||
|
||||
def mean_absolute_error(y_true, y_pred):
    '''Mean of the absolute difference `y_pred - y_true`.
    '''
    residual = y_pred - y_true
    return K.mean(K.abs(residual))
|
||||
|
||||
|
||||
def mean_absolute_percentage_error(y_true, y_pred):
    '''100 times the mean of `|y_true - y_pred|` relative to `|y_true|`.

    The denominator is clipped from below at `K.epsilon()` so that
    targets equal to zero do not cause a division by zero.
    '''
    difference = y_true - y_pred
    denominator = K.clip(K.abs(y_true), K.epsilon(), np.inf)
    relative_error = K.abs(difference / denominator)
    return 100. * K.mean(relative_error)
|
||||
|
||||
|
||||
def mean_squared_logarithmic_error(y_true, y_pred):
    '''Mean of the squared difference between `log(1 + y_pred)` and
    `log(1 + y_true)`, with both inputs clipped from below at
    `K.epsilon()` before the logarithm.
    '''
    clipped_pred = K.clip(y_pred, K.epsilon(), np.inf)
    log_pred = K.log(clipped_pred + 1.)
    clipped_true = K.clip(y_true, K.epsilon(), np.inf)
    log_true = K.log(clipped_true + 1.)
    return K.mean(K.square(log_pred - log_true))
|
||||
|
||||
|
||||
def squared_hinge(y_true, y_pred):
    '''Mean of the squared value of `max(1 - y_true * y_pred, 0)`.
    '''
    margin = K.maximum(1. - y_true * y_pred, 0.)
    return K.mean(K.square(margin))
|
||||
|
||||
|
||||
def hinge(y_true, y_pred):
    '''Mean of `max(1 - y_true * y_pred, 0)`.
    '''
    margin = K.maximum(1. - y_true * y_pred, 0.)
    return K.mean(margin)
|
||||
|
||||
|
||||
def categorical_crossentropy(y_true, y_pred):
    '''Mean of the backend categorical crossentropy.

    `y_true` is expected to be a binary class matrix,
    not a vector of scalar classes.
    '''
    crossentropy = K.categorical_crossentropy(y_pred, y_true)
    return K.mean(crossentropy)
|
||||
|
||||
|
||||
def sparse_categorical_crossentropy(y_true, y_pred):
    '''Mean of the backend sparse categorical crossentropy.

    `y_true` is expected to be an array of integer classes.
    Note: the labels must have the same number of dimensions as the
    output. If you get a shape error, add a length-1 dimension to
    the labels.
    '''
    crossentropy = K.sparse_categorical_crossentropy(y_pred, y_true)
    return K.mean(crossentropy)
|
||||
|
||||
|
||||
def binary_crossentropy(y_true, y_pred):
    '''Mean of the backend binary crossentropy of `y_pred` against `y_true`.
    '''
    crossentropy = K.binary_crossentropy(y_pred, y_true)
    return K.mean(crossentropy)
|
||||
|
||||
|
||||
def poisson(y_true, y_pred):
    '''Mean of `y_pred - y_true * log(y_pred + epsilon)`.
    '''
    # epsilon keeps the logarithm finite when a prediction is zero
    log_pred = K.log(y_pred + K.epsilon())
    return K.mean(y_pred - y_true * log_pred)
|
||||
|
||||
|
||||
def cosine_proximity(y_true, y_pred):
    '''Negated mean of the element-wise product of `y_true` and `y_pred`,
    each L2-normalized along the last axis.
    '''
    unit_true = K.l2_normalize(y_true, axis=-1)
    unit_pred = K.l2_normalize(y_pred, axis=-1)
    return -K.mean(unit_true * unit_pred)
|
||||
|
||||
|
||||
# short-hand names (lower- and upper-case) for the metrics above
mse = mean_squared_error
MSE = mean_squared_error
mae = mean_absolute_error
MAE = mean_absolute_error
mape = mean_absolute_percentage_error
MAPE = mean_absolute_percentage_error
msle = mean_squared_logarithmic_error
MSLE = mean_squared_logarithmic_error
cosine = cosine_proximity
|
||||
|
||||
|
||||
from .utils.generic_utils import get_from_module
|
||||
def get(identifier):
    '''Looks up `identifier` through `get_from_module`, searching this
    module's globals under the label 'metric'.
    '''
    module_namespace = globals()
    return get_from_module(identifier, module_namespace, 'metric')
|
||||
|
||||
+38
-13
@@ -8,11 +8,19 @@ from .engine.topology import get_source_inputs, Node
|
||||
from .legacy.models import Graph
|
||||
|
||||
|
||||
def model_from_config(config, custom_objects={}):
    '''Instantiates a model from a configuration dictionary.

    # Arguments
        config: configuration dictionary, forwarded to `layer_from_config`.
        custom_objects: optional dictionary, forwarded unchanged to
            `layer_from_config`.

    # Returns
        Whatever `layer_from_config` builds from `config`.

    # Raises
        Exception: if `config` is a list (an old-style config).
    '''
    # NOTE(review): the shared `{}` default is only passed through here;
    # confirm that `layer_from_config` never mutates it.

    # validate the argument first so that a wrong config type is reported
    # before any import work is done
    if isinstance(config, list):
        # each fragment ends with a space: adjacent string literals are
        # concatenated without any separator
        raise Exception('model_from_config expects a dictionary. '
                        'To load an old-style config use the appropriate '
                        '`load_config` method on Sequential or Graph.')
    from keras.utils.layer_utils import layer_from_config
    return layer_from_config(config, custom_objects=custom_objects)
|
||||
|
||||
|
||||
def model_from_yaml(yaml_string, custom_objects={}):
|
||||
'''Parses a yaml model configuration file
|
||||
and returns a model instance.
|
||||
'''
|
||||
# TODO: legacy support?
|
||||
import yaml
|
||||
from keras.utils.layer_utils import layer_from_config
|
||||
config = yaml.load(yaml_string)
|
||||
@@ -23,7 +31,6 @@ def model_from_json(json_string, custom_objects={}):
|
||||
'''Parses a JSON model configuration file
|
||||
and returns a model instance.
|
||||
'''
|
||||
# TODO: legacy support?
|
||||
import json
|
||||
from keras.utils.layer_utils import layer_from_config
|
||||
config = json.loads(json_string)
|
||||
@@ -178,6 +185,9 @@ class Sequential(Model):
|
||||
self.output_names = self.model.output_names
|
||||
self.input_names = self.model.input_names
|
||||
|
||||
# make sure child model callbacks will call the parent Sequential model:
|
||||
self.model.callback_model = self
|
||||
|
||||
self.built = True
|
||||
|
||||
@property
|
||||
@@ -312,7 +322,7 @@ class Sequential(Model):
|
||||
model.add(Dense(10, activation='softmax'))
|
||||
model.compile(optimizer='rmsprop',
|
||||
loss='categorical_crossentropy',
|
||||
metrics=['acccuracy'])
|
||||
metrics=['accuracy'])
|
||||
```
|
||||
'''
|
||||
# create the underlying model
|
||||
@@ -375,6 +385,8 @@ class Sequential(Model):
|
||||
at successive epochs, as well as validation loss values
|
||||
and validation metrics values (if applicable).
|
||||
'''
|
||||
if self.model is None:
|
||||
raise Exception('The model needs to be compiled before being used.')
|
||||
if 'show_accuracy' in kwargs:
|
||||
kwargs.pop('show_accuracy')
|
||||
warnings.warn('The "show_accuracy" argument is deprecated, '
|
||||
@@ -414,6 +426,8 @@ class Sequential(Model):
|
||||
The attribute `model.metrics_names` will give you
|
||||
the display labels for the scalar outputs.
|
||||
'''
|
||||
if self.model is None:
|
||||
raise Exception('The model needs to be compiled before being used.')
|
||||
if 'show_accuracy' in kwargs:
|
||||
kwargs.pop('show_accuracy')
|
||||
warnings.warn('The "show_accuracy" argument is deprecated, '
|
||||
@@ -441,6 +455,8 @@ class Sequential(Model):
|
||||
# Returns
|
||||
A Numpy array of predictions.
|
||||
'''
|
||||
if self.model is None:
|
||||
self.build()
|
||||
return self.model.predict(x, batch_size=batch_size, verbose=verbose)
|
||||
|
||||
def predict_on_batch(self, x):
|
||||
@@ -552,8 +568,7 @@ class Sequential(Model):
|
||||
def fit_generator(self, generator, samples_per_epoch, nb_epoch,
|
||||
verbose=1, callbacks=[],
|
||||
validation_data=None, nb_val_samples=None,
|
||||
class_weight=None,
|
||||
**kwargs):
|
||||
class_weight=None, max_q_size=10, **kwargs):
|
||||
'''Fits the model on data generated batch-by-batch by
|
||||
a Python generator.
|
||||
The generator is run in parallel to the model, for efficiency.
|
||||
@@ -583,6 +598,7 @@ class Sequential(Model):
|
||||
at the end of every epoch.
|
||||
class_weight: dictionary mapping class indices to a weight
|
||||
for the class.
|
||||
max_q_size: maximum size for the generator queue
|
||||
|
||||
# Returns
|
||||
A `History` object.
|
||||
@@ -604,6 +620,8 @@ class Sequential(Model):
|
||||
samples_per_epoch=10000, nb_epoch=10)
|
||||
```
|
||||
'''
|
||||
if self.model is None:
|
||||
raise Exception('The model needs to be compiled before being used.')
|
||||
if 'show_accuracy' in kwargs:
|
||||
kwargs.pop('show_accuracy')
|
||||
warnings.warn('The "show_accuracy" argument is deprecated, '
|
||||
@@ -629,10 +647,10 @@ class Sequential(Model):
|
||||
callbacks=callbacks,
|
||||
validation_data=validation_data,
|
||||
nb_val_samples=nb_val_samples,
|
||||
class_weight=class_weight)
|
||||
class_weight=class_weight,
|
||||
max_q_size=max_q_size)
|
||||
|
||||
def evaluate_generator(self, generator, val_samples,
|
||||
**kwargs):
|
||||
def evaluate_generator(self, generator, val_samples, max_q_size=10, **kwargs):
|
||||
'''Evaluates the model on a data generator. The generator should
|
||||
return the same kind of data as accepted by `test_on_batch`.
|
||||
|
||||
@@ -643,7 +661,10 @@ class Sequential(Model):
|
||||
val_samples:
|
||||
total number of samples to generate from `generator`
|
||||
before returning.
|
||||
max_q_size: maximum size for the generator queue
|
||||
'''
|
||||
if self.model is None:
|
||||
raise Exception('The model needs to be compiled before being used.')
|
||||
if 'show_accuracy' in kwargs:
|
||||
kwargs.pop('show_accuracy')
|
||||
warnings.warn('The "show_accuracy" argument is deprecated, '
|
||||
@@ -658,9 +679,10 @@ class Sequential(Model):
|
||||
raise Exception('Received unknown keyword arguments: ' +
|
||||
str(kwargs))
|
||||
return self.model.evaluate_generator(generator,
|
||||
val_samples)
|
||||
val_samples,
|
||||
max_q_size=max_q_size)
|
||||
|
||||
def predict_generator(self, generator, val_samples):
|
||||
def predict_generator(self, generator, val_samples, max_q_size=10):
|
||||
'''Generates predictions for the input samples from a data generator.
|
||||
The generator should return the same kind of data as accepted by
|
||||
`predict_on_batch`.
|
||||
@@ -669,16 +691,19 @@ class Sequential(Model):
|
||||
generator: generator yielding batches of input samples.
|
||||
val_samples: total number of samples to generate from `generator`
|
||||
before returning.
|
||||
max_q_size: maximum size for the generator queue
|
||||
|
||||
# Returns
|
||||
A Numpy array of predictions.
|
||||
'''
|
||||
|
||||
return self.model.predict_generator(generator, val_samples)
|
||||
if self.model is None:
|
||||
raise Exception('The model needs to be compiled before being used.')
|
||||
return self.model.predict_generator(generator, val_samples,
|
||||
max_q_size=max_q_size)
|
||||
|
||||
def get_config(self):
|
||||
'''Returns the model configuration
|
||||
as a Python dictionary.
|
||||
as a Python list.
|
||||
'''
|
||||
config = []
|
||||
if self.layers[0].__class__.__name__ == 'Merge':
|
||||
|
||||
@@ -37,7 +37,9 @@ def categorical_crossentropy(y_true, y_pred):
|
||||
|
||||
|
||||
def sparse_categorical_crossentropy(y_true, y_pred):
|
||||
'''expects a 1-D or 2-D array of integer classes.
|
||||
'''expects an array of integer classes.
|
||||
Note: labels shape must have the same number of dimensions as output shape.
|
||||
If you get a shape error, add a length-1 dimension to labels.
|
||||
'''
|
||||
return K.sparse_categorical_crossentropy(y_pred, y_true)
|
||||
|
||||
|
||||
+57
-40
@@ -29,6 +29,11 @@ class Optimizer(object):
|
||||
when their absolute value exceeds this value.
|
||||
'''
|
||||
def __init__(self, **kwargs):
|
||||
allowed_kwargs = {'clipnorm', 'clipvalue'}
|
||||
for k in kwargs:
|
||||
if k not in allowed_kwargs:
|
||||
raise Exception('Unexpected keyword argument '
|
||||
'passed to optimizer: ' + str(k))
|
||||
self.__dict__.update(kwargs)
|
||||
self.updates = []
|
||||
self.weights = []
|
||||
@@ -89,7 +94,12 @@ class Optimizer(object):
|
||||
return weights
|
||||
|
||||
def get_config(self):
|
||||
return {"name": self.__class__.__name__}
|
||||
config = {'name': self.__class__.__name__}
|
||||
if hasattr(self, 'clipnorm'):
|
||||
config['clipnorm'] = self.clipnorm
|
||||
if hasattr(self, 'clipvalue'):
|
||||
config['clipvalue'] = self.clipvalue
|
||||
return config
|
||||
|
||||
|
||||
class SGD(Optimizer):
|
||||
@@ -102,8 +112,8 @@ class SGD(Optimizer):
|
||||
decay: float >= 0. Learning rate decay over each update.
|
||||
nesterov: boolean. Whether to apply Nesterov momentum.
|
||||
'''
|
||||
def __init__(self, lr=0.01, momentum=0., decay=0., nesterov=False,
|
||||
*args, **kwargs):
|
||||
def __init__(self, lr=0.01, momentum=0., decay=0.,
|
||||
nesterov=False, **kwargs):
|
||||
super(SGD, self).__init__(**kwargs)
|
||||
self.__dict__.update(locals())
|
||||
self.iterations = K.variable(0.)
|
||||
@@ -116,8 +126,9 @@ class SGD(Optimizer):
|
||||
lr = self.lr * (1. / (1. + self.decay * self.iterations))
|
||||
self.updates = [(self.iterations, self.iterations + 1.)]
|
||||
|
||||
for p, g in zip(params, grads):
|
||||
m = K.variable(np.zeros(K.get_value(p).shape)) # momentum
|
||||
# momentum
|
||||
self.weights = [K.variable(np.zeros(K.get_value(p).shape)) for p in params]
|
||||
for p, g, m in zip(params, grads, self.weights):
|
||||
v = self.momentum * m - lr * g # velocity
|
||||
self.updates.append((m, v))
|
||||
|
||||
@@ -134,11 +145,12 @@ class SGD(Optimizer):
|
||||
return self.updates
|
||||
|
||||
def get_config(self):
|
||||
return {"name": self.__class__.__name__,
|
||||
"lr": float(K.get_value(self.lr)),
|
||||
"momentum": float(K.get_value(self.momentum)),
|
||||
"decay": float(K.get_value(self.decay)),
|
||||
"nesterov": self.nesterov}
|
||||
config = {'lr': float(K.get_value(self.lr)),
|
||||
'momentum': float(K.get_value(self.momentum)),
|
||||
'decay': float(K.get_value(self.decay)),
|
||||
'nesterov': self.nesterov}
|
||||
base_config = super(SGD, self).get_config()
|
||||
return dict(list(base_config.items()) + list(config.items()))
|
||||
|
||||
|
||||
class RMSprop(Optimizer):
|
||||
@@ -156,7 +168,7 @@ class RMSprop(Optimizer):
|
||||
rho: float >= 0.
|
||||
epsilon: float >= 0. Fuzz factor.
|
||||
'''
|
||||
def __init__(self, lr=0.001, rho=0.9, epsilon=1e-6, *args, **kwargs):
|
||||
def __init__(self, lr=0.001, rho=0.9, epsilon=1e-8, **kwargs):
|
||||
super(RMSprop, self).__init__(**kwargs)
|
||||
self.__dict__.update(locals())
|
||||
self.lr = K.variable(lr)
|
||||
@@ -172,7 +184,7 @@ class RMSprop(Optimizer):
|
||||
# update accumulator
|
||||
new_a = self.rho * a + (1. - self.rho) * K.square(g)
|
||||
self.updates.append((a, new_a))
|
||||
new_p = p - self.lr * g / K.sqrt(new_a + self.epsilon)
|
||||
new_p = p - self.lr * g / (K.sqrt(new_a) + self.epsilon)
|
||||
|
||||
# apply constraints
|
||||
if p in constraints:
|
||||
@@ -182,10 +194,11 @@ class RMSprop(Optimizer):
|
||||
return self.updates
|
||||
|
||||
def get_config(self):
|
||||
return {"name": self.__class__.__name__,
|
||||
"lr": float(K.get_value(self.lr)),
|
||||
"rho": float(K.get_value(self.rho)),
|
||||
"epsilon": self.epsilon}
|
||||
config = {'lr': float(K.get_value(self.lr)),
|
||||
'rho': float(K.get_value(self.rho)),
|
||||
'epsilon': self.epsilon}
|
||||
base_config = super(RMSprop, self).get_config()
|
||||
return dict(list(base_config.items()) + list(config.items()))
|
||||
|
||||
|
||||
class Adagrad(Optimizer):
|
||||
@@ -198,7 +211,7 @@ class Adagrad(Optimizer):
|
||||
lr: float >= 0. Learning rate.
|
||||
epsilon: float >= 0.
|
||||
'''
|
||||
def __init__(self, lr=0.01, epsilon=1e-6, *args, **kwargs):
|
||||
def __init__(self, lr=0.01, epsilon=1e-8, **kwargs):
|
||||
super(Adagrad, self).__init__(**kwargs)
|
||||
self.__dict__.update(locals())
|
||||
self.lr = K.variable(lr)
|
||||
@@ -212,7 +225,7 @@ class Adagrad(Optimizer):
|
||||
for p, g, a in zip(params, grads, self.weights):
|
||||
new_a = a + K.square(g) # update accumulator
|
||||
self.updates.append((a, new_a))
|
||||
new_p = p - self.lr * g / K.sqrt(new_a + self.epsilon)
|
||||
new_p = p - self.lr * g / (K.sqrt(new_a) + self.epsilon)
|
||||
# apply constraints
|
||||
if p in constraints:
|
||||
c = constraints[p]
|
||||
@@ -221,9 +234,10 @@ class Adagrad(Optimizer):
|
||||
return self.updates
|
||||
|
||||
def get_config(self):
|
||||
return {"name": self.__class__.__name__,
|
||||
"lr": float(K.get_value(self.lr)),
|
||||
"epsilon": self.epsilon}
|
||||
config = {'lr': float(K.get_value(self.lr)),
|
||||
'epsilon': self.epsilon}
|
||||
base_config = super(Adagrad, self).get_config()
|
||||
return dict(list(base_config.items()) + list(config.items()))
|
||||
|
||||
|
||||
class Adadelta(Optimizer):
|
||||
@@ -241,7 +255,7 @@ class Adadelta(Optimizer):
|
||||
# References
|
||||
- [Adadelta - an adaptive learning rate method](http://arxiv.org/abs/1212.5701)
|
||||
'''
|
||||
def __init__(self, lr=1.0, rho=0.95, epsilon=1e-6, *args, **kwargs):
|
||||
def __init__(self, lr=1.0, rho=0.95, epsilon=1e-8, **kwargs):
|
||||
super(Adadelta, self).__init__(**kwargs)
|
||||
self.__dict__.update(locals())
|
||||
self.lr = K.variable(lr)
|
||||
@@ -274,10 +288,11 @@ class Adadelta(Optimizer):
|
||||
return self.updates
|
||||
|
||||
def get_config(self):
|
||||
return {"name": self.__class__.__name__,
|
||||
"lr": float(K.get_value(self.lr)),
|
||||
"rho": self.rho,
|
||||
"epsilon": self.epsilon}
|
||||
config = {'lr': float(K.get_value(self.lr)),
|
||||
'rho': self.rho,
|
||||
'epsilon': self.epsilon}
|
||||
base_config = super(Adadelta, self).get_config()
|
||||
return dict(list(base_config.items()) + list(config.items()))
|
||||
|
||||
|
||||
class Adam(Optimizer):
|
||||
@@ -293,8 +308,8 @@ class Adam(Optimizer):
|
||||
# References
|
||||
- [Adam - A Method for Stochastic Optimization](http://arxiv.org/abs/1412.6980v8)
|
||||
'''
|
||||
def __init__(self, lr=0.001, beta_1=0.9, beta_2=0.999, epsilon=1e-8,
|
||||
*args, **kwargs):
|
||||
def __init__(self, lr=0.001, beta_1=0.9, beta_2=0.999,
|
||||
epsilon=1e-8, **kwargs):
|
||||
super(Adam, self).__init__(**kwargs)
|
||||
self.__dict__.update(locals())
|
||||
self.iterations = K.variable(0)
|
||||
@@ -330,11 +345,12 @@ class Adam(Optimizer):
|
||||
return self.updates
|
||||
|
||||
def get_config(self):
|
||||
return {"name": self.__class__.__name__,
|
||||
"lr": float(K.get_value(self.lr)),
|
||||
"beta_1": float(K.get_value(self.beta_1)),
|
||||
"beta_2": float(K.get_value(self.beta_2)),
|
||||
"epsilon": self.epsilon}
|
||||
config = {'lr': float(K.get_value(self.lr)),
|
||||
'beta_1': float(K.get_value(self.beta_1)),
|
||||
'beta_2': float(K.get_value(self.beta_2)),
|
||||
'epsilon': self.epsilon}
|
||||
base_config = super(Adam, self).get_config()
|
||||
return dict(list(base_config.items()) + list(config.items()))
|
||||
|
||||
|
||||
class Adamax(Optimizer):
|
||||
@@ -351,8 +367,8 @@ class Adamax(Optimizer):
|
||||
# References
|
||||
- [Adam - A Method for Stochastic Optimization](http://arxiv.org/abs/1412.6980v8)
|
||||
'''
|
||||
def __init__(self, lr=0.002, beta_1=0.9, beta_2=0.999, epsilon=1e-8,
|
||||
*args, **kwargs):
|
||||
def __init__(self, lr=0.002, beta_1=0.9, beta_2=0.999,
|
||||
epsilon=1e-8, **kwargs):
|
||||
super(Adamax, self).__init__(**kwargs)
|
||||
self.__dict__.update(locals())
|
||||
self.iterations = K.variable(0.)
|
||||
@@ -391,11 +407,12 @@ class Adamax(Optimizer):
|
||||
return self.updates
|
||||
|
||||
def get_config(self):
|
||||
return {"name": self.__class__.__name__,
|
||||
"lr": float(K.get_value(self.lr)),
|
||||
"beta_1": float(K.get_value(self.beta_1)),
|
||||
"beta_2": float(K.get_value(self.beta_2)),
|
||||
"epsilon": self.epsilon}
|
||||
config = {'lr': float(K.get_value(self.lr)),
|
||||
'beta_1': float(K.get_value(self.beta_1)),
|
||||
'beta_2': float(K.get_value(self.beta_2)),
|
||||
'epsilon': self.epsilon}
|
||||
base_config = super(Adamax, self).get_config()
|
||||
return dict(list(base_config.items()) + list(config.items()))
|
||||
|
||||
|
||||
# aliases
|
||||
|
||||
+227
-93
@@ -6,49 +6,70 @@ from __future__ import absolute_import
|
||||
|
||||
import numpy as np
|
||||
import re
|
||||
from scipy import ndimage
|
||||
from scipy import linalg
|
||||
|
||||
from os import listdir
|
||||
from os.path import isfile, join
|
||||
import math
|
||||
import scipy.ndimage as ndi
|
||||
from six.moves import range
|
||||
import os
|
||||
import threading
|
||||
|
||||
|
||||
def random_rotation(x, rg, fill_mode='nearest', cval=0.):
|
||||
angle = np.random.uniform(-rg, rg)
|
||||
x = ndimage.interpolation.rotate(x, angle,
|
||||
axes=(1, 2),
|
||||
reshape=False,
|
||||
mode=fill_mode,
|
||||
cval=cval)
|
||||
def random_rotation(x, rg, row_index=1, col_index=2, channel_index=0,
|
||||
fill_mode='nearest', cval=0.):
|
||||
theta = np.pi / 180 * np.random.uniform(-rg, rg)
|
||||
rotation_matrix = np.array([[np.cos(theta), -np.sin(theta), 0],
|
||||
[np.sin(theta), np.cos(theta), 0],
|
||||
[0, 0, 1]])
|
||||
|
||||
h, w = x.shape[row_index], x.shape[col_index]
|
||||
transform_matrix = transform_matrix_offset_center(rotation_matrix, h, w)
|
||||
x = apply_transform(x, transform_matrix, channel_index, fill_mode, cval)
|
||||
return x
|
||||
|
||||
|
||||
def random_shift(x, wrg, hrg, fill_mode='nearest', cval=0.):
|
||||
shift_x = shift_y = 0
|
||||
def random_shift(x, wrg, hrg, row_index=1, col_index=2, channel_index=0,
|
||||
fill_mode='nearest', cval=0.):
|
||||
h, w = x.shape[row_index], x.shape[col_index]
|
||||
tx = np.random.uniform(-hrg, hrg) * h
|
||||
ty = np.random.uniform(-wrg, wrg) * w
|
||||
translation_matrix = np.array([[1, 0, tx],
|
||||
[0, 1, ty],
|
||||
[0, 0, 1]])
|
||||
|
||||
if wrg:
|
||||
shift_x = np.random.uniform(-wrg, wrg) * x.shape[2]
|
||||
if hrg:
|
||||
shift_y = np.random.uniform(-hrg, hrg) * x.shape[1]
|
||||
x = ndimage.interpolation.shift(x, (0, shift_y, shift_x),
|
||||
order=0,
|
||||
mode=fill_mode,
|
||||
cval=cval)
|
||||
transform_matrix = translation_matrix # no need to do offset
|
||||
x = apply_transform(x, transform_matrix, channel_index, fill_mode, cval)
|
||||
return x
|
||||
|
||||
|
||||
def horizontal_flip(x):
|
||||
for i in range(x.shape[0]):
|
||||
x[i] = np.fliplr(x[i])
|
||||
def random_shear(x, intensity, row_index=1, col_index=2, channel_index=0,
|
||||
fill_mode='nearest', cval=0.):
|
||||
shear = np.random.uniform(-intensity, intensity)
|
||||
shear_matrix = np.array([[1, -np.sin(shear), 0],
|
||||
[0, np.cos(shear), 0],
|
||||
[0, 0, 1]])
|
||||
|
||||
h, w = x.shape[row_index], x.shape[col_index]
|
||||
transform_matrix = transform_matrix_offset_center(shear_matrix, h, w)
|
||||
x = apply_transform(x, transform_matrix, channel_index, fill_mode, cval)
|
||||
return x
|
||||
|
||||
|
||||
def vertical_flip(x):
|
||||
for i in range(x.shape[0]):
|
||||
x[i] = np.flipud(x[i])
|
||||
def random_zoom(x, zoom_range, row_index=1, col_index=2, channel_index=0,
|
||||
fill_mode='nearest', cval=0.):
|
||||
if len(zoom_range) != 2:
|
||||
raise Exception('zoom_range should be a tuple or list of two floats. '
|
||||
'Received arg: ', zoom_range)
|
||||
|
||||
if zoom_range[0] == 1 and zoom_range[1] == 1:
|
||||
zx, zy = 1, 1
|
||||
else:
|
||||
zx, zy = np.random.uniform(zoom_range[0], zoom_range[1], 2)
|
||||
zoom_matrix = np.array([[zx, 0, 0],
|
||||
[0, zy, 0],
|
||||
[0, 0, 1]])
|
||||
|
||||
h, w = x.shape[row_index], x.shape[col_index]
|
||||
transform_matrix = transform_matrix_offset_center(zoom_matrix, h, w)
|
||||
x = apply_transform(x, transform_matrix, channel_index, fill_mode, cval)
|
||||
return x
|
||||
|
||||
|
||||
@@ -57,35 +78,47 @@ def random_barrel_transform(x, intensity):
|
||||
pass
|
||||
|
||||
|
||||
def random_shear(x, intensity, fill_mode='nearest', cval=0.):
|
||||
shear = np.random.uniform(-intensity, intensity)
|
||||
shear_matrix = np.array([[1.0, -math.sin(shear), 0.0],
|
||||
[0.0, math.cos(shear), 0.0],
|
||||
[0.0, 0.0, 1.0]])
|
||||
x = ndimage.interpolation.affine_transform(x, shear_matrix,
|
||||
mode=fill_mode,
|
||||
order=3,
|
||||
cval=cval)
|
||||
def random_channel_shift(x, intensity, channel_index=0):
|
||||
x = np.rollaxis(x, channel_index, 0)
|
||||
min_x, max_x = np.min(x), np.max(x)
|
||||
channel_images = [np.clip(x_channel + np.random.uniform(-intensity, intensity), min_x, max_x)
|
||||
for x_channel in x]
|
||||
x = np.stack(channel_images, axis=0)
|
||||
x = np.rollaxis(x, 0, channel_index+1)
|
||||
return x
|
||||
|
||||
|
||||
def random_channel_shift(x, rg):
|
||||
# TODO
|
||||
pass
|
||||
def transform_matrix_offset_center(matrix, x, y):
|
||||
o_x = float(x) / 2 + 0.5
|
||||
o_y = float(y) / 2 + 0.5
|
||||
offset_matrix = np.array([[1, 0, o_x], [0, 1, o_y], [0, 0, 1]])
|
||||
reset_matrix = np.array([[1, 0, -o_x], [0, 1, -o_y], [0, 0, 1]])
|
||||
transform_matrix = np.dot(np.dot(offset_matrix, matrix), reset_matrix)
|
||||
return transform_matrix
|
||||
|
||||
|
||||
def random_zoom(x, rg, fill_mode='nearest', cval=0.):
|
||||
zoom_w = np.random.uniform(1.-rg, 1.)
|
||||
zoom_h = np.random.uniform(1.-rg, 1.)
|
||||
x = ndimage.interpolation.zoom(x, zoom=(1., zoom_w, zoom_h),
|
||||
mode=fill_mode,
|
||||
cval=cval)
|
||||
return x # shape of result will be different from shape of input!
|
||||
def apply_transform(x, transform_matrix, channel_index=0, fill_mode='nearest', cval=0.):
|
||||
x = np.rollaxis(x, channel_index, 0)
|
||||
final_affine_matrix = transform_matrix[:2, :2]
|
||||
final_offset = transform_matrix[:2, 2]
|
||||
channel_images = [ndi.interpolation.affine_transform(x_channel, final_affine_matrix,
|
||||
final_offset, order=0, mode=fill_mode, cval=cval) for x_channel in x]
|
||||
x = np.stack(channel_images, axis=0)
|
||||
x = np.rollaxis(x, 0, channel_index+1)
|
||||
return x
|
||||
|
||||
|
||||
def array_to_img(x, scale=True):
|
||||
def flip_axis(x, axis):
|
||||
x = np.asarray(x).swapaxes(axis, 0)
|
||||
x = x[::-1, ...]
|
||||
x = x.swapaxes(0, axis)
|
||||
return x
|
||||
|
||||
|
||||
def array_to_img(x, dim_ordering='th', scale=True):
|
||||
from PIL import Image
|
||||
x = x.transpose(1, 2, 0)
|
||||
if dim_ordering == 'th':
|
||||
x = x.transpose(1, 2, 0)
|
||||
if scale:
|
||||
x += max(-np.min(x), 0)
|
||||
x /= np.max(x)
|
||||
@@ -93,19 +126,29 @@ def array_to_img(x, scale=True):
|
||||
if x.shape[2] == 3:
|
||||
# RGB
|
||||
return Image.fromarray(x.astype('uint8'), 'RGB')
|
||||
else:
|
||||
elif x.shape[2] == 1:
|
||||
# grayscale
|
||||
return Image.fromarray(x[:, :, 0].astype('uint8'), 'L')
|
||||
else:
|
||||
raise Exception('Unsupported channel number: ', x.shape[2])
|
||||
|
||||
|
||||
def img_to_array(img):
|
||||
# only used by tests/keras/preprocessing/test_image.py to convert PIL.Image to numpy array
|
||||
def img_to_array(img, dim_ordering='th'):
|
||||
if dim_ordering not in ['th', 'tf']:
|
||||
raise Exception('Unknown dim_ordering: ', dim_ordering)
|
||||
# image has dim_ordering (height, width, channel)
|
||||
x = np.asarray(img, dtype='float32')
|
||||
if len(x.shape) == 3:
|
||||
# RGB: height, width, channel -> channel, height, width
|
||||
x = x.transpose(2, 0, 1)
|
||||
if dim_ordering == 'th':
|
||||
x = x.transpose(2, 0, 1)
|
||||
elif len(x.shape) == 2:
|
||||
if dim_ordering == 'th':
|
||||
x = x.reshape((1, x.shape[0], x.shape[1]))
|
||||
else:
|
||||
x = x.reshape((x.shape[0], x.shape[1], 1))
|
||||
else:
|
||||
# grayscale: height, width -> channel, height, width
|
||||
x = x.reshape((1, x.shape[0], x.shape[1]))
|
||||
raise Exception('Unsupported image shape: ', x.shape)
|
||||
return x
|
||||
|
||||
|
||||
@@ -120,8 +163,8 @@ def load_img(path, grayscale=False):
|
||||
|
||||
|
||||
def list_pictures(directory, ext='jpg|jpeg|bmp|png'):
|
||||
return [join(directory, f) for f in listdir(directory)
|
||||
if isfile(join(directory, f)) and re.match('([\w]+\.(?:' + ext + '))', f)]
|
||||
return [os.path.join(directory, f) for f in os.listdir(directory)
|
||||
if os.path.isfile(os.path.join(directory, f)) and re.match('([\w]+\.(?:' + ext + '))', f)]
|
||||
|
||||
|
||||
class ImageDataGenerator(object):
|
||||
@@ -138,51 +181,92 @@ class ImageDataGenerator(object):
|
||||
width_shift_range: fraction of total width.
|
||||
height_shift_range: fraction of total height.
|
||||
shear_range: shear intensity (shear angle in radians).
|
||||
zoom_range: amount of zoom. if scalar z, zoom will be randomly picked
|
||||
in the range [1-z, 1+z]. A sequence of two can be passed instead
|
||||
to select this range.
|
||||
channel_shift_range: shift range for each channels.
|
||||
fill_mode: points outside the boundaries are filled according to the
|
||||
given mode ('constant', 'nearest', 'reflect' or 'wrap'). Default
|
||||
is 'nearest'.
|
||||
cval: value used for points outside the boundaries when fill_mode is
|
||||
'constant'. Default is 0.
|
||||
horizontal_flip: whether to randomly flip images horizontally.
|
||||
vertical_flip: whether to randomly flip images vertically.
|
||||
dim_ordering: 'th' or 'tf'. In 'th' mode, the channels dimension
|
||||
(the depth) is at index 1, in 'tf' mode it is at index 3.
|
||||
'''
|
||||
def __init__(self,
|
||||
featurewise_center=True,
|
||||
featurewise_center=False,
|
||||
samplewise_center=False,
|
||||
featurewise_std_normalization=True,
|
||||
featurewise_std_normalization=False,
|
||||
samplewise_std_normalization=False,
|
||||
zca_whitening=False,
|
||||
rotation_range=0.,
|
||||
width_shift_range=0.,
|
||||
height_shift_range=0.,
|
||||
shear_range=0.,
|
||||
zoom_range=0.,
|
||||
channel_shift_range=0.,
|
||||
fill_mode='nearest',
|
||||
cval=0.,
|
||||
horizontal_flip=False,
|
||||
vertical_flip=False):
|
||||
vertical_flip=False,
|
||||
dim_ordering='th'):
|
||||
self.__dict__.update(locals())
|
||||
self.mean = None
|
||||
self.std = None
|
||||
self.principal_components = None
|
||||
self.lock = threading.Lock()
|
||||
|
||||
if dim_ordering not in {'tf', 'th'}:
|
||||
raise Exception('dim_ordering should be "tf" (channel after row and '
|
||||
'column) or "th" (channel before row and column). '
|
||||
'Received arg: ', dim_ordering)
|
||||
self.dim_ordering = dim_ordering
|
||||
if dim_ordering == "th":
|
||||
self.channel_index = 1
|
||||
self.row_index = 2
|
||||
self.col_index = 3
|
||||
if dim_ordering == "tf":
|
||||
self.channel_index = 3
|
||||
self.row_index = 1
|
||||
self.col_index = 2
|
||||
|
||||
if np.isscalar(zoom_range):
|
||||
self.zoom_range = [1 - zoom_range, 1 + zoom_range]
|
||||
elif len(zoom_range) == 2:
|
||||
self.zoom_range = [zoom_range[0], zoom_range[1]]
|
||||
else:
|
||||
raise Exception('zoom_range should be a float or '
|
||||
'a tuple or list of two floats. '
|
||||
'Received arg: ', zoom_range)
|
||||
|
||||
self.batch_index = 0
|
||||
self.total_batches_seen = 0
|
||||
|
||||
def reset(self):
|
||||
self.batch_index = 0
|
||||
|
||||
def _flow_index(self, N, batch_size=32, shuffle=False, seed=None):
|
||||
b = 0
|
||||
total_b = 0
|
||||
# ensure self.batch_index is 0
|
||||
self.reset()
|
||||
|
||||
while 1:
|
||||
if b == 0:
|
||||
if seed is not None:
|
||||
np.random.seed(seed + total_b)
|
||||
|
||||
if self.batch_index == 0:
|
||||
index_array = np.arange(N)
|
||||
if shuffle:
|
||||
if seed is not None:
|
||||
np.random.seed(seed + self.total_batches_seen)
|
||||
index_array = np.random.permutation(N)
|
||||
else:
|
||||
index_array = np.arange(N)
|
||||
|
||||
current_index = (b * batch_size) % N
|
||||
current_index = (self.batch_index * batch_size) % N
|
||||
if N >= current_index + batch_size:
|
||||
current_batch_size = batch_size
|
||||
self.batch_index += 1
|
||||
else:
|
||||
current_batch_size = N - current_index
|
||||
|
||||
if current_batch_size == batch_size:
|
||||
b += 1
|
||||
else:
|
||||
b = 0
|
||||
total_b += 1
|
||||
self.batch_index = 0
|
||||
self.total_batches_seen += 1
|
||||
yield (index_array[current_index: current_index + current_batch_size],
|
||||
current_index, current_batch_size)
|
||||
|
||||
@@ -194,6 +278,7 @@ class ImageDataGenerator(object):
|
||||
self.save_to_dir = save_to_dir
|
||||
self.save_prefix = save_prefix
|
||||
self.save_format = save_format
|
||||
self.reset()
|
||||
self.flow_generator = self._flow_index(X.shape[0], batch_size,
|
||||
shuffle, seed)
|
||||
return self
|
||||
@@ -219,8 +304,11 @@ class ImageDataGenerator(object):
|
||||
bX[i] = x
|
||||
if self.save_to_dir:
|
||||
for i in range(current_batch_size):
|
||||
img = array_to_img(bX[i], scale=True)
|
||||
img.save(self.save_to_dir + '/' + self.save_prefix + '_' + str(current_index + i) + '.' + self.save_format)
|
||||
img = array_to_img(bX[i], self.dim_ordering, scale=True)
|
||||
fname = '{prefix}_{index}.{format}'.format(prefix=self.save_prefix,
|
||||
index=current_index + i,
|
||||
format=self.save_format)
|
||||
img.save(os.path.join(self.save_to_dir, fname))
|
||||
bY = self.y[index_array]
|
||||
return bX, bY
|
||||
|
||||
@@ -229,10 +317,12 @@ class ImageDataGenerator(object):
|
||||
return self.next()
|
||||
|
||||
def standardize(self, x):
|
||||
# x is a single image, so it doesn't have image number at index 0
|
||||
img_channel_index = self.channel_index - 1
|
||||
if self.samplewise_center:
|
||||
x -= np.mean(x, axis=1, keepdims=True)
|
||||
x -= np.mean(x, axis=img_channel_index, keepdims=True)
|
||||
if self.samplewise_std_normalization:
|
||||
x /= (np.std(x, axis=1, keepdims=True) + 1e-7)
|
||||
x /= (np.std(x, axis=img_channel_index, keepdims=True) + 1e-7)
|
||||
|
||||
if self.featurewise_center:
|
||||
x -= self.mean
|
||||
@@ -240,29 +330,75 @@ class ImageDataGenerator(object):
|
||||
x /= (self.std + 1e-7)
|
||||
|
||||
if self.zca_whitening:
|
||||
flatx = np.reshape(x, (x.shape[0] * x.shape[1] * x.shape[2]))
|
||||
flatx = np.reshape(x, (x.size))
|
||||
whitex = np.dot(flatx, self.principal_components)
|
||||
x = np.reshape(whitex, (x.shape[0], x.shape[1], x.shape[2]))
|
||||
|
||||
return x
|
||||
|
||||
def random_transform(self, x):
|
||||
# x is a single image, so it doesn't have image number at index 0
|
||||
img_row_index = self.row_index - 1
|
||||
img_col_index = self.col_index - 1
|
||||
img_channel_index = self.channel_index - 1
|
||||
|
||||
# use composition of homographies to generate final transform that needs to be applied
|
||||
if self.rotation_range:
|
||||
x = random_rotation(x, self.rotation_range)
|
||||
if self.width_shift_range or self.height_shift_range:
|
||||
x = random_shift(x, self.width_shift_range, self.height_shift_range)
|
||||
theta = np.pi / 180 * np.random.uniform(-self.rotation_range, self.rotation_range)
|
||||
else:
|
||||
theta = 0
|
||||
rotation_matrix = np.array([[np.cos(theta), -np.sin(theta), 0],
|
||||
[np.sin(theta), np.cos(theta), 0],
|
||||
[0, 0, 1]])
|
||||
if self.height_shift_range:
|
||||
tx = np.random.uniform(-self.height_shift_range, self.height_shift_range) * x.shape[img_row_index]
|
||||
else:
|
||||
tx = 0
|
||||
|
||||
if self.width_shift_range:
|
||||
ty = np.random.uniform(-self.width_shift_range, self.width_shift_range) * x.shape[img_col_index]
|
||||
else:
|
||||
ty = 0
|
||||
|
||||
translation_matrix = np.array([[1, 0, tx],
|
||||
[0, 1, ty],
|
||||
[0, 0, 1]])
|
||||
if self.shear_range:
|
||||
shear = np.random.uniform(-self.shear_range, self.shear_range)
|
||||
else:
|
||||
shear = 0
|
||||
shear_matrix = np.array([[1, -np.sin(shear), 0],
|
||||
[0, np.cos(shear), 0],
|
||||
[0, 0, 1]])
|
||||
|
||||
if self.zoom_range[0] == 1 and self.zoom_range[1] == 1:
|
||||
zx, zy = 1, 1
|
||||
else:
|
||||
zx, zy = np.random.uniform(self.zoom_range[0], self.zoom_range[1], 2)
|
||||
zoom_matrix = np.array([[zx, 0, 0],
|
||||
[0, zy, 0],
|
||||
[0, 0, 1]])
|
||||
|
||||
transform_matrix = np.dot(np.dot(np.dot(rotation_matrix, translation_matrix), shear_matrix), zoom_matrix)
|
||||
|
||||
h, w = x.shape[img_row_index], x.shape[img_col_index]
|
||||
transform_matrix = transform_matrix_offset_center(transform_matrix, h, w)
|
||||
x = apply_transform(x, transform_matrix, img_channel_index,
|
||||
fill_mode=self.fill_mode, cval=self.cval)
|
||||
if self.channel_shift_range != 0:
|
||||
x = random_channel_shift(x, self.channel_shift_range, img_channel_index)
|
||||
|
||||
if self.horizontal_flip:
|
||||
if np.random.random() < 0.5:
|
||||
x = horizontal_flip(x)
|
||||
x = flip_axis(x, img_col_index)
|
||||
|
||||
if self.vertical_flip:
|
||||
if np.random.random() < 0.5:
|
||||
x = vertical_flip(x)
|
||||
if self.shear_range:
|
||||
x = random_shear(x, self.shear_range)
|
||||
x = flip_axis(x, img_row_index)
|
||||
|
||||
# TODO:
|
||||
# zoom
|
||||
# channel-wise normalization
|
||||
# barrel/fisheye
|
||||
# channel shifting
|
||||
return x
|
||||
|
||||
def fit(self, X,
|
||||
@@ -284,14 +420,13 @@ class ImageDataGenerator(object):
|
||||
aX = np.zeros(tuple([rounds * X.shape[0]] + list(X.shape)[1:]))
|
||||
for r in range(rounds):
|
||||
for i in range(X.shape[0]):
|
||||
img = array_to_img(X[i])
|
||||
img = self.random_transform(img)
|
||||
aX[i + r * X.shape[0]] = img_to_array(img)
|
||||
aX[i + r * X.shape[0]] = self.random_transform(X[i])
|
||||
X = aX
|
||||
|
||||
if self.featurewise_center:
|
||||
self.mean = np.mean(X, axis=0)
|
||||
X -= self.mean
|
||||
|
||||
if self.featurewise_std_normalization:
|
||||
self.std = np.std(X, axis=0)
|
||||
X /= (self.std + 1e-7)
|
||||
@@ -306,7 +441,6 @@ class ImageDataGenerator(object):
|
||||
class GraphImageDataGenerator(ImageDataGenerator):
|
||||
'''Example of how to build a generator for a Graph model
|
||||
'''
|
||||
|
||||
def next(self):
|
||||
bX, bY = super(GraphImageDataGenerator, self).next()
|
||||
return {'input': bX, 'output': bY}
|
||||
|
||||
@@ -59,9 +59,11 @@ class ActivityRegularizer(Regularizer):
|
||||
raise Exception('Need to call `set_layer` on '
|
||||
'ActivityRegularizer instance '
|
||||
'before calling the instance.')
|
||||
output = self.layer.output
|
||||
regularized_loss = loss + self.l1 * K.sum(K.mean(K.abs(output), axis=0))
|
||||
regularized_loss += self.l2 * K.sum(K.mean(K.square(output), axis=0))
|
||||
regularized_loss = loss
|
||||
for i in range(len(self.layer.inbound_nodes)):
|
||||
output = self.layer.get_output_at(i)
|
||||
regularized_loss += self.l1 * K.sum(K.mean(K.abs(output), axis=0))
|
||||
regularized_loss += self.l2 * K.sum(K.mean(K.square(output), axis=0))
|
||||
return K.in_train_phase(regularized_loss, loss)
|
||||
|
||||
def get_config(self):
|
||||
|
||||
@@ -73,7 +73,7 @@ def get_file(fname, origin, untar=False):
|
||||
except (Exception, KeyboardInterrupt) as e:
|
||||
if os.path.exists(fpath):
|
||||
os.remove(fpath)
|
||||
raise e
|
||||
raise
|
||||
progbar = None
|
||||
|
||||
if untar:
|
||||
@@ -88,7 +88,7 @@ def get_file(fname, origin, untar=False):
|
||||
os.remove(untar_fpath)
|
||||
else:
|
||||
shutil.rmtree(untar_fpath)
|
||||
raise e
|
||||
raise
|
||||
tfile.close()
|
||||
return untar_fpath
|
||||
|
||||
|
||||
@@ -9,7 +9,6 @@ def to_categorical(y, nb_classes=None):
|
||||
'''Convert class vector (integers from 0 to nb_classes)
|
||||
to binary class matrix, for use with categorical_crossentropy.
|
||||
'''
|
||||
y = np.asarray(y, dtype='int32')
|
||||
if not nb_classes:
|
||||
nb_classes = np.max(y)+1
|
||||
Y = np.zeros((len(y), nb_classes))
|
||||
@@ -51,3 +50,27 @@ def probas_to_classes(y_pred):
|
||||
|
||||
def categorical_probas_to_classes(p):
|
||||
return np.argmax(p, axis=1)
|
||||
|
||||
|
||||
def convert_kernel(kernel, dim_ordering='th'):
|
||||
'''Converts a kernel matrix (numpy array)
|
||||
from Theano format to TensorFlow format
|
||||
(or reciprocally, since the transformation
|
||||
is its own inverse).
|
||||
'''
|
||||
new_kernel = np.copy(kernel)
|
||||
if dim_ordering == 'th':
|
||||
w = kernel.shape[2]
|
||||
h = kernel.shape[3]
|
||||
for i in range(w):
|
||||
for j in range(h):
|
||||
new_kernel[:, :, i, j] = kernel[:, :, w - i - 1, h - j - 1]
|
||||
elif dim_ordering == 'tf':
|
||||
w = kernel.shape[0]
|
||||
h = kernel.shape[1]
|
||||
for i in range(w):
|
||||
for j in range(h):
|
||||
new_kernel[i, j, :, :] = kernel[w - i - 1, h - j - 1, :, :]
|
||||
else:
|
||||
raise Exception('Invalid dim_ordering: ' + str(dim_ordering))
|
||||
return new_kernel
|
||||
|
||||
@@ -29,14 +29,17 @@ def model_to_dot(model, show_shapes=False):
|
||||
if show_shapes:
|
||||
# Build the label that will actually contain a table with the
|
||||
# input/output
|
||||
outputlabels = str(layer.output_shape)
|
||||
try:
|
||||
outputlabels = str(layer.output_shape)
|
||||
except:
|
||||
outputlabels = 'multiple'
|
||||
if hasattr(layer, 'input_shape'):
|
||||
inputlabels = str(layer.input_shape)
|
||||
elif hasattr(layer, 'input_shapes'):
|
||||
inputlabels = ', '.join(
|
||||
[str(ishape) for ishape in layer.input_shapes])
|
||||
else:
|
||||
inputlabels = ''
|
||||
inputlabels = 'multiple'
|
||||
label = '%s\n|{input:|output:}|{{%s}|{%s}}' % (label, inputlabels, outputlabels)
|
||||
|
||||
node = pydot.Node(layer_id, label=label)
|
||||
|
||||
@@ -2,7 +2,6 @@ from __future__ import absolute_import
|
||||
import copy
|
||||
import inspect
|
||||
import types
|
||||
import numpy as np
|
||||
|
||||
from ..utils.np_utils import to_categorical
|
||||
from ..models import Sequential
|
||||
|
||||
+2
-2
@@ -3,12 +3,12 @@ from setuptools import find_packages
|
||||
|
||||
|
||||
setup(name='Keras',
|
||||
version='1.0.0',
|
||||
version='1.0.3',
|
||||
description='Deep Learning for Python',
|
||||
author='Francois Chollet',
|
||||
author_email='francois.chollet@gmail.com',
|
||||
url='https://github.com/fchollet/keras',
|
||||
download_url='https://github.com/fchollet/keras/tarball/1.0.0',
|
||||
download_url='https://github.com/fchollet/keras/tarball/1.0.3',
|
||||
license='MIT',
|
||||
install_requires=['theano', 'pyyaml', 'six'],
|
||||
extras_require={
|
||||
|
||||
@@ -23,7 +23,7 @@ def test_temporal_classification():
|
||||
'''
|
||||
np.random.seed(1337)
|
||||
(X_train, y_train), (X_test, y_test) = get_test_data(nb_train=500,
|
||||
nb_test=200,
|
||||
nb_test=500,
|
||||
input_shape=(3, 5),
|
||||
classification=True,
|
||||
nb_class=2)
|
||||
@@ -35,12 +35,12 @@ def test_temporal_classification():
|
||||
input_shape=(X_train.shape[1], X_train.shape[2]),
|
||||
activation='softmax'))
|
||||
model.compile(loss='categorical_crossentropy',
|
||||
optimizer='adadelta',
|
||||
optimizer='adagrad',
|
||||
metrics=['accuracy'])
|
||||
history = model.fit(X_train, y_train, nb_epoch=5, batch_size=16,
|
||||
history = model.fit(X_train, y_train, nb_epoch=20, batch_size=32,
|
||||
validation_data=(X_test, y_test),
|
||||
verbose=0)
|
||||
assert(history.history['val_acc'][-1] > 0.9)
|
||||
assert(history.history['val_acc'][-1] >= 0.85)
|
||||
|
||||
|
||||
def test_temporal_regression():
|
||||
@@ -182,4 +182,5 @@ def test_masked_temporal():
|
||||
assert(np.abs(history.history['val_loss'][-1] - ground_truth) < 0.06)
|
||||
|
||||
if __name__ == '__main__':
|
||||
pytest.main([__file__])
|
||||
# pytest.main([__file__])
|
||||
test_temporal_classification()
|
||||
|
||||
@@ -5,6 +5,7 @@ import numpy as np
|
||||
|
||||
from keras.backend import theano_backend as KTH
|
||||
from keras.backend import tensorflow_backend as KTF
|
||||
from keras.utils.np_utils import convert_kernel
|
||||
|
||||
|
||||
def check_single_tensor_operation(function_name, input_shape, **kwargs):
|
||||
@@ -22,10 +23,12 @@ def check_single_tensor_operation(function_name, input_shape, **kwargs):
|
||||
def check_two_tensor_operation(function_name, x_input_shape,
|
||||
y_input_shape, **kwargs):
|
||||
xval = np.random.random(x_input_shape) - 0.5
|
||||
|
||||
xth = KTH.variable(xval)
|
||||
xtf = KTF.variable(xval)
|
||||
|
||||
yval = np.random.random(y_input_shape) - 0.5
|
||||
|
||||
yth = KTH.variable(yval)
|
||||
ytf = KTF.variable(yval)
|
||||
|
||||
@@ -88,6 +91,17 @@ class TestBackend(object):
|
||||
assert_allclose(np_rep, th_rep, atol=1e-05)
|
||||
assert_allclose(np_rep, tf_rep, atol=1e-05)
|
||||
|
||||
def test_tile(self):
|
||||
shape = (3, 4)
|
||||
arr = np.arange(np.prod(shape)).reshape(shape)
|
||||
arr_th = KTH.variable(arr)
|
||||
arr_tf = KTF.variable(arr)
|
||||
|
||||
n = (2, 1)
|
||||
th_rep = KTH.eval(KTH.tile(arr_th, n))
|
||||
tf_rep = KTF.eval(KTF.tile(arr_tf, n))
|
||||
assert_allclose(tf_rep, th_rep, atol=1e-05)
|
||||
|
||||
def test_value_manipulation(self):
|
||||
val = np.random.random((4, 2))
|
||||
xth = KTH.variable(val)
|
||||
@@ -369,42 +383,56 @@ class TestBackend(object):
|
||||
check_single_tensor_operation('l2_normalize', (4, 3), axis=-1)
|
||||
check_single_tensor_operation('l2_normalize', (4, 3), axis=1)
|
||||
|
||||
# def test_conv2d(self):
|
||||
# '''conv2d works "properly" with Theano and TF but outputs different
|
||||
# values in each case. Cause unclear (input / kernel shape format?)
|
||||
# '''
|
||||
# # TH kernel shape: (depth, input_depth, rows, cols)
|
||||
# check_two_tensor_operation('conv2d', (5, 3, 10, 12), (4, 3, 2, 2),
|
||||
# strides=(1, 1), border_mode='valid')
|
||||
# check_two_tensor_operation('conv2d', (5, 3, 10, 12), (4, 3, 2, 2),
|
||||
# strides=(1, 1), border_mode='same')
|
||||
def test_conv2d(self):
|
||||
# TH kernel shape: (depth, input_depth, rows, cols)
|
||||
# TF kernel shape: (rows, cols, input_depth, depth)
|
||||
|
||||
# # TF kernel shape: (rows, cols, input_depth, depth)
|
||||
# check_two_tensor_operation('conv2d', (5, 10, 12, 3), (2, 2, 3, 4),
|
||||
# strides=(1, 1), border_mode='valid', dim_ordering='tf')
|
||||
# check_two_tensor_operation('conv2d', (5, 10, 12, 3), (2, 2, 3, 4),
|
||||
# strides=(1, 1), border_mode='same', dim_ordering='tf')
|
||||
for input_shape in [(2, 3, 4, 5), (2, 3, 5, 6)]:
|
||||
for kernel_shape in [(4, 3, 2, 2), (4, 3, 3, 4)]:
|
||||
xval = np.random.random(input_shape)
|
||||
|
||||
# check_two_tensor_operation('conv2d', (5, 3, 10, 12), (4, 3, 3, 3),
|
||||
# strides=(1, 1), border_mode='valid')
|
||||
# check_two_tensor_operation('conv2d', (5, 3, 10, 12), (4, 3, 3, 3),
|
||||
# strides=(1, 1), border_mode='same')
|
||||
xth = KTH.variable(xval)
|
||||
xtf = KTF.variable(xval)
|
||||
|
||||
# check_two_tensor_operation('conv2d', (5, 3, 10, 12), (4, 3, 3, 3),
|
||||
# strides=(2, 2), border_mode='valid')
|
||||
kernel_val = np.random.random(kernel_shape) - 0.5
|
||||
|
||||
# def test_pool2d(self):
|
||||
# '''pool2d works "properly" with Theano and TF but outputs different
|
||||
# values in each case. Cause unclear (input shape format?)
|
||||
# '''
|
||||
# check_single_tensor_operation('pool2d', (5, 3, 10, 12), pool_size=(2, 2),
|
||||
# strides=(1, 1), border_mode='valid')
|
||||
kernel_th = KTH.variable(convert_kernel(kernel_val))
|
||||
kernel_tf = KTF.variable(kernel_val)
|
||||
|
||||
# check_single_tensor_operation('pool2d', (5, 3, 9, 11), pool_size=(2, 2),
|
||||
# strides=(1, 1), border_mode='valid')
|
||||
zth = KTH.eval(KTH.conv2d(xth, kernel_th))
|
||||
ztf = KTF.eval(KTF.conv2d(xtf, kernel_tf))
|
||||
|
||||
# check_single_tensor_operation('pool2d', (5, 3, 9, 11), pool_size=(2, 3),
|
||||
# strides=(1, 1), border_mode='valid')
|
||||
assert zth.shape == ztf.shape
|
||||
assert_allclose(zth, ztf, atol=1e-05)
|
||||
|
||||
input_shape = (1, 6, 5, 3)
|
||||
kernel_shape = (3, 3, 3, 2)
|
||||
|
||||
xval = np.random.random(input_shape)
|
||||
|
||||
xth = KTH.variable(xval)
|
||||
xtf = KTF.variable(xval)
|
||||
|
||||
kernel_val = np.random.random(kernel_shape) - 0.5
|
||||
|
||||
kernel_th = KTH.variable(convert_kernel(kernel_val, dim_ordering='tf'))
|
||||
kernel_tf = KTF.variable(kernel_val)
|
||||
|
||||
zth = KTH.eval(KTH.conv2d(xth, kernel_th, dim_ordering='tf'))
|
||||
ztf = KTF.eval(KTF.conv2d(xtf, kernel_tf, dim_ordering='tf'))
|
||||
|
||||
assert zth.shape == ztf.shape
|
||||
assert_allclose(zth, ztf, atol=1e-05)
|
||||
|
||||
def test_pool2d(self):
|
||||
check_single_tensor_operation('pool2d', (5, 3, 10, 12), pool_size=(2, 2),
|
||||
strides=(1, 1), border_mode='valid')
|
||||
|
||||
check_single_tensor_operation('pool2d', (5, 3, 9, 11), pool_size=(2, 2),
|
||||
strides=(1, 1), border_mode='valid')
|
||||
|
||||
check_single_tensor_operation('pool2d', (5, 3, 9, 11), pool_size=(2, 3),
|
||||
strides=(1, 1), border_mode='valid')
|
||||
|
||||
def test_random_normal(self):
|
||||
mean = 0.
|
||||
|
||||
@@ -389,6 +389,9 @@ def test_recursion():
|
||||
assert K.int_shape(m_tf) == (None, 64)
|
||||
assert K.int_shape(n_tf) == (None, 5)
|
||||
|
||||
# test merge
|
||||
o_tf = merge([j_tf, k_tf], mode='concat', concat_axis=1)
|
||||
|
||||
|
||||
def test_functional_guide():
|
||||
# MNIST
|
||||
|
||||
@@ -1,9 +1,11 @@
|
||||
import pytest
|
||||
import numpy as np
|
||||
from numpy.testing import assert_allclose
|
||||
|
||||
from keras.layers import Dense, Dropout
|
||||
from keras.engine.topology import merge, Input
|
||||
from keras.engine.training import Model
|
||||
from keras.models import Sequential, Graph
|
||||
from keras import backend as K
|
||||
|
||||
|
||||
@@ -115,10 +117,10 @@ def test_model_methods():
|
||||
|
||||
out = model.train_on_batch([input_a_np, input_b_np],
|
||||
[output_a_np, output_b_np])
|
||||
assert len(out) == 3
|
||||
assert len(out) == 5
|
||||
out = model.test_on_batch([input_a_np, input_b_np],
|
||||
[output_a_np, output_b_np])
|
||||
assert len(out) == 3
|
||||
assert len(out) == 5
|
||||
|
||||
# this should also work
|
||||
model.compile(optimizer, loss, metrics={'dense_1': 'acc'},
|
||||
@@ -126,10 +128,10 @@ def test_model_methods():
|
||||
|
||||
out = model.train_on_batch([input_a_np, input_b_np],
|
||||
[output_a_np, output_b_np])
|
||||
assert len(out) == 2
|
||||
assert len(out) == 4
|
||||
out = model.test_on_batch([input_a_np, input_b_np],
|
||||
[output_a_np, output_b_np])
|
||||
assert len(out) == 2
|
||||
assert len(out) == 4
|
||||
|
||||
# and this as well
|
||||
model.compile(optimizer, loss, metrics={'dense_1': ['acc']},
|
||||
@@ -137,10 +139,22 @@ def test_model_methods():
|
||||
|
||||
out = model.train_on_batch([input_a_np, input_b_np],
|
||||
[output_a_np, output_b_np])
|
||||
assert len(out) == 2
|
||||
assert len(out) == 4
|
||||
out = model.test_on_batch([input_a_np, input_b_np],
|
||||
[output_a_np, output_b_np])
|
||||
assert len(out) == 2
|
||||
assert len(out) == 4
|
||||
|
||||
# test with a custom metric function
|
||||
mse = lambda y_true, y_pred: K.mean(K.pow(y_true - y_pred, 2))
|
||||
model.compile(optimizer, loss, metrics=[mse],
|
||||
sample_weight_mode=None)
|
||||
|
||||
out = model.train_on_batch([input_a_np, input_b_np],
|
||||
[output_a_np, output_b_np])
|
||||
assert len(out) == 5
|
||||
out = model.test_on_batch([input_a_np, input_b_np],
|
||||
[output_a_np, output_b_np])
|
||||
assert len(out) == 5
|
||||
|
||||
input_a_np = np.random.random((10, 3))
|
||||
input_b_np = np.random.random((10, 3))
|
||||
@@ -153,5 +167,28 @@ def test_model_methods():
|
||||
out = model.predict([input_a_np, input_b_np], batch_size=4)
|
||||
|
||||
|
||||
def test_trainable_argument():
|
||||
x = np.random.random((5, 3))
|
||||
y = np.random.random((5, 2))
|
||||
|
||||
model = Sequential()
|
||||
model.add(Dense(2, input_dim=3, trainable=False))
|
||||
model.compile('rmsprop', 'mse')
|
||||
out = model.predict(x)
|
||||
model.train_on_batch(x, y)
|
||||
out_2 = model.predict(x)
|
||||
assert_allclose(out, out_2)
|
||||
|
||||
# test with nesting
|
||||
input = Input(shape=(3,))
|
||||
output = model(input)
|
||||
model = Model(input, output)
|
||||
model.compile('rmsprop', 'mse')
|
||||
out = model.predict(x)
|
||||
model.train_on_batch(x, y)
|
||||
out_2 = model.predict(x)
|
||||
assert_allclose(out, out_2)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
pytest.main([__file__])
|
||||
|
||||
@@ -14,19 +14,19 @@ def test_masking():
|
||||
|
||||
|
||||
def test_merge():
|
||||
from keras.layers import Input, merge
|
||||
from keras.layers import Input, merge, Merge
|
||||
from keras.models import Model
|
||||
|
||||
# test modes: 'sum', 'mul', 'concat', 'ave', 'cos', 'dot'.
|
||||
input_shapes = [(3, 2), (3, 2)]
|
||||
inputs = [np.random.random(shape) for shape in input_shapes]
|
||||
|
||||
# test graph API
|
||||
for mode in ['sum', 'mul', 'concat', 'ave', 'cos', 'dot']:
|
||||
# test functional API
|
||||
for mode in ['sum', 'mul', 'concat', 'ave']:
|
||||
print(mode)
|
||||
input_a = Input(shape=input_shapes[0][1:])
|
||||
input_b = Input(shape=input_shapes[1][1:])
|
||||
merged = merge([input_a, input_b], mode='sum')
|
||||
merged = merge([input_a, input_b], mode=mode)
|
||||
model = Model([input_a, input_b], merged)
|
||||
model.compile('rmsprop', 'mse')
|
||||
|
||||
@@ -38,6 +38,15 @@ def test_merge():
|
||||
model = Model.from_config(config)
|
||||
model.compile('rmsprop', 'mse')
|
||||
|
||||
# test Merge (#2460)
|
||||
merged = Merge(mode=mode)([input_a, input_b])
|
||||
model = Model([input_a, input_b], merged)
|
||||
model.compile('rmsprop', 'mse')
|
||||
|
||||
expected_output_shape = model.get_output_shape_for(input_shapes)
|
||||
actual_output_shape = model.predict(inputs).shape
|
||||
assert expected_output_shape == actual_output_shape
|
||||
|
||||
# test lambda with output_shape lambda
|
||||
input_a = Input(shape=input_shapes[0][1:])
|
||||
input_b = Input(shape=input_shapes[1][1:])
|
||||
|
||||
@@ -32,6 +32,13 @@ def _runner(layer_class):
|
||||
'dropout_W': 0.1},
|
||||
input_shape=(3, 2, 3))
|
||||
|
||||
# check implementation modes
|
||||
for mode in ['cpu', 'mem', 'gpu']:
|
||||
layer_test(layer_class,
|
||||
kwargs={'output_dim': output_dim,
|
||||
'consume_less': mode},
|
||||
input_shape=(3, 2, 3))
|
||||
|
||||
# check statefulness
|
||||
model = Sequential()
|
||||
model.add(embeddings.Embedding(embedding_num, embedding_dim,
|
||||
|
||||
@@ -4,56 +4,92 @@ from PIL import Image
|
||||
import numpy as np
|
||||
import os
|
||||
import shutil
|
||||
import tempfile
|
||||
|
||||
|
||||
def setup_function(func):
|
||||
os.mkdir('test_images')
|
||||
os.mkdir('test_images/rgb')
|
||||
os.mkdir('test_images/gsc')
|
||||
class TestImage:
|
||||
|
||||
img_w = img_h = 20
|
||||
for n in range(8):
|
||||
bias = np.random.rand(img_w, img_h, 1) * 64
|
||||
variance = np.random.rand(img_w, img_h, 1) * (255-64)
|
||||
imarray = np.random.rand(img_w, img_h, 3) * variance + bias
|
||||
im = Image.fromarray(imarray.astype('uint8')).convert('RGBA')
|
||||
im.save('test_images/rgb/rgb_test_image_'+str(n)+'.png')
|
||||
def setup_class(cls):
|
||||
img_w = img_h = 20
|
||||
rgb_images = []
|
||||
gray_images = []
|
||||
for n in range(8):
|
||||
bias = np.random.rand(img_w, img_h, 1) * 64
|
||||
variance = np.random.rand(img_w, img_h, 1) * (255-64)
|
||||
imarray = np.random.rand(img_w, img_h, 3) * variance + bias
|
||||
im = Image.fromarray(imarray.astype('uint8')).convert('RGB')
|
||||
rgb_images.append(im)
|
||||
|
||||
imarray = np.random.rand(img_w, img_h, 1) * variance + bias
|
||||
im = Image.fromarray(imarray.astype('uint8').squeeze()).convert('L')
|
||||
im.save('test_images/gsc/gsc_test_image_'+str(n)+'.png')
|
||||
imarray = np.random.rand(img_w, img_h, 1) * variance + bias
|
||||
im = Image.fromarray(imarray.astype('uint8').squeeze()).convert('L')
|
||||
gray_images.append(im)
|
||||
|
||||
cls.all_test_images = [rgb_images, gray_images]
|
||||
|
||||
def teardown_function(func):
|
||||
shutil.rmtree('test_images')
|
||||
def teardown_class(cls):
|
||||
del cls.all_test_images
|
||||
|
||||
def test_image_data_generator(self):
|
||||
for test_images in self.all_test_images:
|
||||
img_list = []
|
||||
for im in test_images:
|
||||
img_list.append(img_to_array(im)[None, ...])
|
||||
|
||||
def test_image_data_generator():
|
||||
for color_mode in ['gsc', 'rgb']:
|
||||
file_list = list_pictures('test_images/' + color_mode)
|
||||
img_list = []
|
||||
for f in file_list:
|
||||
img_list.append(img_to_array(load_img(f))[None, ...])
|
||||
images = np.vstack(img_list)
|
||||
generator = ImageDataGenerator(
|
||||
featurewise_center=True,
|
||||
samplewise_center=True,
|
||||
featurewise_std_normalization=True,
|
||||
samplewise_std_normalization=True,
|
||||
zca_whitening=True,
|
||||
rotation_range=90.,
|
||||
width_shift_range=0.1,
|
||||
height_shift_range=0.1,
|
||||
shear_range=0.5,
|
||||
zoom_range=0.2,
|
||||
channel_shift_range=0.,
|
||||
fill_mode='nearest',
|
||||
cval=0.5,
|
||||
horizontal_flip=True,
|
||||
vertical_flip=True)
|
||||
generator.fit(images, augment=True)
|
||||
|
||||
images = np.vstack(img_list)
|
||||
generator = ImageDataGenerator(
|
||||
featurewise_center=True,
|
||||
samplewise_center=True,
|
||||
featurewise_std_normalization=True,
|
||||
samplewise_std_normalization=True,
|
||||
zca_whitening=True,
|
||||
rotation_range=90.,
|
||||
width_shift_range=10.,
|
||||
height_shift_range=10.,
|
||||
shear_range=0.5,
|
||||
horizontal_flip=True,
|
||||
vertical_flip=True)
|
||||
generator.fit(images, augment=True)
|
||||
tmp_folder = tempfile.mkdtemp(prefix='test_images')
|
||||
for x, y in generator.flow(images, np.arange(images.shape[0]),
|
||||
shuffle=True, save_to_dir=tmp_folder):
|
||||
assert x.shape[1:] == images.shape[1:]
|
||||
break
|
||||
shutil.rmtree(tmp_folder)
|
||||
|
||||
def test_img_flip(self):
|
||||
x = np.array(range(4)).reshape([1, 1, 2, 2])
|
||||
assert (flip_axis(x, 0) == x).all()
|
||||
assert (flip_axis(x, 1) == x).all()
|
||||
assert (flip_axis(x, 2) == [[[[2, 3], [0, 1]]]]).all()
|
||||
assert (flip_axis(x, 3) == [[[[1, 0], [3, 2]]]]).all()
|
||||
|
||||
dim_ordering_and_col_index = (('tf', 2), ('th', 3))
|
||||
for dim_ordering, col_index in dim_ordering_and_col_index:
|
||||
image_generator_th = ImageDataGenerator(
|
||||
featurewise_center=False,
|
||||
samplewise_center=False,
|
||||
featurewise_std_normalization=False,
|
||||
samplewise_std_normalization=False,
|
||||
zca_whitening=False,
|
||||
rotation_range=0,
|
||||
width_shift_range=0,
|
||||
height_shift_range=0,
|
||||
shear_range=0,
|
||||
zoom_range=0,
|
||||
channel_shift_range=0,
|
||||
horizontal_flip=True,
|
||||
vertical_flip=False,
|
||||
dim_ordering=dim_ordering).flow(x, [1])
|
||||
for i in range(10):
|
||||
potentially_flipped_x, _ = next(image_generator_th)
|
||||
assert ((potentially_flipped_x == x).all() or
|
||||
(potentially_flipped_x == flip_axis(x, col_index)).all())
|
||||
|
||||
for x, y in generator.flow(images, np.arange(images.shape[0]),
|
||||
shuffle=True, save_to_dir='test_images'):
|
||||
assert x.shape[1:] == images.shape[1:]
|
||||
break
|
||||
|
||||
if __name__ == '__main__':
|
||||
pytest.main([__file__])
|
||||
|
||||
@@ -56,6 +56,22 @@ def test_softplus():
|
||||
assert_allclose(result, expected, rtol=1e-05)
|
||||
|
||||
|
||||
def test_softsign():
|
||||
'''
|
||||
Test using a reference softsign implementation
|
||||
'''
|
||||
def softsign(x):
|
||||
return np.divide(x, np.ones_like(x) + np.absolute(x))
|
||||
|
||||
x = K.placeholder(ndim=2)
|
||||
f = K.function([x], [activations.softsign(x)])
|
||||
test_values = get_standard_values()
|
||||
|
||||
result = f([test_values])[0]
|
||||
expected = softsign(test_values)
|
||||
assert_allclose(result, expected, rtol=1e-05)
|
||||
|
||||
|
||||
def test_sigmoid():
|
||||
'''
|
||||
Test using a numerically stable reference sigmoid implementation
|
||||
|
||||
@@ -126,7 +126,7 @@ def test_LearningRateScheduler():
|
||||
assert (float(K.get_value(model.optimizer.lr)) - 0.2) < K.epsilon()
|
||||
|
||||
|
||||
@pytest.mark.skipif((K._BACKEND != 'tensorflow') or (sys.version_info[0] == 3),
|
||||
@pytest.mark.skipif((K._BACKEND != 'tensorflow'),
|
||||
reason="Requires tensorflow backend")
|
||||
def test_TensorBoard():
|
||||
import shutil
|
||||
@@ -252,8 +252,4 @@ def test_TensorBoard():
|
||||
KTF.set_session(old_session)
|
||||
|
||||
if __name__ == '__main__':
|
||||
# pytest.main([__file__])
|
||||
# test_ModelCheckpoint()
|
||||
# test_EarlyStopping()
|
||||
# test_LearningRateScheduler()
|
||||
test_TensorBoard()
|
||||
pytest.main([__file__])
|
||||
|
||||
@@ -53,7 +53,7 @@ def test_graph_fit_generator():
|
||||
validation_data=data_generator_graph(False), nb_val_samples=batch_size * 3)
|
||||
graph.fit_generator(data_generator_graph(True), 1000, nb_epoch=4,
|
||||
validation_data=data_generator_graph(False), nb_val_samples=batch_size * 3)
|
||||
gen_loss = graph.evaluate_generator(data_generator_graph(True), 128, verbose=0)
|
||||
gen_loss = graph.evaluate_generator(data_generator_graph(True), 128, verbose=0)
|
||||
|
||||
loss = graph.evaluate({'input1': X_test_graph, 'output1': y_test_graph}, verbose=0)
|
||||
|
||||
|
||||
@@ -0,0 +1,44 @@
|
||||
import pytest
|
||||
import numpy as np
|
||||
|
||||
from keras import metrics
|
||||
from keras import backend as K
|
||||
|
||||
all_metrics = [
|
||||
metrics.binary_accuracy,
|
||||
metrics.categorical_accuracy,
|
||||
metrics.mean_squared_error,
|
||||
metrics.mean_absolute_error,
|
||||
metrics.mean_absolute_percentage_error,
|
||||
metrics.mean_squared_logarithmic_error,
|
||||
metrics.squared_hinge,
|
||||
metrics.hinge,
|
||||
metrics.categorical_crossentropy,
|
||||
metrics.binary_crossentropy,
|
||||
metrics.poisson,
|
||||
metrics.cosine_proximity,
|
||||
]
|
||||
|
||||
all_sparse_metrics = [
|
||||
metrics.sparse_categorical_accuracy,
|
||||
metrics.sparse_categorical_crossentropy,
|
||||
]
|
||||
|
||||
|
||||
def test_metrics():
|
||||
y_a = K.variable(np.random.random((6, 7)))
|
||||
y_b = K.variable(np.random.random((6, 7)))
|
||||
for metric in all_metrics:
|
||||
output = metric(y_a, y_b)
|
||||
assert K.eval(output).shape == ()
|
||||
|
||||
|
||||
def test_sparse_metrics():
|
||||
for metric in all_sparse_metrics:
|
||||
y_a = K.variable(np.random.randint(0, 7, (6,)), dtype=K.floatx())
|
||||
y_b = K.variable(np.random.random((6, 7)), dtype=K.floatx())
|
||||
assert K.eval(metric(y_a, y_b)).shape == ()
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
pytest.main([__file__])
|
||||
@@ -22,19 +22,23 @@ high_weight = 5
|
||||
max_train_samples = 5000
|
||||
max_test_samples = 1000
|
||||
|
||||
# the data, shuffled and split between tran and test sets
|
||||
(X_train, y_train), (X_test, y_test) = mnist.load_data()
|
||||
X_train = X_train.reshape(60000, 784)[:max_train_samples]
|
||||
X_test = X_test.reshape(10000, 784)[:max_test_samples]
|
||||
X_train = X_train.astype("float32") / 255
|
||||
X_test = X_test.astype("float32") / 255
|
||||
|
||||
# convert class vectors to binary class matrices
|
||||
y_train = y_train[:max_train_samples]
|
||||
y_test = y_test[:max_test_samples]
|
||||
Y_train = np_utils.to_categorical(y_train, nb_classes)
|
||||
Y_test = np_utils.to_categorical(y_test, nb_classes)
|
||||
test_ids = np.where(y_test == np.array(weighted_class))[0]
|
||||
def get_data():
|
||||
# the data, shuffled and split between tran and test sets
|
||||
(X_train, y_train), (X_test, y_test) = mnist.load_data()
|
||||
X_train = X_train.reshape(60000, 784)[:max_train_samples]
|
||||
X_test = X_test.reshape(10000, 784)[:max_test_samples]
|
||||
X_train = X_train.astype("float32") / 255
|
||||
X_test = X_test.astype("float32") / 255
|
||||
|
||||
# convert class vectors to binary class matrices
|
||||
y_train = y_train[:max_train_samples]
|
||||
y_test = y_test[:max_test_samples]
|
||||
Y_train = np_utils.to_categorical(y_train, nb_classes)
|
||||
Y_test = np_utils.to_categorical(y_test, nb_classes)
|
||||
test_ids = np.where(y_test == np.array(weighted_class))[0]
|
||||
|
||||
return (X_train, Y_train), (X_test, Y_test), test_ids
|
||||
|
||||
|
||||
def create_model(weight_reg=None, activity_reg=None):
|
||||
@@ -48,6 +52,7 @@ def create_model(weight_reg=None, activity_reg=None):
|
||||
|
||||
|
||||
def test_W_reg():
|
||||
(X_train, Y_train), (X_test, Y_test), test_ids = get_data()
|
||||
for reg in [regularizers.l1(),
|
||||
regularizers.l2(),
|
||||
regularizers.l1l2()]:
|
||||
@@ -59,6 +64,7 @@ def test_W_reg():
|
||||
|
||||
|
||||
def test_A_reg():
|
||||
(X_train, Y_train), (X_test, Y_test), test_ids = get_data()
|
||||
for reg in [regularizers.activity_l1(), regularizers.activity_l2()]:
|
||||
model = create_model(activity_reg=reg)
|
||||
model.compile(loss='categorical_crossentropy', optimizer='rmsprop')
|
||||
|
||||
@@ -66,6 +66,7 @@ def test_sequential_fit_generator():
|
||||
model.fit_generator(data_generator(True), len(X_train), nb_epoch, validation_data=(X_test, y_test))
|
||||
model.fit_generator(data_generator(True), len(X_train), nb_epoch,
|
||||
validation_data=data_generator(False), nb_val_samples=batch_size * 3)
|
||||
model.fit_generator(data_generator(True), len(X_train), nb_epoch, max_q_size=2)
|
||||
|
||||
loss = model.evaluate(X_train, y_train)
|
||||
|
||||
@@ -100,8 +101,8 @@ def test_sequential():
|
||||
|
||||
loss = model.evaluate(X_test, y_test)
|
||||
|
||||
prediction = model.predict_generator(data_generator(X_test, y_test), X_test.shape[0])
|
||||
gen_loss = model.evaluate_generator(data_generator(X_test, y_test, 50), X_test.shape[0])
|
||||
prediction = model.predict_generator(data_generator(X_test, y_test), X_test.shape[0], max_q_size=2)
|
||||
gen_loss = model.evaluate_generator(data_generator(X_test, y_test, 50), X_test.shape[0], max_q_size=2)
|
||||
pred_loss = K.eval(K.mean(objectives.get(model.loss)(K.variable(y_test), K.variable(prediction))))
|
||||
|
||||
assert(np.isclose(pred_loss, loss))
|
||||
|
||||
Referência em uma Nova Issue
Bloquear um usuário