Comparar commits
95 Commits
| Autor | SHA1 | Data | |
|---|---|---|---|
| 8c2a573ebf | |||
| d7ff7cde92 | |||
| 15d0b0ea08 | |||
| 3695bc2db5 | |||
| a08995a90d | |||
| aea00258e7 | |||
| b581eb3f27 | |||
| 610ccba9f5 | |||
| d5ae6f32dd | |||
| 5308033936 | |||
| e2abb5ef2c | |||
| 1b11b4eeb6 | |||
| 39357b3045 | |||
| ed7a5a1418 | |||
| ae682a71f9 | |||
| 8327b37a0b | |||
| 973b5570aa | |||
| 7cb41fc5cc | |||
| 595d67ad7d | |||
| bb626c120e | |||
| ba8fefa8ec | |||
| 4b24f6d7b1 | |||
| 1c460e1e08 | |||
| 7b4e157356 | |||
| 5749f1b971 | |||
| 3c57aff85b | |||
| 18504bcc86 | |||
| d8864bfe48 | |||
| 078b20169b | |||
| 5f7e78df65 | |||
| fc470db7ab | |||
| f576f37801 | |||
| b74118a766 | |||
| 1c7a0248b9 | |||
| 36a829c20d | |||
| 33af75aa39 | |||
| 844420425e | |||
| da57a530f9 | |||
| 1f17013949 | |||
| f18899cb36 | |||
| 877f946e24 | |||
| a981a8c42c | |||
| 5467107fc9 | |||
| ad3107073b | |||
| 8d62f4da6c | |||
| 3779b8a008 | |||
| 6ec5e48969 | |||
| bfa5ca553d | |||
| c9f7d970e9 | |||
| f26ce6e236 | |||
| b001e36f18 | |||
| 9abb6ef723 | |||
| bfbdbb05bc | |||
| bd2bd51b5d | |||
| 4e547a31ed | |||
| de8d0defcd | |||
| 344437c491 | |||
| ed365e94fd | |||
| 5910278ca8 | |||
| 18841fa58d | |||
| 6fb4e0e441 | |||
| 39051ef3ca | |||
| 1f4084870b | |||
| 00e9d5b219 | |||
| 7f93747602 | |||
| a7156b8c27 | |||
| b1e47f7741 | |||
| 59f8d6ca22 | |||
| 5f4019d980 | |||
| f84389da08 | |||
| 63c1757df5 | |||
| d6ab850f45 | |||
| 61dd53e262 | |||
| 423a633b5b | |||
| 256d4ef71b | |||
| ad49962ba9 | |||
| 4680d70a78 | |||
| ee7f056779 | |||
| 66c8d7baf2 | |||
| 9f929999d1 | |||
| 24f96262ec | |||
| 0e6e7a41f4 | |||
| 5cac088d98 | |||
| 85f0448fee | |||
| 106c0b753a | |||
| c525e634dc | |||
| c398c0891b | |||
| 5ab48ac5d4 | |||
| ba29cd8e46 | |||
| b61235b77f | |||
| 0ed00e38f0 | |||
| 36eef0dd9a | |||
| 1904194c7a | |||
| 7ce144881a | |||
| 55159cf451 |
@@ -57,6 +57,8 @@ install:
|
||||
script:
|
||||
# run keras backend init to initialize backend config
|
||||
- python -c "import keras.backend"
|
||||
# create dataset directory to avoid concurrent directory creation at runtime
|
||||
- mkdir ~/.keras/datasets
|
||||
# set up keras backend
|
||||
- sed -i -e 's/"backend":[[:space:]]*"[^"]*/"backend":\ "'$KERAS_BACKEND'/g' ~/.keras/keras.json;
|
||||
- echo -e "Running tests with the following config:\n$(cat ~/.keras/keras.json)"
|
||||
|
||||
+1
-1
@@ -38,7 +38,7 @@ Keras is compatible with: __Python 2.7-3.5__.
|
||||
|
||||
## Getting started: 30 seconds to Keras
|
||||
|
||||
The core data structure of Keras is a __model__, a way to organize layers. The main type of model is the [`Sequential`](http://keras.io/getting-started/sequential-model-guide) model, a linear stack of layers. For more complex architectures, you should use the [Keras function API](http://keras.io/getting-started/functional-api-guide).
|
||||
The core data structure of Keras is a __model__, a way to organize layers. The main type of model is the [`Sequential`](http://keras.io/getting-started/sequential-model-guide) model, a linear stack of layers. For more complex architectures, you should use the [Keras functional API](http://keras.io/getting-started/functional-api-guide).
|
||||
|
||||
Here's the `Sequential` model:
|
||||
|
||||
|
||||
+6
-2
@@ -53,10 +53,16 @@ Scikit-learn API
|
||||
|
||||
'''
|
||||
from __future__ import print_function
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
import inspect
|
||||
import os
|
||||
import shutil
|
||||
import sys
|
||||
if sys.version[0] == '2':
|
||||
reload(sys)
|
||||
sys.setdefaultencoding('utf8')
|
||||
|
||||
from keras.layers import convolutional
|
||||
from keras.layers import recurrent
|
||||
@@ -250,8 +256,6 @@ def get_function_signature(function, method=True):
|
||||
for a, v in kwargs:
|
||||
if type(v) == str:
|
||||
v = '\'' + v + '\''
|
||||
elif type(v) == unicode:
|
||||
v = 'u\'' + v + '\''
|
||||
st += str(a) + '=' + str(v) + ', '
|
||||
if kwargs or args:
|
||||
return st[:-2] + ')'
|
||||
|
||||
externo
+1
@@ -30,6 +30,7 @@ model.add(Activation(tanh))
|
||||
|
||||
- __softmax__: Softmax applied across inputs last dimension. Expects shape either `(nb_samples, nb_timesteps, nb_dims)` or `(nb_samples, nb_dims)`.
|
||||
- __softplus__
|
||||
- __softsign__
|
||||
- __relu__
|
||||
- __tanh__
|
||||
- __sigmoid__
|
||||
|
||||
+23
-4
@@ -20,7 +20,7 @@ Please cite Keras in your publications if it helps your research. Here is an exa
|
||||
|
||||
```
|
||||
@misc{chollet2015keras,
|
||||
author = {Chollet, François},
|
||||
author = {Chollet, Francois},
|
||||
title = {Keras},
|
||||
year = {2015},
|
||||
publisher = {GitHub},
|
||||
@@ -102,6 +102,11 @@ model = model_from_json(open('my_model_architecture.json').read())
|
||||
model.load_weights('my_model_weights.h5')
|
||||
```
|
||||
|
||||
Finally, before it can be used, the model shall be compiled.
|
||||
```python
|
||||
model.compile(optimizer='adagrad', loss='mse')
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### Why is the training loss much higher than the testing loss?
|
||||
@@ -134,14 +139,28 @@ to pass the learning phase flag to your function:
|
||||
get_3rd_layer_output = K.function([model.layers[0].input, K.learning_phase()],
|
||||
[model.layers[3].output])
|
||||
|
||||
# output in train mode = 0
|
||||
# output in test mode = 0
|
||||
layer_output = get_3rd_layer_output([X, 0])[0]
|
||||
|
||||
# output in test mode = 1
|
||||
# output in train mode = 1
|
||||
layer_output = get_3rd_layer_output([X, 1])[0]
|
||||
```
|
||||
|
||||
Another more flexible way of getting output from intermediate layers is to use the [functional API](/getting-started/functional-api-guide).
|
||||
Another more flexible way of getting output from intermediate layers is to use the [functional API](/getting-started/functional-api-guide). For example, if you have created an autoencoder for MNIST:
|
||||
|
||||
```python
|
||||
inputs = Input(shape=(784,))
|
||||
encoded = Dense(32, activation='relu')(inputs)
|
||||
decoded = Dense(784)(encoded)
|
||||
model = Model(input=inputs, output=decoded)
|
||||
```
|
||||
|
||||
After compiling and training the model, you can get the output of the data from the encoder like this:
|
||||
|
||||
```python
|
||||
encoder = Model(input=inputs, output=encoded)
|
||||
X_encoded = encoder.predict(X)
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
|
||||
@@ -309,8 +309,8 @@ from keras.layers import merge, Convolution2D, Input
|
||||
|
||||
# input tensor for a 3-channel 256x256 image
|
||||
x = Input(shape=(3, 256, 256))
|
||||
# 3x3 conv with 16 output channels
|
||||
y = Convolution2D(16, 3, 3, border_mode='same')
|
||||
# 3x3 conv with 3 output channels (same as input channels)
|
||||
y = Convolution2D(3, 3, 3, border_mode='same')
|
||||
# this returns x + y.
|
||||
z = merge([x, y], mode='sum')
|
||||
```
|
||||
|
||||
@@ -112,7 +112,7 @@ Now you know enough to be able to define *almost* any model with Keras. For comp
|
||||
Before training a model, you need to configure the learning process, which is done via the `compile` method. It receives three arguments:
|
||||
|
||||
- an optimizer. This could be the string identifier of an existing optimizer (such as `rmsprop` or `adagrad`), or an instance of the `Optimizer` class. See: [optimizers](/optimizers).
|
||||
- a loss function. This is the objective that the model will try to minimize. If can be the string identifier of an existing loss function (such as `categorical_crossentropy` or `mse`), or it can be an objective function. See: [objectives](/objectives).
|
||||
- a loss function. This is the objective that the model will try to minimize. It can be the string identifier of an existing loss function (such as `categorical_crossentropy` or `mse`), or it can be an objective function. See: [objectives](/objectives).
|
||||
- a list of metrics. For any classification problem you will want to set this to `metrics=['accuracy']`. A metric could be the string identifier of an existing metric (only `accuracy` is supported at this point), or a custom metric function.
|
||||
|
||||
```python
|
||||
@@ -538,4 +538,4 @@ y_val = np.random.random((100, nb_classes))
|
||||
decoder.fit([x_train_a, x_train_b], y_train,
|
||||
batch_size=64, nb_epoch=5,
|
||||
validation_data=([x_val_a, x_val_b], y_val))
|
||||
```
|
||||
```
|
||||
|
||||
@@ -0,0 +1,34 @@
|
||||
# Writing your own Keras layers
|
||||
|
||||
For simple, stateless custom operations, you are probably better off using `layers.core.Lambda` layers. But for any custom operation that has trainable weights, you should implement your own layer.
|
||||
|
||||
Here is the skeleton of a Keras layer. There are only three methods you need to implement:
|
||||
|
||||
- `build(input_shape)`: this is where you will define your weights. Trainable weights should be added to the list `self.trainable_weights`. Other attributes of note are: `self.non_trainable_weights` (list) and `self.updates` (list of update tuples (tensor, new_tensor)). For an example of how to use `non_trainable_weights` and `updates`, see the code for the `BatchNormalization` layer.
|
||||
- `call(x)`: this is where the layer's logic lives. Unless you want you want your layer to support masking, you only have to care about the first argument passed to `call`: the input tensor.
|
||||
- `get_output_shape_for(input_shape)`: in case your layer modifies the shape of its input, you should specify here the shape transformation logic. This allows Keras to do automatic shape inference.
|
||||
|
||||
```python
|
||||
from keras import backend as K
|
||||
from keras.engine.topology import Layer
|
||||
import numpy as np
|
||||
|
||||
class MyLayer(Layer):
|
||||
def __init__(self, output_dim, **kwargs):
|
||||
self.output_dim = output_dim
|
||||
super(MyLayer, self).__init__(**kwargs)
|
||||
|
||||
def build(self, input_shape):
|
||||
input_dim = input_shape[1]
|
||||
initial_weight_value = np.random.random((input_dim, output_dim))
|
||||
self.W = K.variable(initial_weight_value)
|
||||
self.trainable_weights = [self.W]
|
||||
|
||||
def call(self, x, mask=None):
|
||||
return K.dot(x, self.W)
|
||||
|
||||
def get_output_shape_for(self, input_shape):
|
||||
return (input_shape[0], self.output_dim)
|
||||
```
|
||||
|
||||
The existing Keras layers provide ample examples of how to implement almost anything. Never hesitate to read the source code!
|
||||
externo
+1
@@ -26,5 +26,6 @@ For a few examples of such functions, check out the [objectives source](https://
|
||||
- __hinge__
|
||||
- __binary_crossentropy__: Also known as logloss.
|
||||
- __categorical_crossentropy__: Also known as multiclass logloss. __Note__: using this objective requires that your labels are binary arrays of shape `(nb_samples, nb_classes)`.
|
||||
- __sparse_categorical_crossentropy__: As above but accepts sparse labels. __Note__: this objective still requires that your labels have the same number of dimensions as your outputs; you may need to add a length-1 dimension to the shape of your labels, e.g with `np.expand_dims(y, -1)`.
|
||||
- __poisson__: mean of `(predictions - targets * log(predictions))`
|
||||
- __cosine_proximity__: the opposite (negative) of the mean cosine proximity between predictions and targets.
|
||||
|
||||
+8
@@ -11,6 +11,10 @@ keras.preprocessing.image.ImageDataGenerator(featurewise_center=True,
|
||||
width_shift_range=0.,
|
||||
height_shift_range=0.,
|
||||
shear_range=0.,
|
||||
zoom_range=0.,
|
||||
channel_shift_range=0.,
|
||||
fill_mode='nearest',
|
||||
cval=0.,
|
||||
horizontal_flip=False,
|
||||
vertical_flip=False,
|
||||
dim_ordering='th')
|
||||
@@ -28,6 +32,10 @@ Generate batches of tensor image data with real-time data augmentation. The data
|
||||
- __width_shift_range__: Float (fraction of total width). Range for random horizontal shifts.
|
||||
- __height_shift_range__: Float (fraction of total height). Range for random vertical shifts.
|
||||
- __shear_range__: Float. Shear Intensity (Shear angle in counter-clockwise direction as radians)
|
||||
- __zoom_range__: Float or [lower, upper]. Range for random zoom. If a float, `[lower, upper] = [1-zoom_range, 1+zoom_range]`.
|
||||
- __channel_shift_range__: Float. Range for random channel shifts.
|
||||
- __fill_mode__: One of {"constant", "nearest", "reflect" or "wrap"}. Points outside the boundaries of the input are filled according to the given mode.
|
||||
- __cval__: Float or Int. Value used for points outside the boundaries when `fill_mode = "constant"`.
|
||||
- __horizontal_flip__: Boolean. Randomly flip inputs horizontally.
|
||||
- __vertical_flip__: Boolean. Randomly flip inputs vertically.
|
||||
- __dim_ordering__: One of {"th", "tf"}.
|
||||
|
||||
@@ -29,8 +29,7 @@ Five digits inverted:
|
||||
from __future__ import print_function
|
||||
from keras.models import Sequential
|
||||
from keras.engine.training import slice_X
|
||||
from keras.layers.core import Activation, TimeDistributedDense, RepeatVector
|
||||
from keras.layers import recurrent
|
||||
from keras.layers import Activation, TimeDistributedDense, RepeatVector, recurrent
|
||||
import numpy as np
|
||||
from six.moves import range
|
||||
|
||||
|
||||
@@ -2,7 +2,7 @@
|
||||
|
||||
We build a custom activation layer called 'Antirectifier',
|
||||
which modifies the shape of the tensor that passes through it.
|
||||
We need to specify two methods: `output_shape` and `get_output`.
|
||||
We need to specify two methods: `get_output_shape_for` and `call`.
|
||||
|
||||
Note that the same result can also be achieved via a Lambda layer.
|
||||
|
||||
@@ -12,7 +12,7 @@ backend (`K`), our code can run both on TensorFlow and Theano.
|
||||
|
||||
from __future__ import print_function
|
||||
from keras.models import Sequential
|
||||
from keras.layers.core import Dense, Dropout, Layer, Activation
|
||||
from keras.layers import Dense, Dropout, Layer, Activation
|
||||
from keras.datasets import mnist
|
||||
from keras import backend as K
|
||||
from keras.utils import np_utils
|
||||
|
||||
@@ -16,8 +16,8 @@ Time per epoch: 3s on CPU (core i7).
|
||||
from __future__ import print_function
|
||||
from keras.models import Sequential
|
||||
from keras.layers.embeddings import Embedding
|
||||
from keras.layers.core import Activation, Dense, Merge, Permute, Dropout
|
||||
from keras.layers.recurrent import LSTM
|
||||
from keras.layers import Activation, Dense, Merge, Permute, Dropout
|
||||
from keras.layers import LSTM
|
||||
from keras.utils.data_utils import get_file
|
||||
from keras.preprocessing.sequence import pad_sequences
|
||||
from functools import reduce
|
||||
|
||||
@@ -66,7 +66,7 @@ np.random.seed(1337) # for reproducibility
|
||||
|
||||
from keras.utils.data_utils import get_file
|
||||
from keras.layers.embeddings import Embedding
|
||||
from keras.layers.core import Dense, Merge, Dropout, RepeatVector
|
||||
from keras.layers import Dense, Merge, Dropout, RepeatVector
|
||||
from keras.layers import recurrent
|
||||
from keras.models import Sequential
|
||||
from keras.preprocessing.sequence import pad_sequences
|
||||
|
||||
@@ -15,8 +15,8 @@ from __future__ import print_function
|
||||
from keras.datasets import cifar10
|
||||
from keras.preprocessing.image import ImageDataGenerator
|
||||
from keras.models import Sequential
|
||||
from keras.layers.core import Dense, Dropout, Activation, Flatten
|
||||
from keras.layers.convolutional import Convolution2D, MaxPooling2D
|
||||
from keras.layers import Dense, Dropout, Activation, Flatten
|
||||
from keras.layers import Convolution2D, MaxPooling2D
|
||||
from keras.optimizers import SGD
|
||||
from keras.utils import np_utils
|
||||
|
||||
|
||||
@@ -24,7 +24,7 @@ import h5py
|
||||
import os
|
||||
|
||||
from keras.models import Sequential
|
||||
from keras.layers.convolutional import Convolution2D, ZeroPadding2D, MaxPooling2D
|
||||
from keras.layers import Convolution2D, ZeroPadding2D, MaxPooling2D
|
||||
from keras import backend as K
|
||||
|
||||
parser = argparse.ArgumentParser(description='Deep Dreams with Keras.')
|
||||
|
||||
@@ -12,9 +12,9 @@ np.random.seed(1337) # for reproducibility
|
||||
|
||||
from keras.preprocessing import sequence
|
||||
from keras.models import Sequential
|
||||
from keras.layers.core import Dense, Dropout, Activation, Lambda
|
||||
from keras.layers.embeddings import Embedding
|
||||
from keras.layers.convolutional import Convolution1D
|
||||
from keras.layers import Dense, Dropout, Activation, Lambda
|
||||
from keras.layers import Embedding
|
||||
from keras.layers import Convolution1D
|
||||
from keras.datasets import imdb
|
||||
from keras import backend as K
|
||||
|
||||
|
||||
@@ -9,10 +9,10 @@ np.random.seed(1337) # for reproducibility
|
||||
|
||||
from keras.preprocessing import sequence
|
||||
from keras.models import Sequential
|
||||
from keras.layers.core import Dense, Dropout, Activation
|
||||
from keras.layers.embeddings import Embedding
|
||||
from keras.layers.recurrent import LSTM, GRU, SimpleRNN
|
||||
from keras.layers.convolutional import Convolution1D, MaxPooling1D
|
||||
from keras.layers import Dense, Dropout, Activation
|
||||
from keras.layers import Embedding
|
||||
from keras.layers import LSTM, GRU, SimpleRNN
|
||||
from keras.layers import Convolution1D, MaxPooling1D
|
||||
from keras.datasets import imdb
|
||||
|
||||
|
||||
|
||||
@@ -19,9 +19,8 @@ np.random.seed(1337) # for reproducibility
|
||||
from keras.preprocessing import sequence
|
||||
from keras.utils import np_utils
|
||||
from keras.models import Sequential
|
||||
from keras.layers.core import Dense, Dropout, Activation
|
||||
from keras.layers.embeddings import Embedding
|
||||
from keras.layers.recurrent import LSTM, SimpleRNN, GRU
|
||||
from keras.layers import Dense, Dropout, Activation, Embedding
|
||||
from keras.layers import LSTM, SimpleRNN, GRU
|
||||
from keras.datasets import imdb
|
||||
|
||||
max_features = 20000
|
||||
|
||||
@@ -0,0 +1,290 @@
|
||||
'''This script demonstrates how to build the Inception v3 architecture
|
||||
using the Keras functional API.
|
||||
We are not actually training it here, for lack of appropriate data.
|
||||
|
||||
For more information about this architecture, see:
|
||||
|
||||
"Rethinking the Inception Architecture for Computer Vision"
|
||||
Christian Szegedy, Vincent Vanhoucke, Sergey Ioffe, Jonathon Shlens, Zbigniew Wojna
|
||||
http://arxiv.org/abs/1512.00567
|
||||
'''
|
||||
from keras.layers import Convolution2D, MaxPooling2D, AveragePooling2D
|
||||
from keras.layers import BatchNormalization, Flatten, Dense, Dropout
|
||||
from keras.layers import Input, merge
|
||||
from keras.models import Model
|
||||
from keras import regularizers
|
||||
|
||||
|
||||
# global constants
|
||||
NB_CLASS = 1000 # number of classes
|
||||
DIM_ORDERING = 'th' # 'th' (channels, width, height) or 'tf' (width, height, channels)
|
||||
WEIGHT_DECAY = 0. # L2 regularization factor
|
||||
USE_BN = False # whether to use batch normalization
|
||||
|
||||
|
||||
def conv2D_bn(x, nb_filter, nb_row, nb_col,
|
||||
border_mode='same', subsample=(1, 1),
|
||||
activation='relu', batch_norm=USE_BN,
|
||||
weight_decay=WEIGHT_DECAY, dim_ordering=DIM_ORDERING):
|
||||
'''Utility function to apply to a tensor a module conv + BN
|
||||
with optional weight decay (L2 weight regularization).
|
||||
'''
|
||||
if weight_decay:
|
||||
W_regularizer = regularizers.l2(weight_decay)
|
||||
b_regularizer = regularizers.l2(weight_decay)
|
||||
else:
|
||||
W_regularizer = None
|
||||
b_regularizer = None
|
||||
x = Convolution2D(nb_filter, nb_row, nb_col,
|
||||
subsample=subsample,
|
||||
activation=activation,
|
||||
border_mode=border_mode,
|
||||
W_regularizer=W_regularizer,
|
||||
b_regularizer=b_regularizer,
|
||||
dim_ordering=dim_ordering)(x)
|
||||
if batch_norm:
|
||||
x = BatchNormalization()(x)
|
||||
return x
|
||||
|
||||
# Define image input layer
|
||||
|
||||
if DIM_ORDERING == 'th':
|
||||
img_input = Input(shape=(3, 299, 299))
|
||||
CONCAT_AXIS = 1
|
||||
elif DIM_ORDERING == 'tf':
|
||||
img_input = Input(shape=(299, 299, 3))
|
||||
CONCAT_AXIS = 3
|
||||
else:
|
||||
raise Exception('Invalid dim ordering: ' + str(DIM_ORDERING))
|
||||
|
||||
# Entry module
|
||||
|
||||
x = conv2D_bn(img_input, 32, 3, 3, subsample=(2, 2), border_mode='valid')
|
||||
x = conv2D_bn(x, 32, 3, 3, border_mode='valid')
|
||||
x = conv2D_bn(x, 64, 3, 3)
|
||||
x = MaxPooling2D((3, 3), strides=(2, 2), dim_ordering=DIM_ORDERING)(x)
|
||||
|
||||
x = conv2D_bn(x, 80, 1, 1, border_mode='valid')
|
||||
x = conv2D_bn(x, 192, 3, 3, border_mode='valid')
|
||||
x = MaxPooling2D((3, 3), strides=(2, 2), dim_ordering=DIM_ORDERING)(x)
|
||||
|
||||
# mixed: 35 x 35 x 256
|
||||
|
||||
branch1x1 = conv2D_bn(x, 64, 1, 1)
|
||||
|
||||
branch5x5 = conv2D_bn(x, 48, 1, 1)
|
||||
branch5x5 = conv2D_bn(branch5x5, 64, 5, 5)
|
||||
|
||||
branch3x3dbl = conv2D_bn(x, 64, 1, 1)
|
||||
branch3x3dbl = conv2D_bn(branch3x3dbl, 96, 3, 3)
|
||||
branch3x3dbl = conv2D_bn(branch3x3dbl, 96, 3, 3)
|
||||
|
||||
branch_pool = AveragePooling2D((3, 3), strides=(1, 1), border_mode='same', dim_ordering=DIM_ORDERING)(x)
|
||||
branch_pool = conv2D_bn(branch_pool, 32, 1, 1)
|
||||
x = merge([branch1x1, branch5x5, branch3x3dbl, branch_pool], mode='concat', concat_axis=CONCAT_AXIS)
|
||||
|
||||
# mixed_1: 35 x 35 x 288
|
||||
|
||||
branch1x1 = conv2D_bn(x, 64, 1, 1)
|
||||
|
||||
branch5x5 = conv2D_bn(x, 48, 1, 1)
|
||||
branch5x5 = conv2D_bn(branch5x5, 64, 5, 5)
|
||||
|
||||
branch3x3dbl = conv2D_bn(x, 64, 1, 1)
|
||||
branch3x3dbl = conv2D_bn(branch3x3dbl, 96, 3, 3)
|
||||
branch3x3dbl = conv2D_bn(branch3x3dbl, 96, 3, 3)
|
||||
|
||||
branch_pool = AveragePooling2D((3, 3), strides=(1, 1), border_mode='same', dim_ordering=DIM_ORDERING)(x)
|
||||
branch_pool = conv2D_bn(branch_pool, 64, 1, 1)
|
||||
x = merge([branch1x1, branch5x5, branch3x3dbl, branch_pool], mode='concat', concat_axis=CONCAT_AXIS)
|
||||
|
||||
# mixed2: 35 x 35 x 288
|
||||
|
||||
branch1x1 = conv2D_bn(x, 64, 1, 1)
|
||||
|
||||
branch5x5 = conv2D_bn(x, 48, 1, 1)
|
||||
branch5x5 = conv2D_bn(branch5x5, 64, 5, 5)
|
||||
|
||||
branch3x3dbl = conv2D_bn(x, 64, 1, 1)
|
||||
branch3x3dbl = conv2D_bn(branch3x3dbl, 96, 3, 3)
|
||||
branch3x3dbl = conv2D_bn(branch3x3dbl, 96, 3, 3)
|
||||
|
||||
branch_pool = AveragePooling2D((3, 3), strides=(1, 1), border_mode='same', dim_ordering=DIM_ORDERING)(x)
|
||||
branch_pool = conv2D_bn(branch_pool, 64, 1, 1)
|
||||
x = merge([branch1x1, branch5x5, branch3x3dbl, branch_pool], mode='concat', concat_axis=CONCAT_AXIS)
|
||||
|
||||
# mixed3: 17 x 17 x 768
|
||||
|
||||
branch3x3 = conv2D_bn(x, 384, 3, 3, subsample=(2, 2), border_mode='valid')
|
||||
|
||||
branch3x3dbl = conv2D_bn(x, 64, 1, 1)
|
||||
branch3x3dbl = conv2D_bn(branch3x3dbl, 96, 3, 3)
|
||||
branch3x3dbl = conv2D_bn(branch3x3dbl, 96, 3, 3, subsample=(2, 2), border_mode='valid')
|
||||
|
||||
branch_pool = MaxPooling2D((3, 3), strides=(2, 2), dim_ordering=DIM_ORDERING)(x)
|
||||
x = merge([branch3x3, branch3x3dbl, branch_pool], mode='concat', concat_axis=CONCAT_AXIS)
|
||||
|
||||
# mixed4: 17 x 17 x 768
|
||||
|
||||
branch1x1 = conv2D_bn(x, 192, 1, 1)
|
||||
|
||||
branch7x7 = conv2D_bn(x, 128, 1, 1)
|
||||
branch7x7 = conv2D_bn(branch7x7, 128, 1, 7)
|
||||
branch7x7 = conv2D_bn(branch7x7, 192, 7, 1)
|
||||
|
||||
branch7x7dbl = conv2D_bn(x, 128, 1, 1)
|
||||
branch7x7dbl = conv2D_bn(branch7x7dbl, 128, 7, 1)
|
||||
branch7x7dbl = conv2D_bn(branch7x7dbl, 128, 1, 7)
|
||||
branch7x7dbl = conv2D_bn(branch7x7dbl, 128, 7, 1)
|
||||
branch7x7dbl = conv2D_bn(branch7x7dbl, 192, 1, 7)
|
||||
|
||||
branch_pool = AveragePooling2D((3, 3), strides=(1, 1), border_mode='same', dim_ordering=DIM_ORDERING)(x)
|
||||
branch_pool = conv2D_bn(branch_pool, 192, 1, 1)
|
||||
x = merge([branch1x1, branch7x7, branch7x7dbl, branch_pool], mode='concat', concat_axis=CONCAT_AXIS)
|
||||
|
||||
# mixed5: 17 x 17 x 768
|
||||
|
||||
branch1x1 = conv2D_bn(x, 192, 1, 1)
|
||||
|
||||
branch7x7 = conv2D_bn(x, 160, 1, 1)
|
||||
branch7x7 = conv2D_bn(branch7x7, 160, 1, 7)
|
||||
branch7x7 = conv2D_bn(branch7x7, 192, 7, 1)
|
||||
|
||||
branch7x7dbl = conv2D_bn(x, 160, 1, 1)
|
||||
branch7x7dbl = conv2D_bn(branch7x7dbl, 160, 7, 1)
|
||||
branch7x7dbl = conv2D_bn(branch7x7dbl, 160, 1, 7)
|
||||
branch7x7dbl = conv2D_bn(branch7x7dbl, 160, 7, 1)
|
||||
branch7x7dbl = conv2D_bn(branch7x7dbl, 192, 1, 7)
|
||||
|
||||
branch_pool = AveragePooling2D((3, 3), strides=(1, 1), border_mode='same', dim_ordering=DIM_ORDERING)(x)
|
||||
branch_pool = conv2D_bn(branch_pool, 192, 1, 1)
|
||||
x = merge([branch1x1, branch7x7, branch7x7dbl, branch_pool], mode='concat', concat_axis=CONCAT_AXIS)
|
||||
|
||||
# mixed5: 17 x 17 x 768
|
||||
|
||||
branch1x1 = conv2D_bn(x, 192, 1, 1)
|
||||
|
||||
branch7x7 = conv2D_bn(x, 160, 1, 1)
|
||||
branch7x7 = conv2D_bn(branch7x7, 160, 1, 7)
|
||||
branch7x7 = conv2D_bn(branch7x7, 192, 7, 1)
|
||||
|
||||
branch7x7dbl = conv2D_bn(x, 160, 1, 1)
|
||||
branch7x7dbl = conv2D_bn(branch7x7dbl, 160, 7, 1)
|
||||
branch7x7dbl = conv2D_bn(branch7x7dbl, 160, 1, 7)
|
||||
branch7x7dbl = conv2D_bn(branch7x7dbl, 160, 7, 1)
|
||||
branch7x7dbl = conv2D_bn(branch7x7dbl, 192, 1, 7)
|
||||
|
||||
branch_pool = AveragePooling2D((3, 3), strides=(1, 1), border_mode='same', dim_ordering=DIM_ORDERING)(x)
|
||||
branch_pool = conv2D_bn(branch_pool, 192, 1, 1)
|
||||
x = merge([branch1x1, branch7x7, branch7x7dbl, branch_pool], mode='concat', concat_axis=CONCAT_AXIS)
|
||||
|
||||
# mixed6: 17 x 17 x 768
|
||||
|
||||
branch1x1 = conv2D_bn(x, 192, 1, 1)
|
||||
|
||||
branch7x7 = conv2D_bn(x, 160, 1, 1)
|
||||
branch7x7 = conv2D_bn(branch7x7, 160, 1, 7)
|
||||
branch7x7 = conv2D_bn(branch7x7, 192, 7, 1)
|
||||
|
||||
branch7x7dbl = conv2D_bn(x, 160, 1, 1)
|
||||
branch7x7dbl = conv2D_bn(branch7x7dbl, 160, 7, 1)
|
||||
branch7x7dbl = conv2D_bn(branch7x7dbl, 192, 1, 7)
|
||||
branch7x7dbl = conv2D_bn(branch7x7dbl, 160, 7, 1)
|
||||
branch7x7dbl = conv2D_bn(branch7x7dbl, 192, 1, 7)
|
||||
|
||||
branch_pool = AveragePooling2D((3, 3), strides=(1, 1), border_mode='same', dim_ordering=DIM_ORDERING)(x)
|
||||
branch_pool = conv2D_bn(branch_pool, 192, 1, 1)
|
||||
x = merge([branch1x1, branch7x7, branch7x7dbl, branch_pool], mode='concat', concat_axis=CONCAT_AXIS)
|
||||
|
||||
# mixed7: 17 x 17 x 768
|
||||
|
||||
branch1x1 = conv2D_bn(x, 192, 1, 1)
|
||||
|
||||
branch7x7 = conv2D_bn(x, 192, 1, 1)
|
||||
branch7x7 = conv2D_bn(branch7x7, 192, 1, 7)
|
||||
branch7x7 = conv2D_bn(branch7x7, 192, 7, 1)
|
||||
|
||||
branch7x7dbl = conv2D_bn(x, 160, 1, 1)
|
||||
branch7x7dbl = conv2D_bn(branch7x7dbl, 192, 7, 1)
|
||||
branch7x7dbl = conv2D_bn(branch7x7dbl, 192, 1, 7)
|
||||
branch7x7dbl = conv2D_bn(branch7x7dbl, 192, 7, 1)
|
||||
branch7x7dbl = conv2D_bn(branch7x7dbl, 192, 1, 7)
|
||||
|
||||
branch_pool = AveragePooling2D((3, 3), strides=(1, 1), border_mode='same', dim_ordering=DIM_ORDERING)(x)
|
||||
branch_pool = conv2D_bn(branch_pool, 192, 1, 1)
|
||||
x = merge([branch1x1, branch7x7, branch7x7dbl, branch_pool], mode='concat', concat_axis=CONCAT_AXIS)
|
||||
|
||||
# Auxiliary head
|
||||
|
||||
aux_logits = AveragePooling2D((5, 5), strides=(3, 3), dim_ordering=DIM_ORDERING)(x)
|
||||
aux_logits = conv2D_bn(aux_logits, 128, 1, 1)
|
||||
aux_logits = conv2D_bn(aux_logits, 728, 5, 5, border_mode='valid')
|
||||
aux_logits = Flatten()(aux_logits)
|
||||
aux_preds = Dense(NB_CLASS, activation='softmax')(aux_logits)
|
||||
|
||||
# mixed8: 8 x 8 x 1280
|
||||
|
||||
branch3x3 = conv2D_bn(x, 192, 1, 1)
|
||||
branch3x3 = conv2D_bn(branch3x3, 320, 3, 3, subsample=(2, 2), border_mode='valid')
|
||||
|
||||
branch7x7x3 = conv2D_bn(x, 192, 1, 1)
|
||||
branch7x7x3 = conv2D_bn(branch7x7x3, 192, 1, 7)
|
||||
branch7x7x3 = conv2D_bn(branch7x7x3, 192, 7, 1)
|
||||
branch7x7x3 = conv2D_bn(branch7x7x3, 192, 3, 3, subsample=(2, 2), border_mode='valid')
|
||||
|
||||
branch_pool = AveragePooling2D((3, 3), strides=(2, 2), dim_ordering=DIM_ORDERING)(x)
|
||||
x = merge([branch3x3, branch7x7x3, branch_pool], mode='concat', concat_axis=CONCAT_AXIS)
|
||||
|
||||
# mixed9: 8 x 8 x 2048
|
||||
|
||||
branch1x1 = conv2D_bn(x, 320, 1, 1)
|
||||
|
||||
branch3x3 = conv2D_bn(x, 384, 1, 1)
|
||||
branch3x3_1 = conv2D_bn(branch3x3, 384, 1, 3)
|
||||
branch3x3_2 = conv2D_bn(branch3x3, 384, 3, 1)
|
||||
branch3x3 = merge([branch3x3_1, branch3x3_2], mode='concat', concat_axis=CONCAT_AXIS)
|
||||
|
||||
branch3x3dbl = conv2D_bn(x, 448, 1, 1)
|
||||
branch3x3dbl = conv2D_bn(branch3x3dbl, 384, 3, 3)
|
||||
branch3x3dbl_1 = conv2D_bn(branch3x3dbl, 384, 1, 3)
|
||||
branch3x3dbl_2 = conv2D_bn(branch3x3dbl, 384, 3, 1)
|
||||
branch3x3dbl = merge([branch3x3dbl_1, branch3x3dbl_2], mode='concat', concat_axis=CONCAT_AXIS)
|
||||
|
||||
branch_pool = AveragePooling2D((3, 3), strides=(1, 1), border_mode='same', dim_ordering=DIM_ORDERING)(x)
|
||||
branch_pool = conv2D_bn(branch_pool, 192, 1, 1)
|
||||
x = merge([branch1x1, branch3x3, branch3x3dbl, branch_pool], mode='concat', concat_axis=CONCAT_AXIS)
|
||||
|
||||
# mixed10: 8 x 8 x 2048
|
||||
|
||||
branch1x1 = conv2D_bn(x, 320, 1, 1)
|
||||
|
||||
branch3x3 = conv2D_bn(x, 384, 1, 1)
|
||||
branch3x3_1 = conv2D_bn(branch3x3, 384, 1, 3)
|
||||
branch3x3_2 = conv2D_bn(branch3x3, 384, 3, 1)
|
||||
branch3x3 = merge([branch3x3_1, branch3x3_2], mode='concat', concat_axis=CONCAT_AXIS)
|
||||
|
||||
branch3x3dbl = conv2D_bn(x, 448, 1, 1)
|
||||
branch3x3dbl = conv2D_bn(branch3x3dbl, 384, 3, 3)
|
||||
branch3x3dbl_1 = conv2D_bn(branch3x3dbl, 384, 1, 3)
|
||||
branch3x3dbl_2 = conv2D_bn(branch3x3dbl, 384, 3, 1)
|
||||
branch3x3dbl = merge([branch3x3dbl_1, branch3x3dbl_2], mode='concat', concat_axis=CONCAT_AXIS)
|
||||
|
||||
branch_pool = AveragePooling2D((3, 3), strides=(1, 1), border_mode='same', dim_ordering=DIM_ORDERING)(x)
|
||||
branch_pool = conv2D_bn(branch_pool, 192, 1, 1)
|
||||
x = merge([branch1x1, branch3x3, branch3x3dbl, branch_pool], mode='concat', concat_axis=CONCAT_AXIS)
|
||||
|
||||
# Final pooling and prediction
|
||||
|
||||
x = AveragePooling2D((8, 8), strides=(1, 1), dim_ordering=DIM_ORDERING)(x)
|
||||
x = Dropout(0.5)(x)
|
||||
x = Flatten()(x)
|
||||
preds = Dense(NB_CLASS, activation='softmax')(x)
|
||||
|
||||
# Define model
|
||||
|
||||
model = Model(input=img_input, output=[preds, aux_preds])
|
||||
model.compile('rmsprop', 'categorical_crossentropy')
|
||||
|
||||
# train via e.g. `model.fit(x_train, [y_train] * 2, batch_size=32, nb_epoch=100)`
|
||||
# Note that for a large dataset it would be preferable
|
||||
# to train using `fit_generator` (see Keras docs).
|
||||
@@ -0,0 +1,83 @@
|
||||
'''Compare LSTM implementations on the IMDB sentiment classification task.
|
||||
|
||||
consume_less='cpu' preprocesses input to the LSTM which typically results in
|
||||
faster computations at the expense of increased peak memory usage as the
|
||||
preprocessed input must be kept in memory.
|
||||
|
||||
consume_less='mem' does away with the preprocessing, meaning that it might take
|
||||
a little longer, but should require less peak memory.
|
||||
|
||||
consume_less='gpu' concatenates the input, output and forget gate's weights
|
||||
into one, large matrix, resulting in faster computation time as the GPU can
|
||||
utilize more cores, at the expense of reduced regularization because the same
|
||||
dropout is shared across the gates.
|
||||
|
||||
Note that the relative performance of the different `consume_less` modes
|
||||
can vary depending on your device, your model and the size of your data.
|
||||
'''
|
||||
|
||||
import time
|
||||
import numpy as np
|
||||
import matplotlib.pyplot as plt
|
||||
|
||||
from keras.preprocessing import sequence
|
||||
from keras.models import Sequential
|
||||
from keras.layers import Embedding, Dense, LSTM
|
||||
from keras.datasets import imdb
|
||||
|
||||
max_features = 20000
|
||||
max_length = 80
|
||||
embedding_dim = 256
|
||||
batch_size = 128
|
||||
epochs = 10
|
||||
modes = ['cpu', 'mem', 'gpu']
|
||||
|
||||
print('Loading data...')
|
||||
(X_train, y_train), (X_test, y_test) = imdb.load_data(nb_words=max_features)
|
||||
X_train = sequence.pad_sequences(X_train, max_length)
|
||||
X_test = sequence.pad_sequences(X_test, max_length)
|
||||
|
||||
# Compile and train different models while meauring performance.
|
||||
results = []
|
||||
for mode in modes:
|
||||
print('Testing mode: consume_less="{}"'.format(mode))
|
||||
|
||||
model = Sequential()
|
||||
model.add(Embedding(max_features, embedding_dim, input_length=max_length, dropout=0.2))
|
||||
model.add(LSTM(embedding_dim, dropout_W=0.2, dropout_U=0.2, consume_less=mode))
|
||||
model.add(Dense(1, activation='sigmoid'))
|
||||
model.compile(loss='binary_crossentropy',
|
||||
optimizer='adam',
|
||||
metrics=['accuracy'])
|
||||
|
||||
start_time = time.time()
|
||||
history = model.fit(X_train, y_train,
|
||||
batch_size=batch_size,
|
||||
nb_epoch=epochs,
|
||||
validation_data=(X_test, y_test))
|
||||
average_time_per_epoch = (time.time() - start_time) / epochs
|
||||
|
||||
results.append((history, average_time_per_epoch))
|
||||
|
||||
# Compare models' accuracy, loss and elapsed time per epoch.
|
||||
plt.style.use('ggplot')
|
||||
ax1 = plt.subplot2grid((2, 2), (0, 0))
|
||||
ax1.set_title('Accuracy')
|
||||
ax1.set_ylabel('Validation Accuracy')
|
||||
ax1.set_xlabel('Epochs')
|
||||
ax2 = plt.subplot2grid((2, 2), (1, 0))
|
||||
ax2.set_title('Loss')
|
||||
ax2.set_ylabel('Validation Loss')
|
||||
ax2.set_xlabel('Epochs')
|
||||
ax3 = plt.subplot2grid((2, 2), (0, 1), rowspan=2)
|
||||
ax3.set_title('Time')
|
||||
ax3.set_ylabel('Seconds')
|
||||
for mode, result in zip(modes, results):
|
||||
ax1.plot(result[0].epoch, result[0].history['val_acc'], label=mode)
|
||||
ax2.plot(result[0].epoch, result[0].history['val_loss'], label=mode)
|
||||
ax1.legend()
|
||||
ax2.legend()
|
||||
ax3.bar(np.arange(len(results)), [x[1] for x in results],
|
||||
tick_label=modes, align='center')
|
||||
plt.tight_layout()
|
||||
plt.show()
|
||||
@@ -12,8 +12,8 @@ has at least ~100k characters. ~1M is better.
|
||||
|
||||
from __future__ import print_function
|
||||
from keras.models import Sequential
|
||||
from keras.layers.core import Dense, Activation, Dropout
|
||||
from keras.layers.recurrent import LSTM
|
||||
from keras.layers import Dense, Activation, Dropout
|
||||
from keras.layers import LSTM
|
||||
from keras.utils.data_utils import get_file
|
||||
import numpy as np
|
||||
import random
|
||||
|
||||
@@ -11,8 +11,8 @@ np.random.seed(1337) # for reproducibility
|
||||
|
||||
from keras.datasets import mnist
|
||||
from keras.models import Sequential
|
||||
from keras.layers.core import Dense, Dropout, Activation, Flatten
|
||||
from keras.layers.convolutional import Convolution2D, MaxPooling2D
|
||||
from keras.layers import Dense, Dropout, Activation, Flatten
|
||||
from keras.layers import Convolution2D, MaxPooling2D
|
||||
from keras.utils import np_utils
|
||||
|
||||
batch_size = 128
|
||||
|
||||
@@ -17,9 +17,9 @@ from __future__ import print_function
|
||||
|
||||
from keras.datasets import mnist
|
||||
from keras.models import Sequential
|
||||
from keras.layers.core import Dense, Activation
|
||||
from keras.layers import Dense, Activation
|
||||
from keras.layers import SimpleRNN
|
||||
from keras.initializations import normal, identity
|
||||
from keras.layers.recurrent import SimpleRNN
|
||||
from keras.optimizers import RMSprop
|
||||
from keras.utils import np_utils
|
||||
|
||||
|
||||
@@ -30,7 +30,7 @@ def euclidean_distance(vects):
|
||||
|
||||
def eucl_dist_output_shape(shapes):
|
||||
shape1, shape2 = shapes
|
||||
return shape1
|
||||
return (shape1[0], 1)
|
||||
|
||||
|
||||
def contrastive_loss(y_true, y_pred):
|
||||
|
||||
@@ -0,0 +1,94 @@
|
||||
'''Example of how to use sklearn wrapper
|
||||
|
||||
Builds simple CNN models on MNIST and uses sklearn's GridSearchCV to find best model
|
||||
'''
|
||||
|
||||
from __future__ import print_function
|
||||
import numpy as np
|
||||
np.random.seed(1337) # for reproducibility
|
||||
|
||||
from keras.datasets import mnist
|
||||
from keras.models import Sequential
|
||||
from keras.layers import Dense, Dropout, Activation, Flatten
|
||||
from keras.layers import Convolution2D, MaxPooling2D
|
||||
from keras.utils import np_utils
|
||||
from keras.wrappers.scikit_learn import KerasClassifier
|
||||
from sklearn.grid_search import GridSearchCV
|
||||
|
||||
|
||||
nb_classes = 10
|
||||
|
||||
# input image dimensions
|
||||
img_rows, img_cols = 28, 28
|
||||
|
||||
# load training data and do basic data normalization
|
||||
(X_train, y_train), (X_test, y_test) = mnist.load_data()
|
||||
X_train = X_train.reshape(X_train.shape[0], 1, img_rows, img_cols)
|
||||
X_test = X_test.reshape(X_test.shape[0], 1, img_rows, img_cols)
|
||||
X_train = X_train.astype('float32')
|
||||
X_test = X_test.astype('float32')
|
||||
X_train /= 255
|
||||
X_test /= 255
|
||||
|
||||
# convert class vectors to binary class matrices
|
||||
y_train = np_utils.to_categorical(y_train, nb_classes)
|
||||
y_test = np_utils.to_categorical(y_test, nb_classes)
|
||||
|
||||
def make_model(dense_layer_sizes, nb_filters, nb_conv, nb_pool):
|
||||
'''Creates model comprised of 2 convolutional layers followed by dense layers
|
||||
|
||||
dense_layer_sizes: List of layer sizes. This list has one number for each layer
|
||||
nb_filters: Number of convolutional filters in each convolutional layer
|
||||
nb_conv: Convolutional kernel size
|
||||
nb_pool: Size of pooling area for max pooling
|
||||
'''
|
||||
|
||||
model = Sequential()
|
||||
|
||||
model.add(Convolution2D(nb_filters, nb_conv, nb_conv,
|
||||
border_mode='valid',
|
||||
input_shape=(1, img_rows, img_cols)))
|
||||
model.add(Activation('relu'))
|
||||
model.add(Convolution2D(nb_filters, nb_conv, nb_conv))
|
||||
model.add(Activation('relu'))
|
||||
model.add(MaxPooling2D(pool_size=(nb_pool, nb_pool)))
|
||||
model.add(Dropout(0.25))
|
||||
|
||||
model.add(Flatten())
|
||||
for layer_size in dense_layer_sizes:
|
||||
model.add(Dense(layer_size))
|
||||
model.add(Activation('relu'))
|
||||
model.add(Dropout(0.5))
|
||||
model.add(Dense(nb_classes))
|
||||
model.add(Activation('softmax'))
|
||||
|
||||
model.compile(loss='categorical_crossentropy',
|
||||
optimizer='adadelta',
|
||||
metrics=['accuracy'])
|
||||
|
||||
return model
|
||||
|
||||
dense_size_candidates = [[32], [64], [32, 32], [64, 64]]
|
||||
my_classifier = KerasClassifier(make_model, batch_size=32)
|
||||
validator = GridSearchCV(my_classifier,
|
||||
param_grid={'dense_layer_sizes': dense_size_candidates,
|
||||
# nb_epoch is avail for tuning even when not
|
||||
# an argument to model building function
|
||||
'nb_epoch': [3, 6],
|
||||
'nb_filters': [8],
|
||||
'nb_conv': [3],
|
||||
'nb_pool': [2]},
|
||||
scoring='log_loss',
|
||||
n_jobs=1)
|
||||
validator.fit(X_train, y_train)
|
||||
|
||||
print('The parameters of the best model are: ')
|
||||
print(validator.best_params_)
|
||||
|
||||
# validator.best_estimator_ returns sklearn-wrapped version of best model.
|
||||
# validator.best_estimator_.model returns the (unwrapped) keras model
|
||||
best_model = validator.best_estimator_.model
|
||||
metric_names = best_model.metrics_names
|
||||
metric_values = best_model.evaluate(X_test, y_test)
|
||||
for metric, value in zip(metric_names, metric_values):
|
||||
print(metric, ': ', value)
|
||||
@@ -19,8 +19,8 @@ np.random.seed(1337) # for reproducibility
|
||||
|
||||
from keras.datasets import mnist
|
||||
from keras.models import Sequential
|
||||
from keras.layers.core import Dense, Dropout, Activation, Flatten
|
||||
from keras.layers.convolutional import Convolution2D, MaxPooling2D
|
||||
from keras.layers import Dense, Dropout, Activation, Flatten
|
||||
from keras.layers import Convolution2D, MaxPooling2D
|
||||
from keras.utils import np_utils
|
||||
|
||||
|
||||
|
||||
@@ -58,7 +58,7 @@ import argparse
|
||||
import h5py
|
||||
|
||||
from keras.models import Sequential
|
||||
from keras.layers.convolutional import Convolution2D, ZeroPadding2D, MaxPooling2D
|
||||
from keras.layers import Convolution2D, ZeroPadding2D, MaxPooling2D
|
||||
from keras import backend as K
|
||||
|
||||
parser = argparse.ArgumentParser(description='Neural style transfer with Keras.')
|
||||
|
||||
@@ -8,8 +8,7 @@ np.random.seed(1337) # for reproducibility
|
||||
|
||||
from keras.datasets import reuters
|
||||
from keras.models import Sequential
|
||||
from keras.layers.core import Dense, Dropout, Activation
|
||||
from keras.layers.normalization import BatchNormalization
|
||||
from keras.layers import Dense, Dropout, Activation
|
||||
from keras.utils import np_utils
|
||||
from keras.preprocessing.text import Tokenizer
|
||||
|
||||
|
||||
@@ -5,8 +5,7 @@ from __future__ import print_function
|
||||
import numpy as np
|
||||
import matplotlib.pyplot as plt
|
||||
from keras.models import Sequential
|
||||
from keras.layers.core import Dense
|
||||
from keras.layers.recurrent import LSTM
|
||||
from keras.layers import Dense, LSTM
|
||||
|
||||
|
||||
# since we are using stateful rnn tsteps can be set to 1
|
||||
|
||||
@@ -0,0 +1,98 @@
|
||||
'''This script demonstrates how to build a variational autoencoder with Keras.
|
||||
|
||||
Reference: "Auto-Encoding Variational Bayes" https://arxiv.org/abs/1312.6114
|
||||
'''
|
||||
import numpy as np
|
||||
import matplotlib.pyplot as plt
|
||||
|
||||
from keras.layers import Input, Dense, Lambda
|
||||
from keras.models import Model
|
||||
from keras import backend as K
|
||||
from keras import objectives
|
||||
from keras.datasets import mnist
|
||||
|
||||
batch_size = 16
|
||||
original_dim = 784
|
||||
latent_dim = 2
|
||||
intermediate_dim = 128
|
||||
epsilon_std = 0.01
|
||||
nb_epoch = 40
|
||||
|
||||
x = Input(batch_shape=(batch_size, original_dim))
|
||||
h = Dense(intermediate_dim, activation='relu')(x)
|
||||
z_mean = Dense(latent_dim)(h)
|
||||
z_log_sigma = Dense(latent_dim)(h)
|
||||
|
||||
def sampling(args):
|
||||
z_mean, z_log_sigma = args
|
||||
epsilon = K.random_normal(shape=(batch_size, latent_dim),
|
||||
mean=0., std=epsilon_std)
|
||||
return z_mean + K.exp(z_log_sigma) * epsilon
|
||||
|
||||
# note that "output_shape" isn't necessary with the TensorFlow backend
|
||||
# so you could write `Lambda(sampling)([z_mean, z_log_sigma])`
|
||||
z = Lambda(sampling, output_shape=(latent_dim,))([z_mean, z_log_sigma])
|
||||
|
||||
# we instantiate these layers separately so as to reuse them later
|
||||
decoder_h = Dense(intermediate_dim, activation='relu')
|
||||
decoder_mean = Dense(original_dim, activation='sigmoid')
|
||||
h_decoded = decoder_h(z)
|
||||
x_decoded_mean = decoder_mean(h_decoded)
|
||||
|
||||
def vae_loss(x, x_decoded_mean):
|
||||
xent_loss = objectives.binary_crossentropy(x, x_decoded_mean)
|
||||
kl_loss = - 0.5 * K.mean(1 + z_log_sigma - K.square(z_mean) - K.exp(z_log_sigma), axis=-1)
|
||||
return xent_loss + kl_loss
|
||||
|
||||
vae = Model(x, x_decoded_mean)
|
||||
vae.compile(optimizer='rmsprop', loss=vae_loss)
|
||||
|
||||
# train the VAE on MNIST digits
|
||||
(x_train, y_train), (x_test, y_test) = mnist.load_data()
|
||||
|
||||
x_train = x_train.astype('float32') / 255.
|
||||
x_test = x_test.astype('float32') / 255.
|
||||
x_train = x_train.reshape((len(x_train), np.prod(x_train.shape[1:])))
|
||||
x_test = x_test.reshape((len(x_test), np.prod(x_test.shape[1:])))
|
||||
|
||||
vae.fit(x_train, x_train,
|
||||
shuffle=True,
|
||||
nb_epoch=nb_epoch,
|
||||
batch_size=batch_size,
|
||||
validation_data=(x_test, x_test))
|
||||
|
||||
# build a model to project inputs on the latent space
|
||||
encoder = Model(x, z_mean)
|
||||
|
||||
# display a 2D plot of the digit classes in the latent space
|
||||
x_test_encoded = encoder.predict(x_test, batch_size=batch_size)
|
||||
plt.figure(figsize=(6, 6))
|
||||
plt.scatter(x_test_encoded[:, 0], x_test_encoded[:, 1], c=y_test)
|
||||
plt.colorbar()
|
||||
plt.show()
|
||||
|
||||
# build a digit generator that can sample from the learned distribution
|
||||
decoder_input = Input(shape=(latent_dim,))
|
||||
_h_decoded = decoder_h(decoder_input)
|
||||
_x_decoded_mean = decoder_mean(_h_decoded)
|
||||
generator = Model(decoder_input, _x_decoded_mean)
|
||||
|
||||
# display a 2D manifold of the digits
|
||||
n = 15 # figure with 15x15 digits
|
||||
digit_size = 28
|
||||
figure = np.zeros((digit_size * n, digit_size * n))
|
||||
# we will sample n points within [-15, 15] standard deviations
|
||||
grid_x = np.linspace(-15, 15, n)
|
||||
grid_y = np.linspace(-15, 15, n)
|
||||
|
||||
for i, yi in enumerate(grid_x):
|
||||
for j, xi in enumerate(grid_y):
|
||||
z_sample = np.array([[xi, yi]]) * epsilon_std
|
||||
x_decoded = generator.predict(z_sample)
|
||||
digit = x_decoded[0].reshape(digit_size, digit_size)
|
||||
figure[i * digit_size: (i + 1) * digit_size,
|
||||
j * digit_size: (j + 1) * digit_size] = digit
|
||||
|
||||
plt.figure(figsize=(10, 10))
|
||||
plt.imshow(figure)
|
||||
plt.show()
|
||||
+18
-1
@@ -1 +1,18 @@
|
||||
__version__ = '1.0.1'
|
||||
from __future__ import absolute_import
|
||||
from . import backend
|
||||
from . import datasets
|
||||
from . import engine
|
||||
from . import layers
|
||||
from . import preprocessing
|
||||
from . import utils
|
||||
from . import wrappers
|
||||
from . import callbacks
|
||||
from . import constraints
|
||||
from . import initializations
|
||||
from . import metrics
|
||||
from . import models
|
||||
from . import objectives
|
||||
from . import optimizers
|
||||
from . import regularizers
|
||||
|
||||
__version__ = '1.0.3'
|
||||
|
||||
@@ -19,6 +19,10 @@ def softplus(x):
|
||||
return K.softplus(x)
|
||||
|
||||
|
||||
def softsign(x):
|
||||
return K.softsign(x)
|
||||
|
||||
|
||||
def relu(x, alpha=0., max_value=None):
|
||||
return K.relu(x, alpha=alpha, max_value=max_value)
|
||||
|
||||
|
||||
@@ -27,20 +27,33 @@ def set_learning_phase(value):
|
||||
|
||||
|
||||
def get_session():
|
||||
'''Returns the TF session in use by the backend.
|
||||
'''Returns the TF session to be used by the backend.
|
||||
|
||||
If a default TensorFlow session is available, we will return it.
|
||||
|
||||
Else, we will return the global Keras session.
|
||||
|
||||
If no global Keras session exists at this point:
|
||||
we will create a new global session.
|
||||
|
||||
Note that you can manually set the global session
|
||||
via `K.set_session(sess)`.
|
||||
'''
|
||||
global _SESSION
|
||||
if tf.get_default_session() is not None:
|
||||
return tf.get_default_session()
|
||||
if _SESSION is None:
|
||||
if not os.environ.get('OMP_NUM_THREADS'):
|
||||
_SESSION = tf.Session(config=tf.ConfigProto(allow_soft_placement=True))
|
||||
else:
|
||||
nb_thread = int(os.environ.get('OMP_NUM_THREADS'))
|
||||
_SESSION = tf.Session(config=tf.ConfigProto(intra_op_parallelism_threads=nb_thread, allow_soft_placement=True))
|
||||
_SESSION = tf.Session(config=tf.ConfigProto(intra_op_parallelism_threads=nb_thread,
|
||||
allow_soft_placement=True))
|
||||
return _SESSION
|
||||
|
||||
|
||||
def set_session(session):
|
||||
'''Sets the TF session.
|
||||
'''Sets the global TF session.
|
||||
'''
|
||||
global _SESSION
|
||||
_SESSION = session
|
||||
@@ -60,7 +73,20 @@ def variable(value, dtype=_FLOATX, name=None):
|
||||
Tensor variable instance.
|
||||
'''
|
||||
v = tf.Variable(np.asarray(value, dtype=dtype), name=name)
|
||||
get_session().run(v.initializer)
|
||||
if tf.get_default_graph() is get_session().graph:
|
||||
try:
|
||||
get_session().run(v.initializer)
|
||||
except tf.errors.InvalidArgumentError:
|
||||
warnings.warn('Could not automatically initialize variable, '
|
||||
'make sure you do it manually (e.g. via '
|
||||
'`tf.initialize_all_variables()`).')
|
||||
else:
|
||||
warnings.warn('The default TensorFlow graph is not the graph '
|
||||
'associated with the TensorFlow session currently '
|
||||
'registered with Keras, and as such Keras '
|
||||
'was not able to automatically initialize a variable. '
|
||||
'You should consider registering the proper session '
|
||||
'with Keras via `K.set_session(sess)`.')
|
||||
return v
|
||||
|
||||
|
||||
@@ -136,6 +162,12 @@ def ones(shape, dtype=_FLOATX, name=None):
|
||||
return variable(np.ones(shape), dtype, name)
|
||||
|
||||
|
||||
def eye(size, dtype=_FLOATX, name=None):
|
||||
'''Instantiate an identity matrix.
|
||||
'''
|
||||
return variable(np.eye(size), dtype, name)
|
||||
|
||||
|
||||
def zeros_like(x, name=None):
|
||||
'''Instantiates an all-zeros tensor
|
||||
of the same shape as another tensor.
|
||||
@@ -418,6 +450,18 @@ def minimum(x, y):
|
||||
return tf.minimum(x, y)
|
||||
|
||||
|
||||
def sin(x):
|
||||
'''Computes sin of x element-wise.
|
||||
'''
|
||||
return tf.sin(x)
|
||||
|
||||
|
||||
def cos(x):
|
||||
'''Computes cos of x element-wise.
|
||||
'''
|
||||
return tf.cos(x)
|
||||
|
||||
|
||||
# SHAPE OPERATIONS
|
||||
|
||||
def concatenate(tensors, axis=-1):
|
||||
@@ -455,15 +499,21 @@ def resize_images(X, height_factor, width_factor, dim_ordering):
|
||||
positive integers.
|
||||
'''
|
||||
if dim_ordering == 'th':
|
||||
original_shape = int_shape(X)
|
||||
new_shape = tf.shape(X)[2:]
|
||||
new_shape *= tf.constant(np.array([height_factor, width_factor]).astype('int32'))
|
||||
X = permute_dimensions(X, [0, 2, 3, 1])
|
||||
X = tf.image.resize_nearest_neighbor(X, new_shape)
|
||||
return permute_dimensions(X, [0, 3, 1, 2])
|
||||
X = permute_dimensions(X, [0, 3, 1, 2])
|
||||
X.set_shape((None, None, original_shape[2] * height_factor, original_shape[3] * width_factor))
|
||||
return X
|
||||
elif dim_ordering == 'tf':
|
||||
original_shape = int_shape(X)
|
||||
new_shape = tf.shape(X)[1:3]
|
||||
new_shape *= tf.constant(np.array([height_factor, width_factor]).astype('int32'))
|
||||
return tf.image.resize_nearest_neighbor(X, new_shape)
|
||||
X = tf.image.resize_nearest_neighbor(X, new_shape)
|
||||
X.set_shape((None, original_shape[1] * height_factor, original_shape[2] * width_factor, None))
|
||||
return X
|
||||
else:
|
||||
raise Exception('Invalid dim_ordering: ' + dim_ordering)
|
||||
|
||||
@@ -495,6 +545,8 @@ def repeat(x, n):
|
||||
|
||||
|
||||
def tile(x, n):
|
||||
if not hasattr(n, 'shape') and not hasattr(n, '__len__'):
|
||||
n = [n]
|
||||
return tf.tile(x, n)
|
||||
|
||||
|
||||
@@ -558,6 +610,16 @@ def get_value(x):
|
||||
return x.eval(session=get_session())
|
||||
|
||||
|
||||
def batch_get_value(xs):
|
||||
'''Returns the value of more than one tensor variable,
|
||||
as a list of Numpy arrays.
|
||||
'''
|
||||
if xs:
|
||||
return get_session().run(xs)
|
||||
else:
|
||||
return []
|
||||
|
||||
|
||||
def set_value(x, value):
|
||||
'''Sets the value of a tensor variable,
|
||||
from a Numpy array.
|
||||
@@ -808,6 +870,10 @@ def softplus(x):
|
||||
return tf.nn.softplus(x)
|
||||
|
||||
|
||||
def softsign(x):
|
||||
return tf.nn.softsign(x)
|
||||
|
||||
|
||||
def categorical_crossentropy(output, target, from_logits=False):
|
||||
'''Categorical crossentropy between an output tensor
|
||||
and a target tensor, where the target is a tensor of the same
|
||||
|
||||
@@ -3,6 +3,10 @@ from theano import tensor as T
|
||||
from theano.sandbox.rng_mrg import MRG_RandomStreams as RandomStreams
|
||||
from theano.tensor.signal import pool
|
||||
from theano.tensor.nnet import conv3d2d
|
||||
try:
|
||||
from theano.tensor.nnet.nnet import softsign as T_softsign
|
||||
except ImportError:
|
||||
from theano.sandbox.softsign import softsign as T_softsign
|
||||
import inspect
|
||||
import numpy as np
|
||||
from .common import _FLOATX, _EPSILON
|
||||
@@ -79,6 +83,12 @@ def ones(shape, dtype=_FLOATX, name=None):
|
||||
return variable(np.ones(shape), dtype, name)
|
||||
|
||||
|
||||
def eye(size, dtype=_FLOATX, name=None):
|
||||
'''Instantiate an identity matrix.
|
||||
'''
|
||||
return variable(np.eye(size), dtype, name)
|
||||
|
||||
|
||||
def ones_like(x):
|
||||
return T.ones_like(x)
|
||||
|
||||
@@ -259,6 +269,14 @@ def minimum(x, y):
|
||||
return T.minimum(x, y)
|
||||
|
||||
|
||||
def sin(x):
|
||||
return T.sin(x)
|
||||
|
||||
|
||||
def cos(x):
|
||||
return T.cos(x)
|
||||
|
||||
|
||||
# SHAPE OPERATIONS
|
||||
|
||||
def concatenate(tensors, axis=-1):
|
||||
@@ -469,6 +487,13 @@ def get_value(x):
|
||||
return x.get_value()
|
||||
|
||||
|
||||
def batch_get_value(xs):
|
||||
'''Returns the value of more than one tensor variable,
|
||||
as a list of Numpy arrays.
|
||||
'''
|
||||
return [get_value(x) for x in xs]
|
||||
|
||||
|
||||
def set_value(x, value):
|
||||
x.set_value(np.asarray(value, dtype=x.dtype))
|
||||
|
||||
@@ -558,15 +583,15 @@ def rnn(step_function, inputs, initial_states,
|
||||
axes = [1, 0] + list(range(2, ndim))
|
||||
inputs = inputs.dimshuffle(axes)
|
||||
|
||||
if constants is None:
|
||||
constants = []
|
||||
|
||||
if mask is not None:
|
||||
if mask.ndim == ndim-1:
|
||||
mask = expand_dims(mask)
|
||||
assert mask.ndim == ndim
|
||||
mask = mask.dimshuffle(axes)
|
||||
|
||||
if constants is None:
|
||||
constants = []
|
||||
|
||||
if unroll:
|
||||
indices = list(range(input_length))
|
||||
if go_backwards:
|
||||
@@ -576,7 +601,7 @@ def rnn(step_function, inputs, initial_states,
|
||||
successive_states = []
|
||||
states = initial_states
|
||||
for i in indices:
|
||||
output, new_states = step_function(inputs[i], states)
|
||||
output, new_states = step_function(inputs[i], states + constants)
|
||||
|
||||
if len(successive_outputs) == 0:
|
||||
prev_output = zeros_like(output)
|
||||
@@ -635,7 +660,7 @@ def rnn(step_function, inputs, initial_states,
|
||||
successive_states = []
|
||||
states = initial_states
|
||||
for i in indices:
|
||||
output, states = step_function(inputs[i], states)
|
||||
output, states = step_function(inputs[i], states + constants)
|
||||
successive_outputs.append(output)
|
||||
successive_states.append(states)
|
||||
outputs = T.stack(*successive_outputs)
|
||||
@@ -711,6 +736,10 @@ def softplus(x):
|
||||
return T.nnet.softplus(x)
|
||||
|
||||
|
||||
def softsign(x):
|
||||
return T_softsign(x)
|
||||
|
||||
|
||||
def categorical_crossentropy(output, target, from_logits=False):
|
||||
if from_logits:
|
||||
output = T.nnet.softmax(output)
|
||||
@@ -1015,10 +1044,3 @@ def random_binomial(shape, p=0.0, dtype=_FLOATX, seed=None):
|
||||
seed = np.random.randint(10e6)
|
||||
rng = RandomStreams(seed=seed)
|
||||
return rng.binomial(shape, p=p, dtype=dtype)
|
||||
|
||||
'''
|
||||
more TODO:
|
||||
|
||||
tensordot -> soon to be introduced in TF
|
||||
batched_tensordot -> reimplement
|
||||
'''
|
||||
|
||||
+15
-3
@@ -430,8 +430,11 @@ class TensorBoard(Callback):
|
||||
histogram_freq: frequency (in epochs) at which to compute activation
|
||||
histograms for the layers of the model. If set to 0,
|
||||
histograms won't be computed.
|
||||
write_graph: whether to visualize the graph in tensorboard. The log file can
|
||||
become quite large when write_graph is set to True.
|
||||
'''
|
||||
def __init__(self, log_dir='./logs', histogram_freq=0):
|
||||
|
||||
def __init__(self, log_dir='./logs', histogram_freq=0, write_graph=True):
|
||||
super(Callback, self).__init__()
|
||||
if K._BACKEND != 'tensorflow':
|
||||
raise Exception('TensorBoard callback only works '
|
||||
@@ -439,6 +442,7 @@ class TensorBoard(Callback):
|
||||
self.log_dir = log_dir
|
||||
self.histogram_freq = histogram_freq
|
||||
self.merged = None
|
||||
self.write_graph = write_graph
|
||||
|
||||
def _set_model(self, model):
|
||||
import tensorflow as tf
|
||||
@@ -457,8 +461,16 @@ class TensorBoard(Callback):
|
||||
tf.histogram_summary('{}_out'.format(layer),
|
||||
layer.output)
|
||||
self.merged = tf.merge_all_summaries()
|
||||
self.writer = tf.train.SummaryWriter(self.log_dir,
|
||||
self.sess.graph_def)
|
||||
if self.write_graph:
|
||||
tf_version = tuple(int(i) for i in tf.__version__.split('.'))
|
||||
if tf_version >= (0, 8, 0):
|
||||
self.writer = tf.train.SummaryWriter(self.log_dir,
|
||||
self.sess.graph)
|
||||
else:
|
||||
self.writer = tf.train.SummaryWriter(self.log_dir,
|
||||
self.sess.graph_def)
|
||||
else:
|
||||
self.writer = tf.train.SummaryWriter(self.log_dir)
|
||||
|
||||
def on_epoch_end(self, epoch, logs={}):
|
||||
import tensorflow as tf
|
||||
|
||||
@@ -2,7 +2,6 @@
|
||||
from __future__ import absolute_import
|
||||
import sys
|
||||
from six.moves import cPickle
|
||||
from six.moves import range
|
||||
|
||||
|
||||
def load_batch(fpath, label_key='labels'):
|
||||
|
||||
+70
-40
@@ -847,10 +847,11 @@ class Layer(object):
|
||||
if not params:
|
||||
return
|
||||
weight_value_tuples = []
|
||||
for p, w in zip(params, weights):
|
||||
if K.get_value(p).shape != w.shape:
|
||||
param_values = K.batch_get_value(params)
|
||||
for pv, p, w in zip(param_values, params, weights):
|
||||
if pv.shape != w.shape:
|
||||
raise Exception('Layer weight shape ' +
|
||||
str(K.get_value(p).shape) +
|
||||
str(pv.shape) +
|
||||
' not compatible with '
|
||||
'provided weight shape ' + str(w.shape))
|
||||
weight_value_tuples.append((p, w))
|
||||
@@ -861,10 +862,7 @@ class Layer(object):
|
||||
as a list of numpy arrays.
|
||||
'''
|
||||
params = self.trainable_weights + self.non_trainable_weights
|
||||
weights = []
|
||||
for p in params:
|
||||
weights.append(K.get_value(p))
|
||||
return weights
|
||||
return K.batch_get_value(params)
|
||||
|
||||
def get_config(self):
|
||||
'''Returns a Python dictionary (serializable)
|
||||
@@ -1128,7 +1126,8 @@ class Merge(Layer):
|
||||
if mode not in {'sum', 'mul', 'concat', 'ave', 'cos', 'dot'}:
|
||||
raise Exception('Invalid merge mode: ' + str(mode))
|
||||
if type(layers) not in {list, tuple} or len(layers) < 2:
|
||||
raise Exception('A Merge should only be applied to a list of layers. Not a list: ' + str(layers))
|
||||
raise Exception('A Merge should only be applied to a list of '
|
||||
'layers with at least 2 elements. Found: ' + str(layers))
|
||||
|
||||
if tensor_indices is None:
|
||||
tensor_indices = [None for _ in range(len(layers))]
|
||||
@@ -1169,7 +1168,7 @@ class Merge(Layer):
|
||||
raise Exception('Invalid format for dot_axes - list elements should be "int".')
|
||||
if shape1[dot_axes[0]] != shape2[dot_axes[1]]:
|
||||
raise Exception('Dimension incompatibility using dot mode: ' +
|
||||
'%s != %s. ' % (shape1[dot_axes[0]], shape2[dot_axes[1][i]]) +
|
||||
'%s != %s. ' % (shape1[dot_axes[0]], shape2[dot_axes[1]]) +
|
||||
'Layer shapes: %s, %s' % (shape1, shape2))
|
||||
elif mode == 'concat':
|
||||
reduced_inputs_shapes = [list(shape) for shape in input_shapes]
|
||||
@@ -1241,20 +1240,33 @@ class Merge(Layer):
|
||||
'please use ' +
|
||||
'the "merge" function instead: ' +
|
||||
'`merged_tensor = merge([tensor_1, tensor2])`.')
|
||||
layers = []
|
||||
node_indices = []
|
||||
tensor_indices = []
|
||||
|
||||
all_keras_tensors = True
|
||||
for x in inputs:
|
||||
layer, node_index, tensor_index = x._keras_history
|
||||
layers.append(layer)
|
||||
node_indices.append(node_index)
|
||||
tensor_indices.append(tensor_index)
|
||||
self._arguments_validation(layers, self.mode,
|
||||
self.concat_axis, self.dot_axes,
|
||||
self._output_shape,
|
||||
node_indices, tensor_indices)
|
||||
self.built = True
|
||||
self.add_inbound_node(layers, node_indices, tensor_indices)
|
||||
if not hasattr(x, '_keras_history'):
|
||||
all_keras_tensors = False
|
||||
break
|
||||
|
||||
if all_keras_tensors:
|
||||
layers = []
|
||||
node_indices = []
|
||||
tensor_indices = []
|
||||
for x in inputs:
|
||||
layer, node_index, tensor_index = x._keras_history
|
||||
layers.append(layer)
|
||||
node_indices.append(node_index)
|
||||
tensor_indices.append(tensor_index)
|
||||
self._arguments_validation(layers, self.mode,
|
||||
self.concat_axis, self.dot_axes,
|
||||
self._output_shape,
|
||||
node_indices, tensor_indices)
|
||||
self.built = True
|
||||
self.add_inbound_node(layers, node_indices, tensor_indices)
|
||||
|
||||
outputs = self.inbound_nodes[-1].output_tensors
|
||||
return outputs[0] # merge only returns a single tensor
|
||||
else:
|
||||
return self.call(inputs, mask)
|
||||
|
||||
def get_output_shape_for(self, input_shape):
|
||||
assert type(input_shape) is list # must have mutiple input shape tuples
|
||||
@@ -1286,8 +1298,6 @@ class Merge(Layer):
|
||||
break
|
||||
output_shape[self.concat_axis] += shape[self.concat_axis]
|
||||
return tuple(output_shape)
|
||||
elif self.mode == 'join':
|
||||
return None
|
||||
elif self.mode == 'dot':
|
||||
shape1 = list(input_shapes[0])
|
||||
shape2 = list(input_shapes[1])
|
||||
@@ -1388,7 +1398,7 @@ def merge(inputs, mode='sum', concat_axis=-1,
|
||||
|
||||
# Arguments
|
||||
mode: string or lambda/function. If string, must be one
|
||||
of: 'sum', 'mul', 'concat', 'ave', 'join', 'cos', 'dot'.
|
||||
of: 'sum', 'mul', 'concat', 'ave', 'cos', 'dot'.
|
||||
If lambda/function, it should take as input a list of tensors
|
||||
and return a single tensor.
|
||||
concat_axis: integer, axis to use in mode `concat`.
|
||||
@@ -1405,20 +1415,35 @@ def merge(inputs, mode='sum', concat_axis=-1,
|
||||
to consider for merging
|
||||
(in case some input layer node returns multiple tensors).
|
||||
'''
|
||||
input_layers = []
|
||||
node_indices = []
|
||||
tensor_indices = []
|
||||
all_keras_tensors = True
|
||||
for x in inputs:
|
||||
assert hasattr(x, '_keras_history'), 'Input tensor to "merge" was not a Keras tensor: ' + str(x)
|
||||
input_layer, node_index, tensor_index = x._keras_history
|
||||
input_layers.append(input_layer)
|
||||
node_indices.append(node_index)
|
||||
tensor_indices.append(tensor_index)
|
||||
merge_layer = Merge(input_layers, mode=mode, concat_axis=concat_axis,
|
||||
dot_axes=dot_axes, output_shape=output_shape,
|
||||
node_indices=node_indices, tensor_indices=tensor_indices,
|
||||
name=name)
|
||||
return merge_layer.inbound_nodes[0].output_tensors[0]
|
||||
if not hasattr(x, '_keras_history'):
|
||||
all_keras_tensors = False
|
||||
break
|
||||
if all_keras_tensors:
|
||||
input_layers = []
|
||||
node_indices = []
|
||||
tensor_indices = []
|
||||
for x in inputs:
|
||||
input_layer, node_index, tensor_index = x._keras_history
|
||||
input_layers.append(input_layer)
|
||||
node_indices.append(node_index)
|
||||
tensor_indices.append(tensor_index)
|
||||
merge_layer = Merge(input_layers, mode=mode,
|
||||
concat_axis=concat_axis,
|
||||
dot_axes=dot_axes,
|
||||
output_shape=output_shape,
|
||||
node_indices=node_indices,
|
||||
tensor_indices=tensor_indices,
|
||||
name=name)
|
||||
return merge_layer.inbound_nodes[0].output_tensors[0]
|
||||
else:
|
||||
merge_layer = Merge(mode=mode,
|
||||
concat_axis=concat_axis,
|
||||
dot_axes=dot_axes,
|
||||
output_shape=output_shape,
|
||||
name=name)
|
||||
return merge_layer(inputs)
|
||||
|
||||
|
||||
class Container(Layer):
|
||||
@@ -1646,7 +1671,8 @@ class Container(Layer):
|
||||
layers_by_depth[depth] = []
|
||||
layers_by_depth[depth].append(layer)
|
||||
|
||||
depth_keys = list(nodes_by_depth.keys())
|
||||
# get sorted list of layer depths
|
||||
depth_keys = list(layers_by_depth.keys())
|
||||
depth_keys.sort(reverse=True)
|
||||
|
||||
# set self.layers and self.layers_by_depth
|
||||
@@ -1660,6 +1686,10 @@ class Container(Layer):
|
||||
self.layers = layers
|
||||
self.layers_by_depth = layers_by_depth
|
||||
|
||||
# get sorted list of node depths
|
||||
depth_keys = list(nodes_by_depth.keys())
|
||||
depth_keys.sort(reverse=True)
|
||||
|
||||
# check that all tensors required are computable.
|
||||
# computable_tensors: all tensors in the graph
|
||||
# that can be computed from the inputs provided
|
||||
@@ -2241,7 +2271,7 @@ class Container(Layer):
|
||||
for layer in flattened_layers:
|
||||
g = f.create_group(layer.name)
|
||||
symbolic_weights = layer.trainable_weights + layer.non_trainable_weights
|
||||
weight_values = layer.get_weights()
|
||||
weight_values = K.batch_get_value(symbolic_weights)
|
||||
weight_names = []
|
||||
for i, (w, val) in enumerate(zip(symbolic_weights, weight_values)):
|
||||
if hasattr(w, 'name') and w.name:
|
||||
|
||||
+36
-14
@@ -66,6 +66,12 @@ def standardize_input_data(data, names, shapes=None, check_batch_dim=True,
|
||||
': data should be a Numpy array, '
|
||||
'or list/dict of Numpy arrays. '
|
||||
'Found: ' + str(data)[:200] + '...')
|
||||
if len(names) != 1:
|
||||
# case: model expects multiple inputs but only received
|
||||
# a single Numpy array
|
||||
raise Exception('The model expects ' + str(len(names)) +
|
||||
' input arrays, but only received one array. '
|
||||
'Found: array with shape ' + str(data.shape))
|
||||
arrays = [data]
|
||||
|
||||
# make arrays at least 2D
|
||||
@@ -153,7 +159,7 @@ def check_array_lengths(X, Y, W):
|
||||
raise Exception('All input arrays (x) should have '
|
||||
'the same number of samples.')
|
||||
set_y = set(y_lengths)
|
||||
if len(set_x) != 1:
|
||||
if len(set_y) != 1:
|
||||
raise Exception('All target arrays (y) should have '
|
||||
'the same number of samples.')
|
||||
set_w = set(w_lengths)
|
||||
@@ -195,7 +201,7 @@ def check_loss_and_target_compatibility(targets, losses, output_shapes):
|
||||
'Alternatively, you can use the loss function '
|
||||
'`sparse_categorical_crossentropy` instead, '
|
||||
'which does expect integer targets.')
|
||||
if loss.__name__ in key_losses and y.shape[1] != shape[1]:
|
||||
if loss.__name__ in key_losses and shape[1] is not None and y.shape[1] != shape[1]:
|
||||
raise Exception('A target array with shape ' + str(y.shape) +
|
||||
' was passed for an output of shape ' + str(shape) +
|
||||
' while using as loss `' + loss.__name__ + '`. '
|
||||
@@ -565,6 +571,10 @@ class Model(Container):
|
||||
name = self.output_names[i]
|
||||
self.targets.append(K.placeholder(ndim=len(shape), name=name + '_target'))
|
||||
|
||||
# prepare metrics
|
||||
self.metrics_names = ['loss']
|
||||
self.metrics = []
|
||||
|
||||
# compute total loss
|
||||
total_loss = None
|
||||
for i in range(len(self.outputs)):
|
||||
@@ -574,19 +584,20 @@ class Model(Container):
|
||||
sample_weight = sample_weights[i]
|
||||
mask = masks[i]
|
||||
loss_weight = loss_weights_list[i]
|
||||
output_loss = loss_weight * weighted_loss(y_true, y_pred,
|
||||
sample_weight, mask)
|
||||
output_loss = weighted_loss(y_true, y_pred,
|
||||
sample_weight, mask)
|
||||
if len(self.outputs) > 1:
|
||||
self.metrics.append(output_loss)
|
||||
self.metrics_names.append(self.output_names[i] + '_loss')
|
||||
if total_loss is None:
|
||||
total_loss = output_loss
|
||||
total_loss = loss_weight * output_loss
|
||||
else:
|
||||
total_loss += output_loss
|
||||
total_loss += loss_weight * output_loss
|
||||
|
||||
# add regularization penalties to the loss
|
||||
for r in self.regularizers:
|
||||
total_loss = r(total_loss)
|
||||
|
||||
# prepare metrics
|
||||
self.metrics_names = ['loss']
|
||||
self.metrics = []
|
||||
# list of same size as output_names.
|
||||
# contains tuples (metrics for output, names of metrics)
|
||||
nested_metrics = collect_metrics(metrics, self.output_names)
|
||||
@@ -602,8 +613,12 @@ class Model(Container):
|
||||
if output_shape[-1] == 1:
|
||||
# case: binary accuracy
|
||||
self.metrics.append(metrics_module.binary_accuracy(y_true, y_pred))
|
||||
elif self.loss_functions[i] == objectives.sparse_categorical_crossentropy:
|
||||
# case: categorical accuracy with sparse targets
|
||||
self.metrics.append(
|
||||
metrics_module.sparse_categorical_accuracy(y_true, y_pred))
|
||||
else:
|
||||
# case: categorical accuracy
|
||||
# case: categorical accuracy with dense targets
|
||||
self.metrics.append(metrics_module.categorical_accuracy(y_true, y_pred))
|
||||
if len(self.output_names) == 1:
|
||||
self.metrics_names.append('acc')
|
||||
@@ -632,6 +647,8 @@ class Model(Container):
|
||||
self.predict_function = None
|
||||
|
||||
def _make_train_function(self):
|
||||
if not hasattr(self, 'train_function'):
|
||||
raise Exception('You must compile your model before using it.')
|
||||
if self.train_function is None:
|
||||
if self.uses_learning_phase:
|
||||
inputs = self.inputs + self.targets + self.sample_weights + [K.learning_phase()]
|
||||
@@ -653,6 +670,8 @@ class Model(Container):
|
||||
**self._function_kwargs)
|
||||
|
||||
def _make_test_function(self):
|
||||
if not hasattr(self, 'test_function'):
|
||||
raise Exception('You must compile your model before using it.')
|
||||
if self.test_function is None:
|
||||
if self.uses_learning_phase:
|
||||
inputs = self.inputs + self.targets + self.sample_weights + [K.learning_phase()]
|
||||
@@ -666,6 +685,8 @@ class Model(Container):
|
||||
**self._function_kwargs)
|
||||
|
||||
def _make_predict_function(self):
|
||||
if not hasattr(self, 'predict_function'):
|
||||
self.predict_function = None
|
||||
if self.predict_function is None:
|
||||
if self.uses_learning_phase:
|
||||
inputs = self.inputs + [K.learning_phase()]
|
||||
@@ -673,10 +694,11 @@ class Model(Container):
|
||||
inputs = self.inputs
|
||||
# returns network outputs. Does not update weights.
|
||||
# Does update the network states.
|
||||
kwargs = getattr(self, '_function_kwargs', {})
|
||||
self.predict_function = K.function(inputs,
|
||||
self.outputs,
|
||||
updates=self.state_updates,
|
||||
**self._function_kwargs)
|
||||
**kwargs)
|
||||
|
||||
def _fit_loop(self, f, ins, out_labels=[], batch_size=32,
|
||||
nb_epoch=100, verbose=1, callbacks=[],
|
||||
@@ -1361,7 +1383,7 @@ class Model(Container):
|
||||
class_weight=class_weight)
|
||||
except Exception as e:
|
||||
_stop.set()
|
||||
raise e
|
||||
raise
|
||||
|
||||
if type(outs) != list:
|
||||
outs = [outs]
|
||||
@@ -1463,7 +1485,7 @@ class Model(Container):
|
||||
outs = self.test_on_batch(x, y, sample_weight=sample_weight)
|
||||
except Exception as e:
|
||||
_stop.set()
|
||||
raise e
|
||||
raise
|
||||
|
||||
if type(x) is list:
|
||||
nb_samples = len(x[0])
|
||||
@@ -1535,7 +1557,7 @@ class Model(Container):
|
||||
outs = self.predict_on_batch(x)
|
||||
except Exception as e:
|
||||
_stop.set()
|
||||
raise e
|
||||
raise
|
||||
|
||||
if type(x) is list:
|
||||
nb_samples = len(x[0])
|
||||
|
||||
@@ -65,6 +65,7 @@ class Convolution1D(Layer):
|
||||
(eg. maxnorm, nonneg), applied to the main weights matrix.
|
||||
b_constraint: instance of the [constraints](../constraints.md) module,
|
||||
applied to the bias.
|
||||
bias: whether to include a bias (i.e. make the layer affine rather than linear).
|
||||
input_dim: Number of channels/dimensions in the input.
|
||||
Either this argument or the keyword argument `input_shape`must be
|
||||
provided when using this layer as the first layer in a model.
|
||||
@@ -85,7 +86,7 @@ class Convolution1D(Layer):
|
||||
border_mode='valid', subsample_length=1,
|
||||
W_regularizer=None, b_regularizer=None, activity_regularizer=None,
|
||||
W_constraint=None, b_constraint=None,
|
||||
input_dim=None, input_length=None, **kwargs):
|
||||
bias=True, input_dim=None, input_length=None, **kwargs):
|
||||
|
||||
if border_mode not in {'valid', 'same'}:
|
||||
raise Exception('Invalid border mode for Convolution1D:', border_mode)
|
||||
@@ -106,6 +107,7 @@ class Convolution1D(Layer):
|
||||
self.W_constraint = constraints.get(W_constraint)
|
||||
self.b_constraint = constraints.get(b_constraint)
|
||||
|
||||
self.bias = bias
|
||||
self.input_spec = [InputSpec(ndim=3)]
|
||||
self.initial_weights = weights
|
||||
self.input_dim = input_dim
|
||||
@@ -118,15 +120,18 @@ class Convolution1D(Layer):
|
||||
input_dim = input_shape[2]
|
||||
self.W_shape = (self.nb_filter, input_dim, self.filter_length, 1)
|
||||
self.W = self.init(self.W_shape, name='{}_W'.format(self.name))
|
||||
self.b = K.zeros((self.nb_filter,), name='{}_b'.format(self.name))
|
||||
self.trainable_weights = [self.W, self.b]
|
||||
if self.bias:
|
||||
self.b = K.zeros((self.nb_filter,), name='{}_b'.format(self.name))
|
||||
self.trainable_weights = [self.W, self.b]
|
||||
else:
|
||||
self.trainable_weights = [self.W]
|
||||
self.regularizers = []
|
||||
|
||||
if self.W_regularizer:
|
||||
self.W_regularizer.set_param(self.W)
|
||||
self.regularizers.append(self.W_regularizer)
|
||||
|
||||
if self.b_regularizer:
|
||||
if self.bias and self.b_regularizer:
|
||||
self.b_regularizer.set_param(self.b)
|
||||
self.regularizers.append(self.b_regularizer)
|
||||
|
||||
@@ -137,7 +142,7 @@ class Convolution1D(Layer):
|
||||
self.constraints = {}
|
||||
if self.W_constraint:
|
||||
self.constraints[self.W] = self.W_constraint
|
||||
if self.b_constraint:
|
||||
if self.bias and self.b_constraint:
|
||||
self.constraints[self.b] = self.b_constraint
|
||||
|
||||
if self.initial_weights is not None:
|
||||
@@ -154,11 +159,11 @@ class Convolution1D(Layer):
|
||||
def call(self, x, mask=None):
|
||||
x = K.expand_dims(x, -1) # add a dimension of the right
|
||||
x = K.permute_dimensions(x, (0, 2, 1, 3))
|
||||
conv_out = K.conv2d(x, self.W, strides=self.subsample,
|
||||
border_mode=self.border_mode,
|
||||
dim_ordering='th')
|
||||
|
||||
output = conv_out + K.reshape(self.b, (1, self.nb_filter, 1, 1))
|
||||
output = K.conv2d(x, self.W, strides=self.subsample,
|
||||
border_mode=self.border_mode,
|
||||
dim_ordering='th')
|
||||
if self.bias:
|
||||
output += K.reshape(self.b, (1, self.nb_filter, 1, 1))
|
||||
output = self.activation(output)
|
||||
output = K.squeeze(output, 3) # remove the dummy 3rd dimension
|
||||
output = K.permute_dimensions(output, (0, 2, 1))
|
||||
@@ -176,6 +181,7 @@ class Convolution1D(Layer):
|
||||
'activity_regularizer': self.activity_regularizer.get_config() if self.activity_regularizer else None,
|
||||
'W_constraint': self.W_constraint.get_config() if self.W_constraint else None,
|
||||
'b_constraint': self.b_constraint.get_config() if self.b_constraint else None,
|
||||
'bias': self.bias,
|
||||
'input_dim': self.input_dim,
|
||||
'input_length': self.input_length}
|
||||
base_config = super(Convolution1D, self).get_config()
|
||||
@@ -232,6 +238,7 @@ class Convolution2D(Layer):
|
||||
applied to the bias.
|
||||
dim_ordering: 'th' or 'tf'. In 'th' mode, the channels dimension
|
||||
(the depth) is at index 1, in 'tf' mode is it at index 3.
|
||||
bias: whether to include a bias (i.e. make the layer affine rather than linear).
|
||||
|
||||
# Input shape
|
||||
4D tensor with shape:
|
||||
@@ -250,7 +257,8 @@ class Convolution2D(Layer):
|
||||
init='glorot_uniform', activation='linear', weights=None,
|
||||
border_mode='valid', subsample=(1, 1), dim_ordering='th',
|
||||
W_regularizer=None, b_regularizer=None, activity_regularizer=None,
|
||||
W_constraint=None, b_constraint=None, **kwargs):
|
||||
W_constraint=None, b_constraint=None,
|
||||
bias=True, **kwargs):
|
||||
|
||||
if border_mode not in {'valid', 'same'}:
|
||||
raise Exception('Invalid border mode for Convolution2D:', border_mode)
|
||||
@@ -272,6 +280,7 @@ class Convolution2D(Layer):
|
||||
self.W_constraint = constraints.get(W_constraint)
|
||||
self.b_constraint = constraints.get(b_constraint)
|
||||
|
||||
self.bias = bias
|
||||
self.input_spec = [InputSpec(ndim=4)]
|
||||
self.initial_weights = weights
|
||||
super(Convolution2D, self).__init__(**kwargs)
|
||||
@@ -286,15 +295,18 @@ class Convolution2D(Layer):
|
||||
else:
|
||||
raise Exception('Invalid dim_ordering: ' + self.dim_ordering)
|
||||
self.W = self.init(self.W_shape, name='{}_W'.format(self.name))
|
||||
self.b = K.zeros((self.nb_filter,), name='{}_b'.format(self.name))
|
||||
self.trainable_weights = [self.W, self.b]
|
||||
if self.bias:
|
||||
self.b = K.zeros((self.nb_filter,), name='{}_b'.format(self.name))
|
||||
self.trainable_weights = [self.W, self.b]
|
||||
else:
|
||||
self.trainable_weights = [self.W]
|
||||
self.regularizers = []
|
||||
|
||||
if self.W_regularizer:
|
||||
self.W_regularizer.set_param(self.W)
|
||||
self.regularizers.append(self.W_regularizer)
|
||||
|
||||
if self.b_regularizer:
|
||||
if self.bias and self.b_regularizer:
|
||||
self.b_regularizer.set_param(self.b)
|
||||
self.regularizers.append(self.b_regularizer)
|
||||
|
||||
@@ -305,7 +317,7 @@ class Convolution2D(Layer):
|
||||
self.constraints = {}
|
||||
if self.W_constraint:
|
||||
self.constraints[self.W] = self.W_constraint
|
||||
if self.b_constraint:
|
||||
if self.bias and self.b_constraint:
|
||||
self.constraints[self.b] = self.b_constraint
|
||||
|
||||
if self.initial_weights is not None:
|
||||
@@ -335,16 +347,17 @@ class Convolution2D(Layer):
|
||||
raise Exception('Invalid dim_ordering: ' + self.dim_ordering)
|
||||
|
||||
def call(self, x, mask=None):
|
||||
conv_out = K.conv2d(x, self.W, strides=self.subsample,
|
||||
border_mode=self.border_mode,
|
||||
dim_ordering=self.dim_ordering,
|
||||
filter_shape=self.W_shape)
|
||||
if self.dim_ordering == 'th':
|
||||
output = conv_out + K.reshape(self.b, (1, self.nb_filter, 1, 1))
|
||||
elif self.dim_ordering == 'tf':
|
||||
output = conv_out + K.reshape(self.b, (1, 1, 1, self.nb_filter))
|
||||
else:
|
||||
raise Exception('Invalid dim_ordering: ' + self.dim_ordering)
|
||||
output = K.conv2d(x, self.W, strides=self.subsample,
|
||||
border_mode=self.border_mode,
|
||||
dim_ordering=self.dim_ordering,
|
||||
filter_shape=self.W_shape)
|
||||
if self.bias:
|
||||
if self.dim_ordering == 'th':
|
||||
output += K.reshape(self.b, (1, self.nb_filter, 1, 1))
|
||||
elif self.dim_ordering == 'tf':
|
||||
output += K.reshape(self.b, (1, 1, 1, self.nb_filter))
|
||||
else:
|
||||
raise Exception('Invalid dim_ordering: ' + self.dim_ordering)
|
||||
output = self.activation(output)
|
||||
return output
|
||||
|
||||
@@ -361,7 +374,8 @@ class Convolution2D(Layer):
|
||||
'b_regularizer': self.b_regularizer.get_config() if self.b_regularizer else None,
|
||||
'activity_regularizer': self.activity_regularizer.get_config() if self.activity_regularizer else None,
|
||||
'W_constraint': self.W_constraint.get_config() if self.W_constraint else None,
|
||||
'b_constraint': self.b_constraint.get_config() if self.b_constraint else None}
|
||||
'b_constraint': self.b_constraint.get_config() if self.b_constraint else None,
|
||||
'bias': self.bias}
|
||||
base_config = super(Convolution2D, self).get_config()
|
||||
return dict(list(base_config.items()) + list(config.items()))
|
||||
|
||||
@@ -407,6 +421,7 @@ class Convolution3D(Layer):
|
||||
applied to the bias.
|
||||
dim_ordering: 'th' or 'tf'. In 'th' mode, the channels dimension
|
||||
(the depth) is at index 1, in 'tf' mode is it at index 4.
|
||||
bias: whether to include a bias (i.e. make the layer affine rather than linear).
|
||||
|
||||
# Input shape
|
||||
5D tensor with shape:
|
||||
@@ -426,7 +441,8 @@ class Convolution3D(Layer):
|
||||
init='glorot_uniform', activation='linear', weights=None,
|
||||
border_mode='valid', subsample=(1, 1, 1), dim_ordering='th',
|
||||
W_regularizer=None, b_regularizer=None, activity_regularizer=None,
|
||||
W_constraint=None, b_constraint=None, **kwargs):
|
||||
W_constraint=None, b_constraint=None,
|
||||
bias=True, **kwargs):
|
||||
if K._BACKEND != 'theano':
|
||||
raise Exception(self.__class__.__name__ +
|
||||
' is currently only working with Theano backend.')
|
||||
@@ -451,6 +467,7 @@ class Convolution3D(Layer):
|
||||
self.W_constraint = constraints.get(W_constraint)
|
||||
self.b_constraint = constraints.get(b_constraint)
|
||||
|
||||
self.bias = bias
|
||||
self.input_spec = [InputSpec(ndim=5)]
|
||||
self.initial_weights = weights
|
||||
super(Convolution3D, self).__init__(**kwargs)
|
||||
@@ -471,15 +488,18 @@ class Convolution3D(Layer):
|
||||
raise Exception('Invalid dim_ordering: ' + self.dim_ordering)
|
||||
|
||||
self.W = self.init(self.W_shape, name='{}_W'.format(self.name))
|
||||
self.b = K.zeros((self.nb_filter,), name='{}_b'.format(self.name))
|
||||
self.trainable_weights = [self.W, self.b]
|
||||
self.regularizers = []
|
||||
if self.bias:
|
||||
self.b = K.zeros((self.nb_filter,), name='{}_b'.format(self.name))
|
||||
self.trainable_weights = [self.W, self.b]
|
||||
else:
|
||||
self.trainable_weights = [self.W]
|
||||
|
||||
self.regularizers = []
|
||||
if self.W_regularizer:
|
||||
self.W_regularizer.set_param(self.W)
|
||||
self.regularizers.append(self.W_regularizer)
|
||||
|
||||
if self.b_regularizer:
|
||||
if self.bias and self.b_regularizer:
|
||||
self.b_regularizer.set_param(self.b)
|
||||
self.regularizers.append(self.b_regularizer)
|
||||
|
||||
@@ -490,7 +510,7 @@ class Convolution3D(Layer):
|
||||
self.constraints = {}
|
||||
if self.W_constraint:
|
||||
self.constraints[self.W] = self.W_constraint
|
||||
if self.b_constraint:
|
||||
if self.bias and self.b_constraint:
|
||||
self.constraints[self.b] = self.b_constraint
|
||||
|
||||
if self.initial_weights is not None:
|
||||
@@ -525,36 +545,37 @@ class Convolution3D(Layer):
|
||||
|
||||
def call(self, x, mask=None):
|
||||
input_shape = self.input_spec[0].shape
|
||||
conv_out = K.conv3d(x, self.W, strides=self.subsample,
|
||||
border_mode=self.border_mode,
|
||||
dim_ordering=self.dim_ordering,
|
||||
volume_shape=input_shape,
|
||||
filter_shape=self.W_shape)
|
||||
|
||||
if self.dim_ordering == 'th':
|
||||
output = conv_out + K.reshape(self.b, (1, self.nb_filter, 1, 1, 1))
|
||||
elif self.dim_ordering == 'tf':
|
||||
output = conv_out + K.reshape(self.b, (1, 1, 1, 1, self.nb_filter))
|
||||
else:
|
||||
raise Exception('Invalid dim_ordering: ' + self.dim_ordering)
|
||||
output = K.conv3d(x, self.W, strides=self.subsample,
|
||||
border_mode=self.border_mode,
|
||||
dim_ordering=self.dim_ordering,
|
||||
volume_shape=input_shape,
|
||||
filter_shape=self.W_shape)
|
||||
if self.bias:
|
||||
if self.dim_ordering == 'th':
|
||||
output += K.reshape(self.b, (1, self.nb_filter, 1, 1, 1))
|
||||
elif self.dim_ordering == 'tf':
|
||||
output += K.reshape(self.b, (1, 1, 1, 1, self.nb_filter))
|
||||
else:
|
||||
raise Exception('Invalid dim_ordering: ' + self.dim_ordering)
|
||||
output = self.activation(output)
|
||||
return output
|
||||
|
||||
def get_config(self):
|
||||
config = {"nb_filter": self.nb_filter,
|
||||
"kernel_dim1": self.kernel_dim1,
|
||||
"kernel_dim2": self.kernel_dim2,
|
||||
"kernel_dim3": self.kernel_dim3,
|
||||
"dim_ordering": self.dim_ordering,
|
||||
"init": self.init.__name__,
|
||||
"activation": self.activation.__name__,
|
||||
"border_mode": self.border_mode,
|
||||
"subsample": self.subsample,
|
||||
"W_regularizer": self.W_regularizer.get_config() if self.W_regularizer else None,
|
||||
"b_regularizer": self.b_regularizer.get_config() if self.b_regularizer else None,
|
||||
"activity_regularizer": self.activity_regularizer.get_config() if self.activity_regularizer else None,
|
||||
"W_constraint": self.W_constraint.get_config() if self.W_constraint else None,
|
||||
"b_constraint": self.b_constraint.get_config() if self.b_constraint else None}
|
||||
config = {'nb_filter': self.nb_filter,
|
||||
'kernel_dim1': self.kernel_dim1,
|
||||
'kernel_dim2': self.kernel_dim2,
|
||||
'kernel_dim3': self.kernel_dim3,
|
||||
'dim_ordering': self.dim_ordering,
|
||||
'init': self.init.__name__,
|
||||
'activation': self.activation.__name__,
|
||||
'border_mode': self.border_mode,
|
||||
'subsample': self.subsample,
|
||||
'W_regularizer': self.W_regularizer.get_config() if self.W_regularizer else None,
|
||||
'b_regularizer': self.b_regularizer.get_config() if self.b_regularizer else None,
|
||||
'activity_regularizer': self.activity_regularizer.get_config() if self.activity_regularizer else None,
|
||||
'W_constraint': self.W_constraint.get_config() if self.W_constraint else None,
|
||||
'b_constraint': self.b_constraint.get_config() if self.b_constraint else None,
|
||||
'bias': self.bias}
|
||||
base_config = super(Convolution3D, self).get_config()
|
||||
return dict(list(base_config.items()) + list(config.items()))
|
||||
|
||||
|
||||
+76
-36
@@ -511,12 +511,14 @@ class Dense(Layer):
|
||||
|
||||
```python
|
||||
# as first layer in a sequential model:
|
||||
model = Sequential(Dense(32, input_dim=16))
|
||||
model = Sequential()
|
||||
model.add(Dense(32, input_dim=16))
|
||||
# now the model will take as input arrays of shape (*, 16)
|
||||
# and output arrays of shape (*, 32)
|
||||
|
||||
# this is equivalent to the above:
|
||||
model = Sequential(Dense(32, input_shape=(16,)))
|
||||
model = Sequential()
|
||||
model.add(Dense(32, input_shape=(16,)))
|
||||
|
||||
# after the first layer, you don't need to specify
|
||||
# the size of the input anymore:
|
||||
@@ -548,6 +550,7 @@ class Dense(Layer):
|
||||
(eg. maxnorm, nonneg), applied to the main weights matrix.
|
||||
b_constraint: instance of the [constraints](../constraints.md) module,
|
||||
applied to the bias.
|
||||
bias: whether to include a bias (i.e. make the layer affine rather than linear).
|
||||
input_dim: dimensionality of the input (integer).
|
||||
This argument (or alternatively, the keyword argument `input_shape`)
|
||||
is required when using this layer as the first layer in a model.
|
||||
@@ -560,7 +563,8 @@ class Dense(Layer):
|
||||
'''
|
||||
def __init__(self, output_dim, init='glorot_uniform', activation='linear', weights=None,
|
||||
W_regularizer=None, b_regularizer=None, activity_regularizer=None,
|
||||
W_constraint=None, b_constraint=None, input_dim=None, **kwargs):
|
||||
W_constraint=None, b_constraint=None,
|
||||
bias=True, input_dim=None, **kwargs):
|
||||
self.init = initializations.get(init)
|
||||
self.activation = activations.get(activation)
|
||||
self.output_dim = output_dim
|
||||
@@ -573,6 +577,7 @@ class Dense(Layer):
|
||||
self.W_constraint = constraints.get(W_constraint)
|
||||
self.b_constraint = constraints.get(b_constraint)
|
||||
|
||||
self.bias = bias
|
||||
self.initial_weights = weights
|
||||
self.input_spec = [InputSpec(ndim=2)]
|
||||
|
||||
@@ -588,16 +593,19 @@ class Dense(Layer):
|
||||
|
||||
self.W = self.init((input_dim, self.output_dim),
|
||||
name='{}_W'.format(self.name))
|
||||
self.b = K.zeros((self.output_dim,),
|
||||
name='{}_b'.format(self.name))
|
||||
self.trainable_weights = [self.W, self.b]
|
||||
if self.bias:
|
||||
self.b = K.zeros((self.output_dim,),
|
||||
name='{}_b'.format(self.name))
|
||||
self.trainable_weights = [self.W, self.b]
|
||||
else:
|
||||
self.trainable_weights = [self.W]
|
||||
|
||||
self.regularizers = []
|
||||
if self.W_regularizer:
|
||||
self.W_regularizer.set_param(self.W)
|
||||
self.regularizers.append(self.W_regularizer)
|
||||
|
||||
if self.b_regularizer:
|
||||
if self.bias and self.b_regularizer:
|
||||
self.b_regularizer.set_param(self.b)
|
||||
self.regularizers.append(self.b_regularizer)
|
||||
|
||||
@@ -608,7 +616,7 @@ class Dense(Layer):
|
||||
self.constraints = {}
|
||||
if self.W_constraint:
|
||||
self.constraints[self.W] = self.W_constraint
|
||||
if self.b_constraint:
|
||||
if self.bias and self.b_constraint:
|
||||
self.constraints[self.b] = self.b_constraint
|
||||
|
||||
if self.initial_weights is not None:
|
||||
@@ -616,7 +624,10 @@ class Dense(Layer):
|
||||
del self.initial_weights
|
||||
|
||||
def call(self, x, mask=None):
|
||||
return self.activation(K.dot(x, self.W) + self.b)
|
||||
output = K.dot(x, self.W)
|
||||
if self.bias:
|
||||
output += self.b
|
||||
return self.activation(output)
|
||||
|
||||
def get_output_shape_for(self, input_shape):
|
||||
assert input_shape and len(input_shape) == 2
|
||||
@@ -631,6 +642,7 @@ class Dense(Layer):
|
||||
'activity_regularizer': self.activity_regularizer.get_config() if self.activity_regularizer else None,
|
||||
'W_constraint': self.W_constraint.get_config() if self.W_constraint else None,
|
||||
'b_constraint': self.b_constraint.get_config() if self.b_constraint else None,
|
||||
'bias': self.bias,
|
||||
'input_dim': self.input_dim}
|
||||
base_config = super(Dense, self).get_config()
|
||||
return dict(list(base_config.items()) + list(config.items()))
|
||||
@@ -708,6 +720,7 @@ class MaxoutDense(Layer):
|
||||
(eg. maxnorm, nonneg), applied to the main weights matrix.
|
||||
b_constraint: instance of the [constraints](../constraints.md) module,
|
||||
applied to the bias.
|
||||
bias: whether to include a bias (i.e. make the layer affine rather than linear).
|
||||
input_dim: dimensionality of the input (integer).
|
||||
This argument (or alternatively, the keyword argument `input_shape`)
|
||||
is required when using this layer as the first layer in a model.
|
||||
@@ -724,7 +737,8 @@ class MaxoutDense(Layer):
|
||||
def __init__(self, output_dim, nb_feature=4,
|
||||
init='glorot_uniform', weights=None,
|
||||
W_regularizer=None, b_regularizer=None, activity_regularizer=None,
|
||||
W_constraint=None, b_constraint=None, input_dim=None, **kwargs):
|
||||
W_constraint=None, b_constraint=None,
|
||||
bias=True, input_dim=None, **kwargs):
|
||||
self.output_dim = output_dim
|
||||
self.nb_feature = nb_feature
|
||||
self.init = initializations.get(init)
|
||||
@@ -736,6 +750,7 @@ class MaxoutDense(Layer):
|
||||
self.W_constraint = constraints.get(W_constraint)
|
||||
self.b_constraint = constraints.get(b_constraint)
|
||||
|
||||
self.bias = bias
|
||||
self.initial_weights = weights
|
||||
self.input_spec = [InputSpec(ndim=2)]
|
||||
|
||||
@@ -751,17 +766,19 @@ class MaxoutDense(Layer):
|
||||
|
||||
self.W = self.init((self.nb_feature, input_dim, self.output_dim),
|
||||
name='{}_W'.format(self.name))
|
||||
self.b = K.zeros((self.nb_feature, self.output_dim),
|
||||
name='{}_b'.format(self.name))
|
||||
if self.bias:
|
||||
self.b = K.zeros((self.nb_feature, self.output_dim),
|
||||
name='{}_b'.format(self.name))
|
||||
self.trainable_weights = [self.W, self.b]
|
||||
else:
|
||||
self.trainable_weights = [self.W]
|
||||
|
||||
self.trainable_weights = [self.W, self.b]
|
||||
self.regularizers = []
|
||||
|
||||
if self.W_regularizer:
|
||||
self.W_regularizer.set_param(self.W)
|
||||
self.regularizers.append(self.W_regularizer)
|
||||
|
||||
if self.b_regularizer:
|
||||
if self.bias and self.b_regularizer:
|
||||
self.b_regularizer.set_param(self.b)
|
||||
self.regularizers.append(self.b_regularizer)
|
||||
|
||||
@@ -772,7 +789,7 @@ class MaxoutDense(Layer):
|
||||
self.constraints = {}
|
||||
if self.W_constraint:
|
||||
self.constraints[self.W] = self.W_constraint
|
||||
if self.b_constraint:
|
||||
if self.bias and self.b_constraint:
|
||||
self.constraints[self.b] = self.b_constraint
|
||||
|
||||
if self.initial_weights is not None:
|
||||
@@ -785,7 +802,10 @@ class MaxoutDense(Layer):
|
||||
|
||||
def call(self, x, mask=None):
|
||||
# no activation, this layer is only linear.
|
||||
output = K.max(K.dot(x, self.W) + self.b, axis=1)
|
||||
output = K.dot(x, self.W)
|
||||
if self.bias:
|
||||
output += self.b
|
||||
output = K.max(output, axis=1)
|
||||
return output
|
||||
|
||||
def get_config(self):
|
||||
@@ -797,6 +817,7 @@ class MaxoutDense(Layer):
|
||||
'activity_regularizer': self.activity_regularizer.get_config() if self.activity_regularizer else None,
|
||||
'W_constraint': self.W_constraint.get_config() if self.W_constraint else None,
|
||||
'b_constraint': self.b_constraint.get_config() if self.b_constraint else None,
|
||||
'bias': self.bias,
|
||||
'input_dim': self.input_dim}
|
||||
base_config = super(MaxoutDense, self).get_config()
|
||||
return dict(list(base_config.items()) + list(config.items()))
|
||||
@@ -831,6 +852,7 @@ class Highway(Layer):
|
||||
(eg. maxnorm, nonneg), applied to the main weights matrix.
|
||||
b_constraint: instance of the [constraints](../constraints.md) module,
|
||||
applied to the bias.
|
||||
bias: whether to include a bias (i.e. make the layer affine rather than linear).
|
||||
input_dim: dimensionality of the input (integer).
|
||||
This argument (or alternatively, the keyword argument `input_shape`)
|
||||
is required when using this layer as the first layer in a model.
|
||||
@@ -847,7 +869,8 @@ class Highway(Layer):
|
||||
def __init__(self, init='glorot_uniform', transform_bias=-2,
|
||||
activation='linear', weights=None,
|
||||
W_regularizer=None, b_regularizer=None, activity_regularizer=None,
|
||||
W_constraint=None, b_constraint=None, input_dim=None, **kwargs):
|
||||
W_constraint=None, b_constraint=None,
|
||||
bias=True, input_dim=None, **kwargs):
|
||||
self.init = initializations.get(init)
|
||||
self.transform_bias = transform_bias
|
||||
self.activation = activations.get(activation)
|
||||
@@ -859,6 +882,7 @@ class Highway(Layer):
|
||||
self.W_constraint = constraints.get(W_constraint)
|
||||
self.b_constraint = constraints.get(b_constraint)
|
||||
|
||||
self.bias = bias
|
||||
self.initial_weights = weights
|
||||
self.input_spec = [InputSpec(ndim=2)]
|
||||
|
||||
@@ -877,19 +901,21 @@ class Highway(Layer):
|
||||
self.W_carry = self.init((input_dim, input_dim),
|
||||
name='{}_W_carry'.format(self.name))
|
||||
|
||||
self.b = K.zeros((input_dim,), name='{}_b'.format(self.name))
|
||||
# initialize with a vector of values `transform_bias`
|
||||
self.b_carry = K.variable(np.ones((input_dim,)) * self.transform_bias,
|
||||
name='{}_b_carry'.format(self.name))
|
||||
|
||||
self.trainable_weights = [self.W, self.b, self.W_carry, self.b_carry]
|
||||
if self.bias:
|
||||
self.b = K.zeros((input_dim,), name='{}_b'.format(self.name))
|
||||
# initialize with a vector of values `transform_bias`
|
||||
self.b_carry = K.variable(np.ones((input_dim,)) * self.transform_bias,
|
||||
name='{}_b_carry'.format(self.name))
|
||||
self.trainable_weights = [self.W, self.b, self.W_carry, self.b_carry]
|
||||
else:
|
||||
self.trainable_weights = [self.W, self.W_carry]
|
||||
|
||||
self.regularizers = []
|
||||
if self.W_regularizer:
|
||||
self.W_regularizer.set_param(self.W)
|
||||
self.regularizers.append(self.W_regularizer)
|
||||
|
||||
if self.b_regularizer:
|
||||
if self.bias and self.b_regularizer:
|
||||
self.b_regularizer.set_param(self.b)
|
||||
self.regularizers.append(self.b_regularizer)
|
||||
|
||||
@@ -900,7 +926,7 @@ class Highway(Layer):
|
||||
self.constraints = {}
|
||||
if self.W_constraint:
|
||||
self.constraints[self.W] = self.W_constraint
|
||||
if self.b_constraint:
|
||||
if self.bias and self.b_constraint:
|
||||
self.constraints[self.b] = self.b_constraint
|
||||
|
||||
if self.initial_weights is not None:
|
||||
@@ -908,8 +934,14 @@ class Highway(Layer):
|
||||
del self.initial_weights
|
||||
|
||||
def call(self, x, mask=None):
|
||||
transform_weight = activations.sigmoid(K.dot(x, self.W_carry) + self.b_carry)
|
||||
act = self.activation(K.dot(x, self.W) + self.b)
|
||||
y = K.dot(x, self.W_carry)
|
||||
if self.bias:
|
||||
y += self.b_carry
|
||||
transform_weight = activations.sigmoid(y)
|
||||
y = K.dot(x, self.W)
|
||||
if self.bias:
|
||||
y += self.b
|
||||
act = self.activation(y)
|
||||
act *= transform_weight
|
||||
output = act + (1 - transform_weight) * x
|
||||
return output
|
||||
@@ -923,6 +955,7 @@ class Highway(Layer):
|
||||
'activity_regularizer': self.activity_regularizer.get_config() if self.activity_regularizer else None,
|
||||
'W_constraint': self.W_constraint.get_config() if self.W_constraint else None,
|
||||
'b_constraint': self.b_constraint.get_config() if self.b_constraint else None,
|
||||
'bias': self.bias,
|
||||
'input_dim': self.input_dim}
|
||||
base_config = super(Highway, self).get_config()
|
||||
return dict(list(base_config.items()) + list(config.items()))
|
||||
@@ -966,16 +999,19 @@ class TimeDistributedDense(Layer):
|
||||
(eg. maxnorm, nonneg), applied to the main weights matrix.
|
||||
b_constraint: instance of the [constraints](../constraints.md) module,
|
||||
applied to the bias.
|
||||
bias: whether to include a bias (i.e. make the layer affine rather than linear).
|
||||
input_dim: dimensionality of the input (integer).
|
||||
This argument (or alternatively, the keyword argument `input_shape`)
|
||||
is required when using this layer as the first layer in a model.
|
||||
input_length: length of inputs sequences
|
||||
(integer, or None for variable-length sequences).
|
||||
'''
|
||||
|
||||
def __init__(self, output_dim,
|
||||
init='glorot_uniform', activation='linear', weights=None,
|
||||
W_regularizer=None, b_regularizer=None, activity_regularizer=None,
|
||||
W_constraint=None, b_constraint=None,
|
||||
input_dim=None, input_length=None, **kwargs):
|
||||
bias=True, input_dim=None, input_length=None, **kwargs):
|
||||
warnings.warn('TimeDistributedDense is deprecated, '
|
||||
'please use TimeDistributed(Dense(...)) instead.')
|
||||
self.output_dim = output_dim
|
||||
@@ -989,6 +1025,7 @@ class TimeDistributedDense(Layer):
|
||||
self.W_constraint = constraints.get(W_constraint)
|
||||
self.b_constraint = constraints.get(b_constraint)
|
||||
|
||||
self.bias = bias
|
||||
self.initial_weights = weights
|
||||
self.input_spec = [InputSpec(ndim=3)]
|
||||
self.supports_masking = True
|
||||
@@ -1006,17 +1043,17 @@ class TimeDistributedDense(Layer):
|
||||
|
||||
self.W = self.init((input_dim, self.output_dim),
|
||||
name='{}_W'.format(self.name))
|
||||
self.b = K.zeros((self.output_dim,),
|
||||
name='{}_b'.format(self.name))
|
||||
|
||||
self.trainable_weights = [self.W, self.b]
|
||||
if self.bias:
|
||||
self.b = K.zeros((self.output_dim,),
|
||||
name='{}_b'.format(self.name))
|
||||
self.trainable_weights = [self.W, self.b]
|
||||
self.regularizers = []
|
||||
|
||||
if self.W_regularizer:
|
||||
self.W_regularizer.set_param(self.W)
|
||||
self.regularizers.append(self.W_regularizer)
|
||||
|
||||
if self.b_regularizer:
|
||||
if self.bias and self.b_regularizer:
|
||||
self.b_regularizer.set_param(self.b)
|
||||
self.regularizers.append(self.b_regularizer)
|
||||
|
||||
@@ -1027,7 +1064,7 @@ class TimeDistributedDense(Layer):
|
||||
self.constraints = {}
|
||||
if self.W_constraint:
|
||||
self.constraints[self.W] = self.W_constraint
|
||||
if self.b_constraint:
|
||||
if self.bias and self.b_constraint:
|
||||
self.constraints[self.b] = self.b_constraint
|
||||
|
||||
if self.initial_weights is not None:
|
||||
@@ -1057,7 +1094,9 @@ class TimeDistributedDense(Layer):
|
||||
|
||||
# Squash samples and timesteps into a single axis
|
||||
x = K.reshape(x, (-1, input_shape[-1])) # (samples * timesteps, input_dim)
|
||||
y = K.dot(x, self.W) + self.b # (samples * timesteps, output_dim)
|
||||
y = K.dot(x, self.W) # (samples * timesteps, output_dim)
|
||||
if self.bias:
|
||||
y += self.b
|
||||
# We have to reshape Y to (samples, timesteps, output_dim)
|
||||
y = K.reshape(y, (-1, input_length, self.output_dim)) # (samples, timesteps, output_dim)
|
||||
y = self.activation(y)
|
||||
@@ -1072,6 +1111,7 @@ class TimeDistributedDense(Layer):
|
||||
'activity_regularizer': self.activity_regularizer.get_config() if self.activity_regularizer else None,
|
||||
'W_constraint': self.W_constraint.get_config() if self.W_constraint else None,
|
||||
'b_constraint': self.b_constraint.get_config() if self.b_constraint else None,
|
||||
'bias': self.bias,
|
||||
'input_dim': self.input_dim,
|
||||
'input_length': self.input_length}
|
||||
base_config = super(TimeDistributedDense, self).get_config()
|
||||
|
||||
@@ -17,7 +17,7 @@ class Embedding(Layer):
|
||||
model = Sequential()
|
||||
model.add(Embedding(1000, 64, input_length=10))
|
||||
# the model will take as input an integer matrix of size (batch, input_length).
|
||||
# the largest integer (i.e. word index) in the input should be no larger than 1000 (vocabulary size).
|
||||
# the largest integer (i.e. word index) in the input should be no larger than 999 (vocabulary size).
|
||||
# now model.output_shape == (None, 10, 64), where None is the batch dimension.
|
||||
|
||||
input_array = np.random.randint(1000, size=(32, 10))
|
||||
@@ -28,7 +28,7 @@ class Embedding(Layer):
|
||||
```
|
||||
|
||||
# Arguments
|
||||
input_dim: int >= 0. Size of the vocabulary, ie.
|
||||
input_dim: int > 0. Size of the vocabulary, ie.
|
||||
1 + maximum integer index occurring in the input data.
|
||||
output_dim: int >= 0. Dimension of the dense embedding.
|
||||
init: name of initialization function for the weights
|
||||
@@ -46,6 +46,8 @@ class Embedding(Layer):
|
||||
This is useful for [recurrent layers](recurrent.md) which may take
|
||||
variable length input. If this is `True` then all subsequent layers
|
||||
in the model need to support masking or an exception will be raised.
|
||||
If mask_zero is set to True, as a consequence, index 0 cannot be
|
||||
used in the vocabulary (input_dim should equal |vocabulary| + 2).
|
||||
input_length: Length of input sequences, when it is constant.
|
||||
This argument is required if you are going to connect
|
||||
`Flatten` then `Dense` layers upstream
|
||||
|
||||
@@ -47,7 +47,7 @@ class BatchNormalization(Layer):
|
||||
Same shape as input.
|
||||
|
||||
# References
|
||||
- [Batch Normalization: Accelerating Deep Network Training by Reducing Internal Covariate Shift](http://arxiv.org/pdf/1502.03167v3.pdf)
|
||||
- [Batch Normalization: Accelerating Deep Network Training by Reducing Internal Covariate Shift](http://jmlr.org/proceedings/papers/v37/ioffe15.html)
|
||||
'''
|
||||
def __init__(self, epsilon=1e-6, mode=0, axis=-1, momentum=0.9,
|
||||
weights=None, beta_init='zero', gamma_init='one', **kwargs):
|
||||
@@ -94,8 +94,8 @@ class BatchNormalization(Layer):
|
||||
std = K.mean(K.square(x - brodcast_mean) + self.epsilon, axis=reduction_axes)
|
||||
std = K.sqrt(std)
|
||||
brodcast_std = K.reshape(std, broadcast_shape)
|
||||
mean_update = self.momentum * self.running_mean + (1-self.momentum) * mean
|
||||
std_update = self.momentum * self.running_std + (1-self.momentum) * std
|
||||
mean_update = self.momentum * self.running_mean + (1 - self.momentum) * mean
|
||||
std_update = self.momentum * self.running_std + (1 - self.momentum) * std
|
||||
self.updates = [(self.running_mean, mean_update),
|
||||
(self.running_std, std_update)]
|
||||
x_normed = (x - brodcast_mean) / (brodcast_std + self.epsilon)
|
||||
|
||||
+200
-139
@@ -81,12 +81,20 @@ class Recurrent(Layer):
|
||||
is always unrolled, so this argument does not do anything.
|
||||
Unrolling can speed-up a RNN, although it tends to be more memory-intensive.
|
||||
Unrolling is only suitable for short sequences.
|
||||
consume_less: one of "cpu", "mem". If set to "cpu", the RNN will use
|
||||
consume_less: one of "cpu", "mem", or "gpu" (LSTM/GRU only).
|
||||
If set to "cpu", the RNN will use
|
||||
an implementation that uses fewer, larger matrix products,
|
||||
thus running faster (at least on CPU) but consuming more memory.
|
||||
thus running faster on CPU but consuming more memory.
|
||||
|
||||
If set to "mem", the RNN will use more matrix products,
|
||||
but smaller ones, thus running slower (may actually be faster on GPU)
|
||||
while consuming less memory.
|
||||
|
||||
If set to "gpu" (LSTM/GRU only), the RNN will combine the input gate,
|
||||
the forget gate and the output gate into a single matrix,
|
||||
enabling more time-efficient parallelization on the GPU. Note: RNN
|
||||
dropout must be shared for all gates, resulting in a slightly
|
||||
reduced regularization.
|
||||
input_dim: dimensionality of the input (integer).
|
||||
This argument (or alternatively, the keyword argument `input_shape`)
|
||||
is required when using this layer as the first layer in a model.
|
||||
@@ -383,15 +391,15 @@ class SimpleRNN(Recurrent):
|
||||
return constants
|
||||
|
||||
def get_config(self):
|
||||
config = {"output_dim": self.output_dim,
|
||||
"init": self.init.__name__,
|
||||
"inner_init": self.inner_init.__name__,
|
||||
"activation": self.activation.__name__,
|
||||
"W_regularizer": self.W_regularizer.get_config() if self.W_regularizer else None,
|
||||
"U_regularizer": self.U_regularizer.get_config() if self.U_regularizer else None,
|
||||
"b_regularizer": self.b_regularizer.get_config() if self.b_regularizer else None,
|
||||
"dropout_W": self.dropout_W,
|
||||
"dropout_U": self.dropout_U}
|
||||
config = {'output_dim': self.output_dim,
|
||||
'init': self.init.__name__,
|
||||
'inner_init': self.inner_init.__name__,
|
||||
'activation': self.activation.__name__,
|
||||
'W_regularizer': self.W_regularizer.get_config() if self.W_regularizer else None,
|
||||
'U_regularizer': self.U_regularizer.get_config() if self.U_regularizer else None,
|
||||
'b_regularizer': self.b_regularizer.get_config() if self.b_regularizer else None,
|
||||
'dropout_W': self.dropout_W,
|
||||
'dropout_U': self.dropout_U}
|
||||
base_config = super(SimpleRNN, self).get_config()
|
||||
return dict(list(base_config.items()) + list(config.items()))
|
||||
|
||||
@@ -444,53 +452,66 @@ class GRU(Recurrent):
|
||||
|
||||
def build(self, input_shape):
|
||||
self.input_spec = [InputSpec(shape=input_shape)]
|
||||
input_dim = input_shape[2]
|
||||
self.input_dim = input_dim
|
||||
self.input_dim = input_shape[2]
|
||||
|
||||
self.W_z = self.init((input_dim, self.output_dim),
|
||||
name='{}_W_z'.format(self.name))
|
||||
self.U_z = self.inner_init((self.output_dim, self.output_dim),
|
||||
name='{}_U_z'.format(self.name))
|
||||
self.b_z = K.zeros((self.output_dim,), name='{}_b_z'.format(self.name))
|
||||
|
||||
self.W_r = self.init((input_dim, self.output_dim),
|
||||
name='{}_W_r'.format(self.name))
|
||||
self.U_r = self.inner_init((self.output_dim, self.output_dim),
|
||||
name='{}_U_r'.format(self.name))
|
||||
self.b_r = K.zeros((self.output_dim,), name='{}_b_r'.format(self.name))
|
||||
|
||||
self.W_h = self.init((input_dim, self.output_dim),
|
||||
name='{}_W_h'.format(self.name))
|
||||
self.U_h = self.inner_init((self.output_dim, self.output_dim),
|
||||
name='{}_U_h'.format(self.name))
|
||||
self.b_h = K.zeros((self.output_dim,), name='{}_b_h'.format(self.name))
|
||||
|
||||
self.regularizers = []
|
||||
if self.W_regularizer:
|
||||
self.W_regularizer.set_param(K.concatenate([self.W_z,
|
||||
self.W_r,
|
||||
self.W_h]))
|
||||
self.regularizers.append(self.W_regularizer)
|
||||
if self.U_regularizer:
|
||||
self.U_regularizer.set_param(K.concatenate([self.U_z,
|
||||
self.U_r,
|
||||
self.U_h]))
|
||||
self.regularizers.append(self.U_regularizer)
|
||||
if self.b_regularizer:
|
||||
self.b_regularizer.set_param(K.concatenate([self.b_z,
|
||||
self.b_r,
|
||||
self.b_h]))
|
||||
self.regularizers.append(self.b_regularizer)
|
||||
|
||||
self.trainable_weights = [self.W_z, self.U_z, self.b_z,
|
||||
self.W_r, self.U_r, self.b_r,
|
||||
self.W_h, self.U_h, self.b_h]
|
||||
if self.stateful:
|
||||
self.reset_states()
|
||||
else:
|
||||
# initial states: all-zero tensor of shape (output_dim)
|
||||
self.states = [None]
|
||||
|
||||
if self.consume_less == 'gpu':
|
||||
|
||||
self.W = self.init((self.input_dim, 3 * self.output_dim),
|
||||
name='{}_W'.format(self.name))
|
||||
self.U = self.inner_init((self.output_dim, 3 * self.output_dim),
|
||||
name='{}_U'.format(self.name))
|
||||
|
||||
self.b = K.variable(np.hstack((np.zeros(self.output_dim),
|
||||
np.zeros(self.output_dim),
|
||||
np.zeros(self.output_dim))),
|
||||
name='{}_b'.format(self.name))
|
||||
|
||||
self.trainable_weights = [self.W, self.U, self.b]
|
||||
else:
|
||||
|
||||
self.W_z = self.init((self.input_dim, self.output_dim),
|
||||
name='{}_W_z'.format(self.name))
|
||||
self.U_z = self.inner_init((self.output_dim, self.output_dim),
|
||||
name='{}_U_z'.format(self.name))
|
||||
self.b_z = K.zeros((self.output_dim,), name='{}_b_z'.format(self.name))
|
||||
|
||||
self.W_r = self.init((self.input_dim, self.output_dim),
|
||||
name='{}_W_r'.format(self.name))
|
||||
self.U_r = self.inner_init((self.output_dim, self.output_dim),
|
||||
name='{}_U_r'.format(self.name))
|
||||
self.b_r = K.zeros((self.output_dim,), name='{}_b_r'.format(self.name))
|
||||
|
||||
self.W_h = self.init((self.input_dim, self.output_dim),
|
||||
name='{}_W_h'.format(self.name))
|
||||
self.U_h = self.inner_init((self.output_dim, self.output_dim),
|
||||
name='{}_U_h'.format(self.name))
|
||||
self.b_h = K.zeros((self.output_dim,), name='{}_b_h'.format(self.name))
|
||||
|
||||
self.trainable_weights = [self.W_z, self.U_z, self.b_z,
|
||||
self.W_r, self.U_r, self.b_r,
|
||||
self.W_h, self.U_h, self.b_h]
|
||||
|
||||
self.W = K.concatenate([self.W_z, self.W_r, self.W_h])
|
||||
self.U = K.concatenate([self.U_z, self.U_r, self.U_h])
|
||||
self.b = K.concatenate([self.b_z, self.b_r, self.b_h])
|
||||
|
||||
self.regularizers = []
|
||||
if self.W_regularizer:
|
||||
self.W_regularizer.set_param(self.W)
|
||||
self.regularizers.append(self.W_regularizer)
|
||||
if self.U_regularizer:
|
||||
self.U_regularizer.set_param(self.U)
|
||||
self.regularizers.append(self.U_regularizer)
|
||||
if self.b_regularizer:
|
||||
self.b_regularizer.set_param(self.b)
|
||||
self.regularizers.append(self.b_regularizer)
|
||||
|
||||
if self.initial_weights is not None:
|
||||
self.set_weights(self.initial_weights)
|
||||
del self.initial_weights
|
||||
@@ -528,19 +549,37 @@ class GRU(Recurrent):
|
||||
B_U = states[1] # dropout matrices for recurrent units
|
||||
B_W = states[2]
|
||||
|
||||
if self.consume_less == 'cpu':
|
||||
x_z = x[:, :self.output_dim]
|
||||
x_r = x[:, self.output_dim: 2 * self.output_dim]
|
||||
x_h = x[:, 2 * self.output_dim:]
|
||||
if self.consume_less == 'gpu':
|
||||
|
||||
matrix_x = K.dot(x * B_W[0], self.W) + self.b
|
||||
matrix_inner = K.dot(h_tm1 * B_U[0], self.U[:, :2 * self.output_dim])
|
||||
|
||||
x_z = matrix_x[:, :self.output_dim]
|
||||
x_r = matrix_x[:, self.output_dim: 2 * self.output_dim]
|
||||
inner_z = matrix_inner[:, :self.output_dim]
|
||||
inner_r = matrix_inner[:, self.output_dim: 2 * self.output_dim]
|
||||
|
||||
z = self.inner_activation(x_z + inner_z)
|
||||
r = self.inner_activation(x_r + inner_r)
|
||||
|
||||
x_h = matrix_x[:, 2 * self.output_dim:]
|
||||
inner_h = K.dot(r * h_tm1 * B_U[0], self.U[:, 2 * self.output_dim:])
|
||||
hh = self.activation(x_h + inner_h)
|
||||
else:
|
||||
x_z = K.dot(x * B_W[0], self.W_z) + self.b_z
|
||||
x_r = K.dot(x * B_W[1], self.W_r) + self.b_r
|
||||
x_h = K.dot(x * B_W[2], self.W_h) + self.b_h
|
||||
if self.consume_less == 'cpu':
|
||||
x_z = x[:, :self.output_dim]
|
||||
x_r = x[:, self.output_dim: 2 * self.output_dim]
|
||||
x_h = x[:, 2 * self.output_dim:]
|
||||
elif self.consume_less == 'mem':
|
||||
x_z = K.dot(x * B_W[0], self.W_z) + self.b_z
|
||||
x_r = K.dot(x * B_W[1], self.W_r) + self.b_r
|
||||
x_h = K.dot(x * B_W[2], self.W_h) + self.b_h
|
||||
else:
|
||||
raise Exception('Unknown `consume_less` mode.')
|
||||
z = self.inner_activation(x_z + K.dot(h_tm1 * B_U[0], self.U_z))
|
||||
r = self.inner_activation(x_r + K.dot(h_tm1 * B_U[1], self.U_r))
|
||||
|
||||
z = self.inner_activation(x_z + K.dot(h_tm1 * B_U[0], self.U_z))
|
||||
r = self.inner_activation(x_r + K.dot(h_tm1 * B_U[1], self.U_r))
|
||||
|
||||
hh = self.activation(x_h + K.dot(r * h_tm1 * B_U[2], self.U_h))
|
||||
hh = self.activation(x_h + K.dot(r * h_tm1 * B_U[2], self.U_h))
|
||||
h = z * h_tm1 + (1 - z) * hh
|
||||
return h, [h]
|
||||
|
||||
@@ -552,7 +591,7 @@ class GRU(Recurrent):
|
||||
B_U = [K.in_train_phase(K.dropout(ones, self.dropout_U), ones) for _ in range(3)]
|
||||
constants.append(B_U)
|
||||
else:
|
||||
constants.append([K.cast_to_floatx(1.) for _ in range(4)])
|
||||
constants.append([K.cast_to_floatx(1.) for _ in range(3)])
|
||||
|
||||
if 0 < self.dropout_W < 1:
|
||||
input_shape = self.input_spec[0].shape
|
||||
@@ -562,20 +601,20 @@ class GRU(Recurrent):
|
||||
B_W = [K.in_train_phase(K.dropout(ones, self.dropout_W), ones) for _ in range(3)]
|
||||
constants.append(B_W)
|
||||
else:
|
||||
constants.append([K.cast_to_floatx(1.) for _ in range(4)])
|
||||
constants.append([K.cast_to_floatx(1.) for _ in range(3)])
|
||||
return constants
|
||||
|
||||
def get_config(self):
|
||||
config = {"output_dim": self.output_dim,
|
||||
"init": self.init.__name__,
|
||||
"inner_init": self.inner_init.__name__,
|
||||
"activation": self.activation.__name__,
|
||||
"inner_activation": self.inner_activation.__name__,
|
||||
"W_regularizer": self.W_regularizer.get_config() if self.W_regularizer else None,
|
||||
"U_regularizer": self.U_regularizer.get_config() if self.U_regularizer else None,
|
||||
"b_regularizer": self.b_regularizer.get_config() if self.b_regularizer else None,
|
||||
"dropout_W": self.dropout_W,
|
||||
"dropout_U": self.dropout_U}
|
||||
config = {'output_dim': self.output_dim,
|
||||
'init': self.init.__name__,
|
||||
'inner_init': self.inner_init.__name__,
|
||||
'activation': self.activation.__name__,
|
||||
'inner_activation': self.inner_activation.__name__,
|
||||
'W_regularizer': self.W_regularizer.get_config() if self.W_regularizer else None,
|
||||
'U_regularizer': self.U_regularizer.get_config() if self.U_regularizer else None,
|
||||
'b_regularizer': self.b_regularizer.get_config() if self.b_regularizer else None,
|
||||
'dropout_W': self.dropout_W,
|
||||
'dropout_U': self.dropout_U}
|
||||
base_config = super(GRU, self).get_config()
|
||||
return dict(list(base_config.items()) + list(config.items()))
|
||||
|
||||
@@ -637,8 +676,7 @@ class LSTM(Recurrent):
|
||||
|
||||
def build(self, input_shape):
|
||||
self.input_spec = [InputSpec(shape=input_shape)]
|
||||
input_dim = input_shape[2]
|
||||
self.input_dim = input_dim
|
||||
self.input_dim = input_shape[2]
|
||||
|
||||
if self.stateful:
|
||||
self.reset_states()
|
||||
@@ -646,56 +684,64 @@ class LSTM(Recurrent):
|
||||
# initial states: 2 all-zero tensors of shape (output_dim)
|
||||
self.states = [None, None]
|
||||
|
||||
self.W_i = self.init((input_dim, self.output_dim),
|
||||
name='{}_W_i'.format(self.name))
|
||||
self.U_i = self.inner_init((self.output_dim, self.output_dim),
|
||||
name='{}_U_i'.format(self.name))
|
||||
self.b_i = K.zeros((self.output_dim,), name='{}_b_i'.format(self.name))
|
||||
if self.consume_less == 'gpu':
|
||||
self.W = self.init((self.input_dim, 4 * self.output_dim),
|
||||
name='{}_W'.format(self.name))
|
||||
self.U = self.inner_init((self.output_dim, 4 * self.output_dim),
|
||||
name='{}_U'.format(self.name))
|
||||
|
||||
self.W_f = self.init((input_dim, self.output_dim),
|
||||
name='{}_W_f'.format(self.name))
|
||||
self.U_f = self.inner_init((self.output_dim, self.output_dim),
|
||||
name='{}_U_f'.format(self.name))
|
||||
self.b_f = self.forget_bias_init((self.output_dim,),
|
||||
name='{}_b_f'.format(self.name))
|
||||
self.b = K.variable(np.hstack((np.zeros(self.output_dim),
|
||||
K.get_value(self.forget_bias_init(self.output_dim)),
|
||||
np.zeros(self.output_dim),
|
||||
np.zeros(self.output_dim))),
|
||||
name='{}_b'.format(self.name))
|
||||
self.trainable_weights = [self.W, self.U, self.b]
|
||||
else:
|
||||
self.W_i = self.init((self.input_dim, self.output_dim),
|
||||
name='{}_W_i'.format(self.name))
|
||||
self.U_i = self.inner_init((self.output_dim, self.output_dim),
|
||||
name='{}_U_i'.format(self.name))
|
||||
self.b_i = K.zeros((self.output_dim,), name='{}_b_i'.format(self.name))
|
||||
|
||||
self.W_c = self.init((input_dim, self.output_dim),
|
||||
name='{}_W_c'.format(self.name))
|
||||
self.U_c = self.inner_init((self.output_dim, self.output_dim),
|
||||
name='{}_U_c'.format(self.name))
|
||||
self.b_c = K.zeros((self.output_dim,), name='{}_b_c'.format(self.name))
|
||||
self.W_f = self.init((self.input_dim, self.output_dim),
|
||||
name='{}_W_f'.format(self.name))
|
||||
self.U_f = self.inner_init((self.output_dim, self.output_dim),
|
||||
name='{}_U_f'.format(self.name))
|
||||
self.b_f = self.forget_bias_init((self.output_dim,),
|
||||
name='{}_b_f'.format(self.name))
|
||||
|
||||
self.W_o = self.init((input_dim, self.output_dim),
|
||||
name='{}_W_o'.format(self.name))
|
||||
self.U_o = self.inner_init((self.output_dim, self.output_dim),
|
||||
name='{}_U_o'.format(self.name))
|
||||
self.b_o = K.zeros((self.output_dim,), name='{}_b_o'.format(self.name))
|
||||
self.W_c = self.init((self.input_dim, self.output_dim),
|
||||
name='{}_W_c'.format(self.name))
|
||||
self.U_c = self.inner_init((self.output_dim, self.output_dim),
|
||||
name='{}_U_c'.format(self.name))
|
||||
self.b_c = K.zeros((self.output_dim,), name='{}_b_c'.format(self.name))
|
||||
|
||||
self.W_o = self.init((self.input_dim, self.output_dim),
|
||||
name='{}_W_o'.format(self.name))
|
||||
self.U_o = self.inner_init((self.output_dim, self.output_dim),
|
||||
name='{}_U_o'.format(self.name))
|
||||
self.b_o = K.zeros((self.output_dim,), name='{}_b_o'.format(self.name))
|
||||
|
||||
self.trainable_weights = [self.W_i, self.U_i, self.b_i,
|
||||
self.W_c, self.U_c, self.b_c,
|
||||
self.W_f, self.U_f, self.b_f,
|
||||
self.W_o, self.U_o, self.b_o]
|
||||
|
||||
self.W = K.concatenate([self.W_i, self.W_f, self.W_c, self.W_o])
|
||||
self.U = K.concatenate([self.U_i, self.U_f, self.U_c, self.U_o])
|
||||
self.b = K.concatenate([self.b_i, self.b_f, self.b_c, self.b_o])
|
||||
|
||||
self.regularizers = []
|
||||
if self.W_regularizer:
|
||||
self.W_regularizer.set_param(K.concatenate([self.W_i,
|
||||
self.W_f,
|
||||
self.W_c,
|
||||
self.W_o]))
|
||||
self.W_regularizer.set_param(self.W)
|
||||
self.regularizers.append(self.W_regularizer)
|
||||
if self.U_regularizer:
|
||||
self.U_regularizer.set_param(K.concatenate([self.U_i,
|
||||
self.U_f,
|
||||
self.U_c,
|
||||
self.U_o]))
|
||||
self.U_regularizer.set_param(self.U)
|
||||
self.regularizers.append(self.U_regularizer)
|
||||
if self.b_regularizer:
|
||||
self.b_regularizer.set_param(K.concatenate([self.b_i,
|
||||
self.b_f,
|
||||
self.b_c,
|
||||
self.b_o]))
|
||||
self.b_regularizer.set_param(self.b)
|
||||
self.regularizers.append(self.b_regularizer)
|
||||
|
||||
self.trainable_weights = [self.W_i, self.U_i, self.b_i,
|
||||
self.W_c, self.U_c, self.b_c,
|
||||
self.W_f, self.U_f, self.b_f,
|
||||
self.W_o, self.U_o, self.b_o]
|
||||
|
||||
if self.initial_weights is not None:
|
||||
self.set_weights(self.initial_weights)
|
||||
del self.initial_weights
|
||||
@@ -743,21 +789,36 @@ class LSTM(Recurrent):
|
||||
B_U = states[2]
|
||||
B_W = states[3]
|
||||
|
||||
if self.consume_less == 'cpu':
|
||||
x_i = x[:, :self.output_dim]
|
||||
x_f = x[:, self.output_dim: 2 * self.output_dim]
|
||||
x_c = x[:, 2 * self.output_dim: 3 * self.output_dim]
|
||||
x_o = x[:, 3 * self.output_dim:]
|
||||
else:
|
||||
x_i = K.dot(x * B_W[0], self.W_i) + self.b_i
|
||||
x_f = K.dot(x * B_W[1], self.W_f) + self.b_f
|
||||
x_c = K.dot(x * B_W[2], self.W_c) + self.b_c
|
||||
x_o = K.dot(x * B_W[3], self.W_o) + self.b_o
|
||||
if self.consume_less == 'gpu':
|
||||
z = K.dot(x * B_W[0], self.W) + K.dot(h_tm1 * B_U[0], self.U) + self.b
|
||||
|
||||
i = self.inner_activation(x_i + K.dot(h_tm1 * B_U[0], self.U_i))
|
||||
f = self.inner_activation(x_f + K.dot(h_tm1 * B_U[1], self.U_f))
|
||||
c = f * c_tm1 + i * self.activation(x_c + K.dot(h_tm1 * B_U[2], self.U_c))
|
||||
o = self.inner_activation(x_o + K.dot(h_tm1 * B_U[3], self.U_o))
|
||||
z0 = z[:, :self.output_dim]
|
||||
z1 = z[:, self.output_dim: 2 * self.output_dim]
|
||||
z2 = z[:, 2 * self.output_dim: 3 * self.output_dim]
|
||||
z3 = z[:, 3 * self.output_dim:]
|
||||
|
||||
i = self.inner_activation(z0)
|
||||
f = self.inner_activation(z1)
|
||||
c = f * c_tm1 + i * self.activation(z2)
|
||||
o = self.inner_activation(z3)
|
||||
else:
|
||||
if self.consume_less == 'cpu':
|
||||
x_i = x[:, :self.output_dim]
|
||||
x_f = x[:, self.output_dim: 2 * self.output_dim]
|
||||
x_c = x[:, 2 * self.output_dim: 3 * self.output_dim]
|
||||
x_o = x[:, 3 * self.output_dim:]
|
||||
elif self.consume_less == 'mem':
|
||||
x_i = K.dot(x * B_W[0], self.W_i) + self.b_i
|
||||
x_f = K.dot(x * B_W[1], self.W_f) + self.b_f
|
||||
x_c = K.dot(x * B_W[2], self.W_c) + self.b_c
|
||||
x_o = K.dot(x * B_W[3], self.W_o) + self.b_o
|
||||
else:
|
||||
raise Exception('Unknown `consume_less` mode.')
|
||||
|
||||
i = self.inner_activation(x_i + K.dot(h_tm1 * B_U[0], self.U_i))
|
||||
f = self.inner_activation(x_f + K.dot(h_tm1 * B_U[1], self.U_f))
|
||||
c = f * c_tm1 + i * self.activation(x_c + K.dot(h_tm1 * B_U[2], self.U_c))
|
||||
o = self.inner_activation(x_o + K.dot(h_tm1 * B_U[3], self.U_o))
|
||||
|
||||
h = o * self.activation(c)
|
||||
return h, [h, c]
|
||||
@@ -784,16 +845,16 @@ class LSTM(Recurrent):
|
||||
return constants
|
||||
|
||||
def get_config(self):
|
||||
config = {"output_dim": self.output_dim,
|
||||
"init": self.init.__name__,
|
||||
"inner_init": self.inner_init.__name__,
|
||||
"forget_bias_init": self.forget_bias_init.__name__,
|
||||
"activation": self.activation.__name__,
|
||||
"inner_activation": self.inner_activation.__name__,
|
||||
"W_regularizer": self.W_regularizer.get_config() if self.W_regularizer else None,
|
||||
"U_regularizer": self.U_regularizer.get_config() if self.U_regularizer else None,
|
||||
"b_regularizer": self.b_regularizer.get_config() if self.b_regularizer else None,
|
||||
"dropout_W": self.dropout_W,
|
||||
"dropout_U": self.dropout_U}
|
||||
config = {'output_dim': self.output_dim,
|
||||
'init': self.init.__name__,
|
||||
'inner_init': self.inner_init.__name__,
|
||||
'forget_bias_init': self.forget_bias_init.__name__,
|
||||
'activation': self.activation.__name__,
|
||||
'inner_activation': self.inner_activation.__name__,
|
||||
'W_regularizer': self.W_regularizer.get_config() if self.W_regularizer else None,
|
||||
'U_regularizer': self.U_regularizer.get_config() if self.U_regularizer else None,
|
||||
'b_regularizer': self.b_regularizer.get_config() if self.b_regularizer else None,
|
||||
'dropout_W': self.dropout_W,
|
||||
'dropout_U': self.dropout_U}
|
||||
base_config = super(LSTM, self).get_config()
|
||||
return dict(list(base_config.items()) + list(config.items()))
|
||||
|
||||
@@ -6,6 +6,7 @@ class Wrapper(Layer):
|
||||
|
||||
def __init__(self, layer, **kwargs):
|
||||
self.layer = layer
|
||||
self.uses_learning_phase = layer.uses_learning_phase
|
||||
super(Wrapper, self).__init__(**kwargs)
|
||||
|
||||
def build(self, input_shape=None):
|
||||
@@ -97,7 +98,9 @@ class TimeDistributed(Wrapper):
|
||||
'an "input_shape" or "batch_input_shape" '
|
||||
'argument, including the time axis.')
|
||||
child_input_shape = (input_shape[0],) + input_shape[2:]
|
||||
self.layer.build(child_input_shape)
|
||||
if not self.layer.built:
|
||||
self.layer.build(child_input_shape)
|
||||
self.layer.built = True
|
||||
super(TimeDistributed, self).build()
|
||||
|
||||
def get_output_shape_for(self, input_shape):
|
||||
@@ -121,11 +124,11 @@ class TimeDistributed(Wrapper):
|
||||
# no batch size specified, therefore the layer will be able
|
||||
# to process batches of any size
|
||||
# we can go with reshape-based implementation for performance
|
||||
X = K.reshape(X, (-1, ) + input_shape[2:]) # (nb_samples * timesteps, ...)
|
||||
y = self.layer.call(X) # (nb_samples * timesteps, ...)
|
||||
input_length = input_shape[1]
|
||||
if not input_length:
|
||||
input_length = K.shape(X)[1]
|
||||
X = K.reshape(X, (-1, ) + input_shape[2:]) # (nb_samples * timesteps, ...)
|
||||
y = self.layer.call(X) # (nb_samples * timesteps, ...)
|
||||
# (nb_samples, timesteps, ...)
|
||||
output_shape = self.get_output_shape_for(input_shape)
|
||||
y = K.reshape(y, (-1, input_length) + output_shape[2:])
|
||||
|
||||
@@ -473,6 +473,8 @@ class Graph(Model):
|
||||
x = self._get_x(data)
|
||||
output_list = super(Graph, self).predict(x, batch_size=batch_size,
|
||||
verbose=verbose)
|
||||
if not isinstance(output_list, list):
|
||||
output_list = [output_list]
|
||||
return dict(zip(self._graph_outputs, output_list))
|
||||
|
||||
def train_on_batch(self, data,
|
||||
@@ -528,6 +530,8 @@ class Graph(Model):
|
||||
|
||||
def predict_on_batch(self, data):
|
||||
output_list = super(Graph, self).predict_on_batch(data)
|
||||
if not isinstance(output_list, list):
|
||||
output_list = [output_list]
|
||||
return dict(zip(self._graph_outputs, output_list))
|
||||
|
||||
def fit_generator(self, generator, samples_per_epoch, nb_epoch,
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
import numpy as np
|
||||
from . import backend as K
|
||||
|
||||
|
||||
@@ -10,6 +11,74 @@ def categorical_accuracy(y_true, y_pred):
|
||||
K.argmax(y_pred, axis=-1)))
|
||||
|
||||
|
||||
def sparse_categorical_accuracy(y_true, y_pred):
|
||||
return K.mean(K.equal(K.max(y_true, axis=-1),
|
||||
K.cast(K.argmax(y_pred, axis=-1), K.floatx())))
|
||||
|
||||
|
||||
def mean_squared_error(y_true, y_pred):
|
||||
return K.mean(K.square(y_pred - y_true))
|
||||
|
||||
|
||||
def mean_absolute_error(y_true, y_pred):
|
||||
return K.mean(K.abs(y_pred - y_true))
|
||||
|
||||
|
||||
def mean_absolute_percentage_error(y_true, y_pred):
|
||||
diff = K.abs((y_true - y_pred) / K.clip(K.abs(y_true), K.epsilon(), np.inf))
|
||||
return 100. * K.mean(diff)
|
||||
|
||||
|
||||
def mean_squared_logarithmic_error(y_true, y_pred):
|
||||
first_log = K.log(K.clip(y_pred, K.epsilon(), np.inf) + 1.)
|
||||
second_log = K.log(K.clip(y_true, K.epsilon(), np.inf) + 1.)
|
||||
return K.mean(K.square(first_log - second_log))
|
||||
|
||||
|
||||
def squared_hinge(y_true, y_pred):
|
||||
return K.mean(K.square(K.maximum(1. - y_true * y_pred, 0.)))
|
||||
|
||||
|
||||
def hinge(y_true, y_pred):
|
||||
return K.mean(K.maximum(1. - y_true * y_pred, 0.))
|
||||
|
||||
|
||||
def categorical_crossentropy(y_true, y_pred):
|
||||
'''Expects a binary class matrix instead of a vector of scalar classes.
|
||||
'''
|
||||
return K.mean(K.categorical_crossentropy(y_pred, y_true))
|
||||
|
||||
|
||||
def sparse_categorical_crossentropy(y_true, y_pred):
|
||||
'''expects an array of integer classes.
|
||||
Note: labels shape must have the same number of dimensions as output shape.
|
||||
If you get a shape error, add a length-1 dimension to labels.
|
||||
'''
|
||||
return K.mean(K.sparse_categorical_crossentropy(y_pred, y_true))
|
||||
|
||||
|
||||
def binary_crossentropy(y_true, y_pred):
|
||||
return K.mean(K.binary_crossentropy(y_pred, y_true))
|
||||
|
||||
|
||||
def poisson(y_true, y_pred):
|
||||
return K.mean(y_pred - y_true * K.log(y_pred + K.epsilon()))
|
||||
|
||||
|
||||
def cosine_proximity(y_true, y_pred):
|
||||
y_true = K.l2_normalize(y_true, axis=-1)
|
||||
y_pred = K.l2_normalize(y_pred, axis=-1)
|
||||
return -K.mean(y_true * y_pred)
|
||||
|
||||
|
||||
# aliases
|
||||
mse = MSE = mean_squared_error
|
||||
mae = MAE = mean_absolute_error
|
||||
mape = MAPE = mean_absolute_percentage_error
|
||||
msle = MSLE = mean_squared_logarithmic_error
|
||||
cosine = cosine_proximity
|
||||
|
||||
|
||||
from .utils.generic_utils import get_from_module
|
||||
def get(identifier):
|
||||
return get_from_module(identifier, globals(), 'metric')
|
||||
|
||||
+11
-8
@@ -8,11 +8,19 @@ from .engine.topology import get_source_inputs, Node
|
||||
from .legacy.models import Graph
|
||||
|
||||
|
||||
def model_from_config(config, custom_objects={}):
|
||||
from keras.utils.layer_utils import layer_from_config
|
||||
if isinstance(config, list):
|
||||
raise Exception('model_fom_config expects a dictionary.'
|
||||
'To load an old-style config use the appropiate'
|
||||
'`load_config` method on Sequential or Graph')
|
||||
return layer_from_config(config, custom_objects=custom_objects)
|
||||
|
||||
|
||||
def model_from_yaml(yaml_string, custom_objects={}):
|
||||
'''Parses a yaml model configuration file
|
||||
and returns a model instance.
|
||||
'''
|
||||
# TODO: legacy support?
|
||||
import yaml
|
||||
from keras.utils.layer_utils import layer_from_config
|
||||
config = yaml.load(yaml_string)
|
||||
@@ -23,7 +31,6 @@ def model_from_json(json_string, custom_objects={}):
|
||||
'''Parses a JSON model configuration file
|
||||
and returns a model instance.
|
||||
'''
|
||||
# TODO: legacy support?
|
||||
import json
|
||||
from keras.utils.layer_utils import layer_from_config
|
||||
config = json.loads(json_string)
|
||||
@@ -449,7 +456,7 @@ class Sequential(Model):
|
||||
A Numpy array of predictions.
|
||||
'''
|
||||
if self.model is None:
|
||||
raise Exception('The model needs to be compiled before being used.')
|
||||
self.build()
|
||||
return self.model.predict(x, batch_size=batch_size, verbose=verbose)
|
||||
|
||||
def predict_on_batch(self, x):
|
||||
@@ -531,8 +538,6 @@ class Sequential(Model):
|
||||
# Returns
|
||||
A Numpy array of probability predictions.
|
||||
'''
|
||||
if self.model is None:
|
||||
raise Exception('The model needs to be compiled before being used.')
|
||||
preds = self.predict(x, batch_size, verbose)
|
||||
if preds.min() < 0. or preds.max() > 1.:
|
||||
warnings.warn('Network returning invalid probability values. '
|
||||
@@ -554,8 +559,6 @@ class Sequential(Model):
|
||||
# Returns
|
||||
A numpy array of class predictions.
|
||||
'''
|
||||
if self.model is None:
|
||||
raise Exception('The model needs to be compiled before being used.')
|
||||
proba = self.predict(x, batch_size=batch_size, verbose=verbose)
|
||||
if proba.shape[-1] > 1:
|
||||
return proba.argmax(axis=-1)
|
||||
@@ -700,7 +703,7 @@ class Sequential(Model):
|
||||
|
||||
def get_config(self):
|
||||
'''Returns the model configuration
|
||||
as a Python dictionary.
|
||||
as a Python list.
|
||||
'''
|
||||
config = []
|
||||
if self.layers[0].__class__.__name__ == 'Merge':
|
||||
|
||||
@@ -37,7 +37,9 @@ def categorical_crossentropy(y_true, y_pred):
|
||||
|
||||
|
||||
def sparse_categorical_crossentropy(y_true, y_pred):
|
||||
'''expects a 1-D or 2-D array of integer classes.
|
||||
'''expects an array of integer classes.
|
||||
Note: labels shape must have the same number of dimensions as output shape.
|
||||
If you get a shape error, add a length-1 dimension to labels.
|
||||
'''
|
||||
return K.sparse_categorical_crossentropy(y_pred, y_true)
|
||||
|
||||
|
||||
+57
-40
@@ -29,6 +29,11 @@ class Optimizer(object):
|
||||
when their absolute value exceeds this value.
|
||||
'''
|
||||
def __init__(self, **kwargs):
|
||||
allowed_kwargs = {'clipnorm', 'clipvalue'}
|
||||
for k in kwargs:
|
||||
if k not in allowed_kwargs:
|
||||
raise Exception('Unexpected keyword argument '
|
||||
'passed to optimizer: ' + str(k))
|
||||
self.__dict__.update(kwargs)
|
||||
self.updates = []
|
||||
self.weights = []
|
||||
@@ -89,7 +94,12 @@ class Optimizer(object):
|
||||
return weights
|
||||
|
||||
def get_config(self):
|
||||
return {"name": self.__class__.__name__}
|
||||
config = {'name': self.__class__.__name__}
|
||||
if hasattr(self, 'clipnorm'):
|
||||
config['clipnorm'] = self.clipnorm
|
||||
if hasattr(self, 'clipvalue'):
|
||||
config['clipvalue'] = self.clipvalue
|
||||
return config
|
||||
|
||||
|
||||
class SGD(Optimizer):
|
||||
@@ -102,8 +112,8 @@ class SGD(Optimizer):
|
||||
decay: float >= 0. Learning rate decay over each update.
|
||||
nesterov: boolean. Whether to apply Nesterov momentum.
|
||||
'''
|
||||
def __init__(self, lr=0.01, momentum=0., decay=0., nesterov=False,
|
||||
*args, **kwargs):
|
||||
def __init__(self, lr=0.01, momentum=0., decay=0.,
|
||||
nesterov=False, **kwargs):
|
||||
super(SGD, self).__init__(**kwargs)
|
||||
self.__dict__.update(locals())
|
||||
self.iterations = K.variable(0.)
|
||||
@@ -116,8 +126,9 @@ class SGD(Optimizer):
|
||||
lr = self.lr * (1. / (1. + self.decay * self.iterations))
|
||||
self.updates = [(self.iterations, self.iterations + 1.)]
|
||||
|
||||
for p, g in zip(params, grads):
|
||||
m = K.variable(np.zeros(K.get_value(p).shape)) # momentum
|
||||
# momentum
|
||||
self.weights = [K.variable(np.zeros(K.get_value(p).shape)) for p in params]
|
||||
for p, g, m in zip(params, grads, self.weights):
|
||||
v = self.momentum * m - lr * g # velocity
|
||||
self.updates.append((m, v))
|
||||
|
||||
@@ -134,11 +145,12 @@ class SGD(Optimizer):
|
||||
return self.updates
|
||||
|
||||
def get_config(self):
|
||||
return {"name": self.__class__.__name__,
|
||||
"lr": float(K.get_value(self.lr)),
|
||||
"momentum": float(K.get_value(self.momentum)),
|
||||
"decay": float(K.get_value(self.decay)),
|
||||
"nesterov": self.nesterov}
|
||||
config = {'lr': float(K.get_value(self.lr)),
|
||||
'momentum': float(K.get_value(self.momentum)),
|
||||
'decay': float(K.get_value(self.decay)),
|
||||
'nesterov': self.nesterov}
|
||||
base_config = super(SGD, self).get_config()
|
||||
return dict(list(base_config.items()) + list(config.items()))
|
||||
|
||||
|
||||
class RMSprop(Optimizer):
|
||||
@@ -156,7 +168,7 @@ class RMSprop(Optimizer):
|
||||
rho: float >= 0.
|
||||
epsilon: float >= 0. Fuzz factor.
|
||||
'''
|
||||
def __init__(self, lr=0.001, rho=0.9, epsilon=1e-6, *args, **kwargs):
|
||||
def __init__(self, lr=0.001, rho=0.9, epsilon=1e-8, **kwargs):
|
||||
super(RMSprop, self).__init__(**kwargs)
|
||||
self.__dict__.update(locals())
|
||||
self.lr = K.variable(lr)
|
||||
@@ -172,7 +184,7 @@ class RMSprop(Optimizer):
|
||||
# update accumulator
|
||||
new_a = self.rho * a + (1. - self.rho) * K.square(g)
|
||||
self.updates.append((a, new_a))
|
||||
new_p = p - self.lr * g / K.sqrt(new_a + self.epsilon)
|
||||
new_p = p - self.lr * g / (K.sqrt(new_a) + self.epsilon)
|
||||
|
||||
# apply constraints
|
||||
if p in constraints:
|
||||
@@ -182,10 +194,11 @@ class RMSprop(Optimizer):
|
||||
return self.updates
|
||||
|
||||
def get_config(self):
|
||||
return {"name": self.__class__.__name__,
|
||||
"lr": float(K.get_value(self.lr)),
|
||||
"rho": float(K.get_value(self.rho)),
|
||||
"epsilon": self.epsilon}
|
||||
config = {'lr': float(K.get_value(self.lr)),
|
||||
'rho': float(K.get_value(self.rho)),
|
||||
'epsilon': self.epsilon}
|
||||
base_config = super(RMSprop, self).get_config()
|
||||
return dict(list(base_config.items()) + list(config.items()))
|
||||
|
||||
|
||||
class Adagrad(Optimizer):
|
||||
@@ -198,7 +211,7 @@ class Adagrad(Optimizer):
|
||||
lr: float >= 0. Learning rate.
|
||||
epsilon: float >= 0.
|
||||
'''
|
||||
def __init__(self, lr=0.01, epsilon=1e-6, *args, **kwargs):
|
||||
def __init__(self, lr=0.01, epsilon=1e-8, **kwargs):
|
||||
super(Adagrad, self).__init__(**kwargs)
|
||||
self.__dict__.update(locals())
|
||||
self.lr = K.variable(lr)
|
||||
@@ -212,7 +225,7 @@ class Adagrad(Optimizer):
|
||||
for p, g, a in zip(params, grads, self.weights):
|
||||
new_a = a + K.square(g) # update accumulator
|
||||
self.updates.append((a, new_a))
|
||||
new_p = p - self.lr * g / K.sqrt(new_a + self.epsilon)
|
||||
new_p = p - self.lr * g / (K.sqrt(new_a) + self.epsilon)
|
||||
# apply constraints
|
||||
if p in constraints:
|
||||
c = constraints[p]
|
||||
@@ -221,9 +234,10 @@ class Adagrad(Optimizer):
|
||||
return self.updates
|
||||
|
||||
def get_config(self):
|
||||
return {"name": self.__class__.__name__,
|
||||
"lr": float(K.get_value(self.lr)),
|
||||
"epsilon": self.epsilon}
|
||||
config = {'lr': float(K.get_value(self.lr)),
|
||||
'epsilon': self.epsilon}
|
||||
base_config = super(Adagrad, self).get_config()
|
||||
return dict(list(base_config.items()) + list(config.items()))
|
||||
|
||||
|
||||
class Adadelta(Optimizer):
|
||||
@@ -241,7 +255,7 @@ class Adadelta(Optimizer):
|
||||
# References
|
||||
- [Adadelta - an adaptive learning rate method](http://arxiv.org/abs/1212.5701)
|
||||
'''
|
||||
def __init__(self, lr=1.0, rho=0.95, epsilon=1e-6, *args, **kwargs):
|
||||
def __init__(self, lr=1.0, rho=0.95, epsilon=1e-8, **kwargs):
|
||||
super(Adadelta, self).__init__(**kwargs)
|
||||
self.__dict__.update(locals())
|
||||
self.lr = K.variable(lr)
|
||||
@@ -274,10 +288,11 @@ class Adadelta(Optimizer):
|
||||
return self.updates
|
||||
|
||||
def get_config(self):
|
||||
return {"name": self.__class__.__name__,
|
||||
"lr": float(K.get_value(self.lr)),
|
||||
"rho": self.rho,
|
||||
"epsilon": self.epsilon}
|
||||
config = {'lr': float(K.get_value(self.lr)),
|
||||
'rho': self.rho,
|
||||
'epsilon': self.epsilon}
|
||||
base_config = super(Adadelta, self).get_config()
|
||||
return dict(list(base_config.items()) + list(config.items()))
|
||||
|
||||
|
||||
class Adam(Optimizer):
|
||||
@@ -293,8 +308,8 @@ class Adam(Optimizer):
|
||||
# References
|
||||
- [Adam - A Method for Stochastic Optimization](http://arxiv.org/abs/1412.6980v8)
|
||||
'''
|
||||
def __init__(self, lr=0.001, beta_1=0.9, beta_2=0.999, epsilon=1e-8,
|
||||
*args, **kwargs):
|
||||
def __init__(self, lr=0.001, beta_1=0.9, beta_2=0.999,
|
||||
epsilon=1e-8, **kwargs):
|
||||
super(Adam, self).__init__(**kwargs)
|
||||
self.__dict__.update(locals())
|
||||
self.iterations = K.variable(0)
|
||||
@@ -330,11 +345,12 @@ class Adam(Optimizer):
|
||||
return self.updates
|
||||
|
||||
def get_config(self):
|
||||
return {"name": self.__class__.__name__,
|
||||
"lr": float(K.get_value(self.lr)),
|
||||
"beta_1": float(K.get_value(self.beta_1)),
|
||||
"beta_2": float(K.get_value(self.beta_2)),
|
||||
"epsilon": self.epsilon}
|
||||
config = {'lr': float(K.get_value(self.lr)),
|
||||
'beta_1': float(K.get_value(self.beta_1)),
|
||||
'beta_2': float(K.get_value(self.beta_2)),
|
||||
'epsilon': self.epsilon}
|
||||
base_config = super(Adam, self).get_config()
|
||||
return dict(list(base_config.items()) + list(config.items()))
|
||||
|
||||
|
||||
class Adamax(Optimizer):
|
||||
@@ -351,8 +367,8 @@ class Adamax(Optimizer):
|
||||
# References
|
||||
- [Adam - A Method for Stochastic Optimization](http://arxiv.org/abs/1412.6980v8)
|
||||
'''
|
||||
def __init__(self, lr=0.002, beta_1=0.9, beta_2=0.999, epsilon=1e-8,
|
||||
*args, **kwargs):
|
||||
def __init__(self, lr=0.002, beta_1=0.9, beta_2=0.999,
|
||||
epsilon=1e-8, **kwargs):
|
||||
super(Adamax, self).__init__(**kwargs)
|
||||
self.__dict__.update(locals())
|
||||
self.iterations = K.variable(0.)
|
||||
@@ -391,11 +407,12 @@ class Adamax(Optimizer):
|
||||
return self.updates
|
||||
|
||||
def get_config(self):
|
||||
return {"name": self.__class__.__name__,
|
||||
"lr": float(K.get_value(self.lr)),
|
||||
"beta_1": float(K.get_value(self.beta_1)),
|
||||
"beta_2": float(K.get_value(self.beta_2)),
|
||||
"epsilon": self.epsilon}
|
||||
config = {'lr': float(K.get_value(self.lr)),
|
||||
'beta_1': float(K.get_value(self.beta_1)),
|
||||
'beta_2': float(K.get_value(self.beta_2)),
|
||||
'epsilon': self.epsilon}
|
||||
base_config = super(Adamax, self).get_config()
|
||||
return dict(list(base_config.items()) + list(config.items()))
|
||||
|
||||
|
||||
# aliases
|
||||
|
||||
+219
-102
@@ -6,38 +6,105 @@ from __future__ import absolute_import
|
||||
|
||||
import numpy as np
|
||||
import re
|
||||
from scipy import ndimage
|
||||
from scipy import linalg
|
||||
|
||||
from os import listdir
|
||||
from os.path import isfile, join
|
||||
import math
|
||||
import scipy.ndimage as ndi
|
||||
from six.moves import range
|
||||
import os
|
||||
import threading
|
||||
|
||||
|
||||
def random_rotation(x, rg, fill_mode='nearest',
|
||||
cval=0., axes=(1, 2)):
|
||||
angle = np.random.uniform(-rg, rg)
|
||||
x = ndimage.interpolation.rotate(x, angle,
|
||||
axes=axes,
|
||||
reshape=False,
|
||||
mode=fill_mode,
|
||||
cval=cval)
|
||||
def random_rotation(x, rg, row_index=1, col_index=2, channel_index=0,
|
||||
fill_mode='nearest', cval=0.):
|
||||
theta = np.pi / 180 * np.random.uniform(-rg, rg)
|
||||
rotation_matrix = np.array([[np.cos(theta), -np.sin(theta), 0],
|
||||
[np.sin(theta), np.cos(theta), 0],
|
||||
[0, 0, 1]])
|
||||
|
||||
h, w = x.shape[row_index], x.shape[col_index]
|
||||
transform_matrix = transform_matrix_offset_center(rotation_matrix, h, w)
|
||||
x = apply_transform(x, transform_matrix, channel_index, fill_mode, cval)
|
||||
return x
|
||||
|
||||
|
||||
def random_shift(x, wrg, hrg, fill_mode='nearest',
|
||||
cval=0., row_index=1, col_index=2):
|
||||
shift_x = shift_y = 0
|
||||
if wrg:
|
||||
shift_x = np.random.uniform(-wrg, wrg) * x.shape[col_index]
|
||||
if hrg:
|
||||
shift_y = np.random.uniform(-hrg, hrg) * x.shape[row_index]
|
||||
x = ndimage.interpolation.shift(x, (0, shift_y, shift_x),
|
||||
order=0,
|
||||
mode=fill_mode,
|
||||
cval=cval)
|
||||
def random_shift(x, wrg, hrg, row_index=1, col_index=2, channel_index=0,
|
||||
fill_mode='nearest', cval=0.):
|
||||
h, w = x.shape[row_index], x.shape[col_index]
|
||||
tx = np.random.uniform(-hrg, hrg) * h
|
||||
ty = np.random.uniform(-wrg, wrg) * w
|
||||
translation_matrix = np.array([[1, 0, tx],
|
||||
[0, 1, ty],
|
||||
[0, 0, 1]])
|
||||
|
||||
transform_matrix = translation_matrix # no need to do offset
|
||||
x = apply_transform(x, transform_matrix, channel_index, fill_mode, cval)
|
||||
return x
|
||||
|
||||
|
||||
def random_shear(x, intensity, row_index=1, col_index=2, channel_index=0,
|
||||
fill_mode='nearest', cval=0.):
|
||||
shear = np.random.uniform(-intensity, intensity)
|
||||
shear_matrix = np.array([[1, -np.sin(shear), 0],
|
||||
[0, np.cos(shear), 0],
|
||||
[0, 0, 1]])
|
||||
|
||||
h, w = x.shape[row_index], x.shape[col_index]
|
||||
transform_matrix = transform_matrix_offset_center(shear_matrix, h, w)
|
||||
x = apply_transform(x, transform_matrix, channel_index, fill_mode, cval)
|
||||
return x
|
||||
|
||||
|
||||
def random_zoom(x, zoom_range, row_index=1, col_index=2, channel_index=0,
|
||||
fill_mode='nearest', cval=0.):
|
||||
if len(zoom_range) != 2:
|
||||
raise Exception('zoom_range should be a tuple or list of two floats. '
|
||||
'Received arg: ', zoom_range)
|
||||
|
||||
if zoom_range[0] == 1 and zoom_range[1] == 1:
|
||||
zx, zy = 1, 1
|
||||
else:
|
||||
zx, zy = np.random.uniform(zoom_range[0], zoom_range[1], 2)
|
||||
zoom_matrix = np.array([[zx, 0, 0],
|
||||
[0, zy, 0],
|
||||
[0, 0, 1]])
|
||||
|
||||
h, w = x.shape[row_index], x.shape[col_index]
|
||||
transform_matrix = transform_matrix_offset_center(zoom_matrix, h, w)
|
||||
x = apply_transform(x, transform_matrix, channel_index, fill_mode, cval)
|
||||
return x
|
||||
|
||||
|
||||
def random_barrel_transform(x, intensity):
|
||||
# TODO
|
||||
pass
|
||||
|
||||
|
||||
def random_channel_shift(x, intensity, channel_index=0):
|
||||
x = np.rollaxis(x, channel_index, 0)
|
||||
min_x, max_x = np.min(x), np.max(x)
|
||||
channel_images = [np.clip(x_channel + np.random.uniform(-intensity, intensity), min_x, max_x)
|
||||
for x_channel in x]
|
||||
x = np.stack(channel_images, axis=0)
|
||||
x = np.rollaxis(x, 0, channel_index+1)
|
||||
return x
|
||||
|
||||
|
||||
def transform_matrix_offset_center(matrix, x, y):
|
||||
o_x = float(x) / 2 + 0.5
|
||||
o_y = float(y) / 2 + 0.5
|
||||
offset_matrix = np.array([[1, 0, o_x], [0, 1, o_y], [0, 0, 1]])
|
||||
reset_matrix = np.array([[1, 0, -o_x], [0, 1, -o_y], [0, 0, 1]])
|
||||
transform_matrix = np.dot(np.dot(offset_matrix, matrix), reset_matrix)
|
||||
return transform_matrix
|
||||
|
||||
|
||||
def apply_transform(x, transform_matrix, channel_index=0, fill_mode='nearest', cval=0.):
|
||||
x = np.rollaxis(x, channel_index, 0)
|
||||
final_affine_matrix = transform_matrix[:2, :2]
|
||||
final_offset = transform_matrix[:2, 2]
|
||||
channel_images = [ndi.interpolation.affine_transform(x_channel, final_affine_matrix,
|
||||
final_offset, order=0, mode=fill_mode, cval=cval) for x_channel in x]
|
||||
x = np.stack(channel_images, axis=0)
|
||||
x = np.rollaxis(x, 0, channel_index+1)
|
||||
return x
|
||||
|
||||
|
||||
@@ -48,40 +115,10 @@ def flip_axis(x, axis):
|
||||
return x
|
||||
|
||||
|
||||
def random_barrel_transform(x, intensity):
|
||||
# TODO
|
||||
pass
|
||||
|
||||
|
||||
def random_shear(x, intensity, fill_mode='nearest', cval=0.):
|
||||
shear = np.random.uniform(-intensity, intensity)
|
||||
shear_matrix = np.array([[1.0, -math.sin(shear), 0.0],
|
||||
[0.0, math.cos(shear), 0.0],
|
||||
[0.0, 0.0, 1.0]])
|
||||
x = ndimage.interpolation.affine_transform(x, shear_matrix,
|
||||
mode=fill_mode,
|
||||
order=3,
|
||||
cval=cval)
|
||||
return x
|
||||
|
||||
|
||||
def random_channel_shift(x, rg):
|
||||
# TODO
|
||||
pass
|
||||
|
||||
|
||||
def random_zoom(x, rg, fill_mode='nearest', cval=0.):
|
||||
zoom_w = np.random.uniform(1.-rg, 1.)
|
||||
zoom_h = np.random.uniform(1.-rg, 1.)
|
||||
x = ndimage.interpolation.zoom(x, zoom=(1., zoom_w, zoom_h),
|
||||
mode=fill_mode,
|
||||
cval=cval)
|
||||
return x # shape of result will be different from shape of input!
|
||||
|
||||
|
||||
def array_to_img(x, scale=True):
|
||||
def array_to_img(x, dim_ordering='th', scale=True):
|
||||
from PIL import Image
|
||||
x = x.transpose(1, 2, 0)
|
||||
if dim_ordering == 'th':
|
||||
x = x.transpose(1, 2, 0)
|
||||
if scale:
|
||||
x += max(-np.min(x), 0)
|
||||
x /= np.max(x)
|
||||
@@ -89,19 +126,29 @@ def array_to_img(x, scale=True):
|
||||
if x.shape[2] == 3:
|
||||
# RGB
|
||||
return Image.fromarray(x.astype('uint8'), 'RGB')
|
||||
else:
|
||||
elif x.shape[2] == 1:
|
||||
# grayscale
|
||||
return Image.fromarray(x[:, :, 0].astype('uint8'), 'L')
|
||||
else:
|
||||
raise Exception('Unsupported channel number: ', x.shape[2])
|
||||
|
||||
|
||||
def img_to_array(img):
|
||||
# only used by tests/keras/preprocessing/test_image.py to convert PIL.Image to numpy array
|
||||
def img_to_array(img, dim_ordering='th'):
|
||||
if dim_ordering not in ['th', 'tf']:
|
||||
raise Exception('Unknown dim_ordering: ', dim_ordering)
|
||||
# image has dim_ordering (height, width, channel)
|
||||
x = np.asarray(img, dtype='float32')
|
||||
if len(x.shape) == 3:
|
||||
# RGB: height, width, channel -> channel, height, width
|
||||
x = x.transpose(2, 0, 1)
|
||||
if dim_ordering == 'th':
|
||||
x = x.transpose(2, 0, 1)
|
||||
elif len(x.shape) == 2:
|
||||
if dim_ordering == 'th':
|
||||
x = x.reshape((1, x.shape[0], x.shape[1]))
|
||||
else:
|
||||
x = x.reshape((x.shape[0], x.shape[1], 1))
|
||||
else:
|
||||
# grayscale: height, width -> channel, height, width
|
||||
x = x.reshape((1, x.shape[0], x.shape[1]))
|
||||
raise Exception('Unsupported image shape: ', x.shape)
|
||||
return x
|
||||
|
||||
|
||||
@@ -116,8 +163,8 @@ def load_img(path, grayscale=False):
|
||||
|
||||
|
||||
def list_pictures(directory, ext='jpg|jpeg|bmp|png'):
|
||||
return [join(directory, f) for f in listdir(directory)
|
||||
if isfile(join(directory, f)) and re.match('([\w]+\.(?:' + ext + '))', f)]
|
||||
return [os.path.join(directory, f) for f in os.listdir(directory)
|
||||
if os.path.isfile(os.path.join(directory, f)) and re.match('([\w]+\.(?:' + ext + '))', f)]
|
||||
|
||||
|
||||
class ImageDataGenerator(object):
|
||||
@@ -134,21 +181,34 @@ class ImageDataGenerator(object):
|
||||
width_shift_range: fraction of total width.
|
||||
height_shift_range: fraction of total height.
|
||||
shear_range: shear intensity (shear angle in radians).
|
||||
zoom_range: amount of zoom. if scalar z, zoom will be randomly picked
|
||||
in the range [1-z, 1+z]. A sequence of two can be passed instead
|
||||
to select this range.
|
||||
channel_shift_range: shift range for each channels.
|
||||
fill_mode: points outside the boundaries are filled according to the
|
||||
given mode ('constant', 'nearest', 'reflect' or 'wrap'). Default
|
||||
is 'nearest'.
|
||||
cval: value used for points outside the boundaries when fill_mode is
|
||||
'constant'. Default is 0.
|
||||
horizontal_flip: whether to randomly flip images horizontally.
|
||||
vertical_flip: whether to randomly flip images vertically.
|
||||
dim_ordering: 'th' or 'tf'. In 'th' mode, the channels dimension
|
||||
(the depth) is at index 1, in 'tf' mode it is at index 3.
|
||||
'''
|
||||
def __init__(self,
|
||||
featurewise_center=True,
|
||||
featurewise_center=False,
|
||||
samplewise_center=False,
|
||||
featurewise_std_normalization=True,
|
||||
featurewise_std_normalization=False,
|
||||
samplewise_std_normalization=False,
|
||||
zca_whitening=False,
|
||||
rotation_range=0.,
|
||||
width_shift_range=0.,
|
||||
height_shift_range=0.,
|
||||
shear_range=0.,
|
||||
zoom_range=0.,
|
||||
channel_shift_range=0.,
|
||||
fill_mode='nearest',
|
||||
cval=0.,
|
||||
horizontal_flip=False,
|
||||
vertical_flip=False,
|
||||
dim_ordering='th'):
|
||||
@@ -157,9 +217,11 @@ class ImageDataGenerator(object):
|
||||
self.std = None
|
||||
self.principal_components = None
|
||||
self.lock = threading.Lock()
|
||||
|
||||
if dim_ordering not in {'tf', 'th'}:
|
||||
raise Exception('dim_ordering should be "tf" (channel after row and \
|
||||
column) or "th" (channel before row and column). Received arg: ', dim_ordering)
|
||||
raise Exception('dim_ordering should be "tf" (channel after row and '
|
||||
'column) or "th" (channel before row and column). '
|
||||
'Received arg: ', dim_ordering)
|
||||
self.dim_ordering = dim_ordering
|
||||
if dim_ordering == "th":
|
||||
self.channel_index = 1
|
||||
@@ -170,30 +232,41 @@ class ImageDataGenerator(object):
|
||||
self.row_index = 1
|
||||
self.col_index = 2
|
||||
|
||||
if np.isscalar(zoom_range):
|
||||
self.zoom_range = [1 - zoom_range, 1 + zoom_range]
|
||||
elif len(zoom_range) == 2:
|
||||
self.zoom_range = [zoom_range[0], zoom_range[1]]
|
||||
else:
|
||||
raise Exception('zoom_range should be a float or '
|
||||
'a tuple or list of two floats. '
|
||||
'Received arg: ', zoom_range)
|
||||
|
||||
self.batch_index = 0
|
||||
self.total_batches_seen = 0
|
||||
|
||||
def reset(self):
|
||||
self.batch_index = 0
|
||||
|
||||
def _flow_index(self, N, batch_size=32, shuffle=False, seed=None):
|
||||
b = 0
|
||||
total_b = 0
|
||||
# ensure self.batch_index is 0
|
||||
self.reset()
|
||||
|
||||
while 1:
|
||||
if b == 0:
|
||||
if seed is not None:
|
||||
np.random.seed(seed + total_b)
|
||||
|
||||
if self.batch_index == 0:
|
||||
index_array = np.arange(N)
|
||||
if shuffle:
|
||||
if seed is not None:
|
||||
np.random.seed(seed + self.total_batches_seen)
|
||||
index_array = np.random.permutation(N)
|
||||
else:
|
||||
index_array = np.arange(N)
|
||||
|
||||
current_index = (b * batch_size) % N
|
||||
current_index = (self.batch_index * batch_size) % N
|
||||
if N >= current_index + batch_size:
|
||||
current_batch_size = batch_size
|
||||
self.batch_index += 1
|
||||
else:
|
||||
current_batch_size = N - current_index
|
||||
|
||||
if current_batch_size == batch_size:
|
||||
b += 1
|
||||
else:
|
||||
b = 0
|
||||
total_b += 1
|
||||
self.batch_index = 0
|
||||
self.total_batches_seen += 1
|
||||
yield (index_array[current_index: current_index + current_batch_size],
|
||||
current_index, current_batch_size)
|
||||
|
||||
@@ -205,6 +278,7 @@ class ImageDataGenerator(object):
|
||||
self.save_to_dir = save_to_dir
|
||||
self.save_prefix = save_prefix
|
||||
self.save_format = save_format
|
||||
self.reset()
|
||||
self.flow_generator = self._flow_index(X.shape[0], batch_size,
|
||||
shuffle, seed)
|
||||
return self
|
||||
@@ -230,8 +304,11 @@ class ImageDataGenerator(object):
|
||||
bX[i] = x
|
||||
if self.save_to_dir:
|
||||
for i in range(current_batch_size):
|
||||
img = array_to_img(bX[i], scale=True)
|
||||
img.save(self.save_to_dir + '/' + self.save_prefix + '_' + str(current_index + i) + '.' + self.save_format)
|
||||
img = array_to_img(bX[i], self.dim_ordering, scale=True)
|
||||
fname = '{prefix}_{index}.{format}'.format(prefix=self.save_prefix,
|
||||
index=current_index + i,
|
||||
format=self.save_format)
|
||||
img.save(os.path.join(self.save_to_dir, fname))
|
||||
bY = self.y[index_array]
|
||||
return bX, bY
|
||||
|
||||
@@ -240,10 +317,12 @@ class ImageDataGenerator(object):
|
||||
return self.next()
|
||||
|
||||
def standardize(self, x):
|
||||
# x is a single image, so it doesn't have image number at index 0
|
||||
img_channel_index = self.channel_index - 1
|
||||
if self.samplewise_center:
|
||||
x -= np.mean(x, axis=self.channel_index, keepdims=True)
|
||||
x -= np.mean(x, axis=img_channel_index, keepdims=True)
|
||||
if self.samplewise_std_normalization:
|
||||
x /= (np.std(x, axis=self.channel_index, keepdims=True) + 1e-7)
|
||||
x /= (np.std(x, axis=img_channel_index, keepdims=True) + 1e-7)
|
||||
|
||||
if self.featurewise_center:
|
||||
x -= self.mean
|
||||
@@ -259,27 +338,67 @@ class ImageDataGenerator(object):
|
||||
|
||||
def random_transform(self, x):
|
||||
# x is a single image, so it doesn't have image number at index 0
|
||||
img_col_index = self.col_index - 1
|
||||
img_row_index = self.row_index - 1
|
||||
img_col_index = self.col_index - 1
|
||||
img_channel_index = self.channel_index - 1
|
||||
|
||||
# use composition of homographies to generate final transform that needs to be applied
|
||||
if self.rotation_range:
|
||||
x = random_rotation(x, self.rotation_range,
|
||||
axes=(img_row_index, img_col_index))
|
||||
if self.width_shift_range or self.height_shift_range:
|
||||
x = random_shift(x, self.width_shift_range, self.height_shift_range,
|
||||
row_index=img_row_index, col_index=img_col_index)
|
||||
theta = np.pi / 180 * np.random.uniform(-self.rotation_range, self.rotation_range)
|
||||
else:
|
||||
theta = 0
|
||||
rotation_matrix = np.array([[np.cos(theta), -np.sin(theta), 0],
|
||||
[np.sin(theta), np.cos(theta), 0],
|
||||
[0, 0, 1]])
|
||||
if self.height_shift_range:
|
||||
tx = np.random.uniform(-self.height_shift_range, self.height_shift_range) * x.shape[img_row_index]
|
||||
else:
|
||||
tx = 0
|
||||
|
||||
if self.width_shift_range:
|
||||
ty = np.random.uniform(-self.width_shift_range, self.width_shift_range) * x.shape[img_col_index]
|
||||
else:
|
||||
ty = 0
|
||||
|
||||
translation_matrix = np.array([[1, 0, tx],
|
||||
[0, 1, ty],
|
||||
[0, 0, 1]])
|
||||
if self.shear_range:
|
||||
shear = np.random.uniform(-self.shear_range, self.shear_range)
|
||||
else:
|
||||
shear = 0
|
||||
shear_matrix = np.array([[1, -np.sin(shear), 0],
|
||||
[0, np.cos(shear), 0],
|
||||
[0, 0, 1]])
|
||||
|
||||
if self.zoom_range[0] == 1 and self.zoom_range[1] == 1:
|
||||
zx, zy = 1, 1
|
||||
else:
|
||||
zx, zy = np.random.uniform(self.zoom_range[0], self.zoom_range[1], 2)
|
||||
zoom_matrix = np.array([[zx, 0, 0],
|
||||
[0, zy, 0],
|
||||
[0, 0, 1]])
|
||||
|
||||
transform_matrix = np.dot(np.dot(np.dot(rotation_matrix, translation_matrix), shear_matrix), zoom_matrix)
|
||||
|
||||
h, w = x.shape[img_row_index], x.shape[img_col_index]
|
||||
transform_matrix = transform_matrix_offset_center(transform_matrix, h, w)
|
||||
x = apply_transform(x, transform_matrix, img_channel_index,
|
||||
fill_mode=self.fill_mode, cval=self.cval)
|
||||
if self.channel_shift_range != 0:
|
||||
x = random_channel_shift(x, self.channel_shift_range, img_channel_index)
|
||||
|
||||
if self.horizontal_flip:
|
||||
if np.random.random() < 0.5:
|
||||
x = flip_axis(x, img_col_index)
|
||||
|
||||
if self.vertical_flip:
|
||||
if np.random.random() < 0.5:
|
||||
x = flip_axis(x, img_row_index)
|
||||
if self.shear_range:
|
||||
x = random_shear(x, self.shear_range)
|
||||
|
||||
# TODO:
|
||||
# zoom
|
||||
# channel-wise normalization
|
||||
# barrel/fisheye
|
||||
# channel shifting
|
||||
return x
|
||||
|
||||
def fit(self, X,
|
||||
@@ -301,14 +420,13 @@ class ImageDataGenerator(object):
|
||||
aX = np.zeros(tuple([rounds * X.shape[0]] + list(X.shape)[1:]))
|
||||
for r in range(rounds):
|
||||
for i in range(X.shape[0]):
|
||||
img = array_to_img(X[i])
|
||||
img = self.random_transform(img)
|
||||
aX[i + r * X.shape[0]] = img_to_array(img)
|
||||
aX[i + r * X.shape[0]] = self.random_transform(X[i])
|
||||
X = aX
|
||||
|
||||
if self.featurewise_center:
|
||||
self.mean = np.mean(X, axis=0)
|
||||
X -= self.mean
|
||||
|
||||
if self.featurewise_std_normalization:
|
||||
self.std = np.std(X, axis=0)
|
||||
X /= (self.std + 1e-7)
|
||||
@@ -323,7 +441,6 @@ class ImageDataGenerator(object):
|
||||
class GraphImageDataGenerator(ImageDataGenerator):
|
||||
'''Example of how to build a generator for a Graph model
|
||||
'''
|
||||
|
||||
def next(self):
|
||||
bX, bY = super(GraphImageDataGenerator, self).next()
|
||||
return {'input': bX, 'output': bY}
|
||||
|
||||
@@ -59,9 +59,11 @@ class ActivityRegularizer(Regularizer):
|
||||
raise Exception('Need to call `set_layer` on '
|
||||
'ActivityRegularizer instance '
|
||||
'before calling the instance.')
|
||||
output = self.layer.output
|
||||
regularized_loss = loss + self.l1 * K.sum(K.mean(K.abs(output), axis=0))
|
||||
regularized_loss += self.l2 * K.sum(K.mean(K.square(output), axis=0))
|
||||
regularized_loss = loss
|
||||
for i in range(len(self.layer.inbound_nodes)):
|
||||
output = self.layer.get_output_at(i)
|
||||
regularized_loss += self.l1 * K.sum(K.mean(K.abs(output), axis=0))
|
||||
regularized_loss += self.l2 * K.sum(K.mean(K.square(output), axis=0))
|
||||
return K.in_train_phase(regularized_loss, loss)
|
||||
|
||||
def get_config(self):
|
||||
|
||||
@@ -73,7 +73,7 @@ def get_file(fname, origin, untar=False):
|
||||
except (Exception, KeyboardInterrupt) as e:
|
||||
if os.path.exists(fpath):
|
||||
os.remove(fpath)
|
||||
raise e
|
||||
raise
|
||||
progbar = None
|
||||
|
||||
if untar:
|
||||
@@ -88,7 +88,7 @@ def get_file(fname, origin, untar=False):
|
||||
os.remove(untar_fpath)
|
||||
else:
|
||||
shutil.rmtree(untar_fpath)
|
||||
raise e
|
||||
raise
|
||||
tfile.close()
|
||||
return untar_fpath
|
||||
|
||||
|
||||
@@ -29,14 +29,17 @@ def model_to_dot(model, show_shapes=False):
|
||||
if show_shapes:
|
||||
# Build the label that will actually contain a table with the
|
||||
# input/output
|
||||
outputlabels = str(layer.output_shape)
|
||||
try:
|
||||
outputlabels = str(layer.output_shape)
|
||||
except:
|
||||
outputlabels = 'multiple'
|
||||
if hasattr(layer, 'input_shape'):
|
||||
inputlabels = str(layer.input_shape)
|
||||
elif hasattr(layer, 'input_shapes'):
|
||||
inputlabels = ', '.join(
|
||||
[str(ishape) for ishape in layer.input_shapes])
|
||||
else:
|
||||
inputlabels = ''
|
||||
inputlabels = 'multiple'
|
||||
label = '%s\n|{input:|output:}|{{%s}|{%s}}' % (label, inputlabels, outputlabels)
|
||||
|
||||
node = pydot.Node(layer_id, label=label)
|
||||
|
||||
@@ -2,7 +2,6 @@ from __future__ import absolute_import
|
||||
import copy
|
||||
import inspect
|
||||
import types
|
||||
import numpy as np
|
||||
|
||||
from ..utils.np_utils import to_categorical
|
||||
from ..models import Sequential
|
||||
|
||||
+2
-2
@@ -3,12 +3,12 @@ from setuptools import find_packages
|
||||
|
||||
|
||||
setup(name='Keras',
|
||||
version='1.0.1',
|
||||
version='1.0.3',
|
||||
description='Deep Learning for Python',
|
||||
author='Francois Chollet',
|
||||
author_email='francois.chollet@gmail.com',
|
||||
url='https://github.com/fchollet/keras',
|
||||
download_url='https://github.com/fchollet/keras/tarball/1.0.1',
|
||||
download_url='https://github.com/fchollet/keras/tarball/1.0.3',
|
||||
license='MIT',
|
||||
install_requires=['theano', 'pyyaml', 'six'],
|
||||
extras_require={
|
||||
|
||||
@@ -23,7 +23,7 @@ def test_temporal_classification():
|
||||
'''
|
||||
np.random.seed(1337)
|
||||
(X_train, y_train), (X_test, y_test) = get_test_data(nb_train=500,
|
||||
nb_test=200,
|
||||
nb_test=500,
|
||||
input_shape=(3, 5),
|
||||
classification=True,
|
||||
nb_class=2)
|
||||
@@ -35,12 +35,12 @@ def test_temporal_classification():
|
||||
input_shape=(X_train.shape[1], X_train.shape[2]),
|
||||
activation='softmax'))
|
||||
model.compile(loss='categorical_crossentropy',
|
||||
optimizer='adadelta',
|
||||
optimizer='adagrad',
|
||||
metrics=['accuracy'])
|
||||
history = model.fit(X_train, y_train, nb_epoch=5, batch_size=16,
|
||||
history = model.fit(X_train, y_train, nb_epoch=20, batch_size=32,
|
||||
validation_data=(X_test, y_test),
|
||||
verbose=0)
|
||||
assert(history.history['val_acc'][-1] > 0.9)
|
||||
assert(history.history['val_acc'][-1] >= 0.85)
|
||||
|
||||
|
||||
def test_temporal_regression():
|
||||
@@ -182,4 +182,5 @@ def test_masked_temporal():
|
||||
assert(np.abs(history.history['val_loss'][-1] - ground_truth) < 0.06)
|
||||
|
||||
if __name__ == '__main__':
|
||||
pytest.main([__file__])
|
||||
# pytest.main([__file__])
|
||||
test_temporal_classification()
|
||||
|
||||
@@ -91,6 +91,17 @@ class TestBackend(object):
|
||||
assert_allclose(np_rep, th_rep, atol=1e-05)
|
||||
assert_allclose(np_rep, tf_rep, atol=1e-05)
|
||||
|
||||
def test_tile(self):
|
||||
shape = (3, 4)
|
||||
arr = np.arange(np.prod(shape)).reshape(shape)
|
||||
arr_th = KTH.variable(arr)
|
||||
arr_tf = KTF.variable(arr)
|
||||
|
||||
n = (2, 1)
|
||||
th_rep = KTH.eval(KTH.tile(arr_th, n))
|
||||
tf_rep = KTF.eval(KTF.tile(arr_tf, n))
|
||||
assert_allclose(tf_rep, th_rep, atol=1e-05)
|
||||
|
||||
def test_value_manipulation(self):
|
||||
val = np.random.random((4, 2))
|
||||
xth = KTH.variable(val)
|
||||
|
||||
@@ -389,6 +389,9 @@ def test_recursion():
|
||||
assert K.int_shape(m_tf) == (None, 64)
|
||||
assert K.int_shape(n_tf) == (None, 5)
|
||||
|
||||
# test merge
|
||||
o_tf = merge([j_tf, k_tf], mode='concat', concat_axis=1)
|
||||
|
||||
|
||||
def test_functional_guide():
|
||||
# MNIST
|
||||
|
||||
@@ -117,10 +117,10 @@ def test_model_methods():
|
||||
|
||||
out = model.train_on_batch([input_a_np, input_b_np],
|
||||
[output_a_np, output_b_np])
|
||||
assert len(out) == 3
|
||||
assert len(out) == 5
|
||||
out = model.test_on_batch([input_a_np, input_b_np],
|
||||
[output_a_np, output_b_np])
|
||||
assert len(out) == 3
|
||||
assert len(out) == 5
|
||||
|
||||
# this should also work
|
||||
model.compile(optimizer, loss, metrics={'dense_1': 'acc'},
|
||||
@@ -128,10 +128,10 @@ def test_model_methods():
|
||||
|
||||
out = model.train_on_batch([input_a_np, input_b_np],
|
||||
[output_a_np, output_b_np])
|
||||
assert len(out) == 2
|
||||
assert len(out) == 4
|
||||
out = model.test_on_batch([input_a_np, input_b_np],
|
||||
[output_a_np, output_b_np])
|
||||
assert len(out) == 2
|
||||
assert len(out) == 4
|
||||
|
||||
# and this as well
|
||||
model.compile(optimizer, loss, metrics={'dense_1': ['acc']},
|
||||
@@ -139,10 +139,10 @@ def test_model_methods():
|
||||
|
||||
out = model.train_on_batch([input_a_np, input_b_np],
|
||||
[output_a_np, output_b_np])
|
||||
assert len(out) == 2
|
||||
assert len(out) == 4
|
||||
out = model.test_on_batch([input_a_np, input_b_np],
|
||||
[output_a_np, output_b_np])
|
||||
assert len(out) == 2
|
||||
assert len(out) == 4
|
||||
|
||||
# test with a custom metric function
|
||||
mse = lambda y_true, y_pred: K.mean(K.pow(y_true - y_pred, 2))
|
||||
@@ -151,10 +151,10 @@ def test_model_methods():
|
||||
|
||||
out = model.train_on_batch([input_a_np, input_b_np],
|
||||
[output_a_np, output_b_np])
|
||||
assert len(out) == 3
|
||||
assert len(out) == 5
|
||||
out = model.test_on_batch([input_a_np, input_b_np],
|
||||
[output_a_np, output_b_np])
|
||||
assert len(out) == 3
|
||||
assert len(out) == 5
|
||||
|
||||
input_a_np = np.random.random((10, 3))
|
||||
input_b_np = np.random.random((10, 3))
|
||||
|
||||
@@ -14,19 +14,19 @@ def test_masking():
|
||||
|
||||
|
||||
def test_merge():
|
||||
from keras.layers import Input, merge
|
||||
from keras.layers import Input, merge, Merge
|
||||
from keras.models import Model
|
||||
|
||||
# test modes: 'sum', 'mul', 'concat', 'ave', 'cos', 'dot'.
|
||||
input_shapes = [(3, 2), (3, 2)]
|
||||
inputs = [np.random.random(shape) for shape in input_shapes]
|
||||
|
||||
# test graph API
|
||||
for mode in ['sum', 'mul', 'concat', 'ave', 'cos', 'dot']:
|
||||
# test functional API
|
||||
for mode in ['sum', 'mul', 'concat', 'ave']:
|
||||
print(mode)
|
||||
input_a = Input(shape=input_shapes[0][1:])
|
||||
input_b = Input(shape=input_shapes[1][1:])
|
||||
merged = merge([input_a, input_b], mode='sum')
|
||||
merged = merge([input_a, input_b], mode=mode)
|
||||
model = Model([input_a, input_b], merged)
|
||||
model.compile('rmsprop', 'mse')
|
||||
|
||||
@@ -38,6 +38,15 @@ def test_merge():
|
||||
model = Model.from_config(config)
|
||||
model.compile('rmsprop', 'mse')
|
||||
|
||||
# test Merge (#2460)
|
||||
merged = Merge(mode=mode)([input_a, input_b])
|
||||
model = Model([input_a, input_b], merged)
|
||||
model.compile('rmsprop', 'mse')
|
||||
|
||||
expected_output_shape = model.get_output_shape_for(input_shapes)
|
||||
actual_output_shape = model.predict(inputs).shape
|
||||
assert expected_output_shape == actual_output_shape
|
||||
|
||||
# test lambda with output_shape lambda
|
||||
input_a = Input(shape=input_shapes[0][1:])
|
||||
input_b = Input(shape=input_shapes[1][1:])
|
||||
|
||||
@@ -32,6 +32,13 @@ def _runner(layer_class):
|
||||
'dropout_W': 0.1},
|
||||
input_shape=(3, 2, 3))
|
||||
|
||||
# check implementation modes
|
||||
for mode in ['cpu', 'mem', 'gpu']:
|
||||
layer_test(layer_class,
|
||||
kwargs={'output_dim': output_dim,
|
||||
'consume_less': mode},
|
||||
input_shape=(3, 2, 3))
|
||||
|
||||
# check statefulness
|
||||
model = Sequential()
|
||||
model.add(embeddings.Embedding(embedding_num, embedding_dim,
|
||||
|
||||
@@ -4,85 +4,91 @@ from PIL import Image
|
||||
import numpy as np
|
||||
import os
|
||||
import shutil
|
||||
import tempfile
|
||||
|
||||
|
||||
def setup_function(func):
|
||||
paths = ['test_images', 'test_images/rgb', 'test_images/gsc']
|
||||
for path in paths:
|
||||
if not os.path.exists(path):
|
||||
os.mkdir(path)
|
||||
class TestImage:
|
||||
|
||||
img_w = img_h = 20
|
||||
for n in range(8):
|
||||
bias = np.random.rand(img_w, img_h, 1) * 64
|
||||
variance = np.random.rand(img_w, img_h, 1) * (255-64)
|
||||
imarray = np.random.rand(img_w, img_h, 3) * variance + bias
|
||||
im = Image.fromarray(imarray.astype('uint8')).convert('RGBA')
|
||||
im.save('test_images/rgb/rgb_test_image_'+str(n)+'.png')
|
||||
def setup_class(cls):
|
||||
img_w = img_h = 20
|
||||
rgb_images = []
|
||||
gray_images = []
|
||||
for n in range(8):
|
||||
bias = np.random.rand(img_w, img_h, 1) * 64
|
||||
variance = np.random.rand(img_w, img_h, 1) * (255-64)
|
||||
imarray = np.random.rand(img_w, img_h, 3) * variance + bias
|
||||
im = Image.fromarray(imarray.astype('uint8')).convert('RGB')
|
||||
rgb_images.append(im)
|
||||
|
||||
imarray = np.random.rand(img_w, img_h, 1) * variance + bias
|
||||
im = Image.fromarray(imarray.astype('uint8').squeeze()).convert('L')
|
||||
im.save('test_images/gsc/gsc_test_image_'+str(n)+'.png')
|
||||
imarray = np.random.rand(img_w, img_h, 1) * variance + bias
|
||||
im = Image.fromarray(imarray.astype('uint8').squeeze()).convert('L')
|
||||
gray_images.append(im)
|
||||
|
||||
cls.all_test_images = [rgb_images, gray_images]
|
||||
|
||||
def teardown_function(func):
|
||||
shutil.rmtree('test_images')
|
||||
def teardown_class(cls):
|
||||
del cls.all_test_images
|
||||
|
||||
def test_image_data_generator(self):
|
||||
for test_images in self.all_test_images:
|
||||
img_list = []
|
||||
for im in test_images:
|
||||
img_list.append(img_to_array(im)[None, ...])
|
||||
|
||||
def test_image_data_generator():
|
||||
for color_mode in ['gsc', 'rgb']:
|
||||
file_list = list_pictures('test_images/' + color_mode)
|
||||
img_list = []
|
||||
for f in file_list:
|
||||
img_list.append(img_to_array(load_img(f))[None, ...])
|
||||
images = np.vstack(img_list)
|
||||
generator = ImageDataGenerator(
|
||||
featurewise_center=True,
|
||||
samplewise_center=True,
|
||||
featurewise_std_normalization=True,
|
||||
samplewise_std_normalization=True,
|
||||
zca_whitening=True,
|
||||
rotation_range=90.,
|
||||
width_shift_range=0.1,
|
||||
height_shift_range=0.1,
|
||||
shear_range=0.5,
|
||||
zoom_range=0.2,
|
||||
channel_shift_range=0.,
|
||||
fill_mode='nearest',
|
||||
cval=0.5,
|
||||
horizontal_flip=True,
|
||||
vertical_flip=True)
|
||||
generator.fit(images, augment=True)
|
||||
|
||||
images = np.vstack(img_list)
|
||||
generator = ImageDataGenerator(
|
||||
featurewise_center=True,
|
||||
samplewise_center=True,
|
||||
featurewise_std_normalization=True,
|
||||
samplewise_std_normalization=True,
|
||||
zca_whitening=True,
|
||||
rotation_range=90.,
|
||||
width_shift_range=10.,
|
||||
height_shift_range=10.,
|
||||
shear_range=0.5,
|
||||
horizontal_flip=True,
|
||||
vertical_flip=True)
|
||||
generator.fit(images, augment=True)
|
||||
tmp_folder = tempfile.mkdtemp(prefix='test_images')
|
||||
for x, y in generator.flow(images, np.arange(images.shape[0]),
|
||||
shuffle=True, save_to_dir=tmp_folder):
|
||||
assert x.shape[1:] == images.shape[1:]
|
||||
break
|
||||
shutil.rmtree(tmp_folder)
|
||||
|
||||
for x, y in generator.flow(images, np.arange(images.shape[0]),
|
||||
shuffle=True, save_to_dir='test_images'):
|
||||
assert x.shape[1:] == images.shape[1:]
|
||||
break
|
||||
def test_img_flip(self):
|
||||
x = np.array(range(4)).reshape([1, 1, 2, 2])
|
||||
assert (flip_axis(x, 0) == x).all()
|
||||
assert (flip_axis(x, 1) == x).all()
|
||||
assert (flip_axis(x, 2) == [[[[2, 3], [0, 1]]]]).all()
|
||||
assert (flip_axis(x, 3) == [[[[1, 0], [3, 2]]]]).all()
|
||||
|
||||
|
||||
def test_img_flip():
|
||||
x = np.array(range(4)).reshape([1, 1, 2, 2])
|
||||
assert (flip_axis(x, 0) == x).all()
|
||||
assert (flip_axis(x, 1) == x).all()
|
||||
assert (flip_axis(x, 2) == [[[[2, 3], [0, 1]]]]).all()
|
||||
assert (flip_axis(x, 3) == [[[[1, 0], [3, 2]]]]).all()
|
||||
|
||||
dim_ordering_and_col_index = (('tf', 2), ('th', 3))
|
||||
for dim_ordering, col_index in dim_ordering_and_col_index:
|
||||
image_generator_th = ImageDataGenerator(
|
||||
featurewise_center=False,
|
||||
samplewise_center=False,
|
||||
featurewise_std_normalization=False,
|
||||
samplewise_std_normalization=False,
|
||||
zca_whitening=False,
|
||||
rotation_range=0,
|
||||
width_shift_range=0,
|
||||
height_shift_range=0,
|
||||
shear_range=0,
|
||||
horizontal_flip=True,
|
||||
vertical_flip=False,
|
||||
dim_ordering=dim_ordering).flow(x, [1])
|
||||
for i in range(10):
|
||||
potentially_flipped_x, _ = next(image_generator_th)
|
||||
assert ((potentially_flipped_x == x).all() or
|
||||
(potentially_flipped_x == flip_axis(x, col_index)).all())
|
||||
dim_ordering_and_col_index = (('tf', 2), ('th', 3))
|
||||
for dim_ordering, col_index in dim_ordering_and_col_index:
|
||||
image_generator_th = ImageDataGenerator(
|
||||
featurewise_center=False,
|
||||
samplewise_center=False,
|
||||
featurewise_std_normalization=False,
|
||||
samplewise_std_normalization=False,
|
||||
zca_whitening=False,
|
||||
rotation_range=0,
|
||||
width_shift_range=0,
|
||||
height_shift_range=0,
|
||||
shear_range=0,
|
||||
zoom_range=0,
|
||||
channel_shift_range=0,
|
||||
horizontal_flip=True,
|
||||
vertical_flip=False,
|
||||
dim_ordering=dim_ordering).flow(x, [1])
|
||||
for i in range(10):
|
||||
potentially_flipped_x, _ = next(image_generator_th)
|
||||
assert ((potentially_flipped_x == x).all() or
|
||||
(potentially_flipped_x == flip_axis(x, col_index)).all())
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
|
||||
@@ -56,6 +56,22 @@ def test_softplus():
|
||||
assert_allclose(result, expected, rtol=1e-05)
|
||||
|
||||
|
||||
def test_softsign():
|
||||
'''
|
||||
Test using a reference softsign implementation
|
||||
'''
|
||||
def softsign(x):
|
||||
return np.divide(x, np.ones_like(x) + np.absolute(x))
|
||||
|
||||
x = K.placeholder(ndim=2)
|
||||
f = K.function([x], [activations.softsign(x)])
|
||||
test_values = get_standard_values()
|
||||
|
||||
result = f([test_values])[0]
|
||||
expected = softsign(test_values)
|
||||
assert_allclose(result, expected, rtol=1e-05)
|
||||
|
||||
|
||||
def test_sigmoid():
|
||||
'''
|
||||
Test using a numerically stable reference sigmoid implementation
|
||||
|
||||
@@ -0,0 +1,44 @@
|
||||
import pytest
|
||||
import numpy as np
|
||||
|
||||
from keras import metrics
|
||||
from keras import backend as K
|
||||
|
||||
all_metrics = [
|
||||
metrics.binary_accuracy,
|
||||
metrics.categorical_accuracy,
|
||||
metrics.mean_squared_error,
|
||||
metrics.mean_absolute_error,
|
||||
metrics.mean_absolute_percentage_error,
|
||||
metrics.mean_squared_logarithmic_error,
|
||||
metrics.squared_hinge,
|
||||
metrics.hinge,
|
||||
metrics.categorical_crossentropy,
|
||||
metrics.binary_crossentropy,
|
||||
metrics.poisson,
|
||||
metrics.cosine_proximity,
|
||||
]
|
||||
|
||||
all_sparse_metrics = [
|
||||
metrics.sparse_categorical_accuracy,
|
||||
metrics.sparse_categorical_crossentropy,
|
||||
]
|
||||
|
||||
|
||||
def test_metrics():
|
||||
y_a = K.variable(np.random.random((6, 7)))
|
||||
y_b = K.variable(np.random.random((6, 7)))
|
||||
for metric in all_metrics:
|
||||
output = metric(y_a, y_b)
|
||||
assert K.eval(output).shape == ()
|
||||
|
||||
|
||||
def test_sparse_metrics():
|
||||
for metric in all_sparse_metrics:
|
||||
y_a = K.variable(np.random.randint(0, 7, (6,)), dtype=K.floatx())
|
||||
y_b = K.variable(np.random.random((6, 7)), dtype=K.floatx())
|
||||
assert K.eval(metric(y_a, y_b)).shape == ()
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
pytest.main([__file__])
|
||||
@@ -22,19 +22,23 @@ high_weight = 5
|
||||
max_train_samples = 5000
|
||||
max_test_samples = 1000
|
||||
|
||||
# the data, shuffled and split between tran and test sets
|
||||
(X_train, y_train), (X_test, y_test) = mnist.load_data()
|
||||
X_train = X_train.reshape(60000, 784)[:max_train_samples]
|
||||
X_test = X_test.reshape(10000, 784)[:max_test_samples]
|
||||
X_train = X_train.astype("float32") / 255
|
||||
X_test = X_test.astype("float32") / 255
|
||||
|
||||
# convert class vectors to binary class matrices
|
||||
y_train = y_train[:max_train_samples]
|
||||
y_test = y_test[:max_test_samples]
|
||||
Y_train = np_utils.to_categorical(y_train, nb_classes)
|
||||
Y_test = np_utils.to_categorical(y_test, nb_classes)
|
||||
test_ids = np.where(y_test == np.array(weighted_class))[0]
|
||||
def get_data():
|
||||
# the data, shuffled and split between tran and test sets
|
||||
(X_train, y_train), (X_test, y_test) = mnist.load_data()
|
||||
X_train = X_train.reshape(60000, 784)[:max_train_samples]
|
||||
X_test = X_test.reshape(10000, 784)[:max_test_samples]
|
||||
X_train = X_train.astype("float32") / 255
|
||||
X_test = X_test.astype("float32") / 255
|
||||
|
||||
# convert class vectors to binary class matrices
|
||||
y_train = y_train[:max_train_samples]
|
||||
y_test = y_test[:max_test_samples]
|
||||
Y_train = np_utils.to_categorical(y_train, nb_classes)
|
||||
Y_test = np_utils.to_categorical(y_test, nb_classes)
|
||||
test_ids = np.where(y_test == np.array(weighted_class))[0]
|
||||
|
||||
return (X_train, Y_train), (X_test, Y_test), test_ids
|
||||
|
||||
|
||||
def create_model(weight_reg=None, activity_reg=None):
|
||||
@@ -48,6 +52,7 @@ def create_model(weight_reg=None, activity_reg=None):
|
||||
|
||||
|
||||
def test_W_reg():
|
||||
(X_train, Y_train), (X_test, Y_test), test_ids = get_data()
|
||||
for reg in [regularizers.l1(),
|
||||
regularizers.l2(),
|
||||
regularizers.l1l2()]:
|
||||
@@ -59,6 +64,7 @@ def test_W_reg():
|
||||
|
||||
|
||||
def test_A_reg():
|
||||
(X_train, Y_train), (X_test, Y_test), test_ids = get_data()
|
||||
for reg in [regularizers.activity_l1(), regularizers.activity_l2()]:
|
||||
model = create_model(activity_reg=reg)
|
||||
model.compile(loss='categorical_crossentropy', optimizer='rmsprop')
|
||||
|
||||
Referência em uma Nova Issue
Bloquear um usuário