Comparar commits
191 Commits
| Autor | SHA1 | Data | |
|---|---|---|---|
| 12d068f675 | |||
| 070609cbac | |||
| 6b1bf7d917 | |||
| fefb70b217 | |||
| 48d8853cad | |||
| 2a3d4722c2 | |||
| d137d00182 | |||
| 9333179ad9 | |||
| 0c842391d3 | |||
| 1fcb74f218 | |||
| 1278bf9cfa | |||
| 18e5b75f67 | |||
| 766572b5b8 | |||
| 1de4d7cfba | |||
| 04107252f2 | |||
| 5f0e0d6c38 | |||
| 79406f111b | |||
| 30fa61d457 | |||
| 914d976801 | |||
| 839d4f108e | |||
| 5e73db6c00 | |||
| e9b8424839 | |||
| e3dd5d7ca5 | |||
| 2752a58730 | |||
| 69eb5752ce | |||
| 9090704f1d | |||
| fa9f863dbf | |||
| 4b1b706aa4 | |||
| 5e75b8506c | |||
| bc9f341165 | |||
| b40b8a00e4 | |||
| d811048887 | |||
| 0ba2626bd2 | |||
| c6eea03c8d | |||
| 0fd0218ef0 | |||
| 5e1a5d07c4 | |||
| 518fa3aa44 | |||
| ef1da479ec | |||
| 3f12d7ae44 | |||
| c4579a9c43 | |||
| ff62eb251b | |||
| 2b336756b6 | |||
| 0f0d8be884 | |||
| 3f3e0aa90e | |||
| c0ee5b859c | |||
| edae178532 | |||
| a0a0308061 | |||
| 74329d0c1d | |||
| 5777355972 | |||
| 0272587c29 | |||
| 22d3c8810c | |||
| 4aa8aa100b | |||
| bd404b1c88 | |||
| bed17efae8 | |||
| 8d0199ed42 | |||
| 9f33f8af5f | |||
| 7c4f033c6a | |||
| 7e2e7a5e5a | |||
| 909fbd19ea | |||
| 2b27ab1c9e | |||
| d244d38047 | |||
| 2a0b112d08 | |||
| d9657b70c0 | |||
| e8939f43a6 | |||
| 8e587fb17a | |||
| 757ae95cca | |||
| cc6e65d145 | |||
| df464c103e | |||
| d517b55576 | |||
| 4c1353c188 | |||
| 4871208f02 | |||
| 08566f22c7 | |||
| ea7b37a42a | |||
| 302eef7bad | |||
| 825adad18d | |||
| 4491212da4 | |||
| 52e2f3ed64 | |||
| 1de4fe0ba8 | |||
| e1208f5b9f | |||
| 0a8ac44617 | |||
| 1fd2108bcf | |||
| ad5e29a2b7 | |||
| 93b7dd9915 | |||
| 9256b76226 | |||
| fbd12f7d44 | |||
| 90c4895a7a | |||
| 6dfa8b1d60 | |||
| 5430844453 | |||
| 9dd06082e7 | |||
| cb4f93913e | |||
| 149946c706 | |||
| 78988b5cd6 | |||
| a081e049db | |||
| 68af216772 | |||
| b4a532e970 | |||
| 3bf913dc35 | |||
| 55163b5999 | |||
| 9bfbe6ae3e | |||
| b23e873e0f | |||
| 79ec9b8079 | |||
| 24d6cca275 | |||
| 83b90c172c | |||
| 57f2f11005 | |||
| bf502be578 | |||
| dfeca151a2 | |||
| 2ddd2bd557 | |||
| b2aebb30bf | |||
| 0a9c0ca461 | |||
| c0b32a9a04 | |||
| 703d5a1298 | |||
| c5cc96a4f4 | |||
| de256cb5d5 | |||
| ce814302ac | |||
| 628bc6e03e | |||
| dfb606bb19 | |||
| 88f3b3f75e | |||
| 773d4ce8cb | |||
| 509d6d8235 | |||
| 7bd5c862a2 | |||
| 2878f60634 | |||
| 50fdb87888 | |||
| dad7790ec3 | |||
| 709bc5e15a | |||
| 06cc6d7fea | |||
| 97484ec9c1 | |||
| 6b04add932 | |||
| 04ea01f385 | |||
| 8653060ae6 | |||
| 8df3effa5f | |||
| 771010f43b | |||
| 8d20bac7fa | |||
| c4c4fac1ae | |||
| 016d85c9e6 | |||
| 3ab29205fc | |||
| fdd150eb4d | |||
| 789a2be8d9 | |||
| ae7ef37c1b | |||
| 94fba3d8f0 | |||
| 6ac9af0a5a | |||
| e916f748db | |||
| 92e8a20761 | |||
| cb3de665d1 | |||
| 49a5cdf76d | |||
| 08a090de43 | |||
| fa3b17cd96 | |||
| 5266fdacf1 | |||
| b74c5953f0 | |||
| 00e8d20eae | |||
| e8e63e307e | |||
| 7db6de848a | |||
| 8360ef3a5a | |||
| d32b8fa4bd | |||
| c95c32e473 | |||
| 02fe371839 | |||
| b7b7c2ea94 | |||
| 105dd031dd | |||
| 4fa289166a | |||
| a8bbcf611f | |||
| d5030b1f8c | |||
| f127b2f81d | |||
| 9d4087a1e9 | |||
| fd326ddf1b | |||
| 7f42253f46 | |||
| 18d7e5e6e4 | |||
| 6610880fd4 | |||
| 11b73ae6b4 | |||
| 2b51317be8 | |||
| 650c2c8cf9 | |||
| 49386e8da4 | |||
| 71494ffdbc | |||
| a9b6bef062 | |||
| 4840e435f7 | |||
| 531147c877 | |||
| 61c21ef9ee | |||
| 058e54061b | |||
| 32be731194 | |||
| 9bf55395f1 | |||
| 114b82a212 | |||
| 7d143370d8 | |||
| bc6880fa34 | |||
| 40fd415409 | |||
| 8b11f13507 | |||
| 2c96373a41 | |||
| 731e1bb206 | |||
| c1a72b3644 | |||
| 0e7f3e04b0 | |||
| 53552b1d6e | |||
| 6b7421c448 | |||
| 1d0d79f61a | |||
| b5dddeb419 | |||
| a3697d097d |
+10
-10
@@ -3,18 +3,18 @@ dist: trusty
|
||||
language: python
|
||||
matrix:
|
||||
include:
|
||||
- python: 3.4
|
||||
env: KERAS_BACKEND=theano
|
||||
- python: 3.4
|
||||
env: KERAS_BACKEND=tensorflow
|
||||
- python: 2.7
|
||||
env: KERAS_BACKEND=theano
|
||||
- python: 2.7
|
||||
env: KERAS_BACKEND=tensorflow
|
||||
env: KERAS_BACKEND=theano TEST_MODE=PEP8
|
||||
- python: 2.7
|
||||
env: KERAS_BACKEND=theano TEST_MODE=INTEGRATION_TESTS
|
||||
- python: 2.7
|
||||
env: KERAS_BACKEND=theano TEST_MODE=PEP8
|
||||
env: KERAS_BACKEND=tensorflow
|
||||
- python: 3.4
|
||||
env: KERAS_BACKEND=tensorflow
|
||||
- python: 2.7
|
||||
env: KERAS_BACKEND=theano
|
||||
- python: 3.4
|
||||
env: KERAS_BACKEND=theano
|
||||
install:
|
||||
# code below is taken from http://conda.pydata.org/docs/travis.html
|
||||
# We do this conditionally because it saves us some downloading if the
|
||||
@@ -49,9 +49,9 @@ install:
|
||||
|
||||
# install TensorFlow
|
||||
- if [[ "$TRAVIS_PYTHON_VERSION" == "2.7" ]]; then
|
||||
pip install https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-0.9.0-cp27-none-linux_x86_64.whl;
|
||||
pip install https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-0.11.0-cp27-none-linux_x86_64.whl;
|
||||
elif [[ "$TRAVIS_PYTHON_VERSION" == "3.4" ]]; then
|
||||
pip install https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-0.9.0-cp34-cp34m-linux_x86_64.whl;
|
||||
pip install https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-0.11.0-cp34-cp34m-linux_x86_64.whl;
|
||||
fi
|
||||
# command to run tests
|
||||
script:
|
||||
|
||||
+6
-2
@@ -1,9 +1,13 @@
|
||||
Please make sure that the boxes below are checked before you submit your issue. Thank you!
|
||||
Please make sure that the boxes below are checked before you submit your issue. If your issue is an implementation question, please ask your question on [StackOverflow](http://stackoverflow.com/questions/tagged/keras) or [join the Keras Slack channel](https://keras-slack-autojoin.herokuapp.com/) and ask there instead of filing a GitHub issue.
|
||||
|
||||
Thank you!
|
||||
|
||||
- [ ] Check that you are up-to-date with the master branch of Keras. You can update with:
|
||||
pip install git+git://github.com/fchollet/keras.git --upgrade --no-deps
|
||||
|
||||
- [ ] If running on TensorFlow, check that you are up-to-date with the latest version. The installation instructions can be found [here](https://www.tensorflow.org/get_started/os_setup).
|
||||
|
||||
- [ ] If running on Theano, check that you are up-to-date with the master branch of Theano. You can update with:
|
||||
pip install git+git://github.com/Theano/Theano.git --upgrade --no-deps
|
||||
|
||||
- [ ] Provide a link to a GitHub Gist of a Python script that can reproduce your issue (or just copy the script here if it is short).
|
||||
- [ ] Provide a link to a GitHub Gist of a Python script that can reproduce your issue (or just copy the script here if it is short).
|
||||
|
||||
+2
-4
@@ -1,9 +1,7 @@
|
||||
# Keras: Deep Learning library for TensorFlow and Theano
|
||||
|
||||
[](https://travis-ci.org/fchollet/keras)
|
||||
[](https://badge.fury.io/py/keras)
|
||||
[](https://github.com/fchollet/keras/blob/master/LICENSE)
|
||||
[](https://gitter.im/Keras-io/Lobby)
|
||||
|
||||
|
||||
## You have just found Keras.
|
||||
@@ -152,9 +150,9 @@ By default, Keras will use TensorFlow as its tensor manipulation library. [Follo
|
||||
You can ask questions and join the development discussion:
|
||||
|
||||
- On the [Keras Google group](https://groups.google.com/forum/#!forum/keras-users).
|
||||
- On the [Keras Gitter channel](https://gitter.im/Keras-io/Lobby).
|
||||
- On the [Keras Slack channel](https://kerasteam.slack.com). Use [this link](https://keras-slack-autojoin.herokuapp.com/) to request an invitation to the channel.
|
||||
|
||||
You can also post bug reports and feature requests in [Github issues](https://github.com/fchollet/keras/issues). Make sure to read [our guidelines](https://github.com/fchollet/keras/blob/master/CONTRIBUTING.md) first.
|
||||
You can also post **bug reports and feature requests** (only) in [Github issues](https://github.com/fchollet/keras/issues). Make sure to read [our guidelines](https://github.com/fchollet/keras/blob/master/CONTRIBUTING.md) first.
|
||||
|
||||
|
||||
------------------
|
||||
|
||||
+4
-4
@@ -1,4 +1,4 @@
|
||||
FROM nvidia/cuda:7.5-cudnn5-devel
|
||||
FROM nvidia/cuda:8.0-cudnn5-devel
|
||||
|
||||
ENV CONDA_DIR /opt/conda
|
||||
ENV PATH $CONDA_DIR/bin:$PATH
|
||||
@@ -24,10 +24,10 @@ RUN useradd -m -s /bin/bash -N -u $NB_UID $NB_USER && \
|
||||
USER keras
|
||||
|
||||
# Python
|
||||
ARG python_version=3.5.1
|
||||
ARG tensorflow_version=0.9.0rc0-cp35-cp35m
|
||||
ARG python_version=3.5.2
|
||||
ARG tensorflow_version=0.12.0rc0-cp35-cp35m
|
||||
RUN conda install -y python=${python_version} && \
|
||||
pip install https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow-${tensorflow_version}-linux_x86_64.whl && \
|
||||
pip install https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-${tensorflow_version}-linux_x86_64.whl && \
|
||||
pip install git+git://github.com/Theano/Theano.git && \
|
||||
pip install ipdb pytest pytest-cov python-coveralls coverage==3.7.1 pytest-xdist pep8 pytest-pep8 pydot_ng && \
|
||||
conda install Pillow scikit-learn notebook pandas matplotlib nose pyyaml six h5py && \
|
||||
|
||||
@@ -139,6 +139,7 @@ PAGES = [
|
||||
core.Dense,
|
||||
core.Activation,
|
||||
core.Dropout,
|
||||
core.SpatialDropout1D,
|
||||
core.SpatialDropout2D,
|
||||
core.SpatialDropout3D,
|
||||
core.Flatten,
|
||||
|
||||
externo
+39
-5
@@ -181,7 +181,7 @@ model = InceptionV3(input_tensor=input_tensor, weights='imagenet', include_top=T
|
||||
|
||||
|
||||
```python
|
||||
keras.applications.xception.Xception(include_top=True, weights='imagenet', input_tensor=None)
|
||||
keras.applications.xception.Xception(include_top=True, weights='imagenet', input_tensor=None, input_shape=None)
|
||||
```
|
||||
|
||||
Xception V1 model, with weights pre-trained on ImageNet.
|
||||
@@ -200,6 +200,12 @@ The default input size for this model is 299x299.
|
||||
- include_top: whether to include the fully-connected layer at the top of the network.
|
||||
- weights: one of `None` (random initialization) or "imagenet" (pre-training on ImageNet).
|
||||
- input_tensor: optional Keras tensor (i.e. output of `layers.Input()`) to use as image input for the model.
|
||||
- input_shape: optional shape tuple, only to be specified
|
||||
if `include_top` is False (otherwise the input shape
|
||||
has to be `(299, 299, 3)`).
|
||||
It should have exactly 3 input channels,
|
||||
and width and height should be no smaller than 71.
|
||||
E.g. `(150, 150, 3)` would be one valid value.
|
||||
|
||||
### Returns
|
||||
|
||||
@@ -220,7 +226,7 @@ These weights are trained by ourselves and are released under the MIT license.
|
||||
## VGG16
|
||||
|
||||
```python
|
||||
keras.applications.vgg16.VGG16(include_top=True, weights='imagenet', input_tensor=None)
|
||||
keras.applications.vgg16.VGG16(include_top=True, weights='imagenet', input_tensor=None, input_shape=None)
|
||||
```
|
||||
|
||||
VGG16 model, with weights pre-trained on ImageNet.
|
||||
@@ -235,6 +241,13 @@ The default input size for this model is 224x224.
|
||||
- include_top: whether to include the 3 fully-connected layers at the top of the network.
|
||||
- weights: one of `None` (random initialization) or "imagenet" (pre-training on ImageNet).
|
||||
- input_tensor: optional Keras tensor (i.e. output of `layers.Input()`) to use as image input for the model.
|
||||
- input_shape: optional shape tuple, only to be specified
|
||||
if `include_top` is False (otherwise the input shape
|
||||
has to be `(224, 224, 3)` (with `tf` dim ordering)
|
||||
or `(3, 224, 224)` (with `th` dim ordering)).
|
||||
It should have exactly 3 input channels,
|
||||
and width and height should be no smaller than 48.
|
||||
E.g. `(200, 200, 3)` would be one valid value.
|
||||
|
||||
### Returns
|
||||
|
||||
@@ -254,7 +267,7 @@ These weights are ported from the ones [released by VGG at Oxford](http://www.ro
|
||||
|
||||
|
||||
```python
|
||||
keras.applications.vgg19.VGG19(include_top=True, weights='imagenet', input_tensor=None)
|
||||
keras.applications.vgg19.VGG19(include_top=True, weights='imagenet', input_tensor=None, input_shape=None)
|
||||
```
|
||||
|
||||
|
||||
@@ -270,6 +283,13 @@ The default input size for this model is 224x224.
|
||||
- include_top: whether to include the 3 fully-connected layers at the top of the network.
|
||||
- weights: one of `None` (random initialization) or "imagenet" (pre-training on ImageNet).
|
||||
- input_tensor: optional Keras tensor (i.e. output of `layers.Input()`) to use as image input for the model.
|
||||
- input_shape: optional shape tuple, only to be specified
|
||||
if `include_top` is False (otherwise the input shape
|
||||
has to be `(224, 224, 3)` (with `tf` dim ordering)
|
||||
or `(3, 224, 224)` (with `th` dim ordering)).
|
||||
It should have exactly 3 input channels,
|
||||
and width and height should be no smaller than 48.
|
||||
E.g. `(200, 200, 3)` would be one valid value.
|
||||
|
||||
### Returns
|
||||
|
||||
@@ -290,7 +310,7 @@ These weights are ported from the ones [released by VGG at Oxford](http://www.ro
|
||||
|
||||
|
||||
```python
|
||||
keras.applications.resnet50.ResNet50(include_top=True, weights='imagenet', input_tensor=None)
|
||||
keras.applications.resnet50.ResNet50(include_top=True, weights='imagenet', input_tensor=None, input_shape=None)
|
||||
```
|
||||
|
||||
|
||||
@@ -307,6 +327,13 @@ The default input size for this model is 224x224.
|
||||
- include_top: whether to include the fully-connected layer at the top of the network.
|
||||
- weights: one of `None` (random initialization) or "imagenet" (pre-training on ImageNet).
|
||||
- input_tensor: optional Keras tensor (i.e. output of `layers.Input()`) to use as image input for the model.
|
||||
- input_shape: optional shape tuple, only to be specified
|
||||
if `include_top` is False (otherwise the input shape
|
||||
has to be `(224, 224, 3)` (with `tf` dim ordering)
|
||||
or `(3, 224, 224)` (with `th` dim ordering)).
|
||||
It should have exactly 3 input channels,
|
||||
and width and height should be no smaller than 197.
|
||||
E.g. `(200, 200, 3)` would be one valid value.
|
||||
|
||||
### Returns
|
||||
|
||||
@@ -326,7 +353,7 @@ These weights are ported from the ones [released by Kaiming He](https://github.c
|
||||
|
||||
|
||||
```python
|
||||
keras.applications.inception_v3.InceptionV3(include_top=True, weights='imagenet', input_tensor=None)
|
||||
keras.applications.inception_v3.InceptionV3(include_top=True, weights='imagenet', input_tensor=None, input_shape=None)
|
||||
```
|
||||
|
||||
Inception V3 model, with weights pre-trained on ImageNet.
|
||||
@@ -342,6 +369,13 @@ The default input size for this model is 299x299.
|
||||
- include_top: whether to include the fully-connected layer at the top of the network.
|
||||
- weights: one of `None` (random initialization) or "imagenet" (pre-training on ImageNet).
|
||||
- input_tensor: optional Keras tensor (i.e. output of `layers.Input()`) to use as image input for the model.
|
||||
- input_shape: optional shape tuple, only to be specified
|
||||
if `include_top` is False (otherwise the input shape
|
||||
has to be `(299, 299, 3)` (with `tf` dim ordering)
|
||||
or `(3, 299, 299)` (with `th` dim ordering)).
|
||||
It should have exactly 3 input channels,
|
||||
and width and height should be no smaller than 139.
|
||||
E.g. `(150, 150, 3)` would be one valid value.
|
||||
|
||||
### Returns
|
||||
|
||||
|
||||
externo
+23
@@ -44,6 +44,29 @@ Using TensorFlow backend.
|
||||
|
||||
----
|
||||
|
||||
## keras.json details
|
||||
|
||||
|
||||
```
|
||||
{
|
||||
"image_dim_ordering": "tf",
|
||||
"epsilon": 1e-07,
|
||||
"floatx": "float32",
|
||||
"backend": "tensorflow"
|
||||
}
|
||||
```
|
||||
|
||||
You can change these settings by editing `~/.keras/keras.json`.
|
||||
|
||||
* `image_dim_ordering`: string, either `"tf"` or `"th"`. It specifies which dimension ordering convention Keras will follow. (`keras.backend.image_dim_ordering()` returns it.)
|
||||
- For 2D data (e.g. image), `"tf"` assumes `(rows, cols, channels)` while `"th"` assumes `(channels, rows, cols)`.
|
||||
- For 3D data, `"tf"` assumes `(conv_dim1, conv_dim2, conv_dim3, channels)` while `"th"` assumes `(channels, conv_dim1, conv_dim2, conv_dim3)`.
|
||||
* `epsilon`: float, a numeric fuzzing constant used to avoid dividing by zero in some operations.
|
||||
* `floatx`: string, `"float16"`, `"float32"`, or `"float64"`. Default float precision.
|
||||
* `backend`: string, `"tensorflow"` or `"theano"`.
|
||||
|
||||
----
|
||||
|
||||
## Using the abstract Keras backend to write new code
|
||||
|
||||
If you want the Keras modules you write to be compatible with both Theano and TensorFlow, you have to write them via the abstract Keras backend API. Here's an intro.
|
||||
|
||||
externo
+1
-1
@@ -2,7 +2,7 @@
|
||||
|
||||
Functions from the `constraints` module allow setting constraints (eg. non-negativity) on network parameters during optimization.
|
||||
|
||||
The penalties are applied on a per-layer basis. The exact API will depend on the layer, but the layers `Dense`, `TimeDistributedDense`, `MaxoutDense`, `Convolution1D` and `Convolution2D` have a unified API.
|
||||
The penalties are applied on a per-layer basis. The exact API will depend on the layer, but the layers `Dense`, `TimeDistributedDense`, `MaxoutDense`, `Convolution1D`, `Convolution2D` and `Convolution3D` have a unified API.
|
||||
|
||||
These layers expose 2 keyword arguments:
|
||||
|
||||
|
||||
+18
-20
@@ -4,7 +4,7 @@
|
||||
- [How can I run Keras on GPU?](#how-can-i-run-keras-on-gpu)
|
||||
- [How can I save a Keras model?](#how-can-i-save-a-keras-model)
|
||||
- [Why is the training loss much higher than the testing loss?](#why-is-the-training-loss-much-higher-than-the-testing-loss)
|
||||
- [How can I visualize the output of an intermediate layer?](#how-can-i-visualize-the-output-of-an-intermediate-layer)
|
||||
- [How can I obtain the output of an intermediate layer?](#how-can-i-obtain-the-output-of-an-intermediate-layer)
|
||||
- [How can I use Keras with datasets that don't fit in memory?](#how-can-i-use-keras-with-datasets-that-dont-fit-in-memory)
|
||||
- [How can I interrupt training when the validation loss isn't decreasing anymore?](#how-can-i-interrupt-training-when-the-validation-loss-isnt-decreasing-anymore)
|
||||
- [How is the validation split computed?](#how-is-the-validation-split-computed)
|
||||
@@ -156,9 +156,22 @@ Besides, the training loss is the average of the losses over each batch of train
|
||||
|
||||
---
|
||||
|
||||
### How can I visualize the output of an intermediate layer?
|
||||
### How can I obtain the output of an intermediate layer?
|
||||
|
||||
You can build a Keras function that will return the output of a certain layer given a certain input, for example:
|
||||
One simple way is to create a new `Model` that will output the layers that you are interested in:
|
||||
|
||||
```python
|
||||
from keras.models import Model
|
||||
|
||||
model = ... # create the original model
|
||||
|
||||
layer_name = 'my_layer'
|
||||
intermediate_layer_model = Model(input=model.input,
|
||||
output=model.get_layer(layer_name).output)
|
||||
intermediate_output = intermediate_layer_model.predict(data)
|
||||
```
|
||||
|
||||
Alternatively, you can build a Keras function that will return the output of a certain layer given a certain input, for example:
|
||||
|
||||
```python
|
||||
from keras import backend as K
|
||||
@@ -185,22 +198,6 @@ layer_output = get_3rd_layer_output([X, 0])[0]
|
||||
layer_output = get_3rd_layer_output([X, 1])[0]
|
||||
```
|
||||
|
||||
Another more flexible way of getting output from intermediate layers is to use the [functional API](/getting-started/functional-api-guide). For example, if you have created an autoencoder for MNIST:
|
||||
|
||||
```python
|
||||
inputs = Input(shape=(784,))
|
||||
encoded = Dense(32, activation='relu')(inputs)
|
||||
decoded = Dense(784)(encoded)
|
||||
model = Model(input=inputs, output=decoded)
|
||||
```
|
||||
|
||||
After compiling and training the model, you can get the output of the data from the encoder like this:
|
||||
|
||||
```python
|
||||
encoder = Model(input=inputs, output=encoded)
|
||||
X_encoded = encoder.predict(X)
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### How can I use Keras with datasets that don't fit in memory?
|
||||
@@ -229,8 +226,9 @@ Find out more in the [callbacks documentation](/callbacks).
|
||||
|
||||
### How is the validation split computed?
|
||||
|
||||
If you set the `validation_split` argument in `model.fit` to e.g. 0.1, then the validation data used will be the *last 10%* of the data. If you set it to 0.25, it will be the last 25% of the data, etc.
|
||||
If you set the `validation_split` argument in `model.fit` to e.g. 0.1, then the validation data used will be the *last 10%* of the data. If you set it to 0.25, it will be the last 25% of the data, etc. Note that the data isn't shuffled before extracting the validation split, so the validation is literally just the *last* x% of samples in the input you passed.
|
||||
|
||||
The same validation set is used for all epochs (within a same call to `fit`).
|
||||
|
||||
---
|
||||
|
||||
|
||||
externo
+2
-2
@@ -146,9 +146,9 @@ By default, Keras will use TensorFlow as its tensor manipulation library. [Follo
|
||||
You can ask questions and join the development discussion:
|
||||
|
||||
- On the [Keras Google group](https://groups.google.com/forum/#!forum/keras-users).
|
||||
- On the [Keras Gitter channel](https://gitter.im/Keras-io/Lobby).
|
||||
- On the [Keras Slack channel](https://kerasteam.slack.com). Use [this link](https://keras-slack-autojoin.herokuapp.com/) to request an invitation to the channel.
|
||||
|
||||
You can also post bug reports and feature requests in [Github issues](https://github.com/fchollet/keras/issues). Make sure to read [our guidelines](https://github.com/fchollet/keras/blob/master/CONTRIBUTING.md) first.
|
||||
You can also post **bug reports and feature requests** (only) in [Github issues](https://github.com/fchollet/keras/issues). Make sure to read [our guidelines](https://github.com/fchollet/keras/blob/master/CONTRIBUTING.md) first.
|
||||
|
||||
|
||||
------------------
|
||||
|
||||
@@ -2,9 +2,9 @@
|
||||
|
||||
For simple, stateless custom operations, you are probably better off using `layers.core.Lambda` layers. But for any custom operation that has trainable weights, you should implement your own layer.
|
||||
|
||||
Here is the skeleton of a Keras layer. There are only three methods you need to implement:
|
||||
Here is the skeleton of a Keras layer, **as of Keras 1.1.3** (if you have an older version, please upgrade). There are only three methods you need to implement:
|
||||
|
||||
- `build(input_shape)`: this is where you will define your weights. Trainable weights should be added to the list `self.trainable_weights`. Other attributes of note are: `self.non_trainable_weights` (list) and `self.updates` (list of update tuples (tensor, new_tensor)). For an example of how to use `non_trainable_weights` and `updates`, see the code for the `BatchNormalization` layer.
|
||||
- `build(input_shape)`: this is where you will define your weights. This method must set `self.built = True`, which can be done by calling `super([Layer], self).build()`.
|
||||
- `call(x)`: this is where the layer's logic lives. Unless you want your layer to support masking, you only have to care about the first argument passed to `call`: the input tensor.
|
||||
- `get_output_shape_for(input_shape)`: in case your layer modifies the shape of its input, you should specify here the shape transformation logic. This allows Keras to do automatic shape inference.
|
||||
|
||||
@@ -19,10 +19,11 @@ class MyLayer(Layer):
|
||||
super(MyLayer, self).__init__(**kwargs)
|
||||
|
||||
def build(self, input_shape):
|
||||
input_dim = input_shape[1]
|
||||
initial_weight_value = np.random.random((input_dim, output_dim))
|
||||
self.W = K.variable(initial_weight_value)
|
||||
self.trainable_weights = [self.W]
|
||||
# Create a trainable weight variable for this layer.
|
||||
self.W = self.add_weight(shape=(input_shape[1], self.output_dim),
|
||||
initializer='random_uniform',
|
||||
trainable=True)
|
||||
super(MyLayer, self).build() # Be sure to call this somewhere!
|
||||
|
||||
def call(self, x, mask=None):
|
||||
return K.dot(x, self.W)
|
||||
@@ -31,4 +32,4 @@ class MyLayer(Layer):
|
||||
return (input_shape[0], self.output_dim)
|
||||
```
|
||||
|
||||
The existing Keras layers provide ample examples of how to implement almost anything. Never hesitate to read the source code!
|
||||
The existing Keras layers provide ample examples of how to implement almost anything. Never hesitate to read the source code!
|
||||
|
||||
externo
+1
-1
@@ -9,7 +9,7 @@ You can either pass the name of an existing metric, or pass a Theano/TensorFlow
|
||||
|
||||
#### Arguments
|
||||
- __y_true__: True labels. Theano/TensorFlow tensor.
|
||||
- __y_pred__: Predictions. Theano/TensorFlow tensor of the same shape as y_true.
|
||||
- __y_pred__: Predictions. Theano/TensorFlow tensor of the same shape as y_true.
|
||||
|
||||
#### Returns
|
||||
Single tensor value representing the mean of the output array across all
|
||||
|
||||
+16
-8
@@ -24,9 +24,9 @@ keras.preprocessing.image.ImageDataGenerator(featurewise_center=False,
|
||||
Generate batches of tensor image data with real-time data augmentation. The data will be looped over (in batches) indefinitely.
|
||||
|
||||
- __Arguments__:
|
||||
- __featurewise_center__: Boolean. Set input mean to 0 over the dataset.
|
||||
- __featurewise_center__: Boolean. Set input mean to 0 over the dataset, feature-wise.
|
||||
- __samplewise_center__: Boolean. Set each sample mean to 0.
|
||||
- __featurewise_std_normalization__: Boolean. Divide inputs by std of the dataset.
|
||||
- __featurewise_std_normalization__: Boolean. Divide inputs by std of the dataset, feature-wise.
|
||||
- __samplewise_std_normalization__: Boolean. Divide each input by its std.
|
||||
- __zca_whitening__: Boolean. Apply ZCA whitening.
|
||||
- __rotation_range__: Int. Degree range for random rotations.
|
||||
@@ -43,8 +43,8 @@ Generate batches of tensor image data with real-time data augmentation. The data
|
||||
otherwise we multiply the data by the value provided (before applying
|
||||
any other transformation).
|
||||
- __dim_ordering__: One of {"th", "tf"}.
|
||||
"tf" mode means that the images should have shape `(samples, width, height, channels)`,
|
||||
"th" mode means that the images should have shape `(samples, channels, width, height)`.
|
||||
"tf" mode means that the images should have shape `(samples, height, width, channels)`,
|
||||
"th" mode means that the images should have shape `(samples, channels, height, width)`.
|
||||
It defaults to the `image_dim_ordering` value found in your
|
||||
Keras config file at `~/.keras/keras.json`.
|
||||
If you never set it, then it will be "tf".
|
||||
@@ -53,13 +53,19 @@ Generate batches of tensor image data with real-time data augmentation. The data
|
||||
- __fit(X)__: Compute the internal data stats related to the data-dependent transformations, based on an array of sample data.
|
||||
Only required if featurewise_center or featurewise_std_normalization or zca_whitening.
|
||||
- __Arguments__:
|
||||
- __X__: sample data.
|
||||
- __X__: sample data. Should have rank 4.
|
||||
In case of grayscale data,
|
||||
the channels axis should have value 1, and in case
|
||||
of RGB data, it should have value 3.
|
||||
- __augment__: Boolean (default: False). Whether to fit on randomly augmented samples.
|
||||
- __rounds__: int (default: 1). If augment, how many augmentation passes over the data to use.
|
||||
- __seed__: int (default: None). Random seed.
|
||||
- __flow(X, y)__: Takes numpy data & label arrays, and generates batches of augmented/normalized data. Yields batches indefinitely, in an infinite loop.
|
||||
- __Arguments__:
|
||||
- __X__: data.
|
||||
- __X__: data. Should have rank 4.
|
||||
In case of grayscale data,
|
||||
the channels axis should have value 1, and in case
|
||||
of RGB data, it should have value 3.
|
||||
- __y__: labels.
|
||||
- __batch_size__: int (default: 32).
|
||||
- __shuffle__: boolean (default: True).
|
||||
@@ -71,8 +77,9 @@ Generate batches of tensor image data with real-time data augmentation. The data
|
||||
The generator loops indefinitely.
|
||||
- __flow_from_directory(directory)__: Takes the path to a directory, and generates batches of augmented/normalized data. Yields batches indefinitely, in an infinite loop.
|
||||
- __Arguments__:
|
||||
- __directory__: path to the target directory. It should contain one subdirectory per class,
|
||||
and the subdirectories should contain PNG or JPG images. See [this script](https://gist.github.com/fchollet/0830affa1f7f19fd47b06d4cf89ed44d) for more details.
|
||||
- __directory__: path to the target directory. It should contain one subdirectory per class.
|
||||
Any PNG, JPG or BMP images inside each subdirectory's directory tree will be included in the generator.
|
||||
See [this script](https://gist.github.com/fchollet/0830affa1f7f19fd47b06d4cf89ed44d) for more details.
|
||||
- __target_size__: tuple of integers, default: `(256, 256)`. The dimensions to which all images found will be resized.
|
||||
- __color_mode__: one of "grayscale", "rgb". Default: "rgb". Whether the images will be converted to have 1 or 3 color channels.
|
||||
- __classes__: optional list of class subdirectories (e.g. `['dogs', 'cats']`). Default: None. If not provided, the list of classes will be automatically inferred (and the order of the classes, which will map to the label indices, will be alphanumeric).
|
||||
@@ -83,6 +90,7 @@ Generate batches of tensor image data with real-time data augmentation. The data
|
||||
- __save_to_dir__: None or str (default: None). This allows you to optionally specify a directory to which to save the augmented pictures being generated (useful for visualizing what you are doing).
|
||||
- __save_prefix__: str. Prefix to use for filenames of saved pictures (only relevant if `save_to_dir` is set).
|
||||
- __save_format__: one of "png", "jpeg" (only relevant if `save_to_dir` is set). Default: "jpeg".
|
||||
- __follow_links__: whether to follow symlinks inside class subdirectories (default: False).
|
||||
|
||||
|
||||
- __Examples__:
|
||||
|
||||
externo
+1
-1
@@ -2,7 +2,7 @@
|
||||
|
||||
Regularizers allow you to apply penalties on layer parameters or layer activity during optimization. These penalties are incorporated in the loss function that the network optimizes.
|
||||
|
||||
The penalties are applied on a per-layer basis. The exact API will depend on the layer, but the layers `Dense`, `TimeDistributedDense`, `MaxoutDense`, `Convolution1D` and `Convolution2D` have a unified API.
|
||||
The penalties are applied on a per-layer basis. The exact API will depend on the layer, but the layers `Dense`, `TimeDistributedDense`, `MaxoutDense`, `Convolution1D`, `Convolution2D` and `Convolution3D` have a unified API.
|
||||
|
||||
These layers expose 3 keyword arguments:
|
||||
|
||||
|
||||
@@ -18,6 +18,9 @@ Trains a simple deep CNN on the CIFAR10 small images dataset.
|
||||
[conv_filter_visualization.py](conv_filter_visualization.py)
|
||||
Visualization of the filters of VGG16, via gradient ascent in input space.
|
||||
|
||||
[conv_lstm.py](conv_lstm.py)
|
||||
Demonstrates the use of a convolutional LSTM network.
|
||||
|
||||
[deep_dream.py](deep_dream.py)
|
||||
Deep Dreams in Keras.
|
||||
|
||||
|
||||
+10
-17
@@ -1,14 +1,10 @@
|
||||
'''Train a simple deep CNN on the CIFAR10 small images dataset.
|
||||
|
||||
GPU run command:
|
||||
GPU run command with Theano backend (with TensorFlow, the GPU is automatically used):
|
||||
THEANO_FLAGS=mode=FAST_RUN,device=gpu,floatX=float32 python cifar10_cnn.py
|
||||
|
||||
It gets down to 0.65 test logloss in 25 epochs, and down to 0.55 after 50 epochs.
|
||||
(it's still underfitting at that point, though).
|
||||
|
||||
Note: the data was pickled with Python 2, and some encoding issues might prevent you
|
||||
from loading it in Python 3. You might have to load it in Python 2,
|
||||
save it in a different format, load it in Python 3 and repickle it.
|
||||
'''
|
||||
|
||||
from __future__ import print_function
|
||||
@@ -17,7 +13,6 @@ from keras.preprocessing.image import ImageDataGenerator
|
||||
from keras.models import Sequential
|
||||
from keras.layers import Dense, Dropout, Activation, Flatten
|
||||
from keras.layers import Convolution2D, MaxPooling2D
|
||||
from keras.optimizers import SGD
|
||||
from keras.utils import np_utils
|
||||
|
||||
batch_size = 32
|
||||
@@ -27,16 +22,16 @@ data_augmentation = True
|
||||
|
||||
# input image dimensions
|
||||
img_rows, img_cols = 32, 32
|
||||
# the CIFAR10 images are RGB
|
||||
# The CIFAR10 images are RGB.
|
||||
img_channels = 3
|
||||
|
||||
# the data, shuffled and split between train and test sets
|
||||
# The data, shuffled and split between train and test sets:
|
||||
(X_train, y_train), (X_test, y_test) = cifar10.load_data()
|
||||
print('X_train shape:', X_train.shape)
|
||||
print(X_train.shape[0], 'train samples')
|
||||
print(X_test.shape[0], 'test samples')
|
||||
|
||||
# convert class vectors to binary class matrices
|
||||
# Convert class vectors to binary class matrices.
|
||||
Y_train = np_utils.to_categorical(y_train, nb_classes)
|
||||
Y_test = np_utils.to_categorical(y_test, nb_classes)
|
||||
|
||||
@@ -64,10 +59,9 @@ model.add(Dropout(0.5))
|
||||
model.add(Dense(nb_classes))
|
||||
model.add(Activation('softmax'))
|
||||
|
||||
# let's train the model using SGD + momentum (how original).
|
||||
sgd = SGD(lr=0.01, decay=1e-6, momentum=0.9, nesterov=True)
|
||||
# Let's train the model using RMSprop
|
||||
model.compile(loss='categorical_crossentropy',
|
||||
optimizer=sgd,
|
||||
optimizer='rmsprop',
|
||||
metrics=['accuracy'])
|
||||
|
||||
X_train = X_train.astype('float32')
|
||||
@@ -84,8 +78,7 @@ if not data_augmentation:
|
||||
shuffle=True)
|
||||
else:
|
||||
print('Using real-time data augmentation.')
|
||||
|
||||
# this will do preprocessing and realtime data augmentation
|
||||
# This will do preprocessing and realtime data augmentation:
|
||||
datagen = ImageDataGenerator(
|
||||
featurewise_center=False, # set input mean to 0 over the dataset
|
||||
samplewise_center=False, # set each sample mean to 0
|
||||
@@ -98,11 +91,11 @@ else:
|
||||
horizontal_flip=True, # randomly flip images
|
||||
vertical_flip=False) # randomly flip images
|
||||
|
||||
# compute quantities required for featurewise normalization
|
||||
# (std, mean, and principal components if ZCA whitening is applied)
|
||||
# Compute quantities required for featurewise normalization
|
||||
# (std, mean, and principal components if ZCA whitening is applied).
|
||||
datagen.fit(X_train)
|
||||
|
||||
# fit the model on the batches generated by datagen.flow()
|
||||
# Fit the model on the batches generated by datagen.flow().
|
||||
model.fit_generator(datagen.flow(X_train, Y_train,
|
||||
batch_size=batch_size),
|
||||
samples_per_epoch=X_train.shape[0],
|
||||
|
||||
@@ -0,0 +1,142 @@
|
||||
""" This script demonstrates the use of a convolutional LSTM network.
|
||||
This network is used to predict the next frame of an artificially
|
||||
generated movie which contains moving squares.
|
||||
"""
|
||||
from keras.models import Sequential
|
||||
from keras.layers.convolutional import Convolution3D
|
||||
from keras.layers.convolutional_recurrent import ConvLSTM2D
|
||||
from keras.layers.normalization import BatchNormalization
|
||||
import numpy as np
|
||||
import pylab as plt
|
||||
|
||||
# We create a layer which takes as input movies of shape
|
||||
# (n_frames, width, height, channels) and returns a movie
|
||||
# of identical shape.
|
||||
|
||||
seq = Sequential()
|
||||
seq.add(ConvLSTM2D(nb_filter=40, nb_row=3, nb_col=3,
|
||||
input_shape=(None, 40, 40, 1),
|
||||
border_mode='same', return_sequences=True))
|
||||
seq.add(BatchNormalization())
|
||||
|
||||
seq.add(ConvLSTM2D(nb_filter=40, nb_row=3, nb_col=3,
|
||||
border_mode='same', return_sequences=True))
|
||||
seq.add(BatchNormalization())
|
||||
|
||||
seq.add(ConvLSTM2D(nb_filter=40, nb_row=3, nb_col=3,
|
||||
border_mode='same', return_sequences=True))
|
||||
seq.add(BatchNormalization())
|
||||
|
||||
seq.add(ConvLSTM2D(nb_filter=40, nb_row=3, nb_col=3,
|
||||
border_mode='same', return_sequences=True))
|
||||
seq.add(BatchNormalization())
|
||||
|
||||
seq.add(Convolution3D(nb_filter=1, kernel_dim1=1, kernel_dim2=3,
|
||||
kernel_dim3=3, activation='sigmoid',
|
||||
border_mode='same', dim_ordering='tf'))
|
||||
|
||||
seq.compile(loss='binary_crossentropy', optimizer='adadelta')
|
||||
|
||||
|
||||
# Artificial data generation:
|
||||
# Generate movies with 3 to 7 moving squares inside.
|
||||
# The squares are of shape 4x4 or 6x6 pixels (half-width 2 or 3),
|
||||
# which move linearly over time.
|
||||
# For convenience we first create movies with bigger width and height (80x80)
|
||||
# and at the end we select a 40x40 window.
|
||||
|
||||
def generate_movies(n_samples=1200, n_frames=15):
    """Generate artificial movies of moving squares and their next-frame targets.

    Each movie is drawn on an 80x80 canvas and holds 3 to 7 squares. A square
    has a random start position in [20, 60), a per-axis velocity in
    {-1, 0, 1} pixels per frame and a half-width `w` of 2 or 3 (so it is
    4x4 or 6x6 pixels). At the end only the central 40x40 window is kept.

    # Arguments
        n_samples: number of movies to generate.
        n_frames: number of frames per movie.

    # Returns
        A tuple `(noisy_movies, shifted_movies)` of float64 arrays, each of
        shape `(n_samples, n_frames, 40, 40, 1)`:
        - `noisy_movies`: the input frames, with values clipped to at most 1
            and, for roughly half of the (square, frame) pairs, a +/-0.1
            perturbation on a patch one pixel larger than the square.
        - `shifted_movies`: the same squares drawn one time step ahead
            (the prediction target), with values in {0, 1}.
    """
    row = 80
    col = 80
    # `np.float` was only an alias of the builtin `float` and has been removed
    # from recent NumPy releases; the builtin yields the same float64 dtype.
    noisy_movies = np.zeros((n_samples, n_frames, row, col, 1), dtype=float)
    shifted_movies = np.zeros((n_samples, n_frames, row, col, 1),
                              dtype=float)

    for i in range(n_samples):
        # Add 3 to 7 moving squares
        n = np.random.randint(3, 8)

        for _ in range(n):
            # Initial position
            xstart = np.random.randint(20, 60)
            ystart = np.random.randint(20, 60)
            # Direction of motion
            directionx = np.random.randint(0, 3) - 1
            directiony = np.random.randint(0, 3) - 1

            # Half-width of the square
            w = np.random.randint(2, 4)

            for t in range(n_frames):
                x_shift = xstart + directionx * t
                y_shift = ystart + directiony * t
                noisy_movies[i, t, x_shift - w: x_shift + w,
                             y_shift - w: y_shift + w, 0] += 1

                # Make it more robust by adding noise.
                # The idea is that if during inference,
                # the value of the pixel is not exactly one,
                # we need to train the network to be robust and still
                # consider it as a pixel belonging to a square.
                if np.random.randint(0, 2):
                    noise_f = (-1)**np.random.randint(0, 2)
                    noisy_movies[i, t,
                                 x_shift - w - 1: x_shift + w + 1,
                                 y_shift - w - 1: y_shift + w + 1,
                                 0] += noise_f * 0.1

                # Shift the ground truth by 1
                x_shift = xstart + directionx * (t + 1)
                y_shift = ystart + directiony * (t + 1)
                shifted_movies[i, t, x_shift - w: x_shift + w,
                               y_shift - w: y_shift + w, 0] += 1

    # Cut to a 40x40 window
    noisy_movies = noisy_movies[::, ::, 20:60, 20:60, ::]
    shifted_movies = shifted_movies[::, ::, 20:60, 20:60, ::]
    # Overlapping squares accumulate values above 1; saturate them.
    noisy_movies[noisy_movies >= 1] = 1
    shifted_movies[shifted_movies >= 1] = 1
    return noisy_movies, shifted_movies
|
||||
|
||||
# Train the network
|
||||
noisy_movies, shifted_movies = generate_movies(n_samples=1200)
|
||||
seq.fit(noisy_movies[:1000], shifted_movies[:1000], batch_size=10,
|
||||
nb_epoch=300, validation_split=0.05)
|
||||
|
||||
# Testing the network on one movie
|
||||
# feed it with the first 7 positions and then
|
||||
# predict the new positions
|
||||
which = 1004
|
||||
track = noisy_movies[which][:7, ::, ::, ::]
|
||||
|
||||
for j in range(16):
|
||||
new_pos = seq.predict(track[np.newaxis, ::, ::, ::, ::])
|
||||
new = new_pos[::, -1, ::, ::, ::]
|
||||
track = np.concatenate((track, new), axis=0)
|
||||
|
||||
|
||||
# And then compare the predictions
|
||||
# to the ground truth
|
||||
track2 = noisy_movies[which][::, ::, ::, ::]
|
||||
for i in range(15):
|
||||
fig = plt.figure(figsize=(10, 5))
|
||||
|
||||
ax = fig.add_subplot(121)
|
||||
|
||||
if i >= 7:
|
||||
ax.text(1, 3, 'Predictions !', fontsize=20, color='w')
|
||||
else:
|
||||
ax.text(1, 3, 'Inital trajectory', fontsize=20)
|
||||
|
||||
toplot = track[i, ::, ::, 0]
|
||||
|
||||
plt.imshow(toplot)
|
||||
ax = fig.add_subplot(122)
|
||||
plt.text(1, 3, 'Ground truth', fontsize=20)
|
||||
|
||||
toplot = track2[i, ::, ::, 0]
|
||||
if i >= 2:
|
||||
toplot = shifted_movies[which][i - 1, ::, ::, 0]
|
||||
|
||||
plt.imshow(toplot)
|
||||
plt.savefig('%i_animate.png' % (i + 1))
|
||||
@@ -0,0 +1,310 @@
|
||||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
Train an Auxiliary Classifier Generative Adversarial Network (ACGAN) on the
|
||||
MNIST dataset. See https://arxiv.org/abs/1610.09585 for more details.
|
||||
|
||||
You should start to see reasonable images after ~5 epochs, and good images
|
||||
by ~15 epochs. You should use a GPU, as the convolution-heavy operations are
|
||||
very slow on the CPU. Prefer the TensorFlow backend if you plan on iterating, as
|
||||
the compilation time can be a blocker using Theano.
|
||||
|
||||
Timings:
|
||||
|
||||
Hardware | Backend | Time / Epoch
|
||||
-------------------------------------------
|
||||
CPU | TF | 3 hrs
|
||||
Titan X (maxwell) | TF | 4 min
|
||||
Titan X (maxwell) | TH | 7 min
|
||||
|
||||
Consult https://github.com/lukedeo/keras-acgan for more information and
|
||||
example output
|
||||
"""
|
||||
from __future__ import print_function
|
||||
|
||||
from collections import defaultdict
|
||||
try:
|
||||
import cPickle as pickle
|
||||
except ImportError:
|
||||
import pickle
|
||||
from PIL import Image
|
||||
|
||||
from six.moves import range
|
||||
|
||||
import keras.backend as K
|
||||
from keras.datasets import mnist
|
||||
from keras.layers import Input, Dense, Reshape, Flatten, Embedding, merge, Dropout
|
||||
from keras.layers.advanced_activations import LeakyReLU
|
||||
from keras.layers.convolutional import UpSampling2D, Convolution2D
|
||||
from keras.models import Sequential, Model
|
||||
from keras.optimizers import Adam
|
||||
from keras.utils.generic_utils import Progbar
|
||||
import numpy as np
|
||||
|
||||
np.random.seed(1337)
|
||||
|
||||
K.set_image_dim_ordering('th')
|
||||
|
||||
|
||||
def build_generator(latent_size):
    """Build the ACGAN generator.

    Maps a pair (z, L), where z is a latent vector of length `latent_size`
    and L is an integer class label, to image space (..., 1, 28, 28).

    # Arguments
        latent_size: dimensionality of the latent vector z.

    # Returns
        A `Model` taking `[latent, image_class]` and producing the
        generated image tensor.
    """
    decoder = Sequential()

    # Dense stem, reshaped into 128 feature maps of size 7x7.
    decoder.add(Dense(1024, input_dim=latent_size, activation='relu'))
    decoder.add(Dense(128 * 7 * 7, activation='relu'))
    decoder.add(Reshape((128, 7, 7)))

    # Two upsample + 5x5 convolution stages:
    # (..., 7, 7) -> (..., 14, 14) -> (..., 28, 28)
    for nb_filter in (256, 128):
        decoder.add(UpSampling2D(size=(2, 2)))
        decoder.add(Convolution2D(nb_filter, 5, 5, border_mode='same',
                                  activation='relu', init='glorot_normal'))

    # Collapse the channel axis down to a single tanh-activated map.
    decoder.add(Convolution2D(1, 2, 2, border_mode='same',
                              activation='tanh', init='glorot_normal'))

    # The z space commonly referred to in GAN papers.
    latent = Input(shape=(latent_size, ))

    # The conditioning label, fed as a length-one integer sequence.
    image_class = Input(shape=(1,), dtype='int32')

    # One embedding row per MNIST class (10 classes), flattened to a vector
    # of the same length as z.
    flatten = Flatten()
    class_embedding = flatten(Embedding(10, latent_size,
                                        init='glorot_normal')(image_class))

    # Hadamard product between z-space and the class conditional embedding.
    conditioned = merge([latent, class_embedding], mode='mul')

    return Model(input=[latent, image_class], output=decoder(conditioned))
|
||||
|
||||
|
||||
def build_discriminator():
    """Build the ACGAN discriminator.

    A relatively standard conv net with LeakyReLUs (as suggested in the
    reference paper) over (1, 28, 28) images, followed by two dense heads.

    # Returns
        A `Model` mapping an image to `[fake, aux]`, where `fake` is the
        single sigmoid unit named 'generation' and `aux` is the 10-way
        softmax named 'auxiliary' (the predicted class).
    """
    feature_extractor = Sequential()

    # First block declares the input shape and halves the resolution.
    feature_extractor.add(Convolution2D(32, 3, 3, border_mode='same',
                                        subsample=(2, 2),
                                        input_shape=(1, 28, 28)))
    feature_extractor.add(LeakyReLU())
    feature_extractor.add(Dropout(0.3))

    # Remaining blocks: (number of filters, stride), each followed by
    # LeakyReLU and dropout.
    for nb_filter, stride in ((64, (1, 1)), (128, (2, 2)), (256, (1, 1))):
        feature_extractor.add(Convolution2D(nb_filter, 3, 3,
                                            border_mode='same',
                                            subsample=stride))
        feature_extractor.add(LeakyReLU())
        feature_extractor.add(Dropout(0.3))

    feature_extractor.add(Flatten())

    image = Input(shape=(1, 28, 28))
    features = feature_extractor(image)

    # Two heads on the shared features: 'generation' scores whether the
    # image is real or generated, 'auxiliary' predicts the digit class.
    generation_out = Dense(1, activation='sigmoid',
                           name='generation')(features)
    auxiliary_out = Dense(10, activation='softmax',
                          name='auxiliary')(features)

    return Model(input=image, output=[generation_out, auxiliary_out])
|
||||
|
||||
if __name__ == '__main__':
|
||||
|
||||
# batch and latent size taken from the paper
|
||||
nb_epochs = 50
|
||||
batch_size = 100
|
||||
latent_size = 100
|
||||
|
||||
# Adam parameters suggested in https://arxiv.org/abs/1511.06434
|
||||
adam_lr = 0.0002
|
||||
adam_beta_1 = 0.5
|
||||
|
||||
# build the discriminator
|
||||
discriminator = build_discriminator()
|
||||
discriminator.compile(
|
||||
optimizer=Adam(lr=adam_lr, beta_1=adam_beta_1),
|
||||
loss=['binary_crossentropy', 'sparse_categorical_crossentropy']
|
||||
)
|
||||
|
||||
# build the generator
|
||||
generator = build_generator(latent_size)
|
||||
generator.compile(optimizer=Adam(lr=adam_lr, beta_1=adam_beta_1),
|
||||
loss='binary_crossentropy')
|
||||
|
||||
latent = Input(shape=(latent_size, ))
|
||||
image_class = Input(shape=(1,), dtype='int32')
|
||||
|
||||
# get a fake image
|
||||
fake = generator([latent, image_class])
|
||||
|
||||
# we only want to be able to train generation for the combined model
|
||||
discriminator.trainable = False
|
||||
fake, aux = discriminator(fake)
|
||||
combined = Model(input=[latent, image_class], output=[fake, aux])
|
||||
|
||||
combined.compile(
|
||||
optimizer=Adam(lr=adam_lr, beta_1=adam_beta_1),
|
||||
loss=['binary_crossentropy', 'sparse_categorical_crossentropy']
|
||||
)
|
||||
|
||||
# get our mnist data, and force it to be of shape (..., 1, 28, 28) with
|
||||
# range [-1, 1]
|
||||
(X_train, y_train), (X_test, y_test) = mnist.load_data()
|
||||
X_train = (X_train.astype(np.float32) - 127.5) / 127.5
|
||||
X_train = np.expand_dims(X_train, axis=1)
|
||||
|
||||
X_test = (X_test.astype(np.float32) - 127.5) / 127.5
|
||||
X_test = np.expand_dims(X_test, axis=1)
|
||||
|
||||
nb_train, nb_test = X_train.shape[0], X_test.shape[0]
|
||||
|
||||
train_history = defaultdict(list)
|
||||
test_history = defaultdict(list)
|
||||
|
||||
for epoch in range(nb_epochs):
|
||||
print('Epoch {} of {}'.format(epoch + 1, nb_epochs))
|
||||
|
||||
nb_batches = int(X_train.shape[0] / batch_size)
|
||||
progress_bar = Progbar(target=nb_batches)
|
||||
|
||||
epoch_gen_loss = []
|
||||
epoch_disc_loss = []
|
||||
|
||||
for index in range(nb_batches):
|
||||
progress_bar.update(index)
|
||||
# generate a new batch of noise
|
||||
noise = np.random.uniform(-1, 1, (batch_size, latent_size))
|
||||
|
||||
# get a batch of real images
|
||||
image_batch = X_train[index * batch_size:(index + 1) * batch_size]
|
||||
label_batch = y_train[index * batch_size:(index + 1) * batch_size]
|
||||
|
||||
# sample some labels from p_c
|
||||
sampled_labels = np.random.randint(0, 10, batch_size)
|
||||
|
||||
# generate a batch of fake images, using the generated labels as a
|
||||
# conditioner. We reshape the sampled labels to be
|
||||
# (batch_size, 1) so that we can feed them into the embedding
|
||||
# layer as a length one sequence
|
||||
generated_images = generator.predict(
|
||||
[noise, sampled_labels.reshape((-1, 1))], verbose=0)
|
||||
|
||||
X = np.concatenate((image_batch, generated_images))
|
||||
y = np.array([1] * batch_size + [0] * batch_size)
|
||||
aux_y = np.concatenate((label_batch, sampled_labels), axis=0)
|
||||
|
||||
# see if the discriminator can figure itself out...
|
||||
epoch_disc_loss.append(discriminator.train_on_batch(X, [y, aux_y]))
|
||||
|
||||
# make new noise. we generate 2 * batch size here such that we have
|
||||
# the generator optimize over an identical number of images as the
|
||||
# discriminator
|
||||
noise = np.random.uniform(-1, 1, (2 * batch_size, latent_size))
|
||||
sampled_labels = np.random.randint(0, 10, 2 * batch_size)
|
||||
|
||||
# we want to train the generator to trick the discriminator
|
||||
# For the generator, we want all the {fake, not-fake} labels to say
|
||||
# not-fake
|
||||
trick = np.ones(2 * batch_size)
|
||||
|
||||
epoch_gen_loss.append(combined.train_on_batch(
|
||||
[noise, sampled_labels.reshape((-1, 1))], [trick, sampled_labels]))
|
||||
|
||||
print('\nTesting for epoch {}:'.format(epoch + 1))
|
||||
|
||||
# evaluate the testing loss here
|
||||
|
||||
# generate a new batch of noise
|
||||
noise = np.random.uniform(-1, 1, (nb_test, latent_size))
|
||||
|
||||
# sample some labels from p_c and generate images from them
|
||||
sampled_labels = np.random.randint(0, 10, nb_test)
|
||||
generated_images = generator.predict(
|
||||
[noise, sampled_labels.reshape((-1, 1))], verbose=False)
|
||||
|
||||
X = np.concatenate((X_test, generated_images))
|
||||
y = np.array([1] * nb_test + [0] * nb_test)
|
||||
aux_y = np.concatenate((y_test, sampled_labels), axis=0)
|
||||
|
||||
# see if the discriminator can figure itself out...
|
||||
discriminator_test_loss = discriminator.evaluate(
|
||||
X, [y, aux_y], verbose=False)
|
||||
|
||||
discriminator_train_loss = np.mean(np.array(epoch_disc_loss), axis=0)
|
||||
|
||||
# make new noise
|
||||
noise = np.random.uniform(-1, 1, (2 * nb_test, latent_size))
|
||||
sampled_labels = np.random.randint(0, 10, 2 * nb_test)
|
||||
|
||||
trick = np.ones(2 * nb_test)
|
||||
|
||||
generator_test_loss = combined.evaluate(
|
||||
[noise, sampled_labels.reshape((-1, 1))],
|
||||
[trick, sampled_labels], verbose=False)
|
||||
|
||||
generator_train_loss = np.mean(np.array(epoch_gen_loss), axis=0)
|
||||
|
||||
# generate an epoch report on performance
|
||||
train_history['generator'].append(generator_train_loss)
|
||||
train_history['discriminator'].append(discriminator_train_loss)
|
||||
|
||||
test_history['generator'].append(generator_test_loss)
|
||||
test_history['discriminator'].append(discriminator_test_loss)
|
||||
|
||||
print('{0:<22s} | {1:4s} | {2:15s} | {3:5s}'.format(
|
||||
'component', *discriminator.metrics_names))
|
||||
print('-' * 65)
|
||||
|
||||
ROW_FMT = '{0:<22s} | {1:<4.2f} | {2:<15.2f} | {3:<5.2f}'
|
||||
print(ROW_FMT.format('generator (train)',
|
||||
*train_history['generator'][-1]))
|
||||
print(ROW_FMT.format('generator (test)',
|
||||
*test_history['generator'][-1]))
|
||||
print(ROW_FMT.format('discriminator (train)',
|
||||
*train_history['discriminator'][-1]))
|
||||
print(ROW_FMT.format('discriminator (test)',
|
||||
*test_history['discriminator'][-1]))
|
||||
|
||||
# save weights every epoch
|
||||
generator.save_weights(
|
||||
'params_generator_epoch_{0:03d}.hdf5'.format(epoch), True)
|
||||
discriminator.save_weights(
|
||||
'params_discriminator_epoch_{0:03d}.hdf5'.format(epoch), True)
|
||||
|
||||
# generate some digits to display
|
||||
noise = np.random.uniform(-1, 1, (100, latent_size))
|
||||
|
||||
sampled_labels = np.array([
|
||||
[i] * 10 for i in range(10)
|
||||
]).reshape(-1, 1)
|
||||
|
||||
# get a batch to display
|
||||
generated_images = generator.predict(
|
||||
[noise, sampled_labels], verbose=0)
|
||||
|
||||
# arrange them into a grid
|
||||
img = (np.concatenate([r.reshape(-1, 28)
|
||||
for r in np.split(generated_images, 10)
|
||||
], axis=-1) * 127.5 + 127.5).astype(np.uint8)
|
||||
|
||||
Image.fromarray(img).save(
|
||||
'plot_epoch_{0:03d}_generated.png'.format(epoch))
|
||||
|
||||
pickle.dump({'train': train_history, 'test': test_history},
|
||||
open('acgan-history.pkl', 'wb'))
|
||||
@@ -55,6 +55,7 @@ Results
|
||||
'''
|
||||
|
||||
from __future__ import print_function
|
||||
from six.moves import xrange
|
||||
import numpy as np
|
||||
np.random.seed(1337)
|
||||
|
||||
|
||||
@@ -75,7 +75,7 @@ def create_base_network(input_dim):
|
||||
def compute_accuracy(predictions, labels):
|
||||
'''Compute classification accuracy with a fixed threshold on distances.
|
||||
'''
|
||||
return labels[predictions.ravel() < 0.5].mean()
|
||||
return np.mean(labels == (predictions.ravel() > 0.5))
|
||||
|
||||
|
||||
# the data, shuffled and split between train and test sets
|
||||
|
||||
@@ -44,7 +44,6 @@ Kaiming He, Xiangyu Zhang, Shaoqing Ren, Jian Sun
|
||||
https://arxiv.org/abs/1603.05027v3
|
||||
|
||||
'''
|
||||
|
||||
from __future__ import print_function
|
||||
import numpy as np
|
||||
np.random.seed(1337) # for reproducibility
|
||||
@@ -76,6 +75,16 @@ def getwhere(x):
|
||||
y_prepool, y_postpool = x
|
||||
return K.gradients(K.sum(y_postpool), y_prepool)
|
||||
|
||||
if K.backend() == 'tensorflow':
|
||||
raise Exception('This example can only run with the '
|
||||
'Theano backend for the time being, '
|
||||
'because it requires taking the gradient '
|
||||
'of a gradient, which isn\'t '
|
||||
'supported for all TF ops.')
|
||||
|
||||
# This example assumes 'th' dim ordering.
|
||||
K.set_image_dim_ordering('th')
|
||||
|
||||
# input image dimensions
|
||||
img_rows, img_cols = 28, 28
|
||||
|
||||
|
||||
@@ -8,6 +8,13 @@ e.g.:
|
||||
```
|
||||
python neural_style_transfer.py img/tuebingen.jpg img/starry_night.jpg results/my_result
|
||||
```
|
||||
Optional parameters:
|
||||
```
|
||||
--iter, The number of iterations to run the style transfer for (Default is 10)
|
||||
--content_weight, The weight given to the content loss (Default is 0.025)
|
||||
--style_weight, The weight given to the style loss (Default is 1.0)
|
||||
--tv_weight, The weight given to the total variation loss (Default is 1.0)
|
||||
```
|
||||
|
||||
It is preferable to run this script on GPU, for speed.
|
||||
|
||||
@@ -60,16 +67,25 @@ parser.add_argument('style_reference_image_path', metavar='ref', type=str,
|
||||
help='Path to the style reference image.')
|
||||
parser.add_argument('result_prefix', metavar='res_prefix', type=str,
|
||||
help='Prefix for the saved results.')
|
||||
parser.add_argument('--iter', type=int, default=10, required=False,
|
||||
help='Number of iterations to run.')
|
||||
parser.add_argument('--content_weight', type=float, default=0.025, required=False,
|
||||
help='Content weight.')
|
||||
parser.add_argument('--style_weight', type=float, default=1.0, required=False,
|
||||
help='Style weight.')
|
||||
parser.add_argument('--tv_weight', type=float, default=1.0, required=False,
|
||||
help='Total Variation weight.')
|
||||
|
||||
args = parser.parse_args()
|
||||
base_image_path = args.base_image_path
|
||||
style_reference_image_path = args.style_reference_image_path
|
||||
result_prefix = args.result_prefix
|
||||
iterations = args.iter
|
||||
|
||||
# these are the weights of the different loss components
|
||||
total_variation_weight = 1.
|
||||
style_weight = 1.
|
||||
content_weight = 0.025
|
||||
total_variation_weight = args.tv_weight
|
||||
style_weight = args.style_weight
|
||||
content_weight = args.content_weight
|
||||
|
||||
# dimensions of the generated picture.
|
||||
img_nrows = 400
|
||||
@@ -246,7 +262,7 @@ if K.image_dim_ordering() == 'th':
|
||||
else:
|
||||
x = np.random.uniform(0, 255, (1, img_nrows, img_ncols, 3)) - 128.
|
||||
|
||||
for i in range(10):
|
||||
for i in range(iterations):
|
||||
print('Start of iteration', i)
|
||||
start_time = time.time()
|
||||
x, min_val, info = fmin_l_bfgs_b(evaluator.loss, x.flatten(),
|
||||
|
||||
@@ -54,7 +54,6 @@ model.add(LSTM(50,
|
||||
return_sequences=True,
|
||||
stateful=True))
|
||||
model.add(LSTM(50,
|
||||
batch_input_shape=(batch_size, tsteps, 1),
|
||||
return_sequences=False,
|
||||
stateful=True))
|
||||
model.add(Dense(1))
|
||||
|
||||
@@ -4,6 +4,7 @@ Reference: "Auto-Encoding Variational Bayes" https://arxiv.org/abs/1312.6114
|
||||
'''
|
||||
import numpy as np
|
||||
import matplotlib.pyplot as plt
|
||||
from scipy.stats import norm
|
||||
|
||||
from keras.layers import Input, Dense, Lambda
|
||||
from keras.models import Model
|
||||
@@ -16,7 +17,7 @@ original_dim = 784
|
||||
latent_dim = 2
|
||||
intermediate_dim = 256
|
||||
nb_epoch = 50
|
||||
epsilon_std = 0.01
|
||||
epsilon_std = 1.0
|
||||
|
||||
x = Input(batch_shape=(batch_size, original_dim))
|
||||
h = Dense(intermediate_dim, activation='relu')(x)
|
||||
@@ -82,9 +83,10 @@ generator = Model(decoder_input, _x_decoded_mean)
|
||||
n = 15 # figure with 15x15 digits
|
||||
digit_size = 28
|
||||
figure = np.zeros((digit_size * n, digit_size * n))
|
||||
# we will sample n points within [-15, 15] standard deviations
|
||||
grid_x = np.linspace(-15, 15, n)
|
||||
grid_y = np.linspace(-15, 15, n)
|
||||
# linearly spaced coordinates on the unit square were transformed through the inverse CDF (ppf) of the Gaussian
|
||||
# to produce values of the latent variables z, since the prior of the latent space is Gaussian
|
||||
grid_x = norm.ppf(np.linspace(0.05, 0.95, n))
|
||||
grid_y = norm.ppf(np.linspace(0.05, 0.95, n))
|
||||
|
||||
for i, yi in enumerate(grid_x):
|
||||
for j, xi in enumerate(grid_y):
|
||||
@@ -95,5 +97,5 @@ for i, yi in enumerate(grid_x):
|
||||
j * digit_size: (j + 1) * digit_size] = digit
|
||||
|
||||
plt.figure(figsize=(10, 10))
|
||||
plt.imshow(figure)
|
||||
plt.imshow(figure, cmap='Greys_r')
|
||||
plt.show()
|
||||
|
||||
@@ -5,6 +5,7 @@ Reference: "Auto-Encoding Variational Bayes" https://arxiv.org/abs/1312.6114
|
||||
'''
|
||||
import numpy as np
|
||||
import matplotlib.pyplot as plt
|
||||
from scipy.stats import norm
|
||||
|
||||
from keras.layers import Input, Dense, Lambda, Flatten, Reshape
|
||||
from keras.layers import Convolution2D, Deconvolution2D
|
||||
@@ -27,7 +28,7 @@ else:
|
||||
original_img_size = (img_rows, img_cols, img_chns)
|
||||
latent_dim = 2
|
||||
intermediate_dim = 128
|
||||
epsilon_std = 0.01
|
||||
epsilon_std = 1.0
|
||||
nb_epoch = 5
|
||||
|
||||
x = Input(batch_shape=(batch_size,) + original_img_size)
|
||||
@@ -153,9 +154,10 @@ generator = Model(decoder_input, _x_decoded_mean_squash)
|
||||
n = 15 # figure with 15x15 digits
|
||||
digit_size = 28
|
||||
figure = np.zeros((digit_size * n, digit_size * n))
|
||||
# we will sample n points within [-15, 15] standard deviations
|
||||
grid_x = np.linspace(-15, 15, n)
|
||||
grid_y = np.linspace(-15, 15, n)
|
||||
# linearly spaced coordinates on the unit square were transformed through the inverse CDF (ppf) of the Gaussian
|
||||
# to produce values of the latent variables z, since the prior of the latent space is Gaussian
|
||||
grid_x = norm.ppf(np.linspace(0.05, 0.95, n))
|
||||
grid_y = norm.ppf(np.linspace(0.05, 0.95, n))
|
||||
|
||||
for i, yi in enumerate(grid_x):
|
||||
for j, xi in enumerate(grid_y):
|
||||
@@ -167,5 +169,5 @@ for i, yi in enumerate(grid_x):
|
||||
j * digit_size: (j + 1) * digit_size] = digit
|
||||
|
||||
plt.figure(figsize=(10, 10))
|
||||
plt.imshow(figure)
|
||||
plt.imshow(figure, cmap='Greys_r')
|
||||
plt.show()
|
||||
|
||||
+1
-1
@@ -15,4 +15,4 @@ from . import objectives
|
||||
from . import optimizers
|
||||
from . import regularizers
|
||||
|
||||
__version__ = '1.1.1'
|
||||
__version__ = '1.2.0'
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
from __future__ import absolute_import
|
||||
from . import backend as K
|
||||
from .utils.generic_utils import get_from_module
|
||||
|
||||
|
||||
def softmax(x):
|
||||
@@ -11,13 +12,15 @@ def softmax(x):
|
||||
s = K.sum(e, axis=-1, keepdims=True)
|
||||
return e / s
|
||||
else:
|
||||
raise Exception('Cannot apply softmax to a tensor that is not 2D or 3D. ' +
|
||||
'Here, ndim=' + str(ndim))
|
||||
raise ValueError('Cannot apply softmax to a tensor '
|
||||
'that is not 2D or 3D. '
|
||||
'Here, ndim=' + str(ndim))
|
||||
|
||||
|
||||
def elu(x, alpha=1.0):
|
||||
return K.elu(x, alpha)
|
||||
|
||||
|
||||
def softplus(x):
|
||||
return K.softplus(x)
|
||||
|
||||
@@ -43,13 +46,9 @@ def hard_sigmoid(x):
|
||||
|
||||
|
||||
def linear(x):
|
||||
'''
|
||||
The function returns the variable that is passed in, so all types work.
|
||||
'''
|
||||
return x
|
||||
|
||||
|
||||
from .utils.generic_utils import get_from_module
|
||||
def get(identifier):
|
||||
if identifier is None:
|
||||
return linear
|
||||
|
||||
@@ -44,7 +44,51 @@ def decode_predictions(preds, top=5):
|
||||
CLASS_INDEX = json.load(open(fpath))
|
||||
results = []
|
||||
for pred in preds:
|
||||
top_indices = np.argpartition(pred, -top)[-top:][::-1]
|
||||
top_indices = pred.argsort()[-top:][::-1]
|
||||
result = [tuple(CLASS_INDEX[str(i)]) + (pred[i],) for i in top_indices]
|
||||
result.sort(key=lambda x: x[2], reverse=True)
|
||||
results.append(result)
|
||||
return results
|
||||
|
||||
|
||||
def _obtain_input_shape(input_shape, default_size, min_size, dim_ordering, include_top):
|
||||
if dim_ordering == 'th':
|
||||
default_shape = (3, default_size, default_size)
|
||||
else:
|
||||
default_shape = (default_size, default_size, 3)
|
||||
if include_top:
|
||||
if input_shape is not None:
|
||||
if input_shape != default_shape:
|
||||
raise ValueError('When setting`include_top=True`, '
|
||||
'`input_shape` should be ' + str(default_shape) + '.')
|
||||
input_shape = default_shape
|
||||
else:
|
||||
if dim_ordering == 'th':
|
||||
if input_shape is not None:
|
||||
if len(input_shape) != 3:
|
||||
raise ValueError('`input_shape` must be a tuple of three integers.')
|
||||
if input_shape[0] != 3:
|
||||
raise ValueError('The input must have 3 channels; got '
|
||||
'`input_shape=' + str(input_shape) + '`')
|
||||
if ((input_shape[1] is not None and input_shape[1] < min_size) or
|
||||
(input_shape[2] is not None and input_shape[2] < min_size)):
|
||||
raise ValueError('Input size must be at least ' +
|
||||
str(min_size) + 'x' + str(min_size) + ', got '
|
||||
'`input_shape=' + str(input_shape) + '`')
|
||||
else:
|
||||
input_shape = (3, None, None)
|
||||
else:
|
||||
if input_shape is not None:
|
||||
if len(input_shape) != 3:
|
||||
raise ValueError('`input_shape` must be a tuple of three integers.')
|
||||
if input_shape[-1] != 3:
|
||||
raise ValueError('The input must have 3 channels; got '
|
||||
'`input_shape=' + str(input_shape) + '`')
|
||||
if ((input_shape[0] is not None and input_shape[0] < min_size) or
|
||||
(input_shape[1] is not None and input_shape[1] < min_size)):
|
||||
raise ValueError('Input size must be at least ' +
|
||||
str(min_size) + 'x' + str(min_size) + ', got '
|
||||
'`input_shape=' + str(input_shape) + '`')
|
||||
else:
|
||||
input_shape = (None, None, 3)
|
||||
return input_shape
|
||||
|
||||
@@ -23,10 +23,11 @@ import warnings
|
||||
from ..models import Model
|
||||
from ..layers import Flatten, Dense, Input, BatchNormalization, merge
|
||||
from ..layers import Convolution2D, MaxPooling2D, AveragePooling2D
|
||||
from ..engine.topology import get_source_inputs
|
||||
from ..utils.layer_utils import convert_all_kernels_in_model
|
||||
from ..utils.data_utils import get_file
|
||||
from .. import backend as K
|
||||
from .imagenet_utils import decode_predictions
|
||||
from .imagenet_utils import decode_predictions, _obtain_input_shape
|
||||
|
||||
|
||||
TH_WEIGHTS_PATH = 'https://github.com/fchollet/deep-learning-models/releases/download/v0.2/inception_v3_weights_th_dim_ordering_th_kernels.h5'
|
||||
@@ -60,7 +61,7 @@ def conv2d_bn(x, nb_filter, nb_row, nb_col,
|
||||
|
||||
|
||||
def InceptionV3(include_top=True, weights='imagenet',
|
||||
input_tensor=None):
|
||||
input_tensor=None, input_shape=None):
|
||||
'''Instantiate the Inception v3 architecture,
|
||||
optionally loading weights pre-trained
|
||||
on ImageNet. Note that when using TensorFlow,
|
||||
@@ -82,6 +83,13 @@ def InceptionV3(include_top=True, weights='imagenet',
|
||||
or "imagenet" (pre-training on ImageNet).
|
||||
input_tensor: optional Keras tensor (i.e. output of `layers.Input()`)
|
||||
to use as image input for the model.
|
||||
input_shape: optional shape tuple, only to be specified
|
||||
if `include_top` is False (otherwise the input shape
|
||||
has to be `(299, 299, 3)` (with `tf` dim ordering)
|
||||
or `(3, 299, 299)` (with `th` dim ordering)).
|
||||
It should have exactly 3 input channels,
|
||||
and width and height should be no smaller than 139.
|
||||
E.g. `(150, 150, 3)` would be one valid value.
|
||||
|
||||
# Returns
|
||||
A Keras model instance.
|
||||
@@ -91,16 +99,11 @@ def InceptionV3(include_top=True, weights='imagenet',
|
||||
'`None` (random initialization) or `imagenet` '
|
||||
'(pre-training on ImageNet).')
|
||||
# Determine proper input shape
|
||||
if K.image_dim_ordering() == 'th':
|
||||
if include_top:
|
||||
input_shape = (3, 299, 299)
|
||||
else:
|
||||
input_shape = (3, None, None)
|
||||
else:
|
||||
if include_top:
|
||||
input_shape = (299, 299, 3)
|
||||
else:
|
||||
input_shape = (None, None, 3)
|
||||
input_shape = _obtain_input_shape(input_shape,
|
||||
default_size=299,
|
||||
min_size=139,
|
||||
dim_ordering=K.image_dim_ordering(),
|
||||
include_top=include_top)
|
||||
|
||||
if input_tensor is None:
|
||||
img_input = Input(shape=input_shape)
|
||||
@@ -261,8 +264,14 @@ def InceptionV3(include_top=True, weights='imagenet',
|
||||
x = Flatten(name='flatten')(x)
|
||||
x = Dense(1000, activation='softmax', name='predictions')(x)
|
||||
|
||||
# Create model
|
||||
model = Model(img_input, x)
|
||||
# Ensure that the model takes into account
|
||||
# any potential predecessors of `input_tensor`.
|
||||
if input_tensor is not None:
|
||||
inputs = get_source_inputs(input_tensor)
|
||||
else:
|
||||
inputs = img_input
|
||||
# Create model.
|
||||
model = Model(inputs, x, name='inception_v3')
|
||||
|
||||
# load weights
|
||||
if weights == 'imagenet':
|
||||
|
||||
@@ -18,6 +18,7 @@ from ..layers.convolutional import MaxPooling2D, ZeroPadding2D
|
||||
from ..layers.normalization import BatchNormalization
|
||||
from ..layers.advanced_activations import ELU
|
||||
from ..layers.recurrent import GRU
|
||||
from ..engine.topology import get_source_inputs
|
||||
from ..utils.data_utils import get_file
|
||||
from ..utils.layer_utils import convert_all_kernels_in_model
|
||||
from .audio_conv_utils import decode_predictions, preprocess_input
|
||||
@@ -127,8 +128,15 @@ def MusicTaggerCRNN(weights='msd', input_tensor=None,
|
||||
if include_top:
|
||||
x = Dense(50, activation='sigmoid', name='output')(x)
|
||||
|
||||
# Create model
|
||||
model = Model(melgram_input, x)
|
||||
# Ensure that the model takes into account
|
||||
# any potential predecessors of `input_tensor`.
|
||||
if input_tensor is not None:
|
||||
inputs = get_source_inputs(input_tensor)
|
||||
else:
|
||||
inputs = melgram_input
|
||||
# Create model.
|
||||
model = Model(inputs, x, name='music_tagger_crnn')
|
||||
|
||||
if weights is None:
|
||||
return model
|
||||
else:
|
||||
|
||||
@@ -18,9 +18,10 @@ from ..layers import Convolution2D, MaxPooling2D, ZeroPadding2D, AveragePooling2
|
||||
from ..layers import BatchNormalization
|
||||
from ..models import Model
|
||||
from .. import backend as K
|
||||
from ..engine.topology import get_source_inputs
|
||||
from ..utils.layer_utils import convert_all_kernels_in_model
|
||||
from ..utils.data_utils import get_file
|
||||
from .imagenet_utils import decode_predictions, preprocess_input
|
||||
from .imagenet_utils import decode_predictions, preprocess_input, _obtain_input_shape
|
||||
|
||||
|
||||
TH_WEIGHTS_PATH = 'https://github.com/fchollet/deep-learning-models/releases/download/v0.2/resnet50_weights_th_dim_ordering_th_kernels.h5'
|
||||
@@ -108,7 +109,7 @@ def conv_block(input_tensor, kernel_size, filters, stage, block, strides=(2, 2))
|
||||
|
||||
|
||||
def ResNet50(include_top=True, weights='imagenet',
|
||||
input_tensor=None):
|
||||
input_tensor=None, input_shape=None):
|
||||
'''Instantiate the ResNet50 architecture,
|
||||
optionally loading weights pre-trained
|
||||
on ImageNet. Note that when using TensorFlow,
|
||||
@@ -126,8 +127,15 @@ def ResNet50(include_top=True, weights='imagenet',
|
||||
layers at the top of the network.
|
||||
weights: one of `None` (random initialization)
|
||||
or "imagenet" (pre-training on ImageNet).
|
||||
input_tensor: optional Keras tensor (i.e. xput of `layers.Input()`)
|
||||
input_tensor: optional Keras tensor (i.e. output of `layers.Input()`)
|
||||
to use as image input for the model.
|
||||
input_shape: optional shape tuple, only to be specified
|
||||
if `include_top` is False (otherwise the input shape
|
||||
has to be `(224, 224, 3)` (with `tf` dim ordering)
|
||||
or `(3, 224, 224)` (with `th` dim ordering)).
|
||||
It should have exactly 3 input channels,
|
||||
and width and height should be no smaller than 197.
|
||||
E.g. `(200, 200, 3)` would be one valid value.
|
||||
|
||||
# Returns
|
||||
A Keras model instance.
|
||||
@@ -137,16 +145,11 @@ def ResNet50(include_top=True, weights='imagenet',
|
||||
'`None` (random initialization) or `imagenet` '
|
||||
'(pre-training on ImageNet).')
|
||||
# Determine proper input shape
|
||||
if K.image_dim_ordering() == 'th':
|
||||
if include_top:
|
||||
input_shape = (3, 224, 224)
|
||||
else:
|
||||
input_shape = (3, None, None)
|
||||
else:
|
||||
if include_top:
|
||||
input_shape = (224, 224, 3)
|
||||
else:
|
||||
input_shape = (None, None, 3)
|
||||
input_shape = _obtain_input_shape(input_shape,
|
||||
default_size=224,
|
||||
min_size=197,
|
||||
dim_ordering=K.image_dim_ordering(),
|
||||
include_top=include_top)
|
||||
|
||||
if input_tensor is None:
|
||||
img_input = Input(shape=input_shape)
|
||||
@@ -192,7 +195,14 @@ def ResNet50(include_top=True, weights='imagenet',
|
||||
x = Flatten()(x)
|
||||
x = Dense(1000, activation='softmax', name='fc1000')(x)
|
||||
|
||||
model = Model(img_input, x)
|
||||
# Ensure that the model takes into account
|
||||
# any potential predecessors of `input_tensor`.
|
||||
if input_tensor is not None:
|
||||
inputs = get_source_inputs(input_tensor)
|
||||
else:
|
||||
inputs = img_input
|
||||
# Create model.
|
||||
model = Model(inputs, x, name='resnet50')
|
||||
|
||||
# load weights
|
||||
if weights == 'imagenet':
|
||||
|
||||
@@ -14,10 +14,11 @@ import warnings
|
||||
from ..models import Model
|
||||
from ..layers import Flatten, Dense, Input
|
||||
from ..layers import Convolution2D, MaxPooling2D
|
||||
from ..engine.topology import get_source_inputs
|
||||
from ..utils.layer_utils import convert_all_kernels_in_model
|
||||
from ..utils.data_utils import get_file
|
||||
from .. import backend as K
|
||||
from .imagenet_utils import decode_predictions, preprocess_input
|
||||
from .imagenet_utils import decode_predictions, preprocess_input, _obtain_input_shape
|
||||
|
||||
|
||||
TH_WEIGHTS_PATH = 'https://github.com/fchollet/deep-learning-models/releases/download/v0.1/vgg16_weights_th_dim_ordering_th_kernels.h5'
|
||||
@@ -27,7 +28,7 @@ TF_WEIGHTS_PATH_NO_TOP = 'https://github.com/fchollet/deep-learning-models/relea
|
||||
|
||||
|
||||
def VGG16(include_top=True, weights='imagenet',
|
||||
input_tensor=None):
|
||||
input_tensor=None, input_shape=None):
|
||||
'''Instantiate the VGG16 architecture,
|
||||
optionally loading weights pre-trained
|
||||
on ImageNet. Note that when using TensorFlow,
|
||||
@@ -47,6 +48,13 @@ def VGG16(include_top=True, weights='imagenet',
|
||||
or "imagenet" (pre-training on ImageNet).
|
||||
input_tensor: optional Keras tensor (i.e. output of `layers.Input()`)
|
||||
to use as image input for the model.
|
||||
input_shape: optional shape tuple, only to be specified
|
||||
if `include_top` is False (otherwise the input shape
|
||||
has to be `(224, 224, 3)` (with `tf` dim ordering)
|
||||
or `(3, 224, 224)` (with `th` dim ordering)).
|
||||
It should have exactly 3 input channels,
|
||||
and width and height should be no smaller than 48.
|
||||
E.g. `(200, 200, 3)` would be one valid value.
|
||||
|
||||
# Returns
|
||||
A Keras model instance.
|
||||
@@ -56,16 +64,11 @@ def VGG16(include_top=True, weights='imagenet',
|
||||
'`None` (random initialization) or `imagenet` '
|
||||
'(pre-training on ImageNet).')
|
||||
# Determine proper input shape
|
||||
if K.image_dim_ordering() == 'th':
|
||||
if include_top:
|
||||
input_shape = (3, 224, 224)
|
||||
else:
|
||||
input_shape = (3, None, None)
|
||||
else:
|
||||
if include_top:
|
||||
input_shape = (224, 224, 3)
|
||||
else:
|
||||
input_shape = (None, None, 3)
|
||||
input_shape = _obtain_input_shape(input_shape,
|
||||
default_size=224,
|
||||
min_size=48,
|
||||
dim_ordering=K.image_dim_ordering(),
|
||||
include_top=include_top)
|
||||
|
||||
if input_tensor is None:
|
||||
img_input = Input(shape=input_shape)
|
||||
@@ -109,8 +112,14 @@ def VGG16(include_top=True, weights='imagenet',
|
||||
x = Dense(4096, activation='relu', name='fc2')(x)
|
||||
x = Dense(1000, activation='softmax', name='predictions')(x)
|
||||
|
||||
# Create model
|
||||
model = Model(img_input, x)
|
||||
# Ensure that the model takes into account
|
||||
# any potential predecessors of `input_tensor`.
|
||||
if input_tensor is not None:
|
||||
inputs = get_source_inputs(input_tensor)
|
||||
else:
|
||||
inputs = img_input
|
||||
# Create model.
|
||||
model = Model(inputs, x, name='vgg16')
|
||||
|
||||
# load weights
|
||||
if weights == 'imagenet':
|
||||
|
||||
@@ -14,10 +14,11 @@ import warnings
|
||||
from ..models import Model
|
||||
from ..layers import Flatten, Dense, Input
|
||||
from ..layers import Convolution2D, MaxPooling2D
|
||||
from ..engine.topology import get_source_inputs
|
||||
from ..utils.layer_utils import convert_all_kernels_in_model
|
||||
from ..utils.data_utils import get_file
|
||||
from .. import backend as K
|
||||
from .imagenet_utils import decode_predictions, preprocess_input
|
||||
from .imagenet_utils import decode_predictions, preprocess_input, _obtain_input_shape
|
||||
|
||||
|
||||
TH_WEIGHTS_PATH = 'https://github.com/fchollet/deep-learning-models/releases/download/v0.1/vgg19_weights_th_dim_ordering_th_kernels.h5'
|
||||
@@ -27,7 +28,7 @@ TF_WEIGHTS_PATH_NO_TOP = 'https://github.com/fchollet/deep-learning-models/relea
|
||||
|
||||
|
||||
def VGG19(include_top=True, weights='imagenet',
|
||||
input_tensor=None):
|
||||
input_tensor=None, input_shape=None):
|
||||
'''Instantiate the VGG19 architecture,
|
||||
optionally loading weights pre-trained
|
||||
on ImageNet. Note that when using TensorFlow,
|
||||
@@ -47,6 +48,13 @@ def VGG19(include_top=True, weights='imagenet',
|
||||
or "imagenet" (pre-training on ImageNet).
|
||||
input_tensor: optional Keras tensor (i.e. output of `layers.Input()`)
|
||||
to use as image input for the model.
|
||||
input_shape: optional shape tuple, only to be specified
|
||||
if `include_top` is False (otherwise the input shape
|
||||
has to be `(224, 224, 3)` (with `tf` dim ordering)
|
||||
or `(3, 224, 224)` (with `th` dim ordering)).
|
||||
It should have exactly 3 input channels,
|
||||
and width and height should be no smaller than 48.
|
||||
E.g. `(200, 200, 3)` would be one valid value.
|
||||
|
||||
# Returns
|
||||
A Keras model instance.
|
||||
@@ -56,16 +64,11 @@ def VGG19(include_top=True, weights='imagenet',
|
||||
'`None` (random initialization) or `imagenet` '
|
||||
'(pre-training on ImageNet).')
|
||||
# Determine proper input shape
|
||||
if K.image_dim_ordering() == 'th':
|
||||
if include_top:
|
||||
input_shape = (3, 224, 224)
|
||||
else:
|
||||
input_shape = (3, None, None)
|
||||
else:
|
||||
if include_top:
|
||||
input_shape = (224, 224, 3)
|
||||
else:
|
||||
input_shape = (None, None, 3)
|
||||
input_shape = _obtain_input_shape(input_shape,
|
||||
default_size=224,
|
||||
min_size=48,
|
||||
dim_ordering=K.image_dim_ordering(),
|
||||
include_top=include_top)
|
||||
|
||||
if input_tensor is None:
|
||||
img_input = Input(shape=input_shape)
|
||||
@@ -112,8 +115,14 @@ def VGG19(include_top=True, weights='imagenet',
|
||||
x = Dense(4096, activation='relu', name='fc2')(x)
|
||||
x = Dense(1000, activation='softmax', name='predictions')(x)
|
||||
|
||||
# Create model
|
||||
model = Model(img_input, x)
|
||||
# Ensure that the model takes into account
|
||||
# any potential predecessors of `input_tensor`.
|
||||
if input_tensor is not None:
|
||||
inputs = get_source_inputs(input_tensor)
|
||||
else:
|
||||
inputs = img_input
|
||||
# Create model.
|
||||
model = Model(inputs, x, name='vgg19')
|
||||
|
||||
# load weights
|
||||
if weights == 'imagenet':
|
||||
|
||||
@@ -25,9 +25,10 @@ import warnings
|
||||
from ..models import Model
|
||||
from ..layers import Dense, Input, BatchNormalization, Activation, merge
|
||||
from ..layers import Conv2D, SeparableConv2D, MaxPooling2D, GlobalAveragePooling2D
|
||||
from ..engine.topology import get_source_inputs
|
||||
from ..utils.data_utils import get_file
|
||||
from .. import backend as K
|
||||
from .imagenet_utils import decode_predictions
|
||||
from .imagenet_utils import decode_predictions, _obtain_input_shape
|
||||
|
||||
|
||||
TF_WEIGHTS_PATH = 'https://github.com/fchollet/deep-learning-models/releases/download/v0.4/xception_weights_tf_dim_ordering_tf_kernels.h5'
|
||||
@@ -35,7 +36,7 @@ TF_WEIGHTS_PATH_NO_TOP = 'https://github.com/fchollet/deep-learning-models/relea
|
||||
|
||||
|
||||
def Xception(include_top=True, weights='imagenet',
|
||||
input_tensor=None):
|
||||
input_tensor=None, input_shape=None):
|
||||
'''Instantiate the Xception architecture,
|
||||
optionally loading weights pre-trained
|
||||
on ImageNet. This model is available for TensorFlow only,
|
||||
@@ -53,6 +54,12 @@ def Xception(include_top=True, weights='imagenet',
|
||||
or "imagenet" (pre-training on ImageNet).
|
||||
input_tensor: optional Keras tensor (i.e. output of `layers.Input()`)
|
||||
to use as image input for the model.
|
||||
input_shape: optional shape tuple, only to be specified
|
||||
if `include_top` is False (otherwise the input shape
|
||||
has to be `(299, 299, 3)`).
|
||||
It should have exactly 3 input channels,
|
||||
and width and height should be no smaller than 71.
|
||||
E.g. `(150, 150, 3)` would be one valid value.
|
||||
|
||||
# Returns
|
||||
A Keras model instance.
|
||||
@@ -62,8 +69,8 @@ def Xception(include_top=True, weights='imagenet',
|
||||
'`None` (random initialization) or `imagenet` '
|
||||
'(pre-training on ImageNet).')
|
||||
if K.backend() != 'tensorflow':
|
||||
raise Exception('The Xception model is only available with '
|
||||
'the TensorFlow backend.')
|
||||
raise RuntimeError('The Xception model is only available with '
|
||||
'the TensorFlow backend.')
|
||||
if K.image_dim_ordering() != 'tf':
|
||||
warnings.warn('The Xception model is only available for the '
|
||||
'input dimension ordering "tf" '
|
||||
@@ -80,10 +87,11 @@ def Xception(include_top=True, weights='imagenet',
|
||||
old_dim_ordering = None
|
||||
|
||||
# Determine proper input shape
|
||||
if include_top:
|
||||
input_shape = (299, 299, 3)
|
||||
else:
|
||||
input_shape = (None, None, 3)
|
||||
input_shape = _obtain_input_shape(input_shape,
|
||||
default_size=299,
|
||||
min_size=71,
|
||||
dim_ordering=K.image_dim_ordering(),
|
||||
include_top=include_top)
|
||||
|
||||
if input_tensor is None:
|
||||
img_input = Input(shape=input_shape)
|
||||
@@ -183,8 +191,14 @@ def Xception(include_top=True, weights='imagenet',
|
||||
x = GlobalAveragePooling2D(name='avg_pool')(x)
|
||||
x = Dense(1000, activation='softmax', name='predictions')(x)
|
||||
|
||||
# Create model
|
||||
model = Model(img_input, x)
|
||||
# Ensure that the model takes into account
|
||||
# any potential predecessors of `input_tensor`.
|
||||
if input_tensor is not None:
|
||||
inputs = get_source_inputs(input_tensor)
|
||||
else:
|
||||
inputs = img_input
|
||||
# Create model.
|
||||
model = Model(inputs, x, name='xception')
|
||||
|
||||
# load weights
|
||||
if weights == 'imagenet':
|
||||
|
||||
@@ -23,11 +23,8 @@ _keras_dir = os.path.join(_keras_base_dir, '.keras')
|
||||
if not os.path.exists(_keras_dir):
|
||||
os.makedirs(_keras_dir)
|
||||
|
||||
# Set theano as default backend for Windows users since tensorflow is not available for Windows yet.
|
||||
if os.name == 'nt':
|
||||
_BACKEND = 'theano'
|
||||
else:
|
||||
_BACKEND = 'tensorflow'
|
||||
# Default backend: TensorFlow.
|
||||
_BACKEND = 'tensorflow'
|
||||
|
||||
_config_path = os.path.expanduser(os.path.join(_keras_dir, 'keras.json'))
|
||||
if os.path.exists(_config_path):
|
||||
@@ -35,10 +32,11 @@ if os.path.exists(_config_path):
|
||||
_floatx = _config.get('floatx', floatx())
|
||||
assert _floatx in {'float16', 'float32', 'float64'}
|
||||
_epsilon = _config.get('epsilon', epsilon())
|
||||
assert type(_epsilon) == float
|
||||
assert isinstance(_epsilon, float)
|
||||
_backend = _config.get('backend', _BACKEND)
|
||||
assert _backend in {'theano', 'tensorflow'}
|
||||
_image_dim_ordering = _config.get('image_dim_ordering', image_dim_ordering())
|
||||
_image_dim_ordering = _config.get('image_dim_ordering',
|
||||
image_dim_ordering())
|
||||
assert _image_dim_ordering in {'tf', 'th'}
|
||||
|
||||
set_floatx(_floatx)
|
||||
@@ -68,7 +66,7 @@ elif _BACKEND == 'tensorflow':
|
||||
sys.stderr.write('Using TensorFlow backend.\n')
|
||||
from .tensorflow_backend import *
|
||||
else:
|
||||
raise Exception('Unknown backend: ' + str(_BACKEND))
|
||||
raise ValueError('Unknown backend: ' + str(_BACKEND))
|
||||
|
||||
|
||||
def backend():
|
||||
|
||||
+132
-4
@@ -13,6 +13,15 @@ _LEGACY_WEIGHT_ORDERING = False
|
||||
def epsilon():
|
||||
'''Returns the value of the fuzz
|
||||
factor used in numeric expressions.
|
||||
|
||||
# Returns
|
||||
A float.
|
||||
|
||||
# Example
|
||||
```python
|
||||
>>> keras.backend.epsilon()
|
||||
1e-08
|
||||
```
|
||||
'''
|
||||
return _EPSILON
|
||||
|
||||
@@ -20,6 +29,19 @@ def epsilon():
|
||||
def set_epsilon(e):
|
||||
'''Sets the value of the fuzz
|
||||
factor used in numeric expressions.
|
||||
|
||||
# Arguments
|
||||
e: float. New value of epsilon.
|
||||
|
||||
# Example
|
||||
```python
|
||||
>>> from keras import backend as K
|
||||
>>> K.epsilon()
|
||||
1e-08
|
||||
>>> K.set_epsilon(1e-05)
|
||||
>>> K.epsilon()
|
||||
1e-05
|
||||
```
|
||||
'''
|
||||
global _EPSILON
|
||||
_EPSILON = e
|
||||
@@ -28,26 +50,80 @@ def set_epsilon(e):
|
||||
def floatx():
|
||||
'''Returns the default float type, as a string
|
||||
(e.g. 'float16', 'float32', 'float64').
|
||||
|
||||
# Returns
|
||||
String, the current default float type.
|
||||
|
||||
# Example
|
||||
```python
|
||||
>>> keras.backend.floatx()
|
||||
'float32'
|
||||
```
|
||||
'''
|
||||
return _FLOATX
|
||||
|
||||
|
||||
def set_floatx(floatx):
|
||||
'''Sets the default float type.
|
||||
|
||||
# Arguments
|
||||
floatx: string. `'float16'`, `'float32'`, or `'float64'`.
|
||||
|
||||
# Example
|
||||
```python
|
||||
>>> from keras import backend as K
|
||||
>>> K.floatx()
|
||||
'float32'
|
||||
>>> K.set_floatx('float16')
|
||||
>>> K.floatx()
|
||||
'float16'
|
||||
```
|
||||
'''
|
||||
global _FLOATX
|
||||
if floatx not in {'float16', 'float32', 'float64'}:
|
||||
raise Exception('Unknown floatx type: ' + str(floatx))
|
||||
raise ValueError('Unknown floatx type: ' + str(floatx))
|
||||
_FLOATX = str(floatx)
|
||||
|
||||
|
||||
def cast_to_floatx(x):
|
||||
'''Cast a Numpy array to floatx.
|
||||
'''Cast a Numpy array to the default Keras float type.
|
||||
|
||||
# Arguments
|
||||
x: Numpy array.
|
||||
|
||||
# Returns
|
||||
The same Numpy array, cast to its new type.
|
||||
|
||||
# Example
|
||||
```python
|
||||
>>> from keras import backend as K
|
||||
>>> K.floatx()
|
||||
'float32'
|
||||
>>> arr = numpy.array([1.0, 2.0], dtype='float64')
|
||||
>>> arr.dtype
|
||||
dtype('float64')
|
||||
>>> new_arr = K.cast_to_floatx(arr)
|
||||
>>> new_arr
|
||||
array([ 1., 2.], dtype=float32)
|
||||
>>> new_arr.dtype
|
||||
dtype('float32')
|
||||
```
|
||||
'''
|
||||
return np.asarray(x, dtype=_FLOATX)
|
||||
|
||||
|
||||
def image_dim_ordering():
|
||||
'''Returns the image dimension ordering
|
||||
'''Returns the default image dimension ordering
|
||||
convention ('th' or 'tf').
|
||||
|
||||
# Returns
|
||||
A string, either `'th'` or `'tf'`
|
||||
|
||||
# Example
|
||||
```python
|
||||
>>> keras.backend.image_dim_ordering()
|
||||
'th'
|
||||
```
|
||||
'''
|
||||
return _IMAGE_DIM_ORDERING
|
||||
|
||||
@@ -55,14 +131,44 @@ def image_dim_ordering():
|
||||
def set_image_dim_ordering(dim_ordering):
|
||||
'''Sets the value of the image dimension
|
||||
ordering convention ('th' or 'tf').
|
||||
|
||||
# Arguments
|
||||
dim_ordering: string. `'th'` or `'tf'`.
|
||||
|
||||
# Example
|
||||
```python
|
||||
>>> from keras import backend as K
|
||||
>>> K.image_dim_ordering()
|
||||
'th'
|
||||
>>> K.set_image_dim_ordering('tf')
|
||||
>>> K.image_dim_ordering()
|
||||
'tf'
|
||||
```
|
||||
'''
|
||||
global _IMAGE_DIM_ORDERING
|
||||
if dim_ordering not in {'tf', 'th'}:
|
||||
raise Exception('Unknown dim_ordering:', dim_ordering)
|
||||
raise ValueError('Unknown dim_ordering:', dim_ordering)
|
||||
_IMAGE_DIM_ORDERING = str(dim_ordering)
|
||||
|
||||
|
||||
def get_uid(prefix=''):
|
||||
'''Provides a unique UID given a string prefix.
|
||||
|
||||
# Arguments
|
||||
prefix: string.
|
||||
|
||||
# Returns
|
||||
An integer.
|
||||
|
||||
# Example
|
||||
```
|
||||
>>> keras.backend.get_uid('dense')
|
||||
1
|
||||
>>> keras.backend.get_uid('dense')
|
||||
2
|
||||
```
|
||||
|
||||
'''
|
||||
_UID_PREFIXES[prefix] += 1
|
||||
return _UID_PREFIXES[prefix]
|
||||
|
||||
@@ -73,6 +179,28 @@ def reset_uids():
|
||||
|
||||
|
||||
def is_keras_tensor(x):
|
||||
'''Returns whether `x` is a Keras tensor.
|
||||
|
||||
# Arguments
|
||||
x: a potential tensor.
|
||||
|
||||
# Returns
|
||||
A boolean: whether the argument is a Keras tensor.
|
||||
|
||||
# Examples
|
||||
```python
|
||||
>>> from keras import backend as K
|
||||
>>> np_var = numpy.array([1, 2])
|
||||
>>> K.is_keras_tensor(np_var)
|
||||
False
|
||||
>>> keras_var = K.variable(np_var)
|
||||
>>> K.is_keras_tensor(keras_var) # A variable is not a Tensor.
|
||||
False
|
||||
>>> keras_placeholder = K.placeholder(shape=(2, 4, 5))
|
||||
>>> K.is_keras_tensor(keras_placeholder) # A placeholder is a Tensor.
|
||||
True
|
||||
```
|
||||
'''
|
||||
if hasattr(x, '_keras_shape'):
|
||||
return True
|
||||
else:
|
||||
|
||||
Diferenças do arquivo suprimidas por serem muito extensas
Carregar Diff
+396
-116
@@ -14,7 +14,7 @@ except ImportError:
|
||||
from theano.sandbox.softsign import softsign as T_softsign
|
||||
import inspect
|
||||
import numpy as np
|
||||
from .common import _FLOATX, _EPSILON, _IMAGE_DIM_ORDERING
|
||||
from .common import _FLOATX, floatx, _EPSILON, image_dim_ordering
|
||||
py_all = all
|
||||
|
||||
|
||||
@@ -35,6 +35,7 @@ def set_learning_phase(value):
|
||||
'0 or 1.')
|
||||
_LEARNING_PHASE = value
|
||||
|
||||
|
||||
# VARIABLE MANIPULATION
|
||||
|
||||
|
||||
@@ -55,22 +56,37 @@ def to_dense(tensor):
|
||||
return tensor
|
||||
|
||||
|
||||
def variable(value, dtype=_FLOATX, name=None):
|
||||
'''Instantiate a tensor variable.
|
||||
def variable(value, dtype=None, name=None):
|
||||
'''Instantiates a variable and returns it.
|
||||
|
||||
# Arguments
|
||||
value: Numpy array, initial value of the tensor.
|
||||
dtype: Tensor type.
|
||||
name: Optional name string for the tensor.
|
||||
|
||||
# Returns
|
||||
A variable instance (with Keras metadata included).
|
||||
'''
|
||||
if dtype is None:
|
||||
dtype = floatx()
|
||||
if hasattr(value, 'tocoo'):
|
||||
_assert_sparse_module()
|
||||
return th_sparse_module.as_sparse_variable(value)
|
||||
variable = th_sparse_module.as_sparse_variable(value)
|
||||
else:
|
||||
value = np.asarray(value, dtype=dtype)
|
||||
return theano.shared(value=value, name=name, strict=False)
|
||||
variable = theano.shared(value=value, name=name, strict=False)
|
||||
variable._keras_shape = value.shape
|
||||
variable._uses_learning_phase = False
|
||||
return variable
|
||||
|
||||
|
||||
def placeholder(shape=None, ndim=None, dtype=_FLOATX, sparse=False, name=None):
|
||||
def placeholder(shape=None, ndim=None, dtype=None, sparse=False, name=None):
|
||||
'''Instantiate an input data placeholder variable.
|
||||
'''
|
||||
if dtype is None:
|
||||
dtype = floatx()
|
||||
if shape is None and ndim is None:
|
||||
raise Exception('Specify either a shape or ndim value.')
|
||||
raise ValueError('Specify either a shape or ndim value.')
|
||||
if shape is not None:
|
||||
ndim = len(shape)
|
||||
else:
|
||||
@@ -88,7 +104,7 @@ def placeholder(shape=None, ndim=None, dtype=_FLOATX, sparse=False, name=None):
|
||||
|
||||
|
||||
def shape(x):
|
||||
'''Return the shape of a tensor.
|
||||
'''Returns the shape of a tensor.
|
||||
|
||||
Warning: type returned will be different for
|
||||
Theano backend (Theano tensor type) and TF backend (TF TensorShape).
|
||||
@@ -96,6 +112,22 @@ def shape(x):
|
||||
return x.shape
|
||||
|
||||
|
||||
def int_shape(x):
|
||||
'''Returns the shape of a Keras tensor or a Keras variable as a tuple of
|
||||
integers or None entries.
|
||||
|
||||
# Arguments
|
||||
x: Tensor or variable.
|
||||
|
||||
# Returns
|
||||
A tuple of integers (or None entries).
|
||||
'''
|
||||
if hasattr(x, '_keras_shape'):
|
||||
return x._keras_shape
|
||||
else:
|
||||
raise Exception('Not a Keras tensor:', x)
|
||||
|
||||
|
||||
def ndim(x):
|
||||
return x.ndim
|
||||
|
||||
@@ -105,49 +137,55 @@ def dtype(x):
|
||||
|
||||
|
||||
def eval(x):
|
||||
'''Run a graph.
|
||||
'''Returns the value of a tensor.
|
||||
'''
|
||||
return to_dense(x).eval()
|
||||
|
||||
|
||||
def zeros(shape, dtype=_FLOATX, name=None):
|
||||
'''Instantiate an all-zeros variable.
|
||||
def zeros(shape, dtype=None, name=None):
|
||||
'''Instantiates an all-zeros variable.
|
||||
'''
|
||||
if dtype is None:
|
||||
dtype = floatx()
|
||||
return variable(np.zeros(shape), dtype, name)
|
||||
|
||||
|
||||
def ones(shape, dtype=_FLOATX, name=None):
|
||||
'''Instantiate an all-ones variable.
|
||||
def ones(shape, dtype=None, name=None):
|
||||
'''Instantiates an all-ones variable.
|
||||
'''
|
||||
if dtype is None:
|
||||
dtype = floatx()
|
||||
return variable(np.ones(shape), dtype, name)
|
||||
|
||||
|
||||
def eye(size, dtype=_FLOATX, name=None):
|
||||
'''Instantiate an identity matrix.
|
||||
def eye(size, dtype=None, name=None):
|
||||
'''Instantiates an identity matrix.
|
||||
'''
|
||||
if dtype is None:
|
||||
dtype = floatx()
|
||||
return variable(np.eye(size), dtype, name)
|
||||
|
||||
|
||||
def ones_like(x):
|
||||
def ones_like(x, name=None):
|
||||
return T.ones_like(x)
|
||||
|
||||
|
||||
def zeros_like(x):
|
||||
def zeros_like(x, name=None):
|
||||
return T.zeros_like(x)
|
||||
|
||||
|
||||
def random_uniform_variable(shape, low, high, dtype=_FLOATX, name=None):
|
||||
def random_uniform_variable(shape, low, high, dtype=None, name=None):
|
||||
return variable(np.random.uniform(low=low, high=high, size=shape),
|
||||
dtype=dtype, name=name)
|
||||
|
||||
|
||||
def random_normal_variable(shape, mean, scale, dtype=_FLOATX, name=None):
|
||||
def random_normal_variable(shape, mean, scale, dtype=None, name=None):
|
||||
return variable(np.random.normal(loc=0.0, scale=scale, size=shape),
|
||||
dtype=dtype, name=name)
|
||||
|
||||
|
||||
def count_params(x):
|
||||
'''Return number of scalars in a tensor.
|
||||
'''Returns the number of scalars in a tensor.
|
||||
|
||||
Return: numpy integer.
|
||||
'''
|
||||
@@ -230,7 +268,7 @@ def batch_dot(x, y, axes=None):
|
||||
|
||||
output_shape = (100, 30)
|
||||
'''
|
||||
if type(axes) == int:
|
||||
if isinstance(axes, int):
|
||||
axes = (axes, axes)
|
||||
if axes is None:
|
||||
# behaves like tf.batch_matmul as default
|
||||
@@ -278,9 +316,12 @@ def prod(x, axis=None, keepdims=False):
|
||||
|
||||
|
||||
def mean(x, axis=None, keepdims=False):
|
||||
'''Mean of a tensor, alongside the specified axis.
|
||||
'''
|
||||
dtype = None
|
||||
if 'int' in x.dtype:
|
||||
dtype = _FLOATX
|
||||
# bool is available since theano v0.9dev
|
||||
if 'int' in x.dtype or x.dtype == 'bool':
|
||||
dtype = floatx()
|
||||
return T.mean(x, axis=axis, keepdims=keepdims, dtype=dtype)
|
||||
|
||||
|
||||
@@ -392,8 +433,43 @@ def cos(x):
|
||||
|
||||
|
||||
def normalize_batch_in_training(x, gamma, beta,
|
||||
reduction_axes, epsilon=0.0001):
|
||||
'''Compute mean and std for batch then apply batch_normalization on batch.
|
||||
reduction_axes, epsilon=1e-3):
|
||||
'''Computes mean and std for batch then apply batch_normalization on batch.
|
||||
'''
|
||||
# TODO remove this if statement when Theano without
|
||||
# T.nnet.bn.batch_normalization_train is deprecated
|
||||
if not hasattr(T.nnet.bn, 'batch_normalization_train'):
|
||||
return _old_normalize_batch_in_training(x, gamma, beta, reduction_axes, epsilon)
|
||||
|
||||
normed, mean, stdinv = T.nnet.bn.batch_normalization_train(
|
||||
x, gamma, beta, reduction_axes, epsilon)
|
||||
|
||||
return normed, mean, T.inv(stdinv ** 2)
|
||||
|
||||
|
||||
def batch_normalization(x, mean, var, beta, gamma, epsilon=1e-3):
|
||||
'''Apply batch normalization on x given mean, var, beta and gamma.
|
||||
'''
|
||||
# TODO remove this if statement when Theano without
|
||||
# T.nnet.bn.batch_normalization_test is deprecated
|
||||
if not hasattr(T.nnet.bn, 'batch_normalization_test'):
|
||||
return _old_batch_normalization(x, mean, var, beta, gamma, epsilon)
|
||||
|
||||
if mean.ndim == 1:
|
||||
# based on TensorFlow's default: normalize along rightmost dimension
|
||||
reduction_axes = range(x.ndim - 1)
|
||||
else:
|
||||
reduction_axes = [i for i in range(x.ndim) if mean.broadcastable[i]]
|
||||
|
||||
return T.nnet.bn.batch_normalization_test(
|
||||
x, gamma, beta, mean, var, reduction_axes, epsilon)
|
||||
|
||||
|
||||
# TODO remove this function when Theano without
|
||||
# T.nnet.bn.batch_normalization_train is deprecated
|
||||
def _old_normalize_batch_in_training(x, gamma, beta,
|
||||
reduction_axes, epsilon=1e-3):
|
||||
'''Computes mean and std for batch then apply batch_normalization on batch.
|
||||
'''
|
||||
dev = theano.config.device
|
||||
use_cudnn = ndim(x) < 5 and reduction_axes == [0, 2, 3] and (dev.startswith('cuda') or dev.startswith('gpu'))
|
||||
@@ -429,9 +505,21 @@ def normalize_batch_in_training(x, gamma, beta,
|
||||
return normed, mean, var
|
||||
|
||||
|
||||
def batch_normalization(x, mean, var, beta, gamma, epsilon=0.0001):
|
||||
# TODO remove this if statement when Theano without
|
||||
# T.nnet.bn.batch_normalization_test is deprecated
|
||||
def _old_batch_normalization(x, mean, var, beta, gamma, epsilon=1e-3):
|
||||
'''Apply batch normalization on x given mean, var, beta and gamma.
|
||||
'''
|
||||
if mean.ndim == 1 and x.ndim > 1:
|
||||
# in TensorFlow's batch_normalization, if the parameters are vectors
|
||||
# the batch normalization should be applied along the rightmost axis.
|
||||
# Theano expects the parameters to always have x.ndim dimensions.
|
||||
shuffle_pattern = ['x'] * (x.ndim - 1) + [0]
|
||||
mean = mean.dimshuffle(shuffle_pattern)
|
||||
var = var.dimshuffle(shuffle_pattern)
|
||||
beta = beta.dimshuffle(shuffle_pattern)
|
||||
gamma = gamma.dimshuffle(shuffle_pattern)
|
||||
|
||||
ndim = x.ndim
|
||||
dev = theano.config.device
|
||||
use_cudnn = ndim < 5 and (dev.startswith('cuda') or dev.startswith('gpu'))
|
||||
@@ -442,16 +530,16 @@ def batch_normalization(x, mean, var, beta, gamma, epsilon=0.0001):
|
||||
shuffle_pattern = list(range(ndim))
|
||||
shuffle_pattern[1] = shuffle_pattern[axis]
|
||||
shuffle_pattern[axis] = 1
|
||||
x = x.dimshuffle(shuffle_pattern)
|
||||
mean = mean.dimshuffle(shuffle_pattern)
|
||||
var = var.dimshuffle(shuffle_pattern)
|
||||
beta = beta.dimshuffle(shuffle_pattern)
|
||||
gamma = gamma.dimshuffle(shuffle_pattern)
|
||||
normed = theano.sandbox.cuda.dnn.dnn_batch_normalization_test(x, gamma, beta, mean, var,
|
||||
'spatial', epsilon)
|
||||
if axis != 1:
|
||||
normed = normed.dimshuffle(shuffle_pattern)
|
||||
return normed
|
||||
return theano.sandbox.cuda.dnn.dnn_batch_normalization_test(
|
||||
x.dimshuffle(shuffle_pattern),
|
||||
gamma.dimshuffle(shuffle_pattern),
|
||||
beta.dimshuffle(shuffle_pattern),
|
||||
mean.dimshuffle(shuffle_pattern),
|
||||
var.dimshuffle(shuffle_pattern),
|
||||
'spatial', epsilon).dimshuffle(shuffle_pattern)
|
||||
else:
|
||||
return theano.sandbox.cuda.dnn.dnn_batch_normalization_test(
|
||||
x, gamma, beta, mean, var, 'spatial', epsilon)
|
||||
except AttributeError:
|
||||
pass
|
||||
except ValueError:
|
||||
@@ -470,7 +558,7 @@ def concatenate(tensors, axis=-1):
|
||||
elif axis == 1:
|
||||
return th_sparse_module.basic.hstack(tensors, format='csr')
|
||||
else:
|
||||
raise Exception('Invalid concat axis for sparse matrix: ' + axis)
|
||||
raise ValueError('Invalid concat axis for sparse matrix:', axis)
|
||||
else:
|
||||
return T.concatenate([to_dense(x) for x in tensors], axis=axis)
|
||||
|
||||
@@ -514,7 +602,7 @@ def resize_images(X, height_factor, width_factor, dim_ordering):
|
||||
output = repeat_elements(output, width_factor, axis=2)
|
||||
return output
|
||||
else:
|
||||
raise Exception('Invalid dim_ordering: ' + dim_ordering)
|
||||
raise ValueError('Invalid dim_ordering:', dim_ordering)
|
||||
|
||||
|
||||
def resize_volumes(X, depth_factor, height_factor, width_factor, dim_ordering):
|
||||
@@ -535,7 +623,7 @@ def resize_volumes(X, depth_factor, height_factor, width_factor, dim_ordering):
|
||||
output = repeat_elements(output, width_factor, axis=3)
|
||||
return output
|
||||
else:
|
||||
raise Exception('Invalid dim_ordering: ' + dim_ordering)
|
||||
raise ValueError('Invalid dim_ordering:', dim_ordering)
|
||||
|
||||
|
||||
def repeat(x, n):
|
||||
@@ -549,6 +637,19 @@ def repeat(x, n):
|
||||
return T.extra_ops.repeat(x, n, axis=1)
|
||||
|
||||
|
||||
def arange(start, stop=None, step=1, dtype='int32'):
|
||||
'''Creates a 1-D tensor containing a sequence of integers.
|
||||
|
||||
The function arguments use the same convention as
|
||||
Theano's arange: if only one argument is provided,
|
||||
it is in fact the "stop" argument.
|
||||
|
||||
The default type of the returned tensor is 'int32' to
|
||||
match TensorFlow's default.
|
||||
'''
|
||||
return T.arange(start, stop=stop, step=step, dtype=dtype)
|
||||
|
||||
|
||||
def tile(x, n):
|
||||
return T.tile(x, n)
|
||||
|
||||
@@ -616,10 +717,15 @@ def asymmetric_temporal_padding(x, left_pad=1, right_pad=1):
|
||||
return T.set_subtensor(output[:, left_pad:x.shape[1] + left_pad, :], x)
|
||||
|
||||
|
||||
def spatial_2d_padding(x, padding=(1, 1), dim_ordering=_IMAGE_DIM_ORDERING):
|
||||
def spatial_2d_padding(x, padding=(1, 1), dim_ordering='default'):
|
||||
'''Pad the 2nd and 3rd dimensions of a 4D tensor
|
||||
with "padding[0]" and "padding[1]" (resp.) zeros left and right.
|
||||
'''
|
||||
if dim_ordering == 'default':
|
||||
dim_ordering = image_dim_ordering()
|
||||
if dim_ordering not in {'th', 'tf'}:
|
||||
raise ValueError('Unknown dim_ordering ' + str(dim_ordering))
|
||||
|
||||
input_shape = x.shape
|
||||
if dim_ordering == 'th':
|
||||
output_shape = (input_shape[0],
|
||||
@@ -643,14 +749,22 @@ def spatial_2d_padding(x, padding=(1, 1), dim_ordering=_IMAGE_DIM_ORDERING):
|
||||
slice(padding[1], input_shape[2] + padding[1]),
|
||||
slice(None))
|
||||
else:
|
||||
raise Exception('Invalid dim_ordering: ' + dim_ordering)
|
||||
raise ValueError('Invalid dim_ordering:', dim_ordering)
|
||||
return T.set_subtensor(output[indices], x)
|
||||
|
||||
|
||||
def asymmetric_spatial_2d_padding(x, top_pad=1, bottom_pad=1, left_pad=1, right_pad=1, dim_ordering=_IMAGE_DIM_ORDERING):
|
||||
def asymmetric_spatial_2d_padding(x, top_pad=1, bottom_pad=1,
|
||||
left_pad=1, right_pad=1,
|
||||
dim_ordering='default'):
|
||||
'''Pad the rows and columns of a 4D tensor
|
||||
with "top_pad", "bottom_pad", "left_pad", "right_pad" (resp.) zeros rows on top, bottom; cols on left, right.
|
||||
with "top_pad", "bottom_pad", "left_pad", "right_pad" (resp.) zeros
|
||||
rows on top, bottom; cols on left, right.
|
||||
'''
|
||||
if dim_ordering == 'default':
|
||||
dim_ordering = image_dim_ordering()
|
||||
if dim_ordering not in {'th', 'tf'}:
|
||||
raise ValueError('Unknown dim_ordering ' + str(dim_ordering))
|
||||
|
||||
input_shape = x.shape
|
||||
if dim_ordering == 'th':
|
||||
output_shape = (input_shape[0],
|
||||
@@ -675,14 +789,19 @@ def asymmetric_spatial_2d_padding(x, top_pad=1, bottom_pad=1, left_pad=1, right_
|
||||
slice(left_pad, input_shape[2] + left_pad),
|
||||
slice(None))
|
||||
else:
|
||||
raise Exception('Invalid dim_ordering: ' + dim_ordering)
|
||||
raise ValueError('Invalid dim_ordering:', dim_ordering)
|
||||
return T.set_subtensor(output[indices], x)
|
||||
|
||||
|
||||
def spatial_3d_padding(x, padding=(1, 1, 1), dim_ordering=_IMAGE_DIM_ORDERING):
|
||||
def spatial_3d_padding(x, padding=(1, 1, 1), dim_ordering='default'):
|
||||
'''Pad the 2nd, 3rd and 4th dimensions of a 5D tensor
|
||||
with "padding[0]", "padding[1]" and "padding[2]" (resp.) zeros left and right.
|
||||
'''
|
||||
if dim_ordering == 'default':
|
||||
dim_ordering = image_dim_ordering()
|
||||
if dim_ordering not in {'th', 'tf'}:
|
||||
raise ValueError('Unknown dim_ordering ' + str(dim_ordering))
|
||||
|
||||
input_shape = x.shape
|
||||
if dim_ordering == 'th':
|
||||
output_shape = (input_shape[0],
|
||||
@@ -710,11 +829,11 @@ def spatial_3d_padding(x, padding=(1, 1, 1), dim_ordering=_IMAGE_DIM_ORDERING):
|
||||
slice(padding[2], input_shape[3] + padding[2]),
|
||||
slice(None))
|
||||
else:
|
||||
raise Exception('Invalid dim_ordering: ' + dim_ordering)
|
||||
raise ValueError('Invalid dim_ordering:', dim_ordering)
|
||||
return T.set_subtensor(output[indices], x)
|
||||
|
||||
|
||||
def pack(x):
|
||||
def stack(x):
|
||||
return T.stack(*x)
|
||||
|
||||
|
||||
@@ -733,19 +852,22 @@ def one_hot(indices, nb_classes):
|
||||
def reverse(x, axes):
|
||||
'''Reverse a tensor along the the specified axes
|
||||
'''
|
||||
if type(axes) == int:
|
||||
if isinstance(axes, int):
|
||||
axes = [axes]
|
||||
slices = [slice(None, None, -1) if i in axes else slice(None, None, None) for i in range(x.ndim)]
|
||||
return x[slices]
|
||||
|
||||
|
||||
def pattern_broadcast(x, broatcastable):
|
||||
return T.patternbroadcast(x, broatcastable)
|
||||
|
||||
# VALUE MANIPULATION
|
||||
|
||||
|
||||
def get_value(x):
|
||||
if not hasattr(x, 'get_value'):
|
||||
raise Exception("'get_value() can only be called on a variable. " +
|
||||
"If you have an expression instead, use eval().")
|
||||
raise TypeError('get_value() can only be called on a variable. '
|
||||
'If you have an expression instead, use eval().')
|
||||
return x.get_value()
|
||||
|
||||
|
||||
@@ -782,13 +904,18 @@ def print_tensor(x, message=''):
|
||||
class Function(object):
|
||||
|
||||
def __init__(self, inputs, outputs, updates=[], **kwargs):
|
||||
unique_variables_to_update = {}
|
||||
for v, nv in updates:
|
||||
if v not in unique_variables_to_update:
|
||||
unique_variables_to_update[v] = nv
|
||||
updates = unique_variables_to_update.items()
|
||||
self.function = theano.function(inputs, outputs, updates=updates,
|
||||
allow_input_downcast=True,
|
||||
on_unused_input='ignore',
|
||||
**kwargs)
|
||||
|
||||
def __call__(self, inputs):
|
||||
assert type(inputs) in {list, tuple}
|
||||
assert isinstance(inputs, (list, tuple))
|
||||
return self.function(*inputs)
|
||||
|
||||
|
||||
@@ -797,7 +924,7 @@ def function(inputs, outputs, updates=[], **kwargs):
|
||||
function_args = inspect.getargspec(theano.function)[0]
|
||||
for key in kwargs.keys():
|
||||
if key not in function_args:
|
||||
msg = "Invalid argument '%s' passed to K.function" % key
|
||||
msg = 'Invalid argument "%s" passed to K.function' % key
|
||||
raise ValueError(msg)
|
||||
return Function(inputs, outputs, updates=updates, **kwargs)
|
||||
|
||||
@@ -858,8 +985,9 @@ def rnn(step_function, inputs, initial_states,
|
||||
|
||||
if unroll:
|
||||
if input_length is None:
|
||||
raise Exception('When specifying `unroll=True`, an `input_length` '
|
||||
'must be provided to `rnn`.')
|
||||
raise ValueError('When specifying `unroll=True`, '
|
||||
'an `input_length` '
|
||||
'must be provided to `rnn`.')
|
||||
|
||||
axes = [1, 0] + list(range(2, ndim))
|
||||
inputs = inputs.dimshuffle(axes)
|
||||
@@ -925,7 +1053,7 @@ def rnn(step_function, inputs, initial_states,
|
||||
go_backwards=go_backwards)
|
||||
|
||||
# deal with Theano API inconsistency
|
||||
if type(results) is list:
|
||||
if isinstance(results, list):
|
||||
outputs = results[0]
|
||||
states = results[1:]
|
||||
else:
|
||||
@@ -962,7 +1090,7 @@ def rnn(step_function, inputs, initial_states,
|
||||
go_backwards=go_backwards)
|
||||
|
||||
# deal with Theano API inconsistency
|
||||
if type(results) is list:
|
||||
if isinstance(results, list):
|
||||
outputs = results[0]
|
||||
states = results[1:]
|
||||
else:
|
||||
@@ -989,7 +1117,7 @@ def in_train_phase(x, alt):
|
||||
return x
|
||||
elif _LEARNING_PHASE is 0:
|
||||
return alt
|
||||
x = T.switch(_LEARNING_PHASE, x, alt)
|
||||
x = theano.ifelse.ifelse(_LEARNING_PHASE, x, alt)
|
||||
x._uses_learning_phase = True
|
||||
return x
|
||||
|
||||
@@ -999,7 +1127,7 @@ def in_test_phase(x, alt):
|
||||
return alt
|
||||
elif _LEARNING_PHASE is 0:
|
||||
return x
|
||||
x = T.switch(_LEARNING_PHASE, alt, x)
|
||||
x = theano.ifelse.ifelse(_LEARNING_PHASE, alt, x)
|
||||
x._uses_learning_phase = True
|
||||
return x
|
||||
|
||||
@@ -1007,10 +1135,13 @@ def in_test_phase(x, alt):
|
||||
# NN OPERATIONS
|
||||
|
||||
def _assert_has_capability(module, func):
|
||||
assert hasattr(module, func), ('It looks like like your version of '
|
||||
'Theano is out of date. '
|
||||
'Install the latest version with:\n'
|
||||
'pip install git+git://github.com/Theano/Theano.git --upgrade --no-deps')
|
||||
if not hasattr(module, func):
|
||||
raise EnvironmentError(
|
||||
'It looks like like your version of '
|
||||
'Theano is out of date. '
|
||||
'Install the latest version with:\n'
|
||||
'pip install git+git://github.com/Theano/Theano.git '
|
||||
'--upgrade --no-deps')
|
||||
|
||||
|
||||
def elu(x, alpha=1.0):
|
||||
@@ -1095,7 +1226,7 @@ def dropout(x, level, noise_shape=None, seed=None):
|
||||
seed: random seed to ensure determinism.
|
||||
'''
|
||||
if level < 0. or level >= 1:
|
||||
raise Exception('Dropout level must be in interval [0, 1[.')
|
||||
raise ValueError('Dropout level must be in interval [0, 1[.')
|
||||
if seed is None:
|
||||
seed = np.random.randint(1, 10e6)
|
||||
|
||||
@@ -1119,7 +1250,7 @@ def l2_normalize(x, axis):
|
||||
|
||||
|
||||
def in_top_k(predictions, targets, k):
|
||||
'''Says whether the `targets` are in the top `k` `predictions`
|
||||
'''Returns whether the `targets` are in the top `k` `predictions`
|
||||
|
||||
# Arguments
|
||||
predictions: A tensor of shape batch_size x classess and type float32.
|
||||
@@ -1182,8 +1313,10 @@ def _preprocess_border_mode(border_mode):
|
||||
th_border_mode = 'half'
|
||||
elif border_mode == 'valid':
|
||||
th_border_mode = 'valid'
|
||||
elif border_mode == 'full':
|
||||
th_border_mode = 'full'
|
||||
else:
|
||||
raise Exception('Border mode not supported: ' + str(border_mode))
|
||||
raise ValueError('Border mode not supported:', str(border_mode))
|
||||
return th_border_mode
|
||||
|
||||
|
||||
@@ -1275,8 +1408,20 @@ def _postprocess_conv3d_output(conv_out, x, border_mode, np_kernel, strides, dim
|
||||
return conv_out
|
||||
|
||||
|
||||
def conv1d(x, kernel, stride=1, border_mode='valid',
|
||||
image_shape=None, filter_shape=None):
|
||||
'''1D convolution.
|
||||
|
||||
# Arguments
|
||||
kernel: kernel tensor.
|
||||
strides: stride integer.
|
||||
border_mode: string, "same" or "valid".
|
||||
'''
|
||||
raise NotImplementedError
|
||||
|
||||
|
||||
def conv2d(x, kernel, strides=(1, 1), border_mode='valid',
|
||||
dim_ordering=_IMAGE_DIM_ORDERING, image_shape=None,
|
||||
dim_ordering='default', image_shape=None,
|
||||
filter_shape=None, filter_dilation=(1, 1)):
|
||||
'''2D convolution.
|
||||
|
||||
@@ -1288,8 +1433,10 @@ def conv2d(x, kernel, strides=(1, 1), border_mode='valid',
|
||||
Whether to use Theano or TensorFlow dimension ordering
|
||||
in inputs/kernels/ouputs.
|
||||
'''
|
||||
if dim_ordering == 'default':
|
||||
dim_ordering = image_dim_ordering()
|
||||
if dim_ordering not in {'th', 'tf'}:
|
||||
raise Exception('Unknown dim_ordering ' + str(dim_ordering))
|
||||
raise ValueError('Unknown dim_ordering ', dim_ordering)
|
||||
|
||||
x = _preprocess_conv2d_input(x, dim_ordering)
|
||||
kernel = _preprocess_conv2d_kernel(kernel, dim_ordering)
|
||||
@@ -1320,7 +1467,7 @@ def conv2d(x, kernel, strides=(1, 1), border_mode='valid',
|
||||
|
||||
def deconv2d(x, kernel, output_shape, strides=(1, 1),
|
||||
border_mode='valid',
|
||||
dim_ordering=_IMAGE_DIM_ORDERING,
|
||||
dim_ordering='default',
|
||||
image_shape=None, filter_shape=None):
|
||||
'''2D deconvolution (transposed convolution).
|
||||
|
||||
@@ -1334,8 +1481,10 @@ def deconv2d(x, kernel, output_shape, strides=(1, 1),
|
||||
in inputs/kernels/ouputs.
|
||||
'''
|
||||
flip_filters = False
|
||||
if dim_ordering == 'default':
|
||||
dim_ordering = image_dim_ordering()
|
||||
if dim_ordering not in {'th', 'tf'}:
|
||||
raise Exception('Unknown dim_ordering ' + str(dim_ordering))
|
||||
raise ValueError('Unknown dim_ordering ' + dim_ordering)
|
||||
|
||||
x = _preprocess_conv2d_input(x, dim_ordering)
|
||||
kernel = _preprocess_conv2d_kernel(kernel, dim_ordering)
|
||||
@@ -1343,6 +1492,7 @@ def deconv2d(x, kernel, output_shape, strides=(1, 1),
|
||||
th_border_mode = _preprocess_border_mode(border_mode)
|
||||
np_kernel = kernel.eval()
|
||||
filter_shape = _preprocess_conv2d_filter_shape(dim_ordering, filter_shape)
|
||||
filter_shape = tuple(filter_shape[i] for i in (1, 0, 2, 3))
|
||||
|
||||
op = T.nnet.abstract_conv.AbstractConv2d_gradInputs(imshp=output_shape,
|
||||
kshp=filter_shape,
|
||||
@@ -1358,18 +1508,18 @@ def deconv2d(x, kernel, output_shape, strides=(1, 1),
|
||||
|
||||
def atrous_conv2d(x, kernel, rate=1,
|
||||
border_mode='valid',
|
||||
dim_ordering=_IMAGE_DIM_ORDERING,
|
||||
dim_ordering='default',
|
||||
image_shape=None, filter_shape=None):
|
||||
raise NotImplementedError
|
||||
|
||||
|
||||
def separable_conv2d(x, depthwise_kernel, pointwise_kernel, strides=(1, 1),
|
||||
border_mode='valid', dim_ordering=_IMAGE_DIM_ORDERING):
|
||||
border_mode='valid', dim_ordering='default'):
|
||||
raise NotImplementedError
|
||||
|
||||
|
||||
def conv3d(x, kernel, strides=(1, 1, 1),
|
||||
border_mode='valid', dim_ordering=_IMAGE_DIM_ORDERING,
|
||||
border_mode='valid', dim_ordering='default',
|
||||
volume_shape=None, filter_shape=None,
|
||||
filter_dilation=(1, 1, 1)):
|
||||
'''3D convolution.
|
||||
@@ -1382,14 +1532,16 @@ def conv3d(x, kernel, strides=(1, 1, 1),
|
||||
Whether to use Theano or TensorFlow dimension ordering
|
||||
in inputs/kernels/ouputs.
|
||||
'''
|
||||
if dim_ordering == 'default':
|
||||
dim_ordering = image_dim_ordering()
|
||||
if dim_ordering not in {'th', 'tf'}:
|
||||
raise Exception('Unknown dim_ordering ' + str(dim_ordering))
|
||||
raise ValueError('Unknown dim_ordering:', dim_ordering)
|
||||
|
||||
# TODO: remove this if statement when Theano without AbstractConv3d is deprecated
|
||||
if not hasattr(T.nnet, 'conv3d'):
|
||||
if filter_dilation != (1, 1, 1):
|
||||
raise Exception('conv3d with filter dilation requires Theano '
|
||||
'0.9.0dev3 or newer.')
|
||||
raise ValueError('conv3d with filter dilation requires Theano '
|
||||
'0.9.0dev3 or newer.')
|
||||
|
||||
return _old_theano_conv3d(x, kernel, strides, border_mode,
|
||||
dim_ordering, volume_shape, filter_shape)
|
||||
@@ -1415,17 +1567,18 @@ def conv3d(x, kernel, strides=(1, 1, 1),
|
||||
|
||||
# TODO: remove this function when theano without AbstractConv3d is deprecated
|
||||
def _old_theano_conv3d(x, kernel, strides=(1, 1, 1),
|
||||
border_mode='valid', dim_ordering=_IMAGE_DIM_ORDERING,
|
||||
border_mode='valid', dim_ordering='default',
|
||||
volume_shape=None, filter_shape=None):
|
||||
'''
|
||||
Run on cuDNN if available.
|
||||
border_mode: string, "same" or "valid".
|
||||
'''
|
||||
if dim_ordering == 'default':
|
||||
dim_ordering = image_dim_ordering()
|
||||
if dim_ordering not in {'th', 'tf'}:
|
||||
raise Exception('Unknown dim_ordering ' + str(dim_ordering))
|
||||
|
||||
raise ValueError('Unknown dim_ordering:', dim_ordering)
|
||||
if border_mode not in {'same', 'valid'}:
|
||||
raise Exception('Invalid border mode: ' + str(border_mode))
|
||||
raise ValueError('Invalid border mode:', border_mode)
|
||||
|
||||
if dim_ordering == 'tf':
|
||||
# TF uses the last dimension as channel dimension,
|
||||
@@ -1477,34 +1630,59 @@ def _old_theano_conv3d(x, kernel, strides=(1, 1, 1),
|
||||
|
||||
|
||||
def pool2d(x, pool_size, strides=(1, 1), border_mode='valid',
|
||||
dim_ordering=_IMAGE_DIM_ORDERING, pool_mode='max'):
|
||||
dim_ordering='default', pool_mode='max'):
|
||||
if dim_ordering == 'default':
|
||||
dim_ordering = image_dim_ordering()
|
||||
if dim_ordering not in {'th', 'tf'}:
|
||||
raise ValueError('Unknown dim_ordering:', dim_ordering)
|
||||
|
||||
assert pool_size[0] >= 1 and pool_size[1] >= 1
|
||||
|
||||
if border_mode == 'same':
|
||||
w_pad = pool_size[0] - 2 if pool_size[0] % 2 == 1 else pool_size[0] - 1
|
||||
h_pad = pool_size[1] - 2 if pool_size[1] % 2 == 1 else pool_size[1] - 1
|
||||
w_pad = pool_size[0] - 2 if pool_size[0] > 2 and pool_size[0] % 2 == 1 else pool_size[0] - 1
|
||||
h_pad = pool_size[1] - 2 if pool_size[1] > 2 and pool_size[1] % 2 == 1 else pool_size[1] - 1
|
||||
padding = (w_pad, h_pad)
|
||||
elif border_mode == 'valid':
|
||||
padding = (0, 0)
|
||||
else:
|
||||
raise Exception('Invalid border mode: ' + str(border_mode))
|
||||
raise ValueError('Invalid border mode:', border_mode)
|
||||
|
||||
if dim_ordering not in {'th', 'tf'}:
|
||||
raise Exception('Unknown dim_ordering ' + str(dim_ordering))
|
||||
raise ValueError('Unknown dim_ordering:', dim_ordering)
|
||||
|
||||
if dim_ordering == 'tf':
|
||||
x = x.dimshuffle((0, 3, 1, 2))
|
||||
|
||||
if pool_mode == 'max':
|
||||
pool_out = pool.pool_2d(x, ds=pool_size, st=strides,
|
||||
ignore_border=True,
|
||||
padding=padding,
|
||||
mode='max')
|
||||
# TODO remove the old call once Theano older than 0.9.0dev4 is deprecated
|
||||
try:
|
||||
# new interface (introduced in 0.9.0dev4)
|
||||
pool_out = pool.pool_2d(x, ws=pool_size, stride=strides,
|
||||
ignore_border=True,
|
||||
pad=padding,
|
||||
mode='max')
|
||||
except TypeError:
|
||||
# old interface
|
||||
pool_out = pool.pool_2d(x, ds=pool_size, st=strides,
|
||||
ignore_border=True,
|
||||
padding=padding,
|
||||
mode='max')
|
||||
elif pool_mode == 'avg':
|
||||
pool_out = pool.pool_2d(x, ds=pool_size, st=strides,
|
||||
ignore_border=True,
|
||||
padding=padding,
|
||||
mode='average_exc_pad')
|
||||
# TODO remove the old call once Theano older than 0.9.0dev4 is deprecated
|
||||
try:
|
||||
# new interface (introduced in 0.9.0dev4)
|
||||
pool_out = pool.pool_2d(x, ws=pool_size, stride=strides,
|
||||
ignore_border=True,
|
||||
pad=padding,
|
||||
mode='average_exc_pad')
|
||||
except TypeError:
|
||||
# old interface
|
||||
pool_out = pool.pool_2d(x, ds=pool_size, st=strides,
|
||||
ignore_border=True,
|
||||
padding=padding,
|
||||
mode='average_exc_pad')
|
||||
else:
|
||||
raise Exception('Invalid pooling mode: ' + str(pool_mode))
|
||||
raise ValueError('Invalid pooling mode:', pool_mode)
|
||||
|
||||
if border_mode == 'same':
|
||||
expected_width = (x.shape[2] + strides[0] - 1) // strides[0]
|
||||
@@ -1520,7 +1698,12 @@ def pool2d(x, pool_size, strides=(1, 1), border_mode='valid',
|
||||
|
||||
|
||||
def pool3d(x, pool_size, strides=(1, 1, 1), border_mode='valid',
|
||||
dim_ordering=_IMAGE_DIM_ORDERING, pool_mode='max'):
|
||||
dim_ordering='default', pool_mode='max'):
|
||||
if dim_ordering == 'default':
|
||||
dim_ordering = image_dim_ordering()
|
||||
if dim_ordering not in {'th', 'tf'}:
|
||||
raise ValueError('Unknown dim_ordering:', dim_ordering)
|
||||
|
||||
# TODO: remove this if statement when Theano without pool_3d is deprecated
|
||||
# (pool_3d was introduced after 0.9.0dev3)
|
||||
if not hasattr(T.signal.pool, 'pool_3d'):
|
||||
@@ -1535,26 +1718,43 @@ def pool3d(x, pool_size, strides=(1, 1, 1), border_mode='valid',
|
||||
elif border_mode == 'valid':
|
||||
padding = (0, 0, 0)
|
||||
else:
|
||||
raise Exception('Invalid border mode: ' + str(border_mode))
|
||||
|
||||
raise ValueError('Invalid border mode:', border_mode)
|
||||
if dim_ordering not in {'th', 'tf'}:
|
||||
raise Exception('Unknown dim_ordering ' + str(dim_ordering))
|
||||
raise ValueError('Unknown dim_ordering:', dim_ordering)
|
||||
|
||||
if dim_ordering == 'tf':
|
||||
x = x.dimshuffle((0, 4, 1, 2, 3))
|
||||
|
||||
if pool_mode == 'max':
|
||||
pool_out = pool.pool_3d(x, ds=pool_size, st=strides,
|
||||
ignore_border=True,
|
||||
padding=padding,
|
||||
mode='max')
|
||||
# TODO remove the old call once Theano older than 0.9.0dev4 is deprecated
|
||||
try:
|
||||
# new interface (introduced in 0.9.0dev4)
|
||||
pool_out = pool.pool_3d(x, ws=pool_size, stride=strides,
|
||||
ignore_border=True,
|
||||
pad=padding,
|
||||
mode='max')
|
||||
except TypeError:
|
||||
# old interface
|
||||
pool_out = pool.pool_3d(x, ds=pool_size, st=strides,
|
||||
ignore_border=True,
|
||||
padding=padding,
|
||||
mode='max')
|
||||
elif pool_mode == 'avg':
|
||||
pool_out = pool.pool_3d(x, ds=pool_size, st=strides,
|
||||
ignore_border=True,
|
||||
padding=padding,
|
||||
mode='average_exc_pad')
|
||||
# TODO remove the old call once Theano older than 0.9.0dev4 is deprecated
|
||||
try:
|
||||
# new interface (introduced in 0.9.0dev4)
|
||||
pool_out = pool.pool_3d(x, ws=pool_size, stride=strides,
|
||||
ignore_border=True,
|
||||
pad=padding,
|
||||
mode='average_exc_pad')
|
||||
except TypeError:
|
||||
# old interface
|
||||
pool_out = pool.pool_3d(x, ds=pool_size, st=strides,
|
||||
ignore_border=True,
|
||||
padding=padding,
|
||||
mode='average_exc_pad')
|
||||
else:
|
||||
raise Exception('Invalid pooling mode: ' + str(pool_mode))
|
||||
raise ValueError('Invalid pooling mode:', pool_mode)
|
||||
|
||||
if border_mode == 'same':
|
||||
expected_width = (x.shape[2] + strides[0] - 1) // strides[0]
|
||||
@@ -1574,18 +1774,23 @@ def pool3d(x, pool_size, strides=(1, 1, 1), border_mode='valid',
|
||||
# TODO: remove this function when Theano without pool_3d is deprecated
|
||||
# (pool_3d was introduced after 0.9.0dev3)
|
||||
def _old_theano_pool3d(x, pool_size, strides=(1, 1, 1), border_mode='valid',
|
||||
dim_ordering=_IMAGE_DIM_ORDERING, pool_mode='max'):
|
||||
dim_ordering='default', pool_mode='max'):
|
||||
if dim_ordering == 'default':
|
||||
dim_ordering = image_dim_ordering()
|
||||
if dim_ordering not in {'th', 'tf'}:
|
||||
raise ValueError('Unknown dim_ordering:', dim_ordering)
|
||||
|
||||
if border_mode == 'same':
|
||||
# TODO: add implementation for border_mode="same"
|
||||
raise Exception('border_mode="same" not supported with Theano.')
|
||||
raise ValueError('border_mode="same" not supported with Theano.')
|
||||
elif border_mode == 'valid':
|
||||
ignore_border = True
|
||||
padding = (0, 0)
|
||||
else:
|
||||
raise Exception('Invalid border mode: ' + str(border_mode))
|
||||
raise ValueError('Invalid border mode:', border_mode)
|
||||
|
||||
if dim_ordering not in {'th', 'tf'}:
|
||||
raise Exception('Unknown dim_ordering ' + str(dim_ordering))
|
||||
raise ValueError('Unknown dim_ordering:', dim_ordering)
|
||||
|
||||
if dim_ordering == 'tf':
|
||||
x = x.dimshuffle((0, 4, 1, 2, 3))
|
||||
@@ -1624,7 +1829,7 @@ def _old_theano_pool3d(x, pool_size, strides=(1, 1, 1), border_mode='valid',
|
||||
padding=padding,
|
||||
mode='average_exc_pad')
|
||||
else:
|
||||
raise Exception('Invalid pooling mode: ' + str(pool_mode))
|
||||
raise ValueError('Invalid pooling mode:', pool_mode)
|
||||
|
||||
if dim_ordering == 'tf':
|
||||
pool_out = pool_out.dimshuffle((0, 2, 3, 4, 1))
|
||||
@@ -1634,21 +1839,27 @@ def _old_theano_pool3d(x, pool_size, strides=(1, 1, 1), border_mode='valid',
|
||||
# RANDOMNESS
|
||||
|
||||
|
||||
def random_normal(shape, mean=0.0, std=1.0, dtype=_FLOATX, seed=None):
|
||||
def random_normal(shape, mean=0.0, std=1.0, dtype=None, seed=None):
|
||||
if dtype is None:
|
||||
dtype = floatx()
|
||||
if seed is None:
|
||||
seed = np.random.randint(1, 10e6)
|
||||
rng = RandomStreams(seed=seed)
|
||||
return rng.normal(size=shape, avg=mean, std=std, dtype=dtype)
|
||||
|
||||
|
||||
def random_uniform(shape, low=0.0, high=1.0, dtype=_FLOATX, seed=None):
|
||||
def random_uniform(shape, low=0.0, high=1.0, dtype=None, seed=None):
|
||||
if dtype is None:
|
||||
dtype = floatx()
|
||||
if seed is None:
|
||||
seed = np.random.randint(1, 10e6)
|
||||
rng = RandomStreams(seed=seed)
|
||||
return rng.uniform(shape, low=low, high=high, dtype=dtype)
|
||||
|
||||
|
||||
def random_binomial(shape, p=0.0, dtype=_FLOATX, seed=None):
|
||||
def random_binomial(shape, p=0.0, dtype=None, seed=None):
|
||||
if dtype is None:
|
||||
dtype = floatx()
|
||||
if seed is None:
|
||||
seed = np.random.randint(1, 10e6)
|
||||
rng = RandomStreams(seed=seed)
|
||||
@@ -1665,11 +1876,13 @@ def ctc_interleave_blanks(Y):
|
||||
Y_ = T.set_subtensor(Y_[T.arange(Y.shape[0]) * 2 + 1], Y)
|
||||
return Y_
|
||||
|
||||
|
||||
def ctc_create_skip_idxs(Y):
|
||||
skip_idxs = T.arange((Y.shape[0] - 3) // 2) * 2 + 1
|
||||
non_repeats = T.neq(Y[skip_idxs], Y[skip_idxs + 2])
|
||||
return skip_idxs[non_repeats.nonzero()]
|
||||
|
||||
|
||||
def ctc_update_log_p(skip_idxs, zeros, active, log_p_curr, log_p_prev):
|
||||
active_skip_idxs = skip_idxs[(skip_idxs < active).nonzero()]
|
||||
active_next = T.cast(T.minimum(
|
||||
@@ -1695,11 +1908,11 @@ def ctc_update_log_p(skip_idxs, zeros, active, log_p_curr, log_p_prev):
|
||||
)
|
||||
return active_next, log_p_next
|
||||
|
||||
|
||||
def ctc_path_probs(predict, Y, alpha=1e-4):
|
||||
smoothed_predict = (1 - alpha) * predict[:, Y] + alpha * np.float32(1.) / Y.shape[0]
|
||||
L = T.log(smoothed_predict)
|
||||
zeros = T.zeros_like(L[0])
|
||||
base = T.set_subtensor(zeros[:1], np.float32(1))
|
||||
log_first = zeros
|
||||
|
||||
f_skip_idxs = ctc_create_skip_idxs(Y)
|
||||
@@ -1718,12 +1931,14 @@ def ctc_path_probs(predict, Y, alpha=1e-4):
|
||||
log_probs = log_f_probs + log_b_probs[::-1, ::-1] - L
|
||||
return log_probs, mask
|
||||
|
||||
|
||||
def ctc_cost(predict, Y):
|
||||
log_probs, mask = ctc_path_probs(predict, ctc_interleave_blanks(Y))
|
||||
common_factor = T.max(log_probs)
|
||||
total_log_prob = T.log(T.sum(T.exp(log_probs - common_factor)[mask.nonzero()])) + common_factor
|
||||
return -total_log_prob
|
||||
|
||||
|
||||
# batchifies original CTC code
|
||||
def ctc_batch_cost(y_true, y_pred, input_length, label_length):
|
||||
'''Runs CTC loss algorithm on each batch element.
|
||||
@@ -1748,10 +1963,75 @@ def ctc_batch_cost(y_true, y_pred, input_length, label_length):
|
||||
return ctc_cost(y_pred_step, y_true_step)
|
||||
|
||||
ret, _ = theano.scan(
|
||||
fn = ctc_step,
|
||||
fn=ctc_step,
|
||||
outputs_info=None,
|
||||
sequences=[y_true, y_pred, input_length, label_length]
|
||||
)
|
||||
|
||||
ret = ret.dimshuffle('x', 0)
|
||||
return ret
|
||||
|
||||
|
||||
# HIGH ORDER FUNCTIONS
|
||||
|
||||
def map_fn(fn, elems, name=None):
|
||||
'''Map the function fn over the elements elems and return the outputs.
|
||||
|
||||
# Arguments
|
||||
fn: Callable that will be called upon each element in elems
|
||||
elems: tensor, at least 2 dimensional
|
||||
name: A string name for the map node in the graph
|
||||
|
||||
# Returns
|
||||
Tensor with first dimension equal to the elems and second depending on
|
||||
fn
|
||||
'''
|
||||
return theano.map(fn, elems, name=name)[0]
|
||||
|
||||
|
||||
def foldl(fn, elems, initializer=None, name=None):
|
||||
'''Reduce elems using fn to combine them from left to right.
|
||||
|
||||
# Arguments
|
||||
fn: Callable that will be called upon each element in elems and an
|
||||
accumulator, for instance lambda acc, x: acc + x
|
||||
elems: tensor
|
||||
initializer: The first value used (elems[0] in case of None)
|
||||
name: A string name for the foldl node in the graph
|
||||
|
||||
# Returns
|
||||
Same type and shape as initializer
|
||||
'''
|
||||
if initializer is None:
|
||||
initializer = elems[0]
|
||||
elems = elems[1:]
|
||||
|
||||
# We need to change the order of the arguments because theano accepts x as
|
||||
# first parameter and accumulator as second
|
||||
fn2 = lambda x, acc: fn(acc, x)
|
||||
|
||||
return theano.foldl(fn2, elems, initializer, name=name)[0]
|
||||
|
||||
|
||||
def foldr(fn, elems, initializer=None, name=None):
|
||||
'''Reduce elems using fn to combine them from right to left.
|
||||
|
||||
# Arguments
|
||||
fn: Callable that will be called upon each element in elems and an
|
||||
accumulator, for instance lambda acc, x: acc + x
|
||||
elems: tensor
|
||||
initializer: The first value used (elems[-1] in case of None)
|
||||
name: A string name for the foldr node in the graph
|
||||
|
||||
# Returns
|
||||
Same type and shape as initializer
|
||||
'''
|
||||
if initializer is None:
|
||||
initializer = elems[-1]
|
||||
elems = elems[:-1]
|
||||
|
||||
# We need to change the order of the arguments because theano accepts x as
|
||||
# first parameter and accumulator as second
|
||||
fn2 = lambda x, acc: fn(acc, x)
|
||||
|
||||
return theano.foldr(fn2, elems, initializer, name=name)[0]
|
||||
|
||||
+76
-41
@@ -1,6 +1,7 @@
|
||||
from __future__ import absolute_import
|
||||
from __future__ import print_function
|
||||
|
||||
import os
|
||||
import csv
|
||||
|
||||
import numpy as np
|
||||
@@ -245,17 +246,20 @@ class ModelCheckpoint(Callback):
|
||||
save_weights_only: if True, then only the model's weights will be
|
||||
saved (`model.save_weights(filepath)`), else the full model
|
||||
is saved (`model.save(filepath)`).
|
||||
period: Interval (number of epochs) between checkpoints.
|
||||
|
||||
'''
|
||||
def __init__(self, filepath, monitor='val_loss', verbose=0,
|
||||
save_best_only=False, save_weights_only=False,
|
||||
mode='auto'):
|
||||
mode='auto', period=1):
|
||||
super(ModelCheckpoint, self).__init__()
|
||||
self.monitor = monitor
|
||||
self.verbose = verbose
|
||||
self.filepath = filepath
|
||||
self.save_best_only = save_best_only
|
||||
self.save_weights_only = save_weights_only
|
||||
self.period = period
|
||||
self.epochs_since_last_save = 0
|
||||
|
||||
if mode not in ['auto', 'min', 'max']:
|
||||
warnings.warn('ModelCheckpoint mode %s is unknown, '
|
||||
@@ -278,35 +282,38 @@ class ModelCheckpoint(Callback):
|
||||
self.best = np.Inf
|
||||
|
||||
def on_epoch_end(self, epoch, logs={}):
|
||||
filepath = self.filepath.format(epoch=epoch, **logs)
|
||||
if self.save_best_only:
|
||||
current = logs.get(self.monitor)
|
||||
if current is None:
|
||||
warnings.warn('Can save best model only with %s available, '
|
||||
'skipping.' % (self.monitor), RuntimeWarning)
|
||||
else:
|
||||
if self.monitor_op(current, self.best):
|
||||
if self.verbose > 0:
|
||||
print('Epoch %05d: %s improved from %0.5f to %0.5f,'
|
||||
' saving model to %s'
|
||||
% (epoch, self.monitor, self.best,
|
||||
current, filepath))
|
||||
self.best = current
|
||||
if self.save_weights_only:
|
||||
self.model.save_weights(filepath, overwrite=True)
|
||||
else:
|
||||
self.model.save(filepath, overwrite=True)
|
||||
self.epochs_since_last_save += 1
|
||||
if self.epochs_since_last_save >= self.period:
|
||||
self.epochs_since_last_save = 0
|
||||
filepath = self.filepath.format(epoch=epoch, **logs)
|
||||
if self.save_best_only:
|
||||
current = logs.get(self.monitor)
|
||||
if current is None:
|
||||
warnings.warn('Can save best model only with %s available, '
|
||||
'skipping.' % (self.monitor), RuntimeWarning)
|
||||
else:
|
||||
if self.verbose > 0:
|
||||
print('Epoch %05d: %s did not improve' %
|
||||
(epoch, self.monitor))
|
||||
else:
|
||||
if self.verbose > 0:
|
||||
print('Epoch %05d: saving model to %s' % (epoch, filepath))
|
||||
if self.save_weights_only:
|
||||
self.model.save_weights(filepath, overwrite=True)
|
||||
if self.monitor_op(current, self.best):
|
||||
if self.verbose > 0:
|
||||
print('Epoch %05d: %s improved from %0.5f to %0.5f,'
|
||||
' saving model to %s'
|
||||
% (epoch, self.monitor, self.best,
|
||||
current, filepath))
|
||||
self.best = current
|
||||
if self.save_weights_only:
|
||||
self.model.save_weights(filepath, overwrite=True)
|
||||
else:
|
||||
self.model.save(filepath, overwrite=True)
|
||||
else:
|
||||
if self.verbose > 0:
|
||||
print('Epoch %05d: %s did not improve' %
|
||||
(epoch, self.monitor))
|
||||
else:
|
||||
self.model.save(filepath, overwrite=True)
|
||||
if self.verbose > 0:
|
||||
print('Epoch %05d: saving model to %s' % (epoch, filepath))
|
||||
if self.save_weights_only:
|
||||
self.model.save_weights(filepath, overwrite=True)
|
||||
else:
|
||||
self.model.save(filepath, overwrite=True)
|
||||
|
||||
|
||||
class EarlyStopping(Callback):
|
||||
@@ -337,6 +344,7 @@ class EarlyStopping(Callback):
|
||||
self.verbose = verbose
|
||||
self.min_delta = min_delta
|
||||
self.wait = 0
|
||||
self.stopped_epoch = 0
|
||||
|
||||
if mode not in ['auto', 'min', 'max']:
|
||||
warnings.warn('EarlyStopping mode %s is unknown, '
|
||||
@@ -374,11 +382,14 @@ class EarlyStopping(Callback):
|
||||
self.wait = 0
|
||||
else:
|
||||
if self.wait >= self.patience:
|
||||
if self.verbose > 0:
|
||||
print('Epoch %05d: early stopping' % (epoch))
|
||||
self.stopped_epoch = epoch
|
||||
self.model.stop_training = True
|
||||
self.wait += 1
|
||||
|
||||
def on_train_end(self, logs={}):
|
||||
if self.stopped_epoch > 0 and self.verbose > 0:
|
||||
print('Epoch %05d: early stopping' % (self.stopped_epoch))
|
||||
|
||||
|
||||
class RemoteMonitor(Callback):
|
||||
'''Callback used to stream events to a server.
|
||||
@@ -396,11 +407,13 @@ class RemoteMonitor(Callback):
|
||||
def __init__(self,
|
||||
root='http://localhost:9000',
|
||||
path='/publish/epoch/end/',
|
||||
field='data'):
|
||||
field='data',
|
||||
headers={'Accept': 'application/json', 'Content-Type': 'application/json'}):
|
||||
super(RemoteMonitor, self).__init__()
|
||||
self.root = root
|
||||
self.path = path
|
||||
self.field = field
|
||||
self.headers = headers
|
||||
|
||||
def on_epoch_end(self, epoch, logs={}):
|
||||
import requests
|
||||
@@ -410,7 +423,8 @@ class RemoteMonitor(Callback):
|
||||
send[k] = v
|
||||
try:
|
||||
requests.post(self.root + self.path,
|
||||
{self.field: json.dumps(send)})
|
||||
{self.field: json.dumps(send)},
|
||||
headers=self.headers)
|
||||
except:
|
||||
print('Warning: could not reach RemoteMonitor '
|
||||
'root server at ' + str(self.root))
|
||||
@@ -432,7 +446,11 @@ class LearningRateScheduler(Callback):
|
||||
assert hasattr(self.model.optimizer, 'lr'), \
|
||||
'Optimizer must have a "lr" attribute.'
|
||||
lr = self.schedule(epoch)
|
||||
assert type(lr) == float, 'The output of the "schedule" function should be float.'
|
||||
|
||||
if not isinstance(lr, (float, np.float32, np.float64)):
|
||||
raise ValueError('The output of the "schedule" function '
|
||||
'should be float.')
|
||||
|
||||
K.set_value(self.model.optimizer.lr, lr)
|
||||
|
||||
|
||||
@@ -468,8 +486,8 @@ class TensorBoard(Callback):
|
||||
def __init__(self, log_dir='./logs', histogram_freq=0, write_graph=True, write_images=False):
|
||||
super(TensorBoard, self).__init__()
|
||||
if K._BACKEND != 'tensorflow':
|
||||
raise Exception('TensorBoard callback only works '
|
||||
'with the TensorFlow backend.')
|
||||
raise RuntimeError('TensorBoard callback only works '
|
||||
'with the TensorFlow backend.')
|
||||
self.log_dir = log_dir
|
||||
self.histogram_freq = histogram_freq
|
||||
self.merged = None
|
||||
@@ -505,16 +523,25 @@ class TensorBoard(Callback):
|
||||
if hasattr(layer, 'output'):
|
||||
tf.histogram_summary('{}_out'.format(layer.name),
|
||||
layer.output)
|
||||
self.merged = tf.merge_all_summaries()
|
||||
if parse_version(tf.__version__) >= parse_version('0.12.0'):
|
||||
self.merged = tf.summary.merge_all()
|
||||
else:
|
||||
self.merged = tf.merge_all_summaries()
|
||||
if self.write_graph:
|
||||
if parse_version(tf.__version__) >= parse_version('0.8.0'):
|
||||
if parse_version(tf.__version__) >= parse_version('0.12.0'):
|
||||
self.writer = tf.summary.FileWriter(self.log_dir,
|
||||
self.sess.graph)
|
||||
elif parse_version(tf.__version__) >= parse_version('0.8.0'):
|
||||
self.writer = tf.train.SummaryWriter(self.log_dir,
|
||||
self.sess.graph)
|
||||
else:
|
||||
self.writer = tf.train.SummaryWriter(self.log_dir,
|
||||
self.sess.graph_def)
|
||||
else:
|
||||
self.writer = tf.train.SummaryWriter(self.log_dir)
|
||||
if parse_version(tf.__version__) >= parse_version('0.12.0'):
|
||||
self.writer = tf.summary.FileWriter(self.log_dir)
|
||||
else:
|
||||
self.writer = tf.train.SummaryWriter(self.log_dir)
|
||||
|
||||
def on_epoch_end(self, epoch, logs={}):
|
||||
import tensorflow as tf
|
||||
@@ -545,6 +572,9 @@ class TensorBoard(Callback):
|
||||
self.writer.add_summary(summary, epoch)
|
||||
self.writer.flush()
|
||||
|
||||
def on_train_end(self, _):
|
||||
self.writer.close()
|
||||
|
||||
|
||||
class ReduceLROnPlateau(Callback):
|
||||
'''Reduce learning rate when a metric has stopped improving.
|
||||
@@ -662,7 +692,7 @@ class CSVLogger(Callback):
|
||||
model.fit(X_train, Y_train, callbacks=[csv_logger])
|
||||
```
|
||||
|
||||
Arguments
|
||||
# Arguments
|
||||
filename: filename of the csv file, e.g. 'run/log.csv'.
|
||||
separator: string used to separate elements in the csv file.
|
||||
append: True: append if file exists (useful for continuing
|
||||
@@ -675,10 +705,14 @@ class CSVLogger(Callback):
|
||||
self.append = append
|
||||
self.writer = None
|
||||
self.keys = None
|
||||
self.append_header = True
|
||||
super(CSVLogger, self).__init__()
|
||||
|
||||
def on_train_begin(self, logs={}):
|
||||
if self.append:
|
||||
if os.path.exists(self.filename):
|
||||
with open(self.filename) as f:
|
||||
self.append_header = len(f.readline()) == 0
|
||||
self.csv_file = open(self.filename, 'a')
|
||||
else:
|
||||
self.csv_file = open(self.filename, 'w')
|
||||
@@ -694,7 +728,8 @@ class CSVLogger(Callback):
|
||||
if not self.writer:
|
||||
self.keys = sorted(logs.keys())
|
||||
self.writer = csv.DictWriter(self.csv_file, fieldnames=['epoch'] + self.keys)
|
||||
self.writer.writeheader()
|
||||
if self.append_header:
|
||||
self.writer.writeheader()
|
||||
|
||||
row_dict = OrderedDict({'epoch': epoch})
|
||||
row_dict.update((key, handle_value(logs[key])) for key in self.keys)
|
||||
@@ -709,7 +744,7 @@ class LambdaCallback(Callback):
|
||||
"""Callback for creating simple, custom callbacks on-the-fly.
|
||||
|
||||
This callback is constructed with anonymous functions that will be called
|
||||
at the appropiate time. Note that the callbacks expects positional
|
||||
at the appropriate time. Note that the callbacks expects positional
|
||||
arguments, as:
|
||||
- `on_epoch_begin` and `on_epoch_end` expect two positional arguments: `epoch`, `logs`
|
||||
- `on_batch_begin` and `on_batch_end` expect two positional arguments: `batch`, `logs`
|
||||
|
||||
@@ -11,9 +11,10 @@ def load_batch(fpath, label_key='labels'):
|
||||
else:
|
||||
d = cPickle.load(f, encoding="bytes")
|
||||
# decode utf8
|
||||
d_decoded = {}
|
||||
for k, v in d.items():
|
||||
del(d[k])
|
||||
d[k.decode("utf8")] = v
|
||||
d_decoded[k.decode("utf8")] = v
|
||||
d = d_decoded
|
||||
f.close()
|
||||
data = d["data"]
|
||||
labels = d[label_key]
|
||||
|
||||
@@ -8,17 +8,17 @@ import os
|
||||
|
||||
def load_data(label_mode='fine'):
|
||||
if label_mode not in ['fine', 'coarse']:
|
||||
raise Exception('label_mode must be one of "fine" "coarse".')
|
||||
raise ValueError('label_mode must be one of "fine" "coarse".')
|
||||
|
||||
dirname = "cifar-100-python"
|
||||
origin = "http://www.cs.toronto.edu/~kriz/cifar-100-python.tar.gz"
|
||||
dirname = 'cifar-100-python'
|
||||
origin = 'http://www.cs.toronto.edu/~kriz/cifar-100-python.tar.gz'
|
||||
path = get_file(dirname, origin=origin, untar=True)
|
||||
|
||||
fpath = os.path.join(path, 'train')
|
||||
X_train, y_train = load_batch(fpath, label_key=label_mode+'_labels')
|
||||
X_train, y_train = load_batch(fpath, label_key=label_mode + '_labels')
|
||||
|
||||
fpath = os.path.join(path, 'test')
|
||||
X_test, y_test = load_batch(fpath, label_key=label_mode+'_labels')
|
||||
X_test, y_test = load_batch(fpath, label_key=label_mode + '_labels')
|
||||
|
||||
y_train = np.reshape(y_train, (len(y_train), 1))
|
||||
y_test = np.reshape(y_test, (len(y_test), 1))
|
||||
|
||||
@@ -10,7 +10,8 @@ import sys
|
||||
def load_data(path='imdb_full.pkl', nb_words=None, skip_top=0,
|
||||
maxlen=None, seed=113,
|
||||
start_char=1, oov_char=2, index_from=3):
|
||||
'''
|
||||
'''Loads IMDB dataset.
|
||||
|
||||
# Arguments
|
||||
path: where to store the data (in `/.keras/dataset`)
|
||||
nb_words: max number of words to include. Words are ranked
|
||||
@@ -72,9 +73,9 @@ def load_data(path='imdb_full.pkl', nb_words=None, skip_top=0,
|
||||
X = new_X
|
||||
labels = new_labels
|
||||
if not X:
|
||||
raise Exception('After filtering for sequences shorter than maxlen=' +
|
||||
str(maxlen) + ', no sequence was kept. '
|
||||
'Increase maxlen.')
|
||||
raise ValueError('After filtering for sequences shorter than maxlen=' +
|
||||
str(maxlen) + ', no sequence was kept. '
|
||||
'Increase maxlen.')
|
||||
if not nb_words:
|
||||
nb_words = max([max(x) for x in X])
|
||||
|
||||
|
||||
+796
-591
Diferenças do arquivo suprimidas por serem muito extensas
Carregar Diff
+407
-342
Diferenças do arquivo suprimidas por serem muito extensas
Carregar Diff
@@ -1,6 +1,7 @@
|
||||
from __future__ import absolute_import
|
||||
import numpy as np
|
||||
from . import backend as K
|
||||
from .utils.generic_utils import get_from_module
|
||||
|
||||
|
||||
def get_fans(shape, dim_ordering='th'):
|
||||
@@ -20,7 +21,7 @@ def get_fans(shape, dim_ordering='th'):
|
||||
fan_in = shape[-2] * receptive_field_size
|
||||
fan_out = shape[-1] * receptive_field_size
|
||||
else:
|
||||
raise Exception('Invalid dim_ordering: ' + dim_ordering)
|
||||
raise ValueError('Invalid dim_ordering: ' + dim_ordering)
|
||||
else:
|
||||
# no specific assumptions
|
||||
fan_in = np.sqrt(np.prod(shape))
|
||||
@@ -87,8 +88,8 @@ def orthogonal(shape, scale=1.1, name=None):
|
||||
|
||||
def identity(shape, scale=1, name=None):
|
||||
if len(shape) != 2 or shape[0] != shape[1]:
|
||||
raise Exception('Identity matrix initialization can only be used '
|
||||
'for 2D square matrices.')
|
||||
raise ValueError('Identity matrix initialization can only be used '
|
||||
'for 2D square matrices.')
|
||||
else:
|
||||
return K.variable(scale * np.identity(shape[0]), name=name)
|
||||
|
||||
@@ -101,7 +102,6 @@ def one(shape, name=None):
|
||||
return K.ones(shape, name=name)
|
||||
|
||||
|
||||
from .utils.generic_utils import get_from_module
|
||||
def get(identifier, **kwargs):
|
||||
return get_from_module(identifier, globals(),
|
||||
'initialization', kwargs=kwargs)
|
||||
|
||||
@@ -10,3 +10,4 @@ from .embeddings import *
|
||||
from .noise import *
|
||||
from .advanced_activations import *
|
||||
from .wrappers import *
|
||||
from .convolutional_recurrent import *
|
||||
|
||||
@@ -52,18 +52,37 @@ class PReLU(Layer):
|
||||
# Arguments
|
||||
init: initialization function for the weights.
|
||||
weights: initial weights, as a list of a single Numpy array.
|
||||
shared_axes: the axes along which to share learnable
|
||||
parameters for the activation function.
|
||||
For example, if the incoming feature maps
|
||||
are from a 2D convolution
|
||||
with output shape `(batch, height, width, channels)`,
|
||||
and you wish to share parameters across space
|
||||
so that each filter only has one set of parameters,
|
||||
set `shared_axes=[1, 2]`.
|
||||
|
||||
# References
|
||||
- [Delving Deep into Rectifiers: Surpassing Human-Level Performance on ImageNet Classification](http://arxiv.org/pdf/1502.01852v1.pdf)
|
||||
'''
|
||||
def __init__(self, init='zero', weights=None, **kwargs):
|
||||
def __init__(self, init='zero', weights=None, shared_axes=None, **kwargs):
|
||||
self.supports_masking = True
|
||||
self.init = initializations.get(init)
|
||||
self.initial_weights = weights
|
||||
if type(shared_axes) is not list and type(shared_axes) is not tuple:
|
||||
self.shared_axes = [shared_axes]
|
||||
else:
|
||||
self.shared_axes = list(shared_axes)
|
||||
super(PReLU, self).__init__(**kwargs)
|
||||
|
||||
def build(self, input_shape):
|
||||
self.alphas = self.init(input_shape[1:],
|
||||
param_shape = list(input_shape[1:])
|
||||
self.param_broadcast = [False] * len(param_shape)
|
||||
if self.shared_axes[0] is not None:
|
||||
for i in self.shared_axes:
|
||||
param_shape[i] = 1
|
||||
self.param_broadcast[i] = True
|
||||
|
||||
self.alphas = self.init(param_shape,
|
||||
name='{}_alphas'.format(self.name))
|
||||
self.trainable_weights = [self.alphas]
|
||||
|
||||
@@ -73,7 +92,10 @@ class PReLU(Layer):
|
||||
|
||||
def call(self, x, mask=None):
|
||||
pos = K.relu(x)
|
||||
neg = self.alphas * (x - abs(x)) * 0.5
|
||||
if K.backend() == 'theano':
|
||||
neg = K.pattern_broadcast(self.alphas, self.param_broadcast) * (x - abs(x)) * 0.5
|
||||
else:
|
||||
neg = self.alphas * (x - abs(x)) * 0.5
|
||||
return pos + neg
|
||||
|
||||
def get_config(self):
|
||||
@@ -131,23 +153,41 @@ class ParametricSoftplus(Layer):
|
||||
alpha_init: float. Initial value of the alpha weights.
|
||||
beta_init: float. Initial values of the beta weights.
|
||||
weights: initial weights, as a list of 2 numpy arrays.
|
||||
shared_axes: the axes along which to share learnable
|
||||
parameters for the activation function.
|
||||
For example, if the incoming feature maps
|
||||
are from a 2D convolution
|
||||
with output shape `(batch, height, width, channels)`,
|
||||
and you wish to share parameters across space
|
||||
so that each filter only has one set of parameters,
|
||||
set `shared_axes=[1, 2]`.
|
||||
|
||||
# References
|
||||
- [Inferring Nonlinear Neuronal Computation Based on Physiologically Plausible Inputs](http://journals.plos.org/ploscompbiol/article?id=10.1371/journal.pcbi.1003143)
|
||||
'''
|
||||
def __init__(self, alpha_init=0.2, beta_init=5.0,
|
||||
weights=None, **kwargs):
|
||||
weights=None, shared_axes=None, **kwargs):
|
||||
self.supports_masking = True
|
||||
self.alpha_init = K.cast_to_floatx(alpha_init)
|
||||
self.beta_init = K.cast_to_floatx(beta_init)
|
||||
self.initial_weights = weights
|
||||
if type(shared_axes) is not list and type(shared_axes) is not tuple:
|
||||
self.shared_axes = [shared_axes]
|
||||
else:
|
||||
self.shared_axes = list(shared_axes)
|
||||
super(ParametricSoftplus, self).__init__(**kwargs)
|
||||
|
||||
def build(self, input_shape):
|
||||
input_shape = input_shape[1:]
|
||||
self.alphas = K.variable(self.alpha_init * np.ones(input_shape),
|
||||
param_shape = list(input_shape[1:])
|
||||
self.param_broadcast = [False] * len(param_shape)
|
||||
if self.shared_axes[0] is not None:
|
||||
for i in self.shared_axes:
|
||||
param_shape[i] = 1
|
||||
self.param_broadcast[i] = True
|
||||
|
||||
self.alphas = K.variable(self.alpha_init * np.ones(param_shape),
|
||||
name='{}_alphas'.format(self.name))
|
||||
self.betas = K.variable(self.beta_init * np.ones(input_shape),
|
||||
self.betas = K.variable(self.beta_init * np.ones(param_shape),
|
||||
name='{}_betas'.format(self.name))
|
||||
self.trainable_weights = [self.alphas, self.betas]
|
||||
|
||||
@@ -156,7 +196,10 @@ class ParametricSoftplus(Layer):
|
||||
del self.initial_weights
|
||||
|
||||
def call(self, x, mask=None):
|
||||
return K.softplus(self.betas * x) * self.alphas
|
||||
if K.backend() == 'theano':
|
||||
return K.softplus(K.pattern_broadcast(self.betas, self.param_broadcast) * x) * K.pattern_broadcast(self.alphas, self.param_broadcast)
|
||||
else:
|
||||
return K.softplus(self.betas * x) * self.alphas
|
||||
|
||||
def get_config(self):
|
||||
config = {'alpha_init': float(self.alpha_init),
|
||||
@@ -214,34 +257,51 @@ class SReLU(Layer):
|
||||
a_left_init: initialization function for the left part slope
|
||||
t_right_init: initialization function for the right part intercept
|
||||
a_right_init: initialization function for the right part slope
|
||||
shared_axes: the axes along which to share learnable
|
||||
parameters for the activation function.
|
||||
For example, if the incoming feature maps
|
||||
are from a 2D convolution
|
||||
with output shape `(batch, height, width, channels)`,
|
||||
and you wish to share parameters across space
|
||||
so that each filter only has one set of parameters,
|
||||
set `shared_axes=[1, 2]`.
|
||||
|
||||
# References
|
||||
- [Deep Learning with S-shaped Rectified Linear Activation Units](http://arxiv.org/abs/1512.07030)
|
||||
'''
|
||||
def __init__(self, t_left_init='zero', a_left_init='glorot_uniform',
|
||||
t_right_init='glorot_uniform', a_right_init='one', **kwargs):
|
||||
t_right_init='glorot_uniform', a_right_init='one', shared_axes=None, **kwargs):
|
||||
self.supports_masking = True
|
||||
self.t_left_init = t_left_init
|
||||
self.a_left_init = a_left_init
|
||||
self.t_right_init = t_right_init
|
||||
self.a_right_init = a_right_init
|
||||
if type(shared_axes) is not list and type(shared_axes) is not tuple:
|
||||
self.shared_axes = [shared_axes]
|
||||
else:
|
||||
self.shared_axes = list(shared_axes)
|
||||
super(SReLU, self).__init__(**kwargs)
|
||||
|
||||
def build(self, input_shape):
|
||||
input_shape = input_shape[1:]
|
||||
param_shape = list(input_shape[1:])
|
||||
self.param_broadcast = [False] * len(param_shape)
|
||||
if self.shared_axes[0] is not None:
|
||||
for i in self.shared_axes:
|
||||
param_shape[i] = 1
|
||||
self.param_broadcast[i] = True
|
||||
|
||||
t_left_init = initializations.get(self.t_left_init)
|
||||
a_left_init = initializations.get(self.a_left_init)
|
||||
t_right_init = initializations.get(self.t_right_init)
|
||||
a_right_init = initializations.get(self.a_right_init)
|
||||
|
||||
self.t_left = t_left_init(input_shape,
|
||||
self.t_left = t_left_init(param_shape,
|
||||
name='{}_t_left'.format(self.name))
|
||||
self.a_left = a_left_init(input_shape,
|
||||
self.a_left = a_left_init(param_shape,
|
||||
name='{}_a_left'.format(self.name))
|
||||
self.t_right = t_right_init(input_shape,
|
||||
self.t_right = t_right_init(param_shape,
|
||||
name='{}_t_right'.format(self.name))
|
||||
self.a_right = a_right_init(input_shape,
|
||||
self.a_right = a_right_init(param_shape,
|
||||
name='{}_a_right'.format(self.name))
|
||||
# ensure the the right part is always to the right of the left
|
||||
self.t_right_actual = self.t_left + abs(self.t_right)
|
||||
@@ -249,10 +309,21 @@ class SReLU(Layer):
|
||||
self.t_right, self.a_right]
|
||||
|
||||
def call(self, x, mask=None):
|
||||
Y_left_and_center = self.t_left + K.relu(x - self.t_left,
|
||||
self.a_left,
|
||||
self.t_right_actual - self.t_left)
|
||||
Y_right = K.relu(x - self.t_right_actual) * self.a_right
|
||||
if K.backend() == 'theano':
|
||||
t_left = K.pattern_broadcast(self.t_left, self.param_broadcast)
|
||||
a_left = K.pattern_broadcast(self.a_left, self.param_broadcast)
|
||||
a_right = K.pattern_broadcast(self.a_right, self.param_broadcast)
|
||||
t_right_actual = K.pattern_broadcast(self.t_right_actual, self.param_broadcast)
|
||||
else:
|
||||
t_left = self.t_left
|
||||
a_left = self.a_left
|
||||
a_right = self.a_right
|
||||
t_right_actual = self.t_right_actual
|
||||
|
||||
Y_left_and_center = t_left + K.relu(x - t_left,
|
||||
a_left,
|
||||
t_right_actual - t_left)
|
||||
Y_right = K.relu(x - t_right_actual) * a_right
|
||||
return Y_left_and_center + Y_right
|
||||
|
||||
def get_config(self):
|
||||
|
||||
+197
-212
@@ -47,7 +47,7 @@ class Convolution1D(Layer):
|
||||
If you don't specify anything, no activation is applied
|
||||
(ie. "linear" activation: a(x) = x).
|
||||
weights: list of numpy arrays to set as initial weights.
|
||||
border_mode: 'valid' or 'same'.
|
||||
border_mode: 'valid', 'same' or 'full'. ('full' requires the Theano backend.)
|
||||
subsample_length: factor by which to subsample output.
|
||||
W_regularizer: instance of [WeightRegularizer](../regularizers.md)
|
||||
(eg. L1 or L2 regularization), applied to the main weights matrix.
|
||||
@@ -77,19 +77,18 @@ class Convolution1D(Layer):
|
||||
`steps` value might have changed due to padding.
|
||||
'''
|
||||
def __init__(self, nb_filter, filter_length,
|
||||
init='uniform', activation='linear', weights=None,
|
||||
init='glorot_uniform', activation=None, weights=None,
|
||||
border_mode='valid', subsample_length=1,
|
||||
W_regularizer=None, b_regularizer=None, activity_regularizer=None,
|
||||
W_constraint=None, b_constraint=None,
|
||||
bias=True, input_dim=None, input_length=None, **kwargs):
|
||||
|
||||
if border_mode not in {'valid', 'same'}:
|
||||
raise Exception('Invalid border mode for Convolution1D:', border_mode)
|
||||
if border_mode not in {'valid', 'same', 'full'}:
|
||||
raise ValueError('Invalid border mode for Convolution1D:', border_mode)
|
||||
self.nb_filter = nb_filter
|
||||
self.filter_length = filter_length
|
||||
self.init = initializations.get(init, dim_ordering='th')
|
||||
self.activation = activations.get(activation)
|
||||
assert border_mode in {'valid', 'same'}, 'border_mode must be in {valid, same}'
|
||||
self.border_mode = border_mode
|
||||
self.subsample_length = subsample_length
|
||||
|
||||
@@ -114,35 +113,25 @@ class Convolution1D(Layer):
|
||||
def build(self, input_shape):
|
||||
input_dim = input_shape[2]
|
||||
self.W_shape = (self.filter_length, 1, input_dim, self.nb_filter)
|
||||
self.W = self.init(self.W_shape, name='{}_W'.format(self.name))
|
||||
|
||||
self.W = self.add_weight(self.W_shape,
|
||||
initializer=self.init,
|
||||
name='{}_W'.format(self.name),
|
||||
regularizer=self.W_regularizer,
|
||||
constraint=self.W_constraint)
|
||||
if self.bias:
|
||||
self.b = K.zeros((self.nb_filter,), name='{}_b'.format(self.name))
|
||||
self.trainable_weights = [self.W, self.b]
|
||||
self.b = self.add_weight((self.nb_filter,),
|
||||
initializer='zero',
|
||||
name='{}_b'.format(self.name),
|
||||
regularizer=self.b_regularizer,
|
||||
constraint=self.b_constraint)
|
||||
else:
|
||||
self.trainable_weights = [self.W]
|
||||
self.regularizers = []
|
||||
|
||||
if self.W_regularizer:
|
||||
self.W_regularizer.set_param(self.W)
|
||||
self.regularizers.append(self.W_regularizer)
|
||||
|
||||
if self.bias and self.b_regularizer:
|
||||
self.b_regularizer.set_param(self.b)
|
||||
self.regularizers.append(self.b_regularizer)
|
||||
|
||||
if self.activity_regularizer:
|
||||
self.activity_regularizer.set_layer(self)
|
||||
self.regularizers.append(self.activity_regularizer)
|
||||
|
||||
self.constraints = {}
|
||||
if self.W_constraint:
|
||||
self.constraints[self.W] = self.W_constraint
|
||||
if self.bias and self.b_constraint:
|
||||
self.constraints[self.b] = self.b_constraint
|
||||
self.b = None
|
||||
|
||||
if self.initial_weights is not None:
|
||||
self.set_weights(self.initial_weights)
|
||||
del self.initial_weights
|
||||
self.built = True
|
||||
|
||||
def get_output_shape_for(self, input_shape):
|
||||
length = conv_output_length(input_shape[1],
|
||||
@@ -218,7 +207,7 @@ class AtrousConvolution1D(Convolution1D):
|
||||
If you don't specify anything, no activation is applied
|
||||
(ie. "linear" activation: a(x) = x).
|
||||
weights: list of numpy arrays to set as initial weights.
|
||||
border_mode: 'valid' or 'same'.
|
||||
border_mode: 'valid', 'same' or 'full'. ('full' requires the Theano backend.)
|
||||
subsample_length: factor by which to subsample output.
|
||||
atrous_rate: Factor for kernel dilation. Also called filter_dilation
|
||||
elsewhere.
|
||||
@@ -250,14 +239,14 @@ class AtrousConvolution1D(Convolution1D):
|
||||
`steps` value might have changed due to padding.
|
||||
'''
|
||||
def __init__(self, nb_filter, filter_length,
|
||||
init='uniform', activation='linear', weights=None,
|
||||
init='glorot_uniform', activation=None, weights=None,
|
||||
border_mode='valid', subsample_length=1, atrous_rate=1,
|
||||
W_regularizer=None, b_regularizer=None, activity_regularizer=None,
|
||||
W_constraint=None, b_constraint=None,
|
||||
bias=True, **kwargs):
|
||||
|
||||
if border_mode not in {'valid', 'same'}:
|
||||
raise Exception('Invalid border mode for AtrousConv1D:', border_mode)
|
||||
if border_mode not in {'valid', 'same', 'full'}:
|
||||
raise ValueError('Invalid border mode for AtrousConv1D:', border_mode)
|
||||
|
||||
self.atrous_rate = int(atrous_rate)
|
||||
|
||||
@@ -331,7 +320,7 @@ class Convolution2D(Layer):
|
||||
If you don't specify anything, no activation is applied
|
||||
(ie. "linear" activation: a(x) = x).
|
||||
weights: list of numpy arrays to set as initial weights.
|
||||
border_mode: 'valid' or 'same'.
|
||||
border_mode: 'valid', 'same' or 'full'. ('full' requires the Theano backend.)
|
||||
subsample: tuple of length 2. Factor by which to subsample output.
|
||||
Also called strides elsewhere.
|
||||
W_regularizer: instance of [WeightRegularizer](../regularizers.md)
|
||||
@@ -366,24 +355,24 @@ class Convolution2D(Layer):
|
||||
`rows` and `cols` values might have changed due to padding.
|
||||
'''
|
||||
def __init__(self, nb_filter, nb_row, nb_col,
|
||||
init='glorot_uniform', activation='linear', weights=None,
|
||||
init='glorot_uniform', activation=None, weights=None,
|
||||
border_mode='valid', subsample=(1, 1), dim_ordering='default',
|
||||
W_regularizer=None, b_regularizer=None, activity_regularizer=None,
|
||||
W_constraint=None, b_constraint=None,
|
||||
bias=True, **kwargs):
|
||||
if dim_ordering == 'default':
|
||||
dim_ordering = K.image_dim_ordering()
|
||||
if border_mode not in {'valid', 'same'}:
|
||||
raise Exception('Invalid border mode for Convolution2D:', border_mode)
|
||||
if border_mode not in {'valid', 'same', 'full'}:
|
||||
raise ValueError('Invalid border mode for Convolution2D:', border_mode)
|
||||
self.nb_filter = nb_filter
|
||||
self.nb_row = nb_row
|
||||
self.nb_col = nb_col
|
||||
self.init = initializations.get(init, dim_ordering=dim_ordering)
|
||||
self.activation = activations.get(activation)
|
||||
assert border_mode in {'valid', 'same'}, 'border_mode must be in {valid, same}'
|
||||
self.border_mode = border_mode
|
||||
self.subsample = tuple(subsample)
|
||||
assert dim_ordering in {'tf', 'th'}, 'dim_ordering must be in {tf, th}'
|
||||
if dim_ordering not in {'tf', 'th'}:
|
||||
raise ValueError('dim_ordering must be in {tf, th}.')
|
||||
self.dim_ordering = dim_ordering
|
||||
|
||||
self.W_regularizer = regularizers.get(W_regularizer)
|
||||
@@ -406,36 +395,25 @@ class Convolution2D(Layer):
|
||||
stack_size = input_shape[3]
|
||||
self.W_shape = (self.nb_row, self.nb_col, stack_size, self.nb_filter)
|
||||
else:
|
||||
raise Exception('Invalid dim_ordering: ' + self.dim_ordering)
|
||||
self.W = self.init(self.W_shape, name='{}_W'.format(self.name))
|
||||
raise ValueError('Invalid dim_ordering:', self.dim_ordering)
|
||||
self.W = self.add_weight(self.W_shape,
|
||||
initializer=self.init,
|
||||
name='{}_W'.format(self.name),
|
||||
regularizer=self.W_regularizer,
|
||||
constraint=self.W_constraint)
|
||||
if self.bias:
|
||||
self.b = K.zeros((self.nb_filter,), name='{}_b'.format(self.name))
|
||||
self.trainable_weights = [self.W, self.b]
|
||||
self.b = self.add_weight((self.nb_filter,),
|
||||
initializer='zero',
|
||||
name='{}_b'.format(self.name),
|
||||
regularizer=self.b_regularizer,
|
||||
constraint=self.b_constraint)
|
||||
else:
|
||||
self.trainable_weights = [self.W]
|
||||
self.regularizers = []
|
||||
|
||||
if self.W_regularizer:
|
||||
self.W_regularizer.set_param(self.W)
|
||||
self.regularizers.append(self.W_regularizer)
|
||||
|
||||
if self.bias and self.b_regularizer:
|
||||
self.b_regularizer.set_param(self.b)
|
||||
self.regularizers.append(self.b_regularizer)
|
||||
|
||||
if self.activity_regularizer:
|
||||
self.activity_regularizer.set_layer(self)
|
||||
self.regularizers.append(self.activity_regularizer)
|
||||
|
||||
self.constraints = {}
|
||||
if self.W_constraint:
|
||||
self.constraints[self.W] = self.W_constraint
|
||||
if self.bias and self.b_constraint:
|
||||
self.constraints[self.b] = self.b_constraint
|
||||
self.b = None
|
||||
|
||||
if self.initial_weights is not None:
|
||||
self.set_weights(self.initial_weights)
|
||||
del self.initial_weights
|
||||
self.built = True
|
||||
|
||||
def get_output_shape_for(self, input_shape):
|
||||
if self.dim_ordering == 'th':
|
||||
@@ -445,7 +423,7 @@ class Convolution2D(Layer):
|
||||
rows = input_shape[1]
|
||||
cols = input_shape[2]
|
||||
else:
|
||||
raise Exception('Invalid dim_ordering: ' + self.dim_ordering)
|
||||
raise ValueError('Invalid dim_ordering:', self.dim_ordering)
|
||||
|
||||
rows = conv_output_length(rows, self.nb_row,
|
||||
self.border_mode, self.subsample[0])
|
||||
@@ -456,8 +434,6 @@ class Convolution2D(Layer):
|
||||
return (input_shape[0], self.nb_filter, rows, cols)
|
||||
elif self.dim_ordering == 'tf':
|
||||
return (input_shape[0], rows, cols, self.nb_filter)
|
||||
else:
|
||||
raise Exception('Invalid dim_ordering: ' + self.dim_ordering)
|
||||
|
||||
def call(self, x, mask=None):
|
||||
output = K.conv2d(x, self.W, strides=self.subsample,
|
||||
@@ -470,7 +446,7 @@ class Convolution2D(Layer):
|
||||
elif self.dim_ordering == 'tf':
|
||||
output += K.reshape(self.b, (1, 1, 1, self.nb_filter))
|
||||
else:
|
||||
raise Exception('Invalid dim_ordering: ' + self.dim_ordering)
|
||||
raise ValueError('Invalid dim_ordering:', self.dim_ordering)
|
||||
output = self.activation(output)
|
||||
return output
|
||||
|
||||
@@ -570,7 +546,7 @@ class Deconvolution2D(Convolution2D):
|
||||
If you don't specify anything, no activation is applied
|
||||
(ie. "linear" activation: a(x) = x).
|
||||
weights: list of numpy arrays to set as initial weights.
|
||||
border_mode: 'valid' or 'same'.
|
||||
border_mode: 'valid', 'same' or 'full'. ('full' requires the Theano backend.)
|
||||
subsample: tuple of length 2. Factor by which to oversample output.
|
||||
Also called strides elsewhere.
|
||||
W_regularizer: instance of [WeightRegularizer](../regularizers.md)
|
||||
@@ -609,7 +585,7 @@ class Deconvolution2D(Convolution2D):
|
||||
[3] [Deconvolutional Networks](http://www.matthewzeiler.com/pubs/cvpr2010/cvpr2010.pdf)
|
||||
'''
|
||||
def __init__(self, nb_filter, nb_row, nb_col, output_shape,
|
||||
init='glorot_uniform', activation='linear', weights=None,
|
||||
init='glorot_uniform', activation=None, weights=None,
|
||||
border_mode='valid', subsample=(1, 1),
|
||||
dim_ordering='default',
|
||||
W_regularizer=None, b_regularizer=None, activity_regularizer=None,
|
||||
@@ -617,19 +593,25 @@ class Deconvolution2D(Convolution2D):
|
||||
bias=True, **kwargs):
|
||||
if dim_ordering == 'default':
|
||||
dim_ordering = K.image_dim_ordering()
|
||||
if border_mode not in {'valid', 'same'}:
|
||||
raise Exception('Invalid border mode for Deconvolution2D:', border_mode)
|
||||
if border_mode not in {'valid', 'same', 'full'}:
|
||||
raise ValueError('Invalid border mode for Deconvolution2D:', border_mode)
|
||||
|
||||
self.output_shape_ = output_shape
|
||||
|
||||
super(Deconvolution2D, self).__init__(nb_filter, nb_row, nb_col,
|
||||
init=init, activation=activation,
|
||||
weights=weights, border_mode=border_mode,
|
||||
subsample=subsample, dim_ordering=dim_ordering,
|
||||
W_regularizer=W_regularizer, b_regularizer=b_regularizer,
|
||||
init=init,
|
||||
activation=activation,
|
||||
weights=weights,
|
||||
border_mode=border_mode,
|
||||
subsample=subsample,
|
||||
dim_ordering=dim_ordering,
|
||||
W_regularizer=W_regularizer,
|
||||
b_regularizer=b_regularizer,
|
||||
activity_regularizer=activity_regularizer,
|
||||
W_constraint=W_constraint, b_constraint=b_constraint,
|
||||
bias=bias, **kwargs)
|
||||
W_constraint=W_constraint,
|
||||
b_constraint=b_constraint,
|
||||
bias=bias,
|
||||
**kwargs)
|
||||
|
||||
def get_output_shape_for(self, input_shape):
|
||||
if self.dim_ordering == 'th':
|
||||
@@ -639,14 +621,12 @@ class Deconvolution2D(Convolution2D):
|
||||
rows = self.output_shape_[1]
|
||||
cols = self.output_shape_[2]
|
||||
else:
|
||||
raise Exception('Invalid dim_ordering: ' + self.dim_ordering)
|
||||
raise ValueError('Invalid dim_ordering:', self.dim_ordering)
|
||||
|
||||
if self.dim_ordering == 'th':
|
||||
return (input_shape[0], self.nb_filter, rows, cols)
|
||||
elif self.dim_ordering == 'tf':
|
||||
return (input_shape[0], rows, cols, self.nb_filter)
|
||||
else:
|
||||
raise Exception('Invalid dim_ordering: ' + self.dim_ordering)
|
||||
|
||||
def call(self, x, mask=None):
|
||||
output = K.deconv2d(x, self.W, self.output_shape_,
|
||||
@@ -660,12 +640,12 @@ class Deconvolution2D(Convolution2D):
|
||||
elif self.dim_ordering == 'tf':
|
||||
output += K.reshape(self.b, (1, 1, 1, self.nb_filter))
|
||||
else:
|
||||
raise Exception('Invalid dim_ordering: ' + self.dim_ordering)
|
||||
raise ValueError('Invalid dim_ordering:', self.dim_ordering)
|
||||
output = self.activation(output)
|
||||
return output
|
||||
|
||||
def get_config(self):
|
||||
config = {'output_shape': self.output_shape}
|
||||
config = {'output_shape': self.output_shape_}
|
||||
base_config = super(Deconvolution2D, self).get_config()
|
||||
return dict(list(base_config.items()) + list(config.items()))
|
||||
|
||||
@@ -703,7 +683,7 @@ class AtrousConvolution2D(Convolution2D):
|
||||
If you don't specify anything, no activation is applied
|
||||
(ie. "linear" activation: a(x) = x).
|
||||
weights: list of numpy arrays to set as initial weights.
|
||||
border_mode: 'valid' or 'same'.
|
||||
border_mode: 'valid', 'same' or 'full'. ('full' requires the Theano backend.)
|
||||
subsample: tuple of length 2. Factor by which to subsample output.
|
||||
Also called strides elsewhere.
|
||||
atrous_rate: tuple of length 2. Factor for kernel dilation.
|
||||
@@ -742,7 +722,7 @@ class AtrousConvolution2D(Convolution2D):
|
||||
- [Multi-Scale Context Aggregation by Dilated Convolutions](https://arxiv.org/abs/1511.07122)
|
||||
'''
|
||||
def __init__(self, nb_filter, nb_row, nb_col,
|
||||
init='glorot_uniform', activation='linear', weights=None,
|
||||
init='glorot_uniform', activation=None, weights=None,
|
||||
border_mode='valid', subsample=(1, 1),
|
||||
atrous_rate=(1, 1), dim_ordering='default',
|
||||
W_regularizer=None, b_regularizer=None, activity_regularizer=None,
|
||||
@@ -751,19 +731,25 @@ class AtrousConvolution2D(Convolution2D):
|
||||
if dim_ordering == 'default':
|
||||
dim_ordering = K.image_dim_ordering()
|
||||
|
||||
if border_mode not in {'valid', 'same'}:
|
||||
raise Exception('Invalid border mode for AtrousConv2D:', border_mode)
|
||||
if border_mode not in {'valid', 'same', 'full'}:
|
||||
raise ValueError('Invalid border mode for AtrousConv2D:', border_mode)
|
||||
|
||||
self.atrous_rate = tuple(atrous_rate)
|
||||
|
||||
super(AtrousConvolution2D, self).__init__(nb_filter, nb_row, nb_col,
|
||||
init=init, activation=activation,
|
||||
weights=weights, border_mode=border_mode,
|
||||
subsample=subsample, dim_ordering=dim_ordering,
|
||||
W_regularizer=W_regularizer, b_regularizer=b_regularizer,
|
||||
init=init,
|
||||
activation=activation,
|
||||
weights=weights,
|
||||
border_mode=border_mode,
|
||||
subsample=subsample,
|
||||
dim_ordering=dim_ordering,
|
||||
W_regularizer=W_regularizer,
|
||||
b_regularizer=b_regularizer,
|
||||
activity_regularizer=activity_regularizer,
|
||||
W_constraint=W_constraint, b_constraint=b_constraint,
|
||||
bias=bias, **kwargs)
|
||||
W_constraint=W_constraint,
|
||||
b_constraint=b_constraint,
|
||||
bias=bias,
|
||||
**kwargs)
|
||||
|
||||
def get_output_shape_for(self, input_shape):
|
||||
if self.dim_ordering == 'th':
|
||||
@@ -773,19 +759,19 @@ class AtrousConvolution2D(Convolution2D):
|
||||
rows = input_shape[1]
|
||||
cols = input_shape[2]
|
||||
else:
|
||||
raise Exception('Invalid dim_ordering: ' + self.dim_ordering)
|
||||
raise ValueError('Invalid dim_ordering:', self.dim_ordering)
|
||||
|
||||
rows = conv_output_length(rows, self.nb_row, self.border_mode,
|
||||
self.subsample[0], dilation=self.atrous_rate[0])
|
||||
self.subsample[0],
|
||||
dilation=self.atrous_rate[0])
|
||||
cols = conv_output_length(cols, self.nb_col, self.border_mode,
|
||||
self.subsample[1], dilation=self.atrous_rate[1])
|
||||
self.subsample[1],
|
||||
dilation=self.atrous_rate[1])
|
||||
|
||||
if self.dim_ordering == 'th':
|
||||
return (input_shape[0], self.nb_filter, rows, cols)
|
||||
elif self.dim_ordering == 'tf':
|
||||
return (input_shape[0], rows, cols, self.nb_filter)
|
||||
else:
|
||||
raise Exception('Invalid dim_ordering: ' + self.dim_ordering)
|
||||
|
||||
def call(self, x, mask=None):
|
||||
output = K.conv2d(x, self.W, strides=self.subsample,
|
||||
@@ -799,7 +785,7 @@ class AtrousConvolution2D(Convolution2D):
|
||||
elif self.dim_ordering == 'tf':
|
||||
output += K.reshape(self.b, (1, 1, 1, self.nb_filter))
|
||||
else:
|
||||
raise Exception('Invalid dim_ordering: ' + self.dim_ordering)
|
||||
raise ValueError('Invalid dim_ordering:', self.dim_ordering)
|
||||
output = self.activation(output)
|
||||
return output
|
||||
|
||||
@@ -889,7 +875,7 @@ class SeparableConvolution2D(Layer):
|
||||
`rows` and `cols` values might have changed due to padding.
|
||||
'''
|
||||
def __init__(self, nb_filter, nb_row, nb_col,
|
||||
init='glorot_uniform', activation='linear', weights=None,
|
||||
init='glorot_uniform', activation=None, weights=None,
|
||||
border_mode='valid', subsample=(1, 1),
|
||||
depth_multiplier=1, dim_ordering='default',
|
||||
depthwise_regularizer=None, pointwise_regularizer=None,
|
||||
@@ -898,28 +884,30 @@ class SeparableConvolution2D(Layer):
|
||||
b_constraint=None,
|
||||
bias=True, **kwargs):
|
||||
|
||||
if K._BACKEND != 'tensorflow':
|
||||
raise Exception('SeparableConv2D is only available '
|
||||
'with TensorFlow for the time being.')
|
||||
if K.backend() != 'tensorflow':
|
||||
raise RuntimeError('SeparableConv2D is only available '
|
||||
'with TensorFlow for the time being.')
|
||||
|
||||
if dim_ordering == 'default':
|
||||
dim_ordering = K.image_dim_ordering()
|
||||
|
||||
if border_mode not in {'valid', 'same'}:
|
||||
raise Exception('Invalid border mode for SeparableConv2D:', border_mode)
|
||||
raise ValueError('Invalid border mode for SeparableConv2D:', border_mode)
|
||||
|
||||
if border_mode not in {'valid', 'same'}:
|
||||
raise Exception('Invalid border mode for SeparableConv2D:', border_mode)
|
||||
raise ValueError('Invalid border mode for SeparableConv2D:', border_mode)
|
||||
self.nb_filter = nb_filter
|
||||
self.nb_row = nb_row
|
||||
self.nb_col = nb_col
|
||||
self.init = initializations.get(init, dim_ordering=dim_ordering)
|
||||
self.activation = activations.get(activation)
|
||||
assert border_mode in {'valid', 'same'}, 'border_mode must be in {valid, same}'
|
||||
if border_mode not in {'valid', 'same'}:
|
||||
raise ValueError('border_mode must be in {valid, same}.')
|
||||
self.border_mode = border_mode
|
||||
self.subsample = tuple(subsample)
|
||||
self.depth_multiplier = depth_multiplier
|
||||
assert dim_ordering in {'tf', 'th'}, 'dim_ordering must be in {tf, th}'
|
||||
if dim_ordering not in {'tf', 'th'}:
|
||||
raise ValueError('dim_ordering must be in {tf, th}.')
|
||||
self.dim_ordering = dim_ordering
|
||||
|
||||
self.depthwise_regularizer = regularizers.get(depthwise_regularizer)
|
||||
@@ -946,44 +934,31 @@ class SeparableConvolution2D(Layer):
|
||||
depthwise_shape = (self.nb_row, self.nb_col, stack_size, self.depth_multiplier)
|
||||
pointwise_shape = (1, 1, self.depth_multiplier * stack_size, self.nb_filter)
|
||||
else:
|
||||
raise Exception('Invalid dim_ordering: ' + self.dim_ordering)
|
||||
self.depthwise_kernel = self.init(depthwise_shape,
|
||||
name='{}_depthwise_kernel'.format(self.name))
|
||||
self.pointwise_kernel = self.init(pointwise_shape,
|
||||
name='{}_pointwise_kernel'.format(self.name))
|
||||
if self.bias:
|
||||
self.b = K.zeros((self.nb_filter,), name='{}_b'.format(self.name))
|
||||
self.trainable_weights = [self.depthwise_kernel,
|
||||
self.pointwise_kernel,
|
||||
self.b]
|
||||
else:
|
||||
self.trainable_weights = [self.depthwise_kernel,
|
||||
self.pointwise_kernel]
|
||||
self.regularizers = []
|
||||
if self.depthwise_regularizer:
|
||||
self.depthwise_regularizer.set_param(self.depthwise_kernel)
|
||||
self.regularizers.append(self.depthwise_regularizer)
|
||||
if self.pointwise_regularizer:
|
||||
self.pointwise_regularizer.set_param(self.pointwise_kernel)
|
||||
self.regularizers.append(self.pointwise_regularizer)
|
||||
if self.bias and self.b_regularizer:
|
||||
self.b_regularizer.set_param(self.b)
|
||||
self.regularizers.append(self.b_regularizer)
|
||||
if self.activity_regularizer:
|
||||
self.activity_regularizer.set_layer(self)
|
||||
self.regularizers.append(self.activity_regularizer)
|
||||
raise ValueError('Invalid dim_ordering:', self.dim_ordering)
|
||||
|
||||
self.constraints = {}
|
||||
if self.depthwise_constraint:
|
||||
self.constraints[self.depthwise_kernel] = self.depthwise_constraint
|
||||
if self.pointwise_constraint:
|
||||
self.constraints[self.pointwise_kernel] = self.pointwise_constraint
|
||||
if self.bias and self.b_constraint:
|
||||
self.constraints[self.b] = self.b_constraint
|
||||
self.depthwise_kernel = self.add_weight(depthwise_shape,
|
||||
initializer=self.init,
|
||||
regularizer=self.depthwise_regularizer,
|
||||
constraint=self.depthwise_constraint,
|
||||
name='{}_depthwise_kernel'.format(self.name))
|
||||
self.pointwise_kernel = self.add_weight(pointwise_shape,
|
||||
initializer=self.init,
|
||||
regularizer=self.pointwise_regularizer,
|
||||
constraint=self.pointwise_constraint,
|
||||
name='{}_pointwise_kernel'.format(self.name))
|
||||
if self.bias:
|
||||
self.b = self.add_weight((self.nb_filter,),
|
||||
initializer='zero',
|
||||
name='{}_b'.format(self.name),
|
||||
regularizer=self.b_regularizer,
|
||||
constraint=self.b_constraint)
|
||||
else:
|
||||
self.b = None
|
||||
|
||||
if self.initial_weights is not None:
|
||||
self.set_weights(self.initial_weights)
|
||||
del self.initial_weights
|
||||
self.built = True
|
||||
|
||||
def get_output_shape_for(self, input_shape):
|
||||
if self.dim_ordering == 'th':
|
||||
@@ -993,7 +968,7 @@ class SeparableConvolution2D(Layer):
|
||||
rows = input_shape[1]
|
||||
cols = input_shape[2]
|
||||
else:
|
||||
raise Exception('Invalid dim_ordering: ' + self.dim_ordering)
|
||||
raise ValueError('Invalid dim_ordering:', self.dim_ordering)
|
||||
|
||||
rows = conv_output_length(rows, self.nb_row,
|
||||
self.border_mode, self.subsample[0])
|
||||
@@ -1005,7 +980,7 @@ class SeparableConvolution2D(Layer):
|
||||
elif self.dim_ordering == 'tf':
|
||||
return (input_shape[0], rows, cols, self.nb_filter)
|
||||
else:
|
||||
raise Exception('Invalid dim_ordering: ' + self.dim_ordering)
|
||||
raise ValueError('Invalid dim_ordering:', self.dim_ordering)
|
||||
|
||||
def call(self, x, mask=None):
|
||||
output = K.separable_conv2d(x, self.depthwise_kernel,
|
||||
@@ -1019,7 +994,7 @@ class SeparableConvolution2D(Layer):
|
||||
elif self.dim_ordering == 'tf':
|
||||
output += K.reshape(self.b, (1, 1, 1, self.nb_filter))
|
||||
else:
|
||||
raise Exception('Invalid dim_ordering: ' + self.dim_ordering)
|
||||
raise ValueError('Invalid dim_ordering:', self.dim_ordering)
|
||||
output = self.activation(output)
|
||||
return output
|
||||
|
||||
@@ -1068,7 +1043,7 @@ class Convolution3D(Layer):
|
||||
If you don't specify anything, no activation is applied
|
||||
(ie. "linear" activation: a(x) = x).
|
||||
weights: list of Numpy arrays to set as initial weights.
|
||||
border_mode: 'valid' or 'same'.
|
||||
border_mode: 'valid', 'same' or 'full'. ('full' requires the Theano backend.)
|
||||
subsample: tuple of length 3. Factor by which to subsample output.
|
||||
Also called strides elsewhere.
|
||||
Note: 'subsample' is implemented by slicing the output of conv3d with strides=(1,1,1).
|
||||
@@ -1104,7 +1079,7 @@ class Convolution3D(Layer):
|
||||
'''
|
||||
|
||||
def __init__(self, nb_filter, kernel_dim1, kernel_dim2, kernel_dim3,
|
||||
init='glorot_uniform', activation='linear', weights=None,
|
||||
init='glorot_uniform', activation=None, weights=None,
|
||||
border_mode='valid', subsample=(1, 1, 1), dim_ordering='default',
|
||||
W_regularizer=None, b_regularizer=None, activity_regularizer=None,
|
||||
W_constraint=None, b_constraint=None,
|
||||
@@ -1112,18 +1087,18 @@ class Convolution3D(Layer):
|
||||
if dim_ordering == 'default':
|
||||
dim_ordering = K.image_dim_ordering()
|
||||
|
||||
if border_mode not in {'valid', 'same'}:
|
||||
raise Exception('Invalid border mode for Convolution3D:', border_mode)
|
||||
if border_mode not in {'valid', 'same', 'full'}:
|
||||
raise ValueError('Invalid border mode for Convolution3D:', border_mode)
|
||||
self.nb_filter = nb_filter
|
||||
self.kernel_dim1 = kernel_dim1
|
||||
self.kernel_dim2 = kernel_dim2
|
||||
self.kernel_dim3 = kernel_dim3
|
||||
self.init = initializations.get(init, dim_ordering=dim_ordering)
|
||||
self.activation = activations.get(activation)
|
||||
assert border_mode in {'valid', 'same'}, 'border_mode must be in {valid, same}'
|
||||
self.border_mode = border_mode
|
||||
self.subsample = tuple(subsample)
|
||||
assert dim_ordering in {'tf', 'th'}, 'dim_ordering must be in {tf, th}'
|
||||
if dim_ordering not in {'tf', 'th'}:
|
||||
raise ValueError('dim_ordering must be in {tf, th}.')
|
||||
self.dim_ordering = dim_ordering
|
||||
|
||||
self.W_regularizer = regularizers.get(W_regularizer)
|
||||
@@ -1151,37 +1126,26 @@ class Convolution3D(Layer):
|
||||
self.W_shape = (self.kernel_dim1, self.kernel_dim2, self.kernel_dim3,
|
||||
stack_size, self.nb_filter)
|
||||
else:
|
||||
raise Exception('Invalid dim_ordering: ' + self.dim_ordering)
|
||||
raise ValueError('Invalid dim_ordering:', self.dim_ordering)
|
||||
|
||||
self.W = self.init(self.W_shape, name='{}_W'.format(self.name))
|
||||
self.W = self.add_weight(self.W_shape,
|
||||
initializer=self.init,
|
||||
name='{}_W'.format(self.name),
|
||||
regularizer=self.W_regularizer,
|
||||
constraint=self.W_constraint)
|
||||
if self.bias:
|
||||
self.b = K.zeros((self.nb_filter,), name='{}_b'.format(self.name))
|
||||
self.trainable_weights = [self.W, self.b]
|
||||
self.b = self.add_weight((self.nb_filter,),
|
||||
initializer='zero',
|
||||
name='{}_b'.format(self.name),
|
||||
regularizer=self.b_regularizer,
|
||||
constraint=self.b_constraint)
|
||||
else:
|
||||
self.trainable_weights = [self.W]
|
||||
|
||||
self.regularizers = []
|
||||
if self.W_regularizer:
|
||||
self.W_regularizer.set_param(self.W)
|
||||
self.regularizers.append(self.W_regularizer)
|
||||
|
||||
if self.bias and self.b_regularizer:
|
||||
self.b_regularizer.set_param(self.b)
|
||||
self.regularizers.append(self.b_regularizer)
|
||||
|
||||
if self.activity_regularizer:
|
||||
self.activity_regularizer.set_layer(self)
|
||||
self.regularizers.append(self.activity_regularizer)
|
||||
|
||||
self.constraints = {}
|
||||
if self.W_constraint:
|
||||
self.constraints[self.W] = self.W_constraint
|
||||
if self.bias and self.b_constraint:
|
||||
self.constraints[self.b] = self.b_constraint
|
||||
self.b = None
|
||||
|
||||
if self.initial_weights is not None:
|
||||
self.set_weights(self.initial_weights)
|
||||
del self.initial_weights
|
||||
self.built = True
|
||||
|
||||
def get_output_shape_for(self, input_shape):
|
||||
if self.dim_ordering == 'th':
|
||||
@@ -1193,7 +1157,7 @@ class Convolution3D(Layer):
|
||||
conv_dim2 = input_shape[2]
|
||||
conv_dim3 = input_shape[3]
|
||||
else:
|
||||
raise Exception('Invalid dim_ordering: ' + self.dim_ordering)
|
||||
raise ValueError('Invalid dim_ordering:', self.dim_ordering)
|
||||
|
||||
conv_dim1 = conv_output_length(conv_dim1, self.kernel_dim1,
|
||||
self.border_mode, self.subsample[0])
|
||||
@@ -1207,7 +1171,7 @@ class Convolution3D(Layer):
|
||||
elif self.dim_ordering == 'tf':
|
||||
return (input_shape[0], conv_dim1, conv_dim2, conv_dim3, self.nb_filter)
|
||||
else:
|
||||
raise Exception('Invalid dim_ordering: ' + self.dim_ordering)
|
||||
raise ValueError('Invalid dim_ordering:', self.dim_ordering)
|
||||
|
||||
def call(self, x, mask=None):
|
||||
input_shape = self.input_spec[0].shape
|
||||
@@ -1222,7 +1186,7 @@ class Convolution3D(Layer):
|
||||
elif self.dim_ordering == 'tf':
|
||||
output += K.reshape(self.b, (1, 1, 1, 1, self.nb_filter))
|
||||
else:
|
||||
raise Exception('Invalid dim_ordering: ' + self.dim_ordering)
|
||||
raise ValueError('Invalid dim_ordering:', self.dim_ordering)
|
||||
output = self.activation(output)
|
||||
return output
|
||||
|
||||
@@ -1308,7 +1272,8 @@ class UpSampling2D(Layer):
|
||||
if dim_ordering == 'default':
|
||||
dim_ordering = K.image_dim_ordering()
|
||||
self.size = tuple(size)
|
||||
assert dim_ordering in {'tf', 'th'}, 'dim_ordering must be in {tf, th}'
|
||||
if dim_ordering not in {'tf', 'th'}:
|
||||
raise ValueError('dim_ordering must be in {tf, th}.')
|
||||
self.dim_ordering = dim_ordering
|
||||
self.input_spec = [InputSpec(ndim=4)]
|
||||
super(UpSampling2D, self).__init__(**kwargs)
|
||||
@@ -1329,7 +1294,7 @@ class UpSampling2D(Layer):
|
||||
height,
|
||||
input_shape[3])
|
||||
else:
|
||||
raise Exception('Invalid dim_ordering: ' + self.dim_ordering)
|
||||
raise ValueError('Invalid dim_ordering:', self.dim_ordering)
|
||||
|
||||
def call(self, x, mask=None):
|
||||
return K.resize_images(x, self.size[0], self.size[1],
|
||||
@@ -1371,7 +1336,8 @@ class UpSampling3D(Layer):
|
||||
if dim_ordering == 'default':
|
||||
dim_ordering = K.image_dim_ordering()
|
||||
self.size = tuple(size)
|
||||
assert dim_ordering in {'tf', 'th'}, 'dim_ordering must be in {tf, th}'
|
||||
if dim_ordering not in {'tf', 'th'}:
|
||||
raise ValueError('dim_ordering must be in {tf, th}.')
|
||||
self.dim_ordering = dim_ordering
|
||||
self.input_spec = [InputSpec(ndim=5)]
|
||||
super(UpSampling3D, self).__init__(**kwargs)
|
||||
@@ -1396,7 +1362,7 @@ class UpSampling3D(Layer):
|
||||
dim3,
|
||||
input_shape[4])
|
||||
else:
|
||||
raise Exception('Invalid dim_ordering: ' + self.dim_ordering)
|
||||
raise ValueError('Invalid dim_ordering:', self.dim_ordering)
|
||||
|
||||
def call(self, x, mask=None):
|
||||
return K.resize_volumes(x, self.size[0], self.size[1], self.size[2],
|
||||
@@ -1543,7 +1509,8 @@ class ZeroPadding2D(Layer):
|
||||
'of length 2 or 4, or dict. '
|
||||
'Found: ' + str(padding))
|
||||
|
||||
assert dim_ordering in {'tf', 'th'}, '`dim_ordering` must be in {"tf", "th"}.'
|
||||
if dim_ordering not in {'tf', 'th'}:
|
||||
raise ValueError('dim_ordering must be in {tf, th}.')
|
||||
self.dim_ordering = dim_ordering
|
||||
self.input_spec = [InputSpec(ndim=4)]
|
||||
|
||||
@@ -1563,7 +1530,7 @@ class ZeroPadding2D(Layer):
|
||||
cols,
|
||||
input_shape[3])
|
||||
else:
|
||||
raise Exception('Invalid dim_ordering: ' + self.dim_ordering)
|
||||
raise ValueError('Invalid dim_ordering:', self.dim_ordering)
|
||||
|
||||
def call(self, x, mask=None):
|
||||
return K.asymmetric_spatial_2d_padding(x,
|
||||
@@ -1586,7 +1553,7 @@ class ZeroPadding3D(Layer):
|
||||
padding: tuple of int (length 3)
|
||||
How many zeros to add at the beginning and end of
|
||||
the 3 padding dimensions (axis 3, 4 and 5).
|
||||
Currentl only symmetric padding is supported.
|
||||
Currently only symmetric padding is supported.
|
||||
dim_ordering: 'th' or 'tf'.
|
||||
In 'th' mode, the channels dimension (the depth)
|
||||
is at index 1, in 'tf' mode it is at index 4.
|
||||
@@ -1608,7 +1575,8 @@ class ZeroPadding3D(Layer):
|
||||
if dim_ordering == 'default':
|
||||
dim_ordering = K.image_dim_ordering()
|
||||
self.padding = tuple(padding)
|
||||
assert dim_ordering in {'tf', 'th'}, 'dim_ordering must be in {tf, th}'
|
||||
if dim_ordering not in {'tf', 'th'}:
|
||||
raise ValueError('dim_ordering must be in {tf, th}.')
|
||||
self.dim_ordering = dim_ordering
|
||||
self.input_spec = [InputSpec(ndim=5)]
|
||||
|
||||
@@ -1632,7 +1600,7 @@ class ZeroPadding3D(Layer):
|
||||
dim3,
|
||||
input_shape[4])
|
||||
else:
|
||||
raise Exception('Invalid dim_ordering: ' + self.dim_ordering)
|
||||
raise ValueError('Invalid dim_ordering:', self.dim_ordering)
|
||||
|
||||
def call(self, x, mask=None):
|
||||
return K.spatial_3d_padding(x, padding=self.padding,
|
||||
@@ -1643,6 +1611,7 @@ class ZeroPadding3D(Layer):
|
||||
base_config = super(ZeroPadding3D, self).get_config()
|
||||
return dict(list(base_config.items()) + list(config.items()))
|
||||
|
||||
|
||||
class Cropping1D(Layer):
|
||||
'''Cropping layer for 1D input (e.g. temporal sequence).
|
||||
It crops along the time dimension (axis 1).
|
||||
@@ -1662,27 +1631,32 @@ class Cropping1D(Layer):
|
||||
def __init__(self, cropping=(1, 1), **kwargs):
|
||||
super(Cropping1D, self).__init__(**kwargs)
|
||||
self.cropping = tuple(cropping)
|
||||
assert len(self.cropping) == 2, 'cropping must be a tuple length of 2'
|
||||
if len(self.cropping) != 2:
|
||||
raise ValueError('`cropping` must be a tuple length of 2.')
|
||||
self.input_spec = [InputSpec(ndim=3)]
|
||||
|
||||
def build(self, input_shape):
|
||||
self.input_spec = [InputSpec(shape=input_shape)]
|
||||
self.built = True
|
||||
|
||||
def get_output_shape_for(self, input_shape):
|
||||
length = input_shape[1] - self.cropping[0] - self.cropping[1] if input_shape[1] is not None else None
|
||||
if input_shape[1] is not None:
|
||||
length = input_shape[1] - self.cropping[0] - self.cropping[1]
|
||||
else:
|
||||
length = None
|
||||
return (input_shape[0],
|
||||
length,
|
||||
input_shape[2])
|
||||
|
||||
def call(self, x, mask=None):
|
||||
input_shape = self.input_spec[0].shape
|
||||
return x[:, self.cropping[0]:input_shape[1]-self.cropping[1], :]
|
||||
return x[:, self.cropping[0]:-self.cropping[1], :]
|
||||
|
||||
def get_config(self):
|
||||
config = {'cropping': self.cropping}
|
||||
base_config = super(Cropping1D, self).get_config()
|
||||
return dict(list(base_config.items()) + list(config.items()))
|
||||
|
||||
|
||||
class Cropping2D(Layer):
|
||||
'''Cropping layer for 2D input (e.g. picture).
|
||||
It crops along spatial dimensions, i.e. width and height.
|
||||
@@ -1726,15 +1700,20 @@ class Cropping2D(Layer):
|
||||
if dim_ordering == 'default':
|
||||
dim_ordering = K.image_dim_ordering()
|
||||
self.cropping = tuple(cropping)
|
||||
assert len(self.cropping) == 2, 'cropping must be a tuple length of 2'
|
||||
assert len(self.cropping[0]) == 2, 'cropping[0] must be a tuple length of 2'
|
||||
assert len(self.cropping[1]) == 2, 'cropping[1] must be a tuple length of 2'
|
||||
assert dim_ordering in {'tf', 'th'}, 'dim_ordering must be in {tf, th}'
|
||||
if len(self.cropping) != 2:
|
||||
raise ValueError('`cropping` must be a tuple length of 2.')
|
||||
if len(self.cropping[0]) != 2:
|
||||
raise ValueError('`cropping[0]` must be a tuple length of 2.')
|
||||
if len(self.cropping[1]) != 2:
|
||||
raise ValueError('`cropping[1]` must be a tuple length of 2.')
|
||||
if dim_ordering not in {'tf', 'th'}:
|
||||
raise ValueError('dim_ordering must be in {tf, th}.')
|
||||
self.dim_ordering = dim_ordering
|
||||
self.input_spec = [InputSpec(ndim=4)]
|
||||
|
||||
def build(self, input_shape):
|
||||
self.input_spec = [InputSpec(shape=input_shape)]
|
||||
self.built = True
|
||||
|
||||
def get_output_shape_for(self, input_shape):
|
||||
if self.dim_ordering == 'th':
|
||||
@@ -1748,19 +1727,18 @@ class Cropping2D(Layer):
|
||||
input_shape[2] - self.cropping[1][0] - self.cropping[1][1],
|
||||
input_shape[3])
|
||||
else:
|
||||
raise Exception('Invalid dim_ordering: ' + self.dim_ordering)
|
||||
raise ValueError('Invalid dim_ordering:', self.dim_ordering)
|
||||
|
||||
def call(self, x, mask=None):
|
||||
input_shape = self.input_spec[0].shape
|
||||
if self.dim_ordering == 'th':
|
||||
return x[:,
|
||||
:,
|
||||
self.cropping[0][0]:input_shape[2]-self.cropping[0][1],
|
||||
self.cropping[1][0]:input_shape[3]-self.cropping[1][1]]
|
||||
self.cropping[0][0]:-self.cropping[0][1],
|
||||
self.cropping[1][0]:-self.cropping[1][1]]
|
||||
elif self.dim_ordering == 'tf':
|
||||
return x[:,
|
||||
self.cropping[0][0]:input_shape[1]-self.cropping[0][1],
|
||||
self.cropping[1][0]:input_shape[2]-self.cropping[1][1],
|
||||
self.cropping[0][0]:-self.cropping[0][1],
|
||||
self.cropping[1][0]:-self.cropping[1][1],
|
||||
:]
|
||||
|
||||
def get_config(self):
|
||||
@@ -1768,8 +1746,9 @@ class Cropping2D(Layer):
|
||||
base_config = super(Cropping2D, self).get_config()
|
||||
return dict(list(base_config.items()) + list(config.items()))
|
||||
|
||||
|
||||
class Cropping3D(Layer):
|
||||
'''Cropping layer for 3D data (e.g. spatial or saptio-temporal).
|
||||
'''Cropping layer for 3D data (e.g. spatial or spatio-temporal).
|
||||
|
||||
# Arguments
|
||||
cropping: tuple of tuple of int (length 3)
|
||||
@@ -1792,21 +1771,28 @@ class Cropping3D(Layer):
|
||||
|
||||
'''
|
||||
|
||||
def __init__(self, cropping=((1, 1), (1, 1), (1, 1)), dim_ordering='default', **kwargs):
|
||||
def __init__(self, cropping=((1, 1), (1, 1), (1, 1)),
|
||||
dim_ordering='default', **kwargs):
|
||||
super(Cropping3D, self).__init__(**kwargs)
|
||||
if dim_ordering == 'default':
|
||||
dim_ordering = K.image_dim_ordering()
|
||||
self.cropping = tuple(cropping)
|
||||
assert len(self.cropping) == 3, 'cropping must be a tuple length of 3'
|
||||
assert len(self.cropping[0]) == 2, 'cropping[0] must be a tuple length of 2'
|
||||
assert len(self.cropping[1]) == 2, 'cropping[1] must be a tuple length of 2'
|
||||
assert len(self.cropping[2]) == 2, 'cropping[2] must be a tuple length of 2'
|
||||
assert dim_ordering in {'tf', 'th'}, 'dim_ordering must be in {tf, th}'
|
||||
if len(self.cropping) != 3:
|
||||
raise ValueError('`cropping` must be a tuple length of 3.')
|
||||
if len(self.cropping[0]) != 2:
|
||||
raise ValueError('`cropping[0]` must be a tuple length of 2.')
|
||||
if len(self.cropping[1]) != 2:
|
||||
raise ValueError('`cropping[1]` must be a tuple length of 2.')
|
||||
if len(self.cropping[2]) != 2:
|
||||
raise ValueError('`cropping[2]` must be a tuple length of 2.')
|
||||
if dim_ordering not in {'tf', 'th'}:
|
||||
raise ValueError('dim_ordering must be in {tf, th}.')
|
||||
self.dim_ordering = dim_ordering
|
||||
self.input_spec = [InputSpec(ndim=5)]
|
||||
|
||||
def build(self, input_shape):
|
||||
self.input_spec = [InputSpec(shape=input_shape)]
|
||||
self.built = True
|
||||
|
||||
def get_output_shape_for(self, input_shape):
|
||||
if self.dim_ordering == 'th':
|
||||
@@ -1828,21 +1814,20 @@ class Cropping3D(Layer):
|
||||
dim3,
|
||||
input_shape[4])
|
||||
else:
|
||||
raise Exception('Invalid dim_ordering: ' + self.dim_ordering)
|
||||
raise ValueError('Invalid dim_ordering:', self.dim_ordering)
|
||||
|
||||
def call(self, x, mask=None):
|
||||
input_shape = self.input_spec[0].shape
|
||||
if self.dim_ordering == 'th':
|
||||
return x[:,
|
||||
:,
|
||||
self.cropping[0][0]:input_shape[2]-self.cropping[0][1],
|
||||
self.cropping[1][0]:input_shape[3]-self.cropping[1][1],
|
||||
self.cropping[2][0]:input_shape[4]-self.cropping[2][1]]
|
||||
self.cropping[0][0]:-self.cropping[0][1],
|
||||
self.cropping[1][0]:-self.cropping[1][1],
|
||||
self.cropping[2][0]:-self.cropping[2][1]]
|
||||
elif self.dim_ordering == 'tf':
|
||||
return x[:,
|
||||
self.cropping[0][0]:input_shape[1]-self.cropping[0][1],
|
||||
self.cropping[1][0]:input_shape[2]-self.cropping[1][1],
|
||||
self.cropping[2][0]:input_shape[3]-self.cropping[2][1],
|
||||
self.cropping[0][0]:-self.cropping[0][1],
|
||||
self.cropping[1][0]:-self.cropping[1][1],
|
||||
self.cropping[2][0]:-self.cropping[2][1],
|
||||
:]
|
||||
|
||||
def get_config(self):
|
||||
|
||||
@@ -0,0 +1,510 @@
|
||||
from .. import backend as K
|
||||
from .. import activations, initializations, regularizers
|
||||
|
||||
import numpy as np
|
||||
from ..engine import Layer, InputSpec
|
||||
from ..utils.np_utils import conv_output_length
|
||||
import warnings
|
||||
|
||||
|
||||
class ConvRecurrent2D(Layer):
|
||||
'''Abstract base class for convolutional recurrent layers.
|
||||
Do not use in a model -- it's not a functional layer!
|
||||
|
||||
ConvLSTM2D
|
||||
follows the specifications of this class and accepts
|
||||
the keyword arguments listed below.
|
||||
|
||||
# Input shape
|
||||
5D tensor with shape `(nb_samples, timesteps, channels, rows, cols)`.
|
||||
|
||||
# Output shape
|
||||
- if `return_sequences`: 5D tensor with shape
|
||||
`(nb_samples, timesteps, channels, rows, cols)`.
|
||||
- else, 4D tensor with shape `(nb_samples, channels, rows, cols)`.
|
||||
|
||||
# Arguments
|
||||
weights: list of numpy arrays to set as initial weights.
|
||||
The list should have 3 elements, of shapes:
|
||||
`[(input_dim, nb_filter), (nb_filter, nb_filter), (nb_filter,)]`.
|
||||
return_sequences: Boolean. Whether to return the last output
|
||||
in the output sequence, or the full sequence.
|
||||
go_backwards: Boolean (default False).
|
||||
If True, process the input sequence backwards.
|
||||
stateful: Boolean (default False). If True, the last state
|
||||
for each sample at index i in a batch will be used as initial
|
||||
state for the sample of index i in the following batch.
|
||||
nb_filter: Number of convolution filters to use.
|
||||
nb_row: Number of rows in the convolution kernel.
|
||||
nb_col: Number of columns in the convolution kernel.
|
||||
is required when using this layer as the first layer in a model.
|
||||
input_shape: input_shape
|
||||
|
||||
# Masking
|
||||
This layer supports masking for input data with a variable number
|
||||
of timesteps. To introduce masks to your data,
|
||||
use an [Embedding](embeddings.md) layer with the `mask_zero` parameter
|
||||
set to `True`.
|
||||
**Note:** for the time being, masking is only supported with Theano.
|
||||
|
||||
# TensorFlow warning
|
||||
For the time being, when using the TensorFlow backend,
|
||||
the number of timesteps used must be specified in your model.
|
||||
Make sure to pass an `input_length` int argument to your
|
||||
recurrent layer (if it comes first in your model),
|
||||
or to pass a complete `input_shape` argument to the first layer
|
||||
in your model otherwise.
|
||||
|
||||
|
||||
# Note on using statefulness in RNNs
|
||||
You can set RNN layers to be 'stateful', which means that the states
|
||||
computed for the samples in one batch will be reused as initial states
|
||||
for the samples in the next batch.
|
||||
This assumes a one-to-one mapping between
|
||||
samples in different successive batches.
|
||||
|
||||
To enable statefulness:
|
||||
- specify `stateful=True` in the layer constructor.
|
||||
- specify a fixed batch size for your model, by passing
|
||||
a `batch_input_size=(...)` to the first layer in your model.
|
||||
This is the expected shape of your inputs *including the batch
|
||||
size*.
|
||||
It should be a tuple of integers, e.g. `(32, 10, 100)`.
|
||||
|
||||
To reset the states of your model, call `.reset_states()` on either
|
||||
a specific layer, or on your entire model.
|
||||
'''
|
||||
|
||||
def __init__(self, weights=None, nb_row=None, nb_col=None, nb_filter=None,
|
||||
return_sequences=False, go_backwards=False, stateful=False,
|
||||
dim_ordering=None, **kwargs):
|
||||
self.return_sequences = return_sequences
|
||||
self.go_backwards = go_backwards
|
||||
self.stateful = stateful
|
||||
self.initial_weights = weights
|
||||
self.nb_row = nb_row
|
||||
self.nb_col = nb_col
|
||||
self.nb_filter = nb_filter
|
||||
self.dim_ordering = dim_ordering
|
||||
self.input_spec = [InputSpec(ndim=5)]
|
||||
|
||||
super(ConvRecurrent2D, self).__init__(**kwargs)
|
||||
|
||||
def compute_mask(self, input, mask):
|
||||
if self.return_sequences:
|
||||
return mask
|
||||
else:
|
||||
return None
|
||||
|
||||
def get_output_shape_for(self, input_shape):
|
||||
|
||||
if self.dim_ordering == 'th':
|
||||
rows = input_shape[3]
|
||||
cols = input_shape[4]
|
||||
elif self.dim_ordering == 'tf':
|
||||
rows = input_shape[2]
|
||||
cols = input_shape[3]
|
||||
else:
|
||||
raise ValueError('Invalid dim_ordering:', self.dim_ordering)
|
||||
|
||||
rows = conv_output_length(rows, self.nb_row,
|
||||
self.border_mode, self.subsample[0])
|
||||
cols = conv_output_length(cols, self.nb_col,
|
||||
self.border_mode, self.subsample[1])
|
||||
|
||||
if self.return_sequences:
|
||||
if self.dim_ordering == 'th':
|
||||
return (input_shape[0], input_shape[1],
|
||||
self.nb_filter, rows, cols)
|
||||
elif self.dim_ordering == 'tf':
|
||||
return (input_shape[0], input_shape[1],
|
||||
rows, cols, self.nb_filter)
|
||||
else:
|
||||
if self.dim_ordering == 'th':
|
||||
return (input_shape[0], self.nb_filter, rows, cols)
|
||||
elif self.dim_ordering == 'tf':
|
||||
return (input_shape[0], rows, cols, self.nb_filter)
|
||||
|
||||
def step(self, x, states):
|
||||
raise NotImplementedError
|
||||
|
||||
def get_constants(self, X, train=False):
|
||||
return None
|
||||
|
||||
def get_initial_states(self, X):
|
||||
# (samples, timesteps, row, col, filter)
|
||||
initial_state = K.zeros_like(X)
|
||||
# (samples,row, col, filter)
|
||||
initial_state = K.sum(initial_state, axis=1)
|
||||
initial_state = self.conv_step(initial_state, K.zeros(self.W_shape),
|
||||
border_mode=self.border_mode)
|
||||
|
||||
initial_states = [initial_state for _ in range(2)]
|
||||
return initial_states
|
||||
|
||||
def preprocess_input(self, x):
|
||||
return x
|
||||
|
||||
def call(self, x, mask=None):
|
||||
assert K.ndim(x) == 5
|
||||
input_shape = self.input_spec[0].shape
|
||||
unroll = False
|
||||
|
||||
if self.stateful:
|
||||
initial_states = self.states
|
||||
else:
|
||||
initial_states = self.get_initial_states(x)
|
||||
|
||||
constants = self.get_constants(x)
|
||||
preprocessed_input = self.preprocess_input(x)
|
||||
|
||||
last_output, outputs, states = K.rnn(self.step, preprocessed_input,
|
||||
initial_states,
|
||||
go_backwards=self.go_backwards,
|
||||
mask=mask,
|
||||
constants=constants,
|
||||
unroll=unroll,
|
||||
input_length=input_shape[1])
|
||||
if self.stateful:
|
||||
self.updates = []
|
||||
for i in range(len(states)):
|
||||
self.updates.append((self.states[i], states[i]))
|
||||
|
||||
if self.return_sequences:
|
||||
return outputs
|
||||
else:
|
||||
return last_output
|
||||
|
||||
def get_config(self):
|
||||
config = {'return_sequences': self.return_sequences,
|
||||
'go_backwards': self.go_backwards,
|
||||
'stateful': self.stateful}
|
||||
if self.stateful:
|
||||
config['batch_input_shape'] = self.input_spec[0].shape
|
||||
|
||||
base_config = super(ConvRecurrent2D, self).get_config()
|
||||
return dict(list(base_config.items()) + list(config.items()))
|
||||
|
||||
|
||||
class ConvLSTM2D(ConvRecurrent2D):
    '''Convolutional LSTM.

    # Input shape
        - if dim_ordering='th'
            5D tensor with shape:
            `(samples, time, channels, rows, cols)`
        - if dim_ordering='tf'
            5D tensor with shape:
            `(samples, time, rows, cols, channels)`

    # Output shape
        - if `return_sequences`
             - if dim_ordering='th'
                5D tensor with shape:
                `(samples, time, nb_filter, output_row, output_col)`
             - if dim_ordering='tf'
                5D tensor with shape:
                `(samples, time, output_row, output_col, nb_filter)`
        - else
            - if dim_ordering='th'
                4D tensor with shape:
                `(samples, nb_filter, output_row, output_col)`
            - if dim_ordering='tf'
                4D tensor with shape:
                `(samples, output_row, output_col, nb_filter)`

        where output_row and output_col depend on the shape of the filter,
        the border_mode and the subsample.

    # Arguments
        nb_filter: Number of convolution filters to use.
        nb_row: Number of rows in the convolution kernel.
        nb_col: Number of columns in the convolution kernel.
        border_mode: 'valid' or 'same'. Only applies to the convolution
            of the input; the convolution of the hidden state always
            uses 'same'.
        subsample: tuple of length 2. Factor by which to subsample output.
            Also called strides elsewhere. Only applies to the
            convolution of the input.
        dim_ordering: 'tf' if the features are at the last dimension,
            'th' if they come right after the time dimension, or
            'default' to use `K.image_dim_ordering()`.
        stateful: Boolean (default False). If True, the last state
            for each sample at index i in a batch will be used as initial
            state for the sample of index i in the following batch.
        init: weight initialization function.
            Can be the name of an existing function (str),
            or a Theano function
            (see: [initializations](../initializations.md)).
        inner_init: initialization function of the inner cells.
        forget_bias_init: initialization function for the bias of the
            forget gate.
            [Jozefowicz et al.](http://www.jmlr.org/proceedings/papers/v37/jozefowicz15.pdf)
            recommend initializing with ones.
        activation: activation function.
            Can be the name of an existing function (str),
            or a Theano function (see: [activations](../activations.md)).
        inner_activation: activation function for the inner cells.
        W_regularizer: regularizer applied to the concatenated
            input kernels (resolved with `regularizers.get`).
        U_regularizer: regularizer applied to the concatenated
            recurrent kernels.
        b_regularizer: regularizer applied to the concatenated biases.
        dropout_W: float between 0 and 1. Dropout rate of the mask
            multiplied with the input of each gate.
        dropout_U: float between 0 and 1. Dropout rate of the mask
            multiplied with the previous hidden state of each gate.

    # References
        - [Convolutional LSTM Network: A Machine Learning Approach for
        Precipitation Nowcasting](http://arxiv.org/pdf/1506.04214v1.pdf)
        The current implementation does not include the feedback loop on the
        cells output
    '''

    def __init__(self, nb_filter, nb_row, nb_col,
                 init='glorot_uniform', inner_init='orthogonal',
                 forget_bias_init='one', activation='tanh',
                 inner_activation='hard_sigmoid',
                 dim_ordering='default',
                 border_mode='valid', subsample=(1, 1),
                 W_regularizer=None, U_regularizer=None, b_regularizer=None,
                 dropout_W=0., dropout_U=0., **kwargs):

        if dim_ordering == 'default':
            dim_ordering = K.image_dim_ordering()
        if dim_ordering not in {'tf', 'th'}:
            raise ValueError('dim_ordering must be in {tf,th}', dim_ordering)
        self.nb_filter = nb_filter
        self.nb_row = nb_row
        self.nb_col = nb_col
        self.init = initializations.get(init)
        self.inner_init = initializations.get(inner_init)
        self.forget_bias_init = initializations.get(forget_bias_init)
        self.activation = activations.get(activation)
        self.inner_activation = activations.get(inner_activation)
        self.border_mode = border_mode
        # Normalised to a tuple so that a list coming from a
        # deserialised config behaves like the default value.
        self.subsample = tuple(subsample)

        if dim_ordering == 'th':
            warnings.warn('Be carefull if used with convolution3D layers:\n'
                          'th in convolution 3D corresponds to '
                          '(samples, channels, conv_dim1, conv_dim2,'
                          'conv_dim3)\n'
                          'while for this network it corresponds to: '
                          '(samples, time, channels, rows, cols)')
        self.dim_ordering = dim_ordering

        # Forwarded to the base class, which stores them again.
        kwargs['nb_filter'] = nb_filter
        kwargs['nb_row'] = nb_row
        kwargs['nb_col'] = nb_col
        kwargs['dim_ordering'] = dim_ordering

        self.W_regularizer = regularizers.get(W_regularizer)
        self.U_regularizer = regularizers.get(U_regularizer)
        self.b_regularizer = regularizers.get(b_regularizer)
        self.dropout_W, self.dropout_U = dropout_W, dropout_U
        if self.dropout_W or self.dropout_U:
            self.uses_learning_phase = True

        super(ConvLSTM2D, self).__init__(**kwargs)

    def build(self, input_shape):
        '''Create the kernels, biases, regularizers and states.

        # Arguments
            input_shape: 5D input shape; the channel axis is read at
                index 2 for 'th' and index 4 for 'tf'.

        # Raises
            ValueError: if `dim_ordering` is neither 'th' nor 'tf'.
        '''
        self.input_spec = [InputSpec(shape=input_shape)]

        # W_shape: kernels applied to the input.
        # W_shape1: kernels applied to the hidden state
        # (nb_filter -> nb_filter).
        if self.dim_ordering == 'th':
            stack_size = input_shape[2]
            self.W_shape = (self.nb_filter, stack_size,
                            self.nb_row, self.nb_col)
            self.W_shape1 = (self.nb_filter, self.nb_filter,
                             self.nb_row, self.nb_col)
        elif self.dim_ordering == 'tf':
            stack_size = input_shape[4]
            self.W_shape = (self.nb_row, self.nb_col,
                            stack_size, self.nb_filter)
            self.W_shape1 = (self.nb_row, self.nb_col,
                             self.nb_filter, self.nb_filter)
        else:
            raise ValueError('Invalid dim_ordering:', self.dim_ordering)

        if self.stateful:
            self.reset_states()
        else:
            # Placeholder list; when the layer is not stateful `call`
            # builds its initial states with `get_initial_states`.
            self.states = [None, None, None, None]

        # Input gate.
        self.W_i = self.init(self.W_shape, name='{}_W_i'.format(self.name))
        self.U_i = self.inner_init(self.W_shape1,
                                   name='{}_U_i'.format(self.name))
        self.b_i = K.zeros((self.nb_filter,), name='{}_b_i'.format(self.name))

        # Forget gate (its bias has a dedicated initializer).
        self.W_f = self.init(self.W_shape, name='{}_W_f'.format(self.name))
        self.U_f = self.inner_init(self.W_shape1,
                                   name='{}_U_f'.format(self.name))
        self.b_f = self.forget_bias_init((self.nb_filter,),
                                         name='{}_b_f'.format(self.name))

        # Cell candidate.
        self.W_c = self.init(self.W_shape, name='{}_W_c'.format(self.name))
        self.U_c = self.inner_init(self.W_shape1,
                                   name='{}_U_c'.format(self.name))
        self.b_c = K.zeros((self.nb_filter,), name='{}_b_c'.format(self.name))

        # Output gate.
        self.W_o = self.init(self.W_shape, name='{}_W_o'.format(self.name))
        self.U_o = self.inner_init(self.W_shape1,
                                   name='{}_U_o'.format(self.name))
        self.b_o = K.zeros((self.nb_filter,), name='{}_b_o'.format(self.name))

        # The order of this list defines the order expected by
        # `set_weights` / returned by `get_weights`: i, c, f, o.
        self.trainable_weights = [self.W_i, self.U_i, self.b_i,
                                  self.W_c, self.U_c, self.b_c,
                                  self.W_f, self.U_f, self.b_f,
                                  self.W_o, self.U_o, self.b_o]

        # Concatenated views, only used as regularizer targets here.
        self.W = K.concatenate([self.W_i, self.W_f, self.W_c, self.W_o])
        self.U = K.concatenate([self.U_i, self.U_f, self.U_c, self.U_o])
        self.b = K.concatenate([self.b_i, self.b_f, self.b_c, self.b_o])

        self.regularizers = []
        if self.W_regularizer:
            self.W_regularizer.set_param(self.W)
            self.regularizers.append(self.W_regularizer)
        if self.U_regularizer:
            self.U_regularizer.set_param(self.U)
            self.regularizers.append(self.U_regularizer)
        if self.b_regularizer:
            self.b_regularizer.set_param(self.b)
            self.regularizers.append(self.b_regularizer)

        if self.initial_weights is not None:
            self.set_weights(self.initial_weights)
            del self.initial_weights
        self.built = True

    def _state_shape(self):
        '''Return the shape (batch size included) of one state tensor.'''
        input_shape = self.input_spec[0].shape
        output_shape = self.get_output_shape_for(input_shape)
        # Drop the batch axis, and the time axis too when the output
        # shape describes a full sequence. The three remaining entries
        # are kept in output order (filter axis first for 'th',
        # last for 'tf').
        if self.return_sequences:
            dim1, dim2, dim3 = output_shape[2:]
        else:
            dim1, dim2, dim3 = output_shape[1:]
        return (input_shape[0], dim1, dim2, dim3)

    def reset_states(self):
        '''Set the hidden and cell states to zero (stateful layers only).

        The state variables are created on the first call and
        overwritten in place afterwards.

        # Raises
            ValueError: if the batch size of the input shape is unknown.
        '''
        assert self.stateful, 'Layer must be stateful.'
        input_shape = self.input_spec[0].shape
        state_shape = self._state_shape()
        if not input_shape[0]:
            raise ValueError('If a RNN is stateful, a complete ' +
                             'input_shape must be provided ' +
                             '(including batch size).')

        if hasattr(self, 'states'):
            K.set_value(self.states[0], np.zeros(state_shape))
            K.set_value(self.states[1], np.zeros(state_shape))
        else:
            self.states = [K.zeros(state_shape),
                           K.zeros(state_shape)]

    def conv_step(self, x, W, b=None, border_mode='valid'):
        '''Convolve the input `x` with kernel `W` and add bias `b`.

        # Arguments
            x: tensor to convolve (one timestep of the input).
            W: kernel of shape `W_shape`.
            b: optional bias with `nb_filter` entries; skipped when None.
            border_mode: border mode passed to `K.conv2d`.

        # Raises
            ValueError: if a bias is given and `dim_ordering` is
                neither 'th' nor 'tf'.
        '''
        input_shape = self.input_spec[0].shape

        # image_shape is the input shape without its time axis.
        conv_out = K.conv2d(x, W, strides=self.subsample,
                            border_mode=border_mode,
                            dim_ordering=self.dim_ordering,
                            image_shape=(input_shape[0],
                                         input_shape[2],
                                         input_shape[3],
                                         input_shape[4]),
                            filter_shape=self.W_shape)
        # Explicit None test: the truth value of a symbolic tensor is
        # backend dependent and must not be relied upon.
        if b is not None:
            if self.dim_ordering == 'th':
                conv_out = conv_out + K.reshape(b, (1, self.nb_filter, 1, 1))
            elif self.dim_ordering == 'tf':
                conv_out = conv_out + K.reshape(b, (1, 1, 1, self.nb_filter))
            else:
                raise ValueError('Invalid dim_ordering:', self.dim_ordering)

        return conv_out

    def conv_step_hidden(self, x, W, border_mode='valid'):
        '''Convolve the hidden state `x` with recurrent kernel `W`.

        Separate from `conv_step` because the image shape handed to
        `K.conv2d` is the state shape rather than the input shape;
        the stride is always (1, 1).
        '''
        conv_out = K.conv2d(x, W, strides=(1, 1),
                            border_mode=border_mode,
                            dim_ordering=self.dim_ordering,
                            image_shape=self._state_shape(),
                            filter_shape=self.W_shape1)

        return conv_out

    def step(self, x, states):
        '''Compute one LSTM step.

        # Arguments
            x: input for the current timestep.
            states: list `[h_tm1, c_tm1, B_U, B_W]`: previous hidden
                state, previous cell state and the two lists of
                dropout masks produced by `get_constants`.

        # Returns
            Tuple `(h, [h, c])` with the new hidden and cell states.
        '''
        assert len(states) == 4
        h_tm1 = states[0]
        c_tm1 = states[1]
        B_U = states[2]
        B_W = states[3]

        # Input contributions (one mask per gate).
        x_i = self.conv_step(x * B_W[0], self.W_i, self.b_i,
                             border_mode=self.border_mode)
        x_f = self.conv_step(x * B_W[1], self.W_f, self.b_f,
                             border_mode=self.border_mode)
        x_c = self.conv_step(x * B_W[2], self.W_c, self.b_c,
                             border_mode=self.border_mode)
        x_o = self.conv_step(x * B_W[3], self.W_o, self.b_o,
                             border_mode=self.border_mode)

        # U : from nb_filter to nb_filter
        # Same because must be stable in the output space
        h_i = self.conv_step_hidden(h_tm1 * B_U[0], self.U_i,
                                    border_mode='same')
        h_f = self.conv_step_hidden(h_tm1 * B_U[1], self.U_f,
                                    border_mode='same')
        h_c = self.conv_step_hidden(h_tm1 * B_U[2], self.U_c,
                                    border_mode='same')
        h_o = self.conv_step_hidden(h_tm1 * B_U[3], self.U_o,
                                    border_mode='same')

        i = self.inner_activation(x_i + h_i)
        f = self.inner_activation(x_f + h_f)
        c = f * c_tm1 + i * self.activation(x_c + h_c)
        o = self.inner_activation(x_o + h_o)
        h = o * self.activation(c)

        return h, [h, c]

    def get_constants(self, x):
        '''Build the dropout masks passed as constants to `K.rnn`.

        # Returns
            List `[B_U, B_W]`; each entry is a list of four masks (one
            per gate). When the matching dropout rate is not strictly
            between 0 and 1 the masks are the scalar 1.
        '''
        constants = []
        if 0 < self.dropout_U < 1:
            # Tensor of ones shaped like the hidden state: push zeros
            # through the input convolution, then add one.
            ones = K.zeros_like(x)
            ones = K.sum(ones, axis=1)
            ones = self.conv_step(ones, K.zeros(self.W_shape),
                                  border_mode=self.border_mode)
            ones = ones + 1
            B_U = [K.in_train_phase(K.dropout(ones, self.dropout_U), ones)
                   for _ in range(4)]
            constants.append(B_U)
        else:
            constants.append([K.cast_to_floatx(1.) for _ in range(4)])

        if 0 < self.dropout_W < 1:
            # Tensor of ones shaped like the input with axis 1 summed out.
            ones = K.zeros_like(x)
            ones = K.sum(ones, axis=1)
            ones = ones + 1
            B_W = [K.in_train_phase(K.dropout(ones, self.dropout_W), ones)
                   for _ in range(4)]
            constants.append(B_W)
        else:
            constants.append([K.cast_to_floatx(1.) for _ in range(4)])
        return constants

    def get_config(self):
        '''Return the layer configuration as a dict.

        Includes every constructor argument so that the layer can be
        rebuilt from its config without losing the strides, the
        regularizers or the dropout rates.
        '''
        config = {'nb_filter': self.nb_filter,
                  'nb_row': self.nb_row,
                  'nb_col': self.nb_col,
                  'init': self.init.__name__,
                  'inner_init': self.inner_init.__name__,
                  'forget_bias_init': self.forget_bias_init.__name__,
                  'activation': self.activation.__name__,
                  'dim_ordering': self.dim_ordering,
                  'border_mode': self.border_mode,
                  'inner_activation': self.inner_activation.__name__,
                  'subsample': self.subsample,
                  # NOTE(review): assumes regularizer instances expose
                  # `get_config()` -- confirm against the regularizers
                  # module.
                  'W_regularizer': (self.W_regularizer.get_config()
                                    if self.W_regularizer else None),
                  'U_regularizer': (self.U_regularizer.get_config()
                                    if self.U_regularizer else None),
                  'b_regularizer': (self.b_regularizer.get_config()
                                    if self.b_regularizer else None),
                  'dropout_W': self.dropout_W,
                  'dropout_U': self.dropout_U}
        base_config = super(ConvLSTM2D, self).get_config()
        return dict(list(base_config.items()) + list(config.items()))
|
||||
+210
-177
@@ -96,6 +96,37 @@ class Dropout(Layer):
|
||||
return dict(list(base_config.items()) + list(config.items()))
|
||||
|
||||
|
||||
class SpatialDropout1D(Dropout):
    '''Dropout variant that drops entire 1D feature maps.

    It serves the same purpose as Dropout, but whole feature maps are
    dropped rather than individual elements. When adjacent frames within
    feature maps are strongly correlated (as is normally the case in
    early convolution layers), regular dropout will not regularize the
    activations and will otherwise just result in an effective learning
    rate decrease. In that situation SpatialDropout1D helps promote
    independence between feature maps and should be used instead.

    # Arguments
        p: float between 0 and 1. Fraction of the input units to drop.

    # Input shape
        3D tensor with shape:
        `(samples, timesteps, channels)`

    # Output shape
        Same as input

    # References
        - [Efficient Object Localization Using Convolutional Networks](https://arxiv.org/pdf/1411.4280.pdf)
    '''

    def __init__(self, p, **kwargs):
        super(SpatialDropout1D, self).__init__(p, **kwargs)

    def _get_noise_shape(self, x):
        '''Return the noise shape: axes 0 and 2 of `x`, with axis 1 set to 1.'''
        dims = K.shape(x)
        return (dims[0], 1, dims[2])
|
||||
|
||||
|
||||
class SpatialDropout2D(Dropout):
|
||||
'''This version performs the same function as Dropout, however it drops
|
||||
entire 2D feature maps instead of individual elements. If adjacent pixels
|
||||
@@ -139,7 +170,7 @@ class SpatialDropout2D(Dropout):
|
||||
elif self.dim_ordering == 'tf':
|
||||
noise_shape = (input_shape[0], 1, 1, input_shape[3])
|
||||
else:
|
||||
raise Exception('Invalid dim_ordering: ' + self.dim_ordering)
|
||||
raise ValueError('Invalid dim_ordering:', self.dim_ordering)
|
||||
return noise_shape
|
||||
|
||||
|
||||
@@ -187,7 +218,7 @@ class SpatialDropout3D(Dropout):
|
||||
elif self.dim_ordering == 'tf':
|
||||
noise_shape = (input_shape[0], 1, 1, 1, input_shape[4])
|
||||
else:
|
||||
raise Exception('Invalid dim_ordering: ' + self.dim_ordering)
|
||||
raise ValueError('Invalid dim_ordering:', self.dim_ordering)
|
||||
return noise_shape
|
||||
|
||||
|
||||
@@ -259,8 +290,8 @@ class Reshape(Layer):
|
||||
'''Find and replace a single missing dimension in an output shape
|
||||
given an input shape.
|
||||
|
||||
A near direct port of the internal Numpy function _fix_unknown_dimension
|
||||
in numpy/core/src/multiarray/shape.c
|
||||
A near direct port of the internal Numpy function
|
||||
_fix_unknown_dimension in numpy/core/src/multiarray/shape.c
|
||||
|
||||
# Arguments
|
||||
input_shape: shape of array being reshaped
|
||||
@@ -301,7 +332,8 @@ class Reshape(Layer):
|
||||
return tuple(output_shape)
|
||||
|
||||
def get_output_shape_for(self, input_shape):
|
||||
return (input_shape[0],) + self._fix_unknown_dimension(input_shape[1:], self.target_shape)
|
||||
return (input_shape[0],) + self._fix_unknown_dimension(input_shape[1:],
|
||||
self.target_shape)
|
||||
|
||||
def call(self, x, mask=None):
|
||||
# In case the target shape is not fully defined,
|
||||
@@ -384,7 +416,9 @@ class Flatten(Layer):
|
||||
|
||||
```python
|
||||
model = Sequential()
|
||||
model.add(Convolution2D(64, 3, 3, border_mode='same', input_shape=(3, 32, 32)))
|
||||
model.add(Convolution2D(64, 3, 3,
|
||||
border_mode='same',
|
||||
input_shape=(3, 32, 32)))
|
||||
# now: model.output_shape == (None, 64, 32, 32)
|
||||
|
||||
model.add(Flatten())
|
||||
@@ -397,12 +431,12 @@ class Flatten(Layer):
|
||||
|
||||
def get_output_shape_for(self, input_shape):
|
||||
if not all(input_shape[1:]):
|
||||
raise Exception('The shape of the input to "Flatten" '
|
||||
'is not fully defined '
|
||||
'(got ' + str(input_shape[1:]) + '. '
|
||||
'Make sure to pass a complete "input_shape" '
|
||||
'or "batch_input_shape" argument to the first '
|
||||
'layer in your model.')
|
||||
raise ValueError('The shape of the input to "Flatten" '
|
||||
'is not fully defined '
|
||||
'(got ' + str(input_shape[1:]) + '. '
|
||||
'Make sure to pass a complete "input_shape" '
|
||||
'or "batch_input_shape" argument to the first '
|
||||
'layer in your model.')
|
||||
return (input_shape[0], np.prod(input_shape[1:]))
|
||||
|
||||
def call(self, x, mask=None):
|
||||
@@ -478,7 +512,8 @@ class Lambda(Layer):
|
||||
shape[-1] *= 2
|
||||
return tuple(shape)
|
||||
|
||||
model.add(Lambda(antirectifier, output_shape=antirectifier_output_shape))
|
||||
model.add(Lambda(antirectifier,
|
||||
output_shape=antirectifier_output_shape))
|
||||
```
|
||||
|
||||
# Arguments
|
||||
@@ -511,41 +546,46 @@ class Lambda(Layer):
|
||||
|
||||
if output_shape is None:
|
||||
self._output_shape = None
|
||||
elif type(output_shape) in {tuple, list}:
|
||||
elif isinstance(output_shape, (tuple, list)):
|
||||
self._output_shape = tuple(output_shape)
|
||||
else:
|
||||
if not hasattr(output_shape, '__call__'):
|
||||
raise Exception('In Lambda, `output_shape` '
|
||||
if not callable(output_shape):
|
||||
raise TypeError('In Lambda, `output_shape` '
|
||||
'must be a list, a tuple, or a function.')
|
||||
self._output_shape = output_shape
|
||||
super(Lambda, self).__init__(**kwargs)
|
||||
|
||||
def get_output_shape_for(self, input_shape):
|
||||
if self._output_shape is None:
|
||||
# if TensorFlow, we can infer the output shape directly:
|
||||
if K._BACKEND == 'tensorflow':
|
||||
if type(input_shape) is list:
|
||||
# With TensorFlow, we can infer the output shape directly:
|
||||
if K.backend() == 'tensorflow':
|
||||
if isinstance(input_shape, list):
|
||||
xs = [K.placeholder(shape=shape) for shape in input_shape]
|
||||
x = self.call(xs)
|
||||
else:
|
||||
x = K.placeholder(shape=input_shape)
|
||||
x = self.call(x)
|
||||
if type(x) is list:
|
||||
if isinstance(x, list):
|
||||
return [K.int_shape(x_elem) for x_elem in x]
|
||||
else:
|
||||
return K.int_shape(x)
|
||||
# otherwise, we default to the input shape
|
||||
# Otherwise, we default to the input shape.
|
||||
warnings.warn('`output_shape` argument not specified for layer {} '
|
||||
'and cannot be automatically inferred with the Theano backend. '
|
||||
'Defaulting to output shape `{}` (same as input shape). '
|
||||
'If the expected output shape is different, specify it via the `output_shape` argument.'
|
||||
.format(self.name, input_shape))
|
||||
return input_shape
|
||||
elif type(self._output_shape) in {tuple, list}:
|
||||
if type(input_shape) is list:
|
||||
elif isinstance(self._output_shape, (tuple, list)):
|
||||
if isinstance(input_shape, list):
|
||||
nb_samples = input_shape[0][0]
|
||||
else:
|
||||
nb_samples = input_shape[0] if input_shape else None
|
||||
return (nb_samples,) + tuple(self._output_shape)
|
||||
else:
|
||||
shape = self._output_shape(input_shape)
|
||||
if type(shape) not in {list, tuple}:
|
||||
raise Exception('output_shape function must return a tuple')
|
||||
if not isinstance(shape, (list, tuple)):
|
||||
raise ValueError('output_shape function must return a tuple')
|
||||
return tuple(shape)
|
||||
|
||||
def call(self, x, mask=None):
|
||||
@@ -582,20 +622,27 @@ class Lambda(Layer):
|
||||
return dict(list(base_config.items()) + list(config.items()))
|
||||
|
||||
@classmethod
|
||||
def from_config(cls, config):
|
||||
def from_config(cls, config, custom_objects={}):
|
||||
# Insert custom objects into globals.
|
||||
if custom_objects:
|
||||
globs = globals().copy()
|
||||
globs.update(custom_objects)
|
||||
else:
|
||||
globs = globals()
|
||||
|
||||
function_type = config.pop('function_type')
|
||||
if function_type == 'function':
|
||||
function = globals()[config['function']]
|
||||
function = globs[config['function']]
|
||||
elif function_type == 'lambda':
|
||||
function = func_load(config['function'], globs=globals())
|
||||
function = func_load(config['function'], globs=globs)
|
||||
else:
|
||||
raise Exception('Unknown function type: ' + function_type)
|
||||
raise TypeError('Unknown function type:', function_type)
|
||||
|
||||
output_shape_type = config.pop('output_shape_type')
|
||||
if output_shape_type == 'function':
|
||||
output_shape = globals()[config['output_shape']]
|
||||
output_shape = globs[config['output_shape']]
|
||||
elif output_shape_type == 'lambda':
|
||||
output_shape = func_load(config['output_shape'], globs=globals())
|
||||
output_shape = func_load(config['output_shape'], globs=globs)
|
||||
else:
|
||||
output_shape = config['output_shape']
|
||||
|
||||
@@ -650,18 +697,24 @@ class Dense(Layer):
|
||||
(eg. maxnorm, nonneg), applied to the main weights matrix.
|
||||
b_constraint: instance of the [constraints](../constraints.md) module,
|
||||
applied to the bias.
|
||||
bias: whether to include a bias (i.e. make the layer affine rather than linear).
|
||||
input_dim: dimensionality of the input (integer).
|
||||
This argument (or alternatively, the keyword argument `input_shape`)
|
||||
bias: whether to include a bias
|
||||
(i.e. make the layer affine rather than linear).
|
||||
input_dim: dimensionality of the input (integer). This argument
|
||||
(or alternatively, the keyword argument `input_shape`)
|
||||
is required when using this layer as the first layer in a model.
|
||||
|
||||
# Input shape
|
||||
2D tensor with shape: `(nb_samples, input_dim)`.
|
||||
nD tensor with shape: `(nb_samples, ..., input_dim)`.
|
||||
The most common situation would be
|
||||
a 2D input with shape `(nb_samples, input_dim)`.
|
||||
|
||||
# Output shape
|
||||
2D tensor with shape: `(nb_samples, output_dim)`.
|
||||
nD tensor with shape: `(nb_samples, ..., output_dim)`.
|
||||
For instance, for a 2D input with shape `(nb_samples, input_dim)`,
|
||||
the output would have shape `(nb_samples, output_dim)`.
|
||||
'''
|
||||
def __init__(self, output_dim, init='glorot_uniform', activation='linear', weights=None,
|
||||
def __init__(self, output_dim, init='glorot_uniform',
|
||||
activation=None, weights=None,
|
||||
W_regularizer=None, b_regularizer=None, activity_regularizer=None,
|
||||
W_constraint=None, b_constraint=None,
|
||||
bias=True, input_dim=None, **kwargs):
|
||||
@@ -679,49 +732,37 @@ class Dense(Layer):
|
||||
|
||||
self.bias = bias
|
||||
self.initial_weights = weights
|
||||
self.input_spec = [InputSpec(ndim=2)]
|
||||
self.input_spec = [InputSpec(ndim='2+')]
|
||||
|
||||
if self.input_dim:
|
||||
kwargs['input_shape'] = (self.input_dim,)
|
||||
super(Dense, self).__init__(**kwargs)
|
||||
|
||||
def build(self, input_shape):
|
||||
assert len(input_shape) == 2
|
||||
input_dim = input_shape[1]
|
||||
assert len(input_shape) >= 2
|
||||
input_dim = input_shape[-1]
|
||||
self.input_dim = input_dim
|
||||
self.input_spec = [InputSpec(dtype=K.floatx(),
|
||||
shape=(None, input_dim))]
|
||||
ndim='2+')]
|
||||
|
||||
self.W = self.init((input_dim, self.output_dim),
|
||||
name='{}_W'.format(self.name))
|
||||
self.W = self.add_weight((input_dim, self.output_dim),
|
||||
initializer=self.init,
|
||||
name='{}_W'.format(self.name),
|
||||
regularizer=self.W_regularizer,
|
||||
constraint=self.W_constraint)
|
||||
if self.bias:
|
||||
self.b = K.zeros((self.output_dim,),
|
||||
name='{}_b'.format(self.name))
|
||||
self.trainable_weights = [self.W, self.b]
|
||||
self.b = self.add_weight((self.output_dim,),
|
||||
initializer='zero',
|
||||
name='{}_b'.format(self.name),
|
||||
regularizer=self.b_regularizer,
|
||||
constraint=self.b_constraint)
|
||||
else:
|
||||
self.trainable_weights = [self.W]
|
||||
|
||||
self.regularizers = []
|
||||
if self.W_regularizer:
|
||||
self.W_regularizer.set_param(self.W)
|
||||
self.regularizers.append(self.W_regularizer)
|
||||
|
||||
if self.bias and self.b_regularizer:
|
||||
self.b_regularizer.set_param(self.b)
|
||||
self.regularizers.append(self.b_regularizer)
|
||||
|
||||
if self.activity_regularizer:
|
||||
self.activity_regularizer.set_layer(self)
|
||||
self.regularizers.append(self.activity_regularizer)
|
||||
|
||||
self.constraints = {}
|
||||
if self.W_constraint:
|
||||
self.constraints[self.W] = self.W_constraint
|
||||
if self.bias and self.b_constraint:
|
||||
self.constraints[self.b] = self.b_constraint
|
||||
self.b = None
|
||||
|
||||
if self.initial_weights is not None:
|
||||
self.set_weights(self.initial_weights)
|
||||
del self.initial_weights
|
||||
self.built = True
|
||||
|
||||
def call(self, x, mask=None):
|
||||
output = K.dot(x, self.W)
|
||||
@@ -730,8 +771,11 @@ class Dense(Layer):
|
||||
return self.activation(output)
|
||||
|
||||
def get_output_shape_for(self, input_shape):
|
||||
assert input_shape and len(input_shape) == 2
|
||||
return (input_shape[0], self.output_dim)
|
||||
assert input_shape and len(input_shape) >= 2
|
||||
assert input_shape[-1] and input_shape[-1] == self.input_dim
|
||||
output_shape = list(input_shape)
|
||||
output_shape[-1] = self.output_dim
|
||||
return tuple(output_shape)
|
||||
|
||||
def get_config(self):
|
||||
config = {'output_dim': self.output_dim,
|
||||
@@ -770,9 +814,8 @@ class ActivityRegularization(Layer):
|
||||
self.l2 = l2
|
||||
|
||||
super(ActivityRegularization, self).__init__(**kwargs)
|
||||
activity_regularizer = ActivityRegularizer(l1=l1, l2=l2)
|
||||
activity_regularizer.set_layer(self)
|
||||
self.regularizers = [activity_regularizer]
|
||||
self.activity_regularizer = regularizers.L1L2Regularizer(l1=l1, l2=l2)
|
||||
self.regularizers = [self.activity_regularizer]
|
||||
|
||||
def get_config(self):
|
||||
config = {'l1': self.l1,
|
||||
@@ -815,9 +858,10 @@ class MaxoutDense(Layer):
|
||||
(eg. maxnorm, nonneg), applied to the main weights matrix.
|
||||
b_constraint: instance of the [constraints](../constraints.md) module,
|
||||
applied to the bias.
|
||||
bias: whether to include a bias (i.e. make the layer affine rather than linear).
|
||||
input_dim: dimensionality of the input (integer).
|
||||
This argument (or alternatively, the keyword argument `input_shape`)
|
||||
bias: whether to include a bias
|
||||
(i.e. make the layer affine rather than linear).
|
||||
input_dim: dimensionality of the input (integer). This argument
|
||||
(or alternatively, the keyword argument `input_shape`)
|
||||
is required when using this layer as the first layer in a model.
|
||||
|
||||
# Input shape
|
||||
@@ -829,11 +873,18 @@ class MaxoutDense(Layer):
|
||||
# References
|
||||
- [Maxout Networks](http://arxiv.org/pdf/1302.4389.pdf)
|
||||
'''
|
||||
def __init__(self, output_dim, nb_feature=4,
|
||||
init='glorot_uniform', weights=None,
|
||||
W_regularizer=None, b_regularizer=None, activity_regularizer=None,
|
||||
W_constraint=None, b_constraint=None,
|
||||
bias=True, input_dim=None, **kwargs):
|
||||
def __init__(self, output_dim,
|
||||
nb_feature=4,
|
||||
init='glorot_uniform',
|
||||
weights=None,
|
||||
W_regularizer=None,
|
||||
b_regularizer=None,
|
||||
activity_regularizer=None,
|
||||
W_constraint=None,
|
||||
b_constraint=None,
|
||||
bias=True,
|
||||
input_dim=None,
|
||||
**kwargs):
|
||||
self.output_dim = output_dim
|
||||
self.nb_feature = nb_feature
|
||||
self.init = initializations.get(init)
|
||||
@@ -859,37 +910,24 @@ class MaxoutDense(Layer):
|
||||
self.input_spec = [InputSpec(dtype=K.floatx(),
|
||||
shape=(None, input_dim))]
|
||||
|
||||
self.W = self.init((self.nb_feature, input_dim, self.output_dim),
|
||||
name='{}_W'.format(self.name))
|
||||
self.W = self.add_weight((self.nb_feature, input_dim, self.output_dim),
|
||||
initializer=self.init,
|
||||
name='{}_W'.format(self.name),
|
||||
regularizer=self.W_regularizer,
|
||||
constraint=self.W_constraint)
|
||||
if self.bias:
|
||||
self.b = K.zeros((self.nb_feature, self.output_dim),
|
||||
name='{}_b'.format(self.name))
|
||||
self.trainable_weights = [self.W, self.b]
|
||||
self.b = self.add_weight((self.nb_feature, self.output_dim,),
|
||||
initializer='zero',
|
||||
name='{}_b'.format(self.name),
|
||||
regularizer=self.b_regularizer,
|
||||
constraint=self.b_constraint)
|
||||
else:
|
||||
self.trainable_weights = [self.W]
|
||||
|
||||
self.regularizers = []
|
||||
if self.W_regularizer:
|
||||
self.W_regularizer.set_param(self.W)
|
||||
self.regularizers.append(self.W_regularizer)
|
||||
|
||||
if self.bias and self.b_regularizer:
|
||||
self.b_regularizer.set_param(self.b)
|
||||
self.regularizers.append(self.b_regularizer)
|
||||
|
||||
if self.activity_regularizer:
|
||||
self.activity_regularizer.set_layer(self)
|
||||
self.regularizers.append(self.activity_regularizer)
|
||||
|
||||
self.constraints = {}
|
||||
if self.W_constraint:
|
||||
self.constraints[self.W] = self.W_constraint
|
||||
if self.bias and self.b_constraint:
|
||||
self.constraints[self.b] = self.b_constraint
|
||||
self.b = None
|
||||
|
||||
if self.initial_weights is not None:
|
||||
self.set_weights(self.initial_weights)
|
||||
del self.initial_weights
|
||||
self.built = True
|
||||
|
||||
def get_output_shape_for(self, input_shape):
|
||||
assert input_shape and len(input_shape) == 2
|
||||
@@ -947,9 +985,10 @@ class Highway(Layer):
|
||||
(eg. maxnorm, nonneg), applied to the main weights matrix.
|
||||
b_constraint: instance of the [constraints](../constraints.md) module,
|
||||
applied to the bias.
|
||||
bias: whether to include a bias (i.e. make the layer affine rather than linear).
|
||||
input_dim: dimensionality of the input (integer).
|
||||
This argument (or alternatively, the keyword argument `input_shape`)
|
||||
bias: whether to include a bias
|
||||
(i.e. make the layer affine rather than linear).
|
||||
input_dim: dimensionality of the input (integer). This argument
|
||||
(or alternatively, the keyword argument `input_shape`)
|
||||
is required when using this layer as the first layer in a model.
|
||||
|
||||
# Input shape
|
||||
@@ -961,11 +1000,19 @@ class Highway(Layer):
|
||||
# References
|
||||
- [Highway Networks](http://arxiv.org/pdf/1505.00387v2.pdf)
|
||||
'''
|
||||
def __init__(self, init='glorot_uniform', transform_bias=-2,
|
||||
activation='linear', weights=None,
|
||||
W_regularizer=None, b_regularizer=None, activity_regularizer=None,
|
||||
W_constraint=None, b_constraint=None,
|
||||
bias=True, input_dim=None, **kwargs):
|
||||
def __init__(self,
|
||||
init='glorot_uniform',
|
||||
transform_bias=-2,
|
||||
activation=None,
|
||||
weights=None,
|
||||
W_regularizer=None,
|
||||
b_regularizer=None,
|
||||
activity_regularizer=None,
|
||||
W_constraint=None,
|
||||
b_constraint=None,
|
||||
bias=True,
|
||||
input_dim=None,
|
||||
**kwargs):
|
||||
self.init = initializations.get(init)
|
||||
self.transform_bias = transform_bias
|
||||
self.activation = activations.get(activation)
|
||||
@@ -991,42 +1038,30 @@ class Highway(Layer):
|
||||
self.input_spec = [InputSpec(dtype=K.floatx(),
|
||||
shape=(None, input_dim))]
|
||||
|
||||
self.W = self.init((input_dim, input_dim),
|
||||
name='{}_W'.format(self.name))
|
||||
self.W_carry = self.init((input_dim, input_dim),
|
||||
name='{}_W_carry'.format(self.name))
|
||||
|
||||
self.W = self.add_weight((input_dim, input_dim),
|
||||
initializer=self.init,
|
||||
name='{}_W'.format(self.name),
|
||||
regularizer=self.W_regularizer,
|
||||
constraint=self.W_constraint)
|
||||
self.W_carry = self.add_weight((input_dim, input_dim),
|
||||
initializer=self.init,
|
||||
name='{}_W_carry'.format(self.name))
|
||||
if self.bias:
|
||||
self.b = K.zeros((input_dim,), name='{}_b'.format(self.name))
|
||||
# initialize with a vector of values `transform_bias`
|
||||
self.b_carry = K.variable(np.ones((input_dim,)) * self.transform_bias,
|
||||
name='{}_b_carry'.format(self.name))
|
||||
self.trainable_weights = [self.W, self.b, self.W_carry, self.b_carry]
|
||||
self.b = self.add_weight((input_dim,),
|
||||
initializer='zero',
|
||||
name='{}_b'.format(self.name),
|
||||
regularizer=self.b_regularizer,
|
||||
constraint=self.b_constraint)
|
||||
self.b_carry = self.add_weight((input_dim,),
|
||||
initializer='one',
|
||||
name='{}_b_carry'.format(self.name))
|
||||
else:
|
||||
self.trainable_weights = [self.W, self.W_carry]
|
||||
|
||||
self.regularizers = []
|
||||
if self.W_regularizer:
|
||||
self.W_regularizer.set_param(self.W)
|
||||
self.regularizers.append(self.W_regularizer)
|
||||
|
||||
if self.bias and self.b_regularizer:
|
||||
self.b_regularizer.set_param(self.b)
|
||||
self.regularizers.append(self.b_regularizer)
|
||||
|
||||
if self.activity_regularizer:
|
||||
self.activity_regularizer.set_layer(self)
|
||||
self.regularizers.append(self.activity_regularizer)
|
||||
|
||||
self.constraints = {}
|
||||
if self.W_constraint:
|
||||
self.constraints[self.W] = self.W_constraint
|
||||
if self.bias and self.b_constraint:
|
||||
self.constraints[self.b] = self.b_constraint
|
||||
self.b_carry = None
|
||||
|
||||
if self.initial_weights is not None:
|
||||
self.set_weights(self.initial_weights)
|
||||
del self.initial_weights
|
||||
self.built = True
|
||||
|
||||
def call(self, x, mask=None):
|
||||
y = K.dot(x, self.W_carry)
|
||||
@@ -1096,21 +1131,31 @@ class TimeDistributedDense(Layer):
|
||||
(eg. maxnorm, nonneg), applied to the main weights matrix.
|
||||
b_constraint: instance of the [constraints](../constraints.md) module,
|
||||
applied to the bias.
|
||||
bias: whether to include a bias (i.e. make the layer affine rather than linear).
|
||||
input_dim: dimensionality of the input (integer).
|
||||
This argument (or alternatively, the keyword argument `input_shape`)
|
||||
bias: whether to include a bias
|
||||
(i.e. make the layer affine rather than linear).
|
||||
input_dim: dimensionality of the input (integer). This argument
|
||||
(or alternatively, the keyword argument `input_shape`)
|
||||
is required when using this layer as the first layer in a model.
|
||||
input_length: length of inputs sequences
|
||||
(integer, or None for variable-length sequences).
|
||||
'''
|
||||
|
||||
def __init__(self, output_dim,
|
||||
init='glorot_uniform', activation='linear', weights=None,
|
||||
W_regularizer=None, b_regularizer=None, activity_regularizer=None,
|
||||
W_constraint=None, b_constraint=None,
|
||||
bias=True, input_dim=None, input_length=None, **kwargs):
|
||||
warnings.warn('TimeDistributedDense is deprecated, '
|
||||
'please use TimeDistributed(Dense(...)) instead.')
|
||||
init='glorot_uniform',
|
||||
activation=None,
|
||||
weights=None,
|
||||
W_regularizer=None,
|
||||
b_regularizer=None,
|
||||
activity_regularizer=None,
|
||||
W_constraint=None,
|
||||
b_constraint=None,
|
||||
bias=True,
|
||||
input_dim=None,
|
||||
input_length=None,
|
||||
**kwargs):
|
||||
warnings.warn('`TimeDistributedDense` is deprecated, '
|
||||
'And will be removed on May 1st, 2017. '
|
||||
'Please use a `Dense` layer instead.')
|
||||
self.output_dim = output_dim
|
||||
self.init = initializations.get(init)
|
||||
self.activation = activations.get(activation)
|
||||
@@ -1138,35 +1183,24 @@ class TimeDistributedDense(Layer):
|
||||
shape=(None,) + input_shape[1:])]
|
||||
input_dim = input_shape[2]
|
||||
|
||||
self.W = self.init((input_dim, self.output_dim),
|
||||
name='{}_W'.format(self.name))
|
||||
self.W = self.add_weight((input_dim, self.output_dim),
|
||||
initializer=self.init,
|
||||
name='{}_W'.format(self.name),
|
||||
regularizer=self.W_regularizer,
|
||||
constraint=self.W_constraint)
|
||||
if self.bias:
|
||||
self.b = K.zeros((self.output_dim,),
|
||||
name='{}_b'.format(self.name))
|
||||
self.trainable_weights = [self.W, self.b]
|
||||
self.regularizers = []
|
||||
|
||||
if self.W_regularizer:
|
||||
self.W_regularizer.set_param(self.W)
|
||||
self.regularizers.append(self.W_regularizer)
|
||||
|
||||
if self.bias and self.b_regularizer:
|
||||
self.b_regularizer.set_param(self.b)
|
||||
self.regularizers.append(self.b_regularizer)
|
||||
|
||||
if self.activity_regularizer:
|
||||
self.activity_regularizer.set_layer(self)
|
||||
self.regularizers.append(self.activity_regularizer)
|
||||
|
||||
self.constraints = {}
|
||||
if self.W_constraint:
|
||||
self.constraints[self.W] = self.W_constraint
|
||||
if self.bias and self.b_constraint:
|
||||
self.constraints[self.b] = self.b_constraint
|
||||
self.b = self.add_weight((self.output_dim,),
|
||||
initializer='zero',
|
||||
name='{}_b'.format(self.name),
|
||||
regularizer=self.b_regularizer,
|
||||
constraint=self.b_constraint)
|
||||
else:
|
||||
self.b = None
|
||||
|
||||
if self.initial_weights is not None:
|
||||
self.set_weights(self.initial_weights)
|
||||
del self.initial_weights
|
||||
self.built = True
|
||||
|
||||
def get_output_shape_for(self, input_shape):
|
||||
return (input_shape[0], input_shape[1], self.output_dim)
|
||||
@@ -1175,12 +1209,11 @@ class TimeDistributedDense(Layer):
|
||||
input_shape = self.input_spec[0].shape
|
||||
# x has shape (samples, timesteps, input_dim)
|
||||
input_length = input_shape[1]
|
||||
# Note: input_length should always be provided when using tensorflow backend.
|
||||
if not input_length:
|
||||
if hasattr(K, 'int_shape'):
|
||||
input_length = K.int_shape(x)[1]
|
||||
if not input_length:
|
||||
raise Exception(
|
||||
raise ValueError(
|
||||
'Layer ' + self.name +
|
||||
' requires to know the length of its input, '
|
||||
'but it could not be inferred automatically. '
|
||||
|
||||
@@ -91,25 +91,15 @@ class Embedding(Layer):
|
||||
super(Embedding, self).__init__(**kwargs)
|
||||
|
||||
def build(self, input_shape):
|
||||
self.W = self.init((self.input_dim, self.output_dim),
|
||||
name='{}_W'.format(self.name))
|
||||
self.trainable_weights = [self.W]
|
||||
|
||||
self.constraints = {}
|
||||
if self.W_constraint:
|
||||
self.constraints[self.W] = self.W_constraint
|
||||
|
||||
self.regularizers = []
|
||||
if self.W_regularizer:
|
||||
self.W_regularizer.set_param(self.W)
|
||||
self.regularizers.append(self.W_regularizer)
|
||||
|
||||
if self.activity_regularizer:
|
||||
self.activity_regularizer.set_layer(self)
|
||||
self.regularizers.append(self.activity_regularizer)
|
||||
self.W = self.add_weight((self.input_dim, self.output_dim),
|
||||
initializer=self.init,
|
||||
name='{}_W'.format(self.name),
|
||||
regularizer=self.W_regularizer,
|
||||
constraint=self.W_constraint)
|
||||
|
||||
if self.initial_weights is not None:
|
||||
self.set_weights(self.initial_weights)
|
||||
self.built = True
|
||||
|
||||
def compute_mask(self, x, mask=None):
|
||||
if not self.mask_zero:
|
||||
|
||||
+41
-60
@@ -75,14 +75,14 @@ class LocallyConnected1D(Layer):
|
||||
`steps` value might have changed due to padding.
|
||||
'''
|
||||
def __init__(self, nb_filter, filter_length,
|
||||
init='uniform', activation='linear', weights=None,
|
||||
init='glorot_uniform', activation=None, weights=None,
|
||||
border_mode='valid', subsample_length=1,
|
||||
W_regularizer=None, b_regularizer=None, activity_regularizer=None,
|
||||
W_constraint=None, b_constraint=None,
|
||||
bias=True, input_dim=None, input_length=None, **kwargs):
|
||||
if border_mode != 'valid':
|
||||
raise Exception('Invalid border mode for LocallyConnected1D '
|
||||
'(only "valid" is supported):', border_mode)
|
||||
raise ValueError('Invalid border mode for LocallyConnected1D '
|
||||
'(only "valid" is supported):', border_mode)
|
||||
self.nb_filter = nb_filter
|
||||
self.filter_length = filter_length
|
||||
self.init = initializations.get(init, dim_ordering='th')
|
||||
@@ -110,35 +110,27 @@ class LocallyConnected1D(Layer):
|
||||
def build(self, input_shape):
|
||||
input_dim = input_shape[2]
|
||||
_, output_length, nb_filter = self.get_output_shape_for(input_shape)
|
||||
|
||||
self.W_shape = (output_length, self.filter_length * input_dim, nb_filter)
|
||||
self.W = self.init(self.W_shape, name='{}_W'.format(self.name))
|
||||
self.W_shape = (output_length,
|
||||
self.filter_length * input_dim,
|
||||
nb_filter)
|
||||
self.W = self.add_weight(self.W_shape,
|
||||
initializer=self.init,
|
||||
name='{}_W'.format(self.name),
|
||||
regularizer=self.W_regularizer,
|
||||
constraint=self.W_constraint)
|
||||
if self.bias:
|
||||
self.b = K.zeros((output_length, self.nb_filter), name='{}_b'.format(self.name))
|
||||
self.trainable_weights = [self.W, self.b]
|
||||
self.b = self.add_weight((output_length, self.nb_filter),
|
||||
initializer='zero',
|
||||
name='{}_b'.format(self.name),
|
||||
regularizer=self.b_regularizer,
|
||||
constraint=self.b_constraint)
|
||||
else:
|
||||
self.trainable_weights = [self.W]
|
||||
|
||||
self.regularizers = []
|
||||
if self.W_regularizer:
|
||||
self.W_regularizer.set_param(self.W)
|
||||
self.regularizers.append(self.W_regularizer)
|
||||
if self.b_regularizer:
|
||||
self.b_regularizer.set_param(self.b)
|
||||
self.regularizers.append(self.b_regularizer)
|
||||
if self.activity_regularizer:
|
||||
self.activity_regularizer.set_layer(self)
|
||||
self.regularizers.append(self.activity_regularizer)
|
||||
|
||||
self.constraints = {}
|
||||
if self.W_constraint:
|
||||
self.constraints[self.W] = self.W_constraint
|
||||
if self.b_constraint:
|
||||
self.constraints[self.b] = self.b_constraint
|
||||
self.b = None
|
||||
|
||||
if self.initial_weights is not None:
|
||||
self.set_weights(self.initial_weights)
|
||||
del self.initial_weights
|
||||
self.built = True
|
||||
|
||||
def get_output_shape_for(self, input_shape):
|
||||
length = conv_output_length(input_shape[1],
|
||||
@@ -257,7 +249,7 @@ class LocallyConnected2D(Layer):
|
||||
`rows` and `cols` values might have changed due to padding.
|
||||
'''
|
||||
def __init__(self, nb_filter, nb_row, nb_col,
|
||||
init='glorot_uniform', activation='linear', weights=None,
|
||||
init='glorot_uniform', activation=None, weights=None,
|
||||
border_mode='valid', subsample=(1, 1),
|
||||
dim_ordering='default',
|
||||
W_regularizer=None, b_regularizer=None, activity_regularizer=None,
|
||||
@@ -266,8 +258,8 @@ class LocallyConnected2D(Layer):
|
||||
if dim_ordering == 'default':
|
||||
dim_ordering = K.image_dim_ordering()
|
||||
if border_mode != 'valid':
|
||||
raise Exception('Invalid border mode for LocallyConnected2D '
|
||||
'(only "valid" is supported):', border_mode)
|
||||
raise ValueError('Invalid border mode for LocallyConnected2D '
|
||||
'(only "valid" is supported):', border_mode)
|
||||
self.nb_filter = nb_filter
|
||||
self.nb_row = nb_row
|
||||
self.nb_col = nb_col
|
||||
@@ -276,7 +268,8 @@ class LocallyConnected2D(Layer):
|
||||
|
||||
self.border_mode = border_mode
|
||||
self.subsample = tuple(subsample)
|
||||
assert dim_ordering in {'tf', 'th'}, 'dim_ordering must be in {tf, th}'
|
||||
if dim_ordering not in {'tf', 'th'}:
|
||||
raise ValueError('`dim_ordering` must be in {tf, th}.')
|
||||
self.dim_ordering = dim_ordering
|
||||
|
||||
self.W_regularizer = regularizers.get(W_regularizer)
|
||||
@@ -300,39 +293,31 @@ class LocallyConnected2D(Layer):
|
||||
_, output_row, output_col, nb_filter = output_shape
|
||||
input_filter = input_shape[3]
|
||||
else:
|
||||
raise Exception('Invalid dim_ordering: ' + self.dim_ordering)
|
||||
raise ValueError('Invalid dim_ordering:', self.dim_ordering)
|
||||
|
||||
self.output_row = output_row
|
||||
self.output_col = output_col
|
||||
self.W_shape = (output_row * output_col, self.nb_row * self.nb_col * input_filter, nb_filter)
|
||||
self.W = self.init(self.W_shape, name='{}_W'.format(self.name))
|
||||
|
||||
self.W_shape = (output_row * output_col,
|
||||
self.nb_row * self.nb_col * input_filter,
|
||||
nb_filter)
|
||||
self.W = self.add_weight(self.W_shape,
|
||||
initializer=self.init,
|
||||
name='{}_W'.format(self.name),
|
||||
regularizer=self.W_regularizer,
|
||||
constraint=self.W_constraint)
|
||||
if self.bias:
|
||||
self.b = K.zeros((output_row, output_col, nb_filter), name='{}_b'.format(self.name))
|
||||
self.trainable_weights = [self.W, self.b]
|
||||
self.b = self.add_weight((output_row, output_col, nb_filter),
|
||||
initializer='zero',
|
||||
name='{}_b'.format(self.name),
|
||||
regularizer=self.b_regularizer,
|
||||
constraint=self.b_constraint)
|
||||
else:
|
||||
self.trainable_weights = [self.W]
|
||||
|
||||
self.regularizers = []
|
||||
if self.W_regularizer:
|
||||
self.W_regularizer.set_param(self.W)
|
||||
self.regularizers.append(self.W_regularizer)
|
||||
if self.bias and self.b_regularizer:
|
||||
self.b_regularizer.set_param(self.b)
|
||||
self.regularizers.append(self.b_regularizer)
|
||||
if self.activity_regularizer:
|
||||
self.activity_regularizer.set_layer(self)
|
||||
self.regularizers.append(self.activity_regularizer)
|
||||
|
||||
self.constraints = {}
|
||||
if self.W_constraint:
|
||||
self.constraints[self.W] = self.W_constraint
|
||||
if self.bias and self.b_constraint:
|
||||
self.constraints[self.b] = self.b_constraint
|
||||
self.b = None
|
||||
|
||||
if self.initial_weights is not None:
|
||||
self.set_weights(self.initial_weights)
|
||||
del self.initial_weights
|
||||
self.built = True
|
||||
|
||||
def get_output_shape_for(self, input_shape):
|
||||
if self.dim_ordering == 'th':
|
||||
@@ -342,7 +327,7 @@ class LocallyConnected2D(Layer):
|
||||
rows = input_shape[1]
|
||||
cols = input_shape[2]
|
||||
else:
|
||||
raise Exception('Invalid dim_ordering: ' + self.dim_ordering)
|
||||
raise ValueError('Invalid dim_ordering:', self.dim_ordering)
|
||||
|
||||
rows = conv_output_length(rows, self.nb_row,
|
||||
self.border_mode, self.subsample[0])
|
||||
@@ -353,8 +338,6 @@ class LocallyConnected2D(Layer):
|
||||
return (input_shape[0], self.nb_filter, rows, cols)
|
||||
elif self.dim_ordering == 'tf':
|
||||
return (input_shape[0], rows, cols, self.nb_filter)
|
||||
else:
|
||||
raise Exception('Invalid dim_ordering: ' + self.dim_ordering)
|
||||
|
||||
def call(self, x, mask=None):
|
||||
stride_row, stride_col = self.subsample
|
||||
@@ -399,15 +382,13 @@ class LocallyConnected2D(Layer):
|
||||
output = K.reshape(output, (self.output_row, self.output_col, -1, nb_filter))
|
||||
output = K.permute_dimensions(output, (2, 0, 1, 3))
|
||||
else:
|
||||
raise Exception('Invalid dim_ordering: ' + self.dim_ordering)
|
||||
raise ValueError('Invalid dim_ordering:', self.dim_ordering)
|
||||
|
||||
if self.bias:
|
||||
if self.dim_ordering == 'th':
|
||||
output += K.reshape(self.b, (1, nb_filter, self.output_row, self.output_col))
|
||||
elif self.dim_ordering == 'tf':
|
||||
output += K.reshape(self.b, (1, self.output_row, self.output_col, nb_filter))
|
||||
else:
|
||||
raise Exception('Invalid dim_ordering: ' + self.dim_ordering)
|
||||
|
||||
output = self.activation(output)
|
||||
return output
|
||||
|
||||
@@ -10,6 +10,7 @@ class BatchNormalization(Layer):
|
||||
|
||||
# Arguments
|
||||
epsilon: small float > 0. Fuzz parameter.
|
||||
Theano expects epsilon >= 1e-5.
|
||||
mode: integer, 0, 1 or 2.
|
||||
- 0: feature-wise normalization.
|
||||
Each feature map in the input will
|
||||
@@ -60,7 +61,7 @@ class BatchNormalization(Layer):
|
||||
# References
|
||||
- [Batch Normalization: Accelerating Deep Network Training by Reducing Internal Covariate Shift](http://jmlr.org/proceedings/papers/v37/ioffe15.pdf)
|
||||
'''
|
||||
def __init__(self, epsilon=1e-5, mode=0, axis=-1, momentum=0.99,
|
||||
def __init__(self, epsilon=1e-3, mode=0, axis=-1, momentum=0.99,
|
||||
weights=None, beta_init='zero', gamma_init='one',
|
||||
gamma_regularizer=None, beta_regularizer=None, **kwargs):
|
||||
self.supports_masking = True
|
||||
@@ -81,66 +82,45 @@ class BatchNormalization(Layer):
|
||||
self.input_spec = [InputSpec(shape=input_shape)]
|
||||
shape = (input_shape[self.axis],)
|
||||
|
||||
self.gamma = self.gamma_init(shape, name='{}_gamma'.format(self.name))
|
||||
self.beta = self.beta_init(shape, name='{}_beta'.format(self.name))
|
||||
self.trainable_weights = [self.gamma, self.beta]
|
||||
|
||||
self.regularizers = []
|
||||
if self.gamma_regularizer:
|
||||
self.gamma_regularizer.set_param(self.gamma)
|
||||
self.regularizers.append(self.gamma_regularizer)
|
||||
|
||||
if self.beta_regularizer:
|
||||
self.beta_regularizer.set_param(self.beta)
|
||||
self.regularizers.append(self.beta_regularizer)
|
||||
|
||||
self.running_mean = K.zeros(shape,
|
||||
name='{}_running_mean'.format(self.name))
|
||||
self.running_std = K.ones(shape,
|
||||
name='{}_running_std'.format(self.name))
|
||||
self.non_trainable_weights = [self.running_mean, self.running_std]
|
||||
self.gamma = self.add_weight(shape,
|
||||
initializer=self.gamma_init,
|
||||
regularizer=self.gamma_regularizer,
|
||||
name='{}_gamma'.format(self.name))
|
||||
self.beta = self.add_weight(shape,
|
||||
initializer=self.beta_init,
|
||||
regularizer=self.beta_regularizer,
|
||||
name='{}_beta'.format(self.name))
|
||||
self.running_mean = self.add_weight(shape, initializer='zero',
|
||||
name='{}_running_mean'.format(self.name),
|
||||
trainable=False)
|
||||
self.running_std = self.add_weight(shape, initializer='one',
|
||||
name='{}_running_std'.format(self.name),
|
||||
trainable=False)
|
||||
|
||||
if self.initial_weights is not None:
|
||||
self.set_weights(self.initial_weights)
|
||||
del self.initial_weights
|
||||
self.built = True
|
||||
self.called_with = None
|
||||
|
||||
def call(self, x, mask=None):
|
||||
if self.mode == 0 or self.mode == 2:
|
||||
assert self.built, 'Layer must be built before being called'
|
||||
input_shape = self.input_spec[0].shape
|
||||
input_shape = K.int_shape(x)
|
||||
|
||||
reduction_axes = list(range(len(input_shape)))
|
||||
del reduction_axes[self.axis]
|
||||
broadcast_shape = [1] * len(input_shape)
|
||||
broadcast_shape[self.axis] = input_shape[self.axis]
|
||||
|
||||
if self.mode == 2:
|
||||
x_normed, mean, std = K.normalize_batch_in_training(
|
||||
x, self.gamma, self.beta, reduction_axes,
|
||||
epsilon=self.epsilon)
|
||||
else:
|
||||
# mode 0
|
||||
if self.called_with not in {None, x}:
|
||||
raise Exception('You are attempting to share a '
|
||||
'same `BatchNormalization` layer across '
|
||||
'different data flows. '
|
||||
'This is not possible. '
|
||||
'You should use `mode=2` in '
|
||||
'`BatchNormalization`, which has '
|
||||
'a similar behavior but is shareable '
|
||||
'(see docs for a description of '
|
||||
'the behavior).')
|
||||
self.called_with = x
|
||||
x_normed, mean, std = K.normalize_batch_in_training(
|
||||
x, self.gamma, self.beta, reduction_axes,
|
||||
epsilon=self.epsilon)
|
||||
x_normed, mean, std = K.normalize_batch_in_training(
|
||||
x, self.gamma, self.beta, reduction_axes,
|
||||
epsilon=self.epsilon)
|
||||
|
||||
self.updates = [K.moving_average_update(self.running_mean, mean, self.momentum),
|
||||
K.moving_average_update(self.running_std, std, self.momentum)]
|
||||
if self.mode == 0:
|
||||
self.add_update([K.moving_average_update(self.running_mean, mean, self.momentum),
|
||||
K.moving_average_update(self.running_std, std, self.momentum)], x)
|
||||
|
||||
if K.backend() == 'tensorflow' and sorted(reduction_axes) == range(K.ndim(x))[:-1]:
|
||||
if sorted(reduction_axes) == range(K.ndim(x))[:-1]:
|
||||
x_normed_running = K.batch_normalization(
|
||||
x, self.running_mean, self.running_std,
|
||||
self.beta, self.gamma,
|
||||
@@ -168,11 +148,11 @@ class BatchNormalization(Layer):
|
||||
return x_normed
|
||||
|
||||
def get_config(self):
|
||||
config = {"epsilon": self.epsilon,
|
||||
"mode": self.mode,
|
||||
"axis": self.axis,
|
||||
"gamma_regularizer": self.gamma_regularizer.get_config() if self.gamma_regularizer else None,
|
||||
"beta_regularizer": self.beta_regularizer.get_config() if self.beta_regularizer else None,
|
||||
"momentum": self.momentum}
|
||||
config = {'epsilon': self.epsilon,
|
||||
'mode': self.mode,
|
||||
'axis': self.axis,
|
||||
'gamma_regularizer': self.gamma_regularizer.get_config() if self.gamma_regularizer else None,
|
||||
'beta_regularizer': self.beta_regularizer.get_config() if self.beta_regularizer else None,
|
||||
'momentum': self.momentum}
|
||||
base_config = super(BatchNormalization, self).get_config()
|
||||
return dict(list(base_config.items()) + list(config.items()))
|
||||
|
||||
+27
-26
@@ -20,7 +20,8 @@ class _Pooling1D(Layer):
|
||||
self.stride = stride
|
||||
self.st = (self.stride, 1)
|
||||
self.pool_size = (pool_length, 1)
|
||||
assert border_mode in {'valid', 'same'}, 'border_mode must be in {valid, same}'
|
||||
if border_mode not in {'valid', 'same'}:
|
||||
raise ValueError('`border_mode` must be in {valid, same}.')
|
||||
self.border_mode = border_mode
|
||||
self.input_spec = [InputSpec(ndim=3)]
|
||||
|
||||
@@ -34,14 +35,12 @@ class _Pooling1D(Layer):
|
||||
raise NotImplementedError
|
||||
|
||||
def call(self, x, mask=None):
|
||||
x = K.expand_dims(x, -1) # add dummy last dimension
|
||||
x = K.permute_dimensions(x, (0, 2, 1, 3))
|
||||
x = K.expand_dims(x, 2) # add dummy last dimension
|
||||
output = self._pooling_function(inputs=x, pool_size=self.pool_size,
|
||||
strides=self.st,
|
||||
border_mode=self.border_mode,
|
||||
dim_ordering='th')
|
||||
output = K.permute_dimensions(output, (0, 2, 1, 3))
|
||||
return K.squeeze(output, 3) # remove dummy last dimension
|
||||
dim_ordering='tf')
|
||||
return K.squeeze(output, 2) # remove dummy last dimension
|
||||
|
||||
def get_config(self):
|
||||
config = {'stride': self.stride,
|
||||
@@ -66,7 +65,6 @@ class MaxPooling1D(_Pooling1D):
|
||||
2 will halve the input.
|
||||
If None, it will default to `pool_length`.
|
||||
border_mode: 'valid' or 'same'.
|
||||
Note: 'same' will only work with TensorFlow for the time being.
|
||||
'''
|
||||
|
||||
def __init__(self, pool_length=2, stride=None,
|
||||
@@ -89,7 +87,6 @@ class AveragePooling1D(_Pooling1D):
|
||||
stride: integer, or None. Stride value.
|
||||
If None, it will default to `pool_length`.
|
||||
border_mode: 'valid' or 'same'.
|
||||
Note: 'same' will only work with TensorFlow for the time being.
|
||||
|
||||
# Input shape
|
||||
3D tensor with shape: `(samples, steps, features)`.
|
||||
@@ -123,9 +120,11 @@ class _Pooling2D(Layer):
|
||||
if strides is None:
|
||||
strides = self.pool_size
|
||||
self.strides = tuple(strides)
|
||||
assert border_mode in {'valid', 'same'}, 'border_mode must be in {valid, same}'
|
||||
if border_mode not in {'valid', 'same'}:
|
||||
raise ValueError('`border_mode` must be in {valid, same}.')
|
||||
self.border_mode = border_mode
|
||||
assert dim_ordering in {'tf', 'th'}, 'dim_ordering must be in {tf, th}'
|
||||
if dim_ordering not in {'tf', 'th'}:
|
||||
raise ValueError('`dim_ordering` must be in {tf, th}.')
|
||||
self.dim_ordering = dim_ordering
|
||||
self.input_spec = [InputSpec(ndim=4)]
|
||||
|
||||
@@ -137,7 +136,7 @@ class _Pooling2D(Layer):
|
||||
rows = input_shape[1]
|
||||
cols = input_shape[2]
|
||||
else:
|
||||
raise Exception('Invalid dim_ordering: ' + self.dim_ordering)
|
||||
raise ValueError('Invalid dim_ordering:', self.dim_ordering)
|
||||
|
||||
rows = conv_output_length(rows, self.pool_size[0],
|
||||
self.border_mode, self.strides[0])
|
||||
@@ -148,15 +147,14 @@ class _Pooling2D(Layer):
|
||||
return (input_shape[0], input_shape[1], rows, cols)
|
||||
elif self.dim_ordering == 'tf':
|
||||
return (input_shape[0], rows, cols, input_shape[3])
|
||||
else:
|
||||
raise Exception('Invalid dim_ordering: ' + self.dim_ordering)
|
||||
|
||||
def _pooling_function(self, inputs, pool_size, strides,
|
||||
border_mode, dim_ordering):
|
||||
raise NotImplementedError
|
||||
|
||||
def call(self, x, mask=None):
|
||||
output = self._pooling_function(inputs=x, pool_size=self.pool_size,
|
||||
output = self._pooling_function(inputs=x,
|
||||
pool_size=self.pool_size,
|
||||
strides=self.strides,
|
||||
border_mode=self.border_mode,
|
||||
dim_ordering=self.dim_ordering)
|
||||
@@ -181,7 +179,6 @@ class MaxPooling2D(_Pooling2D):
|
||||
strides: tuple of 2 integers, or None. Strides values.
|
||||
If None, it will default to `pool_size`.
|
||||
border_mode: 'valid' or 'same'.
|
||||
Note: 'same' will only work with TensorFlow for the time being.
|
||||
dim_ordering: 'th' or 'tf'. In 'th' mode, the channels dimension
|
||||
(the depth) is at index 1, in 'tf' mode is it at index 3.
|
||||
It defaults to the `image_dim_ordering` value found in your
|
||||
@@ -209,7 +206,8 @@ class MaxPooling2D(_Pooling2D):
|
||||
def _pooling_function(self, inputs, pool_size, strides,
|
||||
border_mode, dim_ordering):
|
||||
output = K.pool2d(inputs, pool_size, strides,
|
||||
border_mode, dim_ordering, pool_mode='max')
|
||||
border_mode, dim_ordering,
|
||||
pool_mode='max')
|
||||
return output
|
||||
|
||||
|
||||
@@ -223,7 +221,6 @@ class AveragePooling2D(_Pooling2D):
|
||||
strides: tuple of 2 integers, or None. Strides values.
|
||||
If None, it will default to `pool_size`.
|
||||
border_mode: 'valid' or 'same'.
|
||||
Note: 'same' will only work with TensorFlow for the time being.
|
||||
dim_ordering: 'th' or 'tf'. In 'th' mode, the channels dimension
|
||||
(the depth) is at index 1, in 'tf' mode is it at index 3.
|
||||
It defaults to the `image_dim_ordering` value found in your
|
||||
@@ -268,9 +265,11 @@ class _Pooling3D(Layer):
|
||||
if strides is None:
|
||||
strides = self.pool_size
|
||||
self.strides = tuple(strides)
|
||||
assert border_mode in {'valid', 'same'}, 'border_mode must be in {valid, same}'
|
||||
if border_mode not in {'valid', 'same'}:
|
||||
raise ValueError('`border_mode` must be in {valid, same}.')
|
||||
self.border_mode = border_mode
|
||||
assert dim_ordering in {'tf', 'th'}, 'dim_ordering must be in {tf, th}'
|
||||
if dim_ordering not in {'tf', 'th'}:
|
||||
raise ValueError('`dim_ordering` must be in {tf, th}.')
|
||||
self.dim_ordering = dim_ordering
|
||||
self.input_spec = [InputSpec(ndim=5)]
|
||||
|
||||
@@ -284,7 +283,7 @@ class _Pooling3D(Layer):
|
||||
len_dim2 = input_shape[2]
|
||||
len_dim3 = input_shape[3]
|
||||
else:
|
||||
raise Exception('Invalid dim_ordering: ' + self.dim_ordering)
|
||||
raise ValueError('Invalid dim_ordering:', self.dim_ordering)
|
||||
|
||||
len_dim1 = conv_output_length(len_dim1, self.pool_size[0],
|
||||
self.border_mode, self.strides[0])
|
||||
@@ -292,13 +291,14 @@ class _Pooling3D(Layer):
|
||||
self.border_mode, self.strides[1])
|
||||
len_dim3 = conv_output_length(len_dim3, self.pool_size[2],
|
||||
self.border_mode, self.strides[2])
|
||||
|
||||
if self.dim_ordering == 'th':
|
||||
return (input_shape[0], input_shape[1], len_dim1, len_dim2, len_dim3)
|
||||
return (input_shape[0],
|
||||
input_shape[1],
|
||||
len_dim1, len_dim2, len_dim3)
|
||||
elif self.dim_ordering == 'tf':
|
||||
return (input_shape[0], len_dim1, len_dim2, len_dim3, input_shape[4])
|
||||
else:
|
||||
raise Exception('Invalid dim_ordering: ' + self.dim_ordering)
|
||||
return (input_shape[0],
|
||||
len_dim1, len_dim2, len_dim3,
|
||||
input_shape[4])
|
||||
|
||||
def _pooling_function(self, inputs, pool_size, strides,
|
||||
border_mode, dim_ordering):
|
||||
@@ -396,7 +396,8 @@ class AveragePooling3D(_Pooling3D):
|
||||
def _pooling_function(self, inputs, pool_size, strides,
|
||||
border_mode, dim_ordering):
|
||||
output = K.pool3d(inputs, pool_size, strides,
|
||||
border_mode, dim_ordering, pool_mode='avg')
|
||||
border_mode, dim_ordering,
|
||||
pool_mode='avg')
|
||||
return output
|
||||
|
||||
|
||||
|
||||
+180
-137
@@ -32,7 +32,7 @@ def time_distributed_dense(x, w, b=None, dropout=None,
|
||||
x = x + b
|
||||
# reshape to 3D tensor
|
||||
if K.backend() == 'tensorflow':
|
||||
x = K.reshape(x, K.pack([-1, timesteps, output_dim]))
|
||||
x = K.reshape(x, K.stack([-1, timesteps, output_dim]))
|
||||
x.set_shape([None, None, output_dim])
|
||||
else:
|
||||
x = K.reshape(x, (-1, timesteps, output_dim))
|
||||
@@ -198,7 +198,19 @@ class Recurrent(Layer):
|
||||
# input shape: (nb_samples, time (padded with zeros), input_dim)
|
||||
# note that the .build() method of subclasses MUST define
|
||||
# self.input_spec with a complete input shape.
|
||||
input_shape = self.input_spec[0].shape
|
||||
input_shape = K.int_shape(x)
|
||||
if self.unroll and input_shape[1] is None:
|
||||
raise ValueError('Cannot unroll a RNN if the '
|
||||
'time dimension is undefined. \n'
|
||||
'- If using a Sequential model, '
|
||||
'specify the time dimension by passing '
|
||||
'an `input_shape` or `batch_input_shape` '
|
||||
'argument to your first layer. If your '
|
||||
'first layer is an Embedding, you can '
|
||||
'also use the `input_length` argument.\n'
|
||||
'- If using the functional API, specify '
|
||||
'the time dimension by passing a `shape` '
|
||||
'or `batch_shape` argument to your Input layer.')
|
||||
if self.stateful:
|
||||
initial_states = self.states
|
||||
else:
|
||||
@@ -214,9 +226,10 @@ class Recurrent(Layer):
|
||||
unroll=self.unroll,
|
||||
input_length=input_shape[1])
|
||||
if self.stateful:
|
||||
self.updates = []
|
||||
updates = []
|
||||
for i in range(len(states)):
|
||||
self.updates.append((self.states[i], states[i]))
|
||||
updates.append((self.states[i], states[i]))
|
||||
self.add_update(updates, x)
|
||||
|
||||
if self.return_sequences:
|
||||
return outputs
|
||||
@@ -229,7 +242,7 @@ class Recurrent(Layer):
|
||||
'stateful': self.stateful,
|
||||
'unroll': self.unroll,
|
||||
'consume_less': self.consume_less}
|
||||
if self.stateful:
|
||||
if self.stateful and self.input_spec[0].shape:
|
||||
config['batch_input_shape'] = self.input_spec[0].shape
|
||||
else:
|
||||
config['input_dim'] = self.input_dim
|
||||
@@ -275,7 +288,8 @@ class SimpleRNN(Recurrent):
|
||||
self.W_regularizer = regularizers.get(W_regularizer)
|
||||
self.U_regularizer = regularizers.get(U_regularizer)
|
||||
self.b_regularizer = regularizers.get(b_regularizer)
|
||||
self.dropout_W, self.dropout_U = dropout_W, dropout_U
|
||||
self.dropout_W = dropout_W
|
||||
self.dropout_U = dropout_U
|
||||
|
||||
if self.dropout_W or self.dropout_U:
|
||||
self.uses_learning_phase = True
|
||||
@@ -291,35 +305,38 @@ class SimpleRNN(Recurrent):
|
||||
input_dim = input_shape[2]
|
||||
self.input_dim = input_dim
|
||||
|
||||
self.W = self.init((input_dim, self.output_dim),
|
||||
name='{}_W'.format(self.name))
|
||||
self.U = self.inner_init((self.output_dim, self.output_dim),
|
||||
name='{}_U'.format(self.name))
|
||||
self.b = K.zeros((self.output_dim,), name='{}_b'.format(self.name))
|
||||
|
||||
self.regularizers = []
|
||||
if self.W_regularizer:
|
||||
self.W_regularizer.set_param(self.W)
|
||||
self.regularizers.append(self.W_regularizer)
|
||||
if self.U_regularizer:
|
||||
self.U_regularizer.set_param(self.U)
|
||||
self.regularizers.append(self.U_regularizer)
|
||||
if self.b_regularizer:
|
||||
self.b_regularizer.set_param(self.b)
|
||||
self.regularizers.append(self.b_regularizer)
|
||||
|
||||
self.trainable_weights = [self.W, self.U, self.b]
|
||||
self.W = self.add_weight((input_dim, self.output_dim),
|
||||
initializer=self.init,
|
||||
name='{}_W'.format(self.name),
|
||||
regularizer=self.W_regularizer)
|
||||
self.U = self.add_weight((self.output_dim, self.output_dim),
|
||||
initializer=self.inner_init,
|
||||
name='{}_U'.format(self.name),
|
||||
regularizer=self.U_regularizer)
|
||||
self.b = self.add_weight((self.output_dim,),
|
||||
initializer='zero',
|
||||
name='{}_b'.format(self.name),
|
||||
regularizer=self.b_regularizer)
|
||||
|
||||
if self.initial_weights is not None:
|
||||
self.set_weights(self.initial_weights)
|
||||
del self.initial_weights
|
||||
self.built = True
|
||||
|
||||
def reset_states(self):
|
||||
assert self.stateful, 'Layer must be stateful.'
|
||||
input_shape = self.input_spec[0].shape
|
||||
if not input_shape[0]:
|
||||
raise Exception('If a RNN is stateful, a complete ' +
|
||||
'input_shape must be provided (including batch size).')
|
||||
raise ValueError('If a RNN is stateful, it needs to know '
|
||||
'its batch size. Specify the batch size '
|
||||
'of your input tensors: \n'
|
||||
'- If using a Sequential model, '
|
||||
'specify the batch size by passing '
|
||||
'a `batch_input_shape` '
|
||||
'argument to your first layer.\n'
|
||||
'- If using the functional API, specify '
|
||||
'the time dimension by passing a '
|
||||
'`batch_shape` argument to your Input layer.')
|
||||
if hasattr(self, 'states'):
|
||||
K.set_value(self.states[0],
|
||||
np.zeros((input_shape[0], self.output_dim)))
|
||||
@@ -328,7 +345,7 @@ class SimpleRNN(Recurrent):
|
||||
|
||||
def preprocess_input(self, x):
|
||||
if self.consume_less == 'cpu':
|
||||
input_shape = self.input_spec[0].shape
|
||||
input_shape = K.int_shape(x)
|
||||
input_dim = input_shape[2]
|
||||
timesteps = input_shape[1]
|
||||
return time_distributed_dense(x, self.W, self.b, self.dropout_W,
|
||||
@@ -360,10 +377,10 @@ class SimpleRNN(Recurrent):
|
||||
else:
|
||||
constants.append(K.cast_to_floatx(1.))
|
||||
if self.consume_less == 'cpu' and 0 < self.dropout_W < 1:
|
||||
input_shape = self.input_spec[0].shape
|
||||
input_shape = K.int_shape(x)
|
||||
input_dim = input_shape[-1]
|
||||
ones = K.ones_like(K.reshape(x[:, 0, 0], (-1, 1)))
|
||||
ones = K.tile(ones, (1, input_dim))
|
||||
ones = K.tile(ones, (1, int(input_dim)))
|
||||
B_W = K.in_train_phase(K.dropout(ones, self.dropout_W), ones)
|
||||
constants.append(B_W)
|
||||
else:
|
||||
@@ -407,7 +424,7 @@ class GRU(Recurrent):
|
||||
dropout_U: float between 0 and 1. Fraction of the input units to drop for recurrent connections.
|
||||
|
||||
# References
|
||||
- [On the Properties of Neural Machine Translation: Encoder–Decoder Approaches](http://www.aclweb.org/anthology/W14-4012)
|
||||
- [On the Properties of Neural Machine Translation: Encoder-Decoder Approaches](http://www.aclweb.org/anthology/W14-4012)
|
||||
- [Empirical Evaluation of Gated Recurrent Neural Networks on Sequence Modeling](http://arxiv.org/pdf/1412.3555v1.pdf)
|
||||
- [A Theoretically Grounded Application of Dropout in Recurrent Neural Networks](http://arxiv.org/abs/1512.05287)
|
||||
'''
|
||||
@@ -424,7 +441,8 @@ class GRU(Recurrent):
|
||||
self.W_regularizer = regularizers.get(W_regularizer)
|
||||
self.U_regularizer = regularizers.get(U_regularizer)
|
||||
self.b_regularizer = regularizers.get(b_regularizer)
|
||||
self.dropout_W, self.dropout_U = dropout_W, dropout_U
|
||||
self.dropout_W = dropout_W
|
||||
self.dropout_U = dropout_U
|
||||
|
||||
if self.dropout_W or self.dropout_U:
|
||||
self.uses_learning_phase = True
|
||||
@@ -441,67 +459,70 @@ class GRU(Recurrent):
|
||||
self.states = [None]
|
||||
|
||||
if self.consume_less == 'gpu':
|
||||
|
||||
self.W = self.init((self.input_dim, 3 * self.output_dim),
|
||||
name='{}_W'.format(self.name))
|
||||
self.U = self.inner_init((self.output_dim, 3 * self.output_dim),
|
||||
name='{}_U'.format(self.name))
|
||||
|
||||
self.b = K.variable(np.hstack((np.zeros(self.output_dim),
|
||||
np.zeros(self.output_dim),
|
||||
np.zeros(self.output_dim))),
|
||||
name='{}_b'.format(self.name))
|
||||
|
||||
self.trainable_weights = [self.W, self.U, self.b]
|
||||
self.W = self.add_weight((self.input_dim, 3 * self.output_dim),
|
||||
initializer=self.init,
|
||||
name='{}_W'.format(self.name),
|
||||
regularizer=self.W_regularizer)
|
||||
self.U = self.add_weight((self.output_dim, 3 * self.output_dim),
|
||||
initializer=self.inner_init,
|
||||
name='{}_U'.format(self.name),
|
||||
regularizer=self.U_regularizer)
|
||||
self.b = self.add_weight((self.output_dim * 3,),
|
||||
initializer='zero',
|
||||
name='{}_b'.format(self.name),
|
||||
regularizer=self.b_regularizer)
|
||||
else:
|
||||
|
||||
self.W_z = self.init((self.input_dim, self.output_dim),
|
||||
name='{}_W_z'.format(self.name))
|
||||
self.U_z = self.inner_init((self.output_dim, self.output_dim),
|
||||
name='{}_U_z'.format(self.name))
|
||||
self.b_z = K.zeros((self.output_dim,), name='{}_b_z'.format(self.name))
|
||||
|
||||
self.W_r = self.init((self.input_dim, self.output_dim),
|
||||
name='{}_W_r'.format(self.name))
|
||||
self.U_r = self.inner_init((self.output_dim, self.output_dim),
|
||||
name='{}_U_r'.format(self.name))
|
||||
self.b_r = K.zeros((self.output_dim,), name='{}_b_r'.format(self.name))
|
||||
|
||||
self.W_h = self.init((self.input_dim, self.output_dim),
|
||||
name='{}_W_h'.format(self.name))
|
||||
self.U_h = self.inner_init((self.output_dim, self.output_dim),
|
||||
name='{}_U_h'.format(self.name))
|
||||
self.b_h = K.zeros((self.output_dim,), name='{}_b_h'.format(self.name))
|
||||
|
||||
self.trainable_weights = [self.W_z, self.U_z, self.b_z,
|
||||
self.W_r, self.U_r, self.b_r,
|
||||
self.W_h, self.U_h, self.b_h]
|
||||
|
||||
self.W_z = self.add_weight((self.input_dim, self.output_dim),
|
||||
initializer=self.init,
|
||||
name='{}_W_z'.format(self.name),
|
||||
regularizer=self.W_regularizer)
|
||||
self.U_z = self.add_weight((self.output_dim, self.output_dim),
|
||||
initializer=self.init,
|
||||
name='{}_U_z'.format(self.name),
|
||||
regularizer=self.W_regularizer)
|
||||
self.b_z = self.add_weight((self.output_dim,),
|
||||
initializer='zero',
|
||||
name='{}_b_z'.format(self.name),
|
||||
regularizer=self.b_regularizer)
|
||||
self.W_r = self.add_weight((self.input_dim, self.output_dim),
|
||||
initializer=self.init,
|
||||
name='{}_W_r'.format(self.name),
|
||||
regularizer=self.W_regularizer)
|
||||
self.U_r = self.add_weight((self.output_dim, self.output_dim),
|
||||
initializer=self.init,
|
||||
name='{}_U_r'.format(self.name),
|
||||
regularizer=self.W_regularizer)
|
||||
self.b_r = self.add_weight((self.output_dim,),
|
||||
initializer='zero',
|
||||
name='{}_b_r'.format(self.name),
|
||||
regularizer=self.b_regularizer)
|
||||
self.W_h = self.add_weight((self.input_dim, self.output_dim),
|
||||
initializer=self.init,
|
||||
name='{}_W_h'.format(self.name),
|
||||
regularizer=self.W_regularizer)
|
||||
self.U_h = self.add_weight((self.output_dim, self.output_dim),
|
||||
initializer=self.init,
|
||||
name='{}_U_h'.format(self.name),
|
||||
regularizer=self.W_regularizer)
|
||||
self.b_h = self.add_weight((self.output_dim,),
|
||||
initializer='zero',
|
||||
name='{}_b_h'.format(self.name),
|
||||
regularizer=self.b_regularizer)
|
||||
self.W = K.concatenate([self.W_z, self.W_r, self.W_h])
|
||||
self.U = K.concatenate([self.U_z, self.U_r, self.U_h])
|
||||
self.b = K.concatenate([self.b_z, self.b_r, self.b_h])
|
||||
|
||||
self.regularizers = []
|
||||
if self.W_regularizer:
|
||||
self.W_regularizer.set_param(self.W)
|
||||
self.regularizers.append(self.W_regularizer)
|
||||
if self.U_regularizer:
|
||||
self.U_regularizer.set_param(self.U)
|
||||
self.regularizers.append(self.U_regularizer)
|
||||
if self.b_regularizer:
|
||||
self.b_regularizer.set_param(self.b)
|
||||
self.regularizers.append(self.b_regularizer)
|
||||
|
||||
if self.initial_weights is not None:
|
||||
self.set_weights(self.initial_weights)
|
||||
del self.initial_weights
|
||||
self.built = True
|
||||
|
||||
def reset_states(self):
|
||||
assert self.stateful, 'Layer must be stateful.'
|
||||
input_shape = self.input_spec[0].shape
|
||||
if not input_shape[0]:
|
||||
raise Exception('If a RNN is stateful, a complete ' +
|
||||
'input_shape must be provided (including batch size).')
|
||||
raise ValueError('If a RNN is stateful, a complete ' +
|
||||
'input_shape must be provided (including batch size).')
|
||||
if hasattr(self, 'states'):
|
||||
K.set_value(self.states[0],
|
||||
np.zeros((input_shape[0], self.output_dim)))
|
||||
@@ -510,7 +531,7 @@ class GRU(Recurrent):
|
||||
|
||||
def preprocess_input(self, x):
|
||||
if self.consume_less == 'cpu':
|
||||
input_shape = self.input_spec[0].shape
|
||||
input_shape = K.int_shape(x)
|
||||
input_dim = input_shape[2]
|
||||
timesteps = input_shape[1]
|
||||
|
||||
@@ -555,7 +576,7 @@ class GRU(Recurrent):
|
||||
x_r = K.dot(x * B_W[1], self.W_r) + self.b_r
|
||||
x_h = K.dot(x * B_W[2], self.W_h) + self.b_h
|
||||
else:
|
||||
raise Exception('Unknown `consume_less` mode.')
|
||||
raise ValueError('Unknown `consume_less` mode.')
|
||||
z = self.inner_activation(x_z + K.dot(h_tm1 * B_U[0], self.U_z))
|
||||
r = self.inner_activation(x_r + K.dot(h_tm1 * B_U[1], self.U_r))
|
||||
|
||||
@@ -574,10 +595,10 @@ class GRU(Recurrent):
|
||||
constants.append([K.cast_to_floatx(1.) for _ in range(3)])
|
||||
|
||||
if 0 < self.dropout_W < 1:
|
||||
input_shape = self.input_spec[0].shape
|
||||
input_shape = K.int_shape(x)
|
||||
input_dim = input_shape[-1]
|
||||
ones = K.ones_like(K.reshape(x[:, 0, 0], (-1, 1)))
|
||||
ones = K.tile(ones, (1, input_dim))
|
||||
ones = K.tile(ones, (1, int(input_dim)))
|
||||
B_W = [K.in_train_phase(K.dropout(ones, self.dropout_W), ones) for _ in range(3)]
|
||||
constants.append(B_W)
|
||||
else:
|
||||
@@ -630,7 +651,7 @@ class LSTM(Recurrent):
|
||||
# References
|
||||
- [Long short-term memory](http://deeplearning.cs.cmu.edu/pdfs/Hochreiter97_lstm.pdf) (original 1997 paper)
|
||||
- [Learning to forget: Continual prediction with LSTM](http://www.mitpressjournals.org/doi/pdf/10.1162/089976600300015015)
|
||||
- [Supervised sequence labelling with recurrent neural networks](http://www.cs.toronto.edu/~graves/preprint.pdf)
|
||||
- [Supervised sequence labeling with recurrent neural networks](http://www.cs.toronto.edu/~graves/preprint.pdf)
|
||||
- [A Theoretically Grounded Application of Dropout in Recurrent Neural Networks](http://arxiv.org/abs/1512.05287)
|
||||
'''
|
||||
def __init__(self, output_dim,
|
||||
@@ -648,7 +669,8 @@ class LSTM(Recurrent):
|
||||
self.W_regularizer = regularizers.get(W_regularizer)
|
||||
self.U_regularizer = regularizers.get(U_regularizer)
|
||||
self.b_regularizer = regularizers.get(b_regularizer)
|
||||
self.dropout_W, self.dropout_U = dropout_W, dropout_U
|
||||
self.dropout_W = dropout_W
|
||||
self.dropout_U = dropout_U
|
||||
|
||||
if self.dropout_W or self.dropout_U:
|
||||
self.uses_learning_phase = True
|
||||
@@ -665,73 +687,94 @@ class LSTM(Recurrent):
|
||||
self.states = [None, None]
|
||||
|
||||
if self.consume_less == 'gpu':
|
||||
self.W = self.init((self.input_dim, 4 * self.output_dim),
|
||||
name='{}_W'.format(self.name))
|
||||
self.U = self.inner_init((self.output_dim, 4 * self.output_dim),
|
||||
name='{}_U'.format(self.name))
|
||||
self.W = self.add_weight((self.input_dim, 4 * self.output_dim),
|
||||
initializer=self.init,
|
||||
name='{}_W'.format(self.name),
|
||||
regularizer=self.W_regularizer)
|
||||
self.U = self.add_weight((self.output_dim, 4 * self.output_dim),
|
||||
initializer=self.inner_init,
|
||||
name='{}_U'.format(self.name),
|
||||
regularizer=self.U_regularizer)
|
||||
|
||||
self.b = K.variable(np.hstack((np.zeros(self.output_dim),
|
||||
K.get_value(self.forget_bias_init((self.output_dim,))),
|
||||
np.zeros(self.output_dim),
|
||||
np.zeros(self.output_dim))),
|
||||
name='{}_b'.format(self.name))
|
||||
self.trainable_weights = [self.W, self.U, self.b]
|
||||
def b_reg(shape, name=None):
|
||||
return K.variable(np.hstack((np.zeros(self.output_dim),
|
||||
K.get_value(self.forget_bias_init((self.output_dim,))),
|
||||
np.zeros(self.output_dim),
|
||||
np.zeros(self.output_dim))),
|
||||
name='{}_b'.format(self.name))
|
||||
self.b = self.add_weight((self.output_dim * 4,),
|
||||
initializer=b_reg,
|
||||
name='{}_b'.format(self.name),
|
||||
regularizer=self.b_regularizer)
|
||||
else:
|
||||
self.W_i = self.init((self.input_dim, self.output_dim),
|
||||
name='{}_W_i'.format(self.name))
|
||||
self.U_i = self.inner_init((self.output_dim, self.output_dim),
|
||||
name='{}_U_i'.format(self.name))
|
||||
self.b_i = K.zeros((self.output_dim,), name='{}_b_i'.format(self.name))
|
||||
|
||||
self.W_f = self.init((self.input_dim, self.output_dim),
|
||||
name='{}_W_f'.format(self.name))
|
||||
self.U_f = self.inner_init((self.output_dim, self.output_dim),
|
||||
name='{}_U_f'.format(self.name))
|
||||
self.b_f = self.forget_bias_init((self.output_dim,),
|
||||
name='{}_b_f'.format(self.name))
|
||||
|
||||
self.W_c = self.init((self.input_dim, self.output_dim),
|
||||
name='{}_W_c'.format(self.name))
|
||||
self.U_c = self.inner_init((self.output_dim, self.output_dim),
|
||||
name='{}_U_c'.format(self.name))
|
||||
self.b_c = K.zeros((self.output_dim,), name='{}_b_c'.format(self.name))
|
||||
|
||||
self.W_o = self.init((self.input_dim, self.output_dim),
|
||||
name='{}_W_o'.format(self.name))
|
||||
self.U_o = self.inner_init((self.output_dim, self.output_dim),
|
||||
name='{}_U_o'.format(self.name))
|
||||
self.b_o = K.zeros((self.output_dim,), name='{}_b_o'.format(self.name))
|
||||
self.W_i = self.add_weight((self.input_dim, self.output_dim),
|
||||
initializer=self.init,
|
||||
name='{}_W_i'.format(self.name),
|
||||
regularizer=self.W_regularizer)
|
||||
self.U_i = self.add_weight((self.output_dim, self.output_dim),
|
||||
initializer=self.init,
|
||||
name='{}_U_i'.format(self.name),
|
||||
regularizer=self.W_regularizer)
|
||||
self.b_i = self.add_weight((self.output_dim,),
|
||||
initializer='zero',
|
||||
name='{}_b_i'.format(self.name),
|
||||
regularizer=self.b_regularizer)
|
||||
self.W_f = self.add_weight((self.input_dim, self.output_dim),
|
||||
initializer=self.init,
|
||||
name='{}_W_f'.format(self.name),
|
||||
regularizer=self.W_regularizer)
|
||||
self.U_f = self.add_weight((self.output_dim, self.output_dim),
|
||||
initializer=self.init,
|
||||
name='{}_U_f'.format(self.name),
|
||||
regularizer=self.W_regularizer)
|
||||
self.b_f = self.add_weight((self.output_dim,),
|
||||
initializer=self.forget_bias_init,
|
||||
name='{}_b_f'.format(self.name),
|
||||
regularizer=self.b_regularizer)
|
||||
self.W_c = self.add_weight((self.input_dim, self.output_dim),
|
||||
initializer=self.init,
|
||||
name='{}_W_c'.format(self.name),
|
||||
regularizer=self.W_regularizer)
|
||||
self.U_c = self.add_weight((self.output_dim, self.output_dim),
|
||||
initializer=self.init,
|
||||
name='{}_U_c'.format(self.name),
|
||||
regularizer=self.W_regularizer)
|
||||
self.b_c = self.add_weight((self.output_dim,),
|
||||
initializer='zero',
|
||||
name='{}_b_c'.format(self.name),
|
||||
regularizer=self.b_regularizer)
|
||||
self.W_o = self.add_weight((self.input_dim, self.output_dim),
|
||||
initializer=self.init,
|
||||
name='{}_W_o'.format(self.name),
|
||||
regularizer=self.W_regularizer)
|
||||
self.U_o = self.add_weight((self.output_dim, self.output_dim),
|
||||
initializer=self.init,
|
||||
name='{}_U_o'.format(self.name),
|
||||
regularizer=self.W_regularizer)
|
||||
self.b_o = self.add_weight((self.output_dim,),
|
||||
initializer='zero',
|
||||
name='{}_b_o'.format(self.name),
|
||||
regularizer=self.b_regularizer)
|
||||
|
||||
self.trainable_weights = [self.W_i, self.U_i, self.b_i,
|
||||
self.W_c, self.U_c, self.b_c,
|
||||
self.W_f, self.U_f, self.b_f,
|
||||
self.W_o, self.U_o, self.b_o]
|
||||
|
||||
self.W = K.concatenate([self.W_i, self.W_f, self.W_c, self.W_o])
|
||||
self.U = K.concatenate([self.U_i, self.U_f, self.U_c, self.U_o])
|
||||
self.b = K.concatenate([self.b_i, self.b_f, self.b_c, self.b_o])
|
||||
|
||||
self.regularizers = []
|
||||
if self.W_regularizer:
|
||||
self.W_regularizer.set_param(self.W)
|
||||
self.regularizers.append(self.W_regularizer)
|
||||
if self.U_regularizer:
|
||||
self.U_regularizer.set_param(self.U)
|
||||
self.regularizers.append(self.U_regularizer)
|
||||
if self.b_regularizer:
|
||||
self.b_regularizer.set_param(self.b)
|
||||
self.regularizers.append(self.b_regularizer)
|
||||
|
||||
if self.initial_weights is not None:
|
||||
self.set_weights(self.initial_weights)
|
||||
del self.initial_weights
|
||||
self.built = True
|
||||
|
||||
def reset_states(self):
|
||||
assert self.stateful, 'Layer must be stateful.'
|
||||
input_shape = self.input_spec[0].shape
|
||||
if not input_shape[0]:
|
||||
raise Exception('If a RNN is stateful, a complete ' +
|
||||
'input_shape must be provided (including batch size).')
|
||||
raise ValueError('If a RNN is stateful, a complete ' +
|
||||
'input_shape must be provided (including batch size).')
|
||||
if hasattr(self, 'states'):
|
||||
K.set_value(self.states[0],
|
||||
np.zeros((input_shape[0], self.output_dim)))
|
||||
@@ -747,7 +790,7 @@ class LSTM(Recurrent):
|
||||
dropout = self.dropout_W
|
||||
else:
|
||||
dropout = 0
|
||||
input_shape = self.input_spec[0].shape
|
||||
input_shape = K.int_shape(x)
|
||||
input_dim = input_shape[2]
|
||||
timesteps = input_shape[1]
|
||||
|
||||
@@ -793,7 +836,7 @@ class LSTM(Recurrent):
|
||||
x_c = K.dot(x * B_W[2], self.W_c) + self.b_c
|
||||
x_o = K.dot(x * B_W[3], self.W_o) + self.b_o
|
||||
else:
|
||||
raise Exception('Unknown `consume_less` mode.')
|
||||
raise ValueError('Unknown `consume_less` mode.')
|
||||
|
||||
i = self.inner_activation(x_i + K.dot(h_tm1 * B_U[0], self.U_i))
|
||||
f = self.inner_activation(x_f + K.dot(h_tm1 * B_U[1], self.U_f))
|
||||
@@ -814,10 +857,10 @@ class LSTM(Recurrent):
|
||||
constants.append([K.cast_to_floatx(1.) for _ in range(4)])
|
||||
|
||||
if 0 < self.dropout_W < 1:
|
||||
input_shape = self.input_spec[0].shape
|
||||
input_shape = K.int_shape(x)
|
||||
input_dim = input_shape[-1]
|
||||
ones = K.ones_like(K.reshape(x[:, 0, 0], (-1, 1)))
|
||||
ones = K.tile(ones, (1, input_dim))
|
||||
ones = K.tile(ones, (1, int(input_dim)))
|
||||
B_W = [K.in_train_phase(K.dropout(ones, self.dropout_W), ones) for _ in range(4)]
|
||||
constants.append(B_W)
|
||||
else:
|
||||
|
||||
+16
-23
@@ -17,7 +17,7 @@ class Wrapper(Layer):
|
||||
self.trainable_weights = getattr(self.layer, 'trainable_weights', [])
|
||||
self.non_trainable_weights = getattr(self.layer, 'non_trainable_weights', [])
|
||||
self.updates = getattr(self.layer, 'updates', [])
|
||||
self.regularizers = getattr(self.layer, 'regularizers', [])
|
||||
self.losses = getattr(self.layer, 'losses', [])
|
||||
self.constraints = getattr(self.layer, 'constraints', {})
|
||||
|
||||
# properly attribute the current layer to
|
||||
@@ -106,29 +106,17 @@ class TimeDistributed(Wrapper):
|
||||
return (child_output_shape[0], timesteps) + child_output_shape[1:]
|
||||
|
||||
def call(self, X, mask=None):
|
||||
input_shape = self.input_spec[0].shape
|
||||
input_shape = K.int_shape(X)
|
||||
if input_shape[0]:
|
||||
# batch size matters, use rnn-based implementation
|
||||
def step(x, states):
|
||||
output = self.layer.call(x)
|
||||
return output, []
|
||||
input_length = input_shape[1]
|
||||
if K.backend() == 'tensorflow' and len(input_shape) > 3:
|
||||
if input_length is None:
|
||||
raise Exception('When using TensorFlow, you should define '
|
||||
'explicitly the number of timesteps of '
|
||||
'your sequences.\n'
|
||||
'If your first layer is an Embedding, '
|
||||
'make sure to pass it an "input_length" '
|
||||
'argument. Otherwise, make sure '
|
||||
'the first layer has '
|
||||
'an "input_shape" or "batch_input_shape" '
|
||||
'argument, including the time axis.')
|
||||
unroll = True
|
||||
else:
|
||||
unroll = False
|
||||
last_output, outputs, states = K.rnn(step, X,
|
||||
initial_states=[], input_length=input_length, unroll=unroll)
|
||||
|
||||
_, outputs, _ = K.rnn(step, X,
|
||||
initial_states=[],
|
||||
input_length=input_shape[1],
|
||||
unroll=False)
|
||||
y = outputs
|
||||
else:
|
||||
# no batch size specified, therefore the layer will be able
|
||||
@@ -137,11 +125,16 @@ class TimeDistributed(Wrapper):
|
||||
input_length = input_shape[1]
|
||||
if not input_length:
|
||||
input_length = K.shape(X)[1]
|
||||
X = K.reshape(X, (-1, ) + input_shape[2:]) # (nb_samples * timesteps, ...)
|
||||
X = K.reshape(X, (-1,) + input_shape[2:]) # (nb_samples * timesteps, ...)
|
||||
y = self.layer.call(X) # (nb_samples * timesteps, ...)
|
||||
# (nb_samples, timesteps, ...)
|
||||
output_shape = self.get_output_shape_for(input_shape)
|
||||
y = K.reshape(y, (-1, input_length) + output_shape[2:])
|
||||
|
||||
# Apply activity regularizer if any:
|
||||
if hasattr(self.layer, 'activity_regularizer') and self.layer.activity_regularizer is not None:
|
||||
regularization_loss = self.layer.activity_regularizer(y)
|
||||
self.add_loss(regularization_loss, X)
|
||||
return y
|
||||
|
||||
|
||||
@@ -258,9 +251,9 @@ class Bidirectional(Wrapper):
|
||||
return []
|
||||
|
||||
@property
|
||||
def regularizers(self):
|
||||
if hasattr(self.forward_layer, 'regularizers'):
|
||||
return self.forward_layer.regularizers + self.backward_layer.regularizers
|
||||
def losses(self):
|
||||
if hasattr(self.forward_layer, 'losses'):
|
||||
return self.forward_layer.losses + self.backward_layer.losses
|
||||
return []
|
||||
|
||||
@property
|
||||
|
||||
@@ -1,777 +0,0 @@
|
||||
from collections import OrderedDict
|
||||
import warnings
|
||||
import copy
|
||||
|
||||
from .. import backend as K
|
||||
from ..layers import InputLayer, Layer, Merge
|
||||
from ..engine.training import Model
|
||||
|
||||
|
||||
class Graph(Model):
|
||||
'''Arbitrary connection graph.
|
||||
|
||||
THIS IS A LEGACY MODEL AND SHOULD NOT BE USED
|
||||
except for backwards compatibility support.
|
||||
|
||||
For multi-inputs/multi-outputs models, or
|
||||
models using shared layers, use the functional API instead.
|
||||
'''
|
||||
|
||||
def __init__(self, name=None):
|
||||
# model attributes
|
||||
self.inbound_nodes = []
|
||||
self.outbound_nodes = []
|
||||
self.built = False
|
||||
self.supports_masking = False
|
||||
|
||||
# legacy attributes (we prefix them with _graph_)
|
||||
self._graph_namespace = set() # strings
|
||||
self._graph_nodes = OrderedDict() # layer-like
|
||||
self._graph_inputs = OrderedDict() # layer-like
|
||||
self._graph_outputs = OrderedDict() # layer-like
|
||||
self._graph_input_config = [] # dicts
|
||||
self._graph_output_config = [] # dicts
|
||||
self._graph_node_config = [] # dicts
|
||||
self._graph_shared_nodes_names = []
|
||||
|
||||
if not name:
|
||||
prefix = 'graph_'
|
||||
name = prefix + str(K.get_uid(prefix))
|
||||
self.name = name
|
||||
|
||||
def __call__(self, x, mask=None):
|
||||
self.build()
|
||||
return super(Graph, self).__call__(x, mask)
|
||||
|
||||
def build(self, input_shape=None):
|
||||
# this will crash if the input/output layers have multiple nodes
|
||||
# no plans to support that case since Graph is deprecated
|
||||
input_tensors = [layer.output for layer in self._graph_inputs.values()]
|
||||
output_tensors = [layer.output for layer in self._graph_outputs.values()]
|
||||
# actually create the model
|
||||
super(Graph, self).__init__(input_tensors,
|
||||
output_tensors,
|
||||
name=self.name)
|
||||
self.built = True
|
||||
|
||||
def compile(self, optimizer, loss,
|
||||
metrics=[],
|
||||
sample_weight_modes=None,
|
||||
loss_weights=None,
|
||||
**kwargs):
|
||||
'''Configures the learning process.
|
||||
|
||||
# Arguments
|
||||
optimizer: str (name of optimizer) or optimizer object.
|
||||
See [optimizers](optimizers.md).
|
||||
loss: dictionary mapping the name(s) of the output(s) to
|
||||
a loss function (string name of objective function or
|
||||
objective function. See [objectives](objectives.md)).
|
||||
metrics: list of str (name of metrics) or
|
||||
list of metrics functions. See [metrics](metrics.md).
|
||||
sample_weight_modes: optional dictionary mapping certain
|
||||
output names to a sample weight mode ("temporal" and None
|
||||
are the only supported modes). If you need to do
|
||||
timestep-wise loss weighting on one of your graph outputs,
|
||||
you will need to set the sample weight mode for this output
|
||||
to "temporal".
|
||||
loss_weights: dictionary you can pass to specify a weight
|
||||
coefficient for each loss function (in a multi-output model).
|
||||
If no loss weight is specified for an output,
|
||||
the weight for this output's loss will be considered to be 1.
|
||||
kwargs: for Theano backend, these are passed into K.function.
|
||||
Ignored for Tensorflow backend.
|
||||
'''
|
||||
# create the underlying Model
|
||||
if not self.built:
|
||||
self.build()
|
||||
super(Graph, self).compile(optimizer, loss,
|
||||
metrics=metrics,
|
||||
sample_weight_mode=sample_weight_modes,
|
||||
loss_weights=loss_weights,
|
||||
**kwargs)
|
||||
|
||||
def add_input(self, name, input_shape=None,
|
||||
batch_input_shape=None, dtype='float'):
|
||||
'''Adds an input to the graph.
|
||||
|
||||
# Arguments:
|
||||
name: string. The name of the new input.
|
||||
Must be unique in the graph.
|
||||
input_shape: a tuple of integers,
|
||||
the expected shape of the input samples.
|
||||
Does not include the batch size.
|
||||
batch_input_shape: a tuple of integers,
|
||||
the expected shape of the whole input batch,
|
||||
including the batch size.
|
||||
dtype: 'float', or 'int'.
|
||||
'''
|
||||
if name in self._graph_namespace:
|
||||
raise Exception('Duplicate node identifier: ' + name)
|
||||
self._graph_namespace.add(name)
|
||||
self.built = False
|
||||
|
||||
if dtype[:3] == 'int':
|
||||
dtype = 'int32'
|
||||
elif dtype[:5] == 'float':
|
||||
dtype = K.floatx()
|
||||
else:
|
||||
raise Exception('Uknown dtype (should be "int" or "float"): ' +
|
||||
str(dtype))
|
||||
|
||||
# create input layer
|
||||
input_layer = InputLayer(input_shape=input_shape,
|
||||
batch_input_shape=batch_input_shape,
|
||||
name=name, input_dtype=dtype)
|
||||
self._graph_inputs[name] = input_layer
|
||||
|
||||
# append input config to self._graph_input_config
|
||||
config = {'name': name, 'dtype': dtype}
|
||||
if batch_input_shape:
|
||||
config['batch_input_shape'] = batch_input_shape
|
||||
else:
|
||||
config['input_shape'] = input_shape
|
||||
self._graph_input_config.append(config)
|
||||
|
||||
def add_node(self, layer, name, input=None, inputs=[],
             merge_mode='concat', concat_axis=-1, dot_axes=-1,
             create_output=False):
    '''Adds a node in the graph. It can be connected to multiple
    inputs, which will first be merged into one tensor
    according to the mode specified.

    # Arguments
        layer: the layer at the node.
        name: name for the node.
        input: when connecting the layer to a single input,
            this is the name of the incoming node.
        inputs: when connecting the layer to multiple inputs,
            this is a list of names of incoming nodes.
        merge_mode: one of {concat, sum, dot, ave, mul}
        concat_axis: when `merge_mode=='concat'`, this is the
            input concatenation axis.
        dot_axes: when `merge_mode='dot'`,
            this is the contraction axes specification;
            see the `Merge` layer for details.
        create_output: boolean. Set this to `True` if you want the output
            of your node to be an output of the graph.

    # Raises
        Exception: if `name` is already used in the graph, or if
            `input` / any entry of `inputs` is neither a node nor
            a graph input.
    '''
    if name in self._graph_namespace:
        raise Exception('Duplicate node identifier: ' + name)

    def resolve(identifier, error_prefix):
        # Incoming connections may come from nodes or from graph inputs.
        if identifier in self._graph_nodes:
            return self._graph_nodes[identifier]
        if identifier in self._graph_inputs:
            return self._graph_inputs[identifier]
        raise Exception(error_prefix + identifier)

    # Resolve every incoming identifier before mutating the graph or the
    # layer: a failed call must not leave `name` reserved, and an
    # identifier that is known to the namespace but is not connectable
    # must not be silently ignored.
    single_source = None
    if input:
        single_source = resolve(input, 'Unknown node/input identifier: ')
    to_merge = []
    if inputs:
        to_merge = [resolve(n, 'Unknown identifier: ') for n in inputs]

    self._graph_namespace.add(name)
    layer.name = name
    self.built = False

    if input:
        layer.add_inbound_node(single_source)
    if inputs:
        merge = Merge(to_merge, mode=merge_mode,
                      concat_axis=concat_axis, dot_axes=dot_axes,
                      name='merge_inputs_for_' + name)
        layer.add_inbound_node(merge)

    self._graph_nodes[name] = layer
    self._graph_node_config.append({'name': name,
                                    'input': input,
                                    'inputs': inputs,
                                    'merge_mode': merge_mode,
                                    'concat_axis': concat_axis,
                                    'dot_axes': dot_axes,
                                    'create_output': create_output})
    if create_output:
        self.add_output(name, input=name)
|
||||
|
||||
def add_shared_node(self, layer, name, inputs=[], merge_mode=None,
                    concat_axis=-1, dot_axes=-1, outputs=[],
                    create_output=False):
    '''Used to share a same layer across multiple nodes.

    Supposed, for instance, that you want to apply one same `Dense` layer
    after two different nodes ('node_a' and 'node_b').
    You can then add the dense layer as a shared node by calling:

    ```python
    model.add_shared_node(my_dense, name='shared_dense', inputs=['node_a', 'node_b'], ...)
    ```

    If you want access to the output of dense(node_a) and dense(node_b) separately,
    you can add these outputs to the Graph by passing an `outputs` argument:

    ```python
    model.add_shared_node(my_dense, name='shared_dense', inputs=['node_a', 'node_b'],
                          outputs=['dense_output_a', 'dense_outputs_b'])
    ```

    Otherwise you can merge these different outputs via `merge_mode`.
    In that case you can access the merged output
    under the identifier `name`.

    # Arguments
        layer: The layer to be shared across multiple inputs
        name: Name of the shared node
        inputs: List of names of input nodes
        merge_mode: Same meaning as `merge_mode` argument of `add_node()`
        concat_axis: Same meaning as `concat_axis` argument of `add_node()`
        dot_axes: Same meaning as `dot_axes` argument of `add_node()`
        outputs: Used when `merge_mode=None`. Names for the output nodes.
        create_output: Same meaning as `create_output` argument of `add_node()`.

    # Raises
        Exception: on a duplicate `name` or output name, an invalid
            `merge_mode`, an unknown entry in `inputs`, or (when
            `merge_mode` is not set) when `outputs` and `inputs`
            differ in length.
    '''
    # All validation happens up front so that a rejected call leaves
    # both the graph and `layer` untouched.
    if name in self._graph_namespace:
        raise Exception('Duplicate node identifier: ' + name)
    for o in outputs:
        # `name` is about to be reserved, so it cannot be an output name.
        if o == name or o in self._graph_namespace:
            raise Exception('Duplicate node identifier: ' + o)
    if merge_mode:
        if merge_mode not in {'sum', 'ave', 'mul', 'dot', 'cos', 'concat'}:
            raise Exception('Invalid merge mode: ' + str(merge_mode))

    input_layers = []
    for identifier in inputs:
        if identifier in self._graph_nodes:
            input_layers.append(self._graph_nodes[identifier])
        elif identifier in self._graph_inputs:
            input_layers.append(self._graph_inputs[identifier])
        else:
            raise Exception('Unknown identifier: ' + identifier)

    if not merge_mode and len(outputs) != len(inputs):
        raise Exception('When using merge_mode=None, '
                        'you should provide a list of '
                        'output names (`outputs` argument) '
                        'the same size as `inputs`.')

    self._graph_namespace.add(name)
    self.built = False

    # Remember which inbound node of `layer` each connection creates,
    # so each output can be addressed individually below.
    created_node_indices = []
    for input_layer in input_layers:
        created_node_indices.append(len(layer.inbound_nodes))
        layer.add_inbound_node(input_layer)

    if merge_mode:
        layer.name = 'input_for_' + name
        # collect all output nodes of layer and merge them into a single output
        merge = Merge([layer for _ in range(len(inputs))],
                      mode=merge_mode,
                      concat_axis=concat_axis, dot_axes=dot_axes,
                      node_indices=created_node_indices,
                      name=name)
        self._graph_nodes[name] = merge
        if create_output:
            self.add_output(name, input=name)
    else:
        layer.name = name
        # create one new layer per output node of layer,
        # and add them to the Graph with their own identifiers
        for output_layer_name, node_index in zip(outputs,
                                                 created_node_indices):
            output_layer = Layer(name=output_layer_name)
            output_layer.add_inbound_node(layer, node_index)
            self._graph_namespace.add(output_layer_name)
            self._graph_nodes[output_layer_name] = output_layer
            if create_output:
                self.add_output(output_layer_name, input=output_layer_name)

    self._graph_node_config.append({'name': name,
                                    'layer': {
                                        'config': layer.get_config(),
                                        'class_name': layer.__class__.__name__,
                                    },
                                    'inputs': inputs,
                                    'merge_mode': merge_mode,
                                    'concat_axis': concat_axis,
                                    'dot_axes': dot_axes,
                                    'outputs': outputs,
                                    'create_output': create_output if merge_mode else False})
    self._graph_shared_nodes_names.append(name)
|
||||
|
||||
def add_output(self, name, input=None, inputs=[],
               merge_mode='concat', concat_axis=-1, dot_axes=-1):
    '''Adds an output to the graph.

    This output can merge several node outputs into a single output.

    # Arguments
        name: name of the output.
        input: when connecting the layer to a single input,
            this is the name of the incoming node.
        inputs: when connecting the layer to multiple inputs,
            this is a list of names of incoming nodes.
        merge_mode: one of {concat, sum, dot, ave, mul}
        concat_axis: when `merge_mode=='concat'`, this is the
            input concatenation axis.
        dot_axes: when `merge_mode='dot'`,
            this is the contraction axes specification;
            see the `Merge` layer for details.

    # Raises
        Exception: if an output called `name` already exists, if
            `input` is neither a node nor a graph input, or if an
            entry of `inputs` is not a node.
    '''
    if name in self._graph_outputs:
        raise Exception('Duplicate output identifier: ' + name)

    # Resolve all incoming identifiers before mutating any graph state,
    # so a rejected call does not leave `name` reserved.
    layer = None
    if input:
        if input in self._graph_nodes:
            layer = self._graph_nodes[input]
        elif input in self._graph_inputs:
            layer = self._graph_inputs[input]
        else:
            raise Exception('Unknown node/input identifier: ' + input)
    to_merge = []
    for n in (inputs or []):
        if n not in self._graph_nodes:
            raise Exception('Unknown identifier: ' + n)
        to_merge.append(self._graph_nodes[n])

    # The name may already be reserved (e.g. a node exposed as an output
    # under its own name); adding to the namespace again is a no-op.
    self._graph_namespace.add(name)
    self.built = False

    if input:
        # The layer feeding a single-input output takes the output's name.
        if layer.name != name:
            layer.name = name
        self._graph_outputs[name] = layer
    if inputs:
        merge = Merge(to_merge, mode=merge_mode,
                      concat_axis=concat_axis, dot_axes=dot_axes,
                      name=name)
        self._graph_outputs[name] = merge

    self._graph_output_config.append({'name': name,
                                      'input': input,
                                      'inputs': inputs,
                                      'merge_mode': merge_mode,
                                      'concat_axis': concat_axis,
                                      'dot_axes': dot_axes})
|
||||
|
||||
def _get_x(self, data):
|
||||
x = []
|
||||
for key in self._graph_inputs.keys():
|
||||
if key not in data:
|
||||
raise Exception('Expected to be provided an array '
|
||||
'(in dict argument `data`) for input "' +
|
||||
key + '".')
|
||||
x.append(data[key])
|
||||
return x
|
||||
|
||||
def _get_y(self, data):
|
||||
y = []
|
||||
for key in self._graph_outputs.keys():
|
||||
if key not in data:
|
||||
raise Exception('Expected to be provided an array '
|
||||
'(in dict argument `data`) for output "' +
|
||||
key + '".')
|
||||
y.append(data[key])
|
||||
return y
|
||||
|
||||
def fit(self, data, batch_size=32, nb_epoch=10, verbose=1, callbacks=[],
        validation_split=0., validation_data=None, shuffle=True,
        class_weight=None, sample_weight=None, **kwargs):
    '''Trains the model for a fixed number of epochs.

    Returns a history object. Its `history` attribute is a record of
    training loss values at successive epochs,
    as well as validation loss values (if applicable).

    # Arguments
        data: dictionary mapping input names and outputs names to
            appropriate Numpy arrays. All arrays should contain
            the same number of samples.
        batch_size: int. Number of samples per gradient update.
        nb_epoch: int.
        verbose: 0 for no logging to stdout,
            1 for progress bar logging, 2 for one log line per epoch.
        callbacks: `keras.callbacks.Callback` list. List of callbacks
            to apply during training. See [callbacks](callbacks.md).
        validation_split: float (0. < x < 1). Fraction of the data to
            use as held-out validation data.
        validation_data: dictionary mapping input names and outputs names
            to appropriate Numpy arrays to be used as
            held-out validation data.
            All arrays should contain the same number of samples.
            Will override validation_split.
        shuffle: boolean. Whether to shuffle the samples at each epoch.
        class_weight: dictionary mapping output names to
            class weight dictionaries.
        sample_weight: dictionary mapping output names to
            numpy arrays of sample weights.

    # Raises
        Exception: on unknown keyword arguments, when `validation_data`
            is a tuple, or when `data` / `validation_data` lacks an
            array for a graph input or output.
    '''
    if 'show_accuracy' in kwargs:
        kwargs.pop('show_accuracy')
        warnings.warn('The "show_accuracy" argument is deprecated, '
                      'instead you should pass the "accuracy" metric to '
                      'the model at compile time:\n'
                      '`model.compile(optimizer, loss, '
                      'metrics=["accuracy"])`')
    if kwargs:
        raise Exception('Received unknown keyword arguments: ' +
                        str(kwargs))
    # Reject the unsupported tuple form before doing any other work.
    if isinstance(validation_data, tuple):
        raise Exception('Cannot use sample_weight with '
                        'validation data with legacy Graph model. '
                        'validation_data should be a dictionary.')

    x = self._get_x(data)
    y = self._get_y(data)

    if validation_data:
        val_x = self._get_x(validation_data)
        val_y = self._get_y(validation_data)
        validation_data = (val_x, val_y)
    return super(Graph, self).fit(x, y,
                                  batch_size=batch_size,
                                  nb_epoch=nb_epoch,
                                  verbose=verbose,
                                  callbacks=callbacks,
                                  validation_split=validation_split,
                                  validation_data=validation_data,
                                  shuffle=shuffle,
                                  class_weight=class_weight,
                                  sample_weight=sample_weight)
|
||||
|
||||
def evaluate(self, data, batch_size=128,
             verbose=0, sample_weight={}, **kwargs):
    '''Evaluates the model on `data`, batch by batch.

    The input and target arrays are extracted from `data` and handed
    to the parent class' `evaluate`, whose result is returned
    unchanged.

    The only keyword argument accepted through `kwargs` is the
    deprecated `show_accuracy`, which is discarded with a warning;
    any other one raises an `Exception`.

    Arguments: see `fit` method.
    '''
    try:
        kwargs.pop('show_accuracy')
    except KeyError:
        pass
    else:
        warnings.warn('The "show_accuracy" argument is deprecated, '
                      'instead you should pass the "accuracy" metric to '
                      'the model at compile time:\n'
                      '`model.compile(optimizer, loss, '
                      'metrics=["accuracy"])`')
    if kwargs:
        raise Exception('Received unknown keyword arguments: ' +
                        str(kwargs))

    inputs = self._get_x(data)
    targets = self._get_y(data)
    parent = super(Graph, self)
    return parent.evaluate(inputs, targets,
                           batch_size=batch_size,
                           verbose=verbose,
                           sample_weight=sample_weight)
|
||||
|
||||
def predict(self, data, batch_size=128, verbose=0):
    '''Generates output predictions for the input samples,
    batch by batch.

    Returns a dictionary pairing the keys of `self._graph_outputs`
    with the predictions returned by the parent class (a single
    non-list result is treated as a one-element list).

    Arguments: see `fit` method.
    '''
    inputs = self._get_x(data)
    predictions = super(Graph, self).predict(inputs,
                                             batch_size=batch_size,
                                             verbose=verbose)
    if isinstance(predictions, list):
        per_output = predictions
    else:
        per_output = [predictions]
    return dict(zip(self._graph_outputs, per_output))
|
||||
|
||||
def train_on_batch(self, data,
                   class_weight={},
                   sample_weight={}, **kwargs):
    '''Runs a single gradient update on one batch of samples.

    The input and target arrays are extracted from `data` and handed
    to the parent class' `train_on_batch`, whose result is returned
    unchanged.

    The only keyword argument accepted through `kwargs` is the
    deprecated `accuracy`, which is discarded with a warning;
    any other one raises an `Exception`.

    Arguments: see `fit` method.
    '''
    if 'accuracy' in kwargs:
        del kwargs['accuracy']
        warnings.warn('The "accuracy" argument is deprecated, '
                      'instead you should pass the "accuracy" metric to '
                      'the model at compile time:\n'
                      '`model.compile(optimizer, loss, '
                      'metrics=["accuracy"])`')
    if kwargs:
        raise Exception('Received unknown keyword arguments: ' +
                        str(kwargs))

    inputs = self._get_x(data)
    targets = self._get_y(data)
    parent = super(Graph, self)
    return parent.train_on_batch(inputs, targets,
                                 sample_weight=sample_weight,
                                 class_weight=class_weight)
|
||||
|
||||
def test_on_batch(self, data, sample_weight={}, **kwargs):
|
||||
'''Test the network on a single batch of samples.
|
||||
|
||||
Returns the scalar test loss over the data,
|
||||
or a list of metrics values (starting with the test loss)
|
||||
if applicable.
|
||||
|
||||
Arguments: see `fit` method.
|
||||
'''
|
||||
if 'accuracy' in kwargs:
|
||||
kwargs.pop('accuracy')
|
||||
warnings.warn('The "accuracy" argument is deprecated, '
|
||||
'instead you should pass the "accuracy" metric to '
|
||||
'the model at compile time:\n'
|
||||
'`model.compile(optimizer, loss, '
|
||||
'metrics=["accuracy"])`')
|
||||
if kwargs:
|
||||
raise Exception('Received unknown keyword arguments: ' +
|
||||
str(kwargs))
|
||||
x = self._get_x(data)
|
||||
y = self._get_y(data)
|
||||
return super(Graph, self).test_on_batch(x, y,
|
||||
sample_weight=sample_weight)
|
||||
|
||||
def predict_on_batch(self, data):
    '''Returns predictions for a single batch of samples.

    `data` is forwarded as-is to the parent class' `predict_on_batch`.
    The result is returned as a dictionary pairing the keys of
    `self._graph_outputs` with the predictions (a single non-list
    result is treated as a one-element list).
    '''
    predictions = super(Graph, self).predict_on_batch(data)
    if isinstance(predictions, list):
        per_output = predictions
    else:
        per_output = [predictions]
    return dict(zip(self._graph_outputs, per_output))
|
||||
|
||||
def fit_generator(self, generator, samples_per_epoch, nb_epoch,
                  verbose=1, callbacks=[],
                  validation_data=None, nb_val_samples=None,
                  class_weight={},
                  max_q_size=10, nb_worker=1,
                  pickle_safe=False, **kwargs):
    '''Fits a model on data generated batch-by-batch by a Python generator.
    The generator is run in parallel to the model, for efficiency.
    For instance, this allows you to do real-time data augmentation
    on images on CPU in parallel to training your model on GPU.

    # Arguments
        generator: a generator.
            The output of the generator must be either a tuple
            of dictionaries `(input_data, sample_weight)`
            or a dictionary `input_data`
            (mapping names of inputs and outputs to Numpy arrays).
            All arrays should contain the same number of samples.
            The generator is expected to loop over its data
            indefinitely. An epoch finishes when `samples_per_epoch`
            samples have been seen by the model.
        samples_per_epoch: integer, number of samples to process before
            going to the next epoch.
        nb_epoch: integer, total number of iterations on the data.
        verbose: verbosity mode, 0, 1, or 2.
        callbacks: list of callbacks to be called during training.
        validation_data: dictionary mapping input names and outputs names
            to appropriate Numpy arrays to be used as
            held-out validation data, or a generator yielding such
            dictionaries. All arrays should contain the same number
            of samples. If a generator, will be called until more than
            `nb_val_samples` examples have been generated at the
            end of every epoch. These examples will then be used
            as the validation data.
        nb_val_samples: number of samples to use from validation
            generator at the end of every epoch.
        class_weight: dictionary mapping class indices to a weight
            for the class.

    # Returns
        A `History` object.

    # Raises
        Exception: on unknown keyword arguments, or when
            `validation_data` is a tuple.

    # Examples

    ```python
        def generate_arrays_from_file(path):
            while 1:
                with open(path) as f:
                    for line in f:
                        # create Numpy arrays of input data
                        # and labels, from each line in the file
                        x1, x2, y = process_line(line)
                        yield ({'input_1': x1, 'input_2': x2, 'output': y})

        graph.fit_generator(generate_arrays_from_file('/my_file.txt'),
                            samples_per_epoch=10000, nb_epoch=10)
    ```
    '''
    if 'show_accuracy' in kwargs:
        kwargs.pop('show_accuracy')
        warnings.warn('The "show_accuracy" argument is deprecated, '
                      'instead you should pass the "accuracy" metric to '
                      'the model at compile time:\n'
                      '`model.compile(optimizer, loss, '
                      'metrics=["accuracy"])`')
    if 'nb_val_worker' in kwargs:
        kwargs.pop('nb_val_worker')
        warnings.warn('The "nb_val_worker" argument is deprecated, '
                      'please remove it from your code.')
    if kwargs:
        raise Exception('Received unknown keyword arguments: ' +
                        str(kwargs))

    # Validate and convert the validation data BEFORE the instance's
    # methods are swapped below, so a rejected call leaves it untouched.
    if validation_data and isinstance(validation_data, tuple):
        raise Exception('Cannot use sample_weight with '
                        'validation_data in legacy Graph model.')
    if validation_data and isinstance(validation_data, dict):
        validation_data = (self._get_x(validation_data),
                           self._get_y(validation_data))

    original_generator = generator

    def fixed_generator():
        # Convert each yielded dictionary into the array lists that
        # the parent class expects.
        while True:
            data = next(original_generator)
            if type(data) is tuple:
                data, sample_weight = data
                yield self._get_x(data), self._get_y(data), sample_weight
            else:
                yield self._get_x(data), self._get_y(data)

    # The parent's fit_generator calls `self.train_on_batch` and
    # `self.evaluate` with array lists, not dictionaries, so this class'
    # dictionary-based overrides are temporarily replaced by the parent
    # implementations. try/finally guarantees that they are restored
    # even when training raises.
    self._train_on_batch = self.train_on_batch
    self._evaluate = self.evaluate
    try:
        self.train_on_batch = super(Graph, self).train_on_batch
        self.evaluate = super(Graph, self).evaluate
        history = super(Graph, self).fit_generator(
            fixed_generator(),
            samples_per_epoch,
            nb_epoch,
            verbose=verbose,
            callbacks=callbacks,
            validation_data=validation_data,
            nb_val_samples=nb_val_samples,
            class_weight=class_weight,
            max_q_size=max_q_size,
            nb_worker=nb_worker,
            pickle_safe=pickle_safe)
    finally:
        self.train_on_batch = self._train_on_batch
        self.evaluate = self._evaluate
    return history
|
||||
|
||||
def evaluate_generator(self, generator, val_samples,
                       verbose=1, max_q_size=10, nb_worker=1,
                       pickle_safe=False, **kwargs):
    '''Evaluates the model on a generator. The generator should
    return the same kind of data with every yield as accepted
    by `evaluate`.

    Returns whatever the parent class' `evaluate_generator` returns.

    Arguments:
        generator:
            generator yielding dictionaries of the kind accepted
            by `evaluate`, or tuples of such dictionaries and
            associated dictionaries of sample weights.
        val_samples:
            total number of samples to generate from `generator`
            to use in validation.
        verbose: accepted for backward compatibility; it is not
            forwarded to the parent class.

    Other arguments are the same as for `fit`.

    # Raises
        Exception: on unknown keyword arguments.
    '''
    if 'show_accuracy' in kwargs:
        kwargs.pop('show_accuracy')
        warnings.warn('The "show_accuracy" argument is deprecated, '
                      'instead you should pass the "accuracy" metric to '
                      'the model at compile time:\n'
                      '`model.compile(optimizer, loss, '
                      'metrics=["accuracy"])`')
    # NOTE: `verbose` is a named parameter, so it can never show up in
    # `kwargs`; no separate deprecation check is needed for it.
    if kwargs:
        raise Exception('Received unknown keyword arguments: ' +
                        str(kwargs))

    original_generator = generator

    def fixed_generator():
        # Convert each yielded dictionary into the array lists that
        # the parent class expects.
        while True:
            data = next(original_generator)
            if type(data) is tuple:
                data, sample_weight = data
                yield self._get_x(data), self._get_y(data), sample_weight
            else:
                yield self._get_x(data), self._get_y(data)

    # The parent's evaluate_generator calls `self.test_on_batch` with
    # array lists, not dictionaries, so this class' dictionary-based
    # override is temporarily replaced by the parent implementation.
    # try/finally guarantees that it is restored even when evaluation
    # raises.
    self._test_on_batch = self.test_on_batch
    try:
        self.test_on_batch = super(Graph, self).test_on_batch
        history = super(Graph, self).evaluate_generator(
            fixed_generator(),
            val_samples,
            max_q_size=max_q_size,
            nb_worker=nb_worker,
            pickle_safe=pickle_safe)
    finally:
        self.test_on_batch = self._test_on_batch
    return history
|
||||
|
||||
# get_weights, set_weights: inherited
|
||||
def get_config(self):
    '''Returns a deep copy of the graph's configuration dictionary.

    The dictionary holds the recorded input, node and output
    configurations plus, under `'nodes'`, the class name and config
    of every entry of `self._graph_nodes` (flagged with
    `'shared': True` when its name is a registered shared node).
    '''
    shared_names = self._graph_shared_nodes_names
    node_entries = {}
    for node_name, node in self._graph_nodes.items():
        entry = {'class_name': node.__class__.__name__,
                 'config': node.get_config()}
        if node_name in shared_names:
            entry['shared'] = True
        node_entries[node_name] = entry

    return copy.deepcopy({'input_config': self._graph_input_config,
                          'node_config': self._graph_node_config,
                          'output_config': self._graph_output_config,
                          'nodes': node_entries})
|
||||
|
||||
@classmethod
def from_config(cls, config):
    '''Builds a new graph from a configuration dictionary.

    `config` is read through its `'input_config'`, `'node_config'`,
    `'output_config'` and `'nodes'` entries, i.e. the layout produced
    by `get_config`. The dictionaries inside `config` are not
    modified, so the same configuration can be reused.

    # Arguments
        config: configuration dictionary.

    # Returns
        A new instance of `cls` with the inputs, nodes and outputs
        described by `config` added to it.
    '''
    # TODO: test legacy support
    from keras.utils.layer_utils import layer_from_config

    def normalize_legacy_config(conf):
        # Configs without a 'class_name' entry keep the class name
        # under 'name' and the layer name under 'custom_name'.
        if 'class_name' in conf:
            return conf
        legacy = dict(conf)  # copy: never write into the caller's dict
        class_name = legacy['name']
        legacy['name'] = legacy.get('custom_name')
        return {
            'class_name': class_name,
            'config': legacy,
        }

    graph = cls()
    for input_config in config.get('input_config'):
        graph.add_input(**input_config)

    for node_config in config.get('node_config'):
        # Work on a shallow copy: the keys assigned below ('layer',
        # 'create_output') must not leak into the caller's config.
        node = dict(node_config)
        layer_config = normalize_legacy_config(
            config['nodes'][node['name']])
        if 'layer' in node:
            # for add_shared_node
            node['layer'] = layer_from_config(node['layer'])
        else:
            node['layer'] = layer_from_config(layer_config)

        node['create_output'] = False  # outputs will be added below
        if layer_config.get('shared'):
            graph.add_shared_node(**node)
        else:
            graph.add_node(**node)

    for output_config in config.get('output_config'):
        graph.add_output(**output_config)
    return graph
|
||||
|
||||
def load_weights(self, fname):
    '''Loads weights from `fname` through the parent class,
    calling `self.build()` first when the graph is not built yet.
    '''
    needs_build = not self.built
    if needs_build:
        self.build()
    parent = super(Graph, self)
    parent.load_weights(fname)
|
||||
+52
-36
@@ -5,14 +5,14 @@ from .utils.generic_utils import get_from_module
|
||||
|
||||
def binary_accuracy(y_true, y_pred):
|
||||
'''Calculates the mean accuracy rate across all predictions for binary
|
||||
classification problems
|
||||
classification problems.
|
||||
'''
|
||||
return K.mean(K.equal(y_true, K.round(y_pred)))
|
||||
|
||||
|
||||
def categorical_accuracy(y_true, y_pred):
|
||||
'''Calculates the mean accuracy rate across all predictions for
|
||||
multiclass classification problems
|
||||
multiclass classification problems.
|
||||
'''
|
||||
return K.mean(K.equal(K.argmax(y_true, axis=-1),
|
||||
K.argmax(y_pred, axis=-1)))
|
||||
@@ -20,7 +20,7 @@ def categorical_accuracy(y_true, y_pred):
|
||||
|
||||
def sparse_categorical_accuracy(y_true, y_pred):
|
||||
'''Same as categorical_accuracy, but useful when the predictions are for
|
||||
sparse targets
|
||||
sparse targets.
|
||||
'''
|
||||
return K.mean(K.equal(K.max(y_true, axis=-1),
|
||||
K.cast(K.argmax(y_pred, axis=-1), K.floatx())))
|
||||
@@ -28,36 +28,36 @@ def sparse_categorical_accuracy(y_true, y_pred):
|
||||
|
||||
def top_k_categorical_accuracy(y_true, y_pred, k=5):
|
||||
'''Calculates the top-k categorical accuracy rate, i.e. success when the
|
||||
target class is within the top-k predictions provided
|
||||
target class is within the top-k predictions provided.
|
||||
'''
|
||||
return K.mean(K.in_top_k(y_pred, K.argmax(y_true, axis=-1), k))
|
||||
|
||||
|
||||
def mean_squared_error(y_true, y_pred):
|
||||
'''Calculates the mean squared error (mse) rate between predicted and target
|
||||
values
|
||||
'''Calculates the mean squared error (mse) rate
|
||||
between predicted and target values.
|
||||
'''
|
||||
return K.mean(K.square(y_pred - y_true))
|
||||
|
||||
|
||||
def mean_absolute_error(y_true, y_pred):
|
||||
'''Calculates the mean absolute error (mae) rate between predicted and target
|
||||
values
|
||||
'''Calculates the mean absolute error (mae) rate
|
||||
between predicted and target values.
|
||||
'''
|
||||
return K.mean(K.abs(y_pred - y_true))
|
||||
|
||||
|
||||
def mean_absolute_percentage_error(y_true, y_pred):
|
||||
'''Calculates the mean absolute percentage error (mape) rate between predicted
|
||||
and target values
|
||||
'''Calculates the mean absolute percentage error (mape) rate
|
||||
between predicted and target values.
|
||||
'''
|
||||
diff = K.abs((y_true - y_pred) / K.clip(K.abs(y_true), K.epsilon(), np.inf))
|
||||
return 100. * K.mean(diff)
|
||||
|
||||
|
||||
def mean_squared_logarithmic_error(y_true, y_pred):
|
||||
'''Calculates the mean squared logarithmic error (msle) rate between predicted
|
||||
and target values
|
||||
'''Calculates the mean squared logarithmic error (msle) rate
|
||||
between predicted and target values.
|
||||
'''
|
||||
first_log = K.log(K.clip(y_pred, K.epsilon(), np.inf) + 1.)
|
||||
second_log = K.log(K.clip(y_true, K.epsilon(), np.inf) + 1.)
|
||||
@@ -66,13 +66,13 @@ def mean_squared_logarithmic_error(y_true, y_pred):
|
||||
|
||||
def hinge(y_true, y_pred):
|
||||
'''Calculates the hinge loss, which is defined as
|
||||
`max(1 - y_true * y_pred, 0)`
|
||||
`max(1 - y_true * y_pred, 0)`.
|
||||
'''
|
||||
return K.mean(K.maximum(1. - y_true * y_pred, 0.))
|
||||
|
||||
|
||||
def squared_hinge(y_true, y_pred):
|
||||
'''Calculates the squared value of the hinge loss
|
||||
'''Calculates the squared value of the hinge loss.
|
||||
'''
|
||||
return K.mean(K.square(K.maximum(1. - y_true * y_pred, 0.)))
|
||||
|
||||
@@ -104,7 +104,7 @@ def binary_crossentropy(y_true, y_pred):
|
||||
|
||||
def kullback_leibler_divergence(y_true, y_pred):
|
||||
'''Calculates the Kullback-Leibler (KL) divergence between prediction
|
||||
and target values
|
||||
and target values.
|
||||
'''
|
||||
y_true = K.clip(y_true, K.epsilon(), 1)
|
||||
y_pred = K.clip(y_pred, K.epsilon(), 1)
|
||||
@@ -148,11 +148,31 @@ def matthews_correlation(y_true, y_pred):
|
||||
return numerator / (denominator + K.epsilon())
|
||||
|
||||
|
||||
def fbeta_score(y_true, y_pred, beta=1):
|
||||
'''Computes the F score, the weighted harmonic mean of precision and recall.
|
||||
def precision(y_true, y_pred):
    '''Calculates the precision, a metric for multi-label classification of
    how many selected items are relevant.

    Computed as the count of true positives divided by the count of
    predicted positives (plus `K.epsilon()`), where counts are sums of
    values clipped to [0, 1] and rounded.
    '''
    def count_positives(tensor):
        return K.sum(K.round(K.clip(tensor, 0, 1)))

    hits = count_positives(y_true * y_pred)
    selected = count_positives(y_pred)
    return hits / (selected + K.epsilon())
|
||||
|
||||
This is useful for multi-label classification where input samples can be
|
||||
tagged with a set of labels. By only using accuracy (precision) a model
|
||||
|
||||
def recall(y_true, y_pred):
    '''Calculates the recall, a metric for multi-label classification of
    how many relevant items are selected.

    Computed as the count of true positives divided by the count of
    possible positives (plus `K.epsilon()`), where counts are sums of
    values clipped to [0, 1] and rounded.
    '''
    def count_positives(tensor):
        return K.sum(K.round(K.clip(tensor, 0, 1)))

    hits = count_positives(y_true * y_pred)
    relevant = count_positives(y_true)
    return hits / (relevant + K.epsilon())
|
||||
|
||||
|
||||
def fbeta_score(y_true, y_pred, beta=1):
|
||||
'''Calculates the F score, the weighted harmonic mean of precision and recall.
|
||||
|
||||
This is useful for multi-label classification, where input samples can be
|
||||
classified as sets of labels. By only using accuracy (precision) a model
|
||||
would achieve a perfect score by simply assigning every class to every
|
||||
input. In order to avoid this, a metric should penalize incorrect class
|
||||
assignments as well (recall). The F-beta score (ranged from 0.0 to 1.0)
|
||||
@@ -162,30 +182,25 @@ def fbeta_score(y_true, y_pred, beta=1):
|
||||
With beta = 1, this is equivalent to a F-measure. With beta < 1, assigning
|
||||
correct classes becomes more important, and with beta > 1 the metric is
|
||||
instead weighted towards penalizing incorrect class assignments.
|
||||
|
||||
'''
|
||||
if beta < 0:
|
||||
raise ValueError('The lowest choosable beta is zero (only precision).')
|
||||
|
||||
# Count positive samples.
|
||||
c1 = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
|
||||
c2 = K.sum(K.round(K.clip(y_pred, 0, 1)))
|
||||
c3 = K.sum(K.round(K.clip(y_true, 0, 1)))
|
||||
|
||||
# If there are no true samples, fix the F score at 0.
|
||||
if c3 == 0:
|
||||
|
||||
# If there are no true positives, fix the F score at 0 like sklearn.
|
||||
if K.sum(K.round(K.clip(y_true, 0, 1))) == 0:
|
||||
return 0
|
||||
|
||||
# How many selected items are relevant?
|
||||
precision = c1 / c2
|
||||
p = precision(y_true, y_pred)
|
||||
r = recall(y_true, y_pred)
|
||||
bb = beta ** 2
|
||||
fbeta_score = (1 + bb) * (p * r) / (bb * p + r + K.epsilon())
|
||||
return fbeta_score
|
||||
|
||||
# How many relevant items are selected?
|
||||
recall = c1 / c3
|
||||
|
||||
# Weight precision and recall together as a single scalar.
|
||||
beta2 = beta ** 2
|
||||
f_score = (1 + beta2) * (precision * recall) / (beta2 * precision + recall)
|
||||
return f_score
|
||||
def fmeasure(y_true, y_pred):
    '''Calculates the f-measure, the harmonic mean of precision and recall.

    Delegates to `fbeta_score` with `beta=1`.
    '''
    return fbeta_score(y_true, y_pred, beta=1)
|
||||
|
||||
|
||||
# aliases
|
||||
@@ -194,6 +209,7 @@ mae = MAE = mean_absolute_error
|
||||
mape = MAPE = mean_absolute_percentage_error
|
||||
msle = MSLE = mean_squared_logarithmic_error
|
||||
cosine = cosine_proximity
|
||||
fscore = f1score = fmeasure
|
||||
|
||||
|
||||
def get(identifier):
|
||||
|
||||
+128
-84
@@ -6,11 +6,11 @@ import os
|
||||
import numpy as np
|
||||
|
||||
from . import backend as K
|
||||
from . import optimizers
|
||||
from .utils.io_utils import ask_to_proceed_with_overwrite
|
||||
from .engine.training import Model
|
||||
from .engine.topology import get_source_inputs, Node, Layer
|
||||
from .engine.topology import get_source_inputs, Node, Layer, Merge
|
||||
from .optimizers import optimizer_from_config
|
||||
from .legacy.models import Graph
|
||||
|
||||
|
||||
def save_model(model, filepath, overwrite=True):
|
||||
@@ -27,7 +27,7 @@ def save_model(model, filepath, overwrite=True):
|
||||
return obj.item()
|
||||
|
||||
# misc functions (e.g. loss function)
|
||||
if hasattr(obj, '__call__'):
|
||||
if callable(obj):
|
||||
return obj.__name__
|
||||
|
||||
# if obj is a python 'type'
|
||||
@@ -56,40 +56,52 @@ def save_model(model, filepath, overwrite=True):
|
||||
model.save_weights_to_hdf5_group(model_weights_group)
|
||||
|
||||
if hasattr(model, 'optimizer'):
|
||||
f.attrs['training_config'] = json.dumps({
|
||||
'optimizer_config': {
|
||||
'class_name': model.optimizer.__class__.__name__,
|
||||
'config': model.optimizer.get_config()
|
||||
},
|
||||
'loss': model.loss,
|
||||
'metrics': model.metrics,
|
||||
'sample_weight_mode': model.sample_weight_mode,
|
||||
'loss_weights': model.loss_weights,
|
||||
}, default=get_json_type).encode('utf8')
|
||||
if isinstance(model.optimizer, optimizers.TFOptimizer):
|
||||
warnings.warn(
|
||||
'TensorFlow optimizers do not '
|
||||
'make it possible to access '
|
||||
'optimizer attributes or optimizer state '
|
||||
'after instantiation. '
|
||||
'As a result, we cannot save the optimizer '
|
||||
'as part of the model save file.'
|
||||
'You will have to compile your model again after loading it. '
|
||||
'Prefer using a Keras optimizer instead '
|
||||
'(see keras.io/optimizers).')
|
||||
else:
|
||||
f.attrs['training_config'] = json.dumps({
|
||||
'optimizer_config': {
|
||||
'class_name': model.optimizer.__class__.__name__,
|
||||
'config': model.optimizer.get_config()
|
||||
},
|
||||
'loss': model.loss,
|
||||
'metrics': model.metrics,
|
||||
'sample_weight_mode': model.sample_weight_mode,
|
||||
'loss_weights': model.loss_weights,
|
||||
}, default=get_json_type).encode('utf8')
|
||||
|
||||
# save optimizer weights
|
||||
symbolic_weights = getattr(model.optimizer, 'weights')
|
||||
if symbolic_weights:
|
||||
optimizer_weights_group = f.create_group('optimizer_weights')
|
||||
weight_values = K.batch_get_value(symbolic_weights)
|
||||
weight_names = []
|
||||
for i, (w, val) in enumerate(zip(symbolic_weights, weight_values)):
|
||||
if hasattr(w, 'name') and w.name:
|
||||
name = str(w.name)
|
||||
else:
|
||||
name = 'param_' + str(i)
|
||||
weight_names.append(name.encode('utf8'))
|
||||
optimizer_weights_group.attrs['weight_names'] = weight_names
|
||||
for name, val in zip(weight_names, weight_values):
|
||||
param_dset = optimizer_weights_group.create_dataset(
|
||||
name,
|
||||
val.shape,
|
||||
dtype=val.dtype)
|
||||
if not val.shape:
|
||||
# scalar
|
||||
param_dset[()] = val
|
||||
else:
|
||||
param_dset[:] = val
|
||||
# save optimizer weights
|
||||
symbolic_weights = getattr(model.optimizer, 'weights')
|
||||
if symbolic_weights:
|
||||
optimizer_weights_group = f.create_group('optimizer_weights')
|
||||
weight_values = K.batch_get_value(symbolic_weights)
|
||||
weight_names = []
|
||||
for i, (w, val) in enumerate(zip(symbolic_weights, weight_values)):
|
||||
if hasattr(w, 'name') and w.name:
|
||||
name = str(w.name)
|
||||
else:
|
||||
name = 'param_' + str(i)
|
||||
weight_names.append(name.encode('utf8'))
|
||||
optimizer_weights_group.attrs['weight_names'] = weight_names
|
||||
for name, val in zip(weight_names, weight_values):
|
||||
param_dset = optimizer_weights_group.create_dataset(
|
||||
name,
|
||||
val.shape,
|
||||
dtype=val.dtype)
|
||||
if not val.shape:
|
||||
# scalar
|
||||
param_dset[()] = val
|
||||
else:
|
||||
param_dset[:] = val
|
||||
f.flush()
|
||||
f.close()
|
||||
|
||||
@@ -97,7 +109,7 @@ def save_model(model, filepath, overwrite=True):
|
||||
def load_model(filepath, custom_objects={}):
|
||||
|
||||
def deserialize(obj):
|
||||
if type(obj) is list:
|
||||
if isinstance(obj, list):
|
||||
deserialized = []
|
||||
for value in obj:
|
||||
if value in custom_objects:
|
||||
@@ -105,7 +117,7 @@ def load_model(filepath, custom_objects={}):
|
||||
else:
|
||||
deserialized.append(value)
|
||||
return deserialized
|
||||
if type(obj) is dict:
|
||||
if isinstance(obj, dict):
|
||||
deserialized = {}
|
||||
for key, value in obj.items():
|
||||
if value in custom_objects:
|
||||
@@ -139,7 +151,7 @@ def load_model(filepath, custom_objects={}):
|
||||
return model
|
||||
training_config = json.loads(training_config.decode('utf-8'))
|
||||
optimizer_config = training_config['optimizer_config']
|
||||
optimizer = optimizer_from_config(optimizer_config)
|
||||
optimizer = optimizer_from_config(optimizer_config, custom_objects=custom_objects)
|
||||
|
||||
# recover loss functions and metrics
|
||||
loss = deserialize(training_config['loss'])
|
||||
@@ -157,7 +169,7 @@ def load_model(filepath, custom_objects={}):
|
||||
# set optimizer weights
|
||||
if 'optimizer_weights' in f:
|
||||
# build train function (to get weight updates)
|
||||
if model.__class__.__name__ == 'Sequential':
|
||||
if isinstance(model, Sequential):
|
||||
model.model._make_train_function()
|
||||
else:
|
||||
model._make_train_function()
|
||||
@@ -172,8 +184,9 @@ def load_model(filepath, custom_objects={}):
|
||||
def model_from_config(config, custom_objects={}):
    '''Instantiates an object from a config dictionary.

    The work is delegated to `layer_from_config`; this function only
    rejects list configs up front with a helpful message.

    # Arguments
        config: configuration dictionary, forwarded unchanged
            to `layer_from_config`.
        custom_objects: dictionary forwarded unchanged to
            `layer_from_config` as its `custom_objects` argument.
            This function only reads it, so the shared default
            dict is never mutated here.

    # Returns
        Whatever `layer_from_config` returns for `config`.

    # Raises
        TypeError: if `config` is a list (list configs belong to
            `Sequential.from_config`).
    '''
    # Validate first so that a wrong argument type is reported
    # before the lazy import below is attempted.
    if isinstance(config, list):
        raise TypeError('`model_from_config` expects a dictionary, '
                        'not a list. Maybe you meant to use '
                        '`Sequential.from_config(config)`?')
    # Imported at call time, as in the original block.
    from keras.utils.layer_utils import layer_from_config
    return layer_from_config(config, custom_objects=custom_objects)
|
||||
|
||||
|
||||
@@ -238,7 +251,7 @@ class Sequential(Model):
|
||||
self.model = None # internal Model instance
|
||||
self.inputs = [] # tensors
|
||||
self.outputs = [] # tensors (length 1)
|
||||
self.trainable = True
|
||||
self._trainable = True
|
||||
|
||||
# model attributes
|
||||
self.inbound_nodes = []
|
||||
@@ -261,17 +274,18 @@ class Sequential(Model):
|
||||
layer: layer instance.
|
||||
'''
|
||||
if not isinstance(layer, Layer):
|
||||
raise ValueError('The added layer must be '
|
||||
'an instance of class Layer. '
|
||||
'Found: ' + str(layer))
|
||||
raise TypeError('The added layer must be '
|
||||
'an instance of class Layer. '
|
||||
'Found: ' + str(layer))
|
||||
if not self.outputs:
|
||||
# first layer in model: check that it is an input layer
|
||||
if len(layer.inbound_nodes) == 0:
|
||||
# create an input layer
|
||||
if not hasattr(layer, 'batch_input_shape'):
|
||||
raise Exception('The first layer in a Sequential model must '
|
||||
'get an `input_shape` or '
|
||||
'`batch_input_shape` argument.')
|
||||
raise ValueError('The first layer in a '
|
||||
'Sequential model must '
|
||||
'get an `input_shape` or '
|
||||
'`batch_input_shape` argument.')
|
||||
batch_input_shape = layer.batch_input_shape
|
||||
if hasattr(layer, 'input_dtype'):
|
||||
input_dtype = layer.input_dtype
|
||||
@@ -280,17 +294,18 @@ class Sequential(Model):
|
||||
layer.create_input_layer(batch_input_shape, input_dtype)
|
||||
|
||||
if len(layer.inbound_nodes) != 1:
|
||||
raise Exception('A layer added to a Sequential model must '
|
||||
'not already be connected somewhere else. '
|
||||
'Model received layer ' + layer.name +
|
||||
' which has ' + str(len(layer.inbound_nodes)) +
|
||||
' pre-existing inbound connections.')
|
||||
raise ValueError('A layer added to a Sequential model must '
|
||||
'not already be connected somewhere else. '
|
||||
'Model received layer ' + layer.name +
|
||||
' which has ' +
|
||||
str(len(layer.inbound_nodes)) +
|
||||
' pre-existing inbound connections.')
|
||||
|
||||
if len(layer.inbound_nodes[0].output_tensors) != 1:
|
||||
raise Exception('All layers in a Sequential model '
|
||||
'should have a single output tensor. '
|
||||
'For multi-output layers, '
|
||||
'use the functional API.')
|
||||
raise ValueError('All layers in a Sequential model '
|
||||
'should have a single output tensor. '
|
||||
'For multi-output layers, '
|
||||
'use the functional API.')
|
||||
|
||||
self.outputs = [layer.inbound_nodes[0].output_tensors[0]]
|
||||
self.inputs = get_source_inputs(self.outputs[0])
|
||||
@@ -310,8 +325,8 @@ class Sequential(Model):
|
||||
output_shapes=[self.outputs[0]._keras_shape])
|
||||
else:
|
||||
output_tensor = layer(self.outputs[0])
|
||||
if type(output_tensor) is list:
|
||||
raise Exception('All layers in a Sequential model '
|
||||
if isinstance(output_tensor, list):
|
||||
raise TypeError('All layers in a Sequential model '
|
||||
'should have a single output tensor. '
|
||||
'For multi-output layers, '
|
||||
'use the functional API.')
|
||||
@@ -328,7 +343,7 @@ class Sequential(Model):
|
||||
'''Removes the last layer in the model.
|
||||
'''
|
||||
if not self.layers:
|
||||
raise Exception('There are no layers in the model.')
|
||||
raise TypeError('There are no layers in the model.')
|
||||
|
||||
self.layers.pop()
|
||||
if not self.layers:
|
||||
@@ -367,10 +382,12 @@ class Sequential(Model):
|
||||
|
||||
def build(self, input_shape=None):
|
||||
if not self.inputs or not self.outputs:
|
||||
raise Exception('Sequential model cannot be built: model is empty.'
|
||||
raise TypeError('Sequential model cannot be built: model is empty.'
|
||||
' Add some layers first.')
|
||||
# actually create the model
|
||||
self.model = Model(self.inputs, self.outputs[0], name=self.name + '_model')
|
||||
self.model = Model(self.inputs, self.outputs[0],
|
||||
name=self.name + '_model')
|
||||
self.model.trainable = self.trainable
|
||||
|
||||
# mirror model attributes
|
||||
self.supports_masking = self.model.supports_masking
|
||||
@@ -405,7 +422,7 @@ class Sequential(Model):
|
||||
return self._flattened_layers
|
||||
layers = []
|
||||
if self.layers:
|
||||
if self.layers[0].__class__.__name__ == 'Merge':
|
||||
if isinstance(self.layers[0], Merge):
|
||||
merge = self.layers[0]
|
||||
for layer in merge.layers:
|
||||
if hasattr(layer, 'flattened_layers'):
|
||||
@@ -442,6 +459,16 @@ class Sequential(Model):
|
||||
list(layer_dict.items()))
|
||||
return all_attrs
|
||||
|
||||
@property
def trainable(self):
    '''Whether the model is trainable (value stored in `_trainable`).'''
    return self._trainable

@trainable.setter
def trainable(self, value):
    '''Sets the flag and mirrors it onto the internal model, if any.'''
    # Explicit `None` check (as used elsewhere in this class) so that the
    # internal model is kept in sync regardless of its truthiness.
    if self.model is not None:
        self.model.trainable = value
    self._trainable = value
|
||||
|
||||
@property
|
||||
def trainable_weights(self):
|
||||
if not self.trainable:
|
||||
@@ -460,13 +487,22 @@ class Sequential(Model):
|
||||
|
||||
@property
def updates(self):
    '''Returns the `updates` of the internal model.

    The internal model is created lazily: when `self.model` is still
    `None`, `self.build()` is called first so that the attribute access
    does not fail on `None`.
    '''
    if self.model is None:
        self.build()
    return self.model.updates
|
||||
|
||||
@property
def state_updates(self):
    '''Returns the `state_updates` of the internal model.

    When `self.model` is still `None`, `self.build()` is called first
    so that the attribute access does not fail on `None`.
    '''
    if self.model is None:
        self.build()
    return self.model.state_updates
|
||||
|
||||
def get_updates_for(self, inputs):
    '''Forwards to `get_updates_for(inputs)` on the internal model.

    When `self.model` is still `None`, `self.build()` is called first
    so that the call does not fail on `None`.
    '''
    if self.model is None:
        self.build()
    return self.model.get_updates_for(inputs)
|
||||
|
||||
@property
def losses(self):
    '''Returns the `losses` of the internal model.

    When `self.model` is still `None`, `self.build()` is called first
    so that the attribute access does not fail on `None`.
    '''
    if self.model is None:
        self.build()
    return self.model.losses
|
||||
|
||||
def get_losses_for(self, inputs):
    '''Forwards to `get_losses_for(inputs)` on the internal model.

    When `self.model` is still `None`, `self.build()` is called first
    so that the call does not fail on `None`.
    '''
    if self.model is None:
        self.build()
    return self.model.get_losses_for(inputs)
|
||||
|
||||
@property
|
||||
def regularizers(self):
|
||||
@@ -604,7 +640,8 @@ class Sequential(Model):
|
||||
and validation metrics values (if applicable).
|
||||
'''
|
||||
if self.model is None:
|
||||
raise Exception('The model needs to be compiled before being used.')
|
||||
raise RuntimeError('The model needs to be compiled '
|
||||
'before being used.')
|
||||
if 'show_accuracy' in kwargs:
|
||||
kwargs.pop('show_accuracy')
|
||||
warnings.warn('The "show_accuracy" argument is deprecated, '
|
||||
@@ -613,7 +650,7 @@ class Sequential(Model):
|
||||
'`model.compile(optimizer, loss, '
|
||||
'metrics=["accuracy"])`')
|
||||
if kwargs:
|
||||
raise Exception('Received unknown keyword arguments: ' +
|
||||
raise TypeError('Received unknown keyword arguments: ' +
|
||||
str(kwargs))
|
||||
return self.model.fit(x, y,
|
||||
batch_size=batch_size,
|
||||
@@ -645,7 +682,8 @@ class Sequential(Model):
|
||||
the display labels for the scalar outputs.
|
||||
'''
|
||||
if self.model is None:
|
||||
raise Exception('The model needs to be compiled before being used.')
|
||||
raise RuntimeError('The model needs to be compiled '
|
||||
'before being used.')
|
||||
if 'show_accuracy' in kwargs:
|
||||
kwargs.pop('show_accuracy')
|
||||
warnings.warn('The "show_accuracy" argument is deprecated, '
|
||||
@@ -654,7 +692,7 @@ class Sequential(Model):
|
||||
'`model.compile(optimizer, loss, '
|
||||
'metrics=["accuracy"])`')
|
||||
if kwargs:
|
||||
raise Exception('Received unknown keyword arguments: ' +
|
||||
raise TypeError('Received unknown keyword arguments: ' +
|
||||
str(kwargs))
|
||||
return self.model.evaluate(x, y,
|
||||
batch_size=batch_size,
|
||||
@@ -703,7 +741,8 @@ class Sequential(Model):
|
||||
the display labels for the scalar outputs.
|
||||
'''
|
||||
if self.model is None:
|
||||
raise Exception('The model needs to be compiled before being used.')
|
||||
raise RuntimeError('The model needs to be compiled '
|
||||
'before being used.')
|
||||
if 'accuracy' in kwargs:
|
||||
kwargs.pop('accuracy')
|
||||
warnings.warn('The "accuracy" argument is deprecated, '
|
||||
@@ -712,7 +751,7 @@ class Sequential(Model):
|
||||
'`model.compile(optimizer, loss, '
|
||||
'metrics=["accuracy"])`')
|
||||
if kwargs:
|
||||
raise Exception('Received unknown keyword arguments: ' +
|
||||
raise TypeError('Received unknown keyword arguments: ' +
|
||||
str(kwargs))
|
||||
return self.model.train_on_batch(x, y,
|
||||
sample_weight=sample_weight,
|
||||
@@ -735,7 +774,8 @@ class Sequential(Model):
|
||||
the display labels for the scalar outputs.
|
||||
'''
|
||||
if self.model is None:
|
||||
raise Exception('The model needs to be compiled before being used.')
|
||||
raise RuntimeError('The model needs to be compiled '
|
||||
'before being used.')
|
||||
if 'accuracy' in kwargs:
|
||||
kwargs.pop('accuracy')
|
||||
warnings.warn('The "accuracy" argument is deprecated, '
|
||||
@@ -744,7 +784,7 @@ class Sequential(Model):
|
||||
'`model.compile(optimizer, loss, '
|
||||
'metrics=["accuracy"])`')
|
||||
if kwargs:
|
||||
raise Exception('Received unknown keyword arguments: ' +
|
||||
raise TypeError('Received unknown keyword arguments: ' +
|
||||
str(kwargs))
|
||||
return self.model.test_on_batch(x, y,
|
||||
sample_weight=sample_weight)
|
||||
@@ -851,9 +891,11 @@ class Sequential(Model):
|
||||
```
|
||||
'''
|
||||
if self.model is None:
|
||||
raise Exception('The model needs to be compiled before being used.')
|
||||
raise RuntimeError('The model needs to be compiled '
|
||||
'before being used.')
|
||||
if nb_worker > 1 and not pickle_safe:
|
||||
warnings.warn('The "nb_worker" argument is deprecated when pickle_safe is False')
|
||||
warnings.warn('The "nb_worker" argument is deprecated '
|
||||
'when pickle_safe is False')
|
||||
nb_worker = 1 # For backward compatibility
|
||||
if 'show_accuracy' in kwargs:
|
||||
kwargs.pop('show_accuracy')
|
||||
@@ -867,7 +909,7 @@ class Sequential(Model):
|
||||
warnings.warn('The "nb_val_worker" argument is deprecated, '
|
||||
'please remove it from your code.')
|
||||
if kwargs:
|
||||
raise Exception('Received unknown keyword arguments: ' +
|
||||
raise TypeError('Received unknown keyword arguments: ' +
|
||||
str(kwargs))
|
||||
return self.model.fit_generator(generator,
|
||||
samples_per_epoch,
|
||||
@@ -902,9 +944,11 @@ class Sequential(Model):
|
||||
easily to children processes.
|
||||
'''
|
||||
if self.model is None:
|
||||
raise Exception('The model needs to be compiled before being used.')
|
||||
raise RuntimeError('The model needs to be compiled '
|
||||
'before being used.')
|
||||
if nb_worker > 1 and not pickle_safe:
|
||||
warnings.warn('The "nb_worker" argument is deprecated when pickle_safe is False')
|
||||
warnings.warn('The "nb_worker" argument is deprecated '
|
||||
'when pickle_safe is False')
|
||||
nb_worker = 1 # For backward compatibility
|
||||
if 'show_accuracy' in kwargs:
|
||||
kwargs.pop('show_accuracy')
|
||||
@@ -917,7 +961,7 @@ class Sequential(Model):
|
||||
kwargs.pop('verbose')
|
||||
warnings.warn('The "verbose" argument is deprecated.')
|
||||
if kwargs:
|
||||
raise Exception('Received unknown keyword arguments: ' +
|
||||
raise TypeError('Received unknown keyword arguments: ' +
|
||||
str(kwargs))
|
||||
return self.model.evaluate_generator(generator,
|
||||
val_samples,
|
||||
@@ -948,7 +992,8 @@ class Sequential(Model):
|
||||
if self.model is None:
|
||||
self.build()
|
||||
if nb_worker > 1 and not pickle_safe:
|
||||
warnings.warn('The "nb_worker" argument is deprecated when pickle_safe is False')
|
||||
warnings.warn('The "nb_worker" argument is deprecated '
|
||||
'when pickle_safe is False')
|
||||
nb_worker = 1 # For backward compatibility
|
||||
return self.model.predict_generator(generator, val_samples,
|
||||
max_q_size=max_q_size,
|
||||
@@ -960,7 +1005,7 @@ class Sequential(Model):
|
||||
as a Python list.
|
||||
'''
|
||||
config = []
|
||||
if self.layers[0].__class__.__name__ == 'Merge':
|
||||
if isinstance(self.layers[0], Merge):
|
||||
assert hasattr(self.layers[0], 'layers')
|
||||
layers = []
|
||||
for layer in self.layers[0].layers:
|
||||
@@ -984,7 +1029,6 @@ class Sequential(Model):
|
||||
'''
|
||||
from keras.utils.layer_utils import layer_from_config
|
||||
from keras.layers import Merge
|
||||
assert type(config) is list
|
||||
|
||||
if not layer_cache:
|
||||
layer_cache = {}
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
from __future__ import absolute_import
|
||||
import numpy as np
|
||||
from . import backend as K
|
||||
from .utils.generic_utils import get_from_module
|
||||
|
||||
|
||||
def mean_squared_error(y_true, y_pred):
|
||||
@@ -72,6 +73,6 @@ msle = MSLE = mean_squared_logarithmic_error
|
||||
kld = KLD = kullback_leibler_divergence
|
||||
cosine = cosine_proximity
|
||||
|
||||
from .utils.generic_utils import get_from_module
|
||||
|
||||
def get(identifier):
    '''Resolves `identifier` through `get_from_module`.

    The lookup is performed against this module's global namespace,
    with 'objective' passed as the third argument.
    '''
    module_namespace = globals()
    return get_from_module(identifier, module_namespace, 'objective')
|
||||
|
||||
+42
-13
@@ -19,6 +19,7 @@ def optimizer_from_config(config, custom_objects={}):
|
||||
'adam': Adam,
|
||||
'adamax': Adamax,
|
||||
'nadam': Nadam,
|
||||
'tfoptimizer': TFOptimizer,
|
||||
}
|
||||
class_name = config['class_name']
|
||||
if class_name in custom_objects:
|
||||
@@ -47,20 +48,12 @@ class Optimizer(object):
|
||||
allowed_kwargs = {'clipnorm', 'clipvalue'}
|
||||
for k in kwargs:
|
||||
if k not in allowed_kwargs:
|
||||
raise Exception('Unexpected keyword argument '
|
||||
raise TypeError('Unexpected keyword argument '
|
||||
'passed to optimizer: ' + str(k))
|
||||
self.__dict__.update(kwargs)
|
||||
self.updates = []
|
||||
self.weights = []
|
||||
|
||||
def get_state(self):
|
||||
return [K.get_value(u[0]) for u in self.updates]
|
||||
|
||||
def set_state(self, value_list):
|
||||
assert len(self.updates) == len(value_list)
|
||||
for u, v in zip(self.updates, value_list):
|
||||
K.set_value(u[0], v)
|
||||
|
||||
def get_updates(self, params, constraints, loss):
|
||||
raise NotImplementedError
|
||||
|
||||
@@ -91,10 +84,10 @@ class Optimizer(object):
|
||||
param_values = K.batch_get_value(params)
|
||||
for pv, p, w in zip(param_values, params, weights):
|
||||
if pv.shape != w.shape:
|
||||
raise Exception('Optimizer weight shape ' +
|
||||
str(pv.shape) +
|
||||
' not compatible with '
|
||||
'provided weight shape ' + str(w.shape))
|
||||
raise ValueError('Optimizer weight shape ' +
|
||||
str(pv.shape) +
|
||||
' not compatible with '
|
||||
'provided weight shape ' + str(w.shape))
|
||||
weight_value_tuples.append((p, w))
|
||||
K.batch_set_value(weight_value_tuples)
|
||||
|
||||
@@ -570,6 +563,36 @@ class Nadam(Optimizer):
|
||||
return dict(list(base_config.items()) + list(config.items()))
|
||||
|
||||
|
||||
class TFOptimizer(Optimizer):
    '''Adapter around an externally created optimizer object.

    The wrapped object must provide `compute_gradients(loss, params)`
    and `apply_gradients(grads, global_step=...)`; these are the only
    two calls made on it. Weights, config export and config import are
    not supported and raise `NotImplementedError`.
    '''

    def __init__(self, optimizer):
        # The base-class constructor is not invoked here.
        self.updates = []
        self.iterations = K.variable(0.)
        self.optimizer = optimizer

    def get_updates(self, params, constraints, loss):
        '''Appends one update op for `loss` w.r.t. `params`.

        # Raises
            ValueError: if `constraints` is non-empty.
        '''
        if constraints:
            raise ValueError('TF optimizers do not support '
                             'weights constraints. Either remove '
                             'all weights constraints in your model, '
                             'or use a Keras optimizer.')
        gradients = self.optimizer.compute_gradients(loss, params)
        # Every call appends one more op to the same list, which is
        # also the list handed back to the caller.
        self.updates.append(
            self.optimizer.apply_gradients(gradients,
                                           global_step=self.iterations))
        return self.updates

    @property
    def weights(self):
        # Not available for this wrapper.
        raise NotImplementedError

    def get_config(self):
        # Not available for this wrapper.
        raise NotImplementedError

    def from_config(self, config):
        # Not available for this wrapper.
        raise NotImplementedError
|
||||
|
||||
|
||||
# aliases
|
||||
sgd = SGD
|
||||
rmsprop = RMSprop
|
||||
@@ -581,5 +604,11 @@ nadam = Nadam
|
||||
|
||||
|
||||
def get(identifier, kwargs=None):
    '''Returns an optimizer for `identifier`.

    On the 'tensorflow' backend, an instance of `tf.train.Optimizer`
    is wrapped in a `TFOptimizer`. In every other case the lookup is
    delegated to `get_from_module` with `instantiate=True`.

    # Arguments
        identifier: optimizer identifier, or a `tf.train.Optimizer`
            instance.
        kwargs: forwarded to `get_from_module` as `kwargs`.
    '''
    if K.backend() == 'tensorflow':
        # TensorFlow is imported only on this branch.
        import tensorflow as tf
        is_native_tf_optimizer = isinstance(identifier, tf.train.Optimizer)
        if is_native_tf_optimizer:
            return TFOptimizer(identifier)
    # Fall through: resolve against this module's globals.
    return get_from_module(identifier, globals(), 'optimizer',
                           instantiate=True, kwargs=kwargs)
|
||||
|
||||
+145
-57
@@ -12,6 +12,7 @@ import scipy.ndimage as ndi
|
||||
from six.moves import range
|
||||
import os
|
||||
import threading
|
||||
import warnings
|
||||
|
||||
from .. import backend as K
|
||||
|
||||
@@ -59,8 +60,8 @@ def random_shear(x, intensity, row_index=1, col_index=2, channel_index=0,
|
||||
def random_zoom(x, zoom_range, row_index=1, col_index=2, channel_index=0,
|
||||
fill_mode='nearest', cval=0.):
|
||||
if len(zoom_range) != 2:
|
||||
raise Exception('zoom_range should be a tuple or list of two floats. '
|
||||
'Received arg: ', zoom_range)
|
||||
raise ValueError('zoom_range should be a tuple or list of two floats. '
|
||||
'Received arg: ', zoom_range)
|
||||
|
||||
if zoom_range[0] == 1 and zoom_range[1] == 1:
|
||||
zx, zy = 1, 1
|
||||
@@ -120,8 +121,19 @@ def flip_axis(x, axis):
|
||||
|
||||
def array_to_img(x, dim_ordering='default', scale=True):
|
||||
from PIL import Image
|
||||
x = np.asarray(x)
|
||||
if x.ndim != 3:
|
||||
raise ValueError('Expected image array to have rank 3 (single image). '
|
||||
'Got array with shape:', x.shape)
|
||||
|
||||
if dim_ordering == 'default':
|
||||
dim_ordering = K.image_dim_ordering()
|
||||
if dim_ordering not in {'th', 'tf'}:
|
||||
raise ValueError('Invalid dim_ordering:', dim_ordering)
|
||||
|
||||
# Original Numpy array x has format (height, width, channel)
|
||||
# or (channel, height, width)
|
||||
# but target PIL image has format (width, height, channel)
|
||||
if dim_ordering == 'th':
|
||||
x = x.transpose(1, 2, 0)
|
||||
if scale:
|
||||
@@ -137,15 +149,17 @@ def array_to_img(x, dim_ordering='default', scale=True):
|
||||
# grayscale
|
||||
return Image.fromarray(x[:, :, 0].astype('uint8'), 'L')
|
||||
else:
|
||||
raise Exception('Unsupported channel number: ', x.shape[2])
|
||||
raise ValueError('Unsupported channel number: ', x.shape[2])
|
||||
|
||||
|
||||
def img_to_array(img, dim_ordering='default'):
|
||||
if dim_ordering == 'default':
|
||||
dim_ordering = K.image_dim_ordering()
|
||||
if dim_ordering not in ['th', 'tf']:
|
||||
raise Exception('Unknown dim_ordering: ', dim_ordering)
|
||||
# image has dim_ordering (height, width, channel)
|
||||
if dim_ordering not in {'th', 'tf'}:
|
||||
raise ValueError('Unknown dim_ordering: ', dim_ordering)
|
||||
# Numpy array x has format (height, width, channel)
|
||||
# or (channel, height, width)
|
||||
# but original PIL image has format (width, height, channel)
|
||||
x = np.asarray(img, dtype='float32')
|
||||
if len(x.shape) == 3:
|
||||
if dim_ordering == 'th':
|
||||
@@ -156,7 +170,7 @@ def img_to_array(img, dim_ordering='default'):
|
||||
else:
|
||||
x = x.reshape((x.shape[0], x.shape[1], 1))
|
||||
else:
|
||||
raise Exception('Unsupported image shape: ', x.shape)
|
||||
raise ValueError('Unsupported image shape: ', x.shape)
|
||||
return x
|
||||
|
||||
|
||||
@@ -181,8 +195,9 @@ def load_img(path, grayscale=False, target_size=None):
|
||||
|
||||
|
||||
def list_pictures(directory, ext='jpg|jpeg|bmp|png'):
    '''Lists picture files found under `directory`, recursively.

    # Arguments
        directory: root directory handed to `os.walk`.
        ext: `|`-separated extensions; the string is inserted verbatim
            into a regular expression as an alternation.

    # Returns
        List of file paths, each joined with the directory it was found
        in, in the order produced by `os.walk`.
    '''
    # Raw string: `\w` and `\.` are regex escapes, not string escapes.
    # Compiled once instead of being rebuilt for every file name.
    # `match` anchors at the start of the name only, so a name must begin
    # with word characters directly followed by `.<ext>`.
    picture_name = re.compile(r'([\w]+\.(?:' + ext + '))')
    return [os.path.join(root, f)
            for root, _, files in os.walk(directory) for f in files
            if picture_name.match(f)]
|
||||
|
||||
|
||||
class ImageDataGenerator(object):
|
||||
@@ -211,8 +226,12 @@ class ImageDataGenerator(object):
|
||||
horizontal_flip: whether to randomly flip images horizontally.
|
||||
vertical_flip: whether to randomly flip images vertically.
|
||||
rescale: rescaling factor. If None or 0, no rescaling is applied,
|
||||
otherwise we multiply the data by the value provided (before applying
|
||||
any other transformation).
|
||||
otherwise we multiply the data by the value provided
|
||||
(before applying any other transformation).
|
||||
preprocessing_function: function that will be applied on each input.
|
||||
The function will run before any other modification on it.
|
||||
The function should take one argument: one image (Numpy tensor with rank 3),
|
||||
and should output a Numpy tensor with the same shape.
|
||||
dim_ordering: 'th' or 'tf'. In 'th' mode, the channels dimension
|
||||
(the depth) is at index 1, in 'tf' mode it is at index 3.
|
||||
It defaults to the `image_dim_ordering` value found in your
|
||||
@@ -236,6 +255,7 @@ class ImageDataGenerator(object):
|
||||
horizontal_flip=False,
|
||||
vertical_flip=False,
|
||||
rescale=None,
|
||||
preprocessing_function=None,
|
||||
dim_ordering='default'):
|
||||
if dim_ordering == 'default':
|
||||
dim_ordering = K.image_dim_ordering()
|
||||
@@ -244,11 +264,12 @@ class ImageDataGenerator(object):
|
||||
self.std = None
|
||||
self.principal_components = None
|
||||
self.rescale = rescale
|
||||
self.preprocessing_function = preprocessing_function
|
||||
|
||||
if dim_ordering not in {'tf', 'th'}:
|
||||
raise Exception('dim_ordering should be "tf" (channel after row and '
|
||||
'column) or "th" (channel before row and column). '
|
||||
'Received arg: ', dim_ordering)
|
||||
raise ValueError('dim_ordering should be "tf" (channel after row and '
|
||||
'column) or "th" (channel before row and column). '
|
||||
'Received arg: ', dim_ordering)
|
||||
self.dim_ordering = dim_ordering
|
||||
if dim_ordering == 'th':
|
||||
self.channel_index = 1
|
||||
@@ -264,9 +285,9 @@ class ImageDataGenerator(object):
|
||||
elif len(zoom_range) == 2:
|
||||
self.zoom_range = [zoom_range[0], zoom_range[1]]
|
||||
else:
|
||||
raise Exception('zoom_range should be a float or '
|
||||
'a tuple or list of two floats. '
|
||||
'Received arg: ', zoom_range)
|
||||
raise ValueError('zoom_range should be a float or '
|
||||
'a tuple or list of two floats. '
|
||||
'Received arg: ', zoom_range)
|
||||
|
||||
def flow(self, X, y=None, batch_size=32, shuffle=True, seed=None,
|
||||
save_to_dir=None, save_prefix='', save_format='jpeg'):
|
||||
@@ -280,16 +301,20 @@ class ImageDataGenerator(object):
|
||||
target_size=(256, 256), color_mode='rgb',
|
||||
classes=None, class_mode='categorical',
|
||||
batch_size=32, shuffle=True, seed=None,
|
||||
save_to_dir=None, save_prefix='', save_format='jpeg'):
|
||||
save_to_dir=None, save_prefix='', save_format='jpeg',
|
||||
follow_links=False):
|
||||
return DirectoryIterator(
|
||||
directory, self,
|
||||
target_size=target_size, color_mode=color_mode,
|
||||
classes=classes, class_mode=class_mode,
|
||||
dim_ordering=self.dim_ordering,
|
||||
batch_size=batch_size, shuffle=shuffle, seed=seed,
|
||||
save_to_dir=save_to_dir, save_prefix=save_prefix, save_format=save_format)
|
||||
save_to_dir=save_to_dir, save_prefix=save_prefix, save_format=save_format,
|
||||
follow_links=follow_links)
|
||||
|
||||
def standardize(self, x):
|
||||
if self.preprocessing_function:
|
||||
x = self.preprocessing_function(x)
|
||||
if self.rescale:
|
||||
x *= self.rescale
|
||||
# x is a single image, so it doesn't have image number at index 0
|
||||
@@ -300,15 +325,31 @@ class ImageDataGenerator(object):
|
||||
x /= (np.std(x, axis=img_channel_index, keepdims=True) + 1e-7)
|
||||
|
||||
if self.featurewise_center:
|
||||
x -= self.mean
|
||||
if self.mean is not None:
|
||||
x -= self.mean
|
||||
else:
|
||||
warnings.warn('This ImageDataGenerator specifies '
|
||||
'`featurewise_center`, but it hasn\'t'
|
||||
'been fit on any training data. Fit it '
|
||||
'first by calling `.fit(numpy_data)`.')
|
||||
if self.featurewise_std_normalization:
|
||||
x /= (self.std + 1e-7)
|
||||
|
||||
if self.std is not None:
|
||||
x /= (self.std + 1e-7)
|
||||
else:
|
||||
warnings.warn('This ImageDataGenerator specifies '
|
||||
'`featurewise_std_normalization`, but it hasn\'t'
|
||||
'been fit on any training data. Fit it '
|
||||
'first by calling `.fit(numpy_data)`.')
|
||||
if self.zca_whitening:
|
||||
flatx = np.reshape(x, (x.size))
|
||||
whitex = np.dot(flatx, self.principal_components)
|
||||
x = np.reshape(whitex, (x.shape[0], x.shape[1], x.shape[2]))
|
||||
|
||||
if self.principal_components is not None:
|
||||
flatx = np.reshape(x, (x.size))
|
||||
whitex = np.dot(flatx, self.principal_components)
|
||||
x = np.reshape(whitex, (x.shape[0], x.shape[1], x.shape[2]))
|
||||
else:
|
||||
warnings.warn('This ImageDataGenerator specifies '
|
||||
'`zca_whitening`, but it hasn\'t'
|
||||
'been fit on any training data. Fit it '
|
||||
'first by calling `.fit(numpy_data)`.')
|
||||
return x
|
||||
|
||||
def random_transform(self, x):
|
||||
@@ -384,12 +425,28 @@ class ImageDataGenerator(object):
|
||||
and zca_whitening.
|
||||
|
||||
# Arguments
|
||||
X: Numpy array, the data to fit on.
|
||||
augment: whether to fit on randomly augmented samples
|
||||
rounds: if `augment`,
|
||||
X: Numpy array, the data to fit on. Should have rank 4.
|
||||
In case of grayscale data,
|
||||
the channels axis should have value 1, and in case
|
||||
of RGB data, it should have value 3.
|
||||
augment: Whether to fit on randomly augmented samples
|
||||
rounds: If `augment`,
|
||||
how many augmentation passes to do over the data
|
||||
seed: random seed.
|
||||
'''
|
||||
X = np.asarray(X)
|
||||
if X.ndim != 4:
|
||||
raise ValueError('Input to `.fit()` should have rank 4. '
|
||||
'Got array with shape: ' + str(X.shape))
|
||||
if X.shape[self.channel_index] not in {1, 3, 4}:
|
||||
raise ValueError(
|
||||
'Expected input to be images (as Numpy array) '
|
||||
'following the dimension ordering convention "' + self.dim_ordering + '" '
|
||||
'(channels on axis ' + str(self.channel_index) + '), i.e. expected '
|
||||
'either 1, 3 or 4 channels on axis ' + str(self.channel_index) + '. '
|
||||
'However, it was passed an array with shape ' + str(X.shape) +
|
||||
' (' + str(X.shape[self.channel_index]) + ' channels).')
|
||||
|
||||
if seed is not None:
|
||||
np.random.seed(seed)
|
||||
|
||||
@@ -402,12 +459,18 @@ class ImageDataGenerator(object):
|
||||
X = aX
|
||||
|
||||
if self.featurewise_center:
|
||||
self.mean = np.mean(X, axis=0)
|
||||
self.mean = np.mean(X, axis=(0, self.row_index, self.col_index))
|
||||
broadcast_shape = [1, 1, 1]
|
||||
broadcast_shape[self.channel_index - 1] = X.shape[self.channel_index]
|
||||
self.mean = np.reshape(self.mean, broadcast_shape)
|
||||
X -= self.mean
|
||||
|
||||
if self.featurewise_std_normalization:
|
||||
self.std = np.std(X, axis=0)
|
||||
X /= (self.std + 1e-7)
|
||||
self.std = np.std(X, axis=(0, self.row_index, self.col_index))
|
||||
broadcast_shape = [1, 1, 1]
|
||||
broadcast_shape[self.channel_index - 1] = X.shape[self.channel_index]
|
||||
self.std = np.reshape(self.std, broadcast_shape)
|
||||
X /= (self.std + K.epsilon())
|
||||
|
||||
if self.zca_whitening:
|
||||
flatX = np.reshape(X, (X.shape[0], X.shape[1] * X.shape[2] * X.shape[3]))
|
||||
@@ -468,13 +531,28 @@ class NumpyArrayIterator(Iterator):
|
||||
dim_ordering='default',
|
||||
save_to_dir=None, save_prefix='', save_format='jpeg'):
|
||||
if y is not None and len(X) != len(y):
|
||||
raise Exception('X (images tensor) and y (labels) '
|
||||
'should have the same length. '
|
||||
'Found: X.shape = %s, y.shape = %s' % (np.asarray(X).shape, np.asarray(y).shape))
|
||||
raise ValueError('X (images tensor) and y (labels) '
|
||||
'should have the same length. '
|
||||
'Found: X.shape = %s, y.shape = %s' % (np.asarray(X).shape, np.asarray(y).shape))
|
||||
if dim_ordering == 'default':
|
||||
dim_ordering = K.image_dim_ordering()
|
||||
self.X = X
|
||||
self.y = y
|
||||
self.X = np.asarray(X)
|
||||
if self.X.ndim != 4:
|
||||
raise ValueError('Input data in `NumpyArrayIterator` '
|
||||
'should have rank 4. You passed an array '
|
||||
'with shape', self.X.shape)
|
||||
channels_axis = 3 if dim_ordering == 'tf' else 1
|
||||
if self.X.shape[channels_axis] not in {1, 3, 4}:
|
||||
raise ValueError('NumpyArrayIterator is set to use the '
|
||||
'dimension ordering convention "' + dim_ordering + '" '
|
||||
'(channels on axis ' + str(channels_axis) + '), i.e. expected '
|
||||
'either 1, 3 or 4 channels on axis ' + str(channels_axis) + '. '
|
||||
'However, it was passed an array with shape ' + str(self.X.shape) +
|
||||
' (' + str(self.X.shape[channels_axis]) + ' channels).')
|
||||
if y is not None:
|
||||
self.y = np.asarray(y)
|
||||
else:
|
||||
self.y = None
|
||||
self.image_data_generator = image_data_generator
|
||||
self.dim_ordering = dim_ordering
|
||||
self.save_to_dir = save_to_dir
|
||||
@@ -517,7 +595,8 @@ class DirectoryIterator(Iterator):
|
||||
dim_ordering='default',
|
||||
classes=None, class_mode='categorical',
|
||||
batch_size=32, shuffle=True, seed=None,
|
||||
save_to_dir=None, save_prefix='', save_format='jpeg'):
|
||||
save_to_dir=None, save_prefix='', save_format='jpeg',
|
||||
follow_links=False):
|
||||
if dim_ordering == 'default':
|
||||
dim_ordering = K.image_dim_ordering()
|
||||
self.directory = directory
|
||||
@@ -561,16 +640,20 @@ class DirectoryIterator(Iterator):
|
||||
self.nb_class = len(classes)
|
||||
self.class_indices = dict(zip(classes, range(len(classes))))
|
||||
|
||||
def _recursive_list(subpath):
|
||||
return sorted(os.walk(subpath, followlinks=follow_links), key=lambda tpl: tpl[0])
|
||||
|
||||
for subdir in classes:
|
||||
subpath = os.path.join(directory, subdir)
|
||||
for fname in sorted(os.listdir(subpath)):
|
||||
is_valid = False
|
||||
for extension in white_list_formats:
|
||||
if fname.lower().endswith('.' + extension):
|
||||
is_valid = True
|
||||
break
|
||||
if is_valid:
|
||||
self.nb_sample += 1
|
||||
for root, dirs, files in _recursive_list(subpath):
|
||||
for fname in files:
|
||||
is_valid = False
|
||||
for extension in white_list_formats:
|
||||
if fname.lower().endswith('.' + extension):
|
||||
is_valid = True
|
||||
break
|
||||
if is_valid:
|
||||
self.nb_sample += 1
|
||||
print('Found %d images belonging to %d classes.' % (self.nb_sample, self.nb_class))
|
||||
|
||||
# second, build an index of the images in the different class subfolders
|
||||
@@ -579,16 +662,19 @@ class DirectoryIterator(Iterator):
|
||||
i = 0
|
||||
for subdir in classes:
|
||||
subpath = os.path.join(directory, subdir)
|
||||
for fname in sorted(os.listdir(subpath)):
|
||||
is_valid = False
|
||||
for extension in white_list_formats:
|
||||
if fname.lower().endswith('.' + extension):
|
||||
is_valid = True
|
||||
break
|
||||
if is_valid:
|
||||
self.classes[i] = self.class_indices[subdir]
|
||||
self.filenames.append(os.path.join(subdir, fname))
|
||||
i += 1
|
||||
for root, dirs, files in _recursive_list(subpath):
|
||||
for fname in files:
|
||||
is_valid = False
|
||||
for extension in white_list_formats:
|
||||
if fname.lower().endswith('.' + extension):
|
||||
is_valid = True
|
||||
break
|
||||
if is_valid:
|
||||
self.classes[i] = self.class_indices[subdir]
|
||||
i += 1
|
||||
# add filename relative to directory
|
||||
absolute_path = os.path.join(root, fname)
|
||||
self.filenames.append(os.path.relpath(absolute_path, directory))
|
||||
super(DirectoryIterator, self).__init__(self.nb_sample, batch_size, shuffle, seed)
|
||||
|
||||
def next(self):
|
||||
@@ -600,7 +686,9 @@ class DirectoryIterator(Iterator):
|
||||
# build batch of image data
|
||||
for i, j in enumerate(index_array):
|
||||
fname = self.filenames[j]
|
||||
img = load_img(os.path.join(self.directory, fname), grayscale=grayscale, target_size=self.target_size)
|
||||
img = load_img(os.path.join(self.directory, fname),
|
||||
grayscale=grayscale,
|
||||
target_size=self.target_size)
|
||||
x = img_to_array(img, dim_ordering=self.dim_ordering)
|
||||
x = self.image_data_generator.random_transform(x)
|
||||
x = self.image_data_generator.standardize(x)
|
||||
|
||||
@@ -179,14 +179,14 @@ class Tokenizer(object):
|
||||
if self.word_index:
|
||||
nb_words = len(self.word_index) + 1
|
||||
else:
|
||||
raise Exception('Specify a dimension (nb_words argument), '
|
||||
'or fit on some text data first.')
|
||||
raise ValueError('Specify a dimension (nb_words argument), '
|
||||
'or fit on some text data first.')
|
||||
else:
|
||||
nb_words = self.nb_words
|
||||
|
||||
if mode == 'tfidf' and not self.document_count:
|
||||
raise Exception('Fit the Tokenizer on some data '
|
||||
'before using tfidf mode.')
|
||||
raise ValueError('Fit the Tokenizer on some data '
|
||||
'before using tfidf mode.')
|
||||
|
||||
X = np.zeros((len(sequences), nb_words))
|
||||
for i, seq in enumerate(sequences):
|
||||
@@ -214,5 +214,5 @@ class Tokenizer(object):
|
||||
idf = np.log(1 + self.document_count / (1 + self.index_docs.get(j, 0)))
|
||||
X[i][j] = tf * idf
|
||||
else:
|
||||
raise Exception('Unknown vectorization mode: ' + str(mode))
|
||||
raise ValueError('Unknown vectorization mode:', mode)
|
||||
return X
|
||||
|
||||
+45
-95
@@ -1,22 +1,27 @@
|
||||
from __future__ import absolute_import
|
||||
from . import backend as K
|
||||
from .utils.generic_utils import get_from_module
|
||||
import warnings
|
||||
|
||||
|
||||
class Regularizer(object):
    '''Base class for regularizers.

    A regularizer is a callable applied to a tensor `x`. This base
    implementation returns `0`; subclasses override `__call__` to
    return their own value.
    '''

    def __call__(self, x):
        '''Returns `0` regardless of `x`.'''
        return 0

    def get_config(self):
        '''Returns a dict holding the class name under the key `name`.'''
        return {'name': self.__class__.__name__}

    def set_param(self, _):
        '''Deprecated no-op: only emits a warning, the argument is ignored.'''
        # stacklevel=2 attributes the warning to the calling code.
        warnings.warn('The `set_param` method on regularizers is deprecated. '
                      'It no longer does anything, '
                      'and it will be removed after 06/2017.',
                      stacklevel=2)

    def set_layer(self, _):
        '''Deprecated no-op: only emits a warning, the argument is ignored.'''
        # stacklevel=2 attributes the warning to the calling code.
        warnings.warn('The `set_layer` method on regularizers is deprecated. '
                      'It no longer does anything, '
                      'and it will be removed after 06/2017.',
                      stacklevel=2)
|
||||
|
||||
|
||||
class EigenvalueRegularizer(Regularizer):
|
||||
'''This takes a constant that controls
|
||||
@@ -28,71 +33,43 @@ class EigenvalueRegularizer(Regularizer):
|
||||
'''
|
||||
def __init__(self, k):
|
||||
self.k = k
|
||||
self.uses_learning_phase = True
|
||||
|
||||
def set_param(self, p):
|
||||
if hasattr(self, 'p'):
|
||||
raise Exception('Regularizers cannot be reused. '
|
||||
'Instantiate one regularizer per layer.')
|
||||
self.p = p
|
||||
def __call__(self, x):
|
||||
if K.ndim(x) != 2:
|
||||
raise ValueError('EigenvalueRegularizer '
|
||||
'is only available for tensors of rank 2.')
|
||||
covariance = K.dot(K.transpose(x), x)
|
||||
dim1, dim2 = K.eval(K.shape(covariance))
|
||||
|
||||
def __call__(self, loss):
|
||||
power = 9 # number of iterations of the power method
|
||||
W = self.p
|
||||
if K.ndim(W) > 2:
|
||||
raise Exception('Eigenvalue Decay regularizer '
|
||||
'is only available for dense '
|
||||
'and embedding layers.')
|
||||
WW = K.dot(K.transpose(W), W)
|
||||
dim1, dim2 = K.eval(K.shape(WW)) # number of neurons in the layer
|
||||
|
||||
# power method for approximating the dominant eigenvector:
|
||||
o = K.ones([dim1, 1]) # initial values for the dominant eigenvector
|
||||
main_eigenvect = K.dot(WW, o)
|
||||
# Power method for approximating the dominant eigenvector:
|
||||
power = 9 # Number of iterations of the power method.
|
||||
o = K.ones([dim1, 1]) # Initial values for the dominant eigenvector.
|
||||
main_eigenvect = K.dot(covariance, o)
|
||||
for n in range(power - 1):
|
||||
main_eigenvect = K.dot(WW, main_eigenvect)
|
||||
main_eigenvect = K.dot(covariance, main_eigenvect)
|
||||
covariance_d = K.dot(covariance, main_eigenvect)
|
||||
|
||||
WWd = K.dot(WW, main_eigenvect)
|
||||
|
||||
# the corresponding dominant eigenvalue:
|
||||
main_eigenval = (K.dot(K.transpose(WWd), main_eigenvect) /
|
||||
# The corresponding dominant eigenvalue:
|
||||
main_eigenval = (K.dot(K.transpose(covariance_d), main_eigenvect) /
|
||||
K.dot(K.transpose(main_eigenvect), main_eigenvect))
|
||||
# multiplied by the given regularization gain
|
||||
regularized_loss = loss + (main_eigenval ** 0.5) * self.k
|
||||
|
||||
return K.in_train_phase(regularized_loss[0, 0], loss)
|
||||
# Multiply by the given regularization gain.
|
||||
regularization = (main_eigenval ** 0.5) * self.k
|
||||
return K.sum(regularization)
|
||||
|
||||
|
||||
class WeightRegularizer(Regularizer):
|
||||
class L1L2Regularizer(Regularizer):
|
||||
|
||||
def __init__(self, l1=0., l2=0.):
|
||||
self.l1 = K.cast_to_floatx(l1)
|
||||
self.l2 = K.cast_to_floatx(l2)
|
||||
self.uses_learning_phase = True
|
||||
self.p = None
|
||||
|
||||
def set_param(self, p):
|
||||
if self.p is not None:
|
||||
raise Exception('Regularizers cannot be reused. '
|
||||
'Instantiate one regularizer per layer.')
|
||||
self.p = p
|
||||
|
||||
def __call__(self, loss):
|
||||
if self.p is None:
|
||||
raise Exception('Need to call `set_param` on '
|
||||
'WeightRegularizer instance '
|
||||
'before calling the instance. '
|
||||
'Check that you are not passing '
|
||||
'a WeightRegularizer instead of an '
|
||||
'ActivityRegularizer '
|
||||
'(i.e. activity_regularizer="l2" instead '
|
||||
'of activity_regularizer="activity_l2".')
|
||||
regularized_loss = loss
|
||||
def __call__(self, x):
|
||||
regularization = 0
|
||||
if self.l1:
|
||||
regularized_loss += K.sum(self.l1 * K.abs(self.p))
|
||||
regularization += K.sum(self.l1 * K.abs(x))
|
||||
if self.l2:
|
||||
regularized_loss += K.sum(self.l2 * K.square(self.p))
|
||||
return K.in_train_phase(regularized_loss, loss)
|
||||
regularization += K.sum(self.l2 * K.square(x))
|
||||
return regularization
|
||||
|
||||
def get_config(self):
|
||||
return {'name': self.__class__.__name__,
|
||||
@@ -100,61 +77,34 @@ class WeightRegularizer(Regularizer):
|
||||
'l2': float(self.l2)}
|
||||
|
||||
|
||||
class ActivityRegularizer(Regularizer):
|
||||
# Aliases.
|
||||
|
||||
def __init__(self, l1=0., l2=0.):
|
||||
self.l1 = K.cast_to_floatx(l1)
|
||||
self.l2 = K.cast_to_floatx(l2)
|
||||
self.uses_learning_phase = True
|
||||
self.layer = None
|
||||
|
||||
def set_layer(self, layer):
|
||||
if self.layer is not None:
|
||||
raise Exception('Regularizers cannot be reused')
|
||||
self.layer = layer
|
||||
|
||||
def __call__(self, loss):
|
||||
if self.layer is None:
|
||||
raise Exception('Need to call `set_layer` on '
|
||||
'ActivityRegularizer instance '
|
||||
'before calling the instance.')
|
||||
regularized_loss = loss
|
||||
for i in range(len(self.layer.inbound_nodes)):
|
||||
output = self.layer.get_output_at(i)
|
||||
if self.l1:
|
||||
regularized_loss += K.sum(self.l1 * K.abs(output))
|
||||
if self.l2:
|
||||
regularized_loss += K.sum(self.l2 * K.square(output))
|
||||
return K.in_train_phase(regularized_loss, loss)
|
||||
|
||||
def get_config(self):
|
||||
return {'name': self.__class__.__name__,
|
||||
'l1': float(self.l1),
|
||||
'l2': float(self.l2)}
|
||||
WeightRegularizer = L1L2Regularizer
|
||||
ActivityRegularizer = L1L2Regularizer
|
||||
|
||||
|
||||
def l1(l=0.01):
|
||||
return WeightRegularizer(l1=l)
|
||||
return L1L2Regularizer(l1=l)
|
||||
|
||||
|
||||
def l2(l=0.01):
|
||||
return WeightRegularizer(l2=l)
|
||||
return L1L2Regularizer(l2=l)
|
||||
|
||||
|
||||
def l1l2(l1=0.01, l2=0.01):
|
||||
return WeightRegularizer(l1=l1, l2=l2)
|
||||
return L1L2Regularizer(l1=l1, l2=l2)
|
||||
|
||||
|
||||
def activity_l1(l=0.01):
|
||||
return ActivityRegularizer(l1=l)
|
||||
return L1L2Regularizer(l1=l)
|
||||
|
||||
|
||||
def activity_l2(l=0.01):
|
||||
return ActivityRegularizer(l2=l)
|
||||
return L1L2Regularizer(l2=l)
|
||||
|
||||
|
||||
def activity_l1l2(l1=0.01, l2=0.01):
|
||||
return ActivityRegularizer(l1=l1, l2=l2)
|
||||
return L1L2Regularizer(l1=l1, l2=l2)
|
||||
|
||||
|
||||
def get(identifier, kwargs=None):
|
||||
|
||||
@@ -12,22 +12,22 @@ def get_from_module(identifier, module_params, module_name,
|
||||
if isinstance(identifier, six.string_types):
|
||||
res = module_params.get(identifier)
|
||||
if not res:
|
||||
raise Exception('Invalid ' + str(module_name) + ': ' +
|
||||
str(identifier))
|
||||
raise ValueError('Invalid ' + str(module_name) + ': ' +
|
||||
str(identifier))
|
||||
if instantiate and not kwargs:
|
||||
return res()
|
||||
elif instantiate and kwargs:
|
||||
return res(**kwargs)
|
||||
else:
|
||||
return res
|
||||
elif type(identifier) is dict:
|
||||
elif isinstance(identifier, dict):
|
||||
name = identifier.pop('name')
|
||||
res = module_params.get(name)
|
||||
if res:
|
||||
return res(**identifier)
|
||||
else:
|
||||
raise Exception('Invalid ' + str(module_name) + ': ' +
|
||||
str(identifier))
|
||||
raise ValueError('Invalid ' + str(module_name) + ': ' +
|
||||
str(identifier))
|
||||
return identifier
|
||||
|
||||
|
||||
@@ -51,32 +51,22 @@ def func_load(code, defaults=None, closure=None, globs=None):
|
||||
if isinstance(code, (tuple, list)): # unpack previous dump
|
||||
code, defaults, closure = code
|
||||
code = marshal.loads(code.encode('raw_unicode_escape'))
|
||||
if closure is not None:
|
||||
closure = func_reconstruct_closure(closure)
|
||||
if globs is None:
|
||||
globs = globals()
|
||||
return python_types.FunctionType(code, globs, name=code.co_name, argdefs=defaults, closure=closure)
|
||||
|
||||
|
||||
def func_reconstruct_closure(values):
    '''Deserialization helper that reconstructs a closure.

    # Arguments
        values: Sequence of objects, one per closure cell to create.

    # Returns
        Tuple of cell objects holding `values` in the same order,
        or `None` when `values` is empty.
    '''
    def make_cell(value):
        # A lambda closing over a single name owns exactly one cell.
        return (lambda: value).__closure__[0]

    # Building the cells one by one keeps them in the order of `values`
    # and avoids generating and exec-ing source in the module globals.
    cells = tuple(make_cell(value) for value in values)
    return cells or None
|
||||
return python_types.FunctionType(code, globs,
|
||||
name=code.co_name,
|
||||
argdefs=defaults,
|
||||
closure=closure)
|
||||
|
||||
|
||||
class Progbar(object):
|
||||
|
||||
def __init__(self, target, width=30, verbose=1, interval=0.01):
|
||||
'''
|
||||
@param target: total number of steps expected
|
||||
@param interval: minimum visual progress update interval (in seconds)
|
||||
'''Displays a progress bar.
|
||||
|
||||
# Arguments:
|
||||
target: Total number of steps expected.
|
||||
interval: Minimum visual progress update interval (in seconds).
|
||||
'''
|
||||
self.width = width
|
||||
self.target = target
|
||||
@@ -90,15 +80,18 @@ class Progbar(object):
|
||||
self.verbose = verbose
|
||||
|
||||
def update(self, current, values=[], force=False):
|
||||
'''
|
||||
@param current: index of current step
|
||||
@param values: list of tuples (name, value_for_last_step).
|
||||
The progress bar will display averages for these values.
|
||||
@param force: force visual progress update
|
||||
'''Updates the progress bar.
|
||||
|
||||
# Arguments
|
||||
current: Index of current step.
|
||||
values: List of tuples (name, value_for_last_step).
|
||||
The progress bar will display averages for these values.
|
||||
force: Whether to force visual progress update.
|
||||
'''
|
||||
for k, v in values:
|
||||
if k not in self.sum_values:
|
||||
self.sum_values[k] = [v * (current - self.seen_so_far), current - self.seen_so_far]
|
||||
self.sum_values[k] = [v * (current - self.seen_so_far),
|
||||
current - self.seen_so_far]
|
||||
self.unique_values.append(k)
|
||||
else:
|
||||
self.sum_values[k][0] += v * (current - self.seen_so_far)
|
||||
@@ -111,8 +104,8 @@ class Progbar(object):
|
||||
return
|
||||
|
||||
prev_total_width = self.total_width
|
||||
sys.stdout.write("\b" * prev_total_width)
|
||||
sys.stdout.write("\r")
|
||||
sys.stdout.write('\b' * prev_total_width)
|
||||
sys.stdout.write('\r')
|
||||
|
||||
numdigits = int(np.floor(np.log10(self.target))) + 1
|
||||
barstr = '%%%dd/%%%dd [' % (numdigits, numdigits)
|
||||
@@ -142,7 +135,7 @@ class Progbar(object):
|
||||
info += ' - %ds' % (now - self.start)
|
||||
for k in self.unique_values:
|
||||
info += ' - %s:' % k
|
||||
if type(self.sum_values[k]) is list:
|
||||
if isinstance(self.sum_values[k], list):
|
||||
avg = self.sum_values[k][0] / max(1, self.sum_values[k][1])
|
||||
if abs(avg) > 1e-3:
|
||||
info += ' %.4f' % avg
|
||||
@@ -153,13 +146,13 @@ class Progbar(object):
|
||||
|
||||
self.total_width += len(info)
|
||||
if prev_total_width > self.total_width:
|
||||
info += ((prev_total_width - self.total_width) * " ")
|
||||
info += ((prev_total_width - self.total_width) * ' ')
|
||||
|
||||
sys.stdout.write(info)
|
||||
sys.stdout.flush()
|
||||
|
||||
if current >= self.target:
|
||||
sys.stdout.write("\n")
|
||||
sys.stdout.write('\n')
|
||||
|
||||
if self.verbose == 2:
|
||||
if current >= self.target:
|
||||
|
||||
@@ -86,7 +86,7 @@ def save_array(array, name):
|
||||
import tables
|
||||
f = tables.open_file(name, 'w')
|
||||
atom = tables.Atom.from_dtype(array.dtype)
|
||||
ds = f.createCArray(f.root, 'data', atom, array.shape)
|
||||
ds = f.create_carray(f.root, 'data', atom, array.shape)
|
||||
ds[:] = array
|
||||
f.close()
|
||||
|
||||
|
||||
@@ -1,9 +1,10 @@
|
||||
from __future__ import print_function
|
||||
import inspect
|
||||
|
||||
from .generic_utils import get_from_module
|
||||
from .np_utils import convert_kernel
|
||||
from ..layers import *
|
||||
from ..models import Model, Sequential, Graph
|
||||
from ..models import Model, Sequential
|
||||
from .. import backend as K
|
||||
|
||||
|
||||
@@ -15,7 +16,7 @@ def layer_from_config(config, custom_objects={}):
|
||||
of custom (non-Keras) objects to class/functions
|
||||
|
||||
# Returns
|
||||
Layer instance (may be Model, Sequential, Graph, Layer...)
|
||||
Layer instance (may be Model, Sequential, Layer...)
|
||||
'''
|
||||
# Insert custom layers into globals so they can
|
||||
# be accessed by `get_from_module`.
|
||||
@@ -26,17 +27,21 @@ def layer_from_config(config, custom_objects={}):
|
||||
|
||||
if class_name == 'Sequential':
|
||||
layer_class = Sequential
|
||||
elif class_name == 'Graph':
|
||||
layer_class = Graph
|
||||
elif class_name in ['Model', 'Container']:
|
||||
layer_class = Model
|
||||
else:
|
||||
layer_class = get_from_module(class_name, globals(), 'layer',
|
||||
instantiate=False)
|
||||
return layer_class.from_config(config['config'])
|
||||
|
||||
arg_spec = inspect.getargspec(layer_class.from_config)
|
||||
if 'custom_objects' in arg_spec.args:
|
||||
return layer_class.from_config(config['config'], custom_objects=custom_objects)
|
||||
else:
|
||||
return layer_class.from_config(config['config'])
|
||||
|
||||
|
||||
def print_summary(layers, relevant_nodes=None, line_length=100, positions=[.33, .55, .67, 1.]):
|
||||
def print_summary(layers, relevant_nodes=None,
|
||||
line_length=100, positions=[.33, .55, .67, 1.]):
|
||||
'''Prints a summary of a layer
|
||||
|
||||
# Arguments
|
||||
@@ -53,6 +58,8 @@ def print_summary(layers, relevant_nodes=None, line_length=100, positions=[.33,
|
||||
def print_row(fields, positions):
|
||||
line = ''
|
||||
for i in range(len(fields)):
|
||||
if i > 0:
|
||||
line = line[:-1] + ' '
|
||||
line += str(fields[i])
|
||||
line = line[:positions[i]]
|
||||
line += ' ' * (positions[i] - len(line))
|
||||
@@ -100,24 +107,33 @@ def print_summary(layers, relevant_nodes=None, line_length=100, positions=[.33,
|
||||
else:
|
||||
print('_' * line_length)
|
||||
|
||||
def count_total_params(layers, layer_set=None):
|
||||
if layer_set is None:
|
||||
layer_set = set()
|
||||
total_params = 0
|
||||
for layer in layers:
|
||||
if layer in layer_set:
|
||||
continue
|
||||
layer_set.add(layer)
|
||||
if type(layer) in (Model, Sequential):
|
||||
total_params += count_total_params(layer.layers, layer_set)
|
||||
else:
|
||||
total_params += layer.count_params()
|
||||
return total_params
|
||||
trainable_count, non_trainable_count = count_total_params(layers, layer_set=None)
|
||||
|
||||
print('Total params: %s' % count_total_params(layers))
|
||||
print('Total params: {:,}'.format(trainable_count + non_trainable_count))
|
||||
print('Trainable params: {:,}'.format(trainable_count))
|
||||
print('Non-trainable params: {:,}'.format(non_trainable_count))
|
||||
print('_' * line_length)
|
||||
|
||||
|
||||
def count_total_params(layers, layer_set=None):
    '''Counts the trainable and non-trainable parameters of `layers`.

    # Arguments
        layers: Iterable of layers. `Model` / `Sequential` entries are
            expanded recursively through their `layers` attribute.
        layer_set: Set of layers that were already counted. Layers found
            in it are skipped and every visited layer is added to it,
            so a layer reached several times is counted once.
            A new set is created when `None`.

    # Returns
        Tuple `(trainable_count, non_trainable_count)`.
    '''
    if layer_set is None:
        layer_set = set()
    trainable_count = 0
    non_trainable_count = 0
    for layer in layers:
        if layer in layer_set:
            continue
        layer_set.add(layer)
        # isinstance (rather than an exact type match) also expands
        # subclasses of the container classes.
        if isinstance(layer, (Model, Sequential)):
            t, nt = count_total_params(layer.layers, layer_set)
            trainable_count += t
            non_trainable_count += nt
        else:
            trainable_count += sum(K.count_params(p)
                                   for p in layer.trainable_weights)
            non_trainable_count += sum(K.count_params(p)
                                       for p in layer.non_trainable_weights)
    return trainable_count, non_trainable_count
|
||||
|
||||
|
||||
def convert_all_kernels_in_model(model):
|
||||
# Note: SeparableConvolution not included
|
||||
# since only supported by TF.
|
||||
|
||||
@@ -16,6 +16,7 @@ def to_categorical(y, nb_classes=None):
|
||||
# Returns
|
||||
A binary matrix representation of the input.
|
||||
'''
|
||||
y = np.array(y, dtype='int')
|
||||
if not nb_classes:
|
||||
nb_classes = np.max(y)+1
|
||||
Y = np.zeros((len(y), nb_classes))
|
||||
@@ -85,7 +86,7 @@ def convert_kernel(kernel, dim_ordering='default'):
|
||||
for j in range(h):
|
||||
new_kernel[i, j, :, :] = kernel[w - i - 1, h - j - 1, :, :]
|
||||
else:
|
||||
raise Exception('Invalid dim_ordering: ' + str(dim_ordering))
|
||||
raise ValueError('Invalid dim_ordering:', dim_ordering)
|
||||
elif kernel.ndim == 5:
|
||||
# conv 3d
|
||||
# TH kernel shape: (out_depth, input_depth, kernel_dim1, kernel_dim2, kernel_dim3)
|
||||
@@ -113,7 +114,7 @@ def convert_kernel(kernel, dim_ordering='default'):
|
||||
z - k - 1,
|
||||
:, :]
|
||||
else:
|
||||
raise Exception('Invalid dim_ordering: ' + str(dim_ordering))
|
||||
raise ValueError('Invalid dim_ordering:', dim_ordering)
|
||||
else:
|
||||
raise ValueError('Invalid kernel shape:', kernel.shape)
|
||||
return new_kernel
|
||||
@@ -122,21 +123,25 @@ def convert_kernel(kernel, dim_ordering='default'):
|
||||
def conv_output_length(input_length, filter_size, border_mode, stride, dilation=1):
    '''Computes the output length of a convolution along one dimension.

    # Arguments
        input_length: Integer length of the input, or `None`.
        filter_size: Integer size of the filter.
        border_mode: One of `'same'`, `'valid'`, `'full'`.
        stride: Integer stride.
        dilation: Integer dilation rate; the filter covers
            `filter_size + (filter_size - 1) * (dilation - 1)` positions.

    # Returns
        The output length (integer), or `None` if `input_length` is `None`.
    '''
    if input_length is None:
        return None
    assert border_mode in {'same', 'valid', 'full'}
    dilated_filter_size = filter_size + (filter_size - 1) * (dilation - 1)
    if border_mode == 'same':
        output_length = input_length
    elif border_mode == 'valid':
        output_length = input_length - dilated_filter_size + 1
    elif border_mode == 'full':
        output_length = input_length + dilated_filter_size - 1
    # Ceiling division by the stride.
    return (output_length + stride - 1) // stride
|
||||
|
||||
|
||||
def conv_input_length(output_length, filter_size, border_mode, stride):
    '''Computes the input length of a convolution from its output length.

    # Arguments
        output_length: Integer length of the output, or `None`.
        filter_size: Integer size of the filter.
        border_mode: One of `'same'`, `'valid'`, `'full'`.
        stride: Integer stride.

    # Returns
        The input length (integer), or `None` if `output_length` is `None`.
    '''
    if output_length is None:
        return None
    assert border_mode in {'same', 'valid', 'full'}
    if border_mode == 'same':
        pad = filter_size // 2
    elif border_mode == 'valid':
        pad = 0
    elif border_mode == 'full':
        pad = filter_size - 1
    return (output_length - 1) * stride - 2 * pad + filter_size
|
||||
|
||||
@@ -45,7 +45,12 @@ def layer_test(layer_cls, kwargs={}, input_shape=None, input_dtype=None,
|
||||
assert input_shape
|
||||
if not input_dtype:
|
||||
input_dtype = K.floatx()
|
||||
input_data = (10 * np.random.random(input_shape)).astype(input_dtype)
|
||||
input_data_shape = list(input_shape)
|
||||
for i, e in enumerate(input_data_shape):
|
||||
if e is None:
|
||||
input_data_shape[i] = np.random.randint(1, 4)
|
||||
input_data = (10 * np.random.random(input_data_shape))
|
||||
input_data = input_data.astype(input_dtype)
|
||||
elif input_shape is None:
|
||||
input_shape = input_data.shape
|
||||
|
||||
@@ -78,7 +83,10 @@ def layer_test(layer_cls, kwargs={}, input_shape=None, input_dtype=None,
|
||||
expected_output_shape = layer.get_output_shape_for(input_shape)
|
||||
actual_output = model.predict(input_data)
|
||||
actual_output_shape = actual_output.shape
|
||||
assert expected_output_shape == actual_output_shape
|
||||
for expected_dim, actual_dim in zip(expected_output_shape,
|
||||
actual_output_shape):
|
||||
if expected_dim is not None:
|
||||
assert expected_dim == actual_dim
|
||||
if expected_output is not None:
|
||||
assert_allclose(actual_output, expected_output, rtol=1e-3)
|
||||
|
||||
@@ -97,7 +105,10 @@ def layer_test(layer_cls, kwargs={}, input_shape=None, input_dtype=None,
|
||||
model.compile('rmsprop', 'mse')
|
||||
actual_output = model.predict(input_data)
|
||||
actual_output_shape = actual_output.shape
|
||||
assert expected_output_shape == actual_output_shape
|
||||
for expected_dim, actual_dim in zip(expected_output_shape,
|
||||
actual_output_shape):
|
||||
if expected_dim is not None:
|
||||
assert expected_dim == actual_dim
|
||||
if expected_output is not None:
|
||||
assert_allclose(actual_output, expected_output, rtol=1e-3)
|
||||
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
import os
|
||||
|
||||
from ..layers.wrappers import Wrapper
|
||||
from ..models import Sequential
|
||||
|
||||
try:
|
||||
# pydot-ng is a fork of pydot that is better maintained
|
||||
@@ -19,7 +20,7 @@ def model_to_dot(model, show_shapes=False, show_layer_names=True):
|
||||
dot.set('concentrate', True)
|
||||
dot.set_node_defaults(shape='record')
|
||||
|
||||
if model.__class__.__name__ == 'Sequential':
|
||||
if isinstance(model, Sequential):
|
||||
if not model.built:
|
||||
model.build()
|
||||
model = model.model
|
||||
@@ -28,13 +29,14 @@ def model_to_dot(model, show_shapes=False, show_layer_names=True):
|
||||
# Create graph nodes.
|
||||
for layer in layers:
|
||||
layer_id = str(id(layer))
|
||||
|
||||
|
||||
# Append a wrapped layer's label to node's label, if it exists.
|
||||
layer_name = layer.name
|
||||
class_name = layer.__class__.__name__
|
||||
if isinstance(layer, Wrapper):
|
||||
layer_name = '{}({})'.format(layer_name, layer.layer.name)
|
||||
class_name = '{}({})'.format(class_name, layer.layer.__class__.__name__)
|
||||
child_class_name = layer.layer.__class__.__name__
|
||||
class_name = '{}({})'.format(class_name, child_class_name)
|
||||
|
||||
# Create node's label.
|
||||
if show_layer_names:
|
||||
|
||||
@@ -242,14 +242,14 @@ class KerasClassifier(BaseWrapper):
|
||||
y = to_categorical(y)
|
||||
|
||||
outputs = self.model.evaluate(X, y, **kwargs)
|
||||
if type(outputs) is not list:
|
||||
if not isinstance(outputs, list):
|
||||
outputs = [outputs]
|
||||
for name, output in zip(self.model.metrics_names, outputs):
|
||||
if name == 'acc':
|
||||
return output
|
||||
raise Exception('The model is not configured to compute accuracy. '
|
||||
'You should pass `metrics=["accuracy"]` to '
|
||||
'the `model.compile()` method.')
|
||||
raise ValueError('The model is not configured to compute accuracy. '
|
||||
'You should pass `metrics=["accuracy"]` to '
|
||||
'the `model.compile()` method.')
|
||||
|
||||
|
||||
class KerasRegressor(BaseWrapper):
|
||||
@@ -290,6 +290,6 @@ class KerasRegressor(BaseWrapper):
|
||||
'''
|
||||
kwargs = self.filter_sk_params(Sequential.evaluate, kwargs)
|
||||
loss = self.model.evaluate(X, y, **kwargs)
|
||||
if type(loss) is list:
|
||||
if isinstance(loss, list):
|
||||
return loss[0]
|
||||
return loss
|
||||
|
||||
+2
-2
@@ -3,12 +3,12 @@ from setuptools import find_packages
|
||||
|
||||
|
||||
setup(name='Keras',
|
||||
version='1.1.1',
|
||||
version='1.2.0',
|
||||
description='Deep Learning for Python',
|
||||
author='Francois Chollet',
|
||||
author_email='francois.chollet@gmail.com',
|
||||
url='https://github.com/fchollet/keras',
|
||||
download_url='https://github.com/fchollet/keras/tarball/1.1.1',
|
||||
download_url='https://github.com/fchollet/keras/tarball/1.2.0',
|
||||
license='MIT',
|
||||
install_requires=['theano', 'pyyaml', 'six'],
|
||||
extras_require={
|
||||
|
||||
@@ -1,14 +1,21 @@
|
||||
import sys
|
||||
import pytest
|
||||
from numpy.testing import assert_allclose
|
||||
import numpy as np
|
||||
import scipy.sparse as sparse
|
||||
|
||||
from keras.backend import theano_backend as KTH
|
||||
from keras import backend as K
|
||||
from keras.backend import theano_backend as KTH, floatx, set_floatx, variable
|
||||
from keras.backend import tensorflow_backend as KTF
|
||||
from keras.utils.np_utils import convert_kernel
|
||||
|
||||
|
||||
def check_dtype(var, dtype):
|
||||
if K._BACKEND == 'theano':
|
||||
assert var.dtype == dtype
|
||||
else:
|
||||
assert var.dtype.name == '%s_ref' % dtype
|
||||
|
||||
|
||||
def check_single_tensor_operation(function_name, input_shape, **kwargs):
|
||||
val = np.random.random(input_shape) - 0.5
|
||||
xth = KTH.variable(val)
|
||||
@@ -626,43 +633,43 @@ class TestBackend(object):
|
||||
mean = 0.
|
||||
std = 1.
|
||||
rand = KTF.eval(KTF.random_normal((1000, 1000), mean=mean, std=std))
|
||||
assert(rand.shape == (1000, 1000))
|
||||
assert(np.abs(np.mean(rand) - mean) < 0.01)
|
||||
assert(np.abs(np.std(rand) - std) < 0.01)
|
||||
assert rand.shape == (1000, 1000)
|
||||
assert np.abs(np.mean(rand) - mean) < 0.01
|
||||
assert np.abs(np.std(rand) - std) < 0.01
|
||||
|
||||
rand = KTH.eval(KTH.random_normal((1000, 1000), mean=mean, std=std))
|
||||
assert(rand.shape == (1000, 1000))
|
||||
assert(np.abs(np.mean(rand) - mean) < 0.01)
|
||||
assert(np.abs(np.std(rand) - std) < 0.01)
|
||||
assert rand.shape == (1000, 1000)
|
||||
assert np.abs(np.mean(rand) - mean) < 0.01
|
||||
assert np.abs(np.std(rand) - std) < 0.01
|
||||
|
||||
def test_random_uniform(self):
|
||||
min = -1.
|
||||
max = 1.
|
||||
rand = KTF.eval(KTF.random_uniform((1000, 1000), min, max))
|
||||
assert(rand.shape == (1000, 1000))
|
||||
assert(np.abs(np.mean(rand)) < 0.01)
|
||||
assert(np.max(rand) <= max)
|
||||
assert(np.min(rand) >= min)
|
||||
min_val = -1.
|
||||
max_val = 1.
|
||||
rand = KTF.eval(KTF.random_uniform((1000, 1000), min_val, max_val))
|
||||
assert rand.shape == (1000, 1000)
|
||||
assert np.abs(np.mean(rand)) < 0.01
|
||||
assert np.max(rand) <= max_val
|
||||
assert np.min(rand) >= min_val
|
||||
|
||||
rand = KTH.eval(KTH.random_uniform((1000, 1000), min, max))
|
||||
assert(rand.shape == (1000, 1000))
|
||||
assert(np.abs(np.mean(rand)) < 0.01)
|
||||
assert(np.max(rand) <= max)
|
||||
assert(np.min(rand) >= min)
|
||||
rand = KTH.eval(KTH.random_uniform((1000, 1000), min_val, max_val))
|
||||
assert rand.shape == (1000, 1000)
|
||||
assert np.abs(np.mean(rand)) < 0.01
|
||||
assert np.max(rand) <= max_val
|
||||
assert np.min(rand) >= min_val
|
||||
|
||||
def test_random_binomial(self):
|
||||
p = 0.5
|
||||
rand = KTF.eval(KTF.random_binomial((1000, 1000), p))
|
||||
assert(rand.shape == (1000, 1000))
|
||||
assert(np.abs(np.mean(rand) - p) < 0.01)
|
||||
assert(np.max(rand) == 1)
|
||||
assert(np.min(rand) == 0)
|
||||
assert rand.shape == (1000, 1000)
|
||||
assert np.abs(np.mean(rand) - p) < 0.01
|
||||
assert np.max(rand) == 1
|
||||
assert np.min(rand) == 0
|
||||
|
||||
rand = KTH.eval(KTH.random_binomial((1000, 1000), p))
|
||||
assert(rand.shape == (1000, 1000))
|
||||
assert(np.abs(np.mean(rand) - p) < 0.01)
|
||||
assert(np.max(rand) == 1)
|
||||
assert(np.min(rand) == 0)
|
||||
assert rand.shape == (1000, 1000)
|
||||
assert np.abs(np.mean(rand) - p) < 0.01
|
||||
assert np.max(rand) == 1
|
||||
assert np.min(rand) == 0
|
||||
|
||||
def test_ctc(self):
|
||||
# simplified version of TensorFlow's test
|
||||
@@ -881,6 +888,96 @@ class TestBackend(object):
|
||||
assert k_s_d.shape == k_d.shape
|
||||
assert_allclose(k_s_d, k_d, atol=1e-05)
|
||||
|
||||
def test_map(self):
|
||||
x = np.random.rand(10, 3).astype(np.float32)
|
||||
for K in [KTF, KTH]:
|
||||
kx = K.eval(K.map_fn(K.sum, x))
|
||||
|
||||
assert (10,) == kx.shape
|
||||
assert_allclose(x.sum(axis=1), kx, atol=1e-05)
|
||||
|
||||
def test_foldl(self):
|
||||
x = np.random.rand(10, 3).astype(np.float32)
|
||||
for K in [KTF, KTH]:
|
||||
kx = K.eval(K.foldl(lambda a, b: a+b, x))
|
||||
|
||||
assert (3,) == kx.shape
|
||||
assert_allclose(x.sum(axis=0), kx, atol=1e-05)
|
||||
|
||||
def test_foldr(self):
|
||||
# This test aims to make sure that we walk the array from right to left
|
||||
# and checks it in the following way: multiplying left to right 1e-40
|
||||
# cannot be held into a float32 so it causes an underflow while from
|
||||
# right to left we have no such problem and the result is larger
|
||||
x = np.array([1e-20, 1e-20, 10, 10, 10], dtype=np.float32)
|
||||
for K in [KTF, KTH]:
|
||||
p1 = K.eval(K.foldl(lambda a, b: a*b, x))
|
||||
p2 = K.eval(K.foldr(lambda a, b: a*b, x))
|
||||
|
||||
assert p1 < p2
|
||||
assert 9e-38 < p2 <= 1e-37
|
||||
|
||||
def test_arange(self):
|
||||
for test_value in (-20, 0, 1, 10):
|
||||
t_a = KTF.arange(test_value)
|
||||
a = KTF.eval(t_a)
|
||||
assert np.array_equal(a, np.arange(test_value))
|
||||
t_b = KTH.arange(test_value)
|
||||
b = KTH.eval(t_b)
|
||||
assert np.array_equal(b, np.arange(test_value))
|
||||
assert np.array_equal(a, b)
|
||||
assert KTF.dtype(t_a) == KTH.dtype(t_b)
|
||||
for start, stop, step in ((0, 5, 1), (-5, 5, 2), (0, 1, 2)):
|
||||
a = KTF.eval(KTF.arange(start, stop, step))
|
||||
assert np.array_equal(a, np.arange(start, stop, step))
|
||||
b = KTH.eval(KTH.arange(start, stop, step))
|
||||
assert np.array_equal(b, np.arange(start, stop, step))
|
||||
assert np.array_equal(a, b)
|
||||
for dtype in ('int32', 'int64', 'float32', 'float64'):
|
||||
for backend in (KTF, KTH):
|
||||
t = backend.arange(10, dtype=dtype)
|
||||
assert backend.dtype(t) == dtype
|
||||
|
||||
def test_setfloatx_incorrect_values(self):
|
||||
# Keep track of the old value
|
||||
old_floatx = floatx()
|
||||
# Try some incorrect values
|
||||
initial = floatx()
|
||||
for value in ['', 'beerfloat', 123]:
|
||||
with pytest.raises(Exception):
|
||||
set_floatx(value)
|
||||
assert floatx() == initial
|
||||
# Restore old value
|
||||
set_floatx(old_floatx)
|
||||
|
||||
def test_setfloatx_correct_values(self):
|
||||
# Keep track of the old value
|
||||
old_floatx = floatx()
|
||||
# Check correct values
|
||||
for value in ['float16', 'float32', 'float64']:
|
||||
set_floatx(value)
|
||||
assert floatx() == value
|
||||
# Restore old value
|
||||
set_floatx(old_floatx)
|
||||
|
||||
def test_set_floatx(self):
|
||||
"""
|
||||
Make sure that changes to the global floatx are effectively
|
||||
taken into account by the backend.
|
||||
"""
|
||||
# Keep track of the old value
|
||||
old_floatx = floatx()
|
||||
|
||||
set_floatx('float16')
|
||||
var = variable([10])
|
||||
check_dtype(var, 'float16')
|
||||
|
||||
set_floatx('float64')
|
||||
var = variable([10])
|
||||
check_dtype(var, 'float64')
|
||||
|
||||
# Restore old value
|
||||
set_floatx(old_floatx)
|
||||
|
||||
if __name__ == '__main__':
|
||||
pytest.main([__file__])
|
||||
|
||||
@@ -4,10 +4,11 @@ from numpy.testing import assert_allclose
|
||||
|
||||
from keras.layers import Dense, Dropout
|
||||
from keras.engine.topology import merge, Input
|
||||
from keras.engine.training import Model
|
||||
from keras.engine.training import Model, check_loss_and_target_compatibility
|
||||
from keras.models import Sequential
|
||||
from keras import backend as K
|
||||
from keras.utils.test_utils import keras_test
|
||||
from keras.callbacks import LambdaCallback
|
||||
|
||||
|
||||
@keras_test
|
||||
@@ -146,6 +147,28 @@ def test_model_methods():
|
||||
[output_a_np, output_b_np])
|
||||
assert len(out) == 4
|
||||
|
||||
# test starting from non-zero initial epoch
|
||||
trained_epochs = []
|
||||
|
||||
def on_epoch_begin(epoch, logs):
|
||||
trained_epochs.append(epoch)
|
||||
tracker_cb = LambdaCallback(on_epoch_begin=on_epoch_begin)
|
||||
out = model.fit([input_a_np, input_b_np],
|
||||
[output_a_np, output_b_np], nb_epoch=5, batch_size=4,
|
||||
initial_epoch=2, callbacks=[tracker_cb])
|
||||
assert trained_epochs == [2, 3, 4]
|
||||
|
||||
# test starting from non-zero initial epoch for generator too
|
||||
trained_epochs = []
|
||||
|
||||
def gen_data(batch_sz):
|
||||
while True:
|
||||
yield ([np.random.random((batch_sz, 3)), np.random.random((batch_sz, 3))],
|
||||
[np.random.random((batch_sz, 4)), np.random.random((batch_sz, 3))])
|
||||
out = model.fit_generator(gen_data(4), samples_per_epoch=10, nb_epoch=5,
|
||||
initial_epoch=2, callbacks=[tracker_cb])
|
||||
assert trained_epochs == [2, 3, 4]
|
||||
|
||||
# test with a custom metric function
|
||||
mse = lambda y_true, y_pred: K.mean(K.pow(y_true - y_pred, 2))
|
||||
|
||||
@@ -202,5 +225,30 @@ def test_trainable_argument():
|
||||
assert_allclose(out, out_2)
|
||||
|
||||
|
||||
@keras_test
|
||||
def test_check_not_failing():
|
||||
a = np.random.random((2, 1, 3))
|
||||
check_loss_and_target_compatibility([a], [K.categorical_crossentropy], [a.shape])
|
||||
check_loss_and_target_compatibility([a], [K.categorical_crossentropy], [(2, None, 3)])
|
||||
|
||||
|
||||
@keras_test
|
||||
def test_check_last_is_one():
|
||||
a = np.random.random((2, 3, 1))
|
||||
with pytest.raises(Exception) as exc:
|
||||
check_loss_and_target_compatibility([a], [K.categorical_crossentropy], [a.shape])
|
||||
|
||||
assert "You are passing a target array" in str(exc)
|
||||
|
||||
|
||||
@keras_test
|
||||
def test_check_bad_shape():
|
||||
a = np.random.random((2, 3, 5))
|
||||
with pytest.raises(Exception) as exc:
|
||||
check_loss_and_target_compatibility([a], [K.categorical_crossentropy], [(2, 3, 6)])
|
||||
|
||||
assert "targets to have the same shape" in str(exc)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
pytest.main([__file__])
|
||||
|
||||
@@ -17,6 +17,13 @@ def test_prelu():
|
||||
input_shape=(2, 3, 4))
|
||||
|
||||
|
||||
@keras_test
|
||||
def test_prelu_share():
|
||||
from keras.layers.advanced_activations import PReLU
|
||||
layer_test(PReLU, kwargs={'shared_axes': 1},
|
||||
input_shape=(2, 3, 4))
|
||||
|
||||
|
||||
@keras_test
|
||||
def test_elu():
|
||||
from keras.layers.advanced_activations import ELU
|
||||
@@ -28,11 +35,20 @@ def test_elu():
|
||||
@keras_test
|
||||
def test_parametric_softplus():
|
||||
from keras.layers.advanced_activations import ParametricSoftplus
|
||||
for alpha in [0., .5, -1.]:
|
||||
layer_test(ParametricSoftplus,
|
||||
kwargs={'alpha_init': 1.,
|
||||
'beta_init': -1},
|
||||
input_shape=(2, 3, 4))
|
||||
layer_test(ParametricSoftplus,
|
||||
kwargs={'alpha_init': 1.,
|
||||
'beta_init': -1},
|
||||
input_shape=(2, 3, 4))
|
||||
|
||||
|
||||
@keras_test
|
||||
def test_parametric_softplus_share():
|
||||
from keras.layers.advanced_activations import ParametricSoftplus
|
||||
layer_test(ParametricSoftplus,
|
||||
kwargs={'shared_axes': 1,
|
||||
'alpha_init': 1.,
|
||||
'beta_init': -1},
|
||||
input_shape=(2, 3, 4))
|
||||
|
||||
|
||||
@keras_test
|
||||
@@ -49,5 +65,12 @@ def test_srelu():
|
||||
input_shape=(2, 3, 4))
|
||||
|
||||
|
||||
@keras_test
|
||||
def test_srelu_share():
|
||||
from keras.layers.advanced_activations import SReLU
|
||||
layer_test(SReLU, kwargs={'shared_axes': 1},
|
||||
input_shape=(2, 3, 4))
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
pytest.main([__file__])
|
||||
|
||||
@@ -8,6 +8,13 @@ from keras import backend as K
|
||||
from keras.layers import convolutional, pooling
|
||||
|
||||
|
||||
# TensorFlow does not support full convolution.
|
||||
if K._BACKEND == 'theano':
|
||||
_convolution_border_modes = ['valid', 'same', 'full']
|
||||
else:
|
||||
_convolution_border_modes = ['valid', 'same']
|
||||
|
||||
|
||||
@keras_test
|
||||
def test_convolution_1d():
|
||||
nb_samples = 2
|
||||
@@ -16,7 +23,7 @@ def test_convolution_1d():
|
||||
filter_length = 3
|
||||
nb_filter = 3
|
||||
|
||||
for border_mode in ['valid', 'same']:
|
||||
for border_mode in _convolution_border_modes:
|
||||
for subsample_length in [1, 2]:
|
||||
if border_mode == 'same' and subsample_length != 1:
|
||||
continue
|
||||
@@ -47,7 +54,7 @@ def test_atrous_conv_1d():
|
||||
filter_length = 3
|
||||
nb_filter = 3
|
||||
|
||||
for border_mode in ['valid', 'same']:
|
||||
for border_mode in _convolution_border_modes:
|
||||
for subsample_length in [1, 2]:
|
||||
for atrous_rate in [1, 2]:
|
||||
if border_mode == 'same' and subsample_length != 1:
|
||||
@@ -77,11 +84,12 @@ def test_atrous_conv_1d():
|
||||
|
||||
@keras_test
|
||||
def test_maxpooling_1d():
|
||||
for stride in [1, 2]:
|
||||
layer_test(convolutional.MaxPooling1D,
|
||||
kwargs={'stride': stride,
|
||||
'border_mode': 'valid'},
|
||||
input_shape=(3, 5, 4))
|
||||
for border_mode in ['valid', 'same']:
|
||||
for stride in [1, 2]:
|
||||
layer_test(convolutional.MaxPooling1D,
|
||||
kwargs={'stride': stride,
|
||||
'border_mode': border_mode},
|
||||
input_shape=(3, 5, 4))
|
||||
|
||||
|
||||
@keras_test
|
||||
@@ -101,7 +109,7 @@ def test_convolution_2d():
|
||||
nb_row = 10
|
||||
nb_col = 6
|
||||
|
||||
for border_mode in ['valid', 'same']:
|
||||
for border_mode in _convolution_border_modes:
|
||||
for subsample in [(1, 1), (2, 2)]:
|
||||
if border_mode == 'same' and subsample != (1, 1):
|
||||
continue
|
||||
@@ -134,7 +142,7 @@ def test_deconvolution_2d():
|
||||
nb_row = 10
|
||||
nb_col = 6
|
||||
|
||||
for border_mode in ['valid', 'same']:
|
||||
for border_mode in _convolution_border_modes:
|
||||
for subsample in [(1, 1), (2, 2)]:
|
||||
if border_mode == 'same' and subsample != (1, 1):
|
||||
continue
|
||||
@@ -175,7 +183,7 @@ def test_atrous_conv_2d():
|
||||
nb_row = 10
|
||||
nb_col = 6
|
||||
|
||||
for border_mode in ['valid', 'same']:
|
||||
for border_mode in _convolution_border_modes:
|
||||
for subsample in [(1, 1), (2, 2)]:
|
||||
for atrous_rate in [(1, 1), (2, 2)]:
|
||||
if border_mode == 'same' and subsample != (1, 1):
|
||||
@@ -214,7 +222,7 @@ def test_separable_conv_2d():
|
||||
nb_row = 10
|
||||
nb_col = 6
|
||||
|
||||
for border_mode in ['valid', 'same']:
|
||||
for border_mode in _convolution_border_modes:
|
||||
for subsample in [(1, 1), (2, 2)]:
|
||||
for multiplier in [1, 2]:
|
||||
if border_mode == 'same' and subsample != (1, 1):
|
||||
@@ -322,7 +330,7 @@ def test_convolution_3d():
|
||||
input_len_dim2 = 11
|
||||
input_len_dim3 = 12
|
||||
|
||||
for border_mode in ['same', 'valid']:
|
||||
for border_mode in _convolution_border_modes:
|
||||
for subsample in [(1, 1, 1), (2, 2, 2)]:
|
||||
if border_mode == 'same' and subsample != (1, 1, 1):
|
||||
continue
|
||||
@@ -382,7 +390,8 @@ def test_zero_padding_1d():
|
||||
nb_samples = 2
|
||||
input_dim = 2
|
||||
nb_steps = 5
|
||||
input = np.ones((nb_samples, nb_steps, input_dim))
|
||||
shape = (nb_samples, nb_steps, input_dim)
|
||||
input = np.ones(shape)
|
||||
|
||||
# basic test
|
||||
layer_test(convolutional.ZeroPadding1D,
|
||||
@@ -397,22 +406,22 @@ def test_zero_padding_1d():
|
||||
|
||||
# correctness test
|
||||
layer = convolutional.ZeroPadding1D(padding=2)
|
||||
layer.set_input(K.variable(input), shape=input.shape)
|
||||
|
||||
out = K.eval(layer.output)
|
||||
layer.build(shape)
|
||||
output = layer(K.variable(input))
|
||||
np_output = K.eval(output)
|
||||
for offset in [0, 1, -1, -2]:
|
||||
assert_allclose(out[:, offset, :], 0.)
|
||||
assert_allclose(out[:, 2:-2, :], 1.)
|
||||
assert_allclose(np_output[:, offset, :], 0.)
|
||||
assert_allclose(np_output[:, 2:-2, :], 1.)
|
||||
|
||||
layer = convolutional.ZeroPadding1D(padding=(1, 2))
|
||||
layer.set_input(K.variable(input), shape=input.shape)
|
||||
|
||||
out = K.eval(layer.output)
|
||||
layer.build(shape)
|
||||
output = layer(K.variable(input))
|
||||
np_output = K.eval(output)
|
||||
for left_offset in [0]:
|
||||
assert_allclose(out[:, left_offset, :], 0.)
|
||||
assert_allclose(np_output[:, left_offset, :], 0.)
|
||||
for right_offset in [-1, -2]:
|
||||
assert_allclose(out[:, right_offset, :], 0.)
|
||||
assert_allclose(out[:, 1:-2, :], 1.)
|
||||
assert_allclose(np_output[:, right_offset, :], 0.)
|
||||
assert_allclose(np_output[:, 1:-2, :], 1.)
|
||||
layer.get_config()
|
||||
|
||||
|
||||
@@ -443,44 +452,44 @@ def test_zero_padding_2d():
|
||||
|
||||
# correctness test
|
||||
layer = convolutional.ZeroPadding2D(padding=(2, 2))
|
||||
layer.set_input(K.variable(input), shape=input.shape)
|
||||
|
||||
out = K.eval(layer.output)
|
||||
layer.build(input.shape)
|
||||
output = layer(K.variable(input))
|
||||
np_output = K.eval(output)
|
||||
if dim_ordering == 'tf':
|
||||
for offset in [0, 1, -1, -2]:
|
||||
assert_allclose(out[:, offset, :, :], 0.)
|
||||
assert_allclose(out[:, :, offset, :], 0.)
|
||||
assert_allclose(out[:, 2:-2, 2:-2, :], 1.)
|
||||
assert_allclose(np_output[:, offset, :, :], 0.)
|
||||
assert_allclose(np_output[:, :, offset, :], 0.)
|
||||
assert_allclose(np_output[:, 2:-2, 2:-2, :], 1.)
|
||||
elif dim_ordering == 'th':
|
||||
for offset in [0, 1, -1, -2]:
|
||||
assert_allclose(out[:, :, offset, :], 0.)
|
||||
assert_allclose(out[:, :, :, offset], 0.)
|
||||
assert_allclose(out[:, 2:-2, 2:-2, :], 1.)
|
||||
assert_allclose(np_output[:, :, offset, :], 0.)
|
||||
assert_allclose(np_output[:, :, :, offset], 0.)
|
||||
assert_allclose(np_output[:, 2:-2, 2:-2, :], 1.)
|
||||
|
||||
layer = convolutional.ZeroPadding2D(padding=(1, 2, 3, 4))
|
||||
layer.set_input(K.variable(input), shape=input.shape)
|
||||
|
||||
out = K.eval(layer.output)
|
||||
layer.build(input.shape)
|
||||
output = layer(K.variable(input))
|
||||
np_output = K.eval(output)
|
||||
if dim_ordering == 'tf':
|
||||
for top_offset in [0]:
|
||||
assert_allclose(out[:, top_offset, :, :], 0.)
|
||||
assert_allclose(np_output[:, top_offset, :, :], 0.)
|
||||
for bottom_offset in [-1, -2]:
|
||||
assert_allclose(out[:, bottom_offset, :, :], 0.)
|
||||
assert_allclose(np_output[:, bottom_offset, :, :], 0.)
|
||||
for left_offset in [0, 1, 2]:
|
||||
assert_allclose(out[:, :, left_offset, :], 0.)
|
||||
assert_allclose(np_output[:, :, left_offset, :], 0.)
|
||||
for right_offset in [-1, -2, -3, -4]:
|
||||
assert_allclose(out[:, :, right_offset, :], 0.)
|
||||
assert_allclose(out[:, 1:-2, 3:-4, :], 1.)
|
||||
assert_allclose(np_output[:, :, right_offset, :], 0.)
|
||||
assert_allclose(np_output[:, 1:-2, 3:-4, :], 1.)
|
||||
elif dim_ordering == 'th':
|
||||
for top_offset in [0]:
|
||||
assert_allclose(out[:, :, top_offset, :], 0.)
|
||||
assert_allclose(np_output[:, :, top_offset, :], 0.)
|
||||
for bottom_offset in [-1, -2]:
|
||||
assert_allclose(out[:, :, bottom_offset, :], 0.)
|
||||
assert_allclose(np_output[:, :, bottom_offset, :], 0.)
|
||||
for left_offset in [0, 1, 2]:
|
||||
assert_allclose(out[:, :, :, left_offset], 0.)
|
||||
assert_allclose(np_output[:, :, :, left_offset], 0.)
|
||||
for right_offset in [-1, -2, -3, -4]:
|
||||
assert_allclose(out[:, :, :, right_offset], 0.)
|
||||
assert_allclose(out[:, :, 1:-2, 3:-4], 1.)
|
||||
assert_allclose(np_output[:, :, :, right_offset], 0.)
|
||||
assert_allclose(np_output[:, :, 1:-2, 3:-4], 1.)
|
||||
layer.get_config()
|
||||
|
||||
|
||||
@@ -502,13 +511,14 @@ def test_zero_padding_3d():
|
||||
|
||||
# correctness test
|
||||
layer = convolutional.ZeroPadding3D(padding=(2, 2, 2))
|
||||
layer.set_input(K.variable(input), shape=input.shape)
|
||||
out = K.eval(layer.output)
|
||||
layer.build(input.shape)
|
||||
output = layer(K.variable(input))
|
||||
np_output = K.eval(output)
|
||||
for offset in [0, 1, -1, -2]:
|
||||
assert_allclose(out[:, offset, :, :, :], 0.)
|
||||
assert_allclose(out[:, :, offset, :, :], 0.)
|
||||
assert_allclose(out[:, :, :, offset, :], 0.)
|
||||
assert_allclose(out[:, 2:-2, 2:-2, 2:-2, :], 1.)
|
||||
assert_allclose(np_output[:, offset, :, :, :], 0.)
|
||||
assert_allclose(np_output[:, :, offset, :, :], 0.)
|
||||
assert_allclose(np_output[:, :, :, offset, :], 0.)
|
||||
assert_allclose(np_output[:, 2:-2, 2:-2, 2:-2, :], 1.)
|
||||
layer.get_config()
|
||||
|
||||
|
||||
@@ -539,15 +549,15 @@ def test_upsampling_2d():
|
||||
layer = convolutional.UpSampling2D(
|
||||
size=(length_row, length_col),
|
||||
dim_ordering=dim_ordering)
|
||||
layer.set_input(K.variable(input), shape=input.shape)
|
||||
|
||||
out = K.eval(layer.output)
|
||||
layer.build(input.shape)
|
||||
output = layer(K.variable(input))
|
||||
np_output = K.eval(output)
|
||||
if dim_ordering == 'th':
|
||||
assert out.shape[2] == length_row * input_nb_row
|
||||
assert out.shape[3] == length_col * input_nb_col
|
||||
assert np_output.shape[2] == length_row * input_nb_row
|
||||
assert np_output.shape[3] == length_col * input_nb_col
|
||||
else: # tf
|
||||
assert out.shape[1] == length_row * input_nb_row
|
||||
assert out.shape[2] == length_col * input_nb_col
|
||||
assert np_output.shape[1] == length_row * input_nb_row
|
||||
assert np_output.shape[2] == length_col * input_nb_col
|
||||
|
||||
# compare with numpy
|
||||
if dim_ordering == 'th':
|
||||
@@ -557,7 +567,7 @@ def test_upsampling_2d():
|
||||
expected_out = np.repeat(input, length_row, axis=1)
|
||||
expected_out = np.repeat(expected_out, length_col, axis=2)
|
||||
|
||||
assert_allclose(out, expected_out)
|
||||
assert_allclose(np_output, expected_out)
|
||||
|
||||
|
||||
def test_upsampling_3d():
|
||||
@@ -580,17 +590,17 @@ def test_upsampling_3d():
|
||||
layer = convolutional.UpSampling3D(
|
||||
size=(length_dim1, length_dim2, length_dim3),
|
||||
dim_ordering=dim_ordering)
|
||||
layer.set_input(K.variable(input), shape=input.shape)
|
||||
|
||||
out = K.eval(layer.output)
|
||||
layer.build(input.shape)
|
||||
output = layer(K.variable(input))
|
||||
np_output = K.eval(output)
|
||||
if dim_ordering == 'th':
|
||||
assert out.shape[2] == length_dim1 * input_len_dim1
|
||||
assert out.shape[3] == length_dim2 * input_len_dim2
|
||||
assert out.shape[4] == length_dim3 * input_len_dim3
|
||||
assert np_output.shape[2] == length_dim1 * input_len_dim1
|
||||
assert np_output.shape[3] == length_dim2 * input_len_dim2
|
||||
assert np_output.shape[4] == length_dim3 * input_len_dim3
|
||||
else: # tf
|
||||
assert out.shape[1] == length_dim1 * input_len_dim1
|
||||
assert out.shape[2] == length_dim2 * input_len_dim2
|
||||
assert out.shape[3] == length_dim3 * input_len_dim3
|
||||
assert np_output.shape[1] == length_dim1 * input_len_dim1
|
||||
assert np_output.shape[2] == length_dim2 * input_len_dim2
|
||||
assert np_output.shape[3] == length_dim3 * input_len_dim3
|
||||
|
||||
# compare with numpy
|
||||
if dim_ordering == 'th':
|
||||
@@ -602,7 +612,7 @@ def test_upsampling_3d():
|
||||
expected_out = np.repeat(expected_out, length_dim2, axis=2)
|
||||
expected_out = np.repeat(expected_out, length_dim3, axis=3)
|
||||
|
||||
assert_allclose(out, expected_out)
|
||||
assert_allclose(np_output, expected_out)
|
||||
|
||||
|
||||
@keras_test
|
||||
@@ -626,32 +636,35 @@ def test_cropping_2d():
|
||||
dim_ordering = K.image_dim_ordering()
|
||||
|
||||
if dim_ordering == 'th':
|
||||
input = np.random.rand(nb_samples, stack_size, input_len_dim1, input_len_dim2)
|
||||
input = np.random.rand(nb_samples, stack_size,
|
||||
input_len_dim1, input_len_dim2)
|
||||
else:
|
||||
input = np.random.rand(nb_samples, input_len_dim1, input_len_dim2, stack_size)
|
||||
input = np.random.rand(nb_samples,
|
||||
input_len_dim1, input_len_dim2,
|
||||
stack_size)
|
||||
# basic test
|
||||
layer_test(convolutional.Cropping2D,
|
||||
kwargs={'cropping': cropping,
|
||||
'dim_ordering': dim_ordering},
|
||||
input_shape=input.shape)
|
||||
# correctness test
|
||||
layer = convolutional.Cropping2D(cropping=cropping, dim_ordering=dim_ordering)
|
||||
layer.set_input(K.variable(input), shape=input.shape)
|
||||
|
||||
out = K.eval(layer.output)
|
||||
layer = convolutional.Cropping2D(cropping=cropping,
|
||||
dim_ordering=dim_ordering)
|
||||
layer.build(input.shape)
|
||||
output = layer(K.variable(input))
|
||||
np_output = K.eval(output)
|
||||
# compare with numpy
|
||||
if dim_ordering == 'th':
|
||||
expected_out = input[:,
|
||||
:,
|
||||
cropping[0][0]:-cropping[0][1],
|
||||
cropping[1][0]:-cropping[1][1]]
|
||||
cropping[0][0]: -cropping[0][1],
|
||||
cropping[1][0]: -cropping[1][1]]
|
||||
else:
|
||||
expected_out = input[:,
|
||||
cropping[0][0]:-cropping[0][1],
|
||||
cropping[1][0]:-cropping[1][1],
|
||||
cropping[0][0]: -cropping[0][1],
|
||||
cropping[1][0]: -cropping[1][1],
|
||||
:]
|
||||
|
||||
assert_allclose(out, expected_out)
|
||||
assert_allclose(np_output, expected_out)
|
||||
|
||||
|
||||
def test_cropping_3d():
|
||||
@@ -664,34 +677,37 @@ def test_cropping_3d():
|
||||
dim_ordering = K.image_dim_ordering()
|
||||
|
||||
if dim_ordering == 'th':
|
||||
input = np.random.rand(nb_samples, stack_size, input_len_dim1, input_len_dim2, input_len_dim3)
|
||||
input = np.random.rand(nb_samples, stack_size,
|
||||
input_len_dim1, input_len_dim2, input_len_dim3)
|
||||
else:
|
||||
input = np.random.rand(nb_samples, input_len_dim1, input_len_dim2, input_len_dim3, stack_size)
|
||||
input = np.random.rand(nb_samples,
|
||||
input_len_dim1, input_len_dim2,
|
||||
input_len_dim3, stack_size)
|
||||
# basic test
|
||||
layer_test(convolutional.Cropping3D,
|
||||
kwargs={'cropping': cropping,
|
||||
'dim_ordering': dim_ordering},
|
||||
input_shape=input.shape)
|
||||
# correctness test
|
||||
layer = convolutional.Cropping3D(cropping=cropping, dim_ordering=dim_ordering)
|
||||
layer.set_input(K.variable(input), shape=input.shape)
|
||||
|
||||
out = K.eval(layer.output)
|
||||
layer = convolutional.Cropping3D(cropping=cropping,
|
||||
dim_ordering=dim_ordering)
|
||||
layer.build(input.shape)
|
||||
output = layer(K.variable(input))
|
||||
np_output = K.eval(output)
|
||||
# compare with numpy
|
||||
if dim_ordering == 'th':
|
||||
expected_out = input[:,
|
||||
:,
|
||||
cropping[0][0]:-cropping[0][1],
|
||||
cropping[1][0]:-cropping[1][1],
|
||||
cropping[2][0]:-cropping[2][1]]
|
||||
cropping[0][0]: -cropping[0][1],
|
||||
cropping[1][0]: -cropping[1][1],
|
||||
cropping[2][0]: -cropping[2][1]]
|
||||
else:
|
||||
expected_out = input[:,
|
||||
cropping[0][0]:-cropping[0][1],
|
||||
cropping[1][0]:-cropping[1][1],
|
||||
cropping[2][0]:-cropping[2][1],
|
||||
cropping[0][0]: -cropping[0][1],
|
||||
cropping[1][0]: -cropping[1][1],
|
||||
cropping[2][0]: -cropping[2][1],
|
||||
:]
|
||||
|
||||
assert_allclose(out, expected_out)
|
||||
assert_allclose(np_output, expected_out)
|
||||
|
||||
if __name__ == '__main__':
|
||||
pytest.main([__file__])
|
||||
|
||||
@@ -0,0 +1,130 @@
|
||||
import pytest
|
||||
import numpy as np
|
||||
from numpy.testing import assert_allclose
|
||||
|
||||
from keras import backend as K
|
||||
from keras.models import Sequential
|
||||
from keras.layers import convolutional_recurrent
|
||||
from keras.utils.test_utils import layer_test
|
||||
from keras import regularizers
|
||||
|
||||
|
||||
def test_recurrent_convolutional():
|
||||
nb_row = 3
|
||||
nb_col = 3
|
||||
nb_filter = 5
|
||||
nb_samples = 2
|
||||
input_channel = 2
|
||||
input_nb_row = 5
|
||||
input_nb_col = 5
|
||||
sequence_len = 2
|
||||
for dim_ordering in ['th', 'tf']:
|
||||
|
||||
if dim_ordering == 'th':
|
||||
input = np.random.rand(nb_samples, sequence_len,
|
||||
input_channel,
|
||||
input_nb_row, input_nb_col)
|
||||
else: # tf
|
||||
input = np.random.rand(nb_samples, sequence_len,
|
||||
input_nb_row, input_nb_col,
|
||||
input_channel)
|
||||
|
||||
for return_sequences in [True, False]:
|
||||
# test for ouptput shape:
|
||||
output = layer_test(convolutional_recurrent.ConvLSTM2D,
|
||||
kwargs={'dim_ordering': dim_ordering,
|
||||
'return_sequences': return_sequences,
|
||||
'nb_filter': nb_filter,
|
||||
'nb_row': nb_row,
|
||||
'nb_col': nb_col,
|
||||
'border_mode': "same"},
|
||||
input_shape=input.shape)
|
||||
|
||||
output_shape = [nb_samples, input_nb_row, input_nb_col]
|
||||
|
||||
if dim_ordering == 'th':
|
||||
output_shape.insert(1, nb_filter)
|
||||
else:
|
||||
output_shape.insert(3, nb_filter)
|
||||
|
||||
if return_sequences:
|
||||
output_shape.insert(1, sequence_len)
|
||||
|
||||
assert output.shape == tuple(output_shape)
|
||||
|
||||
# No need to check statefulness for both
|
||||
if dim_ordering == 'th' or return_sequences:
|
||||
continue
|
||||
|
||||
# Tests for statefulness
|
||||
model = Sequential()
|
||||
kwargs = {'dim_ordering': dim_ordering,
|
||||
'return_sequences': return_sequences,
|
||||
'nb_filter': nb_filter,
|
||||
'nb_row': nb_row,
|
||||
'nb_col': nb_col,
|
||||
'stateful': True,
|
||||
'batch_input_shape': input.shape,
|
||||
'border_mode': "same"}
|
||||
layer = convolutional_recurrent.ConvLSTM2D(**kwargs)
|
||||
|
||||
model.add(layer)
|
||||
model.compile(optimizer='sgd', loss='mse')
|
||||
out1 = model.predict(np.ones_like(input))
|
||||
assert(out1.shape == tuple(output_shape))
|
||||
|
||||
# train once so that the states change
|
||||
model.train_on_batch(np.ones_like(input),
|
||||
np.ones_like(output))
|
||||
out2 = model.predict(np.ones_like(input))
|
||||
|
||||
# if the state is not reset, output should be different
|
||||
assert(out1.max() != out2.max())
|
||||
|
||||
# check that output changes after states are reset
|
||||
# (even though the model itself didn't change)
|
||||
layer.reset_states()
|
||||
out3 = model.predict(np.ones_like(input))
|
||||
assert(out2.max() != out3.max())
|
||||
|
||||
# check that container-level reset_states() works
|
||||
model.reset_states()
|
||||
out4 = model.predict(np.ones_like(input))
|
||||
assert_allclose(out3, out4, atol=1e-5)
|
||||
|
||||
# check that the call to `predict` updated the states
|
||||
out5 = model.predict(np.ones_like(input))
|
||||
assert(out4.max() != out5.max())
|
||||
|
||||
# check regularizers
|
||||
kwargs = {'dim_ordering': dim_ordering,
|
||||
'return_sequences': return_sequences,
|
||||
'nb_filter': nb_filter,
|
||||
'nb_row': nb_row,
|
||||
'nb_col': nb_col,
|
||||
'stateful': True,
|
||||
'batch_input_shape': input.shape,
|
||||
'W_regularizer': regularizers.WeightRegularizer(l1=0.01),
|
||||
'U_regularizer': regularizers.WeightRegularizer(l1=0.01),
|
||||
'b_regularizer': 'l2',
|
||||
'border_mode': "same"}
|
||||
|
||||
layer = convolutional_recurrent.ConvLSTM2D(**kwargs)
|
||||
layer.build(input.shape)
|
||||
output = layer(K.variable(np.ones(input.shape)))
|
||||
K.eval(output)
|
||||
|
||||
# check dropout
|
||||
layer_test(convolutional_recurrent.ConvLSTM2D,
|
||||
kwargs={'dim_ordering': dim_ordering,
|
||||
'return_sequences': return_sequences,
|
||||
'nb_filter': nb_filter,
|
||||
'nb_row': nb_row,
|
||||
'nb_col': nb_col,
|
||||
'border_mode': "same",
|
||||
'dropout_W': 0.1,
|
||||
'dropout_U': 0.1},
|
||||
input_shape=input.shape)
|
||||
|
||||
if __name__ == '__main__':
|
||||
pytest.main([__file__])
|
||||
@@ -15,7 +15,7 @@ def test_masking():
|
||||
|
||||
@keras_test
|
||||
def test_merge():
|
||||
from keras.layers import Input, merge, Merge
|
||||
from keras.layers import Input, merge, Merge, Masking
|
||||
from keras.models import Model
|
||||
|
||||
# test modes: 'sum', 'mul', 'concat', 'ave', 'cos', 'dot'.
|
||||
@@ -53,7 +53,8 @@ def test_merge():
|
||||
input_b = Input(shape=input_shapes[1][1:])
|
||||
merged = merge([input_a, input_b],
|
||||
mode=lambda tup: K.concatenate([tup[0], tup[1]]),
|
||||
output_shape=lambda tup: (tup[0][:-1],) + (tup[0][-1] + tup[1][-1],))
|
||||
output_shape=lambda tup: tup[0][:-1] + (tup[0][-1] + tup[1][-1],))
|
||||
model = Model([input_a, input_b], merged)
|
||||
expected_output_shape = model.get_output_shape_for(input_shapes)
|
||||
actual_output_shape = model.predict(inputs).shape
|
||||
assert expected_output_shape == actual_output_shape
|
||||
@@ -65,17 +66,18 @@ def test_merge():
|
||||
# test function with output_shape function
|
||||
def fn_mode(tup):
|
||||
x, y = tup
|
||||
return K.concatenate([x, y])
|
||||
return K.concatenate([x, y], axis=1)
|
||||
|
||||
def fn_output_shape(tup):
|
||||
s1, s2 = tup
|
||||
return (s1[:-1],) + (s1[-1] + s2[-1],)
|
||||
return (s1[0], s1[1] + s2[1]) + s1[2:]
|
||||
|
||||
input_a = Input(shape=input_shapes[0][1:])
|
||||
input_b = Input(shape=input_shapes[1][1:])
|
||||
merged = merge([input_a, input_b],
|
||||
mode=fn_mode,
|
||||
output_shape=fn_output_shape)
|
||||
model = Model([input_a, input_b], merged)
|
||||
expected_output_shape = model.get_output_shape_for(input_shapes)
|
||||
actual_output_shape = model.predict(inputs).shape
|
||||
assert expected_output_shape == actual_output_shape
|
||||
@@ -84,6 +86,74 @@ def test_merge():
|
||||
model = Model.from_config(config)
|
||||
model.compile('rmsprop', 'mse')
|
||||
|
||||
# test function with output_mask function
|
||||
# time dimension is required for masking
|
||||
input_shapes = [(4, 3, 2), (4, 3, 2)]
|
||||
inputs = [np.random.random(shape) for shape in input_shapes]
|
||||
|
||||
def fn_output_mask(tup):
|
||||
x_mask, y_mask = tup
|
||||
return K.concatenate([x_mask, y_mask])
|
||||
|
||||
input_a = Input(shape=input_shapes[0][1:])
|
||||
input_b = Input(shape=input_shapes[1][1:])
|
||||
a = Masking()(input_a)
|
||||
b = Masking()(input_b)
|
||||
merged = merge([a, b], mode=fn_mode, output_shape=fn_output_shape, output_mask=fn_output_mask)
|
||||
model = Model([input_a, input_b], merged)
|
||||
expected_output_shape = model.get_output_shape_for(input_shapes)
|
||||
actual_output_shape = model.predict(inputs).shape
|
||||
assert expected_output_shape == actual_output_shape
|
||||
|
||||
config = model.get_config()
|
||||
model = Model.from_config(config)
|
||||
model.compile('rmsprop', 'mse')
|
||||
|
||||
mask_inputs = (np.zeros(input_shapes[0][:-1]), np.ones(input_shapes[1][:-1]))
|
||||
expected_mask_output = np.concatenate(mask_inputs, axis=-1)
|
||||
mask_input_placeholders = [K.placeholder(shape=input_shape[:-1]) for input_shape in input_shapes]
|
||||
mask_output = model.layers[-1]._output_mask(mask_input_placeholders)
|
||||
assert np.all(K.function(mask_input_placeholders, [mask_output])(mask_inputs)[0] == expected_mask_output)
|
||||
|
||||
# test lambda with output_mask lambda
|
||||
input_a = Input(shape=input_shapes[0][1:])
|
||||
input_b = Input(shape=input_shapes[1][1:])
|
||||
a = Masking()(input_a)
|
||||
b = Masking()(input_b)
|
||||
merged = merge([a, b], mode=lambda tup: K.concatenate([tup[0], tup[1]], axis=1),
|
||||
output_shape=lambda tup: (tup[0][0], tup[0][1] + tup[1][1]) + tup[0][2:],
|
||||
output_mask=lambda tup: K.concatenate([tup[0], tup[1]]))
|
||||
model = Model([input_a, input_b], merged)
|
||||
expected_output_shape = model.get_output_shape_for(input_shapes)
|
||||
actual_output_shape = model.predict(inputs).shape
|
||||
assert expected_output_shape == actual_output_shape
|
||||
|
||||
config = model.get_config()
|
||||
model = Model.from_config(config)
|
||||
model.compile('rmsprop', 'mse')
|
||||
|
||||
mask_output = model.layers[-1]._output_mask(mask_input_placeholders)
|
||||
assert np.all(K.function(mask_input_placeholders, [mask_output])(mask_inputs)[0] == expected_mask_output)
|
||||
|
||||
# test with arguments
|
||||
input_shapes = [(3, 2), (3, 2)]
|
||||
inputs = [np.random.random(shape) for shape in input_shapes]
|
||||
|
||||
def fn_mode(tup, a, b):
|
||||
x, y = tup
|
||||
return x * a + y * b
|
||||
|
||||
input_a = Input(shape=input_shapes[0][1:])
|
||||
input_b = Input(shape=input_shapes[1][1:])
|
||||
merged = merge([input_a, input_b], mode=fn_mode, output_shape=lambda s: s[0], arguments={'a': 0.7, 'b': 0.3})
|
||||
model = Model([input_a, input_b], merged)
|
||||
output = model.predict(inputs)
|
||||
|
||||
config = model.get_config()
|
||||
model = Model.from_config(config)
|
||||
|
||||
assert np.all(model.predict(inputs) == output)
|
||||
|
||||
|
||||
@keras_test
|
||||
def test_merge_mask_2d():
|
||||
@@ -153,6 +223,10 @@ def test_dropout():
|
||||
kwargs={'p': 0.5},
|
||||
input_shape=(3, 2))
|
||||
|
||||
layer_test(core.SpatialDropout1D,
|
||||
kwargs={'p': 0.5},
|
||||
input_shape=(2, 3, 4))
|
||||
|
||||
layer_test(core.SpatialDropout2D,
|
||||
kwargs={'p': 0.5},
|
||||
input_shape=(2, 3, 4, 5))
|
||||
@@ -212,6 +286,11 @@ def test_lambda():
|
||||
kwargs={'function': lambda x: x + 1},
|
||||
input_shape=(3, 2))
|
||||
|
||||
layer_test(Lambda,
|
||||
kwargs={'function': lambda x, a, b: x * a + b,
|
||||
'arguments': {'a': 0.6, 'b': 0.4}},
|
||||
input_shape=(3, 2))
|
||||
|
||||
# test serialization with function
|
||||
def f(x):
|
||||
return x + 1
|
||||
@@ -246,6 +325,18 @@ def test_dense():
|
||||
kwargs={'output_dim': 3},
|
||||
input_shape=(3, 2))
|
||||
|
||||
layer_test(core.Dense,
|
||||
kwargs={'output_dim': 3},
|
||||
input_shape=(3, 4, 2))
|
||||
|
||||
layer_test(core.Dense,
|
||||
kwargs={'output_dim': 3},
|
||||
input_shape=(None, None, 2))
|
||||
|
||||
layer_test(core.Dense,
|
||||
kwargs={'output_dim': 3},
|
||||
input_shape=(3, 4, 5, 2))
|
||||
|
||||
layer_test(core.Dense,
|
||||
kwargs={'output_dim': 3,
|
||||
'W_regularizer': regularizers.l2(0.01),
|
||||
|
||||
@@ -2,10 +2,10 @@ import pytest
|
||||
import numpy as np
|
||||
from numpy.testing import assert_allclose
|
||||
|
||||
from keras.layers.core import Dense, Activation
|
||||
from keras.layers import Dense, Activation, Input
|
||||
from keras.utils.test_utils import layer_test, keras_test
|
||||
from keras.layers import normalization
|
||||
from keras.models import Sequential
|
||||
from keras.models import Sequential, Model
|
||||
from keras import backend as K
|
||||
|
||||
input_1 = np.arange(10)
|
||||
@@ -78,5 +78,33 @@ def test_batchnorm_mode_1():
|
||||
assert_allclose(K.eval(K.std(out)), 0.0, atol=1e-1)
|
||||
|
||||
|
||||
@keras_test
|
||||
def test_shared_batchnorm():
|
||||
'''Test that a BN layer can be shared
|
||||
across different data streams.
|
||||
'''
|
||||
# Test single layer reuse
|
||||
bn = normalization.BatchNormalization(input_shape=(10,), mode=0)
|
||||
x1 = Input(shape=(10,))
|
||||
bn(x1)
|
||||
|
||||
x2 = Input(shape=(10,))
|
||||
y2 = bn(x2)
|
||||
|
||||
x = np.random.normal(loc=5.0, scale=10.0, size=(2, 10))
|
||||
model = Model(x2, y2)
|
||||
assert len(model.updates) == 2
|
||||
model.compile('sgd', 'mse')
|
||||
model.train_on_batch(x, x)
|
||||
|
||||
# Test model-level reuse
|
||||
x3 = Input(shape=(10,))
|
||||
y3 = model(x3)
|
||||
new_model = Model(x3, y3)
|
||||
assert len(model.updates) == 2
|
||||
new_model.compile('sgd', 'mse')
|
||||
new_model.train_on_batch(x, x)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
pytest.main([__file__])
|
||||
|
||||
@@ -129,9 +129,15 @@ def test_regularizer(layer_class):
|
||||
U_regularizer=regularizers.WeightRegularizer(l1=0.01),
|
||||
b_regularizer='l2')
|
||||
shape = (nb_samples, timesteps, embedding_dim)
|
||||
layer.set_input(K.variable(np.ones(shape)),
|
||||
shape=shape)
|
||||
K.eval(layer.output)
|
||||
layer.build(shape)
|
||||
output = layer(K.variable(np.ones(shape)))
|
||||
K.eval(output)
|
||||
if layer_class == recurrent.SimpleRNN:
|
||||
assert len(layer.losses) == 3
|
||||
if layer_class == recurrent.GRU:
|
||||
assert len(layer.losses) == 9
|
||||
if layer_class == recurrent.LSTM:
|
||||
assert len(layer.losses) == 12
|
||||
|
||||
|
||||
@keras_test
|
||||
@@ -140,15 +146,30 @@ def test_masking_layer():
|
||||
https://github.com/fchollet/keras/issues/1567
|
||||
|
||||
'''
|
||||
model = Sequential()
|
||||
model.add(Masking(input_shape=(3, 4)))
|
||||
model.add(recurrent.LSTM(output_dim=5, return_sequences=True))
|
||||
model.compile(loss='categorical_crossentropy', optimizer='adam')
|
||||
I = np.random.random((6, 3, 4))
|
||||
V = np.abs(np.random.random((6, 3, 5)))
|
||||
V /= V.sum(axis=-1, keepdims=True)
|
||||
|
||||
model = Sequential()
|
||||
model.add(Masking(input_shape=(3, 4)))
|
||||
model.add(recurrent.LSTM(output_dim=5, return_sequences=True, unroll=False))
|
||||
model.compile(loss='categorical_crossentropy', optimizer='adam')
|
||||
model.fit(I, V, nb_epoch=1, batch_size=100, verbose=1)
|
||||
|
||||
model = Sequential()
|
||||
model.add(Masking(input_shape=(3, 4)))
|
||||
model.add(recurrent.LSTM(output_dim=5, return_sequences=True, unroll=True))
|
||||
model.compile(loss='categorical_crossentropy', optimizer='adam')
|
||||
model.fit(I, V, nb_epoch=1, batch_size=100, verbose=1)
|
||||
|
||||
|
||||
@rnn_test
|
||||
def test_from_config(layer_class):
|
||||
for stateful in (False, True):
|
||||
l1 = layer_class(output_dim=1, stateful=stateful)
|
||||
l2 = layer_class.from_config(l1.get_config())
|
||||
assert l1.get_config() == l2.get_config()
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
pytest.main([__file__])
|
||||
|
||||
@@ -76,6 +76,15 @@ def test_TimeDistributed():
|
||||
outer_model.fit(np.random.random((10, 3, 2)), np.random.random((10, 3, 3)), nb_epoch=1, batch_size=10)
|
||||
|
||||
|
||||
@keras_test
|
||||
def test_regularizers():
|
||||
model = Sequential()
|
||||
model.add(wrappers.TimeDistributed(core.Dense(2, W_regularizer='l1'), input_shape=(3, 4)))
|
||||
model.add(core.Activation('relu'))
|
||||
model.compile(optimizer='rmsprop', loss='mse')
|
||||
assert len(model.losses) == 1
|
||||
|
||||
|
||||
@keras_test
|
||||
def test_Bidirectional():
|
||||
rnn = recurrent.SimpleRNN
|
||||
@@ -115,6 +124,13 @@ def test_Bidirectional():
|
||||
model.compile(loss='mse', optimizer='sgd')
|
||||
model.fit(x, y, nb_epoch=1, batch_size=1)
|
||||
|
||||
# Bidirectional and stateful
|
||||
input = Input(batch_shape=(1, timesteps, dim))
|
||||
output = wrappers.Bidirectional(rnn(output_dim, stateful=True), merge_mode=mode)(input)
|
||||
model = Model(input, output)
|
||||
model.compile(loss='mse', optimizer='sgd')
|
||||
model.fit(x, y, nb_epoch=1, batch_size=1)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
pytest.main([__file__])
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
import pytest
|
||||
from keras.preprocessing.image import *
|
||||
from keras.preprocessing import image
|
||||
from PIL import Image
|
||||
import numpy as np
|
||||
import os
|
||||
@@ -33,10 +33,10 @@ class TestImage:
|
||||
for test_images in self.all_test_images:
|
||||
img_list = []
|
||||
for im in test_images:
|
||||
img_list.append(img_to_array(im)[None, ...])
|
||||
img_list.append(image.img_to_array(im)[None, ...])
|
||||
|
||||
images = np.vstack(img_list)
|
||||
generator = ImageDataGenerator(
|
||||
generator = image.ImageDataGenerator(
|
||||
featurewise_center=True,
|
||||
samplewise_center=True,
|
||||
featurewise_std_normalization=True,
|
||||
@@ -61,34 +61,133 @@ class TestImage:
|
||||
break
|
||||
shutil.rmtree(tmp_folder)
|
||||
|
||||
def test_img_flip(self):
|
||||
x = np.array(range(4)).reshape([1, 1, 2, 2])
|
||||
assert (flip_axis(x, 0) == x).all()
|
||||
assert (flip_axis(x, 1) == x).all()
|
||||
assert (flip_axis(x, 2) == [[[[2, 3], [0, 1]]]]).all()
|
||||
assert (flip_axis(x, 3) == [[[[1, 0], [3, 2]]]]).all()
|
||||
def test_image_data_generator_invalid_data(self):
|
||||
generator = image.ImageDataGenerator(
|
||||
featurewise_center=True,
|
||||
samplewise_center=True,
|
||||
featurewise_std_normalization=True,
|
||||
samplewise_std_normalization=True,
|
||||
zca_whitening=True,
|
||||
dim_ordering='tf')
|
||||
# Test fit with invalid data
|
||||
with pytest.raises(ValueError):
|
||||
x = np.random.random((3, 10, 10))
|
||||
generator.fit(x)
|
||||
with pytest.raises(ValueError):
|
||||
x = np.random.random((32, 3, 10, 10))
|
||||
generator.fit(x)
|
||||
with pytest.raises(ValueError):
|
||||
x = np.random.random((32, 10, 10, 5))
|
||||
generator.fit(x)
|
||||
# Test flow with invalid data
|
||||
with pytest.raises(ValueError):
|
||||
x = np.random.random((32, 10, 10, 5))
|
||||
generator.flow(np.arange(x.shape[0]))
|
||||
with pytest.raises(ValueError):
|
||||
x = np.random.random((32, 10, 10))
|
||||
generator.flow(np.arange(x.shape[0]))
|
||||
with pytest.raises(ValueError):
|
||||
x = np.random.random((32, 3, 10, 10))
|
||||
generator.flow(np.arange(x.shape[0]))
|
||||
|
||||
dim_ordering_and_col_index = (('tf', 2), ('th', 3))
|
||||
for dim_ordering, col_index in dim_ordering_and_col_index:
|
||||
image_generator_th = ImageDataGenerator(
|
||||
featurewise_center=False,
|
||||
samplewise_center=False,
|
||||
featurewise_std_normalization=False,
|
||||
samplewise_std_normalization=False,
|
||||
zca_whitening=False,
|
||||
rotation_range=0,
|
||||
width_shift_range=0,
|
||||
height_shift_range=0,
|
||||
shear_range=0,
|
||||
zoom_range=0,
|
||||
channel_shift_range=0,
|
||||
horizontal_flip=True,
|
||||
vertical_flip=False,
|
||||
dim_ordering=dim_ordering).flow(x, [1])
|
||||
for i in range(10):
|
||||
potentially_flipped_x, _ = next(image_generator_th)
|
||||
assert ((potentially_flipped_x == x).all() or
|
||||
(potentially_flipped_x == flip_axis(x, col_index)).all())
|
||||
def test_image_data_generator_fit(self):
|
||||
generator = image.ImageDataGenerator(
|
||||
featurewise_center=True,
|
||||
samplewise_center=True,
|
||||
featurewise_std_normalization=True,
|
||||
samplewise_std_normalization=True,
|
||||
zca_whitening=True,
|
||||
dim_ordering='tf')
|
||||
# Test grayscale
|
||||
x = np.random.random((32, 10, 10, 1))
|
||||
generator.fit(x)
|
||||
# Test RBG
|
||||
x = np.random.random((32, 10, 10, 3))
|
||||
generator.fit(x)
|
||||
generator = image.ImageDataGenerator(
|
||||
featurewise_center=True,
|
||||
samplewise_center=True,
|
||||
featurewise_std_normalization=True,
|
||||
samplewise_std_normalization=True,
|
||||
zca_whitening=True,
|
||||
dim_ordering='th')
|
||||
# Test grayscale
|
||||
x = np.random.random((32, 1, 10, 10))
|
||||
generator.fit(x)
|
||||
# Test RBG
|
||||
x = np.random.random((32, 3, 10, 10))
|
||||
generator.fit(x)
|
||||
|
||||
def test_directory_iterator(self):
|
||||
num_classes = 2
|
||||
tmp_folder = tempfile.mkdtemp(prefix='test_images')
|
||||
|
||||
# create folders and subfolders
|
||||
paths = []
|
||||
for cl in range(num_classes):
|
||||
class_directory = 'class-{}'.format(cl)
|
||||
classpaths = [
|
||||
class_directory,
|
||||
os.path.join(class_directory, 'subfolder-1'),
|
||||
os.path.join(class_directory, 'subfolder-2'),
|
||||
os.path.join(class_directory, 'subfolder-1', 'sub-subfolder')
|
||||
]
|
||||
for path in classpaths:
|
||||
os.mkdir(os.path.join(tmp_folder, path))
|
||||
paths.append(classpaths)
|
||||
|
||||
# save the images in the paths
|
||||
count = 0
|
||||
filenames = []
|
||||
for test_images in self.all_test_images:
|
||||
for im in test_images:
|
||||
# rotate image class
|
||||
im_class = count % num_classes
|
||||
# rotate subfolders
|
||||
classpaths = paths[im_class]
|
||||
filename = os.path.join(classpaths[count % len(classpaths)], 'image-{}.jpg'.format(count))
|
||||
filenames.append(filename)
|
||||
im.save(os.path.join(tmp_folder, filename))
|
||||
count += 1
|
||||
|
||||
# create iterator
|
||||
generator = image.ImageDataGenerator()
|
||||
dir_iterator = generator.flow_from_directory(tmp_folder)
|
||||
|
||||
# check number of classes and images
|
||||
assert(len(dir_iterator.class_indices) == num_classes)
|
||||
assert(len(dir_iterator.classes) == count)
|
||||
assert(sorted(dir_iterator.filenames) == sorted(filenames))
|
||||
shutil.rmtree(tmp_folder)
|
||||
|
||||
def test_img_utils(self):
|
||||
height, width = 10, 8
|
||||
|
||||
# Test th dim ordering
|
||||
x = np.random.random((3, height, width))
|
||||
img = image.array_to_img(x, dim_ordering='th')
|
||||
assert img.size == (width, height)
|
||||
x = image.img_to_array(img, dim_ordering='th')
|
||||
assert x.shape == (3, height, width)
|
||||
# Test 2D
|
||||
x = np.random.random((1, height, width))
|
||||
img = image.array_to_img(x, dim_ordering='th')
|
||||
assert img.size == (width, height)
|
||||
x = image.img_to_array(img, dim_ordering='th')
|
||||
assert x.shape == (1, height, width)
|
||||
|
||||
# Test tf dim ordering
|
||||
x = np.random.random((height, width, 3))
|
||||
img = image.array_to_img(x, dim_ordering='tf')
|
||||
assert img.size == (width, height)
|
||||
x = image.img_to_array(img, dim_ordering='tf')
|
||||
assert x.shape == (height, width, 3)
|
||||
# Test 2D
|
||||
x = np.random.random((height, width, 1))
|
||||
img = image.array_to_img(x, dim_ordering='tf')
|
||||
assert img.size == (width, height)
|
||||
x = image.img_to_array(img, dim_ordering='tf')
|
||||
assert x.shape == (height, width, 1)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
|
||||
@@ -9,7 +9,7 @@ from keras import optimizers
|
||||
np.random.seed(1337)
|
||||
|
||||
from keras import callbacks
|
||||
from keras.models import Graph, Sequential
|
||||
from keras.models import Sequential
|
||||
from keras.layers.core import Dense
|
||||
from keras.utils.test_utils import get_test_data
|
||||
from keras import backend as K
|
||||
@@ -79,6 +79,22 @@ def test_ModelCheckpoint():
|
||||
assert os.path.exists(filepath)
|
||||
os.remove(filepath)
|
||||
|
||||
# case 5
|
||||
save_best_only = False
|
||||
period = 2
|
||||
mode = 'auto'
|
||||
filepath = 'checkpoint.{epoch:02d}.h5'
|
||||
cbks = [callbacks.ModelCheckpoint(filepath, monitor=monitor,
|
||||
save_best_only=save_best_only, mode=mode,
|
||||
period=period)]
|
||||
model.fit(X_train, y_train, batch_size=batch_size,
|
||||
validation_data=(X_test, y_test), callbacks=cbks, nb_epoch=4)
|
||||
assert os.path.exists(filepath.format(epoch=1))
|
||||
assert os.path.exists(filepath.format(epoch=3))
|
||||
assert not os.path.exists(filepath.format(epoch=0))
|
||||
assert not os.path.exists(filepath.format(epoch=2))
|
||||
os.remove(filepath.format(epoch=1))
|
||||
os.remove(filepath.format(epoch=3))
|
||||
|
||||
def test_EarlyStopping():
|
||||
(X_train, y_train), (X_test, y_test) = get_test_data(nb_train=train_samples,
|
||||
@@ -186,19 +202,18 @@ def test_ReduceLROnPlateau():
|
||||
assert np.allclose(float(K.get_value(model.optimizer.lr)), 0.1, atol=K.epsilon())
|
||||
|
||||
|
||||
@pytest.mark.skipif((K._BACKEND != 'tensorflow'),
|
||||
@pytest.mark.skipif((K.backend() != 'tensorflow'),
|
||||
reason="Requires tensorflow backend")
|
||||
def test_TensorBoard():
|
||||
import shutil
|
||||
import tensorflow as tf
|
||||
import keras.backend.tensorflow_backend as KTF
|
||||
old_session = KTF.get_session()
|
||||
|
||||
filepath = './logs'
|
||||
(X_train, y_train), (X_test, y_test) = get_test_data(nb_train=train_samples,
|
||||
nb_test=test_samples,
|
||||
input_shape=(input_dim,),
|
||||
classification=True,
|
||||
nb_class=nb_class)
|
||||
(X_train, y_train), (X_test, y_test) = get_test_data(
|
||||
nb_train=train_samples,
|
||||
nb_test=test_samples,
|
||||
input_shape=(input_dim,),
|
||||
classification=True,
|
||||
nb_class=nb_class)
|
||||
y_test = np_utils.to_categorical(y_test)
|
||||
y_train = np_utils.to_categorical(y_train)
|
||||
|
||||
@@ -210,9 +225,11 @@ def test_TensorBoard():
|
||||
i = 0
|
||||
while 1:
|
||||
if train:
|
||||
yield (X_train[i * batch_size: (i + 1) * batch_size], y_train[i * batch_size: (i + 1) * batch_size])
|
||||
yield (X_train[i * batch_size: (i + 1) * batch_size],
|
||||
y_train[i * batch_size: (i + 1) * batch_size])
|
||||
else:
|
||||
yield (X_test[i * batch_size: (i + 1) * batch_size], y_test[i * batch_size: (i + 1) * batch_size])
|
||||
yield (X_test[i * batch_size: (i + 1) * batch_size],
|
||||
y_test[i * batch_size: (i + 1) * batch_size])
|
||||
i += 1
|
||||
i = i % max_batch_index
|
||||
|
||||
@@ -224,92 +241,44 @@ def test_TensorBoard():
|
||||
yield {'X_vars': X_test, 'output': y_test}
|
||||
|
||||
# case 1 Sequential
|
||||
model = Sequential()
|
||||
model.add(Dense(nb_hidden, input_dim=input_dim, activation='relu'))
|
||||
model.add(Dense(nb_class, activation='softmax'))
|
||||
model.compile(loss='categorical_crossentropy',
|
||||
optimizer='sgd',
|
||||
metrics=['accuracy'])
|
||||
|
||||
with tf.Graph().as_default():
|
||||
session = tf.Session('')
|
||||
KTF.set_session(session)
|
||||
model = Sequential()
|
||||
model.add(Dense(nb_hidden, input_dim=input_dim, activation='relu'))
|
||||
model.add(Dense(nb_class, activation='softmax'))
|
||||
model.compile(loss='categorical_crossentropy',
|
||||
optimizer='sgd',
|
||||
metrics=['accuracy'])
|
||||
tsb = callbacks.TensorBoard(log_dir=filepath, histogram_freq=1)
|
||||
cbks = [tsb]
|
||||
|
||||
tsb = callbacks.TensorBoard(log_dir=filepath, histogram_freq=1)
|
||||
cbks = [tsb]
|
||||
# fit with validation data
|
||||
model.fit(X_train, y_train, batch_size=batch_size,
|
||||
validation_data=(X_test, y_test), callbacks=cbks, nb_epoch=3)
|
||||
|
||||
# fit with validation data
|
||||
model.fit(X_train, y_train, batch_size=batch_size,
|
||||
validation_data=(X_test, y_test), callbacks=cbks, nb_epoch=2)
|
||||
# fit with validation data and accuracy
|
||||
model.fit(X_train, y_train, batch_size=batch_size,
|
||||
validation_data=(X_test, y_test), callbacks=cbks, nb_epoch=2)
|
||||
|
||||
# fit with validation data and accuracy
|
||||
model.fit(X_train, y_train, batch_size=batch_size,
|
||||
validation_data=(X_test, y_test), callbacks=cbks, nb_epoch=2)
|
||||
# fit generator with validation data
|
||||
model.fit_generator(data_generator(True), len(X_train), nb_epoch=2,
|
||||
validation_data=(X_test, y_test),
|
||||
callbacks=cbks)
|
||||
|
||||
# fit generator with validation data
|
||||
model.fit_generator(data_generator(True), len(X_train), nb_epoch=2,
|
||||
validation_data=(X_test, y_test),
|
||||
callbacks=cbks)
|
||||
# fit generator without validation data
|
||||
model.fit_generator(data_generator(True), len(X_train), nb_epoch=2,
|
||||
callbacks=cbks)
|
||||
|
||||
# fit generator without validation data
|
||||
model.fit_generator(data_generator(True), len(X_train), nb_epoch=2,
|
||||
callbacks=cbks)
|
||||
# fit generator with validation data and accuracy
|
||||
model.fit_generator(data_generator(True), len(X_train), nb_epoch=2,
|
||||
validation_data=(X_test, y_test),
|
||||
callbacks=cbks)
|
||||
|
||||
# fit generator with validation data and accuracy
|
||||
model.fit_generator(data_generator(True), len(X_train), nb_epoch=2,
|
||||
validation_data=(X_test, y_test),
|
||||
callbacks=cbks)
|
||||
# fit generator without validation data and accuracy
|
||||
model.fit_generator(data_generator(True), len(X_train), nb_epoch=2,
|
||||
callbacks=cbks)
|
||||
|
||||
# fit generator without validation data and accuracy
|
||||
model.fit_generator(data_generator(True), len(X_train), nb_epoch=2,
|
||||
callbacks=cbks)
|
||||
|
||||
assert os.path.exists(filepath)
|
||||
shutil.rmtree(filepath)
|
||||
|
||||
# case 2 Graph
|
||||
|
||||
with tf.Graph().as_default():
|
||||
session = tf.Session('')
|
||||
KTF.set_session(session)
|
||||
model = Graph()
|
||||
model.add_input(name='X_vars', input_shape=(input_dim,))
|
||||
|
||||
model.add_node(Dense(nb_hidden, activation="sigmoid"),
|
||||
name='Dense1', input='X_vars')
|
||||
model.add_node(Dense(nb_class, activation="softmax"),
|
||||
name='last_dense',
|
||||
input='Dense1')
|
||||
model.add_output(name='output', input='last_dense')
|
||||
model.compile(optimizer='sgd', loss={'output': 'mse'})
|
||||
|
||||
tsb = callbacks.TensorBoard(log_dir=filepath, histogram_freq=1)
|
||||
cbks = [tsb]
|
||||
|
||||
# fit with validation
|
||||
model.fit({'X_vars': X_train, 'output': y_train},
|
||||
batch_size=batch_size,
|
||||
validation_data={'X_vars': X_test, 'output': y_test},
|
||||
callbacks=cbks, nb_epoch=2)
|
||||
|
||||
# fit wo validation
|
||||
model.fit({'X_vars': X_train, 'output': y_train},
|
||||
batch_size=batch_size,
|
||||
callbacks=cbks, nb_epoch=2)
|
||||
|
||||
# fit generator with validation
|
||||
model.fit_generator(data_generator_graph(True), 1000, nb_epoch=2,
|
||||
validation_data={'X_vars': X_test, 'output': y_test},
|
||||
callbacks=cbks)
|
||||
|
||||
# fit generator wo validation
|
||||
model.fit_generator(data_generator_graph(True), 1000, nb_epoch=2,
|
||||
callbacks=cbks)
|
||||
|
||||
assert os.path.exists(filepath)
|
||||
shutil.rmtree(filepath)
|
||||
|
||||
KTF.set_session(old_session)
|
||||
assert os.path.exists(filepath)
|
||||
shutil.rmtree(filepath)
|
||||
|
||||
|
||||
def test_LambdaCallback():
|
||||
@@ -343,7 +312,7 @@ def test_LambdaCallback():
|
||||
assert not p.is_alive()
|
||||
|
||||
|
||||
@pytest.mark.skipif((K._BACKEND != 'tensorflow'),
|
||||
@pytest.mark.skipif((K.backend() != 'tensorflow'),
|
||||
reason="Requires tensorflow backend")
|
||||
def test_TensorBoard_with_ReduceLROnPlateau():
|
||||
import shutil
|
||||
|
||||
@@ -46,14 +46,50 @@ def test_matthews_correlation():
|
||||
assert expected - epsilon <= actual <= expected + epsilon
|
||||
|
||||
|
||||
def test_precision():
|
||||
y_true = K.variable(np.array([0, 1, 1, 1, 0, 0, 0, 1, 1, 1, 0, 1, 0, 0]))
|
||||
y_pred = K.variable(np.array([1, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0]))
|
||||
|
||||
# Calculated using sklearn.metrics.precision_score
|
||||
expected = 0.40000000000000002
|
||||
|
||||
actual = K.eval(metrics.precision(y_true, y_pred))
|
||||
epsilon = 1e-05
|
||||
assert expected - epsilon <= actual <= expected + epsilon
|
||||
|
||||
|
||||
def test_recall():
|
||||
y_true = K.variable(np.array([0, 1, 1, 1, 0, 0, 0, 1, 1, 1, 0, 1, 0, 0]))
|
||||
y_pred = K.variable(np.array([1, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0]))
|
||||
|
||||
# Calculated using sklearn.metrics.recall_score
|
||||
expected = 0.2857142857142857
|
||||
|
||||
actual = K.eval(metrics.recall(y_true, y_pred))
|
||||
epsilon = 1e-05
|
||||
assert expected - epsilon <= actual <= expected + epsilon
|
||||
|
||||
|
||||
def test_fbeta_score():
|
||||
y_true = K.variable(np.array([0, 1, 1, 1, 0, 0, 0, 1, 1, 1, 0, 1, 0, 0]))
|
||||
y_pred = K.variable(np.array([1, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0]))
|
||||
|
||||
# Calculated using sklearn.metrics.fbeta_score
|
||||
expected = 0.30303030303030304
|
||||
|
||||
actual = K.eval(metrics.fbeta_score(y_true, y_pred, beta=2))
|
||||
epsilon = 1e-05
|
||||
assert expected - epsilon <= actual <= expected + epsilon
|
||||
|
||||
|
||||
def test_fmeasure():
|
||||
y_true = K.variable(np.array([0, 1, 1, 1, 0, 0, 0, 1, 1, 1, 0, 1, 0, 0]))
|
||||
y_pred = K.variable(np.array([1, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0]))
|
||||
|
||||
# Calculated using sklearn.metrics.f1_score
|
||||
expected = 0.33333333333333331
|
||||
|
||||
actual = K.eval(metrics.fbeta_score(y_true, y_pred))
|
||||
actual = K.eval(metrics.fmeasure(y_true, y_pred))
|
||||
epsilon = 1e-05
|
||||
assert expected - epsilon <= actual <= expected + epsilon
|
||||
|
||||
|
||||
@@ -1,5 +1,7 @@
|
||||
from __future__ import print_function
|
||||
import pytest
|
||||
import numpy as np
|
||||
np.random.seed(1337)
|
||||
|
||||
from keras.utils.test_utils import get_test_data
|
||||
from keras.optimizers import SGD, RMSprop, Adagrad, Adadelta, Adam, Adamax, Nadam
|
||||
|
||||
@@ -67,6 +67,7 @@ def test_W_reg():
|
||||
regularizers.l1l2()]:
|
||||
model = create_model(weight_reg=reg)
|
||||
model.compile(loss='categorical_crossentropy', optimizer='rmsprop')
|
||||
assert len(model.losses) == 1
|
||||
model.fit(X_train, Y_train, batch_size=batch_size,
|
||||
nb_epoch=nb_epoch, verbose=0)
|
||||
model.evaluate(X_test[test_ids, :], Y_test[test_ids, :], verbose=0)
|
||||
@@ -77,6 +78,7 @@ def test_A_reg():
|
||||
for reg in [regularizers.activity_l1(), regularizers.activity_l2()]:
|
||||
model = create_model(activity_reg=reg)
|
||||
model.compile(loss='categorical_crossentropy', optimizer='rmsprop')
|
||||
assert len(model.losses) == 1
|
||||
model.fit(X_train, Y_train, batch_size=batch_size,
|
||||
nb_epoch=nb_epoch, verbose=0)
|
||||
model.evaluate(X_test[test_ids, :], Y_test[test_ids, :], verbose=0)
|
||||
|
||||
@@ -0,0 +1,114 @@
|
||||
from __future__ import absolute_import
|
||||
from __future__ import print_function
|
||||
import pytest
|
||||
|
||||
from keras.utils.test_utils import keras_test
|
||||
from keras.models import Model, Sequential
|
||||
from keras.layers import Dense, Input
|
||||
|
||||
|
||||
@keras_test
|
||||
def test_layer_trainability_switch():
|
||||
# with constructor argument, in Sequential
|
||||
model = Sequential()
|
||||
model.add(Dense(2, trainable=False, input_dim=1))
|
||||
assert model.trainable_weights == []
|
||||
|
||||
# by setting the `trainable` argument, in Sequential
|
||||
model = Sequential()
|
||||
layer = Dense(2, input_dim=1)
|
||||
model.add(layer)
|
||||
assert model.trainable_weights == layer.trainable_weights
|
||||
layer.trainable = False
|
||||
assert model.trainable_weights == []
|
||||
|
||||
# with constructor argument, in Model
|
||||
x = Input(shape=(1,))
|
||||
y = Dense(2, trainable=False)(x)
|
||||
model = Model(x, y)
|
||||
assert model.trainable_weights == []
|
||||
|
||||
# by setting the `trainable` argument, in Model
|
||||
x = Input(shape=(1,))
|
||||
layer = Dense(2)
|
||||
y = layer(x)
|
||||
model = Model(x, y)
|
||||
assert model.trainable_weights == layer.trainable_weights
|
||||
layer.trainable = False
|
||||
assert model.trainable_weights == []
|
||||
|
||||
|
||||
@keras_test
|
||||
def test_model_trainability_switch():
|
||||
# a non-trainable model has no trainable weights
|
||||
x = Input(shape=(1,))
|
||||
y = Dense(2)(x)
|
||||
model = Model(x, y)
|
||||
model.trainable = False
|
||||
assert model.trainable_weights == []
|
||||
|
||||
# same for Sequential
|
||||
model = Sequential()
|
||||
model.add(Dense(2, input_dim=1))
|
||||
model.trainable = False
|
||||
assert model.trainable_weights == []
|
||||
|
||||
|
||||
@keras_test
|
||||
def test_nested_model_trainability():
|
||||
# a Sequential inside a Model
|
||||
inner_model = Sequential()
|
||||
inner_model.add(Dense(2, input_dim=1))
|
||||
|
||||
x = Input(shape=(1,))
|
||||
y = inner_model(x)
|
||||
outer_model = Model(x, y)
|
||||
assert outer_model.trainable_weights == inner_model.trainable_weights
|
||||
inner_model.trainable = False
|
||||
assert outer_model.trainable_weights == []
|
||||
inner_model.trainable = True
|
||||
inner_model.layers[-1].trainable = False
|
||||
assert outer_model.trainable_weights == []
|
||||
|
||||
# a Sequential inside a Sequential
|
||||
inner_model = Sequential()
|
||||
inner_model.add(Dense(2, input_dim=1))
|
||||
outer_model = Sequential()
|
||||
outer_model.add(inner_model)
|
||||
assert outer_model.trainable_weights == inner_model.trainable_weights
|
||||
inner_model.trainable = False
|
||||
assert outer_model.trainable_weights == []
|
||||
inner_model.trainable = True
|
||||
inner_model.layers[-1].trainable = False
|
||||
assert outer_model.trainable_weights == []
|
||||
|
||||
# a Model inside a Model
|
||||
x = Input(shape=(1,))
|
||||
y = Dense(2)(x)
|
||||
inner_model = Model(x, y)
|
||||
x = Input(shape=(1,))
|
||||
y = inner_model(x)
|
||||
outer_model = Model(x, y)
|
||||
assert outer_model.trainable_weights == inner_model.trainable_weights
|
||||
inner_model.trainable = False
|
||||
assert outer_model.trainable_weights == []
|
||||
inner_model.trainable = True
|
||||
inner_model.layers[-1].trainable = False
|
||||
assert outer_model.trainable_weights == []
|
||||
|
||||
# a Model inside a Sequential
|
||||
x = Input(shape=(1,))
|
||||
y = Dense(2)(x)
|
||||
inner_model = Model(x, y)
|
||||
outer_model = Sequential()
|
||||
outer_model.add(inner_model)
|
||||
assert outer_model.trainable_weights == inner_model.trainable_weights
|
||||
inner_model.trainable = False
|
||||
assert outer_model.trainable_weights == []
|
||||
inner_model.trainable = True
|
||||
inner_model.layers[-1].trainable = False
|
||||
assert outer_model.trainable_weights == []
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
pytest.main([__file__])
|
||||
@@ -5,7 +5,7 @@ import numpy as np
|
||||
from numpy.testing import assert_allclose
|
||||
|
||||
from keras.models import Model, Sequential
|
||||
from keras.layers import Dense, Dropout, RepeatVector, TimeDistributed
|
||||
from keras.layers import Dense, Dropout, Lambda, RepeatVector, TimeDistributed
|
||||
from keras.layers import Input
|
||||
from keras import optimizers
|
||||
from keras import objectives
|
||||
@@ -232,5 +232,35 @@ def test_loading_weights_by_name_2():
|
||||
assert_allclose(np.zeros_like(jessica[1]), jessica[1]) # biases init to 0
|
||||
|
||||
|
||||
# a function to be called from the Lambda layer
|
||||
def square_fn(x):
|
||||
return x * x
|
||||
|
||||
|
||||
@keras_test
|
||||
def test_saving_lambda_custom_objects():
|
||||
input = Input(shape=(3,))
|
||||
x = Lambda(lambda x: square_fn(x), output_shape=(3,))(input)
|
||||
output = Dense(3)(x)
|
||||
|
||||
model = Model(input, output)
|
||||
model.compile(loss=objectives.MSE,
|
||||
optimizer=optimizers.RMSprop(lr=0.0001),
|
||||
metrics=[metrics.categorical_accuracy])
|
||||
x = np.random.random((1, 3))
|
||||
y = np.random.random((1, 3))
|
||||
model.train_on_batch(x, y)
|
||||
|
||||
out = model.predict(x)
|
||||
_, fname = tempfile.mkstemp('.h5')
|
||||
save_model(model, fname)
|
||||
|
||||
model = load_model(fname, custom_objects={'square_fn': square_fn})
|
||||
os.remove(fname)
|
||||
|
||||
out2 = model.predict(x)
|
||||
assert_allclose(out, out2, atol=1e-05)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
pytest.main([__file__])
|
||||
|
||||
Referência em uma Nova Issue
Bloquear um usuário