Comparar commits
219 Commits
| Autor | SHA1 | Data | |
|---|---|---|---|
| 262e5751f4 | |||
| e153e560a1 | |||
| 445aecdeb7 | |||
| 57429d1567 | |||
| 8f8d97e615 | |||
| c243f39ce5 | |||
| 55487f33b1 | |||
| 1c6db08158 | |||
| e54d7951f2 | |||
| 82ca6d4185 | |||
| f3c60dc571 | |||
| c4166a9efc | |||
| 3d176e926f | |||
| 3a7cd05b48 | |||
| 8ef4a3da52 | |||
| 1b7800aceb | |||
| b5746331f6 | |||
| 3e933ca0ed | |||
| 53e541f7bf | |||
| fbc9a18f0a | |||
| 8a50f5dfc8 | |||
| c3c634f4b1 | |||
| 710d8e4dd3 | |||
| 887576b113 | |||
| 2ad3544b01 | |||
| 68bde67d0a | |||
| 0edecdd09e | |||
| 5d97657375 | |||
| cf8947da79 | |||
| c6bf7558b2 | |||
| 429e253fb6 | |||
| e5529d98fe | |||
| 6e03136116 | |||
| 4973fe3069 | |||
| cfa1f7c3bc | |||
| 538d368396 | |||
| 590a5a5382 | |||
| fa585c5151 | |||
| 7ae2f84783 | |||
| 088dbe6866 | |||
| 6fb7ba721c | |||
| 7aa3114d9f | |||
| 8bfd851133 | |||
| 9120a7251d | |||
| fdb9561ade | |||
| a5ec992b1f | |||
| 2c432ffeb3 | |||
| 0ab4b647f8 | |||
| 9f4734cbf1 | |||
| ac1a09c787 | |||
| c10945f53a | |||
| 309f586424 | |||
| 1f5455e29e | |||
| a90af6f22e | |||
| 38719480a8 | |||
| aa18604fec | |||
| 875bc59ecf | |||
| 89f0527f31 | |||
| 8c0c3774e6 | |||
| 9c93d8ec06 | |||
| 1ccad186fd | |||
| e8cd940cf8 | |||
| c39546ee10 | |||
| 8f75744379 | |||
| ea47e6de27 | |||
| a6525be4fc | |||
| 833c0b23f5 | |||
| a04d968422 | |||
| 7b261704cf | |||
| 97b0f9f6e4 | |||
| 3071e0de2f | |||
| fe72033b2e | |||
| b57b9d3f8e | |||
| 50b4f7fad5 | |||
| 6b05aebc0c | |||
| 5863fc74b1 | |||
| 293940600b | |||
| f0369909d0 | |||
| 9db82605d2 | |||
| c0d95fd6c2 | |||
| 150e0fa8a6 | |||
| 45ad509611 | |||
| cbefd323be | |||
| 0f0d837178 | |||
| a6c9227372 | |||
| f6b804263a | |||
| b5df1c6170 | |||
| 44bf298ec3 | |||
| 5d575a3eff | |||
| e63372e41f | |||
| 9ee0c8e634 | |||
| 431c76abc4 | |||
| 21023f7f9c | |||
| 1746ac463a | |||
| f573a86b42 | |||
| 0e18cb3efa | |||
| 50f7f03f6b | |||
| 3d4a48b120 | |||
| ffe013033e | |||
| 00cbeecf6c | |||
| 737bea8f39 | |||
| c2e36f369b | |||
| 883f74ca41 | |||
| d8b226f26b | |||
| c4f3155d19 | |||
| 72c7716902 | |||
| 1bc79f66f9 | |||
| 2b3eae5f08 | |||
| 497cff9772 | |||
| fdb20dbc7e | |||
| 942ed44fdd | |||
| 8bc3f4d916 | |||
| dcbc2b933a | |||
| d0b4779071 | |||
| 12d068f675 | |||
| 070609cbac | |||
| 6b1bf7d917 | |||
| fefb70b217 | |||
| 48d8853cad | |||
| 2a3d4722c2 | |||
| d137d00182 | |||
| 9333179ad9 | |||
| 0c842391d3 | |||
| 1fcb74f218 | |||
| 1278bf9cfa | |||
| 18e5b75f67 | |||
| 766572b5b8 | |||
| 1de4d7cfba | |||
| 04107252f2 | |||
| 5f0e0d6c38 | |||
| 79406f111b | |||
| 30fa61d457 | |||
| 914d976801 | |||
| 839d4f108e | |||
| 5e73db6c00 | |||
| e9b8424839 | |||
| e3dd5d7ca5 | |||
| 2752a58730 | |||
| 69eb5752ce | |||
| 9090704f1d | |||
| fa9f863dbf | |||
| 4b1b706aa4 | |||
| 5e75b8506c | |||
| bc9f341165 | |||
| b40b8a00e4 | |||
| d811048887 | |||
| 0ba2626bd2 | |||
| c6eea03c8d | |||
| 0fd0218ef0 | |||
| 5e1a5d07c4 | |||
| 518fa3aa44 | |||
| ef1da479ec | |||
| 3f12d7ae44 | |||
| c4579a9c43 | |||
| ff62eb251b | |||
| 2b336756b6 | |||
| 0f0d8be884 | |||
| 3f3e0aa90e | |||
| c0ee5b859c | |||
| edae178532 | |||
| a0a0308061 | |||
| 74329d0c1d | |||
| 5777355972 | |||
| 0272587c29 | |||
| 22d3c8810c | |||
| 4aa8aa100b | |||
| bd404b1c88 | |||
| bed17efae8 | |||
| 8d0199ed42 | |||
| 9f33f8af5f | |||
| 7c4f033c6a | |||
| 7e2e7a5e5a | |||
| 909fbd19ea | |||
| 2b27ab1c9e | |||
| d244d38047 | |||
| 2a0b112d08 | |||
| d9657b70c0 | |||
| e8939f43a6 | |||
| 8e587fb17a | |||
| 757ae95cca | |||
| cc6e65d145 | |||
| df464c103e | |||
| d517b55576 | |||
| 4c1353c188 | |||
| 4871208f02 | |||
| 08566f22c7 | |||
| ea7b37a42a | |||
| 302eef7bad | |||
| 825adad18d | |||
| 4491212da4 | |||
| 52e2f3ed64 | |||
| 1de4fe0ba8 | |||
| e1208f5b9f | |||
| 0a8ac44617 | |||
| 1fd2108bcf | |||
| ad5e29a2b7 | |||
| 93b7dd9915 | |||
| 9256b76226 | |||
| fbd12f7d44 | |||
| 90c4895a7a | |||
| 6dfa8b1d60 | |||
| 5430844453 | |||
| 9dd06082e7 | |||
| cb4f93913e | |||
| 149946c706 | |||
| 78988b5cd6 | |||
| a081e049db | |||
| 68af216772 | |||
| b4a532e970 | |||
| 3bf913dc35 | |||
| 55163b5999 | |||
| 9bfbe6ae3e | |||
| b23e873e0f | |||
| 79ec9b8079 | |||
| 24d6cca275 | |||
| 83b90c172c | |||
| 57f2f11005 | |||
| bf502be578 | |||
| dfeca151a2 |
+10
-10
@@ -3,18 +3,18 @@ dist: trusty
|
||||
language: python
|
||||
matrix:
|
||||
include:
|
||||
- python: 3.4
|
||||
env: KERAS_BACKEND=theano
|
||||
- python: 3.4
|
||||
env: KERAS_BACKEND=tensorflow
|
||||
- python: 2.7
|
||||
env: KERAS_BACKEND=theano
|
||||
- python: 2.7
|
||||
env: KERAS_BACKEND=tensorflow
|
||||
env: KERAS_BACKEND=theano TEST_MODE=PEP8
|
||||
- python: 2.7
|
||||
env: KERAS_BACKEND=theano TEST_MODE=INTEGRATION_TESTS
|
||||
- python: 2.7
|
||||
env: KERAS_BACKEND=theano TEST_MODE=PEP8
|
||||
env: KERAS_BACKEND=tensorflow
|
||||
- python: 3.4
|
||||
env: KERAS_BACKEND=tensorflow
|
||||
- python: 2.7
|
||||
env: KERAS_BACKEND=theano
|
||||
- python: 3.4
|
||||
env: KERAS_BACKEND=theano
|
||||
install:
|
||||
# code below is taken from http://conda.pydata.org/docs/travis.html
|
||||
# We do this conditionally because it saves us some downloading if the
|
||||
@@ -49,9 +49,9 @@ install:
|
||||
|
||||
# install TensorFlow
|
||||
- if [[ "$TRAVIS_PYTHON_VERSION" == "2.7" ]]; then
|
||||
pip install https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-0.11.0-cp27-none-linux_x86_64.whl;
|
||||
pip install https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-0.12.1-cp27-none-linux_x86_64.whl;
|
||||
elif [[ "$TRAVIS_PYTHON_VERSION" == "3.4" ]]; then
|
||||
pip install https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-0.11.0-cp34-cp34m-linux_x86_64.whl;
|
||||
pip install https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-0.12.1-cp34-cp34m-linux_x86_64.whl;
|
||||
fi
|
||||
# command to run tests
|
||||
script:
|
||||
|
||||
+6
-2
@@ -1,9 +1,13 @@
|
||||
Please make sure that the boxes below are checked before you submit your issue. Thank you!
|
||||
Please make sure that the boxes below are checked before you submit your issue. If your issue is an implementation question, please ask your question on [StackOverflow](http://stackoverflow.com/questions/tagged/keras) or [join the Keras Slack channel](https://keras-slack-autojoin.herokuapp.com/) and ask there instead of filing a GitHub issue.
|
||||
|
||||
Thank you!
|
||||
|
||||
- [ ] Check that you are up-to-date with the master branch of Keras. You can update with:
|
||||
pip install git+git://github.com/fchollet/keras.git --upgrade --no-deps
|
||||
|
||||
- [ ] If running on TensorFlow, check that you are up-to-date with the latest version. The installation instructions can be found [here](https://www.tensorflow.org/get_started/os_setup).
|
||||
|
||||
- [ ] If running on Theano, check that you are up-to-date with the master branch of Theano. You can update with:
|
||||
pip install git+git://github.com/Theano/Theano.git --upgrade --no-deps
|
||||
|
||||
- [ ] Provide a link to a GitHub Gist of a Python script that can reproduce your issue (or just copy the script here if it is short).
|
||||
- [ ] Provide a link to a GitHub Gist of a Python script that can reproduce your issue (or just copy the script here if it is short).
|
||||
|
||||
+4
-6
@@ -1,9 +1,7 @@
|
||||
# Keras: Deep Learning library for TensorFlow and Theano
|
||||
|
||||
[](https://travis-ci.org/fchollet/keras)
|
||||
[](https://badge.fury.io/py/keras)
|
||||
[](https://github.com/fchollet/keras/blob/master/LICENSE)
|
||||
[](https://gitter.im/Keras-io/Lobby)
|
||||
|
||||
|
||||
## You have just found Keras.
|
||||
@@ -57,9 +55,9 @@ Stacking layers is as easy as `.add()`:
|
||||
from keras.layers import Dense, Activation
|
||||
|
||||
model.add(Dense(output_dim=64, input_dim=100))
|
||||
model.add(Activation("relu"))
|
||||
model.add(Activation('relu'))
|
||||
model.add(Dense(output_dim=10))
|
||||
model.add(Activation("softmax"))
|
||||
model.add(Activation('softmax'))
|
||||
```
|
||||
|
||||
Once your model looks good, configure its learning process with `.compile()`:
|
||||
@@ -152,9 +150,9 @@ By default, Keras will use TensorFlow as its tensor manipulation library. [Follo
|
||||
You can ask questions and join the development discussion:
|
||||
|
||||
- On the [Keras Google group](https://groups.google.com/forum/#!forum/keras-users).
|
||||
- On the [Keras Gitter channel](https://gitter.im/Keras-io/Lobby).
|
||||
- On the [Keras Slack channel](https://kerasteam.slack.com). Use [this link](https://keras-slack-autojoin.herokuapp.com/) to request an invitation to the channel.
|
||||
|
||||
You can also post bug reports and feature requests in [Github issues](https://github.com/fchollet/keras/issues). Make sure to read [our guidelines](https://github.com/fchollet/keras/blob/master/CONTRIBUTING.md) first.
|
||||
You can also post **bug reports and feature requests** (only) in [Github issues](https://github.com/fchollet/keras/issues). Make sure to read [our guidelines](https://github.com/fchollet/keras/blob/master/CONTRIBUTING.md) first.
|
||||
|
||||
|
||||
------------------
|
||||
|
||||
+4
-4
@@ -1,4 +1,4 @@
|
||||
FROM nvidia/cuda:7.5-cudnn5-devel
|
||||
FROM nvidia/cuda:8.0-cudnn5-devel
|
||||
|
||||
ENV CONDA_DIR /opt/conda
|
||||
ENV PATH $CONDA_DIR/bin:$PATH
|
||||
@@ -24,10 +24,10 @@ RUN useradd -m -s /bin/bash -N -u $NB_UID $NB_USER && \
|
||||
USER keras
|
||||
|
||||
# Python
|
||||
ARG python_version=3.5.1
|
||||
ARG tensorflow_version=0.9.0rc0-cp35-cp35m
|
||||
ARG python_version=3.5.2
|
||||
ARG tensorflow_version=0.12.0rc0-cp35-cp35m
|
||||
RUN conda install -y python=${python_version} && \
|
||||
pip install https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow-${tensorflow_version}-linux_x86_64.whl && \
|
||||
pip install https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-${tensorflow_version}-linux_x86_64.whl && \
|
||||
pip install git+git://github.com/Theano/Theano.git && \
|
||||
pip install ipdb pytest pytest-cov python-coveralls coverage==3.7.1 pytest-xdist pep8 pytest-pep8 pydot_ng && \
|
||||
conda install Pillow scikit-learn notebook pandas matplotlib nose pyyaml six h5py && \
|
||||
|
||||
+1
-5
@@ -139,9 +139,6 @@ PAGES = [
|
||||
core.Dense,
|
||||
core.Activation,
|
||||
core.Dropout,
|
||||
core.SpatialDropout1D,
|
||||
core.SpatialDropout2D,
|
||||
core.SpatialDropout3D,
|
||||
core.Flatten,
|
||||
core.Reshape,
|
||||
core.Permute,
|
||||
@@ -152,7 +149,6 @@ PAGES = [
|
||||
core.Masking,
|
||||
core.Highway,
|
||||
core.MaxoutDense,
|
||||
core.TimeDistributedDense,
|
||||
],
|
||||
},
|
||||
{
|
||||
@@ -316,7 +312,7 @@ def get_function_signature(function, method=True):
|
||||
for a in args:
|
||||
st += str(a) + ', '
|
||||
for a, v in kwargs:
|
||||
if type(v) == str:
|
||||
if isinstance(v, str):
|
||||
v = '\'' + v + '\''
|
||||
st += str(a) + '=' + str(v) + ', '
|
||||
if kwargs or args:
|
||||
|
||||
externo
+62
-10
@@ -181,7 +181,7 @@ model = InceptionV3(input_tensor=input_tensor, weights='imagenet', include_top=T
|
||||
|
||||
|
||||
```python
|
||||
keras.applications.xception.Xception(include_top=True, weights='imagenet', input_tensor=None)
|
||||
keras.applications.xception.Xception(include_top=True, weights='imagenet', input_tensor=None, input_shape=None)
|
||||
```
|
||||
|
||||
Xception V1 model, with weights pre-trained on ImageNet.
|
||||
@@ -200,6 +200,15 @@ The default input size for this model is 299x299.
|
||||
- include_top: whether to include the fully-connected layer at the top of the network.
|
||||
- weights: one of `None` (random initialization) or "imagenet" (pre-training on ImageNet).
|
||||
- input_tensor: optional Keras tensor (i.e. output of `layers.Input()`) to use as image input for the model.
|
||||
- input_shape: optional shape tuple, only to be specified
|
||||
if `include_top` is False (otherwise the input shape
|
||||
has to be `(299, 299, 3)`).
|
||||
It should have exactly 3 input channels,
|
||||
and width and height should be no smaller than 71.
|
||||
E.g. `(150, 150, 3)` would be one valid value.
|
||||
- classes: optional number of classes to classify images
|
||||
into, only to be specified if `include_top` is True, and
|
||||
if no `weights` argument is specified.
|
||||
|
||||
### Returns
|
||||
|
||||
@@ -220,7 +229,7 @@ These weights are trained by ourselves and are released under the MIT license.
|
||||
## VGG16
|
||||
|
||||
```python
|
||||
keras.applications.vgg16.VGG16(include_top=True, weights='imagenet', input_tensor=None)
|
||||
keras.applications.vgg16.VGG16(include_top=True, weights='imagenet', input_tensor=None, input_shape=None)
|
||||
```
|
||||
|
||||
VGG16 model, with weights pre-trained on ImageNet.
|
||||
@@ -235,7 +244,17 @@ The default input size for this model is 224x224.
|
||||
- include_top: whether to include the 3 fully-connected layers at the top of the network.
|
||||
- weights: one of `None` (random initialization) or "imagenet" (pre-training on ImageNet).
|
||||
- input_tensor: optional Keras tensor (i.e. output of `layers.Input()`) to use as image input for the model.
|
||||
|
||||
- input_shape: optional shape tuple, only to be specified
|
||||
if `include_top` is False (otherwise the input shape
|
||||
has to be `(224, 224, 3)` (with `tf` dim ordering)
|
||||
or `(3, 224, 224)` (with `th` dim ordering)).
|
||||
It should have exactly 3 input channels,
|
||||
and width and height should be no smaller than 48.
|
||||
E.g. `(200, 200, 3)` would be one valid value.
|
||||
- classes: optional number of classes to classify images
|
||||
into, only to be specified if `include_top` is True, and
|
||||
if no `weights` argument is specified.
|
||||
|
||||
### Returns
|
||||
|
||||
A Keras model instance.
|
||||
@@ -254,7 +273,7 @@ These weights are ported from the ones [released by VGG at Oxford](http://www.ro
|
||||
|
||||
|
||||
```python
|
||||
keras.applications.vgg19.VGG19(include_top=True, weights='imagenet', input_tensor=None)
|
||||
keras.applications.vgg19.VGG19(include_top=True, weights='imagenet', input_tensor=None, input_shape=None)
|
||||
```
|
||||
|
||||
|
||||
@@ -270,7 +289,17 @@ The default input size for this model is 224x224.
|
||||
- include_top: whether to include the 3 fully-connected layers at the top of the network.
|
||||
- weights: one of `None` (random initialization) or "imagenet" (pre-training on ImageNet).
|
||||
- input_tensor: optional Keras tensor (i.e. output of `layers.Input()`) to use as image input for the model.
|
||||
|
||||
- input_shape: optional shape tuple, only to be specified
|
||||
if `include_top` is False (otherwise the input shape
|
||||
has to be `(224, 224, 3)` (with `tf` dim ordering)
|
||||
or `(3, 224, 224)` (with `th` dim ordering)).
|
||||
It should have exactly 3 input channels,
|
||||
and width and height should be no smaller than 48.
|
||||
E.g. `(200, 200, 3)` would be one valid value.
|
||||
- classes: optional number of classes to classify images
|
||||
into, only to be specified if `include_top` is True, and
|
||||
if no `weights` argument is specified.
|
||||
|
||||
### Returns
|
||||
|
||||
A Keras model instance.
|
||||
@@ -290,7 +319,7 @@ These weights are ported from the ones [released by VGG at Oxford](http://www.ro
|
||||
|
||||
|
||||
```python
|
||||
keras.applications.resnet50.ResNet50(include_top=True, weights='imagenet', input_tensor=None)
|
||||
keras.applications.resnet50.ResNet50(include_top=True, weights='imagenet', input_tensor=None, input_shape=None)
|
||||
```
|
||||
|
||||
|
||||
@@ -307,7 +336,17 @@ The default input size for this model is 224x224.
|
||||
- include_top: whether to include the fully-connected layer at the top of the network.
|
||||
- weights: one of `None` (random initialization) or "imagenet" (pre-training on ImageNet).
|
||||
- input_tensor: optional Keras tensor (i.e. output of `layers.Input()`) to use as image input for the model.
|
||||
|
||||
- input_shape: optional shape tuple, only to be specified
|
||||
if `include_top` is False (otherwise the input shape
|
||||
has to be `(224, 224, 3)` (with `tf` dim ordering)
|
||||
or `(3, 224, 224)` (with `th` dim ordering)).
|
||||
It should have exactly 3 input channels,
|
||||
and width and height should be no smaller than 197.
|
||||
E.g. `(200, 200, 3)` would be one valid value.
|
||||
- classes: optional number of classes to classify images
|
||||
into, only to be specified if `include_top` is True, and
|
||||
if no `weights` argument is specified.
|
||||
|
||||
### Returns
|
||||
|
||||
A Keras model instance.
|
||||
@@ -326,7 +365,7 @@ These weights are ported from the ones [released by Kaiming He](https://github.c
|
||||
|
||||
|
||||
```python
|
||||
keras.applications.inception_v3.InceptionV3(include_top=True, weights='imagenet', input_tensor=None)
|
||||
keras.applications.inception_v3.InceptionV3(include_top=True, weights='imagenet', input_tensor=None, input_shape=None)
|
||||
```
|
||||
|
||||
Inception V3 model, with weights pre-trained on ImageNet.
|
||||
@@ -342,7 +381,17 @@ The default input size for this model is 299x299.
|
||||
- include_top: whether to include the fully-connected layer at the top of the network.
|
||||
- weights: one of `None` (random initialization) or "imagenet" (pre-training on ImageNet).
|
||||
- input_tensor: optional Keras tensor (i.e. output of `layers.Input()`) to use as image input for the model.
|
||||
|
||||
- input_shape: optional shape tuple, only to be specified
|
||||
if `include_top` is False (otherwise the input shape
|
||||
has to be `(299, 299, 3)` (with `tf` dim ordering)
|
||||
or `(3, 299, 299)` (with `th` dim ordering)).
|
||||
It should have exactly 3 input channels,
|
||||
and width and height should be no smaller than 139.
|
||||
E.g. `(150, 150, 3)` would be one valid value.
|
||||
- classes: optional number of classes to classify images
|
||||
into, only to be specified if `include_top` is True, and
|
||||
if no `weights` argument is specified.
|
||||
|
||||
### Returns
|
||||
|
||||
A Keras model instance.
|
||||
@@ -371,7 +420,10 @@ A convolutional-recurrent model taking as input a vectorized representation of t
|
||||
- weights: one of `None` (random initialization) or "msd" (pre-training on [Million Song Dataset](http://labrosa.ee.columbia.edu/millionsong/)).
|
||||
- input_tensor: optional Keras tensor (i.e. output of `layers.Input()`) to use as image input for the model.
|
||||
- include_top: whether to include the 1 fully-connected layer (output layer) at the top of the network. If False, the network outputs 32-dim features.
|
||||
|
||||
- classes: optional number of classes to classify images
|
||||
into, only to be specified if `include_top` is True, and
|
||||
if no `weights` argument is specified.
|
||||
|
||||
### Returns
|
||||
|
||||
A Keras model instance.
|
||||
|
||||
externo
+23
@@ -44,6 +44,29 @@ Using TensorFlow backend.
|
||||
|
||||
----
|
||||
|
||||
## keras.json details
|
||||
|
||||
|
||||
```
|
||||
{
|
||||
"image_dim_ordering": "tf",
|
||||
"epsilon": 1e-07,
|
||||
"floatx": "float32",
|
||||
"backend": "tensorflow"
|
||||
}
|
||||
```
|
||||
|
||||
You can change these settings by editing `~/.keras/keras.json`.
|
||||
|
||||
* `image_dim_ordering`: string, either `"tf"` or `"th"`. It specifies which dimension ordering convention Keras will follow. (`keras.backend.image_dim_ordering()` returns it.)
|
||||
- For 2D data (e.g. image), `"tf"` assumes `(rows, cols, channels)` while `"th"` assumes `(channels, rows, cols)`.
|
||||
- For 3D data, `"tf"` assumes `(conv_dim1, conv_dim2, conv_dim3, channels)` while `"th"` assumes `(channels, conv_dim1, conv_dim2, conv_dim3)`.
|
||||
* `epsilon`: float, a numeric fuzzing constant used to avoid dividing by zero in some operations.
|
||||
* `floatx`: string, `"float16"`, `"float32"`, or `"float64"`. Default float precision.
|
||||
* `backend`: string, `"tensorflow"` or `"theano"`.
|
||||
|
||||
----
|
||||
|
||||
## Using the abstract Keras backend to write new code
|
||||
|
||||
If you want the Keras modules you write to be compatible with both Theano and TensorFlow, you have to write them via the abstract Keras backend API. Here's an intro.
|
||||
|
||||
externo
+1
-1
@@ -2,7 +2,7 @@
|
||||
|
||||
Functions from the `constraints` module allow setting constraints (e.g. non-negativity) on network parameters during optimization.
|
||||
|
||||
The penalties are applied on a per-layer basis. The exact API will depend on the layer, but the layers `Dense`, `TimeDistributedDense`, `MaxoutDense`, `Convolution1D` and `Convolution2D` have a unified API.
|
||||
The penalties are applied on a per-layer basis. The exact API will depend on the layer, but the layers `Dense`, `TimeDistributedDense`, `MaxoutDense`, `Convolution1D`, `Convolution2D` and `Convolution3D` have a unified API.
|
||||
|
||||
These layers expose 2 keyword arguments:
|
||||
|
||||
|
||||
+20
-20
@@ -4,7 +4,7 @@
|
||||
- [How can I run Keras on GPU?](#how-can-i-run-keras-on-gpu)
|
||||
- [How can I save a Keras model?](#how-can-i-save-a-keras-model)
|
||||
- [Why is the training loss much higher than the testing loss?](#why-is-the-training-loss-much-higher-than-the-testing-loss)
|
||||
- [How can I visualize the output of an intermediate layer?](#how-can-i-visualize-the-output-of-an-intermediate-layer)
|
||||
- [How can I obtain the output of an intermediate layer?](#how-can-i-obtain-the-output-of-an-intermediate-layer)
|
||||
- [How can I use Keras with datasets that don't fit in memory?](#how-can-i-use-keras-with-datasets-that-dont-fit-in-memory)
|
||||
- [How can I interrupt training when the validation loss isn't decreasing anymore?](#how-can-i-interrupt-training-when-the-validation-loss-isnt-decreasing-anymore)
|
||||
- [How is the validation split computed?](#how-is-the-validation-split-computed)
|
||||
@@ -156,9 +156,22 @@ Besides, the training loss is the average of the losses over each batch of train
|
||||
|
||||
---
|
||||
|
||||
### How can I visualize the output of an intermediate layer?
|
||||
### How can I obtain the output of an intermediate layer?
|
||||
|
||||
You can build a Keras function that will return the output of a certain layer given a certain input, for example:
|
||||
One simple way is to create a new `Model` that will output the layers that you are interested in:
|
||||
|
||||
```python
|
||||
from keras.models import Model
|
||||
|
||||
model = ... # create the original model
|
||||
|
||||
layer_name = 'my_layer'
|
||||
intermediate_layer_model = Model(input=model.input,
|
||||
output=model.get_layer(layer_name).output)
|
||||
intermediate_output = intermediate_layer_model.predict(data)
|
||||
```
|
||||
|
||||
Alternatively, you can build a Keras function that will return the output of a certain layer given a certain input, for example:
|
||||
|
||||
```python
|
||||
from keras import backend as K
|
||||
@@ -185,22 +198,6 @@ layer_output = get_3rd_layer_output([X, 0])[0]
|
||||
layer_output = get_3rd_layer_output([X, 1])[0]
|
||||
```
|
||||
|
||||
Another more flexible way of getting output from intermediate layers is to use the [functional API](/getting-started/functional-api-guide). For example, if you have created an autoencoder for MNIST:
|
||||
|
||||
```python
|
||||
inputs = Input(shape=(784,))
|
||||
encoded = Dense(32, activation='relu')(inputs)
|
||||
decoded = Dense(784)(encoded)
|
||||
model = Model(input=inputs, output=decoded)
|
||||
```
|
||||
|
||||
After compiling and training the model, you can get the output of the data from the encoder like this:
|
||||
|
||||
```python
|
||||
encoder = Model(input=inputs, output=encoded)
|
||||
X_encoded = encoder.predict(X)
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### How can I use Keras with datasets that don't fit in memory?
|
||||
@@ -229,8 +226,9 @@ Find out more in the [callbacks documentation](/callbacks).
|
||||
|
||||
### How is the validation split computed?
|
||||
|
||||
If you set the `validation_split` argument in `model.fit` to e.g. 0.1, then the validation data used will be the *last 10%* of the data. If you set it to 0.25, it will be the last 25% of the data, etc.
|
||||
If you set the `validation_split` argument in `model.fit` to e.g. 0.1, then the validation data used will be the *last 10%* of the data. If you set it to 0.25, it will be the last 25% of the data, etc. Note that the data isn't shuffled before extracting the validation split, so the validation is literally just the *last* x% of samples in the input you passed.
|
||||
|
||||
The same validation set is used for all epochs (within the same call to `fit`).
|
||||
|
||||
---
|
||||
|
||||
@@ -358,6 +356,7 @@ print(len(model.layers)) # "1"
|
||||
|
||||
Code and pre-trained weights are available for the following image classification models:
|
||||
|
||||
- Xception
|
||||
- VGG16
|
||||
- VGG19
|
||||
- ResNet50
|
||||
@@ -366,6 +365,7 @@ Code and pre-trained weights are available for the following image classificatio
|
||||
They can be imported from the module `keras.applications`:
|
||||
|
||||
```python
|
||||
from keras.applications.xception import Xception
|
||||
from keras.applications.vgg16 import VGG16
|
||||
from keras.applications.vgg19 import VGG19
|
||||
from keras.applications.resnet50 import ResNet50
|
||||
|
||||
externo
+4
-4
@@ -51,9 +51,9 @@ Stacking layers is as easy as `.add()`:
|
||||
from keras.layers import Dense, Activation
|
||||
|
||||
model.add(Dense(output_dim=64, input_dim=100))
|
||||
model.add(Activation("relu"))
|
||||
model.add(Activation('relu'))
|
||||
model.add(Dense(output_dim=10))
|
||||
model.add(Activation("softmax"))
|
||||
model.add(Activation('softmax'))
|
||||
```
|
||||
|
||||
Once your model looks good, configure its learning process with `.compile()`:
|
||||
@@ -146,9 +146,9 @@ By default, Keras will use TensorFlow as its tensor manipulation library. [Follo
|
||||
You can ask questions and join the development discussion:
|
||||
|
||||
- On the [Keras Google group](https://groups.google.com/forum/#!forum/keras-users).
|
||||
- On the [Keras Gitter channel](https://gitter.im/Keras-io/Lobby).
|
||||
- On the [Keras Slack channel](https://kerasteam.slack.com). Use [this link](https://keras-slack-autojoin.herokuapp.com/) to request an invitation to the channel.
|
||||
|
||||
You can also post bug reports and feature requests in [Github issues](https://github.com/fchollet/keras/issues). Make sure to read [our guidelines](https://github.com/fchollet/keras/blob/master/CONTRIBUTING.md) first.
|
||||
You can also post **bug reports and feature requests** (only) in [Github issues](https://github.com/fchollet/keras/issues). Make sure to read [our guidelines](https://github.com/fchollet/keras/blob/master/CONTRIBUTING.md) first.
|
||||
|
||||
|
||||
------------------
|
||||
|
||||
+2
-1
@@ -17,6 +17,7 @@ model.add(Dense(64, init='uniform'))
|
||||
- __identity__: Use with square 2D layers (`shape[0] == shape[1]`).
|
||||
- __orthogonal__: Use with square 2D layers (`shape[0] == shape[1]`).
|
||||
- __zero__
|
||||
- __one__
|
||||
- __glorot_normal__: Gaussian initialization scaled by fan_in + fan_out (Glorot 2010)
|
||||
- __glorot_uniform__
|
||||
- __he_normal__: Gaussian initialization scaled by fan_in (He et al., 2014)
|
||||
@@ -47,4 +48,4 @@ def my_init(shape, name=None):
|
||||
return initializations.normal(shape, scale=0.01, name=name)
|
||||
|
||||
model.add(Dense(64, init=my_init))
|
||||
```
|
||||
```
|
||||
|
||||
@@ -2,9 +2,9 @@
|
||||
|
||||
For simple, stateless custom operations, you are probably better off using `layers.core.Lambda` layers. But for any custom operation that has trainable weights, you should implement your own layer.
|
||||
|
||||
Here is the skeleton of a Keras layer. There are only three methods you need to implement:
|
||||
Here is the skeleton of a Keras layer, **as of Keras 1.2.0** (if you have an older version, please upgrade). There are only three methods you need to implement:
|
||||
|
||||
- `build(input_shape)`: this is where you will define your weights. Trainable weights should be added to the list `self.trainable_weights`. Other attributes of note are: `self.non_trainable_weights` (list) and `self.updates` (list of update tuples (tensor, new_tensor)). For an example of how to use `non_trainable_weights` and `updates`, see the code for the `BatchNormalization` layer. This method must set `self.built = True`, which can be done by calling `super([Layer], self).build()`.
|
||||
- `build(input_shape)`: this is where you will define your weights. This method must set `self.built = True`, which can be done by calling `super([Layer], self).build(input_shape)`.
|
||||
- `call(x)`: this is where the layer's logic lives. Unless you want your layer to support masking, you only have to care about the first argument passed to `call`: the input tensor.
|
||||
- `get_output_shape_for(input_shape)`: in case your layer modifies the shape of its input, you should specify here the shape transformation logic. This allows Keras to do automatic shape inference.
|
||||
|
||||
@@ -19,11 +19,11 @@ class MyLayer(Layer):
|
||||
super(MyLayer, self).__init__(**kwargs)
|
||||
|
||||
def build(self, input_shape):
|
||||
input_dim = input_shape[1]
|
||||
initial_weight_value = np.random.random((input_dim, output_dim))
|
||||
self.W = K.variable(initial_weight_value)
|
||||
self.trainable_weights = [self.W]
|
||||
super(MyLayer, self).build() # be sure you call this somewhere!
|
||||
# Create a trainable weight variable for this layer.
|
||||
self.W = self.add_weight(shape=(input_shape[1], self.output_dim),
|
||||
initializer='uniform',
|
||||
trainable=True)
|
||||
super(MyLayer, self).build(input_shape) # Be sure to call this somewhere!
|
||||
|
||||
def call(self, x, mask=None):
|
||||
return K.dot(x, self.W)
|
||||
@@ -32,4 +32,4 @@ class MyLayer(Layer):
|
||||
return (input_shape[0], self.output_dim)
|
||||
```
|
||||
|
||||
The existing Keras layers provide ample examples of how to implement almost anything. Never hesitate to read the source code!
|
||||
The existing Keras layers provide examples of how to implement almost anything. Never hesitate to read the source code!
|
||||
|
||||
externo
+1
-1
@@ -9,7 +9,7 @@ You can either pass the name of an existing metric, or pass a Theano/TensorFlow
|
||||
|
||||
#### Arguments
|
||||
- __y_true__: True labels. Theano/TensorFlow tensor.
|
||||
- __y_pred__: Predictions. Theano/TensorFlow tensor of the same shape as y_true.
|
||||
- __y_pred__: Predictions. Theano/TensorFlow tensor of the same shape as y_true.
|
||||
|
||||
#### Returns
|
||||
Single tensor value representing the mean of the output array across all
|
||||
|
||||
+16
-8
@@ -24,9 +24,9 @@ keras.preprocessing.image.ImageDataGenerator(featurewise_center=False,
|
||||
Generate batches of tensor image data with real-time data augmentation. The data will be looped over (in batches) indefinitely.
|
||||
|
||||
- __Arguments__:
|
||||
- __featurewise_center__: Boolean. Set input mean to 0 over the dataset.
|
||||
- __featurewise_center__: Boolean. Set input mean to 0 over the dataset, feature-wise.
|
||||
- __samplewise_center__: Boolean. Set each sample mean to 0.
|
||||
- __featurewise_std_normalization__: Boolean. Divide inputs by std of the dataset.
|
||||
- __featurewise_std_normalization__: Boolean. Divide inputs by std of the dataset, feature-wise.
|
||||
- __samplewise_std_normalization__: Boolean. Divide each input by its std.
|
||||
- __zca_whitening__: Boolean. Apply ZCA whitening.
|
||||
- __rotation_range__: Int. Degree range for random rotations.
|
||||
@@ -43,8 +43,8 @@ Generate batches of tensor image data with real-time data augmentation. The data
|
||||
otherwise we multiply the data by the value provided (before applying
|
||||
any other transformation).
|
||||
- __dim_ordering__: One of {"th", "tf"}.
|
||||
"tf" mode means that the images should have shape `(samples, width, height, channels)`,
|
||||
"th" mode means that the images should have shape `(samples, channels, width, height)`.
|
||||
"tf" mode means that the images should have shape `(samples, height, width, channels)`,
|
||||
"th" mode means that the images should have shape `(samples, channels, height, width)`.
|
||||
It defaults to the `image_dim_ordering` value found in your
|
||||
Keras config file at `~/.keras/keras.json`.
|
||||
If you never set it, then it will be "tf".
|
||||
@@ -53,13 +53,19 @@ Generate batches of tensor image data with real-time data augmentation. The data
|
||||
- __fit(X)__: Compute the internal data stats related to the data-dependent transformations, based on an array of sample data.
|
||||
Only required if featurewise_center or featurewise_std_normalization or zca_whitening.
|
||||
- __Arguments__:
|
||||
- __X__: sample data.
|
||||
- __X__: sample data. Should have rank 4.
|
||||
In case of grayscale data,
|
||||
the channels axis should have value 1, and in case
|
||||
of RGB data, it should have value 3.
|
||||
- __augment__: Boolean (default: False). Whether to fit on randomly augmented samples.
|
||||
- __rounds__: int (default: 1). If augment, how many augmentation passes over the data to use.
|
||||
- __seed__: int (default: None). Random seed.
|
||||
- __flow(X, y)__: Takes numpy data & label arrays, and generates batches of augmented/normalized data. Yields batches indefinitely, in an infinite loop.
|
||||
- __Arguments__:
|
||||
- __X__: data.
|
||||
- __X__: data. Should have rank 4.
|
||||
In case of grayscale data,
|
||||
the channels axis should have value 1, and in case
|
||||
of RGB data, it should have value 3.
|
||||
- __y__: labels.
|
||||
- __batch_size__: int (default: 32).
|
||||
- __shuffle__: boolean (default: True).
|
||||
@@ -71,8 +77,9 @@ Generate batches of tensor image data with real-time data augmentation. The data
|
||||
The generator loops indefinitely.
|
||||
- __flow_from_directory(directory)__: Takes the path to a directory, and generates batches of augmented/normalized data. Yields batches indefinitely, in an infinite loop.
|
||||
- __Arguments__:
|
||||
- __directory__: path to the target directory. It should contain one subdirectory per class,
|
||||
and the subdirectories should contain PNG or JPG images. See [this script](https://gist.github.com/fchollet/0830affa1f7f19fd47b06d4cf89ed44d) for more details.
|
||||
- __directory__: path to the target directory. It should contain one subdirectory per class.
|
||||
Any PNG, JPG or BMP images inside each of the subdirectories' directory trees will be included in the generator.
|
||||
See [this script](https://gist.github.com/fchollet/0830affa1f7f19fd47b06d4cf89ed44d) for more details.
|
||||
- __target_size__: tuple of integers, default: `(256, 256)`. The dimensions to which all images found will be resized.
|
||||
- __color_mode__: one of "grayscale", "rgb". Default: "rgb". Whether the images will be converted to have 1 or 3 color channels.
|
||||
- __classes__: optional list of class subdirectories (e.g. `['dogs', 'cats']`). Default: None. If not provided, the list of classes will be automatically inferred (and the order of the classes, which will map to the label indices, will be alphanumeric).
|
||||
@@ -83,6 +90,7 @@ Generate batches of tensor image data with real-time data augmentation. The data
|
||||
- __save_to_dir__: None or str (default: None). This allows you to optionally specify a directory to which to save the augmented pictures being generated (useful for visualizing what you are doing).
|
||||
- __save_prefix__: str. Prefix to use for filenames of saved pictures (only relevant if `save_to_dir` is set).
|
||||
- __save_format__: one of "png", "jpeg" (only relevant if `save_to_dir` is set). Default: "jpeg".
|
||||
- __follow_links__: whether to follow symlinks inside class subdirectories (default: False).
|
||||
|
||||
|
||||
- __Examples__:
|
||||
|
||||
+3
-2
@@ -1,10 +1,11 @@
|
||||
## pad_sequences
|
||||
|
||||
```python
|
||||
keras.preprocessing.sequence.pad_sequences(sequences, maxlen=None, dtype='int32')
|
||||
keras.preprocessing.sequence.pad_sequences(sequences, maxlen=None, dtype='int32',
|
||||
padding='pre', truncating='pre', value=0.)
|
||||
```
|
||||
|
||||
Transform a list of `nb_samples sequences` (lists of scalars) into a 2D Numpy array of shape `(nb_samples, nb_timesteps)`. `nb_timesteps` is either the `maxlen` argument if provided, or the length of the longest sequence otherwise. Sequences that are shorter than `nb_timesteps` are padded with zeros at the end.
|
||||
Transform a list of `nb_samples` sequences (lists of scalars) into a 2D Numpy array of shape `(nb_samples, nb_timesteps)`. `nb_timesteps` is either the `maxlen` argument if provided, or the length of the longest sequence otherwise. Sequences that are shorter than `nb_timesteps` are padded with `value`. Sequences longer than `nb_timesteps` are truncated so that they fit the desired length. The position where padding or truncation happens is determined by `padding` or `truncating`, respectively (both default to `'pre'`, i.e. the beginning of the sequence).
|
||||
|
||||
- __Return__: 2D Numpy array of shape `(nb_samples, nb_timesteps)`.
|
||||
|
||||
|
||||
externo
+1
-1
@@ -2,7 +2,7 @@
|
||||
|
||||
Regularizers allow you to apply penalties on layer parameters or layer activity during optimization. These penalties are incorporated in the loss function that the network optimizes.
|
||||
|
||||
The penalties are applied on a per-layer basis. The exact API will depend on the layer, but the layers `Dense`, `TimeDistributedDense`, `MaxoutDense`, `Convolution1D` and `Convolution2D` have a unified API.
|
||||
The penalties are applied on a per-layer basis. The exact API will depend on the layer, but the layers `Dense`, `TimeDistributedDense`, `MaxoutDense`, `Convolution1D`, `Convolution2D` and `Convolution3D` have a unified API.
|
||||
|
||||
These layers expose 3 keyword arguments:
|
||||
|
||||
|
||||
@@ -48,6 +48,9 @@ Compares different LSTM implementations on the IMDB sentiment classification tas
|
||||
[lstm_text_generation.py](lstm_text_generation.py)
|
||||
Generates text from Nietzsche's writings.
|
||||
|
||||
[mnist_acgan.py](mnist_acgan.py)
|
||||
Implementation of AC-GAN (Auxiliary Classifier GAN) on the MNIST dataset.
|
||||
|
||||
[mnist_cnn.py](mnist_cnn.py)
|
||||
Trains a simple convnet on the MNIST dataset.
|
||||
|
||||
|
||||
@@ -41,6 +41,7 @@ class CharacterTable(object):
|
||||
+ Decode the one hot integer representation to their character output
|
||||
+ Decode a vector of probabilities to their character output
|
||||
'''
|
||||
|
||||
def __init__(self, chars, maxlen):
|
||||
self.chars = sorted(set(chars))
|
||||
self.char_indices = dict((c, i) for i, c in enumerate(self.chars))
|
||||
@@ -119,7 +120,7 @@ X = X[indices]
|
||||
y = y[indices]
|
||||
|
||||
# Explicitly set apart 10% for validation data that we never train over
|
||||
split_at = len(X) - len(X) / 10
|
||||
split_at = len(X) - len(X) // 10
|
||||
(X_train, X_val) = (slice_X(X, 0, split_at), slice_X(X, split_at))
|
||||
(y_train, y_val) = (y[:split_at], y[split_at:])
|
||||
|
||||
|
||||
@@ -45,6 +45,7 @@ class Antirectifier(Layer):
|
||||
with twice less parameters yet with comparable
|
||||
classification accuracy as an equivalent ReLU-based network.
|
||||
'''
|
||||
|
||||
def get_output_shape_for(self, input_shape):
|
||||
shape = list(input_shape)
|
||||
assert len(shape) == 2 # only valid for 2D tensors
|
||||
|
||||
+11
-18
@@ -1,14 +1,10 @@
|
||||
'''Train a simple deep CNN on the CIFAR10 small images dataset.
|
||||
|
||||
GPU run command:
|
||||
GPU run command with Theano backend (with TensorFlow, the GPU is automatically used):
|
||||
THEANO_FLAGS=mode=FAST_RUN,device=gpu,floatX=float32 python cifar10_cnn.py
|
||||
|
||||
It gets down to 0.65 test logloss in 25 epochs, and down to 0.55 after 50 epochs.
|
||||
(it's still underfitting at that point, though).
|
||||
|
||||
Note: the data was pickled with Python 2, and some encoding issues might prevent you
|
||||
from loading it in Python 3. You might have to load it in Python 2,
|
||||
save it in a different format, load it in Python 3 and repickle it.
|
||||
'''
|
||||
|
||||
from __future__ import print_function
|
||||
@@ -17,7 +13,6 @@ from keras.preprocessing.image import ImageDataGenerator
|
||||
from keras.models import Sequential
|
||||
from keras.layers import Dense, Dropout, Activation, Flatten
|
||||
from keras.layers import Convolution2D, MaxPooling2D
|
||||
from keras.optimizers import SGD
|
||||
from keras.utils import np_utils
|
||||
|
||||
batch_size = 32
|
||||
@@ -27,16 +22,16 @@ data_augmentation = True
|
||||
|
||||
# input image dimensions
|
||||
img_rows, img_cols = 32, 32
|
||||
# the CIFAR10 images are RGB
|
||||
# The CIFAR10 images are RGB.
|
||||
img_channels = 3
|
||||
|
||||
# the data, shuffled and split between train and test sets
|
||||
# The data, shuffled and split between train and test sets:
|
||||
(X_train, y_train), (X_test, y_test) = cifar10.load_data()
|
||||
print('X_train shape:', X_train.shape)
|
||||
print(X_train.shape[0], 'train samples')
|
||||
print(X_test.shape[0], 'test samples')
|
||||
|
||||
# convert class vectors to binary class matrices
|
||||
# Convert class vectors to binary class matrices.
|
||||
Y_train = np_utils.to_categorical(y_train, nb_classes)
|
||||
Y_test = np_utils.to_categorical(y_test, nb_classes)
|
||||
|
||||
@@ -64,10 +59,9 @@ model.add(Dropout(0.5))
|
||||
model.add(Dense(nb_classes))
|
||||
model.add(Activation('softmax'))
|
||||
|
||||
# let's train the model using SGD + momentum (how original).
|
||||
sgd = SGD(lr=0.01, decay=1e-6, momentum=0.9, nesterov=True)
|
||||
# Let's train the model using RMSprop
|
||||
model.compile(loss='categorical_crossentropy',
|
||||
optimizer=sgd,
|
||||
optimizer='rmsprop',
|
||||
metrics=['accuracy'])
|
||||
|
||||
X_train = X_train.astype('float32')
|
||||
@@ -84,8 +78,7 @@ if not data_augmentation:
|
||||
shuffle=True)
|
||||
else:
|
||||
print('Using real-time data augmentation.')
|
||||
|
||||
# this will do preprocessing and realtime data augmentation
|
||||
# This will do preprocessing and realtime data augmentation:
|
||||
datagen = ImageDataGenerator(
|
||||
featurewise_center=False, # set input mean to 0 over the dataset
|
||||
samplewise_center=False, # set each sample mean to 0
|
||||
@@ -98,13 +91,13 @@ else:
|
||||
horizontal_flip=True, # randomly flip images
|
||||
vertical_flip=False) # randomly flip images
|
||||
|
||||
# compute quantities required for featurewise normalization
|
||||
# (std, mean, and principal components if ZCA whitening is applied)
|
||||
# Compute quantities required for featurewise normalization
|
||||
# (std, mean, and principal components if ZCA whitening is applied).
|
||||
datagen.fit(X_train)
|
||||
|
||||
# fit the model on the batches generated by datagen.flow()
|
||||
# Fit the model on the batches generated by datagen.flow().
|
||||
model.fit_generator(datagen.flow(X_train, Y_train,
|
||||
batch_size=batch_size),
|
||||
batch_size=batch_size),
|
||||
samples_per_epoch=X_train.shape[0],
|
||||
nb_epoch=nb_epoch,
|
||||
validation_data=(X_test, Y_test))
|
||||
|
||||
@@ -20,6 +20,8 @@ img_height = 128
|
||||
layer_name = 'block5_conv1'
|
||||
|
||||
# util function to convert a tensor into a valid image
|
||||
|
||||
|
||||
def deprocess_image(x):
|
||||
# normalize tensor: center on 0., ensure std is 0.1
|
||||
x -= x.mean()
|
||||
|
||||
@@ -61,6 +61,8 @@ saved_settings = {
|
||||
settings = saved_settings['dreamy']
|
||||
|
||||
# util function to open, resize and format pictures into appropriate tensors
|
||||
|
||||
|
||||
def preprocess_image(image_path):
|
||||
img = load_img(image_path, target_size=(img_width, img_height))
|
||||
img = img_to_array(img)
|
||||
@@ -69,6 +71,8 @@ def preprocess_image(image_path):
|
||||
return img
|
||||
|
||||
# util function to convert a tensor into a valid image
|
||||
|
||||
|
||||
def deprocess_image(x):
|
||||
if K.image_dim_ordering() == 'th':
|
||||
x = x.reshape((3, img_width, img_height))
|
||||
@@ -101,6 +105,8 @@ print('Model loaded.')
|
||||
layer_dict = dict([(layer.name, layer) for layer in model.layers])
|
||||
|
||||
# continuity loss util function
|
||||
|
||||
|
||||
def continuity_loss(x):
|
||||
assert K.ndim(x) == 4
|
||||
if K.image_dim_ordering() == 'th':
|
||||
@@ -109,9 +115,9 @@ def continuity_loss(x):
|
||||
b = K.square(x[:, :, :img_width - 1, :img_height - 1] -
|
||||
x[:, :, :img_width - 1, 1:])
|
||||
else:
|
||||
a = K.square(x[:, :img_width - 1, :img_height-1, :] -
|
||||
a = K.square(x[:, :img_width - 1, :img_height - 1, :] -
|
||||
x[:, 1:, :img_height - 1, :])
|
||||
b = K.square(x[:, :img_width - 1, :img_height-1, :] -
|
||||
b = K.square(x[:, :img_width - 1, :img_height - 1, :] -
|
||||
x[:, :img_width - 1, 1:, :])
|
||||
return K.sum(K.pow(a + b, 1.25))
|
||||
|
||||
@@ -140,12 +146,14 @@ loss += settings['dream_l2'] * K.sum(K.square(dream)) / np.prod(img_size)
|
||||
grads = K.gradients(loss, dream)
|
||||
|
||||
outputs = [loss]
|
||||
if type(grads) in {list, tuple}:
|
||||
if isinstance(grads, (list, tuple)):
|
||||
outputs += grads
|
||||
else:
|
||||
outputs.append(grads)
|
||||
|
||||
f_outputs = K.function([dream], outputs)
|
||||
|
||||
|
||||
def eval_loss_and_grads(x):
|
||||
x = x.reshape((1,) + img_size)
|
||||
outs = f_outputs([x])
|
||||
@@ -162,7 +170,10 @@ def eval_loss_and_grads(x):
|
||||
# "loss" and "grads". This is done because scipy.optimize
|
||||
# requires separate functions for loss and gradients,
|
||||
# but computing them separately would be inefficient.
|
||||
|
||||
|
||||
class Evaluator(object):
|
||||
|
||||
def __init__(self):
|
||||
self.loss_value = None
|
||||
self.grad_values = None
|
||||
|
||||
+158
-140
@@ -6,36 +6,31 @@ the different fonts thrown at it...the purpose is more to demonstrate CTC
|
||||
inside of Keras. Note that the font list may need to be updated
|
||||
for the particular OS in use.
|
||||
|
||||
This starts off with 4 letter words. After 10 or so epochs, CTC
|
||||
learns translational invariance, so longer words and groups of words
|
||||
with spaces are gradually fed in. This gradual increase in difficulty
|
||||
is handled using the TextImageGenerator class which is both a generator
|
||||
class for test/train data and a Keras callback class. Every 10 epochs
|
||||
the wordlist that the generator draws from increases in difficulty.
|
||||
This starts off with 4 letter words. For the first 12 epochs, the
|
||||
difficulty is gradually increased using the TextImageGenerator class
|
||||
which is both a generator class for test/train data and a Keras
|
||||
callback class. After 20 epochs, longer sequences are thrown at it
|
||||
by recompiling the model to handle a wider image and rebuilding
|
||||
the word list to include two words separated by a space.
|
||||
|
||||
The table below shows normalized edit distance values. Theano uses
|
||||
a slightly different CTC implementation, so some Theano-specific
|
||||
hyperparameter tuning would be needed to get it to match Tensorflow.
|
||||
a slightly different CTC implementation, hence the different results.
|
||||
|
||||
Norm. ED
|
||||
Epoch | TF | TH
|
||||
------------------------
|
||||
10 0.072 0.272
|
||||
20 0.032 0.115
|
||||
30 0.024 0.098
|
||||
40 0.023 0.108
|
||||
10 0.027 0.064
|
||||
15 0.038 0.035
|
||||
20 0.043 0.045
|
||||
25 0.014 0.019
|
||||
|
||||
This requires cairo and editdistance packages:
|
||||
pip install cairocffi
|
||||
pip install editdistance
|
||||
|
||||
Due to the use of a dummy loss function, Theano requires the following flags:
|
||||
on_unused_input='ignore'
|
||||
|
||||
Created by Mike Henry
|
||||
https://github.com/mbhenry/
|
||||
'''
|
||||
|
||||
import os
|
||||
import itertools
|
||||
import re
|
||||
@@ -47,17 +42,17 @@ from scipy import ndimage
|
||||
import pylab
|
||||
from keras import backend as K
|
||||
from keras.layers.convolutional import Convolution2D, MaxPooling2D
|
||||
from keras.layers import Input, Layer, Dense, Activation, Flatten
|
||||
from keras.layers import Reshape, Lambda, merge, Permute, TimeDistributed
|
||||
from keras.layers import Input, Dense, Activation
|
||||
from keras.layers import Reshape, Lambda, merge
|
||||
from keras.models import Model
|
||||
from keras.layers.recurrent import GRU
|
||||
from keras.optimizers import SGD
|
||||
from keras.utils import np_utils
|
||||
from keras.utils.data_utils import get_file
|
||||
from keras.preprocessing import image
|
||||
import keras.callbacks
|
||||
|
||||
OUTPUT_DIR = "image_ocr"
|
||||
|
||||
OUTPUT_DIR = 'image_ocr'
|
||||
|
||||
np.random.seed(55)
|
||||
|
||||
@@ -79,28 +74,33 @@ def speckle(img):
|
||||
# also uses a random font, a slight random rotation,
|
||||
# and a random amount of speckle noise
|
||||
|
||||
def paint_text(text, w, h):
|
||||
def paint_text(text, w, h, rotate=False, ud=False, multi_fonts=False):
|
||||
surface = cairo.ImageSurface(cairo.FORMAT_RGB24, w, h)
|
||||
with cairo.Context(surface) as context:
|
||||
context.set_source_rgb(1, 1, 1) # White
|
||||
context.paint()
|
||||
# this font list works in Centos 7
|
||||
fonts = ['Century Schoolbook', 'Courier', 'STIX', 'URW Chancery L', 'FreeMono']
|
||||
context.select_font_face(np.random.choice(fonts), cairo.FONT_SLANT_NORMAL,
|
||||
np.random.choice([cairo.FONT_WEIGHT_BOLD, cairo.FONT_WEIGHT_NORMAL]))
|
||||
context.set_font_size(40)
|
||||
if multi_fonts:
|
||||
fonts = ['Century Schoolbook', 'Courier', 'STIX', 'URW Chancery L', 'FreeMono']
|
||||
context.select_font_face(np.random.choice(fonts), cairo.FONT_SLANT_NORMAL,
|
||||
np.random.choice([cairo.FONT_WEIGHT_BOLD, cairo.FONT_WEIGHT_NORMAL]))
|
||||
else:
|
||||
context.select_font_face('Courier', cairo.FONT_SLANT_NORMAL, cairo.FONT_WEIGHT_BOLD)
|
||||
context.set_font_size(25)
|
||||
box = context.text_extents(text)
|
||||
if box[2] > w or box[3] > h:
|
||||
border_w_h = (4, 4)
|
||||
if box[2] > (w - 2 * border_w_h[1]) or box[3] > (h - 2 * border_w_h[0]):
|
||||
raise IOError('Could not fit string into image. Max char count is too large for given image width.')
|
||||
|
||||
# teach the RNN translational invariance by
|
||||
# fitting text box randomly on canvas, with some room to rotate
|
||||
border_w_h = (10, 16)
|
||||
max_shift_x = w - box[2] - border_w_h[0]
|
||||
max_shift_y = h - box[3] - border_w_h[1]
|
||||
top_left_x = np.random.randint(0, int(max_shift_x))
|
||||
top_left_y = np.random.randint(0, int(max_shift_y))
|
||||
|
||||
if ud:
|
||||
top_left_y = np.random.randint(0, int(max_shift_y))
|
||||
else:
|
||||
top_left_y = h // 2
|
||||
context.move_to(top_left_x - int(box[0]), top_left_y - int(box[1]))
|
||||
context.set_source_rgb(0, 0, 0)
|
||||
context.show_text(text)
|
||||
@@ -111,8 +111,9 @@ def paint_text(text, w, h):
|
||||
a = a[:, :, 0] # grab single channel
|
||||
a = a.astype(np.float32) / 255
|
||||
a = np.expand_dims(a, 0)
|
||||
if rotate:
|
||||
a = image.random_rotation(a, 3 * (w - top_left_x) / w + 1)
|
||||
a = speckle(a)
|
||||
a = image.random_rotation(a, 3 * (w - top_left_x) / w + 1)
|
||||
|
||||
return a
|
||||
|
||||
@@ -164,7 +165,7 @@ def is_valid_str(in_str):
|
||||
class TextImageGenerator(keras.callbacks.Callback):
|
||||
|
||||
def __init__(self, monogram_file, bigram_file, minibatch_size,
|
||||
img_w, img_h, downsample_width, val_split,
|
||||
img_w, img_h, downsample_factor, val_split,
|
||||
absolute_max_string_len=16):
|
||||
|
||||
self.minibatch_size = minibatch_size
|
||||
@@ -172,7 +173,7 @@ class TextImageGenerator(keras.callbacks.Callback):
|
||||
self.img_h = img_h
|
||||
self.monogram_file = monogram_file
|
||||
self.bigram_file = bigram_file
|
||||
self.downsample_width = downsample_width
|
||||
self.downsample_factor = downsample_factor
|
||||
self.val_split = val_split
|
||||
self.blank_label = self.get_output_size() - 1
|
||||
self.absolute_max_string_len = absolute_max_string_len
|
||||
@@ -187,7 +188,8 @@ class TextImageGenerator(keras.callbacks.Callback):
|
||||
assert num_words % self.minibatch_size == 0
|
||||
assert (self.val_split * num_words) % self.minibatch_size == 0
|
||||
self.num_words = num_words
|
||||
self.string_list = []
|
||||
self.string_list = [''] * self.num_words
|
||||
tmp_string_list = []
|
||||
self.max_string_len = max_string_len
|
||||
self.Y_data = np.ones([self.num_words, self.absolute_max_string_len]) * -1
|
||||
self.X_text = []
|
||||
@@ -196,25 +198,28 @@ class TextImageGenerator(keras.callbacks.Callback):
|
||||
# monogram file is sorted by frequency in english speech
|
||||
with open(self.monogram_file, 'rt') as f:
|
||||
for line in f:
|
||||
if len(self.string_list) == int(self.num_words * mono_fraction):
|
||||
if len(tmp_string_list) == int(self.num_words * mono_fraction):
|
||||
break
|
||||
word = line.rstrip()
|
||||
if max_string_len == -1 or max_string_len is None or len(word) <= max_string_len:
|
||||
self.string_list.append(word)
|
||||
tmp_string_list.append(word)
|
||||
|
||||
# bigram file contains common word pairings in english speech
|
||||
with open(self.bigram_file, 'rt') as f:
|
||||
lines = f.readlines()
|
||||
for line in lines:
|
||||
if len(self.string_list) == self.num_words:
|
||||
if len(tmp_string_list) == self.num_words:
|
||||
break
|
||||
columns = line.lower().split()
|
||||
word = columns[0] + ' ' + columns[1]
|
||||
if is_valid_str(word) and \
|
||||
(max_string_len == -1 or max_string_len is None or len(word) <= max_string_len):
|
||||
self.string_list.append(word)
|
||||
if len(self.string_list) != self.num_words:
|
||||
tmp_string_list.append(word)
|
||||
if len(tmp_string_list) != self.num_words:
|
||||
raise IOError('Could not pull enough words from supplied monogram and bigram files. ')
|
||||
# interlace to mix up the easy and hard words
|
||||
self.string_list[::2] = tmp_string_list[:self.num_words // 2]
|
||||
self.string_list[1::2] = tmp_string_list[self.num_words // 2:]
|
||||
|
||||
for i, word in enumerate(self.string_list):
|
||||
self.Y_len[i] = len(word)
|
||||
@@ -228,37 +233,38 @@ class TextImageGenerator(keras.callbacks.Callback):
|
||||
# each time an image is requested from train/val/test, a new random
|
||||
# painting of the text is performed
|
||||
def get_batch(self, index, size, train):
|
||||
# width and height are backwards from typical Keras convention
|
||||
# because width is the time dimension when it gets fed into the RNN
|
||||
if K.image_dim_ordering() == 'th':
|
||||
X_data = np.ones([size, 1, self.img_h, self.img_w])
|
||||
X_data = np.ones([size, 1, self.img_w, self.img_h])
|
||||
else:
|
||||
X_data = np.ones([size, self.img_h, self.img_w, 1])
|
||||
X_data = np.ones([size, self.img_w, self.img_h, 1])
|
||||
|
||||
labels = np.ones([size, self.absolute_max_string_len])
|
||||
input_length = np.zeros([size, 1])
|
||||
label_length = np.zeros([size, 1])
|
||||
source_str = []
|
||||
|
||||
for i in range(0, size):
|
||||
# Mix in some blank inputs. This seems to be important for
|
||||
# achieving translational invariance
|
||||
if train and i > size - 4:
|
||||
if K.image_dim_ordering() == 'th':
|
||||
X_data[i, 0, :, :] = paint_text('', self.img_w, self.img_h)
|
||||
X_data[i, 0, 0:self.img_w, :] = self.paint_func('')[0, :, :].T
|
||||
else:
|
||||
X_data[i, :, :, 0] = paint_text('', self.img_w, self.img_h)
|
||||
X_data[i, 0:self.img_w, :, 0] = self.paint_func('',)[0, :, :].T
|
||||
labels[i, 0] = self.blank_label
|
||||
input_length[i] = self.downsample_width
|
||||
input_length[i] = self.img_w // self.downsample_factor - 2
|
||||
label_length[i] = 1
|
||||
source_str.append('')
|
||||
else:
|
||||
if K.image_dim_ordering() == 'th':
|
||||
X_data[i, 0, :, :] = paint_text(self.X_text[index + i], self.img_w, self.img_h)
|
||||
X_data[i, 0, 0:self.img_w, :] = self.paint_func(self.X_text[index + i])[0, :, :].T
|
||||
else:
|
||||
X_data[i, :, :, 0] = paint_text(self.X_text[index + i], self.img_w, self.img_h)
|
||||
X_data[i, 0:self.img_w, :, 0] = self.paint_func(self.X_text[index + i])[0, :, :].T
|
||||
labels[i, :] = self.Y_data[index + i]
|
||||
input_length[i] = self.downsample_width
|
||||
input_length[i] = self.img_w // self.downsample_factor - 2
|
||||
label_length[i] = self.Y_len[index + i]
|
||||
source_str.append(self.X_text[index + i])
|
||||
|
||||
inputs = {'the_input': X_data,
|
||||
'the_labels': labels,
|
||||
'input_length': input_length,
|
||||
@@ -287,19 +293,23 @@ class TextImageGenerator(keras.callbacks.Callback):
|
||||
yield ret
|
||||
|
||||
def on_train_begin(self, logs={}):
|
||||
# translational invariance seems to be the hardest thing
|
||||
# for the RNN to learn, so start with <= 4 letter words.
|
||||
self.build_word_list(16000, 4, 1)
|
||||
self.paint_func = lambda text: paint_text(text, self.img_w, self.img_h,
|
||||
rotate=False, ud=False, multi_fonts=False)
|
||||
|
||||
def on_epoch_begin(self, epoch, logs={}):
|
||||
# After 10 epochs, translational invariance should be learned
|
||||
# so start feeding longer words and eventually multiple words with spaces
|
||||
if epoch == 10:
|
||||
self.build_word_list(32000, 8, 1)
|
||||
if epoch == 20:
|
||||
self.build_word_list(32000, 8, 0.6)
|
||||
if epoch == 30:
|
||||
self.build_word_list(64000, 12, 0.5)
|
||||
# rebind the paint function to implement curriculum learning
|
||||
if epoch >= 3 and epoch < 6:
|
||||
self.paint_func = lambda text: paint_text(text, self.img_w, self.img_h,
|
||||
rotate=False, ud=True, multi_fonts=False)
|
||||
elif epoch >= 6 and epoch < 9:
|
||||
self.paint_func = lambda text: paint_text(text, self.img_w, self.img_h,
|
||||
rotate=False, ud=True, multi_fonts=True)
|
||||
elif epoch >= 9:
|
||||
self.paint_func = lambda text: paint_text(text, self.img_w, self.img_h,
|
||||
rotate=True, ud=True, multi_fonts=True)
|
||||
if epoch >= 21 and self.max_string_len < 12:
|
||||
self.build_word_list(32000, 12, 0.5)
|
||||
|
||||
|
||||
# the actual loss calc occurs here despite it not being
|
||||
@@ -335,13 +345,14 @@ def decode_batch(test_func, word_batch):
|
||||
|
||||
class VizCallback(keras.callbacks.Callback):
|
||||
|
||||
def __init__(self, test_func, text_img_gen, num_display_words=6):
|
||||
def __init__(self, run_name, test_func, text_img_gen, num_display_words=6):
|
||||
self.test_func = test_func
|
||||
self.output_dir = os.path.join(
|
||||
OUTPUT_DIR, datetime.datetime.now().strftime('%A, %d. %B %Y %I.%M%p'))
|
||||
OUTPUT_DIR, run_name)
|
||||
self.text_img_gen = text_img_gen
|
||||
self.num_display_words = num_display_words
|
||||
os.makedirs(self.output_dir)
|
||||
if not os.path.exists(self.output_dir):
|
||||
os.makedirs(self.output_dir)
|
||||
|
||||
def show_edit_distance(self, num):
|
||||
num_left = num
|
||||
@@ -362,109 +373,116 @@ class VizCallback(keras.callbacks.Callback):
|
||||
% (num, mean_ed, mean_norm_ed))
|
||||
|
||||
def on_epoch_end(self, epoch, logs={}):
|
||||
self.model.save_weights(os.path.join(self.output_dir, 'weights%02d.h5' % epoch))
|
||||
self.model.save_weights(os.path.join(self.output_dir, 'weights%02d.h5' % (epoch)))
|
||||
self.show_edit_distance(256)
|
||||
word_batch = next(self.text_img_gen)[0]
|
||||
res = decode_batch(self.test_func, word_batch['the_input'][0:self.num_display_words])
|
||||
|
||||
if word_batch['the_input'][0].shape[0] < 256:
|
||||
cols = 2
|
||||
else:
|
||||
cols = 1
|
||||
for i in range(self.num_display_words):
|
||||
pylab.subplot(self.num_display_words, 1, i + 1)
|
||||
pylab.subplot(self.num_display_words // cols, cols, i + 1)
|
||||
if K.image_dim_ordering() == 'th':
|
||||
the_input = word_batch['the_input'][i, 0, :, :]
|
||||
else:
|
||||
the_input = word_batch['the_input'][i, :, :, 0]
|
||||
pylab.imshow(the_input, cmap='Greys_r')
|
||||
pylab.xlabel('Truth = \'%s\' Decoded = \'%s\'' % (word_batch['source_str'][i], res[i]))
|
||||
pylab.imshow(the_input.T, cmap='Greys_r')
|
||||
pylab.xlabel('Truth = \'%s\'\nDecoded = \'%s\'' % (word_batch['source_str'][i], res[i]))
|
||||
fig = pylab.gcf()
|
||||
fig.set_size_inches(10, 12)
|
||||
pylab.savefig(os.path.join(self.output_dir, 'e%02d.png' % epoch))
|
||||
fig.set_size_inches(10, 13)
|
||||
pylab.savefig(os.path.join(self.output_dir, 'e%02d.png' % (epoch)))
|
||||
pylab.close()
|
||||
|
||||
# Input Parameters
|
||||
img_h = 64
|
||||
img_w = 512
|
||||
nb_epoch = 50
|
||||
minibatch_size = 32
|
||||
words_per_epoch = 16000
|
||||
val_split = 0.2
|
||||
val_words = int(words_per_epoch * (val_split))
|
||||
|
||||
# Network parameters
|
||||
conv_num_filters = 16
|
||||
filter_size = 3
|
||||
pool_size_1 = 4
|
||||
pool_size_2 = 2
|
||||
time_dense_size = 32
|
||||
rnn_size = 512
|
||||
time_steps = img_w // (pool_size_1 * pool_size_2)
|
||||
def train(run_name, start_epoch, stop_epoch, img_w):
|
||||
# Input Parameters
|
||||
img_h = 64
|
||||
words_per_epoch = 16000
|
||||
val_split = 0.2
|
||||
val_words = int(words_per_epoch * (val_split))
|
||||
|
||||
if K.image_dim_ordering() == 'th':
|
||||
input_shape = (1, img_h, img_w)
|
||||
else:
|
||||
input_shape = (img_h, img_w, 1)
|
||||
# Network parameters
|
||||
conv_num_filters = 16
|
||||
filter_size = 3
|
||||
pool_size = 2
|
||||
time_dense_size = 32
|
||||
rnn_size = 512
|
||||
|
||||
fdir = os.path.dirname(get_file('wordlists.tgz',
|
||||
origin='http://www.isosemi.com/datasets/wordlists.tgz', untar=True))
|
||||
if K.image_dim_ordering() == 'th':
|
||||
input_shape = (1, img_w, img_h)
|
||||
else:
|
||||
input_shape = (img_w, img_h, 1)
|
||||
|
||||
img_gen = TextImageGenerator(monogram_file=os.path.join(fdir, 'wordlist_mono_clean.txt'),
|
||||
bigram_file=os.path.join(fdir, 'wordlist_bi_clean.txt'),
|
||||
minibatch_size=32,
|
||||
img_w=img_w,
|
||||
img_h=img_h,
|
||||
downsample_width=img_w // (pool_size_1 * pool_size_2) - 2,
|
||||
val_split=words_per_epoch - val_words)
|
||||
fdir = os.path.dirname(get_file('wordlists.tgz',
|
||||
origin='http://www.isosemi.com/datasets/wordlists.tgz', untar=True))
|
||||
|
||||
act = 'relu'
|
||||
input_data = Input(name='the_input', shape=input_shape, dtype='float32')
|
||||
inner = Convolution2D(conv_num_filters, filter_size, filter_size, border_mode='same',
|
||||
activation=act, name='conv1')(input_data)
|
||||
inner = MaxPooling2D(pool_size=(pool_size_1, pool_size_1), name='max1')(inner)
|
||||
inner = Convolution2D(conv_num_filters, filter_size, filter_size, border_mode='same',
|
||||
activation=act, name='conv2')(inner)
|
||||
inner = MaxPooling2D(pool_size=(pool_size_2, pool_size_2), name='max2')(inner)
|
||||
img_gen = TextImageGenerator(monogram_file=os.path.join(fdir, 'wordlist_mono_clean.txt'),
|
||||
bigram_file=os.path.join(fdir, 'wordlist_bi_clean.txt'),
|
||||
minibatch_size=32,
|
||||
img_w=img_w,
|
||||
img_h=img_h,
|
||||
downsample_factor=(pool_size ** 2),
|
||||
val_split=words_per_epoch - val_words
|
||||
)
|
||||
act = 'relu'
|
||||
input_data = Input(name='the_input', shape=input_shape, dtype='float32')
|
||||
inner = Convolution2D(conv_num_filters, filter_size, filter_size, border_mode='same',
|
||||
activation=act, init='he_normal', name='conv1')(input_data)
|
||||
inner = MaxPooling2D(pool_size=(pool_size, pool_size), name='max1')(inner)
|
||||
inner = Convolution2D(conv_num_filters, filter_size, filter_size, border_mode='same',
|
||||
activation=act, init='he_normal', name='conv2')(inner)
|
||||
inner = MaxPooling2D(pool_size=(pool_size, pool_size), name='max2')(inner)
|
||||
|
||||
conv_to_rnn_dims = ((img_h // (pool_size_1 * pool_size_2)) * conv_num_filters, img_w // (pool_size_1 * pool_size_2))
|
||||
inner = Reshape(target_shape=conv_to_rnn_dims, name='reshape')(inner)
|
||||
inner = Permute(dims=(2, 1), name='permute')(inner)
|
||||
conv_to_rnn_dims = (img_w // (pool_size ** 2), (img_h // (pool_size ** 2)) * conv_num_filters)
|
||||
inner = Reshape(target_shape=conv_to_rnn_dims, name='reshape')(inner)
|
||||
|
||||
# cuts down input size going into RNN:
|
||||
inner = TimeDistributed(Dense(time_dense_size, activation=act, name='dense1'))(inner)
|
||||
# cuts down input size going into RNN:
|
||||
inner = Dense(time_dense_size, activation=act, name='dense1')(inner)
|
||||
|
||||
# Two layers of bidirectional GRUs
|
||||
# GRU seems to work as well, if not better than LSTM:
|
||||
gru_1 = GRU(rnn_size, return_sequences=True, name='gru1')(inner)
|
||||
gru_1b = GRU(rnn_size, return_sequences=True, go_backwards=True, name='gru1_b')(inner)
|
||||
gru1_merged = merge([gru_1, gru_1b], mode='sum')
|
||||
gru_2 = GRU(rnn_size, return_sequences=True, name='gru2')(gru1_merged)
|
||||
gru_2b = GRU(rnn_size, return_sequences=True, go_backwards=True)(gru1_merged)
|
||||
# Two layers of bidirectional GRUs
|
||||
# GRU seems to work as well, if not better than LSTM:
|
||||
gru_1 = GRU(rnn_size, return_sequences=True, init='he_normal', name='gru1')(inner)
|
||||
gru_1b = GRU(rnn_size, return_sequences=True, go_backwards=True, init='he_normal', name='gru1_b')(inner)
|
||||
gru1_merged = merge([gru_1, gru_1b], mode='sum')
|
||||
gru_2 = GRU(rnn_size, return_sequences=True, init='he_normal', name='gru2')(gru1_merged)
|
||||
gru_2b = GRU(rnn_size, return_sequences=True, go_backwards=True, init='he_normal', name='gru2_b')(gru1_merged)
|
||||
|
||||
# transforms RNN output to character activations:
|
||||
inner = TimeDistributed(Dense(img_gen.get_output_size(), name='dense2'))(merge([gru_2, gru_2b], mode='concat'))
|
||||
y_pred = Activation('softmax', name='softmax')(inner)
|
||||
Model(input=[input_data], output=y_pred).summary()
|
||||
# transforms RNN output to character activations:
|
||||
inner = Dense(img_gen.get_output_size(), init='he_normal',
|
||||
name='dense2')(merge([gru_2, gru_2b], mode='concat'))
|
||||
y_pred = Activation('softmax', name='softmax')(inner)
|
||||
Model(input=[input_data], output=y_pred).summary()
|
||||
|
||||
labels = Input(name='the_labels', shape=[img_gen.absolute_max_string_len], dtype='float32')
|
||||
input_length = Input(name='input_length', shape=[1], dtype='int64')
|
||||
label_length = Input(name='label_length', shape=[1], dtype='int64')
|
||||
# Keras doesn't currently support loss funcs with extra parameters
|
||||
# so CTC loss is implemented in a lambda layer
|
||||
loss_out = Lambda(ctc_lambda_func, output_shape=(1,), name="ctc")([y_pred, labels, input_length, label_length])
|
||||
labels = Input(name='the_labels', shape=[img_gen.absolute_max_string_len], dtype='float32')
|
||||
input_length = Input(name='input_length', shape=[1], dtype='int64')
|
||||
label_length = Input(name='label_length', shape=[1], dtype='int64')
|
||||
# Keras doesn't currently support loss funcs with extra parameters
|
||||
# so CTC loss is implemented in a lambda layer
|
||||
loss_out = Lambda(ctc_lambda_func, output_shape=(1,), name='ctc')([y_pred, labels, input_length, label_length])
|
||||
|
||||
lr = 0.03
|
||||
# clipnorm seems to speed up convergence
|
||||
clipnorm = 5
|
||||
sgd = SGD(lr=lr, decay=3e-7, momentum=0.9, nesterov=True, clipnorm=clipnorm)
|
||||
# clipnorm seems to speed up convergence
|
||||
sgd = SGD(lr=0.02, decay=1e-6, momentum=0.9, nesterov=True, clipnorm=5)
|
||||
|
||||
model = Model(input=[input_data, labels, input_length, label_length], output=[loss_out])
|
||||
model = Model(input=[input_data, labels, input_length, label_length], output=[loss_out])
|
||||
|
||||
# the loss calc occurs elsewhere, so use a dummy lambda func for the loss
|
||||
model.compile(loss={'ctc': lambda y_true, y_pred: y_pred}, optimizer=sgd)
|
||||
# the loss calc occurs elsewhere, so use a dummy lambda func for the loss
|
||||
model.compile(loss={'ctc': lambda y_true, y_pred: y_pred}, optimizer=sgd)
|
||||
if start_epoch > 0:
|
||||
weight_file = os.path.join(OUTPUT_DIR, os.path.join(run_name, 'weights%02d.h5' % (start_epoch - 1)))
|
||||
model.load_weights(weight_file)
|
||||
# captures output of softmax so we can decode the output during visualization
|
||||
test_func = K.function([input_data], [y_pred])
|
||||
|
||||
# captures output of softmax so we can decode the output during visualization
|
||||
test_func = K.function([input_data], [y_pred])
|
||||
viz_cb = VizCallback(run_name, test_func, img_gen.next_val())
|
||||
|
||||
viz_cb = VizCallback(test_func, img_gen.next_val())
|
||||
model.fit_generator(generator=img_gen.next_train(), samples_per_epoch=(words_per_epoch - val_words),
|
||||
nb_epoch=stop_epoch, validation_data=img_gen.next_val(), nb_val_samples=val_words,
|
||||
callbacks=[viz_cb, img_gen], initial_epoch=start_epoch)
|
||||
|
||||
model.fit_generator(generator=img_gen.next_train(), samples_per_epoch=(words_per_epoch - val_words),
|
||||
nb_epoch=nb_epoch, validation_data=img_gen.next_val(), nb_val_samples=val_words,
|
||||
callbacks=[viz_cb, img_gen])
|
||||
|
||||
if __name__ == '__main__':
|
||||
run_name = datetime.datetime.now().strftime('%Y:%m:%d:%H:%M:%S')
|
||||
train(run_name, 0, 20, 128)
|
||||
# increase to wider images and start at epoch 20. The learned weights are reloaded
|
||||
train(run_name, 20, 25, 512)
|
||||
|
||||
@@ -10,7 +10,7 @@ np.random.seed(1337) # for reproducibility
|
||||
|
||||
from keras.preprocessing import sequence
|
||||
from keras.models import Sequential
|
||||
from keras.layers import Dense, Dropout, Embedding, LSTM, Input, Bidirectional
|
||||
from keras.layers import Dense, Dropout, Embedding, LSTM, Bidirectional
|
||||
from keras.datasets import imdb
|
||||
|
||||
|
||||
|
||||
@@ -16,7 +16,6 @@ from keras.layers import Dense, Dropout, Activation
|
||||
from keras.layers import Embedding
|
||||
from keras.layers import Convolution1D, GlobalMaxPooling1D
|
||||
from keras.datasets import imdb
|
||||
from keras import backend as K
|
||||
|
||||
|
||||
# set parameters:
|
||||
|
||||
@@ -54,9 +54,9 @@ def add_ngram(sequences, token_indice, ngram_range=2):
|
||||
new_sequences = []
|
||||
for input_list in sequences:
|
||||
new_list = input_list[:]
|
||||
for i in range(len(new_list)-ngram_range+1):
|
||||
for ngram_value in range(2, ngram_range+1):
|
||||
ngram = tuple(new_list[i:i+ngram_value])
|
||||
for i in range(len(new_list) - ngram_range + 1):
|
||||
for ngram_value in range(2, ngram_range + 1):
|
||||
ngram = tuple(new_list[i:i + ngram_value])
|
||||
if ngram in token_indice:
|
||||
new_list.append(token_indice[ngram])
|
||||
new_sequences.append(new_list)
|
||||
@@ -84,7 +84,7 @@ if ngram_range > 1:
|
||||
# Create set of unique n-gram from the training set.
|
||||
ngram_set = set()
|
||||
for input_list in X_train:
|
||||
for i in range(2, ngram_range+1):
|
||||
for i in range(2, ngram_range + 1):
|
||||
set_of_ngram = create_ngram_set(input_list, ngram_value=i)
|
||||
ngram_set.update(set_of_ngram)
|
||||
|
||||
@@ -92,7 +92,7 @@ if ngram_range > 1:
|
||||
# Integer values are greater than max_features in order
|
||||
# to avoid collision with existing features.
|
||||
start_index = max_features + 1
|
||||
token_indice = {v: k+start_index for k, v in enumerate(ngram_set)}
|
||||
token_indice = {v: k + start_index for k, v in enumerate(ngram_set)}
|
||||
indice_token = {token_indice[k]: k for k in token_indice}
|
||||
|
||||
# max_features is the highest integer that could be found in the dataset.
|
||||
|
||||
@@ -15,10 +15,9 @@ import numpy as np
|
||||
np.random.seed(1337) # for reproducibility
|
||||
|
||||
from keras.preprocessing import sequence
|
||||
from keras.utils import np_utils
|
||||
from keras.models import Sequential
|
||||
from keras.layers import Dense, Dropout, Activation, Embedding
|
||||
from keras.layers import LSTM, SimpleRNN, GRU
|
||||
from keras.layers import Dense, Activation, Embedding
|
||||
from keras.layers import LSTM
|
||||
from keras.datasets import imdb
|
||||
|
||||
max_features = 20000
|
||||
|
||||
@@ -12,7 +12,7 @@ has at least ~100k characters. ~1M is better.
|
||||
|
||||
from __future__ import print_function
|
||||
from keras.models import Sequential
|
||||
from keras.layers import Dense, Activation, Dropout
|
||||
from keras.layers import Dense, Activation
|
||||
from keras.layers import LSTM
|
||||
from keras.optimizers import RMSprop
|
||||
from keras.utils.data_utils import get_file
|
||||
|
||||
@@ -23,7 +23,10 @@ example output
|
||||
from __future__ import print_function
|
||||
|
||||
from collections import defaultdict
|
||||
import cPickle as pickle
|
||||
try:
|
||||
import cPickle as pickle
|
||||
except ImportError:
|
||||
import pickle
|
||||
from PIL import Image
|
||||
|
||||
from six.moves import range
|
||||
@@ -160,8 +163,6 @@ if __name__ == '__main__':
|
||||
loss=['binary_crossentropy', 'sparse_categorical_crossentropy']
|
||||
)
|
||||
|
||||
discriminator.trainable = True
|
||||
|
||||
# get our mnist data, and force it to be of shape (..., 1, 28, 28) with
|
||||
# range [-1, 1]
|
||||
(X_train, y_train), (X_test, y_test) = mnist.load_data()
|
||||
@@ -217,10 +218,7 @@ if __name__ == '__main__':
|
||||
noise = np.random.uniform(-1, 1, (2 * batch_size, latent_size))
|
||||
sampled_labels = np.random.randint(0, 10, 2 * batch_size)
|
||||
|
||||
# we want to fix the discriminator and let the generator train to
|
||||
# trick it
|
||||
discriminator.trainable = False
|
||||
|
||||
# we want to train the generator to trick the discriminator
|
||||
# For the generator, we want all the {fake, not-fake} labels to say
|
||||
# not-fake
|
||||
trick = np.ones(2 * batch_size)
|
||||
@@ -228,8 +226,6 @@ if __name__ == '__main__':
|
||||
epoch_gen_loss.append(combined.train_on_batch(
|
||||
[noise, sampled_labels.reshape((-1, 1))], [trick, sampled_labels]))
|
||||
|
||||
discriminator.trainable = True
|
||||
|
||||
print('\nTesting for epoch {}:'.format(epoch + 1))
|
||||
|
||||
# evaluate the testing loss here
|
||||
|
||||
@@ -8,7 +8,7 @@ document vector is considered to preserve both the word-level and
|
||||
sentence-level structure of the context.
|
||||
|
||||
# References
|
||||
- [A Hierarchical Neural Autoencoder for Paragraphs and Documents](https://web.stanford.edu/~jurafsky/pubs/P15-1107.pdf)
|
||||
- [A Hierarchical Neural Autoencoder for Paragraphs and Documents](https://arxiv.org/abs/1506.01057)
|
||||
Encodes paragraphs and documents with HRNN.
|
||||
Results have shown that HRNN outperforms standard
|
||||
RNNs and may play some role in more sophisticated generation tasks like
|
||||
@@ -27,7 +27,7 @@ After 5 epochs: train acc: 0.9858, val acc: 0.9864
|
||||
from __future__ import print_function
|
||||
|
||||
from keras.datasets import mnist
|
||||
from keras.models import Sequential, Model
|
||||
from keras.models import Model
|
||||
from keras.layers import Input, Dense, TimeDistributed
|
||||
from keras.layers import LSTM
|
||||
from keras.utils import np_utils
|
||||
|
||||
@@ -12,7 +12,7 @@ np.random.seed(1337) # for reproducibility
|
||||
from keras.datasets import mnist
|
||||
from keras.models import Sequential
|
||||
from keras.layers.core import Dense, Dropout, Activation
|
||||
from keras.optimizers import SGD, Adam, RMSprop
|
||||
from keras.optimizers import RMSprop
|
||||
from keras.utils import np_utils
|
||||
|
||||
|
||||
|
||||
@@ -55,6 +55,7 @@ Results
|
||||
'''
|
||||
|
||||
from __future__ import print_function
|
||||
from six.moves import xrange
|
||||
import numpy as np
|
||||
np.random.seed(1337)
|
||||
|
||||
|
||||
@@ -19,7 +19,7 @@ import random
|
||||
from keras.datasets import mnist
|
||||
from keras.models import Sequential, Model
|
||||
from keras.layers import Dense, Dropout, Input, Lambda
|
||||
from keras.optimizers import SGD, RMSprop
|
||||
from keras.optimizers import RMSprop
|
||||
from keras import backend as K
|
||||
|
||||
|
||||
@@ -50,7 +50,7 @@ def create_pairs(x, digit_indices):
|
||||
n = min([len(digit_indices[d]) for d in range(10)]) - 1
|
||||
for d in range(10):
|
||||
for i in range(n):
|
||||
z1, z2 = digit_indices[d][i], digit_indices[d][i+1]
|
||||
z1, z2 = digit_indices[d][i], digit_indices[d][i + 1]
|
||||
pairs += [[x[z1], x[z2]]]
|
||||
inc = random.randrange(1, 10)
|
||||
dn = (d + inc) % 10
|
||||
@@ -75,7 +75,7 @@ def create_base_network(input_dim):
|
||||
def compute_accuracy(predictions, labels):
|
||||
'''Compute classification accuracy with a fixed threshold on distances.
|
||||
'''
|
||||
return labels[predictions.ravel() < 0.5].mean()
|
||||
return np.mean(labels == (predictions.ravel() > 0.5))
|
||||
|
||||
|
||||
# the data, shuffled and split between train and test sets
|
||||
|
||||
@@ -34,6 +34,7 @@ X_test /= 255
|
||||
y_train = np_utils.to_categorical(y_train, nb_classes)
|
||||
y_test = np_utils.to_categorical(y_test, nb_classes)
|
||||
|
||||
|
||||
def make_model(dense_layer_sizes, nb_filters, nb_conv, nb_pool):
|
||||
'''Creates model comprised of 2 convolutional layers followed by dense layers
|
||||
|
||||
|
||||
@@ -44,7 +44,6 @@ Kaiming He, Xiangyu Zhang, Shaoqing Ren, Jian Sun
|
||||
https://arxiv.org/abs/1603.05027v3
|
||||
|
||||
'''
|
||||
|
||||
from __future__ import print_function
|
||||
import numpy as np
|
||||
np.random.seed(1337) # for reproducibility
|
||||
@@ -76,6 +75,16 @@ def getwhere(x):
|
||||
y_prepool, y_postpool = x
|
||||
return K.gradients(K.sum(y_postpool), y_prepool)
|
||||
|
||||
if K.backend() == 'tensorflow':
|
||||
raise RuntimeError('This example can only run with the '
|
||||
'Theano backend for the time being, '
|
||||
'because it requires taking the gradient '
|
||||
'of a gradient, which isn\'t '
|
||||
'supported for all TF ops.')
|
||||
|
||||
# This example assumes 'th' dim ordering.
|
||||
K.set_image_dim_ordering('th')
|
||||
|
||||
# input image dimensions
|
||||
img_rows, img_cols = 28, 28
|
||||
|
||||
|
||||
@@ -47,7 +47,7 @@ from scipy.optimize import fmin_l_bfgs_b
|
||||
from scipy.misc import imread, imsave
|
||||
|
||||
from keras import backend as K
|
||||
from keras.layers import Input, Convolution2D, MaxPooling2D, AveragePooling2D
|
||||
from keras.layers import Input, AveragePooling2D
|
||||
from keras.models import Model
|
||||
from keras.preprocessing.image import load_img, img_to_array
|
||||
from keras.applications import vgg19
|
||||
@@ -301,7 +301,7 @@ loss_grads = K.gradients(loss, target_image)
|
||||
|
||||
# Evaluator class for computing efficiency
|
||||
outputs = [loss]
|
||||
if type(loss_grads) in {list, tuple}:
|
||||
if isinstance(loss_grads, (list, tuple)):
|
||||
outputs += loss_grads
|
||||
else:
|
||||
outputs.append(loss_grads)
|
||||
|
||||
@@ -8,6 +8,13 @@ e.g.:
|
||||
```
|
||||
python neural_style_transfer.py img/tuebingen.jpg img/starry_night.jpg results/my_result
|
||||
```
|
||||
Optional parameters:
|
||||
```
|
||||
--iter, The number of iterations to run the style transfer for (Default is 10)
|
||||
--content_weight, The weight given to the content loss (Default is 0.025)
|
||||
--style_weight, The weight given to the style loss (Default is 1.0)
|
||||
--tv_weight, The weight given to the total variation loss (Default is 1.0)
|
||||
```
|
||||
|
||||
It is preferable to run this script on GPU, for speed.
|
||||
|
||||
@@ -60,23 +67,34 @@ parser.add_argument('style_reference_image_path', metavar='ref', type=str,
|
||||
help='Path to the style reference image.')
|
||||
parser.add_argument('result_prefix', metavar='res_prefix', type=str,
|
||||
help='Prefix for the saved results.')
|
||||
parser.add_argument('--iter', type=int, default=10, required=False,
|
||||
help='Number of iterations to run.')
|
||||
parser.add_argument('--content_weight', type=float, default=0.025, required=False,
|
||||
help='Content weight.')
|
||||
parser.add_argument('--style_weight', type=float, default=1.0, required=False,
|
||||
help='Style weight.')
|
||||
parser.add_argument('--tv_weight', type=float, default=1.0, required=False,
|
||||
help='Total Variation weight.')
|
||||
|
||||
args = parser.parse_args()
|
||||
base_image_path = args.base_image_path
|
||||
style_reference_image_path = args.style_reference_image_path
|
||||
result_prefix = args.result_prefix
|
||||
iterations = args.iter
|
||||
|
||||
# these are the weights of the different loss components
|
||||
total_variation_weight = 1.
|
||||
style_weight = 1.
|
||||
content_weight = 0.025
|
||||
total_variation_weight = args.tv_weight
|
||||
style_weight = args.style_weight
|
||||
content_weight = args.content_weight
|
||||
|
||||
# dimensions of the generated picture.
|
||||
width, height = load_img(base_image_path).size
|
||||
img_nrows = 400
|
||||
img_ncols = 400
|
||||
assert img_ncols == img_nrows, 'Due to the use of the Gram matrix, width and height must match.'
|
||||
img_ncols = int(width * img_nrows / height)
|
||||
|
||||
# util function to open, resize and format pictures into appropriate tensors
|
||||
|
||||
|
||||
def preprocess_image(image_path):
|
||||
img = load_img(image_path, target_size=(img_nrows, img_ncols))
|
||||
img = img_to_array(img)
|
||||
@@ -85,6 +103,8 @@ def preprocess_image(image_path):
|
||||
return img
|
||||
|
||||
# util function to convert a tensor into a valid image
|
||||
|
||||
|
||||
def deprocess_image(x):
|
||||
if K.image_dim_ordering() == 'th':
|
||||
x = x.reshape((3, img_nrows, img_ncols))
|
||||
@@ -128,6 +148,8 @@ outputs_dict = dict([(layer.name, layer.output) for layer in model.layers])
|
||||
# first we need to define 4 util functions
|
||||
|
||||
# the gram matrix of an image tensor (feature-wise outer product)
|
||||
|
||||
|
||||
def gram_matrix(x):
|
||||
assert K.ndim(x) == 3
|
||||
if K.image_dim_ordering() == 'th':
|
||||
@@ -142,6 +164,8 @@ def gram_matrix(x):
|
||||
# It is based on the gram matrices (which capture style) of
|
||||
# feature maps from the style reference image
|
||||
# and from the generated image
|
||||
|
||||
|
||||
def style_loss(style, combination):
|
||||
assert K.ndim(style) == 3
|
||||
assert K.ndim(combination) == 3
|
||||
@@ -154,19 +178,23 @@ def style_loss(style, combination):
|
||||
# an auxiliary loss function
|
||||
# designed to maintain the "content" of the
|
||||
# base image in the generated image
|
||||
|
||||
|
||||
def content_loss(base, combination):
|
||||
return K.sum(K.square(combination - base))
|
||||
|
||||
# the 3rd loss function, total variation loss,
|
||||
# designed to keep the generated image locally coherent
|
||||
|
||||
|
||||
def total_variation_loss(x):
|
||||
assert K.ndim(x) == 4
|
||||
if K.image_dim_ordering() == 'th':
|
||||
a = K.square(x[:, :, :img_nrows-1, :img_ncols-1] - x[:, :, 1:, :img_ncols-1])
|
||||
b = K.square(x[:, :, :img_nrows-1, :img_ncols-1] - x[:, :, :img_nrows-1, 1:])
|
||||
a = K.square(x[:, :, :img_nrows - 1, :img_ncols - 1] - x[:, :, 1:, :img_ncols - 1])
|
||||
b = K.square(x[:, :, :img_nrows - 1, :img_ncols - 1] - x[:, :, :img_nrows - 1, 1:])
|
||||
else:
|
||||
a = K.square(x[:, :img_nrows-1, :img_ncols-1, :] - x[:, 1:, :img_ncols-1, :])
|
||||
b = K.square(x[:, :img_nrows-1, :img_ncols-1, :] - x[:, :img_nrows-1, 1:, :])
|
||||
a = K.square(x[:, :img_nrows - 1, :img_ncols - 1, :] - x[:, 1:, :img_ncols - 1, :])
|
||||
b = K.square(x[:, :img_nrows - 1, :img_ncols - 1, :] - x[:, :img_nrows - 1, 1:, :])
|
||||
return K.sum(K.pow(a + b, 1.25))
|
||||
|
||||
# combine these loss functions into a single scalar
|
||||
@@ -192,13 +220,14 @@ loss += total_variation_weight * total_variation_loss(combination_image)
|
||||
grads = K.gradients(loss, combination_image)
|
||||
|
||||
outputs = [loss]
|
||||
if type(grads) in {list, tuple}:
|
||||
if isinstance(grads, (list, tuple)):
|
||||
outputs += grads
|
||||
else:
|
||||
outputs.append(grads)
|
||||
|
||||
f_outputs = K.function([combination_image], outputs)
|
||||
|
||||
|
||||
def eval_loss_and_grads(x):
|
||||
if K.image_dim_ordering() == 'th':
|
||||
x = x.reshape((1, 3, img_nrows, img_ncols))
|
||||
@@ -218,7 +247,10 @@ def eval_loss_and_grads(x):
|
||||
# "loss" and "grads". This is done because scipy.optimize
|
||||
# requires separate functions for loss and gradients,
|
||||
# but computing them separately would be inefficient.
|
||||
|
||||
|
||||
class Evaluator(object):
|
||||
|
||||
def __init__(self):
|
||||
self.loss_value = None
|
||||
self.grads_values = None
|
||||
@@ -246,7 +278,7 @@ if K.image_dim_ordering() == 'th':
|
||||
else:
|
||||
x = np.random.uniform(0, 255, (1, img_nrows, img_ncols, 3)) - 128.
|
||||
|
||||
for i in range(10):
|
||||
for i in range(iterations):
|
||||
print('Start of iteration', i)
|
||||
start_time = time.time()
|
||||
x, min_val, info = fmin_l_bfgs_b(evaluator.loss, x.flatten(),
|
||||
|
||||
@@ -21,7 +21,7 @@ print('Loading data...')
|
||||
print(len(X_train), 'train sequences')
|
||||
print(len(X_test), 'test sequences')
|
||||
|
||||
nb_classes = np.max(y_train)+1
|
||||
nb_classes = np.max(y_train) + 1
|
||||
print(nb_classes, 'classes')
|
||||
|
||||
print('Vectorizing sequence data...')
|
||||
|
||||
@@ -100,6 +100,7 @@ deconv_2_decoded = decoder_deconv_2(deconv_1_decoded)
|
||||
x_decoded_relu = decoder_deconv_3_upsamp(deconv_2_decoded)
|
||||
x_decoded_mean_squash = decoder_mean_squash(x_decoded_relu)
|
||||
|
||||
|
||||
def vae_loss(x, x_decoded_mean):
|
||||
# NOTE: binary_crossentropy expects a batch_size by dim
|
||||
# for x and x_decoded_mean, so we MUST flatten these!
|
||||
|
||||
+1
-1
@@ -15,4 +15,4 @@ from . import objectives
|
||||
from . import optimizers
|
||||
from . import regularizers
|
||||
|
||||
__version__ = '1.1.2'
|
||||
__version__ = '1.2.1'
|
||||
|
||||
@@ -1,6 +1,11 @@
|
||||
import numpy as np
|
||||
from .. import backend as K
|
||||
|
||||
try:
|
||||
import librosa
|
||||
except ImportError:
|
||||
librosa = None
|
||||
|
||||
|
||||
TAGS = ['rock', 'pop', 'alternative', 'indie', 'electronic',
|
||||
'female vocalists', 'dance', '00s', 'alternative rock', 'jazz',
|
||||
@@ -15,51 +20,50 @@ TAGS = ['rock', 'pop', 'alternative', 'indie', 'electronic',
|
||||
'sad', 'House', 'happy']
|
||||
|
||||
|
||||
def librosa_exists():
    """Reports whether the optional `librosa` dependency can be imported.

    # Returns
        `True` if importing `librosa` succeeds,
        `False` if the import raises `ImportError`.
    """
    try:
        # The import is attempted by name so that a missing package
        # surfaces here as ImportError rather than at module load time.
        __import__('librosa')
    except ImportError:
        return False
    return True
|
||||
|
||||
|
||||
def preprocess_input(audio_path, dim_ordering='default'):
|
||||
'''Reads an audio file and outputs a Mel-spectrogram.
|
||||
'''
|
||||
"""Reads an audio file and outputs a Mel-spectrogram.
|
||||
|
||||
# Arguments
|
||||
audio_path: path to the target audio file.
|
||||
dim_ordering: data format for the output spectrogram image.
|
||||
|
||||
# Returns
|
||||
3D Numpy tensor encoding the Mel-spectrogram.
|
||||
|
||||
# Raises
|
||||
ImportError: if librosa is not available.
|
||||
"""
|
||||
if dim_ordering == 'default':
|
||||
dim_ordering = K.image_dim_ordering()
|
||||
assert dim_ordering in {'tf', 'th'}
|
||||
|
||||
if librosa_exists():
|
||||
import librosa
|
||||
else:
|
||||
raise RuntimeError('Librosa is required to process audio files.\n' +
|
||||
'Install it via `pip install librosa` \nor visit ' +
|
||||
'http://librosa.github.io/librosa/ for details.')
|
||||
if librosa is None:
|
||||
raise ImportError('Librosa is required to process audio files. '
|
||||
'Install it via `pip install librosa` or visit '
|
||||
'http://librosa.github.io/librosa/ for details.')
|
||||
|
||||
# mel-spectrogram parameters
|
||||
SR = 12000
|
||||
N_FFT = 512
|
||||
N_MELS = 96
|
||||
HOP_LEN = 256
|
||||
DURA = 29.12
|
||||
sr = 12000
|
||||
n_fft = 512
|
||||
n_mels = 96
|
||||
hop_length = 256
|
||||
duration = 29.12
|
||||
|
||||
src, sr = librosa.load(audio_path, sr=SR)
|
||||
src, sr = librosa.load(audio_path, sr=sr)
|
||||
n_sample = src.shape[0]
|
||||
n_sample_wanted = int(DURA * SR)
|
||||
n_sample_wanted = int(duration * sr)
|
||||
|
||||
# trim the signal at the center
|
||||
if n_sample < n_sample_wanted: # if too short
|
||||
src = np.hstack((src, np.zeros((int(DURA * SR) - n_sample,))))
|
||||
src = np.hstack((src, np.zeros((int(duration * sr) - n_sample,))))
|
||||
elif n_sample > n_sample_wanted: # if too long
|
||||
src = src[(n_sample - n_sample_wanted) / 2:
|
||||
(n_sample + n_sample_wanted) / 2]
|
||||
|
||||
logam = librosa.logamplitude
|
||||
melgram = librosa.feature.melspectrogram
|
||||
x = logam(melgram(y=src, sr=SR, hop_length=HOP_LEN,
|
||||
n_fft=N_FFT, n_mels=N_MELS) ** 2,
|
||||
x = logam(melgram(y=src, sr=sr, hop_lengthgth=hop_length,
|
||||
n_fft=n_fft, n_mels=n_mels) ** 2,
|
||||
ref_power=1.0)
|
||||
|
||||
if dim_ordering == 'th':
|
||||
@@ -70,13 +74,15 @@ def preprocess_input(audio_path, dim_ordering='default'):
|
||||
|
||||
|
||||
def decode_predictions(preds, top_n=5):
|
||||
'''Decode the output of a music tagger model.
|
||||
"""Decode the output of a music tagger model.
|
||||
|
||||
# Arguments
|
||||
preds: 2-dimensional numpy array
|
||||
top_n: integer in [0, 50], number of items to show
|
||||
top_n: integer in [0, 50], number of items to show.
|
||||
|
||||
'''
|
||||
# Returns
|
||||
Decoded output.
|
||||
"""
|
||||
assert len(preds.shape) == 2 and preds.shape[1] == 50
|
||||
results = []
|
||||
for pred in preds:
|
||||
|
||||
@@ -9,6 +9,15 @@ CLASS_INDEX_PATH = 'https://s3.amazonaws.com/deep-learning-models/image-models/i
|
||||
|
||||
|
||||
def preprocess_input(x, dim_ordering='default'):
|
||||
"""Preprocesses a tensor encoding a batch of images.
|
||||
|
||||
# Arguments
|
||||
x: input Numpy tensor, 4D.
|
||||
dim_ordering: data format of the image tensor.
|
||||
|
||||
# Returns
|
||||
Preprocessed tensor.
|
||||
"""
|
||||
if dim_ordering == 'default':
|
||||
dim_ordering = K.image_dim_ordering()
|
||||
assert dim_ordering in {'tf', 'th'}
|
||||
@@ -31,6 +40,21 @@ def preprocess_input(x, dim_ordering='default'):
|
||||
|
||||
|
||||
def decode_predictions(preds, top=5):
|
||||
"""Decodes the prediction of an ImageNet model.
|
||||
|
||||
# Arguments
|
||||
preds: Numpy tensor encoding a batch of predictions.
|
||||
top: integer, how many top-guesses to return.
|
||||
|
||||
# Returns
|
||||
A list of lists of top class prediction tuples
|
||||
`(class_name, class_description, score)`.
|
||||
One list of tuples per sample in batch input.
|
||||
|
||||
# Raises
|
||||
ValueError: in case of invalid shape of the `preds` array
|
||||
(must be 2D).
|
||||
"""
|
||||
global CLASS_INDEX
|
||||
if len(preds.shape) != 2 or preds.shape[1] != 1000:
|
||||
raise ValueError('`decode_predictions` expects '
|
||||
@@ -49,3 +73,67 @@ def decode_predictions(preds, top=5):
|
||||
result.sort(key=lambda x: x[2], reverse=True)
|
||||
results.append(result)
|
||||
return results
|
||||
|
||||
|
||||
def _obtain_input_shape(input_shape,
|
||||
default_size,
|
||||
min_size,
|
||||
dim_ordering,
|
||||
include_top):
|
||||
"""Internal utility to compute/validate an ImageNet model's input shape.
|
||||
|
||||
# Arguments
|
||||
input_shape: either None (will return the default network input shape),
|
||||
or a user-provided shape to be validated.
|
||||
default_size: default input width/height for the model.
|
||||
min_size: minimum input width/height accepted by the model.
|
||||
dim_ordering: image data format to use.
|
||||
include_top: whether the model is expected to
|
||||
be linked to a classifier via a Flatten layer.
|
||||
|
||||
# Returns
|
||||
An integer shape tuple (may include None entries).
|
||||
|
||||
# Raises
|
||||
ValueError: in case of invalid argument values.
|
||||
"""
|
||||
if dim_ordering == 'th':
|
||||
default_shape = (3, default_size, default_size)
|
||||
else:
|
||||
default_shape = (default_size, default_size, 3)
|
||||
if include_top:
|
||||
if input_shape is not None:
|
||||
if input_shape != default_shape:
|
||||
raise ValueError('When setting`include_top=True`, '
|
||||
'`input_shape` should be ' + str(default_shape) + '.')
|
||||
input_shape = default_shape
|
||||
else:
|
||||
if dim_ordering == 'th':
|
||||
if input_shape is not None:
|
||||
if len(input_shape) != 3:
|
||||
raise ValueError('`input_shape` must be a tuple of three integers.')
|
||||
if input_shape[0] != 3:
|
||||
raise ValueError('The input must have 3 channels; got '
|
||||
'`input_shape=' + str(input_shape) + '`')
|
||||
if ((input_shape[1] is not None and input_shape[1] < min_size) or
|
||||
(input_shape[2] is not None and input_shape[2] < min_size)):
|
||||
raise ValueError('Input size must be at least ' +
|
||||
str(min_size) + 'x' + str(min_size) + ', got '
|
||||
'`input_shape=' + str(input_shape) + '`')
|
||||
else:
|
||||
input_shape = (3, None, None)
|
||||
else:
|
||||
if input_shape is not None:
|
||||
if len(input_shape) != 3:
|
||||
raise ValueError('`input_shape` must be a tuple of three integers.')
|
||||
if input_shape[-1] != 3:
|
||||
raise ValueError('The input must have 3 channels; got '
|
||||
'`input_shape=' + str(input_shape) + '`')
|
||||
if ((input_shape[0] is not None and input_shape[0] < min_size) or
|
||||
(input_shape[1] is not None and input_shape[1] < min_size)):
|
||||
raise ValueError('Input size must be at least ' +
|
||||
str(min_size) + 'x' + str(min_size) + ', got '
|
||||
'`input_shape=' + str(input_shape) + '`')
|
||||
else:
|
||||
input_shape = (None, None, 3)
|
||||
return input_shape
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
'''Inception V3 model for Keras.
|
||||
"""Inception V3 model for Keras.
|
||||
|
||||
Note that the ImageNet weights provided are from a model that had not fully converged.
|
||||
Inception v3 should be able to reach 6.9% top-5 error, but our model
|
||||
@@ -10,11 +10,11 @@ Also, do note that the input image format for this model is different than for
|
||||
the VGG16 and ResNet models (299x299 instead of 224x224), and that the input preprocessing function
|
||||
is also different (same as Xception).
|
||||
|
||||
# Reference:
|
||||
# Reference
|
||||
|
||||
- [Rethinking the Inception Architecture for Computer Vision](http://arxiv.org/abs/1512.00567)
|
||||
|
||||
'''
|
||||
"""
|
||||
from __future__ import print_function
|
||||
from __future__ import absolute_import
|
||||
|
||||
@@ -23,10 +23,11 @@ import warnings
|
||||
from ..models import Model
|
||||
from ..layers import Flatten, Dense, Input, BatchNormalization, merge
|
||||
from ..layers import Convolution2D, MaxPooling2D, AveragePooling2D
|
||||
from ..engine.topology import get_source_inputs
|
||||
from ..utils.layer_utils import convert_all_kernels_in_model
|
||||
from ..utils.data_utils import get_file
|
||||
from .. import backend as K
|
||||
from .imagenet_utils import decode_predictions
|
||||
from .imagenet_utils import decode_predictions, _obtain_input_shape
|
||||
|
||||
|
||||
TH_WEIGHTS_PATH = 'https://github.com/fchollet/deep-learning-models/releases/download/v0.2/inception_v3_weights_th_dim_ordering_th_kernels.h5'
|
||||
@@ -38,8 +39,8 @@ TF_WEIGHTS_PATH_NO_TOP = 'https://github.com/fchollet/deep-learning-models/relea
|
||||
def conv2d_bn(x, nb_filter, nb_row, nb_col,
|
||||
border_mode='same', subsample=(1, 1),
|
||||
name=None):
|
||||
'''Utility function to apply conv + BN.
|
||||
'''
|
||||
"""Utility function to apply conv + BN.
|
||||
"""
|
||||
if name is not None:
|
||||
bn_name = name + '_bn'
|
||||
conv_name = name + '_conv'
|
||||
@@ -60,8 +61,9 @@ def conv2d_bn(x, nb_filter, nb_row, nb_col,
|
||||
|
||||
|
||||
def InceptionV3(include_top=True, weights='imagenet',
|
||||
input_tensor=None):
|
||||
'''Instantiate the Inception v3 architecture,
|
||||
input_tensor=None, input_shape=None,
|
||||
classes=1000):
|
||||
"""Instantiate the Inception v3 architecture,
|
||||
optionally loading weights pre-trained
|
||||
on ImageNet. Note that when using TensorFlow,
|
||||
for best performance you should set
|
||||
@@ -82,25 +84,35 @@ def InceptionV3(include_top=True, weights='imagenet',
|
||||
or "imagenet" (pre-training on ImageNet).
|
||||
input_tensor: optional Keras tensor (i.e. output of `layers.Input()`)
|
||||
to use as image input for the model.
|
||||
input_shape: optional shape tuple, only to be specified
|
||||
if `include_top` is False (otherwise the input shape
|
||||
has to be `(299, 299, 3)` (with `tf` dim ordering)
|
||||
or `(3, 299, 299)` (with `th` dim ordering).
|
||||
It should have exactly 3 input channels,
|
||||
and width and height should be no smaller than 139.
|
||||
E.g. `(150, 150, 3)` would be one valid value.
|
||||
classes: optional number of classes to classify images
|
||||
into, only to be specified if `include_top` is True, and
|
||||
if no `weights` argument is specified.
|
||||
|
||||
# Returns
|
||||
A Keras model instance.
|
||||
'''
|
||||
"""
|
||||
if weights not in {'imagenet', None}:
|
||||
raise ValueError('The `weights` argument should be either '
|
||||
'`None` (random initialization) or `imagenet` '
|
||||
'(pre-training on ImageNet).')
|
||||
|
||||
if weights == 'imagenet' and include_top and classes != 1000:
|
||||
raise ValueError('If using `weights` as imagenet with `include_top`'
|
||||
' as true, `classes` should be 1000')
|
||||
|
||||
# Determine proper input shape
|
||||
if K.image_dim_ordering() == 'th':
|
||||
if include_top:
|
||||
input_shape = (3, 299, 299)
|
||||
else:
|
||||
input_shape = (3, None, None)
|
||||
else:
|
||||
if include_top:
|
||||
input_shape = (299, 299, 3)
|
||||
else:
|
||||
input_shape = (None, None, 3)
|
||||
input_shape = _obtain_input_shape(input_shape,
|
||||
default_size=299,
|
||||
min_size=139,
|
||||
dim_ordering=K.image_dim_ordering(),
|
||||
include_top=include_top)
|
||||
|
||||
if input_tensor is None:
|
||||
img_input = Input(shape=input_shape)
|
||||
@@ -259,10 +271,16 @@ def InceptionV3(include_top=True, weights='imagenet',
|
||||
# Classification block
|
||||
x = AveragePooling2D((8, 8), strides=(8, 8), name='avg_pool')(x)
|
||||
x = Flatten(name='flatten')(x)
|
||||
x = Dense(1000, activation='softmax', name='predictions')(x)
|
||||
x = Dense(classes, activation='softmax', name='predictions')(x)
|
||||
|
||||
# Create model
|
||||
model = Model(img_input, x)
|
||||
# Ensure that the model takes into account
|
||||
# any potential predecessors of `input_tensor`.
|
||||
if input_tensor is not None:
|
||||
inputs = get_source_inputs(input_tensor)
|
||||
else:
|
||||
inputs = img_input
|
||||
# Create model.
|
||||
model = Model(inputs, x, name='inception_v3')
|
||||
|
||||
# load weights
|
||||
if weights == 'imagenet':
|
||||
|
||||
@@ -1,11 +1,11 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
'''MusicTaggerCRNN model for Keras.
|
||||
"""MusicTaggerCRNN model for Keras.
|
||||
|
||||
# Reference:
|
||||
|
||||
- [Music-auto_tagging-keras](https://github.com/keunwoochoi/music-auto_tagging-keras)
|
||||
|
||||
'''
|
||||
"""
|
||||
from __future__ import print_function
|
||||
from __future__ import absolute_import
|
||||
|
||||
@@ -18,6 +18,7 @@ from ..layers.convolutional import MaxPooling2D, ZeroPadding2D
|
||||
from ..layers.normalization import BatchNormalization
|
||||
from ..layers.advanced_activations import ELU
|
||||
from ..layers.recurrent import GRU
|
||||
from ..engine.topology import get_source_inputs
|
||||
from ..utils.data_utils import get_file
|
||||
from ..utils.layer_utils import convert_all_kernels_in_model
|
||||
from .audio_conv_utils import decode_predictions, preprocess_input
|
||||
@@ -27,8 +28,8 @@ TF_WEIGHTS_PATH = 'https://github.com/fchollet/deep-learning-models/releases/dow
|
||||
|
||||
|
||||
def MusicTaggerCRNN(weights='msd', input_tensor=None,
|
||||
include_top=True):
|
||||
'''Instantiate the MusicTaggerCRNN architecture,
|
||||
include_top=True, classes=50):
|
||||
"""Instantiate the MusicTaggerCRNN architecture,
|
||||
optionally loading weights pre-trained
|
||||
on Million Song Dataset. Note that when using TensorFlow,
|
||||
for best performance you should set
|
||||
@@ -53,16 +54,21 @@ def MusicTaggerCRNN(weights='msd', input_tensor=None,
|
||||
include_top: whether to include the 1 fully-connected
|
||||
layer (output layer) at the top of the network.
|
||||
If False, the network outputs 32-dim features.
|
||||
|
||||
classes: optional number of classes to classify inputs
|
||||
into, only to be specified if `include_top` is True, and
|
||||
if no `weights` argument is specified.
|
||||
|
||||
# Returns
|
||||
A Keras model instance.
|
||||
'''
|
||||
"""
|
||||
if weights not in {'msd', None}:
|
||||
raise ValueError('The `weights` argument should be either '
|
||||
'`None` (random initialization) or `msd` '
|
||||
'(pre-training on Million Song Dataset).')
|
||||
|
||||
if weights == 'msd' and include_top and classes != 50:
|
||||
raise ValueError('If using `weights` as msd with `include_top`'
|
||||
' as true, `classes` should be 50')
|
||||
# Determine proper input shape
|
||||
if K.image_dim_ordering() == 'th':
|
||||
input_shape = (1, 96, 1366)
|
||||
@@ -125,10 +131,17 @@ def MusicTaggerCRNN(weights='msd', input_tensor=None,
|
||||
x = GRU(32, return_sequences=False, name='gru2')(x)
|
||||
|
||||
if include_top:
|
||||
x = Dense(50, activation='sigmoid', name='output')(x)
|
||||
x = Dense(classes, activation='sigmoid', name='output')(x)
|
||||
|
||||
# Ensure that the model takes into account
|
||||
# any potential predecessors of `input_tensor`.
|
||||
if input_tensor is not None:
|
||||
inputs = get_source_inputs(input_tensor)
|
||||
else:
|
||||
inputs = melgram_input
|
||||
# Create model.
|
||||
model = Model(inputs, x, name='music_tagger_crnn')
|
||||
|
||||
# Create model
|
||||
model = Model(melgram_input, x)
|
||||
if weights is None:
|
||||
return model
|
||||
else:
|
||||
|
||||
@@ -1,12 +1,12 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
'''ResNet50 model for Keras.
|
||||
"""ResNet50 model for Keras.
|
||||
|
||||
# Reference:
|
||||
|
||||
- [Deep Residual Learning for Image Recognition](https://arxiv.org/abs/1512.03385)
|
||||
|
||||
Adapted from code contributed by BigMoyan.
|
||||
'''
|
||||
"""
|
||||
from __future__ import print_function
|
||||
from __future__ import absolute_import
|
||||
|
||||
@@ -18,9 +18,10 @@ from ..layers import Convolution2D, MaxPooling2D, ZeroPadding2D, AveragePooling2
|
||||
from ..layers import BatchNormalization
|
||||
from ..models import Model
|
||||
from .. import backend as K
|
||||
from ..engine.topology import get_source_inputs
|
||||
from ..utils.layer_utils import convert_all_kernels_in_model
|
||||
from ..utils.data_utils import get_file
|
||||
from .imagenet_utils import decode_predictions, preprocess_input
|
||||
from .imagenet_utils import decode_predictions, preprocess_input, _obtain_input_shape
|
||||
|
||||
|
||||
TH_WEIGHTS_PATH = 'https://github.com/fchollet/deep-learning-models/releases/download/v0.2/resnet50_weights_th_dim_ordering_th_kernels.h5'
|
||||
@@ -30,7 +31,7 @@ TF_WEIGHTS_PATH_NO_TOP = 'https://github.com/fchollet/deep-learning-models/relea
|
||||
|
||||
|
||||
def identity_block(input_tensor, kernel_size, filters, stage, block):
|
||||
'''The identity_block is the block that has no conv layer at shortcut
|
||||
"""The identity_block is the block that has no conv layer at shortcut
|
||||
|
||||
# Arguments
|
||||
input_tensor: input tensor
|
||||
@@ -38,7 +39,7 @@ def identity_block(input_tensor, kernel_size, filters, stage, block):
|
||||
filters: list of integers, the nb_filters of 3 conv layer at main path
|
||||
stage: integer, current stage label, used for generating layer names
|
||||
block: 'a','b'..., current block label, used for generating layer names
|
||||
'''
|
||||
"""
|
||||
nb_filter1, nb_filter2, nb_filter3 = filters
|
||||
if K.image_dim_ordering() == 'tf':
|
||||
bn_axis = 3
|
||||
@@ -65,7 +66,7 @@ def identity_block(input_tensor, kernel_size, filters, stage, block):
|
||||
|
||||
|
||||
def conv_block(input_tensor, kernel_size, filters, stage, block, strides=(2, 2)):
|
||||
'''conv_block is the block that has a conv layer at shortcut
|
||||
"""conv_block is the block that has a conv layer at shortcut
|
||||
|
||||
# Arguments
|
||||
input_tensor: input tensor
|
||||
@@ -76,7 +77,7 @@ def conv_block(input_tensor, kernel_size, filters, stage, block, strides=(2, 2))
|
||||
|
||||
Note that from stage 3, the first conv layer at main path is with subsample=(2,2)
|
||||
And the shortcut should have subsample=(2,2) as well
|
||||
'''
|
||||
"""
|
||||
nb_filter1, nb_filter2, nb_filter3 = filters
|
||||
if K.image_dim_ordering() == 'tf':
|
||||
bn_axis = 3
|
||||
@@ -108,8 +109,9 @@ def conv_block(input_tensor, kernel_size, filters, stage, block, strides=(2, 2))
|
||||
|
||||
|
||||
def ResNet50(include_top=True, weights='imagenet',
|
||||
input_tensor=None):
|
||||
'''Instantiate the ResNet50 architecture,
|
||||
input_tensor=None, input_shape=None,
|
||||
classes=1000):
|
||||
"""Instantiate the ResNet50 architecture,
|
||||
optionally loading weights pre-trained
|
||||
on ImageNet. Note that when using TensorFlow,
|
||||
for best performance you should set
|
||||
@@ -126,27 +128,37 @@ def ResNet50(include_top=True, weights='imagenet',
|
||||
layers at the top of the network.
|
||||
weights: one of `None` (random initialization)
|
||||
or "imagenet" (pre-training on ImageNet).
|
||||
input_tensor: optional Keras tensor (i.e. xput of `layers.Input()`)
|
||||
input_tensor: optional Keras tensor (i.e. output of `layers.Input()`)
|
||||
to use as image input for the model.
|
||||
input_shape: optional shape tuple, only to be specified
|
||||
if `include_top` is False (otherwise the input shape
|
||||
has to be `(224, 224, 3)` (with `tf` dim ordering)
|
||||
or `(3, 224, 224)` (with `th` dim ordering).
|
||||
It should have exactly 3 input channels,
|
||||
and width and height should be no smaller than 197.
|
||||
E.g. `(200, 200, 3)` would be one valid value.
|
||||
classes: optional number of classes to classify images
|
||||
into, only to be specified if `include_top` is True, and
|
||||
if no `weights` argument is specified.
|
||||
|
||||
# Returns
|
||||
A Keras model instance.
|
||||
'''
|
||||
"""
|
||||
if weights not in {'imagenet', None}:
|
||||
raise ValueError('The `weights` argument should be either '
|
||||
'`None` (random initialization) or `imagenet` '
|
||||
'(pre-training on ImageNet).')
|
||||
|
||||
if weights == 'imagenet' and include_top and classes != 1000:
|
||||
raise ValueError('If using `weights` as imagenet with `include_top`'
|
||||
' as true, `classes` should be 1000')
|
||||
|
||||
# Determine proper input shape
|
||||
if K.image_dim_ordering() == 'th':
|
||||
if include_top:
|
||||
input_shape = (3, 224, 224)
|
||||
else:
|
||||
input_shape = (3, None, None)
|
||||
else:
|
||||
if include_top:
|
||||
input_shape = (224, 224, 3)
|
||||
else:
|
||||
input_shape = (None, None, 3)
|
||||
input_shape = _obtain_input_shape(input_shape,
|
||||
default_size=224,
|
||||
min_size=197,
|
||||
dim_ordering=K.image_dim_ordering(),
|
||||
include_top=include_top)
|
||||
|
||||
if input_tensor is None:
|
||||
img_input = Input(shape=input_shape)
|
||||
@@ -190,9 +202,16 @@ def ResNet50(include_top=True, weights='imagenet',
|
||||
|
||||
if include_top:
|
||||
x = Flatten()(x)
|
||||
x = Dense(1000, activation='softmax', name='fc1000')(x)
|
||||
x = Dense(classes, activation='softmax', name='fc1000')(x)
|
||||
|
||||
model = Model(img_input, x)
|
||||
# Ensure that the model takes into account
|
||||
# any potential predecessors of `input_tensor`.
|
||||
if input_tensor is not None:
|
||||
inputs = get_source_inputs(input_tensor)
|
||||
else:
|
||||
inputs = img_input
|
||||
# Create model.
|
||||
model = Model(inputs, x, name='resnet50')
|
||||
|
||||
# load weights
|
||||
if weights == 'imagenet':
|
||||
|
||||
@@ -1,11 +1,11 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
'''VGG16 model for Keras.
|
||||
"""VGG16 model for Keras.
|
||||
|
||||
# Reference:
|
||||
# Reference
|
||||
|
||||
- [Very Deep Convolutional Networks for Large-Scale Image Recognition](https://arxiv.org/abs/1409.1556)
|
||||
|
||||
'''
|
||||
"""
|
||||
from __future__ import print_function
|
||||
from __future__ import absolute_import
|
||||
|
||||
@@ -14,10 +14,11 @@ import warnings
|
||||
from ..models import Model
|
||||
from ..layers import Flatten, Dense, Input
|
||||
from ..layers import Convolution2D, MaxPooling2D
|
||||
from ..engine.topology import get_source_inputs
|
||||
from ..utils.layer_utils import convert_all_kernels_in_model
|
||||
from ..utils.data_utils import get_file
|
||||
from .. import backend as K
|
||||
from .imagenet_utils import decode_predictions, preprocess_input
|
||||
from .imagenet_utils import decode_predictions, preprocess_input, _obtain_input_shape
|
||||
|
||||
|
||||
TH_WEIGHTS_PATH = 'https://github.com/fchollet/deep-learning-models/releases/download/v0.1/vgg16_weights_th_dim_ordering_th_kernels.h5'
|
||||
@@ -27,8 +28,9 @@ TF_WEIGHTS_PATH_NO_TOP = 'https://github.com/fchollet/deep-learning-models/relea
|
||||
|
||||
|
||||
def VGG16(include_top=True, weights='imagenet',
|
||||
input_tensor=None):
|
||||
'''Instantiate the VGG16 architecture,
|
||||
input_tensor=None, input_shape=None,
|
||||
classes=1000):
|
||||
"""Instantiate the VGG16 architecture,
|
||||
optionally loading weights pre-trained
|
||||
on ImageNet. Note that when using TensorFlow,
|
||||
for best performance you should set
|
||||
@@ -47,25 +49,34 @@ def VGG16(include_top=True, weights='imagenet',
|
||||
or "imagenet" (pre-training on ImageNet).
|
||||
input_tensor: optional Keras tensor (i.e. output of `layers.Input()`)
|
||||
to use as image input for the model.
|
||||
input_shape: optional shape tuple, only to be specified
|
||||
if `include_top` is False (otherwise the input shape
|
||||
has to be `(224, 224, 3)` (with `tf` dim ordering)
|
||||
or `(3, 224, 224)` (with `th` dim ordering).
|
||||
It should have exactly 3 input channels,
|
||||
and width and height should be no smaller than 48.
|
||||
E.g. `(200, 200, 3)` would be one valid value.
|
||||
classes: optional number of classes to classify images
|
||||
into, only to be specified if `include_top` is True, and
|
||||
if no `weights` argument is specified.
|
||||
|
||||
# Returns
|
||||
A Keras model instance.
|
||||
'''
|
||||
"""
|
||||
if weights not in {'imagenet', None}:
|
||||
raise ValueError('The `weights` argument should be either '
|
||||
'`None` (random initialization) or `imagenet` '
|
||||
'(pre-training on ImageNet).')
|
||||
|
||||
if weights == 'imagenet' and include_top and classes != 1000:
|
||||
raise ValueError('If using `weights` as imagenet with `include_top`'
|
||||
' as true, `classes` should be 1000')
|
||||
# Determine proper input shape
|
||||
if K.image_dim_ordering() == 'th':
|
||||
if include_top:
|
||||
input_shape = (3, 224, 224)
|
||||
else:
|
||||
input_shape = (3, None, None)
|
||||
else:
|
||||
if include_top:
|
||||
input_shape = (224, 224, 3)
|
||||
else:
|
||||
input_shape = (None, None, 3)
|
||||
input_shape = _obtain_input_shape(input_shape,
|
||||
default_size=224,
|
||||
min_size=48,
|
||||
dim_ordering=K.image_dim_ordering(),
|
||||
include_top=include_top)
|
||||
|
||||
if input_tensor is None:
|
||||
img_input = Input(shape=input_shape)
|
||||
@@ -107,10 +118,16 @@ def VGG16(include_top=True, weights='imagenet',
|
||||
x = Flatten(name='flatten')(x)
|
||||
x = Dense(4096, activation='relu', name='fc1')(x)
|
||||
x = Dense(4096, activation='relu', name='fc2')(x)
|
||||
x = Dense(1000, activation='softmax', name='predictions')(x)
|
||||
x = Dense(classes, activation='softmax', name='predictions')(x)
|
||||
|
||||
# Create model
|
||||
model = Model(img_input, x)
|
||||
# Ensure that the model takes into account
|
||||
# any potential predecessors of `input_tensor`.
|
||||
if input_tensor is not None:
|
||||
inputs = get_source_inputs(input_tensor)
|
||||
else:
|
||||
inputs = img_input
|
||||
# Create model.
|
||||
model = Model(inputs, x, name='vgg16')
|
||||
|
||||
# load weights
|
||||
if weights == 'imagenet':
|
||||
|
||||
@@ -1,11 +1,11 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
'''VGG19 model for Keras.
|
||||
"""VGG19 model for Keras.
|
||||
|
||||
# Reference:
|
||||
# Reference
|
||||
|
||||
- [Very Deep Convolutional Networks for Large-Scale Image Recognition](https://arxiv.org/abs/1409.1556)
|
||||
|
||||
'''
|
||||
"""
|
||||
from __future__ import print_function
|
||||
from __future__ import absolute_import
|
||||
|
||||
@@ -14,10 +14,11 @@ import warnings
|
||||
from ..models import Model
|
||||
from ..layers import Flatten, Dense, Input
|
||||
from ..layers import Convolution2D, MaxPooling2D
|
||||
from ..engine.topology import get_source_inputs
|
||||
from ..utils.layer_utils import convert_all_kernels_in_model
|
||||
from ..utils.data_utils import get_file
|
||||
from .. import backend as K
|
||||
from .imagenet_utils import decode_predictions, preprocess_input
|
||||
from .imagenet_utils import decode_predictions, preprocess_input, _obtain_input_shape
|
||||
|
||||
|
||||
TH_WEIGHTS_PATH = 'https://github.com/fchollet/deep-learning-models/releases/download/v0.1/vgg19_weights_th_dim_ordering_th_kernels.h5'
|
||||
@@ -27,8 +28,9 @@ TF_WEIGHTS_PATH_NO_TOP = 'https://github.com/fchollet/deep-learning-models/relea
|
||||
|
||||
|
||||
def VGG19(include_top=True, weights='imagenet',
|
||||
input_tensor=None):
|
||||
'''Instantiate the VGG19 architecture,
|
||||
input_tensor=None, input_shape=None,
|
||||
classes=1000):
|
||||
"""Instantiate the VGG19 architecture,
|
||||
optionally loading weights pre-trained
|
||||
on ImageNet. Note that when using TensorFlow,
|
||||
for best performance you should set
|
||||
@@ -47,25 +49,34 @@ def VGG19(include_top=True, weights='imagenet',
|
||||
or "imagenet" (pre-training on ImageNet).
|
||||
input_tensor: optional Keras tensor (i.e. output of `layers.Input()`)
|
||||
to use as image input for the model.
|
||||
input_shape: optional shape tuple, only to be specified
|
||||
if `include_top` is False (otherwise the input shape
|
||||
has to be `(224, 224, 3)` (with `tf` dim ordering)
|
||||
or `(3, 224, 224)` (with `th` dim ordering).
|
||||
It should have exactly 3 input channels,
|
||||
and width and height should be no smaller than 48.
|
||||
E.g. `(200, 200, 3)` would be one valid value.
|
||||
classes: optional number of classes to classify images
|
||||
into, only to be specified if `include_top` is True, and
|
||||
if no `weights` argument is specified.
|
||||
|
||||
# Returns
|
||||
A Keras model instance.
|
||||
'''
|
||||
"""
|
||||
if weights not in {'imagenet', None}:
|
||||
raise ValueError('The `weights` argument should be either '
|
||||
'`None` (random initialization) or `imagenet` '
|
||||
'(pre-training on ImageNet).')
|
||||
|
||||
if weights == 'imagenet' and include_top and classes != 1000:
|
||||
raise ValueError('If using `weights` as imagenet with `include_top`'
|
||||
' as true, `classes` should be 1000')
|
||||
# Determine proper input shape
|
||||
if K.image_dim_ordering() == 'th':
|
||||
if include_top:
|
||||
input_shape = (3, 224, 224)
|
||||
else:
|
||||
input_shape = (3, None, None)
|
||||
else:
|
||||
if include_top:
|
||||
input_shape = (224, 224, 3)
|
||||
else:
|
||||
input_shape = (None, None, 3)
|
||||
input_shape = _obtain_input_shape(input_shape,
|
||||
default_size=224,
|
||||
min_size=48,
|
||||
dim_ordering=K.image_dim_ordering(),
|
||||
include_top=include_top)
|
||||
|
||||
if input_tensor is None:
|
||||
img_input = Input(shape=input_shape)
|
||||
@@ -110,10 +121,16 @@ def VGG19(include_top=True, weights='imagenet',
|
||||
x = Flatten(name='flatten')(x)
|
||||
x = Dense(4096, activation='relu', name='fc1')(x)
|
||||
x = Dense(4096, activation='relu', name='fc2')(x)
|
||||
x = Dense(1000, activation='softmax', name='predictions')(x)
|
||||
x = Dense(classes, activation='softmax', name='predictions')(x)
|
||||
|
||||
# Create model
|
||||
model = Model(img_input, x)
|
||||
# Ensure that the model takes into account
|
||||
# any potential predecessors of `input_tensor`.
|
||||
if input_tensor is not None:
|
||||
inputs = get_source_inputs(input_tensor)
|
||||
else:
|
||||
inputs = img_input
|
||||
# Create model.
|
||||
model = Model(inputs, x, name='vgg19')
|
||||
|
||||
# load weights
|
||||
if weights == 'imagenet':
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
'''Xception V1 model for Keras.
|
||||
"""Xception V1 model for Keras.
|
||||
|
||||
On ImageNet, this model gets to a top-1 validation accuracy of 0.790
|
||||
and a top-5 validation accuracy of 0.945.
|
||||
@@ -12,11 +12,11 @@ is also different (same as Inception V3).
|
||||
Also do note that this model is only available for the TensorFlow backend,
|
||||
due to its reliance on `SeparableConvolution` layers.
|
||||
|
||||
# Reference:
|
||||
# Reference
|
||||
|
||||
- [Xception: Deep Learning with Depthwise Separable Convolutions](https://arxiv.org/abs/1610.02357)
|
||||
|
||||
'''
|
||||
"""
|
||||
from __future__ import print_function
|
||||
from __future__ import absolute_import
|
||||
|
||||
@@ -25,9 +25,10 @@ import warnings
|
||||
from ..models import Model
|
||||
from ..layers import Dense, Input, BatchNormalization, Activation, merge
|
||||
from ..layers import Conv2D, SeparableConv2D, MaxPooling2D, GlobalAveragePooling2D
|
||||
from ..engine.topology import get_source_inputs
|
||||
from ..utils.data_utils import get_file
|
||||
from .. import backend as K
|
||||
from .imagenet_utils import decode_predictions
|
||||
from .imagenet_utils import decode_predictions, _obtain_input_shape
|
||||
|
||||
|
||||
TF_WEIGHTS_PATH = 'https://github.com/fchollet/deep-learning-models/releases/download/v0.4/xception_weights_tf_dim_ordering_tf_kernels.h5'
|
||||
@@ -35,8 +36,9 @@ TF_WEIGHTS_PATH_NO_TOP = 'https://github.com/fchollet/deep-learning-models/relea
|
||||
|
||||
|
||||
def Xception(include_top=True, weights='imagenet',
|
||||
input_tensor=None):
|
||||
'''Instantiate the Xception architecture,
|
||||
input_tensor=None, input_shape=None,
|
||||
classes=1000):
|
||||
"""Instantiate the Xception architecture,
|
||||
optionally loading weights pre-trained
|
||||
on ImageNet. This model is available for TensorFlow only,
|
||||
and can only be used with inputs following the TensorFlow
|
||||
@@ -53,17 +55,31 @@ def Xception(include_top=True, weights='imagenet',
|
||||
or "imagenet" (pre-training on ImageNet).
|
||||
input_tensor: optional Keras tensor (i.e. output of `layers.Input()`)
|
||||
to use as image input for the model.
|
||||
input_shape: optional shape tuple, only to be specified
|
||||
if `include_top` is False (otherwise the input shape
|
||||
has to be `(299, 299, 3)`.
|
||||
It should have exactly 3 input channels,
|
||||
and width and height should be no smaller than 71.
|
||||
E.g. `(150, 150, 3)` would be one valid value.
|
||||
classes: optional number of classes to classify images
|
||||
into, only to be specified if `include_top` is True, and
|
||||
if no `weights` argument is specified.
|
||||
|
||||
# Returns
|
||||
A Keras model instance.
|
||||
'''
|
||||
"""
|
||||
if weights not in {'imagenet', None}:
|
||||
raise ValueError('The `weights` argument should be either '
|
||||
'`None` (random initialization) or `imagenet` '
|
||||
'(pre-training on ImageNet).')
|
||||
|
||||
if weights == 'imagenet' and include_top and classes != 1000:
|
||||
raise ValueError('If using `weights` as imagenet with `include_top`'
|
||||
' as true, `classes` should be 1000')
|
||||
|
||||
if K.backend() != 'tensorflow':
|
||||
raise Exception('The Xception model is only available with '
|
||||
'the TensorFlow backend.')
|
||||
raise RuntimeError('The Xception model is only available with '
|
||||
'the TensorFlow backend.')
|
||||
if K.image_dim_ordering() != 'tf':
|
||||
warnings.warn('The Xception model is only available for the '
|
||||
'input dimension ordering "tf" '
|
||||
@@ -80,10 +96,11 @@ def Xception(include_top=True, weights='imagenet',
|
||||
old_dim_ordering = None
|
||||
|
||||
# Determine proper input shape
|
||||
if include_top:
|
||||
input_shape = (299, 299, 3)
|
||||
else:
|
||||
input_shape = (None, None, 3)
|
||||
input_shape = _obtain_input_shape(input_shape,
|
||||
default_size=299,
|
||||
min_size=71,
|
||||
dim_ordering=K.image_dim_ordering(),
|
||||
include_top=include_top)
|
||||
|
||||
if input_tensor is None:
|
||||
img_input = Input(shape=input_shape)
|
||||
@@ -181,10 +198,16 @@ def Xception(include_top=True, weights='imagenet',
|
||||
|
||||
if include_top:
|
||||
x = GlobalAveragePooling2D(name='avg_pool')(x)
|
||||
x = Dense(1000, activation='softmax', name='predictions')(x)
|
||||
x = Dense(classes, activation='softmax', name='predictions')(x)
|
||||
|
||||
# Create model
|
||||
model = Model(img_input, x)
|
||||
# Ensure that the model takes into account
|
||||
# any potential predecessors of `input_tensor`.
|
||||
if input_tensor is not None:
|
||||
inputs = get_source_inputs(input_tensor)
|
||||
else:
|
||||
inputs = img_input
|
||||
# Create model.
|
||||
model = Model(inputs, x, name='xception')
|
||||
|
||||
# load weights
|
||||
if weights == 'imagenet':
|
||||
|
||||
@@ -23,11 +23,8 @@ _keras_dir = os.path.join(_keras_base_dir, '.keras')
|
||||
if not os.path.exists(_keras_dir):
|
||||
os.makedirs(_keras_dir)
|
||||
|
||||
# Set theano as default backend for Windows users since tensorflow is not available for Windows yet.
|
||||
if os.name == 'nt':
|
||||
_BACKEND = 'theano'
|
||||
else:
|
||||
_BACKEND = 'tensorflow'
|
||||
# Default backend: TensorFlow.
|
||||
_BACKEND = 'tensorflow'
|
||||
|
||||
_config_path = os.path.expanduser(os.path.join(_keras_dir, 'keras.json'))
|
||||
if os.path.exists(_config_path):
|
||||
@@ -35,10 +32,11 @@ if os.path.exists(_config_path):
|
||||
_floatx = _config.get('floatx', floatx())
|
||||
assert _floatx in {'float16', 'float32', 'float64'}
|
||||
_epsilon = _config.get('epsilon', epsilon())
|
||||
assert type(_epsilon) == float
|
||||
assert isinstance(_epsilon, float)
|
||||
_backend = _config.get('backend', _BACKEND)
|
||||
assert _backend in {'theano', 'tensorflow'}
|
||||
_image_dim_ordering = _config.get('image_dim_ordering', image_dim_ordering())
|
||||
_image_dim_ordering = _config.get('image_dim_ordering',
|
||||
image_dim_ordering())
|
||||
assert _image_dim_ordering in {'tf', 'th'}
|
||||
|
||||
set_floatx(_floatx)
|
||||
@@ -68,11 +66,11 @@ elif _BACKEND == 'tensorflow':
|
||||
sys.stderr.write('Using TensorFlow backend.\n')
|
||||
from .tensorflow_backend import *
|
||||
else:
|
||||
raise Exception('Unknown backend: ' + str(_BACKEND))
|
||||
raise ValueError('Unknown backend: ' + str(_BACKEND))
|
||||
|
||||
|
||||
def backend():
|
||||
'''Publicly accessible method
|
||||
"""Publicly accessible method
|
||||
for determining the current backend.
|
||||
'''
|
||||
"""
|
||||
return _BACKEND
|
||||
|
||||
+142
-14
@@ -11,58 +11,164 @@ _LEGACY_WEIGHT_ORDERING = False
|
||||
|
||||
|
||||
def epsilon():
|
||||
'''Returns the value of the fuzz
|
||||
"""Returns the value of the fuzz
|
||||
factor used in numeric expressions.
|
||||
'''
|
||||
|
||||
# Returns
|
||||
A float.
|
||||
|
||||
# Example
|
||||
```python
|
||||
>>> keras.backend.epsilon()
|
||||
1e-08
|
||||
```
|
||||
"""
|
||||
return _EPSILON
|
||||
|
||||
|
||||
def set_epsilon(e):
|
||||
'''Sets the value of the fuzz
|
||||
"""Sets the value of the fuzz
|
||||
factor used in numeric expressions.
|
||||
'''
|
||||
|
||||
# Arguments
|
||||
e: float. New value of epsilon.
|
||||
|
||||
# Example
|
||||
```python
|
||||
>>> from keras import backend as K
|
||||
>>> K.epsilon()
|
||||
1e-08
|
||||
>>> K.set_epsilon(1e-05)
|
||||
>>> K.epsilon()
|
||||
1e-05
|
||||
```
|
||||
"""
|
||||
global _EPSILON
|
||||
_EPSILON = e
|
||||
|
||||
|
||||
def floatx():
|
||||
'''Returns the default float type, as a string
|
||||
"""Returns the default float type, as a string
|
||||
(e.g. 'float16', 'float32', 'float64').
|
||||
'''
|
||||
|
||||
# Returns
|
||||
String, the current default float type.
|
||||
|
||||
# Example
|
||||
```python
|
||||
>>> keras.backend.floatx()
|
||||
'float32'
|
||||
```
|
||||
"""
|
||||
return _FLOATX
|
||||
|
||||
|
||||
def set_floatx(floatx):
|
||||
"""Sets the default float type.
|
||||
|
||||
# Arguments
|
||||
floatx: String, one of 'float16', 'float32', or 'float64'.
|
||||
|
||||
# Example
|
||||
```python
|
||||
>>> from keras import backend as K
|
||||
>>> K.floatx()
|
||||
'float32'
|
||||
>>> K.set_floatx('float16')
|
||||
>>> K.floatx()
|
||||
'float16'
|
||||
```
|
||||
"""
|
||||
global _FLOATX
|
||||
if floatx not in {'float16', 'float32', 'float64'}:
|
||||
raise Exception('Unknown floatx type: ' + str(floatx))
|
||||
raise ValueError('Unknown floatx type: ' + str(floatx))
|
||||
_FLOATX = str(floatx)
|
||||
|
||||
|
||||
def cast_to_floatx(x):
|
||||
'''Cast a Numpy array to floatx.
|
||||
'''
|
||||
"""Cast a Numpy array to the default Keras float type.
|
||||
|
||||
# Arguments
|
||||
x: Numpy array.
|
||||
|
||||
# Returns
|
||||
The same Numpy array, cast to its new type.
|
||||
|
||||
# Example
|
||||
```python
|
||||
>>> from keras import backend as K
|
||||
>>> K.floatx()
|
||||
'float32'
|
||||
>>> arr = numpy.array([1.0, 2.0], dtype='float64')
|
||||
>>> arr.dtype
|
||||
dtype('float64')
|
||||
>>> new_arr = K.cast_to_floatx(arr)
|
||||
>>> new_arr
|
||||
array([ 1., 2.], dtype=float32)
|
||||
>>> new_arr.dtype
|
||||
dtype('float32')
|
||||
```
|
||||
"""
|
||||
return np.asarray(x, dtype=_FLOATX)
|
||||
|
||||
|
||||
def image_dim_ordering():
|
||||
'''Returns the image dimension ordering
|
||||
"""Returns the default image dimension ordering
|
||||
convention ('th' or 'tf').
|
||||
'''
|
||||
|
||||
# Returns
|
||||
A string, either `'th'` or `'tf'`
|
||||
|
||||
# Example
|
||||
```python
|
||||
>>> keras.backend.image_dim_ordering()
|
||||
'th'
|
||||
```
|
||||
"""
|
||||
return _IMAGE_DIM_ORDERING
|
||||
|
||||
|
||||
def set_image_dim_ordering(dim_ordering):
|
||||
'''Sets the value of the image dimension
|
||||
"""Sets the value of the image dimension
|
||||
ordering convention ('th' or 'tf').
|
||||
'''
|
||||
|
||||
# Arguments
|
||||
dim_ordering: string. `'th'` or `'tf'`.
|
||||
|
||||
# Example
|
||||
```python
|
||||
>>> from keras import backend as K
|
||||
>>> K.image_dim_ordering()
|
||||
'th'
|
||||
>>> K.set_image_dim_ordering('tf')
|
||||
>>> K.image_dim_ordering()
|
||||
'tf'
|
||||
```
|
||||
"""
|
||||
global _IMAGE_DIM_ORDERING
|
||||
if dim_ordering not in {'tf', 'th'}:
|
||||
raise Exception('Unknown dim_ordering:', dim_ordering)
|
||||
raise ValueError('Unknown dim_ordering:', dim_ordering)
|
||||
_IMAGE_DIM_ORDERING = str(dim_ordering)
|
||||
|
||||
|
||||
def get_uid(prefix=''):
|
||||
"""Provides a unique UID given a string prefix.
|
||||
|
||||
# Arguments
|
||||
prefix: string.
|
||||
|
||||
# Returns
|
||||
An integer.
|
||||
|
||||
# Example
|
||||
```
|
||||
>>> keras.backend.get_uid('dense')
|
||||
1
|
||||
>>> keras.backend.get_uid('dense')
|
||||
2
|
||||
```
|
||||
|
||||
"""
|
||||
_UID_PREFIXES[prefix] += 1
|
||||
return _UID_PREFIXES[prefix]
|
||||
|
||||
@@ -73,6 +179,28 @@ def reset_uids():
|
||||
|
||||
|
||||
def is_keras_tensor(x):
|
||||
"""Returns whether `x` is a Keras tensor.
|
||||
|
||||
# Arguments
|
||||
x: a potential tensor.
|
||||
|
||||
# Returns
|
||||
A boolean: whether the argument is a Keras tensor.
|
||||
|
||||
# Examples
|
||||
```python
|
||||
>>> from keras import backend as K
|
||||
>>> np_var = numpy.array([1, 2])
|
||||
>>> K.is_keras_tensor(np_var)
|
||||
False
|
||||
>>> keras_var = K.variable(np_var)
|
||||
>>> K.is_keras_tensor(keras_var) # A variable is not a Tensor.
|
||||
False
|
||||
>>> keras_placeholder = K.placeholder(shape=(2, 4, 5))
|
||||
>>> K.is_keras_tensor(keras_placeholder) # A placeholder is a Tensor.
|
||||
True
|
||||
```
|
||||
"""
|
||||
if hasattr(x, '_keras_shape'):
|
||||
return True
|
||||
else:
|
||||
|
||||
+1481
-462
Diferenças do arquivo suprimidas por serem muito extensas
Carregar Diff
+365
-183
Diferenças do arquivo suprimidas por serem muito extensas
Carregar Diff
+321
-170
@@ -9,53 +9,101 @@ import time
|
||||
import json
|
||||
import warnings
|
||||
|
||||
from collections import deque, OrderedDict, Iterable
|
||||
from collections import deque
|
||||
from collections import OrderedDict
|
||||
from collections import Iterable
|
||||
from .utils.generic_utils import Progbar
|
||||
from keras import backend as K
|
||||
from pkg_resources import parse_version
|
||||
|
||||
try:
|
||||
import requests
|
||||
except ImportError:
|
||||
requests = None
|
||||
|
||||
if K.backend() == 'tensorflow':
|
||||
import tensorflow as tf
|
||||
|
||||
|
||||
class CallbackList(object):
|
||||
def __init__(self, callbacks=[], queue_length=10):
|
||||
"""Container abstracting a list of callbacks.
|
||||
|
||||
# Arguments
|
||||
callbacks: List of `Callback` instances.
|
||||
queue_length: Queue length for keeping
|
||||
running statistics over callback execution time.
|
||||
"""
|
||||
|
||||
def __init__(self, callbacks=None, queue_length=10):
|
||||
callbacks = callbacks or []
|
||||
self.callbacks = [c for c in callbacks]
|
||||
self.queue_length = queue_length
|
||||
|
||||
def append(self, callback):
|
||||
self.callbacks.append(callback)
|
||||
|
||||
def _set_params(self, params):
|
||||
def set_params(self, params):
|
||||
for callback in self.callbacks:
|
||||
callback._set_params(params)
|
||||
callback.set_params(params)
|
||||
|
||||
def _set_model(self, model):
|
||||
def set_model(self, model):
|
||||
for callback in self.callbacks:
|
||||
callback._set_model(model)
|
||||
callback.set_model(model)
|
||||
|
||||
def on_epoch_begin(self, epoch, logs={}):
|
||||
def on_epoch_begin(self, epoch, logs=None):
|
||||
"""Called at the start of an epoch.
|
||||
|
||||
# Arguments
|
||||
epoch: integer, index of epoch.
|
||||
logs: dictionary of logs.
|
||||
"""
|
||||
logs = logs or {}
|
||||
for callback in self.callbacks:
|
||||
callback.on_epoch_begin(epoch, logs)
|
||||
self._delta_t_batch = 0.
|
||||
self._delta_ts_batch_begin = deque([], maxlen=self.queue_length)
|
||||
self._delta_ts_batch_end = deque([], maxlen=self.queue_length)
|
||||
|
||||
def on_epoch_end(self, epoch, logs={}):
|
||||
def on_epoch_end(self, epoch, logs=None):
|
||||
"""Called at the end of an epoch.
|
||||
|
||||
# Arguments
|
||||
epoch: integer, index of epoch.
|
||||
logs: dictionary of logs.
|
||||
"""
|
||||
logs = logs or {}
|
||||
for callback in self.callbacks:
|
||||
callback.on_epoch_end(epoch, logs)
|
||||
|
||||
def on_batch_begin(self, batch, logs={}):
|
||||
def on_batch_begin(self, batch, logs=None):
|
||||
"""Called right before processing a batch.
|
||||
|
||||
# Arguments
|
||||
batch: integer, index of batch within the current epoch.
|
||||
logs: dictionary of logs.
|
||||
"""
|
||||
logs = logs or {}
|
||||
t_before_callbacks = time.time()
|
||||
for callback in self.callbacks:
|
||||
callback.on_batch_begin(batch, logs)
|
||||
self._delta_ts_batch_begin.append(time.time() - t_before_callbacks)
|
||||
delta_t_median = np.median(self._delta_ts_batch_begin)
|
||||
if self._delta_t_batch > 0. and delta_t_median > 0.95 * \
|
||||
self._delta_t_batch and delta_t_median > 0.1:
|
||||
if (self._delta_t_batch > 0. and
|
||||
delta_t_median > 0.95 * self._delta_t_batch and
|
||||
delta_t_median > 0.1):
|
||||
warnings.warn('Method on_batch_begin() is slow compared '
|
||||
'to the batch update (%f). Check your callbacks.'
|
||||
% delta_t_median)
|
||||
self._t_enter_batch = time.time()
|
||||
|
||||
def on_batch_end(self, batch, logs={}):
|
||||
def on_batch_end(self, batch, logs=None):
|
||||
"""Called at the end of a batch.
|
||||
|
||||
# Arguments
|
||||
batch: integer, index of batch within the current epoch.
|
||||
logs: dictionary of logs.
|
||||
"""
|
||||
logs = logs or {}
|
||||
if not hasattr(self, '_t_enter_batch'):
|
||||
self._t_enter_batch = time.time()
|
||||
self._delta_t_batch = time.time() - self._t_enter_batch
|
||||
@@ -64,22 +112,35 @@ class CallbackList(object):
|
||||
callback.on_batch_end(batch, logs)
|
||||
self._delta_ts_batch_end.append(time.time() - t_before_callbacks)
|
||||
delta_t_median = np.median(self._delta_ts_batch_end)
|
||||
if self._delta_t_batch > 0. and (delta_t_median > 0.95 * self._delta_t_batch and delta_t_median > 0.1):
|
||||
if (self._delta_t_batch > 0. and
|
||||
(delta_t_median > 0.95 * self._delta_t_batch and delta_t_median > 0.1)):
|
||||
warnings.warn('Method on_batch_end() is slow compared '
|
||||
'to the batch update (%f). Check your callbacks.'
|
||||
% delta_t_median)
|
||||
|
||||
def on_train_begin(self, logs={}):
|
||||
def on_train_begin(self, logs=None):
|
||||
"""Called at the beginning of training.
|
||||
|
||||
# Arguments
|
||||
logs: dictionary of logs.
|
||||
"""
|
||||
logs = logs or {}
|
||||
for callback in self.callbacks:
|
||||
callback.on_train_begin(logs)
|
||||
|
||||
def on_train_end(self, logs={}):
|
||||
def on_train_end(self, logs=None):
|
||||
"""Called at the end of training.
|
||||
|
||||
# Arguments
|
||||
logs: dictionary of logs.
|
||||
"""
|
||||
logs = logs or {}
|
||||
for callback in self.callbacks:
|
||||
callback.on_train_end(logs)
|
||||
|
||||
|
||||
class Callback(object):
|
||||
'''Abstract base class used to build new callbacks.
|
||||
"""Abstract base class used to build new callbacks.
|
||||
|
||||
# Properties
|
||||
params: dict. Training parameters
|
||||
@@ -103,47 +164,48 @@ class Callback(object):
|
||||
the number of samples in the current batch.
|
||||
on_batch_end: logs include `loss`, and optionally `acc`
|
||||
(if accuracy monitoring is enabled).
|
||||
'''
|
||||
"""
|
||||
|
||||
def __init__(self):
|
||||
pass
|
||||
|
||||
def _set_params(self, params):
|
||||
def set_params(self, params):
|
||||
self.params = params
|
||||
|
||||
def _set_model(self, model):
|
||||
def set_model(self, model):
|
||||
self.model = model
|
||||
|
||||
def on_epoch_begin(self, epoch, logs={}):
|
||||
def on_epoch_begin(self, epoch, logs=None):
|
||||
pass
|
||||
|
||||
def on_epoch_end(self, epoch, logs={}):
|
||||
def on_epoch_end(self, epoch, logs=None):
|
||||
pass
|
||||
|
||||
def on_batch_begin(self, batch, logs={}):
|
||||
def on_batch_begin(self, batch, logs=None):
|
||||
pass
|
||||
|
||||
def on_batch_end(self, batch, logs={}):
|
||||
def on_batch_end(self, batch, logs=None):
|
||||
pass
|
||||
|
||||
def on_train_begin(self, logs={}):
|
||||
def on_train_begin(self, logs=None):
|
||||
pass
|
||||
|
||||
def on_train_end(self, logs={}):
|
||||
def on_train_end(self, logs=None):
|
||||
pass
|
||||
|
||||
|
||||
class BaseLogger(Callback):
|
||||
'''Callback that accumulates epoch averages of
|
||||
the metrics being monitored.
|
||||
"""Callback that accumulates epoch averages of metrics.
|
||||
|
||||
This callback is automatically applied to
|
||||
every Keras model.
|
||||
'''
|
||||
def on_epoch_begin(self, epoch, logs={}):
|
||||
This callback is automatically applied to every Keras model.
|
||||
"""
|
||||
|
||||
def on_epoch_begin(self, epoch, logs=None):
|
||||
self.seen = 0
|
||||
self.totals = {}
|
||||
|
||||
def on_batch_end(self, batch, logs={}):
|
||||
def on_batch_end(self, batch, logs=None):
|
||||
logs = logs or {}
|
||||
batch_size = logs.get('size', 0)
|
||||
self.seen += batch_size
|
||||
|
||||
@@ -153,32 +215,35 @@ class BaseLogger(Callback):
|
||||
else:
|
||||
self.totals[k] = v * batch_size
|
||||
|
||||
def on_epoch_end(self, epoch, logs={}):
|
||||
for k in self.params['metrics']:
|
||||
if k in self.totals:
|
||||
# make value available to next callbacks
|
||||
logs[k] = self.totals[k] / self.seen
|
||||
def on_epoch_end(self, epoch, logs=None):
|
||||
if logs is not None:
|
||||
for k in self.params['metrics']:
|
||||
if k in self.totals:
|
||||
# Make value available to next callbacks.
|
||||
logs[k] = self.totals[k] / self.seen
|
||||
|
||||
|
||||
class ProgbarLogger(Callback):
|
||||
'''Callback that prints metrics to stdout.
|
||||
'''
|
||||
def on_train_begin(self, logs={}):
|
||||
"""Callback that prints metrics to stdout.
|
||||
"""
|
||||
|
||||
def on_train_begin(self, logs=None):
|
||||
self.verbose = self.params['verbose']
|
||||
self.nb_epoch = self.params['nb_epoch']
|
||||
|
||||
def on_epoch_begin(self, epoch, logs={}):
|
||||
def on_epoch_begin(self, epoch, logs=None):
|
||||
if self.verbose:
|
||||
print('Epoch %d/%d' % (epoch + 1, self.nb_epoch))
|
||||
self.progbar = Progbar(target=self.params['nb_sample'],
|
||||
verbose=self.verbose)
|
||||
self.seen = 0
|
||||
|
||||
def on_batch_begin(self, batch, logs={}):
|
||||
def on_batch_begin(self, batch, logs=None):
|
||||
if self.seen < self.params['nb_sample']:
|
||||
self.log_values = []
|
||||
|
||||
def on_batch_end(self, batch, logs={}):
|
||||
def on_batch_end(self, batch, logs=None):
|
||||
logs = logs or {}
|
||||
batch_size = logs.get('size', 0)
|
||||
self.seen += batch_size
|
||||
|
||||
@@ -186,12 +251,13 @@ class ProgbarLogger(Callback):
|
||||
if k in logs:
|
||||
self.log_values.append((k, logs[k]))
|
||||
|
||||
# skip progbar update for the last batch;
|
||||
# will be handled by on_epoch_end
|
||||
# Skip progbar update for the last batch;
|
||||
# will be handled by on_epoch_end.
|
||||
if self.verbose and self.seen < self.params['nb_sample']:
|
||||
self.progbar.update(self.seen, self.log_values)
|
||||
|
||||
def on_epoch_end(self, epoch, logs={}):
|
||||
def on_epoch_end(self, epoch, logs=None):
|
||||
logs = logs or {}
|
||||
for k in self.params['metrics']:
|
||||
if k in logs:
|
||||
self.log_values.append((k, logs[k]))
|
||||
@@ -200,33 +266,34 @@ class ProgbarLogger(Callback):
|
||||
|
||||
|
||||
class History(Callback):
|
||||
'''Callback that records events
|
||||
into a `History` object.
|
||||
"""Callback that records events into a `History` object.
|
||||
|
||||
This callback is automatically applied to
|
||||
every Keras model. The `History` object
|
||||
gets returned by the `fit` method of models.
|
||||
'''
|
||||
def on_train_begin(self, logs={}):
|
||||
"""
|
||||
|
||||
def on_train_begin(self, logs=None):
|
||||
self.epoch = []
|
||||
self.history = {}
|
||||
|
||||
def on_epoch_end(self, epoch, logs={}):
|
||||
def on_epoch_end(self, epoch, logs=None):
|
||||
logs = logs or {}
|
||||
self.epoch.append(epoch)
|
||||
for k, v in logs.items():
|
||||
self.history.setdefault(k, []).append(v)
|
||||
|
||||
|
||||
class ModelCheckpoint(Callback):
|
||||
'''Save the model after every epoch.
|
||||
"""Save the model after every epoch.
|
||||
|
||||
`filepath` can contain named formatting options,
|
||||
which will be filled with the value of `epoch` and
|
||||
keys in `logs` (passed in `on_epoch_end`).
|
||||
|
||||
For example: if `filepath` is `weights.{epoch:02d}-{val_loss:.2f}.hdf5`,
|
||||
then multiple files will be save with the epoch number and
|
||||
the validation loss.
|
||||
then the model checkpoints will be saved with the epoch number and
|
||||
the validation loss in the filename.
|
||||
|
||||
# Arguments
|
||||
filepath: string, path to save the model file.
|
||||
@@ -246,17 +313,20 @@ class ModelCheckpoint(Callback):
|
||||
save_weights_only: if True, then only the model's weights will be
|
||||
saved (`model.save_weights(filepath)`), else the full model
|
||||
is saved (`model.save(filepath)`).
|
||||
period: Interval (number of epochs) between checkpoints.
|
||||
"""
|
||||
|
||||
'''
|
||||
def __init__(self, filepath, monitor='val_loss', verbose=0,
|
||||
save_best_only=False, save_weights_only=False,
|
||||
mode='auto'):
|
||||
mode='auto', period=1):
|
||||
super(ModelCheckpoint, self).__init__()
|
||||
self.monitor = monitor
|
||||
self.verbose = verbose
|
||||
self.filepath = filepath
|
||||
self.save_best_only = save_best_only
|
||||
self.save_weights_only = save_weights_only
|
||||
self.period = period
|
||||
self.epochs_since_last_save = 0
|
||||
|
||||
if mode not in ['auto', 'min', 'max']:
|
||||
warnings.warn('ModelCheckpoint mode %s is unknown, '
|
||||
@@ -271,47 +341,51 @@ class ModelCheckpoint(Callback):
|
||||
self.monitor_op = np.greater
|
||||
self.best = -np.Inf
|
||||
else:
|
||||
if 'acc' in self.monitor:
|
||||
if 'acc' in self.monitor or self.monitor.startswith('fmeasure'):
|
||||
self.monitor_op = np.greater
|
||||
self.best = -np.Inf
|
||||
else:
|
||||
self.monitor_op = np.less
|
||||
self.best = np.Inf
|
||||
|
||||
def on_epoch_end(self, epoch, logs={}):
|
||||
filepath = self.filepath.format(epoch=epoch, **logs)
|
||||
if self.save_best_only:
|
||||
current = logs.get(self.monitor)
|
||||
if current is None:
|
||||
warnings.warn('Can save best model only with %s available, '
|
||||
'skipping.' % (self.monitor), RuntimeWarning)
|
||||
else:
|
||||
if self.monitor_op(current, self.best):
|
||||
if self.verbose > 0:
|
||||
print('Epoch %05d: %s improved from %0.5f to %0.5f,'
|
||||
' saving model to %s'
|
||||
% (epoch, self.monitor, self.best,
|
||||
current, filepath))
|
||||
self.best = current
|
||||
if self.save_weights_only:
|
||||
self.model.save_weights(filepath, overwrite=True)
|
||||
else:
|
||||
self.model.save(filepath, overwrite=True)
|
||||
def on_epoch_end(self, epoch, logs=None):
|
||||
logs = logs or {}
|
||||
self.epochs_since_last_save += 1
|
||||
if self.epochs_since_last_save >= self.period:
|
||||
self.epochs_since_last_save = 0
|
||||
filepath = self.filepath.format(epoch=epoch, **logs)
|
||||
if self.save_best_only:
|
||||
current = logs.get(self.monitor)
|
||||
if current is None:
|
||||
warnings.warn('Can save best model only with %s available, '
|
||||
'skipping.' % (self.monitor), RuntimeWarning)
|
||||
else:
|
||||
if self.verbose > 0:
|
||||
print('Epoch %05d: %s did not improve' %
|
||||
(epoch, self.monitor))
|
||||
else:
|
||||
if self.verbose > 0:
|
||||
print('Epoch %05d: saving model to %s' % (epoch, filepath))
|
||||
if self.save_weights_only:
|
||||
self.model.save_weights(filepath, overwrite=True)
|
||||
if self.monitor_op(current, self.best):
|
||||
if self.verbose > 0:
|
||||
print('Epoch %05d: %s improved from %0.5f to %0.5f,'
|
||||
' saving model to %s'
|
||||
% (epoch, self.monitor, self.best,
|
||||
current, filepath))
|
||||
self.best = current
|
||||
if self.save_weights_only:
|
||||
self.model.save_weights(filepath, overwrite=True)
|
||||
else:
|
||||
self.model.save(filepath, overwrite=True)
|
||||
else:
|
||||
if self.verbose > 0:
|
||||
print('Epoch %05d: %s did not improve' %
|
||||
(epoch, self.monitor))
|
||||
else:
|
||||
self.model.save(filepath, overwrite=True)
|
||||
if self.verbose > 0:
|
||||
print('Epoch %05d: saving model to %s' % (epoch, filepath))
|
||||
if self.save_weights_only:
|
||||
self.model.save_weights(filepath, overwrite=True)
|
||||
else:
|
||||
self.model.save(filepath, overwrite=True)
|
||||
|
||||
|
||||
class EarlyStopping(Callback):
|
||||
'''Stop training when a monitored quantity has stopped improving.
|
||||
"""Stop training when a monitored quantity has stopped improving.
|
||||
|
||||
# Arguments
|
||||
monitor: quantity to be monitored.
|
||||
@@ -329,8 +403,10 @@ class EarlyStopping(Callback):
|
||||
monitored has stopped increasing; in `auto`
|
||||
mode, the direction is automatically inferred
|
||||
from the name of the monitored quantity.
|
||||
'''
|
||||
def __init__(self, monitor='val_loss', min_delta=0, patience=0, verbose=0, mode='auto'):
|
||||
"""
|
||||
|
||||
def __init__(self, monitor='val_loss',
|
||||
min_delta=0, patience=0, verbose=0, mode='auto'):
|
||||
super(EarlyStopping, self).__init__()
|
||||
|
||||
self.monitor = monitor
|
||||
@@ -351,7 +427,7 @@ class EarlyStopping(Callback):
|
||||
elif mode == 'max':
|
||||
self.monitor_op = np.greater
|
||||
else:
|
||||
if 'acc' in self.monitor:
|
||||
if 'acc' in self.monitor or self.monitor.startswith('fmeasure'):
|
||||
self.monitor_op = np.greater
|
||||
else:
|
||||
self.monitor_op = np.less
|
||||
@@ -361,11 +437,11 @@ class EarlyStopping(Callback):
|
||||
else:
|
||||
self.min_delta *= -1
|
||||
|
||||
def on_train_begin(self, logs={}):
|
||||
self.wait = 0 # Allow instances to be re-used
|
||||
def on_train_begin(self, logs=None):
|
||||
self.wait = 0 # Allow instances to be re-used
|
||||
self.best = np.Inf if self.monitor_op == np.less else -np.Inf
|
||||
|
||||
def on_epoch_end(self, epoch, logs={}):
|
||||
def on_epoch_end(self, epoch, logs=None):
|
||||
current = logs.get(self.monitor)
|
||||
if current is None:
|
||||
warnings.warn('Early stopping requires %s available!' %
|
||||
@@ -380,73 +456,86 @@ class EarlyStopping(Callback):
|
||||
self.model.stop_training = True
|
||||
self.wait += 1
|
||||
|
||||
def on_train_end(self, logs={}):
|
||||
def on_train_end(self, logs=None):
|
||||
if self.stopped_epoch > 0 and self.verbose > 0:
|
||||
print('Epoch %05d: early stopping' % (self.stopped_epoch))
|
||||
|
||||
|
||||
class RemoteMonitor(Callback):
|
||||
'''Callback used to stream events to a server.
|
||||
"""Callback used to stream events to a server.
|
||||
|
||||
Requires the `requests` library.
|
||||
Events are sent to `root + '/publish/epoch/end/'` by default. Calls are
|
||||
HTTP POST, with a `data` argument which is a
|
||||
JSON-encoded dictionary of event data.
|
||||
|
||||
# Arguments
|
||||
root: root url to which the events will be sent (at the end
|
||||
of every epoch). Events are sent to
|
||||
`root + '/publish/epoch/end/'` by default. Calls are
|
||||
HTTP POST, with a `data` argument which is a
|
||||
JSON-encoded dictionary of event data.
|
||||
'''
|
||||
root: String; root url of the target server.
|
||||
path: String; path relative to `root` to which the events will be sent.
|
||||
field: String; JSON field under which the data will be stored.
|
||||
headers: Dictionary; optional custom HTTP headers.
|
||||
Defaults to:
|
||||
`{'Accept': 'application/json',
|
||||
'Content-Type': 'application/json'}`
|
||||
"""
|
||||
|
||||
def __init__(self,
|
||||
root='http://localhost:9000',
|
||||
path='/publish/epoch/end/',
|
||||
field='data'):
|
||||
field='data',
|
||||
headers=None):
|
||||
super(RemoteMonitor, self).__init__()
|
||||
if headers is None:
|
||||
headers = {'Accept': 'application/json',
|
||||
'Content-Type': 'application/json'}
|
||||
self.root = root
|
||||
self.path = path
|
||||
self.field = field
|
||||
self.headers = headers
|
||||
|
||||
def on_epoch_end(self, epoch, logs={}):
|
||||
import requests
|
||||
def on_epoch_end(self, epoch, logs=None):
|
||||
if requests is None:
|
||||
raise ImportError('RemoteMonitor requires '
|
||||
'the `requests` library.')
|
||||
logs = logs or {}
|
||||
send = {}
|
||||
send['epoch'] = epoch
|
||||
for k, v in logs.items():
|
||||
send[k] = v
|
||||
try:
|
||||
requests.post(self.root + self.path,
|
||||
{self.field: json.dumps(send)})
|
||||
except:
|
||||
print('Warning: could not reach RemoteMonitor '
|
||||
'root server at ' + str(self.root))
|
||||
{self.field: json.dumps(send)},
|
||||
headers=self.headers)
|
||||
except requests.exceptions.RequestException:
|
||||
warnings.warn('Warning: could not reach RemoteMonitor '
|
||||
'root server at ' + str(self.root))
|
||||
|
||||
|
||||
class LearningRateScheduler(Callback):
|
||||
'''Learning rate scheduler.
|
||||
"""Learning rate scheduler.
|
||||
|
||||
# Arguments
|
||||
schedule: a function that takes an epoch index as input
|
||||
(integer, indexed from 0) and returns a new
|
||||
learning rate as output (float).
|
||||
'''
|
||||
"""
|
||||
|
||||
def __init__(self, schedule):
|
||||
super(LearningRateScheduler, self).__init__()
|
||||
self.schedule = schedule
|
||||
|
||||
def on_epoch_begin(self, epoch, logs={}):
|
||||
assert hasattr(self.model.optimizer, 'lr'), \
|
||||
'Optimizer must have a "lr" attribute.'
|
||||
def on_epoch_begin(self, epoch, logs=None):
|
||||
if not hasattr(self.model.optimizer, 'lr'):
|
||||
raise ValueError('Optimizer must have a "lr" attribute.')
|
||||
lr = self.schedule(epoch)
|
||||
|
||||
if not isinstance(lr, (float, np.float32, np.float64)):
|
||||
raise ValueError('The output of the "schedule" function '
|
||||
'should be float.')
|
||||
|
||||
K.set_value(self.model.optimizer.lr, lr)
|
||||
|
||||
|
||||
class TensorBoard(Callback):
|
||||
''' Tensorboard basic visualizations.
|
||||
"""Tensorboard basic visualizations.
|
||||
|
||||
This callback writes a log for TensorBoard, which allows
|
||||
you to visualize dynamic graphs of your training and test
|
||||
@@ -472,30 +561,33 @@ class TensorBoard(Callback):
|
||||
write_graph: whether to visualize the graph in Tensorboard.
|
||||
The log file can become quite large when
|
||||
write_graph is set to True.
|
||||
'''
|
||||
"""
|
||||
|
||||
def __init__(self, log_dir='./logs', histogram_freq=0, write_graph=True, write_images=False):
|
||||
def __init__(self, log_dir='./logs',
|
||||
histogram_freq=0,
|
||||
write_graph=True,
|
||||
write_images=False):
|
||||
super(TensorBoard, self).__init__()
|
||||
if K._BACKEND != 'tensorflow':
|
||||
raise Exception('TensorBoard callback only works '
|
||||
'with the TensorFlow backend.')
|
||||
if K.backend() != 'tensorflow':
|
||||
raise RuntimeError('TensorBoard callback only works '
|
||||
'with the TensorFlow backend.')
|
||||
self.log_dir = log_dir
|
||||
self.histogram_freq = histogram_freq
|
||||
self.merged = None
|
||||
self.write_graph = write_graph
|
||||
self.write_images = write_images
|
||||
|
||||
def _set_model(self, model):
|
||||
import tensorflow as tf
|
||||
import keras.backend.tensorflow_backend as KTF
|
||||
|
||||
def set_model(self, model):
|
||||
self.model = model
|
||||
self.sess = KTF.get_session()
|
||||
self.sess = K.get_session()
|
||||
if self.histogram_freq and self.merged is None:
|
||||
for layer in self.model.layers:
|
||||
|
||||
for weight in layer.weights:
|
||||
tf.histogram_summary(weight.name, weight)
|
||||
if hasattr(tf, 'histogram_summary'):
|
||||
tf.histogram_summary(weight.name, weight)
|
||||
else:
|
||||
tf.summary.histogram(weight.name, weight)
|
||||
|
||||
if self.write_images:
|
||||
w_img = tf.squeeze(weight)
|
||||
@@ -509,24 +601,42 @@ class TensorBoard(Callback):
|
||||
|
||||
w_img = tf.expand_dims(tf.expand_dims(w_img, 0), -1)
|
||||
|
||||
tf.image_summary(weight.name, w_img)
|
||||
if hasattr(tf, 'image_summary'):
|
||||
tf.image_summary(weight.name, w_img)
|
||||
else:
|
||||
tf.summary.image(weight.name, w_img)
|
||||
|
||||
if hasattr(layer, 'output'):
|
||||
tf.histogram_summary('{}_out'.format(layer.name),
|
||||
layer.output)
|
||||
self.merged = tf.merge_all_summaries()
|
||||
if hasattr(tf, 'histogram_summary'):
|
||||
tf.histogram_summary('{}_out'.format(layer.name),
|
||||
layer.output)
|
||||
else:
|
||||
tf.summary.histogram('{}_out'.format(layer.name),
|
||||
layer.output)
|
||||
|
||||
if hasattr(tf, 'merge_all_summaries'):
|
||||
self.merged = tf.merge_all_summaries()
|
||||
else:
|
||||
self.merged = tf.summary.merge_all()
|
||||
|
||||
if self.write_graph:
|
||||
if parse_version(tf.__version__) >= parse_version('0.8.0'):
|
||||
if hasattr(tf, 'summary') and hasattr(tf.summary, 'FileWriter'):
|
||||
self.writer = tf.summary.FileWriter(self.log_dir,
|
||||
self.sess.graph)
|
||||
elif parse_version(tf.__version__) >= parse_version('0.8.0'):
|
||||
self.writer = tf.train.SummaryWriter(self.log_dir,
|
||||
self.sess.graph)
|
||||
else:
|
||||
self.writer = tf.train.SummaryWriter(self.log_dir,
|
||||
self.sess.graph_def)
|
||||
else:
|
||||
self.writer = tf.train.SummaryWriter(self.log_dir)
|
||||
if hasattr(tf, 'summary') and hasattr(tf.summary, 'FileWriter'):
|
||||
self.writer = tf.summary.FileWriter(self.log_dir)
|
||||
else:
|
||||
self.writer = tf.train.SummaryWriter(self.log_dir)
|
||||
|
||||
def on_epoch_end(self, epoch, logs={}):
|
||||
import tensorflow as tf
|
||||
def on_epoch_end(self, epoch, logs=None):
|
||||
logs = logs or {}
|
||||
|
||||
if self.model.validation_data and self.histogram_freq:
|
||||
if epoch % self.histogram_freq == 0:
|
||||
@@ -554,9 +664,12 @@ class TensorBoard(Callback):
|
||||
self.writer.add_summary(summary, epoch)
|
||||
self.writer.flush()
|
||||
|
||||
def on_train_end(self, _):
|
||||
self.writer.close()
|
||||
|
||||
|
||||
class ReduceLROnPlateau(Callback):
|
||||
'''Reduce learning rate when a metric has stopped improving.
|
||||
"""Reduce learning rate when a metric has stopped improving.
|
||||
|
||||
Models often benefit from reducing the learning rate by a factor
|
||||
of 2-10 once learning stagnates. This callback monitors a
|
||||
@@ -589,15 +702,16 @@ class ReduceLROnPlateau(Callback):
|
||||
cooldown: number of epochs to wait before resuming
|
||||
normal operation after lr has been reduced.
|
||||
min_lr: lower bound on the learning rate.
|
||||
'''
|
||||
"""
|
||||
|
||||
def __init__(self, monitor='val_loss', factor=0.1, patience=10,
|
||||
verbose=0, mode='auto', epsilon=1e-4, cooldown=0, min_lr=0):
|
||||
super(Callback, self).__init__()
|
||||
super(ReduceLROnPlateau, self).__init__()
|
||||
|
||||
self.monitor = monitor
|
||||
if factor >= 1.0:
|
||||
raise ValueError('ReduceLROnPlateau does not support a factor >= 1.0.')
|
||||
raise ValueError('ReduceLROnPlateau '
|
||||
'does not support a factor >= 1.0.')
|
||||
self.factor = factor
|
||||
self.min_lr = min_lr
|
||||
self.epsilon = epsilon
|
||||
@@ -609,14 +723,18 @@ class ReduceLROnPlateau(Callback):
|
||||
self.best = 0
|
||||
self.mode = mode
|
||||
self.monitor_op = None
|
||||
self.reset()
|
||||
self._reset()
|
||||
|
||||
def reset(self):
|
||||
def _reset(self):
|
||||
"""Resets wait counter and cooldown counter.
|
||||
"""
|
||||
if self.mode not in ['auto', 'min', 'max']:
|
||||
warnings.warn('Learning Rate Plateau Reducing mode %s is unknown, '
|
||||
'fallback to auto mode.' % (self.mode), RuntimeWarning)
|
||||
'fallback to auto mode.' % (self.mode),
|
||||
RuntimeWarning)
|
||||
self.mode = 'auto'
|
||||
if self.mode == 'min' or (self.mode == 'auto' and 'acc' not in self.monitor):
|
||||
if (self.mode == 'min' or
|
||||
(self.mode == 'auto' and 'acc' not in self.monitor)):
|
||||
self.monitor_op = lambda a, b: np.less(a, b - self.epsilon)
|
||||
self.best = np.Inf
|
||||
else:
|
||||
@@ -626,10 +744,11 @@ class ReduceLROnPlateau(Callback):
|
||||
self.wait = 0
|
||||
self.lr_epsilon = self.min_lr * 1e-4
|
||||
|
||||
def on_train_begin(self, logs={}):
|
||||
self.reset()
|
||||
def on_train_begin(self, logs=None):
|
||||
self._reset()
|
||||
|
||||
def on_epoch_end(self, epoch, logs={}):
|
||||
def on_epoch_end(self, epoch, logs=None):
|
||||
logs = logs or {}
|
||||
logs['lr'] = K.get_value(self.model.optimizer.lr)
|
||||
current = logs.get(self.monitor)
|
||||
if current is None:
|
||||
@@ -661,7 +780,8 @@ class ReduceLROnPlateau(Callback):
|
||||
|
||||
|
||||
class CSVLogger(Callback):
|
||||
'''Callback that streams epoch results to a csv file.
|
||||
"""Callback that streams epoch results to a csv file.
|
||||
|
||||
Supports all values that can be represented as a string,
|
||||
including 1D iterables such as np.ndarray.
|
||||
|
||||
@@ -671,12 +791,12 @@ class CSVLogger(Callback):
|
||||
model.fit(X_train, Y_train, callbacks=[csv_logger])
|
||||
```
|
||||
|
||||
Arguments
|
||||
# Arguments
|
||||
filename: filename of the csv file, e.g. 'run/log.csv'.
|
||||
separator: string used to separate elements in the csv file.
|
||||
append: True: append if file exists (useful for continuing
|
||||
training). False: overwrite existing file.
|
||||
'''
|
||||
"""
|
||||
|
||||
def __init__(self, filename, separator=',', append=False):
|
||||
self.sep = separator
|
||||
@@ -687,26 +807,29 @@ class CSVLogger(Callback):
|
||||
self.append_header = True
|
||||
super(CSVLogger, self).__init__()
|
||||
|
||||
def on_train_begin(self, logs={}):
|
||||
def on_train_begin(self, logs=None):
|
||||
if self.append:
|
||||
if os.path.exists(self.filename):
|
||||
with open(self.filename) as f:
|
||||
self.append_header = len(f.readline()) == 0
|
||||
self.append_header = bool(len(f.readline()))
|
||||
self.csv_file = open(self.filename, 'a')
|
||||
else:
|
||||
self.csv_file = open(self.filename, 'w')
|
||||
|
||||
def on_epoch_end(self, epoch, logs={}):
|
||||
def on_epoch_end(self, epoch, logs=None):
|
||||
logs = logs or {}
|
||||
|
||||
def handle_value(k):
|
||||
is_zero_dim_ndarray = isinstance(k, np.ndarray) and k.ndim == 0
|
||||
if isinstance(k, Iterable) and not is_zero_dim_ndarray:
|
||||
return '"[%s]"' % (', '.join(map(lambda x: str(x), k)))
|
||||
return '"[%s]"' % (', '.join(map(str, k)))
|
||||
else:
|
||||
return k
|
||||
|
||||
if not self.writer:
|
||||
self.keys = sorted(logs.keys())
|
||||
self.writer = csv.DictWriter(self.csv_file, fieldnames=['epoch'] + self.keys)
|
||||
self.writer = csv.DictWriter(self.csv_file,
|
||||
fieldnames=['epoch'] + self.keys)
|
||||
if self.append_header:
|
||||
self.writer.writeheader()
|
||||
|
||||
@@ -715,7 +838,7 @@ class CSVLogger(Callback):
|
||||
self.writer.writerow(row_dict)
|
||||
self.csv_file.flush()
|
||||
|
||||
def on_train_end(self, logs={}):
|
||||
def on_train_end(self, logs=None):
|
||||
self.csv_file.close()
|
||||
|
||||
|
||||
@@ -723,11 +846,14 @@ class LambdaCallback(Callback):
|
||||
"""Callback for creating simple, custom callbacks on-the-fly.
|
||||
|
||||
This callback is constructed with anonymous functions that will be called
|
||||
at the appropiate time. Note that the callbacks expects positional
|
||||
at the appropriate time. Note that the callback expects positional
|
||||
arguments, as:
|
||||
- `on_epoch_begin` and `on_epoch_end` expect two positional arguments: `epoch`, `logs`
|
||||
- `on_batch_begin` and `on_batch_end` expect two positional arguments: `batch`, `logs`
|
||||
- `on_train_begin` and `on_train_end` expect one positional argument: `logs`
|
||||
- `on_epoch_begin` and `on_epoch_end` expect two positional arguments:
|
||||
`epoch`, `logs`
|
||||
- `on_batch_begin` and `on_batch_end` expect two positional arguments:
|
||||
`batch`, `logs`
|
||||
- `on_train_begin` and `on_train_end` expect one positional argument:
|
||||
`logs`
|
||||
|
||||
# Arguments
|
||||
on_epoch_begin: called at the beginning of every epoch.
|
||||
@@ -740,20 +866,27 @@ class LambdaCallback(Callback):
|
||||
# Example
|
||||
```python
|
||||
# Print the batch number at the beginning of every batch.
|
||||
batch_print_callback = LambdaCallback(on_batch_begin=lambda batch, logs: print(batch))
|
||||
batch_print_callback = LambdaCallback(
|
||||
on_batch_begin=lambda batch,logs: print(batch))
|
||||
|
||||
# Plot the loss after every epoch.
|
||||
import numpy as np
|
||||
import matplotlib.pyplot as plt
|
||||
plot_loss_callback = LambdaCallback(on_epoch_end=lambda epoch, logs: plt.plot(np.arange(epoch), logs['loss']))
|
||||
plot_loss_callback = LambdaCallback(
|
||||
on_epoch_end=lambda epoch, logs: plt.plot(np.arange(epoch),
|
||||
logs['loss']))
|
||||
|
||||
# Terminate some processes after having finished model training.
|
||||
processes = ...
|
||||
cleanup_callback = LambdaCallback(on_train_end=lambda logs: [p.terminate() for p in processes if p.is_alive()])
|
||||
cleanup_callback = LambdaCallback(
|
||||
on_train_end=lambda logs: [
|
||||
p.terminate() for p in processes if p.is_alive()])
|
||||
|
||||
model.fit(..., callbacks=[batch_print_callback, plot_loss_callback, cleanup_callback])
|
||||
model.fit(...,
|
||||
callbacks=[batch_print_callback,
|
||||
plot_loss_callback,
|
||||
cleanup_callback])
|
||||
```
|
||||
|
||||
"""
|
||||
|
||||
def __init__(self,
|
||||
@@ -764,11 +897,29 @@ class LambdaCallback(Callback):
|
||||
on_train_begin=None,
|
||||
on_train_end=None,
|
||||
**kwargs):
|
||||
super(Callback, self).__init__()
|
||||
super(LambdaCallback, self).__init__()
|
||||
self.__dict__.update(kwargs)
|
||||
self.on_epoch_begin = on_epoch_begin if on_epoch_begin else lambda epoch, logs: None
|
||||
self.on_epoch_end = on_epoch_end if on_epoch_end else lambda epoch, logs: None
|
||||
self.on_batch_begin = on_batch_begin if on_batch_begin else lambda batch, logs: None
|
||||
self.on_batch_end = on_batch_end if on_batch_end else lambda batch, logs: None
|
||||
self.on_train_begin = on_train_begin if on_train_begin else lambda logs: None
|
||||
self.on_train_end = on_train_end if on_train_end else lambda logs: None
|
||||
if on_epoch_begin is not None:
|
||||
self.on_epoch_begin = on_epoch_begin
|
||||
else:
|
||||
self.on_epoch_begin = lambda epoch, logs: None
|
||||
if on_epoch_end is not None:
|
||||
self.on_epoch_end = on_epoch_end
|
||||
else:
|
||||
self.on_epoch_end = lambda epoch, logs: None
|
||||
if on_batch_begin is not None:
|
||||
self.on_batch_begin = on_batch_begin
|
||||
else:
|
||||
self.on_batch_begin = lambda batch, logs: None
|
||||
if on_batch_end is not None:
|
||||
self.on_batch_end = on_batch_end
|
||||
else:
|
||||
self.on_batch_end = lambda batch, logs: None
|
||||
if on_train_begin is not None:
|
||||
self.on_train_begin = on_train_begin
|
||||
else:
|
||||
self.on_train_begin = lambda logs: None
|
||||
if on_train_end is not None:
|
||||
self.on_train_end = on_train_end
|
||||
else:
|
||||
self.on_train_end = lambda logs: None
|
||||
|
||||
+43
-33
@@ -1,8 +1,10 @@
|
||||
from __future__ import absolute_import
|
||||
from . import backend as K
|
||||
from .utils.generic_utils import get_from_module
|
||||
|
||||
|
||||
class Constraint(object):
|
||||
|
||||
def __call__(self, p):
|
||||
return p
|
||||
|
||||
@@ -11,26 +13,29 @@ class Constraint(object):
|
||||
|
||||
|
||||
class MaxNorm(Constraint):
|
||||
'''Constrain the weights incident to each hidden unit to have a norm less than or equal to a desired value.
|
||||
"""MaxNorm weight constraint.
|
||||
|
||||
Constrains the weights incident to each hidden unit
|
||||
to have a norm less than or equal to a desired value.
|
||||
|
||||
# Arguments
|
||||
m: the maximum norm for the incoming weights.
|
||||
axis: integer, axis along which to calculate weight norms. For instance,
|
||||
in a `Dense` layer the weight matrix has shape (input_dim, output_dim),
|
||||
set `axis` to `0` to constrain each weight vector of length (input_dim).
|
||||
In a `MaxoutDense` layer the weight tensor has shape (nb_feature, input_dim, output_dim),
|
||||
set `axis` to `1` to constrain each weight vector of length (input_dim),
|
||||
i.e. constrain the filters incident to the `max` operation.
|
||||
In a `Convolution2D` layer with the Theano backend, the weight tensor
|
||||
has shape (nb_filter, stack_size, nb_row, nb_col), set `axis` to `[1,2,3]`
|
||||
to constrain the weights of each filter tensor of size (stack_size, nb_row, nb_col).
|
||||
In a `Convolution2D` layer with the TensorFlow backend, the weight tensor
|
||||
has shape (nb_row, nb_col, stack_size, nb_filter), set `axis` to `[0,1,2]`
|
||||
to constrain the weights of each filter tensor of size (nb_row, nb_col, stack_size).
|
||||
axis: integer, axis along which to calculate weight norms.
|
||||
For instance, in a `Dense` layer the weight matrix
|
||||
has shape `(input_dim, output_dim)`,
|
||||
set `axis` to `0` to constrain each weight vector
|
||||
of length `(input_dim,)`.
|
||||
In a `Convolution2D` layer with `dim_ordering="tf"`,
|
||||
the weight tensor has shape
|
||||
`(rows, cols, input_depth, output_depth)`,
|
||||
set `axis` to `[0, 1, 2]`
|
||||
to constrain the weights of each filter tensor of size
|
||||
`(rows, cols, input_depth)`.
|
||||
|
||||
# References
|
||||
- [Dropout: A Simple Way to Prevent Neural Networks from Overfitting Srivastava, Hinton, et al. 2014](http://www.cs.toronto.edu/~rsalakhu/papers/srivastava14a.pdf)
|
||||
'''
|
||||
"""
|
||||
|
||||
def __init__(self, m=2, axis=0):
|
||||
self.m = m
|
||||
self.axis = axis
|
||||
@@ -38,7 +43,7 @@ class MaxNorm(Constraint):
|
||||
def __call__(self, p):
|
||||
norms = K.sqrt(K.sum(K.square(p), axis=self.axis, keepdims=True))
|
||||
desired = K.clip(norms, 0, self.m)
|
||||
p = p * (desired / (K.epsilon() + norms))
|
||||
p *= (desired / (K.epsilon() + norms))
|
||||
return p
|
||||
|
||||
def get_config(self):
|
||||
@@ -48,46 +53,51 @@ class MaxNorm(Constraint):
|
||||
|
||||
|
||||
class NonNeg(Constraint):
|
||||
'''Constrain the weights to be non-negative.
|
||||
'''
|
||||
"""Constrains the weights to be non-negative.
|
||||
"""
|
||||
|
||||
def __call__(self, p):
|
||||
p *= K.cast(p >= 0., K.floatx())
|
||||
return p
|
||||
|
||||
|
||||
class UnitNorm(Constraint):
|
||||
'''Constrain the weights incident to each hidden unit to have unit norm.
|
||||
"""Constrains the weights incident to each hidden unit to have unit norm.
|
||||
|
||||
# Arguments
|
||||
axis: integer, axis along which to calculate weight norms. For instance,
|
||||
in a `Dense` layer the weight matrix has shape (input_dim, output_dim),
|
||||
set `axis` to `0` to constrain each weight vector of length (input_dim).
|
||||
In a `MaxoutDense` layer the weight tensor has shape (nb_feature, input_dim, output_dim),
|
||||
set `axis` to `1` to constrain each weight vector of length (input_dim),
|
||||
i.e. constrain the filters incident to the `max` operation.
|
||||
In a `Convolution2D` layer with the Theano backend, the weight tensor
|
||||
has shape (nb_filter, stack_size, nb_row, nb_col), set `axis` to `[1,2,3]`
|
||||
to constrain the weights of each filter tensor of size (stack_size, nb_row, nb_col).
|
||||
In a `Convolution2D` layer with the TensorFlow backend, the weight tensor
|
||||
has shape (nb_row, nb_col, stack_size, nb_filter), set `axis` to `[0,1,2]`
|
||||
to constrain the weights of each filter tensor of size (nb_row, nb_col, stack_size).
|
||||
'''
|
||||
axis: integer, axis along which to calculate weight norms.
|
||||
For instance, in a `Dense` layer the weight matrix
|
||||
has shape `(input_dim, output_dim)`,
|
||||
set `axis` to `0` to constrain each weight vector
|
||||
of length `(input_dim,)`.
|
||||
In a `Convolution2D` layer with `dim_ordering="tf"`,
|
||||
the weight tensor has shape
|
||||
`(rows, cols, input_depth, output_depth)`,
|
||||
set `axis` to `[0, 1, 2]`
|
||||
to constrain the weights of each filter tensor of size
|
||||
`(rows, cols, input_depth)`.
|
||||
"""
|
||||
|
||||
def __init__(self, axis=0):
|
||||
self.axis = axis
|
||||
|
||||
def __call__(self, p):
|
||||
return p / (K.epsilon() + K.sqrt(K.sum(K.square(p), axis=self.axis, keepdims=True)))
|
||||
return p / (K.epsilon() + K.sqrt(K.sum(K.square(p),
|
||||
axis=self.axis,
|
||||
keepdims=True)))
|
||||
|
||||
def get_config(self):
|
||||
return {'name': self.__class__.__name__,
|
||||
'axis': self.axis}
|
||||
|
||||
|
||||
# Aliases.
|
||||
|
||||
maxnorm = MaxNorm
|
||||
nonneg = NonNeg
|
||||
unitnorm = UnitNorm
|
||||
|
||||
from .utils.generic_utils import get_from_module
|
||||
|
||||
def get(identifier, kwargs=None):
|
||||
return get_from_module(identifier, globals(), 'constraint',
|
||||
instantiate=True, kwargs=kwargs)
|
||||
|
||||
@@ -5,18 +5,28 @@ from six.moves import cPickle
|
||||
|
||||
|
||||
def load_batch(fpath, label_key='labels'):
|
||||
"""Internal utility for parsing CIFAR data.
|
||||
|
||||
# Arguments
|
||||
fpath: path to the file to parse.
|
||||
label_key: key for label data in the retrieved
|
||||
dictionary.
|
||||
|
||||
# Returns
|
||||
A tuple `(data, labels)`.
|
||||
"""
|
||||
f = open(fpath, 'rb')
|
||||
if sys.version_info < (3,):
|
||||
d = cPickle.load(f)
|
||||
else:
|
||||
d = cPickle.load(f, encoding="bytes")
|
||||
d = cPickle.load(f, encoding='bytes')
|
||||
# decode utf8
|
||||
d_decoded = {}
|
||||
for k, v in d.items():
|
||||
d_decoded[k.decode("utf8")] = v
|
||||
d_decoded[k.decode('utf8')] = v
|
||||
d = d_decoded
|
||||
f.close()
|
||||
data = d["data"]
|
||||
data = d['data']
|
||||
labels = d[label_key]
|
||||
|
||||
data = data.reshape(data.shape[0], 3, 32, 32)
|
||||
|
||||
@@ -7,29 +7,34 @@ import os
|
||||
|
||||
|
||||
def load_data():
|
||||
dirname = "cifar-10-batches-py"
|
||||
origin = "http://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz"
|
||||
"""Loads CIFAR10 dataset.
|
||||
|
||||
# Returns
|
||||
Tuple of Numpy arrays: `(x_train, y_train), (x_test, y_test)`.
|
||||
"""
|
||||
dirname = 'cifar-10-batches-py'
|
||||
origin = 'http://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz'
|
||||
path = get_file(dirname, origin=origin, untar=True)
|
||||
|
||||
nb_train_samples = 50000
|
||||
|
||||
X_train = np.zeros((nb_train_samples, 3, 32, 32), dtype="uint8")
|
||||
y_train = np.zeros((nb_train_samples,), dtype="uint8")
|
||||
x_train = np.zeros((nb_train_samples, 3, 32, 32), dtype='uint8')
|
||||
y_train = np.zeros((nb_train_samples,), dtype='uint8')
|
||||
|
||||
for i in range(1, 6):
|
||||
fpath = os.path.join(path, 'data_batch_' + str(i))
|
||||
data, labels = load_batch(fpath)
|
||||
X_train[(i - 1) * 10000: i * 10000, :, :, :] = data
|
||||
x_train[(i - 1) * 10000: i * 10000, :, :, :] = data
|
||||
y_train[(i - 1) * 10000: i * 10000] = labels
|
||||
|
||||
fpath = os.path.join(path, 'test_batch')
|
||||
X_test, y_test = load_batch(fpath)
|
||||
x_test, y_test = load_batch(fpath)
|
||||
|
||||
y_train = np.reshape(y_train, (len(y_train), 1))
|
||||
y_test = np.reshape(y_test, (len(y_test), 1))
|
||||
|
||||
if K.image_dim_ordering() == 'tf':
|
||||
X_train = X_train.transpose(0, 2, 3, 1)
|
||||
X_test = X_test.transpose(0, 2, 3, 1)
|
||||
x_train = x_train.transpose(0, 2, 3, 1)
|
||||
x_test = x_test.transpose(0, 2, 3, 1)
|
||||
|
||||
return (X_train, y_train), (X_test, y_test)
|
||||
return (x_train, y_train), (x_test, y_test)
|
||||
|
||||
@@ -7,24 +7,35 @@ import os
|
||||
|
||||
|
||||
def load_data(label_mode='fine'):
|
||||
if label_mode not in ['fine', 'coarse']:
|
||||
raise Exception('label_mode must be one of "fine" "coarse".')
|
||||
"""Loads CIFAR100 dataset.
|
||||
|
||||
dirname = "cifar-100-python"
|
||||
origin = "http://www.cs.toronto.edu/~kriz/cifar-100-python.tar.gz"
|
||||
# Arguments
|
||||
label_mode: one of "fine", "coarse".
|
||||
|
||||
# Returns
|
||||
Tuple of Numpy arrays: `(x_train, y_train), (x_test, y_test)`.
|
||||
|
||||
# Raises
|
||||
ValueError: in case of invalid `label_mode`.
|
||||
"""
|
||||
if label_mode not in ['fine', 'coarse']:
|
||||
raise ValueError('label_mode must be one of "fine" "coarse".')
|
||||
|
||||
dirname = 'cifar-100-python'
|
||||
origin = 'http://www.cs.toronto.edu/~kriz/cifar-100-python.tar.gz'
|
||||
path = get_file(dirname, origin=origin, untar=True)
|
||||
|
||||
fpath = os.path.join(path, 'train')
|
||||
X_train, y_train = load_batch(fpath, label_key=label_mode+'_labels')
|
||||
x_train, y_train = load_batch(fpath, label_key=label_mode + '_labels')
|
||||
|
||||
fpath = os.path.join(path, 'test')
|
||||
X_test, y_test = load_batch(fpath, label_key=label_mode+'_labels')
|
||||
x_test, y_test = load_batch(fpath, label_key=label_mode + '_labels')
|
||||
|
||||
y_train = np.reshape(y_train, (len(y_train), 1))
|
||||
y_test = np.reshape(y_test, (len(y_test), 1))
|
||||
|
||||
if K.image_dim_ordering() == 'tf':
|
||||
X_train = X_train.transpose(0, 2, 3, 1)
|
||||
X_test = X_test.transpose(0, 2, 3, 1)
|
||||
x_train = x_train.transpose(0, 2, 3, 1)
|
||||
x_test = x_test.transpose(0, 2, 3, 1)
|
||||
|
||||
return (X_train, y_train), (X_test, y_test)
|
||||
return (x_train, y_train), (x_test, y_test)
|
||||
|
||||
@@ -1,4 +0,0 @@
|
||||
from ..utils.data_utils import *
|
||||
import warnings
|
||||
|
||||
warnings.warn('data_utils has been moved to keras.utils.data_utils.')
|
||||
+42
-25
@@ -10,9 +10,10 @@ import sys
|
||||
def load_data(path='imdb_full.pkl', nb_words=None, skip_top=0,
|
||||
maxlen=None, seed=113,
|
||||
start_char=1, oov_char=2, index_from=3):
|
||||
'''
|
||||
"""Loads the IMDB dataset.
|
||||
|
||||
# Arguments
|
||||
path: where to store the data (in `/.keras/dataset`)
|
||||
path: where to cache the data (relative to `~/.keras/datasets`).
|
||||
nb_words: max number of words to include. Words are ranked
|
||||
by how often they occur (in the training set) and only
|
||||
the most frequent words are kept
|
||||
@@ -26,12 +27,19 @@ def load_data(path='imdb_full.pkl', nb_words=None, skip_top=0,
|
||||
or `skip_top` limit will be replaced with this character.
|
||||
index_from: index actual words with this index and higher.
|
||||
|
||||
# Returns
|
||||
Tuple of Numpy arrays: `(x_train, y_train), (x_test, y_test)`.
|
||||
|
||||
# Raises
|
||||
ValueError: in case `maxlen` is so low
|
||||
that no input sequence could be kept.
|
||||
|
||||
Note that the 'out of vocabulary' character is only used for
|
||||
words that were present in the training set but are not included
|
||||
because they're not making the `nb_words` cut here.
|
||||
Words that were not seen in the training set but are in the test set
|
||||
have simply been skipped.
|
||||
'''
|
||||
"""
|
||||
path = get_file(path,
|
||||
origin='https://s3.amazonaws.com/text-datasets/imdb_full.pkl',
|
||||
md5_hash='d091312047c43cf9e4e38fef92437263')
|
||||
@@ -54,54 +62,63 @@ def load_data(path='imdb_full.pkl', nb_words=None, skip_top=0,
|
||||
np.random.seed(seed * 2)
|
||||
np.random.shuffle(labels_test)
|
||||
|
||||
X = x_train + x_test
|
||||
xs = x_train + x_test
|
||||
labels = labels_train + labels_test
|
||||
|
||||
if start_char is not None:
|
||||
X = [[start_char] + [w + index_from for w in x] for x in X]
|
||||
xs = [[start_char] + [w + index_from for w in x] for x in xs]
|
||||
elif index_from:
|
||||
X = [[w + index_from for w in x] for x in X]
|
||||
xs = [[w + index_from for w in x] for x in xs]
|
||||
|
||||
if maxlen:
|
||||
new_X = []
|
||||
new_xs = []
|
||||
new_labels = []
|
||||
for x, y in zip(X, labels):
|
||||
for x, y in zip(xs, labels):
|
||||
if len(x) < maxlen:
|
||||
new_X.append(x)
|
||||
new_xs.append(x)
|
||||
new_labels.append(y)
|
||||
X = new_X
|
||||
xs = new_xs
|
||||
labels = new_labels
|
||||
if not X:
|
||||
raise Exception('After filtering for sequences shorter than maxlen=' +
|
||||
str(maxlen) + ', no sequence was kept. '
|
||||
'Increase maxlen.')
|
||||
if not xs:
|
||||
raise ValueError('After filtering for sequences shorter than maxlen=' +
|
||||
str(maxlen) + ', no sequence was kept. '
|
||||
'Increase maxlen.')
|
||||
if not nb_words:
|
||||
nb_words = max([max(x) for x in X])
|
||||
nb_words = max([max(x) for x in xs])
|
||||
|
||||
# by convention, use 2 as OOV word
|
||||
# reserve 'index_from' (=3 by default) characters: 0 (padding), 1 (start), 2 (OOV)
|
||||
# reserve 'index_from' (=3 by default) characters:
|
||||
# 0 (padding), 1 (start), 2 (OOV)
|
||||
if oov_char is not None:
|
||||
X = [[oov_char if (w >= nb_words or w < skip_top) else w for w in x] for x in X]
|
||||
xs = [[oov_char if (w >= nb_words or w < skip_top) else w for w in x] for x in xs]
|
||||
else:
|
||||
nX = []
|
||||
for x in X:
|
||||
new_xs = []
|
||||
for x in xs:
|
||||
nx = []
|
||||
for w in x:
|
||||
if (w >= nb_words or w < skip_top):
|
||||
if w >= nb_words or w < skip_top:
|
||||
nx.append(w)
|
||||
nX.append(nx)
|
||||
X = nX
|
||||
new_xs.append(nx)
|
||||
xs = new_xs
|
||||
|
||||
X_train = np.array(X[:len(x_train)])
|
||||
x_train = np.array(xs[:len(x_train)])
|
||||
y_train = np.array(labels[:len(x_train)])
|
||||
|
||||
X_test = np.array(X[len(x_train):])
|
||||
x_test = np.array(xs[len(x_train):])
|
||||
y_test = np.array(labels[len(x_train):])
|
||||
|
||||
return (X_train, y_train), (X_test, y_test)
|
||||
return (x_train, y_train), (x_test, y_test)
|
||||
|
||||
|
||||
def get_word_index(path='imdb_word_index.pkl'):
|
||||
"""Retrieves the dictionary mapping word indices back to words.
|
||||
|
||||
# Arguments
|
||||
path: where to cache the data (relative to `~/.keras/datasets`).
|
||||
|
||||
# Returns
|
||||
The word index dictionary.
|
||||
"""
|
||||
path = get_file(path,
|
||||
origin='https://s3.amazonaws.com/text-datasets/imdb_word_index.pkl',
|
||||
md5_hash='72d94b01291be4ff843198d3b0e1e4d7')
|
||||
|
||||
@@ -5,6 +5,15 @@ import sys
|
||||
|
||||
|
||||
def load_data(path='mnist.pkl.gz'):
|
||||
"""Loads the MNIST dataset.
|
||||
|
||||
# Arguments
|
||||
path: path where to cache the dataset locally
|
||||
(relative to ~/.keras/datasets).
|
||||
|
||||
# Returns
|
||||
Tuple of Numpy arrays: `(x_train, y_train), (x_test, y_test)`.
|
||||
"""
|
||||
path = get_file(path, origin='https://s3.amazonaws.com/img-datasets/mnist.pkl.gz')
|
||||
|
||||
if path.endswith('.gz'):
|
||||
@@ -18,4 +27,4 @@ def load_data(path='mnist.pkl.gz'):
|
||||
data = cPickle.load(f, encoding='bytes')
|
||||
|
||||
f.close()
|
||||
return data # (X_train, y_train), (X_test, y_test)
|
||||
return data # (x_train, y_train), (x_test, y_test)
|
||||
|
||||
+36
-24
@@ -10,10 +10,10 @@ import sys
|
||||
def load_data(path='reuters.pkl', nb_words=None, skip_top=0,
|
||||
maxlen=None, test_split=0.2, seed=113,
|
||||
start_char=1, oov_char=2, index_from=3):
|
||||
'''Loads the Reuters newswire classification dataset.
|
||||
"""Loads the Reuters newswire classification dataset.
|
||||
|
||||
# Arguments
|
||||
path: where to store the data (in `/.keras/dataset`)
|
||||
path: where to cache the data (relative to `~/.keras/datasets`).
|
||||
nb_words: max number of words to include. Words are ranked
|
||||
by how often they occur (in the training set) and only
|
||||
the most frequent words are kept
|
||||
@@ -28,65 +28,77 @@ def load_data(path='reuters.pkl', nb_words=None, skip_top=0,
|
||||
or `skip_top` limit will be replaced with this character.
|
||||
index_from: index actual words with this index and higher.
|
||||
|
||||
# Returns
|
||||
Tuple of Numpy arrays: `(x_train, y_train), (x_test, y_test)`.
|
||||
|
||||
Note that the 'out of vocabulary' character is only used for
|
||||
words that were present in the training set but are not included
|
||||
because they're not making the `nb_words` cut here.
|
||||
Words that were not seen in the training set but are in the test set
|
||||
have simply been skipped.
|
||||
'''
|
||||
"""
|
||||
|
||||
path = get_file(path, origin='https://s3.amazonaws.com/text-datasets/reuters.pkl')
|
||||
f = open(path, 'rb')
|
||||
X, labels = cPickle.load(f)
|
||||
xs, labels = cPickle.load(f)
|
||||
f.close()
|
||||
|
||||
np.random.seed(seed)
|
||||
np.random.shuffle(X)
|
||||
np.random.shuffle(xs)
|
||||
np.random.seed(seed)
|
||||
np.random.shuffle(labels)
|
||||
|
||||
if start_char is not None:
|
||||
X = [[start_char] + [w + index_from for w in x] for x in X]
|
||||
xs = [[start_char] + [w + index_from for w in x] for x in xs]
|
||||
elif index_from:
|
||||
X = [[w + index_from for w in x] for x in X]
|
||||
xs = [[w + index_from for w in x] for x in xs]
|
||||
|
||||
if maxlen:
|
||||
new_X = []
|
||||
new_xs = []
|
||||
new_labels = []
|
||||
for x, y in zip(X, labels):
|
||||
for x, y in zip(xs, labels):
|
||||
if len(x) < maxlen:
|
||||
new_X.append(x)
|
||||
new_xs.append(x)
|
||||
new_labels.append(y)
|
||||
X = new_X
|
||||
xs = new_xs
|
||||
labels = new_labels
|
||||
|
||||
if not nb_words:
|
||||
nb_words = max([max(x) for x in X])
|
||||
nb_words = max([max(x) for x in xs])
|
||||
|
||||
# by convention, use 2 as OOV word
|
||||
# reserve 'index_from' (=3 by default) characters: 0 (padding), 1 (start), 2 (OOV)
|
||||
# reserve 'index_from' (=3 by default) characters:
|
||||
# 0 (padding), 1 (start), 2 (OOV)
|
||||
if oov_char is not None:
|
||||
X = [[oov_char if (w >= nb_words or w < skip_top) else w for w in x] for x in X]
|
||||
xs = [[oov_char if (w >= nb_words or w < skip_top) else w for w in x] for x in xs]
|
||||
else:
|
||||
nX = []
|
||||
for x in X:
|
||||
new_xs = []
|
||||
for x in xs:
|
||||
nx = []
|
||||
for w in x:
|
||||
if (w >= nb_words or w < skip_top):
|
||||
if w >= nb_words or w < skip_top:
|
||||
nx.append(w)
|
||||
nX.append(nx)
|
||||
X = nX
|
||||
new_xs.append(nx)
|
||||
xs = new_xs
|
||||
|
||||
X_train = X[:int(len(X) * (1 - test_split))]
|
||||
y_train = labels[:int(len(X) * (1 - test_split))]
|
||||
x_train = xs[:int(len(xs) * (1 - test_split))]
|
||||
y_train = labels[:int(len(xs) * (1 - test_split))]
|
||||
|
||||
X_test = X[int(len(X) * (1 - test_split)):]
|
||||
y_test = labels[int(len(X) * (1 - test_split)):]
|
||||
x_test = xs[int(len(xs) * (1 - test_split)):]
|
||||
y_test = labels[int(len(xs) * (1 - test_split)):]
|
||||
|
||||
return (X_train, y_train), (X_test, y_test)
|
||||
return (x_train, y_train), (x_test, y_test)
|
||||
|
||||
|
||||
def get_word_index(path='reuters_word_index.pkl'):
|
||||
"""Retrieves the dictionary mapping word indices back to words.
|
||||
|
||||
# Arguments
|
||||
path: where to cache the data (relative to `~/.keras/datasets`).
|
||||
|
||||
# Returns
|
||||
The word index dictionary.
|
||||
"""
|
||||
path = get_file(path, origin='https://s3.amazonaws.com/text-datasets/reuters_word_index.pkl')
|
||||
f = open(path, 'rb')
|
||||
|
||||
|
||||
+599
-407
Diferenças do arquivo suprimidas por serem muito extensas
Carregar Diff
+648
-548
Diferenças do arquivo suprimidas por serem muito extensas
Carregar Diff
+33
-19
@@ -9,7 +9,7 @@ def get_fans(shape, dim_ordering='th'):
|
||||
fan_in = shape[0]
|
||||
fan_out = shape[1]
|
||||
elif len(shape) == 4 or len(shape) == 5:
|
||||
# assuming convolution kernels (2D or 3D).
|
||||
# Assuming convolution kernels (2D or 3D).
|
||||
# TH kernel shape: (depth, input_depth, ...)
|
||||
# TF kernel shape: (..., input_depth, depth)
|
||||
if dim_ordering == 'th':
|
||||
@@ -23,32 +23,38 @@ def get_fans(shape, dim_ordering='th'):
|
||||
else:
|
||||
raise ValueError('Invalid dim_ordering: ' + dim_ordering)
|
||||
else:
|
||||
# no specific assumptions
|
||||
# No specific assumptions.
|
||||
fan_in = np.sqrt(np.prod(shape))
|
||||
fan_out = np.sqrt(np.prod(shape))
|
||||
return fan_in, fan_out
|
||||
|
||||
|
||||
def uniform(shape, scale=0.05, name=None):
|
||||
def uniform(shape, scale=0.05, name=None, dim_ordering='th'):
|
||||
return K.random_uniform_variable(shape, -scale, scale, name=name)
|
||||
|
||||
|
||||
def normal(shape, scale=0.05, name=None):
|
||||
def normal(shape, scale=0.05, name=None, dim_ordering='th'):
|
||||
return K.random_normal_variable(shape, 0.0, scale, name=name)
|
||||
|
||||
|
||||
def lecun_uniform(shape, name=None, dim_ordering='th'):
|
||||
''' Reference: LeCun 98, Efficient Backprop
|
||||
"""LeCun uniform variance scaling initializer.
|
||||
|
||||
# References
|
||||
LeCun 98, Efficient Backprop,
|
||||
http://yann.lecun.com/exdb/publis/pdf/lecun-98b.pdf
|
||||
'''
|
||||
"""
|
||||
fan_in, fan_out = get_fans(shape, dim_ordering=dim_ordering)
|
||||
scale = np.sqrt(3. / fan_in)
|
||||
return uniform(shape, scale, name=name)
|
||||
|
||||
|
||||
def glorot_normal(shape, name=None, dim_ordering='th'):
|
||||
''' Reference: Glorot & Bengio, AISTATS 2010
|
||||
'''
|
||||
"""Glorot normal variance scaling initializer.
|
||||
|
||||
# References
|
||||
Glorot & Bengio, AISTATS 2010
|
||||
"""
|
||||
fan_in, fan_out = get_fans(shape, dim_ordering=dim_ordering)
|
||||
s = np.sqrt(2. / (fan_in + fan_out))
|
||||
return normal(shape, s, name=name)
|
||||
@@ -61,44 +67,52 @@ def glorot_uniform(shape, name=None, dim_ordering='th'):
|
||||
|
||||
|
||||
def he_normal(shape, name=None, dim_ordering='th'):
|
||||
''' Reference: He et al., http://arxiv.org/abs/1502.01852
|
||||
'''
|
||||
"""He normal variance scaling initializer.
|
||||
|
||||
# References
|
||||
He et al., http://arxiv.org/abs/1502.01852
|
||||
"""
|
||||
fan_in, fan_out = get_fans(shape, dim_ordering=dim_ordering)
|
||||
s = np.sqrt(2. / fan_in)
|
||||
return normal(shape, s, name=name)
|
||||
|
||||
|
||||
def he_uniform(shape, name=None, dim_ordering='th'):
|
||||
"""He uniform variance scaling initializer.
|
||||
"""
|
||||
fan_in, fan_out = get_fans(shape, dim_ordering=dim_ordering)
|
||||
s = np.sqrt(6. / fan_in)
|
||||
return uniform(shape, s, name=name)
|
||||
|
||||
|
||||
def orthogonal(shape, scale=1.1, name=None):
|
||||
''' From Lasagne. Reference: Saxe et al., http://arxiv.org/abs/1312.6120
|
||||
'''
|
||||
def orthogonal(shape, scale=1.1, name=None, dim_ordering='th'):
|
||||
"""Orthogonal initializer.
|
||||
|
||||
# References
|
||||
Saxe et al., http://arxiv.org/abs/1312.6120
|
||||
"""
|
||||
flat_shape = (shape[0], np.prod(shape[1:]))
|
||||
a = np.random.normal(0.0, 1.0, flat_shape)
|
||||
u, _, v = np.linalg.svd(a, full_matrices=False)
|
||||
# pick the one with the correct shape
|
||||
# Pick the one with the correct shape.
|
||||
q = u if u.shape == flat_shape else v
|
||||
q = q.reshape(shape)
|
||||
return K.variable(scale * q[:shape[0], :shape[1]], name=name)
|
||||
|
||||
|
||||
def identity(shape, scale=1, name=None):
|
||||
def identity(shape, scale=1, name=None, dim_ordering='th'):
|
||||
if len(shape) != 2 or shape[0] != shape[1]:
|
||||
raise Exception('Identity matrix initialization can only be used '
|
||||
'for 2D square matrices.')
|
||||
raise ValueError('Identity matrix initialization can only be used '
|
||||
'for 2D square matrices.')
|
||||
else:
|
||||
return K.variable(scale * np.identity(shape[0]), name=name)
|
||||
|
||||
|
||||
def zero(shape, name=None):
|
||||
def zero(shape, name=None, dim_ordering='th'):
|
||||
return K.zeros(shape, name=name)
|
||||
|
||||
|
||||
def one(shape, name=None):
|
||||
def one(shape, name=None, dim_ordering='th'):
|
||||
return K.ones(shape, name=name)
|
||||
|
||||
|
||||
|
||||
@@ -5,8 +5,9 @@ import numpy as np
|
||||
|
||||
|
||||
class LeakyReLU(Layer):
|
||||
'''Special version of a Rectified Linear Unit
|
||||
that allows a small gradient when the unit is not active:
|
||||
"""Leaky version of a Rectified Linear Unit.
|
||||
|
||||
It allows a small gradient when the unit is not active:
|
||||
`f(x) = alpha * x for x < 0`,
|
||||
`f(x) = x for x >= 0`.
|
||||
|
||||
@@ -20,7 +21,11 @@ class LeakyReLU(Layer):
|
||||
|
||||
# Arguments
|
||||
alpha: float >= 0. Negative slope coefficient.
|
||||
'''
|
||||
|
||||
# References
|
||||
- [Rectifier Nonlinearities Improve Neural Network Acoustic Models](https://web.stanford.edu/~awni/papers/relu_hybrid_icml2013_final.pdf)
|
||||
"""
|
||||
|
||||
def __init__(self, alpha=0.3, **kwargs):
|
||||
self.supports_masking = True
|
||||
self.alpha = alpha
|
||||
@@ -36,7 +41,9 @@ class LeakyReLU(Layer):
|
||||
|
||||
|
||||
class PReLU(Layer):
|
||||
'''Parametric Rectified Linear Unit:
|
||||
"""Parametric Rectified Linear Unit.
|
||||
|
||||
It follows:
|
||||
`f(x) = alphas * x for x < 0`,
|
||||
`f(x) = x for x >= 0`,
|
||||
where `alphas` is a learned array with the same shape as x.
|
||||
@@ -52,18 +59,38 @@ class PReLU(Layer):
|
||||
# Arguments
|
||||
init: initialization function for the weights.
|
||||
weights: initial weights, as a list of a single Numpy array.
|
||||
shared_axes: the axes along which to share learnable
|
||||
parameters for the activation function.
|
||||
For example, if the incoming feature maps
|
||||
are from a 2D convolution
|
||||
with output shape `(batch, height, width, channels)`,
|
||||
and you wish to share parameters across space
|
||||
so that each filter only has one set of parameters,
|
||||
set `shared_axes=[1, 2]`.
|
||||
|
||||
# References
|
||||
- [Delving Deep into Rectifiers: Surpassing Human-Level Performance on ImageNet Classification](http://arxiv.org/pdf/1502.01852v1.pdf)
|
||||
'''
|
||||
def __init__(self, init='zero', weights=None, **kwargs):
|
||||
- [Delving Deep into Rectifiers: Surpassing Human-Level Performance on ImageNet Classification](https://arxiv.org/abs/1502.01852)
|
||||
"""
|
||||
|
||||
def __init__(self, init='zero', weights=None, shared_axes=None, **kwargs):
|
||||
self.supports_masking = True
|
||||
self.init = initializations.get(init)
|
||||
self.initial_weights = weights
|
||||
if not isinstance(shared_axes, (list, tuple)):
|
||||
self.shared_axes = [shared_axes]
|
||||
else:
|
||||
self.shared_axes = list(shared_axes)
|
||||
super(PReLU, self).__init__(**kwargs)
|
||||
|
||||
def build(self, input_shape):
|
||||
self.alphas = self.init(input_shape[1:],
|
||||
param_shape = list(input_shape[1:])
|
||||
self.param_broadcast = [False] * len(param_shape)
|
||||
if self.shared_axes[0] is not None:
|
||||
for i in self.shared_axes:
|
||||
param_shape[i - 1] = 1
|
||||
self.param_broadcast[i - 1] = True
|
||||
|
||||
self.alphas = self.init(param_shape,
|
||||
name='{}_alphas'.format(self.name))
|
||||
self.trainable_weights = [self.alphas]
|
||||
|
||||
@@ -73,7 +100,11 @@ class PReLU(Layer):
|
||||
|
||||
def call(self, x, mask=None):
|
||||
pos = K.relu(x)
|
||||
neg = self.alphas * (x - abs(x)) * 0.5
|
||||
if K.backend() == 'theano':
|
||||
neg = (K.pattern_broadcast(self.alphas, self.param_broadcast) *
|
||||
(x - abs(x)) * 0.5)
|
||||
else:
|
||||
neg = self.alphas * (x - abs(x)) * 0.5
|
||||
return pos + neg
|
||||
|
||||
def get_config(self):
|
||||
@@ -83,7 +114,9 @@ class PReLU(Layer):
|
||||
|
||||
|
||||
class ELU(Layer):
|
||||
'''Exponential Linear Unit:
|
||||
"""Exponential Linear Unit.
|
||||
|
||||
It follows:
|
||||
`f(x) = alpha * (exp(x) - 1.) for x < 0`,
|
||||
`f(x) = x for x >= 0`.
|
||||
|
||||
@@ -99,8 +132,9 @@ class ELU(Layer):
|
||||
alpha: scale for the negative factor.
|
||||
|
||||
# References
|
||||
- [Fast and Accurate Deep Network Learning by Exponential Linear Units (ELUs)](http://arxiv.org/pdf/1511.07289v1.pdf)
|
||||
'''
|
||||
- [Fast and Accurate Deep Network Learning by Exponential Linear Units (ELUs)](https://arxiv.org/abs/1511.07289v1)
|
||||
"""
|
||||
|
||||
def __init__(self, alpha=1.0, **kwargs):
|
||||
self.supports_masking = True
|
||||
self.alpha = K.cast_to_floatx(alpha)
|
||||
@@ -116,8 +150,10 @@ class ELU(Layer):
|
||||
|
||||
|
||||
class ParametricSoftplus(Layer):
|
||||
'''Parametric Softplus:
|
||||
`alpha * log(1 + exp(beta * x))`
|
||||
"""Parametric Softplus.
|
||||
|
||||
It follows:
|
||||
`f(x) = alpha * log(1 + exp(beta * x))`
|
||||
|
||||
# Input shape
|
||||
Arbitrary. Use the keyword argument `input_shape`
|
||||
@@ -131,23 +167,42 @@ class ParametricSoftplus(Layer):
|
||||
alpha_init: float. Initial value of the alpha weights.
|
||||
beta_init: float. Initial values of the beta weights.
|
||||
weights: initial weights, as a list of 2 numpy arrays.
|
||||
shared_axes: the axes along which to share learnable
|
||||
parameters for the activation function.
|
||||
For example, if the incoming feature maps
|
||||
are from a 2D convolution
|
||||
with output shape `(batch, height, width, channels)`,
|
||||
and you wish to share parameters across space
|
||||
so that each filter only has one set of parameters,
|
||||
set `shared_axes=[1, 2]`.
|
||||
|
||||
# References
|
||||
- [Inferring Nonlinear Neuronal Computation Based on Physiologically Plausible Inputs](http://journals.plos.org/ploscompbiol/article?id=10.1371/journal.pcbi.1003143)
|
||||
'''
|
||||
"""
|
||||
|
||||
def __init__(self, alpha_init=0.2, beta_init=5.0,
|
||||
weights=None, **kwargs):
|
||||
weights=None, shared_axes=None, **kwargs):
|
||||
self.supports_masking = True
|
||||
self.alpha_init = K.cast_to_floatx(alpha_init)
|
||||
self.beta_init = K.cast_to_floatx(beta_init)
|
||||
self.initial_weights = weights
|
||||
if not isinstance(shared_axes, (list, tuple)):
|
||||
self.shared_axes = [shared_axes]
|
||||
else:
|
||||
self.shared_axes = list(shared_axes)
|
||||
super(ParametricSoftplus, self).__init__(**kwargs)
|
||||
|
||||
def build(self, input_shape):
|
||||
input_shape = input_shape[1:]
|
||||
self.alphas = K.variable(self.alpha_init * np.ones(input_shape),
|
||||
param_shape = list(input_shape[1:])
|
||||
self.param_broadcast = [False] * len(param_shape)
|
||||
if self.shared_axes[0] is not None:
|
||||
for i in self.shared_axes:
|
||||
param_shape[i - 1] = 1
|
||||
self.param_broadcast[i - 1] = True
|
||||
|
||||
self.alphas = K.variable(self.alpha_init * np.ones(param_shape),
|
||||
name='{}_alphas'.format(self.name))
|
||||
self.betas = K.variable(self.beta_init * np.ones(input_shape),
|
||||
self.betas = K.variable(self.beta_init * np.ones(param_shape),
|
||||
name='{}_betas'.format(self.name))
|
||||
self.trainable_weights = [self.alphas, self.betas]
|
||||
|
||||
@@ -156,7 +211,12 @@ class ParametricSoftplus(Layer):
|
||||
del self.initial_weights
|
||||
|
||||
def call(self, x, mask=None):
|
||||
return K.softplus(self.betas * x) * self.alphas
|
||||
if K.backend() == 'theano':
|
||||
return (K.softplus(K.pattern_broadcast(self.betas,
|
||||
self.param_broadcast) * x) *
|
||||
K.pattern_broadcast(self.alphas, self.param_broadcast))
|
||||
else:
|
||||
return K.softplus(self.betas * x) * self.alphas
|
||||
|
||||
def get_config(self):
|
||||
config = {'alpha_init': float(self.alpha_init),
|
||||
@@ -166,8 +226,10 @@ class ParametricSoftplus(Layer):
|
||||
|
||||
|
||||
class ThresholdedReLU(Layer):
|
||||
'''Thresholded Rectified Linear Unit:
|
||||
`f(x) = x for x > theta`
|
||||
"""Thresholded Rectified Linear Unit.
|
||||
|
||||
It follows:
|
||||
`f(x) = x for x > theta`,
|
||||
`f(x) = 0 otherwise`.
|
||||
|
||||
# Input shape
|
||||
@@ -182,8 +244,9 @@ class ThresholdedReLU(Layer):
|
||||
theta: float >= 0. Threshold location of activation.
|
||||
|
||||
# References
|
||||
- [Zero-Bias Autoencoders and the Benefits of Co-Adapting Features](http://arxiv.org/pdf/1402.3337.pdf)
|
||||
'''
|
||||
- [Zero-Bias Autoencoders and the Benefits of Co-Adapting Features](http://arxiv.org/abs/1402.3337)
|
||||
"""
|
||||
|
||||
def __init__(self, theta=1.0, **kwargs):
|
||||
self.supports_masking = True
|
||||
self.theta = K.cast_to_floatx(theta)
|
||||
@@ -199,7 +262,12 @@ class ThresholdedReLU(Layer):
|
||||
|
||||
|
||||
class SReLU(Layer):
|
||||
'''S-shaped Rectified Linear Unit.
|
||||
"""S-shaped Rectified Linear Unit.
|
||||
|
||||
It follows:
|
||||
`f(x) = t^r + a^r(x - t^r) for x >= t^r`,
|
||||
`f(x) = x for t^r > x > t^l`,
|
||||
`f(x) = t^l + a^l(x - t^l) for x <= t^l`.
|
||||
|
||||
# Input shape
|
||||
Arbitrary. Use the keyword argument `input_shape`
|
||||
@@ -214,34 +282,53 @@ class SReLU(Layer):
|
||||
a_left_init: initialization function for the left part slope
|
||||
t_right_init: initialization function for the right part intercept
|
||||
a_right_init: initialization function for the right part slope
|
||||
shared_axes: the axes along which to share learnable
|
||||
parameters for the activation function.
|
||||
For example, if the incoming feature maps
|
||||
are from a 2D convolution
|
||||
with output shape `(batch, height, width, channels)`,
|
||||
and you wish to share parameters across space
|
||||
so that each filter only has one set of parameters,
|
||||
set `shared_axes=[1, 2]`.
|
||||
|
||||
# References
|
||||
- [Deep Learning with S-shaped Rectified Linear Activation Units](http://arxiv.org/abs/1512.07030)
|
||||
'''
|
||||
"""
|
||||
|
||||
def __init__(self, t_left_init='zero', a_left_init='glorot_uniform',
|
||||
t_right_init='glorot_uniform', a_right_init='one', **kwargs):
|
||||
t_right_init='glorot_uniform', a_right_init='one',
|
||||
shared_axes=None, **kwargs):
|
||||
self.supports_masking = True
|
||||
self.t_left_init = t_left_init
|
||||
self.a_left_init = a_left_init
|
||||
self.t_right_init = t_right_init
|
||||
self.a_right_init = a_right_init
|
||||
if not isinstance(shared_axes, (list, tuple)):
|
||||
self.shared_axes = [shared_axes]
|
||||
else:
|
||||
self.shared_axes = list(shared_axes)
|
||||
super(SReLU, self).__init__(**kwargs)
|
||||
|
||||
def build(self, input_shape):
|
||||
input_shape = input_shape[1:]
|
||||
param_shape = list(input_shape[1:])
|
||||
self.param_broadcast = [False] * len(param_shape)
|
||||
if self.shared_axes[0] is not None:
|
||||
for i in self.shared_axes:
|
||||
param_shape[i - 1] = 1
|
||||
self.param_broadcast[i - 1] = True
|
||||
|
||||
t_left_init = initializations.get(self.t_left_init)
|
||||
a_left_init = initializations.get(self.a_left_init)
|
||||
t_right_init = initializations.get(self.t_right_init)
|
||||
a_right_init = initializations.get(self.a_right_init)
|
||||
|
||||
self.t_left = t_left_init(input_shape,
|
||||
self.t_left = t_left_init(param_shape,
|
||||
name='{}_t_left'.format(self.name))
|
||||
self.a_left = a_left_init(input_shape,
|
||||
self.a_left = a_left_init(param_shape,
|
||||
name='{}_a_left'.format(self.name))
|
||||
self.t_right = t_right_init(input_shape,
|
||||
self.t_right = t_right_init(param_shape,
|
||||
name='{}_t_right'.format(self.name))
|
||||
self.a_right = a_right_init(input_shape,
|
||||
self.a_right = a_right_init(param_shape,
|
||||
name='{}_a_right'.format(self.name))
|
||||
# ensure the right part is always to the right of the left
|
||||
self.t_right_actual = self.t_left + abs(self.t_right)
|
||||
@@ -249,11 +336,23 @@ class SReLU(Layer):
|
||||
self.t_right, self.a_right]
|
||||
|
||||
def call(self, x, mask=None):
|
||||
Y_left_and_center = self.t_left + K.relu(x - self.t_left,
|
||||
self.a_left,
|
||||
self.t_right_actual - self.t_left)
|
||||
Y_right = K.relu(x - self.t_right_actual) * self.a_right
|
||||
return Y_left_and_center + Y_right
|
||||
if K.backend() == 'theano':
|
||||
t_left = K.pattern_broadcast(self.t_left, self.param_broadcast)
|
||||
a_left = K.pattern_broadcast(self.a_left, self.param_broadcast)
|
||||
a_right = K.pattern_broadcast(self.a_right, self.param_broadcast)
|
||||
t_right_actual = K.pattern_broadcast(self.t_right_actual,
|
||||
self.param_broadcast)
|
||||
else:
|
||||
t_left = self.t_left
|
||||
a_left = self.a_left
|
||||
a_right = self.a_right
|
||||
t_right_actual = self.t_right_actual
|
||||
|
||||
y_left_and_center = t_left + K.relu(x - t_left,
|
||||
a_left,
|
||||
t_right_actual - t_left)
|
||||
y_right = K.relu(x - t_right_actual) * a_right
|
||||
return y_left_and_center + y_right
|
||||
|
||||
def get_config(self):
|
||||
config = {'t_left_init': self.t_left_init,
|
||||
|
||||
+460
-302
Diferenças do arquivo suprimidas por serem muito extensas
Carregar Diff
@@ -1,14 +1,18 @@
|
||||
from .. import backend as K
|
||||
from .. import activations, initializations, regularizers
|
||||
from .. import activations
|
||||
from .. import initializations
|
||||
from .. import regularizers
|
||||
|
||||
import numpy as np
|
||||
from ..engine import Layer, InputSpec
|
||||
from ..engine import Layer
|
||||
from ..engine import InputSpec
|
||||
from ..utils.np_utils import conv_output_length
|
||||
import warnings
|
||||
|
||||
|
||||
class ConvRecurrent2D(Layer):
|
||||
'''Abstract base class for convolutional recurrent layers.
|
||||
"""Abstract base class for convolutional recurrent layers.
|
||||
|
||||
Do not use in a model -- it's not a functional layer!
|
||||
|
||||
ConvLSTM2D
|
||||
@@ -73,7 +77,7 @@ class ConvRecurrent2D(Layer):
|
||||
|
||||
To reset the states of your model, call `.reset_states()` on either
|
||||
a specific layer, or on your entire model.
|
||||
'''
|
||||
"""
|
||||
|
||||
def __init__(self, weights=None, nb_row=None, nb_col=None, nb_filter=None,
|
||||
return_sequences=False, go_backwards=False, stateful=False,
|
||||
@@ -105,7 +109,7 @@ class ConvRecurrent2D(Layer):
|
||||
rows = input_shape[2]
|
||||
cols = input_shape[3]
|
||||
else:
|
||||
raise Exception('Invalid dim_ordering: ' + self.dim_ordering)
|
||||
raise ValueError('Invalid dim_ordering:', self.dim_ordering)
|
||||
|
||||
rows = conv_output_length(rows, self.nb_row,
|
||||
self.border_mode, self.subsample[0])
|
||||
@@ -119,15 +123,11 @@ class ConvRecurrent2D(Layer):
|
||||
elif self.dim_ordering == 'tf':
|
||||
return (input_shape[0], input_shape[1],
|
||||
rows, cols, self.nb_filter)
|
||||
else:
|
||||
raise Exception('Invalid dim_ordering: ' + self.dim_ordering)
|
||||
else:
|
||||
if self.dim_ordering == 'th':
|
||||
return (input_shape[0], self.nb_filter, rows, cols)
|
||||
elif self.dim_ordering == 'tf':
|
||||
return (input_shape[0], rows, cols, self.nb_filter)
|
||||
else:
|
||||
raise Exception('Invalid dim_ordering: ' + self.dim_ordering)
|
||||
|
||||
def step(self, x, states):
|
||||
raise NotImplementedError
|
||||
@@ -191,7 +191,7 @@ class ConvRecurrent2D(Layer):
|
||||
|
||||
|
||||
class ConvLSTM2D(ConvRecurrent2D):
|
||||
'''Convolutional LSTM.
|
||||
"""Convolutional LSTM.
|
||||
|
||||
# Input shape
|
||||
- if dim_ordering='th'
|
||||
@@ -225,7 +225,7 @@ class ConvLSTM2D(ConvRecurrent2D):
|
||||
nb_row: Number of rows in the convolution kernel.
|
||||
nb_col: Number of columns in the convolution kernel.
|
||||
border_mode: 'valid' or 'same'.
|
||||
sub_sample: tuple of length 2. Factor by which to subsample output.
|
||||
subsample: tuple of length 2. Factor by which to subsample output.
|
||||
Also called strides elsewhere.
|
||||
dim_ordering: 'tf' if the features are at the last dimension or 'th'
|
||||
stateful : Boolean (default False). If True, the last state
|
||||
@@ -247,10 +247,11 @@ class ConvLSTM2D(ConvRecurrent2D):
|
||||
|
||||
# References
|
||||
- [Convolutional LSTM Network: A Machine Learning Approach for
|
||||
Precipitation Nowcasting](http://arxiv.org/pdf/1506.04214v1.pdf)
|
||||
Precipitation Nowcasting](http://arxiv.org/abs/1506.04214v1)
|
||||
The current implementation does not include the feedback loop on the
|
||||
cells output
|
||||
'''
|
||||
"""
|
||||
|
||||
def __init__(self, nb_filter, nb_row, nb_col,
|
||||
init='glorot_uniform', inner_init='orthogonal',
|
||||
forget_bias_init='one', activation='tanh',
|
||||
@@ -310,7 +311,7 @@ class ConvLSTM2D(ConvRecurrent2D):
|
||||
self.W_shape = (self.nb_row, self.nb_col,
|
||||
stack_size, self.nb_filter)
|
||||
else:
|
||||
raise Exception('Invalid dim_ordering: ' + self.dim_ordering)
|
||||
raise ValueError('Invalid dim_ordering:', self.dim_ordering)
|
||||
|
||||
if self.dim_ordering == 'th':
|
||||
self.W_shape1 = (self.nb_filter, self.nb_filter,
|
||||
@@ -318,8 +319,6 @@ class ConvLSTM2D(ConvRecurrent2D):
|
||||
elif self.dim_ordering == 'tf':
|
||||
self.W_shape1 = (self.nb_row, self.nb_col,
|
||||
self.nb_filter, self.nb_filter)
|
||||
else:
|
||||
raise Exception('Invalid dim_ordering: ' + self.dim_ordering)
|
||||
|
||||
if self.stateful:
|
||||
self.reset_states()
|
||||
@@ -378,9 +377,9 @@ class ConvLSTM2D(ConvRecurrent2D):
|
||||
input_shape = self.input_spec[0].shape
|
||||
output_shape = self.get_output_shape_for(input_shape)
|
||||
if not input_shape[0]:
|
||||
raise Exception('If a RNN is stateful, a complete ' +
|
||||
'input_shape must be provided ' +
|
||||
'(including batch size).')
|
||||
raise ValueError('If a RNN is stateful, a complete ' +
|
||||
'input_shape must be provided ' +
|
||||
'(including batch size).')
|
||||
|
||||
if self.return_sequences:
|
||||
out_row, out_col, out_filter = output_shape[2:]
|
||||
@@ -417,7 +416,7 @@ class ConvLSTM2D(ConvRecurrent2D):
|
||||
elif self.dim_ordering == 'tf':
|
||||
conv_out = conv_out + K.reshape(b, (1, 1, 1, self.nb_filter))
|
||||
else:
|
||||
raise Exception('Invalid dim_ordering: ' + self.dim_ordering)
|
||||
raise ValueError('Invalid dim_ordering:', self.dim_ordering)
|
||||
|
||||
return conv_out
|
||||
|
||||
@@ -483,7 +482,7 @@ class ConvLSTM2D(ConvRecurrent2D):
|
||||
ones = K.sum(ones, axis=1)
|
||||
ones = self.conv_step(ones, K.zeros(self.W_shape),
|
||||
border_mode=self.border_mode)
|
||||
ones = ones + 1
|
||||
ones += 1
|
||||
B_U = [K.in_train_phase(K.dropout(ones, self.dropout_U), ones)
|
||||
for _ in range(4)]
|
||||
constants.append(B_U)
|
||||
@@ -493,7 +492,7 @@ class ConvLSTM2D(ConvRecurrent2D):
|
||||
if 0 < self.dropout_W < 1:
|
||||
ones = K.zeros_like(x)
|
||||
ones = K.sum(ones, axis=1)
|
||||
ones = ones + 1
|
||||
ones += 1
|
||||
B_W = [K.in_train_phase(K.dropout(ones, self.dropout_W), ones)
|
||||
for _ in range(4)]
|
||||
constants.append(B_W)
|
||||
|
||||
+294
-247
@@ -10,15 +10,19 @@ import types as python_types
|
||||
import warnings
|
||||
|
||||
from .. import backend as K
|
||||
from .. import activations, initializations, regularizers, constraints
|
||||
from ..engine import InputSpec, Layer, Merge
|
||||
from ..regularizers import ActivityRegularizer
|
||||
from ..utils.generic_utils import func_dump, func_load
|
||||
from .. import activations
|
||||
from .. import initializations
|
||||
from .. import regularizers
|
||||
from .. import constraints
|
||||
from ..engine import InputSpec
|
||||
from ..engine import Layer
|
||||
from ..engine import Merge
|
||||
from ..utils.generic_utils import func_dump
|
||||
from ..utils.generic_utils import func_load
|
||||
|
||||
|
||||
class Masking(Layer):
|
||||
'''Masks an input sequence by using a mask value to
|
||||
identify timesteps to be skipped.
|
||||
"""Masks a sequence by using a mask value to skip timesteps.
|
||||
|
||||
For each timestep in the input tensor (dimension #1 in the tensor),
|
||||
if all values in the input tensor at that timestep
|
||||
@@ -43,14 +47,15 @@ class Masking(Layer):
|
||||
model.add(Masking(mask_value=0., input_shape=(timesteps, features)))
|
||||
model.add(LSTM(32))
|
||||
```
|
||||
'''
|
||||
"""
|
||||
|
||||
def __init__(self, mask_value=0., **kwargs):
|
||||
self.supports_masking = True
|
||||
self.mask_value = mask_value
|
||||
super(Masking, self).__init__(**kwargs)
|
||||
|
||||
def compute_mask(self, input, input_mask=None):
|
||||
return K.any(K.not_equal(input, self.mask_value), axis=-1)
|
||||
def compute_mask(self, x, input_mask=None):
|
||||
return K.any(K.not_equal(x, self.mask_value), axis=-1)
|
||||
|
||||
def call(self, x, mask=None):
|
||||
boolean_mask = K.any(K.not_equal(x, self.mask_value),
|
||||
@@ -64,30 +69,45 @@ class Masking(Layer):
|
||||
|
||||
|
||||
class Dropout(Layer):
|
||||
'''Applies Dropout to the input. Dropout consists in randomly setting
|
||||
"""Applies Dropout to the input.
|
||||
|
||||
Dropout consists in randomly setting
|
||||
a fraction `p` of input units to 0 at each update during training time,
|
||||
which helps prevent overfitting.
|
||||
|
||||
# Arguments
|
||||
p: float between 0 and 1. Fraction of the input units to drop.
|
||||
noise_shape: 1D integer tensor representing the shape of the
|
||||
binary dropout mask that will be multiplied with the input.
|
||||
For instance, if your inputs have shape
|
||||
`(batch_size, timesteps, features)` and
|
||||
you want the dropout mask to be the same for all timesteps,
|
||||
you can use `noise_shape=(batch_size, 1, features)`.
|
||||
seed: A Python integer to use as random seed.
|
||||
|
||||
# References
|
||||
- [Dropout: A Simple Way to Prevent Neural Networks from Overfitting](http://www.cs.toronto.edu/~rsalakhu/papers/srivastava14a.pdf)
|
||||
'''
|
||||
def __init__(self, p, **kwargs):
|
||||
"""
|
||||
|
||||
def __init__(self, p, noise_shape=None, seed=None, **kwargs):
|
||||
self.p = p
|
||||
self.noise_shape = noise_shape
|
||||
self.seed = seed
|
||||
if 0. < self.p < 1.:
|
||||
self.uses_learning_phase = True
|
||||
self.supports_masking = True
|
||||
super(Dropout, self).__init__(**kwargs)
|
||||
|
||||
def _get_noise_shape(self, x):
|
||||
return None
|
||||
def _get_noise_shape(self, _):
|
||||
return self.noise_shape
|
||||
|
||||
def call(self, x, mask=None):
|
||||
if 0. < self.p < 1.:
|
||||
noise_shape = self._get_noise_shape(x)
|
||||
x = K.in_train_phase(K.dropout(x, self.p, noise_shape), x)
|
||||
|
||||
def dropped_inputs():
|
||||
return K.dropout(x, self.p, noise_shape, seed=self.seed)
|
||||
x = K.in_train_phase(dropped_inputs, lambda: x)
|
||||
return x
|
||||
|
||||
def get_config(self):
|
||||
@@ -97,7 +117,9 @@ class Dropout(Layer):
|
||||
|
||||
|
||||
class SpatialDropout1D(Dropout):
|
||||
'''This version performs the same function as Dropout, however it drops
|
||||
"""Spatial 1D version of Dropout.
|
||||
|
||||
This version performs the same function as Dropout, however it drops
|
||||
entire 1D feature maps instead of individual elements. If adjacent frames
|
||||
within feature maps are strongly correlated (as is normally the case in
|
||||
early convolution layers) then regular dropout will not regularize the
|
||||
@@ -116,8 +138,9 @@ class SpatialDropout1D(Dropout):
|
||||
Same as input
|
||||
|
||||
# References
|
||||
- [Efficient Object Localization Using Convolutional Networks](https://arxiv.org/pdf/1411.4280.pdf)
|
||||
'''
|
||||
- [Efficient Object Localization Using Convolutional Networks](https://arxiv.org/abs/1411.4280)
|
||||
"""
|
||||
|
||||
def __init__(self, p, **kwargs):
|
||||
super(SpatialDropout1D, self).__init__(p, **kwargs)
|
||||
|
||||
@@ -125,10 +148,12 @@ class SpatialDropout1D(Dropout):
|
||||
input_shape = K.shape(x)
|
||||
noise_shape = (input_shape[0], 1, input_shape[2])
|
||||
return noise_shape
|
||||
|
||||
|
||||
|
||||
|
||||
class SpatialDropout2D(Dropout):
|
||||
'''This version performs the same function as Dropout, however it drops
|
||||
"""Spatial 2D version of Dropout.
|
||||
|
||||
This version performs the same function as Dropout, however it drops
|
||||
entire 2D feature maps instead of individual elements. If adjacent pixels
|
||||
within feature maps are strongly correlated (as is normally the case in
|
||||
early convolution layers) then regular dropout will not regularize the
|
||||
@@ -154,8 +179,9 @@ class SpatialDropout2D(Dropout):
|
||||
Same as input
|
||||
|
||||
# References
|
||||
- [Efficient Object Localization Using Convolutional Networks](https://arxiv.org/pdf/1411.4280.pdf)
|
||||
'''
|
||||
- [Efficient Object Localization Using Convolutional Networks](https://arxiv.org/abs/1411.4280)
|
||||
"""
|
||||
|
||||
def __init__(self, p, dim_ordering='default', **kwargs):
|
||||
if dim_ordering == 'default':
|
||||
dim_ordering = K.image_dim_ordering()
|
||||
@@ -170,12 +196,14 @@ class SpatialDropout2D(Dropout):
|
||||
elif self.dim_ordering == 'tf':
|
||||
noise_shape = (input_shape[0], 1, 1, input_shape[3])
|
||||
else:
|
||||
raise Exception('Invalid dim_ordering: ' + self.dim_ordering)
|
||||
raise ValueError('Invalid dim_ordering:', self.dim_ordering)
|
||||
return noise_shape
|
||||
|
||||
|
||||
class SpatialDropout3D(Dropout):
|
||||
'''This version performs the same function as Dropout, however it drops
|
||||
"""Spatial 3D version of Dropout.
|
||||
|
||||
This version performs the same function as Dropout, however it drops
|
||||
entire 3D feature maps instead of individual elements. If adjacent voxels
|
||||
within feature maps are strongly correlated (as is normally the case in
|
||||
early convolution layers) then regular dropout will not regularize the
|
||||
@@ -202,8 +230,9 @@ class SpatialDropout3D(Dropout):
|
||||
Same as input
|
||||
|
||||
# References
|
||||
- [Efficient Object Localization Using Convolutional Networks](https://arxiv.org/pdf/1411.4280.pdf)
|
||||
'''
|
||||
- [Efficient Object Localization Using Convolutional Networks](https://arxiv.org/abs/1411.4280)
|
||||
"""
|
||||
|
||||
def __init__(self, p, dim_ordering='default', **kwargs):
|
||||
if dim_ordering == 'default':
|
||||
dim_ordering = K.image_dim_ordering()
|
||||
@@ -218,12 +247,12 @@ class SpatialDropout3D(Dropout):
|
||||
elif self.dim_ordering == 'tf':
|
||||
noise_shape = (input_shape[0], 1, 1, 1, input_shape[4])
|
||||
else:
|
||||
raise Exception('Invalid dim_ordering: ' + self.dim_ordering)
|
||||
raise ValueError('Invalid dim_ordering:', self.dim_ordering)
|
||||
return noise_shape
|
||||
|
||||
|
||||
class Activation(Layer):
|
||||
'''Applies an activation function to an output.
|
||||
"""Applies an activation function to an output.
|
||||
|
||||
# Arguments
|
||||
activation: name of activation function to use
|
||||
@@ -237,7 +266,8 @@ class Activation(Layer):
|
||||
|
||||
# Output shape
|
||||
Same shape as input.
|
||||
'''
|
||||
"""
|
||||
|
||||
def __init__(self, activation, **kwargs):
|
||||
self.supports_masking = True
|
||||
self.activation = activations.get(activation)
|
||||
@@ -253,7 +283,7 @@ class Activation(Layer):
|
||||
|
||||
|
||||
class Reshape(Layer):
|
||||
'''Reshapes an output to a certain shape.
|
||||
"""Reshapes an output to a certain shape.
|
||||
|
||||
# Arguments
|
||||
target_shape: target shape. Tuple of integers,
|
||||
@@ -280,22 +310,25 @@ class Reshape(Layer):
|
||||
# as intermediate layer in a Sequential model
|
||||
model.add(Reshape((6, 2)))
|
||||
# now: model.output_shape == (None, 6, 2)
|
||||
|
||||
# also supports shape inference using `-1` as dimension
|
||||
model.add(Reshape((-1, 2, 2)))
|
||||
# now: model.output_shape == (None, 3, 2, 2)
|
||||
```
|
||||
'''
|
||||
"""
|
||||
|
||||
def __init__(self, target_shape, **kwargs):
|
||||
super(Reshape, self).__init__(**kwargs)
|
||||
self.target_shape = tuple(target_shape)
|
||||
|
||||
def _fix_unknown_dimension(self, input_shape, output_shape):
|
||||
'''Find and replace a single missing dimension in an output shape
|
||||
given an input shape.
|
||||
"""Find and replace a missing dimension in an output shape.
|
||||
|
||||
A near direct port of the internal Numpy function _fix_unknown_dimension
|
||||
in numpy/core/src/multiarray/shape.c
|
||||
This is a near direct port of the internal Numpy function
|
||||
`_fix_unknown_dimension` in `numpy/core/src/multiarray/shape.c`
|
||||
|
||||
# Arguments
|
||||
input_shape: shape of array being reshaped
|
||||
|
||||
output_shape: desired shape of the array with at most
|
||||
a single -1 which indicates a dimension that should be
|
||||
derived from the input shape.
|
||||
@@ -306,7 +339,11 @@ class Reshape(Layer):
|
||||
Raises a ValueError if the total array size of the output_shape is
|
||||
different than the input_shape, or more than one unknown dimension
|
||||
is specified.
|
||||
'''
|
||||
|
||||
# Raises
|
||||
ValueError: in case of invalid values
|
||||
for `input_shape` or `output_shape`.
|
||||
"""
|
||||
output_shape = list(output_shape)
|
||||
|
||||
msg = 'total size of new array must be unchanged'
|
||||
@@ -317,7 +354,7 @@ class Reshape(Layer):
|
||||
if unknown is None:
|
||||
unknown = index
|
||||
else:
|
||||
raise ValueError('can only specify one unknown dimension')
|
||||
raise ValueError('Can only specify one unknown dimension.')
|
||||
else:
|
||||
known *= dim
|
||||
|
||||
@@ -332,7 +369,8 @@ class Reshape(Layer):
|
||||
return tuple(output_shape)
|
||||
|
||||
def get_output_shape_for(self, input_shape):
|
||||
return (input_shape[0],) + self._fix_unknown_dimension(input_shape[1:], self.target_shape)
|
||||
return (input_shape[0],) + self._fix_unknown_dimension(input_shape[1:],
|
||||
self.target_shape)
|
||||
|
||||
def call(self, x, mask=None):
|
||||
# In case the target shape is not fully defined,
|
||||
@@ -349,7 +387,7 @@ class Reshape(Layer):
|
||||
elif hasattr(K, 'int_shape'):
|
||||
input_shape = K.int_shape(x)
|
||||
if input_shape is not None:
|
||||
target_shape = self.get_output_shape_for(input_shape)
|
||||
target_shape = self.get_output_shape_for(input_shape)[1:]
|
||||
return K.reshape(x, (-1,) + target_shape)
|
||||
|
||||
def get_config(self):
|
||||
@@ -359,7 +397,7 @@ class Reshape(Layer):
|
||||
|
||||
|
||||
class Permute(Layer):
|
||||
'''Permutes the dimensions of the input according to a given pattern.
|
||||
"""Permutes the dimensions of the input according to a given pattern.
|
||||
|
||||
Useful for e.g. connecting RNNs and convnets together.
|
||||
|
||||
@@ -386,7 +424,8 @@ class Permute(Layer):
|
||||
# Output shape
|
||||
Same as the input shape, but with the dimensions re-ordered according
|
||||
to the specified pattern.
|
||||
'''
|
||||
"""
|
||||
|
||||
def __init__(self, dims, **kwargs):
|
||||
self.dims = tuple(dims)
|
||||
super(Permute, self).__init__(**kwargs)
|
||||
@@ -396,7 +435,7 @@ class Permute(Layer):
|
||||
output_shape = copy.copy(input_shape)
|
||||
for i, dim in enumerate(self.dims):
|
||||
target_dim = input_shape[dim]
|
||||
output_shape[i+1] = target_dim
|
||||
output_shape[i + 1] = target_dim
|
||||
return tuple(output_shape)
|
||||
|
||||
def call(self, x, mask=None):
|
||||
@@ -409,31 +448,34 @@ class Permute(Layer):
|
||||
|
||||
|
||||
class Flatten(Layer):
|
||||
'''Flattens the input. Does not affect the batch size.
|
||||
"""Flattens the input. Does not affect the batch size.
|
||||
|
||||
# Example
|
||||
|
||||
```python
|
||||
model = Sequential()
|
||||
model.add(Convolution2D(64, 3, 3, border_mode='same', input_shape=(3, 32, 32)))
|
||||
model.add(Convolution2D(64, 3, 3,
|
||||
border_mode='same',
|
||||
input_shape=(3, 32, 32)))
|
||||
# now: model.output_shape == (None, 64, 32, 32)
|
||||
|
||||
model.add(Flatten())
|
||||
# now: model.output_shape == (None, 65536)
|
||||
```
|
||||
'''
|
||||
"""
|
||||
|
||||
def __init__(self, **kwargs):
|
||||
self.input_spec = [InputSpec(ndim='3+')]
|
||||
super(Flatten, self).__init__(**kwargs)
|
||||
|
||||
def get_output_shape_for(self, input_shape):
|
||||
if not all(input_shape[1:]):
|
||||
raise Exception('The shape of the input to "Flatten" '
|
||||
'is not fully defined '
|
||||
'(got ' + str(input_shape[1:]) + '. '
|
||||
'Make sure to pass a complete "input_shape" '
|
||||
'or "batch_input_shape" argument to the first '
|
||||
'layer in your model.')
|
||||
raise ValueError('The shape of the input to "Flatten" '
|
||||
'is not fully defined '
|
||||
'(got ' + str(input_shape[1:]) + '. '
|
||||
'Make sure to pass a complete "input_shape" '
|
||||
'or "batch_input_shape" argument to the first '
|
||||
'layer in your model.')
|
||||
return (input_shape[0], np.prod(input_shape[1:]))
|
||||
|
||||
def call(self, x, mask=None):
|
||||
@@ -441,7 +483,7 @@ class Flatten(Layer):
|
||||
|
||||
|
||||
class RepeatVector(Layer):
|
||||
'''Repeats the input n times.
|
||||
"""Repeats the input n times.
|
||||
|
||||
# Example
|
||||
|
||||
@@ -463,7 +505,8 @@ class RepeatVector(Layer):
|
||||
|
||||
# Output shape
|
||||
3D tensor of shape `(nb_samples, n, features)`.
|
||||
'''
|
||||
"""
|
||||
|
||||
def __init__(self, n, **kwargs):
|
||||
self.n = n
|
||||
self.input_spec = [InputSpec(ndim=2)]
|
||||
@@ -482,8 +525,7 @@ class RepeatVector(Layer):
|
||||
|
||||
|
||||
class Lambda(Layer):
|
||||
'''Used for evaluating an arbitrary Theano / TensorFlow expression
|
||||
on the output of the previous layer.
|
||||
"""Used for evaluating an arbitrary expression on an input.
|
||||
|
||||
# Examples
|
||||
|
||||
@@ -509,7 +551,8 @@ class Lambda(Layer):
|
||||
shape[-1] *= 2
|
||||
return tuple(shape)
|
||||
|
||||
model.add(Lambda(antirectifier, output_shape=antirectifier_output_shape))
|
||||
model.add(Lambda(antirectifier,
|
||||
output_shape=antirectifier_output_shape))
|
||||
```
|
||||
|
||||
# Arguments
|
||||
@@ -520,7 +563,8 @@ class Lambda(Layer):
|
||||
If a tuple, it only specifies the first dimension onward;
|
||||
sample dimension is assumed either the same as the input:
|
||||
`output_shape = (input_shape[0], ) + output_shape`
|
||||
or, the input is `None` and the sample dimension is also `None`:
|
||||
or, the input is `None` and
|
||||
the sample dimension is also `None`:
|
||||
`output_shape = (None, ) + output_shape`
|
||||
If a function, it specifies the entire shape as a function of the
|
||||
input shape: `output_shape = f(input_shape)`
|
||||
@@ -534,49 +578,58 @@ class Lambda(Layer):
|
||||
|
||||
# Output shape
|
||||
Specified by `output_shape` argument.
|
||||
'''
|
||||
def __init__(self, function, output_shape=None, arguments={}, **kwargs):
|
||||
"""
|
||||
|
||||
def __init__(self, function, output_shape=None, arguments=None, **kwargs):
|
||||
self.function = function
|
||||
self.arguments = arguments
|
||||
self.arguments = arguments if arguments else {}
|
||||
self.supports_masking = False
|
||||
|
||||
if output_shape is None:
|
||||
self._output_shape = None
|
||||
elif type(output_shape) in {tuple, list}:
|
||||
elif isinstance(output_shape, (tuple, list)):
|
||||
self._output_shape = tuple(output_shape)
|
||||
else:
|
||||
if not hasattr(output_shape, '__call__'):
|
||||
raise Exception('In Lambda, `output_shape` '
|
||||
if not callable(output_shape):
|
||||
raise TypeError('In Lambda, `output_shape` '
|
||||
'must be a list, a tuple, or a function.')
|
||||
self._output_shape = output_shape
|
||||
super(Lambda, self).__init__(**kwargs)
|
||||
|
||||
def get_output_shape_for(self, input_shape):
|
||||
if self._output_shape is None:
|
||||
# if TensorFlow, we can infer the output shape directly:
|
||||
if K._BACKEND == 'tensorflow':
|
||||
if type(input_shape) is list:
|
||||
# With TensorFlow, we can infer the output shape directly:
|
||||
if K.backend() == 'tensorflow':
|
||||
if isinstance(input_shape, list):
|
||||
xs = [K.placeholder(shape=shape) for shape in input_shape]
|
||||
x = self.call(xs)
|
||||
else:
|
||||
x = K.placeholder(shape=input_shape)
|
||||
x = self.call(x)
|
||||
if type(x) is list:
|
||||
if isinstance(x, list):
|
||||
return [K.int_shape(x_elem) for x_elem in x]
|
||||
else:
|
||||
return K.int_shape(x)
|
||||
# otherwise, we default to the input shape
|
||||
# Otherwise, we default to the input shape.
|
||||
warnings.warn('`output_shape` argument not specified for layer {} '
|
||||
'and cannot be automatically inferred '
|
||||
'with the Theano backend. '
|
||||
'Defaulting to output shape `{}` '
|
||||
'(same as input shape). '
|
||||
'If the expected output shape is different, '
|
||||
'specify it via the `output_shape` argument.'
|
||||
.format(self.name, input_shape))
|
||||
return input_shape
|
||||
elif type(self._output_shape) in {tuple, list}:
|
||||
if type(input_shape) is list:
|
||||
elif isinstance(self._output_shape, (tuple, list)):
|
||||
if isinstance(input_shape, list):
|
||||
nb_samples = input_shape[0][0]
|
||||
else:
|
||||
nb_samples = input_shape[0] if input_shape else None
|
||||
return (nb_samples,) + tuple(self._output_shape)
|
||||
else:
|
||||
shape = self._output_shape(input_shape)
|
||||
if type(shape) not in {list, tuple}:
|
||||
raise Exception('output_shape function must return a tuple')
|
||||
if not isinstance(shape, (list, tuple)):
|
||||
raise ValueError('output_shape function must return a tuple')
|
||||
return tuple(shape)
|
||||
|
||||
def call(self, x, mask=None):
|
||||
@@ -613,20 +666,27 @@ class Lambda(Layer):
|
||||
return dict(list(base_config.items()) + list(config.items()))
|
||||
|
||||
@classmethod
|
||||
def from_config(cls, config):
|
||||
def from_config(cls, config, custom_objects=None):
|
||||
# Insert custom objects into globals.
|
||||
if custom_objects:
|
||||
globs = globals().copy()
|
||||
globs.update(custom_objects)
|
||||
else:
|
||||
globs = globals()
|
||||
|
||||
function_type = config.pop('function_type')
|
||||
if function_type == 'function':
|
||||
function = globals()[config['function']]
|
||||
function = globs[config['function']]
|
||||
elif function_type == 'lambda':
|
||||
function = func_load(config['function'], globs=globals())
|
||||
function = func_load(config['function'], globs=globs)
|
||||
else:
|
||||
raise Exception('Unknown function type: ' + function_type)
|
||||
raise TypeError('Unknown function type:', function_type)
|
||||
|
||||
output_shape_type = config.pop('output_shape_type')
|
||||
if output_shape_type == 'function':
|
||||
output_shape = globals()[config['output_shape']]
|
||||
output_shape = globs[config['output_shape']]
|
||||
elif output_shape_type == 'lambda':
|
||||
output_shape = func_load(config['output_shape'], globs=globals())
|
||||
output_shape = func_load(config['output_shape'], globs=globs)
|
||||
else:
|
||||
output_shape = config['output_shape']
|
||||
|
||||
@@ -636,7 +696,7 @@ class Lambda(Layer):
|
||||
|
||||
|
||||
class Dense(Layer):
|
||||
'''Just your regular fully connected NN layer.
|
||||
"""Just your regular densely-connected NN layer.
|
||||
|
||||
# Example
|
||||
|
||||
@@ -681,17 +741,23 @@ class Dense(Layer):
|
||||
(eg. maxnorm, nonneg), applied to the main weights matrix.
|
||||
b_constraint: instance of the [constraints](../constraints.md) module,
|
||||
applied to the bias.
|
||||
bias: whether to include a bias (i.e. make the layer affine rather than linear).
|
||||
input_dim: dimensionality of the input (integer).
|
||||
This argument (or alternatively, the keyword argument `input_shape`)
|
||||
bias: whether to include a bias
|
||||
(i.e. make the layer affine rather than linear).
|
||||
input_dim: dimensionality of the input (integer). This argument
|
||||
(or alternatively, the keyword argument `input_shape`)
|
||||
is required when using this layer as the first layer in a model.
|
||||
|
||||
# Input shape
|
||||
2D tensor with shape: `(nb_samples, input_dim)`.
|
||||
nD tensor with shape: `(nb_samples, ..., input_dim)`.
|
||||
The most common situation would be
|
||||
a 2D input with shape `(nb_samples, input_dim)`.
|
||||
|
||||
# Output shape
|
||||
2D tensor with shape: `(nb_samples, output_dim)`.
|
||||
'''
|
||||
nD tensor with shape: `(nb_samples, ..., output_dim)`.
|
||||
For instance, for a 2D input with shape `(nb_samples, input_dim)`,
|
||||
the output would have shape `(nb_samples, output_dim)`.
|
||||
"""
|
||||
|
||||
def __init__(self, output_dim, init='glorot_uniform',
|
||||
activation=None, weights=None,
|
||||
W_regularizer=None, b_regularizer=None, activity_regularizer=None,
|
||||
@@ -711,45 +777,32 @@ class Dense(Layer):
|
||||
|
||||
self.bias = bias
|
||||
self.initial_weights = weights
|
||||
self.input_spec = [InputSpec(ndim=2)]
|
||||
self.input_spec = [InputSpec(ndim='2+')]
|
||||
|
||||
if self.input_dim:
|
||||
kwargs['input_shape'] = (self.input_dim,)
|
||||
super(Dense, self).__init__(**kwargs)
|
||||
|
||||
def build(self, input_shape):
|
||||
assert len(input_shape) == 2
|
||||
input_dim = input_shape[1]
|
||||
assert len(input_shape) >= 2
|
||||
input_dim = input_shape[-1]
|
||||
self.input_dim = input_dim
|
||||
self.input_spec = [InputSpec(dtype=K.floatx(),
|
||||
shape=(None, input_dim))]
|
||||
ndim='2+')]
|
||||
|
||||
self.W = self.init((input_dim, self.output_dim),
|
||||
name='{}_W'.format(self.name))
|
||||
self.W = self.add_weight((input_dim, self.output_dim),
|
||||
initializer=self.init,
|
||||
name='{}_W'.format(self.name),
|
||||
regularizer=self.W_regularizer,
|
||||
constraint=self.W_constraint)
|
||||
if self.bias:
|
||||
self.b = K.zeros((self.output_dim,),
|
||||
name='{}_b'.format(self.name))
|
||||
self.trainable_weights = [self.W, self.b]
|
||||
self.b = self.add_weight((self.output_dim,),
|
||||
initializer='zero',
|
||||
name='{}_b'.format(self.name),
|
||||
regularizer=self.b_regularizer,
|
||||
constraint=self.b_constraint)
|
||||
else:
|
||||
self.trainable_weights = [self.W]
|
||||
|
||||
self.regularizers = []
|
||||
if self.W_regularizer:
|
||||
self.W_regularizer.set_param(self.W)
|
||||
self.regularizers.append(self.W_regularizer)
|
||||
|
||||
if self.bias and self.b_regularizer:
|
||||
self.b_regularizer.set_param(self.b)
|
||||
self.regularizers.append(self.b_regularizer)
|
||||
|
||||
if self.activity_regularizer:
|
||||
self.activity_regularizer.set_layer(self)
|
||||
self.regularizers.append(self.activity_regularizer)
|
||||
|
||||
self.constraints = {}
|
||||
if self.W_constraint:
|
||||
self.constraints[self.W] = self.W_constraint
|
||||
if self.bias and self.b_constraint:
|
||||
self.constraints[self.b] = self.b_constraint
|
||||
self.b = None
|
||||
|
||||
if self.initial_weights is not None:
|
||||
self.set_weights(self.initial_weights)
|
||||
@@ -763,8 +816,11 @@ class Dense(Layer):
|
||||
return self.activation(output)
|
||||
|
||||
def get_output_shape_for(self, input_shape):
|
||||
assert input_shape and len(input_shape) == 2
|
||||
return (input_shape[0], self.output_dim)
|
||||
assert input_shape and len(input_shape) >= 2
|
||||
assert input_shape[-1] and input_shape[-1] == self.input_dim
|
||||
output_shape = list(input_shape)
|
||||
output_shape[-1] = self.output_dim
|
||||
return tuple(output_shape)
|
||||
|
||||
def get_config(self):
|
||||
config = {'output_dim': self.output_dim,
|
||||
@@ -782,8 +838,7 @@ class Dense(Layer):
|
||||
|
||||
|
||||
class ActivityRegularization(Layer):
|
||||
'''Layer that passes through its input unchanged, but applies an update
|
||||
to the cost function based on the activity.
|
||||
"""Layer that applies an update to the cost function based input activity.
|
||||
|
||||
# Arguments
|
||||
l1: L1 regularization factor (positive float).
|
||||
@@ -796,16 +851,16 @@ class ActivityRegularization(Layer):
|
||||
|
||||
# Output shape
|
||||
Same shape as input.
|
||||
'''
|
||||
"""
|
||||
|
||||
def __init__(self, l1=0., l2=0., **kwargs):
|
||||
self.supports_masking = True
|
||||
self.l1 = l1
|
||||
self.l2 = l2
|
||||
|
||||
super(ActivityRegularization, self).__init__(**kwargs)
|
||||
activity_regularizer = ActivityRegularizer(l1=l1, l2=l2)
|
||||
activity_regularizer.set_layer(self)
|
||||
self.regularizers = [activity_regularizer]
|
||||
self.activity_regularizer = regularizers.L1L2Regularizer(l1=l1, l2=l2)
|
||||
self.regularizers = [self.activity_regularizer]
|
||||
|
||||
def get_config(self):
|
||||
config = {'l1': self.l1,
|
||||
@@ -815,7 +870,7 @@ class ActivityRegularization(Layer):
|
||||
|
||||
|
||||
class MaxoutDense(Layer):
|
||||
'''A dense maxout layer.
|
||||
"""A dense maxout layer.
|
||||
|
||||
A `MaxoutDense` layer takes the element-wise maximum of
|
||||
`nb_feature` `Dense(input_dim, output_dim)` linear layers.
|
||||
@@ -848,9 +903,10 @@ class MaxoutDense(Layer):
|
||||
(eg. maxnorm, nonneg), applied to the main weights matrix.
|
||||
b_constraint: instance of the [constraints](../constraints.md) module,
|
||||
applied to the bias.
|
||||
bias: whether to include a bias (i.e. make the layer affine rather than linear).
|
||||
input_dim: dimensionality of the input (integer).
|
||||
This argument (or alternatively, the keyword argument `input_shape`)
|
||||
bias: whether to include a bias
|
||||
(i.e. make the layer affine rather than linear).
|
||||
input_dim: dimensionality of the input (integer). This argument
|
||||
(or alternatively, the keyword argument `input_shape`)
|
||||
is required when using this layer as the first layer in a model.
|
||||
|
||||
# Input shape
|
||||
@@ -860,13 +916,21 @@ class MaxoutDense(Layer):
|
||||
2D tensor with shape: `(nb_samples, output_dim)`.
|
||||
|
||||
# References
|
||||
- [Maxout Networks](http://arxiv.org/pdf/1302.4389.pdf)
|
||||
'''
|
||||
def __init__(self, output_dim, nb_feature=4,
|
||||
init='glorot_uniform', weights=None,
|
||||
W_regularizer=None, b_regularizer=None, activity_regularizer=None,
|
||||
W_constraint=None, b_constraint=None,
|
||||
bias=True, input_dim=None, **kwargs):
|
||||
- [Maxout Networks](http://arxiv.org/abs/1302.4389)
|
||||
"""
|
||||
|
||||
def __init__(self, output_dim,
|
||||
nb_feature=4,
|
||||
init='glorot_uniform',
|
||||
weights=None,
|
||||
W_regularizer=None,
|
||||
b_regularizer=None,
|
||||
activity_regularizer=None,
|
||||
W_constraint=None,
|
||||
b_constraint=None,
|
||||
bias=True,
|
||||
input_dim=None,
|
||||
**kwargs):
|
||||
self.output_dim = output_dim
|
||||
self.nb_feature = nb_feature
|
||||
self.init = initializations.get(init)
|
||||
@@ -892,33 +956,19 @@ class MaxoutDense(Layer):
|
||||
self.input_spec = [InputSpec(dtype=K.floatx(),
|
||||
shape=(None, input_dim))]
|
||||
|
||||
self.W = self.init((self.nb_feature, input_dim, self.output_dim),
|
||||
name='{}_W'.format(self.name))
|
||||
self.W = self.add_weight((self.nb_feature, input_dim, self.output_dim),
|
||||
initializer=self.init,
|
||||
name='{}_W'.format(self.name),
|
||||
regularizer=self.W_regularizer,
|
||||
constraint=self.W_constraint)
|
||||
if self.bias:
|
||||
self.b = K.zeros((self.nb_feature, self.output_dim),
|
||||
name='{}_b'.format(self.name))
|
||||
self.trainable_weights = [self.W, self.b]
|
||||
self.b = self.add_weight((self.nb_feature, self.output_dim,),
|
||||
initializer='zero',
|
||||
name='{}_b'.format(self.name),
|
||||
regularizer=self.b_regularizer,
|
||||
constraint=self.b_constraint)
|
||||
else:
|
||||
self.trainable_weights = [self.W]
|
||||
|
||||
self.regularizers = []
|
||||
if self.W_regularizer:
|
||||
self.W_regularizer.set_param(self.W)
|
||||
self.regularizers.append(self.W_regularizer)
|
||||
|
||||
if self.bias and self.b_regularizer:
|
||||
self.b_regularizer.set_param(self.b)
|
||||
self.regularizers.append(self.b_regularizer)
|
||||
|
||||
if self.activity_regularizer:
|
||||
self.activity_regularizer.set_layer(self)
|
||||
self.regularizers.append(self.activity_regularizer)
|
||||
|
||||
self.constraints = {}
|
||||
if self.W_constraint:
|
||||
self.constraints[self.W] = self.W_constraint
|
||||
if self.bias and self.b_constraint:
|
||||
self.constraints[self.b] = self.b_constraint
|
||||
self.b = None
|
||||
|
||||
if self.initial_weights is not None:
|
||||
self.set_weights(self.initial_weights)
|
||||
@@ -953,8 +1003,9 @@ class MaxoutDense(Layer):
|
||||
|
||||
|
||||
class Highway(Layer):
|
||||
'''Densely connected highway network,
|
||||
a natural extension of LSTMs to feedforward networks.
|
||||
"""Densely connected highway network.
|
||||
|
||||
Highway layers are a natural extension of LSTMs to feedforward networks.
|
||||
|
||||
# Arguments
|
||||
init: name of initialization function for the weights of the layer
|
||||
@@ -962,7 +1013,6 @@ class Highway(Layer):
|
||||
or alternatively, Theano function to use for weights
|
||||
initialization. This parameter is only relevant
|
||||
if you don't pass a `weights` argument.
|
||||
transform_bias: value for the bias to take on initially (default -2)
|
||||
activation: name of activation function to use
|
||||
(see [activations](../activations.md)),
|
||||
or alternatively, elementwise Theano function.
|
||||
@@ -981,9 +1031,10 @@ class Highway(Layer):
|
||||
(eg. maxnorm, nonneg), applied to the main weights matrix.
|
||||
b_constraint: instance of the [constraints](../constraints.md) module,
|
||||
applied to the bias.
|
||||
bias: whether to include a bias (i.e. make the layer affine rather than linear).
|
||||
input_dim: dimensionality of the input (integer).
|
||||
This argument (or alternatively, the keyword argument `input_shape`)
|
||||
bias: whether to include a bias
|
||||
(i.e. make the layer affine rather than linear).
|
||||
input_dim: dimensionality of the input (integer). This argument
|
||||
(or alternatively, the keyword argument `input_shape`)
|
||||
is required when using this layer as the first layer in a model.
|
||||
|
||||
# Input shape
|
||||
@@ -993,15 +1044,26 @@ class Highway(Layer):
|
||||
2D tensor with shape: `(nb_samples, input_dim)`.
|
||||
|
||||
# References
|
||||
- [Highway Networks](http://arxiv.org/pdf/1505.00387v2.pdf)
|
||||
'''
|
||||
def __init__(self, init='glorot_uniform', transform_bias=-2,
|
||||
activation=None, weights=None,
|
||||
W_regularizer=None, b_regularizer=None, activity_regularizer=None,
|
||||
W_constraint=None, b_constraint=None,
|
||||
bias=True, input_dim=None, **kwargs):
|
||||
- [Highway Networks](http://arxiv.org/abs/1505.00387v2)
|
||||
"""
|
||||
|
||||
def __init__(self,
|
||||
init='glorot_uniform',
|
||||
activation=None,
|
||||
weights=None,
|
||||
W_regularizer=None,
|
||||
b_regularizer=None,
|
||||
activity_regularizer=None,
|
||||
W_constraint=None,
|
||||
b_constraint=None,
|
||||
bias=True,
|
||||
input_dim=None,
|
||||
**kwargs):
|
||||
if 'transform_bias' in kwargs:
|
||||
kwargs.pop('transform_bias')
|
||||
warnings.warn('`transform_bias` argument is deprecated and '
|
||||
'will be removed after 5/2017.')
|
||||
self.init = initializations.get(init)
|
||||
self.transform_bias = transform_bias
|
||||
self.activation = activations.get(activation)
|
||||
|
||||
self.W_regularizer = regularizers.get(W_regularizer)
|
||||
@@ -1025,38 +1087,25 @@ class Highway(Layer):
|
||||
self.input_spec = [InputSpec(dtype=K.floatx(),
|
||||
shape=(None, input_dim))]
|
||||
|
||||
self.W = self.init((input_dim, input_dim),
|
||||
name='{}_W'.format(self.name))
|
||||
self.W_carry = self.init((input_dim, input_dim),
|
||||
name='{}_W_carry'.format(self.name))
|
||||
|
||||
self.W = self.add_weight((input_dim, input_dim),
|
||||
initializer=self.init,
|
||||
name='{}_W'.format(self.name),
|
||||
regularizer=self.W_regularizer,
|
||||
constraint=self.W_constraint)
|
||||
self.W_carry = self.add_weight((input_dim, input_dim),
|
||||
initializer=self.init,
|
||||
name='{}_W_carry'.format(self.name))
|
||||
if self.bias:
|
||||
self.b = K.zeros((input_dim,), name='{}_b'.format(self.name))
|
||||
# initialize with a vector of values `transform_bias`
|
||||
self.b_carry = K.variable(np.ones((input_dim,)) * self.transform_bias,
|
||||
name='{}_b_carry'.format(self.name))
|
||||
self.trainable_weights = [self.W, self.b, self.W_carry, self.b_carry]
|
||||
self.b = self.add_weight((input_dim,),
|
||||
initializer='zero',
|
||||
name='{}_b'.format(self.name),
|
||||
regularizer=self.b_regularizer,
|
||||
constraint=self.b_constraint)
|
||||
self.b_carry = self.add_weight((input_dim,),
|
||||
initializer='one',
|
||||
name='{}_b_carry'.format(self.name))
|
||||
else:
|
||||
self.trainable_weights = [self.W, self.W_carry]
|
||||
|
||||
self.regularizers = []
|
||||
if self.W_regularizer:
|
||||
self.W_regularizer.set_param(self.W)
|
||||
self.regularizers.append(self.W_regularizer)
|
||||
|
||||
if self.bias and self.b_regularizer:
|
||||
self.b_regularizer.set_param(self.b)
|
||||
self.regularizers.append(self.b_regularizer)
|
||||
|
||||
if self.activity_regularizer:
|
||||
self.activity_regularizer.set_layer(self)
|
||||
self.regularizers.append(self.activity_regularizer)
|
||||
|
||||
self.constraints = {}
|
||||
if self.W_constraint:
|
||||
self.constraints[self.W] = self.W_constraint
|
||||
if self.bias and self.b_constraint:
|
||||
self.constraints[self.b] = self.b_constraint
|
||||
self.b_carry = None
|
||||
|
||||
if self.initial_weights is not None:
|
||||
self.set_weights(self.initial_weights)
|
||||
@@ -1078,7 +1127,6 @@ class Highway(Layer):
|
||||
|
||||
def get_config(self):
|
||||
config = {'init': self.init.__name__,
|
||||
'transform_bias': self.transform_bias,
|
||||
'activation': self.activation.__name__,
|
||||
'W_regularizer': self.W_regularizer.get_config() if self.W_regularizer else None,
|
||||
'b_regularizer': self.b_regularizer.get_config() if self.b_regularizer else None,
|
||||
@@ -1092,7 +1140,8 @@ class Highway(Layer):
|
||||
|
||||
|
||||
class TimeDistributedDense(Layer):
|
||||
'''Apply a same Dense layer for each dimension[1] (time_dimension) input.
|
||||
"""Apply a same Dense layer for each dimension[1] (time_dimension) input.
|
||||
|
||||
Especially useful after a recurrent network with 'return_sequence=True'.
|
||||
|
||||
Note: this layer is deprecated, prefer using the `TimeDistributed` wrapper:
|
||||
@@ -1131,21 +1180,31 @@ class TimeDistributedDense(Layer):
|
||||
(eg. maxnorm, nonneg), applied to the main weights matrix.
|
||||
b_constraint: instance of the [constraints](../constraints.md) module,
|
||||
applied to the bias.
|
||||
bias: whether to include a bias (i.e. make the layer affine rather than linear).
|
||||
input_dim: dimensionality of the input (integer).
|
||||
This argument (or alternatively, the keyword argument `input_shape`)
|
||||
bias: whether to include a bias
|
||||
(i.e. make the layer affine rather than linear).
|
||||
input_dim: dimensionality of the input (integer). This argument
|
||||
(or alternatively, the keyword argument `input_shape`)
|
||||
is required when using this layer as the first layer in a model.
|
||||
input_length: length of inputs sequences
|
||||
(integer, or None for variable-length sequences).
|
||||
'''
|
||||
"""
|
||||
|
||||
def __init__(self, output_dim,
|
||||
init='glorot_uniform', activation=None, weights=None,
|
||||
W_regularizer=None, b_regularizer=None, activity_regularizer=None,
|
||||
W_constraint=None, b_constraint=None,
|
||||
bias=True, input_dim=None, input_length=None, **kwargs):
|
||||
warnings.warn('TimeDistributedDense is deprecated, '
|
||||
'please use TimeDistributed(Dense(...)) instead.')
|
||||
init='glorot_uniform',
|
||||
activation=None,
|
||||
weights=None,
|
||||
W_regularizer=None,
|
||||
b_regularizer=None,
|
||||
activity_regularizer=None,
|
||||
W_constraint=None,
|
||||
b_constraint=None,
|
||||
bias=True,
|
||||
input_dim=None,
|
||||
input_length=None,
|
||||
**kwargs):
|
||||
warnings.warn('`TimeDistributedDense` is deprecated, '
|
||||
'And will be removed on May 1st, 2017. '
|
||||
'Please use a `Dense` layer instead.')
|
||||
self.output_dim = output_dim
|
||||
self.init = initializations.get(init)
|
||||
self.activation = activations.get(activation)
|
||||
@@ -1173,31 +1232,19 @@ class TimeDistributedDense(Layer):
|
||||
shape=(None,) + input_shape[1:])]
|
||||
input_dim = input_shape[2]
|
||||
|
||||
self.W = self.init((input_dim, self.output_dim),
|
||||
name='{}_W'.format(self.name))
|
||||
self.W = self.add_weight((input_dim, self.output_dim),
|
||||
initializer=self.init,
|
||||
name='{}_W'.format(self.name),
|
||||
regularizer=self.W_regularizer,
|
||||
constraint=self.W_constraint)
|
||||
if self.bias:
|
||||
self.b = K.zeros((self.output_dim,),
|
||||
name='{}_b'.format(self.name))
|
||||
self.trainable_weights = [self.W, self.b]
|
||||
self.regularizers = []
|
||||
|
||||
if self.W_regularizer:
|
||||
self.W_regularizer.set_param(self.W)
|
||||
self.regularizers.append(self.W_regularizer)
|
||||
|
||||
if self.bias and self.b_regularizer:
|
||||
self.b_regularizer.set_param(self.b)
|
||||
self.regularizers.append(self.b_regularizer)
|
||||
|
||||
if self.activity_regularizer:
|
||||
self.activity_regularizer.set_layer(self)
|
||||
self.regularizers.append(self.activity_regularizer)
|
||||
|
||||
self.constraints = {}
|
||||
if self.W_constraint:
|
||||
self.constraints[self.W] = self.W_constraint
|
||||
if self.bias and self.b_constraint:
|
||||
self.constraints[self.b] = self.b_constraint
|
||||
self.b = self.add_weight((self.output_dim,),
|
||||
initializer='zero',
|
||||
name='{}_b'.format(self.name),
|
||||
regularizer=self.b_regularizer,
|
||||
constraint=self.b_constraint)
|
||||
else:
|
||||
self.b = None
|
||||
|
||||
if self.initial_weights is not None:
|
||||
self.set_weights(self.initial_weights)
|
||||
@@ -1211,17 +1258,17 @@ class TimeDistributedDense(Layer):
|
||||
input_shape = self.input_spec[0].shape
|
||||
# x has shape (samples, timesteps, input_dim)
|
||||
input_length = input_shape[1]
|
||||
# Note: input_length should always be provided when using tensorflow backend.
|
||||
if not input_length:
|
||||
if hasattr(K, 'int_shape'):
|
||||
input_length = K.int_shape(x)[1]
|
||||
if not input_length:
|
||||
raise Exception(
|
||||
'Layer ' + self.name +
|
||||
' requires to know the length of its input, '
|
||||
'but it could not be inferred automatically. '
|
||||
'Specify it manually by passing an input_shape '
|
||||
'argument to the first layer in your model.')
|
||||
raise ValueError('Layer ' + self.name +
|
||||
' requires to know the length '
|
||||
'of its input, but it could not '
|
||||
'be inferred automatically. '
|
||||
'Specify it manually by passing '
|
||||
'an input_shape argument to '
|
||||
'the first layer in your model.')
|
||||
else:
|
||||
input_length = K.shape(x)[1]
|
||||
|
||||
|
||||
@@ -1,12 +1,14 @@
|
||||
from __future__ import absolute_import
|
||||
|
||||
from .. import backend as K
|
||||
from .. import initializations, regularizers, constraints
|
||||
from .. import initializations
|
||||
from .. import regularizers
|
||||
from .. import constraints
|
||||
from ..engine import Layer
|
||||
|
||||
|
||||
class Embedding(Layer):
|
||||
'''Turn positive integers (indexes) into dense vectors of fixed size.
|
||||
"""Turn positive integers (indexes) into dense vectors of fixed size.
|
||||
eg. [[4], [20]] -> [[0.25, 0.1], [0.6, -0.2]]
|
||||
|
||||
This layer can only be used as the first layer in a model.
|
||||
@@ -62,8 +64,7 @@ class Embedding(Layer):
|
||||
|
||||
# References
|
||||
- [A Theoretically Grounded Application of Dropout in Recurrent Neural Networks](http://arxiv.org/abs/1512.05287)
|
||||
'''
|
||||
input_ndim = 2
|
||||
"""
|
||||
|
||||
def __init__(self, input_dim, output_dim,
|
||||
init='uniform', input_length=None,
|
||||
@@ -91,22 +92,11 @@ class Embedding(Layer):
|
||||
super(Embedding, self).__init__(**kwargs)
|
||||
|
||||
def build(self, input_shape):
|
||||
self.W = self.init((self.input_dim, self.output_dim),
|
||||
name='{}_W'.format(self.name))
|
||||
self.trainable_weights = [self.W]
|
||||
|
||||
self.constraints = {}
|
||||
if self.W_constraint:
|
||||
self.constraints[self.W] = self.W_constraint
|
||||
|
||||
self.regularizers = []
|
||||
if self.W_regularizer:
|
||||
self.W_regularizer.set_param(self.W)
|
||||
self.regularizers.append(self.W_regularizer)
|
||||
|
||||
if self.activity_regularizer:
|
||||
self.activity_regularizer.set_layer(self)
|
||||
self.regularizers.append(self.activity_regularizer)
|
||||
self.W = self.add_weight((self.input_dim, self.output_dim),
|
||||
initializer=self.init,
|
||||
name='{}_W'.format(self.name),
|
||||
regularizer=self.W_regularizer,
|
||||
constraint=self.W_constraint)
|
||||
|
||||
if self.initial_weights is not None:
|
||||
self.set_weights(self.initial_weights)
|
||||
|
||||
+54
-65
@@ -2,13 +2,19 @@
|
||||
from __future__ import absolute_import
|
||||
|
||||
from keras import backend as K
|
||||
from keras.layers import activations, initializations, regularizers, constraints
|
||||
from keras.engine import Layer, InputSpec
|
||||
from keras.layers import activations
|
||||
from keras.layers import initializations
|
||||
from keras.layers import regularizers
|
||||
from keras.layers import constraints
|
||||
from keras.engine import Layer
|
||||
from keras.engine import InputSpec
|
||||
from ..utils.np_utils import conv_output_length
|
||||
|
||||
|
||||
class LocallyConnected1D(Layer):
|
||||
'''The `LocallyConnected1D` layer works similarly to
|
||||
"""Locally-connected layer for 1D inputs.
|
||||
|
||||
The `LocallyConnected1D` layer works similarly to
|
||||
the `Convolution1D` layer, except that weights are unshared,
|
||||
that is, a different set of filters is applied at each different patch
|
||||
of the input.
|
||||
@@ -73,7 +79,8 @@ class LocallyConnected1D(Layer):
|
||||
# Output shape
|
||||
3D tensor with shape: `(samples, new_steps, nb_filter)`.
|
||||
`steps` value might have changed due to padding.
|
||||
'''
|
||||
"""
|
||||
|
||||
def __init__(self, nb_filter, filter_length,
|
||||
init='glorot_uniform', activation=None, weights=None,
|
||||
border_mode='valid', subsample_length=1,
|
||||
@@ -81,8 +88,8 @@ class LocallyConnected1D(Layer):
|
||||
W_constraint=None, b_constraint=None,
|
||||
bias=True, input_dim=None, input_length=None, **kwargs):
|
||||
if border_mode != 'valid':
|
||||
raise Exception('Invalid border mode for LocallyConnected1D '
|
||||
'(only "valid" is supported):', border_mode)
|
||||
raise ValueError('Invalid border mode for LocallyConnected1D '
|
||||
'(only "valid" is supported):', border_mode)
|
||||
self.nb_filter = nb_filter
|
||||
self.filter_length = filter_length
|
||||
self.init = initializations.get(init, dim_ordering='th')
|
||||
@@ -110,31 +117,22 @@ class LocallyConnected1D(Layer):
|
||||
def build(self, input_shape):
|
||||
input_dim = input_shape[2]
|
||||
_, output_length, nb_filter = self.get_output_shape_for(input_shape)
|
||||
|
||||
self.W_shape = (output_length, self.filter_length * input_dim, nb_filter)
|
||||
self.W = self.init(self.W_shape, name='{}_W'.format(self.name))
|
||||
self.W_shape = (output_length,
|
||||
self.filter_length * input_dim,
|
||||
nb_filter)
|
||||
self.W = self.add_weight(self.W_shape,
|
||||
initializer=self.init,
|
||||
name='{}_W'.format(self.name),
|
||||
regularizer=self.W_regularizer,
|
||||
constraint=self.W_constraint)
|
||||
if self.bias:
|
||||
self.b = K.zeros((output_length, self.nb_filter), name='{}_b'.format(self.name))
|
||||
self.trainable_weights = [self.W, self.b]
|
||||
self.b = self.add_weight((output_length, self.nb_filter),
|
||||
initializer='zero',
|
||||
name='{}_b'.format(self.name),
|
||||
regularizer=self.b_regularizer,
|
||||
constraint=self.b_constraint)
|
||||
else:
|
||||
self.trainable_weights = [self.W]
|
||||
|
||||
self.regularizers = []
|
||||
if self.W_regularizer:
|
||||
self.W_regularizer.set_param(self.W)
|
||||
self.regularizers.append(self.W_regularizer)
|
||||
if self.b_regularizer:
|
||||
self.b_regularizer.set_param(self.b)
|
||||
self.regularizers.append(self.b_regularizer)
|
||||
if self.activity_regularizer:
|
||||
self.activity_regularizer.set_layer(self)
|
||||
self.regularizers.append(self.activity_regularizer)
|
||||
|
||||
self.constraints = {}
|
||||
if self.W_constraint:
|
||||
self.constraints[self.W] = self.W_constraint
|
||||
if self.b_constraint:
|
||||
self.constraints[self.b] = self.b_constraint
|
||||
self.b = None
|
||||
|
||||
if self.initial_weights is not None:
|
||||
self.set_weights(self.initial_weights)
|
||||
@@ -187,7 +185,9 @@ class LocallyConnected1D(Layer):
|
||||
|
||||
|
||||
class LocallyConnected2D(Layer):
|
||||
'''The `LocallyConnected2D` layer works similarly
|
||||
"""Locally-connected layer for 2D inputs.
|
||||
|
||||
The `LocallyConnected2D` layer works similarly
|
||||
to the `Convolution2D` layer, except that weights are unshared,
|
||||
that is, a different set of filters is applied at each
|
||||
different patch of the input.
|
||||
@@ -256,7 +256,8 @@ class LocallyConnected2D(Layer):
|
||||
or 4D tensor with shape:
|
||||
`(samples, new_rows, new_cols, nb_filter)` if dim_ordering='tf'.
|
||||
`rows` and `cols` values might have changed due to padding.
|
||||
'''
|
||||
"""
|
||||
|
||||
def __init__(self, nb_filter, nb_row, nb_col,
|
||||
init='glorot_uniform', activation=None, weights=None,
|
||||
border_mode='valid', subsample=(1, 1),
|
||||
@@ -267,8 +268,8 @@ class LocallyConnected2D(Layer):
|
||||
if dim_ordering == 'default':
|
||||
dim_ordering = K.image_dim_ordering()
|
||||
if border_mode != 'valid':
|
||||
raise Exception('Invalid border mode for LocallyConnected2D '
|
||||
'(only "valid" is supported):', border_mode)
|
||||
raise ValueError('Invalid border mode for LocallyConnected2D '
|
||||
'(only "valid" is supported):', border_mode)
|
||||
self.nb_filter = nb_filter
|
||||
self.nb_row = nb_row
|
||||
self.nb_col = nb_col
|
||||
@@ -277,7 +278,8 @@ class LocallyConnected2D(Layer):
|
||||
|
||||
self.border_mode = border_mode
|
||||
self.subsample = tuple(subsample)
|
||||
assert dim_ordering in {'tf', 'th'}, 'dim_ordering must be in {tf, th}'
|
||||
if dim_ordering not in {'tf', 'th'}:
|
||||
raise ValueError('`dim_ordering` must be in {tf, th}.')
|
||||
self.dim_ordering = dim_ordering
|
||||
|
||||
self.W_regularizer = regularizers.get(W_regularizer)
|
||||
@@ -301,35 +303,26 @@ class LocallyConnected2D(Layer):
|
||||
_, output_row, output_col, nb_filter = output_shape
|
||||
input_filter = input_shape[3]
|
||||
else:
|
||||
raise Exception('Invalid dim_ordering: ' + self.dim_ordering)
|
||||
raise ValueError('Invalid dim_ordering:', self.dim_ordering)
|
||||
|
||||
self.output_row = output_row
|
||||
self.output_col = output_col
|
||||
self.W_shape = (output_row * output_col, self.nb_row * self.nb_col * input_filter, nb_filter)
|
||||
self.W = self.init(self.W_shape, name='{}_W'.format(self.name))
|
||||
|
||||
self.W_shape = (output_row * output_col,
|
||||
self.nb_row * self.nb_col * input_filter,
|
||||
nb_filter)
|
||||
self.W = self.add_weight(self.W_shape,
|
||||
initializer=self.init,
|
||||
name='{}_W'.format(self.name),
|
||||
regularizer=self.W_regularizer,
|
||||
constraint=self.W_constraint)
|
||||
if self.bias:
|
||||
self.b = K.zeros((output_row, output_col, nb_filter), name='{}_b'.format(self.name))
|
||||
self.trainable_weights = [self.W, self.b]
|
||||
self.b = self.add_weight((output_row, output_col, nb_filter),
|
||||
initializer='zero',
|
||||
name='{}_b'.format(self.name),
|
||||
regularizer=self.b_regularizer,
|
||||
constraint=self.b_constraint)
|
||||
else:
|
||||
self.trainable_weights = [self.W]
|
||||
|
||||
self.regularizers = []
|
||||
if self.W_regularizer:
|
||||
self.W_regularizer.set_param(self.W)
|
||||
self.regularizers.append(self.W_regularizer)
|
||||
if self.bias and self.b_regularizer:
|
||||
self.b_regularizer.set_param(self.b)
|
||||
self.regularizers.append(self.b_regularizer)
|
||||
if self.activity_regularizer:
|
||||
self.activity_regularizer.set_layer(self)
|
||||
self.regularizers.append(self.activity_regularizer)
|
||||
|
||||
self.constraints = {}
|
||||
if self.W_constraint:
|
||||
self.constraints[self.W] = self.W_constraint
|
||||
if self.bias and self.b_constraint:
|
||||
self.constraints[self.b] = self.b_constraint
|
||||
self.b = None
|
||||
|
||||
if self.initial_weights is not None:
|
||||
self.set_weights(self.initial_weights)
|
||||
@@ -344,7 +337,7 @@ class LocallyConnected2D(Layer):
|
||||
rows = input_shape[1]
|
||||
cols = input_shape[2]
|
||||
else:
|
||||
raise Exception('Invalid dim_ordering: ' + self.dim_ordering)
|
||||
raise ValueError('Invalid dim_ordering:', self.dim_ordering)
|
||||
|
||||
rows = conv_output_length(rows, self.nb_row,
|
||||
self.border_mode, self.subsample[0])
|
||||
@@ -355,15 +348,13 @@ class LocallyConnected2D(Layer):
|
||||
return (input_shape[0], self.nb_filter, rows, cols)
|
||||
elif self.dim_ordering == 'tf':
|
||||
return (input_shape[0], rows, cols, self.nb_filter)
|
||||
else:
|
||||
raise Exception('Invalid dim_ordering: ' + self.dim_ordering)
|
||||
|
||||
def call(self, x, mask=None):
|
||||
stride_row, stride_col = self.subsample
|
||||
_, feature_dim, nb_filter = self.W_shape
|
||||
|
||||
if self.dim_ordering == 'th':
|
||||
if K._backend == 'theano':
|
||||
if K.backend() == 'theano':
|
||||
output = []
|
||||
for i in range(self.output_row):
|
||||
for j in range(self.output_col):
|
||||
@@ -401,15 +392,13 @@ class LocallyConnected2D(Layer):
|
||||
output = K.reshape(output, (self.output_row, self.output_col, -1, nb_filter))
|
||||
output = K.permute_dimensions(output, (2, 0, 1, 3))
|
||||
else:
|
||||
raise Exception('Invalid dim_ordering: ' + self.dim_ordering)
|
||||
raise ValueError('Invalid dim_ordering:', self.dim_ordering)
|
||||
|
||||
if self.bias:
|
||||
if self.dim_ordering == 'th':
|
||||
output += K.reshape(self.b, (1, nb_filter, self.output_row, self.output_col))
|
||||
elif self.dim_ordering == 'tf':
|
||||
output += K.reshape(self.b, (1, self.output_row, self.output_col, nb_filter))
|
||||
else:
|
||||
raise Exception('Invalid dim_ordering: ' + self.dim_ordering)
|
||||
|
||||
output = self.activation(output)
|
||||
return output
|
||||
|
||||
+12
-8
@@ -5,9 +5,10 @@ import numpy as np
|
||||
|
||||
|
||||
class GaussianNoise(Layer):
|
||||
'''Apply to the input an additive zero-centered Gaussian noise with
|
||||
standard deviation `sigma`. This is useful to mitigate overfitting
|
||||
(you could see it as a kind of random data augmentation).
|
||||
"""Apply additive zero-centered Gaussian noise.
|
||||
|
||||
This is useful to mitigate overfitting
|
||||
(you could see it as a form of random data augmentation).
|
||||
Gaussian Noise (GS) is a natural choice as corruption process
|
||||
for real valued inputs.
|
||||
|
||||
@@ -23,7 +24,8 @@ class GaussianNoise(Layer):
|
||||
|
||||
# Output shape
|
||||
Same shape as input.
|
||||
'''
|
||||
"""
|
||||
|
||||
def __init__(self, sigma, **kwargs):
|
||||
self.supports_masking = True
|
||||
self.sigma = sigma
|
||||
@@ -43,13 +45,14 @@ class GaussianNoise(Layer):
|
||||
|
||||
|
||||
class GaussianDropout(Layer):
|
||||
'''Apply to the input an multiplicative one-centered Gaussian noise
|
||||
with standard deviation `sqrt(p/(1-p))`.
|
||||
"""Apply multiplicative 1-centered Gaussian noise.
|
||||
|
||||
As it is a regularization layer, it is only active at training time.
|
||||
|
||||
# Arguments
|
||||
p: float, drop probability (as with `Dropout`).
|
||||
The multiplicative noise will have
|
||||
standard deviation `sqrt(p / (1 - p))`.
|
||||
|
||||
# Input shape
|
||||
Arbitrary. Use the keyword argument `input_shape`
|
||||
@@ -60,8 +63,9 @@ class GaussianDropout(Layer):
|
||||
Same shape as input.
|
||||
|
||||
# References
|
||||
[Dropout: A Simple Way to Prevent Neural Networks from Overfitting Srivastava, Hinton, et al. 2014](http://www.cs.toronto.edu/~rsalakhu/papers/srivastava14a.pdf)
|
||||
'''
|
||||
- [Dropout: A Simple Way to Prevent Neural Networks from Overfitting Srivastava, Hinton, et al. 2014](http://www.cs.toronto.edu/~rsalakhu/papers/srivastava14a.pdf)
|
||||
"""
|
||||
|
||||
def __init__(self, p, **kwargs):
|
||||
self.supports_masking = True
|
||||
self.p = p
|
||||
|
||||
@@ -4,12 +4,15 @@ from .. import backend as K
|
||||
|
||||
|
||||
class BatchNormalization(Layer):
|
||||
'''Normalize the activations of the previous layer at each batch,
|
||||
"""Batch normalization layer (Ioffe and Szegedy, 2014).
|
||||
|
||||
Normalize the activations of the previous layer at each batch,
|
||||
i.e. applies a transformation that maintains the mean activation
|
||||
close to 0 and the activation standard deviation close to 1.
|
||||
|
||||
# Arguments
|
||||
epsilon: small float > 0. Fuzz parameter.
|
||||
Theano expects epsilon >= 1e-5.
|
||||
mode: integer, 0, 1 or 2.
|
||||
- 0: feature-wise normalization.
|
||||
Each feature map in the input will
|
||||
@@ -58,9 +61,10 @@ class BatchNormalization(Layer):
|
||||
Same shape as input.
|
||||
|
||||
# References
|
||||
- [Batch Normalization: Accelerating Deep Network Training by Reducing Internal Covariate Shift](http://jmlr.org/proceedings/papers/v37/ioffe15.pdf)
|
||||
'''
|
||||
def __init__(self, epsilon=1e-5, mode=0, axis=-1, momentum=0.99,
|
||||
- [Batch Normalization: Accelerating Deep Network Training by Reducing Internal Covariate Shift](https://arxiv.org/abs/1502.03167)
|
||||
"""
|
||||
|
||||
def __init__(self, epsilon=1e-3, mode=0, axis=-1, momentum=0.99,
|
||||
weights=None, beta_init='zero', gamma_init='one',
|
||||
gamma_regularizer=None, beta_regularizer=None, **kwargs):
|
||||
self.supports_masking = True
|
||||
@@ -81,24 +85,20 @@ class BatchNormalization(Layer):
|
||||
self.input_spec = [InputSpec(shape=input_shape)]
|
||||
shape = (input_shape[self.axis],)
|
||||
|
||||
self.gamma = self.gamma_init(shape, name='{}_gamma'.format(self.name))
|
||||
self.beta = self.beta_init(shape, name='{}_beta'.format(self.name))
|
||||
self.trainable_weights = [self.gamma, self.beta]
|
||||
|
||||
self.regularizers = []
|
||||
if self.gamma_regularizer:
|
||||
self.gamma_regularizer.set_param(self.gamma)
|
||||
self.regularizers.append(self.gamma_regularizer)
|
||||
|
||||
if self.beta_regularizer:
|
||||
self.beta_regularizer.set_param(self.beta)
|
||||
self.regularizers.append(self.beta_regularizer)
|
||||
|
||||
self.running_mean = K.zeros(shape,
|
||||
name='{}_running_mean'.format(self.name))
|
||||
self.running_std = K.ones(shape,
|
||||
name='{}_running_std'.format(self.name))
|
||||
self.non_trainable_weights = [self.running_mean, self.running_std]
|
||||
self.gamma = self.add_weight(shape,
|
||||
initializer=self.gamma_init,
|
||||
regularizer=self.gamma_regularizer,
|
||||
name='{}_gamma'.format(self.name))
|
||||
self.beta = self.add_weight(shape,
|
||||
initializer=self.beta_init,
|
||||
regularizer=self.beta_regularizer,
|
||||
name='{}_beta'.format(self.name))
|
||||
self.running_mean = self.add_weight(shape, initializer='zero',
|
||||
name='{}_running_mean'.format(self.name),
|
||||
trainable=False)
|
||||
self.running_std = self.add_weight(shape, initializer='one',
|
||||
name='{}_running_std'.format(self.name),
|
||||
trainable=False)
|
||||
|
||||
if self.initial_weights is not None:
|
||||
self.set_weights(self.initial_weights)
|
||||
@@ -108,27 +108,22 @@ class BatchNormalization(Layer):
|
||||
def call(self, x, mask=None):
|
||||
if self.mode == 0 or self.mode == 2:
|
||||
assert self.built, 'Layer must be built before being called'
|
||||
input_shape = self.input_spec[0].shape
|
||||
input_shape = K.int_shape(x)
|
||||
|
||||
reduction_axes = list(range(len(input_shape)))
|
||||
del reduction_axes[self.axis]
|
||||
broadcast_shape = [1] * len(input_shape)
|
||||
broadcast_shape[self.axis] = input_shape[self.axis]
|
||||
|
||||
if self.mode == 2:
|
||||
x_normed, mean, std = K.normalize_batch_in_training(
|
||||
x, self.gamma, self.beta, reduction_axes,
|
||||
epsilon=self.epsilon)
|
||||
else:
|
||||
# mode 0
|
||||
x_normed, mean, std = K.normalize_batch_in_training(
|
||||
x, self.gamma, self.beta, reduction_axes,
|
||||
epsilon=self.epsilon)
|
||||
x_normed, mean, std = K.normalize_batch_in_training(
|
||||
x, self.gamma, self.beta, reduction_axes,
|
||||
epsilon=self.epsilon)
|
||||
|
||||
self.add_updates([K.moving_average_update(self.running_mean, mean, self.momentum),
|
||||
K.moving_average_update(self.running_std, std, self.momentum)], x)
|
||||
if self.mode == 0:
|
||||
self.add_update([K.moving_average_update(self.running_mean, mean, self.momentum),
|
||||
K.moving_average_update(self.running_std, std, self.momentum)], x)
|
||||
|
||||
if K.backend() == 'tensorflow' and sorted(reduction_axes) == range(K.ndim(x))[:-1]:
|
||||
if sorted(reduction_axes) == range(K.ndim(x))[:-1]:
|
||||
x_normed_running = K.batch_normalization(
|
||||
x, self.running_mean, self.running_std,
|
||||
self.beta, self.gamma,
|
||||
|
||||
+63
-50
@@ -2,14 +2,14 @@
|
||||
from __future__ import absolute_import
|
||||
|
||||
from .. import backend as K
|
||||
from ..engine import Layer, InputSpec
|
||||
from ..engine import Layer
|
||||
from ..engine import InputSpec
|
||||
from ..utils.np_utils import conv_output_length
|
||||
|
||||
|
||||
class _Pooling1D(Layer):
|
||||
'''Abstract class for different pooling 1D layers.
|
||||
'''
|
||||
input_dim = 3
|
||||
"""Abstract class for different pooling 1D layers.
|
||||
"""
|
||||
|
||||
def __init__(self, pool_length=2, stride=None,
|
||||
border_mode='valid', **kwargs):
|
||||
@@ -20,7 +20,8 @@ class _Pooling1D(Layer):
|
||||
self.stride = stride
|
||||
self.st = (self.stride, 1)
|
||||
self.pool_size = (pool_length, 1)
|
||||
assert border_mode in {'valid', 'same'}, 'border_mode must be in {valid, same}'
|
||||
if border_mode not in {'valid', 'same'}:
|
||||
raise ValueError('`border_mode` must be in {valid, same}.')
|
||||
self.border_mode = border_mode
|
||||
self.input_spec = [InputSpec(ndim=3)]
|
||||
|
||||
@@ -29,7 +30,7 @@ class _Pooling1D(Layer):
|
||||
self.border_mode, self.stride)
|
||||
return (input_shape[0], length, input_shape[2])
|
||||
|
||||
def _pooling_function(self, back_end, inputs, pool_size, strides,
|
||||
def _pooling_function(self, inputs, pool_size, strides,
|
||||
border_mode, dim_ordering):
|
||||
raise NotImplementedError
|
||||
|
||||
@@ -50,7 +51,7 @@ class _Pooling1D(Layer):
|
||||
|
||||
|
||||
class MaxPooling1D(_Pooling1D):
|
||||
'''Max pooling operation for temporal data.
|
||||
"""Max pooling operation for temporal data.
|
||||
|
||||
# Input shape
|
||||
3D tensor with shape: `(samples, steps, features)`.
|
||||
@@ -64,7 +65,7 @@ class MaxPooling1D(_Pooling1D):
|
||||
2 will halve the input.
|
||||
If None, it will default to `pool_length`.
|
||||
border_mode: 'valid' or 'same'.
|
||||
'''
|
||||
"""
|
||||
|
||||
def __init__(self, pool_length=2, stride=None,
|
||||
border_mode='valid', **kwargs):
|
||||
@@ -79,7 +80,7 @@ class MaxPooling1D(_Pooling1D):
|
||||
|
||||
|
||||
class AveragePooling1D(_Pooling1D):
|
||||
'''Average pooling for temporal data.
|
||||
"""Average pooling for temporal data.
|
||||
|
||||
# Arguments
|
||||
pool_length: factor by which to downscale. 2 will halve the input.
|
||||
@@ -92,7 +93,7 @@ class AveragePooling1D(_Pooling1D):
|
||||
|
||||
# Output shape
|
||||
3D tensor with shape: `(samples, downsampled_steps, features)`.
|
||||
'''
|
||||
"""
|
||||
|
||||
def __init__(self, pool_length=2, stride=None,
|
||||
border_mode='valid', **kwargs):
|
||||
@@ -107,8 +108,8 @@ class AveragePooling1D(_Pooling1D):
|
||||
|
||||
|
||||
class _Pooling2D(Layer):
|
||||
'''Abstract class for different pooling 2D layers.
|
||||
'''
|
||||
"""Abstract class for different pooling 2D layers.
|
||||
"""
|
||||
|
||||
def __init__(self, pool_size=(2, 2), strides=None, border_mode='valid',
|
||||
dim_ordering='default', **kwargs):
|
||||
@@ -119,9 +120,11 @@ class _Pooling2D(Layer):
|
||||
if strides is None:
|
||||
strides = self.pool_size
|
||||
self.strides = tuple(strides)
|
||||
assert border_mode in {'valid', 'same'}, 'border_mode must be in {valid, same}'
|
||||
if border_mode not in {'valid', 'same'}:
|
||||
raise ValueError('`border_mode` must be in {valid, same}.')
|
||||
self.border_mode = border_mode
|
||||
assert dim_ordering in {'tf', 'th'}, 'dim_ordering must be in {tf, th}'
|
||||
if dim_ordering not in {'tf', 'th'}:
|
||||
raise ValueError('`dim_ordering` must be in {tf, th}.')
|
||||
self.dim_ordering = dim_ordering
|
||||
self.input_spec = [InputSpec(ndim=4)]
|
||||
|
||||
@@ -133,7 +136,7 @@ class _Pooling2D(Layer):
|
||||
rows = input_shape[1]
|
||||
cols = input_shape[2]
|
||||
else:
|
||||
raise Exception('Invalid dim_ordering: ' + self.dim_ordering)
|
||||
raise ValueError('Invalid dim_ordering:', self.dim_ordering)
|
||||
|
||||
rows = conv_output_length(rows, self.pool_size[0],
|
||||
self.border_mode, self.strides[0])
|
||||
@@ -144,15 +147,14 @@ class _Pooling2D(Layer):
|
||||
return (input_shape[0], input_shape[1], rows, cols)
|
||||
elif self.dim_ordering == 'tf':
|
||||
return (input_shape[0], rows, cols, input_shape[3])
|
||||
else:
|
||||
raise Exception('Invalid dim_ordering: ' + self.dim_ordering)
|
||||
|
||||
def _pooling_function(self, inputs, pool_size, strides,
|
||||
border_mode, dim_ordering):
|
||||
raise NotImplementedError
|
||||
|
||||
def call(self, x, mask=None):
|
||||
output = self._pooling_function(inputs=x, pool_size=self.pool_size,
|
||||
output = self._pooling_function(inputs=x,
|
||||
pool_size=self.pool_size,
|
||||
strides=self.strides,
|
||||
border_mode=self.border_mode,
|
||||
dim_ordering=self.dim_ordering)
|
||||
@@ -168,7 +170,7 @@ class _Pooling2D(Layer):
|
||||
|
||||
|
||||
class MaxPooling2D(_Pooling2D):
|
||||
'''Max pooling operation for spatial data.
|
||||
"""Max pooling operation for spatial data.
|
||||
|
||||
# Arguments
|
||||
pool_size: tuple of 2 integers,
|
||||
@@ -194,7 +196,7 @@ class MaxPooling2D(_Pooling2D):
|
||||
`(nb_samples, channels, pooled_rows, pooled_cols)` if dim_ordering='th'
|
||||
or 4D tensor with shape:
|
||||
`(samples, pooled_rows, pooled_cols, channels)` if dim_ordering='tf'.
|
||||
'''
|
||||
"""
|
||||
|
||||
def __init__(self, pool_size=(2, 2), strides=None, border_mode='valid',
|
||||
dim_ordering='default', **kwargs):
|
||||
@@ -204,12 +206,13 @@ class MaxPooling2D(_Pooling2D):
|
||||
def _pooling_function(self, inputs, pool_size, strides,
|
||||
border_mode, dim_ordering):
|
||||
output = K.pool2d(inputs, pool_size, strides,
|
||||
border_mode, dim_ordering, pool_mode='max')
|
||||
border_mode, dim_ordering,
|
||||
pool_mode='max')
|
||||
return output
|
||||
|
||||
|
||||
class AveragePooling2D(_Pooling2D):
|
||||
'''Average pooling operation for spatial data.
|
||||
"""Average pooling operation for spatial data.
|
||||
|
||||
# Arguments
|
||||
pool_size: tuple of 2 integers,
|
||||
@@ -235,7 +238,7 @@ class AveragePooling2D(_Pooling2D):
|
||||
`(nb_samples, channels, pooled_rows, pooled_cols)` if dim_ordering='th'
|
||||
or 4D tensor with shape:
|
||||
`(samples, pooled_rows, pooled_cols, channels)` if dim_ordering='tf'.
|
||||
'''
|
||||
"""
|
||||
|
||||
def __init__(self, pool_size=(2, 2), strides=None, border_mode='valid',
|
||||
dim_ordering='default', **kwargs):
|
||||
@@ -250,8 +253,8 @@ class AveragePooling2D(_Pooling2D):
|
||||
|
||||
|
||||
class _Pooling3D(Layer):
|
||||
'''Abstract class for different pooling 3D layers.
|
||||
'''
|
||||
"""Abstract class for different pooling 3D layers.
|
||||
"""
|
||||
|
||||
def __init__(self, pool_size=(2, 2, 2), strides=None, border_mode='valid',
|
||||
dim_ordering='default', **kwargs):
|
||||
@@ -262,9 +265,11 @@ class _Pooling3D(Layer):
|
||||
if strides is None:
|
||||
strides = self.pool_size
|
||||
self.strides = tuple(strides)
|
||||
assert border_mode in {'valid', 'same'}, 'border_mode must be in {valid, same}'
|
||||
if border_mode not in {'valid', 'same'}:
|
||||
raise ValueError('`border_mode` must be in {valid, same}.')
|
||||
self.border_mode = border_mode
|
||||
assert dim_ordering in {'tf', 'th'}, 'dim_ordering must be in {tf, th}'
|
||||
if dim_ordering not in {'tf', 'th'}:
|
||||
raise ValueError('`dim_ordering` must be in {tf, th}.')
|
||||
self.dim_ordering = dim_ordering
|
||||
self.input_spec = [InputSpec(ndim=5)]
|
||||
|
||||
@@ -278,7 +283,7 @@ class _Pooling3D(Layer):
|
||||
len_dim2 = input_shape[2]
|
||||
len_dim3 = input_shape[3]
|
||||
else:
|
||||
raise Exception('Invalid dim_ordering: ' + self.dim_ordering)
|
||||
raise ValueError('Invalid dim_ordering:', self.dim_ordering)
|
||||
|
||||
len_dim1 = conv_output_length(len_dim1, self.pool_size[0],
|
||||
self.border_mode, self.strides[0])
|
||||
@@ -286,13 +291,14 @@ class _Pooling3D(Layer):
|
||||
self.border_mode, self.strides[1])
|
||||
len_dim3 = conv_output_length(len_dim3, self.pool_size[2],
|
||||
self.border_mode, self.strides[2])
|
||||
|
||||
if self.dim_ordering == 'th':
|
||||
return (input_shape[0], input_shape[1], len_dim1, len_dim2, len_dim3)
|
||||
return (input_shape[0],
|
||||
input_shape[1],
|
||||
len_dim1, len_dim2, len_dim3)
|
||||
elif self.dim_ordering == 'tf':
|
||||
return (input_shape[0], len_dim1, len_dim2, len_dim3, input_shape[4])
|
||||
else:
|
||||
raise Exception('Invalid dim_ordering: ' + self.dim_ordering)
|
||||
return (input_shape[0],
|
||||
len_dim1, len_dim2, len_dim3,
|
||||
input_shape[4])
|
||||
|
||||
def _pooling_function(self, inputs, pool_size, strides,
|
||||
border_mode, dim_ordering):
|
||||
@@ -315,7 +321,7 @@ class _Pooling3D(Layer):
|
||||
|
||||
|
||||
class MaxPooling3D(_Pooling3D):
|
||||
'''Max pooling operation for 3D data (spatial or spatio-temporal).
|
||||
"""Max pooling operation for 3D data (spatial or spatio-temporal).
|
||||
|
||||
# Arguments
|
||||
pool_size: tuple of 3 integers,
|
||||
@@ -340,7 +346,7 @@ class MaxPooling3D(_Pooling3D):
|
||||
`(nb_samples, channels, pooled_dim1, pooled_dim2, pooled_dim3)` if dim_ordering='th'
|
||||
or 5D tensor with shape:
|
||||
`(samples, pooled_dim1, pooled_dim2, pooled_dim3, channels)` if dim_ordering='tf'.
|
||||
'''
|
||||
"""
|
||||
|
||||
def __init__(self, pool_size=(2, 2, 2), strides=None, border_mode='valid',
|
||||
dim_ordering='default', **kwargs):
|
||||
@@ -355,7 +361,7 @@ class MaxPooling3D(_Pooling3D):
|
||||
|
||||
|
||||
class AveragePooling3D(_Pooling3D):
|
||||
'''Average pooling operation for 3D data (spatial or spatio-temporal).
|
||||
"""Average pooling operation for 3D data (spatial or spatio-temporal).
|
||||
|
||||
# Arguments
|
||||
pool_size: tuple of 3 integers,
|
||||
@@ -380,7 +386,7 @@ class AveragePooling3D(_Pooling3D):
|
||||
`(nb_samples, channels, pooled_dim1, pooled_dim2, pooled_dim3)` if dim_ordering='th'
|
||||
or 5D tensor with shape:
|
||||
`(samples, pooled_dim1, pooled_dim2, pooled_dim3, channels)` if dim_ordering='tf'.
|
||||
'''
|
||||
"""
|
||||
|
||||
def __init__(self, pool_size=(2, 2, 2), strides=None, border_mode='valid',
|
||||
dim_ordering='default', **kwargs):
|
||||
@@ -390,11 +396,14 @@ class AveragePooling3D(_Pooling3D):
|
||||
def _pooling_function(self, inputs, pool_size, strides,
|
||||
border_mode, dim_ordering):
|
||||
output = K.pool3d(inputs, pool_size, strides,
|
||||
border_mode, dim_ordering, pool_mode='avg')
|
||||
border_mode, dim_ordering,
|
||||
pool_mode='avg')
|
||||
return output
|
||||
|
||||
|
||||
class _GlobalPooling1D(Layer):
|
||||
"""Abstract class for different global pooling 1D layers.
|
||||
"""
|
||||
|
||||
def __init__(self, **kwargs):
|
||||
super(_GlobalPooling1D, self).__init__(**kwargs)
|
||||
@@ -408,34 +417,36 @@ class _GlobalPooling1D(Layer):
|
||||
|
||||
|
||||
class GlobalAveragePooling1D(_GlobalPooling1D):
|
||||
'''Global average pooling operation for temporal data.
|
||||
"""Global average pooling operation for temporal data.
|
||||
|
||||
# Input shape
|
||||
3D tensor with shape: `(samples, steps, features)`.
|
||||
|
||||
# Output shape
|
||||
2D tensor with shape: `(samples, features)`.
|
||||
'''
|
||||
"""
|
||||
|
||||
def call(self, x, mask=None):
|
||||
return K.mean(x, axis=1)
|
||||
|
||||
|
||||
class GlobalMaxPooling1D(_GlobalPooling1D):
|
||||
'''Global max pooling operation for temporal data.
|
||||
"""Global max pooling operation for temporal data.
|
||||
|
||||
# Input shape
|
||||
3D tensor with shape: `(samples, steps, features)`.
|
||||
|
||||
# Output shape
|
||||
2D tensor with shape: `(samples, features)`.
|
||||
'''
|
||||
"""
|
||||
|
||||
def call(self, x, mask=None):
|
||||
return K.max(x, axis=1)
|
||||
|
||||
|
||||
class _GlobalPooling2D(Layer):
|
||||
"""Abstract class for different global pooling 2D layers.
|
||||
"""
|
||||
|
||||
def __init__(self, dim_ordering='default', **kwargs):
|
||||
super(_GlobalPooling2D, self).__init__(**kwargs)
|
||||
@@ -460,7 +471,7 @@ class _GlobalPooling2D(Layer):
|
||||
|
||||
|
||||
class GlobalAveragePooling2D(_GlobalPooling2D):
|
||||
'''Global average pooling operation for spatial data.
|
||||
"""Global average pooling operation for spatial data.
|
||||
|
||||
# Arguments
|
||||
dim_ordering: 'th' or 'tf'. In 'th' mode, the channels dimension
|
||||
@@ -478,7 +489,7 @@ class GlobalAveragePooling2D(_GlobalPooling2D):
|
||||
# Output shape
|
||||
2D tensor with shape:
|
||||
`(nb_samples, channels)`
|
||||
'''
|
||||
"""
|
||||
|
||||
def call(self, x, mask=None):
|
||||
if self.dim_ordering == 'tf':
|
||||
@@ -488,7 +499,7 @@ class GlobalAveragePooling2D(_GlobalPooling2D):
|
||||
|
||||
|
||||
class GlobalMaxPooling2D(_GlobalPooling2D):
|
||||
'''Global max pooling operation for spatial data.
|
||||
"""Global max pooling operation for spatial data.
|
||||
|
||||
# Arguments
|
||||
dim_ordering: 'th' or 'tf'. In 'th' mode, the channels dimension
|
||||
@@ -506,7 +517,7 @@ class GlobalMaxPooling2D(_GlobalPooling2D):
|
||||
# Output shape
|
||||
2D tensor with shape:
|
||||
`(nb_samples, channels)`
|
||||
'''
|
||||
"""
|
||||
|
||||
def call(self, x, mask=None):
|
||||
if self.dim_ordering == 'tf':
|
||||
@@ -516,6 +527,8 @@ class GlobalMaxPooling2D(_GlobalPooling2D):
|
||||
|
||||
|
||||
class _GlobalPooling3D(Layer):
|
||||
"""Abstract class for different global pooling 3D layers.
|
||||
"""
|
||||
|
||||
def __init__(self, dim_ordering='default', **kwargs):
|
||||
super(_GlobalPooling3D, self).__init__(**kwargs)
|
||||
@@ -540,7 +553,7 @@ class _GlobalPooling3D(Layer):
|
||||
|
||||
|
||||
class GlobalAveragePooling3D(_GlobalPooling3D):
|
||||
'''Global Average pooling operation for 3D data.
|
||||
"""Global Average pooling operation for 3D data.
|
||||
|
||||
# Arguments
|
||||
dim_ordering: 'th' or 'tf'. In 'th' mode, the channels dimension
|
||||
@@ -558,7 +571,7 @@ class GlobalAveragePooling3D(_GlobalPooling3D):
|
||||
# Output shape
|
||||
2D tensor with shape:
|
||||
`(nb_samples, channels)`
|
||||
'''
|
||||
"""
|
||||
|
||||
def call(self, x, mask=None):
|
||||
if self.dim_ordering == 'tf':
|
||||
@@ -568,7 +581,7 @@ class GlobalAveragePooling3D(_GlobalPooling3D):
|
||||
|
||||
|
||||
class GlobalMaxPooling3D(_GlobalPooling3D):
|
||||
'''Global Max pooling operation for 3D data.
|
||||
"""Global Max pooling operation for 3D data.
|
||||
|
||||
# Arguments
|
||||
dim_ordering: 'th' or 'tf'. In 'th' mode, the channels dimension
|
||||
@@ -586,7 +599,7 @@ class GlobalMaxPooling3D(_GlobalPooling3D):
|
||||
# Output shape
|
||||
2D tensor with shape:
|
||||
`(nb_samples, channels)`
|
||||
'''
|
||||
"""
|
||||
|
||||
def call(self, x, mask=None):
|
||||
if self.dim_ordering == 'tf':
|
||||
|
||||
+199
-159
@@ -3,14 +3,30 @@ from __future__ import absolute_import
|
||||
import numpy as np
|
||||
|
||||
from .. import backend as K
|
||||
from .. import activations, initializations, regularizers
|
||||
from ..engine import Layer, InputSpec
|
||||
from .. import activations
|
||||
from .. import initializations
|
||||
from .. import regularizers
|
||||
from ..engine import Layer
|
||||
from ..engine import InputSpec
|
||||
|
||||
|
||||
def time_distributed_dense(x, w, b=None, dropout=None,
|
||||
input_dim=None, output_dim=None, timesteps=None):
|
||||
'''Apply y.w + b for every temporal slice y of x.
|
||||
'''
|
||||
"""Apply `y . w + b` for every temporal slice y of x.
|
||||
|
||||
# Arguments
|
||||
x: input tensor.
|
||||
w: weight matrix.
|
||||
b: optional bias vector.
|
||||
dropout: whether to apply dropout (same dropout mask
|
||||
for every temporal slice of the input).
|
||||
input_dim: integer; optional dimensionality of the input.
|
||||
output_dim: integer; optional dimensionality of the output.
|
||||
timesteps: integer; optional number of timesteps.
|
||||
|
||||
# Returns
|
||||
Output tensor.
|
||||
"""
|
||||
if not input_dim:
|
||||
input_dim = K.shape(x)[2]
|
||||
if not timesteps:
|
||||
@@ -29,10 +45,10 @@ def time_distributed_dense(x, w, b=None, dropout=None,
|
||||
x = K.reshape(x, (-1, input_dim))
|
||||
x = K.dot(x, w)
|
||||
if b:
|
||||
x = x + b
|
||||
x += b
|
||||
# reshape to 3D tensor
|
||||
if K.backend() == 'tensorflow':
|
||||
x = K.reshape(x, K.pack([-1, timesteps, output_dim]))
|
||||
x = K.reshape(x, K.stack([-1, timesteps, output_dim]))
|
||||
x.set_shape([None, None, output_dim])
|
||||
else:
|
||||
x = K.reshape(x, (-1, timesteps, output_dim))
|
||||
@@ -40,7 +56,7 @@ def time_distributed_dense(x, w, b=None, dropout=None,
|
||||
|
||||
|
||||
class Recurrent(Layer):
|
||||
'''Abstract base class for recurrent layers.
|
||||
"""Abstract base class for recurrent layers.
|
||||
Do not use in a model -- it's not a valid layer!
|
||||
Use its children classes `LSTM`, `GRU` and `SimpleRNN` instead.
|
||||
|
||||
@@ -128,23 +144,24 @@ class Recurrent(Layer):
|
||||
# Note on using statefulness in RNNs
|
||||
You can set RNN layers to be 'stateful', which means that the states
|
||||
computed for the samples in one batch will be reused as initial states
|
||||
for the samples in the next batch.
|
||||
This assumes a one-to-one mapping between
|
||||
samples in different successive batches.
|
||||
for the samples in the next batch. This assumes a one-to-one mapping
|
||||
between samples in different successive batches.
|
||||
|
||||
To enable statefulness:
|
||||
- specify `stateful=True` in the layer constructor.
|
||||
- specify a fixed batch size for your model, by passing
|
||||
if sequential model:
|
||||
a `batch_input_shape=(...)` to the first layer in your model.
|
||||
`batch_input_shape=(...)` to the first layer in your model.
|
||||
else for functional model with 1 or more Input layers:
|
||||
a `batch_shape=(...)` to all the first layers in your model.
|
||||
`batch_shape=(...)` to all the first layers in your model.
|
||||
This is the expected shape of your inputs *including the batch size*.
|
||||
It should be a tuple of integers, e.g. `(32, 10, 100)`.
|
||||
- specify `shuffle=False` when calling fit().
|
||||
|
||||
To reset the states of your model, call `.reset_states()` on either
|
||||
a specific layer, or on your entire model.
|
||||
'''
|
||||
"""
|
||||
|
||||
def __init__(self, weights=None,
|
||||
return_sequences=False, go_backwards=False, stateful=False,
|
||||
unroll=False, consume_less='cpu',
|
||||
@@ -198,7 +215,7 @@ class Recurrent(Layer):
|
||||
# input shape: (nb_samples, time (padded with zeros), input_dim)
|
||||
# note that the .build() method of subclasses MUST define
|
||||
# self.input_spec with a complete input shape.
|
||||
input_shape = self.input_spec[0].shape
|
||||
input_shape = K.int_shape(x)
|
||||
if self.unroll and input_shape[1] is None:
|
||||
raise ValueError('Cannot unroll a RNN if the '
|
||||
'time dimension is undefined. \n'
|
||||
@@ -229,7 +246,7 @@ class Recurrent(Layer):
|
||||
updates = []
|
||||
for i in range(len(states)):
|
||||
updates.append((self.states[i], states[i]))
|
||||
self.add_updates(updates, x)
|
||||
self.add_update(updates, x)
|
||||
|
||||
if self.return_sequences:
|
||||
return outputs
|
||||
@@ -253,7 +270,7 @@ class Recurrent(Layer):
|
||||
|
||||
|
||||
class SimpleRNN(Recurrent):
|
||||
'''Fully-connected RNN where the output is to be fed back to input.
|
||||
"""Fully-connected RNN where the output is to be fed back to input.
|
||||
|
||||
# Arguments
|
||||
output_dim: dimension of the internal projections and the final output.
|
||||
@@ -275,7 +292,8 @@ class SimpleRNN(Recurrent):
|
||||
|
||||
# References
|
||||
- [A Theoretically Grounded Application of Dropout in Recurrent Neural Networks](http://arxiv.org/abs/1512.05287)
|
||||
'''
|
||||
"""
|
||||
|
||||
def __init__(self, output_dim,
|
||||
init='glorot_uniform', inner_init='orthogonal',
|
||||
activation='tanh',
|
||||
@@ -288,7 +306,8 @@ class SimpleRNN(Recurrent):
|
||||
self.W_regularizer = regularizers.get(W_regularizer)
|
||||
self.U_regularizer = regularizers.get(U_regularizer)
|
||||
self.b_regularizer = regularizers.get(b_regularizer)
|
||||
self.dropout_W, self.dropout_U = dropout_W, dropout_U
|
||||
self.dropout_W = dropout_W
|
||||
self.dropout_U = dropout_U
|
||||
|
||||
if self.dropout_W or self.dropout_U:
|
||||
self.uses_learning_phase = True
|
||||
@@ -304,24 +323,18 @@ class SimpleRNN(Recurrent):
|
||||
input_dim = input_shape[2]
|
||||
self.input_dim = input_dim
|
||||
|
||||
self.W = self.init((input_dim, self.output_dim),
|
||||
name='{}_W'.format(self.name))
|
||||
self.U = self.inner_init((self.output_dim, self.output_dim),
|
||||
name='{}_U'.format(self.name))
|
||||
self.b = K.zeros((self.output_dim,), name='{}_b'.format(self.name))
|
||||
|
||||
self.regularizers = []
|
||||
if self.W_regularizer:
|
||||
self.W_regularizer.set_param(self.W)
|
||||
self.regularizers.append(self.W_regularizer)
|
||||
if self.U_regularizer:
|
||||
self.U_regularizer.set_param(self.U)
|
||||
self.regularizers.append(self.U_regularizer)
|
||||
if self.b_regularizer:
|
||||
self.b_regularizer.set_param(self.b)
|
||||
self.regularizers.append(self.b_regularizer)
|
||||
|
||||
self.trainable_weights = [self.W, self.U, self.b]
|
||||
self.W = self.add_weight((input_dim, self.output_dim),
|
||||
initializer=self.init,
|
||||
name='{}_W'.format(self.name),
|
||||
regularizer=self.W_regularizer)
|
||||
self.U = self.add_weight((self.output_dim, self.output_dim),
|
||||
initializer=self.inner_init,
|
||||
name='{}_U'.format(self.name),
|
||||
regularizer=self.U_regularizer)
|
||||
self.b = self.add_weight((self.output_dim,),
|
||||
initializer='zero',
|
||||
name='{}_b'.format(self.name),
|
||||
regularizer=self.b_regularizer)
|
||||
|
||||
if self.initial_weights is not None:
|
||||
self.set_weights(self.initial_weights)
|
||||
@@ -332,16 +345,16 @@ class SimpleRNN(Recurrent):
|
||||
assert self.stateful, 'Layer must be stateful.'
|
||||
input_shape = self.input_spec[0].shape
|
||||
if not input_shape[0]:
|
||||
raise Exception('If a RNN is stateful, it needs to know '
|
||||
'its batch size. Specify the batch size '
|
||||
'of your input tensors: \n'
|
||||
'- If using a Sequential model, '
|
||||
'specify the batch size by passing '
|
||||
'a `batch_input_shape` '
|
||||
'argument to your first layer.\n'
|
||||
'- If using the functional API, specify '
|
||||
'the time dimension by passing a '
|
||||
'`batch_shape` argument to your Input layer.')
|
||||
raise ValueError('If a RNN is stateful, it needs to know '
|
||||
'its batch size. Specify the batch size '
|
||||
'of your input tensors: \n'
|
||||
'- If using a Sequential model, '
|
||||
'specify the batch size by passing '
|
||||
'a `batch_input_shape` '
|
||||
'argument to your first layer.\n'
|
||||
'- If using the functional API, specify '
|
||||
'the time dimension by passing a '
|
||||
'`batch_shape` argument to your Input layer.')
|
||||
if hasattr(self, 'states'):
|
||||
K.set_value(self.states[0],
|
||||
np.zeros((input_shape[0], self.output_dim)))
|
||||
@@ -350,7 +363,7 @@ class SimpleRNN(Recurrent):
|
||||
|
||||
def preprocess_input(self, x):
|
||||
if self.consume_less == 'cpu':
|
||||
input_shape = self.input_spec[0].shape
|
||||
input_shape = K.int_shape(x)
|
||||
input_dim = input_shape[2]
|
||||
timesteps = input_shape[1]
|
||||
return time_distributed_dense(x, self.W, self.b, self.dropout_W,
|
||||
@@ -382,7 +395,7 @@ class SimpleRNN(Recurrent):
|
||||
else:
|
||||
constants.append(K.cast_to_floatx(1.))
|
||||
if self.consume_less == 'cpu' and 0 < self.dropout_W < 1:
|
||||
input_shape = self.input_spec[0].shape
|
||||
input_shape = K.int_shape(x)
|
||||
input_dim = input_shape[-1]
|
||||
ones = K.ones_like(K.reshape(x[:, 0, 0], (-1, 1)))
|
||||
ones = K.tile(ones, (1, int(input_dim)))
|
||||
@@ -407,7 +420,7 @@ class SimpleRNN(Recurrent):
|
||||
|
||||
|
||||
class GRU(Recurrent):
|
||||
'''Gated Recurrent Unit - Cho et al. 2014.
|
||||
"""Gated Recurrent Unit - Cho et al. 2014.
|
||||
|
||||
# Arguments
|
||||
output_dim: dimension of the internal projections and the final output.
|
||||
@@ -429,10 +442,11 @@ class GRU(Recurrent):
|
||||
dropout_U: float between 0 and 1. Fraction of the input units to drop for recurrent connections.
|
||||
|
||||
# References
|
||||
- [On the Properties of Neural Machine Translation: Encoder–Decoder Approaches](http://www.aclweb.org/anthology/W14-4012)
|
||||
- [Empirical Evaluation of Gated Recurrent Neural Networks on Sequence Modeling](http://arxiv.org/pdf/1412.3555v1.pdf)
|
||||
- [On the Properties of Neural Machine Translation: Encoder-Decoder Approaches](https://arxiv.org/abs/1409.1259)
|
||||
- [Empirical Evaluation of Gated Recurrent Neural Networks on Sequence Modeling](http://arxiv.org/abs/1412.3555v1)
|
||||
- [A Theoretically Grounded Application of Dropout in Recurrent Neural Networks](http://arxiv.org/abs/1512.05287)
|
||||
'''
|
||||
"""
|
||||
|
||||
def __init__(self, output_dim,
|
||||
init='glorot_uniform', inner_init='orthogonal',
|
||||
activation='tanh', inner_activation='hard_sigmoid',
|
||||
@@ -446,7 +460,8 @@ class GRU(Recurrent):
|
||||
self.W_regularizer = regularizers.get(W_regularizer)
|
||||
self.U_regularizer = regularizers.get(U_regularizer)
|
||||
self.b_regularizer = regularizers.get(b_regularizer)
|
||||
self.dropout_W, self.dropout_U = dropout_W, dropout_U
|
||||
self.dropout_W = dropout_W
|
||||
self.dropout_U = dropout_U
|
||||
|
||||
if self.dropout_W or self.dropout_U:
|
||||
self.uses_learning_phase = True
|
||||
@@ -463,57 +478,59 @@ class GRU(Recurrent):
|
||||
self.states = [None]
|
||||
|
||||
if self.consume_less == 'gpu':
|
||||
|
||||
self.W = self.init((self.input_dim, 3 * self.output_dim),
|
||||
name='{}_W'.format(self.name))
|
||||
self.U = self.inner_init((self.output_dim, 3 * self.output_dim),
|
||||
name='{}_U'.format(self.name))
|
||||
|
||||
self.b = K.variable(np.hstack((np.zeros(self.output_dim),
|
||||
np.zeros(self.output_dim),
|
||||
np.zeros(self.output_dim))),
|
||||
name='{}_b'.format(self.name))
|
||||
|
||||
self.trainable_weights = [self.W, self.U, self.b]
|
||||
self.W = self.add_weight((self.input_dim, 3 * self.output_dim),
|
||||
initializer=self.init,
|
||||
name='{}_W'.format(self.name),
|
||||
regularizer=self.W_regularizer)
|
||||
self.U = self.add_weight((self.output_dim, 3 * self.output_dim),
|
||||
initializer=self.inner_init,
|
||||
name='{}_U'.format(self.name),
|
||||
regularizer=self.U_regularizer)
|
||||
self.b = self.add_weight((self.output_dim * 3,),
|
||||
initializer='zero',
|
||||
name='{}_b'.format(self.name),
|
||||
regularizer=self.b_regularizer)
|
||||
else:
|
||||
|
||||
self.W_z = self.init((self.input_dim, self.output_dim),
|
||||
name='{}_W_z'.format(self.name))
|
||||
self.U_z = self.inner_init((self.output_dim, self.output_dim),
|
||||
name='{}_U_z'.format(self.name))
|
||||
self.b_z = K.zeros((self.output_dim,), name='{}_b_z'.format(self.name))
|
||||
|
||||
self.W_r = self.init((self.input_dim, self.output_dim),
|
||||
name='{}_W_r'.format(self.name))
|
||||
self.U_r = self.inner_init((self.output_dim, self.output_dim),
|
||||
name='{}_U_r'.format(self.name))
|
||||
self.b_r = K.zeros((self.output_dim,), name='{}_b_r'.format(self.name))
|
||||
|
||||
self.W_h = self.init((self.input_dim, self.output_dim),
|
||||
name='{}_W_h'.format(self.name))
|
||||
self.U_h = self.inner_init((self.output_dim, self.output_dim),
|
||||
name='{}_U_h'.format(self.name))
|
||||
self.b_h = K.zeros((self.output_dim,), name='{}_b_h'.format(self.name))
|
||||
|
||||
self.trainable_weights = [self.W_z, self.U_z, self.b_z,
|
||||
self.W_r, self.U_r, self.b_r,
|
||||
self.W_h, self.U_h, self.b_h]
|
||||
|
||||
self.W_z = self.add_weight((self.input_dim, self.output_dim),
|
||||
initializer=self.init,
|
||||
name='{}_W_z'.format(self.name),
|
||||
regularizer=self.W_regularizer)
|
||||
self.U_z = self.add_weight((self.output_dim, self.output_dim),
|
||||
initializer=self.init,
|
||||
name='{}_U_z'.format(self.name),
|
||||
regularizer=self.W_regularizer)
|
||||
self.b_z = self.add_weight((self.output_dim,),
|
||||
initializer='zero',
|
||||
name='{}_b_z'.format(self.name),
|
||||
regularizer=self.b_regularizer)
|
||||
self.W_r = self.add_weight((self.input_dim, self.output_dim),
|
||||
initializer=self.init,
|
||||
name='{}_W_r'.format(self.name),
|
||||
regularizer=self.W_regularizer)
|
||||
self.U_r = self.add_weight((self.output_dim, self.output_dim),
|
||||
initializer=self.init,
|
||||
name='{}_U_r'.format(self.name),
|
||||
regularizer=self.W_regularizer)
|
||||
self.b_r = self.add_weight((self.output_dim,),
|
||||
initializer='zero',
|
||||
name='{}_b_r'.format(self.name),
|
||||
regularizer=self.b_regularizer)
|
||||
self.W_h = self.add_weight((self.input_dim, self.output_dim),
|
||||
initializer=self.init,
|
||||
name='{}_W_h'.format(self.name),
|
||||
regularizer=self.W_regularizer)
|
||||
self.U_h = self.add_weight((self.output_dim, self.output_dim),
|
||||
initializer=self.init,
|
||||
name='{}_U_h'.format(self.name),
|
||||
regularizer=self.W_regularizer)
|
||||
self.b_h = self.add_weight((self.output_dim,),
|
||||
initializer='zero',
|
||||
name='{}_b_h'.format(self.name),
|
||||
regularizer=self.b_regularizer)
|
||||
self.W = K.concatenate([self.W_z, self.W_r, self.W_h])
|
||||
self.U = K.concatenate([self.U_z, self.U_r, self.U_h])
|
||||
self.b = K.concatenate([self.b_z, self.b_r, self.b_h])
|
||||
|
||||
self.regularizers = []
|
||||
if self.W_regularizer:
|
||||
self.W_regularizer.set_param(self.W)
|
||||
self.regularizers.append(self.W_regularizer)
|
||||
if self.U_regularizer:
|
||||
self.U_regularizer.set_param(self.U)
|
||||
self.regularizers.append(self.U_regularizer)
|
||||
if self.b_regularizer:
|
||||
self.b_regularizer.set_param(self.b)
|
||||
self.regularizers.append(self.b_regularizer)
|
||||
|
||||
if self.initial_weights is not None:
|
||||
self.set_weights(self.initial_weights)
|
||||
del self.initial_weights
|
||||
@@ -523,8 +540,9 @@ class GRU(Recurrent):
|
||||
assert self.stateful, 'Layer must be stateful.'
|
||||
input_shape = self.input_spec[0].shape
|
||||
if not input_shape[0]:
|
||||
raise Exception('If a RNN is stateful, a complete ' +
|
||||
'input_shape must be provided (including batch size).')
|
||||
raise ValueError('If a RNN is stateful, a complete '
|
||||
'input_shape must be provided '
|
||||
'(including batch size).')
|
||||
if hasattr(self, 'states'):
|
||||
K.set_value(self.states[0],
|
||||
np.zeros((input_shape[0], self.output_dim)))
|
||||
@@ -533,7 +551,7 @@ class GRU(Recurrent):
|
||||
|
||||
def preprocess_input(self, x):
|
||||
if self.consume_less == 'cpu':
|
||||
input_shape = self.input_spec[0].shape
|
||||
input_shape = K.int_shape(x)
|
||||
input_dim = input_shape[2]
|
||||
timesteps = input_shape[1]
|
||||
|
||||
@@ -578,7 +596,7 @@ class GRU(Recurrent):
|
||||
x_r = K.dot(x * B_W[1], self.W_r) + self.b_r
|
||||
x_h = K.dot(x * B_W[2], self.W_h) + self.b_h
|
||||
else:
|
||||
raise Exception('Unknown `consume_less` mode.')
|
||||
raise ValueError('Unknown `consume_less` mode.')
|
||||
z = self.inner_activation(x_z + K.dot(h_tm1 * B_U[0], self.U_z))
|
||||
r = self.inner_activation(x_r + K.dot(h_tm1 * B_U[1], self.U_r))
|
||||
|
||||
@@ -597,7 +615,7 @@ class GRU(Recurrent):
|
||||
constants.append([K.cast_to_floatx(1.) for _ in range(3)])
|
||||
|
||||
if 0 < self.dropout_W < 1:
|
||||
input_shape = self.input_spec[0].shape
|
||||
input_shape = K.int_shape(x)
|
||||
input_dim = input_shape[-1]
|
||||
ones = K.ones_like(K.reshape(x[:, 0, 0], (-1, 1)))
|
||||
ones = K.tile(ones, (1, int(input_dim)))
|
||||
@@ -623,7 +641,7 @@ class GRU(Recurrent):
|
||||
|
||||
|
||||
class LSTM(Recurrent):
|
||||
'''Long-Short Term Memory unit - Hochreiter 1997.
|
||||
"""Long-Short Term Memory unit - Hochreiter 1997.
|
||||
|
||||
For a step-by-step description of the algorithm, see
|
||||
[this tutorial](http://deeplearning.net/tutorial/lstm.html).
|
||||
@@ -653,9 +671,10 @@ class LSTM(Recurrent):
|
||||
# References
|
||||
- [Long short-term memory](http://deeplearning.cs.cmu.edu/pdfs/Hochreiter97_lstm.pdf) (original 1997 paper)
|
||||
- [Learning to forget: Continual prediction with LSTM](http://www.mitpressjournals.org/doi/pdf/10.1162/089976600300015015)
|
||||
- [Supervised sequence labelling with recurrent neural networks](http://www.cs.toronto.edu/~graves/preprint.pdf)
|
||||
- [Supervised sequence labeling with recurrent neural networks](http://www.cs.toronto.edu/~graves/preprint.pdf)
|
||||
- [A Theoretically Grounded Application of Dropout in Recurrent Neural Networks](http://arxiv.org/abs/1512.05287)
|
||||
'''
|
||||
"""
|
||||
|
||||
def __init__(self, output_dim,
|
||||
init='glorot_uniform', inner_init='orthogonal',
|
||||
forget_bias_init='one', activation='tanh',
|
||||
@@ -671,7 +690,8 @@ class LSTM(Recurrent):
|
||||
self.W_regularizer = regularizers.get(W_regularizer)
|
||||
self.U_regularizer = regularizers.get(U_regularizer)
|
||||
self.b_regularizer = regularizers.get(b_regularizer)
|
||||
self.dropout_W, self.dropout_U = dropout_W, dropout_U
|
||||
self.dropout_W = dropout_W
|
||||
self.dropout_U = dropout_U
|
||||
|
||||
if self.dropout_W or self.dropout_U:
|
||||
self.uses_learning_phase = True
|
||||
@@ -688,63 +708,83 @@ class LSTM(Recurrent):
|
||||
self.states = [None, None]
|
||||
|
||||
if self.consume_less == 'gpu':
|
||||
self.W = self.init((self.input_dim, 4 * self.output_dim),
|
||||
name='{}_W'.format(self.name))
|
||||
self.U = self.inner_init((self.output_dim, 4 * self.output_dim),
|
||||
name='{}_U'.format(self.name))
|
||||
self.W = self.add_weight((self.input_dim, 4 * self.output_dim),
|
||||
initializer=self.init,
|
||||
name='{}_W'.format(self.name),
|
||||
regularizer=self.W_regularizer)
|
||||
self.U = self.add_weight((self.output_dim, 4 * self.output_dim),
|
||||
initializer=self.inner_init,
|
||||
name='{}_U'.format(self.name),
|
||||
regularizer=self.U_regularizer)
|
||||
|
||||
self.b = K.variable(np.hstack((np.zeros(self.output_dim),
|
||||
K.get_value(self.forget_bias_init((self.output_dim,))),
|
||||
np.zeros(self.output_dim),
|
||||
np.zeros(self.output_dim))),
|
||||
name='{}_b'.format(self.name))
|
||||
self.trainable_weights = [self.W, self.U, self.b]
|
||||
def b_reg(shape, name=None):
|
||||
return K.variable(np.hstack((np.zeros(self.output_dim),
|
||||
K.get_value(self.forget_bias_init((self.output_dim,))),
|
||||
np.zeros(self.output_dim),
|
||||
np.zeros(self.output_dim))),
|
||||
name='{}_b'.format(self.name))
|
||||
self.b = self.add_weight((self.output_dim * 4,),
|
||||
initializer=b_reg,
|
||||
name='{}_b'.format(self.name),
|
||||
regularizer=self.b_regularizer)
|
||||
else:
|
||||
self.W_i = self.init((self.input_dim, self.output_dim),
|
||||
name='{}_W_i'.format(self.name))
|
||||
self.U_i = self.inner_init((self.output_dim, self.output_dim),
|
||||
name='{}_U_i'.format(self.name))
|
||||
self.b_i = K.zeros((self.output_dim,), name='{}_b_i'.format(self.name))
|
||||
|
||||
self.W_f = self.init((self.input_dim, self.output_dim),
|
||||
name='{}_W_f'.format(self.name))
|
||||
self.U_f = self.inner_init((self.output_dim, self.output_dim),
|
||||
name='{}_U_f'.format(self.name))
|
||||
self.b_f = self.forget_bias_init((self.output_dim,),
|
||||
name='{}_b_f'.format(self.name))
|
||||
|
||||
self.W_c = self.init((self.input_dim, self.output_dim),
|
||||
name='{}_W_c'.format(self.name))
|
||||
self.U_c = self.inner_init((self.output_dim, self.output_dim),
|
||||
name='{}_U_c'.format(self.name))
|
||||
self.b_c = K.zeros((self.output_dim,), name='{}_b_c'.format(self.name))
|
||||
|
||||
self.W_o = self.init((self.input_dim, self.output_dim),
|
||||
name='{}_W_o'.format(self.name))
|
||||
self.U_o = self.inner_init((self.output_dim, self.output_dim),
|
||||
name='{}_U_o'.format(self.name))
|
||||
self.b_o = K.zeros((self.output_dim,), name='{}_b_o'.format(self.name))
|
||||
self.W_i = self.add_weight((self.input_dim, self.output_dim),
|
||||
initializer=self.init,
|
||||
name='{}_W_i'.format(self.name),
|
||||
regularizer=self.W_regularizer)
|
||||
self.U_i = self.add_weight((self.output_dim, self.output_dim),
|
||||
initializer=self.init,
|
||||
name='{}_U_i'.format(self.name),
|
||||
regularizer=self.W_regularizer)
|
||||
self.b_i = self.add_weight((self.output_dim,),
|
||||
initializer='zero',
|
||||
name='{}_b_i'.format(self.name),
|
||||
regularizer=self.b_regularizer)
|
||||
self.W_f = self.add_weight((self.input_dim, self.output_dim),
|
||||
initializer=self.init,
|
||||
name='{}_W_f'.format(self.name),
|
||||
regularizer=self.W_regularizer)
|
||||
self.U_f = self.add_weight((self.output_dim, self.output_dim),
|
||||
initializer=self.init,
|
||||
name='{}_U_f'.format(self.name),
|
||||
regularizer=self.W_regularizer)
|
||||
self.b_f = self.add_weight((self.output_dim,),
|
||||
initializer=self.forget_bias_init,
|
||||
name='{}_b_f'.format(self.name),
|
||||
regularizer=self.b_regularizer)
|
||||
self.W_c = self.add_weight((self.input_dim, self.output_dim),
|
||||
initializer=self.init,
|
||||
name='{}_W_c'.format(self.name),
|
||||
regularizer=self.W_regularizer)
|
||||
self.U_c = self.add_weight((self.output_dim, self.output_dim),
|
||||
initializer=self.init,
|
||||
name='{}_U_c'.format(self.name),
|
||||
regularizer=self.W_regularizer)
|
||||
self.b_c = self.add_weight((self.output_dim,),
|
||||
initializer='zero',
|
||||
name='{}_b_c'.format(self.name),
|
||||
regularizer=self.b_regularizer)
|
||||
self.W_o = self.add_weight((self.input_dim, self.output_dim),
|
||||
initializer=self.init,
|
||||
name='{}_W_o'.format(self.name),
|
||||
regularizer=self.W_regularizer)
|
||||
self.U_o = self.add_weight((self.output_dim, self.output_dim),
|
||||
initializer=self.init,
|
||||
name='{}_U_o'.format(self.name),
|
||||
regularizer=self.W_regularizer)
|
||||
self.b_o = self.add_weight((self.output_dim,),
|
||||
initializer='zero',
|
||||
name='{}_b_o'.format(self.name),
|
||||
regularizer=self.b_regularizer)
|
||||
|
||||
self.trainable_weights = [self.W_i, self.U_i, self.b_i,
|
||||
self.W_c, self.U_c, self.b_c,
|
||||
self.W_f, self.U_f, self.b_f,
|
||||
self.W_o, self.U_o, self.b_o]
|
||||
|
||||
self.W = K.concatenate([self.W_i, self.W_f, self.W_c, self.W_o])
|
||||
self.U = K.concatenate([self.U_i, self.U_f, self.U_c, self.U_o])
|
||||
self.b = K.concatenate([self.b_i, self.b_f, self.b_c, self.b_o])
|
||||
|
||||
self.regularizers = []
|
||||
if self.W_regularizer:
|
||||
self.W_regularizer.set_param(self.W)
|
||||
self.regularizers.append(self.W_regularizer)
|
||||
if self.U_regularizer:
|
||||
self.U_regularizer.set_param(self.U)
|
||||
self.regularizers.append(self.U_regularizer)
|
||||
if self.b_regularizer:
|
||||
self.b_regularizer.set_param(self.b)
|
||||
self.regularizers.append(self.b_regularizer)
|
||||
|
||||
if self.initial_weights is not None:
|
||||
self.set_weights(self.initial_weights)
|
||||
del self.initial_weights
|
||||
@@ -754,8 +794,8 @@ class LSTM(Recurrent):
|
||||
assert self.stateful, 'Layer must be stateful.'
|
||||
input_shape = self.input_spec[0].shape
|
||||
if not input_shape[0]:
|
||||
raise Exception('If a RNN is stateful, a complete ' +
|
||||
'input_shape must be provided (including batch size).')
|
||||
raise ValueError('If a RNN is stateful, a complete ' +
|
||||
'input_shape must be provided (including batch size).')
|
||||
if hasattr(self, 'states'):
|
||||
K.set_value(self.states[0],
|
||||
np.zeros((input_shape[0], self.output_dim)))
|
||||
@@ -771,7 +811,7 @@ class LSTM(Recurrent):
|
||||
dropout = self.dropout_W
|
||||
else:
|
||||
dropout = 0
|
||||
input_shape = self.input_spec[0].shape
|
||||
input_shape = K.int_shape(x)
|
||||
input_dim = input_shape[2]
|
||||
timesteps = input_shape[1]
|
||||
|
||||
@@ -817,7 +857,7 @@ class LSTM(Recurrent):
|
||||
x_c = K.dot(x * B_W[2], self.W_c) + self.b_c
|
||||
x_o = K.dot(x * B_W[3], self.W_o) + self.b_o
|
||||
else:
|
||||
raise Exception('Unknown `consume_less` mode.')
|
||||
raise ValueError('Unknown `consume_less` mode.')
|
||||
|
||||
i = self.inner_activation(x_i + K.dot(h_tm1 * B_U[0], self.U_i))
|
||||
f = self.inner_activation(x_f + K.dot(h_tm1 * B_U[1], self.U_f))
|
||||
@@ -838,7 +878,7 @@ class LSTM(Recurrent):
|
||||
constants.append([K.cast_to_floatx(1.) for _ in range(4)])
|
||||
|
||||
if 0 < self.dropout_W < 1:
|
||||
input_shape = self.input_spec[0].shape
|
||||
input_shape = K.int_shape(x)
|
||||
input_dim = input_shape[-1]
|
||||
ones = K.ones_like(K.reshape(x[:, 0, 0], (-1, 1)))
|
||||
ones = K.tile(ones, (1, int(input_dim)))
|
||||
|
||||
+61
-53
@@ -1,8 +1,11 @@
|
||||
from ..engine import Layer, InputSpec
|
||||
from ..engine import Layer
|
||||
from ..engine import InputSpec
|
||||
from .. import backend as K
|
||||
|
||||
|
||||
class Wrapper(Layer):
|
||||
"""Abstract wrapper base class.
|
||||
"""
|
||||
|
||||
def __init__(self, layer, **kwargs):
|
||||
self.layer = layer
|
||||
@@ -10,23 +13,14 @@ class Wrapper(Layer):
|
||||
super(Wrapper, self).__init__(**kwargs)
|
||||
|
||||
def build(self, input_shape=None):
|
||||
'''Assumes that self.layer is already set.
|
||||
Should be called at the end of .build() in the
|
||||
children classes.
|
||||
'''
|
||||
# Assumes that self.layer is already set.
|
||||
# Should be called at the end of .build() in the children classes.
|
||||
self.trainable_weights = getattr(self.layer, 'trainable_weights', [])
|
||||
self.non_trainable_weights = getattr(self.layer, 'non_trainable_weights', [])
|
||||
self.updates = getattr(self.layer, 'updates', [])
|
||||
self.regularizers = getattr(self.layer, 'regularizers', [])
|
||||
self.losses = getattr(self.layer, 'losses', [])
|
||||
self.constraints = getattr(self.layer, 'constraints', {})
|
||||
|
||||
# properly attribute the current layer to
|
||||
# regularizers that need access to it
|
||||
# (e.g. ActivityRegularizer).
|
||||
for regularizer in self.regularizers:
|
||||
if hasattr(regularizer, 'set_layer'):
|
||||
regularizer.set_layer(self)
|
||||
|
||||
def get_weights(self):
|
||||
weights = self.layer.get_weights()
|
||||
return weights
|
||||
@@ -48,18 +42,19 @@ class Wrapper(Layer):
|
||||
|
||||
|
||||
class TimeDistributed(Wrapper):
|
||||
"""This wrapper allows to apply a layer to every
|
||||
temporal slice of an input.
|
||||
"""This wrapper allows to apply a layer to every temporal slice of an input.
|
||||
|
||||
The input should be at least 3D,
|
||||
and the dimension of index one will be considered to be
|
||||
the temporal dimension.
|
||||
The input should be at least 3D, and the dimension of index one
|
||||
will be considered to be the temporal dimension.
|
||||
|
||||
Consider a batch of 32 samples, where each sample is a sequence of 10
|
||||
vectors of 16 dimensions. The batch input shape of the layer is then `(32, 10, 16)`
|
||||
(and the `input_shape`, not including the samples dimension, is `(10, 16)`).
|
||||
Consider a batch of 32 samples,
|
||||
where each sample is a sequence of 10 vectors of 16 dimensions.
|
||||
The batch input shape of the layer is then `(32, 10, 16)`,
|
||||
and the `input_shape`, not including the samples dimension, is `(10, 16)`.
|
||||
|
||||
You can then use `TimeDistributed` to apply a `Dense` layer
|
||||
to each of the 10 timesteps, independently:
|
||||
|
||||
You can then use `TimeDistributed` to apply a `Dense` layer to each of the 10 timesteps, independently:
|
||||
```python
|
||||
# as the first layer in a model
|
||||
model = Sequential()
|
||||
@@ -73,19 +68,22 @@ class TimeDistributed(Wrapper):
|
||||
|
||||
The output will then have shape `(32, 10, 8)`.
|
||||
|
||||
Note this is strictly equivalent to using `layers.core.TimeDistributedDense`.
|
||||
Note this is strictly equivalent to
|
||||
using `layers.core.TimeDistributedDense`.
|
||||
However what is different about `TimeDistributed`
|
||||
is that it can be used with arbitrary layers, not just `Dense`,
|
||||
for instance with a `Convolution2D` layer:
|
||||
|
||||
```python
|
||||
model = Sequential()
|
||||
model.add(TimeDistributed(Convolution2D(64, 3, 3), input_shape=(10, 3, 299, 299)))
|
||||
model.add(TimeDistributed(Convolution2D(64, 3, 3),
|
||||
input_shape=(10, 3, 299, 299)))
|
||||
```
|
||||
|
||||
# Arguments
|
||||
layer: a layer instance.
|
||||
"""
|
||||
|
||||
def __init__(self, layer, **kwargs):
|
||||
self.supports_masking = True
|
||||
super(TimeDistributed, self).__init__(layer, **kwargs)
|
||||
@@ -105,15 +103,15 @@ class TimeDistributed(Wrapper):
|
||||
timesteps = input_shape[1]
|
||||
return (child_output_shape[0], timesteps) + child_output_shape[1:]
|
||||
|
||||
def call(self, X, mask=None):
|
||||
input_shape = self.input_spec[0].shape
|
||||
def call(self, inputs, mask=None):
|
||||
input_shape = K.int_shape(inputs)
|
||||
if input_shape[0]:
|
||||
# batch size matters, use rnn-based implementation
|
||||
def step(x, states):
|
||||
def step(x, _):
|
||||
output = self.layer.call(x)
|
||||
return output, []
|
||||
|
||||
_, outputs, _ = K.rnn(step, X,
|
||||
_, outputs, _ = K.rnn(step, inputs,
|
||||
initial_states=[],
|
||||
input_length=input_shape[1],
|
||||
unroll=False)
|
||||
@@ -124,19 +122,26 @@ class TimeDistributed(Wrapper):
|
||||
# we can go with reshape-based implementation for performance
|
||||
input_length = input_shape[1]
|
||||
if not input_length:
|
||||
input_length = K.shape(X)[1]
|
||||
X = K.reshape(X, (-1, ) + input_shape[2:]) # (nb_samples * timesteps, ...)
|
||||
y = self.layer.call(X) # (nb_samples * timesteps, ...)
|
||||
input_length = K.shape(inputs)[1]
|
||||
# (nb_samples * timesteps, ...)
|
||||
inputs = K.reshape(inputs, (-1,) + input_shape[2:])
|
||||
y = self.layer.call(inputs) # (nb_samples * timesteps, ...)
|
||||
# (nb_samples, timesteps, ...)
|
||||
output_shape = self.get_output_shape_for(input_shape)
|
||||
y = K.reshape(y, (-1, input_length) + output_shape[2:])
|
||||
|
||||
# Apply activity regularizer if any:
|
||||
if (hasattr(self.layer, 'activity_regularizer') and
|
||||
self.layer.activity_regularizer is not None):
|
||||
regularization_loss = self.layer.activity_regularizer(y)
|
||||
self.add_loss(regularization_loss, inputs)
|
||||
return y
|
||||
|
||||
|
||||
class Bidirectional(Wrapper):
|
||||
''' Bidirectional wrapper for RNNs.
|
||||
"""Bidirectional wrapper for RNNs.
|
||||
|
||||
# Arguments:
|
||||
# Arguments
|
||||
layer: `Recurrent` instance.
|
||||
merge_mode: Mode by which outputs of the
|
||||
forward and backward RNNs will be combined.
|
||||
@@ -144,7 +149,7 @@ class Bidirectional(Wrapper):
|
||||
If None, the outputs will not be combined,
|
||||
they will be returned as a list.
|
||||
|
||||
# Examples:
|
||||
# Examples
|
||||
|
||||
```python
|
||||
model = Sequential()
|
||||
@@ -154,7 +159,8 @@ class Bidirectional(Wrapper):
|
||||
model.add(Activation('softmax'))
|
||||
model.compile(loss='categorical_crossentropy', optimizer='rmsprop')
|
||||
```
|
||||
'''
|
||||
"""
|
||||
|
||||
def __init__(self, layer, merge_mode='concat', weights=None, **kwargs):
|
||||
if merge_mode not in ['sum', 'mul', 'ave', 'concat', None]:
|
||||
raise ValueError('Invalid merge mode. '
|
||||
@@ -194,21 +200,21 @@ class Bidirectional(Wrapper):
|
||||
elif self.merge_mode is None:
|
||||
return [self.forward_layer.get_output_shape_for(input_shape)] * 2
|
||||
|
||||
def call(self, X, mask=None):
|
||||
Y = self.forward_layer.call(X, mask)
|
||||
Y_rev = self.backward_layer.call(X, mask)
|
||||
def call(self, inputs, mask=None):
|
||||
y = self.forward_layer.call(inputs, mask)
|
||||
y_rev = self.backward_layer.call(inputs, mask)
|
||||
if self.return_sequences:
|
||||
Y_rev = K.reverse(Y_rev, 1)
|
||||
y_rev = K.reverse(y_rev, 1)
|
||||
if self.merge_mode == 'concat':
|
||||
return K.concatenate([Y, Y_rev])
|
||||
return K.concatenate([y, y_rev])
|
||||
elif self.merge_mode == 'sum':
|
||||
return Y + Y_rev
|
||||
return y + y_rev
|
||||
elif self.merge_mode == 'ave':
|
||||
return (Y + Y_rev) / 2
|
||||
return (y + y_rev) / 2
|
||||
elif self.merge_mode == 'mul':
|
||||
return Y * Y_rev
|
||||
return y * y_rev
|
||||
elif self.merge_mode is None:
|
||||
return [Y, Y_rev]
|
||||
return [y, y_rev]
|
||||
|
||||
def reset_states(self):
|
||||
self.forward_layer.reset_states()
|
||||
@@ -230,13 +236,15 @@ class Bidirectional(Wrapper):
|
||||
@property
|
||||
def trainable_weights(self):
|
||||
if hasattr(self.forward_layer, 'trainable_weights'):
|
||||
return self.forward_layer.trainable_weights + self.backward_layer.trainable_weights
|
||||
return (self.forward_layer.trainable_weights +
|
||||
self.backward_layer.trainable_weights)
|
||||
return []
|
||||
|
||||
@property
|
||||
def non_trainable_weights(self):
|
||||
if hasattr(self.forward_layer, 'non_trainable_weights'):
|
||||
return self.forward_layer.non_trainable_weights + self.backward_layer.non_trainable_weights
|
||||
return (self.forward_layer.non_trainable_weights +
|
||||
self.backward_layer.non_trainable_weights)
|
||||
return []
|
||||
|
||||
@property
|
||||
@@ -246,18 +254,18 @@ class Bidirectional(Wrapper):
|
||||
return []
|
||||
|
||||
@property
|
||||
def regularizers(self):
|
||||
if hasattr(self.forward_layer, 'regularizers'):
|
||||
return self.forward_layer.regularizers + self.backward_layer.regularizers
|
||||
def losses(self):
|
||||
if hasattr(self.forward_layer, 'losses'):
|
||||
return self.forward_layer.losses + self.backward_layer.losses
|
||||
return []
|
||||
|
||||
@property
|
||||
def constraints(self):
|
||||
_constraints = {}
|
||||
constraints = {}
|
||||
if hasattr(self.forward_layer, 'constraints'):
|
||||
_constraints.update(self.forward_layer.constraints)
|
||||
_constraints.update(self.backward_layer.constraints)
|
||||
return _constraints
|
||||
constraints.update(self.forward_layer.constraints)
|
||||
constraints.update(self.backward_layer.constraints)
|
||||
return constraints
|
||||
|
||||
def get_config(self):
|
||||
config = {"merge_mode": self.merge_mode}
|
||||
|
||||
+35
-67
@@ -1,135 +1,90 @@
|
||||
import numpy as np
|
||||
from . import backend as K
|
||||
from .utils.generic_utils import get_from_module
|
||||
|
||||
|
||||
def binary_accuracy(y_true, y_pred):
|
||||
'''Calculates the mean accuracy rate across all predictions for binary
|
||||
classification problems.
|
||||
'''
|
||||
return K.mean(K.equal(y_true, K.round(y_pred)))
|
||||
|
||||
|
||||
def categorical_accuracy(y_true, y_pred):
|
||||
'''Calculates the mean accuracy rate across all predictions for
|
||||
multiclass classification problems.
|
||||
'''
|
||||
return K.mean(K.equal(K.argmax(y_true, axis=-1),
|
||||
K.argmax(y_pred, axis=-1)))
|
||||
K.argmax(y_pred, axis=-1)))
|
||||
|
||||
|
||||
def sparse_categorical_accuracy(y_true, y_pred):
|
||||
'''Same as categorical_accuracy, but useful when the predictions are for
|
||||
sparse targets.
|
||||
'''
|
||||
return K.mean(K.equal(K.max(y_true, axis=-1),
|
||||
K.cast(K.argmax(y_pred, axis=-1), K.floatx())))
|
||||
|
||||
|
||||
def top_k_categorical_accuracy(y_true, y_pred, k=5):
|
||||
'''Calculates the top-k categorical accuracy rate, i.e. success when the
|
||||
target class is within the top-k predictions provided.
|
||||
'''
|
||||
return K.mean(K.in_top_k(y_pred, K.argmax(y_true, axis=-1), k))
|
||||
|
||||
|
||||
def mean_squared_error(y_true, y_pred):
|
||||
'''Calculates the mean squared error (mse) rate
|
||||
between predicted and target values.
|
||||
'''
|
||||
return K.mean(K.square(y_pred - y_true))
|
||||
|
||||
|
||||
def mean_absolute_error(y_true, y_pred):
|
||||
'''Calculates the mean absolute error (mae) rate
|
||||
between predicted and target values.
|
||||
'''
|
||||
return K.mean(K.abs(y_pred - y_true))
|
||||
|
||||
|
||||
def mean_absolute_percentage_error(y_true, y_pred):
|
||||
'''Calculates the mean absolute percentage error (mape) rate
|
||||
between predicted and target values.
|
||||
'''
|
||||
diff = K.abs((y_true - y_pred) / K.clip(K.abs(y_true), K.epsilon(), np.inf))
|
||||
diff = K.abs((y_true - y_pred) / K.clip(K.abs(y_true),
|
||||
K.epsilon(),
|
||||
None))
|
||||
return 100. * K.mean(diff)
|
||||
|
||||
|
||||
def mean_squared_logarithmic_error(y_true, y_pred):
|
||||
'''Calculates the mean squared logarithmic error (msle) rate
|
||||
between predicted and target values.
|
||||
'''
|
||||
first_log = K.log(K.clip(y_pred, K.epsilon(), np.inf) + 1.)
|
||||
second_log = K.log(K.clip(y_true, K.epsilon(), np.inf) + 1.)
|
||||
first_log = K.log(K.clip(y_pred, K.epsilon(), None) + 1.)
|
||||
second_log = K.log(K.clip(y_true, K.epsilon(), None) + 1.)
|
||||
return K.mean(K.square(first_log - second_log))
|
||||
|
||||
|
||||
def hinge(y_true, y_pred):
|
||||
'''Calculates the hinge loss, which is defined as
|
||||
`max(1 - y_true * y_pred, 0)`.
|
||||
'''
|
||||
return K.mean(K.maximum(1. - y_true * y_pred, 0.))
|
||||
|
||||
|
||||
def squared_hinge(y_true, y_pred):
|
||||
'''Calculates the squared value of the hinge loss.
|
||||
'''
|
||||
return K.mean(K.square(K.maximum(1. - y_true * y_pred, 0.)))
|
||||
|
||||
|
||||
def categorical_crossentropy(y_true, y_pred):
|
||||
'''Calculates the cross-entropy value for multiclass classification
|
||||
problems. Note: Expects a binary class matrix instead of a vector
|
||||
of scalar classes.
|
||||
'''
|
||||
return K.mean(K.categorical_crossentropy(y_pred, y_true))
|
||||
|
||||
|
||||
def sparse_categorical_crossentropy(y_true, y_pred):
|
||||
'''Calculates the cross-entropy value for multiclass classification
|
||||
problems with sparse targets. Note: Expects an array of integer
|
||||
classes. Labels shape must have the same number of dimensions as
|
||||
output shape. If you get a shape error, add a length-1 dimension
|
||||
to labels.
|
||||
'''
|
||||
return K.mean(K.sparse_categorical_crossentropy(y_pred, y_true))
|
||||
|
||||
|
||||
def binary_crossentropy(y_true, y_pred):
|
||||
'''Calculates the cross-entropy value for binary classification
|
||||
problems.
|
||||
'''
|
||||
return K.mean(K.binary_crossentropy(y_pred, y_true))
|
||||
|
||||
|
||||
def kullback_leibler_divergence(y_true, y_pred):
|
||||
'''Calculates the Kullback-Leibler (KL) divergence between prediction
|
||||
and target values.
|
||||
'''
|
||||
y_true = K.clip(y_true, K.epsilon(), 1)
|
||||
y_pred = K.clip(y_pred, K.epsilon(), 1)
|
||||
return K.sum(y_true * K.log(y_true / y_pred), axis=-1)
|
||||
return K.mean(K.sum(y_true * K.log(y_true / y_pred), axis=-1))
|
||||
|
||||
|
||||
def poisson(y_true, y_pred):
|
||||
'''Calculates the poisson function over prediction and target values.
|
||||
'''
|
||||
return K.mean(y_pred - y_true * K.log(y_pred + K.epsilon()))
|
||||
|
||||
|
||||
def cosine_proximity(y_true, y_pred):
|
||||
'''Calculates the cosine similarity between the prediction and target
|
||||
values.
|
||||
'''
|
||||
y_true = K.l2_normalize(y_true, axis=-1)
|
||||
y_pred = K.l2_normalize(y_pred, axis=-1)
|
||||
return -K.mean(y_true * y_pred)
|
||||
|
||||
|
||||
def matthews_correlation(y_true, y_pred):
|
||||
'''Calculates the Matthews correlation coefficient measure for quality
|
||||
"""Matthews correlation metric.
|
||||
|
||||
It is only computed as a batch-wise average, not globally.
|
||||
|
||||
Computes the Matthews correlation coefficient measure for quality
|
||||
of binary classification problems.
|
||||
'''
|
||||
"""
|
||||
y_pred_pos = K.round(K.clip(y_pred, 0, 1))
|
||||
y_pred_neg = 1 - y_pred_pos
|
||||
|
||||
@@ -149,9 +104,13 @@ def matthews_correlation(y_true, y_pred):
|
||||
|
||||
|
||||
def precision(y_true, y_pred):
|
||||
'''Calculates the precision, a metric for multi-label classification of
|
||||
"""Precision metric.
|
||||
|
||||
Only computes a batch-wise average of precision.
|
||||
|
||||
Computes the precision, a metric for multi-label classification of
|
||||
how many selected items are relevant.
|
||||
'''
|
||||
"""
|
||||
true_positives = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
|
||||
predicted_positives = K.sum(K.round(K.clip(y_pred, 0, 1)))
|
||||
precision = true_positives / (predicted_positives + K.epsilon())
|
||||
@@ -159,9 +118,13 @@ def precision(y_true, y_pred):
|
||||
|
||||
|
||||
def recall(y_true, y_pred):
|
||||
'''Calculates the recall, a metric for multi-label classification of
|
||||
"""Recall metric.
|
||||
|
||||
Only computes a batch-wise average of recall.
|
||||
|
||||
Computes the recall, a metric for multi-label classification of
|
||||
how many relevant items are selected.
|
||||
'''
|
||||
"""
|
||||
true_positives = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
|
||||
possible_positives = K.sum(K.round(K.clip(y_true, 0, 1)))
|
||||
recall = true_positives / (possible_positives + K.epsilon())
|
||||
@@ -169,7 +132,10 @@ def recall(y_true, y_pred):
|
||||
|
||||
|
||||
def fbeta_score(y_true, y_pred, beta=1):
|
||||
'''Calculates the F score, the weighted harmonic mean of precision and recall.
|
||||
"""Computes the F score.
|
||||
|
||||
The F score is the weighted harmonic mean of precision and recall.
|
||||
Here it is only computed as a batch-wise average, not globally.
|
||||
|
||||
This is useful for multi-label classification, where input samples can be
|
||||
classified as sets of labels. By only using accuracy (precision) a model
|
||||
@@ -182,10 +148,10 @@ def fbeta_score(y_true, y_pred, beta=1):
|
||||
With beta = 1, this is equivalent to a F-measure. With beta < 1, assigning
|
||||
correct classes becomes more important, and with beta > 1 the metric is
|
||||
instead weighted towards penalizing incorrect class assignments.
|
||||
'''
|
||||
"""
|
||||
if beta < 0:
|
||||
raise ValueError('The lowest choosable beta is zero (only precision).')
|
||||
|
||||
|
||||
# If there are no true positives, fix the F score at 0 like sklearn.
|
||||
if K.sum(K.round(K.clip(y_true, 0, 1))) == 0:
|
||||
return 0
|
||||
@@ -198,8 +164,10 @@ def fbeta_score(y_true, y_pred, beta=1):
|
||||
|
||||
|
||||
def fmeasure(y_true, y_pred):
|
||||
'''Calculates the f-measure, the harmonic mean of precision and recall.
|
||||
'''
|
||||
"""Computes the f-measure, the harmonic mean of precision and recall.
|
||||
|
||||
Here it is only computed as a batch-wise average, not globally.
|
||||
"""
|
||||
return fbeta_score(y_true, y_pred, beta=1)
|
||||
|
||||
|
||||
|
||||
+130
-100
@@ -27,7 +27,7 @@ def save_model(model, filepath, overwrite=True):
|
||||
return obj.item()
|
||||
|
||||
# misc functions (e.g. loss function)
|
||||
if hasattr(obj, '__call__'):
|
||||
if callable(obj):
|
||||
return obj.__name__
|
||||
|
||||
# if obj is a python 'type'
|
||||
@@ -106,10 +106,12 @@ def save_model(model, filepath, overwrite=True):
|
||||
f.close()
|
||||
|
||||
|
||||
def load_model(filepath, custom_objects={}):
|
||||
def load_model(filepath, custom_objects=None):
|
||||
if not custom_objects:
|
||||
custom_objects = {}
|
||||
|
||||
def deserialize(obj):
|
||||
if type(obj) is list:
|
||||
if isinstance(obj, list):
|
||||
deserialized = []
|
||||
for value in obj:
|
||||
if value in custom_objects:
|
||||
@@ -117,7 +119,7 @@ def load_model(filepath, custom_objects={}):
|
||||
else:
|
||||
deserialized.append(value)
|
||||
return deserialized
|
||||
if type(obj) is dict:
|
||||
if isinstance(obj, dict):
|
||||
deserialized = {}
|
||||
for key, value in obj.items():
|
||||
if value in custom_objects:
|
||||
@@ -151,7 +153,8 @@ def load_model(filepath, custom_objects={}):
|
||||
return model
|
||||
training_config = json.loads(training_config.decode('utf-8'))
|
||||
optimizer_config = training_config['optimizer_config']
|
||||
optimizer = optimizer_from_config(optimizer_config)
|
||||
optimizer = optimizer_from_config(optimizer_config,
|
||||
custom_objects=custom_objects)
|
||||
|
||||
# recover loss functions and metrics
|
||||
loss = deserialize(training_config['loss'])
|
||||
@@ -181,28 +184,29 @@ def load_model(filepath, custom_objects={}):
|
||||
return model
|
||||
|
||||
|
||||
def model_from_config(config, custom_objects={}):
|
||||
def model_from_config(config, custom_objects=None):
|
||||
from keras.utils.layer_utils import layer_from_config
|
||||
if isinstance(config, list):
|
||||
raise Exception('`model_fom_config` expects a dictionary, not a list. '
|
||||
'Maybe you meant to use `Sequential.from_config(config)`?')
|
||||
raise TypeError('`model_fom_config` expects a dictionary, not a list. '
|
||||
'Maybe you meant to use '
|
||||
'`Sequential.from_config(config)`?')
|
||||
return layer_from_config(config, custom_objects=custom_objects)
|
||||
|
||||
|
||||
def model_from_yaml(yaml_string, custom_objects={}):
|
||||
'''Parses a yaml model configuration file
|
||||
def model_from_yaml(yaml_string, custom_objects=None):
|
||||
"""Parses a yaml model configuration file
|
||||
and returns a model instance.
|
||||
'''
|
||||
"""
|
||||
import yaml
|
||||
from keras.utils.layer_utils import layer_from_config
|
||||
config = yaml.load(yaml_string)
|
||||
return layer_from_config(config, custom_objects=custom_objects)
|
||||
|
||||
|
||||
def model_from_json(json_string, custom_objects={}):
|
||||
'''Parses a JSON model configuration file
|
||||
def model_from_json(json_string, custom_objects=None):
|
||||
"""Parses a JSON model configuration file
|
||||
and returns a model instance.
|
||||
'''
|
||||
"""
|
||||
import json
|
||||
from keras.utils.layer_utils import layer_from_config
|
||||
config = json.loads(json_string)
|
||||
@@ -210,7 +214,7 @@ def model_from_json(json_string, custom_objects={}):
|
||||
|
||||
|
||||
class Sequential(Model):
|
||||
'''Linear stack of layers.
|
||||
"""Linear stack of layers.
|
||||
|
||||
# Arguments
|
||||
layers: list of layers to add to the model.
|
||||
@@ -244,8 +248,9 @@ class Sequential(Model):
|
||||
model.add(Dense(32, batch_input_shape=(None, 500)))
|
||||
model.add(Dense(32))
|
||||
```
|
||||
'''
|
||||
def __init__(self, layers=[], name=None):
|
||||
"""
|
||||
|
||||
def __init__(self, layers=None, name=None):
|
||||
self.layers = [] # stack of layers
|
||||
self.model = None # internal Model instance
|
||||
self.inputs = [] # tensors
|
||||
@@ -263,27 +268,29 @@ class Sequential(Model):
|
||||
name = prefix + str(K.get_uid(prefix))
|
||||
self.name = name
|
||||
|
||||
for layer in layers:
|
||||
self.add(layer)
|
||||
if layers:
|
||||
for layer in layers:
|
||||
self.add(layer)
|
||||
|
||||
def add(self, layer):
|
||||
'''Adds a layer instance on top of the layer stack.
|
||||
"""Adds a layer instance on top of the layer stack.
|
||||
|
||||
# Arguments
|
||||
layer: layer instance.
|
||||
'''
|
||||
"""
|
||||
if not isinstance(layer, Layer):
|
||||
raise ValueError('The added layer must be '
|
||||
'an instance of class Layer. '
|
||||
'Found: ' + str(layer))
|
||||
raise TypeError('The added layer must be '
|
||||
'an instance of class Layer. '
|
||||
'Found: ' + str(layer))
|
||||
if not self.outputs:
|
||||
# first layer in model: check that it is an input layer
|
||||
if len(layer.inbound_nodes) == 0:
|
||||
# create an input layer
|
||||
if not hasattr(layer, 'batch_input_shape'):
|
||||
raise Exception('The first layer in a Sequential model must '
|
||||
'get an `input_shape` or '
|
||||
'`batch_input_shape` argument.')
|
||||
raise ValueError('The first layer in a '
|
||||
'Sequential model must '
|
||||
'get an `input_shape` or '
|
||||
'`batch_input_shape` argument.')
|
||||
batch_input_shape = layer.batch_input_shape
|
||||
if hasattr(layer, 'input_dtype'):
|
||||
input_dtype = layer.input_dtype
|
||||
@@ -292,17 +299,18 @@ class Sequential(Model):
|
||||
layer.create_input_layer(batch_input_shape, input_dtype)
|
||||
|
||||
if len(layer.inbound_nodes) != 1:
|
||||
raise Exception('A layer added to a Sequential model must '
|
||||
'not already be connected somewhere else. '
|
||||
'Model received layer ' + layer.name +
|
||||
' which has ' + str(len(layer.inbound_nodes)) +
|
||||
' pre-existing inbound connections.')
|
||||
raise ValueError('A layer added to a Sequential model must '
|
||||
'not already be connected somewhere else. '
|
||||
'Model received layer ' + layer.name +
|
||||
' which has ' +
|
||||
str(len(layer.inbound_nodes)) +
|
||||
' pre-existing inbound connections.')
|
||||
|
||||
if len(layer.inbound_nodes[0].output_tensors) != 1:
|
||||
raise Exception('All layers in a Sequential model '
|
||||
'should have a single output tensor. '
|
||||
'For multi-output layers, '
|
||||
'use the functional API.')
|
||||
raise ValueError('All layers in a Sequential model '
|
||||
'should have a single output tensor. '
|
||||
'For multi-output layers, '
|
||||
'use the functional API.')
|
||||
|
||||
self.outputs = [layer.inbound_nodes[0].output_tensors[0]]
|
||||
self.inputs = get_source_inputs(self.outputs[0])
|
||||
@@ -322,8 +330,8 @@ class Sequential(Model):
|
||||
output_shapes=[self.outputs[0]._keras_shape])
|
||||
else:
|
||||
output_tensor = layer(self.outputs[0])
|
||||
if type(output_tensor) is list:
|
||||
raise Exception('All layers in a Sequential model '
|
||||
if isinstance(output_tensor, list):
|
||||
raise TypeError('All layers in a Sequential model '
|
||||
'should have a single output tensor. '
|
||||
'For multi-output layers, '
|
||||
'use the functional API.')
|
||||
@@ -337,10 +345,10 @@ class Sequential(Model):
|
||||
self._flattened_layers = None
|
||||
|
||||
def pop(self):
|
||||
'''Removes the last layer in the model.
|
||||
'''
|
||||
"""Removes the last layer in the model.
|
||||
"""
|
||||
if not self.layers:
|
||||
raise Exception('There are no layers in the model.')
|
||||
raise TypeError('There are no layers in the model.')
|
||||
|
||||
self.layers.pop()
|
||||
if not self.layers:
|
||||
@@ -357,7 +365,7 @@ class Sequential(Model):
|
||||
self._flattened_layers = None
|
||||
|
||||
def get_layer(self, name=None, index=None):
|
||||
'''Returns a layer based on either its name (unique)
|
||||
"""Returns a layer based on either its name (unique)
|
||||
or its index in the graph. Indices are based on
|
||||
order of horizontal graph traversal (bottom-up).
|
||||
|
||||
@@ -367,7 +375,7 @@ class Sequential(Model):
|
||||
|
||||
# Returns
|
||||
A layer instance.
|
||||
'''
|
||||
"""
|
||||
if not self.built:
|
||||
self.build()
|
||||
return self.model.get_layer(name, index)
|
||||
@@ -379,10 +387,11 @@ class Sequential(Model):
|
||||
|
||||
def build(self, input_shape=None):
|
||||
if not self.inputs or not self.outputs:
|
||||
raise Exception('Sequential model cannot be built: model is empty.'
|
||||
raise TypeError('Sequential model cannot be built: model is empty.'
|
||||
' Add some layers first.')
|
||||
# actually create the model
|
||||
self.model = Model(self.inputs, self.outputs[0], name=self.name + '_model')
|
||||
self.model = Model(self.inputs, self.outputs[0],
|
||||
name=self.name + '_model')
|
||||
self.model.trainable = self.trainable
|
||||
|
||||
# mirror model attributes
|
||||
@@ -493,6 +502,13 @@ class Sequential(Model):
|
||||
def get_updates_for(self, inputs):
|
||||
return self.model.get_updates_for(inputs)
|
||||
|
||||
@property
|
||||
def losses(self):
|
||||
return self.model.losses
|
||||
|
||||
def get_losses_for(self, inputs):
|
||||
return self.model.get_losses_for(inputs)
|
||||
|
||||
@property
|
||||
def regularizers(self):
|
||||
# support for legacy behavior
|
||||
@@ -504,9 +520,9 @@ class Sequential(Model):
|
||||
return self._gather_dict_attr('constraints')
|
||||
|
||||
def get_weights(self):
|
||||
'''Returns the weights of the model,
|
||||
"""Returns the weights of the model,
|
||||
as a flat list of Numpy arrays.
|
||||
'''
|
||||
"""
|
||||
# support for legacy behavior
|
||||
weights = []
|
||||
for layer in self.flattened_layers:
|
||||
@@ -514,11 +530,11 @@ class Sequential(Model):
|
||||
return weights
|
||||
|
||||
def set_weights(self, weights):
|
||||
'''Sets the weights of the model.
|
||||
"""Sets the weights of the model.
|
||||
The `weights` argument should be a list
|
||||
of Numpy arrays with shapes and types matching
|
||||
the output of `model.get_weights()`.
|
||||
'''
|
||||
"""
|
||||
# support for legacy behavior
|
||||
for layer in self.flattened_layers:
|
||||
nb_param = len(layer.weights)
|
||||
@@ -534,10 +550,10 @@ class Sequential(Model):
|
||||
return self.model.training_data
|
||||
|
||||
def compile(self, optimizer, loss,
|
||||
metrics=[],
|
||||
metrics=None,
|
||||
sample_weight_mode=None,
|
||||
**kwargs):
|
||||
'''Configures the learning process.
|
||||
"""Configures the learning process.
|
||||
|
||||
# Arguments
|
||||
optimizer: str (name of optimizer) or optimizer object.
|
||||
@@ -563,7 +579,7 @@ class Sequential(Model):
|
||||
loss='categorical_crossentropy',
|
||||
metrics=['accuracy'])
|
||||
```
|
||||
'''
|
||||
"""
|
||||
# create the underlying model
|
||||
self.build()
|
||||
# legacy kwarg support
|
||||
@@ -584,10 +600,10 @@ class Sequential(Model):
|
||||
self.metrics_names = self.model.metrics_names
|
||||
self.sample_weight_mode = self.model.sample_weight_mode
|
||||
|
||||
def fit(self, x, y, batch_size=32, nb_epoch=10, verbose=1, callbacks=[],
|
||||
def fit(self, x, y, batch_size=32, nb_epoch=10, verbose=1, callbacks=None,
|
||||
validation_split=0., validation_data=None, shuffle=True,
|
||||
class_weight=None, sample_weight=None, **kwargs):
|
||||
'''Trains the model for a fixed number of epochs.
|
||||
class_weight=None, sample_weight=None, initial_epoch=0, **kwargs):
|
||||
"""Trains the model for a fixed number of epochs.
|
||||
|
||||
# Arguments
|
||||
x: input data, as a Numpy array or list of Numpy arrays
|
||||
@@ -621,15 +637,18 @@ class Sequential(Model):
|
||||
to apply a different weight to every timestep of every sample.
|
||||
In this case you should make sure to specify
|
||||
sample_weight_mode="temporal" in compile().
|
||||
initial_epoch: epoch at which to start training
|
||||
(useful for resuming a previous training run)
|
||||
|
||||
# Returns
|
||||
A `History` object. Its `History.history` attribute is
|
||||
a record of training loss values and metrics values
|
||||
at successive epochs, as well as validation loss values
|
||||
and validation metrics values (if applicable).
|
||||
'''
|
||||
"""
|
||||
if self.model is None:
|
||||
raise Exception('The model needs to be compiled before being used.')
|
||||
raise RuntimeError('The model needs to be compiled '
|
||||
'before being used.')
|
||||
if 'show_accuracy' in kwargs:
|
||||
kwargs.pop('show_accuracy')
|
||||
warnings.warn('The "show_accuracy" argument is deprecated, '
|
||||
@@ -638,7 +657,7 @@ class Sequential(Model):
|
||||
'`model.compile(optimizer, loss, '
|
||||
'metrics=["accuracy"])`')
|
||||
if kwargs:
|
||||
raise Exception('Received unknown keyword arguments: ' +
|
||||
raise TypeError('Received unknown keyword arguments: ' +
|
||||
str(kwargs))
|
||||
return self.model.fit(x, y,
|
||||
batch_size=batch_size,
|
||||
@@ -649,11 +668,12 @@ class Sequential(Model):
|
||||
validation_data=validation_data,
|
||||
shuffle=shuffle,
|
||||
class_weight=class_weight,
|
||||
sample_weight=sample_weight)
|
||||
sample_weight=sample_weight,
|
||||
initial_epoch=initial_epoch)
|
||||
|
||||
def evaluate(self, x, y, batch_size=32, verbose=1,
|
||||
sample_weight=None, **kwargs):
|
||||
'''Computes the loss on some input data, batch by batch.
|
||||
"""Computes the loss on some input data, batch by batch.
|
||||
|
||||
# Arguments
|
||||
x: input data, as a Numpy array or list of Numpy arrays
|
||||
@@ -668,9 +688,10 @@ class Sequential(Model):
|
||||
or list of scalars (if the model computes other metrics).
|
||||
The attribute `model.metrics_names` will give you
|
||||
the display labels for the scalar outputs.
|
||||
'''
|
||||
"""
|
||||
if self.model is None:
|
||||
raise Exception('The model needs to be compiled before being used.')
|
||||
raise RuntimeError('The model needs to be compiled '
|
||||
'before being used.')
|
||||
if 'show_accuracy' in kwargs:
|
||||
kwargs.pop('show_accuracy')
|
||||
warnings.warn('The "show_accuracy" argument is deprecated, '
|
||||
@@ -679,7 +700,7 @@ class Sequential(Model):
|
||||
'`model.compile(optimizer, loss, '
|
||||
'metrics=["accuracy"])`')
|
||||
if kwargs:
|
||||
raise Exception('Received unknown keyword arguments: ' +
|
||||
raise TypeError('Received unknown keyword arguments: ' +
|
||||
str(kwargs))
|
||||
return self.model.evaluate(x, y,
|
||||
batch_size=batch_size,
|
||||
@@ -687,7 +708,7 @@ class Sequential(Model):
|
||||
sample_weight=sample_weight)
|
||||
|
||||
def predict(self, x, batch_size=32, verbose=0):
|
||||
'''Generates output predictions for the input samples,
|
||||
"""Generates output predictions for the input samples,
|
||||
processing the samples in a batched way.
|
||||
|
||||
# Arguments
|
||||
@@ -697,21 +718,21 @@ class Sequential(Model):
|
||||
|
||||
# Returns
|
||||
A Numpy array of predictions.
|
||||
'''
|
||||
"""
|
||||
if self.model is None:
|
||||
self.build()
|
||||
return self.model.predict(x, batch_size=batch_size, verbose=verbose)
|
||||
|
||||
def predict_on_batch(self, x):
|
||||
'''Returns predictions for a single batch of samples.
|
||||
'''
|
||||
"""Returns predictions for a single batch of samples.
|
||||
"""
|
||||
if self.model is None:
|
||||
self.build()
|
||||
return self.model.predict_on_batch(x)
|
||||
|
||||
def train_on_batch(self, x, y, class_weight=None,
|
||||
sample_weight=None, **kwargs):
|
||||
'''Single gradient update over one batch of samples.
|
||||
"""Single gradient update over one batch of samples.
|
||||
|
||||
# Arguments
|
||||
x: input data, as a Numpy array or list of Numpy arrays
|
||||
@@ -726,9 +747,10 @@ class Sequential(Model):
|
||||
or list of scalars (if the model computes other metrics).
|
||||
The attribute `model.metrics_names` will give you
|
||||
the display labels for the scalar outputs.
|
||||
'''
|
||||
"""
|
||||
if self.model is None:
|
||||
raise Exception('The model needs to be compiled before being used.')
|
||||
raise RuntimeError('The model needs to be compiled '
|
||||
'before being used.')
|
||||
if 'accuracy' in kwargs:
|
||||
kwargs.pop('accuracy')
|
||||
warnings.warn('The "accuracy" argument is deprecated, '
|
||||
@@ -737,7 +759,7 @@ class Sequential(Model):
|
||||
'`model.compile(optimizer, loss, '
|
||||
'metrics=["accuracy"])`')
|
||||
if kwargs:
|
||||
raise Exception('Received unknown keyword arguments: ' +
|
||||
raise TypeError('Received unknown keyword arguments: ' +
|
||||
str(kwargs))
|
||||
return self.model.train_on_batch(x, y,
|
||||
sample_weight=sample_weight,
|
||||
@@ -745,7 +767,7 @@ class Sequential(Model):
|
||||
|
||||
def test_on_batch(self, x, y,
|
||||
sample_weight=None, **kwargs):
|
||||
'''Evaluates the model over a single batch of samples.
|
||||
"""Evaluates the model over a single batch of samples.
|
||||
|
||||
# Arguments
|
||||
x: input data, as a Numpy array or list of Numpy arrays
|
||||
@@ -758,9 +780,10 @@ class Sequential(Model):
|
||||
or list of scalars (if the model computes other metrics).
|
||||
The attribute `model.metrics_names` will give you
|
||||
the display labels for the scalar outputs.
|
||||
'''
|
||||
"""
|
||||
if self.model is None:
|
||||
raise Exception('The model needs to be compiled before being used.')
|
||||
raise RuntimeError('The model needs to be compiled '
|
||||
'before being used.')
|
||||
if 'accuracy' in kwargs:
|
||||
kwargs.pop('accuracy')
|
||||
warnings.warn('The "accuracy" argument is deprecated, '
|
||||
@@ -769,13 +792,13 @@ class Sequential(Model):
|
||||
'`model.compile(optimizer, loss, '
|
||||
'metrics=["accuracy"])`')
|
||||
if kwargs:
|
||||
raise Exception('Received unknown keyword arguments: ' +
|
||||
raise TypeError('Received unknown keyword arguments: ' +
|
||||
str(kwargs))
|
||||
return self.model.test_on_batch(x, y,
|
||||
sample_weight=sample_weight)
|
||||
|
||||
def predict_proba(self, x, batch_size=32, verbose=1):
|
||||
'''Generates class probability predictions for the input samples
|
||||
"""Generates class probability predictions for the input samples
|
||||
batch by batch.
|
||||
|
||||
# Arguments
|
||||
@@ -786,7 +809,7 @@ class Sequential(Model):
|
||||
|
||||
# Returns
|
||||
A Numpy array of probability predictions.
|
||||
'''
|
||||
"""
|
||||
preds = self.predict(x, batch_size, verbose)
|
||||
if preds.min() < 0. or preds.max() > 1.:
|
||||
warnings.warn('Network returning invalid probability values. '
|
||||
@@ -796,7 +819,7 @@ class Sequential(Model):
|
||||
return preds
|
||||
|
||||
def predict_classes(self, x, batch_size=32, verbose=1):
|
||||
'''Generate class predictions for the input samples
|
||||
"""Generate class predictions for the input samples
|
||||
batch by batch.
|
||||
|
||||
# Arguments
|
||||
@@ -807,7 +830,7 @@ class Sequential(Model):
|
||||
|
||||
# Returns
|
||||
A numpy array of class predictions.
|
||||
'''
|
||||
"""
|
||||
proba = self.predict(x, batch_size=batch_size, verbose=verbose)
|
||||
if proba.shape[-1] > 1:
|
||||
return proba.argmax(axis=-1)
|
||||
@@ -815,11 +838,11 @@ class Sequential(Model):
|
||||
return (proba > 0.5).astype('int32')
|
||||
|
||||
def fit_generator(self, generator, samples_per_epoch, nb_epoch,
|
||||
verbose=1, callbacks=[],
|
||||
verbose=1, callbacks=None,
|
||||
validation_data=None, nb_val_samples=None,
|
||||
class_weight=None, max_q_size=10, nb_worker=1,
|
||||
pickle_safe=False, **kwargs):
|
||||
'''Fits the model on data generated batch-by-batch by
|
||||
pickle_safe=False, initial_epoch=0, **kwargs):
|
||||
"""Fits the model on data generated batch-by-batch by
|
||||
a Python generator.
|
||||
The generator is run in parallel to the model, for efficiency.
|
||||
For instance, this allows you to do real-time data augmentation
|
||||
@@ -854,6 +877,8 @@ class Sequential(Model):
|
||||
this implementation relies on multiprocessing, you should not pass
|
||||
non picklable arguments to the generator as they can't be passed
|
||||
easily to children processes.
|
||||
initial_epoch: epoch at which to start training
|
||||
(useful for resuming a previous training run)
|
||||
|
||||
# Returns
|
||||
A `History` object.
|
||||
@@ -874,11 +899,13 @@ class Sequential(Model):
|
||||
model.fit_generator(generate_arrays_from_file('/my_file.txt'),
|
||||
samples_per_epoch=10000, nb_epoch=10)
|
||||
```
|
||||
'''
|
||||
"""
|
||||
if self.model is None:
|
||||
raise Exception('The model needs to be compiled before being used.')
|
||||
raise RuntimeError('The model needs to be compiled '
|
||||
'before being used.')
|
||||
if nb_worker > 1 and not pickle_safe:
|
||||
warnings.warn('The "nb_worker" argument is deprecated when pickle_safe is False')
|
||||
warnings.warn('The "nb_worker" argument is deprecated '
|
||||
'when pickle_safe is False')
|
||||
nb_worker = 1 # For backward compatibility
|
||||
if 'show_accuracy' in kwargs:
|
||||
kwargs.pop('show_accuracy')
|
||||
@@ -892,7 +919,7 @@ class Sequential(Model):
|
||||
warnings.warn('The "nb_val_worker" argument is deprecated, '
|
||||
'please remove it from your code.')
|
||||
if kwargs:
|
||||
raise Exception('Received unknown keyword arguments: ' +
|
||||
raise TypeError('Received unknown keyword arguments: ' +
|
||||
str(kwargs))
|
||||
return self.model.fit_generator(generator,
|
||||
samples_per_epoch,
|
||||
@@ -904,12 +931,13 @@ class Sequential(Model):
|
||||
class_weight=class_weight,
|
||||
max_q_size=max_q_size,
|
||||
nb_worker=nb_worker,
|
||||
pickle_safe=pickle_safe)
|
||||
pickle_safe=pickle_safe,
|
||||
initial_epoch=initial_epoch)
|
||||
|
||||
def evaluate_generator(self, generator, val_samples,
|
||||
max_q_size=10, nb_worker=1,
|
||||
pickle_safe=False, **kwargs):
|
||||
'''Evaluates the model on a data generator. The generator should
|
||||
"""Evaluates the model on a data generator. The generator should
|
||||
return the same kind of data as accepted by `test_on_batch`.
|
||||
|
||||
# Arguments
|
||||
@@ -925,11 +953,13 @@ class Sequential(Model):
|
||||
this implementation relies on multiprocessing, you should not pass non
|
||||
non picklable arguments to the generator as they can't be passed
|
||||
easily to children processes.
|
||||
'''
|
||||
"""
|
||||
if self.model is None:
|
||||
raise Exception('The model needs to be compiled before being used.')
|
||||
raise RuntimeError('The model needs to be compiled '
|
||||
'before being used.')
|
||||
if nb_worker > 1 and not pickle_safe:
|
||||
warnings.warn('The "nb_worker" argument is deprecated when pickle_safe is False')
|
||||
warnings.warn('The "nb_worker" argument is deprecated '
|
||||
'when pickle_safe is False')
|
||||
nb_worker = 1 # For backward compatibility
|
||||
if 'show_accuracy' in kwargs:
|
||||
kwargs.pop('show_accuracy')
|
||||
@@ -942,7 +972,7 @@ class Sequential(Model):
|
||||
kwargs.pop('verbose')
|
||||
warnings.warn('The "verbose" argument is deprecated.')
|
||||
if kwargs:
|
||||
raise Exception('Received unknown keyword arguments: ' +
|
||||
raise TypeError('Received unknown keyword arguments: ' +
|
||||
str(kwargs))
|
||||
return self.model.evaluate_generator(generator,
|
||||
val_samples,
|
||||
@@ -952,7 +982,7 @@ class Sequential(Model):
|
||||
|
||||
def predict_generator(self, generator, val_samples,
|
||||
max_q_size=10, nb_worker=1, pickle_safe=False):
|
||||
'''Generates predictions for the input samples from a data generator.
|
||||
"""Generates predictions for the input samples from a data generator.
|
||||
The generator should return the same kind of data as accepted by
|
||||
`predict_on_batch`.
|
||||
|
||||
@@ -969,11 +999,12 @@ class Sequential(Model):
|
||||
|
||||
# Returns
|
||||
A Numpy array of predictions.
|
||||
'''
|
||||
"""
|
||||
if self.model is None:
|
||||
self.build()
|
||||
if nb_worker > 1 and not pickle_safe:
|
||||
warnings.warn('The "nb_worker" argument is deprecated when pickle_safe is False')
|
||||
warnings.warn('The "nb_worker" argument is deprecated '
|
||||
'when pickle_safe is False')
|
||||
nb_worker = 1 # For backward compatibility
|
||||
return self.model.predict_generator(generator, val_samples,
|
||||
max_q_size=max_q_size,
|
||||
@@ -981,9 +1012,9 @@ class Sequential(Model):
|
||||
pickle_safe=pickle_safe)
|
||||
|
||||
def get_config(self):
|
||||
'''Returns the model configuration
|
||||
"""Returns the model configuration
|
||||
as a Python list.
|
||||
'''
|
||||
"""
|
||||
config = []
|
||||
if isinstance(self.layers[0], Merge):
|
||||
assert hasattr(self.layers[0], 'layers')
|
||||
@@ -1005,11 +1036,10 @@ class Sequential(Model):
|
||||
|
||||
@classmethod
|
||||
def from_config(cls, config, layer_cache=None):
|
||||
'''Supports legacy formats
|
||||
'''
|
||||
"""Supports legacy formats
|
||||
"""
|
||||
from keras.utils.layer_utils import layer_from_config
|
||||
from keras.layers import Merge
|
||||
assert type(config) is list
|
||||
|
||||
if not layer_cache:
|
||||
layer_cache = {}
|
||||
|
||||
+8
-11
@@ -1,5 +1,5 @@
|
||||
from __future__ import absolute_import
|
||||
import numpy as np
|
||||
|
||||
from . import backend as K
|
||||
from .utils.generic_utils import get_from_module
|
||||
|
||||
@@ -13,13 +13,15 @@ def mean_absolute_error(y_true, y_pred):
|
||||
|
||||
|
||||
def mean_absolute_percentage_error(y_true, y_pred):
|
||||
diff = K.abs((y_true - y_pred) / K.clip(K.abs(y_true), K.epsilon(), np.inf))
|
||||
diff = K.abs((y_true - y_pred) / K.clip(K.abs(y_true),
|
||||
K.epsilon(),
|
||||
None))
|
||||
return 100. * K.mean(diff, axis=-1)
|
||||
|
||||
|
||||
def mean_squared_logarithmic_error(y_true, y_pred):
|
||||
first_log = K.log(K.clip(y_pred, K.epsilon(), np.inf) + 1.)
|
||||
second_log = K.log(K.clip(y_true, K.epsilon(), np.inf) + 1.)
|
||||
first_log = K.log(K.clip(y_pred, K.epsilon(), None) + 1.)
|
||||
second_log = K.log(K.clip(y_true, K.epsilon(), None) + 1.)
|
||||
return K.mean(K.square(first_log - second_log), axis=-1)
|
||||
|
||||
|
||||
@@ -32,16 +34,10 @@ def hinge(y_true, y_pred):
|
||||
|
||||
|
||||
def categorical_crossentropy(y_true, y_pred):
|
||||
'''Expects a binary class matrix instead of a vector of scalar classes.
|
||||
'''
|
||||
return K.categorical_crossentropy(y_pred, y_true)
|
||||
|
||||
|
||||
def sparse_categorical_crossentropy(y_true, y_pred):
|
||||
'''expects an array of integer classes.
|
||||
Note: labels shape must have the same number of dimensions as output shape.
|
||||
If you get a shape error, add a length-1 dimension to labels.
|
||||
'''
|
||||
return K.sparse_categorical_crossentropy(y_pred, y_true)
|
||||
|
||||
|
||||
@@ -65,7 +61,8 @@ def cosine_proximity(y_true, y_pred):
|
||||
return -K.mean(y_true * y_pred, axis=-1)
|
||||
|
||||
|
||||
# aliases
|
||||
# Aliases.
|
||||
|
||||
mse = MSE = mean_squared_error
|
||||
mae = MAE = mean_absolute_error
|
||||
mape = MAPE = mean_absolute_percentage_error
|
||||
|
||||
+97
-55
@@ -1,8 +1,12 @@
|
||||
from __future__ import absolute_import
|
||||
|
||||
from six.moves import zip
|
||||
|
||||
from . import backend as K
|
||||
from .utils.generic_utils import get_from_module
|
||||
from six.moves import zip
|
||||
import warnings
|
||||
|
||||
if K.backend() == 'tensorflow':
|
||||
import tensorflow as tf
|
||||
|
||||
|
||||
def clip_norm(g, c, n):
|
||||
@@ -11,7 +15,20 @@ def clip_norm(g, c, n):
|
||||
return g
|
||||
|
||||
|
||||
def optimizer_from_config(config, custom_objects={}):
|
||||
def optimizer_from_config(config, custom_objects=None):
|
||||
"""Instantiate an optimizer given a config dictionary.
|
||||
|
||||
# Arguments
|
||||
config: Config dictionary
|
||||
(e.g. output of `optimizer.get_config()`).
|
||||
custom_objects: Optional dictionary of custom optimizer classes.
|
||||
|
||||
# Returns
|
||||
An optimizer instance.
|
||||
|
||||
# Raises
|
||||
ValueError: in case of invalid optimizer config.
|
||||
"""
|
||||
all_classes = {
|
||||
'sgd': SGD,
|
||||
'rmsprop': RMSprop,
|
||||
@@ -23,7 +40,7 @@ def optimizer_from_config(config, custom_objects={}):
|
||||
'tfoptimizer': TFOptimizer,
|
||||
}
|
||||
class_name = config['class_name']
|
||||
if class_name in custom_objects:
|
||||
if custom_objects and class_name in custom_objects:
|
||||
cls = custom_objects[class_name]
|
||||
else:
|
||||
if class_name.lower() not in all_classes:
|
||||
@@ -33,7 +50,7 @@ def optimizer_from_config(config, custom_objects={}):
|
||||
|
||||
|
||||
class Optimizer(object):
|
||||
'''Abstract optimizer base class.
|
||||
"""Abstract optimizer base class.
|
||||
|
||||
Note: this is the parent class of all optimizers, not an actual optimizer
|
||||
that can be used for training models.
|
||||
@@ -44,12 +61,13 @@ class Optimizer(object):
|
||||
when their L2 norm exceeds this value.
|
||||
clipvalue: float >= 0. Gradients will be clipped
|
||||
when their absolute value exceeds this value.
|
||||
'''
|
||||
"""
|
||||
|
||||
def __init__(self, **kwargs):
|
||||
allowed_kwargs = {'clipnorm', 'clipvalue'}
|
||||
for k in kwargs:
|
||||
if k not in allowed_kwargs:
|
||||
raise Exception('Unexpected keyword argument '
|
||||
raise TypeError('Unexpected keyword argument '
|
||||
'passed to optimizer: ' + str(k))
|
||||
self.__dict__.update(kwargs)
|
||||
self.updates = []
|
||||
@@ -68,7 +86,7 @@ class Optimizer(object):
|
||||
return grads
|
||||
|
||||
def set_weights(self, weights):
|
||||
'''Sets the weights of the optimizer, from Numpy arrays.
|
||||
"""Sets the weights of the optimizer, from Numpy arrays.
|
||||
|
||||
Should only be called after computing the gradients
|
||||
(otherwise the optimizer has no weights).
|
||||
@@ -79,23 +97,28 @@ class Optimizer(object):
|
||||
number of the dimensions of the weights
|
||||
of the optimizer (i.e. it should match the
|
||||
output of `get_weights`).
|
||||
'''
|
||||
|
||||
# Raises
|
||||
ValueError: in case of incompatible weight shapes.
|
||||
"""
|
||||
params = self.weights
|
||||
weight_value_tuples = []
|
||||
param_values = K.batch_get_value(params)
|
||||
for pv, p, w in zip(param_values, params, weights):
|
||||
if pv.shape != w.shape:
|
||||
raise Exception('Optimizer weight shape ' +
|
||||
str(pv.shape) +
|
||||
' not compatible with '
|
||||
'provided weight shape ' + str(w.shape))
|
||||
raise ValueError('Optimizer weight shape ' +
|
||||
str(pv.shape) +
|
||||
' not compatible with '
|
||||
'provided weight shape ' + str(w.shape))
|
||||
weight_value_tuples.append((p, w))
|
||||
K.batch_set_value(weight_value_tuples)
|
||||
|
||||
def get_weights(self):
|
||||
'''Returns the current weights of the optimizer,
|
||||
as a list of numpy arrays.
|
||||
'''
|
||||
"""Returns the current value of the weights of the optimizer.
|
||||
|
||||
# Returns
|
||||
A list of numpy arrays.
|
||||
"""
|
||||
return K.batch_get_value(self.weights)
|
||||
|
||||
def get_config(self):
|
||||
@@ -112,7 +135,9 @@ class Optimizer(object):
|
||||
|
||||
|
||||
class SGD(Optimizer):
|
||||
'''Stochastic gradient descent, with support for momentum,
|
||||
"""Stochastic gradient descent optimizer.
|
||||
|
||||
Includes support for momentum,
|
||||
learning rate decay, and Nesterov momentum.
|
||||
|
||||
# Arguments
|
||||
@@ -120,23 +145,24 @@ class SGD(Optimizer):
|
||||
momentum: float >= 0. Parameter updates momentum.
|
||||
decay: float >= 0. Learning rate decay over each update.
|
||||
nesterov: boolean. Whether to apply Nesterov momentum.
|
||||
'''
|
||||
"""
|
||||
|
||||
def __init__(self, lr=0.01, momentum=0., decay=0.,
|
||||
nesterov=False, **kwargs):
|
||||
super(SGD, self).__init__(**kwargs)
|
||||
self.__dict__.update(locals())
|
||||
self.iterations = K.variable(0.)
|
||||
self.lr = K.variable(lr)
|
||||
self.momentum = K.variable(momentum)
|
||||
self.decay = K.variable(decay)
|
||||
self.inital_decay = decay
|
||||
self.initial_decay = decay
|
||||
self.nesterov = nesterov
|
||||
|
||||
def get_updates(self, params, constraints, loss):
|
||||
grads = self.get_gradients(loss, params)
|
||||
self.updates = []
|
||||
|
||||
lr = self.lr
|
||||
if self.inital_decay > 0:
|
||||
if self.initial_decay > 0:
|
||||
lr *= (1. / (1. + self.decay * self.iterations))
|
||||
self.updates .append(K.update_add(self.iterations, 1))
|
||||
|
||||
@@ -171,7 +197,7 @@ class SGD(Optimizer):
|
||||
|
||||
|
||||
class RMSprop(Optimizer):
|
||||
'''RMSProp optimizer.
|
||||
"""RMSProp optimizer.
|
||||
|
||||
It is recommended to leave the parameters of this optimizer
|
||||
at their default values
|
||||
@@ -185,15 +211,16 @@ class RMSprop(Optimizer):
|
||||
rho: float >= 0.
|
||||
epsilon: float >= 0. Fuzz factor.
|
||||
decay: float >= 0. Learning rate decay over each update.
|
||||
'''
|
||||
"""
|
||||
|
||||
def __init__(self, lr=0.001, rho=0.9, epsilon=1e-8, decay=0.,
|
||||
**kwargs):
|
||||
super(RMSprop, self).__init__(**kwargs)
|
||||
self.__dict__.update(locals())
|
||||
self.lr = K.variable(lr)
|
||||
self.rho = K.variable(rho)
|
||||
self.epsilon = epsilon
|
||||
self.decay = K.variable(decay)
|
||||
self.inital_decay = decay
|
||||
self.initial_decay = decay
|
||||
self.iterations = K.variable(0.)
|
||||
|
||||
def get_updates(self, params, constraints, loss):
|
||||
@@ -204,7 +231,7 @@ class RMSprop(Optimizer):
|
||||
self.updates = []
|
||||
|
||||
lr = self.lr
|
||||
if self.inital_decay > 0:
|
||||
if self.initial_decay > 0:
|
||||
lr *= (1. / (1. + self.decay * self.iterations))
|
||||
self.updates.append(K.update_add(self.iterations, 1))
|
||||
|
||||
@@ -231,7 +258,7 @@ class RMSprop(Optimizer):
|
||||
|
||||
|
||||
class Adagrad(Optimizer):
|
||||
'''Adagrad optimizer.
|
||||
"""Adagrad optimizer.
|
||||
|
||||
It is recommended to leave the parameters of this optimizer
|
||||
at their default values.
|
||||
@@ -239,16 +266,18 @@ class Adagrad(Optimizer):
|
||||
# Arguments
|
||||
lr: float >= 0. Learning rate.
|
||||
epsilon: float >= 0.
|
||||
decay: float >= 0. Learning rate decay over each update.
|
||||
|
||||
# References
|
||||
- [Adaptive Subgradient Methods for Online Learning and Stochastic Optimization](http://www.jmlr.org/papers/volume12/duchi11a/duchi11a.pdf)
|
||||
'''
|
||||
"""
|
||||
|
||||
def __init__(self, lr=0.01, epsilon=1e-8, decay=0., **kwargs):
|
||||
super(Adagrad, self).__init__(**kwargs)
|
||||
self.__dict__.update(locals())
|
||||
self.lr = K.variable(lr)
|
||||
self.epsilon = epsilon
|
||||
self.decay = K.variable(decay)
|
||||
self.inital_decay = decay
|
||||
self.initial_decay = decay
|
||||
self.iterations = K.variable(0.)
|
||||
|
||||
def get_updates(self, params, constraints, loss):
|
||||
@@ -259,7 +288,7 @@ class Adagrad(Optimizer):
|
||||
self.updates = []
|
||||
|
||||
lr = self.lr
|
||||
if self.inital_decay > 0:
|
||||
if self.initial_decay > 0:
|
||||
lr *= (1. / (1. + self.decay * self.iterations))
|
||||
self.updates.append(K.update_add(self.iterations, 1))
|
||||
|
||||
@@ -283,7 +312,7 @@ class Adagrad(Optimizer):
|
||||
|
||||
|
||||
class Adadelta(Optimizer):
|
||||
'''Adadelta optimizer.
|
||||
"""Adadelta optimizer.
|
||||
|
||||
It is recommended to leave the parameters of this optimizer
|
||||
at their default values.
|
||||
@@ -293,17 +322,20 @@ class Adadelta(Optimizer):
|
||||
It is recommended to leave it at the default value.
|
||||
rho: float >= 0.
|
||||
epsilon: float >= 0. Fuzz factor.
|
||||
decay: float >= 0. Learning rate decay over each update.
|
||||
|
||||
# References
|
||||
- [Adadelta - an adaptive learning rate method](http://arxiv.org/abs/1212.5701)
|
||||
'''
|
||||
"""
|
||||
|
||||
def __init__(self, lr=1.0, rho=0.95, epsilon=1e-8, decay=0.,
|
||||
**kwargs):
|
||||
super(Adadelta, self).__init__(**kwargs)
|
||||
self.__dict__.update(locals())
|
||||
self.lr = K.variable(lr)
|
||||
self.rho = rho
|
||||
self.epsilon = epsilon
|
||||
self.decay = K.variable(decay)
|
||||
self.inital_decay = decay
|
||||
self.initial_decay = decay
|
||||
self.iterations = K.variable(0.)
|
||||
|
||||
def get_updates(self, params, constraints, loss):
|
||||
@@ -315,7 +347,7 @@ class Adadelta(Optimizer):
|
||||
self.updates = []
|
||||
|
||||
lr = self.lr
|
||||
if self.inital_decay > 0:
|
||||
if self.initial_decay > 0:
|
||||
lr *= (1. / (1. + self.decay * self.iterations))
|
||||
self.updates.append(K.update_add(self.iterations, 1))
|
||||
|
||||
@@ -349,39 +381,43 @@ class Adadelta(Optimizer):
|
||||
|
||||
|
||||
class Adam(Optimizer):
|
||||
'''Adam optimizer.
|
||||
"""Adam optimizer.
|
||||
|
||||
Default parameters follow those provided in the original paper.
|
||||
|
||||
# Arguments
|
||||
lr: float >= 0. Learning rate.
|
||||
beta_1/beta_2: floats, 0 < beta < 1. Generally close to 1.
|
||||
beta_1: float, 0 < beta < 1. Generally close to 1.
|
||||
beta_2: float, 0 < beta < 1. Generally close to 1.
|
||||
epsilon: float >= 0. Fuzz factor.
|
||||
decay: float >= 0. Learning rate decay over each update.
|
||||
|
||||
# References
|
||||
- [Adam - A Method for Stochastic Optimization](http://arxiv.org/abs/1412.6980v8)
|
||||
'''
|
||||
"""
|
||||
|
||||
def __init__(self, lr=0.001, beta_1=0.9, beta_2=0.999,
|
||||
epsilon=1e-8, decay=0., **kwargs):
|
||||
super(Adam, self).__init__(**kwargs)
|
||||
self.__dict__.update(locals())
|
||||
self.iterations = K.variable(0)
|
||||
self.lr = K.variable(lr)
|
||||
self.beta_1 = K.variable(beta_1)
|
||||
self.beta_2 = K.variable(beta_2)
|
||||
self.epsilon = epsilon
|
||||
self.decay = K.variable(decay)
|
||||
self.inital_decay = decay
|
||||
self.initial_decay = decay
|
||||
|
||||
def get_updates(self, params, constraints, loss):
|
||||
grads = self.get_gradients(loss, params)
|
||||
self.updates = [K.update_add(self.iterations, 1)]
|
||||
|
||||
lr = self.lr
|
||||
if self.inital_decay > 0:
|
||||
if self.initial_decay > 0:
|
||||
lr *= (1. / (1. + self.decay * self.iterations))
|
||||
|
||||
t = self.iterations + 1
|
||||
lr_t = lr * K.sqrt(1. - K.pow(self.beta_2, t)) / (1. - K.pow(self.beta_1, t))
|
||||
lr_t = lr * (K.sqrt(1. - K.pow(self.beta_2, t)) /
|
||||
(1. - K.pow(self.beta_1, t)))
|
||||
|
||||
shapes = [K.get_variable_shape(p) for p in params]
|
||||
ms = [K.zeros(shape) for shape in shapes]
|
||||
@@ -415,36 +451,38 @@ class Adam(Optimizer):
|
||||
|
||||
|
||||
class Adamax(Optimizer):
|
||||
'''Adamax optimizer from Adam paper's Section 7. It is a variant
|
||||
of Adam based on the infinity norm.
|
||||
"""Adamax optimizer from Adam paper's Section 7.
|
||||
|
||||
It is a variant of Adam based on the infinity norm.
|
||||
Default parameters follow those provided in the paper.
|
||||
|
||||
# Arguments
|
||||
lr: float >= 0. Learning rate.
|
||||
beta_1/beta_2: floats, 0 < beta < 1. Generally close to 1.
|
||||
epsilon: float >= 0. Fuzz factor.
|
||||
decay: float >= 0. Learning rate decay over each update.
|
||||
|
||||
# References
|
||||
- [Adam - A Method for Stochastic Optimization](http://arxiv.org/abs/1412.6980v8)
|
||||
'''
|
||||
"""
|
||||
|
||||
def __init__(self, lr=0.002, beta_1=0.9, beta_2=0.999,
|
||||
epsilon=1e-8, decay=0., **kwargs):
|
||||
super(Adamax, self).__init__(**kwargs)
|
||||
self.__dict__.update(locals())
|
||||
self.iterations = K.variable(0.)
|
||||
self.lr = K.variable(lr)
|
||||
self.beta_1 = K.variable(beta_1)
|
||||
self.beta_2 = K.variable(beta_2)
|
||||
self.epsilon = epsilon
|
||||
self.decay = K.variable(decay)
|
||||
self.inital_decay = decay
|
||||
self.initial_decay = decay
|
||||
|
||||
def get_updates(self, params, constraints, loss):
|
||||
grads = self.get_gradients(loss, params)
|
||||
self.updates = [K.update_add(self.iterations, 1)]
|
||||
|
||||
lr = self.lr
|
||||
if self.inital_decay > 0:
|
||||
if self.initial_decay > 0:
|
||||
lr *= (1. / (1. + self.decay * self.iterations))
|
||||
|
||||
t = self.iterations + 1
|
||||
@@ -485,8 +523,9 @@ class Adamax(Optimizer):
|
||||
|
||||
|
||||
class Nadam(Optimizer):
|
||||
'''
|
||||
Nesterov Adam optimizer: Much like Adam is essentially RMSprop with momentum,
|
||||
"""Nesterov Adam optimizer.
|
||||
|
||||
Much like Adam is essentially RMSprop with momentum,
|
||||
Nadam is Adam with Nesterov momentum.
|
||||
|
||||
Default parameters follow those provided in the paper.
|
||||
@@ -501,16 +540,17 @@ class Nadam(Optimizer):
|
||||
# References
|
||||
- [Nadam report](http://cs229.stanford.edu/proj2015/054_report.pdf)
|
||||
- [On the importance of initialization and momentum in deep learning](http://www.cs.toronto.edu/~fritz/absps/momentum.pdf)
|
||||
'''
|
||||
"""
|
||||
|
||||
def __init__(self, lr=0.002, beta_1=0.9, beta_2=0.999,
|
||||
epsilon=1e-8, schedule_decay=0.004, **kwargs):
|
||||
super(Nadam, self).__init__(**kwargs)
|
||||
self.__dict__.update(locals())
|
||||
self.iterations = K.variable(0.)
|
||||
self.m_schedule = K.variable(1.)
|
||||
self.lr = K.variable(lr)
|
||||
self.beta_1 = K.variable(beta_1)
|
||||
self.beta_2 = K.variable(beta_2)
|
||||
self.epsilon = epsilon
|
||||
self.schedule_decay = schedule_decay
|
||||
|
||||
def get_updates(self, params, constraints, loss):
|
||||
@@ -565,6 +605,8 @@ class Nadam(Optimizer):
|
||||
|
||||
|
||||
class TFOptimizer(Optimizer):
|
||||
"""Wrapper class for native TensorFlow optimizers.
|
||||
"""
|
||||
|
||||
def __init__(self, optimizer):
|
||||
self.optimizer = optimizer
|
||||
@@ -594,7 +636,8 @@ class TFOptimizer(Optimizer):
|
||||
raise NotImplementedError
|
||||
|
||||
|
||||
# aliases
|
||||
# Aliases.
|
||||
|
||||
sgd = SGD
|
||||
rmsprop = RMSprop
|
||||
adagrad = Adagrad
|
||||
@@ -607,7 +650,6 @@ nadam = Nadam
|
||||
def get(identifier, kwargs=None):
|
||||
if K.backend() == 'tensorflow':
|
||||
# Wrap TF optimizer instances
|
||||
import tensorflow as tf
|
||||
if isinstance(identifier, tf.train.Optimizer):
|
||||
return TFOptimizer(identifier)
|
||||
# Instantiate a Keras optimizer
|
||||
|
||||
+391
-162
@@ -1,7 +1,7 @@
|
||||
'''Fairly basic set of tools for real-time data augmentation on image data.
|
||||
"""Fairly basic set of tools for real-time data augmentation on image data.
|
||||
Can easily be extended to include new transformations,
|
||||
new preprocessing methods, etc...
|
||||
'''
|
||||
"""
|
||||
from __future__ import absolute_import
|
||||
from __future__ import print_function
|
||||
|
||||
@@ -12,26 +12,67 @@ import scipy.ndimage as ndi
|
||||
from six.moves import range
|
||||
import os
|
||||
import threading
|
||||
import warnings
|
||||
|
||||
from .. import backend as K
|
||||
|
||||
try:
|
||||
from PIL import Image as pil_image
|
||||
except ImportError:
|
||||
pil_image = None
|
||||
|
||||
def random_rotation(x, rg, row_index=1, col_index=2, channel_index=0,
|
||||
|
||||
def random_rotation(x, rg, row_axis=1, col_axis=2, channel_axis=0,
|
||||
fill_mode='nearest', cval=0.):
|
||||
"""Performs a random rotation of a Numpy image tensor.
|
||||
|
||||
# Arguments
|
||||
x: Input tensor. Must be 3D.
|
||||
rg: Rotation range, in degrees.
|
||||
row_axis: Index of axis for rows in the input tensor.
|
||||
col_axis: Index of axis for columns in the input tensor.
|
||||
channel_axis: Index of axis for channels in the input tensor.
|
||||
fill_mode: Points outside the boundaries of the input
|
||||
are filled according to the given mode
|
||||
(one of `{'constant', 'nearest', 'reflect', 'wrap'}`).
|
||||
cval: Value used for points outside the boundaries
|
||||
of the input if `mode='constant'`.
|
||||
|
||||
# Returns
|
||||
Rotated Numpy image tensor.
|
||||
"""
|
||||
theta = np.pi / 180 * np.random.uniform(-rg, rg)
|
||||
rotation_matrix = np.array([[np.cos(theta), -np.sin(theta), 0],
|
||||
[np.sin(theta), np.cos(theta), 0],
|
||||
[0, 0, 1]])
|
||||
|
||||
h, w = x.shape[row_index], x.shape[col_index]
|
||||
h, w = x.shape[row_axis], x.shape[col_axis]
|
||||
transform_matrix = transform_matrix_offset_center(rotation_matrix, h, w)
|
||||
x = apply_transform(x, transform_matrix, channel_index, fill_mode, cval)
|
||||
x = apply_transform(x, transform_matrix, channel_axis, fill_mode, cval)
|
||||
return x
|
||||
|
||||
|
||||
def random_shift(x, wrg, hrg, row_index=1, col_index=2, channel_index=0,
|
||||
def random_shift(x, wrg, hrg, row_axis=1, col_axis=2, channel_axis=0,
|
||||
fill_mode='nearest', cval=0.):
|
||||
h, w = x.shape[row_index], x.shape[col_index]
|
||||
"""Performs a random spatial shift of a Numpy image tensor.
|
||||
|
||||
# Arguments
|
||||
x: Input tensor. Must be 3D.
|
||||
wrg: Width shift range, as a float fraction of the width.
|
||||
hrg: Height shift range, as a float fraction of the height.
|
||||
row_axis: Index of axis for rows in the input tensor.
|
||||
col_axis: Index of axis for columns in the input tensor.
|
||||
channel_axis: Index of axis for channels in the input tensor.
|
||||
fill_mode: Points outside the boundaries of the input
|
||||
are filled according to the given mode
|
||||
(one of `{'constant', 'nearest', 'reflect', 'wrap'}`).
|
||||
cval: Value used for points outside the boundaries
|
||||
of the input if `mode='constant'`.
|
||||
|
||||
# Returns
|
||||
Shifted Numpy image tensor.
|
||||
"""
|
||||
h, w = x.shape[row_axis], x.shape[col_axis]
|
||||
tx = np.random.uniform(-hrg, hrg) * h
|
||||
ty = np.random.uniform(-wrg, wrg) * w
|
||||
translation_matrix = np.array([[1, 0, tx],
|
||||
@@ -39,28 +80,65 @@ def random_shift(x, wrg, hrg, row_index=1, col_index=2, channel_index=0,
|
||||
[0, 0, 1]])
|
||||
|
||||
transform_matrix = translation_matrix # no need to do offset
|
||||
x = apply_transform(x, transform_matrix, channel_index, fill_mode, cval)
|
||||
x = apply_transform(x, transform_matrix, channel_axis, fill_mode, cval)
|
||||
return x
|
||||
|
||||
|
||||
def random_shear(x, intensity, row_index=1, col_index=2, channel_index=0,
|
||||
def random_shear(x, intensity, row_axis=1, col_axis=2, channel_axis=0,
|
||||
fill_mode='nearest', cval=0.):
|
||||
"""Performs a random spatial shear of a Numpy image tensor.
|
||||
|
||||
# Arguments
|
||||
x: Input tensor. Must be 3D.
|
||||
intensity: Transformation intensity.
|
||||
row_axis: Index of axis for rows in the input tensor.
|
||||
col_axis: Index of axis for columns in the input tensor.
|
||||
channel_axis: Index of axis for channels in the input tensor.
|
||||
fill_mode: Points outside the boundaries of the input
|
||||
are filled according to the given mode
|
||||
(one of `{'constant', 'nearest', 'reflect', 'wrap'}`).
|
||||
cval: Value used for points outside the boundaries
|
||||
of the input if `mode='constant'`.
|
||||
|
||||
# Returns
|
||||
Sheared Numpy image tensor.
|
||||
"""
|
||||
shear = np.random.uniform(-intensity, intensity)
|
||||
shear_matrix = np.array([[1, -np.sin(shear), 0],
|
||||
[0, np.cos(shear), 0],
|
||||
[0, 0, 1]])
|
||||
|
||||
h, w = x.shape[row_index], x.shape[col_index]
|
||||
h, w = x.shape[row_axis], x.shape[col_axis]
|
||||
transform_matrix = transform_matrix_offset_center(shear_matrix, h, w)
|
||||
x = apply_transform(x, transform_matrix, channel_index, fill_mode, cval)
|
||||
x = apply_transform(x, transform_matrix, channel_axis, fill_mode, cval)
|
||||
return x
|
||||
|
||||
|
||||
def random_zoom(x, zoom_range, row_index=1, col_index=2, channel_index=0,
|
||||
def random_zoom(x, zoom_range, row_axis=1, col_axis=2, channel_axis=0,
|
||||
fill_mode='nearest', cval=0.):
|
||||
"""Performs a random spatial zoom of a Numpy image tensor.
|
||||
|
||||
# Arguments
|
||||
x: Input tensor. Must be 3D.
|
||||
zoom_range: Tuple of floats; zoom range for width and height.
|
||||
row_axis: Index of axis for rows in the input tensor.
|
||||
col_axis: Index of axis for columns in the input tensor.
|
||||
channel_axis: Index of axis for channels in the input tensor.
|
||||
fill_mode: Points outside the boundaries of the input
|
||||
are filled according to the given mode
|
||||
(one of `{'constant', 'nearest', 'reflect', 'wrap'}`).
|
||||
cval: Value used for points outside the boundaries
|
||||
of the input if `mode='constant'`.
|
||||
|
||||
# Returns
|
||||
Zoomed Numpy image tensor.
|
||||
|
||||
# Raises
|
||||
ValueError: if `zoom_range` isn't a tuple or list of two floats.
|
||||
"""
|
||||
if len(zoom_range) != 2:
|
||||
raise Exception('zoom_range should be a tuple or list of two floats. '
|
||||
'Received arg: ', zoom_range)
|
||||
raise ValueError('zoom_range should be a tuple or list of two floats. '
|
||||
'Received arg: ', zoom_range)
|
||||
|
||||
if zoom_range[0] == 1 and zoom_range[1] == 1:
|
||||
zx, zy = 1, 1
|
||||
@@ -70,24 +148,19 @@ def random_zoom(x, zoom_range, row_index=1, col_index=2, channel_index=0,
|
||||
[0, zy, 0],
|
||||
[0, 0, 1]])
|
||||
|
||||
h, w = x.shape[row_index], x.shape[col_index]
|
||||
h, w = x.shape[row_axis], x.shape[col_axis]
|
||||
transform_matrix = transform_matrix_offset_center(zoom_matrix, h, w)
|
||||
x = apply_transform(x, transform_matrix, channel_index, fill_mode, cval)
|
||||
x = apply_transform(x, transform_matrix, channel_axis, fill_mode, cval)
|
||||
return x
|
||||
|
||||
|
||||
def random_barrel_transform(x, intensity):
|
||||
# TODO
|
||||
pass
|
||||
|
||||
|
||||
def random_channel_shift(x, intensity, channel_index=0):
|
||||
x = np.rollaxis(x, channel_index, 0)
|
||||
def random_channel_shift(x, intensity, channel_axis=0):
|
||||
x = np.rollaxis(x, channel_axis, 0)
|
||||
min_x, max_x = np.min(x), np.max(x)
|
||||
channel_images = [np.clip(x_channel + np.random.uniform(-intensity, intensity), min_x, max_x)
|
||||
for x_channel in x]
|
||||
x = np.stack(channel_images, axis=0)
|
||||
x = np.rollaxis(x, 0, channel_index+1)
|
||||
x = np.rollaxis(x, 0, channel_axis + 1)
|
||||
return x
|
||||
|
||||
|
||||
@@ -100,14 +173,14 @@ def transform_matrix_offset_center(matrix, x, y):
|
||||
return transform_matrix
|
||||
|
||||
|
||||
def apply_transform(x, transform_matrix, channel_index=0, fill_mode='nearest', cval=0.):
|
||||
x = np.rollaxis(x, channel_index, 0)
|
||||
def apply_transform(x, transform_matrix, channel_axis=0, fill_mode='nearest', cval=0.):
|
||||
x = np.rollaxis(x, channel_axis, 0)
|
||||
final_affine_matrix = transform_matrix[:2, :2]
|
||||
final_offset = transform_matrix[:2, 2]
|
||||
channel_images = [ndi.interpolation.affine_transform(x_channel, final_affine_matrix,
|
||||
final_offset, order=0, mode=fill_mode, cval=cval) for x_channel in x]
|
||||
final_offset, order=0, mode=fill_mode, cval=cval) for x_channel in x]
|
||||
x = np.stack(channel_images, axis=0)
|
||||
x = np.rollaxis(x, 0, channel_index+1)
|
||||
x = np.rollaxis(x, 0, channel_axis + 1)
|
||||
return x
|
||||
|
||||
|
||||
@@ -119,33 +192,75 @@ def flip_axis(x, axis):
|
||||
|
||||
|
||||
def array_to_img(x, dim_ordering='default', scale=True):
|
||||
from PIL import Image
|
||||
"""Converts a 3D Numpy array to a PIL Image instance.
|
||||
|
||||
# Arguments
|
||||
x: Input Numpy array.
|
||||
dim_ordering: Image data format.
|
||||
scale: Whether to rescale image values
|
||||
to be within [0, 255].
|
||||
|
||||
# Returns
|
||||
A PIL Image instance.
|
||||
|
||||
# Raises
|
||||
ImportError: if PIL is not available.
|
||||
ValueError: if invalid `x` or `dim_ordering` is passed.
|
||||
"""
|
||||
if pil_image is None:
|
||||
raise ImportError('Could not import PIL.Image. '
|
||||
'The use of `array_to_img` requires PIL.')
|
||||
x = np.asarray(x)
|
||||
if x.ndim != 3:
|
||||
raise ValueError('Expected image array to have rank 3 (single image). '
|
||||
'Got array with shape:', x.shape)
|
||||
|
||||
if dim_ordering == 'default':
|
||||
dim_ordering = K.image_dim_ordering()
|
||||
if dim_ordering not in {'th', 'tf'}:
|
||||
raise ValueError('Invalid dim_ordering:', dim_ordering)
|
||||
|
||||
# Original Numpy array x has format (height, width, channel)
|
||||
# or (channel, height, width)
|
||||
# but target PIL image has format (width, height, channel)
|
||||
if dim_ordering == 'th':
|
||||
x = x.transpose(1, 2, 0)
|
||||
if scale:
|
||||
x += max(-np.min(x), 0)
|
||||
x = x + max(-np.min(x), 0)
|
||||
x_max = np.max(x)
|
||||
if x_max != 0:
|
||||
x /= x_max
|
||||
x *= 255
|
||||
if x.shape[2] == 3:
|
||||
# RGB
|
||||
return Image.fromarray(x.astype('uint8'), 'RGB')
|
||||
return pil_image.fromarray(x.astype('uint8'), 'RGB')
|
||||
elif x.shape[2] == 1:
|
||||
# grayscale
|
||||
return Image.fromarray(x[:, :, 0].astype('uint8'), 'L')
|
||||
return pil_image.fromarray(x[:, :, 0].astype('uint8'), 'L')
|
||||
else:
|
||||
raise Exception('Unsupported channel number: ', x.shape[2])
|
||||
raise ValueError('Unsupported channel number: ', x.shape[2])
|
||||
|
||||
|
||||
def img_to_array(img, dim_ordering='default'):
|
||||
"""Converts a PIL Image instance to a Numpy array.
|
||||
|
||||
# Arguments
|
||||
img: PIL Image instance.
|
||||
dim_ordering: Image data format.
|
||||
|
||||
# Returns
|
||||
A 3D Numpy array (float32).
|
||||
|
||||
# Raises
|
||||
ValueError: if invalid `img` or `dim_ordering` is passed.
|
||||
"""
|
||||
if dim_ordering == 'default':
|
||||
dim_ordering = K.image_dim_ordering()
|
||||
if dim_ordering not in ['th', 'tf']:
|
||||
raise Exception('Unknown dim_ordering: ', dim_ordering)
|
||||
# image has dim_ordering (height, width, channel)
|
||||
if dim_ordering not in {'th', 'tf'}:
|
||||
raise ValueError('Unknown dim_ordering: ', dim_ordering)
|
||||
# Numpy array x has format (height, width, channel)
|
||||
# or (channel, height, width)
|
||||
# but original PIL image has format (width, height, channel)
|
||||
x = np.asarray(img, dtype='float32')
|
||||
if len(x.shape) == 3:
|
||||
if dim_ordering == 'th':
|
||||
@@ -156,21 +271,29 @@ def img_to_array(img, dim_ordering='default'):
|
||||
else:
|
||||
x = x.reshape((x.shape[0], x.shape[1], 1))
|
||||
else:
|
||||
raise Exception('Unsupported image shape: ', x.shape)
|
||||
raise ValueError('Unsupported image shape: ', x.shape)
|
||||
return x
|
||||
|
||||
|
||||
def load_img(path, grayscale=False, target_size=None):
|
||||
'''Load an image into PIL format.
|
||||
"""Loads an image into PIL format.
|
||||
|
||||
# Arguments
|
||||
path: path to image file
|
||||
grayscale: boolean
|
||||
target_size: None (default to original size)
|
||||
or (img_height, img_width)
|
||||
'''
|
||||
from PIL import Image
|
||||
img = Image.open(path)
|
||||
path: Path to image file
|
||||
grayscale: Boolean, whether to load the image as grayscale.
|
||||
target_size: Either `None` (default to original size)
|
||||
or tuple of ints `(img_height, img_width)`.
|
||||
|
||||
# Returns
|
||||
A PIL Image instance.
|
||||
|
||||
# Raises
|
||||
ImportError: if PIL is not available.
|
||||
"""
|
||||
if pil_image is None:
|
||||
raise ImportError('Could not import PIL.Image. '
|
||||
'The use of `array_to_img` requires PIL.')
|
||||
img = pil_image.open(path)
|
||||
if grayscale:
|
||||
img = img.convert('L')
|
||||
else: # Ensure 3 channel even when loaded image is grayscale
|
||||
@@ -181,13 +304,13 @@ def load_img(path, grayscale=False, target_size=None):
|
||||
|
||||
|
||||
def list_pictures(directory, ext='jpg|jpeg|bmp|png'):
|
||||
return [os.path.join(directory, f) for f in sorted(os.listdir(directory))
|
||||
if os.path.isfile(os.path.join(directory, f)) and re.match('([\w]+\.(?:' + ext + '))', f)]
|
||||
return [os.path.join(root, f)
|
||||
for root, _, files in os.walk(directory) for f in files
|
||||
if re.match('([\w]+\.(?:' + ext + '))', f)]
|
||||
|
||||
|
||||
class ImageDataGenerator(object):
|
||||
'''Generate minibatches with
|
||||
real-time data augmentation.
|
||||
"""Generate minibatches of image data with real-time data augmentation.
|
||||
|
||||
# Arguments
|
||||
featurewise_center: set input mean to 0 over the dataset.
|
||||
@@ -211,14 +334,20 @@ class ImageDataGenerator(object):
|
||||
horizontal_flip: whether to randomly flip images horizontally.
|
||||
vertical_flip: whether to randomly flip images vertically.
|
||||
rescale: rescaling factor. If None or 0, no rescaling is applied,
|
||||
otherwise we multiply the data by the value provided (before applying
|
||||
any other transformation).
|
||||
otherwise we multiply the data by the value provided
|
||||
(before applying any other transformation).
|
||||
preprocessing_function: function that will be applied to each input.
|
||||
The function will run before any other modification on it.
|
||||
The function should take one argument:
|
||||
one image (Numpy tensor with rank 3),
|
||||
and should output a Numpy tensor with the same shape.
|
||||
dim_ordering: 'th' or 'tf'. In 'th' mode, the channels dimension
|
||||
(the depth) is at index 1, in 'tf' mode it is at index 3.
|
||||
It defaults to the `image_dim_ordering` value found in your
|
||||
Keras config file at `~/.keras/keras.json`.
|
||||
If you never set it, then it will be "th".
|
||||
'''
|
||||
If you never set it, then it will be "tf".
|
||||
"""
|
||||
|
||||
def __init__(self,
|
||||
featurewise_center=False,
|
||||
samplewise_center=False,
|
||||
@@ -236,88 +365,134 @@ class ImageDataGenerator(object):
|
||||
horizontal_flip=False,
|
||||
vertical_flip=False,
|
||||
rescale=None,
|
||||
preprocessing_function=None,
|
||||
dim_ordering='default'):
|
||||
if dim_ordering == 'default':
|
||||
dim_ordering = K.image_dim_ordering()
|
||||
self.__dict__.update(locals())
|
||||
self.featurewise_center = featurewise_center
|
||||
self.samplewise_center = samplewise_center
|
||||
self.featurewise_std_normalization = featurewise_std_normalization
|
||||
self.samplewise_std_normalization = samplewise_std_normalization
|
||||
self.zca_whitening = zca_whitening
|
||||
self.rotation_range = rotation_range
|
||||
self.width_shift_range = width_shift_range
|
||||
self.height_shift_range = height_shift_range
|
||||
self.shear_range = shear_range
|
||||
self.zoom_range = zoom_range
|
||||
self.channel_shift_range = channel_shift_range
|
||||
self.fill_mode = fill_mode
|
||||
self.cval = cval
|
||||
self.horizontal_flip = horizontal_flip
|
||||
self.vertical_flip = vertical_flip
|
||||
self.rescale = rescale
|
||||
self.preprocessing_function = preprocessing_function
|
||||
|
||||
if dim_ordering not in {'tf', 'th'}:
|
||||
raise ValueError('dim_ordering should be "tf" (channel after row and '
|
||||
'column) or "th" (channel before row and column). '
|
||||
'Received arg: ', dim_ordering)
|
||||
self.dim_ordering = dim_ordering
|
||||
if dim_ordering == 'th':
|
||||
self.channel_axis = 1
|
||||
self.row_axis = 2
|
||||
self.col_axis = 3
|
||||
if dim_ordering == 'tf':
|
||||
self.channel_axis = 3
|
||||
self.row_axis = 1
|
||||
self.col_axis = 2
|
||||
|
||||
self.mean = None
|
||||
self.std = None
|
||||
self.principal_components = None
|
||||
self.rescale = rescale
|
||||
|
||||
if dim_ordering not in {'tf', 'th'}:
|
||||
raise Exception('dim_ordering should be "tf" (channel after row and '
|
||||
'column) or "th" (channel before row and column). '
|
||||
'Received arg: ', dim_ordering)
|
||||
self.dim_ordering = dim_ordering
|
||||
if dim_ordering == 'th':
|
||||
self.channel_index = 1
|
||||
self.row_index = 2
|
||||
self.col_index = 3
|
||||
if dim_ordering == 'tf':
|
||||
self.channel_index = 3
|
||||
self.row_index = 1
|
||||
self.col_index = 2
|
||||
|
||||
if np.isscalar(zoom_range):
|
||||
self.zoom_range = [1 - zoom_range, 1 + zoom_range]
|
||||
elif len(zoom_range) == 2:
|
||||
self.zoom_range = [zoom_range[0], zoom_range[1]]
|
||||
else:
|
||||
raise Exception('zoom_range should be a float or '
|
||||
'a tuple or list of two floats. '
|
||||
'Received arg: ', zoom_range)
|
||||
raise ValueError('zoom_range should be a float or '
|
||||
'a tuple or list of two floats. '
|
||||
'Received arg: ', zoom_range)
|
||||
|
||||
def flow(self, X, y=None, batch_size=32, shuffle=True, seed=None,
|
||||
save_to_dir=None, save_prefix='', save_format='jpeg'):
|
||||
return NumpyArrayIterator(
|
||||
X, y, self,
|
||||
batch_size=batch_size, shuffle=shuffle, seed=seed,
|
||||
batch_size=batch_size,
|
||||
shuffle=shuffle,
|
||||
seed=seed,
|
||||
dim_ordering=self.dim_ordering,
|
||||
save_to_dir=save_to_dir, save_prefix=save_prefix, save_format=save_format)
|
||||
save_to_dir=save_to_dir,
|
||||
save_prefix=save_prefix,
|
||||
save_format=save_format)
|
||||
|
||||
def flow_from_directory(self, directory,
|
||||
target_size=(256, 256), color_mode='rgb',
|
||||
classes=None, class_mode='categorical',
|
||||
batch_size=32, shuffle=True, seed=None,
|
||||
save_to_dir=None, save_prefix='', save_format='jpeg'):
|
||||
save_to_dir=None,
|
||||
save_prefix='',
|
||||
save_format='jpeg',
|
||||
follow_links=False):
|
||||
return DirectoryIterator(
|
||||
directory, self,
|
||||
target_size=target_size, color_mode=color_mode,
|
||||
classes=classes, class_mode=class_mode,
|
||||
dim_ordering=self.dim_ordering,
|
||||
batch_size=batch_size, shuffle=shuffle, seed=seed,
|
||||
save_to_dir=save_to_dir, save_prefix=save_prefix, save_format=save_format)
|
||||
save_to_dir=save_to_dir,
|
||||
save_prefix=save_prefix,
|
||||
save_format=save_format,
|
||||
follow_links=follow_links)
|
||||
|
||||
def standardize(self, x):
|
||||
if self.preprocessing_function:
|
||||
x = self.preprocessing_function(x)
|
||||
if self.rescale:
|
||||
x *= self.rescale
|
||||
# x is a single image, so it doesn't have image number at index 0
|
||||
img_channel_index = self.channel_index - 1
|
||||
img_channel_axis = self.channel_axis - 1
|
||||
if self.samplewise_center:
|
||||
x -= np.mean(x, axis=img_channel_index, keepdims=True)
|
||||
x -= np.mean(x, axis=img_channel_axis, keepdims=True)
|
||||
if self.samplewise_std_normalization:
|
||||
x /= (np.std(x, axis=img_channel_index, keepdims=True) + 1e-7)
|
||||
x /= (np.std(x, axis=img_channel_axis, keepdims=True) + 1e-7)
|
||||
|
||||
if self.featurewise_center:
|
||||
x -= self.mean
|
||||
if self.mean is not None:
|
||||
x -= self.mean
|
||||
else:
|
||||
warnings.warn('This ImageDataGenerator specifies '
|
||||
'`featurewise_center`, but it hasn\'t'
|
||||
'been fit on any training data. Fit it '
|
||||
'first by calling `.fit(numpy_data)`.')
|
||||
if self.featurewise_std_normalization:
|
||||
x /= (self.std + 1e-7)
|
||||
|
||||
if self.std is not None:
|
||||
x /= (self.std + 1e-7)
|
||||
else:
|
||||
warnings.warn('This ImageDataGenerator specifies '
|
||||
'`featurewise_std_normalization`, but it hasn\'t'
|
||||
'been fit on any training data. Fit it '
|
||||
'first by calling `.fit(numpy_data)`.')
|
||||
if self.zca_whitening:
|
||||
flatx = np.reshape(x, (x.size))
|
||||
whitex = np.dot(flatx, self.principal_components)
|
||||
x = np.reshape(whitex, (x.shape[0], x.shape[1], x.shape[2]))
|
||||
|
||||
if self.principal_components is not None:
|
||||
flatx = np.reshape(x, (x.size))
|
||||
whitex = np.dot(flatx, self.principal_components)
|
||||
x = np.reshape(whitex, (x.shape[0], x.shape[1], x.shape[2]))
|
||||
else:
|
||||
warnings.warn('This ImageDataGenerator specifies '
|
||||
'`zca_whitening`, but it hasn\'t'
|
||||
'been fit on any training data. Fit it '
|
||||
'first by calling `.fit(numpy_data)`.')
|
||||
return x
|
||||
|
||||
def random_transform(self, x):
|
||||
# x is a single image, so it doesn't have image number at index 0
|
||||
img_row_index = self.row_index - 1
|
||||
img_col_index = self.col_index - 1
|
||||
img_channel_index = self.channel_index - 1
|
||||
img_row_axis = self.row_axis - 1
|
||||
img_col_axis = self.col_axis - 1
|
||||
img_channel_axis = self.channel_axis - 1
|
||||
|
||||
# use composition of homographies to generate final transform that needs to be applied
|
||||
# use composition of homographies
|
||||
# to generate final transform that needs to be applied
|
||||
if self.rotation_range:
|
||||
theta = np.pi / 180 * np.random.uniform(-self.rotation_range, self.rotation_range)
|
||||
else:
|
||||
@@ -326,12 +501,12 @@ class ImageDataGenerator(object):
|
||||
[np.sin(theta), np.cos(theta), 0],
|
||||
[0, 0, 1]])
|
||||
if self.height_shift_range:
|
||||
tx = np.random.uniform(-self.height_shift_range, self.height_shift_range) * x.shape[img_row_index]
|
||||
tx = np.random.uniform(-self.height_shift_range, self.height_shift_range) * x.shape[img_row_axis]
|
||||
else:
|
||||
tx = 0
|
||||
|
||||
if self.width_shift_range:
|
||||
ty = np.random.uniform(-self.width_shift_range, self.width_shift_range) * x.shape[img_col_index]
|
||||
ty = np.random.uniform(-self.width_shift_range, self.width_shift_range) * x.shape[img_col_axis]
|
||||
else:
|
||||
ty = 0
|
||||
|
||||
@@ -354,99 +529,125 @@ class ImageDataGenerator(object):
|
||||
[0, zy, 0],
|
||||
[0, 0, 1]])
|
||||
|
||||
transform_matrix = np.dot(np.dot(np.dot(rotation_matrix, translation_matrix), shear_matrix), zoom_matrix)
|
||||
transform_matrix = np.dot(np.dot(np.dot(rotation_matrix,
|
||||
translation_matrix),
|
||||
shear_matrix),
|
||||
zoom_matrix)
|
||||
|
||||
h, w = x.shape[img_row_index], x.shape[img_col_index]
|
||||
h, w = x.shape[img_row_axis], x.shape[img_col_axis]
|
||||
transform_matrix = transform_matrix_offset_center(transform_matrix, h, w)
|
||||
x = apply_transform(x, transform_matrix, img_channel_index,
|
||||
x = apply_transform(x, transform_matrix, img_channel_axis,
|
||||
fill_mode=self.fill_mode, cval=self.cval)
|
||||
if self.channel_shift_range != 0:
|
||||
x = random_channel_shift(x, self.channel_shift_range, img_channel_index)
|
||||
|
||||
x = random_channel_shift(x,
|
||||
self.channel_shift_range,
|
||||
img_channel_axis)
|
||||
if self.horizontal_flip:
|
||||
if np.random.random() < 0.5:
|
||||
x = flip_axis(x, img_col_index)
|
||||
x = flip_axis(x, img_col_axis)
|
||||
|
||||
if self.vertical_flip:
|
||||
if np.random.random() < 0.5:
|
||||
x = flip_axis(x, img_row_index)
|
||||
x = flip_axis(x, img_row_axis)
|
||||
|
||||
# TODO:
|
||||
# channel-wise normalization
|
||||
# barrel/fisheye
|
||||
return x
|
||||
|
||||
def fit(self, X,
|
||||
def fit(self, x,
|
||||
augment=False,
|
||||
rounds=1,
|
||||
seed=None):
|
||||
'''Required for featurewise_center, featurewise_std_normalization
|
||||
"""Required for featurewise_center, featurewise_std_normalization
|
||||
and zca_whitening.
|
||||
|
||||
# Arguments
|
||||
X: Numpy array, the data to fit on.
|
||||
augment: whether to fit on randomly augmented samples
|
||||
rounds: if `augment`,
|
||||
x: Numpy array, the data to fit on. Should have rank 4.
|
||||
In case of grayscale data,
|
||||
the channels axis should have value 1, and in case
|
||||
of RGB data, it should have value 3.
|
||||
augment: Whether to fit on randomly augmented samples
|
||||
rounds: If `augment`,
|
||||
how many augmentation passes to do over the data
|
||||
seed: random seed.
|
||||
'''
|
||||
|
||||
# Raises
|
||||
ValueError: in case of invalid input `x`.
|
||||
"""
|
||||
x = np.asarray(x)
|
||||
if x.ndim != 4:
|
||||
raise ValueError('Input to `.fit()` should have rank 4. '
|
||||
'Got array with shape: ' + str(x.shape))
|
||||
if x.shape[self.channel_axis] not in {1, 3, 4}:
|
||||
raise ValueError(
|
||||
'Expected input to be images (as Numpy array) '
|
||||
'following the dimension ordering convention "' + self.dim_ordering + '" '
|
||||
'(channels on axis ' + str(self.channel_axis) + '), i.e. expected '
|
||||
'either 1, 3 or 4 channels on axis ' + str(self.channel_axis) + '. '
|
||||
'However, it was passed an array with shape ' + str(x.shape) +
|
||||
' (' + str(x.shape[self.channel_axis]) + ' channels).')
|
||||
|
||||
if seed is not None:
|
||||
np.random.seed(seed)
|
||||
|
||||
X = np.copy(X)
|
||||
x = np.copy(x)
|
||||
if augment:
|
||||
aX = np.zeros(tuple([rounds * X.shape[0]] + list(X.shape)[1:]))
|
||||
ax = np.zeros(tuple([rounds * x.shape[0]] + list(x.shape)[1:]))
|
||||
for r in range(rounds):
|
||||
for i in range(X.shape[0]):
|
||||
aX[i + r * X.shape[0]] = self.random_transform(X[i])
|
||||
X = aX
|
||||
for i in range(x.shape[0]):
|
||||
ax[i + r * x.shape[0]] = self.random_transform(x[i])
|
||||
x = ax
|
||||
|
||||
if self.featurewise_center:
|
||||
self.mean = np.mean(X, axis=0)
|
||||
X -= self.mean
|
||||
self.mean = np.mean(x, axis=(0, self.row_axis, self.col_axis))
|
||||
broadcast_shape = [1, 1, 1]
|
||||
broadcast_shape[self.channel_axis - 1] = x.shape[self.channel_axis]
|
||||
self.mean = np.reshape(self.mean, broadcast_shape)
|
||||
x -= self.mean
|
||||
|
||||
if self.featurewise_std_normalization:
|
||||
self.std = np.std(X, axis=0)
|
||||
X /= (self.std + 1e-7)
|
||||
self.std = np.std(x, axis=(0, self.row_axis, self.col_axis))
|
||||
broadcast_shape = [1, 1, 1]
|
||||
broadcast_shape[self.channel_axis - 1] = x.shape[self.channel_axis]
|
||||
self.std = np.reshape(self.std, broadcast_shape)
|
||||
x /= (self.std + K.epsilon())
|
||||
|
||||
if self.zca_whitening:
|
||||
flatX = np.reshape(X, (X.shape[0], X.shape[1] * X.shape[2] * X.shape[3]))
|
||||
sigma = np.dot(flatX.T, flatX) / flatX.shape[0]
|
||||
U, S, V = linalg.svd(sigma)
|
||||
self.principal_components = np.dot(np.dot(U, np.diag(1. / np.sqrt(S + 10e-7))), U.T)
|
||||
flat_x = np.reshape(x, (x.shape[0], x.shape[1] * x.shape[2] * x.shape[3]))
|
||||
sigma = np.dot(flat_x.T, flat_x) / flat_x.shape[0]
|
||||
u, s, _ = linalg.svd(sigma)
|
||||
self.principal_components = np.dot(np.dot(u, np.diag(1. / np.sqrt(s + 10e-7))), u.T)
|
||||
|
||||
|
||||
class Iterator(object):
|
||||
|
||||
def __init__(self, N, batch_size, shuffle, seed):
|
||||
self.N = N
|
||||
def __init__(self, n, batch_size, shuffle, seed):
|
||||
self.n = n
|
||||
self.batch_size = batch_size
|
||||
self.shuffle = shuffle
|
||||
self.batch_index = 0
|
||||
self.total_batches_seen = 0
|
||||
self.lock = threading.Lock()
|
||||
self.index_generator = self._flow_index(N, batch_size, shuffle, seed)
|
||||
self.index_generator = self._flow_index(n, batch_size, shuffle, seed)
|
||||
|
||||
def reset(self):
|
||||
self.batch_index = 0
|
||||
|
||||
def _flow_index(self, N, batch_size=32, shuffle=False, seed=None):
|
||||
def _flow_index(self, n, batch_size=32, shuffle=False, seed=None):
|
||||
# ensure self.batch_index is 0
|
||||
self.reset()
|
||||
while 1:
|
||||
if seed is not None:
|
||||
np.random.seed(seed + self.total_batches_seen)
|
||||
if self.batch_index == 0:
|
||||
index_array = np.arange(N)
|
||||
index_array = np.arange(n)
|
||||
if shuffle:
|
||||
index_array = np.random.permutation(N)
|
||||
index_array = np.random.permutation(n)
|
||||
|
||||
current_index = (self.batch_index * batch_size) % N
|
||||
if N >= current_index + batch_size:
|
||||
current_index = (self.batch_index * batch_size) % n
|
||||
if n >= current_index + batch_size:
|
||||
current_batch_size = batch_size
|
||||
self.batch_index += 1
|
||||
else:
|
||||
current_batch_size = N - current_index
|
||||
current_batch_size = n - current_index
|
||||
self.batch_index = 0
|
||||
self.total_batches_seen += 1
|
||||
yield (index_array[current_index: current_index + current_batch_size],
|
||||
@@ -463,24 +664,40 @@ class Iterator(object):
|
||||
|
||||
class NumpyArrayIterator(Iterator):
|
||||
|
||||
def __init__(self, X, y, image_data_generator,
|
||||
def __init__(self, x, y, image_data_generator,
|
||||
batch_size=32, shuffle=False, seed=None,
|
||||
dim_ordering='default',
|
||||
save_to_dir=None, save_prefix='', save_format='jpeg'):
|
||||
if y is not None and len(X) != len(y):
|
||||
raise Exception('X (images tensor) and y (labels) '
|
||||
'should have the same length. '
|
||||
'Found: X.shape = %s, y.shape = %s' % (np.asarray(X).shape, np.asarray(y).shape))
|
||||
if y is not None and len(x) != len(y):
|
||||
raise ValueError('X (images tensor) and y (labels) '
|
||||
'should have the same length. '
|
||||
'Found: X.shape = %s, y.shape = %s' %
|
||||
(np.asarray(x).shape, np.asarray(y).shape))
|
||||
if dim_ordering == 'default':
|
||||
dim_ordering = K.image_dim_ordering()
|
||||
self.X = X
|
||||
self.y = y
|
||||
self.x = np.asarray(x)
|
||||
if self.x.ndim != 4:
|
||||
raise ValueError('Input data in `NumpyArrayIterator` '
|
||||
'should have rank 4. You passed an array '
|
||||
'with shape', self.x.shape)
|
||||
channels_axis = 3 if dim_ordering == 'tf' else 1
|
||||
if self.x.shape[channels_axis] not in {1, 3, 4}:
|
||||
raise ValueError('NumpyArrayIterator is set to use the '
|
||||
'dimension ordering convention "' + dim_ordering + '" '
|
||||
'(channels on axis ' + str(channels_axis) + '), i.e. expected '
|
||||
'either 1, 3 or 4 channels on axis ' + str(channels_axis) + '. '
|
||||
'However, it was passed an array with shape ' + str(self.x.shape) +
|
||||
' (' + str(self.x.shape[channels_axis]) + ' channels).')
|
||||
if y is not None:
|
||||
self.y = np.asarray(y)
|
||||
else:
|
||||
self.y = None
|
||||
self.image_data_generator = image_data_generator
|
||||
self.dim_ordering = dim_ordering
|
||||
self.save_to_dir = save_to_dir
|
||||
self.save_prefix = save_prefix
|
||||
self.save_format = save_format
|
||||
super(NumpyArrayIterator, self).__init__(X.shape[0], batch_size, shuffle, seed)
|
||||
super(NumpyArrayIterator, self).__init__(x.shape[0], batch_size, shuffle, seed)
|
||||
|
||||
def next(self):
|
||||
# for python 2.x.
|
||||
@@ -489,10 +706,11 @@ class NumpyArrayIterator(Iterator):
|
||||
# see http://anandology.com/blog/using-iterators-and-generators/
|
||||
with self.lock:
|
||||
index_array, current_index, current_batch_size = next(self.index_generator)
|
||||
# The transformation of images is not under thread lock so it can be done in parallel
|
||||
batch_x = np.zeros(tuple([current_batch_size] + list(self.X.shape)[1:]))
|
||||
# The transformation of images is not under thread lock
|
||||
# so it can be done in parallel
|
||||
batch_x = np.zeros(tuple([current_batch_size] + list(self.x.shape)[1:]))
|
||||
for i, j in enumerate(index_array):
|
||||
x = self.X[j]
|
||||
x = self.x[j]
|
||||
x = self.image_data_generator.random_transform(x.astype('float32'))
|
||||
x = self.image_data_generator.standardize(x)
|
||||
batch_x[i] = x
|
||||
@@ -517,7 +735,8 @@ class DirectoryIterator(Iterator):
|
||||
dim_ordering='default',
|
||||
classes=None, class_mode='categorical',
|
||||
batch_size=32, shuffle=True, seed=None,
|
||||
save_to_dir=None, save_prefix='', save_format='jpeg'):
|
||||
save_to_dir=None, save_prefix='', save_format='jpeg',
|
||||
follow_links=False):
|
||||
if dim_ordering == 'default':
|
||||
dim_ordering = K.image_dim_ordering()
|
||||
self.directory = directory
|
||||
@@ -561,16 +780,20 @@ class DirectoryIterator(Iterator):
|
||||
self.nb_class = len(classes)
|
||||
self.class_indices = dict(zip(classes, range(len(classes))))
|
||||
|
||||
def _recursive_list(subpath):
|
||||
return sorted(os.walk(subpath, followlinks=follow_links), key=lambda tpl: tpl[0])
|
||||
|
||||
for subdir in classes:
|
||||
subpath = os.path.join(directory, subdir)
|
||||
for fname in sorted(os.listdir(subpath)):
|
||||
is_valid = False
|
||||
for extension in white_list_formats:
|
||||
if fname.lower().endswith('.' + extension):
|
||||
is_valid = True
|
||||
break
|
||||
if is_valid:
|
||||
self.nb_sample += 1
|
||||
for root, _, files in _recursive_list(subpath):
|
||||
for fname in files:
|
||||
is_valid = False
|
||||
for extension in white_list_formats:
|
||||
if fname.lower().endswith('.' + extension):
|
||||
is_valid = True
|
||||
break
|
||||
if is_valid:
|
||||
self.nb_sample += 1
|
||||
print('Found %d images belonging to %d classes.' % (self.nb_sample, self.nb_class))
|
||||
|
||||
# second, build an index of the images in the different class subfolders
|
||||
@@ -579,28 +802,34 @@ class DirectoryIterator(Iterator):
|
||||
i = 0
|
||||
for subdir in classes:
|
||||
subpath = os.path.join(directory, subdir)
|
||||
for fname in sorted(os.listdir(subpath)):
|
||||
is_valid = False
|
||||
for extension in white_list_formats:
|
||||
if fname.lower().endswith('.' + extension):
|
||||
is_valid = True
|
||||
break
|
||||
if is_valid:
|
||||
self.classes[i] = self.class_indices[subdir]
|
||||
self.filenames.append(os.path.join(subdir, fname))
|
||||
i += 1
|
||||
for root, _, files in _recursive_list(subpath):
|
||||
for fname in files:
|
||||
is_valid = False
|
||||
for extension in white_list_formats:
|
||||
if fname.lower().endswith('.' + extension):
|
||||
is_valid = True
|
||||
break
|
||||
if is_valid:
|
||||
self.classes[i] = self.class_indices[subdir]
|
||||
i += 1
|
||||
# add filename relative to directory
|
||||
absolute_path = os.path.join(root, fname)
|
||||
self.filenames.append(os.path.relpath(absolute_path, directory))
|
||||
super(DirectoryIterator, self).__init__(self.nb_sample, batch_size, shuffle, seed)
|
||||
|
||||
def next(self):
|
||||
with self.lock:
|
||||
index_array, current_index, current_batch_size = next(self.index_generator)
|
||||
# The transformation of images is not under thread lock so it can be done in parallel
|
||||
# The transformation of images is not under thread lock
|
||||
# so it can be done in parallel
|
||||
batch_x = np.zeros((current_batch_size,) + self.image_shape)
|
||||
grayscale = self.color_mode == 'grayscale'
|
||||
# build batch of image data
|
||||
for i, j in enumerate(index_array):
|
||||
fname = self.filenames[j]
|
||||
img = load_img(os.path.join(self.directory, fname), grayscale=grayscale, target_size=self.target_size)
|
||||
img = load_img(os.path.join(self.directory, fname),
|
||||
grayscale=grayscale,
|
||||
target_size=self.target_size)
|
||||
x = img_to_array(img, dim_ordering=self.dim_ordering)
|
||||
x = self.image_data_generator.random_transform(x)
|
||||
x = self.image_data_generator.standardize(x)
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
from __future__ import absolute_import
|
||||
# -*- coding: utf-8 -*-
|
||||
from __future__ import absolute_import
|
||||
|
||||
import numpy as np
|
||||
import random
|
||||
from six.moves import range
|
||||
@@ -7,8 +8,7 @@ from six.moves import range
|
||||
|
||||
def pad_sequences(sequences, maxlen=None, dtype='int32',
|
||||
padding='pre', truncating='pre', value=0.):
|
||||
'''Pads each sequence to the same length:
|
||||
the length of the longest sequence.
|
||||
"""Pads each sequence to the same length (length of the longest sequence).
|
||||
|
||||
If maxlen is provided, any sequence longer
|
||||
than maxlen is truncated to maxlen.
|
||||
@@ -28,7 +28,11 @@ def pad_sequences(sequences, maxlen=None, dtype='int32',
|
||||
|
||||
# Returns
|
||||
x: numpy array with dimensions (number_of_sequences, maxlen)
|
||||
'''
|
||||
|
||||
# Raises
|
||||
ValueError: in case of invalid values for `truncating` or `padding`,
|
||||
or in case of invalid shape for a `sequences` entry.
|
||||
"""
|
||||
lengths = [len(s) for s in sequences]
|
||||
|
||||
nb_samples = len(sequences)
|
||||
@@ -45,8 +49,8 @@ def pad_sequences(sequences, maxlen=None, dtype='int32',
|
||||
|
||||
x = (np.ones((nb_samples, maxlen) + sample_shape) * value).astype(dtype)
|
||||
for idx, s in enumerate(sequences):
|
||||
if len(s) == 0:
|
||||
continue # empty list was found
|
||||
if not len(s):
|
||||
continue # empty list/array was found
|
||||
if truncating == 'pre':
|
||||
trunc = s[-maxlen:]
|
||||
elif truncating == 'post':
|
||||
@@ -70,7 +74,9 @@ def pad_sequences(sequences, maxlen=None, dtype='int32',
|
||||
|
||||
|
||||
def make_sampling_table(size, sampling_factor=1e-5):
|
||||
'''This generates an array where the ith element
|
||||
"""Generates a word rank-based probabilistic sampling table.
|
||||
|
||||
This generates an array where the ith element
|
||||
is the probability that a word of rank i would be sampled,
|
||||
according to the sampling distribution used in word2vec.
|
||||
|
||||
@@ -84,11 +90,16 @@ def make_sampling_table(size, sampling_factor=1e-5):
|
||||
|
||||
# Arguments
|
||||
size: int, number of possible words to sample.
|
||||
'''
|
||||
sampling_factor: the sampling factor in the word2vec formula.
|
||||
|
||||
# Returns
|
||||
A 1D Numpy array of length `size` where the ith entry
|
||||
is the probability that a word of rank i should be sampled.
|
||||
"""
|
||||
gamma = 0.577
|
||||
rank = np.array(list(range(size)))
|
||||
rank[0] = 1
|
||||
inv_fq = rank * (np.log(rank) + gamma) + 0.5 - 1./(12.*rank)
|
||||
inv_fq = rank * (np.log(rank) + gamma) + 0.5 - 1. / (12. * rank)
|
||||
f = sampling_factor * inv_fq
|
||||
|
||||
return np.minimum(1., f / np.sqrt(f))
|
||||
@@ -97,29 +108,40 @@ def make_sampling_table(size, sampling_factor=1e-5):
|
||||
def skipgrams(sequence, vocabulary_size,
|
||||
window_size=4, negative_samples=1., shuffle=True,
|
||||
categorical=False, sampling_table=None):
|
||||
'''Take a sequence (list of indexes of words),
|
||||
"""Generates skipgram word pairs.
|
||||
|
||||
Takes a sequence (list of indexes of words),
|
||||
returns couples of [word_index, other_word index] and labels (1s or 0s),
|
||||
where label = 1 if 'other_word' belongs to the context of 'word',
|
||||
and label=0 if 'other_word' is randomly sampled
|
||||
|
||||
# Arguments
|
||||
sequence: a word sequence (sentence), encoded as a list
|
||||
of word indices (integers). If using a `sampling_table`,
|
||||
word indices are expected to match the rank
|
||||
of the words in a reference dataset (e.g. 10 would encode
|
||||
the 10-th most frequently occurring token).
|
||||
Note that index 0 is expected to be a non-word and will be skipped.
|
||||
vocabulary_size: int. maximum possible word index + 1
|
||||
window_size: int. actually half-window.
|
||||
The window of a word wi will be [i-window_size, i+window_size+1]
|
||||
negative_samples: float >= 0. 0 for no negative (=random) samples.
|
||||
1 for same number as positive samples. etc.
|
||||
shuffle: whether to shuffle the word couples before returning them.
|
||||
categorical: bool. if False, labels will be
|
||||
integers (eg. [0, 1, 1 .. ]),
|
||||
if True labels will be categorical eg. [[1,0],[0,1],[0,1] .. ]
|
||||
sampling_table: 1D array of size `vocabulary_size` where the entry i
|
||||
encodes the probability to sample a word of rank i.
|
||||
|
||||
# Returns
|
||||
couples, labels: where `couples` are int pairs and
|
||||
`labels` are either 0 or 1.
|
||||
|
||||
# Notes
|
||||
# Note
|
||||
By convention, index 0 in the vocabulary is
|
||||
a non-word and will be skipped.
|
||||
'''
|
||||
"""
|
||||
couples = []
|
||||
labels = []
|
||||
for i, wi in enumerate(sequence):
|
||||
@@ -129,8 +151,8 @@ def skipgrams(sequence, vocabulary_size,
|
||||
if sampling_table[wi] < random.random():
|
||||
continue
|
||||
|
||||
window_start = max(0, i-window_size)
|
||||
window_end = min(len(sequence), i+window_size+1)
|
||||
window_start = max(0, i - window_size)
|
||||
window_end = min(len(sequence), i + window_size + 1)
|
||||
for j in range(window_start, window_end):
|
||||
if j != i:
|
||||
wj = sequence[j]
|
||||
@@ -147,11 +169,12 @@ def skipgrams(sequence, vocabulary_size,
|
||||
words = [c[0] for c in couples]
|
||||
random.shuffle(words)
|
||||
|
||||
couples += [[words[i %len(words)], random.randint(1, vocabulary_size-1)] for i in range(nb_negative_samples)]
|
||||
couples += [[words[i % len(words)],
|
||||
random.randint(1, vocabulary_size - 1)] for i in range(nb_negative_samples)]
|
||||
if categorical:
|
||||
labels += [[1, 0]]*nb_negative_samples
|
||||
labels += [[1, 0]] * nb_negative_samples
|
||||
else:
|
||||
labels += [0]*nb_negative_samples
|
||||
labels += [0] * nb_negative_samples
|
||||
|
||||
if shuffle:
|
||||
seed = random.randint(0, 10e6)
|
||||
|
||||
+122
-75
@@ -1,7 +1,8 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
'''These preprocessing utilities would greatly benefit
|
||||
from a fast Cython rewrite.
|
||||
'''
|
||||
"""Utilities for text input preprocessing.
|
||||
|
||||
May benefit from a fast Cython rewrite.
|
||||
"""
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
|
||||
@@ -17,54 +18,70 @@ else:
|
||||
maketrans = str.maketrans
|
||||
|
||||
|
||||
def base_filter():
|
||||
f = string.punctuation
|
||||
f = f.replace("'", '')
|
||||
f += '\t\n'
|
||||
return f
|
||||
def text_to_word_sequence(text,
|
||||
filters='!"#$%&()*+,-./:;<=>?@[\\]^_`{|}~\t\n',
|
||||
lower=True, split=" "):
|
||||
"""Converts a text to a sequence of word indices.
|
||||
|
||||
# Arguments
|
||||
text: Input text (string).
|
||||
filters: Sequence of characters to filter out.
|
||||
lower: Whether to convert the input to lowercase.
|
||||
split: Sentence split marker (string).
|
||||
|
||||
def text_to_word_sequence(text, filters=base_filter(), lower=True, split=" "):
|
||||
'''prune: sequence of characters to filter out
|
||||
'''
|
||||
# Returns
|
||||
A list of words (or tokens).
|
||||
"""
|
||||
if lower:
|
||||
text = text.lower()
|
||||
text = text.translate(maketrans(filters, split*len(filters)))
|
||||
text = text.translate(maketrans(filters, split * len(filters)))
|
||||
seq = text.split(split)
|
||||
return [_f for _f in seq if _f]
|
||||
return [i for i in seq if i]
|
||||
|
||||
|
||||
def one_hot(text, n, filters=base_filter(), lower=True, split=" "):
|
||||
seq = text_to_word_sequence(text, filters=filters, lower=lower, split=split)
|
||||
def one_hot(text, n,
|
||||
filters='!"#$%&()*+,-./:;<=>?@[\\]^_`{|}~\t\n',
|
||||
lower=True,
|
||||
split=' '):
|
||||
seq = text_to_word_sequence(text,
|
||||
filters=filters,
|
||||
lower=lower,
|
||||
split=split)
|
||||
return [(abs(hash(w)) % (n - 1) + 1) for w in seq]
|
||||
|
||||
|
||||
class Tokenizer(object):
|
||||
def __init__(self, nb_words=None, filters=base_filter(),
|
||||
lower=True, split=' ', char_level=False):
|
||||
'''The class allows to vectorize a text corpus, by turning each
|
||||
text into either a sequence of integers (each integer being the index
|
||||
of a token in a dictionary) or into a vector where the coefficient
|
||||
for each token could be binary, based on word count, based on tf-idf...
|
||||
"""Text tokenization utility class.
|
||||
|
||||
# Arguments
|
||||
nb_words: the maximum number of words to keep, based
|
||||
on word frequency. Only the most common `nb_words` words will
|
||||
be kept.
|
||||
filters: a string where each element is a character that will be
|
||||
filtered from the texts. The default is all punctuation, plus
|
||||
tabs and line breaks, minus the `'` character.
|
||||
lower: boolean. Whether to convert the texts to lowercase.
|
||||
split: character or string to use for token splitting.
|
||||
char_level: if True, every character will be treated as a word.
|
||||
This class allows to vectorize a text corpus, by turning each
|
||||
text into either a sequence of integers (each integer being the index
|
||||
of a token in a dictionary) or into a vector where the coefficient
|
||||
for each token could be binary, based on word count, based on tf-idf...
|
||||
|
||||
By default, all punctuation is removed, turning the texts into
|
||||
space-separated sequences of words
|
||||
(words may include the `'` character). These sequences are then
|
||||
split into lists of tokens. They will then be indexed or vectorized.
|
||||
# Arguments
|
||||
nb_words: the maximum number of words to keep, based
|
||||
on word frequency. Only the most common `nb_words` words will
|
||||
be kept.
|
||||
filters: a string where each element is a character that will be
|
||||
filtered from the texts. The default is all punctuation, plus
|
||||
tabs and line breaks, minus the `'` character.
|
||||
lower: boolean. Whether to convert the texts to lowercase.
|
||||
split: character or string to use for token splitting.
|
||||
char_level: if True, every character will be treated as a word.
|
||||
|
||||
`0` is a reserved index that won't be assigned to any word.
|
||||
'''
|
||||
By default, all punctuation is removed, turning the texts into
|
||||
space-separated sequences of words
|
||||
(words may include the `'` character). These sequences are then
|
||||
split into lists of tokens. They will then be indexed or vectorized.
|
||||
|
||||
`0` is a reserved index that won't be assigned to any word.
|
||||
"""
|
||||
|
||||
def __init__(self, nb_words=None,
|
||||
filters='!"#$%&()*+,-./:;<=>?@[\\]^_`{|}~\t\n',
|
||||
lower=True,
|
||||
split=' ',
|
||||
char_level=False):
|
||||
self.word_counts = {}
|
||||
self.word_docs = {}
|
||||
self.filters = filters
|
||||
@@ -75,16 +92,21 @@ class Tokenizer(object):
|
||||
self.char_level = char_level
|
||||
|
||||
def fit_on_texts(self, texts):
|
||||
'''Required before using texts_to_sequences or texts_to_matrix
|
||||
"""Updates internal vocabulary based on a list of texts.
|
||||
|
||||
Required before using `texts_to_sequences` or `texts_to_matrix`.
|
||||
|
||||
# Arguments
|
||||
texts: can be a list of strings,
|
||||
or a generator of strings (for memory-efficiency)
|
||||
'''
|
||||
"""
|
||||
self.document_count = 0
|
||||
for text in texts:
|
||||
self.document_count += 1
|
||||
seq = text if self.char_level else text_to_word_sequence(text, self.filters, self.lower, self.split)
|
||||
seq = text if self.char_level else text_to_word_sequence(text,
|
||||
self.filters,
|
||||
self.lower,
|
||||
self.split)
|
||||
for w in seq:
|
||||
if w in self.word_counts:
|
||||
self.word_counts[w] += 1
|
||||
@@ -107,9 +129,15 @@ class Tokenizer(object):
|
||||
self.index_docs[self.word_index[w]] = c
|
||||
|
||||
def fit_on_sequences(self, sequences):
|
||||
'''Required before using sequences_to_matrix
|
||||
(if fit_on_texts was never called)
|
||||
'''
|
||||
"""Updates internal vocabulary based on a list of sequences.
|
||||
|
||||
Required before using `sequences_to_matrix`
|
||||
(if `fit_on_texts` was never called).
|
||||
|
||||
# Arguments
|
||||
sequences: A list of sequences.
|
||||
A "sequence" is a list of integer word indices.
|
||||
"""
|
||||
self.document_count = len(sequences)
|
||||
self.index_docs = {}
|
||||
for seq in sequences:
|
||||
@@ -121,30 +149,40 @@ class Tokenizer(object):
|
||||
self.index_docs[i] += 1
|
||||
|
||||
def texts_to_sequences(self, texts):
|
||||
'''Transforms each text in texts in a sequence of integers.
|
||||
"""Transforms each text in texts in a sequence of integers.
|
||||
|
||||
Only top "nb_words" most frequent words will be taken into account.
|
||||
Only words known by the tokenizer will be taken into account.
|
||||
|
||||
Returns a list of sequences.
|
||||
'''
|
||||
# Arguments
|
||||
texts: A list of texts (strings).
|
||||
|
||||
# Returns
|
||||
A list of sequences.
|
||||
"""
|
||||
res = []
|
||||
for vect in self.texts_to_sequences_generator(texts):
|
||||
res.append(vect)
|
||||
return res
|
||||
|
||||
def texts_to_sequences_generator(self, texts):
|
||||
'''Transforms each text in texts in a sequence of integers.
|
||||
"""Transforms each text in texts in a sequence of integers.
|
||||
|
||||
Only top "nb_words" most frequent words will be taken into account.
|
||||
Only words known by the tokenizer will be taken into account.
|
||||
|
||||
Yields individual sequences.
|
||||
# Arguments
|
||||
texts: A list of texts (strings).
|
||||
|
||||
# Arguments:
|
||||
texts: list of strings.
|
||||
'''
|
||||
# Yields
|
||||
Yields individual sequences.
|
||||
"""
|
||||
nb_words = self.nb_words
|
||||
for text in texts:
|
||||
seq = text if self.char_level else text_to_word_sequence(text, self.filters, self.lower, self.split)
|
||||
seq = text if self.char_level else text_to_word_sequence(text,
|
||||
self.filters,
|
||||
self.lower,
|
||||
self.split)
|
||||
vect = []
|
||||
for w in seq:
|
||||
i = self.word_index.get(w)
|
||||
@@ -156,39 +194,47 @@ class Tokenizer(object):
|
||||
yield vect
|
||||
|
||||
def texts_to_matrix(self, texts, mode='binary'):
|
||||
'''Convert a list of texts to a Numpy matrix,
|
||||
according to some vectorization mode.
|
||||
"""Convert a list of texts to a Numpy matrix.
|
||||
|
||||
# Arguments:
|
||||
# Arguments
|
||||
texts: list of strings.
|
||||
modes: one of "binary", "count", "tfidf", "freq"
|
||||
'''
|
||||
mode: one of "binary", "count", "tfidf", "freq".
|
||||
|
||||
# Returns
|
||||
A Numpy matrix.
|
||||
"""
|
||||
sequences = self.texts_to_sequences(texts)
|
||||
return self.sequences_to_matrix(sequences, mode=mode)
|
||||
|
||||
def sequences_to_matrix(self, sequences, mode='binary'):
|
||||
'''Converts a list of sequences into a Numpy matrix,
|
||||
according to some vectorization mode.
|
||||
"""Converts a list of sequences into a Numpy matrix.
|
||||
|
||||
# Arguments:
|
||||
# Arguments
|
||||
sequences: list of sequences
|
||||
(a sequence is a list of integer word indices).
|
||||
modes: one of "binary", "count", "tfidf", "freq"
|
||||
'''
|
||||
mode: one of "binary", "count", "tfidf", "freq"
|
||||
|
||||
# Returns
|
||||
A Numpy matrix.
|
||||
|
||||
# Raises
|
||||
ValueError: In case of invalid `mode` argument,
|
||||
or if the Tokenizer requires to be fit to sample data.
|
||||
"""
|
||||
if not self.nb_words:
|
||||
if self.word_index:
|
||||
nb_words = len(self.word_index) + 1
|
||||
else:
|
||||
raise Exception('Specify a dimension (nb_words argument), '
|
||||
'or fit on some text data first.')
|
||||
raise ValueError('Specify a dimension (nb_words argument), '
|
||||
'or fit on some text data first.')
|
||||
else:
|
||||
nb_words = self.nb_words
|
||||
|
||||
if mode == 'tfidf' and not self.document_count:
|
||||
raise Exception('Fit the Tokenizer on some data '
|
||||
'before using tfidf mode.')
|
||||
raise ValueError('Fit the Tokenizer on some data '
|
||||
'before using tfidf mode.')
|
||||
|
||||
X = np.zeros((len(sequences), nb_words))
|
||||
x = np.zeros((len(sequences), nb_words))
|
||||
for i, seq in enumerate(sequences):
|
||||
if not seq:
|
||||
continue
|
||||
@@ -202,17 +248,18 @@ class Tokenizer(object):
|
||||
counts[j] += 1
|
||||
for j, c in list(counts.items()):
|
||||
if mode == 'count':
|
||||
X[i][j] = c
|
||||
x[i][j] = c
|
||||
elif mode == 'freq':
|
||||
X[i][j] = c / len(seq)
|
||||
x[i][j] = c / len(seq)
|
||||
elif mode == 'binary':
|
||||
X[i][j] = 1
|
||||
x[i][j] = 1
|
||||
elif mode == 'tfidf':
|
||||
# Use weighting scheme 2 in
|
||||
# https://en.wikipedia.org/wiki/Tf%E2%80%93idf
|
||||
# https://en.wikipedia.org/wiki/Tf%E2%80%93idf
|
||||
tf = 1 + np.log(c)
|
||||
idf = np.log(1 + self.document_count / (1 + self.index_docs.get(j, 0)))
|
||||
X[i][j] = tf * idf
|
||||
idf = np.log(1 + self.document_count /
|
||||
(1 + self.index_docs.get(j, 0)))
|
||||
x[i][j] = tf * idf
|
||||
else:
|
||||
raise Exception('Unknown vectorization mode: ' + str(mode))
|
||||
return X
|
||||
raise ValueError('Unknown vectorization mode:', mode)
|
||||
return x
|
||||
|
||||
+61
-102
@@ -1,98 +1,84 @@
|
||||
from __future__ import absolute_import
|
||||
from . import backend as K
|
||||
from .utils.generic_utils import get_from_module
|
||||
import warnings
|
||||
|
||||
|
||||
class Regularizer(object):
|
||||
"""Regularizer base class.
|
||||
"""
|
||||
|
||||
def set_param(self, p):
|
||||
self.p = p
|
||||
|
||||
def set_layer(self, layer):
|
||||
self.layer = layer
|
||||
|
||||
def __call__(self, loss):
|
||||
return loss
|
||||
def __call__(self, x):
|
||||
return 0
|
||||
|
||||
def get_config(self):
|
||||
return {'name': self.__class__.__name__}
|
||||
|
||||
def set_param(self, _):
|
||||
warnings.warn('The `set_param` method on regularizers is deprecated. '
|
||||
'It no longer does anything, '
|
||||
'and it will be removed after 06/2017.')
|
||||
|
||||
def set_layer(self, _):
|
||||
warnings.warn('The `set_layer` method on regularizers is deprecated. '
|
||||
'It no longer does anything, '
|
||||
'and it will be removed after 06/2017.')
|
||||
|
||||
|
||||
class EigenvalueRegularizer(Regularizer):
|
||||
'''This takes a constant that controls
|
||||
the regularization by Eigenvalue Decay on the
|
||||
current layer and outputs the regularized
|
||||
loss (evaluated on the training data) and
|
||||
the original loss (evaluated on the
|
||||
validation data).
|
||||
'''
|
||||
"""Regularizer based on the eigenvalues of a weight matrix.
|
||||
|
||||
Only available for tensors of rank 2.
|
||||
|
||||
# Arguments
|
||||
k: Float; modulates the amount of regularization to apply.
|
||||
"""
|
||||
|
||||
def __init__(self, k):
|
||||
self.k = k
|
||||
self.uses_learning_phase = True
|
||||
|
||||
def set_param(self, p):
|
||||
if hasattr(self, 'p'):
|
||||
raise Exception('Regularizers cannot be reused. '
|
||||
'Instantiate one regularizer per layer.')
|
||||
self.p = p
|
||||
def __call__(self, x):
|
||||
if K.ndim(x) != 2:
|
||||
raise ValueError('EigenvalueRegularizer '
|
||||
'is only available for tensors of rank 2.')
|
||||
covariance = K.dot(K.transpose(x), x)
|
||||
dim1, dim2 = K.eval(K.shape(covariance))
|
||||
|
||||
def __call__(self, loss):
|
||||
power = 9 # number of iterations of the power method
|
||||
W = self.p
|
||||
if K.ndim(W) > 2:
|
||||
raise Exception('Eigenvalue Decay regularizer '
|
||||
'is only available for dense '
|
||||
'and embedding layers.')
|
||||
WW = K.dot(K.transpose(W), W)
|
||||
dim1, dim2 = K.eval(K.shape(WW)) # number of neurons in the layer
|
||||
|
||||
# power method for approximating the dominant eigenvector:
|
||||
o = K.ones([dim1, 1]) # initial values for the dominant eigenvector
|
||||
main_eigenvect = K.dot(WW, o)
|
||||
# Power method for approximating the dominant eigenvector:
|
||||
power = 9 # Number of iterations of the power method.
|
||||
o = K.ones([dim1, 1]) # Initial values for the dominant eigenvector.
|
||||
main_eigenvect = K.dot(covariance, o)
|
||||
for n in range(power - 1):
|
||||
main_eigenvect = K.dot(WW, main_eigenvect)
|
||||
main_eigenvect = K.dot(covariance, main_eigenvect)
|
||||
covariance_d = K.dot(covariance, main_eigenvect)
|
||||
|
||||
WWd = K.dot(WW, main_eigenvect)
|
||||
|
||||
# the corresponding dominant eigenvalue:
|
||||
main_eigenval = (K.dot(K.transpose(WWd), main_eigenvect) /
|
||||
# The corresponding dominant eigenvalue:
|
||||
main_eigenval = (K.dot(K.transpose(covariance_d), main_eigenvect) /
|
||||
K.dot(K.transpose(main_eigenvect), main_eigenvect))
|
||||
# multiplied by the given regularization gain
|
||||
regularized_loss = loss + (main_eigenval ** 0.5) * self.k
|
||||
|
||||
return K.in_train_phase(regularized_loss[0, 0], loss)
|
||||
# Multiply by the given regularization gain.
|
||||
regularization = (main_eigenval ** 0.5) * self.k
|
||||
return K.sum(regularization)
|
||||
|
||||
|
||||
class WeightRegularizer(Regularizer):
|
||||
class L1L2Regularizer(Regularizer):
|
||||
"""Regularizer for L1 and L2 regularization.
|
||||
|
||||
# Arguments
|
||||
l1: Float; L1 regularization factor.
|
||||
l2: Float; L2 regularization factor.
|
||||
"""
|
||||
|
||||
def __init__(self, l1=0., l2=0.):
|
||||
self.l1 = K.cast_to_floatx(l1)
|
||||
self.l2 = K.cast_to_floatx(l2)
|
||||
self.uses_learning_phase = True
|
||||
self.p = None
|
||||
|
||||
def set_param(self, p):
|
||||
if self.p is not None:
|
||||
raise Exception('Regularizers cannot be reused. '
|
||||
'Instantiate one regularizer per layer.')
|
||||
self.p = p
|
||||
|
||||
def __call__(self, loss):
|
||||
if self.p is None:
|
||||
raise Exception('Need to call `set_param` on '
|
||||
'WeightRegularizer instance '
|
||||
'before calling the instance. '
|
||||
'Check that you are not passing '
|
||||
'a WeightRegularizer instead of an '
|
||||
'ActivityRegularizer '
|
||||
'(i.e. activity_regularizer="l2" instead '
|
||||
'of activity_regularizer="activity_l2".')
|
||||
regularized_loss = loss
|
||||
def __call__(self, x):
|
||||
regularization = 0
|
||||
if self.l1:
|
||||
regularized_loss += K.sum(self.l1 * K.abs(self.p))
|
||||
regularization += K.sum(self.l1 * K.abs(x))
|
||||
if self.l2:
|
||||
regularized_loss += K.sum(self.l2 * K.square(self.p))
|
||||
return K.in_train_phase(regularized_loss, loss)
|
||||
regularization += K.sum(self.l2 * K.square(x))
|
||||
return regularization
|
||||
|
||||
def get_config(self):
|
||||
return {'name': self.__class__.__name__,
|
||||
@@ -100,61 +86,34 @@ class WeightRegularizer(Regularizer):
|
||||
'l2': float(self.l2)}
|
||||
|
||||
|
||||
class ActivityRegularizer(Regularizer):
|
||||
# Aliases.
|
||||
|
||||
def __init__(self, l1=0., l2=0.):
|
||||
self.l1 = K.cast_to_floatx(l1)
|
||||
self.l2 = K.cast_to_floatx(l2)
|
||||
self.uses_learning_phase = True
|
||||
self.layer = None
|
||||
|
||||
def set_layer(self, layer):
|
||||
if self.layer is not None:
|
||||
raise Exception('Regularizers cannot be reused')
|
||||
self.layer = layer
|
||||
|
||||
def __call__(self, loss):
|
||||
if self.layer is None:
|
||||
raise Exception('Need to call `set_layer` on '
|
||||
'ActivityRegularizer instance '
|
||||
'before calling the instance.')
|
||||
regularized_loss = loss
|
||||
for i in range(len(self.layer.inbound_nodes)):
|
||||
output = self.layer.get_output_at(i)
|
||||
if self.l1:
|
||||
regularized_loss += K.sum(self.l1 * K.abs(output))
|
||||
if self.l2:
|
||||
regularized_loss += K.sum(self.l2 * K.square(output))
|
||||
return K.in_train_phase(regularized_loss, loss)
|
||||
|
||||
def get_config(self):
|
||||
return {'name': self.__class__.__name__,
|
||||
'l1': float(self.l1),
|
||||
'l2': float(self.l2)}
|
||||
WeightRegularizer = L1L2Regularizer
|
||||
ActivityRegularizer = L1L2Regularizer
|
||||
|
||||
|
||||
def l1(l=0.01):
|
||||
return WeightRegularizer(l1=l)
|
||||
return L1L2Regularizer(l1=l)
|
||||
|
||||
|
||||
def l2(l=0.01):
|
||||
return WeightRegularizer(l2=l)
|
||||
return L1L2Regularizer(l2=l)
|
||||
|
||||
|
||||
def l1l2(l1=0.01, l2=0.01):
|
||||
return WeightRegularizer(l1=l1, l2=l2)
|
||||
return L1L2Regularizer(l1=l1, l2=l2)
|
||||
|
||||
|
||||
def activity_l1(l=0.01):
|
||||
return ActivityRegularizer(l1=l)
|
||||
return L1L2Regularizer(l1=l)
|
||||
|
||||
|
||||
def activity_l2(l=0.01):
|
||||
return ActivityRegularizer(l2=l)
|
||||
return L1L2Regularizer(l2=l)
|
||||
|
||||
|
||||
def activity_l1l2(l1=0.01, l2=0.01):
|
||||
return ActivityRegularizer(l1=l1, l2=l2)
|
||||
return L1L2Regularizer(l1=l1, l2=l2)
|
||||
|
||||
|
||||
def get(identifier, kwargs=None):
|
||||
|
||||
+30
-13
@@ -1,21 +1,38 @@
|
||||
"""Utilities for file download and caching."""
|
||||
from __future__ import absolute_import
|
||||
from __future__ import print_function
|
||||
|
||||
import functools
|
||||
import tarfile
|
||||
import os
|
||||
import sys
|
||||
import shutil
|
||||
import hashlib
|
||||
from six.moves.urllib.request import urlopen
|
||||
from six.moves.urllib.error import URLError, HTTPError
|
||||
from six.moves.urllib.error import URLError
|
||||
from six.moves.urllib.error import HTTPError
|
||||
|
||||
from ..utils.generic_utils import Progbar
|
||||
|
||||
|
||||
# Under Python 2, 'urlretrieve' relies on FancyURLopener from legacy
|
||||
# urllib module, known to have issues with proxy management
|
||||
if sys.version_info[0] == 2:
|
||||
def urlretrieve(url, filename, reporthook=None, data=None):
|
||||
"""Replacement for `urlretrieve` for Python 2.
|
||||
|
||||
Under Python 2, `urlretrieve` relies on `FancyURLopener` from legacy
|
||||
`urllib` module, known to have issues with proxy management.
|
||||
|
||||
# Arguments
|
||||
url: url to retrieve.
|
||||
filename: where to store the retrieved data locally.
|
||||
reporthook: a hook function that will be called once
|
||||
on establishment of the network connection and once
|
||||
after each block read thereafter.
|
||||
The hook will be passed three arguments;
|
||||
a count of blocks transferred so far,
|
||||
a block size in bytes, and the total size of the file.
|
||||
data: `data` argument passed to `urlopen`.
|
||||
"""
|
||||
def chunk_read(response, chunk_size=8192, reporthook=None):
|
||||
total_size = response.info().get('Content-Length').strip()
|
||||
total_size = int(total_size)
|
||||
@@ -40,9 +57,10 @@ else:
|
||||
|
||||
def get_file(fname, origin, untar=False,
|
||||
md5_hash=None, cache_subdir='datasets'):
|
||||
'''Downloads a file from a URL if it not already in the cache.
|
||||
"""Downloads a file from a URL if it is not already in the cache.
|
||||
|
||||
Passing the MD5 hash will verify the file after download as well as if it is already present in the cache.
|
||||
Passing the MD5 hash will verify the file after download
|
||||
as well as if it is already present in the cache.
|
||||
|
||||
# Arguments
|
||||
fname: name of the file
|
||||
@@ -53,7 +71,7 @@ def get_file(fname, origin, untar=False,
|
||||
|
||||
# Returns
|
||||
Path to the downloaded file
|
||||
'''
|
||||
"""
|
||||
datadir_base = os.path.expanduser(os.path.join('~', '.keras'))
|
||||
if not os.access(datadir_base, os.W_OK):
|
||||
datadir_base = os.path.join('/tmp', '.keras')
|
||||
@@ -69,7 +87,7 @@ def get_file(fname, origin, untar=False,
|
||||
|
||||
download = False
|
||||
if os.path.exists(fpath):
|
||||
# file found; verify integrity if a hash was provided
|
||||
# File found; verify integrity if a hash was provided.
|
||||
if md5_hash is not None:
|
||||
if not validate_file(fpath, md5_hash):
|
||||
print('A local file was found, but it seems to be '
|
||||
@@ -80,11 +98,9 @@ def get_file(fname, origin, untar=False,
|
||||
|
||||
if download:
|
||||
print('Downloading data from', origin)
|
||||
global progbar
|
||||
progbar = None
|
||||
|
||||
def dl_progress(count, block_size, total_size):
|
||||
global progbar
|
||||
def dl_progress(count, block_size, total_size, progbar=None):
|
||||
if progbar is None:
|
||||
progbar = Progbar(total_size)
|
||||
else:
|
||||
@@ -93,7 +109,8 @@ def get_file(fname, origin, untar=False,
|
||||
error_msg = 'URL fetch failure on {}: {} -- {}'
|
||||
try:
|
||||
try:
|
||||
urlretrieve(origin, fpath, dl_progress)
|
||||
urlretrieve(origin, fpath,
|
||||
functools.partial(dl_progress, progbar=progbar))
|
||||
except URLError as e:
|
||||
raise Exception(error_msg.format(origin, e.errno, e.reason))
|
||||
except HTTPError as e:
|
||||
@@ -124,7 +141,7 @@ def get_file(fname, origin, untar=False,
|
||||
|
||||
|
||||
def validate_file(fpath, md5_hash):
|
||||
'''Validates a file against a MD5 hash
|
||||
"""Validates a file against a MD5 hash.
|
||||
|
||||
# Arguments
|
||||
fpath: path to the file being validated
|
||||
@@ -132,7 +149,7 @@ def validate_file(fpath, md5_hash):
|
||||
|
||||
# Returns
|
||||
Whether the file is valid
|
||||
'''
|
||||
"""
|
||||
hasher = hashlib.md5()
|
||||
with open(fpath, 'rb') as f:
|
||||
buf = f.read()
|
||||
|
||||
@@ -1,5 +1,8 @@
|
||||
"""Python utilities required by Keras."""
|
||||
from __future__ import absolute_import
|
||||
|
||||
import numpy as np
|
||||
|
||||
import time
|
||||
import sys
|
||||
import six
|
||||
@@ -9,25 +12,46 @@ import types as python_types
|
||||
|
||||
def get_from_module(identifier, module_params, module_name,
|
||||
instantiate=False, kwargs=None):
|
||||
"""Retrieves a class or function member of a module.
|
||||
|
||||
# Arguments
|
||||
identifier: the object to retrieve. It could be specified
|
||||
by name (as a string), or by dict. In any other case,
|
||||
`identifier` itself will be returned without any changes.
|
||||
module_params: the members of a module
|
||||
(e.g. the output of `globals()`).
|
||||
module_name: string; the name of the target module. Only used
|
||||
to format error messages.
|
||||
instantiate: whether to instantiate the returned object
|
||||
(if it's a class).
|
||||
kwargs: a dictionary of keyword arguments to pass to the
|
||||
class constructor if `instantiate` is `True`.
|
||||
|
||||
# Returns
|
||||
The target object.
|
||||
|
||||
# Raises
|
||||
ValueError: if the identifier cannot be found.
|
||||
"""
|
||||
if isinstance(identifier, six.string_types):
|
||||
res = module_params.get(identifier)
|
||||
if not res:
|
||||
raise Exception('Invalid ' + str(module_name) + ': ' +
|
||||
str(identifier))
|
||||
raise ValueError('Invalid ' + str(module_name) + ': ' +
|
||||
str(identifier))
|
||||
if instantiate and not kwargs:
|
||||
return res()
|
||||
elif instantiate and kwargs:
|
||||
return res(**kwargs)
|
||||
else:
|
||||
return res
|
||||
elif type(identifier) is dict:
|
||||
elif isinstance(identifier, dict):
|
||||
name = identifier.pop('name')
|
||||
res = module_params.get(name)
|
||||
if res:
|
||||
return res(**identifier)
|
||||
else:
|
||||
raise Exception('Invalid ' + str(module_name) + ': ' +
|
||||
str(identifier))
|
||||
raise ValueError('Invalid ' + str(module_name) + ': ' +
|
||||
str(identifier))
|
||||
return identifier
|
||||
|
||||
|
||||
@@ -36,7 +60,14 @@ def make_tuple(*args):
|
||||
|
||||
|
||||
def func_dump(func):
|
||||
'''Serialize user defined function.'''
|
||||
"""Serializes a user defined function.
|
||||
|
||||
# Arguments
|
||||
func: the function to serialize.
|
||||
|
||||
# Returns
|
||||
A tuple `(code, defaults, closure)`.
|
||||
"""
|
||||
code = marshal.dumps(func.__code__).decode('raw_unicode_escape')
|
||||
defaults = func.__defaults__
|
||||
if func.__closure__:
|
||||
@@ -47,37 +78,37 @@ def func_dump(func):
|
||||
|
||||
|
||||
def func_load(code, defaults=None, closure=None, globs=None):
|
||||
'''Deserialize user defined function.'''
|
||||
"""Deserializes a user defined function.
|
||||
|
||||
# Arguments
|
||||
code: bytecode of the function.
|
||||
defaults: defaults of the function.
|
||||
closure: closure of the function.
|
||||
globs: dictionary of global objects.
|
||||
|
||||
# Returns
|
||||
A function object.
|
||||
"""
|
||||
if isinstance(code, (tuple, list)): # unpack previous dump
|
||||
code, defaults, closure = code
|
||||
code = marshal.loads(code.encode('raw_unicode_escape'))
|
||||
if closure is not None:
|
||||
closure = func_reconstruct_closure(closure)
|
||||
if globs is None:
|
||||
globs = globals()
|
||||
return python_types.FunctionType(code, globs, name=code.co_name, argdefs=defaults, closure=closure)
|
||||
|
||||
|
||||
def func_reconstruct_closure(values):
|
||||
'''Deserialization helper that reconstructs a closure.'''
|
||||
nums = range(len(values))
|
||||
src = ["def func(arg):"]
|
||||
src += [" _%d = arg[%d]" % (n, n) for n in nums]
|
||||
src += [" return lambda:(%s)" % ','.join(["_%d" % n for n in nums]), ""]
|
||||
src = '\n'.join(src)
|
||||
try:
|
||||
exec(src, globals())
|
||||
except:
|
||||
raise SyntaxError(src)
|
||||
return func(values).__closure__
|
||||
return python_types.FunctionType(code, globs,
|
||||
name=code.co_name,
|
||||
argdefs=defaults,
|
||||
closure=closure)
|
||||
|
||||
|
||||
class Progbar(object):
|
||||
"""Displays a progress bar.
|
||||
|
||||
# Arguments
|
||||
target: Total number of steps expected.
|
||||
interval: Minimum visual progress update interval (in seconds).
|
||||
"""
|
||||
|
||||
def __init__(self, target, width=30, verbose=1, interval=0.01):
|
||||
'''
|
||||
@param target: total number of steps expected
|
||||
@param interval: minimum visual progress update interval (in seconds)
|
||||
'''
|
||||
self.width = width
|
||||
self.target = target
|
||||
self.sum_values = {}
|
||||
@@ -89,16 +120,20 @@ class Progbar(object):
|
||||
self.seen_so_far = 0
|
||||
self.verbose = verbose
|
||||
|
||||
def update(self, current, values=[], force=False):
|
||||
'''
|
||||
@param current: index of current step
|
||||
@param values: list of tuples (name, value_for_last_step).
|
||||
The progress bar will display averages for these values.
|
||||
@param force: force visual progress update
|
||||
'''
|
||||
def update(self, current, values=None, force=False):
|
||||
"""Updates the progress bar.
|
||||
|
||||
# Arguments
|
||||
current: Index of current step.
|
||||
values: List of tuples (name, value_for_last_step).
|
||||
The progress bar will display averages for these values.
|
||||
force: Whether to force visual progress update.
|
||||
"""
|
||||
values = values or []
|
||||
for k, v in values:
|
||||
if k not in self.sum_values:
|
||||
self.sum_values[k] = [v * (current - self.seen_so_far), current - self.seen_so_far]
|
||||
self.sum_values[k] = [v * (current - self.seen_so_far),
|
||||
current - self.seen_so_far]
|
||||
self.unique_values.append(k)
|
||||
else:
|
||||
self.sum_values[k][0] += v * (current - self.seen_so_far)
|
||||
@@ -111,8 +146,8 @@ class Progbar(object):
|
||||
return
|
||||
|
||||
prev_total_width = self.total_width
|
||||
sys.stdout.write("\b" * prev_total_width)
|
||||
sys.stdout.write("\r")
|
||||
sys.stdout.write('\b' * prev_total_width)
|
||||
sys.stdout.write('\r')
|
||||
|
||||
numdigits = int(np.floor(np.log10(self.target))) + 1
|
||||
barstr = '%%%dd/%%%dd [' % (numdigits, numdigits)
|
||||
@@ -120,7 +155,7 @@ class Progbar(object):
|
||||
prog = float(current) / self.target
|
||||
prog_width = int(self.width * prog)
|
||||
if prog_width > 0:
|
||||
bar += ('=' * (prog_width-1))
|
||||
bar += ('=' * (prog_width - 1))
|
||||
if current < self.target:
|
||||
bar += '>'
|
||||
else:
|
||||
@@ -142,7 +177,7 @@ class Progbar(object):
|
||||
info += ' - %ds' % (now - self.start)
|
||||
for k in self.unique_values:
|
||||
info += ' - %s:' % k
|
||||
if type(self.sum_values[k]) is list:
|
||||
if isinstance(self.sum_values[k], list):
|
||||
avg = self.sum_values[k][0] / max(1, self.sum_values[k][1])
|
||||
if abs(avg) > 1e-3:
|
||||
info += ' %.4f' % avg
|
||||
@@ -153,13 +188,13 @@ class Progbar(object):
|
||||
|
||||
self.total_width += len(info)
|
||||
if prev_total_width > self.total_width:
|
||||
info += ((prev_total_width - self.total_width) * " ")
|
||||
info += ((prev_total_width - self.total_width) * ' ')
|
||||
|
||||
sys.stdout.write(info)
|
||||
sys.stdout.flush()
|
||||
|
||||
if current >= self.target:
|
||||
sys.stdout.write("\n")
|
||||
sys.stdout.write('\n')
|
||||
|
||||
if self.verbose == 2:
|
||||
if current >= self.target:
|
||||
@@ -175,19 +210,5 @@ class Progbar(object):
|
||||
|
||||
self.last_update = now
|
||||
|
||||
def add(self, n, values=[]):
|
||||
def add(self, n, values=None):
|
||||
self.update(self.seen_so_far + n, values)
|
||||
|
||||
|
||||
def display_table(rows, positions):
|
||||
|
||||
def display_row(objects, positions):
|
||||
line = ''
|
||||
for i in range(len(objects)):
|
||||
line += str(objects[i])
|
||||
line = line[:positions[i]]
|
||||
line += ' ' * (positions[i] - len(line))
|
||||
print(line)
|
||||
|
||||
for objects in rows:
|
||||
display_row(objects, positions)
|
||||
|
||||
+39
-12
@@ -1,22 +1,33 @@
|
||||
"""Utilities related to disk I/O."""
|
||||
from __future__ import absolute_import
|
||||
from __future__ import print_function
|
||||
|
||||
import numpy as np
|
||||
import sys
|
||||
from collections import defaultdict
|
||||
|
||||
try:
|
||||
import h5py
|
||||
except ImportError:
|
||||
h5py = None
|
||||
|
||||
class HDF5Matrix():
|
||||
'''Representation of HDF5 dataset which can be used instead of a
|
||||
Numpy array.
|
||||
try:
|
||||
import tables
|
||||
except ImportError:
|
||||
tables = None
|
||||
|
||||
|
||||
class HDF5Matrix(object):
|
||||
"""Representation of HDF5 dataset to be used instead of a Numpy array.
|
||||
|
||||
# Example
|
||||
|
||||
```python
|
||||
X_data = HDF5Matrix('input/file.hdf5', 'data')
|
||||
model.predict(X_data)
|
||||
x_data = HDF5Matrix('input/file.hdf5', 'data')
|
||||
model.predict(x_data)
|
||||
```
|
||||
|
||||
Providing start and end allows use of a slice of the dataset.
|
||||
Providing `start` and `end` allows use of a slice of the dataset.
|
||||
|
||||
Optionally, a normalizer function (or lambda) can be given. This will
|
||||
be called on every slice of data retrieved.
|
||||
@@ -29,11 +40,15 @@ class HDF5Matrix():
|
||||
end: int, end of desired slice of the specified dataset
|
||||
normalizer: function to be called on data when retrieved
|
||||
|
||||
'''
|
||||
# Returns
|
||||
An array-like HDF5 dataset.
|
||||
"""
|
||||
refs = defaultdict(int)
|
||||
|
||||
def __init__(self, datapath, dataset, start=0, end=None, normalizer=None):
|
||||
import h5py
|
||||
if h5py is None:
|
||||
raise ImportError('The use of HDF5Matrix requires '
|
||||
'HDF5 and h5py installed.')
|
||||
|
||||
if datapath not in list(self.refs.keys()):
|
||||
f = h5py.File(datapath)
|
||||
@@ -54,7 +69,7 @@ class HDF5Matrix():
|
||||
def __getitem__(self, key):
|
||||
if isinstance(key, slice):
|
||||
if key.stop + self.start <= self.end:
|
||||
idx = slice(key.start+self.start, key.stop + self.start)
|
||||
idx = slice(key.start + self.start, key.stop + self.start)
|
||||
else:
|
||||
raise IndexError
|
||||
elif isinstance(key, int):
|
||||
@@ -83,16 +98,20 @@ class HDF5Matrix():
|
||||
|
||||
|
||||
def save_array(array, name):
|
||||
import tables
|
||||
if tables is None:
|
||||
raise ImportError('The use of `save_array` requires '
|
||||
'the tables module.')
|
||||
f = tables.open_file(name, 'w')
|
||||
atom = tables.Atom.from_dtype(array.dtype)
|
||||
ds = f.createCArray(f.root, 'data', atom, array.shape)
|
||||
ds = f.create_carray(f.root, 'data', atom, array.shape)
|
||||
ds[:] = array
|
||||
f.close()
|
||||
|
||||
|
||||
def load_array(name):
|
||||
import tables
|
||||
if tables is None:
|
||||
raise ImportError('The use of `save_array` requires '
|
||||
'the tables module.')
|
||||
f = tables.open_file(name)
|
||||
array = f.root.data
|
||||
a = np.empty(shape=array.shape, dtype=array.dtype)
|
||||
@@ -102,6 +121,14 @@ def load_array(name):
|
||||
|
||||
|
||||
def ask_to_proceed_with_overwrite(filepath):
|
||||
"""Produces a prompt asking about overwriting a file.
|
||||
|
||||
# Arguments
|
||||
filepath: the path to the file to be overwritten.
|
||||
|
||||
# Returns
|
||||
True if we can proceed with overwrite, False otherwise.
|
||||
"""
|
||||
get_input = input
|
||||
if sys.version_info[:2] <= (2, 7):
|
||||
get_input = raw_input
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
from __future__ import print_function
|
||||
import inspect
|
||||
|
||||
from .generic_utils import get_from_module
|
||||
from .np_utils import convert_kernel
|
||||
@@ -7,8 +8,9 @@ from ..models import Model, Sequential
|
||||
from .. import backend as K
|
||||
|
||||
|
||||
def layer_from_config(config, custom_objects={}):
|
||||
'''
|
||||
def layer_from_config(config, custom_objects=None):
|
||||
"""Instantiate a layer from a config dictionary.
|
||||
|
||||
# Arguments
|
||||
config: dict of the form {'class_name': str, 'config': dict}
|
||||
custom_objects: dict mapping class names (or function names)
|
||||
@@ -16,11 +18,12 @@ def layer_from_config(config, custom_objects={}):
|
||||
|
||||
# Returns
|
||||
Layer instance (may be Model, Sequential, Layer...)
|
||||
'''
|
||||
"""
|
||||
# Insert custom layers into globals so they can
|
||||
# be accessed by `get_from_module`.
|
||||
for cls_key in custom_objects:
|
||||
globals()[cls_key] = custom_objects[cls_key]
|
||||
if custom_objects:
|
||||
for cls_key in custom_objects:
|
||||
globals()[cls_key] = custom_objects[cls_key]
|
||||
|
||||
class_name = config['class_name']
|
||||
|
||||
@@ -31,18 +34,27 @@ def layer_from_config(config, custom_objects={}):
|
||||
else:
|
||||
layer_class = get_from_module(class_name, globals(), 'layer',
|
||||
instantiate=False)
|
||||
return layer_class.from_config(config['config'])
|
||||
|
||||
arg_spec = inspect.getargspec(layer_class.from_config)
|
||||
if 'custom_objects' in arg_spec.args:
|
||||
return layer_class.from_config(config['config'],
|
||||
custom_objects=custom_objects)
|
||||
else:
|
||||
return layer_class.from_config(config['config'])
|
||||
|
||||
|
||||
def print_summary(layers, relevant_nodes=None, line_length=100, positions=[.33, .55, .67, 1.]):
|
||||
'''Prints a summary of a layer
|
||||
def print_summary(layers, relevant_nodes=None,
|
||||
line_length=100, positions=None):
|
||||
"""Prints a summary of a layer.
|
||||
|
||||
# Arguments
|
||||
layers: list of layers to print summaries of
|
||||
relevant_nodes: list of relevant nodes
|
||||
line_length: total length of printed lines
|
||||
positions: relative or absolute positions of log elements in each line
|
||||
'''
|
||||
positions: relative or absolute positions of log elements in each line.
|
||||
If not provided, defaults to `[.33, .55, .67, 1.]`.
|
||||
"""
|
||||
positions = positions or [.33, .55, .67, 1.]
|
||||
if positions[-1] <= 1:
|
||||
positions = [int(line_length * p) for p in positions]
|
||||
# header names for the different log elements
|
||||
@@ -63,9 +75,14 @@ def print_summary(layers, relevant_nodes=None, line_length=100, positions=[.33,
|
||||
print('=' * line_length)
|
||||
|
||||
def print_layer_summary(layer):
|
||||
"""Prints a summary for a single layer.
|
||||
|
||||
# Arguments
|
||||
layer: target layer.
|
||||
"""
|
||||
try:
|
||||
output_shape = layer.output_shape
|
||||
except:
|
||||
except AttributeError:
|
||||
output_shape = 'multiple'
|
||||
connections = []
|
||||
for node_index, node in enumerate(layer.inbound_nodes):
|
||||
@@ -100,25 +117,51 @@ def print_summary(layers, relevant_nodes=None, line_length=100, positions=[.33,
|
||||
else:
|
||||
print('_' * line_length)
|
||||
|
||||
def count_total_params(layers, layer_set=None):
|
||||
if layer_set is None:
|
||||
layer_set = set()
|
||||
total_params = 0
|
||||
for layer in layers:
|
||||
if layer in layer_set:
|
||||
continue
|
||||
layer_set.add(layer)
|
||||
if type(layer) in (Model, Sequential):
|
||||
total_params += count_total_params(layer.layers, layer_set)
|
||||
else:
|
||||
total_params += layer.count_params()
|
||||
return total_params
|
||||
trainable_count, non_trainable_count = count_total_params(layers, layer_set=None)
|
||||
|
||||
print('Total params: %s' % count_total_params(layers))
|
||||
print('Total params: {:,}'.format(trainable_count + non_trainable_count))
|
||||
print('Trainable params: {:,}'.format(trainable_count))
|
||||
print('Non-trainable params: {:,}'.format(non_trainable_count))
|
||||
print('_' * line_length)
|
||||
|
||||
|
||||
def count_total_params(layers, layer_set=None):
|
||||
"""Counts the number of parameters in a list of layers.
|
||||
|
||||
# Arguments
|
||||
layers: list of layers.
|
||||
layer_set: set of layers already seen
|
||||
(so that we don't count their weights twice).
|
||||
|
||||
# Returns
|
||||
A tuple (count of trainable weights, count of non-trainable weights).
|
||||
"""
|
||||
if layer_set is None:
|
||||
layer_set = set()
|
||||
trainable_count = 0
|
||||
non_trainable_count = 0
|
||||
for layer in layers:
|
||||
if layer in layer_set:
|
||||
continue
|
||||
layer_set.add(layer)
|
||||
if isinstance(layer, (Model, Sequential)):
|
||||
t, nt = count_total_params(layer.layers, layer_set)
|
||||
trainable_count += t
|
||||
non_trainable_count += nt
|
||||
else:
|
||||
trainable_count += sum([K.count_params(p) for p in layer.trainable_weights])
|
||||
non_trainable_count += sum([K.count_params(p) for p in layer.non_trainable_weights])
|
||||
return trainable_count, non_trainable_count
|
||||
|
||||
|
||||
def convert_all_kernels_in_model(model):
|
||||
"""Converts all convolution kernels in a model from Theano to TensorFlow.
|
||||
|
||||
Also works from TensorFlow to Theano.
|
||||
|
||||
# Arguments
|
||||
model: target model for the conversion.
|
||||
"""
|
||||
# Note: SeparableConvolution not included
|
||||
# since only supported by TF.
|
||||
conv_classes = {
|
||||
|
||||
+74
-73
@@ -1,27 +1,32 @@
|
||||
"""Numpy-related utilities."""
|
||||
from __future__ import absolute_import
|
||||
|
||||
import numpy as np
|
||||
import scipy as sp
|
||||
from six.moves import range
|
||||
from six.moves import zip
|
||||
from .. import backend as K
|
||||
|
||||
|
||||
def to_categorical(y, nb_classes=None):
|
||||
'''Convert class vector (integers from 0 to nb_classes) to binary class matrix, for use with categorical_crossentropy.
|
||||
"""Converts a class vector (integers) to binary class matrix.
|
||||
|
||||
E.g. for use with categorical_crossentropy.
|
||||
|
||||
# Arguments
|
||||
y: class vector to be converted into a matrix
|
||||
nb_classes: total number of classes
|
||||
(integers from 0 to nb_classes).
|
||||
nb_classes: total number of classes.
|
||||
|
||||
# Returns
|
||||
A binary matrix representation of the input.
|
||||
'''
|
||||
"""
|
||||
y = np.array(y, dtype='int').ravel()
|
||||
if not nb_classes:
|
||||
nb_classes = np.max(y)+1
|
||||
Y = np.zeros((len(y), nb_classes))
|
||||
for i in range(len(y)):
|
||||
Y[i, y[i]] = 1.
|
||||
return Y
|
||||
nb_classes = np.max(y) + 1
|
||||
n = y.shape[0]
|
||||
categorical = np.zeros((n, nb_classes))
|
||||
categorical[np.arange(n), y] = 1
|
||||
return categorical
|
||||
|
||||
|
||||
def normalize(a, axis=-1, order=2):
|
||||
@@ -32,16 +37,16 @@ def normalize(a, axis=-1, order=2):
|
||||
|
||||
def binary_logloss(p, y):
|
||||
epsilon = 1e-15
|
||||
p = sp.maximum(epsilon, p)
|
||||
p = sp.minimum(1-epsilon, p)
|
||||
res = sum(y * sp.log(p) + sp.subtract(1, y) * sp.log(sp.subtract(1, p)))
|
||||
res *= -1.0/len(y)
|
||||
p = np.maximum(epsilon, p)
|
||||
p = np.minimum(1 - epsilon, p)
|
||||
res = sum(y * np.log(p) + np.subtract(1, y) * np.log(np.subtract(1, p)))
|
||||
res *= -1.0 / len(y)
|
||||
return res
|
||||
|
||||
|
||||
def multiclass_logloss(P, Y):
|
||||
npreds = [P[i][Y[i]-1] for i in range(len(Y))]
|
||||
score = -(1. / len(Y)) * np.sum(np.log(npreds))
|
||||
def multiclass_logloss(p, y):
|
||||
npreds = [p[i][y[i] - 1] for i in range(len(y))]
|
||||
score = -(1. / len(y)) * np.sum(np.log(npreds))
|
||||
return score
|
||||
|
||||
|
||||
@@ -59,67 +64,52 @@ def categorical_probas_to_classes(p):
|
||||
return np.argmax(p, axis=1)
|
||||
|
||||
|
||||
def convert_kernel(kernel, dim_ordering='default'):
|
||||
'''Converts a kernel matrix (Numpy array)
|
||||
from Theano format to TensorFlow format
|
||||
(or reciprocally, since the transformation
|
||||
is its own inverse).
|
||||
'''
|
||||
if dim_ordering == 'default':
|
||||
def convert_kernel(kernel, dim_ordering=None):
|
||||
"""Converts a Numpy kernel matrix from Theano format to TensorFlow format.
|
||||
|
||||
Also works reciprocally, since the transformation is its own inverse.
|
||||
|
||||
# Arguments
|
||||
kernel: Numpy array (4D or 5D).
|
||||
dim_ordering: the data format.
|
||||
|
||||
# Returns
|
||||
The converted kernel.
|
||||
|
||||
# Raises
|
||||
ValueError: in case of invalid kernel shape or invalid dim_ordering.
|
||||
"""
|
||||
if dim_ordering is None:
|
||||
dim_ordering = K.image_dim_ordering()
|
||||
new_kernel = np.copy(kernel)
|
||||
if kernel.ndim == 4:
|
||||
# conv 2d
|
||||
# TH kernel shape: (depth, input_depth, rows, cols)
|
||||
# TF kernel shape: (rows, cols, input_depth, depth)
|
||||
if dim_ordering == 'th':
|
||||
w = kernel.shape[2]
|
||||
h = kernel.shape[3]
|
||||
for i in range(w):
|
||||
for j in range(h):
|
||||
new_kernel[:, :, i, j] = kernel[:, :, w - i - 1, h - j - 1]
|
||||
elif dim_ordering == 'tf':
|
||||
w = kernel.shape[0]
|
||||
h = kernel.shape[1]
|
||||
for i in range(w):
|
||||
for j in range(h):
|
||||
new_kernel[i, j, :, :] = kernel[w - i - 1, h - j - 1, :, :]
|
||||
else:
|
||||
raise Exception('Invalid dim_ordering: ' + str(dim_ordering))
|
||||
elif kernel.ndim == 5:
|
||||
# conv 3d
|
||||
# TH kernel shape: (out_depth, input_depth, kernel_dim1, kernel_dim2, kernel_dim3)
|
||||
# TF kernel shape: (kernel_dim1, kernel_dim2, kernel_dim3, input_depth, out_depth)
|
||||
if dim_ordering == 'th':
|
||||
w = kernel.shape[2]
|
||||
h = kernel.shape[3]
|
||||
z = kernel.shape[4]
|
||||
for i in range(w):
|
||||
for j in range(h):
|
||||
for k in range(z):
|
||||
new_kernel[:, :, i, j, k] = kernel[:, :,
|
||||
w - i - 1,
|
||||
h - j - 1,
|
||||
z - k - 1]
|
||||
elif dim_ordering == 'tf':
|
||||
w = kernel.shape[0]
|
||||
h = kernel.shape[1]
|
||||
z = kernel.shape[2]
|
||||
for i in range(w):
|
||||
for j in range(h):
|
||||
for k in range(z):
|
||||
new_kernel[i, j, k, :, :] = kernel[w - i - 1,
|
||||
h - j - 1,
|
||||
z - k - 1,
|
||||
:, :]
|
||||
else:
|
||||
raise Exception('Invalid dim_ordering: ' + str(dim_ordering))
|
||||
else:
|
||||
if not 4 <= kernel.ndim <= 5:
|
||||
raise ValueError('Invalid kernel shape:', kernel.shape)
|
||||
return new_kernel
|
||||
|
||||
slices = [slice(None, None, -1) for _ in range(kernel.ndim)]
|
||||
no_flip = (slice(None, None), slice(None, None))
|
||||
if dim_ordering == 'th': # (out_depth, input_depth, ...)
|
||||
slices[:2] = no_flip
|
||||
elif dim_ordering == 'tf': # (..., input_depth, out_depth)
|
||||
slices[-2:] = no_flip
|
||||
else:
|
||||
raise ValueError('Invalid dim_ordering:', dim_ordering)
|
||||
|
||||
return np.copy(kernel[slices])
|
||||
|
||||
|
||||
def conv_output_length(input_length, filter_size, border_mode, stride, dilation=1):
|
||||
def conv_output_length(input_length, filter_size,
|
||||
border_mode, stride, dilation=1):
|
||||
"""Determines output length of a convolution given input length.
|
||||
|
||||
# Arguments
|
||||
input_length: integer.
|
||||
filter_size: integer.
|
||||
border_mode: one of "same", "valid", "full".
|
||||
stride: integer.
|
||||
dilation: dilation rate, integer.
|
||||
|
||||
# Returns
|
||||
The output length (integer).
|
||||
"""
|
||||
if input_length is None:
|
||||
return None
|
||||
assert border_mode in {'same', 'valid', 'full'}
|
||||
@@ -134,6 +124,17 @@ def conv_output_length(input_length, filter_size, border_mode, stride, dilation=
|
||||
|
||||
|
||||
def conv_input_length(output_length, filter_size, border_mode, stride):
|
||||
"""Determines input length of a convolution given output length.
|
||||
|
||||
# Arguments
|
||||
output_length: integer.
|
||||
filter_size: integer.
|
||||
border_mode: one of "same", "valid", "full".
|
||||
stride: integer.
|
||||
|
||||
# Returns
|
||||
The input length (integer).
|
||||
"""
|
||||
if output_length is None:
|
||||
return None
|
||||
assert border_mode in {'same', 'valid', 'full'}
|
||||
|
||||
+35
-15
@@ -1,23 +1,26 @@
|
||||
"""Utilities related to Keras unit tests."""
|
||||
import numpy as np
|
||||
from numpy.testing import assert_allclose
|
||||
import inspect
|
||||
import six
|
||||
|
||||
from ..engine import Model, Input
|
||||
from ..models import Sequential, model_from_json
|
||||
from ..models import Sequential
|
||||
from ..models import model_from_json
|
||||
from .. import backend as K
|
||||
|
||||
|
||||
def get_test_data(nb_train=1000, nb_test=500, input_shape=(10,),
|
||||
output_shape=(2,),
|
||||
classification=True, nb_class=2):
|
||||
'''
|
||||
classification=True overrides output_shape
|
||||
(i.e. output_shape is set to (1,)) and the output
|
||||
consists in integers in [0, nb_class-1].
|
||||
"""Generates test data to train a model on.
|
||||
|
||||
Otherwise: float output with shape output_shape.
|
||||
'''
|
||||
classification=True overrides output_shape
|
||||
(i.e. output_shape is set to (1,)) and the output
|
||||
consists in integers in [0, nb_class-1].
|
||||
|
||||
Otherwise: float output with shape output_shape.
|
||||
"""
|
||||
nb_sample = nb_train + nb_test
|
||||
if classification:
|
||||
y = np.random.randint(0, nb_class, size=(nb_sample,))
|
||||
@@ -38,14 +41,19 @@ def get_test_data(nb_train=1000, nb_test=500, input_shape=(10,),
|
||||
def layer_test(layer_cls, kwargs={}, input_shape=None, input_dtype=None,
|
||||
input_data=None, expected_output=None,
|
||||
expected_output_dtype=None, fixed_batch_size=False):
|
||||
'''Test routine for a layer with a single input tensor
|
||||
"""Test routine for a layer with a single input tensor
|
||||
and single output tensor.
|
||||
'''
|
||||
"""
|
||||
if input_data is None:
|
||||
assert input_shape
|
||||
if not input_dtype:
|
||||
input_dtype = K.floatx()
|
||||
input_data = (10 * np.random.random(input_shape)).astype(input_dtype)
|
||||
input_data_shape = list(input_shape)
|
||||
for i, e in enumerate(input_data_shape):
|
||||
if e is None:
|
||||
input_data_shape[i] = np.random.randint(1, 4)
|
||||
input_data = (10 * np.random.random(input_data_shape))
|
||||
input_data = input_data.astype(input_dtype)
|
||||
elif input_shape is None:
|
||||
input_shape = input_data.shape
|
||||
|
||||
@@ -78,7 +86,10 @@ def layer_test(layer_cls, kwargs={}, input_shape=None, input_dtype=None,
|
||||
expected_output_shape = layer.get_output_shape_for(input_shape)
|
||||
actual_output = model.predict(input_data)
|
||||
actual_output_shape = actual_output.shape
|
||||
assert expected_output_shape == actual_output_shape
|
||||
for expected_dim, actual_dim in zip(expected_output_shape,
|
||||
actual_output_shape):
|
||||
if expected_dim is not None:
|
||||
assert expected_dim == actual_dim
|
||||
if expected_output is not None:
|
||||
assert_allclose(actual_output, expected_output, rtol=1e-3)
|
||||
|
||||
@@ -97,7 +108,10 @@ def layer_test(layer_cls, kwargs={}, input_shape=None, input_dtype=None,
|
||||
model.compile('rmsprop', 'mse')
|
||||
actual_output = model.predict(input_data)
|
||||
actual_output_shape = actual_output.shape
|
||||
assert expected_output_shape == actual_output_shape
|
||||
for expected_dim, actual_dim in zip(expected_output_shape,
|
||||
actual_output_shape):
|
||||
if expected_dim is not None:
|
||||
assert expected_dim == actual_dim
|
||||
if expected_output is not None:
|
||||
assert_allclose(actual_output, expected_output, rtol=1e-3)
|
||||
|
||||
@@ -110,12 +124,18 @@ def layer_test(layer_cls, kwargs={}, input_shape=None, input_dtype=None,
|
||||
|
||||
|
||||
def keras_test(func):
|
||||
'''Clean up after tensorflow tests.
|
||||
'''
|
||||
"""Function wrapper to clean up after TensorFlow tests.
|
||||
|
||||
# Arguments
|
||||
func: test function to clean up after.
|
||||
|
||||
# Returns
|
||||
A function wrapping the input function.
|
||||
"""
|
||||
@six.wraps(func)
|
||||
def wrapper(*args, **kwargs):
|
||||
output = func(*args, **kwargs)
|
||||
if K._BACKEND == 'tensorflow':
|
||||
if K.backend() == 'tensorflow':
|
||||
K.clear_session()
|
||||
return output
|
||||
return wrapper
|
||||
|
||||
@@ -1,20 +1,31 @@
|
||||
"""Utilities related to model visualization."""
|
||||
import os
|
||||
|
||||
from ..layers.wrappers import Wrapper
|
||||
from ..models import Sequential
|
||||
|
||||
try:
|
||||
# pydot-ng is a fork of pydot that is better maintained
|
||||
# pydot-ng is a fork of pydot that is better maintained.
|
||||
import pydot_ng as pydot
|
||||
except ImportError:
|
||||
# fall back on pydot if necessary
|
||||
# Fall back on pydot if necessary.
|
||||
import pydot
|
||||
if not pydot.find_graphviz():
|
||||
raise RuntimeError('Failed to import pydot. You must install pydot'
|
||||
' and graphviz for `pydotprint` to work.')
|
||||
raise ImportError('Failed to import pydot. You must install pydot'
|
||||
' and graphviz for `pydotprint` to work.')
|
||||
|
||||
|
||||
def model_to_dot(model, show_shapes=False, show_layer_names=True):
|
||||
"""Converts a Keras model to dot format.
|
||||
|
||||
# Arguments
|
||||
model: A Keras model instance.
|
||||
show_shapes: whether to display shape information.
|
||||
show_layer_names: whether to display layer names.
|
||||
|
||||
# Returns
|
||||
A `pydot.Dot` instance representing the Keras model.
|
||||
"""
|
||||
dot = pydot.Dot()
|
||||
dot.set('rankdir', 'TB')
|
||||
dot.set('concentrate', True)
|
||||
@@ -48,7 +59,7 @@ def model_to_dot(model, show_shapes=False, show_layer_names=True):
|
||||
if show_shapes:
|
||||
try:
|
||||
outputlabels = str(layer.output_shape)
|
||||
except:
|
||||
except AttributeError:
|
||||
outputlabels = 'multiple'
|
||||
if hasattr(layer, 'input_shape'):
|
||||
inputlabels = str(layer.input_shape)
|
||||
@@ -77,9 +88,9 @@ def model_to_dot(model, show_shapes=False, show_layer_names=True):
|
||||
|
||||
def plot(model, to_file='model.png', show_shapes=False, show_layer_names=True):
|
||||
dot = model_to_dot(model, show_shapes, show_layer_names)
|
||||
_, format = os.path.splitext(to_file)
|
||||
if not format:
|
||||
format = 'png'
|
||||
_, extension = os.path.splitext(to_file)
|
||||
if not extension:
|
||||
extension = 'png'
|
||||
else:
|
||||
format = format[1:]
|
||||
dot.write(to_file, format=format)
|
||||
extension = extension[1:]
|
||||
dot.write(to_file, format=extension)
|
||||
|
||||
@@ -1,7 +1,9 @@
|
||||
from __future__ import absolute_import
|
||||
|
||||
import copy
|
||||
import inspect
|
||||
import types
|
||||
|
||||
import numpy as np
|
||||
|
||||
from ..utils.np_utils import to_categorical
|
||||
@@ -9,14 +11,14 @@ from ..models import Sequential
|
||||
|
||||
|
||||
class BaseWrapper(object):
|
||||
'''Base class for the Keras scikit-learn wrapper.
|
||||
"""Base class for the Keras scikit-learn wrapper.
|
||||
|
||||
Warning: This class should not be used directly.
|
||||
Use descendant classes instead.
|
||||
|
||||
# Arguments
|
||||
build_fn: callable function or class instance
|
||||
sk_params: model parameters & fitting parameters
|
||||
**sk_params: model parameters & fitting parameters
|
||||
|
||||
The build_fn should construct, compile and return a Keras model, which
|
||||
will then be used to fit/predict. One of the following
|
||||
@@ -47,7 +49,7 @@ class BaseWrapper(object):
|
||||
those you could pass to `sk_params`, including fitting parameters.
|
||||
In other words, you could use `grid_search` to search for the best
|
||||
`batch_size` or `nb_epoch` as well as the model parameters.
|
||||
'''
|
||||
"""
|
||||
|
||||
def __init__(self, build_fn=None, **sk_params):
|
||||
self.build_fn = build_fn
|
||||
@@ -55,18 +57,20 @@ class BaseWrapper(object):
|
||||
self.check_params(sk_params)
|
||||
|
||||
def check_params(self, params):
|
||||
'''Check for user typos in "params" keys to avoid
|
||||
unwanted usage of default values
|
||||
"""Checks for user typos in "params".
|
||||
|
||||
# Arguments
|
||||
params: dictionary
|
||||
The parameters to be checked
|
||||
'''
|
||||
params: dictionary; the parameters to be checked
|
||||
|
||||
# Raises
|
||||
ValueError: if any member of `params` is not a valid argument.
|
||||
"""
|
||||
legal_params_fns = [Sequential.fit, Sequential.predict,
|
||||
Sequential.predict_classes, Sequential.evaluate]
|
||||
if self.build_fn is None:
|
||||
legal_params_fns.append(self.__call__)
|
||||
elif not isinstance(self.build_fn, types.FunctionType) and not isinstance(self.build_fn, types.MethodType):
|
||||
elif (not isinstance(self.build_fn, types.FunctionType) and
|
||||
not isinstance(self.build_fn, types.MethodType)):
|
||||
legal_params_fns.append(self.build_fn.__call__)
|
||||
else:
|
||||
legal_params_fns.append(self.build_fn)
|
||||
@@ -80,57 +84,50 @@ class BaseWrapper(object):
|
||||
if params_name not in legal_params:
|
||||
raise ValueError('{} is not a legal parameter'.format(params_name))
|
||||
|
||||
def get_params(self, deep=True):
|
||||
'''Get parameters for this estimator.
|
||||
|
||||
# Arguments
|
||||
deep: boolean, optional
|
||||
If True, will return the parameters for this estimator and
|
||||
contained sub-objects that are estimators.
|
||||
def get_params(self, _):
|
||||
"""Gets parameters for this estimator.
|
||||
|
||||
# Returns
|
||||
params : dict
|
||||
Dictionary of parameter names mapped to their values.
|
||||
'''
|
||||
"""
|
||||
res = copy.deepcopy(self.sk_params)
|
||||
res.update({'build_fn': self.build_fn})
|
||||
return res
|
||||
|
||||
def set_params(self, **params):
|
||||
'''Set the parameters of this estimator.
|
||||
"""Sets the parameters of this estimator.
|
||||
|
||||
# Arguments
|
||||
params: dict
|
||||
Dictionary of parameter names mapped to their values.
|
||||
**params: Dictionary of parameter names mapped to their values.
|
||||
|
||||
# Returns
|
||||
self
|
||||
'''
|
||||
"""
|
||||
self.check_params(params)
|
||||
self.sk_params.update(params)
|
||||
return self
|
||||
|
||||
def fit(self, X, y, **kwargs):
|
||||
'''Construct a new model with build_fn and fit the model according
|
||||
to the given training data.
|
||||
def fit(self, x, y, **kwargs):
|
||||
"""Constructs a new model with `build_fn` & fit the model to `(x, y)`.
|
||||
|
||||
# Arguments
|
||||
X : array-like, shape `(n_samples, n_features)`
|
||||
x : array-like, shape `(n_samples, n_features)`
|
||||
Training samples where n_samples is the number of samples
|
||||
and n_features is the number of features.
|
||||
y : array-like, shape `(n_samples,)` or `(n_samples, n_outputs)`
|
||||
True labels for X.
|
||||
kwargs: dictionary arguments
|
||||
**kwargs: dictionary arguments
|
||||
Legal arguments are the arguments of `Sequential.fit`
|
||||
|
||||
# Returns
|
||||
history : object
|
||||
details about the training history at each epoch.
|
||||
'''
|
||||
|
||||
"""
|
||||
if self.build_fn is None:
|
||||
self.model = self.__call__(**self.filter_sk_params(self.__call__))
|
||||
elif not isinstance(self.build_fn, types.FunctionType) and not isinstance(self.build_fn, types.MethodType):
|
||||
elif (not isinstance(self.build_fn, types.FunctionType) and
|
||||
not isinstance(self.build_fn, types.MethodType)):
|
||||
self.model = self.build_fn(
|
||||
**self.filter_sk_params(self.build_fn.__call__))
|
||||
else:
|
||||
@@ -145,12 +142,12 @@ class BaseWrapper(object):
|
||||
fit_args = copy.deepcopy(self.filter_sk_params(Sequential.fit))
|
||||
fit_args.update(kwargs)
|
||||
|
||||
history = self.model.fit(X, y, **fit_args)
|
||||
history = self.model.fit(x, y, **fit_args)
|
||||
|
||||
return history
|
||||
|
||||
def filter_sk_params(self, fn, override={}):
|
||||
'''Filter sk_params and return those in fn's arguments
|
||||
def filter_sk_params(self, fn, override=None):
|
||||
"""Filters `sk_params` and return those in `fn`'s arguments.
|
||||
|
||||
# Arguments
|
||||
fn : arbitrary function
|
||||
@@ -159,7 +156,8 @@ class BaseWrapper(object):
|
||||
# Returns
|
||||
res : dictionary containing variables
|
||||
in both sk_params and fn's arguments.
|
||||
'''
|
||||
"""
|
||||
override = override or {}
|
||||
res = {}
|
||||
fn_args = inspect.getargspec(fn)[0]
|
||||
for name, value in self.sk_params.items():
|
||||
@@ -170,35 +168,37 @@ class BaseWrapper(object):
|
||||
|
||||
|
||||
class KerasClassifier(BaseWrapper):
|
||||
'''Implementation of the scikit-learn classifier API for Keras.
|
||||
'''
|
||||
"""Implementation of the scikit-learn classifier API for Keras.
|
||||
"""
|
||||
|
||||
def predict(self, X, **kwargs):
|
||||
'''Returns the class predictions for the given test data.
|
||||
def predict(self, x, **kwargs):
|
||||
"""Returns the class predictions for the given test data.
|
||||
|
||||
# Arguments
|
||||
X: array-like, shape `(n_samples, n_features)`
|
||||
x: array-like, shape `(n_samples, n_features)`
|
||||
Test samples where n_samples is the number of samples
|
||||
and n_features is the number of features.
|
||||
kwargs: dictionary arguments
|
||||
Legal arguments are the arguments of `Sequential.predict_classes`.
|
||||
**kwargs: dictionary arguments
|
||||
Legal arguments are the arguments
|
||||
of `Sequential.predict_classes`.
|
||||
|
||||
# Returns
|
||||
preds: array-like, shape `(n_samples,)`
|
||||
Class predictions.
|
||||
'''
|
||||
"""
|
||||
kwargs = self.filter_sk_params(Sequential.predict_classes, kwargs)
|
||||
return self.model.predict_classes(X, **kwargs)
|
||||
return self.model.predict_classes(x, **kwargs)
|
||||
|
||||
def predict_proba(self, X, **kwargs):
|
||||
'''Returns class probability estimates for the given test data.
|
||||
def predict_proba(self, x, **kwargs):
|
||||
"""Returns class probability estimates for the given test data.
|
||||
|
||||
# Arguments
|
||||
X: array-like, shape `(n_samples, n_features)`
|
||||
x: array-like, shape `(n_samples, n_features)`
|
||||
Test samples where n_samples is the number of samples
|
||||
and n_features is the number of features.
|
||||
kwargs: dictionary arguments
|
||||
Legal arguments are the arguments of `Sequential.predict_classes`.
|
||||
**kwargs: dictionary arguments
|
||||
Legal arguments are the arguments
|
||||
of `Sequential.predict_classes`.
|
||||
|
||||
# Returns
|
||||
proba: array-like, shape `(n_samples, n_outputs)`
|
||||
@@ -207,9 +207,9 @@ class KerasClassifier(BaseWrapper):
|
||||
to match the scikit-learn API,
|
||||
will return an array of shape '(n_samples, 2)'
|
||||
(instead of `(n_sample, 1)` as in Keras).
|
||||
'''
|
||||
"""
|
||||
kwargs = self.filter_sk_params(Sequential.predict_proba, kwargs)
|
||||
probs = self.model.predict_proba(X, **kwargs)
|
||||
probs = self.model.predict_proba(x, **kwargs)
|
||||
|
||||
# check if binary classification
|
||||
if probs.shape[1] == 1:
|
||||
@@ -217,22 +217,27 @@ class KerasClassifier(BaseWrapper):
|
||||
probs = np.hstack([1 - probs, probs])
|
||||
return probs
|
||||
|
||||
def score(self, X, y, **kwargs):
|
||||
'''Returns the mean accuracy on the given test data and labels.
|
||||
def score(self, x, y, **kwargs):
|
||||
"""Returns the mean accuracy on the given test data and labels.
|
||||
|
||||
# Arguments
|
||||
X: array-like, shape `(n_samples, n_features)`
|
||||
x: array-like, shape `(n_samples, n_features)`
|
||||
Test samples where n_samples is the number of samples
|
||||
and n_features is the number of features.
|
||||
y: array-like, shape `(n_samples,)` or `(n_samples, n_outputs)`
|
||||
True labels for X.
|
||||
kwargs: dictionary arguments
|
||||
True labels for x.
|
||||
**kwargs: dictionary arguments
|
||||
Legal arguments are the arguments of `Sequential.evaluate`.
|
||||
|
||||
# Returns
|
||||
score: float
|
||||
Mean accuracy of predictions on X wrt. y.
|
||||
'''
|
||||
|
||||
# Raises
|
||||
ValueError: If the underlying model isn't configured to
|
||||
compute accuracy. You should pass `metrics=["accuracy"]` to
|
||||
the `.compile()` method of the model.
|
||||
"""
|
||||
kwargs = self.filter_sk_params(Sequential.evaluate, kwargs)
|
||||
|
||||
loss_name = self.model.loss
|
||||
@@ -241,55 +246,56 @@ class KerasClassifier(BaseWrapper):
|
||||
if loss_name == 'categorical_crossentropy' and len(y.shape) != 2:
|
||||
y = to_categorical(y)
|
||||
|
||||
outputs = self.model.evaluate(X, y, **kwargs)
|
||||
if type(outputs) is not list:
|
||||
outputs = self.model.evaluate(x, y, **kwargs)
|
||||
if not isinstance(outputs, list):
|
||||
outputs = [outputs]
|
||||
for name, output in zip(self.model.metrics_names, outputs):
|
||||
if name == 'acc':
|
||||
return output
|
||||
raise Exception('The model is not configured to compute accuracy. '
|
||||
'You should pass `metrics=["accuracy"]` to '
|
||||
'the `model.compile()` method.')
|
||||
raise ValueError('The model is not configured to compute accuracy. '
|
||||
'You should pass `metrics=["accuracy"]` to '
|
||||
'the `model.compile()` method.')
|
||||
|
||||
|
||||
class KerasRegressor(BaseWrapper):
|
||||
'''Implementation of the scikit-learn regressor API for Keras.
|
||||
'''
|
||||
"""Implementation of the scikit-learn regressor API for Keras.
|
||||
"""
|
||||
|
||||
def predict(self, X, **kwargs):
|
||||
'''Returns predictions for the given test data.
|
||||
def predict(self, x, **kwargs):
|
||||
"""Returns predictions for the given test data.
|
||||
|
||||
# Arguments
|
||||
X: array-like, shape `(n_samples, n_features)`
|
||||
x: array-like, shape `(n_samples, n_features)`
|
||||
Test samples where n_samples is the number of samples
|
||||
and n_features is the number of features.
|
||||
kwargs: dictionary arguments
|
||||
**kwargs: dictionary arguments
|
||||
Legal arguments are the arguments of `Sequential.predict`.
|
||||
|
||||
# Returns
|
||||
preds: array-like, shape `(n_samples,)`
|
||||
Predictions.
|
||||
'''
|
||||
"""
|
||||
kwargs = self.filter_sk_params(Sequential.predict, kwargs)
|
||||
return np.squeeze(self.model.predict(X, **kwargs))
|
||||
return np.squeeze(self.model.predict(x, **kwargs))
|
||||
|
||||
def score(self, X, y, **kwargs):
|
||||
'''Returns the mean loss on the given test data and labels.
|
||||
def score(self, x, y, **kwargs):
|
||||
"""Returns the mean loss on the given test data and labels.
|
||||
|
||||
# Arguments
|
||||
X: array-like, shape `(n_samples, n_features)`
|
||||
x: array-like, shape `(n_samples, n_features)`
|
||||
Test samples where n_samples is the number of samples
|
||||
and n_features is the number of features.
|
||||
y: array-like, shape `(n_samples,)`
|
||||
True labels for X.
|
||||
kwargs: dictionary arguments
|
||||
**kwargs: dictionary arguments
|
||||
Legal arguments are the arguments of `Sequential.evaluate`.
|
||||
|
||||
# Returns
|
||||
score: float
|
||||
Mean loss of predictions on x wrt. y.
|
||||
'''
|
||||
"""
|
||||
kwargs = self.filter_sk_params(Sequential.evaluate, kwargs)
|
||||
loss = self.model.evaluate(X, y, **kwargs)
|
||||
if type(loss) is list:
|
||||
loss = self.model.evaluate(x, y, **kwargs)
|
||||
if isinstance(loss, list):
|
||||
return loss[0]
|
||||
return loss
|
||||
|
||||
+3
-15
@@ -10,23 +10,11 @@ addopts=-v
|
||||
norecursedirs= build
|
||||
|
||||
# PEP-8 The following are ignored:
|
||||
# E251 unexpected spaces around keyword / parameter equals
|
||||
# E225 missing whitespace around operator
|
||||
# E226 missing whitespace around arithmetic operator
|
||||
# W293 blank line contains whitespace
|
||||
# E501 line too long (82 > 79 characters)
|
||||
# E402 module level import not at top of file - temporary measure to coninue adding ros python packaged in sys.path
|
||||
# E402 module level import not at top of file - temporary measure to continue adding ros python packaged in sys.path
|
||||
# E731 do not assign a lambda expression, use a def
|
||||
# E302 two blank lines between the functions
|
||||
# E261 at least two spaces before inline comment
|
||||
|
||||
|
||||
pep8ignore=* E251 \
|
||||
* E225 \
|
||||
* E226 \
|
||||
* W293 \
|
||||
* E501 \
|
||||
pep8ignore=* E501 \
|
||||
* E402 \
|
||||
* E731 \
|
||||
* E302 \
|
||||
* E261
|
||||
|
||||
|
||||
+2
-2
@@ -3,12 +3,12 @@ from setuptools import find_packages
|
||||
|
||||
|
||||
setup(name='Keras',
|
||||
version='1.1.2',
|
||||
version='1.2.1',
|
||||
description='Deep Learning for Python',
|
||||
author='Francois Chollet',
|
||||
author_email='francois.chollet@gmail.com',
|
||||
url='https://github.com/fchollet/keras',
|
||||
download_url='https://github.com/fchollet/keras/tarball/1.1.2',
|
||||
download_url='https://github.com/fchollet/keras/tarball/1.2.1',
|
||||
license='MIT',
|
||||
install_requires=['theano', 'pyyaml', 'six'],
|
||||
extras_require={
|
||||
|
||||
@@ -108,8 +108,8 @@ def test_stacked_lstm_char_prediction():
|
||||
y = np.zeros((len(sentences), number_of_chars), dtype=np.bool)
|
||||
for i, sentence in enumerate(sentences):
|
||||
for t, char in enumerate(sentence):
|
||||
X[i, t, ord(char)-ord('a')] = 1
|
||||
y[i, ord(next_chars[i])-ord('a')] = 1
|
||||
X[i, t, ord(char) - ord('a')] = 1
|
||||
y[i, ord(next_chars[i]) - ord('a')] = 1
|
||||
|
||||
# learn the alphabet with stacked LSTM
|
||||
model = Sequential([
|
||||
@@ -123,7 +123,7 @@ def test_stacked_lstm_char_prediction():
|
||||
# prime the model with 'ab' sequence and let it generate the learned alphabet
|
||||
sentence = alphabet[:sequence_length]
|
||||
generated = sentence
|
||||
for iteration in range(number_of_chars-sequence_length):
|
||||
for iteration in range(number_of_chars - sequence_length):
|
||||
x = np.zeros((1, sequence_length, number_of_chars))
|
||||
for t, char in enumerate(sentence):
|
||||
x[0, t, ord(char) - ord('a')] = 1.
|
||||
|
||||
@@ -1,14 +1,21 @@
|
||||
import sys
|
||||
import pytest
|
||||
from numpy.testing import assert_allclose
|
||||
import numpy as np
|
||||
import scipy.sparse as sparse
|
||||
|
||||
from keras.backend import theano_backend as KTH
|
||||
from keras import backend as K
|
||||
from keras.backend import theano_backend as KTH, floatx, set_floatx, variable
|
||||
from keras.backend import tensorflow_backend as KTF
|
||||
from keras.utils.np_utils import convert_kernel
|
||||
|
||||
|
||||
def check_dtype(var, dtype):
|
||||
if K._BACKEND == 'theano':
|
||||
assert var.dtype == dtype
|
||||
else:
|
||||
assert var.dtype.name == '%s_ref' % dtype
|
||||
|
||||
|
||||
def check_single_tensor_operation(function_name, input_shape, **kwargs):
|
||||
val = np.random.random(input_shape) - 0.5
|
||||
xth = KTH.variable(val)
|
||||
@@ -626,43 +633,43 @@ class TestBackend(object):
|
||||
mean = 0.
|
||||
std = 1.
|
||||
rand = KTF.eval(KTF.random_normal((1000, 1000), mean=mean, std=std))
|
||||
assert(rand.shape == (1000, 1000))
|
||||
assert(np.abs(np.mean(rand) - mean) < 0.01)
|
||||
assert(np.abs(np.std(rand) - std) < 0.01)
|
||||
assert rand.shape == (1000, 1000)
|
||||
assert np.abs(np.mean(rand) - mean) < 0.01
|
||||
assert np.abs(np.std(rand) - std) < 0.01
|
||||
|
||||
rand = KTH.eval(KTH.random_normal((1000, 1000), mean=mean, std=std))
|
||||
assert(rand.shape == (1000, 1000))
|
||||
assert(np.abs(np.mean(rand) - mean) < 0.01)
|
||||
assert(np.abs(np.std(rand) - std) < 0.01)
|
||||
assert rand.shape == (1000, 1000)
|
||||
assert np.abs(np.mean(rand) - mean) < 0.01
|
||||
assert np.abs(np.std(rand) - std) < 0.01
|
||||
|
||||
def test_random_uniform(self):
|
||||
min = -1.
|
||||
max = 1.
|
||||
rand = KTF.eval(KTF.random_uniform((1000, 1000), min, max))
|
||||
assert(rand.shape == (1000, 1000))
|
||||
assert(np.abs(np.mean(rand)) < 0.01)
|
||||
assert(np.max(rand) <= max)
|
||||
assert(np.min(rand) >= min)
|
||||
min_val = -1.
|
||||
max_val = 1.
|
||||
rand = KTF.eval(KTF.random_uniform((1000, 1000), min_val, max_val))
|
||||
assert rand.shape == (1000, 1000)
|
||||
assert np.abs(np.mean(rand)) < 0.01
|
||||
assert np.max(rand) <= max_val
|
||||
assert np.min(rand) >= min_val
|
||||
|
||||
rand = KTH.eval(KTH.random_uniform((1000, 1000), min, max))
|
||||
assert(rand.shape == (1000, 1000))
|
||||
assert(np.abs(np.mean(rand)) < 0.01)
|
||||
assert(np.max(rand) <= max)
|
||||
assert(np.min(rand) >= min)
|
||||
rand = KTH.eval(KTH.random_uniform((1000, 1000), min_val, max_val))
|
||||
assert rand.shape == (1000, 1000)
|
||||
assert np.abs(np.mean(rand)) < 0.01
|
||||
assert np.max(rand) <= max_val
|
||||
assert np.min(rand) >= min_val
|
||||
|
||||
def test_random_binomial(self):
|
||||
p = 0.5
|
||||
rand = KTF.eval(KTF.random_binomial((1000, 1000), p))
|
||||
assert(rand.shape == (1000, 1000))
|
||||
assert(np.abs(np.mean(rand) - p) < 0.01)
|
||||
assert(np.max(rand) == 1)
|
||||
assert(np.min(rand) == 0)
|
||||
assert rand.shape == (1000, 1000)
|
||||
assert np.abs(np.mean(rand) - p) < 0.01
|
||||
assert np.max(rand) == 1
|
||||
assert np.min(rand) == 0
|
||||
|
||||
rand = KTH.eval(KTH.random_binomial((1000, 1000), p))
|
||||
assert(rand.shape == (1000, 1000))
|
||||
assert(np.abs(np.mean(rand) - p) < 0.01)
|
||||
assert(np.max(rand) == 1)
|
||||
assert(np.min(rand) == 0)
|
||||
assert rand.shape == (1000, 1000)
|
||||
assert np.abs(np.mean(rand) - p) < 0.01
|
||||
assert np.max(rand) == 1
|
||||
assert np.min(rand) == 0
|
||||
|
||||
def test_ctc(self):
|
||||
# simplified version of TensorFlow's test
|
||||
@@ -783,7 +790,7 @@ class TestBackend(object):
|
||||
|
||||
# len max_time_steps array of batch_size x depth matrices
|
||||
inputs = ([input_prob_matrix_0[t, :][np.newaxis, :]
|
||||
for t in range(seq_len_0)] + # Pad to max_time_steps = 8
|
||||
for t in range(seq_len_0)] + # Pad to max_time_steps = 8
|
||||
2 * [np.zeros((1, depth), dtype=np.float32)])
|
||||
|
||||
inputs = KTF.variable(np.asarray(inputs).transpose((1, 0, 2)))
|
||||
@@ -892,7 +899,7 @@ class TestBackend(object):
|
||||
def test_foldl(self):
|
||||
x = np.random.rand(10, 3).astype(np.float32)
|
||||
for K in [KTF, KTH]:
|
||||
kx = K.eval(K.foldl(lambda a, b: a+b, x))
|
||||
kx = K.eval(K.foldl(lambda a, b: a + b, x))
|
||||
|
||||
assert (3,) == kx.shape
|
||||
assert_allclose(x.sum(axis=0), kx, atol=1e-05)
|
||||
@@ -904,12 +911,73 @@ class TestBackend(object):
|
||||
# right to left we have no such problem and the result is larger
|
||||
x = np.array([1e-20, 1e-20, 10, 10, 10], dtype=np.float32)
|
||||
for K in [KTF, KTH]:
|
||||
p1 = K.eval(K.foldl(lambda a, b: a*b, x))
|
||||
p2 = K.eval(K.foldr(lambda a, b: a*b, x))
|
||||
p1 = K.eval(K.foldl(lambda a, b: a * b, x))
|
||||
p2 = K.eval(K.foldr(lambda a, b: a * b, x))
|
||||
|
||||
assert p1 < p2
|
||||
assert 9e-38 < p2 <= 1e-37
|
||||
|
||||
def test_arange(self):
|
||||
for test_value in (-20, 0, 1, 10):
|
||||
t_a = KTF.arange(test_value)
|
||||
a = KTF.eval(t_a)
|
||||
assert np.array_equal(a, np.arange(test_value))
|
||||
t_b = KTH.arange(test_value)
|
||||
b = KTH.eval(t_b)
|
||||
assert np.array_equal(b, np.arange(test_value))
|
||||
assert np.array_equal(a, b)
|
||||
assert KTF.dtype(t_a) == KTH.dtype(t_b)
|
||||
for start, stop, step in ((0, 5, 1), (-5, 5, 2), (0, 1, 2)):
|
||||
a = KTF.eval(KTF.arange(start, stop, step))
|
||||
assert np.array_equal(a, np.arange(start, stop, step))
|
||||
b = KTH.eval(KTH.arange(start, stop, step))
|
||||
assert np.array_equal(b, np.arange(start, stop, step))
|
||||
assert np.array_equal(a, b)
|
||||
for dtype in ('int32', 'int64', 'float32', 'float64'):
|
||||
for backend in (KTF, KTH):
|
||||
t = backend.arange(10, dtype=dtype)
|
||||
assert backend.dtype(t) == dtype
|
||||
|
||||
def test_setfloatx_incorrect_values(self):
|
||||
# Keep track of the old value
|
||||
old_floatx = floatx()
|
||||
# Try some incorrect values
|
||||
initial = floatx()
|
||||
for value in ['', 'beerfloat', 123]:
|
||||
with pytest.raises(Exception):
|
||||
set_floatx(value)
|
||||
assert floatx() == initial
|
||||
# Restore old value
|
||||
set_floatx(old_floatx)
|
||||
|
||||
def test_setfloatx_correct_values(self):
|
||||
# Keep track of the old value
|
||||
old_floatx = floatx()
|
||||
# Check correct values
|
||||
for value in ['float16', 'float32', 'float64']:
|
||||
set_floatx(value)
|
||||
assert floatx() == value
|
||||
# Restore old value
|
||||
set_floatx(old_floatx)
|
||||
|
||||
def test_set_floatx(self):
|
||||
"""
|
||||
Make sure that changes to the global floatx are effectively
|
||||
taken into account by the backend.
|
||||
"""
|
||||
# Keep track of the old value
|
||||
old_floatx = floatx()
|
||||
|
||||
set_floatx('float16')
|
||||
var = variable([10])
|
||||
check_dtype(var, 'float16')
|
||||
|
||||
set_floatx('float64')
|
||||
var = variable([10])
|
||||
check_dtype(var, 'float64')
|
||||
|
||||
# Restore old value
|
||||
set_floatx(old_floatx)
|
||||
|
||||
if __name__ == '__main__':
|
||||
pytest.main([__file__])
|
||||
|
||||
@@ -10,6 +10,28 @@ from keras.models import model_from_json, model_from_yaml
|
||||
from keras.utils.test_utils import keras_test
|
||||
|
||||
|
||||
@keras_test
|
||||
def test_get_updates_for():
|
||||
a = Input(shape=(2,))
|
||||
dense_layer = Dense(1)
|
||||
dense_layer.add_update(0, inputs=a)
|
||||
dense_layer.add_update(1, inputs=None)
|
||||
|
||||
assert dense_layer.get_updates_for(a) == [0]
|
||||
assert dense_layer.get_updates_for(None) == [1]
|
||||
|
||||
|
||||
@keras_test
|
||||
def test_get_losses_for():
|
||||
a = Input(shape=(2,))
|
||||
dense_layer = Dense(1)
|
||||
dense_layer.add_loss(0, inputs=a)
|
||||
dense_layer.add_loss(1, inputs=None)
|
||||
|
||||
assert dense_layer.get_losses_for(a) == [0]
|
||||
assert dense_layer.get_losses_for(None) == [1]
|
||||
|
||||
|
||||
@keras_test
|
||||
def test_trainable_weights():
|
||||
a = Input(shape=(2,))
|
||||
|
||||
@@ -17,6 +17,13 @@ def test_prelu():
|
||||
input_shape=(2, 3, 4))
|
||||
|
||||
|
||||
@keras_test
|
||||
def test_prelu_share():
|
||||
from keras.layers.advanced_activations import PReLU
|
||||
layer_test(PReLU, kwargs={'shared_axes': 1},
|
||||
input_shape=(2, 3, 4))
|
||||
|
||||
|
||||
@keras_test
|
||||
def test_elu():
|
||||
from keras.layers.advanced_activations import ELU
|
||||
@@ -28,11 +35,20 @@ def test_elu():
|
||||
@keras_test
|
||||
def test_parametric_softplus():
|
||||
from keras.layers.advanced_activations import ParametricSoftplus
|
||||
for alpha in [0., .5, -1.]:
|
||||
layer_test(ParametricSoftplus,
|
||||
kwargs={'alpha_init': 1.,
|
||||
'beta_init': -1},
|
||||
input_shape=(2, 3, 4))
|
||||
layer_test(ParametricSoftplus,
|
||||
kwargs={'alpha_init': 1.,
|
||||
'beta_init': -1},
|
||||
input_shape=(2, 3, 4))
|
||||
|
||||
|
||||
@keras_test
|
||||
def test_parametric_softplus_share():
|
||||
from keras.layers.advanced_activations import ParametricSoftplus
|
||||
layer_test(ParametricSoftplus,
|
||||
kwargs={'shared_axes': 1,
|
||||
'alpha_init': 1.,
|
||||
'beta_init': -1},
|
||||
input_shape=(2, 3, 4))
|
||||
|
||||
|
||||
@keras_test
|
||||
@@ -49,5 +65,12 @@ def test_srelu():
|
||||
input_shape=(2, 3, 4))
|
||||
|
||||
|
||||
@keras_test
|
||||
def test_srelu_share():
|
||||
from keras.layers.advanced_activations import SReLU
|
||||
layer_test(SReLU, kwargs={'shared_axes': 1},
|
||||
input_shape=(2, 3, 4))
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
pytest.main([__file__])
|
||||
|
||||
@@ -9,7 +9,7 @@ from keras.layers import convolutional, pooling
|
||||
|
||||
|
||||
# TensorFlow does not support full convolution.
|
||||
if K._BACKEND == 'theano':
|
||||
if K.backend() == 'theano':
|
||||
_convolution_border_modes = ['valid', 'same', 'full']
|
||||
else:
|
||||
_convolution_border_modes = ['valid', 'same']
|
||||
@@ -84,11 +84,12 @@ def test_atrous_conv_1d():
|
||||
|
||||
@keras_test
|
||||
def test_maxpooling_1d():
|
||||
for stride in [1, 2]:
|
||||
layer_test(convolutional.MaxPooling1D,
|
||||
kwargs={'stride': stride,
|
||||
'border_mode': 'valid'},
|
||||
input_shape=(3, 5, 4))
|
||||
for border_mode in ['valid', 'same']:
|
||||
for stride in [1, 2]:
|
||||
layer_test(convolutional.MaxPooling1D,
|
||||
kwargs={'stride': stride,
|
||||
'border_mode': border_mode},
|
||||
input_shape=(3, 5, 4))
|
||||
|
||||
|
||||
@keras_test
|
||||
@@ -141,37 +142,38 @@ def test_deconvolution_2d():
|
||||
nb_row = 10
|
||||
nb_col = 6
|
||||
|
||||
for border_mode in _convolution_border_modes:
|
||||
for subsample in [(1, 1), (2, 2)]:
|
||||
if border_mode == 'same' and subsample != (1, 1):
|
||||
continue
|
||||
for batch_size in [None, nb_samples]:
|
||||
for border_mode in _convolution_border_modes:
|
||||
for subsample in [(1, 1), (2, 2)]:
|
||||
if border_mode == 'same' and subsample != (1, 1):
|
||||
continue
|
||||
|
||||
rows = conv_input_length(nb_row, 3, border_mode, subsample[0])
|
||||
cols = conv_input_length(nb_col, 3, border_mode, subsample[1])
|
||||
layer_test(convolutional.Deconvolution2D,
|
||||
kwargs={'nb_filter': nb_filter,
|
||||
'nb_row': 3,
|
||||
'nb_col': 3,
|
||||
'output_shape': (nb_samples, nb_filter, rows, cols),
|
||||
'border_mode': border_mode,
|
||||
'subsample': subsample,
|
||||
'dim_ordering': 'th'},
|
||||
input_shape=(nb_samples, stack_size, nb_row, nb_col),
|
||||
fixed_batch_size=True)
|
||||
rows = conv_input_length(nb_row, 3, border_mode, subsample[0])
|
||||
cols = conv_input_length(nb_col, 3, border_mode, subsample[1])
|
||||
layer_test(convolutional.Deconvolution2D,
|
||||
kwargs={'nb_filter': nb_filter,
|
||||
'nb_row': 3,
|
||||
'nb_col': 3,
|
||||
'output_shape': (batch_size, nb_filter, rows, cols),
|
||||
'border_mode': border_mode,
|
||||
'subsample': subsample,
|
||||
'dim_ordering': 'th'},
|
||||
input_shape=(nb_samples, stack_size, nb_row, nb_col),
|
||||
fixed_batch_size=True)
|
||||
|
||||
layer_test(convolutional.Deconvolution2D,
|
||||
kwargs={'nb_filter': nb_filter,
|
||||
'nb_row': 3,
|
||||
'nb_col': 3,
|
||||
'output_shape': (nb_samples, nb_filter, rows, cols),
|
||||
'border_mode': border_mode,
|
||||
'dim_ordering': 'th',
|
||||
'W_regularizer': 'l2',
|
||||
'b_regularizer': 'l2',
|
||||
'activity_regularizer': 'activity_l2',
|
||||
'subsample': subsample},
|
||||
input_shape=(nb_samples, stack_size, nb_row, nb_col),
|
||||
fixed_batch_size=True)
|
||||
layer_test(convolutional.Deconvolution2D,
|
||||
kwargs={'nb_filter': nb_filter,
|
||||
'nb_row': 3,
|
||||
'nb_col': 3,
|
||||
'output_shape': (batch_size, nb_filter, rows, cols),
|
||||
'border_mode': border_mode,
|
||||
'dim_ordering': 'th',
|
||||
'W_regularizer': 'l2',
|
||||
'b_regularizer': 'l2',
|
||||
'activity_regularizer': 'activity_l2',
|
||||
'subsample': subsample},
|
||||
input_shape=(nb_samples, stack_size, nb_row, nb_col),
|
||||
fixed_batch_size=True)
|
||||
|
||||
|
||||
@keras_test
|
||||
@@ -212,7 +214,7 @@ def test_atrous_conv_2d():
|
||||
input_shape=(nb_samples, nb_row, nb_col, stack_size))
|
||||
|
||||
|
||||
@pytest.mark.skipif(K._BACKEND != 'tensorflow', reason="Requires TF backend")
|
||||
@pytest.mark.skipif(K.backend() != 'tensorflow', reason='Requires TF backend')
|
||||
@keras_test
|
||||
def test_separable_conv_2d():
|
||||
nb_samples = 2
|
||||
@@ -664,6 +666,15 @@ def test_cropping_2d():
|
||||
cropping[1][0]: -cropping[1][1],
|
||||
:]
|
||||
assert_allclose(np_output, expected_out)
|
||||
# another correctness test (no cropping)
|
||||
cropping = ((0, 0), (0, 0))
|
||||
layer = convolutional.Cropping2D(cropping=cropping,
|
||||
dim_ordering=dim_ordering)
|
||||
layer.build(input.shape)
|
||||
output = layer(K.variable(input))
|
||||
np_output = K.eval(output)
|
||||
# compare with input
|
||||
assert_allclose(np_output, input)
|
||||
|
||||
|
||||
def test_cropping_3d():
|
||||
@@ -707,6 +718,15 @@ def test_cropping_3d():
|
||||
cropping[2][0]: -cropping[2][1],
|
||||
:]
|
||||
assert_allclose(np_output, expected_out)
|
||||
# another correctness test (no cropping)
|
||||
cropping = ((0, 0), (0, 0), (0, 0))
|
||||
layer = convolutional.Cropping3D(cropping=cropping,
|
||||
dim_ordering=dim_ordering)
|
||||
layer.build(input.shape)
|
||||
output = layer(K.variable(input))
|
||||
np_output = K.eval(output)
|
||||
# compare with input
|
||||
assert_allclose(np_output, input)
|
||||
|
||||
if __name__ == '__main__':
|
||||
pytest.main([__file__])
|
||||
|
||||
@@ -223,6 +223,10 @@ def test_dropout():
|
||||
kwargs={'p': 0.5},
|
||||
input_shape=(3, 2))
|
||||
|
||||
layer_test(core.Dropout,
|
||||
kwargs={'p': 0.5, 'noise_shape': [3, 1]},
|
||||
input_shape=(3, 2))
|
||||
|
||||
layer_test(core.SpatialDropout1D,
|
||||
kwargs={'p': 0.5},
|
||||
input_shape=(2, 3, 4))
|
||||
@@ -255,6 +259,14 @@ def test_reshape():
|
||||
kwargs={'target_shape': (8, 1)},
|
||||
input_shape=(3, 2, 4))
|
||||
|
||||
layer_test(core.Reshape,
|
||||
kwargs={'target_shape': (-1, 1)},
|
||||
input_shape=(3, 2, 4))
|
||||
|
||||
layer_test(core.Reshape,
|
||||
kwargs={'target_shape': (1, -1)},
|
||||
input_shape=(3, 2, 4))
|
||||
|
||||
|
||||
@keras_test
|
||||
def test_permute():
|
||||
@@ -325,6 +337,18 @@ def test_dense():
|
||||
kwargs={'output_dim': 3},
|
||||
input_shape=(3, 2))
|
||||
|
||||
layer_test(core.Dense,
|
||||
kwargs={'output_dim': 3},
|
||||
input_shape=(3, 4, 2))
|
||||
|
||||
layer_test(core.Dense,
|
||||
kwargs={'output_dim': 3},
|
||||
input_shape=(None, None, 2))
|
||||
|
||||
layer_test(core.Dense,
|
||||
kwargs={'output_dim': 3},
|
||||
input_shape=(3, 4, 5, 2))
|
||||
|
||||
layer_test(core.Dense,
|
||||
kwargs={'output_dim': 3,
|
||||
'W_regularizer': regularizers.l2(0.01),
|
||||
|
||||
@@ -46,6 +46,20 @@ def test_batchnorm_mode_0_or_2():
|
||||
assert_allclose(out.std(), 1.0, atol=1e-1)
|
||||
|
||||
|
||||
@keras_test
|
||||
def test_batchnorm_mode_0_or_2_twice():
|
||||
# This is a regression test for issue #4881 with the old
|
||||
# batch normalization functions in the Theano backend.
|
||||
model = Sequential()
|
||||
model.add(normalization.BatchNormalization(mode=0, input_shape=(10, 5, 5), axis=1))
|
||||
model.add(normalization.BatchNormalization(mode=0, input_shape=(10, 5, 5), axis=1))
|
||||
model.compile(loss='mse', optimizer='sgd')
|
||||
|
||||
X = np.random.normal(loc=5.0, scale=10.0, size=(20, 10, 5, 5))
|
||||
model.fit(X, X, nb_epoch=1, verbose=0)
|
||||
model.predict(X)
|
||||
|
||||
|
||||
@keras_test
|
||||
def test_batchnorm_mode_0_convnet():
|
||||
model = Sequential()
|
||||
|
||||
Alguns arquivos não foram exibidos porque demasiados arquivos foram alterados neste diff Mostrar Mais
Referência em uma Nova Issue
Bloquear um usuário