Comparar commits
160 Commits
| Autor | SHA1 | Data | |
|---|---|---|---|
| 4fa7e5d454 | |||
| 14f35ab055 | |||
| 9777b51ee2 | |||
| 434545a11f | |||
| e58f0be8f0 | |||
| f85695cb7b | |||
| 66f2613416 | |||
| 793232fe76 | |||
| 3ad7463b60 | |||
| 5ea9f5bdd1 | |||
| 757b3ed1b0 | |||
| c13f890972 | |||
| 99ee2fb09a | |||
| d009ac8fba | |||
| 5c384a1bca | |||
| 0f450fe265 | |||
| c2a7c69f9d | |||
| 676e227b47 | |||
| 1de4bf1b59 | |||
| 7016e8f1d9 | |||
| 0f4fec30f0 | |||
| ff1f796032 | |||
| f1a95869eb | |||
| 4cd3d284e9 | |||
| 0f4be6d17b | |||
| ab3b93e8dd | |||
| 3d400116b9 | |||
| f41d5f021a | |||
| 7174a09d3c | |||
| 180fa47123 | |||
| 1585b8dd4e | |||
| c07d0e6448 | |||
| aabda13a10 | |||
| dcb9fac577 | |||
| 2b674827c3 | |||
| a5a775b79f | |||
| 9736056a60 | |||
| d739b3c2cd | |||
| c751f81d0d | |||
| d675907654 | |||
| 72f1ce4ed4 | |||
| 5d38b04415 | |||
| d7e0621ed3 | |||
| c57d1a3219 | |||
| d87148c56b | |||
| 15338cc3da | |||
| 262e5751f4 | |||
| e153e560a1 | |||
| 445aecdeb7 | |||
| 57429d1567 | |||
| 8f8d97e615 | |||
| c243f39ce5 | |||
| 55487f33b1 | |||
| 1c6db08158 | |||
| e54d7951f2 | |||
| 82ca6d4185 | |||
| f3c60dc571 | |||
| c4166a9efc | |||
| 3d176e926f | |||
| 3a7cd05b48 | |||
| 8ef4a3da52 | |||
| 1b7800aceb | |||
| b5746331f6 | |||
| 3e933ca0ed | |||
| 53e541f7bf | |||
| fbc9a18f0a | |||
| 8a50f5dfc8 | |||
| c3c634f4b1 | |||
| 710d8e4dd3 | |||
| 887576b113 | |||
| 2ad3544b01 | |||
| 68bde67d0a | |||
| 0edecdd09e | |||
| 5d97657375 | |||
| cf8947da79 | |||
| c6bf7558b2 | |||
| 429e253fb6 | |||
| e5529d98fe | |||
| 6e03136116 | |||
| 4973fe3069 | |||
| cfa1f7c3bc | |||
| 538d368396 | |||
| 590a5a5382 | |||
| fa585c5151 | |||
| 7ae2f84783 | |||
| 088dbe6866 | |||
| 6fb7ba721c | |||
| 7aa3114d9f | |||
| 8bfd851133 | |||
| 9120a7251d | |||
| fdb9561ade | |||
| a5ec992b1f | |||
| 2c432ffeb3 | |||
| 0ab4b647f8 | |||
| 9f4734cbf1 | |||
| ac1a09c787 | |||
| c10945f53a | |||
| 309f586424 | |||
| 1f5455e29e | |||
| a90af6f22e | |||
| 38719480a8 | |||
| aa18604fec | |||
| 875bc59ecf | |||
| 89f0527f31 | |||
| 8c0c3774e6 | |||
| 9c93d8ec06 | |||
| 1ccad186fd | |||
| e8cd940cf8 | |||
| c39546ee10 | |||
| 8f75744379 | |||
| ea47e6de27 | |||
| a6525be4fc | |||
| 833c0b23f5 | |||
| a04d968422 | |||
| 7b261704cf | |||
| 97b0f9f6e4 | |||
| 3071e0de2f | |||
| fe72033b2e | |||
| b57b9d3f8e | |||
| 50b4f7fad5 | |||
| 6b05aebc0c | |||
| 5863fc74b1 | |||
| 293940600b | |||
| f0369909d0 | |||
| 9db82605d2 | |||
| c0d95fd6c2 | |||
| 150e0fa8a6 | |||
| 45ad509611 | |||
| cbefd323be | |||
| 0f0d837178 | |||
| a6c9227372 | |||
| f6b804263a | |||
| b5df1c6170 | |||
| 44bf298ec3 | |||
| 5d575a3eff | |||
| e63372e41f | |||
| 9ee0c8e634 | |||
| 431c76abc4 | |||
| 21023f7f9c | |||
| 1746ac463a | |||
| f573a86b42 | |||
| 0e18cb3efa | |||
| 50f7f03f6b | |||
| 3d4a48b120 | |||
| ffe013033e | |||
| 00cbeecf6c | |||
| 737bea8f39 | |||
| c2e36f369b | |||
| 883f74ca41 | |||
| d8b226f26b | |||
| c4f3155d19 | |||
| 72c7716902 | |||
| 1bc79f66f9 | |||
| 2b3eae5f08 | |||
| 497cff9772 | |||
| fdb20dbc7e | |||
| 942ed44fdd | |||
| 8bc3f4d916 | |||
| dcbc2b933a | |||
| d0b4779071 |
+7
-9
@@ -9,11 +9,11 @@ matrix:
|
||||
env: KERAS_BACKEND=theano TEST_MODE=INTEGRATION_TESTS
|
||||
- python: 2.7
|
||||
env: KERAS_BACKEND=tensorflow
|
||||
- python: 3.4
|
||||
- python: 3.5
|
||||
env: KERAS_BACKEND=tensorflow
|
||||
- python: 2.7
|
||||
env: KERAS_BACKEND=theano
|
||||
- python: 3.4
|
||||
- python: 3.5
|
||||
env: KERAS_BACKEND=theano
|
||||
install:
|
||||
# code below is taken from http://conda.pydata.org/docs/travis.html
|
||||
@@ -34,24 +34,22 @@ install:
|
||||
|
||||
- conda create -q -n test-environment python=$TRAVIS_PYTHON_VERSION numpy scipy matplotlib pandas pytest h5py
|
||||
- source activate test-environment
|
||||
- pip install pytest-cov python-coveralls pytest-xdist coverage==3.7.1 #we need this version of coverage for coveralls.io to work
|
||||
- pip install pep8 pytest-pep8
|
||||
- pip install git+git://github.com/Theano/Theano.git
|
||||
|
||||
# install PIL for preprocessing tests
|
||||
- if [[ "$TRAVIS_PYTHON_VERSION" == "2.7" ]]; then
|
||||
conda install pil;
|
||||
elif [[ "$TRAVIS_PYTHON_VERSION" == "3.4" ]]; then
|
||||
elif [[ "$TRAVIS_PYTHON_VERSION" == "3.5" ]]; then
|
||||
conda install Pillow;
|
||||
fi
|
||||
|
||||
- python setup.py install
|
||||
- pip install -e .[tests]
|
||||
|
||||
# install TensorFlow
|
||||
- if [[ "$TRAVIS_PYTHON_VERSION" == "2.7" ]]; then
|
||||
pip install https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-0.11.0-cp27-none-linux_x86_64.whl;
|
||||
elif [[ "$TRAVIS_PYTHON_VERSION" == "3.4" ]]; then
|
||||
pip install https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-0.11.0-cp34-cp34m-linux_x86_64.whl;
|
||||
pip install https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-0.12.1-cp27-none-linux_x86_64.whl;
|
||||
elif [[ "$TRAVIS_PYTHON_VERSION" == "3.5" ]]; then
|
||||
pip install https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-0.12.1-cp35-cp35m-linux_x86_64.whl;
|
||||
fi
|
||||
# command to run tests
|
||||
script:
|
||||
|
||||
+1
-1
@@ -43,7 +43,7 @@ We love pull requests. Here's a quick guide:
|
||||
4. Write tests. Your code should have full unit test coverage. If you want to see your PR merged promptly, this is crucial.
|
||||
|
||||
5. Run our test suite locally. It's easy: from the Keras folder, simply run: `py.test tests/`.
|
||||
- You will need to install `pytest`, `coveralls`, `pytest-cov`, `pytest-xdist`: `pip install pytest pytest-cov python-coveralls pytest-xdist pep8 pytest-pep8`
|
||||
- You will need to install the test requirements as well: `pip install -e .[tests]`.
|
||||
|
||||
6. Make sure all tests are passing:
|
||||
- with the Theano backend, on Python 2.7 and Python 3.5
|
||||
|
||||
+3
-3
@@ -55,9 +55,9 @@ Stacking layers is as easy as `.add()`:
|
||||
from keras.layers import Dense, Activation
|
||||
|
||||
model.add(Dense(output_dim=64, input_dim=100))
|
||||
model.add(Activation("relu"))
|
||||
model.add(Activation('relu'))
|
||||
model.add(Dense(output_dim=10))
|
||||
model.add(Activation("softmax"))
|
||||
model.add(Activation('softmax'))
|
||||
```
|
||||
|
||||
Once your model looks good, configure its learning process with `.compile()`:
|
||||
@@ -118,7 +118,7 @@ Keras uses the following dependencies:
|
||||
*When using the TensorFlow backend:*
|
||||
|
||||
- TensorFlow
|
||||
- [See installation instructions](https://github.com/tensorflow/tensorflow#download-and-setup).
|
||||
- [See installation instructions](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/g3doc/get_started/os_setup.md)
|
||||
|
||||
*When using the Theano backend:*
|
||||
|
||||
|
||||
+7
-5
@@ -89,6 +89,7 @@ from keras.utils import data_utils
|
||||
from keras.utils import io_utils
|
||||
from keras.utils import layer_utils
|
||||
from keras.utils import np_utils
|
||||
from keras.utils import generic_utils
|
||||
|
||||
|
||||
EXCLUDE = {
|
||||
@@ -139,9 +140,6 @@ PAGES = [
|
||||
core.Dense,
|
||||
core.Activation,
|
||||
core.Dropout,
|
||||
core.SpatialDropout1D,
|
||||
core.SpatialDropout2D,
|
||||
core.SpatialDropout3D,
|
||||
core.Flatten,
|
||||
core.Reshape,
|
||||
core.Permute,
|
||||
@@ -152,7 +150,6 @@ PAGES = [
|
||||
core.Masking,
|
||||
core.Highway,
|
||||
core.MaxoutDense,
|
||||
core.TimeDistributedDense,
|
||||
],
|
||||
},
|
||||
{
|
||||
@@ -269,6 +266,11 @@ PAGES = [
|
||||
'page': 'utils/np_utils.md',
|
||||
'all_module_functions': [np_utils]
|
||||
},
|
||||
{
|
||||
'page': 'utils/generic_utils.md',
|
||||
'all_module_functions': [generic_utils],
|
||||
'classes': [generic_utils.CustomObjectScope]
|
||||
},
|
||||
]
|
||||
|
||||
ROOT = 'http://keras.io/'
|
||||
@@ -316,7 +318,7 @@ def get_function_signature(function, method=True):
|
||||
for a in args:
|
||||
st += str(a) + ', '
|
||||
for a, v in kwargs:
|
||||
if type(v) == str:
|
||||
if isinstance(v, str):
|
||||
v = '\'' + v + '\''
|
||||
st += str(a) + '=' + str(v) + ', '
|
||||
if kwargs or args:
|
||||
|
||||
@@ -55,6 +55,7 @@ pages:
|
||||
- I/O Utils: utils/io_utils.md
|
||||
- Layer Utils: utils/layer_utils.md
|
||||
- Numpy Utils: utils/np_utils.md
|
||||
- Generic Utils: utils/generic_utils.md
|
||||
|
||||
|
||||
|
||||
|
||||
externo
+28
-10
@@ -200,12 +200,15 @@ The default input size for this model is 299x299.
|
||||
- include_top: whether to include the fully-connected layer at the top of the network.
|
||||
- weights: one of `None` (random initialization) or "imagenet" (pre-training on ImageNet).
|
||||
- input_tensor: optional Keras tensor (i.e. output of `layers.Input()`) to use as image input for the model.
|
||||
- inputs_shape: optional shape tuple, only to be specified
|
||||
- input_shape: optional shape tuple, only to be specified
|
||||
if `include_top` is False (otherwise the input shape
|
||||
has to be `(299, 299, 3)`).
|
||||
It should have exactly 3 input channels,
|
||||
and width and height should be no smaller than 71.
|
||||
E.g. `(150, 150, 3)` would be one valid value.
|
||||
- classes: optional number of classes to classify images
|
||||
into, only to be specified if `include_top` is True, and
|
||||
if no `weights` argument is specified.
|
||||
|
||||
### Returns
|
||||
|
||||
@@ -241,14 +244,17 @@ The default input size for this model is 224x224.
|
||||
- include_top: whether to include the 3 fully-connected layers at the top of the network.
|
||||
- weights: one of `None` (random initialization) or "imagenet" (pre-training on ImageNet).
|
||||
- input_tensor: optional Keras tensor (i.e. output of `layers.Input()`) to use as image input for the model.
|
||||
- inputs_shape: optional shape tuple, only to be specified
|
||||
- input_shape: optional shape tuple, only to be specified
|
||||
if `include_top` is False (otherwise the input shape
|
||||
has to be `(224, 224, 3)` (with `tf` dim ordering)
|
||||
or `(3, 224, 224)` (with `th` dim ordering).
|
||||
It should have exactly 3 input channels,
|
||||
and width and height should be no smaller than 48.
|
||||
E.g. `(200, 200, 3)` would be one valid value.
|
||||
|
||||
- classes: optional number of classes to classify images
|
||||
into, only to be specified if `include_top` is True, and
|
||||
if no `weights` argument is specified.
|
||||
|
||||
### Returns
|
||||
|
||||
A Keras model instance.
|
||||
@@ -283,14 +289,17 @@ The default input size for this model is 224x224.
|
||||
- include_top: whether to include the 3 fully-connected layers at the top of the network.
|
||||
- weights: one of `None` (random initialization) or "imagenet" (pre-training on ImageNet).
|
||||
- input_tensor: optional Keras tensor (i.e. output of `layers.Input()`) to use as image input for the model.
|
||||
- inputs_shape: optional shape tuple, only to be specified
|
||||
- input_shape: optional shape tuple, only to be specified
|
||||
if `include_top` is False (otherwise the input shape
|
||||
has to be `(224, 224, 3)` (with `tf` dim ordering)
|
||||
or `(3, 224, 224)` (with `th` dim ordering).
|
||||
It should have exactly 3 input channels,
|
||||
and width and height should be no smaller than 48.
|
||||
E.g. `(200, 200, 3)` would be one valid value.
|
||||
|
||||
- classes: optional number of classes to classify images
|
||||
into, only to be specified if `include_top` is True, and
|
||||
if no `weights` argument is specified.
|
||||
|
||||
### Returns
|
||||
|
||||
A Keras model instance.
|
||||
@@ -327,14 +336,17 @@ The default input size for this model is 224x224.
|
||||
- include_top: whether to include the fully-connected layer at the top of the network.
|
||||
- weights: one of `None` (random initialization) or "imagenet" (pre-training on ImageNet).
|
||||
- input_tensor: optional Keras tensor (i.e. output of `layers.Input()`) to use as image input for the model.
|
||||
- inputs_shape: optional shape tuple, only to be specified
|
||||
- input_shape: optional shape tuple, only to be specified
|
||||
if `include_top` is False (otherwise the input shape
|
||||
has to be `(224, 224, 3)` (with `tf` dim ordering)
|
||||
or `(3, 224, 224)` (with `th` dim ordering).
|
||||
It should have exactly 3 input channels,
|
||||
and width and height should be no smaller than 197.
|
||||
E.g. `(200, 200, 3)` would be one valid value.
|
||||
|
||||
- classes: optional number of classes to classify images
|
||||
into, only to be specified if `include_top` is True, and
|
||||
if no `weights` argument is specified.
|
||||
|
||||
### Returns
|
||||
|
||||
A Keras model instance.
|
||||
@@ -369,14 +381,17 @@ The default input size for this model is 299x299.
|
||||
- include_top: whether to include the fully-connected layer at the top of the network.
|
||||
- weights: one of `None` (random initialization) or "imagenet" (pre-training on ImageNet).
|
||||
- input_tensor: optional Keras tensor (i.e. output of `layers.Input()`) to use as image input for the model.
|
||||
- inputs_shape: optional shape tuple, only to be specified
|
||||
- input_shape: optional shape tuple, only to be specified
|
||||
if `include_top` is False (otherwise the input shape
|
||||
has to be `(299, 299, 3)` (with `tf` dim ordering)
|
||||
or `(3, 299, 299)` (with `th` dim ordering).
|
||||
It should have exactly 3 input channels,
|
||||
and width and height should be no smaller than 139.
|
||||
E.g. `(150, 150, 3)` would be one valid value.
|
||||
|
||||
- classes: optional number of classes to classify images
|
||||
into, only to be specified if `include_top` is True, and
|
||||
if no `weights` argument is specified.
|
||||
|
||||
### Returns
|
||||
|
||||
A Keras model instance.
|
||||
@@ -405,7 +420,10 @@ A convolutional-recurrent model taking as input a vectorized representation of t
|
||||
- weights: one of `None` (random initialization) or "msd" (pre-training on [Million Song Dataset](http://labrosa.ee.columbia.edu/millionsong/)).
|
||||
- input_tensor: optional Keras tensor (i.e. output of `layers.Input()`) to use as image input for the model.
|
||||
- include_top: whether to include the 1 fully-connected layer (output layer) at the top of the network. If False, the network outputs 32-dim features.
|
||||
|
||||
- classes: optional number of classes to classify images
|
||||
into, only to be specified if `include_top` is True, and
|
||||
if no `weights` argument is specified.
|
||||
|
||||
### Returns
|
||||
|
||||
A Keras model instance.
|
||||
|
||||
+20
@@ -2,6 +2,7 @@
|
||||
|
||||
- [How should I cite Keras?](#how-should-i-cite-keras)
|
||||
- [How can I run Keras on GPU?](#how-can-i-run-keras-on-gpu)
|
||||
- [What does \["sample", "batch", "epoch"\] mean?](#what-does-sample-batch-epoch-mean)
|
||||
- [How can I save a Keras model?](#how-can-i-save-a-keras-model)
|
||||
- [Why is the training loss much higher than the testing loss?](#why-is-the-training-loss-much-higher-than-the-testing-loss)
|
||||
- [How can I obtain the output of an intermediate layer?](#how-can-i-obtain-the-output-of-an-intermediate-layer)
|
||||
@@ -31,6 +32,8 @@ Please cite Keras in your publications if it helps your research. Here is an exa
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### How can I run Keras on GPU?
|
||||
|
||||
If you are running on the TensorFlow backend, your code will automatically run on GPU if any available GPU is detected.
|
||||
@@ -54,6 +57,21 @@ theano.config.floatX = 'float32'
|
||||
|
||||
---
|
||||
|
||||
### What does \["sample", "batch", "epoch"\] mean?
|
||||
|
||||
Below are some common definitions that are necessary to know and understand to correctly utilize Keras:
|
||||
|
||||
- **Sample**: one element of a dataset.
|
||||
- *Example:* one image is a **sample** in a convolutional network
|
||||
- *Example:* one audio file is a **sample** for a speech recognition model
|
||||
- **Batch**: a set of *N* samples. The samples in a **batch** are processed independently, in parallel. If training, a batch results in only one update to the model.
|
||||
- A **batch** generally approximates the distribution of the input data better than a single input. The larger the batch, the better the approximation; however, it is also true that the batch will take longer to process and will still result in only one update. For inference (evaluate/predict), it is recommended to pick a batch size that is as large as you can afford without going out of memory (since larger batches will usually result in faster evaluation/prediction).
|
||||
- **Epoch**: an arbitrary cutoff, generally defined as "one pass over the entire dataset", used to separate training into distinct phases, which is useful for logging and periodic evaluation.
|
||||
- When using `validation_data` or `validation_split` with the `fit` method of Keras models, evaluation will be run at the end of every **epoch**.
|
||||
- Within Keras, there is the ability to add [callbacks](https://keras.io/callbacks/) specifically designed to be run at the end of an **epoch**. Examples of these are learning rate changes and model checkpointing (saving).
|
||||
|
||||
---
|
||||
|
||||
### How can I save a Keras model?
|
||||
|
||||
*It is not recommended to use pickle or cPickle to save a Keras model.*
|
||||
@@ -356,6 +374,7 @@ print(len(model.layers)) # "1"
|
||||
|
||||
Code and pre-trained weights are available for the following image classification models:
|
||||
|
||||
- Xception
|
||||
- VGG16
|
||||
- VGG19
|
||||
- ResNet50
|
||||
@@ -364,6 +383,7 @@ Code and pre-trained weights are available for the following image classificatio
|
||||
They can be imported from the module `keras.applications`:
|
||||
|
||||
```python
|
||||
from keras.applications.xception import Xception
|
||||
from keras.applications.vgg16 import VGG16
|
||||
from keras.applications.vgg19 import VGG19
|
||||
from keras.applications.resnet50 import ResNet50
|
||||
|
||||
externo
+2
-2
@@ -51,9 +51,9 @@ Stacking layers is as easy as `.add()`:
|
||||
from keras.layers import Dense, Activation
|
||||
|
||||
model.add(Dense(output_dim=64, input_dim=100))
|
||||
model.add(Activation("relu"))
|
||||
model.add(Activation('relu'))
|
||||
model.add(Dense(output_dim=10))
|
||||
model.add(Activation("softmax"))
|
||||
model.add(Activation('softmax'))
|
||||
```
|
||||
|
||||
Once your model looks good, configure its learning process with `.compile()`:
|
||||
|
||||
+2
-1
@@ -17,6 +17,7 @@ model.add(Dense(64, init='uniform'))
|
||||
- __identity__: Use with square 2D layers (`shape[0] == shape[1]`).
|
||||
- __orthogonal__: Use with square 2D layers (`shape[0] == shape[1]`).
|
||||
- __zero__
|
||||
- __one__
|
||||
- __glorot_normal__: Gaussian initialization scaled by fan_in + fan_out (Glorot 2010)
|
||||
- __glorot_uniform__
|
||||
- __he_normal__: Gaussian initialization scaled by fan_in (He et al., 2014)
|
||||
@@ -47,4 +48,4 @@ def my_init(shape, name=None):
|
||||
return initializations.normal(shape, scale=0.01, name=name)
|
||||
|
||||
model.add(Dense(64, init=my_init))
|
||||
```
|
||||
```
|
||||
|
||||
+11
-1
@@ -5,11 +5,21 @@ All Keras layers have a number of methods in common:
|
||||
- `layer.get_weights()`: returns the weights of the layer as a list of Numpy arrays.
|
||||
- `layer.set_weights(weights)`: sets the weights of the layer from a list of Numpy arrays (with the same shapes as the output of `get_weights`).
|
||||
- `layer.get_config()`: returns a dictionary containing the configuration of the layer. The layer can be reinstantiated from its config via:
|
||||
|
||||
```python
|
||||
layer = Dense(32)
|
||||
config = layer.get_config()
|
||||
reconstructed_layer = Dense.from_config(config)
|
||||
```
|
||||
|
||||
Or:
|
||||
|
||||
```python
|
||||
from keras.utils.layer_utils import layer_from_config
|
||||
|
||||
config = layer.get_config()
|
||||
layer = layer_from_config(config)
|
||||
layer = layer_from_config({'class_name': layer.__class__.__name__,
|
||||
'config': config})
|
||||
```
|
||||
|
||||
If a layer has a single node (i.e. if it isn't a shared layer), you can get its input tensor, output tensor, input shape and output shape via:
|
||||
|
||||
@@ -2,7 +2,7 @@
|
||||
|
||||
For simple, stateless custom operations, you are probably better off using `layers.core.Lambda` layers. But for any custom operation that has trainable weights, you should implement your own layer.
|
||||
|
||||
Here is the skeleton of a Keras layer, **as of Keras 1.1.3** (if you have an older version, please upgrade). There are only three methods you need to implement:
|
||||
Here is the skeleton of a Keras layer, **as of Keras 1.2.0** (if you have an older version, please upgrade). There are only three methods you need to implement:
|
||||
|
||||
- `build(input_shape)`: this is where you will define your weights. This method must set `self.built = True`, which can be done by calling `super([Layer], self).build()`.
|
||||
- `call(x)`: this is where the layer's logic lives. Unless you want your layer to support masking, you only have to care about the first argument passed to `call`: the input tensor.
|
||||
@@ -21,9 +21,9 @@ class MyLayer(Layer):
|
||||
def build(self, input_shape):
|
||||
# Create a trainable weight variable for this layer.
|
||||
self.W = self.add_weight(shape=(input_shape[1], self.output_dim),
|
||||
initializer='random_uniform',
|
||||
initializer='uniform',
|
||||
trainable=True)
|
||||
super(MyLayer, self).build() # Be sure to call this somewhere!
|
||||
super(MyLayer, self).build(input_shape) # Be sure to call this somewhere!
|
||||
|
||||
def call(self, x, mask=None):
|
||||
return K.dot(x, self.W)
|
||||
@@ -32,4 +32,4 @@ class MyLayer(Layer):
|
||||
return (input_shape[0], self.output_dim)
|
||||
```
|
||||
|
||||
The existing Keras layers provide ample examples of how to implement almost anything. Never hesitate to read the source code!
|
||||
The existing Keras layers provide examples of how to implement almost anything. Never hesitate to read the source code!
|
||||
|
||||
+1
-1
@@ -78,7 +78,7 @@ Generate batches of tensor image data with real-time data augmentation. The data
|
||||
- __flow_from_directory(directory)__: Takes the path to a directory, and generates batches of augmented/normalized data. Yields batches indefinitely, in an infinite loop.
|
||||
- __Arguments__:
|
||||
- __directory__: path to the target directory. It should contain one subdirectory per class.
|
||||
Any PNG, JPG or BNP images inside each of the subdirectories directory tree will be included in the generator.
|
||||
Any PNG, JPG or BMP images inside each of the subdirectories directory tree will be included in the generator.
|
||||
See [this script](https://gist.github.com/fchollet/0830affa1f7f19fd47b06d4cf89ed44d) for more details.
|
||||
- __target_size__: tuple of integers, default: `(256, 256)`. The dimensions to which all images found will be resized.
|
||||
- __color_mode__: one of "grayscale", "rgb". Default: "rgb". Whether the images will be converted to have 1 or 3 color channels.
|
||||
|
||||
+3
-2
@@ -1,10 +1,11 @@
|
||||
## pad_sequences
|
||||
|
||||
```python
|
||||
keras.preprocessing.sequence.pad_sequences(sequences, maxlen=None, dtype='int32')
|
||||
keras.preprocessing.sequence.pad_sequences(sequences, maxlen=None, dtype='int32',
|
||||
padding='pre', truncating='pre', value=0.)
|
||||
```
|
||||
|
||||
Transform a list of `nb_samples sequences` (lists of scalars) into a 2D Numpy array of shape `(nb_samples, nb_timesteps)`. `nb_timesteps` is either the `maxlen` argument if provided, or the length of the longest sequence otherwise. Sequences that are shorter than `nb_timesteps` are padded with zeros at the end.
|
||||
Transform a list of `nb_samples` sequences (lists of scalars) into a 2D Numpy array of shape `(nb_samples, nb_timesteps)`. `nb_timesteps` is either the `maxlen` argument if provided, or the length of the longest sequence otherwise. Sequences that are shorter than `nb_timesteps` are padded with `value` at the end. Sequences longer than `nb_timesteps` are truncated so that they fit the desired length. The position where padding or truncation happens is determined by `padding` or `truncating`, respectively.
|
||||
|
||||
- __Return__: 2D Numpy array of shape `(nb_samples, nb_timesteps)`.
|
||||
|
||||
|
||||
@@ -48,6 +48,9 @@ Compares different LSTM implementations on the IMDB sentiment classification tas
|
||||
[lstm_text_generation.py](lstm_text_generation.py)
|
||||
Generates text from Nietzsche's writings.
|
||||
|
||||
[mnist_acgan.py](mnist_acgan.py)
|
||||
Implementation of AC-GAN (Auxiliary Classifier GAN) on the MNIST dataset.
|
||||
|
||||
[mnist_cnn.py](mnist_cnn.py)
|
||||
Trains a simple convnet on the MNIST dataset.
|
||||
|
||||
|
||||
+68
-34
@@ -35,21 +35,29 @@ from six.moves import range
|
||||
|
||||
|
||||
class CharacterTable(object):
|
||||
'''
|
||||
Given a set of characters:
|
||||
"""Given a set of characters:
|
||||
+ Encode them to a one hot integer representation
|
||||
+ Decode the one hot integer representation to their character output
|
||||
+ Decode a vector of probabilities to their character output
|
||||
'''
|
||||
def __init__(self, chars, maxlen):
|
||||
"""
|
||||
def __init__(self, chars):
|
||||
"""Initialize character table.
|
||||
|
||||
# Arguments
|
||||
chars: Characters that can appear in the input.
|
||||
"""
|
||||
self.chars = sorted(set(chars))
|
||||
self.char_indices = dict((c, i) for i, c in enumerate(self.chars))
|
||||
self.indices_char = dict((i, c) for i, c in enumerate(self.chars))
|
||||
self.maxlen = maxlen
|
||||
|
||||
def encode(self, C, maxlen=None):
|
||||
maxlen = maxlen if maxlen else self.maxlen
|
||||
X = np.zeros((maxlen, len(self.chars)))
|
||||
def encode(self, C, num_rows):
|
||||
"""One hot encode given string C.
|
||||
|
||||
# Arguments
|
||||
num_rows: Number of rows in the returned one hot encoding. This is
|
||||
used to keep the # of rows for each data the same.
|
||||
"""
|
||||
X = np.zeros((num_rows, len(self.chars)))
|
||||
for i, c in enumerate(C):
|
||||
X[i, self.char_indices[c]] = 1
|
||||
return X
|
||||
@@ -65,40 +73,42 @@ class colors:
|
||||
fail = '\033[91m'
|
||||
close = '\033[0m'
|
||||
|
||||
# Parameters for the model and dataset
|
||||
# Parameters for the model and dataset.
|
||||
TRAINING_SIZE = 50000
|
||||
DIGITS = 3
|
||||
INVERT = True
|
||||
# Try replacing GRU, or SimpleRNN
|
||||
RNN = recurrent.LSTM
|
||||
HIDDEN_SIZE = 128
|
||||
BATCH_SIZE = 128
|
||||
LAYERS = 1
|
||||
|
||||
# Maximum length of input is 'int + int' (e.g., '345+678'). Maximum length of
|
||||
# int is DIGITS.
|
||||
MAXLEN = DIGITS + 1 + DIGITS
|
||||
|
||||
# All the numbers, plus sign and space for padding.
|
||||
chars = '0123456789+ '
|
||||
ctable = CharacterTable(chars, MAXLEN)
|
||||
ctable = CharacterTable(chars)
|
||||
|
||||
questions = []
|
||||
expected = []
|
||||
seen = set()
|
||||
print('Generating data...')
|
||||
while len(questions) < TRAINING_SIZE:
|
||||
f = lambda: int(''.join(np.random.choice(list('0123456789')) for i in range(np.random.randint(1, DIGITS + 1))))
|
||||
f = lambda: int(''.join(np.random.choice(list('0123456789'))
|
||||
for i in range(np.random.randint(1, DIGITS + 1))))
|
||||
a, b = f(), f()
|
||||
# Skip any addition questions we've already seen
|
||||
# Also skip any such that X+Y == Y+X (hence the sorting)
|
||||
# Also skip any such that X+Y == Y+X (hence the sorting).
|
||||
key = tuple(sorted((a, b)))
|
||||
if key in seen:
|
||||
continue
|
||||
seen.add(key)
|
||||
# Pad the data with spaces such that it is always MAXLEN
|
||||
# Pad the data with spaces such that it is always MAXLEN.
|
||||
q = '{}+{}'.format(a, b)
|
||||
query = q + ' ' * (MAXLEN - len(q))
|
||||
ans = str(a + b)
|
||||
# Answers can be of maximum size DIGITS + 1
|
||||
# Answers can be of maximum size DIGITS + 1.
|
||||
ans += ' ' * (DIGITS + 1 - len(ans))
|
||||
if INVERT:
|
||||
# Reverse the query, e.g., '12+345 ' becomes ' 543+21'. (Note the
|
||||
# space used for padding.)
|
||||
query = query[::-1]
|
||||
questions.append(query)
|
||||
expected.append(ans)
|
||||
@@ -108,53 +118,73 @@ print('Vectorization...')
|
||||
X = np.zeros((len(questions), MAXLEN, len(chars)), dtype=np.bool)
|
||||
y = np.zeros((len(questions), DIGITS + 1, len(chars)), dtype=np.bool)
|
||||
for i, sentence in enumerate(questions):
|
||||
X[i] = ctable.encode(sentence, maxlen=MAXLEN)
|
||||
X[i] = ctable.encode(sentence, MAXLEN)
|
||||
for i, sentence in enumerate(expected):
|
||||
y[i] = ctable.encode(sentence, maxlen=DIGITS + 1)
|
||||
y[i] = ctable.encode(sentence, DIGITS + 1)
|
||||
|
||||
# Shuffle (X, y) in unison as the later parts of X will almost all be larger digits
|
||||
# Shuffle (X, y) in unison as the later parts of X will almost all be larger
|
||||
# digits.
|
||||
indices = np.arange(len(y))
|
||||
np.random.shuffle(indices)
|
||||
X = X[indices]
|
||||
y = y[indices]
|
||||
|
||||
# Explicitly set apart 10% for validation data that we never train over
|
||||
split_at = len(X) - len(X) / 10
|
||||
# Explicitly set apart 10% for validation data that we never train over.
|
||||
split_at = len(X) - len(X) // 10
|
||||
(X_train, X_val) = (slice_X(X, 0, split_at), slice_X(X, split_at))
|
||||
(y_train, y_val) = (y[:split_at], y[split_at:])
|
||||
|
||||
print('Training Data:')
|
||||
print(X_train.shape)
|
||||
print(y_train.shape)
|
||||
|
||||
print('Validation Data:')
|
||||
print(X_val.shape)
|
||||
print(y_val.shape)
|
||||
|
||||
# Try replacing with GRU or SimpleRNN.
|
||||
RNN = recurrent.LSTM
|
||||
HIDDEN_SIZE = 128
|
||||
BATCH_SIZE = 128
|
||||
LAYERS = 1
|
||||
|
||||
print('Build model...')
|
||||
model = Sequential()
|
||||
# "Encode" the input sequence using an RNN, producing an output of HIDDEN_SIZE
|
||||
# note: in a situation where your input sequences have a variable length,
|
||||
# "Encode" the input sequence using an RNN, producing an output of HIDDEN_SIZE.
|
||||
# Note: In a situation where your input sequences have a variable length,
|
||||
# use input_shape=(None, nb_feature).
|
||||
model.add(RNN(HIDDEN_SIZE, input_shape=(MAXLEN, len(chars))))
|
||||
# For the decoder's input, we repeat the encoded input for each time step
|
||||
# As the decoder RNN's input, repeatedly provide with the last hidden state of
|
||||
# RNN for each time step. Repeat 'DIGITS + 1' times as that's the maximum
|
||||
# length of output, e.g., when DIGITS=3, max output is 999+999=1998.
|
||||
model.add(RepeatVector(DIGITS + 1))
|
||||
# The decoder RNN could be multiple layers stacked or a single layer
|
||||
# The decoder RNN could be multiple layers stacked or a single layer.
|
||||
for _ in range(LAYERS):
|
||||
# By setting return_sequences to True, return not only the last output but
|
||||
# all the outputs so far in the form of (nb_samples, timesteps,
|
||||
# output_dim). This is necessary as TimeDistributed in the below expects
|
||||
# the first dimension to be the timesteps.
|
||||
model.add(RNN(HIDDEN_SIZE, return_sequences=True))
|
||||
|
||||
# For each of step of the output sequence, decide which character should be chosen
|
||||
# Apply a dense layer to every temporal slice of an input. For each step
|
||||
# of the output sequence, decide which character should be chosen.
|
||||
model.add(TimeDistributed(Dense(len(chars))))
|
||||
model.add(Activation('softmax'))
|
||||
|
||||
model.compile(loss='categorical_crossentropy',
|
||||
optimizer='adam',
|
||||
metrics=['accuracy'])
|
||||
model.summary()
|
||||
|
||||
# Train the model each generation and show predictions against the validation dataset
|
||||
# Train the model each generation and show predictions against the validation
|
||||
# dataset.
|
||||
for iteration in range(1, 200):
|
||||
print()
|
||||
print('-' * 50)
|
||||
print('Iteration', iteration)
|
||||
model.fit(X_train, y_train, batch_size=BATCH_SIZE, nb_epoch=1,
|
||||
validation_data=(X_val, y_val))
|
||||
###
|
||||
# Select 10 samples from the validation set at random so we can visualize errors
|
||||
# Select 10 samples from the validation set at random so we can visualize
|
||||
# errors.
|
||||
for i in range(10):
|
||||
ind = np.random.randint(0, len(X_val))
|
||||
rowX, rowy = X_val[np.array([ind])], y_val[np.array([ind])]
|
||||
@@ -164,5 +194,9 @@ for iteration in range(1, 200):
|
||||
guess = ctable.decode(preds[0], calc_argmax=False)
|
||||
print('Q', q[::-1] if INVERT else q)
|
||||
print('T', correct)
|
||||
print(colors.ok + '☑' + colors.close if correct == guess else colors.fail + '☒' + colors.close, guess)
|
||||
if correct == guess:
|
||||
print(colors.ok + '☑' + colors.close, end=" ")
|
||||
else:
|
||||
print(colors.fail + '☒' + colors.close, end=" ")
|
||||
print(guess)
|
||||
print('---')
|
||||
|
||||
@@ -45,6 +45,7 @@ class Antirectifier(Layer):
|
||||
with half as many parameters yet with comparable
|
||||
classification accuracy as an equivalent ReLU-based network.
|
||||
'''
|
||||
|
||||
def get_output_shape_for(self, input_shape):
|
||||
shape = list(input_shape)
|
||||
assert len(shape) == 2 # only valid for 2D tensors
|
||||
|
||||
@@ -97,7 +97,7 @@ else:
|
||||
|
||||
# Fit the model on the batches generated by datagen.flow().
|
||||
model.fit_generator(datagen.flow(X_train, Y_train,
|
||||
batch_size=batch_size),
|
||||
batch_size=batch_size),
|
||||
samples_per_epoch=X_train.shape[0],
|
||||
nb_epoch=nb_epoch,
|
||||
validation_data=(X_test, Y_test))
|
||||
|
||||
@@ -20,6 +20,8 @@ img_height = 128
|
||||
layer_name = 'block5_conv1'
|
||||
|
||||
# util function to convert a tensor into a valid image
|
||||
|
||||
|
||||
def deprocess_image(x):
|
||||
# normalize tensor: center on 0., ensure std is 0.1
|
||||
x -= x.mean()
|
||||
|
||||
+26
-15
@@ -37,8 +37,8 @@ base_image_path = args.base_image_path
|
||||
result_prefix = args.result_prefix
|
||||
|
||||
# dimensions of the generated picture.
|
||||
img_width = 600
|
||||
img_height = 600
|
||||
img_width = 600
|
||||
|
||||
# path to the model weights file.
|
||||
weights_path = 'vgg16_weights.h5'
|
||||
@@ -61,20 +61,24 @@ saved_settings = {
|
||||
settings = saved_settings['dreamy']
|
||||
|
||||
# util function to open, resize and format pictures into appropriate tensors
|
||||
|
||||
|
||||
def preprocess_image(image_path):
|
||||
img = load_img(image_path, target_size=(img_width, img_height))
|
||||
img = load_img(image_path, target_size=(img_height, img_width))
|
||||
img = img_to_array(img)
|
||||
img = np.expand_dims(img, axis=0)
|
||||
img = vgg16.preprocess_input(img)
|
||||
return img
|
||||
|
||||
# util function to convert a tensor into a valid image
|
||||
|
||||
|
||||
def deprocess_image(x):
|
||||
if K.image_dim_ordering() == 'th':
|
||||
x = x.reshape((3, img_width, img_height))
|
||||
x = x.reshape((3, img_height, img_width))
|
||||
x = x.transpose((1, 2, 0))
|
||||
else:
|
||||
x = x.reshape((img_width, img_height, 3))
|
||||
x = x.reshape((img_height, img_width, 3))
|
||||
# Remove zero-center by mean pixel
|
||||
x[:, :, 0] += 103.939
|
||||
x[:, :, 1] += 116.779
|
||||
@@ -85,9 +89,9 @@ def deprocess_image(x):
|
||||
return x
|
||||
|
||||
if K.image_dim_ordering() == 'th':
|
||||
img_size = (3, img_width, img_height)
|
||||
img_size = (3, img_height, img_width)
|
||||
else:
|
||||
img_size = (img_width, img_height, 3)
|
||||
img_size = (img_height, img_width, 3)
|
||||
# this will contain our generated image
|
||||
dream = Input(batch_shape=(1,) + img_size)
|
||||
|
||||
@@ -101,18 +105,20 @@ print('Model loaded.')
|
||||
layer_dict = dict([(layer.name, layer) for layer in model.layers])
|
||||
|
||||
# continuity loss util function
|
||||
|
||||
|
||||
def continuity_loss(x):
|
||||
assert K.ndim(x) == 4
|
||||
if K.image_dim_ordering() == 'th':
|
||||
a = K.square(x[:, :, :img_width - 1, :img_height - 1] -
|
||||
x[:, :, 1:, :img_height - 1])
|
||||
b = K.square(x[:, :, :img_width - 1, :img_height - 1] -
|
||||
x[:, :, :img_width - 1, 1:])
|
||||
a = K.square(x[:, :, :img_height - 1, :img_width - 1] -
|
||||
x[:, :, 1:, :img_width - 1])
|
||||
b = K.square(x[:, :, :img_height - 1, :img_width - 1] -
|
||||
x[:, :, :img_height - 1, 1:])
|
||||
else:
|
||||
a = K.square(x[:, :img_width - 1, :img_height-1, :] -
|
||||
x[:, 1:, :img_height - 1, :])
|
||||
b = K.square(x[:, :img_width - 1, :img_height-1, :] -
|
||||
x[:, :img_width - 1, 1:, :])
|
||||
a = K.square(x[:, :img_height - 1, :img_width - 1, :] -
|
||||
x[:, 1:, :img_width - 1, :])
|
||||
b = K.square(x[:, :img_height - 1, :img_width - 1, :] -
|
||||
x[:, :img_height - 1, 1:, :])
|
||||
return K.sum(K.pow(a + b, 1.25))
|
||||
|
||||
# define the loss
|
||||
@@ -140,12 +146,14 @@ loss += settings['dream_l2'] * K.sum(K.square(dream)) / np.prod(img_size)
|
||||
grads = K.gradients(loss, dream)
|
||||
|
||||
outputs = [loss]
|
||||
if type(grads) in {list, tuple}:
|
||||
if isinstance(grads, (list, tuple)):
|
||||
outputs += grads
|
||||
else:
|
||||
outputs.append(grads)
|
||||
|
||||
f_outputs = K.function([dream], outputs)
|
||||
|
||||
|
||||
def eval_loss_and_grads(x):
|
||||
x = x.reshape((1,) + img_size)
|
||||
outs = f_outputs([x])
|
||||
@@ -162,7 +170,10 @@ def eval_loss_and_grads(x):
|
||||
# "loss" and "grads". This is done because scipy.optimize
|
||||
# requires separate functions for loss and gradients,
|
||||
# but computing them separately would be inefficient.
|
||||
|
||||
|
||||
class Evaluator(object):
|
||||
|
||||
def __init__(self):
|
||||
self.loss_value = None
|
||||
self.grad_values = None
|
||||
|
||||
+158
-140
@@ -6,36 +6,31 @@ the different fonts thrown at it...the purpose is more to demonstrate CTC
|
||||
inside of Keras. Note that the font list may need to be updated
|
||||
for the particular OS in use.
|
||||
|
||||
This starts off with 4 letter words. After 10 or so epochs, CTC
|
||||
learns translational invariance, so longer words and groups of words
|
||||
with spaces are gradually fed in. This gradual increase in difficulty
|
||||
is handled using the TextImageGenerator class which is both a generator
|
||||
class for test/train data and a Keras callback class. Every 10 epochs
|
||||
the wordlist that the generator draws from increases in difficulty.
|
||||
This starts off with 4 letter words. For the first 12 epochs, the
|
||||
difficulty is gradually increased using the TextImageGenerator class
|
||||
which is both a generator class for test/train data and a Keras
|
||||
callback class. After 20 epochs, longer sequences are thrown at it
|
||||
by recompiling the model to handle a wider image and rebuilding
|
||||
the word list to include two words separated by a space.
|
||||
|
||||
The table below shows normalized edit distance values. Theano uses
|
||||
a slightly different CTC implementation, so some Theano-specific
|
||||
hyperparameter tuning would be needed to get it to match Tensorflow.
|
||||
a slightly different CTC implementation, hence the different results.
|
||||
|
||||
Norm. ED
|
||||
Epoch | TF | TH
|
||||
------------------------
|
||||
10 0.072 0.272
|
||||
20 0.032 0.115
|
||||
30 0.024 0.098
|
||||
40 0.023 0.108
|
||||
10 0.027 0.064
|
||||
15 0.038 0.035
|
||||
20 0.043 0.045
|
||||
25 0.014 0.019
|
||||
|
||||
This requires cairo and editdistance packages:
|
||||
pip install cairocffi
|
||||
pip install editdistance
|
||||
|
||||
Due to the use of a dummy loss function, Theano requires the following flags:
|
||||
on_unused_input='ignore'
|
||||
|
||||
Created by Mike Henry
|
||||
https://github.com/mbhenry/
|
||||
'''
|
||||
|
||||
import os
|
||||
import itertools
|
||||
import re
|
||||
@@ -47,17 +42,17 @@ from scipy import ndimage
|
||||
import pylab
|
||||
from keras import backend as K
|
||||
from keras.layers.convolutional import Convolution2D, MaxPooling2D
|
||||
from keras.layers import Input, Layer, Dense, Activation, Flatten
|
||||
from keras.layers import Reshape, Lambda, merge, Permute, TimeDistributed
|
||||
from keras.layers import Input, Dense, Activation
|
||||
from keras.layers import Reshape, Lambda, merge
|
||||
from keras.models import Model
|
||||
from keras.layers.recurrent import GRU
|
||||
from keras.optimizers import SGD
|
||||
from keras.utils import np_utils
|
||||
from keras.utils.data_utils import get_file
|
||||
from keras.preprocessing import image
|
||||
import keras.callbacks
|
||||
|
||||
OUTPUT_DIR = "image_ocr"
|
||||
|
||||
OUTPUT_DIR = 'image_ocr'
|
||||
|
||||
np.random.seed(55)
|
||||
|
||||
@@ -79,28 +74,33 @@ def speckle(img):
|
||||
# also uses a random font, a slight random rotation,
|
||||
# and a random amount of speckle noise
|
||||
|
||||
def paint_text(text, w, h):
|
||||
def paint_text(text, w, h, rotate=False, ud=False, multi_fonts=False):
|
||||
surface = cairo.ImageSurface(cairo.FORMAT_RGB24, w, h)
|
||||
with cairo.Context(surface) as context:
|
||||
context.set_source_rgb(1, 1, 1) # White
|
||||
context.paint()
|
||||
# this font list works in Centos 7
|
||||
fonts = ['Century Schoolbook', 'Courier', 'STIX', 'URW Chancery L', 'FreeMono']
|
||||
context.select_font_face(np.random.choice(fonts), cairo.FONT_SLANT_NORMAL,
|
||||
np.random.choice([cairo.FONT_WEIGHT_BOLD, cairo.FONT_WEIGHT_NORMAL]))
|
||||
context.set_font_size(40)
|
||||
if multi_fonts:
|
||||
fonts = ['Century Schoolbook', 'Courier', 'STIX', 'URW Chancery L', 'FreeMono']
|
||||
context.select_font_face(np.random.choice(fonts), cairo.FONT_SLANT_NORMAL,
|
||||
np.random.choice([cairo.FONT_WEIGHT_BOLD, cairo.FONT_WEIGHT_NORMAL]))
|
||||
else:
|
||||
context.select_font_face('Courier', cairo.FONT_SLANT_NORMAL, cairo.FONT_WEIGHT_BOLD)
|
||||
context.set_font_size(25)
|
||||
box = context.text_extents(text)
|
||||
if box[2] > w or box[3] > h:
|
||||
border_w_h = (4, 4)
|
||||
if box[2] > (w - 2 * border_w_h[1]) or box[3] > (h - 2 * border_w_h[0]):
|
||||
raise IOError('Could not fit string into image. Max char count is too large for given image width.')
|
||||
|
||||
# teach the RNN translational invariance by
|
||||
# fitting text box randomly on canvas, with some room to rotate
|
||||
border_w_h = (10, 16)
|
||||
max_shift_x = w - box[2] - border_w_h[0]
|
||||
max_shift_y = h - box[3] - border_w_h[1]
|
||||
top_left_x = np.random.randint(0, int(max_shift_x))
|
||||
top_left_y = np.random.randint(0, int(max_shift_y))
|
||||
|
||||
if ud:
|
||||
top_left_y = np.random.randint(0, int(max_shift_y))
|
||||
else:
|
||||
top_left_y = h // 2
|
||||
context.move_to(top_left_x - int(box[0]), top_left_y - int(box[1]))
|
||||
context.set_source_rgb(0, 0, 0)
|
||||
context.show_text(text)
|
||||
@@ -111,8 +111,9 @@ def paint_text(text, w, h):
|
||||
a = a[:, :, 0] # grab single channel
|
||||
a = a.astype(np.float32) / 255
|
||||
a = np.expand_dims(a, 0)
|
||||
if rotate:
|
||||
a = image.random_rotation(a, 3 * (w - top_left_x) / w + 1)
|
||||
a = speckle(a)
|
||||
a = image.random_rotation(a, 3 * (w - top_left_x) / w + 1)
|
||||
|
||||
return a
|
||||
|
||||
@@ -164,7 +165,7 @@ def is_valid_str(in_str):
|
||||
class TextImageGenerator(keras.callbacks.Callback):
|
||||
|
||||
def __init__(self, monogram_file, bigram_file, minibatch_size,
|
||||
img_w, img_h, downsample_width, val_split,
|
||||
img_w, img_h, downsample_factor, val_split,
|
||||
absolute_max_string_len=16):
|
||||
|
||||
self.minibatch_size = minibatch_size
|
||||
@@ -172,7 +173,7 @@ class TextImageGenerator(keras.callbacks.Callback):
|
||||
self.img_h = img_h
|
||||
self.monogram_file = monogram_file
|
||||
self.bigram_file = bigram_file
|
||||
self.downsample_width = downsample_width
|
||||
self.downsample_factor = downsample_factor
|
||||
self.val_split = val_split
|
||||
self.blank_label = self.get_output_size() - 1
|
||||
self.absolute_max_string_len = absolute_max_string_len
|
||||
@@ -187,7 +188,8 @@ class TextImageGenerator(keras.callbacks.Callback):
|
||||
assert num_words % self.minibatch_size == 0
|
||||
assert (self.val_split * num_words) % self.minibatch_size == 0
|
||||
self.num_words = num_words
|
||||
self.string_list = []
|
||||
self.string_list = [''] * self.num_words
|
||||
tmp_string_list = []
|
||||
self.max_string_len = max_string_len
|
||||
self.Y_data = np.ones([self.num_words, self.absolute_max_string_len]) * -1
|
||||
self.X_text = []
|
||||
@@ -196,25 +198,28 @@ class TextImageGenerator(keras.callbacks.Callback):
|
||||
# monogram file is sorted by frequency in english speech
|
||||
with open(self.monogram_file, 'rt') as f:
|
||||
for line in f:
|
||||
if len(self.string_list) == int(self.num_words * mono_fraction):
|
||||
if len(tmp_string_list) == int(self.num_words * mono_fraction):
|
||||
break
|
||||
word = line.rstrip()
|
||||
if max_string_len == -1 or max_string_len is None or len(word) <= max_string_len:
|
||||
self.string_list.append(word)
|
||||
tmp_string_list.append(word)
|
||||
|
||||
# bigram file contains common word pairings in english speech
|
||||
with open(self.bigram_file, 'rt') as f:
|
||||
lines = f.readlines()
|
||||
for line in lines:
|
||||
if len(self.string_list) == self.num_words:
|
||||
if len(tmp_string_list) == self.num_words:
|
||||
break
|
||||
columns = line.lower().split()
|
||||
word = columns[0] + ' ' + columns[1]
|
||||
if is_valid_str(word) and \
|
||||
(max_string_len == -1 or max_string_len is None or len(word) <= max_string_len):
|
||||
self.string_list.append(word)
|
||||
if len(self.string_list) != self.num_words:
|
||||
tmp_string_list.append(word)
|
||||
if len(tmp_string_list) != self.num_words:
|
||||
raise IOError('Could not pull enough words from supplied monogram and bigram files. ')
|
||||
# interlace to mix up the easy and hard words
|
||||
self.string_list[::2] = tmp_string_list[:self.num_words // 2]
|
||||
self.string_list[1::2] = tmp_string_list[self.num_words // 2:]
|
||||
|
||||
for i, word in enumerate(self.string_list):
|
||||
self.Y_len[i] = len(word)
|
||||
@@ -228,37 +233,38 @@ class TextImageGenerator(keras.callbacks.Callback):
|
||||
# each time an image is requested from train/val/test, a new random
|
||||
# painting of the text is performed
|
||||
def get_batch(self, index, size, train):
|
||||
# width and height are backwards from typical Keras convention
|
||||
# because width is the time dimension when it gets fed into the RNN
|
||||
if K.image_dim_ordering() == 'th':
|
||||
X_data = np.ones([size, 1, self.img_h, self.img_w])
|
||||
X_data = np.ones([size, 1, self.img_w, self.img_h])
|
||||
else:
|
||||
X_data = np.ones([size, self.img_h, self.img_w, 1])
|
||||
X_data = np.ones([size, self.img_w, self.img_h, 1])
|
||||
|
||||
labels = np.ones([size, self.absolute_max_string_len])
|
||||
input_length = np.zeros([size, 1])
|
||||
label_length = np.zeros([size, 1])
|
||||
source_str = []
|
||||
|
||||
for i in range(0, size):
|
||||
# Mix in some blank inputs. This seems to be important for
|
||||
# achieving translational invariance
|
||||
if train and i > size - 4:
|
||||
if K.image_dim_ordering() == 'th':
|
||||
X_data[i, 0, :, :] = paint_text('', self.img_w, self.img_h)
|
||||
X_data[i, 0, 0:self.img_w, :] = self.paint_func('')[0, :, :].T
|
||||
else:
|
||||
X_data[i, :, :, 0] = paint_text('', self.img_w, self.img_h)
|
||||
X_data[i, 0:self.img_w, :, 0] = self.paint_func('',)[0, :, :].T
|
||||
labels[i, 0] = self.blank_label
|
||||
input_length[i] = self.downsample_width
|
||||
input_length[i] = self.img_w // self.downsample_factor - 2
|
||||
label_length[i] = 1
|
||||
source_str.append('')
|
||||
else:
|
||||
if K.image_dim_ordering() == 'th':
|
||||
X_data[i, 0, :, :] = paint_text(self.X_text[index + i], self.img_w, self.img_h)
|
||||
X_data[i, 0, 0:self.img_w, :] = self.paint_func(self.X_text[index + i])[0, :, :].T
|
||||
else:
|
||||
X_data[i, :, :, 0] = paint_text(self.X_text[index + i], self.img_w, self.img_h)
|
||||
X_data[i, 0:self.img_w, :, 0] = self.paint_func(self.X_text[index + i])[0, :, :].T
|
||||
labels[i, :] = self.Y_data[index + i]
|
||||
input_length[i] = self.downsample_width
|
||||
input_length[i] = self.img_w // self.downsample_factor - 2
|
||||
label_length[i] = self.Y_len[index + i]
|
||||
source_str.append(self.X_text[index + i])
|
||||
|
||||
inputs = {'the_input': X_data,
|
||||
'the_labels': labels,
|
||||
'input_length': input_length,
|
||||
@@ -287,19 +293,23 @@ class TextImageGenerator(keras.callbacks.Callback):
|
||||
yield ret
|
||||
|
||||
def on_train_begin(self, logs={}):
|
||||
# translational invariance seems to be the hardest thing
|
||||
# for the RNN to learn, so start with <= 4 letter words.
|
||||
self.build_word_list(16000, 4, 1)
|
||||
self.paint_func = lambda text: paint_text(text, self.img_w, self.img_h,
|
||||
rotate=False, ud=False, multi_fonts=False)
|
||||
|
||||
def on_epoch_begin(self, epoch, logs={}):
|
||||
# After 10 epochs, translational invariance should be learned
|
||||
# so start feeding longer words and eventually multiple words with spaces
|
||||
if epoch == 10:
|
||||
self.build_word_list(32000, 8, 1)
|
||||
if epoch == 20:
|
||||
self.build_word_list(32000, 8, 0.6)
|
||||
if epoch == 30:
|
||||
self.build_word_list(64000, 12, 0.5)
|
||||
# rebind the paint function to implement curriculum learning
|
||||
if epoch >= 3 and epoch < 6:
|
||||
self.paint_func = lambda text: paint_text(text, self.img_w, self.img_h,
|
||||
rotate=False, ud=True, multi_fonts=False)
|
||||
elif epoch >= 6 and epoch < 9:
|
||||
self.paint_func = lambda text: paint_text(text, self.img_w, self.img_h,
|
||||
rotate=False, ud=True, multi_fonts=True)
|
||||
elif epoch >= 9:
|
||||
self.paint_func = lambda text: paint_text(text, self.img_w, self.img_h,
|
||||
rotate=True, ud=True, multi_fonts=True)
|
||||
if epoch >= 21 and self.max_string_len < 12:
|
||||
self.build_word_list(32000, 12, 0.5)
|
||||
|
||||
|
||||
# the actual loss calc occurs here despite it not being
|
||||
@@ -335,13 +345,14 @@ def decode_batch(test_func, word_batch):
|
||||
|
||||
class VizCallback(keras.callbacks.Callback):
|
||||
|
||||
def __init__(self, test_func, text_img_gen, num_display_words=6):
|
||||
def __init__(self, run_name, test_func, text_img_gen, num_display_words=6):
|
||||
self.test_func = test_func
|
||||
self.output_dir = os.path.join(
|
||||
OUTPUT_DIR, datetime.datetime.now().strftime('%A, %d. %B %Y %I.%M%p'))
|
||||
OUTPUT_DIR, run_name)
|
||||
self.text_img_gen = text_img_gen
|
||||
self.num_display_words = num_display_words
|
||||
os.makedirs(self.output_dir)
|
||||
if not os.path.exists(self.output_dir):
|
||||
os.makedirs(self.output_dir)
|
||||
|
||||
def show_edit_distance(self, num):
|
||||
num_left = num
|
||||
@@ -362,109 +373,116 @@ class VizCallback(keras.callbacks.Callback):
|
||||
% (num, mean_ed, mean_norm_ed))
|
||||
|
||||
def on_epoch_end(self, epoch, logs={}):
|
||||
self.model.save_weights(os.path.join(self.output_dir, 'weights%02d.h5' % epoch))
|
||||
self.model.save_weights(os.path.join(self.output_dir, 'weights%02d.h5' % (epoch)))
|
||||
self.show_edit_distance(256)
|
||||
word_batch = next(self.text_img_gen)[0]
|
||||
res = decode_batch(self.test_func, word_batch['the_input'][0:self.num_display_words])
|
||||
|
||||
if word_batch['the_input'][0].shape[0] < 256:
|
||||
cols = 2
|
||||
else:
|
||||
cols = 1
|
||||
for i in range(self.num_display_words):
|
||||
pylab.subplot(self.num_display_words, 1, i + 1)
|
||||
pylab.subplot(self.num_display_words // cols, cols, i + 1)
|
||||
if K.image_dim_ordering() == 'th':
|
||||
the_input = word_batch['the_input'][i, 0, :, :]
|
||||
else:
|
||||
the_input = word_batch['the_input'][i, :, :, 0]
|
||||
pylab.imshow(the_input, cmap='Greys_r')
|
||||
pylab.xlabel('Truth = \'%s\' Decoded = \'%s\'' % (word_batch['source_str'][i], res[i]))
|
||||
pylab.imshow(the_input.T, cmap='Greys_r')
|
||||
pylab.xlabel('Truth = \'%s\'\nDecoded = \'%s\'' % (word_batch['source_str'][i], res[i]))
|
||||
fig = pylab.gcf()
|
||||
fig.set_size_inches(10, 12)
|
||||
pylab.savefig(os.path.join(self.output_dir, 'e%02d.png' % epoch))
|
||||
fig.set_size_inches(10, 13)
|
||||
pylab.savefig(os.path.join(self.output_dir, 'e%02d.png' % (epoch)))
|
||||
pylab.close()
|
||||
|
||||
# Input Parameters
|
||||
img_h = 64
|
||||
img_w = 512
|
||||
nb_epoch = 50
|
||||
minibatch_size = 32
|
||||
words_per_epoch = 16000
|
||||
val_split = 0.2
|
||||
val_words = int(words_per_epoch * (val_split))
|
||||
|
||||
# Network parameters
|
||||
conv_num_filters = 16
|
||||
filter_size = 3
|
||||
pool_size_1 = 4
|
||||
pool_size_2 = 2
|
||||
time_dense_size = 32
|
||||
rnn_size = 512
|
||||
time_steps = img_w // (pool_size_1 * pool_size_2)
|
||||
def train(run_name, start_epoch, stop_epoch, img_w):
|
||||
# Input Parameters
|
||||
img_h = 64
|
||||
words_per_epoch = 16000
|
||||
val_split = 0.2
|
||||
val_words = int(words_per_epoch * (val_split))
|
||||
|
||||
if K.image_dim_ordering() == 'th':
|
||||
input_shape = (1, img_h, img_w)
|
||||
else:
|
||||
input_shape = (img_h, img_w, 1)
|
||||
# Network parameters
|
||||
conv_num_filters = 16
|
||||
filter_size = 3
|
||||
pool_size = 2
|
||||
time_dense_size = 32
|
||||
rnn_size = 512
|
||||
|
||||
fdir = os.path.dirname(get_file('wordlists.tgz',
|
||||
origin='http://www.isosemi.com/datasets/wordlists.tgz', untar=True))
|
||||
if K.image_dim_ordering() == 'th':
|
||||
input_shape = (1, img_w, img_h)
|
||||
else:
|
||||
input_shape = (img_w, img_h, 1)
|
||||
|
||||
img_gen = TextImageGenerator(monogram_file=os.path.join(fdir, 'wordlist_mono_clean.txt'),
|
||||
bigram_file=os.path.join(fdir, 'wordlist_bi_clean.txt'),
|
||||
minibatch_size=32,
|
||||
img_w=img_w,
|
||||
img_h=img_h,
|
||||
downsample_width=img_w // (pool_size_1 * pool_size_2) - 2,
|
||||
val_split=words_per_epoch - val_words)
|
||||
fdir = os.path.dirname(get_file('wordlists.tgz',
|
||||
origin='http://www.isosemi.com/datasets/wordlists.tgz', untar=True))
|
||||
|
||||
act = 'relu'
|
||||
input_data = Input(name='the_input', shape=input_shape, dtype='float32')
|
||||
inner = Convolution2D(conv_num_filters, filter_size, filter_size, border_mode='same',
|
||||
activation=act, name='conv1')(input_data)
|
||||
inner = MaxPooling2D(pool_size=(pool_size_1, pool_size_1), name='max1')(inner)
|
||||
inner = Convolution2D(conv_num_filters, filter_size, filter_size, border_mode='same',
|
||||
activation=act, name='conv2')(inner)
|
||||
inner = MaxPooling2D(pool_size=(pool_size_2, pool_size_2), name='max2')(inner)
|
||||
img_gen = TextImageGenerator(monogram_file=os.path.join(fdir, 'wordlist_mono_clean.txt'),
|
||||
bigram_file=os.path.join(fdir, 'wordlist_bi_clean.txt'),
|
||||
minibatch_size=32,
|
||||
img_w=img_w,
|
||||
img_h=img_h,
|
||||
downsample_factor=(pool_size ** 2),
|
||||
val_split=words_per_epoch - val_words
|
||||
)
|
||||
act = 'relu'
|
||||
input_data = Input(name='the_input', shape=input_shape, dtype='float32')
|
||||
inner = Convolution2D(conv_num_filters, filter_size, filter_size, border_mode='same',
|
||||
activation=act, init='he_normal', name='conv1')(input_data)
|
||||
inner = MaxPooling2D(pool_size=(pool_size, pool_size), name='max1')(inner)
|
||||
inner = Convolution2D(conv_num_filters, filter_size, filter_size, border_mode='same',
|
||||
activation=act, init='he_normal', name='conv2')(inner)
|
||||
inner = MaxPooling2D(pool_size=(pool_size, pool_size), name='max2')(inner)
|
||||
|
||||
conv_to_rnn_dims = ((img_h // (pool_size_1 * pool_size_2)) * conv_num_filters, img_w // (pool_size_1 * pool_size_2))
|
||||
inner = Reshape(target_shape=conv_to_rnn_dims, name='reshape')(inner)
|
||||
inner = Permute(dims=(2, 1), name='permute')(inner)
|
||||
conv_to_rnn_dims = (img_w // (pool_size ** 2), (img_h // (pool_size ** 2)) * conv_num_filters)
|
||||
inner = Reshape(target_shape=conv_to_rnn_dims, name='reshape')(inner)
|
||||
|
||||
# cuts down input size going into RNN:
|
||||
inner = TimeDistributed(Dense(time_dense_size, activation=act, name='dense1'))(inner)
|
||||
# cuts down input size going into RNN:
|
||||
inner = Dense(time_dense_size, activation=act, name='dense1')(inner)
|
||||
|
||||
# Two layers of bidirectional GRUs
|
||||
# GRU seems to work as well, if not better than LSTM:
|
||||
gru_1 = GRU(rnn_size, return_sequences=True, name='gru1')(inner)
|
||||
gru_1b = GRU(rnn_size, return_sequences=True, go_backwards=True, name='gru1_b')(inner)
|
||||
gru1_merged = merge([gru_1, gru_1b], mode='sum')
|
||||
gru_2 = GRU(rnn_size, return_sequences=True, name='gru2')(gru1_merged)
|
||||
gru_2b = GRU(rnn_size, return_sequences=True, go_backwards=True)(gru1_merged)
|
||||
# Two layers of bidirectional GRUs
|
||||
# GRU seems to work as well, if not better than LSTM:
|
||||
gru_1 = GRU(rnn_size, return_sequences=True, init='he_normal', name='gru1')(inner)
|
||||
gru_1b = GRU(rnn_size, return_sequences=True, go_backwards=True, init='he_normal', name='gru1_b')(inner)
|
||||
gru1_merged = merge([gru_1, gru_1b], mode='sum')
|
||||
gru_2 = GRU(rnn_size, return_sequences=True, init='he_normal', name='gru2')(gru1_merged)
|
||||
gru_2b = GRU(rnn_size, return_sequences=True, go_backwards=True, init='he_normal', name='gru2_b')(gru1_merged)
|
||||
|
||||
# transforms RNN output to character activations:
|
||||
inner = TimeDistributed(Dense(img_gen.get_output_size(), name='dense2'))(merge([gru_2, gru_2b], mode='concat'))
|
||||
y_pred = Activation('softmax', name='softmax')(inner)
|
||||
Model(input=[input_data], output=y_pred).summary()
|
||||
# transforms RNN output to character activations:
|
||||
inner = Dense(img_gen.get_output_size(), init='he_normal',
|
||||
name='dense2')(merge([gru_2, gru_2b], mode='concat'))
|
||||
y_pred = Activation('softmax', name='softmax')(inner)
|
||||
Model(input=[input_data], output=y_pred).summary()
|
||||
|
||||
labels = Input(name='the_labels', shape=[img_gen.absolute_max_string_len], dtype='float32')
|
||||
input_length = Input(name='input_length', shape=[1], dtype='int64')
|
||||
label_length = Input(name='label_length', shape=[1], dtype='int64')
|
||||
# Keras doesn't currently support loss funcs with extra parameters
|
||||
# so CTC loss is implemented in a lambda layer
|
||||
loss_out = Lambda(ctc_lambda_func, output_shape=(1,), name="ctc")([y_pred, labels, input_length, label_length])
|
||||
labels = Input(name='the_labels', shape=[img_gen.absolute_max_string_len], dtype='float32')
|
||||
input_length = Input(name='input_length', shape=[1], dtype='int64')
|
||||
label_length = Input(name='label_length', shape=[1], dtype='int64')
|
||||
# Keras doesn't currently support loss funcs with extra parameters
|
||||
# so CTC loss is implemented in a lambda layer
|
||||
loss_out = Lambda(ctc_lambda_func, output_shape=(1,), name='ctc')([y_pred, labels, input_length, label_length])
|
||||
|
||||
lr = 0.03
|
||||
# clipnorm seems to speed up convergence
|
||||
clipnorm = 5
|
||||
sgd = SGD(lr=lr, decay=3e-7, momentum=0.9, nesterov=True, clipnorm=clipnorm)
|
||||
# clipnorm seems to speed up convergence
|
||||
sgd = SGD(lr=0.02, decay=1e-6, momentum=0.9, nesterov=True, clipnorm=5)
|
||||
|
||||
model = Model(input=[input_data, labels, input_length, label_length], output=[loss_out])
|
||||
model = Model(input=[input_data, labels, input_length, label_length], output=[loss_out])
|
||||
|
||||
# the loss calc occurs elsewhere, so use a dummy lambda func for the loss
|
||||
model.compile(loss={'ctc': lambda y_true, y_pred: y_pred}, optimizer=sgd)
|
||||
# the loss calc occurs elsewhere, so use a dummy lambda func for the loss
|
||||
model.compile(loss={'ctc': lambda y_true, y_pred: y_pred}, optimizer=sgd)
|
||||
if start_epoch > 0:
|
||||
weight_file = os.path.join(OUTPUT_DIR, os.path.join(run_name, 'weights%02d.h5' % (start_epoch - 1)))
|
||||
model.load_weights(weight_file)
|
||||
# captures output of softmax so we can decode the output during visualization
|
||||
test_func = K.function([input_data], [y_pred])
|
||||
|
||||
# captures output of softmax so we can decode the output during visualization
|
||||
test_func = K.function([input_data], [y_pred])
|
||||
viz_cb = VizCallback(run_name, test_func, img_gen.next_val())
|
||||
|
||||
viz_cb = VizCallback(test_func, img_gen.next_val())
|
||||
model.fit_generator(generator=img_gen.next_train(), samples_per_epoch=(words_per_epoch - val_words),
|
||||
nb_epoch=stop_epoch, validation_data=img_gen.next_val(), nb_val_samples=val_words,
|
||||
callbacks=[viz_cb, img_gen], initial_epoch=start_epoch)
|
||||
|
||||
model.fit_generator(generator=img_gen.next_train(), samples_per_epoch=(words_per_epoch - val_words),
|
||||
nb_epoch=nb_epoch, validation_data=img_gen.next_val(), nb_val_samples=val_words,
|
||||
callbacks=[viz_cb, img_gen])
|
||||
|
||||
if __name__ == '__main__':
|
||||
run_name = datetime.datetime.now().strftime('%Y:%m:%d:%H:%M:%S')
|
||||
train(run_name, 0, 20, 128)
|
||||
# increase to wider images and start at epoch 20. The learned weights are reloaded
|
||||
train(run_name, 20, 25, 512)
|
||||
|
||||
@@ -10,7 +10,7 @@ np.random.seed(1337) # for reproducibility
|
||||
|
||||
from keras.preprocessing import sequence
|
||||
from keras.models import Sequential
|
||||
from keras.layers import Dense, Dropout, Embedding, LSTM, Input, Bidirectional
|
||||
from keras.layers import Dense, Dropout, Embedding, LSTM, Bidirectional
|
||||
from keras.datasets import imdb
|
||||
|
||||
|
||||
|
||||
@@ -16,7 +16,6 @@ from keras.layers import Dense, Dropout, Activation
|
||||
from keras.layers import Embedding
|
||||
from keras.layers import Convolution1D, GlobalMaxPooling1D
|
||||
from keras.datasets import imdb
|
||||
from keras import backend as K
|
||||
|
||||
|
||||
# set parameters:
|
||||
|
||||
@@ -54,9 +54,9 @@ def add_ngram(sequences, token_indice, ngram_range=2):
|
||||
new_sequences = []
|
||||
for input_list in sequences:
|
||||
new_list = input_list[:]
|
||||
for i in range(len(new_list)-ngram_range+1):
|
||||
for ngram_value in range(2, ngram_range+1):
|
||||
ngram = tuple(new_list[i:i+ngram_value])
|
||||
for i in range(len(new_list) - ngram_range + 1):
|
||||
for ngram_value in range(2, ngram_range + 1):
|
||||
ngram = tuple(new_list[i:i + ngram_value])
|
||||
if ngram in token_indice:
|
||||
new_list.append(token_indice[ngram])
|
||||
new_sequences.append(new_list)
|
||||
@@ -84,7 +84,7 @@ if ngram_range > 1:
|
||||
# Create a set of unique n-grams from the training set.
|
||||
ngram_set = set()
|
||||
for input_list in X_train:
|
||||
for i in range(2, ngram_range+1):
|
||||
for i in range(2, ngram_range + 1):
|
||||
set_of_ngram = create_ngram_set(input_list, ngram_value=i)
|
||||
ngram_set.update(set_of_ngram)
|
||||
|
||||
@@ -92,7 +92,7 @@ if ngram_range > 1:
|
||||
# Integer values are greater than max_features in order
|
||||
# to avoid collision with existing features.
|
||||
start_index = max_features + 1
|
||||
token_indice = {v: k+start_index for k, v in enumerate(ngram_set)}
|
||||
token_indice = {v: k + start_index for k, v in enumerate(ngram_set)}
|
||||
indice_token = {token_indice[k]: k for k in token_indice}
|
||||
|
||||
# max_features is the highest integer that could be found in the dataset.
|
||||
|
||||
@@ -15,10 +15,9 @@ import numpy as np
|
||||
np.random.seed(1337) # for reproducibility
|
||||
|
||||
from keras.preprocessing import sequence
|
||||
from keras.utils import np_utils
|
||||
from keras.models import Sequential
|
||||
from keras.layers import Dense, Dropout, Activation, Embedding
|
||||
from keras.layers import LSTM, SimpleRNN, GRU
|
||||
from keras.layers import Dense, Activation, Embedding
|
||||
from keras.layers import LSTM
|
||||
from keras.datasets import imdb
|
||||
|
||||
max_features = 20000
|
||||
|
||||
@@ -12,7 +12,7 @@ has at least ~100k characters. ~1M is better.
|
||||
|
||||
from __future__ import print_function
|
||||
from keras.models import Sequential
|
||||
from keras.layers import Dense, Activation, Dropout
|
||||
from keras.layers import Dense, Activation
|
||||
from keras.layers import LSTM
|
||||
from keras.optimizers import RMSprop
|
||||
from keras.utils.data_utils import get_file
|
||||
|
||||
@@ -8,7 +8,7 @@ document vector is considered to preserve both the word-level and
|
||||
sentence-level structure of the context.
|
||||
|
||||
# References
|
||||
- [A Hierarchical Neural Autoencoder for Paragraphs and Documents](https://web.stanford.edu/~jurafsky/pubs/P15-1107.pdf)
|
||||
- [A Hierarchical Neural Autoencoder for Paragraphs and Documents](https://arxiv.org/abs/1506.01057)
|
||||
Encodes paragraphs and documents with HRNN.
|
||||
Results have shown that HRNN outperforms standard
|
||||
RNNs and may play some role in more sophisticated generation tasks like
|
||||
@@ -27,7 +27,7 @@ After 5 epochs: train acc: 0.9858, val acc: 0.9864
|
||||
from __future__ import print_function
|
||||
|
||||
from keras.datasets import mnist
|
||||
from keras.models import Sequential, Model
|
||||
from keras.models import Model
|
||||
from keras.layers import Input, Dense, TimeDistributed
|
||||
from keras.layers import LSTM
|
||||
from keras.utils import np_utils
|
||||
|
||||
@@ -12,7 +12,7 @@ np.random.seed(1337) # for reproducibility
|
||||
from keras.datasets import mnist
|
||||
from keras.models import Sequential
|
||||
from keras.layers.core import Dense, Dropout, Activation
|
||||
from keras.optimizers import SGD, Adam, RMSprop
|
||||
from keras.optimizers import RMSprop
|
||||
from keras.utils import np_utils
|
||||
|
||||
|
||||
|
||||
@@ -19,7 +19,7 @@ import random
|
||||
from keras.datasets import mnist
|
||||
from keras.models import Sequential, Model
|
||||
from keras.layers import Dense, Dropout, Input, Lambda
|
||||
from keras.optimizers import SGD, RMSprop
|
||||
from keras.optimizers import RMSprop
|
||||
from keras import backend as K
|
||||
|
||||
|
||||
@@ -50,7 +50,7 @@ def create_pairs(x, digit_indices):
|
||||
n = min([len(digit_indices[d]) for d in range(10)]) - 1
|
||||
for d in range(10):
|
||||
for i in range(n):
|
||||
z1, z2 = digit_indices[d][i], digit_indices[d][i+1]
|
||||
z1, z2 = digit_indices[d][i], digit_indices[d][i + 1]
|
||||
pairs += [[x[z1], x[z2]]]
|
||||
inc = random.randrange(1, 10)
|
||||
dn = (d + inc) % 10
|
||||
@@ -75,7 +75,7 @@ def create_base_network(input_dim):
|
||||
def compute_accuracy(predictions, labels):
|
||||
'''Compute classification accuracy with a fixed threshold on distances.
|
||||
'''
|
||||
return np.mean(labels == (predictions.ravel() > 0.5))
|
||||
return labels[predictions.ravel() < 0.5].mean()
|
||||
|
||||
|
||||
# the data, shuffled and split between train and test sets
|
||||
|
||||
@@ -13,6 +13,7 @@ from keras.layers import Dense, Dropout, Activation, Flatten
|
||||
from keras.layers import Convolution2D, MaxPooling2D
|
||||
from keras.utils import np_utils
|
||||
from keras.wrappers.scikit_learn import KerasClassifier
|
||||
from keras import backend as K
|
||||
from sklearn.grid_search import GridSearchCV
|
||||
|
||||
|
||||
@@ -23,8 +24,16 @@ img_rows, img_cols = 28, 28
|
||||
|
||||
# load training data and do basic data normalization
|
||||
(X_train, y_train), (X_test, y_test) = mnist.load_data()
|
||||
X_train = X_train.reshape(X_train.shape[0], 1, img_rows, img_cols)
|
||||
X_test = X_test.reshape(X_test.shape[0], 1, img_rows, img_cols)
|
||||
|
||||
if K.image_dim_ordering() == 'th':
|
||||
X_train = X_train.reshape(X_train.shape[0], 1, img_rows, img_cols)
|
||||
X_test = X_test.reshape(X_test.shape[0], 1, img_rows, img_cols)
|
||||
input_shape = (1, img_rows, img_cols)
|
||||
else:
|
||||
X_train = X_train.reshape(X_train.shape[0], img_rows, img_cols, 1)
|
||||
X_test = X_test.reshape(X_test.shape[0], img_rows, img_cols, 1)
|
||||
input_shape = (img_rows, img_cols, 1)
|
||||
|
||||
X_train = X_train.astype('float32')
|
||||
X_test = X_test.astype('float32')
|
||||
X_train /= 255
|
||||
@@ -34,6 +43,7 @@ X_test /= 255
|
||||
y_train = np_utils.to_categorical(y_train, nb_classes)
|
||||
y_test = np_utils.to_categorical(y_test, nb_classes)
|
||||
|
||||
|
||||
def make_model(dense_layer_sizes, nb_filters, nb_conv, nb_pool):
|
||||
'''Creates model comprised of 2 convolutional layers followed by dense layers
|
||||
|
||||
@@ -47,7 +57,7 @@ def make_model(dense_layer_sizes, nb_filters, nb_conv, nb_pool):
|
||||
|
||||
model.add(Convolution2D(nb_filters, nb_conv, nb_conv,
|
||||
border_mode='valid',
|
||||
input_shape=(1, img_rows, img_cols)))
|
||||
input_shape=input_shape))
|
||||
model.add(Activation('relu'))
|
||||
model.add(Convolution2D(nb_filters, nb_conv, nb_conv))
|
||||
model.add(Activation('relu'))
|
||||
|
||||
@@ -76,11 +76,11 @@ def getwhere(x):
|
||||
return K.gradients(K.sum(y_postpool), y_prepool)
|
||||
|
||||
if K.backend() == 'tensorflow':
|
||||
raise Exception('This example can only run with the '
|
||||
'Theano backend for the time being, '
|
||||
'because it requires taking the gradient '
|
||||
'of a gradient, which isn\'t '
|
||||
'supported for all TF ops.')
|
||||
raise RuntimeError('This example can only run with the '
|
||||
'Theano backend for the time being, '
|
||||
'because it requires taking the gradient '
|
||||
'of a gradient, which isn\'t '
|
||||
'supported for all TF ops.')
|
||||
|
||||
# This example assume 'th' dim ordering.
|
||||
K.set_image_dim_ordering('th')
|
||||
|
||||
@@ -47,7 +47,7 @@ from scipy.optimize import fmin_l_bfgs_b
|
||||
from scipy.misc import imread, imsave
|
||||
|
||||
from keras import backend as K
|
||||
from keras.layers import Input, Convolution2D, MaxPooling2D, AveragePooling2D
|
||||
from keras.layers import Input, AveragePooling2D
|
||||
from keras.models import Model
|
||||
from keras.preprocessing.image import load_img, img_to_array
|
||||
from keras.applications import vgg19
|
||||
@@ -301,7 +301,7 @@ loss_grads = K.gradients(loss, target_image)
|
||||
|
||||
# Evaluator class for computing efficiency
|
||||
outputs = [loss]
|
||||
if type(loss_grads) in {list, tuple}:
|
||||
if isinstance(loss_grads, (list, tuple)):
|
||||
outputs += loss_grads
|
||||
else:
|
||||
outputs.append(loss_grads)
|
||||
|
||||
@@ -88,11 +88,13 @@ style_weight = args.style_weight
|
||||
content_weight = args.content_weight
|
||||
|
||||
# dimensions of the generated picture.
|
||||
width, height = load_img(base_image_path).size
|
||||
img_nrows = 400
|
||||
img_ncols = 400
|
||||
assert img_ncols == img_nrows, 'Due to the use of the Gram matrix, width and height must match.'
|
||||
img_ncols = int(width * img_nrows / height)
|
||||
|
||||
# util function to open, resize and format pictures into appropriate tensors
|
||||
|
||||
|
||||
def preprocess_image(image_path):
|
||||
img = load_img(image_path, target_size=(img_nrows, img_ncols))
|
||||
img = img_to_array(img)
|
||||
@@ -101,6 +103,8 @@ def preprocess_image(image_path):
|
||||
return img
|
||||
|
||||
# util function to convert a tensor into a valid image
|
||||
|
||||
|
||||
def deprocess_image(x):
|
||||
if K.image_dim_ordering() == 'th':
|
||||
x = x.reshape((3, img_nrows, img_ncols))
|
||||
@@ -144,6 +148,8 @@ outputs_dict = dict([(layer.name, layer.output) for layer in model.layers])
|
||||
# first we need to define 4 util functions
|
||||
|
||||
# the gram matrix of an image tensor (feature-wise outer product)
|
||||
|
||||
|
||||
def gram_matrix(x):
|
||||
assert K.ndim(x) == 3
|
||||
if K.image_dim_ordering() == 'th':
|
||||
@@ -158,6 +164,8 @@ def gram_matrix(x):
|
||||
# It is based on the gram matrices (which capture style) of
|
||||
# feature maps from the style reference image
|
||||
# and from the generated image
|
||||
|
||||
|
||||
def style_loss(style, combination):
|
||||
assert K.ndim(style) == 3
|
||||
assert K.ndim(combination) == 3
|
||||
@@ -170,19 +178,23 @@ def style_loss(style, combination):
|
||||
# an auxiliary loss function
|
||||
# designed to maintain the "content" of the
|
||||
# base image in the generated image
|
||||
|
||||
|
||||
def content_loss(base, combination):
|
||||
return K.sum(K.square(combination - base))
|
||||
|
||||
# the 3rd loss function, total variation loss,
|
||||
# designed to keep the generated image locally coherent
|
||||
|
||||
|
||||
def total_variation_loss(x):
|
||||
assert K.ndim(x) == 4
|
||||
if K.image_dim_ordering() == 'th':
|
||||
a = K.square(x[:, :, :img_nrows-1, :img_ncols-1] - x[:, :, 1:, :img_ncols-1])
|
||||
b = K.square(x[:, :, :img_nrows-1, :img_ncols-1] - x[:, :, :img_nrows-1, 1:])
|
||||
a = K.square(x[:, :, :img_nrows - 1, :img_ncols - 1] - x[:, :, 1:, :img_ncols - 1])
|
||||
b = K.square(x[:, :, :img_nrows - 1, :img_ncols - 1] - x[:, :, :img_nrows - 1, 1:])
|
||||
else:
|
||||
a = K.square(x[:, :img_nrows-1, :img_ncols-1, :] - x[:, 1:, :img_ncols-1, :])
|
||||
b = K.square(x[:, :img_nrows-1, :img_ncols-1, :] - x[:, :img_nrows-1, 1:, :])
|
||||
a = K.square(x[:, :img_nrows - 1, :img_ncols - 1, :] - x[:, 1:, :img_ncols - 1, :])
|
||||
b = K.square(x[:, :img_nrows - 1, :img_ncols - 1, :] - x[:, :img_nrows - 1, 1:, :])
|
||||
return K.sum(K.pow(a + b, 1.25))
|
||||
|
||||
# combine these loss functions into a single scalar
|
||||
@@ -208,13 +220,14 @@ loss += total_variation_weight * total_variation_loss(combination_image)
|
||||
grads = K.gradients(loss, combination_image)
|
||||
|
||||
outputs = [loss]
|
||||
if type(grads) in {list, tuple}:
|
||||
if isinstance(grads, (list, tuple)):
|
||||
outputs += grads
|
||||
else:
|
||||
outputs.append(grads)
|
||||
|
||||
f_outputs = K.function([combination_image], outputs)
|
||||
|
||||
|
||||
def eval_loss_and_grads(x):
|
||||
if K.image_dim_ordering() == 'th':
|
||||
x = x.reshape((1, 3, img_nrows, img_ncols))
|
||||
@@ -234,7 +247,10 @@ def eval_loss_and_grads(x):
|
||||
# "loss" and "grads". This is done because scipy.optimize
|
||||
# requires separate functions for loss and gradients,
|
||||
# but computing them separately would be inefficient.
|
||||
|
||||
|
||||
class Evaluator(object):
|
||||
|
||||
def __init__(self):
|
||||
self.loss_value = None
|
||||
self.grads_values = None
|
||||
|
||||
@@ -102,9 +102,9 @@ print('Preparing embedding matrix.')
|
||||
|
||||
# prepare embedding matrix
|
||||
nb_words = min(MAX_NB_WORDS, len(word_index))
|
||||
embedding_matrix = np.zeros((nb_words + 1, EMBEDDING_DIM))
|
||||
embedding_matrix = np.zeros((nb_words, EMBEDDING_DIM))
|
||||
for word, i in word_index.items():
|
||||
if i > MAX_NB_WORDS:
|
||||
if i >= MAX_NB_WORDS:
|
||||
continue
|
||||
embedding_vector = embeddings_index.get(word)
|
||||
if embedding_vector is not None:
|
||||
@@ -113,7 +113,7 @@ for word, i in word_index.items():
|
||||
|
||||
# load pre-trained word embeddings into an Embedding layer
|
||||
# note that we set trainable = False so as to keep the embeddings fixed
|
||||
embedding_layer = Embedding(nb_words + 1,
|
||||
embedding_layer = Embedding(nb_words,
|
||||
EMBEDDING_DIM,
|
||||
weights=[embedding_matrix],
|
||||
input_length=MAX_SEQUENCE_LENGTH,
|
||||
|
||||
@@ -21,7 +21,7 @@ print('Loading data...')
|
||||
print(len(X_train), 'train sequences')
|
||||
print(len(X_test), 'test sequences')
|
||||
|
||||
nb_classes = np.max(y_train)+1
|
||||
nb_classes = np.max(y_train) + 1
|
||||
print(nb_classes, 'classes')
|
||||
|
||||
print('Vectorizing sequence data...')
|
||||
|
||||
@@ -100,6 +100,7 @@ deconv_2_decoded = decoder_deconv_2(deconv_1_decoded)
|
||||
x_decoded_relu = decoder_deconv_3_upsamp(deconv_2_decoded)
|
||||
x_decoded_mean_squash = decoder_mean_squash(x_decoded_relu)
|
||||
|
||||
|
||||
def vae_loss(x, x_decoded_mean):
|
||||
# NOTE: binary_crossentropy expects a batch_size by dim
|
||||
# for x and x_decoded_mean, so we MUST flatten these!
|
||||
|
||||
+1
-1
@@ -15,4 +15,4 @@ from . import objectives
|
||||
from . import optimizers
|
||||
from . import regularizers
|
||||
|
||||
__version__ = '1.2.0'
|
||||
__version__ = '1.2.2'
|
||||
|
||||
@@ -1,6 +1,11 @@
|
||||
import numpy as np
|
||||
from .. import backend as K
|
||||
|
||||
try:
|
||||
import librosa
|
||||
except ImportError:
|
||||
librosa = None
|
||||
|
||||
|
||||
TAGS = ['rock', 'pop', 'alternative', 'indie', 'electronic',
|
||||
'female vocalists', 'dance', '00s', 'alternative rock', 'jazz',
|
||||
@@ -15,51 +20,50 @@ TAGS = ['rock', 'pop', 'alternative', 'indie', 'electronic',
|
||||
'sad', 'House', 'happy']
|
||||
|
||||
|
||||
def librosa_exists():
|
||||
try:
|
||||
__import__('librosa')
|
||||
except ImportError:
|
||||
return False
|
||||
else:
|
||||
return True
|
||||
|
||||
|
||||
def preprocess_input(audio_path, dim_ordering='default'):
|
||||
'''Reads an audio file and outputs a Mel-spectrogram.
|
||||
'''
|
||||
"""Reads an audio file and outputs a Mel-spectrogram.
|
||||
|
||||
# Arguments
|
||||
audio_path: path to the target audio file.
|
||||
dim_ordering: data format for the output spectrogram image.
|
||||
|
||||
# Returns
|
||||
3D Numpy tensor encoding the Mel-spectrogram.
|
||||
|
||||
# Raises
|
||||
ImportError: if librosa is not available.
|
||||
"""
|
||||
if dim_ordering == 'default':
|
||||
dim_ordering = K.image_dim_ordering()
|
||||
assert dim_ordering in {'tf', 'th'}
|
||||
|
||||
if librosa_exists():
|
||||
import librosa
|
||||
else:
|
||||
raise RuntimeError('Librosa is required to process audio files.\n' +
|
||||
'Install it via `pip install librosa` \nor visit ' +
|
||||
'http://librosa.github.io/librosa/ for details.')
|
||||
if librosa is None:
|
||||
raise ImportError('Librosa is required to process audio files. '
|
||||
'Install it via `pip install librosa` or visit '
|
||||
'http://librosa.github.io/librosa/ for details.')
|
||||
|
||||
# mel-spectrogram parameters
|
||||
SR = 12000
|
||||
N_FFT = 512
|
||||
N_MELS = 96
|
||||
HOP_LEN = 256
|
||||
DURA = 29.12
|
||||
sr = 12000
|
||||
n_fft = 512
|
||||
n_mels = 96
|
||||
hop_length = 256
|
||||
duration = 29.12
|
||||
|
||||
src, sr = librosa.load(audio_path, sr=SR)
|
||||
src, sr = librosa.load(audio_path, sr=sr)
|
||||
n_sample = src.shape[0]
|
||||
n_sample_wanted = int(DURA * SR)
|
||||
n_sample_wanted = int(duration * sr)
|
||||
|
||||
# trim the signal at the center
|
||||
if n_sample < n_sample_wanted: # if too short
|
||||
src = np.hstack((src, np.zeros((int(DURA * SR) - n_sample,))))
|
||||
src = np.hstack((src, np.zeros((int(duration * sr) - n_sample,))))
|
||||
elif n_sample > n_sample_wanted: # if too long
|
||||
src = src[(n_sample - n_sample_wanted) / 2:
|
||||
(n_sample + n_sample_wanted) / 2]
|
||||
src = src[(n_sample - n_sample_wanted) // 2:
|
||||
(n_sample + n_sample_wanted) // 2]
|
||||
|
||||
logam = librosa.logamplitude
|
||||
melgram = librosa.feature.melspectrogram
|
||||
x = logam(melgram(y=src, sr=SR, hop_length=HOP_LEN,
|
||||
n_fft=N_FFT, n_mels=N_MELS) ** 2,
|
||||
x = logam(melgram(y=src, sr=sr, hop_length=hop_length,
|
||||
n_fft=n_fft, n_mels=n_mels) ** 2,
|
||||
ref_power=1.0)
|
||||
|
||||
if dim_ordering == 'th':
|
||||
@@ -70,13 +74,15 @@ def preprocess_input(audio_path, dim_ordering='default'):
|
||||
|
||||
|
||||
def decode_predictions(preds, top_n=5):
|
||||
'''Decode the output of a music tagger model.
|
||||
"""Decode the output of a music tagger model.
|
||||
|
||||
# Arguments
|
||||
preds: 2-dimensional numpy array
|
||||
top_n: integer in [0, 50], number of items to show
|
||||
top_n: integer in [0, 50], number of items to show.
|
||||
|
||||
'''
|
||||
# Returns
|
||||
Decoded output.
|
||||
"""
|
||||
assert len(preds.shape) == 2 and preds.shape[1] == 50
|
||||
results = []
|
||||
for pred in preds:
|
||||
|
||||
@@ -9,6 +9,15 @@ CLASS_INDEX_PATH = 'https://s3.amazonaws.com/deep-learning-models/image-models/i
|
||||
|
||||
|
||||
def preprocess_input(x, dim_ordering='default'):
|
||||
"""Preprocesses a tensor encoding a batch of images.
|
||||
|
||||
# Arguments
|
||||
x: input Numpy tensor, 4D.
|
||||
dim_ordering: data format of the image tensor.
|
||||
|
||||
# Returns
|
||||
Preprocessed tensor.
|
||||
"""
|
||||
if dim_ordering == 'default':
|
||||
dim_ordering = K.image_dim_ordering()
|
||||
assert dim_ordering in {'tf', 'th'}
|
||||
@@ -31,6 +40,21 @@ def preprocess_input(x, dim_ordering='default'):
|
||||
|
||||
|
||||
def decode_predictions(preds, top=5):
|
||||
"""Decodes the prediction of an ImageNet model.
|
||||
|
||||
# Arguments
|
||||
preds: Numpy tensor encoding a batch of predictions.
|
||||
top: integer, how many top-guesses to return.
|
||||
|
||||
# Returns
|
||||
A list of lists of top class prediction tuples
|
||||
`(class_name, class_description, score)`.
|
||||
One list of tuples per sample in batch input.
|
||||
|
||||
# Raises
|
||||
ValueError: in case of invalid shape of the `pred` array
|
||||
(must be 2D).
|
||||
"""
|
||||
global CLASS_INDEX
|
||||
if len(preds.shape) != 2 or preds.shape[1] != 1000:
|
||||
raise ValueError('`decode_predictions` expects '
|
||||
@@ -51,7 +75,28 @@ def decode_predictions(preds, top=5):
|
||||
return results
|
||||
|
||||
|
||||
def _obtain_input_shape(input_shape, default_size, min_size, dim_ordering, include_top):
|
||||
def _obtain_input_shape(input_shape,
|
||||
default_size,
|
||||
min_size,
|
||||
dim_ordering,
|
||||
include_top):
|
||||
"""Internal utility to compute/validate an ImageNet model's input shape.
|
||||
|
||||
# Arguments
|
||||
input_shape: either None (will return the default network input shape),
|
||||
or a user-provided shape to be validated.
|
||||
default_size: default input width/height for the model.
|
||||
min_size: minimum input width/height accepted by the model.
|
||||
dim_ordering: image data format to use.
|
||||
include_top: whether the model is expected to
|
||||
be linked to a classifier via a Flatten layer.
|
||||
|
||||
# Returns
|
||||
An integer shape tuple (may include None entries).
|
||||
|
||||
# Raises
|
||||
ValueError: in case of invalid argument values.
|
||||
"""
|
||||
if dim_ordering == 'th':
|
||||
default_shape = (3, default_size, default_size)
|
||||
else:
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
'''Inception V3 model for Keras.
|
||||
"""Inception V3 model for Keras.
|
||||
|
||||
Note that the ImageNet weights provided are from a model that had not fully converged.
|
||||
Inception v3 should be able to reach 6.9% top-5 error, but our model
|
||||
@@ -10,11 +10,11 @@ Also, do note that the input image format for this model is different than for
|
||||
the VGG16 and ResNet models (299x299 instead of 224x224), and that the input preprocessing function
|
||||
is also different (same as Xception).
|
||||
|
||||
# Reference:
|
||||
# Reference
|
||||
|
||||
- [Rethinking the Inception Architecture for Computer Vision](http://arxiv.org/abs/1512.00567)
|
||||
|
||||
'''
|
||||
"""
|
||||
from __future__ import print_function
|
||||
from __future__ import absolute_import
|
||||
|
||||
@@ -39,8 +39,8 @@ TF_WEIGHTS_PATH_NO_TOP = 'https://github.com/fchollet/deep-learning-models/relea
|
||||
def conv2d_bn(x, nb_filter, nb_row, nb_col,
|
||||
border_mode='same', subsample=(1, 1),
|
||||
name=None):
|
||||
'''Utility function to apply conv + BN.
|
||||
'''
|
||||
"""Utility function to apply conv + BN.
|
||||
"""
|
||||
if name is not None:
|
||||
bn_name = name + '_bn'
|
||||
conv_name = name + '_conv'
|
||||
@@ -61,8 +61,9 @@ def conv2d_bn(x, nb_filter, nb_row, nb_col,
|
||||
|
||||
|
||||
def InceptionV3(include_top=True, weights='imagenet',
|
||||
input_tensor=None, input_shape=None):
|
||||
'''Instantiate the Inception v3 architecture,
|
||||
input_tensor=None, input_shape=None,
|
||||
classes=1000):
|
||||
"""Instantiate the Inception v3 architecture,
|
||||
optionally loading weights pre-trained
|
||||
on ImageNet. Note that when using TensorFlow,
|
||||
for best performance you should set
|
||||
@@ -83,21 +84,29 @@ def InceptionV3(include_top=True, weights='imagenet',
|
||||
or "imagenet" (pre-training on ImageNet).
|
||||
input_tensor: optional Keras tensor (i.e. output of `layers.Input()`)
|
||||
to use as image input for the model.
|
||||
inputs_shape: optional shape tuple, only to be specified
|
||||
input_shape: optional shape tuple, only to be specified
|
||||
if `include_top` is False (otherwise the input shape
|
||||
has to be `(299, 299, 3)` (with `tf` dim ordering)
|
||||
or `(3, 299, 299)` (with `th` dim ordering).
|
||||
It should have exactly 3 inputs channels,
|
||||
and width and height should be no smaller than 139.
|
||||
E.g. `(150, 150, 3)` would be one valid value.
|
||||
classes: optional number of classes to classify images
|
||||
into, only to be specified if `include_top` is True, and
|
||||
if no `weights` argument is specified.
|
||||
|
||||
# Returns
|
||||
A Keras model instance.
|
||||
'''
|
||||
"""
|
||||
if weights not in {'imagenet', None}:
|
||||
raise ValueError('The `weights` argument should be either '
|
||||
'`None` (random initialization) or `imagenet` '
|
||||
'(pre-training on ImageNet).')
|
||||
|
||||
if weights == 'imagenet' and include_top and classes != 1000:
|
||||
raise ValueError('If using `weights` as imagenet with `include_top`'
|
||||
' as true, `classes` should be 1000')
|
||||
|
||||
# Determine proper input shape
|
||||
input_shape = _obtain_input_shape(input_shape,
|
||||
default_size=299,
|
||||
@@ -262,7 +271,7 @@ def InceptionV3(include_top=True, weights='imagenet',
|
||||
# Classification block
|
||||
x = AveragePooling2D((8, 8), strides=(8, 8), name='avg_pool')(x)
|
||||
x = Flatten(name='flatten')(x)
|
||||
x = Dense(1000, activation='softmax', name='predictions')(x)
|
||||
x = Dense(classes, activation='softmax', name='predictions')(x)
|
||||
|
||||
# Ensure that the model takes into account
|
||||
# any potential predecessors of `input_tensor`.
|
||||
|
||||
@@ -1,11 +1,11 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
'''MusicTaggerCRNN model for Keras.
|
||||
"""MusicTaggerCRNN model for Keras.
|
||||
|
||||
# Reference:
|
||||
|
||||
- [Music-auto_tagging-keras](https://github.com/keunwoochoi/music-auto_tagging-keras)
|
||||
|
||||
'''
|
||||
"""
|
||||
from __future__ import print_function
|
||||
from __future__ import absolute_import
|
||||
|
||||
@@ -28,8 +28,8 @@ TF_WEIGHTS_PATH = 'https://github.com/fchollet/deep-learning-models/releases/dow
|
||||
|
||||
|
||||
def MusicTaggerCRNN(weights='msd', input_tensor=None,
|
||||
include_top=True):
|
||||
'''Instantiate the MusicTaggerCRNN architecture,
|
||||
include_top=True, classes=50):
|
||||
"""Instantiate the MusicTaggerCRNN architecture,
|
||||
optionally loading weights pre-trained
|
||||
on Million Song Dataset. Note that when using TensorFlow,
|
||||
for best performance you should set
|
||||
@@ -54,16 +54,21 @@ def MusicTaggerCRNN(weights='msd', input_tensor=None,
|
||||
include_top: whether to include the 1 fully-connected
|
||||
layer (output layer) at the top of the network.
|
||||
If False, the network outputs 32-dim features.
|
||||
|
||||
classes: optional number of classes to classify images
|
||||
into, only to be specified if `include_top` is True, and
|
||||
if no `weights` argument is specified.
|
||||
|
||||
# Returns
|
||||
A Keras model instance.
|
||||
'''
|
||||
"""
|
||||
if weights not in {'msd', None}:
|
||||
raise ValueError('The `weights` argument should be either '
|
||||
'`None` (random initialization) or `msd` '
|
||||
'(pre-training on Million Song Dataset).')
|
||||
|
||||
if weights == 'msd' and include_top and classes != 50:
|
||||
raise ValueError('If using `weights` as msd with `include_top`'
|
||||
' as true, `classes` should be 50')
|
||||
# Determine proper input shape
|
||||
if K.image_dim_ordering() == 'th':
|
||||
input_shape = (1, 96, 1366)
|
||||
@@ -126,7 +131,7 @@ def MusicTaggerCRNN(weights='msd', input_tensor=None,
|
||||
x = GRU(32, return_sequences=False, name='gru2')(x)
|
||||
|
||||
if include_top:
|
||||
x = Dense(50, activation='sigmoid', name='output')(x)
|
||||
x = Dense(classes, activation='sigmoid', name='output')(x)
|
||||
|
||||
# Ensure that the model takes into account
|
||||
# any potential predecessors of `input_tensor`.
|
||||
|
||||
@@ -1,12 +1,12 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
'''ResNet50 model for Keras.
|
||||
"""ResNet50 model for Keras.
|
||||
|
||||
# Reference:
|
||||
|
||||
- [Deep Residual Learning for Image Recognition](https://arxiv.org/abs/1512.03385)
|
||||
|
||||
Adapted from code contributed by BigMoyan.
|
||||
'''
|
||||
"""
|
||||
from __future__ import print_function
|
||||
from __future__ import absolute_import
|
||||
|
||||
@@ -31,7 +31,7 @@ TF_WEIGHTS_PATH_NO_TOP = 'https://github.com/fchollet/deep-learning-models/relea
|
||||
|
||||
|
||||
def identity_block(input_tensor, kernel_size, filters, stage, block):
|
||||
'''The identity_block is the block that has no conv layer at shortcut
|
||||
"""The identity_block is the block that has no conv layer at shortcut
|
||||
|
||||
# Arguments
|
||||
input_tensor: input tensor
|
||||
@@ -39,7 +39,7 @@ def identity_block(input_tensor, kernel_size, filters, stage, block):
|
||||
filters: list of integers, the nb_filters of 3 conv layer at main path
|
||||
stage: integer, current stage label, used for generating layer names
|
||||
block: 'a','b'..., current block label, used for generating layer names
|
||||
'''
|
||||
"""
|
||||
nb_filter1, nb_filter2, nb_filter3 = filters
|
||||
if K.image_dim_ordering() == 'tf':
|
||||
bn_axis = 3
|
||||
@@ -66,7 +66,7 @@ def identity_block(input_tensor, kernel_size, filters, stage, block):
|
||||
|
||||
|
||||
def conv_block(input_tensor, kernel_size, filters, stage, block, strides=(2, 2)):
|
||||
'''conv_block is the block that has a conv layer at shortcut
|
||||
"""conv_block is the block that has a conv layer at shortcut
|
||||
|
||||
# Arguments
|
||||
input_tensor: input tensor
|
||||
@@ -77,7 +77,7 @@ def conv_block(input_tensor, kernel_size, filters, stage, block, strides=(2, 2))
|
||||
|
||||
Note that from stage 3, the first conv layer at main path is with subsample=(2,2)
|
||||
And the shortcut should have subsample=(2,2) as well
|
||||
'''
|
||||
"""
|
||||
nb_filter1, nb_filter2, nb_filter3 = filters
|
||||
if K.image_dim_ordering() == 'tf':
|
||||
bn_axis = 3
|
||||
@@ -109,8 +109,9 @@ def conv_block(input_tensor, kernel_size, filters, stage, block, strides=(2, 2))
|
||||
|
||||
|
||||
def ResNet50(include_top=True, weights='imagenet',
|
||||
input_tensor=None, input_shape=None):
|
||||
'''Instantiate the ResNet50 architecture,
|
||||
input_tensor=None, input_shape=None,
|
||||
classes=1000):
|
||||
"""Instantiate the ResNet50 architecture,
|
||||
optionally loading weights pre-trained
|
||||
on ImageNet. Note that when using TensorFlow,
|
||||
for best performance you should set
|
||||
@@ -129,21 +130,29 @@ def ResNet50(include_top=True, weights='imagenet',
|
||||
or "imagenet" (pre-training on ImageNet).
|
||||
input_tensor: optional Keras tensor (i.e. output of `layers.Input()`)
|
||||
to use as image input for the model.
|
||||
inputs_shape: optional shape tuple, only to be specified
|
||||
input_shape: optional shape tuple, only to be specified
|
||||
if `include_top` is False (otherwise the input shape
|
||||
has to be `(224, 224, 3)` (with `tf` dim ordering)
|
||||
or `(3, 224, 244)` (with `th` dim ordering).
|
||||
It should have exactly 3 inputs channels,
|
||||
and width and height should be no smaller than 197.
|
||||
E.g. `(200, 200, 3)` would be one valid value.
|
||||
classes: optional number of classes to classify images
|
||||
into, only to be specified if `include_top` is True, and
|
||||
if no `weights` argument is specified.
|
||||
|
||||
# Returns
|
||||
A Keras model instance.
|
||||
'''
|
||||
"""
|
||||
if weights not in {'imagenet', None}:
|
||||
raise ValueError('The `weights` argument should be either '
|
||||
'`None` (random initialization) or `imagenet` '
|
||||
'(pre-training on ImageNet).')
|
||||
|
||||
if weights == 'imagenet' and include_top and classes != 1000:
|
||||
raise ValueError('If using `weights` as imagenet with `include_top`'
|
||||
' as true, `classes` should be 1000')
|
||||
|
||||
# Determine proper input shape
|
||||
input_shape = _obtain_input_shape(input_shape,
|
||||
default_size=224,
|
||||
@@ -193,7 +202,7 @@ def ResNet50(include_top=True, weights='imagenet',
|
||||
|
||||
if include_top:
|
||||
x = Flatten()(x)
|
||||
x = Dense(1000, activation='softmax', name='fc1000')(x)
|
||||
x = Dense(classes, activation='softmax', name='fc1000')(x)
|
||||
|
||||
# Ensure that the model takes into account
|
||||
# any potential predecessors of `input_tensor`.
|
||||
|
||||
@@ -1,11 +1,11 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
'''VGG16 model for Keras.
|
||||
"""VGG16 model for Keras.
|
||||
|
||||
# Reference:
|
||||
# Reference
|
||||
|
||||
- [Very Deep Convolutional Networks for Large-Scale Image Recognition](https://arxiv.org/abs/1409.1556)
|
||||
|
||||
'''
|
||||
"""
|
||||
from __future__ import print_function
|
||||
from __future__ import absolute_import
|
||||
|
||||
@@ -28,8 +28,9 @@ TF_WEIGHTS_PATH_NO_TOP = 'https://github.com/fchollet/deep-learning-models/relea
|
||||
|
||||
|
||||
def VGG16(include_top=True, weights='imagenet',
|
||||
input_tensor=None, input_shape=None):
|
||||
'''Instantiate the VGG16 architecture,
|
||||
input_tensor=None, input_shape=None,
|
||||
classes=1000):
|
||||
"""Instantiate the VGG16 architecture,
|
||||
optionally loading weights pre-trained
|
||||
on ImageNet. Note that when using TensorFlow,
|
||||
for best performance you should set
|
||||
@@ -48,21 +49,28 @@ def VGG16(include_top=True, weights='imagenet',
|
||||
or "imagenet" (pre-training on ImageNet).
|
||||
input_tensor: optional Keras tensor (i.e. output of `layers.Input()`)
|
||||
to use as image input for the model.
|
||||
inputs_shape: optional shape tuple, only to be specified
|
||||
input_shape: optional shape tuple, only to be specified
|
||||
if `include_top` is False (otherwise the input shape
|
||||
has to be `(224, 224, 3)` (with `tf` dim ordering)
|
||||
or `(3, 224, 244)` (with `th` dim ordering).
|
||||
It should have exactly 3 inputs channels,
|
||||
and width and height should be no smaller than 48.
|
||||
E.g. `(200, 200, 3)` would be one valid value.
|
||||
classes: optional number of classes to classify images
|
||||
into, only to be specified if `include_top` is True, and
|
||||
if no `weights` argument is specified.
|
||||
|
||||
# Returns
|
||||
A Keras model instance.
|
||||
'''
|
||||
"""
|
||||
if weights not in {'imagenet', None}:
|
||||
raise ValueError('The `weights` argument should be either '
|
||||
'`None` (random initialization) or `imagenet` '
|
||||
'(pre-training on ImageNet).')
|
||||
|
||||
if weights == 'imagenet' and include_top and classes != 1000:
|
||||
raise ValueError('If using `weights` as imagenet with `include_top`'
|
||||
' as true, `classes` should be 1000')
|
||||
# Determine proper input shape
|
||||
input_shape = _obtain_input_shape(input_shape,
|
||||
default_size=224,
|
||||
@@ -110,7 +118,7 @@ def VGG16(include_top=True, weights='imagenet',
|
||||
x = Flatten(name='flatten')(x)
|
||||
x = Dense(4096, activation='relu', name='fc1')(x)
|
||||
x = Dense(4096, activation='relu', name='fc2')(x)
|
||||
x = Dense(1000, activation='softmax', name='predictions')(x)
|
||||
x = Dense(classes, activation='softmax', name='predictions')(x)
|
||||
|
||||
# Ensure that the model takes into account
|
||||
# any potential predecessors of `input_tensor`.
|
||||
|
||||
@@ -1,11 +1,11 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
'''VGG19 model for Keras.
|
||||
"""VGG19 model for Keras.
|
||||
|
||||
# Reference:
|
||||
# Reference
|
||||
|
||||
- [Very Deep Convolutional Networks for Large-Scale Image Recognition](https://arxiv.org/abs/1409.1556)
|
||||
|
||||
'''
|
||||
"""
|
||||
from __future__ import print_function
|
||||
from __future__ import absolute_import
|
||||
|
||||
@@ -28,8 +28,9 @@ TF_WEIGHTS_PATH_NO_TOP = 'https://github.com/fchollet/deep-learning-models/relea
|
||||
|
||||
|
||||
def VGG19(include_top=True, weights='imagenet',
|
||||
input_tensor=None, input_shape=None):
|
||||
'''Instantiate the VGG19 architecture,
|
||||
input_tensor=None, input_shape=None,
|
||||
classes=1000):
|
||||
"""Instantiate the VGG19 architecture,
|
||||
optionally loading weights pre-trained
|
||||
on ImageNet. Note that when using TensorFlow,
|
||||
for best performance you should set
|
||||
@@ -48,21 +49,28 @@ def VGG19(include_top=True, weights='imagenet',
|
||||
or "imagenet" (pre-training on ImageNet).
|
||||
input_tensor: optional Keras tensor (i.e. output of `layers.Input()`)
|
||||
to use as image input for the model.
|
||||
inputs_shape: optional shape tuple, only to be specified
|
||||
input_shape: optional shape tuple, only to be specified
|
||||
if `include_top` is False (otherwise the input shape
|
||||
has to be `(224, 224, 3)` (with `tf` dim ordering)
|
||||
or `(3, 224, 224)` (with `th` dim ordering)).
|
||||
It should have exactly 3 input channels,
|
||||
and width and height should be no smaller than 48.
|
||||
E.g. `(200, 200, 3)` would be one valid value.
|
||||
classes: optional number of classes to classify images
|
||||
into, only to be specified if `include_top` is True, and
|
||||
if no `weights` argument is specified.
|
||||
|
||||
# Returns
|
||||
A Keras model instance.
|
||||
'''
|
||||
"""
|
||||
if weights not in {'imagenet', None}:
|
||||
raise ValueError('The `weights` argument should be either '
|
||||
'`None` (random initialization) or `imagenet` '
|
||||
'(pre-training on ImageNet).')
|
||||
|
||||
if weights == 'imagenet' and include_top and classes != 1000:
|
||||
raise ValueError('If using `weights` as imagenet with `include_top`'
|
||||
' as true, `classes` should be 1000')
|
||||
# Determine proper input shape
|
||||
input_shape = _obtain_input_shape(input_shape,
|
||||
default_size=224,
|
||||
@@ -113,7 +121,7 @@ def VGG19(include_top=True, weights='imagenet',
|
||||
x = Flatten(name='flatten')(x)
|
||||
x = Dense(4096, activation='relu', name='fc1')(x)
|
||||
x = Dense(4096, activation='relu', name='fc2')(x)
|
||||
x = Dense(1000, activation='softmax', name='predictions')(x)
|
||||
x = Dense(classes, activation='softmax', name='predictions')(x)
|
||||
|
||||
# Ensure that the model takes into account
|
||||
# any potential predecessors of `input_tensor`.
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
'''Xception V1 model for Keras.
|
||||
"""Xception V1 model for Keras.
|
||||
|
||||
On ImageNet, this model gets to a top-1 validation accuracy of 0.790
|
||||
and a top-5 validation accuracy of 0.945.
|
||||
@@ -12,11 +12,11 @@ is also different (same as Inception V3).
|
||||
Also do note that this model is only available for the TensorFlow backend,
|
||||
due to its reliance on `SeparableConvolution` layers.
|
||||
|
||||
# Reference:
|
||||
# Reference
|
||||
|
||||
- [Xception: Deep Learning with Depthwise Separable Convolutions](https://arxiv.org/abs/1610.02357)
|
||||
|
||||
'''
|
||||
"""
|
||||
from __future__ import print_function
|
||||
from __future__ import absolute_import
|
||||
|
||||
@@ -36,8 +36,9 @@ TF_WEIGHTS_PATH_NO_TOP = 'https://github.com/fchollet/deep-learning-models/relea
|
||||
|
||||
|
||||
def Xception(include_top=True, weights='imagenet',
|
||||
input_tensor=None, input_shape=None):
|
||||
'''Instantiate the Xception architecture,
|
||||
input_tensor=None, input_shape=None,
|
||||
classes=1000):
|
||||
"""Instantiate the Xception architecture,
|
||||
optionally loading weights pre-trained
|
||||
on ImageNet. This model is available for TensorFlow only,
|
||||
and can only be used with inputs following the TensorFlow
|
||||
@@ -54,20 +55,28 @@ def Xception(include_top=True, weights='imagenet',
|
||||
or "imagenet" (pre-training on ImageNet).
|
||||
input_tensor: optional Keras tensor (i.e. output of `layers.Input()`)
|
||||
to use as image input for the model.
|
||||
inputs_shape: optional shape tuple, only to be specified
|
||||
input_shape: optional shape tuple, only to be specified
|
||||
if `include_top` is False (otherwise the input shape
|
||||
has to be `(299, 299, 3)`).
|
||||
It should have exactly 3 input channels,
|
||||
and width and height should be no smaller than 71.
|
||||
E.g. `(150, 150, 3)` would be one valid value.
|
||||
classes: optional number of classes to classify images
|
||||
into, only to be specified if `include_top` is True, and
|
||||
if no `weights` argument is specified.
|
||||
|
||||
# Returns
|
||||
A Keras model instance.
|
||||
'''
|
||||
"""
|
||||
if weights not in {'imagenet', None}:
|
||||
raise ValueError('The `weights` argument should be either '
|
||||
'`None` (random initialization) or `imagenet` '
|
||||
'(pre-training on ImageNet).')
|
||||
|
||||
if weights == 'imagenet' and include_top and classes != 1000:
|
||||
raise ValueError('If using `weights` as imagenet with `include_top`'
|
||||
' as true, `classes` should be 1000')
|
||||
|
||||
if K.backend() != 'tensorflow':
|
||||
raise RuntimeError('The Xception model is only available with '
|
||||
'the TensorFlow backend.')
|
||||
@@ -189,7 +198,7 @@ def Xception(include_top=True, weights='imagenet',
|
||||
|
||||
if include_top:
|
||||
x = GlobalAveragePooling2D(name='avg_pool')(x)
|
||||
x = Dense(1000, activation='softmax', name='predictions')(x)
|
||||
x = Dense(classes, activation='softmax', name='predictions')(x)
|
||||
|
||||
# Ensure that the model takes into account
|
||||
# any potential predecessors of `input_tensor`.
|
||||
|
||||
@@ -70,7 +70,7 @@ else:
|
||||
|
||||
|
||||
def backend():
|
||||
'''Publicly accessible method
|
||||
"""Publicly accessible method
|
||||
for determining the current backend.
|
||||
'''
|
||||
"""
|
||||
return _BACKEND
|
||||
|
||||
+18
-18
@@ -11,7 +11,7 @@ _LEGACY_WEIGHT_ORDERING = False
|
||||
|
||||
|
||||
def epsilon():
|
||||
'''Returns the value of the fuzz
|
||||
"""Returns the value of the fuzz
|
||||
factor used in numeric expressions.
|
||||
|
||||
# Returns
|
||||
@@ -22,12 +22,12 @@ def epsilon():
|
||||
>>> keras.backend.epsilon()
|
||||
1e-08
|
||||
```
|
||||
'''
|
||||
"""
|
||||
return _EPSILON
|
||||
|
||||
|
||||
def set_epsilon(e):
|
||||
'''Sets the value of the fuzz
|
||||
"""Sets the value of the fuzz
|
||||
factor used in numeric expressions.
|
||||
|
||||
# Arguments
|
||||
@@ -42,13 +42,13 @@ def set_epsilon(e):
|
||||
>>> K.epsilon()
|
||||
1e-05
|
||||
```
|
||||
'''
|
||||
"""
|
||||
global _EPSILON
|
||||
_EPSILON = e
|
||||
|
||||
|
||||
def floatx():
|
||||
'''Returns the default float type, as a string
|
||||
"""Returns the default float type, as a string
|
||||
(e.g. 'float16', 'float32', 'float64').
|
||||
|
||||
# Returns
|
||||
@@ -59,12 +59,12 @@ def floatx():
|
||||
>>> keras.backend.floatx()
|
||||
'float32'
|
||||
```
|
||||
'''
|
||||
"""
|
||||
return _FLOATX
|
||||
|
||||
|
||||
def set_floatx(floatx):
|
||||
'''Sets the default float type.
|
||||
"""Sets the default float type.
|
||||
|
||||
# Arguments
|
||||
String: 'float16', 'float32', or 'float64'.
|
||||
@@ -78,7 +78,7 @@ def set_floatx(floatx):
|
||||
>>> K.floatx()
|
||||
'float16'
|
||||
```
|
||||
'''
|
||||
"""
|
||||
global _FLOATX
|
||||
if floatx not in {'float16', 'float32', 'float64'}:
|
||||
raise ValueError('Unknown floatx type: ' + str(floatx))
|
||||
@@ -86,7 +86,7 @@ def set_floatx(floatx):
|
||||
|
||||
|
||||
def cast_to_floatx(x):
|
||||
'''Cast a Numpy array to the default Keras float type.
|
||||
"""Cast a Numpy array to the default Keras float type.
|
||||
|
||||
# Arguments
|
||||
x: Numpy array.
|
||||
@@ -108,12 +108,12 @@ def cast_to_floatx(x):
|
||||
>>> new_arr.dtype
|
||||
dtype('float32')
|
||||
```
|
||||
'''
|
||||
"""
|
||||
return np.asarray(x, dtype=_FLOATX)
|
||||
|
||||
|
||||
def image_dim_ordering():
|
||||
'''Returns the default image dimension ordering
|
||||
"""Returns the default image dimension ordering
|
||||
convention ('th' or 'tf').
|
||||
|
||||
# Returns
|
||||
@@ -124,12 +124,12 @@ def image_dim_ordering():
|
||||
>>> keras.backend.image_dim_ordering()
|
||||
'th'
|
||||
```
|
||||
'''
|
||||
"""
|
||||
return _IMAGE_DIM_ORDERING
|
||||
|
||||
|
||||
def set_image_dim_ordering(dim_ordering):
|
||||
'''Sets the value of the image dimension
|
||||
"""Sets the value of the image dimension
|
||||
ordering convention ('th' or 'tf').
|
||||
|
||||
# Arguments
|
||||
@@ -144,7 +144,7 @@ def set_image_dim_ordering(dim_ordering):
|
||||
>>> K.image_dim_ordering()
|
||||
'tf'
|
||||
```
|
||||
'''
|
||||
"""
|
||||
global _IMAGE_DIM_ORDERING
|
||||
if dim_ordering not in {'tf', 'th'}:
|
||||
raise ValueError('Unknown dim_ordering:', dim_ordering)
|
||||
@@ -152,7 +152,7 @@ def set_image_dim_ordering(dim_ordering):
|
||||
|
||||
|
||||
def get_uid(prefix=''):
|
||||
'''Provides a unique UID given a string prefix.
|
||||
"""Provides a unique UID given a string prefix.
|
||||
|
||||
# Arguments
|
||||
prefix: string.
|
||||
@@ -168,7 +168,7 @@ def get_uid(prefix=''):
|
||||
>>> 2
|
||||
```
|
||||
|
||||
'''
|
||||
"""
|
||||
_UID_PREFIXES[prefix] += 1
|
||||
return _UID_PREFIXES[prefix]
|
||||
|
||||
@@ -179,7 +179,7 @@ def reset_uids():
|
||||
|
||||
|
||||
def is_keras_tensor(x):
|
||||
'''Returns whether `x` is a Keras tensor.
|
||||
"""Returns whether `x` is a Keras tensor.
|
||||
|
||||
# Arguments
|
||||
x: a potential tensor.
|
||||
@@ -200,7 +200,7 @@ def is_keras_tensor(x):
|
||||
>>> K.is_keras_tensor(keras_placeholder) # A placeholder is a Tensor.
|
||||
True
|
||||
```
|
||||
'''
|
||||
"""
|
||||
if hasattr(x, '_keras_shape'):
|
||||
return True
|
||||
else:
|
||||
|
||||
Diferenças do arquivo suprimidas por serem muito extensas
Carregar Diff
+222
-135
@@ -56,8 +56,18 @@ def to_dense(tensor):
|
||||
return tensor
|
||||
|
||||
|
||||
def is_explicit_shape(shape):
|
||||
if hasattr(shape, '__iter__'):
|
||||
for x in shape:
|
||||
if x is not None:
|
||||
if not isinstance(x, int):
|
||||
return False
|
||||
return True
|
||||
return False
|
||||
|
||||
|
||||
def variable(value, dtype=None, name=None):
|
||||
'''Instantiates a variable and returns it.
|
||||
"""Instantiates a variable and returns it.
|
||||
|
||||
# Arguments
|
||||
value: Numpy array, initial value of the tensor.
|
||||
@@ -66,7 +76,7 @@ def variable(value, dtype=None, name=None):
|
||||
|
||||
# Returns
|
||||
A variable instance (with Keras metadata included).
|
||||
'''
|
||||
"""
|
||||
if dtype is None:
|
||||
dtype = floatx()
|
||||
if hasattr(value, 'tocoo'):
|
||||
@@ -81,8 +91,8 @@ def variable(value, dtype=None, name=None):
|
||||
|
||||
|
||||
def placeholder(shape=None, ndim=None, dtype=None, sparse=False, name=None):
|
||||
'''Instantiate an input data placeholder variable.
|
||||
'''
|
||||
"""Instantiate an input data placeholder variable.
|
||||
"""
|
||||
if dtype is None:
|
||||
dtype = floatx()
|
||||
if shape is None and ndim is None:
|
||||
@@ -104,16 +114,16 @@ def placeholder(shape=None, ndim=None, dtype=None, sparse=False, name=None):
|
||||
|
||||
|
||||
def shape(x):
|
||||
'''Returns the shape of a tensor.
|
||||
"""Returns the shape of a tensor.
|
||||
|
||||
Warning: type returned will be different for
|
||||
Theano backend (Theano tensor type) and TF backend (TF TensorShape).
|
||||
'''
|
||||
"""
|
||||
return x.shape
|
||||
|
||||
|
||||
def int_shape(x):
|
||||
'''Returns the shape of a Keras tensor or a Keras variable as a tuple of
|
||||
"""Returns the shape of a Keras tensor or a Keras variable as a tuple of
|
||||
integers or None entries.
|
||||
|
||||
# Arguments
|
||||
@@ -121,7 +131,7 @@ def int_shape(x):
|
||||
|
||||
# Returns
|
||||
A tuple of integers (or None entries).
|
||||
'''
|
||||
"""
|
||||
if hasattr(x, '_keras_shape'):
|
||||
return x._keras_shape
|
||||
else:
|
||||
@@ -137,41 +147,41 @@ def dtype(x):
|
||||
|
||||
|
||||
def eval(x):
|
||||
'''Returns the value of a tensor.
|
||||
'''
|
||||
"""Returns the value of a tensor.
|
||||
"""
|
||||
return to_dense(x).eval()
|
||||
|
||||
|
||||
def zeros(shape, dtype=None, name=None):
|
||||
'''Instantiates an all-zeros variable.
|
||||
'''
|
||||
"""Instantiates an all-zeros variable.
|
||||
"""
|
||||
if dtype is None:
|
||||
dtype = floatx()
|
||||
return variable(np.zeros(shape), dtype, name)
|
||||
|
||||
|
||||
def ones(shape, dtype=None, name=None):
|
||||
'''Instantiates an all-ones variable.
|
||||
'''
|
||||
"""Instantiates an all-ones variable.
|
||||
"""
|
||||
if dtype is None:
|
||||
dtype = floatx()
|
||||
return variable(np.ones(shape), dtype, name)
|
||||
|
||||
|
||||
def eye(size, dtype=None, name=None):
|
||||
'''Instantiates an identity matrix.
|
||||
'''
|
||||
"""Instantiates an identity matrix.
|
||||
"""
|
||||
if dtype is None:
|
||||
dtype = floatx()
|
||||
return variable(np.eye(size), dtype, name)
|
||||
|
||||
|
||||
def ones_like(x, name=None):
|
||||
return T.ones_like(x)
|
||||
def ones_like(x, dtype=None, name=None):
|
||||
return T.ones_like(x, dtype=dtype)
|
||||
|
||||
|
||||
def zeros_like(x, name=None):
|
||||
return T.zeros_like(x)
|
||||
def zeros_like(x, dtype=None, name=None):
|
||||
return T.zeros_like(x, dtype=dtype)
|
||||
|
||||
|
||||
def random_uniform_variable(shape, low, high, dtype=None, name=None):
|
||||
@@ -185,10 +195,10 @@ def random_normal_variable(shape, mean, scale, dtype=None, name=None):
|
||||
|
||||
|
||||
def count_params(x):
|
||||
'''Returns the number of scalars in a tensor.
|
||||
"""Returns the number of scalars in a tensor.
|
||||
|
||||
Return: numpy integer.
|
||||
'''
|
||||
"""
|
||||
return np.prod(x.shape.eval())
|
||||
|
||||
|
||||
@@ -217,13 +227,14 @@ def moving_average_update(variable, value, momentum):
|
||||
|
||||
# LINEAR ALGEBRA
|
||||
|
||||
'''
|
||||
"""
|
||||
Assumed overridden:
|
||||
+, -, /, *, +=, -=, *=, /=
|
||||
'''
|
||||
"""
|
||||
|
||||
|
||||
def dot(x, y):
|
||||
# TODO: `keras_shape` inference.
|
||||
if is_sparse(x):
|
||||
return th_sparse_module.basic.structured_dot(x, y)
|
||||
else:
|
||||
@@ -231,7 +242,7 @@ def dot(x, y):
|
||||
|
||||
|
||||
def batch_dot(x, y, axes=None):
|
||||
'''Batchwise dot product.
|
||||
"""Batchwise dot product.
|
||||
|
||||
batch_dot results in a tensor with fewer dimensions than the input.
|
||||
If the number of dimensions is reduced to 1, we use `expand_dims` to
|
||||
@@ -267,7 +278,8 @@ def batch_dot(x, y, axes=None):
|
||||
dimension 2 of y has been summed over. (dot_axes[1] = 2)
|
||||
|
||||
output_shape = (100, 30)
|
||||
'''
|
||||
"""
|
||||
# TODO: `keras_shape` inference.
|
||||
if isinstance(axes, int):
|
||||
axes = (axes, axes)
|
||||
if axes is None:
|
||||
@@ -280,15 +292,17 @@ def batch_dot(x, y, axes=None):
|
||||
|
||||
|
||||
def transpose(x):
|
||||
# TODO: `keras_shape` inference.
|
||||
return T.transpose(x)
|
||||
|
||||
|
||||
def gather(reference, indices):
|
||||
'''reference: a tensor.
|
||||
"""reference: a tensor.
|
||||
indices: an int tensor of indices.
|
||||
|
||||
Return: a tensor of same type as reference.
|
||||
'''
|
||||
"""
|
||||
# TODO: `keras_shape` inference.
|
||||
return reference[indices]
|
||||
|
||||
|
||||
@@ -304,20 +318,20 @@ def min(x, axis=None, keepdims=False):
|
||||
|
||||
|
||||
def sum(x, axis=None, keepdims=False):
|
||||
'''Sum of the values in a tensor, alongside the specified axis.
|
||||
'''
|
||||
"""Sum of the values in a tensor, alongside the specified axis.
|
||||
"""
|
||||
return T.sum(x, axis=axis, keepdims=keepdims)
|
||||
|
||||
|
||||
def prod(x, axis=None, keepdims=False):
|
||||
'''Multiply the values in a tensor, alongside the specified axis.
|
||||
'''
|
||||
"""Multiply the values in a tensor, alongside the specified axis.
|
||||
"""
|
||||
return T.prod(x, axis=axis, keepdims=keepdims)
|
||||
|
||||
|
||||
def mean(x, axis=None, keepdims=False):
|
||||
'''Mean of a tensor, alongside the specified axis.
|
||||
'''
|
||||
"""Mean of a tensor, alongside the specified axis.
|
||||
"""
|
||||
dtype = None
|
||||
# bool is available since theano v0.9dev
|
||||
if 'int' in x.dtype or x.dtype == 'bool':
|
||||
@@ -334,14 +348,14 @@ def var(x, axis=None, keepdims=False):
|
||||
|
||||
|
||||
def any(x, axis=None, keepdims=False):
|
||||
'''Bitwise reduction (logical OR).
|
||||
'''
|
||||
"""Bitwise reduction (logical OR).
|
||||
"""
|
||||
return T.any(x, axis=axis, keepdims=keepdims)
|
||||
|
||||
|
||||
def all(x, axis=None, keepdims=False):
|
||||
'''Bitwise reduction (logical AND).
|
||||
'''
|
||||
"""Bitwise reduction (logical AND).
|
||||
"""
|
||||
return T.all(x, axis=axis, keepdims=keepdims)
|
||||
|
||||
|
||||
@@ -375,7 +389,7 @@ def log(x):
|
||||
|
||||
|
||||
def round(x):
|
||||
return T.round(x)
|
||||
return T.round(x, mode='half_to_even')
|
||||
|
||||
|
||||
def sign(x):
|
||||
@@ -387,8 +401,10 @@ def pow(x, a):
|
||||
|
||||
|
||||
def clip(x, min_value, max_value):
|
||||
if max_value < min_value:
|
||||
if max_value is not None and max_value < min_value:
|
||||
max_value = min_value
|
||||
if max_value is None:
|
||||
max_value = np.inf
|
||||
return T.clip(x, min_value, max_value)
|
||||
|
||||
|
||||
@@ -434,8 +450,8 @@ def cos(x):
|
||||
|
||||
def normalize_batch_in_training(x, gamma, beta,
|
||||
reduction_axes, epsilon=1e-3):
|
||||
'''Computes mean and std for batch then apply batch_normalization on batch.
|
||||
'''
|
||||
"""Computes mean and std for batch then apply batch_normalization on batch.
|
||||
"""
|
||||
# TODO remove this if statement when Theano without
|
||||
# T.nnet.bn.batch_normalization_train is deprecated
|
||||
if not hasattr(T.nnet.bn, 'batch_normalization_train'):
|
||||
@@ -448,8 +464,8 @@ def normalize_batch_in_training(x, gamma, beta,
|
||||
|
||||
|
||||
def batch_normalization(x, mean, var, beta, gamma, epsilon=1e-3):
|
||||
'''Apply batch normalization on x given mean, var, beta and gamma.
|
||||
'''
|
||||
"""Apply batch normalization on x given mean, var, beta and gamma.
|
||||
"""
|
||||
# TODO remove this if statement when Theano without
|
||||
# T.nnet.bn.batch_normalization_test is deprecated
|
||||
if not hasattr(T.nnet.bn, 'batch_normalization_test'):
|
||||
@@ -469,8 +485,8 @@ def batch_normalization(x, mean, var, beta, gamma, epsilon=1e-3):
|
||||
# T.nnet.bn.batch_normalization_train is deprecated
|
||||
def _old_normalize_batch_in_training(x, gamma, beta,
|
||||
reduction_axes, epsilon=1e-3):
|
||||
'''Computes mean and std for batch then apply batch_normalization on batch.
|
||||
'''
|
||||
"""Computes mean and std for batch then apply batch_normalization on batch.
|
||||
"""
|
||||
dev = theano.config.device
|
||||
use_cudnn = ndim(x) < 5 and reduction_axes == [0, 2, 3] and (dev.startswith('cuda') or dev.startswith('gpu'))
|
||||
if use_cudnn:
|
||||
@@ -479,6 +495,9 @@ def _old_normalize_batch_in_training(x, gamma, beta,
|
||||
try:
|
||||
normed, mean, stdinv = theano.sandbox.cuda.dnn.dnn_batch_normalization_train(
|
||||
x, broadcast_gamma, broadcast_beta, 'spatial', epsilon)
|
||||
normed = theano.tensor.as_tensor_variable(normed)
|
||||
mean = theano.tensor.as_tensor_variable(mean)
|
||||
stdinv = theano.tensor.as_tensor_variable(stdinv)
|
||||
var = T.inv(stdinv ** 2)
|
||||
return normed, T.flatten(mean), T.flatten(var)
|
||||
except AttributeError:
|
||||
@@ -508,8 +527,8 @@ def _old_normalize_batch_in_training(x, gamma, beta,
|
||||
# TODO remove this if statement when Theano without
|
||||
# T.nnet.bn.batch_normalization_test is deprecated
|
||||
def _old_batch_normalization(x, mean, var, beta, gamma, epsilon=1e-3):
|
||||
'''Apply batch normalization on x given mean, var, beta and gamma.
|
||||
'''
|
||||
"""Apply batch normalization on x given mean, var, beta and gamma.
|
||||
"""
|
||||
if mean.ndim == 1 and x.ndim > 1:
|
||||
# in TensorFlow's batch_normalization, if the parameters are vectors
|
||||
# the batch normalization should be applied along the rightmost axis.
|
||||
@@ -530,7 +549,7 @@ def _old_batch_normalization(x, mean, var, beta, gamma, epsilon=1e-3):
|
||||
shuffle_pattern = list(range(ndim))
|
||||
shuffle_pattern[1] = shuffle_pattern[axis]
|
||||
shuffle_pattern[axis] = 1
|
||||
return theano.sandbox.cuda.dnn.dnn_batch_normalization_test(
|
||||
result = theano.sandbox.cuda.dnn.dnn_batch_normalization_test(
|
||||
x.dimshuffle(shuffle_pattern),
|
||||
gamma.dimshuffle(shuffle_pattern),
|
||||
beta.dimshuffle(shuffle_pattern),
|
||||
@@ -538,8 +557,9 @@ def _old_batch_normalization(x, mean, var, beta, gamma, epsilon=1e-3):
|
||||
var.dimshuffle(shuffle_pattern),
|
||||
'spatial', epsilon).dimshuffle(shuffle_pattern)
|
||||
else:
|
||||
return theano.sandbox.cuda.dnn.dnn_batch_normalization_test(
|
||||
result = theano.sandbox.cuda.dnn.dnn_batch_normalization_test(
|
||||
x, gamma, beta, mean, var, 'spatial', epsilon)
|
||||
return theano.tensor.as_tensor_variable(result)
|
||||
except AttributeError:
|
||||
pass
|
||||
except ValueError:
|
||||
@@ -564,35 +584,45 @@ def concatenate(tensors, axis=-1):
|
||||
|
||||
|
||||
def reshape(x, shape):
|
||||
return T.reshape(x, shape)
|
||||
y = T.reshape(x, shape)
|
||||
if is_explicit_shape(shape):
|
||||
y._keras_shape = shape
|
||||
if hasattr(x, '_uses_learning_phase'):
|
||||
y._uses_learning_phase = x._uses_learning_phase
|
||||
else:
|
||||
y._uses_learning_phase = False
|
||||
return y
|
||||
|
||||
|
||||
def permute_dimensions(x, pattern):
|
||||
'''Transpose dimensions.
|
||||
"""Transpose dimensions.
|
||||
|
||||
pattern should be a tuple or list of
|
||||
dimension indices, e.g. [0, 2, 1].
|
||||
'''
|
||||
"""
|
||||
# TODO: `keras_shape` inference.
|
||||
pattern = tuple(pattern)
|
||||
return x.dimshuffle(pattern)
|
||||
|
||||
|
||||
def repeat_elements(x, rep, axis):
|
||||
'''Repeat the elements of a tensor along an axis, like np.repeat.
|
||||
"""Repeat the elements of a tensor along an axis, like np.repeat.
|
||||
|
||||
If x has shape (s1, s2, s3) and axis=1, the output
|
||||
will have shape (s1, s2 * rep, s3).
|
||||
'''
|
||||
"""
|
||||
# TODO: `keras_shape` inference.
|
||||
return T.repeat(x, rep, axis=axis)
|
||||
|
||||
|
||||
def resize_images(X, height_factor, width_factor, dim_ordering):
|
||||
'''Resize the images contained in a 4D tensor of shape
|
||||
"""Resize the images contained in a 4D tensor of shape
|
||||
- [batch, channels, height, width] (for 'th' dim_ordering)
|
||||
- [batch, height, width, channels] (for 'tf' dim_ordering)
|
||||
by a factor of (height_factor, width_factor). Both factors should be
|
||||
positive integers.
|
||||
'''
|
||||
"""
|
||||
# TODO: `keras_shape` inference.
|
||||
if dim_ordering == 'th':
|
||||
output = repeat_elements(X, height_factor, axis=2)
|
||||
output = repeat_elements(output, width_factor, axis=3)
|
||||
@@ -606,12 +636,13 @@ def resize_images(X, height_factor, width_factor, dim_ordering):
|
||||
|
||||
|
||||
def resize_volumes(X, depth_factor, height_factor, width_factor, dim_ordering):
|
||||
'''Resize the volume contained in a 5D tensor of shape
|
||||
"""Resize the volume contained in a 5D tensor of shape
|
||||
- [batch, channels, depth, height, width] (for 'th' dim_ordering)
|
||||
- [batch, depth, height, width, channels] (for 'tf' dim_ordering)
|
||||
by a factor of (depth_factor, height_factor, width_factor).
|
||||
All factors should be positive integers.
|
||||
'''
|
||||
"""
|
||||
# TODO: `keras_shape` inference.
|
||||
if dim_ordering == 'th':
|
||||
output = repeat_elements(X, depth_factor, axis=2)
|
||||
output = repeat_elements(output, height_factor, axis=3)
|
||||
@@ -627,18 +658,19 @@ def resize_volumes(X, depth_factor, height_factor, width_factor, dim_ordering):
|
||||
|
||||
|
||||
def repeat(x, n):
|
||||
'''Repeat a 2D tensor.
|
||||
"""Repeat a 2D tensor.
|
||||
|
||||
If x has shape (samples, dim) and n=2,
|
||||
the output will have shape (samples, 2, dim).
|
||||
'''
|
||||
"""
|
||||
# TODO: `keras_shape` inference.
|
||||
assert x.ndim == 2
|
||||
x = x.dimshuffle((0, 'x', 1))
|
||||
return T.extra_ops.repeat(x, n, axis=1)
|
||||
|
||||
|
||||
def arange(start, stop=None, step=1, dtype='int32'):
|
||||
'''Creates a 1-D tensor containing a sequence of integers.
|
||||
"""Creates a 1-D tensor containing a sequence of integers.
|
||||
|
||||
The function arguments use the same convention as
|
||||
Theano's arange: if only one argument is provided,
|
||||
@@ -646,29 +678,33 @@ def arange(start, stop=None, step=1, dtype='int32'):
|
||||
|
||||
The default type of the returned tensor is 'int32' to
|
||||
match TensorFlow's default.
|
||||
'''
|
||||
"""
|
||||
return T.arange(start, stop=stop, step=step, dtype=dtype)
|
||||
|
||||
|
||||
def tile(x, n):
|
||||
# TODO: `keras_shape` inference.
|
||||
return T.tile(x, n)
|
||||
|
||||
|
||||
def flatten(x):
|
||||
# TODO: `keras_shape` inference.
|
||||
return T.flatten(x)
|
||||
|
||||
|
||||
def batch_flatten(x):
|
||||
'''Turn a n-D tensor into a 2D tensor where
|
||||
"""Turn a n-D tensor into a 2D tensor where
|
||||
the first dimension is conserved.
|
||||
'''
|
||||
"""
|
||||
# TODO: `keras_shape` inference.
|
||||
x = T.reshape(x, (x.shape[0], T.prod(x.shape) // x.shape[0]))
|
||||
return x
|
||||
|
||||
|
||||
def expand_dims(x, dim=-1):
|
||||
'''Add a 1-sized dimension at index "dim".
|
||||
'''
|
||||
"""Add a 1-sized dimension at index "dim".
|
||||
"""
|
||||
# TODO: `keras_shape` inference.
|
||||
pattern = [i for i in range(x.type.ndim)]
|
||||
if dim < 0:
|
||||
if x.type.ndim == 0:
|
||||
@@ -680,20 +716,22 @@ def expand_dims(x, dim=-1):
|
||||
|
||||
|
||||
def squeeze(x, axis):
|
||||
'''Remove a 1-dimension from the tensor at index "axis".
|
||||
'''
|
||||
"""Remove a 1-dimension from the tensor at index "axis".
|
||||
"""
|
||||
# TODO: `keras_shape` inference.
|
||||
shape = list(x.shape)
|
||||
shape.pop(axis)
|
||||
return T.reshape(x, tuple(shape))
|
||||
|
||||
|
||||
def temporal_padding(x, padding=1):
|
||||
'''Pad the middle dimension of a 3D tensor
|
||||
"""Pad the middle dimension of a 3D tensor
|
||||
with "padding" zeros left and right.
|
||||
|
||||
Apologies for the inane API, but Theano makes this
|
||||
really hard.
|
||||
'''
|
||||
"""
|
||||
# TODO: `keras_shape` inference.
|
||||
input_shape = x.shape
|
||||
output_shape = (input_shape[0],
|
||||
input_shape[1] + 2 * padding,
|
||||
@@ -703,12 +741,13 @@ def temporal_padding(x, padding=1):
|
||||
|
||||
|
||||
def asymmetric_temporal_padding(x, left_pad=1, right_pad=1):
|
||||
'''Pad the middle dimension of a 3D tensor
|
||||
"""Pad the middle dimension of a 3D tensor
|
||||
with "left_pad" zeros left and "right_pad" right.
|
||||
|
||||
Apologies for the inane API, but Theano makes this
|
||||
really hard.
|
||||
'''
|
||||
"""
|
||||
# TODO: `keras_shape` inference.
|
||||
input_shape = x.shape
|
||||
output_shape = (input_shape[0],
|
||||
input_shape[1] + left_pad + right_pad,
|
||||
@@ -718,9 +757,10 @@ def asymmetric_temporal_padding(x, left_pad=1, right_pad=1):
|
||||
|
||||
|
||||
def spatial_2d_padding(x, padding=(1, 1), dim_ordering='default'):
|
||||
'''Pad the 2nd and 3rd dimensions of a 4D tensor
|
||||
"""Pad the 2nd and 3rd dimensions of a 4D tensor
|
||||
with "padding[0]" and "padding[1]" (resp.) zeros left and right.
|
||||
'''
|
||||
"""
|
||||
# TODO: `keras_shape` inference.
|
||||
if dim_ordering == 'default':
|
||||
dim_ordering = image_dim_ordering()
|
||||
if dim_ordering not in {'th', 'tf'}:
|
||||
@@ -756,10 +796,10 @@ def spatial_2d_padding(x, padding=(1, 1), dim_ordering='default'):
|
||||
def asymmetric_spatial_2d_padding(x, top_pad=1, bottom_pad=1,
|
||||
left_pad=1, right_pad=1,
|
||||
dim_ordering='default'):
|
||||
'''Pad the rows and columns of a 4D tensor
|
||||
"""Pad the rows and columns of a 4D tensor
|
||||
with "top_pad", "bottom_pad", "left_pad", "right_pad" (resp.) zeros
|
||||
rows on top, bottom; cols on left, right.
|
||||
'''
|
||||
"""
|
||||
if dim_ordering == 'default':
|
||||
dim_ordering = image_dim_ordering()
|
||||
if dim_ordering not in {'th', 'tf'}:
|
||||
@@ -794,9 +834,9 @@ def asymmetric_spatial_2d_padding(x, top_pad=1, bottom_pad=1,
|
||||
|
||||
|
||||
def spatial_3d_padding(x, padding=(1, 1, 1), dim_ordering='default'):
|
||||
'''Pad the 2nd, 3rd and 4th dimensions of a 5D tensor
|
||||
"""Pad the 2nd, 3rd and 4th dimensions of a 5D tensor
|
||||
with "padding[0]", "padding[1]" and "padding[2]" (resp.) zeros left and right.
|
||||
'''
|
||||
"""
|
||||
if dim_ordering == 'default':
|
||||
dim_ordering = image_dim_ordering()
|
||||
if dim_ordering not in {'th', 'tf'}:
|
||||
@@ -838,10 +878,10 @@ def stack(x):
|
||||
|
||||
|
||||
def one_hot(indices, nb_classes):
|
||||
'''Input: nD integer tensor of shape (batch_size, dim1, dim2, ... dim(n-1))
|
||||
"""Input: nD integer tensor of shape (batch_size, dim1, dim2, ... dim(n-1))
|
||||
Output: (n + 1)D one hot representation of the input
|
||||
with shape (batch_size, dim1, dim2, ... dim(n-1), nb_classes)
|
||||
'''
|
||||
"""
|
||||
input_shape = tuple((indices.shape[i] for i in range(indices.ndim)))
|
||||
indices = T.flatten(indices)
|
||||
oh = T.extra_ops.to_one_hot(indices, nb_classes)
|
||||
@@ -850,8 +890,8 @@ def one_hot(indices, nb_classes):
|
||||
|
||||
|
||||
def reverse(x, axes):
|
||||
'''Reverse a tensor along the the specified axes
|
||||
'''
|
||||
"""Reverse a tensor along the the specified axes
|
||||
"""
|
||||
if isinstance(axes, int):
|
||||
axes = [axes]
|
||||
slices = [slice(None, None, -1) if i in axes else slice(None, None, None) for i in range(x.ndim)]
|
||||
@@ -872,9 +912,9 @@ def get_value(x):
|
||||
|
||||
|
||||
def batch_get_value(xs):
|
||||
'''Returns the value of more than one tensor variable,
|
||||
"""Returns the value of more than one tensor variable,
|
||||
as a list of Numpy arrays.
|
||||
'''
|
||||
"""
|
||||
return [get_value(x) for x in xs]
|
||||
|
||||
|
||||
@@ -892,9 +932,9 @@ def get_variable_shape(x):
|
||||
|
||||
|
||||
def print_tensor(x, message=''):
|
||||
'''Print the message and the tensor when evaluated and return the same
|
||||
"""Print the message and the tensor when evaluated and return the same
|
||||
tensor.
|
||||
'''
|
||||
"""
|
||||
p_op = Print(message)
|
||||
return p_op(x)
|
||||
|
||||
@@ -934,9 +974,9 @@ def gradients(loss, variables):
|
||||
|
||||
|
||||
def stop_gradient(variables):
|
||||
'''Returns `variables` but with zero gradient with respect to every other
|
||||
"""Returns `variables` but with zero gradient with respect to every other
|
||||
variable.
|
||||
'''
|
||||
"""
|
||||
return theano.gradient.disconnected_grad(variables)
|
||||
|
||||
|
||||
@@ -945,7 +985,7 @@ def stop_gradient(variables):
|
||||
def rnn(step_function, inputs, initial_states,
|
||||
go_backwards=False, mask=None, constants=None,
|
||||
unroll=False, input_length=None):
|
||||
'''Iterates over the time dimension of a tensor.
|
||||
"""Iterates over the time dimension of a tensor.
|
||||
|
||||
# Arguments
|
||||
inputs: tensor of temporal data of shape (samples, time, ...)
|
||||
@@ -968,7 +1008,7 @@ def rnn(step_function, inputs, initial_states,
|
||||
mask: binary tensor with shape (samples, time),
|
||||
with a zero for every element that is masked.
|
||||
constants: a list of constant values passed at each step.
|
||||
unroll: whether to unroll the RNN or to use a symbolic loop (`scan`).
|
||||
unroll: whether to unroll the RNN or to use a symbolic loop (`while_loop` or `scan` depending on backend).
|
||||
input_length: must be specified if using `unroll`.
|
||||
|
||||
# Returns
|
||||
@@ -979,7 +1019,7 @@ def rnn(step_function, inputs, initial_states,
|
||||
at time t for sample s.
|
||||
new_states: list of tensors, latest states returned by
|
||||
the step function, of shape (samples, ...).
|
||||
'''
|
||||
"""
|
||||
ndim = inputs.ndim
|
||||
assert ndim >= 3, 'Input should be at least 3D.'
|
||||
|
||||
@@ -996,7 +1036,7 @@ def rnn(step_function, inputs, initial_states,
|
||||
constants = []
|
||||
|
||||
if mask is not None:
|
||||
if mask.ndim == ndim-1:
|
||||
if mask.ndim == ndim - 1:
|
||||
mask = expand_dims(mask)
|
||||
assert mask.ndim == ndim
|
||||
mask = mask.dimshuffle(axes)
|
||||
@@ -1035,6 +1075,8 @@ def rnn(step_function, inputs, initial_states,
|
||||
initial_output = step_function(inputs[0], initial_states + constants)[0] * 0
|
||||
# Theano gets confused by broadcasting patterns in the scan op
|
||||
initial_output = T.unbroadcast(initial_output, 0, 1)
|
||||
if len(initial_states) > 0:
|
||||
initial_states[0] = T.unbroadcast(initial_states[0], 0, 1)
|
||||
|
||||
def _step(input, mask, output_tm1, *states):
|
||||
output, new_states = step_function(input, states)
|
||||
@@ -1082,6 +1124,10 @@ def rnn(step_function, inputs, initial_states,
|
||||
output, new_states = step_function(input, states)
|
||||
return [output] + new_states
|
||||
|
||||
# Theano likes to make shape==1 dimensions in the initial states (outputs_info) broadcastable
|
||||
if len(initial_states) > 0:
|
||||
initial_states[0] = T.unbroadcast(initial_states[0], 1)
|
||||
|
||||
results, _ = theano.scan(
|
||||
_step,
|
||||
sequences=inputs,
|
||||
@@ -1107,12 +1153,20 @@ def rnn(step_function, inputs, initial_states,
|
||||
|
||||
|
||||
def switch(condition, then_expression, else_expression):
|
||||
'''condition: scalar tensor.
|
||||
'''
|
||||
"""condition: scalar tensor.
|
||||
"""
|
||||
if callable(then_expression):
|
||||
then_expression = then_expression()
|
||||
if callable(else_expression):
|
||||
else_expression = else_expression()
|
||||
return T.switch(condition, then_expression, else_expression)
|
||||
|
||||
|
||||
def in_train_phase(x, alt):
|
||||
if callable(x):
|
||||
x = x()
|
||||
if callable(alt):
|
||||
alt = alt()
|
||||
if _LEARNING_PHASE is 1:
|
||||
return x
|
||||
elif _LEARNING_PHASE is 0:
|
||||
@@ -1123,6 +1177,10 @@ def in_train_phase(x, alt):
|
||||
|
||||
|
||||
def in_test_phase(x, alt):
|
||||
if callable(x):
|
||||
x = x()
|
||||
if callable(alt):
|
||||
alt = alt()
|
||||
if _LEARNING_PHASE is 1:
|
||||
return alt
|
||||
elif _LEARNING_PHASE is 0:
|
||||
@@ -1214,7 +1272,7 @@ def tanh(x):
|
||||
|
||||
|
||||
def dropout(x, level, noise_shape=None, seed=None):
|
||||
'''Sets entries in `x` to zero at random,
|
||||
"""Sets entries in `x` to zero at random,
|
||||
while scaling the entire tensor.
|
||||
|
||||
# Arguments
|
||||
@@ -1224,11 +1282,13 @@ def dropout(x, level, noise_shape=None, seed=None):
|
||||
noise_shape: shape for randomly generated keep/drop flags,
|
||||
must be broadcastable to the shape of `x`
|
||||
seed: random seed to ensure determinism.
|
||||
'''
|
||||
"""
|
||||
if level < 0. or level >= 1:
|
||||
raise ValueError('Dropout level must be in interval [0, 1[.')
|
||||
if seed is None:
|
||||
seed = np.random.randint(1, 10e6)
|
||||
if isinstance(noise_shape, list):
|
||||
noise_shape = tuple(noise_shape)
|
||||
|
||||
rng = RandomStreams(seed=seed)
|
||||
retain_prob = 1. - level
|
||||
@@ -1237,8 +1297,8 @@ def dropout(x, level, noise_shape=None, seed=None):
|
||||
random_tensor = rng.binomial(x.shape, p=retain_prob, dtype=x.dtype)
|
||||
else:
|
||||
random_tensor = rng.binomial(noise_shape, p=retain_prob, dtype=x.dtype)
|
||||
random_tensor = T.patternbroadcast(random_tensor, [dim == 1 for dim in noise_shape])
|
||||
|
||||
random_tensor = T.patternbroadcast(random_tensor,
|
||||
[dim == 1 for dim in noise_shape])
|
||||
x *= random_tensor
|
||||
x /= retain_prob
|
||||
return x
|
||||
@@ -1250,7 +1310,7 @@ def l2_normalize(x, axis):
|
||||
|
||||
|
||||
def in_top_k(predictions, targets, k):
|
||||
'''Returns whether the `targets` are in the top `k` `predictions`
|
||||
"""Returns whether the `targets` are in the top `k` `predictions`
|
||||
|
||||
# Arguments
|
||||
predictions: A tensor of shape batch_size x classes and type float32.
|
||||
@@ -1260,7 +1320,7 @@ def in_top_k(predictions, targets, k):
|
||||
# Returns
|
||||
A tensor of shape batch_size and type int. output_i is 1 if
|
||||
targets_i is within top-k values of predictions_i
|
||||
'''
|
||||
"""
|
||||
predictions_top_k = T.argsort(predictions)[:, -k:]
|
||||
result, _ = theano.map(lambda prediction, target: any(equal(prediction, target)), sequences=[predictions_top_k, targets])
|
||||
return result
|
||||
@@ -1384,24 +1444,24 @@ def _preprocess_conv3d_filter_shape(dim_ordering, filter_shape):
|
||||
return filter_shape
|
||||
|
||||
|
||||
def _postprocess_conv2d_output(conv_out, x, border_mode, np_kernel, strides, dim_ordering):
|
||||
def _postprocess_conv2d_output(conv_out, x, border_mode, kernel_shape, strides, dim_ordering):
|
||||
if border_mode == 'same':
|
||||
if np_kernel.shape[2] % 2 == 0:
|
||||
if kernel_shape[2] % 2 == 0:
|
||||
conv_out = conv_out[:, :, :(x.shape[2] + strides[0] - 1) // strides[0], :]
|
||||
if np_kernel.shape[3] % 2 == 0:
|
||||
if kernel_shape[3] % 2 == 0:
|
||||
conv_out = conv_out[:, :, :, :(x.shape[3] + strides[1] - 1) // strides[1]]
|
||||
if dim_ordering == 'tf':
|
||||
conv_out = conv_out.dimshuffle((0, 2, 3, 1))
|
||||
return conv_out
|
||||
|
||||
|
||||
def _postprocess_conv3d_output(conv_out, x, border_mode, np_kernel, strides, dim_ordering):
|
||||
def _postprocess_conv3d_output(conv_out, x, border_mode, kernel_shape, strides, dim_ordering):
|
||||
if border_mode == 'same':
|
||||
if np_kernel.shape[2] % 2 == 0:
|
||||
if kernel_shape[2] % 2 == 0:
|
||||
conv_out = conv_out[:, :, :(x.shape[2] + strides[0] - 1) // strides[0], :, :]
|
||||
if np_kernel.shape[3] % 2 == 0:
|
||||
if kernel_shape[3] % 2 == 0:
|
||||
conv_out = conv_out[:, :, :, :(x.shape[3] + strides[1] - 1) // strides[1], :]
|
||||
if np_kernel.shape[4] % 2 == 0:
|
||||
if kernel_shape[4] % 2 == 0:
|
||||
conv_out = conv_out[:, :, :, :, :(x.shape[4] + strides[2] - 1) // strides[2]]
|
||||
if dim_ordering == 'tf':
|
||||
conv_out = conv_out.dimshuffle((0, 2, 3, 4, 1))
|
||||
@@ -1410,20 +1470,20 @@ def _postprocess_conv3d_output(conv_out, x, border_mode, np_kernel, strides, dim
|
||||
|
||||
def conv1d(x, kernel, stride=1, border_mode='valid',
|
||||
image_shape=None, filter_shape=None):
|
||||
'''1D convolution.
|
||||
"""1D convolution.
|
||||
|
||||
# Arguments
|
||||
kernel: kernel tensor.
|
||||
stride: stride integer.
|
||||
border_mode: string, "same" or "valid".
|
||||
'''
|
||||
"""
|
||||
raise NotImplementedError
|
||||
|
||||
|
||||
def conv2d(x, kernel, strides=(1, 1), border_mode='valid',
|
||||
dim_ordering='default', image_shape=None,
|
||||
filter_shape=None, filter_dilation=(1, 1)):
|
||||
'''2D convolution.
|
||||
"""2D convolution.
|
||||
|
||||
# Arguments
|
||||
kernel: kernel tensor.
|
||||
@@ -1432,7 +1492,7 @@ def conv2d(x, kernel, strides=(1, 1), border_mode='valid',
|
||||
dim_ordering: "tf" or "th".
|
||||
Whether to use Theano or TensorFlow dimension ordering
|
||||
in inputs/kernels/outputs.
|
||||
'''
|
||||
"""
|
||||
if dim_ordering == 'default':
|
||||
dim_ordering = image_dim_ordering()
|
||||
if dim_ordering not in {'th', 'tf'}:
|
||||
@@ -1441,7 +1501,13 @@ def conv2d(x, kernel, strides=(1, 1), border_mode='valid',
|
||||
x = _preprocess_conv2d_input(x, dim_ordering)
|
||||
kernel = _preprocess_conv2d_kernel(kernel, dim_ordering)
|
||||
th_border_mode = _preprocess_border_mode(border_mode)
|
||||
np_kernel = kernel.eval()
|
||||
|
||||
if hasattr(kernel, '_keras_shape'):
|
||||
kernel_shape = kernel._keras_shape
|
||||
else:
|
||||
# Will only work if `kernel` is a shared variable.
|
||||
kernel_shape = kernel.eval().shape
|
||||
|
||||
image_shape = _preprocess_conv2d_image_shape(dim_ordering, image_shape)
|
||||
filter_shape = _preprocess_conv2d_filter_shape(dim_ordering, filter_shape)
|
||||
|
||||
@@ -1453,6 +1519,11 @@ def conv2d(x, kernel, strides=(1, 1), border_mode='valid',
|
||||
input_shape=image_shape,
|
||||
filter_shape=filter_shape)
|
||||
else:
|
||||
# T.nnet.conv2d uses **kwargs, so the filter_dilation parameter will be
|
||||
# ignored by versions that do not support it
|
||||
if 'filter_dilation' not in inspect.getargspec(T.nnet.conv2d).args:
|
||||
raise ValueError('conv2d with filter dilation requires Theano '
|
||||
'0.9.0dev2 or newer.')
|
||||
conv_out = T.nnet.conv2d(x, kernel,
|
||||
border_mode=th_border_mode,
|
||||
subsample=strides,
|
||||
@@ -1460,8 +1531,8 @@ def conv2d(x, kernel, strides=(1, 1), border_mode='valid',
|
||||
filter_shape=filter_shape,
|
||||
filter_dilation=filter_dilation)
|
||||
|
||||
conv_out = _postprocess_conv2d_output(conv_out, x, border_mode, np_kernel,
|
||||
strides, dim_ordering)
|
||||
conv_out = _postprocess_conv2d_output(conv_out, x, border_mode,
|
||||
kernel_shape, strides, dim_ordering)
|
||||
return conv_out
|
||||
|
||||
|
||||
@@ -1469,7 +1540,7 @@ def deconv2d(x, kernel, output_shape, strides=(1, 1),
|
||||
border_mode='valid',
|
||||
dim_ordering='default',
|
||||
image_shape=None, filter_shape=None):
|
||||
'''2D deconvolution (transposed convolution).
|
||||
"""2D deconvolution (transposed convolution).
|
||||
|
||||
# Arguments
|
||||
kernel: kernel tensor.
|
||||
@@ -1479,18 +1550,27 @@ def deconv2d(x, kernel, output_shape, strides=(1, 1),
|
||||
dim_ordering: "tf" or "th".
|
||||
Whether to use Theano or TensorFlow dimension ordering
|
||||
in inputs/kernels/outputs.
|
||||
'''
|
||||
"""
|
||||
flip_filters = False
|
||||
if dim_ordering == 'default':
|
||||
dim_ordering = image_dim_ordering()
|
||||
if dim_ordering not in {'th', 'tf'}:
|
||||
raise ValueError('Unknown dim_ordering ' + dim_ordering)
|
||||
|
||||
if dim_ordering == 'tf':
|
||||
output_shape = (output_shape[0], output_shape[3], output_shape[1], output_shape[2])
|
||||
|
||||
x = _preprocess_conv2d_input(x, dim_ordering)
|
||||
kernel = _preprocess_conv2d_kernel(kernel, dim_ordering)
|
||||
kernel = kernel.dimshuffle((1, 0, 2, 3))
|
||||
th_border_mode = _preprocess_border_mode(border_mode)
|
||||
np_kernel = kernel.eval()
|
||||
|
||||
if hasattr(kernel, '_keras_shape'):
|
||||
kernel_shape = kernel._keras_shape
|
||||
else:
|
||||
# Will only work if `kernel` is a shared variable.
|
||||
kernel_shape = kernel.eval().shape
|
||||
|
||||
filter_shape = _preprocess_conv2d_filter_shape(dim_ordering, filter_shape)
|
||||
filter_shape = tuple(filter_shape[i] for i in (1, 0, 2, 3))
|
||||
|
||||
@@ -1501,8 +1581,8 @@ def deconv2d(x, kernel, output_shape, strides=(1, 1),
|
||||
filter_flip=not flip_filters)
|
||||
conv_out = op(kernel, x, output_shape[2:])
|
||||
|
||||
conv_out = _postprocess_conv2d_output(conv_out, x, border_mode, np_kernel,
|
||||
strides, dim_ordering)
|
||||
conv_out = _postprocess_conv2d_output(conv_out, x, border_mode,
|
||||
kernel_shape, strides, dim_ordering)
|
||||
return conv_out
|
||||
|
||||
|
||||
@@ -1522,7 +1602,7 @@ def conv3d(x, kernel, strides=(1, 1, 1),
|
||||
border_mode='valid', dim_ordering='default',
|
||||
volume_shape=None, filter_shape=None,
|
||||
filter_dilation=(1, 1, 1)):
|
||||
'''3D convolution.
|
||||
"""3D convolution.
|
||||
|
||||
# Arguments
|
||||
kernel: kernel tensor.
|
||||
@@ -1531,7 +1611,7 @@ def conv3d(x, kernel, strides=(1, 1, 1),
|
||||
dim_ordering: "tf" or "th".
|
||||
Whether to use Theano or TensorFlow dimension ordering
|
||||
in inputs/kernels/outputs.
|
||||
'''
|
||||
"""
|
||||
if dim_ordering == 'default':
|
||||
dim_ordering = image_dim_ordering()
|
||||
if dim_ordering not in {'th', 'tf'}:
|
||||
@@ -1549,7 +1629,13 @@ def conv3d(x, kernel, strides=(1, 1, 1),
|
||||
x = _preprocess_conv3d_input(x, dim_ordering)
|
||||
kernel = _preprocess_conv3d_kernel(kernel, dim_ordering)
|
||||
th_border_mode = _preprocess_border_mode(border_mode)
|
||||
np_kernel = kernel.eval()
|
||||
|
||||
if hasattr(kernel, '_keras_shape'):
|
||||
kernel_shape = kernel._keras_shape
|
||||
else:
|
||||
# Will only work if `kernel` is a shared variable.
|
||||
kernel_shape = kernel.eval().shape
|
||||
|
||||
volume_shape = _preprocess_conv3d_volume_shape(dim_ordering, volume_shape)
|
||||
filter_shape = _preprocess_conv3d_filter_shape(dim_ordering, filter_shape)
|
||||
|
||||
@@ -1560,8 +1646,8 @@ def conv3d(x, kernel, strides=(1, 1, 1),
|
||||
filter_shape=filter_shape,
|
||||
filter_dilation=filter_dilation)
|
||||
|
||||
conv_out = _postprocess_conv3d_output(conv_out, x, border_mode, np_kernel,
|
||||
strides, dim_ordering)
|
||||
conv_out = _postprocess_conv3d_output(conv_out, x, border_mode,
|
||||
kernel_shape, strides, dim_ordering)
|
||||
return conv_out
|
||||
|
||||
|
||||
@@ -1569,10 +1655,10 @@ def conv3d(x, kernel, strides=(1, 1, 1),
|
||||
def _old_theano_conv3d(x, kernel, strides=(1, 1, 1),
|
||||
border_mode='valid', dim_ordering='default',
|
||||
volume_shape=None, filter_shape=None):
|
||||
'''
|
||||
"""
|
||||
Run on cuDNN if available.
|
||||
border_mode: string, "same" or "valid".
|
||||
'''
|
||||
"""
|
||||
if dim_ordering == 'default':
|
||||
dim_ordering = image_dim_ordering()
|
||||
if dim_ordering not in {'th', 'tf'}:
|
||||
@@ -1871,6 +1957,7 @@ def random_binomial(shape, p=0.0, dtype=None, seed=None):
|
||||
# Note that tensorflow's native CTC code is significantly
|
||||
# faster than this
|
||||
|
||||
|
||||
def ctc_interleave_blanks(Y):
|
||||
Y_ = T.alloc(-1, Y.shape[0] * 2 + 1)
|
||||
Y_ = T.set_subtensor(Y_[T.arange(Y.shape[0]) * 2 + 1], Y)
|
||||
@@ -1941,7 +2028,7 @@ def ctc_cost(predict, Y):
|
||||
|
||||
# batchifies original CTC code
|
||||
def ctc_batch_cost(y_true, y_pred, input_length, label_length):
|
||||
'''Runs CTC loss algorithm on each batch element.
|
||||
"""Runs CTC loss algorithm on each batch element.
|
||||
|
||||
# Arguments
|
||||
y_true: tensor (samples, max_string_length) containing the truth labels
|
||||
@@ -1955,7 +2042,7 @@ def ctc_batch_cost(y_true, y_pred, input_length, label_length):
|
||||
# Returns
|
||||
Tensor with shape (samples,1) containing the
|
||||
CTC loss of each element
|
||||
'''
|
||||
"""
|
||||
|
||||
def ctc_step(y_true_step, y_pred_step, input_length_step, label_length_step):
|
||||
y_pred_step = y_pred_step[0: input_length_step[0]]
|
||||
@@ -1975,7 +2062,7 @@ def ctc_batch_cost(y_true, y_pred, input_length, label_length):
|
||||
# HIGH ORDER FUNCTIONS
|
||||
|
||||
def map_fn(fn, elems, name=None):
|
||||
'''Map the function fn over the elements elems and return the outputs.
|
||||
"""Map the function fn over the elements elems and return the outputs.
|
||||
|
||||
# Arguments
|
||||
fn: Callable that will be called upon each element in elems
|
||||
@@ -1985,12 +2072,12 @@ def map_fn(fn, elems, name=None):
|
||||
# Returns
|
||||
Tensor with first dimension equal to the elems and second depending on
|
||||
fn
|
||||
'''
|
||||
"""
|
||||
return theano.map(fn, elems, name=name)[0]
|
||||
|
||||
|
||||
def foldl(fn, elems, initializer=None, name=None):
|
||||
'''Reduce elems using fn to combine them from left to right.
|
||||
"""Reduce elems using fn to combine them from left to right.
|
||||
|
||||
# Arguments
|
||||
fn: Callable that will be called upon each element in elems and an
|
||||
@@ -2001,7 +2088,7 @@ def foldl(fn, elems, initializer=None, name=None):
|
||||
|
||||
# Returns
|
||||
Same type and shape as initializer
|
||||
'''
|
||||
"""
|
||||
if initializer is None:
|
||||
initializer = elems[0]
|
||||
elems = elems[1:]
|
||||
@@ -2014,7 +2101,7 @@ def foldl(fn, elems, initializer=None, name=None):
|
||||
|
||||
|
||||
def foldr(fn, elems, initializer=None, name=None):
|
||||
'''Reduce elems using fn to combine them from right to left.
|
||||
"""Reduce elems using fn to combine them from right to left.
|
||||
|
||||
# Arguments
|
||||
fn: Callable that will be called upon each element in elems and an
|
||||
@@ -2025,7 +2112,7 @@ def foldr(fn, elems, initializer=None, name=None):
|
||||
|
||||
# Returns
|
||||
Same type and shape as initializer
|
||||
'''
|
||||
"""
|
||||
if initializer is None:
|
||||
initializer = elems[-1]
|
||||
elems = elems[:-1]
|
||||
|
||||
+274
-139
@@ -9,53 +9,101 @@ import time
|
||||
import json
|
||||
import warnings
|
||||
|
||||
from collections import deque, OrderedDict, Iterable
|
||||
from collections import deque
|
||||
from collections import OrderedDict
|
||||
from collections import Iterable
|
||||
from .utils.generic_utils import Progbar
|
||||
from keras import backend as K
|
||||
from pkg_resources import parse_version
|
||||
|
||||
try:
|
||||
import requests
|
||||
except ImportError:
|
||||
requests = None
|
||||
|
||||
if K.backend() == 'tensorflow':
|
||||
import tensorflow as tf
|
||||
|
||||
|
||||
class CallbackList(object):
|
||||
def __init__(self, callbacks=[], queue_length=10):
|
||||
"""Container abstracting a list of callbacks.
|
||||
|
||||
# Arguments
|
||||
callbacks: List of `Callback` instances.
|
||||
queue_length: Queue length for keeping
|
||||
running statistics over callback execution time.
|
||||
"""
|
||||
|
||||
def __init__(self, callbacks=None, queue_length=10):
|
||||
callbacks = callbacks or []
|
||||
self.callbacks = [c for c in callbacks]
|
||||
self.queue_length = queue_length
|
||||
|
||||
def append(self, callback):
|
||||
self.callbacks.append(callback)
|
||||
|
||||
def _set_params(self, params):
|
||||
def set_params(self, params):
|
||||
for callback in self.callbacks:
|
||||
callback._set_params(params)
|
||||
callback.set_params(params)
|
||||
|
||||
def _set_model(self, model):
|
||||
def set_model(self, model):
|
||||
for callback in self.callbacks:
|
||||
callback._set_model(model)
|
||||
callback.set_model(model)
|
||||
|
||||
def on_epoch_begin(self, epoch, logs={}):
|
||||
def on_epoch_begin(self, epoch, logs=None):
|
||||
"""Called at the start of an epoch.
|
||||
|
||||
# Arguments
|
||||
epoch: integer, index of epoch.
|
||||
logs: dictionary of logs.
|
||||
"""
|
||||
logs = logs or {}
|
||||
for callback in self.callbacks:
|
||||
callback.on_epoch_begin(epoch, logs)
|
||||
self._delta_t_batch = 0.
|
||||
self._delta_ts_batch_begin = deque([], maxlen=self.queue_length)
|
||||
self._delta_ts_batch_end = deque([], maxlen=self.queue_length)
|
||||
|
||||
def on_epoch_end(self, epoch, logs={}):
|
||||
def on_epoch_end(self, epoch, logs=None):
|
||||
"""Called at the end of an epoch.
|
||||
|
||||
# Arguments
|
||||
epoch: integer, index of epoch.
|
||||
logs: dictionary of logs.
|
||||
"""
|
||||
logs = logs or {}
|
||||
for callback in self.callbacks:
|
||||
callback.on_epoch_end(epoch, logs)
|
||||
|
||||
def on_batch_begin(self, batch, logs={}):
|
||||
def on_batch_begin(self, batch, logs=None):
|
||||
"""Called right before processing a batch.
|
||||
|
||||
# Arguments
|
||||
batch: integer, index of batch within the current epoch.
|
||||
logs: dictionary of logs.
|
||||
"""
|
||||
logs = logs or {}
|
||||
t_before_callbacks = time.time()
|
||||
for callback in self.callbacks:
|
||||
callback.on_batch_begin(batch, logs)
|
||||
self._delta_ts_batch_begin.append(time.time() - t_before_callbacks)
|
||||
delta_t_median = np.median(self._delta_ts_batch_begin)
|
||||
if self._delta_t_batch > 0. and delta_t_median > 0.95 * \
|
||||
self._delta_t_batch and delta_t_median > 0.1:
|
||||
if (self._delta_t_batch > 0. and
|
||||
delta_t_median > 0.95 * self._delta_t_batch and
|
||||
delta_t_median > 0.1):
|
||||
warnings.warn('Method on_batch_begin() is slow compared '
|
||||
'to the batch update (%f). Check your callbacks.'
|
||||
% delta_t_median)
|
||||
self._t_enter_batch = time.time()
|
||||
|
||||
def on_batch_end(self, batch, logs={}):
|
||||
def on_batch_end(self, batch, logs=None):
|
||||
"""Called at the end of a batch.
|
||||
|
||||
# Arguments
|
||||
batch: integer, index of batch within the current epoch.
|
||||
logs: dictionary of logs.
|
||||
"""
|
||||
logs = logs or {}
|
||||
if not hasattr(self, '_t_enter_batch'):
|
||||
self._t_enter_batch = time.time()
|
||||
self._delta_t_batch = time.time() - self._t_enter_batch
|
||||
@@ -64,22 +112,35 @@ class CallbackList(object):
|
||||
callback.on_batch_end(batch, logs)
|
||||
self._delta_ts_batch_end.append(time.time() - t_before_callbacks)
|
||||
delta_t_median = np.median(self._delta_ts_batch_end)
|
||||
if self._delta_t_batch > 0. and (delta_t_median > 0.95 * self._delta_t_batch and delta_t_median > 0.1):
|
||||
if (self._delta_t_batch > 0. and
|
||||
(delta_t_median > 0.95 * self._delta_t_batch and delta_t_median > 0.1)):
|
||||
warnings.warn('Method on_batch_end() is slow compared '
|
||||
'to the batch update (%f). Check your callbacks.'
|
||||
% delta_t_median)
|
||||
|
||||
def on_train_begin(self, logs={}):
|
||||
def on_train_begin(self, logs=None):
|
||||
"""Called at the beginning of training.
|
||||
|
||||
# Arguments
|
||||
logs: dictionary of logs.
|
||||
"""
|
||||
logs = logs or {}
|
||||
for callback in self.callbacks:
|
||||
callback.on_train_begin(logs)
|
||||
|
||||
def on_train_end(self, logs={}):
|
||||
def on_train_end(self, logs=None):
|
||||
"""Called at the end of training.
|
||||
|
||||
# Arguments
|
||||
logs: dictionary of logs.
|
||||
"""
|
||||
logs = logs or {}
|
||||
for callback in self.callbacks:
|
||||
callback.on_train_end(logs)
|
||||
|
||||
|
||||
class Callback(object):
|
||||
'''Abstract base class used to build new callbacks.
|
||||
"""Abstract base class used to build new callbacks.
|
||||
|
||||
# Properties
|
||||
params: dict. Training parameters
|
||||
@@ -103,47 +164,48 @@ class Callback(object):
|
||||
the number of samples in the current batch.
|
||||
on_batch_end: logs include `loss`, and optionally `acc`
|
||||
(if accuracy monitoring is enabled).
|
||||
'''
|
||||
"""
|
||||
|
||||
def __init__(self):
|
||||
pass
|
||||
|
||||
def _set_params(self, params):
|
||||
def set_params(self, params):
|
||||
self.params = params
|
||||
|
||||
def _set_model(self, model):
|
||||
def set_model(self, model):
|
||||
self.model = model
|
||||
|
||||
def on_epoch_begin(self, epoch, logs={}):
|
||||
def on_epoch_begin(self, epoch, logs=None):
|
||||
pass
|
||||
|
||||
def on_epoch_end(self, epoch, logs={}):
|
||||
def on_epoch_end(self, epoch, logs=None):
|
||||
pass
|
||||
|
||||
def on_batch_begin(self, batch, logs={}):
|
||||
def on_batch_begin(self, batch, logs=None):
|
||||
pass
|
||||
|
||||
def on_batch_end(self, batch, logs={}):
|
||||
def on_batch_end(self, batch, logs=None):
|
||||
pass
|
||||
|
||||
def on_train_begin(self, logs={}):
|
||||
def on_train_begin(self, logs=None):
|
||||
pass
|
||||
|
||||
def on_train_end(self, logs={}):
|
||||
def on_train_end(self, logs=None):
|
||||
pass
|
||||
|
||||
|
||||
class BaseLogger(Callback):
|
||||
'''Callback that accumulates epoch averages of
|
||||
the metrics being monitored.
|
||||
"""Callback that accumulates epoch averages of metrics.
|
||||
|
||||
This callback is automatically applied to
|
||||
every Keras model.
|
||||
'''
|
||||
def on_epoch_begin(self, epoch, logs={}):
|
||||
This callback is automatically applied to every Keras model.
|
||||
"""
|
||||
|
||||
def on_epoch_begin(self, epoch, logs=None):
|
||||
self.seen = 0
|
||||
self.totals = {}
|
||||
|
||||
def on_batch_end(self, batch, logs={}):
|
||||
def on_batch_end(self, batch, logs=None):
|
||||
logs = logs or {}
|
||||
batch_size = logs.get('size', 0)
|
||||
self.seen += batch_size
|
||||
|
||||
@@ -153,32 +215,35 @@ class BaseLogger(Callback):
|
||||
else:
|
||||
self.totals[k] = v * batch_size
|
||||
|
||||
def on_epoch_end(self, epoch, logs={}):
|
||||
for k in self.params['metrics']:
|
||||
if k in self.totals:
|
||||
# make value available to next callbacks
|
||||
logs[k] = self.totals[k] / self.seen
|
||||
def on_epoch_end(self, epoch, logs=None):
|
||||
if logs is not None:
|
||||
for k in self.params['metrics']:
|
||||
if k in self.totals:
|
||||
# Make value available to next callbacks.
|
||||
logs[k] = self.totals[k] / self.seen
|
||||
|
||||
|
||||
class ProgbarLogger(Callback):
|
||||
'''Callback that prints metrics to stdout.
|
||||
'''
|
||||
def on_train_begin(self, logs={}):
|
||||
"""Callback that prints metrics to stdout.
|
||||
"""
|
||||
|
||||
def on_train_begin(self, logs=None):
|
||||
self.verbose = self.params['verbose']
|
||||
self.nb_epoch = self.params['nb_epoch']
|
||||
|
||||
def on_epoch_begin(self, epoch, logs={}):
|
||||
def on_epoch_begin(self, epoch, logs=None):
|
||||
if self.verbose:
|
||||
print('Epoch %d/%d' % (epoch + 1, self.nb_epoch))
|
||||
self.progbar = Progbar(target=self.params['nb_sample'],
|
||||
verbose=self.verbose)
|
||||
self.seen = 0
|
||||
|
||||
def on_batch_begin(self, batch, logs={}):
|
||||
def on_batch_begin(self, batch, logs=None):
|
||||
if self.seen < self.params['nb_sample']:
|
||||
self.log_values = []
|
||||
|
||||
def on_batch_end(self, batch, logs={}):
|
||||
def on_batch_end(self, batch, logs=None):
|
||||
logs = logs or {}
|
||||
batch_size = logs.get('size', 0)
|
||||
self.seen += batch_size
|
||||
|
||||
@@ -186,12 +251,13 @@ class ProgbarLogger(Callback):
|
||||
if k in logs:
|
||||
self.log_values.append((k, logs[k]))
|
||||
|
||||
# skip progbar update for the last batch;
|
||||
# will be handled by on_epoch_end
|
||||
# Skip progbar update for the last batch;
|
||||
# will be handled by on_epoch_end.
|
||||
if self.verbose and self.seen < self.params['nb_sample']:
|
||||
self.progbar.update(self.seen, self.log_values)
|
||||
|
||||
def on_epoch_end(self, epoch, logs={}):
|
||||
def on_epoch_end(self, epoch, logs=None):
|
||||
logs = logs or {}
|
||||
for k in self.params['metrics']:
|
||||
if k in logs:
|
||||
self.log_values.append((k, logs[k]))
|
||||
@@ -200,33 +266,34 @@ class ProgbarLogger(Callback):
|
||||
|
||||
|
||||
class History(Callback):
|
||||
'''Callback that records events
|
||||
into a `History` object.
|
||||
"""Callback that records events into a `History` object.
|
||||
|
||||
This callback is automatically applied to
|
||||
every Keras model. The `History` object
|
||||
gets returned by the `fit` method of models.
|
||||
'''
|
||||
def on_train_begin(self, logs={}):
|
||||
"""
|
||||
|
||||
def on_train_begin(self, logs=None):
|
||||
self.epoch = []
|
||||
self.history = {}
|
||||
|
||||
def on_epoch_end(self, epoch, logs={}):
|
||||
def on_epoch_end(self, epoch, logs=None):
|
||||
logs = logs or {}
|
||||
self.epoch.append(epoch)
|
||||
for k, v in logs.items():
|
||||
self.history.setdefault(k, []).append(v)
|
||||
|
||||
|
||||
class ModelCheckpoint(Callback):
|
||||
'''Save the model after every epoch.
|
||||
"""Save the model after every epoch.
|
||||
|
||||
`filepath` can contain named formatting options,
|
||||
which will be filled with the value of `epoch` and
|
||||
keys in `logs` (passed in `on_epoch_end`).
|
||||
|
||||
For example: if `filepath` is `weights.{epoch:02d}-{val_loss:.2f}.hdf5`,
|
||||
then multiple files will be save with the epoch number and
|
||||
the validation loss.
|
||||
then the model checkpoints will be saved with the epoch number and
|
||||
the validation loss in the filename.
|
||||
|
||||
# Arguments
|
||||
filepath: string, path to save the model file.
|
||||
@@ -247,8 +314,8 @@ class ModelCheckpoint(Callback):
|
||||
saved (`model.save_weights(filepath)`), else the full model
|
||||
is saved (`model.save(filepath)`).
|
||||
period: Interval (number of epochs) between checkpoints.
|
||||
"""
|
||||
|
||||
'''
|
||||
def __init__(self, filepath, monitor='val_loss', verbose=0,
|
||||
save_best_only=False, save_weights_only=False,
|
||||
mode='auto', period=1):
|
||||
@@ -274,14 +341,15 @@ class ModelCheckpoint(Callback):
|
||||
self.monitor_op = np.greater
|
||||
self.best = -np.Inf
|
||||
else:
|
||||
if 'acc' in self.monitor:
|
||||
if 'acc' in self.monitor or self.monitor.startswith('fmeasure'):
|
||||
self.monitor_op = np.greater
|
||||
self.best = -np.Inf
|
||||
else:
|
||||
self.monitor_op = np.less
|
||||
self.best = np.Inf
|
||||
|
||||
def on_epoch_end(self, epoch, logs={}):
|
||||
def on_epoch_end(self, epoch, logs=None):
|
||||
logs = logs or {}
|
||||
self.epochs_since_last_save += 1
|
||||
if self.epochs_since_last_save >= self.period:
|
||||
self.epochs_since_last_save = 0
|
||||
@@ -317,7 +385,7 @@ class ModelCheckpoint(Callback):
|
||||
|
||||
|
||||
class EarlyStopping(Callback):
|
||||
'''Stop training when a monitored quantity has stopped improving.
|
||||
"""Stop training when a monitored quantity has stopped improving.
|
||||
|
||||
# Arguments
|
||||
monitor: quantity to be monitored.
|
||||
@@ -335,8 +403,10 @@ class EarlyStopping(Callback):
|
||||
monitored has stopped increasing; in `auto`
|
||||
mode, the direction is automatically inferred
|
||||
from the name of the monitored quantity.
|
||||
'''
|
||||
def __init__(self, monitor='val_loss', min_delta=0, patience=0, verbose=0, mode='auto'):
|
||||
"""
|
||||
|
||||
def __init__(self, monitor='val_loss',
|
||||
min_delta=0, patience=0, verbose=0, mode='auto'):
|
||||
super(EarlyStopping, self).__init__()
|
||||
|
||||
self.monitor = monitor
|
||||
@@ -357,7 +427,7 @@ class EarlyStopping(Callback):
|
||||
elif mode == 'max':
|
||||
self.monitor_op = np.greater
|
||||
else:
|
||||
if 'acc' in self.monitor:
|
||||
if 'acc' in self.monitor or self.monitor.startswith('fmeasure'):
|
||||
self.monitor_op = np.greater
|
||||
else:
|
||||
self.monitor_op = np.less
|
||||
@@ -367,11 +437,11 @@ class EarlyStopping(Callback):
|
||||
else:
|
||||
self.min_delta *= -1
|
||||
|
||||
def on_train_begin(self, logs={}):
|
||||
self.wait = 0 # Allow instances to be re-used
|
||||
def on_train_begin(self, logs=None):
|
||||
self.wait = 0 # Allow instances to be re-used
|
||||
self.best = np.Inf if self.monitor_op == np.less else -np.Inf
|
||||
|
||||
def on_epoch_end(self, epoch, logs={}):
|
||||
def on_epoch_end(self, epoch, logs=None):
|
||||
current = logs.get(self.monitor)
|
||||
if current is None:
|
||||
warnings.warn('Early stopping requires %s available!' %
|
||||
@@ -386,37 +456,48 @@ class EarlyStopping(Callback):
|
||||
self.model.stop_training = True
|
||||
self.wait += 1
|
||||
|
||||
def on_train_end(self, logs={}):
|
||||
def on_train_end(self, logs=None):
|
||||
if self.stopped_epoch > 0 and self.verbose > 0:
|
||||
print('Epoch %05d: early stopping' % (self.stopped_epoch))
|
||||
|
||||
|
||||
class RemoteMonitor(Callback):
|
||||
'''Callback used to stream events to a server.
|
||||
"""Callback used to stream events to a server.
|
||||
|
||||
Requires the `requests` library.
|
||||
Events are sent to `root + '/publish/epoch/end/'` by default. Calls are
|
||||
HTTP POST, with a `data` argument which is a
|
||||
JSON-encoded dictionary of event data.
|
||||
|
||||
# Arguments
|
||||
root: root url to which the events will be sent (at the end
|
||||
of every epoch). Events are sent to
|
||||
`root + '/publish/epoch/end/'` by default. Calls are
|
||||
HTTP POST, with a `data` argument which is a
|
||||
JSON-encoded dictionary of event data.
|
||||
'''
|
||||
root: String; root url of the target server.
|
||||
path: String; path relative to `root` to which the events will be sent.
|
||||
field: String; JSON field under which the data will be stored.
|
||||
headers: Dictionary; optional custom HTTP headers.
|
||||
Defaults to:
|
||||
`{'Accept': 'application/json',
|
||||
'Content-Type': 'application/json'}`
|
||||
"""
|
||||
|
||||
def __init__(self,
|
||||
root='http://localhost:9000',
|
||||
path='/publish/epoch/end/',
|
||||
field='data',
|
||||
headers={'Accept': 'application/json', 'Content-Type': 'application/json'}):
|
||||
headers=None):
|
||||
super(RemoteMonitor, self).__init__()
|
||||
if headers is None:
|
||||
headers = {'Accept': 'application/json',
|
||||
'Content-Type': 'application/json'}
|
||||
self.root = root
|
||||
self.path = path
|
||||
self.field = field
|
||||
self.headers = headers
|
||||
|
||||
def on_epoch_end(self, epoch, logs={}):
|
||||
import requests
|
||||
def on_epoch_end(self, epoch, logs=None):
|
||||
if requests is None:
|
||||
raise ImportError('RemoteMonitor requires '
|
||||
'the `requests` library.')
|
||||
logs = logs or {}
|
||||
send = {}
|
||||
send['epoch'] = epoch
|
||||
for k, v in logs.items():
|
||||
@@ -425,37 +506,36 @@ class RemoteMonitor(Callback):
|
||||
requests.post(self.root + self.path,
|
||||
{self.field: json.dumps(send)},
|
||||
headers=self.headers)
|
||||
except:
|
||||
print('Warning: could not reach RemoteMonitor '
|
||||
'root server at ' + str(self.root))
|
||||
except requests.exceptions.RequestException:
|
||||
warnings.warn('Warning: could not reach RemoteMonitor '
|
||||
'root server at ' + str(self.root))
|
||||
|
||||
|
||||
class LearningRateScheduler(Callback):
|
||||
'''Learning rate scheduler.
|
||||
"""Learning rate scheduler.
|
||||
|
||||
# Arguments
|
||||
schedule: a function that takes an epoch index as input
|
||||
(integer, indexed from 0) and returns a new
|
||||
learning rate as output (float).
|
||||
'''
|
||||
"""
|
||||
|
||||
def __init__(self, schedule):
|
||||
super(LearningRateScheduler, self).__init__()
|
||||
self.schedule = schedule
|
||||
|
||||
def on_epoch_begin(self, epoch, logs={}):
|
||||
assert hasattr(self.model.optimizer, 'lr'), \
|
||||
'Optimizer must have a "lr" attribute.'
|
||||
def on_epoch_begin(self, epoch, logs=None):
|
||||
if not hasattr(self.model.optimizer, 'lr'):
|
||||
raise ValueError('Optimizer must have a "lr" attribute.')
|
||||
lr = self.schedule(epoch)
|
||||
|
||||
if not isinstance(lr, (float, np.float32, np.float64)):
|
||||
raise ValueError('The output of the "schedule" function '
|
||||
'should be float.')
|
||||
|
||||
K.set_value(self.model.optimizer.lr, lr)
|
||||
|
||||
|
||||
class TensorBoard(Callback):
|
||||
''' Tensorboard basic visualizations.
|
||||
"""Tensorboard basic visualizations.
|
||||
|
||||
This callback writes a log for TensorBoard, which allows
|
||||
you to visualize dynamic graphs of your training and test
|
||||
@@ -481,11 +561,14 @@ class TensorBoard(Callback):
|
||||
write_graph: whether to visualize the graph in Tensorboard.
|
||||
The log file can become quite large when
|
||||
write_graph is set to True.
|
||||
'''
|
||||
"""
|
||||
|
||||
def __init__(self, log_dir='./logs', histogram_freq=0, write_graph=True, write_images=False):
|
||||
def __init__(self, log_dir='./logs',
|
||||
histogram_freq=0,
|
||||
write_graph=True,
|
||||
write_images=False):
|
||||
super(TensorBoard, self).__init__()
|
||||
if K._BACKEND != 'tensorflow':
|
||||
if K.backend() != 'tensorflow':
|
||||
raise RuntimeError('TensorBoard callback only works '
|
||||
'with the TensorFlow backend.')
|
||||
self.log_dir = log_dir
|
||||
@@ -494,17 +577,17 @@ class TensorBoard(Callback):
|
||||
self.write_graph = write_graph
|
||||
self.write_images = write_images
|
||||
|
||||
def _set_model(self, model):
|
||||
import tensorflow as tf
|
||||
import keras.backend.tensorflow_backend as KTF
|
||||
|
||||
def set_model(self, model):
|
||||
self.model = model
|
||||
self.sess = KTF.get_session()
|
||||
self.sess = K.get_session()
|
||||
if self.histogram_freq and self.merged is None:
|
||||
for layer in self.model.layers:
|
||||
|
||||
for weight in layer.weights:
|
||||
tf.histogram_summary(weight.name, weight)
|
||||
if hasattr(tf, 'histogram_summary'):
|
||||
tf.histogram_summary(weight.name, weight)
|
||||
else:
|
||||
tf.summary.histogram(weight.name, weight)
|
||||
|
||||
if self.write_images:
|
||||
w_img = tf.squeeze(weight)
|
||||
@@ -518,17 +601,26 @@ class TensorBoard(Callback):
|
||||
|
||||
w_img = tf.expand_dims(tf.expand_dims(w_img, 0), -1)
|
||||
|
||||
tf.image_summary(weight.name, w_img)
|
||||
if hasattr(tf, 'image_summary'):
|
||||
tf.image_summary(weight.name, w_img)
|
||||
else:
|
||||
tf.summary.image(weight.name, w_img)
|
||||
|
||||
if hasattr(layer, 'output'):
|
||||
tf.histogram_summary('{}_out'.format(layer.name),
|
||||
layer.output)
|
||||
if parse_version(tf.__version__) >= parse_version('0.12.0'):
|
||||
self.merged = tf.summary.merge_all()
|
||||
else:
|
||||
if hasattr(tf, 'histogram_summary'):
|
||||
tf.histogram_summary('{}_out'.format(layer.name),
|
||||
layer.output)
|
||||
else:
|
||||
tf.summary.histogram('{}_out'.format(layer.name),
|
||||
layer.output)
|
||||
|
||||
if hasattr(tf, 'merge_all_summaries'):
|
||||
self.merged = tf.merge_all_summaries()
|
||||
else:
|
||||
self.merged = tf.summary.merge_all()
|
||||
|
||||
if self.write_graph:
|
||||
if parse_version(tf.__version__) >= parse_version('0.12.0'):
|
||||
if hasattr(tf, 'summary') and hasattr(tf.summary, 'FileWriter'):
|
||||
self.writer = tf.summary.FileWriter(self.log_dir,
|
||||
self.sess.graph)
|
||||
elif parse_version(tf.__version__) >= parse_version('0.8.0'):
|
||||
@@ -538,13 +630,13 @@ class TensorBoard(Callback):
|
||||
self.writer = tf.train.SummaryWriter(self.log_dir,
|
||||
self.sess.graph_def)
|
||||
else:
|
||||
if parse_version(tf.__version__) >= parse_version('0.12.0'):
|
||||
if hasattr(tf, 'summary') and hasattr(tf.summary, 'FileWriter'):
|
||||
self.writer = tf.summary.FileWriter(self.log_dir)
|
||||
else:
|
||||
self.writer = tf.train.SummaryWriter(self.log_dir)
|
||||
|
||||
def on_epoch_end(self, epoch, logs={}):
|
||||
import tensorflow as tf
|
||||
def on_epoch_end(self, epoch, logs=None):
|
||||
logs = logs or {}
|
||||
|
||||
if self.model.validation_data and self.histogram_freq:
|
||||
if epoch % self.histogram_freq == 0:
|
||||
@@ -577,7 +669,7 @@ class TensorBoard(Callback):
|
||||
|
||||
|
||||
class ReduceLROnPlateau(Callback):
|
||||
'''Reduce learning rate when a metric has stopped improving.
|
||||
"""Reduce learning rate when a metric has stopped improving.
|
||||
|
||||
Models often benefit from reducing the learning rate by a factor
|
||||
of 2-10 once learning stagnates. This callback monitors a
|
||||
@@ -610,15 +702,16 @@ class ReduceLROnPlateau(Callback):
|
||||
cooldown: number of epochs to wait before resuming
|
||||
normal operation after lr has been reduced.
|
||||
min_lr: lower bound on the learning rate.
|
||||
'''
|
||||
"""
|
||||
|
||||
def __init__(self, monitor='val_loss', factor=0.1, patience=10,
|
||||
verbose=0, mode='auto', epsilon=1e-4, cooldown=0, min_lr=0):
|
||||
super(Callback, self).__init__()
|
||||
super(ReduceLROnPlateau, self).__init__()
|
||||
|
||||
self.monitor = monitor
|
||||
if factor >= 1.0:
|
||||
raise ValueError('ReduceLROnPlateau does not support a factor >= 1.0.')
|
||||
raise ValueError('ReduceLROnPlateau '
|
||||
'does not support a factor >= 1.0.')
|
||||
self.factor = factor
|
||||
self.min_lr = min_lr
|
||||
self.epsilon = epsilon
|
||||
@@ -630,14 +723,18 @@ class ReduceLROnPlateau(Callback):
|
||||
self.best = 0
|
||||
self.mode = mode
|
||||
self.monitor_op = None
|
||||
self.reset()
|
||||
self._reset()
|
||||
|
||||
def reset(self):
|
||||
def _reset(self):
|
||||
"""Resets wait counter and cooldown counter.
|
||||
"""
|
||||
if self.mode not in ['auto', 'min', 'max']:
|
||||
warnings.warn('Learning Rate Plateau Reducing mode %s is unknown, '
|
||||
'fallback to auto mode.' % (self.mode), RuntimeWarning)
|
||||
'fallback to auto mode.' % (self.mode),
|
||||
RuntimeWarning)
|
||||
self.mode = 'auto'
|
||||
if self.mode == 'min' or (self.mode == 'auto' and 'acc' not in self.monitor):
|
||||
if (self.mode == 'min' or
|
||||
(self.mode == 'auto' and 'acc' not in self.monitor)):
|
||||
self.monitor_op = lambda a, b: np.less(a, b - self.epsilon)
|
||||
self.best = np.Inf
|
||||
else:
|
||||
@@ -647,10 +744,11 @@ class ReduceLROnPlateau(Callback):
|
||||
self.wait = 0
|
||||
self.lr_epsilon = self.min_lr * 1e-4
|
||||
|
||||
def on_train_begin(self, logs={}):
|
||||
self.reset()
|
||||
def on_train_begin(self, logs=None):
|
||||
self._reset()
|
||||
|
||||
def on_epoch_end(self, epoch, logs={}):
|
||||
def on_epoch_end(self, epoch, logs=None):
|
||||
logs = logs or {}
|
||||
logs['lr'] = K.get_value(self.model.optimizer.lr)
|
||||
current = logs.get(self.monitor)
|
||||
if current is None:
|
||||
@@ -682,7 +780,8 @@ class ReduceLROnPlateau(Callback):
|
||||
|
||||
|
||||
class CSVLogger(Callback):
|
||||
'''Callback that streams epoch results to a csv file.
|
||||
"""Callback that streams epoch results to a csv file.
|
||||
|
||||
Supports all values that can be represented as a string,
|
||||
including 1D iterables such as np.ndarray.
|
||||
|
||||
@@ -697,7 +796,7 @@ class CSVLogger(Callback):
|
||||
separator: string used to separate elements in the csv file.
|
||||
append: True: append if file exists (useful for continuing
|
||||
training). False: overwrite existing file.
|
||||
'''
|
||||
"""
|
||||
|
||||
def __init__(self, filename, separator=',', append=False):
|
||||
self.sep = separator
|
||||
@@ -708,26 +807,33 @@ class CSVLogger(Callback):
|
||||
self.append_header = True
|
||||
super(CSVLogger, self).__init__()
|
||||
|
||||
def on_train_begin(self, logs={}):
|
||||
def on_train_begin(self, logs=None):
|
||||
if self.append:
|
||||
if os.path.exists(self.filename):
|
||||
with open(self.filename) as f:
|
||||
self.append_header = len(f.readline()) == 0
|
||||
self.append_header = not bool(len(f.readline()))
|
||||
self.csv_file = open(self.filename, 'a')
|
||||
else:
|
||||
self.csv_file = open(self.filename, 'w')
|
||||
|
||||
def on_epoch_end(self, epoch, logs={}):
|
||||
def on_epoch_end(self, epoch, logs=None):
|
||||
logs = logs or {}
|
||||
|
||||
def handle_value(k):
|
||||
is_zero_dim_ndarray = isinstance(k, np.ndarray) and k.ndim == 0
|
||||
if isinstance(k, Iterable) and not is_zero_dim_ndarray:
|
||||
return '"[%s]"' % (', '.join(map(lambda x: str(x), k)))
|
||||
return '"[%s]"' % (', '.join(map(str, k)))
|
||||
else:
|
||||
return k
|
||||
|
||||
if not self.writer:
|
||||
self.keys = sorted(logs.keys())
|
||||
self.writer = csv.DictWriter(self.csv_file, fieldnames=['epoch'] + self.keys)
|
||||
|
||||
class CustomDialect(csv.excel):
|
||||
delimiter = self.sep
|
||||
|
||||
self.writer = csv.DictWriter(self.csv_file,
|
||||
fieldnames=['epoch'] + self.keys, dialect=CustomDialect)
|
||||
if self.append_header:
|
||||
self.writer.writeheader()
|
||||
|
||||
@@ -736,8 +842,9 @@ class CSVLogger(Callback):
|
||||
self.writer.writerow(row_dict)
|
||||
self.csv_file.flush()
|
||||
|
||||
def on_train_end(self, logs={}):
|
||||
def on_train_end(self, logs=None):
|
||||
self.csv_file.close()
|
||||
self.writer = None
|
||||
|
||||
|
||||
class LambdaCallback(Callback):
|
||||
@@ -746,9 +853,12 @@ class LambdaCallback(Callback):
|
||||
This callback is constructed with anonymous functions that will be called
|
||||
at the appropriate time. Note that the callbacks expects positional
|
||||
arguments, as:
|
||||
- `on_epoch_begin` and `on_epoch_end` expect two positional arguments: `epoch`, `logs`
|
||||
- `on_batch_begin` and `on_batch_end` expect two positional arguments: `batch`, `logs`
|
||||
- `on_train_begin` and `on_train_end` expect one positional argument: `logs`
|
||||
- `on_epoch_begin` and `on_epoch_end` expect two positional arguments:
|
||||
`epoch`, `logs`
|
||||
- `on_batch_begin` and `on_batch_end` expect two positional arguments:
|
||||
`batch`, `logs`
|
||||
- `on_train_begin` and `on_train_end` expect one positional argument:
|
||||
`logs`
|
||||
|
||||
# Arguments
|
||||
on_epoch_begin: called at the beginning of every epoch.
|
||||
@@ -761,20 +871,27 @@ class LambdaCallback(Callback):
|
||||
# Example
|
||||
```python
|
||||
# Print the batch number at the beginning of every batch.
|
||||
batch_print_callback = LambdaCallback(on_batch_begin=lambda batch, logs: print(batch))
|
||||
batch_print_callback = LambdaCallback(
|
||||
on_batch_begin=lambda batch, logs: print(batch))
|
||||
|
||||
# Plot the loss after every epoch.
|
||||
import numpy as np
|
||||
import matplotlib.pyplot as plt
|
||||
plot_loss_callback = LambdaCallback(on_epoch_end=lambda epoch, logs: plt.plot(np.arange(epoch), logs['loss']))
|
||||
plot_loss_callback = LambdaCallback(
|
||||
on_epoch_end=lambda epoch, logs: plt.plot(np.arange(epoch),
|
||||
logs['loss']))
|
||||
|
||||
# Terminate some processes after having finished model training.
|
||||
processes = ...
|
||||
cleanup_callback = LambdaCallback(on_train_end=lambda logs: [p.terminate() for p in processes if p.is_alive()])
|
||||
cleanup_callback = LambdaCallback(
|
||||
on_train_end=lambda logs: [
|
||||
p.terminate() for p in processes if p.is_alive()])
|
||||
|
||||
model.fit(..., callbacks=[batch_print_callback, plot_loss_callback, cleanup_callback])
|
||||
model.fit(...,
|
||||
callbacks=[batch_print_callback,
|
||||
plot_loss_callback,
|
||||
cleanup_callback])
|
||||
```
|
||||
|
||||
"""
|
||||
|
||||
def __init__(self,
|
||||
@@ -785,11 +902,29 @@ class LambdaCallback(Callback):
|
||||
on_train_begin=None,
|
||||
on_train_end=None,
|
||||
**kwargs):
|
||||
super(Callback, self).__init__()
|
||||
super(LambdaCallback, self).__init__()
|
||||
self.__dict__.update(kwargs)
|
||||
self.on_epoch_begin = on_epoch_begin if on_epoch_begin else lambda epoch, logs: None
|
||||
self.on_epoch_end = on_epoch_end if on_epoch_end else lambda epoch, logs: None
|
||||
self.on_batch_begin = on_batch_begin if on_batch_begin else lambda batch, logs: None
|
||||
self.on_batch_end = on_batch_end if on_batch_end else lambda batch, logs: None
|
||||
self.on_train_begin = on_train_begin if on_train_begin else lambda logs: None
|
||||
self.on_train_end = on_train_end if on_train_end else lambda logs: None
|
||||
if on_epoch_begin is not None:
|
||||
self.on_epoch_begin = on_epoch_begin
|
||||
else:
|
||||
self.on_epoch_begin = lambda epoch, logs: None
|
||||
if on_epoch_end is not None:
|
||||
self.on_epoch_end = on_epoch_end
|
||||
else:
|
||||
self.on_epoch_end = lambda epoch, logs: None
|
||||
if on_batch_begin is not None:
|
||||
self.on_batch_begin = on_batch_begin
|
||||
else:
|
||||
self.on_batch_begin = lambda batch, logs: None
|
||||
if on_batch_end is not None:
|
||||
self.on_batch_end = on_batch_end
|
||||
else:
|
||||
self.on_batch_end = lambda batch, logs: None
|
||||
if on_train_begin is not None:
|
||||
self.on_train_begin = on_train_begin
|
||||
else:
|
||||
self.on_train_begin = lambda logs: None
|
||||
if on_train_end is not None:
|
||||
self.on_train_end = on_train_end
|
||||
else:
|
||||
self.on_train_end = lambda logs: None
|
||||
|
||||
+90
-33
@@ -1,8 +1,10 @@
|
||||
from __future__ import absolute_import
|
||||
from . import backend as K
|
||||
from .utils.generic_utils import get_from_module
|
||||
|
||||
|
||||
class Constraint(object):
|
||||
|
||||
def __call__(self, p):
|
||||
return p
|
||||
|
||||
@@ -11,26 +13,29 @@ class Constraint(object):
|
||||
|
||||
|
||||
class MaxNorm(Constraint):
|
||||
'''Constrain the weights incident to each hidden unit to have a norm less than or equal to a desired value.
|
||||
"""MaxNorm weight constraint.
|
||||
|
||||
Constrains the weights incident to each hidden unit
|
||||
to have a norm less than or equal to a desired value.
|
||||
|
||||
# Arguments
|
||||
m: the maximum norm for the incoming weights.
|
||||
axis: integer, axis along which to calculate weight norms. For instance,
|
||||
in a `Dense` layer the weight matrix has shape (input_dim, output_dim),
|
||||
set `axis` to `0` to constrain each weight vector of length (input_dim).
|
||||
In a `MaxoutDense` layer the weight tensor has shape (nb_feature, input_dim, output_dim),
|
||||
set `axis` to `1` to constrain each weight vector of length (input_dim),
|
||||
i.e. constrain the filters incident to the `max` operation.
|
||||
In a `Convolution2D` layer with the Theano backend, the weight tensor
|
||||
has shape (nb_filter, stack_size, nb_row, nb_col), set `axis` to `[1,2,3]`
|
||||
to constrain the weights of each filter tensor of size (stack_size, nb_row, nb_col).
|
||||
In a `Convolution2D` layer with the TensorFlow backend, the weight tensor
|
||||
has shape (nb_row, nb_col, stack_size, nb_filter), set `axis` to `[0,1,2]`
|
||||
to constrain the weights of each filter tensor of size (nb_row, nb_col, stack_size).
|
||||
axis: integer, axis along which to calculate weight norms.
|
||||
For instance, in a `Dense` layer the weight matrix
|
||||
has shape `(input_dim, output_dim)`,
|
||||
set `axis` to `0` to constrain each weight vector
|
||||
of length `(input_dim,)`.
|
||||
In a `Convolution2D` layer with `dim_ordering="tf"`,
|
||||
the weight tensor has shape
|
||||
`(rows, cols, input_depth, output_depth)`,
|
||||
set `axis` to `[0, 1, 2]`
|
||||
to constrain the weights of each filter tensor of size
|
||||
`(rows, cols, input_depth)`.
|
||||
|
||||
# References
|
||||
- [Dropout: A Simple Way to Prevent Neural Networks from Overfitting Srivastava, Hinton, et al. 2014](http://www.cs.toronto.edu/~rsalakhu/papers/srivastava14a.pdf)
|
||||
'''
|
||||
"""
|
||||
|
||||
def __init__(self, m=2, axis=0):
|
||||
self.m = m
|
||||
self.axis = axis
|
||||
@@ -38,7 +43,7 @@ class MaxNorm(Constraint):
|
||||
def __call__(self, p):
|
||||
norms = K.sqrt(K.sum(K.square(p), axis=self.axis, keepdims=True))
|
||||
desired = K.clip(norms, 0, self.m)
|
||||
p = p * (desired / (K.epsilon() + norms))
|
||||
p *= (desired / (K.epsilon() + norms))
|
||||
return p
|
||||
|
||||
def get_config(self):
|
||||
@@ -48,46 +53,98 @@ class MaxNorm(Constraint):
|
||||
|
||||
|
||||
class NonNeg(Constraint):
|
||||
'''Constrain the weights to be non-negative.
|
||||
'''
|
||||
"""Constrains the weights to be non-negative.
|
||||
"""
|
||||
|
||||
def __call__(self, p):
|
||||
p *= K.cast(p >= 0., K.floatx())
|
||||
return p
|
||||
|
||||
|
||||
class UnitNorm(Constraint):
|
||||
'''Constrain the weights incident to each hidden unit to have unit norm.
|
||||
"""Constrains the weights incident to each hidden unit to have unit norm.
|
||||
|
||||
# Arguments
|
||||
axis: integer, axis along which to calculate weight norms. For instance,
|
||||
in a `Dense` layer the weight matrix has shape (input_dim, output_dim),
|
||||
set `axis` to `0` to constrain each weight vector of length (input_dim).
|
||||
In a `MaxoutDense` layer the weight tensor has shape (nb_feature, input_dim, output_dim),
|
||||
set `axis` to `1` to constrain each weight vector of length (input_dim),
|
||||
i.e. constrain the filters incident to the `max` operation.
|
||||
In a `Convolution2D` layer with the Theano backend, the weight tensor
|
||||
has shape (nb_filter, stack_size, nb_row, nb_col), set `axis` to `[1,2,3]`
|
||||
to constrain the weights of each filter tensor of size (stack_size, nb_row, nb_col).
|
||||
In a `Convolution2D` layer with the TensorFlow backend, the weight tensor
|
||||
has shape (nb_row, nb_col, stack_size, nb_filter), set `axis` to `[0,1,2]`
|
||||
to constrain the weights of each filter tensor of size (nb_row, nb_col, stack_size).
|
||||
'''
|
||||
axis: integer, axis along which to calculate weight norms.
|
||||
For instance, in a `Dense` layer the weight matrix
|
||||
has shape `(input_dim, output_dim)`,
|
||||
set `axis` to `0` to constrain each weight vector
|
||||
of length `(input_dim,)`.
|
||||
In a `Convolution2D` layer with `dim_ordering="tf"`,
|
||||
the weight tensor has shape
|
||||
`(rows, cols, input_depth, output_depth)`,
|
||||
set `axis` to `[0, 1, 2]`
|
||||
to constrain the weights of each filter tensor of size
|
||||
`(rows, cols, input_depth)`.
|
||||
"""
|
||||
|
||||
def __init__(self, axis=0):
|
||||
self.axis = axis
|
||||
|
||||
def __call__(self, p):
|
||||
return p / (K.epsilon() + K.sqrt(K.sum(K.square(p), axis=self.axis, keepdims=True)))
|
||||
return p / (K.epsilon() + K.sqrt(K.sum(K.square(p),
|
||||
axis=self.axis,
|
||||
keepdims=True)))
|
||||
|
||||
def get_config(self):
|
||||
return {'name': self.__class__.__name__,
|
||||
'axis': self.axis}
|
||||
|
||||
|
||||
class MinMaxNorm(Constraint):
|
||||
"""MinMaxNorm weight constraint.
|
||||
|
||||
Constrains the weights incident to each hidden unit
|
||||
to have the norm between a lower bound and an upper bound.
|
||||
|
||||
# Arguments
|
||||
low: the minimum norm for the incoming weights.
|
||||
high: the maximum norm for the incoming weights.
|
||||
rate: rate for enforcing the constraint: weights will be
|
||||
rescaled to yield (1 - rate) * norm + rate * norm.clip(low, high).
|
||||
Effectively, this means that rate=1.0 stands for strict
|
||||
enforcement of the constraint, while rate<1.0 means that
|
||||
weights will be rescaled at each step to slowly move
|
||||
towards a value inside the desired interval.
|
||||
axis: integer, axis along which to calculate weight norms.
|
||||
For instance, in a `Dense` layer the weight matrix
|
||||
has shape `(input_dim, output_dim)`,
|
||||
set `axis` to `0` to constrain each weight vector
|
||||
of length `(input_dim,)`.
|
||||
In a `Convolution2D` layer with `dim_ordering="tf"`,
|
||||
the weight tensor has shape
|
||||
`(rows, cols, input_depth, output_depth)`,
|
||||
set `axis` to `[0, 1, 2]`
|
||||
to constrain the weights of each filter tensor of size
|
||||
`(rows, cols, input_depth)`.
|
||||
"""
|
||||
def __init__(self, low=0.0, high=1.0, rate=1.0, axis=0):
|
||||
self.low = low
|
||||
self.high = high
|
||||
self.rate = rate
|
||||
self.axis = axis
|
||||
|
||||
def __call__(self, p):
|
||||
norms = K.sqrt(K.sum(K.square(p), axis=self.axis, keepdims=True))
|
||||
desired = self.rate * K.clip(norms, self.low, self.high) + (1 - self.rate) * norms
|
||||
p *= (desired / (K.epsilon() + norms))
|
||||
return p
|
||||
|
||||
def get_config(self):
|
||||
return {'name': self.__class__.__name__,
|
||||
'low': self.low,
|
||||
'high': self.high,
|
||||
'rate': self.rate,
|
||||
'axis': self.axis}
|
||||
|
||||
|
||||
# Aliases.
|
||||
|
||||
maxnorm = MaxNorm
|
||||
nonneg = NonNeg
|
||||
unitnorm = UnitNorm
|
||||
|
||||
from .utils.generic_utils import get_from_module
|
||||
|
||||
def get(identifier, kwargs=None):
|
||||
return get_from_module(identifier, globals(), 'constraint',
|
||||
instantiate=True, kwargs=kwargs)
|
||||
|
||||
@@ -5,18 +5,28 @@ from six.moves import cPickle
|
||||
|
||||
|
||||
def load_batch(fpath, label_key='labels'):
|
||||
"""Internal utility for parsing CIFAR data.
|
||||
|
||||
# Arguments
|
||||
fpath: path of the file to parse.
|
||||
label_key: key for label data in the retrieved
|
||||
dictionary.
|
||||
|
||||
# Returns
|
||||
A tuple `(data, labels)`.
|
||||
"""
|
||||
f = open(fpath, 'rb')
|
||||
if sys.version_info < (3,):
|
||||
d = cPickle.load(f)
|
||||
else:
|
||||
d = cPickle.load(f, encoding="bytes")
|
||||
d = cPickle.load(f, encoding='bytes')
|
||||
# decode utf8
|
||||
d_decoded = {}
|
||||
for k, v in d.items():
|
||||
d_decoded[k.decode("utf8")] = v
|
||||
d_decoded[k.decode('utf8')] = v
|
||||
d = d_decoded
|
||||
f.close()
|
||||
data = d["data"]
|
||||
data = d['data']
|
||||
labels = d[label_key]
|
||||
|
||||
data = data.reshape(data.shape[0], 3, 32, 32)
|
||||
|
||||
@@ -7,29 +7,34 @@ import os
|
||||
|
||||
|
||||
def load_data():
|
||||
dirname = "cifar-10-batches-py"
|
||||
origin = "http://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz"
|
||||
"""Loads CIFAR10 dataset.
|
||||
|
||||
# Returns
|
||||
Tuple of Numpy arrays: `(x_train, y_train), (x_test, y_test)`.
|
||||
"""
|
||||
dirname = 'cifar-10-batches-py'
|
||||
origin = 'http://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz'
|
||||
path = get_file(dirname, origin=origin, untar=True)
|
||||
|
||||
nb_train_samples = 50000
|
||||
|
||||
X_train = np.zeros((nb_train_samples, 3, 32, 32), dtype="uint8")
|
||||
y_train = np.zeros((nb_train_samples,), dtype="uint8")
|
||||
x_train = np.zeros((nb_train_samples, 3, 32, 32), dtype='uint8')
|
||||
y_train = np.zeros((nb_train_samples,), dtype='uint8')
|
||||
|
||||
for i in range(1, 6):
|
||||
fpath = os.path.join(path, 'data_batch_' + str(i))
|
||||
data, labels = load_batch(fpath)
|
||||
X_train[(i - 1) * 10000: i * 10000, :, :, :] = data
|
||||
x_train[(i - 1) * 10000: i * 10000, :, :, :] = data
|
||||
y_train[(i - 1) * 10000: i * 10000] = labels
|
||||
|
||||
fpath = os.path.join(path, 'test_batch')
|
||||
X_test, y_test = load_batch(fpath)
|
||||
x_test, y_test = load_batch(fpath)
|
||||
|
||||
y_train = np.reshape(y_train, (len(y_train), 1))
|
||||
y_test = np.reshape(y_test, (len(y_test), 1))
|
||||
|
||||
if K.image_dim_ordering() == 'tf':
|
||||
X_train = X_train.transpose(0, 2, 3, 1)
|
||||
X_test = X_test.transpose(0, 2, 3, 1)
|
||||
x_train = x_train.transpose(0, 2, 3, 1)
|
||||
x_test = x_test.transpose(0, 2, 3, 1)
|
||||
|
||||
return (X_train, y_train), (X_test, y_test)
|
||||
return (x_train, y_train), (x_test, y_test)
|
||||
|
||||
@@ -7,6 +7,17 @@ import os
|
||||
|
||||
|
||||
def load_data(label_mode='fine'):
|
||||
"""Loads CIFAR100 dataset.
|
||||
|
||||
# Arguments
|
||||
label_mode: one of "fine", "coarse".
|
||||
|
||||
# Returns
|
||||
Tuple of Numpy arrays: `(x_train, y_train), (x_test, y_test)`.
|
||||
|
||||
# Raises
|
||||
ValueError: in case of invalid `label_mode`.
|
||||
"""
|
||||
if label_mode not in ['fine', 'coarse']:
|
||||
raise ValueError('label_mode must be one of "fine" "coarse".')
|
||||
|
||||
@@ -15,16 +26,16 @@ def load_data(label_mode='fine'):
|
||||
path = get_file(dirname, origin=origin, untar=True)
|
||||
|
||||
fpath = os.path.join(path, 'train')
|
||||
X_train, y_train = load_batch(fpath, label_key=label_mode + '_labels')
|
||||
x_train, y_train = load_batch(fpath, label_key=label_mode + '_labels')
|
||||
|
||||
fpath = os.path.join(path, 'test')
|
||||
X_test, y_test = load_batch(fpath, label_key=label_mode + '_labels')
|
||||
x_test, y_test = load_batch(fpath, label_key=label_mode + '_labels')
|
||||
|
||||
y_train = np.reshape(y_train, (len(y_train), 1))
|
||||
y_test = np.reshape(y_test, (len(y_test), 1))
|
||||
|
||||
if K.image_dim_ordering() == 'tf':
|
||||
X_train = X_train.transpose(0, 2, 3, 1)
|
||||
X_test = X_test.transpose(0, 2, 3, 1)
|
||||
x_train = x_train.transpose(0, 2, 3, 1)
|
||||
x_test = x_test.transpose(0, 2, 3, 1)
|
||||
|
||||
return (X_train, y_train), (X_test, y_test)
|
||||
return (x_train, y_train), (x_test, y_test)
|
||||
|
||||
@@ -1,4 +0,0 @@
|
||||
from ..utils.data_utils import *
|
||||
import warnings
|
||||
|
||||
warnings.warn('data_utils has been moved to keras.utils.data_utils.')
|
||||
+38
-22
@@ -10,10 +10,10 @@ import sys
|
||||
def load_data(path='imdb_full.pkl', nb_words=None, skip_top=0,
|
||||
maxlen=None, seed=113,
|
||||
start_char=1, oov_char=2, index_from=3):
|
||||
'''Loads IMDB dataset.
|
||||
"""Loads the IMDB dataset.
|
||||
|
||||
# Arguments
|
||||
path: where to store the data (in `/.keras/dataset`)
|
||||
path: where to cache the data (relative to `~/.keras/datasets`).
|
||||
nb_words: max number of words to include. Words are ranked
|
||||
by how often they occur (in the training set) and only
|
||||
the most frequent words are kept
|
||||
@@ -27,12 +27,19 @@ def load_data(path='imdb_full.pkl', nb_words=None, skip_top=0,
|
||||
or `skip_top` limit will be replaced with this character.
|
||||
index_from: index actual words with this index and higher.
|
||||
|
||||
# Returns
|
||||
Tuple of Numpy arrays: `(x_train, y_train), (x_test, y_test)`.
|
||||
|
||||
# Raises
|
||||
ValueError: in case `maxlen` is so low
|
||||
that no input sequence could be kept.
|
||||
|
||||
Note that the 'out of vocabulary' character is only used for
|
||||
words that were present in the training set but are not included
|
||||
because they're not making the `nb_words` cut here.
|
||||
Words that were not seen in the training set but are in the test set
|
||||
have simply been skipped.
|
||||
'''
|
||||
"""
|
||||
path = get_file(path,
|
||||
origin='https://s3.amazonaws.com/text-datasets/imdb_full.pkl',
|
||||
md5_hash='d091312047c43cf9e4e38fef92437263')
|
||||
@@ -55,54 +62,63 @@ def load_data(path='imdb_full.pkl', nb_words=None, skip_top=0,
|
||||
np.random.seed(seed * 2)
|
||||
np.random.shuffle(labels_test)
|
||||
|
||||
X = x_train + x_test
|
||||
xs = x_train + x_test
|
||||
labels = labels_train + labels_test
|
||||
|
||||
if start_char is not None:
|
||||
X = [[start_char] + [w + index_from for w in x] for x in X]
|
||||
xs = [[start_char] + [w + index_from for w in x] for x in xs]
|
||||
elif index_from:
|
||||
X = [[w + index_from for w in x] for x in X]
|
||||
xs = [[w + index_from for w in x] for x in xs]
|
||||
|
||||
if maxlen:
|
||||
new_X = []
|
||||
new_xs = []
|
||||
new_labels = []
|
||||
for x, y in zip(X, labels):
|
||||
for x, y in zip(xs, labels):
|
||||
if len(x) < maxlen:
|
||||
new_X.append(x)
|
||||
new_xs.append(x)
|
||||
new_labels.append(y)
|
||||
X = new_X
|
||||
xs = new_xs
|
||||
labels = new_labels
|
||||
if not X:
|
||||
if not xs:
|
||||
raise ValueError('After filtering for sequences shorter than maxlen=' +
|
||||
str(maxlen) + ', no sequence was kept. '
|
||||
'Increase maxlen.')
|
||||
if not nb_words:
|
||||
nb_words = max([max(x) for x in X])
|
||||
nb_words = max([max(x) for x in xs])
|
||||
|
||||
# by convention, use 2 as OOV word
|
||||
# reserve 'index_from' (=3 by default) characters: 0 (padding), 1 (start), 2 (OOV)
|
||||
# reserve 'index_from' (=3 by default) characters:
|
||||
# 0 (padding), 1 (start), 2 (OOV)
|
||||
if oov_char is not None:
|
||||
X = [[oov_char if (w >= nb_words or w < skip_top) else w for w in x] for x in X]
|
||||
xs = [[oov_char if (w >= nb_words or w < skip_top) else w for w in x] for x in xs]
|
||||
else:
|
||||
nX = []
|
||||
for x in X:
|
||||
new_xs = []
|
||||
for x in xs:
|
||||
nx = []
|
||||
for w in x:
|
||||
if (w >= nb_words or w < skip_top):
|
||||
if w >= nb_words or w < skip_top:
|
||||
nx.append(w)
|
||||
nX.append(nx)
|
||||
X = nX
|
||||
new_xs.append(nx)
|
||||
xs = new_xs
|
||||
|
||||
X_train = np.array(X[:len(x_train)])
|
||||
x_train = np.array(xs[:len(x_train)])
|
||||
y_train = np.array(labels[:len(x_train)])
|
||||
|
||||
X_test = np.array(X[len(x_train):])
|
||||
x_test = np.array(xs[len(x_train):])
|
||||
y_test = np.array(labels[len(x_train):])
|
||||
|
||||
return (X_train, y_train), (X_test, y_test)
|
||||
return (x_train, y_train), (x_test, y_test)
|
||||
|
||||
|
||||
def get_word_index(path='imdb_word_index.pkl'):
|
||||
"""Retrieves the dictionary mapping word indices back to words.
|
||||
|
||||
# Arguments
|
||||
path: where to cache the data (relative to `~/.keras/datasets`).
|
||||
|
||||
# Returns
|
||||
The word index dictionary.
|
||||
"""
|
||||
path = get_file(path,
|
||||
origin='https://s3.amazonaws.com/text-datasets/imdb_word_index.pkl',
|
||||
md5_hash='72d94b01291be4ff843198d3b0e1e4d7')
|
||||
|
||||
@@ -5,6 +5,15 @@ import sys
|
||||
|
||||
|
||||
def load_data(path='mnist.pkl.gz'):
|
||||
"""Loads the MNIST dataset.
|
||||
|
||||
# Arguments
|
||||
path: path where to cache the dataset locally
|
||||
(relative to ~/.keras/datasets).
|
||||
|
||||
# Returns
|
||||
Tuple of Numpy arrays: `(x_train, y_train), (x_test, y_test)`.
|
||||
"""
|
||||
path = get_file(path, origin='https://s3.amazonaws.com/img-datasets/mnist.pkl.gz')
|
||||
|
||||
if path.endswith('.gz'):
|
||||
@@ -18,4 +27,4 @@ def load_data(path='mnist.pkl.gz'):
|
||||
data = cPickle.load(f, encoding='bytes')
|
||||
|
||||
f.close()
|
||||
return data # (X_train, y_train), (X_test, y_test)
|
||||
return data # (x_train, y_train), (x_test, y_test)
|
||||
|
||||
+36
-24
@@ -10,10 +10,10 @@ import sys
|
||||
def load_data(path='reuters.pkl', nb_words=None, skip_top=0,
|
||||
maxlen=None, test_split=0.2, seed=113,
|
||||
start_char=1, oov_char=2, index_from=3):
|
||||
'''Loads the Reuters newswire classification dataset.
|
||||
"""Loads the Reuters newswire classification dataset.
|
||||
|
||||
# Arguments
|
||||
path: where to store the data (in `/.keras/dataset`)
|
||||
path: where to cache the data (relative to `~/.keras/datasets`).
|
||||
nb_words: max number of words to include. Words are ranked
|
||||
by how often they occur (in the training set) and only
|
||||
the most frequent words are kept
|
||||
@@ -28,65 +28,77 @@ def load_data(path='reuters.pkl', nb_words=None, skip_top=0,
|
||||
or `skip_top` limit will be replaced with this character.
|
||||
index_from: index actual words with this index and higher.
|
||||
|
||||
# Returns
|
||||
Tuple of Numpy arrays: `(x_train, y_train), (x_test, y_test)`.
|
||||
|
||||
Note that the 'out of vocabulary' character is only used for
|
||||
words that were present in the training set but are not included
|
||||
because they're not making the `nb_words` cut here.
|
||||
Words that were not seen in the training set but are in the test set
|
||||
have simply been skipped.
|
||||
'''
|
||||
"""
|
||||
|
||||
path = get_file(path, origin='https://s3.amazonaws.com/text-datasets/reuters.pkl')
|
||||
f = open(path, 'rb')
|
||||
X, labels = cPickle.load(f)
|
||||
xs, labels = cPickle.load(f)
|
||||
f.close()
|
||||
|
||||
np.random.seed(seed)
|
||||
np.random.shuffle(X)
|
||||
np.random.shuffle(xs)
|
||||
np.random.seed(seed)
|
||||
np.random.shuffle(labels)
|
||||
|
||||
if start_char is not None:
|
||||
X = [[start_char] + [w + index_from for w in x] for x in X]
|
||||
xs = [[start_char] + [w + index_from for w in x] for x in xs]
|
||||
elif index_from:
|
||||
X = [[w + index_from for w in x] for x in X]
|
||||
xs = [[w + index_from for w in x] for x in xs]
|
||||
|
||||
if maxlen:
|
||||
new_X = []
|
||||
new_xs = []
|
||||
new_labels = []
|
||||
for x, y in zip(X, labels):
|
||||
for x, y in zip(xs, labels):
|
||||
if len(x) < maxlen:
|
||||
new_X.append(x)
|
||||
new_xs.append(x)
|
||||
new_labels.append(y)
|
||||
X = new_X
|
||||
xs = new_xs
|
||||
labels = new_labels
|
||||
|
||||
if not nb_words:
|
||||
nb_words = max([max(x) for x in X])
|
||||
nb_words = max([max(x) for x in xs])
|
||||
|
||||
# by convention, use 2 as OOV word
|
||||
# reserve 'index_from' (=3 by default) characters: 0 (padding), 1 (start), 2 (OOV)
|
||||
# reserve 'index_from' (=3 by default) characters:
|
||||
# 0 (padding), 1 (start), 2 (OOV)
|
||||
if oov_char is not None:
|
||||
X = [[oov_char if (w >= nb_words or w < skip_top) else w for w in x] for x in X]
|
||||
xs = [[oov_char if (w >= nb_words or w < skip_top) else w for w in x] for x in xs]
|
||||
else:
|
||||
nX = []
|
||||
for x in X:
|
||||
new_xs = []
|
||||
for x in xs:
|
||||
nx = []
|
||||
for w in x:
|
||||
if (w >= nb_words or w < skip_top):
|
||||
if w >= nb_words or w < skip_top:
|
||||
nx.append(w)
|
||||
nX.append(nx)
|
||||
X = nX
|
||||
new_xs.append(nx)
|
||||
xs = new_xs
|
||||
|
||||
X_train = X[:int(len(X) * (1 - test_split))]
|
||||
y_train = labels[:int(len(X) * (1 - test_split))]
|
||||
x_train = xs[:int(len(xs) * (1 - test_split))]
|
||||
y_train = labels[:int(len(xs) * (1 - test_split))]
|
||||
|
||||
X_test = X[int(len(X) * (1 - test_split)):]
|
||||
y_test = labels[int(len(X) * (1 - test_split)):]
|
||||
x_test = xs[int(len(xs) * (1 - test_split)):]
|
||||
y_test = labels[int(len(xs) * (1 - test_split)):]
|
||||
|
||||
return (X_train, y_train), (X_test, y_test)
|
||||
return (x_train, y_train), (x_test, y_test)
|
||||
|
||||
|
||||
def get_word_index(path='reuters_word_index.pkl'):
|
||||
"""Retrieves the dictionary mapping word indices back to words.
|
||||
|
||||
# Arguments
|
||||
path: where to cache the data (relative to `~/.keras/datasets`).
|
||||
|
||||
# Returns
|
||||
The word index dictionary.
|
||||
"""
|
||||
path = get_file(path, origin='https://s3.amazonaws.com/text-datasets/reuters_word_index.pkl')
|
||||
f = open(path, 'rb')
|
||||
|
||||
|
||||
+147
-140
@@ -19,11 +19,11 @@ from ..utils.generic_utils import func_dump, func_load
|
||||
|
||||
|
||||
def to_list(x):
|
||||
'''This normalizes a list/tensor into a list.
|
||||
"""This normalizes a list/tensor into a list.
|
||||
|
||||
If a tensor is passed, we return
|
||||
a list of size 1 containing the tensor.
|
||||
'''
|
||||
"""
|
||||
if isinstance(x, list):
|
||||
return x
|
||||
return [x]
|
||||
@@ -35,13 +35,14 @@ def object_list_uid(object_list):
|
||||
|
||||
|
||||
class InputSpec(object):
|
||||
'''This specifies the ndim, dtype and shape of every input to a layer.
|
||||
"""This specifies the ndim, dtype and shape of every input to a layer.
|
||||
Every layer should expose (if appropriate) an `input_spec` attribute:
|
||||
a list of instances of InputSpec (one per input tensor).
|
||||
|
||||
A None entry in a shape is compatible with any dimension,
|
||||
a None shape is compatible with any shape.
|
||||
'''
|
||||
"""
|
||||
|
||||
def __init__(self, dtype=None, shape=None, ndim=None):
|
||||
if isinstance(ndim, str):
|
||||
if '+' not in ndim:
|
||||
@@ -60,7 +61,7 @@ class InputSpec(object):
|
||||
|
||||
|
||||
class Node(object):
|
||||
'''A `Node` describes the connectivity between two layers.
|
||||
"""A `Node` describes the connectivity between two layers.
|
||||
|
||||
Each time a layer is connected to some new input,
|
||||
a node is added to `layer.inbound_nodes`.
|
||||
@@ -98,7 +99,8 @@ class Node(object):
|
||||
A node from layer A to layer B is added to:
|
||||
A.outbound_nodes
|
||||
B.inbound_nodes
|
||||
'''
|
||||
"""
|
||||
|
||||
def __init__(self, outbound_layer,
|
||||
inbound_layers, node_indices, tensor_indices,
|
||||
input_tensors, output_tensors,
|
||||
@@ -214,7 +216,7 @@ class Node(object):
|
||||
|
||||
|
||||
class Layer(object):
|
||||
'''Abstract base layer class.
|
||||
"""Abstract base layer class.
|
||||
|
||||
# Properties
|
||||
name: String, must be unique within a model.
|
||||
@@ -281,7 +283,8 @@ class Layer(object):
|
||||
add_inbound_node(layer, index=0)
|
||||
create_input_layer()
|
||||
assert_input_compatibility()
|
||||
'''
|
||||
"""
|
||||
|
||||
def __init__(self, **kwargs):
|
||||
# These properties should have been set
|
||||
# by the child class, as appropriate.
|
||||
@@ -299,10 +302,10 @@ class Layer(object):
|
||||
|
||||
# These properties will be set upon call of self.build(),
|
||||
# which itself will be called upon self.add_inbound_node if necessary.
|
||||
if not hasattr(self, 'trainable_weights'):
|
||||
self.trainable_weights = []
|
||||
if not hasattr(self, 'non_trainable_weights'):
|
||||
self.non_trainable_weights = []
|
||||
if not hasattr(self, '_trainable_weights'):
|
||||
self._trainable_weights = []
|
||||
if not hasattr(self, '_non_trainable_weights'):
|
||||
self._non_trainable_weights = []
|
||||
if not hasattr(self, 'losses'):
|
||||
self.losses = []
|
||||
if not hasattr(self, 'constraints'):
|
||||
@@ -401,16 +404,16 @@ class Layer(object):
|
||||
trainable=True,
|
||||
regularizer=None,
|
||||
constraint=None):
|
||||
'''Adds a weight variable to the layer.
|
||||
"""Adds a weight variable to the layer.
|
||||
|
||||
# Arguments:
|
||||
# Arguments
|
||||
shape: The shape tuple of the weight.
|
||||
initializer: An Initializer instance (callable).
|
||||
trainable: A boolean, whether the weight should
|
||||
be trained via backprop or not (assuming
|
||||
that the layer itself is also trainable).
|
||||
regularizer: An optional Regularizer instance.
|
||||
'''
|
||||
"""
|
||||
initializer = initializations.get(initializer)
|
||||
weight = initializer(shape, name=name)
|
||||
if regularizer is not None:
|
||||
@@ -418,16 +421,16 @@ class Layer(object):
|
||||
if constraint is not None:
|
||||
self.constraints[weight] = constraint
|
||||
if trainable:
|
||||
self.trainable_weights.append(weight)
|
||||
self._trainable_weights.append(weight)
|
||||
else:
|
||||
self.non_trainable_weights.append(weight)
|
||||
self._non_trainable_weights.append(weight)
|
||||
return weight
|
||||
|
||||
def assert_input_compatibility(self, input):
|
||||
'''This checks that the tensor(s) `input`
|
||||
"""This checks that the tensor(s) `input`
|
||||
verify the input assumptions of the layer
|
||||
(if any). If not, exceptions are raised.
|
||||
'''
|
||||
"""
|
||||
if not self.input_spec:
|
||||
return True
|
||||
if not isinstance(self.input_spec, list):
|
||||
@@ -490,7 +493,7 @@ class Layer(object):
|
||||
str(x_shape))
|
||||
|
||||
def call(self, x, mask=None):
|
||||
'''This is where the layer's logic lives.
|
||||
"""This is where the layer's logic lives.
|
||||
|
||||
# Arguments
|
||||
x: input tensor, or list/tuple of input tensors.
|
||||
@@ -498,11 +501,11 @@ class Layer(object):
|
||||
|
||||
# Returns:
|
||||
A tensor or list/tuple of tensors.
|
||||
'''
|
||||
"""
|
||||
return x
|
||||
|
||||
def __call__(self, x, mask=None):
|
||||
'''Wrapper around self.call(), for handling
|
||||
"""Wrapper around self.call(), for handling
|
||||
internal Keras references.
|
||||
|
||||
If a Keras tensor is passed:
|
||||
@@ -519,7 +522,7 @@ class Layer(object):
|
||||
# Arguments
|
||||
x: Can be a tensor or list/tuple of tensors.
|
||||
mask: Tensor or list/tuple of tensors.
|
||||
'''
|
||||
"""
|
||||
if not self.built:
|
||||
# Raise exceptions in case the input is not compatible
|
||||
# with the input_spec specified in the layer constructor.
|
||||
@@ -587,7 +590,7 @@ class Layer(object):
|
||||
|
||||
def add_inbound_node(self, inbound_layers,
|
||||
node_indices=None, tensor_indices=None):
|
||||
'''
|
||||
"""
|
||||
# Arguments
|
||||
inbound_layers: Can be a layer instance
|
||||
or a list/tuple of layer instances.
|
||||
@@ -604,7 +607,7 @@ class Layer(object):
|
||||
the entry in the output list
|
||||
(if applicable). "None" means that we take all outputs
|
||||
(as a list).
|
||||
'''
|
||||
"""
|
||||
inbound_layers = to_list(inbound_layers)
|
||||
if not node_indices:
|
||||
node_indices = [0 for _ in range(len(inbound_layers))]
|
||||
@@ -632,7 +635,7 @@ class Layer(object):
|
||||
Node.create_node(self, inbound_layers, node_indices, tensor_indices)
|
||||
|
||||
def get_output_shape_for(self, input_shape):
|
||||
'''Computes the output shape of the layer given
|
||||
"""Computes the output shape of the layer given
|
||||
an input shape (assumes that the layer will be built
|
||||
to match that input shape).
|
||||
|
||||
@@ -641,11 +644,11 @@ class Layer(object):
|
||||
or list of shape tuples (one per output tensor of the layer).
|
||||
Shape tuples can include None for free dimensions,
|
||||
instead of an integer.
|
||||
'''
|
||||
"""
|
||||
return input_shape
|
||||
|
||||
def compute_mask(self, input, input_mask=None):
|
||||
'''Computes an output masking tensor, given an input tensor
|
||||
"""Computes an output masking tensor, given an input tensor
|
||||
(or list thereof) and an input mask (or list thereof).
|
||||
|
||||
# Arguments
|
||||
@@ -655,11 +658,11 @@ class Layer(object):
|
||||
# Returns
|
||||
None or a tensor (or list of tensors,
|
||||
one per output tensor of the layer).
|
||||
'''
|
||||
"""
|
||||
if not hasattr(self, 'supports_masking') or not self.supports_masking:
|
||||
if input_mask is not None:
|
||||
if isinstance(input_mask, list):
|
||||
if any(input_mask):
|
||||
if any(mask is not None for mask in input_mask):
|
||||
raise ValueError('Layer ' + self.name +
|
||||
' does not support masking, '
|
||||
'but was passed an input_mask: ' +
|
||||
@@ -676,25 +679,25 @@ class Layer(object):
|
||||
return input_mask
|
||||
|
||||
def build(self, input_shape):
|
||||
'''Creates the layer weights.
|
||||
"""Creates the layer weights.
|
||||
Must be implemented on all layers that have weights.
|
||||
|
||||
# Arguments
|
||||
input_shape: Keras tensor (future input to layer)
|
||||
or list/tuple of Keras tensors to reference
|
||||
for weight shape computations.
|
||||
'''
|
||||
"""
|
||||
self.built = True
|
||||
|
||||
def _get_node_attribute_at_index(self, node_index, attr, attr_name):
|
||||
'''Retrieves an attribute (e.g. input_tensors) from a node.
|
||||
"""Retrieves an attribute (e.g. input_tensors) from a node.
|
||||
|
||||
# Arguments
|
||||
node_index: Integer index of the node from which
|
||||
to retrieve the attribute.
|
||||
attr: Exact node attribute name.
|
||||
attr_name: Human-readable attribute name, for error messages.
|
||||
'''
|
||||
"""
|
||||
if not self.inbound_nodes:
|
||||
raise RuntimeError('The layer has never been called '
|
||||
'and thus has no defined ' + attr_name + '.')
|
||||
@@ -710,53 +713,53 @@ class Layer(object):
|
||||
return values
|
||||
|
||||
def get_input_shape_at(self, node_index):
|
||||
'''Retrieves the input shape(s) of a layer at a given node.
|
||||
'''
|
||||
"""Retrieves the input shape(s) of a layer at a given node.
|
||||
"""
|
||||
return self._get_node_attribute_at_index(node_index,
|
||||
'input_shapes',
|
||||
'input shape')
|
||||
|
||||
def get_output_shape_at(self, node_index):
|
||||
'''Retrieves the output shape(s) of a layer at a given node.
|
||||
'''
|
||||
"""Retrieves the output shape(s) of a layer at a given node.
|
||||
"""
|
||||
return self._get_node_attribute_at_index(node_index,
|
||||
'output_shapes',
|
||||
'output shape')
|
||||
|
||||
def get_input_at(self, node_index):
|
||||
'''Retrieves the input tensor(s) of a layer at a given node.
|
||||
'''
|
||||
"""Retrieves the input tensor(s) of a layer at a given node.
|
||||
"""
|
||||
return self._get_node_attribute_at_index(node_index,
|
||||
'input_tensors',
|
||||
'input')
|
||||
|
||||
def get_output_at(self, node_index):
|
||||
'''Retrieves the output tensor(s) of a layer at a given node.
|
||||
'''
|
||||
"""Retrieves the output tensor(s) of a layer at a given node.
|
||||
"""
|
||||
return self._get_node_attribute_at_index(node_index,
|
||||
'output_tensors',
|
||||
'output')
|
||||
|
||||
def get_input_mask_at(self, node_index):
|
||||
'''Retrieves the input mask tensor(s) of a layer at a given node.
|
||||
'''
|
||||
"""Retrieves the input mask tensor(s) of a layer at a given node.
|
||||
"""
|
||||
return self._get_node_attribute_at_index(node_index,
|
||||
'input_masks',
|
||||
'input mask')
|
||||
|
||||
def get_output_mask_at(self, node_index):
|
||||
'''Retrieves the output mask tensor(s) of a layer at a given node.
|
||||
'''
|
||||
"""Retrieves the output mask tensor(s) of a layer at a given node.
|
||||
"""
|
||||
return self._get_node_attribute_at_index(node_index,
|
||||
'output_masks',
|
||||
'output mask')
|
||||
|
||||
@property
|
||||
def input(self):
|
||||
'''Retrieves the input tensor(s) of a layer (only applicable if
|
||||
"""Retrieves the input tensor(s) of a layer (only applicable if
|
||||
the layer has exactly one inbound node, i.e. if it is connected
|
||||
to one incoming layer).
|
||||
'''
|
||||
"""
|
||||
if len(self.inbound_nodes) > 1:
|
||||
raise AttributeError('Layer ' + self.name +
|
||||
' has multiple inbound nodes, '
|
||||
@@ -771,10 +774,10 @@ class Layer(object):
|
||||
|
||||
@property
|
||||
def output(self):
|
||||
'''Retrieves the output tensor(s) of a layer (only applicable if
|
||||
"""Retrieves the output tensor(s) of a layer (only applicable if
|
||||
the layer has exactly one inbound node, i.e. if it is connected
|
||||
to one incoming layer).
|
||||
'''
|
||||
"""
|
||||
if len(self.inbound_nodes) == 0:
|
||||
raise AttributeError('Layer ' + self.name +
|
||||
' has no inbound nodes.')
|
||||
@@ -789,10 +792,10 @@ class Layer(object):
|
||||
|
||||
@property
|
||||
def input_mask(self):
|
||||
'''Retrieves the input mask tensor(s) of a layer (only applicable if
|
||||
"""Retrieves the input mask tensor(s) of a layer (only applicable if
|
||||
the layer has exactly one inbound node, i.e. if it is connected
|
||||
to one incoming layer).
|
||||
'''
|
||||
"""
|
||||
if len(self.inbound_nodes) != 1:
|
||||
raise AttributeError('Layer ' + self.name +
|
||||
' has multiple inbound nodes, ' +
|
||||
@@ -804,10 +807,10 @@ class Layer(object):
|
||||
|
||||
@property
|
||||
def output_mask(self):
|
||||
'''Retrieves the output mask tensor(s) of a layer (only applicable if
|
||||
"""Retrieves the output mask tensor(s) of a layer (only applicable if
|
||||
the layer has exactly one inbound node, i.e. if it is connected
|
||||
to one incoming layer).
|
||||
'''
|
||||
"""
|
||||
if len(self.inbound_nodes) != 1:
|
||||
raise AttributeError('Layer ' + self.name +
|
||||
' has multiple inbound nodes, '
|
||||
@@ -820,10 +823,10 @@ class Layer(object):
|
||||
|
||||
@property
|
||||
def input_shape(self):
|
||||
'''Retrieves the input shape tuple(s) of a layer. Only applicable
|
||||
"""Retrieves the input shape tuple(s) of a layer. Only applicable
|
||||
if the layer has one inbound node,
|
||||
or if all inbound nodes have the same input shape.
|
||||
'''
|
||||
"""
|
||||
if not self.inbound_nodes:
|
||||
raise AttributeError('The layer has never been called '
|
||||
'and thus has no defined input shape.')
|
||||
@@ -845,10 +848,10 @@ class Layer(object):
|
||||
|
||||
@property
|
||||
def output_shape(self):
|
||||
'''Retrieves the output shape tuple(s) of a layer. Only applicable
|
||||
"""Retrieves the output shape tuple(s) of a layer. Only applicable
|
||||
if the layer has one inbound node,
|
||||
or if all inbound nodes have the same output shape.
|
||||
'''
|
||||
"""
|
||||
if not self.inbound_nodes:
|
||||
raise AttributeError('The layer has never been called '
|
||||
'and thus has no defined output shape.')
|
||||
@@ -927,7 +930,10 @@ class Layer(object):
|
||||
def get_updates_for(self, inputs):
|
||||
if not hasattr(self, '_per_input_updates'):
|
||||
return []
|
||||
inputs_hash = object_list_uid(inputs)
|
||||
if inputs is not None:
|
||||
inputs_hash = object_list_uid(inputs)
|
||||
else:
|
||||
inputs_hash = None
|
||||
if inputs_hash in self._per_input_updates:
|
||||
return self._per_input_updates[inputs_hash]
|
||||
return []
|
||||
@@ -935,7 +941,10 @@ class Layer(object):
|
||||
def get_losses_for(self, inputs):
|
||||
if not hasattr(self, '_per_input_losses'):
|
||||
return []
|
||||
inputs_hash = object_list_uid(inputs)
|
||||
if inputs is not None:
|
||||
inputs_hash = object_list_uid(inputs)
|
||||
else:
|
||||
inputs_hash = None
|
||||
if inputs_hash in self._per_input_losses:
|
||||
return self._per_input_losses[inputs_hash]
|
||||
return []
|
||||
@@ -945,7 +954,7 @@ class Layer(object):
|
||||
return self.trainable_weights + self.non_trainable_weights
|
||||
|
||||
def set_weights(self, weights):
|
||||
'''Sets the weights of the layer, from Numpy arrays.
|
||||
"""Sets the weights of the layer, from Numpy arrays.
|
||||
|
||||
# Arguments
|
||||
weights: a list of Numpy arrays. The number
|
||||
@@ -953,7 +962,7 @@ class Layer(object):
|
||||
number of the dimensions of the weights
|
||||
of the layer (i.e. it should match the
|
||||
output of `get_weights`).
|
||||
'''
|
||||
"""
|
||||
params = self.weights
|
||||
if len(params) != len(weights):
|
||||
raise ValueError('You called `set_weights(weights)` on layer "' +
|
||||
@@ -978,14 +987,14 @@ class Layer(object):
|
||||
K.batch_set_value(weight_value_tuples)
|
||||
|
||||
def get_weights(self):
|
||||
'''Returns the current weights of the layer,
|
||||
"""Returns the current weights of the layer,
|
||||
as a list of numpy arrays.
|
||||
'''
|
||||
"""
|
||||
params = self.weights
|
||||
return K.batch_get_value(params)
|
||||
|
||||
def get_config(self):
|
||||
'''Returns a Python dictionary (serializable)
|
||||
"""Returns a Python dictionary (serializable)
|
||||
containing the configuration of a layer.
|
||||
The same layer can be reinstantiated later
|
||||
(without its trained weights) from this configuration.
|
||||
@@ -993,7 +1002,7 @@ class Layer(object):
|
||||
The config of a layer does not include connectivity
|
||||
information, nor the layer class name. These are handled
|
||||
by Container (one layer of abstraction above).
|
||||
'''
|
||||
"""
|
||||
config = {'name': self.name,
|
||||
'trainable': self.trainable}
|
||||
if hasattr(self, 'batch_input_shape'):
|
||||
@@ -1004,7 +1013,7 @@ class Layer(object):
|
||||
|
||||
@classmethod
|
||||
def from_config(cls, config):
|
||||
'''This method is the reverse of get_config,
|
||||
"""This method is the reverse of get_config,
|
||||
capable of instantiating the same layer from the config
|
||||
dictionary. It does not handle layer connectivity
|
||||
(handled by Container), nor weights (handled by `set_weights`).
|
||||
@@ -1012,13 +1021,13 @@ class Layer(object):
|
||||
# Arguments
|
||||
config: A Python dictionary, typically the
|
||||
output of get_config.
|
||||
'''
|
||||
"""
|
||||
return cls(**config)
|
||||
|
||||
def count_params(self):
|
||||
'''Returns the total number of floats (or ints)
|
||||
"""Returns the total number of floats (or ints)
|
||||
composing the weights of the layer.
|
||||
'''
|
||||
"""
|
||||
if not self.built:
|
||||
if self.__class__.__name__ == 'Sequential':
|
||||
self.build()
|
||||
@@ -1031,7 +1040,7 @@ class Layer(object):
|
||||
|
||||
|
||||
class InputLayer(Layer):
|
||||
'''Layer to be used as an entry point into a graph.
|
||||
"""Layer to be used as an entry point into a graph.
|
||||
It can either wrap an existing tensor (pass an `input_tensor` argument)
|
||||
or create a placeholder tensor (pass arguments `input_shape`
|
||||
or `batch_input_shape` as well as `input_dtype`).
|
||||
@@ -1045,7 +1054,8 @@ class InputLayer(Layer):
|
||||
sparse: Boolean, whether the placeholder created
|
||||
is meant to be sparse.
|
||||
name: Name of the layer (string).
|
||||
'''
|
||||
"""
|
||||
|
||||
def __init__(self, input_shape=None, batch_input_shape=None,
|
||||
input_dtype=None, input_tensor=None, sparse=False, name=None):
|
||||
self.input_spec = None
|
||||
@@ -1053,16 +1063,11 @@ class InputLayer(Layer):
|
||||
self.uses_learning_phase = False
|
||||
self.trainable = False
|
||||
self.built = True
|
||||
self.trainable_weights = []
|
||||
self.non_trainable_weights = []
|
||||
|
||||
self._trainable_weights = []
|
||||
self._non_trainable_weights = []
|
||||
self.inbound_nodes = []
|
||||
self.outbound_nodes = []
|
||||
|
||||
self.trainable_weights = []
|
||||
self.non_trainable_weights = []
|
||||
self.constraints = {}
|
||||
|
||||
self.sparse = sparse
|
||||
|
||||
if not name:
|
||||
@@ -1137,7 +1142,7 @@ class InputLayer(Layer):
|
||||
def Input(shape=None, batch_shape=None,
|
||||
name=None, dtype=K.floatx(), sparse=False,
|
||||
tensor=None):
|
||||
'''`Input()` is used to instantiate a Keras tensor.
|
||||
"""`Input()` is used to instantiate a Keras tensor.
|
||||
A Keras tensor is a tensor object from the underlying backend
|
||||
(Theano or TensorFlow), which we augment with certain
|
||||
attributes that allow us to build a Keras model
|
||||
@@ -1171,7 +1176,7 @@ def Input(shape=None, batch_shape=None,
|
||||
sparse: A boolean specifying whether the placeholder
|
||||
to be created is sparse.
|
||||
|
||||
# Example usage
|
||||
# Example
|
||||
|
||||
```python
|
||||
# this is a logistic regression in Keras
|
||||
@@ -1179,7 +1184,7 @@ def Input(shape=None, batch_shape=None,
|
||||
b = Dense(16, activation='softmax')(a)
|
||||
model = Model(input=a, output=b)
|
||||
```
|
||||
'''
|
||||
"""
|
||||
if not batch_shape and tensor is None:
|
||||
assert shape, ('Please provide to Input either a `shape`'
|
||||
' or a `batch_shape` argument. Note that '
|
||||
@@ -1201,10 +1206,10 @@ def Input(shape=None, batch_shape=None,
|
||||
|
||||
|
||||
class Merge(Layer):
|
||||
'''A `Merge` layer can be used to merge a list of tensors
|
||||
"""A `Merge` layer can be used to merge a list of tensors
|
||||
into a single tensor, following some merge `mode`.
|
||||
|
||||
# Example usage
|
||||
# Example
|
||||
|
||||
```python
|
||||
model1 = Sequential()
|
||||
@@ -1214,7 +1219,7 @@ class Merge(Layer):
|
||||
model2.add(Dense(32, input_dim=32))
|
||||
|
||||
merged_model = Sequential()
|
||||
merged_model.add(Merge([model1, model2], mode='concat', concat_axis=1)
|
||||
merged_model.add(Merge([model1, model2], mode='concat', concat_axis=1))
|
||||
```
|
||||
|
||||
# Arguments
|
||||
@@ -1251,10 +1256,11 @@ class Merge(Layer):
|
||||
output_mask: Mask or lambda/function to compute the output mask (only
|
||||
if merge mode is a lambda/function). In the latter case, it should
|
||||
take as input a list of masks and return a single mask.
|
||||
'''
|
||||
"""
|
||||
|
||||
def __init__(self, layers=None, mode='sum', concat_axis=-1,
|
||||
dot_axes=-1, output_shape=None, output_mask=None,
|
||||
arguments={}, node_indices=None, tensor_indices=None,
|
||||
arguments=None, node_indices=None, tensor_indices=None,
|
||||
name=None):
|
||||
self.layers = layers
|
||||
self.mode = mode
|
||||
@@ -1263,14 +1269,14 @@ class Merge(Layer):
|
||||
self._output_shape = output_shape
|
||||
self.node_indices = node_indices
|
||||
self._output_mask = output_mask
|
||||
self.arguments = arguments
|
||||
self.arguments = arguments if arguments else {}
|
||||
|
||||
# Layer parameters.
|
||||
self.inbound_nodes = []
|
||||
self.outbound_nodes = []
|
||||
self.constraints = {}
|
||||
self.trainable_weights = []
|
||||
self.non_trainable_weights = []
|
||||
self._trainable_weights = []
|
||||
self._non_trainable_weights = []
|
||||
self.supports_masking = True
|
||||
self.uses_learning_phase = False
|
||||
self.input_spec = None # Compatible with anything.
|
||||
@@ -1298,9 +1304,9 @@ class Merge(Layer):
|
||||
|
||||
def _arguments_validation(self, layers, mode, concat_axis, dot_axes,
|
||||
node_indices, tensor_indices):
|
||||
'''Validates user-passed arguments and raises exceptions
|
||||
"""Validates user-passed arguments and raises exceptions
|
||||
as appropriate.
|
||||
'''
|
||||
"""
|
||||
if not callable(mode):
|
||||
if mode not in {'sum', 'mul', 'concat', 'ave', 'cos', 'dot', 'max'}:
|
||||
raise ValueError('Invalid merge mode: ' + str(mode))
|
||||
@@ -1416,11 +1422,11 @@ class Merge(Layer):
|
||||
raise ValueError('Unknown merge mode.')
|
||||
|
||||
def __call__(self, inputs, mask=None):
|
||||
'''We disable successive calls to __call__ for Merge layers.
|
||||
"""We disable successive calls to __call__ for Merge layers.
|
||||
Although there is no technical obstacle to
|
||||
making it possible to __call__ a Merge instance many times
|
||||
(it is just a layer), it would make for a rather inelegant API.
|
||||
'''
|
||||
"""
|
||||
if not isinstance(inputs, list):
|
||||
raise TypeError('Merge can only be called on a list of tensors, '
|
||||
'not a single tensor. Received: ' + str(inputs))
|
||||
@@ -1615,11 +1621,11 @@ class Merge(Layer):
|
||||
|
||||
def merge(inputs, mode='sum', concat_axis=-1,
|
||||
dot_axes=-1, output_shape=None, output_mask=None,
|
||||
arguments={}, name=None):
|
||||
'''Functional merge, to apply to Keras tensors (NOT layers).
|
||||
arguments=None, name=None):
|
||||
"""Functional merge, to apply to Keras tensors (NOT layers).
|
||||
Returns a Keras tensor.
|
||||
|
||||
# Example usage:
|
||||
# Example
|
||||
|
||||
```python
|
||||
tensor_a = Input(shape=(32,))
|
||||
@@ -1648,7 +1654,7 @@ def merge(inputs, mode='sum', concat_axis=-1,
|
||||
tensor_indices: Optional list of indices of output tensors
|
||||
to consider for merging
|
||||
(in case some input layer node returns multiple tensors).
|
||||
'''
|
||||
"""
|
||||
all_keras_tensors = True
|
||||
for x in inputs:
|
||||
if not hasattr(x, '_keras_history'):
|
||||
@@ -1685,7 +1691,7 @@ def merge(inputs, mode='sum', concat_axis=-1,
|
||||
|
||||
|
||||
class Container(Layer):
|
||||
'''A Container is a directed acyclic graph of layers.
|
||||
"""A Container is a directed acyclic graph of layers.
|
||||
|
||||
It is the topological form of a "model". A Model
|
||||
is simply a Container with added training routines.
|
||||
@@ -1720,7 +1726,8 @@ class Container(Layer):
|
||||
|
||||
# Class Methods
|
||||
from_config
|
||||
'''
|
||||
"""
|
||||
|
||||
def __init__(self, input, output, name=None):
|
||||
# Handle name argument.
|
||||
if not name:
|
||||
@@ -1870,7 +1877,7 @@ class Container(Layer):
|
||||
|
||||
def build_map_of_graph(tensor, seen_nodes=set(), depth=0,
|
||||
layer=None, node_index=None, tensor_index=None):
|
||||
'''This recursively updates the maps nodes_depths,
|
||||
"""This recursively updates the maps nodes_depths,
|
||||
layers_depths and the set container_nodes.
|
||||
Does not try to detect cycles in graph (TODO?)
|
||||
|
||||
@@ -1883,7 +1890,7 @@ class Container(Layer):
|
||||
will be obtained from `tensor._keras_history`.
|
||||
node_index: Node index from which `tensor` comes from.
|
||||
tensor_index: Tensor_index from which `tensor` comes from.
|
||||
'''
|
||||
"""
|
||||
if not layer or node_index is None or tensor_index is None:
|
||||
layer, node_index, tensor_index = tensor._keras_history
|
||||
node = layer.inbound_nodes[node_index]
|
||||
@@ -2029,7 +2036,7 @@ class Container(Layer):
|
||||
# self.input_spec
|
||||
|
||||
def get_layer(self, name=None, index=None):
|
||||
'''Returns a layer based on either its name (unique)
|
||||
"""Returns a layer based on either its name (unique)
|
||||
or its index in the graph. Indices are based on
|
||||
order of horizontal graph traversal (bottom-up).
|
||||
|
||||
@@ -2039,7 +2046,7 @@ class Container(Layer):
|
||||
|
||||
# Returns
|
||||
A layer instance.
|
||||
'''
|
||||
"""
|
||||
# It would be unreliable to build a dictionary
|
||||
# based on layer names, because names can potentially
|
||||
# be changed at any point by the user
|
||||
@@ -2111,11 +2118,11 @@ class Container(Layer):
|
||||
|
||||
@property
|
||||
def state_updates(self):
|
||||
'''Returns the `updates` from all layers that are
|
||||
"""Returns the `updates` from all layers that are
|
||||
stateful. This is useful for separating training updates and
|
||||
state updates, e.g. when we need to update a layer's internal state
|
||||
during prediction.
|
||||
'''
|
||||
"""
|
||||
state_updates = []
|
||||
for layer in self.layers:
|
||||
if getattr(layer, 'stateful', False):
|
||||
@@ -2166,20 +2173,20 @@ class Container(Layer):
|
||||
return weights
|
||||
|
||||
def get_weights(self):
|
||||
'''Returns the weights of the model,
|
||||
"""Returns the weights of the model,
|
||||
as a flat list of Numpy arrays.
|
||||
'''
|
||||
"""
|
||||
weights = []
|
||||
for layer in self.layers:
|
||||
weights += layer.weights
|
||||
return K.batch_get_value(weights)
|
||||
|
||||
def set_weights(self, weights):
|
||||
'''Sets the weights of the model.
|
||||
"""Sets the weights of the model.
|
||||
The `weights` argument should be a list
|
||||
of Numpy arrays with shapes and types matching
|
||||
the output of `model.get_weights()`.
|
||||
'''
|
||||
"""
|
||||
tuples = []
|
||||
for layer in self.layers:
|
||||
nb_param = len(layer.weights)
|
||||
@@ -2207,13 +2214,13 @@ class Container(Layer):
|
||||
|
||||
@property
|
||||
def uses_learning_phase(self):
|
||||
'''True if any layer in the graph uses it.
|
||||
'''
|
||||
"""True if any layer in the graph uses it.
|
||||
"""
|
||||
layers_learning_phase = any([layer.uses_learning_phase for layer in self.layers])
|
||||
return layers_learning_phase
|
||||
|
||||
def call(self, input, mask=None):
|
||||
'''`call` just reapplies all ops in the graph to the new inputs
|
||||
"""`call` just reapplies all ops in the graph to the new inputs
|
||||
(e.g. build a new computational graph from the provided inputs).
|
||||
|
||||
It is callable on non-Keras tensors.
|
||||
@@ -2226,7 +2233,7 @@ class Container(Layer):
|
||||
# Returns
|
||||
A tensor if there is a single output, or
|
||||
a list of tensors if there is more than one output.
|
||||
'''
|
||||
"""
|
||||
inputs = to_list(input)
|
||||
if mask is None:
|
||||
masks = [None for _ in range(len(inputs))]
|
||||
@@ -2333,7 +2340,7 @@ class Container(Layer):
|
||||
return output_shapes
|
||||
|
||||
def run_internal_graph(self, inputs, masks=None):
|
||||
'''Computes output tensors for new inputs.
|
||||
"""Computes output tensors for new inputs.
|
||||
|
||||
# Note:
|
||||
- Expects `inputs` to be a list (potentially with 1 element).
|
||||
@@ -2345,7 +2352,7 @@ class Container(Layer):
|
||||
|
||||
# Returns
|
||||
Three lists: output_tensors, output_masks, output_shapes
|
||||
'''
|
||||
"""
|
||||
if masks is None:
|
||||
masks = [None for _ in range(len(inputs))]
|
||||
|
||||
@@ -2534,9 +2541,9 @@ class Container(Layer):
|
||||
return copy.deepcopy(config)
|
||||
|
||||
@classmethod
|
||||
def from_config(cls, config, custom_objects={}):
|
||||
'''Instantiates a Model from its config (output of `get_config()`).
|
||||
'''
|
||||
def from_config(cls, config, custom_objects=None):
|
||||
"""Instantiates a Model from its config (output of `get_config()`).
|
||||
"""
|
||||
from keras.utils.layer_utils import layer_from_config
|
||||
|
||||
# layer instances created during
|
||||
@@ -2592,7 +2599,7 @@ class Container(Layer):
|
||||
return cls(input=input_tensors, output=output_tensors, name=name)
|
||||
|
||||
def save(self, filepath, overwrite=True):
|
||||
'''Save into a single HDF5 file:
|
||||
"""Save into a single HDF5 file:
|
||||
- The model architecture, allowing to re-instantiate the model.
|
||||
- The model weights.
|
||||
- The state of the optimizer, allowing to resume training
|
||||
@@ -2606,7 +2613,7 @@ class Container(Layer):
|
||||
is a compiled model ready to be used (unless the saved model
|
||||
was never compiled in the first place).
|
||||
|
||||
# Example usage
|
||||
# Example
|
||||
|
||||
```python
|
||||
from keras.models import load_model
|
||||
@@ -2618,12 +2625,12 @@ class Container(Layer):
|
||||
# identical to the previous one
|
||||
model = load_model('my_model.h5')
|
||||
```
|
||||
'''
|
||||
"""
|
||||
from ..models import save_model
|
||||
save_model(self, filepath, overwrite)
|
||||
|
||||
def save_weights(self, filepath, overwrite=True):
|
||||
'''Dumps all layer weights to a HDF5 file.
|
||||
"""Dumps all layer weights to a HDF5 file.
|
||||
|
||||
The weight file has:
|
||||
- `layer_names` (attribute), a list of strings
|
||||
@@ -2634,7 +2641,7 @@ class Container(Layer):
|
||||
(ordered names of weights tensor of the layer).
|
||||
- For every weight in the layer, a dataset
|
||||
storing the weight value, named after the weight tensor.
|
||||
'''
|
||||
"""
|
||||
import h5py
|
||||
# If file exists and should not be overwritten:
|
||||
if not overwrite and os.path.isfile(filepath):
|
||||
@@ -2677,7 +2684,7 @@ class Container(Layer):
|
||||
param_dset[:] = val
|
||||
|
||||
def load_weights(self, filepath, by_name=False):
|
||||
'''Loads all layer weights from a HDF5 save file.
|
||||
"""Loads all layer weights from a HDF5 save file.
|
||||
|
||||
If `by_name` is False (default) weights are loaded
|
||||
based on the network's topology, meaning the architecture
|
||||
@@ -2690,7 +2697,7 @@ class Container(Layer):
|
||||
only if they share the same name. This is useful
|
||||
for fine-tuning or transfer-learning models where
|
||||
some of the layers have changed.
|
||||
'''
|
||||
"""
|
||||
import h5py
|
||||
f = h5py.File(filepath, mode='r')
|
||||
if 'layer_names' not in f.attrs and 'model_weights' in f:
|
||||
@@ -2704,12 +2711,12 @@ class Container(Layer):
|
||||
f.close()
|
||||
|
||||
def load_weights_from_hdf5_group(self, f):
|
||||
'''Weight loading is based on layer order in a list
|
||||
"""Weight loading is based on layer order in a list
|
||||
(matching model.flattened_layers for Sequential models,
|
||||
and model.layers for Model class instances), not
|
||||
on layer names.
|
||||
Layers that have no weights are skipped.
|
||||
'''
|
||||
"""
|
||||
if hasattr(self, 'flattened_layers'):
|
||||
# Support for legacy Sequential/Merge behavior.
|
||||
flattened_layers = self.flattened_layers
|
||||
@@ -2787,10 +2794,10 @@ class Container(Layer):
|
||||
K.batch_set_value(weight_value_tuples)
|
||||
|
||||
def load_weights_from_hdf5_group_by_name(self, f):
|
||||
''' Name-based weight loading
|
||||
""" Name-based weight loading
|
||||
(instead of topological weight loading).
|
||||
Layers that have no matching name are skipped.
|
||||
'''
|
||||
"""
|
||||
if hasattr(self, 'flattened_layers'):
|
||||
# Support for legacy Sequential/Merge behavior.
|
||||
flattened_layers = self.flattened_layers
|
||||
@@ -2798,9 +2805,9 @@ class Container(Layer):
|
||||
flattened_layers = self.layers
|
||||
|
||||
if 'nb_layers' in f.attrs:
|
||||
raise ValueError('The weight file you are trying to load is'
|
||||
' in a legacy format that does not support'
|
||||
' name-based weight loading.')
|
||||
raise ValueError('The weight file you are trying to load is'
|
||||
' in a legacy format that does not support'
|
||||
' name-based weight loading.')
|
||||
else:
|
||||
# New file format.
|
||||
layer_names = [n.decode('utf8') for n in f.attrs['layer_names']]
|
||||
@@ -2836,7 +2843,7 @@ class Container(Layer):
|
||||
K.batch_set_value(weight_value_tuples)
|
||||
|
||||
def _updated_config(self):
|
||||
'''Shared between different serialization methods.'''
|
||||
"""Shared between different serialization methods."""
|
||||
from keras import __version__ as keras_version
|
||||
|
||||
config = self.get_config()
|
||||
@@ -2848,11 +2855,11 @@ class Container(Layer):
|
||||
return model_config
|
||||
|
||||
def to_json(self, **kwargs):
|
||||
'''Returns a JSON string containing the network configuration.
|
||||
"""Returns a JSON string containing the network configuration.
|
||||
|
||||
To load a network from a JSON save file, use
|
||||
`keras.models.model_from_json(json_string, custom_objects={})`.
|
||||
'''
|
||||
"""
|
||||
import json
|
||||
|
||||
def get_json_type(obj):
|
||||
@@ -2870,7 +2877,7 @@ class Container(Layer):
|
||||
return json.dumps(model_config, default=get_json_type, **kwargs)
|
||||
|
||||
def to_yaml(self, **kwargs):
|
||||
'''Returns a yaml string containing the network configuration.
|
||||
"""Returns a yaml string containing the network configuration.
|
||||
|
||||
To load a network from a yaml save file, use
|
||||
`keras.models.model_from_yaml(yaml_string, custom_objects={})`.
|
||||
@@ -2878,7 +2885,7 @@ class Container(Layer):
|
||||
`custom_objects` should be a dictionary mapping
|
||||
the names of custom losses / layers / etc to the corresponding
|
||||
functions / classes.
|
||||
'''
|
||||
"""
|
||||
import yaml
|
||||
return yaml.dump(self._updated_config(), **kwargs)
|
||||
|
||||
@@ -2897,7 +2904,7 @@ class Container(Layer):
|
||||
|
||||
|
||||
def get_source_inputs(tensor, layer=None, node_index=None):
|
||||
'''Returns the list of input tensors
|
||||
"""Returns the list of input tensors
|
||||
necessary to compute `tensor`.
|
||||
|
||||
Output will always be a list of tensors
|
||||
@@ -2908,7 +2915,7 @@ def get_source_inputs(tensor, layer=None, node_index=None):
|
||||
layer: Origin layer of the tensor. Will be
|
||||
determined via tensor._keras_history if not provided.
|
||||
node_index: Origin node index of the tensor.
|
||||
'''
|
||||
"""
|
||||
if not hasattr(tensor, '_keras_history'):
|
||||
return tensor
|
||||
|
||||
|
||||
+340
-311
Diferenças do arquivo suprimidas por serem muito extensas
Carregar Diff
+31
-17
@@ -9,7 +9,7 @@ def get_fans(shape, dim_ordering='th'):
|
||||
fan_in = shape[0]
|
||||
fan_out = shape[1]
|
||||
elif len(shape) == 4 or len(shape) == 5:
|
||||
# assuming convolution kernels (2D or 3D).
|
||||
# Assuming convolution kernels (2D or 3D).
|
||||
# TH kernel shape: (depth, input_depth, ...)
|
||||
# TF kernel shape: (..., input_depth, depth)
|
||||
if dim_ordering == 'th':
|
||||
@@ -23,32 +23,38 @@ def get_fans(shape, dim_ordering='th'):
|
||||
else:
|
||||
raise ValueError('Invalid dim_ordering: ' + dim_ordering)
|
||||
else:
|
||||
# no specific assumptions
|
||||
# No specific assumptions.
|
||||
fan_in = np.sqrt(np.prod(shape))
|
||||
fan_out = np.sqrt(np.prod(shape))
|
||||
return fan_in, fan_out
|
||||
|
||||
|
||||
def uniform(shape, scale=0.05, name=None):
|
||||
def uniform(shape, scale=0.05, name=None, dim_ordering='th'):
|
||||
return K.random_uniform_variable(shape, -scale, scale, name=name)
|
||||
|
||||
|
||||
def normal(shape, scale=0.05, name=None):
|
||||
def normal(shape, scale=0.05, name=None, dim_ordering='th'):
|
||||
return K.random_normal_variable(shape, 0.0, scale, name=name)
|
||||
|
||||
|
||||
def lecun_uniform(shape, name=None, dim_ordering='th'):
|
||||
''' Reference: LeCun 98, Efficient Backprop
|
||||
"""LeCun uniform variance scaling initializer.
|
||||
|
||||
# References
|
||||
LeCun 98, Efficient Backprop,
|
||||
http://yann.lecun.com/exdb/publis/pdf/lecun-98b.pdf
|
||||
'''
|
||||
"""
|
||||
fan_in, fan_out = get_fans(shape, dim_ordering=dim_ordering)
|
||||
scale = np.sqrt(3. / fan_in)
|
||||
return uniform(shape, scale, name=name)
|
||||
|
||||
|
||||
def glorot_normal(shape, name=None, dim_ordering='th'):
|
||||
''' Reference: Glorot & Bengio, AISTATS 2010
|
||||
'''
|
||||
"""Glorot normal variance scaling initializer.
|
||||
|
||||
# References
|
||||
Glorot & Bengio, AISTATS 2010
|
||||
"""
|
||||
fan_in, fan_out = get_fans(shape, dim_ordering=dim_ordering)
|
||||
s = np.sqrt(2. / (fan_in + fan_out))
|
||||
return normal(shape, s, name=name)
|
||||
@@ -61,32 +67,40 @@ def glorot_uniform(shape, name=None, dim_ordering='th'):
|
||||
|
||||
|
||||
def he_normal(shape, name=None, dim_ordering='th'):
|
||||
''' Reference: He et al., http://arxiv.org/abs/1502.01852
|
||||
'''
|
||||
"""He normal variance scaling initializer.
|
||||
|
||||
# References
|
||||
He et al., http://arxiv.org/abs/1502.01852
|
||||
"""
|
||||
fan_in, fan_out = get_fans(shape, dim_ordering=dim_ordering)
|
||||
s = np.sqrt(2. / fan_in)
|
||||
return normal(shape, s, name=name)
|
||||
|
||||
|
||||
def he_uniform(shape, name=None, dim_ordering='th'):
|
||||
"""He uniform variance scaling initializer.
|
||||
"""
|
||||
fan_in, fan_out = get_fans(shape, dim_ordering=dim_ordering)
|
||||
s = np.sqrt(6. / fan_in)
|
||||
return uniform(shape, s, name=name)
|
||||
|
||||
|
||||
def orthogonal(shape, scale=1.1, name=None):
|
||||
''' From Lasagne. Reference: Saxe et al., http://arxiv.org/abs/1312.6120
|
||||
'''
|
||||
def orthogonal(shape, scale=1.1, name=None, dim_ordering='th'):
|
||||
"""Orthogonal initializer.
|
||||
|
||||
# References
|
||||
Saxe et al., http://arxiv.org/abs/1312.6120
|
||||
"""
|
||||
flat_shape = (shape[0], np.prod(shape[1:]))
|
||||
a = np.random.normal(0.0, 1.0, flat_shape)
|
||||
u, _, v = np.linalg.svd(a, full_matrices=False)
|
||||
# pick the one with the correct shape
|
||||
# Pick the one with the correct shape.
|
||||
q = u if u.shape == flat_shape else v
|
||||
q = q.reshape(shape)
|
||||
return K.variable(scale * q[:shape[0], :shape[1]], name=name)
|
||||
|
||||
|
||||
def identity(shape, scale=1, name=None):
|
||||
def identity(shape, scale=1, name=None, dim_ordering='th'):
|
||||
if len(shape) != 2 or shape[0] != shape[1]:
|
||||
raise ValueError('Identity matrix initialization can only be used '
|
||||
'for 2D square matrices.')
|
||||
@@ -94,11 +108,11 @@ def identity(shape, scale=1, name=None):
|
||||
return K.variable(scale * np.identity(shape[0]), name=name)
|
||||
|
||||
|
||||
def zero(shape, name=None):
|
||||
def zero(shape, name=None, dim_ordering='th'):
|
||||
return K.zeros(shape, name=name)
|
||||
|
||||
|
||||
def one(shape, name=None):
|
||||
def one(shape, name=None, dim_ordering='th'):
|
||||
return K.ones(shape, name=name)
|
||||
|
||||
|
||||
|
||||
@@ -5,8 +5,9 @@ import numpy as np
|
||||
|
||||
|
||||
class LeakyReLU(Layer):
|
||||
'''Special version of a Rectified Linear Unit
|
||||
that allows a small gradient when the unit is not active:
|
||||
"""Leaky version of a Rectified Linear Unit.
|
||||
|
||||
It allows a small gradient when the unit is not active:
|
||||
`f(x) = alpha * x for x < 0`,
|
||||
`f(x) = x for x >= 0`.
|
||||
|
||||
@@ -20,7 +21,11 @@ class LeakyReLU(Layer):
|
||||
|
||||
# Arguments
|
||||
alpha: float >= 0. Negative slope coefficient.
|
||||
'''
|
||||
|
||||
# References
|
||||
- [Rectifier Nonlinearities Improve Neural Network Acoustic Models](https://web.stanford.edu/~awni/papers/relu_hybrid_icml2013_final.pdf)
|
||||
"""
|
||||
|
||||
def __init__(self, alpha=0.3, **kwargs):
|
||||
self.supports_masking = True
|
||||
self.alpha = alpha
|
||||
@@ -36,7 +41,9 @@ class LeakyReLU(Layer):
|
||||
|
||||
|
||||
class PReLU(Layer):
|
||||
'''Parametric Rectified Linear Unit:
|
||||
"""Parametric Rectified Linear Unit.
|
||||
|
||||
It follows:
|
||||
`f(x) = alphas * x for x < 0`,
|
||||
`f(x) = x for x >= 0`,
|
||||
where `alphas` is a learned array with the same shape as x.
|
||||
@@ -62,13 +69,14 @@ class PReLU(Layer):
|
||||
set `shared_axes=[1, 2]`.
|
||||
|
||||
# References
|
||||
- [Delving Deep into Rectifiers: Surpassing Human-Level Performance on ImageNet Classification](http://arxiv.org/pdf/1502.01852v1.pdf)
|
||||
'''
|
||||
- [Delving Deep into Rectifiers: Surpassing Human-Level Performance on ImageNet Classification](https://arxiv.org/abs/1502.01852)
|
||||
"""
|
||||
|
||||
def __init__(self, init='zero', weights=None, shared_axes=None, **kwargs):
|
||||
self.supports_masking = True
|
||||
self.init = initializations.get(init)
|
||||
self.initial_weights = weights
|
||||
if type(shared_axes) is not list and type(shared_axes) is not tuple:
|
||||
if not isinstance(shared_axes, (list, tuple)):
|
||||
self.shared_axes = [shared_axes]
|
||||
else:
|
||||
self.shared_axes = list(shared_axes)
|
||||
@@ -79,8 +87,8 @@ class PReLU(Layer):
|
||||
self.param_broadcast = [False] * len(param_shape)
|
||||
if self.shared_axes[0] is not None:
|
||||
for i in self.shared_axes:
|
||||
param_shape[i] = 1
|
||||
self.param_broadcast[i] = True
|
||||
param_shape[i - 1] = 1
|
||||
self.param_broadcast[i - 1] = True
|
||||
|
||||
self.alphas = self.init(param_shape,
|
||||
name='{}_alphas'.format(self.name))
|
||||
@@ -93,9 +101,10 @@ class PReLU(Layer):
|
||||
def call(self, x, mask=None):
|
||||
pos = K.relu(x)
|
||||
if K.backend() == 'theano':
|
||||
neg = K.pattern_broadcast(self.alphas, self.param_broadcast) * (x - abs(x)) * 0.5
|
||||
neg = (K.pattern_broadcast(self.alphas, self.param_broadcast) *
|
||||
(x - K.abs(x)) * 0.5)
|
||||
else:
|
||||
neg = self.alphas * (x - abs(x)) * 0.5
|
||||
neg = self.alphas * (x - K.abs(x)) * 0.5
|
||||
return pos + neg
|
||||
|
||||
def get_config(self):
|
||||
@@ -105,7 +114,9 @@ class PReLU(Layer):
|
||||
|
||||
|
||||
class ELU(Layer):
|
||||
'''Exponential Linear Unit:
|
||||
"""Exponential Linear Unit.
|
||||
|
||||
It follows:
|
||||
`f(x) = alpha * (exp(x) - 1.) for x < 0`,
|
||||
`f(x) = x for x >= 0`.
|
||||
|
||||
@@ -121,8 +132,9 @@ class ELU(Layer):
|
||||
alpha: scale for the negative factor.
|
||||
|
||||
# References
|
||||
- [Fast and Accurate Deep Network Learning by Exponential Linear Units (ELUs)](http://arxiv.org/pdf/1511.07289v1.pdf)
|
||||
'''
|
||||
- [Fast and Accurate Deep Network Learning by Exponential Linear Units (ELUs)](https://arxiv.org/abs/1511.07289v1)
|
||||
"""
|
||||
|
||||
def __init__(self, alpha=1.0, **kwargs):
|
||||
self.supports_masking = True
|
||||
self.alpha = K.cast_to_floatx(alpha)
|
||||
@@ -138,8 +150,10 @@ class ELU(Layer):
|
||||
|
||||
|
||||
class ParametricSoftplus(Layer):
|
||||
'''Parametric Softplus:
|
||||
`alpha * log(1 + exp(beta * x))`
|
||||
"""Parametric Softplus.
|
||||
|
||||
It follows:
|
||||
`f(x) = alpha * log(1 + exp(beta * x))`
|
||||
|
||||
# Input shape
|
||||
Arbitrary. Use the keyword argument `input_shape`
|
||||
@@ -164,14 +178,15 @@ class ParametricSoftplus(Layer):
|
||||
|
||||
# References
|
||||
- [Inferring Nonlinear Neuronal Computation Based on Physiologically Plausible Inputs](http://journals.plos.org/ploscompbiol/article?id=10.1371/journal.pcbi.1003143)
|
||||
'''
|
||||
"""
|
||||
|
||||
def __init__(self, alpha_init=0.2, beta_init=5.0,
|
||||
weights=None, shared_axes=None, **kwargs):
|
||||
self.supports_masking = True
|
||||
self.alpha_init = K.cast_to_floatx(alpha_init)
|
||||
self.beta_init = K.cast_to_floatx(beta_init)
|
||||
self.initial_weights = weights
|
||||
if type(shared_axes) is not list and type(shared_axes) is not tuple:
|
||||
if not isinstance(shared_axes, (list, tuple)):
|
||||
self.shared_axes = [shared_axes]
|
||||
else:
|
||||
self.shared_axes = list(shared_axes)
|
||||
@@ -182,8 +197,8 @@ class ParametricSoftplus(Layer):
|
||||
self.param_broadcast = [False] * len(param_shape)
|
||||
if self.shared_axes[0] is not None:
|
||||
for i in self.shared_axes:
|
||||
param_shape[i] = 1
|
||||
self.param_broadcast[i] = True
|
||||
param_shape[i - 1] = 1
|
||||
self.param_broadcast[i - 1] = True
|
||||
|
||||
self.alphas = K.variable(self.alpha_init * np.ones(param_shape),
|
||||
name='{}_alphas'.format(self.name))
|
||||
@@ -197,7 +212,9 @@ class ParametricSoftplus(Layer):
|
||||
|
||||
def call(self, x, mask=None):
|
||||
if K.backend() == 'theano':
|
||||
return K.softplus(K.pattern_broadcast(self.betas, self.param_broadcast) * x) * K.pattern_broadcast(self.alphas, self.param_broadcast)
|
||||
return (K.softplus(K.pattern_broadcast(self.betas,
|
||||
self.param_broadcast) * x) *
|
||||
K.pattern_broadcast(self.alphas, self.param_broadcast))
|
||||
else:
|
||||
return K.softplus(self.betas * x) * self.alphas
|
||||
|
||||
@@ -209,8 +226,10 @@ class ParametricSoftplus(Layer):
|
||||
|
||||
|
||||
class ThresholdedReLU(Layer):
|
||||
'''Thresholded Rectified Linear Unit:
|
||||
`f(x) = x for x > theta`
|
||||
"""Thresholded Rectified Linear Unit.
|
||||
|
||||
It follows:
|
||||
`f(x) = x for x > theta`,
|
||||
`f(x) = 0 otherwise`.
|
||||
|
||||
# Input shape
|
||||
@@ -225,8 +244,9 @@ class ThresholdedReLU(Layer):
|
||||
theta: float >= 0. Threshold location of activation.
|
||||
|
||||
# References
|
||||
- [Zero-Bias Autoencoders and the Benefits of Co-Adapting Features](http://arxiv.org/pdf/1402.3337.pdf)
|
||||
'''
|
||||
- [Zero-Bias Autoencoders and the Benefits of Co-Adapting Features](http://arxiv.org/abs/1402.3337)
|
||||
"""
|
||||
|
||||
def __init__(self, theta=1.0, **kwargs):
|
||||
self.supports_masking = True
|
||||
self.theta = K.cast_to_floatx(theta)
|
||||
@@ -242,7 +262,12 @@ class ThresholdedReLU(Layer):
|
||||
|
||||
|
||||
class SReLU(Layer):
|
||||
'''S-shaped Rectified Linear Unit.
|
||||
"""S-shaped Rectified Linear Unit.
|
||||
|
||||
It follows:
|
||||
`f(x) = t^r + a^r(x - t^r) for x >= t^r`,
|
||||
`f(x) = x for t^r > x > t^l`,
|
||||
`f(x) = t^l + a^l(x - t^l) for x <= t^l`.
|
||||
|
||||
# Input shape
|
||||
Arbitrary. Use the keyword argument `input_shape`
|
||||
@@ -268,15 +293,17 @@ class SReLU(Layer):
|
||||
|
||||
# References
|
||||
- [Deep Learning with S-shaped Rectified Linear Activation Units](http://arxiv.org/abs/1512.07030)
|
||||
'''
|
||||
"""
|
||||
|
||||
def __init__(self, t_left_init='zero', a_left_init='glorot_uniform',
|
||||
t_right_init='glorot_uniform', a_right_init='one', shared_axes=None, **kwargs):
|
||||
t_right_init='glorot_uniform', a_right_init='one',
|
||||
shared_axes=None, **kwargs):
|
||||
self.supports_masking = True
|
||||
self.t_left_init = t_left_init
|
||||
self.a_left_init = a_left_init
|
||||
self.t_right_init = t_right_init
|
||||
self.a_right_init = a_right_init
|
||||
if type(shared_axes) is not list and type(shared_axes) is not tuple:
|
||||
if not isinstance(shared_axes, (list, tuple)):
|
||||
self.shared_axes = [shared_axes]
|
||||
else:
|
||||
self.shared_axes = list(shared_axes)
|
||||
@@ -287,8 +314,8 @@ class SReLU(Layer):
|
||||
self.param_broadcast = [False] * len(param_shape)
|
||||
if self.shared_axes[0] is not None:
|
||||
for i in self.shared_axes:
|
||||
param_shape[i] = 1
|
||||
self.param_broadcast[i] = True
|
||||
param_shape[i - 1] = 1
|
||||
self.param_broadcast[i - 1] = True
|
||||
|
||||
t_left_init = initializations.get(self.t_left_init)
|
||||
a_left_init = initializations.get(self.a_left_init)
|
||||
@@ -304,7 +331,7 @@ class SReLU(Layer):
|
||||
self.a_right = a_right_init(param_shape,
|
||||
name='{}_a_right'.format(self.name))
|
||||
# ensure the the right part is always to the right of the left
|
||||
self.t_right_actual = self.t_left + abs(self.t_right)
|
||||
self.t_right_actual = self.t_left + K.abs(self.t_right)
|
||||
self.trainable_weights = [self.t_left, self.a_left,
|
||||
self.t_right, self.a_right]
|
||||
|
||||
@@ -313,18 +340,19 @@ class SReLU(Layer):
|
||||
t_left = K.pattern_broadcast(self.t_left, self.param_broadcast)
|
||||
a_left = K.pattern_broadcast(self.a_left, self.param_broadcast)
|
||||
a_right = K.pattern_broadcast(self.a_right, self.param_broadcast)
|
||||
t_right_actual = K.pattern_broadcast(self.t_right_actual, self.param_broadcast)
|
||||
t_right_actual = K.pattern_broadcast(self.t_right_actual,
|
||||
self.param_broadcast)
|
||||
else:
|
||||
t_left = self.t_left
|
||||
a_left = self.a_left
|
||||
a_right = self.a_right
|
||||
t_right_actual = self.t_right_actual
|
||||
|
||||
Y_left_and_center = t_left + K.relu(x - t_left,
|
||||
y_left_and_center = t_left + K.relu(x - t_left,
|
||||
a_left,
|
||||
t_right_actual - t_left)
|
||||
Y_right = K.relu(x - t_right_actual) * a_right
|
||||
return Y_left_and_center + Y_right
|
||||
y_right = K.relu(x - t_right_actual) * a_right
|
||||
return y_left_and_center + y_right
|
||||
|
||||
def get_config(self):
|
||||
config = {'t_left_init': self.t_left_init,
|
||||
|
||||
+299
-123
@@ -1,18 +1,29 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
from __future__ import absolute_import
|
||||
import functools
|
||||
|
||||
from .. import backend as K
|
||||
from .. import activations, initializations, regularizers, constraints
|
||||
from ..engine import Layer, InputSpec
|
||||
from ..utils.np_utils import conv_output_length, conv_input_length
|
||||
from .. import activations
|
||||
from .. import initializations
|
||||
from .. import regularizers
|
||||
from .. import constraints
|
||||
from ..engine import Layer
|
||||
from ..engine import InputSpec
|
||||
from ..utils.np_utils import conv_output_length
|
||||
from ..utils.np_utils import conv_input_length
|
||||
|
||||
# imports for backwards namespace compatibility
|
||||
from .pooling import AveragePooling1D, AveragePooling2D, AveragePooling3D
|
||||
from .pooling import MaxPooling1D, MaxPooling2D, MaxPooling3D
|
||||
from .pooling import AveragePooling1D
|
||||
from .pooling import AveragePooling2D
|
||||
from .pooling import AveragePooling3D
|
||||
from .pooling import MaxPooling1D
|
||||
from .pooling import MaxPooling2D
|
||||
from .pooling import MaxPooling3D
|
||||
|
||||
|
||||
class Convolution1D(Layer):
|
||||
'''Convolution operator for filtering neighborhoods of one-dimensional inputs.
|
||||
"""Convolution operator for filtering neighborhoods of 1-D inputs.
|
||||
|
||||
When using this layer as the first layer in a model,
|
||||
either provide the keyword argument `input_dim`
|
||||
(int, e.g. 128 for sequences of 128-dimensional vectors),
|
||||
@@ -38,16 +49,18 @@ class Convolution1D(Layer):
|
||||
(dimensionality of the output).
|
||||
filter_length: The extension (spatial or temporal) of each filter.
|
||||
init: name of initialization function for the weights of the layer
|
||||
(see [initializations](../initializations.md)),
|
||||
or alternatively, Theano function to use for weights initialization.
|
||||
This parameter is only relevant if you don't pass a `weights` argument.
|
||||
(see [initializations](../initializations.md)), or alternatively,
|
||||
Theano function to use for weights initialization.
|
||||
This parameter is only relevant
|
||||
if you don't pass a `weights` argument.
|
||||
activation: name of activation function to use
|
||||
(see [activations](../activations.md)),
|
||||
or alternatively, elementwise Theano function.
|
||||
If you don't specify anything, no activation is applied
|
||||
(ie. "linear" activation: a(x) = x).
|
||||
weights: list of numpy arrays to set as initial weights.
|
||||
border_mode: 'valid', 'same' or 'full'. ('full' requires the Theano backend.)
|
||||
border_mode: 'valid', 'same' or 'full'
|
||||
('full' requires the Theano backend).
|
||||
subsample_length: factor by which to subsample output.
|
||||
W_regularizer: instance of [WeightRegularizer](../regularizers.md)
|
||||
(eg. L1 or L2 regularization), applied to the main weights matrix.
|
||||
@@ -75,11 +88,13 @@ class Convolution1D(Layer):
|
||||
# Output shape
|
||||
3D tensor with shape: `(samples, new_steps, nb_filter)`.
|
||||
`steps` value might have changed due to padding.
|
||||
'''
|
||||
"""
|
||||
|
||||
def __init__(self, nb_filter, filter_length,
|
||||
init='glorot_uniform', activation=None, weights=None,
|
||||
border_mode='valid', subsample_length=1,
|
||||
W_regularizer=None, b_regularizer=None, activity_regularizer=None,
|
||||
W_regularizer=None, b_regularizer=None,
|
||||
activity_regularizer=None,
|
||||
W_constraint=None, b_constraint=None,
|
||||
bias=True, input_dim=None, input_length=None, **kwargs):
|
||||
|
||||
@@ -87,7 +102,7 @@ class Convolution1D(Layer):
|
||||
raise ValueError('Invalid border mode for Convolution1D:', border_mode)
|
||||
self.nb_filter = nb_filter
|
||||
self.filter_length = filter_length
|
||||
self.init = initializations.get(init, dim_ordering='th')
|
||||
self.init = initializations.get(init)
|
||||
self.activation = activations.get(activation)
|
||||
self.border_mode = border_mode
|
||||
self.subsample_length = subsample_length
|
||||
@@ -115,7 +130,8 @@ class Convolution1D(Layer):
|
||||
self.W_shape = (self.filter_length, 1, input_dim, self.nb_filter)
|
||||
|
||||
self.W = self.add_weight(self.W_shape,
|
||||
initializer=self.init,
|
||||
initializer=functools.partial(self.init,
|
||||
dim_ordering='th'),
|
||||
name='{}_W'.format(self.name),
|
||||
regularizer=self.W_regularizer,
|
||||
constraint=self.W_constraint)
|
||||
@@ -171,7 +187,8 @@ class Convolution1D(Layer):
|
||||
|
||||
|
||||
class AtrousConvolution1D(Convolution1D):
|
||||
'''Atrous Convolution operator for filtering neighborhoods of one-dimensional inputs.
|
||||
"""Atrous Convolution operator for filtering neighborhoods of 1-D inputs.
|
||||
|
||||
A.k.a dilated convolution or convolution with holes.
|
||||
When using this layer as the first layer in a model,
|
||||
either provide the keyword argument `input_dim`
|
||||
@@ -182,14 +199,18 @@ class AtrousConvolution1D(Convolution1D):
|
||||
# Example
|
||||
|
||||
```python
|
||||
# apply an atrous convolution 1d with atrous rate 2 of length 3 to a sequence with 10 timesteps,
|
||||
# apply an atrous convolution 1d
|
||||
# with atrous rate 2 of length 3 to a sequence with 10 timesteps,
|
||||
# with 64 output filters
|
||||
model = Sequential()
|
||||
model.add(AtrousConvolution1D(64, 3, atrous_rate=2, border_mode='same', input_shape=(10, 32)))
|
||||
model.add(AtrousConvolution1D(64, 3, atrous_rate=2,
|
||||
border_mode='same',
|
||||
input_shape=(10, 32)))
|
||||
# now model.output_shape == (None, 10, 64)
|
||||
|
||||
# add a new atrous conv1d on top
|
||||
model.add(AtrousConvolution1D(32, 3, atrous_rate=2, border_mode='same'))
|
||||
model.add(AtrousConvolution1D(32, 3, atrous_rate=2,
|
||||
border_mode='same'))
|
||||
# now model.output_shape == (None, 10, 32)
|
||||
```
|
||||
|
||||
@@ -198,16 +219,18 @@ class AtrousConvolution1D(Convolution1D):
|
||||
(dimensionality of the output).
|
||||
filter_length: The extension (spatial or temporal) of each filter.
|
||||
init: name of initialization function for the weights of the layer
|
||||
(see [initializations](../initializations.md)),
|
||||
or alternatively, Theano function to use for weights initialization.
|
||||
This parameter is only relevant if you don't pass a `weights` argument.
|
||||
(see [initializations](../initializations.md)), or alternatively,
|
||||
Theano function to use for weights initialization.
|
||||
This parameter is only relevant
|
||||
if you don't pass a `weights` argument.
|
||||
activation: name of activation function to use
|
||||
(see [activations](../activations.md)),
|
||||
or alternatively, elementwise Theano function.
|
||||
If you don't specify anything, no activation is applied
|
||||
(ie. "linear" activation: a(x) = x).
|
||||
weights: list of numpy arrays to set as initial weights.
|
||||
border_mode: 'valid', 'same' or 'full'. ('full' requires the Theano backend.)
|
||||
border_mode: 'valid', 'same' or 'full'
|
||||
('full' requires the Theano backend).
|
||||
subsample_length: factor by which to subsample output.
|
||||
atrous_rate: Factor for kernel dilation. Also called filter_dilation
|
||||
elsewhere.
|
||||
@@ -237,11 +260,13 @@ class AtrousConvolution1D(Convolution1D):
|
||||
# Output shape
|
||||
3D tensor with shape: `(samples, new_steps, nb_filter)`.
|
||||
`steps` value might have changed due to padding.
|
||||
'''
|
||||
"""
|
||||
|
||||
def __init__(self, nb_filter, filter_length,
|
||||
init='glorot_uniform', activation=None, weights=None,
|
||||
border_mode='valid', subsample_length=1, atrous_rate=1,
|
||||
W_regularizer=None, b_regularizer=None, activity_regularizer=None,
|
||||
W_regularizer=None, b_regularizer=None,
|
||||
activity_regularizer=None,
|
||||
W_constraint=None, b_constraint=None,
|
||||
bias=True, **kwargs):
|
||||
|
||||
@@ -250,14 +275,15 @@ class AtrousConvolution1D(Convolution1D):
|
||||
|
||||
self.atrous_rate = int(atrous_rate)
|
||||
|
||||
super(AtrousConvolution1D, self).__init__(nb_filter, filter_length,
|
||||
init=init, activation=activation,
|
||||
weights=weights, border_mode=border_mode,
|
||||
subsample_length=subsample_length,
|
||||
W_regularizer=W_regularizer, b_regularizer=b_regularizer,
|
||||
activity_regularizer=activity_regularizer,
|
||||
W_constraint=W_constraint, b_constraint=b_constraint,
|
||||
bias=bias, **kwargs)
|
||||
super(AtrousConvolution1D, self).__init__(
|
||||
nb_filter, filter_length,
|
||||
init=init, activation=activation,
|
||||
weights=weights, border_mode=border_mode,
|
||||
subsample_length=subsample_length,
|
||||
W_regularizer=W_regularizer, b_regularizer=b_regularizer,
|
||||
activity_regularizer=activity_regularizer,
|
||||
W_constraint=W_constraint, b_constraint=b_constraint,
|
||||
bias=bias, **kwargs)
|
||||
|
||||
def get_output_shape_for(self, input_shape):
|
||||
length = conv_output_length(input_shape[1],
|
||||
@@ -286,7 +312,8 @@ class AtrousConvolution1D(Convolution1D):
|
||||
|
||||
|
||||
class Convolution2D(Layer):
|
||||
'''Convolution operator for filtering windows of two-dimensional inputs.
|
||||
"""Convolution operator for filtering windows of two-dimensional inputs.
|
||||
|
||||
When using this layer as the first layer in a model,
|
||||
provide the keyword argument `input_shape`
|
||||
(tuple of integers, does not include the sample axis),
|
||||
@@ -297,7 +324,9 @@ class Convolution2D(Layer):
|
||||
```python
|
||||
# apply a 3x3 convolution with 64 output filters on a 256x256 image:
|
||||
model = Sequential()
|
||||
model.add(Convolution2D(64, 3, 3, border_mode='same', input_shape=(3, 256, 256)))
|
||||
model.add(Convolution2D(64, 3, 3,
|
||||
border_mode='same',
|
||||
input_shape=(3, 256, 256)))
|
||||
# now model.output_shape == (None, 64, 256, 256)
|
||||
|
||||
# add a 3x3 convolution on top, with 32 output filters:
|
||||
@@ -320,7 +349,8 @@ class Convolution2D(Layer):
|
||||
If you don't specify anything, no activation is applied
|
||||
(ie. "linear" activation: a(x) = x).
|
||||
weights: list of numpy arrays to set as initial weights.
|
||||
border_mode: 'valid', 'same' or 'full'. ('full' requires the Theano backend.)
|
||||
border_mode: 'valid', 'same' or 'full'
|
||||
('full' requires the Theano backend).
|
||||
subsample: tuple of length 2. Factor by which to subsample output.
|
||||
Also called strides elsewhere.
|
||||
W_regularizer: instance of [WeightRegularizer](../regularizers.md)
|
||||
@@ -353,11 +383,13 @@ class Convolution2D(Layer):
|
||||
or 4D tensor with shape:
|
||||
`(samples, new_rows, new_cols, nb_filter)` if dim_ordering='tf'.
|
||||
`rows` and `cols` values might have changed due to padding.
|
||||
'''
|
||||
"""
|
||||
|
||||
def __init__(self, nb_filter, nb_row, nb_col,
|
||||
init='glorot_uniform', activation=None, weights=None,
|
||||
border_mode='valid', subsample=(1, 1), dim_ordering='default',
|
||||
W_regularizer=None, b_regularizer=None, activity_regularizer=None,
|
||||
W_regularizer=None, b_regularizer=None,
|
||||
activity_regularizer=None,
|
||||
W_constraint=None, b_constraint=None,
|
||||
bias=True, **kwargs):
|
||||
if dim_ordering == 'default':
|
||||
@@ -367,7 +399,7 @@ class Convolution2D(Layer):
|
||||
self.nb_filter = nb_filter
|
||||
self.nb_row = nb_row
|
||||
self.nb_col = nb_col
|
||||
self.init = initializations.get(init, dim_ordering=dim_ordering)
|
||||
self.init = initializations.get(init)
|
||||
self.activation = activations.get(activation)
|
||||
self.border_mode = border_mode
|
||||
self.subsample = tuple(subsample)
|
||||
@@ -397,7 +429,8 @@ class Convolution2D(Layer):
|
||||
else:
|
||||
raise ValueError('Invalid dim_ordering:', self.dim_ordering)
|
||||
self.W = self.add_weight(self.W_shape,
|
||||
initializer=self.init,
|
||||
initializer=functools.partial(self.init,
|
||||
dim_ordering=self.dim_ordering),
|
||||
name='{}_W'.format(self.name),
|
||||
regularizer=self.W_regularizer,
|
||||
constraint=self.W_constraint)
|
||||
@@ -470,12 +503,14 @@ class Convolution2D(Layer):
|
||||
|
||||
|
||||
class Deconvolution2D(Convolution2D):
|
||||
'''Transposed convolution operator for filtering windows of two-dimensional inputs.
|
||||
The need for transposed convolutions generally arises from the desire
|
||||
to use a transformation going in the opposite direction of a normal convolution,
|
||||
i.e., from something that has the shape of the output of some convolution
|
||||
to something that has the shape of its input
|
||||
while maintaining a connectivity pattern that is compatible with said convolution. [1]
|
||||
"""Transposed convolution operator for filtering windows of 2-D inputs.
|
||||
|
||||
The need for transposed convolutions generally arises from the desire to
|
||||
use a transformation going in the opposite direction
|
||||
of a normal convolution, i.e., from something that has the shape
|
||||
of the output of some convolution to something that has the shape
|
||||
of its input while maintaining a connectivity pattern
|
||||
that is compatible with said convolution.
|
||||
|
||||
When using this layer as the first layer in a model,
|
||||
provide the keyword argument `input_shape`
|
||||
@@ -488,10 +523,14 @@ class Deconvolution2D(Convolution2D):
|
||||
# Examples
|
||||
|
||||
```python
|
||||
# apply a 3x3 transposed convolution with stride 1x1 and 3 output filters on a 12x12 image:
|
||||
# apply a 3x3 transposed convolution
|
||||
# with stride 1x1 and 3 output filters on a 12x12 image:
|
||||
model = Sequential()
|
||||
model.add(Deconvolution2D(3, 3, 3, output_shape=(None, 3, 14, 14), border_mode='valid', input_shape=(3, 12, 12)))
|
||||
# Note that you will have to change the output_shape depending on the backend used.
|
||||
model.add(Deconvolution2D(3, 3, 3, output_shape=(None, 3, 14, 14),
|
||||
border_mode='valid',
|
||||
input_shape=(3, 12, 12)))
|
||||
# Note that you will have to change
|
||||
# the output_shape depending on the backend used.
|
||||
|
||||
# we can predict with the model and print the shape of the array.
|
||||
dummy_input = np.ones((32, 3, 12, 12))
|
||||
@@ -502,9 +541,13 @@ class Deconvolution2D(Convolution2D):
|
||||
# Theano CPU: (None, 3, 14, 14)
|
||||
# TensorFlow: (None, 14, 14, 3)
|
||||
|
||||
# apply a 3x3 transposed convolution with stride 2x2 and 3 output filters on a 12x12 image:
|
||||
# apply a 3x3 transposed convolution
|
||||
# with stride 2x2 and 3 output filters on a 12x12 image:
|
||||
model = Sequential()
|
||||
model.add(Deconvolution2D(3, 3, 3, output_shape=(None, 3, 25, 25), subsample=(2, 2), border_mode='valid', input_shape=(3, 12, 12)))
|
||||
model.add(Deconvolution2D(3, 3, 3, output_shape=(None, 3, 25, 25),
|
||||
subsample=(2, 2),
|
||||
border_mode='valid',
|
||||
input_shape=(3, 12, 12)))
|
||||
model.summary()
|
||||
|
||||
# we can predict with the model and print the shape of the array.
|
||||
@@ -522,19 +565,11 @@ class Deconvolution2D(Convolution2D):
|
||||
nb_row: Number of rows in the transposed convolution kernel.
|
||||
nb_col: Number of columns in the transposed convolution kernel.
|
||||
output_shape: Output shape of the transposed convolution operation.
|
||||
tuple of integers (nb_samples, nb_filter, nb_output_rows, nb_output_cols)
|
||||
Formula for calculation of the output shape [1], [2]:
|
||||
o = s (i - 1) + a + k - 2p, \quad a \in \{0, \ldots, s - 1\}
|
||||
where:
|
||||
i - input size (rows or cols),
|
||||
k - kernel size (nb_filter),
|
||||
s - stride (subsample for rows or cols respectively),
|
||||
p - padding size,
|
||||
a - user-specified quantity used to distinguish between
|
||||
the s different possible output sizes.
|
||||
Because a is not specified explicitly and Theano and Tensorflow
|
||||
use different values, it is better to use a dummy input and observe
|
||||
the actual output shape of a layer as specified in the examples.
|
||||
tuple of integers
|
||||
`(nb_samples, nb_filter, nb_output_rows, nb_output_cols)`.
|
||||
It is better to use
|
||||
a dummy input and observe the actual output shape of
|
||||
a layer, as specified in the examples.
|
||||
init: name of initialization function for the weights of the layer
|
||||
(see [initializations](../initializations.md)), or alternatively,
|
||||
Theano function to use for weights initialization.
|
||||
@@ -546,7 +581,8 @@ class Deconvolution2D(Convolution2D):
|
||||
If you don't specify anything, no activation is applied
|
||||
(ie. "linear" activation: a(x) = x).
|
||||
weights: list of numpy arrays to set as initial weights.
|
||||
border_mode: 'valid', 'same' or 'full'. ('full' requires the Theano backend.)
|
||||
border_mode: 'valid', 'same' or 'full'
|
||||
('full' requires the Theano backend).
|
||||
subsample: tuple of length 2. Factor by which to oversample output.
|
||||
Also called strides elsewhere.
|
||||
W_regularizer: instance of [WeightRegularizer](../regularizers.md)
|
||||
@@ -564,7 +600,8 @@ class Deconvolution2D(Convolution2D):
|
||||
It defaults to the `image_dim_ordering` value found in your
|
||||
Keras config file at `~/.keras/keras.json`.
|
||||
If you never set it, then it will be "tf".
|
||||
bias: whether to include a bias (i.e. make the layer affine rather than linear).
|
||||
bias: whether to include a bias
|
||||
(i.e. make the layer affine rather than linear).
|
||||
|
||||
# Input shape
|
||||
4D tensor with shape:
|
||||
@@ -580,10 +617,11 @@ class Deconvolution2D(Convolution2D):
|
||||
`rows` and `cols` values might have changed due to padding.
|
||||
|
||||
# References
|
||||
[1] [A guide to convolution arithmetic for deep learning](https://arxiv.org/abs/1603.07285 "arXiv:1603.07285v1 [stat.ML]")
|
||||
[2] [Transposed convolution arithmetic](http://deeplearning.net/software/theano_versions/dev/tutorial/conv_arithmetic.html#transposed-convolution-arithmetic)
|
||||
[3] [Deconvolutional Networks](http://www.matthewzeiler.com/pubs/cvpr2010/cvpr2010.pdf)
|
||||
'''
|
||||
- [A guide to convolution arithmetic for deep learning](https://arxiv.org/abs/1603.07285v1)
|
||||
- [Transposed convolution arithmetic](http://deeplearning.net/software/theano_versions/dev/tutorial/conv_arithmetic.html#transposed-convolution-arithmetic)
|
||||
- [Deconvolutional Networks](http://www.matthewzeiler.com/pubs/cvpr2010/cvpr2010.pdf)
|
||||
"""
|
||||
|
||||
def __init__(self, nb_filter, nb_row, nb_col, output_shape,
|
||||
init='glorot_uniform', activation=None, weights=None,
|
||||
border_mode='valid', subsample=(1, 1),
|
||||
@@ -651,7 +689,8 @@ class Deconvolution2D(Convolution2D):
|
||||
|
||||
|
||||
class AtrousConvolution2D(Convolution2D):
|
||||
'''Atrous Convolution operator for filtering windows of two-dimensional inputs.
|
||||
"""Atrous Convolution operator for filtering windows of 2-D inputs.
|
||||
|
||||
A.k.a dilated convolution or convolution with holes.
|
||||
When using this layer as the first layer in a model,
|
||||
provide the keyword argument `input_shape`
|
||||
@@ -661,10 +700,14 @@ class AtrousConvolution2D(Convolution2D):
|
||||
# Examples
|
||||
|
||||
```python
|
||||
# apply a 3x3 convolution with atrous rate 2x2 and 64 output filters on a 256x256 image:
|
||||
# apply a 3x3 convolution with atrous rate 2x2
|
||||
# and 64 output filters on a 256x256 image:
|
||||
model = Sequential()
|
||||
model.add(AtrousConvolution2D(64, 3, 3, atrous_rate=(2,2), border_mode='valid', input_shape=(3, 256, 256)))
|
||||
# now the actual kernel size is dilated from 3x3 to 5x5 (3+(3-1)*(2-1)=5)
|
||||
model.add(AtrousConvolution2D(64, 3, 3, atrous_rate=(2,2),
|
||||
border_mode='valid',
|
||||
input_shape=(3, 256, 256)))
|
||||
# now the actual kernel size is dilated
|
||||
# from 3x3 to 5x5 (3+(3-1)*(2-1)=5)
|
||||
# thus model.output_shape == (None, 64, 252, 252)
|
||||
```
|
||||
|
||||
@@ -683,7 +726,8 @@ class AtrousConvolution2D(Convolution2D):
|
||||
If you don't specify anything, no activation is applied
|
||||
(ie. "linear" activation: a(x) = x).
|
||||
weights: list of numpy arrays to set as initial weights.
|
||||
border_mode: 'valid', 'same' or 'full'. ('full' requires the Theano backend.)
|
||||
border_mode: 'valid', 'same' or 'full'
|
||||
('full' requires the Theano backend).
|
||||
subsample: tuple of length 2. Factor by which to subsample output.
|
||||
Also called strides elsewhere.
|
||||
atrous_rate: tuple of length 2. Factor for kernel dilation.
|
||||
@@ -703,7 +747,8 @@ class AtrousConvolution2D(Convolution2D):
|
||||
It defaults to the `image_dim_ordering` value found in your
|
||||
Keras config file at `~/.keras/keras.json`.
|
||||
If you never set it, then it will be "tf".
|
||||
bias: whether to include a bias (i.e. make the layer affine rather than linear).
|
||||
bias: whether to include a bias
|
||||
(i.e. make the layer affine rather than linear).
|
||||
|
||||
# Input shape
|
||||
4D tensor with shape:
|
||||
@@ -720,12 +765,14 @@ class AtrousConvolution2D(Convolution2D):
|
||||
|
||||
# References
|
||||
- [Multi-Scale Context Aggregation by Dilated Convolutions](https://arxiv.org/abs/1511.07122)
|
||||
'''
|
||||
"""
|
||||
|
||||
def __init__(self, nb_filter, nb_row, nb_col,
|
||||
init='glorot_uniform', activation=None, weights=None,
|
||||
border_mode='valid', subsample=(1, 1),
|
||||
atrous_rate=(1, 1), dim_ordering='default',
|
||||
W_regularizer=None, b_regularizer=None, activity_regularizer=None,
|
||||
W_regularizer=None, b_regularizer=None,
|
||||
activity_regularizer=None,
|
||||
W_constraint=None, b_constraint=None,
|
||||
bias=True, **kwargs):
|
||||
if dim_ordering == 'default':
|
||||
@@ -796,7 +843,7 @@ class AtrousConvolution2D(Convolution2D):
|
||||
|
||||
|
||||
class SeparableConvolution2D(Layer):
|
||||
'''Separable convolution operator for 2D inputs.
|
||||
"""Separable convolution operator for 2D inputs.
|
||||
|
||||
Separable convolutions consist in first performing
|
||||
a depthwise spatial convolution
|
||||
@@ -873,7 +920,8 @@ class SeparableConvolution2D(Layer):
|
||||
or 4D tensor with shape:
|
||||
`(samples, new_rows, new_cols, nb_filter)` if dim_ordering='tf'.
|
||||
`rows` and `cols` values might have changed due to padding.
|
||||
'''
|
||||
"""
|
||||
|
||||
def __init__(self, nb_filter, nb_row, nb_col,
|
||||
init='glorot_uniform', activation=None, weights=None,
|
||||
border_mode='valid', subsample=(1, 1),
|
||||
@@ -899,7 +947,7 @@ class SeparableConvolution2D(Layer):
|
||||
self.nb_filter = nb_filter
|
||||
self.nb_row = nb_row
|
||||
self.nb_col = nb_col
|
||||
self.init = initializations.get(init, dim_ordering=dim_ordering)
|
||||
self.init = initializations.get(init)
|
||||
self.activation = activations.get(activation)
|
||||
if border_mode not in {'valid', 'same'}:
|
||||
raise ValueError('border_mode must be in {valid, same}.')
|
||||
@@ -937,12 +985,14 @@ class SeparableConvolution2D(Layer):
|
||||
raise ValueError('Invalid dim_ordering:', self.dim_ordering)
|
||||
|
||||
self.depthwise_kernel = self.add_weight(depthwise_shape,
|
||||
initializer=self.init,
|
||||
initializer=functools.partial(self.init,
|
||||
dim_ordering=self.dim_ordering),
|
||||
regularizer=self.depthwise_regularizer,
|
||||
constraint=self.depthwise_constraint,
|
||||
name='{}_depthwise_kernel'.format(self.name))
|
||||
self.pointwise_kernel = self.add_weight(pointwise_shape,
|
||||
initializer=self.init,
|
||||
initializer=functools.partial(self.init,
|
||||
dim_ordering=self.dim_ordering),
|
||||
regularizer=self.pointwise_regularizer,
|
||||
constraint=self.pointwise_constraint,
|
||||
name='{}_pointwise_kernel'.format(self.name))
|
||||
@@ -1021,7 +1071,8 @@ class SeparableConvolution2D(Layer):
|
||||
|
||||
|
||||
class Convolution3D(Layer):
|
||||
'''Convolution operator for filtering windows of three-dimensional inputs.
|
||||
"""Convolution operator for filtering windows of three-dimensional inputs.
|
||||
|
||||
When using this layer as the first layer in a model,
|
||||
provide the keyword argument `input_shape`
|
||||
(tuple of integers, does not include the sample axis),
|
||||
@@ -1043,10 +1094,12 @@ class Convolution3D(Layer):
|
||||
If you don't specify anything, no activation is applied
|
||||
(ie. "linear" activation: a(x) = x).
|
||||
weights: list of Numpy arrays to set as initial weights.
|
||||
border_mode: 'valid', 'same' or 'full'. ('full' requires the Theano backend.)
|
||||
border_mode: 'valid', 'same' or 'full'
|
||||
('full' requires the Theano backend).
|
||||
subsample: tuple of length 3. Factor by which to subsample output.
|
||||
Also called strides elsewhere.
|
||||
Note: 'subsample' is implemented by slicing the output of conv3d with strides=(1,1,1).
|
||||
Note: 'subsample' is implemented by slicing
|
||||
the output of conv3d with strides=(1,1,1).
|
||||
W_regularizer: instance of [WeightRegularizer](../regularizers.md)
|
||||
(eg. L1 or L2 regularization), applied to the main weights matrix.
|
||||
b_regularizer: instance of [WeightRegularizer](../regularizers.md),
|
||||
@@ -1062,7 +1115,8 @@ class Convolution3D(Layer):
|
||||
It defaults to the `image_dim_ordering` value found in your
|
||||
Keras config file at `~/.keras/keras.json`.
|
||||
If you never set it, then it will be "tf".
|
||||
bias: whether to include a bias (i.e. make the layer affine rather than linear).
|
||||
bias: whether to include a bias
|
||||
(i.e. make the layer affine rather than linear).
|
||||
|
||||
# Input shape
|
||||
5D tensor with shape:
|
||||
@@ -1076,7 +1130,7 @@ class Convolution3D(Layer):
|
||||
or 5D tensor with shape:
|
||||
`(samples, new_conv_dim1, new_conv_dim2, new_conv_dim3, nb_filter)` if dim_ordering='tf'.
|
||||
`new_conv_dim1`, `new_conv_dim2` and `new_conv_dim3` values might have changed due to padding.
|
||||
'''
|
||||
"""
|
||||
|
||||
def __init__(self, nb_filter, kernel_dim1, kernel_dim2, kernel_dim3,
|
||||
init='glorot_uniform', activation=None, weights=None,
|
||||
@@ -1093,7 +1147,7 @@ class Convolution3D(Layer):
|
||||
self.kernel_dim1 = kernel_dim1
|
||||
self.kernel_dim2 = kernel_dim2
|
||||
self.kernel_dim3 = kernel_dim3
|
||||
self.init = initializations.get(init, dim_ordering=dim_ordering)
|
||||
self.init = initializations.get(init)
|
||||
self.activation = activations.get(activation)
|
||||
self.border_mode = border_mode
|
||||
self.subsample = tuple(subsample)
|
||||
@@ -1115,7 +1169,6 @@ class Convolution3D(Layer):
|
||||
|
||||
def build(self, input_shape):
|
||||
assert len(input_shape) == 5
|
||||
self.input_spec = [InputSpec(shape=input_shape)]
|
||||
|
||||
if self.dim_ordering == 'th':
|
||||
stack_size = input_shape[1]
|
||||
@@ -1129,7 +1182,8 @@ class Convolution3D(Layer):
|
||||
raise ValueError('Invalid dim_ordering:', self.dim_ordering)
|
||||
|
||||
self.W = self.add_weight(self.W_shape,
|
||||
initializer=self.init,
|
||||
initializer=functools.partial(self.init,
|
||||
dim_ordering=self.dim_ordering),
|
||||
name='{}_W'.format(self.name),
|
||||
regularizer=self.W_regularizer,
|
||||
constraint=self.W_constraint)
|
||||
@@ -1174,11 +1228,9 @@ class Convolution3D(Layer):
|
||||
raise ValueError('Invalid dim_ordering:', self.dim_ordering)
|
||||
|
||||
def call(self, x, mask=None):
|
||||
input_shape = self.input_spec[0].shape
|
||||
output = K.conv3d(x, self.W, strides=self.subsample,
|
||||
border_mode=self.border_mode,
|
||||
dim_ordering=self.dim_ordering,
|
||||
volume_shape=input_shape,
|
||||
filter_shape=self.W_shape)
|
||||
if self.bias:
|
||||
if self.dim_ordering == 'th':
|
||||
@@ -1211,7 +1263,9 @@ class Convolution3D(Layer):
|
||||
|
||||
|
||||
class UpSampling1D(Layer):
|
||||
'''Repeat each temporal step `length` times along the time axis.
|
||||
"""Upsampling layer for 1D inputs.
|
||||
|
||||
Repeats each temporal step `length` times along the time axis.
|
||||
|
||||
# Arguments
|
||||
length: integer. Upsampling factor.
|
||||
@@ -1221,7 +1275,7 @@ class UpSampling1D(Layer):
|
||||
|
||||
# Output shape
|
||||
3D tensor with shape: `(samples, upsampled_steps, features)`.
|
||||
'''
|
||||
"""
|
||||
|
||||
def __init__(self, length=2, **kwargs):
|
||||
self.length = length
|
||||
@@ -1243,7 +1297,9 @@ class UpSampling1D(Layer):
|
||||
|
||||
|
||||
class UpSampling2D(Layer):
|
||||
'''Repeat the rows and columns of the data
|
||||
"""Upsampling layer for 2D inputs.
|
||||
|
||||
Repeats the rows and columns of the data
|
||||
by size[0] and size[1] respectively.
|
||||
|
||||
# Arguments
|
||||
@@ -1266,7 +1322,7 @@ class UpSampling2D(Layer):
|
||||
`(samples, channels, upsampled_rows, upsampled_cols)` if dim_ordering='th'
|
||||
or 4D tensor with shape:
|
||||
`(samples, upsampled_rows, upsampled_cols, channels)` if dim_ordering='tf'.
|
||||
'''
|
||||
"""
|
||||
|
||||
def __init__(self, size=(2, 2), dim_ordering='default', **kwargs):
|
||||
if dim_ordering == 'default':
|
||||
@@ -1307,8 +1363,10 @@ class UpSampling2D(Layer):
|
||||
|
||||
|
||||
class UpSampling3D(Layer):
|
||||
'''Repeat the first, second and third dimension of the data
|
||||
by size[0], size[1] and size[2] respectively.
|
||||
"""Upsampling layer for 3D inputs.
|
||||
|
||||
Repeats the 1st, 2nd and 3rd dimensions
|
||||
of the data by size[0], size[1] and size[2] respectively.
|
||||
|
||||
# Arguments
|
||||
size: tuple of 3 integers. The upsampling factors for dim1, dim2 and dim3.
|
||||
@@ -1330,7 +1388,7 @@ class UpSampling3D(Layer):
|
||||
`(samples, channels, upsampled_dim1, upsampled_dim2, upsampled_dim3)` if dim_ordering='th'
|
||||
or 5D tensor with shape:
|
||||
`(samples, upsampled_dim1, upsampled_dim2, upsampled_dim3, channels)` if dim_ordering='tf'.
|
||||
'''
|
||||
"""
|
||||
|
||||
def __init__(self, size=(2, 2, 2), dim_ordering='default', **kwargs):
|
||||
if dim_ordering == 'default':
|
||||
@@ -1375,7 +1433,7 @@ class UpSampling3D(Layer):
|
||||
|
||||
|
||||
class ZeroPadding1D(Layer):
|
||||
'''Zero-padding layer for 1D input (e.g. temporal sequence).
|
||||
"""Zero-padding layer for 1D input (e.g. temporal sequence).
|
||||
|
||||
# Arguments
|
||||
padding: int, or tuple of int (length 2), or dictionary.
|
||||
@@ -1390,11 +1448,11 @@ class ZeroPadding1D(Layer):
|
||||
If any key is missing, default value of 0 will be used for the missing key.
|
||||
|
||||
# Input shape
|
||||
3D tensor with shape (samples, axis_to_pad, features)
|
||||
3D tensor with shape `(samples, axis_to_pad, features)`
|
||||
|
||||
# Output shape
|
||||
3D tensor with shape (samples, padded_axis, features)
|
||||
'''
|
||||
3D tensor with shape `(samples, padded_axis, features)`
|
||||
"""
|
||||
|
||||
def __init__(self, padding=1, **kwargs):
|
||||
super(ZeroPadding1D, self).__init__(**kwargs)
|
||||
@@ -1438,7 +1496,7 @@ class ZeroPadding1D(Layer):
|
||||
|
||||
|
||||
class ZeroPadding2D(Layer):
|
||||
'''Zero-padding layer for 2D input (e.g. picture).
|
||||
"""Zero-padding layer for 2D input (e.g. picture).
|
||||
|
||||
# Arguments
|
||||
padding: tuple of int (length 2), or tuple of int (length 4), or dictionary.
|
||||
@@ -1470,7 +1528,7 @@ class ZeroPadding2D(Layer):
|
||||
`(samples, channels, padded_rows, padded_cols)` if dim_ordering='th'
|
||||
or 4D tensor with shape:
|
||||
`(samples, padded_rows, padded_cols, channels)` if dim_ordering='tf'.
|
||||
'''
|
||||
"""
|
||||
|
||||
def __init__(self,
|
||||
padding=(1, 1),
|
||||
@@ -1547,7 +1605,7 @@ class ZeroPadding2D(Layer):
|
||||
|
||||
|
||||
class ZeroPadding3D(Layer):
|
||||
'''Zero-padding layer for 3D data (spatial or spatio-temporal).
|
||||
"""Zero-padding layer for 3D data (spatial or spatio-temporal).
|
||||
|
||||
# Arguments
|
||||
padding: tuple of int (length 3)
|
||||
@@ -1563,12 +1621,12 @@ class ZeroPadding3D(Layer):
|
||||
|
||||
# Input shape
|
||||
5D tensor with shape:
|
||||
(samples, depth, first_axis_to_pad, second_axis_to_pad, third_axis_to_pad)
|
||||
`(samples, depth, first_axis_to_pad, second_axis_to_pad, third_axis_to_pad)`
|
||||
|
||||
# Output shape
|
||||
5D tensor with shape:
|
||||
(samples, depth, first_padded_axis, second_padded_axis, third_axis_to_pad)
|
||||
'''
|
||||
`(samples, depth, first_padded_axis, second_padded_axis, third_axis_to_pad)`
|
||||
"""
|
||||
|
||||
def __init__(self, padding=(1, 1, 1), dim_ordering='default', **kwargs):
|
||||
super(ZeroPadding3D, self).__init__(**kwargs)
|
||||
@@ -1613,7 +1671,8 @@ class ZeroPadding3D(Layer):
|
||||
|
||||
|
||||
class Cropping1D(Layer):
|
||||
'''Cropping layer for 1D input (e.g. temporal sequence).
|
||||
"""Cropping layer for 1D input (e.g. temporal sequence).
|
||||
|
||||
It crops along the time dimension (axis 1).
|
||||
|
||||
# Arguments
|
||||
@@ -1622,11 +1681,11 @@ class Cropping1D(Layer):
|
||||
the cropping dimension (axis 1).
|
||||
|
||||
# Input shape
|
||||
3D tensor with shape (samples, axis_to_crop, features)
|
||||
3D tensor with shape `(samples, axis_to_crop, features)`
|
||||
|
||||
# Output shape
|
||||
3D tensor with shape (samples, cropped_axis, features)
|
||||
'''
|
||||
3D tensor with shape `(samples, cropped_axis, features)`
|
||||
"""
|
||||
|
||||
def __init__(self, cropping=(1, 1), **kwargs):
|
||||
super(Cropping1D, self).__init__(**kwargs)
|
||||
@@ -1649,7 +1708,10 @@ class Cropping1D(Layer):
|
||||
input_shape[2])
|
||||
|
||||
def call(self, x, mask=None):
|
||||
return x[:, self.cropping[0]:-self.cropping[1], :]
|
||||
if self.cropping[1] == 0:
|
||||
return x[:, self.cropping[0]:, :]
|
||||
else:
|
||||
return x[:, self.cropping[0]:-self.cropping[1], :]
|
||||
|
||||
def get_config(self):
|
||||
config = {'cropping': self.cropping}
|
||||
@@ -1658,7 +1720,8 @@ class Cropping1D(Layer):
|
||||
|
||||
|
||||
class Cropping2D(Layer):
|
||||
'''Cropping layer for 2D input (e.g. picture).
|
||||
"""Cropping layer for 2D input (e.g. picture).
|
||||
|
||||
It crops along spatial dimensions, i.e. width and height.
|
||||
|
||||
# Arguments
|
||||
@@ -1674,11 +1737,11 @@ class Cropping2D(Layer):
|
||||
|
||||
# Input shape
|
||||
4D tensor with shape:
|
||||
(samples, depth, first_axis_to_crop, second_axis_to_crop)
|
||||
`(samples, depth, first_axis_to_crop, second_axis_to_crop)`
|
||||
|
||||
# Output shape
|
||||
4D tensor with shape:
|
||||
(samples, depth, first_cropped_axis, second_cropped_axis)
|
||||
`(samples, depth, first_cropped_axis, second_cropped_axis)`
|
||||
|
||||
# Examples
|
||||
|
||||
@@ -1692,8 +1755,7 @@ class Cropping2D(Layer):
|
||||
# now model.output_shape == (None, 64, 20, 16)
|
||||
|
||||
```
|
||||
|
||||
'''
|
||||
"""
|
||||
|
||||
def __init__(self, cropping=((0, 0), (0, 0)), dim_ordering='default', **kwargs):
|
||||
super(Cropping2D, self).__init__(**kwargs)
|
||||
@@ -1731,11 +1793,41 @@ class Cropping2D(Layer):
|
||||
|
||||
def call(self, x, mask=None):
|
||||
if self.dim_ordering == 'th':
|
||||
if self.cropping[0][1] == self.cropping[1][1] == 0:
|
||||
return x[:,
|
||||
:,
|
||||
self.cropping[0][0]:,
|
||||
self.cropping[1][0]:]
|
||||
elif self.cropping[0][1] == 0:
|
||||
return x[:,
|
||||
:,
|
||||
self.cropping[0][0]:,
|
||||
self.cropping[1][0]:-self.cropping[1][1]]
|
||||
elif self.cropping[1][1] == 0:
|
||||
return x[:,
|
||||
:,
|
||||
self.cropping[0][0]:-self.cropping[0][1],
|
||||
self.cropping[1][0]:]
|
||||
return x[:,
|
||||
:,
|
||||
self.cropping[0][0]:-self.cropping[0][1],
|
||||
self.cropping[1][0]:-self.cropping[1][1]]
|
||||
elif self.dim_ordering == 'tf':
|
||||
if self.cropping[0][1] == self.cropping[1][1] == 0:
|
||||
return x[:,
|
||||
self.cropping[0][0]:,
|
||||
self.cropping[1][0]:,
|
||||
:]
|
||||
elif self.cropping[0][1] == 0:
|
||||
return x[:,
|
||||
self.cropping[0][0]:,
|
||||
self.cropping[1][0]:-self.cropping[1][1],
|
||||
:]
|
||||
elif self.cropping[1][1] == 0:
|
||||
return x[:,
|
||||
self.cropping[0][0]:-self.cropping[0][1],
|
||||
self.cropping[1][0]:,
|
||||
:]
|
||||
return x[:,
|
||||
self.cropping[0][0]:-self.cropping[0][1],
|
||||
self.cropping[1][0]:-self.cropping[1][1],
|
||||
@@ -1748,7 +1840,7 @@ class Cropping2D(Layer):
|
||||
|
||||
|
||||
class Cropping3D(Layer):
|
||||
'''Cropping layer for 3D data (e.g. spatial or spatio-temporal).
|
||||
"""Cropping layer for 3D data (e.g. spatial or spatio-temporal).
|
||||
|
||||
# Arguments
|
||||
cropping: tuple of tuple of int (length 3)
|
||||
@@ -1763,13 +1855,13 @@ class Cropping3D(Layer):
|
||||
|
||||
# Input shape
|
||||
5D tensor with shape:
|
||||
(samples, depth, first_axis_to_crop, second_axis_to_crop, third_axis_to_crop)
|
||||
`(samples, depth, first_axis_to_crop, second_axis_to_crop, third_axis_to_crop)`
|
||||
|
||||
# Output shape
|
||||
5D tensor with shape:
|
||||
(samples, depth, first_cropped_axis, second_cropped_axis, third_cropped_axis)
|
||||
`(samples, depth, first_cropped_axis, second_cropped_axis, third_cropped_axis)`
|
||||
|
||||
'''
|
||||
"""
|
||||
|
||||
def __init__(self, cropping=((1, 1), (1, 1), (1, 1)),
|
||||
dim_ordering='default', **kwargs):
|
||||
@@ -1818,12 +1910,96 @@ class Cropping3D(Layer):
|
||||
|
||||
def call(self, x, mask=None):
|
||||
if self.dim_ordering == 'th':
|
||||
if self.cropping[0][1] == self.cropping[1][1] == self.cropping[2][1] == 0:
|
||||
return x[:,
|
||||
:,
|
||||
self.cropping[0][0]:,
|
||||
self.cropping[1][0]:,
|
||||
self.cropping[2][0]:]
|
||||
elif self.cropping[0][1] == self.cropping[1][1] == 0:
|
||||
return x[:,
|
||||
:,
|
||||
self.cropping[0][0]:,
|
||||
self.cropping[1][0]:,
|
||||
self.cropping[2][0]:-self.cropping[2][1]]
|
||||
elif self.cropping[1][1] == self.cropping[2][1] == 0:
|
||||
return x[:,
|
||||
:,
|
||||
self.cropping[0][0]:-self.cropping[0][1],
|
||||
self.cropping[1][0]:,
|
||||
self.cropping[2][0]:]
|
||||
elif self.cropping[0][1] == self.cropping[2][1] == 0:
|
||||
return x[:,
|
||||
:,
|
||||
self.cropping[0][0]:,
|
||||
self.cropping[1][0]:-self.cropping[1][1],
|
||||
self.cropping[2][0]:]
|
||||
elif self.cropping[0][1] == 0:
|
||||
return x[:,
|
||||
:,
|
||||
self.cropping[0][0]:,
|
||||
self.cropping[1][0]:-self.cropping[1][1],
|
||||
self.cropping[2][0]:-self.cropping[2][1]]
|
||||
elif self.cropping[1][1] == 0:
|
||||
return x[:,
|
||||
:,
|
||||
self.cropping[0][0]:-self.cropping[0][1],
|
||||
self.cropping[1][0]:,
|
||||
self.cropping[2][0]:-self.cropping[2][1]]
|
||||
elif self.cropping[2][1] == 0:
|
||||
return x[:,
|
||||
:,
|
||||
self.cropping[0][0]:-self.cropping[0][1],
|
||||
self.cropping[1][0]:-self.cropping[1][1],
|
||||
self.cropping[2][0]:]
|
||||
return x[:,
|
||||
:,
|
||||
self.cropping[0][0]:-self.cropping[0][1],
|
||||
self.cropping[1][0]:-self.cropping[1][1],
|
||||
self.cropping[2][0]:-self.cropping[2][1]]
|
||||
elif self.dim_ordering == 'tf':
|
||||
if self.cropping[0][1] == self.cropping[1][1] == self.cropping[2][1] == 0:
|
||||
return x[:,
|
||||
self.cropping[0][0]:,
|
||||
self.cropping[1][0]:,
|
||||
self.cropping[2][0]:,
|
||||
:]
|
||||
elif self.cropping[0][1] == self.cropping[1][1] == 0:
|
||||
return x[:,
|
||||
self.cropping[0][0]:,
|
||||
self.cropping[1][0]:,
|
||||
self.cropping[2][0]:-self.cropping[2][1],
|
||||
:]
|
||||
elif self.cropping[1][1] == self.cropping[2][1] == 0:
|
||||
return x[:,
|
||||
self.cropping[0][0]:-self.cropping[0][1],
|
||||
self.cropping[1][0]:,
|
||||
self.cropping[2][0]:,
|
||||
:]
|
||||
elif self.cropping[0][1] == self.cropping[2][1] == 0:
|
||||
return x[:,
|
||||
self.cropping[0][0]:,
|
||||
self.cropping[1][0]:-self.cropping[1][1],
|
||||
self.cropping[2][0]:,
|
||||
:]
|
||||
elif self.cropping[0][1] == 0:
|
||||
return x[:,
|
||||
self.cropping[0][0]:,
|
||||
self.cropping[1][0]:-self.cropping[1][1],
|
||||
self.cropping[2][0]:-self.cropping[2][1],
|
||||
:]
|
||||
elif self.cropping[1][1] == 0:
|
||||
return x[:,
|
||||
self.cropping[0][0]:-self.cropping[0][1],
|
||||
self.cropping[1][0]:,
|
||||
self.cropping[2][0]:-self.cropping[2][1],
|
||||
:]
|
||||
elif self.cropping[2][1] == 0:
|
||||
return x[:,
|
||||
self.cropping[0][0]:-self.cropping[0][1],
|
||||
self.cropping[1][0]:-self.cropping[1][1],
|
||||
self.cropping[2][0]:,
|
||||
:]
|
||||
return x[:,
|
||||
self.cropping[0][0]:-self.cropping[0][1],
|
||||
self.cropping[1][0]:-self.cropping[1][1],
|
||||
|
||||
@@ -1,14 +1,18 @@
|
||||
from .. import backend as K
|
||||
from .. import activations, initializations, regularizers
|
||||
from .. import activations
|
||||
from .. import initializations
|
||||
from .. import regularizers
|
||||
|
||||
import numpy as np
|
||||
from ..engine import Layer, InputSpec
|
||||
from ..engine import Layer
|
||||
from ..engine import InputSpec
|
||||
from ..utils.np_utils import conv_output_length
|
||||
import warnings
|
||||
|
||||
|
||||
class ConvRecurrent2D(Layer):
|
||||
'''Abstract base class for convolutional recurrent layers.
|
||||
"""Abstract base class for convolutional recurrent layers.
|
||||
|
||||
Do not use in a model -- it's not a functional layer!
|
||||
|
||||
ConvLSTM2D
|
||||
@@ -73,7 +77,7 @@ class ConvRecurrent2D(Layer):
|
||||
|
||||
To reset the states of your model, call `.reset_states()` on either
|
||||
a specific layer, or on your entire model.
|
||||
'''
|
||||
"""
|
||||
|
||||
def __init__(self, weights=None, nb_row=None, nb_col=None, nb_filter=None,
|
||||
return_sequences=False, go_backwards=False, stateful=False,
|
||||
@@ -187,7 +191,7 @@ class ConvRecurrent2D(Layer):
|
||||
|
||||
|
||||
class ConvLSTM2D(ConvRecurrent2D):
|
||||
'''Convolutional LSTM.
|
||||
"""Convolutional LSTM.
|
||||
|
||||
# Input shape
|
||||
- if dim_ordering='th'
|
||||
@@ -243,10 +247,11 @@ class ConvLSTM2D(ConvRecurrent2D):
|
||||
|
||||
# References
|
||||
- [Convolutional LSTM Network: A Machine Learning Approach for
|
||||
Precipitation Nowcasting](http://arxiv.org/pdf/1506.04214v1.pdf)
|
||||
Precipitation Nowcasting](http://arxiv.org/abs/1506.04214v1)
|
||||
The current implementation does not include the feedback loop on the
|
||||
cells output
|
||||
'''
|
||||
"""
|
||||
|
||||
def __init__(self, nb_filter, nb_row, nb_col,
|
||||
init='glorot_uniform', inner_init='orthogonal',
|
||||
forget_bias_init='one', activation='tanh',
|
||||
@@ -477,7 +482,7 @@ class ConvLSTM2D(ConvRecurrent2D):
|
||||
ones = K.sum(ones, axis=1)
|
||||
ones = self.conv_step(ones, K.zeros(self.W_shape),
|
||||
border_mode=self.border_mode)
|
||||
ones = ones + 1
|
||||
ones += 1
|
||||
B_U = [K.in_train_phase(K.dropout(ones, self.dropout_U), ones)
|
||||
for _ in range(4)]
|
||||
constants.append(B_U)
|
||||
@@ -487,7 +492,7 @@ class ConvLSTM2D(ConvRecurrent2D):
|
||||
if 0 < self.dropout_W < 1:
|
||||
ones = K.zeros_like(x)
|
||||
ones = K.sum(ones, axis=1)
|
||||
ones = ones + 1
|
||||
ones += 1
|
||||
B_W = [K.in_train_phase(K.dropout(ones, self.dropout_W), ones)
|
||||
for _ in range(4)]
|
||||
constants.append(B_W)
|
||||
|
||||
+130
-79
@@ -10,15 +10,20 @@ import types as python_types
|
||||
import warnings
|
||||
|
||||
from .. import backend as K
|
||||
from .. import activations, initializations, regularizers, constraints
|
||||
from ..engine import InputSpec, Layer, Merge
|
||||
from ..regularizers import ActivityRegularizer
|
||||
from ..utils.generic_utils import func_dump, func_load
|
||||
from .. import activations
|
||||
from .. import initializations
|
||||
from .. import regularizers
|
||||
from .. import constraints
|
||||
from ..engine import InputSpec
|
||||
from ..engine import Layer
|
||||
from ..engine import Merge
|
||||
from ..utils.generic_utils import func_dump
|
||||
from ..utils.generic_utils import func_load
|
||||
from ..utils.generic_utils import get_from_module
|
||||
|
||||
|
||||
class Masking(Layer):
|
||||
'''Masks an input sequence by using a mask value to
|
||||
identify timesteps to be skipped.
|
||||
"""Masks a sequence by using a mask value to skip timesteps.
|
||||
|
||||
For each timestep in the input tensor (dimension #1 in the tensor),
|
||||
if all values in the input tensor at that timestep
|
||||
@@ -43,14 +48,15 @@ class Masking(Layer):
|
||||
model.add(Masking(mask_value=0., input_shape=(timesteps, features)))
|
||||
model.add(LSTM(32))
|
||||
```
|
||||
'''
|
||||
"""
|
||||
|
||||
def __init__(self, mask_value=0., **kwargs):
|
||||
self.supports_masking = True
|
||||
self.mask_value = mask_value
|
||||
super(Masking, self).__init__(**kwargs)
|
||||
|
||||
def compute_mask(self, input, input_mask=None):
|
||||
return K.any(K.not_equal(input, self.mask_value), axis=-1)
|
||||
def compute_mask(self, x, input_mask=None):
|
||||
return K.any(K.not_equal(x, self.mask_value), axis=-1)
|
||||
|
||||
def call(self, x, mask=None):
|
||||
boolean_mask = K.any(K.not_equal(x, self.mask_value),
|
||||
@@ -64,30 +70,45 @@ class Masking(Layer):
|
||||
|
||||
|
||||
class Dropout(Layer):
|
||||
'''Applies Dropout to the input. Dropout consists in randomly setting
|
||||
"""Applies Dropout to the input.
|
||||
|
||||
Dropout consists in randomly setting
|
||||
a fraction `p` of input units to 0 at each update during training time,
|
||||
which helps prevent overfitting.
|
||||
|
||||
# Arguments
|
||||
p: float between 0 and 1. Fraction of the input units to drop.
|
||||
noise_shape: 1D integer tensor representing the shape of the
|
||||
binary dropout mask that will be multiplied with the input.
|
||||
For instance, if your inputs have shape
|
||||
`(batch_size, timesteps, features)` and
|
||||
you want the dropout mask to be the same for all timesteps,
|
||||
you can use `noise_shape=(batch_size, 1, features)`.
|
||||
seed: A Python integer to use as random seed.
|
||||
|
||||
# References
|
||||
- [Dropout: A Simple Way to Prevent Neural Networks from Overfitting](http://www.cs.toronto.edu/~rsalakhu/papers/srivastava14a.pdf)
|
||||
'''
|
||||
def __init__(self, p, **kwargs):
|
||||
"""
|
||||
|
||||
def __init__(self, p, noise_shape=None, seed=None, **kwargs):
|
||||
self.p = p
|
||||
self.noise_shape = noise_shape
|
||||
self.seed = seed
|
||||
if 0. < self.p < 1.:
|
||||
self.uses_learning_phase = True
|
||||
self.supports_masking = True
|
||||
super(Dropout, self).__init__(**kwargs)
|
||||
|
||||
def _get_noise_shape(self, x):
|
||||
return None
|
||||
def _get_noise_shape(self, _):
|
||||
return self.noise_shape
|
||||
|
||||
def call(self, x, mask=None):
|
||||
if 0. < self.p < 1.:
|
||||
noise_shape = self._get_noise_shape(x)
|
||||
x = K.in_train_phase(K.dropout(x, self.p, noise_shape), x)
|
||||
|
||||
def dropped_inputs():
|
||||
return K.dropout(x, self.p, noise_shape, seed=self.seed)
|
||||
x = K.in_train_phase(dropped_inputs, lambda: x)
|
||||
return x
|
||||
|
||||
def get_config(self):
|
||||
@@ -97,7 +118,9 @@ class Dropout(Layer):
|
||||
|
||||
|
||||
class SpatialDropout1D(Dropout):
|
||||
'''This version performs the same function as Dropout, however it drops
|
||||
"""Spatial 1D version of Dropout.
|
||||
|
||||
This version performs the same function as Dropout, however it drops
|
||||
entire 1D feature maps instead of individual elements. If adjacent frames
|
||||
within feature maps are strongly correlated (as is normally the case in
|
||||
early convolution layers) then regular dropout will not regularize the
|
||||
@@ -116,8 +139,9 @@ class SpatialDropout1D(Dropout):
|
||||
Same as input
|
||||
|
||||
# References
|
||||
- [Efficient Object Localization Using Convolutional Networks](https://arxiv.org/pdf/1411.4280.pdf)
|
||||
'''
|
||||
- [Efficient Object Localization Using Convolutional Networks](https://arxiv.org/abs/1411.4280)
|
||||
"""
|
||||
|
||||
def __init__(self, p, **kwargs):
|
||||
super(SpatialDropout1D, self).__init__(p, **kwargs)
|
||||
|
||||
@@ -128,7 +152,9 @@ class SpatialDropout1D(Dropout):
|
||||
|
||||
|
||||
class SpatialDropout2D(Dropout):
|
||||
'''This version performs the same function as Dropout, however it drops
|
||||
"""Spatial 2D version of Dropout.
|
||||
|
||||
This version performs the same function as Dropout, however it drops
|
||||
entire 2D feature maps instead of individual elements. If adjacent pixels
|
||||
within feature maps are strongly correlated (as is normally the case in
|
||||
early convolution layers) then regular dropout will not regularize the
|
||||
@@ -154,8 +180,9 @@ class SpatialDropout2D(Dropout):
|
||||
Same as input
|
||||
|
||||
# References
|
||||
- [Efficient Object Localization Using Convolutional Networks](https://arxiv.org/pdf/1411.4280.pdf)
|
||||
'''
|
||||
- [Efficient Object Localization Using Convolutional Networks](https://arxiv.org/abs/1411.4280)
|
||||
"""
|
||||
|
||||
def __init__(self, p, dim_ordering='default', **kwargs):
|
||||
if dim_ordering == 'default':
|
||||
dim_ordering = K.image_dim_ordering()
|
||||
@@ -175,7 +202,9 @@ class SpatialDropout2D(Dropout):
|
||||
|
||||
|
||||
class SpatialDropout3D(Dropout):
|
||||
'''This version performs the same function as Dropout, however it drops
|
||||
"""Spatial 3D version of Dropout.
|
||||
|
||||
This version performs the same function as Dropout, however it drops
|
||||
entire 3D feature maps instead of individual elements. If adjacent voxels
|
||||
within feature maps are strongly correlated (as is normally the case in
|
||||
early convolution layers) then regular dropout will not regularize the
|
||||
@@ -202,8 +231,9 @@ class SpatialDropout3D(Dropout):
|
||||
Same as input
|
||||
|
||||
# References
|
||||
- [Efficient Object Localization Using Convolutional Networks](https://arxiv.org/pdf/1411.4280.pdf)
|
||||
'''
|
||||
- [Efficient Object Localization Using Convolutional Networks](https://arxiv.org/abs/1411.4280)
|
||||
"""
|
||||
|
||||
def __init__(self, p, dim_ordering='default', **kwargs):
|
||||
if dim_ordering == 'default':
|
||||
dim_ordering = K.image_dim_ordering()
|
||||
@@ -223,7 +253,7 @@ class SpatialDropout3D(Dropout):
|
||||
|
||||
|
||||
class Activation(Layer):
|
||||
'''Applies an activation function to an output.
|
||||
"""Applies an activation function to an output.
|
||||
|
||||
# Arguments
|
||||
activation: name of activation function to use
|
||||
@@ -237,7 +267,8 @@ class Activation(Layer):
|
||||
|
||||
# Output shape
|
||||
Same shape as input.
|
||||
'''
|
||||
"""
|
||||
|
||||
def __init__(self, activation, **kwargs):
|
||||
self.supports_masking = True
|
||||
self.activation = activations.get(activation)
|
||||
@@ -253,7 +284,7 @@ class Activation(Layer):
|
||||
|
||||
|
||||
class Reshape(Layer):
|
||||
'''Reshapes an output to a certain shape.
|
||||
"""Reshapes an output to a certain shape.
|
||||
|
||||
# Arguments
|
||||
target_shape: target shape. Tuple of integers,
|
||||
@@ -280,22 +311,25 @@ class Reshape(Layer):
|
||||
# as intermediate layer in a Sequential model
|
||||
model.add(Reshape((6, 2)))
|
||||
# now: model.output_shape == (None, 6, 2)
|
||||
|
||||
# also supports shape inference using `-1` as dimension
|
||||
model.add(Reshape((-1, 2, 2)))
|
||||
# now: model.output_shape == (None, 3, 2, 2)
|
||||
```
|
||||
'''
|
||||
"""
|
||||
|
||||
def __init__(self, target_shape, **kwargs):
|
||||
super(Reshape, self).__init__(**kwargs)
|
||||
self.target_shape = tuple(target_shape)
|
||||
|
||||
def _fix_unknown_dimension(self, input_shape, output_shape):
|
||||
'''Find and replace a single missing dimension in an output shape
|
||||
given an input shape.
|
||||
"""Find and replace a missing dimension in an output shape.
|
||||
|
||||
A near direct port of the internal Numpy function
|
||||
_fix_unknown_dimension in numpy/core/src/multiarray/shape.c
|
||||
This is a near direct port of the internal Numpy function
|
||||
`_fix_unknown_dimension` in `numpy/core/src/multiarray/shape.c`
|
||||
|
||||
# Arguments
|
||||
input_shape: shape of array being reshaped
|
||||
|
||||
output_shape: desired shape of the array with at most
|
||||
a single -1 which indicates a dimension that should be
|
||||
derived from the input shape.
|
||||
@@ -306,7 +340,11 @@ class Reshape(Layer):
|
||||
Raises a ValueError if the total array size of the output_shape is
|
||||
different than the input_shape, or more than one unknown dimension
|
||||
is specified.
|
||||
'''
|
||||
|
||||
# Raises
|
||||
ValueError: in case of invalid values
|
||||
for `input_shape` or `output_shape`.
|
||||
"""
|
||||
output_shape = list(output_shape)
|
||||
|
||||
msg = 'total size of new array must be unchanged'
|
||||
@@ -317,7 +355,7 @@ class Reshape(Layer):
|
||||
if unknown is None:
|
||||
unknown = index
|
||||
else:
|
||||
raise ValueError('can only specify one unknown dimension')
|
||||
raise ValueError('Can only specify one unknown dimension.')
|
||||
else:
|
||||
known *= dim
|
||||
|
||||
@@ -350,7 +388,7 @@ class Reshape(Layer):
|
||||
elif hasattr(K, 'int_shape'):
|
||||
input_shape = K.int_shape(x)
|
||||
if input_shape is not None:
|
||||
target_shape = self.get_output_shape_for(input_shape)
|
||||
target_shape = self.get_output_shape_for(input_shape)[1:]
|
||||
return K.reshape(x, (-1,) + target_shape)
|
||||
|
||||
def get_config(self):
|
||||
@@ -360,7 +398,7 @@ class Reshape(Layer):
|
||||
|
||||
|
||||
class Permute(Layer):
|
||||
'''Permutes the dimensions of the input according to a given pattern.
|
||||
"""Permutes the dimensions of the input according to a given pattern.
|
||||
|
||||
Useful for e.g. connecting RNNs and convnets together.
|
||||
|
||||
@@ -387,7 +425,8 @@ class Permute(Layer):
|
||||
# Output shape
|
||||
Same as the input shape, but with the dimensions re-ordered according
|
||||
to the specified pattern.
|
||||
'''
|
||||
"""
|
||||
|
||||
def __init__(self, dims, **kwargs):
|
||||
self.dims = tuple(dims)
|
||||
super(Permute, self).__init__(**kwargs)
|
||||
@@ -397,7 +436,7 @@ class Permute(Layer):
|
||||
output_shape = copy.copy(input_shape)
|
||||
for i, dim in enumerate(self.dims):
|
||||
target_dim = input_shape[dim]
|
||||
output_shape[i+1] = target_dim
|
||||
output_shape[i + 1] = target_dim
|
||||
return tuple(output_shape)
|
||||
|
||||
def call(self, x, mask=None):
|
||||
@@ -410,7 +449,7 @@ class Permute(Layer):
|
||||
|
||||
|
||||
class Flatten(Layer):
|
||||
'''Flattens the input. Does not affect the batch size.
|
||||
"""Flattens the input. Does not affect the batch size.
|
||||
|
||||
# Example
|
||||
|
||||
@@ -424,7 +463,8 @@ class Flatten(Layer):
|
||||
model.add(Flatten())
|
||||
# now: model.output_shape == (None, 65536)
|
||||
```
|
||||
'''
|
||||
"""
|
||||
|
||||
def __init__(self, **kwargs):
|
||||
self.input_spec = [InputSpec(ndim='3+')]
|
||||
super(Flatten, self).__init__(**kwargs)
|
||||
@@ -444,7 +484,7 @@ class Flatten(Layer):
|
||||
|
||||
|
||||
class RepeatVector(Layer):
|
||||
'''Repeats the input n times.
|
||||
"""Repeats the input n times.
|
||||
|
||||
# Example
|
||||
|
||||
@@ -466,7 +506,8 @@ class RepeatVector(Layer):
|
||||
|
||||
# Output shape
|
||||
3D tensor of shape `(nb_samples, n, features)`.
|
||||
'''
|
||||
"""
|
||||
|
||||
def __init__(self, n, **kwargs):
|
||||
self.n = n
|
||||
self.input_spec = [InputSpec(ndim=2)]
|
||||
@@ -485,8 +526,7 @@ class RepeatVector(Layer):
|
||||
|
||||
|
||||
class Lambda(Layer):
|
||||
'''Used for evaluating an arbitrary Theano / TensorFlow expression
|
||||
on the output of the previous layer.
|
||||
"""Used for evaluating an arbitrary expression on an input.
|
||||
|
||||
# Examples
|
||||
|
||||
@@ -524,7 +564,8 @@ class Lambda(Layer):
|
||||
If a tuple, it only specifies the first dimension onward;
|
||||
sample dimension is assumed either the same as the input:
|
||||
`output_shape = (input_shape[0], ) + output_shape`
|
||||
or, the input is `None` and the sample dimension is also `None`:
|
||||
or, the input is `None` and
|
||||
the sample dimension is also `None`:
|
||||
`output_shape = (None, ) + output_shape`
|
||||
If a function, it specifies the entire shape as a function of the
|
||||
input shape: `output_shape = f(input_shape)`
|
||||
@@ -538,10 +579,11 @@ class Lambda(Layer):
|
||||
|
||||
# Output shape
|
||||
Specified by `output_shape` argument.
|
||||
'''
|
||||
def __init__(self, function, output_shape=None, arguments={}, **kwargs):
|
||||
"""
|
||||
|
||||
def __init__(self, function, output_shape=None, arguments=None, **kwargs):
|
||||
self.function = function
|
||||
self.arguments = arguments
|
||||
self.arguments = arguments if arguments else {}
|
||||
self.supports_masking = False
|
||||
|
||||
if output_shape is None:
|
||||
@@ -571,9 +613,12 @@ class Lambda(Layer):
|
||||
return K.int_shape(x)
|
||||
# Otherwise, we default to the input shape.
|
||||
warnings.warn('`output_shape` argument not specified for layer {} '
|
||||
'and cannot be automatically inferred with the Theano backend. '
|
||||
'Defaulting to output shape `{}` (same as input shape). '
|
||||
'If the expected output shape is different, specify it via the `output_shape` argument.'
|
||||
'and cannot be automatically inferred '
|
||||
'with the Theano backend. '
|
||||
'Defaulting to output shape `{}` '
|
||||
'(same as input shape). '
|
||||
'If the expected output shape is different, '
|
||||
'specify it via the `output_shape` argument.'
|
||||
.format(self.name, input_shape))
|
||||
return input_shape
|
||||
elif isinstance(self._output_shape, (tuple, list)):
|
||||
@@ -622,7 +667,7 @@ class Lambda(Layer):
|
||||
return dict(list(base_config.items()) + list(config.items()))
|
||||
|
||||
@classmethod
|
||||
def from_config(cls, config, custom_objects={}):
|
||||
def from_config(cls, config, custom_objects=None):
|
||||
# Insert custom objects into globals.
|
||||
if custom_objects:
|
||||
globs = globals().copy()
|
||||
@@ -632,7 +677,7 @@ class Lambda(Layer):
|
||||
|
||||
function_type = config.pop('function_type')
|
||||
if function_type == 'function':
|
||||
function = globs[config['function']]
|
||||
function = get_from_module(config['function'], globs, 'core')
|
||||
elif function_type == 'lambda':
|
||||
function = func_load(config['function'], globs=globs)
|
||||
else:
|
||||
@@ -640,7 +685,7 @@ class Lambda(Layer):
|
||||
|
||||
output_shape_type = config.pop('output_shape_type')
|
||||
if output_shape_type == 'function':
|
||||
output_shape = globs[config['output_shape']]
|
||||
output_shape = get_from_module(config['output_shape'], globs, 'core')
|
||||
elif output_shape_type == 'lambda':
|
||||
output_shape = func_load(config['output_shape'], globs=globs)
|
||||
else:
|
||||
@@ -652,7 +697,7 @@ class Lambda(Layer):
|
||||
|
||||
|
||||
class Dense(Layer):
|
||||
'''Just your regular fully connected NN layer.
|
||||
"""Just your regular densely-connected NN layer.
|
||||
|
||||
# Example
|
||||
|
||||
@@ -712,7 +757,8 @@ class Dense(Layer):
|
||||
nD tensor with shape: `(nb_samples, ..., output_dim)`.
|
||||
For instance, for a 2D input with shape `(nb_samples, input_dim)`,
|
||||
the output would have shape `(nb_samples, output_dim)`.
|
||||
'''
|
||||
"""
|
||||
|
||||
def __init__(self, output_dim, init='glorot_uniform',
|
||||
activation=None, weights=None,
|
||||
W_regularizer=None, b_regularizer=None, activity_regularizer=None,
|
||||
@@ -793,8 +839,7 @@ class Dense(Layer):
|
||||
|
||||
|
||||
class ActivityRegularization(Layer):
|
||||
'''Layer that passes through its input unchanged, but applies an update
|
||||
to the cost function based on the activity.
|
||||
"""Layer that applies an update to the cost function based on input activity.
|
||||
|
||||
# Arguments
|
||||
l1: L1 regularization factor (positive float).
|
||||
@@ -807,7 +852,8 @@ class ActivityRegularization(Layer):
|
||||
|
||||
# Output shape
|
||||
Same shape as input.
|
||||
'''
|
||||
"""
|
||||
|
||||
def __init__(self, l1=0., l2=0., **kwargs):
|
||||
self.supports_masking = True
|
||||
self.l1 = l1
|
||||
@@ -825,7 +871,7 @@ class ActivityRegularization(Layer):
|
||||
|
||||
|
||||
class MaxoutDense(Layer):
|
||||
'''A dense maxout layer.
|
||||
"""A dense maxout layer.
|
||||
|
||||
A `MaxoutDense` layer takes the element-wise maximum of
|
||||
`nb_feature` `Dense(input_dim, output_dim)` linear layers.
|
||||
@@ -871,8 +917,9 @@ class MaxoutDense(Layer):
|
||||
2D tensor with shape: `(nb_samples, output_dim)`.
|
||||
|
||||
# References
|
||||
- [Maxout Networks](http://arxiv.org/pdf/1302.4389.pdf)
|
||||
'''
|
||||
- [Maxout Networks](http://arxiv.org/abs/1302.4389)
|
||||
"""
|
||||
|
||||
def __init__(self, output_dim,
|
||||
nb_feature=4,
|
||||
init='glorot_uniform',
|
||||
@@ -957,8 +1004,9 @@ class MaxoutDense(Layer):
|
||||
|
||||
|
||||
class Highway(Layer):
|
||||
'''Densely connected highway network,
|
||||
a natural extension of LSTMs to feedforward networks.
|
||||
"""Densely connected highway network.
|
||||
|
||||
Highway layers are a natural extension of LSTMs to feedforward networks.
|
||||
|
||||
# Arguments
|
||||
init: name of initialization function for the weights of the layer
|
||||
@@ -966,7 +1014,6 @@ class Highway(Layer):
|
||||
or alternatively, Theano function to use for weights
|
||||
initialization. This parameter is only relevant
|
||||
if you don't pass a `weights` argument.
|
||||
transform_bias: value for the bias to take on initially (default -2)
|
||||
activation: name of activation function to use
|
||||
(see [activations](../activations.md)),
|
||||
or alternatively, elementwise Theano function.
|
||||
@@ -998,11 +1045,11 @@ class Highway(Layer):
|
||||
2D tensor with shape: `(nb_samples, input_dim)`.
|
||||
|
||||
# References
|
||||
- [Highway Networks](http://arxiv.org/pdf/1505.00387v2.pdf)
|
||||
'''
|
||||
- [Highway Networks](http://arxiv.org/abs/1505.00387v2)
|
||||
"""
|
||||
|
||||
def __init__(self,
|
||||
init='glorot_uniform',
|
||||
transform_bias=-2,
|
||||
activation=None,
|
||||
weights=None,
|
||||
W_regularizer=None,
|
||||
@@ -1013,8 +1060,11 @@ class Highway(Layer):
|
||||
bias=True,
|
||||
input_dim=None,
|
||||
**kwargs):
|
||||
if 'transform_bias' in kwargs:
|
||||
kwargs.pop('transform_bias')
|
||||
warnings.warn('`transform_bias` argument is deprecated and '
|
||||
'will be removed after 5/2017.')
|
||||
self.init = initializations.get(init)
|
||||
self.transform_bias = transform_bias
|
||||
self.activation = activations.get(activation)
|
||||
|
||||
self.W_regularizer = regularizers.get(W_regularizer)
|
||||
@@ -1078,7 +1128,6 @@ class Highway(Layer):
|
||||
|
||||
def get_config(self):
|
||||
config = {'init': self.init.__name__,
|
||||
'transform_bias': self.transform_bias,
|
||||
'activation': self.activation.__name__,
|
||||
'W_regularizer': self.W_regularizer.get_config() if self.W_regularizer else None,
|
||||
'b_regularizer': self.b_regularizer.get_config() if self.b_regularizer else None,
|
||||
@@ -1092,7 +1141,8 @@ class Highway(Layer):
|
||||
|
||||
|
||||
class TimeDistributedDense(Layer):
|
||||
'''Apply a same Dense layer for each dimension[1] (time_dimension) input.
|
||||
"""Apply a same Dense layer for each dimension[1] (time_dimension) input.
|
||||
|
||||
Especially useful after a recurrent network with 'return_sequences=True'.
|
||||
|
||||
Note: this layer is deprecated, prefer using the `TimeDistributed` wrapper:
|
||||
@@ -1138,7 +1188,7 @@ class TimeDistributedDense(Layer):
|
||||
is required when using this layer as the first layer in a model.
|
||||
input_length: length of inputs sequences
|
||||
(integer, or None for variable-length sequences).
|
||||
'''
|
||||
"""
|
||||
|
||||
def __init__(self, output_dim,
|
||||
init='glorot_uniform',
|
||||
@@ -1213,12 +1263,13 @@ class TimeDistributedDense(Layer):
|
||||
if hasattr(K, 'int_shape'):
|
||||
input_length = K.int_shape(x)[1]
|
||||
if not input_length:
|
||||
raise ValueError(
|
||||
'Layer ' + self.name +
|
||||
' requires to know the length of its input, '
|
||||
'but it could not be inferred automatically. '
|
||||
'Specify it manually by passing an input_shape '
|
||||
'argument to the first layer in your model.')
|
||||
raise ValueError('Layer ' + self.name +
|
||||
' requires to know the length '
|
||||
'of its input, but it could not '
|
||||
'be inferred automatically. '
|
||||
'Specify it manually by passing '
|
||||
'an input_shape argument to '
|
||||
'the first layer in your model.')
|
||||
else:
|
||||
input_length = K.shape(x)[1]
|
||||
|
||||
|
||||
@@ -1,12 +1,14 @@
|
||||
from __future__ import absolute_import
|
||||
|
||||
from .. import backend as K
|
||||
from .. import initializations, regularizers, constraints
|
||||
from .. import initializations
|
||||
from .. import regularizers
|
||||
from .. import constraints
|
||||
from ..engine import Layer
|
||||
|
||||
|
||||
class Embedding(Layer):
|
||||
'''Turn positive integers (indexes) into dense vectors of fixed size.
|
||||
"""Turn positive integers (indexes) into dense vectors of fixed size.
|
||||
eg. [[4], [20]] -> [[0.25, 0.1], [0.6, -0.2]]
|
||||
|
||||
This layer can only be used as the first layer in a model.
|
||||
@@ -62,8 +64,7 @@ class Embedding(Layer):
|
||||
|
||||
# References
|
||||
- [A Theoretically Grounded Application of Dropout in Recurrent Neural Networks](http://arxiv.org/abs/1512.05287)
|
||||
'''
|
||||
input_ndim = 2
|
||||
"""
|
||||
|
||||
def __init__(self, input_dim, output_dim,
|
||||
init='uniform', input_length=None,
|
||||
|
||||
+18
-8
@@ -1,14 +1,20 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
from __future__ import absolute_import
|
||||
|
||||
from keras import backend as K
|
||||
from keras.layers import activations, initializations, regularizers, constraints
|
||||
from keras.engine import Layer, InputSpec
|
||||
from .. import backend as K
|
||||
from .. import activations
|
||||
from .. import initializations
|
||||
from .. import regularizers
|
||||
from .. import constraints
|
||||
from ..engine import Layer
|
||||
from ..engine import InputSpec
|
||||
from ..utils.np_utils import conv_output_length
|
||||
|
||||
|
||||
class LocallyConnected1D(Layer):
|
||||
'''The `LocallyConnected1D` layer works similarly to
|
||||
"""Locally-connected layer for 1D inputs.
|
||||
|
||||
The `LocallyConnected1D` layer works similarly to
|
||||
the `Convolution1D` layer, except that weights are unshared,
|
||||
that is, a different set of filters is applied at each different patch
|
||||
of the input.
|
||||
@@ -73,7 +79,8 @@ class LocallyConnected1D(Layer):
|
||||
# Output shape
|
||||
3D tensor with shape: `(samples, new_steps, nb_filter)`.
|
||||
`steps` value might have changed due to padding.
|
||||
'''
|
||||
"""
|
||||
|
||||
def __init__(self, nb_filter, filter_length,
|
||||
init='glorot_uniform', activation=None, weights=None,
|
||||
border_mode='valid', subsample_length=1,
|
||||
@@ -178,7 +185,9 @@ class LocallyConnected1D(Layer):
|
||||
|
||||
|
||||
class LocallyConnected2D(Layer):
|
||||
'''The `LocallyConnected2D` layer works similarly
|
||||
"""Locally-connected layer for 2D inputs.
|
||||
|
||||
The `LocallyConnected2D` layer works similarly
|
||||
to the `Convolution2D` layer, except that weights are unshared,
|
||||
that is, a different set of filters is applied at each
|
||||
different patch of the input.
|
||||
@@ -247,7 +256,8 @@ class LocallyConnected2D(Layer):
|
||||
or 4D tensor with shape:
|
||||
`(samples, new_rows, new_cols, nb_filter)` if dim_ordering='tf'.
|
||||
`rows` and `cols` values might have changed due to padding.
|
||||
'''
|
||||
"""
|
||||
|
||||
def __init__(self, nb_filter, nb_row, nb_col,
|
||||
init='glorot_uniform', activation=None, weights=None,
|
||||
border_mode='valid', subsample=(1, 1),
|
||||
@@ -344,7 +354,7 @@ class LocallyConnected2D(Layer):
|
||||
_, feature_dim, nb_filter = self.W_shape
|
||||
|
||||
if self.dim_ordering == 'th':
|
||||
if K._backend == 'theano':
|
||||
if K.backend() == 'theano':
|
||||
output = []
|
||||
for i in range(self.output_row):
|
||||
for j in range(self.output_col):
|
||||
|
||||
+12
-8
@@ -5,9 +5,10 @@ import numpy as np
|
||||
|
||||
|
||||
class GaussianNoise(Layer):
|
||||
'''Apply to the input an additive zero-centered Gaussian noise with
|
||||
standard deviation `sigma`. This is useful to mitigate overfitting
|
||||
(you could see it as a kind of random data augmentation).
|
||||
"""Apply additive zero-centered Gaussian noise.
|
||||
|
||||
This is useful to mitigate overfitting
|
||||
(you could see it as a form of random data augmentation).
|
||||
Gaussian Noise (GS) is a natural choice as corruption process
|
||||
for real valued inputs.
|
||||
|
||||
@@ -23,7 +24,8 @@ class GaussianNoise(Layer):
|
||||
|
||||
# Output shape
|
||||
Same shape as input.
|
||||
'''
|
||||
"""
|
||||
|
||||
def __init__(self, sigma, **kwargs):
|
||||
self.supports_masking = True
|
||||
self.sigma = sigma
|
||||
@@ -43,13 +45,14 @@ class GaussianNoise(Layer):
|
||||
|
||||
|
||||
class GaussianDropout(Layer):
|
||||
'''Apply to the input an multiplicative one-centered Gaussian noise
|
||||
with standard deviation `sqrt(p/(1-p))`.
|
||||
"""Apply multiplicative 1-centered Gaussian noise.
|
||||
|
||||
As it is a regularization layer, it is only active at training time.
|
||||
|
||||
# Arguments
|
||||
p: float, drop probability (as with `Dropout`).
|
||||
The multiplicative noise will have
|
||||
standard deviation `sqrt(p / (1 - p))`.
|
||||
|
||||
# Input shape
|
||||
Arbitrary. Use the keyword argument `input_shape`
|
||||
@@ -60,8 +63,9 @@ class GaussianDropout(Layer):
|
||||
Same shape as input.
|
||||
|
||||
# References
|
||||
[Dropout: A Simple Way to Prevent Neural Networks from Overfitting Srivastava, Hinton, et al. 2014](http://www.cs.toronto.edu/~rsalakhu/papers/srivastava14a.pdf)
|
||||
'''
|
||||
- [Dropout: A Simple Way to Prevent Neural Networks from Overfitting Srivastava, Hinton, et al. 2014](http://www.cs.toronto.edu/~rsalakhu/papers/srivastava14a.pdf)
|
||||
"""
|
||||
|
||||
def __init__(self, p, **kwargs):
|
||||
self.supports_masking = True
|
||||
self.p = p
|
||||
|
||||
@@ -4,7 +4,9 @@ from .. import backend as K
|
||||
|
||||
|
||||
class BatchNormalization(Layer):
|
||||
'''Normalize the activations of the previous layer at each batch,
|
||||
"""Batch normalization layer (Ioffe and Szegedy, 2014).
|
||||
|
||||
Normalize the activations of the previous layer at each batch,
|
||||
i.e. applies a transformation that maintains the mean activation
|
||||
close to 0 and the activation standard deviation close to 1.
|
||||
|
||||
@@ -59,8 +61,9 @@ class BatchNormalization(Layer):
|
||||
Same shape as input.
|
||||
|
||||
# References
|
||||
- [Batch Normalization: Accelerating Deep Network Training by Reducing Internal Covariate Shift](http://jmlr.org/proceedings/papers/v37/ioffe15.pdf)
|
||||
'''
|
||||
- [Batch Normalization: Accelerating Deep Network Training by Reducing Internal Covariate Shift](https://arxiv.org/abs/1502.03167)
|
||||
"""
|
||||
|
||||
def __init__(self, epsilon=1e-3, mode=0, axis=-1, momentum=0.99,
|
||||
weights=None, beta_init='zero', gamma_init='one',
|
||||
gamma_regularizer=None, beta_regularizer=None, **kwargs):
|
||||
|
||||
+39
-33
@@ -2,14 +2,14 @@
|
||||
from __future__ import absolute_import
|
||||
|
||||
from .. import backend as K
|
||||
from ..engine import Layer, InputSpec
|
||||
from ..engine import Layer
|
||||
from ..engine import InputSpec
|
||||
from ..utils.np_utils import conv_output_length
|
||||
|
||||
|
||||
class _Pooling1D(Layer):
|
||||
'''Abstract class for different pooling 1D layers.
|
||||
'''
|
||||
input_dim = 3
|
||||
"""Abstract class for different pooling 1D layers.
|
||||
"""
|
||||
|
||||
def __init__(self, pool_length=2, stride=None,
|
||||
border_mode='valid', **kwargs):
|
||||
@@ -30,7 +30,7 @@ class _Pooling1D(Layer):
|
||||
self.border_mode, self.stride)
|
||||
return (input_shape[0], length, input_shape[2])
|
||||
|
||||
def _pooling_function(self, back_end, inputs, pool_size, strides,
|
||||
def _pooling_function(self, inputs, pool_size, strides,
|
||||
border_mode, dim_ordering):
|
||||
raise NotImplementedError
|
||||
|
||||
@@ -51,7 +51,7 @@ class _Pooling1D(Layer):
|
||||
|
||||
|
||||
class MaxPooling1D(_Pooling1D):
|
||||
'''Max pooling operation for temporal data.
|
||||
"""Max pooling operation for temporal data.
|
||||
|
||||
# Input shape
|
||||
3D tensor with shape: `(samples, steps, features)`.
|
||||
@@ -65,7 +65,7 @@ class MaxPooling1D(_Pooling1D):
|
||||
2 will halve the input.
|
||||
If None, it will default to `pool_length`.
|
||||
border_mode: 'valid' or 'same'.
|
||||
'''
|
||||
"""
|
||||
|
||||
def __init__(self, pool_length=2, stride=None,
|
||||
border_mode='valid', **kwargs):
|
||||
@@ -80,7 +80,7 @@ class MaxPooling1D(_Pooling1D):
|
||||
|
||||
|
||||
class AveragePooling1D(_Pooling1D):
|
||||
'''Average pooling for temporal data.
|
||||
"""Average pooling for temporal data.
|
||||
|
||||
# Arguments
|
||||
pool_length: factor by which to downscale. 2 will halve the input.
|
||||
@@ -93,7 +93,7 @@ class AveragePooling1D(_Pooling1D):
|
||||
|
||||
# Output shape
|
||||
3D tensor with shape: `(samples, downsampled_steps, features)`.
|
||||
'''
|
||||
"""
|
||||
|
||||
def __init__(self, pool_length=2, stride=None,
|
||||
border_mode='valid', **kwargs):
|
||||
@@ -108,8 +108,8 @@ class AveragePooling1D(_Pooling1D):
|
||||
|
||||
|
||||
class _Pooling2D(Layer):
|
||||
'''Abstract class for different pooling 2D layers.
|
||||
'''
|
||||
"""Abstract class for different pooling 2D layers.
|
||||
"""
|
||||
|
||||
def __init__(self, pool_size=(2, 2), strides=None, border_mode='valid',
|
||||
dim_ordering='default', **kwargs):
|
||||
@@ -170,7 +170,7 @@ class _Pooling2D(Layer):
|
||||
|
||||
|
||||
class MaxPooling2D(_Pooling2D):
|
||||
'''Max pooling operation for spatial data.
|
||||
"""Max pooling operation for spatial data.
|
||||
|
||||
# Arguments
|
||||
pool_size: tuple of 2 integers,
|
||||
@@ -196,7 +196,7 @@ class MaxPooling2D(_Pooling2D):
|
||||
`(nb_samples, channels, pooled_rows, pooled_cols)` if dim_ordering='th'
|
||||
or 4D tensor with shape:
|
||||
`(samples, pooled_rows, pooled_cols, channels)` if dim_ordering='tf'.
|
||||
'''
|
||||
"""
|
||||
|
||||
def __init__(self, pool_size=(2, 2), strides=None, border_mode='valid',
|
||||
dim_ordering='default', **kwargs):
|
||||
@@ -212,7 +212,7 @@ class MaxPooling2D(_Pooling2D):
|
||||
|
||||
|
||||
class AveragePooling2D(_Pooling2D):
|
||||
'''Average pooling operation for spatial data.
|
||||
"""Average pooling operation for spatial data.
|
||||
|
||||
# Arguments
|
||||
pool_size: tuple of 2 integers,
|
||||
@@ -238,7 +238,7 @@ class AveragePooling2D(_Pooling2D):
|
||||
`(nb_samples, channels, pooled_rows, pooled_cols)` if dim_ordering='th'
|
||||
or 4D tensor with shape:
|
||||
`(samples, pooled_rows, pooled_cols, channels)` if dim_ordering='tf'.
|
||||
'''
|
||||
"""
|
||||
|
||||
def __init__(self, pool_size=(2, 2), strides=None, border_mode='valid',
|
||||
dim_ordering='default', **kwargs):
|
||||
@@ -253,8 +253,8 @@ class AveragePooling2D(_Pooling2D):
|
||||
|
||||
|
||||
class _Pooling3D(Layer):
|
||||
'''Abstract class for different pooling 3D layers.
|
||||
'''
|
||||
"""Abstract class for different pooling 3D layers.
|
||||
"""
|
||||
|
||||
def __init__(self, pool_size=(2, 2, 2), strides=None, border_mode='valid',
|
||||
dim_ordering='default', **kwargs):
|
||||
@@ -321,7 +321,7 @@ class _Pooling3D(Layer):
|
||||
|
||||
|
||||
class MaxPooling3D(_Pooling3D):
|
||||
'''Max pooling operation for 3D data (spatial or spatio-temporal).
|
||||
"""Max pooling operation for 3D data (spatial or spatio-temporal).
|
||||
|
||||
# Arguments
|
||||
pool_size: tuple of 3 integers,
|
||||
@@ -346,7 +346,7 @@ class MaxPooling3D(_Pooling3D):
|
||||
`(nb_samples, channels, pooled_dim1, pooled_dim2, pooled_dim3)` if dim_ordering='th'
|
||||
or 5D tensor with shape:
|
||||
`(samples, pooled_dim1, pooled_dim2, pooled_dim3, channels)` if dim_ordering='tf'.
|
||||
'''
|
||||
"""
|
||||
|
||||
def __init__(self, pool_size=(2, 2, 2), strides=None, border_mode='valid',
|
||||
dim_ordering='default', **kwargs):
|
||||
@@ -361,7 +361,7 @@ class MaxPooling3D(_Pooling3D):
|
||||
|
||||
|
||||
class AveragePooling3D(_Pooling3D):
|
||||
'''Average pooling operation for 3D data (spatial or spatio-temporal).
|
||||
"""Average pooling operation for 3D data (spatial or spatio-temporal).
|
||||
|
||||
# Arguments
|
||||
pool_size: tuple of 3 integers,
|
||||
@@ -386,7 +386,7 @@ class AveragePooling3D(_Pooling3D):
|
||||
`(nb_samples, channels, pooled_dim1, pooled_dim2, pooled_dim3)` if dim_ordering='th'
|
||||
or 5D tensor with shape:
|
||||
`(samples, pooled_dim1, pooled_dim2, pooled_dim3, channels)` if dim_ordering='tf'.
|
||||
'''
|
||||
"""
|
||||
|
||||
def __init__(self, pool_size=(2, 2, 2), strides=None, border_mode='valid',
|
||||
dim_ordering='default', **kwargs):
|
||||
@@ -402,6 +402,8 @@ class AveragePooling3D(_Pooling3D):
|
||||
|
||||
|
||||
class _GlobalPooling1D(Layer):
|
||||
"""Abstract class for different global pooling 1D layers.
|
||||
"""
|
||||
|
||||
def __init__(self, **kwargs):
|
||||
super(_GlobalPooling1D, self).__init__(**kwargs)
|
||||
@@ -415,34 +417,36 @@ class _GlobalPooling1D(Layer):
|
||||
|
||||
|
||||
class GlobalAveragePooling1D(_GlobalPooling1D):
|
||||
'''Global average pooling operation for temporal data.
|
||||
"""Global average pooling operation for temporal data.
|
||||
|
||||
# Input shape
|
||||
3D tensor with shape: `(samples, steps, features)`.
|
||||
|
||||
# Output shape
|
||||
2D tensor with shape: `(samples, features)`.
|
||||
'''
|
||||
"""
|
||||
|
||||
def call(self, x, mask=None):
|
||||
return K.mean(x, axis=1)
|
||||
|
||||
|
||||
class GlobalMaxPooling1D(_GlobalPooling1D):
|
||||
'''Global max pooling operation for temporal data.
|
||||
"""Global max pooling operation for temporal data.
|
||||
|
||||
# Input shape
|
||||
3D tensor with shape: `(samples, steps, features)`.
|
||||
|
||||
# Output shape
|
||||
2D tensor with shape: `(samples, features)`.
|
||||
'''
|
||||
"""
|
||||
|
||||
def call(self, x, mask=None):
|
||||
return K.max(x, axis=1)
|
||||
|
||||
|
||||
class _GlobalPooling2D(Layer):
|
||||
"""Abstract class for different global pooling 2D layers.
|
||||
"""
|
||||
|
||||
def __init__(self, dim_ordering='default', **kwargs):
|
||||
super(_GlobalPooling2D, self).__init__(**kwargs)
|
||||
@@ -467,7 +471,7 @@ class _GlobalPooling2D(Layer):
|
||||
|
||||
|
||||
class GlobalAveragePooling2D(_GlobalPooling2D):
|
||||
'''Global average pooling operation for spatial data.
|
||||
"""Global average pooling operation for spatial data.
|
||||
|
||||
# Arguments
|
||||
dim_ordering: 'th' or 'tf'. In 'th' mode, the channels dimension
|
||||
@@ -485,7 +489,7 @@ class GlobalAveragePooling2D(_GlobalPooling2D):
|
||||
# Output shape
|
||||
2D tensor with shape:
|
||||
`(nb_samples, channels)`
|
||||
'''
|
||||
"""
|
||||
|
||||
def call(self, x, mask=None):
|
||||
if self.dim_ordering == 'tf':
|
||||
@@ -495,7 +499,7 @@ class GlobalAveragePooling2D(_GlobalPooling2D):
|
||||
|
||||
|
||||
class GlobalMaxPooling2D(_GlobalPooling2D):
|
||||
'''Global max pooling operation for spatial data.
|
||||
"""Global max pooling operation for spatial data.
|
||||
|
||||
# Arguments
|
||||
dim_ordering: 'th' or 'tf'. In 'th' mode, the channels dimension
|
||||
@@ -513,7 +517,7 @@ class GlobalMaxPooling2D(_GlobalPooling2D):
|
||||
# Output shape
|
||||
2D tensor with shape:
|
||||
`(nb_samples, channels)`
|
||||
'''
|
||||
"""
|
||||
|
||||
def call(self, x, mask=None):
|
||||
if self.dim_ordering == 'tf':
|
||||
@@ -523,6 +527,8 @@ class GlobalMaxPooling2D(_GlobalPooling2D):
|
||||
|
||||
|
||||
class _GlobalPooling3D(Layer):
|
||||
"""Abstract class for different global pooling 3D layers.
|
||||
"""
|
||||
|
||||
def __init__(self, dim_ordering='default', **kwargs):
|
||||
super(_GlobalPooling3D, self).__init__(**kwargs)
|
||||
@@ -547,7 +553,7 @@ class _GlobalPooling3D(Layer):
|
||||
|
||||
|
||||
class GlobalAveragePooling3D(_GlobalPooling3D):
|
||||
'''Global Average pooling operation for 3D data.
|
||||
"""Global Average pooling operation for 3D data.
|
||||
|
||||
# Arguments
|
||||
dim_ordering: 'th' or 'tf'. In 'th' mode, the channels dimension
|
||||
@@ -565,7 +571,7 @@ class GlobalAveragePooling3D(_GlobalPooling3D):
|
||||
# Output shape
|
||||
2D tensor with shape:
|
||||
`(nb_samples, channels)`
|
||||
'''
|
||||
"""
|
||||
|
||||
def call(self, x, mask=None):
|
||||
if self.dim_ordering == 'tf':
|
||||
@@ -575,7 +581,7 @@ class GlobalAveragePooling3D(_GlobalPooling3D):
|
||||
|
||||
|
||||
class GlobalMaxPooling3D(_GlobalPooling3D):
|
||||
'''Global Max pooling operation for 3D data.
|
||||
"""Global Max pooling operation for 3D data.
|
||||
|
||||
# Arguments
|
||||
dim_ordering: 'th' or 'tf'. In 'th' mode, the channels dimension
|
||||
@@ -593,7 +599,7 @@ class GlobalMaxPooling3D(_GlobalPooling3D):
|
||||
# Output shape
|
||||
2D tensor with shape:
|
||||
`(nb_samples, channels)`
|
||||
'''
|
||||
"""
|
||||
|
||||
def call(self, x, mask=None):
|
||||
if self.dim_ordering == 'tf':
|
||||
|
||||
+43
-22
@@ -3,14 +3,30 @@ from __future__ import absolute_import
|
||||
import numpy as np
|
||||
|
||||
from .. import backend as K
|
||||
from .. import activations, initializations, regularizers
|
||||
from ..engine import Layer, InputSpec
|
||||
from .. import activations
|
||||
from .. import initializations
|
||||
from .. import regularizers
|
||||
from ..engine import Layer
|
||||
from ..engine import InputSpec
|
||||
|
||||
|
||||
def time_distributed_dense(x, w, b=None, dropout=None,
|
||||
input_dim=None, output_dim=None, timesteps=None):
|
||||
'''Apply y.w + b for every temporal slice y of x.
|
||||
'''
|
||||
"""Apply `y . w + b` for every temporal slice y of x.
|
||||
|
||||
# Arguments
|
||||
x: input tensor.
|
||||
w: weight matrix.
|
||||
b: optional bias vector.
|
||||
dropout: whether to apply dropout (same dropout mask
|
||||
for every temporal slice of the input).
|
||||
input_dim: integer; optional dimensionality of the input.
|
||||
output_dim: integer; optional dimensionality of the output.
|
||||
timesteps: integer; optional number of timesteps.
|
||||
|
||||
# Returns
|
||||
Output tensor.
|
||||
"""
|
||||
if not input_dim:
|
||||
input_dim = K.shape(x)[2]
|
||||
if not timesteps:
|
||||
@@ -29,7 +45,7 @@ def time_distributed_dense(x, w, b=None, dropout=None,
|
||||
x = K.reshape(x, (-1, input_dim))
|
||||
x = K.dot(x, w)
|
||||
if b:
|
||||
x = x + b
|
||||
x += b
|
||||
# reshape to 3D tensor
|
||||
if K.backend() == 'tensorflow':
|
||||
x = K.reshape(x, K.stack([-1, timesteps, output_dim]))
|
||||
@@ -40,7 +56,7 @@ def time_distributed_dense(x, w, b=None, dropout=None,
|
||||
|
||||
|
||||
class Recurrent(Layer):
|
||||
'''Abstract base class for recurrent layers.
|
||||
"""Abstract base class for recurrent layers.
|
||||
Do not use in a model -- it's not a valid layer!
|
||||
Use its children classes `LSTM`, `GRU` and `SimpleRNN` instead.
|
||||
|
||||
@@ -128,23 +144,24 @@ class Recurrent(Layer):
|
||||
# Note on using statefulness in RNNs
|
||||
You can set RNN layers to be 'stateful', which means that the states
|
||||
computed for the samples in one batch will be reused as initial states
|
||||
for the samples in the next batch.
|
||||
This assumes a one-to-one mapping between
|
||||
samples in different successive batches.
|
||||
for the samples in the next batch. This assumes a one-to-one mapping
|
||||
between samples in different successive batches.
|
||||
|
||||
To enable statefulness:
|
||||
- specify `stateful=True` in the layer constructor.
|
||||
- specify a fixed batch size for your model, by passing
|
||||
if sequential model:
|
||||
a `batch_input_shape=(...)` to the first layer in your model.
|
||||
`batch_input_shape=(...)` to the first layer in your model.
|
||||
else for functional model with 1 or more Input layers:
|
||||
a `batch_shape=(...)` to all the first layers in your model.
|
||||
`batch_shape=(...)` to all the first layers in your model.
|
||||
This is the expected shape of your inputs *including the batch size*.
|
||||
It should be a tuple of integers, e.g. `(32, 10, 100)`.
|
||||
- specify `shuffle=False` when calling fit().
|
||||
|
||||
To reset the states of your model, call `.reset_states()` on either
|
||||
a specific layer, or on your entire model.
|
||||
'''
|
||||
"""
|
||||
|
||||
def __init__(self, weights=None,
|
||||
return_sequences=False, go_backwards=False, stateful=False,
|
||||
unroll=False, consume_less='cpu',
|
||||
@@ -253,7 +270,7 @@ class Recurrent(Layer):
|
||||
|
||||
|
||||
class SimpleRNN(Recurrent):
|
||||
'''Fully-connected RNN where the output is to be fed back to input.
|
||||
"""Fully-connected RNN where the output is to be fed back to input.
|
||||
|
||||
# Arguments
|
||||
output_dim: dimension of the internal projections and the final output.
|
||||
@@ -275,7 +292,8 @@ class SimpleRNN(Recurrent):
|
||||
|
||||
# References
|
||||
- [A Theoretically Grounded Application of Dropout in Recurrent Neural Networks](http://arxiv.org/abs/1512.05287)
|
||||
'''
|
||||
"""
|
||||
|
||||
def __init__(self, output_dim,
|
||||
init='glorot_uniform', inner_init='orthogonal',
|
||||
activation='tanh',
|
||||
@@ -402,7 +420,7 @@ class SimpleRNN(Recurrent):
|
||||
|
||||
|
||||
class GRU(Recurrent):
|
||||
'''Gated Recurrent Unit - Cho et al. 2014.
|
||||
"""Gated Recurrent Unit - Cho et al. 2014.
|
||||
|
||||
# Arguments
|
||||
output_dim: dimension of the internal projections and the final output.
|
||||
@@ -424,10 +442,11 @@ class GRU(Recurrent):
|
||||
dropout_U: float between 0 and 1. Fraction of the input units to drop for recurrent connections.
|
||||
|
||||
# References
|
||||
- [On the Properties of Neural Machine Translation: Encoder-Decoder Approaches](http://www.aclweb.org/anthology/W14-4012)
|
||||
- [Empirical Evaluation of Gated Recurrent Neural Networks on Sequence Modeling](http://arxiv.org/pdf/1412.3555v1.pdf)
|
||||
- [On the Properties of Neural Machine Translation: Encoder-Decoder Approaches](https://arxiv.org/abs/1409.1259)
|
||||
- [Empirical Evaluation of Gated Recurrent Neural Networks on Sequence Modeling](http://arxiv.org/abs/1412.3555v1)
|
||||
- [A Theoretically Grounded Application of Dropout in Recurrent Neural Networks](http://arxiv.org/abs/1512.05287)
|
||||
'''
|
||||
"""
|
||||
|
||||
def __init__(self, output_dim,
|
||||
init='glorot_uniform', inner_init='orthogonal',
|
||||
activation='tanh', inner_activation='hard_sigmoid',
|
||||
@@ -521,8 +540,9 @@ class GRU(Recurrent):
|
||||
assert self.stateful, 'Layer must be stateful.'
|
||||
input_shape = self.input_spec[0].shape
|
||||
if not input_shape[0]:
|
||||
raise ValueError('If a RNN is stateful, a complete ' +
|
||||
'input_shape must be provided (including batch size).')
|
||||
raise ValueError('If a RNN is stateful, a complete '
|
||||
'input_shape must be provided '
|
||||
'(including batch size).')
|
||||
if hasattr(self, 'states'):
|
||||
K.set_value(self.states[0],
|
||||
np.zeros((input_shape[0], self.output_dim)))
|
||||
@@ -621,7 +641,7 @@ class GRU(Recurrent):
|
||||
|
||||
|
||||
class LSTM(Recurrent):
|
||||
'''Long-Short Term Memory unit - Hochreiter 1997.
|
||||
"""Long Short-Term Memory unit - Hochreiter 1997.
|
||||
|
||||
For a step-by-step description of the algorithm, see
|
||||
[this tutorial](http://deeplearning.net/tutorial/lstm.html).
|
||||
@@ -653,7 +673,8 @@ class LSTM(Recurrent):
|
||||
- [Learning to forget: Continual prediction with LSTM](http://www.mitpressjournals.org/doi/pdf/10.1162/089976600300015015)
|
||||
- [Supervised sequence labeling with recurrent neural networks](http://www.cs.toronto.edu/~graves/preprint.pdf)
|
||||
- [A Theoretically Grounded Application of Dropout in Recurrent Neural Networks](http://arxiv.org/abs/1512.05287)
|
||||
'''
|
||||
"""
|
||||
|
||||
def __init__(self, output_dim,
|
||||
init='glorot_uniform', inner_init='orthogonal',
|
||||
forget_bias_init='one', activation='tanh',
|
||||
|
||||
+55
-54
@@ -1,8 +1,12 @@
|
||||
from ..engine import Layer, InputSpec
|
||||
import copy
|
||||
from ..engine import Layer
|
||||
from ..engine import InputSpec
|
||||
from .. import backend as K
|
||||
|
||||
|
||||
class Wrapper(Layer):
|
||||
"""Abstract wrapper base class.
|
||||
"""
|
||||
|
||||
def __init__(self, layer, **kwargs):
|
||||
self.layer = layer
|
||||
@@ -10,23 +14,14 @@ class Wrapper(Layer):
|
||||
super(Wrapper, self).__init__(**kwargs)
|
||||
|
||||
def build(self, input_shape=None):
|
||||
'''Assumes that self.layer is already set.
|
||||
Should be called at the end of .build() in the
|
||||
children classes.
|
||||
'''
|
||||
# Assumes that self.layer is already set.
|
||||
# Should be called at the end of .build() in the children classes.
|
||||
self.trainable_weights = getattr(self.layer, 'trainable_weights', [])
|
||||
self.non_trainable_weights = getattr(self.layer, 'non_trainable_weights', [])
|
||||
self.updates = getattr(self.layer, 'updates', [])
|
||||
self.losses = getattr(self.layer, 'losses', [])
|
||||
self.constraints = getattr(self.layer, 'constraints', {})
|
||||
|
||||
# properly attribute the current layer to
|
||||
# regularizers that need access to it
|
||||
# (e.g. ActivityRegularizer).
|
||||
for regularizer in self.regularizers:
|
||||
if hasattr(regularizer, 'set_layer'):
|
||||
regularizer.set_layer(self)
|
||||
|
||||
def get_weights(self):
|
||||
weights = self.layer.get_weights()
|
||||
return weights
|
||||
@@ -48,18 +43,19 @@ class Wrapper(Layer):
|
||||
|
||||
|
||||
class TimeDistributed(Wrapper):
|
||||
"""This wrapper allows to apply a layer to every
|
||||
temporal slice of an input.
|
||||
"""This wrapper applies a layer to every temporal slice of an input.
|
||||
|
||||
The input should be at least 3D,
|
||||
and the dimension of index one will be considered to be
|
||||
the temporal dimension.
|
||||
The input should be at least 3D, and the dimension of index one
|
||||
will be considered to be the temporal dimension.
|
||||
|
||||
Consider a batch of 32 samples, where each sample is a sequence of 10
|
||||
vectors of 16 dimensions. The batch input shape of the layer is then `(32, 10, 16)`
|
||||
(and the `input_shape`, not including the samples dimension, is `(10, 16)`).
|
||||
Consider a batch of 32 samples,
|
||||
where each sample is a sequence of 10 vectors of 16 dimensions.
|
||||
The batch input shape of the layer is then `(32, 10, 16)`,
|
||||
and the `input_shape`, not including the samples dimension, is `(10, 16)`.
|
||||
|
||||
You can then use `TimeDistributed` to apply a `Dense` layer
|
||||
to each of the 10 timesteps, independently:
|
||||
|
||||
You can then use `TimeDistributed` to apply a `Dense` layer to each of the 10 timesteps, independently:
|
||||
```python
|
||||
# as the first layer in a model
|
||||
model = Sequential()
|
||||
@@ -73,19 +69,19 @@ class TimeDistributed(Wrapper):
|
||||
|
||||
The output will then have shape `(32, 10, 8)`.
|
||||
|
||||
Note this is strictly equivalent to using `layers.core.TimeDistributedDense`.
|
||||
However what is different about `TimeDistributed`
|
||||
is that it can be used with arbitrary layers, not just `Dense`,
|
||||
`TimeDistributed` can be used with arbitrary layers, not just `Dense`,
|
||||
for instance with a `Convolution2D` layer:
|
||||
|
||||
```python
|
||||
model = Sequential()
|
||||
model.add(TimeDistributed(Convolution2D(64, 3, 3), input_shape=(10, 3, 299, 299)))
|
||||
model.add(TimeDistributed(Convolution2D(64, 3, 3),
|
||||
input_shape=(10, 3, 299, 299)))
|
||||
```
|
||||
|
||||
# Arguments
|
||||
layer: a layer instance.
|
||||
"""
|
||||
|
||||
def __init__(self, layer, **kwargs):
|
||||
self.supports_masking = True
|
||||
super(TimeDistributed, self).__init__(layer, **kwargs)
|
||||
@@ -105,15 +101,15 @@ class TimeDistributed(Wrapper):
|
||||
timesteps = input_shape[1]
|
||||
return (child_output_shape[0], timesteps) + child_output_shape[1:]
|
||||
|
||||
def call(self, X, mask=None):
|
||||
input_shape = K.int_shape(X)
|
||||
def call(self, inputs, mask=None):
|
||||
input_shape = K.int_shape(inputs)
|
||||
if input_shape[0]:
|
||||
# batch size matters, use rnn-based implementation
|
||||
def step(x, states):
|
||||
def step(x, _):
|
||||
output = self.layer.call(x)
|
||||
return output, []
|
||||
|
||||
_, outputs, _ = K.rnn(step, X,
|
||||
_, outputs, _ = K.rnn(step, inputs,
|
||||
initial_states=[],
|
||||
input_length=input_shape[1],
|
||||
unroll=False)
|
||||
@@ -124,24 +120,26 @@ class TimeDistributed(Wrapper):
|
||||
# we can go with reshape-based implementation for performance
|
||||
input_length = input_shape[1]
|
||||
if not input_length:
|
||||
input_length = K.shape(X)[1]
|
||||
X = K.reshape(X, (-1,) + input_shape[2:]) # (nb_samples * timesteps, ...)
|
||||
y = self.layer.call(X) # (nb_samples * timesteps, ...)
|
||||
input_length = K.shape(inputs)[1]
|
||||
# (nb_samples * timesteps, ...)
|
||||
inputs = K.reshape(inputs, (-1,) + input_shape[2:])
|
||||
y = self.layer.call(inputs) # (nb_samples * timesteps, ...)
|
||||
# (nb_samples, timesteps, ...)
|
||||
output_shape = self.get_output_shape_for(input_shape)
|
||||
y = K.reshape(y, (-1, input_length) + output_shape[2:])
|
||||
|
||||
# Apply activity regularizer if any:
|
||||
if hasattr(self.layer, 'activity_regularizer') and self.layer.activity_regularizer is not None:
|
||||
if (hasattr(self.layer, 'activity_regularizer') and
|
||||
self.layer.activity_regularizer is not None):
|
||||
regularization_loss = self.layer.activity_regularizer(y)
|
||||
self.add_loss(regularization_loss, X)
|
||||
self.add_loss(regularization_loss, inputs)
|
||||
return y
|
||||
|
||||
|
||||
class Bidirectional(Wrapper):
|
||||
''' Bidirectional wrapper for RNNs.
|
||||
"""Bidirectional wrapper for RNNs.
|
||||
|
||||
# Arguments:
|
||||
# Arguments
|
||||
layer: `Recurrent` instance.
|
||||
merge_mode: Mode by which outputs of the
|
||||
forward and backward RNNs will be combined.
|
||||
@@ -149,7 +147,7 @@ class Bidirectional(Wrapper):
|
||||
If None, the outputs will not be combined,
|
||||
they will be returned as a list.
|
||||
|
||||
# Examples:
|
||||
# Examples
|
||||
|
||||
```python
|
||||
model = Sequential()
|
||||
@@ -159,13 +157,14 @@ class Bidirectional(Wrapper):
|
||||
model.add(Activation('softmax'))
|
||||
model.compile(loss='categorical_crossentropy', optimizer='rmsprop')
|
||||
```
|
||||
'''
|
||||
"""
|
||||
|
||||
def __init__(self, layer, merge_mode='concat', weights=None, **kwargs):
|
||||
if merge_mode not in ['sum', 'mul', 'ave', 'concat', None]:
|
||||
raise ValueError('Invalid merge mode. '
|
||||
'Merge mode should be one of '
|
||||
'{"sum", "mul", "ave", "concat", None}')
|
||||
self.forward_layer = layer
|
||||
self.forward_layer = copy.copy(layer)
|
||||
config = layer.get_config()
|
||||
config['go_backwards'] = not config['go_backwards']
|
||||
self.backward_layer = layer.__class__.from_config(config)
|
||||
@@ -199,21 +198,21 @@ class Bidirectional(Wrapper):
|
||||
elif self.merge_mode is None:
|
||||
return [self.forward_layer.get_output_shape_for(input_shape)] * 2
|
||||
|
||||
def call(self, X, mask=None):
|
||||
Y = self.forward_layer.call(X, mask)
|
||||
Y_rev = self.backward_layer.call(X, mask)
|
||||
def call(self, inputs, mask=None):
|
||||
y = self.forward_layer.call(inputs, mask)
|
||||
y_rev = self.backward_layer.call(inputs, mask)
|
||||
if self.return_sequences:
|
||||
Y_rev = K.reverse(Y_rev, 1)
|
||||
y_rev = K.reverse(y_rev, 1)
|
||||
if self.merge_mode == 'concat':
|
||||
return K.concatenate([Y, Y_rev])
|
||||
return K.concatenate([y, y_rev])
|
||||
elif self.merge_mode == 'sum':
|
||||
return Y + Y_rev
|
||||
return y + y_rev
|
||||
elif self.merge_mode == 'ave':
|
||||
return (Y + Y_rev) / 2
|
||||
return (y + y_rev) / 2
|
||||
elif self.merge_mode == 'mul':
|
||||
return Y * Y_rev
|
||||
return y * y_rev
|
||||
elif self.merge_mode is None:
|
||||
return [Y, Y_rev]
|
||||
return [y, y_rev]
|
||||
|
||||
def reset_states(self):
|
||||
self.forward_layer.reset_states()
|
||||
@@ -235,13 +234,15 @@ class Bidirectional(Wrapper):
|
||||
@property
|
||||
def trainable_weights(self):
|
||||
if hasattr(self.forward_layer, 'trainable_weights'):
|
||||
return self.forward_layer.trainable_weights + self.backward_layer.trainable_weights
|
||||
return (self.forward_layer.trainable_weights +
|
||||
self.backward_layer.trainable_weights)
|
||||
return []
|
||||
|
||||
@property
|
||||
def non_trainable_weights(self):
|
||||
if hasattr(self.forward_layer, 'non_trainable_weights'):
|
||||
return self.forward_layer.non_trainable_weights + self.backward_layer.non_trainable_weights
|
||||
return (self.forward_layer.non_trainable_weights +
|
||||
self.backward_layer.non_trainable_weights)
|
||||
return []
|
||||
|
||||
@property
|
||||
@@ -258,11 +259,11 @@ class Bidirectional(Wrapper):
|
||||
|
||||
@property
|
||||
def constraints(self):
|
||||
_constraints = {}
|
||||
constraints = {}
|
||||
if hasattr(self.forward_layer, 'constraints'):
|
||||
_constraints.update(self.forward_layer.constraints)
|
||||
_constraints.update(self.backward_layer.constraints)
|
||||
return _constraints
|
||||
constraints.update(self.forward_layer.constraints)
|
||||
constraints.update(self.backward_layer.constraints)
|
||||
return constraints
|
||||
|
||||
def get_config(self):
|
||||
config = {"merge_mode": self.merge_mode}
|
||||
|
||||
+35
-67
@@ -1,135 +1,90 @@
|
||||
import numpy as np
|
||||
from . import backend as K
|
||||
from .utils.generic_utils import get_from_module
|
||||
|
||||
|
||||
def binary_accuracy(y_true, y_pred):
|
||||
'''Calculates the mean accuracy rate across all predictions for binary
|
||||
classification problems.
|
||||
'''
|
||||
return K.mean(K.equal(y_true, K.round(y_pred)))
|
||||
|
||||
|
||||
def categorical_accuracy(y_true, y_pred):
|
||||
'''Calculates the mean accuracy rate across all predictions for
|
||||
multiclass classification problems.
|
||||
'''
|
||||
return K.mean(K.equal(K.argmax(y_true, axis=-1),
|
||||
K.argmax(y_pred, axis=-1)))
|
||||
K.argmax(y_pred, axis=-1)))
|
||||
|
||||
|
||||
def sparse_categorical_accuracy(y_true, y_pred):
|
||||
'''Same as categorical_accuracy, but useful when the predictions are for
|
||||
sparse targets.
|
||||
'''
|
||||
return K.mean(K.equal(K.max(y_true, axis=-1),
|
||||
K.cast(K.argmax(y_pred, axis=-1), K.floatx())))
|
||||
|
||||
|
||||
def top_k_categorical_accuracy(y_true, y_pred, k=5):
|
||||
'''Calculates the top-k categorical accuracy rate, i.e. success when the
|
||||
target class is within the top-k predictions provided.
|
||||
'''
|
||||
return K.mean(K.in_top_k(y_pred, K.argmax(y_true, axis=-1), k))
|
||||
|
||||
|
||||
def mean_squared_error(y_true, y_pred):
|
||||
'''Calculates the mean squared error (mse) rate
|
||||
between predicted and target values.
|
||||
'''
|
||||
return K.mean(K.square(y_pred - y_true))
|
||||
|
||||
|
||||
def mean_absolute_error(y_true, y_pred):
|
||||
'''Calculates the mean absolute error (mae) rate
|
||||
between predicted and target values.
|
||||
'''
|
||||
return K.mean(K.abs(y_pred - y_true))
|
||||
|
||||
|
||||
def mean_absolute_percentage_error(y_true, y_pred):
|
||||
'''Calculates the mean absolute percentage error (mape) rate
|
||||
between predicted and target values.
|
||||
'''
|
||||
diff = K.abs((y_true - y_pred) / K.clip(K.abs(y_true), K.epsilon(), np.inf))
|
||||
diff = K.abs((y_true - y_pred) / K.clip(K.abs(y_true),
|
||||
K.epsilon(),
|
||||
None))
|
||||
return 100. * K.mean(diff)
|
||||
|
||||
|
||||
def mean_squared_logarithmic_error(y_true, y_pred):
|
||||
'''Calculates the mean squared logarithmic error (msle) rate
|
||||
between predicted and target values.
|
||||
'''
|
||||
first_log = K.log(K.clip(y_pred, K.epsilon(), np.inf) + 1.)
|
||||
second_log = K.log(K.clip(y_true, K.epsilon(), np.inf) + 1.)
|
||||
first_log = K.log(K.clip(y_pred, K.epsilon(), None) + 1.)
|
||||
second_log = K.log(K.clip(y_true, K.epsilon(), None) + 1.)
|
||||
return K.mean(K.square(first_log - second_log))
|
||||
|
||||
|
||||
def hinge(y_true, y_pred):
|
||||
'''Calculates the hinge loss, which is defined as
|
||||
`max(1 - y_true * y_pred, 0)`.
|
||||
'''
|
||||
return K.mean(K.maximum(1. - y_true * y_pred, 0.))
|
||||
|
||||
|
||||
def squared_hinge(y_true, y_pred):
|
||||
'''Calculates the squared value of the hinge loss.
|
||||
'''
|
||||
return K.mean(K.square(K.maximum(1. - y_true * y_pred, 0.)))
|
||||
|
||||
|
||||
def categorical_crossentropy(y_true, y_pred):
|
||||
'''Calculates the cross-entropy value for multiclass classification
|
||||
problems. Note: Expects a binary class matrix instead of a vector
|
||||
of scalar classes.
|
||||
'''
|
||||
return K.mean(K.categorical_crossentropy(y_pred, y_true))
|
||||
|
||||
|
||||
def sparse_categorical_crossentropy(y_true, y_pred):
|
||||
'''Calculates the cross-entropy value for multiclass classification
|
||||
problems with sparse targets. Note: Expects an array of integer
|
||||
classes. Labels shape must have the same number of dimensions as
|
||||
output shape. If you get a shape error, add a length-1 dimension
|
||||
to labels.
|
||||
'''
|
||||
return K.mean(K.sparse_categorical_crossentropy(y_pred, y_true))
|
||||
|
||||
|
||||
def binary_crossentropy(y_true, y_pred):
|
||||
'''Calculates the cross-entropy value for binary classification
|
||||
problems.
|
||||
'''
|
||||
return K.mean(K.binary_crossentropy(y_pred, y_true))
|
||||
|
||||
|
||||
def kullback_leibler_divergence(y_true, y_pred):
|
||||
'''Calculates the Kullback-Leibler (KL) divergence between prediction
|
||||
and target values.
|
||||
'''
|
||||
y_true = K.clip(y_true, K.epsilon(), 1)
|
||||
y_pred = K.clip(y_pred, K.epsilon(), 1)
|
||||
return K.sum(y_true * K.log(y_true / y_pred), axis=-1)
|
||||
return K.mean(K.sum(y_true * K.log(y_true / y_pred), axis=-1))
|
||||
|
||||
|
||||
def poisson(y_true, y_pred):
|
||||
'''Calculates the poisson function over prediction and target values.
|
||||
'''
|
||||
return K.mean(y_pred - y_true * K.log(y_pred + K.epsilon()))
|
||||
|
||||
|
||||
def cosine_proximity(y_true, y_pred):
|
||||
'''Calculates the cosine similarity between the prediction and target
|
||||
values.
|
||||
'''
|
||||
y_true = K.l2_normalize(y_true, axis=-1)
|
||||
y_pred = K.l2_normalize(y_pred, axis=-1)
|
||||
return -K.mean(y_true * y_pred)
|
||||
|
||||
|
||||
def matthews_correlation(y_true, y_pred):
|
||||
'''Calculates the Matthews correlation coefficient measure for quality
|
||||
"""Matthews correlation metric.
|
||||
|
||||
It is only computed as a batch-wise average, not globally.
|
||||
|
||||
Computes the Matthews correlation coefficient measure for quality
|
||||
of binary classification problems.
|
||||
'''
|
||||
"""
|
||||
y_pred_pos = K.round(K.clip(y_pred, 0, 1))
|
||||
y_pred_neg = 1 - y_pred_pos
|
||||
|
||||
@@ -149,9 +104,13 @@ def matthews_correlation(y_true, y_pred):
|
||||
|
||||
|
||||
def precision(y_true, y_pred):
|
||||
'''Calculates the precision, a metric for multi-label classification of
|
||||
"""Precision metric.
|
||||
|
||||
Only computes a batch-wise average of precision.
|
||||
|
||||
Computes the precision, a metric for multi-label classification of
|
||||
how many selected items are relevant.
|
||||
'''
|
||||
"""
|
||||
true_positives = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
|
||||
predicted_positives = K.sum(K.round(K.clip(y_pred, 0, 1)))
|
||||
precision = true_positives / (predicted_positives + K.epsilon())
|
||||
@@ -159,9 +118,13 @@ def precision(y_true, y_pred):
|
||||
|
||||
|
||||
def recall(y_true, y_pred):
|
||||
'''Calculates the recall, a metric for multi-label classification of
|
||||
"""Recall metric.
|
||||
|
||||
Only computes a batch-wise average of recall.
|
||||
|
||||
Computes the recall, a metric for multi-label classification of
|
||||
how many relevant items are selected.
|
||||
'''
|
||||
"""
|
||||
true_positives = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
|
||||
possible_positives = K.sum(K.round(K.clip(y_true, 0, 1)))
|
||||
recall = true_positives / (possible_positives + K.epsilon())
|
||||
@@ -169,7 +132,10 @@ def recall(y_true, y_pred):
|
||||
|
||||
|
||||
def fbeta_score(y_true, y_pred, beta=1):
|
||||
'''Calculates the F score, the weighted harmonic mean of precision and recall.
|
||||
"""Computes the F score.
|
||||
|
||||
The F score is the weighted harmonic mean of precision and recall.
|
||||
Here it is only computed as a batch-wise average, not globally.
|
||||
|
||||
This is useful for multi-label classification, where input samples can be
|
||||
classified as sets of labels. By only using accuracy (precision) a model
|
||||
@@ -182,10 +148,10 @@ def fbeta_score(y_true, y_pred, beta=1):
|
||||
With beta = 1, this is equivalent to a F-measure. With beta < 1, assigning
|
||||
correct classes becomes more important, and with beta > 1 the metric is
|
||||
instead weighted towards penalizing incorrect class assignments.
|
||||
'''
|
||||
"""
|
||||
if beta < 0:
|
||||
raise ValueError('The lowest choosable beta is zero (only precision).')
|
||||
|
||||
|
||||
# If there are no true positives, fix the F score at 0 like sklearn.
|
||||
if K.sum(K.round(K.clip(y_true, 0, 1))) == 0:
|
||||
return 0
|
||||
@@ -198,8 +164,10 @@ def fbeta_score(y_true, y_pred, beta=1):
|
||||
|
||||
|
||||
def fmeasure(y_true, y_pred):
|
||||
'''Calculates the f-measure, the harmonic mean of precision and recall.
|
||||
'''
|
||||
"""Computes the f-measure, the harmonic mean of precision and recall.
|
||||
|
||||
Here it is only computed as a batch-wise average, not globally.
|
||||
"""
|
||||
return fbeta_score(y_true, y_pred, beta=1)
|
||||
|
||||
|
||||
|
||||
+70
-59
@@ -106,7 +106,9 @@ def save_model(model, filepath, overwrite=True):
|
||||
f.close()
|
||||
|
||||
|
||||
def load_model(filepath, custom_objects={}):
|
||||
def load_model(filepath, custom_objects=None):
|
||||
if not custom_objects:
|
||||
custom_objects = {}
|
||||
|
||||
def deserialize(obj):
|
||||
if isinstance(obj, list):
|
||||
@@ -151,7 +153,8 @@ def load_model(filepath, custom_objects={}):
|
||||
return model
|
||||
training_config = json.loads(training_config.decode('utf-8'))
|
||||
optimizer_config = training_config['optimizer_config']
|
||||
optimizer = optimizer_from_config(optimizer_config, custom_objects=custom_objects)
|
||||
optimizer = optimizer_from_config(optimizer_config,
|
||||
custom_objects=custom_objects)
|
||||
|
||||
# recover loss functions and metrics
|
||||
loss = deserialize(training_config['loss'])
|
||||
@@ -181,7 +184,7 @@ def load_model(filepath, custom_objects={}):
|
||||
return model
|
||||
|
||||
|
||||
def model_from_config(config, custom_objects={}):
|
||||
def model_from_config(config, custom_objects=None):
|
||||
from keras.utils.layer_utils import layer_from_config
|
||||
if isinstance(config, list):
|
||||
raise TypeError('`model_fom_config` expects a dictionary, not a list. '
|
||||
@@ -190,20 +193,20 @@ def model_from_config(config, custom_objects={}):
|
||||
return layer_from_config(config, custom_objects=custom_objects)
|
||||
|
||||
|
||||
def model_from_yaml(yaml_string, custom_objects={}):
|
||||
'''Parses a yaml model configuration file
|
||||
def model_from_yaml(yaml_string, custom_objects=None):
|
||||
"""Parses a yaml model configuration file
|
||||
and returns a model instance.
|
||||
'''
|
||||
"""
|
||||
import yaml
|
||||
from keras.utils.layer_utils import layer_from_config
|
||||
config = yaml.load(yaml_string)
|
||||
return layer_from_config(config, custom_objects=custom_objects)
|
||||
|
||||
|
||||
def model_from_json(json_string, custom_objects={}):
|
||||
'''Parses a JSON model configuration file
|
||||
def model_from_json(json_string, custom_objects=None):
|
||||
"""Parses a JSON model configuration file
|
||||
and returns a model instance.
|
||||
'''
|
||||
"""
|
||||
import json
|
||||
from keras.utils.layer_utils import layer_from_config
|
||||
config = json.loads(json_string)
|
||||
@@ -211,7 +214,7 @@ def model_from_json(json_string, custom_objects={}):
|
||||
|
||||
|
||||
class Sequential(Model):
|
||||
'''Linear stack of layers.
|
||||
"""Linear stack of layers.
|
||||
|
||||
# Arguments
|
||||
layers: list of layers to add to the model.
|
||||
@@ -245,8 +248,9 @@ class Sequential(Model):
|
||||
model.add(Dense(32, batch_input_shape=(None, 500)))
|
||||
model.add(Dense(32))
|
||||
```
|
||||
'''
|
||||
def __init__(self, layers=[], name=None):
|
||||
"""
|
||||
|
||||
def __init__(self, layers=None, name=None):
|
||||
self.layers = [] # stack of layers
|
||||
self.model = None # internal Model instance
|
||||
self.inputs = [] # tensors
|
||||
@@ -264,15 +268,16 @@ class Sequential(Model):
|
||||
name = prefix + str(K.get_uid(prefix))
|
||||
self.name = name
|
||||
|
||||
for layer in layers:
|
||||
self.add(layer)
|
||||
if layers:
|
||||
for layer in layers:
|
||||
self.add(layer)
|
||||
|
||||
def add(self, layer):
|
||||
'''Adds a layer instance on top of the layer stack.
|
||||
"""Adds a layer instance on top of the layer stack.
|
||||
|
||||
# Arguments
|
||||
layer: layer instance.
|
||||
'''
|
||||
"""
|
||||
if not isinstance(layer, Layer):
|
||||
raise TypeError('The added layer must be '
|
||||
'an instance of class Layer. '
|
||||
@@ -340,8 +345,8 @@ class Sequential(Model):
|
||||
self._flattened_layers = None
|
||||
|
||||
def pop(self):
|
||||
'''Removes the last layer in the model.
|
||||
'''
|
||||
"""Removes the last layer in the model.
|
||||
"""
|
||||
if not self.layers:
|
||||
raise TypeError('There are no layers in the model.')
|
||||
|
||||
@@ -360,7 +365,7 @@ class Sequential(Model):
|
||||
self._flattened_layers = None
|
||||
|
||||
def get_layer(self, name=None, index=None):
|
||||
'''Returns a layer based on either its name (unique)
|
||||
"""Returns a layer based on either its name (unique)
|
||||
or its index in the graph. Indices are based on
|
||||
order of horizontal graph traversal (bottom-up).
|
||||
|
||||
@@ -370,7 +375,7 @@ class Sequential(Model):
|
||||
|
||||
# Returns
|
||||
A layer instance.
|
||||
'''
|
||||
"""
|
||||
if not self.built:
|
||||
self.build()
|
||||
return self.model.get_layer(name, index)
|
||||
@@ -515,9 +520,9 @@ class Sequential(Model):
|
||||
return self._gather_dict_attr('constraints')
|
||||
|
||||
def get_weights(self):
|
||||
'''Returns the weights of the model,
|
||||
"""Returns the weights of the model,
|
||||
as a flat list of Numpy arrays.
|
||||
'''
|
||||
"""
|
||||
# support for legacy behavior
|
||||
weights = []
|
||||
for layer in self.flattened_layers:
|
||||
@@ -525,11 +530,11 @@ class Sequential(Model):
|
||||
return weights
|
||||
|
||||
def set_weights(self, weights):
|
||||
'''Sets the weights of the model.
|
||||
"""Sets the weights of the model.
|
||||
The `weights` argument should be a list
|
||||
of Numpy arrays with shapes and types matching
|
||||
the output of `model.get_weights()`.
|
||||
'''
|
||||
"""
|
||||
# support for legacy behavior
|
||||
for layer in self.flattened_layers:
|
||||
nb_param = len(layer.weights)
|
||||
@@ -545,10 +550,10 @@ class Sequential(Model):
|
||||
return self.model.training_data
|
||||
|
||||
def compile(self, optimizer, loss,
|
||||
metrics=[],
|
||||
metrics=None,
|
||||
sample_weight_mode=None,
|
||||
**kwargs):
|
||||
'''Configures the learning process.
|
||||
"""Configures the learning process.
|
||||
|
||||
# Arguments
|
||||
optimizer: str (name of optimizer) or optimizer object.
|
||||
@@ -574,7 +579,7 @@ class Sequential(Model):
|
||||
loss='categorical_crossentropy',
|
||||
metrics=['accuracy'])
|
||||
```
|
||||
'''
|
||||
"""
|
||||
# create the underlying model
|
||||
self.build()
|
||||
# legacy kwarg support
|
||||
@@ -595,10 +600,10 @@ class Sequential(Model):
|
||||
self.metrics_names = self.model.metrics_names
|
||||
self.sample_weight_mode = self.model.sample_weight_mode
|
||||
|
||||
def fit(self, x, y, batch_size=32, nb_epoch=10, verbose=1, callbacks=[],
|
||||
def fit(self, x, y, batch_size=32, nb_epoch=10, verbose=1, callbacks=None,
|
||||
validation_split=0., validation_data=None, shuffle=True,
|
||||
class_weight=None, sample_weight=None, **kwargs):
|
||||
'''Trains the model for a fixed number of epochs.
|
||||
class_weight=None, sample_weight=None, initial_epoch=0, **kwargs):
|
||||
"""Trains the model for a fixed number of epochs.
|
||||
|
||||
# Arguments
|
||||
x: input data, as a Numpy array or list of Numpy arrays
|
||||
@@ -632,13 +637,15 @@ class Sequential(Model):
|
||||
to apply a different weight to every timestep of every sample.
|
||||
In this case you should make sure to specify
|
||||
sample_weight_mode="temporal" in compile().
|
||||
initial_epoch: epoch at which to start training
|
||||
(useful for resuming a previous training run)
|
||||
|
||||
# Returns
|
||||
A `History` object. Its `History.history` attribute is
|
||||
a record of training loss values and metrics values
|
||||
at successive epochs, as well as validation loss values
|
||||
and validation metrics values (if applicable).
|
||||
'''
|
||||
"""
|
||||
if self.model is None:
|
||||
raise RuntimeError('The model needs to be compiled '
|
||||
'before being used.')
|
||||
@@ -661,11 +668,12 @@ class Sequential(Model):
|
||||
validation_data=validation_data,
|
||||
shuffle=shuffle,
|
||||
class_weight=class_weight,
|
||||
sample_weight=sample_weight)
|
||||
sample_weight=sample_weight,
|
||||
initial_epoch=initial_epoch)
|
||||
|
||||
def evaluate(self, x, y, batch_size=32, verbose=1,
|
||||
sample_weight=None, **kwargs):
|
||||
'''Computes the loss on some input data, batch by batch.
|
||||
"""Computes the loss on some input data, batch by batch.
|
||||
|
||||
# Arguments
|
||||
x: input data, as a Numpy array or list of Numpy arrays
|
||||
@@ -680,7 +688,7 @@ class Sequential(Model):
|
||||
or list of scalars (if the model computes other metrics).
|
||||
The attribute `model.metrics_names` will give you
|
||||
the display labels for the scalar outputs.
|
||||
'''
|
||||
"""
|
||||
if self.model is None:
|
||||
raise RuntimeError('The model needs to be compiled '
|
||||
'before being used.')
|
||||
@@ -700,7 +708,7 @@ class Sequential(Model):
|
||||
sample_weight=sample_weight)
|
||||
|
||||
def predict(self, x, batch_size=32, verbose=0):
|
||||
'''Generates output predictions for the input samples,
|
||||
"""Generates output predictions for the input samples,
|
||||
processing the samples in a batched way.
|
||||
|
||||
# Arguments
|
||||
@@ -710,21 +718,21 @@ class Sequential(Model):
|
||||
|
||||
# Returns
|
||||
A Numpy array of predictions.
|
||||
'''
|
||||
"""
|
||||
if self.model is None:
|
||||
self.build()
|
||||
return self.model.predict(x, batch_size=batch_size, verbose=verbose)
|
||||
|
||||
def predict_on_batch(self, x):
|
||||
'''Returns predictions for a single batch of samples.
|
||||
'''
|
||||
"""Returns predictions for a single batch of samples.
|
||||
"""
|
||||
if self.model is None:
|
||||
self.build()
|
||||
return self.model.predict_on_batch(x)
|
||||
|
||||
def train_on_batch(self, x, y, class_weight=None,
|
||||
sample_weight=None, **kwargs):
|
||||
'''Single gradient update over one batch of samples.
|
||||
"""Single gradient update over one batch of samples.
|
||||
|
||||
# Arguments
|
||||
x: input data, as a Numpy array or list of Numpy arrays
|
||||
@@ -739,7 +747,7 @@ class Sequential(Model):
|
||||
or list of scalars (if the model computes other metrics).
|
||||
The attribute `model.metrics_names` will give you
|
||||
the display labels for the scalar outputs.
|
||||
'''
|
||||
"""
|
||||
if self.model is None:
|
||||
raise RuntimeError('The model needs to be compiled '
|
||||
'before being used.')
|
||||
@@ -759,7 +767,7 @@ class Sequential(Model):
|
||||
|
||||
def test_on_batch(self, x, y,
|
||||
sample_weight=None, **kwargs):
|
||||
'''Evaluates the model over a single batch of samples.
|
||||
"""Evaluates the model over a single batch of samples.
|
||||
|
||||
# Arguments
|
||||
x: input data, as a Numpy array or list of Numpy arrays
|
||||
@@ -772,7 +780,7 @@ class Sequential(Model):
|
||||
or list of scalars (if the model computes other metrics).
|
||||
The attribute `model.metrics_names` will give you
|
||||
the display labels for the scalar outputs.
|
||||
'''
|
||||
"""
|
||||
if self.model is None:
|
||||
raise RuntimeError('The model needs to be compiled '
|
||||
'before being used.')
|
||||
@@ -790,7 +798,7 @@ class Sequential(Model):
|
||||
sample_weight=sample_weight)
|
||||
|
||||
def predict_proba(self, x, batch_size=32, verbose=1):
|
||||
'''Generates class probability predictions for the input samples
|
||||
"""Generates class probability predictions for the input samples
|
||||
batch by batch.
|
||||
|
||||
# Arguments
|
||||
@@ -801,7 +809,7 @@ class Sequential(Model):
|
||||
|
||||
# Returns
|
||||
A Numpy array of probability predictions.
|
||||
'''
|
||||
"""
|
||||
preds = self.predict(x, batch_size, verbose)
|
||||
if preds.min() < 0. or preds.max() > 1.:
|
||||
warnings.warn('Network returning invalid probability values. '
|
||||
@@ -811,7 +819,7 @@ class Sequential(Model):
|
||||
return preds
|
||||
|
||||
def predict_classes(self, x, batch_size=32, verbose=1):
|
||||
'''Generate class predictions for the input samples
|
||||
"""Generate class predictions for the input samples
|
||||
batch by batch.
|
||||
|
||||
# Arguments
|
||||
@@ -822,7 +830,7 @@ class Sequential(Model):
|
||||
|
||||
# Returns
|
||||
A numpy array of class predictions.
|
||||
'''
|
||||
"""
|
||||
proba = self.predict(x, batch_size=batch_size, verbose=verbose)
|
||||
if proba.shape[-1] > 1:
|
||||
return proba.argmax(axis=-1)
|
||||
@@ -830,11 +838,11 @@ class Sequential(Model):
|
||||
return (proba > 0.5).astype('int32')
|
||||
|
||||
def fit_generator(self, generator, samples_per_epoch, nb_epoch,
|
||||
verbose=1, callbacks=[],
|
||||
verbose=1, callbacks=None,
|
||||
validation_data=None, nb_val_samples=None,
|
||||
class_weight=None, max_q_size=10, nb_worker=1,
|
||||
pickle_safe=False, **kwargs):
|
||||
'''Fits the model on data generated batch-by-batch by
|
||||
pickle_safe=False, initial_epoch=0, **kwargs):
|
||||
"""Fits the model on data generated batch-by-batch by
|
||||
a Python generator.
|
||||
The generator is run in parallel to the model, for efficiency.
|
||||
For instance, this allows you to do real-time data augmentation
|
||||
@@ -869,6 +877,8 @@ class Sequential(Model):
|
||||
this implementation relies on multiprocessing, you should not pass
|
||||
non picklable arguments to the generator as they can't be passed
|
||||
easily to children processes.
|
||||
initial_epoch: epoch at which to start training
|
||||
(useful for resuming a previous training run)
|
||||
|
||||
# Returns
|
||||
A `History` object.
|
||||
@@ -889,7 +899,7 @@ class Sequential(Model):
|
||||
model.fit_generator(generate_arrays_from_file('/my_file.txt'),
|
||||
samples_per_epoch=10000, nb_epoch=10)
|
||||
```
|
||||
'''
|
||||
"""
|
||||
if self.model is None:
|
||||
raise RuntimeError('The model needs to be compiled '
|
||||
'before being used.')
|
||||
@@ -921,12 +931,13 @@ class Sequential(Model):
|
||||
class_weight=class_weight,
|
||||
max_q_size=max_q_size,
|
||||
nb_worker=nb_worker,
|
||||
pickle_safe=pickle_safe)
|
||||
pickle_safe=pickle_safe,
|
||||
initial_epoch=initial_epoch)
|
||||
|
||||
def evaluate_generator(self, generator, val_samples,
|
||||
max_q_size=10, nb_worker=1,
|
||||
pickle_safe=False, **kwargs):
|
||||
'''Evaluates the model on a data generator. The generator should
|
||||
"""Evaluates the model on a data generator. The generator should
|
||||
return the same kind of data as accepted by `test_on_batch`.
|
||||
|
||||
# Arguments
|
||||
@@ -942,7 +953,7 @@ class Sequential(Model):
|
||||
this implementation relies on multiprocessing, you should not pass non
|
||||
non picklable arguments to the generator as they can't be passed
|
||||
easily to children processes.
|
||||
'''
|
||||
"""
|
||||
if self.model is None:
|
||||
raise RuntimeError('The model needs to be compiled '
|
||||
'before being used.')
|
||||
@@ -971,7 +982,7 @@ class Sequential(Model):
|
||||
|
||||
def predict_generator(self, generator, val_samples,
|
||||
max_q_size=10, nb_worker=1, pickle_safe=False):
|
||||
'''Generates predictions for the input samples from a data generator.
|
||||
"""Generates predictions for the input samples from a data generator.
|
||||
The generator should return the same kind of data as accepted by
|
||||
`predict_on_batch`.
|
||||
|
||||
@@ -988,7 +999,7 @@ class Sequential(Model):
|
||||
|
||||
# Returns
|
||||
A Numpy array of predictions.
|
||||
'''
|
||||
"""
|
||||
if self.model is None:
|
||||
self.build()
|
||||
if nb_worker > 1 and not pickle_safe:
|
||||
@@ -1001,9 +1012,9 @@ class Sequential(Model):
|
||||
pickle_safe=pickle_safe)
|
||||
|
||||
def get_config(self):
|
||||
'''Returns the model configuration
|
||||
"""Returns the model configuration
|
||||
as a Python list.
|
||||
'''
|
||||
"""
|
||||
config = []
|
||||
if isinstance(self.layers[0], Merge):
|
||||
assert hasattr(self.layers[0], 'layers')
|
||||
@@ -1025,8 +1036,8 @@ class Sequential(Model):
|
||||
|
||||
@classmethod
|
||||
def from_config(cls, config, layer_cache=None):
|
||||
'''Supports legacy formats
|
||||
'''
|
||||
"""Supports legacy formats
|
||||
"""
|
||||
from keras.utils.layer_utils import layer_from_config
|
||||
from keras.layers import Merge
|
||||
|
||||
|
||||
+8
-11
@@ -1,5 +1,5 @@
|
||||
from __future__ import absolute_import
|
||||
import numpy as np
|
||||
|
||||
from . import backend as K
|
||||
from .utils.generic_utils import get_from_module
|
||||
|
||||
@@ -13,13 +13,15 @@ def mean_absolute_error(y_true, y_pred):
|
||||
|
||||
|
||||
def mean_absolute_percentage_error(y_true, y_pred):
|
||||
diff = K.abs((y_true - y_pred) / K.clip(K.abs(y_true), K.epsilon(), np.inf))
|
||||
diff = K.abs((y_true - y_pred) / K.clip(K.abs(y_true),
|
||||
K.epsilon(),
|
||||
None))
|
||||
return 100. * K.mean(diff, axis=-1)
|
||||
|
||||
|
||||
def mean_squared_logarithmic_error(y_true, y_pred):
|
||||
first_log = K.log(K.clip(y_pred, K.epsilon(), np.inf) + 1.)
|
||||
second_log = K.log(K.clip(y_true, K.epsilon(), np.inf) + 1.)
|
||||
first_log = K.log(K.clip(y_pred, K.epsilon(), None) + 1.)
|
||||
second_log = K.log(K.clip(y_true, K.epsilon(), None) + 1.)
|
||||
return K.mean(K.square(first_log - second_log), axis=-1)
|
||||
|
||||
|
||||
@@ -32,16 +34,10 @@ def hinge(y_true, y_pred):
|
||||
|
||||
|
||||
def categorical_crossentropy(y_true, y_pred):
|
||||
'''Expects a binary class matrix instead of a vector of scalar classes.
|
||||
'''
|
||||
return K.categorical_crossentropy(y_pred, y_true)
|
||||
|
||||
|
||||
def sparse_categorical_crossentropy(y_true, y_pred):
|
||||
'''expects an array of integer classes.
|
||||
Note: labels shape must have the same number of dimensions as output shape.
|
||||
If you get a shape error, add a length-1 dimension to labels.
|
||||
'''
|
||||
return K.sparse_categorical_crossentropy(y_pred, y_true)
|
||||
|
||||
|
||||
@@ -65,7 +61,8 @@ def cosine_proximity(y_true, y_pred):
|
||||
return -K.mean(y_true * y_pred, axis=-1)
|
||||
|
||||
|
||||
# aliases
|
||||
# Aliases.
|
||||
|
||||
mse = MSE = mean_squared_error
|
||||
mae = MAE = mean_absolute_error
|
||||
mape = MAPE = mean_absolute_percentage_error
|
||||
|
||||
+98
-50
@@ -1,8 +1,13 @@
|
||||
from __future__ import absolute_import
|
||||
from . import backend as K
|
||||
from .utils.generic_utils import get_from_module
|
||||
|
||||
from six.moves import zip
|
||||
|
||||
from . import backend as K
|
||||
from .utils.generic_utils import get_from_module, get_custom_objects
|
||||
|
||||
if K.backend() == 'tensorflow':
|
||||
import tensorflow as tf
|
||||
|
||||
|
||||
def clip_norm(g, c, n):
|
||||
if c > 0:
|
||||
@@ -10,7 +15,20 @@ def clip_norm(g, c, n):
|
||||
return g
|
||||
|
||||
|
||||
def optimizer_from_config(config, custom_objects={}):
|
||||
def optimizer_from_config(config, custom_objects=None):
|
||||
"""Instantiate an optimizer given a config dictionary.
|
||||
|
||||
# Arguments
|
||||
config: Config dictionary
|
||||
(e.g. output of `optimizer.get_config()`).
|
||||
custom_objects: Optional dictionary of custom optimizer classes.
|
||||
|
||||
# Returns
|
||||
An optimizer instance.
|
||||
|
||||
# Raises
|
||||
ValueError: in case of invalid optimizer config.
|
||||
"""
|
||||
all_classes = {
|
||||
'sgd': SGD,
|
||||
'rmsprop': RMSprop,
|
||||
@@ -22,8 +40,10 @@ def optimizer_from_config(config, custom_objects={}):
|
||||
'tfoptimizer': TFOptimizer,
|
||||
}
|
||||
class_name = config['class_name']
|
||||
if class_name in custom_objects:
|
||||
if custom_objects and class_name in custom_objects:
|
||||
cls = custom_objects[class_name]
|
||||
elif class_name in get_custom_objects():
|
||||
cls = get_custom_objects()[class_name]
|
||||
else:
|
||||
if class_name.lower() not in all_classes:
|
||||
raise ValueError('Optimizer class not found:', class_name)
|
||||
@@ -32,7 +52,7 @@ def optimizer_from_config(config, custom_objects={}):
|
||||
|
||||
|
||||
class Optimizer(object):
|
||||
'''Abstract optimizer base class.
|
||||
"""Abstract optimizer base class.
|
||||
|
||||
Note: this is the parent class of all optimizers, not an actual optimizer
|
||||
that can be used for training models.
|
||||
@@ -43,7 +63,8 @@ class Optimizer(object):
|
||||
when their L2 norm exceeds this value.
|
||||
clipvalue: float >= 0. Gradients will be clipped
|
||||
when their absolute value exceeds this value.
|
||||
'''
|
||||
"""
|
||||
|
||||
def __init__(self, **kwargs):
|
||||
allowed_kwargs = {'clipnorm', 'clipvalue'}
|
||||
for k in kwargs:
|
||||
@@ -67,7 +88,7 @@ class Optimizer(object):
|
||||
return grads
|
||||
|
||||
def set_weights(self, weights):
|
||||
'''Sets the weights of the optimizer, from Numpy arrays.
|
||||
"""Sets the weights of the optimizer, from Numpy arrays.
|
||||
|
||||
Should only be called after computing the gradients
|
||||
(otherwise the optimizer has no weights).
|
||||
@@ -78,7 +99,10 @@ class Optimizer(object):
|
||||
number of the dimensions of the weights
|
||||
of the optimizer (i.e. it should match the
|
||||
output of `get_weights`).
|
||||
'''
|
||||
|
||||
# Raises
|
||||
ValueError: in case of incompatible weight shapes.
|
||||
"""
|
||||
params = self.weights
|
||||
weight_value_tuples = []
|
||||
param_values = K.batch_get_value(params)
|
||||
@@ -92,9 +116,11 @@ class Optimizer(object):
|
||||
K.batch_set_value(weight_value_tuples)
|
||||
|
||||
def get_weights(self):
|
||||
'''Returns the current weights of the optimizer,
|
||||
as a list of numpy arrays.
|
||||
'''
|
||||
"""Returns the current value of the weights of the optimizer.
|
||||
|
||||
# Returns
|
||||
A list of numpy arrays.
|
||||
"""
|
||||
return K.batch_get_value(self.weights)
|
||||
|
||||
def get_config(self):
|
||||
@@ -111,7 +137,9 @@ class Optimizer(object):
|
||||
|
||||
|
||||
class SGD(Optimizer):
|
||||
'''Stochastic gradient descent, with support for momentum,
|
||||
"""Stochastic gradient descent optimizer.
|
||||
|
||||
Includes support for momentum,
|
||||
learning rate decay, and Nesterov momentum.
|
||||
|
||||
# Arguments
|
||||
@@ -119,23 +147,24 @@ class SGD(Optimizer):
|
||||
momentum: float >= 0. Parameter updates momentum.
|
||||
decay: float >= 0. Learning rate decay over each update.
|
||||
nesterov: boolean. Whether to apply Nesterov momentum.
|
||||
'''
|
||||
"""
|
||||
|
||||
def __init__(self, lr=0.01, momentum=0., decay=0.,
|
||||
nesterov=False, **kwargs):
|
||||
super(SGD, self).__init__(**kwargs)
|
||||
self.__dict__.update(locals())
|
||||
self.iterations = K.variable(0.)
|
||||
self.lr = K.variable(lr)
|
||||
self.momentum = K.variable(momentum)
|
||||
self.decay = K.variable(decay)
|
||||
self.inital_decay = decay
|
||||
self.initial_decay = decay
|
||||
self.nesterov = nesterov
|
||||
|
||||
def get_updates(self, params, constraints, loss):
|
||||
grads = self.get_gradients(loss, params)
|
||||
self.updates = []
|
||||
|
||||
lr = self.lr
|
||||
if self.inital_decay > 0:
|
||||
if self.initial_decay > 0:
|
||||
lr *= (1. / (1. + self.decay * self.iterations))
|
||||
self.updates .append(K.update_add(self.iterations, 1))
|
||||
|
||||
@@ -170,7 +199,7 @@ class SGD(Optimizer):
|
||||
|
||||
|
||||
class RMSprop(Optimizer):
|
||||
'''RMSProp optimizer.
|
||||
"""RMSProp optimizer.
|
||||
|
||||
It is recommended to leave the parameters of this optimizer
|
||||
at their default values
|
||||
@@ -184,15 +213,19 @@ class RMSprop(Optimizer):
|
||||
rho: float >= 0.
|
||||
epsilon: float >= 0. Fuzz factor.
|
||||
decay: float >= 0. Learning rate decay over each update.
|
||||
'''
|
||||
|
||||
# References
|
||||
- [rmsprop: Divide the gradient by a running average of its recent magnitude](http://www.cs.toronto.edu/~tijmen/csc321/slides/lecture_slides_lec6.pdf)
|
||||
"""
|
||||
|
||||
def __init__(self, lr=0.001, rho=0.9, epsilon=1e-8, decay=0.,
|
||||
**kwargs):
|
||||
super(RMSprop, self).__init__(**kwargs)
|
||||
self.__dict__.update(locals())
|
||||
self.lr = K.variable(lr)
|
||||
self.rho = K.variable(rho)
|
||||
self.epsilon = epsilon
|
||||
self.decay = K.variable(decay)
|
||||
self.inital_decay = decay
|
||||
self.initial_decay = decay
|
||||
self.iterations = K.variable(0.)
|
||||
|
||||
def get_updates(self, params, constraints, loss):
|
||||
@@ -203,7 +236,7 @@ class RMSprop(Optimizer):
|
||||
self.updates = []
|
||||
|
||||
lr = self.lr
|
||||
if self.inital_decay > 0:
|
||||
if self.initial_decay > 0:
|
||||
lr *= (1. / (1. + self.decay * self.iterations))
|
||||
self.updates.append(K.update_add(self.iterations, 1))
|
||||
|
||||
@@ -230,7 +263,7 @@ class RMSprop(Optimizer):
|
||||
|
||||
|
||||
class Adagrad(Optimizer):
|
||||
'''Adagrad optimizer.
|
||||
"""Adagrad optimizer.
|
||||
|
||||
It is recommended to leave the parameters of this optimizer
|
||||
at their default values.
|
||||
@@ -238,16 +271,18 @@ class Adagrad(Optimizer):
|
||||
# Arguments
|
||||
lr: float >= 0. Learning rate.
|
||||
epsilon: float >= 0.
|
||||
decay: float >= 0. Learning rate decay over each update.
|
||||
|
||||
# References
|
||||
- [Adaptive Subgradient Methods for Online Learning and Stochastic Optimization](http://www.jmlr.org/papers/volume12/duchi11a/duchi11a.pdf)
|
||||
'''
|
||||
"""
|
||||
|
||||
def __init__(self, lr=0.01, epsilon=1e-8, decay=0., **kwargs):
|
||||
super(Adagrad, self).__init__(**kwargs)
|
||||
self.__dict__.update(locals())
|
||||
self.lr = K.variable(lr)
|
||||
self.epsilon = epsilon
|
||||
self.decay = K.variable(decay)
|
||||
self.inital_decay = decay
|
||||
self.initial_decay = decay
|
||||
self.iterations = K.variable(0.)
|
||||
|
||||
def get_updates(self, params, constraints, loss):
|
||||
@@ -258,7 +293,7 @@ class Adagrad(Optimizer):
|
||||
self.updates = []
|
||||
|
||||
lr = self.lr
|
||||
if self.inital_decay > 0:
|
||||
if self.initial_decay > 0:
|
||||
lr *= (1. / (1. + self.decay * self.iterations))
|
||||
self.updates.append(K.update_add(self.iterations, 1))
|
||||
|
||||
@@ -282,7 +317,7 @@ class Adagrad(Optimizer):
|
||||
|
||||
|
||||
class Adadelta(Optimizer):
|
||||
'''Adadelta optimizer.
|
||||
"""Adadelta optimizer.
|
||||
|
||||
It is recommended to leave the parameters of this optimizer
|
||||
at their default values.
|
||||
@@ -292,17 +327,20 @@ class Adadelta(Optimizer):
|
||||
It is recommended to leave it at the default value.
|
||||
rho: float >= 0.
|
||||
epsilon: float >= 0. Fuzz factor.
|
||||
decay: float >= 0. Learning rate decay over each update.
|
||||
|
||||
# References
|
||||
- [Adadelta - an adaptive learning rate method](http://arxiv.org/abs/1212.5701)
|
||||
'''
|
||||
"""
|
||||
|
||||
def __init__(self, lr=1.0, rho=0.95, epsilon=1e-8, decay=0.,
|
||||
**kwargs):
|
||||
super(Adadelta, self).__init__(**kwargs)
|
||||
self.__dict__.update(locals())
|
||||
self.lr = K.variable(lr)
|
||||
self.rho = rho
|
||||
self.epsilon = epsilon
|
||||
self.decay = K.variable(decay)
|
||||
self.inital_decay = decay
|
||||
self.initial_decay = decay
|
||||
self.iterations = K.variable(0.)
|
||||
|
||||
def get_updates(self, params, constraints, loss):
|
||||
@@ -314,7 +352,7 @@ class Adadelta(Optimizer):
|
||||
self.updates = []
|
||||
|
||||
lr = self.lr
|
||||
if self.inital_decay > 0:
|
||||
if self.initial_decay > 0:
|
||||
lr *= (1. / (1. + self.decay * self.iterations))
|
||||
self.updates.append(K.update_add(self.iterations, 1))
|
||||
|
||||
@@ -348,39 +386,43 @@ class Adadelta(Optimizer):
|
||||
|
||||
|
||||
class Adam(Optimizer):
|
||||
'''Adam optimizer.
|
||||
"""Adam optimizer.
|
||||
|
||||
Default parameters follow those provided in the original paper.
|
||||
|
||||
# Arguments
|
||||
lr: float >= 0. Learning rate.
|
||||
beta_1/beta_2: floats, 0 < beta < 1. Generally close to 1.
|
||||
beta_1: float, 0 < beta < 1. Generally close to 1.
|
||||
beta_2: float, 0 < beta < 1. Generally close to 1.
|
||||
epsilon: float >= 0. Fuzz factor.
|
||||
decay: float >= 0. Learning rate decay over each update.
|
||||
|
||||
# References
|
||||
- [Adam - A Method for Stochastic Optimization](http://arxiv.org/abs/1412.6980v8)
|
||||
'''
|
||||
"""
|
||||
|
||||
def __init__(self, lr=0.001, beta_1=0.9, beta_2=0.999,
|
||||
epsilon=1e-8, decay=0., **kwargs):
|
||||
super(Adam, self).__init__(**kwargs)
|
||||
self.__dict__.update(locals())
|
||||
self.iterations = K.variable(0)
|
||||
self.lr = K.variable(lr)
|
||||
self.beta_1 = K.variable(beta_1)
|
||||
self.beta_2 = K.variable(beta_2)
|
||||
self.epsilon = epsilon
|
||||
self.decay = K.variable(decay)
|
||||
self.inital_decay = decay
|
||||
self.initial_decay = decay
|
||||
|
||||
def get_updates(self, params, constraints, loss):
|
||||
grads = self.get_gradients(loss, params)
|
||||
self.updates = [K.update_add(self.iterations, 1)]
|
||||
|
||||
lr = self.lr
|
||||
if self.inital_decay > 0:
|
||||
if self.initial_decay > 0:
|
||||
lr *= (1. / (1. + self.decay * self.iterations))
|
||||
|
||||
t = self.iterations + 1
|
||||
lr_t = lr * K.sqrt(1. - K.pow(self.beta_2, t)) / (1. - K.pow(self.beta_1, t))
|
||||
lr_t = lr * (K.sqrt(1. - K.pow(self.beta_2, t)) /
|
||||
(1. - K.pow(self.beta_1, t)))
|
||||
|
||||
shapes = [K.get_variable_shape(p) for p in params]
|
||||
ms = [K.zeros(shape) for shape in shapes]
|
||||
@@ -414,36 +456,38 @@ class Adam(Optimizer):
|
||||
|
||||
|
||||
class Adamax(Optimizer):
|
||||
'''Adamax optimizer from Adam paper's Section 7. It is a variant
|
||||
of Adam based on the infinity norm.
|
||||
"""Adamax optimizer from Adam paper's Section 7.
|
||||
|
||||
It is a variant of Adam based on the infinity norm.
|
||||
Default parameters follow those provided in the paper.
|
||||
|
||||
# Arguments
|
||||
lr: float >= 0. Learning rate.
|
||||
beta_1/beta_2: floats, 0 < beta < 1. Generally close to 1.
|
||||
epsilon: float >= 0. Fuzz factor.
|
||||
decay: float >= 0. Learning rate decay over each update.
|
||||
|
||||
# References
|
||||
- [Adam - A Method for Stochastic Optimization](http://arxiv.org/abs/1412.6980v8)
|
||||
'''
|
||||
"""
|
||||
|
||||
def __init__(self, lr=0.002, beta_1=0.9, beta_2=0.999,
|
||||
epsilon=1e-8, decay=0., **kwargs):
|
||||
super(Adamax, self).__init__(**kwargs)
|
||||
self.__dict__.update(locals())
|
||||
self.iterations = K.variable(0.)
|
||||
self.lr = K.variable(lr)
|
||||
self.beta_1 = K.variable(beta_1)
|
||||
self.beta_2 = K.variable(beta_2)
|
||||
self.epsilon = epsilon
|
||||
self.decay = K.variable(decay)
|
||||
self.inital_decay = decay
|
||||
self.initial_decay = decay
|
||||
|
||||
def get_updates(self, params, constraints, loss):
|
||||
grads = self.get_gradients(loss, params)
|
||||
self.updates = [K.update_add(self.iterations, 1)]
|
||||
|
||||
lr = self.lr
|
||||
if self.inital_decay > 0:
|
||||
if self.initial_decay > 0:
|
||||
lr *= (1. / (1. + self.decay * self.iterations))
|
||||
|
||||
t = self.iterations + 1
|
||||
@@ -484,8 +528,9 @@ class Adamax(Optimizer):
|
||||
|
||||
|
||||
class Nadam(Optimizer):
|
||||
'''
|
||||
Nesterov Adam optimizer: Much like Adam is essentially RMSprop with momentum,
|
||||
"""Nesterov Adam optimizer.
|
||||
|
||||
Much like Adam is essentially RMSprop with momentum,
|
||||
Nadam is Adam RMSprop with Nesterov momentum.
|
||||
|
||||
Default parameters follow those provided in the paper.
|
||||
@@ -500,16 +545,17 @@ class Nadam(Optimizer):
|
||||
# References
|
||||
- [Nadam report](http://cs229.stanford.edu/proj2015/054_report.pdf)
|
||||
- [On the importance of initialization and momentum in deep learning](http://www.cs.toronto.edu/~fritz/absps/momentum.pdf)
|
||||
'''
|
||||
"""
|
||||
|
||||
def __init__(self, lr=0.002, beta_1=0.9, beta_2=0.999,
|
||||
epsilon=1e-8, schedule_decay=0.004, **kwargs):
|
||||
super(Nadam, self).__init__(**kwargs)
|
||||
self.__dict__.update(locals())
|
||||
self.iterations = K.variable(0.)
|
||||
self.m_schedule = K.variable(1.)
|
||||
self.lr = K.variable(lr)
|
||||
self.beta_1 = K.variable(beta_1)
|
||||
self.beta_2 = K.variable(beta_2)
|
||||
self.epsilon = epsilon
|
||||
self.schedule_decay = schedule_decay
|
||||
|
||||
def get_updates(self, params, constraints, loss):
|
||||
@@ -564,6 +610,8 @@ class Nadam(Optimizer):
|
||||
|
||||
|
||||
class TFOptimizer(Optimizer):
|
||||
"""Wrapper class for native TensorFlow optimizers.
|
||||
"""
|
||||
|
||||
def __init__(self, optimizer):
|
||||
self.optimizer = optimizer
|
||||
@@ -593,7 +641,8 @@ class TFOptimizer(Optimizer):
|
||||
raise NotImplementedError
|
||||
|
||||
|
||||
# aliases
|
||||
# Aliases.
|
||||
|
||||
sgd = SGD
|
||||
rmsprop = RMSprop
|
||||
adagrad = Adagrad
|
||||
@@ -606,7 +655,6 @@ nadam = Nadam
|
||||
def get(identifier, kwargs=None):
|
||||
if K.backend() == 'tensorflow':
|
||||
# Wrap TF optimizer instances
|
||||
import tensorflow as tf
|
||||
if isinstance(identifier, tf.train.Optimizer):
|
||||
return TFOptimizer(identifier)
|
||||
# Instantiate a Keras optimizer
|
||||
|
||||
+276
-135
@@ -1,7 +1,7 @@
|
||||
'''Fairly basic set of tools for real-time data augmentation on image data.
|
||||
"""Fairly basic set of tools for real-time data augmentation on image data.
|
||||
Can easily be extended to include new transformations,
|
||||
new preprocessing methods, etc...
|
||||
'''
|
||||
"""
|
||||
from __future__ import absolute_import
|
||||
from __future__ import print_function
|
||||
|
||||
@@ -16,23 +16,63 @@ import warnings
|
||||
|
||||
from .. import backend as K
|
||||
|
||||
try:
|
||||
from PIL import Image as pil_image
|
||||
except ImportError:
|
||||
pil_image = None
|
||||
|
||||
def random_rotation(x, rg, row_index=1, col_index=2, channel_index=0,
|
||||
|
||||
def random_rotation(x, rg, row_axis=1, col_axis=2, channel_axis=0,
|
||||
fill_mode='nearest', cval=0.):
|
||||
"""Performs a random rotation of a Numpy image tensor.
|
||||
|
||||
# Arguments
|
||||
x: Input tensor. Must be 3D.
|
||||
rg: Rotation range, in degrees.
|
||||
row_axis: Index of axis for rows in the input tensor.
|
||||
col_axis: Index of axis for columns in the input tensor.
|
||||
channel_axis: Index of axis for channels in the input tensor.
|
||||
fill_mode: Points outside the boundaries of the input
|
||||
are filled according to the given mode
|
||||
(one of `{'constant', 'nearest', 'reflect', 'wrap'}`).
|
||||
cval: Value used for points outside the boundaries
|
||||
of the input if `mode='constant'`.
|
||||
|
||||
# Returns
|
||||
Rotated Numpy image tensor.
|
||||
"""
|
||||
theta = np.pi / 180 * np.random.uniform(-rg, rg)
|
||||
rotation_matrix = np.array([[np.cos(theta), -np.sin(theta), 0],
|
||||
[np.sin(theta), np.cos(theta), 0],
|
||||
[0, 0, 1]])
|
||||
|
||||
h, w = x.shape[row_index], x.shape[col_index]
|
||||
h, w = x.shape[row_axis], x.shape[col_axis]
|
||||
transform_matrix = transform_matrix_offset_center(rotation_matrix, h, w)
|
||||
x = apply_transform(x, transform_matrix, channel_index, fill_mode, cval)
|
||||
x = apply_transform(x, transform_matrix, channel_axis, fill_mode, cval)
|
||||
return x
|
||||
|
||||
|
||||
def random_shift(x, wrg, hrg, row_index=1, col_index=2, channel_index=0,
|
||||
def random_shift(x, wrg, hrg, row_axis=1, col_axis=2, channel_axis=0,
|
||||
fill_mode='nearest', cval=0.):
|
||||
h, w = x.shape[row_index], x.shape[col_index]
|
||||
"""Performs a random spatial shift of a Numpy image tensor.
|
||||
|
||||
# Arguments
|
||||
x: Input tensor. Must be 3D.
|
||||
wrg: Width shift range, as a float fraction of the width.
|
||||
hrg: Height shift range, as a float fraction of the height.
|
||||
row_axis: Index of axis for rows in the input tensor.
|
||||
col_axis: Index of axis for columns in the input tensor.
|
||||
channel_axis: Index of axis for channels in the input tensor.
|
||||
fill_mode: Points outside the boundaries of the input
|
||||
are filled according to the given mode
|
||||
(one of `{'constant', 'nearest', 'reflect', 'wrap'}`).
|
||||
cval: Value used for points outside the boundaries
|
||||
of the input if `mode='constant'`.
|
||||
|
||||
# Returns
|
||||
Shifted Numpy image tensor.
|
||||
"""
|
||||
h, w = x.shape[row_axis], x.shape[col_axis]
|
||||
tx = np.random.uniform(-hrg, hrg) * h
|
||||
ty = np.random.uniform(-wrg, wrg) * w
|
||||
translation_matrix = np.array([[1, 0, tx],
|
||||
@@ -40,25 +80,62 @@ def random_shift(x, wrg, hrg, row_index=1, col_index=2, channel_index=0,
|
||||
[0, 0, 1]])
|
||||
|
||||
transform_matrix = translation_matrix # no need to do offset
|
||||
x = apply_transform(x, transform_matrix, channel_index, fill_mode, cval)
|
||||
x = apply_transform(x, transform_matrix, channel_axis, fill_mode, cval)
|
||||
return x
|
||||
|
||||
|
||||
def random_shear(x, intensity, row_index=1, col_index=2, channel_index=0,
|
||||
def random_shear(x, intensity, row_axis=1, col_axis=2, channel_axis=0,
|
||||
fill_mode='nearest', cval=0.):
|
||||
"""Performs a random spatial shear of a Numpy image tensor.
|
||||
|
||||
# Arguments
|
||||
x: Input tensor. Must be 3D.
|
||||
intensity: Transformation intensity.
|
||||
row_axis: Index of axis for rows in the input tensor.
|
||||
col_axis: Index of axis for columns in the input tensor.
|
||||
channel_axis: Index of axis for channels in the input tensor.
|
||||
fill_mode: Points outside the boundaries of the input
|
||||
are filled according to the given mode
|
||||
(one of `{'constant', 'nearest', 'reflect', 'wrap'}`).
|
||||
cval: Value used for points outside the boundaries
|
||||
of the input if `mode='constant'`.
|
||||
|
||||
# Returns
|
||||
Sheared Numpy image tensor.
|
||||
"""
|
||||
shear = np.random.uniform(-intensity, intensity)
|
||||
shear_matrix = np.array([[1, -np.sin(shear), 0],
|
||||
[0, np.cos(shear), 0],
|
||||
[0, 0, 1]])
|
||||
|
||||
h, w = x.shape[row_index], x.shape[col_index]
|
||||
h, w = x.shape[row_axis], x.shape[col_axis]
|
||||
transform_matrix = transform_matrix_offset_center(shear_matrix, h, w)
|
||||
x = apply_transform(x, transform_matrix, channel_index, fill_mode, cval)
|
||||
x = apply_transform(x, transform_matrix, channel_axis, fill_mode, cval)
|
||||
return x
|
||||
|
||||
|
||||
def random_zoom(x, zoom_range, row_index=1, col_index=2, channel_index=0,
|
||||
def random_zoom(x, zoom_range, row_axis=1, col_axis=2, channel_axis=0,
|
||||
fill_mode='nearest', cval=0.):
|
||||
"""Performs a random spatial zoom of a Numpy image tensor.
|
||||
|
||||
# Arguments
|
||||
x: Input tensor. Must be 3D.
|
||||
zoom_range: Tuple of floats; zoom range for width and height.
|
||||
row_axis: Index of axis for rows in the input tensor.
|
||||
col_axis: Index of axis for columns in the input tensor.
|
||||
channel_axis: Index of axis for channels in the input tensor.
|
||||
fill_mode: Points outside the boundaries of the input
|
||||
are filled according to the given mode
|
||||
(one of `{'constant', 'nearest', 'reflect', 'wrap'}`).
|
||||
cval: Value used for points outside the boundaries
|
||||
of the input if `mode='constant'`.
|
||||
|
||||
# Returns
|
||||
Zoomed Numpy image tensor.
|
||||
|
||||
# Raises
|
||||
ValueError: if `zoom_range` isn't a tuple.
|
||||
"""
|
||||
if len(zoom_range) != 2:
|
||||
raise ValueError('zoom_range should be a tuple or list of two floats. '
|
||||
'Received arg: ', zoom_range)
|
||||
@@ -71,24 +148,19 @@ def random_zoom(x, zoom_range, row_index=1, col_index=2, channel_index=0,
|
||||
[0, zy, 0],
|
||||
[0, 0, 1]])
|
||||
|
||||
h, w = x.shape[row_index], x.shape[col_index]
|
||||
h, w = x.shape[row_axis], x.shape[col_axis]
|
||||
transform_matrix = transform_matrix_offset_center(zoom_matrix, h, w)
|
||||
x = apply_transform(x, transform_matrix, channel_index, fill_mode, cval)
|
||||
x = apply_transform(x, transform_matrix, channel_axis, fill_mode, cval)
|
||||
return x
|
||||
|
||||
|
||||
def random_barrel_transform(x, intensity):
|
||||
# TODO
|
||||
pass
|
||||
|
||||
|
||||
def random_channel_shift(x, intensity, channel_index=0):
|
||||
x = np.rollaxis(x, channel_index, 0)
|
||||
def random_channel_shift(x, intensity, channel_axis=0):
|
||||
x = np.rollaxis(x, channel_axis, 0)
|
||||
min_x, max_x = np.min(x), np.max(x)
|
||||
channel_images = [np.clip(x_channel + np.random.uniform(-intensity, intensity), min_x, max_x)
|
||||
for x_channel in x]
|
||||
x = np.stack(channel_images, axis=0)
|
||||
x = np.rollaxis(x, 0, channel_index+1)
|
||||
x = np.rollaxis(x, 0, channel_axis + 1)
|
||||
return x
|
||||
|
||||
|
||||
@@ -101,14 +173,14 @@ def transform_matrix_offset_center(matrix, x, y):
|
||||
return transform_matrix
|
||||
|
||||
|
||||
def apply_transform(x, transform_matrix, channel_index=0, fill_mode='nearest', cval=0.):
|
||||
x = np.rollaxis(x, channel_index, 0)
|
||||
def apply_transform(x, transform_matrix, channel_axis=0, fill_mode='nearest', cval=0.):
|
||||
x = np.rollaxis(x, channel_axis, 0)
|
||||
final_affine_matrix = transform_matrix[:2, :2]
|
||||
final_offset = transform_matrix[:2, 2]
|
||||
channel_images = [ndi.interpolation.affine_transform(x_channel, final_affine_matrix,
|
||||
final_offset, order=0, mode=fill_mode, cval=cval) for x_channel in x]
|
||||
final_offset, order=0, mode=fill_mode, cval=cval) for x_channel in x]
|
||||
x = np.stack(channel_images, axis=0)
|
||||
x = np.rollaxis(x, 0, channel_index+1)
|
||||
x = np.rollaxis(x, 0, channel_axis + 1)
|
||||
return x
|
||||
|
||||
|
||||
@@ -120,8 +192,25 @@ def flip_axis(x, axis):
|
||||
|
||||
|
||||
def array_to_img(x, dim_ordering='default', scale=True):
|
||||
from PIL import Image
|
||||
x = np.asarray(x)
|
||||
"""Converts a 3D Numpy array to a PIL Image instance.
|
||||
|
||||
# Arguments
|
||||
x: Input Numpy array.
|
||||
dim_ordering: Image data format.
|
||||
scale: Whether to rescale image values
|
||||
to be within [0, 255].
|
||||
|
||||
# Returns
|
||||
A PIL Image instance.
|
||||
|
||||
# Raises
|
||||
ImportError: if PIL is not available.
|
||||
ValueError: if invalid `x` or `dim_ordering` is passed.
|
||||
"""
|
||||
if pil_image is None:
|
||||
raise ImportError('Could not import PIL.Image. '
|
||||
'The use of `array_to_img` requires PIL.')
|
||||
x = np.asarray(x, dtype=K.floatx())
|
||||
if x.ndim != 3:
|
||||
raise ValueError('Expected image array to have rank 3 (single image). '
|
||||
'Got array with shape:', x.shape)
|
||||
@@ -137,22 +226,34 @@ def array_to_img(x, dim_ordering='default', scale=True):
|
||||
if dim_ordering == 'th':
|
||||
x = x.transpose(1, 2, 0)
|
||||
if scale:
|
||||
x += max(-np.min(x), 0)
|
||||
x = x + max(-np.min(x), 0)
|
||||
x_max = np.max(x)
|
||||
if x_max != 0:
|
||||
x /= x_max
|
||||
x *= 255
|
||||
if x.shape[2] == 3:
|
||||
# RGB
|
||||
return Image.fromarray(x.astype('uint8'), 'RGB')
|
||||
return pil_image.fromarray(x.astype('uint8'), 'RGB')
|
||||
elif x.shape[2] == 1:
|
||||
# grayscale
|
||||
return Image.fromarray(x[:, :, 0].astype('uint8'), 'L')
|
||||
return pil_image.fromarray(x[:, :, 0].astype('uint8'), 'L')
|
||||
else:
|
||||
raise ValueError('Unsupported channel number: ', x.shape[2])
|
||||
|
||||
|
||||
def img_to_array(img, dim_ordering='default'):
|
||||
"""Converts a PIL Image instance to a Numpy array.
|
||||
|
||||
# Arguments
|
||||
img: PIL Image instance.
|
||||
dim_ordering: Image data format.
|
||||
|
||||
# Returns
|
||||
A 3D Numpy array.
|
||||
|
||||
# Raises
|
||||
ValueError: if invalid `img` or `dim_ordering` is passed.
|
||||
"""
|
||||
if dim_ordering == 'default':
|
||||
dim_ordering = K.image_dim_ordering()
|
||||
if dim_ordering not in {'th', 'tf'}:
|
||||
@@ -160,7 +261,7 @@ def img_to_array(img, dim_ordering='default'):
|
||||
# Numpy array x has format (height, width, channel)
|
||||
# or (channel, height, width)
|
||||
# but original PIL image has format (width, height, channel)
|
||||
x = np.asarray(img, dtype='float32')
|
||||
x = np.asarray(img, dtype=K.floatx())
|
||||
if len(x.shape) == 3:
|
||||
if dim_ordering == 'th':
|
||||
x = x.transpose(2, 0, 1)
|
||||
@@ -175,16 +276,24 @@ def img_to_array(img, dim_ordering='default'):
|
||||
|
||||
|
||||
def load_img(path, grayscale=False, target_size=None):
|
||||
'''Load an image into PIL format.
|
||||
"""Loads an image into PIL format.
|
||||
|
||||
# Arguments
|
||||
path: path to image file
|
||||
grayscale: boolean
|
||||
target_size: None (default to original size)
|
||||
or (img_height, img_width)
|
||||
'''
|
||||
from PIL import Image
|
||||
img = Image.open(path)
|
||||
path: Path to image file
|
||||
grayscale: Boolean, whether to load the image as grayscale.
|
||||
target_size: Either `None` (default to original size)
|
||||
or tuple of ints `(img_height, img_width)`.
|
||||
|
||||
# Returns
|
||||
A PIL Image instance.
|
||||
|
||||
# Raises
|
||||
ImportError: if PIL is not available.
|
||||
"""
|
||||
if pil_image is None:
|
||||
raise ImportError('Could not import PIL.Image. '
|
||||
'The use of `array_to_img` requires PIL.')
|
||||
img = pil_image.open(path)
|
||||
if grayscale:
|
||||
img = img.convert('L')
|
||||
else: # Ensure 3 channel even when loaded image is grayscale
|
||||
@@ -196,13 +305,12 @@ def load_img(path, grayscale=False, target_size=None):
|
||||
|
||||
def list_pictures(directory, ext='jpg|jpeg|bmp|png'):
|
||||
return [os.path.join(root, f)
|
||||
for root, dirs, files in os.walk(directory) for f in files
|
||||
for root, _, files in os.walk(directory) for f in files
|
||||
if re.match('([\w]+\.(?:' + ext + '))', f)]
|
||||
|
||||
|
||||
class ImageDataGenerator(object):
|
||||
'''Generate minibatches with
|
||||
real-time data augmentation.
|
||||
"""Generate minibatches of image data with real-time data augmentation.
|
||||
|
||||
# Arguments
|
||||
featurewise_center: set input mean to 0 over the dataset.
|
||||
@@ -230,14 +338,16 @@ class ImageDataGenerator(object):
|
||||
(before applying any other transformation).
|
||||
preprocessing_function: function that will be implied on each input.
|
||||
The function will run before any other modification on it.
|
||||
The function should take one argument: one image (Numpy tensor with rank 3),
|
||||
The function should take one argument:
|
||||
one image (Numpy tensor with rank 3),
|
||||
and should output a Numpy tensor with the same shape.
|
||||
dim_ordering: 'th' or 'tf'. In 'th' mode, the channels dimension
|
||||
(the depth) is at index 1, in 'tf' mode it is at index 3.
|
||||
It defaults to the `image_dim_ordering` value found in your
|
||||
Keras config file at `~/.keras/keras.json`.
|
||||
If you never set it, then it will be "th".
|
||||
'''
|
||||
If you never set it, then it will be "tf".
|
||||
"""
|
||||
|
||||
def __init__(self,
|
||||
featurewise_center=False,
|
||||
samplewise_center=False,
|
||||
@@ -259,10 +369,21 @@ class ImageDataGenerator(object):
|
||||
dim_ordering='default'):
|
||||
if dim_ordering == 'default':
|
||||
dim_ordering = K.image_dim_ordering()
|
||||
self.__dict__.update(locals())
|
||||
self.mean = None
|
||||
self.std = None
|
||||
self.principal_components = None
|
||||
self.featurewise_center = featurewise_center
|
||||
self.samplewise_center = samplewise_center
|
||||
self.featurewise_std_normalization = featurewise_std_normalization
|
||||
self.samplewise_std_normalization = samplewise_std_normalization
|
||||
self.zca_whitening = zca_whitening
|
||||
self.rotation_range = rotation_range
|
||||
self.width_shift_range = width_shift_range
|
||||
self.height_shift_range = height_shift_range
|
||||
self.shear_range = shear_range
|
||||
self.zoom_range = zoom_range
|
||||
self.channel_shift_range = channel_shift_range
|
||||
self.fill_mode = fill_mode
|
||||
self.cval = cval
|
||||
self.horizontal_flip = horizontal_flip
|
||||
self.vertical_flip = vertical_flip
|
||||
self.rescale = rescale
|
||||
self.preprocessing_function = preprocessing_function
|
||||
|
||||
@@ -272,13 +393,17 @@ class ImageDataGenerator(object):
|
||||
'Received arg: ', dim_ordering)
|
||||
self.dim_ordering = dim_ordering
|
||||
if dim_ordering == 'th':
|
||||
self.channel_index = 1
|
||||
self.row_index = 2
|
||||
self.col_index = 3
|
||||
self.channel_axis = 1
|
||||
self.row_axis = 2
|
||||
self.col_axis = 3
|
||||
if dim_ordering == 'tf':
|
||||
self.channel_index = 3
|
||||
self.row_index = 1
|
||||
self.col_index = 2
|
||||
self.channel_axis = 3
|
||||
self.row_axis = 1
|
||||
self.col_axis = 2
|
||||
|
||||
self.mean = None
|
||||
self.std = None
|
||||
self.principal_components = None
|
||||
|
||||
if np.isscalar(zoom_range):
|
||||
self.zoom_range = [1 - zoom_range, 1 + zoom_range]
|
||||
@@ -293,15 +418,21 @@ class ImageDataGenerator(object):
|
||||
save_to_dir=None, save_prefix='', save_format='jpeg'):
|
||||
return NumpyArrayIterator(
|
||||
X, y, self,
|
||||
batch_size=batch_size, shuffle=shuffle, seed=seed,
|
||||
batch_size=batch_size,
|
||||
shuffle=shuffle,
|
||||
seed=seed,
|
||||
dim_ordering=self.dim_ordering,
|
||||
save_to_dir=save_to_dir, save_prefix=save_prefix, save_format=save_format)
|
||||
save_to_dir=save_to_dir,
|
||||
save_prefix=save_prefix,
|
||||
save_format=save_format)
|
||||
|
||||
def flow_from_directory(self, directory,
|
||||
target_size=(256, 256), color_mode='rgb',
|
||||
classes=None, class_mode='categorical',
|
||||
batch_size=32, shuffle=True, seed=None,
|
||||
save_to_dir=None, save_prefix='', save_format='jpeg',
|
||||
save_to_dir=None,
|
||||
save_prefix='',
|
||||
save_format='jpeg',
|
||||
follow_links=False):
|
||||
return DirectoryIterator(
|
||||
directory, self,
|
||||
@@ -309,7 +440,9 @@ class ImageDataGenerator(object):
|
||||
classes=classes, class_mode=class_mode,
|
||||
dim_ordering=self.dim_ordering,
|
||||
batch_size=batch_size, shuffle=shuffle, seed=seed,
|
||||
save_to_dir=save_to_dir, save_prefix=save_prefix, save_format=save_format,
|
||||
save_to_dir=save_to_dir,
|
||||
save_prefix=save_prefix,
|
||||
save_format=save_format,
|
||||
follow_links=follow_links)
|
||||
|
||||
def standardize(self, x):
|
||||
@@ -318,11 +451,11 @@ class ImageDataGenerator(object):
|
||||
if self.rescale:
|
||||
x *= self.rescale
|
||||
# x is a single image, so it doesn't have image number at index 0
|
||||
img_channel_index = self.channel_index - 1
|
||||
img_channel_axis = self.channel_axis - 1
|
||||
if self.samplewise_center:
|
||||
x -= np.mean(x, axis=img_channel_index, keepdims=True)
|
||||
x -= np.mean(x, axis=img_channel_axis, keepdims=True)
|
||||
if self.samplewise_std_normalization:
|
||||
x /= (np.std(x, axis=img_channel_index, keepdims=True) + 1e-7)
|
||||
x /= (np.std(x, axis=img_channel_axis, keepdims=True) + 1e-7)
|
||||
|
||||
if self.featurewise_center:
|
||||
if self.mean is not None:
|
||||
@@ -354,11 +487,12 @@ class ImageDataGenerator(object):
|
||||
|
||||
def random_transform(self, x):
|
||||
# x is a single image, so it doesn't have image number at index 0
|
||||
img_row_index = self.row_index - 1
|
||||
img_col_index = self.col_index - 1
|
||||
img_channel_index = self.channel_index - 1
|
||||
img_row_axis = self.row_axis - 1
|
||||
img_col_axis = self.col_axis - 1
|
||||
img_channel_axis = self.channel_axis - 1
|
||||
|
||||
# use composition of homographies to generate final transform that needs to be applied
|
||||
# use composition of homographies
|
||||
# to generate final transform that needs to be applied
|
||||
if self.rotation_range:
|
||||
theta = np.pi / 180 * np.random.uniform(-self.rotation_range, self.rotation_range)
|
||||
else:
|
||||
@@ -367,12 +501,12 @@ class ImageDataGenerator(object):
|
||||
[np.sin(theta), np.cos(theta), 0],
|
||||
[0, 0, 1]])
|
||||
if self.height_shift_range:
|
||||
tx = np.random.uniform(-self.height_shift_range, self.height_shift_range) * x.shape[img_row_index]
|
||||
tx = np.random.uniform(-self.height_shift_range, self.height_shift_range) * x.shape[img_row_axis]
|
||||
else:
|
||||
tx = 0
|
||||
|
||||
if self.width_shift_range:
|
||||
ty = np.random.uniform(-self.width_shift_range, self.width_shift_range) * x.shape[img_col_index]
|
||||
ty = np.random.uniform(-self.width_shift_range, self.width_shift_range) * x.shape[img_col_axis]
|
||||
else:
|
||||
ty = 0
|
||||
|
||||
@@ -395,37 +529,38 @@ class ImageDataGenerator(object):
|
||||
[0, zy, 0],
|
||||
[0, 0, 1]])
|
||||
|
||||
transform_matrix = np.dot(np.dot(np.dot(rotation_matrix, translation_matrix), shear_matrix), zoom_matrix)
|
||||
transform_matrix = np.dot(np.dot(np.dot(rotation_matrix,
|
||||
translation_matrix),
|
||||
shear_matrix),
|
||||
zoom_matrix)
|
||||
|
||||
h, w = x.shape[img_row_index], x.shape[img_col_index]
|
||||
h, w = x.shape[img_row_axis], x.shape[img_col_axis]
|
||||
transform_matrix = transform_matrix_offset_center(transform_matrix, h, w)
|
||||
x = apply_transform(x, transform_matrix, img_channel_index,
|
||||
x = apply_transform(x, transform_matrix, img_channel_axis,
|
||||
fill_mode=self.fill_mode, cval=self.cval)
|
||||
if self.channel_shift_range != 0:
|
||||
x = random_channel_shift(x, self.channel_shift_range, img_channel_index)
|
||||
|
||||
x = random_channel_shift(x,
|
||||
self.channel_shift_range,
|
||||
img_channel_axis)
|
||||
if self.horizontal_flip:
|
||||
if np.random.random() < 0.5:
|
||||
x = flip_axis(x, img_col_index)
|
||||
x = flip_axis(x, img_col_axis)
|
||||
|
||||
if self.vertical_flip:
|
||||
if np.random.random() < 0.5:
|
||||
x = flip_axis(x, img_row_index)
|
||||
x = flip_axis(x, img_row_axis)
|
||||
|
||||
# TODO:
|
||||
# channel-wise normalization
|
||||
# barrel/fisheye
|
||||
return x
|
||||
|
||||
def fit(self, X,
|
||||
def fit(self, x,
|
||||
augment=False,
|
||||
rounds=1,
|
||||
seed=None):
|
||||
'''Required for featurewise_center, featurewise_std_normalization
|
||||
"""Required for featurewise_center, featurewise_std_normalization
|
||||
and zca_whitening.
|
||||
|
||||
# Arguments
|
||||
X: Numpy array, the data to fit on. Should have rank 4.
|
||||
x: Numpy array, the data to fit on. Should have rank 4.
|
||||
In case of grayscale data,
|
||||
the channels axis should have value 1, and in case
|
||||
of RGB data, it should have value 3.
|
||||
@@ -433,83 +568,86 @@ class ImageDataGenerator(object):
|
||||
rounds: If `augment`,
|
||||
how many augmentation passes to do over the data
|
||||
seed: random seed.
|
||||
'''
|
||||
X = np.asarray(X)
|
||||
if X.ndim != 4:
|
||||
|
||||
# Raises
|
||||
ValueError: in case of invalid input `x`.
|
||||
"""
|
||||
x = np.asarray(x, dtype=K.floatx())
|
||||
if x.ndim != 4:
|
||||
raise ValueError('Input to `.fit()` should have rank 4. '
|
||||
'Got array with shape: ' + str(X.shape))
|
||||
if X.shape[self.channel_index] not in {1, 3, 4}:
|
||||
'Got array with shape: ' + str(x.shape))
|
||||
if x.shape[self.channel_axis] not in {1, 3, 4}:
|
||||
raise ValueError(
|
||||
'Expected input to be images (as Numpy array) '
|
||||
'following the dimension ordering convention "' + self.dim_ordering + '" '
|
||||
'(channels on axis ' + str(self.channel_index) + '), i.e. expected '
|
||||
'either 1, 3 or 4 channels on axis ' + str(self.channel_index) + '. '
|
||||
'However, it was passed an array with shape ' + str(X.shape) +
|
||||
' (' + str(X.shape[self.channel_index]) + ' channels).')
|
||||
'(channels on axis ' + str(self.channel_axis) + '), i.e. expected '
|
||||
'either 1, 3 or 4 channels on axis ' + str(self.channel_axis) + '. '
|
||||
'However, it was passed an array with shape ' + str(x.shape) +
|
||||
' (' + str(x.shape[self.channel_axis]) + ' channels).')
|
||||
|
||||
if seed is not None:
|
||||
np.random.seed(seed)
|
||||
|
||||
X = np.copy(X)
|
||||
x = np.copy(x)
|
||||
if augment:
|
||||
aX = np.zeros(tuple([rounds * X.shape[0]] + list(X.shape)[1:]))
|
||||
ax = np.zeros(tuple([rounds * x.shape[0]] + list(x.shape)[1:]), dtype=K.floatx())
|
||||
for r in range(rounds):
|
||||
for i in range(X.shape[0]):
|
||||
aX[i + r * X.shape[0]] = self.random_transform(X[i])
|
||||
X = aX
|
||||
for i in range(x.shape[0]):
|
||||
ax[i + r * x.shape[0]] = self.random_transform(x[i])
|
||||
x = ax
|
||||
|
||||
if self.featurewise_center:
|
||||
self.mean = np.mean(X, axis=(0, self.row_index, self.col_index))
|
||||
self.mean = np.mean(x, axis=(0, self.row_axis, self.col_axis))
|
||||
broadcast_shape = [1, 1, 1]
|
||||
broadcast_shape[self.channel_index - 1] = X.shape[self.channel_index]
|
||||
broadcast_shape[self.channel_axis - 1] = x.shape[self.channel_axis]
|
||||
self.mean = np.reshape(self.mean, broadcast_shape)
|
||||
X -= self.mean
|
||||
x -= self.mean
|
||||
|
||||
if self.featurewise_std_normalization:
|
||||
self.std = np.std(X, axis=(0, self.row_index, self.col_index))
|
||||
self.std = np.std(x, axis=(0, self.row_axis, self.col_axis))
|
||||
broadcast_shape = [1, 1, 1]
|
||||
broadcast_shape[self.channel_index - 1] = X.shape[self.channel_index]
|
||||
broadcast_shape[self.channel_axis - 1] = x.shape[self.channel_axis]
|
||||
self.std = np.reshape(self.std, broadcast_shape)
|
||||
X /= (self.std + K.epsilon())
|
||||
x /= (self.std + K.epsilon())
|
||||
|
||||
if self.zca_whitening:
|
||||
flatX = np.reshape(X, (X.shape[0], X.shape[1] * X.shape[2] * X.shape[3]))
|
||||
sigma = np.dot(flatX.T, flatX) / flatX.shape[0]
|
||||
U, S, V = linalg.svd(sigma)
|
||||
self.principal_components = np.dot(np.dot(U, np.diag(1. / np.sqrt(S + 10e-7))), U.T)
|
||||
flat_x = np.reshape(x, (x.shape[0], x.shape[1] * x.shape[2] * x.shape[3]))
|
||||
sigma = np.dot(flat_x.T, flat_x) / flat_x.shape[0]
|
||||
u, s, _ = linalg.svd(sigma)
|
||||
self.principal_components = np.dot(np.dot(u, np.diag(1. / np.sqrt(s + 10e-7))), u.T)
|
||||
|
||||
|
||||
class Iterator(object):
|
||||
|
||||
def __init__(self, N, batch_size, shuffle, seed):
|
||||
self.N = N
|
||||
def __init__(self, n, batch_size, shuffle, seed):
|
||||
self.n = n
|
||||
self.batch_size = batch_size
|
||||
self.shuffle = shuffle
|
||||
self.batch_index = 0
|
||||
self.total_batches_seen = 0
|
||||
self.lock = threading.Lock()
|
||||
self.index_generator = self._flow_index(N, batch_size, shuffle, seed)
|
||||
self.index_generator = self._flow_index(n, batch_size, shuffle, seed)
|
||||
|
||||
def reset(self):
|
||||
self.batch_index = 0
|
||||
|
||||
def _flow_index(self, N, batch_size=32, shuffle=False, seed=None):
|
||||
def _flow_index(self, n, batch_size=32, shuffle=False, seed=None):
|
||||
# ensure self.batch_index is 0
|
||||
self.reset()
|
||||
while 1:
|
||||
if seed is not None:
|
||||
np.random.seed(seed + self.total_batches_seen)
|
||||
if self.batch_index == 0:
|
||||
index_array = np.arange(N)
|
||||
index_array = np.arange(n)
|
||||
if shuffle:
|
||||
index_array = np.random.permutation(N)
|
||||
index_array = np.random.permutation(n)
|
||||
|
||||
current_index = (self.batch_index * batch_size) % N
|
||||
if N >= current_index + batch_size:
|
||||
current_index = (self.batch_index * batch_size) % n
|
||||
if n >= current_index + batch_size:
|
||||
current_batch_size = batch_size
|
||||
self.batch_index += 1
|
||||
else:
|
||||
current_batch_size = N - current_index
|
||||
current_batch_size = n - current_index
|
||||
self.batch_index = 0
|
||||
self.total_batches_seen += 1
|
||||
yield (index_array[current_index: current_index + current_batch_size],
|
||||
@@ -526,29 +664,30 @@ class Iterator(object):
|
||||
|
||||
class NumpyArrayIterator(Iterator):
|
||||
|
||||
def __init__(self, X, y, image_data_generator,
|
||||
def __init__(self, x, y, image_data_generator,
|
||||
batch_size=32, shuffle=False, seed=None,
|
||||
dim_ordering='default',
|
||||
save_to_dir=None, save_prefix='', save_format='jpeg'):
|
||||
if y is not None and len(X) != len(y):
|
||||
if y is not None and len(x) != len(y):
|
||||
raise ValueError('X (images tensor) and y (labels) '
|
||||
'should have the same length. '
|
||||
'Found: X.shape = %s, y.shape = %s' % (np.asarray(X).shape, np.asarray(y).shape))
|
||||
'Found: X.shape = %s, y.shape = %s' %
|
||||
(np.asarray(x).shape, np.asarray(y).shape))
|
||||
if dim_ordering == 'default':
|
||||
dim_ordering = K.image_dim_ordering()
|
||||
self.X = np.asarray(X)
|
||||
if self.X.ndim != 4:
|
||||
self.x = np.asarray(x, dtype=K.floatx())
|
||||
if self.x.ndim != 4:
|
||||
raise ValueError('Input data in `NumpyArrayIterator` '
|
||||
'should have rank 4. You passed an array '
|
||||
'with shape', self.X.shape)
|
||||
'with shape', self.x.shape)
|
||||
channels_axis = 3 if dim_ordering == 'tf' else 1
|
||||
if self.X.shape[channels_axis] not in {1, 3, 4}:
|
||||
if self.x.shape[channels_axis] not in {1, 3, 4}:
|
||||
raise ValueError('NumpyArrayIterator is set to use the '
|
||||
'dimension ordering convention "' + dim_ordering + '" '
|
||||
'(channels on axis ' + str(channels_axis) + '), i.e. expected '
|
||||
'either 1, 3 or 4 channels on axis ' + str(channels_axis) + '. '
|
||||
'However, it was passed an array with shape ' + str(self.X.shape) +
|
||||
' (' + str(self.X.shape[channels_axis]) + ' channels).')
|
||||
'However, it was passed an array with shape ' + str(self.x.shape) +
|
||||
' (' + str(self.x.shape[channels_axis]) + ' channels).')
|
||||
if y is not None:
|
||||
self.y = np.asarray(y)
|
||||
else:
|
||||
@@ -558,7 +697,7 @@ class NumpyArrayIterator(Iterator):
|
||||
self.save_to_dir = save_to_dir
|
||||
self.save_prefix = save_prefix
|
||||
self.save_format = save_format
|
||||
super(NumpyArrayIterator, self).__init__(X.shape[0], batch_size, shuffle, seed)
|
||||
super(NumpyArrayIterator, self).__init__(x.shape[0], batch_size, shuffle, seed)
|
||||
|
||||
def next(self):
|
||||
# for python 2.x.
|
||||
@@ -567,11 +706,12 @@ class NumpyArrayIterator(Iterator):
|
||||
# see http://anandology.com/blog/using-iterators-and-generators/
|
||||
with self.lock:
|
||||
index_array, current_index, current_batch_size = next(self.index_generator)
|
||||
# The transformation of images is not under thread lock so it can be done in parallel
|
||||
batch_x = np.zeros(tuple([current_batch_size] + list(self.X.shape)[1:]))
|
||||
# The transformation of images is not under thread lock
|
||||
# so it can be done in parallel
|
||||
batch_x = np.zeros(tuple([current_batch_size] + list(self.x.shape)[1:]), dtype=K.floatx())
|
||||
for i, j in enumerate(index_array):
|
||||
x = self.X[j]
|
||||
x = self.image_data_generator.random_transform(x.astype('float32'))
|
||||
x = self.x[j]
|
||||
x = self.image_data_generator.random_transform(x.astype(K.floatx()))
|
||||
x = self.image_data_generator.standardize(x)
|
||||
batch_x[i] = x
|
||||
if self.save_to_dir:
|
||||
@@ -645,7 +785,7 @@ class DirectoryIterator(Iterator):
|
||||
|
||||
for subdir in classes:
|
||||
subpath = os.path.join(directory, subdir)
|
||||
for root, dirs, files in _recursive_list(subpath):
|
||||
for root, _, files in _recursive_list(subpath):
|
||||
for fname in files:
|
||||
is_valid = False
|
||||
for extension in white_list_formats:
|
||||
@@ -662,7 +802,7 @@ class DirectoryIterator(Iterator):
|
||||
i = 0
|
||||
for subdir in classes:
|
||||
subpath = os.path.join(directory, subdir)
|
||||
for root, dirs, files in _recursive_list(subpath):
|
||||
for root, _, files in _recursive_list(subpath):
|
||||
for fname in files:
|
||||
is_valid = False
|
||||
for extension in white_list_formats:
|
||||
@@ -680,8 +820,9 @@ class DirectoryIterator(Iterator):
|
||||
def next(self):
|
||||
with self.lock:
|
||||
index_array, current_index, current_batch_size = next(self.index_generator)
|
||||
# The transformation of images is not under thread lock so it can be done in parallel
|
||||
batch_x = np.zeros((current_batch_size,) + self.image_shape)
|
||||
# The transformation of images is not under thread lock
|
||||
# so it can be done in parallel
|
||||
batch_x = np.zeros((current_batch_size,) + self.image_shape, dtype=K.floatx())
|
||||
grayscale = self.color_mode == 'grayscale'
|
||||
# build batch of image data
|
||||
for i, j in enumerate(index_array):
|
||||
@@ -706,9 +847,9 @@ class DirectoryIterator(Iterator):
|
||||
if self.class_mode == 'sparse':
|
||||
batch_y = self.classes[index_array]
|
||||
elif self.class_mode == 'binary':
|
||||
batch_y = self.classes[index_array].astype('float32')
|
||||
batch_y = self.classes[index_array].astype(K.floatx())
|
||||
elif self.class_mode == 'categorical':
|
||||
batch_y = np.zeros((len(batch_x), self.nb_class), dtype='float32')
|
||||
batch_y = np.zeros((len(batch_x), self.nb_class), dtype=K.floatx())
|
||||
for i, label in enumerate(self.classes[index_array]):
|
||||
batch_y[i, label] = 1.
|
||||
else:
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
from __future__ import absolute_import
|
||||
# -*- coding: utf-8 -*-
|
||||
from __future__ import absolute_import
|
||||
|
||||
import numpy as np
|
||||
import random
|
||||
from six.moves import range
|
||||
@@ -7,8 +8,7 @@ from six.moves import range
|
||||
|
||||
def pad_sequences(sequences, maxlen=None, dtype='int32',
|
||||
padding='pre', truncating='pre', value=0.):
|
||||
'''Pads each sequence to the same length:
|
||||
the length of the longest sequence.
|
||||
"""Pads each sequence to the same length (length of the longest sequence).
|
||||
|
||||
If maxlen is provided, any sequence longer
|
||||
than maxlen is truncated to maxlen.
|
||||
@@ -28,7 +28,11 @@ def pad_sequences(sequences, maxlen=None, dtype='int32',
|
||||
|
||||
# Returns
|
||||
x: numpy array with dimensions (number_of_sequences, maxlen)
|
||||
'''
|
||||
|
||||
# Raises
|
||||
ValueError: in case of invalid values for `truncating` or `padding`,
|
||||
or in case of invalid shape for a `sequences` entry.
|
||||
"""
|
||||
lengths = [len(s) for s in sequences]
|
||||
|
||||
nb_samples = len(sequences)
|
||||
@@ -45,8 +49,8 @@ def pad_sequences(sequences, maxlen=None, dtype='int32',
|
||||
|
||||
x = (np.ones((nb_samples, maxlen) + sample_shape) * value).astype(dtype)
|
||||
for idx, s in enumerate(sequences):
|
||||
if len(s) == 0:
|
||||
continue # empty list was found
|
||||
if not len(s):
|
||||
continue # empty list/array was found
|
||||
if truncating == 'pre':
|
||||
trunc = s[-maxlen:]
|
||||
elif truncating == 'post':
|
||||
@@ -70,7 +74,9 @@ def pad_sequences(sequences, maxlen=None, dtype='int32',
|
||||
|
||||
|
||||
def make_sampling_table(size, sampling_factor=1e-5):
|
||||
'''This generates an array where the ith element
|
||||
"""Generates a word rank-based probabilistic sampling table.
|
||||
|
||||
This generates an array where the ith element
|
||||
is the probability that a word of rank i would be sampled,
|
||||
according to the sampling distribution used in word2vec.
|
||||
|
||||
@@ -84,11 +90,16 @@ def make_sampling_table(size, sampling_factor=1e-5):
|
||||
|
||||
# Arguments
|
||||
size: int, number of possible words to sample.
|
||||
'''
|
||||
sampling_factor: the sampling factor in the word2vec formula.
|
||||
|
||||
# Returns
|
||||
A 1D Numpy array of length `size` where the ith entry
|
||||
is the probability that a word of rank i should be sampled.
|
||||
"""
|
||||
gamma = 0.577
|
||||
rank = np.array(list(range(size)))
|
||||
rank[0] = 1
|
||||
inv_fq = rank * (np.log(rank) + gamma) + 0.5 - 1./(12.*rank)
|
||||
inv_fq = rank * (np.log(rank) + gamma) + 0.5 - 1. / (12. * rank)
|
||||
f = sampling_factor * inv_fq
|
||||
|
||||
return np.minimum(1., f / np.sqrt(f))
|
||||
@@ -97,29 +108,40 @@ def make_sampling_table(size, sampling_factor=1e-5):
|
||||
def skipgrams(sequence, vocabulary_size,
|
||||
window_size=4, negative_samples=1., shuffle=True,
|
||||
categorical=False, sampling_table=None):
|
||||
'''Take a sequence (list of indexes of words),
|
||||
"""Generates skipgram word pairs.
|
||||
|
||||
Takes a sequence (list of indexes of words),
|
||||
returns couples of [word_index, other_word index] and labels (1s or 0s),
|
||||
where label = 1 if 'other_word' belongs to the context of 'word',
|
||||
and label=0 if 'other_word' is randomly sampled
|
||||
|
||||
# Arguments
|
||||
sequence: a word sequence (sentence), encoded as a list
|
||||
of word indices (integers). If using a `sampling_table`,
|
||||
word indices are expected to match the rank
|
||||
of the words in a reference dataset (e.g. 10 would encode
|
||||
the 10-th most frequently occurring token).
|
||||
Note that index 0 is expected to be a non-word and will be skipped.
|
||||
vocabulary_size: int. maximum possible word index + 1
|
||||
window_size: int. actually half-window.
|
||||
The window of a word wi will be [i-window_size, i+window_size+1]
|
||||
negative_samples: float >= 0. 0 for no negative (=random) samples.
|
||||
1 for same number as positive samples. etc.
|
||||
shuffle: whether to shuffle the word couples before returning them.
|
||||
categorical: bool. if False, labels will be
|
||||
integers (eg. [0, 1, 1 .. ]),
|
||||
if True labels will be categorical eg. [[1,0],[0,1],[0,1] .. ]
|
||||
sampling_table: 1D array of size `vocabulary_size` where the entry i
|
||||
encodes the probability to sample a word of rank i.
|
||||
|
||||
# Returns
|
||||
couples, labels: where `couples` are int pairs and
|
||||
`labels` are either 0 or 1.
|
||||
|
||||
# Notes
|
||||
# Note
|
||||
By convention, index 0 in the vocabulary is
|
||||
a non-word and will be skipped.
|
||||
'''
|
||||
"""
|
||||
couples = []
|
||||
labels = []
|
||||
for i, wi in enumerate(sequence):
|
||||
@@ -129,8 +151,8 @@ def skipgrams(sequence, vocabulary_size,
|
||||
if sampling_table[wi] < random.random():
|
||||
continue
|
||||
|
||||
window_start = max(0, i-window_size)
|
||||
window_end = min(len(sequence), i+window_size+1)
|
||||
window_start = max(0, i - window_size)
|
||||
window_end = min(len(sequence), i + window_size + 1)
|
||||
for j in range(window_start, window_end):
|
||||
if j != i:
|
||||
wj = sequence[j]
|
||||
@@ -147,11 +169,12 @@ def skipgrams(sequence, vocabulary_size,
|
||||
words = [c[0] for c in couples]
|
||||
random.shuffle(words)
|
||||
|
||||
couples += [[words[i %len(words)], random.randint(1, vocabulary_size-1)] for i in range(nb_negative_samples)]
|
||||
couples += [[words[i % len(words)],
|
||||
random.randint(1, vocabulary_size - 1)] for i in range(nb_negative_samples)]
|
||||
if categorical:
|
||||
labels += [[1, 0]]*nb_negative_samples
|
||||
labels += [[1, 0]] * nb_negative_samples
|
||||
else:
|
||||
labels += [0]*nb_negative_samples
|
||||
labels += [0] * nb_negative_samples
|
||||
|
||||
if shuffle:
|
||||
seed = random.randint(0, 10e6)
|
||||
|
||||
+117
-70
@@ -1,7 +1,8 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
'''These preprocessing utilities would greatly benefit
|
||||
from a fast Cython rewrite.
|
||||
'''
|
||||
"""Utilities for text input preprocessing.
|
||||
|
||||
May benefit from a fast Cython rewrite.
|
||||
"""
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
|
||||
@@ -17,54 +18,70 @@ else:
|
||||
maketrans = str.maketrans
|
||||
|
||||
|
||||
def base_filter():
|
||||
f = string.punctuation
|
||||
f = f.replace("'", '')
|
||||
f += '\t\n'
|
||||
return f
|
||||
def text_to_word_sequence(text,
|
||||
filters='!"#$%&()*+,-./:;<=>?@[\\]^_`{|}~\t\n',
|
||||
lower=True, split=" "):
|
||||
"""Converts a text to a sequence of words (or tokens).
|
||||
|
||||
# Arguments
|
||||
text: Input text (string).
|
||||
filters: Sequence of characters to filter out.
|
||||
lower: Whether to convert the input to lowercase.
|
||||
split: Sentence split marker (string).
|
||||
|
||||
def text_to_word_sequence(text, filters=base_filter(), lower=True, split=" "):
|
||||
'''prune: sequence of characters to filter out
|
||||
'''
|
||||
# Returns
|
||||
A list of words (or tokens).
|
||||
"""
|
||||
if lower:
|
||||
text = text.lower()
|
||||
text = text.translate(maketrans(filters, split*len(filters)))
|
||||
text = text.translate(maketrans(filters, split * len(filters)))
|
||||
seq = text.split(split)
|
||||
return [_f for _f in seq if _f]
|
||||
return [i for i in seq if i]
|
||||
|
||||
|
||||
def one_hot(text, n, filters=base_filter(), lower=True, split=" "):
|
||||
seq = text_to_word_sequence(text, filters=filters, lower=lower, split=split)
|
||||
def one_hot(text, n,
|
||||
filters='!"#$%&()*+,-./:;<=>?@[\\]^_`{|}~\t\n',
|
||||
lower=True,
|
||||
split=' '):
|
||||
seq = text_to_word_sequence(text,
|
||||
filters=filters,
|
||||
lower=lower,
|
||||
split=split)
|
||||
return [(abs(hash(w)) % (n - 1) + 1) for w in seq]
|
||||
|
||||
|
||||
class Tokenizer(object):
|
||||
def __init__(self, nb_words=None, filters=base_filter(),
|
||||
lower=True, split=' ', char_level=False):
|
||||
'''The class allows to vectorize a text corpus, by turning each
|
||||
text into either a sequence of integers (each integer being the index
|
||||
of a token in a dictionary) or into a vector where the coefficient
|
||||
for each token could be binary, based on word count, based on tf-idf...
|
||||
"""Text tokenization utility class.
|
||||
|
||||
# Arguments
|
||||
nb_words: the maximum number of words to keep, based
|
||||
on word frequency. Only the most common `nb_words` words will
|
||||
be kept.
|
||||
filters: a string where each element is a character that will be
|
||||
filtered from the texts. The default is all punctuation, plus
|
||||
tabs and line breaks, minus the `'` character.
|
||||
lower: boolean. Whether to convert the texts to lowercase.
|
||||
split: character or string to use for token splitting.
|
||||
char_level: if True, every character will be treated as a word.
|
||||
This class allows vectorizing a text corpus, by turning each
|
||||
text into either a sequence of integers (each integer being the index
|
||||
of a token in a dictionary) or into a vector where the coefficient
|
||||
for each token could be binary, based on word count, based on tf-idf...
|
||||
|
||||
By default, all punctuation is removed, turning the texts into
|
||||
space-separated sequences of words
|
||||
(words maybe include the `'` character). These sequences are then
|
||||
split into lists of tokens. They will then be indexed or vectorized.
|
||||
# Arguments
|
||||
nb_words: the maximum number of words to keep, based
|
||||
on word frequency. Only the most common `nb_words` words will
|
||||
be kept.
|
||||
filters: a string where each element is a character that will be
|
||||
filtered from the texts. The default is all punctuation, plus
|
||||
tabs and line breaks, minus the `'` character.
|
||||
lower: boolean. Whether to convert the texts to lowercase.
|
||||
split: character or string to use for token splitting.
|
||||
char_level: if True, every character will be treated as a word.
|
||||
|
||||
`0` is a reserved index that won't be assigned to any word.
|
||||
'''
|
||||
By default, all punctuation is removed, turning the texts into
|
||||
space-separated sequences of words
|
||||
(words may include the `'` character). These sequences are then
|
||||
split into lists of tokens. They will then be indexed or vectorized.
|
||||
|
||||
`0` is a reserved index that won't be assigned to any word.
|
||||
"""
|
||||
|
||||
def __init__(self, nb_words=None,
|
||||
filters='!"#$%&()*+,-./:;<=>?@[\\]^_`{|}~\t\n',
|
||||
lower=True,
|
||||
split=' ',
|
||||
char_level=False):
|
||||
self.word_counts = {}
|
||||
self.word_docs = {}
|
||||
self.filters = filters
|
||||
@@ -75,16 +92,21 @@ class Tokenizer(object):
|
||||
self.char_level = char_level
|
||||
|
||||
def fit_on_texts(self, texts):
|
||||
'''Required before using texts_to_sequences or texts_to_matrix
|
||||
"""Updates internal vocabulary based on a list of texts.
|
||||
|
||||
Required before using `texts_to_sequences` or `texts_to_matrix`.
|
||||
|
||||
# Arguments
|
||||
texts: can be a list of strings,
|
||||
or a generator of strings (for memory-efficiency)
|
||||
'''
|
||||
"""
|
||||
self.document_count = 0
|
||||
for text in texts:
|
||||
self.document_count += 1
|
||||
seq = text if self.char_level else text_to_word_sequence(text, self.filters, self.lower, self.split)
|
||||
seq = text if self.char_level else text_to_word_sequence(text,
|
||||
self.filters,
|
||||
self.lower,
|
||||
self.split)
|
||||
for w in seq:
|
||||
if w in self.word_counts:
|
||||
self.word_counts[w] += 1
|
||||
@@ -107,9 +129,15 @@ class Tokenizer(object):
|
||||
self.index_docs[self.word_index[w]] = c
|
||||
|
||||
def fit_on_sequences(self, sequences):
|
||||
'''Required before using sequences_to_matrix
|
||||
(if fit_on_texts was never called)
|
||||
'''
|
||||
"""Updates internal vocabulary based on a list of sequences.
|
||||
|
||||
Required before using `sequences_to_matrix`
|
||||
(if `fit_on_texts` was never called).
|
||||
|
||||
# Arguments
|
||||
sequences: A list of sequences.
|
||||
A "sequence" is a list of integer word indices.
|
||||
"""
|
||||
self.document_count = len(sequences)
|
||||
self.index_docs = {}
|
||||
for seq in sequences:
|
||||
@@ -121,30 +149,40 @@ class Tokenizer(object):
|
||||
self.index_docs[i] += 1
|
||||
|
||||
def texts_to_sequences(self, texts):
|
||||
'''Transforms each text in texts in a sequence of integers.
|
||||
"""Transforms each text in texts into a sequence of integers.
|
||||
|
||||
Only top "nb_words" most frequent words will be taken into account.
|
||||
Only words known by the tokenizer will be taken into account.
|
||||
|
||||
Returns a list of sequences.
|
||||
'''
|
||||
# Arguments
|
||||
texts: A list of texts (strings).
|
||||
|
||||
# Returns
|
||||
A list of sequences.
|
||||
"""
|
||||
res = []
|
||||
for vect in self.texts_to_sequences_generator(texts):
|
||||
res.append(vect)
|
||||
return res
|
||||
|
||||
def texts_to_sequences_generator(self, texts):
|
||||
'''Transforms each text in texts in a sequence of integers.
|
||||
"""Transforms each text in texts into a sequence of integers.
|
||||
|
||||
Only top "nb_words" most frequent words will be taken into account.
|
||||
Only words known by the tokenizer will be taken into account.
|
||||
|
||||
Yields individual sequences.
|
||||
# Arguments
|
||||
texts: A list of texts (strings).
|
||||
|
||||
# Arguments:
|
||||
texts: list of strings.
|
||||
'''
|
||||
# Yields
|
||||
Yields individual sequences.
|
||||
"""
|
||||
nb_words = self.nb_words
|
||||
for text in texts:
|
||||
seq = text if self.char_level else text_to_word_sequence(text, self.filters, self.lower, self.split)
|
||||
seq = text if self.char_level else text_to_word_sequence(text,
|
||||
self.filters,
|
||||
self.lower,
|
||||
self.split)
|
||||
vect = []
|
||||
for w in seq:
|
||||
i = self.word_index.get(w)
|
||||
@@ -156,25 +194,33 @@ class Tokenizer(object):
|
||||
yield vect
|
||||
|
||||
def texts_to_matrix(self, texts, mode='binary'):
|
||||
'''Convert a list of texts to a Numpy matrix,
|
||||
according to some vectorization mode.
|
||||
"""Convert a list of texts to a Numpy matrix.
|
||||
|
||||
# Arguments:
|
||||
# Arguments
|
||||
texts: list of strings.
|
||||
modes: one of "binary", "count", "tfidf", "freq"
|
||||
'''
|
||||
mode: one of "binary", "count", "tfidf", "freq".
|
||||
|
||||
# Returns
|
||||
A Numpy matrix.
|
||||
"""
|
||||
sequences = self.texts_to_sequences(texts)
|
||||
return self.sequences_to_matrix(sequences, mode=mode)
|
||||
|
||||
def sequences_to_matrix(self, sequences, mode='binary'):
|
||||
'''Converts a list of sequences into a Numpy matrix,
|
||||
according to some vectorization mode.
|
||||
"""Converts a list of sequences into a Numpy matrix.
|
||||
|
||||
# Arguments:
|
||||
# Arguments
|
||||
sequences: list of sequences
|
||||
(a sequence is a list of integer word indices).
|
||||
modes: one of "binary", "count", "tfidf", "freq"
|
||||
'''
|
||||
mode: one of "binary", "count", "tfidf", "freq"
|
||||
|
||||
# Returns
|
||||
A Numpy matrix.
|
||||
|
||||
# Raises
|
||||
ValueError: In case of invalid `mode` argument,
|
||||
or if the Tokenizer requires to be fit to sample data.
|
||||
"""
|
||||
if not self.nb_words:
|
||||
if self.word_index:
|
||||
nb_words = len(self.word_index) + 1
|
||||
@@ -188,7 +234,7 @@ class Tokenizer(object):
|
||||
raise ValueError('Fit the Tokenizer on some data '
|
||||
'before using tfidf mode.')
|
||||
|
||||
X = np.zeros((len(sequences), nb_words))
|
||||
x = np.zeros((len(sequences), nb_words))
|
||||
for i, seq in enumerate(sequences):
|
||||
if not seq:
|
||||
continue
|
||||
@@ -202,17 +248,18 @@ class Tokenizer(object):
|
||||
counts[j] += 1
|
||||
for j, c in list(counts.items()):
|
||||
if mode == 'count':
|
||||
X[i][j] = c
|
||||
x[i][j] = c
|
||||
elif mode == 'freq':
|
||||
X[i][j] = c / len(seq)
|
||||
x[i][j] = c / len(seq)
|
||||
elif mode == 'binary':
|
||||
X[i][j] = 1
|
||||
x[i][j] = 1
|
||||
elif mode == 'tfidf':
|
||||
# Use weighting scheme 2 in
|
||||
# https://en.wikipedia.org/wiki/Tf%E2%80%93idf
|
||||
# https://en.wikipedia.org/wiki/Tf%E2%80%93idf
|
||||
tf = 1 + np.log(c)
|
||||
idf = np.log(1 + self.document_count / (1 + self.index_docs.get(j, 0)))
|
||||
X[i][j] = tf * idf
|
||||
idf = np.log(1 + self.document_count /
|
||||
(1 + self.index_docs.get(j, 0)))
|
||||
x[i][j] = tf * idf
|
||||
else:
|
||||
raise ValueError('Unknown vectorization mode:', mode)
|
||||
return X
|
||||
return x
|
||||
|
||||
+16
-7
@@ -5,6 +5,8 @@ import warnings
|
||||
|
||||
|
||||
class Regularizer(object):
|
||||
"""Regularizer base class.
|
||||
"""
|
||||
|
||||
def __call__(self, x):
|
||||
return 0
|
||||
@@ -24,13 +26,14 @@ class Regularizer(object):
|
||||
|
||||
|
||||
class EigenvalueRegularizer(Regularizer):
|
||||
'''This takes a constant that controls
|
||||
the regularization by Eigenvalue Decay on the
|
||||
current layer and outputs the regularized
|
||||
loss (evaluated on the training data) and
|
||||
the original loss (evaluated on the
|
||||
validation data).
|
||||
'''
|
||||
"""Regularizer based on the eigenvalues of a weight matrix.
|
||||
|
||||
Only available for tensors of rank 2.
|
||||
|
||||
# Arguments
|
||||
k: Float; modulates the amount of regularization to apply.
|
||||
"""
|
||||
|
||||
def __init__(self, k):
|
||||
self.k = k
|
||||
|
||||
@@ -58,6 +61,12 @@ class EigenvalueRegularizer(Regularizer):
|
||||
|
||||
|
||||
class L1L2Regularizer(Regularizer):
|
||||
"""Regularizer for L1 and L2 regularization.
|
||||
|
||||
# Arguments
|
||||
l1: Float; L1 regularization factor.
|
||||
l2: Float; L2 regularization factor.
|
||||
"""
|
||||
|
||||
def __init__(self, l1=0., l2=0.):
|
||||
self.l1 = K.cast_to_floatx(l1)
|
||||
|
||||
+30
-13
@@ -1,21 +1,38 @@
|
||||
"""Utilities for file download and caching."""
|
||||
from __future__ import absolute_import
|
||||
from __future__ import print_function
|
||||
|
||||
import functools
|
||||
import tarfile
|
||||
import os
|
||||
import sys
|
||||
import shutil
|
||||
import hashlib
|
||||
from six.moves.urllib.request import urlopen
|
||||
from six.moves.urllib.error import URLError, HTTPError
|
||||
from six.moves.urllib.error import URLError
|
||||
from six.moves.urllib.error import HTTPError
|
||||
|
||||
from ..utils.generic_utils import Progbar
|
||||
|
||||
|
||||
# Under Python 2, 'urlretrieve' relies on FancyURLopener from legacy
|
||||
# urllib module, known to have issues with proxy management
|
||||
if sys.version_info[0] == 2:
|
||||
def urlretrieve(url, filename, reporthook=None, data=None):
|
||||
"""Replacement for `urlretrieve` for Python 2.
|
||||
|
||||
Under Python 2, `urlretrieve` relies on `FancyURLopener` from legacy
|
||||
`urllib` module, known to have issues with proxy management.
|
||||
|
||||
# Arguments
|
||||
url: url to retrieve.
|
||||
filename: where to store the retrieved data locally.
|
||||
reporthook: a hook function that will be called once
|
||||
on establishment of the network connection and once
|
||||
after each block read thereafter.
|
||||
The hook will be passed three arguments;
|
||||
a count of blocks transferred so far,
|
||||
a block size in bytes, and the total size of the file.
|
||||
data: `data` argument passed to `urlopen`.
|
||||
"""
|
||||
def chunk_read(response, chunk_size=8192, reporthook=None):
|
||||
total_size = response.info().get('Content-Length').strip()
|
||||
total_size = int(total_size)
|
||||
@@ -40,9 +57,10 @@ else:
|
||||
|
||||
def get_file(fname, origin, untar=False,
|
||||
md5_hash=None, cache_subdir='datasets'):
|
||||
'''Downloads a file from a URL if it not already in the cache.
|
||||
"""Downloads a file from a URL if it is not already in the cache.
|
||||
|
||||
Passing the MD5 hash will verify the file after download as well as if it is already present in the cache.
|
||||
Passing the MD5 hash will verify the file after download
|
||||
as well as if it is already present in the cache.
|
||||
|
||||
# Arguments
|
||||
fname: name of the file
|
||||
@@ -53,7 +71,7 @@ def get_file(fname, origin, untar=False,
|
||||
|
||||
# Returns
|
||||
Path to the downloaded file
|
||||
'''
|
||||
"""
|
||||
datadir_base = os.path.expanduser(os.path.join('~', '.keras'))
|
||||
if not os.access(datadir_base, os.W_OK):
|
||||
datadir_base = os.path.join('/tmp', '.keras')
|
||||
@@ -69,7 +87,7 @@ def get_file(fname, origin, untar=False,
|
||||
|
||||
download = False
|
||||
if os.path.exists(fpath):
|
||||
# file found; verify integrity if a hash was provided
|
||||
# File found; verify integrity if a hash was provided.
|
||||
if md5_hash is not None:
|
||||
if not validate_file(fpath, md5_hash):
|
||||
print('A local file was found, but it seems to be '
|
||||
@@ -80,11 +98,9 @@ def get_file(fname, origin, untar=False,
|
||||
|
||||
if download:
|
||||
print('Downloading data from', origin)
|
||||
global progbar
|
||||
progbar = None
|
||||
|
||||
def dl_progress(count, block_size, total_size):
|
||||
global progbar
|
||||
def dl_progress(count, block_size, total_size, progbar=None):
|
||||
if progbar is None:
|
||||
progbar = Progbar(total_size)
|
||||
else:
|
||||
@@ -93,7 +109,8 @@ def get_file(fname, origin, untar=False,
|
||||
error_msg = 'URL fetch failure on {}: {} -- {}'
|
||||
try:
|
||||
try:
|
||||
urlretrieve(origin, fpath, dl_progress)
|
||||
urlretrieve(origin, fpath,
|
||||
functools.partial(dl_progress, progbar=progbar))
|
||||
except URLError as e:
|
||||
raise Exception(error_msg.format(origin, e.errno, e.reason))
|
||||
except HTTPError as e:
|
||||
@@ -124,7 +141,7 @@ def get_file(fname, origin, untar=False,
|
||||
|
||||
|
||||
def validate_file(fpath, md5_hash):
|
||||
'''Validates a file against a MD5 hash
|
||||
"""Validates a file against a MD5 hash.
|
||||
|
||||
# Arguments
|
||||
fpath: path to the file being validated
|
||||
@@ -132,7 +149,7 @@ def validate_file(fpath, md5_hash):
|
||||
|
||||
# Returns
|
||||
Whether the file is valid
|
||||
'''
|
||||
"""
|
||||
hasher = hashlib.md5()
|
||||
with open(fpath, 'rb') as f:
|
||||
buf = f.read()
|
||||
|
||||
+146
-29
@@ -1,16 +1,125 @@
|
||||
"""Python utilities required by Keras."""
|
||||
from __future__ import absolute_import
|
||||
|
||||
import numpy as np
|
||||
|
||||
import time
|
||||
import sys
|
||||
import six
|
||||
import marshal
|
||||
import types as python_types
|
||||
|
||||
_GLOBAL_CUSTOM_OBJECTS = {}
|
||||
|
||||
|
||||
class CustomObjectScope(object):
|
||||
"""Provides a scope that changes to `_GLOBAL_CUSTOM_OBJECTS` cannot escape.
|
||||
|
||||
Code within a `with` statement will be able to access custom objects
|
||||
by name. Changes to global custom objects persist within the enclosing `with` statement. At end of the `with`
|
||||
statement, global custom objects are reverted to state at beginning of the `with` statement.
|
||||
|
||||
# Example
|
||||
|
||||
Consider a custom object `MyObject`
|
||||
|
||||
```python
|
||||
with CustomObjectScope({"MyObject":MyObject}):
|
||||
layer = Dense(..., W_regularizer="MyObject")
|
||||
# save, load, etc. will recognize custom object by name
|
||||
```
|
||||
"""
|
||||
def __init__(self, *args):
|
||||
self.custom_objects = args
|
||||
self.backup = None
|
||||
|
||||
def __enter__(self):
|
||||
self.backup = _GLOBAL_CUSTOM_OBJECTS.copy()
|
||||
for objects in self.custom_objects:
|
||||
_GLOBAL_CUSTOM_OBJECTS.update(objects)
|
||||
return self
|
||||
|
||||
def __exit__(self, type, value, traceback):
|
||||
_GLOBAL_CUSTOM_OBJECTS.clear()
|
||||
_GLOBAL_CUSTOM_OBJECTS.update(self.backup)
|
||||
|
||||
|
||||
def custom_object_scope(*args):
|
||||
"""Provides a scope that changes to `_GLOBAL_CUSTOM_OBJECTS` cannot escape.
|
||||
|
||||
Convenience wrapper for `CustomObjectScope`. Code within a `with` statement will be able to access custom objects
|
||||
by name. Changes to global custom objects persist within the enclosing `with` statement. At end of the `with`
|
||||
statement, global custom objects are reverted to state at beginning of the `with` statement.
|
||||
|
||||
# Example
|
||||
|
||||
Consider a custom object `MyObject`
|
||||
|
||||
```python
|
||||
with custom_object_scope({"MyObject":MyObject}):
|
||||
layer = Dense(..., W_regularizer="MyObject")
|
||||
# save, load, etc. will recognize custom object by name
|
||||
```
|
||||
|
||||
# Arguments
|
||||
*args: Variable length list of dictionaries of name, class pairs to add to custom objects.
|
||||
|
||||
# Returns
|
||||
Object of type `CustomObjectScope`.
|
||||
"""
|
||||
return CustomObjectScope(*args)
|
||||
|
||||
|
||||
def get_custom_objects():
|
||||
"""Retrieves a live reference to the global dictionary of custom objects (`_GLOBAL_CUSTOM_OBJECTS`).
|
||||
|
||||
Updating and clearing custom objects using `custom_object_scope` is preferred, but `get_custom_objects` can
|
||||
be used to directly access `_GLOBAL_CUSTOM_OBJECTS`.
|
||||
|
||||
# Example
|
||||
|
||||
```python
|
||||
get_custom_objects().clear()
|
||||
get_custom_objects()["MyObject"] = MyObject
|
||||
```
|
||||
|
||||
# Returns
|
||||
Global dictionary of names to classes (`_GLOBAL_CUSTOM_OBJECTS`).
|
||||
"""
|
||||
return _GLOBAL_CUSTOM_OBJECTS
|
||||
|
||||
|
||||
def get_from_module(identifier, module_params, module_name,
|
||||
instantiate=False, kwargs=None):
|
||||
"""Retrieves a class or function member of a module.
|
||||
|
||||
First checks `_GLOBAL_CUSTOM_OBJECTS` for `module_name`, then checks `module_params`.
|
||||
|
||||
# Arguments
|
||||
identifier: the object to retrieve. It could be specified
|
||||
by name (as a string), or by dict. In any other case,
|
||||
`identifier` itself will be returned without any changes.
|
||||
module_params: the members of a module
|
||||
(e.g. the output of `globals()`).
|
||||
module_name: string; the name of the target module. Only used
|
||||
to format error messages.
|
||||
instantiate: whether to instantiate the returned object
|
||||
(if it's a class).
|
||||
kwargs: a dictionary of keyword arguments to pass to the
|
||||
class constructor if `instantiate` is `True`.
|
||||
|
||||
# Returns
|
||||
The target object.
|
||||
|
||||
# Raises
|
||||
ValueError: if the identifier cannot be found.
|
||||
"""
|
||||
if isinstance(identifier, six.string_types):
|
||||
res = module_params.get(identifier)
|
||||
res = None
|
||||
if identifier in _GLOBAL_CUSTOM_OBJECTS:
|
||||
res = _GLOBAL_CUSTOM_OBJECTS[identifier]
|
||||
if not res:
|
||||
res = module_params.get(identifier)
|
||||
if not res:
|
||||
raise ValueError('Invalid ' + str(module_name) + ': ' +
|
||||
str(identifier))
|
||||
@@ -22,7 +131,11 @@ def get_from_module(identifier, module_params, module_name,
|
||||
return res
|
||||
elif isinstance(identifier, dict):
|
||||
name = identifier.pop('name')
|
||||
res = module_params.get(name)
|
||||
res = None
|
||||
if name in _GLOBAL_CUSTOM_OBJECTS:
|
||||
res = _GLOBAL_CUSTOM_OBJECTS[name]
|
||||
if not res:
|
||||
res = module_params.get(name)
|
||||
if res:
|
||||
return res(**identifier)
|
||||
else:
|
||||
@@ -36,7 +149,14 @@ def make_tuple(*args):
|
||||
|
||||
|
||||
def func_dump(func):
|
||||
'''Serialize user defined function.'''
|
||||
"""Serializes a user defined function.
|
||||
|
||||
# Arguments
|
||||
func: the function to serialize.
|
||||
|
||||
# Returns
|
||||
A tuple `(code, defaults, closure)`.
|
||||
"""
|
||||
code = marshal.dumps(func.__code__).decode('raw_unicode_escape')
|
||||
defaults = func.__defaults__
|
||||
if func.__closure__:
|
||||
@@ -47,7 +167,17 @@ def func_dump(func):
|
||||
|
||||
|
||||
def func_load(code, defaults=None, closure=None, globs=None):
|
||||
'''Deserialize user defined function.'''
|
||||
"""Deserializes a user defined function.
|
||||
|
||||
# Arguments
|
||||
code: bytecode of the function.
|
||||
defaults: defaults of the function.
|
||||
closure: closure of the function.
|
||||
globs: dictionary of global objects.
|
||||
|
||||
# Returns
|
||||
A function object.
|
||||
"""
|
||||
if isinstance(code, (tuple, list)): # unpack previous dump
|
||||
code, defaults, closure = code
|
||||
code = marshal.loads(code.encode('raw_unicode_escape'))
|
||||
@@ -60,14 +190,14 @@ def func_load(code, defaults=None, closure=None, globs=None):
|
||||
|
||||
|
||||
class Progbar(object):
|
||||
"""Displays a progress bar.
|
||||
|
||||
def __init__(self, target, width=30, verbose=1, interval=0.01):
|
||||
'''Dislays a progress bar.
|
||||
# Arguments
|
||||
target: Total number of steps expected.
|
||||
interval: Minimum visual progress update interval (in seconds).
|
||||
"""
|
||||
|
||||
# Arguments:
|
||||
target: Total number of steps expected.
|
||||
interval: Minimum visual progress update interval (in seconds).
|
||||
'''
|
||||
def __init__(self, target, width=30, verbose=1, interval=0.05):
|
||||
self.width = width
|
||||
self.target = target
|
||||
self.sum_values = {}
|
||||
@@ -79,15 +209,16 @@ class Progbar(object):
|
||||
self.seen_so_far = 0
|
||||
self.verbose = verbose
|
||||
|
||||
def update(self, current, values=[], force=False):
|
||||
'''Updates the progress bar.
|
||||
def update(self, current, values=None, force=False):
|
||||
"""Updates the progress bar.
|
||||
|
||||
# Arguments
|
||||
current: Index of current step.
|
||||
values: List of tuples (name, value_for_last_step).
|
||||
The progress bar will display averages for these values.
|
||||
force: Whether to force visual progress update.
|
||||
'''
|
||||
"""
|
||||
values = values or []
|
||||
for k, v in values:
|
||||
if k not in self.sum_values:
|
||||
self.sum_values[k] = [v * (current - self.seen_so_far),
|
||||
@@ -113,7 +244,7 @@ class Progbar(object):
|
||||
prog = float(current) / self.target
|
||||
prog_width = int(self.width * prog)
|
||||
if prog_width > 0:
|
||||
bar += ('=' * (prog_width-1))
|
||||
bar += ('=' * (prog_width - 1))
|
||||
if current < self.target:
|
||||
bar += '>'
|
||||
else:
|
||||
@@ -168,19 +299,5 @@ class Progbar(object):
|
||||
|
||||
self.last_update = now
|
||||
|
||||
def add(self, n, values=[]):
|
||||
def add(self, n, values=None):
|
||||
self.update(self.seen_so_far + n, values)
|
||||
|
||||
|
||||
def display_table(rows, positions):
|
||||
|
||||
def display_row(objects, positions):
|
||||
line = ''
|
||||
for i in range(len(objects)):
|
||||
line += str(objects[i])
|
||||
line = line[:positions[i]]
|
||||
line += ' ' * (positions[i] - len(line))
|
||||
print(line)
|
||||
|
||||
for objects in rows:
|
||||
display_row(objects, positions)
|
||||
|
||||
+38
-11
@@ -1,22 +1,33 @@
|
||||
"""Utilities related to disk I/O."""
|
||||
from __future__ import absolute_import
|
||||
from __future__ import print_function
|
||||
|
||||
import numpy as np
|
||||
import sys
|
||||
from collections import defaultdict
|
||||
|
||||
try:
|
||||
import h5py
|
||||
except ImportError:
|
||||
h5py = None
|
||||
|
||||
class HDF5Matrix():
|
||||
'''Representation of HDF5 dataset which can be used instead of a
|
||||
Numpy array.
|
||||
try:
|
||||
import tables
|
||||
except ImportError:
|
||||
tables = None
|
||||
|
||||
|
||||
class HDF5Matrix(object):
|
||||
"""Representation of HDF5 dataset to be used instead of a Numpy array.
|
||||
|
||||
# Example
|
||||
|
||||
```python
|
||||
X_data = HDF5Matrix('input/file.hdf5', 'data')
|
||||
model.predict(X_data)
|
||||
x_data = HDF5Matrix('input/file.hdf5', 'data')
|
||||
model.predict(x_data)
|
||||
```
|
||||
|
||||
Providing start and end allows use of a slice of the dataset.
|
||||
Providing `start` and `end` allows use of a slice of the dataset.
|
||||
|
||||
Optionally, a normalizer function (or lambda) can be given. This will
|
||||
be called on every slice of data retrieved.
|
||||
@@ -29,11 +40,15 @@ class HDF5Matrix():
|
||||
end: int, end of desired slice of the specified dataset
|
||||
normalizer: function to be called on data when retrieved
|
||||
|
||||
'''
|
||||
# Returns
|
||||
An array-like HDF5 dataset.
|
||||
"""
|
||||
refs = defaultdict(int)
|
||||
|
||||
def __init__(self, datapath, dataset, start=0, end=None, normalizer=None):
|
||||
import h5py
|
||||
if h5py is None:
|
||||
raise ImportError('The use of HDF5Matrix requires '
|
||||
'HDF5 and h5py installed.')
|
||||
|
||||
if datapath not in list(self.refs.keys()):
|
||||
f = h5py.File(datapath)
|
||||
@@ -54,7 +69,7 @@ class HDF5Matrix():
|
||||
def __getitem__(self, key):
|
||||
if isinstance(key, slice):
|
||||
if key.stop + self.start <= self.end:
|
||||
idx = slice(key.start+self.start, key.stop + self.start)
|
||||
idx = slice(key.start + self.start, key.stop + self.start)
|
||||
else:
|
||||
raise IndexError
|
||||
elif isinstance(key, int):
|
||||
@@ -83,7 +98,9 @@ class HDF5Matrix():
|
||||
|
||||
|
||||
def save_array(array, name):
|
||||
import tables
|
||||
if tables is None:
|
||||
raise ImportError('The use of `save_array` requires '
|
||||
'the tables module.')
|
||||
f = tables.open_file(name, 'w')
|
||||
atom = tables.Atom.from_dtype(array.dtype)
|
||||
ds = f.create_carray(f.root, 'data', atom, array.shape)
|
||||
@@ -92,7 +109,9 @@ def save_array(array, name):
|
||||
|
||||
|
||||
def load_array(name):
|
||||
import tables
|
||||
if tables is None:
|
||||
raise ImportError('The use of `load_array` requires '
|
||||
'the tables module.')
|
||||
f = tables.open_file(name)
|
||||
array = f.root.data
|
||||
a = np.empty(shape=array.shape, dtype=array.dtype)
|
||||
@@ -102,6 +121,14 @@ def load_array(name):
|
||||
|
||||
|
||||
def ask_to_proceed_with_overwrite(filepath):
|
||||
"""Produces a prompt asking about overwriting a file.
|
||||
|
||||
# Arguments
|
||||
filepath: the path to the file to be overwritten.
|
||||
|
||||
# Returns
|
||||
True if we can proceed with overwrite, False otherwise.
|
||||
"""
|
||||
get_input = input
|
||||
if sys.version_info[:2] <= (2, 7):
|
||||
get_input = raw_input
|
||||
|
||||
@@ -1,15 +1,16 @@
|
||||
from __future__ import print_function
|
||||
import inspect
|
||||
|
||||
from .generic_utils import get_from_module
|
||||
from .generic_utils import get_from_module, get_custom_objects
|
||||
from .np_utils import convert_kernel
|
||||
from ..layers import *
|
||||
from ..models import Model, Sequential
|
||||
from .. import backend as K
|
||||
|
||||
|
||||
def layer_from_config(config, custom_objects={}):
|
||||
'''
|
||||
def layer_from_config(config, custom_objects=None):
|
||||
"""Instantiate a layer from a config dictionary.
|
||||
|
||||
# Arguments
|
||||
config: dict of the form {'class_name': str, 'config': dict}
|
||||
custom_objects: dict mapping class names (or function names)
|
||||
@@ -17,11 +18,11 @@ def layer_from_config(config, custom_objects={}):
|
||||
|
||||
# Returns
|
||||
Layer instance (may be Model, Sequential, Layer...)
|
||||
'''
|
||||
"""
|
||||
# Insert custom layers into globals so they can
|
||||
# be accessed by `get_from_module`.
|
||||
for cls_key in custom_objects:
|
||||
globals()[cls_key] = custom_objects[cls_key]
|
||||
if custom_objects:
|
||||
get_custom_objects().update(custom_objects)
|
||||
|
||||
class_name = config['class_name']
|
||||
|
||||
@@ -35,21 +36,24 @@ def layer_from_config(config, custom_objects={}):
|
||||
|
||||
arg_spec = inspect.getargspec(layer_class.from_config)
|
||||
if 'custom_objects' in arg_spec.args:
|
||||
return layer_class.from_config(config['config'], custom_objects=custom_objects)
|
||||
return layer_class.from_config(config['config'],
|
||||
custom_objects=custom_objects)
|
||||
else:
|
||||
return layer_class.from_config(config['config'])
|
||||
|
||||
|
||||
def print_summary(layers, relevant_nodes=None,
|
||||
line_length=100, positions=[.33, .55, .67, 1.]):
|
||||
'''Prints a summary of a layer
|
||||
line_length=100, positions=None):
|
||||
"""Prints a summary of a layer.
|
||||
|
||||
# Arguments
|
||||
layers: list of layers to print summaries of
|
||||
relevant_nodes: list of relevant nodes
|
||||
line_length: total length of printed lines
|
||||
positions: relative or absolute positions of log elements in each line
|
||||
'''
|
||||
positions: relative or absolute positions of log elements in each line.
|
||||
If not provided, defaults to `[.33, .55, .67, 1.]`.
|
||||
"""
|
||||
positions = positions or [.33, .55, .67, 1.]
|
||||
if positions[-1] <= 1:
|
||||
positions = [int(line_length * p) for p in positions]
|
||||
# header names for the different log elements
|
||||
@@ -70,9 +74,14 @@ def print_summary(layers, relevant_nodes=None,
|
||||
print('=' * line_length)
|
||||
|
||||
def print_layer_summary(layer):
|
||||
"""Prints a summary for a single layer.
|
||||
|
||||
# Arguments
|
||||
layer: target layer.
|
||||
"""
|
||||
try:
|
||||
output_shape = layer.output_shape
|
||||
except:
|
||||
except AttributeError:
|
||||
output_shape = 'multiple'
|
||||
connections = []
|
||||
for node_index, node in enumerate(layer.inbound_nodes):
|
||||
@@ -116,6 +125,16 @@ def print_summary(layers, relevant_nodes=None,
|
||||
|
||||
|
||||
def count_total_params(layers, layer_set=None):
|
||||
"""Counts the number of parameters in a list of layers.
|
||||
|
||||
# Arguments
|
||||
layers: list of layers.
|
||||
layer_set: set of layers already seen
|
||||
(so that we don't count their weights twice).
|
||||
|
||||
# Returns
|
||||
A tuple (count of trainable weights, count of non-trainable weights).
|
||||
"""
|
||||
if layer_set is None:
|
||||
layer_set = set()
|
||||
trainable_count = 0
|
||||
@@ -124,7 +143,7 @@ def count_total_params(layers, layer_set=None):
|
||||
if layer in layer_set:
|
||||
continue
|
||||
layer_set.add(layer)
|
||||
if type(layer) in (Model, Sequential):
|
||||
if isinstance(layer, (Model, Sequential)):
|
||||
t, nt = count_total_params(layer.layers, layer_set)
|
||||
trainable_count += t
|
||||
non_trainable_count += nt
|
||||
@@ -135,6 +154,13 @@ def count_total_params(layers, layer_set=None):
|
||||
|
||||
|
||||
def convert_all_kernels_in_model(model):
|
||||
"""Converts all convolution kernels in a model from Theano to TensorFlow.
|
||||
|
||||
Also works from TensorFlow to Theano.
|
||||
|
||||
# Arguments
|
||||
model: target model for the conversion.
|
||||
"""
|
||||
# Note: SeparableConvolution not included
|
||||
# since only supported by TF.
|
||||
conv_classes = {
|
||||
|
||||
+74
-74
@@ -1,28 +1,32 @@
|
||||
"""Numpy-related utilities."""
|
||||
from __future__ import absolute_import
|
||||
|
||||
import numpy as np
|
||||
import scipy as sp
|
||||
from six.moves import range
|
||||
from six.moves import zip
|
||||
from .. import backend as K
|
||||
|
||||
|
||||
def to_categorical(y, nb_classes=None):
|
||||
'''Convert class vector (integers from 0 to nb_classes) to binary class matrix, for use with categorical_crossentropy.
|
||||
"""Converts a class vector (integers) to binary class matrix.
|
||||
|
||||
E.g. for use with categorical_crossentropy.
|
||||
|
||||
# Arguments
|
||||
y: class vector to be converted into a matrix
|
||||
nb_classes: total number of classes
|
||||
(integers from 0 to nb_classes).
|
||||
nb_classes: total number of classes.
|
||||
|
||||
# Returns
|
||||
A binary matrix representation of the input.
|
||||
'''
|
||||
y = np.array(y, dtype='int')
|
||||
"""
|
||||
y = np.array(y, dtype='int').ravel()
|
||||
if not nb_classes:
|
||||
nb_classes = np.max(y)+1
|
||||
Y = np.zeros((len(y), nb_classes))
|
||||
for i in range(len(y)):
|
||||
Y[i, y[i]] = 1.
|
||||
return Y
|
||||
nb_classes = np.max(y) + 1
|
||||
n = y.shape[0]
|
||||
categorical = np.zeros((n, nb_classes))
|
||||
categorical[np.arange(n), y] = 1
|
||||
return categorical
|
||||
|
||||
|
||||
def normalize(a, axis=-1, order=2):
|
||||
@@ -33,16 +37,16 @@ def normalize(a, axis=-1, order=2):
|
||||
|
||||
def binary_logloss(p, y):
|
||||
epsilon = 1e-15
|
||||
p = sp.maximum(epsilon, p)
|
||||
p = sp.minimum(1-epsilon, p)
|
||||
res = sum(y * sp.log(p) + sp.subtract(1, y) * sp.log(sp.subtract(1, p)))
|
||||
res *= -1.0/len(y)
|
||||
p = np.maximum(epsilon, p)
|
||||
p = np.minimum(1 - epsilon, p)
|
||||
res = sum(y * np.log(p) + np.subtract(1, y) * np.log(np.subtract(1, p)))
|
||||
res *= -1.0 / len(y)
|
||||
return res
|
||||
|
||||
|
||||
def multiclass_logloss(P, Y):
|
||||
npreds = [P[i][Y[i]-1] for i in range(len(Y))]
|
||||
score = -(1. / len(Y)) * np.sum(np.log(npreds))
|
||||
def multiclass_logloss(p, y):
|
||||
npreds = [p[i][y[i] - 1] for i in range(len(y))]
|
||||
score = -(1. / len(y)) * np.sum(np.log(npreds))
|
||||
return score
|
||||
|
||||
|
||||
@@ -60,67 +64,52 @@ def categorical_probas_to_classes(p):
|
||||
return np.argmax(p, axis=1)
|
||||
|
||||
|
||||
def convert_kernel(kernel, dim_ordering='default'):
|
||||
'''Converts a kernel matrix (Numpy array)
|
||||
from Theano format to TensorFlow format
|
||||
(or reciprocally, since the transformation
|
||||
is its own inverse).
|
||||
'''
|
||||
if dim_ordering == 'default':
|
||||
def convert_kernel(kernel, dim_ordering=None):
|
||||
"""Converts a Numpy kernel matrix from Theano format to TensorFlow format.
|
||||
|
||||
Also works reciprocally, since the transformation is its own inverse.
|
||||
|
||||
# Arguments
|
||||
kernel: Numpy array (4D or 5D).
|
||||
dim_ordering: the data format.
|
||||
|
||||
# Returns
|
||||
The converted kernel.
|
||||
|
||||
# Raises
|
||||
ValueError: in case of invalid kernel shape or invalid dim_ordering.
|
||||
"""
|
||||
if dim_ordering is None:
|
||||
dim_ordering = K.image_dim_ordering()
|
||||
new_kernel = np.copy(kernel)
|
||||
if kernel.ndim == 4:
|
||||
# conv 2d
|
||||
# TH kernel shape: (depth, input_depth, rows, cols)
|
||||
# TF kernel shape: (rows, cols, input_depth, depth)
|
||||
if dim_ordering == 'th':
|
||||
w = kernel.shape[2]
|
||||
h = kernel.shape[3]
|
||||
for i in range(w):
|
||||
for j in range(h):
|
||||
new_kernel[:, :, i, j] = kernel[:, :, w - i - 1, h - j - 1]
|
||||
elif dim_ordering == 'tf':
|
||||
w = kernel.shape[0]
|
||||
h = kernel.shape[1]
|
||||
for i in range(w):
|
||||
for j in range(h):
|
||||
new_kernel[i, j, :, :] = kernel[w - i - 1, h - j - 1, :, :]
|
||||
else:
|
||||
raise ValueError('Invalid dim_ordering:', dim_ordering)
|
||||
elif kernel.ndim == 5:
|
||||
# conv 3d
|
||||
# TH kernel shape: (out_depth, input_depth, kernel_dim1, kernel_dim2, kernel_dim3)
|
||||
# TF kernel shape: (kernel_dim1, kernel_dim2, kernel_dim3, input_depth, out_depth)
|
||||
if dim_ordering == 'th':
|
||||
w = kernel.shape[2]
|
||||
h = kernel.shape[3]
|
||||
z = kernel.shape[4]
|
||||
for i in range(w):
|
||||
for j in range(h):
|
||||
for k in range(z):
|
||||
new_kernel[:, :, i, j, k] = kernel[:, :,
|
||||
w - i - 1,
|
||||
h - j - 1,
|
||||
z - k - 1]
|
||||
elif dim_ordering == 'tf':
|
||||
w = kernel.shape[0]
|
||||
h = kernel.shape[1]
|
||||
z = kernel.shape[2]
|
||||
for i in range(w):
|
||||
for j in range(h):
|
||||
for k in range(z):
|
||||
new_kernel[i, j, k, :, :] = kernel[w - i - 1,
|
||||
h - j - 1,
|
||||
z - k - 1,
|
||||
:, :]
|
||||
else:
|
||||
raise ValueError('Invalid dim_ordering:', dim_ordering)
|
||||
else:
|
||||
if not 4 <= kernel.ndim <= 5:
|
||||
raise ValueError('Invalid kernel shape:', kernel.shape)
|
||||
return new_kernel
|
||||
|
||||
slices = [slice(None, None, -1) for _ in range(kernel.ndim)]
|
||||
no_flip = (slice(None, None), slice(None, None))
|
||||
if dim_ordering == 'th': # (out_depth, input_depth, ...)
|
||||
slices[:2] = no_flip
|
||||
elif dim_ordering == 'tf': # (..., input_depth, out_depth)
|
||||
slices[-2:] = no_flip
|
||||
else:
|
||||
raise ValueError('Invalid dim_ordering:', dim_ordering)
|
||||
|
||||
return np.copy(kernel[slices])
|
||||
|
||||
|
||||
def conv_output_length(input_length, filter_size, border_mode, stride, dilation=1):
|
||||
def conv_output_length(input_length, filter_size,
|
||||
border_mode, stride, dilation=1):
|
||||
"""Determines output length of a convolution given input length.
|
||||
|
||||
# Arguments
|
||||
input_length: integer.
|
||||
filter_size: integer.
|
||||
border_mode: one of "same", "valid", "full".
|
||||
stride: integer.
|
||||
dilation: dilation rate, integer.
|
||||
|
||||
# Returns
|
||||
The output length (integer).
|
||||
"""
|
||||
if input_length is None:
|
||||
return None
|
||||
assert border_mode in {'same', 'valid', 'full'}
|
||||
@@ -135,6 +124,17 @@ def conv_output_length(input_length, filter_size, border_mode, stride, dilation=
|
||||
|
||||
|
||||
def conv_input_length(output_length, filter_size, border_mode, stride):
|
||||
"""Determines input length of a convolution given output length.
|
||||
|
||||
# Arguments
|
||||
output_length: integer.
|
||||
filter_size: integer.
|
||||
border_mode: one of "same", "valid", "full".
|
||||
stride: integer.
|
||||
|
||||
# Returns
|
||||
The input length (integer).
|
||||
"""
|
||||
if output_length is None:
|
||||
return None
|
||||
assert border_mode in {'same', 'valid', 'full'}
|
||||
|
||||
+21
-12
@@ -1,23 +1,26 @@
|
||||
"""Utilities related to Keras unit tests."""
|
||||
import numpy as np
|
||||
from numpy.testing import assert_allclose
|
||||
import inspect
|
||||
import six
|
||||
|
||||
from ..engine import Model, Input
|
||||
from ..models import Sequential, model_from_json
|
||||
from ..models import Sequential
|
||||
from ..models import model_from_json
|
||||
from .. import backend as K
|
||||
|
||||
|
||||
def get_test_data(nb_train=1000, nb_test=500, input_shape=(10,),
|
||||
output_shape=(2,),
|
||||
classification=True, nb_class=2):
|
||||
'''
|
||||
classification=True overrides output_shape
|
||||
(i.e. output_shape is set to (1,)) and the output
|
||||
consists in integers in [0, nb_class-1].
|
||||
"""Generates test data to train a model on.
|
||||
|
||||
Otherwise: float output with shape output_shape.
|
||||
'''
|
||||
classification=True overrides output_shape
|
||||
(i.e. output_shape is set to (1,)) and the output
|
||||
consists in integers in [0, nb_class-1].
|
||||
|
||||
Otherwise: float output with shape output_shape.
|
||||
"""
|
||||
nb_sample = nb_train + nb_test
|
||||
if classification:
|
||||
y = np.random.randint(0, nb_class, size=(nb_sample,))
|
||||
@@ -38,9 +41,9 @@ def get_test_data(nb_train=1000, nb_test=500, input_shape=(10,),
|
||||
def layer_test(layer_cls, kwargs={}, input_shape=None, input_dtype=None,
|
||||
input_data=None, expected_output=None,
|
||||
expected_output_dtype=None, fixed_batch_size=False):
|
||||
'''Test routine for a layer with a single input tensor
|
||||
"""Test routine for a layer with a single input tensor
|
||||
and single output tensor.
|
||||
'''
|
||||
"""
|
||||
if input_data is None:
|
||||
assert input_shape
|
||||
if not input_dtype:
|
||||
@@ -121,12 +124,18 @@ def layer_test(layer_cls, kwargs={}, input_shape=None, input_dtype=None,
|
||||
|
||||
|
||||
def keras_test(func):
|
||||
'''Clean up after tensorflow tests.
|
||||
'''
|
||||
"""Function wrapper to clean up after TensorFlow tests.
|
||||
|
||||
# Arguments
|
||||
func: test function to clean up after.
|
||||
|
||||
# Returns
|
||||
A function wrapping the input function.
|
||||
"""
|
||||
@six.wraps(func)
|
||||
def wrapper(*args, **kwargs):
|
||||
output = func(*args, **kwargs)
|
||||
if K._BACKEND == 'tensorflow':
|
||||
if K.backend() == 'tensorflow':
|
||||
K.clear_session()
|
||||
return output
|
||||
return wrapper
|
||||
|
||||
@@ -1,20 +1,31 @@
|
||||
"""Utilities related to model visualization."""
|
||||
import os
|
||||
|
||||
from ..layers.wrappers import Wrapper
|
||||
from ..models import Sequential
|
||||
|
||||
try:
|
||||
# pydot-ng is a fork of pydot that is better maintained
|
||||
# pydot-ng is a fork of pydot that is better maintained.
|
||||
import pydot_ng as pydot
|
||||
except ImportError:
|
||||
# fall back on pydot if necessary
|
||||
# Fall back on pydot if necessary.
|
||||
import pydot
|
||||
if not pydot.find_graphviz():
|
||||
raise RuntimeError('Failed to import pydot. You must install pydot'
|
||||
' and graphviz for `pydotprint` to work.')
|
||||
raise ImportError('Failed to import pydot. You must install pydot'
|
||||
' and graphviz for `pydotprint` to work.')
|
||||
|
||||
|
||||
def model_to_dot(model, show_shapes=False, show_layer_names=True):
|
||||
"""Converts a Keras model to dot format.
|
||||
|
||||
# Arguments
|
||||
model: A Keras model instance.
|
||||
show_shapes: whether to display shape information.
|
||||
show_layer_names: whether to display layer names.
|
||||
|
||||
# Returns
|
||||
A `pydot.Dot` instance representing the Keras model.
|
||||
"""
|
||||
dot = pydot.Dot()
|
||||
dot.set('rankdir', 'TB')
|
||||
dot.set('concentrate', True)
|
||||
@@ -48,7 +59,7 @@ def model_to_dot(model, show_shapes=False, show_layer_names=True):
|
||||
if show_shapes:
|
||||
try:
|
||||
outputlabels = str(layer.output_shape)
|
||||
except:
|
||||
except AttributeError:
|
||||
outputlabels = 'multiple'
|
||||
if hasattr(layer, 'input_shape'):
|
||||
inputlabels = str(layer.input_shape)
|
||||
@@ -77,9 +88,9 @@ def model_to_dot(model, show_shapes=False, show_layer_names=True):
|
||||
|
||||
def plot(model, to_file='model.png', show_shapes=False, show_layer_names=True):
|
||||
dot = model_to_dot(model, show_shapes, show_layer_names)
|
||||
_, format = os.path.splitext(to_file)
|
||||
if not format:
|
||||
format = 'png'
|
||||
_, extension = os.path.splitext(to_file)
|
||||
if not extension:
|
||||
extension = 'png'
|
||||
else:
|
||||
format = format[1:]
|
||||
dot.write(to_file, format=format)
|
||||
extension = extension[1:]
|
||||
dot.write(to_file, format=extension)
|
||||
|
||||
@@ -1,7 +1,9 @@
|
||||
from __future__ import absolute_import
|
||||
|
||||
import copy
|
||||
import inspect
|
||||
import types
|
||||
|
||||
import numpy as np
|
||||
|
||||
from ..utils.np_utils import to_categorical
|
||||
@@ -9,14 +11,14 @@ from ..models import Sequential
|
||||
|
||||
|
||||
class BaseWrapper(object):
|
||||
'''Base class for the Keras scikit-learn wrapper.
|
||||
"""Base class for the Keras scikit-learn wrapper.
|
||||
|
||||
Warning: This class should not be used directly.
|
||||
Use descendant classes instead.
|
||||
|
||||
# Arguments
|
||||
build_fn: callable function or class instance
|
||||
sk_params: model parameters & fitting parameters
|
||||
**sk_params: model parameters & fitting parameters
|
||||
|
||||
The build_fn should construct, compile and return a Keras model, which
|
||||
will then be used to fit/predict. One of the following
|
||||
@@ -47,7 +49,7 @@ class BaseWrapper(object):
|
||||
those you could pass to `sk_params`, including fitting parameters.
|
||||
In other words, you could use `grid_search` to search for the best
|
||||
`batch_size` or `nb_epoch` as well as the model parameters.
|
||||
'''
|
||||
"""
|
||||
|
||||
def __init__(self, build_fn=None, **sk_params):
|
||||
self.build_fn = build_fn
|
||||
@@ -55,18 +57,20 @@ class BaseWrapper(object):
|
||||
self.check_params(sk_params)
|
||||
|
||||
def check_params(self, params):
|
||||
'''Check for user typos in "params" keys to avoid
|
||||
unwanted usage of default values
|
||||
"""Checks for user typos in "params".
|
||||
|
||||
# Arguments
|
||||
params: dictionary
|
||||
The parameters to be checked
|
||||
'''
|
||||
params: dictionary; the parameters to be checked
|
||||
|
||||
# Raises
|
||||
ValueError: if any member of `params` is not a valid argument.
|
||||
"""
|
||||
legal_params_fns = [Sequential.fit, Sequential.predict,
|
||||
Sequential.predict_classes, Sequential.evaluate]
|
||||
if self.build_fn is None:
|
||||
legal_params_fns.append(self.__call__)
|
||||
elif not isinstance(self.build_fn, types.FunctionType) and not isinstance(self.build_fn, types.MethodType):
|
||||
elif (not isinstance(self.build_fn, types.FunctionType) and
|
||||
not isinstance(self.build_fn, types.MethodType)):
|
||||
legal_params_fns.append(self.build_fn.__call__)
|
||||
else:
|
||||
legal_params_fns.append(self.build_fn)
|
||||
@@ -80,57 +84,50 @@ class BaseWrapper(object):
|
||||
if params_name not in legal_params:
|
||||
raise ValueError('{} is not a legal parameter'.format(params_name))
|
||||
|
||||
def get_params(self, deep=True):
|
||||
'''Get parameters for this estimator.
|
||||
|
||||
# Arguments
|
||||
deep: boolean, optional
|
||||
If True, will return the parameters for this estimator and
|
||||
contained sub-objects that are estimators.
|
||||
def get_params(self, **params):
|
||||
"""Gets parameters for this estimator.
|
||||
|
||||
# Returns
|
||||
params : dict
|
||||
Dictionary of parameter names mapped to their values.
|
||||
'''
|
||||
"""
|
||||
res = copy.deepcopy(self.sk_params)
|
||||
res.update({'build_fn': self.build_fn})
|
||||
return res
|
||||
|
||||
def set_params(self, **params):
|
||||
'''Set the parameters of this estimator.
|
||||
"""Sets the parameters of this estimator.
|
||||
|
||||
# Arguments
|
||||
params: dict
|
||||
Dictionary of parameter names mapped to their values.
|
||||
**params: Dictionary of parameter names mapped to their values.
|
||||
|
||||
# Returns
|
||||
self
|
||||
'''
|
||||
"""
|
||||
self.check_params(params)
|
||||
self.sk_params.update(params)
|
||||
return self
|
||||
|
||||
def fit(self, X, y, **kwargs):
|
||||
'''Construct a new model with build_fn and fit the model according
|
||||
to the given training data.
|
||||
def fit(self, x, y, **kwargs):
|
||||
"""Constructs a new model with `build_fn` & fits the model to `(x, y)`.
|
||||
|
||||
# Arguments
|
||||
X : array-like, shape `(n_samples, n_features)`
|
||||
x : array-like, shape `(n_samples, n_features)`
|
||||
Training samples where n_samples is the number of samples
|
||||
and n_features is the number of features.
|
||||
y : array-like, shape `(n_samples,)` or `(n_samples, n_outputs)`
|
||||
True labels for x.
|
||||
kwargs: dictionary arguments
|
||||
**kwargs: dictionary arguments
|
||||
Legal arguments are the arguments of `Sequential.fit`
|
||||
|
||||
# Returns
|
||||
history : object
|
||||
details about the training history at each epoch.
|
||||
'''
|
||||
|
||||
"""
|
||||
if self.build_fn is None:
|
||||
self.model = self.__call__(**self.filter_sk_params(self.__call__))
|
||||
elif not isinstance(self.build_fn, types.FunctionType) and not isinstance(self.build_fn, types.MethodType):
|
||||
elif (not isinstance(self.build_fn, types.FunctionType) and
|
||||
not isinstance(self.build_fn, types.MethodType)):
|
||||
self.model = self.build_fn(
|
||||
**self.filter_sk_params(self.build_fn.__call__))
|
||||
else:
|
||||
@@ -145,12 +142,12 @@ class BaseWrapper(object):
|
||||
fit_args = copy.deepcopy(self.filter_sk_params(Sequential.fit))
|
||||
fit_args.update(kwargs)
|
||||
|
||||
history = self.model.fit(X, y, **fit_args)
|
||||
history = self.model.fit(x, y, **fit_args)
|
||||
|
||||
return history
|
||||
|
||||
def filter_sk_params(self, fn, override={}):
|
||||
'''Filter sk_params and return those in fn's arguments
|
||||
def filter_sk_params(self, fn, override=None):
|
||||
"""Filters `sk_params` and returns those in `fn`'s arguments.
|
||||
|
||||
# Arguments
|
||||
fn : arbitrary function
|
||||
@@ -159,7 +156,8 @@ class BaseWrapper(object):
|
||||
# Returns
|
||||
res : dictionary containing variables
|
||||
in both sk_params and fn's arguments.
|
||||
'''
|
||||
"""
|
||||
override = override or {}
|
||||
res = {}
|
||||
fn_args = inspect.getargspec(fn)[0]
|
||||
for name, value in self.sk_params.items():
|
||||
@@ -170,35 +168,37 @@ class BaseWrapper(object):
|
||||
|
||||
|
||||
class KerasClassifier(BaseWrapper):
|
||||
'''Implementation of the scikit-learn classifier API for Keras.
|
||||
'''
|
||||
"""Implementation of the scikit-learn classifier API for Keras.
|
||||
"""
|
||||
|
||||
def predict(self, X, **kwargs):
|
||||
'''Returns the class predictions for the given test data.
|
||||
def predict(self, x, **kwargs):
|
||||
"""Returns the class predictions for the given test data.
|
||||
|
||||
# Arguments
|
||||
X: array-like, shape `(n_samples, n_features)`
|
||||
x: array-like, shape `(n_samples, n_features)`
|
||||
Test samples where n_samples is the number of samples
|
||||
and n_features is the number of features.
|
||||
kwargs: dictionary arguments
|
||||
Legal arguments are the arguments of `Sequential.predict_classes`.
|
||||
**kwargs: dictionary arguments
|
||||
Legal arguments are the arguments
|
||||
of `Sequential.predict_classes`.
|
||||
|
||||
# Returns
|
||||
preds: array-like, shape `(n_samples,)`
|
||||
Class predictions.
|
||||
'''
|
||||
"""
|
||||
kwargs = self.filter_sk_params(Sequential.predict_classes, kwargs)
|
||||
return self.model.predict_classes(X, **kwargs)
|
||||
return self.model.predict_classes(x, **kwargs)
|
||||
|
||||
def predict_proba(self, X, **kwargs):
|
||||
'''Returns class probability estimates for the given test data.
|
||||
def predict_proba(self, x, **kwargs):
|
||||
"""Returns class probability estimates for the given test data.
|
||||
|
||||
# Arguments
|
||||
X: array-like, shape `(n_samples, n_features)`
|
||||
x: array-like, shape `(n_samples, n_features)`
|
||||
Test samples where n_samples is the number of samples
|
||||
and n_features is the number of features.
|
||||
kwargs: dictionary arguments
|
||||
Legal arguments are the arguments of `Sequential.predict_classes`.
|
||||
**kwargs: dictionary arguments
|
||||
Legal arguments are the arguments
|
||||
of `Sequential.predict_classes`.
|
||||
|
||||
# Returns
|
||||
proba: array-like, shape `(n_samples, n_outputs)`
|
||||
@@ -207,9 +207,9 @@ class KerasClassifier(BaseWrapper):
|
||||
to match the scikit-learn API,
|
||||
will return an array of shape '(n_samples, 2)'
|
||||
(instead of `(n_sample, 1)` as in Keras).
|
||||
'''
|
||||
"""
|
||||
kwargs = self.filter_sk_params(Sequential.predict_proba, kwargs)
|
||||
probs = self.model.predict_proba(X, **kwargs)
|
||||
probs = self.model.predict_proba(x, **kwargs)
|
||||
|
||||
# check if binary classification
|
||||
if probs.shape[1] == 1:
|
||||
@@ -217,22 +217,27 @@ class KerasClassifier(BaseWrapper):
|
||||
probs = np.hstack([1 - probs, probs])
|
||||
return probs
|
||||
|
||||
def score(self, X, y, **kwargs):
|
||||
'''Returns the mean accuracy on the given test data and labels.
|
||||
def score(self, x, y, **kwargs):
|
||||
"""Returns the mean accuracy on the given test data and labels.
|
||||
|
||||
# Arguments
|
||||
X: array-like, shape `(n_samples, n_features)`
|
||||
x: array-like, shape `(n_samples, n_features)`
|
||||
Test samples where n_samples is the number of samples
|
||||
and n_features is the number of features.
|
||||
y: array-like, shape `(n_samples,)` or `(n_samples, n_outputs)`
|
||||
True labels for X.
|
||||
kwargs: dictionary arguments
|
||||
True labels for x.
|
||||
**kwargs: dictionary arguments
|
||||
Legal arguments are the arguments of `Sequential.evaluate`.
|
||||
|
||||
# Returns
|
||||
score: float
|
||||
Mean accuracy of predictions on X wrt. y.
|
||||
'''
|
||||
|
||||
# Raises
|
||||
ValueError: If the underlying model isn't configured to
|
||||
compute accuracy. You should pass `metrics=["accuracy"]` to
|
||||
the `.compile()` method of the model.
|
||||
"""
|
||||
kwargs = self.filter_sk_params(Sequential.evaluate, kwargs)
|
||||
|
||||
loss_name = self.model.loss
|
||||
@@ -241,7 +246,7 @@ class KerasClassifier(BaseWrapper):
|
||||
if loss_name == 'categorical_crossentropy' and len(y.shape) != 2:
|
||||
y = to_categorical(y)
|
||||
|
||||
outputs = self.model.evaluate(X, y, **kwargs)
|
||||
outputs = self.model.evaluate(x, y, **kwargs)
|
||||
if not isinstance(outputs, list):
|
||||
outputs = [outputs]
|
||||
for name, output in zip(self.model.metrics_names, outputs):
|
||||
@@ -253,43 +258,44 @@ class KerasClassifier(BaseWrapper):
|
||||
|
||||
|
||||
class KerasRegressor(BaseWrapper):
|
||||
'''Implementation of the scikit-learn regressor API for Keras.
|
||||
'''
|
||||
"""Implementation of the scikit-learn regressor API for Keras.
|
||||
"""
|
||||
|
||||
def predict(self, X, **kwargs):
|
||||
'''Returns predictions for the given test data.
|
||||
def predict(self, x, **kwargs):
|
||||
"""Returns predictions for the given test data.
|
||||
|
||||
# Arguments
|
||||
X: array-like, shape `(n_samples, n_features)`
|
||||
x: array-like, shape `(n_samples, n_features)`
|
||||
Test samples where n_samples is the number of samples
|
||||
and n_features is the number of features.
|
||||
kwargs: dictionary arguments
|
||||
**kwargs: dictionary arguments
|
||||
Legal arguments are the arguments of `Sequential.predict`.
|
||||
|
||||
# Returns
|
||||
preds: array-like, shape `(n_samples,)`
|
||||
Predictions.
|
||||
'''
|
||||
"""
|
||||
kwargs = self.filter_sk_params(Sequential.predict, kwargs)
|
||||
return np.squeeze(self.model.predict(X, **kwargs))
|
||||
return np.squeeze(self.model.predict(x, **kwargs))
|
||||
|
||||
def score(self, X, y, **kwargs):
|
||||
'''Returns the mean loss on the given test data and labels.
|
||||
def score(self, x, y, **kwargs):
|
||||
"""Returns the mean loss on the given test data and labels.
|
||||
|
||||
# Arguments
|
||||
X: array-like, shape `(n_samples, n_features)`
|
||||
x: array-like, shape `(n_samples, n_features)`
|
||||
Test samples where n_samples is the number of samples
|
||||
and n_features is the number of features.
|
||||
y: array-like, shape `(n_samples,)`
|
||||
True labels for x.
|
||||
kwargs: dictionary arguments
|
||||
**kwargs: dictionary arguments
|
||||
Legal arguments are the arguments of `Sequential.evaluate`.
|
||||
|
||||
# Returns
|
||||
score: float
|
||||
Mean loss of predictions on x wrt. y.
|
||||
'''
|
||||
"""
|
||||
kwargs = self.filter_sk_params(Sequential.evaluate, kwargs)
|
||||
loss = self.model.evaluate(X, y, **kwargs)
|
||||
loss = self.model.evaluate(x, y, **kwargs)
|
||||
if isinstance(loss, list):
|
||||
return loss[0]
|
||||
return loss
|
||||
|
||||
+3
-15
@@ -10,23 +10,11 @@ addopts=-v
|
||||
norecursedirs= build
|
||||
|
||||
# PEP-8 The following are ignored:
|
||||
# E251 unexpected spaces around keyword / parameter equals
|
||||
# E225 missing whitespace around operator
|
||||
# E226 missing whitespace around arithmetic operator
|
||||
# W293 blank line contains whitespace
|
||||
# E501 line too long (82 > 79 characters)
|
||||
# E402 module level import not at top of file - temporary measure to coninue adding ros python packaged in sys.path
|
||||
# E402 module level import not at top of file - temporary measure to continue adding ros python packaged in sys.path
|
||||
# E731 do not assign a lambda expression, use a def
|
||||
# E302 two blank lines between the functions
|
||||
# E261 at least two spaces before inline comment
|
||||
|
||||
|
||||
pep8ignore=* E251 \
|
||||
* E225 \
|
||||
* E226 \
|
||||
* W293 \
|
||||
* E501 \
|
||||
pep8ignore=* E501 \
|
||||
* E402 \
|
||||
* E731 \
|
||||
* E302 \
|
||||
* E261
|
||||
|
||||
|
||||
+8
-2
@@ -3,16 +3,22 @@ from setuptools import find_packages
|
||||
|
||||
|
||||
setup(name='Keras',
|
||||
version='1.2.0',
|
||||
version='1.2.2',
|
||||
description='Deep Learning for Python',
|
||||
author='Francois Chollet',
|
||||
author_email='francois.chollet@gmail.com',
|
||||
url='https://github.com/fchollet/keras',
|
||||
download_url='https://github.com/fchollet/keras/tarball/1.2.0',
|
||||
download_url='https://github.com/fchollet/keras/tarball/1.2.2',
|
||||
license='MIT',
|
||||
install_requires=['theano', 'pyyaml', 'six'],
|
||||
extras_require={
|
||||
'h5py': ['h5py'],
|
||||
'visualize': ['pydot-ng'],
|
||||
'tests': ['pytest',
|
||||
'pytest-cov',
|
||||
'pytest-pep8',
|
||||
'pytest-xdist',
|
||||
'python-coveralls',
|
||||
'coverage==3.7.1'],
|
||||
},
|
||||
packages=find_packages())
|
||||
|
||||
@@ -108,8 +108,8 @@ def test_stacked_lstm_char_prediction():
|
||||
y = np.zeros((len(sentences), number_of_chars), dtype=np.bool)
|
||||
for i, sentence in enumerate(sentences):
|
||||
for t, char in enumerate(sentence):
|
||||
X[i, t, ord(char)-ord('a')] = 1
|
||||
y[i, ord(next_chars[i])-ord('a')] = 1
|
||||
X[i, t, ord(char) - ord('a')] = 1
|
||||
y[i, ord(next_chars[i]) - ord('a')] = 1
|
||||
|
||||
# learn the alphabet with stacked LSTM
|
||||
model = Sequential([
|
||||
@@ -123,7 +123,7 @@ def test_stacked_lstm_char_prediction():
|
||||
# prime the model with 'ab' sequence and let it generate the learned alphabet
|
||||
sentence = alphabet[:sequence_length]
|
||||
generated = sentence
|
||||
for iteration in range(number_of_chars-sequence_length):
|
||||
for iteration in range(number_of_chars - sequence_length):
|
||||
x = np.zeros((1, sequence_length, number_of_chars))
|
||||
for t, char in enumerate(sentence):
|
||||
x[0, t, ord(char) - ord('a')] = 1.
|
||||
|
||||
@@ -77,10 +77,27 @@ class TestBackend(object):
|
||||
|
||||
check_two_tensor_operation('batch_dot', (4, 2, 3), (4, 5, 3),
|
||||
axes=(2, 2))
|
||||
check_two_tensor_operation('batch_dot', (32, 20), (32, 20), axes=1)
|
||||
check_two_tensor_operation('batch_dot', (32, 20), (32, 20), axes=(1, 1))
|
||||
check_single_tensor_operation('transpose', (4, 2))
|
||||
check_single_tensor_operation('reverse', (4, 3, 2), axes=1)
|
||||
check_single_tensor_operation('reverse', (4, 3, 2), axes=(1, 2))
|
||||
|
||||
def test_batch_dot_shape(self):
    """Check `KTF.batch_dot` on pairs of 2D all-ones tensors.

    Every case evaluates the result and compares it, within an absolute
    tolerance of 1e-05, against an all-ones column multiplied by a constant.
    """
    tolerance = 1e-05

    # Case group 1: both operands have shape (32, 20), integer `axes`.
    lhs = KTF.ones(shape=(32, 20))
    rhs = KTF.ones(shape=(32, 20))

    result = KTF.batch_dot(lhs, rhs, axes=1)
    assert_allclose(KTF.eval(result), 20 * np.ones((32, 1)), atol=tolerance)

    result = KTF.batch_dot(lhs, rhs, axes=0)
    assert_allclose(KTF.eval(result), 32 * np.ones((20, 1)), atol=tolerance)

    # Case group 2: second operand is (20, 32) and `axes` is a pair, to make
    # sure swapping axes when ndim == 2 works.
    lhs = KTF.ones(shape=(32, 20))
    rhs = KTF.ones(shape=(20, 32))

    result = KTF.batch_dot(lhs, rhs, axes=(0, 1))
    assert_allclose(KTF.eval(result), 32 * np.ones((20, 1)), atol=tolerance)

    result = KTF.batch_dot(lhs, rhs, axes=(1, 0))
    assert_allclose(KTF.eval(result), 20 * np.ones((32, 1)), atol=tolerance)
|
||||
|
||||
def test_shape_operations(self):
|
||||
# concatenate
|
||||
xval = np.random.random((4, 3))
|
||||
@@ -790,7 +807,7 @@ class TestBackend(object):
|
||||
|
||||
# len max_time_steps array of batch_size x depth matrices
|
||||
inputs = ([input_prob_matrix_0[t, :][np.newaxis, :]
|
||||
for t in range(seq_len_0)] + # Pad to max_time_steps = 8
|
||||
for t in range(seq_len_0)] + # Pad to max_time_steps = 8
|
||||
2 * [np.zeros((1, depth), dtype=np.float32)])
|
||||
|
||||
inputs = KTF.variable(np.asarray(inputs).transpose((1, 0, 2)))
|
||||
@@ -899,7 +916,7 @@ class TestBackend(object):
|
||||
def test_foldl(self):
|
||||
x = np.random.rand(10, 3).astype(np.float32)
|
||||
for K in [KTF, KTH]:
|
||||
kx = K.eval(K.foldl(lambda a, b: a+b, x))
|
||||
kx = K.eval(K.foldl(lambda a, b: a + b, x))
|
||||
|
||||
assert (3,) == kx.shape
|
||||
assert_allclose(x.sum(axis=0), kx, atol=1e-05)
|
||||
@@ -911,8 +928,8 @@ class TestBackend(object):
|
||||
# right to left we have no such problem and the result is larger
|
||||
x = np.array([1e-20, 1e-20, 10, 10, 10], dtype=np.float32)
|
||||
for K in [KTF, KTH]:
|
||||
p1 = K.eval(K.foldl(lambda a, b: a*b, x))
|
||||
p2 = K.eval(K.foldr(lambda a, b: a*b, x))
|
||||
p1 = K.eval(K.foldl(lambda a, b: a * b, x))
|
||||
p2 = K.eval(K.foldr(lambda a, b: a * b, x))
|
||||
|
||||
assert p1 < p2
|
||||
assert 9e-38 < p2 <= 1e-37
|
||||
|
||||
@@ -10,6 +10,28 @@ from keras.models import model_from_json, model_from_yaml
|
||||
from keras.utils.test_utils import keras_test
|
||||
|
||||
|
||||
@keras_test
def test_get_updates_for():
    """Updates added to a layer are returned by `get_updates_for` per key.

    One update is registered with `inputs` set to an `Input` tensor and one
    with `inputs=None`; each lookup must return only its own entry.
    """
    source = Input(shape=(2,))
    layer = Dense(1)

    # The integers 0 and 1 are placeholder payloads that make the two
    # registrations distinguishable in the assertions below.
    layer.add_update(0, inputs=source)
    layer.add_update(1, inputs=None)

    found_for_source = layer.get_updates_for(source)
    assert found_for_source == [0]

    found_for_none = layer.get_updates_for(None)
    assert found_for_none == [1]
|
||||
|
||||
|
||||
@keras_test
def test_get_losses_for():
    """Losses added to a layer are returned by `get_losses_for` per key.

    One loss is registered with `inputs` set to an `Input` tensor and one
    with `inputs=None`; each lookup must return only its own entry.
    """
    source = Input(shape=(2,))
    layer = Dense(1)

    # The integers 0 and 1 are placeholder payloads that make the two
    # registrations distinguishable in the assertions below.
    layer.add_loss(0, inputs=source)
    layer.add_loss(1, inputs=None)

    found_for_source = layer.get_losses_for(source)
    assert found_for_source == [0]

    found_for_none = layer.get_losses_for(None)
    assert found_for_none == [1]
|
||||
|
||||
|
||||
@keras_test
|
||||
def test_trainable_weights():
|
||||
a = Input(shape=(2,))
|
||||
|
||||
@@ -9,7 +9,7 @@ from keras.layers import convolutional, pooling
|
||||
|
||||
|
||||
# TensorFlow does not support full convolution.
|
||||
if K._BACKEND == 'theano':
|
||||
if K.backend() == 'theano':
|
||||
_convolution_border_modes = ['valid', 'same', 'full']
|
||||
else:
|
||||
_convolution_border_modes = ['valid', 'same']
|
||||
@@ -142,37 +142,38 @@ def test_deconvolution_2d():
|
||||
nb_row = 10
|
||||
nb_col = 6
|
||||
|
||||
for border_mode in _convolution_border_modes:
|
||||
for subsample in [(1, 1), (2, 2)]:
|
||||
if border_mode == 'same' and subsample != (1, 1):
|
||||
continue
|
||||
for batch_size in [None, nb_samples]:
|
||||
for border_mode in _convolution_border_modes:
|
||||
for subsample in [(1, 1), (2, 2)]:
|
||||
if border_mode == 'same' and subsample != (1, 1):
|
||||
continue
|
||||
|
||||
rows = conv_input_length(nb_row, 3, border_mode, subsample[0])
|
||||
cols = conv_input_length(nb_col, 3, border_mode, subsample[1])
|
||||
layer_test(convolutional.Deconvolution2D,
|
||||
kwargs={'nb_filter': nb_filter,
|
||||
'nb_row': 3,
|
||||
'nb_col': 3,
|
||||
'output_shape': (nb_samples, nb_filter, rows, cols),
|
||||
'border_mode': border_mode,
|
||||
'subsample': subsample,
|
||||
'dim_ordering': 'th'},
|
||||
input_shape=(nb_samples, stack_size, nb_row, nb_col),
|
||||
fixed_batch_size=True)
|
||||
rows = conv_input_length(nb_row, 3, border_mode, subsample[0])
|
||||
cols = conv_input_length(nb_col, 3, border_mode, subsample[1])
|
||||
layer_test(convolutional.Deconvolution2D,
|
||||
kwargs={'nb_filter': nb_filter,
|
||||
'nb_row': 3,
|
||||
'nb_col': 3,
|
||||
'output_shape': (batch_size, nb_filter, rows, cols),
|
||||
'border_mode': border_mode,
|
||||
'subsample': subsample,
|
||||
'dim_ordering': 'th'},
|
||||
input_shape=(nb_samples, stack_size, nb_row, nb_col),
|
||||
fixed_batch_size=True)
|
||||
|
||||
layer_test(convolutional.Deconvolution2D,
|
||||
kwargs={'nb_filter': nb_filter,
|
||||
'nb_row': 3,
|
||||
'nb_col': 3,
|
||||
'output_shape': (nb_samples, nb_filter, rows, cols),
|
||||
'border_mode': border_mode,
|
||||
'dim_ordering': 'th',
|
||||
'W_regularizer': 'l2',
|
||||
'b_regularizer': 'l2',
|
||||
'activity_regularizer': 'activity_l2',
|
||||
'subsample': subsample},
|
||||
input_shape=(nb_samples, stack_size, nb_row, nb_col),
|
||||
fixed_batch_size=True)
|
||||
layer_test(convolutional.Deconvolution2D,
|
||||
kwargs={'nb_filter': nb_filter,
|
||||
'nb_row': 3,
|
||||
'nb_col': 3,
|
||||
'output_shape': (batch_size, nb_filter, rows, cols),
|
||||
'border_mode': border_mode,
|
||||
'dim_ordering': 'th',
|
||||
'W_regularizer': 'l2',
|
||||
'b_regularizer': 'l2',
|
||||
'activity_regularizer': 'activity_l2',
|
||||
'subsample': subsample},
|
||||
input_shape=(nb_samples, stack_size, nb_row, nb_col),
|
||||
fixed_batch_size=True)
|
||||
|
||||
|
||||
@keras_test
|
||||
@@ -213,7 +214,7 @@ def test_atrous_conv_2d():
|
||||
input_shape=(nb_samples, nb_row, nb_col, stack_size))
|
||||
|
||||
|
||||
@pytest.mark.skipif(K._BACKEND != 'tensorflow', reason="Requires TF backend")
|
||||
@pytest.mark.skipif(K.backend() != 'tensorflow', reason='Requires TF backend')
|
||||
@keras_test
|
||||
def test_separable_conv_2d():
|
||||
nb_samples = 2
|
||||
@@ -665,6 +666,15 @@ def test_cropping_2d():
|
||||
cropping[1][0]: -cropping[1][1],
|
||||
:]
|
||||
assert_allclose(np_output, expected_out)
|
||||
# another correctness test (no cropping)
|
||||
cropping = ((0, 0), (0, 0))
|
||||
layer = convolutional.Cropping2D(cropping=cropping,
|
||||
dim_ordering=dim_ordering)
|
||||
layer.build(input.shape)
|
||||
output = layer(K.variable(input))
|
||||
np_output = K.eval(output)
|
||||
# compare with input
|
||||
assert_allclose(np_output, input)
|
||||
|
||||
|
||||
def test_cropping_3d():
|
||||
@@ -708,6 +718,15 @@ def test_cropping_3d():
|
||||
cropping[2][0]: -cropping[2][1],
|
||||
:]
|
||||
assert_allclose(np_output, expected_out)
|
||||
# another correctness test (no cropping)
|
||||
cropping = ((0, 0), (0, 0), (0, 0))
|
||||
layer = convolutional.Cropping3D(cropping=cropping,
|
||||
dim_ordering=dim_ordering)
|
||||
layer.build(input.shape)
|
||||
output = layer(K.variable(input))
|
||||
np_output = K.eval(output)
|
||||
# compare with input
|
||||
assert_allclose(np_output, input)
|
||||
|
||||
if __name__ == '__main__':
|
||||
pytest.main([__file__])
|
||||
|
||||
@@ -223,6 +223,10 @@ def test_dropout():
|
||||
kwargs={'p': 0.5},
|
||||
input_shape=(3, 2))
|
||||
|
||||
layer_test(core.Dropout,
|
||||
kwargs={'p': 0.5, 'noise_shape': [3, 1]},
|
||||
input_shape=(3, 2))
|
||||
|
||||
layer_test(core.SpatialDropout1D,
|
||||
kwargs={'p': 0.5},
|
||||
input_shape=(2, 3, 4))
|
||||
@@ -255,6 +259,14 @@ def test_reshape():
|
||||
kwargs={'target_shape': (8, 1)},
|
||||
input_shape=(3, 2, 4))
|
||||
|
||||
layer_test(core.Reshape,
|
||||
kwargs={'target_shape': (-1, 1)},
|
||||
input_shape=(3, 2, 4))
|
||||
|
||||
layer_test(core.Reshape,
|
||||
kwargs={'target_shape': (1, -1)},
|
||||
input_shape=(3, 2, 4))
|
||||
|
||||
|
||||
@keras_test
|
||||
def test_permute():
|
||||
|
||||
@@ -46,6 +46,20 @@ def test_batchnorm_mode_0_or_2():
|
||||
assert_allclose(out.std(), 1.0, atol=1e-1)
|
||||
|
||||
|
||||
@keras_test
def test_batchnorm_mode_0_or_2_twice():
    """Stack two mode-0 BatchNormalization layers, then fit and predict.

    Regression test for issue #4881 with the old batch normalization
    functions in the Theano backend. The test has no explicit assertions;
    it passes when compile, fit and predict complete without raising.
    """
    layer_options = {'mode': 0, 'input_shape': (10, 5, 5), 'axis': 1}

    model = Sequential()
    # Both layers are created with exactly the same arguments.
    for _ in range(2):
        model.add(normalization.BatchNormalization(**layer_options))
    model.compile(loss='mse', optimizer='sgd')

    # The same array serves as both input and target.
    X = np.random.normal(loc=5.0, scale=10.0, size=(20, 10, 5, 5))
    model.fit(X, X, nb_epoch=1, verbose=0)
    model.predict(X)
|
||||
|
||||
|
||||
@keras_test
|
||||
def test_batchnorm_mode_0_convnet():
|
||||
model = Sequential()
|
||||
|
||||
@@ -15,7 +15,7 @@ class TestImage:
|
||||
gray_images = []
|
||||
for n in range(8):
|
||||
bias = np.random.rand(img_w, img_h, 1) * 64
|
||||
variance = np.random.rand(img_w, img_h, 1) * (255-64)
|
||||
variance = np.random.rand(img_w, img_h, 1) * (255 - 64)
|
||||
imarray = np.random.rand(img_w, img_h, 3) * variance + bias
|
||||
im = Image.fromarray(imarray.astype('uint8')).convert('RGB')
|
||||
rgb_images.append(im)
|
||||
|
||||
@@ -4,6 +4,7 @@ import multiprocessing
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
from csv import Sniffer
|
||||
from keras import optimizers
|
||||
|
||||
np.random.seed(1337)
|
||||
@@ -96,6 +97,7 @@ def test_ModelCheckpoint():
|
||||
os.remove(filepath.format(epoch=1))
|
||||
os.remove(filepath.format(epoch=3))
|
||||
|
||||
|
||||
def test_EarlyStopping():
|
||||
(X_train, y_train), (X_test, y_test) = get_test_data(nb_train=train_samples,
|
||||
nb_test=test_samples,
|
||||
@@ -202,6 +204,59 @@ def test_ReduceLROnPlateau():
|
||||
assert np.allclose(float(K.get_value(model.optimizer.lr)), 0.1, atol=K.epsilon())
|
||||
|
||||
|
||||
def test_CSVLogger():
    """Exercise `callbacks.CSVLogger` in three scenarios on one log file.

    1. A new file is written with a tab separator; `csv.Sniffer` must
       detect that separator as the delimiter.
    2. A second logger is created with `append=True` on the existing file.
    3. That same logger object is reused for another `fit` call.

    Finally the text 'epoch' must occur exactly once in the file, after
    which the file is removed.
    """
    log_path = 'log.tsv'
    delimiter = '\t'

    (X_train, y_train), (X_test, y_test) = get_test_data(
        nb_train=train_samples,
        nb_test=test_samples,
        input_shape=(input_dim,),
        classification=True,
        nb_class=nb_class)
    y_test = np_utils.to_categorical(y_test)
    y_train = np_utils.to_categorical(y_train)

    def build_model():
        # Reseed numpy before building so each model starts from the same
        # numpy RNG state.
        np.random.seed(1337)
        net = Sequential()
        net.add(Dense(nb_hidden, input_dim=input_dim, activation='relu'))
        net.add(Dense(nb_class, activation='softmax'))
        net.compile(loss='categorical_crossentropy',
                    optimizer=optimizers.SGD(lr=0.1),
                    metrics=['accuracy'])
        return net

    def fit_one_epoch(net, loggers):
        # Single training epoch with validation, reporting to `loggers`.
        net.fit(X_train, y_train, batch_size=batch_size,
                validation_data=(X_test, y_test), callbacks=loggers,
                nb_epoch=1)

    # case 1, create new file with defined separator
    model = build_model()
    cbks = [callbacks.CSVLogger(log_path, separator=delimiter)]
    fit_one_epoch(model, cbks)

    assert os.path.exists(log_path)
    with open(log_path) as csvfile:
        sniffed = Sniffer().sniff(csvfile.read())
    assert sniffed.delimiter == delimiter
    del model
    del cbks

    # case 2, append data to existing file, skip header
    model = build_model()
    cbks = [callbacks.CSVLogger(log_path, separator=delimiter, append=True)]
    fit_one_epoch(model, cbks)

    # case 3, reuse of CSVLogger object
    fit_one_epoch(model, cbks)

    # The word 'epoch' must appear only once across all three runs.
    with open(log_path) as csvfile:
        contents = " ".join(csvfile.readlines())
    assert contents.count('epoch') == 1

    os.remove(log_path)
|
||||
|
||||
|
||||
@pytest.mark.skipif((K.backend() != 'tensorflow'),
|
||||
reason="Requires tensorflow backend")
|
||||
def test_TensorBoard():
|
||||
|
||||
@@ -13,6 +13,7 @@ CONV_SHAPE = (25, 25, 2, 2)
|
||||
# The equivalent shape of both test fixtures
|
||||
SHAPE = (100, 100)
|
||||
|
||||
|
||||
def _runner(init, shape, target_mean=None, target_std=None,
|
||||
target_max=None, target_min=None):
|
||||
variable = init(shape)
|
||||
@@ -85,10 +86,10 @@ def test_identity(tensor_shape):
|
||||
if len(tensor_shape) > 2:
|
||||
with pytest.raises(Exception):
|
||||
_runner(initializations.identity, tensor_shape,
|
||||
target_mean=1./SHAPE[0], target_max=1.)
|
||||
target_mean=1. / SHAPE[0], target_max=1.)
|
||||
else:
|
||||
_runner(initializations.identity, tensor_shape,
|
||||
target_mean=1./SHAPE[0], target_max=1.)
|
||||
target_mean=1. / SHAPE[0], target_max=1.)
|
||||
|
||||
|
||||
@pytest.mark.parametrize('tensor_shape', [FC_SHAPE, CONV_SHAPE], ids=['FC', 'CONV'])
|
||||
|
||||
@@ -105,13 +105,13 @@ def test_top_k_categorical_accuracy():
|
||||
y_pred = K.variable(np.array([[0.3, 0.2, 0.1], [0.1, 0.2, 0.7]]))
|
||||
y_true = K.variable(np.array([[0, 1, 0], [1, 0, 0]]))
|
||||
success_result = K.eval(metrics.top_k_categorical_accuracy(y_true, y_pred,
|
||||
k=3))
|
||||
k=3))
|
||||
assert success_result == 1
|
||||
partial_result = K.eval(metrics.top_k_categorical_accuracy(y_true, y_pred,
|
||||
k=2))
|
||||
k=2))
|
||||
assert partial_result == 0.5
|
||||
failure_result = K.eval(metrics.top_k_categorical_accuracy(y_true, y_pred,
|
||||
k=1))
|
||||
k=1))
|
||||
assert failure_result == 0
|
||||
|
||||
|
||||
|
||||
@@ -177,6 +177,102 @@ def test_multiprocessing_evaluating():
|
||||
assert reached_end
|
||||
|
||||
|
||||
@keras_test
def test_multiprocessing_fit_error():
    """`fit_generator` must raise when its data generator fails.

    The check runs twice: once with `nb_worker=4, pickle_safe=True` and once
    with `pickle_safe=False`.
    """
    batch_size = 32
    good_batches = 5

    def failing_generator():
        """Intended to yield a few good batches, then raise RuntimeError."""
        # NOTE(review): in the second randint call low (batch_size == 32) is
        # greater than high (2). numpy documents low < high for randint, so
        # this presumably raises ValueError before the first batch is ever
        # yielded -- confirm whether that is the intended failure.
        for _ in range(good_batches):
            features = np.random.randint(batch_size, 256, (500, 2))
            targets = np.random.randint(batch_size, 2, 500)
            yield (features, targets)
        raise RuntimeError

    model = Sequential()
    model.add(Dense(1, input_shape=(2, )))
    model.compile(loss='mse', optimizer='adadelta')

    # One batch more than the generator is meant to deliver successfully.
    samples = batch_size * (good_batches + 1)

    worker_setups = (
        {'nb_worker': 4, 'pickle_safe': True},
        {'pickle_safe': False},
    )
    for setup in worker_setups:
        with pytest.raises(Exception):
            model.fit_generator(failing_generator(), samples, 1, **setup)
|
||||
|
||||
|
||||
@keras_test
def test_multiprocessing_evaluate_error():
    """`evaluate_generator` must raise when its data generator fails.

    The check runs twice: once with `nb_worker=4, pickle_safe=True` and once
    with `pickle_safe=False`.
    """
    batch_size = 32
    good_batches = 5

    def failing_generator():
        """Intended to yield a few good batches, then raise RuntimeError."""
        # NOTE(review): in the second randint call low (batch_size == 32) is
        # greater than high (2). numpy documents low < high for randint, so
        # this presumably raises ValueError before the first batch is ever
        # yielded -- confirm whether that is the intended failure.
        for _ in range(good_batches):
            features = np.random.randint(batch_size, 256, (500, 2))
            targets = np.random.randint(batch_size, 2, 500)
            yield (features, targets)
        raise RuntimeError

    model = Sequential()
    model.add(Dense(1, input_shape=(2, )))
    model.compile(loss='mse', optimizer='adadelta')

    # One batch more than the generator is meant to deliver successfully.
    samples = batch_size * (good_batches + 1)

    worker_setups = (
        {'nb_worker': 4, 'pickle_safe': True},
        {'pickle_safe': False},
    )
    for setup in worker_setups:
        with pytest.raises(Exception):
            model.evaluate_generator(failing_generator(), samples, 1, **setup)
|
||||
|
||||
|
||||
@keras_test
def test_multiprocessing_predict_error():
    """`predict_generator` must raise when its data generator fails.

    The check runs twice: once with `nb_worker=4, pickle_safe=True` and once
    with `pickle_safe=False`.
    """
    batch_size = 32
    good_batches = 5

    def failing_generator():
        """Intended to yield a few good batches, then raise RuntimeError."""
        # NOTE(review): in the second randint call low (batch_size == 32) is
        # greater than high (2). numpy documents low < high for randint, so
        # this presumably raises ValueError before the first batch is ever
        # yielded -- confirm whether that is the intended failure.
        for _ in range(good_batches):
            features = np.random.randint(batch_size, 256, (500, 2))
            targets = np.random.randint(batch_size, 2, 500)
            yield (features, targets)
        raise RuntimeError

    model = Sequential()
    model.add(Dense(1, input_shape=(2, )))
    model.compile(loss='mse', optimizer='adadelta')

    # One batch more than the generator is meant to deliver successfully.
    samples = batch_size * (good_batches + 1)

    worker_setups = (
        {'nb_worker': 4, 'pickle_safe': True},
        {'pickle_safe': False},
    )
    for setup in worker_setups:
        with pytest.raises(Exception):
            model.predict_generator(failing_generator(), samples, 1, **setup)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
|
||||
pytest.main([__file__])
|
||||
|
||||
Alguns arquivos não foram exibidos porque demasiados arquivos foram alterados neste diff Mostrar Mais
Referência em uma Nova Issue
Bloquear um usuário