Comparar commits
149 Commits
| Autor | SHA1 | Data | |
|---|---|---|---|
| d0659327bd | |||
| 88b301f182 | |||
| d5e16807d2 | |||
| 79a2bcd05f | |||
| e582f9dcac | |||
| 4cefd6136b | |||
| 1cf04b7a10 | |||
| ad3db301f2 | |||
| e15eb40317 | |||
| 8459d0403c | |||
| 08014eea36 | |||
| c0b27108d0 | |||
| 40612facf3 | |||
| e418fc6937 | |||
| 045d442fcd | |||
| 2370f9f1db | |||
| 519d1e7420 | |||
| 82afc713d0 | |||
| a1e9a8addd | |||
| f59736a06c | |||
| d187059596 | |||
| 7d2f0b1ba8 | |||
| 6a6d939dea | |||
| 090b8b7f99 | |||
| e4dda27de1 | |||
| f2786d9d80 | |||
| c6daa24e3c | |||
| 2f4eed1f0f | |||
| acc5c45feb | |||
| 00c3335071 | |||
| 65e4c8e76e | |||
| d4f5dff8ee | |||
| 8d9cb782fb | |||
| 02ff1d4462 | |||
| 007d2c2e25 | |||
| 3bf7637986 | |||
| 33ff9dbce2 | |||
| f25e894558 | |||
| 52c1a7456f | |||
| b2392413fa | |||
| 1941eaabe0 | |||
| 3d5bf9753f | |||
| a4d191d4f9 | |||
| dad54ec211 | |||
| b525f5f4d7 | |||
| e8190a8d8d | |||
| 4e155139ca | |||
| 458edeed9a | |||
| 04d785f4bf | |||
| 28d9c0c511 | |||
| 91310971b9 | |||
| 5d2acf4897 | |||
| dc98019d49 | |||
| b008bb35cc | |||
| 46d5b197e0 | |||
| 2c510530b1 | |||
| ec6eda77ad | |||
| 4805e5856b | |||
| 55447cbb3d | |||
| 69d5139b8c | |||
| 89f1e05147 | |||
| bc779df8b7 | |||
| e3c260e7d3 | |||
| 0af7e004c7 | |||
| 447445388e | |||
| b2c66816d7 | |||
| b6f81c6cc3 | |||
| 98b289630a | |||
| d68c0bd795 | |||
| 5afda71f74 | |||
| 1b08a8d675 | |||
| b508ab64bd | |||
| 84f435e24b | |||
| 984ad34a61 | |||
| ad3231c29a | |||
| c3d20bbc53 | |||
| f9c03f183f | |||
| 046a3c8a28 | |||
| 05883934f1 | |||
| 97d2a73dd3 | |||
| 5367a44acb | |||
| 1deaf71388 | |||
| 99f564e972 | |||
| c725f8d354 | |||
| 257ace722c | |||
| 0cd9d46828 | |||
| cef9e28a6c | |||
| 6c42da2abf | |||
| a9fc2bed49 | |||
| 1855c49d1f | |||
| cce65ce34d | |||
| 70866c0154 | |||
| d06e3753b0 | |||
| cb4de1f859 | |||
| b6d23b2e2d | |||
| 6a8815de0c | |||
| e0179bad2f | |||
| 8778add0d6 | |||
| facc823612 | |||
| b91854ea9d | |||
| 05abe814ac | |||
| ea561ba6d8 | |||
| df84c69676 | |||
| 3726aba2ee | |||
| f6bcaffe4a | |||
| c689b52dd1 | |||
| 09d75a4347 | |||
| 59bd247603 | |||
| f221ef952f | |||
| d3c75e1d34 | |||
| 3aab55d29f | |||
| f9ef72c38a | |||
| 108159ed17 | |||
| defa1283c4 | |||
| 2788b60fe6 | |||
| 7e70e1768f | |||
| 896ba77061 | |||
| c034262b78 | |||
| b7edcf6eea | |||
| 23e1ad2df7 | |||
| 0a3939883a | |||
| 3c8f91ee3d | |||
| efa5b04797 | |||
| 2da66ed009 | |||
| 2ac6811362 | |||
| 74c51f213c | |||
| 4302d8060d | |||
| 576cf8978b | |||
| 3533912016 | |||
| cf9922ff1d | |||
| 4fa65fbb2f | |||
| f502ee2338 | |||
| 7a56925176 | |||
| 0a108b3fb2 | |||
| 381a108e6d | |||
| 726c9fc8a6 | |||
| 946ccd3228 | |||
| 8e1ebbfc11 | |||
| cc0e60c101 | |||
| ff3f00d845 | |||
| 40195c2fa2 | |||
| 7f7300b8cb | |||
| 1b158ff4ed | |||
| b686b85b52 | |||
| 8fa82ae5cb | |||
| 0d5289141e | |||
| 01d5e7bc47 | |||
| cfbaec60c7 | |||
| f3e7245910 |
+2
-2
@@ -125,12 +125,12 @@ Keras uses the following dependencies:
|
||||
- [See installation instructions](https://github.com/tensorflow/tensorflow#download-and-setup).
|
||||
|
||||
To install Keras, `cd` to the Keras folder and run the install command:
|
||||
```
|
||||
```sh
|
||||
sudo python setup.py install
|
||||
```
|
||||
|
||||
You can also install Keras from PyPI:
|
||||
```
|
||||
```sh
|
||||
sudo pip install keras
|
||||
```
|
||||
|
||||
|
||||
@@ -0,0 +1,46 @@
|
||||
# GPU-enabled image for developing and running Keras with either the
# TensorFlow or the Theano backend, served through a Jupyter notebook.
FROM nvidia/cuda:7.5-cudnn5-devel

ENV CONDA_DIR /opt/conda
ENV PATH $CONDA_DIR/bin:$PATH

# System packages plus a checksum-verified Miniconda install into $CONDA_DIR.
# The installer is downloaded into the current directory (/ at this point),
# which is why it is executed through an absolute path below.
RUN mkdir -p $CONDA_DIR && \
    echo export PATH=$CONDA_DIR/bin:'$PATH' > /etc/profile.d/conda.sh && \
    apt-get update && \
    apt-get install -y wget git libhdf5-dev g++ graphviz && \
    wget --quiet https://repo.continuum.io/miniconda/Miniconda3-3.9.1-Linux-x86_64.sh && \
    echo "6c6b44acdd0bc4229377ee10d52c8ac6160c336d9cdd669db7371aa9344e1ac3 *Miniconda3-3.9.1-Linux-x86_64.sh" | sha256sum -c - && \
    /bin/bash /Miniconda3-3.9.1-Linux-x86_64.sh -f -b -p $CONDA_DIR && \
    rm Miniconda3-3.9.1-Linux-x86_64.sh

ENV NB_USER keras
ENV NB_UID 1000

# Unprivileged user that owns the conda tree and the /src mount point.
# $CONDA_DIR already exists (created above), so only ownership changes here.
RUN useradd -m -s /bin/bash -N -u $NB_UID $NB_USER && \
    chown $NB_USER $CONDA_DIR -R && \
    mkdir -p /src && \
    chown $NB_USER /src

USER $NB_USER

# Python
ARG python_version=3.5.1
ARG tensorflow_version=0.9.0rc0-cp35-cp35m
# Every conda call passes -y: there is no terminal to answer the confirmation
# prompt during `docker build`. Git dependencies are fetched over https
# because GitHub no longer serves the unauthenticated git:// protocol.
RUN conda install -y python=${python_version} && \
    pip install https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow-${tensorflow_version}-linux_x86_64.whl && \
    pip install git+https://github.com/Theano/Theano.git && \
    pip install ipdb pytest pytest-cov python-coveralls coverage==3.7.1 pytest-xdist pep8 pytest-pep8 pydot_ng && \
    conda install -y Pillow scikit-learn notebook pandas matplotlib nose pyyaml six h5py && \
    pip install git+https://github.com/fchollet/keras.git && \
    conda clean -yt

COPY theanorc /home/keras/.theanorc

# Unquoted on purpose: single quotes would store the literal text
# "$PYTHONPATH" instead of expanding the variable.
ENV PYTHONPATH /src/:$PYTHONPATH

WORKDIR /src

EXPOSE 8888

CMD jupyter notebook --port=8888 --ip=0.0.0.0
|
||||
|
||||
@@ -0,0 +1,26 @@
|
||||
# Convenience wrappers around docker / nvidia-docker for running Keras in a
# container. None of the targets below produce a file of the same name.
.PHONY: help build bash ipython notebook test

# Default goal: print this Makefile so the available targets are visible.
help:
	@cat $(firstword $(MAKEFILE_LIST))

# Host directory mounted at /data inside the container.
DATA?="${HOME}/Data"
# GPU id(s) exposed to the container.
GPU?=0
DOCKER_FILE=Dockerfile
# nvidia-docker selects devices through the NV_GPU environment variable.
DOCKER=NV_GPU=$(GPU) nvidia-docker
BACKEND=tensorflow
TEST=tests/
# Parent of the current directory, mounted at /src. Simple assignment so the
# shell command runs once instead of on every expansion.
SRC:=$(shell dirname `pwd`)

build:
	docker build -t keras --build-arg python_version=3.5 -f $(DOCKER_FILE) .

bash: build
	$(DOCKER) run -it -v $(SRC):/src -v $(DATA):/data --env KERAS_BACKEND=$(BACKEND) keras bash

ipython: build
	$(DOCKER) run -it -v $(SRC):/src -v $(DATA):/data --env KERAS_BACKEND=$(BACKEND) keras ipython

# Runs the image's default command with host networking.
notebook: build
	$(DOCKER) run -it -v $(SRC):/src -v $(DATA):/data --net=host --env KERAS_BACKEND=$(BACKEND) keras

test: build
	$(DOCKER) run -it -v $(SRC):/src -v $(DATA):/data --env KERAS_BACKEND=$(BACKEND) keras py.test $(TEST)
|
||||
|
||||
@@ -0,0 +1,58 @@
|
||||
# Using Keras via Docker
|
||||
|
||||
This directory contains `Dockerfile` to make it easy to get up and running with
|
||||
Keras via [Docker](http://www.docker.com/).
|
||||
|
||||
## Installing Docker
|
||||
|
||||
General installation instructions are
|
||||
[on the Docker site](https://docs.docker.com/installation/), but we give some
|
||||
quick links here:
|
||||
|
||||
* [OSX](https://docs.docker.com/installation/mac/): [docker toolbox](https://www.docker.com/toolbox)
|
||||
* [ubuntu](https://docs.docker.com/installation/ubuntulinux/)
|
||||
|
||||
## Running the container
|
||||
|
||||
We are using `Makefile` to simplify docker commands within make commands.
|
||||
|
||||
Build the container and start a jupyter notebook
|
||||
|
||||
$ make notebook
|
||||
|
||||
Build the container and start an iPython shell
|
||||
|
||||
$ make ipython
|
||||
|
||||
Build the container and start a bash
|
||||
|
||||
$ make bash
|
||||
|
||||
For GPU support install NVidia drivers (ideally latest) and
|
||||
[nvidia-docker](https://github.com/NVIDIA/nvidia-docker). Run using
|
||||
|
||||
$ make notebook GPU=0 # or [ipython, bash]
|
||||
|
||||
Switch between Theano and TensorFlow
|
||||
|
||||
$ make notebook BACKEND=theano
|
||||
$ make notebook BACKEND=tensorflow
|
||||
|
||||
Mount a volume for external data sets
|
||||
|
||||
$ make notebook DATA=~/mydata # or [ipython, bash]
|
||||
|
||||
Prints all make tasks
|
||||
|
||||
$ make help
|
||||
|
||||
You can change Theano parameters by editing `/docker/theanorc`.
|
||||
|
||||
|
||||
Note: If you have a problem running nvidia-docker, you may try the old way
|
||||
we used, although it is not recommended. If you find a bug in nvidia-docker, please report
|
||||
it there and try using nvidia-docker as described above.
|
||||
|
||||
$ export CUDA_SO=$(\ls /usr/lib/x86_64-linux-gnu/libcuda.* | xargs -I{} echo '-v {}:{}')
|
||||
$ export DEVICES=$(\ls /dev/nvidia* | xargs -I{} echo '--device {}:{}')
|
||||
$ docker run -it -p 8888:8888 $CUDA_SO $DEVICES gcr.io/tensorflow/tensorflow:latest-gpu
|
||||
@@ -0,0 +1,5 @@
|
||||
[global]
|
||||
floatX = float32
|
||||
optimizer=None
|
||||
device = gpu
|
||||
|
||||
+14
-1
@@ -65,6 +65,7 @@ if sys.version[0] == '2':
|
||||
sys.setdefaultencoding('utf8')
|
||||
|
||||
from keras.layers import convolutional
|
||||
from keras.layers import local
|
||||
from keras.layers import recurrent
|
||||
from keras.layers import core
|
||||
from keras.layers import noise
|
||||
@@ -88,6 +89,7 @@ EXCLUDE = {
|
||||
'Wrapper',
|
||||
'get_session',
|
||||
'set_session',
|
||||
'CallbackList',
|
||||
}
|
||||
|
||||
PAGES = [
|
||||
@@ -105,6 +107,7 @@ PAGES = [
|
||||
models.Sequential.predict_on_batch,
|
||||
models.Sequential.fit_generator,
|
||||
models.Sequential.evaluate_generator,
|
||||
models.Sequential.predict_generator,
|
||||
],
|
||||
},
|
||||
{
|
||||
@@ -119,6 +122,7 @@ PAGES = [
|
||||
models.Model.predict_on_batch,
|
||||
models.Model.fit_generator,
|
||||
models.Model.evaluate_generator,
|
||||
models.Model.predict_generator,
|
||||
models.Model.get_layer,
|
||||
]
|
||||
},
|
||||
@@ -146,7 +150,9 @@ PAGES = [
|
||||
'classes': [
|
||||
convolutional.Convolution1D,
|
||||
convolutional.Convolution2D,
|
||||
convolutional.AtrousConv2D,
|
||||
convolutional.AtrousConvolution2D,
|
||||
convolutional.SeparableConvolution2D,
|
||||
convolutional.Deconvolution2D,
|
||||
convolutional.Convolution3D,
|
||||
convolutional.UpSampling1D,
|
||||
convolutional.UpSampling2D,
|
||||
@@ -167,6 +173,13 @@ PAGES = [
|
||||
convolutional.AveragePooling3D,
|
||||
],
|
||||
},
|
||||
{
|
||||
'page': 'layers/local.md',
|
||||
'classes': [
|
||||
local.LocallyConnected1D,
|
||||
local.LocallyConnected2D,
|
||||
],
|
||||
},
|
||||
{
|
||||
'page': 'layers/recurrent.md',
|
||||
'classes': [
|
||||
|
||||
externo
+1
-1
@@ -29,7 +29,7 @@ You can also define the environment variable ``KERAS_BACKEND`` and this will
|
||||
override what is defined in your config file :
|
||||
|
||||
```bash
|
||||
KERAS_BACKEND=tensorflow python -c "from keras import backend; print backend._BACKEND"
|
||||
KERAS_BACKEND=tensorflow python -c "from keras import backend; print(backend._BACKEND)"
|
||||
Using TensorFlow backend.
|
||||
tensorflow
|
||||
```
|
||||
|
||||
externo
+18
-5
@@ -53,11 +53,14 @@ As a convention, "0" does not stand for a specific word, but instead is used to
|
||||
```python
|
||||
from keras.datasets import imdb
|
||||
|
||||
(X_train, y_train), (X_test, y_test) = imdb.load_data(path="imdb.pkl",
|
||||
(X_train, y_train), (X_test, y_test) = imdb.load_data(path="imdb_full.pkl",
|
||||
nb_words=None,
|
||||
skip_top=0,
|
||||
maxlen=None,
|
||||
test_split=0.1)
|
||||
seed=113,
|
||||
start_char=1,
|
||||
oov_char=2,
|
||||
index_from=3)
|
||||
```
|
||||
- __Return:__
|
||||
- 2 tuples:
|
||||
@@ -70,8 +73,12 @@ from keras.datasets import imdb
|
||||
- __nb_words__: integer or None. Top most frequent words to consider. Any less frequent word will appear as 0 in the sequence data.
|
||||
- __skip_top__: integer. Top most frequent words to ignore (they will appear as 0s in the sequence data).
|
||||
- __maxlen__: int. Maximum sequence length. Any longer sequence will be truncated.
|
||||
- __test_split__: float. Fraction of the dataset to be used as test data.
|
||||
- __seed__: int. Seed for reproducible data shuffling.
|
||||
- __start_char__: char. The start of a sequence will be marked with this character.
|
||||
Set to 1 because 0 is usually the padding character.
|
||||
- __oov_char__: char. words that were cut out because of the `nb_words`
|
||||
or `skip_top` limit will be replaced with this character.
|
||||
- __index_from__: int. Index actual words with this index and higher.
|
||||
|
||||
---
|
||||
|
||||
@@ -88,10 +95,16 @@ from keras.datasets import reuters
|
||||
nb_words=None,
|
||||
skip_top=0,
|
||||
maxlen=None,
|
||||
test_split=0.1)
|
||||
test_split=0.2,
|
||||
seed=113,
|
||||
start_char=1,
|
||||
oov_char=2,
|
||||
index_from=3)
|
||||
```
|
||||
|
||||
The specifications are the same as that of the IMDB dataset.
|
||||
The specifications are the same as that of the IMDB dataset, with the addition of:
|
||||
|
||||
- __test_split__: float. Fraction of the dataset to be used as test data.
|
||||
|
||||
This dataset also makes available the word index used for encoding the sequences:
|
||||
|
||||
|
||||
+35
-22
@@ -58,7 +58,31 @@ theano.config.floatX = 'float32'
|
||||
|
||||
*It is not recommended to use pickle or cPickle to save a Keras model.*
|
||||
|
||||
If you only need to save the architecture of a model, and not its weights, you can do:
|
||||
You can use `model.save(filepath)` to save a Keras model into a single HDF5 file which will contain:
|
||||
|
||||
- the architecture of the model, allowing to re-create the model
|
||||
- the weights of the model
|
||||
- the training configuration (loss, optimizer)
|
||||
- the state of the optimizer, allowing to resume training exactly where you left off.
|
||||
|
||||
You can then use `keras.models.load_model(filepath)` to reinstantiate your model.
|
||||
`load_model` will also take care of compiling the model using the saved training configuration
|
||||
(unless the model was never compiled in the first place).
|
||||
|
||||
Example:
|
||||
|
||||
```python
|
||||
from keras.models import load_model
|
||||
|
||||
model.save('my_model.h5') # creates a HDF5 file 'my_model.h5'
|
||||
del model # deletes the existing model
|
||||
|
||||
# returns a compiled model
|
||||
# identical to the previous one
|
||||
model = load_model('my_model.h5')
|
||||
```
|
||||
|
||||
If you only need to save the **architecture of a model**, and not its weights or its training configuration, you can do:
|
||||
|
||||
```python
|
||||
# save as JSON
|
||||
@@ -68,6 +92,8 @@ json_string = model.to_json()
|
||||
yaml_string = model.to_yaml()
|
||||
```
|
||||
|
||||
The generated JSON / YAML files are human-readable and can be manually edited if needed.
|
||||
|
||||
You can then build a fresh model from this data:
|
||||
|
||||
```python
|
||||
@@ -79,7 +105,7 @@ model = model_from_json(json_string)
|
||||
model = model_from_yaml(yaml_string)
|
||||
```
|
||||
|
||||
If you need to save the weights of a model, you can do so in HDF5 with the code below.
|
||||
If you need to save the **weights of a model**, you can do so in HDF5 with the code below.
|
||||
|
||||
Note that you will first need to install HDF5 and the Python library h5py, which do not come bundled with Keras.
|
||||
|
||||
@@ -93,22 +119,6 @@ Assuming you have code for instantiating your model, you can then load the weigh
|
||||
model.load_weights('my_model_weights.h5')
|
||||
```
|
||||
|
||||
This leads us to a way to save and reconstruct models from only serialized data:
|
||||
```python
|
||||
json_string = model.to_json()
|
||||
open('my_model_architecture.json', 'w').write(json_string)
|
||||
model.save_weights('my_model_weights.h5')
|
||||
|
||||
# elsewhere...
|
||||
model = model_from_json(open('my_model_architecture.json').read())
|
||||
model.load_weights('my_model_weights.h5')
|
||||
```
|
||||
|
||||
Finally, before it can be used, the model shall be compiled.
|
||||
```python
|
||||
model.compile(optimizer='adagrad', loss='mse')
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### Why is the training loss much higher than the testing loss?
|
||||
@@ -321,13 +331,16 @@ print(len(model.layers)) # "1"
|
||||
|
||||
Code and pre-trained weights are available for the following image classification models:
|
||||
|
||||
- [VGG-16](https://gist.github.com/baraldilorenzo/07d7802847aaad0a35d3)
|
||||
- [VGG-19](https://gist.github.com/baraldilorenzo/8d096f48a1be4a2d660d)
|
||||
- [AlexNet](https://github.com/heuritech/convnets-keras)
|
||||
- VGG16
|
||||
- VGG19
|
||||
- ResNet50
|
||||
- Inception v3
|
||||
|
||||
Find the code and weights in [this repository](https://github.com/fchollet/deep-learning-models).
|
||||
|
||||
For an example of how to use such a pre-trained model for feature extraction or for fine-tuning, see [this blog post](http://blog.keras.io/building-powerful-image-classification-models-using-very-little-data.html).
|
||||
|
||||
The VGG-16 model is also the basis for several Keras example scripts:
|
||||
The VGG16 model is also the basis for several Keras example scripts:
|
||||
|
||||
- [Style transfer](https://github.com/fchollet/keras/blob/master/examples/neural_style_transfer.py)
|
||||
- [Feature visualization](https://github.com/fchollet/keras/blob/master/examples/conv_filter_visualization.py)
|
||||
|
||||
@@ -75,7 +75,7 @@ The model will also be supervised via two loss functions. Using the main loss fu
|
||||
|
||||
Here's what our model looks like:
|
||||
|
||||
<img src="http://s3.amazonaws.com/keras.io/img/multi-input-multi-output-graph.png" alt="multi-input-multi-output-graph" style="width: 400px;"/>
|
||||
<img src="https://s3.amazonaws.com/keras.io/img/multi-input-multi-output-graph.png" alt="multi-input-multi-output-graph" style="width: 400px;"/>
|
||||
|
||||
Let's implement it with the functional API.
|
||||
|
||||
@@ -310,7 +310,7 @@ from keras.layers import merge, Convolution2D, Input
|
||||
# input tensor for a 3-channel 256x256 image
|
||||
x = Input(shape=(3, 256, 256))
|
||||
# 3x3 conv with 3 output channels (same as input channels)
|
||||
y = Convolution2D(3, 3, 3, border_mode='same')
|
||||
y = Convolution2D(3, 3, 3, border_mode='same')(x)
|
||||
# this returns x + y.
|
||||
z = merge([x, y], mode='sum')
|
||||
```
|
||||
|
||||
@@ -86,7 +86,7 @@ final_model.add(merged)
|
||||
final_model.add(Dense(10, activation='softmax'))
|
||||
```
|
||||
|
||||
<img src="http://s3.amazonaws.com/keras.io/img/two_branches_sequential_model.png" alt="two branch Sequential" style="width: 400px;"/>
|
||||
<img src="https://s3.amazonaws.com/keras.io/img/two_branches_sequential_model.png" alt="two branch Sequential" style="width: 400px;"/>
|
||||
|
||||
Such a two-branch model can then be trained via e.g.:
|
||||
|
||||
@@ -149,7 +149,7 @@ Keras models are trained on Numpy arrays of input data and labels. For training
|
||||
# for a single-input model with 2 classes (binary):
|
||||
|
||||
model = Sequential()
|
||||
model.add(Dense(1, input_dim=784, activation='softmax'))
|
||||
model.add(Dense(1, input_dim=784, activation='sigmoid'))
|
||||
model.compile(optimizer='rmsprop',
|
||||
loss='binary_crossentropy',
|
||||
metrics=['accuracy'])
|
||||
@@ -418,7 +418,7 @@ The first two LSTMs return their full output sequences, but the last one only re
|
||||
the last step in its output sequence, thus dropping the temporal dimension
|
||||
(i.e. converting the input sequence into a single vector).
|
||||
|
||||
<img src="http://keras.io/img/regular_stacked_lstm.png" alt="stacked LSTM" style="width: 300px;"/>
|
||||
<img src="https://keras.io/img/regular_stacked_lstm.png" alt="stacked LSTM" style="width: 300px;"/>
|
||||
|
||||
```python
|
||||
from keras.models import Sequential
|
||||
@@ -507,7 +507,7 @@ In this model, two input sequences are encoded into vectors by two separate LSTM
|
||||
|
||||
These two vectors are then concatenated, and a fully connected network is trained on top of the concatenated representations.
|
||||
|
||||
<img src="http://keras.io/img/dual_lstm.png" alt="Dual LSTM" style="width: 600px;"/>
|
||||
<img src="https://keras.io/img/dual_lstm.png" alt="Dual LSTM" style="width: 600px;"/>
|
||||
|
||||
```python
|
||||
from keras.models import Sequential
|
||||
|
||||
externo
+20
-1
@@ -9,7 +9,7 @@ model.add(Dense(64, init='uniform', input_dim=10))
|
||||
model.add(Activation('tanh'))
|
||||
model.add(Activation('softmax'))
|
||||
|
||||
sgd = SGD(lr=0.1, decay=1e-6, momentum=0.9, nesterov=True)
|
||||
sgd = SGD(lr=0.01, decay=1e-6, momentum=0.9, nesterov=True)
|
||||
model.compile(loss='mean_squared_error', optimizer=sgd)
|
||||
```
|
||||
|
||||
@@ -22,4 +22,23 @@ model.compile(loss='mean_squared_error', optimizer='sgd')
|
||||
|
||||
---
|
||||
|
||||
## Parameters common to all Keras optimizers
|
||||
|
||||
The parameters `clipnorm` and `clipvalue` can be used with all optimizers to control gradient clipping:
|
||||
|
||||
```python
|
||||
# all parameter gradients will be clipped to
|
||||
# a maximum norm of 1.
|
||||
sgd = SGD(lr=0.01, clipnorm=1.)
|
||||
```
|
||||
|
||||
```python
|
||||
# all parameter gradients will be clipped to
|
||||
# a maximum value of 0.5 and
|
||||
# a minimum value of -0.5.
|
||||
sgd = SGD(lr=0.01, clipvalue=0.5)
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
{{autogenerated}}
|
||||
+2
-2
@@ -61,7 +61,7 @@ Generate batches of tensor image data with real-time data augmentation. The data
|
||||
- __X__: data.
|
||||
- __y__: labels.
|
||||
- __batch_size__: int (default: 32).
|
||||
- __shuffle__: boolean (defaut: False).
|
||||
- __shuffle__: boolean (default: True).
|
||||
- __save_to_dir__: None or str (default: None). This allows you to optionally specify a directory to which to save the augmented pictures being generated (useful for visualizing what you are doing).
|
||||
- __save_prefix__: str (default: `''`). Prefix to use for filenames of saved pictures (only relevant if `save_to_dir` is set).
|
||||
- __save_format__: one of "png", "jpeg" (only relevant if `save_to_dir` is set). Default: "jpeg".
|
||||
@@ -88,7 +88,7 @@ Generate batches of tensor image data with real-time data augmentation. The data
|
||||
Example of using `.flow(X, y)`:
|
||||
|
||||
```python
|
||||
(X_train, y_train), (X_test, y_test) = cifar10.load_data(test_split=0.1)
|
||||
(X_train, y_train), (X_test, y_test) = cifar10.load_data()
|
||||
Y_train = np_utils.to_categorical(y_train, nb_classes)
|
||||
Y_test = np_utils.to_categorical(y_test, nb_classes)
|
||||
|
||||
|
||||
+3
-3
@@ -1,12 +1,12 @@
|
||||
# Wrappers for the Scikit-Learn API
|
||||
|
||||
You can use `Sequential` Keras models (single-input only) as part of your Scikit-Learn workflow via the wrappers found at `keras.wrappers.sklearn.py`.
|
||||
You can use `Sequential` Keras models (single-input only) as part of your Scikit-Learn workflow via the wrappers found at `keras.wrappers.scikit_learn.py`.
|
||||
|
||||
There are two wrappers available:
|
||||
|
||||
`keras.wrappers.sklearn.KerasClassifier(build_fn=None, **sk_params)`, which implements the sklearn classifier interface,
|
||||
`keras.wrappers.scikit_learn.KerasClassifier(build_fn=None, **sk_params)`, which implements the Scikit-Learn classifier interface,
|
||||
|
||||
`keras.wrappers.sklearn.KerasRegressor(build_fn=None, **sk_params)`, which implements the sklearn regressor interface.
|
||||
`keras.wrappers.scikit_learn.KerasRegressor(build_fn=None, **sk_params)`, which implements the Scikit-Learn regressor interface.
|
||||
|
||||
### Arguments
|
||||
|
||||
|
||||
@@ -0,0 +1,442 @@
|
||||
'''This example uses a convolutional stack followed by a recurrent stack
|
||||
and a CTC logloss function to perform optical character recognition
|
||||
of generated text images. I have no evidence of whether it actually
|
||||
learns general shapes of text, or just is able to recognize all
|
||||
the different fonts thrown at it...the purpose is more to demonstrate CTC
|
||||
inside of Keras. Note that the font list may need to be updated
|
||||
for the particular OS in use.
|
||||
|
||||
This starts off with 4 letter words. After 10 or so epochs, CTC
|
||||
learns translational invariance, so longer words and groups of words
|
||||
with spaces are gradually fed in. This gradual increase in difficulty
|
||||
is handled using the TextImageGenerator class which is both a generator
|
||||
class for test/train data and a Keras callback class. Every 10 epochs
|
||||
the wordlist that the generator draws from increases in difficulty.
|
||||
|
||||
The table below shows normalized edit distance values. Theano uses
|
||||
a slightly different CTC implementation, so some Theano-specific
|
||||
hyperparameter tuning would be needed to get it to match Tensorflow.
|
||||
|
||||
Norm. ED
|
||||
Epoch | TF | TH
|
||||
------------------------
|
||||
10 0.072 0.272
|
||||
20 0.032 0.115
|
||||
30 0.024 0.098
|
||||
40 0.023 0.108
|
||||
|
||||
This requires cairo and editdistance packages:
|
||||
pip install cairocffi
|
||||
pip install editdistance
|
||||
|
||||
Due to the use of a dummy loss function, Theano requires the following flags:
|
||||
on_unused_input='ignore'
|
||||
|
||||
Created by Mike Henry
|
||||
https://github.com/mbhenry/
|
||||
'''
|
||||
|
||||
import os
|
||||
import itertools
|
||||
import re
|
||||
import datetime
|
||||
import cairocffi as cairo
|
||||
import editdistance
|
||||
import numpy as np
|
||||
from scipy import ndimage
|
||||
import pylab
|
||||
from keras import backend as K
|
||||
from keras.layers.convolutional import Convolution2D, MaxPooling2D
|
||||
from keras.layers import Input, Layer, Dense, Activation, Flatten
|
||||
from keras.layers import Reshape, Lambda, merge, Permute, TimeDistributed
|
||||
from keras.models import Model
|
||||
from keras.layers.recurrent import GRU
|
||||
from keras.optimizers import SGD
|
||||
from keras.utils import np_utils
|
||||
from keras.utils.data_utils import get_file
|
||||
from keras.preprocessing import image
|
||||
import keras.callbacks
|
||||
|
||||
OUTPUT_DIR = "image_ocr"
|
||||
|
||||
np.random.seed(55)
|
||||
|
||||
# this creates larger "blotches" of noise which look
|
||||
# more realistic than just adding gaussian noise
|
||||
# assumes greyscale with pixels ranging from 0 to 1
|
||||
|
||||
def speckle(img):
    """Add blurred random noise to `img` and clamp the result to [0, 1].

    A noise field with the same shape as `img` is drawn from a standard
    normal distribution, scaled by a severity picked uniformly from
    [0, 0.6), smoothed with a Gaussian filter (sigma 1) and added to the
    image. `img` itself is not modified; a new array is returned.
    """
    # Draw order (uniform first, then randn) is kept so seeded runs match.
    severity = np.random.uniform(0, 0.6)
    noise = np.random.randn(*img.shape) * severity
    blotches = ndimage.gaussian_filter(noise, 1)
    return np.clip(img + blotches, 0, 1)
|
||||
|
||||
# paints the string in a random location within the bounding box
|
||||
# also uses a random font, a slight random rotation,
|
||||
# and a random amount of speckle noise
|
||||
|
||||
def paint_text(text, w, h):
    """Render `text` in black on a white `w` x `h` canvas and return it as an array.

    A font face and weight are picked at random, the text is placed at a
    random offset that leaves a border, and the rendered channel is scaled
    to floats in [0, 1] before speckle noise and a small random rotation
    are applied.

    # Arguments
        text: string to paint.
        w: canvas width in pixels.
        h: canvas height in pixels.

    # Returns
        The result of `image.random_rotation` applied to the noisy
        `(1, h, w)` image.

    # Raises
        IOError: if the text does not fit on the canvas with the border.
    """
    surface = cairo.ImageSurface(cairo.FORMAT_RGB24, w, h)
    with cairo.Context(surface) as context:
        context.set_source_rgb(1, 1, 1)  # White
        context.paint()
        # this font list works in Centos 7
        fonts = ['Century Schoolbook', 'Courier', 'STIX', 'URW Chancery L', 'FreeMono']
        context.select_font_face(np.random.choice(fonts), cairo.FONT_SLANT_NORMAL,
                                 np.random.choice([cairo.FONT_WEIGHT_BOLD, cairo.FONT_WEIGHT_NORMAL]))
        context.set_font_size(40)
        box = context.text_extents(text)

        # teach the RNN translational invariance by
        # fitting text box randomly on canvas, with some room to rotate
        border_w_h = (10, 16)
        max_shift_x = int(w - box[2] - border_w_h[0])
        max_shift_y = int(h - box[3] - border_w_h[1])
        # randint needs a strictly positive upper bound, so text that leaves
        # no room for the border is reported as "does not fit" instead of
        # surfacing as a ValueError from randint.
        if box[2] > w or box[3] > h or max_shift_x < 1 or max_shift_y < 1:
            raise IOError('Could not fit string into image. Max char count is too large for given image width.')
        top_left_x = np.random.randint(0, max_shift_x)
        top_left_y = np.random.randint(0, max_shift_y)

        context.move_to(top_left_x - int(box[0]), top_left_y - int(box[1]))
        context.set_source_rgb(0, 0, 0)
        context.show_text(text)

    buf = surface.get_data()
    a = np.frombuffer(buf, np.uint8)
    a.shape = (h, w, 4)
    a = a[:, :, 0]  # grab single channel
    # Convert before dividing: an in-place `/=` on the uint8 view either
    # raises (current NumPy) or performs integer division (old NumPy).
    a = a.astype(np.float32) / 255
    a = np.expand_dims(a, 0)
    a = speckle(a)
    a = image.random_rotation(a, 3 * (w - top_left_x) / w + 1)

    return a
|
||||
|
||||
def shuffle_mats_or_lists(matrix_list, stop_ind=None):
    """Shuffle several equally long containers with one shared permutation.

    Only the first `stop_ind` positions are permuted; positions from
    `stop_ind` onwards keep their original order.

    # Arguments
        matrix_list: list of `numpy.ndarray` and/or `list` objects that all
            have the same length.
        stop_ind: number of leading elements to shuffle. Defaults to the
            full length.

    # Returns
        A list of new containers, one per input and in the same order.

    # Raises
        TypeError: if an element is neither a numpy array nor a list.
    """
    ret = []
    assert all([len(i) == len(matrix_list[0]) for i in matrix_list])
    len_val = len(matrix_list[0])
    if stop_ind is None:
        stop_ind = len_val
    assert stop_ind <= len_val

    # Real lists are required: on Python 3 `range` objects can neither be
    # shuffled in place nor extended with `+=`.
    a = list(range(stop_ind))
    np.random.shuffle(a)
    a += list(range(stop_ind, len_val))
    for mat in matrix_list:
        if isinstance(mat, np.ndarray):
            ret.append(mat[a])
        elif isinstance(mat, list):
            ret.append([mat[i] for i in a])
        else:
            raise TypeError('shuffle_mats_or_lists only supports numpy.array and list objects')
    return ret
|
||||
|
||||
def text_to_labels(text, num_classes):
    """Translate `text` into a list of integer class labels.

    Lowercase letters 'a'..'z' map to 0..25 and a space maps to 26. Any
    other character is dropped from the output. `num_classes` is accepted
    but not used.
    """
    return [26 if ch == ' ' else ord(ch) - ord('a')
            for ch in text
            if ch == ' ' or 'a' <= ch <= 'z']
|
||||
|
||||
# only a-z and space... probably not too difficult
|
||||
# to expand to uppercase and symbols
|
||||
|
||||
def is_valid_str(in_str):
    """Return True when `in_str` holds nothing but lowercase a-z and spaces.

    The empty string counts as valid.
    """
    return re.search(r'[^a-z\ ]', in_str) is None
|
||||
|
||||
# Uses generator functions to supply train/test with
|
||||
# data. Image renderings of text are created on the fly
|
||||
# each time with random perturbations
|
||||
|
||||
class TextImageGenerator(keras.callbacks.Callback):
|
||||
|
||||
def __init__(self, monogram_file, bigram_file, minibatch_size, img_w,
|
||||
img_h, downsample_width, val_split,
|
||||
absolute_max_string_len=16):
|
||||
|
||||
self.minibatch_size = minibatch_size
|
||||
self.img_w = img_w
|
||||
self.img_h = img_h
|
||||
self.monogram_file = monogram_file
|
||||
self.bigram_file = bigram_file
|
||||
self.downsample_width = downsample_width
|
||||
self.val_split = val_split
|
||||
self.blank_label = self.get_output_size() - 1
|
||||
self.absolute_max_string_len = absolute_max_string_len
|
||||
|
||||
def get_output_size(self):
|
||||
return 28
|
||||
|
||||
# num_words can be independent of the epoch size due to the use of generators
|
||||
# as max_string_len grows, num_words can grow
|
||||
def build_word_list(self, num_words, max_string_len=None, mono_fraction=0.5):
|
||||
assert max_string_len <= self.absolute_max_string_len
|
||||
assert num_words % self.minibatch_size == 0
|
||||
assert (self.val_split * num_words) % self.minibatch_size == 0
|
||||
self.num_words = num_words
|
||||
self.string_list = []
|
||||
self.max_string_len = max_string_len
|
||||
self.Y_data = np.ones([self.num_words, self.absolute_max_string_len]) * -1
|
||||
self.X_text = []
|
||||
self.Y_len = [0] * self.num_words
|
||||
|
||||
# monogram file is sorted by frequency in english speech
|
||||
with open(self.monogram_file, 'rt') as f:
|
||||
for line in f:
|
||||
if len(self.string_list) == int(self.num_words * mono_fraction):
|
||||
break
|
||||
word = line.rstrip()
|
||||
if max_string_len == -1 or max_string_len is None or len(word) <= max_string_len:
|
||||
self.string_list.append(word)
|
||||
|
||||
# bigram file contains common word pairings in english speech
|
||||
with open(self.bigram_file, 'rt') as f:
|
||||
lines = f.readlines()
|
||||
for line in lines:
|
||||
if len(self.string_list) == self.num_words:
|
||||
break
|
||||
columns = line.lower().split()
|
||||
word = columns[0] + ' ' + columns[1]
|
||||
if is_valid_str(word) and \
|
||||
(max_string_len == -1 or max_string_len is None or len(word) <= max_string_len):
|
||||
self.string_list.append(word)
|
||||
if len(self.string_list) != self.num_words:
|
||||
raise IOError('Could not pull enough words from supplied monogram and bigram files. ')
|
||||
|
||||
for i, word in enumerate(self.string_list):
|
||||
self.Y_len[i] = len(word)
|
||||
self.Y_data[i, 0:len(word)] = text_to_labels(word, self.get_output_size())
|
||||
self.X_text.append(word)
|
||||
self.Y_len = np.expand_dims(np.array(self.Y_len), 1)
|
||||
|
||||
self.cur_val_index = self.val_split
|
||||
self.cur_train_index = 0
|
||||
|
||||
# each time an image is requested from train/val/test, a new random
|
||||
# painting of the text is performed
|
||||
def get_batch(self, index, size, train):
    """Assemble one batch of painted text images plus CTC bookkeeping.

    # Arguments
        index: offset into ``self.X_text`` / ``self.Y_data`` / ``self.Y_len``
            of the first sample.
        size: number of rows in the batch.
        train: when truthy, rows with position greater than ``size - 4``
            are replaced by blank images.

    # Returns
        ``(inputs, outputs)`` where ``inputs`` is a dict with keys
        ``the_input``, ``the_labels``, ``input_length``, ``label_length``
        and ``source_str``, and ``outputs`` is ``{'ctc': zeros(size)}``.
    """
    images = np.ones([size, 1, self.img_h, self.img_w])
    label_rows = np.ones([size, self.absolute_max_string_len])
    in_lengths = np.zeros([size, 1])
    lab_lengths = np.zeros([size, 1])
    texts = []

    for row in range(size):
        # Mix in some blank inputs. This seems to be important for
        # achieving translational invariance
        is_blank = train and row > size - 4
        text = '' if is_blank else self.X_text[index + row]
        images[row, 0, :, :] = paint_text(text, self.img_w, self.img_h)
        if is_blank:
            label_rows[row, 0] = self.blank_label
        else:
            label_rows[row, :] = self.Y_data[index + row]
        in_lengths[row] = self.downsample_width
        lab_lengths[row] = 1 if is_blank else self.Y_len[index + row]
        texts.append(text)

    inputs = {
        'the_input': images,
        'the_labels': label_rows,
        'input_length': in_lengths,
        'label_length': lab_lengths,
        'source_str': texts,  # used for visualization only
    }
    # dummy data for dummy loss function
    outputs = {'ctc': np.zeros([size])}
    return (inputs, outputs)
|
||||
|
||||
def next_train(self):
    """Endless generator of training batches.

    Each batch is built by ``self.get_batch(..., train=True)`` starting at
    ``self.cur_train_index``. Once the index reaches ``self.val_split`` it
    wraps around and the first ``val_split`` entries of ``X_text``,
    ``Y_data`` and ``Y_len`` are reshuffled.
    """
    while True:
        ret = self.get_batch(self.cur_train_index, self.minibatch_size, train=True)
        self.cur_train_index += self.minibatch_size
        if self.cur_train_index >= self.val_split:
            # Wrap relative to the configured batch size instead of a
            # hard-coded 32, so other batch sizes restart aligned.
            self.cur_train_index = self.cur_train_index % self.minibatch_size
            (self.X_text, self.Y_data, self.Y_len) = shuffle_mats_or_lists(
                [self.X_text, self.Y_data, self.Y_len], self.val_split)
        yield ret
|
||||
|
||||
def next_val(self):
    """Endless generator of validation batches.

    Batches are built by ``self.get_batch(..., train=False)`` from the
    entries between ``self.val_split`` and ``self.num_words``; the index
    wraps back to ``val_split`` when it reaches ``num_words``.
    """
    while True:
        ret = self.get_batch(self.cur_val_index, self.minibatch_size, train=False)
        self.cur_val_index += self.minibatch_size
        if self.cur_val_index >= self.num_words:
            # Wrap relative to the configured batch size instead of a
            # hard-coded 32.
            self.cur_val_index = self.val_split + self.cur_val_index % self.minibatch_size
        yield ret
|
||||
|
||||
def on_train_begin(self, logs={}):
    """Build the initial word list when training starts.

    Translational invariance seems to be the hardest thing for the RNN to
    learn, so training starts with words of at most 4 letters.
    """
    self.build_word_list(num_words=16000, max_string_len=4, mono_fraction=1)
|
||||
|
||||
def on_epoch_begin(self, epoch, logs={}):
    """Switch to a harder word list at epochs 10, 20 and 30.

    After 10 epochs translational invariance should be learned, so longer
    words and eventually multiple words with spaces are fed in.
    """
    # epoch -> (num_words, max_string_len, mono_fraction)
    curriculum = {
        10: (32000, 8, 1),
        20: (32000, 8, 0.6),
        30: (64000, 12, 0.5),
    }
    if epoch in curriculum:
        self.build_word_list(*curriculum[epoch])
|
||||
|
||||
# the actual loss calc occurs here despite it not being
|
||||
# an internal Keras loss function
|
||||
|
||||
def ctc_lambda_func(args):
    """Compute the CTC batch cost inside a Lambda layer.

    # Arguments
        args: sequence ``(y_pred, labels, input_length, label_length)``.

    # Returns
        The result of ``K.ctc_batch_cost`` on the trimmed predictions.
    """
    predictions, label_seq, in_len, lab_len = args
    # the 2 is critical here since the first couple outputs of the RNN
    # tend to be garbage:
    trimmed = predictions[:, 2:, :]
    return K.ctc_batch_cost(label_seq, trimmed, in_len, lab_len)
|
||||
|
||||
# For a real OCR application, this should be beam search with a dictionary
|
||||
# and language model. For this example, best path is sufficient.
|
||||
|
||||
def decode_batch(test_func, word_batch):
    """Best-path decode of a batch of softmax outputs into strings.

    # Arguments
        test_func: callable taking ``[word_batch]`` and returning a
            sequence whose first item is indexed as (sample, time, class).
        word_batch: batch forwarded to ``test_func``.

    # Returns
        List with one decoded string per sample.
    """
    activations = test_func([word_batch])[0]
    decoded = []
    for sample_idx in range(activations.shape[0]):
        # skip the first two time steps, then take the most likely class
        best_path = list(np.argmax(activations[sample_idx, 2:], 1))
        # collapse runs of repeated labels
        collapsed = [label for label, _ in itertools.groupby(best_path)]
        # 26 is space, 27 is CTC blank char
        pieces = []
        for label in collapsed:
            if 0 <= label < 26:
                pieces.append(chr(label + ord('a')))
            elif label == 26:
                pieces.append(' ')
        decoded.append(''.join(pieces))
    return decoded
|
||||
|
||||
class VizCallback(keras.callbacks.Callback):
    """Callback that saves weights, reports edit distances and plots
    decoded samples at the end of every epoch.

    # Arguments
        test_func: callable passed to ``decode_batch`` to get predictions.
        text_img_gen: generator yielding ``(inputs, outputs)`` batches whose
            inputs contain ``'the_input'`` and ``'source_str'``.
        num_display_words: number of samples drawn in the per-epoch figure.
    """

    def __init__(self, test_func, text_img_gen, num_display_words=6):
        self.test_func = test_func
        # one output directory per run, named after the start time
        self.output_dir = os.path.join(
            OUTPUT_DIR, datetime.datetime.now().strftime('%A, %d. %B %Y %I.%M%p'))
        self.text_img_gen = text_img_gen
        self.num_display_words = num_display_words
        os.makedirs(self.output_dir)

    def show_edit_distance(self, num):
        """Print mean and length-normalized mean edit distance over
        ``num`` samples pulled from ``self.text_img_gen``."""
        num_left = num
        mean_norm_ed = 0.0
        mean_ed = 0.0
        while num_left > 0:
            word_batch = next(self.text_img_gen)[0]
            num_proc = min(word_batch['the_input'].shape[0], num_left)
            decoded_res = decode_batch(self.test_func, word_batch['the_input'][0:num_proc])
            for j in range(0, num_proc):
                truth = word_batch['source_str'][j]
                edit_dist = editdistance.eval(decoded_res[j], truth)
                mean_ed += float(edit_dist)
                # An empty truth string would otherwise divide by zero;
                # normalize by 1 so the raw distance is used instead.
                mean_norm_ed += float(edit_dist) / max(len(truth), 1)
            num_left -= num_proc
        mean_norm_ed = mean_norm_ed / num
        mean_ed = mean_ed / num
        print('\nOut of %d samples: Mean edit distance: %.3f Mean normalized edit distance: %0.3f'
              % (num, mean_ed, mean_norm_ed))

    def on_epoch_end(self, epoch, logs={}):
        """Save weights, print edit-distance stats and write a figure of
        truth vs. decoded text into ``self.output_dir``."""
        self.model.save_weights(os.path.join(self.output_dir, 'weights%02d.h5' % epoch))
        self.show_edit_distance(256)
        word_batch = next(self.text_img_gen)[0]
        res = decode_batch(self.test_func, word_batch['the_input'][0:self.num_display_words])

        # one subplot row per displayed sample
        for i in range(self.num_display_words):
            pylab.subplot(self.num_display_words, 1, i + 1)
            pylab.imshow(word_batch['the_input'][i, 0, :, :], cmap='Greys_r')
            pylab.xlabel('Truth = \'%s\' Decoded = \'%s\'' % (word_batch['source_str'][i], res[i]))
        fig = pylab.gcf()
        fig.set_size_inches(10, 12)
        pylab.savefig(os.path.join(self.output_dir, 'e%02d.png' % epoch))
        pylab.close()
|
||||
|
||||
# Input Parameters
img_h = 64
img_w = 512
nb_epoch = 50
minibatch_size = 32
words_per_epoch = 16000
val_split = 0.2
val_words = int(words_per_epoch * (val_split))

# Network parameters
conv_num_filters = 16
filter_size = 3
pool_size_1 = 4
pool_size_2 = 2
time_dense_size = 32
rnn_size = 512
# Integer division: the image width is reduced by both pooling stages, and
# `/` would produce a float on Python 3.
time_steps = img_w // (pool_size_1 * pool_size_2)

# download and unpack the word lists; fdir is the directory holding them
fdir = os.path.dirname(get_file('wordlists.tgz',
                                origin='http://www.isosemi.com/datasets/wordlists.tgz', untar=True))

# The generator's val_split is an absolute index (number of training
# words), not the 0.2 fraction defined above.
img_gen = TextImageGenerator(monogram_file=os.path.join(fdir, 'wordlist_mono_clean.txt'),
                             bigram_file=os.path.join(fdir, 'wordlist_bi_clean.txt'),
                             minibatch_size=minibatch_size,
                             img_w=img_w,
                             img_h=img_h,
                             # 2 fewer steps than the pooled width, matching
                             # the y_pred[:, 2:, :] trim in ctc_lambda_func
                             downsample_width=img_w // (pool_size_1 * pool_size_2) - 2,
                             val_split=words_per_epoch - val_words)
|
||||
|
||||
act = 'relu'
input_data = Input(name='the_input', shape=(1, img_h, img_w), dtype='float32')
# two conv + max-pool stages
inner = Convolution2D(conv_num_filters, filter_size, filter_size, border_mode='same',
                      activation=act, name='conv1')(input_data)
inner = MaxPooling2D(pool_size=(pool_size_1, pool_size_1), name='max1')(inner)
inner = Convolution2D(conv_num_filters, filter_size, filter_size, border_mode='same',
                      activation=act, name='conv2')(inner)
inner = MaxPooling2D(pool_size=(pool_size_2, pool_size_2), name='max2')(inner)

# Reshape needs integer dimensions; `/` would yield floats on Python 3.
conv_to_rnn_dims = ((img_h // (pool_size_1 * pool_size_2)) * conv_num_filters,
                    img_w // (pool_size_1 * pool_size_2))
inner = Reshape(target_shape=conv_to_rnn_dims, name='reshape')(inner)
# swap the two reshaped axes so the width-derived axis comes first
inner = Permute(dims=(2, 1), name='permute')(inner)

# cuts down input size going into RNN:
inner = TimeDistributed(Dense(time_dense_size, activation=act, name='dense1'))(inner)

# Two layers of bidirectional GRUs
# GRU seems to work as well, if not better than LSTM:
gru_1 = GRU(rnn_size, return_sequences=True, name='gru1')(inner)
gru_1b = GRU(rnn_size, return_sequences=True, go_backwards=True, name='gru1_b')(inner)
gru1_merged = merge([gru_1, gru_1b], mode='sum')
gru_2 = GRU(rnn_size, return_sequences=True, name='gru2')(gru1_merged)
gru_2b = GRU(rnn_size, return_sequences=True, go_backwards=True)(gru1_merged)

# transforms RNN output to character activations:
inner = TimeDistributed(Dense(img_gen.get_output_size(), name='dense2'))(merge([gru_2, gru_2b], mode='concat'))
y_pred = Activation('softmax', name='softmax')(inner)
# print a summary of the prediction-only model
Model(input=[input_data], output=y_pred).summary()
|
||||
|
||||
# Extra inputs consumed only by the CTC loss layer.
labels = Input(name='the_labels', shape=[img_gen.absolute_max_string_len], dtype='float32')
input_length = Input(name='input_length', shape=[1], dtype='int64')
label_length = Input(name='label_length', shape=[1], dtype='int64')
# Keras doesn't currently support loss funcs with extra parameters
# so CTC loss is implemented in a lambda layer
loss_out = Lambda(ctc_lambda_func, output_shape=(1,), name="ctc")([y_pred, labels, input_length, label_length])

lr = 0.03
# clipnorm seems to speed up convergence
clipnorm = 5
sgd = SGD(lr=lr, decay=3e-7, momentum=0.9, nesterov=True, clipnorm=clipnorm)

# Training model: its only output is the CTC loss value.
model = Model(input=[input_data, labels, input_length, label_length], output=[loss_out])

# the loss calc occurs elsewhere, so use a dummy lambda func for the loss
model.compile(loss={'ctc': lambda y_true, y_pred: y_pred}, optimizer=sgd)

# captures output of softmax so we can decode the output during visualization
test_func = K.function([input_data], [y_pred])

# The visualization callback gets its own validation generator.
viz_cb = VizCallback(test_func, img_gen.next_val())

# img_gen is also passed as a callback — presumably so that its
# on_train_begin / on_epoch_begin hooks rebuild the word list; confirm
# against the TextImageGenerator class definition.
model.fit_generator(generator=img_gen.next_train(), samples_per_epoch=(words_per_epoch - val_words),
                    nb_epoch=nb_epoch, validation_data=img_gen.next_val(), nb_val_samples=val_words,
                    callbacks=[viz_cb, img_gen])
|
||||
@@ -9,8 +9,8 @@ import numpy as np
|
||||
np.random.seed(1337) # for reproducibility
|
||||
|
||||
from keras.preprocessing import sequence
|
||||
from keras.models import Model
|
||||
from keras.layers import Dense, Dropout, Embedding, LSTM, Input, merge
|
||||
from keras.models import Sequential
|
||||
from keras.layers import Dense, Dropout, Embedding, LSTM, Input, Bidirectional
|
||||
from keras.datasets import imdb
|
||||
|
||||
|
||||
@@ -19,8 +19,7 @@ maxlen = 100 # cut texts after this number of words (among top max_features mos
|
||||
batch_size = 32
|
||||
|
||||
print('Loading data...')
|
||||
(X_train, y_train), (X_test, y_test) = imdb.load_data(nb_words=max_features,
|
||||
test_split=0.2)
|
||||
(X_train, y_train), (X_test, y_test) = imdb.load_data(nb_words=max_features)
|
||||
print(len(X_train), 'train sequences')
|
||||
print(len(X_test), 'test sequences')
|
||||
|
||||
@@ -32,24 +31,11 @@ print('X_test shape:', X_test.shape)
|
||||
y_train = np.array(y_train)
|
||||
y_test = np.array(y_test)
|
||||
|
||||
|
||||
# this is the placeholder tensor for the input sequences
|
||||
sequence = Input(shape=(maxlen,), dtype='int32')
|
||||
# this embedding layer will transform the sequences of integers
|
||||
# into vectors of size 128
|
||||
embedded = Embedding(max_features, 128, input_length=maxlen)(sequence)
|
||||
|
||||
# apply forwards LSTM
|
||||
forwards = LSTM(64)(embedded)
|
||||
# apply backwards LSTM
|
||||
backwards = LSTM(64, go_backwards=True)(embedded)
|
||||
|
||||
# concatenate the outputs of the 2 LSTMs
|
||||
merged = merge([forwards, backwards], mode='concat', concat_axis=-1)
|
||||
after_dp = Dropout(0.5)(merged)
|
||||
output = Dense(1, activation='sigmoid')(after_dp)
|
||||
|
||||
model = Model(input=sequence, output=output)
|
||||
model = Sequential()
|
||||
model.add(Embedding(max_features, 128, input_length=maxlen))
|
||||
model.add(Bidirectional(LSTM(64)))
|
||||
model.add(Dropout(0.5))
|
||||
model.add(Dense(1, activation='sigmoid'))
|
||||
|
||||
# try using different optimizers and different optimizer configs
|
||||
model.compile('adam', 'binary_crossentropy', metrics=['accuracy'])
|
||||
|
||||
+9
-11
@@ -1,6 +1,6 @@
|
||||
'''This example demonstrates the use of Convolution1D for text classification.
|
||||
|
||||
Gets to 0.88 test accuracy after 2 epochs.
|
||||
Gets to 0.89 test accuracy after 2 epochs.
|
||||
90s/epoch on Intel i5 2.4Ghz CPU.
|
||||
10s/epoch on Tesla K40 GPU.
|
||||
|
||||
@@ -12,9 +12,9 @@ np.random.seed(1337) # for reproducibility
|
||||
|
||||
from keras.preprocessing import sequence
|
||||
from keras.models import Sequential
|
||||
from keras.layers import Dense, Dropout, Activation, Lambda
|
||||
from keras.layers import Dense, Dropout, Activation, Flatten
|
||||
from keras.layers import Embedding
|
||||
from keras.layers import Convolution1D
|
||||
from keras.layers import Convolution1D, MaxPooling1D
|
||||
from keras.datasets import imdb
|
||||
from keras import backend as K
|
||||
|
||||
@@ -30,8 +30,7 @@ hidden_dims = 250
|
||||
nb_epoch = 2
|
||||
|
||||
print('Loading data...')
|
||||
(X_train, y_train), (X_test, y_test) = imdb.load_data(nb_words=max_features,
|
||||
test_split=0.2)
|
||||
(X_train, y_train), (X_test, y_test) = imdb.load_data(nb_words=max_features)
|
||||
print(len(X_train), 'train sequences')
|
||||
print(len(X_test), 'test sequences')
|
||||
|
||||
@@ -58,13 +57,12 @@ model.add(Convolution1D(nb_filter=nb_filter,
|
||||
border_mode='valid',
|
||||
activation='relu',
|
||||
subsample_length=1))
|
||||
# we use max pooling:
|
||||
model.add(MaxPooling1D(pool_length=model.output_shape[1]))
|
||||
|
||||
# we use max over time pooling by defining a python function to use
|
||||
# in a Lambda layer
|
||||
def max_1d(X):
|
||||
return K.max(X, axis=1)
|
||||
|
||||
model.add(Lambda(max_1d, output_shape=(nb_filter,)))
|
||||
# We flatten the output of the conv layer,
|
||||
# so that we can add a vanilla dense layer:
|
||||
model.add(Flatten())
|
||||
|
||||
# We add a vanilla hidden layer:
|
||||
model.add(Dense(hidden_dims))
|
||||
|
||||
@@ -22,9 +22,9 @@ maxlen = 100
|
||||
embedding_size = 128
|
||||
|
||||
# Convolution
|
||||
filter_length = 3
|
||||
filter_length = 5
|
||||
nb_filter = 64
|
||||
pool_length = 2
|
||||
pool_length = 4
|
||||
|
||||
# LSTM
|
||||
lstm_output_size = 70
|
||||
@@ -40,7 +40,7 @@ Only 2 epochs are needed as the dataset is very small.
|
||||
'''
|
||||
|
||||
print('Loading data...')
|
||||
(X_train, y_train), (X_test, y_test) = imdb.load_data(nb_words=max_features, test_split=0.2)
|
||||
(X_train, y_train), (X_test, y_test) = imdb.load_data(nb_words=max_features)
|
||||
print(len(X_train), 'train sequences')
|
||||
print(len(X_test), 'test sequences')
|
||||
|
||||
|
||||
@@ -0,0 +1,68 @@
|
||||
'''This example demonstrates the use of fasttext for text classification
|
||||
|
||||
Based on Joulin et al's paper:
|
||||
|
||||
Bag of Tricks for Efficient Text Classification
|
||||
https://arxiv.org/abs/1607.01759
|
||||
|
||||
Can achieve accuracy around 88% after 5 epochs in 70s.
|
||||
|
||||
'''
|
||||
|
||||
from __future__ import print_function
|
||||
import numpy as np
|
||||
np.random.seed(1337) # for reproducibility
|
||||
|
||||
from keras.preprocessing import sequence
|
||||
from keras.models import Sequential
|
||||
from keras.layers import Dense, Flatten
|
||||
from keras.layers import Embedding
|
||||
from keras.layers import AveragePooling1D
|
||||
from keras.datasets import imdb
|
||||
|
||||
|
||||
# hyper-parameters
max_features = 20000
maxlen = 400
batch_size = 32
embedding_dims = 20
nb_epoch = 5

print('Loading data...')
(X_train, y_train), (X_test, y_test) = imdb.load_data(nb_words=max_features)
print(len(X_train), 'train sequences')
print(len(X_test), 'test sequences')

print('Pad sequences (samples x time)')
# bring every review to exactly maxlen tokens
X_train = sequence.pad_sequences(X_train, maxlen=maxlen)
X_test = sequence.pad_sequences(X_test, maxlen=maxlen)
print('X_train shape:', X_train.shape)
print('X_test shape:', X_test.shape)

print('Build model...')
# the embedding layer maps our vocab indices into embedding_dims dimensions
model = Sequential([
    Embedding(max_features, embedding_dims, input_length=maxlen),
])

# average the embeddings of all words in the document: the pool spans the
# whole second axis of the current model output
sequence_axis_len = model.output_shape[1]
model.add(AveragePooling1D(pool_length=sequence_axis_len))

# flatten the output of the AveragePooling1D layer
model.add(Flatten())

# project onto a single unit output layer, and squash it with a sigmoid
model.add(Dense(1, activation='sigmoid'))

model.compile(optimizer='adam',
              loss='binary_crossentropy',
              metrics=['accuracy'])

model.fit(X_train, y_train,
          validation_data=(X_test, y_test),
          batch_size=batch_size,
          nb_epoch=nb_epoch)
|
||||
@@ -1,8 +1,6 @@
|
||||
'''Trains a LSTM on the IMDB sentiment classification task.
|
||||
|
||||
The dataset is actually too small for LSTM to be of any advantage
|
||||
compared to simpler, much faster methods such as TF-IDF+LogReg.
|
||||
|
||||
compared to simpler, much faster methods such as TF-IDF + LogReg.
|
||||
Notes:
|
||||
|
||||
- RNNs are tricky. Choice of batch size is important,
|
||||
@@ -28,8 +26,7 @@ maxlen = 80 # cut texts after this number of words (among top max_features most
|
||||
batch_size = 32
|
||||
|
||||
print('Loading data...')
|
||||
(X_train, y_train), (X_test, y_test) = imdb.load_data(nb_words=max_features,
|
||||
test_split=0.2)
|
||||
(X_train, y_train), (X_test, y_test) = imdb.load_data(nb_words=max_features)
|
||||
print(len(X_train), 'train sequences')
|
||||
print(len(X_test), 'test sequences')
|
||||
|
||||
@@ -52,8 +49,6 @@ model.compile(loss='binary_crossentropy',
|
||||
metrics=['accuracy'])
|
||||
|
||||
print('Train...')
|
||||
print(X_train.shape)
|
||||
print(y_train.shape)
|
||||
model.fit(X_train, y_train, batch_size=batch_size, nb_epoch=15,
|
||||
validation_data=(X_test, y_test))
|
||||
score, acc = model.evaluate(X_test, y_test,
|
||||
|
||||
@@ -14,6 +14,7 @@ from __future__ import print_function
|
||||
from keras.models import Sequential
|
||||
from keras.layers import Dense, Activation, Dropout
|
||||
from keras.layers import LSTM
|
||||
from keras.optimizers import RMSprop
|
||||
from keras.utils.data_utils import get_file
|
||||
import numpy as np
|
||||
import random
|
||||
@@ -47,23 +48,25 @@ for i, sentence in enumerate(sentences):
|
||||
y[i, char_indices[next_chars[i]]] = 1
|
||||
|
||||
|
||||
# build the model: 2 stacked LSTM
|
||||
# build the model: a single LSTM
|
||||
print('Build model...')
|
||||
model = Sequential()
|
||||
model.add(LSTM(512, return_sequences=True, input_shape=(maxlen, len(chars))))
|
||||
model.add(LSTM(512, return_sequences=False))
|
||||
model.add(Dropout(0.2))
|
||||
model.add(LSTM(128, input_shape=(maxlen, len(chars))))
|
||||
model.add(Dense(len(chars)))
|
||||
model.add(Activation('softmax'))
|
||||
|
||||
model.compile(loss='categorical_crossentropy', optimizer='rmsprop')
|
||||
optimizer = RMSprop(lr=0.01)
|
||||
model.compile(loss='categorical_crossentropy', optimizer=optimizer)
|
||||
|
||||
|
||||
def sample(a, temperature=1.0):
|
||||
def sample(preds, temperature=1.0):
|
||||
# helper function to sample an index from a probability array
|
||||
a = np.log(a) / temperature
|
||||
a = np.exp(a) / np.sum(np.exp(a))
|
||||
return np.argmax(np.random.multinomial(1, a, 1))
|
||||
preds = np.asarray(preds).astype('float64')
|
||||
preds = np.log(preds) / temperature
|
||||
exp_preds = np.exp(preds)
|
||||
preds = exp_preds / np.sum(exp_preds)
|
||||
probas = np.random.multinomial(1, preds, 1)
|
||||
return np.argmax(probas)
|
||||
|
||||
# train the model, output generated text after each iteration
|
||||
for iteration in range(1, 60):
|
||||
|
||||
@@ -26,7 +26,7 @@ nb_filters = 32
|
||||
# size of pooling area for max pooling
|
||||
nb_pool = 2
|
||||
# convolution kernel size
|
||||
nb_conv = 3
|
||||
kernel_size = (3, 3)
|
||||
|
||||
# the data, shuffled and split between train and test sets
|
||||
(X_train, y_train), (X_test, y_test) = mnist.load_data()
|
||||
@@ -47,11 +47,11 @@ Y_test = np_utils.to_categorical(y_test, nb_classes)
|
||||
|
||||
model = Sequential()
|
||||
|
||||
model.add(Convolution2D(nb_filters, nb_conv, nb_conv,
|
||||
model.add(Convolution2D(nb_filters, kernel_size[0], kernel_size[1],
|
||||
border_mode='valid',
|
||||
input_shape=(1, img_rows, img_cols)))
|
||||
model.add(Activation('relu'))
|
||||
model.add(Convolution2D(nb_filters, nb_conv, nb_conv))
|
||||
model.add(Convolution2D(nb_filters, kernel_size[0], kernel_size[1]))
|
||||
model.add(Activation('relu'))
|
||||
model.add(MaxPooling2D(pool_size=(nb_pool, nb_pool)))
|
||||
model.add(Dropout(0.25))
|
||||
|
||||
@@ -0,0 +1,87 @@
|
||||
"""This is an example of using Hierarchical RNN (HRNN) to classify MNIST digits.
|
||||
|
||||
HRNNs can learn across multiple levels of temporal hierarchy over a complex sequence.
|
||||
Usually, the first recurrent layer of an HRNN encodes a sentence (e.g. of word vectors)
|
||||
into a sentence vector. The second recurrent layer then encodes a sequence of
|
||||
such vectors (encoded by the first layer) into a document vector. This
|
||||
document vector is considered to preserve both the word-level and
|
||||
sentence-level structure of the context.
|
||||
|
||||
# References
|
||||
- [A Hierarchical Neural Autoencoder for Paragraphs and Documents](https://web.stanford.edu/~jurafsky/pubs/P15-1107.pdf)
|
||||
Encodes paragraphs and documents with HRNN.
|
||||
Results have shown that HRNN outperforms standard
|
||||
RNNs and may play some role in more sophisticated generation tasks like
|
||||
summarization or question answering.
|
||||
- [Hierarchical recurrent neural network for skeleton based action recognition](http://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=7298714)
|
||||
Achieved state-of-the-art results on skeleton based action recognition with 3 levels
|
||||
of bidirectional HRNN combined with fully connected layers.
|
||||
|
||||
In the below MNIST example the first LSTM layer first encodes every
|
||||
column of pixels of shape (28, 1) to a column vector of shape (128,). The second LSTM
|
||||
layer then encodes these 28 column vectors of shape (28, 128) to an image vector
|
||||
representing the whole image. A final Dense layer is added for prediction.
|
||||
|
||||
After 5 epochs: train acc: 0.9858, val acc: 0.9864
|
||||
"""
|
||||
from __future__ import print_function
|
||||
|
||||
from keras.datasets import mnist
|
||||
from keras.models import Sequential, Model
|
||||
from keras.layers import Input, Dense, TimeDistributed
|
||||
from keras.layers import LSTM
|
||||
from keras.utils import np_utils
|
||||
|
||||
# Training parameters.
batch_size = 32
nb_classes = 10
nb_epochs = 5

# Embedding dimensions.
row_hidden = 128
col_hidden = 128

# The data, shuffled and split between train and test sets.
(X_train, y_train), (X_test, y_test) = mnist.load_data()

# Reshape to 4D for the hierarchical RNN and scale pixels by 1/255.
X_train = X_train.reshape(X_train.shape[0], 28, 28, 1).astype('float32')
X_test = X_test.reshape(X_test.shape[0], 28, 28, 1).astype('float32')
X_train /= 255
X_test /= 255
print('X_train shape:', X_train.shape)
print(X_train.shape[0], 'train samples')
print(X_test.shape[0], 'test samples')

# One-hot encode the class labels.
Y_train = np_utils.to_categorical(y_train, nb_classes)
Y_test = np_utils.to_categorical(y_test, nb_classes)

row, col, pixel = X_train.shape[1:]

# 4D input.
x = Input(shape=(row, col, pixel))

# First level: an LSTM wrapped in TimeDistributed encodes each slice
# along the first image axis.
row_encoder = TimeDistributed(LSTM(output_dim=row_hidden))
encoded_rows = row_encoder(x)

# Second level: an LSTM over the sequence of encoded slices.
encoded_columns = LSTM(col_hidden)(encoded_rows)

# Final predictions and model.
prediction = Dense(nb_classes, activation='softmax')(encoded_columns)
model = Model(input=x, output=prediction)
model.compile(optimizer='rmsprop',
              loss='categorical_crossentropy',
              metrics=['accuracy'])

# Training.
model.fit(X_train, Y_train,
          batch_size=batch_size,
          nb_epoch=nb_epochs,
          verbose=1,
          validation_data=(X_test, Y_test))

# Evaluation.
scores = model.evaluate(X_test, Y_test, verbose=0)
print('Test loss:', scores[0])
print('Test accuracy:', scores[1])
|
||||
@@ -0,0 +1,384 @@
|
||||
'''This is an implementation of Net2Net experiment with MNIST in
|
||||
'Net2Net: Accelerating Learning via Knowledge Transfer'
|
||||
by Tianqi Chen, Ian Goodfellow, and Jonathon Shlens
|
||||
|
||||
arXiv:1511.05641v4 [cs.LG] 23 Apr 2016
|
||||
http://arxiv.org/abs/1511.05641
|
||||
|
||||
Notes
|
||||
- What:
|
||||
+ Net2Net is a group of methods to transfer knowledge from a teacher neural
|
||||
net to a student net, so that the student net can be trained faster than
|
||||
from scratch.
|
||||
+ The paper discussed two specific methods of Net2Net, i.e. Net2WiderNet
|
||||
and Net2DeeperNet.
|
||||
+ Net2WiderNet replaces a model with an equivalent wider model that has
|
||||
more units in each hidden layer.
|
||||
+ Net2DeeperNet replaces a model with an equivalent deeper model.
|
||||
+ Both are based on the idea of 'function-preserving transformations of
|
||||
neural nets'.
|
||||
- Why:
|
||||
+ Enable fast exploration of multiple neural nets in experimentation and
|
||||
design process, by creating a series of wider and deeper models with
|
||||
transferable knowledge.
|
||||
+ Enable 'lifelong learning system' by gradually adjusting model complexity
|
||||
to data availability, and reusing transferable knowledge.
|
||||
|
||||
Experiments
|
||||
- Teacher model: a basic CNN model trained on MNIST for 3 epochs.
|
||||
- Net2WiderNet experiment:
|
||||
+ Student model has a wider Conv2D layer and a wider FC layer.
|
||||
+ Comparison of 'random-padding' vs 'net2wider' weight initialization.
|
||||
+ With both methods, student model should immediately perform as well as
|
||||
teacher model, but 'net2wider' is slightly better.
|
||||
- Net2DeeperNet experiment:
|
||||
+ Student model has an extra Conv2D layer and an extra FC layer.
|
||||
+ Comparison of 'random-init' vs 'net2deeper' weight initialization.
|
||||
+ Starting performance of 'net2deeper' is better than 'random-init'.
|
||||
- Hyper-parameters:
|
||||
+ SGD with momentum=0.9 is used for training teacher and student models.
|
||||
+ Learning rate adjustment: it's suggested to reduce learning rate
|
||||
to 1/10 for student model.
|
||||
+ Addition of noise in 'net2wider' is used to break weight symmetry
|
||||
and thus enable full capacity of student models. It is optional
|
||||
when a Dropout layer is used.
|
||||
|
||||
Results
|
||||
- Tested with 'Theano' backend and 'th' image_dim_ordering.
|
||||
- Running on GPU GeForce GTX 980M
|
||||
- Performance Comparisons - validation loss values during first 3 epochs:
|
||||
(1) teacher_model: 0.075 0.041 0.041
|
||||
(2) wider_random_pad: 0.036 0.034 0.032
|
||||
(3) wider_net2wider: 0.032 0.030 0.030
|
||||
(4) deeper_random_init: 0.061 0.043 0.041
|
||||
(5) deeper_net2deeper: 0.032 0.031 0.029
|
||||
'''
|
||||
|
||||
from __future__ import print_function
|
||||
import numpy as np
|
||||
np.random.seed(1337)
|
||||
|
||||
from keras.models import Sequential
|
||||
from keras.layers import Conv2D, MaxPooling2D, Dense, Flatten
|
||||
from keras.optimizers import SGD
|
||||
from keras.utils import np_utils
|
||||
from keras.datasets import mnist
|
||||
|
||||
input_shape = (1, 28, 28)  # image shape
nb_class = 10  # number of classes
|
||||
|
||||
|
||||
# load and pre-process data
|
||||
def preprocess_input(x):
    '''Reshape `x` to `(-1, ) + input_shape` and divide it by 255.
    '''
    target_shape = (-1, ) + input_shape
    reshaped = x.reshape(target_shape)
    return reshaped / 255.
|
||||
|
||||
|
||||
def preprocess_output(y):
    '''Convert a vector of class labels to a one-hot matrix.

    The number of classes is passed explicitly so the result always has
    `nb_class` columns, even when some class is absent from `y`.
    '''
    return np_utils.to_categorical(y, nb_class)
|
||||
|
||||
# Announce the load before it starts; the message used to be printed only
# after the data had already been loaded and preprocessed.
print('Loading MNIST data...')
(train_x, train_y), (validation_x, validation_y) = mnist.load_data()
# reshape and scale the images, one-hot encode the labels
train_x, validation_x = map(preprocess_input, [train_x, validation_x])
train_y, validation_y = map(preprocess_output, [train_y, validation_y])
print('train_x shape:', train_x.shape, 'train_y shape:', train_y.shape)
print('validation_x shape:', validation_x.shape,
      'validation_y shape', validation_y.shape)
|
||||
|
||||
|
||||
# knowledge transfer algorithms
|
||||
def wider2net_conv2d(teacher_w1, teacher_b1, teacher_w2, new_width, init):
    '''Build initial weights for a conv2d layer widened to `new_width`
    filters, together with matching weights for the layer that follows it.

    # Arguments
        teacher_w1: `weight` of the conv2d layer to widen,
          of shape (nb_filter1, nb_channel1, kh1, kw1)
        teacher_b1: `bias` of the conv2d layer to widen,
          of shape (nb_filter1, )
        teacher_w2: `weight` of the next connected conv2d layer,
          of shape (nb_filter2, nb_channel2, kh2, kw2)
        new_width: new `nb_filter` for the wider conv2d layer
        init: how the added weights are initialized,
          either 'random-pad' or 'net2wider'

    # Returns
        Tuple `(student_w1, student_b1, student_w2)`.

    # Raises
        ValueError: if `init` is neither 'random-pad' nor 'net2wider'.
    '''
    old_width = teacher_w1.shape[0]
    assert old_width == teacher_w2.shape[1], (
        'successive layers from teacher model should have compatible shapes')
    assert old_width == teacher_b1.shape[0], (
        'weight and bias from same layer should have compatible shapes')
    assert new_width > old_width, (
        'new width (nb_filter) should be bigger than the existing one')

    extra = new_width - old_width
    if init == 'random-pad':
        # added filters and their outgoing weights are drawn from N(0, 0.1)
        added_w1 = np.random.normal(0, 0.1, size=(extra, ) + teacher_w1.shape[1:])
        added_b1 = np.ones(extra) * 0.1
        added_w2 = np.random.normal(0, 0.1, size=(
            teacher_w2.shape[0], extra) + teacher_w2.shape[2:])
        student_w2 = np.concatenate((teacher_w2, added_w2), axis=1)
    elif init == 'net2wider':
        # added filters are copies of randomly picked teacher filters; the
        # outgoing weights are divided by the number of copies of each
        picked = np.random.randint(old_width, size=extra)
        copies = np.bincount(picked)[picked] + 1.
        added_w1 = teacher_w1[picked, :, :, :]
        added_b1 = teacher_b1[picked]
        added_w2 = teacher_w2[:, picked, :, :] / copies.reshape((1, -1, 1, 1))
        # add small noise to break symmetry, so that student model will have
        # full capacity later
        noise = np.random.normal(0, 5e-2 * added_w2.std(), size=added_w2.shape)
        student_w2 = np.concatenate((teacher_w2, added_w2 + noise), axis=1)
        student_w2[:, picked, :, :] = added_w2
    else:
        raise ValueError('Unsupported weight initializer: %s' % init)

    student_w1 = np.concatenate((teacher_w1, added_w1), axis=0)
    student_b1 = np.concatenate((teacher_b1, added_b1), axis=0)

    return student_w1, student_b1, student_w2
|
||||
|
||||
|
||||
def wider2net_fc(teacher_w1, teacher_b1, teacher_w2, new_width, init):
    '''Build initial weights for a widened dense layer and its successor.

    The widened layer gains `new_width - nout1` output units; the next
    layer gains the same number of input rows.

    # Arguments
        teacher_w1: `weight` of the fc layer to widen,
            of shape (nin1, nout1)
        teacher_b1: `bias` of the fc layer to widen,
            of shape (nout1, )
        teacher_w2: `weight` of the next connected fc layer,
            of shape (nin2, nout2)
        new_width: new `nout` for the widened fc layer
        init: how the added weights are initialised,
            either 'random-pad' or 'net2wider'

    # Returns
        Tuple `(student_w1, student_b1, student_w2)`.

    # Raises
        AssertionError: if the teacher shapes disagree or `new_width`
            is not larger than the current width.
        ValueError: if `init` is neither 'random-pad' nor 'net2wider'.
    '''
    old_width = teacher_w1.shape[1]
    assert old_width == teacher_w2.shape[0], (
        'successive layers from teacher model should have compatible shapes')
    assert old_width == teacher_b1.shape[0], (
        'weight and bias from same layer should have compatible shapes')
    assert new_width > old_width, (
        'new width (nout) should be bigger than the existing one')

    extra = new_width - old_width
    if init == 'random-pad':
        # new units: gaussian weights and a constant 0.1 bias
        pad_w1 = np.random.normal(0, 0.1, size=(teacher_w1.shape[0], extra))
        pad_b1 = np.ones(extra) * 0.1
        pad_w2 = np.random.normal(0, 0.1, size=(extra, teacher_w2.shape[1]))
        student_w2 = np.concatenate((teacher_w2, pad_w2), axis=0)
    elif init == 'net2wider':
        # new units are copies of randomly chosen existing units
        picked = np.random.randint(old_width, size=extra)
        # number of copies of each picked unit, the original included
        replicas = np.bincount(picked)[picked] + 1.
        pad_w1 = teacher_w1[:, picked]
        pad_b1 = teacher_b1[picked]
        pad_w2 = teacher_w2[picked, :] / replicas[:, np.newaxis]
        # add small noise to the appended rows to break symmetry, so that
        # the student model can use its full capacity later
        noise = np.random.normal(0, 5e-2 * pad_w2.std(), size=pad_w2.shape)
        student_w2 = np.concatenate((teacher_w2, pad_w2 + noise), axis=0)
        # the rows of the picked original units get the scaled, noise-free
        # weights
        student_w2[picked, :] = pad_w2
    else:
        raise ValueError('Unsupported weight initializer: %s' % init)

    student_w1 = np.concatenate((teacher_w1, pad_w1), axis=1)
    student_b1 = np.concatenate((teacher_b1, pad_b1), axis=0)

    return student_w1, student_b1, student_w2
|
||||
|
||||
|
||||
def deeper2net_conv2d(teacher_w):
    '''Get initial weights for a deeper conv2d layer by 'net2deeper'.

    The returned kernel has a single 1 at the centre tap of filter `i`
    on input channel `i` and zeros elsewhere; the bias is all zeros.

    # Arguments
        teacher_w: `weight` of previous conv2d layer,
            of shape (nb_filter, nb_channel, kh, kw)

    # Returns
        Tuple `(student_w, student_b)` with shapes
        (nb_filter, nb_filter, kh, kw) and (nb_filter, ).
    '''
    nb_filter, _, kh, kw = teacher_w.shape
    student_w = np.zeros((nb_filter, nb_filter, kh, kw))
    # Floor division keeps the centre index an integer on Python 3 as well
    # (plain `/` would produce a float there, which is not a valid index).
    diag = np.arange(nb_filter)
    student_w[diag, diag, (kh - 1) // 2, (kw - 1) // 2] = 1.
    student_b = np.zeros(nb_filter)
    return student_w, student_b
|
||||
|
||||
|
||||
def copy_weights(teacher_model, student_model, layer_names):
    '''Transfer weights layer by layer from teacher_model to student_model.

    Only the layers whose names appear in layer_names are touched; each is
    looked up by name in both models.
    '''
    for layer_name in layer_names:
        teacher_weights = teacher_model.get_layer(name=layer_name).get_weights()
        target_layer = student_model.get_layer(name=layer_name)
        target_layer.set_weights(teacher_weights)
|
||||
|
||||
|
||||
# methods to construct teacher_model and student_models
|
||||
def make_teacher_model(train_data, validation_data, nb_epoch=3):
    '''Build and train the small CNN that serves as teacher model.

    # Arguments
        train_data: pair `(train_x, train_y)` passed to `fit`
        validation_data: forwarded unchanged to `fit`
        nb_epoch: number of training epochs

    # Returns
        Tuple `(model, history)` where `history` is the value returned
        by `model.fit`.
    '''
    teacher = Sequential()
    # two conv/pool stages ...
    teacher.add(Conv2D(64, 3, 3,
                       input_shape=input_shape,
                       border_mode='same',
                       name='conv1'))
    teacher.add(MaxPooling2D(name='pool1'))
    teacher.add(Conv2D(64, 3, 3,
                       border_mode='same',
                       name='conv2'))
    teacher.add(MaxPooling2D(name='pool2'))
    # ... followed by a dense classifier
    teacher.add(Flatten(name='flatten'))
    teacher.add(Dense(64, activation='relu', name='fc1'))
    teacher.add(Dense(nb_class, activation='softmax', name='fc2'))

    sgd = SGD(lr=0.01, momentum=0.9)
    teacher.compile(loss='categorical_crossentropy',
                    optimizer=sgd,
                    metrics=['accuracy'])

    inputs, targets = train_data
    history = teacher.fit(inputs, targets,
                          nb_epoch=nb_epoch,
                          validation_data=validation_data)
    return teacher, history
|
||||
|
||||
|
||||
def make_wider_student_model(teacher_model, train_data,
                             validation_data, init, nb_epoch=3):
    '''Build and train a student whose conv1 and fc1 are wider than the
    teacher's, initialised by 'random-pad' (baseline) or 'net2wider'.

    # Arguments
        teacher_model: trained model providing the initial weights
        train_data: pair `(train_x, train_y)` passed to `fit`
        validation_data: forwarded unchanged to `fit`
        init: 'random-pad' or 'net2wider', forwarded to the wider2net helpers
        nb_epoch: number of training epochs

    # Returns
        Tuple `(model, history)`.
    '''
    new_conv1_width = 128
    new_fc1_width = 128

    student = Sequential()
    # conv1 is wider than in teacher_model
    student.add(Conv2D(new_conv1_width, 3, 3,
                       input_shape=input_shape,
                       border_mode='same',
                       name='conv1'))
    student.add(MaxPooling2D(name='pool1'))
    student.add(Conv2D(64, 3, 3, border_mode='same', name='conv2'))
    student.add(MaxPooling2D(name='pool2'))
    student.add(Flatten(name='flatten'))
    # fc1 is wider than in teacher_model
    student.add(Dense(new_fc1_width, activation='relu', name='fc1'))
    student.add(Dense(nb_class, activation='softmax', name='fc2'))

    # Layers other than the widened ones and their immediate downstream
    # layers would have to be copied from teacher_model as-is; this example
    # has no such layers, so only the widened pairs are handled below.

    # conv1 -> conv2
    conv1_w, conv1_b = teacher_model.get_layer('conv1').get_weights()
    conv2_w, conv2_b = teacher_model.get_layer('conv2').get_weights()
    wide_conv1_w, wide_conv1_b, wide_conv2_w = wider2net_conv2d(
        conv1_w, conv1_b, conv2_w, new_conv1_width, init)
    student.get_layer('conv1').set_weights([wide_conv1_w, wide_conv1_b])
    student.get_layer('conv2').set_weights([wide_conv2_w, conv2_b])

    # fc1 -> fc2
    fc1_w, fc1_b = teacher_model.get_layer('fc1').get_weights()
    fc2_w, fc2_b = teacher_model.get_layer('fc2').get_weights()
    wide_fc1_w, wide_fc1_b, wide_fc2_w = wider2net_fc(
        fc1_w, fc1_b, fc2_w, new_fc1_width, init)
    student.get_layer('fc1').set_weights([wide_fc1_w, wide_fc1_b])
    student.get_layer('fc2').set_weights([wide_fc2_w, fc2_b])

    sgd = SGD(lr=0.001, momentum=0.9)
    student.compile(loss='categorical_crossentropy',
                    optimizer=sgd,
                    metrics=['accuracy'])

    inputs, targets = train_data
    history = student.fit(inputs, targets,
                          nb_epoch=nb_epoch,
                          validation_data=validation_data)
    return student, history
|
||||
|
||||
|
||||
def make_deeper_student_model(teacher_model, train_data,
                              validation_data, init, nb_epoch=3):
    '''Train a deeper student model based on teacher_model,
    with either 'random-init' (baseline) or 'net2deeper'.

    An extra conv layer ('conv2-deeper') is inserted after conv2 and an
    extra dense layer ('fc1-deeper') after fc1; the remaining layers take
    their weights from teacher_model.

    # Arguments
        teacher_model: trained model providing the initial weights
        train_data: pair `(train_x, train_y)` passed to `fit`
        validation_data: forwarded unchanged to `fit`
        init: 'random-init' or 'net2deeper'
        nb_epoch: number of training epochs

    # Returns
        Tuple `(model, history)`.

    # Raises
        ValueError: if `init` is not a supported initializer.
    '''
    # Validate once, before any layer is built, so an invalid `init` fails
    # fast and no second, unreachable check is needed further down.
    if init not in ('net2deeper', 'random-init'):
        raise ValueError('Unsupported weight initializer: %s' % init)
    use_net2deeper = init == 'net2deeper'

    model = Sequential()
    model.add(Conv2D(64, 3, 3, input_shape=input_shape,
                     border_mode='same', name='conv1'))
    model.add(MaxPooling2D(name='pool1'))
    model.add(Conv2D(64, 3, 3, border_mode='same', name='conv2'))
    # add another conv2d layer to make original conv2 deeper
    if use_net2deeper:
        # only the shape of conv2's kernel is used by deeper2net_conv2d
        prev_w, _ = model.get_layer('conv2').get_weights()
        new_weights = deeper2net_conv2d(prev_w)
        model.add(Conv2D(64, 3, 3, border_mode='same',
                         name='conv2-deeper', weights=new_weights))
    else:
        model.add(Conv2D(64, 3, 3, border_mode='same', name='conv2-deeper'))
    model.add(MaxPooling2D(name='pool2'))
    model.add(Flatten(name='flatten'))
    model.add(Dense(64, activation='relu', name='fc1'))
    # add another fc layer to make original fc1 deeper
    if use_net2deeper:
        # net2deeper for fc layer with relu, is just an identity initializer
        model.add(Dense(64, init='identity',
                        activation='relu', name='fc1-deeper'))
    else:
        model.add(Dense(64, activation='relu', name='fc1-deeper'))
    model.add(Dense(nb_class, activation='softmax', name='fc2'))

    # copy weights for other layers
    copy_weights(teacher_model, model, layer_names=[
        'conv1', 'conv2', 'fc1', 'fc2'])

    model.compile(loss='categorical_crossentropy',
                  optimizer=SGD(lr=0.001, momentum=0.9),
                  metrics=['accuracy'])

    train_x, train_y = train_data
    history = model.fit(train_x, train_y, nb_epoch=nb_epoch,
                        validation_data=validation_data)
    return model, history
|
||||
|
||||
|
||||
# experiments setup
|
||||
def net2wider_experiment():
    '''Train and report, in order:
    (1) a teacher model,
    (2) a wider student model initialised with 'random-pad',
    (3) a wider student model initialised with 'net2wider'.

    Uses the module-level train/validation arrays; every model is trained
    for 3 epochs.
    '''
    train_data = (train_x, train_y)
    validation_data = (validation_x, validation_y)

    print('\nExperiment of Net2WiderNet ...')
    print('\nbuilding teacher model ...')
    teacher_model, _ = make_teacher_model(
        train_data, validation_data, nb_epoch=3)

    student_runs = (
        ('\nbuilding wider student model by random padding ...', 'random-pad'),
        ('\nbuilding wider student model by net2wider ...', 'net2wider'),
    )
    for banner, initializer in student_runs:
        print(banner)
        make_wider_student_model(teacher_model, train_data,
                                 validation_data, initializer,
                                 nb_epoch=3)
|
||||
|
||||
|
||||
def net2deeper_experiment():
    '''Train and report, in order:
    (1) a teacher model,
    (2) a deeper student model initialised with 'random-init',
    (3) a deeper student model initialised with 'net2deeper'.

    Uses the module-level train/validation arrays; every model is trained
    for 3 epochs.
    '''
    train_data = (train_x, train_y)
    validation_data = (validation_x, validation_y)

    print('\nExperiment of Net2DeeperNet ...')
    print('\nbuilding teacher model ...')
    teacher_model, _ = make_teacher_model(
        train_data, validation_data, nb_epoch=3)

    student_runs = (
        ('\nbuilding deeper student model by random init ...', 'random-init'),
        ('\nbuilding deeper student model by net2deeper ...', 'net2deeper'),
    )
    for banner, initializer in student_runs:
        print(banner)
        make_deeper_student_model(teacher_model, train_data,
                                  validation_data, initializer,
                                  nb_epoch=3)
|
||||
|
||||
# run both experiments, widening first and deepening second
for run_experiment in (net2wider_experiment, net2deeper_experiment):
    run_experiment()
|
||||
@@ -80,6 +80,7 @@ total_variation_weight = 1.
|
||||
style_weight = 1.
|
||||
content_weight = 0.025
|
||||
|
||||
|
||||
# dimensions of the generated picture.
|
||||
img_width = 400
|
||||
img_height = 400
|
||||
@@ -88,13 +89,21 @@ assert img_height == img_width, 'Due to the use of the Gram matrix, width and he
|
||||
# util function to open, resize and format pictures into appropriate tensors
|
||||
def preprocess_image(image_path):
|
||||
img = imresize(imread(image_path), (img_width, img_height))
|
||||
img = img.transpose((2, 0, 1)).astype('float64')
|
||||
img = img[:, :, ::-1].astype('float64')
|
||||
img[:, :, 0] -= 103.939
|
||||
img[:, :, 1] -= 116.779
|
||||
img[:, :, 2] -= 123.68
|
||||
img = img.transpose((2, 0, 1))
|
||||
img = np.expand_dims(img, axis=0)
|
||||
return img
|
||||
|
||||
# util function to convert a tensor into a valid image
|
||||
def deprocess_image(x):
|
||||
x = x.transpose((1, 2, 0))
|
||||
x[:, :, 0] += 103.939
|
||||
x[:, :, 1] += 116.779
|
||||
x[:, :, 2] += 123.68
|
||||
x = x[:, :, ::-1]
|
||||
x = np.clip(x, 0, 255).astype('uint8')
|
||||
return x
|
||||
|
||||
@@ -275,6 +284,9 @@ evaluator = Evaluator()
|
||||
# run scipy-based optimization (L-BFGS) over the pixels of the generated image
|
||||
# so as to minimize the neural style loss
|
||||
x = np.random.uniform(0, 255, (1, 3, img_width, img_height))
|
||||
x[0, 0, :, :] -= 103.939
|
||||
x[0, 1, :, :] -= 116.779
|
||||
x[0, 2, :, :] -= 123.68
|
||||
for i in range(10):
|
||||
print('Start of iteration', i)
|
||||
start_time = time.time()
|
||||
@@ -282,7 +294,7 @@ for i in range(10):
|
||||
fprime=evaluator.grads, maxfun=20)
|
||||
print('Current loss value:', min_val)
|
||||
# save current generated image
|
||||
img = deprocess_image(x.reshape((3, img_width, img_height)))
|
||||
img = deprocess_image(x.copy().reshape((3, img_width, img_height)))
|
||||
fname = result_prefix + '_at_iteration_%d.png' % i
|
||||
imsave(fname, img)
|
||||
end_time = time.time()
|
||||
|
||||
@@ -0,0 +1,144 @@
|
||||
'''This script loads pre-trained word embeddings (GloVe embeddings)
|
||||
into a frozen Keras Embedding layer, and uses it to
|
||||
train a text classification model on the 20 Newsgroup dataset
|
||||
(classification of newsgroup messages into 20 different categories).
|
||||
|
||||
GloVe embedding data can be found at:
|
||||
http://nlp.stanford.edu/data/glove.6B.zip
|
||||
(source page: http://nlp.stanford.edu/projects/glove/)
|
||||
|
||||
20 Newsgroup data can be found at:
|
||||
http://www.cs.cmu.edu/afs/cs.cmu.edu/project/theo-20/www/data/news20.html
|
||||
'''
|
||||
|
||||
from __future__ import print_function
|
||||
import os
|
||||
import numpy as np
|
||||
np.random.seed(1337)
|
||||
|
||||
from keras.preprocessing.text import Tokenizer
|
||||
from keras.preprocessing.sequence import pad_sequences
|
||||
from keras.utils.np_utils import to_categorical
|
||||
from keras.layers import Dense, Input, Flatten
|
||||
from keras.layers import Conv1D, MaxPooling1D, Embedding
|
||||
from keras.models import Model
|
||||
import sys
|
||||
|
||||
# Locations of the downloaded data; BASE_DIR is prepended to both paths.
BASE_DIR = ''
GLOVE_DIR = BASE_DIR + '/glove.6B/'
TEXT_DATA_DIR = BASE_DIR + '/20_newsgroup/'
MAX_SEQUENCE_LENGTH = 1000  # every sample is padded/truncated to this length
MAX_NB_WORDS = 20000  # vocabulary cap handed to the Tokenizer
EMBEDDING_DIM = 100  # must match the GloVe file read below (100d)
VALIDATION_SPLIT = 0.2  # fraction of samples held out for validation

# first, build index mapping words in the embeddings set
# to their embedding vector

print('Indexing word vectors.')

embeddings_index = {}
# `with` guarantees the file is closed even if a line fails to parse
with open(os.path.join(GLOVE_DIR, 'glove.6B.100d.txt')) as f:
    for line in f:
        # each line is: word followed by its coefficients
        values = line.split()
        word = values[0]
        coefs = np.asarray(values[1:], dtype='float32')
        embeddings_index[word] = coefs

print('Found %s word vectors.' % len(embeddings_index))

# second, prepare text samples and their labels
print('Processing text dataset')

texts = []  # list of text samples
labels_index = {}  # dictionary mapping label name to numeric id
labels = []  # list of label ids
for name in sorted(os.listdir(TEXT_DATA_DIR)):
    path = os.path.join(TEXT_DATA_DIR, name)
    if os.path.isdir(path):
        # one sub-directory per class; ids are assigned in sorted order
        label_id = len(labels_index)
        labels_index[name] = label_id
        for fname in sorted(os.listdir(path)):
            # only files with purely numeric names are treated as samples
            if fname.isdigit():
                fpath = os.path.join(path, fname)
                # Python 2's open() has no `encoding` argument
                if sys.version_info < (3,):
                    f = open(fpath)
                else:
                    f = open(fpath, encoding='latin-1')
                # close the handle even if reading raises
                with f:
                    texts.append(f.read())
                labels.append(label_id)

print('Found %s texts.' % len(texts))

# finally, vectorize the text samples into a 2D integer tensor
tokenizer = Tokenizer(nb_words=MAX_NB_WORDS)
tokenizer.fit_on_texts(texts)
sequences = tokenizer.texts_to_sequences(texts)

word_index = tokenizer.word_index
print('Found %s unique tokens.' % len(word_index))

data = pad_sequences(sequences, maxlen=MAX_SEQUENCE_LENGTH)

labels = to_categorical(np.asarray(labels))
print('Shape of data tensor:', data.shape)
print('Shape of label tensor:', labels.shape)

# split the data into a training set and a validation set
indices = np.arange(data.shape[0])
np.random.shuffle(indices)
data = data[indices]
labels = labels[indices]
nb_validation_samples = int(VALIDATION_SPLIT * data.shape[0])
# Slice at an explicit index: `data[:-0]` would be empty, so a validation
# count that rounds down to 0 must not be used as a negative offset.
split_at = data.shape[0] - nb_validation_samples

x_train = data[:split_at]
y_train = labels[:split_at]
x_val = data[split_at:]
y_val = labels[split_at:]

print('Preparing embedding matrix.')

# prepare embedding matrix
nb_words = min(MAX_NB_WORDS, len(word_index))
# row 0 stays all-zero; word indices in `word_index` start above it
embedding_matrix = np.zeros((nb_words + 1, EMBEDDING_DIM))
for word, i in word_index.items():
    if i > MAX_NB_WORDS:
        continue
    embedding_vector = embeddings_index.get(word)
    if embedding_vector is not None:
        # words not found in embedding index will be all-zeros.
        embedding_matrix[i] = embedding_vector

# load pre-trained word embeddings into an Embedding layer
# note that we set trainable = False so as to keep the embeddings fixed
embedding_layer = Embedding(nb_words + 1,
                            EMBEDDING_DIM,
                            weights=[embedding_matrix],
                            input_length=MAX_SEQUENCE_LENGTH,
                            trainable=False)

print('Training model.')

# train a 1D convnet with global maxpooling
sequence_input = Input(shape=(MAX_SEQUENCE_LENGTH,), dtype='int32')
embedded_sequences = embedding_layer(sequence_input)
x = Conv1D(128, 5, activation='relu')(embedded_sequences)
x = MaxPooling1D(5)(x)
x = Conv1D(128, 5, activation='relu')(x)
x = MaxPooling1D(5)(x)
x = Conv1D(128, 5, activation='relu')(x)
x = MaxPooling1D(35)(x)
x = Flatten()(x)
x = Dense(128, activation='relu')(x)
preds = Dense(len(labels_index), activation='softmax')(x)

model = Model(sequence_input, preds)
model.compile(loss='categorical_crossentropy',
              optimizer='rmsprop',
              metrics=['acc'])

# happy learning!
model.fit(x_train, y_train, validation_data=(x_val, y_val),
          nb_epoch=2, batch_size=128)
|
||||
@@ -0,0 +1,220 @@
|
||||
'''This script demonstrates how to build a deep residual network
|
||||
using the Keras functional API.
|
||||
|
||||
get_resnet50() returns the deep residual network model (50 layers)
|
||||
|
||||
Please visit Kaiming He's GitHub homepage:
|
||||
https://github.com/KaimingHe
|
||||
for more information.
|
||||
|
||||
The related paper is
|
||||
'Deep Residual Learning for Image Recognition'
|
||||
Kaiming He, Xiangyu Zhang, Shaoqing Ren, Jian Sun
|
||||
http://arxiv.org/abs/1512.03385
|
||||
|
||||
Pretrained weights were converted from Kaiming He's caffe model directly.
|
||||
|
||||
For now we provide weights for the tensorflow backend only,
|
||||
thus use 'tf' dim_ordering (e.g. input_shape=(224, 224, 3) for 224*224 color image)
|
||||
would accelerate the computation, but we also provide weights for 'th' dim_ordering for compatibility.
|
||||
You can set your default dim ordering in your Keras config file at ~/.keras/keras.json
|
||||
|
||||
please download them at:
|
||||
http://pan.baidu.com/s/1o8pO2q2 ('th' dim ordering, for China)
|
||||
http://pan.baidu.com/s/1pLanuTt ('tf' dim ordering, for China)
|
||||
|
||||
https://drive.google.com/open?id=0B4ChsjFJvew3NVQ2U041Q0xHRHM ('th' dim ordering, for other countries)
|
||||
https://drive.google.com/open?id=0B4ChsjFJvew3NWN5THdxcTdSWmc ('tf' dim ordering, for other countries)
|
||||
|
||||
@author: BigMoyan, University of Electronic Science and Technology of China
|
||||
'''
|
||||
from __future__ import print_function
|
||||
from keras.layers import merge
|
||||
from keras.layers.convolutional import Convolution2D, MaxPooling2D, ZeroPadding2D, AveragePooling2D
|
||||
from keras.layers.core import Dense, Activation, Flatten
|
||||
from keras.layers.normalization import BatchNormalization
|
||||
from keras.models import Model
|
||||
from keras.layers import Input
|
||||
from keras.preprocessing.image import load_img, img_to_array
|
||||
import keras.backend as K
|
||||
import numpy as np
|
||||
|
||||
# The names of layers in resnet50 are generated with the following format
|
||||
# [type][stage][block]_branch[branch][layer]
|
||||
# type: 'res' for conv layer, 'bn' and 'scale' for BN layer
|
||||
# stage: from '2' to '5', current stage number
|
||||
# block: 'a','b','c'... for different blocks in a stage
|
||||
# branch: '1' for shortcut and '2' for main path
|
||||
# layer: 'a','b','c'... for different layers in a block
|
||||
|
||||
|
||||
def identity_block(input_tensor, kernel_size, filters, stage, block):
    '''Residual block whose shortcut is the unmodified input tensor.

    The main path is conv(1x1) -> BN -> relu -> conv(kernel_size) -> BN ->
    relu -> conv(1x1) -> BN; its output is summed with `input_tensor` and
    passed through a final relu.

    # Arguments
        input_tensor: input tensor
        kernel_size: kernel size of the middle conv layer on the main path
        filters: list of 3 integers, the nb_filters of the 3 conv layers
            on the main path
        stage: integer, current stage label, used for generating layer names
        block: 'a','b'..., current block label, used for generating layer names

    # Returns
        The output tensor of the block.
    '''
    dim_ordering = K.image_dim_ordering()
    nb_filter1, nb_filter2, nb_filter3 = filters
    # channels are last for 'tf' ordering, first otherwise
    bn_axis = 3 if dim_ordering == 'tf' else 1
    stage_block = str(stage) + block
    conv_name_base = 'res' + stage_block + '_branch'
    bn_name_base = 'bn' + stage_block + '_branch'

    # 2a: 1x1 convolution
    x = Convolution2D(nb_filter1, 1, 1,
                      dim_ordering=dim_ordering,
                      name=conv_name_base + '2a')(input_tensor)
    x = BatchNormalization(axis=bn_axis, name=bn_name_base + '2a')(x)
    x = Activation('relu')(x)

    # 2b: kernel_size x kernel_size convolution, spatial size preserved
    x = Convolution2D(nb_filter2, kernel_size, kernel_size,
                      border_mode='same',
                      dim_ordering=dim_ordering,
                      name=conv_name_base + '2b')(x)
    x = BatchNormalization(axis=bn_axis, name=bn_name_base + '2b')(x)
    x = Activation('relu')(x)

    # 2c: 1x1 convolution, no activation before the merge
    x = Convolution2D(nb_filter3, 1, 1,
                      dim_ordering=dim_ordering,
                      name=conv_name_base + '2c')(x)
    x = BatchNormalization(axis=bn_axis, name=bn_name_base + '2c')(x)

    x = merge([x, input_tensor], mode='sum')
    return Activation('relu')(x)
|
||||
|
||||
|
||||
def conv_block(input_tensor, kernel_size, filters, stage, block, strides=(2, 2)):
    '''Residual block whose shortcut goes through a conv layer.

    The main path matches identity_block except that its first 1x1 conv
    uses `subsample=strides`; the shortcut is a 1x1 conv with the same
    `subsample` followed by BN, so both branches can be summed.

    # Arguments
        input_tensor: input tensor
        kernel_size: kernel size of the middle conv layer on the main path
        filters: list of 3 integers, the nb_filters of the 3 conv layers
            on the main path
        stage: integer, current stage label, used for generating layer names
        block: 'a','b'..., current block label, used for generating layer names
        strides: `subsample` for the first main-path conv and the shortcut
            conv, (2, 2) unless overridden

    # Returns
        The output tensor of the block.
    '''
    nb_filter1, nb_filter2, nb_filter3 = filters
    dim_ordering = K.image_dim_ordering()
    # channels are last for 'tf' ordering, first otherwise
    bn_axis = 3 if dim_ordering == 'tf' else 1
    stage_block = str(stage) + block
    conv_name_base = 'res' + stage_block + '_branch'
    bn_name_base = 'bn' + stage_block + '_branch'

    # 2a: strided 1x1 convolution
    x = Convolution2D(nb_filter1, 1, 1,
                      subsample=strides,
                      dim_ordering=dim_ordering,
                      name=conv_name_base + '2a')(input_tensor)
    x = BatchNormalization(axis=bn_axis, name=bn_name_base + '2a')(x)
    x = Activation('relu')(x)

    # 2b: kernel_size x kernel_size convolution, spatial size preserved
    x = Convolution2D(nb_filter2, kernel_size, kernel_size,
                      border_mode='same',
                      dim_ordering=dim_ordering,
                      name=conv_name_base + '2b')(x)
    x = BatchNormalization(axis=bn_axis, name=bn_name_base + '2b')(x)
    x = Activation('relu')(x)

    # 2c: 1x1 convolution, no activation before the merge
    x = Convolution2D(nb_filter3, 1, 1,
                      dim_ordering=dim_ordering,
                      name=conv_name_base + '2c')(x)
    x = BatchNormalization(axis=bn_axis, name=bn_name_base + '2c')(x)

    # branch 1: projection shortcut with the same subsampling as 2a
    shortcut = Convolution2D(nb_filter3, 1, 1,
                             subsample=strides,
                             dim_ordering=dim_ordering,
                             name=conv_name_base + '1')(input_tensor)
    shortcut = BatchNormalization(axis=bn_axis,
                                  name=bn_name_base + '1')(shortcut)

    x = merge([x, shortcut], mode='sum')
    return Activation('relu')(x)
|
||||
|
||||
|
||||
def read_img(img_path):
    '''Load an image and preprocess it for the network.

    The image is resized to 224x224, converted to an array in the current
    dim ordering, reordered from RGB to BGR, mean-centred per channel and
    given a leading batch axis.

    # Arguments
        img_path: path of the image file to load

    # Returns
        Array with a leading axis of size 1: (1, 3, 224, 224) for 'th'
        dim ordering, (1, 224, 224, 3) otherwise.
    '''
    dim_ordering = K.image_dim_ordering()
    # NOTE(review): assumed to be per-channel means in B, G, R order (the
    # Caffe convention this model's weights come from) - confirm.
    mean = (103.939, 116.779, 123.68)
    img = load_img(img_path, target_size=(224, 224))
    img = img_to_array(img, dim_ordering=dim_ordering)

    # Convert 'RGB'->'BGR' *before* subtracting the means, so that each
    # mean is removed from its own channel. Subtracting first (on the RGB
    # array) would apply the first mean to red and the last one to blue.
    if dim_ordering == 'th':
        img = img[::-1, :, :]
        img[0, :, :] -= mean[0]
        img[1, :, :] -= mean[1]
        img[2, :, :] -= mean[2]
    else:
        img = img[:, :, ::-1]
        img[:, :, 0] -= mean[0]
        img[:, :, 1] -= mean[1]
        img[:, :, 2] -= mean[2]

    img = np.expand_dims(img, axis=0)
    return img
|
||||
|
||||
|
||||
def get_resnet50():
    '''Assemble the 50-layer residual network and return it as a Model.

    The model is returned without weights loaded; load pretrained weights
    before using it for prediction. Because the pretrained weights were
    converted from a caffemodel, input images should use 'BGR' channel
    order (the channel order of caffe).
    '''
    dim_ordering = K.image_dim_ordering()
    if dim_ordering == 'tf':
        input_shape, bn_axis = (224, 224, 3), 3
    else:
        input_shape, bn_axis = (3, 224, 224), 1
    inp = Input(shape=input_shape)

    # stem: pad, 7x7/2 conv, BN, relu, 3x3/2 max-pool
    x = ZeroPadding2D((3, 3), dim_ordering=dim_ordering)(inp)
    x = Convolution2D(64, 7, 7, subsample=(2, 2),
                      dim_ordering=dim_ordering, name='conv1')(x)
    x = BatchNormalization(axis=bn_axis, name='bn_conv1')(x)
    x = Activation('relu')(x)
    x = MaxPooling2D((3, 3), strides=(2, 2), dim_ordering=dim_ordering)(x)

    # (stage, filters, block labels, strides of the leading conv_block);
    # the first label of each stage is a conv_block, the rest are
    # identity_blocks
    stage_specs = (
        (2, [64, 64, 256], 'abc', (1, 1)),
        (3, [128, 128, 512], 'abcd', (2, 2)),
        (4, [256, 256, 1024], 'abcdef', (2, 2)),
        (5, [512, 512, 2048], 'abc', (2, 2)),
    )
    for stage, filters, blocks, strides in stage_specs:
        x = conv_block(x, 3, filters, stage=stage, block=blocks[0],
                       strides=strides)
        for block in blocks[1:]:
            x = identity_block(x, 3, filters, stage=stage, block=block)

    # classifier head
    x = AveragePooling2D((7, 7), dim_ordering=dim_ordering)(x)
    x = Flatten()(x)
    x = Dense(1000, activation='softmax', name='fc1000')(x)

    return Model(inp, x)
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Demo: load the pretrained weights matching the current dim ordering
    # and print the predicted class line for two test images.
    weights_file = K.image_dim_ordering() + '_dim_ordering_resnet50.h5'
    resnet_model = get_resnet50()
    resnet_model.load_weights(weights_file)

    # you may download synset_words from the address given at the beginning
    # of this file
    # Read the table up front inside `with`, so the file is closed even if
    # loading an image or predicting fails later on.
    with open('synset_words.txt', 'r') as class_table:
        lines = class_table.readlines()

    test_img1 = read_img('cat.jpg')
    print('Result for test 1 is:')
    print(lines[np.argmax(resnet_model.predict(test_img1)[0])])

    test_img2 = read_img('elephant.jpg')
    print('Result for test 2 is:')
    print(lines[np.argmax(resnet_model.predict(test_img2)[0])])
|
||||
@@ -16,7 +16,7 @@ epochs = 25
|
||||
lahead = 1
|
||||
|
||||
|
||||
def gen_cosine_amp(amp=100, period=25, x0=0, xn=50000, step=1, k=0.0001):
|
||||
def gen_cosine_amp(amp=100, period=1000, x0=0, xn=50000, step=1, k=0.0001):
|
||||
"""Generates an absolute cosine time series with the amplitude
|
||||
exponentially decreasing
|
||||
|
||||
@@ -31,7 +31,7 @@ def gen_cosine_amp(amp=100, period=25, x0=0, xn=50000, step=1, k=0.0001):
|
||||
cos = np.zeros(((xn - x0) * step, 1, 1))
|
||||
for i in range(len(cos)):
|
||||
idx = x0 + i * step
|
||||
cos[i, 0, 0] = amp * np.cos(idx / (2 * np.pi * period))
|
||||
cos[i, 0, 0] = amp * np.cos(2 * np.pi * idx / period)
|
||||
cos[i, 0, 0] = cos[i, 0, 0] * np.exp(-k * idx)
|
||||
return cos
|
||||
|
||||
|
||||
@@ -11,27 +11,25 @@ from keras import backend as K
|
||||
from keras import objectives
|
||||
from keras.datasets import mnist
|
||||
|
||||
batch_size = 16
|
||||
batch_size = 100
|
||||
original_dim = 784
|
||||
latent_dim = 2
|
||||
intermediate_dim = 128
|
||||
epsilon_std = 0.01
|
||||
nb_epoch = 40
|
||||
intermediate_dim = 256
|
||||
nb_epoch = 50
|
||||
|
||||
x = Input(batch_shape=(batch_size, original_dim))
|
||||
h = Dense(intermediate_dim, activation='relu')(x)
|
||||
z_mean = Dense(latent_dim)(h)
|
||||
z_log_std = Dense(latent_dim)(h)
|
||||
z_log_var = Dense(latent_dim)(h)
|
||||
|
||||
|
||||
def sampling(args):
|
||||
z_mean, z_log_std = args
|
||||
epsilon = K.random_normal(shape=(batch_size, latent_dim),
|
||||
mean=0., std=epsilon_std)
|
||||
return z_mean + K.exp(z_log_std) * epsilon
|
||||
z_mean, z_log_var = args
|
||||
epsilon = K.random_normal(shape=(batch_size, latent_dim), mean=0.)
|
||||
return z_mean + K.exp(z_log_var / 2) * epsilon
|
||||
|
||||
# note that "output_shape" isn't necessary with the TensorFlow backend
|
||||
# so you could write `Lambda(sampling)([z_mean, z_log_std])`
|
||||
z = Lambda(sampling, output_shape=(latent_dim,))([z_mean, z_log_std])
|
||||
z = Lambda(sampling, output_shape=(latent_dim,))([z_mean, z_log_var])
|
||||
|
||||
# we instantiate these layers separately so as to reuse them later
|
||||
decoder_h = Dense(intermediate_dim, activation='relu')
|
||||
@@ -39,9 +37,10 @@ decoder_mean = Dense(original_dim, activation='sigmoid')
|
||||
h_decoded = decoder_h(z)
|
||||
x_decoded_mean = decoder_mean(h_decoded)
|
||||
|
||||
|
||||
def vae_loss(x, x_decoded_mean):
|
||||
xent_loss = objectives.binary_crossentropy(x, x_decoded_mean)
|
||||
kl_loss = - 0.5 * K.mean(1 + z_log_std - K.square(z_mean) - K.exp(z_log_std), axis=-1)
|
||||
xent_loss = original_dim * objectives.binary_crossentropy(x, x_decoded_mean)
|
||||
kl_loss = - 0.5 * K.sum(1 + z_log_var - K.square(z_mean) - K.exp(z_log_var), axis=-1)
|
||||
return xent_loss + kl_loss
|
||||
|
||||
vae = Model(x, x_decoded_mean)
|
||||
@@ -87,7 +86,7 @@ grid_y = np.linspace(-15, 15, n)
|
||||
|
||||
for i, yi in enumerate(grid_x):
|
||||
for j, xi in enumerate(grid_y):
|
||||
z_sample = np.array([[xi, yi]]) * epsilon_std
|
||||
z_sample = np.array([[xi, yi]])
|
||||
x_decoded = generator.predict(z_sample)
|
||||
digit = x_decoded[0].reshape(digit_size, digit_size)
|
||||
figure[i * digit_size: (i + 1) * digit_size,
|
||||
|
||||
@@ -0,0 +1,124 @@
|
||||
'''This script demonstrates how to build a variational autoencoder with Keras and deconvolution layers.
|
||||
|
||||
Reference: "Auto-Encoding Variational Bayes" https://arxiv.org/abs/1312.6114
|
||||
'''
|
||||
import numpy as np
|
||||
import matplotlib.pyplot as plt
|
||||
|
||||
from keras.layers import Input, Dense, Lambda, Flatten, Reshape
|
||||
from keras.layers import Convolution2D, Deconvolution2D, MaxPooling2D
|
||||
from keras.models import Model
|
||||
from keras import backend as K
|
||||
from keras import objectives
|
||||
from keras.datasets import mnist
|
||||
|
||||
# input image dimensions
|
||||
img_rows, img_cols, img_chns = 28, 28, 1
|
||||
# number of convolutional filters to use
|
||||
nb_filters = 32
|
||||
# convolution kernel size
|
||||
nb_conv = 3
|
||||
|
||||
batch_size = 16
|
||||
original_dim = (img_chns, img_rows, img_cols)
|
||||
latent_dim = 2
|
||||
intermediate_dim = 128
|
||||
epsilon_std = 0.01
|
||||
nb_epoch = 5
|
||||
|
||||
|
||||
x = Input(batch_shape=(batch_size,) + original_dim)
|
||||
c = Convolution2D(nb_filters, nb_conv, nb_conv, border_mode='same', activation='relu')(x)
|
||||
f = Flatten()(c)
|
||||
h = Dense(intermediate_dim, activation='relu')(f)
|
||||
|
||||
z_mean = Dense(latent_dim)(h)
|
||||
z_log_var = Dense(latent_dim)(h)
|
||||
|
||||
|
||||
def sampling(args):
    '''Draws a latent sample with the reparameterization trick:
    `z = z_mean + std * epsilon`.

    # Arguments
        args: pair `(z_mean, z_log_var)` of encoder output tensors.
            `z_log_var` is the log of the variance of the approximate
            posterior (the KL term of the loss uses it that way).

    # Returns
        A tensor of latent samples with the same shape as `z_mean`.
    '''
    z_mean, z_log_var = args
    # Gaussian noise with one row per sample of the (fixed-size) batch.
    epsilon = K.random_normal(shape=(batch_size, latent_dim),
                              mean=0., std=epsilon_std)
    # The noise must be scaled by the standard deviation, and
    # std = sqrt(var) = exp(log_var / 2); exp(log_var) would be the variance.
    return z_mean + K.exp(z_log_var / 2) * epsilon
|
||||
|
||||
# note that "output_shape" isn't necessary with the TensorFlow backend
|
||||
# so you could write `Lambda(sampling)([z_mean, z_log_var])`
|
||||
z = Lambda(sampling, output_shape=(latent_dim,))([z_mean, z_log_var])
|
||||
|
||||
# we instantiate these layers separately so as to reuse them later
|
||||
decoder_h = Dense(intermediate_dim, activation='relu')
|
||||
decoder_f = Dense(nb_filters*img_rows*img_cols, activation='relu')
|
||||
decoder_c = Reshape((nb_filters, img_rows, img_cols))
|
||||
decoder_mean = Deconvolution2D(img_chns, nb_conv, nb_conv,
|
||||
(batch_size, img_chns, img_rows, img_cols),
|
||||
border_mode='same')
|
||||
|
||||
h_decoded = decoder_h(z)
|
||||
f_decoded = decoder_f(h_decoded)
|
||||
c_decoded = decoder_c(f_decoded)
|
||||
x_decoded_mean = decoder_mean(c_decoded)
|
||||
|
||||
|
||||
def vae_loss(x, x_decoded_mean):
    '''VAE objective: reconstruction cross-entropy plus KL divergence.

    # Arguments
        x: batch of input images.
        x_decoded_mean: batch of reconstructed images (same shape as `x`).

    # Returns
        One loss value per sample: the KL term has shape (batch_size,) and
        the scalar reconstruction term is broadcast onto it.
    '''
    # K.flatten collapses *all* axes (batch included) into one, so the
    # cross-entropy below is a single scalar: the mean over every pixel
    # of every image in the batch.
    x = K.flatten(x)
    x_decoded_mean = K.flatten(x_decoded_mean)
    # Rescale the per-pixel mean to a per-image sum so that it is weighted
    # consistently against the (summed) KL term.
    nb_pixels = img_chns * img_rows * img_cols
    xent_loss = nb_pixels * objectives.binary_crossentropy(x, x_decoded_mean)
    # KL(q(z|x) || N(0, I)) for a diagonal Gaussian is a *sum* over the
    # latent dimensions, not a mean.
    kl_loss = - 0.5 * K.sum(1 + z_log_var - K.square(z_mean) - K.exp(z_log_var), axis=-1)
    return xent_loss + kl_loss
|
||||
|
||||
vae = Model(x, x_decoded_mean)
|
||||
vae.compile(optimizer='rmsprop', loss=vae_loss)
|
||||
vae.summary()
|
||||
|
||||
# train the VAE on MNIST digits
|
||||
(x_train, y_train), (x_test, y_test) = mnist.load_data()
|
||||
|
||||
x_train = x_train.astype('float32')[:, None, :, :] / 255.
|
||||
x_test = x_test.astype('float32')[:, None, :, :] / 255.
|
||||
|
||||
vae.fit(x_train, x_train,
|
||||
shuffle=True,
|
||||
nb_epoch=nb_epoch,
|
||||
batch_size=batch_size,
|
||||
validation_data=(x_test, x_test))
|
||||
|
||||
|
||||
# build a model to project inputs on the latent space
|
||||
encoder = Model(x, z_mean)
|
||||
|
||||
# display a 2D plot of the digit classes in the latent space
|
||||
x_test_encoded = encoder.predict(x_test, batch_size=batch_size)
|
||||
plt.figure(figsize=(6, 6))
|
||||
plt.scatter(x_test_encoded[:, 0], x_test_encoded[:, 1], c=y_test)
|
||||
plt.colorbar()
|
||||
plt.show()
|
||||
|
||||
# build a digit generator that can sample from the learned distribution
|
||||
decoder_input = Input(shape=(latent_dim,))
|
||||
_h_decoded = decoder_h(decoder_input)
|
||||
_f_decoded = decoder_f(_h_decoded)
|
||||
_c_decoded = decoder_c(_f_decoded)
|
||||
_x_decoded_mean = decoder_mean(_c_decoded)
|
||||
generator = Model(decoder_input, _x_decoded_mean)
|
||||
|
||||
# display a 2D manifold of the digits
|
||||
n = 15 # figure with 15x15 digits
|
||||
digit_size = 28
|
||||
figure = np.zeros((digit_size * n, digit_size * n))
|
||||
# we will sample n points within [-15, 15] standard deviations
|
||||
grid_x = np.linspace(-15, 15, n)
|
||||
grid_y = np.linspace(-15, 15, n)
|
||||
|
||||
for i, yi in enumerate(grid_x):
|
||||
for j, xi in enumerate(grid_y):
|
||||
z_sample = np.array([[xi, yi]])
|
||||
x_decoded = generator.predict(z_sample)
|
||||
digit = x_decoded[0].reshape(digit_size, digit_size)
|
||||
figure[i * digit_size: (i + 1) * digit_size,
|
||||
j * digit_size: (j + 1) * digit_size] = digit
|
||||
|
||||
plt.figure(figsize=(10, 10))
|
||||
plt.imshow(figure)
|
||||
plt.show()
|
||||
+1
-1
@@ -15,4 +15,4 @@ from . import objectives
|
||||
from . import optimizers
|
||||
from . import regularizers
|
||||
|
||||
__version__ = '1.0.6'
|
||||
__version__ = '1.0.8'
|
||||
|
||||
@@ -11,6 +11,9 @@ from .common import get_uid
|
||||
from .common import cast_to_floatx
|
||||
from .common import image_dim_ordering
|
||||
from .common import set_image_dim_ordering
|
||||
from .common import is_keras_tensor
|
||||
from .common import legacy_weight_ordering
|
||||
from .common import set_legacy_weight_ordering
|
||||
|
||||
_keras_base_dir = os.path.expanduser('~')
|
||||
if not os.access(_keras_base_dir, os.W_OK):
|
||||
@@ -39,12 +42,13 @@ if os.path.exists(_config_path):
|
||||
_BACKEND = _backend
|
||||
|
||||
# save config file
|
||||
_config = {'floatx': floatx(),
|
||||
'epsilon': epsilon(),
|
||||
'backend': _BACKEND,
|
||||
'image_dim_ordering': image_dim_ordering()}
|
||||
with open(_config_path, 'w') as f:
|
||||
f.write(json.dumps(_config, indent=4))
|
||||
if not os.path.exists(_config_path):
|
||||
_config = {'floatx': floatx(),
|
||||
'epsilon': epsilon(),
|
||||
'backend': _BACKEND,
|
||||
'image_dim_ordering': image_dim_ordering()}
|
||||
with open(_config_path, 'w') as f:
|
||||
f.write(json.dumps(_config, indent=4))
|
||||
|
||||
if 'KERAS_BACKEND' in os.environ:
|
||||
_backend = os.environ['KERAS_BACKEND']
|
||||
@@ -60,3 +64,10 @@ elif _BACKEND == 'tensorflow':
|
||||
from .tensorflow_backend import *
|
||||
else:
|
||||
raise Exception('Unknown backend: ' + str(_BACKEND))
|
||||
|
||||
|
||||
def backend():
|
||||
'''Publicly accessible method
|
||||
for determining the current backend.
|
||||
'''
|
||||
return _BACKEND
|
||||
|
||||
@@ -7,6 +7,7 @@ _FLOATX = 'float32'
|
||||
_EPSILON = 10e-8
|
||||
_UID_PREFIXES = defaultdict(int)
|
||||
_IMAGE_DIM_ORDERING = 'th'
|
||||
_LEGACY_WEIGHT_ORDERING = False
|
||||
|
||||
|
||||
def epsilon():
|
||||
@@ -64,3 +65,25 @@ def set_image_dim_ordering(dim_ordering):
|
||||
def get_uid(prefix=''):
|
||||
_UID_PREFIXES[prefix] += 1
|
||||
return _UID_PREFIXES[prefix]
|
||||
|
||||
|
||||
def reset_uids():
|
||||
global _UID_PREFIXES
|
||||
_UID_PREFIXES = defaultdict(int)
|
||||
|
||||
|
||||
def is_keras_tensor(x):
    '''Returns whether `x` carries a `_keras_shape` attribute.

    # Arguments
        x: any object.

    # Returns
        `True` if `x` has a `_keras_shape` attribute, `False` otherwise.
    '''
    return hasattr(x, '_keras_shape')
|
||||
|
||||
|
||||
def set_legacy_weight_ordering(value):
    '''Stores `value` in the module-level legacy weight ordering flag.

    # Arguments
        value: the new flag; must compare equal to `True` or `False`,
            otherwise the assertion below fails.
    '''
    global _LEGACY_WEIGHT_ORDERING
    allowed_values = {True, False}
    assert value in allowed_values
    _LEGACY_WEIGHT_ORDERING = value
|
||||
|
||||
|
||||
def legacy_weight_ordering():
|
||||
return _LEGACY_WEIGHT_ORDERING
|
||||
|
||||
@@ -1,9 +1,10 @@
|
||||
import tensorflow as tf
|
||||
from tensorflow.python.training import moving_averages
|
||||
import numpy as np
|
||||
import os
|
||||
import copy
|
||||
import warnings
|
||||
from .common import _FLOATX, _EPSILON, _IMAGE_DIM_ORDERING
|
||||
from .common import _FLOATX, _EPSILON, _IMAGE_DIM_ORDERING, reset_uids
|
||||
|
||||
# INTERNAL UTILS
|
||||
|
||||
@@ -12,6 +13,15 @@ _LEARNING_PHASE = tf.placeholder(dtype='uint8', name='keras_learning_phase') #
|
||||
_MANUAL_VAR_INIT = False
|
||||
|
||||
|
||||
def clear_session():
    '''Resets the backend's global TensorFlow state.

    Resets the default TensorFlow graph and the uid counters, forgets the
    cached session reference, and creates a new learning phase placeholder.
    '''
    global _SESSION, _LEARNING_PHASE
    tf.reset_default_graph()
    reset_uids()
    # Only the module-level reference is dropped here.
    _SESSION = None
    # The placeholder is created after the graph reset above.
    _LEARNING_PHASE = tf.placeholder(dtype='uint8',
                                     name='keras_learning_phase')
|
||||
|
||||
|
||||
def manual_variable_initialization(value):
|
||||
'''Whether variables should be initialized
|
||||
as they are instantiated (default), or if
|
||||
@@ -34,7 +44,10 @@ def learning_phase():
|
||||
|
||||
def set_learning_phase(value):
|
||||
global _LEARNING_PHASE
|
||||
_LEARNING_PHASE = tf.constant(value, name='keras_learning_phase')
|
||||
if value not in {0, 1}:
|
||||
raise ValueError('Expected learning phase to be '
|
||||
'0 or 1.')
|
||||
_LEARNING_PHASE = value
|
||||
|
||||
|
||||
def get_session():
|
||||
@@ -79,16 +92,27 @@ def _convert_string_dtype(dtype):
|
||||
return tf.float32
|
||||
elif dtype == 'float64':
|
||||
return tf.float64
|
||||
elif dtype == 'int16':
|
||||
return tf.int16
|
||||
elif dtype == 'int32':
|
||||
return tf.int32
|
||||
elif dtype == 'int64':
|
||||
return tf.int64
|
||||
elif dtype == 'uint8':
|
||||
return tf.int8
|
||||
elif dtype == 'uint16':
|
||||
return tf.uint16
|
||||
else:
|
||||
raise ValueError('Unsupported dtype:', dtype)
|
||||
|
||||
|
||||
def _to_tensor(x, dtype):
    '''Converts `x` to a tensor and casts it to `dtype` when its dtype
    differs.

    # Arguments
        x: value accepted by `tf.convert_to_tensor`.
        dtype: the target dtype, compared against the converted tensor's
            `dtype` attribute.

    # Returns
        The converted tensor, cast with `tf.cast` if a cast was needed.
    '''
    tensor = tf.convert_to_tensor(x)
    if tensor.dtype == dtype:
        return tensor
    return tf.cast(tensor, dtype)
|
||||
|
||||
|
||||
def variable(value, dtype=_FLOATX, name=None):
|
||||
'''Instantiates a tensor.
|
||||
|
||||
@@ -184,15 +208,17 @@ def eval(x):
|
||||
def zeros(shape, dtype=_FLOATX, name=None):
|
||||
'''Instantiates an all-zeros tensor variable.
|
||||
'''
|
||||
return variable(lambda: tf.cast(tf.constant_initializer(0.)(shape), dtype),
|
||||
dtype, name)
|
||||
shape = tuple(map(int, shape))
|
||||
tf_dtype = _convert_string_dtype(dtype)
|
||||
return variable(tf.constant_initializer(0., dtype=tf_dtype)(shape), dtype, name)
|
||||
|
||||
|
||||
def ones(shape, dtype=_FLOATX, name=None):
|
||||
'''Instantiates an all-ones tensor variable.
|
||||
'''
|
||||
return variable(lambda: tf.cast(tf.constant_initializer(1.)(shape), dtype),
|
||||
dtype, name)
|
||||
shape = tuple(map(int, shape))
|
||||
tf_dtype = _convert_string_dtype(dtype)
|
||||
return variable(tf.constant_initializer(1., dtype=tf_dtype)(shape), dtype, name)
|
||||
|
||||
|
||||
def eye(size, dtype=_FLOATX, name=None):
|
||||
@@ -215,6 +241,30 @@ def ones_like(x, name=None):
|
||||
return tf.ones_like(x, name=name)
|
||||
|
||||
|
||||
def random_uniform_variable(shape, low, high, dtype=_FLOATX,
                            name=None, seed=None):
    '''Instantiates a variable initialized with
    `tf.random_uniform_initializer(low, high)`.

    # Arguments
        shape: iterable of dimensions; each entry is converted with `int`.
        low, high: bounds forwarded to `tf.random_uniform_initializer`.
        dtype: dtype name, translated by `_convert_string_dtype`.
        name: optional name forwarded to `variable`.
        seed: optional integer seed; drawn from the Numpy RNG when `None`.

    # Returns
        The result of `variable(...)` on the initial value.
    '''
    dims = tuple(int(d) for d in shape)
    initializer_dtype = _convert_string_dtype(dtype)
    # ensure that randomness is conditioned by the Numpy RNG
    seed = np.random.randint(10e8) if seed is None else seed
    initializer = tf.random_uniform_initializer(
        low, high, dtype=initializer_dtype, seed=seed)
    return variable(initializer(dims), dtype=dtype, name=name)
|
||||
|
||||
|
||||
def random_normal_variable(shape, mean, scale, dtype=_FLOATX,
                           name=None, seed=None):
    '''Instantiates a variable initialized with
    `tf.random_normal_initializer(mean, scale)`.

    # Arguments
        shape: iterable of dimensions; each entry is converted with `int`.
        mean, scale: forwarded positionally to `tf.random_normal_initializer`.
        dtype: dtype name, translated by `_convert_string_dtype`.
        name: optional name forwarded to `variable`.
        seed: optional integer seed; drawn from the Numpy RNG when `None`.

    # Returns
        The result of `variable(...)` on the initial value.
    '''
    dims = tuple(int(d) for d in shape)
    initializer_dtype = _convert_string_dtype(dtype)
    # ensure that randomness is conditioned by the Numpy RNG
    seed = np.random.randint(10e8) if seed is None else seed
    initializer = tf.random_normal_initializer(
        mean, scale, dtype=initializer_dtype, seed=seed)
    return variable(initializer(dims), dtype=dtype, name=name)
|
||||
|
||||
|
||||
def count_params(x):
|
||||
'''Returns the number of scalars in a tensor.
|
||||
'''
|
||||
@@ -228,6 +278,26 @@ def cast(x, dtype):
|
||||
return tf.cast(x, dtype)
|
||||
|
||||
|
||||
# UPDATES OPS
|
||||
|
||||
|
||||
def update(x, new_x):
|
||||
return tf.assign(x, new_x)
|
||||
|
||||
|
||||
def update_add(x, increment):
|
||||
return tf.assign_add(x, increment)
|
||||
|
||||
|
||||
def update_sub(x, decrement):
|
||||
return tf.assign_sub(x, decrement)
|
||||
|
||||
|
||||
def moving_average_update(variable, value, momentum):
|
||||
return moving_averages.assign_moving_average(
|
||||
variable, value, momentum)
|
||||
|
||||
|
||||
# LINEAR ALGEBRA
|
||||
|
||||
def dot(x, y):
|
||||
@@ -255,19 +325,36 @@ def batch_dot(x, y, axes=None):
|
||||
If the number of dimensions is reduced to 1, we use `expand_dims` to
|
||||
make sure that ndim is at least 2.
|
||||
|
||||
# Example
|
||||
Assume x = [[1, 2], [3, 4]] and y = [[5, 6], [7, 8]]
|
||||
batch_dot(x, y, axes=1) = [[17, 53]] which is the main diagonal
|
||||
of x.dot(y.T), although we never have to calculate the off-diagonal
|
||||
elements.
|
||||
|
||||
|
||||
# Arguments
|
||||
x, y: tensors with ndim >= 2
|
||||
axes: list (or single) int with target dimensions
|
||||
|
||||
# Returns
|
||||
Tensor with ndim >= 2
|
||||
A tensor with shape equal to the concatenation of x's shape
|
||||
(less the dimension that was summed over) and y's shape
|
||||
(less the batch dimension and the dimension that was summed over).
|
||||
If the final rank is 1, we reshape it to (batch_size, 1).
|
||||
|
||||
# Examples
|
||||
Assume x = [[1, 2], [3, 4]] and y = [[5, 6], [7, 8]]
|
||||
batch_dot(x, y, axes=1) = [[17, 53]] which is the main diagonal
|
||||
of x.dot(y.T), although we never have to calculate the off-diagonal
|
||||
elements.
|
||||
|
||||
Shape inference:
|
||||
Let x's shape be (100, 20) and y's shape be (100, 30, 20).
|
||||
If dot_axes is (1, 2), to find the output shape of resultant tensor,
|
||||
loop through each dimension in x's shape and y's shape:
|
||||
x.shape[0] : 100 : append to output shape
|
||||
x.shape[1] : 20 : do not append to output shape,
|
||||
dimension 1 of x has been summed over. (dot_axes[0] = 1)
|
||||
y.shape[0] : 100 : do not append to output shape,
|
||||
always ignore first dimension of y
|
||||
y.shape[1] : 30 : append to output shape
|
||||
y.shape[2] : 20 : do not append to output shape,
|
||||
dimension 2 of y has been summed over. (dot_axes[1] = 2)
|
||||
|
||||
output_shape = (100, 30)
|
||||
'''
|
||||
if type(axes) == int:
|
||||
axes = (axes, axes)
|
||||
@@ -429,8 +516,9 @@ def abs(x):
|
||||
def sqrt(x):
|
||||
'''Element-wise square root.
|
||||
'''
|
||||
x = tf.clip_by_value(x, tf.cast(0., dtype=_FLOATX),
|
||||
tf.cast(np.inf, dtype=_FLOATX))
|
||||
zero = _to_tensor(0., x.dtype.base_dtype)
|
||||
inf = _to_tensor(np.inf, x.dtype.base_dtype)
|
||||
x = tf.clip_by_value(x, zero, inf)
|
||||
return tf.sqrt(x)
|
||||
|
||||
|
||||
@@ -469,8 +557,9 @@ def clip(x, min_value, max_value):
|
||||
'''
|
||||
if max_value < min_value:
|
||||
max_value = min_value
|
||||
return tf.clip_by_value(x, tf.cast(min_value, dtype=_FLOATX),
|
||||
tf.cast(max_value, dtype=_FLOATX))
|
||||
min_value = _to_tensor(min_value, x.dtype.base_dtype)
|
||||
max_value = _to_tensor(max_value, x.dtype.base_dtype)
|
||||
return tf.clip_by_value(x, min_value, max_value)
|
||||
|
||||
|
||||
def equal(x, y):
|
||||
@@ -487,6 +576,34 @@ def not_equal(x, y):
|
||||
return tf.not_equal(x, y)
|
||||
|
||||
|
||||
def greater(x, y):
|
||||
'''Element-wise truth value of (x > y).
|
||||
Returns a bool tensor.
|
||||
'''
|
||||
return tf.greater(x, y)
|
||||
|
||||
|
||||
def greater_equal(x, y):
|
||||
'''Element-wise truth value of (x >= y).
|
||||
Returns a bool tensor.
|
||||
'''
|
||||
return tf.greater_equal(x, y)
|
||||
|
||||
|
||||
def lesser(x, y):
|
||||
'''Element-wise truth value of (x < y).
|
||||
Returns a bool tensor.
|
||||
'''
|
||||
return tf.less(x, y)
|
||||
|
||||
|
||||
def lesser_equal(x, y):
|
||||
'''Element-wise truth value of (x <= y).
|
||||
Returns a bool tensor.
|
||||
'''
|
||||
return tf.less_equal(x, y)
|
||||
|
||||
|
||||
def maximum(x, y):
|
||||
'''Element-wise maximum of two tensors.
|
||||
'''
|
||||
@@ -515,10 +632,10 @@ def normalize_batch_in_training(x, gamma, beta,
|
||||
reduction_axes, epsilon=0.0001):
|
||||
'''Compute mean and std for batch then apply batch_normalization on batch.
|
||||
'''
|
||||
mean, std = tf.nn.moments(x, reduction_axes,
|
||||
mean, var = tf.nn.moments(x, reduction_axes,
|
||||
shift=None, name=None, keep_dims=False)
|
||||
if sorted(reduction_axes) == range(ndim(x))[:-1]:
|
||||
normed = tf.nn.batch_normalization(x, mean, std,
|
||||
normed = tf.nn.batch_normalization(x, mean, var,
|
||||
beta, gamma,
|
||||
epsilon)
|
||||
else:
|
||||
@@ -532,19 +649,21 @@ def normalize_batch_in_training(x, gamma, beta,
|
||||
target_shape = tf.pack(target_shape)
|
||||
|
||||
broadcast_mean = tf.reshape(mean, target_shape)
|
||||
broadcast_std = tf.reshape(std, target_shape)
|
||||
broadcast_var = tf.reshape(var, target_shape)
|
||||
broadcast_gamma = tf.reshape(gamma, target_shape)
|
||||
broadcast_beta = tf.reshape(beta, target_shape)
|
||||
normed = tf.nn.batch_normalization(x, broadcast_mean, broadcast_std,
|
||||
normed = tf.nn.batch_normalization(x, broadcast_mean, broadcast_var,
|
||||
broadcast_beta, broadcast_gamma,
|
||||
epsilon)
|
||||
return normed, mean, std
|
||||
return normed, mean, var
|
||||
|
||||
|
||||
def batch_normalization(x, mean, std, beta, gamma, epsilon=0.0001):
|
||||
'''Apply batch normalization on x given mean, std, beta and gamma.
|
||||
def batch_normalization(x, mean, var, beta, gamma, epsilon=0.0001):
|
||||
'''Apply batch normalization on x given mean, var, beta and gamma:
|
||||
|
||||
output = (x - mean) / (sqrt(var) + epsilon) * gamma + beta
|
||||
'''
|
||||
return tf.nn.batch_normalization(x, mean, std, beta, gamma, epsilon)
|
||||
return tf.nn.batch_normalization(x, mean, var, beta, gamma, epsilon)
|
||||
|
||||
|
||||
# SHAPE OPERATIONS
|
||||
@@ -603,6 +722,27 @@ def resize_images(X, height_factor, width_factor, dim_ordering):
|
||||
raise Exception('Invalid dim_ordering: ' + dim_ordering)
|
||||
|
||||
|
||||
def resize_volumes(X, depth_factor, height_factor, width_factor, dim_ordering):
    '''Resizes the volume contained in a 5D tensor by repeating its
    elements along the depth, height and width axes.

    Expected layouts:
    - [batch, channels, depth, height, width] (for 'th' dim_ordering)
    - [batch, depth, height, width, channels] (for 'tf' dim_ordering)

    # Arguments
        X: the 5D tensor to resize.
        depth_factor, height_factor, width_factor: repetition counts for
            the three spatial axes; all should be positive integers.
        dim_ordering: 'th' or 'tf'.

    # Returns
        The resized tensor.

    # Raises
        Exception: if `dim_ordering` is neither 'th' nor 'tf'.
    '''
    # Index of the depth axis; height and width follow it directly.
    if dim_ordering == 'th':
        depth_axis = 2
    elif dim_ordering == 'tf':
        depth_axis = 1
    else:
        raise Exception('Invalid dim_ordering: ' + dim_ordering)

    output = X
    factors = (depth_factor, height_factor, width_factor)
    for offset, factor in enumerate(factors):
        output = repeat_elements(output, factor, axis=depth_axis + offset)
    return output
|
||||
|
||||
|
||||
def repeat_elements(x, rep, axis):
|
||||
'''Repeats the elements of a tensor along an axis, like np.repeat
|
||||
|
||||
@@ -624,13 +764,13 @@ def repeat(x, n):
|
||||
the output will have shape (samples, 2, dim)
|
||||
'''
|
||||
assert ndim(x) == 2
|
||||
tensors = [x] * n
|
||||
stacked = tf.pack(tensors)
|
||||
return tf.transpose(stacked, (1, 0, 2))
|
||||
x = tf.expand_dims(x, 1)
|
||||
pattern = tf.pack([1, n, 1])
|
||||
return tf.tile(x, pattern)
|
||||
|
||||
|
||||
def tile(x, n):
|
||||
if not hasattr(n, 'shape') and not hasattr(n, '__len__'):
|
||||
if not hasattr(n, 'shape') and not hasattr(n, '__len__') and not hasattr(n, '_shape'):
|
||||
n = [n]
|
||||
return tf.tile(x, n)
|
||||
|
||||
@@ -643,7 +783,7 @@ def batch_flatten(x):
|
||||
'''Turn a n-D tensor into a 2D tensor where
|
||||
the first dimension is conserved.
|
||||
'''
|
||||
x = tf.reshape(x, [-1, np.prod(x.get_shape()[1:].as_list())])
|
||||
x = tf.reshape(x, tf.pack([-1, prod(shape(x)[1:])]))
|
||||
return x
|
||||
|
||||
|
||||
@@ -681,10 +821,53 @@ def spatial_2d_padding(x, padding=(1, 1), dim_ordering='th'):
|
||||
return tf.pad(x, pattern)
|
||||
|
||||
|
||||
def spatial_3d_padding(x, padding=(1, 1, 1), dim_ordering='th'):
    '''Pads the depth, height and width axes of a 5D tensor with
    "padding[0]", "padding[1]" and "padding[2]" (resp.) entries on each
    side, using `tf.pad`.

    For 'th' dim_ordering, the 3rd, 4th and 5th dimension will be padded.
    For any other dim_ordering value, the 2nd, 3rd and 4th dimension will
    be padded (the 'tf' layout).
    '''
    # Symmetric [before, after] amounts for the three spatial axes.
    spatial = [[padding[i], padding[i]] for i in range(3)]
    if dim_ordering == 'th':
        # batch and channel axes come first and are left untouched
        pattern = [[0, 0], [0, 0]] + spatial
    else:
        # batch axis first, channel axis last; both left untouched
        pattern = [[0, 0]] + spatial + [[0, 0]]
    return tf.pad(x, pattern)
|
||||
|
||||
|
||||
def pack(x):
|
||||
return tf.pack(x)
|
||||
|
||||
|
||||
def one_hot(indices, nb_classes):
|
||||
'''Input: nD integer tensor of shape (batch_size, dim1, dim2, ... dim(n-1))
|
||||
Output: (n + 1)D one hot representation of the input
|
||||
with shape (batch_size, dim1, dim2, ... dim(n-1), nb_classes)
|
||||
'''
|
||||
return tf.one_hot(indices, depth=nb_classes, axis=-1)
|
||||
|
||||
|
||||
def reverse(x, axes):
    '''Reverses a tensor along the specified axes.

    # Arguments
        x: tensor to reverse.
        axes: a single integer axis, or a container of integer axes.

    # Returns
        The result of `tf.reverse` called with one boolean per dimension
        of `x`, `True` for each dimension listed in `axes`.
    '''
    if isinstance(axes, int):
        axes = [axes]
    # Rank through the public shape API rather than the private `_dims`.
    rank = len(x.get_shape())
    dims = [i in axes for i in range(rank)]
    return tf.reverse(x, dims)
|
||||
|
||||
|
||||
# VALUE MANIPULATION
|
||||
|
||||
|
||||
@@ -709,7 +892,17 @@ def set_value(x, value):
|
||||
'''Sets the value of a tensor variable,
|
||||
from a Numpy array.
|
||||
'''
|
||||
tf.assign(x, np.asarray(value)).op.run(session=get_session())
|
||||
value = np.asarray(value)
|
||||
tf_dtype = _convert_string_dtype(x.dtype.name.split('_')[0])
|
||||
if hasattr(x, '_assign_placeholder'):
|
||||
assign_placeholder = x._assign_placeholder
|
||||
assign_op = x._assign_op
|
||||
else:
|
||||
assign_placeholder = tf.placeholder(tf_dtype, shape=value.shape)
|
||||
assign_op = x.assign(assign_placeholder)
|
||||
x._assign_placeholder = assign_placeholder
|
||||
x._assign_op = assign_op
|
||||
get_session().run(assign_op, feed_dict={assign_placeholder: value})
|
||||
|
||||
|
||||
def batch_set_value(tuples):
|
||||
@@ -720,8 +913,33 @@ def batch_set_value(tuples):
|
||||
`value` should be a Numpy array.
|
||||
'''
|
||||
if tuples:
|
||||
ops = [tf.assign(x, np.asarray(value)) for x, value in tuples]
|
||||
get_session().run(ops)
|
||||
assign_ops = []
|
||||
feed_dict = {}
|
||||
for x, value in tuples:
|
||||
value = np.asarray(value)
|
||||
tf_dtype = _convert_string_dtype(x.dtype.name.split('_')[0])
|
||||
if hasattr(x, '_assign_placeholder'):
|
||||
assign_placeholder = x._assign_placeholder
|
||||
assign_op = x._assign_op
|
||||
else:
|
||||
assign_placeholder = tf.placeholder(tf_dtype, shape=value.shape)
|
||||
assign_op = x.assign(assign_placeholder)
|
||||
x._assign_placeholder = assign_placeholder
|
||||
x._assign_op = assign_op
|
||||
assign_ops.append(assign_op)
|
||||
feed_dict[assign_placeholder] = value
|
||||
get_session().run(assign_ops, feed_dict=feed_dict)
|
||||
|
||||
|
||||
def get_variable_shape(x):
|
||||
return int_shape(x)
|
||||
|
||||
|
||||
def print_tensor(x, message=''):
|
||||
'''Print the message and the tensor when evaluated and return the same
|
||||
tensor.
|
||||
'''
|
||||
return tf.Print(x, [x], message)
|
||||
|
||||
|
||||
# GRAPH MANIPULATION
|
||||
@@ -735,14 +953,22 @@ class Function(object):
|
||||
self.inputs = list(inputs)
|
||||
self.outputs = list(outputs)
|
||||
with tf.control_dependencies(self.outputs):
|
||||
self.updates = [tf.assign(p, new_p) for (p, new_p) in updates]
|
||||
updates_ops = []
|
||||
for update in updates:
|
||||
if type(update) is tuple:
|
||||
p, new_p = update
|
||||
updates_ops.append(tf.assign(p, new_p))
|
||||
else:
|
||||
# assumed already an op
|
||||
updates_ops.append(update)
|
||||
self.updates_op = tf.group(*updates_ops)
|
||||
|
||||
def __call__(self, inputs):
|
||||
assert type(inputs) in {list, tuple}
|
||||
names = [v.name for v in self.inputs]
|
||||
names = [getattr(v, 'name', None) for v in self.inputs]
|
||||
feed_dict = dict(zip(names, inputs))
|
||||
session = get_session()
|
||||
updated = session.run(self.outputs + self.updates, feed_dict=feed_dict)
|
||||
updated = session.run(self.outputs + [self.updates_op], feed_dict=feed_dict)
|
||||
return updated[:len(self.outputs)]
|
||||
|
||||
|
||||
@@ -794,10 +1020,11 @@ def rnn(step_function, inputs, initial_states,
|
||||
time step.
|
||||
states: list of tensors.
|
||||
Returns:
|
||||
output: tensor with shape (samples, ...) (no time dimension),
|
||||
output: tensor with shape (samples, output_dim) (no time dimension),
|
||||
new_states: list of tensors, same length and shapes
|
||||
as 'states'.
|
||||
initial_states: tensor with shape (samples, ...) (no time dimension),
|
||||
as 'states'. The first state in the list must be the
|
||||
output tensor at the previous timestep.
|
||||
initial_states: tensor with shape (samples, output_dim) (no time dimension),
|
||||
containing the initial values for the states used in
|
||||
the step function.
|
||||
go_backwards: boolean. If True, do the iteration over
|
||||
@@ -821,66 +1048,164 @@ def rnn(step_function, inputs, initial_states,
|
||||
the step function, of shape (samples, ...).
|
||||
'''
|
||||
ndim = len(inputs.get_shape())
|
||||
assert ndim >= 3, "Input should be at least 3D."
|
||||
assert ndim >= 3, 'Input should be at least 3D.'
|
||||
axes = [1, 0] + list(range(2, ndim))
|
||||
inputs = tf.transpose(inputs, (axes))
|
||||
input_list = tf.unpack(inputs)
|
||||
|
||||
if constants is None:
|
||||
constants = []
|
||||
|
||||
states = initial_states
|
||||
successive_states = []
|
||||
successive_outputs = []
|
||||
if go_backwards:
|
||||
input_list.reverse()
|
||||
if unroll:
|
||||
if not inputs.get_shape()[0]:
|
||||
raise Exception('Unrolling requires a fixed number of timesteps.')
|
||||
|
||||
if mask is not None:
|
||||
# Transpose not supported by bool tensor types, hence round-trip to uint8.
|
||||
mask = tf.cast(mask, tf.uint8)
|
||||
if len(mask.get_shape()) == ndim-1:
|
||||
mask = expand_dims(mask)
|
||||
mask = tf.cast(tf.transpose(mask, axes), tf.bool)
|
||||
mask_list = tf.unpack(mask)
|
||||
states = initial_states
|
||||
successive_states = []
|
||||
successive_outputs = []
|
||||
|
||||
input_list = tf.unpack(inputs)
|
||||
if go_backwards:
|
||||
input_list.reverse()
|
||||
|
||||
if mask is not None:
|
||||
# Transpose not supported by bool tensor types, hence round-trip to uint8.
|
||||
mask = tf.cast(mask, tf.uint8)
|
||||
if len(mask.get_shape()) == ndim - 1:
|
||||
mask = expand_dims(mask)
|
||||
mask = tf.cast(tf.transpose(mask, axes), tf.bool)
|
||||
mask_list = tf.unpack(mask)
|
||||
|
||||
if go_backwards:
|
||||
mask_list.reverse()
|
||||
|
||||
for input, mask_t in zip(input_list, mask_list):
|
||||
output, new_states = step_function(input, states + constants)
|
||||
|
||||
# tf.select needs its condition tensor to be the same shape as its two
|
||||
# result tensors, but in our case the condition (mask) tensor is
|
||||
# (nsamples, 1), and A and B are (nsamples, ndimensions). So we need to
|
||||
# broadcast the mask to match the shape of A and B. That's what the
|
||||
# tile call does, is just repeat the mask along its second dimension
|
||||
# ndimensions times.
|
||||
tiled_mask_t = tf.tile(mask_t, tf.pack([1, tf.shape(output)[1]]))
|
||||
|
||||
if len(successive_outputs) == 0:
|
||||
prev_output = zeros_like(output)
|
||||
else:
|
||||
prev_output = successive_outputs[-1]
|
||||
|
||||
output = tf.select(tiled_mask_t, output, prev_output)
|
||||
|
||||
return_states = []
|
||||
for state, new_state in zip(states, new_states):
|
||||
# (see earlier comment for tile explanation)
|
||||
tiled_mask_t = tf.tile(mask_t, tf.pack([1, tf.shape(new_state)[1]]))
|
||||
return_states.append(tf.select(tiled_mask_t, new_state, state))
|
||||
|
||||
states = return_states
|
||||
successive_outputs.append(output)
|
||||
successive_states.append(states)
|
||||
last_output = successive_outputs[-1]
|
||||
new_states = successive_states[-1]
|
||||
outputs = tf.pack(successive_outputs)
|
||||
else:
|
||||
for input in input_list:
|
||||
output, states = step_function(input, states + constants)
|
||||
successive_outputs.append(output)
|
||||
successive_states.append(states)
|
||||
last_output = successive_outputs[-1]
|
||||
new_states = successive_states[-1]
|
||||
outputs = tf.pack(successive_outputs)
|
||||
|
||||
else:
|
||||
from tensorflow.python.ops.rnn import _dynamic_rnn_loop
|
||||
|
||||
if go_backwards:
|
||||
mask_list.reverse()
|
||||
inputs = tf.reverse(inputs, [True] + [False] * (ndim - 1))
|
||||
|
||||
for input, mask_t in zip(input_list, mask_list):
|
||||
output, new_states = step_function(input, states + constants)
|
||||
states = initial_states
|
||||
nb_states = len(states)
|
||||
if nb_states == 0:
|
||||
raise Exception('No initial states provided.')
|
||||
elif nb_states == 1:
|
||||
state = states[0]
|
||||
else:
|
||||
state = tf.concat(1, states)
|
||||
|
||||
# tf.select needs its condition tensor to be the same shape as its two
|
||||
# result tensors, but in our case the condition (mask) tensor is
|
||||
# (nsamples, 1), and A and B are (nsamples, ndimensions). So we need to
|
||||
# broadcast the mask to match the shape of A and B. That's what the
|
||||
# tile call does, is just repeat the mask along its second dimension
|
||||
# ndimensions times.
|
||||
tiled_mask_t = tf.tile(mask_t, tf.pack([1, tf.shape(output)[1]]))
|
||||
state_size = int(states[0].get_shape()[-1])
|
||||
|
||||
if len(successive_outputs) == 0:
|
||||
prev_output = zeros_like(output)
|
||||
else:
|
||||
prev_output = successive_outputs[-1]
|
||||
if mask is not None:
|
||||
if go_backwards:
|
||||
mask = tf.reverse(mask, [True] + [False] * (ndim - 1))
|
||||
|
||||
output = tf.select(tiled_mask_t, output, prev_output)
|
||||
# Transpose not supported by bool tensor types, hence round-trip to uint8.
|
||||
mask = tf.cast(mask, tf.uint8)
|
||||
if len(mask.get_shape()) == ndim - 1:
|
||||
mask = expand_dims(mask)
|
||||
mask = tf.transpose(mask, axes)
|
||||
inputs = tf.concat(2, [tf.cast(mask, inputs.dtype), inputs])
|
||||
|
||||
return_states = []
|
||||
for state, new_state in zip(states, new_states):
|
||||
# (see earlier comment for tile explanation)
|
||||
tiled_mask_t = tf.tile(mask_t, tf.pack([1, tf.shape(new_state)[1]]))
|
||||
return_states.append(tf.select(tiled_mask_t, new_state, state))
|
||||
def _step(input, state):
|
||||
if nb_states > 1:
|
||||
states = []
|
||||
for i in range(nb_states):
|
||||
states.append(state[:, i * state_size: (i + 1) * state_size])
|
||||
else:
|
||||
states = [state]
|
||||
mask_t = tf.cast(input[:, 0], tf.bool)
|
||||
input = input[:, 1:]
|
||||
output, new_states = step_function(input, states + constants)
|
||||
|
||||
states = return_states
|
||||
successive_outputs.append(output)
|
||||
successive_states.append(states)
|
||||
else:
|
||||
for input in input_list:
|
||||
output, states = step_function(input, states + constants)
|
||||
successive_outputs.append(output)
|
||||
successive_states.append(states)
|
||||
output = tf.select(mask_t, output, states[0])
|
||||
new_states = [tf.select(mask_t, new_states[i], states[i]) for i in range(len(states))]
|
||||
|
||||
last_output = successive_outputs[-1]
|
||||
outputs = tf.pack(successive_outputs)
|
||||
new_states = successive_states[-1]
|
||||
if len(new_states) == 1:
|
||||
new_state = new_states[0]
|
||||
else:
|
||||
new_state = tf.concat(1, new_states)
|
||||
|
||||
return output, new_state
|
||||
else:
|
||||
def _step(input, state):
|
||||
if nb_states > 1:
|
||||
states = []
|
||||
for i in range(nb_states):
|
||||
states.append(state[:, i * state_size: (i + 1) * state_size])
|
||||
else:
|
||||
states = [state]
|
||||
output, new_states = step_function(input, states + constants)
|
||||
|
||||
if len(new_states) == 1:
|
||||
new_state = new_states[0]
|
||||
else:
|
||||
new_state = tf.concat(1, new_states)
|
||||
return output, new_state
|
||||
|
||||
# state size is assumed to be the same as output size
|
||||
# (always the case)
|
||||
_step.state_size = state_size * nb_states
|
||||
_step.output_size = state_size
|
||||
|
||||
(outputs, final_state) = _dynamic_rnn_loop(
|
||||
_step,
|
||||
inputs,
|
||||
state,
|
||||
parallel_iterations=32,
|
||||
swap_memory=True,
|
||||
sequence_length=None)
|
||||
|
||||
if nb_states > 1:
|
||||
new_states = []
|
||||
for i in range(nb_states):
|
||||
new_states.append(final_state[:, i * state_size: (i + 1) * state_size])
|
||||
else:
|
||||
new_states = [final_state]
|
||||
|
||||
# all this circus is to recover the last vector in the sequence.
|
||||
begin = tf.pack([tf.shape(outputs)[0] - 1] + [0] * (ndim - 1))
|
||||
size = tf.pack([1] + [-1] * (ndim - 1))
|
||||
last_output = tf.slice(outputs, begin, size)
|
||||
last_output = tf.squeeze(last_output, [0])
|
||||
|
||||
axes = [1, 0] + list(range(2, len(outputs.get_shape())))
|
||||
outputs = tf.transpose(outputs, axes)
|
||||
@@ -909,6 +1234,11 @@ def in_train_phase(x, alt):
|
||||
'''Selects `x` in train phase, and `alt` otherwise.
|
||||
Note that `alt` should have the *same shape* as `x`.
|
||||
'''
|
||||
if _LEARNING_PHASE is 1:
|
||||
return x
|
||||
elif _LEARNING_PHASE is 0:
|
||||
return alt
|
||||
# else: assume learning phase is a placeholder.
|
||||
x_shape = copy.copy(x.get_shape())
|
||||
x = tf.python.control_flow_ops.cond(tf.cast(_LEARNING_PHASE, 'bool'),
|
||||
lambda: x,
|
||||
@@ -922,6 +1252,10 @@ def in_test_phase(x, alt):
|
||||
'''Selects `x` in test phase, and `alt` otherwise.
|
||||
Note that `alt` should have the *same shape* as `x`.
|
||||
'''
|
||||
if _LEARNING_PHASE is 1:
|
||||
return alt
|
||||
elif _LEARNING_PHASE is 0:
|
||||
return x
|
||||
x_shape = copy.copy(x.get_shape())
|
||||
x = tf.python.control_flow_ops.cond(tf.cast(_LEARNING_PHASE, 'bool'),
|
||||
lambda: alt,
|
||||
@@ -940,14 +1274,16 @@ def relu(x, alpha=0., max_value=None):
|
||||
alpha: slope of negative section.
|
||||
max_value: saturation threshold.
|
||||
'''
|
||||
negative_part = tf.nn.relu(-x)
|
||||
if alpha != 0.:
|
||||
negative_part = tf.nn.relu(-x)
|
||||
x = tf.nn.relu(x)
|
||||
if max_value is not None:
|
||||
x = tf.clip_by_value(x, tf.cast(0., dtype=_FLOATX),
|
||||
tf.cast(max_value, dtype=_FLOATX))
|
||||
if isinstance(alpha, (tuple, list, np.ndarray)) or np.isscalar(alpha):
|
||||
alpha = tf.constant(alpha, dtype=_FLOATX)
|
||||
x -= alpha * negative_part
|
||||
max_value = _to_tensor(max_value, x.dtype.base_dtype)
|
||||
zero = _to_tensor(0., x.dtype.base_dtype)
|
||||
x = tf.clip_by_value(x, zero, max_value)
|
||||
if alpha != 0.:
|
||||
alpha = _to_tensor(alpha, x.dtype.base_dtype)
|
||||
x -= alpha * negative_part
|
||||
return x
|
||||
|
||||
|
||||
@@ -980,8 +1316,8 @@ def categorical_crossentropy(output, target, from_logits=False):
|
||||
reduction_indices=len(output.get_shape()) - 1,
|
||||
keep_dims=True)
|
||||
# manual computation of crossentropy
|
||||
output = tf.clip_by_value(output, tf.cast(_EPSILON, dtype=_FLOATX),
|
||||
tf.cast(1. - _EPSILON, dtype=_FLOATX))
|
||||
epsilon = _to_tensor(_EPSILON, output.dtype.base_dtype)
|
||||
output = tf.clip_by_value(output, epsilon, 1. - epsilon)
|
||||
return - tf.reduce_sum(target * tf.log(output),
|
||||
reduction_indices=len(output.get_shape()) - 1)
|
||||
else:
|
||||
@@ -995,8 +1331,8 @@ def sparse_categorical_crossentropy(output, target, from_logits=False):
|
||||
# Note: tf.nn.softmax_cross_entropy_with_logits
|
||||
# expects logits, Keras expects probabilities.
|
||||
if not from_logits:
|
||||
output = tf.clip_by_value(output, tf.cast(_EPSILON, dtype=_FLOATX),
|
||||
tf.cast(1.-_EPSILON, dtype=_FLOATX))
|
||||
epsilon = _to_tensor(_EPSILON, output.dtype.base_dtype)
|
||||
output = tf.clip_by_value(output, epsilon, 1 - epsilon)
|
||||
output = tf.log(output)
|
||||
|
||||
output_shape = output.get_shape()
|
||||
@@ -1017,8 +1353,8 @@ def binary_crossentropy(output, target, from_logits=False):
|
||||
# expects logits, Keras expects probabilities.
|
||||
if not from_logits:
|
||||
# transform back to logits
|
||||
output = tf.clip_by_value(output, tf.cast(_EPSILON, dtype=_FLOATX),
|
||||
tf.cast(1.-_EPSILON, dtype=_FLOATX))
|
||||
epsilon = _to_tensor(_EPSILON, output.dtype.base_dtype)
|
||||
output = tf.clip_by_value(output, epsilon, 1 - epsilon)
|
||||
output = tf.log(output / (1 - output))
|
||||
return tf.nn.sigmoid_cross_entropy_with_logits(output, target)
|
||||
|
||||
@@ -1034,8 +1370,9 @@ def hard_sigmoid(x):
|
||||
Faster than sigmoid.
|
||||
'''
|
||||
x = (0.2 * x) + 0.5
|
||||
x = tf.clip_by_value(x, tf.cast(0., dtype=_FLOATX),
|
||||
tf.cast(1., dtype=_FLOATX))
|
||||
zero = _to_tensor(0., x.dtype.base_dtype)
|
||||
one = _to_tensor(1., x.dtype.base_dtype)
|
||||
x = tf.clip_by_value(x, zero, one)
|
||||
return x
|
||||
|
||||
|
||||
@@ -1045,14 +1382,16 @@ def tanh(x):
|
||||
return tf.nn.tanh(x)
|
||||
|
||||
|
||||
def dropout(x, level, seed=None):
|
||||
def dropout(x, level, noise_shape=None, seed=None):
|
||||
'''Sets entries in `x` to zero at random,
|
||||
while scaling the entire tensor.
|
||||
|
||||
# Arguments
|
||||
x: tensor
|
||||
level: fraction of the entries in the tensor
|
||||
that will be set to 0
|
||||
that will be set to 0.
|
||||
noise_shape: shape for randomly generated keep/drop flags,
|
||||
must be broadcastable to the shape of `x`
|
||||
seed: random seed to ensure determinism.
|
||||
'''
|
||||
retain_prob = 1. - level
|
||||
@@ -1060,7 +1399,7 @@ def dropout(x, level, seed=None):
|
||||
seed = np.random.randint(10e6)
|
||||
# the dummy 1. works around a TF bug
|
||||
# (float32_ref vs. float32 incompatibility)
|
||||
return tf.nn.dropout(x * 1., retain_prob, seed=seed)
|
||||
return tf.nn.dropout(x * 1., retain_prob, noise_shape, seed=seed)
|
||||
|
||||
|
||||
def l2_normalize(x, axis):
|
||||
@@ -1073,6 +1412,12 @@ def l2_normalize(x, axis):
|
||||
|
||||
# CONVOLUTIONS
|
||||
|
||||
def _preprocess_deconv_output_shape(shape, dim_ordering):
|
||||
if dim_ordering == 'th':
|
||||
shape = (shape[0], shape[2], shape[3], shape[1])
|
||||
return shape
|
||||
|
||||
|
||||
def _preprocess_conv2d_input(x, dim_ordering):
|
||||
if _FLOATX == 'float64':
|
||||
x = tf.cast(x, 'float32')
|
||||
@@ -1198,11 +1543,12 @@ def deconv2d(x, kernel, output_shape, strides=(1, 1),
|
||||
raise Exception('Unknown dim_ordering ' + str(dim_ordering))
|
||||
|
||||
x = _preprocess_conv2d_input(x, dim_ordering)
|
||||
output_shape = _preprocess_deconv_output_shape(output_shape, dim_ordering)
|
||||
kernel = _preprocess_conv2d_kernel(kernel, dim_ordering)
|
||||
kernel = tf.transpose(kernel, (0, 1, 3, 2))
|
||||
padding = _preprocess_border_mode(border_mode)
|
||||
strides = (1,) + strides + (1,)
|
||||
|
||||
# TODO: pre-process output_shape if dim_ordering == th
|
||||
x = tf.nn.conv2d_transpose(x, kernel, output_shape, strides,
|
||||
padding=padding)
|
||||
return _postprocess_conv2d_output(x, dim_ordering)
|
||||
@@ -1352,3 +1698,113 @@ def random_binomial(shape, p=0.0, dtype=_FLOATX, seed=None):
|
||||
return tf.select(tf.random_uniform(shape, dtype=dtype, seed=seed) <= p,
|
||||
tf.ones(shape, dtype=dtype),
|
||||
tf.zeros(shape, dtype=dtype))
|
||||
|
||||
# CTC
|
||||
# tensorflow has a native implementation, but it uses sparse tensors
|
||||
# and therefore requires a wrapper for Keras. The functions below convert
|
||||
# dense to sparse tensors and also wraps up the beam search code that is
|
||||
# in tensorflow's CTC implementation
|
||||
|
||||
def ctc_label_dense_to_sparse(labels, label_lengths):
    '''Converts a dense label matrix into a `tf.SparseTensor`.

    For every row of `labels`, only the positions whose column index is
    smaller than the matching entry of `label_lengths` are kept.

    # Arguments
        labels: 2D tensor of labels (its first two dimensions are read
            through `tf.shape`).
        label_lengths: tensor scanned element by element; each element is
            compared against the column indices of `labels`.

    # Returns
        A `tf.SparseTensor` whose indices are int64 (row, column) pairs,
        whose values are gathered from `labels`, and whose dense shape is
        the shape of `labels`.
    '''
    # undocumented feature soon to be made public
    from tensorflow.python.ops import functional_ops

    dense_shape = tf.shape(labels)
    batch_size_vec = tf.pack([dense_shape[0]])
    max_len_vec = tf.pack([dense_shape[1]])

    def _columns_below_length(unused_accumulator, length):
        # Boolean row: True where the column index is below `length`.
        columns = tf.expand_dims(tf.range(dense_shape[1]), 0)
        return columns < length

    scan_start = tf.cast(tf.fill(max_len_vec, 0), tf.bool)
    keep_mask = functional_ops.scan(_columns_below_length, label_lengths,
                                    initializer=scan_start,
                                    parallel_iterations=1)
    # Drop the singleton axis introduced by expand_dims inside the scan.
    keep_mask = keep_mask[:, 0, :]

    # Column index of every cell, then only the kept ones.
    column_grid = tf.reshape(
        tf.tile(tf.range(0, dense_shape[1]), batch_size_vec),
        dense_shape)
    column_idx = tf.boolean_mask(column_grid, keep_mask)

    # Row index of every cell, then only the kept ones.
    row_grid = tf.transpose(
        tf.reshape(tf.tile(tf.range(0, dense_shape[0]), max_len_vec),
                   tf.reverse(dense_shape, [True])))
    row_idx = tf.boolean_mask(row_grid, keep_mask)

    # Pair rows with columns: one (row, column) index per kept cell.
    indices = tf.transpose(
        tf.reshape(tf.concat(0, [row_idx, column_idx]), [2, -1]))
    values = tf.gather_nd(labels, indices)

    return tf.SparseTensor(tf.to_int64(indices), values,
                           tf.to_int64(dense_shape))
|
||||
|
||||
|
||||
def ctc_batch_cost(y_true, y_pred, input_length, label_length):
    '''Runs CTC loss algorithm on each batch element.

    # Arguments
        y_true: tensor (samples, max_string_length) containing the truth labels
        y_pred: tensor (samples, time_steps, num_categories) containing the
            prediction, or output of the softmax
        input_length: tensor (samples,1) containing the sequence length for
            each batch item in y_pred
        label_length: tensor (samples,1) containing the sequence length for
            each batch item in y_true

    # Returns
        Tensor with shape (samples,1) containing the
            CTC loss of each element
    '''
    # Flatten to 1-D instead of calling tf.squeeze without an axis:
    # an axis-less squeeze also removes the batch dimension when the
    # batch holds a single sample, leaving a scalar.
    label_length = tf.to_int32(tf.reshape(label_length, [-1]))
    input_length = tf.to_int32(tf.reshape(input_length, [-1]))
    sparse_labels = tf.to_int32(ctc_label_dense_to_sparse(y_true, label_length))

    # Move time to the leading axis and take the log; the small constant
    # keeps log() away from zero.
    y_pred = tf.log(tf.transpose(y_pred, perm=[1, 0, 2]) + 1e-8)

    return tf.expand_dims(tf.contrib.ctc.ctc_loss(inputs=y_pred,
                                                  labels=sparse_labels,
                                                  sequence_length=input_length), 1)
|
||||
|
||||
|
||||
def ctc_decode(y_pred, input_length, greedy=True, beam_width=None,
               dict_seq_lens=None, dict_values=None):
    '''Decodes the output of a softmax using either
       greedy (also known as best path) or a constrained dictionary
       search.

    # Arguments
        y_pred: tensor (samples, time_steps, num_categories) containing the
            prediction, or output of the softmax
        input_length: tensor (samples,1) containing the sequence length for
            each batch item in y_pred
        greedy: perform much faster best-path search if true. This does
            not use a dictionary
        beam_width: if greedy is false and this value is not none, then
            the constrained dictionary search uses a beam of this width;
            when it is None the decoder's own default width is used
        dict_seq_lens: the length of each element in the dict_values list
        dict_values: list of lists representing the dictionary.

    # Returns
        Tuple `(decoded_dense, log_prob)`:
            decoded_dense: list with one dense tensor per sparse tensor
                returned by the decoder; missing entries are filled with -1.
            log_prob: the second value returned by the decoder.
    '''
    y_pred = tf.log(tf.transpose(y_pred, perm=[1, 0, 2]) + 1e-8)
    # Flatten to 1-D; an axis-less tf.squeeze would also drop the batch
    # dimension when the batch holds a single sample.
    input_length = tf.to_int32(tf.reshape(input_length, [-1]))

    if greedy:
        (decoded, log_prob) = tf.contrib.ctc.ctc_greedy_decoder(
            inputs=y_pred,
            sequence_length=input_length)
    else:
        decoder_kwargs = dict(inputs=y_pred,
                              sequence_length=input_length,
                              dict_seq_lens=dict_seq_lens,
                              dict_values=dict_values)
        # Forward beam_width only when the caller supplied one. The
        # previous code had the two branches swapped: it dropped a
        # user-provided width and passed an explicit None otherwise.
        if beam_width is not None:
            decoder_kwargs['beam_width'] = beam_width
        (decoded, log_prob) = tf.contrib.ctc.ctc_beam_search_decoder(
            **decoder_kwargs)

    decoded_dense = [tf.sparse_to_dense(st.indices, st.shape, st.values,
                                        default_value=-1)
                     for st in decoded]

    return (decoded_dense, log_prob)
|
||||
|
||||
@@ -3,6 +3,7 @@ from theano import tensor as T
|
||||
from theano.sandbox.rng_mrg import MRG_RandomStreams as RandomStreams
|
||||
from theano.tensor.signal import pool
|
||||
from theano.tensor.nnet import conv3d2d
|
||||
from theano.printing import Print
|
||||
try:
|
||||
from theano.tensor.nnet.nnet import softsign as T_softsign
|
||||
except ImportError:
|
||||
@@ -22,6 +23,14 @@ def learning_phase():
|
||||
return _LEARNING_PHASE
|
||||
|
||||
|
||||
def set_learning_phase(value):
    '''Pins the module-level learning phase to a fixed value.

    # Arguments
        value: the new learning phase; must be 0 or 1.

    # Raises
        ValueError: if `value` is neither 0 nor 1.
    '''
    global _LEARNING_PHASE
    if value in {0, 1}:
        _LEARNING_PHASE = value
        return
    raise ValueError('Expected learning phase to be '
                     '0 or 1.')
|
||||
|
||||
|
||||
# VARIABLE MANIPULATION
|
||||
|
||||
def variable(value, dtype=_FLOATX, name=None):
|
||||
@@ -97,6 +106,16 @@ def zeros_like(x):
|
||||
return T.zeros_like(x)
|
||||
|
||||
|
||||
def random_uniform_variable(shape, low, high, dtype=_FLOATX, name=None):
    '''Creates a variable initialized with uniform random values.

    # Arguments
        shape: shape of the sampled array.
        low: lower bound passed to `np.random.uniform`.
        high: upper bound passed to `np.random.uniform`.
        dtype: dtype forwarded to `variable`.
        name: name forwarded to `variable`.

    # Returns
        The result of `variable(...)` on the sampled array.
    '''
    initial_value = np.random.uniform(low=low, high=high, size=shape)
    return variable(initial_value, dtype=dtype, name=name)
|
||||
|
||||
|
||||
def random_normal_variable(shape, mean, scale, dtype=_FLOATX, name=None):
    '''Creates a variable initialized with normally distributed values.

    # Arguments
        shape: shape of the sampled array.
        mean: mean of the normal distribution.
        scale: standard deviation of the normal distribution.
        dtype: dtype forwarded to `variable`.
        name: name forwarded to `variable`.

    # Returns
        The result of `variable(...)` on the sampled array.
    '''
    # `mean` used to be ignored (loc was hard-coded to 0.0).
    initial_value = np.random.normal(loc=mean, scale=scale, size=shape)
    return variable(initial_value, dtype=dtype, name=name)
|
||||
|
||||
|
||||
def count_params(x):
|
||||
'''Return number of scalars in a tensor.
|
||||
|
||||
@@ -109,6 +128,25 @@ def cast(x, dtype):
|
||||
return T.cast(x, dtype)
|
||||
|
||||
|
||||
# UPDATES OPS
|
||||
|
||||
|
||||
def update(x, new_x):
    '''Returns the pair `(x, new_x)` describing an update of `x`.'''
    pair = (x, new_x)
    return pair


def update_add(x, increment):
    '''Returns the pair `(x, x + increment)`.'''
    incremented = x + increment
    return (x, incremented)


def update_sub(x, decrement):
    '''Returns the pair `(x, x - decrement)`.'''
    decremented = x - decrement
    return (x, decremented)


def moving_average_update(variable, value, momentum):
    '''Returns the pair `(variable, variable * momentum + value * (1 - momentum))`.'''
    kept_part = variable * momentum
    new_part = value * (1. - momentum)
    return (variable, kept_part + new_part)
|
||||
|
||||
|
||||
# LINEAR ALGEBRA
|
||||
|
||||
'''
|
||||
@@ -122,24 +160,42 @@ def dot(x, y):
|
||||
|
||||
|
||||
def batch_dot(x, y, axes=None):
|
||||
'''batchwise dot product
|
||||
'''Batchwise dot product.
|
||||
|
||||
batch_dot results in a tensor with less dimensions than the input.
|
||||
If the number of dimensions is reduced to 1, we use `expand_dims` to
|
||||
make sure that ndim is at least 2.
|
||||
|
||||
# Example
|
||||
Assume x = [[1, 2], [3, 4]] and y = [[5, 6], [7, 8]]
|
||||
batch_dot(x, y, axes=1) = [[17, 53]] which is the main diagonal
|
||||
of x.dot(y.T), although we never have to calculate the off-diagonal
|
||||
elements.
|
||||
|
||||
|
||||
# Arguments
|
||||
x, y: tensors with ndim >= 2
|
||||
axes: list (or single) int with target dimensions
|
||||
|
||||
# Returns
|
||||
Tensor with ndim >= 2
|
||||
A tensor with shape equal to the concatenation of x's shape
|
||||
(less the dimension that was summed over) and y's shape
|
||||
(less the batch dimension and the dimension that was summed over).
|
||||
If the final rank is 1, we reshape it to (batch_size, 1).
|
||||
|
||||
# Examples
|
||||
Assume x = [[1, 2], [3, 4]] and y = [[5, 6], [7, 8]]
|
||||
batch_dot(x, y, axes=1) = [[17, 53]] which is the main diagonal
|
||||
of x.dot(y.T), although we never have to calculate the off-diagonal
|
||||
elements.
|
||||
|
||||
Shape inference:
|
||||
Let x's shape be (100, 20) and y's shape be (100, 30, 20).
|
||||
If dot_axes is (1, 2), to find the output shape of resultant tensor,
|
||||
loop through each dimension in x's shape and y's shape:
|
||||
x.shape[0] : 100 : append to output shape
|
||||
x.shape[1] : 20 : do not append to output shape,
|
||||
dimension 1 of x has been summed over. (dot_axes[0] = 1)
|
||||
y.shape[0] : 100 : do not append to output shape,
|
||||
always ignore first dimension of y
|
||||
y.shape[1] : 30 : append to output shape
|
||||
y.shape[2] : 20 : do not append to output shape,
|
||||
dimension 2 of y has been summed over. (dot_axes[1] = 2)
|
||||
|
||||
output_shape = (100, 30)
|
||||
'''
|
||||
if type(axes) == int:
|
||||
axes = (axes, axes)
|
||||
@@ -270,6 +326,22 @@ def not_equal(x, y):
|
||||
return T.neq(x, y)
|
||||
|
||||
|
||||
def greater(x, y):
    '''Element-wise `x > y`, delegated to `T.gt`.'''
    result = T.gt(x, y)
    return result


def greater_equal(x, y):
    '''Element-wise `x >= y`, delegated to `T.ge`.'''
    result = T.ge(x, y)
    return result


def lesser(x, y):
    '''Element-wise `x < y`, delegated to `T.lt`.'''
    result = T.lt(x, y)
    return result


def lesser_equal(x, y):
    '''Element-wise `x <= y`, delegated to `T.le`.'''
    result = T.le(x, y)
    return result
|
||||
|
||||
|
||||
def maximum(x, y):
|
||||
return T.maximum(x, y)
|
||||
|
||||
@@ -290,7 +362,7 @@ def normalize_batch_in_training(x, gamma, beta,
|
||||
reduction_axes, epsilon=0.0001):
|
||||
'''Compute mean and std for batch then apply batch_normalization on batch.
|
||||
'''
|
||||
std = T.sqrt(x.var(reduction_axes) + epsilon)
|
||||
var = x.var(reduction_axes)
|
||||
mean = x.mean(reduction_axes)
|
||||
|
||||
target_shape = []
|
||||
@@ -302,20 +374,26 @@ def normalize_batch_in_training(x, gamma, beta,
|
||||
target_shape = T.stack(*target_shape)
|
||||
|
||||
broadcast_mean = T.reshape(mean, target_shape)
|
||||
broadcast_std = T.reshape(std, target_shape)
|
||||
broadcast_var = T.reshape(var, target_shape)
|
||||
broadcast_beta = T.reshape(beta, target_shape)
|
||||
broadcast_gamma = T.reshape(gamma, target_shape)
|
||||
normed = batch_normalization(x, broadcast_mean, broadcast_std,
|
||||
normed = batch_normalization(x, broadcast_mean, broadcast_var,
|
||||
broadcast_beta, broadcast_gamma,
|
||||
epsilon)
|
||||
return normed, mean, std
|
||||
return normed, mean, var
|
||||
|
||||
|
||||
def batch_normalization(x, mean, std, beta, gamma, epsilon=0.0001):
|
||||
'''Apply batch normalization on x given mean, std, beta and gamma.
|
||||
def batch_normalization(x, mean, var, beta, gamma, epsilon=0.0001):
|
||||
'''Apply batch normalization on x given mean, var, beta and gamma.
|
||||
'''
|
||||
normed = (x - mean) * (gamma * T.inv(std + epsilon)) + beta
|
||||
return normed
|
||||
if theano.config.device.startswith('cuda') or theano.config.device.startswith('gpu'):
|
||||
try:
|
||||
return theano.sandbox.cuda.dnn.dnn_batch_normalization_test(x, gamma, beta, mean, var,
|
||||
'spatial', epsilon)
|
||||
except AttributeError:
|
||||
pass
|
||||
return T.nnet.bn.batch_normalization(x, gamma, beta, mean, sqrt(var + epsilon),
|
||||
mode='high_mem')
|
||||
|
||||
|
||||
# SHAPE OPERATIONS
|
||||
@@ -430,11 +508,9 @@ def expand_dims(x, dim=-1):
|
||||
def squeeze(x, axis):
|
||||
'''Remove a 1-dimension from the tensor at index "axis".
|
||||
'''
|
||||
broadcastable = x.broadcastable[:axis] + x.broadcastable[axis+1:]
|
||||
x = T.patternbroadcast(x, [i == axis for i in range(x.type.ndim)])
|
||||
x = T.squeeze(x)
|
||||
x = T.patternbroadcast(x, broadcastable)
|
||||
return x
|
||||
shape = list(x.shape)
|
||||
shape.pop(axis)
|
||||
return T.reshape(x, tuple(shape))
|
||||
|
||||
|
||||
def temporal_padding(x, padding=1):
|
||||
@@ -521,6 +597,28 @@ def spatial_3d_padding(x, padding=(1, 1, 1), dim_ordering='th'):
|
||||
def pack(x):
|
||||
return T.stack(*x)
|
||||
|
||||
|
||||
def one_hot(indices, nb_classes):
    '''Input: nD integer tensor of shape (batch_size, dim1, dim2, ... dim(n-1))
    Output: (n + 1)D one hot representation of the input
    with shape (batch_size, dim1, dim2, ... dim(n-1), nb_classes)
    '''
    # Remember the symbolic size of every input axis before flattening.
    leading_dims = tuple(indices.shape[axis] for axis in range(indices.ndim))
    flat_indices = T.flatten(indices)
    encoded = T.extra_ops.to_one_hot(flat_indices, nb_classes)
    # Restore the original axes and append the class axis.
    return T.reshape(encoded, leading_dims + (nb_classes,))
|
||||
|
||||
|
||||
def reverse(x, axes):
    '''Reverse a tensor along the specified axes.

    # Arguments
        x: object exposing `ndim` and supporting multi-dimensional slicing.
        axes: a single int or an iterable of ints. Negative values count
            from the last axis. Axes outside the range of `x` are ignored.

    # Returns
        `x` indexed with a reversed slice on every selected axis.
    '''
    if isinstance(axes, int):
        axes = [axes]
    # Map negative axes onto their positive counterparts so that e.g. -1
    # selects the last axis instead of being silently ignored.
    selected = set(axis + x.ndim if axis < 0 else axis for axis in axes)
    # Index with a tuple: a *list* of slices is not a valid
    # multi-dimensional index for NumPy-style indexing.
    index = tuple(slice(None, None, -1) if i in selected else slice(None, None, None)
                  for i in range(x.ndim))
    return x[index]
|
||||
|
||||
|
||||
# VALUE MANIPULATION
|
||||
|
||||
|
||||
@@ -547,6 +645,18 @@ def batch_set_value(tuples):
|
||||
x.set_value(np.asarray(value, dtype=x.dtype))
|
||||
|
||||
|
||||
def get_variable_shape(x):
    '''Returns the `shape` attribute of the value held by `x`
    (obtained through `x.get_value()`).
    '''
    current_value = x.get_value()
    return current_value.shape
|
||||
|
||||
|
||||
def print_tensor(x, message=''):
    '''Wraps `x` in a `Print` op built from `message` and returns the
    result of applying that op to `x`.
    '''
    return Print(message)(x)
|
||||
|
||||
|
||||
# GRAPH MANIPULATION
|
||||
|
||||
class Function(object):
|
||||
@@ -554,7 +664,7 @@ class Function(object):
|
||||
def __init__(self, inputs, outputs, updates=[], **kwargs):
|
||||
self.function = theano.function(inputs, outputs, updates=updates,
|
||||
allow_input_downcast=True,
|
||||
on_unused_input='warn',
|
||||
on_unused_input='ignore',
|
||||
**kwargs)
|
||||
|
||||
def __call__(self, inputs):
|
||||
@@ -755,12 +865,20 @@ def switch(condition, then_expression, else_expression):
|
||||
|
||||
|
||||
def in_train_phase(x, alt):
    '''Selects `x` in train phase, and `alt` otherwise.

    When the learning phase is the fixed integer 1 or 0 the choice is
    made immediately; otherwise a `T.switch` on `_LEARNING_PHASE` is
    built and the result is flagged with `_uses_learning_phase = True`.
    '''
    # Compare by value, but only for plain Python ints: `is 1` relies on
    # interpreter-specific int caching, and the type guard keeps symbolic
    # learning phases out of the `==` comparison.
    if isinstance(_LEARNING_PHASE, int):
        if _LEARNING_PHASE == 1:
            return x
        if _LEARNING_PHASE == 0:
            return alt
    x = T.switch(_LEARNING_PHASE, x, alt)
    x._uses_learning_phase = True
    return x
|
||||
|
||||
|
||||
def in_test_phase(x, alt):
    '''Selects `x` in test phase, and `alt` otherwise.

    When the learning phase is the fixed integer 1 or 0 the choice is
    made immediately; otherwise a `T.switch` on `_LEARNING_PHASE` is
    built and the result is flagged with `_uses_learning_phase = True`.
    '''
    # Compare by value, but only for plain Python ints: `is 1` relies on
    # interpreter-specific int caching, and the type guard keeps symbolic
    # learning phases out of the `==` comparison.
    if isinstance(_LEARNING_PHASE, int):
        if _LEARNING_PHASE == 1:
            return alt
        if _LEARNING_PHASE == 0:
            return x
    x = T.switch(_LEARNING_PHASE, alt, x)
    x._uses_learning_phase = True
    return x
|
||||
@@ -829,14 +947,33 @@ def tanh(x):
|
||||
return T.tanh(x)
|
||||
|
||||
|
||||
def dropout(x, level, seed=None):
|
||||
def dropout(x, level, noise_shape=None, seed=None):
|
||||
'''Sets entries in `x` to zero at random,
|
||||
while scaling the entire tensor.
|
||||
|
||||
# Arguments
|
||||
x: tensor
|
||||
level: fraction of the entries in the tensor
|
||||
that will be set to 0.
|
||||
noise_shape: shape for randomly generated keep/drop flags,
|
||||
must be broadcastable to the shape of `x`
|
||||
seed: random seed to ensure determinism.
|
||||
'''
|
||||
if level < 0. or level >= 1:
|
||||
raise Exception('Dropout level must be in interval [0, 1[.')
|
||||
if seed is None:
|
||||
seed = np.random.randint(1, 10e6)
|
||||
|
||||
rng = RandomStreams(seed=seed)
|
||||
retain_prob = 1. - level
|
||||
x *= rng.binomial(x.shape, p=retain_prob, dtype=x.dtype)
|
||||
|
||||
if noise_shape is None:
|
||||
random_tensor = rng.binomial(x.shape, p=retain_prob, dtype=x.dtype)
|
||||
else:
|
||||
random_tensor = rng.binomial(noise_shape, p=retain_prob, dtype=x.dtype)
|
||||
random_tensor = T.patternbroadcast(random_tensor, [dim == 1 for dim in noise_shape])
|
||||
|
||||
x *= random_tensor
|
||||
x /= retain_prob
|
||||
return x
|
||||
|
||||
@@ -848,6 +985,79 @@ def l2_normalize(x, axis):
|
||||
|
||||
# CONVOLUTIONS
|
||||
|
||||
def _preprocess_conv2d_input(x, dim_ordering):
|
||||
if dim_ordering == 'tf':
|
||||
# TF uses the last dimension as channel dimension,
|
||||
# instead of the 2nd one.
|
||||
# TH input shape: (samples, input_depth, rows, cols)
|
||||
# TF input shape: (samples, rows, cols, input_depth)
|
||||
x = x.dimshuffle((0, 3, 1, 2))
|
||||
return x
|
||||
|
||||
|
||||
def _preprocess_conv2d_kernel(kernel, dim_ordering):
|
||||
if dim_ordering == 'tf':
|
||||
# TF uses the last dimension as channel dimension,
|
||||
# instead of the 2nd one.
|
||||
# TH kernel shape: (depth, input_depth, rows, cols)
|
||||
# TF kernel shape: (rows, cols, input_depth, depth)
|
||||
kernel = kernel.dimshuffle((3, 2, 0, 1))
|
||||
return kernel
|
||||
|
||||
|
||||
def _preprocess_border_mode(border_mode):
|
||||
if border_mode == 'same':
|
||||
th_border_mode = 'half'
|
||||
elif border_mode == 'valid':
|
||||
th_border_mode = 'valid'
|
||||
else:
|
||||
raise Exception('Border mode not supported: ' + str(border_mode))
|
||||
return th_border_mode
|
||||
|
||||
|
||||
def _preprocess_image_shape(dim_ordering, image_shape):
|
||||
# Theano might not accept long type
|
||||
def int_or_none(value):
|
||||
try:
|
||||
return int(value)
|
||||
except TypeError:
|
||||
return None
|
||||
if dim_ordering == 'tf':
|
||||
if image_shape:
|
||||
image_shape = (image_shape[0], image_shape[3],
|
||||
image_shape[1], image_shape[2])
|
||||
if image_shape is not None:
|
||||
image_shape = tuple(int_or_none(v) for v in image_shape)
|
||||
return image_shape
|
||||
|
||||
|
||||
def _preprocess_filter_shape(dim_ordering, filter_shape):
|
||||
# Theano might not accept long type
|
||||
def int_or_none(value):
|
||||
try:
|
||||
return int(value)
|
||||
except TypeError:
|
||||
return None
|
||||
if dim_ordering == 'tf':
|
||||
if filter_shape:
|
||||
filter_shape = (filter_shape[3], filter_shape[2],
|
||||
filter_shape[0], filter_shape[1])
|
||||
if filter_shape is not None:
|
||||
filter_shape = tuple(int_or_none(v) for v in filter_shape)
|
||||
return filter_shape
|
||||
|
||||
|
||||
def _postprocess_conv2d_output(conv_out, x, border_mode, np_kernel, strides, dim_ordering):
|
||||
if border_mode == 'same':
|
||||
if np_kernel.shape[2] % 2 == 0:
|
||||
conv_out = conv_out[:, :, :(x.shape[2] + strides[0] - 1) // strides[0], :]
|
||||
if np_kernel.shape[3] % 2 == 0:
|
||||
conv_out = conv_out[:, :, :, :(x.shape[3] + strides[1] - 1) // strides[1]]
|
||||
if dim_ordering == 'tf':
|
||||
conv_out = conv_out.dimshuffle((0, 2, 3, 1))
|
||||
return conv_out
|
||||
|
||||
|
||||
def conv2d(x, kernel, strides=(1, 1), border_mode='valid',
|
||||
dim_ordering=_IMAGE_DIM_ORDERING, image_shape=None,
|
||||
filter_shape=None, filter_dilation=(1, 1)):
|
||||
@@ -864,42 +1074,12 @@ def conv2d(x, kernel, strides=(1, 1), border_mode='valid',
|
||||
if dim_ordering not in {'th', 'tf'}:
|
||||
raise Exception('Unknown dim_ordering ' + str(dim_ordering))
|
||||
|
||||
if dim_ordering == 'tf':
|
||||
# TF uses the last dimension as channel dimension,
|
||||
# instead of the 2nd one.
|
||||
# TH input shape: (samples, input_depth, rows, cols)
|
||||
# TF input shape: (samples, rows, cols, input_depth)
|
||||
# TH kernel shape: (depth, input_depth, rows, cols)
|
||||
# TF kernel shape: (rows, cols, input_depth, depth)
|
||||
x = x.dimshuffle((0, 3, 1, 2))
|
||||
kernel = kernel.dimshuffle((3, 2, 0, 1))
|
||||
if image_shape:
|
||||
image_shape = (image_shape[0], image_shape[3],
|
||||
image_shape[1], image_shape[2])
|
||||
if filter_shape:
|
||||
filter_shape = (filter_shape[3], filter_shape[2],
|
||||
filter_shape[0], filter_shape[1])
|
||||
|
||||
if border_mode == 'same':
|
||||
th_border_mode = 'half'
|
||||
np_kernel = kernel.eval()
|
||||
elif border_mode == 'valid':
|
||||
th_border_mode = 'valid'
|
||||
else:
|
||||
raise Exception('Border mode not supported: ' + str(border_mode))
|
||||
|
||||
# Theano might not accept long type
|
||||
def int_or_none(value):
|
||||
try:
|
||||
return int(value)
|
||||
except TypeError:
|
||||
return None
|
||||
|
||||
if image_shape is not None:
|
||||
image_shape = tuple(int_or_none(v) for v in image_shape)
|
||||
|
||||
if filter_shape is not None:
|
||||
filter_shape = tuple(int_or_none(v) for v in filter_shape)
|
||||
x = _preprocess_conv2d_input(x, dim_ordering)
|
||||
kernel = _preprocess_conv2d_kernel(kernel, dim_ordering)
|
||||
th_border_mode = _preprocess_border_mode(border_mode)
|
||||
np_kernel = kernel.eval()
|
||||
image_shape = _preprocess_image_shape(dim_ordering, image_shape)
|
||||
filter_shape = _preprocess_filter_shape(dim_ordering, filter_shape)
|
||||
|
||||
# TODO: remove the if statement when theano with no filter dilation is deprecated.
|
||||
if filter_dilation == (1, 1):
|
||||
@@ -916,14 +1096,8 @@ def conv2d(x, kernel, strides=(1, 1), border_mode='valid',
|
||||
filter_shape=filter_shape,
|
||||
filter_dilation=filter_dilation)
|
||||
|
||||
if border_mode == 'same':
|
||||
if np_kernel.shape[2] % 2 == 0:
|
||||
conv_out = conv_out[:, :, :(x.shape[2] + strides[0] - 1) // strides[0], :]
|
||||
if np_kernel.shape[3] % 2 == 0:
|
||||
conv_out = conv_out[:, :, :, :(x.shape[3] + strides[1] - 1) // strides[1]]
|
||||
|
||||
if dim_ordering == 'tf':
|
||||
conv_out = conv_out.dimshuffle((0, 2, 3, 1))
|
||||
conv_out = _postprocess_conv2d_output(conv_out, x, border_mode, np_kernel,
|
||||
strides, dim_ordering)
|
||||
return conv_out
|
||||
|
||||
|
||||
@@ -931,7 +1105,38 @@ def deconv2d(x, kernel, output_shape, strides=(1, 1),
|
||||
border_mode='valid',
|
||||
dim_ordering=_IMAGE_DIM_ORDERING,
|
||||
image_shape=None, filter_shape=None):
|
||||
raise NotImplementedError
|
||||
'''2D deconvolution (transposed convolution).
|
||||
|
||||
# Arguments
|
||||
kernel: kernel tensor.
|
||||
output_shape: desired dimensions of output.
|
||||
strides: strides tuple.
|
||||
border_mode: string, "same" or "valid".
|
||||
dim_ordering: "tf" or "th".
|
||||
Whether to use Theano or TensorFlow dimension ordering
|
||||
in inputs/kernels/outputs.
|
||||
'''
|
||||
flip_filters = False
|
||||
if dim_ordering not in {'th', 'tf'}:
|
||||
raise Exception('Unknown dim_ordering ' + str(dim_ordering))
|
||||
|
||||
x = _preprocess_conv2d_input(x, dim_ordering)
|
||||
kernel = _preprocess_conv2d_kernel(kernel, dim_ordering)
|
||||
kernel = kernel.dimshuffle((1, 0, 2, 3))
|
||||
th_border_mode = _preprocess_border_mode(border_mode)
|
||||
np_kernel = kernel.eval()
|
||||
filter_shape = _preprocess_filter_shape(dim_ordering, filter_shape)
|
||||
|
||||
op = T.nnet.abstract_conv.AbstractConv2d_gradInputs(imshp=output_shape,
|
||||
kshp=filter_shape,
|
||||
subsample=strides,
|
||||
border_mode=th_border_mode,
|
||||
filter_flip=not flip_filters)
|
||||
conv_out = op(kernel, x, output_shape[2:])
|
||||
|
||||
conv_out = _postprocess_conv2d_output(conv_out, x, border_mode, np_kernel,
|
||||
strides, dim_ordering)
|
||||
return conv_out
|
||||
|
||||
|
||||
def atrous_conv2d(x, kernel, rate=1,
|
||||
@@ -1131,3 +1336,105 @@ def random_binomial(shape, p=0.0, dtype=_FLOATX, seed=None):
|
||||
seed = np.random.randint(1, 10e6)
|
||||
rng = RandomStreams(seed=seed)
|
||||
return rng.binomial(shape, p=p, dtype=dtype)
|
||||
|
||||
# Theano implementation of CTC
|
||||
# Used with permission from Shawn Tan
|
||||
# https://github.com/shawntan/
|
||||
# Note that tensorflow's native CTC code is significantly
|
||||
# faster than this
|
||||
|
||||
def ctc_interleave_blanks(Y):
    '''Returns a vector of length `2 * len(Y) + 1` filled with -1 in
    which the entries of `Y` are written at the odd positions.
    '''
    odd_positions = T.arange(Y.shape[0]) * 2 + 1
    interleaved = T.alloc(-1, Y.shape[0] * 2 + 1)
    return T.set_subtensor(interleaved[odd_positions], Y)
|
||||
|
||||
def ctc_create_skip_idxs(Y):
    '''Returns the odd positions `i` of `Y` (taken from
    `arange((len(Y) - 3) // 2) * 2 + 1`) for which `Y[i] != Y[i + 2]`.
    '''
    candidates = T.arange((Y.shape[0] - 3) // 2) * 2 + 1
    differs_from_next = T.neq(Y[candidates], Y[candidates + 2])
    return candidates[differs_from_next.nonzero()]
|
||||
|
||||
def ctc_update_log_p(skip_idxs, zeros, active, log_p_curr, log_p_prev):
    '''Advances the log-probability vector by one step.

    # Arguments
        skip_idxs: positions from which a jump of two is allowed.
        zeros: zero vector used as a template for the new buffers.
        active: number of leading positions currently in use.
        log_p_curr: log-probabilities of the current step.
        log_p_prev: accumulated log-probabilities of the previous step.

    # Returns
        Tuple `(active_next, log_p_next)`: the new count of active
        positions (int32) and the updated log-probabilities for them.
    '''
    usable_skips = skip_idxs[(skip_idxs < active).nonzero()]
    # The active window grows by one, or up to the target of the furthest
    # usable skip, but never beyond the length of log_p_curr.
    furthest_skip_end = T.max(T.concatenate([usable_skips, [-1]])) + 2 + 1
    active_next = T.cast(
        T.minimum(T.maximum(active + 1, furthest_skip_end),
                  log_p_curr.shape[0]),
        'int32')

    # Work in the linear domain relative to the maximum of the active part.
    common_factor = T.max(log_p_prev[:active])
    p_prev = T.exp(log_p_prev[:active] - common_factor)
    shifted = zeros[:active_next]
    # copy over
    shifted = T.set_subtensor(shifted[:active], p_prev)
    # previous transitions
    shifted = T.inc_subtensor(shifted[1:], shifted[:-1])
    # skip transitions
    shifted = T.inc_subtensor(shifted[usable_skips + 2], p_prev[usable_skips])
    updated_log_p_prev = T.log(shifted) + common_factor

    log_p_next = T.set_subtensor(
        zeros[:active_next],
        log_p_curr[:active_next] + updated_log_p_prev)
    return active_next, log_p_next
|
||||
|
||||
def ctc_path_probs(predict, Y, alpha=1e-4):
    '''Runs the forward and backward CTC recursions with `theano.scan`.

    # Arguments
        predict: matrix indexed as `predict[:, Y]`.
        Y: label vector used to select columns of `predict`.
        alpha: smoothing weight mixed into the selected probabilities.

    # Returns
        Tuple `(log_probs, mask)`: the combined forward/backward
        log-probabilities and a mask of the positions that are active in
        both recursions.
    '''
    smoothed_predict = (1 - alpha) * predict[:, Y] + alpha * np.float32(1.) / Y.shape[0]
    L = T.log(smoothed_predict)
    zeros = T.zeros_like(L[0])
    # (an unused `base` tensor that used to be built here was removed)
    log_first = zeros

    f_skip_idxs = ctc_create_skip_idxs(Y)
    b_skip_idxs = ctc_create_skip_idxs(Y[::-1])  # there should be a shortcut to calculating this

    def step(log_f_curr, log_b_curr, f_active, log_f_prev, b_active, log_b_prev):
        # Advance the forward and the backward recursion by one step each.
        f_active_next, log_f_next = ctc_update_log_p(f_skip_idxs, zeros, f_active, log_f_curr, log_f_prev)
        b_active_next, log_b_next = ctc_update_log_p(b_skip_idxs, zeros, b_active, log_b_curr, log_b_prev)
        return f_active_next, log_f_next, b_active_next, log_b_next

    # The backward pass consumes L reversed along both axes.
    [f_active, log_f_probs, b_active, log_b_probs], _ = theano.scan(
        step, sequences=[L, L[::-1, ::-1]], outputs_info=[np.int32(1), log_first, np.int32(1), log_first])

    idxs = T.arange(L.shape[1]).dimshuffle('x', 0)
    # Only the backward half of the mask is flipped back into forward order.
    mask = (idxs < f_active.dimshuffle(0, 'x')) & (idxs < b_active.dimshuffle(0, 'x'))[::-1, ::-1]
    log_probs = log_f_probs + log_b_probs[::-1, ::-1] - L
    return log_probs, mask
|
||||
|
||||
def ctc_cost(predict, Y):
    '''Returns the negative log of the summed path probabilities for
    `Y` (after interleaving blanks) under `predict`.
    '''
    log_probs, mask = ctc_path_probs(predict, ctc_interleave_blanks(Y))
    # Subtract the maximum before exponentiating, then add it back.
    common_factor = T.max(log_probs)
    active_probs = T.exp(log_probs - common_factor)[mask.nonzero()]
    total_log_prob = T.log(T.sum(active_probs)) + common_factor
    return -total_log_prob
|
||||
|
||||
# batchifies original CTC code
|
||||
def ctc_batch_cost(y_true, y_pred, input_length, label_length):
    '''Runs CTC loss algorithm on each batch element.

    # Arguments
        y_true: tensor (samples, max_string_length) containing the truth labels
        y_pred: tensor (samples, time_steps, num_categories) containing the prediction,
                or output of the softmax
        input_length: tensor (samples,1) containing the sequence length for
                each batch item in y_pred
        label_length: tensor (samples,1) containing the sequence length for
                each batch item in y_true

    # Returns
        Tensor containing the CTC loss of each element. The per-sample
        losses collected by the scan are returned through
        `dimshuffle('x', 0)`, i.e. with a broadcastable axis inserted
        in front, giving shape (1, samples).
        NOTE(review): callers that need (samples, 1) must transpose the
        result; confirm which layout is intended before changing the code.
    '''

    def ctc_step(y_true_step, y_pred_step, input_length_step, label_length_step):
        # Trim the padded prediction and label down to their true lengths
        # before computing the single-sample cost.
        y_pred_step = y_pred_step[0: input_length_step[0]]
        y_true_step = y_true_step[0:label_length_step[0]]
        return ctc_cost(y_pred_step, y_true_step)

    # Iterate over the leading (sample) axis of all four inputs in lockstep.
    ret, _ = theano.scan(
        fn=ctc_step,
        outputs_info=None,
        sequences=[y_true, y_pred, input_length, label_length]
    )

    ret = ret.dimshuffle('x', 0)
    return ret
|
||||
|
||||
+26
-12
@@ -9,6 +9,7 @@ import warnings
|
||||
from collections import deque
|
||||
from .utils.generic_utils import Progbar
|
||||
from keras import backend as K
|
||||
from pkg_resources import parse_version
|
||||
|
||||
|
||||
class CallbackList(object):
|
||||
@@ -212,6 +213,7 @@ class History(Callback):
|
||||
for k, v in logs.items():
|
||||
self.history.setdefault(k, []).append(v)
|
||||
|
||||
|
||||
class ModelCheckpoint(Callback):
|
||||
'''Save the model after every epoch.
|
||||
|
||||
@@ -229,25 +231,29 @@ class ModelCheckpoint(Callback):
|
||||
verbose: verbosity mode, 0 or 1.
|
||||
save_best_only: if `save_best_only=True`,
|
||||
the latest best model according to
|
||||
the validation loss will not be overwritten.
|
||||
the quantity monitored will not be overwritten.
|
||||
mode: one of {auto, min, max}.
|
||||
If `save_best_only=True`, the decision
|
||||
to overwrite the current save file is made
|
||||
based on either the maximization or the
|
||||
minization of the monitored. For `val_acc`,
|
||||
minimization of the monitored quantity. For `val_acc`,
|
||||
this should be `max`, for `val_loss` this should
|
||||
be `min`, etc. In `auto` mode, the direction is
|
||||
automatically inferred from the name of the monitored quantity.
|
||||
save_weights_only: if True, then only the model's weights will be
|
||||
saved (`model.save_weights(filepath)`), else the full model
|
||||
is saved (`model.save(filepath)`).
|
||||
|
||||
'''
|
||||
def __init__(self, filepath, monitor='val_loss', verbose=0,
|
||||
save_best_only=False, mode='auto'):
|
||||
|
||||
save_best_only=False, save_weights_only=False,
|
||||
mode='auto'):
|
||||
super(ModelCheckpoint, self).__init__()
|
||||
self.monitor = monitor
|
||||
self.verbose = verbose
|
||||
self.filepath = filepath
|
||||
self.save_best_only = save_best_only
|
||||
self.save_weights_only = save_weights_only
|
||||
|
||||
if mode not in ['auto', 'min', 'max']:
|
||||
warnings.warn('ModelCheckpoint mode %s is unknown, '
|
||||
@@ -284,7 +290,10 @@ class ModelCheckpoint(Callback):
|
||||
% (epoch, self.monitor, self.best,
|
||||
current, filepath))
|
||||
self.best = current
|
||||
self.model.save_weights(filepath, overwrite=True)
|
||||
if self.save_weights_only:
|
||||
self.model.save_weights(filepath, overwrite=True)
|
||||
else:
|
||||
self.model.save(filepath, overwrite=True)
|
||||
else:
|
||||
if self.verbose > 0:
|
||||
print('Epoch %05d: %s did not improve' %
|
||||
@@ -292,7 +301,10 @@ class ModelCheckpoint(Callback):
|
||||
else:
|
||||
if self.verbose > 0:
|
||||
print('Epoch %05d: saving model to %s' % (epoch, filepath))
|
||||
self.model.save_weights(filepath, overwrite=True)
|
||||
if self.save_weights_only:
|
||||
self.model.save_weights(filepath, overwrite=True)
|
||||
else:
|
||||
self.model.save(filepath, overwrite=True)
|
||||
|
||||
|
||||
class EarlyStopping(Callback):
|
||||
@@ -319,7 +331,8 @@ class EarlyStopping(Callback):
|
||||
|
||||
if mode not in ['auto', 'min', 'max']:
|
||||
warnings.warn('EarlyStopping mode %s is unknown, '
|
||||
'fallback to auto mode.' % (self.mode), RuntimeWarning)
|
||||
'fallback to auto mode.' % (self.mode),
|
||||
RuntimeWarning)
|
||||
mode = 'auto'
|
||||
|
||||
if mode == 'min':
|
||||
@@ -361,8 +374,8 @@ class RemoteMonitor(Callback):
|
||||
# Arguments
|
||||
root: root url to which the events will be sent (at the end
|
||||
of every epoch). Events are sent to
|
||||
`root + '/publish/epoch/end/'` by default. Calls are
|
||||
HTTP POST, with a `data` argument which is a
|
||||
`root + '/publish/epoch/end/'` by default. Calls are
|
||||
HTTP POST, with a `data` argument which is a
|
||||
JSON-encoded dictionary of event data.
|
||||
'''
|
||||
|
||||
@@ -433,8 +446,9 @@ class TensorBoard(Callback):
|
||||
histogram_freq: frequency (in epochs) at which to compute activation
|
||||
histograms for the layers of the model. If set to 0,
|
||||
histograms won't be computed.
|
||||
write_graph: whether to visualize the graph in Tensorboard. The log file can
|
||||
become quite large when write_graph is set to True.
|
||||
write_graph: whether to visualize the graph in Tensorboard.
|
||||
The log file can become quite large when
|
||||
write_graph is set to True.
|
||||
'''
|
||||
|
||||
def __init__(self, log_dir='./logs', histogram_freq=0, write_graph=True):
|
||||
@@ -465,7 +479,7 @@ class TensorBoard(Callback):
|
||||
layer.output)
|
||||
self.merged = tf.merge_all_summaries()
|
||||
if self.write_graph:
|
||||
if tf.__version__ >= '0.8.0':
|
||||
if parse_version(tf.__version__) >= parse_version('0.8.0'):
|
||||
self.writer = tf.train.SummaryWriter(self.log_dir,
|
||||
self.sess.graph)
|
||||
else:
|
||||
|
||||
+58
-11
@@ -4,26 +4,58 @@ import gzip
|
||||
from ..utils.data_utils import get_file
|
||||
from six.moves import zip
|
||||
import numpy as np
|
||||
import sys
|
||||
|
||||
|
||||
def load_data(path="imdb.pkl", nb_words=None, skip_top=0,
|
||||
maxlen=None, test_split=0.2, seed=113,
|
||||
def load_data(path='imdb_full.pkl', nb_words=None, skip_top=0,
|
||||
maxlen=None, seed=113,
|
||||
start_char=1, oov_char=2, index_from=3):
|
||||
'''
|
||||
# Arguments
|
||||
path: where to store the data (in `/.keras/dataset`)
|
||||
nb_words: max number of words to include. Words are ranked
|
||||
by how often they occur (in the training set) and only
|
||||
the most frequent words are kept
|
||||
skip_top: skip the top N most frequently occurring words
|
||||
(which may not be informative).
|
||||
maxlen: truncate sequences after this length.
|
||||
seed: random seed for sample shuffling.
|
||||
start_char: The start of a sequence will be marked with this character.
|
||||
Set to 1 because 0 is usually the padding character.
|
||||
oov_char: words that were cut out because of the `nb_words`
|
||||
or `skip_top` limit will be replaced with this character.
|
||||
index_from: index actual words with this index and higher.
|
||||
|
||||
path = get_file(path, origin="https://s3.amazonaws.com/text-datasets/imdb.pkl")
|
||||
Note that the 'out of vocabulary' character is only used for
|
||||
words that were present in the training set but are not included
|
||||
because they're not making the `nb_words` cut here.
|
||||
Words that were not seen in the training set but are in the test set
|
||||
have simply been skipped.
|
||||
'''
|
||||
path = get_file(path,
|
||||
origin='https://s3.amazonaws.com/text-datasets/imdb_full.pkl',
|
||||
md5_hash='d091312047c43cf9e4e38fef92437263')
|
||||
|
||||
if path.endswith(".gz"):
|
||||
if path.endswith('.gz'):
|
||||
f = gzip.open(path, 'rb')
|
||||
else:
|
||||
f = open(path, 'rb')
|
||||
|
||||
X, labels = cPickle.load(f)
|
||||
(x_train, labels_train), (x_test, labels_test) = cPickle.load(f)
|
||||
f.close()
|
||||
|
||||
np.random.seed(seed)
|
||||
np.random.shuffle(X)
|
||||
np.random.shuffle(x_train)
|
||||
np.random.seed(seed)
|
||||
np.random.shuffle(labels)
|
||||
np.random.shuffle(labels_train)
|
||||
|
||||
np.random.seed(seed * 2)
|
||||
np.random.shuffle(x_test)
|
||||
np.random.seed(seed * 2)
|
||||
np.random.shuffle(labels_test)
|
||||
|
||||
X = x_train + x_test
|
||||
labels = labels_train + labels_test
|
||||
|
||||
if start_char is not None:
|
||||
X = [[start_char] + [w + index_from for w in x] for x in X]
|
||||
@@ -60,10 +92,25 @@ def load_data(path="imdb.pkl", nb_words=None, skip_top=0,
|
||||
nX.append(nx)
|
||||
X = nX
|
||||
|
||||
X_train = np.array(X[:int(len(X) * (1 - test_split))])
|
||||
y_train = np.array(labels[:int(len(X) * (1 - test_split))])
|
||||
X_train = np.array(X[:len(x_train)])
|
||||
y_train = np.array(labels[:len(x_train)])
|
||||
|
||||
X_test = np.array(X[int(len(X) * (1 - test_split)):])
|
||||
y_test = np.array(labels[int(len(X) * (1 - test_split)):])
|
||||
X_test = np.array(X[len(x_train):])
|
||||
y_test = np.array(labels[len(x_train):])
|
||||
|
||||
return (X_train, y_train), (X_test, y_test)
|
||||
|
||||
|
||||
def get_word_index(path='imdb_word_index.pkl'):
    '''Fetches the IMDB word index file and returns its unpickled content.

    # Arguments
        path: file name handed to `get_file` (which returns the
            local path that is then opened).

    # Returns
        The object stored in the pickle file (presumably a
        word -> index mapping, going by the function name).
    '''
    path = get_file(path,
                    origin='https://s3.amazonaws.com/text-datasets/imdb_word_index.pkl',
                    md5_hash='72d94b01291be4ff843198d3b0e1e4d7')
    # NOTE(review): unpickling can execute arbitrary code; this relies on
    # the origin (and, presumably, the md5 check in get_file) being trusted.
    # `with` guarantees the handle is closed even if unpickling raises.
    with open(path, 'rb') as f:
        if sys.version_info < (3,):
            data = cPickle.load(f)
        else:
            # Python 3 needs an explicit encoding to read the byte strings
            # stored in this pickle.
            data = cPickle.load(f, encoding='latin1')
    return data
|
||||
|
||||
@@ -7,11 +7,34 @@ import numpy as np
|
||||
import sys
|
||||
|
||||
|
||||
def load_data(path="reuters.pkl", nb_words=None, skip_top=0,
|
||||
def load_data(path='reuters.pkl', nb_words=None, skip_top=0,
|
||||
maxlen=None, test_split=0.2, seed=113,
|
||||
start_char=1, oov_char=2, index_from=3):
|
||||
'''
|
||||
# Arguments
|
||||
path: where to store the data (in `/.keras/dataset`)
|
||||
nb_words: max number of words to include. Words are ranked
|
||||
by how often they occur (in the training set) and only
|
||||
the most frequent words are kept
|
||||
skip_top: skip the top N most frequently occurring words
|
||||
(which may not be informative).
|
||||
maxlen: truncate sequences after this length.
|
||||
test_split: Fraction of the dataset to be used as test data.
|
||||
seed: random seed for sample shuffling.
|
||||
start_char: The start of a sequence will be marked with this character.
|
||||
Set to 1 because 0 is usually the padding character.
|
||||
oov_char: words that were cut out because of the `nb_words`
|
||||
or `skip_top` limit will be replaced with this character.
|
||||
index_from: index actual words with this index and higher.
|
||||
|
||||
path = get_file(path, origin="https://s3.amazonaws.com/text-datasets/reuters.pkl")
|
||||
Note that the 'out of vocabulary' character is only used for
|
||||
words that were present in the training set but are not included
|
||||
because they're not making the `nb_words` cut here.
|
||||
Words that were not seen in the training set but are in the test set
|
||||
have simply been skipped.
|
||||
'''
|
||||
|
||||
path = get_file(path, origin='https://s3.amazonaws.com/text-datasets/reuters.pkl')
|
||||
f = open(path, 'rb')
|
||||
X, labels = cPickle.load(f)
|
||||
f.close()
|
||||
@@ -62,14 +85,14 @@ def load_data(path="reuters.pkl", nb_words=None, skip_top=0,
|
||||
return (X_train, y_train), (X_test, y_test)
|
||||
|
||||
|
||||
def get_word_index(path="reuters_word_index.pkl"):
|
||||
path = get_file(path, origin="https://s3.amazonaws.com/text-datasets/reuters_word_index.pkl")
|
||||
def get_word_index(path='reuters_word_index.pkl'):
|
||||
path = get_file(path, origin='https://s3.amazonaws.com/text-datasets/reuters_word_index.pkl')
|
||||
f = open(path, 'rb')
|
||||
|
||||
if sys.version_info < (3,):
|
||||
data = cPickle.load(f)
|
||||
else:
|
||||
data = cPickle.load(f, encoding="latin1")
|
||||
data = cPickle.load(f, encoding='latin1')
|
||||
|
||||
f.close()
|
||||
return data
|
||||
|
||||
+212
-85
@@ -10,9 +10,11 @@ import marshal
|
||||
import types as python_types
|
||||
import warnings
|
||||
import copy
|
||||
import os
|
||||
from six.moves import zip
|
||||
|
||||
from keras import backend as K
|
||||
from .. import backend as K
|
||||
from ..utils.io_utils import ask_to_proceed_with_overwrite
|
||||
|
||||
|
||||
def to_list(x):
|
||||
@@ -282,10 +284,14 @@ class Layer(object):
|
||||
|
||||
# these properties will be set upon call of self.build(),
|
||||
# which itself will be called upon self.add_inbound_node if necessary.
|
||||
self.trainable_weights = []
|
||||
self.non_trainable_weights = []
|
||||
self.regularizers = []
|
||||
self.constraints = {} # dict {tensor: constraint instance}
|
||||
if not hasattr(self, 'trainable_weights'):
|
||||
self.trainable_weights = []
|
||||
if not hasattr(self, 'non_trainable_weights'):
|
||||
self.non_trainable_weights = []
|
||||
if not hasattr(self, 'regularizers'):
|
||||
self.regularizers = []
|
||||
if not hasattr(self, 'constraints'):
|
||||
self.constraints = {} # dict {tensor: constraint instance}
|
||||
self.built = False
|
||||
|
||||
# these properties should be set by the user via keyword arguments.
|
||||
@@ -321,6 +327,30 @@ class Layer(object):
|
||||
if 'create_input_layer' in kwargs:
|
||||
self.create_input_layer(batch_input_shape, input_dtype)
|
||||
|
||||
@property
def trainable_weights(self):
    '''Stored trainable weights, or an empty list when the layer is frozen.

    A missing `trainable` attribute is treated as `True`.
    '''
    if not getattr(self, 'trainable', True):
        return []
    return self._trainable_weights


@trainable_weights.setter
def trainable_weights(self, weights):
    # Keep the raw list; the getter decides what to expose.
    self._trainable_weights = weights
|
||||
|
||||
@property
def non_trainable_weights(self):
    '''Non-trainable weights of the layer.

    When `trainable` is false, the stored trainable weights are
    prepended to the stored non-trainable ones. A missing
    `trainable` attribute is treated as `True`.
    '''
    if getattr(self, 'trainable', True):
        return self._non_trainable_weights
    return self._trainable_weights + self._non_trainable_weights


@non_trainable_weights.setter
def non_trainable_weights(self, weights):
    # Keep the raw list; the getter decides what to expose.
    self._non_trainable_weights = weights
|
||||
|
||||
def create_input_layer(self, batch_input_shape,
|
||||
input_dtype=None, name=None):
|
||||
if not name:
|
||||
@@ -694,15 +724,15 @@ class Layer(object):
|
||||
' outbound layers. '
|
||||
'This will cause part of your model '
|
||||
'to be disconnected.')
|
||||
if not shape:
|
||||
if hasattr(K, 'int_shape'):
|
||||
shape = K.int_shape(input_tensor)
|
||||
else:
|
||||
raise Exception('`set_input` needs to know the shape '
|
||||
'of the `input_tensor` it receives, but '
|
||||
'Keras was not able to infer it automatically.'
|
||||
' Specify it via: '
|
||||
'`model.set_input(input_tensor, shape)`')
|
||||
if hasattr(K, 'int_shape'):
|
||||
# auto-inferred shape takes priority
|
||||
shape = K.int_shape(input_tensor)
|
||||
elif not shape:
|
||||
raise Exception('`set_input` needs to know the shape '
|
||||
'of the `input_tensor` it receives, but '
|
||||
'Keras was not able to infer it automatically.'
|
||||
' Specify it via: '
|
||||
'`model.set_input(input_tensor, shape)`')
|
||||
# reset layer connections
|
||||
self.inbound_nodes = []
|
||||
self.outbound_nodes = []
|
||||
@@ -828,6 +858,10 @@ class Layer(object):
|
||||
'ill-defined for the layer. ' +
|
||||
'Use `get_output_shape_at(node_index)` instead.')
|
||||
|
||||
@property
def weights(self):
    '''All weights: trainable ones first, then non-trainable ones.'''
    trainable_part = self.trainable_weights
    frozen_part = self.non_trainable_weights
    return trainable_part + frozen_part
|
||||
|
||||
def set_weights(self, weights):
|
||||
'''Sets the weights of the layer, from Numpy arrays.
|
||||
|
||||
@@ -838,12 +872,12 @@ class Layer(object):
|
||||
of the layer (i.e. it should match the
|
||||
output of `get_weights`).
|
||||
'''
|
||||
params = self.trainable_weights + self.non_trainable_weights
|
||||
params = self.weights
|
||||
if len(params) != len(weights):
|
||||
raise Exception('You called `set_weights(weights)` on layer "' + self.name +
|
||||
'" with a weight list of length ' + str(len(weights)) +
|
||||
', but the layer was expecting ' + str(len(params)) +
|
||||
' weights. Provided weights: ' + str(weights))
|
||||
' weights. Provided weights: ' + str(weights)[:50] + '...')
|
||||
if not params:
|
||||
return
|
||||
weight_value_tuples = []
|
||||
@@ -861,7 +895,7 @@ class Layer(object):
|
||||
'''Returns the current weights of the layer,
|
||||
as a list of numpy arrays.
|
||||
'''
|
||||
params = self.trainable_weights + self.non_trainable_weights
|
||||
params = self.weights
|
||||
return K.batch_get_value(params)
|
||||
|
||||
def get_config(self):
|
||||
@@ -920,6 +954,8 @@ class InputLayer(Layer):
|
||||
self.uses_learning_phase = False
|
||||
self.trainable = False
|
||||
self.built = True
|
||||
self.trainable_weights = []
|
||||
self.non_trainable_weights = []
|
||||
|
||||
self.inbound_nodes = []
|
||||
self.outbound_nodes = []
|
||||
@@ -1084,7 +1120,7 @@ class Merge(Layer):
|
||||
If lambda/function, it should take as input a list of tensors
|
||||
and return a single tensor.
|
||||
concat_axis: integer, axis to use in mode `concat`.
|
||||
dot_axes: integer or tuple of integers, axes to use in mode `dot`.
|
||||
dot_axes: integer or tuple of integers, axes to use in mode `dot` or `cos`.
|
||||
output_shape: either a shape tuple (tuple of integers), or a lambda/function
|
||||
to compute `output_shape` (only if merge mode is a lambda/function).
|
||||
If the argument is a tuple,
|
||||
@@ -1111,8 +1147,6 @@ class Merge(Layer):
|
||||
self.mode = mode
|
||||
self.concat_axis = concat_axis
|
||||
self.dot_axes = dot_axes
|
||||
if type(self.dot_axes) == int:
|
||||
self.dot_axes = [self.dot_axes, ] * 2
|
||||
self._output_shape = output_shape
|
||||
self.node_indices = node_indices
|
||||
self._output_mask = output_mask
|
||||
@@ -1188,16 +1222,16 @@ class Merge(Layer):
|
||||
n2 = len(shape2)
|
||||
if type(dot_axes) == int:
|
||||
if dot_axes < 0:
|
||||
dot_axes = [dot_axes % n1, dot_axes % n2]
|
||||
self.dot_axes = [dot_axes % n1, dot_axes % n2]
|
||||
else:
|
||||
dot_axes = [n1 - dot_axes, n2-dot_axes]
|
||||
if type(dot_axes) not in [list, tuple]:
|
||||
self.dot_axes = [dot_axes, ] * 2
|
||||
if type(self.dot_axes) not in [list, tuple]:
|
||||
raise Exception('Invalid type for dot_axes - should be a list.')
|
||||
if len(dot_axes) != 2:
|
||||
if len(self.dot_axes) != 2:
|
||||
raise Exception('Invalid format for dot_axes - should contain two elements.')
|
||||
if type(dot_axes[0]) is not int or type(dot_axes[1]) is not int:
|
||||
if type(self.dot_axes[0]) is not int or type(self.dot_axes[1]) is not int:
|
||||
raise Exception('Invalid format for dot_axes - list elements should be "int".')
|
||||
if shape1[dot_axes[0]] != shape2[dot_axes[1]]:
|
||||
if shape1[self.dot_axes[0]] != shape2[self.dot_axes[1]]:
|
||||
raise Exception('Dimension incompatibility using dot mode: ' +
|
||||
'%s != %s. ' % (shape1[dot_axes[0]], shape2[dot_axes[1]]) +
|
||||
'Layer shapes: %s, %s' % (shape1, shape2))
|
||||
@@ -1336,15 +1370,13 @@ class Merge(Layer):
|
||||
elif self.mode in ['dot', 'cos']:
|
||||
shape1 = list(input_shapes[0])
|
||||
shape2 = list(input_shapes[1])
|
||||
dot_axes = [a - 1 for a in self.dot_axes]
|
||||
tensordot_output = np.tensordot(np.zeros(tuple(shape1[1:])),
|
||||
np.zeros(tuple(shape2[1:])),
|
||||
axes=dot_axes)
|
||||
if len(tensordot_output.shape) == 0:
|
||||
shape = (1,)
|
||||
else:
|
||||
shape = tensordot_output.shape
|
||||
return (shape1[0],) + shape
|
||||
shape1.pop(self.dot_axes[0])
|
||||
shape2.pop(self.dot_axes[1])
|
||||
shape2.pop(0)
|
||||
output_shape = shape1 + shape2
|
||||
if len(output_shape) == 1:
|
||||
output_shape += [1]
|
||||
return tuple(output_shape)
|
||||
|
||||
def compute_mask(self, inputs, mask=None):
|
||||
if mask is None or all([m is None for m in mask]):
|
||||
@@ -1356,9 +1388,19 @@ class Merge(Layer):
|
||||
masks = [K.expand_dims(m, 0) for m in mask if m is not None]
|
||||
return K.all(K.concatenate(masks, axis=0), axis=0, keepdims=False)
|
||||
elif self.mode == 'concat':
|
||||
masks = [K.ones_like(inputs[i][:-1]) if m is None else m for i, m in zip(inputs, mask)]
|
||||
expanded_dims = [K.expand_dims(m) for m in masks]
|
||||
concatenated = K.concatenate(expanded_dims, axis=self.concat_axis)
|
||||
# Make a list of masks while making sure the dimensionality of each mask
|
||||
# is the same as the corresponding input.
|
||||
masks = []
|
||||
for input_i, mask_i in zip(inputs, mask):
|
||||
if mask_i is None:
|
||||
# Input is unmasked. Append all 1s to masks, but cast it to uint8 first
|
||||
masks.append(K.cast(K.ones_like(input_i), 'uint8'))
|
||||
elif K.ndim(mask_i) < K.ndim(input_i):
|
||||
# Mask is smaller than the input, expand it
|
||||
masks.append(K.expand_dims(mask_i))
|
||||
else:
|
||||
masks.append(mask_i)
|
||||
concatenated = K.concatenate(masks, axis=self.concat_axis)
|
||||
return K.all(concatenated, axis=-1, keepdims=False)
|
||||
elif self.mode in ['cos', 'dot']:
|
||||
return None
|
||||
@@ -1452,7 +1494,7 @@ def merge(inputs, mode='sum', concat_axis=-1,
|
||||
If lambda/function, it should take as input a list of tensors
|
||||
and return a single tensor.
|
||||
concat_axis: integer, axis to use in mode `concat`.
|
||||
dot_axes: integer or tuple of integers, axes to use in mode `dot`.
|
||||
dot_axes: integer or tuple of integers, axes to use in mode `dot` or `cos`.
|
||||
output_shape: shape tuple (tuple of integers), or lambda/function
|
||||
to compute output_shape (only if merge mode is a lambda/function).
|
||||
If the latter case, it should take as input a list of shape tuples
|
||||
@@ -1544,6 +1586,9 @@ class Container(Layer):
|
||||
name = prefix + '_' + str(K.get_uid(prefix))
|
||||
self.name = name
|
||||
|
||||
# whether container weights are trainable
|
||||
self.trainable = True
|
||||
|
||||
# Container-specific properties
|
||||
if type(input) in {list, tuple}:
|
||||
self.inputs = list(input) # tensor or list of tensors
|
||||
@@ -1673,6 +1718,7 @@ class Container(Layer):
|
||||
container_nodes = set() # ids of all nodes relevant to the Container
|
||||
nodes_depths = {} # map {node: depth value}
|
||||
layers_depths = {} # map {layer: depth value}
|
||||
layer_indices = {} # map {layer: index in traversal}
|
||||
|
||||
def make_node_marker(node, depth):
|
||||
return str(id(node)) + '-' + str(depth)
|
||||
@@ -1716,6 +1762,8 @@ class Container(Layer):
|
||||
else:
|
||||
current_depth = max(depth, previously_seen_depth)
|
||||
layers_depths[layer] = current_depth
|
||||
if layer not in layer_indices:
|
||||
layer_indices[layer] = len(layer_indices)
|
||||
|
||||
# propagate to all previous tensors connected to this node
|
||||
for i in range(len(node.inbound_layers)):
|
||||
@@ -1756,8 +1804,12 @@ class Container(Layer):
|
||||
layers = []
|
||||
for depth in depth_keys:
|
||||
layers_for_depth = layers_by_depth[depth]
|
||||
# container.layers needs to have a deterministic order
|
||||
layers_for_depth.sort(key=lambda x: x.name)
|
||||
# container.layers needs to have a deterministic order:
|
||||
# here we order them by traversal order
|
||||
if K.legacy_weight_ordering():
|
||||
layers_for_depth.sort(key=lambda x: x.name)
|
||||
else:
|
||||
layers_for_depth.sort(key=lambda x: layer_indices[x])
|
||||
for layer in layers_for_depth:
|
||||
layers.append(layer)
|
||||
self.layers = layers
|
||||
@@ -1913,6 +1965,8 @@ class Container(Layer):
|
||||
|
||||
@property
|
||||
def trainable_weights(self):
|
||||
if not self.trainable:
|
||||
return []
|
||||
weights = []
|
||||
for layer in self.layers:
|
||||
weights += layer.trainable_weights
|
||||
@@ -1923,8 +1977,37 @@ class Container(Layer):
|
||||
weights = []
|
||||
for layer in self.layers:
|
||||
weights += layer.non_trainable_weights
|
||||
if not self.trainable:
|
||||
trainable_weights = []
|
||||
for layer in self.layers:
|
||||
trainable_weights += layer.trainable_weights
|
||||
return trainable_weights + weights
|
||||
return weights
|
||||
|
||||
def get_weights(self):
    '''Returns the weights of the model,
    as a flat list of Numpy arrays.

    The symbolic weights of every layer in `self.layers` are
    gathered in order and fetched with one `K.batch_get_value` call.
    '''
    symbolic_weights = []
    for layer in self.layers:
        symbolic_weights.extend(layer.weights)
    return K.batch_get_value(symbolic_weights)
|
||||
|
||||
def set_weights(self, weights):
    '''Sets the weights of the model.

    The `weights` argument should be a list
    of Numpy arrays with shapes and types matching
    the output of `model.get_weights()`.

    Values are consumed in layer order: each layer takes as many
    leading entries as it has weights, and all assignments are
    applied with one `K.batch_set_value` call.
    '''
    assignments = []
    cursor = 0  # index of the first value not yet handed to a layer
    for layer in self.layers:
        layer_vars = layer.weights
        count = len(layer_vars)
        assignments.extend(zip(layer_vars, weights[cursor:cursor + count]))
        cursor += count
    K.batch_set_value(assignments)
|
||||
|
||||
@property
|
||||
def input_spec(self):
|
||||
specs = []
|
||||
@@ -2312,7 +2395,38 @@ class Container(Layer):
|
||||
output_tensors.append(layer_output_tensors[tensor_index])
|
||||
return cls(input=input_tensors, output=output_tensors, name=name)
|
||||
|
||||
def save_weights(self, filepath, overwrite=False):
|
||||
def save(self, filepath, overwrite=True):
    '''Saves the whole model into a single HDF5 file.

    The file holds:
        - the model architecture, so the model can be re-instantiated
        - the model weights
        - the state of the optimizer, so training can resume
            exactly where it stopped.

    A saved model can be restored with `keras.models.load_model`.
    The restored model is compiled and ready to use (unless the
    saved model was never compiled in the first place).

    # Arguments
        filepath: destination path, forwarded to `save_model`.
        overwrite: forwarded to `save_model`.

    # Example usage

    ```python
    from keras.models import load_model

    model.save('my_model.h5')  # creates a HDF5 file 'my_model.h5'
    del model  # deletes the existing model

    # returns a compiled model
    # identical to the previous one
    model = load_model('my_model.h5')
    ```
    '''
    # Imported here rather than at module level (presumably to avoid a
    # circular import between this module and `models`).
    from ..models import save_model
    save_model(self, filepath, overwrite)
|
||||
|
||||
def save_weights(self, filepath, overwrite=True):
|
||||
'''Dumps all layer weights to a HDF5 file.
|
||||
|
||||
The weight file has:
|
||||
@@ -2325,33 +2439,28 @@ class Container(Layer):
|
||||
storing the weight value, named after the weight tensor
|
||||
'''
|
||||
import h5py
|
||||
import os.path
|
||||
# if file exists and should not be overwritten
|
||||
if not overwrite and os.path.isfile(filepath):
|
||||
import sys
|
||||
get_input = input
|
||||
if sys.version_info[:2] <= (2, 7):
|
||||
get_input = raw_input
|
||||
overwrite = get_input('[WARNING] %s already exists - overwrite? '
|
||||
'[y/n]' % (filepath))
|
||||
while overwrite not in ['y', 'n']:
|
||||
overwrite = get_input('Enter "y" (overwrite) or "n" (cancel).')
|
||||
if overwrite == 'n':
|
||||
proceed = ask_to_proceed_with_overwrite(filepath)
|
||||
if not proceed:
|
||||
return
|
||||
print('[TIP] Next time specify overwrite=True in save_weights!')
|
||||
f = h5py.File(filepath, 'w')
|
||||
self.save_weights_to_hdf5_group(f)
|
||||
f.flush()
|
||||
f.close()
|
||||
|
||||
def save_weights_to_hdf5_group(self, f):
|
||||
if hasattr(self, 'flattened_layers'):
|
||||
# support for legacy Sequential/Merge behavior
|
||||
flattened_layers = self.flattened_layers
|
||||
else:
|
||||
flattened_layers = self.layers
|
||||
|
||||
f = h5py.File(filepath, 'w')
|
||||
f.attrs['layer_names'] = [layer.name.encode('utf8') for layer in flattened_layers]
|
||||
|
||||
for layer in flattened_layers:
|
||||
g = f.create_group(layer.name)
|
||||
symbolic_weights = layer.trainable_weights + layer.non_trainable_weights
|
||||
symbolic_weights = layer.weights
|
||||
weight_values = K.batch_get_value(symbolic_weights)
|
||||
weight_names = []
|
||||
for i, (w, val) in enumerate(zip(symbolic_weights, weight_values)):
|
||||
@@ -2364,16 +2473,30 @@ class Container(Layer):
|
||||
for name, val in zip(weight_names, weight_values):
|
||||
param_dset = g.create_dataset(name, val.shape,
|
||||
dtype=val.dtype)
|
||||
param_dset[:] = val
|
||||
f.flush()
|
||||
f.close()
|
||||
if not val.shape:
|
||||
# scalar
|
||||
param_dset[()] = val
|
||||
else:
|
||||
param_dset[:] = val
|
||||
|
||||
def load_weights(self, filepath):
    '''Load all layer weights from a HDF5 save file.

    If the file has no `layer_names` attribute at its root but
    contains a `model_weights` group, the weights are read from
    that group instead of from the root.

    # Arguments
        filepath: path of the HDF5 file to read.
    '''
    import h5py
    h5_file = h5py.File(filepath, mode='r')
    try:
        f = h5_file
        if 'layer_names' not in f.attrs and 'model_weights' in f:
            f = f['model_weights']
        self.load_weights_from_hdf5_group(f)
    finally:
        # Close the file itself: `f` may have been rebound to a group,
        # which has no `close`, and loading may have raised.
        h5_file.close()
|
||||
|
||||
def load_weights_from_hdf5_group(self, f):
|
||||
'''Weight loading is based on layer order in a list
|
||||
(matching model.flattened_layers for Sequential models,
|
||||
and model.layers for Model class instances), not
|
||||
on layer names.
|
||||
Layers that have no weights are skipped.
|
||||
'''
|
||||
if hasattr(self, 'flattened_layers'):
|
||||
# support for legacy Sequential/Merge behavior
|
||||
flattened_layers = self.flattened_layers
|
||||
@@ -2387,7 +2510,7 @@ class Container(Layer):
|
||||
raise Exception('You are trying to load a weight file '
|
||||
'containing ' + str(nb_layers) +
|
||||
' layers into a model with ' +
|
||||
str(len(flattened_layers)) + '.')
|
||||
str(len(flattened_layers)) + ' layers.')
|
||||
|
||||
for k in range(nb_layers):
|
||||
g = f['layer_{}'.format(k)]
|
||||
@@ -2395,7 +2518,21 @@ class Container(Layer):
|
||||
flattened_layers[k].set_weights(weights)
|
||||
else:
|
||||
# new file format
|
||||
filtered_layers = []
|
||||
for layer in flattened_layers:
|
||||
weights = layer.weights
|
||||
if weights:
|
||||
filtered_layers.append(layer)
|
||||
flattened_layers = filtered_layers
|
||||
|
||||
layer_names = [n.decode('utf8') for n in f.attrs['layer_names']]
|
||||
filtered_layer_names = []
|
||||
for name in layer_names:
|
||||
g = f[name]
|
||||
weight_names = [n.decode('utf8') for n in g.attrs['weight_names']]
|
||||
if len(weight_names):
|
||||
filtered_layer_names.append(name)
|
||||
layer_names = filtered_layer_names
|
||||
if len(layer_names) != len(flattened_layers):
|
||||
raise Exception('You are trying to load a weight file '
|
||||
'containing ' + str(len(layer_names)) +
|
||||
@@ -2408,24 +2545,22 @@ class Container(Layer):
|
||||
for k, name in enumerate(layer_names):
|
||||
g = f[name]
|
||||
weight_names = [n.decode('utf8') for n in g.attrs['weight_names']]
|
||||
if len(weight_names):
|
||||
weight_values = [g[weight_name] for weight_name in weight_names]
|
||||
layer = flattened_layers[k]
|
||||
symbolic_weights = layer.trainable_weights + layer.non_trainable_weights
|
||||
if len(weight_values) != len(symbolic_weights):
|
||||
raise Exception('Layer #' + str(k) +
|
||||
' (named "' + layer.name +
|
||||
'" in the current model) was found to '
|
||||
'correspond to layer ' + name +
|
||||
' in the save file. '
|
||||
'However the new layer ' + layer.name +
|
||||
' expects ' + str(len(symbolic_weights)) +
|
||||
' weights, but the saved weights have ' +
|
||||
str(len(weight_values)) +
|
||||
' elements.')
|
||||
weight_value_tuples += zip(symbolic_weights, weight_values)
|
||||
weight_values = [g[weight_name] for weight_name in weight_names]
|
||||
layer = flattened_layers[k]
|
||||
symbolic_weights = layer.weights
|
||||
if len(weight_values) != len(symbolic_weights):
|
||||
raise Exception('Layer #' + str(k) +
|
||||
' (named "' + layer.name +
|
||||
'" in the current model) was found to '
|
||||
'correspond to layer ' + name +
|
||||
' in the save file. '
|
||||
'However the new layer ' + layer.name +
|
||||
' expects ' + str(len(symbolic_weights)) +
|
||||
' weights, but the saved weights have ' +
|
||||
str(len(weight_values)) +
|
||||
' elements.')
|
||||
weight_value_tuples += zip(symbolic_weights, weight_values)
|
||||
K.batch_set_value(weight_value_tuples)
|
||||
f.close()
|
||||
|
||||
def _updated_config(self):
|
||||
'''shared between different serialization methods'''
|
||||
@@ -2437,14 +2572,6 @@ class Container(Layer):
|
||||
'config': config,
|
||||
'keras_version': keras_version
|
||||
}
|
||||
|
||||
if hasattr(self, 'optimizer'):
|
||||
model_config['optimizer'] = self.optimizer.get_config()
|
||||
model_config['loss'] = getattr(self.loss, '__name__', self.loss)
|
||||
model_config['sample_weight_mode'] = self.sample_weight_mode
|
||||
|
||||
if hasattr(self, 'loss_weights'):
|
||||
model_config['loss_weights'] = self.loss_weights
|
||||
return model_config
|
||||
|
||||
def to_json(self, **kwargs):
|
||||
@@ -2464,7 +2591,7 @@ class Container(Layer):
|
||||
if type(obj).__name__ == type.__name__:
|
||||
return obj.__name__
|
||||
|
||||
raise TypeError('Not JSON Serializable')
|
||||
raise TypeError('Not JSON Serializable:', obj)
|
||||
|
||||
model_config = self._updated_config()
|
||||
return json.dumps(model_config, default=get_json_type, **kwargs)
|
||||
|
||||
+26
-29
@@ -418,15 +418,11 @@ def generator_queue(generator, max_q_size=10,
|
||||
_stop = threading.Event()
|
||||
|
||||
try:
|
||||
|
||||
def data_generator_task():
|
||||
while not _stop.is_set():
|
||||
try:
|
||||
if q.qsize() < max_q_size:
|
||||
try:
|
||||
generator_output = next(generator)
|
||||
except ValueError:
|
||||
continue
|
||||
if pickle_safe or q.qsize() < max_q_size:
|
||||
generator_output = next(generator)
|
||||
q.put(generator_output)
|
||||
else:
|
||||
time.sleep(wait_time)
|
||||
@@ -444,7 +440,6 @@ def generator_queue(generator, max_q_size=10,
|
||||
generator_threads.append(thread)
|
||||
thread.daemon = True
|
||||
thread.start()
|
||||
|
||||
except:
|
||||
_stop.set()
|
||||
if pickle_safe:
|
||||
@@ -610,8 +605,9 @@ class Model(Container):
|
||||
self.targets.append(K.placeholder(ndim=len(shape), name=name + '_target'))
|
||||
|
||||
# prepare metrics
|
||||
self.metrics = metrics
|
||||
self.metrics_names = ['loss']
|
||||
self.metrics = []
|
||||
self.metrics_tensors = []
|
||||
|
||||
# compute total loss
|
||||
total_loss = None
|
||||
@@ -625,7 +621,7 @@ class Model(Container):
|
||||
output_loss = weighted_loss(y_true, y_pred,
|
||||
sample_weight, mask)
|
||||
if len(self.outputs) > 1:
|
||||
self.metrics.append(output_loss)
|
||||
self.metrics_tensors.append(output_loss)
|
||||
self.metrics_names.append(self.output_names[i] + '_loss')
|
||||
if total_loss is None:
|
||||
total_loss = loss_weight * output_loss
|
||||
@@ -650,21 +646,21 @@ class Model(Container):
|
||||
output_shape = self.internal_output_shapes[i]
|
||||
if output_shape[-1] == 1 or self.loss_functions[i] == objectives.binary_crossentropy:
|
||||
# case: binary accuracy
|
||||
self.metrics.append(metrics_module.binary_accuracy(y_true, y_pred))
|
||||
self.metrics_tensors.append(metrics_module.binary_accuracy(y_true, y_pred))
|
||||
elif self.loss_functions[i] == objectives.sparse_categorical_crossentropy:
|
||||
# case: categorical accuracy with sparse targets
|
||||
self.metrics.append(
|
||||
self.metrics_tensors.append(
|
||||
metrics_module.sparse_categorical_accuracy(y_true, y_pred))
|
||||
else:
|
||||
# case: categorical accuracy with dense targets
|
||||
self.metrics.append(metrics_module.categorical_accuracy(y_true, y_pred))
|
||||
self.metrics_tensors.append(metrics_module.categorical_accuracy(y_true, y_pred))
|
||||
if len(self.output_names) == 1:
|
||||
self.metrics_names.append('acc')
|
||||
else:
|
||||
self.metrics_names.append(self.output_layers[i].name + '_acc')
|
||||
else:
|
||||
metric_fn = metrics_module.get(metric)
|
||||
self.metrics.append(metric_fn(y_true, y_pred))
|
||||
self.metrics_tensors.append(metric_fn(y_true, y_pred))
|
||||
if len(self.output_names) == 1:
|
||||
self.metrics_names.append(metric_fn.__name__)
|
||||
else:
|
||||
@@ -688,7 +684,7 @@ class Model(Container):
|
||||
if not hasattr(self, 'train_function'):
|
||||
raise Exception('You must compile your model before using it.')
|
||||
if self.train_function is None:
|
||||
if self.uses_learning_phase:
|
||||
if self.uses_learning_phase and type(K.learning_phase()) is not int:
|
||||
inputs = self.inputs + self.targets + self.sample_weights + [K.learning_phase()]
|
||||
else:
|
||||
inputs = self.inputs + self.targets + self.sample_weights
|
||||
@@ -700,7 +696,7 @@ class Model(Container):
|
||||
|
||||
# returns loss and metrics. Updates weights at each call.
|
||||
self.train_function = K.function(inputs,
|
||||
[self.total_loss] + self.metrics,
|
||||
[self.total_loss] + self.metrics_tensors,
|
||||
updates=updates,
|
||||
**self._function_kwargs)
|
||||
|
||||
@@ -708,14 +704,14 @@ class Model(Container):
|
||||
if not hasattr(self, 'test_function'):
|
||||
raise Exception('You must compile your model before using it.')
|
||||
if self.test_function is None:
|
||||
if self.uses_learning_phase:
|
||||
if self.uses_learning_phase and type(K.learning_phase()) is not int:
|
||||
inputs = self.inputs + self.targets + self.sample_weights + [K.learning_phase()]
|
||||
else:
|
||||
inputs = self.inputs + self.targets + self.sample_weights
|
||||
# return loss and metrics, no gradient updates.
|
||||
# Does update the network states.
|
||||
self.test_function = K.function(inputs,
|
||||
[self.total_loss] + self.metrics,
|
||||
[self.total_loss] + self.metrics_tensors,
|
||||
updates=self.state_updates,
|
||||
**self._function_kwargs)
|
||||
|
||||
@@ -723,7 +719,7 @@ class Model(Container):
|
||||
if not hasattr(self, 'predict_function'):
|
||||
self.predict_function = None
|
||||
if self.predict_function is None:
|
||||
if self.uses_learning_phase:
|
||||
if self.uses_learning_phase and type(K.learning_phase()) is not int:
|
||||
inputs = self.inputs + [K.learning_phase()]
|
||||
else:
|
||||
inputs = self.inputs
|
||||
@@ -1050,7 +1046,7 @@ class Model(Container):
|
||||
batch_size=batch_size)
|
||||
self._make_test_function()
|
||||
val_f = self.test_function
|
||||
if self.uses_learning_phase:
|
||||
if self.uses_learning_phase and type(K.learning_phase()) is not int:
|
||||
val_ins = val_x + val_y + val_sample_weights + [0.]
|
||||
else:
|
||||
val_ins = val_x + val_y + val_sample_weights
|
||||
@@ -1064,7 +1060,7 @@ class Model(Container):
|
||||
slice_X(sample_weights, 0, split_at), slice_X(sample_weights, split_at))
|
||||
self._make_test_function()
|
||||
val_f = self.test_function
|
||||
if self.uses_learning_phase:
|
||||
if self.uses_learning_phase and type(K.learning_phase()) is not int:
|
||||
val_ins = val_x + val_y + val_sample_weights + [0.]
|
||||
else:
|
||||
val_ins = val_x + val_y + val_sample_weights
|
||||
@@ -1074,7 +1070,7 @@ class Model(Container):
|
||||
val_ins = None
|
||||
|
||||
# prepare input arrays and training function
|
||||
if self.uses_learning_phase:
|
||||
if self.uses_learning_phase and type(K.learning_phase()) is not int:
|
||||
ins = x + y + sample_weights + [1.]
|
||||
else:
|
||||
ins = x + y + sample_weights
|
||||
@@ -1134,7 +1130,7 @@ class Model(Container):
|
||||
check_batch_dim=False,
|
||||
batch_size=batch_size)
|
||||
# prepare inputs, delegate logic to _test_loop
|
||||
if self.uses_learning_phase:
|
||||
if self.uses_learning_phase and type(K.learning_phase()) is not int:
|
||||
ins = x + y + sample_weights + [0.]
|
||||
else:
|
||||
ins = x + y + sample_weights
|
||||
@@ -1171,7 +1167,7 @@ class Model(Container):
|
||||
'Batch size: ' + str(batch_size) + '.')
|
||||
|
||||
# prepare inputs, delegate logic to _predict_loop
|
||||
if self.uses_learning_phase:
|
||||
if self.uses_learning_phase and type(K.learning_phase()) is not int:
|
||||
ins = x + [0.]
|
||||
else:
|
||||
ins = x
|
||||
@@ -1215,7 +1211,7 @@ class Model(Container):
|
||||
sample_weight=sample_weight,
|
||||
class_weight=class_weight,
|
||||
check_batch_dim=True)
|
||||
if self.uses_learning_phase:
|
||||
if self.uses_learning_phase and type(K.learning_phase()) is not int:
|
||||
ins = x + y + sample_weights + [1.]
|
||||
else:
|
||||
ins = x + y + sample_weights
|
||||
@@ -1253,7 +1249,7 @@ class Model(Container):
|
||||
x, y, sample_weights = self._standardize_user_data(x, y,
|
||||
sample_weight=sample_weight,
|
||||
check_batch_dim=True)
|
||||
if self.uses_learning_phase:
|
||||
if self.uses_learning_phase and type(K.learning_phase()) is not int:
|
||||
ins = x + y + sample_weights + [0.]
|
||||
else:
|
||||
ins = x + y + sample_weights
|
||||
@@ -1268,7 +1264,7 @@ class Model(Container):
|
||||
'''
|
||||
x = standardize_input_data(x, self.input_names,
|
||||
self.internal_input_shapes)
|
||||
if self.uses_learning_phase:
|
||||
if self.uses_learning_phase and type(K.learning_phase()) is not int:
|
||||
ins = x + [0.]
|
||||
else:
|
||||
ins = x
|
||||
@@ -1314,7 +1310,7 @@ class Model(Container):
|
||||
max_q_size: maximum size for the generator queue
|
||||
nb_worker: maximum number of processes to spin up when using process based threading
|
||||
pickle_safe: if True, use process based threading. Note that because
|
||||
this implementation relies on multiprocessing, you should not pass non
|
||||
this implementation relies on multiprocessing, you should not pass
|
||||
non picklable arguments to the generator as they can't be passed
|
||||
easily to children processes.
|
||||
|
||||
@@ -1475,6 +1471,7 @@ class Model(Container):
|
||||
# no need for try/except because
|
||||
# data has already been validated
|
||||
val_outs = self.evaluate(val_x, val_y,
|
||||
batch_size=batch_size,
|
||||
sample_weight=val_sample_weights,
|
||||
verbose=0)
|
||||
if type(val_outs) is not list:
|
||||
@@ -1508,7 +1505,7 @@ class Model(Container):
|
||||
max_q_size: maximum size for the generator queue
|
||||
nb_worker: maximum number of processes to spin up when using process based threading
|
||||
pickle_safe: if True, use process based threading. Note that because
|
||||
this implementation relies on multiprocessing, you should not pass non
|
||||
this implementation relies on multiprocessing, you should not pass
|
||||
non picklable arguments to the generator as they can't be passed
|
||||
easily to children processes.
|
||||
|
||||
@@ -1593,7 +1590,7 @@ class Model(Container):
|
||||
max_q_size: maximum size for the generator queue
|
||||
nb_worker: maximum number of processes to spin up when using process based threading
|
||||
pickle_safe: if True, use process based threading. Note that because
|
||||
this implementation relies on multiprocessing, you should not pass non
|
||||
this implementation relies on multiprocessing, you should not pass
|
||||
non picklable arguments to the generator as they can't be passed
|
||||
easily to children processes.
|
||||
|
||||
|
||||
@@ -29,13 +29,11 @@ def get_fans(shape, dim_ordering='th'):
|
||||
|
||||
|
||||
def uniform(shape, scale=0.05, name=None):
|
||||
return K.variable(np.random.uniform(low=-scale, high=scale, size=shape),
|
||||
name=name)
|
||||
return K.random_uniform_variable(shape, -scale, scale, name=name)
|
||||
|
||||
|
||||
def normal(shape, scale=0.05, name=None):
|
||||
return K.variable(np.random.normal(loc=0.0, scale=scale, size=shape),
|
||||
name=name)
|
||||
return K.random_normal_variable(shape, 0.0, scale, name=name)
|
||||
|
||||
|
||||
def lecun_uniform(shape, name=None, dim_ordering='th'):
|
||||
|
||||
@@ -112,7 +112,7 @@ class ELU(Layer):
|
||||
return pos + self.alpha * (K.exp(neg) - 1.)
|
||||
|
||||
def get_config(self):
|
||||
config = {'alpha': self.alpha}
|
||||
config = {'alpha': float(self.alpha)}
|
||||
base_config = super(ELU, self).get_config()
|
||||
return dict(list(base_config.items()) + list(config.items()))
|
||||
|
||||
@@ -161,8 +161,8 @@ class ParametricSoftplus(Layer):
|
||||
return K.softplus(self.betas * x) * self.alphas
|
||||
|
||||
def get_config(self):
|
||||
config = {'alpha_init': self.alpha_init,
|
||||
'beta_init': self.beta_init}
|
||||
config = {'alpha_init': float(self.alpha_init),
|
||||
'beta_init': float(self.beta_init)}
|
||||
base_config = super(ParametricSoftplus, self).get_config()
|
||||
return dict(list(base_config.items()) + list(config.items()))
|
||||
|
||||
@@ -195,7 +195,7 @@ class ThresholdedReLU(Layer):
|
||||
return x * K.cast(x > self.theta, K.floatx())
|
||||
|
||||
def get_config(self):
|
||||
config = {'theta': self.theta}
|
||||
config = {'theta': float(self.theta)}
|
||||
base_config = super(ThresholdedReLU, self).get_config()
|
||||
return dict(list(base_config.items()) + list(config.items()))
|
||||
|
||||
|
||||
+420
-31
@@ -4,7 +4,7 @@ from __future__ import absolute_import
|
||||
from .. import backend as K
|
||||
from .. import activations, initializations, regularizers, constraints
|
||||
from ..engine import Layer, InputSpec
|
||||
from ..utils.np_utils import conv_output_length
|
||||
from ..utils.np_utils import conv_output_length, conv_input_length
|
||||
|
||||
# imports for backwards namespace compatibility
|
||||
from .pooling import AveragePooling1D, AveragePooling2D, AveragePooling3D
|
||||
@@ -254,11 +254,12 @@ class Convolution2D(Layer):
|
||||
'''
|
||||
def __init__(self, nb_filter, nb_row, nb_col,
|
||||
init='glorot_uniform', activation='linear', weights=None,
|
||||
border_mode='valid', subsample=(1, 1), dim_ordering=K.image_dim_ordering(),
|
||||
border_mode='valid', subsample=(1, 1), dim_ordering='default',
|
||||
W_regularizer=None, b_regularizer=None, activity_regularizer=None,
|
||||
W_constraint=None, b_constraint=None,
|
||||
bias=True, **kwargs):
|
||||
|
||||
if dim_ordering == 'default':
|
||||
dim_ordering = K.image_dim_ordering()
|
||||
if border_mode not in {'valid', 'same'}:
|
||||
raise Exception('Invalid border mode for Convolution2D:', border_mode)
|
||||
self.nb_filter = nb_filter
|
||||
@@ -379,6 +380,164 @@ class Convolution2D(Layer):
|
||||
return dict(list(base_config.items()) + list(config.items()))
|
||||
|
||||
|
||||
class Deconvolution2D(Convolution2D):
    '''Transposed convolution operator for filtering windows of two-dimensional inputs.

    The need for transposed convolutions generally arises from the desire
    to use a transformation going in the opposite direction of a normal
    convolution, i.e., from something that has the shape of the output of
    some convolution to something that has the shape of its input,
    while maintaining a connectivity pattern that is compatible
    with said convolution. [1]

    When using this layer as the first layer in a model,
    provide the keyword argument `input_shape`
    (tuple of integers, does not include the sample axis),
    e.g. `input_shape=(3, 128, 128)` for 128x128 RGB pictures.

    # Examples

    ```python
        # apply a 3x3 transposed convolution with stride 1x1 and 3 output filters on a 12x12 image:
        model = Sequential()
        model.add(Deconvolution2D(3, 3, 3, output_shape=(None, 3, 14, 14), border_mode='valid', input_shape=(3, 12, 12)))
        # output_shape will be (None, 3, 14, 14)

        # apply a 3x3 transposed convolution with stride 2x2 and 3 output filters on a 12x12 image:
        model = Sequential()
        model.add(Deconvolution2D(3, 3, 3, output_shape=(None, 3, 25, 25), subsample=(2, 2), border_mode='valid', input_shape=(3, 12, 12)))
        model.summary()
        # output_shape will be (None, 3, 25, 25)
    ```

    # Arguments
        nb_filter: Number of transposed convolution filters to use.
        nb_row: Number of rows in the transposed convolution kernel.
        nb_col: Number of columns in the transposed convolution kernel.
        output_shape: Output shape of the transposed convolution operation.
            tuple of integers (nb_samples, nb_filter, nb_output_rows, nb_output_cols)
            Formula for calculation of the output shape [1], [2]:
                o = s * (i - 1) + a + k - 2 * p, with a in {0, ..., s - 1}
                where:
                    i - input size (rows or cols),
                    k - kernel size (nb_row or nb_col respectively),
                    s - stride (subsample for rows or cols respectively),
                    p - padding size,
                    a - user-specified quantity used to distinguish between
                        the s different possible output sizes.
        init: name of initialization function for the weights of the layer
            (see [initializations](../initializations.md)), or alternatively,
            Theano function to use for weights initialization.
            This parameter is only relevant if you don't pass
            a `weights` argument.
        activation: name of activation function to use
            (see [activations](../activations.md)),
            or alternatively, elementwise Theano/TensorFlow function.
            If you don't specify anything, no activation is applied
            (ie. "linear" activation: a(x) = x).
        weights: list of numpy arrays to set as initial weights.
        border_mode: 'valid' or 'same'.
        subsample: tuple of length 2. Factor by which to oversample output.
            Also called strides elsewhere.
        W_regularizer: instance of [WeightRegularizer](../regularizers.md)
            (eg. L1 or L2 regularization), applied to the main weights matrix.
        b_regularizer: instance of [WeightRegularizer](../regularizers.md),
            applied to the bias.
        activity_regularizer: instance of [ActivityRegularizer](../regularizers.md),
            applied to the network output.
        W_constraint: instance of the [constraints](../constraints.md) module
            (eg. maxnorm, nonneg), applied to the main weights matrix.
        b_constraint: instance of the [constraints](../constraints.md) module,
            applied to the bias.
        dim_ordering: 'th' or 'tf'. In 'th' mode, the channels dimension
            (the depth) is at index 1, in 'tf' mode it is at index 3.
            It defaults to the `image_dim_ordering` value found in your
            Keras config file at `~/.keras/keras.json`.
            If you never set it, then it will be "th".
        bias: whether to include a bias (i.e. make the layer affine rather than linear).

    # Input shape
        4D tensor with shape:
        `(samples, channels, rows, cols)` if dim_ordering='th'
        or 4D tensor with shape:
        `(samples, rows, cols, channels)` if dim_ordering='tf'.

    # Output shape
        4D tensor with shape:
        `(samples, nb_filter, new_rows, new_cols)` if dim_ordering='th'
        or 4D tensor with shape:
        `(samples, new_rows, new_cols, nb_filter)` if dim_ordering='tf'.
        `rows` and `cols` values might have changed due to padding.

    # References
        [1] [A guide to convolution arithmetic for deep learning](https://arxiv.org/abs/1603.07285 "arXiv:1603.07285v1 [stat.ML]")
        [2] [Transposed convolution arithmetic](http://deeplearning.net/software/theano_versions/dev/tutorial/conv_arithmetic.html#transposed-convolution-arithmetic)
        [3] [Deconvolutional Networks](http://www.matthewzeiler.com/pubs/cvpr2010/cvpr2010.pdf)
    '''
    def __init__(self, nb_filter, nb_row, nb_col, output_shape,
                 init='glorot_uniform', activation='linear', weights=None,
                 border_mode='valid', subsample=(1, 1),
                 dim_ordering='default',
                 W_regularizer=None, b_regularizer=None, activity_regularizer=None,
                 W_constraint=None, b_constraint=None,
                 bias=True, **kwargs):
        # Resolve the sentinel at instantiation time (same convention as the
        # sibling convolutional layers) instead of freezing the configured
        # ordering into the signature when the module is imported.
        if dim_ordering == 'default':
            dim_ordering = K.image_dim_ordering()

        if border_mode not in {'valid', 'same'}:
            raise Exception('Invalid border mode for Deconvolution2D:', border_mode)

        # Stored with a trailing underscore so the user-requested shape is
        # kept under its own name, distinct from the layer's `output_shape`.
        self.output_shape_ = output_shape

        super(Deconvolution2D, self).__init__(nb_filter, nb_row, nb_col,
                                              init=init, activation=activation,
                                              weights=weights, border_mode=border_mode,
                                              subsample=subsample, dim_ordering=dim_ordering,
                                              W_regularizer=W_regularizer, b_regularizer=b_regularizer,
                                              activity_regularizer=activity_regularizer,
                                              W_constraint=W_constraint, b_constraint=b_constraint,
                                              bias=bias, **kwargs)

    def get_output_shape_for(self, input_shape):
        '''Returns the 4D output shape derived from `input_shape`.

        The spatial sizes are obtained by passing the input rows/cols through
        `conv_input_length` with this layer's kernel size, border mode and
        stride; the channel axis is replaced by `nb_filter`.

        NOTE(review): the result is computed from `input_shape` only and is
        not cross-checked against the `output_shape` given to the constructor.
        '''
        if self.dim_ordering == 'th':
            rows, cols = input_shape[2], input_shape[3]
        elif self.dim_ordering == 'tf':
            rows, cols = input_shape[1], input_shape[2]
        else:
            raise Exception('Invalid dim_ordering: ' + self.dim_ordering)

        rows = conv_input_length(rows, self.nb_row,
                                 self.border_mode, self.subsample[0])
        cols = conv_input_length(cols, self.nb_col,
                                 self.border_mode, self.subsample[1])

        # dim_ordering was validated above, so only 'th' and 'tf' remain.
        if self.dim_ordering == 'th':
            return (input_shape[0], self.nb_filter, rows, cols)
        return (input_shape[0], rows, cols, self.nb_filter)

    def call(self, x, mask=None):
        '''Applies the transposed convolution, the optional bias and the activation.'''
        output = K.deconv2d(x, self.W, self.output_shape_,
                            strides=self.subsample,
                            border_mode=self.border_mode,
                            dim_ordering=self.dim_ordering,
                            filter_shape=self.W_shape)
        if self.bias:
            # Reshape the bias so it broadcasts along the channel axis.
            if self.dim_ordering == 'th':
                output += K.reshape(self.b, (1, self.nb_filter, 1, 1))
            elif self.dim_ordering == 'tf':
                output += K.reshape(self.b, (1, 1, 1, self.nb_filter))
            else:
                raise Exception('Invalid dim_ordering: ' + self.dim_ordering)
        output = self.activation(output)
        return output

    def get_config(self):
        '''Returns the layer config, including the constructor's `output_shape`.'''
        # Serialize the value the user passed to __init__ (`output_shape_`),
        # so the config can be fed back to the constructor unchanged.
        config = {'output_shape': self.output_shape_}
        base_config = super(Deconvolution2D, self).get_config()
        return dict(list(base_config.items()) + list(config.items()))
|
||||
|
||||
|
||||
class AtrousConvolution2D(Convolution2D):
|
||||
'''Atrous Convolution operator for filtering windows of two-dimensional inputs.
|
||||
A.k.a dilated convolution or convolution with holes.
|
||||
@@ -453,10 +612,12 @@ class AtrousConvolution2D(Convolution2D):
|
||||
def __init__(self, nb_filter, nb_row, nb_col,
|
||||
init='glorot_uniform', activation='linear', weights=None,
|
||||
border_mode='valid', subsample=(1, 1),
|
||||
atrous_rate=(1, 1), dim_ordering=K.image_dim_ordering(),
|
||||
atrous_rate=(1, 1), dim_ordering='default',
|
||||
W_regularizer=None, b_regularizer=None, activity_regularizer=None,
|
||||
W_constraint=None, b_constraint=None,
|
||||
bias=True, **kwargs):
|
||||
if dim_ordering == 'default':
|
||||
dim_ordering = K.image_dim_ordering()
|
||||
|
||||
if border_mode not in {'valid', 'same'}:
|
||||
raise Exception('Invalid border mode for AtrousConv2D:', border_mode)
|
||||
@@ -535,6 +696,11 @@ class SeparableConvolution2D(Layer):
|
||||
(tuple of integers, does not include the sample axis),
|
||||
e.g. `input_shape=(3, 128, 128)` for 128x128 RGB pictures.
|
||||
|
||||
# Theano warning
|
||||
|
||||
This layer is only available with the
|
||||
TensorFlow backend for the time being.
|
||||
|
||||
# Arguments
|
||||
nb_filter: Number of convolution filters to use.
|
||||
nb_row: Number of rows in the convolution kernel.
|
||||
@@ -595,7 +761,7 @@ class SeparableConvolution2D(Layer):
|
||||
def __init__(self, nb_filter, nb_row, nb_col,
|
||||
init='glorot_uniform', activation='linear', weights=None,
|
||||
border_mode='valid', subsample=(1, 1),
|
||||
depth_multiplier=1, dim_ordering=K.image_dim_ordering(),
|
||||
depth_multiplier=1, dim_ordering='default',
|
||||
depthwise_regularizer=None, pointwise_regularizer=None,
|
||||
b_regularizer=None, activity_regularizer=None,
|
||||
depthwise_constraint=None, pointwise_constraint=None,
|
||||
@@ -606,6 +772,9 @@ class SeparableConvolution2D(Layer):
|
||||
raise Exception('SeparableConv2D is only available '
|
||||
'with TensorFlow for the time being.')
|
||||
|
||||
if dim_ordering == 'default':
|
||||
dim_ordering = K.image_dim_ordering()
|
||||
|
||||
if border_mode not in {'valid', 'same'}:
|
||||
raise Exception('Invalid border mode for SeparableConv2D:', border_mode)
|
||||
|
||||
@@ -806,10 +975,13 @@ class Convolution3D(Layer):
|
||||
|
||||
def __init__(self, nb_filter, kernel_dim1, kernel_dim2, kernel_dim3,
|
||||
init='glorot_uniform', activation='linear', weights=None,
|
||||
border_mode='valid', subsample=(1, 1, 1), dim_ordering=K.image_dim_ordering(),
|
||||
border_mode='valid', subsample=(1, 1, 1), dim_ordering='default',
|
||||
W_regularizer=None, b_regularizer=None, activity_regularizer=None,
|
||||
W_constraint=None, b_constraint=None,
|
||||
bias=True, **kwargs):
|
||||
if dim_ordering == 'default':
|
||||
dim_ordering = K.image_dim_ordering()
|
||||
|
||||
if border_mode not in {'valid', 'same'}:
|
||||
raise Exception('Invalid border mode for Convolution3D:', border_mode)
|
||||
self.nb_filter = nb_filter
|
||||
@@ -963,7 +1135,8 @@ class UpSampling1D(Layer):
|
||||
super(UpSampling1D, self).__init__(**kwargs)
|
||||
|
||||
def get_output_shape_for(self, input_shape):
|
||||
return (input_shape[0], self.length * input_shape[1], input_shape[2])
|
||||
length = self.length * input_shape[1] if input_shape[1] is not None else None
|
||||
return (input_shape[0], length, input_shape[2])
|
||||
|
||||
def call(self, x, mask=None):
|
||||
output = K.repeat_elements(x, self.length, axis=1)
|
||||
@@ -1001,7 +1174,9 @@ class UpSampling2D(Layer):
|
||||
`(samples, upsampled_rows, upsampled_cols, channels)` if dim_ordering='tf'.
|
||||
'''
|
||||
|
||||
def __init__(self, size=(2, 2), dim_ordering=K.image_dim_ordering(), **kwargs):
|
||||
def __init__(self, size=(2, 2), dim_ordering='default', **kwargs):
|
||||
if dim_ordering == 'default':
|
||||
dim_ordering = K.image_dim_ordering()
|
||||
self.size = tuple(size)
|
||||
assert dim_ordering in {'tf', 'th'}, 'dim_ordering must be in {tf, th}'
|
||||
self.dim_ordering = dim_ordering
|
||||
@@ -1010,14 +1185,18 @@ class UpSampling2D(Layer):
|
||||
|
||||
def get_output_shape_for(self, input_shape):
|
||||
if self.dim_ordering == 'th':
|
||||
width = self.size[0] * input_shape[2] if input_shape[2] is not None else None
|
||||
height = self.size[1] * input_shape[3] if input_shape[3] is not None else None
|
||||
return (input_shape[0],
|
||||
input_shape[1],
|
||||
self.size[0] * input_shape[2],
|
||||
self.size[1] * input_shape[3])
|
||||
width,
|
||||
height)
|
||||
elif self.dim_ordering == 'tf':
|
||||
width = self.size[0] * input_shape[1] if input_shape[1] is not None else None
|
||||
height = self.size[1] * input_shape[2] if input_shape[2] is not None else None
|
||||
return (input_shape[0],
|
||||
self.size[0] * input_shape[1],
|
||||
self.size[1] * input_shape[2],
|
||||
width,
|
||||
height,
|
||||
input_shape[3])
|
||||
else:
|
||||
raise Exception('Invalid dim_ordering: ' + self.dim_ordering)
|
||||
@@ -1036,8 +1215,6 @@ class UpSampling3D(Layer):
|
||||
'''Repeat the first, second and third dimension of the data
|
||||
by size[0], size[1] and size[2] respectively.
|
||||
|
||||
Note: this layer will only work with Theano for the time being.
|
||||
|
||||
# Arguments
|
||||
size: tuple of 3 integers. The upsampling factors for dim1, dim2 and dim3.
|
||||
dim_ordering: 'th' or 'tf'.
|
||||
@@ -1060,10 +1237,9 @@ class UpSampling3D(Layer):
|
||||
`(samples, upsampled_dim1, upsampled_dim2, upsampled_dim3, channels)` if dim_ordering='tf'.
|
||||
'''
|
||||
|
||||
def __init__(self, size=(2, 2, 2), dim_ordering=K.image_dim_ordering(), **kwargs):
|
||||
if K._BACKEND != 'theano':
|
||||
raise Exception(self.__class__.__name__ +
|
||||
' is currently only working with Theano backend.')
|
||||
def __init__(self, size=(2, 2, 2), dim_ordering='default', **kwargs):
|
||||
if dim_ordering == 'default':
|
||||
dim_ordering = K.image_dim_ordering()
|
||||
self.size = tuple(size)
|
||||
assert dim_ordering in {'tf', 'th'}, 'dim_ordering must be in {tf, th}'
|
||||
self.dim_ordering = dim_ordering
|
||||
@@ -1072,16 +1248,22 @@ class UpSampling3D(Layer):
|
||||
|
||||
def get_output_shape_for(self, input_shape):
|
||||
if self.dim_ordering == 'th':
|
||||
dim1 = self.size[0] * input_shape[2] if input_shape[2] is not None else None
|
||||
dim2 = self.size[1] * input_shape[3] if input_shape[3] is not None else None
|
||||
dim3 = self.size[2] * input_shape[4] if input_shape[4] is not None else None
|
||||
return (input_shape[0],
|
||||
input_shape[1],
|
||||
self.size[0] * input_shape[2],
|
||||
self.size[1] * input_shape[3],
|
||||
self.size[2] * input_shape[4])
|
||||
dim1,
|
||||
dim2,
|
||||
dim3)
|
||||
elif self.dim_ordering == 'tf':
|
||||
dim1 = self.size[0] * input_shape[1] if input_shape[1] is not None else None
|
||||
dim2 = self.size[1] * input_shape[2] if input_shape[2] is not None else None
|
||||
dim3 = self.size[2] * input_shape[3] if input_shape[3] is not None else None
|
||||
return (input_shape[0],
|
||||
self.size[0] * input_shape[1],
|
||||
self.size[1] * input_shape[2],
|
||||
self.size[2] * input_shape[3],
|
||||
dim1,
|
||||
dim2,
|
||||
dim3,
|
||||
input_shape[4])
|
||||
else:
|
||||
raise Exception('Invalid dim_ordering: ' + self.dim_ordering)
|
||||
@@ -1154,8 +1336,10 @@ class ZeroPadding2D(Layer):
|
||||
(samples, depth, first_padded_axis, second_padded_axis)
|
||||
'''
|
||||
|
||||
def __init__(self, padding=(1, 1), dim_ordering=K.image_dim_ordering(), **kwargs):
|
||||
def __init__(self, padding=(1, 1), dim_ordering='default', **kwargs):
|
||||
super(ZeroPadding2D, self).__init__(**kwargs)
|
||||
if dim_ordering == 'default':
|
||||
dim_ordering = K.image_dim_ordering()
|
||||
self.padding = tuple(padding)
|
||||
assert dim_ordering in {'tf', 'th'}, 'dim_ordering must be in {tf, th}'
|
||||
self.dim_ordering = dim_ordering
|
||||
@@ -1192,8 +1376,6 @@ class ZeroPadding2D(Layer):
|
||||
class ZeroPadding3D(Layer):
|
||||
'''Zero-padding layer for 3D data (spatial or spatio-temporal).
|
||||
|
||||
Note: this layer will only work with Theano for the time being.
|
||||
|
||||
# Arguments
|
||||
padding: tuple of int (length 3)
|
||||
How many zeros to add at the beginning and end of
|
||||
@@ -1214,11 +1396,10 @@ class ZeroPadding3D(Layer):
|
||||
(samples, depth, first_padded_axis, second_padded_axis, third_axis_to_pad)
|
||||
'''
|
||||
|
||||
def __init__(self, padding=(1, 1, 1), dim_ordering=K.image_dim_ordering(), **kwargs):
|
||||
if K._BACKEND != 'theano':
|
||||
raise Exception(self.__class__.__name__ +
|
||||
' is currently only working with Theano backend.')
|
||||
def __init__(self, padding=(1, 1, 1), dim_ordering='default', **kwargs):
|
||||
super(ZeroPadding3D, self).__init__(**kwargs)
|
||||
if dim_ordering == 'default':
|
||||
dim_ordering = K.image_dim_ordering()
|
||||
self.padding = tuple(padding)
|
||||
assert dim_ordering in {'tf', 'th'}, 'dim_ordering must be in {tf, th}'
|
||||
self.dim_ordering = dim_ordering
|
||||
@@ -1255,11 +1436,219 @@ class ZeroPadding3D(Layer):
|
||||
base_config = super(ZeroPadding3D, self).get_config()
|
||||
return dict(list(base_config.items()) + list(config.items()))
|
||||
|
||||
class Cropping1D(Layer):
    '''Cropping layer for 1D input (e.g. temporal sequence).
    It crops along the time dimension (axis 1).

    # Arguments
        cropping: tuple of int (length 2)
            How many units should be trimmed off at the beginning and end of
            the cropping dimension (axis 1).

    # Input shape
        3D tensor with shape (samples, axis_to_crop, features)

    # Output shape
        3D tensor with shape (samples, cropped_axis, features)
    '''

    def __init__(self, cropping=(1, 1), **kwargs):
        super(Cropping1D, self).__init__(**kwargs)
        self.cropping = tuple(cropping)
        assert len(self.cropping) == 2, 'cropping must be a tuple length of 2'
        self.input_spec = [InputSpec(ndim=3)]

    def build(self, input_shape):
        # Remember the concrete input shape; `call` reads it back.
        self.input_spec = [InputSpec(shape=input_shape)]

    def get_output_shape_for(self, input_shape):
        '''Returns `input_shape` with axis 1 shortened by both cropping amounts.

        An unknown (`None`) length on axis 1 stays `None`.
        '''
        if input_shape[1] is None:
            length = None
        else:
            length = input_shape[1] - self.cropping[0] - self.cropping[1]
        return (input_shape[0],
                length,
                input_shape[2])

    def call(self, x, mask=None):
        '''Slices `x` along axis 1 according to `self.cropping`.'''
        start, trim_end = self.cropping
        length = self.input_spec[0].shape[1]
        if length is not None:
            # Known length: same absolute end index as before.
            end = length - trim_end
        elif trim_end:
            # Unknown length: `None - int` would raise a TypeError, so count
            # the end of the slice from the back instead.
            end = -trim_end
        else:
            # Nothing to trim at the end: leave the slice open-ended.
            end = None
        return x[:, start:end, :]

    def get_config(self):
        config = {'cropping': self.cropping}
        base_config = super(Cropping1D, self).get_config()
        return dict(list(base_config.items()) + list(config.items()))
|
||||
|
||||
class Cropping2D(Layer):
    '''Cropping layer for 2D input (e.g. picture).
    It crops along spatial dimensions, i.e. width and height.

    # Arguments
        cropping: tuple of tuple of int (length 2)
            How many units should be trimmed off at the beginning and end of
            the 2 cropping dimensions (width, height).
        dim_ordering: 'th' or 'tf'.
            In 'th' mode, the channels dimension (the depth)
            is at index 1, in 'tf' mode is it at index 3.
            It defaults to the `image_dim_ordering` value found in your
            Keras config file at `~/.keras/keras.json`.
            If you never set it, then it will be "th".

    # Input shape
        4D tensor with shape:
        (samples, depth, first_axis_to_crop, second_axis_to_crop)

    # Output shape
        4D tensor with shape:
        (samples, depth, first_cropped_axis, second_cropped_axis)

    # Examples

    ```python
        # Crop the input 2D images or feature maps
        model = Sequential()
        model.add(Cropping2D(cropping=((2, 2), (4, 4)), input_shape=(3, 28, 28)))
        # now model.output_shape == (None, 3, 24, 20)
        model.add(Convolution2D(64, 3, 3, border_mode='same'))
        model.add(Cropping2D(cropping=((2, 2), (2, 2))))
        # now model.output_shape == (None, 64, 20, 16)

    ```

    '''

    def __init__(self, cropping=((0, 0), (0, 0)), dim_ordering='default', **kwargs):
        super(Cropping2D, self).__init__(**kwargs)
        if dim_ordering == 'default':
            dim_ordering = K.image_dim_ordering()
        self.cropping = tuple(cropping)
        assert len(self.cropping) == 2, 'cropping must be a tuple length of 2'
        assert len(self.cropping[0]) == 2, 'cropping[0] must be a tuple length of 2'
        assert len(self.cropping[1]) == 2, 'cropping[1] must be a tuple length of 2'
        assert dim_ordering in {'tf', 'th'}, 'dim_ordering must be in {tf, th}'
        self.dim_ordering = dim_ordering
        self.input_spec = [InputSpec(ndim=4)]

    def build(self, input_shape):
        # Replace the rank-only spec set in __init__ with the full shape.
        self.input_spec = [InputSpec(shape=input_shape)]

    def get_output_shape_for(self, input_shape):
        if self.dim_ordering == 'th':
            spatial = (input_shape[2], input_shape[3])
        elif self.dim_ordering == 'tf':
            spatial = (input_shape[1], input_shape[2])
        else:
            raise Exception('Invalid dim_ordering: ' + self.dim_ordering)

        # An unknown (None) spatial dimension stays unknown after cropping,
        # matching what Cropping1D and Cropping3D do.
        cropped = tuple(
            dim - before - after if dim is not None else None
            for dim, (before, after) in zip(spatial, self.cropping))

        if self.dim_ordering == 'th':
            return (input_shape[0], input_shape[1]) + cropped
        return (input_shape[0],) + cropped + (input_shape[3],)

    def call(self, x, mask=None):
        # Slice relative to the end of each axis so that no static input
        # size is needed. A trailing crop of 0 must become an open-ended
        # slice, because `a:-0` would select nothing.
        rows, cols = [slice(before, -after if after else None)
                      for before, after in self.cropping]
        if self.dim_ordering == 'th':
            return x[:, :, rows, cols]
        elif self.dim_ordering == 'tf':
            return x[:, rows, cols, :]
        else:
            raise Exception('Invalid dim_ordering: ' + self.dim_ordering)

    def get_config(self):
        # dim_ordering is stored so that a layer created with an explicit
        # ordering keeps it when rebuilt from its config.
        config = {'cropping': self.cropping,
                  'dim_ordering': self.dim_ordering}
        base_config = super(Cropping2D, self).get_config()
        return dict(list(base_config.items()) + list(config.items()))
|
||||
|
||||
class Cropping3D(Layer):
    '''Cropping layer for 3D input (e.g. spatial or spatio-temporal volume).

    # Arguments
        cropping: tuple of tuple of int (length 3)
            How many units should be trimmed off at the beginning and end of
            the 3 cropping dimensions (kernel_dim1, kernel_dim2, kernel_dim3).
        dim_ordering: 'th' or 'tf'.
            In 'th' mode, the channels dimension (the depth)
            is at index 1, in 'tf' mode is it at index 4.
            It defaults to the `image_dim_ordering` value found in your
            Keras config file at `~/.keras/keras.json`.
            If you never set it, then it will be "th".

    # Input shape
        5D tensor with shape:
        (samples, depth, first_axis_to_crop, second_axis_to_crop, third_axis_to_crop)

    # Output shape
        5D tensor with shape:
        (samples, depth, first_cropped_axis, second_cropped_axis, third_cropped_axis)

    '''

    def __init__(self, cropping=((1, 1), (1, 1), (1, 1)), dim_ordering='default', **kwargs):
        super(Cropping3D, self).__init__(**kwargs)
        if dim_ordering == 'default':
            dim_ordering = K.image_dim_ordering()
        self.cropping = tuple(cropping)
        assert len(self.cropping) == 3, 'cropping must be a tuple length of 3'
        assert len(self.cropping[0]) == 2, 'cropping[0] must be a tuple length of 2'
        assert len(self.cropping[1]) == 2, 'cropping[1] must be a tuple length of 2'
        assert len(self.cropping[2]) == 2, 'cropping[2] must be a tuple length of 2'
        assert dim_ordering in {'tf', 'th'}, 'dim_ordering must be in {tf, th}'
        self.dim_ordering = dim_ordering
        self.input_spec = [InputSpec(ndim=5)]

    def build(self, input_shape):
        # Replace the rank-only spec set in __init__ with the full shape.
        self.input_spec = [InputSpec(shape=input_shape)]

    def get_output_shape_for(self, input_shape):
        if self.dim_ordering == 'th':
            spatial = (input_shape[2], input_shape[3], input_shape[4])
        elif self.dim_ordering == 'tf':
            spatial = (input_shape[1], input_shape[2], input_shape[3])
        else:
            raise Exception('Invalid dim_ordering: ' + self.dim_ordering)

        # An unknown (None) spatial dimension stays unknown after cropping.
        cropped = tuple(
            dim - before - after if dim is not None else None
            for dim, (before, after) in zip(spatial, self.cropping))

        if self.dim_ordering == 'th':
            return (input_shape[0], input_shape[1]) + cropped
        return (input_shape[0],) + cropped + (input_shape[4],)

    def call(self, x, mask=None):
        # Slice relative to the end of each axis so that no static input
        # size is needed; get_output_shape_for already allows None dims.
        # A trailing crop of 0 must become an open-ended slice, because
        # `a:-0` would select nothing.
        dim1, dim2, dim3 = [slice(before, -after if after else None)
                            for before, after in self.cropping]
        if self.dim_ordering == 'th':
            return x[:, :, dim1, dim2, dim3]
        elif self.dim_ordering == 'tf':
            return x[:, dim1, dim2, dim3, :]
        else:
            raise Exception('Invalid dim_ordering: ' + self.dim_ordering)

    def get_config(self):
        # dim_ordering is stored so that a layer created with an explicit
        # ordering keeps it when rebuilt from its config.
        config = {'cropping': self.cropping,
                  'dim_ordering': self.dim_ordering}
        base_config = super(Cropping3D, self).get_config()
        return dict(list(base_config.items()) + list(config.items()))
|
||||
|
||||
|
||||
# Aliases
|
||||
|
||||
Conv1D = Convolution1D
|
||||
Conv2D = Convolution2D
|
||||
Conv3D = Convolution3D
|
||||
Deconv2D = Deconvolution2D
|
||||
AtrousConv2D = AtrousConvolution2D
|
||||
SeparableConv2D = SeparableConvolution2D
|
||||
|
||||
+100
-1
@@ -82,9 +82,13 @@ class Dropout(Layer):
|
||||
self.supports_masking = True
|
||||
super(Dropout, self).__init__(**kwargs)
|
||||
|
||||
def _get_noise_shape(self, x):
|
||||
return None
|
||||
|
||||
def call(self, x, mask=None):
|
||||
if 0. < self.p < 1.:
|
||||
x = K.in_train_phase(K.dropout(x, level=self.p), x)
|
||||
noise_shape = self._get_noise_shape(x)
|
||||
x = K.in_train_phase(K.dropout(x, self.p, noise_shape), x)
|
||||
return x
|
||||
|
||||
def get_config(self):
|
||||
@@ -93,6 +97,101 @@ class Dropout(Layer):
|
||||
return dict(list(base_config.items()) + list(config.items()))
|
||||
|
||||
|
||||
class SpatialDropout2D(Dropout):
    '''Dropout variant that drops whole 2D feature maps rather than
    individual elements.

    When neighbouring pixels inside a feature map are strongly correlated
    (as is normally the case in early convolution layers), element-wise
    dropout does not regularize the activations and merely lowers the
    effective learning rate. Dropping complete feature maps promotes
    independence between them, so this layer should be preferred there.

    # Arguments
        p: float between 0 and 1. Fraction of the input units to drop.
        dim_ordering: 'th' or 'tf'. In 'th' mode, the channels dimension
            (the depth) is at index 1, in 'tf' mode is it at index 3.
            It defaults to the `image_dim_ordering` value found in your
            Keras config file at `~/.keras/keras.json`.
            If you never set it, then it will be "th".

    # Input shape
        4D tensor with shape:
        `(samples, channels, rows, cols)` if dim_ordering='th'
        or 4D tensor with shape:
        `(samples, rows, cols, channels)` if dim_ordering='tf'.

    # Output shape
        Same as input

    # References
        - [Efficient Object Localization Using Convolutional Networks](https://arxiv.org/pdf/1411.4280.pdf)
    '''
    def __init__(self, p, dim_ordering='default', **kwargs):
        # Resolve the 'default' placeholder to the configured ordering.
        ordering = K.image_dim_ordering() if dim_ordering == 'default' else dim_ordering
        assert ordering in {'tf', 'th'}, 'dim_ordering must be in {tf, th}'
        self.dim_ordering = ordering
        super(SpatialDropout2D, self).__init__(p, **kwargs)

    def _get_noise_shape(self, x):
        # The noise keeps the sample and channel axes and uses size 1 on
        # both spatial axes.
        shape = K.shape(x)
        if self.dim_ordering == 'th':
            return (shape[0], shape[1], 1, 1)
        if self.dim_ordering == 'tf':
            return (shape[0], 1, 1, shape[3])
        raise Exception('Invalid dim_ordering: ' + self.dim_ordering)
|
||||
|
||||
|
||||
class SpatialDropout3D(Dropout):
    '''Dropout variant that drops whole 3D feature maps rather than
    individual elements.

    When neighbouring voxels inside a feature map are strongly correlated
    (as is normally the case in early convolution layers), element-wise
    dropout does not regularize the activations and merely lowers the
    effective learning rate. Dropping complete feature maps promotes
    independence between them, so this layer should be preferred there.

    # Arguments
        p: float between 0 and 1. Fraction of the input units to drop.
        dim_ordering: 'th' or 'tf'.
            In 'th' mode, the channels dimension (the depth)
            is at index 1, in 'tf' mode is it at index 4.
            It defaults to the `image_dim_ordering` value found in your
            Keras config file at `~/.keras/keras.json`.
            If you never set it, then it will be "th".

    # Input shape
        5D tensor with shape:
        `(samples, channels, dim1, dim2, dim3)` if dim_ordering='th'
        or 5D tensor with shape:
        `(samples, dim1, dim2, dim3, channels)` if dim_ordering='tf'.

    # Output shape
        Same as input

    # References
        - [Efficient Object Localization Using Convolutional Networks](https://arxiv.org/pdf/1411.4280.pdf)
    '''
    def __init__(self, p, dim_ordering='default', **kwargs):
        # Resolve the 'default' placeholder to the configured ordering.
        ordering = K.image_dim_ordering() if dim_ordering == 'default' else dim_ordering
        assert ordering in {'tf', 'th'}, 'dim_ordering must be in {tf, th}'
        self.dim_ordering = ordering
        super(SpatialDropout3D, self).__init__(p, **kwargs)

    def _get_noise_shape(self, x):
        # The noise keeps the sample and channel axes and uses size 1 on
        # all three spatial axes.
        shape = K.shape(x)
        if self.dim_ordering == 'th':
            return (shape[0], shape[1], 1, 1, 1)
        if self.dim_ordering == 'tf':
            return (shape[0], 1, 1, 1, shape[4])
        raise Exception('Invalid dim_ordering: ' + self.dim_ordering)
|
||||
|
||||
|
||||
class Activation(Layer):
|
||||
'''Applies an activation function to an output.
|
||||
|
||||
|
||||
@@ -125,6 +125,8 @@ class Embedding(Layer):
|
||||
return (input_shape[0], input_length, self.output_dim)
|
||||
|
||||
def call(self, x, mask=None):
|
||||
if K.dtype(x) != 'int32':
|
||||
x = K.cast(x, 'int32')
|
||||
if 0. < self.dropout < 1.:
|
||||
retain_p = 1. - self.dropout
|
||||
B = K.random_binomial((self.input_dim,), p=retain_p) * (1. / retain_p)
|
||||
|
||||
+23
-16
@@ -8,14 +8,17 @@ from ..utils.np_utils import conv_output_length
|
||||
|
||||
|
||||
class LocallyConnected1D(Layer):
|
||||
'''LocallyConnected1D layer works almost the same as Convolution1D layer,
|
||||
except that weights are unshared, that is, a different set of filters is
|
||||
applied at each different patch of the input. When using this layer as the
|
||||
first layer in a model, either provide the keyword argument `input_dim`
|
||||
'''The `LocallyConnected1D` layer works similarly to
|
||||
the `Convolution1D` layer, except that weights are unshared,
|
||||
that is, a different set of filters is applied at each different patch
|
||||
of the input.
|
||||
When using this layer as the first layer in a model,
|
||||
either provide the keyword argument `input_dim`
|
||||
(int, e.g. 128 for sequences of 128-dimensional vectors), or `input_shape`
|
||||
(tuple of integers, e.g. (10, 128) for sequences of 10 vectors of
|
||||
128-dimensional vectors). Also, you will need to fix shape of the previous
|
||||
layer, since the weights can only be defined with determined output shape.
|
||||
(tuple of integers, e.g. `input_shape=(10, 128)`
|
||||
for sequences of 10 vectors of 128-dimensional vectors).
|
||||
Also, note that this layer can only be used with
|
||||
a fully-specified input shape (`None` dimensions not allowed).
|
||||
|
||||
# Example
|
||||
```python
|
||||
@@ -75,7 +78,7 @@ class LocallyConnected1D(Layer):
|
||||
W_constraint=None, b_constraint=None,
|
||||
bias=True, input_dim=None, input_length=None, **kwargs):
|
||||
if border_mode != 'valid':
|
||||
raise Exception('Invalid border mode for Convolution2D '
|
||||
raise Exception('Invalid border mode for LocallyConnected1D '
|
||||
'(only "valid" is supported):', border_mode)
|
||||
self.nb_filter = nb_filter
|
||||
self.filter_length = filter_length
|
||||
@@ -180,14 +183,16 @@ class LocallyConnected1D(Layer):
|
||||
|
||||
|
||||
class LocallyConnected2D(Layer):
|
||||
'''LocallyConnected2D layer works almost the same as Convolution2D layer,
|
||||
except that weights are unshared, that is, a different set of filters is
|
||||
applied at each different patch of the input. When using this layer as the
|
||||
'''The `LocallyConnected2D` layer works similarly
|
||||
to the `Convolution2D` layer, except that weights are unshared,
|
||||
that is, a different set of filters is applied at each
|
||||
different patch of the input.
|
||||
When using this layer as the
|
||||
first layer in a model, provide the keyword argument `input_shape` (tuple
|
||||
of integers, does not include the sample axis), e.g.
|
||||
`input_shape=(3, 128, 128)` for 128x128 RGB pictures. Also, you will need
|
||||
to fix shape of the previous layer, since the weights can only be defined
|
||||
with determined output shape.
|
||||
`input_shape=(3, 128, 128)` for 128x128 RGB pictures.
|
||||
Also, note that this layer can only be used with
|
||||
a fully-specified input shape (`None` dimensions not allowed).
|
||||
|
||||
# Examples
|
||||
```python
|
||||
@@ -251,12 +256,14 @@ class LocallyConnected2D(Layer):
|
||||
def __init__(self, nb_filter, nb_row, nb_col,
|
||||
init='glorot_uniform', activation='linear', weights=None,
|
||||
border_mode='valid', subsample=(1, 1),
|
||||
dim_ordering=K.image_dim_ordering(),
|
||||
dim_ordering='default',
|
||||
W_regularizer=None, b_regularizer=None, activity_regularizer=None,
|
||||
W_constraint=None, b_constraint=None,
|
||||
bias=True, **kwargs):
|
||||
if dim_ordering == 'default':
|
||||
dim_ordering = K.image_dim_ordering()
|
||||
if border_mode != 'valid':
|
||||
raise Exception('Invalid border mode for Convolution2D '
|
||||
raise Exception('Invalid border mode for LocallyConnected2D '
|
||||
'(only "valid" is supported):', border_mode)
|
||||
self.nb_filter = nb_filter
|
||||
self.nb_row = nb_row
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
from __future__ import absolute_import
|
||||
from ..engine import Layer
|
||||
from .. import backend as K
|
||||
import numpy as np
|
||||
|
||||
|
||||
class GaussianNoise(Layer):
|
||||
@@ -71,7 +72,7 @@ class GaussianDropout(Layer):
|
||||
def call(self, x, mask=None):
|
||||
if 0 < self.p < 1:
|
||||
noise_x = x * K.random_normal(shape=K.shape(x), mean=1.0,
|
||||
std=K.sqrt(self.p / (1.0 - self.p)))
|
||||
std=np.sqrt(self.p / (1.0 - self.p)))
|
||||
return K.in_train_phase(noise_x, x)
|
||||
return x
|
||||
|
||||
|
||||
@@ -35,6 +35,7 @@ class BatchNormalization(Layer):
|
||||
weights: Initialization weights.
|
||||
List of 2 Numpy arrays, with shapes:
|
||||
`[(input_shape,), (input_shape,)]`
|
||||
Note that the order of this list is [gamma, beta, mean, std]
|
||||
beta_init: name of initialization function for shift parameter
|
||||
(see [initializations](../initializations.md)), or alternatively,
|
||||
Theano/TensorFlow function to use for weights initialization.
|
||||
@@ -55,7 +56,7 @@ class BatchNormalization(Layer):
|
||||
# References
|
||||
- [Batch Normalization: Accelerating Deep Network Training by Reducing Internal Covariate Shift](http://jmlr.org/proceedings/papers/v37/ioffe15.html)
|
||||
'''
|
||||
def __init__(self, epsilon=1e-6, mode=0, axis=-1, momentum=0.9,
|
||||
def __init__(self, epsilon=1e-5, mode=0, axis=-1, momentum=0.99,
|
||||
weights=None, beta_init='zero', gamma_init='one', **kwargs):
|
||||
self.supports_masking = True
|
||||
self.beta_init = initializations.get(beta_init)
|
||||
@@ -99,17 +100,10 @@ class BatchNormalization(Layer):
|
||||
broadcast_shape = [1] * len(input_shape)
|
||||
broadcast_shape[self.axis] = input_shape[self.axis]
|
||||
|
||||
# # case: train mode (uses stats of the current batch)
|
||||
# mean = K.mean(x, axis=reduction_axes)
|
||||
# brodcast_mean = K.reshape(mean, broadcast_shape)
|
||||
# std = K.mean(K.square(x - brodcast_mean) + self.epsilon, axis=reduction_axes)
|
||||
# std = K.sqrt(std)
|
||||
# brodcast_std = K.reshape(std, broadcast_shape)
|
||||
|
||||
if self.mode == 2:
|
||||
x_normed, mean, std = K.normalize_batch_in_training(x, self.gamma, self.beta, reduction_axes, epsilon=self.epsilon)
|
||||
mean_update = self.momentum * self.running_mean + (1 - self.momentum) * mean
|
||||
std_update = self.momentum * self.running_std + (1 - self.momentum) * std
|
||||
x_normed, mean, std = K.normalize_batch_in_training(
|
||||
x, self.gamma, self.beta, reduction_axes,
|
||||
epsilon=self.epsilon)
|
||||
else:
|
||||
# mode 0
|
||||
if self.called_with not in {None, x}:
|
||||
@@ -123,29 +117,28 @@ class BatchNormalization(Layer):
|
||||
'(see docs for a description of '
|
||||
'the behavior).')
|
||||
self.called_with = x
|
||||
x_normed, mean, std = K.normalize_batch_in_training(x, self.gamma, self.beta, reduction_axes, epsilon=self.epsilon)
|
||||
mean_update = self.momentum * self.running_mean + (1 - self.momentum) * mean
|
||||
std_update = self.momentum * self.running_std + (1 - self.momentum) * std
|
||||
self.updates = [(self.running_mean, mean_update),
|
||||
(self.running_std, std_update)]
|
||||
x_normed, mean, std = K.normalize_batch_in_training(
|
||||
x, self.gamma, self.beta, reduction_axes,
|
||||
epsilon=self.epsilon)
|
||||
|
||||
self.updates = [K.moving_average_update(self.running_mean, mean, self.momentum),
|
||||
K.moving_average_update(self.running_std, std, self.momentum)]
|
||||
|
||||
if sorted(reduction_axes) == range(K.ndim(x))[:-1]:
|
||||
x_normed_running = K.batch_normalization(x, self.running_mean,
|
||||
self.running_std,
|
||||
self.beta,
|
||||
self.gamma,
|
||||
epsilon=self.epsilon)
|
||||
x_normed_running = K.batch_normalization(
|
||||
x, self.running_mean, self.running_std,
|
||||
self.beta, self.gamma,
|
||||
epsilon=self.epsilon)
|
||||
else:
|
||||
# need broadcasting
|
||||
broadcast_running_mean = K.reshape(self.running_mean, broadcast_shape)
|
||||
broadcast_running_std = K.reshape(self.running_std, broadcast_shape)
|
||||
broadcast_beta = K.reshape(self.beta, broadcast_shape)
|
||||
broadcast_gamma = K.reshape(self.gamma, broadcast_shape)
|
||||
x_normed_running = K.batch_normalization(x, broadcast_running_mean,
|
||||
broadcast_running_std,
|
||||
broadcast_beta,
|
||||
broadcast_gamma,
|
||||
epsilon=self.epsilon)
|
||||
x_normed_running = K.batch_normalization(
|
||||
x, broadcast_running_mean, broadcast_running_std,
|
||||
broadcast_beta, broadcast_gamma,
|
||||
epsilon=self.epsilon)
|
||||
|
||||
# pick the normalized form of x corresponding to the training phase
|
||||
x_normed = K.in_train_phase(x_normed, x_normed_running)
|
||||
|
||||
@@ -61,8 +61,9 @@ class MaxPooling1D(_Pooling1D):
|
||||
3D tensor with shape: `(samples, downsampled_steps, features)`.
|
||||
|
||||
# Arguments
|
||||
pool_length: factor by which to downscale. 2 will halve the input.
|
||||
stride: integer, or None. Stride value.
|
||||
pool_length: size of the region to which max pooling is applied
|
||||
stride: integer, or None. factor by which to downscale.
|
||||
2 will halve the input.
|
||||
If None, it will default to `pool_length`.
|
||||
border_mode: 'valid' or 'same'.
|
||||
Note: 'same' will only work with TensorFlow for the time being.
|
||||
@@ -114,8 +115,10 @@ class _Pooling2D(Layer):
|
||||
'''
|
||||
|
||||
def __init__(self, pool_size=(2, 2), strides=None, border_mode='valid',
|
||||
dim_ordering=K.image_dim_ordering(), **kwargs):
|
||||
dim_ordering='default', **kwargs):
|
||||
super(_Pooling2D, self).__init__(**kwargs)
|
||||
if dim_ordering == 'default':
|
||||
dim_ordering = K.image_dim_ordering()
|
||||
self.pool_size = tuple(pool_size)
|
||||
if strides is None:
|
||||
strides = self.pool_size
|
||||
@@ -199,7 +202,7 @@ class MaxPooling2D(_Pooling2D):
|
||||
'''
|
||||
|
||||
def __init__(self, pool_size=(2, 2), strides=None, border_mode='valid',
|
||||
dim_ordering=K.image_dim_ordering(), **kwargs):
|
||||
dim_ordering='default', **kwargs):
|
||||
super(MaxPooling2D, self).__init__(pool_size, strides, border_mode,
|
||||
dim_ordering, **kwargs)
|
||||
|
||||
@@ -241,7 +244,7 @@ class AveragePooling2D(_Pooling2D):
|
||||
'''
|
||||
|
||||
def __init__(self, pool_size=(2, 2), strides=None, border_mode='valid',
|
||||
dim_ordering=K.image_dim_ordering(), **kwargs):
|
||||
dim_ordering='default', **kwargs):
|
||||
super(AveragePooling2D, self).__init__(pool_size, strides, border_mode,
|
||||
dim_ordering, **kwargs)
|
||||
|
||||
@@ -257,8 +260,10 @@ class _Pooling3D(Layer):
|
||||
'''
|
||||
|
||||
def __init__(self, pool_size=(2, 2, 2), strides=None, border_mode='valid',
|
||||
dim_ordering=K.image_dim_ordering(), **kwargs):
|
||||
dim_ordering='default', **kwargs):
|
||||
super(_Pooling3D, self).__init__(**kwargs)
|
||||
if dim_ordering == 'default':
|
||||
dim_ordering = K.image_dim_ordering()
|
||||
self.pool_size = tuple(pool_size)
|
||||
if strides is None:
|
||||
strides = self.pool_size
|
||||
@@ -344,7 +349,7 @@ class MaxPooling3D(_Pooling3D):
|
||||
'''
|
||||
|
||||
def __init__(self, pool_size=(2, 2, 2), strides=None, border_mode='valid',
|
||||
dim_ordering=K.image_dim_ordering(), **kwargs):
|
||||
dim_ordering='default', **kwargs):
|
||||
super(MaxPooling3D, self).__init__(pool_size, strides, border_mode,
|
||||
dim_ordering, **kwargs)
|
||||
|
||||
@@ -384,7 +389,7 @@ class AveragePooling3D(_Pooling3D):
|
||||
'''
|
||||
|
||||
def __init__(self, pool_size=(2, 2, 2), strides=None, border_mode='valid',
|
||||
dim_ordering=K.image_dim_ordering(), **kwargs):
|
||||
dim_ordering='default', **kwargs):
|
||||
super(AveragePooling3D, self).__init__(pool_size, strides, border_mode,
|
||||
dim_ordering, **kwargs)
|
||||
|
||||
|
||||
+17
-37
@@ -12,13 +12,10 @@ def time_distributed_dense(x, w, b=None, dropout=None,
|
||||
'''Apply y.w + b for every temporal slice y of x.
|
||||
'''
|
||||
if not input_dim:
|
||||
# won't work with TensorFlow
|
||||
input_dim = K.shape(x)[2]
|
||||
if not timesteps:
|
||||
# won't work with TensorFlow
|
||||
timesteps = K.shape(x)[1]
|
||||
if not output_dim:
|
||||
# won't work with TensorFlow
|
||||
output_dim = K.shape(w)[1]
|
||||
|
||||
if dropout is not None and 0. < dropout < 1.:
|
||||
@@ -30,12 +27,13 @@ def time_distributed_dense(x, w, b=None, dropout=None,
|
||||
|
||||
# collapse time dimension and batch dimension together
|
||||
x = K.reshape(x, (-1, input_dim))
|
||||
|
||||
x = K.dot(x, w)
|
||||
if b:
|
||||
x = x + b
|
||||
# reshape to 3D tensor
|
||||
x = K.reshape(x, (-1, timesteps, output_dim))
|
||||
x = K.reshape(x, K.pack([-1, timesteps, output_dim]))
|
||||
if K.backend() == 'tensorflow':
|
||||
x.set_shape([None, None, output_dim])
|
||||
return x
|
||||
|
||||
|
||||
@@ -120,14 +118,10 @@ class Recurrent(Layer):
|
||||
use an [Embedding](embeddings.md) layer with the `mask_zero` parameter
|
||||
set to `True`.
|
||||
|
||||
# TensorFlow warning
|
||||
For the time being, when using the TensorFlow backend,
|
||||
the number of timesteps used must be specified in your model.
|
||||
Make sure to pass an `input_length` int argument to your
|
||||
recurrent layer (if it comes first in your model),
|
||||
or to pass a complete `input_shape` argument to the first layer
|
||||
in your model otherwise.
|
||||
|
||||
# Note on performance
|
||||
You are likely to see better performance with RNNs in Theano compared
|
||||
to TensorFlow. Additionally, when using TensorFlow, it is often
|
||||
preferable to set `unroll=True` for better performance.
|
||||
|
||||
# Note on using statefulness in RNNs
|
||||
You can set RNN layers to be 'stateful', which means that the states
|
||||
@@ -139,16 +133,15 @@ class Recurrent(Layer):
|
||||
To enable statefulness:
|
||||
- specify `stateful=True` in the layer constructor.
|
||||
- specify a fixed batch size for your model, by passing
|
||||
a `batch_input_shape=(...)` to the first layer in your model.
|
||||
if sequential model:
|
||||
a `batch_input_shape=(...)` to the first layer in your model.
|
||||
else for functional model with 1 or more Input layers:
|
||||
a `batch_shape=(...)` to all the first layers in your model.
|
||||
This is the expected shape of your inputs *including the batch size*.
|
||||
It should be a tuple of integers, e.g. `(32, 10, 100)`.
|
||||
|
||||
To reset the states of your model, call `.reset_states()` on either
|
||||
a specific layer, or on your entire model.
|
||||
|
||||
# Note on using dropout with TensorFlow
|
||||
When using the TensorFlow backend, specify a fixed batch size for your model
|
||||
following the notes on statefulness RNNs.
|
||||
'''
|
||||
def __init__(self, weights=None,
|
||||
return_sequences=False, go_backwards=False, stateful=False,
|
||||
@@ -204,19 +197,6 @@ class Recurrent(Layer):
|
||||
# note that the .build() method of subclasses MUST define
|
||||
# self.input_spec with a complete input shape.
|
||||
input_shape = self.input_spec[0].shape
|
||||
if K._BACKEND == 'tensorflow':
|
||||
if not input_shape[1]:
|
||||
raise Exception('When using TensorFlow, you should define '
|
||||
'explicitly the number of timesteps of '
|
||||
'your sequences.\n'
|
||||
'If your first layer is an Embedding, '
|
||||
'make sure to pass it an "input_length" '
|
||||
'argument. Otherwise, make sure '
|
||||
'the first layer has '
|
||||
'an "input_shape" or "batch_input_shape" '
|
||||
'argument, including the time axis. '
|
||||
'Found input shape at layer ' + self.name +
|
||||
': ' + str(input_shape))
|
||||
if self.stateful:
|
||||
initial_states = self.states
|
||||
else:
|
||||
@@ -372,7 +352,7 @@ class SimpleRNN(Recurrent):
|
||||
constants = []
|
||||
if 0 < self.dropout_U < 1:
|
||||
ones = K.ones_like(K.reshape(x[:, 0, 0], (-1, 1)))
|
||||
ones = K.concatenate([ones] * self.output_dim, 1)
|
||||
ones = K.tile(ones, (1, self.output_dim))
|
||||
B_U = K.in_train_phase(K.dropout(ones, self.dropout_U), ones)
|
||||
constants.append(B_U)
|
||||
else:
|
||||
@@ -381,7 +361,7 @@ class SimpleRNN(Recurrent):
|
||||
input_shape = self.input_spec[0].shape
|
||||
input_dim = input_shape[-1]
|
||||
ones = K.ones_like(K.reshape(x[:, 0, 0], (-1, 1)))
|
||||
ones = K.concatenate([ones] * input_dim, 1)
|
||||
ones = K.tile(ones, (1, input_dim))
|
||||
B_W = K.in_train_phase(K.dropout(ones, self.dropout_W), ones)
|
||||
constants.append(B_W)
|
||||
else:
|
||||
@@ -585,7 +565,7 @@ class GRU(Recurrent):
|
||||
constants = []
|
||||
if 0 < self.dropout_U < 1:
|
||||
ones = K.ones_like(K.reshape(x[:, 0, 0], (-1, 1)))
|
||||
ones = K.concatenate([ones] * self.output_dim, 1)
|
||||
ones = K.tile(ones, (1, self.output_dim))
|
||||
B_U = [K.in_train_phase(K.dropout(ones, self.dropout_U), ones) for _ in range(3)]
|
||||
constants.append(B_U)
|
||||
else:
|
||||
@@ -595,7 +575,7 @@ class GRU(Recurrent):
|
||||
input_shape = self.input_spec[0].shape
|
||||
input_dim = input_shape[-1]
|
||||
ones = K.ones_like(K.reshape(x[:, 0, 0], (-1, 1)))
|
||||
ones = K.concatenate([ones] * input_dim, 1)
|
||||
ones = K.tile(ones, (1, input_dim))
|
||||
B_W = [K.in_train_phase(K.dropout(ones, self.dropout_W), ones) for _ in range(3)]
|
||||
constants.append(B_W)
|
||||
else:
|
||||
@@ -825,7 +805,7 @@ class LSTM(Recurrent):
|
||||
constants = []
|
||||
if 0 < self.dropout_U < 1:
|
||||
ones = K.ones_like(K.reshape(x[:, 0, 0], (-1, 1)))
|
||||
ones = K.concatenate([ones] * self.output_dim, 1)
|
||||
ones = K.tile(ones, (1, self.output_dim))
|
||||
B_U = [K.in_train_phase(K.dropout(ones, self.dropout_U), ones) for _ in range(4)]
|
||||
constants.append(B_U)
|
||||
else:
|
||||
@@ -835,7 +815,7 @@ class LSTM(Recurrent):
|
||||
input_shape = self.input_spec[0].shape
|
||||
input_dim = input_shape[-1]
|
||||
ones = K.ones_like(K.reshape(x[:, 0, 0], (-1, 1)))
|
||||
ones = K.concatenate([ones] * input_dim, 1)
|
||||
ones = K.tile(ones, (1, input_dim))
|
||||
B_W = [K.in_train_phase(K.dropout(ones, self.dropout_W), ones) for _ in range(4)]
|
||||
constants.append(B_W)
|
||||
else:
|
||||
|
||||
@@ -133,3 +133,130 @@ class TimeDistributed(Wrapper):
|
||||
output_shape = self.get_output_shape_for(input_shape)
|
||||
y = K.reshape(y, (-1, input_length) + output_shape[2:])
|
||||
return y
|
||||
|
||||
|
||||
class Bidirectional(Wrapper):
    '''Runs a recurrent layer over its input in both directions.

    A second copy of `layer` is created from the layer's own config with
    `go_backwards` flipped; the outputs of the two copies are then
    combined according to `merge_mode`.

    # Arguments
        layer: `Recurrent` instance.
        merge_mode: how the forward and backward outputs are combined.
            One of {'sum', 'mul', 'concat', 'ave', None}.
            With None the outputs are not combined; they are
            returned as a list.
        weights: optional list of initial weights. The first half is
            handed to the forward layer, the second half to the
            backward layer.

    # Examples

    ```python
        model = Sequential()
        model.add(Bidirectional(LSTM(10, return_sequences=True), input_shape=(5, 10)))
        model.add(Bidirectional(LSTM(10)))
        model.add(Dense(5))
        model.add(Activation('softmax'))
        model.compile(loss='categorical_crossentropy', optimizer='rmsprop')
    ```
    '''
    def __init__(self, layer, merge_mode='concat', weights=None, **kwargs):
        allowed_modes = ('sum', 'mul', 'ave', 'concat', None)
        if merge_mode not in allowed_modes:
            raise ValueError('Invalid merge mode. '
                             'Merge mode should be one of '
                             '{"sum", "mul", "ave", "concat", None}')
        self.forward_layer = layer

        # The backward layer is a clone of `layer` reading the
        # sequence in the opposite direction.
        backward_config = layer.get_config()
        backward_config['go_backwards'] = not backward_config['go_backwards']
        self.backward_layer = layer.__class__.from_config(backward_config)

        self.forward_layer.name = 'forward_' + self.forward_layer.name
        self.backward_layer.name = 'backward_' + self.backward_layer.name
        self.merge_mode = merge_mode

        if weights:
            half = len(weights) // 2
            self.forward_layer.initial_weights = weights[:half]
            self.backward_layer.initial_weights = weights[half:]

        self.stateful = layer.stateful
        self.return_sequences = layer.return_sequences
        self.supports_masking = True
        super(Bidirectional, self).__init__(layer, **kwargs)

    def get_weights(self):
        # forward weights first, backward weights second
        forward_weights = self.forward_layer.get_weights()
        backward_weights = self.backward_layer.get_weights()
        return forward_weights + backward_weights

    def set_weights(self, weights):
        # mirror of get_weights: first half forward, second half backward
        half = len(weights) // 2
        self.forward_layer.set_weights(weights[:half])
        self.backward_layer.set_weights(weights[half:])

    def get_output_shape_for(self, input_shape):
        mode = self.merge_mode
        if mode == 'concat':
            # concatenation doubles the size of the last axis
            dims = list(self.forward_layer.get_output_shape_for(input_shape))
            dims[-1] *= 2
            return tuple(dims)
        if mode is None:
            # two separate outputs, one shape each
            return [self.forward_layer.get_output_shape_for(input_shape)] * 2
        if mode in ('sum', 'ave', 'mul'):
            # element-wise merges keep the wrapped layer's shape
            return self.forward_layer.get_output_shape_for(input_shape)

    def call(self, X, mask=None):
        forward_out = self.forward_layer.call(X, mask)
        backward_out = self.backward_layer.call(X, mask)
        if self.return_sequences:
            # flip the backward output along axis 1 before merging
            backward_out = K.reverse(backward_out, 1)

        mode = self.merge_mode
        if mode is None:
            return [forward_out, backward_out]
        if mode == 'concat':
            return K.concatenate([forward_out, backward_out])
        if mode == 'sum':
            return forward_out + backward_out
        if mode == 'ave':
            return (forward_out + backward_out) / 2
        if mode == 'mul':
            return forward_out * backward_out

    def reset_states(self):
        for sublayer in (self.forward_layer, self.backward_layer):
            sublayer.reset_states()

    def build(self, input_shape):
        for sublayer in (self.forward_layer, self.backward_layer):
            sublayer.build(input_shape)

    def compute_mask(self, input, mask):
        if not self.return_sequences:
            return None
        if self.merge_mode:
            return mask
        # unmerged outputs: one mask per returned tensor
        return [mask, mask]

    @property
    def trainable_weights(self):
        if not hasattr(self.forward_layer, 'trainable_weights'):
            return []
        return (self.forward_layer.trainable_weights +
                self.backward_layer.trainable_weights)

    @property
    def non_trainable_weights(self):
        if not hasattr(self.forward_layer, 'non_trainable_weights'):
            return []
        return (self.forward_layer.non_trainable_weights +
                self.backward_layer.non_trainable_weights)

    @property
    def updates(self):
        if not hasattr(self.forward_layer, 'updates'):
            return []
        return self.forward_layer.updates + self.backward_layer.updates

    @property
    def regularizers(self):
        if not hasattr(self.forward_layer, 'regularizers'):
            return []
        return (self.forward_layer.regularizers +
                self.backward_layer.regularizers)

    @property
    def constraints(self):
        merged = {}
        if hasattr(self.forward_layer, 'constraints'):
            # backward entries win on key collisions
            for sublayer in (self.forward_layer, self.backward_layer):
                merged.update(sublayer.constraints)
        return merged

    def get_config(self):
        # base config extended (or overridden) with this wrapper's merge mode
        merged = dict(super(Bidirectional, self).get_config())
        merged['merge_mode'] = self.merge_mode
        return merged
|
||||
|
||||
+194
-3
@@ -1,13 +1,174 @@
|
||||
from __future__ import print_function
|
||||
import warnings
|
||||
import copy
|
||||
import json
|
||||
import os
|
||||
import numpy as np
|
||||
|
||||
from . import backend as K
|
||||
from .utils.io_utils import ask_to_proceed_with_overwrite
|
||||
from .engine.training import Model
|
||||
from .engine.topology import get_source_inputs, Node
|
||||
from .optimizers import optimizer_from_config
|
||||
from .legacy.models import Graph
|
||||
|
||||
|
||||
def save_model(model, filepath, overwrite=True):
    '''Saves a model to a single HDF5 file.

    The file receives:
        - attribute `keras_version`;
        - attribute `model_config`: JSON with the model's class name
          and `get_config()` output;
        - group `model_weights`, filled by
          `model.save_weights_to_hdf5_group`;
        - if the model has an `optimizer` attribute: attribute
          `training_config` (optimizer config, loss, metrics,
          sample weight mode, loss weights) and, when the optimizer
          has weights, group `optimizer_weights`.

    # Arguments
        model: model instance to save.
        filepath: path of the HDF5 file to write.
        overwrite: if False and `filepath` already exists, the user is
            asked for confirmation; the function returns without
            writing when the answer is negative.

    # Raises
        TypeError: if a value in a config cannot be converted to JSON.
    '''

    def get_json_type(obj):
        '''`json.dumps` fallback for values it cannot encode natively.'''
        # if obj is a serializable Keras class instance
        # e.g. optimizer, layer
        if hasattr(obj, 'get_config'):
            return {'class_name': obj.__class__.__name__,
                    'config': obj.get_config()}

        # if obj is any numpy type
        if type(obj).__module__ == np.__name__:
            return obj.item()

        # misc functions (e.g. loss function)
        if hasattr(obj, '__call__'):
            return obj.__name__

        # if obj is a python 'type'
        if type(obj).__name__ == type.__name__:
            return obj.__name__

        raise TypeError('Not JSON Serializable:', obj)

    import h5py
    from keras import __version__ as keras_version

    # if file exists and should not be overwritten
    if not overwrite and os.path.isfile(filepath):
        proceed = ask_to_proceed_with_overwrite(filepath)
        if not proceed:
            return

    f = h5py.File(filepath, 'w')
    # try/finally: the handle must be released even when serialization
    # or a dataset write fails, otherwise the file stays open.
    try:
        f.attrs['keras_version'] = str(keras_version).encode('utf8')
        f.attrs['model_config'] = json.dumps({
            'class_name': model.__class__.__name__,
            'config': model.get_config()
        }, default=get_json_type).encode('utf8')

        model_weights_group = f.create_group('model_weights')
        model.save_weights_to_hdf5_group(model_weights_group)

        if hasattr(model, 'optimizer'):
            f.attrs['training_config'] = json.dumps({
                'optimizer_config': {
                    'class_name': model.optimizer.__class__.__name__,
                    'config': model.optimizer.get_config()
                },
                'loss': model.loss,
                'metrics': model.metrics,
                'sample_weight_mode': model.sample_weight_mode,
                'loss_weights': model.loss_weights,
            }, default=get_json_type).encode('utf8')

            # save optimizer weights
            symbolic_weights = getattr(model.optimizer, 'weights')
            if symbolic_weights:
                optimizer_weights_group = f.create_group('optimizer_weights')
                weight_values = K.batch_get_value(symbolic_weights)
                weight_names = []
                for i, w in enumerate(symbolic_weights):
                    # unnamed variables get a positional fallback name
                    if hasattr(w, 'name') and w.name:
                        name = str(w.name)
                    else:
                        name = 'param_' + str(i)
                    weight_names.append(name.encode('utf8'))
                # the name list records the order the datasets are read back in
                optimizer_weights_group.attrs['weight_names'] = weight_names
                for name, val in zip(weight_names, weight_values):
                    param_dset = optimizer_weights_group.create_dataset(
                        name,
                        val.shape,
                        dtype=val.dtype)
                    if not val.shape:
                        # scalar
                        param_dset[()] = val
                    else:
                        param_dset[:] = val
        f.flush()
    finally:
        f.close()
|
||||
|
||||
|
||||
def load_model(filepath, custom_objects={}):
    '''Loads a model from an HDF5 file written by `save_model`.

    The model is rebuilt from the `model_config` attribute and its
    weights are read from the `model_weights` group. If the file also
    has a `training_config` attribute, the model is compiled with the
    stored optimizer, loss, metrics, loss weights and sample weight
    mode, and the optimizer weights are restored when the file has an
    `optimizer_weights` group. Otherwise a warning is issued and the
    uncompiled model is returned.

    # Arguments
        filepath: path of the HDF5 file to read.
        custom_objects: dict mapping names (strings) to custom classes
            or functions to substitute during deserialization.

    # Returns
        A model instance.

    # Raises
        ValueError: if the file has no `model_config` attribute.
    '''

    def deserialize(obj):
        '''Replaces names found in `custom_objects` by their objects.

        Lists and dicts are walked recursively, so nested containers
        (e.g. a dict of metric lists) are handled instead of
        being tested for membership, which fails for unhashable values.
        '''
        if isinstance(obj, list):
            return [deserialize(value) for value in obj]
        if isinstance(obj, dict):
            return dict((key, deserialize(value))
                        for key, value in obj.items())
        if obj in custom_objects:
            return custom_objects[obj]
        return obj

    import h5py
    f = h5py.File(filepath, mode='r')
    # try/finally: every exit path (including errors) closes the file.
    try:
        # instantiate model
        model_config = f.attrs.get('model_config')
        if model_config is None:
            raise ValueError('No model found in config file.')
        model_config = json.loads(model_config.decode('utf-8'))
        model = model_from_config(model_config, custom_objects=custom_objects)

        # set weights
        model.load_weights_from_hdf5_group(f['model_weights'])

        # instantiate optimizer
        training_config = f.attrs.get('training_config')
        if training_config is None:
            warnings.warn('No training configuration found in save file: '
                          'the model was *not* compiled. Compile it manually.')
            return model
        training_config = json.loads(training_config.decode('utf-8'))
        optimizer_config = training_config['optimizer_config']
        # forward custom_objects so custom optimizer classes can be resolved
        optimizer = optimizer_from_config(optimizer_config,
                                          custom_objects=custom_objects)

        # recover loss functions and metrics
        loss = deserialize(training_config['loss'])
        metrics = deserialize(training_config['metrics'])
        sample_weight_mode = training_config['sample_weight_mode']
        loss_weights = training_config['loss_weights']

        # compile model
        model.compile(optimizer=optimizer,
                      loss=loss,
                      metrics=metrics,
                      loss_weights=loss_weights,
                      sample_weight_mode=sample_weight_mode)

        # set optimizer weights
        if 'optimizer_weights' in f:
            # build train function (to get weight updates)
            if model.__class__.__name__ == 'Sequential':
                model.model._make_train_function()
            else:
                model._make_train_function()
            optimizer_weights_group = f['optimizer_weights']
            optimizer_weight_names = [n.decode('utf8') for n in optimizer_weights_group.attrs['weight_names']]
            # datasets are consumed by set_weights while the file is still open
            optimizer_weight_values = [optimizer_weights_group[n] for n in optimizer_weight_names]
            model.optimizer.set_weights(optimizer_weight_values)
        return model
    finally:
        f.close()
|
||||
|
||||
|
||||
def model_from_config(config, custom_objects={}):
|
||||
from keras.utils.layer_utils import layer_from_config
|
||||
if isinstance(config, list):
|
||||
@@ -77,6 +238,7 @@ class Sequential(Model):
|
||||
self.model = None # internal Model instance
|
||||
self.inputs = [] # tensors
|
||||
self.outputs = [] # tensors (length 1)
|
||||
self.trainable = True
|
||||
|
||||
# model attributes
|
||||
self.inbound_nodes = []
|
||||
@@ -172,7 +334,27 @@ class Sequential(Model):
|
||||
else:
|
||||
self.layers[-1].outbound_nodes = []
|
||||
self.outputs = [self.layers[-1].output]
|
||||
# update self.inbound_nodes
|
||||
self.inbound_nodes[0].output_tensors = self.outputs
|
||||
self.inbound_nodes[0].output_shapes = [self.outputs[0]._keras_shape]
|
||||
self.built = False
|
||||
self._flattened_layers = None
|
||||
|
||||
def get_layer(self, name=None, index=None):
    '''Looks up a layer by its (unique) name or by its index
    in the graph. Indices follow the order of horizontal
    graph traversal (bottom-up).

    The model is built first if it has not been built yet; the
    lookup itself is delegated to the internal model.

    # Arguments
        name: string, name of layer.
        index: integer, index of layer.

    # Returns
        A layer instance.
    '''
    if not self.built:
        self.build()
    inner_model = self.model
    return inner_model.get_layer(name, index)
|
||||
|
||||
def call(self, x, mask=None):
|
||||
if not self.built:
|
||||
@@ -257,13 +439,19 @@ class Sequential(Model):
|
||||
|
||||
@property
|
||||
def trainable_weights(self):
|
||||
if not self.trainable:
|
||||
return []
|
||||
# support for legacy behavior
|
||||
return self._gather_list_attr('trainable_weights')
|
||||
|
||||
@property
|
||||
def non_trainable_weights(self):
|
||||
# support for legacy behavior
|
||||
return self._gather_list_attr('non_trainable_weights')
|
||||
weights = self._gather_list_attr('non_trainable_weights')
|
||||
if not self.trainable:
|
||||
trainable_weights = self._gather_list_attr('trainable_weights')
|
||||
return trainable_weights + weights
|
||||
return weights
|
||||
|
||||
@property
|
||||
def updates(self):
|
||||
@@ -303,7 +491,7 @@ class Sequential(Model):
|
||||
'''
|
||||
# support for legacy behavior
|
||||
for layer in self.flattened_layers:
|
||||
nb_param = len(layer.get_weights())
|
||||
nb_param = len(layer.weights)
|
||||
layer.set_weights(weights[:nb_param])
|
||||
weights = weights[nb_param:]
|
||||
|
||||
@@ -359,6 +547,9 @@ class Sequential(Model):
|
||||
**kwargs)
|
||||
self.optimizer = self.model.optimizer
|
||||
self.loss = self.model.loss
|
||||
self.loss_weights = self.model.loss_weights
|
||||
self.metrics = self.model.metrics
|
||||
self.metrics_tensors = self.model.metrics_tensors
|
||||
self.metrics_names = self.model.metrics_names
|
||||
self.sample_weight_mode = self.model.sample_weight_mode
|
||||
|
||||
@@ -627,7 +818,7 @@ class Sequential(Model):
|
||||
max_q_size: maximum size for the generator queue
|
||||
nb_worker: maximum number of processes to spin up
|
||||
pickle_safe: if True, use process based threading. Note that because
|
||||
this implementation relies on multiprocessing, you should not pass non
|
||||
this implementation relies on multiprocessing, you should not pass
|
||||
non picklable arguments to the generator as they can't be passed
|
||||
easily to children processes.
|
||||
|
||||
|
||||
+86
-60
@@ -1,6 +1,5 @@
|
||||
from __future__ import absolute_import
|
||||
from . import backend as K
|
||||
import numpy as np
|
||||
from .utils.generic_utils import get_from_module
|
||||
from six.moves import zip
|
||||
|
||||
@@ -11,8 +10,24 @@ def clip_norm(g, c, n):
|
||||
return g
|
||||
|
||||
|
||||
def kl_divergence(p, p_hat):
|
||||
return p_hat - p + p * K.log(p / p_hat)
|
||||
def optimizer_from_config(config, custom_objects={}):
    '''Instantiates an optimizer from a serialized config.

    # Arguments
        config: dict with keys `class_name` (name of the optimizer
            class) and `config` (dict passed to the class's
            `from_config`).
        custom_objects: dict mapping class names to custom optimizer
            classes; checked (exact name match) before the built-in
            optimizers, which are matched case-insensitively.

    # Returns
        An optimizer instance.

    # Raises
        ValueError: if the class name is neither in `custom_objects`
            nor a known built-in optimizer.
    '''
    builtin_optimizers = dict(sgd=SGD,
                              rmsprop=RMSprop,
                              adagrad=Adagrad,
                              adadelta=Adadelta,
                              adam=Adam,
                              adamax=Adamax,
                              nadam=Nadam)
    requested = config['class_name']
    if requested not in custom_objects:
        lookup_key = requested.lower()
        if lookup_key not in builtin_optimizers:
            raise ValueError('Optimizer class not found:', requested)
        optimizer_cls = builtin_optimizers[lookup_key]
    else:
        optimizer_cls = custom_objects[requested]
    return optimizer_cls.from_config(config['config'])
|
||||
|
||||
|
||||
class Optimizer(object):
|
||||
@@ -72,35 +87,35 @@ class Optimizer(object):
|
||||
output of `get_weights`).
|
||||
'''
|
||||
params = self.weights
|
||||
if len(params) != len(weights):
|
||||
raise Exception('Provided weight array does not match weights (' +
|
||||
str(len(params)) + ' optimizer params vs. ' +
|
||||
str(len(weights)) + ' provided weights)')
|
||||
for p, w in zip(params, weights):
|
||||
if K.get_value(p).shape != w.shape:
|
||||
weight_value_tuples = []
|
||||
param_values = K.batch_get_value(params)
|
||||
for pv, p, w in zip(param_values, params, weights):
|
||||
if pv.shape != w.shape:
|
||||
raise Exception('Optimizer weight shape ' +
|
||||
str(K.get_value(p).shape) +
|
||||
str(pv.shape) +
|
||||
' not compatible with '
|
||||
'provided weight shape ' + str(w.shape))
|
||||
K.set_value(p, w)
|
||||
weight_value_tuples.append((p, w))
|
||||
K.batch_set_value(weight_value_tuples)
|
||||
|
||||
def get_weights(self):
|
||||
'''Returns the current weights of the optimizer,
|
||||
as a list of numpy arrays.
|
||||
'''
|
||||
weights = []
|
||||
for p in self.weights:
|
||||
weights.append(K.get_value(p))
|
||||
return weights
|
||||
return K.batch_get_value(self.weights)
|
||||
|
||||
def get_config(self):
|
||||
config = {'name': self.__class__.__name__}
|
||||
config = {}
|
||||
if hasattr(self, 'clipnorm'):
|
||||
config['clipnorm'] = self.clipnorm
|
||||
if hasattr(self, 'clipvalue'):
|
||||
config['clipvalue'] = self.clipvalue
|
||||
return config
|
||||
|
||||
@classmethod
|
||||
def from_config(cls, config):
|
||||
return cls(**config)
|
||||
|
||||
|
||||
class SGD(Optimizer):
|
||||
'''Stochastic gradient descent, with support for momentum,
|
||||
@@ -124,13 +139,15 @@ class SGD(Optimizer):
|
||||
def get_updates(self, params, constraints, loss):
|
||||
grads = self.get_gradients(loss, params)
|
||||
lr = self.lr * (1. / (1. + self.decay * self.iterations))
|
||||
self.updates = [(self.iterations, self.iterations + 1.)]
|
||||
self.updates = [K.update_add(self.iterations, 1)]
|
||||
|
||||
# momentum
|
||||
self.weights = [K.variable(np.zeros(K.get_value(p).shape)) for p in params]
|
||||
for p, g, m in zip(params, grads, self.weights):
|
||||
shapes = [K.get_variable_shape(p) for p in params]
|
||||
moments = [K.zeros(shape) for shape in shapes]
|
||||
self.weights = [self.iterations] + moments
|
||||
for p, g, m in zip(params, grads, moments):
|
||||
v = self.momentum * m - lr * g # velocity
|
||||
self.updates.append((m, v))
|
||||
self.updates.append(K.update(m, v))
|
||||
|
||||
if self.nesterov:
|
||||
new_p = p + self.momentum * v - lr * g
|
||||
@@ -141,7 +158,8 @@ class SGD(Optimizer):
|
||||
if p in constraints:
|
||||
c = constraints[p]
|
||||
new_p = c(new_p)
|
||||
self.updates.append((p, new_p))
|
||||
|
||||
self.updates.append(K.update(p, new_p))
|
||||
return self.updates
|
||||
|
||||
def get_config(self):
|
||||
@@ -176,21 +194,22 @@ class RMSprop(Optimizer):
|
||||
|
||||
def get_updates(self, params, constraints, loss):
|
||||
grads = self.get_gradients(loss, params)
|
||||
# accumulators
|
||||
self.weights = [K.variable(np.zeros(K.get_value(p).shape)) for p in params]
|
||||
shapes = [K.get_variable_shape(p) for p in params]
|
||||
accumulators = [K.zeros(shape) for shape in shapes]
|
||||
self.weights = accumulators
|
||||
self.updates = []
|
||||
|
||||
for p, g, a in zip(params, grads, self.weights):
|
||||
for p, g, a in zip(params, grads, accumulators):
|
||||
# update accumulator
|
||||
new_a = self.rho * a + (1. - self.rho) * K.square(g)
|
||||
self.updates.append((a, new_a))
|
||||
self.updates.append(K.update(a, new_a))
|
||||
new_p = p - self.lr * g / (K.sqrt(new_a) + self.epsilon)
|
||||
|
||||
# apply constraints
|
||||
if p in constraints:
|
||||
c = constraints[p]
|
||||
new_p = c(new_p)
|
||||
self.updates.append((p, new_p))
|
||||
self.updates.append(K.update(p, new_p))
|
||||
return self.updates
|
||||
|
||||
def get_config(self):
|
||||
@@ -210,6 +229,9 @@ class Adagrad(Optimizer):
|
||||
# Arguments
|
||||
lr: float >= 0. Learning rate.
|
||||
epsilon: float >= 0.
|
||||
|
||||
# References
|
||||
- [Adaptive Subgradient Methods for Online Learning and Stochastic Optimization](http://www.jmlr.org/papers/volume12/duchi11a/duchi11a.pdf)
|
||||
'''
|
||||
def __init__(self, lr=0.01, epsilon=1e-8, **kwargs):
|
||||
super(Adagrad, self).__init__(**kwargs)
|
||||
@@ -218,19 +240,20 @@ class Adagrad(Optimizer):
|
||||
|
||||
def get_updates(self, params, constraints, loss):
|
||||
grads = self.get_gradients(loss, params)
|
||||
# accumulators
|
||||
self.weights = [K.variable(np.zeros(K.get_value(p).shape)) for p in params]
|
||||
shapes = [K.get_variable_shape(p) for p in params]
|
||||
accumulators = [K.zeros(shape) for shape in shapes]
|
||||
self.weights = accumulators
|
||||
self.updates = []
|
||||
|
||||
for p, g, a in zip(params, grads, self.weights):
|
||||
for p, g, a in zip(params, grads, accumulators):
|
||||
new_a = a + K.square(g) # update accumulator
|
||||
self.updates.append((a, new_a))
|
||||
self.updates.append(K.update(a, new_a))
|
||||
new_p = p - self.lr * g / (K.sqrt(new_a) + self.epsilon)
|
||||
# apply constraints
|
||||
if p in constraints:
|
||||
c = constraints[p]
|
||||
new_p = c(new_p)
|
||||
self.updates.append((p, new_p))
|
||||
self.updates.append(K.update(p, new_p))
|
||||
return self.updates
|
||||
|
||||
def get_config(self):
|
||||
@@ -262,15 +285,16 @@ class Adadelta(Optimizer):
|
||||
|
||||
def get_updates(self, params, constraints, loss):
|
||||
grads = self.get_gradients(loss, params)
|
||||
accumulators = [K.variable(np.zeros(K.get_value(p).shape)) for p in params]
|
||||
delta_accumulators = [K.variable(np.zeros(K.get_value(p).shape)) for p in params]
|
||||
shapes = [K.get_variable_shape(p) for p in params]
|
||||
accumulators = [K.zeros(shape) for shape in shapes]
|
||||
delta_accumulators = [K.zeros(shape) for shape in shapes]
|
||||
self.weights = accumulators + delta_accumulators
|
||||
self.updates = []
|
||||
|
||||
for p, g, a, d_a in zip(params, grads, accumulators, delta_accumulators):
|
||||
# update accumulator
|
||||
new_a = self.rho * a + (1. - self.rho) * K.square(g)
|
||||
self.updates.append((a, new_a))
|
||||
self.updates.append(K.update(a, new_a))
|
||||
|
||||
# use the new accumulator and the *old* delta_accumulator
|
||||
update = g * K.sqrt(d_a + self.epsilon) / K.sqrt(new_a + self.epsilon)
|
||||
@@ -280,11 +304,11 @@ class Adadelta(Optimizer):
|
||||
if p in constraints:
|
||||
c = constraints[p]
|
||||
new_p = c(new_p)
|
||||
self.updates.append((p, new_p))
|
||||
self.updates.append(K.update(p, new_p))
|
||||
|
||||
# update delta_accumulator
|
||||
new_d_a = self.rho * d_a + (1 - self.rho) * K.square(update)
|
||||
self.updates.append((d_a, new_d_a))
|
||||
self.updates.append(K.update(d_a, new_d_a))
|
||||
return self.updates
|
||||
|
||||
def get_config(self):
|
||||
@@ -319,29 +343,30 @@ class Adam(Optimizer):
|
||||
|
||||
def get_updates(self, params, constraints, loss):
|
||||
grads = self.get_gradients(loss, params)
|
||||
self.updates = [(self.iterations, self.iterations + 1)]
|
||||
self.updates = [K.update_add(self.iterations, 1)]
|
||||
|
||||
t = self.iterations + 1
|
||||
lr_t = self.lr * K.sqrt(1. - K.pow(self.beta_2, t)) / (1. - K.pow(self.beta_1, t))
|
||||
|
||||
ms = [K.variable(np.zeros(K.get_value(p).shape)) for p in params]
|
||||
vs = [K.variable(np.zeros(K.get_value(p).shape)) for p in params]
|
||||
self.weights = ms + vs
|
||||
shapes = [K.get_variable_shape(p) for p in params]
|
||||
ms = [K.zeros(shape) for shape in shapes]
|
||||
vs = [K.zeros(shape) for shape in shapes]
|
||||
self.weights = [self.iterations] + ms + vs
|
||||
|
||||
for p, g, m, v in zip(params, grads, ms, vs):
|
||||
m_t = (self.beta_1 * m) + (1. - self.beta_1) * g
|
||||
v_t = (self.beta_2 * v) + (1. - self.beta_2) * K.square(g)
|
||||
p_t = p - lr_t * m_t / (K.sqrt(v_t) + self.epsilon)
|
||||
|
||||
self.updates.append((m, m_t))
|
||||
self.updates.append((v, v_t))
|
||||
self.updates.append(K.update(m, m_t))
|
||||
self.updates.append(K.update(v, v_t))
|
||||
|
||||
new_p = p_t
|
||||
# apply constraints
|
||||
if p in constraints:
|
||||
c = constraints[p]
|
||||
new_p = c(new_p)
|
||||
self.updates.append((p, new_p))
|
||||
self.updates.append(K.update(p, new_p))
|
||||
return self.updates
|
||||
|
||||
def get_config(self):
|
||||
@@ -378,16 +403,17 @@ class Adamax(Optimizer):
|
||||
|
||||
def get_updates(self, params, constraints, loss):
|
||||
grads = self.get_gradients(loss, params)
|
||||
self.updates = [(self.iterations, self.iterations + 1)]
|
||||
self.updates = [K.update_add(self.iterations, 1)]
|
||||
|
||||
t = self.iterations + 1
|
||||
lr_t = self.lr / (1. - K.pow(self.beta_1, t))
|
||||
|
||||
shapes = [K.get_variable_shape(p) for p in params]
|
||||
# zero init of 1st moment
|
||||
ms = [K.variable(np.zeros(K.get_value(p).shape)) for p in params]
|
||||
ms = [K.zeros(shape) for shape in shapes]
|
||||
# zero init of exponentially weighted infinity norm
|
||||
us = [K.variable(np.zeros(K.get_value(p).shape)) for p in params]
|
||||
self.weights = ms + us
|
||||
us = [K.zeros(shape) for shape in shapes]
|
||||
self.weights = [self.iterations] + ms + us
|
||||
|
||||
for p, g, m, u in zip(params, grads, ms, us):
|
||||
|
||||
@@ -395,15 +421,15 @@ class Adamax(Optimizer):
|
||||
u_t = K.maximum(self.beta_2 * u, K.abs(g))
|
||||
p_t = p - lr_t * m_t / (u_t + self.epsilon)
|
||||
|
||||
self.updates.append((m, m_t))
|
||||
self.updates.append((u, u_t))
|
||||
self.updates.append(K.update(m, m_t))
|
||||
self.updates.append(K.update(u, u_t))
|
||||
|
||||
new_p = p_t
|
||||
# apply constraints
|
||||
if p in constraints:
|
||||
c = constraints[p]
|
||||
new_p = c(new_p)
|
||||
self.updates.append((p, new_p))
|
||||
self.updates.append(K.update(p, new_p))
|
||||
return self.updates
|
||||
|
||||
def get_config(self):
|
||||
@@ -430,9 +456,8 @@ class Nadam(Optimizer):
|
||||
epsilon: float >= 0. Fuzz factor.
|
||||
|
||||
# References
|
||||
[1] Nadam report - http://cs229.stanford.edu/proj2015/054_report.pdf
|
||||
[2] On the importance of initialization and momentum in deep learning -
|
||||
http://www.cs.toronto.edu/~fritz/absps/momentum.pdf
|
||||
- [Nadam report](http://cs229.stanford.edu/proj2015/054_report.pdf)
|
||||
- [On the importance of initialization and momentum in deep learning](http://www.cs.toronto.edu/~fritz/absps/momentum.pdf)
|
||||
'''
|
||||
def __init__(self, lr=0.002, beta_1=0.9, beta_2=0.999,
|
||||
epsilon=1e-8, schedule_decay=0.004, **kwargs):
|
||||
@@ -447,7 +472,7 @@ class Nadam(Optimizer):
|
||||
|
||||
def get_updates(self, params, constraints, loss):
|
||||
grads = self.get_gradients(loss, params)
|
||||
self.updates = [(self.iterations, self.iterations + 1)]
|
||||
self.updates = [K.update_add(self.iterations, 1)]
|
||||
|
||||
t = self.iterations + 1
|
||||
|
||||
@@ -458,10 +483,11 @@ class Nadam(Optimizer):
|
||||
m_schedule_next = self.m_schedule * momentum_cache_t * momentum_cache_t_1
|
||||
self.updates.append((self.m_schedule, m_schedule_new))
|
||||
|
||||
ms = [K.variable(np.zeros(K.get_value(p).shape)) for p in params]
|
||||
vs = [K.variable(np.zeros(K.get_value(p).shape)) for p in params]
|
||||
shapes = [K.get_variable_shape(p) for p in params]
|
||||
ms = [K.zeros(shape) for shape in shapes]
|
||||
vs = [K.zeros(shape) for shape in shapes]
|
||||
|
||||
self.weights = ms + vs
|
||||
self.weights = [self.iterations] + ms + vs
|
||||
|
||||
for p, g, m, v in zip(params, grads, ms, vs):
|
||||
# the following equations given in [1]
|
||||
@@ -472,8 +498,8 @@ class Nadam(Optimizer):
|
||||
v_t_prime = v_t / (1. - K.pow(self.beta_2, t))
|
||||
m_t_bar = (1. - momentum_cache_t) * g_prime + momentum_cache_t_1 * m_t_prime
|
||||
|
||||
self.updates.append((m, m_t))
|
||||
self.updates.append((v, v_t))
|
||||
self.updates.append(K.update(m, m_t))
|
||||
self.updates.append(K.update(v, v_t))
|
||||
|
||||
p_t = p - self.lr * m_t_bar / (K.sqrt(v_t_prime) + self.epsilon)
|
||||
new_p = p_t
|
||||
@@ -482,7 +508,7 @@ class Nadam(Optimizer):
|
||||
if p in constraints:
|
||||
c = constraints[p]
|
||||
new_p = c(new_p)
|
||||
self.updates.append((p, new_p))
|
||||
self.updates.append(K.update(p, new_p))
|
||||
return self.updates
|
||||
|
||||
def get_config(self):
|
||||
|
||||
@@ -118,13 +118,17 @@ def flip_axis(x, axis):
|
||||
return x
|
||||
|
||||
|
||||
def array_to_img(x, dim_ordering=K.image_dim_ordering(), scale=True):
|
||||
def array_to_img(x, dim_ordering='default', scale=True):
|
||||
from PIL import Image
|
||||
if dim_ordering == 'default':
|
||||
dim_ordering = K.image_dim_ordering()
|
||||
if dim_ordering == 'th':
|
||||
x = x.transpose(1, 2, 0)
|
||||
if scale:
|
||||
x += max(-np.min(x), 0)
|
||||
x /= np.max(x)
|
||||
x_max = np.max(x)
|
||||
if x_max != 0:
|
||||
x /= x_max
|
||||
x *= 255
|
||||
if x.shape[2] == 3:
|
||||
# RGB
|
||||
@@ -136,7 +140,9 @@ def array_to_img(x, dim_ordering=K.image_dim_ordering(), scale=True):
|
||||
raise Exception('Unsupported channel number: ', x.shape[2])
|
||||
|
||||
|
||||
def img_to_array(img, dim_ordering=K.image_dim_ordering()):
|
||||
def img_to_array(img, dim_ordering='default'):
|
||||
if dim_ordering == 'default':
|
||||
dim_ordering = K.image_dim_ordering()
|
||||
if dim_ordering not in ['th', 'tf']:
|
||||
raise Exception('Unknown dim_ordering: ', dim_ordering)
|
||||
# image has dim_ordering (height, width, channel)
|
||||
@@ -162,7 +168,7 @@ def load_img(path, grayscale=False, target_size=None):
|
||||
else: # Ensure 3 channel even when loaded image is grayscale
|
||||
img = img.convert('RGB')
|
||||
if target_size:
|
||||
img = img.resize(target_size)
|
||||
img = img.resize((target_size[1], target_size[0]))
|
||||
return img
|
||||
|
||||
|
||||
@@ -222,7 +228,9 @@ class ImageDataGenerator(object):
|
||||
horizontal_flip=False,
|
||||
vertical_flip=False,
|
||||
rescale=None,
|
||||
dim_ordering=K.image_dim_ordering()):
|
||||
dim_ordering='default'):
|
||||
if dim_ordering == 'default':
|
||||
dim_ordering = K.image_dim_ordering()
|
||||
self.__dict__.update(locals())
|
||||
self.mean = None
|
||||
self.std = None
|
||||
@@ -446,12 +454,14 @@ class NumpyArrayIterator(Iterator):
|
||||
|
||||
def __init__(self, X, y, image_data_generator,
|
||||
batch_size=32, shuffle=False, seed=None,
|
||||
dim_ordering=K.image_dim_ordering(),
|
||||
dim_ordering='default',
|
||||
save_to_dir=None, save_prefix='', save_format='jpeg'):
|
||||
if y is not None and len(X) != len(y):
|
||||
raise Exception('X (images tensor) and y (labels) '
|
||||
'should have the same length. '
|
||||
'Found: X.shape = %s, y.shape = %s' % (np.asarray(X).shape, np.asarray(y).shape))
|
||||
if dim_ordering == 'default':
|
||||
dim_ordering = K.image_dim_ordering()
|
||||
self.X = X
|
||||
self.y = y
|
||||
self.image_data_generator = image_data_generator
|
||||
@@ -493,10 +503,12 @@ class DirectoryIterator(Iterator):
|
||||
|
||||
def __init__(self, directory, image_data_generator,
|
||||
target_size=(256, 256), color_mode='rgb',
|
||||
dim_ordering=K.image_dim_ordering,
|
||||
dim_ordering='default',
|
||||
classes=None, class_mode='categorical',
|
||||
batch_size=32, shuffle=True, seed=None,
|
||||
save_to_dir=None, save_prefix='', save_format='jpeg'):
|
||||
if dim_ordering == 'default':
|
||||
dim_ordering = K.image_dim_ordering()
|
||||
self.directory = directory
|
||||
self.image_data_generator = image_data_generator
|
||||
self.target_size = tuple(target_size)
|
||||
|
||||
@@ -99,6 +99,7 @@ class Tokenizer(object):
|
||||
wcounts = list(self.word_counts.items())
|
||||
wcounts.sort(key=lambda x: x[1], reverse=True)
|
||||
sorted_voc = [wc[0] for wc in wcounts]
|
||||
# note that index 0 is reserved, never assigned to an existing word
|
||||
self.word_index = dict(list(zip(sorted_voc, list(range(1, len(sorted_voc) + 1)))))
|
||||
|
||||
self.index_docs = {}
|
||||
|
||||
+14
-9
@@ -43,15 +43,15 @@ class EigenvalueRegularizer(Regularizer):
|
||||
|
||||
# power method for approximating the dominant eigenvector:
|
||||
o = K.ones([dim1, 1]) # initial values for the dominant eigenvector
|
||||
domin_eigenvect = K.dot(WW, o)
|
||||
main_eigenvect = K.dot(WW, o)
|
||||
for n in range(power - 1):
|
||||
domin_eigenvect = K.dot(WW, domin_eigenvect)
|
||||
main_eigenvect = K.dot(WW, main_eigenvect)
|
||||
|
||||
WWd = K.dot(WW, domin_eigenvect)
|
||||
WWd = K.dot(WW, main_eigenvect)
|
||||
|
||||
# the corresponding dominant eigenvalue:
|
||||
domin_eigenval = K.dot(K.transpose(WWd), domin_eigenvect) / K.dot(K.transpose(domin_eigenvect), domin_eigenvect)
|
||||
regularized_loss = loss + (domin_eigenval ** 0.5) * self.k # multiplied by the given regularization gain
|
||||
main_eigenval = K.dot(K.transpose(WWd), main_eigenvect) / K.dot(K.transpose(main_eigenvect), main_eigenvect)
|
||||
regularized_loss = loss + (main_eigenval ** 0.5) * self.k # multiplied by the given regularization gain
|
||||
|
||||
return K.in_train_phase(regularized_loss[0, 0], loss)
|
||||
|
||||
@@ -75,8 +75,11 @@ class WeightRegularizer(Regularizer):
|
||||
'ActivityRegularizer '
|
||||
'(i.e. activity_regularizer="l2" instead '
|
||||
'of activity_regularizer="activity_l2".')
|
||||
regularized_loss = loss + K.sum(K.abs(self.p)) * self.l1
|
||||
regularized_loss += K.sum(K.square(self.p)) * self.l2
|
||||
regularized_loss = loss
|
||||
if self.l1:
|
||||
regularized_loss += K.sum(self.l1 * K.abs(self.p))
|
||||
if self.l2:
|
||||
regularized_loss += K.sum(self.l2 * K.square(self.p))
|
||||
return K.in_train_phase(regularized_loss, loss)
|
||||
|
||||
def get_config(self):
|
||||
@@ -102,8 +105,10 @@ class ActivityRegularizer(Regularizer):
|
||||
regularized_loss = loss
|
||||
for i in range(len(self.layer.inbound_nodes)):
|
||||
output = self.layer.get_output_at(i)
|
||||
regularized_loss += self.l1 * K.sum(K.mean(K.abs(output), axis=0))
|
||||
regularized_loss += self.l2 * K.sum(K.mean(K.square(output), axis=0))
|
||||
if self.l1:
|
||||
regularized_loss += K.sum(self.l1 * K.abs(output))
|
||||
if self.l2:
|
||||
regularized_loss += K.sum(self.l2 * K.square(output))
|
||||
return K.in_train_phase(regularized_loss, loss)
|
||||
|
||||
def get_config(self):
|
||||
|
||||
@@ -5,6 +5,7 @@ import tarfile
|
||||
import os
|
||||
import sys
|
||||
import shutil
|
||||
import hashlib
|
||||
from six.moves.urllib.request import urlopen
|
||||
from six.moves.urllib.error import URLError, HTTPError
|
||||
|
||||
@@ -21,9 +22,10 @@ if sys.version_info[0] == 2:
|
||||
count = 0
|
||||
while 1:
|
||||
chunk = response.read(chunk_size)
|
||||
if not chunk:
|
||||
break
|
||||
count += 1
|
||||
if not chunk:
|
||||
reporthook(count, total_size, total_size)
|
||||
break
|
||||
if reporthook:
|
||||
reporthook(count, chunk_size, total_size)
|
||||
yield chunk
|
||||
@@ -36,11 +38,12 @@ else:
|
||||
from six.moves.urllib.request import urlretrieve
|
||||
|
||||
|
||||
def get_file(fname, origin, untar=False):
|
||||
def get_file(fname, origin, untar=False,
|
||||
md5_hash=None, cache_subdir='datasets'):
|
||||
datadir_base = os.path.expanduser(os.path.join('~', '.keras'))
|
||||
if not os.access(datadir_base, os.W_OK):
|
||||
datadir_base = os.path.join('/tmp', '.keras')
|
||||
datadir = os.path.join(datadir_base, 'datasets')
|
||||
datadir = os.path.join(datadir_base, cache_subdir)
|
||||
if not os.path.exists(datadir):
|
||||
os.makedirs(datadir)
|
||||
|
||||
@@ -50,7 +53,18 @@ def get_file(fname, origin, untar=False):
|
||||
else:
|
||||
fpath = os.path.join(datadir, fname)
|
||||
|
||||
if not os.path.exists(fpath):
|
||||
download = False
|
||||
if os.path.exists(fpath):
|
||||
# file found; verify integrity if a hash was provided
|
||||
if md5_hash is not None:
|
||||
if not validate_file(fpath, md5_hash):
|
||||
print('A local file was found, but it seems to be '
|
||||
'incomplete or outdated.')
|
||||
download = True
|
||||
else:
|
||||
download = True
|
||||
|
||||
if download:
|
||||
print('Downloading data from', origin)
|
||||
global progbar
|
||||
progbar = None
|
||||
@@ -60,7 +74,7 @@ def get_file(fname, origin, untar=False):
|
||||
if progbar is None:
|
||||
progbar = Progbar(total_size)
|
||||
else:
|
||||
progbar.update(count*block_size)
|
||||
progbar.update(count * block_size)
|
||||
|
||||
error_msg = 'URL fetch failure on {}: {} -- {}'
|
||||
try:
|
||||
@@ -93,3 +107,14 @@ def get_file(fname, origin, untar=False):
|
||||
return untar_fpath
|
||||
|
||||
return fpath
|
||||
|
||||
|
||||
def validate_file(fpath, md5_hash, chunk_size=65535):
    '''Check a file against an expected MD5 checksum.

    The file is streamed in `chunk_size`-byte pieces so that large
    files are never held in memory in one piece.

    # Arguments
        fpath: path of the file to validate.
        md5_hash: expected MD5 hex digest. It is converted with `str()`
            and compared case-insensitively.
        chunk_size: number of bytes read per iteration.

    # Returns
        True if the MD5 hex digest of the file matches `md5_hash`,
        False otherwise.
    '''
    hasher = hashlib.md5()
    with open(fpath, 'rb') as f:
        # two-argument iter(): keep calling f.read() until it returns b'' (EOF)
        for chunk in iter(lambda: f.read(chunk_size), b''):
            hasher.update(chunk)
    # hexdigest() is always lower-case; normalise the expected value to match
    return hasher.hexdigest() == str(md5_hash).lower()
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
from __future__ import absolute_import
|
||||
import h5py
|
||||
from __future__ import print_function
|
||||
import numpy as np
|
||||
import sys
|
||||
from collections import defaultdict
|
||||
|
||||
|
||||
@@ -8,6 +9,8 @@ class HDF5Matrix():
|
||||
refs = defaultdict(int)
|
||||
|
||||
def __init__(self, datapath, dataset, start, end, normalizer=None):
|
||||
import h5py
|
||||
|
||||
if datapath not in list(self.refs.keys()):
|
||||
f = h5py.File(datapath)
|
||||
self.refs[datapath] = f
|
||||
@@ -29,7 +32,7 @@ class HDF5Matrix():
|
||||
raise IndexError
|
||||
elif isinstance(key, int):
|
||||
if key + self.start < self.end:
|
||||
idx = key+self.start
|
||||
idx = key + self.start
|
||||
else:
|
||||
raise IndexError
|
||||
elif isinstance(key, np.ndarray):
|
||||
@@ -49,7 +52,7 @@ class HDF5Matrix():
|
||||
|
||||
@property
|
||||
def shape(self):
|
||||
return tuple([self.end - self.start, self.data.shape[1]])
|
||||
return (self.end - self.start,) + self.data.shape[1:]
|
||||
|
||||
|
||||
def save_array(array, name):
|
||||
@@ -69,3 +72,17 @@ def load_array(name):
|
||||
a[:] = array[:]
|
||||
f.close()
|
||||
return a
|
||||
|
||||
|
||||
def ask_to_proceed_with_overwrite(filepath):
    '''Interactively ask whether an existing file may be overwritten.

    The user is prompted until the answer is "y" or "n". Answers are
    stripped of surrounding whitespace and lower-cased before being
    checked, so "Y" or "n " are accepted instead of causing a re-prompt.

    # Arguments
        filepath: path of the already existing file (shown in the prompt).

    # Returns
        True if the user answered "y" (overwrite), False if "n" (cancel).
    '''
    get_input = input
    if sys.version_info[:2] <= (2, 7):
        # on Python 2 use `raw_input` so the answer is read as plain text
        get_input = raw_input
    # (filepath,) is a real 1-tuple, so a tuple-valued path formats correctly
    overwrite = get_input('[WARNING] %s already exists - overwrite? '
                          '[y/n]' % (filepath,)).strip().lower()
    while overwrite not in ['y', 'n']:
        overwrite = get_input(
            'Enter "y" (overwrite) or "n" (cancel).').strip().lower()
    if overwrite == 'n':
        return False
    print('[TIP] Next time specify overwrite=True!')
    return True
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
from __future__ import print_function
|
||||
|
||||
from .generic_utils import get_from_module
|
||||
from .np_utils import convert_kernel
|
||||
from ..layers import *
|
||||
from ..models import Model, Sequential, Graph
|
||||
from .. import backend as K
|
||||
@@ -97,3 +98,22 @@ def print_summary(layers, relevant_nodes=None, line_length=100, positions=[.33,
|
||||
|
||||
print('Total params: %s' % total_params)
|
||||
print('_' * line_length)
|
||||
|
||||
|
||||
def convert_all_kernels_in_model(model):
    '''Run `convert_kernel` over the `W` of every convolution layer.

    Layers are selected by class name. For each selected layer the
    current value of `layer.W` is fetched with `K.get_value`, converted,
    and all converted values are written back with a single
    `K.batch_set_value` call.

    # Arguments
        model: object whose `layers` attribute is iterated.
    '''
    # Note: SeparableConvolution not included
    # since only supported by TF.
    conv_classes = {
        'Convolution1D',
        'Convolution2D',
        'Convolution3D',
        'AtrousConvolution2D',
        'Deconvolution2D',
    }
    conv_layers = [candidate for candidate in model.layers
                   if candidate.__class__.__name__ in conv_classes]
    to_assign = [(conv.W, convert_kernel(K.get_value(conv.W)))
                 for conv in conv_layers]
    K.batch_set_value(to_assign)
|
||||
|
||||
@@ -120,3 +120,14 @@ def conv_output_length(input_length, filter_size, border_mode, stride, dilation=
|
||||
elif border_mode == 'valid':
|
||||
output_length = input_length - dilated_filter_size + 1
|
||||
return (output_length + stride - 1) // stride
|
||||
|
||||
|
||||
def conv_input_length(output_length, filter_size, border_mode, stride):
    '''Compute the input length that yields `output_length`.

    # Arguments
        output_length: integer, or None.
        filter_size: integer.
        border_mode: one of "same" or "valid".
        stride: integer.

    # Returns
        None when `output_length` is None, otherwise
        `(output_length - 1) * stride - 2 * pad + filter_size`, where
        `pad` is `filter_size // 2` for "same" and 0 for "valid".
    '''
    if output_length is None:
        return None
    assert border_mode in {'same', 'valid'}
    if border_mode == 'valid':
        pad = 0
    elif border_mode == 'same':
        pad = filter_size // 2
    unpadded_length = (output_length - 1) * stride + filter_size
    return unpadded_length - 2 * pad
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
import numpy as np
|
||||
from numpy.testing import assert_allclose
|
||||
import inspect
|
||||
import functools
|
||||
|
||||
from ..engine import Model, Input
|
||||
from ..models import Sequential, model_from_json
|
||||
@@ -35,7 +36,8 @@ def get_test_data(nb_train=1000, nb_test=500, input_shape=(10,),
|
||||
|
||||
|
||||
def layer_test(layer_cls, kwargs={}, input_shape=None, input_dtype=None,
|
||||
input_data=None, expected_output=None, expected_output_dtype=None):
|
||||
input_data=None, expected_output=None,
|
||||
expected_output_dtype=None, fixed_batch_size=False):
|
||||
'''Test routine for a layer with a single input tensor
|
||||
and single output tensor.
|
||||
'''
|
||||
@@ -63,7 +65,10 @@ def layer_test(layer_cls, kwargs={}, input_shape=None, input_dtype=None,
|
||||
layer = layer_cls(**kwargs)
|
||||
|
||||
# test in functional API
|
||||
x = Input(shape=input_shape[1:], dtype=input_dtype)
|
||||
if fixed_batch_size:
|
||||
x = Input(batch_shape=input_shape, dtype=input_dtype)
|
||||
else:
|
||||
x = Input(shape=input_shape[1:], dtype=input_dtype)
|
||||
y = layer(x)
|
||||
assert K.dtype(y) == expected_output_dtype
|
||||
|
||||
@@ -102,3 +107,15 @@ def layer_test(layer_cls, kwargs={}, input_shape=None, input_dtype=None,
|
||||
|
||||
# for further checks in the caller function
|
||||
return actual_output
|
||||
|
||||
|
||||
def keras_test(func):
    '''Clean up after tensorflow tests.

    Decorator for test functions. Once `func` has returned or raised,
    and if `K._BACKEND` is 'tensorflow', `K.clear_session()` is called.
    The cleanup sits in a `finally` clause so that a failing test does
    not skip it.

    # Arguments
        func: the test function to wrap.

    # Returns
        A wrapper (carrying `func`'s metadata through `functools.wraps`)
        that forwards all arguments to `func` and returns its result.
    '''
    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        try:
            return func(*args, **kwargs)
        finally:
            # runs on success and on failure; exceptions still propagate
            if K._BACKEND == 'tensorflow':
                K.clear_session()
    return wrapper
|
||||
|
||||
@@ -78,7 +78,7 @@ class BaseWrapper(object):
|
||||
|
||||
for params_name in params:
|
||||
if params_name not in legal_params:
|
||||
assert False, '{} is not a legal parameter'.format(params_name)
|
||||
raise ValueError('{} is not a legal parameter'.format(params_name))
|
||||
|
||||
def get_params(self, deep=True):
|
||||
'''Get parameters for this estimator.
|
||||
@@ -234,6 +234,13 @@ class KerasClassifier(BaseWrapper):
|
||||
Mean accuracy of predictions on X wrt. y.
|
||||
'''
|
||||
kwargs = self.filter_sk_params(Sequential.evaluate, kwargs)
|
||||
|
||||
loss_name = self.model.loss
|
||||
if hasattr(loss_name, '__name__'):
|
||||
loss_name = loss_name.__name__
|
||||
if loss_name == 'categorical_crossentropy' and len(y.shape) != 2:
|
||||
y = to_categorical(y)
|
||||
|
||||
outputs = self.model.evaluate(X, y, **kwargs)
|
||||
if type(outputs) is not list:
|
||||
outputs = [outputs]
|
||||
@@ -263,7 +270,7 @@ class KerasRegressor(BaseWrapper):
|
||||
Predictions.
|
||||
'''
|
||||
kwargs = self.filter_sk_params(Sequential.predict, kwargs)
|
||||
return self.model.predict(X, **kwargs)
|
||||
return np.squeeze(self.model.predict(X, **kwargs))
|
||||
|
||||
def score(self, X, y, **kwargs):
|
||||
'''Returns the mean loss on the given test data and labels.
|
||||
|
||||
+3
-2
@@ -3,15 +3,16 @@ from setuptools import find_packages
|
||||
|
||||
|
||||
setup(name='Keras',
|
||||
version='1.0.6',
|
||||
version='1.0.8',
|
||||
description='Deep Learning for Python',
|
||||
author='Francois Chollet',
|
||||
author_email='francois.chollet@gmail.com',
|
||||
url='https://github.com/fchollet/keras',
|
||||
download_url='https://github.com/fchollet/keras/tarball/1.0.6',
|
||||
download_url='https://github.com/fchollet/keras/tarball/1.0.8',
|
||||
license='MIT',
|
||||
install_requires=['theano', 'pyyaml', 'six'],
|
||||
extras_require={
|
||||
'h5py': ['h5py'],
|
||||
'visualize': ['pydot-ng'],
|
||||
},
|
||||
packages=find_packages())
|
||||
|
||||
@@ -2,13 +2,14 @@ from __future__ import print_function
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from keras.utils.test_utils import get_test_data
|
||||
from keras.utils.test_utils import get_test_data, keras_test
|
||||
from keras.models import Sequential
|
||||
from keras.layers.core import Dense, Flatten, Activation
|
||||
from keras.layers.convolutional import Convolution2D, MaxPooling2D
|
||||
from keras.utils.np_utils import to_categorical
|
||||
|
||||
|
||||
@keras_test
|
||||
def test_image_classification():
|
||||
'''
|
||||
Classify random 16x16 color images into several classes using logistic regression
|
||||
|
||||
@@ -1,9 +1,10 @@
|
||||
from __future__ import print_function
|
||||
import numpy as np
|
||||
np.random.seed(1337)
|
||||
import pytest
|
||||
import string
|
||||
|
||||
from keras.utils.test_utils import get_test_data
|
||||
from keras.utils.test_utils import get_test_data, keras_test
|
||||
from keras.utils.np_utils import to_categorical
|
||||
from keras.models import Sequential
|
||||
from keras.layers import TimeDistributedDense
|
||||
@@ -14,6 +15,7 @@ from keras.layers import LSTM
|
||||
from keras.layers import Embedding
|
||||
|
||||
|
||||
@keras_test
|
||||
def test_temporal_classification():
|
||||
'''
|
||||
Classify temporal sequences of float numbers
|
||||
@@ -21,7 +23,6 @@ def test_temporal_classification():
|
||||
single layer of GRU units and softmax applied
|
||||
to the last activations of the units
|
||||
'''
|
||||
np.random.seed(1337)
|
||||
(X_train, y_train), (X_test, y_test) = get_test_data(nb_train=500,
|
||||
nb_test=500,
|
||||
input_shape=(3, 5),
|
||||
@@ -40,17 +41,17 @@ def test_temporal_classification():
|
||||
history = model.fit(X_train, y_train, nb_epoch=20, batch_size=32,
|
||||
validation_data=(X_test, y_test),
|
||||
verbose=0)
|
||||
assert(history.history['val_acc'][-1] >= 0.85)
|
||||
assert(history.history['val_acc'][-1] >= 0.8)
|
||||
|
||||
|
||||
@keras_test
|
||||
def test_temporal_regression():
|
||||
'''
|
||||
Predict float numbers (regression) based on sequences
|
||||
of float numbers of length 3 using a single layer of GRU units
|
||||
'''
|
||||
np.random.seed(1337)
|
||||
(X_train, y_train), (X_test, y_test) = get_test_data(nb_train=500,
|
||||
nb_test=200,
|
||||
nb_test=400,
|
||||
input_shape=(3, 5),
|
||||
output_shape=(2,),
|
||||
classification=False)
|
||||
@@ -60,9 +61,10 @@ def test_temporal_regression():
|
||||
model.compile(loss='hinge', optimizer='adam')
|
||||
history = model.fit(X_train, y_train, nb_epoch=5, batch_size=16,
|
||||
validation_data=(X_test, y_test), verbose=0)
|
||||
assert(history.history['val_loss'][-1] < 0.75)
|
||||
assert(history.history['val_loss'][-1] < 1.)
|
||||
|
||||
|
||||
@keras_test
|
||||
def test_sequence_to_sequence():
|
||||
'''
|
||||
Apply a same Dense layer for each element of time dimension of the input
|
||||
@@ -70,7 +72,6 @@ def test_sequence_to_sequence():
|
||||
This does not make use of the temporal structure of the sequence
|
||||
(see TimeDistributedDense for more details)
|
||||
'''
|
||||
np.random.seed(1337)
|
||||
(X_train, y_train), (X_test, y_test) = get_test_data(nb_train=500,
|
||||
nb_test=200,
|
||||
input_shape=(3, 5),
|
||||
@@ -86,13 +87,13 @@ def test_sequence_to_sequence():
|
||||
assert(history.history['val_loss'][-1] < 0.8)
|
||||
|
||||
|
||||
@keras_test
|
||||
def test_stacked_lstm_char_prediction():
|
||||
'''
|
||||
Learn alphabetical char sequence with stacked LSTM.
|
||||
Predict the whole alphabet based on the first two letters ('ab' -> 'ab...z')
|
||||
See non-toy example in examples/lstm_text_generation.py
|
||||
'''
|
||||
np.random.seed(1336)
|
||||
# generate alphabet: http://stackoverflow.com/questions/16060899/alphabet-range-python
|
||||
alphabet = string.ascii_lowercase
|
||||
number_of_chars = len(alphabet)
|
||||
@@ -135,6 +136,7 @@ def test_stacked_lstm_char_prediction():
|
||||
assert(generated == alphabet)
|
||||
|
||||
|
||||
@keras_test
|
||||
def test_masked_temporal():
|
||||
'''
|
||||
Confirm that even with masking on both inputs and outputs, cross-entropies are
|
||||
@@ -147,7 +149,6 @@ def test_masked_temporal():
|
||||
The ground-truth best cross-entropy loss should, then be -log(0.5) = 0.69
|
||||
|
||||
'''
|
||||
np.random.seed(55318)
|
||||
model = Sequential()
|
||||
model.add(Embedding(10, 20, mask_zero=True, input_length=20))
|
||||
model.add(TimeDistributedDense(10))
|
||||
@@ -182,5 +183,4 @@ def test_masked_temporal():
|
||||
assert(np.abs(history.history['val_loss'][-1] - ground_truth) < 0.06)
|
||||
|
||||
if __name__ == '__main__':
|
||||
# pytest.main([__file__])
|
||||
test_temporal_classification()
|
||||
pytest.main([__file__])
|
||||
|
||||
@@ -2,12 +2,13 @@ from __future__ import print_function
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from keras.utils.test_utils import get_test_data
|
||||
from keras.utils.test_utils import get_test_data, keras_test
|
||||
from keras.models import Sequential
|
||||
from keras.layers.core import Dense
|
||||
from keras.utils.np_utils import to_categorical
|
||||
|
||||
|
||||
@keras_test
|
||||
def test_vector_classification():
|
||||
'''
|
||||
Classify random float vectors into 2 classes with logistic regression
|
||||
@@ -37,6 +38,7 @@ def test_vector_classification():
|
||||
assert(history.history['val_acc'][-1] > 0.8)
|
||||
|
||||
|
||||
@keras_test
|
||||
def test_vector_regression():
|
||||
'''
|
||||
Perform float data prediction (regression) using 2 layer MLP
|
||||
|
||||
@@ -38,6 +38,7 @@ def check_two_tensor_operation(function_name, x_input_shape,
|
||||
assert zth.shape == ztf.shape
|
||||
assert_allclose(zth, ztf, atol=1e-05)
|
||||
|
||||
|
||||
def check_composed_tensor_operations(first_function_name, first_function_args,
|
||||
second_function_name, second_function_args,
|
||||
input_shape):
|
||||
@@ -59,6 +60,7 @@ def check_composed_tensor_operations(first_function_name, first_function_args,
|
||||
assert zth.shape == ztf.shape
|
||||
assert_allclose(zth, ztf, atol=1e-05)
|
||||
|
||||
|
||||
class TestBackend(object):
|
||||
|
||||
def test_linear_operations(self):
|
||||
@@ -68,6 +70,8 @@ class TestBackend(object):
|
||||
check_two_tensor_operation('batch_dot', (4, 2, 3), (4, 5, 3),
|
||||
axes=(2, 2))
|
||||
check_single_tensor_operation('transpose', (4, 2))
|
||||
check_single_tensor_operation('reverse', (4, 3, 2), axes=1)
|
||||
check_single_tensor_operation('reverse', (4, 3, 2), axes=(1, 2))
|
||||
|
||||
def test_shape_operations(self):
|
||||
# concatenate
|
||||
@@ -90,14 +94,15 @@ class TestBackend(object):
|
||||
check_single_tensor_operation('expand_dims', (4, 3), dim=-1)
|
||||
check_single_tensor_operation('expand_dims', (4, 3, 2), dim=1)
|
||||
check_single_tensor_operation('squeeze', (4, 3, 1), axis=2)
|
||||
check_composed_tensor_operations('reshape', {'shape':(4,3,1,1)},
|
||||
'squeeze', {'axis':2},
|
||||
check_single_tensor_operation('squeeze', (4, 1, 1), axis=1)
|
||||
check_composed_tensor_operations('reshape', {'shape': (4, 3, 1, 1)},
|
||||
'squeeze', {'axis': 2},
|
||||
(4, 3, 1, 1))
|
||||
|
||||
def test_repeat_elements(self):
|
||||
reps = 3
|
||||
for ndims in [1, 2, 3]:
|
||||
shape = np.arange(2, 2+ndims)
|
||||
shape = np.arange(2, 2 + ndims)
|
||||
arr = np.arange(np.prod(shape)).reshape(shape)
|
||||
arr_th = KTH.variable(arr)
|
||||
arr_tf = KTF.variable(arr)
|
||||
@@ -149,6 +154,17 @@ class TestBackend(object):
|
||||
# count_params
|
||||
assert KTH.count_params(xth) == KTF.count_params(xtf)
|
||||
|
||||
# print_tensor
|
||||
check_single_tensor_operation('print_tensor', ())
|
||||
check_single_tensor_operation('print_tensor', (2,))
|
||||
check_single_tensor_operation('print_tensor', (4, 3))
|
||||
check_single_tensor_operation('print_tensor', (1, 2, 3))
|
||||
|
||||
val = np.random.random((3, 2))
|
||||
xth = KTH.variable(val)
|
||||
xtf = KTF.variable(val)
|
||||
assert KTH.get_variable_shape(xth) == KTF.get_variable_shape(xtf)
|
||||
|
||||
def test_elementwise_operations(self):
|
||||
check_single_tensor_operation('max', (4, 2))
|
||||
check_single_tensor_operation('max', (4, 2), axis=1, keepdims=True)
|
||||
@@ -196,6 +212,11 @@ class TestBackend(object):
|
||||
|
||||
# two-tensor ops
|
||||
check_two_tensor_operation('equal', (4, 2), (4, 2))
|
||||
check_two_tensor_operation('not_equal', (4, 2), (4, 2))
|
||||
check_two_tensor_operation('greater', (4, 2), (4, 2))
|
||||
check_two_tensor_operation('greater_equal', (4, 2), (4, 2))
|
||||
check_two_tensor_operation('lesser', (4, 2), (4, 2))
|
||||
check_two_tensor_operation('lesser_equal', (4, 2), (4, 2))
|
||||
check_two_tensor_operation('maximum', (4, 2), (4, 2))
|
||||
check_two_tensor_operation('minimum', (4, 2), (4, 2))
|
||||
|
||||
@@ -276,6 +297,7 @@ class TestBackend(object):
|
||||
return output, [output]
|
||||
return step_function
|
||||
|
||||
# test default setup
|
||||
th_rnn_step_fn = rnn_step_fn(input_dim, output_dim, KTH)
|
||||
th_inputs = KTH.variable(input_val)
|
||||
th_initial_states = [KTH.variable(init_state_val)]
|
||||
@@ -321,6 +343,35 @@ class TestBackend(object):
|
||||
assert_allclose(th_outputs, unrolled_th_outputs, atol=1e-04)
|
||||
assert_allclose(th_state, unrolled_th_state, atol=1e-04)
|
||||
|
||||
# test go_backwards
|
||||
th_rnn_step_fn = rnn_step_fn(input_dim, output_dim, KTH)
|
||||
th_inputs = KTH.variable(input_val)
|
||||
th_initial_states = [KTH.variable(init_state_val)]
|
||||
last_output, outputs, new_states = KTH.rnn(th_rnn_step_fn, th_inputs,
|
||||
th_initial_states,
|
||||
go_backwards=True,
|
||||
mask=None)
|
||||
th_last_output = KTH.eval(last_output)
|
||||
th_outputs = KTH.eval(outputs)
|
||||
assert len(new_states) == 1
|
||||
th_state = KTH.eval(new_states[0])
|
||||
|
||||
tf_rnn_step_fn = rnn_step_fn(input_dim, output_dim, KTF)
|
||||
tf_inputs = KTF.variable(input_val)
|
||||
tf_initial_states = [KTF.variable(init_state_val)]
|
||||
last_output, outputs, new_states = KTF.rnn(tf_rnn_step_fn, tf_inputs,
|
||||
tf_initial_states,
|
||||
go_backwards=True,
|
||||
mask=None)
|
||||
tf_last_output = KTF.eval(last_output)
|
||||
tf_outputs = KTF.eval(outputs)
|
||||
assert len(new_states) == 1
|
||||
tf_state = KTF.eval(new_states[0])
|
||||
|
||||
assert_allclose(tf_last_output, th_last_output, atol=1e-04)
|
||||
assert_allclose(tf_outputs, th_outputs, atol=1e-04)
|
||||
assert_allclose(tf_state, th_state, atol=1e-04)
|
||||
|
||||
# test unroll with backwards = True
|
||||
bwd_last_output, bwd_outputs, bwd_new_states = KTH.rnn(
|
||||
th_rnn_step_fn, th_inputs,
|
||||
@@ -567,6 +618,58 @@ class TestBackend(object):
|
||||
assert(np.max(rand) == 1)
|
||||
assert(np.min(rand) == 0)
|
||||
|
||||
def test_ctc(self):
    '''Check `ctc_batch_cost` on both backends against reference losses.'''
    # simplified version of TensorFlow's test

    # dimensions are batch x time x categories
    labels = np.asarray([[0, 1, 2, 1, 0], [0, 1, 1, 0, -1]])
    inputs = np.asarray(
        [[[0.633766, 0.221185, 0.0917319, 0.0129757, 0.0142857, 0.0260553],
          [0.111121, 0.588392, 0.278779, 0.0055756, 0.00569609, 0.010436],
          [0.0357786, 0.633813, 0.321418, 0.00249248, 0.00272882, 0.0037688],
          [0.0663296, 0.643849, 0.280111, 0.00283995, 0.0035545, 0.00331533],
          [0.458235, 0.396634, 0.123377, 0.00648837, 0.00903441, 0.00623107]],
         [[0.30176, 0.28562, 0.0831517, 0.0862751, 0.0816851, 0.161508],
          [0.24082, 0.397533, 0.0557226, 0.0546814, 0.0557528, 0.19549],
          [0.230246, 0.450868, 0.0389607, 0.038309, 0.0391602, 0.202456],
          [0.280884, 0.429522, 0.0326593, 0.0339046, 0.0326856, 0.190345],
          [0.423286, 0.315517, 0.0338439, 0.0393744, 0.0339315, 0.154046]]],
        dtype=np.float32)

    label_lens = np.expand_dims(np.asarray([5, 4]), 1)
    input_lens = np.expand_dims(np.asarray([5, 5]), 1)  # number of timesteps

    # the Theano and Tensorflow CTC code use different methods to ensure
    # numerical stability. The Theano code subtracts out the max
    # before the final log, so the results are different but scale
    # identically and still train properly
    loss_log_probs_tf = [3.34211, 5.42262]
    loss_log_probs_th = [1.73308, 3.81351]

    def batch_cost(backend):
        # wrap the numpy fixtures as backend variables and evaluate the cost
        y_true = backend.variable(labels, dtype="int32")
        y_pred = backend.variable(inputs, dtype="float32")
        input_length = backend.variable(input_lens, dtype="int32")
        label_length = backend.variable(label_lens, dtype="int32")
        cost = backend.ctc_batch_cost(y_true, y_pred,
                                      input_length, label_length)
        return backend.eval(cost)

    res = batch_cost(KTF)
    assert_allclose(res[:, 0], loss_log_probs_tf, atol=1e-05)

    res = batch_cost(KTH)
    assert_allclose(res[0, :], loss_log_probs_th, atol=1e-05)
|
||||
|
||||
def test_one_hot(self):
    '''Compare each backend's `one_hot` with a numpy identity-matrix lookup.'''
    nb_classes = 20
    sample_shape = (30, 10)  # (batch_size, input_length)
    indices = np.random.randint(0, nb_classes, size=sample_shape)
    # row i of the identity matrix is the one-hot vector of class i
    expected = np.eye(nb_classes)[indices]
    for backend in (KTH, KTF):
        index_var = backend.variable(indices, dtype='int32')
        encoded = backend.eval(backend.one_hot(index_var, nb_classes))
        assert np.all(encoded == expected)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
pytest.main([__file__])
|
||||
|
||||
@@ -4,11 +4,56 @@ import numpy as np
|
||||
|
||||
from keras.layers import Dense, Dropout, InputLayer
|
||||
from keras.engine import merge, Input, get_source_inputs
|
||||
from keras.models import Model
|
||||
from keras.models import Model, Sequential
|
||||
from keras import backend as K
|
||||
from keras.models import model_from_json, model_from_yaml
|
||||
from keras.utils.test_utils import keras_test
|
||||
|
||||
|
||||
@keras_test
def test_trainable_weights():
    '''Toggling `trainable` moves weights between the two weight lists.

    The same sequence is run on a functional `Model` and on a
    `Sequential` model: flip `trainable` on the model itself, restore
    it, then flip it on the single Dense layer.
    '''
    def assert_all_trainable(model, weights):
        assert model.trainable_weights == weights
        assert model.non_trainable_weights == []

    def assert_all_frozen(model, weights):
        assert model.trainable_weights == []
        assert model.non_trainable_weights == weights

    def exercise(model, dense_index):
        weights = model.weights
        assert_all_trainable(model, weights)

        model.trainable = False
        assert_all_frozen(model, weights)

        model.trainable = True
        assert_all_trainable(model, weights)

        model.layers[dense_index].trainable = False
        assert_all_frozen(model, weights)

    # functional model: the Dense layer is model.layers[1]
    a = Input(shape=(2,))
    b = Dense(1)(a)
    exercise(Model(a, b), 1)

    # sequential model: the Dense layer is model.layers[0]
    sequential = Sequential()
    sequential.add(Dense(1, input_dim=2))
    exercise(sequential, 0)
|
||||
|
||||
|
||||
@keras_test
|
||||
def test_learning_phase():
|
||||
a = Input(shape=(32,), name='input_a')
|
||||
b = Input(shape=(32,), name='input_b')
|
||||
@@ -50,6 +95,7 @@ def test_learning_phase():
|
||||
assert fn_outputs_no_dp[1].sum() != fn_outputs_dp[1].sum()
|
||||
|
||||
|
||||
@keras_test
|
||||
def test_node_construction():
|
||||
####################################################
|
||||
# test basics
|
||||
@@ -128,6 +174,7 @@ def test_node_construction():
|
||||
assert dense.get_output_mask_at(1) is None
|
||||
|
||||
|
||||
@keras_test
|
||||
def test_multi_input_layer():
|
||||
####################################################
|
||||
# test multi-input layer
|
||||
@@ -209,6 +256,7 @@ def test_multi_input_layer():
|
||||
assert [x.shape for x in fn_outputs] == [(10, 64), (10, 5)]
|
||||
|
||||
|
||||
@keras_test
|
||||
def test_recursion():
|
||||
####################################################
|
||||
# test recursion
|
||||
@@ -400,98 +448,100 @@ def test_recursion():
|
||||
y = Dense(2)(x)
|
||||
|
||||
|
||||
def test_functional_guide():
|
||||
# MNIST
|
||||
from keras.layers import Input, Dense, LSTM
|
||||
from keras.models import Model
|
||||
from keras.utils import np_utils
|
||||
# @keras_test
|
||||
# def test_functional_guide():
|
||||
# # MNIST
|
||||
# from keras.layers import Input, Dense, LSTM
|
||||
# from keras.models import Model
|
||||
# from keras.utils import np_utils
|
||||
|
||||
# this returns a tensor
|
||||
inputs = Input(shape=(784,))
|
||||
# # this returns a tensor
|
||||
# inputs = Input(shape=(784,))
|
||||
|
||||
# a layer instance is callable on a tensor, and returns a tensor
|
||||
x = Dense(64, activation='relu')(inputs)
|
||||
x = Dense(64, activation='relu')(x)
|
||||
predictions = Dense(10, activation='softmax')(x)
|
||||
# # a layer instance is callable on a tensor, and returns a tensor
|
||||
# x = Dense(64, activation='relu')(inputs)
|
||||
# x = Dense(64, activation='relu')(x)
|
||||
# predictions = Dense(10, activation='softmax')(x)
|
||||
|
||||
# this creates a model that includes
|
||||
# the Input layer and three Dense layers
|
||||
model = Model(input=inputs, output=predictions)
|
||||
model.compile(optimizer='rmsprop',
|
||||
loss='categorical_crossentropy',
|
||||
metrics=['accuracy'])
|
||||
# # this creates a model that includes
|
||||
# # the Input layer and three Dense layers
|
||||
# model = Model(input=inputs, output=predictions)
|
||||
# model.compile(optimizer='rmsprop',
|
||||
# loss='categorical_crossentropy',
|
||||
# metrics=['accuracy'])
|
||||
|
||||
# the data, shuffled and split between train and test sets
|
||||
X_train = np.random.random((100, 784))
|
||||
Y_train = np.random.random((100, 10))
|
||||
# # the data, shuffled and split between train and test sets
|
||||
# X_train = np.random.random((100, 784))
|
||||
# Y_train = np.random.random((100, 10))
|
||||
|
||||
model.fit(X_train, Y_train, nb_epoch=2, batch_size=128)
|
||||
# model.fit(X_train, Y_train, nb_epoch=2, batch_size=128)
|
||||
|
||||
assert model.inputs == [inputs]
|
||||
assert model.outputs == [predictions]
|
||||
assert model.input == inputs
|
||||
assert model.output == predictions
|
||||
assert model.input_shape == (None, 784)
|
||||
assert model.output_shape == (None, 10)
|
||||
# assert model.inputs == [inputs]
|
||||
# assert model.outputs == [predictions]
|
||||
# assert model.input == inputs
|
||||
# assert model.output == predictions
|
||||
# assert model.input_shape == (None, 784)
|
||||
# assert model.output_shape == (None, 10)
|
||||
|
||||
# try calling the sequential model
|
||||
inputs = Input(shape=(784,))
|
||||
new_outputs = model(inputs)
|
||||
new_model = Model(input=inputs, output=new_outputs)
|
||||
new_model.compile(optimizer='rmsprop',
|
||||
loss='categorical_crossentropy',
|
||||
metrics=['accuracy'])
|
||||
# # try calling the sequential model
|
||||
# inputs = Input(shape=(784,))
|
||||
# new_outputs = model(inputs)
|
||||
# new_model = Model(input=inputs, output=new_outputs)
|
||||
# new_model.compile(optimizer='rmsprop',
|
||||
# loss='categorical_crossentropy',
|
||||
# metrics=['accuracy'])
|
||||
|
||||
##################################################
|
||||
# multi-io
|
||||
##################################################
|
||||
tweet_a = Input(shape=(4, 25))
|
||||
tweet_b = Input(shape=(4, 25))
|
||||
# this layer can take as input a matrix
|
||||
# and will return a vector of size 64
|
||||
shared_lstm = LSTM(64)
|
||||
# ##################################################
|
||||
# # multi-io
|
||||
# ##################################################
|
||||
# tweet_a = Input(shape=(4, 25))
|
||||
# tweet_b = Input(shape=(4, 25))
|
||||
# # this layer can take as input a matrix
|
||||
# # and will return a vector of size 64
|
||||
# shared_lstm = LSTM(64)
|
||||
|
||||
# when we reuse the same layer instance
|
||||
# multiple times, the weights of the layer
|
||||
# are also being reused
|
||||
# (it is effectively *the same* layer)
|
||||
encoded_a = shared_lstm(tweet_a)
|
||||
encoded_b = shared_lstm(tweet_b)
|
||||
# # when we reuse the same layer instance
|
||||
# # multiple times, the weights of the layer
|
||||
# # are also being reused
|
||||
# # (it is effectively *the same* layer)
|
||||
# encoded_a = shared_lstm(tweet_a)
|
||||
# encoded_b = shared_lstm(tweet_b)
|
||||
|
||||
# we can then concatenate the two vectors:
|
||||
merged_vector = merge([encoded_a, encoded_b],
|
||||
mode='concat', concat_axis=-1)
|
||||
# # we can then concatenate the two vectors:
|
||||
# merged_vector = merge([encoded_a, encoded_b],
|
||||
# mode='concat', concat_axis=-1)
|
||||
|
||||
# and add a logistic regression on top
|
||||
predictions = Dense(1, activation='sigmoid')(merged_vector)
|
||||
# # and add a logistic regression on top
|
||||
# predictions = Dense(1, activation='sigmoid')(merged_vector)
|
||||
|
||||
# we define a trainable model linking the
|
||||
# tweet inputs to the predictions
|
||||
model = Model(input=[tweet_a, tweet_b], output=predictions)
|
||||
# # we define a trainable model linking the
|
||||
# # tweet inputs to the predictions
|
||||
# model = Model(input=[tweet_a, tweet_b], output=predictions)
|
||||
|
||||
model.compile(optimizer='rmsprop',
|
||||
loss='binary_crossentropy',
|
||||
metrics=['accuracy'])
|
||||
data_a = np.random.random((1000, 4, 25))
|
||||
data_b = np.random.random((1000, 4, 25))
|
||||
labels = np.random.random((1000,))
|
||||
model.fit([data_a, data_b], labels, nb_epoch=1)
|
||||
# model.compile(optimizer='rmsprop',
|
||||
# loss='binary_crossentropy',
|
||||
# metrics=['accuracy'])
|
||||
# data_a = np.random.random((1000, 4, 25))
|
||||
# data_b = np.random.random((1000, 4, 25))
|
||||
# labels = np.random.random((1000,))
|
||||
# model.fit([data_a, data_b], labels, nb_epoch=1)
|
||||
|
||||
model.summary()
|
||||
assert model.inputs == [tweet_a, tweet_b]
|
||||
assert model.outputs == [predictions]
|
||||
assert model.input == [tweet_a, tweet_b]
|
||||
assert model.output == predictions
|
||||
# model.summary()
|
||||
# assert model.inputs == [tweet_a, tweet_b]
|
||||
# assert model.outputs == [predictions]
|
||||
# assert model.input == [tweet_a, tweet_b]
|
||||
# assert model.output == predictions
|
||||
|
||||
assert model.output == predictions
|
||||
assert model.input_shape == [(None, 4, 25), (None, 4, 25)]
|
||||
assert model.output_shape == (None, 1)
|
||||
# assert model.output == predictions
|
||||
# assert model.input_shape == [(None, 4, 25), (None, 4, 25)]
|
||||
# assert model.output_shape == (None, 1)
|
||||
|
||||
assert shared_lstm.get_output_at(0) == encoded_a
|
||||
assert shared_lstm.get_output_at(1) == encoded_b
|
||||
assert shared_lstm.input_shape == (None, 4, 25)
|
||||
# assert shared_lstm.get_output_at(0) == encoded_a
|
||||
# assert shared_lstm.get_output_at(1) == encoded_b
|
||||
# assert shared_lstm.input_shape == (None, 4, 25)
|
||||
|
||||
|
||||
@keras_test
|
||||
def test_sequential_regression():
|
||||
from keras.models import Sequential, Model
|
||||
from keras.layers import Merge, Embedding, BatchNormalization, LSTM, InputLayer, Input
|
||||
|
||||
@@ -7,8 +7,10 @@ from keras.engine.topology import merge, Input
|
||||
from keras.engine.training import Model
|
||||
from keras.models import Sequential, Graph
|
||||
from keras import backend as K
|
||||
from keras.utils.test_utils import keras_test
|
||||
|
||||
|
||||
@keras_test
|
||||
def test_model_methods():
|
||||
a = Input(shape=(3,), name='input_a')
|
||||
b = Input(shape=(3,), name='input_b')
|
||||
@@ -167,6 +169,7 @@ def test_model_methods():
|
||||
out = model.predict([input_a_np, input_b_np], batch_size=4)
|
||||
|
||||
|
||||
@keras_test
|
||||
def test_trainable_argument():
|
||||
x = np.random.random((5, 3))
|
||||
y = np.random.random((5, 2))
|
||||
|
||||
@@ -1,7 +1,8 @@
|
||||
import pytest
|
||||
from keras.utils.test_utils import layer_test
|
||||
from keras.utils.test_utils import layer_test, keras_test
|
||||
|
||||
|
||||
@keras_test
|
||||
def test_leaky_relu():
|
||||
from keras.layers.advanced_activations import LeakyReLU
|
||||
for alpha in [0., .5, -1.]:
|
||||
@@ -9,12 +10,14 @@ def test_leaky_relu():
|
||||
input_shape=(2, 3, 4))
|
||||
|
||||
|
||||
@keras_test
|
||||
def test_prelu():
|
||||
from keras.layers.advanced_activations import PReLU
|
||||
layer_test(PReLU, kwargs={},
|
||||
input_shape=(2, 3, 4))
|
||||
|
||||
|
||||
@keras_test
|
||||
def test_elu():
|
||||
from keras.layers.advanced_activations import ELU
|
||||
for alpha in [0., .5, -1.]:
|
||||
@@ -22,6 +25,7 @@ def test_elu():
|
||||
input_shape=(2, 3, 4))
|
||||
|
||||
|
||||
@keras_test
|
||||
def test_parametric_softplus():
|
||||
from keras.layers.advanced_activations import ParametricSoftplus
|
||||
for alpha in [0., .5, -1.]:
|
||||
@@ -31,12 +35,14 @@ def test_parametric_softplus():
|
||||
input_shape=(2, 3, 4))
|
||||
|
||||
|
||||
@keras_test
|
||||
def test_thresholded_relu():
|
||||
from keras.layers.advanced_activations import ThresholdedReLU
|
||||
layer_test(ThresholdedReLU, kwargs={'theta': 0.5},
|
||||
input_shape=(2, 3, 4))
|
||||
|
||||
|
||||
@keras_test
|
||||
def test_srelu():
|
||||
from keras.layers.advanced_activations import SReLU
|
||||
layer_test(SReLU, kwargs={},
|
||||
|
||||
@@ -2,17 +2,19 @@ import pytest
|
||||
import numpy as np
|
||||
from numpy.testing import assert_allclose
|
||||
|
||||
from keras.utils.test_utils import layer_test
|
||||
from keras.utils.test_utils import layer_test, keras_test
|
||||
from keras.utils.np_utils import conv_input_length
|
||||
from keras import backend as K
|
||||
from keras.layers import convolutional
|
||||
|
||||
|
||||
@keras_test
|
||||
def test_convolution_1d():
|
||||
nb_samples = 2
|
||||
nb_steps = 8
|
||||
input_dim = 5
|
||||
input_dim = 2
|
||||
filter_length = 3
|
||||
nb_filter = 4
|
||||
nb_filter = 3
|
||||
|
||||
for border_mode in ['valid', 'same']:
|
||||
for subsample_length in [1]:
|
||||
@@ -36,6 +38,7 @@ def test_convolution_1d():
|
||||
input_shape=(nb_samples, nb_steps, input_dim))
|
||||
|
||||
|
||||
@keras_test
|
||||
def test_maxpooling_1d():
|
||||
for stride in [1, 2]:
|
||||
layer_test(convolutional.MaxPooling1D,
|
||||
@@ -44,6 +47,7 @@ def test_maxpooling_1d():
|
||||
input_shape=(3, 5, 4))
|
||||
|
||||
|
||||
@keras_test
|
||||
def test_averagepooling_1d():
|
||||
for stride in [1, 2]:
|
||||
layer_test(convolutional.AveragePooling1D,
|
||||
@@ -52,10 +56,11 @@ def test_averagepooling_1d():
|
||||
input_shape=(3, 5, 4))
|
||||
|
||||
|
||||
@keras_test
|
||||
def test_convolution_2d():
|
||||
nb_samples = 2
|
||||
nb_filter = 3
|
||||
stack_size = 4
|
||||
nb_filter = 2
|
||||
stack_size = 3
|
||||
nb_row = 10
|
||||
nb_col = 6
|
||||
|
||||
@@ -84,10 +89,50 @@ def test_convolution_2d():
|
||||
input_shape=(nb_samples, stack_size, nb_row, nb_col))
|
||||
|
||||
|
||||
@keras_test
|
||||
def test_deconvolution_2d():
|
||||
nb_samples = 2
|
||||
nb_filter = 2
|
||||
stack_size = 3
|
||||
nb_row = 10
|
||||
nb_col = 6
|
||||
|
||||
for border_mode in ['valid', 'same']:
|
||||
for subsample in [(1, 1), (2, 2)]:
|
||||
if border_mode == 'same' and subsample != (1, 1):
|
||||
continue
|
||||
|
||||
rows = conv_input_length(nb_row, 3, border_mode, subsample[0])
|
||||
cols = conv_input_length(nb_col, 3, border_mode, subsample[1])
|
||||
layer_test(convolutional.Deconvolution2D,
|
||||
kwargs={'nb_filter': nb_filter,
|
||||
'nb_row': 3,
|
||||
'nb_col': 3,
|
||||
'output_shape': (nb_samples, nb_filter, rows, cols),
|
||||
'border_mode': border_mode,
|
||||
'subsample': subsample},
|
||||
input_shape=(nb_samples, stack_size, nb_row, nb_col),
|
||||
fixed_batch_size=True)
|
||||
|
||||
layer_test(convolutional.Deconvolution2D,
|
||||
kwargs={'nb_filter': nb_filter,
|
||||
'nb_row': 3,
|
||||
'nb_col': 3,
|
||||
'output_shape': (nb_samples, nb_filter, rows, cols),
|
||||
'border_mode': border_mode,
|
||||
'W_regularizer': 'l2',
|
||||
'b_regularizer': 'l2',
|
||||
'activity_regularizer': 'activity_l2',
|
||||
'subsample': subsample},
|
||||
input_shape=(nb_samples, stack_size, nb_row, nb_col),
|
||||
fixed_batch_size=True)
|
||||
|
||||
|
||||
@keras_test
|
||||
def test_atrous_conv_2d():
|
||||
nb_samples = 2
|
||||
nb_filter = 3
|
||||
stack_size = 4
|
||||
nb_filter = 2
|
||||
stack_size = 3
|
||||
nb_row = 10
|
||||
nb_col = 6
|
||||
|
||||
@@ -122,10 +167,11 @@ def test_atrous_conv_2d():
|
||||
|
||||
|
||||
@pytest.mark.skipif(K._BACKEND != 'tensorflow', reason="Requires TF backend")
|
||||
@keras_test
|
||||
def test_separable_conv_2d():
|
||||
nb_samples = 2
|
||||
nb_filter = 8
|
||||
stack_size = 4
|
||||
nb_filter = 6
|
||||
stack_size = 3
|
||||
nb_row = 10
|
||||
nb_col = 6
|
||||
|
||||
@@ -160,6 +206,7 @@ def test_separable_conv_2d():
|
||||
input_shape=(nb_samples, stack_size, nb_row, nb_col))
|
||||
|
||||
|
||||
@keras_test
|
||||
def test_maxpooling_2d():
|
||||
pool_size = (3, 3)
|
||||
|
||||
@@ -171,6 +218,7 @@ def test_maxpooling_2d():
|
||||
input_shape=(3, 4, 11, 12))
|
||||
|
||||
|
||||
@keras_test
|
||||
def test_averagepooling_2d():
|
||||
pool_size = (3, 3)
|
||||
|
||||
@@ -184,10 +232,11 @@ def test_averagepooling_2d():
|
||||
input_shape=(3, 4, 11, 12))
|
||||
|
||||
|
||||
@keras_test
|
||||
def test_convolution_3d():
|
||||
nb_samples = 2
|
||||
nb_filter = 5
|
||||
stack_size = 4
|
||||
nb_filter = 2
|
||||
stack_size = 3
|
||||
kernel_dim1 = 2
|
||||
kernel_dim2 = 3
|
||||
kernel_dim3 = 1
|
||||
@@ -225,6 +274,7 @@ def test_convolution_3d():
|
||||
input_len_dim1, input_len_dim2, input_len_dim3))
|
||||
|
||||
|
||||
@keras_test
|
||||
def test_maxpooling_3d():
|
||||
pool_size = (3, 3, 3)
|
||||
|
||||
@@ -236,6 +286,7 @@ def test_maxpooling_3d():
|
||||
input_shape=(3, 4, 11, 12, 10))
|
||||
|
||||
|
||||
@keras_test
|
||||
def test_averagepooling_3d():
|
||||
pool_size = (3, 3, 3)
|
||||
|
||||
@@ -247,9 +298,10 @@ def test_averagepooling_3d():
|
||||
input_shape=(3, 4, 11, 12, 10))
|
||||
|
||||
|
||||
@keras_test
|
||||
def test_zero_padding_2d():
|
||||
nb_samples = 2
|
||||
stack_size = 7
|
||||
stack_size = 2
|
||||
input_nb_row = 11
|
||||
input_nb_col = 12
|
||||
|
||||
@@ -272,10 +324,9 @@ def test_zero_padding_2d():
|
||||
layer.get_config()
|
||||
|
||||
|
||||
@pytest.mark.skipif(K._BACKEND != 'theano', reason="Requires Theano backend")
|
||||
def test_zero_padding_3d():
|
||||
nb_samples = 2
|
||||
stack_size = 7
|
||||
stack_size = 2
|
||||
input_len_dim1 = 10
|
||||
input_len_dim2 = 11
|
||||
input_len_dim3 = 12
|
||||
@@ -300,15 +351,17 @@ def test_zero_padding_3d():
|
||||
layer.get_config()
|
||||
|
||||
|
||||
@keras_test
|
||||
def test_upsampling_1d():
|
||||
layer_test(convolutional.UpSampling1D,
|
||||
kwargs={'length': 2},
|
||||
input_shape=(3, 5, 4))
|
||||
|
||||
|
||||
@keras_test
|
||||
def test_upsampling_2d():
|
||||
nb_samples = 2
|
||||
stack_size = 7
|
||||
stack_size = 2
|
||||
input_nb_row = 11
|
||||
input_nb_col = 12
|
||||
|
||||
@@ -346,10 +399,9 @@ def test_upsampling_2d():
|
||||
assert_allclose(out, expected_out)
|
||||
|
||||
|
||||
@pytest.mark.skipif(K._BACKEND != 'theano', reason="Requires Theano backend")
|
||||
def test_upsampling_3d():
|
||||
nb_samples = 2
|
||||
stack_size = 7
|
||||
stack_size = 2
|
||||
input_len_dim1 = 10
|
||||
input_len_dim2 = 11
|
||||
input_len_dim3 = 12
|
||||
@@ -392,5 +444,95 @@ def test_upsampling_3d():
|
||||
assert_allclose(out, expected_out)
|
||||
|
||||
|
||||
@keras_test
|
||||
def test_cropping_1d():
|
||||
nb_samples = 2
|
||||
time_length = 10
|
||||
input_len_dim1 = 2
|
||||
input = np.random.rand(nb_samples, time_length, input_len_dim1)
|
||||
|
||||
layer_test(convolutional.Cropping1D,
|
||||
kwargs={'cropping': (2, 2)},
|
||||
input_shape=input.shape)
|
||||
|
||||
def test_cropping_2d():
|
||||
nb_samples = 2
|
||||
stack_size = 2
|
||||
input_len_dim1 = 10
|
||||
input_len_dim2 = 20
|
||||
cropping = ((2, 2), (3, 3))
|
||||
dim_ordering = K.image_dim_ordering()
|
||||
|
||||
if dim_ordering == 'th':
|
||||
input = np.random.rand(nb_samples, stack_size, input_len_dim1, input_len_dim2)
|
||||
else:
|
||||
input = np.random.rand(nb_samples, input_len_dim1, input_len_dim2, stack_size)
|
||||
# basic test
|
||||
layer_test(convolutional.Cropping2D,
|
||||
kwargs={'cropping': cropping,
|
||||
'dim_ordering': dim_ordering},
|
||||
input_shape=input.shape)
|
||||
# correctness test
|
||||
layer = convolutional.Cropping2D(cropping=cropping, dim_ordering=dim_ordering)
|
||||
layer.set_input(K.variable(input), shape=input.shape)
|
||||
|
||||
out = K.eval(layer.output)
|
||||
# compare with numpy
|
||||
if dim_ordering == 'th':
|
||||
expected_out = input[:,
|
||||
:,
|
||||
cropping[0][0]:-cropping[0][1],
|
||||
cropping[1][0]:-cropping[1][1]]
|
||||
else:
|
||||
expected_out = input[:,
|
||||
cropping[0][0]:-cropping[0][1],
|
||||
cropping[1][0]:-cropping[1][1],
|
||||
:]
|
||||
|
||||
assert_allclose(out, expected_out)
|
||||
|
||||
|
||||
def test_cropping_3d():
|
||||
nb_samples = 2
|
||||
stack_size = 2
|
||||
input_len_dim1 = 10
|
||||
input_len_dim2 = 20
|
||||
input_len_dim3 = 30
|
||||
cropping = ((2, 2), (3, 3), (2, 3))
|
||||
dim_ordering = K.image_dim_ordering()
|
||||
|
||||
if dim_ordering == 'th':
|
||||
input = np.random.rand(nb_samples, stack_size, input_len_dim1, input_len_dim2, input_len_dim3)
|
||||
else:
|
||||
input = np.random.rand(nb_samples, input_len_dim1, input_len_dim2, input_len_dim3, stack_size)
|
||||
# basic test
|
||||
layer_test(convolutional.Cropping3D,
|
||||
kwargs={'cropping': cropping,
|
||||
'dim_ordering': dim_ordering},
|
||||
input_shape=input.shape)
|
||||
# correctness test
|
||||
layer = convolutional.Cropping3D(cropping=cropping, dim_ordering=dim_ordering)
|
||||
layer.set_input(K.variable(input), shape=input.shape)
|
||||
|
||||
out = K.eval(layer.output)
|
||||
# compare with numpy
|
||||
if dim_ordering == 'th':
|
||||
expected_out = input[:,
|
||||
:,
|
||||
cropping[0][0]:-cropping[0][1],
|
||||
cropping[1][0]:-cropping[1][1],
|
||||
cropping[2][0]:-cropping[2][1]]
|
||||
else:
|
||||
expected_out = input[:,
|
||||
cropping[0][0]:-cropping[0][1],
|
||||
cropping[1][0]:-cropping[1][1],
|
||||
cropping[2][0]:-cropping[2][1],
|
||||
:]
|
||||
|
||||
assert_allclose(out, expected_out)
|
||||
|
||||
|
||||
def test_cropping_3d():
|
||||
pass
|
||||
if __name__ == '__main__':
|
||||
pytest.main([__file__])
|
||||
|
||||
@@ -3,15 +3,17 @@ import numpy as np
|
||||
|
||||
from keras import backend as K
|
||||
from keras.layers import core
|
||||
from keras.utils.test_utils import layer_test
|
||||
from keras.utils.test_utils import layer_test, keras_test
|
||||
|
||||
|
||||
@keras_test
|
||||
def test_masking():
|
||||
layer_test(core.Masking,
|
||||
kwargs={},
|
||||
input_shape=(3, 2, 3))
|
||||
|
||||
|
||||
@keras_test
|
||||
def test_merge():
|
||||
from keras.layers import Input, merge, Merge
|
||||
from keras.models import Model
|
||||
@@ -83,6 +85,7 @@ def test_merge():
|
||||
model.compile('rmsprop', 'mse')
|
||||
|
||||
|
||||
@keras_test
|
||||
def test_merge_mask_2d():
|
||||
from keras.layers import Input, merge, Masking
|
||||
from keras.models import Model
|
||||
@@ -97,21 +100,28 @@ def test_merge_mask_2d():
|
||||
masked_a = Masking(mask_value=0)(input_a)
|
||||
masked_b = Masking(mask_value=0)(input_b)
|
||||
|
||||
# two different types of merging
|
||||
# three different types of merging
|
||||
merged_sum = merge([masked_a, masked_b], mode='sum')
|
||||
merged_concat = merge([masked_a, masked_b], mode='concat', concat_axis=1)
|
||||
merged_concat_mixed = merge([masked_a, input_b], mode='concat', concat_axis=1)
|
||||
|
||||
# test sum
|
||||
model_sum = Model([input_a, input_b], [merged_sum])
|
||||
model_sum.compile(loss='mse', optimizer='sgd')
|
||||
model_sum.fit([rand(2,3), rand(2,3)], [rand(2,3)], nb_epoch=1)
|
||||
model_sum.fit([rand(2, 3), rand(2, 3)], [rand(2, 3)], nb_epoch=1)
|
||||
|
||||
# test concatenation
|
||||
model_concat = Model([input_a, input_b], [merged_concat])
|
||||
model_concat.compile(loss='mse', optimizer='sgd')
|
||||
model_concat.fit([rand(2,3), rand(2,3)], [rand(2,6)], nb_epoch=1)
|
||||
model_concat.fit([rand(2, 3), rand(2, 3)], [rand(2, 6)], nb_epoch=1)
|
||||
|
||||
# test concatenation with masked and non-masked inputs
|
||||
model_concat = Model([input_a, input_b], [merged_concat_mixed])
|
||||
model_concat.compile(loss='mse', optimizer='sgd')
|
||||
model_concat.fit([rand(2, 3), rand(2, 3)], [rand(2, 6)], nb_epoch=1)
|
||||
|
||||
|
||||
@keras_test
|
||||
def test_merge_mask_3d():
|
||||
from keras.layers import Input, merge, Embedding, SimpleRNN
|
||||
from keras.models import Model
|
||||
@@ -134,15 +144,25 @@ def test_merge_mask_3d():
|
||||
merged_concat = merge([rnn_a, rnn_b], mode='concat', concat_axis=-1)
|
||||
model = Model([input_a, input_b], [merged_concat])
|
||||
model.compile(loss='mse', optimizer='sgd')
|
||||
model.fit([rand(2,3), rand(2,3)], [rand(2,3,6)])
|
||||
model.fit([rand(2, 3), rand(2, 3)], [rand(2, 3, 6)])
|
||||
|
||||
|
||||
@keras_test
|
||||
def test_dropout():
|
||||
layer_test(core.Dropout,
|
||||
kwargs={'p': 0.5},
|
||||
input_shape=(3, 2))
|
||||
|
||||
layer_test(core.SpatialDropout2D,
|
||||
kwargs={'p': 0.5},
|
||||
input_shape=(2, 3, 4, 5))
|
||||
|
||||
layer_test(core.SpatialDropout3D,
|
||||
kwargs={'p': 0.5},
|
||||
input_shape=(2, 3, 4, 5, 6))
|
||||
|
||||
|
||||
@keras_test
|
||||
def test_activation():
|
||||
# with string argument
|
||||
layer_test(core.Activation,
|
||||
@@ -155,30 +175,35 @@ def test_activation():
|
||||
input_shape=(3, 2))
|
||||
|
||||
|
||||
@keras_test
|
||||
def test_reshape():
|
||||
layer_test(core.Reshape,
|
||||
kwargs={'target_shape': (8, 1)},
|
||||
input_shape=(3, 2, 4))
|
||||
|
||||
|
||||
@keras_test
|
||||
def test_permute():
|
||||
layer_test(core.Permute,
|
||||
kwargs={'dims': (2, 1)},
|
||||
input_shape=(3, 2, 4))
|
||||
|
||||
|
||||
@keras_test
|
||||
def test_flatten():
|
||||
layer_test(core.Flatten,
|
||||
kwargs={},
|
||||
input_shape=(3, 2, 4))
|
||||
|
||||
|
||||
@keras_test
|
||||
def test_repeat_vector():
|
||||
layer_test(core.RepeatVector,
|
||||
kwargs={'n': 3},
|
||||
input_shape=(3, 2))
|
||||
|
||||
|
||||
@keras_test
|
||||
def test_lambda():
|
||||
from keras.utils.layer_utils import layer_from_config
|
||||
Lambda = core.Lambda
|
||||
@@ -212,6 +237,7 @@ def test_lambda():
|
||||
ld = layer_from_config({'class_name': 'Lambda', 'config': config})
|
||||
|
||||
|
||||
@keras_test
|
||||
def test_dense():
|
||||
from keras import regularizers
|
||||
from keras import constraints
|
||||
@@ -230,6 +256,7 @@ def test_dense():
|
||||
input_shape=(3, 2))
|
||||
|
||||
|
||||
@keras_test
|
||||
def test_activity_regularization():
|
||||
from keras.engine import Input, Model
|
||||
|
||||
@@ -250,6 +277,7 @@ def test_activity_regularization():
|
||||
model.compile('rmsprop', 'mse')
|
||||
|
||||
|
||||
@keras_test
|
||||
def test_maxout_dense():
|
||||
from keras import regularizers
|
||||
from keras import constraints
|
||||
@@ -268,6 +296,7 @@ def test_maxout_dense():
|
||||
input_shape=(3, 2))
|
||||
|
||||
|
||||
@keras_test
|
||||
def test_highway():
|
||||
from keras import regularizers
|
||||
from keras import constraints
|
||||
@@ -285,6 +314,7 @@ def test_highway():
|
||||
input_shape=(3, 2))
|
||||
|
||||
|
||||
@keras_test
|
||||
def test_timedistributeddense():
|
||||
from keras import regularizers
|
||||
from keras import constraints
|
||||
|
||||
@@ -1,12 +1,13 @@
|
||||
import pytest
|
||||
from keras.utils.test_utils import layer_test
|
||||
from keras.utils.test_utils import layer_test, keras_test
|
||||
from keras.layers.embeddings import Embedding
|
||||
import keras.backend as K
|
||||
|
||||
|
||||
@keras_test
|
||||
def test_embedding():
|
||||
layer_test(Embedding,
|
||||
kwargs={'output_dim': 4., 'input_dim': 10, 'input_length': 2},
|
||||
kwargs={'output_dim': 4, 'input_dim': 10, 'input_length': 2},
|
||||
input_shape=(3, 2),
|
||||
input_dtype='int32',
|
||||
expected_output_dtype=K.floatx())
|
||||
|
||||
@@ -1,9 +1,10 @@
|
||||
import pytest
|
||||
|
||||
from keras.utils.test_utils import layer_test
|
||||
from keras.utils.test_utils import layer_test, keras_test
|
||||
from keras.layers import local
|
||||
|
||||
|
||||
@keras_test
|
||||
def test_locallyconnected_1d():
|
||||
nb_samples = 2
|
||||
nb_steps = 8
|
||||
@@ -33,6 +34,7 @@ def test_locallyconnected_1d():
|
||||
input_shape=(nb_samples, nb_steps, input_dim))
|
||||
|
||||
|
||||
@keras_test
|
||||
def test_locallyconnected_2d():
|
||||
nb_samples = 8
|
||||
nb_filter = 3
|
||||
|
||||
@@ -1,14 +1,16 @@
|
||||
import pytest
|
||||
from keras.utils.test_utils import layer_test
|
||||
from keras.utils.test_utils import layer_test, keras_test
|
||||
from keras.layers import noise
|
||||
|
||||
|
||||
@keras_test
|
||||
def test_GaussianNoise():
|
||||
layer_test(noise.GaussianNoise,
|
||||
kwargs={'sigma': 1.},
|
||||
input_shape=(3, 2, 3))
|
||||
|
||||
|
||||
@keras_test
|
||||
def test_GaussianDropout():
|
||||
layer_test(noise.GaussianDropout,
|
||||
kwargs={'p': 0.5},
|
||||
|
||||
@@ -3,18 +3,18 @@ import numpy as np
|
||||
from numpy.testing import assert_allclose
|
||||
|
||||
from keras.layers.core import Dense, Activation
|
||||
from keras.utils.test_utils import layer_test
|
||||
from keras.utils.test_utils import layer_test, keras_test
|
||||
from keras.layers import normalization
|
||||
from keras.models import Sequential, Graph
|
||||
from keras import backend as K
|
||||
|
||||
|
||||
input_1 = np.arange(10)
|
||||
input_2 = np.zeros(10)
|
||||
input_3 = np.ones((10))
|
||||
input_shapes = [np.ones((10, 10)), np.ones((10, 10, 10))]
|
||||
|
||||
|
||||
@keras_test
|
||||
def basic_batchnorm_test():
|
||||
layer_test(normalization.BatchNormalization,
|
||||
kwargs={'mode': 1},
|
||||
@@ -24,16 +24,17 @@ def basic_batchnorm_test():
|
||||
input_shape=(3, 4, 2))
|
||||
|
||||
|
||||
@keras_test
|
||||
def test_batchnorm_mode_0_or_2():
|
||||
for mode in [0, 2]:
|
||||
model = Sequential()
|
||||
norm_m0 = normalization.BatchNormalization(mode=mode, input_shape=(10,))
|
||||
norm_m0 = normalization.BatchNormalization(mode=mode, input_shape=(10,), momentum=0.8)
|
||||
model.add(norm_m0)
|
||||
model.compile(loss='mse', optimizer='sgd')
|
||||
|
||||
# centered on 5.0, variance 10.0
|
||||
X = np.random.normal(loc=5.0, scale=10.0, size=(1000, 10))
|
||||
model.fit(X, X, nb_epoch=5, verbose=0)
|
||||
model.fit(X, X, nb_epoch=4, verbose=0)
|
||||
out = model.predict(X)
|
||||
out -= K.eval(norm_m0.beta)
|
||||
out /= K.eval(norm_m0.gamma)
|
||||
@@ -42,15 +43,16 @@ def test_batchnorm_mode_0_or_2():
|
||||
assert_allclose(out.std(), 1.0, atol=1e-1)
|
||||
|
||||
|
||||
@keras_test
|
||||
def test_batchnorm_mode_0_convnet():
|
||||
model = Sequential()
|
||||
norm_m0 = normalization.BatchNormalization(mode=0, axis=1, input_shape=(3, 4, 4))
|
||||
norm_m0 = normalization.BatchNormalization(mode=0, axis=1, input_shape=(3, 4, 4), momentum=0.8)
|
||||
model.add(norm_m0)
|
||||
model.compile(loss='mse', optimizer='sgd')
|
||||
|
||||
# centered on 5.0, variance 10.0
|
||||
X = np.random.normal(loc=5.0, scale=10.0, size=(1000, 3, 4, 4))
|
||||
model.fit(X, X, nb_epoch=5, verbose=0)
|
||||
model.fit(X, X, nb_epoch=4, verbose=0)
|
||||
out = model.predict(X)
|
||||
out -= np.reshape(K.eval(norm_m0.beta), (1, 3, 1, 1))
|
||||
out /= np.reshape(K.eval(norm_m0.gamma), (1, 3, 1, 1))
|
||||
@@ -59,6 +61,7 @@ def test_batchnorm_mode_0_convnet():
|
||||
assert_allclose(np.std(out, axis=(0, 2, 3)), 1.0, atol=1e-1)
|
||||
|
||||
|
||||
@keras_test
|
||||
def test_batchnorm_mode_1():
|
||||
norm_m1 = normalization.BatchNormalization(input_shape=(10,), mode=1)
|
||||
norm_m1.build(input_shape=(None, 10))
|
||||
|
||||
@@ -7,10 +7,11 @@ from keras.layers import recurrent, embeddings
|
||||
from keras.models import Sequential
|
||||
from keras.layers.core import Masking
|
||||
from keras import regularizers
|
||||
from keras.utils.test_utils import keras_test
|
||||
|
||||
from keras import backend as K
|
||||
|
||||
nb_samples, timesteps, embedding_dim, output_dim = 3, 5, 10, 5
|
||||
nb_samples, timesteps, embedding_dim, output_dim = 2, 5, 4, 3
|
||||
embedding_num = 12
|
||||
|
||||
|
||||
@@ -23,21 +24,21 @@ def _runner(layer_class):
|
||||
layer_test(layer_class,
|
||||
kwargs={'output_dim': output_dim,
|
||||
'return_sequences': True},
|
||||
input_shape=(3, 2, 3))
|
||||
input_shape=(nb_samples, timesteps, embedding_dim))
|
||||
|
||||
# check dropout
|
||||
layer_test(layer_class,
|
||||
kwargs={'output_dim': output_dim,
|
||||
'dropout_U': 0.1,
|
||||
'dropout_W': 0.1},
|
||||
input_shape=(3, 2, 3))
|
||||
input_shape=(nb_samples, timesteps, embedding_dim))
|
||||
|
||||
# check implementation modes
|
||||
for mode in ['cpu', 'mem', 'gpu']:
|
||||
layer_test(layer_class,
|
||||
kwargs={'output_dim': output_dim,
|
||||
'consume_less': mode},
|
||||
input_shape=(3, 2, 3))
|
||||
input_shape=(nb_samples, timesteps, embedding_dim))
|
||||
|
||||
# check statefulness
|
||||
model = Sequential()
|
||||
@@ -82,7 +83,6 @@ def _runner(layer_class):
|
||||
left_padded_input = np.ones((nb_samples, timesteps))
|
||||
left_padded_input[0, :1] = 0
|
||||
left_padded_input[1, :2] = 0
|
||||
left_padded_input[2, :3] = 0
|
||||
out6 = model.predict(left_padded_input)
|
||||
|
||||
layer.reset_states()
|
||||
@@ -90,7 +90,6 @@ def _runner(layer_class):
|
||||
right_padded_input = np.ones((nb_samples, timesteps))
|
||||
right_padded_input[0, -1:] = 0
|
||||
right_padded_input[1, -2:] = 0
|
||||
right_padded_input[2, -3:] = 0
|
||||
out7 = model.predict(right_padded_input)
|
||||
|
||||
assert_allclose(out7, out6, atol=1e-5)
|
||||
@@ -107,18 +106,22 @@ def _runner(layer_class):
|
||||
K.eval(layer.output)
|
||||
|
||||
|
||||
@keras_test
|
||||
def test_SimpleRNN():
|
||||
_runner(recurrent.SimpleRNN)
|
||||
|
||||
|
||||
@keras_test
|
||||
def test_GRU():
|
||||
_runner(recurrent.GRU)
|
||||
|
||||
|
||||
@keras_test
|
||||
def test_LSTM():
|
||||
_runner(recurrent.LSTM)
|
||||
|
||||
|
||||
@keras_test
|
||||
def test_masking_layer():
|
||||
''' This test based on a previously failing issue here:
|
||||
https://github.com/fchollet/keras/issues/1567
|
||||
|
||||
@@ -1,12 +1,13 @@
|
||||
import pytest
|
||||
import numpy as np
|
||||
from numpy.testing import assert_allclose
|
||||
|
||||
from keras.utils.test_utils import keras_test
|
||||
from keras.layers import wrappers, Input
|
||||
from keras.layers import core, convolutional
|
||||
from keras.layers import core, convolutional, recurrent
|
||||
from keras.models import Sequential, Model, model_from_json
|
||||
|
||||
|
||||
@keras_test
|
||||
def test_TimeDistributed():
|
||||
# first, test with Dense layer
|
||||
model = Sequential()
|
||||
@@ -75,5 +76,45 @@ def test_TimeDistributed():
|
||||
outer_model.fit(np.random.random((10, 3, 2)), np.random.random((10, 3, 3)), nb_epoch=1, batch_size=10)
|
||||
|
||||
|
||||
@keras_test
|
||||
def test_Bidirectional():
|
||||
rnn = recurrent.SimpleRNN
|
||||
nb_sample = 2
|
||||
dim = 2
|
||||
timesteps = 2
|
||||
output_dim = 2
|
||||
for mode in ['sum', 'concat']:
|
||||
x = np.random.random((nb_sample, timesteps, dim))
|
||||
target_dim = 2 * output_dim if mode == 'concat' else output_dim
|
||||
y = np.random.random((nb_sample, target_dim))
|
||||
|
||||
# test with Sequential model
|
||||
model = Sequential()
|
||||
model.add(wrappers.Bidirectional(rnn(output_dim),
|
||||
merge_mode=mode, input_shape=(timesteps, dim)))
|
||||
model.compile(loss='mse', optimizer='sgd')
|
||||
model.fit(x, y, nb_epoch=1, batch_size=1)
|
||||
|
||||
# test config
|
||||
model.get_config()
|
||||
model = model_from_json(model.to_json())
|
||||
model.summary()
|
||||
|
||||
# test stacked bidirectional layers
|
||||
model = Sequential()
|
||||
model.add(wrappers.Bidirectional(rnn(output_dim, return_sequences=True),
|
||||
merge_mode=mode, input_shape=(timesteps, dim)))
|
||||
model.add(wrappers.Bidirectional(rnn(output_dim), merge_mode=mode))
|
||||
model.compile(loss='mse', optimizer='sgd')
|
||||
model.fit(x, y, nb_epoch=1, batch_size=1)
|
||||
|
||||
# test with functional API
|
||||
input = Input((timesteps, dim))
|
||||
output = wrappers.Bidirectional(rnn(output_dim), merge_mode=mode)(input)
|
||||
model = Model(input, output)
|
||||
model.compile(loss='mse', optimizer='sgd')
|
||||
model.fit(x, y, nb_epoch=1, batch_size=1)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
pytest.main([__file__])
|
||||
|
||||
@@ -1,449 +0,0 @@
|
||||
from __future__ import absolute_import
|
||||
from __future__ import print_function
|
||||
import pytest
|
||||
import os
|
||||
import numpy as np
|
||||
np.random.seed(1337)
|
||||
|
||||
from keras import backend as K
|
||||
from keras.models import Graph, Sequential
|
||||
from keras.layers.core import Dense, Activation, Merge, Lambda
|
||||
from keras.utils.test_utils import get_test_data
|
||||
from keras.models import model_from_json, model_from_yaml
|
||||
|
||||
|
||||
batch_size = 32
|
||||
|
||||
(X_train_graph, y_train_graph), (X_test_graph, y_test_graph) = get_test_data(nb_train=100,
|
||||
nb_test=50,
|
||||
input_shape=(32,),
|
||||
classification=False,
|
||||
output_shape=(4,))
|
||||
(X2_train_graph, y2_train_graph), (X2_test_graph, y2_test_graph) = get_test_data(nb_train=100,
|
||||
nb_test=50,
|
||||
input_shape=(32,),
|
||||
classification=False,
|
||||
output_shape=(1,))
|
||||
|
||||
|
||||
def test_graph_fit_generator():
|
||||
def data_generator_graph(train):
|
||||
while 1:
|
||||
if train:
|
||||
yield {'input1': X_train_graph, 'output1': y_train_graph}
|
||||
else:
|
||||
yield {'input1': X_test_graph, 'output1': y_test_graph}
|
||||
|
||||
graph = Graph()
|
||||
graph.add_input(name='input1', input_shape=(32,))
|
||||
|
||||
graph.add_node(Dense(16), name='dense1', input='input1')
|
||||
graph.add_node(Dense(4), name='dense2', input='input1')
|
||||
graph.add_node(Dense(4), name='dense3', input='dense1')
|
||||
|
||||
graph.add_output(name='output1',
|
||||
inputs=['dense2', 'dense3'],
|
||||
merge_mode='sum')
|
||||
graph.compile('rmsprop', {'output1': 'mse'})
|
||||
|
||||
graph.fit_generator(data_generator_graph(True), 1000, nb_epoch=4)
|
||||
graph.fit_generator(data_generator_graph(True), 1000, nb_epoch=4,
|
||||
validation_data={'input1': X_test_graph, 'output1': y_test_graph})
|
||||
graph.fit_generator(data_generator_graph(True), 1000, nb_epoch=4,
|
||||
validation_data=data_generator_graph(False), nb_val_samples=batch_size * 3)
|
||||
graph.fit_generator(data_generator_graph(True), 1000, nb_epoch=4,
|
||||
validation_data=data_generator_graph(False), nb_val_samples=batch_size * 3)
|
||||
gen_loss = graph.evaluate_generator(data_generator_graph(True), 128, verbose=0)
|
||||
|
||||
loss = graph.evaluate({'input1': X_test_graph, 'output1': y_test_graph}, verbose=0)
|
||||
|
||||
# test show_accuracy
|
||||
graph.compile('rmsprop', {'output1': 'mse'}, metrics=['accuracy'])
|
||||
graph.fit_generator(data_generator_graph(True), 1000, nb_epoch=4)
|
||||
graph.fit_generator(data_generator_graph(True), 1000, nb_epoch=4,
|
||||
validation_data={'input1': X_test_graph, 'output1': y_test_graph})
|
||||
graph.fit_generator(data_generator_graph(True), 1000, nb_epoch=4,
|
||||
validation_data=data_generator_graph(False), nb_val_samples=batch_size * 3)
|
||||
graph.fit_generator(data_generator_graph(True), 1000, nb_epoch=4,
|
||||
validation_data=data_generator_graph(False), nb_val_samples=batch_size * 3)
|
||||
gen_loss = graph.evaluate_generator(data_generator_graph(True), 128, verbose=0)
|
||||
|
||||
|
||||
def test_1o_1i():
|
||||
# test a non-sequential graph with 1 input and 1 output
|
||||
np.random.seed(1337)
|
||||
|
||||
graph = Graph()
|
||||
graph.add_input(name='input1', input_shape=(32,))
|
||||
|
||||
graph.add_node(Dense(16), name='dense1', input='input1')
|
||||
graph.add_node(Dense(4), name='dense2', input='input1')
|
||||
graph.add_node(Dense(4), name='dense3', input='dense1')
|
||||
|
||||
graph.add_output(name='output1',
|
||||
inputs=['dense2', 'dense3'],
|
||||
merge_mode='sum')
|
||||
graph.compile('rmsprop', {'output1': 'mse'})
|
||||
|
||||
graph.fit({'input1': X_train_graph, 'output1': y_train_graph},
|
||||
nb_epoch=10)
|
||||
out = graph.predict({'input1': X_test_graph})
|
||||
|
||||
assert(type(out) == dict)
|
||||
assert(len(out) == 1)
|
||||
loss = graph.test_on_batch({'input1': X_test_graph, 'output1': y_test_graph})
|
||||
loss = graph.train_on_batch({'input1': X_test_graph, 'output1': y_test_graph})
|
||||
loss = graph.evaluate({'input1': X_test_graph, 'output1': y_test_graph}, verbose=0)
|
||||
|
||||
# test accuracy:
|
||||
graph.compile('rmsprop', {'output1': 'mse'}, metrics=['accuracy'])
|
||||
graph.fit({'input1': X_train_graph, 'output1': y_train_graph},
|
||||
nb_epoch=1)
|
||||
loss, acc = graph.test_on_batch({'input1': X_test_graph, 'output1': y_test_graph})
|
||||
loss, acc = graph.train_on_batch({'input1': X_test_graph, 'output1': y_test_graph})
|
||||
loss, acc = graph.evaluate({'input1': X_test_graph, 'output1': y_test_graph}, verbose=0)
|
||||
|
||||
# test validation split
|
||||
graph.fit({'input1': X_train_graph, 'output1': y_train_graph},
|
||||
validation_split=0.2, nb_epoch=1)
|
||||
# test validation data
|
||||
graph.fit({'input1': X_train_graph, 'output1': y_train_graph},
|
||||
validation_data={'input1': X_train_graph, 'output1': y_train_graph},
|
||||
nb_epoch=1)
|
||||
|
||||
|
||||
def test_1o_1i_2():
|
||||
# test a more complex non-sequential graph with 1 input and 1 output
|
||||
graph = Graph()
|
||||
graph.add_input(name='input1', input_shape=(32,))
|
||||
|
||||
graph.add_node(Dense(4), name='dense1', input='input1')
|
||||
graph.add_node(Dense(4), name='dense2-0', input='input1')
|
||||
graph.add_node(Activation('relu'), name='dense2', input='dense2-0')
|
||||
|
||||
graph.add_node(Dense(4), name='dense3', input='dense2')
|
||||
graph.add_node(Dense(4), name='dense4', inputs=['dense1', 'dense3'],
|
||||
merge_mode='sum')
|
||||
|
||||
graph.add_output(name='output1', inputs=['dense2', 'dense4'],
|
||||
merge_mode='sum')
|
||||
graph.compile('rmsprop', {'output1': 'mse'})
|
||||
|
||||
graph.fit({'input1': X_train_graph, 'output1': y_train_graph},
|
||||
nb_epoch=2)
|
||||
out = graph.predict({'input1': X_train_graph})
|
||||
assert(type(out == dict))
|
||||
assert(len(out) == 1)
|
||||
|
||||
loss = graph.test_on_batch({'input1': X_test_graph, 'output1': y_test_graph})
|
||||
loss = graph.train_on_batch({'input1': X_test_graph, 'output1': y_test_graph})
|
||||
loss = graph.evaluate({'input1': X_test_graph, 'output1': y_test_graph})
|
||||
|
||||
# test serialization
|
||||
config = graph.get_config()
|
||||
new_graph = Graph.from_config(config)
|
||||
|
||||
graph.summary()
|
||||
json_str = graph.to_json()
|
||||
new_graph = model_from_json(json_str)
|
||||
|
||||
yaml_str = graph.to_yaml()
|
||||
new_graph = model_from_yaml(yaml_str)
|
||||
|
||||
|
||||
def test_1o_2i():
    """Exercise a non-sequential Graph with two inputs and one output.

    'input1' flows through 'dense1' then 'dense3'; 'input2' flows through
    'dense2'. The output sums 'dense2' and 'dense3'. The test fits the graph,
    checks the structure of the prediction result, runs the batch-level
    train/test calls and `evaluate`, and round-trips the model through
    config, JSON and YAML serialization.
    """
    graph = Graph()
    graph.add_input(name='input1', input_shape=(32,))
    graph.add_input(name='input2', input_shape=(32,))

    graph.add_node(Dense(16), name='dense1', input='input1')
    graph.add_node(Dense(4), name='dense2', input='input2')
    graph.add_node(Dense(4), name='dense3', input='dense1')

    graph.add_output(name='output1', inputs=['dense2', 'dense3'],
                     merge_mode='sum')
    graph.compile('rmsprop', {'output1': 'mse'})

    graph.fit({'input1': X_train_graph, 'input2': X2_train_graph, 'output1': y_train_graph},
              nb_epoch=2)
    out = graph.predict({'input1': X_test_graph, 'input2': X2_test_graph})
    # The previous form, type(out == dict), took the type of a boolean and
    # was therefore always truthy; check the prediction container itself.
    assert isinstance(out, dict)
    assert len(out) == 1

    # Return values are not inspected here; the calls only need to succeed.
    test_data = {'input1': X_test_graph, 'input2': X2_test_graph, 'output1': y_test_graph}
    graph.test_on_batch(test_data)
    graph.train_on_batch(test_data)
    graph.evaluate(test_data)

    # test serialization
    config = graph.get_config()
    Graph.from_config(config)

    graph.summary()
    json_str = graph.to_json()
    model_from_json(json_str)

    yaml_str = graph.to_yaml()
    model_from_yaml(yaml_str)
|
||||
|
||||
|
||||
def test_siamese_1():
    """Exercise a Graph whose two inputs pass through one shared Dense node.

    The shared node's two outputs are merged by summation, fed to 'dense1',
    and exposed as 'output1'. The test fits the graph, checks the structure
    of the prediction result, asserts an upper bound on the evaluation loss,
    and round-trips the model through config, JSON and YAML serialization.
    """
    graph = Graph()
    graph.add_input(name='input1', input_shape=(32,))
    graph.add_input(name='input2', input_shape=(32,))

    graph.add_shared_node(Dense(4), name='shared', inputs=['input1', 'input2'], merge_mode='sum')
    graph.add_node(Dense(4), name='dense1', input='shared')
    # graph.add_node(Dense(4), name='output1', input='shared', create_output=True)

    # graph.add_output(name='output1', inputs=['dense1', 'shared'], merge_mode='sum')
    graph.add_output(name='output1', input='dense1')
    graph.compile('rmsprop', {'output1': 'mse'})

    graph.fit({'input1': X_train_graph, 'input2': X2_train_graph, 'output1': y_train_graph},
              nb_epoch=10)
    out = graph.predict({'input1': X_test_graph, 'input2': X2_test_graph})
    # The previous form, type(out == dict), took the type of a boolean and
    # was therefore always truthy; check the prediction container itself.
    assert isinstance(out, dict)
    assert len(out) == 1

    test_data = {'input1': X_test_graph, 'input2': X2_test_graph, 'output1': y_test_graph}
    # Batch-level calls only need to succeed; the loss that is checked is the
    # one reported by evaluate().
    graph.test_on_batch(test_data)
    graph.train_on_batch(test_data)
    loss = graph.evaluate(test_data)
    assert loss < 5.0

    # test serialization
    config = graph.get_config()
    Graph.from_config(config)

    graph.summary()
    json_str = graph.to_json()
    model_from_json(json_str)

    yaml_str = graph.to_yaml()
    model_from_yaml(yaml_str)
|
||||
|
||||
|
||||
'''The test below is failing because of a known bug
|
||||
with the serialization of legacy Graph models
|
||||
containing shared nodes with named outputs.
|
||||
This is very low priority (= no plans to fix it),
|
||||
since the Graph model is deprecated.
|
||||
'''
|
||||
# def test_siamese_2():
|
||||
# graph = Graph()
|
||||
# graph.add_input(name='input1', input_shape=(32,))
|
||||
# graph.add_input(name='input2', input_shape=(32,))
|
||||
|
||||
# graph.add_shared_node(Dense(4), name='shared',
|
||||
# inputs=['input1', 'input2'],
|
||||
# outputs=['shared_output1', 'shared_output2'])
|
||||
# graph.add_node(Dense(4), name='dense1', input='shared_output1')
|
||||
# graph.add_node(Dense(4), name='dense2', input='shared_output2')
|
||||
|
||||
# graph.add_output(name='output1', inputs=['dense1', 'dense2'],
|
||||
# merge_mode='sum')
|
||||
# graph.compile('rmsprop', {'output1': 'mse'})
|
||||
|
||||
# graph.fit({'input1': X_train_graph,
|
||||
# 'input2': X2_train_graph,
|
||||
# 'output1': y_train_graph},
|
||||
# nb_epoch=10)
|
||||
# out = graph.predict({'input1': X_test_graph,
|
||||
# 'input2': X2_test_graph})
|
||||
# assert(type(out == dict))
|
||||
# assert(len(out) == 1)
|
||||
|
||||
# loss = graph.test_on_batch({'input1': X_test_graph,
|
||||
# 'input2': X2_test_graph,
|
||||
# 'output1': y_test_graph})
|
||||
# loss = graph.train_on_batch({'input1': X_test_graph,
|
||||
# 'input2': X2_test_graph,
|
||||
# 'output1': y_test_graph})
|
||||
# loss = graph.evaluate({'input1': X_test_graph,
|
||||
# 'input2': X2_test_graph,
|
||||
# 'output1': y_test_graph})
|
||||
# # test serialization
|
||||
# config = graph.get_config()
|
||||
# new_graph = Graph.from_config(config)
|
||||
|
||||
# graph.summary()
|
||||
# json_str = graph.to_json()
|
||||
# new_graph = model_from_json(json_str)
|
||||
|
||||
# yaml_str = graph.to_yaml()
|
||||
# new_graph = model_from_yaml(yaml_str)
|
||||
|
||||
|
||||
def test_2o_1i_save_weights():
    """Check weight save/load and loss weighting on a 1-input, 2-output Graph.

    A graph is trained and evaluated, its weights are written to a temporary
    HDF5 file, an identically structured fresh graph loads them back, and
    the evaluation loss of the reloaded graph must equal the original one.
    Finally the reloaded graph is recompiled with per-output `loss_weights`
    and fitted for one epoch.
    """
    def _build_graph():
        # 'input1' fans out into two heads: 'dense2' -> 'output1' and
        # 'dense1' -> 'dense3' -> 'output2'. Used for both the trained graph
        # and the fresh graph that receives the saved weights, so the two
        # are guaranteed to share the same structure.
        g = Graph()
        g.add_input(name='input1', input_shape=(32,))
        g.add_node(Dense(16), name='dense1', input='input1')
        g.add_node(Dense(4), name='dense2', input='input1')
        g.add_node(Dense(1), name='dense3', input='dense1')
        g.add_output(name='output1', input='dense2')
        g.add_output(name='output2', input='dense3')
        g.compile('rmsprop', {'output1': 'mse', 'output2': 'mse'})
        return g

    train_data = {'input1': X_train_graph, 'output1': y_train_graph, 'output2': y2_train_graph}
    test_data = {'input1': X_test_graph, 'output1': y_test_graph, 'output2': y2_test_graph}

    graph = _build_graph()
    graph.fit(train_data, nb_epoch=10)
    out = graph.predict({'input1': X_test_graph})
    # The previous form, type(out == dict), took the type of a boolean and
    # was therefore always truthy; check the prediction container itself.
    assert isinstance(out, dict)
    assert len(out) == 2
    graph.test_on_batch(test_data)
    graph.train_on_batch(test_data)
    loss = graph.evaluate(test_data)

    # test weight saving
    fname = 'test_2o_1i_weights_temp.h5'
    graph.save_weights(fname, overwrite=True)
    try:
        graph = _build_graph()
        graph.load_weights(fname)
    finally:
        # Remove the temporary file even when rebuilding or loading fails.
        os.remove(fname)

    nloss = graph.evaluate(test_data)
    assert loss == nloss

    # test loss weights
    graph.compile('rmsprop', {'output1': 'mse', 'output2': 'mse'},
                  loss_weights={'output1': 1., 'output2': 2.})
    graph.fit(train_data, nb_epoch=1)
|
||||
|
||||
|
||||
def test_2o_1i_sample_weights():
    """Exercise per-output sample weights on a 1-input, 2-output Graph.

    Random uniform weights (one per sample) are drawn for each output, for
    both the training and the test arrays, and passed through `fit`,
    `test_on_batch`, `train_on_batch` and `evaluate`.
    """
    graph = Graph()
    graph.add_input(name='input1', input_shape=(32,))

    graph.add_node(Dense(16), name='dense1', input='input1')
    graph.add_node(Dense(4), name='dense2', input='input1')
    graph.add_node(Dense(1), name='dense3', input='dense1')

    graph.add_output(name='output1', input='dense2')
    graph.add_output(name='output2', input='dense3')

    # One weight per sample, sized from the first axis of each target array.
    weights1 = np.random.uniform(size=y_train_graph.shape[0])
    weights2 = np.random.uniform(size=y2_train_graph.shape[0])
    weights1_test = np.random.uniform(size=y_test_graph.shape[0])
    weights2_test = np.random.uniform(size=y2_test_graph.shape[0])

    graph.compile('rmsprop', {'output1': 'mse', 'output2': 'mse'})

    train_data = {'input1': X_train_graph, 'output1': y_train_graph, 'output2': y2_train_graph}
    train_weights = {'output1': weights1, 'output2': weights2}

    graph.fit(train_data,
              nb_epoch=10,
              sample_weight=train_weights)
    out = graph.predict({'input1': X_test_graph})
    # The previous form, type(out == dict), took the type of a boolean and
    # was therefore always truthy; check the prediction container itself.
    assert isinstance(out, dict)
    assert len(out) == 2

    # Return values are not inspected here; the calls only need to succeed.
    graph.test_on_batch({'input1': X_test_graph, 'output1': y_test_graph, 'output2': y2_test_graph},
                        sample_weight={'output1': weights1_test, 'output2': weights2_test})
    graph.train_on_batch(train_data,
                         sample_weight=train_weights)
    graph.evaluate(train_data,
                   sample_weight=train_weights)
|
||||
|
||||
|
||||
def test_recursive():
    """Use a Graph as a layer inside a Sequential model.

    The inner graph has one input and one summed output. It is stacked
    between two Dense layers, and the resulting Sequential model is trained,
    evaluated and round-tripped through config, JSON and YAML serialization.
    """
    inner = Graph()
    inner.add_input(name='input1', input_shape=(32,))
    for layer, node_name, source in ((Dense(16), 'dense1', 'input1'),
                                     (Dense(4), 'dense2', 'input1'),
                                     (Dense(4), 'dense3', 'dense1')):
        inner.add_node(layer, name=node_name, input=source)
    inner.add_output(name='output1', inputs=['dense2', 'dense3'],
                     merge_mode='sum')

    # Dense -> graph -> Dense, with the graph treated like any other layer.
    model = Sequential()
    for layer in (Dense(32, input_shape=(32,)), inner, Dense(4)):
        model.add(layer)

    model.compile('rmsprop', 'mse')

    model.fit(X_train_graph, y_train_graph, batch_size=10, nb_epoch=10)
    model.evaluate(X_test_graph, y_test_graph)

    # Serialization round trips: each loader only has to succeed.
    Sequential.from_config(model.get_config())

    model.summary()
    model_from_json(model.to_json())

    model_from_yaml(model.to_yaml())
|
||||
|
||||
|
||||
def test_create_output():
    """Exercise the `create_output` argument of `Graph.add_node`.

    The final Dense node is added with `create_output=True` under the name
    'output1', and is then used as the compile/fit target without a separate
    `add_output` call. After training, the evaluation loss is bounded, and
    the graph is rebuilt from its config, recompiled and used for prediction.
    """
    graph = Graph()
    graph.add_input(name='input1', input_shape=(32,))

    graph.add_node(Dense(16), name='dense1', input='input1')
    graph.add_node(Dense(4), name='dense2', input='input1')
    graph.add_node(Dense(4), name='dense3', input='dense1')
    graph.add_node(Dense(4), name='output1', inputs=['dense2', 'dense3'],
                   merge_mode='sum', create_output=True)
    graph.compile('rmsprop', {'output1': 'mse'})

    graph.fit({'input1': X_train_graph, 'output1': y_train_graph},
              nb_epoch=10)
    out = graph.predict({'input1': X_test_graph})
    # The previous form, type(out == dict), took the type of a boolean and
    # was therefore always truthy; check the prediction container itself.
    assert isinstance(out, dict)
    assert len(out) == 1

    test_data = {'input1': X_test_graph, 'output1': y_test_graph}
    # Batch-level calls only need to succeed; the loss that is checked is the
    # one reported by evaluate().
    graph.test_on_batch(test_data)
    graph.train_on_batch(test_data)
    loss = graph.evaluate(test_data)
    assert loss < 2.5

    # test serialization
    config = graph.get_config()
    graph = Graph.from_config(config)
    graph.compile('rmsprop', {'output1': 'mse'})
    # The rebuilt graph only has to be able to predict; the result is unused.
    graph.predict({'input1': X_test_graph})
|
||||
|
||||
|
||||
def test_count_params():
    """Check `Graph.count_params` before and after compilation.

    The expected total is the sum, over the three Dense layers, of
    (inputs * units) kernel entries plus one bias per unit.
    """
    nb_units = 100
    nb_classes = 2

    model = Graph()
    model.add_input(name='input1', input_shape=(32,))
    model.add_input(name='input2', input_shape=(32,))
    model.add_node(Dense(nb_units), name='dense1', input='input1')
    model.add_node(Dense(nb_classes), name='dense2', input='input2')
    model.add_node(Dense(nb_classes), name='dense3', input='dense1')
    model.add_output(name='output', inputs=['dense2', 'dense3'],
                     merge_mode='sum')
    model.build()

    # (fan_in, fan_out) for dense1, dense2 and dense3 respectively.
    layer_dims = [(32, nb_units), (32, nb_classes), (nb_units, nb_classes)]
    expected = sum(fan_in * fan_out + fan_out for fan_in, fan_out in layer_dims)

    assert expected == model.count_params()

    model.compile('rmsprop', {'output': 'binary_crossentropy'})

    # Compiling must not change the parameter count.
    assert expected == model.count_params()
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Running this module directly hands the file itself to pytest.
    pytest.main([__file__])
|
||||
@@ -2,14 +2,16 @@ from __future__ import print_function
|
||||
import pytest
|
||||
import numpy as np
|
||||
from keras.models import Sequential
|
||||
from keras.layers.core import Dense, Activation
|
||||
from keras.layers.core import Dense
|
||||
from keras.utils.test_utils import keras_test
|
||||
|
||||
|
||||
@keras_test
|
||||
def test_multiprocessing_training():
|
||||
|
||||
reached_end = False
|
||||
|
||||
arr_data = np.random.randint(0,256, (500, 200))
|
||||
arr_data = np.random.randint(0, 256, (500, 2))
|
||||
arr_labels = np.random.randint(0, 2, 500)
|
||||
|
||||
def myGenerator():
|
||||
@@ -27,10 +29,7 @@ def test_multiprocessing_training():
|
||||
|
||||
# Build a NN
|
||||
model = Sequential()
|
||||
model.add(Dense(10, input_shape=(200, )))
|
||||
model.add(Activation('relu'))
|
||||
model.add(Dense(1))
|
||||
model.add(Activation('linear'))
|
||||
model.add(Dense(1, input_shape=(2, )))
|
||||
model.compile(loss='mse', optimizer='adadelta')
|
||||
|
||||
model.fit_generator(myGenerator(),
|
||||
@@ -53,11 +52,12 @@ def test_multiprocessing_training():
|
||||
assert reached_end
|
||||
|
||||
|
||||
@keras_test
|
||||
def test_multiprocessing_training_fromfile():
|
||||
|
||||
reached_end = False
|
||||
|
||||
arr_data = np.random.randint(0,256, (500, 200))
|
||||
arr_data = np.random.randint(0, 256, (500, 2))
|
||||
arr_labels = np.random.randint(0, 2, 500)
|
||||
np.savez("data.npz", **{"data": arr_data, "labels": arr_labels})
|
||||
|
||||
@@ -78,10 +78,7 @@ def test_multiprocessing_training_fromfile():
|
||||
|
||||
# Build a NN
|
||||
model = Sequential()
|
||||
model.add(Dense(10, input_shape=(200, )))
|
||||
model.add(Activation('relu'))
|
||||
model.add(Dense(1))
|
||||
model.add(Activation('linear'))
|
||||
model.add(Dense(1, input_shape=(2, )))
|
||||
model.compile(loss='mse', optimizer='adadelta')
|
||||
|
||||
model.fit_generator(myGenerator(),
|
||||
@@ -103,11 +100,12 @@ def test_multiprocessing_training_fromfile():
|
||||
assert reached_end
|
||||
|
||||
|
||||
@keras_test
|
||||
def test_multiprocessing_predicting():
|
||||
|
||||
reached_end = False
|
||||
|
||||
arr_data = np.random.randint(0,256, (500, 200))
|
||||
arr_data = np.random.randint(0, 256, (500, 2))
|
||||
|
||||
def myGenerator():
|
||||
|
||||
@@ -123,10 +121,7 @@ def test_multiprocessing_predicting():
|
||||
|
||||
# Build a NN
|
||||
model = Sequential()
|
||||
model.add(Dense(10, input_shape=(200, )))
|
||||
model.add(Activation('relu'))
|
||||
model.add(Dense(1))
|
||||
model.add(Activation('linear'))
|
||||
model.add(Dense(1, input_shape=(2, )))
|
||||
model.compile(loss='mse', optimizer='adadelta')
|
||||
model.predict_generator(myGenerator(),
|
||||
val_samples=320,
|
||||
@@ -142,11 +137,12 @@ def test_multiprocessing_predicting():
|
||||
assert reached_end
|
||||
|
||||
|
||||
@keras_test
|
||||
def test_multiprocessing_evaluating():
|
||||
|
||||
reached_end = False
|
||||
|
||||
arr_data = np.random.randint(0,256, (500, 200))
|
||||
arr_data = np.random.randint(0, 256, (500, 2))
|
||||
arr_labels = np.random.randint(0, 2, 500)
|
||||
|
||||
def myGenerator():
|
||||
@@ -164,10 +160,7 @@ def test_multiprocessing_evaluating():
|
||||
|
||||
# Build a NN
|
||||
model = Sequential()
|
||||
model.add(Dense(10, input_shape=(200, )))
|
||||
model.add(Activation('relu'))
|
||||
model.add(Dense(1))
|
||||
model.add(Activation('linear'))
|
||||
model.add(Dense(1, input_shape=(2, )))
|
||||
model.compile(loss='mse', optimizer='adadelta')
|
||||
|
||||
model.evaluate_generator(myGenerator(),
|
||||
|
||||
@@ -58,8 +58,8 @@ def test_Eigenvalue_reg():
|
||||
model.compile(loss='categorical_crossentropy', optimizer='rmsprop')
|
||||
model.fit(X_train, Y_train, batch_size=batch_size, nb_epoch=nb_epoch, verbose=0)
|
||||
model.evaluate(X_test[test_ids, :], Y_test[test_ids, :], verbose=0)
|
||||
|
||||
|
||||
|
||||
|
||||
def test_W_reg():
|
||||
(X_train, Y_train), (X_test, Y_test), test_ids = get_data()
|
||||
for reg in [regularizers.l1(),
|
||||
|
||||
@@ -9,7 +9,7 @@ from keras import backend as K
|
||||
from keras.models import Graph, Sequential
|
||||
from keras.layers.core import Dense, Activation, Merge, Lambda
|
||||
from keras.utils import np_utils
|
||||
from keras.utils.test_utils import get_test_data
|
||||
from keras.utils.test_utils import get_test_data, keras_test
|
||||
from keras.models import model_from_json, model_from_yaml
|
||||
from keras import objectives
|
||||
from keras.engine.training import make_batches
|
||||
@@ -22,6 +22,23 @@ batch_size = 32
|
||||
nb_epoch = 1
|
||||
|
||||
|
||||
@keras_test
|
||||
def test_sequential_pop():
|
||||
model = Sequential()
|
||||
model.add(Dense(nb_hidden, input_dim=input_dim))
|
||||
model.add(Dense(nb_class))
|
||||
model.compile(loss='mse', optimizer='sgd')
|
||||
x = np.random.random((batch_size, input_dim))
|
||||
y = np.random.random((batch_size, nb_class))
|
||||
model.fit(x, y, nb_epoch=1)
|
||||
model.pop()
|
||||
assert len(model.layers) == 1
|
||||
assert model.output_shape == (None, nb_hidden)
|
||||
model.compile(loss='mse', optimizer='sgd')
|
||||
y = np.random.random((batch_size, nb_hidden))
|
||||
model.fit(x, y, nb_epoch=1)
|
||||
|
||||
|
||||
def _get_test_data():
|
||||
np.random.seed(1234)
|
||||
|
||||
@@ -38,6 +55,7 @@ def _get_test_data():
|
||||
return (X_train, y_train), (X_test, y_test)
|
||||
|
||||
|
||||
@keras_test
|
||||
def test_sequential_fit_generator():
|
||||
(X_train, y_train), (X_test, y_test) = _get_test_data()
|
||||
|
||||
@@ -69,10 +87,10 @@ def test_sequential_fit_generator():
|
||||
model.fit_generator(data_generator(True), len(X_train), nb_epoch,
|
||||
validation_data=data_generator(False), nb_val_samples=batch_size * 3)
|
||||
model.fit_generator(data_generator(True), len(X_train), nb_epoch, max_q_size=2)
|
||||
|
||||
loss = model.evaluate(X_train, y_train)
|
||||
model.evaluate(X_train, y_train)
|
||||
|
||||
|
||||
@keras_test
|
||||
def test_sequential():
|
||||
(X_train, y_train), (X_test, y_test) = _get_test_data()
|
||||
|
||||
@@ -130,16 +148,17 @@ def test_sequential():
|
||||
|
||||
# test serialization
|
||||
config = model.get_config()
|
||||
new_model = Sequential.from_config(config)
|
||||
Sequential.from_config(config)
|
||||
|
||||
model.summary()
|
||||
json_str = model.to_json()
|
||||
new_model = model_from_json(json_str)
|
||||
model_from_json(json_str)
|
||||
|
||||
yaml_str = model.to_yaml()
|
||||
new_model = model_from_yaml(yaml_str)
|
||||
model_from_yaml(yaml_str)
|
||||
|
||||
|
||||
@keras_test
|
||||
def test_nested_sequential():
|
||||
(X_train, y_train), (X_test, y_test) = _get_test_data()
|
||||
|
||||
@@ -192,16 +211,17 @@ def test_nested_sequential():
|
||||
|
||||
# test serialization
|
||||
config = model.get_config()
|
||||
new_model = Sequential.from_config(config)
|
||||
Sequential.from_config(config)
|
||||
|
||||
model.summary()
|
||||
json_str = model.to_json()
|
||||
new_model = model_from_json(json_str)
|
||||
model_from_json(json_str)
|
||||
|
||||
yaml_str = model.to_yaml()
|
||||
new_model = model_from_yaml(yaml_str)
|
||||
model_from_yaml(yaml_str)
|
||||
|
||||
|
||||
@keras_test
|
||||
def test_merge_sum():
|
||||
(X_train, y_train), (X_test, y_test) = _get_test_data()
|
||||
left = Sequential()
|
||||
@@ -251,16 +271,17 @@ def test_merge_sum():
|
||||
|
||||
# test serialization
|
||||
config = model.get_config()
|
||||
new_model = Sequential.from_config(config)
|
||||
Sequential.from_config(config)
|
||||
|
||||
model.summary()
|
||||
json_str = model.to_json()
|
||||
new_model = model_from_json(json_str)
|
||||
model_from_json(json_str)
|
||||
|
||||
yaml_str = model.to_yaml()
|
||||
new_model = model_from_yaml(yaml_str)
|
||||
model_from_yaml(yaml_str)
|
||||
|
||||
|
||||
@keras_test
|
||||
def test_merge_dot():
|
||||
(X_train, y_train), (X_test, y_test) = _get_test_data()
|
||||
|
||||
@@ -295,6 +316,7 @@ def test_merge_dot():
|
||||
model.compile(loss='categorical_crossentropy', optimizer='rmsprop')
|
||||
|
||||
|
||||
@keras_test
|
||||
def test_merge_concat():
|
||||
(X_train, y_train), (X_test, y_test) = _get_test_data()
|
||||
|
||||
@@ -334,6 +356,7 @@ def test_merge_concat():
|
||||
assert(loss == nloss)
|
||||
|
||||
|
||||
@keras_test
|
||||
def test_merge_recursivity():
|
||||
(X_train, y_train), (X_test, y_test) = _get_test_data()
|
||||
left = Sequential()
|
||||
@@ -380,16 +403,17 @@ def test_merge_recursivity():
|
||||
|
||||
# test serialization
|
||||
config = model.get_config()
|
||||
new_model = Sequential.from_config(config)
|
||||
Sequential.from_config(config)
|
||||
|
||||
model.summary()
|
||||
json_str = model.to_json()
|
||||
new_model = model_from_json(json_str)
|
||||
model_from_json(json_str)
|
||||
|
||||
yaml_str = model.to_yaml()
|
||||
new_model = model_from_yaml(yaml_str)
|
||||
model_from_yaml(yaml_str)
|
||||
|
||||
|
||||
@keras_test
|
||||
def test_merge_overlap():
|
||||
(X_train, y_train), (X_test, y_test) = _get_test_data()
|
||||
left = Sequential()
|
||||
@@ -427,16 +451,17 @@ def test_merge_overlap():
|
||||
|
||||
# test serialization
|
||||
config = model.get_config()
|
||||
new_model = Sequential.from_config(config)
|
||||
Sequential.from_config(config)
|
||||
|
||||
model.summary()
|
||||
json_str = model.to_json()
|
||||
new_model = model_from_json(json_str)
|
||||
model_from_json(json_str)
|
||||
|
||||
yaml_str = model.to_yaml()
|
||||
new_model = model_from_yaml(yaml_str)
|
||||
model_from_yaml(yaml_str)
|
||||
|
||||
|
||||
@keras_test
|
||||
def test_sequential_count_params():
|
||||
input_dim = 20
|
||||
nb_units = 10
|
||||
|
||||
@@ -9,9 +9,10 @@ from keras.models import Sequential
|
||||
from keras.layers.core import Dense, Activation
|
||||
from keras.wrappers.scikit_learn import KerasClassifier, KerasRegressor
|
||||
|
||||
np.random.seed(1337)
|
||||
|
||||
input_dim = 10
|
||||
input_dim = 5
|
||||
hidden_dims = 5
|
||||
nb_train = 100
|
||||
nb_test = 50
|
||||
nb_class = 3
|
||||
batch_size = 32
|
||||
nb_epoch = 1
|
||||
@@ -19,25 +20,13 @@ verbosity = 0
|
||||
optim = 'adam'
|
||||
loss = 'categorical_crossentropy'
|
||||
|
||||
|
||||
(X_train, y_train), (X_test, y_test) = get_test_data(nb_train=400,
|
||||
nb_test=200,
|
||||
input_shape=(input_dim,),
|
||||
classification=True,
|
||||
nb_class=nb_class)
|
||||
y_train = np_utils.to_categorical(y_train, nb_classes=nb_class)
|
||||
y_test = np_utils.to_categorical(y_test, nb_classes=nb_class)
|
||||
np.random.seed(42)
|
||||
(X_train, y_train), (X_test, y_test) = get_test_data(
|
||||
nb_train=nb_train, nb_test=nb_test, input_shape=(input_dim,),
|
||||
classification=True, nb_class=nb_class)
|
||||
|
||||
|
||||
(X_train_reg, y_train_reg), (X_test_reg, y_test_reg) = get_test_data(nb_train=400,
|
||||
nb_test=200,
|
||||
input_shape=(input_dim,),
|
||||
classification=False,
|
||||
nb_class=1,
|
||||
output_shape=(1,))
|
||||
|
||||
|
||||
def build_fn_clf(hidden_dims=50):
|
||||
def build_fn_clf(hidden_dims):
|
||||
model = Sequential()
|
||||
model.add(Dense(input_dim, input_shape=(input_dim,)))
|
||||
model.add(Activation('relu'))
|
||||
@@ -50,14 +39,52 @@ def build_fn_clf(hidden_dims=50):
|
||||
return model
|
||||
|
||||
|
||||
class Class_build_fn_clf(object):
|
||||
def __call__(self, hidden_dims):
|
||||
return build_fn_clf(hidden_dims)
|
||||
def test_clasify_build_fn():
|
||||
clf = KerasClassifier(
|
||||
build_fn=build_fn_clf, hidden_dims=hidden_dims,
|
||||
batch_size=batch_size, nb_epoch=nb_epoch)
|
||||
|
||||
assert_classification_works(clf)
|
||||
|
||||
|
||||
class Inherit_class_build_fn_clf(KerasClassifier):
|
||||
def __call__(self, hidden_dims):
|
||||
return build_fn_clf(hidden_dims)
|
||||
def test_clasify_class_build_fn():
|
||||
class ClassBuildFnClf(object):
|
||||
def __call__(self, hidden_dims):
|
||||
return build_fn_clf(hidden_dims)
|
||||
|
||||
clf = KerasClassifier(
|
||||
build_fn=ClassBuildFnClf(), hidden_dims=hidden_dims,
|
||||
batch_size=batch_size, nb_epoch=nb_epoch)
|
||||
|
||||
assert_classification_works(clf)
|
||||
|
||||
|
||||
def test_clasify_inherit_class_build_fn():
|
||||
class InheritClassBuildFnClf(KerasClassifier):
|
||||
def __call__(self, hidden_dims):
|
||||
return build_fn_clf(hidden_dims)
|
||||
|
||||
clf = InheritClassBuildFnClf(
|
||||
build_fn=None, hidden_dims=hidden_dims,
|
||||
batch_size=batch_size, nb_epoch=nb_epoch)
|
||||
|
||||
assert_classification_works(clf)
|
||||
|
||||
|
||||
def assert_classification_works(clf):
|
||||
clf.fit(X_train, y_train, batch_size=batch_size, nb_epoch=nb_epoch)
|
||||
|
||||
score = clf.score(X_train, y_train, batch_size=batch_size)
|
||||
assert np.isscalar(score) and np.isfinite(score)
|
||||
|
||||
preds = clf.predict(X_test, batch_size=batch_size)
|
||||
assert preds.shape == (nb_test, )
|
||||
for prediction in np.unique(preds):
|
||||
assert prediction in range(nb_class)
|
||||
|
||||
proba = clf.predict_proba(X_test, batch_size=batch_size)
|
||||
assert proba.shape == (nb_test, nb_class)
|
||||
assert np.allclose(np.sum(proba, axis=1), np.ones(nb_test))
|
||||
|
||||
|
||||
def build_fn_reg(hidden_dims=50):
|
||||
@@ -73,42 +100,50 @@ def build_fn_reg(hidden_dims=50):
|
||||
return model
|
||||
|
||||
|
||||
class Class_build_fn_reg(object):
|
||||
def __call__(self, hidden_dims):
|
||||
return build_fn_reg(hidden_dims)
|
||||
def test_regression_build_fn():
|
||||
reg = KerasRegressor(
|
||||
build_fn=build_fn_reg, hidden_dims=hidden_dims,
|
||||
batch_size=batch_size, nb_epoch=nb_epoch)
|
||||
|
||||
assert_regression_works(reg)
|
||||
|
||||
|
||||
class Inherit_class_build_fn_reg(KerasRegressor):
|
||||
def __call__(self, hidden_dims):
|
||||
return build_fn_reg(hidden_dims)
|
||||
def test_regression_class_build_fn():
|
||||
class ClassBuildFnReg(object):
|
||||
def __call__(self, hidden_dims):
|
||||
return build_fn_reg(hidden_dims)
|
||||
|
||||
for fn in [build_fn_clf, Class_build_fn_clf(), Inherit_class_build_fn_clf]:
|
||||
if fn is Inherit_class_build_fn_clf:
|
||||
classifier = Inherit_class_build_fn_clf(
|
||||
build_fn=None, hidden_dims=50, batch_size=batch_size, nb_epoch=nb_epoch)
|
||||
else:
|
||||
classifier = KerasClassifier(
|
||||
build_fn=fn, hidden_dims=50, batch_size=batch_size, nb_epoch=nb_epoch)
|
||||
reg = KerasRegressor(
|
||||
build_fn=ClassBuildFnReg(), hidden_dims=hidden_dims,
|
||||
batch_size=batch_size, nb_epoch=nb_epoch)
|
||||
|
||||
classifier.fit(X_train, y_train, batch_size=batch_size, nb_epoch=nb_epoch)
|
||||
score = classifier.score(X_train, y_train, batch_size=batch_size)
|
||||
preds = classifier.predict(X_test, batch_size=batch_size)
|
||||
proba = classifier.predict_proba(X_test, batch_size=batch_size)
|
||||
assert_regression_works(reg)
|
||||
|
||||
|
||||
for fn in [build_fn_reg, Class_build_fn_reg(), Inherit_class_build_fn_reg]:
|
||||
if fn is Inherit_class_build_fn_reg:
|
||||
regressor = Inherit_class_build_fn_reg(
|
||||
build_fn=None, hidden_dims=50, batch_size=batch_size, nb_epoch=nb_epoch)
|
||||
else:
|
||||
regressor = KerasRegressor(
|
||||
build_fn=fn, hidden_dims=50, batch_size=batch_size, nb_epoch=nb_epoch)
|
||||
def test_regression_inherit_class_build_fn():
|
||||
class InheritClassBuildFnReg(KerasRegressor):
|
||||
def __call__(self, hidden_dims):
|
||||
return build_fn_reg(hidden_dims)
|
||||
|
||||
regressor.fit(X_train_reg, y_train_reg,
|
||||
batch_size=batch_size, nb_epoch=nb_epoch)
|
||||
score = regressor.score(X_train_reg, y_train_reg, batch_size=batch_size)
|
||||
preds = regressor.predict(X_test, batch_size=batch_size)
|
||||
reg = InheritClassBuildFnReg(
|
||||
build_fn=None, hidden_dims=hidden_dims,
|
||||
batch_size=batch_size, nb_epoch=nb_epoch)
|
||||
|
||||
assert_regression_works(reg)
|
||||
|
||||
|
||||
def assert_regression_works(reg):
|
||||
reg.fit(X_train, y_train, batch_size=batch_size, nb_epoch=nb_epoch)
|
||||
|
||||
score = reg.score(X_train, y_train, batch_size=batch_size)
|
||||
assert np.isscalar(score) and np.isfinite(score)
|
||||
|
||||
preds = reg.predict(X_test, batch_size=batch_size)
|
||||
assert preds.shape == (nb_test, )
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
pytest.main([__file__])
|
||||
|
||||
# Usage of sklearn's grid_search
|
||||
# from sklearn import grid_search
|
||||
|
||||
@@ -4,10 +4,12 @@ import pytest
|
||||
from keras.models import Sequential
|
||||
from keras.engine.training import weighted_objective
|
||||
from keras.layers.core import TimeDistributedDense, Masking
|
||||
from keras.utils.test_utils import keras_test
|
||||
from keras import objectives
|
||||
from keras import backend as K
|
||||
|
||||
|
||||
@keras_test
|
||||
def test_masking():
|
||||
np.random.seed(1337)
|
||||
X = np.array([[[1], [1]],
|
||||
@@ -22,6 +24,7 @@ def test_masking():
|
||||
assert loss == 0
|
||||
|
||||
|
||||
@keras_test
|
||||
def test_loss_masking():
|
||||
weighted_loss = weighted_objective(objectives.get('mae'))
|
||||
shape = (3, 4, 2)
|
||||
|
||||
@@ -8,6 +8,7 @@ from keras.utils.test_utils import get_test_data
|
||||
from keras.models import Sequential, Graph
|
||||
from keras.layers import Dense, Activation, RepeatVector, TimeDistributedDense, GRU
|
||||
from keras.utils import np_utils
|
||||
from keras.utils.test_utils import keras_test
|
||||
|
||||
nb_classes = 10
|
||||
batch_size = 128
|
||||
@@ -61,15 +62,6 @@ def create_sequential_model():
|
||||
return model
|
||||
|
||||
|
||||
def create_graph_model():
|
||||
model = Graph()
|
||||
model.add_input(name='input', input_shape=(input_dim,))
|
||||
model.add_node(Dense(32, activation='relu'), name='d1', input='input')
|
||||
model.add_node(Dense(nb_classes, activation='softmax'), name='d2', input='d1')
|
||||
model.add_output(name='output', input='d2')
|
||||
return model
|
||||
|
||||
|
||||
def create_temporal_sequential_model():
|
||||
model = Sequential()
|
||||
model.add(GRU(32, input_shape=(timesteps, input_dim), return_sequences=True))
|
||||
@@ -78,17 +70,7 @@ def create_temporal_sequential_model():
|
||||
return model
|
||||
|
||||
|
||||
def create_temporal_graph_model():
|
||||
model = Graph()
|
||||
model.add_input(name='input', input_shape=(timesteps, input_dim))
|
||||
model.add_node(GRU(32, return_sequences=True),
|
||||
name='d1', input='input')
|
||||
model.add_node(TimeDistributedDense(nb_classes, activation='softmax'),
|
||||
name='d2', input='d1')
|
||||
model.add_output(name='output', input='d2')
|
||||
return model
|
||||
|
||||
|
||||
@keras_test
|
||||
def _test_weights_sequential(model, class_weight=None, sample_weight=None,
|
||||
X_train=X_train, Y_train=Y_train,
|
||||
X_test=X_test, Y_test=Y_test):
|
||||
@@ -122,39 +104,13 @@ def _test_weights_sequential(model, class_weight=None, sample_weight=None,
|
||||
return score
|
||||
|
||||
|
||||
def _test_weights_graph(model, class_weight=None, sample_weight=None,
|
||||
X_train=X_train, Y_train=Y_train,
|
||||
X_test=X_test, Y_test=Y_test):
|
||||
model.fit({'input': X_train, 'output': Y_train},
|
||||
batch_size=batch_size, nb_epoch=nb_epoch // 2, verbose=0,
|
||||
class_weight={'output': class_weight},
|
||||
sample_weight={'output': sample_weight})
|
||||
model.fit({'input': X_train, 'output': Y_train},
|
||||
batch_size=batch_size, nb_epoch=nb_epoch // 2, verbose=0,
|
||||
class_weight={'output': class_weight},
|
||||
sample_weight={'output': sample_weight}, validation_split=0.1)
|
||||
|
||||
model.train_on_batch({'input': X_train[:32], 'output': Y_train[:32]},
|
||||
class_weight={'output': class_weight},
|
||||
sample_weight={'output': sample_weight[:32] if sample_weight is not None else None})
|
||||
model.test_on_batch({'input': X_train[:32], 'output': Y_train[:32]},
|
||||
sample_weight={'output': sample_weight[:32] if sample_weight is not None else None})
|
||||
score = model.evaluate({'input': X_test[test_ids, :],
|
||||
'output': Y_test[test_ids, :]},
|
||||
verbose=0)
|
||||
return score
|
||||
|
||||
|
||||
# no weights: reference point
|
||||
model = create_sequential_model()
|
||||
model.compile(loss=loss, optimizer='rmsprop')
|
||||
standard_score_sequential = _test_weights_sequential(model)
|
||||
|
||||
model = create_graph_model()
|
||||
model.compile(loss={'output': loss}, optimizer='rmsprop')
|
||||
standard_score_graph = _test_weights_graph(model)
|
||||
|
||||
|
||||
@keras_test
|
||||
def test_sequential_class_weights():
|
||||
model = create_sequential_model()
|
||||
model.compile(loss=loss, optimizer='rmsprop')
|
||||
@@ -162,6 +118,7 @@ def test_sequential_class_weights():
|
||||
assert(score < standard_score_sequential)
|
||||
|
||||
|
||||
@keras_test
|
||||
def test_sequential_sample_weights():
|
||||
model = create_sequential_model()
|
||||
model.compile(loss=loss, optimizer='rmsprop')
|
||||
@@ -169,6 +126,7 @@ def test_sequential_sample_weights():
|
||||
assert(score < standard_score_sequential)
|
||||
|
||||
|
||||
@keras_test
|
||||
def test_sequential_temporal_sample_weights():
|
||||
model = create_temporal_sequential_model()
|
||||
model.compile(loss=loss, optimizer='rmsprop',
|
||||
@@ -194,32 +152,5 @@ def test_sequential_temporal_sample_weights():
|
||||
assert(score < standard_score_sequential)
|
||||
|
||||
|
||||
def test_graph_class_weights():
    """A graph model trained with class weights scores below the baseline."""
    graph = create_graph_model()
    graph.compile(optimizer='rmsprop', loss={'output': loss})
    weighted_score = _test_weights_graph(graph, class_weight=class_weight)
    assert weighted_score < standard_score_graph
|
||||
|
||||
|
||||
def test_graph_sample_weights():
    """A graph model trained with sample weights scores below the baseline."""
    graph = create_graph_model()
    graph.compile(optimizer='rmsprop', loss={'output': loss})
    weighted_score = _test_weights_graph(graph, sample_weight=sample_weight)
    assert weighted_score < standard_score_graph
|
||||
|
||||
|
||||
def test_graph_temporal_sample_weight():
    """A temporal graph model with per-timestep weights scores below the baseline."""
    graph = create_temporal_graph_model()
    graph.compile(optimizer='rmsprop',
                  loss={'output': loss},
                  sample_weight_modes={'output': 'temporal'})

    # Swap every dataset for its temporal counterpart.
    temporal_data = dict(X_train=temporal_X_train,
                         X_test=temporal_X_test,
                         Y_train=temporal_Y_train,
                         Y_test=temporal_Y_test)
    weighted_score = _test_weights_graph(graph,
                                         sample_weight=temporal_sample_weight,
                                         **temporal_data)
    assert weighted_score < standard_score_graph
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Running the module directly hands this file to pytest.
    pytest.main([__file__])
|
||||
|
||||
@@ -0,0 +1,165 @@
|
||||
import pytest
|
||||
import os
|
||||
import numpy as np
|
||||
from numpy.testing import assert_allclose
|
||||
|
||||
from keras.models import Model, Sequential
|
||||
from keras.layers import Dense, Dropout, RepeatVector, TimeDistributed
|
||||
from keras.layers import Input
|
||||
from keras import optimizers
|
||||
from keras import objectives
|
||||
from keras import metrics
|
||||
from keras.utils.test_utils import keras_test
|
||||
from keras.models import save_model, load_model
|
||||
|
||||
|
||||
@keras_test
def test_sequential_model_saving():
    """Round-trip a compiled Sequential model through save_model/load_model.

    Checks that the reloaded model predicts the same values as the original,
    that both models still agree after one more identical training step, and
    that `load_weights` can be called on the full-model file.
    """
    model = Sequential()
    model.add(Dense(2, input_dim=3))
    model.add(Dense(3))
    model.compile(loss='mse', optimizer='rmsprop', metrics=['acc'])

    x = np.random.random((1, 3))
    y = np.random.random((1, 3))
    model.train_on_batch(x, y)

    out = model.predict(x)
    fname = 'tmp_' + str(np.random.randint(10000)) + '.h5'
    try:
        save_model(model, fname)

        new_model = load_model(fname)

        out2 = new_model.predict(x)
        assert_allclose(out, out2, atol=1e-05)

        # test that new updates are the same with both models
        x = np.random.random((1, 3))
        y = np.random.random((1, 3))
        model.train_on_batch(x, y)
        new_model.train_on_batch(x, y)
        out = model.predict(x)
        out2 = new_model.predict(x)
        assert_allclose(out, out2, atol=1e-05)

        # test load_weights on model file
        model.load_weights(fname)
    finally:
        # Clean up even when an assertion or load fails; otherwise a failing
        # run leaves tmp_*.h5 files behind in the working directory.
        if os.path.exists(fname):
            os.remove(fname)
|
||||
|
||||
|
||||
@keras_test
def test_sequential_model_saving_2():
    """Round-trip a Sequential model compiled with non-string settings.

    The model is compiled with a loss function object, an optimizer
    instance, a metric function and `sample_weight_mode='temporal'`. The
    reloaded model must predict the same values and stay in agreement after
    one more identical training step.
    """
    # test with funkier config
    model = Sequential()
    model.add(Dense(2, input_dim=3))
    model.add(RepeatVector(3))
    model.add(TimeDistributed(Dense(3)))
    model.compile(loss=objectives.MSE,
                  optimizer=optimizers.RMSprop(lr=0.0001),
                  metrics=[metrics.categorical_accuracy],
                  sample_weight_mode='temporal')
    x = np.random.random((1, 3))
    y = np.random.random((1, 3, 3))
    model.train_on_batch(x, y)

    out = model.predict(x)
    fname = 'tmp_' + str(np.random.randint(10000)) + '.h5'
    try:
        save_model(model, fname)
        new_model = load_model(fname)
    finally:
        # The file is only needed for the load; remove it even if saving or
        # loading raises so no tmp_*.h5 file is left behind.
        if os.path.exists(fname):
            os.remove(fname)

    out2 = new_model.predict(x)
    assert_allclose(out, out2, atol=1e-05)

    # test that new updates are the same with both models
    x = np.random.random((1, 3))
    y = np.random.random((1, 3, 3))
    model.train_on_batch(x, y)
    new_model.train_on_batch(x, y)
    out = model.predict(x)
    out2 = new_model.predict(x)
    assert_allclose(out, out2, atol=1e-05)
|
||||
|
||||
|
||||
@keras_test
def test_sequential_model_saving_3():
    """Round-trip a model whose optimizer and loss go through `custom_objects`.

    The optimizer and loss are passed to `load_model` in a `custom_objects`
    mapping, and the reloaded model must predict the same values as the
    original.
    """
    # test with custom optimizer, loss
    custom_opt = optimizers.rmsprop
    custom_loss = objectives.mse
    model = Sequential()
    model.add(Dense(2, input_dim=3))
    model.add(Dense(3))
    model.compile(loss=custom_loss, optimizer=custom_opt(), metrics=['acc'])

    x = np.random.random((1, 3))
    y = np.random.random((1, 3))
    model.train_on_batch(x, y)

    out = model.predict(x)
    fname = 'tmp_' + str(np.random.randint(10000)) + '.h5'
    try:
        save_model(model, fname)

        model = load_model(fname,
                           custom_objects={'custom_opt': custom_opt,
                                           'custom_loss': custom_loss})
    finally:
        # Remove the scratch file even if saving or loading raises.
        if os.path.exists(fname):
            os.remove(fname)

    out2 = model.predict(x)
    assert_allclose(out, out2, atol=1e-05)
|
||||
|
||||
|
||||
@keras_test
def test_fuctional_model_saving():
    """Round-trip a functional-API `Model` through save_model/load_model.

    The reloaded model must predict the same values as the original.

    NOTE(review): the function name is misspelled ("fuctional"); it is kept
    unchanged so existing test selections by name keep working.
    """
    # Local names avoid shadowing the builtin `input` and avoid reusing `x`
    # for both a symbolic tensor and a numpy array.
    inputs = Input(shape=(3,))
    hidden = Dense(2)(inputs)
    outputs = Dense(3)(hidden)

    model = Model(inputs, outputs)
    model.compile(loss=objectives.MSE,
                  optimizer=optimizers.RMSprop(lr=0.0001),
                  metrics=[metrics.categorical_accuracy])
    x = np.random.random((1, 3))
    y = np.random.random((1, 3))
    model.train_on_batch(x, y)

    out = model.predict(x)
    fname = 'tmp_' + str(np.random.randint(10000)) + '.h5'
    try:
        save_model(model, fname)
        model = load_model(fname)
    finally:
        # Remove the scratch file even if saving or loading raises.
        if os.path.exists(fname):
            os.remove(fname)

    out2 = model.predict(x)
    assert_allclose(out, out2, atol=1e-05)
|
||||
|
||||
|
||||
@keras_test
def test_saving_without_compilation():
    """Save and reload a small Sequential model; passes if nothing raises.

    NOTE(review): despite the name, the model IS compiled before saving.
    Confirm whether the uncompiled path was the intended subject; the
    `compile` call is kept here so the test's behaviour is unchanged.
    """
    model = Sequential()
    model.add(Dense(2, input_dim=3))
    model.add(Dense(3))
    model.compile(loss='mse', optimizer='sgd', metrics=['acc'])

    fname = 'tmp_' + str(np.random.randint(10000)) + '.h5'
    try:
        save_model(model, fname)
        model = load_model(fname)
    finally:
        # Remove the scratch file even if saving or loading raises.
        if os.path.exists(fname):
            os.remove(fname)
|
||||
|
||||
|
||||
@keras_test
def test_saving_right_after_compilation():
    """Save and reload a model that was compiled but never trained.

    The train function is built explicitly via `_make_train_function()`
    and no training step is run before saving. Passes if nothing raises.
    """
    model = Sequential()
    model.add(Dense(2, input_dim=3))
    model.add(Dense(3))
    model.compile(loss='mse', optimizer='sgd', metrics=['acc'])
    # Build the training function without running any batch through it.
    model.model._make_train_function()

    fname = 'tmp_' + str(np.random.randint(10000)) + '.h5'
    try:
        save_model(model, fname)
        model = load_model(fname)
    finally:
        # Remove the scratch file even if saving or loading raises.
        if os.path.exists(fname):
            os.remove(fname)
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Running the module directly hands this file to pytest.
    pytest.main([__file__])
|
||||
Referência em uma Nova Issue
Bloquear um usuário