Comparar commits
139 Commits
| Autor | SHA1 | Data | |
|---|---|---|---|
| 447445388e | |||
| b2c66816d7 | |||
| b6f81c6cc3 | |||
| 98b289630a | |||
| d68c0bd795 | |||
| 5afda71f74 | |||
| 1b08a8d675 | |||
| b508ab64bd | |||
| 84f435e24b | |||
| 984ad34a61 | |||
| ad3231c29a | |||
| c3d20bbc53 | |||
| f9c03f183f | |||
| 046a3c8a28 | |||
| 05883934f1 | |||
| 97d2a73dd3 | |||
| 5367a44acb | |||
| 1deaf71388 | |||
| 99f564e972 | |||
| c725f8d354 | |||
| 257ace722c | |||
| 0cd9d46828 | |||
| cef9e28a6c | |||
| 6c42da2abf | |||
| a9fc2bed49 | |||
| 1855c49d1f | |||
| cce65ce34d | |||
| 70866c0154 | |||
| d06e3753b0 | |||
| cb4de1f859 | |||
| b6d23b2e2d | |||
| 6a8815de0c | |||
| e0179bad2f | |||
| 8778add0d6 | |||
| facc823612 | |||
| b91854ea9d | |||
| 05abe814ac | |||
| ea561ba6d8 | |||
| df84c69676 | |||
| 3726aba2ee | |||
| f6bcaffe4a | |||
| c689b52dd1 | |||
| 09d75a4347 | |||
| 59bd247603 | |||
| f221ef952f | |||
| d3c75e1d34 | |||
| 3aab55d29f | |||
| f9ef72c38a | |||
| 108159ed17 | |||
| defa1283c4 | |||
| 2788b60fe6 | |||
| 7e70e1768f | |||
| 896ba77061 | |||
| c034262b78 | |||
| b7edcf6eea | |||
| 23e1ad2df7 | |||
| 0a3939883a | |||
| 3c8f91ee3d | |||
| efa5b04797 | |||
| 2da66ed009 | |||
| 2ac6811362 | |||
| 74c51f213c | |||
| 4302d8060d | |||
| 576cf8978b | |||
| 3533912016 | |||
| cf9922ff1d | |||
| 4fa65fbb2f | |||
| f502ee2338 | |||
| 7a56925176 | |||
| 0a108b3fb2 | |||
| 381a108e6d | |||
| 726c9fc8a6 | |||
| 946ccd3228 | |||
| 8e1ebbfc11 | |||
| cc0e60c101 | |||
| ff3f00d845 | |||
| 40195c2fa2 | |||
| 7f7300b8cb | |||
| 1b158ff4ed | |||
| b686b85b52 | |||
| 8fa82ae5cb | |||
| 0d5289141e | |||
| 01d5e7bc47 | |||
| cfbaec60c7 | |||
| f3e7245910 | |||
| 892d9fae84 | |||
| 796f895f01 | |||
| 489bb4eb10 | |||
| 8f458066bb | |||
| 5dd7454260 | |||
| 571db82371 | |||
| d971e0cca5 | |||
| fde0aac733 | |||
| b9d904c12f | |||
| aa2ec42da6 | |||
| d90d473104 | |||
| 5a1e63990a | |||
| e836c10c6f | |||
| 47c09d9557 | |||
| b35b943364 | |||
| ca467cc50e | |||
| 51f7cf0367 | |||
| 642eaca618 | |||
| 55e5680535 | |||
| 52ea31b65c | |||
| b3a26a5b30 | |||
| 98974efa5f | |||
| b6a776b242 | |||
| 1ea3f44f06 | |||
| 64e1320ca0 | |||
| 6e0b50fbdc | |||
| 22502a8fe8 | |||
| a78ad01bb4 | |||
| 729e802e85 | |||
| 3ffff6d579 | |||
| 6e5f97fca5 | |||
| eaff5bdfd7 | |||
| 28819d36a4 | |||
| f9a4f6f306 | |||
| 27c83c693d | |||
| d106908a57 | |||
| b4adce34dc | |||
| 3927505d1a | |||
| ede79f818e | |||
| 742ac53262 | |||
| ee17ccc374 | |||
| 835a02c037 | |||
| ee8ff00a2a | |||
| 229f13a864 | |||
| 0d60d637af | |||
| c20e34a8b0 | |||
| 8d3f39852a | |||
| aa45dee5a4 | |||
| 885e6e621b | |||
| dc122c31ef | |||
| 3bc80d3db4 | |||
| 439f2f3b2b | |||
| a1610eb274 | |||
| 6b90eff03c |
+2
-2
@@ -49,9 +49,9 @@ install:
|
||||
|
||||
# install TensorFlow
|
||||
- if [[ "$TRAVIS_PYTHON_VERSION" == "2.7" ]]; then
|
||||
pip install https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-0.7.1-cp27-none-linux_x86_64.whl;
|
||||
pip install https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-0.9.0-cp27-none-linux_x86_64.whl;
|
||||
elif [[ "$TRAVIS_PYTHON_VERSION" == "3.4" ]]; then
|
||||
pip install https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-0.7.1-cp34-none-linux_x86_64.whl;
|
||||
pip install https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-0.9.0-cp34-cp34m-linux_x86_64.whl;
|
||||
fi
|
||||
# command to run tests
|
||||
script:
|
||||
|
||||
@@ -2,6 +2,7 @@
|
||||
|
||||
[](https://travis-ci.org/fchollet/keras)
|
||||
[](https://badge.fury.io/py/keras)
|
||||
[](https://github.com/fchollet/keras/blob/master/LICENSE)
|
||||
|
||||
## You have just found Keras.
|
||||
|
||||
|
||||
@@ -0,0 +1,46 @@
|
||||
FROM nvidia/cuda:7.5-cudnn5-devel
|
||||
|
||||
ENV CONDA_DIR /opt/conda
|
||||
ENV PATH $CONDA_DIR/bin:$PATH
|
||||
|
||||
RUN mkdir -p $CONDA_DIR && \
|
||||
echo export PATH=$CONDA_DIR/bin:'$PATH' > /etc/profile.d/conda.sh && \
|
||||
apt-get update && \
|
||||
apt-get install -y wget git libhdf5-dev g++ graphviz && \
|
||||
wget --quiet https://repo.continuum.io/miniconda/Miniconda3-3.9.1-Linux-x86_64.sh && \
|
||||
echo "6c6b44acdd0bc4229377ee10d52c8ac6160c336d9cdd669db7371aa9344e1ac3 *Miniconda3-3.9.1-Linux-x86_64.sh" | sha256sum -c - && \
|
||||
/bin/bash /Miniconda3-3.9.1-Linux-x86_64.sh -f -b -p $CONDA_DIR && \
|
||||
rm Miniconda3-3.9.1-Linux-x86_64.sh
|
||||
|
||||
ENV NB_USER keras
|
||||
ENV NB_UID 1000
|
||||
|
||||
RUN useradd -m -s /bin/bash -N -u $NB_UID $NB_USER && \
|
||||
mkdir -p $CONDA_DIR && \
|
||||
chown keras $CONDA_DIR -R && \
|
||||
mkdir -p /src && \
|
||||
chown keras /src
|
||||
|
||||
USER keras
|
||||
|
||||
# Python
|
||||
ARG python_version=3.5.1
|
||||
ARG tensorflow_version=0.9.0rc0-cp35-cp35m
|
||||
RUN conda install -y python=${python_version} && \
|
||||
pip install https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow-${tensorflow_version}-linux_x86_64.whl && \
|
||||
pip install git+git://github.com/Theano/Theano.git && \
|
||||
pip install ipdb pytest pytest-cov python-coveralls coverage==3.7.1 pytest-xdist pep8 pytest-pep8 pydot_ng && \
|
||||
conda install Pillow scikit-learn notebook pandas matplotlib nose pyyaml six h5py && \
|
||||
pip install git+git://github.com/fchollet/keras.git && \
|
||||
conda clean -yt
|
||||
|
||||
ADD theanorc /home/keras/.theanorc
|
||||
|
||||
ENV PYTHONPATH='/src/:$PYTHONPATH'
|
||||
|
||||
WORKDIR /src
|
||||
|
||||
EXPOSE 8888
|
||||
|
||||
CMD jupyter notebook --port=8888 --ip=0.0.0.0
|
||||
|
||||
@@ -0,0 +1,26 @@
|
||||
help:
|
||||
@cat Makefile
|
||||
|
||||
DATA?="${HOME}/Data"
|
||||
GPU?=0
|
||||
DOCKER_FILE=Dockerfile
|
||||
DOCKER=GPU=$(GPU) nvidia-docker
|
||||
BACKEND=tensorflow
|
||||
TEST=tests/
|
||||
SRC=$(shell dirname `pwd`)
|
||||
|
||||
build:
|
||||
docker build -t keras --build-arg python_version=3.5 -f $(DOCKER_FILE) .
|
||||
|
||||
bash: build
|
||||
$(DOCKER) run -it -v $(SRC):/src -v $(DATA):/data --env KERAS_BACKEND=$(BACKEND) keras bash
|
||||
|
||||
ipython: build
|
||||
$(DOCKER) run -it -v $(SRC):/src -v $(DATA):/data --env KERAS_BACKEND=$(BACKEND) keras ipython
|
||||
|
||||
notebook: build
|
||||
$(DOCKER) run -it -v $(SRC):/src -v $(DATA):/data --net=host --env KERAS_BACKEND=$(BACKEND) keras
|
||||
|
||||
test: build
|
||||
$(DOCKER) run -it -v $(SRC):/src -v $(DATA):/data --env KERAS_BACKEND=$(BACKEND) keras py.test $(TEST)
|
||||
|
||||
@@ -0,0 +1,58 @@
|
||||
# Using Keras via Docker
|
||||
|
||||
This directory contains a `Dockerfile` to make it easy to get up and running with
|
||||
Keras via [Docker](http://www.docker.com/).
|
||||
|
||||
## Installing Docker
|
||||
|
||||
General installation instructions are
|
||||
[on the Docker site](https://docs.docker.com/installation/), but we give some
|
||||
quick links here:
|
||||
|
||||
* [OSX](https://docs.docker.com/installation/mac/): [docker toolbox](https://www.docker.com/toolbox)
|
||||
* [ubuntu](https://docs.docker.com/installation/ubuntulinux/)
|
||||
|
||||
## Running the container
|
||||
|
||||
We use a `Makefile` to wrap the docker commands in simple make commands.
|
||||
|
||||
Build the container and start a jupyter notebook
|
||||
|
||||
$ make notebook
|
||||
|
||||
Build the container and start an iPython shell
|
||||
|
||||
$ make ipython
|
||||
|
||||
Build the container and start a bash shell
|
||||
|
||||
$ make bash
|
||||
|
||||
For GPU support install NVidia drivers (ideally latest) and
|
||||
[nvidia-docker](https://github.com/NVIDIA/nvidia-docker). Run using
|
||||
|
||||
$ make notebook GPU=0 # or [ipython, bash]
|
||||
|
||||
Switch between Theano and TensorFlow
|
||||
|
||||
$ make notebook BACKEND=theano
|
||||
$ make notebook BACKEND=tensorflow
|
||||
|
||||
Mount a volume for external data sets
|
||||
|
||||
$ make DATA=~/mydata
|
||||
|
||||
Prints all make tasks
|
||||
|
||||
$ make help
|
||||
|
||||
You can change Theano parameters by editing `/docker/theanorc`.
|
||||
|
||||
|
||||
Note: If you have a problem running nvidia-docker, you may try the old way
|
||||
we used to run the container, but it is not recommended. If you find a bug in nvidia-docker, please report
|
||||
it there and try using nvidia-docker as described above.
|
||||
|
||||
$ export CUDA_SO=$(\ls /usr/lib/x86_64-linux-gnu/libcuda.* | xargs -I{} echo '-v {}:{}')
|
||||
$ export DEVICES=$(\ls /dev/nvidia* | xargs -I{} echo '--device {}:{}')
|
||||
$ docker run -it -p 8888:8888 $CUDA_SO $DEVICES gcr.io/tensorflow/tensorflow:latest-gpu
|
||||
@@ -0,0 +1,5 @@
|
||||
[global]
|
||||
floatX = float32
|
||||
optimizer=None
|
||||
device = gpu
|
||||
|
||||
+13
-6
@@ -88,6 +88,7 @@ EXCLUDE = {
|
||||
'Wrapper',
|
||||
'get_session',
|
||||
'set_session',
|
||||
'CallbackList',
|
||||
}
|
||||
|
||||
PAGES = [
|
||||
@@ -146,13 +147,8 @@ PAGES = [
|
||||
'classes': [
|
||||
convolutional.Convolution1D,
|
||||
convolutional.Convolution2D,
|
||||
convolutional.AtrousConv2D,
|
||||
convolutional.Convolution3D,
|
||||
convolutional.MaxPooling1D,
|
||||
convolutional.MaxPooling2D,
|
||||
convolutional.MaxPooling3D,
|
||||
convolutional.AveragePooling1D,
|
||||
convolutional.AveragePooling2D,
|
||||
convolutional.AveragePooling3D,
|
||||
convolutional.UpSampling1D,
|
||||
convolutional.UpSampling2D,
|
||||
convolutional.UpSampling3D,
|
||||
@@ -161,6 +157,17 @@ PAGES = [
|
||||
convolutional.ZeroPadding3D,
|
||||
],
|
||||
},
|
||||
{
|
||||
'page': 'layers/pooling.md',
|
||||
'classes': [
|
||||
convolutional.MaxPooling1D,
|
||||
convolutional.MaxPooling2D,
|
||||
convolutional.MaxPooling3D,
|
||||
convolutional.AveragePooling1D,
|
||||
convolutional.AveragePooling2D,
|
||||
convolutional.AveragePooling3D,
|
||||
],
|
||||
},
|
||||
{
|
||||
'page': 'layers/recurrent.md',
|
||||
'classes': [
|
||||
|
||||
@@ -24,6 +24,7 @@ pages:
|
||||
- About Keras layers: layers/about-keras-layers.md
|
||||
- Core Layers: layers/core.md
|
||||
- Convolutional Layers: layers/convolutional.md
|
||||
- Pooling Layers: layers/pooling.md
|
||||
- Recurrent Layers: layers/recurrent.md
|
||||
- Embedding Layers: layers/embeddings.md
|
||||
- Advanced Activations Layers: layers/advanced-activations.md
|
||||
|
||||
externo
+1
-1
@@ -29,7 +29,7 @@ You can also define the environment variable ``KERAS_BACKEND`` and this will
|
||||
override what is defined in your config file :
|
||||
|
||||
```bash
|
||||
KERAS_BACKEND=tensorflow python -c "from keras import backend; print backend._BACKEND"
|
||||
KERAS_BACKEND=tensorflow python -c "from keras import backend; print(backend._BACKEND)"
|
||||
Using TensorFlow backend.
|
||||
tensorflow
|
||||
```
|
||||
|
||||
+64
-18
@@ -12,6 +12,8 @@
|
||||
- [How can I record the training / validation loss / accuracy at each epoch?](#how-can-i-record-the-training-validation-loss-accuracy-at-each-epoch)
|
||||
- [How can I "freeze" layers?](#how-can-i-freeze-keras-layers)
|
||||
- [How can I use stateful RNNs?](#how-can-i-use-stateful-rnns)
|
||||
- [How can I remove a layer from a Sequential model?](#how-can-i-remove-a-layer-from-a-sequential-model)
|
||||
- [How can I use pre-trained models in Keras?](#how-can-i-use-pre-trained-models-in-keras)
|
||||
|
||||
---
|
||||
|
||||
@@ -56,7 +58,31 @@ theano.config.floatX = 'float32'
|
||||
|
||||
*It is not recommended to use pickle or cPickle to save a Keras model.*
|
||||
|
||||
If you only need to save the architecture of a model, and not its weights, you can do:
|
||||
You can use `model.save(filepath)` to save a Keras model into a single HDF5 file which will contain:
|
||||
|
||||
- the architecture of the model, allowing you to re-create the model
|
||||
- the weights of the model
|
||||
- the training configuration (loss, optimizer)
|
||||
- the state of the optimizer, allowing you to resume training exactly where you left off.
|
||||
|
||||
You can then use `keras.models.load_model(filepath)` to reinstantiate your model.
|
||||
`load_model` will also take care of compiling the model using the saved training configuration
|
||||
(unless the model was never compiled in the first place).
|
||||
|
||||
Example:
|
||||
|
||||
```python
|
||||
from keras.models import load_model
|
||||
|
||||
model.save('my_model.h5') # creates a HDF5 file 'my_model.h5'
|
||||
del model # deletes the existing model
|
||||
|
||||
# returns a compiled model
|
||||
# identical to the previous one
|
||||
model = load_model('my_model.h5')
|
||||
```
|
||||
|
||||
If you only need to save the **architecture of a model**, and not its weights or its training configuration, you can do:
|
||||
|
||||
```python
|
||||
# save as JSON
|
||||
@@ -66,6 +92,8 @@ json_string = model.to_json()
|
||||
yaml_string = model.to_yaml()
|
||||
```
|
||||
|
||||
The generated JSON / YAML files are human-readable and can be manually edited if needed.
|
||||
|
||||
You can then build a fresh model from this data:
|
||||
|
||||
```python
|
||||
@@ -77,7 +105,7 @@ model = model_from_json(json_string)
|
||||
model = model_from_yaml(yaml_string)
|
||||
```
|
||||
|
||||
If you need to save the weights of a model, you can do so in HDF5 with the code below.
|
||||
If you need to save the **weights of a model**, you can do so in HDF5 with the code below.
|
||||
|
||||
Note that you will first need to install HDF5 and the Python library h5py, which do not come bundled with Keras.
|
||||
|
||||
@@ -91,22 +119,6 @@ Assuming you have code for instantiating your model, you can then load the weigh
|
||||
model.load_weights('my_model_weights.h5')
|
||||
```
|
||||
|
||||
This leads us to a way to save and reconstruct models from only serialized data:
|
||||
```python
|
||||
json_string = model.to_json()
|
||||
open('my_model_architecture.json', 'w').write(json_string)
|
||||
model.save_weights('my_model_weights.h5')
|
||||
|
||||
# elsewhere...
|
||||
model = model_from_json(open('my_model_architecture.json').read())
|
||||
model.load_weights('my_model_weights.h5')
|
||||
```
|
||||
|
||||
Finally, before it can be used, the model shall be compiled.
|
||||
```python
|
||||
model.compile(optimizer='adagrad', loss='mse')
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### Why is the training loss much higher than the testing loss?
|
||||
@@ -296,3 +308,37 @@ model.layers[0].reset_states()
|
||||
|
||||
Note that the methods `predict`, `fit`, `train_on_batch`, `predict_classes`, etc. will *all* update the states of the stateful layers in a model. This allows you to do not only stateful training, but also stateful prediction.
|
||||
|
||||
---
|
||||
|
||||
### How can I remove a layer from a Sequential model?
|
||||
|
||||
You can remove the last added layer in a Sequential model by calling `.pop()`:
|
||||
|
||||
```python
|
||||
model = Sequential()
|
||||
model.add(Dense(32, activation='relu', input_dim=784))
|
||||
model.add(Dense(32, activation='relu'))
|
||||
|
||||
print(len(model.layers)) # "2"
|
||||
|
||||
model.pop()
|
||||
print(len(model.layers)) # "1"
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### How can I use pre-trained models in Keras?
|
||||
|
||||
Code and pre-trained weights are available for the following image classification models:
|
||||
|
||||
- [VGG-16](https://gist.github.com/baraldilorenzo/07d7802847aaad0a35d3)
|
||||
- [VGG-19](https://gist.github.com/baraldilorenzo/8d096f48a1be4a2d660d)
|
||||
- [AlexNet](https://github.com/heuritech/convnets-keras)
|
||||
|
||||
For an example of how to use such a pre-trained model for feature extraction or for fine-tuning, see [this blog post](http://blog.keras.io/building-powerful-image-classification-models-using-very-little-data.html).
|
||||
|
||||
The VGG-16 model is also the basis for several Keras example scripts:
|
||||
|
||||
- [Style transfer](https://github.com/fchollet/keras/blob/master/examples/neural_style_transfer.py)
|
||||
- [Feature visualization](https://github.com/fchollet/keras/blob/master/examples/conv_filter_visualization.py)
|
||||
- [Deep dream](https://github.com/fchollet/keras/blob/master/examples/deep_dream.py)
|
||||
|
||||
@@ -75,7 +75,7 @@ The model will also be supervised via two loss functions. Using the main loss fu
|
||||
|
||||
Here's what our model looks like:
|
||||
|
||||
<img src="http://s3.amazonaws.com/keras.io/img/multi-input-multi-output-graph.png" alt="multi-input-multi-output-graph" style="width: 400px;"/>
|
||||
<img src="https://s3.amazonaws.com/keras.io/img/multi-input-multi-output-graph.png" alt="multi-input-multi-output-graph" style="width: 400px;"/>
|
||||
|
||||
Let's implement it with the functional API.
|
||||
|
||||
@@ -310,7 +310,7 @@ from keras.layers import merge, Convolution2D, Input
|
||||
# input tensor for a 3-channel 256x256 image
|
||||
x = Input(shape=(3, 256, 256))
|
||||
# 3x3 conv with 3 output channels (same as input channels)
|
||||
y = Convolution2D(3, 3, 3, border_mode='same')
|
||||
y = Convolution2D(3, 3, 3, border_mode='same')(x)
|
||||
# this returns x + y.
|
||||
z = merge([x, y], mode='sum')
|
||||
```
|
||||
|
||||
@@ -86,7 +86,7 @@ final_model.add(merged)
|
||||
final_model.add(Dense(10, activation='softmax'))
|
||||
```
|
||||
|
||||
<img src="http://s3.amazonaws.com/keras.io/img/two_branches_sequential_model.png" alt="two branch Sequential" style="width: 400px;"/>
|
||||
<img src="https://s3.amazonaws.com/keras.io/img/two_branches_sequential_model.png" alt="two branch Sequential" style="width: 400px;"/>
|
||||
|
||||
Such a two-branch model can then be trained via e.g.:
|
||||
|
||||
@@ -149,7 +149,7 @@ Keras models are trained on Numpy arrays of input data and labels. For training
|
||||
# for a single-input model with 2 classes (binary):
|
||||
|
||||
model = Sequential()
|
||||
model.add(Dense(1, input_dim=784, activation='softmax'))
|
||||
model.add(Dense(1, input_dim=784, activation='sigmoid'))
|
||||
model.compile(optimizer='rmsprop',
|
||||
loss='binary_crossentropy',
|
||||
metrics=['accuracy'])
|
||||
@@ -381,7 +381,7 @@ image_model.load_weights('weight_file.h5')
|
||||
language_model = Sequential()
|
||||
language_model.add(Embedding(vocab_size, 256, input_length=max_caption_len))
|
||||
language_model.add(GRU(output_dim=128, return_sequences=True))
|
||||
language_model.add(TimeDistributedDense(128))
|
||||
language_model.add(TimeDistributed(Dense(128)))
|
||||
|
||||
# let's repeat the image vector to turn it into a sequence.
|
||||
image_model.add(RepeatVector(max_caption_len))
|
||||
@@ -418,7 +418,7 @@ The first two LSTMs return their full output sequences, but the last one only re
|
||||
the last step in its output sequence, thus dropping the temporal dimension
|
||||
(i.e. converting the input sequence into a single vector).
|
||||
|
||||
<img src="http://keras.io/img/regular_stacked_lstm.png" alt="stacked LSTM" style="width: 300px;"/>
|
||||
<img src="https://keras.io/img/regular_stacked_lstm.png" alt="stacked LSTM" style="width: 300px;"/>
|
||||
|
||||
```python
|
||||
from keras.models import Sequential
|
||||
@@ -507,7 +507,7 @@ In this model, two input sequences are encoded into vectors by two separate LSTM
|
||||
|
||||
These two vectors are then concatenated, and a fully connected network is trained on top of the concatenated representations.
|
||||
|
||||
<img src="http://keras.io/img/dual_lstm.png" alt="Dual LSTM" style="width: 600px;"/>
|
||||
<img src="https://keras.io/img/dual_lstm.png" alt="Dual LSTM" style="width: 600px;"/>
|
||||
|
||||
```python
|
||||
from keras.models import Sequential
|
||||
|
||||
+28
-1
@@ -1,7 +1,7 @@
|
||||
|
||||
## Usage of initializations
|
||||
|
||||
Initializations define the probability distribution used to set the initial random weights of Keras layers.
|
||||
Initializations define the way to set the initial random weights of Keras layers.
|
||||
|
||||
The keyword arguments used for passing initializations to layers will depend on the layer. Usually it is simply `init`:
|
||||
|
||||
@@ -21,3 +21,30 @@ model.add(Dense(64, init='uniform'))
|
||||
- __glorot_uniform__
|
||||
- __he_normal__: Gaussian initialization scaled by fan_in (He et al., 2014)
|
||||
- __he_uniform__
|
||||
|
||||
|
||||
An initialization may be passed as a string (must match one of the available initializations above), or as a callable.
|
||||
If a callable, then it must take two arguments: `shape` (shape of the variable to initialize) and `name` (name of the variable),
|
||||
and it must return a variable (e.g. output of `K.variable()`):
|
||||
|
||||
```python
|
||||
from keras import backend as K
|
||||
import numpy as np
|
||||
|
||||
def my_init(shape, name=None):
|
||||
value = np.random.random(shape)
|
||||
return K.variable(value, name=name)
|
||||
|
||||
model.add(Dense(64, init=my_init))
|
||||
```
|
||||
|
||||
You could also use functions from `keras.initializations` in this way:
|
||||
|
||||
```python
|
||||
from keras import initializations
|
||||
|
||||
def my_init(shape, name=None):
|
||||
return initializations.normal(shape, scale=0.01, name=name)
|
||||
|
||||
model.add(Dense(64, init=my_init))
|
||||
```
|
||||
externo
+20
-1
@@ -9,7 +9,7 @@ model.add(Dense(64, init='uniform', input_dim=10))
|
||||
model.add(Activation('tanh'))
|
||||
model.add(Activation('softmax'))
|
||||
|
||||
sgd = SGD(lr=0.1, decay=1e-6, momentum=0.9, nesterov=True)
|
||||
sgd = SGD(lr=0.01, decay=1e-6, momentum=0.9, nesterov=True)
|
||||
model.compile(loss='mean_squared_error', optimizer=sgd)
|
||||
```
|
||||
|
||||
@@ -22,4 +22,23 @@ model.compile(loss='mean_squared_error', optimizer='sgd')
|
||||
|
||||
---
|
||||
|
||||
## Parameters common to all Keras optimizers
|
||||
|
||||
The parameters `clipnorm` and `clipvalue` can be used with all optimizers to control gradient clipping:
|
||||
|
||||
```python
|
||||
# all parameter gradients will be clipped to
|
||||
# a maximum norm of 1.
|
||||
sgd = SGD(lr=0.01, clipnorm=1.)
|
||||
```
|
||||
|
||||
```python
|
||||
# all parameter gradients will be clipped to
|
||||
# a maximum value of 0.5 and
|
||||
# a minimum value of -0.5.
|
||||
sgd = SGD(lr=0.01, clipvalue=0.5)
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
{{autogenerated}}
|
||||
+1
-1
@@ -88,7 +88,7 @@ Generate batches of tensor image data with real-time data augmentation. The data
|
||||
Example of using `.flow(X, y)`:
|
||||
|
||||
```python
|
||||
(X_train, y_train), (X_test, y_test) = cifar10.load_data(test_split=0.1)
|
||||
(X_train, y_train), (X_test, y_test) = cifar10.load_data()
|
||||
Y_train = np_utils.to_categorical(y_train, nb_classes)
|
||||
Y_test = np_utils.to_categorical(y_test, nb_classes)
|
||||
|
||||
|
||||
+3
-3
@@ -1,12 +1,12 @@
|
||||
# Wrappers for the Scikit-Learn API
|
||||
|
||||
You can use `Sequential` Keras models (single-input only) as part of your Scikit-Learn workflow via the wrappers found at `keras.wrappers.sklearn.py`.
|
||||
You can use `Sequential` Keras models (single-input only) as part of your Scikit-Learn workflow via the wrappers found at `keras.wrappers.scikit_learn.py`.
|
||||
|
||||
There are two wrappers available:
|
||||
|
||||
`keras.wrappers.sklearn.KerasClassifier(build_fn=None, **sk_params)`, which implements the sklearn classifier interface,
|
||||
`keras.wrappers.scikit_learn.KerasClassifier(build_fn=None, **sk_params)`, which implements the Scikit-Learn classifier interface,
|
||||
|
||||
`keras.wrappers.sklearn.KerasRegressor(build_fn=None, **sk_params)`, which implements the sklearn regressor interface.
|
||||
`keras.wrappers.scikit_learn.KerasRegressor(build_fn=None, **sk_params)`, which implements the Scikit-Learn regressor interface.
|
||||
|
||||
### Arguments
|
||||
|
||||
|
||||
@@ -19,8 +19,7 @@ maxlen = 100 # cut texts after this number of words (among top max_features mos
|
||||
batch_size = 32
|
||||
|
||||
print('Loading data...')
|
||||
(X_train, y_train), (X_test, y_test) = imdb.load_data(nb_words=max_features,
|
||||
test_split=0.2)
|
||||
(X_train, y_train), (X_test, y_test) = imdb.load_data(nb_words=max_features)
|
||||
print(len(X_train), 'train sequences')
|
||||
print(len(X_test), 'test sequences')
|
||||
|
||||
|
||||
+9
-11
@@ -1,6 +1,6 @@
|
||||
'''This example demonstrates the use of Convolution1D for text classification.
|
||||
|
||||
Gets to 0.88 test accuracy after 2 epochs.
|
||||
Gets to 0.89 test accuracy after 2 epochs.
|
||||
90s/epoch on Intel i5 2.4Ghz CPU.
|
||||
10s/epoch on Tesla K40 GPU.
|
||||
|
||||
@@ -12,9 +12,9 @@ np.random.seed(1337) # for reproducibility
|
||||
|
||||
from keras.preprocessing import sequence
|
||||
from keras.models import Sequential
|
||||
from keras.layers import Dense, Dropout, Activation, Lambda
|
||||
from keras.layers import Dense, Dropout, Activation, Flatten
|
||||
from keras.layers import Embedding
|
||||
from keras.layers import Convolution1D
|
||||
from keras.layers import Convolution1D, MaxPooling1D
|
||||
from keras.datasets import imdb
|
||||
from keras import backend as K
|
||||
|
||||
@@ -30,8 +30,7 @@ hidden_dims = 250
|
||||
nb_epoch = 2
|
||||
|
||||
print('Loading data...')
|
||||
(X_train, y_train), (X_test, y_test) = imdb.load_data(nb_words=max_features,
|
||||
test_split=0.2)
|
||||
(X_train, y_train), (X_test, y_test) = imdb.load_data(nb_words=max_features)
|
||||
print(len(X_train), 'train sequences')
|
||||
print(len(X_test), 'test sequences')
|
||||
|
||||
@@ -58,13 +57,12 @@ model.add(Convolution1D(nb_filter=nb_filter,
|
||||
border_mode='valid',
|
||||
activation='relu',
|
||||
subsample_length=1))
|
||||
# we use max pooling:
|
||||
model.add(MaxPooling1D(pool_length=model.output_shape[1]))
|
||||
|
||||
# we use max over time pooling by defining a python function to use
|
||||
# in a Lambda layer
|
||||
def max_1d(X):
|
||||
return K.max(X, axis=1)
|
||||
|
||||
model.add(Lambda(max_1d, output_shape=(nb_filter,)))
|
||||
# We flatten the output of the conv layer,
|
||||
# so that we can add a vanilla dense layer:
|
||||
model.add(Flatten())
|
||||
|
||||
# We add a vanilla hidden layer:
|
||||
model.add(Dense(hidden_dims))
|
||||
|
||||
@@ -22,9 +22,9 @@ maxlen = 100
|
||||
embedding_size = 128
|
||||
|
||||
# Convolution
|
||||
filter_length = 3
|
||||
filter_length = 5
|
||||
nb_filter = 64
|
||||
pool_length = 2
|
||||
pool_length = 4
|
||||
|
||||
# LSTM
|
||||
lstm_output_size = 70
|
||||
@@ -40,7 +40,7 @@ Only 2 epochs are needed as the dataset is very small.
|
||||
'''
|
||||
|
||||
print('Loading data...')
|
||||
(X_train, y_train), (X_test, y_test) = imdb.load_data(nb_words=max_features, test_split=0.2)
|
||||
(X_train, y_train), (X_test, y_test) = imdb.load_data(nb_words=max_features)
|
||||
print(len(X_train), 'train sequences')
|
||||
print(len(X_test), 'test sequences')
|
||||
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
'''Trains a LSTM on the IMDB sentiment classification task.
|
||||
|
||||
The dataset is actually too small for LSTM to be of any advantage
|
||||
compared to simpler, much faster methods such as TF-IDF+LogReg.
|
||||
compared to simpler, much faster methods such as TF-IDF + LogReg.
|
||||
|
||||
Notes:
|
||||
|
||||
@@ -28,8 +28,7 @@ maxlen = 80 # cut texts after this number of words (among top max_features most
|
||||
batch_size = 32
|
||||
|
||||
print('Loading data...')
|
||||
(X_train, y_train), (X_test, y_test) = imdb.load_data(nb_words=max_features,
|
||||
test_split=0.2)
|
||||
(X_train, y_train), (X_test, y_test) = imdb.load_data(nb_words=max_features)
|
||||
print(len(X_train), 'train sequences')
|
||||
print(len(X_test), 'test sequences')
|
||||
|
||||
@@ -52,8 +51,6 @@ model.compile(loss='binary_crossentropy',
|
||||
metrics=['accuracy'])
|
||||
|
||||
print('Train...')
|
||||
print(X_train.shape)
|
||||
print(y_train.shape)
|
||||
model.fit(X_train, y_train, batch_size=batch_size, nb_epoch=15,
|
||||
validation_data=(X_test, y_test))
|
||||
score, acc = model.evaluate(X_test, y_test,
|
||||
|
||||
@@ -14,6 +14,7 @@ from __future__ import print_function
|
||||
from keras.models import Sequential
|
||||
from keras.layers import Dense, Activation, Dropout
|
||||
from keras.layers import LSTM
|
||||
from keras.optimizers import RMSprop
|
||||
from keras.utils.data_utils import get_file
|
||||
import numpy as np
|
||||
import random
|
||||
@@ -50,20 +51,22 @@ for i, sentence in enumerate(sentences):
|
||||
# build the model: 2 stacked LSTM
|
||||
print('Build model...')
|
||||
model = Sequential()
|
||||
model.add(LSTM(512, return_sequences=True, input_shape=(maxlen, len(chars))))
|
||||
model.add(LSTM(512, return_sequences=False))
|
||||
model.add(Dropout(0.2))
|
||||
model.add(LSTM(128, input_shape=(maxlen, len(chars))))
|
||||
model.add(Dense(len(chars)))
|
||||
model.add(Activation('softmax'))
|
||||
|
||||
model.compile(loss='categorical_crossentropy', optimizer='rmsprop')
|
||||
optimizer = RMSprop(lr=0.01)
|
||||
model.compile(loss='categorical_crossentropy', optimizer=optimizer)
|
||||
|
||||
|
||||
def sample(a, temperature=1.0):
|
||||
def sample(preds, temperature=1.0):
|
||||
# helper function to sample an index from a probability array
|
||||
a = np.log(a) / temperature
|
||||
a = np.exp(a) / np.sum(np.exp(a))
|
||||
return np.argmax(np.random.multinomial(1, a, 1))
|
||||
preds = np.asarray(preds).astype('float64')
|
||||
preds = np.log(preds) / temperature
|
||||
exp_preds = np.exp(preds)
|
||||
preds = exp_preds / np.sum(exp_preds)
|
||||
probas = np.random.multinomial(1, preds, 1)
|
||||
return np.argmax(probas)
|
||||
|
||||
# train the model, output generated text after each iteration
|
||||
for iteration in range(1, 60):
|
||||
|
||||
@@ -80,6 +80,7 @@ total_variation_weight = 1.
|
||||
style_weight = 1.
|
||||
content_weight = 0.025
|
||||
|
||||
|
||||
# dimensions of the generated picture.
|
||||
img_width = 400
|
||||
img_height = 400
|
||||
@@ -88,13 +89,21 @@ assert img_height == img_width, 'Due to the use of the Gram matrix, width and he
|
||||
# util function to open, resize and format pictures into appropriate tensors
|
||||
def preprocess_image(image_path):
|
||||
img = imresize(imread(image_path), (img_width, img_height))
|
||||
img = img.transpose((2, 0, 1)).astype('float64')
|
||||
img = img[:, :, ::-1].astype('float64')
|
||||
img[:, :, 0] -= 103.939
|
||||
img[:, :, 1] -= 116.779
|
||||
img[:, :, 2] -= 123.68
|
||||
img = img.transpose((2, 0, 1))
|
||||
img = np.expand_dims(img, axis=0)
|
||||
return img
|
||||
|
||||
# util function to convert a tensor into a valid image
|
||||
def deprocess_image(x):
|
||||
x = x.transpose((1, 2, 0))
|
||||
x[:, :, 0] += 103.939
|
||||
x[:, :, 1] += 116.779
|
||||
x[:, :, 2] += 123.68
|
||||
x = x[:, :, ::-1]
|
||||
x = np.clip(x, 0, 255).astype('uint8')
|
||||
return x
|
||||
|
||||
@@ -275,6 +284,9 @@ evaluator = Evaluator()
|
||||
# run scipy-based optimization (L-BFGS) over the pixels of the generated image
|
||||
# so as to minimize the neural style loss
|
||||
x = np.random.uniform(0, 255, (1, 3, img_width, img_height))
|
||||
x[0, 0, :, :] -= 103.939
|
||||
x[0, 1, :, :] -= 116.779
|
||||
x[0, 2, :, :] -= 123.68
|
||||
for i in range(10):
|
||||
print('Start of iteration', i)
|
||||
start_time = time.time()
|
||||
@@ -282,7 +294,7 @@ for i in range(10):
|
||||
fprime=evaluator.grads, maxfun=20)
|
||||
print('Current loss value:', min_val)
|
||||
# save current generated image
|
||||
img = deprocess_image(x.reshape((3, img_width, img_height)))
|
||||
img = deprocess_image(x.copy().reshape((3, img_width, img_height)))
|
||||
fname = result_prefix + '_at_iteration_%d.png' % i
|
||||
imsave(fname, img)
|
||||
end_time = time.time()
|
||||
|
||||
@@ -0,0 +1,144 @@
|
||||
'''This script loads pre-trained word embeddings (GloVe embeddings)
|
||||
into a frozen Keras Embedding layer, and uses it to
|
||||
train a text classification model on the 20 Newsgroup dataset
|
||||
(classification of newsgroup messages into 20 different categories).
|
||||
|
||||
GloVe embedding data can be found at:
|
||||
http://nlp.stanford.edu/data/glove.6B.zip
|
||||
(source page: http://nlp.stanford.edu/projects/glove/)
|
||||
|
||||
20 Newsgroup data can be found at:
|
||||
http://www.cs.cmu.edu/afs/cs.cmu.edu/project/theo-20/www/data/news20.html
|
||||
'''
|
||||
|
||||
from __future__ import print_function
|
||||
import os
|
||||
import numpy as np
|
||||
np.random.seed(1337)
|
||||
|
||||
from keras.preprocessing.text import Tokenizer
|
||||
from keras.preprocessing.sequence import pad_sequences
|
||||
from keras.utils.np_utils import to_categorical
|
||||
from keras.layers import Dense, Input, Flatten
|
||||
from keras.layers import Conv1D, MaxPooling1D, Embedding
|
||||
from keras.models import Model
|
||||
import sys
|
||||
|
||||
BASE_DIR = ''
|
||||
GLOVE_DIR = BASE_DIR + '/glove.6B/'
|
||||
TEXT_DATA_DIR = BASE_DIR + '/20_newsgroup/'
|
||||
MAX_SEQUENCE_LENGTH = 1000
|
||||
MAX_NB_WORDS = 20000
|
||||
EMBEDDING_DIM = 100
|
||||
VALIDATION_SPLIT = 0.2
|
||||
|
||||
# first, build index mapping words in the embeddings set
|
||||
# to their embedding vector
|
||||
|
||||
print('Indexing word vectors.')
|
||||
|
||||
# Map every token of the GloVe file to its embedding vector.
# Each line is split on whitespace: the first field is the word, the
# remaining fields are its coefficients.
embeddings_index = {}
# The context manager closes the file even if a line fails to parse.
with open(os.path.join(GLOVE_DIR, 'glove.6B.100d.txt')) as f:
    for line in f:
        values = line.split()
        word = values[0]
        coefs = np.asarray(values[1:], dtype='float32')
        embeddings_index[word] = coefs
|
||||
|
||||
print('Found %s word vectors.' % len(embeddings_index))
|
||||
|
||||
# second, prepare text samples and their labels
|
||||
print('Processing text dataset')
|
||||
|
||||
texts = []  # list of text samples
labels_index = {}  # dictionary mapping label name to numeric id
labels = []  # list of label ids
# Python 2's open() has no `encoding` argument; on Python 3 the files
# are decoded as latin-1.
open_kwargs = {} if sys.version_info < (3,) else {'encoding': 'latin-1'}
# Every sub-directory of TEXT_DATA_DIR is one label; label ids follow the
# sorted directory order.
for name in sorted(os.listdir(TEXT_DATA_DIR)):
    path = os.path.join(TEXT_DATA_DIR, name)
    if not os.path.isdir(path):
        continue
    label_id = len(labels_index)
    labels_index[name] = label_id
    for fname in sorted(os.listdir(path)):
        # only files whose name is purely numeric are read as samples
        if not fname.isdigit():
            continue
        fpath = os.path.join(path, fname)
        # the context manager closes the file even if read() raises
        with open(fpath, **open_kwargs) as f:
            texts.append(f.read())
        labels.append(label_id)
|
||||
|
||||
print('Found %s texts.' % len(texts))
|
||||
|
||||
# finally, vectorize the text samples into a 2D integer tensor
|
||||
tokenizer = Tokenizer(nb_words=MAX_NB_WORDS)
|
||||
tokenizer.fit_on_texts(texts)
|
||||
sequences = tokenizer.texts_to_sequences(texts)
|
||||
|
||||
word_index = tokenizer.word_index
|
||||
print('Found %s unique tokens.' % len(word_index))
|
||||
|
||||
data = pad_sequences(sequences, maxlen=MAX_SEQUENCE_LENGTH)
|
||||
|
||||
labels = to_categorical(np.asarray(labels))
|
||||
print('Shape of data tensor:', data.shape)
|
||||
print('Shape of label tensor:', labels.shape)
|
||||
|
||||
# split the data into a training set and a validation set
|
||||
indices = np.arange(data.shape[0])
|
||||
np.random.shuffle(indices)
|
||||
data = data[indices]
|
||||
labels = labels[indices]
|
||||
nb_validation_samples = int(VALIDATION_SPLIT * data.shape[0])
|
||||
|
||||
x_train = data[:-nb_validation_samples]
|
||||
y_train = labels[:-nb_validation_samples]
|
||||
x_val = data[-nb_validation_samples:]
|
||||
y_val = labels[-nb_validation_samples:]
|
||||
|
||||
print('Preparing embedding matrix.')
|
||||
|
||||
# prepare embedding matrix
|
||||
nb_words = min(MAX_NB_WORDS, len(word_index))
|
||||
embedding_matrix = np.zeros((nb_words + 1, EMBEDDING_DIM))
|
||||
for word, i in word_index.items():
|
||||
if i > MAX_NB_WORDS:
|
||||
continue
|
||||
embedding_vector = embeddings_index.get(word)
|
||||
if embedding_vector is not None:
|
||||
# words not found in embedding index will be all-zeros.
|
||||
embedding_matrix[i] = embedding_vector
|
||||
|
||||
# load pre-trained word embeddings into an Embedding layer
|
||||
# note that we set trainable = False so as to keep the embeddings fixed
|
||||
embedding_layer = Embedding(nb_words + 1,
|
||||
EMBEDDING_DIM,
|
||||
weights=[embedding_matrix],
|
||||
input_length=MAX_SEQUENCE_LENGTH,
|
||||
trainable=False)
|
||||
|
||||
print('Training model.')
|
||||
|
||||
# train a 1D convnet with global maxpooling
|
||||
sequence_input = Input(shape=(MAX_SEQUENCE_LENGTH,), dtype='int32')
|
||||
embedded_sequences = embedding_layer(sequence_input)
|
||||
x = Conv1D(128, 5, activation='relu')(embedded_sequences)
|
||||
x = MaxPooling1D(5)(x)
|
||||
x = Conv1D(128, 5, activation='relu')(x)
|
||||
x = MaxPooling1D(5)(x)
|
||||
x = Conv1D(128, 5, activation='relu')(x)
|
||||
x = MaxPooling1D(35)(x)
|
||||
x = Flatten()(x)
|
||||
x = Dense(128, activation='relu')(x)
|
||||
preds = Dense(len(labels_index), activation='softmax')(x)
|
||||
|
||||
model = Model(sequence_input, preds)
|
||||
model.compile(loss='categorical_crossentropy',
|
||||
optimizer='rmsprop',
|
||||
metrics=['acc'])
|
||||
|
||||
# happy learning!
|
||||
model.fit(x_train, y_train, validation_data=(x_val, y_val),
|
||||
nb_epoch=2, batch_size=128)
|
||||
@@ -0,0 +1,220 @@
|
||||
'''This script demonstrates how to build a deep residual network
|
||||
using the Keras functional API.
|
||||
|
||||
get_resnet50() returns the deep residual network model (50 layers)
|
||||
|
||||
Please visit Kaiming He's GitHub homepage:
|
||||
https://github.com/KaimingHe
|
||||
for more information.
|
||||
|
||||
The related paper is
|
||||
'Deep Residual Learning for Image Recognition'
|
||||
Kaiming He, Xiangyu Zhang, Shaoqing Ren, Jian Sun
|
||||
http://arxiv.org/abs/1512.03385
|
||||
|
||||
Pretrained weights were converted from Kaiming He's caffe model directly.
|
||||
|
||||
For now we provide weights for the TensorFlow backend only,
|
||||
so using 'tf' dim_ordering (e.g. input_shape=(224, 224, 3) for a 224*224 color image)
|
||||
will accelerate the computation; we also provide weights for 'th' dim_ordering for compatibility.
|
||||
You can set your default dim ordering in your Keras config file at ~/.keras/keras.json
|
||||
|
||||
Please download them at:
|
||||
http://pan.baidu.com/s/1o8pO2q2 ('th' dim ordering, for China)
|
||||
http://pan.baidu.com/s/1pLanuTt ('tf' dim ordering, for China)
|
||||
|
||||
https://drive.google.com/open?id=0B4ChsjFJvew3NVQ2U041Q0xHRHM ('th' dim ordering, for other countries)
|
||||
https://drive.google.com/open?id=0B4ChsjFJvew3NWN5THdxcTdSWmc ('tf' dim ordering, for other countries)
|
||||
|
||||
@author: BigMoyan, University of Electronic Science and Technology of China
|
||||
'''
|
||||
from __future__ import print_function
|
||||
from keras.layers import merge
|
||||
from keras.layers.convolutional import Convolution2D, MaxPooling2D, ZeroPadding2D, AveragePooling2D
|
||||
from keras.layers.core import Dense, Activation, Flatten
|
||||
from keras.layers.normalization import BatchNormalization
|
||||
from keras.models import Model
|
||||
from keras.layers import Input
|
||||
from keras.preprocessing.image import load_img, img_to_array
|
||||
import keras.backend as K
|
||||
import numpy as np
|
||||
|
||||
# The names of layers in resnet50 are generated with the following format
|
||||
# [type][stage][block]_branch[branch][layer]
|
||||
# type: 'res' for conv layer, 'bn' and 'scale' for BN layer
|
||||
# stage: from '2' to '5', current stage number
|
||||
# block: 'a','b','c'... for different blocks in a stage
|
||||
# branch: '1' for shortcut and '2' for main path
|
||||
# layer: 'a','b','c'... for different layers in a block
|
||||
|
||||
|
||||
def identity_block(input_tensor, kernel_size, filters, stage, block):
    '''Build a residual block whose shortcut is the unmodified input.

    Main path: 1x1 conv -> BN -> relu -> kernel_size x kernel_size conv
    ('same' border mode) -> BN -> relu -> 1x1 conv -> BN. The result is
    summed with `input_tensor` and passed through a final relu.

    # Arguments
        input_tensor: input tensor
        kernel_size: kernel size of the middle conv layer of the main path
        filters: sequence of 3 integers, the nb_filter of the three conv
            layers of the main path, in order
        stage: current stage label (converted with `str`), used for
            generating layer names
        block: 'a','b'..., current block label, used for generating
            layer names

    # Returns
        The output tensor of the block.
    '''
    ordering = K.image_dim_ordering()
    n_first, n_middle, n_last = filters
    # BatchNormalization acts on axis 3 for 'tf' ordering, axis 1 otherwise
    channel_axis = 3 if ordering == 'tf' else 1
    suffix = str(stage) + block + '_branch'
    conv_prefix = 'res' + suffix
    bn_prefix = 'bn' + suffix

    # first 1x1 convolution
    x = Convolution2D(n_first, 1, 1, dim_ordering=ordering,
                      name=conv_prefix + '2a')(input_tensor)
    x = BatchNormalization(axis=channel_axis, name=bn_prefix + '2a')(x)
    x = Activation('relu')(x)

    # middle convolution
    x = Convolution2D(n_middle, kernel_size, kernel_size, border_mode='same',
                      dim_ordering=ordering, name=conv_prefix + '2b')(x)
    x = BatchNormalization(axis=channel_axis, name=bn_prefix + '2b')(x)
    x = Activation('relu')(x)

    # last 1x1 convolution (no relu before the merge)
    x = Convolution2D(n_last, 1, 1, dim_ordering=ordering,
                      name=conv_prefix + '2c')(x)
    x = BatchNormalization(axis=channel_axis, name=bn_prefix + '2c')(x)

    # identity shortcut: add the block input back in
    x = merge([x, input_tensor], mode='sum')
    return Activation('relu')(x)
|
||||
|
||||
|
||||
def conv_block(input_tensor, kernel_size, filters, stage, block, strides=(2, 2)):
    '''Build a residual block whose shortcut goes through a conv layer.

    Main path: 1x1 conv (subsampled by `strides`) -> BN -> relu ->
    kernel_size x kernel_size conv ('same' border mode) -> BN -> relu ->
    1x1 conv -> BN. The shortcut is a 1x1 conv (also subsampled by
    `strides`) followed by BN. Both paths are summed, then relu is applied.

    # Arguments
        input_tensor: input tensor
        kernel_size: kernel size of the middle conv layer of the main path
        filters: sequence of 3 integers, the nb_filter of the three conv
            layers of the main path, in order; the last one is also used
            for the shortcut conv
        stage: current stage label (converted with `str`), used for
            generating layer names
        block: 'a','b'..., current block label, used for generating
            layer names
        strides: `subsample` value shared by the first main-path conv
            and the shortcut conv

    # Returns
        The output tensor of the block.
    '''
    n_first, n_middle, n_last = filters
    ordering = K.image_dim_ordering()
    # BatchNormalization acts on axis 3 for 'tf' ordering, axis 1 otherwise
    channel_axis = 3 if ordering == 'tf' else 1
    suffix = str(stage) + block + '_branch'
    conv_prefix = 'res' + suffix
    bn_prefix = 'bn' + suffix

    # first 1x1 convolution, carrying the subsampling
    x = Convolution2D(n_first, 1, 1, subsample=strides,
                      dim_ordering=ordering,
                      name=conv_prefix + '2a')(input_tensor)
    x = BatchNormalization(axis=channel_axis, name=bn_prefix + '2a')(x)
    x = Activation('relu')(x)

    # middle convolution
    x = Convolution2D(n_middle, kernel_size, kernel_size, border_mode='same',
                      dim_ordering=ordering, name=conv_prefix + '2b')(x)
    x = BatchNormalization(axis=channel_axis, name=bn_prefix + '2b')(x)
    x = Activation('relu')(x)

    # last 1x1 convolution (no relu before the merge)
    x = Convolution2D(n_last, 1, 1, dim_ordering=ordering,
                      name=conv_prefix + '2c')(x)
    x = BatchNormalization(axis=channel_axis, name=bn_prefix + '2c')(x)

    # shortcut branch: same subsampling and filter count as the main path
    shortcut = Convolution2D(n_last, 1, 1, subsample=strides,
                             dim_ordering=ordering,
                             name=conv_prefix + '1')(input_tensor)
    shortcut = BatchNormalization(axis=channel_axis,
                                  name=bn_prefix + '1')(shortcut)

    x = merge([x, shortcut], mode='sum')
    return Activation('relu')(x)
|
||||
|
||||
|
||||
def read_img(img_path):
    '''Load an image file and preprocess it for the network.

    The image is loaded at 224x224, converted to an array in the current
    Keras dim ordering, reordered from 'RGB' to 'BGR', mean-centered per
    channel, and given a leading batch axis.

    # Arguments
        img_path: path of the image file to load

    # Returns
        A numpy array with a leading batch axis of size 1.
    '''
    dim_ordering = K.image_dim_ordering()
    # per-channel means, listed in 'BGR' order
    mean = (103.939, 116.779, 123.68)
    img = load_img(img_path, target_size=(224, 224))
    img = img_to_array(img, dim_ordering=dim_ordering)

    # The channels must be flipped to 'BGR' *before* subtracting, so that
    # channel i is paired with mean[i]. Subtracting first would apply the
    # blue mean to the red channel and vice versa.
    if dim_ordering == 'th':
        # 'RGB'->'BGR'
        img = img[::-1, :, :]
        img[0, :, :] -= mean[0]
        img[1, :, :] -= mean[1]
        img[2, :, :] -= mean[2]
    else:
        # 'RGB'->'BGR'
        img = img[:, :, ::-1]
        img[:, :, 0] -= mean[0]
        img[:, :, 1] -= mean[1]
        img[:, :, 2] -= mean[2]

    img = np.expand_dims(img, axis=0)
    return img
|
||||
|
||||
|
||||
def get_resnet50():
    '''Build and return the 50-layer residual network model.

    The model takes 224x224 3-channel inputs laid out according to the
    current Keras dim ordering and ends in a 1000-way softmax. No weights
    are loaded here: load pretrained weights if you want to use the model
    directly. Since the pretrained weights were converted from a
    caffemodel, the channel order of the input image should be 'BGR'
    (the channel order of caffe).

    # Returns
        A Keras `Model` instance.
    '''
    dim_ordering = K.image_dim_ordering()
    if dim_ordering == 'tf':
        input_shape, bn_axis = (224, 224, 3), 3
    else:
        input_shape, bn_axis = (3, 224, 224), 1
    inp = Input(shape=input_shape)

    # stem: padded 7x7/2 conv, BN, relu, 3x3/2 max pooling
    x = ZeroPadding2D((3, 3), dim_ordering=dim_ordering)(inp)
    x = Convolution2D(64, 7, 7, subsample=(2, 2),
                      dim_ordering=dim_ordering, name='conv1')(x)
    x = BatchNormalization(axis=bn_axis, name='bn_conv1')(x)
    x = Activation('relu')(x)
    x = MaxPooling2D((3, 3), strides=(2, 2), dim_ordering=dim_ordering)(x)

    # (stage, filters, labels of the identity blocks, strides of block 'a')
    # Each stage is one conv_block 'a' followed by identity blocks.
    stage_specs = (
        (2, [64, 64, 256], 'bc', (1, 1)),
        (3, [128, 128, 512], 'bcd', (2, 2)),
        (4, [256, 256, 1024], 'bcdef', (2, 2)),
        (5, [512, 512, 2048], 'bc', (2, 2)),
    )
    for stage, filters, identity_labels, strides in stage_specs:
        x = conv_block(x, 3, filters, stage=stage, block='a', strides=strides)
        for label in identity_labels:
            x = identity_block(x, 3, filters, stage=stage, block=label)

    # classifier head
    x = AveragePooling2D((7, 7), dim_ordering=dim_ordering)(x)
    x = Flatten()(x)
    x = Dense(1000, activation='softmax', name='fc1000')(x)

    return Model(inp, x)
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # the weights file name is prefixed with the current dim ordering
    weights_file = K.image_dim_ordering() + '_dim_ordering_resnet50.h5'
    resnet_model = get_resnet50()
    resnet_model.load_weights(weights_file)

    # you may download synset_words from the address given at the beginning of this file
    # All lines are read up front, so the file can be closed right away,
    # even if a later step raises.
    with open('synset_words.txt', 'r') as class_table:
        lines = class_table.readlines()

    # the index of the highest-scoring class selects the line to print
    test_img1 = read_img('cat.jpg')
    print('Result for test 1 is:')
    print(lines[np.argmax(resnet_model.predict(test_img1)[0])])

    test_img2 = read_img('elephant.jpg')
    print('Result for test 2 is:')
    print(lines[np.argmax(resnet_model.predict(test_img2)[0])])
|
||||
@@ -16,7 +16,7 @@ epochs = 25
|
||||
lahead = 1
|
||||
|
||||
|
||||
def gen_cosine_amp(amp=100, period=25, x0=0, xn=50000, step=1, k=0.0001):
|
||||
def gen_cosine_amp(amp=100, period=1000, x0=0, xn=50000, step=1, k=0.0001):
|
||||
"""Generates an absolute cosine time series with the amplitude
|
||||
exponentially decreasing
|
||||
|
||||
@@ -31,7 +31,7 @@ def gen_cosine_amp(amp=100, period=25, x0=0, xn=50000, step=1, k=0.0001):
|
||||
cos = np.zeros(((xn - x0) * step, 1, 1))
|
||||
for i in range(len(cos)):
|
||||
idx = x0 + i * step
|
||||
cos[i, 0, 0] = amp * np.cos(idx / (2 * np.pi * period))
|
||||
cos[i, 0, 0] = amp * np.cos(2 * np.pi * idx / period)
|
||||
cos[i, 0, 0] = cos[i, 0, 0] * np.exp(-k * idx)
|
||||
return cos
|
||||
|
||||
|
||||
@@ -11,27 +11,25 @@ from keras import backend as K
|
||||
from keras import objectives
|
||||
from keras.datasets import mnist
|
||||
|
||||
batch_size = 16
|
||||
batch_size = 100
|
||||
original_dim = 784
|
||||
latent_dim = 2
|
||||
intermediate_dim = 128
|
||||
epsilon_std = 0.01
|
||||
nb_epoch = 40
|
||||
intermediate_dim = 256
|
||||
nb_epoch = 50
|
||||
|
||||
x = Input(batch_shape=(batch_size, original_dim))
|
||||
h = Dense(intermediate_dim, activation='relu')(x)
|
||||
z_mean = Dense(latent_dim)(h)
|
||||
z_log_std = Dense(latent_dim)(h)
|
||||
z_log_var = Dense(latent_dim)(h)
|
||||
|
||||
|
||||
def sampling(args):
|
||||
z_mean, z_log_std = args
|
||||
epsilon = K.random_normal(shape=(batch_size, latent_dim),
|
||||
mean=0., std=epsilon_std)
|
||||
return z_mean + K.exp(z_log_std) * epsilon
|
||||
z_mean, z_log_var = args
|
||||
epsilon = K.random_normal(shape=(batch_size, latent_dim), mean=0.)
|
||||
return z_mean + K.exp(z_log_var / 2) * epsilon
|
||||
|
||||
# note that "output_shape" isn't necessary with the TensorFlow backend
|
||||
# so you could write `Lambda(sampling)([z_mean, z_log_std])`
|
||||
z = Lambda(sampling, output_shape=(latent_dim,))([z_mean, z_log_std])
|
||||
z = Lambda(sampling, output_shape=(latent_dim,))([z_mean, z_log_var])
|
||||
|
||||
# we instantiate these layers separately so as to reuse them later
|
||||
decoder_h = Dense(intermediate_dim, activation='relu')
|
||||
@@ -39,9 +37,10 @@ decoder_mean = Dense(original_dim, activation='sigmoid')
|
||||
h_decoded = decoder_h(z)
|
||||
x_decoded_mean = decoder_mean(h_decoded)
|
||||
|
||||
|
||||
def vae_loss(x, x_decoded_mean):
|
||||
xent_loss = objectives.binary_crossentropy(x, x_decoded_mean)
|
||||
kl_loss = - 0.5 * K.mean(1 + z_log_std - K.square(z_mean) - K.exp(z_log_std), axis=-1)
|
||||
xent_loss = original_dim * objectives.binary_crossentropy(x, x_decoded_mean)
|
||||
kl_loss = - 0.5 * K.sum(1 + z_log_var - K.square(z_mean) - K.exp(z_log_var), axis=-1)
|
||||
return xent_loss + kl_loss
|
||||
|
||||
vae = Model(x, x_decoded_mean)
|
||||
@@ -87,7 +86,7 @@ grid_y = np.linspace(-15, 15, n)
|
||||
|
||||
for i, yi in enumerate(grid_x):
|
||||
for j, xi in enumerate(grid_y):
|
||||
z_sample = np.array([[xi, yi]]) * epsilon_std
|
||||
z_sample = np.array([[xi, yi]])
|
||||
x_decoded = generator.predict(z_sample)
|
||||
digit = x_decoded[0].reshape(digit_size, digit_size)
|
||||
figure[i * digit_size: (i + 1) * digit_size,
|
||||
|
||||
@@ -0,0 +1,124 @@
|
||||
'''This script demonstrates how to build a variational autoencoder with Keras and deconvolution layers.
|
||||
|
||||
Reference: "Auto-Encoding Variational Bayes" https://arxiv.org/abs/1312.6114
|
||||
'''
|
||||
import numpy as np
|
||||
import matplotlib.pyplot as plt
|
||||
|
||||
from keras.layers import Input, Dense, Lambda, Flatten, Reshape
|
||||
from keras.layers import Convolution2D, Deconvolution2D, MaxPooling2D
|
||||
from keras.models import Model
|
||||
from keras import backend as K
|
||||
from keras import objectives
|
||||
from keras.datasets import mnist
|
||||
|
||||
# input image dimensions
|
||||
img_rows, img_cols, img_chns = 28, 28, 1
|
||||
# number of convolutional filters to use
|
||||
nb_filters = 32
|
||||
# convolution kernel size
|
||||
nb_conv = 3
|
||||
|
||||
batch_size = 16
|
||||
original_dim = (img_chns, img_rows, img_cols)
|
||||
latent_dim = 2
|
||||
intermediate_dim = 128
|
||||
epsilon_std = 0.01
|
||||
nb_epoch = 5
|
||||
|
||||
|
||||
x = Input(batch_shape=(batch_size,) + original_dim)
|
||||
c = Convolution2D(nb_filters, nb_conv, nb_conv, border_mode='same', activation='relu')(x)
|
||||
f = Flatten()(c)
|
||||
h = Dense(intermediate_dim, activation='relu')(f)
|
||||
|
||||
z_mean = Dense(latent_dim)(h)
|
||||
z_log_var = Dense(latent_dim)(h)
|
||||
|
||||
|
||||
def sampling(args):
    '''Draw a latent sample as z_mean + sigma * epsilon.

    # Arguments
        args: pair `(z_mean, z_log_var)` of tensors; `z_log_var` is the
            log of the variance, as in the KL term of `vae_loss`.

    # Returns
        The sampled latent tensor.
    '''
    z_mean, z_log_var = args
    epsilon = K.random_normal(shape=(batch_size, latent_dim),
                              mean=0., std=epsilon_std)
    # The KL term of vae_loss treats exp(z_log_var) as the variance, so the
    # standard deviation is exp(z_log_var / 2), not exp(z_log_var).
    return z_mean + K.exp(z_log_var / 2) * epsilon
|
||||
|
||||
# note that "output_shape" isn't necessary with the TensorFlow backend
|
||||
# so you could write `Lambda(sampling)([z_mean, z_log_var])`
|
||||
z = Lambda(sampling, output_shape=(latent_dim,))([z_mean, z_log_var])
|
||||
|
||||
# we instantiate these layers separately so as to reuse them later
|
||||
decoder_h = Dense(intermediate_dim, activation='relu')
|
||||
decoder_f = Dense(nb_filters*img_rows*img_cols, activation='relu')
|
||||
decoder_c = Reshape((nb_filters, img_rows, img_cols))
|
||||
decoder_mean = Deconvolution2D(img_chns, nb_conv, nb_conv,
|
||||
(batch_size, img_chns, img_rows, img_cols),
|
||||
border_mode='same')
|
||||
|
||||
h_decoded = decoder_h(z)
|
||||
f_decoded = decoder_f(h_decoded)
|
||||
c_decoded = decoder_c(f_decoded)
|
||||
x_decoded_mean = decoder_mean(c_decoded)
|
||||
|
||||
|
||||
def vae_loss(x, x_decoded_mean):
    '''Return the VAE loss: reconstruction cross-entropy plus a KL term.

    Both arguments are flattened with `K.flatten` before the binary
    cross-entropy is computed. The KL term is computed from the
    module-level `z_mean` and `z_log_var` tensors.

    # Arguments
        x: the input images
        x_decoded_mean: the decoder output for those images

    # Returns
        The loss tensor.
    '''
    flat_target = K.flatten(x)
    flat_output = K.flatten(x_decoded_mean)
    reconstruction = objectives.binary_crossentropy(flat_target, flat_output)
    kl_inner = 1 + z_log_var - K.square(z_mean) - K.exp(z_log_var)
    kl_divergence = - 0.5 * K.mean(kl_inner, axis=-1)
    return reconstruction + kl_divergence
|
||||
|
||||
vae = Model(x, x_decoded_mean)
|
||||
vae.compile(optimizer='rmsprop', loss=vae_loss)
|
||||
vae.summary()
|
||||
|
||||
# train the VAE on MNIST digits
|
||||
(x_train, y_train), (x_test, y_test) = mnist.load_data()
|
||||
|
||||
x_train = x_train.astype('float32')[:, None, :, :] / 255.
|
||||
x_test = x_test.astype('float32')[:, None, :, :] / 255.
|
||||
|
||||
vae.fit(x_train, x_train,
|
||||
shuffle=True,
|
||||
nb_epoch=nb_epoch,
|
||||
batch_size=batch_size,
|
||||
validation_data=(x_test, x_test))
|
||||
|
||||
|
||||
# build a model to project inputs on the latent space
|
||||
encoder = Model(x, z_mean)
|
||||
|
||||
# display a 2D plot of the digit classes in the latent space
|
||||
x_test_encoded = encoder.predict(x_test, batch_size=batch_size)
|
||||
plt.figure(figsize=(6, 6))
|
||||
plt.scatter(x_test_encoded[:, 0], x_test_encoded[:, 1], c=y_test)
|
||||
plt.colorbar()
|
||||
plt.show()
|
||||
|
||||
# build a digit generator that can sample from the learned distribution
|
||||
decoder_input = Input(shape=(latent_dim,))
|
||||
_h_decoded = decoder_h(decoder_input)
|
||||
_f_decoded = decoder_f(_h_decoded)
|
||||
_c_decoded = decoder_c(_f_decoded)
|
||||
_x_decoded_mean = decoder_mean(_c_decoded)
|
||||
generator = Model(decoder_input, _x_decoded_mean)
|
||||
|
||||
# display a 2D manifold of the digits
|
||||
n = 15 # figure with 15x15 digits
|
||||
digit_size = 28
|
||||
figure = np.zeros((digit_size * n, digit_size * n))
|
||||
# we will sample n points within [-15, 15] standard deviations
|
||||
grid_x = np.linspace(-15, 15, n)
|
||||
grid_y = np.linspace(-15, 15, n)
|
||||
|
||||
for i, yi in enumerate(grid_x):
|
||||
for j, xi in enumerate(grid_y):
|
||||
z_sample = np.array([[xi, yi]])
|
||||
x_decoded = generator.predict(z_sample)
|
||||
digit = x_decoded[0].reshape(digit_size, digit_size)
|
||||
figure[i * digit_size: (i + 1) * digit_size,
|
||||
j * digit_size: (j + 1) * digit_size] = digit
|
||||
|
||||
plt.figure(figsize=(10, 10))
|
||||
plt.imshow(figure)
|
||||
plt.show()
|
||||
+1
-1
@@ -15,4 +15,4 @@ from . import objectives
|
||||
from . import optimizers
|
||||
from . import regularizers
|
||||
|
||||
__version__ = '1.0.5'
|
||||
__version__ = '1.0.7'
|
||||
|
||||
@@ -48,4 +48,6 @@ def linear(x):
|
||||
|
||||
from .utils.generic_utils import get_from_module
|
||||
def get(identifier):
    '''Return the activation function designated by `identifier`.

    `None` selects `linear`; anything else is looked up among this
    module's globals through `get_from_module`.
    '''
    if identifier is not None:
        return get_from_module(identifier, globals(), 'activation function')
    return linear
|
||||
|
||||
@@ -11,6 +11,7 @@ from .common import get_uid
|
||||
from .common import cast_to_floatx
|
||||
from .common import image_dim_ordering
|
||||
from .common import set_image_dim_ordering
|
||||
from .common import is_keras_tensor
|
||||
|
||||
_keras_base_dir = os.path.expanduser('~')
|
||||
if not os.access(_keras_base_dir, os.W_OK):
|
||||
@@ -35,6 +36,7 @@ if os.path.exists(_config_path):
|
||||
|
||||
set_floatx(_floatx)
|
||||
set_epsilon(_epsilon)
|
||||
set_image_dim_ordering(_image_dim_ordering)
|
||||
_BACKEND = _backend
|
||||
|
||||
# save config file
|
||||
@@ -59,3 +61,10 @@ elif _BACKEND == 'tensorflow':
|
||||
from .tensorflow_backend import *
|
||||
else:
|
||||
raise Exception('Unknown backend: ' + str(_BACKEND))
|
||||
|
||||
|
||||
def backend():
    '''Return the current backend (the value of `_BACKEND`).

    Public accessor, so that callers do not have to read the private
    module-level variable.
    '''
    return _BACKEND
|
||||
|
||||
@@ -1,9 +1,11 @@
|
||||
import numpy as np
|
||||
|
||||
from collections import defaultdict
|
||||
|
||||
# the type of float to use throughout the session.
|
||||
_FLOATX = 'float32'
|
||||
_EPSILON = 10e-8
|
||||
_UID_PREFIXES = {}
|
||||
_UID_PREFIXES = defaultdict(int)
|
||||
_IMAGE_DIM_ORDERING = 'th'
|
||||
|
||||
|
||||
@@ -60,9 +62,17 @@ def set_image_dim_ordering(dim_ordering):
|
||||
|
||||
|
||||
def get_uid(prefix=''):
|
||||
if prefix not in _UID_PREFIXES:
|
||||
_UID_PREFIXES[prefix] = 1
|
||||
return 1
|
||||
_UID_PREFIXES[prefix] += 1
|
||||
return _UID_PREFIXES[prefix]
|
||||
|
||||
|
||||
def reset_uids():
    '''Discard all uid counters.

    Rebinds the module-level `_UID_PREFIXES` to a fresh, empty
    `defaultdict(int)`.
    '''
    global _UID_PREFIXES
    _UID_PREFIXES = defaultdict(int)
|
||||
|
||||
|
||||
def is_keras_tensor(x):
|
||||
if hasattr(x, '_keras_shape'):
|
||||
return True
|
||||
else:
|
||||
_UID_PREFIXES[prefix] += 1
|
||||
return _UID_PREFIXES[prefix]
|
||||
return False
|
||||
|
||||
@@ -1,14 +1,35 @@
|
||||
import tensorflow as tf
|
||||
from tensorflow.python.training import moving_averages
|
||||
import numpy as np
|
||||
import os
|
||||
import copy
|
||||
import warnings
|
||||
from .common import _FLOATX, _EPSILON
|
||||
from .common import _FLOATX, _EPSILON, _IMAGE_DIM_ORDERING, reset_uids
|
||||
|
||||
# INTERNAL UTILS
|
||||
|
||||
_SESSION = None
|
||||
_LEARNING_PHASE = tf.placeholder(dtype='uint8', name='keras_learning_phase') # 0 = test, 1 = train
|
||||
_MANUAL_VAR_INIT = False
|
||||
|
||||
|
||||
def clear_session():
    '''Reset the default TensorFlow graph and the module state tied to it.

    After the graph reset, the uid counters are reset, `_SESSION` is
    cleared, and the learning-phase placeholder is created again.
    '''
    global _SESSION, _LEARNING_PHASE
    tf.reset_default_graph()
    reset_uids()
    _SESSION = None
    # created after the reset, presumably so that the placeholder belongs
    # to the new default graph
    _LEARNING_PHASE = tf.placeholder(dtype='uint8',
                                     name='keras_learning_phase')
|
||||
|
||||
|
||||
def manual_variable_initialization(value):
    '''Set whether the user handles variable initialization.

    By default variables are initialized as they are instantiated.
    When this flag is set, the user should initialize them
    (e.g. via tf.initialize_all_variables()).

    # Arguments
        value: the new value of the module-level `_MANUAL_VAR_INIT` flag
    '''
    global _MANUAL_VAR_INIT
    _MANUAL_VAR_INIT = value
|
||||
|
||||
|
||||
def learning_phase():
|
||||
@@ -23,7 +44,10 @@ def learning_phase():
|
||||
|
||||
def set_learning_phase(value):
|
||||
global _LEARNING_PHASE
|
||||
_LEARNING_PHASE = tf.constant(value, name='keras_learning_phase')
|
||||
if value not in {0, 1}:
|
||||
raise ValueError('Expected learning phase to be '
|
||||
'0 or 1.')
|
||||
_LEARNING_PHASE = value
|
||||
|
||||
|
||||
def get_session():
|
||||
@@ -61,6 +85,34 @@ def set_session(session):
|
||||
|
||||
# VARIABLE MANIPULATION
|
||||
|
||||
def _convert_string_dtype(dtype):
    '''Map a dtype name to the TensorFlow dtype of the same name.

    # Arguments
        dtype: one of 'float16', 'float32', 'float64', 'int16', 'int32',
            'int64', 'uint8', 'uint16'

    # Returns
        The matching `tf` dtype object.

    # Raises
        ValueError: if `dtype` is not one of the names above.
    '''
    if dtype == 'float16':
        return tf.float16
    elif dtype == 'float32':
        return tf.float32
    elif dtype == 'float64':
        return tf.float64
    elif dtype == 'int16':
        return tf.int16
    elif dtype == 'int32':
        return tf.int32
    elif dtype == 'int64':
        return tf.int64
    elif dtype == 'uint8':
        # unsigned 8-bit: tf.int8 would be the signed type
        return tf.uint8
    elif dtype == 'uint16':
        return tf.uint16
    else:
        raise ValueError('Unsupported dtype:', dtype)
|
||||
|
||||
|
||||
def _to_tensor(x, dtype):
    '''Convert `x` to a tensor of the given dtype.

    # Arguments
        x: value accepted by `tf.convert_to_tensor`
        dtype: the dtype the result must have

    # Returns
        The converted tensor, cast with `tf.cast` only when its dtype
        differs from `dtype`.
    '''
    tensor = tf.convert_to_tensor(x)
    if tensor.dtype == dtype:
        return tensor
    return tf.cast(tensor, dtype)
|
||||
|
||||
|
||||
def variable(value, dtype=_FLOATX, name=None):
|
||||
'''Instantiates a tensor.
|
||||
|
||||
@@ -72,7 +124,9 @@ def variable(value, dtype=_FLOATX, name=None):
|
||||
# Returns
|
||||
Tensor variable instance.
|
||||
'''
|
||||
v = tf.Variable(np.asarray(value, dtype=dtype), name=name)
|
||||
v = tf.Variable(value, dtype=_convert_string_dtype(dtype), name=name)
|
||||
if _MANUAL_VAR_INIT:
|
||||
return v
|
||||
if tf.get_default_graph() is get_session().graph:
|
||||
try:
|
||||
get_session().run(v.initializer)
|
||||
@@ -154,13 +208,17 @@ def eval(x):
|
||||
def zeros(shape, dtype=_FLOATX, name=None):
|
||||
'''Instantiates an all-zeros tensor variable.
|
||||
'''
|
||||
return variable(np.zeros(shape), dtype, name)
|
||||
shape = tuple(map(int, shape))
|
||||
tf_dtype = _convert_string_dtype(dtype)
|
||||
return variable(tf.constant_initializer(0., dtype=tf_dtype)(shape), dtype, name)
|
||||
|
||||
|
||||
def ones(shape, dtype=_FLOATX, name=None):
|
||||
'''Instantiates an all-ones tensor variable.
|
||||
'''
|
||||
return variable(np.ones(shape), dtype, name)
|
||||
shape = tuple(map(int, shape))
|
||||
tf_dtype = _convert_string_dtype(dtype)
|
||||
return variable(tf.constant_initializer(1., dtype=tf_dtype)(shape), dtype, name)
|
||||
|
||||
|
||||
def eye(size, dtype=_FLOATX, name=None):
|
||||
@@ -183,6 +241,20 @@ def ones_like(x, name=None):
|
||||
return tf.ones_like(x, name=name)
|
||||
|
||||
|
||||
def random_uniform_variable(shape, low, high, dtype=_FLOATX, name=None):
    '''Instantiate a variable filled by `tf.random_uniform_initializer`.

    # Arguments
        shape: iterable of dimensions; each one is converted with `int`
        low, high: passed to `tf.random_uniform_initializer`
        dtype: dtype name of the variable
        name: optional name of the variable

    # Returns
        The result of `variable(...)` for the generated value.
    '''
    int_shape = tuple(int(dim) for dim in shape)
    initializer = tf.random_uniform_initializer(
        low, high, dtype=_convert_string_dtype(dtype))
    return variable(initializer(int_shape), dtype=dtype, name=name)
|
||||
|
||||
|
||||
def random_normal_variable(shape, mean, scale, dtype=_FLOATX, name=None):
    '''Instantiate a variable filled by `tf.random_normal_initializer`.

    # Arguments
        shape: iterable of dimensions; each one is converted with `int`
        mean, scale: passed to `tf.random_normal_initializer`
        dtype: dtype name of the variable
        name: optional name of the variable

    # Returns
        The result of `variable(...)` for the generated value.
    '''
    int_shape = tuple(int(dim) for dim in shape)
    initializer = tf.random_normal_initializer(
        mean, scale, dtype=_convert_string_dtype(dtype))
    return variable(initializer(int_shape), dtype=dtype, name=name)
|
||||
|
||||
|
||||
def count_params(x):
|
||||
'''Returns the number of scalars in a tensor.
|
||||
'''
|
||||
@@ -196,6 +268,26 @@ def cast(x, dtype):
|
||||
return tf.cast(x, dtype)
|
||||
|
||||
|
||||
# UPDATES OPS
|
||||
|
||||
|
||||
def update(x, new_x):
    '''Return the result of `tf.assign(x, new_x)`.
    '''
    assign_op = tf.assign(x, new_x)
    return assign_op
|
||||
|
||||
|
||||
def update_add(x, increment):
    '''Return the result of `tf.assign_add(x, increment)`.
    '''
    add_op = tf.assign_add(x, increment)
    return add_op
|
||||
|
||||
|
||||
def update_sub(x, decrement):
    '''Return the result of `tf.assign_sub(x, decrement)`.
    '''
    sub_op = tf.assign_sub(x, decrement)
    return sub_op
|
||||
|
||||
|
||||
def moving_average_update(variable, value, momentum):
    '''Return the result of `moving_averages.assign_moving_average`.

    The three arguments are forwarded unchanged, in the same order.
    '''
    average_op = moving_averages.assign_moving_average(variable,
                                                       value,
                                                       momentum)
    return average_op
|
||||
|
||||
|
||||
# LINEAR ALGEBRA
|
||||
|
||||
def dot(x, y):
|
||||
@@ -223,20 +315,36 @@ def batch_dot(x, y, axes=None):
|
||||
If the number of dimensions is reduced to 1, we use `expand_dims` to
|
||||
make sure that ndim is at least 2.
|
||||
|
||||
# Example
|
||||
Assume x = [[1, 2] and y = [[5, 6]
|
||||
[3, 4]] [7, 8]]
|
||||
batch_dot(x, y, axes=1) = [[17, 53]] which is the main diagonal
|
||||
of x.dot(y.T), although we never have to calculate the off-diagonal
|
||||
elements.
|
||||
|
||||
|
||||
# Arguments
|
||||
x, y: tensors with ndim >= 2
|
||||
axes: list (or single) int with target dimensions
|
||||
|
||||
# Returns
|
||||
Tensor with ndim >= 2
|
||||
A tensor with shape equal to the concatenation of x's shape
|
||||
(less the dimension that was summed over) and y's shape
|
||||
(less the batch dimension and the dimension that was summed over).
|
||||
If the final rank is 1, we reshape it to (batch_size, 1).
|
||||
|
||||
# Examples
|
||||
Assume x = [[1, 2], [3, 4]] and y = [[5, 6], [7, 8]]
|
||||
batch_dot(x, y, axes=1) = [[17, 53]] which is the main diagonal
|
||||
of x.dot(y.T), although we never have to calculate the off-diagonal
|
||||
elements.
|
||||
|
||||
Shape inference:
|
||||
Let x's shape be (100, 20) and y's shape be (100, 30, 20).
|
||||
If dot_axes is (1, 2), to find the output shape of resultant tensor,
|
||||
loop through each dimension in x's shape and y's shape:
|
||||
x.shape[0] : 100 : append to output shape
|
||||
x.shape[1] : 20 : do not append to output shape,
|
||||
dimension 1 of x has been summed over. (dot_axes[0] = 1)
|
||||
y.shape[0] : 100 : do not append to output shape,
|
||||
always ignore first dimension of y
|
||||
y.shape[1] : 30 : append to output shape
|
||||
y.shape[2] : 20 : do not append to output shape,
|
||||
dimension 2 of y has been summed over. (dot_axes[1] = 2)
|
||||
|
||||
output_shape = (100, 30)
|
||||
'''
|
||||
if type(axes) == int:
|
||||
axes = (axes, axes)
|
||||
@@ -398,8 +506,9 @@ def abs(x):
|
||||
def sqrt(x):
|
||||
'''Element-wise square root.
|
||||
'''
|
||||
x = tf.clip_by_value(x, tf.cast(0., dtype=_FLOATX),
|
||||
tf.cast(np.inf, dtype=_FLOATX))
|
||||
zero = _to_tensor(0., x.dtype.base_dtype)
|
||||
inf = _to_tensor(np.inf, x.dtype.base_dtype)
|
||||
x = tf.clip_by_value(x, zero, inf)
|
||||
return tf.sqrt(x)
|
||||
|
||||
|
||||
@@ -438,8 +547,9 @@ def clip(x, min_value, max_value):
|
||||
'''
|
||||
if max_value < min_value:
|
||||
max_value = min_value
|
||||
return tf.clip_by_value(x, tf.cast(min_value, dtype=_FLOATX),
|
||||
tf.cast(max_value, dtype=_FLOATX))
|
||||
min_value = _to_tensor(min_value, x.dtype.base_dtype)
|
||||
max_value = _to_tensor(max_value, x.dtype.base_dtype)
|
||||
return tf.clip_by_value(x, min_value, max_value)
|
||||
|
||||
|
||||
def equal(x, y):
|
||||
@@ -456,6 +566,34 @@ def not_equal(x, y):
|
||||
return tf.not_equal(x, y)
|
||||
|
||||
|
||||
def greater(x, y):
    '''Element-wise comparison `x > y`.

    # Arguments
        x, y: operands passed unchanged to `tf.greater`.

    # Returns
        A bool tensor.
    '''
    result = tf.greater(x, y)
    return result
|
||||
|
||||
|
||||
def greater_equal(x, y):
    '''Element-wise comparison `x >= y`.

    # Arguments
        x, y: operands passed unchanged to `tf.greater_equal`.

    # Returns
        A bool tensor.
    '''
    result = tf.greater_equal(x, y)
    return result
|
||||
|
||||
|
||||
def lesser(x, y):
    '''Element-wise comparison `x < y`.

    # Arguments
        x, y: operands passed unchanged to `tf.less`.

    # Returns
        A bool tensor.
    '''
    result = tf.less(x, y)
    return result
|
||||
|
||||
|
||||
def lesser_equal(x, y):
    '''Element-wise comparison `x <= y`.

    # Arguments
        x, y: operands passed unchanged to `tf.less_equal`.

    # Returns
        A bool tensor.
    '''
    result = tf.less_equal(x, y)
    return result
|
||||
|
||||
|
||||
def maximum(x, y):
|
||||
'''Element-wise maximum of two tensors.
|
||||
'''
|
||||
@@ -480,6 +618,44 @@ def cos(x):
|
||||
return tf.cos(x)
|
||||
|
||||
|
||||
def normalize_batch_in_training(x, gamma, beta,
                                reduction_axes, epsilon=0.0001):
    '''Compute the mean and variance of `x` over `reduction_axes`,
    then apply batch normalization to `x` using those statistics.

    # Arguments
        x: input tensor.
        gamma: scale tensor passed to `tf.nn.batch_normalization`.
        beta: offset tensor passed to `tf.nn.batch_normalization`.
        reduction_axes: iterable of ints, the axes the statistics
            are computed over.
        epsilon: fuzz factor passed to `tf.nn.batch_normalization`.

    # Returns
        A tuple `(normed, mean, var)`, where `mean` and `var` are the
        (non-broadcast) outputs of `tf.nn.moments`.
    '''
    mean, var = tf.nn.moments(x, reduction_axes,
                              shift=None, name=None, keep_dims=False)
    # `range` is not a list on Python 3: it has to be materialized before
    # it is sliced and compared, otherwise this branch is never taken.
    if sorted(reduction_axes) == list(range(ndim(x)))[:-1]:
        # Statistics cover every axis but the last one, so they can be
        # used as they are.
        normed = tf.nn.batch_normalization(x, mean, var,
                                           beta, gamma,
                                           epsilon)
    else:
        # need broadcasting
        target_shape = []
        for axis in range(ndim(x)):
            if axis in reduction_axes:
                target_shape.append(1)
            else:
                target_shape.append(tf.shape(x)[axis])
        target_shape = tf.pack(target_shape)

        broadcast_mean = tf.reshape(mean, target_shape)
        broadcast_var = tf.reshape(var, target_shape)
        broadcast_gamma = tf.reshape(gamma, target_shape)
        broadcast_beta = tf.reshape(beta, target_shape)
        normed = tf.nn.batch_normalization(x, broadcast_mean, broadcast_var,
                                           broadcast_beta, broadcast_gamma,
                                           epsilon)
    return normed, mean, var
|
||||
|
||||
|
||||
def batch_normalization(x, mean, var, beta, gamma, epsilon=0.0001):
    '''Apply batch normalization on x given mean, var, beta and gamma.

    This is a direct call to `tf.nn.batch_normalization`, which adds
    `epsilon` to the variance before taking the square root:

    output = (x - mean) / sqrt(var + epsilon) * gamma + beta

    # Arguments
        x: input tensor.
        mean: mean used for centering.
        var: variance used for scaling.
        beta: offset added after normalization.
        gamma: scale applied after normalization.
        epsilon: fuzz factor.

    # Returns
        The normalized tensor.
    '''
    return tf.nn.batch_normalization(x, mean, var, beta, gamma, epsilon)
|
||||
|
||||
|
||||
# SHAPE OPERATIONS
|
||||
|
||||
def concatenate(tensors, axis=-1):
|
||||
@@ -536,6 +712,27 @@ def resize_images(X, height_factor, width_factor, dim_ordering):
|
||||
raise Exception('Invalid dim_ordering: ' + dim_ordering)
|
||||
|
||||
|
||||
def resize_volumes(X, depth_factor, height_factor, width_factor, dim_ordering):
    '''Resize the volume held in a 5D tensor by repeating its elements
    along the depth, height and width axes.

    Expected layouts:
    - [batch, channels, depth, height, width] (for 'th' dim_ordering)
    - [batch, depth, height, width, channels] (for 'tf' dim_ordering)

    # Arguments
        X: 5D tensor.
        depth_factor, height_factor, width_factor: repetition counts for
            the three spatial axes; all should be positive integers.
        dim_ordering: 'th' or 'tf'.

    # Returns
        The resized tensor.
    '''
    # The three spatial axes are contiguous; only their starting index
    # depends on the ordering.
    if dim_ordering == 'th':
        first_spatial_axis = 2
    elif dim_ordering == 'tf':
        first_spatial_axis = 1
    else:
        raise Exception('Invalid dim_ordering: ' + dim_ordering)

    output = X
    factors = (depth_factor, height_factor, width_factor)
    for offset, factor in enumerate(factors):
        output = repeat_elements(output, factor,
                                 axis=first_spatial_axis + offset)
    return output
|
||||
|
||||
|
||||
def repeat_elements(x, rep, axis):
|
||||
'''Repeats the elements of a tensor along an axis, like np.repeat
|
||||
|
||||
@@ -576,7 +773,7 @@ def batch_flatten(x):
|
||||
'''Turn a n-D tensor into a 2D tensor where
|
||||
the first dimension is conserved.
|
||||
'''
|
||||
x = tf.reshape(x, [-1, np.prod(x.get_shape()[1:].as_list())])
|
||||
x = tf.reshape(x, [-1, prod(shape(x)[1:])])
|
||||
return x
|
||||
|
||||
|
||||
@@ -614,9 +811,42 @@ def spatial_2d_padding(x, padding=(1, 1), dim_ordering='th'):
|
||||
return tf.pad(x, pattern)
|
||||
|
||||
|
||||
def spatial_3d_padding(x, padding=(1, 1, 1), dim_ordering='th'):
    '''Zero-pad the depth, height and width dimensions of a 5D tensor.

    `padding[0]`, `padding[1]` and `padding[2]` zeros are added on both
    sides of the three padded dimensions, respectively.

    For 'th' dim_ordering, the 3rd, 4th and 5th dimension will be padded.
    Any other `dim_ordering` value is handled as 'tf': the 2nd, 3rd and
    4th dimension will be padded.
    '''
    untouched = [0, 0]
    spatial = [[padding[i], padding[i]] for i in range(3)]
    if dim_ordering == 'th':
        pattern = [untouched, untouched] + spatial
    else:
        pattern = [untouched] + spatial + [untouched]
    return tf.pad(x, pattern)
|
||||
|
||||
|
||||
def pack(x):
    '''Return `tf.pack(x)`.
    '''
    packed = tf.pack(x)
    return packed
|
||||
|
||||
def one_hot(indices, nb_classes):
    '''One-hot encode an integer tensor along a new last axis.

    # Arguments
        indices: nD integer tensor of shape
            (batch_size, dim1, dim2, ... dim(n-1)).
        nb_classes: depth of the one-hot axis.

    # Returns
        (n + 1)D one hot representation of the input with shape
        (batch_size, dim1, dim2, ... dim(n-1), nb_classes).
    '''
    encoded = tf.one_hot(indices, depth=nb_classes, axis=-1)
    return encoded
|
||||
|
||||
|
||||
# VALUE MANIPULATION
|
||||
|
||||
@@ -642,7 +872,11 @@ def set_value(x, value):
|
||||
'''Sets the value of a tensor variable,
|
||||
from a Numpy array.
|
||||
'''
|
||||
tf.assign(x, np.asarray(value)).op.run(session=get_session())
|
||||
value = np.asarray(value)
|
||||
tf_dtype = _convert_string_dtype(x.dtype.name.split('_')[0])
|
||||
assign_placeholder = tf.placeholder(tf_dtype, shape=value.shape)
|
||||
assign_op = x.assign(assign_placeholder)
|
||||
get_session().run(assign_op, feed_dict={assign_placeholder: value})
|
||||
|
||||
|
||||
def batch_set_value(tuples):
|
||||
@@ -653,8 +887,23 @@ def batch_set_value(tuples):
|
||||
`value` should be a Numpy array.
|
||||
'''
|
||||
if tuples:
|
||||
ops = [tf.assign(x, np.asarray(value)) for x, value in tuples]
|
||||
get_session().run(ops)
|
||||
assign_ops = []
|
||||
feed_dict = {}
|
||||
for x, value in tuples:
|
||||
value = np.asarray(value)
|
||||
tf_dtype = _convert_string_dtype(x.dtype.name.split('_')[0])
|
||||
assign_placeholder = tf.placeholder(tf_dtype, shape=value.shape)
|
||||
assign_ops.append(x.assign(assign_placeholder))
|
||||
feed_dict[assign_placeholder] = value
|
||||
get_session().run(assign_ops, feed_dict=feed_dict)
|
||||
|
||||
|
||||
def print_tensor(x, message=''):
    '''Wrap `x` in a `tf.Print` op that prints `message` and `x` when
    evaluated, and return the wrapped tensor.
    '''
    printed = tf.Print(x, [x], message)
    return printed
|
||||
|
||||
|
||||
# GRAPH MANIPULATION
|
||||
|
||||
@@ -667,14 +916,22 @@ class Function(object):
|
||||
self.inputs = list(inputs)
|
||||
self.outputs = list(outputs)
|
||||
with tf.control_dependencies(self.outputs):
|
||||
self.updates = [tf.assign(p, new_p) for (p, new_p) in updates]
|
||||
updates_ops = []
|
||||
for update in updates:
|
||||
if type(update) is tuple:
|
||||
p, new_p = update
|
||||
updates_ops.append(tf.assign(p, new_p))
|
||||
else:
|
||||
# assumed already an op
|
||||
updates_ops.append(update)
|
||||
self.updates_op = tf.group(*updates_ops)
|
||||
|
||||
def __call__(self, inputs):
|
||||
assert type(inputs) in {list, tuple}
|
||||
names = [v.name for v in self.inputs]
|
||||
names = [getattr(v, 'name', None) for v in self.inputs]
|
||||
feed_dict = dict(zip(names, inputs))
|
||||
session = get_session()
|
||||
updated = session.run(self.outputs + self.updates, feed_dict=feed_dict)
|
||||
updated = session.run(self.outputs + [self.updates_op], feed_dict=feed_dict)
|
||||
return updated[:len(self.outputs)]
|
||||
|
||||
|
||||
@@ -702,6 +959,13 @@ def gradients(loss, variables):
|
||||
return tf.gradients(loss, variables)
|
||||
|
||||
|
||||
def stop_gradient(variables):
    '''Return `variables` wrapped in `tf.stop_gradient`, so that it has
    zero gradient with respect to every other variable.
    '''
    blocked = tf.stop_gradient(variables)
    return blocked
|
||||
|
||||
|
||||
# CONTROL FLOW
|
||||
|
||||
def rnn(step_function, inputs, initial_states,
|
||||
@@ -834,6 +1098,11 @@ def in_train_phase(x, alt):
|
||||
'''Selects `x` in train phase, and `alt` otherwise.
|
||||
Note that `alt` should have the *same shape* as `x`.
|
||||
'''
|
||||
if _LEARNING_PHASE is 1:
|
||||
return x
|
||||
elif _LEARNING_PHASE is 0:
|
||||
return alt
|
||||
# else: assume learning phase is a placeholder.
|
||||
x_shape = copy.copy(x.get_shape())
|
||||
x = tf.python.control_flow_ops.cond(tf.cast(_LEARNING_PHASE, 'bool'),
|
||||
lambda: x,
|
||||
@@ -847,6 +1116,10 @@ def in_test_phase(x, alt):
|
||||
'''Selects `x` in test phase, and `alt` otherwise.
|
||||
Note that `alt` should have the *same shape* as `x`.
|
||||
'''
|
||||
if _LEARNING_PHASE is 1:
|
||||
return alt
|
||||
elif _LEARNING_PHASE is 0:
|
||||
return x
|
||||
x_shape = copy.copy(x.get_shape())
|
||||
x = tf.python.control_flow_ops.cond(tf.cast(_LEARNING_PHASE, 'bool'),
|
||||
lambda: alt,
|
||||
@@ -865,14 +1138,16 @@ def relu(x, alpha=0., max_value=None):
|
||||
alpha: slope of negative section.
|
||||
max_value: saturation threshold.
|
||||
'''
|
||||
negative_part = tf.nn.relu(-x)
|
||||
if alpha != 0.:
|
||||
negative_part = tf.nn.relu(-x)
|
||||
x = tf.nn.relu(x)
|
||||
if max_value is not None:
|
||||
x = tf.clip_by_value(x, tf.cast(0., dtype=_FLOATX),
|
||||
tf.cast(max_value, dtype=_FLOATX))
|
||||
if isinstance(alpha, (tuple, list, np.ndarray)) or np.isscalar(alpha):
|
||||
alpha = tf.constant(alpha, dtype=_FLOATX)
|
||||
x -= alpha * negative_part
|
||||
max_value = _to_tensor(max_value, x.dtype.base_dtype)
|
||||
zero = _to_tensor(0., x.dtype.base_dtype)
|
||||
x = tf.clip_by_value(x, zero, max_value)
|
||||
if alpha != 0.:
|
||||
alpha = _to_tensor(alpha, x.dtype.base_dtype)
|
||||
x -= alpha * negative_part
|
||||
return x
|
||||
|
||||
|
||||
@@ -905,8 +1180,8 @@ def categorical_crossentropy(output, target, from_logits=False):
|
||||
reduction_indices=len(output.get_shape()) - 1,
|
||||
keep_dims=True)
|
||||
# manual computation of crossentropy
|
||||
output = tf.clip_by_value(output, tf.cast(_EPSILON, dtype=_FLOATX),
|
||||
tf.cast(1. - _EPSILON, dtype=_FLOATX))
|
||||
epsilon = _to_tensor(_EPSILON, output.dtype.base_dtype)
|
||||
output = tf.clip_by_value(output, epsilon, 1. - epsilon)
|
||||
return - tf.reduce_sum(target * tf.log(output),
|
||||
reduction_indices=len(output.get_shape()) - 1)
|
||||
else:
|
||||
@@ -920,8 +1195,8 @@ def sparse_categorical_crossentropy(output, target, from_logits=False):
|
||||
# Note: tf.nn.softmax_cross_entropy_with_logits
|
||||
# expects logits, Keras expects probabilities.
|
||||
if not from_logits:
|
||||
output = tf.clip_by_value(output, tf.cast(_EPSILON, dtype=_FLOATX),
|
||||
tf.cast(1.-_EPSILON, dtype=_FLOATX))
|
||||
epsilon = _to_tensor(_EPSILON, output.dtype.base_dtype)
|
||||
output = tf.clip_by_value(output, epsilon, 1 - epsilon)
|
||||
output = tf.log(output)
|
||||
|
||||
output_shape = output.get_shape()
|
||||
@@ -942,8 +1217,8 @@ def binary_crossentropy(output, target, from_logits=False):
|
||||
# expects logits, Keras expects probabilities.
|
||||
if not from_logits:
|
||||
# transform back to logits
|
||||
output = tf.clip_by_value(output, tf.cast(_EPSILON, dtype=_FLOATX),
|
||||
tf.cast(1.-_EPSILON, dtype=_FLOATX))
|
||||
epsilon = _to_tensor(_EPSILON, output.dtype.base_dtype)
|
||||
output = tf.clip_by_value(output, epsilon, 1 - epsilon)
|
||||
output = tf.log(output / (1 - output))
|
||||
return tf.nn.sigmoid_cross_entropy_with_logits(output, target)
|
||||
|
||||
@@ -959,8 +1234,9 @@ def hard_sigmoid(x):
|
||||
Faster than sigmoid.
|
||||
'''
|
||||
x = (0.2 * x) + 0.5
|
||||
x = tf.clip_by_value(x, tf.cast(0., dtype=_FLOATX),
|
||||
tf.cast(1., dtype=_FLOATX))
|
||||
zero = _to_tensor(0., x.dtype.base_dtype)
|
||||
one = _to_tensor(1., x.dtype.base_dtype)
|
||||
x = tf.clip_by_value(x, zero, one)
|
||||
return x
|
||||
|
||||
|
||||
@@ -998,55 +1274,212 @@ def l2_normalize(x, axis):
|
||||
|
||||
# CONVOLUTIONS
|
||||
|
||||
def _preprocess_deconv_output_shape(shape, dim_ordering):
|
||||
if dim_ordering == 'th':
|
||||
shape = (shape[0], shape[2], shape[3], shape[1])
|
||||
return shape
|
||||
|
||||
def conv2d(x, kernel, strides=(1, 1), border_mode='valid', dim_ordering='th',
|
||||
image_shape=None, filter_shape=None):
|
||||
'''2D convolution.
|
||||
|
||||
# Arguments
|
||||
kernel: kernel tensor.
|
||||
strides: strides tuple.
|
||||
border_mode: string, "same" or "valid".
|
||||
dim_ordering: "tf" or "th". Whether to use Theano or TensorFlow dimension ordering
|
||||
in inputs/kernels/ouputs.
|
||||
'''
|
||||
def _preprocess_conv2d_input(x, dim_ordering):
    '''Prepare a 4D input for TensorFlow's 2D ops.

    The tensor is cast to float32 when `_FLOATX` is 'float64'. For 'th'
    ordering it is then transposed from
    (samples, input_depth, rows, cols) to
    (samples, rows, cols, input_depth), because TF uses the last
    dimension as channel dimension instead of the 2nd one.
    '''
    if _FLOATX == 'float64':
        x = tf.cast(x, 'float32')
    if dim_ordering != 'th':
        return x
    return tf.transpose(x, (0, 2, 3, 1))
|
||||
|
||||
|
||||
def _preprocess_conv3d_input(x, dim_ordering):
    '''Prepare a 5D input for TensorFlow's 3D ops.

    The tensor is cast to float32 when `_FLOATX` is 'float64'. For 'th'
    ordering it is then transposed from
    (samples, input_depth, conv_dim1, conv_dim2, conv_dim3) to
    (samples, conv_dim1, conv_dim2, conv_dim3, input_depth), because TF
    uses the last dimension as channel dimension instead of the 2nd one.
    '''
    if _FLOATX == 'float64':
        x = tf.cast(x, 'float32')
    if dim_ordering != 'th':
        return x
    return tf.transpose(x, (0, 2, 3, 4, 1))
|
||||
|
||||
|
||||
def _preprocess_conv2d_kernel(kernel, dim_ordering):
    '''Prepare a 2D convolution kernel for TensorFlow.

    The kernel is cast to float32 when `_FLOATX` is 'float64'. For 'th'
    ordering it is then transposed from
    (depth, input_depth, rows, cols) to
    (rows, cols, input_depth, depth).
    '''
    if _FLOATX == 'float64':
        kernel = tf.cast(kernel, 'float32')
    if dim_ordering != 'th':
        return kernel
    return tf.transpose(kernel, (2, 3, 1, 0))
|
||||
|
||||
|
||||
def _preprocess_conv3d_kernel(kernel, dim_ordering):
    '''Prepare a 3D convolution kernel for TensorFlow.

    The kernel is cast to float32 when `_FLOATX` is 'float64'. For 'th'
    ordering it is then transposed from
    (out_depth, input_depth, kernel_dim1, kernel_dim2, kernel_dim3) to
    (kernel_dim1, kernel_dim2, kernel_dim3, input_depth, out_depth).
    '''
    if _FLOATX == 'float64':
        kernel = tf.cast(kernel, 'float32')
    if dim_ordering != 'th':
        return kernel
    return tf.transpose(kernel, (2, 3, 4, 1, 0))
|
||||
|
||||
|
||||
def _preprocess_border_mode(border_mode):
|
||||
if border_mode == 'same':
|
||||
padding = 'SAME'
|
||||
elif border_mode == 'valid':
|
||||
padding = 'VALID'
|
||||
else:
|
||||
raise Exception('Invalid border mode: ' + str(border_mode))
|
||||
return padding
|
||||
|
||||
strides = (1,) + strides + (1,)
|
||||
|
||||
if _FLOATX == 'float64':
|
||||
# tf conv2d only supports float32
|
||||
x = tf.cast(x, 'float32')
|
||||
kernel = tf.cast(kernel, 'float32')
|
||||
|
||||
def _postprocess_conv2d_output(x, dim_ordering):
|
||||
if dim_ordering == 'th':
|
||||
# TF uses the last dimension as channel dimension,
|
||||
# instead of the 2nd one.
|
||||
# TH input shape: (samples, input_depth, rows, cols)
|
||||
# TF input shape: (samples, rows, cols, input_depth)
|
||||
# TH kernel shape: (depth, input_depth, rows, cols)
|
||||
# TF kernel shape: (rows, cols, input_depth, depth)
|
||||
x = tf.transpose(x, (0, 2, 3, 1))
|
||||
kernel = tf.transpose(kernel, (2, 3, 1, 0))
|
||||
x = tf.nn.conv2d(x, kernel, strides, padding=padding)
|
||||
x = tf.transpose(x, (0, 3, 1, 2))
|
||||
elif dim_ordering == 'tf':
|
||||
x = tf.nn.conv2d(x, kernel, strides, padding=padding)
|
||||
else:
|
||||
raise Exception('Unknown dim_ordering: ' + str(dim_ordering))
|
||||
|
||||
if _FLOATX == 'float64':
|
||||
x = tf.cast(x, 'float64')
|
||||
return x
|
||||
|
||||
|
||||
def _postprocess_conv3d_output(x, dim_ordering):
    '''Convert the result of a TensorFlow 3D op back to the caller's
    conventions.

    For 'th' ordering the last axis is moved to position 1. The tensor
    is then cast to float64 when `_FLOATX` is 'float64'.
    '''
    channels_second = dim_ordering == 'th'
    if channels_second:
        x = tf.transpose(x, (0, 4, 1, 2, 3))

    wants_float64 = _FLOATX == 'float64'
    if wants_float64:
        x = tf.cast(x, 'float64')
    return x
|
||||
|
||||
|
||||
def conv2d(x, kernel, strides=(1, 1), border_mode='valid',
           dim_ordering=_IMAGE_DIM_ORDERING,
           image_shape=None, filter_shape=None, filter_dilation=(1, 1)):
    '''2D convolution, optionally dilated.

    # Arguments
        x: input tensor.
        kernel: kernel tensor.
        strides: strides tuple.
        border_mode: string, "same" or "valid".
        dim_ordering: "tf" or "th".
            Whether to use Theano or TensorFlow dimension ordering
            for inputs/kernels/outputs.
        image_shape: not used by this implementation.
        filter_shape: not used by this implementation.
        filter_dilation: dilation tuple. When it is not `(1, 1)`, both
            entries must be equal, `strides` must be `(1, 1)`, and the
            first entry is used as the rate of `tf.nn.atrous_conv2d`.

    # Returns
        The convolved tensor, laid out according to `dim_ordering`.
    '''
    if dim_ordering not in {'th', 'tf'}:
        raise Exception('Unknown dim_ordering ' + str(dim_ordering))

    x = _preprocess_conv2d_input(x, dim_ordering)
    kernel = _preprocess_conv2d_kernel(kernel, dim_ordering)
    padding = _preprocess_border_mode(border_mode)

    if filter_dilation != (1, 1):
        assert filter_dilation[0] == filter_dilation[1]
        assert strides == (1, 1), 'Invalid strides for dilated convolution'
        rate = filter_dilation[0]
        conv_out = tf.nn.atrous_conv2d(x, kernel, rate, padding=padding)
    else:
        full_strides = (1,) + strides + (1,)
        conv_out = tf.nn.conv2d(x, kernel, full_strides, padding=padding)
    return _postprocess_conv2d_output(conv_out, dim_ordering)
|
||||
|
||||
|
||||
def deconv2d(x, kernel, output_shape, strides=(1, 1),
             border_mode='valid',
             dim_ordering=_IMAGE_DIM_ORDERING,
             image_shape=None, filter_shape=None):
    '''2D deconvolution (i.e. transposed convolution).

    # Arguments
        x: input tensor.
        kernel: kernel tensor.
        output_shape: 1D int tensor for the output shape.
        strides: strides tuple.
        border_mode: string, "same" or "valid".
        dim_ordering: "tf" or "th".
            Whether to use Theano or TensorFlow dimension ordering
            for inputs/kernels/outputs.
        image_shape: not used by this implementation.
        filter_shape: not used by this implementation.

    # Returns
        The output of `tf.nn.conv2d_transpose`, laid out according to
        `dim_ordering`.
    '''
    if dim_ordering not in {'th', 'tf'}:
        raise Exception('Unknown dim_ordering ' + str(dim_ordering))

    x = _preprocess_conv2d_input(x, dim_ordering)
    output_shape = _preprocess_deconv_output_shape(output_shape, dim_ordering)
    # After the regular kernel preprocessing, the last two kernel axes
    # are swapped before the kernel is handed to conv2d_transpose.
    prepared_kernel = _preprocess_conv2d_kernel(kernel, dim_ordering)
    prepared_kernel = tf.transpose(prepared_kernel, (0, 1, 3, 2))
    padding = _preprocess_border_mode(border_mode)
    full_strides = (1,) + strides + (1,)

    deconv_out = tf.nn.conv2d_transpose(x, prepared_kernel, output_shape,
                                        full_strides, padding=padding)
    return _postprocess_conv2d_output(deconv_out, dim_ordering)
|
||||
|
||||
|
||||
def atrous_conv2d(x, kernel, rate=1,
                  border_mode='valid',
                  dim_ordering=_IMAGE_DIM_ORDERING,
                  image_shape=None, filter_shape=None):
    '''Atrous (dilated) 2D convolution.

    # Arguments
        x: input tensor.
        kernel: kernel tensor.
        rate: dilation rate passed to `tf.nn.atrous_conv2d`. With
            `rate == 1` the call is delegated to `conv2d` with unit
            strides.
        border_mode: string, "same" or "valid".
        dim_ordering: "tf" or "th".
        image_shape: not used by this implementation.
        filter_shape: not used by this implementation.

    # Returns
        The convolved tensor, laid out according to `dim_ordering`.
    '''
    if dim_ordering not in {'th', 'tf'}:
        raise Exception('Unknown dim_ordering ' + str(dim_ordering))
    if rate == 1:
        return conv2d(x, kernel, strides=(1, 1), border_mode=border_mode,
                      dim_ordering=dim_ordering)

    prepared_x = _preprocess_conv2d_input(x, dim_ordering)
    prepared_kernel = _preprocess_conv2d_kernel(kernel, dim_ordering)
    padding = _preprocess_border_mode(border_mode)

    conv_out = tf.nn.atrous_conv2d(prepared_x, prepared_kernel, rate, padding)
    return _postprocess_conv2d_output(conv_out, dim_ordering)
|
||||
|
||||
|
||||
def separable_conv2d(x, depthwise_kernel, pointwise_kernel, strides=(1, 1),
                     border_mode='valid', dim_ordering=_IMAGE_DIM_ORDERING):
    '''2D separable convolution through `tf.nn.separable_conv2d`.

    # Arguments
        x: input tensor.
        depthwise_kernel: kernel for the depthwise step.
        pointwise_kernel: kernel for the pointwise step.
        strides: strides tuple.
        border_mode: string, "same" or "valid".
        dim_ordering: "tf" or "th".

    # Returns
        The convolved tensor, laid out according to `dim_ordering`.
    '''
    if dim_ordering not in {'th', 'tf'}:
        raise Exception('Unknown dim_ordering ' + str(dim_ordering))

    x = _preprocess_conv2d_input(x, dim_ordering)
    # Both kernels go through the same preprocessing as a regular
    # 2D convolution kernel.
    depthwise = _preprocess_conv2d_kernel(depthwise_kernel, dim_ordering)
    pointwise = _preprocess_conv2d_kernel(pointwise_kernel, dim_ordering)
    padding = _preprocess_border_mode(border_mode)
    full_strides = (1,) + strides + (1,)

    conv_out = tf.nn.separable_conv2d(x, depthwise, pointwise,
                                      full_strides, padding)
    return _postprocess_conv2d_output(conv_out, dim_ordering)
|
||||
|
||||
|
||||
def conv3d(x, kernel, strides=(1, 1, 1),
           border_mode='valid', dim_ordering=_IMAGE_DIM_ORDERING,
           volume_shape=None, filter_shape=None):
    '''3D convolution.

    # Arguments
        x: input tensor.
        kernel: kernel tensor.
        strides: strides tuple.
        border_mode: string, "same" or "valid".
        dim_ordering: "tf" or "th".
            Whether to use Theano or TensorFlow dimension ordering
            for inputs/kernels/outputs.
        volume_shape: not used by this implementation.
        filter_shape: not used by this implementation.

    # Returns
        The convolved tensor, laid out according to `dim_ordering`.
    '''
    if dim_ordering not in {'th', 'tf'}:
        raise Exception('Unknown dim_ordering ' + str(dim_ordering))

    prepared_x = _preprocess_conv3d_input(x, dim_ordering)
    prepared_kernel = _preprocess_conv3d_kernel(kernel, dim_ordering)
    padding = _preprocess_border_mode(border_mode)
    full_strides = (1,) + strides + (1,)

    conv_out = tf.nn.conv3d(prepared_x, prepared_kernel, full_strides, padding)
    return _postprocess_conv3d_output(conv_out, dim_ordering)
|
||||
|
||||
|
||||
def pool2d(x, pool_size, strides=(1, 1),
|
||||
border_mode='valid', dim_ordering='th', pool_mode='max'):
|
||||
border_mode='valid', dim_ordering=_IMAGE_DIM_ORDERING,
|
||||
pool_mode='max'):
|
||||
'''2D Pooling.
|
||||
|
||||
# Arguments
|
||||
@@ -1056,43 +1489,53 @@ def pool2d(x, pool_size, strides=(1, 1),
|
||||
dim_ordering: one of "th", "tf".
|
||||
pool_mode: one of "max", "avg".
|
||||
'''
|
||||
if border_mode == 'same':
|
||||
padding = 'SAME'
|
||||
elif border_mode == 'valid':
|
||||
padding = 'VALID'
|
||||
else:
|
||||
raise Exception('Invalid border mode: ' + str(border_mode))
|
||||
if dim_ordering not in {'th', 'tf'}:
|
||||
raise Exception('Unknown dim_ordering ' + str(dim_ordering))
|
||||
|
||||
padding = _preprocess_border_mode(border_mode)
|
||||
strides = (1,) + strides + (1,)
|
||||
pool_size = (1,) + pool_size + (1,)
|
||||
|
||||
if _FLOATX == 'float64':
|
||||
# tf max_pool only supports float32
|
||||
x = tf.cast(x, 'float32')
|
||||
x = _preprocess_conv2d_input(x, dim_ordering)
|
||||
|
||||
if dim_ordering in {'tf', 'th'}:
|
||||
if dim_ordering == 'th':
|
||||
# TF uses the last dimension as channel dimension,
|
||||
# instead of the 2nd one.
|
||||
# TH input shape: (samples, input_depth, rows, cols)
|
||||
# TF input shape: (samples, rows, cols, input_depth)
|
||||
# TH kernel shape: (depth, input_depth, rows, cols)
|
||||
# TF kernel shape: (rows, cols, input_depth, depth)
|
||||
x = tf.transpose(x, (0, 2, 3, 1))
|
||||
if pool_mode == 'max':
|
||||
x = tf.nn.max_pool(x, pool_size, strides, padding=padding)
|
||||
elif pool_mode == 'avg':
|
||||
x = tf.nn.avg_pool(x, pool_size, strides, padding=padding)
|
||||
else:
|
||||
raise Exception('Invalid pooling mode: ' + str(pool_mode))
|
||||
if dim_ordering == 'th':
|
||||
x = tf.transpose(x, (0, 3, 1, 2))
|
||||
if pool_mode == 'max':
|
||||
x = tf.nn.max_pool(x, pool_size, strides, padding=padding)
|
||||
elif pool_mode == 'avg':
|
||||
x = tf.nn.avg_pool(x, pool_size, strides, padding=padding)
|
||||
else:
|
||||
raise Exception('Unknown dim_ordering: ' + str(dim_ordering))
|
||||
raise Exception('Invalid pooling mode: ' + str(pool_mode))
|
||||
|
||||
if _FLOATX == 'float64':
|
||||
x = tf.cast(x, 'float64')
|
||||
return x
|
||||
return _postprocess_conv2d_output(x, dim_ordering)
|
||||
|
||||
|
||||
def pool3d(x, pool_size, strides=(1, 1, 1), border_mode='valid',
           dim_ordering=_IMAGE_DIM_ORDERING, pool_mode='max'):
    '''3D Pooling.

    # Arguments
        x: input tensor.
        pool_size: tuple of 3 integers.
        strides: tuple of 3 integers.
        border_mode: one of "valid", "same".
        dim_ordering: one of "th", "tf".
        pool_mode: one of "max", "avg".

    # Returns
        The pooled tensor, laid out according to `dim_ordering`.
    '''
    if dim_ordering not in {'th', 'tf'}:
        raise Exception('Unknown dim_ordering ' + str(dim_ordering))

    padding = _preprocess_border_mode(border_mode)
    full_strides = (1,) + strides + (1,)
    window = (1,) + pool_size + (1,)

    x = _preprocess_conv3d_input(x, dim_ordering)

    # The pooling mode is only checked once the input has been prepared.
    if pool_mode == 'max':
        pool_op = tf.nn.max_pool3d
    elif pool_mode == 'avg':
        pool_op = tf.nn.avg_pool3d
    else:
        raise Exception('Invalid pooling mode: ' + str(pool_mode))
    pooled = pool_op(x, window, full_strides, padding=padding)

    return _postprocess_conv3d_output(pooled, dim_ordering)
|
||||
|
||||
|
||||
# RANDOMNESS
|
||||
@@ -1115,4 +1558,5 @@ def random_binomial(shape, p=0.0, dtype=_FLOATX, seed=None):
|
||||
if seed is None:
|
||||
seed = np.random.randint(10e6)
|
||||
return tf.select(tf.random_uniform(shape, dtype=dtype, seed=seed) <= p,
|
||||
tf.ones(shape), tf.zeros(shape))
|
||||
tf.ones(shape, dtype=dtype),
|
||||
tf.zeros(shape, dtype=dtype))
|
||||
|
||||
@@ -3,13 +3,14 @@ from theano import tensor as T
|
||||
from theano.sandbox.rng_mrg import MRG_RandomStreams as RandomStreams
|
||||
from theano.tensor.signal import pool
|
||||
from theano.tensor.nnet import conv3d2d
|
||||
from theano.printing import Print
|
||||
try:
|
||||
from theano.tensor.nnet.nnet import softsign as T_softsign
|
||||
except ImportError:
|
||||
from theano.sandbox.softsign import softsign as T_softsign
|
||||
import inspect
|
||||
import numpy as np
|
||||
from .common import _FLOATX, _EPSILON
|
||||
from .common import _FLOATX, _EPSILON, _IMAGE_DIM_ORDERING
|
||||
|
||||
|
||||
# INTERNAL UTILS
|
||||
@@ -22,6 +23,14 @@ def learning_phase():
|
||||
return _LEARNING_PHASE
|
||||
|
||||
|
||||
def set_learning_phase(value):
    '''Set the module-level `_LEARNING_PHASE` to a fixed value.

    # Arguments
        value: the learning phase, 0 or 1.

    # Raises
        ValueError: if `value` is neither 0 nor 1.
    '''
    global _LEARNING_PHASE
    allowed = {0, 1}
    if value not in allowed:
        raise ValueError('Expected learning phase to be '
                         '0 or 1.')
    _LEARNING_PHASE = value
|
||||
|
||||
|
||||
# VARIABLE MANIPULATION
|
||||
|
||||
def variable(value, dtype=_FLOATX, name=None):
|
||||
@@ -97,6 +106,16 @@ def zeros_like(x):
|
||||
return T.zeros_like(x)
|
||||
|
||||
|
||||
def random_uniform_variable(shape, low, high, dtype=_FLOATX, name=None):
    '''Create a variable initialized with samples from
    `np.random.uniform(low, high)`.

    # Arguments
        shape: shape of the sampled array.
        low: lower bound handed to `np.random.uniform`.
        high: upper bound handed to `np.random.uniform`.
        dtype: dtype forwarded to `variable`.
        name: name forwarded to `variable`.

    # Returns
        The result of `variable(...)` on the sampled array.
    '''
    initial_value = np.random.uniform(low=low, high=high, size=shape)
    return variable(initial_value, dtype=dtype, name=name)
|
||||
|
||||
|
||||
def random_normal_variable(shape, mean, scale, dtype=_FLOATX, name=None):
    '''Create a variable initialized with samples from a normal
    distribution.

    # Arguments
        shape: shape of the sampled array.
        mean: mean of the distribution (`loc` of `np.random.normal`).
        scale: standard deviation of the distribution.
        dtype: dtype forwarded to `variable`.
        name: name forwarded to `variable`.

    # Returns
        The result of `variable(...)` on the sampled array.
    '''
    # Centre the distribution on the caller-supplied `mean`; a fixed
    # `loc=0.0` would silently ignore that argument.
    initial_value = np.random.normal(loc=mean, scale=scale, size=shape)
    return variable(initial_value, dtype=dtype, name=name)
|
||||
|
||||
|
||||
def count_params(x):
|
||||
'''Return number of scalars in a tensor.
|
||||
|
||||
@@ -109,6 +128,25 @@ def cast(x, dtype):
|
||||
return T.cast(x, dtype)
|
||||
|
||||
|
||||
# UPDATES OPS
|
||||
|
||||
|
||||
def update(x, new_x):
    '''Build an update pair.

    # Returns
        The tuple `(x, new_x)`.
    '''
    pair = (x, new_x)
    return pair
|
||||
|
||||
|
||||
def update_add(x, increment):
    '''Build an update pair that adds `increment` to `x`.

    # Returns
        The tuple `(x, x + increment)`.
    '''
    new_x = x + increment
    return (x, new_x)
|
||||
|
||||
|
||||
def update_sub(x, decrement):
    '''Build an update pair that subtracts `decrement` from `x`.

    # Returns
        The tuple `(x, x - decrement)`.
    '''
    new_x = x - decrement
    return (x, new_x)
|
||||
|
||||
|
||||
def moving_average_update(variable, value, momentum):
    '''Build an update pair for a moving average of `variable`.

    # Returns
        The tuple
        `(variable, variable * momentum + value * (1. - momentum))`.
    '''
    averaged = variable * momentum + value * (1. - momentum)
    return (variable, averaged)
|
||||
|
||||
|
||||
# LINEAR ALGEBRA
|
||||
|
||||
'''
|
||||
@@ -122,25 +160,42 @@ def dot(x, y):
|
||||
|
||||
|
||||
def batch_dot(x, y, axes=None):
|
||||
'''batchwise dot product
|
||||
'''Batchwise dot product.
|
||||
|
||||
batch_dot results in a tensor with less dimensions than the input.
|
||||
If the number of dimensions is reduced to 1, we use `expand_dims` to
|
||||
make sure that ndim is at least 2.
|
||||
|
||||
# Example
|
||||
Assume x = [[1, 2] and y = [[5, 6]
|
||||
[3, 4]] [7, 8]]
|
||||
batch_dot(x, y, axes=1) = [[17, 53]] which is the main diagonal
|
||||
of x.dot(y.T), although we never have to calculate the off-diagonal
|
||||
elements.
|
||||
|
||||
|
||||
# Arguments
|
||||
x, y: tensors with ndim >= 2
|
||||
axes: list (or single) int with target dimensions
|
||||
|
||||
# Returns
|
||||
Tensor with ndim >= 2
|
||||
A tensor with shape equal to the concatenation of x's shape
|
||||
(less the dimension that was summed over) and y's shape
|
||||
(less the batch dimension and the dimension that was summed over).
|
||||
If the final rank is 1, we reshape it to (batch_size, 1).
|
||||
|
||||
# Examples
|
||||
Assume x = [[1, 2], [3, 4]] and y = [[5, 6], [7, 8]]
|
||||
batch_dot(x, y, axes=1) = [[17, 53]] which is the main diagonal
|
||||
of x.dot(y.T), although we never have to calculate the off-diagonal
|
||||
elements.
|
||||
|
||||
Shape inference:
|
||||
Let x's shape be (100, 20) and y's shape be (100, 30, 20).
|
||||
If dot_axes is (1, 2), to find the output shape of resultant tensor,
|
||||
loop through each dimension in x's shape and y's shape:
|
||||
x.shape[0] : 100 : append to output shape
|
||||
x.shape[1] : 20 : do not append to output shape,
|
||||
dimension 1 of x has been summed over. (dot_axes[0] = 1)
|
||||
y.shape[0] : 100 : do not append to output shape,
|
||||
always ignore first dimension of y
|
||||
y.shape[1] : 30 : append to output shape
|
||||
y.shape[2] : 20 : do not append to output shape,
|
||||
dimension 2 of y has been summed over. (dot_axes[1] = 2)
|
||||
|
||||
output_shape = (100, 30)
|
||||
'''
|
||||
if type(axes) == int:
|
||||
axes = (axes, axes)
|
||||
@@ -271,6 +326,22 @@ def not_equal(x, y):
|
||||
return T.neq(x, y)
|
||||
|
||||
|
||||
def greater(x, y):
    '''Element-wise comparison `x > y`, computed with `T.gt`.
    '''
    result = T.gt(x, y)
    return result
|
||||
|
||||
|
||||
def greater_equal(x, y):
    '''Element-wise comparison `x >= y`, computed with `T.ge`.
    '''
    result = T.ge(x, y)
    return result
|
||||
|
||||
|
||||
def lesser(x, y):
    '''Element-wise comparison `x < y`, computed with `T.lt`.
    '''
    result = T.lt(x, y)
    return result
|
||||
|
||||
|
||||
def lesser_equal(x, y):
    '''Element-wise comparison `x <= y`, computed with `T.le`.
    '''
    result = T.le(x, y)
    return result
|
||||
|
||||
|
||||
def maximum(x, y):
    '''Element-wise maximum of `x` and `y`, computed with `T.maximum`.
    '''
    result = T.maximum(x, y)
    return result
|
||||
|
||||
@@ -287,6 +358,40 @@ def cos(x):
|
||||
return T.cos(x)
|
||||
|
||||
|
||||
def normalize_batch_in_training(x, gamma, beta,
                                reduction_axes, epsilon=0.0001):
    '''Compute the mean and variance of `x` over `reduction_axes`,
    then apply `batch_normalization` to `x` using those statistics.

    # Arguments
        x: input tensor.
        gamma: scale tensor.
        beta: offset tensor.
        reduction_axes: axes the statistics are computed over.
        epsilon: fuzz factor forwarded to `batch_normalization`.

    # Returns
        A tuple `(normed, mean, var)`, where `mean` and `var` are the
        statistics before they are reshaped for broadcasting.
    '''
    var = x.var(reduction_axes)
    mean = x.mean(reduction_axes)

    # Reduced axes get size 1, every other axis keeps the size it has
    # in `x`.
    dims = [1 if axis in reduction_axes else x.shape[axis]
            for axis in range(ndim(x))]
    target_shape = T.stack(*dims)

    broadcast_mean = T.reshape(mean, target_shape)
    broadcast_var = T.reshape(var, target_shape)
    broadcast_beta = T.reshape(beta, target_shape)
    broadcast_gamma = T.reshape(gamma, target_shape)
    normed = batch_normalization(x, broadcast_mean, broadcast_var,
                                 broadcast_beta, broadcast_gamma,
                                 epsilon)
    return normed, mean, var
|
||||
|
||||
|
||||
def batch_normalization(x, mean, var, beta, gamma, epsilon=0.0001):
    '''Apply batch normalization on x given mean, var, beta and gamma.

    The call is forwarded to `T.nnet.bn.batch_normalization` in
    'high_mem' mode, with `sqrt(var) + epsilon` passed as its standard
    deviation argument.
    '''
    std = sqrt(var) + epsilon
    return T.nnet.bn.batch_normalization(x, gamma, beta, mean, std,
                                         mode='high_mem')
|
||||
|
||||
|
||||
# SHAPE OPERATIONS
|
||||
|
||||
def concatenate(tensors, axis=-1):
|
||||
@@ -490,6 +595,19 @@ def spatial_3d_padding(x, padding=(1, 1, 1), dim_ordering='th'):
|
||||
def pack(x):
    '''Return `T.stack(*x)`, i.e. the items of `x` stacked together.
    '''
    stacked = T.stack(*x)
    return stacked
|
||||
|
||||
|
||||
def one_hot(indices, nb_classes):
    '''One-hot encode an integer tensor along a new last axis.

    Input: nD integer tensor of shape (batch_size, dim1, dim2, ... dim(n-1))
    Output: (n + 1)D one hot representation of the input with shape
        (batch_size, dim1, dim2, ... dim(n-1), nb_classes)
    '''
    # remember the symbolic shape before flattening
    original_dims = tuple(indices.shape[axis]
                          for axis in range(indices.ndim))
    flat_indices = T.flatten(indices)
    encoded = T.extra_ops.to_one_hot(flat_indices, nb_classes)
    return T.reshape(encoded, original_dims + (nb_classes,))
|
||||
|
||||
|
||||
# VALUE MANIPULATION
|
||||
|
||||
|
||||
@@ -516,6 +634,14 @@ def batch_set_value(tuples):
|
||||
x.set_value(np.asarray(value, dtype=x.dtype))
|
||||
|
||||
|
||||
def print_tensor(x, message=''):
    '''Wrap `x` in a `Print(message)` op and return the op's output,
    so that the message and the tensor are printed when it is evaluated.
    '''
    return Print(message)(x)
|
||||
|
||||
|
||||
# GRAPH MANIPULATION
|
||||
|
||||
class Function(object):
|
||||
@@ -523,7 +649,7 @@ class Function(object):
|
||||
def __init__(self, inputs, outputs, updates=[], **kwargs):
|
||||
self.function = theano.function(inputs, outputs, updates=updates,
|
||||
allow_input_downcast=True,
|
||||
on_unused_input='warn',
|
||||
on_unused_input='ignore',
|
||||
**kwargs)
|
||||
|
||||
def __call__(self, inputs):
|
||||
@@ -545,6 +671,13 @@ def gradients(loss, variables):
|
||||
return T.grad(loss, variables)
|
||||
|
||||
|
||||
def stop_gradient(variables):
    '''Returns `variables` but with zero gradient with respect to every
    other variable (via `theano.gradient.disconnected_grad`).
    '''
    detached = theano.gradient.disconnected_grad(variables)
    return detached
|
||||
|
||||
|
||||
# CONTROL FLOW
|
||||
|
||||
def rnn(step_function, inputs, initial_states,
|
||||
@@ -717,12 +850,20 @@ def switch(condition, then_expression, else_expression):
|
||||
|
||||
|
||||
def in_train_phase(x, alt):
    '''Select `x` in the training phase and `alt` otherwise.

    When `_LEARNING_PHASE` is a plain Python integer the choice is made
    immediately (1 -> `x`, 0 -> `alt`). Otherwise a symbolic
    `T.switch` on `_LEARNING_PHASE` is returned, tagged with
    `_uses_learning_phase = True`.
    '''
    # Compare by value, not identity: `is 1` only works through CPython's
    # small-int caching and is a SyntaxWarning on recent Pythons. The
    # isinstance guard keeps symbolic phases out of the `==` comparison.
    static_phase = _LEARNING_PHASE if isinstance(_LEARNING_PHASE, int) else None
    if static_phase == 1:
        return x
    if static_phase == 0:
        return alt
    x = T.switch(_LEARNING_PHASE, x, alt)
    x._uses_learning_phase = True
    return x
|
||||
|
||||
|
||||
def in_test_phase(x, alt):
    '''Select `x` in the test phase and `alt` otherwise.

    When `_LEARNING_PHASE` is a plain Python integer the choice is made
    immediately (1 -> `alt`, 0 -> `x`). Otherwise a symbolic
    `T.switch` on `_LEARNING_PHASE` is returned, tagged with
    `_uses_learning_phase = True`.
    '''
    # Compare by value, not identity: `is 1` only works through CPython's
    # small-int caching and is a SyntaxWarning on recent Pythons. The
    # isinstance guard keeps symbolic phases out of the `==` comparison.
    static_phase = _LEARNING_PHASE if isinstance(_LEARNING_PHASE, int) else None
    if static_phase == 1:
        return alt
    if static_phase == 0:
        return x
    x = T.switch(_LEARNING_PHASE, alt, x)
    x._uses_learning_phase = True
    return x
|
||||
@@ -810,68 +951,172 @@ def l2_normalize(x, axis):
|
||||
|
||||
# CONVOLUTIONS
|
||||
|
||||
def conv2d(x, kernel, strides=(1, 1), border_mode='valid', dim_ordering='th',
|
||||
image_shape=None, filter_shape=None):
|
||||
'''
|
||||
border_mode: string, "same" or "valid".
|
||||
'''
|
||||
if dim_ordering not in {'th', 'tf'}:
|
||||
raise Exception('Unknown dim_ordering ' + str(dim_ordering))
|
||||
|
||||
def _preprocess_conv2d_input(x, dim_ordering):
|
||||
if dim_ordering == 'tf':
|
||||
# TF uses the last dimension as channel dimension,
|
||||
# instead of the 2nd one.
|
||||
# TH input shape: (samples, input_depth, rows, cols)
|
||||
# TF input shape: (samples, rows, cols, input_depth)
|
||||
x = x.dimshuffle((0, 3, 1, 2))
|
||||
return x
|
||||
|
||||
|
||||
def _preprocess_conv2d_kernel(kernel, dim_ordering):
|
||||
if dim_ordering == 'tf':
|
||||
# TF uses the last dimension as channel dimension,
|
||||
# instead of the 2nd one.
|
||||
# TH kernel shape: (depth, input_depth, rows, cols)
|
||||
# TF kernel shape: (rows, cols, input_depth, depth)
|
||||
x = x.dimshuffle((0, 3, 1, 2))
|
||||
kernel = kernel.dimshuffle((3, 2, 0, 1))
|
||||
if image_shape:
|
||||
image_shape = (image_shape[0], image_shape[3],
|
||||
image_shape[1], image_shape[2])
|
||||
if filter_shape:
|
||||
filter_shape = (filter_shape[3], filter_shape[2],
|
||||
filter_shape[0], filter_shape[1])
|
||||
return kernel
|
||||
|
||||
|
||||
def _preprocess_border_mode(border_mode):
|
||||
if border_mode == 'same':
|
||||
th_border_mode = 'half'
|
||||
np_kernel = kernel.eval()
|
||||
elif border_mode == 'valid':
|
||||
th_border_mode = 'valid'
|
||||
else:
|
||||
raise Exception('Border mode not supported: ' + str(border_mode))
|
||||
return th_border_mode
|
||||
|
||||
|
||||
def _preprocess_image_shape(dim_ordering, image_shape):
|
||||
# Theano might not accept long type
|
||||
def int_or_none(value):
|
||||
try:
|
||||
return int(value)
|
||||
except TypeError:
|
||||
return None
|
||||
|
||||
if dim_ordering == 'tf':
|
||||
if image_shape:
|
||||
image_shape = (image_shape[0], image_shape[3],
|
||||
image_shape[1], image_shape[2])
|
||||
if image_shape is not None:
|
||||
image_shape = tuple(int_or_none(v) for v in image_shape)
|
||||
return image_shape
|
||||
|
||||
|
||||
def _preprocess_filter_shape(dim_ordering, filter_shape):
|
||||
# Theano might not accept long type
|
||||
def int_or_none(value):
|
||||
try:
|
||||
return int(value)
|
||||
except TypeError:
|
||||
return None
|
||||
if dim_ordering == 'tf':
|
||||
if filter_shape:
|
||||
filter_shape = (filter_shape[3], filter_shape[2],
|
||||
filter_shape[0], filter_shape[1])
|
||||
if filter_shape is not None:
|
||||
filter_shape = tuple(int_or_none(v) for v in filter_shape)
|
||||
return filter_shape
|
||||
|
||||
conv_out = T.nnet.conv2d(x, kernel,
|
||||
border_mode=th_border_mode,
|
||||
subsample=strides,
|
||||
input_shape=image_shape,
|
||||
filter_shape=filter_shape)
|
||||
|
||||
def _postprocess_conv2d_output(conv_out, x, border_mode, np_kernel, strides, dim_ordering):
|
||||
if border_mode == 'same':
|
||||
if np_kernel.shape[2] % 2 == 0:
|
||||
conv_out = conv_out[:, :, :(x.shape[2] + strides[0] - 1) // strides[0], :]
|
||||
if np_kernel.shape[3] % 2 == 0:
|
||||
conv_out = conv_out[:, :, :, :(x.shape[3] + strides[1] - 1) // strides[1]]
|
||||
|
||||
if dim_ordering == 'tf':
|
||||
conv_out = conv_out.dimshuffle((0, 2, 3, 1))
|
||||
return conv_out
|
||||
|
||||
|
||||
def conv2d(x, kernel, strides=(1, 1), border_mode='valid',
           dim_ordering=_IMAGE_DIM_ORDERING, image_shape=None,
           filter_shape=None, filter_dilation=(1, 1)):
    '''2D convolution.

    # Arguments
        kernel: kernel tensor.
        strides: strides tuple.
        border_mode: string, "same" or "valid".
        dim_ordering: "tf" or "th".
            Whether to use Theano or TensorFlow dimension ordering
            in inputs/kernels/outputs.
        image_shape: optional input shape, forwarded (after
            preprocessing) as `input_shape`.
        filter_shape: optional kernel shape, forwarded (after
            preprocessing) as `filter_shape`.
        filter_dilation: forwarded to `T.nnet.conv2d` only when it is
            not `(1, 1)`.
    '''
    if dim_ordering not in {'th', 'tf'}:
        raise Exception('Unknown dim_ordering ' + str(dim_ordering))

    x = _preprocess_conv2d_input(x, dim_ordering)
    kernel = _preprocess_conv2d_kernel(kernel, dim_ordering)
    th_border_mode = _preprocess_border_mode(border_mode)
    np_kernel = kernel.eval()
    image_shape = _preprocess_image_shape(dim_ordering, image_shape)
    filter_shape = _preprocess_filter_shape(dim_ordering, filter_shape)

    conv_kwargs = dict(border_mode=th_border_mode,
                       subsample=strides,
                       input_shape=image_shape,
                       filter_shape=filter_shape)
    # TODO: remove the if statement when theano with no filter dilation is deprecated.
    if not filter_dilation == (1, 1):
        conv_kwargs['filter_dilation'] = filter_dilation
    conv_out = T.nnet.conv2d(x, kernel, **conv_kwargs)

    return _postprocess_conv2d_output(conv_out, x, border_mode, np_kernel,
                                      strides, dim_ordering)
|
||||
|
||||
|
||||
def deconv2d(x, kernel, output_shape, strides=(1, 1),
             border_mode='valid',
             dim_ordering=_IMAGE_DIM_ORDERING,
             image_shape=None, filter_shape=None):
    '''2D deconvolution (transposed convolution).

    # Arguments
        kernel: kernel tensor.
        output_shape: desired dimensions of output.
        strides: strides tuple.
        border_mode: string, "same" or "valid".
        dim_ordering: "tf" or "th".
            Whether to use Theano or TensorFlow dimension ordering
            in inputs/kernels/outputs.
        image_shape: accepted but not read by this implementation.
        filter_shape: optional kernel shape, forwarded (after
            preprocessing) as `kshp`.
    '''
    if dim_ordering not in {'th', 'tf'}:
        raise Exception('Unknown dim_ordering ' + str(dim_ordering))

    x = _preprocess_conv2d_input(x, dim_ordering)
    # usual kernel preprocessing, then swap the first two kernel axes
    kernel = _preprocess_conv2d_kernel(kernel, dim_ordering)
    kernel = kernel.dimshuffle((1, 0, 2, 3))
    th_border_mode = _preprocess_border_mode(border_mode)
    np_kernel = kernel.eval()
    filter_shape = _preprocess_filter_shape(dim_ordering, filter_shape)

    grad_inputs_op = T.nnet.abstract_conv.AbstractConv2d_gradInputs(
        imshp=output_shape,
        kshp=filter_shape,
        subsample=strides,
        border_mode=th_border_mode,
        filter_flip=True)
    conv_out = grad_inputs_op(kernel, x, output_shape[2:])

    return _postprocess_conv2d_output(conv_out, x, border_mode, np_kernel,
                                      strides, dim_ordering)
|
||||
|
||||
|
||||
def atrous_conv2d(x, kernel, rate=1,
                  border_mode='valid',
                  dim_ordering=_IMAGE_DIM_ORDERING,
                  image_shape=None, filter_shape=None):
    '''Placeholder: always raises `NotImplementedError`.
    '''
    raise NotImplementedError
|
||||
|
||||
|
||||
def separable_conv2d(x, depthwise_kernel, pointwise_kernel, strides=(1, 1),
                     border_mode='valid', dim_ordering=_IMAGE_DIM_ORDERING):
    '''Placeholder: always raises `NotImplementedError`.
    '''
    raise NotImplementedError
|
||||
|
||||
|
||||
def conv3d(x, kernel, strides=(1, 1, 1),
|
||||
border_mode='valid', dim_ordering='th',
|
||||
volume_shape=None, filter_shape=None):
|
||||
|
||||
+36
-17
@@ -9,6 +9,7 @@ import warnings
|
||||
from collections import deque
|
||||
from .utils.generic_utils import Progbar
|
||||
from keras import backend as K
|
||||
from pkg_resources import parse_version
|
||||
|
||||
|
||||
class CallbackList(object):
|
||||
@@ -212,6 +213,7 @@ class History(Callback):
|
||||
for k, v in logs.items():
|
||||
self.history.setdefault(k, []).append(v)
|
||||
|
||||
|
||||
class ModelCheckpoint(Callback):
|
||||
'''Save the model after every epoch.
|
||||
|
||||
@@ -229,25 +231,29 @@ class ModelCheckpoint(Callback):
|
||||
verbose: verbosity mode, 0 or 1.
|
||||
save_best_only: if `save_best_only=True`,
|
||||
the latest best model according to
|
||||
the validation loss will not be overwritten.
|
||||
the quantity monitored will not be overwritten.
|
||||
mode: one of {auto, min, max}.
|
||||
If `save_best_only=True`, the decision
|
||||
to overwrite the current save file is made
|
||||
based on either the maximization or the
|
||||
minization of the monitored. For `val_acc`,
|
||||
minimization of the monitored quantity. For `val_acc`,
|
||||
this should be `max`, for `val_loss` this should
|
||||
be `min`, etc. In `auto` mode, the direction is
|
||||
automatically inferred from the name of the monitored quantity.
|
||||
save_weights_only: if True, then only the model's weights will be
|
||||
saved (`model.save_weights(filepath)`), else the full model
|
||||
is saved (`model.save(filepath)`).
|
||||
|
||||
'''
|
||||
def __init__(self, filepath, monitor='val_loss', verbose=0,
|
||||
save_best_only=False, mode='auto'):
|
||||
|
||||
save_best_only=False, save_weights_only=False,
|
||||
mode='auto'):
|
||||
super(ModelCheckpoint, self).__init__()
|
||||
self.monitor = monitor
|
||||
self.verbose = verbose
|
||||
self.filepath = filepath
|
||||
self.save_best_only = save_best_only
|
||||
self.save_weights_only = save_weights_only
|
||||
|
||||
if mode not in ['auto', 'min', 'max']:
|
||||
warnings.warn('ModelCheckpoint mode %s is unknown, '
|
||||
@@ -284,7 +290,10 @@ class ModelCheckpoint(Callback):
|
||||
% (epoch, self.monitor, self.best,
|
||||
current, filepath))
|
||||
self.best = current
|
||||
self.model.save_weights(filepath, overwrite=True)
|
||||
if self.save_weights_only:
|
||||
self.model.save_weights(filepath, overwrite=True)
|
||||
else:
|
||||
self.model.save(filepath, overwrite=True)
|
||||
else:
|
||||
if self.verbose > 0:
|
||||
print('Epoch %05d: %s did not improve' %
|
||||
@@ -292,7 +301,10 @@ class ModelCheckpoint(Callback):
|
||||
else:
|
||||
if self.verbose > 0:
|
||||
print('Epoch %05d: saving model to %s' % (epoch, filepath))
|
||||
self.model.save_weights(filepath, overwrite=True)
|
||||
if self.save_weights_only:
|
||||
self.model.save_weights(filepath, overwrite=True)
|
||||
else:
|
||||
self.model.save(filepath, overwrite=True)
|
||||
|
||||
|
||||
class EarlyStopping(Callback):
|
||||
@@ -319,7 +331,8 @@ class EarlyStopping(Callback):
|
||||
|
||||
if mode not in ['auto', 'min', 'max']:
|
||||
warnings.warn('EarlyStopping mode %s is unknown, '
|
||||
'fallback to auto mode.' % (self.mode), RuntimeWarning)
|
||||
'fallback to auto mode.' % (self.mode),
|
||||
RuntimeWarning)
|
||||
mode = 'auto'
|
||||
|
||||
if mode == 'min':
|
||||
@@ -361,13 +374,19 @@ class RemoteMonitor(Callback):
|
||||
# Arguments
|
||||
root: root url to which the events will be sent (at the end
|
||||
of every epoch). Events are sent to
|
||||
`root + '/publish/epoch/end/'`. Calls are HTTP POST,
|
||||
with a `data` argument which is a JSON-encoded dictionary
|
||||
of event data.
|
||||
`root + '/publish/epoch/end/'` by default. Calls are
|
||||
HTTP POST, with a `data` argument which is a
|
||||
JSON-encoded dictionary of event data.
|
||||
'''
|
||||
def __init__(self, root='http://localhost:9000'):
|
||||
|
||||
def __init__(self,
|
||||
root='http://localhost:9000',
|
||||
path='/publish/epoch/end/',
|
||||
field='data'):
|
||||
super(RemoteMonitor, self).__init__()
|
||||
self.root = root
|
||||
self.path = path
|
||||
self.field = field
|
||||
|
||||
def on_epoch_end(self, epoch, logs={}):
|
||||
import requests
|
||||
@@ -375,10 +394,9 @@ class RemoteMonitor(Callback):
|
||||
send['epoch'] = epoch
|
||||
for k, v in logs.items():
|
||||
send[k] = v
|
||||
|
||||
try:
|
||||
requests.post(self.root + '/publish/epoch/end/',
|
||||
{'data': json.dumps(send)})
|
||||
requests.post(self.root + self.path,
|
||||
{self.field: json.dumps(send)})
|
||||
except:
|
||||
print('Warning: could not reach RemoteMonitor '
|
||||
'root server at ' + str(self.root))
|
||||
@@ -428,8 +446,9 @@ class TensorBoard(Callback):
|
||||
histogram_freq: frequency (in epochs) at which to compute activation
|
||||
histograms for the layers of the model. If set to 0,
|
||||
histograms won't be computed.
|
||||
write_graph: whether to visualize the graph in Tensorboard. The log file can
|
||||
become quite large when write_graph is set to True.
|
||||
write_graph: whether to visualize the graph in Tensorboard.
|
||||
The log file can become quite large when
|
||||
write_graph is set to True.
|
||||
'''
|
||||
|
||||
def __init__(self, log_dir='./logs', histogram_freq=0, write_graph=True):
|
||||
@@ -460,7 +479,7 @@ class TensorBoard(Callback):
|
||||
layer.output)
|
||||
self.merged = tf.merge_all_summaries()
|
||||
if self.write_graph:
|
||||
if tf.__version__ >= '0.8.0':
|
||||
if parse_version(tf.__version__) >= parse_version('0.8.0'):
|
||||
self.writer = tf.train.SummaryWriter(self.log_dir,
|
||||
self.sess.graph)
|
||||
else:
|
||||
|
||||
+58
-11
@@ -4,26 +4,58 @@ import gzip
|
||||
from ..utils.data_utils import get_file
|
||||
from six.moves import zip
|
||||
import numpy as np
|
||||
import sys
|
||||
|
||||
|
||||
def load_data(path="imdb.pkl", nb_words=None, skip_top=0,
|
||||
maxlen=None, test_split=0.2, seed=113,
|
||||
def load_data(path='imdb_full.pkl', nb_words=None, skip_top=0,
|
||||
maxlen=None, seed=113,
|
||||
start_char=1, oov_char=2, index_from=3):
|
||||
'''
|
||||
# Arguments
|
||||
path: where to store the data (in `/.keras/dataset`)
|
||||
nb_words: max number of words to include. Words are ranked
|
||||
by how often they occur (in the training set) and only
|
||||
the most frequent words are kept
|
||||
skip_top: skip the top N most frequently occurring words
|
||||
(which may not be informative).
|
||||
maxlen: truncate sequences after this length.
|
||||
seed: random seed for sample shuffling.
|
||||
start_char: The start of a sequence will be marked with this character.
|
||||
Set to 1 because 0 is usually the padding character.
|
||||
oov_char: words that were cut out because of the `nb_words`
|
||||
or `skip_top` limit will be replaced with this character.
|
||||
index_from: index actual words with this index and higher.
|
||||
|
||||
path = get_file(path, origin="https://s3.amazonaws.com/text-datasets/imdb.pkl")
|
||||
Note that the 'out of vocabulary' character is only used for
|
||||
words that were present in the training set but are not included
|
||||
because they're not making the `nb_words` cut here.
|
||||
Words that were not seen in the training set but are in the test set
|
||||
have simply been skipped.
|
||||
'''
|
||||
path = get_file(path,
|
||||
origin='https://s3.amazonaws.com/text-datasets/imdb_full.pkl',
|
||||
md5_hash='d091312047c43cf9e4e38fef92437263')
|
||||
|
||||
if path.endswith(".gz"):
|
||||
if path.endswith('.gz'):
|
||||
f = gzip.open(path, 'rb')
|
||||
else:
|
||||
f = open(path, 'rb')
|
||||
|
||||
X, labels = cPickle.load(f)
|
||||
(x_train, labels_train), (x_test, labels_test) = cPickle.load(f)
|
||||
f.close()
|
||||
|
||||
np.random.seed(seed)
|
||||
np.random.shuffle(X)
|
||||
np.random.shuffle(x_train)
|
||||
np.random.seed(seed)
|
||||
np.random.shuffle(labels)
|
||||
np.random.shuffle(labels_train)
|
||||
|
||||
np.random.seed(seed * 2)
|
||||
np.random.shuffle(x_test)
|
||||
np.random.seed(seed * 2)
|
||||
np.random.shuffle(labels_test)
|
||||
|
||||
X = x_train + x_test
|
||||
labels = labels_train + labels_test
|
||||
|
||||
if start_char is not None:
|
||||
X = [[start_char] + [w + index_from for w in x] for x in X]
|
||||
@@ -60,10 +92,25 @@ def load_data(path="imdb.pkl", nb_words=None, skip_top=0,
|
||||
nX.append(nx)
|
||||
X = nX
|
||||
|
||||
X_train = np.array(X[:int(len(X) * (1 - test_split))])
|
||||
y_train = np.array(labels[:int(len(X) * (1 - test_split))])
|
||||
X_train = np.array(X[:len(x_train)])
|
||||
y_train = np.array(labels[:len(x_train)])
|
||||
|
||||
X_test = np.array(X[int(len(X) * (1 - test_split)):])
|
||||
y_test = np.array(labels[int(len(X) * (1 - test_split)):])
|
||||
X_test = np.array(X[len(x_train):])
|
||||
y_test = np.array(labels[len(x_train):])
|
||||
|
||||
return (X_train, y_train), (X_test, y_test)
|
||||
|
||||
|
||||
def get_word_index(path='imdb_word_index.pkl'):
    '''Fetch the IMDB word index file and return its unpickled content.

    # Arguments
        path: file name handed to `get_file` together with the
            download origin and expected md5 hash.

    # Returns
        Whatever object is stored in the pickle file.
    '''
    path = get_file(path,
                    origin='https://s3.amazonaws.com/text-datasets/imdb_word_index.pkl',
                    md5_hash='72d94b01291be4ff843198d3b0e1e4d7')

    # Python 3 needs an explicit encoding to read this pickle.
    load_kwargs = {} if sys.version_info < (3,) else {'encoding': 'latin1'}

    # NOTE(review): unpickling can execute arbitrary code; this relies on
    # the downloaded file being trustworthy (an md5 hash is passed to
    # get_file, presumably for verification -- confirm).
    # The context manager closes the file even if loading fails.
    with open(path, 'rb') as f:
        data = cPickle.load(f, **load_kwargs)
    return data
|
||||
|
||||
@@ -7,11 +7,11 @@ import numpy as np
|
||||
import sys
|
||||
|
||||
|
||||
def load_data(path="reuters.pkl", nb_words=None, skip_top=0,
|
||||
def load_data(path='reuters.pkl', nb_words=None, skip_top=0,
|
||||
maxlen=None, test_split=0.2, seed=113,
|
||||
start_char=1, oov_char=2, index_from=3):
|
||||
|
||||
path = get_file(path, origin="https://s3.amazonaws.com/text-datasets/reuters.pkl")
|
||||
path = get_file(path, origin='https://s3.amazonaws.com/text-datasets/reuters.pkl')
|
||||
f = open(path, 'rb')
|
||||
X, labels = cPickle.load(f)
|
||||
f.close()
|
||||
@@ -62,14 +62,14 @@ def load_data(path="reuters.pkl", nb_words=None, skip_top=0,
|
||||
return (X_train, y_train), (X_test, y_test)
|
||||
|
||||
|
||||
def get_word_index(path="reuters_word_index.pkl"):
|
||||
path = get_file(path, origin="https://s3.amazonaws.com/text-datasets/reuters_word_index.pkl")
|
||||
def get_word_index(path='reuters_word_index.pkl'):
|
||||
path = get_file(path, origin='https://s3.amazonaws.com/text-datasets/reuters_word_index.pkl')
|
||||
f = open(path, 'rb')
|
||||
|
||||
if sys.version_info < (3,):
|
||||
data = cPickle.load(f)
|
||||
else:
|
||||
data = cPickle.load(f, encoding="latin1")
|
||||
data = cPickle.load(f, encoding='latin1')
|
||||
|
||||
f.close()
|
||||
return data
|
||||
|
||||
+232
-114
@@ -10,9 +10,11 @@ import marshal
|
||||
import types as python_types
|
||||
import warnings
|
||||
import copy
|
||||
import os
|
||||
from six.moves import zip
|
||||
|
||||
from keras import backend as K
|
||||
from .. import backend as K
|
||||
from ..utils.io_utils import ask_to_proceed_with_overwrite
|
||||
|
||||
|
||||
def to_list(x):
|
||||
@@ -321,6 +323,30 @@ class Layer(object):
|
||||
if 'create_input_layer' in kwargs:
|
||||
self.create_input_layer(batch_input_shape, input_dtype)
|
||||
|
||||
@property
def trainable_weights(self):
    '''The stored trainable weights, or an empty list when the layer's
    `trainable` attribute is falsy (a missing attribute counts as True).
    '''
    if getattr(self, 'trainable', True):
        return self._trainable_weights
    return []

@trainable_weights.setter
def trainable_weights(self, weights):
    # stored privately; the getter decides what to expose
    self._trainable_weights = weights
|
||||
|
||||
@property
def non_trainable_weights(self):
    '''The stored non-trainable weights; when the layer's `trainable`
    attribute is falsy (a missing attribute counts as True) the stored
    trainable weights are prepended.
    '''
    if getattr(self, 'trainable', True):
        return self._non_trainable_weights
    return self._trainable_weights + self._non_trainable_weights

@non_trainable_weights.setter
def non_trainable_weights(self, weights):
    # stored privately; the getter decides what to expose
    self._non_trainable_weights = weights
|
||||
|
||||
def create_input_layer(self, batch_input_shape,
|
||||
input_dtype=None, name=None):
|
||||
if not name:
|
||||
@@ -694,15 +720,15 @@ class Layer(object):
|
||||
' outbound layers. '
|
||||
'This will cause part of your model '
|
||||
'to be disconnected.')
|
||||
if not shape:
|
||||
if hasattr(K, 'int_shape'):
|
||||
shape = K.int_shape(input_tensor)
|
||||
else:
|
||||
raise Exception('`set_input` needs to know the shape '
|
||||
'of the `input_tensor` it receives, but '
|
||||
'Keras was not able to infer it automatically.'
|
||||
' Specify it via: '
|
||||
'`model.set_input(input_tensor, shape)`')
|
||||
if hasattr(K, 'int_shape'):
|
||||
# auto-inferred shape takes priority
|
||||
shape = K.int_shape(input_tensor)
|
||||
elif not shape:
|
||||
raise Exception('`set_input` needs to know the shape '
|
||||
'of the `input_tensor` it receives, but '
|
||||
'Keras was not able to infer it automatically.'
|
||||
' Specify it via: '
|
||||
'`model.set_input(input_tensor, shape)`')
|
||||
# reset layer connections
|
||||
self.inbound_nodes = []
|
||||
self.outbound_nodes = []
|
||||
@@ -828,6 +854,10 @@ class Layer(object):
|
||||
'ill-defined for the layer. ' +
|
||||
'Use `get_output_shape_at(node_index)` instead.')
|
||||
|
||||
@property
def weights(self):
    '''All weights: `trainable_weights` followed by `non_trainable_weights`.
    '''
    trainable = self.trainable_weights
    frozen = self.non_trainable_weights
    return trainable + frozen
|
||||
|
||||
def set_weights(self, weights):
|
||||
'''Sets the weights of the layer, from Numpy arrays.
|
||||
|
||||
@@ -838,12 +868,12 @@ class Layer(object):
|
||||
of the layer (i.e. it should match the
|
||||
output of `get_weights`).
|
||||
'''
|
||||
params = self.trainable_weights + self.non_trainable_weights
|
||||
params = self.weights
|
||||
if len(params) != len(weights):
|
||||
raise Exception('You called `set_weights(weights)` on layer "' + self.name +
|
||||
'" with a weight list of length ' + str(len(weights)) +
|
||||
', but the layer was expecting ' + str(len(params)) +
|
||||
' weights. Provided weights: ' + str(weights))
|
||||
' weights. Provided weights: ' + str(weights)[:50] + '...')
|
||||
if not params:
|
||||
return
|
||||
weight_value_tuples = []
|
||||
@@ -861,7 +891,7 @@ class Layer(object):
|
||||
'''Returns the current weights of the layer,
|
||||
as a list of numpy arrays.
|
||||
'''
|
||||
params = self.trainable_weights + self.non_trainable_weights
|
||||
params = self.weights
|
||||
return K.batch_get_value(params)
|
||||
|
||||
def get_config(self):
|
||||
@@ -914,12 +944,14 @@ class InputLayer(Layer):
|
||||
'''TODO: docstring
|
||||
'''
|
||||
def __init__(self, input_shape=None, batch_input_shape=None,
|
||||
input_dtype=None, name=None):
|
||||
input_dtype=None, input_tensor=None, name=None):
|
||||
self.input_spec = None
|
||||
self.supports_masking = False
|
||||
self.uses_learning_phase = False
|
||||
self.trainable = False
|
||||
self.built = True
|
||||
self.trainable_weights = []
|
||||
self.non_trainable_weights = []
|
||||
|
||||
self.inbound_nodes = []
|
||||
self.outbound_nodes = []
|
||||
@@ -934,25 +966,48 @@ class InputLayer(Layer):
|
||||
name = prefix + '_' + str(K.get_uid(prefix))
|
||||
self.name = name
|
||||
|
||||
if input_shape and batch_input_shape:
|
||||
raise ValueError('Only provide the input_shape OR '
|
||||
'batch_input_shape argument to '
|
||||
'InputLayer, not both at the same time.')
|
||||
if input_tensor is not None:
|
||||
if not input_shape and not batch_input_shape:
|
||||
# attempt automatic input shape inference
|
||||
try:
|
||||
batch_input_shape = K.int_shape(input_tensor)
|
||||
except:
|
||||
raise ValueError('InputLayer was provided an input_tensor argument, '
|
||||
'but its input shape cannot be automatically inferred. '
|
||||
'You should pass an input_shape or batch_input_shape '
|
||||
'argument.')
|
||||
if not batch_input_shape:
|
||||
assert input_shape, 'An Input layer should be passed either a `batch_input_shape` or an `input_shape`.'
|
||||
batch_input_shape = (None,) + tuple(input_shape)
|
||||
if not input_shape:
|
||||
raise ValueError('An Input layer should be passed either '
|
||||
'a `batch_input_shape` or an `input_shape`.')
|
||||
else:
|
||||
batch_input_shape = (None,) + tuple(input_shape)
|
||||
else:
|
||||
batch_input_shape = tuple(batch_input_shape)
|
||||
|
||||
if not input_dtype:
|
||||
input_dtype = K.floatx()
|
||||
if input_tensor is None:
|
||||
input_dtype = K.floatx()
|
||||
else:
|
||||
input_dtype = K.dtype(input_tensor)
|
||||
|
||||
self.batch_input_shape = batch_input_shape
|
||||
self.input_dtype = input_dtype
|
||||
|
||||
input_tensor = K.placeholder(shape=batch_input_shape,
|
||||
dtype=input_dtype,
|
||||
name=self.name)
|
||||
if input_tensor is None:
|
||||
input_tensor = K.placeholder(shape=batch_input_shape,
|
||||
dtype=input_dtype,
|
||||
name=self.name)
|
||||
else:
|
||||
input_tensor._keras_shape = batch_input_shape
|
||||
# create an input node to add to self.outbound_node
|
||||
# and set output_tensors' _keras_history
|
||||
input_tensor._uses_learning_phase = False
|
||||
input_tensor._keras_history = (self, 0, 0)
|
||||
shape = input_tensor._keras_shape
|
||||
Node(self,
|
||||
inbound_layers=[],
|
||||
node_indices=[],
|
||||
@@ -961,8 +1016,8 @@ class InputLayer(Layer):
|
||||
output_tensors=[input_tensor],
|
||||
input_masks=[None],
|
||||
output_masks=[None],
|
||||
input_shapes=[shape],
|
||||
output_shapes=[shape])
|
||||
input_shapes=[batch_input_shape],
|
||||
output_shapes=[batch_input_shape])
|
||||
|
||||
def get_config(self):
|
||||
config = {'batch_input_shape': self.batch_input_shape,
|
||||
@@ -972,7 +1027,8 @@ class InputLayer(Layer):
|
||||
|
||||
|
||||
def Input(shape=None, batch_shape=None,
|
||||
name=None, dtype=K.floatx()):
|
||||
name=None, dtype=K.floatx(),
|
||||
tensor=None):
|
||||
'''`Input()` is used to instantiate a Keras tensor.
|
||||
A Keras tensor is a tensor object from the underlying backend
|
||||
(Theano or TensorFlow), which we augment with certain
|
||||
@@ -1014,14 +1070,15 @@ def Input(shape=None, batch_shape=None,
|
||||
model = Model(input=a, output=b)
|
||||
```
|
||||
'''
|
||||
if not batch_shape:
|
||||
if not batch_shape and tensor is None:
|
||||
assert shape, ('Please provide to Input either a `shape`' +
|
||||
' or a `batch_shape` argument. Note that ' +
|
||||
'`shape` does not include the batch '
|
||||
'dimension.')
|
||||
batch_shape = (None,) + tuple(shape)
|
||||
input_layer = InputLayer(batch_input_shape=batch_shape,
|
||||
name=name, input_dtype=dtype)
|
||||
name=name, input_dtype=dtype,
|
||||
input_tensor=tensor)
|
||||
# return tensor including _keras_shape and _keras_history
|
||||
# note that in this case train_output and test_output are the same pointer.
|
||||
outputs = input_layer.inbound_nodes[0].output_tensors
|
||||
@@ -1055,11 +1112,11 @@ class Merge(Layer):
|
||||
a list of layer instances. Must be more
|
||||
than one layer/tensor.
|
||||
mode: string or lambda/function. If string, must be one
|
||||
of: 'sum', 'mul', 'concat', 'ave', 'cos', 'dot'.
|
||||
of: 'sum', 'mul', 'concat', 'ave', 'cos', 'dot', 'max'.
|
||||
If lambda/function, it should take as input a list of tensors
|
||||
and return a single tensor.
|
||||
concat_axis: integer, axis to use in mode `concat`.
|
||||
dot_axes: integer or tuple of integers, axes to use in mode `dot`.
|
||||
dot_axes: integer or tuple of integers, axes to use in mode `dot` or `cos`.
|
||||
output_shape: either a shape tuple (tuple of integers), or a lambda/function
|
||||
to compute `output_shape` (only if merge mode is a lambda/function).
|
||||
If the argument is a tuple,
|
||||
@@ -1130,7 +1187,7 @@ class Merge(Layer):
|
||||
as appropriate.
|
||||
'''
|
||||
if not hasattr(mode, '__call__'):
|
||||
if mode not in {'sum', 'mul', 'concat', 'ave', 'cos', 'dot'}:
|
||||
if mode not in {'sum', 'mul', 'concat', 'ave', 'cos', 'dot', 'max'}:
|
||||
raise Exception('Invalid merge mode: ' + str(mode))
|
||||
if type(layers) not in {list, tuple} or len(layers) < 2:
|
||||
raise Exception('A Merge should only be applied to a list of '
|
||||
@@ -1148,7 +1205,7 @@ class Merge(Layer):
|
||||
layer_output_shape = layer_output_shape[tensor_indices[i]]
|
||||
input_shapes.append(layer_output_shape)
|
||||
|
||||
if mode in {'sum', 'mul', 'ave', 'cos'}:
|
||||
if mode in {'sum', 'mul', 'ave', 'cos', 'max'}:
|
||||
input_shapes_set = set(input_shapes)
|
||||
if len(input_shapes_set) > 1:
|
||||
raise Exception('Only layers of same output shape can '
|
||||
@@ -1161,22 +1218,21 @@ class Merge(Layer):
|
||||
shape2 = input_shapes[1]
|
||||
n1 = len(shape1)
|
||||
n2 = len(shape2)
|
||||
if mode == 'dot':
|
||||
if type(dot_axes) == int:
|
||||
if dot_axes < 0:
|
||||
dot_axes = [dot_axes % n1, dot_axes % n2]
|
||||
else:
|
||||
dot_axes = [n1 - dot_axes, n2-dot_axes]
|
||||
if type(dot_axes) not in [list, tuple]:
|
||||
raise Exception('Invalid type for dot_axes - should be a list.')
|
||||
if len(dot_axes) != 2:
|
||||
raise Exception('Invalid format for dot_axes - should contain two elements.')
|
||||
if type(dot_axes[0]) is not int or type(dot_axes[1]) is not int:
|
||||
raise Exception('Invalid format for dot_axes - list elements should be "int".')
|
||||
if shape1[dot_axes[0]] != shape2[dot_axes[1]]:
|
||||
raise Exception('Dimension incompatibility using dot mode: ' +
|
||||
'%s != %s. ' % (shape1[dot_axes[0]], shape2[dot_axes[1]]) +
|
||||
'Layer shapes: %s, %s' % (shape1, shape2))
|
||||
if type(dot_axes) == int:
|
||||
if dot_axes < 0:
|
||||
dot_axes = [dot_axes % n1, dot_axes % n2]
|
||||
else:
|
||||
dot_axes = [n1 - dot_axes, n2 - dot_axes]
|
||||
if type(dot_axes) not in [list, tuple]:
|
||||
raise Exception('Invalid type for dot_axes - should be a list.')
|
||||
if len(dot_axes) != 2:
|
||||
raise Exception('Invalid format for dot_axes - should contain two elements.')
|
||||
if type(dot_axes[0]) is not int or type(dot_axes[1]) is not int:
|
||||
raise Exception('Invalid format for dot_axes - list elements should be "int".')
|
||||
if shape1[dot_axes[0]] != shape2[dot_axes[1]]:
|
||||
raise Exception('Dimension incompatibility using dot mode: ' +
|
||||
'%s != %s. ' % (shape1[dot_axes[0]], shape2[dot_axes[1]]) +
|
||||
'Layer shapes: %s, %s' % (shape1, shape2))
|
||||
elif mode == 'concat':
|
||||
reduced_inputs_shapes = [list(shape) for shape in input_shapes]
|
||||
shape_set = set()
|
||||
@@ -1215,7 +1271,11 @@ class Merge(Layer):
|
||||
for i in range(1, len(inputs)):
|
||||
s *= inputs[i]
|
||||
return s
|
||||
|
||||
elif self.mode == 'max':
|
||||
s = inputs[0]
|
||||
for i in range(1, len(inputs)):
|
||||
s = K.maximum(s, inputs[i])
|
||||
return s
|
||||
elif self.mode == 'dot':
|
||||
l1 = inputs[0]
|
||||
l2 = inputs[1]
|
||||
@@ -1283,7 +1343,7 @@ class Merge(Layer):
|
||||
output_shape = self._output_shape(input_shape)
|
||||
return output_shape
|
||||
elif self._output_shape is not None:
|
||||
return (input_shape[0],) + tuple(self._output_shape)
|
||||
return (input_shape[0][0],) + tuple(self._output_shape)
|
||||
else:
|
||||
# TODO: consider shape auto-inference with TF
|
||||
raise Exception('The Merge layer ' + self.name +
|
||||
@@ -1294,7 +1354,7 @@ class Merge(Layer):
|
||||
'`output_shape` to Merge.')
|
||||
# pre-defined merge modes
|
||||
input_shapes = input_shape
|
||||
if self.mode in ['sum', 'mul', 'ave']:
|
||||
if self.mode in ['sum', 'mul', 'ave', 'max']:
|
||||
# all tuples in input_shapes should be the same
|
||||
return input_shapes[0]
|
||||
elif self.mode == 'concat':
|
||||
@@ -1305,23 +1365,19 @@ class Merge(Layer):
|
||||
break
|
||||
output_shape[self.concat_axis] += shape[self.concat_axis]
|
||||
return tuple(output_shape)
|
||||
elif self.mode == 'dot':
|
||||
elif self.mode in ['dot', 'cos']:
|
||||
shape1 = list(input_shapes[0])
|
||||
shape2 = list(input_shapes[1])
|
||||
dot_axes = [a - 1 for a in self.dot_axes]
|
||||
tensordot_output = np.tensordot(np.zeros(tuple(shape1[1:])),
|
||||
np.zeros(tuple(shape2[1:])),
|
||||
axes=dot_axes)
|
||||
if len(tensordot_output.shape) == 0:
|
||||
shape = (1,)
|
||||
else:
|
||||
shape = tensordot_output.shape
|
||||
return (shape1[0],) + shape
|
||||
elif self.mode == 'cos':
|
||||
return (input_shapes[0][0], 1)
|
||||
shape1.pop(self.dot_axes[0])
|
||||
shape2.pop(self.dot_axes[1])
|
||||
shape2.pop(0)
|
||||
output_shape = shape1 + shape2
|
||||
if len(output_shape) == 1:
|
||||
output_shape += [1]
|
||||
return tuple(output_shape)
|
||||
|
||||
def compute_mask(self, inputs, mask=None):
|
||||
if mask is None or not any([m is not None for m in mask]):
|
||||
if mask is None or all([m is None for m in mask]):
|
||||
return None
|
||||
|
||||
assert hasattr(mask, '__len__') and len(mask) == len(inputs)
|
||||
@@ -1330,9 +1386,19 @@ class Merge(Layer):
|
||||
masks = [K.expand_dims(m, 0) for m in mask if m is not None]
|
||||
return K.all(K.concatenate(masks, axis=0), axis=0, keepdims=False)
|
||||
elif self.mode == 'concat':
|
||||
masks = [K.ones_like(inputs[i][:-1]) if m is None else m for i, m in zip(inputs, mask)]
|
||||
expanded_dims = [K.expand_dims(m) for m in masks]
|
||||
concatenated = K.concatenate(expanded_dims, axis=self.concat_axis)
|
||||
# Make a list of masks while making sure the dimensionality of each mask
|
||||
# is the same as the corresponding input.
|
||||
masks = []
|
||||
for input_i, mask_i in zip(inputs, mask):
|
||||
if mask_i is None:
|
||||
# Input is unmasked. Append all 1s to masks, but cast it to uint8 first
|
||||
masks.append(K.cast(K.ones_like(input_i), 'uint8'))
|
||||
elif K.ndim(mask_i) < K.ndim(input_i):
|
||||
# Mask is smaller than the input, expand it
|
||||
masks.append(K.expand_dims(mask_i))
|
||||
else:
|
||||
masks.append(mask_i)
|
||||
concatenated = K.concatenate(masks, axis=self.concat_axis)
|
||||
return K.all(concatenated, axis=-1, keepdims=False)
|
||||
elif self.mode in ['cos', 'dot']:
|
||||
return None
|
||||
@@ -1426,7 +1492,7 @@ def merge(inputs, mode='sum', concat_axis=-1,
|
||||
If lambda/function, it should take as input a list of tensors
|
||||
and return a single tensor.
|
||||
concat_axis: integer, axis to use in mode `concat`.
|
||||
dot_axes: integer or tuple of integers, axes to use in mode `dot`.
|
||||
dot_axes: integer or tuple of integers, axes to use in mode `dot` or `cos`.
|
||||
output_shape: shape tuple (tuple of integers), or lambda/function
|
||||
to compute output_shape (only if merge mode is a lambda/function).
|
||||
If the latter case, it should take as input a list of shape tuples
|
||||
@@ -1593,21 +1659,27 @@ class Container(Layer):
|
||||
raise Exception('Output tensors to a ' + cls_name + ' must be '
|
||||
'Keras tensors. Found: ' + str(x))
|
||||
# build self.output_layers:
|
||||
masks = []
|
||||
for x in self.outputs:
|
||||
layer, node_index, tensor_index = x._keras_history
|
||||
self.output_layers.append(layer)
|
||||
self.output_layers_node_indices.append(node_index)
|
||||
self.output_layers_tensor_indices.append(tensor_index)
|
||||
|
||||
# also fill in the output mask cache
|
||||
# fill in the output mask cache
|
||||
masks = []
|
||||
for x in self.inputs:
|
||||
layer, node_index, tensor_index = x._keras_history
|
||||
node = layer.inbound_nodes[node_index]
|
||||
mask = node.output_masks[tensor_index]
|
||||
masks.append(mask)
|
||||
|
||||
# output mask cache
|
||||
mask_cache_key = ','.join([str(id(x)) for x in self.inputs])
|
||||
mask_cache_key += '_' + ','.join([str(id(x)) for x in masks])
|
||||
masks = []
|
||||
for x in self.outputs:
|
||||
layer, node_index, tensor_index = x._keras_history
|
||||
node = layer.inbound_nodes[node_index]
|
||||
mask = node.output_masks[tensor_index]
|
||||
masks.append(mask)
|
||||
if len(masks) == 1:
|
||||
mask = masks[0]
|
||||
else:
|
||||
@@ -2105,6 +2177,8 @@ class Container(Layer):
|
||||
for x, s in zip(output_tensors, shapes):
|
||||
x._keras_shape = s
|
||||
x._uses_learning_phase = uses_learning_phase
|
||||
|
||||
# update tensor_map
|
||||
for x, y, mask in zip(reference_output_tensors, output_tensors, output_masks):
|
||||
tensor_map[str(id(x))] = (y, mask)
|
||||
|
||||
@@ -2278,7 +2352,38 @@ class Container(Layer):
|
||||
output_tensors.append(layer_output_tensors[tensor_index])
|
||||
return cls(input=input_tensors, output=output_tensors, name=name)
|
||||
|
||||
def save_weights(self, filepath, overwrite=False):
|
||||
def save(self, filepath, overwrite=True):
    '''Write the complete state of the model to one HDF5 file.

    The work is delegated to `save_model` from the sibling `models`
    module. As described for this method, the file holds:
        - the model architecture, allowing to re-instantiate the model
        - the model weights
        - the state of the optimizer, allowing to resume training
          exactly where you left off.

    A saved model can be re-created with `keras.models.load_model`.
    The model returned by `load_model` is a compiled model ready to be
    used (unless the saved model was never compiled in the first place).

    # Arguments
        filepath: destination of the HDF5 file, forwarded to `save_model`.
        overwrite: forwarded unchanged to `save_model`
            (presumably decides whether an existing file is replaced
            silently - confirm in `save_model`).

    # Example usage

    ```python
    from keras.models import load_model

    model.save('my_model.h5')  # creates a HDF5 file 'my_model.h5'
    del model  # deletes the existing model

    # returns a compiled model
    # identical to the previous one
    model = load_model('my_model.h5')
    ```
    '''
    # The import stays local to the method, as in the original.
    from ..models import save_model as write_model
    write_model(self, filepath, overwrite)
|
||||
|
||||
def save_weights(self, filepath, overwrite=True):
|
||||
'''Dumps all layer weights to a HDF5 file.
|
||||
|
||||
The weight file has:
|
||||
@@ -2291,33 +2396,28 @@ class Container(Layer):
|
||||
storing the weight value, named after the weight tensor
|
||||
'''
|
||||
import h5py
|
||||
import os.path
|
||||
# if file exists and should not be overwritten
|
||||
if not overwrite and os.path.isfile(filepath):
|
||||
import sys
|
||||
get_input = input
|
||||
if sys.version_info[:2] <= (2, 7):
|
||||
get_input = raw_input
|
||||
overwrite = get_input('[WARNING] %s already exists - overwrite? '
|
||||
'[y/n]' % (filepath))
|
||||
while overwrite not in ['y', 'n']:
|
||||
overwrite = get_input('Enter "y" (overwrite) or "n" (cancel).')
|
||||
if overwrite == 'n':
|
||||
proceed = ask_to_proceed_with_overwrite(filepath)
|
||||
if not proceed:
|
||||
return
|
||||
print('[TIP] Next time specify overwrite=True in save_weights!')
|
||||
f = h5py.File(filepath, 'w')
|
||||
self.save_weights_to_hdf5_group(f)
|
||||
f.flush()
|
||||
f.close()
|
||||
|
||||
def save_weights_to_hdf5_group(self, f):
|
||||
if hasattr(self, 'flattened_layers'):
|
||||
# support for legacy Sequential/Merge behavior
|
||||
flattened_layers = self.flattened_layers
|
||||
else:
|
||||
flattened_layers = self.layers
|
||||
|
||||
f = h5py.File(filepath, 'w')
|
||||
f.attrs['layer_names'] = [layer.name.encode('utf8') for layer in flattened_layers]
|
||||
|
||||
for layer in flattened_layers:
|
||||
g = f.create_group(layer.name)
|
||||
symbolic_weights = layer.trainable_weights + layer.non_trainable_weights
|
||||
symbolic_weights = layer.weights
|
||||
weight_values = K.batch_get_value(symbolic_weights)
|
||||
weight_names = []
|
||||
for i, (w, val) in enumerate(zip(symbolic_weights, weight_values)):
|
||||
@@ -2330,16 +2430,30 @@ class Container(Layer):
|
||||
for name, val in zip(weight_names, weight_values):
|
||||
param_dset = g.create_dataset(name, val.shape,
|
||||
dtype=val.dtype)
|
||||
param_dset[:] = val
|
||||
f.flush()
|
||||
f.close()
|
||||
if not val.shape:
|
||||
# scalar
|
||||
param_dset[()] = val
|
||||
else:
|
||||
param_dset[:] = val
|
||||
|
||||
def load_weights(self, filepath):
    '''Load all layer weights from a HDF5 save file.

    The weights are read either from the root of the file or, when the
    root carries no `layer_names` attribute but contains a
    `model_weights` group, from that group (presumably the layout
    written by `save` - confirm against `save_model`).

    # Arguments
        filepath: path of the HDF5 file to read.
    '''
    import h5py
    h5_file = h5py.File(filepath, mode='r')
    try:
        # Keep the file handle separate from the group that is read:
        # a group has no `close`, so rebinding the handle to the group
        # would leave the file open.
        weights_group = h5_file
        if 'layer_names' not in h5_file.attrs and 'model_weights' in h5_file:
            weights_group = h5_file['model_weights']
        self.load_weights_from_hdf5_group(weights_group)
    finally:
        # Close the file on success and on failure alike.
        h5_file.close()
|
||||
|
||||
def load_weights_from_hdf5_group(self, f):
|
||||
'''Weight loading is based on layer order in a list
|
||||
(matching model.flattened_layers for Sequential models,
|
||||
and model.layers for Model class instances), not
|
||||
on layer names.
|
||||
Layers that have no weights are skipped.
|
||||
'''
|
||||
if hasattr(self, 'flattened_layers'):
|
||||
# support for legacy Sequential/Merge behavior
|
||||
flattened_layers = self.flattened_layers
|
||||
@@ -2353,7 +2467,7 @@ class Container(Layer):
|
||||
raise Exception('You are trying to load a weight file '
|
||||
'containing ' + str(nb_layers) +
|
||||
' layers into a model with ' +
|
||||
str(len(flattened_layers)) + '.')
|
||||
str(len(flattened_layers)) + ' layers.')
|
||||
|
||||
for k in range(nb_layers):
|
||||
g = f['layer_{}'.format(k)]
|
||||
@@ -2361,7 +2475,21 @@ class Container(Layer):
|
||||
flattened_layers[k].set_weights(weights)
|
||||
else:
|
||||
# new file format
|
||||
filtered_layers = []
|
||||
for layer in flattened_layers:
|
||||
weights = layer.weights
|
||||
if weights:
|
||||
filtered_layers.append(layer)
|
||||
flattened_layers = filtered_layers
|
||||
|
||||
layer_names = [n.decode('utf8') for n in f.attrs['layer_names']]
|
||||
filtered_layer_names = []
|
||||
for name in layer_names:
|
||||
g = f[name]
|
||||
weight_names = [n.decode('utf8') for n in g.attrs['weight_names']]
|
||||
if len(weight_names):
|
||||
filtered_layer_names.append(name)
|
||||
layer_names = filtered_layer_names
|
||||
if len(layer_names) != len(flattened_layers):
|
||||
raise Exception('You are trying to load a weight file '
|
||||
'containing ' + str(len(layer_names)) +
|
||||
@@ -2374,24 +2502,22 @@ class Container(Layer):
|
||||
for k, name in enumerate(layer_names):
|
||||
g = f[name]
|
||||
weight_names = [n.decode('utf8') for n in g.attrs['weight_names']]
|
||||
if len(weight_names):
|
||||
weight_values = [g[weight_name] for weight_name in weight_names]
|
||||
layer = flattened_layers[k]
|
||||
symbolic_weights = layer.trainable_weights + layer.non_trainable_weights
|
||||
if len(weight_values) != len(symbolic_weights):
|
||||
raise Exception('Layer #' + str(k) +
|
||||
' (named "' + layer.name +
|
||||
'" in the current model) was found to '
|
||||
'correspond to layer ' + name +
|
||||
' in the save file. '
|
||||
'However the new layer ' + layer.name +
|
||||
' expects ' + str(len(symbolic_weights)) +
|
||||
' weights, but the saved weights have ' +
|
||||
str(len(weight_values)) +
|
||||
' elements.')
|
||||
weight_value_tuples += zip(symbolic_weights, weight_values)
|
||||
weight_values = [g[weight_name] for weight_name in weight_names]
|
||||
layer = flattened_layers[k]
|
||||
symbolic_weights = layer.weights
|
||||
if len(weight_values) != len(symbolic_weights):
|
||||
raise Exception('Layer #' + str(k) +
|
||||
' (named "' + layer.name +
|
||||
'" in the current model) was found to '
|
||||
'correspond to layer ' + name +
|
||||
' in the save file. '
|
||||
'However the new layer ' + layer.name +
|
||||
' expects ' + str(len(symbolic_weights)) +
|
||||
' weights, but the saved weights have ' +
|
||||
str(len(weight_values)) +
|
||||
' elements.')
|
||||
weight_value_tuples += zip(symbolic_weights, weight_values)
|
||||
K.batch_set_value(weight_value_tuples)
|
||||
f.close()
|
||||
|
||||
def _updated_config(self):
|
||||
'''shared between different serialization methods'''
|
||||
@@ -2403,14 +2529,6 @@ class Container(Layer):
|
||||
'config': config,
|
||||
'keras_version': keras_version
|
||||
}
|
||||
|
||||
if hasattr(self, 'optimizer'):
|
||||
model_config['optimizer'] = self.optimizer.get_config()
|
||||
model_config['loss'] = getattr(self.loss, '__name__', self.loss)
|
||||
model_config['sample_weight_mode'] = self.sample_weight_mode
|
||||
|
||||
if hasattr(self, 'loss_weights'):
|
||||
model_config['loss_weights'] = self.loss_weights
|
||||
return model_config
|
||||
|
||||
def to_json(self, **kwargs):
|
||||
@@ -2430,7 +2548,7 @@ class Container(Layer):
|
||||
if type(obj).__name__ == type.__name__:
|
||||
return obj.__name__
|
||||
|
||||
raise TypeError('Not JSON Serializable')
|
||||
raise TypeError('Not JSON Serializable:', obj)
|
||||
|
||||
model_config = self._updated_config()
|
||||
return json.dumps(model_config, default=get_json_type, **kwargs)
|
||||
|
||||
+111
-62
@@ -5,6 +5,7 @@ import warnings
|
||||
import copy
|
||||
import time
|
||||
import numpy as np
|
||||
import multiprocessing
|
||||
import threading
|
||||
try:
|
||||
import queue
|
||||
@@ -205,12 +206,12 @@ def check_loss_and_target_compatibility(targets, losses, output_shapes):
|
||||
'`sparse_categorical_crossentropy` instead, '
|
||||
'which does expect integer targets.')
|
||||
if loss.__name__ in key_losses and shape[1] is not None and y.shape[1] != shape[1]:
|
||||
raise Exception('A target array with shape ' + str(y.shape) +
|
||||
' was passed for an output of shape ' + str(shape) +
|
||||
' while using as loss `' + loss.__name__ + '`. '
|
||||
'This loss expects '
|
||||
'targets to have the same shape '
|
||||
'as the output.')
|
||||
raise Exception('A target array with shape ' + str(y.shape) +
|
||||
' was passed for an output of shape ' + str(shape) +
|
||||
' while using as loss `' + loss.__name__ + '`. '
|
||||
'This loss expects '
|
||||
'targets to have the same shape '
|
||||
'as the output.')
|
||||
|
||||
|
||||
def collect_metrics(metrics, output_names):
|
||||
@@ -395,40 +396,62 @@ def standardize_weights(y, sample_weight=None, class_weight=None,
|
||||
return weights
|
||||
else:
|
||||
if sample_weight_mode is None:
|
||||
return np.ones((y.shape[0],))
|
||||
return np.ones((y.shape[0],), dtype=K.floatx())
|
||||
else:
|
||||
return np.ones((y.shape[0], y.shape[1]))
|
||||
return np.ones((y.shape[0], y.shape[1]), dtype=K.floatx())
|
||||
|
||||
|
||||
def generator_queue(generator, max_q_size=10,
                    wait_time=0.05, nb_worker=1, pickle_safe=False):
    '''Builds a queue out of a data generator.
    If pickle_safe, use a multiprocessing approach. Else, use threading.
    Used in `fit_generator`, `evaluate_generator`, `predict_generator`.

    # Arguments
        generator: iterator whose items are fetched with `next()` and
            put on the queue.
        max_q_size: workers only fetch a new item while the queue
            reports fewer than this many items.
        wait_time: seconds a worker sleeps when the queue is full.
        nb_worker: number of worker threads (or processes) to start.
        pickle_safe: if True, workers are `multiprocessing.Process`
            instances feeding a `multiprocessing.Queue`; otherwise
            they are threads feeding a `queue.Queue`.

    # Returns
        A tuple `(q, _stop)`: the queue being filled and the event
        which, once set, makes every worker leave its loop.
    '''
    generator_threads = []
    if pickle_safe:
        q = multiprocessing.Queue(maxsize=max_q_size)
        _stop = multiprocessing.Event()
    else:
        q = queue.Queue()
        _stop = threading.Event()

    try:
        def data_generator_task():
            while not _stop.is_set():
                try:
                    if q.qsize() < max_q_size:
                        try:
                            generator_output = next(generator)
                        except ValueError:
                            # presumably "generator already executing"
                            # when several workers share the generator;
                            # just try again
                            continue
                        q.put(generator_output)
                    else:
                        time.sleep(wait_time)
                except Exception:
                    # any other failure stops all workers
                    _stop.set()
                    raise

        for _ in range(nb_worker):
            if pickle_safe:
                # Reset random seed else all children processes share the same seed
                np.random.seed()
                thread = multiprocessing.Process(target=data_generator_task)
            else:
                thread = threading.Thread(target=data_generator_task)
            generator_threads.append(thread)
            thread.daemon = True
            thread.start()
    except BaseException:
        # Same reach as the former bare `except:`; cleanup must also run
        # on KeyboardInterrupt/SystemExit before re-raising.
        _stop.set()
        if pickle_safe:
            # Terminate all daemon processes
            for p in generator_threads:
                if p.is_alive():
                    p.terminate()
            q.close()
        raise

    return q, _stop
|
||||
|
||||
@@ -485,7 +508,7 @@ class Model(Container):
|
||||
'it should have one entry per model outputs. '
|
||||
'The model has ' + str(len(self.outputs)) +
|
||||
' outputs, but you passed loss_weights=' +
|
||||
str(loss))
|
||||
str(loss_weights))
|
||||
loss_weights_list = loss_weights
|
||||
else:
|
||||
raise Exception('Could not interpret loss_weights argument: ' +
|
||||
@@ -585,8 +608,9 @@ class Model(Container):
|
||||
self.targets.append(K.placeholder(ndim=len(shape), name=name + '_target'))
|
||||
|
||||
# prepare metrics
|
||||
self.metrics = metrics
|
||||
self.metrics_names = ['loss']
|
||||
self.metrics = []
|
||||
self.metrics_tensors = []
|
||||
|
||||
# compute total loss
|
||||
total_loss = None
|
||||
@@ -600,7 +624,7 @@ class Model(Container):
|
||||
output_loss = weighted_loss(y_true, y_pred,
|
||||
sample_weight, mask)
|
||||
if len(self.outputs) > 1:
|
||||
self.metrics.append(output_loss)
|
||||
self.metrics_tensors.append(output_loss)
|
||||
self.metrics_names.append(self.output_names[i] + '_loss')
|
||||
if total_loss is None:
|
||||
total_loss = loss_weight * output_loss
|
||||
@@ -623,23 +647,23 @@ class Model(Container):
|
||||
if metric == 'accuracy' or metric == 'acc':
|
||||
# custom handling of accuracy (because of class mode duality)
|
||||
output_shape = self.internal_output_shapes[i]
|
||||
if output_shape[-1] == 1:
|
||||
if output_shape[-1] == 1 or self.loss_functions[i] == objectives.binary_crossentropy:
|
||||
# case: binary accuracy
|
||||
self.metrics.append(metrics_module.binary_accuracy(y_true, y_pred))
|
||||
self.metrics_tensors.append(metrics_module.binary_accuracy(y_true, y_pred))
|
||||
elif self.loss_functions[i] == objectives.sparse_categorical_crossentropy:
|
||||
# case: categorical accuracy with sparse targets
|
||||
self.metrics.append(
|
||||
self.metrics_tensors.append(
|
||||
metrics_module.sparse_categorical_accuracy(y_true, y_pred))
|
||||
else:
|
||||
# case: categorical accuracy with dense targets
|
||||
self.metrics.append(metrics_module.categorical_accuracy(y_true, y_pred))
|
||||
self.metrics_tensors.append(metrics_module.categorical_accuracy(y_true, y_pred))
|
||||
if len(self.output_names) == 1:
|
||||
self.metrics_names.append('acc')
|
||||
else:
|
||||
self.metrics_names.append(self.output_layers[i].name + '_acc')
|
||||
else:
|
||||
metric_fn = metrics_module.get(metric)
|
||||
self.metrics.append(metric_fn(y_true, y_pred))
|
||||
self.metrics_tensors.append(metric_fn(y_true, y_pred))
|
||||
if len(self.output_names) == 1:
|
||||
self.metrics_names.append(metric_fn.__name__)
|
||||
else:
|
||||
@@ -663,7 +687,7 @@ class Model(Container):
|
||||
if not hasattr(self, 'train_function'):
|
||||
raise Exception('You must compile your model before using it.')
|
||||
if self.train_function is None:
|
||||
if self.uses_learning_phase:
|
||||
if self.uses_learning_phase and type(K.learning_phase()) is not int:
|
||||
inputs = self.inputs + self.targets + self.sample_weights + [K.learning_phase()]
|
||||
else:
|
||||
inputs = self.inputs + self.targets + self.sample_weights
|
||||
@@ -675,7 +699,7 @@ class Model(Container):
|
||||
|
||||
# returns loss and metrics. Updates weights at each call.
|
||||
self.train_function = K.function(inputs,
|
||||
[self.total_loss] + self.metrics,
|
||||
[self.total_loss] + self.metrics_tensors,
|
||||
updates=updates,
|
||||
**self._function_kwargs)
|
||||
|
||||
@@ -683,14 +707,14 @@ class Model(Container):
|
||||
if not hasattr(self, 'test_function'):
|
||||
raise Exception('You must compile your model before using it.')
|
||||
if self.test_function is None:
|
||||
if self.uses_learning_phase:
|
||||
if self.uses_learning_phase and type(K.learning_phase()) is not int:
|
||||
inputs = self.inputs + self.targets + self.sample_weights + [K.learning_phase()]
|
||||
else:
|
||||
inputs = self.inputs + self.targets + self.sample_weights
|
||||
# return loss and metrics, no gradient updates.
|
||||
# Does update the network states.
|
||||
self.test_function = K.function(inputs,
|
||||
[self.total_loss] + self.metrics,
|
||||
[self.total_loss] + self.metrics_tensors,
|
||||
updates=self.state_updates,
|
||||
**self._function_kwargs)
|
||||
|
||||
@@ -698,7 +722,7 @@ class Model(Container):
|
||||
if not hasattr(self, 'predict_function'):
|
||||
self.predict_function = None
|
||||
if self.predict_function is None:
|
||||
if self.uses_learning_phase:
|
||||
if self.uses_learning_phase and type(K.learning_phase()) is not int:
|
||||
inputs = self.inputs + [K.learning_phase()]
|
||||
else:
|
||||
inputs = self.inputs
|
||||
@@ -858,7 +882,7 @@ class Model(Container):
|
||||
if batch_index == 0:
|
||||
for batch_out in batch_outs:
|
||||
shape = (nb_sample,) + batch_out.shape[1:]
|
||||
outs.append(np.zeros(shape))
|
||||
outs.append(np.zeros(shape, dtype=K.floatx()))
|
||||
|
||||
for i, batch_out in enumerate(batch_outs):
|
||||
outs[i][batch_start:batch_end] = batch_out
|
||||
@@ -1025,7 +1049,7 @@ class Model(Container):
|
||||
batch_size=batch_size)
|
||||
self._make_test_function()
|
||||
val_f = self.test_function
|
||||
if self.uses_learning_phase:
|
||||
if self.uses_learning_phase and type(K.learning_phase()) is not int:
|
||||
val_ins = val_x + val_y + val_sample_weights + [0.]
|
||||
else:
|
||||
val_ins = val_x + val_y + val_sample_weights
|
||||
@@ -1035,10 +1059,11 @@ class Model(Container):
|
||||
split_at = int(len(x[0]) * (1. - validation_split))
|
||||
x, val_x = (slice_X(x, 0, split_at), slice_X(x, split_at))
|
||||
y, val_y = (slice_X(y, 0, split_at), slice_X(y, split_at))
|
||||
sample_weights, val_sample_weights = (slice_X(sample_weights, 0, split_at), slice_X(sample_weights, split_at))
|
||||
sample_weights, val_sample_weights = (
|
||||
slice_X(sample_weights, 0, split_at), slice_X(sample_weights, split_at))
|
||||
self._make_test_function()
|
||||
val_f = self.test_function
|
||||
if self.uses_learning_phase:
|
||||
if self.uses_learning_phase and type(K.learning_phase()) is not int:
|
||||
val_ins = val_x + val_y + val_sample_weights + [0.]
|
||||
else:
|
||||
val_ins = val_x + val_y + val_sample_weights
|
||||
@@ -1048,7 +1073,7 @@ class Model(Container):
|
||||
val_ins = None
|
||||
|
||||
# prepare input arrays and training function
|
||||
if self.uses_learning_phase:
|
||||
if self.uses_learning_phase and type(K.learning_phase()) is not int:
|
||||
ins = x + y + sample_weights + [1.]
|
||||
else:
|
||||
ins = x + y + sample_weights
|
||||
@@ -1108,7 +1133,7 @@ class Model(Container):
|
||||
check_batch_dim=False,
|
||||
batch_size=batch_size)
|
||||
# prepare inputs, delegate logic to _test_loop
|
||||
if self.uses_learning_phase:
|
||||
if self.uses_learning_phase and type(K.learning_phase()) is not int:
|
||||
ins = x + y + sample_weights + [0.]
|
||||
else:
|
||||
ins = x + y + sample_weights
|
||||
@@ -1145,7 +1170,7 @@ class Model(Container):
|
||||
'Batch size: ' + str(batch_size) + '.')
|
||||
|
||||
# prepare inputs, delegate logic to _predict_loop
|
||||
if self.uses_learning_phase:
|
||||
if self.uses_learning_phase and type(K.learning_phase()) is not int:
|
||||
ins = x + [0.]
|
||||
else:
|
||||
ins = x
|
||||
@@ -1189,7 +1214,7 @@ class Model(Container):
|
||||
sample_weight=sample_weight,
|
||||
class_weight=class_weight,
|
||||
check_batch_dim=True)
|
||||
if self.uses_learning_phase:
|
||||
if self.uses_learning_phase and type(K.learning_phase()) is not int:
|
||||
ins = x + y + sample_weights + [1.]
|
||||
else:
|
||||
ins = x + y + sample_weights
|
||||
@@ -1227,7 +1252,7 @@ class Model(Container):
|
||||
x, y, sample_weights = self._standardize_user_data(x, y,
|
||||
sample_weight=sample_weight,
|
||||
check_batch_dim=True)
|
||||
if self.uses_learning_phase:
|
||||
if self.uses_learning_phase and type(K.learning_phase()) is not int:
|
||||
ins = x + y + sample_weights + [0.]
|
||||
else:
|
||||
ins = x + y + sample_weights
|
||||
@@ -1242,7 +1267,7 @@ class Model(Container):
|
||||
'''
|
||||
x = standardize_input_data(x, self.input_names,
|
||||
self.internal_input_shapes)
|
||||
if self.uses_learning_phase:
|
||||
if self.uses_learning_phase and type(K.learning_phase()) is not int:
|
||||
ins = x + [0.]
|
||||
else:
|
||||
ins = x
|
||||
@@ -1255,7 +1280,7 @@ class Model(Container):
|
||||
def fit_generator(self, generator, samples_per_epoch, nb_epoch,
|
||||
verbose=1, callbacks=[],
|
||||
validation_data=None, nb_val_samples=None,
|
||||
class_weight={}, max_q_size=10):
|
||||
class_weight={}, max_q_size=10, nb_worker=1, pickle_safe=False):
|
||||
'''Fits the model on data generated batch-by-batch by
|
||||
a Python generator.
|
||||
The generator is run in parallel to the model, for efficiency.
|
||||
@@ -1286,6 +1311,11 @@ class Model(Container):
|
||||
class_weight: dictionary mapping class indices to a weight
|
||||
for the class.
|
||||
max_q_size: maximum size for the generator queue
|
||||
nb_worker: maximum number of processes to spin up when using process based threading
|
||||
pickle_safe: if True, use process based threading. Note that because
|
||||
this implementation relies on multiprocessing, you should not pass non
|
||||
picklable arguments to the generator as they can't be passed
|
||||
easily to children processes.
|
||||
|
||||
# Returns
|
||||
A `History` object.
|
||||
@@ -1364,7 +1394,8 @@ class Model(Container):
|
||||
self.validation_data = None
|
||||
|
||||
# start generator thread storing batches into a queue
|
||||
data_gen_queue, _stop = generator_queue(generator, max_q_size=max_q_size)
|
||||
data_gen_queue, _stop = generator_queue(generator, max_q_size=max_q_size, nb_worker=nb_worker,
|
||||
pickle_safe=pickle_safe)
|
||||
|
||||
callback_model.stop_training = False
|
||||
while epoch < nb_epoch:
|
||||
@@ -1457,10 +1488,12 @@ class Model(Container):
|
||||
break
|
||||
|
||||
_stop.set()
|
||||
if pickle_safe:
|
||||
data_gen_queue.close()
|
||||
callbacks.on_train_end()
|
||||
return self.history
|
||||
|
||||
def evaluate_generator(self, generator, val_samples, max_q_size=10):
|
||||
def evaluate_generator(self, generator, val_samples, max_q_size=10, nb_worker=1, pickle_safe=False):
|
||||
'''Evaluates the model on a data generator. The generator should
|
||||
return the same kind of data as accepted by `test_on_batch`.
|
||||
|
||||
@@ -1472,6 +1505,11 @@ class Model(Container):
|
||||
total number of samples to generate from `generator`
|
||||
before returning.
|
||||
max_q_size: maximum size for the generator queue
|
||||
nb_worker: maximum number of processes to spin up when using process based threading
|
||||
pickle_safe: if True, use process based threading. Note that because
|
||||
this implementation relies on multiprocessing, you should not pass non
|
||||
picklable arguments to the generator as they can't be passed
|
||||
easily to children processes.
|
||||
|
||||
# Returns
|
||||
Scalar test loss (if the model has a single output and no metrics)
|
||||
@@ -1485,7 +1523,8 @@ class Model(Container):
|
||||
wait_time = 0.01
|
||||
all_outs = []
|
||||
weights = []
|
||||
data_gen_queue, _stop = generator_queue(generator, max_q_size=max_q_size)
|
||||
data_gen_queue, _stop = generator_queue(generator, max_q_size=max_q_size, nb_worker=nb_worker,
|
||||
pickle_safe=pickle_safe)
|
||||
|
||||
while processed_samples < val_samples:
|
||||
generator_output = None
|
||||
@@ -1529,6 +1568,8 @@ class Model(Container):
|
||||
weights.append(nb_samples)
|
||||
|
||||
_stop.set()
|
||||
if pickle_safe:
|
||||
data_gen_queue.close()
|
||||
if type(outs) is not list:
|
||||
return np.average(np.asarray(all_outs),
|
||||
weights=weights)
|
||||
@@ -1536,10 +1577,10 @@ class Model(Container):
|
||||
averages = []
|
||||
for i in range(len(outs)):
|
||||
averages.append(np.average([out[i] for out in all_outs],
|
||||
weights=weights))
|
||||
weights=weights))
|
||||
return averages
|
||||
|
||||
def predict_generator(self, generator, val_samples, max_q_size=10):
|
||||
def predict_generator(self, generator, val_samples, max_q_size=10, nb_worker=1, pickle_safe=False):
|
||||
'''Generates predictions for the input samples from a data generator.
|
||||
The generator should return the same kind of data as accepted by
|
||||
`predict_on_batch`.
|
||||
@@ -1549,6 +1590,11 @@ class Model(Container):
|
||||
val_samples: total number of samples to generate from `generator`
|
||||
before returning.
|
||||
max_q_size: maximum size for the generator queue
|
||||
nb_worker: maximum number of processes to spin up when using process based threading
|
||||
pickle_safe: if True, use process based threading. Note that because
|
||||
this implementation relies on multiprocessing, you should not pass non
|
||||
picklable arguments to the generator as they can't be passed
|
||||
easily to children processes.
|
||||
|
||||
# Returns
|
||||
Numpy array(s) of predictions.
|
||||
@@ -1558,7 +1604,8 @@ class Model(Container):
|
||||
processed_samples = 0
|
||||
wait_time = 0.01
|
||||
all_outs = []
|
||||
data_gen_queue, _stop = generator_queue(generator, max_q_size=max_q_size)
|
||||
data_gen_queue, _stop = generator_queue(generator, max_q_size=max_q_size, nb_worker=nb_worker,
|
||||
pickle_safe=pickle_safe)
|
||||
|
||||
while processed_samples < val_samples:
|
||||
generator_output = None
|
||||
@@ -1602,7 +1649,7 @@ class Model(Container):
|
||||
if len(all_outs) == 0:
|
||||
for out in outs:
|
||||
shape = (val_samples,) + out.shape[1:]
|
||||
all_outs.append(np.zeros(shape))
|
||||
all_outs.append(np.zeros(shape, dtype=K.floatx()))
|
||||
|
||||
for i, out in enumerate(outs):
|
||||
all_outs[i][processed_samples:(processed_samples + nb_samples)] = out
|
||||
@@ -1610,6 +1657,8 @@ class Model(Container):
|
||||
processed_samples += nb_samples
|
||||
|
||||
_stop.set()
|
||||
if pickle_safe:
|
||||
data_gen_queue.close()
|
||||
if len(all_outs) == 1:
|
||||
return all_outs[0]
|
||||
return all_outs
|
||||
|
||||
@@ -29,13 +29,11 @@ def get_fans(shape, dim_ordering='th'):
|
||||
|
||||
|
||||
def uniform(shape, scale=0.05, name=None):
    '''Create a backend variable initialized uniformly at random.

    # Arguments
        shape: shape of the variable to create.
        scale: values are drawn between `-scale` and `scale`.
        name: optional name forwarded to the backend.

    # Returns
        The variable returned by `K.random_uniform_variable`.
    '''
    # Single return: the variable is created by the backend directly
    # rather than from a numpy array.
    return K.random_uniform_variable(shape, -scale, scale, name=name)
|
||||
|
||||
|
||||
def normal(shape, scale=0.05, name=None):
    '''Create a backend variable initialized from a normal distribution.

    # Arguments
        shape: shape of the variable to create.
        scale: standard deviation of the distribution (the mean is 0.0).
        name: optional name forwarded to the backend.

    # Returns
        The variable returned by `K.random_normal_variable`.
    '''
    # Single return: the variable is created by the backend directly
    # rather than from a numpy array.
    return K.random_normal_variable(shape, 0.0, scale, name=name)
|
||||
|
||||
|
||||
def lecun_uniform(shape, name=None, dim_ordering='th'):
|
||||
|
||||
@@ -2,6 +2,8 @@ from __future__ import absolute_import
|
||||
from ..engine import Layer, Input, InputLayer, Merge, merge, InputSpec
|
||||
from .core import *
|
||||
from .convolutional import *
|
||||
from .pooling import *
|
||||
from .local import *
|
||||
from .recurrent import *
|
||||
from .normalization import *
|
||||
from .embeddings import *
|
||||
|
||||
@@ -112,7 +112,7 @@ class ELU(Layer):
|
||||
return pos + self.alpha * (K.exp(neg) - 1.)
|
||||
|
||||
def get_config(self):
|
||||
config = {'alpha': self.alpha}
|
||||
config = {'alpha': float(self.alpha)}
|
||||
base_config = super(ELU, self).get_config()
|
||||
return dict(list(base_config.items()) + list(config.items()))
|
||||
|
||||
@@ -161,8 +161,8 @@ class ParametricSoftplus(Layer):
|
||||
return K.softplus(self.betas * x) * self.alphas
|
||||
|
||||
def get_config(self):
|
||||
config = {'alpha_init': self.alpha_init,
|
||||
'beta_init': self.beta_init}
|
||||
config = {'alpha_init': float(self.alpha_init),
|
||||
'beta_init': float(self.beta_init)}
|
||||
base_config = super(ParametricSoftplus, self).get_config()
|
||||
return dict(list(base_config.items()) + list(config.items()))
|
||||
|
||||
@@ -195,7 +195,7 @@ class ThresholdedReLU(Layer):
|
||||
return x * K.cast(x > self.theta, K.floatx())
|
||||
|
||||
def get_config(self):
|
||||
config = {'theta': self.theta}
|
||||
config = {'theta': float(self.theta)}
|
||||
base_config = super(ThresholdedReLU, self).get_config()
|
||||
return dict(list(base_config.items()) + list(config.items()))
|
||||
|
||||
|
||||
+482
-429
Diferenças do arquivo suprimidas por serem muito extensas
Carregar Diff
@@ -387,7 +387,14 @@ class Lambda(Layer):
|
||||
function: The function to be evaluated.
|
||||
Takes one argument: the output of previous layer
|
||||
output_shape: Expected output shape from function.
|
||||
Could be a tuple or a function of the shape of the input
|
||||
Can be a tuple or function.
|
||||
If a tuple, it only specifies the first dimension onward;
|
||||
sample dimension is assumed either the same as the input:
|
||||
`output_shape = (input_shape[0], ) + output_shape`
|
||||
or, the input is `None` and the sample dimension is also `None`:
|
||||
`output_shape = (None, ) + output_shape`
|
||||
If a function, it specifies the entire shape as a function of
|
||||
the input shape: `output_shape = f(input_shape)`
|
||||
arguments: optional dictionary of keyword arguments to be passed
|
||||
to the function.
|
||||
|
||||
@@ -402,7 +409,7 @@ class Lambda(Layer):
|
||||
def __init__(self, function, output_shape=None, arguments={}, **kwargs):
|
||||
self.function = function
|
||||
self.arguments = arguments
|
||||
self.supports_masking = True
|
||||
self.supports_masking = False
|
||||
|
||||
if output_shape is None:
|
||||
self._output_shape = None
|
||||
|
||||
@@ -125,6 +125,8 @@ class Embedding(Layer):
|
||||
return (input_shape[0], input_length, self.output_dim)
|
||||
|
||||
def call(self, x, mask=None):
|
||||
if K.dtype(x) != 'int32':
|
||||
x = K.cast(x, 'int32')
|
||||
if 0. < self.dropout < 1.:
|
||||
retain_p = 1. - self.dropout
|
||||
B = K.random_binomial((self.input_dim,), p=retain_p) * (1. / retain_p)
|
||||
|
||||
@@ -0,0 +1,423 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
from __future__ import absolute_import
|
||||
|
||||
from keras import backend as K
|
||||
from keras.layers import activations, initializations, regularizers, constraints
|
||||
from keras.engine import Layer, InputSpec
|
||||
from ..utils.np_utils import conv_output_length
|
||||
|
||||
|
||||
class LocallyConnected1D(Layer):
    '''LocallyConnected1D layer works almost the same as Convolution1D layer,
    except that weights are unshared, that is, a different set of filters is
    applied at each different patch of the input. When using this layer as the
    first layer in a model, either provide the keyword argument `input_dim`
    (int, e.g. 128 for sequences of 128-dimensional vectors), or `input_shape`
    (tuple of integers, e.g. (10, 128) for sequences of 10 vectors of
    128-dimensional vectors). Also, you will need to fix the shape of the
    previous layer, since the weights can only be defined with a determined
    output shape.

    # Example
    ```python
        # apply an unshared-weight 1D convolution of length 3 to a sequence
        # with 10 timesteps, with 64 output filters
        model = Sequential()
        model.add(LocallyConnected1D(64, 3, input_shape=(10, 32)))
        # now model.output_shape == (None, 8, 64)
        # add a new conv1d on top
        model.add(LocallyConnected1D(32, 3))
        # now model.output_shape == (None, 6, 32)
    ```

    # Arguments
        nb_filter: Dimensionality of the output.
        filter_length: The extension (spatial or temporal) of each filter.
        init: name of initialization function for the weights of the layer
            (see [initializations](../initializations.md)),
            or alternatively, Theano function to use for weights initialization.
            This parameter is only relevant if you don't pass a `weights` argument.
        activation: name of activation function to use
            (see [activations](../activations.md)),
            or alternatively, elementwise Theano function.
            If you don't specify anything, no activation is applied
            (ie. "linear" activation: a(x) = x).
        weights: list of numpy arrays to set as initial weights.
        border_mode: Only 'valid' is supported. Please make good use of
            ZeroPadding1D to achieve the same output length.
        subsample_length: factor by which to subsample output.
        W_regularizer: instance of [WeightRegularizer](../regularizers.md)
            (eg. L1 or L2 regularization), applied to the main weights matrix.
        b_regularizer: instance of [WeightRegularizer](../regularizers.md),
            applied to the bias. Ignored when `bias` is False.
        activity_regularizer: instance of [ActivityRegularizer](../regularizers.md),
            applied to the network output.
        W_constraint: instance of the [constraints](../constraints.md) module
            (eg. maxnorm, nonneg), applied to the main weights matrix.
        b_constraint: instance of the [constraints](../constraints.md) module,
            applied to the bias. Ignored when `bias` is False.
        bias: whether to include a bias (i.e. make the layer affine rather than linear).
        input_dim: Number of channels/dimensions in the input.
            Either this argument or the keyword argument `input_shape` must be
            provided when using this layer as the first layer in a model.
        input_length: Length of input sequences, when it is constant.
            This argument is required if you are going to connect
            `Flatten` then `Dense` layers upstream
            (without it, the shape of the dense outputs cannot be computed).

    # Input shape
        3D tensor with shape: `(samples, steps, input_dim)`.

    # Output shape
        3D tensor with shape: `(samples, new_steps, nb_filter)`.
        `steps` value might have changed due to the filter length
        and subsampling.
    '''
    def __init__(self, nb_filter, filter_length,
                 init='uniform', activation='linear', weights=None,
                 border_mode='valid', subsample_length=1,
                 W_regularizer=None, b_regularizer=None, activity_regularizer=None,
                 W_constraint=None, b_constraint=None,
                 bias=True, input_dim=None, input_length=None, **kwargs):
        if border_mode != 'valid':
            raise Exception('Invalid border mode for LocallyConnected1D '
                            '(only "valid" is supported):', border_mode)
        self.nb_filter = nb_filter
        self.filter_length = filter_length
        self.init = initializations.get(init, dim_ordering='th')
        self.activation = activations.get(activation)

        self.border_mode = border_mode
        self.subsample_length = subsample_length

        self.W_regularizer = regularizers.get(W_regularizer)
        self.b_regularizer = regularizers.get(b_regularizer)
        self.activity_regularizer = regularizers.get(activity_regularizer)

        self.W_constraint = constraints.get(W_constraint)
        self.b_constraint = constraints.get(b_constraint)

        self.bias = bias
        self.input_spec = [InputSpec(ndim=3)]
        # Kept until build(), where the weights they initialise are created.
        self.initial_weights = weights
        self.input_dim = input_dim
        self.input_length = input_length
        if self.input_dim:
            # Translate the (input_length, input_dim) shortcut into the
            # `input_shape` keyword handled by the base class.
            kwargs['input_shape'] = (self.input_length, self.input_dim)
        super(LocallyConnected1D, self).__init__(**kwargs)

    def build(self, input_shape):
        '''Creates the per-position weights (and optional bias) and registers
        the configured regularizers and constraints.

        # Arguments
            input_shape: shape tuple; index 2 is read as the input dimension.
        '''
        input_dim = input_shape[2]
        _, output_length, nb_filter = self.get_output_shape_for(input_shape)

        # One (filter_length * input_dim, nb_filter) matrix per output step.
        self.W_shape = (output_length, self.filter_length * input_dim, nb_filter)
        self.W = self.init(self.W_shape, name='{}_W'.format(self.name))
        if self.bias:
            self.b = K.zeros((output_length, self.nb_filter), name='{}_b'.format(self.name))
            self.trainable_weights = [self.W, self.b]
        else:
            self.trainable_weights = [self.W]

        self.regularizers = []
        if self.W_regularizer:
            self.W_regularizer.set_param(self.W)
            self.regularizers.append(self.W_regularizer)
        # `self.b` only exists when bias is enabled; without this guard a
        # bias regularizer combined with bias=False raised AttributeError.
        if self.bias and self.b_regularizer:
            self.b_regularizer.set_param(self.b)
            self.regularizers.append(self.b_regularizer)
        if self.activity_regularizer:
            self.activity_regularizer.set_layer(self)
            self.regularizers.append(self.activity_regularizer)

        self.constraints = {}
        if self.W_constraint:
            self.constraints[self.W] = self.W_constraint
        # Same guard as above: no bias tensor, nothing to constrain.
        if self.bias and self.b_constraint:
            self.constraints[self.b] = self.b_constraint

        if self.initial_weights is not None:
            self.set_weights(self.initial_weights)
            del self.initial_weights

    def get_output_shape_for(self, input_shape):
        '''Returns `(input_shape[0], new_length, nb_filter)`, where the new
        length comes from `conv_output_length` applied to `input_shape[1]`.
        '''
        length = conv_output_length(input_shape[1],
                                    self.filter_length,
                                    self.border_mode,
                                    self.subsample_length)
        return (input_shape[0], length, self.nb_filter)

    def call(self, x, mask=None):
        '''Applies the unshared filters to `x`.

        One window of `filter_length` steps is sliced per output position,
        flattened to `feature_dim` features, and the stacked windows are
        multiplied by the per-position weights with `K.batch_dot`.
        `mask` is accepted but not used.
        '''
        stride = self.subsample_length
        output_length, feature_dim, nb_filter = self.W_shape

        xs = []
        for i in range(output_length):
            slice_length = slice(i * stride, i * stride + self.filter_length)
            xs.append(K.reshape(x[:, slice_length, :], (1, -1, feature_dim)))
        x_aggregate = K.concatenate(xs, axis=0)
        # (output_length, batch_size, nb_filter)
        output = K.batch_dot(x_aggregate, self.W)
        output = K.permute_dimensions(output, (1, 0, 2))

        if self.bias:
            output += K.reshape(self.b, (1, output_length, nb_filter))

        output = self.activation(output)
        return output

    def get_config(self):
        '''Returns the layer configuration: the base layer config updated
        with this layer's constructor arguments.
        '''
        config = {'nb_filter': self.nb_filter,
                  'filter_length': self.filter_length,
                  'init': self.init.__name__,
                  'activation': self.activation.__name__,
                  'border_mode': self.border_mode,
                  'subsample_length': self.subsample_length,
                  'W_regularizer': self.W_regularizer.get_config() if self.W_regularizer else None,
                  'b_regularizer': self.b_regularizer.get_config() if self.b_regularizer else None,
                  'activity_regularizer': self.activity_regularizer.get_config() if self.activity_regularizer else None,
                  'W_constraint': self.W_constraint.get_config() if self.W_constraint else None,
                  'b_constraint': self.b_constraint.get_config() if self.b_constraint else None,
                  'bias': self.bias,
                  'input_dim': self.input_dim,
                  'input_length': self.input_length}
        base_config = super(LocallyConnected1D, self).get_config()
        return dict(list(base_config.items()) + list(config.items()))
|
||||
|
||||
|
||||
class LocallyConnected2D(Layer):
    '''2D locally-connected layer: behaves much like Convolution2D, but the
    weights are unshared, i.e. every patch of the input gets its own set of
    filters. As the first layer of a model, pass the keyword argument
    `input_shape` (tuple of integers, sample axis excluded), e.g.
    `input_shape=(3, 128, 128)` for 128x128 RGB pictures. The shape of the
    previous layer has to be fixed, because the weights can only be defined
    once the output shape is determined.

    # Examples
    ```python
        # apply a 3x3 unshared weights convolution with 64 output filters on a 32x32 image:
        model = Sequential()
        model.add(LocallyConnected2D(64, 3, 3, input_shape=(3, 32, 32)))
        # now model.output_shape == (None, 64, 30, 30)
        # notice that this layer will consume (30*30)*(3*3*3*64) + (30*30)*64 parameters

        # add a 3x3 unshared weights convolution on top, with 32 output filters:
        model.add(LocallyConnected2D(32, 3, 3))
        # now model.output_shape == (None, 32, 28, 28)
    ```

    # Arguments
        nb_filter: Number of convolution filters to use.
        nb_row: Number of rows in the convolution kernel.
        nb_col: Number of columns in the convolution kernel.
        init: name of initialization function for the weights of the layer
            (see [initializations](../initializations.md)), or alternatively,
            Theano function to use for weights initialization.
            Only relevant when no `weights` argument is passed.
        activation: name of activation function to use
            (see [activations](../activations.md)),
            or alternatively, elementwise Theano function.
            When left unspecified no activation is applied
            (ie. "linear" activation: a(x) = x).
        weights: list of numpy arrays to set as initial weights.
        border_mode: Only 'valid' is supported. Use ZeroPadding2D
            to obtain the same output shape.
        subsample: tuple of length 2. Factor by which to subsample output.
            Also called strides elsewhere.
        W_regularizer: instance of [WeightRegularizer](../regularizers.md)
            (eg. L1 or L2 regularization), applied to the main weights matrix.
        b_regularizer: instance of [WeightRegularizer](../regularizers.md),
            applied to the bias.
        activity_regularizer: instance of [ActivityRegularizer](../regularizers.md),
            applied to the network output.
        W_constraint: instance of the [constraints](../constraints.md) module
            (eg. maxnorm, nonneg), applied to the main weights matrix.
        b_constraint: instance of the [constraints](../constraints.md) module,
            applied to the bias.
        dim_ordering: 'th' or 'tf'. In 'th' mode, the channels dimension
            (the depth) is at index 1, in 'tf' mode it is at index 3.
        bias: whether to include a bias (i.e. make the layer affine rather than linear).

    # Input shape
        4D tensor with shape:
        `(samples, channels, rows, cols)` if dim_ordering='th'
        or 4D tensor with shape:
        `(samples, rows, cols, channels)` if dim_ordering='tf'.

    # Output shape
        4D tensor with shape:
        `(samples, nb_filter, new_rows, new_cols)` if dim_ordering='th'
        or 4D tensor with shape:
        `(samples, new_rows, new_cols, nb_filter)` if dim_ordering='tf'.
        `rows` and `cols` values might have changed due to padding.
    '''
    def __init__(self, nb_filter, nb_row, nb_col,
                 init='glorot_uniform', activation='linear', weights=None,
                 border_mode='valid', subsample=(1, 1),
                 dim_ordering='default',
                 W_regularizer=None, b_regularizer=None, activity_regularizer=None,
                 W_constraint=None, b_constraint=None,
                 bias=True, **kwargs):
        # 'default' defers to the backend-wide image dimension ordering.
        if dim_ordering == 'default':
            dim_ordering = K.image_dim_ordering()
        if border_mode != 'valid':
            raise Exception('Invalid border mode for LocallyConnected2D '
                            '(only "valid" is supported):', border_mode)

        # Kernel geometry.
        self.nb_filter = nb_filter
        self.nb_row = nb_row
        self.nb_col = nb_col

        self.init = initializations.get(init, dim_ordering=dim_ordering)
        self.activation = activations.get(activation)

        self.border_mode = border_mode
        self.subsample = tuple(subsample)
        assert dim_ordering in {'tf', 'th'}, 'dim_ordering must be in {tf, th}'
        self.dim_ordering = dim_ordering

        # Regularizers, then constraints.
        self.W_regularizer = regularizers.get(W_regularizer)
        self.b_regularizer = regularizers.get(b_regularizer)
        self.activity_regularizer = regularizers.get(activity_regularizer)
        self.W_constraint = constraints.get(W_constraint)
        self.b_constraint = constraints.get(b_constraint)

        self.bias = bias
        self.input_spec = [InputSpec(ndim=4)]
        # Consumed (and deleted) by build().
        self.initial_weights = weights
        super(LocallyConnected2D, self).__init__(**kwargs)

    def build(self, input_shape):
        '''Allocates one weight matrix per output position plus the optional
        bias, and wires up regularizers and constraints.
        '''
        out_shape = self.get_output_shape_for(input_shape)
        ordering = self.dim_ordering
        if ordering == 'th':
            _, nb_filter, n_rows, n_cols = out_shape
            in_channels = input_shape[1]
        elif ordering == 'tf':
            _, n_rows, n_cols, nb_filter = out_shape
            in_channels = input_shape[3]
        else:
            raise Exception('Invalid dim_ordering: ' + self.dim_ordering)

        self.output_row = n_rows
        self.output_col = n_cols
        patch_features = self.nb_row * self.nb_col * in_channels
        self.W_shape = (n_rows * n_cols, patch_features, nb_filter)
        self.W = self.init(self.W_shape, name='{}_W'.format(self.name))

        self.trainable_weights = [self.W]
        if self.bias:
            self.b = K.zeros((n_rows, n_cols, nb_filter), name='{}_b'.format(self.name))
            self.trainable_weights.append(self.b)

        self.regularizers = []
        if self.W_regularizer:
            self.W_regularizer.set_param(self.W)
            self.regularizers.append(self.W_regularizer)
        if self.bias and self.b_regularizer:
            self.b_regularizer.set_param(self.b)
            self.regularizers.append(self.b_regularizer)
        if self.activity_regularizer:
            self.activity_regularizer.set_layer(self)
            self.regularizers.append(self.activity_regularizer)

        self.constraints = {}
        if self.W_constraint:
            self.constraints[self.W] = self.W_constraint
        if self.bias and self.b_constraint:
            self.constraints[self.b] = self.b_constraint

        if self.initial_weights is not None:
            self.set_weights(self.initial_weights)
            del self.initial_weights

    def get_output_shape_for(self, input_shape):
        '''Computes the output shape by running `conv_output_length` on the
        row and column entries of `input_shape` for the current ordering.
        '''
        if self.dim_ordering == 'th':
            row_axis, col_axis = 2, 3
        elif self.dim_ordering == 'tf':
            row_axis, col_axis = 1, 2
        else:
            raise Exception('Invalid dim_ordering: ' + self.dim_ordering)

        rows = input_shape[row_axis]
        cols = input_shape[col_axis]
        rows = conv_output_length(rows, self.nb_row,
                                  self.border_mode, self.subsample[0])
        cols = conv_output_length(cols, self.nb_col,
                                  self.border_mode, self.subsample[1])

        # Only 'th' and 'tf' can reach this point.
        if self.dim_ordering == 'th':
            return (input_shape[0], self.nb_filter, rows, cols)
        return (input_shape[0], rows, cols, self.nb_filter)

    def _iter_patch_slices(self, stride_row, stride_col):
        '''Yields `(flat_index, row_slice, col_slice)` for every output
        position, rows outermost, with `flat_index = i * output_col + j`.
        '''
        for i in range(self.output_row):
            for j in range(self.output_col):
                row_slice = slice(i * stride_row, i * stride_row + self.nb_row)
                col_slice = slice(j * stride_col, j * stride_col + self.nb_col)
                yield i * self.output_col + j, row_slice, col_slice

    def call(self, x, mask=None):
        '''Applies the unshared filters to `x`; `mask` is accepted but unused.

        Each output position's patch is flattened to `feature_dim` features.
        With 'th' ordering on the Theano backend every patch is multiplied by
        its own weight slice with `K.dot`; otherwise all patches are stacked
        and multiplied in one `K.batch_dot`.
        '''
        stride_row, stride_col = self.subsample
        _, feature_dim, nb_filter = self.W_shape
        flat_patch = (1, -1, feature_dim)
        grid_shape = (self.output_row, self.output_col, -1, nb_filter)

        if self.dim_ordering == 'th':
            if K._backend == 'theano':
                products = []
                for k, rows, cols in self._iter_patch_slices(stride_row, stride_col):
                    patch = K.reshape(x[:, :, rows, cols], flat_patch)
                    products.append(K.dot(patch, self.W[k, :, :]))
                output = K.concatenate(products, axis=0)
            else:
                patches = []
                for _, rows, cols in self._iter_patch_slices(stride_row, stride_col):
                    patches.append(K.reshape(x[:, :, rows, cols], flat_patch))
                stacked = K.concatenate(patches, axis=0)
                output = K.batch_dot(stacked, self.W)
            output = K.reshape(output, grid_shape)
            output = K.permute_dimensions(output, (2, 3, 0, 1))
        elif self.dim_ordering == 'tf':
            patches = []
            for _, rows, cols in self._iter_patch_slices(stride_row, stride_col):
                patches.append(K.reshape(x[:, rows, cols, :], flat_patch))
            stacked = K.concatenate(patches, axis=0)
            output = K.batch_dot(stacked, self.W)
            output = K.reshape(output, grid_shape)
            output = K.permute_dimensions(output, (2, 0, 1, 3))
        else:
            raise Exception('Invalid dim_ordering: ' + self.dim_ordering)

        if self.bias:
            if self.dim_ordering == 'th':
                bias_shape = (1, nb_filter, self.output_row, self.output_col)
            elif self.dim_ordering == 'tf':
                bias_shape = (1, self.output_row, self.output_col, nb_filter)
            else:
                raise Exception('Invalid dim_ordering: ' + self.dim_ordering)
            output += K.reshape(self.b, bias_shape)

        return self.activation(output)

    def get_config(self):
        '''Returns the base layer config extended with this layer's
        constructor arguments.
        '''
        def describe(component):
            # Regularizers/constraints serialise themselves; unset ones map to None.
            return component.get_config() if component else None

        config = {'nb_filter': self.nb_filter,
                  'nb_row': self.nb_row,
                  'nb_col': self.nb_col,
                  'init': self.init.__name__,
                  'activation': self.activation.__name__,
                  'border_mode': self.border_mode,
                  'subsample': self.subsample,
                  'dim_ordering': self.dim_ordering,
                  'W_regularizer': describe(self.W_regularizer),
                  'b_regularizer': describe(self.b_regularizer),
                  'activity_regularizer': describe(self.activity_regularizer),
                  'W_constraint': describe(self.W_constraint),
                  'b_constraint': describe(self.b_constraint),
                  'bias': self.bias}
        base_config = super(LocallyConnected2D, self).get_config()
        merged = dict(list(base_config.items()))
        merged.update(config)
        return merged
|
||||
@@ -1,6 +1,7 @@
|
||||
from __future__ import absolute_import
|
||||
from ..engine import Layer
|
||||
from .. import backend as K
|
||||
import numpy as np
|
||||
|
||||
|
||||
class GaussianNoise(Layer):
|
||||
@@ -71,7 +72,7 @@ class GaussianDropout(Layer):
|
||||
def call(self, x, mask=None):
|
||||
if 0 < self.p < 1:
|
||||
noise_x = x * K.random_normal(shape=K.shape(x), mean=1.0,
|
||||
std=K.sqrt(self.p / (1.0 - self.p)))
|
||||
std=np.sqrt(self.p / (1.0 - self.p)))
|
||||
return K.in_train_phase(noise_x, x)
|
||||
return x
|
||||
|
||||
|
||||
@@ -35,6 +35,7 @@ class BatchNormalization(Layer):
|
||||
weights: Initialization weights.
|
||||
List of 2 Numpy arrays, with shapes:
|
||||
`[(input_shape,), (input_shape,)]`
|
||||
Note that the order of this list is [gamma, beta, mean, std]
|
||||
beta_init: name of initialization function for shift parameter
|
||||
(see [initializations](../initializations.md)), or alternatively,
|
||||
Theano/TensorFlow function to use for weights initialization.
|
||||
@@ -55,8 +56,9 @@ class BatchNormalization(Layer):
|
||||
# References
|
||||
- [Batch Normalization: Accelerating Deep Network Training by Reducing Internal Covariate Shift](http://jmlr.org/proceedings/papers/v37/ioffe15.html)
|
||||
'''
|
||||
def __init__(self, epsilon=1e-6, mode=0, axis=-1, momentum=0.9,
|
||||
def __init__(self, epsilon=1e-6, mode=0, axis=-1, momentum=0.99,
|
||||
weights=None, beta_init='zero', gamma_init='one', **kwargs):
|
||||
self.supports_masking = True
|
||||
self.beta_init = initializations.get(beta_init)
|
||||
self.gamma_init = initializations.get(gamma_init)
|
||||
self.epsilon = epsilon
|
||||
@@ -98,18 +100,10 @@ class BatchNormalization(Layer):
|
||||
broadcast_shape = [1] * len(input_shape)
|
||||
broadcast_shape[self.axis] = input_shape[self.axis]
|
||||
|
||||
# case: train mode (uses stats of the current batch)
|
||||
mean = K.mean(x, axis=reduction_axes)
|
||||
brodcast_mean = K.reshape(mean, broadcast_shape)
|
||||
std = K.mean(K.square(x - brodcast_mean) + self.epsilon, axis=reduction_axes)
|
||||
std = K.sqrt(std)
|
||||
brodcast_std = K.reshape(std, broadcast_shape)
|
||||
mean_update = self.momentum * self.running_mean + (1 - self.momentum) * mean
|
||||
std_update = self.momentum * self.running_std + (1 - self.momentum) * std
|
||||
|
||||
if self.mode == 2:
|
||||
x_normed = (x - brodcast_mean) / (brodcast_std + self.epsilon)
|
||||
out = K.reshape(self.gamma, broadcast_shape) * x_normed + K.reshape(self.beta, broadcast_shape)
|
||||
x_normed, mean, std = K.normalize_batch_in_training(
|
||||
x, self.gamma, self.beta, reduction_axes,
|
||||
epsilon=self.epsilon)
|
||||
else:
|
||||
# mode 0
|
||||
if self.called_with not in {None, x}:
|
||||
@@ -123,26 +117,39 @@ class BatchNormalization(Layer):
|
||||
'(see docs for a description of '
|
||||
'the behavior).')
|
||||
self.called_with = x
|
||||
self.updates = [(self.running_mean, mean_update),
|
||||
(self.running_std, std_update)]
|
||||
x_normed = (x - brodcast_mean) / (brodcast_std + self.epsilon)
|
||||
x_normed, mean, std = K.normalize_batch_in_training(
|
||||
x, self.gamma, self.beta, reduction_axes,
|
||||
epsilon=self.epsilon)
|
||||
|
||||
# case: test mode (uses running averages)
|
||||
brodcast_running_mean = K.reshape(self.running_mean, broadcast_shape)
|
||||
brodcast_running_std = K.reshape(self.running_std, broadcast_shape)
|
||||
x_normed_running = ((x - brodcast_running_mean) / (brodcast_running_std + self.epsilon))
|
||||
self.updates = [K.moving_average_update(self.running_mean, mean, self.momentum),
|
||||
K.moving_average_update(self.running_std, std, self.momentum)]
|
||||
|
||||
if sorted(reduction_axes) == range(K.ndim(x))[:-1]:
|
||||
x_normed_running = K.batch_normalization(
|
||||
x, self.running_mean, self.running_std,
|
||||
self.beta, self.gamma,
|
||||
epsilon=self.epsilon)
|
||||
else:
|
||||
# need broadcasting
|
||||
broadcast_running_mean = K.reshape(self.running_mean, broadcast_shape)
|
||||
broadcast_running_std = K.reshape(self.running_std, broadcast_shape)
|
||||
broadcast_beta = K.reshape(self.beta, broadcast_shape)
|
||||
broadcast_gamma = K.reshape(self.gamma, broadcast_shape)
|
||||
x_normed_running = K.batch_normalization(
|
||||
x, broadcast_running_mean, broadcast_running_std,
|
||||
broadcast_beta, broadcast_gamma,
|
||||
epsilon=self.epsilon)
|
||||
|
||||
# pick the normalized form of x corresponding to the training phase
|
||||
x_normed = K.in_train_phase(x_normed, x_normed_running)
|
||||
out = K.reshape(self.gamma, broadcast_shape) * x_normed + K.reshape(self.beta, broadcast_shape)
|
||||
|
||||
elif self.mode == 1:
|
||||
# sample-wise normalization
|
||||
m = K.mean(x, axis=-1, keepdims=True)
|
||||
std = K.sqrt(K.var(x, axis=-1, keepdims=True) + self.epsilon)
|
||||
x_normed = (x - m) / (std + self.epsilon)
|
||||
out = self.gamma * x_normed + self.beta
|
||||
return out
|
||||
x_normed = self.gamma * x_normed + self.beta
|
||||
return x_normed
|
||||
|
||||
def get_config(self):
|
||||
config = {"epsilon": self.epsilon,
|
||||
|
||||
@@ -0,0 +1,400 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
from __future__ import absolute_import
|
||||
|
||||
from .. import backend as K
|
||||
from ..engine import Layer, InputSpec
|
||||
from ..utils.np_utils import conv_output_length
|
||||
|
||||
|
||||
class _Pooling1D(Layer):
    '''Abstract class for different pooling 1D layers.

    Subclasses implement `_pooling_function`; `call` reshapes the 3D input
    to 4D so that the 2D pooling primitive can be reused.

    # Arguments
        pool_length: size of the pooling window.
        stride: integer, or None. If None, it defaults to `pool_length`.
        border_mode: 'valid' or 'same'.
    '''
    input_dim = 3

    def __init__(self, pool_length=2, stride=None,
                 border_mode='valid', **kwargs):
        super(_Pooling1D, self).__init__(**kwargs)
        if stride is None:
            stride = pool_length
        self.pool_length = pool_length
        self.stride = stride
        # 2D equivalents handed to the pooling function: the second
        # (dummy) dimension is pooled with size 1 and stride 1.
        self.st = (self.stride, 1)
        self.pool_size = (pool_length, 1)
        assert border_mode in {'valid', 'same'}, 'border_mode must be in {valid, same}'
        self.border_mode = border_mode
        self.input_spec = [InputSpec(ndim=3)]

    def get_output_shape_for(self, input_shape):
        '''Returns `(input_shape[0], new_length, input_shape[2])`, where the
        new length comes from `conv_output_length` applied to `input_shape[1]`.
        '''
        length = conv_output_length(input_shape[1], self.pool_length,
                                    self.border_mode, self.stride)
        return (input_shape[0], length, input_shape[2])

    def _pooling_function(self, inputs, pool_size, strides,
                          border_mode, dim_ordering):
        '''Pooling hook to be overridden by subclasses.

        The signature matches the keyword arguments passed by `call` and the
        overrides in the concrete 1D pooling layers, so calling the base
        implementation raises NotImplementedError rather than TypeError.
        '''
        raise NotImplementedError

    def call(self, x, mask=None):
        '''Pools `x` by routing it through the 2D pooling hook.
        `mask` is accepted but not used.
        '''
        x = K.expand_dims(x, -1)   # add dummy last dimension
        # Swap axes 1 and 2 before pooling with 'th' ordering.
        x = K.permute_dimensions(x, (0, 2, 1, 3))
        output = self._pooling_function(inputs=x, pool_size=self.pool_size,
                                        strides=self.st,
                                        border_mode=self.border_mode,
                                        dim_ordering='th')
        # Undo the axis swap.
        output = K.permute_dimensions(output, (0, 2, 1, 3))
        return K.squeeze(output, 3)  # remove dummy last dimension

    def get_config(self):
        '''Returns the base layer config extended with `stride`,
        `pool_length` and `border_mode`.
        '''
        config = {'stride': self.stride,
                  'pool_length': self.pool_length,
                  'border_mode': self.border_mode}
        base_config = super(_Pooling1D, self).get_config()
        return dict(list(base_config.items()) + list(config.items()))
|
||||
|
||||
|
||||
class MaxPooling1D(_Pooling1D):
    '''Temporal max pooling.

    # Arguments
        pool_length: size of the region to which max pooling is applied
        stride: integer, or None. factor by which to downscale.
            2 will halve the input.
            If None, it will default to `pool_length`.
        border_mode: 'valid' or 'same'.
            Note: 'same' will only work with TensorFlow for the time being.

    # Input shape
        3D tensor with shape: `(samples, steps, features)`.

    # Output shape
        3D tensor with shape: `(samples, downsampled_steps, features)`.
    '''

    def __init__(self, pool_length=2, stride=None,
                 border_mode='valid', **kwargs):
        # All configuration is handled by the shared 1D pooling base class.
        super(MaxPooling1D, self).__init__(pool_length=pool_length,
                                           stride=stride,
                                           border_mode=border_mode,
                                           **kwargs)

    def _pooling_function(self, inputs, pool_size, strides,
                          border_mode, dim_ordering):
        '''Delegates to `K.pool2d` with `pool_mode='max'`.'''
        return K.pool2d(inputs, pool_size, strides,
                        border_mode, dim_ordering, pool_mode='max')
|
||||
|
||||
|
||||
class AveragePooling1D(_Pooling1D):
    '''Temporal average pooling.

    # Arguments
        pool_length: factor by which to downscale. 2 will halve the input.
        stride: integer, or None. Stride value.
            If None, it will default to `pool_length`.
        border_mode: 'valid' or 'same'.
            Note: 'same' will only work with TensorFlow for the time being.

    # Input shape
        3D tensor with shape: `(samples, steps, features)`.

    # Output shape
        3D tensor with shape: `(samples, downsampled_steps, features)`.
    '''

    def __init__(self, pool_length=2, stride=None,
                 border_mode='valid', **kwargs):
        # All configuration is handled by the shared 1D pooling base class.
        super(AveragePooling1D, self).__init__(pool_length=pool_length,
                                               stride=stride,
                                               border_mode=border_mode,
                                               **kwargs)

    def _pooling_function(self, inputs, pool_size, strides,
                          border_mode, dim_ordering):
        '''Delegates to `K.pool2d` with `pool_mode='avg'`.'''
        return K.pool2d(inputs, pool_size, strides,
                        border_mode, dim_ordering, pool_mode='avg')
|
||||
|
||||
|
||||
class _Pooling2D(Layer):
    '''Abstract base class shared by the 2D pooling layers.

    Subclasses provide the actual pooling operation by overriding
    `_pooling_function`; this class handles argument normalisation,
    output shape computation and configuration export.
    '''

    def __init__(self, pool_size=(2, 2), strides=None, border_mode='valid',
                 dim_ordering='default', **kwargs):
        super(_Pooling2D, self).__init__(**kwargs)
        # 'default' resolves to the value reported by the backend.
        if dim_ordering == 'default':
            dim_ordering = K.image_dim_ordering()
        self.pool_size = tuple(pool_size)
        # Without explicit strides, the stride equals the pool size.
        self.strides = tuple(self.pool_size if strides is None else strides)
        assert border_mode in {'valid', 'same'}, 'border_mode must be in {valid, same}'
        self.border_mode = border_mode
        assert dim_ordering in {'tf', 'th'}, 'dim_ordering must be in {tf, th}'
        self.dim_ordering = dim_ordering
        self.input_spec = [InputSpec(ndim=4)]

    def get_output_shape_for(self, input_shape):
        '''Returns the 4-tuple output shape for the given `input_shape`.

        Raises an `Exception` when `self.dim_ordering` is neither
        'th' nor 'tf'.
        '''
        ordering = self.dim_ordering
        if ordering == 'th':
            spatial = (input_shape[2], input_shape[3])
        elif ordering == 'tf':
            spatial = (input_shape[1], input_shape[2])
        else:
            raise Exception('Invalid dim_ordering: ' + self.dim_ordering)

        # Pool each spatial axis (rows first, then cols) independently.
        pooled = tuple(conv_output_length(length, self.pool_size[axis],
                                          self.border_mode, self.strides[axis])
                       for axis, length in enumerate(spatial))

        if ordering == 'th':
            return (input_shape[0], input_shape[1]) + pooled
        return (input_shape[0],) + pooled + (input_shape[3],)

    def _pooling_function(self, inputs, pool_size, strides,
                          border_mode, dim_ordering):
        # Must be overridden by concrete pooling layers.
        raise NotImplementedError

    def call(self, x, mask=None):
        '''Applies the subclass pooling function to `x`; `mask` is unused.
        '''
        return self._pooling_function(inputs=x,
                                      pool_size=self.pool_size,
                                      strides=self.strides,
                                      border_mode=self.border_mode,
                                      dim_ordering=self.dim_ordering)

    def get_config(self):
        '''Returns the parent configuration extended with the pooling settings.
        '''
        own_config = {'pool_size': self.pool_size,
                      'border_mode': self.border_mode,
                      'strides': self.strides,
                      'dim_ordering': self.dim_ordering}
        merged = dict(super(_Pooling2D, self).get_config())
        # Keys defined here take precedence over the parent's on collision.
        merged.update(own_config)
        return merged
|
||||
|
||||
|
||||
class MaxPooling2D(_Pooling2D):
    '''Max pooling operation for spatial data.

    # Arguments
        pool_size: tuple of 2 integers,
            factors by which to downscale (vertical, horizontal).
            (2, 2) will halve the image in each dimension.
        strides: tuple of 2 integers, or None. Strides values.
            If None, it will default to `pool_size`.
        border_mode: 'valid' or 'same'.
            Note: 'same' will only work with TensorFlow for the time being.
        dim_ordering: 'th' or 'tf'. In 'th' mode, the channels dimension
            (the depth) is at index 1, in 'tf' mode it is at index 3.
            It defaults to the `image_dim_ordering` value found in your
            Keras config file at `~/.keras/keras.json`.
            If you never set it, then it will be "th".

    # Input shape
        4D tensor with shape:
        `(samples, channels, rows, cols)` if dim_ordering='th'
        or 4D tensor with shape:
        `(samples, rows, cols, channels)` if dim_ordering='tf'.

    # Output shape
        4D tensor with shape:
        `(samples, channels, pooled_rows, pooled_cols)` if dim_ordering='th'
        or 4D tensor with shape:
        `(samples, pooled_rows, pooled_cols, channels)` if dim_ordering='tf'.
    '''

    def __init__(self, pool_size=(2, 2), strides=None, border_mode='valid',
                 dim_ordering='default', **kwargs):
        # All argument handling lives in the parent class.
        parent = super(MaxPooling2D, self)
        parent.__init__(pool_size, strides, border_mode,
                        dim_ordering, **kwargs)

    def _pooling_function(self, inputs, pool_size, strides,
                          border_mode, dim_ordering):
        # Delegates to the backend 2D pooling op in 'max' mode.
        return K.pool2d(inputs, pool_size, strides,
                        border_mode, dim_ordering, pool_mode='max')
|
||||
|
||||
|
||||
class AveragePooling2D(_Pooling2D):
    '''Average pooling operation for spatial data.

    # Arguments
        pool_size: tuple of 2 integers,
            factors by which to downscale (vertical, horizontal).
            (2, 2) will halve the image in each dimension.
        strides: tuple of 2 integers, or None. Strides values.
            If None, it will default to `pool_size`.
        border_mode: 'valid' or 'same'.
            Note: 'same' will only work with TensorFlow for the time being.
        dim_ordering: 'th' or 'tf'. In 'th' mode, the channels dimension
            (the depth) is at index 1, in 'tf' mode it is at index 3.
            It defaults to the `image_dim_ordering` value found in your
            Keras config file at `~/.keras/keras.json`.
            If you never set it, then it will be "th".

    # Input shape
        4D tensor with shape:
        `(samples, channels, rows, cols)` if dim_ordering='th'
        or 4D tensor with shape:
        `(samples, rows, cols, channels)` if dim_ordering='tf'.

    # Output shape
        4D tensor with shape:
        `(samples, channels, pooled_rows, pooled_cols)` if dim_ordering='th'
        or 4D tensor with shape:
        `(samples, pooled_rows, pooled_cols, channels)` if dim_ordering='tf'.
    '''

    def __init__(self, pool_size=(2, 2), strides=None, border_mode='valid',
                 dim_ordering='default', **kwargs):
        # All argument handling lives in the parent class.
        parent = super(AveragePooling2D, self)
        parent.__init__(pool_size, strides, border_mode,
                        dim_ordering, **kwargs)

    def _pooling_function(self, inputs, pool_size, strides,
                          border_mode, dim_ordering):
        # Delegates to the backend 2D pooling op in 'avg' mode.
        return K.pool2d(inputs, pool_size, strides,
                        border_mode, dim_ordering, pool_mode='avg')
|
||||
|
||||
|
||||
class _Pooling3D(Layer):
    '''Abstract base class shared by the 3D pooling layers.

    Subclasses provide the actual pooling operation by overriding
    `_pooling_function`; this class handles argument normalisation,
    output shape computation and configuration export.
    '''

    def __init__(self, pool_size=(2, 2, 2), strides=None, border_mode='valid',
                 dim_ordering='default', **kwargs):
        super(_Pooling3D, self).__init__(**kwargs)
        # 'default' resolves to the value reported by the backend.
        if dim_ordering == 'default':
            dim_ordering = K.image_dim_ordering()
        self.pool_size = tuple(pool_size)
        # Without explicit strides, the stride equals the pool size.
        self.strides = tuple(self.pool_size if strides is None else strides)
        assert border_mode in {'valid', 'same'}, 'border_mode must be in {valid, same}'
        self.border_mode = border_mode
        assert dim_ordering in {'tf', 'th'}, 'dim_ordering must be in {tf, th}'
        self.dim_ordering = dim_ordering
        self.input_spec = [InputSpec(ndim=5)]

    def get_output_shape_for(self, input_shape):
        '''Returns the 5-tuple output shape for the given `input_shape`.

        Raises an `Exception` when `self.dim_ordering` is neither
        'th' nor 'tf'.
        '''
        ordering = self.dim_ordering
        if ordering == 'th':
            spatial = (input_shape[2], input_shape[3], input_shape[4])
        elif ordering == 'tf':
            spatial = (input_shape[1], input_shape[2], input_shape[3])
        else:
            raise Exception('Invalid dim_ordering: ' + self.dim_ordering)

        # Pool the three non-channel axes independently, in order.
        pooled = tuple(conv_output_length(length, self.pool_size[axis],
                                          self.border_mode, self.strides[axis])
                       for axis, length in enumerate(spatial))

        if ordering == 'th':
            return (input_shape[0], input_shape[1]) + pooled
        return (input_shape[0],) + pooled + (input_shape[4],)

    def _pooling_function(self, inputs, pool_size, strides,
                          border_mode, dim_ordering):
        # Must be overridden by concrete pooling layers.
        raise NotImplementedError

    def call(self, x, mask=None):
        '''Applies the subclass pooling function to `x`; `mask` is unused.
        '''
        return self._pooling_function(inputs=x,
                                      pool_size=self.pool_size,
                                      strides=self.strides,
                                      border_mode=self.border_mode,
                                      dim_ordering=self.dim_ordering)

    def get_config(self):
        '''Returns the parent configuration extended with the pooling settings.
        '''
        own_config = {'pool_size': self.pool_size,
                      'border_mode': self.border_mode,
                      'strides': self.strides,
                      'dim_ordering': self.dim_ordering}
        merged = dict(super(_Pooling3D, self).get_config())
        # Keys defined here take precedence over the parent's on collision.
        merged.update(own_config)
        return merged
|
||||
|
||||
|
||||
class MaxPooling3D(_Pooling3D):
    '''Max pooling operation for 3D data (spatial or spatio-temporal).

    # Arguments
        pool_size: tuple of 3 integers,
            factors by which to downscale (dim1, dim2, dim3).
            (2, 2, 2) will halve the size of the 3D input in each dimension.
        strides: tuple of 3 integers, or None. Strides values.
        border_mode: 'valid' or 'same'.
        dim_ordering: 'th' or 'tf'. In 'th' mode, the channels dimension
            (the depth) is at index 1, in 'tf' mode it is at index 4.
            It defaults to the `image_dim_ordering` value found in your
            Keras config file at `~/.keras/keras.json`.
            If you never set it, then it will be "th".

    # Input shape
        5D tensor with shape:
        `(samples, channels, len_pool_dim1, len_pool_dim2, len_pool_dim3)` if dim_ordering='th'
        or 5D tensor with shape:
        `(samples, len_pool_dim1, len_pool_dim2, len_pool_dim3, channels)` if dim_ordering='tf'.

    # Output shape
        5D tensor with shape:
        `(samples, channels, pooled_dim1, pooled_dim2, pooled_dim3)` if dim_ordering='th'
        or 5D tensor with shape:
        `(samples, pooled_dim1, pooled_dim2, pooled_dim3, channels)` if dim_ordering='tf'.
    '''

    def __init__(self, pool_size=(2, 2, 2), strides=None, border_mode='valid',
                 dim_ordering='default', **kwargs):
        # All argument handling lives in the parent class.
        parent = super(MaxPooling3D, self)
        parent.__init__(pool_size, strides, border_mode,
                        dim_ordering, **kwargs)

    def _pooling_function(self, inputs, pool_size, strides,
                          border_mode, dim_ordering):
        # Delegates to the backend 3D pooling op in 'max' mode.
        return K.pool3d(inputs, pool_size, strides,
                        border_mode, dim_ordering, pool_mode='max')
|
||||
|
||||
|
||||
class AveragePooling3D(_Pooling3D):
    '''Average pooling operation for 3D data (spatial or spatio-temporal).

    # Arguments
        pool_size: tuple of 3 integers,
            factors by which to downscale (dim1, dim2, dim3).
            (2, 2, 2) will halve the size of the 3D input in each dimension.
        strides: tuple of 3 integers, or None. Strides values.
        border_mode: 'valid' or 'same'.
        dim_ordering: 'th' or 'tf'. In 'th' mode, the channels dimension
            (the depth) is at index 1, in 'tf' mode it is at index 4.
            It defaults to the `image_dim_ordering` value found in your
            Keras config file at `~/.keras/keras.json`.
            If you never set it, then it will be "th".

    # Input shape
        5D tensor with shape:
        `(samples, channels, len_pool_dim1, len_pool_dim2, len_pool_dim3)` if dim_ordering='th'
        or 5D tensor with shape:
        `(samples, len_pool_dim1, len_pool_dim2, len_pool_dim3, channels)` if dim_ordering='tf'.

    # Output shape
        5D tensor with shape:
        `(samples, channels, pooled_dim1, pooled_dim2, pooled_dim3)` if dim_ordering='th'
        or 5D tensor with shape:
        `(samples, pooled_dim1, pooled_dim2, pooled_dim3, channels)` if dim_ordering='tf'.
    '''

    def __init__(self, pool_size=(2, 2, 2), strides=None, border_mode='valid',
                 dim_ordering='default', **kwargs):
        # All argument handling lives in the parent class.
        parent = super(AveragePooling3D, self)
        parent.__init__(pool_size, strides, border_mode,
                        dim_ordering, **kwargs)

    def _pooling_function(self, inputs, pool_size, strides,
                          border_mode, dim_ordering):
        # Delegates to the backend 3D pooling op in 'avg' mode.
        return K.pool3d(inputs, pool_size, strides,
                        border_mode, dim_ordering, pool_mode='avg')
|
||||
@@ -139,7 +139,10 @@ class Recurrent(Layer):
|
||||
To enable statefulness:
|
||||
- specify `stateful=True` in the layer constructor.
|
||||
- specify a fixed batch size for your model, by passing
|
||||
a `batch_input_shape=(...)` to the first layer in your model.
|
||||
if sequential model:
|
||||
a `batch_input_shape=(...)` to the first layer in your model.
|
||||
else for functional model with 1 or more Input layers:
|
||||
a `batch_shape=(...)` to all the first layers in your model.
|
||||
This is the expected shape of your inputs *including the batch size*.
|
||||
It should be a tuple of integers, e.g. `(32, 10, 100)`.
|
||||
|
||||
@@ -190,9 +193,9 @@ class Recurrent(Layer):
|
||||
def get_initial_states(self, x):
|
||||
# build an all-zero tensor of shape (samples, output_dim)
|
||||
initial_state = K.zeros_like(x) # (samples, timesteps, input_dim)
|
||||
initial_state = K.sum(initial_state, axis=1) # (samples, input_dim)
|
||||
reducer = K.zeros((self.input_dim, self.output_dim))
|
||||
initial_state = K.dot(initial_state, reducer) # (samples, output_dim)
|
||||
initial_state = K.sum(initial_state, axis=(1, 2)) # (samples,)
|
||||
initial_state = K.expand_dims(initial_state) # (samples, 1)
|
||||
initial_state = K.tile(initial_state, [1, self.output_dim]) # (samples, output_dim)
|
||||
initial_states = [initial_state for _ in range(len(self.states))]
|
||||
return initial_states
|
||||
|
||||
@@ -689,7 +692,7 @@ class LSTM(Recurrent):
|
||||
name='{}_U'.format(self.name))
|
||||
|
||||
self.b = K.variable(np.hstack((np.zeros(self.output_dim),
|
||||
K.get_value(self.forget_bias_init(self.output_dim)),
|
||||
K.get_value(self.forget_bias_init((self.output_dim,))),
|
||||
np.zeros(self.output_dim),
|
||||
np.zeros(self.output_dim))),
|
||||
name='{}_b'.format(self.name))
|
||||
|
||||
+221
-11
@@ -1,13 +1,174 @@
|
||||
from __future__ import print_function
|
||||
import warnings
|
||||
import copy
|
||||
import json
|
||||
import os
|
||||
import numpy as np
|
||||
|
||||
from . import backend as K
|
||||
from .utils.io_utils import ask_to_proceed_with_overwrite
|
||||
from .engine.training import Model
|
||||
from .engine.topology import get_source_inputs, Node
|
||||
from .optimizers import optimizer_from_config
|
||||
from .legacy.models import Graph
|
||||
|
||||
|
||||
def save_model(model, filepath, overwrite=True):
    '''Saves a model to a single HDF5 file.

    The file receives the Keras version and the JSON-encoded model
    configuration as attributes, plus a `model_weights` group. When the
    model has an `optimizer` attribute, the training configuration
    (optimizer config, loss, metrics, sample weight mode, loss weights)
    is stored as an attribute as well, and the optimizer weights, if any,
    are written to an `optimizer_weights` group.

    # Arguments
        model: model instance to save.
        filepath: path of the HDF5 file to write.
        overwrite: if False and `filepath` already exists, the user is
            asked for confirmation; nothing is written if they decline.
    '''
    import h5py
    from keras import __version__ as keras_version

    def get_json_type(obj):
        # Fallback encoder for values `json.dumps` cannot serialize itself.

        # if obj is a serializable Keras class instance
        # e.g. optimizer, layer
        if hasattr(obj, 'get_config'):
            return {'class_name': obj.__class__.__name__,
                    'config': obj.get_config()}

        # if obj is any numpy type
        if type(obj).__module__ == np.__name__:
            return obj.item()

        # misc functions (e.g. loss function)
        if hasattr(obj, '__call__'):
            return obj.__name__

        # if obj is a python 'type'
        if type(obj).__name__ == type.__name__:
            return obj.__name__

        raise TypeError('Not JSON Serializable:', obj)

    # if file exists and should not be overwritten
    if not overwrite and os.path.isfile(filepath):
        proceed = ask_to_proceed_with_overwrite(filepath)
        if not proceed:
            return

    f = h5py.File(filepath, 'w')
    # try/finally guarantees the handle is released even when
    # serialization fails half-way through.
    try:
        f.attrs['keras_version'] = str(keras_version).encode('utf8')
        f.attrs['model_config'] = json.dumps({
            'class_name': model.__class__.__name__,
            'config': model.get_config()
        }, default=get_json_type).encode('utf8')

        model_weights_group = f.create_group('model_weights')
        model.save_weights_to_hdf5_group(model_weights_group)

        if hasattr(model, 'optimizer'):
            f.attrs['training_config'] = json.dumps({
                'optimizer_config': {
                    'class_name': model.optimizer.__class__.__name__,
                    'config': model.optimizer.get_config()
                },
                'loss': model.loss,
                'metrics': model.metrics,
                'sample_weight_mode': model.sample_weight_mode,
                'loss_weights': model.loss_weights,
            }, default=get_json_type).encode('utf8')

            # save optimizer weights; an optimizer without a `weights`
            # attribute is treated like one with no weights.
            symbolic_weights = getattr(model.optimizer, 'weights', None)
            if symbolic_weights:
                optimizer_weights_group = f.create_group('optimizer_weights')
                weight_values = K.batch_get_value(symbolic_weights)
                weight_names = []
                for i, w in enumerate(symbolic_weights):
                    # Unnamed variables get a positional placeholder name.
                    if hasattr(w, 'name') and w.name:
                        name = str(w.name)
                    else:
                        name = 'param_' + str(i)
                    weight_names.append(name.encode('utf8'))
                optimizer_weights_group.attrs['weight_names'] = weight_names
                for name, val in zip(weight_names, weight_values):
                    param_dset = optimizer_weights_group.create_dataset(
                        name,
                        val.shape,
                        dtype=val.dtype)
                    if not val.shape:
                        # scalar
                        param_dset[()] = val
                    else:
                        param_dset[:] = val
        f.flush()
    finally:
        f.close()
|
||||
|
||||
|
||||
def load_model(filepath, custom_objects=None):
    '''Loads a model from an HDF5 file.

    Reads the `model_config` attribute and the `model_weights` group to
    rebuild the model. If a `training_config` attribute is present the
    model is also compiled, and optimizer weights are restored when an
    `optimizer_weights` group exists; otherwise a warning is issued and
    the uncompiled model is returned.

    # Arguments
        filepath: path of the HDF5 file to read.
        custom_objects: optional dictionary mapping names (strings) to
            custom classes or functions to use during deserialization.

    # Returns
        The loaded model.

    # Raises
        ValueError: if the file has no `model_config` attribute.
    '''
    if custom_objects is None:
        custom_objects = {}

    def deserialize(obj):
        # Recurse into containers so that nested structures (e.g. a dict
        # mapping output names to lists of metrics) are handled instead of
        # failing the membership test on an unhashable list/dict.
        if isinstance(obj, list):
            return [deserialize(value) for value in obj]
        if isinstance(obj, dict):
            return {key: deserialize(value) for key, value in obj.items()}
        if obj in custom_objects:
            return custom_objects[obj]
        return obj

    import h5py
    f = h5py.File(filepath, mode='r')
    # try/finally guarantees the handle is released on every exit path,
    # including the error and early-return ones.
    try:
        # instantiate model
        model_config = f.attrs.get('model_config')
        if model_config is None:
            raise ValueError('No model found in config file.')
        model_config = json.loads(model_config.decode('utf-8'))
        model = model_from_config(model_config, custom_objects=custom_objects)

        # set weights
        model.load_weights_from_hdf5_group(f['model_weights'])

        # instantiate optimizer
        training_config = f.attrs.get('training_config')
        if training_config is None:
            warnings.warn('No training configuration found in save file: '
                          'the model was *not* compiled. Compile it manually.')
            return model
        training_config = json.loads(training_config.decode('utf-8'))
        optimizer_config = training_config['optimizer_config']
        # Forward custom_objects so custom optimizer classes can be resolved.
        optimizer = optimizer_from_config(optimizer_config,
                                          custom_objects=custom_objects)

        # recover loss functions and metrics
        loss = deserialize(training_config['loss'])
        metrics = deserialize(training_config['metrics'])
        sample_weight_mode = training_config['sample_weight_mode']
        loss_weights = training_config['loss_weights']

        # compile model
        model.compile(optimizer=optimizer,
                      loss=loss,
                      metrics=metrics,
                      loss_weights=loss_weights,
                      sample_weight_mode=sample_weight_mode)

        # set optimizer weights
        if 'optimizer_weights' in f:
            # build train function (to get weight updates)
            if model.__class__.__name__ == 'Sequential':
                model.model._make_train_function()
            else:
                model._make_train_function()
            optimizer_weights_group = f['optimizer_weights']
            optimizer_weight_names = [n.decode('utf8') for n in optimizer_weights_group.attrs['weight_names']]
            # The datasets are consumed by set_weights before the file closes.
            optimizer_weight_values = [optimizer_weights_group[n] for n in optimizer_weight_names]
            model.optimizer.set_weights(optimizer_weight_values)
        return model
    finally:
        f.close()
|
||||
|
||||
|
||||
def model_from_config(config, custom_objects={}):
|
||||
from keras.utils.layer_utils import layer_from_config
|
||||
if isinstance(config, list):
|
||||
@@ -158,6 +319,26 @@ class Sequential(Model):
|
||||
self.built = False
|
||||
self._flattened_layers = None
|
||||
|
||||
def pop(self):
    '''Removes the last layer in the model.

    Raises an `Exception` if the model has no layers. Afterwards the
    model is flagged as not built and its flattened-layer cache is reset.
    '''
    if not self.layers:
        raise Exception('There are no layers in the model.')

    self.layers.pop()
    if self.layers:
        # The new last layer no longer feeds anything downstream.
        new_last = self.layers[-1]
        new_last.outbound_nodes = []
        self.outputs = [new_last.output]
        # update self.inbound_nodes
        entry_node = self.inbound_nodes[0]
        entry_node.output_tensors = self.outputs
        entry_node.output_shapes = [self.outputs[0]._keras_shape]
    else:
        # Nothing left: reset the model's own connectivity.
        self.outputs = []
        self.inbound_nodes = []
        self.outbound_nodes = []
    self.built = False
    self._flattened_layers = None
|
||||
|
||||
def call(self, x, mask=None):
|
||||
if not self.built:
|
||||
self.build()
|
||||
@@ -287,7 +468,7 @@ class Sequential(Model):
|
||||
'''
|
||||
# support for legacy behavior
|
||||
for layer in self.flattened_layers:
|
||||
nb_param = len(layer.get_weights())
|
||||
nb_param = len(layer.weights)
|
||||
layer.set_weights(weights[:nb_param])
|
||||
weights = weights[nb_param:]
|
||||
|
||||
@@ -343,6 +524,9 @@ class Sequential(Model):
|
||||
**kwargs)
|
||||
self.optimizer = self.model.optimizer
|
||||
self.loss = self.model.loss
|
||||
self.loss_weights = self.model.loss_weights
|
||||
self.metrics = self.model.metrics
|
||||
self.metrics_tensors = self.model.metrics_tensors
|
||||
self.metrics_names = self.model.metrics_names
|
||||
self.sample_weight_mode = self.model.sample_weight_mode
|
||||
|
||||
@@ -578,7 +762,7 @@ class Sequential(Model):
|
||||
def fit_generator(self, generator, samples_per_epoch, nb_epoch,
|
||||
verbose=1, callbacks=[],
|
||||
validation_data=None, nb_val_samples=None,
|
||||
class_weight=None, max_q_size=10, **kwargs):
|
||||
class_weight=None, max_q_size=10, nb_worker=1, pickle_safe=False, **kwargs):
|
||||
'''Fits the model on data generated batch-by-batch by
|
||||
a Python generator.
|
||||
The generator is run in parallel to the model, for efficiency.
|
||||
@@ -609,6 +793,11 @@ class Sequential(Model):
|
||||
class_weight: dictionary mapping class indices to a weight
|
||||
for the class.
|
||||
max_q_size: maximum size for the generator queue
|
||||
nb_worker: maximum number of processes to spin up
|
||||
pickle_safe: if True, use process based threading. Note that because
|
||||
this implementation relies on multiprocessing, you should not pass non
|
||||
picklable arguments to the generator as they can't be passed
|
||||
easily to children processes.
|
||||
|
||||
# Returns
|
||||
A `History` object.
|
||||
@@ -632,6 +821,9 @@ class Sequential(Model):
|
||||
'''
|
||||
if self.model is None:
|
||||
raise Exception('The model needs to be compiled before being used.')
|
||||
if nb_worker > 1 and not pickle_safe:
|
||||
warnings.warn('The "nb_worker" argument is deprecated when pickle_safe is False')
|
||||
nb_worker = 1 # For backward compatibility
|
||||
if 'show_accuracy' in kwargs:
|
||||
kwargs.pop('show_accuracy')
|
||||
warnings.warn('The "show_accuracy" argument is deprecated, '
|
||||
@@ -639,10 +831,6 @@ class Sequential(Model):
|
||||
'the model at compile time:\n'
|
||||
'`model.compile(optimizer, loss, '
|
||||
'metrics=["accuracy"])`')
|
||||
if 'nb_worker' in kwargs:
|
||||
kwargs.pop('nb_worker')
|
||||
warnings.warn('The "nb_worker" argument is deprecated, '
|
||||
'please remove it from your code.')
|
||||
if 'nb_val_worker' in kwargs:
|
||||
kwargs.pop('nb_val_worker')
|
||||
warnings.warn('The "nb_val_worker" argument is deprecated, '
|
||||
@@ -658,9 +846,11 @@ class Sequential(Model):
|
||||
validation_data=validation_data,
|
||||
nb_val_samples=nb_val_samples,
|
||||
class_weight=class_weight,
|
||||
max_q_size=max_q_size)
|
||||
max_q_size=max_q_size,
|
||||
nb_worker=nb_worker,
|
||||
pickle_safe=pickle_safe)
|
||||
|
||||
def evaluate_generator(self, generator, val_samples, max_q_size=10, **kwargs):
|
||||
def evaluate_generator(self, generator, val_samples, max_q_size=10, nb_worker=1, pickle_safe=False, **kwargs):
|
||||
'''Evaluates the model on a data generator. The generator should
|
||||
return the same kind of data as accepted by `test_on_batch`.
|
||||
|
||||
@@ -672,9 +862,17 @@ class Sequential(Model):
|
||||
total number of samples to generate from `generator`
|
||||
before returning.
|
||||
max_q_size: maximum size for the generator queue
|
||||
nb_worker: maximum number of processes to spin up
|
||||
pickle_safe: if True, use process based threading. Note that because
|
||||
this implementation relies on multiprocessing, you should not pass non
|
||||
picklable arguments to the generator as they can't be passed
|
||||
easily to children processes.
|
||||
'''
|
||||
if self.model is None:
|
||||
raise Exception('The model needs to be compiled before being used.')
|
||||
if nb_worker > 1 and not pickle_safe:
|
||||
warnings.warn('The "nb_worker" argument is deprecated when pickle_safe is False')
|
||||
nb_worker = 1 # For backward compatibility
|
||||
if 'show_accuracy' in kwargs:
|
||||
kwargs.pop('show_accuracy')
|
||||
warnings.warn('The "show_accuracy" argument is deprecated, '
|
||||
@@ -690,9 +888,11 @@ class Sequential(Model):
|
||||
str(kwargs))
|
||||
return self.model.evaluate_generator(generator,
|
||||
val_samples,
|
||||
max_q_size=max_q_size)
|
||||
max_q_size=max_q_size,
|
||||
nb_worker=nb_worker,
|
||||
pickle_safe=pickle_safe)
|
||||
|
||||
def predict_generator(self, generator, val_samples, max_q_size=10):
|
||||
def predict_generator(self, generator, val_samples, max_q_size=10, nb_worker=1, pickle_safe=False):
|
||||
'''Generates predictions for the input samples from a data generator.
|
||||
The generator should return the same kind of data as accepted by
|
||||
`predict_on_batch`.
|
||||
@@ -702,14 +902,24 @@ class Sequential(Model):
|
||||
val_samples: total number of samples to generate from `generator`
|
||||
before returning.
|
||||
max_q_size: maximum size for the generator queue
|
||||
nb_worker: maximum number of processes to spin up
|
||||
pickle_safe: if True, use process based threading. Note that because
|
||||
this implementation relies on multiprocessing, you should not pass non
|
||||
picklable arguments to the generator as they can't be passed
|
||||
easily to children processes.
|
||||
|
||||
# Returns
|
||||
A Numpy array of predictions.
|
||||
'''
|
||||
if self.model is None:
|
||||
self.build()
|
||||
if nb_worker > 1 and not pickle_safe:
|
||||
warnings.warn('The "nb_worker" argument is deprecated when pickle_safe is False')
|
||||
nb_worker = 1 # For backward compatibility
|
||||
return self.model.predict_generator(generator, val_samples,
|
||||
max_q_size=max_q_size)
|
||||
max_q_size=max_q_size,
|
||||
nb_worker=nb_worker,
|
||||
pickle_safe=pickle_safe)
|
||||
|
||||
def get_config(self):
|
||||
'''Returns the model configuration
|
||||
|
||||
+83
-60
@@ -1,6 +1,5 @@
|
||||
from __future__ import absolute_import
|
||||
from . import backend as K
|
||||
import numpy as np
|
||||
from .utils.generic_utils import get_from_module
|
||||
from six.moves import zip
|
||||
|
||||
@@ -11,8 +10,24 @@ def clip_norm(g, c, n):
|
||||
return g
|
||||
|
||||
|
||||
def kl_divergence(p, p_hat):
    '''Returns `p_hat - p + p * log(p / p_hat)`, using the backend log.
    '''
    ratio = p / p_hat
    return p_hat - p + p * K.log(ratio)
|
||||
def optimizer_from_config(config, custom_objects={}):
    '''Instantiates an optimizer from a `{'class_name', 'config'}` dict.

    `custom_objects` is consulted first (exact class name match); otherwise
    the class name is matched case-insensitively against the built-in
    optimizers. Raises `ValueError` when no class is found.
    '''
    builtin_optimizers = {
        'sgd': SGD,
        'rmsprop': RMSprop,
        'adagrad': Adagrad,
        'adadelta': Adadelta,
        'adam': Adam,
        'adamax': Adamax,
        'nadam': Nadam,
    }
    requested = config['class_name']
    if requested in custom_objects:
        return custom_objects[requested].from_config(config['config'])

    lookup_key = requested.lower()
    if lookup_key not in builtin_optimizers:
        raise ValueError('Optimizer class not found:', requested)
    return builtin_optimizers[lookup_key].from_config(config['config'])
|
||||
|
||||
|
||||
class Optimizer(object):
|
||||
@@ -72,35 +87,35 @@ class Optimizer(object):
|
||||
output of `get_weights`).
|
||||
'''
|
||||
params = self.weights
|
||||
if len(params) != len(weights):
|
||||
raise Exception('Provided weight array does not match weights (' +
|
||||
str(len(params)) + ' optimizer params vs. ' +
|
||||
str(len(weights)) + ' provided weights)')
|
||||
for p, w in zip(params, weights):
|
||||
if K.get_value(p).shape != w.shape:
|
||||
weight_value_tuples = []
|
||||
param_values = K.batch_get_value(params)
|
||||
for pv, p, w in zip(param_values, params, weights):
|
||||
if pv.shape != w.shape:
|
||||
raise Exception('Optimizer weight shape ' +
|
||||
str(K.get_value(p).shape) +
|
||||
str(pv.shape) +
|
||||
' not compatible with '
|
||||
'provided weight shape ' + str(w.shape))
|
||||
K.set_value(p, w)
|
||||
weight_value_tuples.append((p, w))
|
||||
K.batch_set_value(weight_value_tuples)
|
||||
|
||||
def get_weights(self):
|
||||
'''Returns the current weights of the optimizer,
|
||||
as a list of numpy arrays.
|
||||
'''
|
||||
weights = []
|
||||
for p in self.weights:
|
||||
weights.append(K.get_value(p))
|
||||
return weights
|
||||
return K.batch_get_value(self.weights)
|
||||
|
||||
def get_config(self):
|
||||
config = {'name': self.__class__.__name__}
|
||||
config = {}
|
||||
if hasattr(self, 'clipnorm'):
|
||||
config['clipnorm'] = self.clipnorm
|
||||
if hasattr(self, 'clipvalue'):
|
||||
config['clipvalue'] = self.clipvalue
|
||||
return config
|
||||
|
||||
@classmethod
def from_config(cls, config):
    '''Builds an instance of `cls`, passing the entries of `config`
    as keyword arguments to the constructor.
    '''
    instance = cls(**config)
    return instance
|
||||
|
||||
|
||||
class SGD(Optimizer):
|
||||
'''Stochastic gradient descent, with support for momentum,
|
||||
@@ -124,13 +139,15 @@ class SGD(Optimizer):
|
||||
def get_updates(self, params, constraints, loss):
|
||||
grads = self.get_gradients(loss, params)
|
||||
lr = self.lr * (1. / (1. + self.decay * self.iterations))
|
||||
self.updates = [(self.iterations, self.iterations + 1.)]
|
||||
self.updates = [K.update_add(self.iterations, 1)]
|
||||
|
||||
# momentum
|
||||
self.weights = [K.variable(np.zeros(K.get_value(p).shape)) for p in params]
|
||||
for p, g, m in zip(params, grads, self.weights):
|
||||
shapes = [x.shape for x in K.batch_get_value(params)]
|
||||
moments = [K.zeros(shape) for shape in shapes]
|
||||
self.weights = [self.iterations] + moments
|
||||
for p, g, m in zip(params, grads, moments):
|
||||
v = self.momentum * m - lr * g # velocity
|
||||
self.updates.append((m, v))
|
||||
self.updates.append(K.update(m, v))
|
||||
|
||||
if self.nesterov:
|
||||
new_p = p + self.momentum * v - lr * g
|
||||
@@ -141,7 +158,8 @@ class SGD(Optimizer):
|
||||
if p in constraints:
|
||||
c = constraints[p]
|
||||
new_p = c(new_p)
|
||||
self.updates.append((p, new_p))
|
||||
|
||||
self.updates.append(K.update(p, new_p))
|
||||
return self.updates
|
||||
|
||||
def get_config(self):
|
||||
@@ -176,21 +194,22 @@ class RMSprop(Optimizer):
|
||||
|
||||
def get_updates(self, params, constraints, loss):
|
||||
grads = self.get_gradients(loss, params)
|
||||
# accumulators
|
||||
self.weights = [K.variable(np.zeros(K.get_value(p).shape)) for p in params]
|
||||
shapes = [x.shape for x in K.batch_get_value(params)]
|
||||
accumulators = [K.zeros(shape) for shape in shapes]
|
||||
self.weights = accumulators
|
||||
self.updates = []
|
||||
|
||||
for p, g, a in zip(params, grads, self.weights):
|
||||
for p, g, a in zip(params, grads, accumulators):
|
||||
# update accumulator
|
||||
new_a = self.rho * a + (1. - self.rho) * K.square(g)
|
||||
self.updates.append((a, new_a))
|
||||
self.updates.append(K.update(a, new_a))
|
||||
new_p = p - self.lr * g / (K.sqrt(new_a) + self.epsilon)
|
||||
|
||||
# apply constraints
|
||||
if p in constraints:
|
||||
c = constraints[p]
|
||||
new_p = c(new_p)
|
||||
self.updates.append((p, new_p))
|
||||
self.updates.append(K.update(p, new_p))
|
||||
return self.updates
|
||||
|
||||
def get_config(self):
|
||||
@@ -218,19 +237,20 @@ class Adagrad(Optimizer):
|
||||
|
||||
def get_updates(self, params, constraints, loss):
|
||||
grads = self.get_gradients(loss, params)
|
||||
# accumulators
|
||||
self.weights = [K.variable(np.zeros(K.get_value(p).shape)) for p in params]
|
||||
shapes = [x.shape for x in K.batch_get_value(params)]
|
||||
accumulators = [K.zeros(shape) for shape in shapes]
|
||||
self.weights = accumulators
|
||||
self.updates = []
|
||||
|
||||
for p, g, a in zip(params, grads, self.weights):
|
||||
for p, g, a in zip(params, grads, accumulators):
|
||||
new_a = a + K.square(g) # update accumulator
|
||||
self.updates.append((a, new_a))
|
||||
self.updates.append(K.update(a, new_a))
|
||||
new_p = p - self.lr * g / (K.sqrt(new_a) + self.epsilon)
|
||||
# apply constraints
|
||||
if p in constraints:
|
||||
c = constraints[p]
|
||||
new_p = c(new_p)
|
||||
self.updates.append((p, new_p))
|
||||
self.updates.append(K.update(p, new_p))
|
||||
return self.updates
|
||||
|
||||
def get_config(self):
|
||||
@@ -262,15 +282,16 @@ class Adadelta(Optimizer):
|
||||
|
||||
def get_updates(self, params, constraints, loss):
|
||||
grads = self.get_gradients(loss, params)
|
||||
accumulators = [K.variable(np.zeros(K.get_value(p).shape)) for p in params]
|
||||
delta_accumulators = [K.variable(np.zeros(K.get_value(p).shape)) for p in params]
|
||||
shapes = [x.shape for x in K.batch_get_value(params)]
|
||||
accumulators = [K.zeros(shape) for shape in shapes]
|
||||
delta_accumulators = [K.zeros(shape) for shape in shapes]
|
||||
self.weights = accumulators + delta_accumulators
|
||||
self.updates = []
|
||||
|
||||
for p, g, a, d_a in zip(params, grads, accumulators, delta_accumulators):
|
||||
# update accumulator
|
||||
new_a = self.rho * a + (1. - self.rho) * K.square(g)
|
||||
self.updates.append((a, new_a))
|
||||
self.updates.append(K.update(a, new_a))
|
||||
|
||||
# use the new accumulator and the *old* delta_accumulator
|
||||
update = g * K.sqrt(d_a + self.epsilon) / K.sqrt(new_a + self.epsilon)
|
||||
@@ -280,11 +301,11 @@ class Adadelta(Optimizer):
|
||||
if p in constraints:
|
||||
c = constraints[p]
|
||||
new_p = c(new_p)
|
||||
self.updates.append((p, new_p))
|
||||
self.updates.append(K.update(p, new_p))
|
||||
|
||||
# update delta_accumulator
|
||||
new_d_a = self.rho * d_a + (1 - self.rho) * K.square(update)
|
||||
self.updates.append((d_a, new_d_a))
|
||||
self.updates.append(K.update(d_a, new_d_a))
|
||||
return self.updates
|
||||
|
||||
def get_config(self):
|
||||
@@ -319,29 +340,30 @@ class Adam(Optimizer):
|
||||
|
||||
def get_updates(self, params, constraints, loss):
|
||||
grads = self.get_gradients(loss, params)
|
||||
self.updates = [(self.iterations, self.iterations + 1)]
|
||||
self.updates = [K.update_add(self.iterations, 1)]
|
||||
|
||||
t = self.iterations + 1
|
||||
lr_t = self.lr * K.sqrt(1. - K.pow(self.beta_2, t)) / (1. - K.pow(self.beta_1, t))
|
||||
|
||||
ms = [K.variable(np.zeros(K.get_value(p).shape)) for p in params]
|
||||
vs = [K.variable(np.zeros(K.get_value(p).shape)) for p in params]
|
||||
self.weights = ms + vs
|
||||
shapes = [x.shape for x in K.batch_get_value(params)]
|
||||
ms = [K.zeros(shape) for shape in shapes]
|
||||
vs = [K.zeros(shape) for shape in shapes]
|
||||
self.weights = [self.iterations] + ms + vs
|
||||
|
||||
for p, g, m, v in zip(params, grads, ms, vs):
|
||||
m_t = (self.beta_1 * m) + (1. - self.beta_1) * g
|
||||
v_t = (self.beta_2 * v) + (1. - self.beta_2) * K.square(g)
|
||||
p_t = p - lr_t * m_t / (K.sqrt(v_t) + self.epsilon)
|
||||
|
||||
self.updates.append((m, m_t))
|
||||
self.updates.append((v, v_t))
|
||||
self.updates.append(K.update(m, m_t))
|
||||
self.updates.append(K.update(v, v_t))
|
||||
|
||||
new_p = p_t
|
||||
# apply constraints
|
||||
if p in constraints:
|
||||
c = constraints[p]
|
||||
new_p = c(new_p)
|
||||
self.updates.append((p, new_p))
|
||||
self.updates.append(K.update(p, new_p))
|
||||
return self.updates
|
||||
|
||||
def get_config(self):
|
||||
@@ -378,16 +400,17 @@ class Adamax(Optimizer):
|
||||
|
||||
def get_updates(self, params, constraints, loss):
|
||||
grads = self.get_gradients(loss, params)
|
||||
self.updates = [(self.iterations, self.iterations + 1)]
|
||||
self.updates = [K.update_add(self.iterations, 1)]
|
||||
|
||||
t = self.iterations + 1
|
||||
lr_t = self.lr / (1. - K.pow(self.beta_1, t))
|
||||
|
||||
shapes = [x.shape for x in K.batch_get_value(params)]
|
||||
# zero init of 1st moment
|
||||
ms = [K.variable(np.zeros(K.get_value(p).shape)) for p in params]
|
||||
ms = [K.zeros(shape) for shape in shapes]
|
||||
# zero init of exponentially weighted infinity norm
|
||||
us = [K.variable(np.zeros(K.get_value(p).shape)) for p in params]
|
||||
self.weights = ms + us
|
||||
us = [K.zeros(shape) for shape in shapes]
|
||||
self.weights = [self.iterations] + ms + us
|
||||
|
||||
for p, g, m, u in zip(params, grads, ms, us):
|
||||
|
||||
@@ -395,15 +418,15 @@ class Adamax(Optimizer):
|
||||
u_t = K.maximum(self.beta_2 * u, K.abs(g))
|
||||
p_t = p - lr_t * m_t / (u_t + self.epsilon)
|
||||
|
||||
self.updates.append((m, m_t))
|
||||
self.updates.append((u, u_t))
|
||||
self.updates.append(K.update(m, m_t))
|
||||
self.updates.append(K.update(u, u_t))
|
||||
|
||||
new_p = p_t
|
||||
# apply constraints
|
||||
if p in constraints:
|
||||
c = constraints[p]
|
||||
new_p = c(new_p)
|
||||
self.updates.append((p, new_p))
|
||||
self.updates.append(K.update(p, new_p))
|
||||
return self.updates
|
||||
|
||||
def get_config(self):
|
||||
@@ -430,9 +453,8 @@ class Nadam(Optimizer):
|
||||
epsilon: float >= 0. Fuzz factor.
|
||||
|
||||
# References
|
||||
[1] Nadam report - http://cs229.stanford.edu/proj2015/054_report.pdf
|
||||
[2] On the importance of initialization and momentum in deep learning -
|
||||
http://www.cs.toronto.edu/~fritz/absps/momentum.pdf
|
||||
- [Nadam report](http://cs229.stanford.edu/proj2015/054_report.pdf)
|
||||
- [On the importance of initialization and momentum in deep learning](http://www.cs.toronto.edu/~fritz/absps/momentum.pdf)
|
||||
'''
|
||||
def __init__(self, lr=0.002, beta_1=0.9, beta_2=0.999,
|
||||
epsilon=1e-8, schedule_decay=0.004, **kwargs):
|
||||
@@ -447,7 +469,7 @@ class Nadam(Optimizer):
|
||||
|
||||
def get_updates(self, params, constraints, loss):
|
||||
grads = self.get_gradients(loss, params)
|
||||
self.updates = [(self.iterations, self.iterations + 1)]
|
||||
self.updates = [K.update_add(self.iterations, 1)]
|
||||
|
||||
t = self.iterations + 1
|
||||
|
||||
@@ -458,10 +480,11 @@ class Nadam(Optimizer):
|
||||
m_schedule_next = self.m_schedule * momentum_cache_t * momentum_cache_t_1
|
||||
self.updates.append((self.m_schedule, m_schedule_new))
|
||||
|
||||
ms = [K.variable(np.zeros(K.get_value(p).shape)) for p in params]
|
||||
vs = [K.variable(np.zeros(K.get_value(p).shape)) for p in params]
|
||||
shapes = [x.shape for x in K.batch_get_value(params)]
|
||||
ms = [K.zeros(shape) for shape in shapes]
|
||||
vs = [K.zeros(shape) for shape in shapes]
|
||||
|
||||
self.weights = ms + vs
|
||||
self.weights = [self.iterations] + ms + vs
|
||||
|
||||
for p, g, m, v in zip(params, grads, ms, vs):
|
||||
# the following equations given in [1]
|
||||
@@ -472,8 +495,8 @@ class Nadam(Optimizer):
|
||||
v_t_prime = v_t / (1. - K.pow(self.beta_2, t))
|
||||
m_t_bar = (1. - momentum_cache_t) * g_prime + momentum_cache_t_1 * m_t_prime
|
||||
|
||||
self.updates.append((m, m_t))
|
||||
self.updates.append((v, v_t))
|
||||
self.updates.append(K.update(m, m_t))
|
||||
self.updates.append(K.update(v, v_t))
|
||||
|
||||
p_t = p - self.lr * m_t_bar / (K.sqrt(v_t_prime) + self.epsilon)
|
||||
new_p = p_t
|
||||
@@ -482,7 +505,7 @@ class Nadam(Optimizer):
|
||||
if p in constraints:
|
||||
c = constraints[p]
|
||||
new_p = c(new_p)
|
||||
self.updates.append((p, new_p))
|
||||
self.updates.append(K.update(p, new_p))
|
||||
return self.updates
|
||||
|
||||
def get_config(self):
|
||||
|
||||
@@ -118,13 +118,17 @@ def flip_axis(x, axis):
|
||||
return x
|
||||
|
||||
|
||||
def array_to_img(x, dim_ordering=K.image_dim_ordering(), scale=True):
|
||||
def array_to_img(x, dim_ordering='default', scale=True):
|
||||
from PIL import Image
|
||||
if dim_ordering == 'default':
|
||||
dim_ordering = K.image_dim_ordering()
|
||||
if dim_ordering == 'th':
|
||||
x = x.transpose(1, 2, 0)
|
||||
if scale:
|
||||
x += max(-np.min(x), 0)
|
||||
x /= np.max(x)
|
||||
x_max = np.max(x)
|
||||
if x_max != 0:
|
||||
x /= x_max
|
||||
x *= 255
|
||||
if x.shape[2] == 3:
|
||||
# RGB
|
||||
@@ -136,7 +140,9 @@ def array_to_img(x, dim_ordering=K.image_dim_ordering(), scale=True):
|
||||
raise Exception('Unsupported channel number: ', x.shape[2])
|
||||
|
||||
|
||||
def img_to_array(img, dim_ordering=K.image_dim_ordering()):
|
||||
def img_to_array(img, dim_ordering='default'):
|
||||
if dim_ordering == 'default':
|
||||
dim_ordering = K.image_dim_ordering()
|
||||
if dim_ordering not in ['th', 'tf']:
|
||||
raise Exception('Unknown dim_ordering: ', dim_ordering)
|
||||
# image has dim_ordering (height, width, channel)
|
||||
@@ -162,7 +168,7 @@ def load_img(path, grayscale=False, target_size=None):
|
||||
else: # Ensure 3 channel even when loaded image is grayscale
|
||||
img = img.convert('RGB')
|
||||
if target_size:
|
||||
img = img.resize(target_size)
|
||||
img = img.resize((target_size[1], target_size[0]))
|
||||
return img
|
||||
|
||||
|
||||
@@ -222,7 +228,9 @@ class ImageDataGenerator(object):
|
||||
horizontal_flip=False,
|
||||
vertical_flip=False,
|
||||
rescale=None,
|
||||
dim_ordering=K.image_dim_ordering()):
|
||||
dim_ordering='default'):
|
||||
if dim_ordering == 'default':
|
||||
dim_ordering = K.image_dim_ordering()
|
||||
self.__dict__.update(locals())
|
||||
self.mean = None
|
||||
self.std = None
|
||||
@@ -446,12 +454,14 @@ class NumpyArrayIterator(Iterator):
|
||||
|
||||
def __init__(self, X, y, image_data_generator,
|
||||
batch_size=32, shuffle=False, seed=None,
|
||||
dim_ordering=K.image_dim_ordering(),
|
||||
dim_ordering='default',
|
||||
save_to_dir=None, save_prefix='', save_format='jpeg'):
|
||||
if y is not None and len(X) != len(y):
|
||||
raise Exception('X (images tensor) and y (labels) '
|
||||
'should have the same length. '
|
||||
'Found: X.shape = %s, y.shape = %s' % (np.asarray(X).shape, np.asarray(y).shape))
|
||||
if dim_ordering == 'default':
|
||||
dim_ordering = K.image_dim_ordering()
|
||||
self.X = X
|
||||
self.y = y
|
||||
self.image_data_generator = image_data_generator
|
||||
@@ -493,10 +503,12 @@ class DirectoryIterator(Iterator):
|
||||
|
||||
def __init__(self, directory, image_data_generator,
|
||||
target_size=(256, 256), color_mode='rgb',
|
||||
dim_ordering=K.image_dim_ordering,
|
||||
dim_ordering='default',
|
||||
classes=None, class_mode='categorical',
|
||||
batch_size=32, shuffle=True, seed=None,
|
||||
save_to_dir=None, save_prefix='', save_format='jpeg'):
|
||||
if dim_ordering == 'default':
|
||||
dim_ordering = K.image_dim_ordering()
|
||||
self.directory = directory
|
||||
self.image_data_generator = image_data_generator
|
||||
self.target_size = tuple(target_size)
|
||||
|
||||
@@ -99,6 +99,7 @@ class Tokenizer(object):
|
||||
wcounts = list(self.word_counts.items())
|
||||
wcounts.sort(key=lambda x: x[1], reverse=True)
|
||||
sorted_voc = [wc[0] for wc in wcounts]
|
||||
# note that index 0 is reserved, never assigned to an existing word
|
||||
self.word_index = dict(list(zip(sorted_voc, list(range(1, len(sorted_voc) + 1)))))
|
||||
|
||||
self.index_docs = {}
|
||||
|
||||
+22
-19
@@ -1,5 +1,4 @@
|
||||
from __future__ import absolute_import
|
||||
import numpy as np
|
||||
from . import backend as K
|
||||
|
||||
|
||||
@@ -18,11 +17,11 @@ class Regularizer(object):
|
||||
|
||||
|
||||
class EigenvalueRegularizer(Regularizer):
|
||||
'''This takes a constant that controls the
|
||||
regularization by Eigenvalue Decay on the
|
||||
current layer and outputs the regularized
|
||||
loss (evaluated on the training data) and
|
||||
the original loss (evaluated on the
|
||||
'''This takes a constant that controls
|
||||
the regularization by Eigenvalue Decay on the
|
||||
current layer and outputs the regularized
|
||||
loss (evaluated on the training data) and
|
||||
the original loss (evaluated on the
|
||||
validation data).
|
||||
'''
|
||||
def __init__(self, k):
|
||||
@@ -41,19 +40,18 @@ class EigenvalueRegularizer(Regularizer):
|
||||
'and embedding layers.')
|
||||
WW = K.dot(K.transpose(W), W)
|
||||
dim1, dim2 = K.eval(K.shape(WW)) # number of neurons in the layer
|
||||
k = self.k
|
||||
|
||||
|
||||
# power method for approximating the dominant eigenvector:
|
||||
o = K.ones([dim1, 1]) # initial values for the dominant eigenvector
|
||||
domin_eigenvect = K.dot(WW, o)
|
||||
main_eigenvect = K.dot(WW, o)
|
||||
for n in range(power - 1):
|
||||
domin_eigenvect = K.dot(WW, domin_eigenvect)
|
||||
|
||||
WWd = K.dot(WW, domin_eigenvect)
|
||||
|
||||
main_eigenvect = K.dot(WW, main_eigenvect)
|
||||
|
||||
WWd = K.dot(WW, main_eigenvect)
|
||||
|
||||
# the corresponding dominant eigenvalue:
|
||||
domin_eigenval = K.dot(K.transpose(WWd), domin_eigenvect) / K.dot(K.transpose(domin_eigenvect), domin_eigenvect)
|
||||
regularized_loss = loss + (domin_eigenval ** 0.5) * self.k # multiplied by the given regularization gain
|
||||
main_eigenval = K.dot(K.transpose(WWd), main_eigenvect) / K.dot(K.transpose(main_eigenvect), main_eigenvect)
|
||||
regularized_loss = loss + (main_eigenval ** 0.5) * self.k # multiplied by the given regularization gain
|
||||
|
||||
return K.in_train_phase(regularized_loss[0, 0], loss)
|
||||
|
||||
@@ -77,8 +75,11 @@ class WeightRegularizer(Regularizer):
|
||||
'ActivityRegularizer '
|
||||
'(i.e. activity_regularizer="l2" instead '
|
||||
'of activity_regularizer="activity_l2".')
|
||||
regularized_loss = loss + K.sum(K.abs(self.p)) * self.l1
|
||||
regularized_loss += K.sum(K.square(self.p)) * self.l2
|
||||
regularized_loss = loss
|
||||
if self.l1:
|
||||
regularized_loss += K.sum(self.l1 * K.abs(self.p))
|
||||
if self.l2:
|
||||
regularized_loss += K.sum(self.l2 * K.square(self.p))
|
||||
return K.in_train_phase(regularized_loss, loss)
|
||||
|
||||
def get_config(self):
|
||||
@@ -104,8 +105,10 @@ class ActivityRegularizer(Regularizer):
|
||||
regularized_loss = loss
|
||||
for i in range(len(self.layer.inbound_nodes)):
|
||||
output = self.layer.get_output_at(i)
|
||||
regularized_loss += self.l1 * K.sum(K.mean(K.abs(output), axis=0))
|
||||
regularized_loss += self.l2 * K.sum(K.mean(K.square(output), axis=0))
|
||||
if self.l1:
|
||||
regularized_loss += K.sum(self.l1 * K.abs(output))
|
||||
if self.l2:
|
||||
regularized_loss += K.sum(self.l2 * K.square(output))
|
||||
return K.in_train_phase(regularized_loss, loss)
|
||||
|
||||
def get_config(self):
|
||||
|
||||
@@ -5,6 +5,7 @@ import tarfile
|
||||
import os
|
||||
import sys
|
||||
import shutil
|
||||
import hashlib
|
||||
from six.moves.urllib.request import urlopen
|
||||
from six.moves.urllib.error import URLError, HTTPError
|
||||
|
||||
@@ -36,11 +37,12 @@ else:
|
||||
from six.moves.urllib.request import urlretrieve
|
||||
|
||||
|
||||
def get_file(fname, origin, untar=False):
|
||||
def get_file(fname, origin, untar=False,
|
||||
md5_hash=None, cache_subdir='datasets'):
|
||||
datadir_base = os.path.expanduser(os.path.join('~', '.keras'))
|
||||
if not os.access(datadir_base, os.W_OK):
|
||||
datadir_base = os.path.join('/tmp', '.keras')
|
||||
datadir = os.path.join(datadir_base, 'datasets')
|
||||
datadir = os.path.join(datadir_base, cache_subdir)
|
||||
if not os.path.exists(datadir):
|
||||
os.makedirs(datadir)
|
||||
|
||||
@@ -50,7 +52,18 @@ def get_file(fname, origin, untar=False):
|
||||
else:
|
||||
fpath = os.path.join(datadir, fname)
|
||||
|
||||
if not os.path.exists(fpath):
|
||||
download = False
|
||||
if os.path.exists(fpath):
|
||||
# file found; verify integrity if a hash was provided
|
||||
if md5_hash is not None:
|
||||
if not validate_file(fpath, md5_hash):
|
||||
print('A local file was found, but it seems to be '
|
||||
'incomplete or outdated.')
|
||||
download = True
|
||||
else:
|
||||
download = True
|
||||
|
||||
if download:
|
||||
print('Downloading data from', origin)
|
||||
global progbar
|
||||
progbar = None
|
||||
@@ -93,3 +106,14 @@ def get_file(fname, origin, untar=False):
|
||||
return untar_fpath
|
||||
|
||||
return fpath
|
||||
|
||||
|
||||
def validate_file(fpath, md5_hash, chunk_size=65535):
    '''Validates a file against an expected MD5 hash.

    # Arguments
        fpath: path to the file being validated.
        md5_hash: the expected MD5 hex digest; it is compared
            as a string against the digest of the file contents.
        chunk_size: number of bytes read from the file per iteration.

    # Returns
        True if the digest of the file matches `md5_hash`,
        False otherwise.
    '''
    hasher = hashlib.md5()
    with open(fpath, 'rb') as f:
        # Feed the hasher chunk by chunk so that a large file is never
        # held in memory in one piece; the digest is the same as when
        # hashing the whole content at once.
        for chunk in iter(lambda: f.read(chunk_size), b''):
            hasher.update(chunk)
    return str(hasher.hexdigest()) == str(md5_hash)
|
||||
|
||||
@@ -1,6 +1,8 @@
|
||||
from __future__ import absolute_import
|
||||
from __future__ import print_function
|
||||
import h5py
|
||||
import numpy as np
|
||||
import sys
|
||||
from collections import defaultdict
|
||||
|
||||
|
||||
@@ -69,3 +71,17 @@ def load_array(name):
|
||||
a[:] = array[:]
|
||||
f.close()
|
||||
return a
|
||||
|
||||
|
||||
def ask_to_proceed_with_overwrite(filepath):
    '''Interactively asks whether an existing file may be overwritten.

    The user is prompted until the answer is exactly "y" or "n".

    # Arguments
        filepath: the path shown in the warning prompt.

    # Returns
        True if the user answered "y", False if the user answered "n".
    '''
    # On Python 2 (version <= 2.7) the prompt function is `raw_input`,
    # otherwise it is `input`.
    prompt = input
    if sys.version_info[:2] <= (2, 7):
        prompt = raw_input
    answer = prompt('[WARNING] %s already exists - overwrite? '
                    '[y/n]' % (filepath))
    while answer not in ['y', 'n']:
        answer = prompt('Enter "y" (overwrite) or "n" (cancel).')
    if answer != 'n':
        print('[TIP] Next time specify overwrite=True!')
        return True
    return False
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
from __future__ import print_function
|
||||
|
||||
from .generic_utils import get_from_module
|
||||
from .np_utils import convert_kernel
|
||||
from ..layers import *
|
||||
from ..models import Model, Sequential, Graph
|
||||
from .. import backend as K
|
||||
@@ -97,3 +98,22 @@ def print_summary(layers, relevant_nodes=None, line_length=100, positions=[.33,
|
||||
|
||||
print('Total params: %s' % total_params)
|
||||
print('_' * line_length)
|
||||
|
||||
|
||||
def convert_all_kernels_in_model(model):
    '''Converts the kernels of the convolution layers of a model.

    For every layer of `model.layers` whose class name is one of the
    convolution classes listed below, the value of `layer.W` is read,
    passed through `convert_kernel`, and the converted values are
    written back in a single `K.batch_set_value` call.

    # Arguments
        model: object exposing a `layers` list.
    '''
    # Note: SeparableConvolution not included
    # since only supported by TF.
    conv_classes = {
        'Convolution1D',
        'Convolution2D',
        'Convolution3D',
        'AtrousConvolution2D',
        'Deconvolution2D',
    }
    to_assign = [(layer.W, convert_kernel(K.get_value(layer.W)))
                 for layer in model.layers
                 if layer.__class__.__name__ in conv_classes]
    K.batch_set_value(to_assign)
|
||||
|
||||
+70
-13
@@ -59,18 +59,75 @@ def convert_kernel(kernel, dim_ordering='th'):
|
||||
is its own inverse).
|
||||
'''
|
||||
new_kernel = np.copy(kernel)
|
||||
if dim_ordering == 'th':
|
||||
w = kernel.shape[2]
|
||||
h = kernel.shape[3]
|
||||
for i in range(w):
|
||||
for j in range(h):
|
||||
new_kernel[:, :, i, j] = kernel[:, :, w - i - 1, h - j - 1]
|
||||
elif dim_ordering == 'tf':
|
||||
w = kernel.shape[0]
|
||||
h = kernel.shape[1]
|
||||
for i in range(w):
|
||||
for j in range(h):
|
||||
new_kernel[i, j, :, :] = kernel[w - i - 1, h - j - 1, :, :]
|
||||
if kernel.ndim == 4:
|
||||
# conv 2d
|
||||
# TH kernel shape: (depth, input_depth, rows, cols)
|
||||
# TF kernel shape: (rows, cols, input_depth, depth)
|
||||
if dim_ordering == 'th':
|
||||
w = kernel.shape[2]
|
||||
h = kernel.shape[3]
|
||||
for i in range(w):
|
||||
for j in range(h):
|
||||
new_kernel[:, :, i, j] = kernel[:, :, w - i - 1, h - j - 1]
|
||||
elif dim_ordering == 'tf':
|
||||
w = kernel.shape[0]
|
||||
h = kernel.shape[1]
|
||||
for i in range(w):
|
||||
for j in range(h):
|
||||
new_kernel[i, j, :, :] = kernel[w - i - 1, h - j - 1, :, :]
|
||||
else:
|
||||
raise Exception('Invalid dim_ordering: ' + str(dim_ordering))
|
||||
elif kernel.ndim == 5:
|
||||
# conv 3d
|
||||
# TH kernel shape: (out_depth, input_depth, kernel_dim1, kernel_dim2, kernel_dim3)
|
||||
# TF kernel shape: (kernel_dim1, kernel_dim2, kernel_dim3, input_depth, out_depth)
|
||||
if dim_ordering == 'th':
|
||||
w = kernel.shape[2]
|
||||
h = kernel.shape[3]
|
||||
z = kernel.shape[4]
|
||||
for i in range(w):
|
||||
for j in range(h):
|
||||
for k in range(z):
|
||||
new_kernel[:, :, i, j, k] = kernel[:, :,
|
||||
w - i - 1,
|
||||
h - j - 1,
|
||||
z - k - 1]
|
||||
elif dim_ordering == 'tf':
|
||||
w = kernel.shape[0]
|
||||
h = kernel.shape[1]
|
||||
z = kernel.shape[2]
|
||||
for i in range(w):
|
||||
for j in range(h):
|
||||
for k in range(z):
|
||||
new_kernel[i, j, k, :, :] = kernel[w - i - 1,
|
||||
h - j - 1,
|
||||
z - k - 1,
|
||||
:, :]
|
||||
else:
|
||||
raise Exception('Invalid dim_ordering: ' + str(dim_ordering))
|
||||
else:
|
||||
raise Exception('Invalid dim_ordering: ' + str(dim_ordering))
|
||||
raise ValueError('Invalid kernel shape:', kernel.shape)
|
||||
return new_kernel
|
||||
|
||||
|
||||
def conv_output_length(input_length, filter_size, border_mode, stride, dilation=1):
    '''Computes the length of a convolution output along one dimension.

    # Arguments
        input_length: integer length of the input, or None.
        filter_size: integer size of the filter.
        border_mode: one of "same", "valid".
        stride: integer stride.
        dilation: integer dilation rate; the filter spans
            `filter_size + (filter_size - 1) * (dilation - 1)` positions.

    # Returns
        The output length, or None if `input_length` is None.
    '''
    if input_length is None:
        return None
    assert border_mode in {'same', 'valid'}
    effective_size = filter_size + (filter_size - 1) * (dilation - 1)
    if border_mode == 'valid':
        unstrided = input_length - effective_size + 1
    elif border_mode == 'same':
        unstrided = input_length
    # ceiling division of the unstrided length by the stride
    return (unstrided + stride - 1) // stride
|
||||
|
||||
|
||||
def conv_input_length(output_length, filter_size, border_mode, stride):
    '''Computes an input length from a convolution output length.

    # Arguments
        output_length: integer length of the output, or None.
        filter_size: integer size of the filter.
        border_mode: one of "same", "valid".
        stride: integer stride.

    # Returns
        `(output_length - 1) * stride - 2 * pad + filter_size`, where
        `pad` is `filter_size // 2` for "same" and 0 for "valid";
        None if `output_length` is None.
    '''
    if output_length is None:
        return None
    assert border_mode in {'same', 'valid'}
    if border_mode == 'valid':
        pad = 0
    elif border_mode == 'same':
        pad = filter_size // 2
    strided_span = (output_length - 1) * stride
    return strided_span - 2 * pad + filter_size
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
import numpy as np
|
||||
from numpy.testing import assert_allclose
|
||||
import inspect
|
||||
import functools
|
||||
|
||||
from ..engine import Model, Input
|
||||
from ..models import Sequential, model_from_json
|
||||
@@ -35,7 +36,8 @@ def get_test_data(nb_train=1000, nb_test=500, input_shape=(10,),
|
||||
|
||||
|
||||
def layer_test(layer_cls, kwargs={}, input_shape=None, input_dtype=None,
|
||||
input_data=None, expected_output=None, expected_output_dtype=None):
|
||||
input_data=None, expected_output=None,
|
||||
expected_output_dtype=None, fixed_batch_size=False):
|
||||
'''Test routine for a layer with a single input tensor
|
||||
and single output tensor.
|
||||
'''
|
||||
@@ -63,7 +65,10 @@ def layer_test(layer_cls, kwargs={}, input_shape=None, input_dtype=None,
|
||||
layer = layer_cls(**kwargs)
|
||||
|
||||
# test in functional API
|
||||
x = Input(shape=input_shape[1:], dtype=input_dtype)
|
||||
if fixed_batch_size:
|
||||
x = Input(batch_shape=input_shape, dtype=input_dtype)
|
||||
else:
|
||||
x = Input(shape=input_shape[1:], dtype=input_dtype)
|
||||
y = layer(x)
|
||||
assert K.dtype(y) == expected_output_dtype
|
||||
|
||||
@@ -102,3 +107,15 @@ def layer_test(layer_cls, kwargs={}, input_shape=None, input_dtype=None,
|
||||
|
||||
# for further checks in the caller function
|
||||
return actual_output
|
||||
|
||||
|
||||
def keras_test(func):
    '''Decorator that cleans up after tensorflow tests.

    The wrapped function is called with the arguments it was given.
    When `K._BACKEND` is 'tensorflow', `K.clear_session()` is called
    afterwards, whether the wrapped function returned or raised.

    # Arguments
        func: the test function to wrap.

    # Returns
        The wrapping function; it carries the metadata of `func`
        and returns whatever `func` returns.
    '''
    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        try:
            return func(*args, **kwargs)
        finally:
            # Clean up on failure as well: a failing test must not skip
            # the session reset that the following tests rely on.
            if K._BACKEND == 'tensorflow':
                K.clear_session()
    return wrapper
|
||||
|
||||
+2
-2
@@ -3,12 +3,12 @@ from setuptools import find_packages
|
||||
|
||||
|
||||
setup(name='Keras',
|
||||
version='1.0.5',
|
||||
version='1.0.7',
|
||||
description='Deep Learning for Python',
|
||||
author='Francois Chollet',
|
||||
author_email='francois.chollet@gmail.com',
|
||||
url='https://github.com/fchollet/keras',
|
||||
download_url='https://github.com/fchollet/keras/tarball/1.0.5',
|
||||
download_url='https://github.com/fchollet/keras/tarball/1.0.7',
|
||||
license='MIT',
|
||||
install_requires=['theano', 'pyyaml', 'six'],
|
||||
extras_require={
|
||||
|
||||
@@ -2,13 +2,14 @@ from __future__ import print_function
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from keras.utils.test_utils import get_test_data
|
||||
from keras.utils.test_utils import get_test_data, keras_test
|
||||
from keras.models import Sequential
|
||||
from keras.layers.core import Dense, Flatten, Activation
|
||||
from keras.layers.convolutional import Convolution2D, MaxPooling2D
|
||||
from keras.utils.np_utils import to_categorical
|
||||
|
||||
|
||||
@keras_test
|
||||
def test_image_classification():
|
||||
'''
|
||||
Classify random 16x16 color images into several classes using logistic regression
|
||||
|
||||
@@ -3,7 +3,7 @@ import numpy as np
|
||||
import pytest
|
||||
import string
|
||||
|
||||
from keras.utils.test_utils import get_test_data
|
||||
from keras.utils.test_utils import get_test_data, keras_test
|
||||
from keras.utils.np_utils import to_categorical
|
||||
from keras.models import Sequential
|
||||
from keras.layers import TimeDistributedDense
|
||||
@@ -14,6 +14,7 @@ from keras.layers import LSTM
|
||||
from keras.layers import Embedding
|
||||
|
||||
|
||||
@keras_test
|
||||
def test_temporal_classification():
|
||||
'''
|
||||
Classify temporal sequences of float numbers
|
||||
@@ -43,6 +44,7 @@ def test_temporal_classification():
|
||||
assert(history.history['val_acc'][-1] >= 0.85)
|
||||
|
||||
|
||||
@keras_test
|
||||
def test_temporal_regression():
|
||||
'''
|
||||
Predict float numbers (regression) based on sequences
|
||||
@@ -63,6 +65,7 @@ def test_temporal_regression():
|
||||
assert(history.history['val_loss'][-1] < 0.75)
|
||||
|
||||
|
||||
@keras_test
|
||||
def test_sequence_to_sequence():
|
||||
'''
|
||||
Apply a same Dense layer for each element of time dimension of the input
|
||||
@@ -86,6 +89,7 @@ def test_sequence_to_sequence():
|
||||
assert(history.history['val_loss'][-1] < 0.8)
|
||||
|
||||
|
||||
@keras_test
|
||||
def test_stacked_lstm_char_prediction():
|
||||
'''
|
||||
Learn alphabetical char sequence with stacked LSTM.
|
||||
@@ -135,6 +139,7 @@ def test_stacked_lstm_char_prediction():
|
||||
assert(generated == alphabet)
|
||||
|
||||
|
||||
@keras_test
|
||||
def test_masked_temporal():
|
||||
'''
|
||||
Confirm that even with masking on both inputs and outputs, cross-entropies are
|
||||
@@ -182,5 +187,4 @@ def test_masked_temporal():
|
||||
assert(np.abs(history.history['val_loss'][-1] - ground_truth) < 0.06)
|
||||
|
||||
if __name__ == '__main__':
|
||||
# pytest.main([__file__])
|
||||
test_temporal_classification()
|
||||
pytest.main([__file__])
|
||||
|
||||
@@ -2,12 +2,13 @@ from __future__ import print_function
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from keras.utils.test_utils import get_test_data
|
||||
from keras.utils.test_utils import get_test_data, keras_test
|
||||
from keras.models import Sequential
|
||||
from keras.layers.core import Dense
|
||||
from keras.utils.np_utils import to_categorical
|
||||
|
||||
|
||||
@keras_test
|
||||
def test_vector_classification():
|
||||
'''
|
||||
Classify random float vectors into 2 classes with logistic regression
|
||||
@@ -37,6 +38,7 @@ def test_vector_classification():
|
||||
assert(history.history['val_acc'][-1] > 0.8)
|
||||
|
||||
|
||||
@keras_test
|
||||
def test_vector_regression():
|
||||
'''
|
||||
Perform float data prediction (regression) using 2 layer MLP
|
||||
|
||||
@@ -38,6 +38,7 @@ def check_two_tensor_operation(function_name, x_input_shape,
|
||||
assert zth.shape == ztf.shape
|
||||
assert_allclose(zth, ztf, atol=1e-05)
|
||||
|
||||
|
||||
def check_composed_tensor_operations(first_function_name, first_function_args,
|
||||
second_function_name, second_function_args,
|
||||
input_shape):
|
||||
@@ -57,7 +58,8 @@ def check_composed_tensor_operations(first_function_name, first_function_args,
|
||||
ztf = KTF.eval(getattr(KTF, second_function_name)(ytf, **second_function_args))
|
||||
|
||||
assert zth.shape == ztf.shape
|
||||
assert_allclose(zth, ztf, atol=1e-05)
|
||||
assert_allclose(zth, ztf, atol=1e-05)
|
||||
|
||||
|
||||
class TestBackend(object):
|
||||
|
||||
@@ -90,8 +92,8 @@ class TestBackend(object):
|
||||
check_single_tensor_operation('expand_dims', (4, 3), dim=-1)
|
||||
check_single_tensor_operation('expand_dims', (4, 3, 2), dim=1)
|
||||
check_single_tensor_operation('squeeze', (4, 3, 1), axis=2)
|
||||
check_composed_tensor_operations('reshape', {'shape':(4,3,1,1)},
|
||||
'squeeze', {'axis':2},
|
||||
check_composed_tensor_operations('reshape', {'shape':(4,3,1,1)},
|
||||
'squeeze', {'axis':2},
|
||||
(4, 3, 1, 1))
|
||||
|
||||
def test_repeat_elements(self):
|
||||
@@ -149,6 +151,12 @@ class TestBackend(object):
|
||||
# count_params
|
||||
assert KTH.count_params(xth) == KTF.count_params(xtf)
|
||||
|
||||
# print_tensor
|
||||
check_single_tensor_operation('print_tensor', ())
|
||||
check_single_tensor_operation('print_tensor', (2,))
|
||||
check_single_tensor_operation('print_tensor', (4, 3))
|
||||
check_single_tensor_operation('print_tensor', (1, 2, 3))
|
||||
|
||||
def test_elementwise_operations(self):
|
||||
check_single_tensor_operation('max', (4, 2))
|
||||
check_single_tensor_operation('max', (4, 2), axis=1, keepdims=True)
|
||||
@@ -196,6 +204,11 @@ class TestBackend(object):
|
||||
|
||||
# two-tensor ops
|
||||
check_two_tensor_operation('equal', (4, 2), (4, 2))
|
||||
check_two_tensor_operation('not_equal', (4, 2), (4, 2))
|
||||
check_two_tensor_operation('greater', (4, 2), (4, 2))
|
||||
check_two_tensor_operation('greater_equal', (4, 2), (4, 2))
|
||||
check_two_tensor_operation('lesser', (4, 2), (4, 2))
|
||||
check_two_tensor_operation('lesser_equal', (4, 2), (4, 2))
|
||||
check_two_tensor_operation('maximum', (4, 2), (4, 2))
|
||||
check_two_tensor_operation('minimum', (4, 2), (4, 2))
|
||||
|
||||
@@ -208,14 +221,24 @@ class TestBackend(object):
|
||||
exptf = xtf * KTF.exp(xtf)
|
||||
lossth = KTH.sum(expth)
|
||||
losstf = KTF.sum(exptf)
|
||||
zero_lossth = KTH.stop_gradient(lossth)
|
||||
zero_losstf = KTF.stop_gradient(losstf)
|
||||
|
||||
gradth = KTH.gradients(lossth, [expth])
|
||||
gradtf = KTF.gradients(losstf, [exptf])
|
||||
zero_gradth = KTH.gradients(lossth + zero_lossth, [expth])
|
||||
zero_gradtf = KTF.gradients(losstf + zero_losstf, [exptf])
|
||||
|
||||
zth = KTH.eval(gradth[0])
|
||||
ztf = KTF.eval(gradtf[0])
|
||||
zero_zth = KTH.eval(zero_gradth[0])
|
||||
zero_ztf = KTF.eval(zero_gradtf[0])
|
||||
assert zth.shape == ztf.shape
|
||||
assert zero_zth.shape == zero_ztf.shape
|
||||
assert_allclose(zth, ztf, atol=1e-05)
|
||||
assert_allclose(zero_zth, zero_ztf, atol=1e-05)
|
||||
assert_allclose(zero_zth, zth, atol=1e-05)
|
||||
assert_allclose(zero_ztf, ztf, atol=1e-05)
|
||||
|
||||
def test_function(self):
|
||||
val = np.random.random((4, 2))
|
||||
@@ -450,6 +473,51 @@ class TestBackend(object):
|
||||
assert zth.shape == ztf.shape
|
||||
assert_allclose(zth, ztf, atol=1e-05)
|
||||
|
||||
def test_conv3d(self):
    # Checks that conv3d gives matching results on both backends.
    #
    # TH input shape: (samples, input_depth, conv_dim1, conv_dim2, conv_dim3)
    # TF input shape: (samples, conv_dim1, conv_dim2, conv_dim3, input_depth)
    # TH kernel shape: (depth, input_depth, x, y, z)
    # TF kernel shape: (x, y, z, input_depth, depth)

    def compare_backends(input_shape, kernel_shape, **ordering):
        # Random input and zero-centered random kernel; the kernel
        # given to the Theano backend goes through convert_kernel.
        xval = np.random.random(input_shape)
        xth = KTH.variable(xval)
        xtf = KTF.variable(xval)

        kernel_val = np.random.random(kernel_shape) - 0.5
        kernel_th = KTH.variable(convert_kernel(kernel_val, **ordering))
        kernel_tf = KTF.variable(kernel_val)

        zth = KTH.eval(KTH.conv3d(xth, kernel_th, **ordering))
        ztf = KTF.eval(KTF.conv3d(xtf, kernel_tf, **ordering))

        assert zth.shape == ztf.shape
        assert_allclose(zth, ztf, atol=1e-05)

    # test in dim_ordering = th (no dim_ordering argument is passed)
    for input_shape in [(2, 3, 4, 5, 4), (2, 3, 5, 4, 6)]:
        for kernel_shape in [(4, 3, 2, 2, 2), (4, 3, 3, 2, 4)]:
            compare_backends(input_shape, kernel_shape)

    # test in dim_ordering = tf
    compare_backends((1, 2, 2, 2, 1), (2, 2, 2, 1, 1), dim_ordering='tf')
|
||||
|
||||
def test_pool2d(self):
|
||||
check_single_tensor_operation('pool2d', (5, 3, 10, 12), pool_size=(2, 2),
|
||||
strides=(1, 1), border_mode='valid')
|
||||
@@ -460,6 +528,16 @@ class TestBackend(object):
|
||||
check_single_tensor_operation('pool2d', (5, 3, 9, 11), pool_size=(2, 3),
|
||||
strides=(1, 1), border_mode='valid')
|
||||
|
||||
def test_pool3d(self):
|
||||
check_single_tensor_operation('pool3d', (5, 3, 10, 12, 5), pool_size=(2, 2, 2),
|
||||
strides=(1, 1, 1), border_mode='valid')
|
||||
|
||||
check_single_tensor_operation('pool3d', (5, 3, 9, 11, 5), pool_size=(2, 2, 2),
|
||||
strides=(1, 1, 1), border_mode='valid')
|
||||
|
||||
check_single_tensor_operation('pool3d', (5, 3, 9, 11, 5), pool_size=(2, 3, 2),
|
||||
strides=(1, 1, 1), border_mode='valid')
|
||||
|
||||
def test_random_normal(self):
|
||||
mean = 0.
|
||||
std = 1.
|
||||
@@ -502,6 +580,16 @@ class TestBackend(object):
|
||||
assert(np.max(rand) == 1)
|
||||
assert(np.min(rand) == 0)
|
||||
|
||||
def test_one_hot(self):
|
||||
input_length = 10
|
||||
nb_classes = 20
|
||||
batch_size = 30
|
||||
indices = np.random.randint(0, nb_classes, size=(batch_size, input_length))
|
||||
oh = np.eye(nb_classes)[indices]
|
||||
for K in [KTH, KTF]:
|
||||
koh = K.eval(K.one_hot(K.variable(indices, dtype='int32'), nb_classes))
|
||||
assert np.all(koh == oh)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
pytest.main([__file__])
|
||||
|
||||
@@ -1,26 +1,44 @@
|
||||
from __future__ import print_function
|
||||
import pytest
|
||||
import time
|
||||
import random
|
||||
from keras.datasets import cifar10, cifar100, reuters, imdb, mnist
|
||||
|
||||
|
||||
def test_cifar():
|
||||
(X_train, y_train), (X_test, y_test) = cifar10.load_data()
|
||||
(X_train, y_train), (X_test, y_test) = cifar100.load_data('fine')
|
||||
(X_train, y_train), (X_test, y_test) = cifar100.load_data('coarse')
|
||||
# only run data download tests 20% of the time
|
||||
# to speed up frequent testing
|
||||
random.seed(time.time())
|
||||
if random.random() > 0.8:
|
||||
(X_train, y_train), (X_test, y_test) = cifar10.load_data()
|
||||
(X_train, y_train), (X_test, y_test) = cifar100.load_data('fine')
|
||||
(X_train, y_train), (X_test, y_test) = cifar100.load_data('coarse')
|
||||
|
||||
|
||||
def test_reuters():
|
||||
(X_train, y_train), (X_test, y_test) = reuters.load_data()
|
||||
(X_train, y_train), (X_test, y_test) = reuters.load_data(maxlen=10)
|
||||
# only run data download tests 20% of the time
|
||||
# to speed up frequent testing
|
||||
random.seed(time.time())
|
||||
if random.random() > 0.8:
|
||||
(X_train, y_train), (X_test, y_test) = reuters.load_data()
|
||||
(X_train, y_train), (X_test, y_test) = reuters.load_data(maxlen=10)
|
||||
|
||||
|
||||
def test_mnist():
|
||||
(X_train, y_train), (X_test, y_test) = mnist.load_data()
|
||||
# only run data download tests 20% of the time
|
||||
# to speed up frequent testing
|
||||
random.seed(time.time())
|
||||
if random.random() > 0.8:
|
||||
(X_train, y_train), (X_test, y_test) = mnist.load_data()
|
||||
|
||||
|
||||
def test_imdb():
|
||||
(X_train, y_train), (X_test, y_test) = imdb.load_data()
|
||||
(X_train, y_train), (X_test, y_test) = imdb.load_data(maxlen=40)
|
||||
# only run data download tests 20% of the time
|
||||
# to speed up frequent testing
|
||||
random.seed(time.time())
|
||||
if random.random() > 0.8:
|
||||
(X_train, y_train), (X_test, y_test) = imdb.load_data()
|
||||
(X_train, y_train), (X_test, y_test) = imdb.load_data(maxlen=40)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
|
||||
@@ -2,13 +2,15 @@ import pytest
|
||||
import json
|
||||
import numpy as np
|
||||
|
||||
from keras.layers import Dense, Dropout
|
||||
from keras.layers import Dense, Dropout, InputLayer
|
||||
from keras.engine import merge, Input, get_source_inputs
|
||||
from keras.models import Model
|
||||
from keras import backend as K
|
||||
from keras.models import model_from_json, model_from_yaml
|
||||
from keras.utils.test_utils import keras_test
|
||||
|
||||
|
||||
@keras_test
|
||||
def test_learning_phase():
|
||||
a = Input(shape=(32,), name='input_a')
|
||||
b = Input(shape=(32,), name='input_b')
|
||||
@@ -50,6 +52,7 @@ def test_learning_phase():
|
||||
assert fn_outputs_no_dp[1].sum() != fn_outputs_dp[1].sum()
|
||||
|
||||
|
||||
@keras_test
|
||||
def test_node_construction():
|
||||
####################################################
|
||||
# test basics
|
||||
@@ -128,6 +131,7 @@ def test_node_construction():
|
||||
assert dense.get_output_mask_at(1) is None
|
||||
|
||||
|
||||
@keras_test
|
||||
def test_multi_input_layer():
|
||||
####################################################
|
||||
# test multi-input layer
|
||||
@@ -209,6 +213,7 @@ def test_multi_input_layer():
|
||||
assert [x.shape for x in fn_outputs] == [(10, 64), (10, 5)]
|
||||
|
||||
|
||||
@keras_test
|
||||
def test_recursion():
|
||||
####################################################
|
||||
# test recursion
|
||||
@@ -392,7 +397,15 @@ def test_recursion():
|
||||
# test merge
|
||||
o_tf = merge([j_tf, k_tf], mode='concat', concat_axis=1)
|
||||
|
||||
# test tensor input
|
||||
x = tf.placeholder(shape=(None, 2), dtype=K.floatx())
|
||||
input_layer = InputLayer(input_tensor=x)
|
||||
|
||||
x = Input(tensor=x)
|
||||
y = Dense(2)(x)
|
||||
|
||||
|
||||
@keras_test
|
||||
def test_functional_guide():
|
||||
# MNIST
|
||||
from keras.layers import Input, Dense, LSTM
|
||||
@@ -485,6 +498,7 @@ def test_functional_guide():
|
||||
assert shared_lstm.input_shape == (None, 4, 25)
|
||||
|
||||
|
||||
@keras_test
|
||||
def test_sequential_regression():
|
||||
from keras.models import Sequential, Model
|
||||
from keras.layers import Merge, Embedding, BatchNormalization, LSTM, InputLayer, Input
|
||||
|
||||
@@ -7,8 +7,10 @@ from keras.engine.topology import merge, Input
|
||||
from keras.engine.training import Model
|
||||
from keras.models import Sequential, Graph
|
||||
from keras import backend as K
|
||||
from keras.utils.test_utils import keras_test
|
||||
|
||||
|
||||
@keras_test
|
||||
def test_model_methods():
|
||||
a = Input(shape=(3,), name='input_a')
|
||||
b = Input(shape=(3,), name='input_b')
|
||||
@@ -167,6 +169,7 @@ def test_model_methods():
|
||||
out = model.predict([input_a_np, input_b_np], batch_size=4)
|
||||
|
||||
|
||||
@keras_test
|
||||
def test_trainable_argument():
|
||||
x = np.random.random((5, 3))
|
||||
y = np.random.random((5, 2))
|
||||
|
||||
@@ -1,7 +1,8 @@
|
||||
import pytest
|
||||
from keras.utils.test_utils import layer_test
|
||||
from keras.utils.test_utils import layer_test, keras_test
|
||||
|
||||
|
||||
@keras_test
|
||||
def test_leaky_relu():
|
||||
from keras.layers.advanced_activations import LeakyReLU
|
||||
for alpha in [0., .5, -1.]:
|
||||
@@ -9,12 +10,14 @@ def test_leaky_relu():
|
||||
input_shape=(2, 3, 4))
|
||||
|
||||
|
||||
@keras_test
|
||||
def test_prelu():
|
||||
from keras.layers.advanced_activations import PReLU
|
||||
layer_test(PReLU, kwargs={},
|
||||
input_shape=(2, 3, 4))
|
||||
|
||||
|
||||
@keras_test
|
||||
def test_elu():
|
||||
from keras.layers.advanced_activations import ELU
|
||||
for alpha in [0., .5, -1.]:
|
||||
@@ -22,6 +25,7 @@ def test_elu():
|
||||
input_shape=(2, 3, 4))
|
||||
|
||||
|
||||
@keras_test
|
||||
def test_parametric_softplus():
|
||||
from keras.layers.advanced_activations import ParametricSoftplus
|
||||
for alpha in [0., .5, -1.]:
|
||||
@@ -31,12 +35,14 @@ def test_parametric_softplus():
|
||||
input_shape=(2, 3, 4))
|
||||
|
||||
|
||||
@keras_test
|
||||
def test_thresholded_relu():
|
||||
from keras.layers.advanced_activations import ThresholdedReLU
|
||||
layer_test(ThresholdedReLU, kwargs={'theta': 0.5},
|
||||
input_shape=(2, 3, 4))
|
||||
|
||||
|
||||
@keras_test
|
||||
def test_srelu():
|
||||
from keras.layers.advanced_activations import SReLU
|
||||
layer_test(SReLU, kwargs={},
|
||||
|
||||
@@ -2,17 +2,19 @@ import pytest
|
||||
import numpy as np
|
||||
from numpy.testing import assert_allclose
|
||||
|
||||
from keras.utils.test_utils import layer_test
|
||||
from keras.utils.test_utils import layer_test, keras_test
|
||||
from keras.utils.np_utils import conv_input_length
|
||||
from keras import backend as K
|
||||
from keras.layers import convolutional
|
||||
|
||||
|
||||
@keras_test
|
||||
def test_convolution_1d():
|
||||
nb_samples = 2
|
||||
nb_steps = 8
|
||||
input_dim = 5
|
||||
input_dim = 2
|
||||
filter_length = 3
|
||||
nb_filter = 4
|
||||
nb_filter = 3
|
||||
|
||||
for border_mode in ['valid', 'same']:
|
||||
for subsample_length in [1]:
|
||||
@@ -36,6 +38,7 @@ def test_convolution_1d():
|
||||
input_shape=(nb_samples, nb_steps, input_dim))
|
||||
|
||||
|
||||
@keras_test
|
||||
def test_maxpooling_1d():
|
||||
for stride in [1, 2]:
|
||||
layer_test(convolutional.MaxPooling1D,
|
||||
@@ -44,6 +47,7 @@ def test_maxpooling_1d():
|
||||
input_shape=(3, 5, 4))
|
||||
|
||||
|
||||
@keras_test
|
||||
def test_averagepooling_1d():
|
||||
for stride in [1, 2]:
|
||||
layer_test(convolutional.AveragePooling1D,
|
||||
@@ -52,10 +56,11 @@ def test_averagepooling_1d():
|
||||
input_shape=(3, 5, 4))
|
||||
|
||||
|
||||
@keras_test
|
||||
def test_convolution_2d():
|
||||
nb_samples = 8
|
||||
nb_filter = 3
|
||||
stack_size = 4
|
||||
nb_samples = 2
|
||||
nb_filter = 2
|
||||
stack_size = 3
|
||||
nb_row = 10
|
||||
nb_col = 6
|
||||
|
||||
@@ -84,6 +89,124 @@ def test_convolution_2d():
|
||||
input_shape=(nb_samples, stack_size, nb_row, nb_col))
|
||||
|
||||
|
||||
@keras_test
|
||||
def test_deconvolution_2d():
|
||||
nb_samples = 2
|
||||
nb_filter = 2
|
||||
stack_size = 3
|
||||
nb_row = 10
|
||||
nb_col = 6
|
||||
|
||||
for border_mode in ['valid', 'same']:
|
||||
for subsample in [(1, 1), (2, 2)]:
|
||||
if border_mode == 'same' and subsample != (1, 1):
|
||||
continue
|
||||
|
||||
rows = conv_input_length(nb_row, 3, border_mode, subsample[0])
|
||||
cols = conv_input_length(nb_col, 3, border_mode, subsample[1])
|
||||
layer_test(convolutional.Deconvolution2D,
|
||||
kwargs={'nb_filter': nb_filter,
|
||||
'nb_row': 3,
|
||||
'nb_col': 3,
|
||||
'output_shape': (nb_samples, nb_filter, rows, cols),
|
||||
'border_mode': border_mode,
|
||||
'subsample': subsample},
|
||||
input_shape=(nb_samples, stack_size, nb_row, nb_col),
|
||||
fixed_batch_size=True)
|
||||
|
||||
layer_test(convolutional.Deconvolution2D,
|
||||
kwargs={'nb_filter': nb_filter,
|
||||
'nb_row': 3,
|
||||
'nb_col': 3,
|
||||
'output_shape': (nb_samples, nb_filter, rows, cols),
|
||||
'border_mode': border_mode,
|
||||
'W_regularizer': 'l2',
|
||||
'b_regularizer': 'l2',
|
||||
'activity_regularizer': 'activity_l2',
|
||||
'subsample': subsample},
|
||||
input_shape=(nb_samples, stack_size, nb_row, nb_col),
|
||||
fixed_batch_size=True)
|
||||
|
||||
|
||||
@keras_test
|
||||
def test_atrous_conv_2d():
|
||||
nb_samples = 2
|
||||
nb_filter = 2
|
||||
stack_size = 3
|
||||
nb_row = 10
|
||||
nb_col = 6
|
||||
|
||||
for border_mode in ['valid', 'same']:
|
||||
for subsample in [(1, 1), (2, 2)]:
|
||||
for atrous_rate in [(1, 1), (2, 2)]:
|
||||
if border_mode == 'same' and subsample != (1, 1):
|
||||
continue
|
||||
if subsample != (1, 1) and atrous_rate != (1, 1):
|
||||
continue
|
||||
|
||||
layer_test(convolutional.AtrousConv2D,
|
||||
kwargs={'nb_filter': nb_filter,
|
||||
'nb_row': 3,
|
||||
'nb_col': 3,
|
||||
'border_mode': border_mode,
|
||||
'subsample': subsample,
|
||||
'atrous_rate': atrous_rate},
|
||||
input_shape=(nb_samples, stack_size, nb_row, nb_col))
|
||||
|
||||
layer_test(convolutional.AtrousConv2D,
|
||||
kwargs={'nb_filter': nb_filter,
|
||||
'nb_row': 3,
|
||||
'nb_col': 3,
|
||||
'border_mode': border_mode,
|
||||
'W_regularizer': 'l2',
|
||||
'b_regularizer': 'l2',
|
||||
'activity_regularizer': 'activity_l2',
|
||||
'subsample': subsample,
|
||||
'atrous_rate': atrous_rate},
|
||||
input_shape=(nb_samples, stack_size, nb_row, nb_col))
|
||||
|
||||
|
||||
@pytest.mark.skipif(K._BACKEND != 'tensorflow', reason="Requires TF backend")
|
||||
@keras_test
|
||||
def test_separable_conv_2d():
|
||||
nb_samples = 2
|
||||
nb_filter = 6
|
||||
stack_size = 3
|
||||
nb_row = 10
|
||||
nb_col = 6
|
||||
|
||||
for border_mode in ['valid', 'same']:
|
||||
for subsample in [(1, 1), (2, 2)]:
|
||||
for multiplier in [1, 2]:
|
||||
if border_mode == 'same' and subsample != (1, 1):
|
||||
continue
|
||||
|
||||
layer_test(convolutional.SeparableConv2D,
|
||||
kwargs={'nb_filter': nb_filter,
|
||||
'nb_row': 3,
|
||||
'nb_col': 3,
|
||||
'border_mode': border_mode,
|
||||
'subsample': subsample,
|
||||
'depth_multiplier': multiplier},
|
||||
input_shape=(nb_samples, stack_size, nb_row, nb_col))
|
||||
|
||||
layer_test(convolutional.SeparableConv2D,
|
||||
kwargs={'nb_filter': nb_filter,
|
||||
'nb_row': 3,
|
||||
'nb_col': 3,
|
||||
'border_mode': border_mode,
|
||||
'depthwise_regularizer': 'l2',
|
||||
'pointwise_regularizer': 'l2',
|
||||
'b_regularizer': 'l2',
|
||||
'activity_regularizer': 'activity_l2',
|
||||
'pointwise_constraint': 'unitnorm',
|
||||
'depthwise_constraint': 'unitnorm',
|
||||
'subsample': subsample,
|
||||
'depth_multiplier': multiplier},
|
||||
input_shape=(nb_samples, stack_size, nb_row, nb_col))
|
||||
|
||||
|
||||
@keras_test
|
||||
def test_maxpooling_2d():
|
||||
pool_size = (3, 3)
|
||||
|
||||
@@ -95,6 +218,7 @@ def test_maxpooling_2d():
|
||||
input_shape=(3, 4, 11, 12))
|
||||
|
||||
|
||||
@keras_test
|
||||
def test_averagepooling_2d():
|
||||
pool_size = (3, 3)
|
||||
|
||||
@@ -108,11 +232,11 @@ def test_averagepooling_2d():
|
||||
input_shape=(3, 4, 11, 12))
|
||||
|
||||
|
||||
@pytest.mark.skipif(K._BACKEND != 'theano', reason="Requires Theano backend")
|
||||
@keras_test
|
||||
def test_convolution_3d():
|
||||
nb_samples = 2
|
||||
nb_filter = 5
|
||||
stack_size = 4
|
||||
nb_filter = 2
|
||||
stack_size = 3
|
||||
kernel_dim1 = 2
|
||||
kernel_dim2 = 3
|
||||
kernel_dim3 = 1
|
||||
@@ -150,7 +274,7 @@ def test_convolution_3d():
|
||||
input_len_dim1, input_len_dim2, input_len_dim3))
|
||||
|
||||
|
||||
@pytest.mark.skipif(K._BACKEND != 'theano', reason="Requires Theano backend")
|
||||
@keras_test
|
||||
def test_maxpooling_3d():
|
||||
pool_size = (3, 3, 3)
|
||||
|
||||
@@ -162,7 +286,7 @@ def test_maxpooling_3d():
|
||||
input_shape=(3, 4, 11, 12, 10))
|
||||
|
||||
|
||||
@pytest.mark.skipif(K._BACKEND != 'theano', reason="Requires Theano backend")
|
||||
@keras_test
|
||||
def test_averagepooling_3d():
|
||||
pool_size = (3, 3, 3)
|
||||
|
||||
@@ -174,9 +298,10 @@ def test_averagepooling_3d():
|
||||
input_shape=(3, 4, 11, 12, 10))
|
||||
|
||||
|
||||
@keras_test
|
||||
def test_zero_padding_2d():
|
||||
nb_samples = 9
|
||||
stack_size = 7
|
||||
nb_samples = 2
|
||||
stack_size = 2
|
||||
input_nb_row = 11
|
||||
input_nb_col = 12
|
||||
|
||||
@@ -199,10 +324,9 @@ def test_zero_padding_2d():
|
||||
layer.get_config()
|
||||
|
||||
|
||||
@pytest.mark.skipif(K._BACKEND != 'theano', reason="Requires Theano backend")
|
||||
def test_zero_padding_3d():
|
||||
nb_samples = 9
|
||||
stack_size = 7
|
||||
nb_samples = 2
|
||||
stack_size = 2
|
||||
input_len_dim1 = 10
|
||||
input_len_dim2 = 11
|
||||
input_len_dim3 = 12
|
||||
@@ -227,15 +351,17 @@ def test_zero_padding_3d():
|
||||
layer.get_config()
|
||||
|
||||
|
||||
@keras_test
|
||||
def test_upsampling_1d():
|
||||
layer_test(convolutional.UpSampling1D,
|
||||
kwargs={'length': 2},
|
||||
input_shape=(3, 5, 4))
|
||||
|
||||
|
||||
@keras_test
|
||||
def test_upsampling_2d():
|
||||
nb_samples = 9
|
||||
stack_size = 7
|
||||
nb_samples = 2
|
||||
stack_size = 2
|
||||
input_nb_row = 11
|
||||
input_nb_col = 12
|
||||
|
||||
@@ -273,10 +399,9 @@ def test_upsampling_2d():
|
||||
assert_allclose(out, expected_out)
|
||||
|
||||
|
||||
@pytest.mark.skipif(K._BACKEND != 'theano', reason="Requires Theano backend")
|
||||
def test_upsampling_3d():
|
||||
nb_samples = 9
|
||||
stack_size = 7
|
||||
nb_samples = 2
|
||||
stack_size = 2
|
||||
input_len_dim1 = 10
|
||||
input_len_dim2 = 11
|
||||
input_len_dim3 = 12
|
||||
@@ -320,5 +445,4 @@ def test_upsampling_3d():
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
# pytest.main([__file__])
|
||||
test_convolution_3d()
|
||||
pytest.main([__file__])
|
||||
|
||||
@@ -3,15 +3,17 @@ import numpy as np
|
||||
|
||||
from keras import backend as K
|
||||
from keras.layers import core
|
||||
from keras.utils.test_utils import layer_test
|
||||
from keras.utils.test_utils import layer_test, keras_test
|
||||
|
||||
|
||||
@keras_test
|
||||
def test_masking():
|
||||
layer_test(core.Masking,
|
||||
kwargs={},
|
||||
input_shape=(3, 2, 3))
|
||||
|
||||
|
||||
@keras_test
|
||||
def test_merge():
|
||||
from keras.layers import Input, merge, Merge
|
||||
from keras.models import Model
|
||||
@@ -21,7 +23,7 @@ def test_merge():
|
||||
inputs = [np.random.random(shape) for shape in input_shapes]
|
||||
|
||||
# test functional API
|
||||
for mode in ['sum', 'mul', 'concat', 'ave']:
|
||||
for mode in ['sum', 'mul', 'concat', 'ave', 'max']:
|
||||
print(mode)
|
||||
input_a = Input(shape=input_shapes[0][1:])
|
||||
input_b = Input(shape=input_shapes[1][1:])
|
||||
@@ -83,6 +85,7 @@ def test_merge():
|
||||
model.compile('rmsprop', 'mse')
|
||||
|
||||
|
||||
@keras_test
|
||||
def test_merge_mask_2d():
|
||||
from keras.layers import Input, merge, Masking
|
||||
from keras.models import Model
|
||||
@@ -97,21 +100,27 @@ def test_merge_mask_2d():
|
||||
masked_a = Masking(mask_value=0)(input_a)
|
||||
masked_b = Masking(mask_value=0)(input_b)
|
||||
|
||||
# two different types of merging
|
||||
# three different types of merging
|
||||
merged_sum = merge([masked_a, masked_b], mode='sum')
|
||||
merged_concat = merge([masked_a, masked_b], mode='concat', concat_axis=1)
|
||||
merged_concat_mixed = merge([masked_a, input_b], mode='concat', concat_axis=1)
|
||||
|
||||
# test sum
|
||||
model_sum = Model([input_a, input_b], [merged_sum])
|
||||
model_sum.compile(loss='mse', optimizer='sgd')
|
||||
model_sum.fit([rand(2,3), rand(2,3)], [rand(2,3)], nb_epoch=1)
|
||||
model_sum.fit([rand(2, 3), rand(2, 3)], [rand(2, 3)], nb_epoch=1)
|
||||
|
||||
# test concatenation
|
||||
model_concat = Model([input_a, input_b], [merged_concat])
|
||||
model_concat.compile(loss='mse', optimizer='sgd')
|
||||
model_concat.fit([rand(2, 3), rand(2, 3)], [rand(2, 6)], nb_epoch=1)
|
||||
|
||||
# test concatenation with masked and non-masked inputs
|
||||
model_concat = Model([input_a, input_b], [merged_concat_mixed])
|
||||
model_concat.compile(loss='mse', optimizer='sgd')
|
||||
model_concat.fit([rand(2,3), rand(2,3)], [rand(2,6)], nb_epoch=1)
|
||||
|
||||
|
||||
@keras_test
|
||||
def test_merge_mask_3d():
|
||||
from keras.layers import Input, merge, Embedding, SimpleRNN
|
||||
from keras.models import Model
|
||||
@@ -134,15 +143,17 @@ def test_merge_mask_3d():
|
||||
merged_concat = merge([rnn_a, rnn_b], mode='concat', concat_axis=-1)
|
||||
model = Model([input_a, input_b], [merged_concat])
|
||||
model.compile(loss='mse', optimizer='sgd')
|
||||
model.fit([rand(2,3), rand(2,3)], [rand(2,3,6)])
|
||||
model.fit([rand(2, 3), rand(2, 3)], [rand(2, 3, 6)])
|
||||
|
||||
|
||||
@keras_test
|
||||
def test_dropout():
|
||||
layer_test(core.Dropout,
|
||||
kwargs={'p': 0.5},
|
||||
input_shape=(3, 2))
|
||||
|
||||
|
||||
@keras_test
|
||||
def test_activation():
|
||||
# with string argument
|
||||
layer_test(core.Activation,
|
||||
@@ -155,30 +166,35 @@ def test_activation():
|
||||
input_shape=(3, 2))
|
||||
|
||||
|
||||
@keras_test
|
||||
def test_reshape():
|
||||
layer_test(core.Reshape,
|
||||
kwargs={'target_shape': (8, 1)},
|
||||
input_shape=(3, 2, 4))
|
||||
|
||||
|
||||
@keras_test
|
||||
def test_permute():
|
||||
layer_test(core.Permute,
|
||||
kwargs={'dims': (2, 1)},
|
||||
input_shape=(3, 2, 4))
|
||||
|
||||
|
||||
@keras_test
|
||||
def test_flatten():
|
||||
layer_test(core.Flatten,
|
||||
kwargs={},
|
||||
input_shape=(3, 2, 4))
|
||||
|
||||
|
||||
@keras_test
|
||||
def test_repeat_vector():
|
||||
layer_test(core.RepeatVector,
|
||||
kwargs={'n': 3},
|
||||
input_shape=(3, 2))
|
||||
|
||||
|
||||
@keras_test
|
||||
def test_lambda():
|
||||
from keras.utils.layer_utils import layer_from_config
|
||||
Lambda = core.Lambda
|
||||
@@ -212,6 +228,7 @@ def test_lambda():
|
||||
ld = layer_from_config({'class_name': 'Lambda', 'config': config})
|
||||
|
||||
|
||||
@keras_test
|
||||
def test_dense():
|
||||
from keras import regularizers
|
||||
from keras import constraints
|
||||
@@ -230,6 +247,7 @@ def test_dense():
|
||||
input_shape=(3, 2))
|
||||
|
||||
|
||||
@keras_test
|
||||
def test_activity_regularization():
|
||||
from keras.engine import Input, Model
|
||||
|
||||
@@ -250,6 +268,7 @@ def test_activity_regularization():
|
||||
model.compile('rmsprop', 'mse')
|
||||
|
||||
|
||||
@keras_test
|
||||
def test_maxout_dense():
|
||||
from keras import regularizers
|
||||
from keras import constraints
|
||||
@@ -268,6 +287,7 @@ def test_maxout_dense():
|
||||
input_shape=(3, 2))
|
||||
|
||||
|
||||
@keras_test
|
||||
def test_highway():
|
||||
from keras import regularizers
|
||||
from keras import constraints
|
||||
@@ -285,6 +305,7 @@ def test_highway():
|
||||
input_shape=(3, 2))
|
||||
|
||||
|
||||
@keras_test
|
||||
def test_timedistributeddense():
|
||||
from keras import regularizers
|
||||
from keras import constraints
|
||||
|
||||
@@ -1,12 +1,13 @@
|
||||
import pytest
|
||||
from keras.utils.test_utils import layer_test
|
||||
from keras.utils.test_utils import layer_test, keras_test
|
||||
from keras.layers.embeddings import Embedding
|
||||
import keras.backend as K
|
||||
|
||||
|
||||
@keras_test
|
||||
def test_embedding():
|
||||
layer_test(Embedding,
|
||||
kwargs={'output_dim': 4., 'input_dim': 10, 'input_length': 2},
|
||||
kwargs={'output_dim': 4, 'input_dim': 10, 'input_length': 2},
|
||||
input_shape=(3, 2),
|
||||
input_dtype='int32',
|
||||
expected_output_dtype=K.floatx())
|
||||
|
||||
@@ -0,0 +1,76 @@
|
||||
import pytest
|
||||
|
||||
from keras.utils.test_utils import layer_test, keras_test
|
||||
from keras.layers import local
|
||||
|
||||
|
||||
@keras_test
|
||||
def test_locallyconnected_1d():
|
||||
nb_samples = 2
|
||||
nb_steps = 8
|
||||
input_dim = 5
|
||||
filter_length = 3
|
||||
nb_filter = 4
|
||||
|
||||
for border_mode in ['valid']:
|
||||
for subsample_length in [1]:
|
||||
if border_mode == 'same' and subsample_length != 1:
|
||||
continue
|
||||
layer_test(local.LocallyConnected1D,
|
||||
kwargs={'nb_filter': nb_filter,
|
||||
'filter_length': filter_length,
|
||||
'border_mode': border_mode,
|
||||
'subsample_length': subsample_length},
|
||||
input_shape=(nb_samples, nb_steps, input_dim))
|
||||
|
||||
layer_test(local.LocallyConnected1D,
|
||||
kwargs={'nb_filter': nb_filter,
|
||||
'filter_length': filter_length,
|
||||
'border_mode': border_mode,
|
||||
'W_regularizer': 'l2',
|
||||
'b_regularizer': 'l2',
|
||||
'activity_regularizer': 'activity_l2',
|
||||
'subsample_length': subsample_length},
|
||||
input_shape=(nb_samples, nb_steps, input_dim))
|
||||
|
||||
|
||||
@keras_test
|
||||
def test_locallyconnected_2d():
|
||||
nb_samples = 8
|
||||
nb_filter = 3
|
||||
stack_size = 4
|
||||
nb_row = 6
|
||||
nb_col = 10
|
||||
|
||||
for border_mode in ['valid']:
|
||||
for subsample in [(1, 1), (2, 2)]:
|
||||
if border_mode == 'same' and subsample != (1, 1):
|
||||
continue
|
||||
|
||||
layer_test(local.LocallyConnected2D,
|
||||
kwargs={'nb_filter': nb_filter,
|
||||
'nb_row': 3,
|
||||
'nb_col': 3,
|
||||
'border_mode': border_mode,
|
||||
'W_regularizer': 'l2',
|
||||
'b_regularizer': 'l2',
|
||||
'activity_regularizer': 'activity_l2',
|
||||
'subsample': subsample,
|
||||
'dim_ordering': 'tf'},
|
||||
input_shape=(nb_samples, nb_row, nb_col, stack_size))
|
||||
|
||||
layer_test(local.LocallyConnected2D,
|
||||
kwargs={'nb_filter': nb_filter,
|
||||
'nb_row': 3,
|
||||
'nb_col': 3,
|
||||
'border_mode': border_mode,
|
||||
'W_regularizer': 'l2',
|
||||
'b_regularizer': 'l2',
|
||||
'activity_regularizer': 'activity_l2',
|
||||
'subsample': subsample,
|
||||
'dim_ordering': 'th'},
|
||||
input_shape=(nb_samples, stack_size, nb_row, nb_col))
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
pytest.main([__file__])
|
||||
@@ -1,14 +1,16 @@
|
||||
import pytest
|
||||
from keras.utils.test_utils import layer_test
|
||||
from keras.utils.test_utils import layer_test, keras_test
|
||||
from keras.layers import noise
|
||||
|
||||
|
||||
@keras_test
|
||||
def test_GaussianNoise():
|
||||
layer_test(noise.GaussianNoise,
|
||||
kwargs={'sigma': 1.},
|
||||
input_shape=(3, 2, 3))
|
||||
|
||||
|
||||
@keras_test
|
||||
def test_GaussianDropout():
|
||||
layer_test(noise.GaussianDropout,
|
||||
kwargs={'p': 0.5},
|
||||
|
||||
@@ -3,18 +3,18 @@ import numpy as np
|
||||
from numpy.testing import assert_allclose
|
||||
|
||||
from keras.layers.core import Dense, Activation
|
||||
from keras.utils.test_utils import layer_test
|
||||
from keras.utils.test_utils import layer_test, keras_test
|
||||
from keras.layers import normalization
|
||||
from keras.models import Sequential, Graph
|
||||
from keras import backend as K
|
||||
|
||||
|
||||
input_1 = np.arange(10)
|
||||
input_2 = np.zeros(10)
|
||||
input_3 = np.ones((10))
|
||||
input_shapes = [np.ones((10, 10)), np.ones((10, 10, 10))]
|
||||
|
||||
|
||||
@keras_test
|
||||
def basic_batchnorm_test():
|
||||
layer_test(normalization.BatchNormalization,
|
||||
kwargs={'mode': 1},
|
||||
@@ -24,16 +24,17 @@ def basic_batchnorm_test():
|
||||
input_shape=(3, 4, 2))
|
||||
|
||||
|
||||
@keras_test
|
||||
def test_batchnorm_mode_0_or_2():
|
||||
for mode in [0, 2]:
|
||||
model = Sequential()
|
||||
norm_m0 = normalization.BatchNormalization(mode=mode, input_shape=(10,))
|
||||
norm_m0 = normalization.BatchNormalization(mode=mode, input_shape=(10,), momentum=0.8)
|
||||
model.add(norm_m0)
|
||||
model.compile(loss='mse', optimizer='sgd')
|
||||
|
||||
# centered on 5.0, variance 10.0
|
||||
X = np.random.normal(loc=5.0, scale=10.0, size=(1000, 10))
|
||||
model.fit(X, X, nb_epoch=5, verbose=0)
|
||||
model.fit(X, X, nb_epoch=4, verbose=0)
|
||||
out = model.predict(X)
|
||||
out -= K.eval(norm_m0.beta)
|
||||
out /= K.eval(norm_m0.gamma)
|
||||
@@ -42,15 +43,16 @@ def test_batchnorm_mode_0_or_2():
|
||||
assert_allclose(out.std(), 1.0, atol=1e-1)
|
||||
|
||||
|
||||
@keras_test
|
||||
def test_batchnorm_mode_0_convnet():
|
||||
model = Sequential()
|
||||
norm_m0 = normalization.BatchNormalization(mode=0, axis=1, input_shape=(3, 4, 4))
|
||||
norm_m0 = normalization.BatchNormalization(mode=0, axis=1, input_shape=(3, 4, 4), momentum=0.8)
|
||||
model.add(norm_m0)
|
||||
model.compile(loss='mse', optimizer='sgd')
|
||||
|
||||
# centered on 5.0, variance 10.0
|
||||
X = np.random.normal(loc=5.0, scale=10.0, size=(1000, 3, 4, 4))
|
||||
model.fit(X, X, nb_epoch=5, verbose=0)
|
||||
model.fit(X, X, nb_epoch=4, verbose=0)
|
||||
out = model.predict(X)
|
||||
out -= np.reshape(K.eval(norm_m0.beta), (1, 3, 1, 1))
|
||||
out /= np.reshape(K.eval(norm_m0.gamma), (1, 3, 1, 1))
|
||||
@@ -59,6 +61,7 @@ def test_batchnorm_mode_0_convnet():
|
||||
assert_allclose(np.std(out, axis=(0, 2, 3)), 1.0, atol=1e-1)
|
||||
|
||||
|
||||
@keras_test
|
||||
def test_batchnorm_mode_1():
|
||||
norm_m1 = normalization.BatchNormalization(input_shape=(10,), mode=1)
|
||||
norm_m1.build(input_shape=(None, 10))
|
||||
|
||||
@@ -7,10 +7,11 @@ from keras.layers import recurrent, embeddings
|
||||
from keras.models import Sequential
|
||||
from keras.layers.core import Masking
|
||||
from keras import regularizers
|
||||
from keras.utils.test_utils import keras_test
|
||||
|
||||
from keras import backend as K
|
||||
|
||||
nb_samples, timesteps, embedding_dim, output_dim = 3, 5, 10, 5
|
||||
nb_samples, timesteps, embedding_dim, output_dim = 2, 5, 4, 3
|
||||
embedding_num = 12
|
||||
|
||||
|
||||
@@ -23,21 +24,21 @@ def _runner(layer_class):
|
||||
layer_test(layer_class,
|
||||
kwargs={'output_dim': output_dim,
|
||||
'return_sequences': True},
|
||||
input_shape=(3, 2, 3))
|
||||
input_shape=(nb_samples, timesteps, embedding_dim))
|
||||
|
||||
# check dropout
|
||||
layer_test(layer_class,
|
||||
kwargs={'output_dim': output_dim,
|
||||
'dropout_U': 0.1,
|
||||
'dropout_W': 0.1},
|
||||
input_shape=(3, 2, 3))
|
||||
input_shape=(nb_samples, timesteps, embedding_dim))
|
||||
|
||||
# check implementation modes
|
||||
for mode in ['cpu', 'mem', 'gpu']:
|
||||
layer_test(layer_class,
|
||||
kwargs={'output_dim': output_dim,
|
||||
'consume_less': mode},
|
||||
input_shape=(3, 2, 3))
|
||||
input_shape=(nb_samples, timesteps, embedding_dim))
|
||||
|
||||
# check statefulness
|
||||
model = Sequential()
|
||||
@@ -82,7 +83,6 @@ def _runner(layer_class):
|
||||
left_padded_input = np.ones((nb_samples, timesteps))
|
||||
left_padded_input[0, :1] = 0
|
||||
left_padded_input[1, :2] = 0
|
||||
left_padded_input[2, :3] = 0
|
||||
out6 = model.predict(left_padded_input)
|
||||
|
||||
layer.reset_states()
|
||||
@@ -90,7 +90,6 @@ def _runner(layer_class):
|
||||
right_padded_input = np.ones((nb_samples, timesteps))
|
||||
right_padded_input[0, -1:] = 0
|
||||
right_padded_input[1, -2:] = 0
|
||||
right_padded_input[2, -3:] = 0
|
||||
out7 = model.predict(right_padded_input)
|
||||
|
||||
assert_allclose(out7, out6, atol=1e-5)
|
||||
@@ -107,18 +106,22 @@ def _runner(layer_class):
|
||||
K.eval(layer.output)
|
||||
|
||||
|
||||
@keras_test
|
||||
def test_SimpleRNN():
|
||||
_runner(recurrent.SimpleRNN)
|
||||
|
||||
|
||||
@keras_test
|
||||
def test_GRU():
|
||||
_runner(recurrent.GRU)
|
||||
|
||||
|
||||
@keras_test
|
||||
def test_LSTM():
|
||||
_runner(recurrent.LSTM)
|
||||
|
||||
|
||||
@keras_test
|
||||
def test_masking_layer():
|
||||
''' This test based on a previously failing issue here:
|
||||
https://github.com/fchollet/keras/issues/1567
|
||||
|
||||
@@ -1,12 +1,13 @@
|
||||
import pytest
|
||||
import numpy as np
|
||||
from numpy.testing import assert_allclose
|
||||
|
||||
from keras.utils.test_utils import keras_test
|
||||
from keras.layers import wrappers, Input
|
||||
from keras.layers import core, convolutional
|
||||
from keras.models import Sequential, Model, model_from_json
|
||||
|
||||
|
||||
@keras_test
|
||||
def test_TimeDistributed():
|
||||
# first, test with Dense layer
|
||||
model = Sequential()
|
||||
|
||||
@@ -1,449 +0,0 @@
|
||||
from __future__ import absolute_import
|
||||
from __future__ import print_function
|
||||
import pytest
|
||||
import os
|
||||
import numpy as np
|
||||
np.random.seed(1337)
|
||||
|
||||
from keras import backend as K
|
||||
from keras.models import Graph, Sequential
|
||||
from keras.layers.core import Dense, Activation, Merge, Lambda
|
||||
from keras.utils.test_utils import get_test_data
|
||||
from keras.models import model_from_json, model_from_yaml
|
||||
|
||||
|
||||
batch_size = 32
|
||||
|
||||
(X_train_graph, y_train_graph), (X_test_graph, y_test_graph) = get_test_data(nb_train=100,
|
||||
nb_test=50,
|
||||
input_shape=(32,),
|
||||
classification=False,
|
||||
output_shape=(4,))
|
||||
(X2_train_graph, y2_train_graph), (X2_test_graph, y2_test_graph) = get_test_data(nb_train=100,
|
||||
nb_test=50,
|
||||
input_shape=(32,),
|
||||
classification=False,
|
||||
output_shape=(1,))
|
||||
|
||||
|
||||
def test_graph_fit_generator():
    '''Drive `fit_generator` / `evaluate_generator` on a small Graph model.

    The same sequence of generator-based fits is run twice: once with the
    model compiled for loss only, once compiled with an accuracy metric.
    '''
    def data_generator_graph(train):
        # Endless stream; every item is the whole train (or test) set packed
        # into a fresh name -> array dict.
        if train:
            X, y = X_train_graph, y_train_graph
        else:
            X, y = X_test_graph, y_test_graph
        while True:
            yield {'input1': X, 'output1': y}

    def run_generator_fits():
        # Plain fit, fit with dict validation data, then two fits that pull
        # validation data from a generator, followed by a generator-based
        # evaluation.
        graph.fit_generator(data_generator_graph(True), 1000, nb_epoch=4)
        graph.fit_generator(data_generator_graph(True), 1000, nb_epoch=4,
                            validation_data={'input1': X_test_graph,
                                             'output1': y_test_graph})
        for _ in range(2):
            graph.fit_generator(data_generator_graph(True), 1000, nb_epoch=4,
                                validation_data=data_generator_graph(False),
                                nb_val_samples=batch_size * 3)
        return graph.evaluate_generator(data_generator_graph(True), 128,
                                        verbose=0)

    graph = Graph()
    graph.add_input(name='input1', input_shape=(32,))

    graph.add_node(Dense(16), name='dense1', input='input1')
    graph.add_node(Dense(4), name='dense2', input='input1')
    graph.add_node(Dense(4), name='dense3', input='dense1')

    graph.add_output(name='output1',
                     inputs=['dense2', 'dense3'],
                     merge_mode='sum')

    # Loss-only compilation.
    graph.compile('rmsprop', {'output1': 'mse'})
    run_generator_fits()
    graph.evaluate({'input1': X_test_graph, 'output1': y_test_graph},
                   verbose=0)

    # test show_accuracy
    graph.compile('rmsprop', {'output1': 'mse'}, metrics=['accuracy'])
    run_generator_fits()
|
||||
|
||||
|
||||
def test_1o_1i():
    '''Fit, predict and evaluate a non-sequential Graph with 1 input and 1 output.

    Covers loss-only compilation, compilation with an accuracy metric (where
    the batch/evaluate calls must unpack into two values), `validation_split`
    and explicit `validation_data`.
    '''
    np.random.seed(1337)

    # Small factories so every call receives its own dict, as the
    # hand-written literals did.
    def train_dict():
        return {'input1': X_train_graph, 'output1': y_train_graph}

    def test_dict():
        return {'input1': X_test_graph, 'output1': y_test_graph}

    graph = Graph()
    graph.add_input(name='input1', input_shape=(32,))

    graph.add_node(Dense(16), name='dense1', input='input1')
    graph.add_node(Dense(4), name='dense2', input='input1')
    graph.add_node(Dense(4), name='dense3', input='dense1')

    graph.add_output(name='output1',
                     inputs=['dense2', 'dense3'],
                     merge_mode='sum')
    graph.compile('rmsprop', {'output1': 'mse'})

    graph.fit(train_dict(), nb_epoch=10)
    out = graph.predict({'input1': X_test_graph})

    assert type(out) is dict
    assert len(out) == 1
    graph.test_on_batch(test_dict())
    graph.train_on_batch(test_dict())
    graph.evaluate(test_dict(), verbose=0)

    # test accuracy:
    graph.compile('rmsprop', {'output1': 'mse'}, metrics=['accuracy'])
    graph.fit(train_dict(), nb_epoch=1)
    # The two-target unpacking doubles as a check that exactly two values
    # come back from each call.
    loss, acc = graph.test_on_batch(test_dict())
    loss, acc = graph.train_on_batch(test_dict())
    loss, acc = graph.evaluate(test_dict(), verbose=0)

    # test validation split
    graph.fit(train_dict(), validation_split=0.2, nb_epoch=1)
    # test validation data
    graph.fit(train_dict(), validation_data=train_dict(), nb_epoch=1)
|
||||
|
||||
|
||||
def test_1o_1i_2():
    '''Train and serialize a more complex non-sequential Graph (1 input, 1 output).

    Topology: `input1` feeds `dense1` and `dense2-0`; `dense2` is a relu
    activation on `dense2-0`; `dense3` follows `dense2`; `dense4` takes the
    sum of `dense1` and `dense3`; `output1` is the sum of `dense2` and
    `dense4`.
    '''
    graph = Graph()
    graph.add_input(name='input1', input_shape=(32,))

    graph.add_node(Dense(4), name='dense1', input='input1')
    graph.add_node(Dense(4), name='dense2-0', input='input1')
    graph.add_node(Activation('relu'), name='dense2', input='dense2-0')

    graph.add_node(Dense(4), name='dense3', input='dense2')
    graph.add_node(Dense(4), name='dense4', inputs=['dense1', 'dense3'],
                   merge_mode='sum')

    graph.add_output(name='output1', inputs=['dense2', 'dense4'],
                     merge_mode='sum')
    graph.compile('rmsprop', {'output1': 'mse'})

    graph.fit({'input1': X_train_graph, 'output1': y_train_graph},
              nb_epoch=2)
    out = graph.predict({'input1': X_train_graph})
    # Was `assert(type(out == dict))`, which asserts a type object and is
    # therefore always true; check the prediction container for real.
    assert isinstance(out, dict)
    assert len(out) == 1

    graph.test_on_batch({'input1': X_test_graph, 'output1': y_test_graph})
    graph.train_on_batch({'input1': X_test_graph, 'output1': y_test_graph})
    graph.evaluate({'input1': X_test_graph, 'output1': y_test_graph})

    # test serialization: each round trip only has to complete without raising
    config = graph.get_config()
    Graph.from_config(config)

    graph.summary()
    json_str = graph.to_json()
    model_from_json(json_str)

    yaml_str = graph.to_yaml()
    model_from_yaml(yaml_str)
|
||||
|
||||
|
||||
def test_1o_2i():
    '''Train and serialize a non-sequential Graph with 2 inputs and 1 output.

    `input1` goes through `dense1` -> `dense3`, `input2` through `dense2`;
    `output1` is the sum of `dense2` and `dense3`.
    '''
    graph = Graph()
    graph.add_input(name='input1', input_shape=(32,))
    graph.add_input(name='input2', input_shape=(32,))

    graph.add_node(Dense(16), name='dense1', input='input1')
    graph.add_node(Dense(4), name='dense2', input='input2')
    graph.add_node(Dense(4), name='dense3', input='dense1')

    graph.add_output(name='output1', inputs=['dense2', 'dense3'],
                     merge_mode='sum')
    graph.compile('rmsprop', {'output1': 'mse'})

    graph.fit({'input1': X_train_graph, 'input2': X2_train_graph, 'output1': y_train_graph},
              nb_epoch=2)
    out = graph.predict({'input1': X_test_graph, 'input2': X2_test_graph})
    # Was `assert(type(out == dict))`, which asserts a type object and is
    # therefore always true; check the prediction container for real.
    assert isinstance(out, dict)
    assert len(out) == 1

    graph.test_on_batch({'input1': X_test_graph, 'input2': X2_test_graph, 'output1': y_test_graph})
    graph.train_on_batch({'input1': X_test_graph, 'input2': X2_test_graph, 'output1': y_test_graph})
    graph.evaluate({'input1': X_test_graph, 'input2': X2_test_graph, 'output1': y_test_graph})

    # test serialization: each round trip only has to complete without raising
    config = graph.get_config()
    Graph.from_config(config)

    graph.summary()
    json_str = graph.to_json()
    model_from_json(json_str)

    yaml_str = graph.to_yaml()
    model_from_yaml(yaml_str)
|
||||
|
||||
|
||||
def test_siamese_1():
    '''Train and serialize a Graph whose two inputs share one Dense node.

    The shared node's per-input results are merged with `merge_mode='sum'`,
    passed through `dense1` and exposed as `output1`.
    '''
    graph = Graph()
    graph.add_input(name='input1', input_shape=(32,))
    graph.add_input(name='input2', input_shape=(32,))

    graph.add_shared_node(Dense(4), name='shared', inputs=['input1', 'input2'], merge_mode='sum')
    graph.add_node(Dense(4), name='dense1', input='shared')
    # graph.add_node(Dense(4), name='output1', input='shared', create_output=True)

    # graph.add_output(name='output1', inputs=['dense1', 'shared'], merge_mode='sum')
    graph.add_output(name='output1', input='dense1')
    graph.compile('rmsprop', {'output1': 'mse'})

    graph.fit({'input1': X_train_graph, 'input2': X2_train_graph, 'output1': y_train_graph},
              nb_epoch=10)
    out = graph.predict({'input1': X_test_graph, 'input2': X2_test_graph})
    # Was `assert(type(out == dict))`, which asserts a type object and is
    # therefore always true; check the prediction container for real.
    assert isinstance(out, dict)
    assert len(out) == 1

    graph.test_on_batch({'input1': X_test_graph, 'input2': X2_test_graph, 'output1': y_test_graph})
    graph.train_on_batch({'input1': X_test_graph, 'input2': X2_test_graph, 'output1': y_test_graph})
    loss = graph.evaluate({'input1': X_test_graph, 'input2': X2_test_graph, 'output1': y_test_graph})
    assert loss < 4.0

    # test serialization: each round trip only has to complete without raising
    config = graph.get_config()
    Graph.from_config(config)

    graph.summary()
    json_str = graph.to_json()
    model_from_json(json_str)

    yaml_str = graph.to_yaml()
    model_from_yaml(yaml_str)
|
||||
|
||||
|
||||
'''The test below is failing because of a known bug
|
||||
with the serialization of legacy Graph models
|
||||
containing shared nodes with named outputs.
|
||||
This is very low priority (= no plans to fix it),
|
||||
since the Graph model is deprecated.
|
||||
'''
|
||||
# def test_siamese_2():
|
||||
# graph = Graph()
|
||||
# graph.add_input(name='input1', input_shape=(32,))
|
||||
# graph.add_input(name='input2', input_shape=(32,))
|
||||
|
||||
# graph.add_shared_node(Dense(4), name='shared',
|
||||
# inputs=['input1', 'input2'],
|
||||
# outputs=['shared_output1', 'shared_output2'])
|
||||
# graph.add_node(Dense(4), name='dense1', input='shared_output1')
|
||||
# graph.add_node(Dense(4), name='dense2', input='shared_output2')
|
||||
|
||||
# graph.add_output(name='output1', inputs=['dense1', 'dense2'],
|
||||
# merge_mode='sum')
|
||||
# graph.compile('rmsprop', {'output1': 'mse'})
|
||||
|
||||
# graph.fit({'input1': X_train_graph,
|
||||
# 'input2': X2_train_graph,
|
||||
# 'output1': y_train_graph},
|
||||
# nb_epoch=10)
|
||||
# out = graph.predict({'input1': X_test_graph,
|
||||
# 'input2': X2_test_graph})
|
||||
# assert(type(out == dict))
|
||||
# assert(len(out) == 1)
|
||||
|
||||
# loss = graph.test_on_batch({'input1': X_test_graph,
|
||||
# 'input2': X2_test_graph,
|
||||
# 'output1': y_test_graph})
|
||||
# loss = graph.train_on_batch({'input1': X_test_graph,
|
||||
# 'input2': X2_test_graph,
|
||||
# 'output1': y_test_graph})
|
||||
# loss = graph.evaluate({'input1': X_test_graph,
|
||||
# 'input2': X2_test_graph,
|
||||
# 'output1': y_test_graph})
|
||||
# # test serialization
|
||||
# config = graph.get_config()
|
||||
# new_graph = Graph.from_config(config)
|
||||
|
||||
# graph.summary()
|
||||
# json_str = graph.to_json()
|
||||
# new_graph = model_from_json(json_str)
|
||||
|
||||
# yaml_str = graph.to_yaml()
|
||||
# new_graph = model_from_yaml(yaml_str)
|
||||
|
||||
|
||||
def test_2o_1i_save_weights():
    '''Check weight save/load and loss weights on a Graph with 1 input, 2 outputs.

    A trained graph's weights are written to an HDF5 file, loaded into a
    freshly built graph of the same topology, and the evaluation loss of the
    reloaded graph must equal the loss measured before saving.
    '''
    def build_graph():
        # Both the trained and the reloaded model need the exact same
        # topology and compile settings, so build them in one place.
        g = Graph()
        g.add_input(name='input1', input_shape=(32,))

        g.add_node(Dense(16), name='dense1', input='input1')
        g.add_node(Dense(4), name='dense2', input='input1')
        g.add_node(Dense(1), name='dense3', input='dense1')

        g.add_output(name='output1', input='dense2')
        g.add_output(name='output2', input='dense3')
        g.compile('rmsprop', {'output1': 'mse', 'output2': 'mse'})
        return g

    graph = build_graph()

    graph.fit({'input1': X_train_graph, 'output1': y_train_graph, 'output2': y2_train_graph},
              nb_epoch=10)
    out = graph.predict({'input1': X_test_graph})
    # Was `assert(type(out == dict))`, which asserts a type object and is
    # therefore always true; check the prediction container for real.
    assert isinstance(out, dict)
    assert len(out) == 2
    graph.test_on_batch({'input1': X_test_graph, 'output1': y_test_graph, 'output2': y2_test_graph})
    graph.train_on_batch({'input1': X_test_graph, 'output1': y_test_graph, 'output2': y2_test_graph})
    loss = graph.evaluate({'input1': X_test_graph, 'output1': y_test_graph, 'output2': y2_test_graph})

    # test weight saving
    fname = 'test_2o_1i_weights_temp.h5'
    try:
        graph.save_weights(fname, overwrite=True)

        graph = build_graph()
        # Load through `fname` so the saved and loaded paths cannot drift apart.
        graph.load_weights(fname)
    finally:
        # Remove the temporary file even when saving or loading fails.
        if os.path.exists(fname):
            os.remove(fname)

    nloss = graph.evaluate({'input1': X_test_graph, 'output1': y_test_graph, 'output2': y2_test_graph})
    assert loss == nloss

    # test loss weights
    graph.compile('rmsprop', {'output1': 'mse', 'output2': 'mse'},
                  loss_weights={'output1': 1., 'output2': 2.})
    graph.fit({'input1': X_train_graph, 'output1': y_train_graph, 'output2': y2_train_graph},
              nb_epoch=1)
|
||||
|
||||
|
||||
def test_2o_1i_sample_weights():
    '''Train and evaluate a 1-input, 2-output Graph with per-output sample weights.

    One uniformly random weight is drawn per sample for each output, separately
    for the train and test arrays, and passed as a dict keyed by output name.
    '''
    graph = Graph()
    graph.add_input(name='input1', input_shape=(32,))

    graph.add_node(Dense(16), name='dense1', input='input1')
    graph.add_node(Dense(4), name='dense2', input='input1')
    graph.add_node(Dense(1), name='dense3', input='dense1')

    graph.add_output(name='output1', input='dense2')
    graph.add_output(name='output2', input='dense3')

    weights1 = np.random.uniform(size=y_train_graph.shape[0])
    weights2 = np.random.uniform(size=y2_train_graph.shape[0])
    weights1_test = np.random.uniform(size=y_test_graph.shape[0])
    weights2_test = np.random.uniform(size=y2_test_graph.shape[0])

    graph.compile('rmsprop', {'output1': 'mse', 'output2': 'mse'})

    graph.fit({'input1': X_train_graph, 'output1': y_train_graph, 'output2': y2_train_graph},
              nb_epoch=10,
              sample_weight={'output1': weights1, 'output2': weights2})
    out = graph.predict({'input1': X_test_graph})
    # Was `assert(type(out == dict))`, which asserts a type object and is
    # therefore always true; check the prediction container for real.
    assert isinstance(out, dict)
    assert len(out) == 2
    graph.test_on_batch({'input1': X_test_graph, 'output1': y_test_graph, 'output2': y2_test_graph},
                        sample_weight={'output1': weights1_test, 'output2': weights2_test})
    graph.train_on_batch({'input1': X_train_graph, 'output1': y_train_graph, 'output2': y2_train_graph},
                         sample_weight={'output1': weights1, 'output2': weights2})
    graph.evaluate({'input1': X_train_graph, 'output1': y_train_graph, 'output2': y2_train_graph},
                   sample_weight={'output1': weights1, 'output2': weights2})
|
||||
|
||||
|
||||
def test_recursive():
    '''Use a Graph as a layer inside a Sequential model (layer-like API).

    The Graph is sandwiched between two Dense layers; the resulting model is
    trained, evaluated and passed through the config / JSON / YAML
    serialization round trips.
    '''
    inner = Graph()
    inner.add_input(name='input1', input_shape=(32,))
    inner.add_node(Dense(16), name='dense1', input='input1')
    inner.add_node(Dense(4), name='dense2', input='input1')
    inner.add_node(Dense(4), name='dense3', input='dense1')
    inner.add_output(name='output1', inputs=['dense2', 'dense3'],
                     merge_mode='sum')

    seq = Sequential()
    for layer in (Dense(32, input_shape=(32,)), inner, Dense(4)):
        seq.add(layer)

    seq.compile('rmsprop', 'mse')

    seq.fit(X_train_graph, y_train_graph, batch_size=10, nb_epoch=10)
    seq.evaluate(X_test_graph, y_test_graph)

    # test serialization
    Sequential.from_config(seq.get_config())

    seq.summary()
    model_from_json(seq.to_json())

    model_from_yaml(seq.to_yaml())
|
||||
|
||||
|
||||
def test_create_output():
    '''Check the `create_output=True` argument of `Graph.add_node`.

    The node named `output1` is declared with `create_output=True` instead of
    a separate `add_output` call; the graph must then train, predict, reach
    a loss below 2.5 and survive a config round trip.
    '''
    graph = Graph()
    graph.add_input(name='input1', input_shape=(32,))

    graph.add_node(Dense(16), name='dense1', input='input1')
    graph.add_node(Dense(4), name='dense2', input='input1')
    graph.add_node(Dense(4), name='dense3', input='dense1')
    graph.add_node(Dense(4), name='output1', inputs=['dense2', 'dense3'],
                   merge_mode='sum', create_output=True)
    graph.compile('rmsprop', {'output1': 'mse'})

    graph.fit({'input1': X_train_graph, 'output1': y_train_graph},
              nb_epoch=10)
    out = graph.predict({'input1': X_test_graph})
    # Was `assert(type(out == dict))`, which asserts a type object and is
    # therefore always true; check the prediction container for real.
    assert isinstance(out, dict)
    assert len(out) == 1

    graph.test_on_batch({'input1': X_test_graph, 'output1': y_test_graph})
    graph.train_on_batch({'input1': X_test_graph, 'output1': y_test_graph})
    loss = graph.evaluate({'input1': X_test_graph, 'output1': y_test_graph})
    assert loss < 2.5

    # test serialization: the rebuilt graph must compile and predict
    config = graph.get_config()
    graph = Graph.from_config(config)
    graph.compile('rmsprop', {'output1': 'mse'})
    graph.predict({'input1': X_test_graph})
|
||||
|
||||
|
||||
def test_count_params():
    '''Compare `Graph.count_params()` with a hand-computed parameter count.

    The count is checked right after `build()` and again after `compile()`.
    '''
    nb_units = 100
    nb_classes = 2

    def dense_params(fan_in, fan_out):
        # fan_in * fan_out plus fan_out, i.e. the per-layer term of the
        # expected total.
        return fan_in * fan_out + fan_out

    graph = Graph()
    graph.add_input(name='input1', input_shape=(32,))
    graph.add_input(name='input2', input_shape=(32,))
    graph.add_node(Dense(nb_units),
                   name='dense1', input='input1')
    graph.add_node(Dense(nb_classes),
                   name='dense2', input='input2')
    graph.add_node(Dense(nb_classes),
                   name='dense3', input='dense1')
    graph.add_output(name='output', inputs=['dense2', 'dense3'],
                     merge_mode='sum')
    graph.build()

    # dense1, dense2 and dense3, in that order.
    expected = (dense_params(32, nb_units) +
                dense_params(32, nb_classes) +
                dense_params(nb_units, nb_classes))

    assert expected == graph.count_params()

    graph.compile('rmsprop', {'output': 'binary_crossentropy'})

    assert expected == graph.count_params()
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
pytest.main([__file__])
|
||||
@@ -0,0 +1,182 @@
|
||||
from __future__ import print_function
|
||||
import pytest
|
||||
import numpy as np
|
||||
from keras.models import Sequential
|
||||
from keras.layers.core import Dense
|
||||
from keras.utils.test_utils import keras_test
|
||||
|
||||
|
||||
@keras_test
def test_multiprocessing_training():
    '''Run `fit_generator` with `pickle_safe=True` (4 workers) and `pickle_safe=False`.

    The test passes when both calls return without raising.
    '''
    reached_end = False

    arr_data = np.random.randint(0, 256, (500, 2))
    arr_labels = np.random.randint(0, 2, 500)

    def myGenerator():
        # Endless stream of 32-row windows starting at a random offset of
        # the in-memory arrays.
        window = 32
        upper = 500 - window

        while True:
            lo = np.random.randint(0, upper)
            hi = lo + window
            yield arr_data[lo: hi], arr_labels[lo: hi]

    # Build a NN
    model = Sequential()
    model.add(Dense(1, input_shape=(2, )))
    model.compile(loss='mse', optimizer='adadelta')

    # Arguments common to both runs.
    shared = dict(samples_per_epoch=320,
                  nb_epoch=1,
                  verbose=1,
                  max_q_size=10)

    model.fit_generator(myGenerator(), nb_worker=4, pickle_safe=True, **shared)

    model.fit_generator(myGenerator(), pickle_safe=False, **shared)

    reached_end = True

    assert reached_end
|
||||
|
||||
|
||||
@keras_test
def test_multiprocessing_training_fromfile():
    '''Run `fit_generator` on a generator that reads its data from an .npz file.

    The data is written to `data.npz` in the current working directory, read
    back inside the generator, and the file is removed again when the test
    finishes, whether or not it passed.
    '''
    import os

    reached_end = False

    arr_data = np.random.randint(0, 256, (500, 2))
    arr_labels = np.random.randint(0, 2, 500)
    np.savez("data.npz", **{"data": arr_data, "labels": arr_labels})

    def myGenerator():
        batch_size = 32
        n_samples = 500

        # Read both arrays once and close the archive straight away, instead
        # of re-reading them from the file for every batch and keeping the
        # handle open for the lifetime of the endless generator.
        with np.load("data.npz") as arr:
            data = arr["data"]
            labels = arr["labels"]

        while True:
            batch_index = np.random.randint(0, n_samples - batch_size)
            start = batch_index
            end = start + batch_size
            X = data[start: end]
            y = labels[start: end]
            yield X, y

    try:
        # Build a NN
        model = Sequential()
        model.add(Dense(1, input_shape=(2, )))
        model.compile(loss='mse', optimizer='adadelta')

        model.fit_generator(myGenerator(),
                            samples_per_epoch=320,
                            nb_epoch=1,
                            verbose=1,
                            max_q_size=10,
                            nb_worker=2,
                            pickle_safe=True)

        model.fit_generator(myGenerator(),
                            samples_per_epoch=320,
                            nb_epoch=1,
                            verbose=1,
                            max_q_size=10,
                            pickle_safe=False)
        reached_end = True
    finally:
        # Do not leave the fixture file behind in the working directory.
        if os.path.exists("data.npz"):
            os.remove("data.npz")

    assert reached_end
|
||||
|
||||
|
||||
@keras_test
def test_multiprocessing_predicting():
    '''Run `predict_generator` with `pickle_safe=True` (2 workers) and `pickle_safe=False`.

    The test passes when both calls return without raising.
    '''
    reached_end = False

    arr_data = np.random.randint(0, 256, (500, 2))

    def myGenerator():
        # Endless stream of 32-row input windows starting at a random offset.
        window = 32
        upper = 500 - window

        while True:
            lo = np.random.randint(0, upper)
            yield arr_data[lo: lo + window]

    # Build a NN
    model = Sequential()
    model.add(Dense(1, input_shape=(2, )))
    model.compile(loss='mse', optimizer='adadelta')

    # Arguments common to both runs.
    shared = dict(val_samples=320, max_q_size=10)

    model.predict_generator(myGenerator(), nb_worker=2, pickle_safe=True, **shared)
    model.predict_generator(myGenerator(), pickle_safe=False, **shared)
    reached_end = True

    assert reached_end
|
||||
|
||||
|
||||
@keras_test
def test_multiprocessing_evaluating():
    '''Run `evaluate_generator` with `pickle_safe=True` (2 workers) and `pickle_safe=False`.

    The test passes when both calls return without raising.
    '''
    reached_end = False

    arr_data = np.random.randint(0, 256, (500, 2))
    arr_labels = np.random.randint(0, 2, 500)

    def myGenerator():
        # Endless stream of 32-row (inputs, labels) windows starting at a
        # random offset.
        window = 32
        upper = 500 - window

        while True:
            lo = np.random.randint(0, upper)
            hi = lo + window
            yield arr_data[lo: hi], arr_labels[lo: hi]

    # Build a NN
    model = Sequential()
    model.add(Dense(1, input_shape=(2, )))
    model.compile(loss='mse', optimizer='adadelta')

    # Arguments common to both runs.
    shared = dict(val_samples=320, max_q_size=10)

    model.evaluate_generator(myGenerator(), nb_worker=2, pickle_safe=True, **shared)
    model.evaluate_generator(myGenerator(), pickle_safe=False, **shared)
    reached_end = True

    assert reached_end
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
|
||||
pytest.main([__file__])
|
||||
@@ -58,8 +58,8 @@ def test_Eigenvalue_reg():
|
||||
model.compile(loss='categorical_crossentropy', optimizer='rmsprop')
|
||||
model.fit(X_train, Y_train, batch_size=batch_size, nb_epoch=nb_epoch, verbose=0)
|
||||
model.evaluate(X_test[test_ids, :], Y_test[test_ids, :], verbose=0)
|
||||
|
||||
|
||||
|
||||
|
||||
def test_W_reg():
|
||||
(X_train, Y_train), (X_test, Y_test), test_ids = get_data()
|
||||
for reg in [regularizers.l1(),
|
||||
|
||||
@@ -9,7 +9,7 @@ from keras import backend as K
|
||||
from keras.models import Graph, Sequential
|
||||
from keras.layers.core import Dense, Activation, Merge, Lambda
|
||||
from keras.utils import np_utils
|
||||
from keras.utils.test_utils import get_test_data
|
||||
from keras.utils.test_utils import get_test_data, keras_test
|
||||
from keras.models import model_from_json, model_from_yaml
|
||||
from keras import objectives
|
||||
from keras.engine.training import make_batches
|
||||
@@ -22,6 +22,23 @@ batch_size = 32
|
||||
nb_epoch = 1
|
||||
|
||||
|
||||
@keras_test
def test_sequential_pop():
    '''Check that `Sequential.pop()` drops the last layer and the model still trains.

    After popping, the model must have a single layer, report an output shape
    of `(None, nb_hidden)`, and accept a recompile and fit against targets of
    that width.
    '''
    model = Sequential()
    model.add(Dense(nb_hidden, input_dim=input_dim))
    model.add(Dense(nb_class))
    model.compile(loss='mse', optimizer='sgd')

    # Two-layer model: targets have nb_class columns.
    inputs = np.random.random((batch_size, input_dim))
    targets = np.random.random((batch_size, nb_class))
    model.fit(inputs, targets, nb_epoch=1)

    model.pop()
    assert len(model.layers) == 1
    assert model.output_shape == (None, nb_hidden)

    # One-layer model: recompile and fit against nb_hidden-wide targets.
    model.compile(loss='mse', optimizer='sgd')
    hidden_targets = np.random.random((batch_size, nb_hidden))
    model.fit(inputs, hidden_targets, nb_epoch=1)
|
||||
|
||||
|
||||
def _get_test_data():
|
||||
np.random.seed(1234)
|
||||
|
||||
@@ -38,6 +55,7 @@ def _get_test_data():
|
||||
return (X_train, y_train), (X_test, y_test)
|
||||
|
||||
|
||||
@keras_test
|
||||
def test_sequential_fit_generator():
|
||||
(X_train, y_train), (X_test, y_test) = _get_test_data()
|
||||
|
||||
@@ -59,6 +77,8 @@ def test_sequential_fit_generator():
|
||||
model.add(Dense(nb_hidden, input_shape=(input_dim,)))
|
||||
model.add(Activation('relu'))
|
||||
model.add(Dense(nb_class))
|
||||
model.pop()
|
||||
model.add(Dense(nb_class))
|
||||
model.add(Activation('softmax'))
|
||||
model.compile(loss='categorical_crossentropy', optimizer='rmsprop')
|
||||
|
||||
@@ -67,10 +87,10 @@ def test_sequential_fit_generator():
|
||||
model.fit_generator(data_generator(True), len(X_train), nb_epoch,
|
||||
validation_data=data_generator(False), nb_val_samples=batch_size * 3)
|
||||
model.fit_generator(data_generator(True), len(X_train), nb_epoch, max_q_size=2)
|
||||
|
||||
loss = model.evaluate(X_train, y_train)
|
||||
model.evaluate(X_train, y_train)
|
||||
|
||||
|
||||
@keras_test
|
||||
def test_sequential():
|
||||
(X_train, y_train), (X_test, y_test) = _get_test_data()
|
||||
|
||||
@@ -128,16 +148,17 @@ def test_sequential():
|
||||
|
||||
# test serialization
|
||||
config = model.get_config()
|
||||
new_model = Sequential.from_config(config)
|
||||
Sequential.from_config(config)
|
||||
|
||||
model.summary()
|
||||
json_str = model.to_json()
|
||||
new_model = model_from_json(json_str)
|
||||
model_from_json(json_str)
|
||||
|
||||
yaml_str = model.to_yaml()
|
||||
new_model = model_from_yaml(yaml_str)
|
||||
model_from_yaml(yaml_str)
|
||||
|
||||
|
||||
@keras_test
|
||||
def test_nested_sequential():
|
||||
(X_train, y_train), (X_test, y_test) = _get_test_data()
|
||||
|
||||
@@ -190,16 +211,17 @@ def test_nested_sequential():
|
||||
|
||||
# test serialization
|
||||
config = model.get_config()
|
||||
new_model = Sequential.from_config(config)
|
||||
Sequential.from_config(config)
|
||||
|
||||
model.summary()
|
||||
json_str = model.to_json()
|
||||
new_model = model_from_json(json_str)
|
||||
model_from_json(json_str)
|
||||
|
||||
yaml_str = model.to_yaml()
|
||||
new_model = model_from_yaml(yaml_str)
|
||||
model_from_yaml(yaml_str)
|
||||
|
||||
|
||||
@keras_test
|
||||
def test_merge_sum():
|
||||
(X_train, y_train), (X_test, y_test) = _get_test_data()
|
||||
left = Sequential()
|
||||
@@ -249,16 +271,17 @@ def test_merge_sum():
|
||||
|
||||
# test serialization
|
||||
config = model.get_config()
|
||||
new_model = Sequential.from_config(config)
|
||||
Sequential.from_config(config)
|
||||
|
||||
model.summary()
|
||||
json_str = model.to_json()
|
||||
new_model = model_from_json(json_str)
|
||||
model_from_json(json_str)
|
||||
|
||||
yaml_str = model.to_yaml()
|
||||
new_model = model_from_yaml(yaml_str)
|
||||
model_from_yaml(yaml_str)
|
||||
|
||||
|
||||
@keras_test
|
||||
def test_merge_dot():
|
||||
(X_train, y_train), (X_test, y_test) = _get_test_data()
|
||||
|
||||
@@ -293,6 +316,7 @@ def test_merge_dot():
|
||||
model.compile(loss='categorical_crossentropy', optimizer='rmsprop')
|
||||
|
||||
|
||||
@keras_test
|
||||
def test_merge_concat():
|
||||
(X_train, y_train), (X_test, y_test) = _get_test_data()
|
||||
|
||||
@@ -332,6 +356,7 @@ def test_merge_concat():
|
||||
assert(loss == nloss)
|
||||
|
||||
|
||||
@keras_test
|
||||
def test_merge_recursivity():
|
||||
(X_train, y_train), (X_test, y_test) = _get_test_data()
|
||||
left = Sequential()
|
||||
@@ -378,16 +403,17 @@ def test_merge_recursivity():
|
||||
|
||||
# test serialization
|
||||
config = model.get_config()
|
||||
new_model = Sequential.from_config(config)
|
||||
Sequential.from_config(config)
|
||||
|
||||
model.summary()
|
||||
json_str = model.to_json()
|
||||
new_model = model_from_json(json_str)
|
||||
model_from_json(json_str)
|
||||
|
||||
yaml_str = model.to_yaml()
|
||||
new_model = model_from_yaml(yaml_str)
|
||||
model_from_yaml(yaml_str)
|
||||
|
||||
|
||||
@keras_test
|
||||
def test_merge_overlap():
|
||||
(X_train, y_train), (X_test, y_test) = _get_test_data()
|
||||
left = Sequential()
|
||||
@@ -425,16 +451,17 @@ def test_merge_overlap():
|
||||
|
||||
# test serialization
|
||||
config = model.get_config()
|
||||
new_model = Sequential.from_config(config)
|
||||
Sequential.from_config(config)
|
||||
|
||||
model.summary()
|
||||
json_str = model.to_json()
|
||||
new_model = model_from_json(json_str)
|
||||
model_from_json(json_str)
|
||||
|
||||
yaml_str = model.to_yaml()
|
||||
new_model = model_from_yaml(yaml_str)
|
||||
model_from_yaml(yaml_str)
|
||||
|
||||
|
||||
@keras_test
|
||||
def test_sequential_count_params():
|
||||
input_dim = 20
|
||||
nb_units = 10
|
||||
|
||||
@@ -4,24 +4,27 @@ import pytest
|
||||
from keras.models import Sequential
|
||||
from keras.engine.training import weighted_objective
|
||||
from keras.layers.core import TimeDistributedDense, Masking
|
||||
from keras.utils.test_utils import keras_test
|
||||
from keras import objectives
|
||||
from keras import backend as K
|
||||
|
||||
|
||||
@keras_test
|
||||
def test_masking():
|
||||
np.random.seed(1337)
|
||||
X = np.array(
|
||||
[[[1, 1], [2, 1], [3, 1], [5, 5]],
|
||||
[[1, 5], [5, 0], [0, 0], [0, 0]]], dtype=np.int32)
|
||||
X = np.array([[[1], [1]],
|
||||
[[0], [0]]])
|
||||
model = Sequential()
|
||||
model.add(Masking(mask_value=0, input_shape=(4, 2)))
|
||||
model.add(Masking(mask_value=0, input_shape=(2, 1)))
|
||||
model.add(TimeDistributedDense(1, init='one'))
|
||||
model.compile(loss='mse', optimizer='sgd')
|
||||
y = model.predict(X)
|
||||
history = model.fit(X, 4 * y, nb_epoch=1, batch_size=2, verbose=1)
|
||||
assert history.history['loss'][0] == 285.
|
||||
y = np.array([[[1], [1]],
|
||||
[[1], [1]]])
|
||||
loss = model.train_on_batch(X, y)
|
||||
assert loss == 0
|
||||
|
||||
|
||||
@keras_test
|
||||
def test_loss_masking():
|
||||
weighted_loss = weighted_objective(objectives.get('mae'))
|
||||
shape = (3, 4, 2)
|
||||
|
||||
@@ -8,6 +8,7 @@ from keras.utils.test_utils import get_test_data
|
||||
from keras.models import Sequential, Graph
|
||||
from keras.layers import Dense, Activation, RepeatVector, TimeDistributedDense, GRU
|
||||
from keras.utils import np_utils
|
||||
from keras.utils.test_utils import keras_test
|
||||
|
||||
nb_classes = 10
|
||||
batch_size = 128
|
||||
@@ -61,15 +62,6 @@ def create_sequential_model():
|
||||
return model
|
||||
|
||||
|
||||
def create_graph_model():
    '''Return an uncompiled Graph: `input` -> Dense(32, relu) -> Dense(nb_classes, softmax) -> `output`.'''
    graph = Graph()
    graph.add_input(name='input', input_shape=(input_dim,))

    hidden = Dense(32, activation='relu')
    graph.add_node(hidden, name='d1', input='input')

    classifier = Dense(nb_classes, activation='softmax')
    graph.add_node(classifier, name='d2', input='d1')

    graph.add_output(name='output', input='d2')
    return graph
|
||||
|
||||
|
||||
def create_temporal_sequential_model():
|
||||
model = Sequential()
|
||||
model.add(GRU(32, input_shape=(timesteps, input_dim), return_sequences=True))
|
||||
@@ -78,17 +70,7 @@ def create_temporal_sequential_model():
|
||||
return model
|
||||
|
||||
|
||||
def create_temporal_graph_model():
    '''Return an uncompiled Graph for `(timesteps, input_dim)` sequence input.

    Layout: `input` -> GRU(32, return_sequences=True) ->
    TimeDistributedDense(nb_classes, softmax) -> `output`.
    '''
    graph = Graph()
    graph.add_input(name='input', input_shape=(timesteps, input_dim))

    recurrent_layer = GRU(32, return_sequences=True)
    graph.add_node(recurrent_layer, name='d1', input='input')

    classifier = TimeDistributedDense(nb_classes, activation='softmax')
    graph.add_node(classifier, name='d2', input='d1')

    graph.add_output(name='output', input='d2')
    return graph
|
||||
|
||||
|
||||
@keras_test
|
||||
def _test_weights_sequential(model, class_weight=None, sample_weight=None,
|
||||
X_train=X_train, Y_train=Y_train,
|
||||
X_test=X_test, Y_test=Y_test):
|
||||
@@ -122,39 +104,13 @@ def _test_weights_sequential(model, class_weight=None, sample_weight=None,
|
||||
return score
|
||||
|
||||
|
||||
def _test_weights_graph(model, class_weight=None, sample_weight=None,
                        X_train=X_train, Y_train=Y_train,
                        X_test=X_test, Y_test=Y_test):
    """Train a compiled Graph model with optional weights and score it.

    The model is fitted twice for ``nb_epoch // 2`` epochs each (the second
    time with a 10% validation split), then exercised once through
    ``train_on_batch`` and ``test_on_batch`` on the first 32 training
    samples.

    # Arguments
        model: compiled Graph model with 'input' and 'output' nodes.
        class_weight: value passed as the class weight of 'output'.
        sample_weight: value passed as the sample weight of 'output';
            sliced to the first 32 entries for the single-batch calls.
        X_train, Y_train: training inputs and targets.
        X_test, Y_test: evaluation inputs and targets.

    # Returns
        The result of ``model.evaluate`` on the ``test_ids`` rows of the
        test data.
    """
    def graph_data(inputs, targets):
        # A fresh dict per call, keyed by the graph's node names.
        return {'input': inputs, 'output': targets}

    def for_output(value):
        return {'output': value}

    half_epochs = nb_epoch // 2

    model.fit(graph_data(X_train, Y_train),
              batch_size=batch_size, nb_epoch=half_epochs, verbose=0,
              class_weight=for_output(class_weight),
              sample_weight=for_output(sample_weight))
    model.fit(graph_data(X_train, Y_train),
              batch_size=batch_size, nb_epoch=half_epochs, verbose=0,
              class_weight=for_output(class_weight),
              sample_weight=for_output(sample_weight),
              validation_split=0.1)

    # The single-batch calls only see the first 32 samples, so the sample
    # weights (when given) are truncated to match.
    if sample_weight is None:
        batch_weight = None
    else:
        batch_weight = sample_weight[:32]

    model.train_on_batch(graph_data(X_train[:32], Y_train[:32]),
                         class_weight=for_output(class_weight),
                         sample_weight=for_output(batch_weight))
    model.test_on_batch(graph_data(X_train[:32], Y_train[:32]),
                        sample_weight=for_output(batch_weight))

    return model.evaluate(graph_data(X_test[test_ids, :],
                                     Y_test[test_ids, :]),
                          verbose=0)
|
||||
|
||||
|
||||
# Baseline runs without any class or sample weights. The weighted tests
# assert that their score is lower than these reference values.
model = create_sequential_model()
model.compile(optimizer='rmsprop', loss=loss)
standard_score_sequential = _test_weights_sequential(model)

model = create_graph_model()
model.compile(optimizer='rmsprop', loss=dict(output=loss))
standard_score_graph = _test_weights_graph(model)
|
||||
|
||||
|
||||
@keras_test
|
||||
def test_sequential_class_weights():
|
||||
model = create_sequential_model()
|
||||
model.compile(loss=loss, optimizer='rmsprop')
|
||||
@@ -162,6 +118,7 @@ def test_sequential_class_weights():
|
||||
assert(score < standard_score_sequential)
|
||||
|
||||
|
||||
@keras_test
|
||||
def test_sequential_sample_weights():
|
||||
model = create_sequential_model()
|
||||
model.compile(loss=loss, optimizer='rmsprop')
|
||||
@@ -169,6 +126,7 @@ def test_sequential_sample_weights():
|
||||
assert(score < standard_score_sequential)
|
||||
|
||||
|
||||
@keras_test
|
||||
def test_sequential_temporal_sample_weights():
|
||||
model = create_temporal_sequential_model()
|
||||
model.compile(loss=loss, optimizer='rmsprop',
|
||||
@@ -194,32 +152,5 @@ def test_sequential_temporal_sample_weights():
|
||||
assert(score < standard_score_sequential)
|
||||
|
||||
|
||||
def test_graph_class_weights():
    """A class-weighted Graph run must score below the unweighted baseline."""
    graph = create_graph_model()
    graph.compile(optimizer='rmsprop', loss={'output': loss})
    weighted_score = _test_weights_graph(graph, class_weight=class_weight)
    assert weighted_score < standard_score_graph
|
||||
|
||||
|
||||
def test_graph_sample_weights():
    """A sample-weighted Graph run must score below the unweighted baseline."""
    graph = create_graph_model()
    graph.compile(optimizer='rmsprop', loss={'output': loss})
    weighted_score = _test_weights_graph(graph, sample_weight=sample_weight)
    assert weighted_score < standard_score_graph
|
||||
|
||||
|
||||
def test_graph_temporal_sample_weight():
    """Per-timestep sample weights on a Graph must beat the baseline score.

    The model is compiled with the 'temporal' sample-weight mode for its
    'output' node and trained on the temporal variants of the data sets.
    """
    graph = create_temporal_graph_model()
    graph.compile(optimizer='rmsprop',
                  loss={'output': loss},
                  sample_weight_modes={'output': 'temporal'})

    temporal_kwargs = dict(
        sample_weight=temporal_sample_weight,
        X_train=temporal_X_train,
        X_test=temporal_X_test,
        Y_train=temporal_Y_train,
        Y_test=temporal_Y_test,
    )
    weighted_score = _test_weights_graph(graph, **temporal_kwargs)
    assert weighted_score < standard_score_graph
|
||||
|
||||
|
||||
# Running this file directly hands it to pytest as the only test target.
if __name__ == '__main__':
    pytest.main(args=[__file__])
|
||||
|
||||
@@ -0,0 +1,165 @@
|
||||
import pytest
|
||||
import os
|
||||
import numpy as np
|
||||
from numpy.testing import assert_allclose
|
||||
|
||||
from keras.models import Model, Sequential
|
||||
from keras.layers import Dense, Dropout, RepeatVector, TimeDistributed
|
||||
from keras.layers import Input
|
||||
from keras import optimizers
|
||||
from keras import objectives
|
||||
from keras import metrics
|
||||
from keras.utils.test_utils import keras_test
|
||||
from keras.models import save_model, load_model
|
||||
|
||||
|
||||
@keras_test
def test_sequential_model_saving():
    """Round-trip a compiled Sequential model through save_model/load_model.

    Checks that the reloaded model predicts the same values as the
    original, that one further identical training step leaves both models
    in agreement, and that ``load_weights`` accepts the full-model file.
    The scratch file is removed even when a check fails.
    """
    model = Sequential()
    model.add(Dense(2, input_dim=3))
    model.add(Dense(3))
    model.compile(loss='mse', optimizer='rmsprop', metrics=['acc'])

    x = np.random.random((1, 3))
    y = np.random.random((1, 3))
    model.train_on_batch(x, y)

    out = model.predict(x)
    fname = 'tmp_' + str(np.random.randint(10000)) + '.h5'
    try:
        save_model(model, fname)

        new_model = load_model(fname)

        out2 = new_model.predict(x)
        assert_allclose(out, out2, atol=1e-05)

        # test that new updates are the same with both models
        x = np.random.random((1, 3))
        y = np.random.random((1, 3))
        model.train_on_batch(x, y)
        new_model.train_on_batch(x, y)
        out = model.predict(x)
        out2 = new_model.predict(x)
        assert_allclose(out, out2, atol=1e-05)

        # test load_weights on model file
        model.load_weights(fname)
    finally:
        # Clean up on failure too, so a broken run leaves no stray .h5
        # file in the working directory.
        if os.path.exists(fname):
            os.remove(fname)
|
||||
|
||||
|
||||
@keras_test
def test_sequential_model_saving_2():
    """Round-trip a Sequential model compiled with non-string settings.

    The model uses a loss function object, a configured RMSprop instance,
    a metric function and the 'temporal' sample-weight mode. After
    reloading, predictions must match, and must still match after one
    further identical training step on both models.
    """
    # test with funkier config
    model = Sequential()
    model.add(Dense(2, input_dim=3))
    model.add(RepeatVector(3))
    model.add(TimeDistributed(Dense(3)))
    model.compile(loss=objectives.MSE,
                  optimizer=optimizers.RMSprop(lr=0.0001),
                  metrics=[metrics.categorical_accuracy],
                  sample_weight_mode='temporal')
    x = np.random.random((1, 3))
    y = np.random.random((1, 3, 3))
    model.train_on_batch(x, y)

    out = model.predict(x)
    fname = 'tmp_' + str(np.random.randint(10000)) + '.h5'
    try:
        save_model(model, fname)
        new_model = load_model(fname)
    finally:
        # The file is only needed for the round trip; remove it even if
        # saving or loading raised.
        if os.path.exists(fname):
            os.remove(fname)

    out2 = new_model.predict(x)
    assert_allclose(out, out2, atol=1e-05)

    # test that new updates are the same with both models
    x = np.random.random((1, 3))
    y = np.random.random((1, 3, 3))
    model.train_on_batch(x, y)
    new_model.train_on_batch(x, y)
    out = model.predict(x)
    out2 = new_model.predict(x)
    assert_allclose(out, out2, atol=1e-05)
|
||||
|
||||
|
||||
@keras_test
def test_sequential_model_saving_3():
    """Round-trip a model while passing ``custom_objects`` to load_model.

    The optimizer and loss are taken from ``optimizers.rmsprop`` and
    ``objectives.mse`` and handed back to ``load_model`` under the keys
    'custom_opt' and 'custom_loss'. The reloaded model must reproduce the
    original predictions.
    """
    # test with custom optimizer, loss
    custom_opt = optimizers.rmsprop
    custom_loss = objectives.mse
    model = Sequential()
    model.add(Dense(2, input_dim=3))
    model.add(Dense(3))
    model.compile(loss=custom_loss, optimizer=custom_opt(), metrics=['acc'])

    x = np.random.random((1, 3))
    y = np.random.random((1, 3))
    model.train_on_batch(x, y)

    out = model.predict(x)
    fname = 'tmp_' + str(np.random.randint(10000)) + '.h5'
    try:
        save_model(model, fname)
        model = load_model(fname,
                           custom_objects={'custom_opt': custom_opt,
                                           'custom_loss': custom_loss})
    finally:
        # Remove the scratch file even if saving or loading raised.
        if os.path.exists(fname):
            os.remove(fname)

    out2 = model.predict(x)
    assert_allclose(out, out2, atol=1e-05)
|
||||
|
||||
|
||||
@keras_test
def test_fuctional_model_saving():
    """Round-trip a functional-API Model through save_model/load_model.

    A two-layer Dense model built from an ``Input`` tensor is trained for
    one batch, saved, reloaded, and must reproduce its predictions.
    """
    # Local names avoid shadowing the `input` builtin and keep the
    # symbolic tensors separate from the numpy arrays used below.
    inputs = Input(shape=(3,))
    hidden = Dense(2)(inputs)
    outputs = Dense(3)(hidden)

    model = Model(inputs, outputs)
    model.compile(loss=objectives.MSE,
                  optimizer=optimizers.RMSprop(lr=0.0001),
                  metrics=[metrics.categorical_accuracy])
    x = np.random.random((1, 3))
    y = np.random.random((1, 3))
    model.train_on_batch(x, y)

    out = model.predict(x)
    fname = 'tmp_' + str(np.random.randint(10000)) + '.h5'
    try:
        save_model(model, fname)
        model = load_model(fname)
    finally:
        # Remove the scratch file even if saving or loading raised.
        if os.path.exists(fname):
            os.remove(fname)

    out2 = model.predict(x)
    assert_allclose(out, out2, atol=1e-05)
|
||||
|
||||
|
||||
@keras_test
def test_saving_without_compilation():
    """Save and reload a model that has never been trained.

    Passes if ``save_model`` and ``load_model`` complete without raising.

    NOTE(review): despite the test name, the model IS compiled before it
    is saved. Confirm whether the uncompiled path was the intended target.
    """
    model = Sequential()
    model.add(Dense(2, input_dim=3))
    model.add(Dense(3))
    model.compile(loss='mse', optimizer='sgd', metrics=['acc'])

    fname = 'tmp_' + str(np.random.randint(10000)) + '.h5'
    try:
        save_model(model, fname)
        model = load_model(fname)
    finally:
        # Remove the scratch file even if saving or loading raised.
        if os.path.exists(fname):
            os.remove(fname)
|
||||
|
||||
|
||||
@keras_test
def test_saving_right_after_compilation():
    """Save and reload a model whose train function was built but never run.

    The train function is created explicitly through the private
    ``_make_train_function`` hook, with no training step performed.
    Passes if ``save_model`` and ``load_model`` complete without raising.
    """
    model = Sequential()
    model.add(Dense(2, input_dim=3))
    model.add(Dense(3))
    model.compile(loss='mse', optimizer='sgd', metrics=['acc'])
    model.model._make_train_function()

    fname = 'tmp_' + str(np.random.randint(10000)) + '.h5'
    try:
        save_model(model, fname)
        model = load_model(fname)
    finally:
        # Remove the scratch file even if saving or loading raised.
        if os.path.exists(fname):
            os.remove(fname)
|
||||
|
||||
|
||||
# Running this file directly hands it to pytest as the only test target.
if __name__ == '__main__':
    pytest.main(args=[__file__])
|
||||
Referência em uma Nova Issue
Bloquear um usuário