Comparar commits
70 Commits
| Autor | SHA1 | Data | |
|---|---|---|---|
| c627fa5bbd | |||
| affaa77078 | |||
| f1df88737c | |||
| 0ddc3360b7 | |||
| eaca5da3e2 | |||
| 70da22c31f | |||
| c158410168 | |||
| 0c237ebea2 | |||
| 8967d16d00 | |||
| 964023bec7 | |||
| 16aa56bb1d | |||
| bdf05c48ef | |||
| 653cfd2076 | |||
| bcbfcc000c | |||
| 54a417f616 | |||
| 5e51d02a94 | |||
| d3b9b9d5bb | |||
| 4f9e7bf93c | |||
| d491dafb80 | |||
| 365f621b24 | |||
| 7481b5d060 | |||
| 9295efb216 | |||
| 0d4fb04c7f | |||
| 791cba094c | |||
| 2bb9014c91 | |||
| 5be73f1ab3 | |||
| b8134f529c | |||
| 7d52af64c0 | |||
| 70ffba0766 | |||
| e7f3317de6 | |||
| 47350dc607 | |||
| d498a98465 | |||
| 0976afb46d | |||
| 7088ebd294 | |||
| f71831790f | |||
| 83001d195c | |||
| 8830c53135 | |||
| d89afdfd82 | |||
| 562860ca42 | |||
| fc4874f82c | |||
| 73a620b6e8 | |||
| e0697c3768 | |||
| 73bf06fb02 | |||
| 53bee20647 | |||
| 18ed60b9f2 | |||
| 707534e46e | |||
| cd6bbe7290 | |||
| f6cc059104 | |||
| 6572934f9a | |||
| 2a67506728 | |||
| 4507057e11 | |||
| eee1d90ef2 | |||
| 9d0efc081e | |||
| 2c284017d4 | |||
| 90758c3f4e | |||
| dcacdd3747 | |||
| 5bd3976e79 | |||
| 9eb7ecd3e5 | |||
| 05589a7c27 | |||
| 4aa41625bf | |||
| b2f0dd4cb2 | |||
| 17ef113ed7 | |||
| c029fa2f62 | |||
| 52b1377fe6 | |||
| 5598fcd33e | |||
| b558a7e97c | |||
| 172397ebf4 | |||
| 9adb43e44b | |||
| ac6fde801c | |||
| e848463347 |
+7
-3
@@ -7,6 +7,8 @@ matrix:
|
||||
env: KERAS_BACKEND=tensorflow TEST_MODE=PEP8
|
||||
- python: 2.7
|
||||
env: KERAS_BACKEND=tensorflow TEST_MODE=INTEGRATION_TESTS
|
||||
- python: 3.5
|
||||
env: KERAS_BACKEND=tensorflow TEST_MODE=DOC
|
||||
- python: 2.7
|
||||
env: KERAS_BACKEND=tensorflow
|
||||
- python: 3.5
|
||||
@@ -34,7 +36,7 @@ install:
|
||||
|
||||
- conda create -q -n test-environment python=$TRAVIS_PYTHON_VERSION numpy scipy matplotlib pandas pytest h5py
|
||||
- source activate test-environment
|
||||
- pip install git+git://github.com/Theano/Theano.git
|
||||
- pip install theano
|
||||
|
||||
# install PIL for preprocessing tests
|
||||
- if [[ "$TRAVIS_PYTHON_VERSION" == "2.7" ]]; then
|
||||
@@ -45,7 +47,7 @@ install:
|
||||
|
||||
- pip install -e .[tests]
|
||||
|
||||
# install TensorFlow
|
||||
# install TensorFlow (CPU version).
|
||||
- pip install tensorflow
|
||||
|
||||
# command to run tests
|
||||
@@ -61,6 +63,8 @@ script:
|
||||
PYTHONPATH=$PWD:$PYTHONPATH py.test tests/integration_tests;
|
||||
elif [[ "$TEST_MODE" == "PEP8" ]]; then
|
||||
PYTHONPATH=$PWD:$PYTHONPATH py.test --pep8 -m pep8 -n0;
|
||||
elif [[ "$TEST_MODE" == "DOC" ]]; then
|
||||
PYTHONPATH=$PWD:$PYTHONPATH py.test tests/test_documentation.py;
|
||||
else
|
||||
PYTHONPATH=$PWD:$PYTHONPATH py.test tests/ --ignore=tests/integration_tests --cov=keras tests/ --cov-fail-under 78 --cov-report term-missing;
|
||||
PYTHONPATH=$PWD:$PYTHONPATH py.test tests/ --ignore=tests/integration_tests --ignore=tests/test_documentation.py --cov=keras tests/ --cov-fail-under 78 --cov-report term-missing;
|
||||
fi
|
||||
|
||||
+21
-13
@@ -8,9 +8,7 @@ Index
|
||||
- Getting started
|
||||
Getting started with the sequential model
|
||||
Getting started with the functional api
|
||||
Examples
|
||||
FAQ
|
||||
Installation guide
|
||||
|
||||
- Models
|
||||
About Keras models
|
||||
@@ -26,18 +24,23 @@ Index
|
||||
explain common layer functions: get_weights, set_weights, get_config
|
||||
explain input_shape
|
||||
explain usage on non-Keras tensors
|
||||
Core layers
|
||||
Convolutional
|
||||
Recurrent
|
||||
Embeddings
|
||||
Normalization
|
||||
Advanced activations
|
||||
Noise
|
||||
Core Layers
|
||||
Convolutional Layers
|
||||
Pooling Layers
|
||||
Locally-connected Layers
|
||||
Recurrent Layers
|
||||
Embedding Layers
|
||||
Merge Layers
|
||||
Advanced Activations Layers
|
||||
Normalization Layers
|
||||
Noise Layers
|
||||
Layer Wrappers
|
||||
Writing your own Keras layers
|
||||
|
||||
- Preprocessing
|
||||
Image preprocessing
|
||||
Text preprocessing
|
||||
Sequence preprocessing
|
||||
Sequence Preprocessing
|
||||
Text Preprocessing
|
||||
Image Preprocessing
|
||||
|
||||
Losses
|
||||
Metrics
|
||||
@@ -45,12 +48,15 @@ Optimizers
|
||||
Activations
|
||||
Callbacks
|
||||
Datasets
|
||||
Applications
|
||||
Backend
|
||||
Initializations
|
||||
Initializers
|
||||
Regularizers
|
||||
Constraints
|
||||
Visualization
|
||||
Scikit-learn API
|
||||
Utils
|
||||
Contributing
|
||||
|
||||
'''
|
||||
from __future__ import print_function
|
||||
@@ -509,3 +515,5 @@ for page_data in PAGES:
|
||||
if not os.path.exists(subdir):
|
||||
os.makedirs(subdir)
|
||||
open(path, 'w').write(mkdown)
|
||||
|
||||
shutil.copyfile('../CONTRIBUTING.md', 'sources/contributing.md')
|
||||
|
||||
@@ -51,3 +51,4 @@ pages:
|
||||
- Visualization: visualization.md
|
||||
- Scikit-learn API: scikit-learn-api.md
|
||||
- Utils: utils.md
|
||||
- Contributing: contributing.md
|
||||
|
||||
externo
+1
-1
@@ -15,7 +15,7 @@ Weights are downloaded automatically when instantiating a model. They are stored
|
||||
- [ResNet50](#resnet50)
|
||||
- [InceptionV3](#inceptionv3)
|
||||
|
||||
All of these architectures (except Xception) are compatible with both TensorFlow and Theano, and upon instantiation the models will be built according to the image data format set in your Keras configuration file at `~/.keras/keras.json`. For instance, if you have set `image_data_format=tf`, then any model loaded from this repository will get built according to the TensorFlow data format convention, "Width-Height-Depth".
|
||||
All of these architectures (except Xception) are compatible with both TensorFlow and Theano, and upon instantiation the models will be built according to the image data format set in your Keras configuration file at `~/.keras/keras.json`. For instance, if you have set `image_data_format=channels_last`, then any model loaded from this repository will get built according to the TensorFlow data format convention, "Width-Height-Depth".
|
||||
|
||||
The Xception model is only available for TensorFlow, due to its reliance on `SeparableConvolution` layers.
|
||||
|
||||
|
||||
externo
+2
-2
@@ -36,7 +36,7 @@ class LossHistory(keras.callbacks.Callback):
|
||||
self.losses.append(logs.get('loss'))
|
||||
|
||||
model = Sequential()
|
||||
model.add(Dense(10, input_dim=784, init='uniform'))
|
||||
model.add(Dense(10, input_dim=784, kernel_initializer='uniform'))
|
||||
model.add(Activation('softmax'))
|
||||
model.compile(loss='categorical_crossentropy', optimizer='rmsprop')
|
||||
|
||||
@@ -58,7 +58,7 @@ print history.losses
|
||||
from keras.callbacks import ModelCheckpoint
|
||||
|
||||
model = Sequential()
|
||||
model.add(Dense(10, input_dim=784, init='uniform'))
|
||||
model.add(Dense(10, input_dim=784, kernel_initializer='uniform'))
|
||||
model.add(Activation('softmax'))
|
||||
model.compile(loss='categorical_crossentropy', optimizer='rmsprop')
|
||||
|
||||
|
||||
externo
+8
-8
@@ -55,7 +55,7 @@ As a convention, "0" does not stand for a specific word, but instead is used to
|
||||
```python
|
||||
from keras.datasets import imdb
|
||||
|
||||
(x_train, y_train), (x_test, y_test) = imdb.load_data(path="imdb_full.pkl",
|
||||
(x_train, y_train), (x_test, y_test) = imdb.load_data(path="imdb.npz",
|
||||
num_words=None,
|
||||
skip_top=0,
|
||||
maxlen=None,
|
||||
@@ -72,13 +72,13 @@ from keras.datasets import imdb
|
||||
- __Arguments:__
|
||||
|
||||
- __path__: if you do not have the data locally (at `'~/.keras/datasets/' + path`), it will be downloaded to this location.
|
||||
- __num_words__: integer or None. Top most frequent words to consider. Any less frequent word will appear as 0 in the sequence data.
|
||||
- __skip_top__: integer. Top most frequent words to ignore (they will appear as 0s in the sequence data).
|
||||
- __num_words__: integer or None. Top most frequent words to consider. Any less frequent word will appear as `oov_char` value in the sequence data.
|
||||
- __skip_top__: integer. Top most frequent words to ignore (they will appear as `oov_char` value in the sequence data).
|
||||
- __maxlen__: int. Maximum sequence length. Any longer sequence will be truncated.
|
||||
- __seed__: int. Seed for reproducible data shuffling.
|
||||
- __start_char__: char. The start of a sequence will be marked with this character.
|
||||
- __start_char__: int. The start of a sequence will be marked with this character.
|
||||
Set to 1 because 0 is usually the padding character.
|
||||
- __oov_char__: char. words that were cut out because of the `num_words`
|
||||
- __oov_char__: int. words that were cut out because of the `num_words`
|
||||
or `skip_top` limit will be replaced with this character.
|
||||
- __index_from__: int. Index actual words with this index and higher.
|
||||
|
||||
@@ -94,7 +94,7 @@ Dataset of 11,228 newswires from Reuters, labeled over 46 topics. As with the IM
|
||||
```python
|
||||
from keras.datasets import reuters
|
||||
|
||||
(x_train, y_train), (x_test, y_test) = reuters.load_data(path="reuters.pkl",
|
||||
(x_train, y_train), (x_test, y_test) = reuters.load_data(path="reuters.npz",
|
||||
num_words=None,
|
||||
skip_top=0,
|
||||
maxlen=None,
|
||||
@@ -107,12 +107,12 @@ from keras.datasets import reuters
|
||||
|
||||
The specifications are the same as that of the IMDB dataset, with the addition of:
|
||||
|
||||
- __test_split__: float. Fraction of the dataset to be used as test data.
|
||||
- __test_split__: float. Fraction of the dataset to be used as test data.
|
||||
|
||||
This dataset also makes available the word index used for encoding the sequences:
|
||||
|
||||
```python
|
||||
word_index = reuters.get_word_index(path="reuters_word_index.pkl")
|
||||
word_index = reuters.get_word_index(path="reuters_word_index.json")
|
||||
```
|
||||
|
||||
- __Returns:__ A dictionary where keys are words (str) and values are indexes (integer), e.g. `word_index["giraffe"]` might return `1234`.
|
||||
|
||||
+3
-3
@@ -27,7 +27,7 @@ Please cite Keras in your publications if it helps your research. Here is an exa
|
||||
```
|
||||
@misc{chollet2015keras,
|
||||
title={Keras},
|
||||
author={Chollet, Fran\c{c}ois},
|
||||
author={Chollet, Fran\c{c}ois and others},
|
||||
year={2015},
|
||||
publisher={GitHub},
|
||||
howpublished={\url{https://github.com/fchollet/keras}},
|
||||
@@ -411,7 +411,7 @@ The VGG16 model is also the basis for several Keras example scripts:
|
||||
|
||||
### How can I use HDF5 inputs with Keras?
|
||||
|
||||
You can use the `HDF5Matrix` class from `keras.utils.io_utils`. See [the HDF5Matrix documentation](/io_utils/#HDF5Matrix) for details.
|
||||
You can use the `HDF5Matrix` class from `keras.utils.io_utils`. See [the HDF5Matrix documentation](/utils/#hdf5matrix) for details.
|
||||
|
||||
You can also directly use a HDF5 dataset:
|
||||
|
||||
@@ -451,6 +451,6 @@ It contains the following fields:
|
||||
- The image data format to be used as default by image processing layers and utilities (either `channels_last` or `channels_first`).
|
||||
- The `epsilon` numerical fuzz factor to be used to prevent division by zero in some operations.
|
||||
- The default float data type.
|
||||
- The default backend. See the (backend documentation)[/backend].
|
||||
- The default backend. See the [backend documentation](/backend).
|
||||
|
||||
Likewise, cached dataset files, such as those downloaded with [`get_file()`](/utils/#get_file), are stored by default in `$HOME/.keras/datasets/`.
|
||||
|
||||
@@ -354,7 +354,7 @@ A stateful recurrent model is one for which the internal states (memories) obtai
|
||||
of samples are reused as initial states for the samples of the next batch. This allows processing longer sequences
|
||||
while keeping computational complexity manageable.
|
||||
|
||||
[You can read more about stateful RNNs in the FAQ.](/faq/#how-can-i-use-stateful-rnns)
|
||||
[You can read more about stateful RNNs in the FAQ.](/getting-started/faq/#how-can-i-use-stateful-rnns)
|
||||
|
||||
```python
|
||||
from keras.models import Sequential
|
||||
|
||||
externo
+1
-1
@@ -39,5 +39,5 @@ from keras import backend as K
|
||||
def my_init(shape, dtype=None):
|
||||
return K.random_normal(shape, dtype=dtype)
|
||||
|
||||
model.add(Dense(64, init=my_init))
|
||||
model.add(Dense(64, kernel_initializer=my_init))
|
||||
```
|
||||
|
||||
+16
-16
@@ -56,10 +56,10 @@ Generate batches of tensor image data with real-time data augmentation. The data
|
||||
If you never set it, then it will be "channels_last".
|
||||
|
||||
- __Methods__:
|
||||
- __fit(X)__: Compute the internal data stats related to the data-dependent transformations, based on an array of sample data.
|
||||
- __fit(x)__: Compute the internal data stats related to the data-dependent transformations, based on an array of sample data.
|
||||
Only required if featurewise_center or featurewise_std_normalization or zca_whitening.
|
||||
- __Arguments__:
|
||||
- __X__: sample data. Should have rank 4.
|
||||
- __x__: sample data. Should have rank 4.
|
||||
In case of grayscale data,
|
||||
the channels axis should have value 1, and in case
|
||||
of RGB data, it should have value 3.
|
||||
@@ -68,7 +68,7 @@ Generate batches of tensor image data with real-time data augmentation. The data
|
||||
- __seed__: int (default: None). Random seed.
|
||||
- __flow(X, y)__: Takes numpy data & label arrays, and generates batches of augmented/normalized data. Yields batches indefinitely, in an infinite loop.
|
||||
- __Arguments__:
|
||||
- __X__: data. Should have rank 4.
|
||||
- __x__: data. Should have rank 4.
|
||||
In case of grayscale data,
|
||||
the channels axis should have value 1, and in case
|
||||
of RGB data, it should have value 3.
|
||||
@@ -88,8 +88,8 @@ Generate batches of tensor image data with real-time data augmentation. The data
|
||||
See [this script](https://gist.github.com/fchollet/0830affa1f7f19fd47b06d4cf89ed44d) for more details.
|
||||
- __target_size__: tuple of integers, default: `(256, 256)`. The dimensions to which all images found will be resized.
|
||||
- __color_mode__: one of "grayscale", "rgb". Default: "rgb". Whether the images will be converted to have 1 or 3 color channels.
|
||||
- __classes__: optional list of class subdirectories (e.g. `['dogs', 'cats']`). Default: None. If not provided, the list of classes will be automatically inferred (and the order of the classes, which will map to the label indices, will be alphanumeric).
|
||||
- __class_mode__: one of "categorical", "binary", "sparse" or None. Default: "categorical". Determines the type of label arrays that are returned: "categorical" will be 2D one-hot encoded labels, "binary" will be 1D binary labels, "sparse" will be 1D integer labels. If None, no labels are returned (the generator will only yield batches of image data, which is useful to use `model.predict_generator()`, `model.evaluate_generator()`, etc.).
|
||||
- __classes__: optional list of class subdirectories (e.g. `['dogs', 'cats']`). Default: None. If not provided, the list of classes will be automatically inferred from the subdirectory names/structure under `directory`, where each subdirectory will be treated as a different class (and the order of the classes, which will map to the label indices, will be alphanumeric). The dictionary containing the mapping from class names to class indices can be obtained via the attribute `class_indices`.
|
||||
- __class_mode__: one of "categorical", "binary", "sparse" or None. Default: "categorical". Determines the type of label arrays that are returned: "categorical" will be 2D one-hot encoded labels, "binary" will be 1D binary labels, "sparse" will be 1D integer labels. If None, no labels are returned (the generator will only yield batches of image data, which is useful to use `model.predict_generator()`, `model.evaluate_generator()`, etc.). Please note that in case of class_mode None, the data still needs to reside in a subdirectory of `directory` for it to work correctly.
|
||||
- __batch_size__: size of the batches of data (default: 32).
|
||||
- __shuffle__: whether to shuffle the data (default: True)
|
||||
- __seed__: optional random seed for shuffling and transformations.
|
||||
@@ -101,12 +101,12 @@ Generate batches of tensor image data with real-time data augmentation. The data
|
||||
|
||||
- __Examples__:
|
||||
|
||||
Example of using `.flow(X, y)`:
|
||||
Example of using `.flow(x, y)`:
|
||||
|
||||
```python
|
||||
(X_train, y_train), (X_test, y_test) = cifar10.load_data()
|
||||
Y_train = np_utils.to_categorical(y_train, num_classes)
|
||||
Y_test = np_utils.to_categorical(y_test, num_classes)
|
||||
(x_train, y_train), (x_test, y_test) = cifar10.load_data()
|
||||
y_train = np_utils.to_categorical(y_train, num_classes)
|
||||
y_test = np_utils.to_categorical(y_test, num_classes)
|
||||
|
||||
datagen = ImageDataGenerator(
|
||||
featurewise_center=True,
|
||||
@@ -118,20 +118,20 @@ datagen = ImageDataGenerator(
|
||||
|
||||
# compute quantities required for featurewise normalization
|
||||
# (std, mean, and principal components if ZCA whitening is applied)
|
||||
datagen.fit(X_train)
|
||||
datagen.fit(x_train)
|
||||
|
||||
# fits the model on batches with real-time data augmentation:
|
||||
model.fit_generator(datagen.flow(X_train, Y_train, batch_size=32),
|
||||
steps_per_epoch=len(X_train), epochs=epochs)
|
||||
model.fit_generator(datagen.flow(x_train, y_train, batch_size=32),
|
||||
steps_per_epoch=len(x_train) / 32, epochs=epochs)
|
||||
|
||||
# here's a more "manual" example
|
||||
for e in range(epochs):
|
||||
print 'Epoch', e
|
||||
print('Epoch', e)
|
||||
batches = 0
|
||||
for X_batch, Y_batch in datagen.flow(X_train, Y_train, batch_size=32):
|
||||
loss = model.train(X_batch, Y_batch)
|
||||
for x_batch, y_batch in datagen.flow(x_train, y_train, batch_size=32):
|
||||
model.fit(x_batch, y_batch)
|
||||
batches += 1
|
||||
if batches >= len(X_train) / 32:
|
||||
if batches >= len(x_train) / 32:
|
||||
# we need to break the loop by hand because
|
||||
# the generator loops indefinitely
|
||||
break
|
||||
|
||||
@@ -78,7 +78,7 @@ INVERT = True
|
||||
|
||||
# Maximum length of input is 'int + int' (e.g., '345+678'). Maximum length of
|
||||
# int is DIGITS.
|
||||
MAxLEN = DIGITS + 1 + DIGITS
|
||||
MAXLEN = DIGITS + 1 + DIGITS
|
||||
|
||||
# All the numbers, plus sign and space for padding.
|
||||
chars = '0123456789+ '
|
||||
@@ -98,9 +98,9 @@ while len(questions) < TRAINING_SIZE:
|
||||
if key in seen:
|
||||
continue
|
||||
seen.add(key)
|
||||
# Pad the data with spaces such that it is always MAxLEN.
|
||||
# Pad the data with spaces such that it is always MAXLEN.
|
||||
q = '{}+{}'.format(a, b)
|
||||
query = q + ' ' * (MAxLEN - len(q))
|
||||
query = q + ' ' * (MAXLEN - len(q))
|
||||
ans = str(a + b)
|
||||
# Answers can be of maximum size DIGITS + 1.
|
||||
ans += ' ' * (DIGITS + 1 - len(ans))
|
||||
@@ -113,10 +113,10 @@ while len(questions) < TRAINING_SIZE:
|
||||
print('Total addition questions:', len(questions))
|
||||
|
||||
print('Vectorization...')
|
||||
x = np.zeros((len(questions), MAxLEN, len(chars)), dtype=np.bool)
|
||||
x = np.zeros((len(questions), MAXLEN, len(chars)), dtype=np.bool)
|
||||
y = np.zeros((len(questions), DIGITS + 1, len(chars)), dtype=np.bool)
|
||||
for i, sentence in enumerate(questions):
|
||||
x[i] = ctable.encode(sentence, MAxLEN)
|
||||
x[i] = ctable.encode(sentence, MAXLEN)
|
||||
for i, sentence in enumerate(expected):
|
||||
y[i] = ctable.encode(sentence, DIGITS + 1)
|
||||
|
||||
@@ -151,7 +151,7 @@ model = Sequential()
|
||||
# "Encode" the input sequence using an RNN, producing an output of HIDDEN_SIZE.
|
||||
# Note: In a situation where your input sequences have a variable length,
|
||||
# use input_shape=(None, num_feature).
|
||||
model.add(RNN(HIDDEN_SIZE, input_shape=(MAxLEN, len(chars))))
|
||||
model.add(RNN(HIDDEN_SIZE, input_shape=(MAXLEN, len(chars))))
|
||||
# As the decoder RNN's input, repeatedly provide the last hidden state of
|
||||
# RNN for each time step. Repeat 'DIGITS + 1' times as that's the maximum
|
||||
# length of output, e.g., when DIGITS=3, max output is 999+999=1998.
|
||||
|
||||
+121
-145
@@ -8,24 +8,16 @@ e.g.:
|
||||
```
|
||||
python deep_dream.py img/mypic.jpg results/dream
|
||||
```
|
||||
|
||||
It is preferable to run this script on GPU, for speed.
|
||||
If running on CPU, prefer the TensorFlow backend (much faster).
|
||||
|
||||
Example results: http://i.imgur.com/FX6ROg9.jpg
|
||||
'''
|
||||
from __future__ import print_function
|
||||
|
||||
from keras.preprocessing.image import load_img, img_to_array
|
||||
import numpy as np
|
||||
from scipy.misc import imsave
|
||||
from scipy.optimize import fmin_l_bfgs_b
|
||||
import time
|
||||
import scipy
|
||||
import argparse
|
||||
|
||||
from keras.applications import vgg16
|
||||
from keras.applications import inception_v3
|
||||
from keras import backend as K
|
||||
from keras.layers import Input
|
||||
|
||||
parser = argparse.ArgumentParser(description='Deep Dreams with Keras.')
|
||||
parser.add_argument('base_image_path', metavar='base', type=str,
|
||||
@@ -37,183 +29,167 @@ args = parser.parse_args()
|
||||
base_image_path = args.base_image_path
|
||||
result_prefix = args.result_prefix
|
||||
|
||||
# dimensions of the generated picture.
|
||||
img_height = 600
|
||||
img_width = 600
|
||||
|
||||
# some settings we found interesting
|
||||
saved_settings = {
|
||||
'bad_trip': {'features': {'block4_conv1': 0.05,
|
||||
'block4_conv2': 0.01,
|
||||
'block4_conv3': 0.01},
|
||||
'continuity': 0.1,
|
||||
'dream_l2': 0.8,
|
||||
'jitter': 5},
|
||||
'dreamy': {'features': {'block5_conv1': 0.05,
|
||||
'block5_conv2': 0.02},
|
||||
'continuity': 0.1,
|
||||
'dream_l2': 0.02,
|
||||
'jitter': 0},
|
||||
# These are the names of the layers
|
||||
# for which we try to maximize activation,
|
||||
# as well as their weight in the final loss
|
||||
# we try to maximize.
|
||||
# You can tweak these settings to obtain new visual effects.
|
||||
settings = {
|
||||
'features': {
|
||||
'mixed2': 0.2,
|
||||
'mixed3': 0.5,
|
||||
'mixed4': 2.,
|
||||
'mixed5': 1.5,
|
||||
},
|
||||
}
|
||||
# the settings we will use in this experiment
|
||||
settings = saved_settings['dreamy']
|
||||
|
||||
|
||||
def preprocess_image(image_path):
|
||||
# util function to open, resize and format pictures
|
||||
# into appropriate tensors
|
||||
img = load_img(image_path, target_size=(img_height, img_width))
|
||||
# Util function to open, resize and format pictures
|
||||
# into appropriate tensors.
|
||||
img = load_img(image_path)
|
||||
img = img_to_array(img)
|
||||
img = np.expand_dims(img, axis=0)
|
||||
img = vgg16.preprocess_input(img)
|
||||
img = inception_v3.preprocess_input(img)
|
||||
return img
|
||||
|
||||
|
||||
def deprocess_image(x):
|
||||
# util function to convert a tensor into a valid image
|
||||
# Util function to convert a tensor into a valid image.
|
||||
if K.image_data_format() == 'channels_first':
|
||||
x = x.reshape((3, img_height, img_width))
|
||||
x = x.reshape((3, x.shape[2], x.shape[3]))
|
||||
x = x.transpose((1, 2, 0))
|
||||
else:
|
||||
x = x.reshape((img_height, img_width, 3))
|
||||
# Remove zero-center by mean pixel
|
||||
x[:, :, 0] += 103.939
|
||||
x[:, :, 1] += 116.779
|
||||
x[:, :, 2] += 123.68
|
||||
# 'BGR'->'RGB'
|
||||
x = x[:, :, ::-1]
|
||||
x = x.reshape((x.shape[1], x.shape[2], 3))
|
||||
x /= 2.
|
||||
x += 0.5
|
||||
x *= 255.
|
||||
x = np.clip(x, 0, 255).astype('uint8')
|
||||
return x
|
||||
|
||||
if K.image_data_format() == 'channels_first':
|
||||
img_size = (3, img_height, img_width)
|
||||
else:
|
||||
img_size = (img_height, img_width, 3)
|
||||
# this will contain our generated image
|
||||
dream = Input(batch_shape=(1,) + img_size)
|
||||
K.set_learning_phase(0)
|
||||
|
||||
# build the VGG16 network with our placeholder
|
||||
# the model will be loaded with pre-trained ImageNet weights
|
||||
model = vgg16.VGG16(input_tensor=dream,
|
||||
weights='imagenet', include_top=False)
|
||||
# Build the InceptionV3 network with our placeholder.
|
||||
# The model will be loaded with pre-trained ImageNet weights.
|
||||
model = inception_v3.InceptionV3(weights='imagenet',
|
||||
include_top=False)
|
||||
dream = model.input
|
||||
print('Model loaded.')
|
||||
|
||||
# get the symbolic outputs of each "key" layer (we gave them unique names).
|
||||
# Get the symbolic outputs of each "key" layer (we gave them unique names).
|
||||
layer_dict = dict([(layer.name, layer) for layer in model.layers])
|
||||
|
||||
|
||||
def continuity_loss(x):
|
||||
# continuity loss util function
|
||||
assert K.ndim(x) == 4
|
||||
if K.image_data_format() == 'channels_first':
|
||||
a = K.square(x[:, :, :img_height - 1, :img_width - 1] -
|
||||
x[:, :, 1:, :img_width - 1])
|
||||
b = K.square(x[:, :, :img_height - 1, :img_width - 1] -
|
||||
x[:, :, :img_height - 1, 1:])
|
||||
else:
|
||||
a = K.square(x[:, :img_height - 1, :img_width - 1, :] -
|
||||
x[:, 1:, :img_width - 1, :])
|
||||
b = K.square(x[:, :img_height - 1, :img_width - 1, :] -
|
||||
x[:, :img_height - 1, 1:, :])
|
||||
return K.sum(K.pow(a + b, 1.25))
|
||||
|
||||
# define the loss
|
||||
# Define the loss.
|
||||
loss = K.variable(0.)
|
||||
for layer_name in settings['features']:
|
||||
# add the L2 norm of the features of a layer to the loss
|
||||
# Add the L2 norm of the features of a layer to the loss.
|
||||
assert layer_name in layer_dict.keys(), 'Layer ' + layer_name + ' not found in model.'
|
||||
coeff = settings['features'][layer_name]
|
||||
x = layer_dict[layer_name].output
|
||||
shape = layer_dict[layer_name].output_shape
|
||||
# we avoid border artifacts by only involving non-border pixels in the loss
|
||||
# We avoid border artifacts by only involving non-border pixels in the loss.
|
||||
scaling = K.prod(K.cast(K.shape(x), 'float32'))
|
||||
if K.image_data_format() == 'channels_first':
|
||||
loss -= coeff * K.sum(K.square(x[:, :, 2: shape[2] - 2, 2: shape[3] - 2])) / np.prod(shape[1:])
|
||||
loss += coeff * K.sum(K.square(x[:, :, 2: -2, 2: -2])) / scaling
|
||||
else:
|
||||
loss -= coeff * K.sum(K.square(x[:, 2: shape[1] - 2, 2: shape[2] - 2, :])) / np.prod(shape[1:])
|
||||
loss += coeff * K.sum(K.square(x[:, 2: -2, 2: -2, :])) / scaling
|
||||
|
||||
# add continuity loss (gives image local coherence, can result in an artful blur)
|
||||
loss += settings['continuity'] * continuity_loss(dream) / np.prod(img_size)
|
||||
# add image L2 norm to loss (prevents pixels from taking very high values, makes image darker)
|
||||
loss += settings['dream_l2'] * K.sum(K.square(dream)) / np.prod(img_size)
|
||||
# Compute the gradients of the loss wrt the dream.
|
||||
grads = K.gradients(loss, dream)[0]
|
||||
# Normalize gradients.
|
||||
grads /= K.maximum(K.mean(K.abs(grads)), 1e-7)
|
||||
|
||||
# feel free to further modify the loss as you see fit, to achieve new effects...
|
||||
|
||||
# compute the gradients of the dream wrt the loss
|
||||
grads = K.gradients(loss, dream)
|
||||
|
||||
outputs = [loss]
|
||||
if isinstance(grads, (list, tuple)):
|
||||
outputs += grads
|
||||
else:
|
||||
outputs.append(grads)
|
||||
|
||||
f_outputs = K.function([dream], outputs)
|
||||
# Set up function to retrieve the value
|
||||
# of the loss and gradients given an input image.
|
||||
outputs = [loss, grads]
|
||||
fetch_loss_and_grads = K.function([dream], outputs)
|
||||
|
||||
|
||||
def eval_loss_and_grads(x):
|
||||
x = x.reshape((1,) + img_size)
|
||||
outs = f_outputs([x])
|
||||
outs = fetch_loss_and_grads([x])
|
||||
loss_value = outs[0]
|
||||
if len(outs[1:]) == 1:
|
||||
grad_values = outs[1].flatten().astype('float64')
|
||||
else:
|
||||
grad_values = np.array(outs[1:]).flatten().astype('float64')
|
||||
grad_values = outs[1]
|
||||
return loss_value, grad_values
|
||||
|
||||
|
||||
class Evaluator(object):
|
||||
"""Loss and gradients evaluator.
|
||||
def resize_img(img, size):
|
||||
img = np.copy(img)
|
||||
if K.image_data_format() == 'channels_first':
|
||||
factors = (1, 1,
|
||||
float(size[0]) / img.shape[2],
|
||||
float(size[1]) / img.shape[3])
|
||||
else:
|
||||
factors = (1,
|
||||
float(size[0]) / img.shape[1],
|
||||
float(size[1]) / img.shape[2],
|
||||
1)
|
||||
return scipy.ndimage.zoom(img, factors, order=1)
|
||||
|
||||
This Evaluator class makes it possible
|
||||
to compute loss and gradients in one pass
|
||||
while retrieving them via two separate functions,
|
||||
"loss" and "grads". This is done because scipy.optimize
|
||||
requires separate functions for loss and gradients,
|
||||
but computing them separately would be inefficient.
|
||||
"""
|
||||
|
||||
def __init__(self):
|
||||
self.loss_value = None
|
||||
self.grad_values = None
|
||||
|
||||
def loss(self, x):
|
||||
assert self.loss_value is None
|
||||
def gradient_ascent(x, iterations, step, max_loss=None):
|
||||
for i in range(iterations):
|
||||
loss_value, grad_values = eval_loss_and_grads(x)
|
||||
self.loss_value = loss_value
|
||||
self.grad_values = grad_values
|
||||
return self.loss_value
|
||||
if max_loss is not None and loss_value > max_loss:
|
||||
break
|
||||
print('..Loss value at', i, ':', loss_value)
|
||||
x += step * grad_values
|
||||
return x
|
||||
|
||||
def grads(self, x):
|
||||
assert self.loss_value is not None
|
||||
grad_values = np.copy(self.grad_values)
|
||||
self.loss_value = None
|
||||
self.grad_values = None
|
||||
return grad_values
|
||||
|
||||
evaluator = Evaluator()
|
||||
def save_img(img, fname):
|
||||
pil_img = deprocess_image(np.copy(img))
|
||||
scipy.misc.imsave(fname, pil_img)
|
||||
|
||||
# Run scipy-based optimization (L-BFGS) over the pixels of the generated image
|
||||
# so as to minimize the loss
|
||||
x = preprocess_image(base_image_path)
|
||||
for i in range(5):
|
||||
print('Start of iteration', i)
|
||||
start_time = time.time()
|
||||
|
||||
# Add a random jitter to the initial image.
|
||||
# This will be reverted at decoding time
|
||||
random_jitter = (settings['jitter'] * 2) * (np.random.random(img_size) - 0.5)
|
||||
x += random_jitter
|
||||
"""Process:
|
||||
|
||||
# Run L-BFGS for 7 steps
|
||||
x, min_val, info = fmin_l_bfgs_b(evaluator.loss, x.flatten(),
|
||||
fprime=evaluator.grads, maxfun=7)
|
||||
print('Current loss value:', min_val)
|
||||
# Decode the dream and save it
|
||||
x = x.reshape(img_size)
|
||||
x -= random_jitter
|
||||
img = deprocess_image(np.copy(x))
|
||||
fname = result_prefix + '_at_iteration_%d.png' % i
|
||||
imsave(fname, img)
|
||||
end_time = time.time()
|
||||
print('Image saved as', fname)
|
||||
print('Iteration %d completed in %ds' % (i, end_time - start_time))
|
||||
- Load the original image.
|
||||
- Define a number of processing scales (i.e. image shapes),
|
||||
from smallest to largest.
|
||||
- Resize the original image to the smallest scale.
|
||||
- For every scale, starting with the smallest (i.e. current one):
|
||||
- Run gradient ascent
|
||||
- Upscale image to the next scale
|
||||
- Reinject the detail that was lost at upscaling time
|
||||
- Stop when we are back to the original size.
|
||||
|
||||
To obtain the detail lost during upscaling, we simply
|
||||
take the original image, shrink it down, upscale it,
|
||||
and compare the result to the (resized) original image.
|
||||
"""
|
||||
|
||||
|
||||
# Playing with these hyperparameters will also allow you to achieve new effects
|
||||
step = 0.01 # Gradient ascent step size
|
||||
num_octave = 3 # Number of scales at which to run gradient ascent
|
||||
octave_scale = 1.4 # Size ratio between scales
|
||||
iterations = 20 # Number of ascent steps per scale
|
||||
max_loss = 10.
|
||||
|
||||
img = preprocess_image(base_image_path)
|
||||
if K.image_data_format() == 'channels_first':
|
||||
original_shape = img.shape[2:]
|
||||
else:
|
||||
original_shape = img.shape[1:3]
|
||||
successive_shapes = [original_shape]
|
||||
for i in range(1, num_octave):
|
||||
shape = tuple([int(dim / (octave_scale ** i)) for dim in original_shape])
|
||||
successive_shapes.append(shape)
|
||||
successive_shapes = successive_shapes[::-1]
|
||||
original_img = np.copy(img)
|
||||
shrunk_original_img = resize_img(img, successive_shapes[0])
|
||||
|
||||
for shape in successive_shapes:
|
||||
print('Processing image shape', shape)
|
||||
img = resize_img(img, shape)
|
||||
img = gradient_ascent(img,
|
||||
iterations=iterations,
|
||||
step=step,
|
||||
max_loss=max_loss)
|
||||
upscaled_shrunk_original_img = resize_img(shrunk_original_img, shape)
|
||||
same_size_original = resize_img(original_img, shape)
|
||||
lost_detail = same_size_original - upscaled_shrunk_original_img
|
||||
|
||||
img += lost_detail
|
||||
shrunk_original_img = resize_img(original_img, shape)
|
||||
|
||||
save_img(img, fname=result_prefix + '.png')
|
||||
|
||||
@@ -24,7 +24,7 @@ from keras import backend as K
|
||||
|
||||
def euclidean_distance(vects):
    """Euclidean distance between two batches of vectors.

    # Arguments
        vects: A pair `(x, y)` of tensors.

    # Returns
        `sqrt(max(sum(square(x - y), axis=1), K.epsilon()))`, with the
        reduced axis kept (`keepdims=True`).
    """
    x, y = vects
    sum_square = K.sum(K.square(x - y), axis=1, keepdims=True)
    # Clamp to K.epsilon() so the square root is never taken at exactly
    # zero (e.g. when both inputs are identical).
    return K.sqrt(K.maximum(sum_square, K.epsilon()))
|
||||
|
||||
|
||||
def eucl_dist_output_shape(shapes):
|
||||
|
||||
@@ -6,7 +6,7 @@ import numpy as np
|
||||
import matplotlib.pyplot as plt
|
||||
from scipy.stats import norm
|
||||
|
||||
from keras.layers import Input, Dense, Lambda
|
||||
from keras.layers import Input, Dense, Lambda, Layer
|
||||
from keras.models import Model
|
||||
from keras import backend as K
|
||||
from keras import metrics
|
||||
@@ -19,6 +19,7 @@ intermediate_dim = 256
|
||||
epochs = 50
|
||||
epsilon_std = 1.0
|
||||
|
||||
|
||||
x = Input(batch_shape=(batch_size, original_dim))
|
||||
h = Dense(intermediate_dim, activation='relu')(x)
|
||||
z_mean = Dense(latent_dim)(h)
|
||||
@@ -41,13 +42,29 @@ h_decoded = decoder_h(z)
|
||||
x_decoded_mean = decoder_mean(h_decoded)
|
||||
|
||||
|
||||
def vae_loss(x, x_decoded_mean):
    """Sum of the reconstruction and KL terms of the VAE objective.

    Reads the module-level `original_dim`, `z_mean` and `z_log_var`.

    # Arguments
        x: The input tensor.
        x_decoded_mean: The decoder output for `x`.

    # Returns
        `xent_loss + kl_loss`.
    """
    reconstruction = metrics.binary_crossentropy(x, x_decoded_mean)
    xent_loss = original_dim * reconstruction
    kl_terms = 1 + z_log_var - K.square(z_mean) - K.exp(z_log_var)
    kl_loss = - 0.5 * K.sum(kl_terms, axis=-1)
    return xent_loss + kl_loss
|
||||
# Custom loss layer
class CustomVariationalLayer(Layer):
    """Layer that attaches the VAE loss to the model via `add_loss`.

    It is called on `[x, x_decoded_mean]` and returns `x`; the useful
    effect is the loss registered in `call`.
    """

    def __init__(self, **kwargs):
        self.is_placeholder = True
        super(CustomVariationalLayer, self).__init__(**kwargs)

    def vae_loss(self, x, x_decoded_mean):
        """Mean over `xent_loss + kl_loss`.

        Reads the module-level `original_dim`, `z_mean` and `z_log_var`.
        """
        reconstruction = metrics.binary_crossentropy(x, x_decoded_mean)
        xent_loss = original_dim * reconstruction
        kl_terms = 1 + z_log_var - K.square(z_mean) - K.exp(z_log_var)
        kl_loss = - 0.5 * K.sum(kl_terms, axis=-1)
        return K.mean(xent_loss + kl_loss)

    def call(self, inputs):
        """Register the VAE loss for `inputs` and return the first input."""
        x, x_decoded_mean = inputs[0], inputs[1]
        self.add_loss(self.vae_loss(x, x_decoded_mean), inputs=inputs)
        # We won't actually use the output.
        return x
|
||||
|
||||
y = CustomVariationalLayer()([x, x_decoded_mean])
|
||||
vae = Model(x, y)
|
||||
vae.compile(optimizer='rmsprop', loss=None)
|
||||
|
||||
vae = Model(x, x_decoded_mean)
|
||||
vae.compile(optimizer='rmsprop', loss=vae_loss)
|
||||
|
||||
# train the VAE on MNIST digits
|
||||
(x_train, y_train), (x_test, y_test) = mnist.load_data()
|
||||
@@ -57,7 +74,7 @@ x_test = x_test.astype('float32') / 255.
|
||||
x_train = x_train.reshape((len(x_train), np.prod(x_train.shape[1:])))
|
||||
x_test = x_test.reshape((len(x_test), np.prod(x_test.shape[1:])))
|
||||
|
||||
vae.fit(x_train, x_train,
|
||||
vae.fit(x_train,
|
||||
shuffle=True,
|
||||
epochs=epochs,
|
||||
batch_size=batch_size,
|
||||
|
||||
@@ -7,7 +7,7 @@ import numpy as np
|
||||
import matplotlib.pyplot as plt
|
||||
from scipy.stats import norm
|
||||
|
||||
from keras.layers import Input, Dense, Lambda, Flatten, Reshape
|
||||
from keras.layers import Input, Dense, Lambda, Flatten, Reshape, Layer
|
||||
from keras.layers import Conv2D, Conv2DTranspose
|
||||
from keras.models import Model
|
||||
from keras import backend as K
|
||||
@@ -106,17 +106,31 @@ x_decoded_relu = decoder_deconv_3_upsamp(deconv_2_decoded)
|
||||
x_decoded_mean_squash = decoder_mean_squash(x_decoded_relu)
|
||||
|
||||
|
||||
def vae_loss(x, x_decoded_mean):
    """Sum of the reconstruction and KL terms of the VAE objective.

    Reads the module-level `img_rows`, `img_cols`, `z_mean` and
    `z_log_var`.

    # Arguments
        x: The input tensor.
        x_decoded_mean: The decoder output for `x`.

    # Returns
        `xent_loss + kl_loss`.
    """
    # NOTE: binary_crossentropy expects a batch_size by dim
    # for x and x_decoded_mean, so we MUST flatten these!
    flat_x = K.flatten(x)
    flat_decoded = K.flatten(x_decoded_mean)
    xent_loss = img_rows * img_cols * metrics.binary_crossentropy(flat_x, flat_decoded)
    kl_terms = 1 + z_log_var - K.square(z_mean) - K.exp(z_log_var)
    kl_loss = - 0.5 * K.mean(kl_terms, axis=-1)
    return xent_loss + kl_loss
|
||||
# Custom loss layer
|
||||
class CustomVariationalLayer(Layer):
|
||||
def __init__(self, **kwargs):
|
||||
self.is_placeholder = True
|
||||
super(CustomVariationalLayer, self).__init__(**kwargs)
|
||||
|
||||
vae = Model(x, x_decoded_mean_squash)
|
||||
vae.compile(optimizer='rmsprop', loss=vae_loss)
|
||||
def vae_loss(self, x, x_decoded_mean_squash):
|
||||
x = K.flatten(x)
|
||||
x_decoded_mean_squash = K.flatten(x_decoded_mean_squash)
|
||||
xent_loss = img_rows * img_cols * metrics.binary_crossentropy(x, x_decoded_mean_squash)
|
||||
kl_loss = - 0.5 * K.mean(1 + z_log_var - K.square(z_mean) - K.exp(z_log_var), axis=-1)
|
||||
return K.mean(xent_loss + kl_loss)
|
||||
|
||||
def call(self, inputs):
|
||||
x = inputs[0]
|
||||
x_decoded_mean_squash = inputs[1]
|
||||
loss = self.vae_loss(x, x_decoded_mean_squash)
|
||||
self.add_loss(loss, inputs=inputs)
|
||||
# We don't use this output.
|
||||
return x
|
||||
|
||||
|
||||
y = CustomVariationalLayer()([x, x_decoded_mean_squash])
|
||||
vae = Model(x, y)
|
||||
vae.compile(optimizer='rmsprop', loss=None)
|
||||
vae.summary()
|
||||
|
||||
# train the VAE on MNIST digits
|
||||
@@ -129,7 +143,7 @@ x_test = x_test.reshape((x_test.shape[0],) + original_img_size)
|
||||
|
||||
print('x_train.shape:', x_train.shape)
|
||||
|
||||
vae.fit(x_train, x_train,
|
||||
vae.fit(x_train,
|
||||
shuffle=True,
|
||||
epochs=epochs,
|
||||
batch_size=batch_size,
|
||||
|
||||
+3
-1
@@ -17,5 +17,7 @@ from . import models
|
||||
from . import losses
|
||||
from . import optimizers
|
||||
from . import regularizers
|
||||
# Importable from root because it's technically not a layer
|
||||
from .layers import Input
|
||||
|
||||
__version__ = '2.0.3'
|
||||
__version__ = '2.0.4'
|
||||
|
||||
@@ -1,7 +1,9 @@
|
||||
from __future__ import absolute_import
|
||||
import six
|
||||
import warnings
|
||||
from . import backend as K
|
||||
from .utils.generic_utils import deserialize_keras_object
|
||||
from .engine import Layer
|
||||
|
||||
|
||||
def softmax(x, axis=-1):
|
||||
@@ -78,6 +80,13 @@ def get(identifier):
|
||||
identifier = str(identifier)
|
||||
return deserialize(identifier)
|
||||
elif callable(identifier):
|
||||
if isinstance(identifier, Layer):
|
||||
warnings.warn((
|
||||
'Do not pass a layer instance (such as {identifier}) as the '
|
||||
'activation argument of another layer. Instead, advanced '
|
||||
'activation layers should be used just like any other '
|
||||
'layer in a model.'
|
||||
).format(identifier=identifier.__class__.__name__))
|
||||
return identifier
|
||||
else:
|
||||
raise ValueError('Could not interpret '
|
||||
|
||||
@@ -44,7 +44,7 @@ def set_epsilon(e):
|
||||
|
||||
|
||||
def floatx():
|
||||
"""Returns the default float type, as a string
|
||||
"""Returns the default float type, as a string.
|
||||
(e.g. 'float16', 'float32', 'float64').
|
||||
|
||||
# Returns
|
||||
@@ -109,8 +109,7 @@ def cast_to_floatx(x):
|
||||
|
||||
|
||||
def image_data_format():
|
||||
"""Returns the default image data format
|
||||
convention ('channels_first' or 'channels_last').
|
||||
"""Returns the default image data format convention ('channels_first' or 'channels_last').
|
||||
|
||||
# Returns
|
||||
A string, either `'channels_first'` or `'channels_last'`
|
||||
@@ -181,7 +180,7 @@ def set_image_dim_ordering(dim_ordering):
|
||||
"""Legacy setter for `image_data_format`.
|
||||
|
||||
# Arguments
|
||||
dim_ordering: string. `'tf'` or `'th'`.
|
||||
dim_ordering: string. `tf` or `th`.
|
||||
|
||||
# Example
|
||||
```python
|
||||
@@ -192,6 +191,9 @@ def set_image_dim_ordering(dim_ordering):
|
||||
>>> K.image_data_format()
|
||||
'channels_last'
|
||||
```
|
||||
|
||||
# Raises
|
||||
ValueError if invalid `dim_ordering`
|
||||
"""
|
||||
global _IMAGE_DATA_FORMAT
|
||||
if dim_ordering not in {'tf', 'th'}:
|
||||
@@ -205,6 +207,9 @@ def set_image_dim_ordering(dim_ordering):
|
||||
|
||||
def image_dim_ordering():
|
||||
"""Legacy getter for `image_data_format`.
|
||||
|
||||
# Returns
|
||||
string, one of `'th'`, `'tf'`
|
||||
"""
|
||||
if _IMAGE_DATA_FORMAT == 'channels_first':
|
||||
return 'th'
|
||||
|
||||
@@ -43,6 +43,14 @@ _MANUAL_VAR_INIT = False
|
||||
|
||||
|
||||
def get_uid(prefix=''):
|
||||
"""Get the uid for the default graph.
|
||||
|
||||
# Arguments
|
||||
prefix: An optional prefix of the graph.
|
||||
|
||||
# Returns
|
||||
A unique identifier for the graph.
|
||||
"""
|
||||
global _GRAPH_UID_DICTS
|
||||
graph = tf.get_default_graph()
|
||||
if graph not in _GRAPH_UID_DICTS:
|
||||
@@ -52,6 +60,7 @@ def get_uid(prefix=''):
|
||||
|
||||
|
||||
def reset_uids():
    """Reset graph identifiers.

    Rebinds the module-level `_GRAPH_UID_DICTS` to a new empty dict.
    """
    global _GRAPH_UID_DICTS
    _GRAPH_UID_DICTS = dict()
|
||||
|
||||
@@ -169,6 +178,17 @@ def set_session(session):
|
||||
# VARIABLE MANIPULATION
|
||||
|
||||
def _convert_string_dtype(dtype):
|
||||
"""Get the type from a string.
|
||||
|
||||
# Arguments
|
||||
dtype: A string representation of a type.
|
||||
|
||||
# Returns
|
||||
The type requested.
|
||||
|
||||
# Raises
|
||||
ValueError if `dtype` is not supported
|
||||
"""
|
||||
if dtype == 'float16':
|
||||
return tf.float16
|
||||
if dtype == 'float32':
|
||||
@@ -190,6 +210,15 @@ def _convert_string_dtype(dtype):
|
||||
|
||||
|
||||
def _to_tensor(x, dtype):
|
||||
"""Convert the input `x` to a tensor of type `dtype`.
|
||||
|
||||
# Arguments
|
||||
x: An object to be converted (numpy array, list, tensors).
|
||||
dtype: The destination type.
|
||||
|
||||
# Returns
|
||||
A tensor.
|
||||
"""
|
||||
x = tf.convert_to_tensor(x)
|
||||
if x.dtype != dtype:
|
||||
x = tf.cast(x, dtype)
|
||||
@@ -309,6 +338,17 @@ def _initialize_variables():
|
||||
|
||||
|
||||
def constant(value, dtype=None, shape=None, name=None):
    """Creates a constant tensor.

    # Arguments
        value: A constant value (or list)
        dtype: The type of the elements of the resulting tensor.
            `floatx()` is used when this is `None`.
        shape: Optional dimensions of resulting tensor.
        name: Optional name for the tensor.

    # Returns
        A Constant Tensor.
    """
    resolved_dtype = floatx() if dtype is None else dtype
    return tf.constant(value, dtype=resolved_dtype, shape=shape, name=name)
|
||||
@@ -626,6 +666,18 @@ def ones_like(x, dtype=None, name=None):
|
||||
return tf.ones_like(x, dtype=dtype, name=name)
|
||||
|
||||
|
||||
def identity(x):
    """Returns a tensor with the same content as the input tensor.

    Thin wrapper around `tf.identity`.

    # Arguments
        x: The input tensor.

    # Returns
        A tensor of the same shape, type and content.
    """
    same_content = tf.identity(x)
    return same_content
|
||||
|
||||
|
||||
def random_uniform_variable(shape, low, high, dtype=None,
|
||||
name=None, seed=None):
|
||||
"""Instantiates a variable with values drawn from a uniform distribution.
|
||||
@@ -761,18 +813,54 @@ def cast(x, dtype):
|
||||
|
||||
|
||||
def update(x, new_x):
    """Update the value of `x` to `new_x`.

    Thin wrapper around `tf.assign`.

    # Arguments
        x: A Variable.
        new_x: A tensor of same shape as `x`.

    # Returns
        The variable `x` updated.
    """
    assigned = tf.assign(x, new_x)
    return assigned
|
||||
|
||||
|
||||
def update_add(x, increment):
    """Update the value of `x` by adding `increment`.

    Thin wrapper around `tf.assign_add`.

    # Arguments
        x: A Variable.
        increment: A tensor of same shape as `x`.

    # Returns
        The variable `x` updated.
    """
    incremented = tf.assign_add(x, increment)
    return incremented
|
||||
|
||||
|
||||
def update_sub(x, decrement):
    """Update the value of `x` by subtracting `decrement`.

    Thin wrapper around `tf.assign_sub`.

    # Arguments
        x: A Variable.
        decrement: A tensor of same shape as `x`.

    # Returns
        The variable `x` updated.
    """
    decremented = tf.assign_sub(x, decrement)
    return decremented
|
||||
|
||||
|
||||
def moving_average_update(x, value, momentum):
    """Compute the moving average of a variable.

    Delegates to `moving_averages.assign_moving_average` with
    `zero_debias=False`.

    # Arguments
        x: A Variable.
        value: A tensor with the same shape as `x`.
        momentum: The moving average momentum.

    # Returns
        An Operation to update the variable.
    """
    update_op = moving_averages.assign_moving_average(
        x, value, momentum, zero_debias=False)
    return update_op
|
||||
|
||||
@@ -902,6 +990,16 @@ def batch_dot(x, y, axes=None):
|
||||
"""
|
||||
if isinstance(axes, int):
|
||||
axes = (axes, axes)
|
||||
x_ndim = ndim(x)
|
||||
y_ndim = ndim(y)
|
||||
if x_ndim > y_ndim:
|
||||
diff = x_ndim - y_ndim
|
||||
y = tf.reshape(y, tf.concat([tf.shape(y), [1] * (diff)], axis=0))
|
||||
elif y_ndim > x_ndim:
|
||||
diff = y_ndim - x_ndim
|
||||
x = tf.reshape(x, tf.concat([tf.shape(x), [1] * (diff)], axis=0))
|
||||
else:
|
||||
diff = 0
|
||||
if ndim(x) == 2 and ndim(y) == 2:
|
||||
if axes[0] == axes[1]:
|
||||
out = tf.reduce_sum(tf.multiply(x, y), axes[0])
|
||||
@@ -915,6 +1013,12 @@ def batch_dot(x, y, axes=None):
|
||||
adj_x = None
|
||||
adj_y = None
|
||||
out = tf.matmul(x, y, adjoint_a=adj_x, adjoint_b=adj_y)
|
||||
if diff:
|
||||
if x_ndim > y_ndim:
|
||||
idx = x_ndim + y_ndim - 3
|
||||
else:
|
||||
idx = x_ndim - 1
|
||||
out = tf.squeeze(out, list(range(idx, idx + diff)))
|
||||
if ndim(out) == 1:
|
||||
out = expand_dims(out, 1)
|
||||
return out
|
||||
@@ -1276,6 +1380,28 @@ def log(x):
|
||||
return tf.log(x)
|
||||
|
||||
|
||||
def logsumexp(x, axis=None, keepdims=False):
    """Computes log(sum(exp(elements across dimensions of a tensor))).

    This function is more numerically stable than log(sum(exp(x))).
    It avoids overflows caused by taking the exp of large inputs and
    underflows caused by taking the log of small inputs.

    # Arguments
        x: A tensor or variable.
        axis: An integer, the axis to reduce over.
        keepdims: A boolean, whether to keep the dimensions or not.
            If `keepdims` is `False`, the rank of the tensor is reduced
            by 1. If `keepdims` is `True`, the reduced dimension is
            retained with length 1.

    # Returns
        The reduced tensor.
    """
    # `axis` is passed through `_normalize_axis` before the reduction.
    reduction_axis = _normalize_axis(axis, ndim(x))
    return tf.reduce_logsumexp(x,
                               reduction_indices=reduction_axis,
                               keep_dims=keepdims)
|
||||
|
||||
|
||||
def round(x):
|
||||
"""Element-wise rounding to the closest integer.
|
||||
|
||||
@@ -2745,6 +2871,16 @@ def in_top_k(predictions, targets, k):
|
||||
# CONVOLUTIONS
|
||||
|
||||
def _preprocess_deconv_output_shape(x, shape, data_format):
|
||||
"""Get the output_shape for the deconvolution.
|
||||
|
||||
# Arguments
|
||||
x: input tensor.
|
||||
shape: output shape.
|
||||
data_format: string, one of 'channels_last', 'channels_first'.
|
||||
|
||||
# Returns
|
||||
The output shape.
|
||||
"""
|
||||
if data_format == 'channels_first':
|
||||
shape = (shape[0], shape[2], shape[3], shape[1])
|
||||
|
||||
@@ -2755,6 +2891,15 @@ def _preprocess_deconv_output_shape(x, shape, data_format):
|
||||
|
||||
|
||||
def _preprocess_conv2d_input(x, data_format):
|
||||
"""Transpose and cast the input before the conv2d.
|
||||
|
||||
# Arguments
|
||||
x: input tensor.
|
||||
data_format: string, one of 'channels_last', 'channels_first'.
|
||||
|
||||
# Returns
|
||||
A tensor.
|
||||
"""
|
||||
if dtype(x) == 'float64':
|
||||
x = tf.cast(x, 'float32')
|
||||
if data_format == 'channels_first':
|
||||
@@ -2767,6 +2912,15 @@ def _preprocess_conv2d_input(x, data_format):
|
||||
|
||||
|
||||
def _preprocess_conv3d_input(x, data_format):
|
||||
"""Transpose and cast the input before the conv3d.
|
||||
|
||||
# Arguments
|
||||
x: input tensor.
|
||||
data_format: string, one of 'channels_last', 'channels_first'.
|
||||
|
||||
# Returns
|
||||
A tensor.
|
||||
"""
|
||||
if dtype(x) == 'float64':
|
||||
x = tf.cast(x, 'float32')
|
||||
if data_format == 'channels_first':
|
||||
@@ -2775,6 +2929,15 @@ def _preprocess_conv3d_input(x, data_format):
|
||||
|
||||
|
||||
def _preprocess_conv2d_kernel(kernel, data_format):
|
||||
"""Transpose and cast the kernel before the conv2d.
|
||||
|
||||
# Arguments
|
||||
kernel: kernel tensor.
|
||||
data_format: string, one of 'channels_last', 'channels_first'.
|
||||
|
||||
# Returns
|
||||
A tensor.
|
||||
"""
|
||||
if dtype(kernel) == 'float64':
|
||||
kernel = tf.cast(kernel, 'float32')
|
||||
if data_format == 'channels_first':
|
||||
@@ -2783,6 +2946,15 @@ def _preprocess_conv2d_kernel(kernel, data_format):
|
||||
|
||||
|
||||
def _preprocess_conv3d_kernel(kernel, data_format):
|
||||
"""Transpose and cast the kernel before the conv3d.
|
||||
|
||||
# Arguments
|
||||
kernel: kernel tensor.
|
||||
data_format: string, one of 'channels_last', 'channels_first'.
|
||||
|
||||
# Returns
|
||||
A tensor.
|
||||
"""
|
||||
if dtype(kernel) == 'float64':
|
||||
kernel = tf.cast(kernel, 'float32')
|
||||
if data_format == 'channels_first':
|
||||
@@ -2791,16 +2963,37 @@ def _preprocess_conv3d_kernel(kernel, data_format):
|
||||
|
||||
|
||||
def _preprocess_padding(padding):
    """Convert keras' padding to tensorflow's padding.

    # Arguments
        padding: string, one of 'same', 'valid'.

    # Returns
        a string, one of 'SAME', 'VALID'.

    # Raises
        ValueError if invalid `padding`
    """
    if padding == 'same':
        return 'SAME'
    if padding == 'valid':
        return 'VALID'
    # Single raise with the current wording; the stale
    # 'Invalid border mode:' message is dropped.
    raise ValueError('Invalid padding:', padding)
|
||||
|
||||
|
||||
def _postprocess_conv2d_output(x, data_format):
|
||||
"""Transpose and cast the output from conv2d if needed.
|
||||
|
||||
# Arguments
|
||||
x: A tensor.
|
||||
data_format: string, one of "channels_last", "channels_first".
|
||||
|
||||
# Returns
|
||||
A tensor.
|
||||
"""
|
||||
|
||||
if data_format == 'channels_first':
|
||||
x = tf.transpose(x, (0, 3, 1, 2))
|
||||
|
||||
@@ -2810,6 +3003,15 @@ def _postprocess_conv2d_output(x, data_format):
|
||||
|
||||
|
||||
def _postprocess_conv3d_output(x, data_format):
|
||||
"""Transpose and cast the output from conv3d if needed.
|
||||
|
||||
# Arguments
|
||||
x: A tensor.
|
||||
data_format: string, one of "channels_last", "channels_first".
|
||||
|
||||
# Returns
|
||||
A tensor.
|
||||
"""
|
||||
if data_format == 'channels_first':
|
||||
x = tf.transpose(x, (0, 4, 1, 2, 3))
|
||||
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
from collections import defaultdict
|
||||
from contextlib import contextmanager
|
||||
import theano
|
||||
from theano import ifelse
|
||||
from theano import tensor as T
|
||||
from theano.sandbox.rng_mrg import MRG_RandomStreams as RandomStreams
|
||||
from theano.tensor.signal import pool
|
||||
@@ -258,6 +259,18 @@ def zeros_like(x, dtype=None, name=None):
|
||||
return T.zeros_like(x, dtype=dtype)
|
||||
|
||||
|
||||
def identity(x):
    """Returns a tensor with the same content as the input tensor.

    Implemented as `x.copy()`.

    # Arguments
        x: The input tensor.

    # Returns
        A tensor of the same shape, type and content.
    """
    duplicate = x.copy()
    return duplicate
|
||||
|
||||
|
||||
def random_uniform_variable(shape, low, high, dtype=None, name=None):
    """Instantiates a variable from `np.random.uniform` samples.

    # Arguments
        shape: Passed to `np.random.uniform` as `size`.
        low: Passed to `np.random.uniform` as `low`.
        high: Passed to `np.random.uniform` as `high`.
        dtype: Forwarded to `variable`.
        name: Forwarded to `variable`.

    # Returns
        The result of `variable(...)` on the sampled values.
    """
    initial_value = np.random.uniform(low=low, high=high, size=shape)
    return variable(initial_value, dtype=dtype, name=name)
|
||||
@@ -515,6 +528,29 @@ def log(x):
|
||||
return T.log(x)
|
||||
|
||||
|
||||
def logsumexp(x, axis=None, keepdims=False):
    """Computes log(sum(exp(elements across dimensions of a tensor))).

    This function is more numerically stable than log(sum(exp(x))).
    It avoids overflows caused by taking the exp of large inputs and
    underflows caused by taking the log of small inputs.

    # Arguments
        x: A tensor or variable.
        axis: An integer, the axis to reduce over.
        keepdims: A boolean, whether to keep the dimensions or not.
            If `keepdims` is `False`, the rank of the tensor is reduced
            by 1. If `keepdims` is `True`, the reduced dimension is
            retained with length 1.

    # Returns
        The reduced tensor.
    """
    # Theano has a built-in optimization for logsumexp
    # (see https://github.com/Theano/Theano/pull/4736)
    # so we can just write the expression directly:
    summed_exp = T.sum(T.exp(x), axis=axis, keepdims=keepdims)
    return T.log(summed_exp)
|
||||
|
||||
|
||||
def round(x):
    """Element-wise rounding via `T.round` in `'half_to_even'` mode.

    # Arguments
        x: A tensor or variable.

    # Returns
        The result of `T.round(x, mode='half_to_even')`.
    """
    rounded = T.round(x, mode='half_to_even')
    return rounded
|
||||
|
||||
@@ -1898,10 +1934,14 @@ def pool2d(x, pool_size, strides=(1, 1), padding='valid',
|
||||
pad=pad,
|
||||
mode='max')
|
||||
elif pool_mode == 'avg':
|
||||
if padding == 'same':
|
||||
th_avg_pool_mode = 'average_inc_pad'
|
||||
elif padding == 'valid':
|
||||
th_avg_pool_mode = 'average_exc_pad'
|
||||
pool_out = pool.pool_2d(x, ws=pool_size, stride=strides,
|
||||
ignore_border=True,
|
||||
pad=pad,
|
||||
mode='average_exc_pad')
|
||||
mode=th_avg_pool_mode)
|
||||
else:
|
||||
raise ValueError('Invalid pooling mode:', pool_mode)
|
||||
if padding == 'same':
|
||||
|
||||
+13
-11
@@ -3,6 +3,7 @@ from __future__ import print_function
|
||||
|
||||
import os
|
||||
import csv
|
||||
import six
|
||||
|
||||
import numpy as np
|
||||
import time
|
||||
@@ -503,8 +504,7 @@ class RemoteMonitor(Callback):
|
||||
field: String; JSON field under which the data will be stored.
|
||||
headers: Dictionary; optional custom HTTP headers.
|
||||
Defaults to:
|
||||
`{'Accept': 'application/json',
|
||||
'Content-Type': 'application/json'}`
|
||||
`{'Accept': 'application/json', 'Content-Type': 'application/json'}`
|
||||
"""
|
||||
|
||||
def __init__(self,
|
||||
@@ -578,7 +578,7 @@ class TensorBoard(Callback):
|
||||
tensorboard --logdir=/full_path_to_your_logs
|
||||
```
|
||||
You can find more information about TensorBoard
|
||||
[here](https://www.tensorflow.org/versions/master/how_tos/summaries_and_tensorboard/index.html).
|
||||
[here](https://www.tensorflow.org/get_started/summaries_and_tensorboard).
|
||||
|
||||
# Arguments
|
||||
log_dir: the path of the directory where to save the log
|
||||
@@ -735,9 +735,9 @@ class ReduceLROnPlateau(Callback):
|
||||
|
||||
# Example
|
||||
```python
|
||||
reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.2,
|
||||
patience=5, min_lr=0.001)
|
||||
model.fit(X_train, Y_train, callbacks=[reduce_lr])
|
||||
reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.2,
|
||||
patience=5, min_lr=0.001)
|
||||
model.fit(X_train, Y_train, callbacks=[reduce_lr])
|
||||
```
|
||||
|
||||
# Arguments
|
||||
@@ -844,8 +844,8 @@ class CSVLogger(Callback):
|
||||
|
||||
# Example
|
||||
```python
|
||||
csv_logger = CSVLogger('training.log')
|
||||
model.fit(X_train, Y_train, callbacks=[csv_logger])
|
||||
csv_logger = CSVLogger('training.log')
|
||||
model.fit(X_train, Y_train, callbacks=[csv_logger])
|
||||
```
|
||||
|
||||
# Arguments
|
||||
@@ -862,16 +862,17 @@ class CSVLogger(Callback):
|
||||
self.writer = None
|
||||
self.keys = None
|
||||
self.append_header = True
|
||||
self.file_flags = 'b' if six.PY2 and os.name == 'nt' else ''
|
||||
super(CSVLogger, self).__init__()
|
||||
|
||||
def on_train_begin(self, logs=None):
|
||||
if self.append:
|
||||
if os.path.exists(self.filename):
|
||||
with open(self.filename) as f:
|
||||
with open(self.filename, 'r' + self.file_flags) as f:
|
||||
self.append_header = not bool(len(f.readline()))
|
||||
self.csv_file = open(self.filename, 'a')
|
||||
self.csv_file = open(self.filename, 'a' + self.file_flags)
|
||||
else:
|
||||
self.csv_file = open(self.filename, 'w')
|
||||
self.csv_file = open(self.filename, 'w' + self.file_flags)
|
||||
|
||||
def on_epoch_end(self, epoch, logs=None):
|
||||
logs = logs or {}
|
||||
@@ -910,6 +911,7 @@ class LambdaCallback(Callback):
|
||||
This callback is constructed with anonymous functions that will be called
|
||||
at the appropriate time. Note that the callbacks expect positional
|
||||
arguments, as:
|
||||
|
||||
- `on_epoch_begin` and `on_epoch_end` expect two positional arguments:
|
||||
`epoch`, `logs`
|
||||
- `on_batch_begin` and `on_batch_end` expect two positional arguments:
|
||||
|
||||
@@ -100,7 +100,7 @@ def load_data(path='imdb.npz', num_words=None, skip_top=0,
|
||||
for x in xs:
|
||||
nx = []
|
||||
for w in x:
|
||||
if w >= num_words or w < skip_top:
|
||||
if skip_top <= w < num_words:
|
||||
nx.append(w)
|
||||
new_xs.append(nx)
|
||||
xs = new_xs
|
||||
|
||||
@@ -84,7 +84,7 @@ def load_data(path='reuters.npz', num_words=None, skip_top=0,
|
||||
for x in xs:
|
||||
nx = []
|
||||
for w in x:
|
||||
if w >= num_words or w < skip_top:
|
||||
if skip_top <= w < num_words:
|
||||
nx.append(w)
|
||||
new_xs.append(nx)
|
||||
xs = new_xs
|
||||
|
||||
+97
-40
@@ -360,28 +360,35 @@ class Layer(object):
|
||||
def non_trainable_weights(self, weights):
|
||||
self._non_trainable_weights = weights
|
||||
|
||||
def add_weight(self, shape, initializer,
|
||||
name=None,
|
||||
trainable=True,
|
||||
@interfaces.legacy_add_weight_support
|
||||
def add_weight(self,
|
||||
name,
|
||||
shape,
|
||||
dtype=None,
|
||||
initializer=None,
|
||||
regularizer=None,
|
||||
trainable=True,
|
||||
constraint=None):
|
||||
"""Adds a weight variable to the layer.
|
||||
|
||||
# Arguments
|
||||
shape: The shape tuple of the weight.
|
||||
initializer: An Initializer instance (callable).
|
||||
name: String, the name for the weight variable.
|
||||
shape: The shape tuple of the weight.
|
||||
dtype: The dtype of the weight.
|
||||
initializer: An Initializer instance (callable).
|
||||
regularizer: An optional Regularizer instance.
|
||||
trainable: A boolean, whether the weight should
|
||||
be trained via backprop or not (assuming
|
||||
that the layer itself is also trainable).
|
||||
regularizer: An optional Regularizer instance.
|
||||
constraint: An optional Constraint instance.
|
||||
|
||||
# Returns
|
||||
The created weight variable.
|
||||
"""
|
||||
initializer = initializers.get(initializer)
|
||||
weight = K.variable(initializer(shape), dtype=K.floatx(), name=name)
|
||||
if dtype is None:
|
||||
dtype = K.floatx()
|
||||
weight = K.variable(initializer(shape), dtype=dtype, name=name)
|
||||
if regularizer is not None:
|
||||
self.add_loss(regularizer(weight))
|
||||
if constraint is not None:
|
||||
@@ -578,6 +585,20 @@ class Layer(object):
|
||||
output = self.call(inputs, **kwargs)
|
||||
output_mask = self.compute_mask(inputs, previous_mask)
|
||||
|
||||
# If the layer returns tensors from its inputs, unmodified,
|
||||
# we copy them to avoid loss of tensor metadata.
|
||||
output_ls = _to_list(output)
|
||||
inputs_ls = _to_list(inputs)
|
||||
output_ls_copy = []
|
||||
for x in output_ls:
|
||||
if x in inputs_ls:
|
||||
x = K.identity(x)
|
||||
output_ls_copy.append(x)
|
||||
if len(output_ls_copy) == 1:
|
||||
output = output_ls_copy[0]
|
||||
else:
|
||||
output = output_ls_copy
|
||||
|
||||
# Inferring the output shape is only relevant for Theano.
|
||||
if all([s is not None for s in _to_list(input_shape)]):
|
||||
output_shape = self.compute_output_shape(input_shape)
|
||||
@@ -1249,6 +1270,7 @@ class InputLayer(Layer):
|
||||
name: Name of the layer (string).
|
||||
"""
|
||||
|
||||
@interfaces.legacy_input_support
|
||||
def __init__(self, input_shape=None, batch_size=None,
|
||||
batch_input_shape=None,
|
||||
dtype=None, input_tensor=None, sparse=False, name=None):
|
||||
@@ -1586,56 +1608,53 @@ class Container(Layer):
|
||||
nodes_depths = {} # dict {node: depth value}
|
||||
layers_depths = {} # dict {layer: depth value}
|
||||
layer_indices = {} # dict {layer: index in traversal}
|
||||
nodes_in_decreasing_depth = []
|
||||
|
||||
def make_node_marker(node, depth):
|
||||
return str(id(node)) + '-' + str(depth)
|
||||
|
||||
def build_map_of_graph(tensor, seen_nodes=None, depth=0,
|
||||
def build_map_of_graph(tensor, finished_nodes, nodes_in_progress,
|
||||
layer=None, node_index=None, tensor_index=None):
|
||||
"""Builds a map of the graph of layers.
|
||||
|
||||
This recursively updates the maps `nodes_depths`,
|
||||
`layers_depths` and the set `container_nodes`.
|
||||
|
||||
Does not try to detect cycles in the graph.
|
||||
This recursively updates the map `layer_indices`,
|
||||
the list `nodes_in_decreasing_depth` and the set `container_nodes`.
|
||||
|
||||
# Arguments
|
||||
tensor: Some tensor in a graph.
|
||||
seen_nodes: Set of node ids ("{layer.name}_ib-{node_index}")
|
||||
of nodes seen so far. Useful to prevent infinite loops.
|
||||
depth: Current depth in the graph (0 = last output).
|
||||
finished_nodes: Set of nodes whose subgraphs have been traversed
|
||||
completely. Useful to prevent duplicated work.
|
||||
nodes_in_progress: Set of nodes that are currently active on the
|
||||
recursion stack. Useful to detect cycles.
|
||||
layer: Layer that `tensor` comes from. If not provided,
|
||||
will be obtained from `tensor._keras_history`.
|
||||
node_index: Node index that `tensor` comes from.
|
||||
tensor_index: Tensor index that `tensor` comes from.
|
||||
|
||||
# Raises
|
||||
RuntimeError: if a cycle is detected.
|
||||
"""
|
||||
seen_nodes = seen_nodes or set()
|
||||
if not layer or node_index is None or tensor_index is None:
|
||||
layer, node_index, tensor_index = tensor._keras_history
|
||||
node = layer.inbound_nodes[node_index]
|
||||
|
||||
# Prevent cycles.
|
||||
seen_nodes.add(make_node_marker(node, depth))
|
||||
if node in nodes_in_progress:
|
||||
raise RuntimeError(
|
||||
'The tensor ' + str(tensor) + ' at layer "' +
|
||||
layer.name + '" is part of a cycle.')
|
||||
|
||||
# Don't repeat work for shared subgraphs
|
||||
if node in finished_nodes:
|
||||
return
|
||||
|
||||
node_key = layer.name + '_ib-' + str(node_index)
|
||||
# Update container_nodes.
|
||||
container_nodes.add(node_key)
|
||||
# Update nodes_depths.
|
||||
node_depth = nodes_depths.get(node)
|
||||
if node_depth is None:
|
||||
nodes_depths[node] = depth
|
||||
else:
|
||||
nodes_depths[node] = max(depth, node_depth)
|
||||
# Update layers_depths.
|
||||
previously_seen_depth = layers_depths.get(layer)
|
||||
if previously_seen_depth is None:
|
||||
current_depth = depth
|
||||
else:
|
||||
current_depth = max(depth, previously_seen_depth)
|
||||
layers_depths[layer] = current_depth
|
||||
|
||||
# Store the traversal order for layer sorting.
|
||||
if layer not in layer_indices:
|
||||
layer_indices[layer] = len(layer_indices)
|
||||
|
||||
nodes_in_progress.add(node)
|
||||
|
||||
# Propagate to all previous tensors connected to this node.
|
||||
for i in range(len(node.inbound_layers)):
|
||||
x = node.input_tensors[i]
|
||||
@@ -1643,15 +1662,34 @@ class Container(Layer):
|
||||
node_index = node.node_indices[i]
|
||||
tensor_index = node.tensor_indices[i]
|
||||
next_node = layer.inbound_nodes[node_index]
|
||||
# use node_marker to prevent cycles
|
||||
node_marker = make_node_marker(next_node, current_depth + 1)
|
||||
if node_marker not in seen_nodes:
|
||||
build_map_of_graph(x, seen_nodes, current_depth + 1,
|
||||
layer, node_index, tensor_index)
|
||||
build_map_of_graph(x, finished_nodes, nodes_in_progress,
|
||||
layer, node_index, tensor_index)
|
||||
|
||||
finished_nodes.add(node)
|
||||
nodes_in_progress.remove(node)
|
||||
|
||||
nodes_in_decreasing_depth.append(node)
|
||||
|
||||
finished_nodes = set()
|
||||
nodes_in_progress = set()
|
||||
for x in self.outputs:
|
||||
seen_nodes = set()
|
||||
build_map_of_graph(x, seen_nodes, depth=0)
|
||||
build_map_of_graph(x, finished_nodes, nodes_in_progress)
|
||||
|
||||
for node in reversed(nodes_in_decreasing_depth):
|
||||
# If the depth is not set, the node has no outbound nodes (depth 0).
|
||||
depth = nodes_depths.setdefault(node, 0)
|
||||
|
||||
# Update the depth of inbound nodes.
|
||||
for i in range(len(node.inbound_layers)):
|
||||
inbound_layer = node.inbound_layers[i]
|
||||
node_index = node.node_indices[i]
|
||||
inbound_node = inbound_layer.inbound_nodes[node_index]
|
||||
previous_depth = nodes_depths.get(inbound_node, 0)
|
||||
nodes_depths[inbound_node] = max(depth + 1, previous_depth)
|
||||
|
||||
# Update the depth of the corresponding layer
|
||||
previous_depth = layers_depths.get(node.outbound_layer, 0)
|
||||
layers_depths[node.outbound_layer] = max(depth, previous_depth)
|
||||
|
||||
# Build a dict {depth: list of nodes with this depth}
|
||||
nodes_by_depth = {}
|
||||
@@ -2747,6 +2785,25 @@ def preprocess_weights_for_loading(layer, weights,
|
||||
A list of weights values (Numpy arrays).
|
||||
"""
|
||||
if original_keras_version == '1':
|
||||
if layer.__class__.__name__ == 'Bidirectional':
|
||||
num_weights_per_layer = len(weights) // 2
|
||||
|
||||
forward_weights = preprocess_weights_for_loading(layer.forward_layer,
|
||||
weights[:num_weights_per_layer],
|
||||
original_keras_version,
|
||||
original_backend)
|
||||
backward_weights = preprocess_weights_for_loading(layer.backward_layer,
|
||||
weights[num_weights_per_layer:],
|
||||
original_keras_version,
|
||||
original_backend)
|
||||
weights = forward_weights + backward_weights
|
||||
|
||||
if layer.__class__.__name__ == 'TimeDistributed':
|
||||
weights = preprocess_weights_for_loading(layer.layer,
|
||||
weights,
|
||||
original_keras_version,
|
||||
original_backend)
|
||||
|
||||
if layer.__class__.__name__ == 'Conv1D':
|
||||
shape = weights[0].shape
|
||||
# Handle Keras 1.1 format
|
||||
|
||||
+52
-39
@@ -50,6 +50,8 @@ def _standardize_input_data(data, names, shapes=None,
|
||||
# Raises
|
||||
ValueError: in case of improperly formatted user-provided data.
|
||||
"""
|
||||
if not names:
|
||||
return []
|
||||
if data is None:
|
||||
return [None for _ in range(len(names))]
|
||||
if isinstance(data, dict):
|
||||
@@ -63,7 +65,8 @@ def _standardize_input_data(data, names, shapes=None,
|
||||
elif isinstance(data, list):
|
||||
if len(data) != len(names):
|
||||
if data and hasattr(data[0], 'shape'):
|
||||
raise ValueError('Error when checking ' + exception_prefix +
|
||||
raise ValueError('Error when checking model ' +
|
||||
exception_prefix +
|
||||
': the list of Numpy arrays '
|
||||
'that you are passing to your model '
|
||||
'is not the size the model expected. '
|
||||
@@ -77,7 +80,8 @@ def _standardize_input_data(data, names, shapes=None,
|
||||
data = [np.asarray(data)]
|
||||
else:
|
||||
raise ValueError(
|
||||
'Error when checking ' + exception_prefix +
|
||||
'Error when checking model ' +
|
||||
exception_prefix +
|
||||
': you are passing a list as '
|
||||
'input to your model, '
|
||||
'but the model expects '
|
||||
@@ -88,15 +92,17 @@ def _standardize_input_data(data, names, shapes=None,
|
||||
arrays = data
|
||||
else:
|
||||
if not hasattr(data, 'shape'):
|
||||
raise TypeError('Error when checking ' + exception_prefix +
|
||||
raise TypeError('Error when checking model ' +
|
||||
exception_prefix +
|
||||
': data should be a Numpy array, '
|
||||
'or list/dict of Numpy arrays. '
|
||||
'Found: ' + str(data)[:200] + '...')
|
||||
if len(names) != 1:
|
||||
if len(names) > 1:
|
||||
# Case: model expects multiple inputs but only received
|
||||
# a single Numpy array.
|
||||
raise ValueError('The model expects ' + str(len(names)) +
|
||||
' input arrays, but only received one array. '
|
||||
exception_prefix +
|
||||
' arrays, but only received one array. '
|
||||
'Found: array with shape ' + str(data.shape))
|
||||
arrays = [data]
|
||||
|
||||
@@ -679,6 +685,8 @@ class Model(Container):
|
||||
See [losses](/losses).
|
||||
If the model has multiple outputs, you can use a different loss
|
||||
on each output by passing a dictionary or a list of losses.
|
||||
The loss value that will be minimized by the model
|
||||
will then be the sum of all individual losses.
|
||||
metrics: list of metrics to be evaluated by the model
|
||||
during training and testing.
|
||||
Typically you will use `metrics=['accuracy']`.
|
||||
@@ -688,6 +696,9 @@ class Model(Container):
|
||||
loss_weights: Optional list or dictionary specifying scalar
|
||||
coefficients (Python floats) to weight the loss contributions
|
||||
of different model outputs.
|
||||
The loss value that will be minimized by the model
|
||||
will then be the *weighted sum* of all individual losses,
|
||||
weighted by the `loss_weights` coefficients.
|
||||
If a list, it is expected to have a 1:1 mapping
|
||||
to the model's outputs. If a dict, it is expected to map
|
||||
output names (strings) to scalar coefficients.
|
||||
@@ -1126,7 +1137,7 @@ class Model(Container):
|
||||
batch_ids = index_array[batch_start:batch_end]
|
||||
try:
|
||||
if isinstance(ins[-1], float):
|
||||
# do not slice the training phase flag
|
||||
# Do not slice the training phase flag.
|
||||
ins_batch = _slice_arrays(ins[:-1], batch_ids) + [ins[-1]]
|
||||
else:
|
||||
ins_batch = _slice_arrays(ins, batch_ids)
|
||||
@@ -1146,16 +1157,14 @@ class Model(Container):
|
||||
|
||||
callbacks.on_batch_end(batch_index, batch_logs)
|
||||
|
||||
if batch_index == len(batches) - 1: # last batch
|
||||
# validation
|
||||
if batch_index == len(batches) - 1: # Last batch.
|
||||
if do_validation:
|
||||
# replace with self._evaluate
|
||||
val_outs = self._test_loop(val_f, val_ins,
|
||||
batch_size=batch_size,
|
||||
verbose=0)
|
||||
if not isinstance(val_outs, list):
|
||||
val_outs = [val_outs]
|
||||
# same labels assumed
|
||||
# Same labels assumed.
|
||||
for l, o in zip(out_labels, val_outs):
|
||||
epoch_logs['val_' + l] = o
|
||||
callbacks.on_epoch_end(epoch, epoch_logs)
|
||||
@@ -1195,7 +1204,7 @@ class Model(Container):
|
||||
for batch_index, (batch_start, batch_end) in enumerate(batches):
|
||||
batch_ids = index_array[batch_start:batch_end]
|
||||
if ins and isinstance(ins[-1], float):
|
||||
# do not slice the training phase flag
|
||||
# Do not slice the training phase flag.
|
||||
ins_batch = _slice_arrays(ins[:-1], batch_ids) + [ins[-1]]
|
||||
else:
|
||||
ins_batch = _slice_arrays(ins, batch_ids)
|
||||
@@ -1249,7 +1258,7 @@ class Model(Container):
|
||||
for batch_index, (batch_start, batch_end) in enumerate(batches):
|
||||
batch_ids = index_array[batch_start:batch_end]
|
||||
if isinstance(ins[-1], float):
|
||||
# do not slice the training phase flag
|
||||
# Do not slice the training phase flag.
|
||||
ins_batch = _slice_arrays(ins[:-1], batch_ids) + [ins[-1]]
|
||||
else:
|
||||
ins_batch = _slice_arrays(ins, batch_ids)
|
||||
@@ -1293,11 +1302,11 @@ class Model(Container):
|
||||
x = _standardize_input_data(x, self._feed_input_names,
|
||||
self._feed_input_shapes,
|
||||
check_batch_axis=False,
|
||||
exception_prefix='model input')
|
||||
exception_prefix='input')
|
||||
y = _standardize_input_data(y, self._feed_output_names,
|
||||
output_shapes,
|
||||
check_batch_axis=False,
|
||||
exception_prefix='model target')
|
||||
exception_prefix='target')
|
||||
sample_weights = _standardize_sample_weights(sample_weight,
|
||||
self._feed_output_names)
|
||||
class_weights = _standardize_class_weights(class_weight,
|
||||
@@ -1318,6 +1327,20 @@ class Model(Container):
|
||||
str(x[0].shape[0]) + ' samples')
|
||||
return x, y, sample_weights
|
||||
|
||||
def _get_deduped_metrics_names(self):
|
||||
out_labels = self.metrics_names
|
||||
|
||||
# Rename duplicated metrics name
|
||||
# (can happen with an output layer shared among multiple dataflows).
|
||||
deduped_out_labels = []
|
||||
for i, label in enumerate(out_labels):
|
||||
new_label = label
|
||||
if out_labels.count(label) > 1:
|
||||
dup_idx = out_labels[:i].count(label)
|
||||
new_label += '_' + str(dup_idx + 1)
|
||||
deduped_out_labels.append(new_label)
|
||||
return deduped_out_labels
|
||||
|
||||
def fit(self, x=None,
|
||||
y=None,
|
||||
batch_size=32,
|
||||
@@ -1347,7 +1370,7 @@ class Model(Container):
|
||||
batch_size: integer. Number of samples per gradient update.
|
||||
epochs: integer, the number of times to iterate
|
||||
over the training data arrays.
|
||||
verbose: 0, 1, or 2. Verbosity mode.
|
||||
verbose: 0, 1, or 2. Verbosity mode.
|
||||
0 = silent, 1 = verbose, 2 = one log line per epoch.
|
||||
callbacks: list of callbacks to be called during training.
|
||||
See [callbacks](/callbacks).
|
||||
@@ -1397,14 +1420,14 @@ class Model(Container):
|
||||
if kwargs:
|
||||
raise TypeError('Unrecognized keyword arguments: ' + str(kwargs))
|
||||
|
||||
# validate user data
|
||||
# Validate user data.
|
||||
x, y, sample_weights = self._standardize_user_data(
|
||||
x, y,
|
||||
sample_weight=sample_weight,
|
||||
class_weight=class_weight,
|
||||
check_batch_axis=False,
|
||||
batch_size=batch_size)
|
||||
# prepare validation data
|
||||
# Prepare validation data.
|
||||
if validation_data:
|
||||
do_validation = True
|
||||
if len(validation_data) == 2:
|
||||
@@ -1450,7 +1473,7 @@ class Model(Container):
|
||||
val_f = None
|
||||
val_ins = None
|
||||
|
||||
# prepare input arrays and training function
|
||||
# Prepare input arrays and training function.
|
||||
if self.uses_learning_phase and not isinstance(K.learning_phase(), int):
|
||||
ins = x + y + sample_weights + [1.]
|
||||
else:
|
||||
@@ -1458,26 +1481,15 @@ class Model(Container):
|
||||
self._make_train_function()
|
||||
f = self.train_function
|
||||
|
||||
# prepare display labels
|
||||
out_labels = self.metrics_names
|
||||
|
||||
# rename duplicated metrics name
|
||||
# (can happen with an output layer shared among multiple dataflows)
|
||||
deduped_out_labels = []
|
||||
for i, label in enumerate(out_labels):
|
||||
new_label = label
|
||||
if out_labels.count(label) > 1:
|
||||
dup_idx = out_labels[:i].count(label)
|
||||
new_label += '_' + str(dup_idx + 1)
|
||||
deduped_out_labels.append(new_label)
|
||||
out_labels = deduped_out_labels
|
||||
# Prepare display labels.
|
||||
out_labels = self._get_deduped_metrics_names()
|
||||
|
||||
if do_validation:
|
||||
callback_metrics = copy.copy(out_labels) + ['val_' + n for n in out_labels]
|
||||
else:
|
||||
callback_metrics = copy.copy(out_labels)
|
||||
|
||||
# delegate logic to _fit_loop
|
||||
# Delegate logic to `_fit_loop`.
|
||||
return self._fit_loop(f, ins, out_labels=out_labels,
|
||||
batch_size=batch_size, epochs=epochs,
|
||||
verbose=verbose, callbacks=callbacks,
|
||||
@@ -1512,13 +1524,13 @@ class Model(Container):
|
||||
and/or metrics). The attribute `model.metrics_names` will give you
|
||||
the display labels for the scalar outputs.
|
||||
"""
|
||||
# validate user data
|
||||
# Validate user data.
|
||||
x, y, sample_weights = self._standardize_user_data(
|
||||
x, y,
|
||||
sample_weight=sample_weight,
|
||||
check_batch_axis=False,
|
||||
batch_size=batch_size)
|
||||
# prepare inputs, delegate logic to _test_loop
|
||||
# Prepare inputs, delegate logic to `_test_loop`.
|
||||
if self.uses_learning_phase and not isinstance(K.learning_phase(), int):
|
||||
ins = x + y + sample_weights + [0.]
|
||||
else:
|
||||
@@ -1549,7 +1561,7 @@ class Model(Container):
|
||||
or in case a stateful model receives a number of samples
|
||||
that is not a multiple of the batch size.
|
||||
"""
|
||||
# validate user data
|
||||
# Validate user data.
|
||||
x = _standardize_input_data(x, self._feed_input_names,
|
||||
self._feed_input_shapes,
|
||||
check_batch_axis=False)
|
||||
@@ -1562,7 +1574,7 @@ class Model(Container):
|
||||
str(x[0].shape[0]) + ' samples. '
|
||||
'Batch size: ' + str(batch_size) + '.')
|
||||
|
||||
# prepare inputs, delegate logic to _predict_loop
|
||||
# Prepare inputs, delegate logic to `_predict_loop`.
|
||||
if self.uses_learning_phase and not isinstance(K.learning_phase(), int):
|
||||
ins = x + [0.]
|
||||
else:
|
||||
@@ -1713,7 +1725,7 @@ class Model(Container):
|
||||
All arrays should contain the same number of samples.
|
||||
The generator is expected to loop over its data
|
||||
indefinitely. An epoch finishes when `steps_per_epoch`
|
||||
samples have been seen by the model.
|
||||
batches have been seen by the model.
|
||||
steps_per_epoch: Total number of steps (batches of samples)
|
||||
to yield from `generator` before declaring one epoch
|
||||
finished and starting the next epoch. It should typically
|
||||
@@ -1785,7 +1797,8 @@ class Model(Container):
|
||||
'you must specify a value for '
|
||||
'`validation_steps`.')
|
||||
|
||||
out_labels = self.metrics_names
|
||||
# Prepare display labels.
|
||||
out_labels = self._get_deduped_metrics_names()
|
||||
callback_metrics = out_labels + ['val_' + n for n in out_labels]
|
||||
|
||||
# prepare callbacks
|
||||
@@ -1931,7 +1944,7 @@ class Model(Container):
|
||||
The generator should return the same kind of data
|
||||
as accepted by `test_on_batch`.
|
||||
|
||||
Arguments:
|
||||
# Arguments
|
||||
generator: Generator yielding tuples (inputs, targets)
|
||||
or (inputs, targets, sample_weights)
|
||||
steps: Total number of steps (batches of samples)
|
||||
|
||||
@@ -21,6 +21,14 @@ from ..legacy.layers import *
|
||||
|
||||
|
||||
def serialize(layer):
|
||||
"""Serialize a layer.
|
||||
|
||||
# Arguments
|
||||
layer: a Layer object.
|
||||
|
||||
# Returns
|
||||
dictionary with config.
|
||||
"""
|
||||
return {'class_name': layer.__class__.__name__,
|
||||
'config': layer.get_config()}
|
||||
|
||||
|
||||
@@ -104,7 +104,7 @@ class PReLU(Layer):
|
||||
for i in self.shared_axes:
|
||||
param_shape[i - 1] = 1
|
||||
self.param_broadcast[i - 1] = True
|
||||
self.alpha = self.add_weight(param_shape,
|
||||
self.alpha = self.add_weight(shape=param_shape,
|
||||
name='alpha',
|
||||
initializer=self.alpha_initializer,
|
||||
regularizer=self.alpha_regularizer,
|
||||
|
||||
@@ -127,13 +127,13 @@ class _Conv(Layer):
|
||||
input_dim = input_shape[channel_axis]
|
||||
kernel_shape = self.kernel_size + (input_dim, self.filters)
|
||||
|
||||
self.kernel = self.add_weight(kernel_shape,
|
||||
self.kernel = self.add_weight(shape=kernel_shape,
|
||||
initializer=self.kernel_initializer,
|
||||
name='kernel',
|
||||
regularizer=self.kernel_regularizer,
|
||||
constraint=self.kernel_constraint)
|
||||
if self.use_bias:
|
||||
self.bias = self.add_weight((self.filters,),
|
||||
self.bias = self.add_weight(shape=(self.filters,),
|
||||
initializer=self.bias_initializer,
|
||||
name='bias',
|
||||
regularizer=self.bias_regularizer,
|
||||
@@ -721,13 +721,13 @@ class Conv2DTranspose(Conv2D):
|
||||
input_dim = input_shape[channel_axis]
|
||||
kernel_shape = self.kernel_size + (self.filters, input_dim)
|
||||
|
||||
self.kernel = self.add_weight(kernel_shape,
|
||||
self.kernel = self.add_weight(shape=kernel_shape,
|
||||
initializer=self.kernel_initializer,
|
||||
name='kernel',
|
||||
regularizer=self.kernel_regularizer,
|
||||
constraint=self.kernel_constraint)
|
||||
if self.use_bias:
|
||||
self.bias = self.add_weight((self.filters,),
|
||||
self.bias = self.add_weight(shape=(self.filters,),
|
||||
initializer=self.bias_initializer,
|
||||
name='bias',
|
||||
regularizer=self.bias_regularizer,
|
||||
@@ -952,20 +952,20 @@ class SeparableConv2D(Conv2D):
|
||||
self.filters)
|
||||
|
||||
self.depthwise_kernel = self.add_weight(
|
||||
depthwise_kernel_shape,
|
||||
shape=depthwise_kernel_shape,
|
||||
initializer=self.depthwise_initializer,
|
||||
name='depthwise_kernel',
|
||||
regularizer=self.depthwise_regularizer,
|
||||
constraint=self.depthwise_constraint)
|
||||
self.pointwise_kernel = self.add_weight(
|
||||
pointwise_kernel_shape,
|
||||
shape=pointwise_kernel_shape,
|
||||
initializer=self.pointwise_initializer,
|
||||
name='pointwise_kernel',
|
||||
regularizer=self.pointwise_regularizer,
|
||||
constraint=self.pointwise_constraint)
|
||||
|
||||
if self.use_bias:
|
||||
self.bias = self.add_weight((self.filters,),
|
||||
self.bias = self.add_weight(shape=(self.filters,),
|
||||
initializer=self.bias_initializer,
|
||||
name='bias',
|
||||
regularizer=self.bias_regularizer,
|
||||
@@ -1571,7 +1571,7 @@ class Cropping2D(Layer):
|
||||
model.add(Cropping2D(cropping=((2, 2), (4, 4)),
|
||||
input_shape=(28, 28, 3)))
|
||||
# now model.output_shape == (None, 24, 20, 3)
|
||||
model.add(Conv2D(64, (3, 3), padding='same))
|
||||
model.add(Conv2D(64, (3, 3), padding='same'))
|
||||
model.add(Cropping2D(cropping=((2, 2), (2, 2))))
|
||||
# now model.output_shape == (None, 20, 16, 64)
|
||||
```
|
||||
|
||||
@@ -351,19 +351,19 @@ class ConvLSTM2D(ConvRecurrent2D):
|
||||
self.kernel_shape = kernel_shape
|
||||
recurrent_kernel_shape = self.kernel_size + (self.filters, self.filters * 4)
|
||||
|
||||
self.kernel = self.add_weight(kernel_shape,
|
||||
self.kernel = self.add_weight(shape=kernel_shape,
|
||||
initializer=self.kernel_initializer,
|
||||
name='kernel',
|
||||
regularizer=self.kernel_regularizer,
|
||||
constraint=self.kernel_constraint)
|
||||
self.recurrent_kernel = self.add_weight(
|
||||
recurrent_kernel_shape,
|
||||
shape=recurrent_kernel_shape,
|
||||
initializer=self.recurrent_initializer,
|
||||
name='recurrent_kernel',
|
||||
regularizer=self.recurrent_regularizer,
|
||||
constraint=self.recurrent_constraint)
|
||||
if self.use_bias:
|
||||
self.bias = self.add_weight((self.filters * 4,),
|
||||
self.bias = self.add_weight(shape=(self.filters * 4,),
|
||||
initializer=self.bias_initializer,
|
||||
name='bias',
|
||||
regularizer=self.bias_regularizer,
|
||||
@@ -396,7 +396,7 @@ class ConvLSTM2D(ConvRecurrent2D):
|
||||
self.bias_o = None
|
||||
self.built = True
|
||||
|
||||
def get_initial_states(self, inputs):
|
||||
def get_initial_state(self, inputs):
|
||||
# (samples, timesteps, rows, cols, filters)
|
||||
initial_state = K.zeros_like(inputs)
|
||||
# (samples, rows, cols, filters)
|
||||
|
||||
@@ -820,13 +820,13 @@ class Dense(Layer):
|
||||
assert len(input_shape) >= 2
|
||||
input_dim = input_shape[-1]
|
||||
|
||||
self.kernel = self.add_weight((input_dim, self.units),
|
||||
self.kernel = self.add_weight(shape=(input_dim, self.units),
|
||||
initializer=self.kernel_initializer,
|
||||
name='kernel',
|
||||
regularizer=self.kernel_regularizer,
|
||||
constraint=self.kernel_constraint)
|
||||
if self.use_bias:
|
||||
self.bias = self.add_weight((self.units,),
|
||||
self.bias = self.add_weight(shape=(self.units,),
|
||||
initializer=self.bias_initializer,
|
||||
name='bias',
|
||||
regularizer=self.bias_regularizer,
|
||||
|
||||
@@ -94,7 +94,7 @@ class Embedding(Layer):
|
||||
|
||||
def build(self, input_shape):
|
||||
self.embeddings = self.add_weight(
|
||||
(self.input_dim, self.output_dim),
|
||||
shape=(self.input_dim, self.output_dim),
|
||||
initializer=self.embeddings_initializer,
|
||||
name='embeddings',
|
||||
regularizer=self.embeddings_regularizer,
|
||||
|
||||
@@ -122,14 +122,14 @@ class LocallyConnected1D(Layer):
|
||||
self.kernel_size[0] * input_dim,
|
||||
self.filters)
|
||||
self.kernel = self.add_weight(
|
||||
self.kernel_shape,
|
||||
shape=self.kernel_shape,
|
||||
initializer=self.kernel_initializer,
|
||||
name='kernel',
|
||||
regularizer=self.kernel_regularizer,
|
||||
constraint=self.kernel_constraint)
|
||||
if self.use_bias:
|
||||
self.bias = self.add_weight(
|
||||
(output_length, self.filters),
|
||||
shape=(output_length, self.filters),
|
||||
initializer=self.bias_initializer,
|
||||
name='bias',
|
||||
regularizer=self.bias_regularizer,
|
||||
@@ -325,13 +325,13 @@ class LocallyConnected2D(Layer):
|
||||
self.kernel_shape = (output_row * output_col,
|
||||
self.kernel_size[0] * self.kernel_size[1] * input_filter,
|
||||
self.filters)
|
||||
self.kernel = self.add_weight(self.kernel_shape,
|
||||
self.kernel = self.add_weight(shape=self.kernel_shape,
|
||||
initializer=self.kernel_initializer,
|
||||
name='kernel',
|
||||
regularizer=self.kernel_regularizer,
|
||||
constraint=self.kernel_constraint)
|
||||
if self.use_bias:
|
||||
self.bias = self.add_weight((output_row, output_col, self.filters),
|
||||
self.bias = self.add_weight(shape=(output_row, output_col, self.filters),
|
||||
initializer=self.bias_initializer,
|
||||
name='bias',
|
||||
regularizer=self.bias_regularizer,
|
||||
|
||||
@@ -96,7 +96,7 @@ class BatchNormalization(Layer):
|
||||
shape = (dim,)
|
||||
|
||||
if self.scale:
|
||||
self.gamma = self.add_weight(shape,
|
||||
self.gamma = self.add_weight(shape=shape,
|
||||
name='gamma',
|
||||
initializer=self.gamma_initializer,
|
||||
regularizer=self.gamma_regularizer,
|
||||
@@ -104,7 +104,7 @@ class BatchNormalization(Layer):
|
||||
else:
|
||||
self.gamma = None
|
||||
if self.center:
|
||||
self.beta = self.add_weight(shape,
|
||||
self.beta = self.add_weight(shape=shape,
|
||||
name='beta',
|
||||
initializer=self.beta_initializer,
|
||||
regularizer=self.beta_regularizer,
|
||||
@@ -112,12 +112,12 @@ class BatchNormalization(Layer):
|
||||
else:
|
||||
self.beta = None
|
||||
self.moving_mean = self.add_weight(
|
||||
shape,
|
||||
shape=shape,
|
||||
name='moving_mean',
|
||||
initializer=self.moving_mean_initializer,
|
||||
trainable=False)
|
||||
self.moving_variance = self.add_weight(
|
||||
shape,
|
||||
shape=shape,
|
||||
name='moving_variance',
|
||||
initializer=self.moving_variance_initializer,
|
||||
trainable=False)
|
||||
@@ -135,55 +135,57 @@ class BatchNormalization(Layer):
|
||||
# Determines whether broadcasting is needed.
|
||||
needs_broadcasting = (sorted(reduction_axes) != list(range(ndim))[:-1])
|
||||
|
||||
normed, mean, variance = K.normalize_batch_in_training(
|
||||
def normalize_inference():
|
||||
if needs_broadcasting:
|
||||
# In this case we must explicitly broadcast all parameters.
|
||||
broadcast_moving_mean = K.reshape(self.moving_mean,
|
||||
broadcast_shape)
|
||||
broadcast_moving_variance = K.reshape(self.moving_variance,
|
||||
broadcast_shape)
|
||||
if self.center:
|
||||
broadcast_beta = K.reshape(self.beta, broadcast_shape)
|
||||
else:
|
||||
broadcast_beta = None
|
||||
if self.scale:
|
||||
broadcast_gamma = K.reshape(self.gamma,
|
||||
broadcast_shape)
|
||||
else:
|
||||
broadcast_gamma = None
|
||||
return K.batch_normalization(
|
||||
inputs,
|
||||
broadcast_moving_mean,
|
||||
broadcast_moving_variance,
|
||||
broadcast_beta,
|
||||
broadcast_gamma,
|
||||
epsilon=self.epsilon)
|
||||
else:
|
||||
return K.batch_normalization(
|
||||
inputs,
|
||||
self.moving_mean,
|
||||
self.moving_variance,
|
||||
self.beta,
|
||||
self.gamma,
|
||||
epsilon=self.epsilon)
|
||||
|
||||
# If the learning phase is *static* and set to inference:
|
||||
if training in {0, False}:
|
||||
return normalize_inference()
|
||||
|
||||
# If the learning phase is either dynamic, or set to training:
|
||||
normed_training, mean, variance = K.normalize_batch_in_training(
|
||||
inputs, self.gamma, self.beta, reduction_axes,
|
||||
epsilon=self.epsilon)
|
||||
|
||||
if training in {0, False}:
|
||||
return normed
|
||||
else:
|
||||
self.add_update([K.moving_average_update(self.moving_mean,
|
||||
mean,
|
||||
self.momentum),
|
||||
K.moving_average_update(self.moving_variance,
|
||||
variance,
|
||||
self.momentum)],
|
||||
inputs)
|
||||
|
||||
def normalize_inference():
|
||||
if needs_broadcasting:
|
||||
# In this case we must explicitly broadcast all parameters.
|
||||
broadcast_moving_mean = K.reshape(self.moving_mean,
|
||||
broadcast_shape)
|
||||
broadcast_moving_variance = K.reshape(self.moving_variance,
|
||||
broadcast_shape)
|
||||
if self.center:
|
||||
broadcast_beta = K.reshape(self.beta, broadcast_shape)
|
||||
else:
|
||||
broadcast_beta = None
|
||||
if self.scale:
|
||||
broadcast_gamma = K.reshape(self.gamma,
|
||||
broadcast_shape)
|
||||
else:
|
||||
broadcast_gamma = None
|
||||
return K.batch_normalization(
|
||||
inputs,
|
||||
broadcast_moving_mean,
|
||||
broadcast_moving_variance,
|
||||
broadcast_beta,
|
||||
broadcast_gamma,
|
||||
epsilon=self.epsilon)
|
||||
else:
|
||||
return K.batch_normalization(
|
||||
inputs,
|
||||
self.moving_mean,
|
||||
self.moving_variance,
|
||||
self.beta,
|
||||
self.gamma,
|
||||
epsilon=self.epsilon)
|
||||
self.add_update([K.moving_average_update(self.moving_mean,
|
||||
mean,
|
||||
self.momentum),
|
||||
K.moving_average_update(self.moving_variance,
|
||||
variance,
|
||||
self.momentum)],
|
||||
inputs)
|
||||
|
||||
# Pick the normalized form corresponding to the training phase.
|
||||
return K.in_train_phase(normed,
|
||||
return K.in_train_phase(normed_training,
|
||||
normalize_inference,
|
||||
training=training)
|
||||
|
||||
|
||||
+107
-89
@@ -170,11 +170,16 @@ class Recurrent(Layer):
|
||||
To reset the states of your model, call `.reset_states()` on either
|
||||
a specific layer, or on your entire model.
|
||||
|
||||
# Note on specifying initial states in RNNs
|
||||
You can specify the initial state of RNN layers by calling them with
|
||||
the keyword argument `initial_state`. The value of `initial_state`
|
||||
should be a tensor or list of tensors representing the initial state
|
||||
of the RNN layer.
|
||||
# Note on specifying the initial state of RNNs
|
||||
You can specify the initial state of RNN layers symbolically by
|
||||
calling them with the keyword argument `initial_state`. The value of
|
||||
`initial_state` should be a tensor or list of tensors representing
|
||||
the initial state of the RNN layer.
|
||||
|
||||
You can specify the initial state of RNN layers numerically by
|
||||
calling `reset_states` with the keyword argument `states`. The value of
|
||||
`states` should be a numpy array or list of numpy arrays representing
|
||||
the initial state of the RNN layer.
|
||||
"""
|
||||
|
||||
def __init__(self, return_sequences=False,
|
||||
@@ -190,7 +195,7 @@ class Recurrent(Layer):
|
||||
self.unroll = unroll
|
||||
self.implementation = implementation
|
||||
self.supports_masking = True
|
||||
self.input_spec = InputSpec(ndim=3)
|
||||
self.input_spec = [InputSpec(ndim=3)]
|
||||
self.state_spec = None
|
||||
self.dropout = 0
|
||||
self.recurrent_dropout = 0
|
||||
@@ -205,6 +210,8 @@ class Recurrent(Layer):
|
||||
|
||||
def compute_mask(self, inputs, mask):
|
||||
if self.return_sequences:
|
||||
if isinstance(mask, list):
|
||||
return mask[0]
|
||||
return mask
|
||||
else:
|
||||
return None
|
||||
@@ -215,14 +222,14 @@ class Recurrent(Layer):
|
||||
def get_constants(self, inputs, training=None):
|
||||
return []
|
||||
|
||||
def get_initial_states(self, inputs):
|
||||
def get_initial_state(self, inputs):
|
||||
# build an all-zero tensor of shape (samples, output_dim)
|
||||
initial_state = K.zeros_like(inputs) # (samples, timesteps, input_dim)
|
||||
initial_state = K.sum(initial_state, axis=(1, 2)) # (samples,)
|
||||
initial_state = K.expand_dims(initial_state) # (samples, 1)
|
||||
initial_state = K.tile(initial_state, [1, self.units]) # (samples, output_dim)
|
||||
initial_states = [initial_state for _ in range(len(self.states))]
|
||||
return initial_states
|
||||
initial_state = [initial_state for _ in range(len(self.states))]
|
||||
return initial_state
|
||||
|
||||
def preprocess_input(self, inputs, training=None):
|
||||
return inputs
|
||||
@@ -232,51 +239,61 @@ class Recurrent(Layer):
|
||||
# and if it is a Keras tensor,
|
||||
# then add it to the inputs and temporarily
|
||||
# modify the input spec to include the state.
|
||||
if initial_state is not None:
|
||||
if hasattr(initial_state, '_keras_history'):
|
||||
# Compute the full input spec, including state
|
||||
input_spec = self.input_spec
|
||||
state_spec = self.state_spec
|
||||
if not isinstance(state_spec, list):
|
||||
state_spec = [state_spec]
|
||||
self.input_spec = [input_spec] + state_spec
|
||||
if initial_state is None:
|
||||
return super(Recurrent, self).__call__(inputs, **kwargs)
|
||||
|
||||
# Compute the full inputs, including state
|
||||
if not isinstance(initial_state, (list, tuple)):
|
||||
initial_state = [initial_state]
|
||||
inputs = [inputs] + list(initial_state)
|
||||
if not isinstance(initial_state, (list, tuple)):
|
||||
initial_state = [initial_state]
|
||||
|
||||
# Perform the call
|
||||
output = super(Recurrent, self).__call__(inputs, **kwargs)
|
||||
is_keras_tensor = hasattr(initial_state[0], '_keras_history')
|
||||
for tensor in initial_state:
|
||||
if hasattr(tensor, '_keras_history') != is_keras_tensor:
|
||||
raise ValueError('The initial state of an RNN layer cannot be'
|
||||
' specified with a mix of Keras tensors and'
|
||||
' non-Keras tensors')
|
||||
|
||||
# Restore original input spec
|
||||
self.input_spec = input_spec
|
||||
return output
|
||||
else:
|
||||
kwargs['initial_state'] = initial_state
|
||||
return super(Recurrent, self).__call__(inputs, **kwargs)
|
||||
if is_keras_tensor:
|
||||
# Compute the full input spec, including state
|
||||
input_spec = self.input_spec
|
||||
state_spec = self.state_spec
|
||||
if not isinstance(state_spec, list):
|
||||
state_spec = [state_spec]
|
||||
self.input_spec = input_spec + state_spec
|
||||
|
||||
def call(self, inputs, mask=None, initial_state=None, training=None):
|
||||
# Compute the full inputs, including state
|
||||
inputs = [inputs] + list(initial_state)
|
||||
|
||||
# Perform the call
|
||||
output = super(Recurrent, self).__call__(inputs, **kwargs)
|
||||
|
||||
# Restore original input spec
|
||||
self.input_spec = input_spec
|
||||
return output
|
||||
else:
|
||||
kwargs['initial_state'] = initial_state
|
||||
return super(Recurrent, self).__call__(inputs, **kwargs)
|
||||
|
||||
def call(self, inputs, mask=None, training=None, initial_state=None):
|
||||
# input shape: `(samples, time (padded with zeros), input_dim)`
|
||||
# note that the .build() method of subclasses MUST define
|
||||
# self.input_spec and self.state_spec with complete input shapes.
|
||||
if initial_state is not None:
|
||||
if not isinstance(initial_state, (list, tuple)):
|
||||
initial_states = [initial_state]
|
||||
else:
|
||||
initial_states = list(initial_state)
|
||||
if isinstance(inputs, list):
|
||||
initial_states = inputs[1:]
|
||||
initial_state = inputs[1:]
|
||||
inputs = inputs[0]
|
||||
elif initial_state is not None:
|
||||
pass
|
||||
elif self.stateful:
|
||||
initial_states = self.states
|
||||
initial_state = self.states
|
||||
else:
|
||||
initial_states = self.get_initial_states(inputs)
|
||||
initial_state = self.get_initial_state(inputs)
|
||||
|
||||
if len(initial_states) != len(self.states):
|
||||
if isinstance(mask, list):
|
||||
mask = mask[0]
|
||||
|
||||
if len(initial_state) != len(self.states):
|
||||
raise ValueError('Layer has ' + str(len(self.states)) +
|
||||
' states but was passed ' +
|
||||
str(len(initial_states)) +
|
||||
str(len(initial_state)) +
|
||||
' initial states.')
|
||||
input_shape = K.int_shape(inputs)
|
||||
if self.unroll and input_shape[1] is None:
|
||||
@@ -295,7 +312,7 @@ class Recurrent(Layer):
|
||||
preprocessed_input = self.preprocess_input(inputs, training=None)
|
||||
last_output, outputs, states = K.rnn(self.step,
|
||||
preprocessed_input,
|
||||
initial_states,
|
||||
initial_state,
|
||||
go_backwards=self.go_backwards,
|
||||
mask=mask,
|
||||
constants=constants,
|
||||
@@ -317,13 +334,10 @@ class Recurrent(Layer):
|
||||
else:
|
||||
return last_output
|
||||
|
||||
def reset_states(self, states_value=None):
|
||||
def reset_states(self, states=None):
|
||||
if not self.stateful:
|
||||
raise AttributeError('Layer must be stateful.')
|
||||
if not self.input_spec:
|
||||
raise RuntimeError('Layer has never been called '
|
||||
'and thus has no states.')
|
||||
batch_size = self.input_spec.shape[0]
|
||||
batch_size = self.input_spec[0].shape[0]
|
||||
if not batch_size:
|
||||
raise ValueError('If a RNN is stateful, it needs to know '
|
||||
'its batch size. Specify the batch size '
|
||||
@@ -335,31 +349,30 @@ class Recurrent(Layer):
|
||||
'- If using the functional API, specify '
|
||||
'the time dimension by passing a '
|
||||
'`batch_shape` argument to your Input layer.')
|
||||
if states_value is not None:
|
||||
if not isinstance(states_value, (list, tuple)):
|
||||
states_value = [states_value]
|
||||
if len(states_value) != len(self.states):
|
||||
raise ValueError('The layer has ' + str(len(self.states)) +
|
||||
' states, but the `states_value` '
|
||||
'argument passed '
|
||||
'only has ' + str(len(states_value)) +
|
||||
' entries')
|
||||
# initialize state if None
|
||||
if self.states[0] is None:
|
||||
self.states = [K.zeros((batch_size, self.units))
|
||||
for _ in self.states]
|
||||
if not states_value:
|
||||
return
|
||||
for i, state in enumerate(self.states):
|
||||
if states_value:
|
||||
value = states_value[i]
|
||||
elif states is None:
|
||||
for state in self.states:
|
||||
K.set_value(state, np.zeros((batch_size, self.units)))
|
||||
else:
|
||||
if not isinstance(states, (list, tuple)):
|
||||
states = [states]
|
||||
if len(states) != len(self.states):
|
||||
raise ValueError('Layer ' + self.name + ' expects ' +
|
||||
str(len(self.states)) + ' states, '
|
||||
'but it received ' + str(len(states)) +
|
||||
' state values. Input received: ' +
|
||||
str(states))
|
||||
for index, (value, state) in enumerate(zip(states, self.states)):
|
||||
if value.shape != (batch_size, self.units):
|
||||
raise ValueError(
|
||||
'Expected state #' + str(i) +
|
||||
' to have shape ' + str((batch_size, self.units)) +
|
||||
' but got array with shape ' + str(value.shape))
|
||||
else:
|
||||
value = np.zeros((batch_size, self.units))
|
||||
K.set_value(state, value)
|
||||
raise ValueError('State ' + str(index) +
|
||||
' is incompatible with layer ' +
|
||||
self.name + ': expected shape=' +
|
||||
str((batch_size, self.units)) +
|
||||
', found shape=' + str(value.shape))
|
||||
K.set_value(state, value)
|
||||
|
||||
def get_config(self):
|
||||
config = {'return_sequences': self.return_sequences,
|
||||
@@ -457,6 +470,7 @@ class SimpleRNN(Recurrent):
|
||||
|
||||
self.dropout = min(1., max(0., dropout))
|
||||
self.recurrent_dropout = min(1., max(0., recurrent_dropout))
|
||||
self.state_spec = InputSpec(shape=(None, self.units))
|
||||
|
||||
def build(self, input_shape):
|
||||
if isinstance(input_shape, list):
|
||||
@@ -464,26 +478,25 @@ class SimpleRNN(Recurrent):
|
||||
|
||||
batch_size = input_shape[0] if self.stateful else None
|
||||
self.input_dim = input_shape[2]
|
||||
self.input_spec = InputSpec(shape=(batch_size, None, self.input_dim))
|
||||
self.state_spec = InputSpec(shape=(batch_size, self.units))
|
||||
self.input_spec[0] = InputSpec(shape=(batch_size, None, self.input_dim))
|
||||
|
||||
self.states = [None]
|
||||
if self.stateful:
|
||||
self.reset_states()
|
||||
|
||||
self.kernel = self.add_weight((self.input_dim, self.units),
|
||||
self.kernel = self.add_weight(shape=(self.input_dim, self.units),
|
||||
name='kernel',
|
||||
initializer=self.kernel_initializer,
|
||||
regularizer=self.kernel_regularizer,
|
||||
constraint=self.kernel_constraint)
|
||||
self.recurrent_kernel = self.add_weight(
|
||||
(self.units, self.units),
|
||||
shape=(self.units, self.units),
|
||||
name='recurrent_kernel',
|
||||
initializer=self.recurrent_initializer,
|
||||
regularizer=self.recurrent_regularizer,
|
||||
constraint=self.recurrent_constraint)
|
||||
if self.use_bias:
|
||||
self.bias = self.add_weight((self.units,),
|
||||
self.bias = self.add_weight(shape=(self.units,),
|
||||
name='bias',
|
||||
initializer=self.bias_initializer,
|
||||
regularizer=self.bias_regularizer,
|
||||
@@ -676,6 +689,7 @@ class GRU(Recurrent):
|
||||
|
||||
self.dropout = min(1., max(0., dropout))
|
||||
self.recurrent_dropout = min(1., max(0., recurrent_dropout))
|
||||
self.state_spec = InputSpec(shape=(None, self.units))
|
||||
|
||||
def build(self, input_shape):
|
||||
if isinstance(input_shape, list):
|
||||
@@ -683,29 +697,28 @@ class GRU(Recurrent):
|
||||
|
||||
batch_size = input_shape[0] if self.stateful else None
|
||||
self.input_dim = input_shape[2]
|
||||
self.input_spec = InputSpec(shape=(batch_size, None, self.input_dim))
|
||||
self.state_spec = InputSpec(shape=(batch_size, self.units))
|
||||
self.input_spec[0] = InputSpec(shape=(batch_size, None, self.input_dim))
|
||||
|
||||
self.states = [None]
|
||||
if self.stateful:
|
||||
self.reset_states()
|
||||
|
||||
self.kernel = self.add_weight((self.input_dim, self.units * 3),
|
||||
self.kernel = self.add_weight(shape=(self.input_dim, self.units * 3),
|
||||
name='kernel',
|
||||
initializer=self.kernel_initializer,
|
||||
regularizer=self.kernel_regularizer,
|
||||
constraint=self.kernel_constraint)
|
||||
self.recurrent_kernel = self.add_weight(
|
||||
(self.units, self.units * 3),
|
||||
shape=(self.units, self.units * 3),
|
||||
name='recurrent_kernel',
|
||||
initializer=self.recurrent_initializer,
|
||||
regularizer=self.recurrent_regularizer,
|
||||
constraint=self.recurrent_constraint)
|
||||
|
||||
if self.use_bias:
|
||||
self.bias = self.add_weight((self.units * 3,),
|
||||
self.bias = self.add_weight(shape=(self.units * 3,),
|
||||
name='bias',
|
||||
initializer='zero',
|
||||
initializer=self.bias_initializer,
|
||||
regularizer=self.bias_regularizer,
|
||||
constraint=self.bias_constraint)
|
||||
else:
|
||||
@@ -955,6 +968,8 @@ class LSTM(Recurrent):
|
||||
|
||||
self.dropout = min(1., max(0., dropout))
|
||||
self.recurrent_dropout = min(1., max(0., recurrent_dropout))
|
||||
self.state_spec = [InputSpec(shape=(None, self.units)),
|
||||
InputSpec(shape=(None, self.units))]
|
||||
|
||||
def build(self, input_shape):
|
||||
if isinstance(input_shape, list):
|
||||
@@ -962,36 +977,39 @@ class LSTM(Recurrent):
|
||||
|
||||
batch_size = input_shape[0] if self.stateful else None
|
||||
self.input_dim = input_shape[2]
|
||||
self.input_spec = InputSpec(shape=(batch_size, None, self.input_dim))
|
||||
self.state_spec = [InputSpec(shape=(batch_size, self.units)),
|
||||
InputSpec(shape=(batch_size, self.units))]
|
||||
self.input_spec[0] = InputSpec(shape=(batch_size, None, self.input_dim))
|
||||
|
||||
self.states = [None, None]
|
||||
if self.stateful:
|
||||
self.reset_states()
|
||||
|
||||
self.kernel = self.add_weight((self.input_dim, self.units * 4),
|
||||
self.kernel = self.add_weight(shape=(self.input_dim, self.units * 4),
|
||||
name='kernel',
|
||||
initializer=self.kernel_initializer,
|
||||
regularizer=self.kernel_regularizer,
|
||||
constraint=self.kernel_constraint)
|
||||
self.recurrent_kernel = self.add_weight(
|
||||
(self.units, self.units * 4),
|
||||
shape=(self.units, self.units * 4),
|
||||
name='recurrent_kernel',
|
||||
initializer=self.recurrent_initializer,
|
||||
regularizer=self.recurrent_regularizer,
|
||||
constraint=self.recurrent_constraint)
|
||||
|
||||
if self.use_bias:
|
||||
self.bias = self.add_weight((self.units * 4,),
|
||||
if self.unit_forget_bias:
|
||||
def bias_initializer(shape, *args, **kwargs):
|
||||
return K.concatenate([
|
||||
self.bias_initializer((self.units,), *args, **kwargs),
|
||||
initializers.Ones()((self.units,), *args, **kwargs),
|
||||
self.bias_initializer((self.units * 2,), *args, **kwargs),
|
||||
])
|
||||
else:
|
||||
bias_initializer = self.bias_initializer
|
||||
self.bias = self.add_weight(shape=(self.units * 4,),
|
||||
name='bias',
|
||||
initializer=self.bias_initializer,
|
||||
initializer=bias_initializer,
|
||||
regularizer=self.bias_regularizer,
|
||||
constraint=self.bias_constraint)
|
||||
if self.unit_forget_bias:
|
||||
bias_value = np.zeros((self.units * 4,))
|
||||
bias_value[self.units: self.units * 2] = 1.
|
||||
K.set_value(self.bias, bias_value)
|
||||
else:
|
||||
self.bias = None
|
||||
|
||||
|
||||
@@ -602,3 +602,24 @@ legacy_model_constructor_support = generate_legacy_interface(
|
||||
allowed_positional_args=None,
|
||||
conversions=[('input', 'inputs'),
|
||||
('output', 'outputs')])
|
||||
|
||||
legacy_input_support = generate_legacy_interface(
|
||||
allowed_positional_args=None,
|
||||
conversions=[('input_dtype', 'dtype')])
|
||||
|
||||
|
||||
def add_weight_args_preprocessing(args, kwargs):
|
||||
if len(args) > 1:
|
||||
if isinstance(args[1], (tuple, list)):
|
||||
kwargs['shape'] = args[1]
|
||||
args = (args[0],) + args[2:]
|
||||
if len(args) > 1:
|
||||
if isinstance(args[1], six.string_types):
|
||||
kwargs['name'] = args[1]
|
||||
args = (args[0],) + args[2:]
|
||||
return args, kwargs, []
|
||||
|
||||
|
||||
legacy_add_weight_support = generate_legacy_interface(
|
||||
allowed_positional_args=['name', 'shape'],
|
||||
preprocessor=add_weight_args_preprocessing)
|
||||
|
||||
@@ -33,6 +33,12 @@ def hinge(y_true, y_pred):
|
||||
return K.mean(K.maximum(1. - y_true * y_pred, 0.), axis=-1)
|
||||
|
||||
|
||||
def logcosh(y_true, y_pred):
|
||||
def cosh(x):
|
||||
return (K.exp(x) + K.exp(-x)) / 2
|
||||
return K.mean(K.log(cosh(y_pred - y_true)), axis=-1)
|
||||
|
||||
|
||||
def categorical_crossentropy(y_true, y_pred):
|
||||
return K.categorical_crossentropy(y_pred, y_true)
|
||||
|
||||
|
||||
@@ -6,6 +6,7 @@ from .losses import mean_absolute_error
|
||||
from .losses import mean_absolute_percentage_error
|
||||
from .losses import mean_squared_logarithmic_error
|
||||
from .losses import hinge
|
||||
from .losses import logcosh
|
||||
from .losses import squared_hinge
|
||||
from .losses import categorical_crossentropy
|
||||
from .losses import sparse_categorical_crossentropy
|
||||
|
||||
+17
-5
@@ -293,9 +293,12 @@ def model_from_config(config, custom_objects=None):
|
||||
|
||||
# Returns
|
||||
A Keras model instance (uncompiled).
|
||||
|
||||
# Raises
|
||||
TypeError if `config` is not a dictionary
|
||||
"""
|
||||
if isinstance(config, list):
|
||||
raise TypeError('`model_fom_config` expects a dictionary, not a list. '
|
||||
raise TypeError('`model_from_config` expects a dictionary, not a list. '
|
||||
'Maybe you meant to use '
|
||||
'`Sequential.from_config(config)`?')
|
||||
return layer_module.deserialize(config, custom_objects=custom_objects)
|
||||
@@ -744,7 +747,7 @@ class Sequential(Model):
|
||||
optimizer: str (name of optimizer) or optimizer object.
|
||||
See [optimizers](/optimizers).
|
||||
loss: str (name of objective function) or objective function.
|
||||
See [objectives](/objectives).
|
||||
See [losses](/losses).
|
||||
metrics: list of metrics to be evaluated by the model
|
||||
during training and testing.
|
||||
Typically you will use `metrics=['accuracy']`.
|
||||
@@ -1033,8 +1036,8 @@ class Sequential(Model):
|
||||
- a tuple (inputs, targets, sample_weights).
|
||||
All arrays should contain the same number of samples.
|
||||
The generator is expected to loop over its data
|
||||
indefinitely. An epoch finishes when `samples_per_epoch`
|
||||
samples have been seen by the model.
|
||||
indefinitely. An epoch finishes when `steps_per_epoch`
|
||||
batches have been seen by the model.
|
||||
steps_per_epoch: Total number of steps (batches of samples)
|
||||
to yield from `generator` before declaring one epoch
|
||||
finished and starting the next epoch. It should typically
|
||||
@@ -1087,7 +1090,7 @@ class Sequential(Model):
|
||||
f.close()
|
||||
|
||||
model.fit_generator(generate_arrays_from_file('/my_file.txt'),
|
||||
samples_per_epoch=10000, epochs=10)
|
||||
steps_per_epoch=1000, epochs=10)
|
||||
```
|
||||
"""
|
||||
if self.model is None:
|
||||
@@ -1227,6 +1230,15 @@ class Sequential(Model):
|
||||
|
||||
@classmethod
|
||||
def legacy_from_config(cls, config, layer_cache=None):
|
||||
"""Load a model from a legacy configuration.
|
||||
|
||||
# Arguments
|
||||
config: dictionary with configuration.
|
||||
layer_cache: cache to draw pre-existing layer.
|
||||
|
||||
# Returns
|
||||
The loaded Model.
|
||||
"""
|
||||
if not layer_cache:
|
||||
layer_cache = {}
|
||||
|
||||
|
||||
@@ -325,9 +325,9 @@ def load_img(path, grayscale=False, target_size=None):
|
||||
if img.mode != 'RGB':
|
||||
img = img.convert('RGB')
|
||||
if target_size:
|
||||
wh_tuple = (target_size[1], target_size[0])
|
||||
if img.size != wh_tuple:
|
||||
img = img.resize(wh_tuple)
|
||||
hw_tuple = (target_size[1], target_size[0])
|
||||
if img.size != hw_tuple:
|
||||
img = img.resize(hw_tuple)
|
||||
return img
|
||||
|
||||
|
||||
|
||||
@@ -78,6 +78,7 @@ def convert_kernel(kernel):
|
||||
# Raises
|
||||
ValueError: in case of invalid kernel shape or invalid data_format.
|
||||
"""
|
||||
kernel = np.asarray(kernel)
|
||||
if not 4 <= kernel.ndim <= 5:
|
||||
raise ValueError('Invalid kernel shape:', kernel.shape)
|
||||
slices = [slice(None, None, -1) for _ in range(kernel.ndim)]
|
||||
|
||||
@@ -27,8 +27,8 @@ class CustomObjectScope(object):
|
||||
Consider a custom object `MyObject`
|
||||
|
||||
```python
|
||||
with CustomObjectScope({"MyObject":MyObject}):
|
||||
layer = Dense(..., W_regularizer="MyObject")
|
||||
with CustomObjectScope({'MyObject':MyObject}):
|
||||
layer = Dense(..., kernel_regularizer='MyObject')
|
||||
# save, load, etc. will recognize custom object by name
|
||||
```
|
||||
"""
|
||||
@@ -63,8 +63,8 @@ def custom_object_scope(*args):
|
||||
Consider a custom object `MyObject`
|
||||
|
||||
```python
|
||||
with custom_object_scope({"MyObject":MyObject}):
|
||||
layer = Dense(..., W_regularizer="MyObject")
|
||||
with custom_object_scope({'MyObject':MyObject}):
|
||||
layer = Dense(..., kernel_regularizer='MyObject')
|
||||
# save, load, etc. will recognize custom object by name
|
||||
```
|
||||
|
||||
@@ -89,7 +89,7 @@ def get_custom_objects():
|
||||
|
||||
```python
|
||||
get_custom_objects().clear()
|
||||
get_custom_objects()["MyObject"] = MyObject
|
||||
get_custom_objects()['MyObject'] = MyObject
|
||||
```
|
||||
|
||||
# Returns
|
||||
|
||||
@@ -62,9 +62,14 @@ class HDF5Matrix(object):
|
||||
return self.end - self.start
|
||||
|
||||
def __getitem__(self, key):
|
||||
start, stop = key.start, key.stop
|
||||
if isinstance(key, slice):
|
||||
if key.stop + self.start <= self.end:
|
||||
idx = slice(key.start + self.start, key.stop + self.start)
|
||||
if start is None:
|
||||
start = 0
|
||||
if stop is None:
|
||||
stop = self.data.shape[0]
|
||||
if stop + self.start <= self.end:
|
||||
idx = slice(start + self.start, stop + self.start)
|
||||
else:
|
||||
raise IndexError
|
||||
elif isinstance(key, int):
|
||||
|
||||
@@ -19,8 +19,11 @@ def print_summary(model, line_length=None, positions=None):
|
||||
else:
|
||||
sequential_like = True
|
||||
for v in model.nodes_by_depth.values():
|
||||
if len(v) > 1:
|
||||
if (len(v) > 1) or (len(v) == 1 and len(v[0].inbound_layers) > 1):
|
||||
# if the model has multiple nodes or if the nodes have multiple inbound_layers
|
||||
# the model is no longer sequential
|
||||
sequential_like = False
|
||||
break
|
||||
|
||||
if sequential_like:
|
||||
line_length = line_length or 65
|
||||
@@ -75,12 +78,10 @@ def print_summary(model, line_length=None, positions=None):
|
||||
except AttributeError:
|
||||
output_shape = 'multiple'
|
||||
connections = []
|
||||
for node_index, node in enumerate(layer.inbound_nodes):
|
||||
if relevant_nodes:
|
||||
node_key = layer.name + '_ib-' + str(node_index)
|
||||
if node_key not in relevant_nodes:
|
||||
# node is node part of the current network
|
||||
continue
|
||||
for node in layer.inbound_nodes:
|
||||
if relevant_nodes and node not in relevant_nodes:
|
||||
# node is not part of the current network
|
||||
continue
|
||||
for i in range(len(node.inbound_layers)):
|
||||
inbound_layer = node.inbound_layers[i].name
|
||||
inbound_node_index = node.node_indices[i]
|
||||
@@ -111,7 +112,10 @@ def print_summary(model, line_length=None, positions=None):
|
||||
else:
|
||||
print('_' * line_length)
|
||||
|
||||
trainable_count, non_trainable_count = count_total_params(layers, layer_set=None)
|
||||
trainable_count = int(
|
||||
np.sum([K.count_params(p) for p in set(model.trainable_weights)]))
|
||||
non_trainable_count = int(
|
||||
np.sum([K.count_params(p) for p in set(model.non_trainable_weights)]))
|
||||
|
||||
print('Total params: {:,}'.format(trainable_count + non_trainable_count))
|
||||
print('Trainable params: {:,}'.format(trainable_count))
|
||||
@@ -119,35 +123,6 @@ def print_summary(model, line_length=None, positions=None):
|
||||
print('_' * line_length)
|
||||
|
||||
|
||||
def count_total_params(layers, layer_set=None):
|
||||
"""Counts the number of parameters in a list of layers.
|
||||
|
||||
# Arguments
|
||||
layers: list of layers.
|
||||
layer_set: set of layers already seen
|
||||
(so that we don't count their weights twice).
|
||||
|
||||
# Returns
|
||||
A tuple (count of trainable weights, count of non-trainable weights.)
|
||||
"""
|
||||
if layer_set is None:
|
||||
layer_set = set()
|
||||
trainable_count = 0
|
||||
non_trainable_count = 0
|
||||
for layer in layers:
|
||||
if layer in layer_set:
|
||||
continue
|
||||
layer_set.add(layer)
|
||||
if hasattr(layer, 'layers'):
|
||||
t, nt = count_total_params(layer.layers, layer_set)
|
||||
trainable_count += t
|
||||
non_trainable_count += nt
|
||||
else:
|
||||
trainable_count += np.sum([K.count_params(p) for p in layer.trainable_weights])
|
||||
non_trainable_count += np.sum([K.count_params(p) for p in layer.non_trainable_weights])
|
||||
return int(trainable_count), int(non_trainable_count)
|
||||
|
||||
|
||||
def convert_all_kernels_in_model(model):
|
||||
"""Converts all convolution kernels in a model from Theano to TensorFlow.
|
||||
|
||||
|
||||
@@ -13,7 +13,10 @@ except ImportError:
|
||||
|
||||
|
||||
def _check_pydot():
|
||||
if not (pydot and pydot.find_graphviz()):
|
||||
try:
|
||||
# Attempt to create an image of a blank graph to check the pydot/graphviz installation.
|
||||
pydot.Dot.create(pydot.Dot())
|
||||
except Exception: # pydot raises a generic Exception here, so no specific class can be caught.
|
||||
raise ImportError('Failed to import pydot. You must install pydot'
|
||||
' and graphviz for `pydotprint` to work.')
|
||||
|
||||
|
||||
+2
-2
@@ -3,12 +3,12 @@ from setuptools import find_packages
|
||||
|
||||
|
||||
setup(name='Keras',
|
||||
version='2.0.3',
|
||||
version='2.0.4',
|
||||
description='Deep Learning for Python',
|
||||
author='Francois Chollet',
|
||||
author_email='francois.chollet@gmail.com',
|
||||
url='https://github.com/fchollet/keras',
|
||||
download_url='https://github.com/fchollet/keras/tarball/2.0.3',
|
||||
download_url='https://github.com/fchollet/keras/tarball/2.0.4',
|
||||
license='MIT',
|
||||
install_requires=['theano', 'pyyaml', 'six'],
|
||||
extras_require={
|
||||
|
||||
@@ -83,6 +83,10 @@ class TestBackend(object):
|
||||
|
||||
check_two_tensor_operation('batch_dot', (4, 2, 3), (4, 5, 3),
|
||||
axes=(2, 2))
|
||||
check_two_tensor_operation('batch_dot', (4, 2, 3), (4, 3),
|
||||
axes=(2, 1))
|
||||
check_two_tensor_operation('batch_dot', (4, 2), (4, 2, 3),
|
||||
axes=(1, 1))
|
||||
check_two_tensor_operation('batch_dot', (32, 20), (32, 20), axes=1)
|
||||
check_two_tensor_operation('batch_dot', (32, 20), (32, 20), axes=(1, 1))
|
||||
check_single_tensor_operation('transpose', (4, 2))
|
||||
@@ -576,6 +580,41 @@ class TestBackend(object):
|
||||
assert_allclose(tf_last_output, th_last_output, atol=1e-04)
|
||||
assert_allclose(tf_outputs, th_outputs, atol=1e-04)
|
||||
|
||||
@pytest.mark.parametrize('x_np,axis,keepdims', [
|
||||
(np.array([1.1, 0.8, 0.9]), 0, False),
|
||||
(np.array([[1.1, 0.8, 0.9]]), 0, False),
|
||||
(np.array([[1.1, 0.8, 0.9]]), 1, False),
|
||||
(np.array([[1.1, 0.8, 0.9]]), -1, False),
|
||||
(np.array([[1.1, 0.8, 0.9]]), 1, True),
|
||||
(np.array([[1.1], [1.2]]), 0, False),
|
||||
(np.array([[1.1], [1.2]]), 1, False),
|
||||
(np.array([[1.1], [1.2]]), -1, False),
|
||||
(np.array([[1.1], [1.2]]), -1, True),
|
||||
(np.array([[1.1, 1.2, 1.3], [0.9, 0.7, 1.4]]), None, False),
|
||||
(np.array([[1.1, 1.2, 1.3], [0.9, 0.7, 1.4]]), 0, False),
|
||||
(np.array([[1.1, 1.2, 1.3], [0.9, 0.7, 1.4]]), 1, False),
|
||||
(np.array([[1.1, 1.2, 1.3], [0.9, 0.7, 1.4]]), -1, False),
|
||||
])
|
||||
@pytest.mark.parametrize('K', [KTH, KTF], ids=["KTH", "KTF"])
|
||||
def test_logsumexp(self, x_np, axis, keepdims, K):
|
||||
'''
|
||||
Check if K.logsumexp works properly for values close to one.
|
||||
'''
|
||||
x = K.variable(x_np)
|
||||
assert_allclose(K.eval(K.logsumexp(x, axis=axis, keepdims=keepdims)),
|
||||
np.log(np.sum(np.exp(x_np), axis=axis, keepdims=keepdims)),
|
||||
rtol=1e-5)
|
||||
|
||||
@pytest.mark.parametrize('K', [KTH, KTF], ids=["KTH", "KTF"])
|
||||
def test_logsumexp_optim(self, K):
|
||||
'''
|
||||
Check if optimization works.
|
||||
'''
|
||||
x_np = np.array([1e+4, 1e-4])
|
||||
assert_allclose(K.eval(K.logsumexp(K.variable(x_np), axis=0)),
|
||||
1e4,
|
||||
rtol=1e-5)
|
||||
|
||||
def test_switch(self):
|
||||
val = np.random.random()
|
||||
xth = KTH.variable(val)
|
||||
|
||||
@@ -490,6 +490,67 @@ def test_recursion():
|
||||
|
||||
|
||||
@keras_test
|
||||
def test_load_layers():
|
||||
from keras.layers import ConvLSTM2D, TimeDistributed, Bidirectional, Conv2D, Input
|
||||
from keras.models import Model
|
||||
from keras.engine.topology import preprocess_weights_for_loading
|
||||
|
||||
if K.backend() == 'tensorflow':
|
||||
inputs = Input(shape=(10, 20, 20, 1))
|
||||
else:
|
||||
inputs = Input(shape=(10, 1, 20, 20))
|
||||
td_conv = TimeDistributed(Conv2D(15, (5, 5)))(inputs)
|
||||
bi_convlstm2d = Bidirectional(ConvLSTM2D(10, (3, 3)), merge_mode='concat')(td_conv)
|
||||
model = Model(inputs=inputs, outputs=bi_convlstm2d)
|
||||
|
||||
weight_value_tuples = []
|
||||
|
||||
# TimeDistributed Conv2D layer
|
||||
# use 'channels_first' data format to check that the function is being called correctly for Conv2D
|
||||
# old: (filters, stack_size, kernel_rows, kernel_cols)
|
||||
# new: (kernel_rows, kernel_cols, stack_size, filters)
|
||||
weight_tensor_td_conv_old = list()
|
||||
weight_tensor_td_conv_old.append(np.zeros((15, 1, 5, 5)))
|
||||
weight_tensor_td_conv_old.append(np.zeros((15,)))
|
||||
td_conv_layer = model.layers[1]
|
||||
td_conv_layer.layer.data_format = 'channels_first'
|
||||
weight_tensor_td_conv_new = preprocess_weights_for_loading(td_conv_layer,
|
||||
weight_tensor_td_conv_old,
|
||||
original_keras_version='1')
|
||||
symbolic_weights = td_conv_layer.weights
|
||||
assert (len(symbolic_weights) == len(weight_tensor_td_conv_new))
|
||||
weight_value_tuples += zip(symbolic_weights, weight_tensor_td_conv_new)
|
||||
|
||||
# Bidirectional ConvLSTM2D layer
|
||||
# old ConvLSTM2D took a list of 12 weight tensors, returns a list of 3 concatenated larger tensors.
|
||||
weight_tensor_bi_convlstm_old = []
|
||||
for j in range(2): # bidirectional
|
||||
for i in range(4):
|
||||
weight_tensor_bi_convlstm_old.append(np.zeros((3, 3, 15, 10))) # kernel
|
||||
weight_tensor_bi_convlstm_old.append(np.zeros((3, 3, 10, 10))) # recurrent kernel
|
||||
weight_tensor_bi_convlstm_old.append(np.zeros((10,))) # bias
|
||||
|
||||
bi_convlstm_layer = model.layers[2]
|
||||
weight_tensor_bi_convlstm_new = preprocess_weights_for_loading(bi_convlstm_layer,
|
||||
weight_tensor_bi_convlstm_old,
|
||||
original_keras_version='1')
|
||||
|
||||
symbolic_weights = bi_convlstm_layer.weights
|
||||
assert (len(symbolic_weights) == len(weight_tensor_bi_convlstm_new))
|
||||
weight_value_tuples += zip(symbolic_weights, weight_tensor_bi_convlstm_new)
|
||||
|
||||
K.batch_set_value(weight_value_tuples)
|
||||
|
||||
assert np.all(K.eval(model.layers[1].weights[0]) == weight_tensor_td_conv_new[0])
|
||||
assert np.all(K.eval(model.layers[1].weights[1]) == weight_tensor_td_conv_new[1])
|
||||
assert np.all(K.eval(model.layers[2].weights[0]) == weight_tensor_bi_convlstm_new[0])
|
||||
assert np.all(K.eval(model.layers[2].weights[1]) == weight_tensor_bi_convlstm_new[1])
|
||||
assert np.all(K.eval(model.layers[2].weights[2]) == weight_tensor_bi_convlstm_new[2])
|
||||
assert np.all(K.eval(model.layers[2].weights[3]) == weight_tensor_bi_convlstm_new[3])
|
||||
assert np.all(K.eval(model.layers[2].weights[4]) == weight_tensor_bi_convlstm_new[4])
|
||||
assert np.all(K.eval(model.layers[2].weights[5]) == weight_tensor_bi_convlstm_new[5])
|
||||
|
||||
|
||||
def test_recursion_with_bn_and_loss():
|
||||
model1 = Sequential([
|
||||
layers.Dense(5, input_dim=5, activity_regularizer='l1'),
|
||||
|
||||
@@ -52,6 +52,31 @@ def test_batchnorm_correctness():
|
||||
assert_allclose(out.std(), 1.0, atol=1e-1)
|
||||
|
||||
|
||||
@keras_test
|
||||
def test_batchnorm_training_argument():
|
||||
bn1 = normalization.BatchNormalization(input_shape=(10,))
|
||||
x1 = Input(shape=(10,))
|
||||
y1 = bn1(x1, training=True)
|
||||
assert bn1.updates
|
||||
|
||||
model1 = Model(x1, y1)
|
||||
np.random.seed(123)
|
||||
x = np.random.normal(loc=5.0, scale=10.0, size=(20, 10))
|
||||
output_a = model1.predict(x)
|
||||
|
||||
model1.compile(loss='mse', optimizer='rmsprop')
|
||||
model1.fit(x, x, epochs=1, verbose=0)
|
||||
output_b = model1.predict(x)
|
||||
assert np.abs(np.sum(output_a - output_b)) > 0.1
|
||||
assert_allclose(output_b.mean(), 0.0, atol=1e-1)
|
||||
assert_allclose(output_b.std(), 1.0, atol=1e-1)
|
||||
|
||||
bn2 = normalization.BatchNormalization(input_shape=(10,))
|
||||
x2 = Input(shape=(10,))
|
||||
bn2(x2, training=False)
|
||||
assert not bn2.updates
|
||||
|
||||
|
||||
@keras_test
|
||||
def test_batchnorm_mode_twice():
|
||||
# This is a regression test for issue #4881 with the old
|
||||
|
||||
@@ -57,6 +57,14 @@ def test_dropout(layer_class):
|
||||
'dropout': 0.1,
|
||||
'recurrent_dropout': 0.1},
|
||||
input_shape=(num_samples, timesteps, embedding_dim))
|
||||
# Test that dropout is not applied during testing
|
||||
x = np.random.random((num_samples, timesteps, embedding_dim))
|
||||
layer = layer_class(units, dropout=0.5, recurrent_dropout=0.5,
|
||||
input_shape=(timesteps, embedding_dim))
|
||||
model = Sequential([layer])
|
||||
y1 = model.predict(x)
|
||||
y2 = model.predict(x)
|
||||
assert_allclose(y1, y2)
|
||||
|
||||
|
||||
@rnn_test
|
||||
@@ -169,29 +177,41 @@ def test_from_config(layer_class):
|
||||
|
||||
|
||||
@rnn_test
|
||||
def test_specify_initial_state(layer_class):
|
||||
def test_specify_initial_state_keras_tensor(layer_class):
|
||||
num_states = 2 if layer_class is recurrent.LSTM else 1
|
||||
|
||||
# Test with Keras tensor
|
||||
inputs = Input((timesteps, embedding_dim))
|
||||
initial_state = [Input((units,)) for _ in range(num_states)]
|
||||
layer = layer_class(units)
|
||||
output = layer(inputs, initial_state=initial_state)
|
||||
if len(initial_state) == 1:
|
||||
output = layer(inputs, initial_state=initial_state[0])
|
||||
else:
|
||||
output = layer(inputs, initial_state=initial_state)
|
||||
assert initial_state[0] in layer.inbound_nodes[0].input_tensors
|
||||
|
||||
model = Model([inputs] + initial_state, output)
|
||||
model.compile(loss='categorical_crossentropy', optimizer='adam')
|
||||
|
||||
inputs = np.random.random((num_samples, timesteps, embedding_dim))
|
||||
initial_states = [np.random.random((num_samples, units))
|
||||
for _ in range(num_states)]
|
||||
initial_state = [np.random.random((num_samples, units))
|
||||
for _ in range(num_states)]
|
||||
targets = np.random.random((num_samples, units))
|
||||
model.fit([inputs] + initial_states, targets)
|
||||
model.fit([inputs] + initial_state, targets)
|
||||
|
||||
|
||||
@rnn_test
|
||||
def test_specify_initial_state_non_keras_tensor(layer_class):
|
||||
num_states = 2 if layer_class is recurrent.LSTM else 1
|
||||
|
||||
# Test with non-Keras tensor
|
||||
inputs = Input((timesteps, embedding_dim))
|
||||
initial_state = [K.random_normal_variable((units,), 0, 1) for _ in range(num_states)]
|
||||
initial_state = [K.random_normal_variable((num_samples, units), 0, 1)
|
||||
for _ in range(num_states)]
|
||||
layer = layer_class(units)
|
||||
output = layer(inputs, initial_state=initial_state)
|
||||
model = Model([inputs], output)
|
||||
|
||||
model = Model(inputs, output)
|
||||
model.compile(loss='categorical_crossentropy', optimizer='adam')
|
||||
|
||||
inputs = np.random.random((num_samples, timesteps, embedding_dim))
|
||||
@@ -213,10 +233,38 @@ def test_reset_states_with_values(layer_class):
|
||||
atol=1e-4)
|
||||
state_shapes = [K.int_shape(state) for state in layer.states]
|
||||
values = [np.ones(shape) for shape in state_shapes]
|
||||
if len(values) == 1:
|
||||
values = values[0]
|
||||
layer.reset_states(values)
|
||||
np.testing.assert_allclose(K.eval(layer.states[0]),
|
||||
np.ones(K.int_shape(layer.states[0])),
|
||||
atol=1e-4)
|
||||
|
||||
# Test fit with invalid data
|
||||
with pytest.raises(ValueError):
|
||||
layer.reset_states([1] * (len(layer.states) + 1))
|
||||
|
||||
|
||||
@rnn_test
|
||||
def test_specify_state_with_masking(layer_class):
|
||||
''' This test based on a previously failing issue here:
|
||||
https://github.com/fchollet/keras/issues/1567
|
||||
'''
|
||||
num_states = 2 if layer_class is recurrent.LSTM else 1
|
||||
|
||||
inputs = Input((timesteps, embedding_dim))
|
||||
masked_inputs = Masking()(inputs)
|
||||
initial_state = [Input((units,)) for _ in range(num_states)]
|
||||
output = layer_class(units)(inputs, initial_state=initial_state)
|
||||
|
||||
model = Model([inputs] + initial_state, output)
|
||||
model.compile(loss='categorical_crossentropy', optimizer='adam')
|
||||
|
||||
inputs = np.random.random((num_samples, timesteps, embedding_dim))
|
||||
initial_state = [np.random.random((num_samples, units))
|
||||
for _ in range(num_states)]
|
||||
targets = np.random.random((num_samples, units))
|
||||
model.fit([inputs] + initial_state, targets)
|
||||
|
||||
if __name__ == '__main__':
|
||||
pytest.main([__file__])
|
||||
|
||||
@@ -14,7 +14,8 @@ allobj = [losses.mean_squared_error,
|
||||
losses.binary_crossentropy,
|
||||
losses.kullback_leibler_divergence,
|
||||
losses.poisson,
|
||||
losses.cosine_proximity]
|
||||
losses.cosine_proximity,
|
||||
losses.logcosh]
|
||||
|
||||
|
||||
def test_objective_shapes_3d():
|
||||
|
||||
@@ -17,6 +17,7 @@ all_metrics = [
|
||||
metrics.binary_crossentropy,
|
||||
metrics.poisson,
|
||||
metrics.cosine_proximity,
|
||||
metrics.logcosh,
|
||||
]
|
||||
|
||||
all_sparse_metrics = [
|
||||
|
||||
@@ -0,0 +1,151 @@
|
||||
import importlib
|
||||
import inspect
|
||||
import re
|
||||
import sys
|
||||
from itertools import compress
|
||||
|
||||
import pytest
|
||||
|
||||
modules = ['keras.layers', 'keras.models', 'keras', 'keras.backend.tensorflow_backend']
|
||||
accepted_name = ['from_config']
|
||||
accepted_module = ['keras.legacy.layers', 'keras.utils.generic_utils']
|
||||
|
||||
# Functions or classes with less than 'MIN_CODE_SIZE' lines can be ignored
|
||||
MIN_CODE_SIZE = 10
|
||||
|
||||
|
||||
def handle_class(name, member):
|
||||
if is_accepted(name, member):
|
||||
return
|
||||
|
||||
if member.__doc__ is None and not member_too_small(member):
|
||||
raise ValueError("{} class doesn't have any documentation".format(name),
|
||||
member.__module__, inspect.getmodule(member).__file__)
|
||||
for n, met in inspect.getmembers(member):
|
||||
if inspect.ismethod(met):
|
||||
handle_method(n, met)
|
||||
|
||||
|
||||
def handle_function(name, member):
|
||||
if is_accepted(name, member):
|
||||
return
|
||||
doc = member.__doc__
|
||||
if doc is None and not member_too_small(member):
|
||||
raise ValueError("{} function doesn't have any documentation".format(name),
|
||||
member.__module__, inspect.getmodule(member).__file__)
|
||||
args = list(inspect.signature(member).parameters.keys())
|
||||
assert_args_presence(args, doc, member, name)
|
||||
assert_function_style(name, member, doc, args)
|
||||
assert_doc_style(name, member, doc)
|
||||
|
||||
|
||||
def assert_doc_style(name, member, doc):
    """Checks the first line of a docstring.

    The first line must contain text and that text must end with a
    period.

    # Arguments
        name: name of the documented member, used in error messages.
        member: the documented object; its `__module__` is attached to
            the error.
        doc: the docstring to check.

    # Raises
        ValueError: if the first line is blank or does not end with '.'.
    """
    summary = doc.split("\n")[0].strip()
    if not summary:
        raise ValueError("{} the documentation should be on the first line.".format(name),
                         member.__module__)
    if not summary.endswith('.'):
        raise ValueError("{} first line should end with a '.'".format(name),
                         member.__module__)
|
||||
|
||||
|
||||
def _count_nested_matches(member, pattern):
    """Counts matches of `pattern` in the code objects nested in `member`.

    # Arguments
        member: a function object; the code objects stored in
            `member.__code__.co_consts` are the ones scanned.
        pattern: regular expression, applied with `re.MULTILINE`.

    # Returns
        Total number of matches over the source of every nested code
        object.
    """
    nested_sources = [inspect.getsource(const)
                      for const in member.__code__.co_consts
                      if inspect.iscode(const)]
    return sum(len(re.findall(pattern, source, re.MULTILINE))
               for source in nested_sources)


def assert_function_style(name, member, doc, args):
    """Checks that a docstring has the sections the function's code calls for.

    A '# Returns' (resp. '# Raises') section is required when the source
    contains more `return <value>` (resp. `raise <value>`) statements
    than its nested code objects account for. An '# Arguments' section
    is required when `args` is not empty. Finally every section heading
    that is present must have a blank line above it.

    # Arguments
        name: name of the function, used in error messages.
        member: the function object; its source is read with
            `inspect.getsource`.
        doc: the docstring of the function.
        args: list of the function's parameter names.

    # Raises
        ValueError: if a required section is missing or a heading is
            not preceded by a blank line.
    """
    return_pattern = r"\s*return \S+"
    # The same anchored pattern is used for the function and for its
    # nested code: with an unanchored pattern on the nested side, prose
    # such as "may raise errors" in a nested comment would be counted as
    # a statement and hide a real, undocumented `raise`.
    raise_pattern = r"^\s*raise \S+"

    code = inspect.getsource(member)

    has_return = re.findall(return_pattern, code, re.MULTILINE)
    if has_return and "# Returns" not in doc:
        if _count_nested_matches(member, return_pattern) < len(has_return):
            raise ValueError("{} needs a '# Returns' section".format(name),
                             member.__module__)

    has_raise = re.findall(raise_pattern, code, re.MULTILINE)
    if has_raise and "# Raises" not in doc:
        if _count_nested_matches(member, raise_pattern) < len(has_raise):
            raise ValueError("{} needs a '# Raises' section".format(name),
                             member.__module__)

    if len(args) > 0 and "# Arguments" not in doc:
        raise ValueError("{} needs a '# Arguments' section".format(name),
                         member.__module__)

    assert_blank_before(name, member, doc, ['# Arguments', '# Raises', '# Returns'])
|
||||
|
||||
|
||||
def assert_blank_before(name, member, doc, keywords):
    """Checks that section headings are preceded by a blank line.

    Only the first occurrence of each keyword is checked, and a keyword
    counts as present only when it is alone on its line (ignoring
    surrounding whitespace).

    # Arguments
        name: name of the documented member, used in error messages.
        member: the documented object; its `__module__` is attached to
            the error.
        doc: the docstring to check.
        keywords: list of heading lines, e.g. '# Arguments'.

    # Raises
        ValueError: if a keyword is on the first line of the docstring
            or the line above it is not blank.
    """
    doc_lines = [line.strip() for line in doc.split('\n')]
    for keyword in keywords:
        if keyword not in doc_lines:
            continue
        index = doc_lines.index(keyword)
        # A heading on line 0 has nothing above it. Without this guard
        # `doc_lines[-1]` would wrap around to the last line, which is
        # usually blank, and the check would pass by accident.
        if index == 0 or doc_lines[index - 1] != '':
            raise ValueError(
                "{} '{}' should have a blank line above.".format(name, keyword),
                member.__module__)
|
||||
|
||||
|
||||
def is_accepted(name, member):
    """Tells whether a member is exempt from the documentation checks.

    # Arguments
        name: name under which the member was found.
        member: the object itself; only its `__module__` is read.

    # Returns
        True if the member's module name does not contain 'keras', if
        `name` is listed in `accepted_name`, or if the member's module
        is listed in `accepted_module`; False otherwise.
    """
    module_name = member.__module__
    if 'keras' in str(module_name):
        return name in accepted_name or module_name in accepted_module
    return True
|
||||
|
||||
|
||||
def member_too_small(member):
    """Tells whether the source of a member is short enough to be exempt.

    # Arguments
        member: object whose source `inspect.getsource` can retrieve.

    # Returns
        True if splitting the source text on newlines gives fewer than
        `MIN_CODE_SIZE` pieces.
    """
    source = inspect.getsource(member)
    # Splitting on '\n' yields one more piece than there are newlines.
    piece_count = source.count('\n') + 1
    return piece_count < MIN_CODE_SIZE
|
||||
|
||||
|
||||
def assert_args_presence(args, doc, member, name):
    """Checks that every argument is documented, styled and in order.

    Three checks run in sequence: each argument name must occur
    somewhere in the docstring; each must occur as a standalone word
    'name:' once '*' characters are removed; and the first 'name:'
    occurrences must follow the order of `args`.

    # Arguments
        args: list of the function's parameter names.
        doc: the docstring of the function.
        member: the documented object; its `__module__` is attached to
            the error.
        name: name of the function, used in error messages.

    # Raises
        ValueError: on the first of the three checks that fails.
    """
    undocumented = [arg for arg in args if arg not in doc]
    if undocumented:
        raise ValueError(
            "{} {} arguments are not present in documentation ".format(name, undocumented),
            member.__module__)

    # Dropping '*' lets '*args' / '**kwargs' entries match the bare names.
    words = doc.replace('*', '').split()
    labels = [arg + ":" for arg in args]

    # Check arguments styling
    badly_styled = [arg for arg, label in zip(args, labels) if label not in words]
    if badly_styled:
        raise ValueError(
            "{} {} are not style properly 'argument': documentation".format(name, badly_styled),
            member.__module__)

    # Check arguments order
    positions = [words.index(label) for label in labels]
    if positions != sorted(positions):
        raise ValueError(
            "{} arguments order is different from the documentation".format(name),
            member.__module__)
|
||||
|
||||
|
||||
def handle_method(name, member):
    """Checks the docstring of a method unless it is exempt.

    A method is exempt when `name` is listed in `accepted_name` or its
    module is listed in `accepted_module`; otherwise it is checked by
    `handle_function`.

    # Arguments
        name: name of the method.
        member: the method object.
    """
    exempt = name in accepted_name or member.__module__ in accepted_module
    if not exempt:
        handle_function(name, member)
|
||||
|
||||
|
||||
def handle_module(mod, _visited=None):
    """Checks every class and function found in a module.

    Classes go to `handle_class` and functions to `handle_function`.
    Module attributes are followed recursively only when their name
    contains 'keras'.

    # Arguments
        mod: the module object to walk.
        _visited: internal set of ids of modules already walked;
            callers should leave it unset.
    """
    if _visited is None:
        _visited = set()
    # Modules can reference each other; without this guard a cycle of
    # 'keras'-named attributes would recurse until the stack overflows.
    if id(mod) in _visited:
        return
    _visited.add(id(mod))

    for name, mem in inspect.getmembers(mod):
        if inspect.isclass(mem):
            handle_class(name, mem)
        elif inspect.isfunction(mem):
            handle_function(name, mem)
        elif 'keras' in name and inspect.ismodule(mem):
            # Only test keras' modules
            handle_module(mem, _visited)
|
||||
|
||||
|
||||
@pytest.mark.skipif(sys.version_info < (3, 3), reason="requires python3.3")
def test_doc():
    """Imports each entry of `modules` and checks its documentation.

    Each module is imported and checked before the next one is
    imported.
    """
    for imported in map(importlib.import_module, modules):
        handle_module(imported)
|
||||
|
||||
|
||||
# Running this file directly hands it to pytest.
if __name__ == "__main__":
    pytest.main([__file__])
|
||||
Referência em uma Nova Issue
Bloquear um usuário