Comparar commits
153 Commits
| Autor | SHA1 | Data | |
|---|---|---|---|
| d2189fef32 | |||
| 0a35173b33 | |||
| 5d2f3101ae | |||
| 73aac1c7c9 | |||
| 815f7064a2 | |||
| 63f81cafbd | |||
| 4c1a6fc27e | |||
| 9dbf04b699 | |||
| 856a99de6c | |||
| f463d23b38 | |||
| 775719983f | |||
| e839a4bdac | |||
| cfd2763514 | |||
| 0b8a52e463 | |||
| cb77f7d7e2 | |||
| e8e56d9013 | |||
| c60e2dfbdb | |||
| 9807dcd69b | |||
| 2bd4c295d6 | |||
| 35d66d672b | |||
| 11eaaeb695 | |||
| cc1251b307 | |||
| 4564dab62a | |||
| 0e62ae4eaa | |||
| 876bca046f | |||
| d7e0ba1c39 | |||
| e0bcee4963 | |||
| ca4fc2e72f | |||
| 83544cdb41 | |||
| 37978fcda6 | |||
| 61d76d4a07 | |||
| cc6280f34d | |||
| 5bab11eec7 | |||
| 65b048455b | |||
| 9be4480eab | |||
| 7219bb4b96 | |||
| fd2c6dbafd | |||
| 19c736a4ca | |||
| b9bf954f24 | |||
| 0bc7b25f59 | |||
| 9f47903daf | |||
| 7f1eb97000 | |||
| c506fbda4a | |||
| aa05c44145 | |||
| 2e0d96d1a2 | |||
| 6b62678e90 | |||
| cc8a901c31 | |||
| 7c44d16a77 | |||
| ee07e6ef74 | |||
| 88a0ab5e93 | |||
| 57bb9e2613 | |||
| c1857cfa66 | |||
| 2d8307622d | |||
| af932d3480 | |||
| 4ed53ae5a4 | |||
| 0d798c662b | |||
| 5f4675bd7f | |||
| 14b175c9b0 | |||
| 8d4e75894a | |||
| 3d888cbf7e | |||
| 3b76158c49 | |||
| 788d838160 | |||
| 56ae624f12 | |||
| ef43a271ee | |||
| 0b1a1e9761 | |||
| 52e3e2623a | |||
| 46a2fb6fd8 | |||
| b0f2446370 | |||
| 7a2e8ce8a2 | |||
| 200948c3be | |||
| 35612d698a | |||
| 8a5767a53e | |||
| f4ca4026a3 | |||
| e4d0ed5992 | |||
| 1325e73a59 | |||
| b6d8e9dd4e | |||
| 69afdd7ec4 | |||
| d5cd2687ed | |||
| ca60201fe5 | |||
| dd6697738b | |||
| cccc118225 | |||
| 36578f8569 | |||
| c18a9cd405 | |||
| cba5cfa597 | |||
| b2048d1d88 | |||
| 8bfafd6d7f | |||
| a6521de3e3 | |||
| 02ddc11858 | |||
| 588261acfc | |||
| 61a48d487f | |||
| eee20b4614 | |||
| 9827db2c85 | |||
| b9403cb262 | |||
| e379fff425 | |||
| 80c0c762fd | |||
| 51818e5b7b | |||
| 393642df55 | |||
| 6bb9eecd0c | |||
| f026bb2f5a | |||
| 5c3db2fea6 | |||
| 1a953feaf7 | |||
| 0733a80297 | |||
| a5653c245a | |||
| 1724fe5882 | |||
| a582b184c9 | |||
| 36ef1ca7b4 | |||
| 27edefe48c | |||
| 4b1b86783f | |||
| 7009e80b74 | |||
| cd82deb152 | |||
| 65b794957f | |||
| 7b4e6ef50c | |||
| f804b19fdc | |||
| eff8731db4 | |||
| 43ddbf4a4f | |||
| c5b3959b42 | |||
| 289804c67c | |||
| c6825eb343 | |||
| 92b8ad9d02 | |||
| 3dfba0504b | |||
| 4bdb43f244 | |||
| 83e285fd00 | |||
| 4e1ec93c2f | |||
| 2224c4cc1e | |||
| 9f6f206ccd | |||
| f3eeb982d0 | |||
| 2be651dc39 | |||
| c77ded2eb6 | |||
| 06ab8dbd34 | |||
| 8e293db9b5 | |||
| 5040aa386d | |||
| 8e67b040e8 | |||
| 84909a49c2 | |||
| 0969c569a6 | |||
| cb8f0a83e6 | |||
| 5648119b66 | |||
| 25e9b90550 | |||
| e98b24a767 | |||
| 034822359d | |||
| 2e60c99924 | |||
| 4bb6ac0b04 | |||
| c368b86d11 | |||
| 49335d4345 | |||
| 93c1a8c675 | |||
| 5f3bdeb0a3 | |||
| ddf908359c | |||
| 37f4d11ea9 | |||
| 94fbbd1c7e | |||
| 332d43e023 | |||
| f84fe7ce17 | |||
| 16d0e40560 | |||
| da24be79ab | |||
| ab8642e0ff |
+7
-3
@@ -8,11 +8,15 @@ before_install:
|
||||
- export PATH=/home/travis/miniconda/bin:$PATH
|
||||
- conda update --yes conda
|
||||
python:
|
||||
- "2.7"
|
||||
- "3.4"
|
||||
# command to install dependencies
|
||||
install:
|
||||
- conda install --yes python=$TRAVIS_PYTHON_VERSION numpy scipy matplotlib pandas pytest h5py
|
||||
# Coverage packages are on my binstar channel
|
||||
- python setup.py install
|
||||
- pip install pytest-cov python-coveralls
|
||||
- pip install git+git://github.com/Theano/Theano.git
|
||||
# command to run tests
|
||||
script: py.test tests/
|
||||
script:
|
||||
- PYTHONPATH=$PWD:$PYTHONPATH py.test -v --cov-report term-missing --cov keras tests/
|
||||
after_success:
|
||||
- coveralls
|
||||
|
||||
+73
-52
@@ -34,13 +34,16 @@ from keras.layers.core import Dense, Dropout, Activation
|
||||
from keras.optimizers import SGD
|
||||
|
||||
model = Sequential()
|
||||
model.add(Dense(20, 64, init='uniform'))
|
||||
# Dense(64) is a fully-connected layer with 64 hidden units.
|
||||
# in the first layer, you must specify the expected input data shape:
|
||||
# here, 20-dimensional vectors.
|
||||
model.add(Dense(64, input_dim=20, init='uniform'))
|
||||
model.add(Activation('tanh'))
|
||||
model.add(Dropout(0.5))
|
||||
model.add(Dense(64, 64, init='uniform'))
|
||||
model.add(Dense(64, init='uniform'))
|
||||
model.add(Activation('tanh'))
|
||||
model.add(Dropout(0.5))
|
||||
model.add(Dense(64, 2, init='uniform'))
|
||||
model.add(Dense(2, init='uniform'))
|
||||
model.add(Activation('softmax'))
|
||||
|
||||
sgd = SGD(lr=0.1, decay=1e-6, momentum=0.9, nesterov=True)
|
||||
@@ -54,11 +57,11 @@ score = model.evaluate(X_test, y_test, batch_size=16)
|
||||
|
||||
```python
|
||||
model = Sequential()
|
||||
model.add(Dense(20, 64, init='uniform', activation='tanh'))
|
||||
model.add(Dense(64, input_dim=20, init='uniform', activation='tanh'))
|
||||
model.add(Dropout(0.5))
|
||||
model.add(Dense(64, 64, init='uniform', activation='tanh'))
|
||||
model.add(Dense(64, init='uniform', activation='tanh'))
|
||||
model.add(Dropout(0.5))
|
||||
model.add(Dense(64, 2, init='uniform', activation='softmax'))
|
||||
model.add(Dense(2, init='uniform', activation='softmax'))
|
||||
|
||||
sgd = SGD(lr=0.1, decay=1e-6, momentum=0.9, nesterov=True)
|
||||
model.compile(loss='mean_squared_error', optimizer=sgd)
|
||||
@@ -73,26 +76,29 @@ from keras.layers.convolutional import Convolution2D, MaxPooling2D
|
||||
from keras.optimizers import SGD
|
||||
|
||||
model = Sequential()
|
||||
model.add(Convolution2D(32, 3, 3, 3, border_mode='full'))
|
||||
# input: 100x100 images with 3 channels -> (3, 100, 100) tensors.
|
||||
# this applies 32 convolution filters of size 3x3 each.
|
||||
model.add(Convolution2D(32, 3, 3, border_mode='full', input_shape=(3, 100, 100)))
|
||||
model.add(Activation('relu'))
|
||||
model.add(Convolution2D(32, 32, 3, 3))
|
||||
model.add(Convolution2D(32, 3, 3))
|
||||
model.add(Activation('relu'))
|
||||
model.add(MaxPooling2D(poolsize=(2, 2)))
|
||||
model.add(MaxPooling2D(pool_size=(2, 2)))
|
||||
model.add(Dropout(0.25))
|
||||
|
||||
model.add(Convolution2D(64, 32, 3, 3, border_mode='full'))
|
||||
model.add(Convolution2D(64, 3, 3, border_mode='valid'))
|
||||
model.add(Activation('relu'))
|
||||
model.add(Convolution2D(64, 64, 3, 3))
|
||||
model.add(Convolution2D(64, 3, 3))
|
||||
model.add(Activation('relu'))
|
||||
model.add(MaxPooling2D(poolsize=(2, 2)))
|
||||
model.add(MaxPooling2D(pool_size=(2, 2)))
|
||||
model.add(Dropout(0.25))
|
||||
|
||||
model.add(Flatten())
|
||||
model.add(Dense(64*8*8, 256))
|
||||
# Note: Keras does automatic shape inference.
|
||||
model.add(Dense(256))
|
||||
model.add(Activation('relu'))
|
||||
model.add(Dropout(0.5))
|
||||
|
||||
model.add(Dense(256, 10))
|
||||
model.add(Dense(10))
|
||||
model.add(Activation('softmax'))
|
||||
|
||||
sgd = SGD(lr=0.1, decay=1e-6, momentum=0.9, nesterov=True)
|
||||
@@ -111,10 +117,10 @@ from keras.layers.embeddings import Embedding
|
||||
from keras.layers.recurrent import LSTM
|
||||
|
||||
model = Sequential()
|
||||
model.add(Embedding(max_features, 256))
|
||||
model.add(LSTM(256, 128, activation='sigmoid', inner_activation='hard_sigmoid'))
|
||||
model.add(Embedding(max_features, 256, input_length=maxlen))
|
||||
model.add(LSTM(output_dim=128, activation='sigmoid', inner_activation='hard_sigmoid'))
|
||||
model.add(Dropout(0.5))
|
||||
model.add(Dense(128, 1))
|
||||
model.add(Dense(1))
|
||||
model.add(Activation('sigmoid'))
|
||||
|
||||
model.compile(loss='binary_crossentropy', optimizer='rmsprop')
|
||||
@@ -126,51 +132,67 @@ score = model.evaluate(X_test, Y_test, batch_size=16)
|
||||
### Architecture for learning image captions with a convnet and a Gated Recurrent Unit:
|
||||
(word-level embedding, caption of maximum length 16 words).
|
||||
|
||||
Note that getting this to actually "work" will require using a bigger convnet, initialized with pre-trained weights.
|
||||
Displaying readable results will also require an embedding decoder.
|
||||
Note that getting this to work well will require using a bigger convnet, initialized with pre-trained weights.
|
||||
|
||||
```python
|
||||
max_caption_len = 16
|
||||
vocab_size = 10000
|
||||
|
||||
model = Sequential()
|
||||
model.add(Convolution2D(32, 3, 3, 3, border_mode='full'))
|
||||
model.add(Activation('relu'))
|
||||
model.add(Convolution2D(32, 32, 3, 3))
|
||||
model.add(Activation('relu'))
|
||||
model.add(MaxPooling2D(poolsize=(2, 2)))
|
||||
# first, let's define an image model that
|
||||
# will encode pictures into 128-dimensional vectors.
|
||||
# it should be initialized with pre-trained weights.
|
||||
image_model = Sequential()
|
||||
image_model.add(Convolution2D(32, 3, 3, border_mode='full', input_shape=(3, 100, 100)))
|
||||
image_model.add(Activation('relu'))
|
||||
image_model.add(Convolution2D(32, 3, 3))
|
||||
image_model.add(Activation('relu'))
|
||||
image_model.add(MaxPooling2D(pool_size=(2, 2)))
|
||||
|
||||
model.add(Convolution2D(64, 32, 3, 3, border_mode='full'))
|
||||
model.add(Activation('relu'))
|
||||
model.add(Convolution2D(64, 64, 3, 3))
|
||||
model.add(Activation('relu'))
|
||||
model.add(MaxPooling2D(poolsize=(2, 2)))
|
||||
image_model.add(Convolution2D(64, 3, 3, border_mode='full'))
|
||||
image_model.add(Activation('relu'))
|
||||
image_model.add(Convolution2D(64, 3, 3))
|
||||
image_model.add(Activation('relu'))
|
||||
image_model.add(MaxPooling2D(pool_size=(2, 2)))
|
||||
|
||||
model.add(Convolution2D(128, 64, 3, 3, border_mode='full'))
|
||||
model.add(Activation('relu'))
|
||||
model.add(Convolution2D(128, 128, 3, 3))
|
||||
model.add(Activation('relu'))
|
||||
model.add(MaxPooling2D(poolsize=(2, 2)))
|
||||
image_model.add(Flatten())
|
||||
image_model.add(Dense(128))
|
||||
|
||||
model.add(Flatten())
|
||||
model.add(Dense(128*4*4, 256))
|
||||
model.add(Activation('relu'))
|
||||
model.add(Dropout(0.5))
|
||||
# let's load the weights from a save file.
|
||||
image_model.load_weights('weight_file.h5')
|
||||
|
||||
model.add(RepeatVector(max_caption_len))
|
||||
# the GRU below returns sequences of max_caption_len vectors of size 256 (our word embedding size)
|
||||
model.add(GRU(256, 256, return_sequences=True))
|
||||
# next, let's define a RNN model that encodes sequences of words
|
||||
# into sequences of 128-dimensional word vectors.
|
||||
language_model = Sequential()
|
||||
language_model.add(Embedding(vocab_size, 256, input_length=max_caption_len))
|
||||
language_model.add(GRU(output_dim=128, return_sequences=True))
|
||||
language_model.add(Dense(128))
|
||||
|
||||
model.compile(loss='mean_squared_error', optimizer='rmsprop')
|
||||
# let's repeat the image vector to turn it into a sequence.
|
||||
image_model.add(RepeatVector(max_caption_len))
|
||||
|
||||
# "images" is a numpy array of shape (nb_samples, nb_channels=3, width, height)
|
||||
# "captions" is a numpy array of shape (nb_samples, max_caption_len=16, embedding_dim=256)
|
||||
# captions are supposed already embedded (dense vectors).
|
||||
model.fit(images, captions, batch_size=16, nb_epoch=100)
|
||||
|
||||
# the output of both models will be tensors of shape (samples, max_caption_len, 128).
|
||||
# let's concatenate these 2 vector sequences.
|
||||
model = Merge([image_model, language_model], mode='concat', concat_axis=-1)
|
||||
# let's encode this vector sequence into a single vector
|
||||
model.add(GRU(256, 256, return_sequences=False))
|
||||
# which will be used to compute a probability
|
||||
# distribution over what the next word in the caption should be!
|
||||
model.add(Dense(vocab_size))
|
||||
model.add(Activation('softmax'))
|
||||
|
||||
model.compile(loss='categorical_crossentropy', optimizer='rmsprop')
|
||||
|
||||
# "images" is a numpy float array of shape (nb_samples, nb_channels=3, width, height).
|
||||
# "partial_captions" is a numpy integer array of shape (nb_samples, max_caption_len)
|
||||
# containing word index sequences representing partial captions.
|
||||
# "next_words" is a numpy float array of shape (nb_samples, vocab_size)
|
||||
# containing a categorical encoding (0s and 1s) of the next word in the corresponding
|
||||
# partial caption.
|
||||
model.fit([images, partial_captions], next_words, batch_size=16, nb_epoch=100)
|
||||
```
|
||||
|
||||
In the examples folder, you will find example models for real datasets:
|
||||
- CIFAR10 small images classification: Convnet with realtime data augmentation
|
||||
- CIFAR10 small images classification: Convolutional Neural Network (CNN) with realtime data augmentation
|
||||
- IMDB movie review sentiment classification: LSTM over sequences of words
|
||||
- Reuters newswires topic classification: Multilayer Perceptron (MLP)
|
||||
- MNIST handwritten digits classification: MLP & CNN
|
||||
@@ -183,7 +205,7 @@ In the examples folder, you will find example models for real datasets:
|
||||
|
||||
For complete coverage of the API, check out [the Keras documentation](http://keras.io).
|
||||
|
||||
A few highlights: convnets, LSTM, GRU, word2vec-style embeddings, PReLU, batch normalization...
|
||||
A few highlights: convnets, LSTM, GRU, word2vec-style embeddings, PReLU, BatchNormalization...
|
||||
|
||||
## Installation
|
||||
|
||||
@@ -196,7 +218,7 @@ Keras uses the following dependencies:
|
||||
- HDF5 and h5py (optional, required if you use model saving/loading functions)
|
||||
- Optional but recommended if you use CNNs: cuDNN.
|
||||
|
||||
Once you have the dependencies installed, cd to the Keras folder and run the install command:
|
||||
To install, `cd` to the Keras folder and run the install command:
|
||||
```
|
||||
sudo python setup.py install
|
||||
```
|
||||
@@ -213,4 +235,3 @@ Keras (κέρας) means _horn_ in Greek. It is a reference to a literary image
|
||||
Keras was developed as part of the research effort of project ONEIROS (Open-ended Neuro-Electronic Intelligent Robot Operating System).
|
||||
|
||||
>_"Oneiroi are beyond our unravelling --who can be sure what tale they tell? Not all that men look for comes to pass. Two gates there are that give passage to fleeting Oneiroi; one is made of horn, one of ivory. The Oneiroi that pass through sawn ivory are deceitful, bearing a message that will not be fulfilled; those that come out through polished horn have truth behind them, to be accomplished for men who see them."_ Homer, Odyssey 19. 562 ff (Shewring translation).
|
||||
|
||||
|
||||
@@ -14,6 +14,7 @@ pages:
|
||||
- Home: index.md
|
||||
- Index: documentation.md
|
||||
- Examples: examples.md
|
||||
- FAQ: faq.md
|
||||
- Optimizers: optimizers.md
|
||||
- Objectives: objectives.md
|
||||
- Models: models.md
|
||||
|
||||
@@ -6,12 +6,12 @@ Activations can either be used through an `Activation` layer, or through the `ac
|
||||
```python
|
||||
from keras.layers.core import Activation, Dense
|
||||
|
||||
model.add(Dense(64, 64, init='uniform'))
|
||||
model.add(Dense(64))
|
||||
model.add(Activation('tanh'))
|
||||
```
|
||||
is equivalent to:
|
||||
```python
|
||||
model.add(Dense(20, 64, init='uniform', activation='tanh'))
|
||||
model.add(Dense(64, activation='tanh'))
|
||||
```
|
||||
|
||||
You can also pass an element-wise Theano function as an activation:
|
||||
@@ -20,7 +20,7 @@ You can also pass an element-wise Theano function as an activation:
|
||||
def tanh(x):
|
||||
return theano.tensor.tanh(x)
|
||||
|
||||
model.add(Dense(20, 64, init='uniform', activation=tanh))
|
||||
model.add(Dense(64, activation=tanh))
|
||||
model.add(Activation(tanh))
|
||||
```
|
||||
|
||||
|
||||
@@ -33,7 +33,10 @@ The `logs` dictionary will contain keys for quantities relevant to the current b
|
||||
keras.callbacks.ModelCheckpoint(filepath, verbose=0, save_best_only=False)
|
||||
```
|
||||
|
||||
Save the model after every epoch. If `save_best_only=True`, the latest best model according to the validation loss will not be overwritten.
|
||||
Save the model after every epoch. If `save_best_only=True`, the latest best model according to the validation loss will not be overwritten.
|
||||
`filepath` can contain named formatting options, which will be filled with the value of `epoch` and the keys in `logs` (passed in `on_epoch_end`).
|
||||
|
||||
For example: if `filepath` is `weights.{epoch:02d}-{val_loss:.2f}.hdf5`, then multiple files will be saved, each with the epoch number and the validation loss in its filename.
|
||||
|
||||
|
||||
```python
|
||||
@@ -72,7 +75,7 @@ class LossHistory(keras.callbacks.Callback):
|
||||
self.losses.append(logs.get('loss'))
|
||||
|
||||
model = Sequential()
|
||||
model.add(Dense(784, 10, init='uniform'))
|
||||
model.add(Dense(10, input_dim=784, init='uniform'))
|
||||
model.add(Activation('softmax'))
|
||||
model.compile(loss='categorical_crossentropy', optimizer='rmsprop')
|
||||
|
||||
@@ -94,7 +97,7 @@ print history.losses
|
||||
from keras.callbacks import ModelCheckpoint
|
||||
|
||||
model = Sequential()
|
||||
model.add(Dense(784, 10, init='uniform'))
|
||||
model.add(Dense(10, input_dim=784, init='uniform'))
|
||||
model.add(Activation('softmax'))
|
||||
model.compile(loss='categorical_crossentropy', optimizer='rmsprop')
|
||||
|
||||
|
||||
@@ -12,7 +12,7 @@ These layers expose 2 keyword arguments:
|
||||
|
||||
```python
|
||||
from keras.constraints import maxnorm
|
||||
model.add(Dense(64, 64, W_constraint = maxnorm(2)))
|
||||
model.add(Dense(64, W_constraint = maxnorm(2)))
|
||||
```
|
||||
|
||||
## Available constraints
|
||||
|
||||
@@ -5,6 +5,7 @@
|
||||
- [Home](index.md)
|
||||
- [Index](documentation.md)
|
||||
- [Examples](examples.md)
|
||||
- [FAQ](faq.md)
|
||||
|
||||
---
|
||||
|
||||
|
||||
+83
-70
@@ -1,7 +1,7 @@
|
||||
|
||||
Here are a few examples to get you started!
|
||||
|
||||
### Multilayer Perceptron (MLP)
|
||||
### Multilayer Perceptron (MLP):
|
||||
|
||||
```python
|
||||
from keras.models import Sequential
|
||||
@@ -9,13 +9,16 @@ from keras.layers.core import Dense, Dropout, Activation
|
||||
from keras.optimizers import SGD
|
||||
|
||||
model = Sequential()
|
||||
model.add(Dense(20, 64, init='uniform'))
|
||||
# Dense(64) is a fully-connected layer with 64 hidden units.
|
||||
# in the first layer, you must specify the expected input data shape:
|
||||
# here, 20-dimensional vectors.
|
||||
model.add(Dense(64, input_dim=20, init='uniform'))
|
||||
model.add(Activation('tanh'))
|
||||
model.add(Dropout(0.5))
|
||||
model.add(Dense(64, 64, init='uniform'))
|
||||
model.add(Dense(64, init='uniform'))
|
||||
model.add(Activation('tanh'))
|
||||
model.add(Dropout(0.5))
|
||||
model.add(Dense(64, 2, init='uniform'))
|
||||
model.add(Dense(2, init='uniform'))
|
||||
model.add(Activation('softmax'))
|
||||
|
||||
sgd = SGD(lr=0.1, decay=1e-6, momentum=0.9, nesterov=True)
|
||||
@@ -25,25 +28,21 @@ model.fit(X_train, y_train, nb_epoch=20, batch_size=16)
|
||||
score = model.evaluate(X_test, y_test, batch_size=16)
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### Alternative implementation of MLP
|
||||
### Alternative implementation of MLP:
|
||||
|
||||
```python
|
||||
model = Sequential()
|
||||
model.add(Dense(20, 64, init='uniform', activation='tanh'))
|
||||
model.add(Dense(64, input_dim=20, init='uniform', activation='tanh'))
|
||||
model.add(Dropout(0.5))
|
||||
model.add(Dense(64, 64, init='uniform', activation='tanh'))
|
||||
model.add(Dense(64, init='uniform', activation='tanh'))
|
||||
model.add(Dropout(0.5))
|
||||
model.add(Dense(64, 2, init='uniform', activation='softmax'))
|
||||
model.add(Dense(2, init='uniform', activation='softmax'))
|
||||
|
||||
sgd = SGD(lr=0.1, decay=1e-6, momentum=0.9, nesterov=True)
|
||||
model.compile(loss='mean_squared_error', optimizer=sgd)
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### VGG-like convnet
|
||||
### VGG-like convnet:
|
||||
|
||||
```python
|
||||
from keras.models import Sequential
|
||||
@@ -52,26 +51,29 @@ from keras.layers.convolutional import Convolution2D, MaxPooling2D
|
||||
from keras.optimizers import SGD
|
||||
|
||||
model = Sequential()
|
||||
model.add(Convolution2D(32, 3, 3, 3, border_mode='full'))
|
||||
# input: 100x100 images with 3 channels -> (3, 100, 100) tensors.
|
||||
# this applies 32 convolution filters of size 3x3 each.
|
||||
model.add(Convolution2D(32, 3, 3, border_mode='full', input_shape=(3, 100, 100)))
|
||||
model.add(Activation('relu'))
|
||||
model.add(Convolution2D(32, 32, 3, 3))
|
||||
model.add(Convolution2D(32, 3, 3))
|
||||
model.add(Activation('relu'))
|
||||
model.add(MaxPooling2D(poolsize=(2, 2)))
|
||||
model.add(MaxPooling2D(pool_size=(2, 2)))
|
||||
model.add(Dropout(0.25))
|
||||
|
||||
model.add(Convolution2D(64, 32, 3, 3, border_mode='full'))
|
||||
model.add(Convolution2D(64, 3, 3, border_mode='valid'))
|
||||
model.add(Activation('relu'))
|
||||
model.add(Convolution2D(64, 64, 3, 3))
|
||||
model.add(Convolution2D(64, 3, 3))
|
||||
model.add(Activation('relu'))
|
||||
model.add(MaxPooling2D(poolsize=(2, 2)))
|
||||
model.add(MaxPooling2D(pool_size=(2, 2)))
|
||||
model.add(Dropout(0.25))
|
||||
|
||||
model.add(Flatten())
|
||||
model.add(Dense(64*8*8, 256))
|
||||
# Note: Keras does automatic shape inference.
|
||||
model.add(Dense(256))
|
||||
model.add(Activation('relu'))
|
||||
model.add(Dropout(0.5))
|
||||
|
||||
model.add(Dense(256, 10))
|
||||
model.add(Dense(10))
|
||||
model.add(Activation('softmax'))
|
||||
|
||||
sgd = SGD(lr=0.1, decay=1e-6, momentum=0.9, nesterov=True)
|
||||
@@ -81,9 +83,7 @@ model.fit(X_train, Y_train, batch_size=32, nb_epoch=1)
|
||||
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### Sequence classification with LSTM
|
||||
### Sequence classification with LSTM:
|
||||
|
||||
```python
|
||||
from keras.models import Sequential
|
||||
@@ -92,11 +92,10 @@ from keras.layers.embeddings import Embedding
|
||||
from keras.layers.recurrent import LSTM
|
||||
|
||||
model = Sequential()
|
||||
# Add mask_zero=True to the Embedding constructor if 0 is a left-padding value in your data
|
||||
model.add(Embedding(max_features, 256))
|
||||
model.add(LSTM(256, 128, activation='sigmoid', inner_activation='hard_sigmoid'))
|
||||
model.add(Embedding(max_features, 256, input_length=maxlen))
|
||||
model.add(LSTM(output_dim=128, activation='sigmoid', inner_activation='hard_sigmoid'))
|
||||
model.add(Dropout(0.5))
|
||||
model.add(Dense(128, 1))
|
||||
model.add(Dense(1))
|
||||
model.add(Activation('sigmoid'))
|
||||
|
||||
model.compile(loss='binary_crossentropy', optimizer='rmsprop')
|
||||
@@ -105,59 +104,73 @@ model.fit(X_train, Y_train, batch_size=16, nb_epoch=10)
|
||||
score = model.evaluate(X_test, Y_test, batch_size=16)
|
||||
```
|
||||
|
||||
---
|
||||
### Architecture for learning image captions with a convnet and a Gated Recurrent Unit:
|
||||
(word-level embedding, caption of maximum length 16 words).
|
||||
|
||||
### Image captioning
|
||||
|
||||
Architecture for learning image captions with a convnet and a Gated Recurrent Unit (word-level embedding, caption of maximum length 16 words).
|
||||
|
||||
Note that getting this to actually "work" will require using a bigger convnet, initialized with pre-trained weights.
|
||||
Displaying readable results will also require an embedding decoder.
|
||||
Note that getting this to work well will require using a bigger convnet, initialized with pre-trained weights.
|
||||
|
||||
```python
|
||||
max_caption_len = 16
|
||||
vocab_size = 10000
|
||||
|
||||
model = Sequential()
|
||||
model.add(Convolution2D(32, 3, 3, 3, border_mode='full'))
|
||||
model.add(Activation('relu'))
|
||||
model.add(Convolution2D(32, 32, 3, 3))
|
||||
model.add(Activation('relu'))
|
||||
model.add(MaxPooling2D(poolsize=(2, 2)))
|
||||
# first, let's define an image model that
|
||||
# will encode pictures into 128-dimensional vectors.
|
||||
# it should be initialized with pre-trained weights.
|
||||
image_model = Sequential()
|
||||
image_model.add(Convolution2D(32, 3, 3, border_mode='full', input_shape=(3, 100, 100)))
|
||||
image_model.add(Activation('relu'))
|
||||
image_model.add(Convolution2D(32, 3, 3))
|
||||
image_model.add(Activation('relu'))
|
||||
image_model.add(MaxPooling2D(pool_size=(2, 2)))
|
||||
|
||||
model.add(Convolution2D(64, 32, 3, 3, border_mode='full'))
|
||||
model.add(Activation('relu'))
|
||||
model.add(Convolution2D(64, 64, 3, 3))
|
||||
model.add(Activation('relu'))
|
||||
model.add(MaxPooling2D(poolsize=(2, 2)))
|
||||
image_model.add(Convolution2D(64, 3, 3, border_mode='full'))
|
||||
image_model.add(Activation('relu'))
|
||||
image_model.add(Convolution2D(64, 3, 3))
|
||||
image_model.add(Activation('relu'))
|
||||
image_model.add(MaxPooling2D(pool_size=(2, 2)))
|
||||
|
||||
model.add(Convolution2D(128, 64, 3, 3, border_mode='full'))
|
||||
model.add(Activation('relu'))
|
||||
model.add(Convolution2D(128, 128, 3, 3))
|
||||
model.add(Activation('relu'))
|
||||
model.add(MaxPooling2D(poolsize=(2, 2)))
|
||||
image_model.add(Flatten())
|
||||
image_model.add(Dense(128))
|
||||
|
||||
model.add(Flatten())
|
||||
model.add(Dense(128*4*4, 256))
|
||||
model.add(Activation('relu'))
|
||||
model.add(Dropout(0.5))
|
||||
# let's load the weights from a save file.
|
||||
image_model.load_weights('weight_file.h5')
|
||||
|
||||
model.add(RepeatVector(max_caption_len))
|
||||
# the GRU below returns sequences of max_caption_len vectors of size 256 (our word embedding size)
|
||||
model.add(GRU(256, 256, return_sequences=True))
|
||||
# next, let's define a RNN model that encodes sequences of words
|
||||
# into sequences of 128-dimensional word vectors.
|
||||
language_model = Sequential()
|
||||
language_model.add(Embedding(vocab_size, 256, input_length=max_caption_len))
|
||||
language_model.add(GRU(output_dim=128, return_sequences=True))
|
||||
language_model.add(Dense(128))
|
||||
|
||||
model.compile(loss='mean_squared_error', optimizer='rmsprop')
|
||||
# let's repeat the image vector to turn it into a sequence.
|
||||
image_model.add(RepeatVector(max_caption_len))
|
||||
|
||||
# "images" is a numpy array of shape (nb_samples, nb_channels=3, width, height)
|
||||
# "captions" is a numpy array of shape (nb_samples, max_caption_len=16, embedding_dim=256)
|
||||
# captions are supposed already embedded (dense vectors).
|
||||
model.fit(images, captions, batch_size=16, nb_epoch=100)
|
||||
|
||||
# the output of both models will be tensors of shape (samples, max_caption_len, 128).
|
||||
# let's concatenate these 2 vector sequences.
|
||||
model = Merge([image_model, language_model], mode='concat', concat_axis=-1)
|
||||
# let's encode this vector sequence into a single vector
|
||||
model.add(GRU(256, 256, return_sequences=False))
|
||||
# which will be used to compute a probability
|
||||
# distribution over what the next word in the caption should be!
|
||||
model.add(Dense(vocab_size))
|
||||
model.add(Activation('softmax'))
|
||||
|
||||
model.compile(loss='categorical_crossentropy', optimizer='rmsprop')
|
||||
|
||||
# "images" is a numpy float array of shape (nb_samples, nb_channels=3, width, height).
|
||||
# "partial_captions" is a numpy integer array of shape (nb_samples, max_caption_len)
|
||||
# containing word index sequences representing partial captions.
|
||||
# "next_words" is a numpy float array of shape (nb_samples, vocab_size)
|
||||
# containing a categorical encoding (0s and 1s) of the next word in the corresponding
|
||||
# partial caption.
|
||||
model.fit([images, partial_captions], next_words, batch_size=16, nb_epoch=100)
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
In the [examples folder](https://github.com/fchollet/keras/tree/master/examples), you will find example models for real datasets:
|
||||
|
||||
- CIFAR10 small images classification: Convnet with realtime data augmentation
|
||||
In the examples folder, you will find example models for real datasets:
|
||||
- CIFAR10 small images classification: Convolutional Neural Network (CNN) with realtime data augmentation
|
||||
- IMDB movie review sentiment classification: LSTM over sequences of words
|
||||
- Reuters newswires topic classification: Multilayer Perceptron
|
||||
- Reuters newswires topic classification: Multilayer Perceptron (MLP)
|
||||
- MNIST handwritten digits classification: MLP & CNN
|
||||
- Character-level text generation with LSTM
|
||||
|
||||
...and more.
|
||||
|
||||
@@ -0,0 +1,179 @@
|
||||
# Keras FAQ: Frequently Asked Keras Questions
|
||||
|
||||
[How can I run Keras on GPU?](#how-can-i-run-keras-on-gpu)
|
||||
|
||||
[How can I save a Keras model?](#how-can-i-save-a-keras-model)
|
||||
|
||||
[Why is the training loss much higher than the testing loss?](#why-is-the-training-loss-much-higher-than-the-testing-loss)
|
||||
|
||||
[How can I visualize the output of an intermediate layer?](#how-can-i-visualize-the-output-of-an-intermediate-layer)
|
||||
|
||||
[Isn't there a bug with Merge or Graph related to input concatenation?](#isnt-there-a-bug-with-merge-or-graph-related-to-input-concatenation)
|
||||
|
||||
[How can I use Keras with datasets that don't fit in memory?](#how-can-i-use-keras-with-datasets-that-dont-fit-in-memory)
|
||||
|
||||
[How can I interrupt training when the validation loss isn't decreasing anymore?](#how-can-i-interrupt-training-when-the-validation-loss-isnt-decreasing-anymore)
|
||||
|
||||
[How is the validation split computed?](#how-is-the-validation-split-computed)
|
||||
|
||||
[Is the data shuffled during training?](#is-the-data-shuffled-during-training)
|
||||
|
||||
[How can I record the training / validation loss / accuracy at each epoch?](#how-can-i-record-the-training-validation-loss-accuracy-at-each-epoch)
|
||||
|
||||
---
|
||||
|
||||
### How can I run Keras on GPU?
|
||||
|
||||
Method 1: use Theano flags.
|
||||
```bash
|
||||
THEANO_FLAGS=device=gpu,floatX=float32 python my_keras_script.py
|
||||
```
|
||||
|
||||
The name 'gpu' might have to be changed depending on your device's identifier (e.g. `gpu0`, `gpu1`, etc).
|
||||
|
||||
Method 2: set up your `.theanorc`: [Instructions](http://deeplearning.net/software/theano/library/config.html)
|
||||
|
||||
Method 3: manually set `theano.config.device`, `theano.config.floatX` at the beginning of your code:
|
||||
```python
|
||||
import theano
|
||||
theano.config.device = 'gpu'
|
||||
theano.config.floatX = 'float32'
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### How can I save a Keras model?
|
||||
|
||||
*It is not recommended to use pickle or cPickle to save a Keras model.*
|
||||
|
||||
If you only need to save the architecture of a model, and not its weights, you can do:
|
||||
|
||||
```python
|
||||
# save as JSON
|
||||
json_string = model.to_json()
|
||||
|
||||
# save as YAML
|
||||
yaml_string = model.to_yaml()
|
||||
```
|
||||
|
||||
You can then build a fresh model from this data:
|
||||
|
||||
```python
|
||||
# model reconstruction from JSON:
|
||||
from keras.models import model_from_json
|
||||
model = model_from_json(json_string)
|
||||
|
||||
# model reconstruction from YAML
|
||||
model = model_from_yaml(yaml_string)
|
||||
```
|
||||
|
||||
If you need to save the weights of a model, you can do so in HDF5:
|
||||
```python
|
||||
model.save_weights('my_model_weights.h5')
|
||||
```
|
||||
|
||||
Assuming you have code for instantiating your model, you can then load the weights you saved into a model with the same architecture:
|
||||
|
||||
```python
|
||||
model.load_weights('my_model_weights.h5')
|
||||
```
|
||||
|
||||
This leads us to a way to save and reconstruct models from only serialized data:
|
||||
```python
|
||||
json_string = model.to_json()
|
||||
open('my_model_architecture.json', 'w').write(json_string)
|
||||
model.save_weights('my_model_weights.h5')
|
||||
|
||||
# elsewhere...
|
||||
model = model_from_json(open('my_model_architecture.json').read())
|
||||
model.load_weights('my_model_weights.h5')
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### Why is the training loss much higher than the testing loss?
|
||||
|
||||
A Keras model has two modes: training and testing. Regularization mechanisms, such as Dropout and L1/L2 weight regularization, are turned off at testing time.
|
||||
|
||||
Besides, the training loss is the average of the losses over each batch of training data. Because your model is changing over time, the loss over the first batches of an epoch is generally higher than over the last batches. On the other hand, the testing loss for an epoch is computed using the model as it is at the end of the epoch, resulting in a lower loss.
|
||||
|
||||
---
|
||||
|
||||
### How can I visualize the output of an intermediate layer?
|
||||
|
||||
You can build a Theano function that will return the output of a certain layer given a certain input, for example:
|
||||
|
||||
```python
|
||||
# with a Sequential model
|
||||
get_3rd_layer_output = theano.function([model.layers[0].input],
|
||||
model.layers[3].get_output(train=False))
|
||||
layer_output = get_3rd_layer_output(X)
|
||||
|
||||
# with a Graph model
|
||||
get_conv_layer_output = theano.function([model.inputs[i].input for i in model.input_order],
|
||||
model.outputs['conv'].get_output(train=False),
|
||||
on_unused_input='ignore')
|
||||
conv_output = get_conv_layer_output(input_data_dict)
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### Isn't there a bug with Merge or Graph related to input concatenation?
|
||||
|
||||
Yes, there was a known bug with tensor concatenation in Theano that was fixed in early 2015.
|
||||
Please upgrade to the latest version of Theano:
|
||||
|
||||
```bash
|
||||
sudo pip install git+git://github.com/Theano/Theano.git
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### How can I use Keras with datasets that don't fit in memory?
|
||||
|
||||
You can do batch training using `model.train_on_batch(X, y)` and `model.test_on_batch(X, y)`. See the [models documentation](models.md).
|
||||
|
||||
You can also see batch training in action in our [CIFAR10 example](https://github.com/fchollet/keras/blob/master/examples/cifar10_cnn.py).
|
||||
|
||||
---
|
||||
|
||||
### How can I interrupt training when the validation loss isn't decreasing anymore?
|
||||
|
||||
You can use an `EarlyStopping` callback:
|
||||
|
||||
```python
|
||||
from keras.callbacks import EarlyStopping
|
||||
early_stopping = EarlyStopping(monitor='val_loss', patience=2)
|
||||
model.fit(X, y, validation_split=0.2, callbacks=[early_stopping])
|
||||
```
|
||||
|
||||
Find out more in the [callbacks documentation](callbacks.md).
|
||||
|
||||
---
|
||||
|
||||
### How is the validation split computed?
|
||||
|
||||
If you set the `validation_split` argument in `model.fit` to e.g. 0.1, then the validation data used will be the *last 10%* of the data. If you set it to 0.25, it will be the last 25% of the data, etc.
|
||||
|
||||
|
||||
---
|
||||
|
||||
### Is the data shuffled during training?
|
||||
|
||||
Yes, if the `shuffle` argument in `model.fit` is set to `True` (which is the default), the training data will be randomly shuffled at each epoch.
|
||||
|
||||
Validation data isn't shuffled.
|
||||
|
||||
---
|
||||
|
||||
|
||||
### How can I record the training / validation loss / accuracy at each epoch?
|
||||
|
||||
The `model.fit` method returns a `History` callback, which has a `history` attribute containing the lists of successive losses / accuracies.
|
||||
|
||||
```python
|
||||
hist = model.fit(X, y, validation_split=0.2)
|
||||
print(hist.history)
|
||||
```
|
||||
|
||||
---
|
||||
@@ -46,9 +46,9 @@ Stacking layers is as easy as `.add()`:
|
||||
```python
|
||||
from keras.layers.core import Dense, Activation
|
||||
|
||||
model.add(Dense(input_dim=100, output_dim=64, init="glorot_uniform"))
|
||||
model.add(Dense(output_dim=64, input_dim=100, init="glorot_uniform"))
|
||||
model.add(Activation("relu"))
|
||||
model.add(Dense(input_dim=64, output_dim=10, init="glorot_uniform"))
|
||||
model.add(Dense(output_dim=10, init="glorot_uniform"))
|
||||
model.add(Activation("softmax"))
|
||||
```
|
||||
|
||||
|
||||
@@ -6,7 +6,7 @@ Initializations define the probability distribution used to set the initial rand
|
||||
The keyword arguments used for passing initializations to layers will depend on the layer. Usually it is simply `init`:
|
||||
|
||||
```python
|
||||
model.add(Dense(64, 64, init='uniform'))
|
||||
model.add(Dense(64, init='uniform'))
|
||||
```
|
||||
|
||||
## Available initializations
|
||||
|
||||
@@ -7,7 +7,8 @@ keras.layers.advanced_activations.LeakyReLU(alpha=0.3)
|
||||
|
||||
Special version of a Rectified Linear Unit that allows a small gradient when the unit is not active (`f(x) = alpha*x for x < 0`).
|
||||
|
||||
- __Input shape__: This layer does not assume a specific input shape. As a result, it cannot be used as the first layer in a model.
|
||||
|
||||
- __Input shape__: Arbitrary. Use the keyword argument `input_shape` (tuple of integers, does not include the samples axis) when using this layer as the first layer in a model.
|
||||
|
||||
- __Output shape__: Same as input.
|
||||
|
||||
@@ -19,18 +20,16 @@ Special version of a Rectified Linear Unit that allows a small gradient when the
|
||||
## PReLU
|
||||
|
||||
```python
|
||||
keras.layers.advanced_activations.PReLU(input_shape)
|
||||
keras.layers.advanced_activations.PReLU()
|
||||
```
|
||||
|
||||
Parametrized linear unit. Similar to a LeakyReLU, where each input unit has its alpha coefficient, and where these coefficients are learned during training.
|
||||
|
||||
- __Input shape__: Same as `input_shape`. This layer cannot be used as first layer in a model.
|
||||
|
||||
- __Input shape__: Arbitrary. Use the keyword argument `input_shape` (tuple of integers, does not include the samples axis) when using this layer as the first layer in a model.
|
||||
|
||||
- __Output shape__: Same as input.
|
||||
|
||||
- __Arguments__:
|
||||
- __input_shape__: tuple.
|
||||
|
||||
- __References__:
|
||||
- [Delving Deep into Rectifiers: Surpassing Human-Level Performance on ImageNet Classification](http://arxiv.org/pdf/1502.01852v1.pdf)
|
||||
|
||||
@@ -39,18 +38,15 @@ Parametrized linear unit. Similar to a LeakyReLU, where each input unit has its
|
||||
## ParametricSoftplus
|
||||
|
||||
```python
|
||||
keras.layers.advanced_activations.ParametricSoftplus(input_shape)
|
||||
keras.layers.advanced_activations.ParametricSoftplus()
|
||||
```
|
||||
|
||||
Parametric Softplus of the form: (`f(x) = alpha * log(1 + exp(beta * x))`). This is essentially a smooth version of ReLU where the parameters control the sharpness of the rectification. The parameters are initialized to more closely approximate a ReLU than the standard `softplus`: `alpha` initialized to `0.2` and `beta` initialized to `5.0`. The parameters are fit separately for each hidden unit.
|
||||
|
||||
- __Input shape__: Same as `input_shape`. This layer cannot be used as first layer in a model.
|
||||
- __Input shape__: Arbitrary. Use the keyword argument `input_shape=...` when using this layer as the first layer in a model.
|
||||
|
||||
- __Output shape__: Same as input.
|
||||
|
||||
- __Arguments__:
|
||||
- __input_shape__: tuple.
|
||||
|
||||
- __References__:
|
||||
- [Inferring Nonlinear Neuronal Computation Based on Physiologically Plausible Inputs](http://journals.plos.org/ploscompbiol/article?id=10.1371/journal.pcbi.1003143)
|
||||
|
||||
@@ -62,7 +58,8 @@ keras.layers.advanced_activations.ThresholdedLinear(theta)
|
||||
|
||||
Parametrized linear unit. Provides a threshold near zero where values are zeroed.
|
||||
|
||||
- __Input shape__: Same as `input_shape`. This layer cannot be used as first layer in a model.
|
||||
|
||||
- __Input shape__: Arbitrary. Use the keyword argument `input_shape` (tuple of integers, does not include the samples axis) when using this layer as the first layer in a model.
|
||||
|
||||
- __Output shape__: Same as input.
|
||||
|
||||
@@ -80,7 +77,7 @@ keras.layers.advanced_activations.ThresholdedReLu(theta)
|
||||
|
||||
Parametrized rectified linear unit. Provides a threshold near zero where values are zeroed.
|
||||
|
||||
- __Input shape__: Same as `input_shape`. This layer cannot be used as first layer in a model.
|
||||
- __Input shape__: Arbitrary. Use the keyword argument `input_shape=...` when using this layer as the first layer in a model.
|
||||
|
||||
- __Output shape__: Same as input.
|
||||
|
||||
|
||||
@@ -2,22 +2,20 @@
|
||||
## Convolution1D
|
||||
|
||||
```python
|
||||
keras.layers.convolutional.Convolution1D(input_dim, nb_filter, filter_length,
|
||||
keras.layers.convolutional.Convolution1D(nb_filter, filter_length,
|
||||
init='uniform', activation='linear', weights=None,
|
||||
border_mode='valid', subsample_length=1,
|
||||
W_regularizer=None, b_regularizer=None, W_constraint=None,
|
||||
b_constraint=None)
|
||||
b_constraint=None, input_dim=None, input_length=None)
|
||||
```
|
||||
|
||||
Convolution operator for filtering neighborhoods of one-dimensional inputs.
|
||||
|
||||
Convolution operator for filtering neighborhoods of one-dimensional inputs. When using this layer as the first layer in a model, either provide the keyword argument `input_dim` (int, e.g. 128 for sequences of 128-dimensional vectors), or `input_shape` (tuple of integers, e.g. `(10, 128)` for sequences of 10 vectors, each 128-dimensional).
|
||||
|
||||
- __Input shape__: 3D tensor with shape: `(nb_samples, steps, input_dim)`.
|
||||
|
||||
- __Output shape__: 3D tensor with shape: `(nb_samples, steps, nb_filter)`. `steps` value might have changed due to padding.
|
||||
|
||||
- __Arguments__:
|
||||
- __input_dim__: Number of channels/dimensions in the input.
|
||||
- __nb_filter__: Number of convolution kernels to use (dimensionality of the output).
|
||||
- __filter_length__: The extension (spatial or temporal) of each filter.
|
||||
- __init__: name of initialization function for the weights of the layer (see: [initializations](../initializations.md)), or alternatively, Theano function to use for weights initialization. This parameter is only relevant if you don't pass a `weights` argument.
|
||||
@@ -30,31 +28,32 @@ Convolution operator for filtering neighborhoods of one-dimensional inputs.
|
||||
- __activity_regularizer__: instance of [ActivityRegularizer](../regularizers.md), applied to the network output.
|
||||
- __W_constraint__: instance of the [constraints](../constraints.md) module (eg. maxnorm, nonneg), applied to the main weights matrix.
|
||||
- __b_constraint__: instance of the [constraints](../constraints.md) module, applied to the bias.
|
||||
- __input_dim__: Number of channels/dimensions in the input. Either this argument or the keyword argument `input_shape` must be provided when using this layer as the first layer in a model.
|
||||
- __input_length__: Length of input sequences, when it is constant. This argument is required if you are going to connect `Flatten` then `Dense` layers upstream (without it, the shape of the dense outputs cannot be computed).
|
||||
|
||||
---
|
||||
|
||||
## Convolution2D
|
||||
|
||||
```python
|
||||
keras.layers.convolutional.Convolution2D(nb_filter, stack_size, nb_row, nb_col,
|
||||
keras.layers.convolutional.Convolution2D(nb_filter, nb_row, nb_col,
|
||||
init='glorot_uniform', activation='linear', weights=None,
|
||||
border_mode='valid', subsample=(1, 1),
|
||||
W_regularizer=None, b_regularizer=None, W_constraint=None)
|
||||
```
|
||||
|
||||
Convolution operator for filtering windows of two-dimensional inputs.
|
||||
Convolution operator for filtering windows of two-dimensional inputs. When using this layer as the first layer in a model, provide the keyword argument `input_shape` (tuple of integers, does not include the sample axis), e.g. `input_shape=(3, 128, 128)` for 128x128 RGB pictures.
|
||||
|
||||
- __Input shape__: 4D tensor with shape: `(nb_samples, stack_size, nb_row, nb_col)`.
|
||||
- __Input shape__: 4D tensor with shape: `(nb_samples, channels, rows, cols)`.
|
||||
|
||||
- __Output shape__: 4D tensor with shape: `(nb_samples, nb_filter, nb_row, nb_col)`. `nb_row`, `nb_col` might have changed due to padding.
|
||||
- __Output shape__: 4D tensor with shape: `(nb_samples, nb_filter, rows, cols)`. `rows`, `cols` might have changed due to padding.
|
||||
|
||||
|
||||
- __Arguments__:
|
||||
|
||||
- __nb_filter__: Number of convolution kernels to use.
|
||||
- __stack_size__: Number of channels in the input.
|
||||
- __nb_row__: Number of rows in the convolution kernels
|
||||
- __nb_col__: Number of columns in the convolution kernels
|
||||
- __nb_filter__: Number of convolution filters to use.
|
||||
- __nb_row__: Number of rows in the convolution kernel.
|
||||
- __nb_col__: Number of columns in the convolution kernel.
|
||||
- __init__: name of initialization function for the weights of the layer (see: [initializations](../initializations.md)), or alternatively, Theano function to use for weights initialization. This parameter is only relevant if you don't pass a `weights` argument.
|
||||
- __activation__: name of activation function to use (see: [activations](../activations.md)), or alternatively, elementwise Theano function. If you don't specify anything, no activation is applied (ie. "linear" activation: a(x) = x).
|
||||
- __weights__: list of numpy arrays to set as initial weights.
|
||||
@@ -90,7 +89,7 @@ keras.layers.convolutional.MaxPooling1D(pool_length=2, stride=None, ignore_borde
|
||||
## MaxPooling2D
|
||||
|
||||
```python
|
||||
keras.layers.convolutional.MaxPooling2D(poolsize=(2, 2), ignore_border=True)
|
||||
keras.layers.convolutional.MaxPooling2D(pool_size=(2, 2), ignore_border=True)
|
||||
```
|
||||
|
||||
- __Input shape__: 4D tensor with shape: `(nb_samples, stack_size, nb_row, nb_col)`.
|
||||
|
||||
@@ -76,8 +76,9 @@ get_config()
|
||||
|
||||
## Dense
|
||||
```python
|
||||
keras.layers.core.Dense(input_dim, output_dim, init='glorot_uniform', activation='linear', weights=None \
|
||||
W_regularizer=None, b_regularizer=None, activity_regularizer=None, W_constraint=None, b_constraint=None)
|
||||
keras.layers.core.Dense(output_dim, init='glorot_uniform', activation='linear', weights=None,
|
||||
W_regularizer=None, b_regularizer=None, activity_regularizer=None,
|
||||
W_constraint=None, b_constraint=None, input_dim=None)
|
||||
```
|
||||
|
||||
Standard 1D fully-connected layer.
|
||||
@@ -88,7 +89,6 @@ Standard 1D fully-connect layer.
|
||||
|
||||
- __Arguments__:
|
||||
|
||||
- __input_dim__: int >= 0.
|
||||
- __output_dim__: int >= 0.
|
||||
- __init__: name of initialization function for the weights of the layer (see: [initializations](../initializations.md)), or alternatively, Theano function to use for weights initialization. This parameter is only relevant if you don't pass a `weights` argument.
|
||||
- __activation__: name of activation function to use (see: [activations](../activations.md)), or alternatively, elementwise Theano function. If you don't specify anything, no activation is applied (ie. "linear" activation: a(x) = x).
|
||||
@@ -98,21 +98,22 @@ Standard 1D fully-connect layer.
|
||||
- __activity_regularizer__: instance of [ActivityRegularizer](../regularizers.md), applied to the network output.
|
||||
- __W_constraint__: instance of the [constraints](../constraints.md) module (eg. maxnorm, nonneg), applied to the main weights matrix.
|
||||
- __b_constraint__: instance of the [constraints](../constraints.md) module, applied to the bias.
|
||||
- __input_dim__: dimensionality of the input (integer). This argument (or alternatively, the keyword argument `input_shape`) is required when using this layer as the first layer in a model.
|
||||
|
||||
---
|
||||
|
||||
## TimeDistributedDense
|
||||
```python
|
||||
keras.layers.core.TimeDistributedDense(input_dim, output_dim, init='glorot_uniform', activation='linear', weights=None \
|
||||
W_regularizer=None, b_regularizer=None, activity_regularizer=None, W_constraint=None, b_constraint=None)
|
||||
keras.layers.core.TimeDistributedDense(output_dim, init='glorot_uniform', activation='linear', weights=None,
|
||||
W_regularizer=None, b_regularizer=None, activity_regularizer=None, W_constraint=None, b_constraint=None,
|
||||
input_dim=None, input_length=None)
|
||||
```
|
||||
|
||||
Fully-connected layer distributed over the time dimension. Useful after a recurrent network set to `return_sequences=True`.
|
||||
|
||||
- __Input shape__: 3D tensor with shape: `(nb_samples, nb_timesteps, input_dim)`.
|
||||
- __Input shape__: 3D tensor with shape: `(nb_samples, timesteps, input_dim)`.
|
||||
|
||||
- __Arguments__:
|
||||
- __input_dim__: int >= 0.
|
||||
- __output_dim__: int >= 0.
|
||||
- __init__: name of initialization function for the weights of the layer (see: [initializations](../initializations.md)), or alternatively, Theano function to use for weights initialization. This parameter is only relevant if you don't pass a `weights` argument.
|
||||
- __activation__: name of activation function to use (see: [activations](../activations.md)), or alternatively, elementwise Theano function. If you don't specify anything, no activation is applied (ie. "linear" activation: a(x) = x).
|
||||
@@ -122,12 +123,14 @@ Fully-connected layer distributed over the time dimension. Useful after a recurr
|
||||
- __activity_regularizer__: instance of [ActivityRegularizer](../regularizers.md), applied to the network output.
|
||||
- __W_constraint__: instance of the [constraints](../constraints.md) module (eg. maxnorm, nonneg), applied to the main weights matrix.
|
||||
- __b_constraint__: instance of the [constraints](../constraints.md) module, applied to the bias.
|
||||
- __input_dim__: dimensionality of the input (integer). This argument (or alternatively, the keyword argument `input_shape`) is required when using this layer as the first layer in a model.
|
||||
- __input_length__: Length of input sequences, when it is constant. This argument is required if you are going to connect `Flatten` then `Dense` layers upstream (without it, the shape of the dense outputs cannot be computed).
|
||||
|
||||
- __Example__:
|
||||
```python
|
||||
# input shape: (nb_samples, nb_timesteps, 10)
|
||||
model.add(LSTM(10, 5, return_sequences=True)) # output shape: (nb_samples, nb_timesteps, 5)
|
||||
model.add(TimeDistributedDense(5, 10)) # output shape: (nb_samples, nb_timesteps, 10)
|
||||
# input shape: (nb_samples, timesteps, 10)
|
||||
model.add(LSTM(5, return_sequences=True, input_dim=10)) # output shape: (nb_samples, timesteps, 5)
|
||||
model.add(TimeDistributedDense(15)) # output shape: (nb_samples, timesteps, 15)
|
||||
```
|
||||
|
||||
|
||||
@@ -151,7 +154,7 @@ A customizable autoencoder model. If `output_reconstruction = True` then dim(inp
|
||||
|
||||
- __decoder__: A [layer](./) or [layer container](./containers.md).
|
||||
|
||||
- __output_reconstruction__: If this is False the when .predict() is called the output is the deepest hidden layer's activation. Otherwise the output of the final decoder layer is presented. Be sure your validation data confirms to this logic if you decide to use any.
|
||||
- __output_reconstruction__: If this is False, then when .predict() is called, the output is the deepest hidden layer's activation. Otherwise, the output of the final decoder layer is presented. Be sure your validation data conforms to this logic if you decide to use any.
|
||||
|
||||
- __weights__: list of numpy arrays to set as initial weights. The list should have 1 element, of shape `(input_dim, output_dim)`.
|
||||
|
||||
@@ -160,8 +163,8 @@ A customizable autoencoder model. If `output_reconstruction = True` then dim(inp
|
||||
from keras.layers import containers
|
||||
|
||||
# input shape: (nb_samples, 32)
|
||||
encoder = containers.Sequential([Dense(32, 16), Dense(16, 8)])
|
||||
decoder = containers.Sequential([Dense(8, 16), Dense(16, 32)])
|
||||
encoder = containers.Sequential([Dense(16, input_dim=32), Dense(8)])
|
||||
decoder = containers.Sequential([Dense(16, input_dim=8), Dense(32)])
|
||||
|
||||
autoencoder = Sequential()
|
||||
autoencoder.add(AutoEncoder(encoder=encoder, decoder=decoder, output_reconstruction=False))
|
||||
@@ -176,7 +179,8 @@ keras.layers.core.Activation(activation)
|
||||
```
|
||||
Apply an activation function to the input.
|
||||
|
||||
- __Input shape__: This layer does not assume a specific input shape. As a result, it cannot be used as the first layer in a model.
|
||||
|
||||
- __Input shape__: Arbitrary. Use the keyword argument `input_shape` (tuple of integers, does not include the samples axis) when using this layer as the first layer in a model.
|
||||
|
||||
- __Output shape__: Same as input.
|
||||
|
||||
@@ -193,7 +197,8 @@ keras.layers.core.Dropout(p)
|
||||
```
|
||||
Apply dropout to the input. Dropout consists in randomly setting a fraction `p` of input units to 0 at each update during training time, which helps prevent overfitting. Reference: [Dropout: A Simple Way to Prevent Neural Networks from Overfitting](http://www.cs.toronto.edu/~rsalakhu/papers/srivastava14a.pdf)
|
||||
|
||||
- __Input shape__: This layer does not assume a specific input shape.
|
||||
|
||||
- __Input shape__: Arbitrary. Use the keyword argument `input_shape` (tuple of integers, does not include the samples axis) when using this layer as the first layer in a model.
|
||||
|
||||
- __Output shape__: Same as input.
|
||||
|
||||
@@ -206,24 +211,25 @@ Apply dropout to the input. Dropout consists in randomly setting a fraction `p`
|
||||
|
||||
## Reshape
|
||||
```python
|
||||
keras.layers.core.Reshape(*dims)
|
||||
keras.layers.core.Reshape(dims)
|
||||
```
|
||||
|
||||
Reshape the input to a new shape containing the same number of units.
|
||||
|
||||
- __Input shape__: This layer does not assume a specific input shape.
|
||||
|
||||
- __Output shape__: `(nb_samples, *dims)`.
|
||||
- __Input shape__: Arbitrary. Use the keyword argument `input_shape` (tuple of integers, does not include the samples axis) when using this layer as the first layer in a model.
|
||||
|
||||
- __Output shape__: `(nb_samples, dims)`.
|
||||
|
||||
- __Arguments__:
|
||||
|
||||
- *dims: integers. Dimensions of the new shape.
|
||||
- dims: tuple of integers. Dimensions of the new shape.
|
||||
|
||||
- __Example__:
|
||||
```python
|
||||
# input shape: (nb_samples, 10)
|
||||
model.add(Dense(10, 100)) # output shape: (nb_samples, 100)
|
||||
model.add(Reshape(10, 10)) # output shape: (nb_samples, 10, 10)
|
||||
model.add(Dense(100, input_dim=10)) # output shape: (nb_samples, 100)
|
||||
model.add(Reshape(dims=(10, 10))) # output shape: (nb_samples, 10, 10)
|
||||
```
|
||||
|
||||
---
|
||||
@@ -235,7 +241,7 @@ keras.layers.core.Flatten()
|
||||
|
||||
Convert a nD input to 1D.
|
||||
|
||||
- __Input shape__: (nb_samples, *). This layer cannot be used as the first layer in a model.
|
||||
- __Input shape__: Arbitrary. Use the keyword argument `input_shape` (tuple of integers, does not include the samples axis) when using this layer as the first layer in a model.
|
||||
|
||||
- __Output shape__: `(nb_samples, nb_input_units)`.
|
||||
|
||||
@@ -250,7 +256,7 @@ Repeat the 1D input n times. Dimensions of input are assumed to be `(nb_samples,
|
||||
|
||||
Note that the output is still a single tensor; `RepeatVector` does not split the data flow.
|
||||
|
||||
- __Input shape__: This layer does not assume a specific input shape. This layer cannot be used as the first layer in a model.
|
||||
- __Input shape__: Arbitrary. Use the keyword argument `input_shape` (tuple of integers, does not include the samples axis) when using this layer as the first layer in a model.
|
||||
|
||||
- __Output shape__: `(nb_samples, n, input_dims)`.
|
||||
|
||||
@@ -265,18 +271,18 @@ keras.layers.core.Permute(dims)
|
||||
```
|
||||
Permute the dimensions of the input data according to the given tuple. Sometimes useful for connecting RNNs and convnets together.
|
||||
|
||||
- __Input shape: This layer does not assume a specific input shape.
|
||||
- __Input shape__: Arbitrary. Use the keyword argument `input_shape` (tuple of integers, does not include the samples axis) when using this layer as the first layer in a model.
|
||||
|
||||
- __Output shape: Same as the input shape, but with the dimensions re-ordered according to the ordering specified by the tuple.
|
||||
- __Output shape__: Same as the input shape, but with the dimensions re-ordered according to the ordering specified by the tuple.
|
||||
|
||||
- __Argument: tuple specifying the permutation scheme (e.g. `(2, 1)` permutes the first and second dimension of the input).
|
||||
- __Argument__: tuple specifying the permutation scheme (e.g. `(2, 1)` permutes the first and second dimension of the input).
|
||||
|
||||
- __Example__:
|
||||
```python
|
||||
# input shape: (nb_samples, 10)
|
||||
model.add(Dense(10, 50)) # output shape: (nb_samples, 50)
|
||||
model.add(Reshape(10, 5)) # output shape: (nb_samples, 10, 5)
|
||||
model.add(Permute((2, 1))) #output shape: (nb_samples, 5, 10)
|
||||
model.add(Dense(50, input_dim=10)) # output shape: (nb_samples, 50)
|
||||
model.add(Reshape(dims=(10, 5))) # output shape: (nb_samples, 10, 5)
|
||||
model.add(Permute(dims=(2, 1))) #output shape: (nb_samples, 5, 10)
|
||||
```
|
||||
|
||||
---
|
||||
@@ -294,8 +300,9 @@ This layer can be used, for instance, to induce activation sparsity in the previ
|
||||
|
||||
## MaxoutDense
|
||||
```python
|
||||
keras.layers.core.MaxoutDense(input_dim, output_dim, nb_feature=4, init='glorot_uniform', weights=None, \
|
||||
W_regularizer=None, b_regularizer=None, activity_regularizer=None, W_constraint=None, b_constraint=None)
|
||||
keras.layers.core.MaxoutDense(output_dim, nb_feature=4, init='glorot_uniform', weights=None,
|
||||
W_regularizer=None, b_regularizer=None, activity_regularizer=None,
|
||||
W_constraint=None, b_constraint=None, input_dim=None)
|
||||
```
|
||||
|
||||
A dense maxout layer. A `MaxoutDense` layer takes the element-wise maximum of `nb_feature` `Dense(input_dim, output_dim)` linear layers. This allows the layer to learn a convex, piecewise linear activation function over the inputs. See [this paper](http://arxiv.org/pdf/1302.4389.pdf) for more details. Note that this is a *linear* layer -- if you wish to apply an activation function (you shouldn't need to -- they are universal function approximators), an `Activation` layer must be added after.
|
||||
@@ -306,7 +313,6 @@ A dense maxout layer. A `MaxoutDense` layer takes the element-wise maximum of `n
|
||||
|
||||
- __Arguments__:
|
||||
|
||||
- __input_dim__: int >= 0.
|
||||
- __output_dim__: int >= 0.
|
||||
- __nb_feature__: int >= 0. the number of features to create for the maxout. This is equivalent to the number of piecewise elements to be allowed for the activation function.
|
||||
- __init__: name of initialization function for the weights of the layer (see: [initializations](../initializations.md)), or alternatively, Theano function to use for weights initialization. This parameter is only relevant if you don't pass a `weights` argument.
|
||||
@@ -316,12 +322,12 @@ A dense maxout layer. A `MaxoutDense` layer takes the element-wise maximum of `n
|
||||
- __activity_regularizer__: instance of [ActivityRegularizer](../regularizers.md), applied to the network output.
|
||||
- __W_constraint__: instance of the [constraints](../constraints.md) module (eg. maxnorm, nonneg), applied to the main weights matrix.
|
||||
- __b_constraint__: instance of the [constraints](../constraints.md) module, applied to the bias.
|
||||
- __input_dim__: dimensionality of the input (integer). This argument (or alternatively, the keyword argument `input_shape`) is required when using this layer as the first layer in a model.
|
||||
|
||||
```python
|
||||
# input shape: (nb_samples, 10)
|
||||
model.add(Dense(10, 100)) # output shape: (nb_samples, 100)
|
||||
model.add(MaxoutDense(100, 100, nb_feature=10)) # output shape: (nb_samples, 100)
|
||||
model.add(RepeatVector(2)) # output shape: (nb_samples, 2, 10)
|
||||
model.add(Dense(100, input_dim=10)) # output shape: (nb_samples, 100)
|
||||
model.add(MaxoutDense(50, nb_feature=10)) # output shape: (nb_samples, 50)
|
||||
```
|
||||
|
||||
## Merge
|
||||
@@ -329,27 +335,27 @@ model.add(RepeatVector(2)) # output shape: (nb_samples, 2, 10)
|
||||
keras.layers.core.Merge(models, mode='sum')
|
||||
```
|
||||
|
||||
Merge the output of a list of layers (or containers) into a single tensor, following one of two modes: `sum` or `concat`.
|
||||
Merge the output of a list of layers (or containers) into a single tensor, following one of three modes: `sum`, `mul` or `concat`.
|
||||
|
||||
- __Arguments__:
|
||||
- __layers__: List of layers or [containers](/layers/containers/).
|
||||
- __mode__: String, one of `{'sum', 'concat'}`. `sum` will simply sum the outputs of the layers (therefore all layers should have an output with the same shape). `concat` will concatenate the outputs along the last dimension (therefore all layers should have an output that only differ along the last dimension).
|
||||
- __mode__: String, one of `{'sum', 'mul', 'concat'}`. `sum` and `mul` will simply sum/multiply the outputs of the layers (therefore all layers should have an output with the same shape). `concat` will concatenate the outputs along the last dimension (therefore all layers should have an output that only differ along the last dimension).
|
||||
|
||||
- __Example__:
|
||||
|
||||
```python
|
||||
left = Sequential()
|
||||
left.add(Dense(784, 50))
|
||||
left.add(Dense(50, input_shape=(784,)))
|
||||
left.add(Activation('relu'))
|
||||
|
||||
right = Sequential()
|
||||
right.add(Dense(784, 50))
|
||||
right.add(Dense(50, input_shape=(784,)))
|
||||
right.add(Activation('relu'))
|
||||
|
||||
model = Sequential()
|
||||
model.add(Merge([left, right], mode='sum'))
|
||||
|
||||
model.add(Dense(50, 10))
|
||||
model.add(Dense(10))
|
||||
model.add(Activation('softmax'))
|
||||
|
||||
model.compile(loss='categorical_crossentropy', optimizer='rmsprop')
|
||||
|
||||
@@ -2,15 +2,15 @@
|
||||
## Embedding
|
||||
|
||||
```python
|
||||
keras.layers.embeddings.Embedding(input_dim, output_dim, init='uniform', weights=None, W_regularizer=None, W_constraint=None, mask_zero=False)
|
||||
keras.layers.embeddings.Embedding(input_dim, output_dim, init='uniform', input_length=None, weights=None, W_regularizer=None, W_constraint=None, mask_zero=False)
|
||||
```
|
||||
|
||||
Turn positive integers (indexes) into dense vectors of fixed size,
|
||||
eg. `[[4], [20]] -> [[0.25, 0.1], [0.6, -0.2]]`
|
||||
|
||||
- __Input shape__: 2D tensor with shape: `(nb_samples, maxlen)`.
|
||||
- __Input shape__: 2D tensor with shape: `(nb_samples, sequence_length)`.
|
||||
|
||||
- __Output shape__: 3D tensor with shape: `(nb_samples, maxlen, output_dim)`.
|
||||
- __Output shape__: 3D tensor with shape: `(nb_samples, sequence_length, output_dim)`.
|
||||
|
||||
- __Arguments__:
|
||||
|
||||
@@ -21,12 +21,13 @@ eg. `[[4], [20]] -> [[0.25, 0.1], [0.6, -0.2]]`
|
||||
- __W_regularizer__: instance of the [regularizers](../regularizers.md) module (eg. L1 or L2 regularization), applied to the embedding matrix.
|
||||
- __W_constraint__: instance of the [constraints](../constraints.md) module (eg. maxnorm, nonneg), applied to the embedding matrix.
|
||||
- __mask_zero__: Whether or not the input value 0 is a special "padding" value that should be masked out. This is useful for [recurrent layers](recurrent.md) which may take variable length input. If this is `True` then all subsequent layers in the model need to support masking or an exception will be raised.
|
||||
- __input_length__: Length of input sequences, when it is constant. This argument is required if you are going to connect `Flatten` then `Dense` layers downstream (without it, the shape of the dense outputs cannot be computed).
|
||||
|
||||
|
||||
## WordContextProduct
|
||||
|
||||
```python
|
||||
keras.layers.embeddings.WordContextProduct(input_dim, proj_dim=128,
|
||||
keras.layers.embeddings.WordContextProduct(input_dim, proj_dim=128,
|
||||
init='uniform', activation='sigmoid', weights=None)
|
||||
```
|
||||
|
||||
|
||||
@@ -6,9 +6,9 @@ keras.layers.noise.GaussianNoise(sigma)
|
||||
```
|
||||
Apply to the input an additive zero-centred gaussian noise with standard deviation `sigma`. This is useful to mitigate overfitting (you could see it as a kind of random data augmentation). Gaussian Noise (GS) is a natural choice as a corruption process for real-valued inputs.
|
||||
|
||||
The Gaussian noise is only added at training time.
|
||||
Only active at training time.
|
||||
|
||||
- __Input shape__: This layer does not assume a specific input shape.
|
||||
- __Input shape__: Arbitrary. Use the keyword argument `input_shape` (tuple of integers, does not include the samples axis) when using this layer as the first layer in a model.
|
||||
|
||||
- __Output shape__: Same as input.
|
||||
|
||||
@@ -24,11 +24,9 @@ keras.layers.noise.GaussianDropout(p)
|
||||
```
|
||||
Apply to the input a multiplicative one-centred gaussian noise with standard deviation `sqrt(p/(1-p))`. `p` refers to the drop probability, to match the Dropout layer syntax.
|
||||
|
||||
http://www.cs.toronto.edu/~rsalakhu/papers/srivastava14a.pdf
|
||||
Only active at training time.
|
||||
|
||||
The Gaussian noise is only used at training time.
|
||||
|
||||
- __Input shape__: This layer does not assume a specific input shape.
|
||||
- __Input shape__: Arbitrary. Use the keyword argument `input_shape` (tuple of integers, does not include the samples axis) when using this layer as the first layer in a model.
|
||||
|
||||
- __Output shape__: Same as input.
|
||||
|
||||
@@ -36,3 +34,4 @@ The Gaussian noise is only used at training time.
|
||||
|
||||
- __p__: float, drop probability as with Dropout.
|
||||
|
||||
|
||||
|
||||
@@ -2,17 +2,16 @@
|
||||
## BatchNormalization
|
||||
|
||||
```python
|
||||
keras.layers.normalization.BatchNormalization(input_shape, epsilon=1e-6, weights=None)
|
||||
keras.layers.normalization.BatchNormalization(epsilon=1e-6, weights=None)
|
||||
```
|
||||
|
||||
Normalize the activations of the previous layer at each batch.
|
||||
|
||||
- __Input shape__: Same as `input_shape`. This layer cannot be used as first layer in a model.
|
||||
- __Input shape__: Arbitrary. Use the keyword argument `input_shape` (tuple of integers, does not include the samples axis) when using this layer as the first layer in a model.
|
||||
|
||||
- __Output shape__: Same as input.
|
||||
|
||||
- __Arguments__:
|
||||
- __input_shape__: tuple.
|
||||
- __Arguments__:
|
||||
- __epsilon__: small float > 0. Fuzz parameter.
|
||||
- __weights__: Initialization weights. List of 2 numpy arrays, with shapes: `[(input_shape,), (input_shape,)]`
|
||||
|
||||
|
||||
@@ -2,9 +2,9 @@
|
||||
## SimpleRNN
|
||||
|
||||
```python
|
||||
keras.layers.recurrent.SimpleRNN(input_dim, output_dim,
|
||||
keras.layers.recurrent.SimpleRNN(output_dim,
|
||||
init='glorot_uniform', inner_init='orthogonal', activation='sigmoid', weights=None,
|
||||
truncate_gradient=-1, return_sequences=False)
|
||||
truncate_gradient=-1, return_sequences=False, input_dim=None, input_length=None)
|
||||
```
|
||||
Fully connected RNN where the output is fed back to the input.
|
||||
|
||||
@@ -18,23 +18,25 @@ Fully connected RNN where output is to fed back to input.
|
||||
|
||||
|
||||
- __Arguments__:
|
||||
- __input_dim__: dimension of the input.
|
||||
- __output_dim__: dimension of the internal projections and the final output.
|
||||
- __init__: weight initialization function. Can be the name of an existing function (str), or a Theano function (see: [initializations](../initializations.md)).
|
||||
- __activation__: activation function. Can be the name of an existing function (str), or a Theano function (see: [activations](../activations.md)).
|
||||
- __weights__: list of numpy arrays to set as initial weights. The list should have 3 elements, of shapes: `[(input_dim, output_dim), (output_dim, output_dim), (output_dim,)]`.
|
||||
- __truncate_gradient__: Number of steps to use in truncated BPTT. See: [Theano "scan"](http://deeplearning.net/software/theano/library/scan.html).
|
||||
- __return_sequences__: Boolean. Whether to return the last output in the output sequence, or the full sequence.
|
||||
- __input_dim__: dimensionality of the input (integer). This argument (or alternatively, the keyword argument `input_shape`) is required when using this layer as the first layer in a model.
|
||||
- __input_length__: Length of input sequences, when it is constant. This argument is required if you are going to connect `Flatten` then `Dense` layers downstream (without it, the shape of the dense outputs cannot be computed).
|
||||
|
||||
---
|
||||
|
||||
## SimpleDeepRNN
|
||||
|
||||
```python
|
||||
keras.layers.recurrent.SimpleDeepRNN(input_dim, output_dim, depth=3,
|
||||
keras.layers.recurrent.SimpleDeepRNN(output_dim, depth=3,
|
||||
init='glorot_uniform', inner_init='orthogonal',
|
||||
activation='sigmoid', inner_activation='hard_sigmoid',
|
||||
weights=None, truncate_gradient=-1, return_sequences=False)
|
||||
weights=None, truncate_gradient=-1, return_sequences=False,
|
||||
input_dim=None, input_length=None)
|
||||
```
|
||||
Fully connected RNN where the output of multiple timesteps (up to "depth" steps in the past) is fed back to the input:
|
||||
|
||||
@@ -64,6 +66,8 @@ Not a particularly useful model, included for demonstration purposes.
|
||||
- __weights__: list of numpy arrays to set as initial weights. The list should have depth+2 elements.
|
||||
- __truncate_gradient__: Number of steps to use in truncated BPTT. See: [Theano "scan"](http://deeplearning.net/software/theano/library/scan.html).
|
||||
- __return_sequences__: Boolean. Whether to return the last output in the output sequence, or the full sequence.
|
||||
- __input_dim__: dimensionality of the input (integer). This argument (or alternatively, the keyword argument `input_shape`) is required when using this layer as the first layer in a model.
|
||||
- __input_length__: Length of input sequences, when it is constant. This argument is required if you are going to connect `Flatten` then `Dense` layers downstream (without it, the shape of the dense outputs cannot be computed).
|
||||
|
||||
|
||||
---
|
||||
@@ -74,7 +78,8 @@ Not a particularly useful model, included for demonstration purposes.
|
||||
keras.layers.recurrent.GRU(input_dim, output_dim=128,
|
||||
init='glorot_uniform', inner_init='orthogonal',
|
||||
activation='sigmoid', inner_activation='hard_sigmoid',
|
||||
weights=None, truncate_gradient=-1, return_sequences=False)
|
||||
weights=None, truncate_gradient=-1, return_sequences=False,
|
||||
input_dim=None, input_length=None)
|
||||
```
|
||||
|
||||
Gated Recurrent Unit - Cho et al. 2014.
|
||||
@@ -97,6 +102,8 @@ Gated Recurrent Unit - Cho et al. 2014.
|
||||
- __weights__: list of numpy arrays to set as initial weights. The list should have 9 elements.
|
||||
- __truncate_gradient__: Number of steps to use in truncated BPTT. See: [Theano "scan"](http://deeplearning.net/software/theano/library/scan.html).
|
||||
- __return_sequences__: Boolean. Whether to return the last output in the output sequence, or the full sequence.
|
||||
- __input_dim__: dimensionality of the input (integer). This argument (or alternatively, the keyword argument `input_shape`) is required when using this layer as the first layer in a model.
|
||||
- __input_length__: Length of input sequences, when it is constant. This argument is required if you are going to connect `Flatten` then `Dense` layers downstream (without it, the shape of the dense outputs cannot be computed).
|
||||
|
||||
- __References__:
|
||||
- [On the Properties of Neural Machine Translation: Encoder–Decoder Approaches](http://www.aclweb.org/anthology/W14-4012)
|
||||
@@ -110,7 +117,8 @@ Gated Recurrent Unit - Cho et al. 2014.
|
||||
keras.layers.recurrent.LSTM(input_dim, output_dim=128,
|
||||
init='glorot_uniform', inner_init='orthogonal', forget_bias_init='one',
|
||||
activation='tanh', inner_activation='hard_sigmoid',
|
||||
weights=None, truncate_gradient=-1, return_sequences=False)
|
||||
weights=None, truncate_gradient=-1, return_sequences=False,
|
||||
input_dim=None, input_length=None)
|
||||
```
|
||||
|
||||
Long-Short Term Memory unit - Hochreiter 1997.
|
||||
@@ -134,6 +142,8 @@ Long-Short Term Memory unit - Hochreiter 1997.
|
||||
- __weights__: list of numpy arrays to set as initial weights. The list should have 12 elements.
|
||||
- __truncate_gradient__: Number of steps to use in truncated BPTT. See: [Theano "scan"](http://deeplearning.net/software/theano/library/scan.html).
|
||||
- __return_sequences__: Boolean. Whether to return the last output in the output sequence, or the full sequence.
|
||||
- __input_dim__: dimensionality of the input (integer). This argument (or alternatively, the keyword argument `input_shape`) is required when using this layer as the first layer in a model.
|
||||
- __input_length__: Length of input sequences, when it is constant. This argument is required if you are going to connect `Flatten` then `Dense` layers downstream (without it, the shape of the dense outputs cannot be computed).
|
||||
|
||||
- __References__:
|
||||
- [Long short-term memory](http://deeplearning.cs.cmu.edu/pdfs/Hochreiter97_lstm.pdf) (original 1997 paper)
|
||||
@@ -148,7 +158,8 @@ Long-Short Term Memory unit - Hochreiter 1997.
|
||||
keras.layers.recurrent.JZS1(input_dim, output_dim=128,
|
||||
init='glorot_uniform', inner_init='orthogonal',
|
||||
activation='tanh', inner_activation='sigmoid',
|
||||
weights=None, truncate_gradient=-1, return_sequences=False)
|
||||
weights=None, truncate_gradient=-1, return_sequences=False,
|
||||
input_dim=None, input_length=None)
|
||||
```
|
||||
|
||||
Top 3 RNN architectures evolved from the evaluation of thousands of models. Serve as alternatives to LSTMs and GRUs. Corresponds to `MUT1`, `MUT2`, and `MUT3` architectures described in the paper: An Empirical Exploration of Recurrent Network Architectures, Jozefowicz et al. 2015.
|
||||
@@ -171,6 +182,8 @@ Top 3 RNN architectures evolved from the evaluation of thousands of models. Serv
|
||||
- __weights__: list of numpy arrays to set as initial weights. The list should have 9 elements.
|
||||
- __truncate_gradient__: Number of steps to use in truncated BPTT. See: [Theano "scan"](http://deeplearning.net/software/theano/library/scan.html).
|
||||
- __return_sequences__: Boolean. Whether to return the last output in the output sequence, or the full sequence.
|
||||
- __input_dim__: dimensionality of the input (integer). This argument (or alternatively, the keyword argument `input_shape`) is required when using this layer as the first layer in a model.
|
||||
- __input_length__: Length of input sequences, when it is constant. This argument is required if you are going to connect `Flatten` then `Dense` layers downstream (without it, the shape of the dense outputs cannot be computed).
|
||||
|
||||
- __References__:
|
||||
- [An Empirical Exploration of Recurrent Network Architectures](http://www.jmlr.org/proceedings/papers/v37/jozefowicz15.pdf)
|
||||
|
||||
+17
-17
@@ -27,8 +27,8 @@ model = keras.models.Sequential()
|
||||
- __shuffle__: boolean or str (for 'batch'). Whether to shuffle the samples at each epoch. 'batch' is a special option for dealing with the limitations of HDF5 data; it shuffles in batch-sized chunks.
|
||||
- __show_accuracy__: boolean. Whether to display class accuracy in the logs to stdout at each epoch.
|
||||
- __class_weight__: dictionary mapping classes to a weight value, used for scaling the loss function (during training only).
|
||||
- __sample_weight__: list or numpy array with 1:1 mapping to the training samples, used for scaling the loss function (during training only). For time-distributed data, there is one weight per sample *per timestep*, i.e. if your output data is shaped `(nb_samples, timesteps, output_dim)`, your mask should be of shape `(nb_samples, timesteps)`. This allows you to mask out or reweight individual output timesteps, which is useful in sequence to sequence learning.
|
||||
- __evaluate__(X, y, batch_size=128, show_accuracy=False, verbose=1): Show performance of the model over some validation data.
|
||||
- __sample_weight__: list or numpy array with 1:1 mapping to the training samples, used for scaling the loss function (during training only). For time-distributed data, there is one weight per sample *per timestep*, i.e. if your output data is shaped `(nb_samples, timesteps, output_dim)`, your mask should be of shape `(nb_samples, timesteps, 1)`. This allows you to mask out or reweight individual output timesteps, which is useful in sequence to sequence learning.
|
||||
- __evaluate__(X, y, batch_size=128, show_accuracy=False, verbose=1, sample_weight=None): Show performance of the model over some validation data.
|
||||
- __Return__: The loss score over the data, or a `(loss, accuracy)` tuple if `show_accuracy=True`.
|
||||
- __Arguments__: Same meaning as fit method above. verbose is used as a binary flag (progress bar or nothing).
|
||||
- __predict__(X, batch_size=128, verbose=1):
|
||||
@@ -37,9 +37,9 @@ model = keras.models.Sequential()
|
||||
- __predict_classes__(X, batch_size=128, verbose=1): Return an array of class predictions for some test data.
|
||||
- __Return__: An array of labels for some test data.
|
||||
- __Arguments__: Same meaning as fit method above. verbose is used as a binary flag (progress bar or nothing).
|
||||
- __train_on_batch__(X, y, accuracy=False): Single gradient update on one batch.
|
||||
- __train_on_batch__(X, y, accuracy=False, class_weight=None, sample_weight=None): Single gradient update on one batch.
|
||||
- __Return__: loss over the data, or tuple `(loss, accuracy)` if `accuracy=True`.
|
||||
- __test_on_batch__(X, y, accuracy=False): Single performance evaluation on one batch.
|
||||
- __test_on_batch__(X, y, accuracy=False, sample_weight=None): Single performance evaluation on one batch.
|
||||
- __Return__: loss over the data, or tuple `(loss, accuracy)` if `accuracy=True`.
|
||||
- __save_weights__(fname, overwrite=False): Store the weights of all layers to a HDF5 file. If overwrite==False and the file already exists, an exception will be thrown.
|
||||
- __load_weights__(fname): Sets the weights of a model, based on weights stored by __save_weights__. You can only __load_weights__ on a savefile from a model with an identical architecture. __load_weights__ can be called either before or after the __compile__ step.
|
||||
@@ -52,7 +52,7 @@ from keras.layers.core import Dense, Dropout, Activation
|
||||
from keras.optimizers import SGD
|
||||
|
||||
model = Sequential()
|
||||
model.add(Dense(64, 2, init='uniform'))
|
||||
model.add(Dense(2, init='uniform', input_dim=64))
|
||||
model.add(Activation('softmax'))
|
||||
|
||||
model.compile(loss='mse', optimizer='sgd')
|
||||
@@ -125,10 +125,10 @@ Arbitrary connection graph. It can have any number of inputs and outputs, with e
|
||||
model = keras.models.Graph()
|
||||
```
|
||||
- __Methods__:
|
||||
- __add_input__(name, ndim=2, dtype='float'): Add an input with shape dimensionality `ndim`.
|
||||
- __add_input__(name, input_shape, dtype='float'): Add an input with shape `input_shape`.
|
||||
- __Arguments__:
|
||||
- __ndim__: Use `ndim=2` for vector input `(samples, features)`, ndim=3 for temporal input `(samples, time, features)`, ndim=4 for image input `(samples, channels, height, width)`.
|
||||
- __dtype__: `float` or `int`. Use `int` if the input is connected to an Embedding layer, `float` otherwise.
|
||||
- __input_shape__: Integer tuple, shape of the expected input (not including the samples axis). E.g. (10,) for 10-dimensional vectors, (None, 128) for sequences (of variable length) of 128-dimensional vectors, (3, 32, 32) for 32x32 images with RGB channels.
|
||||
- __dtype__: `float` or `int`. Type of the expected input data.
|
||||
- __add_output__(name, input=None, inputs=[], merge_mode='concat'): Add an output connected to `input` or `inputs`.
|
||||
- __Arguments__:
|
||||
- __name__: str. unique identifier of the output.
|
||||
@@ -176,10 +176,10 @@ __Examples__:
|
||||
```python
|
||||
# graph model with one input and two outputs
|
||||
graph = Graph()
|
||||
graph.add_input(name='input', ndim=2)
|
||||
graph.add_node(Dense(32, 16), name='dense1', input='input')
|
||||
graph.add_node(Dense(32, 4), name='dense2', input='input')
|
||||
graph.add_node(Dense(16, 4), name='dense3', input='dense1')
|
||||
graph.add_input(name='input', input_shape=(32,))
|
||||
graph.add_node(Dense(16), name='dense1', input='input')
|
||||
graph.add_node(Dense(4), name='dense2', input='input')
|
||||
graph.add_node(Dense(4), name='dense3', input='dense1')
|
||||
graph.add_output(name='output1', input='dense2')
|
||||
graph.add_output(name='output2', input='dense3')
|
||||
|
||||
@@ -191,11 +191,11 @@ history = graph.fit({'input':X_train, 'output1':y_train, 'output2':y2_train}, nb
|
||||
```python
|
||||
# graph model with two inputs and one output
|
||||
graph = Graph()
|
||||
graph.add_input(name='input1', ndim=2)
|
||||
graph.add_input(name='input2', ndim=2)
|
||||
graph.add_node(Dense(32, 16), name='dense1', input='input1')
|
||||
graph.add_node(Dense(32, 4), name='dense2', input='input2')
|
||||
graph.add_node(Dense(16, 4), name='dense3', input='dense1')
|
||||
graph.add_input(name='input1', input_shape=(32,))
|
||||
graph.add_input(name='input2', input_shape=(32,))
|
||||
graph.add_node(Dense(16), name='dense1', input='input1')
|
||||
graph.add_node(Dense(4), name='dense2', input='input2')
|
||||
graph.add_node(Dense(4), name='dense3', input='dense1')
|
||||
graph.add_output(name='output', inputs=['dense2', 'dense3'], merge_mode='sum')
|
||||
graph.compile('rmsprop', {'output':'mse'})
|
||||
|
||||
|
||||
@@ -5,7 +5,7 @@ An optimizer is one of the two arguments required for compiling a Keras model:
|
||||
|
||||
```python
|
||||
model = Sequential()
|
||||
model.add(Dense(20, 64, init='uniform'))
|
||||
model.add(Dense(64, init='uniform', input_dim=10))
|
||||
model.add(Activation('tanh'))
|
||||
model.add(Activation('softmax'))
|
||||
|
||||
|
||||
@@ -15,7 +15,7 @@ These layers expose 3 keyword arguments:
|
||||
|
||||
```python
|
||||
from keras.regularizers import l2, activity_l2
|
||||
model.add(Dense(64, 64, W_regularizer=l2(0.01), activity_regularizer=activity_l2(0.01)))
|
||||
model.add(Dense(64, input_dim=64, W_regularizer=l2(0.01), activity_regularizer=activity_l2(0.01)))
|
||||
```
|
||||
|
||||
## Available penalties
|
||||
|
||||
+15
-10
@@ -1,9 +1,8 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
from __future__ import print_function
|
||||
from keras.models import Sequential, slice_X
|
||||
from keras.layers.core import Activation, Dense, RepeatVector
|
||||
from keras.layers.core import Activation, TimeDistributedDense, RepeatVector
|
||||
from keras.layers import recurrent
|
||||
from sklearn.utils import shuffle
|
||||
import numpy as np
|
||||
|
||||
"""
|
||||
@@ -25,18 +24,15 @@ and
|
||||
http://papers.nips.cc/paper/5346-sequence-to-sequence-learning-with-neural-networks.pdf
|
||||
Theoretically it introduces shorter term dependencies between source and target.
|
||||
|
||||
|
||||
Two digits inverted:
|
||||
+ One layer JZS1 (128 HN), 5k training examples = 99% train/test accuracy in 55 epochs
|
||||
|
||||
Three digits inverted:
|
||||
+ One layer JZS1 (128 HN), 50k training examples = 99% train/test accuracy in 100 epochs
|
||||
|
||||
|
||||
Four digits inverted:
|
||||
+ One layer JZS1 (128 HN), 400k training examples = 99% train/test accuracy in 20 epochs
|
||||
|
||||
|
||||
Five digits inverted:
|
||||
+ One layer JZS1 (128 HN), 550k training examples = 99% train/test accuracy in 30 epochs
|
||||
|
||||
@@ -122,23 +118,32 @@ for i, sentence in enumerate(expected):
|
||||
y[i] = ctable.encode(sentence, maxlen=DIGITS + 1)
|
||||
|
||||
# Shuffle (X, y) in unison as the later parts of X will almost all be larger digits
|
||||
X, y = shuffle(X, y)
|
||||
indices = np.arange(len(y))
|
||||
np.random.shuffle(indices)
|
||||
X = X[indices]
|
||||
y = y[indices]
|
||||
# Explicitly set apart 10% for validation data that we never train over
|
||||
split_at = len(X) - len(X) / 10
|
||||
(X_train, X_val) = (slice_X(X, 0, split_at), slice_X(X, split_at))
|
||||
(y_train, y_val) = (y[:split_at], y[split_at:])
|
||||
|
||||
print(X_train.shape)
|
||||
print(y_train.shape)
|
||||
|
||||
print('Build model...')
|
||||
model = Sequential()
|
||||
# "Encode" the input sequence using an RNN, producing an output of HIDDEN_SIZE
|
||||
model.add(RNN(len(chars), HIDDEN_SIZE))
|
||||
# note: in a situation where your input sequences have a variable length,
|
||||
# use input_shape=(None, nb_feature).
|
||||
model.add(RNN(HIDDEN_SIZE, input_shape=(None, len(chars))))
|
||||
# For the decoder's input, we repeat the encoded input for each time step
|
||||
model.add(RepeatVector(DIGITS + 1))
|
||||
# The decoder RNN could be multiple layers stacked or a single layer
|
||||
for _ in xrange(LAYERS):
|
||||
model.add(RNN(HIDDEN_SIZE, HIDDEN_SIZE, return_sequences=True))
|
||||
model.add(RNN(HIDDEN_SIZE, return_sequences=True))
|
||||
|
||||
# For each step of the output sequence, decide which character should be chosen
|
||||
model.add(Dense(HIDDEN_SIZE, len(chars)))
|
||||
model.add(TimeDistributedDense(len(chars)))
|
||||
model.add(Activation('softmax'))
|
||||
|
||||
model.compile(loss='categorical_crossentropy', optimizer='adam')
|
||||
@@ -148,7 +153,7 @@ for iteration in range(1, 200):
|
||||
print()
|
||||
print('-' * 50)
|
||||
print('Iteration', iteration)
|
||||
model.fit(X, y, batch_size=BATCH_SIZE, nb_epoch=1, validation_data=(X_val, y_val), show_accuracy=True)
|
||||
model.fit(X_train, y_train, batch_size=BATCH_SIZE, nb_epoch=1, validation_data=(X_val, y_val), show_accuracy=True)
|
||||
###
|
||||
# Select 10 samples from the validation set at random so we can visualize errors
|
||||
for i in xrange(10):
|
||||
|
||||
@@ -181,15 +181,15 @@ print('Build model...')
|
||||
|
||||
sentrnn = Sequential()
|
||||
sentrnn.add(Embedding(vocab_size, EMBED_HIDDEN_SIZE, mask_zero=True))
|
||||
sentrnn.add(RNN(EMBED_HIDDEN_SIZE, SENT_HIDDEN_SIZE, return_sequences=False))
|
||||
sentrnn.add(RNN(SENT_HIDDEN_SIZE, return_sequences=False))
|
||||
|
||||
qrnn = Sequential()
|
||||
qrnn.add(Embedding(vocab_size, EMBED_HIDDEN_SIZE))
|
||||
qrnn.add(RNN(EMBED_HIDDEN_SIZE, QUERY_HIDDEN_SIZE, return_sequences=False))
|
||||
qrnn.add(RNN(QUERY_HIDDEN_SIZE, return_sequences=False))
|
||||
|
||||
model = Sequential()
|
||||
model.add(Merge([sentrnn, qrnn], mode='concat'))
|
||||
model.add(Dense(SENT_HIDDEN_SIZE + QUERY_HIDDEN_SIZE, vocab_size, activation='softmax'))
|
||||
model.add(Dense(vocab_size, activation='softmax'))
|
||||
|
||||
model.compile(optimizer='adam', loss='categorical_crossentropy', class_mode='categorical')
|
||||
|
||||
|
||||
@@ -28,6 +28,11 @@ nb_classes = 10
|
||||
nb_epoch = 200
|
||||
data_augmentation = True
|
||||
|
||||
# input image dimensions
|
||||
img_rows, img_cols = 32, 32
|
||||
# the CIFAR10 images are RGB
|
||||
img_channels = 3
|
||||
|
||||
# the data, shuffled and split between train and test sets
|
||||
(X_train, y_train), (X_test, y_test) = cifar10.load_data()
|
||||
print('X_train shape:', X_train.shape)
|
||||
@@ -40,26 +45,26 @@ Y_test = np_utils.to_categorical(y_test, nb_classes)
|
||||
|
||||
model = Sequential()
|
||||
|
||||
model.add(Convolution2D(32, 3, 3, 3, border_mode='full'))
|
||||
model.add(Convolution2D(32, 3, 3, border_mode='full',
|
||||
input_shape=(img_channels, img_rows, img_cols)))
|
||||
model.add(Activation('relu'))
|
||||
model.add(Convolution2D(32, 32, 3, 3))
|
||||
model.add(Convolution2D(32, 3, 3))
|
||||
model.add(Activation('relu'))
|
||||
model.add(MaxPooling2D(poolsize=(2, 2)))
|
||||
model.add(MaxPooling2D(pool_size=(2, 2)))
|
||||
model.add(Dropout(0.25))
|
||||
|
||||
model.add(Convolution2D(64, 32, 3, 3, border_mode='full'))
|
||||
model.add(Convolution2D(64, 3, 3, border_mode='full'))
|
||||
model.add(Activation('relu'))
|
||||
model.add(Convolution2D(64, 64, 3, 3))
|
||||
model.add(Convolution2D(64, 3, 3))
|
||||
model.add(Activation('relu'))
|
||||
model.add(MaxPooling2D(poolsize=(2, 2)))
|
||||
model.add(MaxPooling2D(pool_size=(2, 2)))
|
||||
model.add(Dropout(0.25))
|
||||
|
||||
model.add(Flatten())
|
||||
model.add(Dense(64*8*8, 512))
|
||||
model.add(Dense(512))
|
||||
model.add(Activation('relu'))
|
||||
model.add(Dropout(0.5))
|
||||
|
||||
model.add(Dense(512, nb_classes))
|
||||
model.add(Dense(nb_classes))
|
||||
model.add(Activation('softmax'))
|
||||
|
||||
# let's train the model using SGD + momentum (how original).
|
||||
|
||||
+7
-13
@@ -1,7 +1,7 @@
|
||||
from __future__ import absolute_import
|
||||
from __future__ import print_function
|
||||
import numpy as np
|
||||
np.random.seed(1337) # for reproducibility
|
||||
np.random.seed(1337) # for reproducibility
|
||||
|
||||
from keras.preprocessing import sequence
|
||||
from keras.optimizers import RMSprop
|
||||
@@ -25,7 +25,7 @@ max_features = 5000
|
||||
maxlen = 100
|
||||
batch_size = 32
|
||||
embedding_dims = 100
|
||||
nb_filters = 250
|
||||
nb_filter = 250
|
||||
filter_length = 3
|
||||
hidden_dims = 250
|
||||
nb_epoch = 3
|
||||
@@ -47,35 +47,29 @@ model = Sequential()
|
||||
|
||||
# we start off with an efficient embedding layer which maps
|
||||
# our vocab indices into embedding_dims dimensions
|
||||
model.add(Embedding(max_features, embedding_dims))
|
||||
model.add(Embedding(max_features, embedding_dims, input_length=maxlen))
|
||||
model.add(Dropout(0.25))
|
||||
|
||||
# we add a Convolution1D, which will learn nb_filters
|
||||
# we add a Convolution1D, which will learn nb_filter
|
||||
# word group filters of size filter_length:
|
||||
model.add(Convolution1D(input_dim=embedding_dims,
|
||||
nb_filter=nb_filters,
|
||||
model.add(Convolution1D(nb_filter=nb_filter,
|
||||
filter_length=filter_length,
|
||||
border_mode="valid",
|
||||
activation="relu",
|
||||
subsample_length=1))
|
||||
|
||||
# we use standard max pooling (halving the output of the previous layer):
|
||||
model.add(MaxPooling1D(pool_length=2))
|
||||
|
||||
# We flatten the output of the conv layer, so that we can add a vanilla dense layer:
|
||||
model.add(Flatten())
|
||||
|
||||
# Computing the output shape of a conv layer can be tricky;
|
||||
# for a good tutorial, see: http://cs231n.github.io/convolutional-networks/
|
||||
output_size = nb_filters * (((maxlen - filter_length) / 1) + 1) / 2
|
||||
|
||||
# We add a vanilla hidden layer:
|
||||
model.add(Dense(output_size, hidden_dims))
|
||||
model.add(Dense(hidden_dims))
|
||||
model.add(Dropout(0.25))
|
||||
model.add(Activation('relu'))
|
||||
|
||||
# We project onto a single unit output layer, and squash it with a sigmoid:
|
||||
model.add(Dense(hidden_dims, 1))
|
||||
model.add(Dense(1))
|
||||
model.add(Activation('sigmoid'))
|
||||
|
||||
model.compile(loss='binary_crossentropy', optimizer='rmsprop', class_mode="binary")
|
||||
|
||||
@@ -48,10 +48,10 @@ print('X_test shape:', X_test.shape)
|
||||
|
||||
print('Build model...')
|
||||
model = Sequential()
|
||||
model.add(Embedding(max_features, 128))
|
||||
model.add(LSTM(128, 128)) # try using a GRU instead, for fun
|
||||
model.add(Embedding(max_features, 128, input_length=maxlen))
|
||||
model.add(LSTM(128)) # try using a GRU instead, for fun
|
||||
model.add(Dropout(0.5))
|
||||
model.add(Dense(128, 1))
|
||||
model.add(Dense(1))
|
||||
model.add(Activation('sigmoid'))
|
||||
|
||||
# try using different optimizers and different optimizer configs
|
||||
|
||||
@@ -20,11 +20,11 @@ from sklearn.preprocessing import StandardScaler
|
||||
|
||||
Compatible Python 2.7-3.4. Requires Scikit-Learn and Pandas.
|
||||
|
||||
Recommended to run on GPU:
|
||||
Recommended to run on GPU:
|
||||
Command: THEANO_FLAGS=mode=FAST_RUN,device=gpu,floatX=float32 python kaggle_otto_nn.py
|
||||
On EC2 g2.2xlarge instance: 19s/epoch. 6-7 minutes total training time.
|
||||
|
||||
Best validation score at epoch 21: 0.4881
|
||||
Best validation score at epoch 21: 0.4881
|
||||
|
||||
Try it at home:
|
||||
- with/without BatchNormalization (BatchNormalization helps!)
|
||||
@@ -78,7 +78,6 @@ def make_submission(y_prob, ids, encoder, fname):
|
||||
f.write('\n')
|
||||
print("Wrote submission to file {}.".format(fname))
|
||||
|
||||
|
||||
print("Loading data...")
|
||||
X, labels = load_data('train.csv', train=True)
|
||||
X, scaler = preprocess_data(X)
|
||||
@@ -96,31 +95,29 @@ print(dims, 'dims')
|
||||
print("Building model...")
|
||||
|
||||
model = Sequential()
|
||||
model.add(Dense(dims, 512, init='glorot_uniform'))
|
||||
model.add(PReLU((512,)))
|
||||
model.add(BatchNormalization((512,)))
|
||||
model.add(Dense(512, input_shape=(dims,)))
|
||||
model.add(PReLU())
|
||||
model.add(BatchNormalization())
|
||||
model.add(Dropout(0.5))
|
||||
|
||||
model.add(Dense(512, 512, init='glorot_uniform'))
|
||||
model.add(PReLU((512,)))
|
||||
model.add(BatchNormalization((512,)))
|
||||
model.add(Dense(512))
|
||||
model.add(PReLU())
|
||||
model.add(BatchNormalization())
|
||||
model.add(Dropout(0.5))
|
||||
|
||||
model.add(Dense(512, 512, init='glorot_uniform'))
|
||||
model.add(PReLU((512,)))
|
||||
model.add(BatchNormalization((512,)))
|
||||
model.add(Dense(512))
|
||||
model.add(PReLU())
|
||||
model.add(BatchNormalization())
|
||||
model.add(Dropout(0.5))
|
||||
|
||||
model.add(Dense(512, nb_classes, init='glorot_uniform'))
|
||||
model.add(Dense(nb_classes))
|
||||
model.add(Activation('softmax'))
|
||||
|
||||
model.compile(loss='categorical_crossentropy', optimizer="adam")
|
||||
|
||||
print("Training model...")
|
||||
|
||||
model.fit(X, y, nb_epoch=20, batch_size=128, validation_split=0.15)
|
||||
|
||||
print("Generating submission...")
|
||||
|
||||
proba = model.predict_proba(X_test)
|
||||
make_submission(proba, ids, encoder, fname='keras-otto.csv')
|
||||
|
||||
@@ -4,7 +4,8 @@ from keras.layers.core import Dense, Activation, Dropout
|
||||
from keras.layers.recurrent import LSTM
|
||||
from keras.datasets.data_utils import get_file
|
||||
import numpy as np
|
||||
import random, sys
|
||||
import random
|
||||
import sys
|
||||
|
||||
'''
|
||||
Example script to generate text from Nietzsche's writings.
|
||||
@@ -15,7 +16,7 @@ import random, sys
|
||||
It is recommended to run this script on GPU, as recurrent
|
||||
networks are quite computationally intensive.
|
||||
|
||||
If you try this script on new data, make sure your corpus
|
||||
If you try this script on new data, make sure your corpus
|
||||
has at least ~100k characters. ~1M is better.
|
||||
'''
|
||||
|
||||
@@ -34,7 +35,7 @@ step = 3
|
||||
sentences = []
|
||||
next_chars = []
|
||||
for i in range(0, len(text) - maxlen, step):
|
||||
sentences.append(text[i : i + maxlen])
|
||||
sentences.append(text[i: i + maxlen])
|
||||
next_chars.append(text[i + maxlen])
|
||||
print('nb sequences:', len(sentences))
|
||||
|
||||
@@ -50,20 +51,21 @@ for i, sentence in enumerate(sentences):
|
||||
# build the model: 2 stacked LSTM
|
||||
print('Build model...')
|
||||
model = Sequential()
|
||||
model.add(LSTM(len(chars), 512, return_sequences=True))
|
||||
model.add(LSTM(512, return_sequences=True, input_shape=(maxlen, len(chars))))
|
||||
model.add(Dropout(0.2))
|
||||
model.add(LSTM(512, 512, return_sequences=False))
|
||||
model.add(LSTM(512, return_sequences=False))
|
||||
model.add(Dropout(0.2))
|
||||
model.add(Dense(512, len(chars)))
|
||||
model.add(Dense(len(chars)))
|
||||
model.add(Activation('softmax'))
|
||||
|
||||
model.compile(loss='categorical_crossentropy', optimizer='rmsprop')
|
||||
|
||||
# helper function to sample an index from a probability array
|
||||
|
||||
def sample(a, temperature=1.0):
|
||||
a = np.log(a)/temperature
|
||||
a = np.exp(a)/np.sum(np.exp(a))
|
||||
return np.argmax(np.random.multinomial(1,a,1))
|
||||
# helper function to sample an index from a probability array
|
||||
a = np.log(a) / temperature
|
||||
a = np.exp(a) / np.sum(np.exp(a))
|
||||
return np.argmax(np.random.multinomial(1, a, 1))
|
||||
|
||||
# train the model, output generated text after each iteration
|
||||
for iteration in range(1, 60):
|
||||
@@ -79,7 +81,7 @@ for iteration in range(1, 60):
|
||||
print('----- diversity:', diversity)
|
||||
|
||||
generated = ''
|
||||
sentence = text[start_index : start_index + maxlen]
|
||||
sentence = text[start_index: start_index + maxlen]
|
||||
generated += sentence
|
||||
print('----- Generating with seed: "' + sentence + '"')
|
||||
sys.stdout.write(generated)
|
||||
|
||||
+18
-8
@@ -22,11 +22,20 @@ batch_size = 128
|
||||
nb_classes = 10
|
||||
nb_epoch = 12
|
||||
|
||||
# input image dimensions
|
||||
img_rows, img_cols = 28, 28
|
||||
# number of convolutional filters to use
|
||||
nb_filters = 32
|
||||
# size of pooling area for max pooling
|
||||
nb_pool = 2
|
||||
# convolution kernel size
|
||||
nb_conv = 3
|
||||
|
||||
# the data, shuffled and split between train and test sets
|
||||
(X_train, y_train), (X_test, y_test) = mnist.load_data()
|
||||
|
||||
X_train = X_train.reshape(X_train.shape[0], 1, 28, 28)
|
||||
X_test = X_test.reshape(X_test.shape[0], 1, 28, 28)
|
||||
X_train = X_train.reshape(X_train.shape[0], 1, img_rows, img_cols)
|
||||
X_test = X_test.reshape(X_test.shape[0], 1, img_rows, img_cols)
|
||||
X_train = X_train.astype("float32")
|
||||
X_test = X_test.astype("float32")
|
||||
X_train /= 255
|
||||
@@ -41,19 +50,20 @@ Y_test = np_utils.to_categorical(y_test, nb_classes)
|
||||
|
||||
model = Sequential()
|
||||
|
||||
model.add(Convolution2D(32, 1, 3, 3, border_mode='full'))
|
||||
model.add(Convolution2D(nb_filters, nb_conv, nb_conv,
|
||||
border_mode='full',
|
||||
input_shape=(1, img_rows, img_cols)))
|
||||
model.add(Activation('relu'))
|
||||
model.add(Convolution2D(32, 32, 3, 3))
|
||||
model.add(Convolution2D(nb_filters, nb_conv, nb_conv))
|
||||
model.add(Activation('relu'))
|
||||
model.add(MaxPooling2D(poolsize=(2, 2)))
|
||||
model.add(MaxPooling2D(pool_size=(nb_pool, nb_pool)))
|
||||
model.add(Dropout(0.25))
|
||||
|
||||
model.add(Flatten())
|
||||
model.add(Dense(32*196, 128))
|
||||
model.add(Dense(128))
|
||||
model.add(Activation('relu'))
|
||||
model.add(Dropout(0.5))
|
||||
|
||||
model.add(Dense(128, nb_classes))
|
||||
model.add(Dense(nb_classes))
|
||||
model.add(Activation('softmax'))
|
||||
|
||||
model.compile(loss='categorical_crossentropy', optimizer='adadelta')
|
||||
|
||||
@@ -55,11 +55,12 @@ Y_test = np_utils.to_categorical(y_test, nb_classes)
|
||||
|
||||
print('Evaluate IRNN...')
|
||||
model = Sequential()
|
||||
model.add(SimpleRNN(input_dim=1, output_dim=hidden_units,
|
||||
model.add(SimpleRNN(output_dim=hidden_units,
|
||||
init=lambda shape: normal(shape, scale=0.001),
|
||||
inner_init=lambda shape: identity(shape, scale=1.0),
|
||||
activation='relu', truncate_gradient=BPTT_truncate))
|
||||
model.add(Dense(hidden_units, nb_classes))
|
||||
activation='relu', truncate_gradient=BPTT_truncate,
|
||||
input_shape=(None, 1)))
|
||||
model.add(Dense(nb_classes))
|
||||
model.add(Activation('softmax'))
|
||||
rmsprop = RMSprop(lr=learning_rate)
|
||||
model.compile(loss='categorical_crossentropy', optimizer=rmsprop)
|
||||
@@ -73,8 +74,8 @@ print('IRNN test accuracy:', scores[1])
|
||||
|
||||
print('Compare to LSTM...')
|
||||
model = Sequential()
|
||||
model.add(LSTM(1, hidden_units))
|
||||
model.add(Dense(hidden_units, nb_classes))
|
||||
model.add(LSTM(hidden_units, input_shape=(None, 1)))
|
||||
model.add(Dense(nb_classes))
|
||||
model.add(Activation('softmax'))
|
||||
rmsprop = RMSprop(lr=learning_rate)
|
||||
model.compile(loss='categorical_crossentropy', optimizer=rmsprop)
|
||||
|
||||
@@ -37,13 +37,13 @@ Y_train = np_utils.to_categorical(y_train, nb_classes)
|
||||
Y_test = np_utils.to_categorical(y_test, nb_classes)
|
||||
|
||||
model = Sequential()
|
||||
model.add(Dense(784, 128))
|
||||
model.add(Dense(128, input_shape=(784,)))
|
||||
model.add(Activation('relu'))
|
||||
model.add(Dropout(0.2))
|
||||
model.add(Dense(128, 128))
|
||||
model.add(Dense(128))
|
||||
model.add(Activation('relu'))
|
||||
model.add(Dropout(0.2))
|
||||
model.add(Dense(128, 10))
|
||||
model.add(Dense(10))
|
||||
model.add(Activation('softmax'))
|
||||
|
||||
rms = RMSprop()
|
||||
|
||||
@@ -45,10 +45,10 @@ print('Y_test shape:', Y_test.shape)
|
||||
|
||||
print("Building model...")
|
||||
model = Sequential()
|
||||
model.add(Dense(max_words, 512))
|
||||
model.add(Dense(512, input_shape=(max_words,)))
|
||||
model.add(Activation('relu'))
|
||||
model.add(Dropout(0.5))
|
||||
model.add(Dense(512, nb_classes))
|
||||
model.add(Dense(nb_classes))
|
||||
model.add(Activation('softmax'))
|
||||
|
||||
model.compile(loss='categorical_crossentropy', optimizer='adam')
|
||||
|
||||
@@ -32,7 +32,7 @@ from __future__ import print_function
|
||||
|
||||
import numpy as np
|
||||
import theano
|
||||
import six.moves.cPickle
|
||||
from six.moves import cPickle
|
||||
import os, re, json
|
||||
|
||||
from keras.preprocessing import sequence, text
|
||||
@@ -90,7 +90,7 @@ def text_generator(path=data_path):
|
||||
# model management
|
||||
if load_tokenizer:
|
||||
print('Load tokenizer...')
|
||||
tokenizer = six.moves.cPickle.load(open(os.path.join(save_dir, tokenizer_fname), 'rb'))
|
||||
tokenizer = cPickle.load(open(os.path.join(save_dir, tokenizer_fname), 'rb'))
|
||||
else:
|
||||
print("Fit tokenizer...")
|
||||
tokenizer = text.Tokenizer(nb_words=max_features)
|
||||
@@ -99,13 +99,13 @@ else:
|
||||
print("Save tokenizer...")
|
||||
if not os.path.exists(save_dir):
|
||||
os.makedirs(save_dir)
|
||||
six.moves.cPickle.dump(tokenizer, open(os.path.join(save_dir, tokenizer_fname), "wb"))
|
||||
cPickle.dump(tokenizer, open(os.path.join(save_dir, tokenizer_fname), "wb"))
|
||||
|
||||
# training process
|
||||
if train_model:
|
||||
if load_model:
|
||||
print('Load model...')
|
||||
model = six.moves.cPickle.load(open(os.path.join(save_dir, model_load_fname), 'rb'))
|
||||
model = cPickle.load(open(os.path.join(save_dir, model_load_fname), 'rb'))
|
||||
else:
|
||||
print('Build model...')
|
||||
model = Sequential()
|
||||
@@ -129,7 +129,7 @@ if train_model:
|
||||
if couples:
|
||||
# one gradient update per sentence (one sentence = a few 1000s of word couples)
|
||||
X = np.array(couples, dtype="int32")
|
||||
loss = model.train(X, labels)
|
||||
loss = model.train_on_batch(X, labels)
|
||||
losses.append(loss)
|
||||
if len(losses) % 100 == 0:
|
||||
progbar.update(i, values=[("loss", np.mean(losses))])
|
||||
@@ -142,7 +142,7 @@ if train_model:
|
||||
print("Saving model...")
|
||||
if not os.path.exists(save_dir):
|
||||
os.makedirs(save_dir)
|
||||
six.moves.cPickle.dump(model, open(os.path.join(save_dir, model_save_fname), "wb"))
|
||||
cPickle.dump(model, open(os.path.join(save_dir, model_save_fname), "wb"))
|
||||
|
||||
|
||||
print("It's test time!")
|
||||
@@ -158,7 +158,6 @@ norm_weights = np_utils.normalize(weights)
|
||||
|
||||
word_index = tokenizer.word_index
|
||||
reverse_word_index = dict([(v, k) for k, v in list(word_index.items())])
|
||||
word_index = tokenizer.word_index
|
||||
|
||||
|
||||
def embed_word(w):
|
||||
|
||||
@@ -17,7 +17,7 @@ def softplus(x):
|
||||
|
||||
|
||||
def relu(x):
|
||||
return (x + abs(x)) / 2.0
|
||||
return T.nnet.relu(x)
|
||||
|
||||
|
||||
def tanh(x):
|
||||
|
||||
@@ -99,10 +99,11 @@ class Callback(object):
|
||||
class BaseLogger(Callback):
|
||||
def on_train_begin(self, logs={}):
|
||||
self.verbose = self.params['verbose']
|
||||
self.nb_epoch = self.params['nb_epoch']
|
||||
|
||||
def on_epoch_begin(self, epoch, logs={}):
|
||||
if self.verbose:
|
||||
print('Epoch %d' % epoch)
|
||||
print('Epoch %d/%d' % (epoch + 1, self.nb_epoch))
|
||||
self.progbar = Progbar(target=self.params['nb_sample'],
|
||||
verbose=self.verbose)
|
||||
self.seen = 0
|
||||
@@ -182,6 +183,7 @@ class ModelCheckpoint(Callback):
|
||||
self.best = np.Inf
|
||||
|
||||
def on_epoch_end(self, epoch, logs={}):
|
||||
filepath = self.filepath.format(epoch=epoch, **logs)
|
||||
if self.save_best_only:
|
||||
current = logs.get(self.monitor)
|
||||
if current is None:
|
||||
@@ -190,16 +192,16 @@ class ModelCheckpoint(Callback):
|
||||
if current < self.best:
|
||||
if self.verbose > 0:
|
||||
print("Epoch %05d: %s improved from %0.5f to %0.5f, saving model to %s"
|
||||
% (epoch, self.monitor, self.best, current, self.filepath))
|
||||
% (epoch, self.monitor, self.best, current, filepath))
|
||||
self.best = current
|
||||
self.model.save_weights(self.filepath, overwrite=True)
|
||||
self.model.save_weights(filepath, overwrite=True)
|
||||
else:
|
||||
if self.verbose > 0:
|
||||
print("Epoch %05d: %s did not improve" % (epoch, self.monitor))
|
||||
else:
|
||||
if self.verbose > 0:
|
||||
print("Epoch %05d: saving model to %s" % (epoch, self.filepath))
|
||||
self.model.save_weights(self.filepath, overwrite=True)
|
||||
print("Epoch %05d: saving model to %s" % (epoch, filepath))
|
||||
self.model.save_weights(filepath, overwrite=True)
|
||||
|
||||
|
||||
class EarlyStopping(Callback):
|
||||
@@ -271,4 +273,4 @@ class LearningRateScheduler(Callback):
|
||||
self.schedule = schedule
|
||||
|
||||
def on_epoch_begin(self, epoch, logs={}):
|
||||
model.lr.set_value(self.schedule(epoch))
|
||||
self.model.optimizer.lr.set_value(self.schedule(epoch))
|
||||
|
||||
@@ -29,7 +29,8 @@ class MaxNorm(Constraint):
|
||||
|
||||
class NonNeg(Constraint):
|
||||
def __call__(self, p):
|
||||
p *= T.ge(p, 0)
|
||||
p = theano.shared(p)
|
||||
p *= T.ge(p, 0.)
|
||||
return p
|
||||
|
||||
|
||||
|
||||
@@ -1,15 +1,15 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
from __future__ import absolute_import
|
||||
import sys
|
||||
import six.moves.cPickle
|
||||
from six.moves import cPickle
|
||||
from six.moves import range
|
||||
|
||||
def load_batch(fpath, label_key='labels'):
|
||||
f = open(fpath, 'rb')
|
||||
if sys.version_info < (3,):
|
||||
d = six.moves.cPickle.load(f)
|
||||
d = cPickle.load(f)
|
||||
else:
|
||||
d = six.moves.cPickle.load(f, encoding="bytes")
|
||||
d = cPickle.load(f, encoding="bytes")
|
||||
# decode utf8
|
||||
for k, v in d.items():
|
||||
del(d[k])
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
from __future__ import absolute_import
|
||||
import six.moves.cPickle
|
||||
from six.moves import cPickle
|
||||
import gzip
|
||||
from .data_utils import get_file
|
||||
import random
|
||||
@@ -17,7 +17,7 @@ def load_data(path="imdb.pkl", nb_words=None, skip_top=0, maxlen=None, test_spli
|
||||
else:
|
||||
f = open(path, 'rb')
|
||||
|
||||
X, labels = six.moves.cPickle.load(f)
|
||||
X, labels = cPickle.load(f)
|
||||
f.close()
|
||||
|
||||
np.random.seed(seed)
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
import gzip
|
||||
from .data_utils import get_file
|
||||
import six.moves.cPickle
|
||||
from six.moves import cPickle
|
||||
import sys
|
||||
|
||||
|
||||
@@ -14,9 +14,9 @@ def load_data(path="mnist.pkl.gz"):
|
||||
f = open(path, 'rb')
|
||||
|
||||
if sys.version_info < (3,):
|
||||
data = six.moves.cPickle.load(f)
|
||||
data = cPickle.load(f)
|
||||
else:
|
||||
data = six.moves.cPickle.load(f, encoding="bytes")
|
||||
data = cPickle.load(f, encoding="bytes")
|
||||
|
||||
f.close()
|
||||
|
||||
|
||||
@@ -5,7 +5,7 @@ from .data_utils import get_file
|
||||
import string
|
||||
import random
|
||||
import os
|
||||
import six.moves.cPickle
|
||||
from six.moves import cPickle
|
||||
from six.moves import zip
|
||||
import numpy as np
|
||||
|
||||
@@ -78,8 +78,8 @@ def make_reuters_dataset(path=os.path.join('datasets', 'temp', 'reuters21578'),
|
||||
dataset = (X, labels)
|
||||
print('-')
|
||||
print('Saving...')
|
||||
six.moves.cPickle.dump(dataset, open(os.path.join('datasets', 'data', 'reuters.pkl'), 'w'))
|
||||
six.moves.cPickle.dump(tokenizer.word_index, open(os.path.join('datasets', 'data', 'reuters_word_index.pkl'), 'w'))
|
||||
cPickle.dump(dataset, open(os.path.join('datasets', 'data', 'reuters.pkl'), 'w'))
|
||||
cPickle.dump(tokenizer.word_index, open(os.path.join('datasets', 'data', 'reuters_word_index.pkl'), 'w'))
|
||||
|
||||
|
||||
def load_data(path="reuters.pkl", nb_words=None, skip_top=0, maxlen=None, test_split=0.2, seed=113,
|
||||
@@ -88,7 +88,7 @@ def load_data(path="reuters.pkl", nb_words=None, skip_top=0, maxlen=None, test_s
|
||||
path = get_file(path, origin="https://s3.amazonaws.com/text-datasets/reuters.pkl")
|
||||
f = open(path, 'rb')
|
||||
|
||||
X, labels = six.moves.cPickle.load(f)
|
||||
X, labels = cPickle.load(f)
|
||||
f.close()
|
||||
|
||||
np.random.seed(seed)
|
||||
@@ -140,7 +140,7 @@ def load_data(path="reuters.pkl", nb_words=None, skip_top=0, maxlen=None, test_s
|
||||
def get_word_index(path="reuters_word_index.pkl"):
|
||||
path = get_file(path, origin="https://s3.amazonaws.com/text-datasets/reuters_word_index.pkl")
|
||||
f = open(path, 'rb')
|
||||
return six.moves.cPickle.load(f)
|
||||
return cPickle.load(f)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
||||
@@ -58,7 +58,7 @@ def he_uniform(shape):
|
||||
|
||||
|
||||
def orthogonal(shape, scale=1.1):
|
||||
''' From Lasagne
|
||||
''' From Lasagne. Reference: Saxe et al., http://arxiv.org/abs/1312.6120
|
||||
'''
|
||||
flat_shape = (shape[0], np.prod(shape[1:]))
|
||||
a = np.random.normal(0.0, 1.0, flat_shape)
|
||||
|
||||
@@ -6,17 +6,19 @@ import numpy as np
|
||||
|
||||
|
||||
class LeakyReLU(MaskedLayer):
|
||||
def __init__(self, alpha=0.3):
|
||||
super(LeakyReLU, self).__init__()
|
||||
def __init__(self, alpha=0.3, **kwargs):
|
||||
super(LeakyReLU, self).__init__(**kwargs)
|
||||
self.alpha = alpha
|
||||
|
||||
def get_output(self, train):
|
||||
X = self.get_input(train)
|
||||
return ((X + abs(X)) / 2.0) + self.alpha * ((X - abs(X)) / 2.0)
|
||||
return T.nnet.relu(X, self.alpha)
|
||||
|
||||
def get_config(self):
|
||||
return {"name": self.__class__.__name__,
|
||||
"alpha": self.alpha}
|
||||
config = {"name": self.__class__.__name__,
|
||||
"alpha": self.alpha}
|
||||
base_config = super(LeakyReLU, self).get_config()
|
||||
return dict(list(base_config.items()) + list(config.items()))
|
||||
|
||||
|
||||
class PReLU(MaskedLayer):
|
||||
@@ -25,26 +27,31 @@ class PReLU(MaskedLayer):
|
||||
Delving Deep into Rectifiers: Surpassing Human-Level Performance on ImageNet Classification
|
||||
http://arxiv.org/pdf/1502.01852v1.pdf
|
||||
'''
|
||||
def __init__(self, input_shape, init='zero', weights=None):
|
||||
super(PReLU, self).__init__()
|
||||
def __init__(self, init='zero', weights=None, **kwargs):
|
||||
self.init = initializations.get(init)
|
||||
self.initial_weights = weights
|
||||
super(PReLU, self).__init__(**kwargs)
|
||||
|
||||
def build(self):
|
||||
input_shape = self.input_shape[1:]
|
||||
self.alphas = self.init(input_shape)
|
||||
self.params = [self.alphas]
|
||||
self.input_shape = input_shape
|
||||
|
||||
if weights is not None:
|
||||
self.set_weights(weights)
|
||||
if self.initial_weights is not None:
|
||||
self.set_weights(self.initial_weights)
|
||||
del self.initial_weights
|
||||
|
||||
def get_output(self, train):
|
||||
X = self.get_input(train)
|
||||
pos = ((X + abs(X)) / 2.0)
|
||||
neg = self.alphas * ((X - abs(X)) / 2.0)
|
||||
pos = T.nnet.relu(X)
|
||||
neg = self.alphas * (X - abs(X)) * 0.5
|
||||
return pos + neg
|
||||
|
||||
def get_config(self):
|
||||
return {"name": self.__class__.__name__,
|
||||
"input_shape": self.input_shape,
|
||||
"init": self.init.__name__}
|
||||
config = {"name": self.__class__.__name__,
|
||||
"init": self.init.__name__}
|
||||
base_config = super(PReLU, self).get_config()
|
||||
return dict(list(base_config.items()) + list(config.items()))
|
||||
|
||||
|
||||
class ParametricSoftplus(MaskedLayer):
|
||||
@@ -55,28 +62,35 @@ class ParametricSoftplus(MaskedLayer):
|
||||
Inferring Nonlinear Neuronal Computation Based on Physiologically Plausible Inputs
|
||||
http://journals.plos.org/ploscompbiol/article?id=10.1371/journal.pcbi.1003143
|
||||
'''
|
||||
def __init__(self, input_shape, alpha_init=0.2, beta_init=5.0, weights=None):
|
||||
|
||||
super(ParametricSoftplus, self).__init__()
|
||||
def __init__(self, alpha_init=0.2, beta_init=5.0,
|
||||
weights=None, **kwargs):
|
||||
self.alpha_init = alpha_init
|
||||
self.beta_init = beta_init
|
||||
self.alphas = sharedX(alpha_init * np.ones(input_shape))
|
||||
self.betas = sharedX(beta_init * np.ones(input_shape))
|
||||
self.initial_weights = weights
|
||||
super(ParametricSoftplus, self).__init__(**kwargs)
|
||||
|
||||
def build(self):
|
||||
input_shape = self.input_shape[1:]
|
||||
self.alphas = sharedX(self.alpha_init * np.ones(input_shape))
|
||||
self.betas = sharedX(self.beta_init * np.ones(input_shape))
|
||||
self.params = [self.alphas, self.betas]
|
||||
self.input_shape = input_shape
|
||||
|
||||
if weights is not None:
|
||||
self.set_weights(weights)
|
||||
if self.initial_weights is not None:
|
||||
self.set_weights(self.initial_weights)
|
||||
del self.initial_weights
|
||||
|
||||
def get_output(self, train):
|
||||
X = self.get_input(train)
|
||||
return T.nnet.softplus(self.betas * X) * self.alphas
|
||||
|
||||
def get_config(self):
|
||||
return {"name": self.__class__.__name__,
|
||||
"input_shape": self.input_shape,
|
||||
"alpha_init": self.alpha_init,
|
||||
"beta_init": self.beta_init}
|
||||
config = {"name": self.__class__.__name__,
|
||||
"alpha_init": self.alpha_init,
|
||||
"beta_init": self.beta_init}
|
||||
base_config = super(ParametricSoftplus, self).get_config()
|
||||
return dict(list(base_config.items()) + list(config.items()))
|
||||
|
||||
|
||||
class ThresholdedLinear(MaskedLayer):
|
||||
'''
|
||||
@@ -86,19 +100,22 @@ class ThresholdedLinear(MaskedLayer):
|
||||
Zero-Bias Autoencoders and the Benefits of Co-Adapting Features
|
||||
http://arxiv.org/pdf/1402.3337.pdf
|
||||
'''
|
||||
def __init__(self, theta=1.0):
|
||||
super(ThresholdedLinear, self).__init__()
|
||||
def __init__(self, theta=1.0, **kwargs):
|
||||
super(ThresholdedLinear, self).__init__(**kwargs)
|
||||
self.theta = theta
|
||||
|
||||
|
||||
def get_output(self, train):
|
||||
X = self.get_input(train)
|
||||
return T.switch( abs(X) < self.theta, 0, X )
|
||||
return T.switch(abs(X) < self.theta, 0, X)
|
||||
|
||||
def get_config(self):
|
||||
return {"name": self.__class__.__name__,
|
||||
"theta": self.theta}
|
||||
config = {"name": self.__class__.__name__,
|
||||
"theta": self.theta}
|
||||
base_config = super(ThresholdedLinear, self).get_config()
|
||||
return dict(list(base_config.items()) + list(config.items()))
|
||||
|
||||
class ThresholdedReLu(MaskedLayer):
|
||||
|
||||
class ThresholdedReLU(MaskedLayer):
|
||||
'''
|
||||
Thresholded Rectified Activation
|
||||
|
||||
@@ -106,14 +123,16 @@ class ThresholdedReLu(MaskedLayer):
|
||||
Zero-Bias Autoencoders and the Benefits of Co-Adapting Features
|
||||
http://arxiv.org/pdf/1402.3337.pdf
|
||||
'''
|
||||
def __init__(self, theta=1.0):
|
||||
super(ThresholdedReLu, self).__init__()
|
||||
def __init__(self, theta=1.0, **kwargs):
|
||||
super(ThresholdedReLU, self).__init__(**kwargs)
|
||||
self.theta = theta
|
||||
|
||||
|
||||
def get_output(self, train):
|
||||
X = self.get_input(train)
|
||||
return T.switch( X > self.theta, X, 0 )
|
||||
return T.switch(X > self.theta, X, 0)
|
||||
|
||||
def get_config(self):
|
||||
return {"name": self.__class__.__name__,
|
||||
"theta": self.theta}
|
||||
config = {"name": self.__class__.__name__,
|
||||
"theta": self.theta}
|
||||
base_config = super(ThresholdedReLU, self).get_config()
|
||||
return dict(list(base_config.items()) + list(config.items()))
|
||||
|
||||
@@ -37,7 +37,6 @@ class Sequential(Layer):
|
||||
self.layers[-1].set_previous(self.layers[-2])
|
||||
if not hasattr(self.layers[0], 'input'):
|
||||
self.set_input()
|
||||
layer.init_updates()
|
||||
|
||||
params, regularizers, constraints, updates = layer.get_params()
|
||||
self.params += params
|
||||
@@ -45,6 +44,10 @@ class Sequential(Layer):
|
||||
self.constraints += constraints
|
||||
self.updates += updates
|
||||
|
||||
@property
|
||||
def output_shape(self):
|
||||
return self.layers[-1].output_shape
|
||||
|
||||
def get_output(self, train=False):
|
||||
return self.layers[-1].get_output(train)
|
||||
|
||||
@@ -80,6 +83,9 @@ class Sequential(Layer):
|
||||
return {"name": self.__class__.__name__,
|
||||
"layers": [layer.get_config() for layer in self.layers]}
|
||||
|
||||
def count_params(self):
|
||||
return sum([layer.count_params() for layer in self.layers])
|
||||
|
||||
|
||||
class Graph(Layer):
|
||||
'''
|
||||
@@ -145,18 +151,29 @@ class Graph(Layer):
|
||||
def input(self):
|
||||
return self.get_input()
|
||||
|
||||
@property
|
||||
def output_shape(self):
|
||||
if self.nb_output == 1:
|
||||
# return tuple
|
||||
return self.outputs[self.output_order[0]].output_shape
|
||||
else:
|
||||
# return dictionary mapping output names to shape tuples
|
||||
return dict([(k, v.output_shape) for k, v in self.outputs.items()])
|
||||
|
||||
def get_output(self, train=False):
|
||||
if len(self.inputs) == len(self.outputs) == 1:
|
||||
return self.outputs[self.output_order[0]].get_output(train)
|
||||
else:
|
||||
return dict([(k, v.get_output(train)) for k, v in self.outputs.items()])
|
||||
|
||||
def add_input(self, name, ndim=2, dtype='float'):
|
||||
def add_input(self, name, input_shape, dtype='float'):
|
||||
if name in self.namespace:
|
||||
raise Exception('Duplicate node identifier: ' + name)
|
||||
self.namespace.add(name)
|
||||
self.input_order.append(name)
|
||||
layer = Layer() # empty layer
|
||||
layer.set_input_shape(input_shape)
|
||||
ndim = len(input_shape) + 1
|
||||
if dtype == 'float':
|
||||
layer.input = ndim_tensor(ndim)
|
||||
else:
|
||||
@@ -166,9 +183,12 @@ class Graph(Layer):
|
||||
raise Exception('Type "int" can only be used with ndim==2 (Embedding).')
|
||||
layer.input.name = name
|
||||
self.inputs[name] = layer
|
||||
self.input_config.append({'name': name, 'ndim': ndim, 'dtype': dtype})
|
||||
self.input_config.append({'name': name,
|
||||
'input_shape': input_shape,
|
||||
'dtype': dtype})
|
||||
|
||||
def add_node(self, layer, name, input=None, inputs=[], merge_mode='concat', create_output=False):
|
||||
def add_node(self, layer, name, input=None, inputs=[],
|
||||
merge_mode='concat', concat_axis=-1, create_output=False):
|
||||
if hasattr(layer, 'set_name'):
|
||||
layer.set_name(name)
|
||||
if name in self.namespace:
|
||||
@@ -189,7 +209,7 @@ class Graph(Layer):
|
||||
to_merge.append(self.inputs[n])
|
||||
else:
|
||||
raise Exception('Unknown identifier: ' + n)
|
||||
merge = Merge(to_merge, mode=merge_mode)
|
||||
merge = Merge(to_merge, mode=merge_mode, concat_axis=concat_axis)
|
||||
layer.set_previous(merge)
|
||||
|
||||
self.namespace.add(name)
|
||||
@@ -197,8 +217,9 @@ class Graph(Layer):
|
||||
self.node_config.append({'name': name,
|
||||
'input': input,
|
||||
'inputs': inputs,
|
||||
'merge_mode': merge_mode})
|
||||
layer.init_updates()
|
||||
'merge_mode': merge_mode,
|
||||
'concat_axis': concat_axis,
|
||||
'create_output': create_output})
|
||||
params, regularizers, constraints, updates = layer.get_params()
|
||||
self.params += params
|
||||
self.regularizers += regularizers
|
||||
@@ -208,7 +229,8 @@ class Graph(Layer):
|
||||
if create_output:
|
||||
self.add_output(name, input=name)
|
||||
|
||||
def add_output(self, name, input=None, inputs=[], merge_mode='concat'):
|
||||
def add_output(self, name, input=None, inputs=[],
|
||||
merge_mode='concat', concat_axis=-1):
|
||||
if name in self.output_order:
|
||||
raise Exception('Duplicate output identifier: ' + name)
|
||||
if input:
|
||||
@@ -224,14 +246,15 @@ class Graph(Layer):
|
||||
if n not in self.nodes:
|
||||
raise Exception('Unknown identifier: ' + n)
|
||||
to_merge.append(self.nodes[n])
|
||||
merge = Merge(to_merge, mode=merge_mode)
|
||||
merge = Merge(to_merge, mode=merge_mode, concat_axis=concat_axis)
|
||||
self.outputs[name] = merge
|
||||
|
||||
self.output_order.append(name)
|
||||
self.output_config.append({'name': name,
|
||||
'input': input,
|
||||
'inputs': inputs,
|
||||
'merge_mode': merge_mode})
|
||||
'merge_mode': merge_mode,
|
||||
'concat_axis': concat_axis})
|
||||
|
||||
def get_config(self):
|
||||
return {"name": self.__class__.__name__,
|
||||
@@ -241,3 +264,6 @@ class Graph(Layer):
|
||||
"input_order": self.input_order,
|
||||
"output_order": self.output_order,
|
||||
"nodes": dict([(c["name"], self.nodes[c["name"]].get_config()) for c in self.node_config])}
|
||||
|
||||
def count_params(self):
|
||||
return sum([layer.count_params() for layer in self.nodes.values()])
|
||||
|
||||
+356
-146
@@ -3,153 +3,241 @@ from __future__ import absolute_import
|
||||
|
||||
import theano
|
||||
import theano.tensor as T
|
||||
from theano.sandbox.cuda import dnn
|
||||
from theano.tensor.signal import downsample
|
||||
|
||||
from .. import activations, initializations, regularizers, constraints
|
||||
from ..utils.theano_utils import shared_zeros
|
||||
from ..utils.theano_utils import shared_zeros, on_gpu
|
||||
from ..layers.core import Layer
|
||||
|
||||
if on_gpu():
|
||||
from theano.sandbox.cuda import dnn
|
||||
|
||||
|
||||
def conv_output_length(input_length, filter_size, border_mode, stride):
    '''Compute the size of one output dimension of a convolution.

    # Arguments
        input_length: size of the input along the convolved axis,
            or None when that size is not known.
        filter_size: size of the filter along the same axis.
        border_mode: one of 'same', 'full' or 'valid'.
        stride: subsampling step along the same axis.

    # Returns
        None if `input_length` is None; otherwise the un-strided length
        implied by `border_mode`, divided by `stride` and rounded up.

    # Raises
        ValueError: if `border_mode` is not one of the three known modes
            (only checked when `input_length` is not None).
    '''
    if input_length is None:
        return None
    if border_mode == 'same':
        output_length = input_length
    elif border_mode == 'full':
        output_length = input_length + filter_size - 1
    elif border_mode == 'valid':
        output_length = input_length - filter_size + 1
    else:
        # Raise explicitly rather than assert: assertions are removed under
        # `python -O`, which would let an unknown mode fall through and fail
        # later with an unrelated UnboundLocalError.
        raise ValueError('Invalid border mode: ' + str(border_mode))
    # Ceiling division by the stride.
    return (output_length + stride - 1) // stride
|
||||
|
||||
|
||||
def pool_output_length(input_length, pool_size, ignore_border, stride):
    '''Compute the size of one output dimension of a pooling operation.

    # Arguments
        input_length: size of the input along the pooled axis,
            or None when that size is not known.
        pool_size: size of the pooling window along the same axis.
        ignore_border: selects which of the two length formulas is used
            (presumably mirrors the pooling op's `ignore_border` flag;
            verify against the caller).
        stride: step between consecutive pooling windows.

    # Returns
        None if `input_length` is None, otherwise an integer length.
        When `ignore_border` is false the result is never below 1.
    '''
    if input_length is None:
        return None

    if ignore_border:
        # Count of window start positions, then ceiling-divide by the stride.
        positions = input_length - pool_size + 1
        return (positions + stride - 1) // stride

    if pool_size == input_length:
        # Window spans the whole axis: capped by the stride rounded down
        # to an even number, with a floor of 1.
        capped = min(input_length, stride - stride % 2)
        return capped if capped > 0 else 1

    if stride >= pool_size:
        return (input_length + stride - 1) // stride

    steps = (input_length - pool_size + stride - 1) // stride
    return 1 if steps <= 0 else steps + 1
|
||||
|
||||
|
||||
class Convolution1D(Layer):
|
||||
def __init__(self, input_dim, nb_filter, filter_length,
|
||||
input_ndim = 3
|
||||
|
||||
def __init__(self, nb_filter, filter_length,
|
||||
init='uniform', activation='linear', weights=None,
|
||||
border_mode='valid', subsample_length=1,
|
||||
W_regularizer=None, b_regularizer=None, activity_regularizer=None,
|
||||
W_constraint=None, b_constraint=None):
|
||||
W_constraint=None, b_constraint=None, input_dim=None, input_length=None, **kwargs):
|
||||
|
||||
if border_mode not in {'valid', 'full', 'same'}:
|
||||
raise Exception('Invalid border mode for Convolution1D:', border_mode)
|
||||
|
||||
super(Convolution1D, self).__init__()
|
||||
self.nb_filter = nb_filter
|
||||
self.input_dim = input_dim
|
||||
self.filter_length = filter_length
|
||||
self.subsample_length = subsample_length
|
||||
self.init = initializations.get(init)
|
||||
self.activation = activations.get(activation)
|
||||
self.subsample = (1, subsample_length)
|
||||
self.border_mode = border_mode
|
||||
self.subsample_length = subsample_length
|
||||
|
||||
self.input = T.tensor3()
|
||||
self.W_shape = (nb_filter, input_dim, filter_length, 1)
|
||||
self.W = self.init(self.W_shape)
|
||||
self.b = shared_zeros((nb_filter,))
|
||||
|
||||
self.params = [self.W, self.b]
|
||||
|
||||
self.regularizers = []
|
||||
self.subsample = (subsample_length, 1)
|
||||
|
||||
self.W_regularizer = regularizers.get(W_regularizer)
|
||||
if self.W_regularizer:
|
||||
self.W_regularizer.set_param(self.W)
|
||||
self.regularizers.append(self.W_regularizer)
|
||||
|
||||
self.b_regularizer = regularizers.get(b_regularizer)
|
||||
if self.b_regularizer:
|
||||
self.b_regularizer.set_param(self.b)
|
||||
self.regularizers.append(self.b_regularizer)
|
||||
|
||||
self.activity_regularizer = regularizers.get(activity_regularizer)
|
||||
if self.activity_regularizer:
|
||||
self.activity_regularizer.set_layer(self)
|
||||
self.regularizers.append(self.activity_regularizer)
|
||||
|
||||
self.W_constraint = constraints.get(W_constraint)
|
||||
self.b_constraint = constraints.get(b_constraint)
|
||||
self.constraints = [self.W_constraint, self.b_constraint]
|
||||
|
||||
if weights is not None:
|
||||
self.set_weights(weights)
|
||||
self.initial_weights = weights
|
||||
|
||||
def get_output(self, train):
|
||||
self.input_dim = input_dim
|
||||
self.input_length = input_length
|
||||
if self.input_dim:
|
||||
kwargs['input_shape'] = (self.input_length, self.input_dim)
|
||||
super(Convolution1D, self).__init__(**kwargs)
|
||||
|
||||
def build(self):
|
||||
input_dim = self.input_shape[2]
|
||||
self.input = T.tensor3()
|
||||
self.W_shape = (self.nb_filter, input_dim, self.filter_length, 1)
|
||||
self.W = self.init(self.W_shape)
|
||||
self.b = shared_zeros((self.nb_filter,))
|
||||
self.params = [self.W, self.b]
|
||||
self.regularizers = []
|
||||
|
||||
if self.W_regularizer:
|
||||
self.W_regularizer.set_param(self.W)
|
||||
self.regularizers.append(self.W_regularizer)
|
||||
|
||||
if self.b_regularizer:
|
||||
self.b_regularizer.set_param(self.b)
|
||||
self.regularizers.append(self.b_regularizer)
|
||||
|
||||
if self.activity_regularizer:
|
||||
self.activity_regularizer.set_layer(self)
|
||||
self.regularizers.append(self.activity_regularizer)
|
||||
|
||||
if self.initial_weights is not None:
|
||||
self.set_weights(self.initial_weights)
|
||||
del self.initial_weights
|
||||
|
||||
@property
|
||||
def output_shape(self):
|
||||
length = conv_output_length(self.input_shape[1], self.filter_length, self.border_mode, self.subsample[0])
|
||||
return (self.input_shape[0], length, self.nb_filter)
|
||||
|
||||
def get_output(self, train=False):
|
||||
X = self.get_input(train)
|
||||
X = T.reshape(X, (X.shape[0], X.shape[1], X.shape[2], 1)).dimshuffle(0, 2, 1, 3)
|
||||
|
||||
border_mode = self.border_mode
|
||||
if border_mode == 'same':
|
||||
border_mode = 'full'
|
||||
if on_gpu() and dnn.dnn_available():
|
||||
if border_mode == 'same':
|
||||
assert(self.subsample_length == 1)
|
||||
pad_x = (self.filter_length - self.subsample_length) // 2
|
||||
conv_out = dnn.dnn_conv(img=X,
|
||||
kerns=self.W,
|
||||
border_mode=(pad_x, 0))
|
||||
else:
|
||||
conv_out = dnn.dnn_conv(img=X,
|
||||
kerns=self.W,
|
||||
border_mode=border_mode,
|
||||
subsample=self.subsample)
|
||||
else:
|
||||
if border_mode == 'same':
|
||||
assert(self.subsample_length == 1)
|
||||
border_mode = 'full'
|
||||
|
||||
conv_out = T.nnet.conv.conv2d(X, self.W, border_mode=border_mode, subsample=self.subsample)
|
||||
if self.border_mode == 'same':
|
||||
shift_x = (self.filter_length - 1) // 2
|
||||
conv_out = conv_out[:, :, shift_x:X.shape[2] + shift_x, :]
|
||||
input_shape = self.input_shape
|
||||
image_shape = (input_shape[0], input_shape[2], input_shape[1], 1)
|
||||
conv_out = T.nnet.conv.conv2d(X, self.W,
|
||||
border_mode=border_mode,
|
||||
subsample=self.subsample,
|
||||
image_shape=image_shape,
|
||||
filter_shape=self.W_shape)
|
||||
if self.border_mode == 'same':
|
||||
shift_x = (self.filter_length - 1) // 2
|
||||
conv_out = conv_out[:, :, shift_x:X.shape[2] + shift_x, :]
|
||||
|
||||
output = self.activation(conv_out + self.b.dimshuffle('x', 0, 'x', 'x'))
|
||||
output = T.reshape(output, (output.shape[0], output.shape[1], output.shape[2])).dimshuffle(0, 2, 1)
|
||||
return output
|
||||
|
||||
def get_config(self):
|
||||
return {"name": self.__class__.__name__,
|
||||
"input_dim": self.input_dim,
|
||||
"nb_filter": self.nb_filter,
|
||||
"filter_length": self.filter_length,
|
||||
"init": self.init.__name__,
|
||||
"activation": self.activation.__name__,
|
||||
"border_mode": self.border_mode,
|
||||
"subsample_length": self.subsample_length,
|
||||
"W_regularizer": self.W_regularizer.get_config() if self.W_regularizer else None,
|
||||
"b_regularizer": self.b_regularizer.get_config() if self.b_regularizer else None,
|
||||
"activity_regularizer": self.activity_regularizer.get_config() if self.activity_regularizer else None,
|
||||
"W_constraint": self.W_constraint.get_config() if self.W_constraint else None,
|
||||
"b_constraint": self.b_constraint.get_config() if self.b_constraint else None}
|
||||
config = {"name": self.__class__.__name__,
|
||||
"nb_filter": self.nb_filter,
|
||||
"filter_length": self.filter_length,
|
||||
"init": self.init.__name__,
|
||||
"activation": self.activation.__name__,
|
||||
"border_mode": self.border_mode,
|
||||
"subsample_length": self.subsample_length,
|
||||
"W_regularizer": self.W_regularizer.get_config() if self.W_regularizer else None,
|
||||
"b_regularizer": self.b_regularizer.get_config() if self.b_regularizer else None,
|
||||
"activity_regularizer": self.activity_regularizer.get_config() if self.activity_regularizer else None,
|
||||
"W_constraint": self.W_constraint.get_config() if self.W_constraint else None,
|
||||
"b_constraint": self.b_constraint.get_config() if self.b_constraint else None,
|
||||
"input_dim": self.input_dim,
|
||||
"input_length": self.input_length}
|
||||
base_config = super(Convolution1D, self).get_config()
|
||||
return dict(list(base_config.items()) + list(config.items()))
|
||||
|
||||
|
||||
class Convolution2D(Layer):
|
||||
def __init__(self, nb_filter, stack_size, nb_row, nb_col,
|
||||
input_ndim = 4
|
||||
|
||||
def __init__(self, nb_filter, nb_row, nb_col,
|
||||
init='glorot_uniform', activation='linear', weights=None,
|
||||
border_mode='valid', subsample=(1, 1),
|
||||
W_regularizer=None, b_regularizer=None, activity_regularizer=None,
|
||||
W_constraint=None, b_constraint=None):
|
||||
W_constraint=None, b_constraint=None, **kwargs):
|
||||
|
||||
if border_mode not in {'valid', 'full', 'same'}:
|
||||
raise Exception('Invalid border mode for Convolution2D:', border_mode)
|
||||
|
||||
super(Convolution2D, self).__init__()
|
||||
self.init = initializations.get(init)
|
||||
self.activation = activations.get(activation)
|
||||
self.subsample = subsample
|
||||
self.border_mode = border_mode
|
||||
self.nb_filter = nb_filter
|
||||
self.stack_size = stack_size
|
||||
|
||||
self.nb_row = nb_row
|
||||
self.nb_col = nb_col
|
||||
|
||||
self.input = T.tensor4()
|
||||
self.W_shape = (nb_filter, stack_size, nb_row, nb_col)
|
||||
self.W = self.init(self.W_shape)
|
||||
self.b = shared_zeros((nb_filter,))
|
||||
|
||||
self.params = [self.W, self.b]
|
||||
|
||||
self.regularizers = []
|
||||
self.init = initializations.get(init)
|
||||
self.activation = activations.get(activation)
|
||||
self.border_mode = border_mode
|
||||
self.subsample = tuple(subsample)
|
||||
|
||||
self.W_regularizer = regularizers.get(W_regularizer)
|
||||
if self.W_regularizer:
|
||||
self.W_regularizer.set_param(self.W)
|
||||
self.regularizers.append(self.W_regularizer)
|
||||
|
||||
self.b_regularizer = regularizers.get(b_regularizer)
|
||||
if self.b_regularizer:
|
||||
self.b_regularizer.set_param(self.b)
|
||||
self.regularizers.append(self.b_regularizer)
|
||||
|
||||
self.activity_regularizer = regularizers.get(activity_regularizer)
|
||||
if self.activity_regularizer:
|
||||
self.activity_regularizer.set_layer(self)
|
||||
self.regularizers.append(self.activity_regularizer)
|
||||
|
||||
self.W_constraint = constraints.get(W_constraint)
|
||||
self.b_constraint = constraints.get(b_constraint)
|
||||
self.constraints = [self.W_constraint, self.b_constraint]
|
||||
|
||||
if weights is not None:
|
||||
self.set_weights(weights)
|
||||
self.initial_weights = weights
|
||||
super(Convolution2D, self).__init__(**kwargs)
|
||||
|
||||
def get_output(self, train):
|
||||
def build(self):
    """Instantiate the layer's weights once the input shape is known.

    Reads the stack size from ``self.input_shape[1]``, creates the filter
    tensor ``W`` and the bias ``b``, registers every configured regularizer,
    and finally loads weights passed to the constructor (if any).
    """
    stack_size = self.input_shape[1]
    self.input = T.tensor4()
    self.W_shape = (self.nb_filter, stack_size, self.nb_row, self.nb_col)
    self.W = self.init(self.W_shape)
    self.b = shared_zeros((self.nb_filter,))
    self.params = [self.W, self.b]
    self.regularizers = []

    if self.W_regularizer:
        self.W_regularizer.set_param(self.W)
        self.regularizers.append(self.W_regularizer)

    if self.b_regularizer:
        self.b_regularizer.set_param(self.b)
        self.regularizers.append(self.b_regularizer)

    if self.activity_regularizer:
        self.activity_regularizer.set_layer(self)
        self.regularizers.append(self.activity_regularizer)

    if self.initial_weights is not None:
        self.set_weights(self.initial_weights)
        # NOTE(review): once deleted, a second build() call would fail on the
        # attribute lookup above - confirm build() runs only once per layer.
        del self.initial_weights
|
||||
|
||||
@property
def output_shape(self):
    """Return the output shape as ``(input_shape[0], nb_filter, rows, cols)``.

    ``rows`` and ``cols`` are derived from ``input_shape[2]`` and
    ``input_shape[3]`` by ``conv_output_length`` using the matching filter
    size, the border mode and the matching subsample factor.
    """
    input_shape = self.input_shape
    rows = conv_output_length(input_shape[2], self.nb_row,
                              self.border_mode, self.subsample[0])
    cols = conv_output_length(input_shape[3], self.nb_col,
                              self.border_mode, self.subsample[1])
    return (input_shape[0], self.nb_filter, rows, cols)
|
||||
|
||||
def get_output(self, train=False):
|
||||
X = self.get_input(train)
|
||||
border_mode = self.border_mode
|
||||
if dnn.dnn_available() and theano.config.device[:3] == 'gpu':
|
||||
if on_gpu() and dnn.dnn_available():
|
||||
if border_mode == 'same':
|
||||
assert(self.subsample == (1, 1))
|
||||
pad_x = (self.nb_row - self.subsample[0]) // 2
|
||||
@@ -165,10 +253,13 @@ class Convolution2D(Layer):
|
||||
else:
|
||||
if border_mode == 'same':
|
||||
border_mode = 'full'
|
||||
assert(self.subsample == (1, 1))
|
||||
|
||||
conv_out = T.nnet.conv.conv2d(X, self.W,
|
||||
border_mode=border_mode,
|
||||
subsample=self.subsample)
|
||||
subsample=self.subsample,
|
||||
image_shape=self.input_shape,
|
||||
filter_shape=self.W_shape)
|
||||
if self.border_mode == 'same':
|
||||
shift_x = (self.nb_row - 1) // 2
|
||||
shift_y = (self.nb_col - 1) // 2
|
||||
@@ -177,118 +268,237 @@ class Convolution2D(Layer):
|
||||
return self.activation(conv_out + self.b.dimshuffle('x', 0, 'x', 'x'))
|
||||
|
||||
def get_config(self):
|
||||
return {"name": self.__class__.__name__,
|
||||
"nb_filter": self.nb_filter,
|
||||
"stack_size": self.stack_size,
|
||||
"nb_row": self.nb_row,
|
||||
"nb_col": self.nb_col,
|
||||
"init": self.init.__name__,
|
||||
"activation": self.activation.__name__,
|
||||
"border_mode": self.border_mode,
|
||||
"subsample": self.subsample,
|
||||
"W_regularizer": self.W_regularizer.get_config() if self.W_regularizer else None,
|
||||
"b_regularizer": self.b_regularizer.get_config() if self.b_regularizer else None,
|
||||
"activity_regularizer": self.activity_regularizer.get_config() if self.activity_regularizer else None,
|
||||
"W_constraint": self.W_constraint.get_config() if self.W_constraint else None,
|
||||
"b_constraint": self.b_constraint.get_config() if self.b_constraint else None}
|
||||
config = {"name": self.__class__.__name__,
|
||||
"nb_filter": self.nb_filter,
|
||||
"nb_row": self.nb_row,
|
||||
"nb_col": self.nb_col,
|
||||
"init": self.init.__name__,
|
||||
"activation": self.activation.__name__,
|
||||
"border_mode": self.border_mode,
|
||||
"subsample": self.subsample,
|
||||
"W_regularizer": self.W_regularizer.get_config() if self.W_regularizer else None,
|
||||
"b_regularizer": self.b_regularizer.get_config() if self.b_regularizer else None,
|
||||
"activity_regularizer": self.activity_regularizer.get_config() if self.activity_regularizer else None,
|
||||
"W_constraint": self.W_constraint.get_config() if self.W_constraint else None,
|
||||
"b_constraint": self.b_constraint.get_config() if self.b_constraint else None}
|
||||
base_config = super(Convolution2D, self).get_config()
|
||||
return dict(list(base_config.items()) + list(config.items()))
|
||||
|
||||
|
||||
class MaxPooling1D(Layer):
|
||||
def __init__(self, pool_length=2, stride=None, ignore_border=True):
|
||||
super(MaxPooling1D, self).__init__()
|
||||
input_ndim = 3
|
||||
|
||||
def __init__(self, pool_length=2, stride=None, ignore_border=True, **kwargs):
|
||||
super(MaxPooling1D, self).__init__(**kwargs)
|
||||
if stride is None:
|
||||
stride = pool_length
|
||||
self.pool_length = pool_length
|
||||
self.stride = stride
|
||||
if self.stride:
|
||||
self.st = (self.stride, 1)
|
||||
else:
|
||||
self.st = None
|
||||
self.st = (self.stride, 1)
|
||||
|
||||
self.input = T.tensor3()
|
||||
self.poolsize = (pool_length, 1)
|
||||
self.pool_size = (pool_length, 1)
|
||||
self.ignore_border = ignore_border
|
||||
|
||||
def get_output(self, train):
|
||||
@property
|
||||
def output_shape(self):
|
||||
input_shape = self.input_shape
|
||||
length = pool_output_length(input_shape[1], self.pool_length, self.ignore_border, self.stride)
|
||||
return (input_shape[0], length, input_shape[2])
|
||||
|
||||
def get_output(self, train=False):
|
||||
X = self.get_input(train)
|
||||
X = T.reshape(X, (X.shape[0], X.shape[1], X.shape[2], 1)).dimshuffle(0, 2, 1, 3)
|
||||
output = T.signal.downsample.max_pool_2d(X, ds=self.poolsize, st=self.st, ignore_border=self.ignore_border)
|
||||
output = downsample.max_pool_2d(X, ds=self.pool_size, st=self.st, ignore_border=self.ignore_border)
|
||||
output = output.dimshuffle(0, 2, 1, 3)
|
||||
return T.reshape(output, (output.shape[0], output.shape[1], output.shape[2]))
|
||||
|
||||
def get_config(self):
|
||||
return {"name": self.__class__.__name__,
|
||||
"stride": self.stride,
|
||||
"pool_length": self.pool_length,
|
||||
"ignore_border": self.ignore_border}
|
||||
config = {"name": self.__class__.__name__,
|
||||
"stride": self.stride,
|
||||
"pool_length": self.pool_length,
|
||||
"ignore_border": self.ignore_border}
|
||||
base_config = super(MaxPooling1D, self).get_config()
|
||||
return dict(list(base_config.items()) + list(config.items()))
|
||||
|
||||
|
||||
class MaxPooling2D(Layer):
|
||||
def __init__(self, poolsize=(2, 2), stride=None, ignore_border=True):
|
||||
super(MaxPooling2D, self).__init__()
|
||||
input_ndim = 4
|
||||
|
||||
def __init__(self, pool_size=(2, 2), stride=None, ignore_border=True, **kwargs):
|
||||
super(MaxPooling2D, self).__init__(**kwargs)
|
||||
self.input = T.tensor4()
|
||||
self.poolsize = poolsize
|
||||
self.stride = stride
|
||||
self.pool_size = tuple(pool_size)
|
||||
if stride is None:
|
||||
stride = self.pool_size
|
||||
self.stride = tuple(stride)
|
||||
self.ignore_border = ignore_border
|
||||
|
||||
def get_output(self, train):
|
||||
@property
|
||||
def output_shape(self):
|
||||
input_shape = self.input_shape
|
||||
rows = pool_output_length(input_shape[2], self.pool_size[0], self.ignore_border, self.stride[0])
|
||||
cols = pool_output_length(input_shape[3], self.pool_size[1], self.ignore_border, self.stride[1])
|
||||
return (input_shape[0], input_shape[1], rows, cols)
|
||||
|
||||
def get_output(self, train=False):
|
||||
X = self.get_input(train)
|
||||
output = T.signal.downsample.max_pool_2d(X, ds=self.poolsize, st=self.stride, ignore_border=self.ignore_border)
|
||||
output = downsample.max_pool_2d(X, ds=self.pool_size, st=self.stride, ignore_border=self.ignore_border)
|
||||
return output
|
||||
|
||||
def get_config(self):
|
||||
return {"name": self.__class__.__name__,
|
||||
"poolsize": self.poolsize,
|
||||
"ignore_border": self.ignore_border,
|
||||
"stride": self.stride}
|
||||
config = {"name": self.__class__.__name__,
|
||||
"pool_size": self.pool_size,
|
||||
"ignore_border": self.ignore_border,
|
||||
"stride": self.stride}
|
||||
base_config = super(MaxPooling2D, self).get_config()
|
||||
return dict(list(base_config.items()) + list(config.items()))
|
||||
|
||||
|
||||
class UpSample1D(Layer):
|
||||
def __init__(self, length=2):
|
||||
super(UpSample1D, self).__init__()
|
||||
input_ndim = 3
|
||||
|
||||
def __init__(self, length=2, **kwargs):
|
||||
super(UpSample1D, self).__init__(**kwargs)
|
||||
self.length = length
|
||||
self.input = T.tensor3()
|
||||
|
||||
def get_output(self, train):
|
||||
@property
|
||||
def output_shape(self):
|
||||
input_shape = self.input_shape
|
||||
return (input_shape[0], self.length * input_shape[1], input_shape[2])
|
||||
|
||||
def get_output(self, train=False):
|
||||
X = self.get_input(train)
|
||||
output = theano.tensor.extra_ops.repeat(X, self.length, axis=1)
|
||||
return output
|
||||
|
||||
def get_config(self):
|
||||
return {"name": self.__class__.__name__,
|
||||
"length": self.length}
|
||||
config = {"name": self.__class__.__name__,
|
||||
"length": self.length}
|
||||
base_config = super(UpSample1D, self).get_config()
|
||||
return dict(list(base_config.items()) + list(config.items()))
|
||||
|
||||
|
||||
class UpSample2D(Layer):
|
||||
def __init__(self, size=(2, 2)):
|
||||
super(UpSample2D, self).__init__()
|
||||
self.input = T.tensor4()
|
||||
self.size = size
|
||||
input_ndim = 4
|
||||
|
||||
def get_output(self, train):
|
||||
def __init__(self, size=(2, 2), **kwargs):
|
||||
super(UpSample2D, self).__init__(**kwargs)
|
||||
self.input = T.tensor4()
|
||||
self.size = tuple(size)
|
||||
|
||||
@property
|
||||
def output_shape(self):
|
||||
input_shape = self.input_shape
|
||||
return (input_shape[0], input_shape[1], self.size[0] * input_shape[2], self.size[1] * input_shape[3])
|
||||
|
||||
def get_output(self, train=False):
|
||||
X = self.get_input(train)
|
||||
Y = theano.tensor.extra_ops.repeat(X, self.size[0], axis=2)
|
||||
output = theano.tensor.extra_ops.repeat(Y, self.size[1], axis=3)
|
||||
return output
|
||||
|
||||
def get_config(self):
|
||||
return {"name": self.__class__.__name__,
|
||||
"size": self.size}
|
||||
config = {"name": self.__class__.__name__,
|
||||
"size": self.size}
|
||||
base_config = super(UpSample2D, self).get_config()
|
||||
return dict(list(base_config.items()) + list(config.items()))
|
||||
|
||||
|
||||
class ZeroPadding1D(Layer):
    """Zero-padding layer for 1D input (e.g. temporal sequence).

    Input shape
    -----------
    3D tensor with shape (samples, axis_to_pad, features)

    Output shape
    ------------
    3D tensor with shape (samples, padded_axis, features)

    Arguments
    ---------
    padding: int
        How many zeros to add at the beginning and end of
        the padding dimension (axis 1).
    """
    input_ndim = 3

    def __init__(self, padding=1, **kwargs):
        super(ZeroPadding1D, self).__init__(**kwargs)
        self.padding = padding
        self.input = T.tensor3()

    @property
    def output_shape(self):
        """Static output shape; the padded axis stays ``None`` when unknown."""
        input_shape = self.input_shape
        length = input_shape[1]
        if length is not None:
            # Zeros are added on both sides of axis 1.
            length = length + self.padding * 2
        return (input_shape[0], length, input_shape[2])

    def get_output(self, train=False):
        """Return the input embedded in a zero tensor widened along axis 1."""
        X = self.get_input(train)
        input_shape = X.shape
        output_shape = (input_shape[0],
                        input_shape[1] + 2 * self.padding,
                        input_shape[2])
        output = T.zeros(output_shape)
        # Write X into the middle, leaving `padding` zeros on each side.
        return T.set_subtensor(output[:, self.padding:X.shape[1] + self.padding, :], X)

    def get_config(self):
        """Return the base layer config extended with this layer's padding."""
        config = {"name": self.__class__.__name__,
                  "padding": self.padding}
        base_config = super(ZeroPadding1D, self).get_config()
        return dict(list(base_config.items()) + list(config.items()))
|
||||
|
||||
|
||||
class ZeroPadding2D(Layer):
|
||||
def __init__(self, pad=(1, 1)):
|
||||
super(ZeroPadding2D, self).__init__()
|
||||
self.pad = pad
|
||||
"""Zero-padding layer for 2D input (e.g. picture).
|
||||
|
||||
Input shape
|
||||
-----------
|
||||
4D tensor with shape (samples, depth, first_axis_to_pad, second_axis_to_pad)
|
||||
|
||||
Output shape
|
||||
------------
|
||||
4D tensor with shape (samples, depth, first_padded_axis, second_padded_axis)
|
||||
|
||||
Arguments
|
||||
---------
|
||||
padding: tuple of int (length 2)
|
||||
How many zeros to add at the beginning and end of
|
||||
the 2 padding dimensions (axis 2 and 3).
|
||||
"""
|
||||
input_ndim = 4
|
||||
|
||||
def __init__(self, padding=(1, 1), **kwargs):
|
||||
super(ZeroPadding2D, self).__init__(**kwargs)
|
||||
self.padding = tuple(padding)
|
||||
self.input = T.tensor4()
|
||||
|
||||
def get_output(self, train):
|
||||
@property
|
||||
def output_shape(self):
|
||||
input_shape = self.input_shape
|
||||
return (input_shape[0],
|
||||
input_shape[1],
|
||||
input_shape[2] + 2 * self.padding[0],
|
||||
input_shape[3] + 2 * self.padding[1])
|
||||
|
||||
def get_output(self, train=False):
|
||||
X = self.get_input(train)
|
||||
pad = self.pad
|
||||
in_shape = X.shape
|
||||
out_shape = (in_shape[0], in_shape[1], in_shape[2] + 2 * pad[0], in_shape[3] + 2 * pad[1])
|
||||
out = T.zeros(out_shape)
|
||||
indices = (slice(None), slice(None), slice(pad[0], in_shape[2] + pad[0]), slice(pad[1], in_shape[3] + pad[1]))
|
||||
return T.set_subtensor(out[indices], X)
|
||||
input_shape = X.shape
|
||||
output_shape = (input_shape[0],
|
||||
input_shape[1],
|
||||
input_shape[2] + 2 * self.padding[0],
|
||||
input_shape[3] + 2 * self.padding[1])
|
||||
output = T.zeros(output_shape)
|
||||
indices = (slice(None),
|
||||
slice(None),
|
||||
slice(self.padding[0], input_shape[2] + self.padding[0]),
|
||||
slice(self.padding[1], input_shape[3] + self.padding[1]))
|
||||
return T.set_subtensor(output[indices], X)
|
||||
|
||||
def get_config(self):
|
||||
return {"name": self.__class__.__name__,
|
||||
"pad": self.pad}
|
||||
config = {"name": self.__class__.__name__,
|
||||
"padding": self.padding}
|
||||
base_config = super(ZeroPadding2D, self).get_config()
|
||||
return dict(list(base_config.items()) + list(config.items()))
|
||||
|
||||
+407
-181
@@ -5,8 +5,11 @@ import theano
|
||||
import theano.tensor as T
|
||||
import numpy as np
|
||||
|
||||
from collections import OrderedDict
|
||||
import copy
|
||||
|
||||
from .. import activations, initializations, regularizers, constraints
|
||||
from ..utils.theano_utils import shared_zeros, floatX
|
||||
from ..utils.theano_utils import shared_zeros, floatX, ndim_tensor
|
||||
from ..utils.generic_utils import make_tuple
|
||||
from ..regularizers import ActivityRegularizer, Regularizer
|
||||
|
||||
@@ -15,17 +18,32 @@ from six.moves import zip
|
||||
|
||||
|
||||
class Layer(object):
|
||||
def __init__(self):
|
||||
self.params = []
|
||||
|
||||
def init_updates(self):
|
||||
self.updates = []
|
||||
def __init__(self, **kwargs):
|
||||
for kwarg in kwargs:
|
||||
assert kwarg in {'input_shape'}, "Keyword argument not understood: " + kwarg
|
||||
if 'input_shape' in kwargs:
|
||||
self.set_input_shape(kwargs['input_shape'])
|
||||
if not hasattr(self, 'params'):
|
||||
self.params = []
|
||||
|
||||
def set_previous(self, layer, connection_map={}):
    """Connect this layer to ``layer`` and build its weights.

    Arguments
    ---------
    layer: the layer whose output feeds this one.
    connection_map: accepted for interface compatibility; not used here.

    Raises
    ------
    AssertionError: if either side has more than one input/output, or if
        this layer declares ``input_ndim`` and it does not match the rank
        of ``layer.output_shape``.
    Exception: if ``layer`` emits a mask and this layer cannot consume one.
    """
    assert self.nb_input == layer.nb_output == 1, "Cannot connect layers: input count and output count should be 1."
    if hasattr(self, 'input_ndim'):
        assert self.input_ndim == len(layer.output_shape), "Incompatible shapes: layer expected input with ndim=" +\
            str(self.input_ndim) + " but previous layer has output_shape " + str(layer.output_shape)
    # Explicit raise (not assert) so the mask check survives `python -O`.
    if layer.get_output_mask() is not None and not self.supports_masked_input():
        raise Exception("Cannot connect non-masking layer to layer with masked output")
    self.previous = layer
    self.build()
|
||||
|
||||
def build(self):
|
||||
'''Instantiation of layer weights.
|
||||
|
||||
Called after `set_previous`, or after `set_input_shape`,
|
||||
once the layer has a defined input shape.
|
||||
Must be implemented on all layers that have weights.
|
||||
'''
|
||||
pass
|
||||
|
||||
@property
|
||||
def nb_input(self):
|
||||
@@ -35,14 +53,45 @@ class Layer(object):
|
||||
def nb_output(self):
|
||||
return 1
|
||||
|
||||
@property
def input_shape(self):
    """Return the shape of this layer's input.

    Uses the previous layer's ``output_shape`` when connected; otherwise
    falls back to the manually set ``_input_shape``.

    Raises
    ------
    Exception: if the layer is neither connected nor given an input shape.
    """
    # if layer is not connected (e.g. input layer),
    # input shape can be set manually via _input_shape attribute.
    if hasattr(self, 'previous'):
        return self.previous.output_shape
    elif hasattr(self, '_input_shape'):
        return self._input_shape
    else:
        raise Exception('Layer is not connected. Did you forget to set "input_shape"?')
|
||||
|
||||
def set_input_shape(self, input_shape):
    """Set the input shape manually, create the input tensor and build.

    Arguments
    ---------
    input_shape: tuple or list giving the shape without its first axis;
        ``None`` is prepended for that axis.

    Raises
    ------
    Exception: if ``input_shape`` is not a tuple/list, or if the layer
        declares ``input_ndim`` and the full shape has a different rank.
    """
    # isinstance also admits tuple/list subclasses (e.g. namedtuples).
    if not isinstance(input_shape, (tuple, list)):
        raise Exception('Invalid input shape - input_shape should be a tuple of int.')
    input_shape = (None,) + tuple(input_shape)
    if hasattr(self, 'input_ndim') and self.input_ndim:
        if self.input_ndim != len(input_shape):
            raise Exception('Invalid input shape - Layer expects input ndim=' +
                            str(self.input_ndim) + ', was provided with input shape ' + str(input_shape))
    self._input_shape = input_shape
    self.input = ndim_tensor(len(self._input_shape))
    self.build()
|
||||
|
||||
@property
|
||||
def output_shape(self):
|
||||
# default assumption: tensor shape unchanged.
|
||||
return self.input_shape
|
||||
|
||||
def get_output(self, train=False):
|
||||
return self.get_input(train)
|
||||
|
||||
def get_input(self, train=False):
    """Return the tensor feeding this layer.

    Uses the previous layer's output when connected, otherwise the layer's
    own ``input`` attribute.

    Raises
    ------
    Exception: if the layer has neither a previous layer nor an input.
    """
    if hasattr(self, 'previous'):
        return self.previous.get_output(train=train)
    elif hasattr(self, 'input'):
        return self.input
    else:
        # Adjacent literals instead of a backslash continuation inside the
        # string, which either glued the words or embedded indentation.
        raise Exception('Layer is not connected '
                        'and is not an input layer.')
|
||||
|
||||
def supports_masked_input(self):
|
||||
''' Whether or not this layer respects the output mask of its previous layer in its calculations. If you try
|
||||
@@ -67,6 +116,8 @@ class Layer(object):
|
||||
return None
|
||||
|
||||
def set_weights(self, weights):
|
||||
assert len(self.params) == len(weights), 'Provided weight array does not match layer weights (' + \
|
||||
str(len(self.params)) + ' layer params vs. ' + str(len(weights)) + ' provided weights)'
|
||||
for p, w in zip(self.params, weights):
|
||||
if p.eval().shape != w.shape:
|
||||
raise Exception("Layer shape %s not compatible with weight shape %s." % (p.eval().shape, w.shape))
|
||||
@@ -79,7 +130,10 @@ class Layer(object):
|
||||
return weights
|
||||
|
||||
def get_config(self):
    """Return the layer configuration as a dict.

    Always contains ``"name"`` (the class name). When an input shape was set
    manually, ``"input_shape"`` holds it without its first axis.
    """
    config = {"name": self.__class__.__name__}
    if hasattr(self, '_input_shape'):
        # Drop the leading axis that set_input_shape prepends.
        config['input_shape'] = self._input_shape[1:]
    return config
|
||||
|
||||
def get_params(self):
|
||||
consts = []
|
||||
@@ -110,6 +164,9 @@ class Layer(object):
|
||||
for i in range(len(self.params)):
|
||||
self.params[i].name = '%s_p%d' % (name, i)
|
||||
|
||||
def count_params(self):
    """Return the total number of scalar entries across ``self.params``.

    Each parameter's shape is evaluated and its dimensions multiplied; the
    total is returned as a plain ``int`` (0 when there are no parameters).
    """
    return int(sum(np.prod(p.shape.eval()) for p in self.params))
|
||||
|
||||
|
||||
class MaskedLayer(Layer):
|
||||
'''
|
||||
@@ -142,8 +199,8 @@ class Masking(MaskedLayer):
|
||||
otherwise it is 1.
|
||||
|
||||
"""
|
||||
def __init__(self, mask_value=0.):
|
||||
super(Masking, self).__init__()
|
||||
def __init__(self, mask_value=0., **kwargs):
|
||||
super(Masking, self).__init__(**kwargs)
|
||||
self.mask_value = mask_value
|
||||
self.input = T.tensor3()
|
||||
|
||||
@@ -156,18 +213,64 @@ class Masking(MaskedLayer):
|
||||
return X * T.shape_padright(T.any((1. - T.eq(X, self.mask_value)), axis=-1))
|
||||
|
||||
def get_config(self):
|
||||
return {"name": self.__class__.__name__,
|
||||
"mask_value": self.mask_value}
|
||||
config = {"name": self.__class__.__name__,
|
||||
"mask_value": self.mask_value}
|
||||
base_config = super(Masking, self).get_config()
|
||||
return dict(list(base_config.items()) + list(config.items()))
|
||||
|
||||
|
||||
class TimeDistributedMerge(Layer):
    '''Sum/multiply/average over the outputs of a TimeDistributed layer.

    mode: {'sum', 'mul', 'ave'}
    Tensor input dimensions: (nb_sample, time, features)
    Tensor output dimensions: (nb_sample, features)
    '''
    input_ndim = 3

    def __init__(self, mode='sum', **kwargs):
        super(TimeDistributedMerge, self).__init__(**kwargs)
        self.mode = mode
        self.params = []
        self.regularizers = []
        self.constraints = []
        self.updates = []

    @property
    def output_shape(self):
        '''Static output shape: axis 1 of the input is reduced away.'''
        return (None, self.input_shape[2])

    def get_output(self, train=False):
        '''Reduce the input over axis 1 according to `self.mode`.

        Raises an Exception for any mode other than 'sum', 'ave' or 'mul'.
        '''
        X = self.get_input(train)
        if self.mode == 'sum' or self.mode == 'ave':
            s = theano.tensor.sum(X, axis=1)
            if self.mode == 'ave':
                s /= X.shape[1]
            return s
        elif self.mode == 'mul':
            # Product reduction over axis 1; `theano.tensor.mul` is the
            # elementwise multiply and does not reduce along an axis.
            s = theano.tensor.prod(X, axis=1)
            return s
        else:
            raise Exception('Unknown merge mode')

    def get_config(self):
        '''Return the base layer config extended with the merge mode.'''
        config = {"name": self.__class__.__name__,
                  "mode": self.mode}
        base_config = super(TimeDistributedMerge, self).get_config()
        return dict(list(base_config.items()) + list(config.items()))
|
||||
|
||||
|
||||
class Merge(Layer):
|
||||
def __init__(self, layers, mode='sum'):
|
||||
def __init__(self, layers, mode='sum', concat_axis=-1):
|
||||
''' Merge the output of a list of layers or containers into a single tensor.
|
||||
mode: {'sum', 'concat'}
|
||||
mode: {'sum', 'mul', 'concat', 'ave'}
|
||||
'''
|
||||
if len(layers) < 2:
|
||||
raise Exception("Please specify two or more input layers (or containers) to merge")
|
||||
if mode not in {'sum', 'mul', 'concat', 'ave'}:
|
||||
raise Exception("Invalid merge mode: " + str(mode))
|
||||
self.mode = mode
|
||||
self.concat_axis = concat_axis
|
||||
self.layers = layers
|
||||
self.params = []
|
||||
self.regularizers = []
|
||||
@@ -183,18 +286,45 @@ class Merge(Layer):
|
||||
self.params.append(p)
|
||||
self.constraints.append(c)
|
||||
|
||||
@property
def output_shape(self):
    """Return the static shape of the merged output.

    For 'sum', 'mul' and 'ave' this is the first layer's output shape. For
    'concat' the sizes along ``self.concat_axis`` are added up; if any of
    them is unknown (``None``) the merged size on that axis is ``None``.
    Any other mode yields ``None``.
    """
    input_shapes = [layer.output_shape for layer in self.layers]
    if self.mode in ['sum', 'mul', 'ave']:
        return input_shapes[0]
    elif self.mode == 'concat':
        axis = self.concat_axis
        output_shape = list(input_shapes[0])
        for shape in input_shapes[1:]:
            if output_shape[axis] is None or shape[axis] is None:
                # Unknown plus anything is unknown; avoid `None + int`.
                output_shape[axis] = None
                break
            output_shape[axis] += shape[axis]
        return tuple(output_shape)
|
||||
|
||||
def get_params(self):
|
||||
return self.params, self.regularizers, self.constraints, self.updates
|
||||
|
||||
def get_output(self, train=False):
    """Combine the outputs of ``self.layers`` according to ``self.mode``.

    'sum' / 'ave': running sum of all outputs ('ave' divides by the count).
    'concat':      concatenation along ``self.concat_axis``.
    'join':        ordered dict mapping each output's ``name`` to the output.
    'mul':         running product of all outputs.

    Raises
    ------
    ValueError: in 'join' mode when an output has no name.
    Exception: for any other mode.
    """
    if self.mode == 'sum' or self.mode == 'ave':
        s = self.layers[0].get_output(train)
        for layer in self.layers[1:]:
            s = s + layer.get_output(train)
        if self.mode == 'ave':
            s = s / len(self.layers)
        return s
    elif self.mode == 'concat':
        inputs = [layer.get_output(train) for layer in self.layers]
        return T.concatenate(inputs, axis=self.concat_axis)
    elif self.mode == 'join':
        inputs = OrderedDict()
        for layer in self.layers:
            X = layer.get_output(train)
            if X.name is None:
                raise ValueError("merge_mode='join' only works with named inputs")
            # Reuse X instead of asking the layer for its output again.
            inputs[X.name] = X
        return inputs
    elif self.mode == 'mul':
        s = self.layers[0].get_output(train)
        for layer in self.layers[1:]:
            s = s * layer.get_output(train)
        return s
    else:
        raise Exception('Unknown merge mode')
|
||||
|
||||
@@ -232,17 +362,20 @@ class Merge(Layer):
|
||||
weights = weights[nb_param:]
|
||||
|
||||
def get_config(self):
|
||||
return {"name": self.__class__.__name__,
|
||||
"layers": [l.get_config() for l in self.layers],
|
||||
"mode": self.mode}
|
||||
config = {"name": self.__class__.__name__,
|
||||
"layers": [l.get_config() for l in self.layers],
|
||||
"mode": self.mode,
|
||||
"concat_axis": self.concat_axis}
|
||||
base_config = super(Merge, self).get_config()
|
||||
return dict(list(base_config.items()) + list(config.items()))
|
||||
|
||||
|
||||
class Dropout(MaskedLayer):
|
||||
'''
|
||||
Hinton's dropout.
|
||||
'''
|
||||
def __init__(self, p):
|
||||
super(Dropout, self).__init__()
|
||||
def __init__(self, p, **kwargs):
|
||||
super(Dropout, self).__init__(**kwargs)
|
||||
self.p = p
|
||||
self.srng = RandomStreams(seed=np.random.randint(10e6))
|
||||
|
||||
@@ -257,16 +390,18 @@ class Dropout(MaskedLayer):
|
||||
return X
|
||||
|
||||
def get_config(self):
|
||||
return {"name": self.__class__.__name__,
|
||||
"p": self.p}
|
||||
config = {"name": self.__class__.__name__,
|
||||
"p": self.p}
|
||||
base_config = super(Dropout, self).get_config()
|
||||
return dict(list(base_config.items()) + list(config.items()))
|
||||
|
||||
|
||||
class Activation(MaskedLayer):
|
||||
'''
|
||||
Apply an activation function to an output.
|
||||
'''
|
||||
def __init__(self, activation, target=0, beta=0.1):
|
||||
super(Activation, self).__init__()
|
||||
def __init__(self, activation, target=0, beta=0.1, **kwargs):
|
||||
super(Activation, self).__init__(**kwargs)
|
||||
self.activation = activations.get(activation)
|
||||
self.target = target
|
||||
self.beta = beta
|
||||
@@ -276,10 +411,12 @@ class Activation(MaskedLayer):
|
||||
return self.activation(X)
|
||||
|
||||
def get_config(self):
|
||||
return {"name": self.__class__.__name__,
|
||||
"activation": self.activation.__name__,
|
||||
"target": self.target,
|
||||
"beta": self.beta}
|
||||
config = {"name": self.__class__.__name__,
|
||||
"activation": self.activation.__name__,
|
||||
"target": self.target,
|
||||
"beta": self.beta}
|
||||
base_config = super(Activation, self).get_config()
|
||||
return dict(list(base_config.items()) + list(config.items()))
|
||||
|
||||
|
||||
class Reshape(Layer):
|
||||
@@ -288,35 +425,52 @@ class Reshape(Layer):
|
||||
Can't be used as first layer in a model (no fixed input!)
|
||||
First dimension is assumed to be nb_samples.
|
||||
'''
|
||||
def __init__(self, *dims):
|
||||
super(Reshape, self).__init__()
|
||||
self.dims = dims
|
||||
def __init__(self, dims, **kwargs):
|
||||
super(Reshape, self).__init__(**kwargs)
|
||||
self.dims = tuple(dims)
|
||||
|
||||
@property
|
||||
def output_shape(self):
|
||||
return (self.input_shape[0],) + self.dims
|
||||
|
||||
def get_output(self, train=False):
|
||||
X = self.get_input(train)
|
||||
nshape = make_tuple(X.shape[0], *self.dims)
|
||||
return theano.tensor.reshape(X, nshape)
|
||||
new_shape = (X.shape[0],) + self.dims
|
||||
return theano.tensor.reshape(X, new_shape)
|
||||
|
||||
def get_config(self):
|
||||
return {"name": self.__class__.__name__,
|
||||
"dims": self.dims}
|
||||
config = {"name": self.__class__.__name__,
|
||||
"dims": self.dims}
|
||||
base_config = super(Reshape, self).get_config()
|
||||
return dict(list(base_config.items()) + list(config.items()))
|
||||
|
||||
|
||||
class Permute(Layer):
|
||||
'''
|
||||
Permute the dimensions of the data according to the given tuple
|
||||
Permute the dimensions of the input according to the given tuple.
|
||||
'''
|
||||
def __init__(self, dims):
|
||||
super(Permute, self).__init__()
|
||||
self.dims = dims
|
||||
def __init__(self, dims, **kwargs):
|
||||
super(Permute, self).__init__(**kwargs)
|
||||
self.dims = tuple(dims)
|
||||
|
||||
def get_output(self, train):
|
||||
@property
|
||||
def output_shape(self):
|
||||
input_shape = list(self.input_shape)
|
||||
output_shape = copy.copy(input_shape)
|
||||
for i, dim in enumerate(self.dims):
|
||||
target_dim = input_shape[dim]
|
||||
output_shape[i+1] = target_dim
|
||||
return tuple(output_shape)
|
||||
|
||||
def get_output(self, train=False):
|
||||
X = self.get_input(train)
|
||||
return X.dimshuffle((0,) + self.dims)
|
||||
|
||||
def get_config(self):
|
||||
return {"name": self.__class__.__name__,
|
||||
"dims": self.dims}
|
||||
config = {"name": self.__class__.__name__,
|
||||
"dims": self.dims}
|
||||
base_config = super(Permute, self).get_config()
|
||||
return dict(list(base_config.items()) + list(config.items()))
|
||||
|
||||
|
||||
class Flatten(Layer):
|
||||
@@ -324,8 +478,13 @@ class Flatten(Layer):
|
||||
Reshape input to flat shape.
|
||||
First dimension is assumed to be nb_samples.
|
||||
'''
|
||||
def __init__(self):
|
||||
super(Flatten, self).__init__()
|
||||
def __init__(self, **kwargs):
|
||||
super(Flatten, self).__init__(**kwargs)
|
||||
|
||||
@property
|
||||
def output_shape(self):
|
||||
input_shape = self.input_shape
|
||||
return (input_shape[0], np.prod(input_shape[1:]))
|
||||
|
||||
def get_output(self, train=False):
|
||||
X = self.get_input(train)
|
||||
@@ -341,10 +500,15 @@ class RepeatVector(Layer):
|
||||
Dimensions of input are assumed to be (nb_samples, dim).
|
||||
Return tensor of shape (nb_samples, n, dim).
|
||||
'''
|
||||
def __init__(self, n):
|
||||
super(RepeatVector, self).__init__()
|
||||
def __init__(self, n, **kwargs):
|
||||
super(RepeatVector, self).__init__(**kwargs)
|
||||
self.n = n
|
||||
|
||||
@property
|
||||
def output_shape(self):
|
||||
input_shape = self.input_shape
|
||||
return (input_shape[0], self.n, input_shape[1])
|
||||
|
||||
def get_output(self, train=False):
|
||||
X = self.get_input(train)
|
||||
tensors = [X]*self.n
|
||||
@@ -352,59 +516,69 @@ class RepeatVector(Layer):
|
||||
return stacked.dimshuffle((1, 0, 2))
|
||||
|
||||
def get_config(self):
|
||||
return {"name": self.__class__.__name__,
|
||||
"n": self.n}
|
||||
config = {"name": self.__class__.__name__,
|
||||
"n": self.n}
|
||||
base_config = super(RepeatVector, self).get_config()
|
||||
return dict(list(base_config.items()) + list(config.items()))
|
||||
|
||||
|
||||
class Dense(Layer):
|
||||
'''
|
||||
Just your regular fully connected NN layer.
|
||||
'''
|
||||
def __init__(self, input_dim, output_dim, init='glorot_uniform', activation='linear', weights=None, name=None,
|
||||
W_regularizer=None, b_regularizer=None, activity_regularizer=None,
|
||||
W_constraint=None, b_constraint=None):
|
||||
input_ndim = 2
|
||||
|
||||
super(Dense, self).__init__()
|
||||
def __init__(self, output_dim, init='glorot_uniform', activation='linear', weights=None,
|
||||
W_regularizer=None, b_regularizer=None, activity_regularizer=None,
|
||||
W_constraint=None, b_constraint=None, input_dim=None, **kwargs):
|
||||
self.init = initializations.get(init)
|
||||
self.activation = activations.get(activation)
|
||||
self.input_dim = input_dim
|
||||
self.output_dim = output_dim
|
||||
|
||||
self.input = T.matrix()
|
||||
self.W = self.init((self.input_dim, self.output_dim))
|
||||
self.b = shared_zeros((self.output_dim))
|
||||
|
||||
self.params = [self.W, self.b]
|
||||
|
||||
self.regularizers = []
|
||||
self.W_regularizer = regularizers.get(W_regularizer)
|
||||
if self.W_regularizer:
|
||||
self.W_regularizer.set_param(self.W)
|
||||
self.regularizers.append(self.W_regularizer)
|
||||
|
||||
self.b_regularizer = regularizers.get(b_regularizer)
|
||||
if self.b_regularizer:
|
||||
self.b_regularizer.set_param(self.b)
|
||||
self.regularizers.append(self.b_regularizer)
|
||||
|
||||
self.activity_regularizer = regularizers.get(activity_regularizer)
|
||||
if self.activity_regularizer:
|
||||
self.activity_regularizer.set_layer(self)
|
||||
self.regularizers.append(self.activity_regularizer)
|
||||
|
||||
self.W_constraint = constraints.get(W_constraint)
|
||||
self.b_constraint = constraints.get(b_constraint)
|
||||
self.constraints = [self.W_constraint, self.b_constraint]
|
||||
|
||||
if weights is not None:
|
||||
self.set_weights(weights)
|
||||
self.initial_weights = weights
|
||||
|
||||
if name is not None:
|
||||
self.set_name(name)
|
||||
self.input_dim = input_dim
|
||||
if self.input_dim:
|
||||
kwargs['input_shape'] = (self.input_dim,)
|
||||
super(Dense, self).__init__(**kwargs)
|
||||
|
||||
def set_name(self, name):
|
||||
self.W.name = '%s_W' % name
|
||||
self.b.name = '%s_b' % name
|
||||
def build(self):
|
||||
input_dim = self.input_shape[1]
|
||||
|
||||
self.input = T.matrix()
|
||||
self.W = self.init((input_dim, self.output_dim))
|
||||
self.b = shared_zeros((self.output_dim,))
|
||||
|
||||
self.params = [self.W, self.b]
|
||||
|
||||
self.regularizers = []
|
||||
if self.W_regularizer:
|
||||
self.W_regularizer.set_param(self.W)
|
||||
self.regularizers.append(self.W_regularizer)
|
||||
|
||||
if self.b_regularizer:
|
||||
self.b_regularizer.set_param(self.b)
|
||||
self.regularizers.append(self.b_regularizer)
|
||||
|
||||
if self.activity_regularizer:
|
||||
self.activity_regularizer.set_layer(self)
|
||||
self.regularizers.append(self.activity_regularizer)
|
||||
|
||||
if self.initial_weights is not None:
|
||||
self.set_weights(self.initial_weights)
|
||||
del self.initial_weights
|
||||
|
||||
@property
|
||||
def output_shape(self):
|
||||
return (self.input_shape[0], self.output_dim)
|
||||
|
||||
def get_output(self, train=False):
|
||||
X = self.get_input(train)
|
||||
@@ -412,16 +586,18 @@ class Dense(Layer):
|
||||
return output
|
||||
|
||||
def get_config(self):
|
||||
return {"name": self.__class__.__name__,
|
||||
"input_dim": self.input_dim,
|
||||
"output_dim": self.output_dim,
|
||||
"init": self.init.__name__,
|
||||
"activation": self.activation.__name__,
|
||||
"W_regularizer": self.W_regularizer.get_config() if self.W_regularizer else None,
|
||||
"b_regularizer": self.b_regularizer.get_config() if self.b_regularizer else None,
|
||||
"activity_regularizer": self.activity_regularizer.get_config() if self.activity_regularizer else None,
|
||||
"W_constraint": self.W_constraint.get_config() if self.W_constraint else None,
|
||||
"b_constraint": self.b_constraint.get_config() if self.b_constraint else None}
|
||||
config = {"name": self.__class__.__name__,
|
||||
"output_dim": self.output_dim,
|
||||
"init": self.init.__name__,
|
||||
"activation": self.activation.__name__,
|
||||
"W_regularizer": self.W_regularizer.get_config() if self.W_regularizer else None,
|
||||
"b_regularizer": self.b_regularizer.get_config() if self.b_regularizer else None,
|
||||
"activity_regularizer": self.activity_regularizer.get_config() if self.activity_regularizer else None,
|
||||
"W_constraint": self.W_constraint.get_config() if self.W_constraint else None,
|
||||
"b_constraint": self.b_constraint.get_config() if self.b_constraint else None,
|
||||
"input_dim": self.input_dim}
|
||||
base_config = super(Dense, self).get_config()
|
||||
return dict(list(base_config.items()) + list(config.items()))
|
||||
|
||||
|
||||
class ActivityRegularization(Layer):
|
||||
@@ -429,8 +605,8 @@ class ActivityRegularization(Layer):
|
||||
Layer that passes through its input unchanged, but applies an update
|
||||
to the cost function based on the activity.
|
||||
'''
|
||||
def __init__(self, l1=0., l2=0.):
|
||||
super(ActivityRegularization, self).__init__()
|
||||
def __init__(self, l1=0., l2=0., **kwargs):
|
||||
super(ActivityRegularization, self).__init__(**kwargs)
|
||||
self.l1 = l1
|
||||
self.l2 = l2
|
||||
|
||||
@@ -442,58 +618,76 @@ class ActivityRegularization(Layer):
|
||||
return self.get_input(train)
|
||||
|
||||
def get_config(self):
|
||||
return {"name": self.__class__.__name__,
|
||||
"l1": self.l1,
|
||||
"l2": self.l2}
|
||||
config = {"name": self.__class__.__name__,
|
||||
"l1": self.l1,
|
||||
"l2": self.l2}
|
||||
base_config = super(ActivityRegularization, self).get_config()
|
||||
return dict(list(base_config.items()) + list(config.items()))
|
||||
|
||||
|
||||
class TimeDistributedDense(MaskedLayer):
|
||||
'''
|
||||
Apply a same DenseLayer for each dimension[1] (shared_dimension) input
|
||||
Especially useful after a recurrent network with 'return_sequence=True'
|
||||
Tensor input dimensions: (nb_sample, shared_dimension, input_dim)
|
||||
Tensor output dimensions: (nb_sample, shared_dimension, output_dim)
|
||||
Apply a same Dense layer for each dimension[1] (time_dimension) input.
|
||||
Especially useful after a recurrent network with 'return_sequence=True'.
|
||||
Tensor input dimensions: (nb_sample, time_dimension, input_dim)
|
||||
Tensor output dimensions: (nb_sample, time_dimension, output_dim)
|
||||
|
||||
'''
|
||||
def __init__(self, input_dim, output_dim, init='glorot_uniform', activation='linear', weights=None,
|
||||
W_regularizer=None, b_regularizer=None, activity_regularizer=None,
|
||||
W_constraint=None, b_constraint=None):
|
||||
input_ndim = 3
|
||||
|
||||
super(TimeDistributedDense, self).__init__()
|
||||
def __init__(self, output_dim, init='glorot_uniform', activation='linear', weights=None,
|
||||
W_regularizer=None, b_regularizer=None, activity_regularizer=None,
|
||||
W_constraint=None, b_constraint=None, input_dim=None, input_length=None, **kwargs):
|
||||
self.output_dim = output_dim
|
||||
self.init = initializations.get(init)
|
||||
self.activation = activations.get(activation)
|
||||
self.input_dim = input_dim
|
||||
self.output_dim = output_dim
|
||||
|
||||
self.input = T.tensor3()
|
||||
self.W = self.init((self.input_dim, self.output_dim))
|
||||
self.b = shared_zeros((self.output_dim))
|
||||
|
||||
self.params = [self.W, self.b]
|
||||
|
||||
self.regularizers = []
|
||||
|
||||
self.W_regularizer = regularizers.get(W_regularizer)
|
||||
if self.W_regularizer:
|
||||
self.W_regularizer.set_param(self.W)
|
||||
self.regularizers.append(self.W_regularizer)
|
||||
|
||||
self.b_regularizer = regularizers.get(b_regularizer)
|
||||
if self.b_regularizer:
|
||||
self.b_regularizer.set_param(self.b)
|
||||
self.regularizers.append(self.b_regularizer)
|
||||
|
||||
self.activity_regularizer = regularizers.get(activity_regularizer)
|
||||
if self.activity_regularizer:
|
||||
self.activity_regularizer.set_layer(self)
|
||||
self.regularizers.append(self.activity_regularizer)
|
||||
|
||||
self.W_constraint = constraints.get(W_constraint)
|
||||
self.b_constraint = constraints.get(b_constraint)
|
||||
self.constraints = [self.W_constraint, self.b_constraint]
|
||||
|
||||
if weights is not None:
|
||||
self.set_weights(weights)
|
||||
self.initial_weights = weights
|
||||
|
||||
self.input_dim = input_dim
|
||||
self.input_length = input_length
|
||||
if self.input_dim:
|
||||
kwargs['input_shape'] = (self.input_length, self.input_dim)
|
||||
super(TimeDistributedDense, self).__init__(**kwargs)
|
||||
|
||||
def build(self):
|
||||
input_dim = self.input_shape[2]
|
||||
|
||||
self.input = T.tensor3()
|
||||
self.W = self.init((input_dim, self.output_dim))
|
||||
self.b = shared_zeros((self.output_dim))
|
||||
|
||||
self.params = [self.W, self.b]
|
||||
self.regularizers = []
|
||||
|
||||
if self.W_regularizer:
|
||||
self.W_regularizer.set_param(self.W)
|
||||
self.regularizers.append(self.W_regularizer)
|
||||
|
||||
if self.b_regularizer:
|
||||
self.b_regularizer.set_param(self.b)
|
||||
self.regularizers.append(self.b_regularizer)
|
||||
|
||||
if self.activity_regularizer:
|
||||
self.activity_regularizer.set_layer(self)
|
||||
self.regularizers.append(self.activity_regularizer)
|
||||
|
||||
if self.initial_weights is not None:
|
||||
self.set_weights(self.initial_weights)
|
||||
del self.initial_weights
|
||||
|
||||
@property
|
||||
def output_shape(self):
|
||||
input_shape = self.input_shape
|
||||
return (input_shape[0], input_shape[1], self.output_dim)
|
||||
|
||||
def get_output(self, train=False):
|
||||
X = self.get_input(train)
|
||||
@@ -501,27 +695,33 @@ class TimeDistributedDense(MaskedLayer):
|
||||
return output.dimshuffle(1, 0, 2)
|
||||
|
||||
def get_config(self):
|
||||
return {"name": self.__class__.__name__,
|
||||
"input_dim": self.input_dim,
|
||||
"output_dim": self.output_dim,
|
||||
"init": self.init.__name__,
|
||||
"activation": self.activation.__name__,
|
||||
"W_regularizer": self.W_regularizer.get_config() if self.W_regularizer else None,
|
||||
"b_regularizer": self.b_regularizer.get_config() if self.b_regularizer else None,
|
||||
"activity_regularizer": self.activity_regularizer.get_config() if self.activity_regularizer else None,
|
||||
"W_constraint": self.W_constraint.get_config() if self.W_constraint else None,
|
||||
"b_constraint": self.b_constraint.get_config() if self.b_constraint else None}
|
||||
config = {"name": self.__class__.__name__,
|
||||
"output_dim": self.output_dim,
|
||||
"init": self.init.__name__,
|
||||
"activation": self.activation.__name__,
|
||||
"W_regularizer": self.W_regularizer.get_config() if self.W_regularizer else None,
|
||||
"b_regularizer": self.b_regularizer.get_config() if self.b_regularizer else None,
|
||||
"activity_regularizer": self.activity_regularizer.get_config() if self.activity_regularizer else None,
|
||||
"W_constraint": self.W_constraint.get_config() if self.W_constraint else None,
|
||||
"b_constraint": self.b_constraint.get_config() if self.b_constraint else None,
|
||||
"input_dim": self.input_dim,
|
||||
"input_length": self.input_length}
|
||||
base_config = super(TimeDistributedDense, self).get_config()
|
||||
return dict(list(base_config.items()) + list(config.items()))
|
||||
|
||||
|
||||
class AutoEncoder(Layer):
|
||||
'''
|
||||
A customizable autoencoder model.
|
||||
If output_reconstruction then dim(input) = dim(output)
|
||||
else dim(output) = dim(hidden)
|
||||
'''
|
||||
def __init__(self, encoder, decoder, output_reconstruction=True, weights=None):
|
||||
'''A customizable autoencoder model.
|
||||
|
||||
super(AutoEncoder, self).__init__()
|
||||
Tensor input dimensions: same as encoder input
|
||||
Tensor output dimensions:
|
||||
if output_reconstruction:
|
||||
same as encoder output
|
||||
else:
|
||||
same as decoder output
|
||||
'''
|
||||
def __init__(self, encoder, decoder, output_reconstruction=True, weights=None, **kwargs):
|
||||
super(AutoEncoder, self).__init__(**kwargs)
|
||||
|
||||
self.output_reconstruction = output_reconstruction
|
||||
self.encoder = encoder
|
||||
@@ -569,6 +769,17 @@ class AutoEncoder(Layer):
|
||||
def _get_hidden(self, train=False):
|
||||
return self.encoder.get_output(train)
|
||||
|
||||
@property
|
||||
def input_shape(self):
|
||||
self.encoder.previous.output_shape
|
||||
|
||||
@property
|
||||
def output_shape(self):
|
||||
if self.output_reconstruction:
|
||||
return self.encoder.previous.output_shape
|
||||
else:
|
||||
return self.decoder.previous.output_shape
|
||||
|
||||
def get_output(self, train=False):
|
||||
if not train and not self.output_reconstruction:
|
||||
return self.encoder.get_output(train)
|
||||
@@ -587,45 +798,58 @@ class MaxoutDense(Layer):
|
||||
Max-out layer, nb_feature is the number of pieces in the piecewise linear approx.
|
||||
Refer to http://arxiv.org/pdf/1302.4389.pdf
|
||||
'''
|
||||
def __init__(self, input_dim, output_dim, nb_feature=4, init='glorot_uniform', weights=None,
|
||||
W_regularizer=None, b_regularizer=None, activity_regularizer=None,
|
||||
W_constraint=None, b_constraint=None):
|
||||
input_ndim = 2
|
||||
|
||||
super(MaxoutDense, self).__init__()
|
||||
self.init = initializations.get(init)
|
||||
self.input_dim = input_dim
|
||||
def __init__(self, output_dim, nb_feature=4, init='glorot_uniform', weights=None,
|
||||
W_regularizer=None, b_regularizer=None, activity_regularizer=None,
|
||||
W_constraint=None, b_constraint=None, input_dim=None, **kwargs):
|
||||
self.output_dim = output_dim
|
||||
self.nb_feature = nb_feature
|
||||
|
||||
self.input = T.matrix()
|
||||
self.W = self.init((self.nb_feature, self.input_dim, self.output_dim))
|
||||
self.b = shared_zeros((self.nb_feature, self.output_dim))
|
||||
|
||||
self.params = [self.W, self.b]
|
||||
|
||||
self.regularizers = []
|
||||
self.init = initializations.get(init)
|
||||
|
||||
self.W_regularizer = regularizers.get(W_regularizer)
|
||||
if self.W_regularizer:
|
||||
self.W_regularizer.set_param(self.W)
|
||||
self.regularizers.append(self.W_regularizer)
|
||||
|
||||
self.b_regularizer = regularizers.get(b_regularizer)
|
||||
if self.b_regularizer:
|
||||
self.b_regularizer.set_param(self.b)
|
||||
self.regularizers.append(self.b_regularizer)
|
||||
|
||||
self.activity_regularizer = regularizers.get(activity_regularizer)
|
||||
if self.activity_regularizer:
|
||||
self.activity_regularizer.set_layer(self)
|
||||
self.regularizers.append(self.activity_regularizer)
|
||||
|
||||
self.W_constraint = constraints.get(W_constraint)
|
||||
self.b_constraint = constraints.get(b_constraint)
|
||||
self.constraints = [self.W_constraint, self.b_constraint]
|
||||
|
||||
if weights is not None:
|
||||
self.set_weights(weights)
|
||||
self.initial_weights = weights
|
||||
self.input_dim = input_dim
|
||||
if self.input_dim:
|
||||
kwargs['input_shape'] = (self.input_dim,)
|
||||
super(MaxoutDense, self).__init__(**kwargs)
|
||||
|
||||
def build(self):
|
||||
input_dim = self.input_shape[1]
|
||||
|
||||
self.input = T.matrix()
|
||||
self.W = self.init((self.nb_feature, input_dim, self.output_dim))
|
||||
self.b = shared_zeros((self.nb_feature, self.output_dim))
|
||||
|
||||
self.params = [self.W, self.b]
|
||||
self.regularizers = []
|
||||
|
||||
if self.W_regularizer:
|
||||
self.W_regularizer.set_param(self.W)
|
||||
self.regularizers.append(self.W_regularizer)
|
||||
|
||||
if self.b_regularizer:
|
||||
self.b_regularizer.set_param(self.b)
|
||||
self.regularizers.append(self.b_regularizer)
|
||||
|
||||
if self.activity_regularizer:
|
||||
self.activity_regularizer.set_layer(self)
|
||||
self.regularizers.append(self.activity_regularizer)
|
||||
|
||||
if self.initial_weights is not None:
|
||||
self.set_weights(self.initial_weights)
|
||||
del self.initial_weights
|
||||
|
||||
@property
|
||||
def output_shape(self):
|
||||
return (self.input_shape[0], self.output_dim)
|
||||
|
||||
def get_output(self, train=False):
|
||||
X = self.get_input(train)
|
||||
@@ -634,13 +858,15 @@ class MaxoutDense(Layer):
|
||||
return output
|
||||
|
||||
def get_config(self):
|
||||
return {"name": self.__class__.__name__,
|
||||
"input_dim": self.input_dim,
|
||||
"output_dim": self.output_dim,
|
||||
"init": self.init.__name__,
|
||||
"nb_feature": self.nb_feature,
|
||||
"W_regularizer": self.W_regularizer.get_config() if self.W_regularizer else None,
|
||||
"b_regularizer": self.b_regularizer.get_config() if self.b_regularizer else None,
|
||||
"activity_regularizer": self.activity_regularizer.get_config() if self.activity_regularizer else None,
|
||||
"W_constraint": self.W_constraint.get_config() if self.W_constraint else None,
|
||||
"b_constraint": self.b_constraint.get_config() if self.b_constraint else None}
|
||||
config = {"name": self.__class__.__name__,
|
||||
"output_dim": self.output_dim,
|
||||
"init": self.init.__name__,
|
||||
"nb_feature": self.nb_feature,
|
||||
"W_regularizer": self.W_regularizer.get_config() if self.W_regularizer else None,
|
||||
"b_regularizer": self.b_regularizer.get_config() if self.b_regularizer else None,
|
||||
"activity_regularizer": self.activity_regularizer.get_config() if self.activity_regularizer else None,
|
||||
"W_constraint": self.W_constraint.get_config() if self.W_constraint else None,
|
||||
"b_constraint": self.b_constraint.get_config() if self.b_constraint else None,
|
||||
"input_dim": self.input_dim}
|
||||
base_config = super(MaxoutDense, self).get_config()
|
||||
return dict(list(base_config.items()) + list(config.items()))
|
||||
|
||||
@@ -17,38 +17,42 @@ class Embedding(Layer):
|
||||
@input_dim: size of vocabulary (highest input integer + 1)
|
||||
@out_dim: size of dense representation
|
||||
'''
|
||||
def __init__(self, input_dim, output_dim, init='uniform',
|
||||
W_regularizer=None, activity_regularizer=None, W_constraint=None,
|
||||
mask_zero=False, weights=None):
|
||||
input_ndim = 2
|
||||
|
||||
super(Embedding, self).__init__()
|
||||
self.init = initializations.get(init)
|
||||
def __init__(self, input_dim, output_dim, init='uniform', input_length=None,
|
||||
W_regularizer=None, activity_regularizer=None, W_constraint=None,
|
||||
mask_zero=False, weights=None, **kwargs):
|
||||
self.input_dim = input_dim
|
||||
self.output_dim = output_dim
|
||||
|
||||
self.input = T.imatrix()
|
||||
self.W = self.init((self.input_dim, self.output_dim))
|
||||
self.init = initializations.get(init)
|
||||
self.input_length = input_length
|
||||
self.mask_zero = mask_zero
|
||||
|
||||
self.params = [self.W]
|
||||
|
||||
self.W_constraint = constraints.get(W_constraint)
|
||||
self.constraints = [self.W_constraint]
|
||||
|
||||
self.regularizers = []
|
||||
|
||||
self.W_regularizer = regularizers.get(W_regularizer)
|
||||
self.activity_regularizer = regularizers.get(activity_regularizer)
|
||||
|
||||
self.initial_weights = weights
|
||||
kwargs['input_shape'] = (self.input_dim,)
|
||||
super(Embedding, self).__init__(**kwargs)
|
||||
|
||||
def build(self):
|
||||
self.input = T.imatrix()
|
||||
self.W = self.init((self.input_dim, self.output_dim))
|
||||
self.params = [self.W]
|
||||
self.regularizers = []
|
||||
if self.W_regularizer:
|
||||
self.W_regularizer.set_param(self.W)
|
||||
self.regularizers.append(self.W_regularizer)
|
||||
|
||||
self.activity_regularizer = regularizers.get(activity_regularizer)
|
||||
if self.activity_regularizer:
|
||||
self.activity_regularizer.set_layer(self)
|
||||
self.regularizers.append(self.activity_regularizer)
|
||||
|
||||
if weights is not None:
|
||||
self.set_weights(weights)
|
||||
if self.initial_weights is not None:
|
||||
self.set_weights(self.initial_weights)
|
||||
|
||||
def get_output_mask(self, train=None):
|
||||
X = self.get_input(train)
|
||||
@@ -57,19 +61,27 @@ class Embedding(Layer):
|
||||
else:
|
||||
return T.ones_like(X) * (1 - T.eq(X, 0))
|
||||
|
||||
@property
|
||||
def output_shape(self):
|
||||
return (self.input_shape[0], self.input_length, self.output_dim)
|
||||
|
||||
def get_output(self, train=False):
|
||||
X = self.get_input(train)
|
||||
out = self.W[X]
|
||||
return out
|
||||
|
||||
def get_config(self):
|
||||
return {"name": self.__class__.__name__,
|
||||
"input_dim": self.input_dim,
|
||||
"output_dim": self.output_dim,
|
||||
"init": self.init.__name__,
|
||||
"activity_regularizer": self.activity_regularizer.get_config() if self.activity_regularizer else None,
|
||||
"W_regularizer": self.W_regularizer.get_config() if self.W_regularizer else None,
|
||||
"W_constraint": self.W_constraint.get_config() if self.W_constraint else None}
|
||||
config = {"name": self.__class__.__name__,
|
||||
"input_dim": self.input_dim,
|
||||
"output_dim": self.output_dim,
|
||||
"init": self.init.__name__,
|
||||
"input_length": self.input_length,
|
||||
"mask_zero": self.mask_zero,
|
||||
"activity_regularizer": self.activity_regularizer.get_config() if self.activity_regularizer else None,
|
||||
"W_regularizer": self.W_regularizer.get_config() if self.W_regularizer else None,
|
||||
"W_constraint": self.W_constraint.get_config() if self.W_constraint else None}
|
||||
base_config = super(Embedding, self).get_config()
|
||||
return dict(list(base_config.items()) + list(config.items()))
|
||||
|
||||
|
||||
class WordContextProduct(Layer):
|
||||
@@ -96,10 +108,12 @@ class WordContextProduct(Layer):
|
||||
Efficient Estimation of Word reprensentations in Vector Space
|
||||
http://arxiv.org/pdf/1301.3781v3.pdf
|
||||
'''
|
||||
def __init__(self, input_dim, proj_dim=128,
|
||||
init='uniform', activation='sigmoid', weights=None):
|
||||
input_ndim = 2
|
||||
|
||||
super(WordContextProduct, self).__init__()
|
||||
def __init__(self, input_dim, proj_dim=128,
|
||||
init='uniform', activation='sigmoid', weights=None, **kwargs):
|
||||
|
||||
super(WordContextProduct, self).__init__(**kwargs)
|
||||
self.input_dim = input_dim
|
||||
self.proj_dim = proj_dim
|
||||
self.init = initializations.get(init)
|
||||
@@ -116,6 +130,10 @@ class WordContextProduct(Layer):
|
||||
if weights is not None:
|
||||
self.set_weights(weights)
|
||||
|
||||
@property
|
||||
def output_shape(self):
|
||||
return (self.input_shape[0], 1)
|
||||
|
||||
def get_output(self, train=False):
|
||||
X = self.get_input(train)
|
||||
w = self.W_w[X[:, 0]] # nb_samples, proj_dim
|
||||
@@ -126,8 +144,10 @@ class WordContextProduct(Layer):
|
||||
return self.activation(dot)
|
||||
|
||||
def get_config(self):
|
||||
return {"name": self.__class__.__name__,
|
||||
"input_dim": self.input_dim,
|
||||
"proj_dim": self.proj_dim,
|
||||
"init": self.init.__name__,
|
||||
"activation": self.activation.__name__}
|
||||
config = {"name": self.__class__.__name__,
|
||||
"input_dim": self.input_dim,
|
||||
"proj_dim": self.proj_dim,
|
||||
"init": self.init.__name__,
|
||||
"activation": self.activation.__name__}
|
||||
base_config = super(WordContextProduct, self).get_config()
|
||||
return dict(list(base_config.items()) + list(config.items()))
|
||||
|
||||
+12
-8
@@ -10,8 +10,8 @@ class GaussianNoise(MaskedLayer):
|
||||
'''
|
||||
Corruption process with GaussianNoise
|
||||
'''
|
||||
def __init__(self, sigma):
|
||||
super(GaussianNoise, self).__init__()
|
||||
def __init__(self, sigma, **kwargs):
|
||||
super(GaussianNoise, self).__init__(**kwargs)
|
||||
self.sigma = sigma
|
||||
self.srng = RandomStreams(seed=np.random.randint(10e6))
|
||||
|
||||
@@ -24,8 +24,10 @@ class GaussianNoise(MaskedLayer):
|
||||
dtype=theano.config.floatX)
|
||||
|
||||
def get_config(self):
|
||||
return {"name": self.__class__.__name__,
|
||||
"sigma": self.sigma}
|
||||
config = {"name": self.__class__.__name__,
|
||||
"sigma": self.sigma}
|
||||
base_config = super(GaussianNoise, self).get_config()
|
||||
return dict(list(base_config.items()) + list(config.items()))
|
||||
|
||||
|
||||
class GaussianDropout(MaskedLayer):
|
||||
@@ -36,8 +38,8 @@ class GaussianDropout(MaskedLayer):
|
||||
Srivastava, Hinton, et al. 2014
|
||||
http://www.cs.toronto.edu/~rsalakhu/papers/srivastava14a.pdf
|
||||
'''
|
||||
def __init__(self, p):
|
||||
super(GaussianDropout, self).__init__()
|
||||
def __init__(self, p, **kwargs):
|
||||
super(GaussianDropout, self).__init__(**kwargs)
|
||||
self.p = p
|
||||
self.srng = RandomStreams(seed=np.random.randint(10e6))
|
||||
|
||||
@@ -49,5 +51,7 @@ class GaussianDropout(MaskedLayer):
|
||||
return X
|
||||
|
||||
def get_config(self):
|
||||
return {"name": self.__class__.__name__,
|
||||
"p": self.p}
|
||||
config = {"name": self.__class__.__name__,
|
||||
"p": self.p}
|
||||
base_config = super(GaussianDropout, self).get_config()
|
||||
return dict(list(base_config.items()) + list(config.items()))
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
from ..layers.core import Layer
|
||||
from ..utils.theano_utils import shared_zeros, shared_ones, ndim_tensor
|
||||
from ..utils.theano_utils import shared_zeros, shared_ones, ndim_tensor, floatX
|
||||
from .. import initializations
|
||||
|
||||
import theano.tensor as T
|
||||
@@ -16,25 +16,27 @@ class BatchNormalization(Layer):
|
||||
|
||||
momentum: momentum term in the computation of a running estimate of the mean and std of the data
|
||||
'''
|
||||
def __init__(self, input_shape, epsilon=1e-6, mode=0, momentum=0.9, weights=None):
|
||||
super(BatchNormalization, self).__init__()
|
||||
def __init__(self, epsilon=1e-6, mode=0, momentum=0.9, weights=None, **kwargs):
|
||||
self.init = initializations.get("uniform")
|
||||
self.input_shape = input_shape
|
||||
self.epsilon = epsilon
|
||||
self.mode = mode
|
||||
self.momentum = momentum
|
||||
self.input = ndim_tensor(len(self.input_shape) + 1)
|
||||
self.initial_weights = weights
|
||||
super(BatchNormalization, self).__init__(**kwargs)
|
||||
|
||||
self.gamma = self.init((self.input_shape))
|
||||
self.beta = shared_zeros(self.input_shape)
|
||||
def build(self):
|
||||
input_shape = self.input_shape # starts with samples axis
|
||||
input_shape = input_shape[1:]
|
||||
self.input = ndim_tensor(len(input_shape) + 1)
|
||||
|
||||
self.gamma = self.init((input_shape))
|
||||
self.beta = shared_zeros(input_shape)
|
||||
|
||||
self.params = [self.gamma, self.beta]
|
||||
if weights is not None:
|
||||
self.set_weights(weights)
|
||||
self.running_mean = shared_zeros(input_shape)
|
||||
self.running_std = shared_ones((input_shape))
|
||||
|
||||
def init_updates(self):
|
||||
self.running_mean = shared_zeros(self.input_shape)
|
||||
self.running_std = shared_ones((self.input_shape))
|
||||
# initialize self.updates: batch mean/std computation
|
||||
X = self.get_input(train=True)
|
||||
m = X.mean(axis=0)
|
||||
std = T.mean((X - m) ** 2 + self.epsilon, axis=0) ** 0.5
|
||||
@@ -42,6 +44,18 @@ class BatchNormalization(Layer):
|
||||
std_update = self.momentum * self.running_std + (1-self.momentum) * std
|
||||
self.updates = [(self.running_mean, mean_update), (self.running_std, std_update)]
|
||||
|
||||
if self.initial_weights is not None:
|
||||
self.set_weights(self.initial_weights)
|
||||
del self.initial_weights
|
||||
|
||||
def get_weights(self):
|
||||
return super(BatchNormalization, self).get_weights() + [self.running_mean.get_value(), self.running_std.get_value()]
|
||||
|
||||
def set_weights(self, weights):
|
||||
self.running_mean.set_value(floatX(weights[-2]))
|
||||
self.running_std.set_value(floatX(weights[-1]))
|
||||
super(BatchNormalization, self).set_weights(weights[:-2])
|
||||
|
||||
def get_output(self, train):
|
||||
X = self.get_input(train)
|
||||
|
||||
@@ -57,10 +71,12 @@ class BatchNormalization(Layer):
|
||||
return out
|
||||
|
||||
def get_config(self):
|
||||
return {"name": self.__class__.__name__,
|
||||
"input_shape": self.input_shape,
|
||||
"epsilon": self.epsilon,
|
||||
"mode": self.mode}
|
||||
config = {"name": self.__class__.__name__,
|
||||
"epsilon": self.epsilon,
|
||||
"mode": self.mode,
|
||||
"momentum": self.momentum}
|
||||
base_config = super(BatchNormalization, self).get_config()
|
||||
return dict(list(base_config.items()) + list(config.items()))
|
||||
|
||||
|
||||
class LRN2D(Layer):
|
||||
@@ -69,10 +85,10 @@ class LRN2D(Layer):
|
||||
License at: https://github.com/lisa-lab/pylearn2/blob/master/LICENSE.txt
|
||||
"""
|
||||
|
||||
def __init__(self, alpha=1e-4, k=2, beta=0.75, n=5):
|
||||
def __init__(self, alpha=1e-4, k=2, beta=0.75, n=5, **kwargs):
|
||||
if n % 2 == 0:
|
||||
raise NotImplementedError("LRN2D only works with odd n. n provided: " + str(n))
|
||||
super(LRN2D, self).__init__()
|
||||
super(LRN2D, self).__init__(**kwargs)
|
||||
self.alpha = alpha
|
||||
self.k = k
|
||||
self.beta = beta
|
||||
@@ -92,8 +108,10 @@ class LRN2D(Layer):
|
||||
return X / scale
|
||||
|
||||
def get_config(self):
|
||||
return {"name": self.__class__.__name__,
|
||||
"alpha": self.alpha,
|
||||
"k": self.k,
|
||||
"beta": self.beta,
|
||||
"n": self.n}
|
||||
config = {"name": self.__class__.__name__,
|
||||
"alpha": self.alpha,
|
||||
"k": self.k,
|
||||
"beta": self.beta,
|
||||
"n": self.n}
|
||||
base_config = super(LRN2D, self).get_config()
|
||||
return dict(list(base_config.items()) + list(config.items()))
|
||||
|
||||
+244
-154
@@ -11,6 +11,8 @@ from six.moves import range
|
||||
|
||||
|
||||
class Recurrent(MaskedLayer):
|
||||
input_ndim = 3
|
||||
|
||||
def get_output_mask(self, train=None):
|
||||
if self.return_sequences:
|
||||
return super(Recurrent, self).get_output_mask(train)
|
||||
@@ -24,7 +26,8 @@ class Recurrent(MaskedLayer):
|
||||
|
||||
# mask is (nb_samples, time)
|
||||
mask = T.shape_padright(mask) # (nb_samples, time, 1)
|
||||
mask = T.addbroadcast(mask, -1) # (time, nb_samples, 1) matrix.
|
||||
mask = T.addbroadcast(mask, -1) # the new dimension (the '1') is made broadcastable
|
||||
# see http://deeplearning.net/software/theano/library/tensor/basic.html#broadcasting-in-theano-vs-numpy
|
||||
mask = mask.dimshuffle(1, 0, 2) # (time, nb_samples, 1)
|
||||
|
||||
if pad > 0:
|
||||
@@ -33,6 +36,14 @@ class Recurrent(MaskedLayer):
|
||||
mask = T.concatenate([padding, mask], axis=0)
|
||||
return mask.astype('int8')
|
||||
|
||||
@property
|
||||
def output_shape(self):
|
||||
input_shape = self.input_shape
|
||||
if self.return_sequences:
|
||||
return (input_shape[0], input_shape[1], self.output_dim)
|
||||
else:
|
||||
return (input_shape[0], self.output_dim)
|
||||
|
||||
|
||||
class SimpleRNN(Recurrent):
|
||||
'''
|
||||
@@ -42,27 +53,35 @@ class SimpleRNN(Recurrent):
|
||||
included for demonstration purposes
|
||||
(demonstrates how to use theano.scan to build a basic RNN).
|
||||
'''
|
||||
def __init__(self, input_dim, output_dim,
|
||||
def __init__(self, output_dim,
|
||||
init='glorot_uniform', inner_init='orthogonal', activation='sigmoid', weights=None,
|
||||
truncate_gradient=-1, return_sequences=False):
|
||||
|
||||
super(SimpleRNN, self).__init__()
|
||||
truncate_gradient=-1, return_sequences=False, input_dim=None, input_length=None, **kwargs):
|
||||
self.output_dim = output_dim
|
||||
self.init = initializations.get(init)
|
||||
self.inner_init = initializations.get(inner_init)
|
||||
self.input_dim = input_dim
|
||||
self.output_dim = output_dim
|
||||
self.truncate_gradient = truncate_gradient
|
||||
self.activation = activations.get(activation)
|
||||
self.return_sequences = return_sequences
|
||||
self.initial_weights = weights
|
||||
|
||||
self.input_dim = input_dim
|
||||
self.input_length = input_length
|
||||
if self.input_dim:
|
||||
kwargs['input_shape'] = (self.input_length, self.input_dim)
|
||||
super(SimpleRNN, self).__init__(**kwargs)
|
||||
|
||||
def build(self):
|
||||
input_dim = self.input_shape[2]
|
||||
self.input = T.tensor3()
|
||||
|
||||
self.W = self.init((self.input_dim, self.output_dim))
|
||||
self.W = self.init((input_dim, self.output_dim))
|
||||
self.U = self.inner_init((self.output_dim, self.output_dim))
|
||||
self.b = shared_zeros((self.output_dim))
|
||||
self.params = [self.W, self.U, self.b]
|
||||
|
||||
if weights is not None:
|
||||
self.set_weights(weights)
|
||||
if self.initial_weights is not None:
|
||||
self.set_weights(self.initial_weights)
|
||||
del self.initial_weights
|
||||
|
||||
def _step(self, x_t, mask_tm1, h_tm1, u):
|
||||
'''
|
||||
@@ -95,14 +114,17 @@ class SimpleRNN(Recurrent):
|
||||
return outputs[-1]
|
||||
|
||||
def get_config(self):
|
||||
return {"name": self.__class__.__name__,
|
||||
"input_dim": self.input_dim,
|
||||
"output_dim": self.output_dim,
|
||||
"init": self.init.__name__,
|
||||
"inner_init": self.inner_init.__name__,
|
||||
"activation": self.activation.__name__,
|
||||
"truncate_gradient": self.truncate_gradient,
|
||||
"return_sequences": self.return_sequences}
|
||||
config = {"name": self.__class__.__name__,
|
||||
"output_dim": self.output_dim,
|
||||
"init": self.init.__name__,
|
||||
"inner_init": self.inner_init.__name__,
|
||||
"activation": self.activation.__name__,
|
||||
"truncate_gradient": self.truncate_gradient,
|
||||
"return_sequences": self.return_sequences,
|
||||
"input_dim": self.input_dim,
|
||||
"input_length": self.input_length}
|
||||
base_config = super(SimpleRNN, self).get_config()
|
||||
return dict(list(base_config.items()) + list(config.items()))
|
||||
|
||||
|
||||
class SimpleDeepRNN(Recurrent):
|
||||
@@ -115,30 +137,38 @@ class SimpleDeepRNN(Recurrent):
|
||||
This demonstrates how to build RNNs with arbitrary lookback.
|
||||
Also (probably) not a super useful model.
|
||||
'''
|
||||
def __init__(self, input_dim, output_dim, depth=3,
|
||||
def __init__(self, output_dim, depth=3,
|
||||
init='glorot_uniform', inner_init='orthogonal',
|
||||
activation='sigmoid', inner_activation='hard_sigmoid',
|
||||
weights=None, truncate_gradient=-1, return_sequences=False):
|
||||
|
||||
super(SimpleDeepRNN, self).__init__()
|
||||
weights=None, truncate_gradient=-1, return_sequences=False,
|
||||
input_dim=None, input_length=None, **kwargs):
|
||||
self.output_dim = output_dim
|
||||
self.init = initializations.get(init)
|
||||
self.inner_init = initializations.get(inner_init)
|
||||
self.input_dim = input_dim
|
||||
self.output_dim = output_dim
|
||||
self.truncate_gradient = truncate_gradient
|
||||
self.activation = activations.get(activation)
|
||||
self.inner_activation = activations.get(inner_activation)
|
||||
self.depth = depth
|
||||
self.return_sequences = return_sequences
|
||||
self.input = T.tensor3()
|
||||
self.initial_weights = weights
|
||||
|
||||
self.W = self.init((self.input_dim, self.output_dim))
|
||||
self.input_dim = input_dim
|
||||
self.input_length = input_length
|
||||
if self.input_dim:
|
||||
kwargs['input_shape'] = (self.input_length, self.input_dim)
|
||||
super(SimpleDeepRNN, self).__init__(**kwargs)
|
||||
|
||||
def build(self):
|
||||
input_dim = self.input_shape[2]
|
||||
self.input = T.tensor3()
|
||||
self.W = self.init((input_dim, self.output_dim))
|
||||
self.Us = [self.inner_init((self.output_dim, self.output_dim)) for _ in range(self.depth)]
|
||||
self.b = shared_zeros((self.output_dim))
|
||||
self.params = [self.W] + self.Us + [self.b]
|
||||
|
||||
if weights is not None:
|
||||
self.set_weights(weights)
|
||||
if self.initial_weights is not None:
|
||||
self.set_weights(self.initial_weights)
|
||||
del self.initial_weights
|
||||
|
||||
def _step(self, x_t, *args):
|
||||
o = x_t
|
||||
@@ -180,16 +210,19 @@ class SimpleDeepRNN(Recurrent):
|
||||
return outputs[-1]
|
||||
|
||||
def get_config(self):
|
||||
return {"name": self.__class__.__name__,
|
||||
"input_dim": self.input_dim,
|
||||
"output_dim": self.output_dim,
|
||||
"depth": self.depth,
|
||||
"init": self.init.__name__,
|
||||
"inner_init": self.inner_init.__name__,
|
||||
"activation": self.activation.__name__,
|
||||
"inner_activation": self.inner_activation.__name__,
|
||||
"truncate_gradient": self.truncate_gradient,
|
||||
"return_sequences": self.return_sequences}
|
||||
config = {"name": self.__class__.__name__,
|
||||
"output_dim": self.output_dim,
|
||||
"depth": self.depth,
|
||||
"init": self.init.__name__,
|
||||
"inner_init": self.inner_init.__name__,
|
||||
"activation": self.activation.__name__,
|
||||
"inner_activation": self.inner_activation.__name__,
|
||||
"truncate_gradient": self.truncate_gradient,
|
||||
"return_sequences": self.return_sequences,
|
||||
"input_dim": self.input_dim,
|
||||
"input_length": self.input_length}
|
||||
base_config = super(SimpleDeepRNN, self).get_config()
|
||||
return dict(list(base_config.items()) + list(config.items()))
|
||||
|
||||
|
||||
class GRU(Recurrent):
|
||||
@@ -214,32 +247,39 @@ class GRU(Recurrent):
|
||||
Empirical Evaluation of Gated Recurrent Neural Networks on Sequence Modeling
|
||||
http://arxiv.org/pdf/1412.3555v1.pdf
|
||||
'''
|
||||
def __init__(self, input_dim, output_dim=128,
|
||||
def __init__(self, output_dim,
|
||||
init='glorot_uniform', inner_init='orthogonal',
|
||||
activation='sigmoid', inner_activation='hard_sigmoid',
|
||||
weights=None, truncate_gradient=-1, return_sequences=False):
|
||||
|
||||
super(GRU, self).__init__()
|
||||
self.input_dim = input_dim
|
||||
weights=None, truncate_gradient=-1, return_sequences=False,
|
||||
input_dim=None, input_length=None, **kwargs):
|
||||
self.output_dim = output_dim
|
||||
self.truncate_gradient = truncate_gradient
|
||||
self.return_sequences = return_sequences
|
||||
|
||||
self.init = initializations.get(init)
|
||||
self.inner_init = initializations.get(inner_init)
|
||||
self.activation = activations.get(activation)
|
||||
self.inner_activation = activations.get(inner_activation)
|
||||
self.truncate_gradient = truncate_gradient
|
||||
self.return_sequences = return_sequences
|
||||
self.initial_weights = weights
|
||||
|
||||
self.input_dim = input_dim
|
||||
self.input_length = input_length
|
||||
if self.input_dim:
|
||||
kwargs['input_shape'] = (self.input_length, self.input_dim)
|
||||
super(GRU, self).__init__(**kwargs)
|
||||
|
||||
def build(self):
|
||||
input_dim = self.input_shape[2]
|
||||
self.input = T.tensor3()
|
||||
|
||||
self.W_z = self.init((self.input_dim, self.output_dim))
|
||||
self.W_z = self.init((input_dim, self.output_dim))
|
||||
self.U_z = self.inner_init((self.output_dim, self.output_dim))
|
||||
self.b_z = shared_zeros((self.output_dim))
|
||||
|
||||
self.W_r = self.init((self.input_dim, self.output_dim))
|
||||
self.W_r = self.init((input_dim, self.output_dim))
|
||||
self.U_r = self.inner_init((self.output_dim, self.output_dim))
|
||||
self.b_r = shared_zeros((self.output_dim))
|
||||
|
||||
self.W_h = self.init((self.input_dim, self.output_dim))
|
||||
self.W_h = self.init((input_dim, self.output_dim))
|
||||
self.U_h = self.inner_init((self.output_dim, self.output_dim))
|
||||
self.b_h = shared_zeros((self.output_dim))
|
||||
|
||||
@@ -249,8 +289,9 @@ class GRU(Recurrent):
|
||||
self.W_h, self.U_h, self.b_h,
|
||||
]
|
||||
|
||||
if weights is not None:
|
||||
self.set_weights(weights)
|
||||
if self.initial_weights is not None:
|
||||
self.set_weights(self.initial_weights)
|
||||
del self.initial_weights
|
||||
|
||||
def _step(self,
|
||||
xz_t, xr_t, xh_t, mask_tm1,
|
||||
@@ -283,15 +324,18 @@ class GRU(Recurrent):
|
||||
return outputs[-1]
|
||||
|
||||
def get_config(self):
|
||||
return {"name": self.__class__.__name__,
|
||||
"input_dim": self.input_dim,
|
||||
"output_dim": self.output_dim,
|
||||
"init": self.init.__name__,
|
||||
"inner_init": self.inner_init.__name__,
|
||||
"activation": self.activation.__name__,
|
||||
"inner_activation": self.inner_activation.__name__,
|
||||
"truncate_gradient": self.truncate_gradient,
|
||||
"return_sequences": self.return_sequences}
|
||||
config = {"name": self.__class__.__name__,
|
||||
"output_dim": self.output_dim,
|
||||
"init": self.init.__name__,
|
||||
"inner_init": self.inner_init.__name__,
|
||||
"activation": self.activation.__name__,
|
||||
"inner_activation": self.inner_activation.__name__,
|
||||
"truncate_gradient": self.truncate_gradient,
|
||||
"return_sequences": self.return_sequences,
|
||||
"input_dim": self.input_dim,
|
||||
"input_length": self.input_length}
|
||||
base_config = super(GRU, self).get_config()
|
||||
return dict(list(base_config.items()) + list(config.items()))
|
||||
|
||||
|
||||
class LSTM(Recurrent):
|
||||
@@ -319,37 +363,44 @@ class LSTM(Recurrent):
|
||||
Supervised sequence labelling with recurrent neural networks
|
||||
http://www.cs.toronto.edu/~graves/preprint.pdf
|
||||
'''
|
||||
def __init__(self, input_dim, output_dim=128,
|
||||
def __init__(self, output_dim,
|
||||
init='glorot_uniform', inner_init='orthogonal', forget_bias_init='one',
|
||||
activation='tanh', inner_activation='hard_sigmoid',
|
||||
weights=None, truncate_gradient=-1, return_sequences=False):
|
||||
|
||||
super(LSTM, self).__init__()
|
||||
self.input_dim = input_dim
|
||||
weights=None, truncate_gradient=-1, return_sequences=False,
|
||||
input_dim=None, input_length=None, **kwargs):
|
||||
self.output_dim = output_dim
|
||||
self.truncate_gradient = truncate_gradient
|
||||
self.return_sequences = return_sequences
|
||||
|
||||
self.init = initializations.get(init)
|
||||
self.inner_init = initializations.get(inner_init)
|
||||
self.forget_bias_init = initializations.get(forget_bias_init)
|
||||
self.activation = activations.get(activation)
|
||||
self.inner_activation = activations.get(inner_activation)
|
||||
self.truncate_gradient = truncate_gradient
|
||||
self.return_sequences = return_sequences
|
||||
self.initial_weights = weights
|
||||
|
||||
self.input_dim = input_dim
|
||||
self.input_length = input_length
|
||||
if self.input_dim:
|
||||
kwargs['input_shape'] = (self.input_length, self.input_dim)
|
||||
super(LSTM, self).__init__(**kwargs)
|
||||
|
||||
def build(self):
|
||||
input_dim = self.input_shape[2]
|
||||
self.input = T.tensor3()
|
||||
|
||||
self.W_i = self.init((self.input_dim, self.output_dim))
|
||||
self.W_i = self.init((input_dim, self.output_dim))
|
||||
self.U_i = self.inner_init((self.output_dim, self.output_dim))
|
||||
self.b_i = shared_zeros((self.output_dim))
|
||||
|
||||
self.W_f = self.init((self.input_dim, self.output_dim))
|
||||
self.W_f = self.init((input_dim, self.output_dim))
|
||||
self.U_f = self.inner_init((self.output_dim, self.output_dim))
|
||||
self.b_f = self.forget_bias_init((self.output_dim))
|
||||
|
||||
self.W_c = self.init((self.input_dim, self.output_dim))
|
||||
self.W_c = self.init((input_dim, self.output_dim))
|
||||
self.U_c = self.inner_init((self.output_dim, self.output_dim))
|
||||
self.b_c = shared_zeros((self.output_dim))
|
||||
|
||||
self.W_o = self.init((self.input_dim, self.output_dim))
|
||||
self.W_o = self.init((input_dim, self.output_dim))
|
||||
self.U_o = self.inner_init((self.output_dim, self.output_dim))
|
||||
self.b_o = shared_zeros((self.output_dim))
|
||||
|
||||
@@ -360,8 +411,9 @@ class LSTM(Recurrent):
|
||||
self.W_o, self.U_o, self.b_o,
|
||||
]
|
||||
|
||||
if weights is not None:
|
||||
self.set_weights(weights)
|
||||
if self.initial_weights is not None:
|
||||
self.set_weights(self.initial_weights)
|
||||
del self.initial_weights
|
||||
|
||||
def _step(self,
|
||||
xi_t, xf_t, xo_t, xc_t, mask_tm1,
|
||||
@@ -402,16 +454,19 @@ class LSTM(Recurrent):
|
||||
return outputs[-1]
|
||||
|
||||
def get_config(self):
|
||||
return {"name": self.__class__.__name__,
|
||||
"input_dim": self.input_dim,
|
||||
"output_dim": self.output_dim,
|
||||
"init": self.init.__name__,
|
||||
"inner_init": self.inner_init.__name__,
|
||||
"forget_bias_init": self.forget_bias_init.__name__,
|
||||
"activation": self.activation.__name__,
|
||||
"inner_activation": self.inner_activation.__name__,
|
||||
"truncate_gradient": self.truncate_gradient,
|
||||
"return_sequences": self.return_sequences}
|
||||
config = {"name": self.__class__.__name__,
|
||||
"output_dim": self.output_dim,
|
||||
"init": self.init.__name__,
|
||||
"inner_init": self.inner_init.__name__,
|
||||
"forget_bias_init": self.forget_bias_init.__name__,
|
||||
"activation": self.activation.__name__,
|
||||
"inner_activation": self.inner_activation.__name__,
|
||||
"truncate_gradient": self.truncate_gradient,
|
||||
"return_sequences": self.return_sequences,
|
||||
"input_dim": self.input_dim,
|
||||
"input_length": self.input_length}
|
||||
base_config = super(LSTM, self).get_config()
|
||||
return dict(list(base_config.items()) + list(config.items()))
|
||||
|
||||
|
||||
class JZS1(Recurrent):
|
||||
@@ -434,27 +489,34 @@ class JZS1(Recurrent):
|
||||
An Empirical Exploration of Recurrent Network Architectures
|
||||
http://www.jmlr.org/proceedings/papers/v37/jozefowicz15.pdf
|
||||
'''
|
||||
def __init__(self, input_dim, output_dim=128,
|
||||
def __init__(self, output_dim,
|
||||
init='glorot_uniform', inner_init='orthogonal',
|
||||
activation='tanh', inner_activation='sigmoid',
|
||||
weights=None, truncate_gradient=-1, return_sequences=False):
|
||||
|
||||
super(JZS1, self).__init__()
|
||||
self.input_dim = input_dim
|
||||
weights=None, truncate_gradient=-1, return_sequences=False,
|
||||
input_dim=None, input_length=None, **kwargs):
|
||||
self.output_dim = output_dim
|
||||
self.truncate_gradient = truncate_gradient
|
||||
self.return_sequences = return_sequences
|
||||
|
||||
self.init = initializations.get(init)
|
||||
self.inner_init = initializations.get(inner_init)
|
||||
self.activation = activations.get(activation)
|
||||
self.inner_activation = activations.get(inner_activation)
|
||||
self.truncate_gradient = truncate_gradient
|
||||
self.return_sequences = return_sequences
|
||||
self.initial_weights = weights
|
||||
|
||||
self.input_dim = input_dim
|
||||
self.input_length = input_length
|
||||
if self.input_dim:
|
||||
kwargs['input_shape'] = (self.input_length, self.input_dim)
|
||||
super(JZS1, self).__init__(**kwargs)
|
||||
|
||||
def build(self):
|
||||
input_dim = self.input_shape[2]
|
||||
self.input = T.tensor3()
|
||||
|
||||
self.W_z = self.init((self.input_dim, self.output_dim))
|
||||
self.W_z = self.init((input_dim, self.output_dim))
|
||||
self.b_z = shared_zeros((self.output_dim))
|
||||
|
||||
self.W_r = self.init((self.input_dim, self.output_dim))
|
||||
self.W_r = self.init((input_dim, self.output_dim))
|
||||
self.U_r = self.inner_init((self.output_dim, self.output_dim))
|
||||
self.b_r = shared_zeros((self.output_dim))
|
||||
|
||||
@@ -462,21 +524,23 @@ class JZS1(Recurrent):
|
||||
self.b_h = shared_zeros((self.output_dim))
|
||||
|
||||
# P_h used to project X onto different dimension, using sparse random projections
|
||||
if self.input_dim == self.output_dim:
|
||||
if input_dim == self.output_dim:
|
||||
self.Pmat = theano.shared(np.identity(self.output_dim, dtype=theano.config.floatX), name=None)
|
||||
else:
|
||||
P = np.random.binomial(1, 0.5, size=(self.input_dim, self.output_dim)).astype(theano.config.floatX) * 2 - 1
|
||||
P = 1 / np.sqrt(self.input_dim) * P
|
||||
P = np.random.binomial(1, 0.5, size=(input_dim, self.output_dim)).astype(theano.config.floatX) * 2 - 1
|
||||
P = 1 / np.sqrt(input_dim) * P
|
||||
self.Pmat = theano.shared(P, name=None)
|
||||
|
||||
self.params = [
|
||||
self.W_z, self.b_z,
|
||||
self.W_r, self.U_r, self.b_r,
|
||||
self.U_h, self.b_h,
|
||||
self.Pmat
|
||||
]
|
||||
|
||||
if weights is not None:
|
||||
self.set_weights(weights)
|
||||
if self.initial_weights is not None:
|
||||
self.set_weights(self.initial_weights)
|
||||
del self.initial_weights
|
||||
|
||||
def _step(self,
|
||||
xz_t, xr_t, xh_t, mask_tm1,
|
||||
@@ -508,15 +572,18 @@ class JZS1(Recurrent):
|
||||
return outputs[-1]
|
||||
|
||||
def get_config(self):
|
||||
return {"name": self.__class__.__name__,
|
||||
"input_dim": self.input_dim,
|
||||
"output_dim": self.output_dim,
|
||||
"init": self.init.__name__,
|
||||
"inner_init": self.inner_init.__name__,
|
||||
"activation": self.activation.__name__,
|
||||
"inner_activation": self.inner_activation.__name__,
|
||||
"truncate_gradient": self.truncate_gradient,
|
||||
"return_sequences": self.return_sequences}
|
||||
config = {"name": self.__class__.__name__,
|
||||
"output_dim": self.output_dim,
|
||||
"init": self.init.__name__,
|
||||
"inner_init": self.inner_init.__name__,
|
||||
"activation": self.activation.__name__,
|
||||
"inner_activation": self.inner_activation.__name__,
|
||||
"truncate_gradient": self.truncate_gradient,
|
||||
"return_sequences": self.return_sequences,
|
||||
"input_dim": self.input_dim,
|
||||
"input_length": self.input_length}
|
||||
base_config = super(JZS1, self).get_config()
|
||||
return dict(list(base_config.items()) + list(config.items()))
|
||||
|
||||
|
||||
class JZS2(Recurrent):
|
||||
@@ -539,50 +606,59 @@ class JZS2(Recurrent):
|
||||
An Empirical Exploration of Recurrent Network Architectures
|
||||
http://www.jmlr.org/proceedings/papers/v37/jozefowicz15.pdf
|
||||
'''
|
||||
def __init__(self, input_dim, output_dim=128,
|
||||
def __init__(self, output_dim,
|
||||
init='glorot_uniform', inner_init='orthogonal',
|
||||
activation='tanh', inner_activation='sigmoid',
|
||||
weights=None, truncate_gradient=-1, return_sequences=False):
|
||||
|
||||
super(JZS2, self).__init__()
|
||||
self.input_dim = input_dim
|
||||
weights=None, truncate_gradient=-1, return_sequences=False,
|
||||
input_dim=None, input_length=None, **kwargs):
|
||||
self.output_dim = output_dim
|
||||
self.truncate_gradient = truncate_gradient
|
||||
self.return_sequences = return_sequences
|
||||
|
||||
self.init = initializations.get(init)
|
||||
self.inner_init = initializations.get(inner_init)
|
||||
self.activation = activations.get(activation)
|
||||
self.inner_activation = activations.get(inner_activation)
|
||||
self.truncate_gradient = truncate_gradient
|
||||
self.return_sequences = return_sequences
|
||||
self.initial_weights = weights
|
||||
|
||||
self.input_dim = input_dim
|
||||
self.input_length = input_length
|
||||
if self.input_dim:
|
||||
kwargs['input_shape'] = (self.input_length, self.input_dim)
|
||||
super(JZS2, self).__init__(**kwargs)
|
||||
|
||||
def build(self):
|
||||
input_dim = self.input_shape[2]
|
||||
self.input = T.tensor3()
|
||||
|
||||
self.W_z = self.init((self.input_dim, self.output_dim))
|
||||
self.W_z = self.init((input_dim, self.output_dim))
|
||||
self.U_z = self.inner_init((self.output_dim, self.output_dim))
|
||||
self.b_z = shared_zeros((self.output_dim))
|
||||
|
||||
self.U_r = self.inner_init((self.output_dim, self.output_dim))
|
||||
self.b_r = shared_zeros((self.output_dim))
|
||||
|
||||
self.W_h = self.init((self.input_dim, self.output_dim))
|
||||
self.W_h = self.init((input_dim, self.output_dim))
|
||||
self.U_h = self.inner_init((self.output_dim, self.output_dim))
|
||||
self.b_h = shared_zeros((self.output_dim))
|
||||
|
||||
# P_h used to project X onto different dimension, using sparse random projections
|
||||
if self.input_dim == self.output_dim:
|
||||
if input_dim == self.output_dim:
|
||||
self.Pmat = theano.shared(np.identity(self.output_dim, dtype=theano.config.floatX), name=None)
|
||||
else:
|
||||
P = np.random.binomial(1, 0.5, size=(self.input_dim, self.output_dim)).astype(theano.config.floatX) * 2 - 1
|
||||
P = 1 / np.sqrt(self.input_dim) * P
|
||||
P = np.random.binomial(1, 0.5, size=(input_dim, self.output_dim)).astype(theano.config.floatX) * 2 - 1
|
||||
P = 1 / np.sqrt(input_dim) * P
|
||||
self.Pmat = theano.shared(P, name=None)
|
||||
|
||||
self.params = [
|
||||
self.W_z, self.U_z, self.b_z,
|
||||
self.U_r, self.b_r,
|
||||
self.W_h, self.U_h, self.b_h,
|
||||
self.Pmat
|
||||
]
|
||||
|
||||
if weights is not None:
|
||||
self.set_weights(weights)
|
||||
if self.initial_weights is not None:
|
||||
self.set_weights(self.initial_weights)
|
||||
del self.initial_weights
|
||||
|
||||
def _step(self,
|
||||
xz_t, xr_t, xh_t, mask_tm1,
|
||||
@@ -614,15 +690,18 @@ class JZS2(Recurrent):
|
||||
return outputs[-1]
|
||||
|
||||
def get_config(self):
|
||||
return {"name": self.__class__.__name__,
|
||||
"input_dim": self.input_dim,
|
||||
"output_dim": self.output_dim,
|
||||
"init": self.init.__name__,
|
||||
"inner_init": self.inner_init.__name__,
|
||||
"activation": self.activation.__name__,
|
||||
"inner_activation": self.inner_activation.__name__,
|
||||
"truncate_gradient": self.truncate_gradient,
|
||||
"return_sequences": self.return_sequences}
|
||||
config = {"name": self.__class__.__name__,
|
||||
"output_dim": self.output_dim,
|
||||
"init": self.init.__name__,
|
||||
"inner_init": self.inner_init.__name__,
|
||||
"activation": self.activation.__name__,
|
||||
"inner_activation": self.inner_activation.__name__,
|
||||
"truncate_gradient": self.truncate_gradient,
|
||||
"return_sequences": self.return_sequences,
|
||||
"input_dim": self.input_dim,
|
||||
"input_length": self.input_length}
|
||||
base_config = super(JZS2, self).get_config()
|
||||
return dict(list(base_config.items()) + list(config.items()))
|
||||
|
||||
|
||||
class JZS3(Recurrent):
|
||||
@@ -645,32 +724,39 @@ class JZS3(Recurrent):
|
||||
An Empirical Exploration of Recurrent Network Architectures
|
||||
http://www.jmlr.org/proceedings/papers/v37/jozefowicz15.pdf
|
||||
'''
|
||||
def __init__(self, input_dim, output_dim=128,
|
||||
def __init__(self, output_dim,
|
||||
init='glorot_uniform', inner_init='orthogonal',
|
||||
activation='tanh', inner_activation='sigmoid',
|
||||
weights=None, truncate_gradient=-1, return_sequences=False):
|
||||
|
||||
super(JZS3, self).__init__()
|
||||
self.input_dim = input_dim
|
||||
weights=None, truncate_gradient=-1, return_sequences=False,
|
||||
input_dim=None, input_length=None, **kwargs):
|
||||
self.output_dim = output_dim
|
||||
self.truncate_gradient = truncate_gradient
|
||||
self.return_sequences = return_sequences
|
||||
|
||||
self.init = initializations.get(init)
|
||||
self.inner_init = initializations.get(inner_init)
|
||||
self.activation = activations.get(activation)
|
||||
self.inner_activation = activations.get(inner_activation)
|
||||
self.truncate_gradient = truncate_gradient
|
||||
self.return_sequences = return_sequences
|
||||
self.initial_weights = weights
|
||||
|
||||
self.input_dim = input_dim
|
||||
self.input_length = input_length
|
||||
if self.input_dim:
|
||||
kwargs['input_shape'] = (self.input_length, self.input_dim)
|
||||
super(JZS3, self).__init__(**kwargs)
|
||||
|
||||
def build(self):
|
||||
input_dim = self.input_shape[2]
|
||||
self.input = T.tensor3()
|
||||
|
||||
self.W_z = self.init((self.input_dim, self.output_dim))
|
||||
self.W_z = self.init((input_dim, self.output_dim))
|
||||
self.U_z = self.inner_init((self.output_dim, self.output_dim))
|
||||
self.b_z = shared_zeros((self.output_dim))
|
||||
|
||||
self.W_r = self.init((self.input_dim, self.output_dim))
|
||||
self.W_r = self.init((input_dim, self.output_dim))
|
||||
self.U_r = self.inner_init((self.output_dim, self.output_dim))
|
||||
self.b_r = shared_zeros((self.output_dim))
|
||||
|
||||
self.W_h = self.init((self.input_dim, self.output_dim))
|
||||
self.W_h = self.init((input_dim, self.output_dim))
|
||||
self.U_h = self.inner_init((self.output_dim, self.output_dim))
|
||||
self.b_h = shared_zeros((self.output_dim))
|
||||
|
||||
@@ -680,8 +766,9 @@ class JZS3(Recurrent):
|
||||
self.W_h, self.U_h, self.b_h,
|
||||
]
|
||||
|
||||
if weights is not None:
|
||||
self.set_weights(weights)
|
||||
if self.initial_weights is not None:
|
||||
self.set_weights(self.initial_weights)
|
||||
del self.initial_weights
|
||||
|
||||
def _step(self,
|
||||
xz_t, xr_t, xh_t, mask_tm1,
|
||||
@@ -714,12 +801,15 @@ class JZS3(Recurrent):
|
||||
return outputs[-1]
|
||||
|
||||
def get_config(self):
|
||||
return {"name": self.__class__.__name__,
|
||||
"input_dim": self.input_dim,
|
||||
"output_dim": self.output_dim,
|
||||
"init": self.init.__name__,
|
||||
"inner_init": self.inner_init.__name__,
|
||||
"activation": self.activation.__name__,
|
||||
"inner_activation": self.inner_activation.__name__,
|
||||
"truncate_gradient": self.truncate_gradient,
|
||||
"return_sequences": self.return_sequences}
|
||||
config = {"name": self.__class__.__name__,
|
||||
"output_dim": self.output_dim,
|
||||
"init": self.init.__name__,
|
||||
"inner_init": self.inner_init.__name__,
|
||||
"activation": self.activation.__name__,
|
||||
"inner_activation": self.inner_activation.__name__,
|
||||
"truncate_gradient": self.truncate_gradient,
|
||||
"return_sequences": self.return_sequences,
|
||||
"input_dim": self.input_dim,
|
||||
"input_length": self.input_length}
|
||||
base_config = super(JZS3, self).get_config()
|
||||
return dict(list(base_config.items()) + list(config.items()))
|
||||
|
||||
+14
-15
@@ -100,29 +100,29 @@ def standardize_weights(y, sample_weight=None, class_weight=None):
|
||||
return np.ones(y.shape[:-1] + (1,))
|
||||
|
||||
|
||||
def model_from_yaml(yaml_string):
|
||||
def model_from_yaml(yaml_string, custom_layers={}):
|
||||
'''
|
||||
Returns a model generated from a local yaml file,
|
||||
which is either created by hand or from to_yaml method of Sequential or Graph
|
||||
'''
|
||||
import yaml
|
||||
config = yaml.load(yaml_string)
|
||||
return model_from_config(config)
|
||||
return model_from_config(config, custom_layers=custom_layers)
|
||||
|
||||
|
||||
def model_from_json(json_string):
|
||||
def model_from_json(json_string, custom_layers={}):
|
||||
import json
|
||||
config = json.loads(json_string)
|
||||
return model_from_config(config)
|
||||
return model_from_config(config, custom_layers=custom_layers)
|
||||
|
||||
|
||||
def model_from_config(config):
|
||||
def model_from_config(config, custom_layers={}):
|
||||
model_name = config.get('name')
|
||||
if model_name not in {'Graph', 'Sequential'}:
|
||||
raise Exception('Unrecognized model:', model_name)
|
||||
|
||||
# Create a container then set class to appropriate model
|
||||
model = container_from_config(config)
|
||||
model = container_from_config(config, custom_layers=custom_layers)
|
||||
if model_name == 'Graph':
|
||||
model.__class__ = Graph
|
||||
elif model_name == 'Sequential':
|
||||
@@ -200,9 +200,8 @@ class Model(object):
|
||||
try:
|
||||
ins_batch = slice_X(ins, batch_ids)
|
||||
except TypeError as err:
|
||||
print('TypeError while preparing batch. \
|
||||
raise Exception('TypeError while preparing batch. \
|
||||
If using HDF5 input data, pass shuffle="batch".\n')
|
||||
raise
|
||||
|
||||
batch_logs = {}
|
||||
batch_logs['batch'] = batch_index
|
||||
@@ -313,17 +312,17 @@ class Model(object):
|
||||
pp.pprint(config)
|
||||
return config
|
||||
|
||||
def to_yaml(self):
|
||||
def to_yaml(self, **kwargs):
|
||||
# dump model configuration to yaml string
|
||||
import yaml
|
||||
config = self.get_config()
|
||||
return yaml.dump(config)
|
||||
return yaml.dump(config, **kwargs)
|
||||
|
||||
def to_json(self):
|
||||
def to_json(self, **kwargs):
|
||||
# dump model configuration to json string
|
||||
import json
|
||||
config = self.get_config()
|
||||
return json.dumps(config)
|
||||
return json.dumps(config, **kwargs)
|
||||
|
||||
|
||||
class Sequential(Model, containers.Sequential):
|
||||
@@ -643,8 +642,9 @@ class Graph(Model, containers.Graph):
|
||||
validation_split=0., validation_data=None, shuffle=True, class_weight={}, sample_weight={}):
|
||||
X = [data[name] for name in self.input_order]
|
||||
y = [standardize_y(data[name]) for name in self.output_order]
|
||||
sample_weight_list = [standardize_weights(data[name],
|
||||
sample_weight=sample_weight.get(name)) for name in self.output_order]
|
||||
|
||||
sample_weight_list = [standardize_weights(y[i],
|
||||
sample_weight=sample_weight.get(self.output_order[i])) for i in range(len(self.output_order))]
|
||||
class_weight_list = [class_weight.get(name) for name in self.output_order]
|
||||
|
||||
val_f = None
|
||||
@@ -671,7 +671,6 @@ class Graph(Model, containers.Graph):
|
||||
sample_weight=sample_weight_list[i],
|
||||
class_weight=class_weight_list[i]) for i in range(len(self.output_order))]
|
||||
ins = X + y + sample_weight_list
|
||||
|
||||
history = self._fit(f, ins, out_labels=out_labels, batch_size=batch_size, nb_epoch=nb_epoch,
|
||||
verbose=verbose, callbacks=callbacks,
|
||||
val_f=val_f, val_ins=val_ins,
|
||||
|
||||
@@ -41,6 +41,9 @@ class Optimizer(object):
|
||||
norm = T.sqrt(sum([T.sum(g ** 2) for g in grads]))
|
||||
grads = [clip_norm(g, self.clipnorm, norm) for g in grads]
|
||||
|
||||
if hasattr(self, 'clipvalue') and self.clipvalue > 0:
|
||||
grads = [T.clip(g, -self.clipvalue, self.clipvalue) for g in grads]
|
||||
|
||||
return grads
|
||||
|
||||
def get_config(self):
|
||||
@@ -55,6 +58,7 @@ class SGD(Optimizer):
|
||||
self.iterations = shared_scalar(0)
|
||||
self.lr = shared_scalar(lr)
|
||||
self.momentum = shared_scalar(momentum)
|
||||
self.decay = shared_scalar(decay)
|
||||
|
||||
def get_updates(self, params, constraints, loss):
|
||||
grads = self.get_gradients(loss, params)
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
from __future__ import absolute_import
|
||||
|
||||
import numpy as np
|
||||
import re
|
||||
from scipy import ndimage
|
||||
from scipy import linalg
|
||||
|
||||
@@ -103,7 +104,7 @@ def img_to_array(img):
|
||||
|
||||
def load_img(path, grayscale=False):
|
||||
from PIL import Image
|
||||
img = Image.open(open(path))
|
||||
img = Image.open(path)
|
||||
if grayscale:
|
||||
img = img.convert('L')
|
||||
else: # Assure 3 channel even when loaded image is grayscale
|
||||
|
||||
@@ -7,7 +7,7 @@ from six.moves import range
|
||||
def pad_sequences(sequences, maxlen=None, dtype='int32', padding='pre', truncating='pre', value=0.):
|
||||
"""
|
||||
Pad each sequence to the same length:
|
||||
the length of the longuest sequence.
|
||||
the length of the longest sequence.
|
||||
|
||||
If maxlen is provided, any sequence longer
|
||||
than maxlen is truncated to maxlen. Truncation happens off either the beginning (default) or
|
||||
|
||||
@@ -107,7 +107,10 @@ class Progbar(object):
|
||||
else:
|
||||
info += ' - %ds' % (now - self.start)
|
||||
for k in self.unique_values:
|
||||
info += ' - %s: %.4f' % (k, self.sum_values[k][0] / max(1, self.sum_values[k][1]))
|
||||
if type(self.sum_values[k]) is list:
|
||||
info += ' - %s: %.4f' % (k, self.sum_values[k][0] / max(1, self.sum_values[k][1]))
|
||||
else:
|
||||
info += ' - %s: %s' % (k, self.sum_values[k])
|
||||
|
||||
self.total_width += len(info)
|
||||
if prev_total_width > self.total_width:
|
||||
|
||||
@@ -5,7 +5,7 @@ import theano
|
||||
import copy
|
||||
|
||||
from ..layers.advanced_activations import LeakyReLU, PReLU
|
||||
from ..layers.core import Dense, Merge, Dropout, Activation, Reshape, Flatten, RepeatVector, Layer, AutoEncoder
|
||||
from ..layers.core import Dense, Merge, Dropout, Activation, Reshape, Flatten, RepeatVector, Layer, AutoEncoder, Masking, Permute
|
||||
from ..layers.core import ActivityRegularization, TimeDistributedDense, AutoEncoder, MaxoutDense
|
||||
from ..layers.convolutional import Convolution1D, Convolution2D, MaxPooling1D, MaxPooling2D, ZeroPadding2D
|
||||
from ..layers.embeddings import Embedding, WordContextProduct
|
||||
@@ -17,7 +17,7 @@ from .. import regularizers
|
||||
from .. import constraints
|
||||
|
||||
|
||||
def container_from_config(original_layer_dict):
|
||||
def container_from_config(original_layer_dict, custom_layers={}):
|
||||
layer_dict = copy.deepcopy(original_layer_dict)
|
||||
name = layer_dict.get('name')
|
||||
|
||||
@@ -26,7 +26,7 @@ def container_from_config(original_layer_dict):
|
||||
layers = layer_dict.get('layers')
|
||||
layer_list = []
|
||||
for layer in layers:
|
||||
init_layer = container_from_config(layer)
|
||||
init_layer = container_from_config(layer, custom_layers=custom_layers)
|
||||
layer_list.append(init_layer)
|
||||
merge_layer = Merge(layer_list, mode)
|
||||
return merge_layer
|
||||
@@ -35,7 +35,7 @@ def container_from_config(original_layer_dict):
|
||||
layers = layer_dict.get('layers')
|
||||
layer_list = []
|
||||
for layer in layers:
|
||||
init_layer = container_from_config(layer)
|
||||
init_layer = container_from_config(layer, custom_layers=custom_layers)
|
||||
layer_list.append(init_layer)
|
||||
seq_layer = containers.Sequential(layer_list)
|
||||
return seq_layer
|
||||
@@ -49,7 +49,8 @@ def container_from_config(original_layer_dict):
|
||||
|
||||
nodes = layer_dict.get('node_config')
|
||||
for node in nodes:
|
||||
layer = container_from_config(layer_dict['nodes'].get(node['name']))
|
||||
layer = container_from_config(layer_dict['nodes'].get(node['name']),
|
||||
custom_layers=custom_layers)
|
||||
node['layer'] = layer
|
||||
graph_layer.add_node(**node)
|
||||
|
||||
@@ -59,8 +60,10 @@ def container_from_config(original_layer_dict):
|
||||
return graph_layer
|
||||
|
||||
elif name == 'AutoEncoder':
|
||||
kwargs = {'encoder': container_from_config(layer_dict.get('encoder_config')),
|
||||
'decoder': container_from_config(layer_dict.get('decoder_config'))}
|
||||
kwargs = {'encoder': container_from_config(layer_dict.get('encoder_config'),
|
||||
custom_layers=custom_layers),
|
||||
'decoder': container_from_config(layer_dict.get('decoder_config'),
|
||||
custom_layers=custom_layers)}
|
||||
for kwarg in ['output_reconstruction', 'weights']:
|
||||
if kwarg in layer_dict:
|
||||
kwargs[kwarg] = layer_dict[kwarg]
|
||||
@@ -79,7 +82,7 @@ def container_from_config(original_layer_dict):
|
||||
if vname in [x for x, y in inspect.getmembers(regularizers, predicate=inspect.isclass)]:
|
||||
layer_dict[k] = regularizers.get(vname, v)
|
||||
|
||||
base_layer = get_layer(name, layer_dict)
|
||||
base_layer = get_layer(name, layer_dict, custom_layers=custom_layers)
|
||||
return base_layer
|
||||
|
||||
|
||||
@@ -121,5 +124,8 @@ def print_layer_shapes(model, input_shapes):
|
||||
|
||||
|
||||
from .generic_utils import get_from_module
|
||||
def get_layer(identifier, kwargs=None):
|
||||
def get_layer(identifier, kwargs=None, custom_layers=None):
    """Instantiate the layer named by `identifier` through `get_from_module`.

    Arguments:
        identifier: forwarded to `get_from_module` as the name to resolve
            against this module's globals.
        kwargs: forwarded to `get_from_module` as `kwargs`.
        custom_layers: optional mapping of name -> object that should be
            resolvable in addition to the names already defined in this
            module. `None` (the default) or an empty mapping registers
            nothing.

    Returns:
        Whatever `get_from_module(..., 'layer', instantiate=True, ...)`
        returns.

    Note:
        Entries of `custom_layers` are written into this module's globals.
        They remain there after the call returns and overwrite any existing
        global of the same name.
    """
    # A `None` default (instead of `{}`) avoids sharing one dict object
    # between every call that omits the argument.
    if custom_layers:
        # Insert custom layers into globals so they can be accessed by
        # `get_from_module`.
        globals().update(custom_layers)
    return get_from_module(identifier, globals(), 'layer', instantiate=True, kwargs=kwargs)
|
||||
|
||||
@@ -38,3 +38,7 @@ def ndim_tensor(ndim):
|
||||
elif ndim == 4:
|
||||
return T.tensor4()
|
||||
return T.matrix()
|
||||
|
||||
|
||||
def on_gpu():
    """Return True when Theano's configured device name starts with 'gpu'."""
    device_name = theano.config.device
    return device_name.startswith('gpu')
|
||||
|
||||
@@ -0,0 +1,41 @@
|
||||
import pydot
|
||||
# old pydot will not work with python3, must use one
|
||||
# that works with python3 such as pydot2 or pydot
|
||||
|
||||
|
||||
def plot(model, to_file='model.png'):
    """Draw the layer structure of `model` with pydot and write it as a PNG.

    Arguments:
        model: a `Sequential` or `Graph` model. For a `Sequential` model the
            entries of `model.get_config()['layers']` are drawn as a chain.
            For a `Graph` model the nodes come from `model.input_config`,
            `model.node_config` and `model.output_config`, with edges taken
            from each node's `inputs` / `input` entry.
        to_file: path handed to `pydot.Dot.write_png`.

    For any other kind of model nothing is drawn and no file is written.
    """
    # This module only imports `pydot` at the top, so the model classes are
    # brought into scope here; without this the type checks below raise
    # NameError.
    # NOTE(review): assumes the model classes live in `keras.models` - confirm.
    from keras.models import Sequential, Graph

    graph = pydot.Dot(graph_type='digraph')
    if isinstance(model, Sequential):
        previous_node = None
        written_nodes = set()
        n = 1
        for node in model.get_config()['layers']:
            # append number in case layers have same name to differentiate
            if (node['name'] + str(n)) in written_nodes:
                n += 1
            label = node['name'] + str(n)
            current_node = pydot.Node(label)
            written_nodes.add(label)
            graph.add_node(current_node)
            if previous_node:
                graph.add_edge(pydot.Edge(previous_node, current_node))
            previous_node = current_node
        graph.write_png(to_file)

    elif isinstance(model, Graph):
        # don't need to append number for names since all nodes labeled
        for input_node in model.input_config:
            graph.add_node(pydot.Node(input_node['name']))

        # intermediate and output nodes have input defined
        for layer_config in [model.node_config, model.output_config]:
            for node in layer_config:
                graph.add_node(pydot.Node(node['name']))
                # possible to have multiple 'inputs' vs 1 'input'
                if node['inputs']:
                    for e in node['inputs']:
                        graph.add_edge(pydot.Edge(e, node['name']))
                else:
                    graph.add_edge(pydot.Edge(node['input'], node['name']))

        graph.write_png(to_file)
|
||||
+6
-3
@@ -3,12 +3,15 @@ from setuptools import find_packages
|
||||
|
||||
|
||||
setup(name='Keras',
|
||||
version='0.1.2',
|
||||
version='0.2.0',
|
||||
description='Theano-based Deep Learning library',
|
||||
author='Francois Chollet',
|
||||
author_email='francois.chollet@gmail.com',
|
||||
url='https://github.com/fchollet/keras',
|
||||
download_url='https://github.com/fchollet/keras/tarball/0.1.2',
|
||||
download_url='https://github.com/fchollet/keras/tarball/0.2.0',
|
||||
license='MIT',
|
||||
install_requires=['theano', 'pyyaml', 'h5py'],
|
||||
install_requires=['theano', 'pyyaml'],
|
||||
extras_require={
|
||||
'h5py': ['h5py'],
|
||||
},
|
||||
packages=find_packages())
|
||||
|
||||
@@ -0,0 +1,163 @@
|
||||
import unittest
|
||||
import numpy as np
|
||||
from numpy.testing import assert_allclose
|
||||
import theano
|
||||
|
||||
from keras.layers import convolutional
|
||||
|
||||
|
||||
class TestConvolutions(unittest.TestCase):
    """Smoke tests for the layers exposed by `keras.layers.convolutional`.

    Every test feeds an all-ones array to a layer through a Theano shared
    variable, evaluates the layer output with `train` set to True and False,
    and calls `get_config()` on the layer.
    """

    # NOTE(review): the source this class was recovered from had its
    # indentation stripped, so the exact loop level of the evaluation code in
    # the two convolution tests could not be read off. It is placed inside the
    # innermost loop so that every constructed layer is evaluated - confirm.

    def test_convolution_1d(self):
        """Build and evaluate Convolution1D over all parameter combinations."""
        nb_samples = 9
        nb_steps = 7
        input_dim = 10
        filter_length = 6
        nb_filter = 5

        weights_in = [np.ones((nb_filter, input_dim, filter_length, 1)), np.ones(nb_filter)]

        # Named `data` rather than `input` to avoid shadowing the builtin.
        data = np.ones((nb_samples, nb_steps, input_dim))
        for weight in [None, weights_in]:
            for border_mode in ['valid', 'full', 'same']:
                for subsample_length in [1, 3]:
                    # 'same' is only exercised without subsampling.
                    if border_mode == 'same' and subsample_length != 1:
                        continue
                    for W_regularizer in [None, 'l2']:
                        for b_regularizer in [None, 'l2']:
                            for act_regularizer in [None, 'l2']:
                                layer = convolutional.Convolution1D(
                                    nb_filter, filter_length, weights=weight,
                                    border_mode=border_mode, W_regularizer=W_regularizer,
                                    b_regularizer=b_regularizer, activity_regularizer=act_regularizer,
                                    subsample_length=subsample_length, input_shape=(None, input_dim))

                                layer.input = theano.shared(value=data)
                                for train in [True, False]:
                                    out = layer.get_output(train).eval()
                                    # The sample axis must be preserved.
                                    self.assertEqual(data.shape[0], out.shape[0])
                                    if border_mode == 'same' and subsample_length == 1:
                                        self.assertEqual(data.shape[1], out.shape[1])

                                # Only checks that the call succeeds.
                                layer.get_config()

    def test_maxpooling_1d(self):
        """Evaluate MaxPooling1D for each stride / ignore_border setting."""
        nb_samples = 9
        nb_steps = 7
        input_dim = 10

        data = np.ones((nb_samples, nb_steps, input_dim))
        for ignore_border in [True, False]:
            for stride in [1, 2]:
                layer = convolutional.MaxPooling1D(stride=stride, ignore_border=ignore_border)
                layer.input = theano.shared(value=data)
                for train in [True, False]:
                    layer.get_output(train).eval()

                layer.get_config()

    def test_convolution_2d(self):
        """Build and evaluate Convolution2D over all parameter combinations."""
        nb_samples = 8
        nb_filter = 9
        stack_size = 7
        nb_row = 10
        nb_col = 6

        input_nb_row = 11
        input_nb_col = 12

        weights_in = [np.ones((nb_filter, stack_size, nb_row, nb_col)), np.ones(nb_filter)]

        data = np.ones((nb_samples, stack_size, input_nb_row, input_nb_col))
        for weight in [None, weights_in]:
            for border_mode in ['valid', 'full', 'same']:
                for subsample in [(1, 1), (2, 3)]:
                    # 'same' is only exercised without subsampling.
                    if border_mode == 'same' and subsample != (1, 1):
                        continue
                    for W_regularizer in [None, 'l2']:
                        for b_regularizer in [None, 'l2']:
                            for act_regularizer in [None, 'l2']:
                                layer = convolutional.Convolution2D(
                                    nb_filter, nb_row, nb_col, weights=weight,
                                    border_mode=border_mode, W_regularizer=W_regularizer,
                                    b_regularizer=b_regularizer, activity_regularizer=act_regularizer,
                                    subsample=subsample, input_shape=(stack_size, None, None))

                                layer.input = theano.shared(value=data)
                                for train in [True, False]:
                                    out = layer.get_output(train).eval()
                                    if border_mode == 'same' and subsample == (1, 1):
                                        # Row/column extent must match the input.
                                        self.assertEqual(out.shape[2:], data.shape[2:])

                                layer.get_config()

    def test_maxpooling_2d(self):
        """Evaluate MaxPooling2D for each stride / ignore_border setting."""
        nb_samples = 9
        stack_size = 7
        input_nb_row = 11
        input_nb_col = 12
        pool_size = (3, 3)

        data = np.ones((nb_samples, stack_size, input_nb_row, input_nb_col))
        for ignore_border in [True, False]:
            for stride in [(1, 1), (2, 2)]:
                layer = convolutional.MaxPooling2D(stride=stride, ignore_border=ignore_border, pool_size=pool_size)
                layer.input = theano.shared(value=data)
                for train in [True, False]:
                    layer.get_output(train).eval()

                layer.get_config()

    def test_zero_padding_2d(self):
        """ZeroPadding2D surrounds an all-ones input with a border of zeros."""
        nb_samples = 9
        stack_size = 7
        input_nb_row = 11
        input_nb_col = 12

        data = np.ones((nb_samples, stack_size, input_nb_row, input_nb_col))
        layer = convolutional.ZeroPadding2D(padding=(2, 2))
        layer.input = theano.shared(value=data)
        for train in [True, False]:
            out = layer.get_output(train).eval()
            # The two outermost rows and columns on each side are padding.
            for offset in [0, 1, -1, -2]:
                assert_allclose(out[:, :, offset, :], 0.)
                assert_allclose(out[:, :, :, offset], 0.)
            # The interior still holds the original ones.
            assert_allclose(out[:, :, 2:-2, 2:-2], 1.)

        layer.get_config()

    def test_upsample_1d(self):
        """UpSample1D multiplies the length of axis 1 by `length`."""
        nb_samples = 9
        nb_steps = 7
        input_dim = 10

        data = np.ones((nb_samples, nb_steps, input_dim))
        for length in [2, 3, 9]:
            layer = convolutional.UpSample1D(length=length)
            layer.input = theano.shared(value=data)
            for train in [True, False]:
                out = layer.get_output(train).eval()
                self.assertEqual(out.shape[1], length * nb_steps)

            layer.get_config()

    def test_upsample_2d(self):
        """UpSample2D multiplies axes 2 and 3 by the two entries of `size`."""
        nb_samples = 9
        stack_size = 7
        input_nb_row = 11
        input_nb_col = 12

        data = np.ones((nb_samples, stack_size, input_nb_row, input_nb_col))

        for length_row in [2, 3, 9]:
            for length_col in [2, 3, 9]:
                layer = convolutional.UpSample2D(size=(length_row, length_col))
                layer.input = theano.shared(value=data)
                for train in [True, False]:
                    out = layer.get_output(train).eval()
                    self.assertEqual(out.shape[2], length_row * input_nb_row)
                    self.assertEqual(out.shape[3], length_col * input_nb_col)

                layer.get_config()
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
||||
@@ -12,11 +12,6 @@ class TestLayerBase(unittest.TestCase):
|
||||
input_dim = 5
|
||||
layer = core.Layer()
|
||||
|
||||
# As long as there is no input, an error should be raised.
|
||||
for train in [True, False]:
|
||||
self.assertRaises(AttributeError, layer.get_input, train)
|
||||
self.assertRaises(AttributeError, layer.get_output, train)
|
||||
|
||||
# Once an input is provided, it should be reachable through the
|
||||
# appropriate getters
|
||||
input = np.ones((nb_samples, input_dim))
|
||||
@@ -34,10 +29,6 @@ class TestLayerBase(unittest.TestCase):
|
||||
input = np.ones((nb_samples, input_dim))
|
||||
layer1.input = theano.shared(value=input)
|
||||
|
||||
# As long as there is no previous layer, an error should be raised.
|
||||
for train in [True, False]:
|
||||
self.assertRaises(AttributeError, layer2.get_input, train)
|
||||
|
||||
# After connecting, input of layer1 should be passed through
|
||||
layer2.set_previous(layer1)
|
||||
for train in [True, False]:
|
||||
@@ -81,7 +72,7 @@ class TestConfigParams(unittest.TestCase):
|
||||
self._runner(layer)
|
||||
|
||||
def test_reshape(self):
|
||||
layer = core.Reshape(10, 10)
|
||||
layer = core.Reshape(dims=(10, 10))
|
||||
self._runner(layer)
|
||||
|
||||
def test_flatten(self):
|
||||
@@ -93,7 +84,7 @@ class TestConfigParams(unittest.TestCase):
|
||||
self._runner(layer)
|
||||
|
||||
def test_dense(self):
|
||||
layer = core.Dense(10, 10)
|
||||
layer = core.Dense(10, input_shape=(10,))
|
||||
self._runner(layer)
|
||||
|
||||
def test_act_reg(self):
|
||||
@@ -101,7 +92,11 @@ class TestConfigParams(unittest.TestCase):
|
||||
self._runner(layer)
|
||||
|
||||
def test_time_dist_dense(self):
|
||||
layer = core.TimeDistributedDense(10, 10)
|
||||
layer = core.TimeDistributedDense(10, input_shape=(None, 10))
|
||||
self._runner(layer)
|
||||
|
||||
def test_time_dist_merge(self):
|
||||
layer = core.TimeDistributedMerge()
|
||||
self._runner(layer)
|
||||
|
||||
def test_autoencoder(self):
|
||||
@@ -126,9 +121,8 @@ class TestMasking(unittest.TestCase):
|
||||
func = theano.function([layer.input], layer.get_output_mask())
|
||||
self.assertTrue(np.all(
|
||||
# get mask for this input
|
||||
func(np.array(
|
||||
[[[1], [2], [3], [0]],
|
||||
[[0], [4], [5], [0]]], dtype=np.int32)) ==
|
||||
func(np.array([[[1], [2], [3], [0]],
|
||||
[[0], [4], [5], [0]]], dtype=np.int32)) ==
|
||||
# This is the expected output mask, one dimension less
|
||||
np.array([[1, 1, 1, 0], [0, 1, 1, 0]])))
|
||||
|
||||
@@ -138,9 +132,8 @@ class TestMasking(unittest.TestCase):
|
||||
func = theano.function([layer.input], layer.get_output_mask())
|
||||
self.assertTrue(np.all(
|
||||
# get mask for this input; a timestep whose values are not all 5 should not be masked
|
||||
func(np.array(
|
||||
[[[1, 1], [2, 1], [3, 1], [5, 5]],
|
||||
[[1, 5], [5, 0], [0, 0], [0, 0]]], dtype=np.int32)) ==
|
||||
func(np.array([[[1, 1], [2, 1], [3, 1], [5, 5]],
|
||||
[[1, 5], [5, 0], [0, 0], [0, 0]]], dtype=np.int32)) ==
|
||||
# This is the expected output mask, one dimension less
|
||||
np.array([[1, 1, 1, 0], [1, 1, 1, 1]])))
|
||||
|
||||
@@ -150,12 +143,11 @@ class TestMasking(unittest.TestCase):
|
||||
func = theano.function([layer.input], layer.get_output())
|
||||
self.assertTrue(np.all(
|
||||
# get output for this input, replace padding with 0
|
||||
func(np.array(
|
||||
[[[1, 1], [2, 1], [3, 1], [5, 5]],
|
||||
[[1, 5], [5, 0], [0, 0], [0, 0]]], dtype=np.int32)) ==
|
||||
func(np.array([[[1, 1], [2, 1], [3, 1], [5, 5]],
|
||||
[[1, 5], [5, 0], [0, 0], [0, 0]]], dtype=np.int32)) ==
|
||||
# This is the expected output
|
||||
np.array([[[1, 1], [2, 1], [3, 1], [0, 0]],
|
||||
[[1, 5], [5, 0], [0, 0], [0, 0]]])))
|
||||
[[1, 5], [5, 0], [0, 0], [0, 0]]])))
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
|
||||
@@ -12,21 +12,20 @@ def _runner(layer_class):
|
||||
All the recurrent layers share the same interface, so we can run through them with a single
|
||||
function.
|
||||
"""
|
||||
for weights in [None, [np.ones((input_dim, output_dim))]]:
|
||||
for ret_seq in [True, False]:
|
||||
layer = layer_class(input_dim, output_dim, return_sequences=ret_seq, weights=weights)
|
||||
layer.input = theano.shared(value=np.ones((nb_samples, timesteps, input_dim)))
|
||||
config = layer.get_config()
|
||||
for ret_seq in [True, False]:
|
||||
layer = layer_class(output_dim, return_sequences=ret_seq, weights=None, input_shape=(None, input_dim))
|
||||
layer.input = theano.shared(value=np.ones((nb_samples, timesteps, input_dim)))
|
||||
config = layer.get_config()
|
||||
|
||||
for train in [True, False]:
|
||||
out = layer.get_output(train).eval()
|
||||
# Make sure the output has the desired shape
|
||||
if ret_seq:
|
||||
assert(out.shape == (nb_samples, timesteps, output_dim))
|
||||
else:
|
||||
assert(out.shape == (nb_samples, output_dim))
|
||||
for train in [True, False]:
|
||||
out = layer.get_output(train).eval()
|
||||
# Make sure the output has the desired shape
|
||||
if ret_seq:
|
||||
assert(out.shape == (nb_samples, timesteps, output_dim))
|
||||
else:
|
||||
assert(out.shape == (nb_samples, output_dim))
|
||||
|
||||
mask = layer.get_output_mask(train)
|
||||
mask = layer.get_output_mask(train)
|
||||
|
||||
|
||||
class TestRNNS(unittest.TestCase):
|
||||
|
||||
@@ -6,6 +6,7 @@ import theano.tensor as T
|
||||
|
||||
import numpy
|
||||
|
||||
|
||||
def list_assert_equal(a, b, round_to=7):
|
||||
'''
|
||||
This will do a pairwise, rounded equality test across two lists of
|
||||
@@ -15,13 +16,14 @@ def list_assert_equal(a, b, round_to=7):
|
||||
for i, j in pairs:
|
||||
assert round(i, round_to) == round(j, round_to)
|
||||
|
||||
|
||||
def get_standard_values():
|
||||
'''
|
||||
These are just a set of floats used for testing the activation
|
||||
functions, and are useful in multiple tests.
|
||||
'''
|
||||
return [0, 0.1, 0.5, 0.9, 1.0]
|
||||
|
||||
return [0,0.1,0.5,0.9,1.0]
|
||||
|
||||
def test_softmax():
|
||||
|
||||
@@ -39,7 +41,7 @@ def test_softmax():
|
||||
x = T.vector()
|
||||
exp = s(x)
|
||||
f = theano.function([x], exp)
|
||||
test_values=get_standard_values()
|
||||
test_values = get_standard_values()
|
||||
|
||||
result = f(test_values)
|
||||
expected = softmax(test_values)
|
||||
@@ -49,6 +51,7 @@ def test_softmax():
|
||||
|
||||
list_assert_equal(result, expected)
|
||||
|
||||
|
||||
def test_relu():
|
||||
'''
|
||||
Relu implementation doesn't depend on the value being
|
||||
@@ -69,11 +72,10 @@ def test_relu():
|
||||
test_values = get_standard_values()
|
||||
result = f(test_values)
|
||||
|
||||
list_assert_equal(result, test_values) # because no negatives in test values
|
||||
list_assert_equal(result, test_values) # because no negatives in test values
|
||||
|
||||
|
||||
def test_tanh():
|
||||
|
||||
from keras.activations import tanh as t
|
||||
test_values = get_standard_values()
|
||||
|
||||
|
||||
@@ -61,7 +61,7 @@ class TestConstraints(unittest.TestCase):
|
||||
normalized = unitnorm_instance(self.example_array)
|
||||
|
||||
norm_of_normalized = np.sqrt(np.sum(normalized.eval()**2, axis=1))
|
||||
difference = norm_of_normalized - 1. #in the unit norm constraint, it should be equal to 1.
|
||||
difference = norm_of_normalized - 1. # in the unit norm constraint, it should be equal to 1.
|
||||
largest_difference = np.max(np.abs(difference))
|
||||
self.assertAlmostEqual(largest_difference, 0.)
|
||||
|
||||
|
||||
@@ -15,15 +15,15 @@ class TestBatchNormalization(unittest.TestCase):
|
||||
self.input_shapes = [np.ones((10, 10)), np.ones((10, 10, 10))]
|
||||
|
||||
def test_setup(self):
|
||||
norm_m0 = normalization.BatchNormalization((10, 10))
|
||||
norm_m1 = normalization.BatchNormalization((10, 10), mode=1)
|
||||
norm_m0 = normalization.BatchNormalization(input_shape=(10, 10))
|
||||
norm_m1 = normalization.BatchNormalization(input_shape=(10, 10), mode=1)
|
||||
|
||||
# mode 3 does not exist
|
||||
self.assertRaises(Exception, normalization.BatchNormalization((10, 10), mode=3))
|
||||
self.assertRaises(Exception, normalization.BatchNormalization(input_shape=(10, 10), mode=3))
|
||||
|
||||
def test_mode_0(self):
|
||||
model = Sequential()
|
||||
norm_m0 = normalization.BatchNormalization((10,))
|
||||
norm_m0 = normalization.BatchNormalization(input_shape=(10,))
|
||||
model.add(norm_m0)
|
||||
model.compile(loss='mse', optimizer='sgd')
|
||||
|
||||
@@ -37,8 +37,7 @@ class TestBatchNormalization(unittest.TestCase):
|
||||
self.assertAlmostEqual(out.std().eval(), 1.0, places=1)
|
||||
|
||||
def test_mode_1(self):
|
||||
norm_m1 = normalization.BatchNormalization((10,), mode=1)
|
||||
norm_m1.init_updates()
|
||||
norm_m1 = normalization.BatchNormalization(input_shape=(10,), mode=1)
|
||||
|
||||
for inp in [self.input_1, self.input_2, self.input_3]:
|
||||
norm_m1.input = inp
|
||||
@@ -54,12 +53,11 @@ class TestBatchNormalization(unittest.TestCase):
|
||||
Test batch normalization with various input shapes
|
||||
"""
|
||||
for inp in self.input_shapes:
|
||||
norm_m0 = normalization.BatchNormalization(inp.shape, mode=0)
|
||||
norm_m0.init_updates()
|
||||
norm_m0 = normalization.BatchNormalization(input_shape=inp.shape, mode=0)
|
||||
norm_m0.input = inp
|
||||
out = (norm_m0.get_output(train=True) - norm_m0.beta) / norm_m0.gamma
|
||||
|
||||
norm_m1 = normalization.BatchNormalization(inp.shape, mode=1)
|
||||
norm_m1 = normalization.BatchNormalization(input_shape=inp.shape, mode=1)
|
||||
norm_m1.input = inp
|
||||
out = (norm_m1.get_output(train=True) - norm_m1.beta) / norm_m1.gamma
|
||||
|
||||
@@ -67,9 +65,8 @@ class TestBatchNormalization(unittest.TestCase):
|
||||
"""
|
||||
Test weight initialization
|
||||
"""
|
||||
|
||||
norm_m1 = normalization.BatchNormalization((10,), mode=1, weights=[np.ones(10), np.ones(10)])
|
||||
norm_m1.init_updates()
|
||||
norm_m1 = normalization.BatchNormalization(input_shape=(10,), mode=1,
|
||||
weights=[np.ones(10), np.ones(10), np.zeros(10), np.zeros(10)])
|
||||
|
||||
for inp in [self.input_1, self.input_2, self.input_3]:
|
||||
norm_m1.input = inp
|
||||
@@ -83,17 +80,19 @@ class TestBatchNormalization(unittest.TestCase):
|
||||
assert_allclose(norm_m1.gamma.eval(), np.ones(10))
|
||||
assert_allclose(norm_m1.beta.eval(), np.ones(10))
|
||||
|
||||
# Weights must be an iterable of gamma AND beta.
|
||||
self.assertRaises(Exception, normalization.BatchNormalization((10,)), weights=np.ones(10))
|
||||
|
||||
def test_config(self):
|
||||
norm = normalization.BatchNormalization((10, 10), mode=1, epsilon=0.1)
|
||||
norm = normalization.BatchNormalization(input_shape=(10, 10), mode=1, epsilon=0.1, momentum=0.9)
|
||||
conf = norm.get_config()
|
||||
conf_target = {"input_shape": (10, 10), "name": normalization.BatchNormalization.__name__,
|
||||
"epsilon": 0.1, "mode": 1}
|
||||
|
||||
"epsilon": 0.1, "mode": 1, "momentum": 0.9}
|
||||
self.assertDictEqual(conf, conf_target)
|
||||
|
||||
def test_save_weights(self):
    # Round trip: the list returned by get_weights() is handed straight
    # back to set_weights() on the same layer.
    layer = normalization.BatchNormalization(input_shape=(10, 10), mode=1, epsilon=0.1)
    saved = layer.get_weights()
    # Four weight arrays are expected from this layer.
    assert len(saved) == 4
    layer.set_weights(saved)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
||||
|
||||
@@ -14,9 +14,9 @@ class TestEmbedding(unittest.TestCase):
|
||||
|
||||
def test_unitnorm_constraint(self):
|
||||
lookup = Sequential()
|
||||
lookup.add(Embedding(3, 2, weights=[self.W1], W_constraint=unitnorm()))
|
||||
lookup.add(Embedding(3, 2, weights=[self.W1], W_constraint=unitnorm(), input_length=1))
|
||||
lookup.add(Flatten())
|
||||
lookup.add(Dense(2, 1))
|
||||
lookup.add(Dense(1))
|
||||
lookup.add(Activation('sigmoid'))
|
||||
lookup.compile(loss='binary_crossentropy', optimizer='sgd', class_mode='binary')
|
||||
lookup.train_on_batch(self.X1, np.array([[1], [0]], dtype='int32'))
|
||||
|
||||
@@ -23,11 +23,11 @@ class TestGraph(unittest.TestCase):
|
||||
def test_1o_1i(self):
|
||||
print('test a non-sequential graph with 1 input and 1 output')
|
||||
graph = Graph()
|
||||
graph.add_input(name='input1', ndim=2)
|
||||
graph.add_input(name='input1', input_shape=(32,))
|
||||
|
||||
graph.add_node(Dense(32, 16), name='dense1', input='input1')
|
||||
graph.add_node(Dense(32, 4), name='dense2', input='input1')
|
||||
graph.add_node(Dense(16, 4), name='dense3', input='dense1')
|
||||
graph.add_node(Dense(16), name='dense1', input='input1')
|
||||
graph.add_node(Dense(4), name='dense2', input='input1')
|
||||
graph.add_node(Dense(4), name='dense3', input='dense1')
|
||||
|
||||
graph.add_output(name='output1', inputs=['dense2', 'dense3'], merge_mode='sum')
|
||||
graph.compile('rmsprop', {'output1': 'mse'})
|
||||
@@ -45,14 +45,14 @@ class TestGraph(unittest.TestCase):
|
||||
def test_1o_1i_2(self):
|
||||
print('test a more complex non-sequential graph with 1 input and 1 output')
|
||||
graph = Graph()
|
||||
graph.add_input(name='input1', ndim=2)
|
||||
graph.add_input(name='input1', input_shape=(32,))
|
||||
|
||||
graph.add_node(Dense(32, 16), name='dense1', input='input1')
|
||||
graph.add_node(Dense(32, 4), name='dense2-0', input='input1')
|
||||
graph.add_node(Dense(16), name='dense1', input='input1')
|
||||
graph.add_node(Dense(4), name='dense2-0', input='input1')
|
||||
graph.add_node(Activation('relu'), name='dense2', input='dense2-0')
|
||||
|
||||
graph.add_node(Dense(4, 16), name='dense3', input='dense2')
|
||||
graph.add_node(Dense(16, 4), name='dense4', inputs=['dense1', 'dense3'], merge_mode='sum')
|
||||
graph.add_node(Dense(16), name='dense3', input='dense2')
|
||||
graph.add_node(Dense(4), name='dense4', inputs=['dense1', 'dense3'], merge_mode='sum')
|
||||
|
||||
graph.add_output(name='output1', inputs=['dense2', 'dense4'], merge_mode='sum')
|
||||
graph.compile('rmsprop', {'output1': 'mse'})
|
||||
@@ -71,12 +71,12 @@ class TestGraph(unittest.TestCase):
|
||||
def test_1o_2i(self):
|
||||
print('test a non-sequential graph with 2 inputs and 1 output')
|
||||
graph = Graph()
|
||||
graph.add_input(name='input1', ndim=2)
|
||||
graph.add_input(name='input2', ndim=2)
|
||||
graph.add_input(name='input1', input_shape=(32,))
|
||||
graph.add_input(name='input2', input_shape=(32,))
|
||||
|
||||
graph.add_node(Dense(32, 16), name='dense1', input='input1')
|
||||
graph.add_node(Dense(32, 4), name='dense2', input='input2')
|
||||
graph.add_node(Dense(16, 4), name='dense3', input='dense1')
|
||||
graph.add_node(Dense(16), name='dense1', input='input1')
|
||||
graph.add_node(Dense(4), name='dense2', input='input2')
|
||||
graph.add_node(Dense(4), name='dense3', input='dense1')
|
||||
|
||||
graph.add_output(name='output1', inputs=['dense2', 'dense3'], merge_mode='sum')
|
||||
graph.compile('rmsprop', {'output1': 'mse'})
|
||||
@@ -95,11 +95,11 @@ class TestGraph(unittest.TestCase):
|
||||
def test_2o_1i_weights(self):
|
||||
print('test a non-sequential graph with 1 input and 2 outputs')
|
||||
graph = Graph()
|
||||
graph.add_input(name='input1', ndim=2)
|
||||
graph.add_input(name='input1', input_shape=(32,))
|
||||
|
||||
graph.add_node(Dense(32, 16), name='dense1', input='input1')
|
||||
graph.add_node(Dense(32, 4), name='dense2', input='input1')
|
||||
graph.add_node(Dense(16, 1), name='dense3', input='dense1')
|
||||
graph.add_node(Dense(16), name='dense1', input='input1')
|
||||
graph.add_node(Dense(4), name='dense2', input='input1')
|
||||
graph.add_node(Dense(1), name='dense3', input='dense1')
|
||||
|
||||
graph.add_output(name='output1', input='dense2')
|
||||
graph.add_output(name='output2', input='dense3')
|
||||
@@ -118,10 +118,10 @@ class TestGraph(unittest.TestCase):
|
||||
print('test weight saving')
|
||||
graph.save_weights('temp.h5', overwrite=True)
|
||||
graph = Graph()
|
||||
graph.add_input(name='input1', ndim=2)
|
||||
graph.add_node(Dense(32, 16), name='dense1', input='input1')
|
||||
graph.add_node(Dense(32, 4), name='dense2', input='input1')
|
||||
graph.add_node(Dense(16, 1), name='dense3', input='dense1')
|
||||
graph.add_input(name='input1', input_shape=(32,))
|
||||
graph.add_node(Dense(16), name='dense1', input='input1')
|
||||
graph.add_node(Dense(4), name='dense2', input='input1')
|
||||
graph.add_node(Dense(1), name='dense3', input='dense1')
|
||||
graph.add_output(name='output1', input='dense2')
|
||||
graph.add_output(name='output2', input='dense3')
|
||||
graph.compile('rmsprop', {'output1': 'mse', 'output2': 'mse'})
|
||||
@@ -133,11 +133,11 @@ class TestGraph(unittest.TestCase):
|
||||
def test_2o_1i_sample_weights(self):
|
||||
print('test a non-sequential graph with 1 input and 2 outputs with sample weights')
|
||||
graph = Graph()
|
||||
graph.add_input(name='input1', ndim=2)
|
||||
graph.add_input(name='input1', input_shape=(32,))
|
||||
|
||||
graph.add_node(Dense(32, 16), name='dense1', input='input1')
|
||||
graph.add_node(Dense(32, 4), name='dense2', input='input1')
|
||||
graph.add_node(Dense(16, 1), name='dense3', input='dense1')
|
||||
graph.add_node(Dense(16), name='dense1', input='input1')
|
||||
graph.add_node(Dense(4), name='dense2', input='input1')
|
||||
graph.add_node(Dense(1), name='dense3', input='dense1')
|
||||
|
||||
graph.add_output(name='output1', input='dense2')
|
||||
graph.add_output(name='output2', input='dense3')
|
||||
@@ -166,16 +166,16 @@ class TestGraph(unittest.TestCase):
|
||||
print('test layer-like API')
|
||||
|
||||
graph = containers.Graph()
|
||||
graph.add_input(name='input1', ndim=2)
|
||||
graph.add_node(Dense(32, 16), name='dense1', input='input1')
|
||||
graph.add_node(Dense(32, 4), name='dense2', input='input1')
|
||||
graph.add_node(Dense(16, 4), name='dense3', input='dense1')
|
||||
graph.add_input(name='input1', input_shape=(32,))
|
||||
graph.add_node(Dense(16), name='dense1', input='input1')
|
||||
graph.add_node(Dense(4), name='dense2', input='input1')
|
||||
graph.add_node(Dense(4), name='dense3', input='dense1')
|
||||
graph.add_output(name='output1', inputs=['dense2', 'dense3'], merge_mode='sum')
|
||||
|
||||
seq = Sequential()
|
||||
seq.add(Dense(32, 32, name='first_seq_dense'))
|
||||
seq.add(Dense(32, input_shape=(32,)))
|
||||
seq.add(graph)
|
||||
seq.add(Dense(4, 4, name='last_seq_dense'))
|
||||
seq.add(Dense(4))
|
||||
|
||||
seq.compile('rmsprop', 'mse')
|
||||
|
||||
@@ -191,12 +191,12 @@ class TestGraph(unittest.TestCase):
|
||||
def test_create_output(self):
|
||||
print('test create_output argument')
|
||||
graph = Graph()
|
||||
graph.add_input(name='input1', ndim=2)
|
||||
graph.add_input(name='input1', input_shape=(32,))
|
||||
|
||||
graph.add_node(Dense(32, 16), name='dense1', input='input1')
|
||||
graph.add_node(Dense(32, 4), name='dense2', input='input1')
|
||||
graph.add_node(Dense(16, 4), name='dense3', input='dense1')
|
||||
graph.add_node(Dense(4, 4), name='output1', inputs=['dense2', 'dense3'], merge_mode='sum', create_output=True)
|
||||
graph.add_node(Dense(16), name='dense1', input='input1')
|
||||
graph.add_node(Dense(4), name='dense2', input='input1')
|
||||
graph.add_node(Dense(4), name='dense3', input='dense1')
|
||||
graph.add_node(Dense(4), name='output1', inputs=['dense2', 'dense3'], merge_mode='sum', create_output=True)
|
||||
graph.compile('rmsprop', {'output1': 'mse'})
|
||||
|
||||
history = graph.fit({'input1': X_train, 'output1': y_train}, nb_epoch=10)
|
||||
@@ -209,6 +209,34 @@ class TestGraph(unittest.TestCase):
|
||||
print(loss)
|
||||
assert(loss < 2.5)
|
||||
|
||||
def test_count_params(self):
|
||||
print('test count params')
|
||||
|
||||
nb_units = 100
|
||||
nb_classes = 2
|
||||
|
||||
graph = Graph()
|
||||
graph.add_input(name='input1', input_shape=(32,))
|
||||
graph.add_input(name='input2', input_shape=(32,))
|
||||
graph.add_node(Dense(nb_units),
|
||||
name='dense1', input='input1')
|
||||
graph.add_node(Dense(nb_classes),
|
||||
name='dense2', input='input2')
|
||||
graph.add_node(Dense(nb_classes),
|
||||
name='dense3', input='dense1')
|
||||
graph.add_output(name='output', inputs=['dense2', 'dense3'],
|
||||
merge_mode='sum')
|
||||
|
||||
n = 32 * nb_units + nb_units
|
||||
n += 32 * nb_classes + nb_classes
|
||||
n += nb_units * nb_classes + nb_classes
|
||||
|
||||
self.assertEqual(n, graph.count_params())
|
||||
|
||||
graph.compile('rmsprop', {'output': 'binary_crossentropy'})
|
||||
|
||||
self.assertEqual(n, graph.count_params())
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
print('Test graph model')
|
||||
|
||||
@@ -14,8 +14,8 @@ class TestLossMasking(unittest.TestCase):
|
||||
[[[1, 1], [2, 1], [3, 1], [5, 5]],
|
||||
[[1, 5], [5, 0], [0, 0], [0, 0]]], dtype=np.int32)
|
||||
model = Sequential()
|
||||
model.add(Masking(mask_value=0))
|
||||
model.add(TimeDistributedDense(2, 1, init='one'))
|
||||
model.add(Masking(mask_value=0, input_shape=(None, 2)))
|
||||
model.add(TimeDistributedDense(1, init='one'))
|
||||
model.compile(loss='mse', optimizer='sgd')
|
||||
y = model.predict(X)
|
||||
loss = model.fit(X, 4*y, nb_epoch=1, batch_size=2, verbose=1).history['loss'][0]
|
||||
|
||||
@@ -42,18 +42,18 @@ sample_weight[y_train == weighted_class] = high_weight
|
||||
|
||||
def create_sequential_model():
|
||||
model = Sequential()
|
||||
model.add(Dense(784, 50))
|
||||
model.add(Dense(50, input_shape=(784,)))
|
||||
model.add(Activation('relu'))
|
||||
model.add(Dense(50, 10))
|
||||
model.add(Dense(10))
|
||||
model.add(Activation('softmax'))
|
||||
return model
|
||||
|
||||
|
||||
def create_graph_model():
|
||||
model = Graph()
|
||||
model.add_input(name='input')
|
||||
model.add_node(Dense(784, 50, activation='relu'), name='d1', input='input')
|
||||
model.add_node(Dense(50, 10, activation='softmax'), name='d2', input='d1')
|
||||
model.add_input(name='input', input_shape=(784,))
|
||||
model.add_node(Dense(50, activation='relu'), name='d1', input='input')
|
||||
model.add_node(Dense(10, activation='softmax'), name='d2', input='d1')
|
||||
model.add_output(name='output', input='d2')
|
||||
return model
|
||||
|
||||
|
||||
@@ -18,9 +18,9 @@ y_test = to_categorical(y_test)
|
||||
|
||||
def get_model(input_dim, nb_hidden, output_dim):
|
||||
model = Sequential()
|
||||
model.add(Dense(input_dim, nb_hidden))
|
||||
model.add(Dense(nb_hidden, input_shape=(input_dim,)))
|
||||
model.add(Activation('relu'))
|
||||
model.add(Dense(nb_hidden, output_dim))
|
||||
model.add(Dense(output_dim))
|
||||
model.add(Activation('softmax'))
|
||||
return model
|
||||
|
||||
|
||||
@@ -35,9 +35,9 @@ test_ids = np.where(y_test == np.array(weighted_class))[0]
|
||||
|
||||
def create_model(weight_reg=None, activity_reg=None):
|
||||
model = Sequential()
|
||||
model.add(Dense(784, 50))
|
||||
model.add(Dense(50, input_shape=(784,)))
|
||||
model.add(Activation('relu'))
|
||||
model.add(Dense(50, 10, W_regularizer=weight_reg, activity_regularizer=activity_reg))
|
||||
model.add(Dense(10, W_regularizer=weight_reg, activity_regularizer=activity_reg))
|
||||
model.add(Activation('softmax'))
|
||||
return model
|
||||
|
||||
|
||||
@@ -30,9 +30,9 @@ class TestSequential(unittest.TestCase):
|
||||
def test_sequential(self):
|
||||
print('Test sequential')
|
||||
model = Sequential()
|
||||
model.add(Dense(input_dim, nb_hidden))
|
||||
model.add(Dense(nb_hidden, input_shape=(input_dim,)))
|
||||
model.add(Activation('relu'))
|
||||
model.add(Dense(nb_hidden, nb_class))
|
||||
model.add(Dense(nb_class))
|
||||
model.add(Activation('softmax'))
|
||||
model.compile(loss='categorical_crossentropy', optimizer='rmsprop')
|
||||
|
||||
@@ -57,9 +57,9 @@ class TestSequential(unittest.TestCase):
|
||||
print('test weight saving')
|
||||
model.save_weights('temp.h5', overwrite=True)
|
||||
model = Sequential()
|
||||
model.add(Dense(input_dim, nb_hidden))
|
||||
model.add(Dense(nb_hidden, input_shape=(input_dim,)))
|
||||
model.add(Activation('relu'))
|
||||
model.add(Dense(nb_hidden, nb_class))
|
||||
model.add(Dense(nb_class))
|
||||
model.add(Activation('softmax'))
|
||||
model.compile(loss='categorical_crossentropy', optimizer='rmsprop')
|
||||
model.load_weights('temp.h5')
|
||||
@@ -79,17 +79,17 @@ class TestSequential(unittest.TestCase):
|
||||
def test_merge_sum(self):
|
||||
print('Test merge: sum')
|
||||
left = Sequential()
|
||||
left.add(Dense(input_dim, nb_hidden))
|
||||
left.add(Dense(nb_hidden, input_shape=(input_dim,)))
|
||||
left.add(Activation('relu'))
|
||||
|
||||
right = Sequential()
|
||||
right.add(Dense(input_dim, nb_hidden))
|
||||
right.add(Dense(nb_hidden, input_shape=(input_dim,)))
|
||||
right.add(Activation('relu'))
|
||||
|
||||
model = Sequential()
|
||||
model.add(Merge([left, right], mode='sum'))
|
||||
|
||||
model.add(Dense(nb_hidden, nb_class))
|
||||
model.add(Dense(nb_class))
|
||||
model.add(Activation('softmax'))
|
||||
|
||||
model.compile(loss='categorical_crossentropy', optimizer='rmsprop')
|
||||
@@ -113,14 +113,14 @@ class TestSequential(unittest.TestCase):
|
||||
print('test weight saving')
|
||||
model.save_weights('temp.h5', overwrite=True)
|
||||
left = Sequential()
|
||||
left.add(Dense(input_dim, nb_hidden))
|
||||
left.add(Dense(nb_hidden, input_shape=(input_dim,)))
|
||||
left.add(Activation('relu'))
|
||||
right = Sequential()
|
||||
right.add(Dense(input_dim, nb_hidden))
|
||||
right.add(Dense(nb_hidden, input_shape=(input_dim,)))
|
||||
right.add(Activation('relu'))
|
||||
model = Sequential()
|
||||
model.add(Merge([left, right], mode='sum'))
|
||||
model.add(Dense(nb_hidden, nb_class))
|
||||
model.add(Dense(nb_class))
|
||||
model.add(Activation('softmax'))
|
||||
model.load_weights('temp.h5')
|
||||
model.compile(loss='categorical_crossentropy', optimizer='rmsprop')
|
||||
@@ -132,17 +132,17 @@ class TestSequential(unittest.TestCase):
|
||||
def test_merge_concat(self):
|
||||
print('Test merge: concat')
|
||||
left = Sequential()
|
||||
left.add(Dense(input_dim, nb_hidden))
|
||||
left.add(Dense(nb_hidden, input_shape=(input_dim,)))
|
||||
left.add(Activation('relu'))
|
||||
|
||||
right = Sequential()
|
||||
right.add(Dense(input_dim, nb_hidden))
|
||||
right.add(Dense(nb_hidden, input_shape=(input_dim,)))
|
||||
right.add(Activation('relu'))
|
||||
|
||||
model = Sequential()
|
||||
model.add(Merge([left, right], mode='concat'))
|
||||
|
||||
model.add(Dense(nb_hidden * 2, nb_class))
|
||||
model.add(Dense(nb_class))
|
||||
model.add(Activation('softmax'))
|
||||
|
||||
model.compile(loss='categorical_crossentropy', optimizer='rmsprop')
|
||||
@@ -166,17 +166,17 @@ class TestSequential(unittest.TestCase):
|
||||
print('test weight saving')
|
||||
model.save_weights('temp.h5', overwrite=True)
|
||||
left = Sequential()
|
||||
left.add(Dense(input_dim, nb_hidden))
|
||||
left.add(Dense(nb_hidden, input_shape=(input_dim,)))
|
||||
left.add(Activation('relu'))
|
||||
|
||||
right = Sequential()
|
||||
right.add(Dense(input_dim, nb_hidden))
|
||||
right.add(Dense(nb_hidden, input_shape=(input_dim,)))
|
||||
right.add(Activation('relu'))
|
||||
|
||||
model = Sequential()
|
||||
model.add(Merge([left, right], mode='concat'))
|
||||
|
||||
model.add(Dense(nb_hidden * 2, nb_class))
|
||||
model.add(Dense(nb_class))
|
||||
model.add(Activation('softmax'))
|
||||
|
||||
model.compile(loss='categorical_crossentropy', optimizer='rmsprop')
|
||||
@@ -190,26 +190,26 @@ class TestSequential(unittest.TestCase):
|
||||
print('Test merge recursivity')
|
||||
|
||||
left = Sequential()
|
||||
left.add(Dense(input_dim, nb_hidden))
|
||||
left.add(Dense(nb_hidden, input_shape=(input_dim,)))
|
||||
left.add(Activation('relu'))
|
||||
|
||||
right = Sequential()
|
||||
right.add(Dense(input_dim, nb_hidden))
|
||||
right.add(Dense(nb_hidden, input_shape=(input_dim,)))
|
||||
right.add(Activation('relu'))
|
||||
|
||||
righter = Sequential()
|
||||
righter.add(Dense(input_dim, nb_hidden))
|
||||
righter.add(Dense(nb_hidden, input_shape=(input_dim,)))
|
||||
righter.add(Activation('relu'))
|
||||
|
||||
intermediate = Sequential()
|
||||
intermediate.add(Merge([left, right], mode='sum'))
|
||||
intermediate.add(Dense(nb_hidden, nb_hidden))
|
||||
intermediate.add(Dense(nb_hidden))
|
||||
intermediate.add(Activation('relu'))
|
||||
|
||||
model = Sequential()
|
||||
model.add(Merge([intermediate, righter], mode='sum'))
|
||||
|
||||
model.add(Dense(nb_hidden, nb_class))
|
||||
model.add(Dense(nb_class))
|
||||
model.add(Activation('softmax'))
|
||||
|
||||
model.compile(loss='categorical_crossentropy', optimizer='rmsprop')
|
||||
@@ -240,13 +240,13 @@ class TestSequential(unittest.TestCase):
|
||||
def test_merge_overlap(self):
|
||||
print('Test merge overlap')
|
||||
left = Sequential()
|
||||
left.add(Dense(input_dim, nb_hidden))
|
||||
left.add(Dense(nb_hidden, input_shape=(input_dim,)))
|
||||
left.add(Activation('relu'))
|
||||
|
||||
model = Sequential()
|
||||
model.add(Merge([left, left], mode='sum'))
|
||||
|
||||
model.add(Dense(nb_hidden, nb_class))
|
||||
model.add(Dense(nb_class))
|
||||
model.add(Activation('softmax'))
|
||||
|
||||
model.compile(loss='categorical_crossentropy', optimizer='rmsprop')
|
||||
@@ -276,6 +276,28 @@ class TestSequential(unittest.TestCase):
|
||||
print(nloss)
|
||||
assert(loss == nloss)
|
||||
|
||||
def test_count_params(self):
|
||||
print('test count params')
|
||||
input_dim = 20
|
||||
nb_units = 10
|
||||
nb_classes = 2
|
||||
|
||||
n = input_dim * nb_units + nb_units
|
||||
n += nb_units * nb_units + nb_units
|
||||
n += nb_units * nb_classes + nb_classes
|
||||
|
||||
model = Sequential()
|
||||
model.add(Dense(nb_units, input_shape=(input_dim,)))
|
||||
model.add(Dense(nb_units))
|
||||
model.add(Dense(nb_classes))
|
||||
model.add(Activation('softmax'))
|
||||
|
||||
self.assertEqual(n, model.count_params())
|
||||
|
||||
model.compile('sgd', 'binary_crossentropy')
|
||||
|
||||
self.assertEqual(n, model.count_params())
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
print('Test Sequential model')
|
||||
|
||||
@@ -0,0 +1,132 @@
|
||||
import unittest
|
||||
import numpy as np
|
||||
import theano
|
||||
from keras.utils.theano_utils import ndim_tensor
|
||||
from keras.layers.core import *
|
||||
from keras.layers.convolutional import *
|
||||
from keras.layers.recurrent import SimpleRNN
|
||||
|
||||
|
||||
def check_layer_output_shape(layer, input_data):
    """Assert that a layer's inferred output shape matches its real output.

    The layer is given a fresh symbolic input whose rank equals the rank of
    ``input_data``, is told the per-sample input shape (everything after the
    first axis), and is then compiled and run on ``input_data``. The first
    axis is excluded from the comparison on both sides.

    Args:
        layer: layer object exposing ``input``, ``set_input_shape``,
            ``output_shape`` and ``get_output``.
        input_data: array-like with a ``shape`` attribute, fed to the
            compiled function as-is.

    Raises:
        AssertionError: if the computed output's shape (first axis dropped)
            differs from ``layer.output_shape[1:]``.
    """
    full_shape = input_data.shape
    layer.input = ndim_tensor(len(full_shape))
    layer.set_input_shape(full_shape[1:])

    # Read the inferred shape before building the computation graph.
    inferred_shape = layer.output_shape[1:]

    compute = theano.function([layer.input], [layer.get_output()])
    actual = compute(input_data)[0]
    assert actual.shape[1:] == inferred_shape
|
||||
|
||||
|
||||
class TestShapeInference(unittest.TestCase):
    """Shape-inference checks for core, convolutional and recurrent layers.

    Each test builds a layer, generates random input with
    ``np.random.random`` and hands both to ``check_layer_output_shape``,
    which asserts that the layer's declared ``output_shape`` agrees with the
    shape of the output it actually produces.
    """

    # ########
    # # Core #
    # ########
    def test_Reshape(self):
        """Reshape a (2, 6) batch to per-sample dims (2, 3)."""
        layer = Reshape(dims=(2, 3))
        input_data = np.random.random((2, 6))
        check_layer_output_shape(layer, input_data)

    def test_Permute(self):
        """Permute the non-batch axes of a (2, 2, 4, 3) input."""
        layer = Permute(dims=(1, 3, 2))
        input_data = np.random.random((2, 2, 4, 3))
        check_layer_output_shape(layer, input_data)

    def test_Flatten(self):
        """Flatten a (2, 2, 3) input."""
        layer = Flatten()
        input_data = np.random.random((2, 2, 3))
        check_layer_output_shape(layer, input_data)

    def test_RepeatVector(self):
        """RepeatVector(2) on a (2, 2) input."""
        layer = RepeatVector(2)
        input_data = np.random.random((2, 2))
        check_layer_output_shape(layer, input_data)

    def test_Dense(self):
        """Dense(3) on a (2, 2) input."""
        layer = Dense(3)
        input_data = np.random.random((2, 2))
        check_layer_output_shape(layer, input_data)

    def test_TimeDistributedDense(self):
        """TimeDistributedDense(2) on a (2, 2, 3) input."""
        layer = TimeDistributedDense(2)
        input_data = np.random.random((2, 2, 3))
        check_layer_output_shape(layer, input_data)

    #################
    # Convolutional #
    #################
    def test_Convolution1D(self):
        """Sweep border mode, filter length, subsampling and input length."""
        for border_mode in ['same', 'full', 'valid']:
            for filter_length in [2, 3]:
                for subsample_length in [1, 2]:
                    # 'same' with subsampling is deliberately left out of the
                    # sweep (reason not visible here -- TODO confirm).
                    if subsample_length > 1 and border_mode == 'same':
                        continue
                    for input_data_shape in [(2, 3, 2), (2, 4, 2)]:
                        layer = Convolution1D(nb_filter=1, filter_length=filter_length,
                                              border_mode=border_mode, subsample_length=subsample_length)
                        input_data = np.random.random(input_data_shape)
                        check_layer_output_shape(layer, input_data)

    def test_Convolution2D(self):
        """Sweep border mode, kernel size, subsampling and input size."""
        for border_mode in ['same', 'full', 'valid']:
            for nb_row, nb_col in [(2, 1), (3, 2)]:
                for subsample in [(1, 1), (2, 2)]:
                    # 'same' with subsampling is deliberately left out of the
                    # sweep (reason not visible here -- TODO confirm).
                    if (subsample[0] > 1 or subsample[1] > 1) and border_mode == 'same':
                        continue
                    for input_data_shape in [(2, 1, 3, 3), (2, 1, 4, 4)]:
                        # Pass nb_col (not nb_row) as the column size so the
                        # non-square kernels from the loop are really tested.
                        layer = Convolution2D(nb_filter=1, nb_row=nb_row, nb_col=nb_col,
                                              border_mode=border_mode, subsample=subsample)
                        input_data = np.random.random(input_data_shape)
                        check_layer_output_shape(layer, input_data)

    def test_MaxPooling1D(self):
        """Sweep ignore_border, stride, pool length and input length."""
        for ignore_border in [True, False]:
            for stride in [1, 2]:
                for pool_length in [1, 2]:
                    for input_data_shape in [(2, 1, 3), (2, 1, 4)]:
                        layer = MaxPooling1D(pool_length=pool_length, stride=stride, ignore_border=ignore_border)
                        input_data = np.random.random(input_data_shape)
                        check_layer_output_shape(layer, input_data)

    def test_MaxPooling2D(self):
        """Sweep ignore_border, stride, pool size and input size."""
        for ignore_border in [True, False]:
            for stride in [(1, 1), (2, 2)]:
                for pool_size in [(2, 2), (3, 3), (4, 4)]:
                    for input_data_shape in [(2, 1, 3, 3), (2, 1, 4, 4), (2, 1, 5, 5), (2, 1, 6, 6)]:
                        layer = MaxPooling2D(pool_size=pool_size, stride=stride, ignore_border=ignore_border)
                        input_data = np.random.random(input_data_shape)
                        check_layer_output_shape(layer, input_data)

    def test_UpSample1D(self):
        """UpSample1D(length=2) on a (2, 2, 3) input."""
        layer = UpSample1D(length=2)
        input_data = np.random.random((2, 2, 3))
        check_layer_output_shape(layer, input_data)

    def test_UpSample2D(self):
        """UpSample2D(size=(2, 2)) on a (2, 1, 2, 3) input."""
        layer = UpSample2D(size=(2, 2))
        input_data = np.random.random((2, 1, 2, 3))
        check_layer_output_shape(layer, input_data)

    def test_ZeroPadding1D(self):
        """ZeroPadding1D(1) on a (2, 2, 1) input."""
        layer = ZeroPadding1D(1)
        input_data = np.random.random((2, 2, 1))
        check_layer_output_shape(layer, input_data)

    def test_ZeroPadding2D(self):
        """ZeroPadding2D((1, 2)) on a (2, 1, 2, 3) input."""
        layer = ZeroPadding2D((1, 2))
        input_data = np.random.random((2, 1, 2, 3))
        check_layer_output_shape(layer, input_data)

    # #############
    # # Recurrent #
    # #############
    def test_SimpleRNN(self):
        """SimpleRNN(2) on a (2, 2, 3) input."""
        # all recurrent layers inherit output_shape
        # from the same base recurrent layer
        layer = SimpleRNN(2)
        input_data = np.random.random((2, 2, 3))
        check_layer_output_shape(layer, input_data)
|
||||
|
||||
|
||||
# Run the test suite when this file is executed directly as a script.
if __name__ == "__main__":
    unittest.main()
|
||||
+16
-15
@@ -11,7 +11,7 @@ from keras.utils.np_utils import to_categorical
|
||||
import unittest
|
||||
|
||||
|
||||
class TestRegularizers(unittest.TestCase):
|
||||
class TestTasks(unittest.TestCase):
|
||||
def test_vector_clf(self):
|
||||
nb_hidden = 10
|
||||
|
||||
@@ -27,9 +27,9 @@ class TestRegularizers(unittest.TestCase):
|
||||
y_test = to_categorical(y_test)
|
||||
|
||||
model = Sequential()
|
||||
model.add(Dense(X_train.shape[-1], nb_hidden))
|
||||
model.add(Dense(nb_hidden, input_shape=(X_train.shape[-1],)))
|
||||
model.add(Activation('relu'))
|
||||
model.add(Dense(nb_hidden, y_train.shape[-1]))
|
||||
model.add(Dense(y_train.shape[-1]))
|
||||
model.add(Activation('softmax'))
|
||||
model.compile(loss='categorical_crossentropy', optimizer='rmsprop')
|
||||
history = model.fit(X_train, y_train, nb_epoch=12, batch_size=16, validation_data=(X_test, y_test), show_accuracy=True, verbose=2)
|
||||
@@ -47,16 +47,16 @@ class TestRegularizers(unittest.TestCase):
|
||||
print('y_test:', y_test.shape)
|
||||
|
||||
model = Sequential()
|
||||
model.add(Dense(X_train.shape[-1], nb_hidden))
|
||||
model.add(Dense(nb_hidden, input_shape=(X_train.shape[-1],)))
|
||||
model.add(Activation('tanh'))
|
||||
model.add(Dense(nb_hidden, y_train.shape[-1]))
|
||||
model.add(Dense(y_train.shape[-1]))
|
||||
model.compile(loss='hinge', optimizer='adagrad')
|
||||
history = model.fit(X_train, y_train, nb_epoch=12, batch_size=16, validation_data=(X_test, y_test), verbose=2)
|
||||
self.assertTrue(history.history['val_loss'][-1] < 0.9)
|
||||
|
||||
def test_temporal_clf(self):
|
||||
print('temporal classification data:')
|
||||
(X_train, y_train), (X_test, y_test) = get_test_data(nb_train=1000, nb_test=200, input_shape=(5,10),
|
||||
(X_train, y_train), (X_test, y_test) = get_test_data(nb_train=1000, nb_test=200, input_shape=(3, 5),
|
||||
classification=True, nb_class=2)
|
||||
print('X_train:', X_train.shape)
|
||||
print('X_test:', X_test.shape)
|
||||
@@ -67,7 +67,7 @@ class TestRegularizers(unittest.TestCase):
|
||||
y_test = to_categorical(y_test)
|
||||
|
||||
model = Sequential()
|
||||
model.add(GRU(X_train.shape[-1], y_train.shape[-1]))
|
||||
model.add(GRU(y_train.shape[-1], input_shape=(None, X_train.shape[-1])))
|
||||
model.add(Activation('softmax'))
|
||||
model.compile(loss='categorical_crossentropy', optimizer='adadelta')
|
||||
history = model.fit(X_train, y_train, nb_epoch=12, batch_size=16, validation_data=(X_test, y_test), show_accuracy=True, verbose=2)
|
||||
@@ -75,7 +75,7 @@ class TestRegularizers(unittest.TestCase):
|
||||
|
||||
def test_temporal_reg(self):
|
||||
print('temporal regression data:')
|
||||
(X_train, y_train), (X_test, y_test) = get_test_data(nb_train=1000, nb_test=200, input_shape=(5, 10), output_shape=(2,),
|
||||
(X_train, y_train), (X_test, y_test) = get_test_data(nb_train=1000, nb_test=200, input_shape=(3, 5), output_shape=(2,),
|
||||
classification=False)
|
||||
print('X_train:', X_train.shape)
|
||||
print('X_test:', X_test.shape)
|
||||
@@ -83,14 +83,14 @@ class TestRegularizers(unittest.TestCase):
|
||||
print('y_test:', y_test.shape)
|
||||
|
||||
model = Sequential()
|
||||
model.add(GRU(X_train.shape[-1], y_train.shape[-1]))
|
||||
model.add(GRU(y_train.shape[-1], input_shape=(None, X_train.shape[-1])))
|
||||
model.compile(loss='hinge', optimizer='adam')
|
||||
history = model.fit(X_train, y_train, nb_epoch=12, batch_size=16, validation_data=(X_test, y_test), verbose=2)
|
||||
self.assertTrue(history.history['val_loss'][-1] < 0.8)
|
||||
|
||||
def test_seq_to_seq(self):
|
||||
print('sequence to sequence data:')
|
||||
(X_train, y_train), (X_test, y_test) = get_test_data(nb_train=1000, nb_test=200, input_shape=(5, 10), output_shape=(5, 10),
|
||||
(X_train, y_train), (X_test, y_test) = get_test_data(nb_train=1000, nb_test=200, input_shape=(3, 5), output_shape=(3, 5),
|
||||
classification=False)
|
||||
print('X_train:', X_train.shape)
|
||||
print('X_test:', X_test.shape)
|
||||
@@ -98,14 +98,14 @@ class TestRegularizers(unittest.TestCase):
|
||||
print('y_test:', y_test.shape)
|
||||
|
||||
model = Sequential()
|
||||
model.add(TimeDistributedDense(X_train.shape[-1], y_train.shape[-1]))
|
||||
model.add(TimeDistributedDense(y_train.shape[-1], input_shape=(None, X_train.shape[-1])))
|
||||
model.compile(loss='hinge', optimizer='rmsprop')
|
||||
history = model.fit(X_train, y_train, nb_epoch=12, batch_size=16, validation_data=(X_test, y_test), verbose=2)
|
||||
self.assertTrue(history.history['val_loss'][-1] < 0.75)
|
||||
self.assertTrue(history.history['val_loss'][-1] < 0.8)
|
||||
|
||||
def test_img_clf(self):
|
||||
print('image classification data:')
|
||||
(X_train, y_train), (X_test, y_test) = get_test_data(nb_train=1000, nb_test=200, input_shape=(3, 32, 32),
|
||||
(X_train, y_train), (X_test, y_test) = get_test_data(nb_train=1000, nb_test=200, input_shape=(3, 8, 8),
|
||||
classification=True, nb_class=2)
|
||||
print('X_train:', X_train.shape)
|
||||
print('X_test:', X_test.shape)
|
||||
@@ -116,13 +116,14 @@ class TestRegularizers(unittest.TestCase):
|
||||
y_test = to_categorical(y_test)
|
||||
|
||||
model = Sequential()
|
||||
model.add(Convolution2D(32, 3, 32, 32))
|
||||
model.add(Convolution2D(8, 8, 8, input_shape=(3, 8, 8)))
|
||||
model.add(Activation('sigmoid'))
|
||||
model.add(Flatten())
|
||||
model.add(Dense(32, y_test.shape[-1]))
|
||||
model.add(Dense(y_test.shape[-1]))
|
||||
model.add(Activation('softmax'))
|
||||
model.compile(loss='categorical_crossentropy', optimizer='sgd')
|
||||
history = model.fit(X_train, y_train, nb_epoch=12, batch_size=16, validation_data=(X_test, y_test), show_accuracy=True, verbose=2)
|
||||
print(history.history['val_acc'][-1])
|
||||
self.assertTrue(history.history['val_acc'][-1] > 0.9)
|
||||
|
||||
|
||||
|
||||
@@ -133,14 +133,14 @@ class DrawActivations(Callback):
|
||||
# model.add(Activation('softmax'))
|
||||
|
||||
model = Sequential()
|
||||
model.add(Convolution2D(32, 1, 3, 3, border_mode='full'))
|
||||
model.add(Convolution2D(32, 1, 3, 3, border_mode='full'))
|
||||
model.add(Activation('relu'))
|
||||
model.add(MaxPooling2D(poolsize=(2, 2)))
|
||||
model.add(MaxPooling2D(pool_size=(2, 2)))
|
||||
model.add(Dropout(0.25))
|
||||
|
||||
model.add(Convolution2D(64, 32, 3, 3, border_mode='full'))
|
||||
model.add(Convolution2D(64, 32, 3, 3, border_mode='full'))
|
||||
model.add(Activation('relu'))
|
||||
model.add(MaxPooling2D(poolsize=(2, 2)))
|
||||
model.add(MaxPooling2D(pool_size=(2, 2)))
|
||||
model.add(Dropout(0.25))
|
||||
|
||||
model.add(Flatten())
|
||||
@@ -215,12 +215,22 @@ print("Test model checkpointer without validation data")
|
||||
import warnings
|
||||
warnings.filterwarnings('error')
|
||||
try:
|
||||
passed = False
|
||||
# this should issue a warning
|
||||
model.fit(X_train, Y_train, batch_size=batch_size, nb_epoch=nb_epoch, show_accuracy=True, verbose=0, callbacks =[checkpointer])
|
||||
except:
|
||||
print("Tests passed")
|
||||
import sys
|
||||
sys.exit(0)
|
||||
passed = True
|
||||
if not passed:
|
||||
raise Exception("Modelcheckpoint tests did not pass")
|
||||
|
||||
raise Exception("Modelcheckpoint tests did not pass")
|
||||
print("Test model checkpointer with pattern")
|
||||
filename = "model_weights.{epoch:04d}.hdf5"
|
||||
f = os.path.join(path, filename)
|
||||
nb_epoch = 3
|
||||
checkpointer = cbks.ModelCheckpoint(f)
|
||||
model.fit(X_train, Y_train, batch_size=batch_size, nb_epoch=nb_epoch, verbose=0, callbacks=[checkpointer])
|
||||
for i in range(nb_epoch):
|
||||
if not os.path.isfile(f.format(epoch=i)):
|
||||
raise Exception("Model weights were not saved separately for each epoch")
|
||||
|
||||
print("Tests passed")
|
||||
|
||||
@@ -8,7 +8,7 @@ import keras.utils.layer_utils as layer_utils
|
||||
print('-- Sequential model')
|
||||
left = Sequential()
|
||||
left.add(Convolution2D(32, 1, 3, 3, border_mode='valid'))
|
||||
left.add(MaxPooling2D(poolsize=(2, 2)))
|
||||
left.add(MaxPooling2D(pool_size=(2, 2)))
|
||||
left.add(Flatten())
|
||||
left.add(Dense(32 * 13 * 13, 50))
|
||||
left.add(Activation('relu'))
|
||||
|
||||
Referência em uma Nova Issue
Bloquear um usuário