From eeb55e48a656bda014955ee504b365e18c77c6b3 Mon Sep 17 00:00:00 2001
From: Soboleva Natalia <n.sob55@gmail.com>
Date: Wed, 23 May 2018 14:46:53 +0300
Subject: [PATCH] second version of CNN AE added

---
 CNN2_AE.ipynb | 719 ++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 719 insertions(+)
 create mode 100644 CNN2_AE.ipynb

diff --git a/CNN2_AE.ipynb b/CNN2_AE.ipynb
new file mode 100644
index 0000000..d56ebca
--- /dev/null
+++ b/CNN2_AE.ipynb
@@ -0,0 +1,719 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "/home/ubuntu/.local/lib/python2.7/site-packages/h5py/__init__.py:36: FutureWarning: Conversion of the second argument of issubdtype from `float` to `np.floating` is deprecated. In future, it will be treated as `np.float64 == np.dtype(float).type`.\n",
+      "  from ._conv import register_converters as _register_converters\n",
+      "Using TensorFlow backend.\n"
+     ]
+    }
+   ],
+   "source": [
+    "import numpy as np\n",
+    "import importlib \n",
+    "import keras\n",
+    "from keras.models import Sequential, model_from_json\n",
+    "from keras.layers import Conv2DTranspose, Conv2D, MaxPooling2D, Flatten, Dense, Dropout, UpSampling2D, Reshape\n",
+    "\n",
+    "from keras.layers.normalization import BatchNormalization\n",
+    "\n",
+    "import pickle\n",
+    "import os\n",
+    "from sklearn.preprocessing import MinMaxScaler, StandardScaler\n",
+    "from sklearn.model_selection import train_test_split\n",
+    "import h5py    "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "['/job:localhost/replica:0/task:0/device:GPU:0']"
+      ]
+     },
+     "execution_count": 2,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "from keras import backend as K\n",
+    "K.tensorflow_backend._get_available_gpus()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Collecting keras\n",
+      "  Using cached https://files.pythonhosted.org/packages/54/e8/eaff7a09349ae9bd40d3ebaf028b49f5e2392c771f294910f75bb608b241/Keras-2.1.6-py2.py3-none-any.whl\n",
+      "Collecting pyyaml (from keras)\n",
+      "Collecting six>=1.9.0 (from keras)\n",
+      "  Using cached https://files.pythonhosted.org/packages/67/4b/141a581104b1f6397bfa78ac9d43d8ad29a7ca43ea90a2d863fe3056e86a/six-1.11.0-py2.py3-none-any.whl\n",
+      "Collecting scipy>=0.14 (from keras)\n",
+      "  Using cached https://files.pythonhosted.org/packages/2a/f3/de9c1bd16311982711209edaa8c6caa962db30ebb6a8cc6f1dcd2d3ef616/scipy-1.1.0-cp27-cp27mu-manylinux1_x86_64.whl\n",
+      "Collecting h5py (from keras)\n",
+      "  Using cached https://files.pythonhosted.org/packages/24/9e/d68bd01058e748bd5e7c3c6368d1703b4cd882b669e5d993a0237c75af5a/h5py-2.7.1-cp27-cp27mu-manylinux1_x86_64.whl\n",
+      "Collecting numpy>=1.9.1 (from keras)\n",
+      "  Using cached https://files.pythonhosted.org/packages/c0/e7/08f059a00367fd613e4f2875a16c70b6237268a1d6d166c6d36acada8301/numpy-1.14.3-cp27-cp27mu-manylinux1_x86_64.whl\n",
+      "Installing collected packages: pyyaml, six, numpy, scipy, h5py, keras\n",
+      "Successfully installed h5py-2.7.1 keras-2.1.6 numpy-1.14.3 pyyaml-3.12 scipy-1.1.0 six-1.11.0\n",
+      "\u001b[33mYou are using pip version 8.1.1, however version 10.0.1 is available.\n",
+      "You should consider upgrading via the 'pip install --upgrade pip' command.\u001b[0m\n"
+     ]
+    }
+   ],
+   "source": [
+    "!pip install keras"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def read_h5_file(file_name, scaler = None, preprocess = False):\n",
+    "    h5_file = h5py.File(train_eeg_dir + file_name, 'r')\n",
+    "    a_group_key = list(h5_file.keys())[0]\n",
+    "    eeg_data = np.array(h5_file[a_group_key]).T\n",
+    "    if preprocess:\n",
+    "        eeg_data = scaler.transform(eeg_data)\n",
+    "    return eeg_data"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def train_scaler(scaler, train_eeg_names, log = False):\n",
+    "    i = 0\n",
+    "    for eeg_name in train_eeg_names:\n",
+    "        if log:\n",
+    "            print(\"{} from {}\".format(i, len(train_eeg_names)))\n",
+    "            print(\"reading:{}\".format(eeg_name))\n",
+    "        data = read_h5_file(eeg_name)\n",
+    "        i = i+1\n",
+    "        scaler.fit(data)\n",
+    "        if log:\n",
+    "            print(\"trained on {}\".format(eeg_name))\n",
+    "            \n",
+    "def save_scaler(path,scaler):\n",
+    "    pickle.dump(scaler, open(path, 'wb'))\n",
+    "def load_scaler(path):\n",
+    "    scaler = pickle.load(open(path, 'rb'))\n",
+    "    return scaler"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "train_eeg_dir = \"./data/train/\"\n",
+    "trained_scaler_path = None"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "('Number of EEG overall:', 32)\n"
+     ]
+    }
+   ],
+   "source": [
+    "train_eeg_dir = \"./data/train/\"\n",
+    "all_train_eeg_names = [x for x in os.listdir(train_eeg_dir) \n",
+    "                 if x[-3:] == \".h5\"]\n",
+    "eeg_num = len(all_train_eeg_names)\n",
+    "print(\"Number of EEG overall:\", eeg_num)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "metadata": {
+    "scrolled": true
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "('Params before training ', {'copy': True, 'with_mean': True, 'with_std': True})\n",
+      "0 from 32\n",
+      "reading:2003_ivanova_post_eeg_processed.h5\n",
+      "trained on 2003_ivanova_post_eeg_processed.h5\n",
+      "1 from 32\n",
+      "reading:2403_kutuzova_posteeg_processed.h5\n",
+      "trained on 2403_kutuzova_posteeg_processed.h5\n",
+      "2 from 32\n",
+      "reading:miloslavov_22_05_pre_eeg_processed.h5\n",
+      "trained on miloslavov_22_05_pre_eeg_processed.h5\n",
+      "3 from 32\n",
+      "reading:2505_shirokova_post_eeg_processed.h5\n",
+      "trained on 2505_shirokova_post_eeg_processed.h5\n",
+      "4 from 32\n",
+      "reading:2505_shirokova_processed.h5\n",
+      "trained on 2505_shirokova_processed.h5\n",
+      "5 from 32\n",
+      "reading:zavrin_15021500_eyesclosed_post_eeg_processed.h5\n",
+      "trained on zavrin_15021500_eyesclosed_post_eeg_processed.h5\n",
+      "6 from 32\n",
+      "reading:gorin_rest_eeg_post_31011200_processed.h5\n",
+      "trained on gorin_rest_eeg_post_31011200_processed.h5\n",
+      "7 from 32\n",
+      "reading:zavrin_eyes_closed_eeg_15021500_processed.h5\n",
+      "trained on zavrin_eyes_closed_eeg_15021500_processed.h5\n",
+      "8 from 32\n",
+      "reading:gorin_310117_rest_eeg_processed.h5\n",
+      "trained on gorin_310117_rest_eeg_processed.h5\n",
+      "9 from 32\n",
+      "reading:gorbacheva_03021300_rest_eeg_processed.h5\n",
+      "trained on gorbacheva_03021300_rest_eeg_processed.h5\n",
+      "10 from 32\n",
+      "reading:300120171600_dagaev_rest_eeg_processed.h5\n",
+      "trained on 300120171600_dagaev_rest_eeg_processed.h5\n",
+      "11 from 32\n",
+      "reading:dagaev_post_rest_eeg_30011600_processed.h5\n",
+      "trained on dagaev_post_rest_eeg_30011600_processed.h5\n",
+      "12 from 32\n",
+      "reading:2403_kutuzova_pre_eeg_processed.h5\n",
+      "trained on 2403_kutuzova_pre_eeg_processed.h5\n",
+      "13 from 32\n",
+      "reading:zavrin_open_eyes_eeg_15021500_processed.h5\n",
+      "trained on zavrin_open_eyes_eeg_15021500_processed.h5\n",
+      "14 from 32\n",
+      "reading:08021400_post_eeg_shuhova_processed.h5\n",
+      "trained on 08021400_post_eeg_shuhova_processed.h5\n",
+      "15 from 32\n",
+      "reading:3103_petuhova_posteeg_processed.h5\n",
+      "trained on 3103_petuhova_posteeg_processed.h5\n",
+      "16 from 32\n",
+      "reading:2103_kozunova_post_eeg_processed.h5\n",
+      "trained on 2103_kozunova_post_eeg_processed.h5\n",
+      "17 from 32\n",
+      "reading:tsoy_pre_eeg_2504_processed.h5\n",
+      "trained on tsoy_pre_eeg_2504_processed.h5\n",
+      "18 from 32\n",
+      "reading:pre_egg_petuhova3103_processed.h5\n",
+      "trained on pre_egg_petuhova3103_processed.h5\n",
+      "19 from 32\n",
+      "reading:2704_zagirova_post_eeg_processed.h5\n",
+      "trained on 2704_zagirova_post_eeg_processed.h5\n",
+      "20 from 32\n",
+      "reading:bagaeva_post_eeg_13031500_processed.h5\n",
+      "trained on bagaeva_post_eeg_13031500_processed.h5\n",
+      "21 from 32\n",
+      "reading:2103_glebko_posteeg_processed.h5\n",
+      "trained on 2103_glebko_posteeg_processed.h5\n",
+      "22 from 32\n",
+      "reading:glebko_2103_pre_eeg_processed.h5\n",
+      "trained on glebko_2103_pre_eeg_processed.h5\n",
+      "23 from 32\n",
+      "reading:bagaeva_13021500_rest_eeg_processed.h5\n",
+      "trained on bagaeva_13021500_rest_eeg_processed.h5\n",
+      "24 from 32\n",
+      "reading:2205_miloslavov_post_eeg_processed.h5\n",
+      "trained on 2205_miloslavov_post_eeg_processed.h5\n",
+      "25 from 32\n",
+      "reading:post_eeg_tsoy_2504_processed.h5\n",
+      "trained on post_eeg_tsoy_2504_processed.h5\n",
+      "26 from 32\n",
+      "reading:shuhova_08022017_rest_eeg_processed.h5\n",
+      "trained on shuhova_08022017_rest_eeg_processed.h5\n",
+      "27 from 32\n",
+      "reading:200317_ivanova_pre_eeg_processed.h5\n",
+      "trained on 200317_ivanova_pre_eeg_processed.h5\n",
+      "28 from 32\n",
+      "reading:kozunova_pre_eeg_2103_processed.h5\n",
+      "trained on kozunova_pre_eeg_2103_processed.h5\n",
+      "29 from 32\n",
+      "reading:zavrib_post_eeg_eyesopen15021500_processed.h5\n",
+      "trained on zavrib_post_eeg_eyesopen15021500_processed.h5\n",
+      "30 from 32\n",
+      "reading:zagirova_pre_eeg_2704_processed.h5\n",
+      "trained on zagirova_pre_eeg_2704_processed.h5\n",
+      "31 from 32\n",
+      "reading:gorbacheva_03_02_2017_post_eeg_processed.h5\n",
+      "trained on gorbacheva_03_02_2017_post_eeg_processed.h5\n",
+      "('Params after training ', {'copy': True, 'with_mean': True, 'with_std': True})\n"
+     ]
+    }
+   ],
+   "source": [
+    "if trained_scaler_path:\n",
+    "    scaler = load_scaler(trained_scaler_path)\n",
+    "else:\n",
+    "    scaler = StandardScaler()\n",
+    "    print(\"Params before training \", scaler.get_params())\n",
+    "    train_scaler(scaler, all_train_eeg_names, log = True)\n",
+    "    print(\"Params after training \", scaler.get_params())\n",
+    "    save_scaler(\"./StandardScaler.p\", scaler)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 13,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "_________________________________________________________________\n",
+      "Layer (type)                 Output Shape              Param #   \n",
+      "=================================================================\n",
+      "conv2d_4 (Conv2D)            (None, 6, 54, 5)          130       \n",
+      "_________________________________________________________________\n",
+      "dropout_4 (Dropout)          (None, 6, 54, 5)          0         \n",
+      "_________________________________________________________________\n",
+      "max_pooling2d_4 (MaxPooling2 (None, 3, 27, 5)          0         \n",
+      "_________________________________________________________________\n",
+      "flatten_4 (Flatten)          (None, 405)               0         \n",
+      "_________________________________________________________________\n",
+      "dense_8 (Dense)              (None, 50)                20300     \n",
+      "=================================================================\n",
+      "Total params: 20,430\n",
+      "Trainable params: 20,430\n",
+      "Non-trainable params: 0\n",
+      "_________________________________________________________________\n",
+      "(None, 50)\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "/usr/local/lib/python2.7/dist-packages/ipykernel_launcher.py:5: UserWarning: Update your `Conv2D` call to the Keras 2 API: `Conv2D(activation=\"relu\", input_shape=(10, 58, 1..., padding=\"valid\", filters=5, kernel_size=(5, 5))`\n",
+      "  \"\"\"\n"
+     ]
+    }
+   ],
+   "source": [
+    "window_size = 10\n",
+    "encoding_dim = 50\n",
+    "\n",
+    "cnnencoder = Sequential((\n",
+    "    Conv2D(nb_filter=5, kernel_size=(5, 5), activation='relu', padding='valid', input_shape=(window_size, 58, 1)),\n",
+    "    Dropout(0.6),\n",
+    "    MaxPooling2D(),\n",
+    "    \n",
+    "    Flatten(),\n",
+    "    Dense(encoding_dim, activation='relu'),\n",
+    "))\n",
+    "cnnencoder.summary()\n",
+    "print(cnnencoder.output_shape)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 14,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "_________________________________________________________________\n",
+      "Layer (type)                 Output Shape              Param #   \n",
+      "=================================================================\n",
+      "dense_9 (Dense)              (None, 405)               20655     \n",
+      "_________________________________________________________________\n",
+      "reshape_3 (Reshape)          (None, 3, 27, 5)          0         \n",
+      "_________________________________________________________________\n",
+      "up_sampling2d_3 (UpSampling2 (None, 6, 54, 5)          0         \n",
+      "_________________________________________________________________\n",
+      "conv2d_transpose_3 (Conv2DTr (None, 10, 58, 5)         630       \n",
+      "_________________________________________________________________\n",
+      "dense_10 (Dense)             (None, 10, 58, 1)         6         \n",
+      "=================================================================\n",
+      "Total params: 21,291\n",
+      "Trainable params: 21,291\n",
+      "Non-trainable params: 0\n",
+      "_________________________________________________________________\n",
+      "(None, 10, 58, 1)\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "/usr/local/lib/python2.7/dist-packages/ipykernel_launcher.py:5: UserWarning: Update your `Conv2DTranspose` call to the Keras 2 API: `Conv2DTranspose(padding=\"valid\", activation=\"relu\", filters=5, kernel_size=(5, 5))`\n",
+      "  \"\"\"\n"
+     ]
+    }
+   ],
+   "source": [
+    "cnndecoder = Sequential((\n",
+    "    Dense(405, activation='relu', input_shape=(encoding_dim,)),\n",
+    "    Reshape((-1, 27, 5)),\n",
+    "    UpSampling2D(),\n",
+    "    Conv2DTranspose(nb_filter=5, kernel_size=(5, 5), activation='relu',  padding='valid'),\n",
+    "    Dense(1, activation='relu')\n",
+    "))\n",
+    "cnndecoder.summary()\n",
+    "print(cnndecoder.output_shape)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 15,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "_________________________________________________________________\n",
+      "Layer (type)                 Output Shape              Param #   \n",
+      "=================================================================\n",
+      "input_1 (InputLayer)         (None, 10, 58, 1)         0         \n",
+      "_________________________________________________________________\n",
+      "sequential_6 (Sequential)    (None, 50)                20430     \n",
+      "_________________________________________________________________\n",
+      "sequential_7 (Sequential)    (None, 10, 58, 1)         21291     \n",
+      "=================================================================\n",
+      "Total params: 41,721\n",
+      "Trainable params: 41,721\n",
+      "Non-trainable params: 0\n",
+      "_________________________________________________________________\n"
+     ]
+    }
+   ],
+   "source": [
+    "from keras.models import Model\n",
+    "from keras.layers import Input\n",
+    "\n",
+    "input_ = Input(shape=(window_size, 58, 1))\n",
+    "\n",
+    "autoencoder = Model(input_, cnndecoder(cnnencoder(input_)), name=\"autoencoder\")\n",
+    "autoencoder.compile(loss='mse', optimizer='adam') # .compile(optimizer='adadelta', loss='binary_crossentropy')\n",
+    "\n",
+    "autoencoder.summary()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 16,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "b1, b2, b3 = 'shuhova_08022017_rest_eeg_processed.h5', 'zavrib_post_eeg_eyesopen15021500_processed.h5', 'zavrin_15021500_eyesclosed_post_eeg_processed.h5'"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 17,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "all_train_eeg_names = np.array(all_train_eeg_names)\n",
+    "all_train_eeg_names = all_train_eeg_names[(all_train_eeg_names != b1) & (all_train_eeg_names != b2) & (all_train_eeg_names != b3)]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 18,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "('test_eeg_name is ', 'tsoy_pre_eeg_2504_processed.h5')\n"
+     ]
+    }
+   ],
+   "source": [
+    "overall_epoch_num = 10\n",
+    "file_epoch_num = 1\n",
+    "history_path = \"train_hist\"\n",
+    "\n",
+    "test_eeg_name = np.random.choice(all_train_eeg_names)\n",
+    "train_eeg_names = np.array(all_train_eeg_names)\n",
+    "print(\"test_eeg_name is \", test_eeg_name)\n",
+    "test_data = read_h5_file(test_eeg_name, scaler, True)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 19,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "learn_file_length = 300000"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 20,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import threading\n",
+    "class threadsafe_iter:\n",
+    "    def __init__(self, it):\n",
+    "        self.it = it\n",
+    "        self.lock = threading.Lock()\n",
+    "    def __iter__(self):\n",
+    "        return self\n",
+    "    def __next__(self):\n",
+    "        with self.lock:\n",
+    "            return next(self.it)\n",
+    "\n",
+    "def threadsafe_generator(f):\n",
+    "    def g(*a, **kw):\n",
+    "        return threadsafe_iter(f(*a, **kw))\n",
+    "    return g"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 21,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "batch_length = 10\n",
+    "def generate_batch():\n",
+    "    while True:\n",
+    "        cur_files = [[] for _ in range(len(train_eeg_names))]\n",
+    "        batches = [[] for _ in range(len(train_eeg_names))]\n",
+    "        files_count = len(train_eeg_names)\n",
+    "        while files_count > 0:\n",
+    "            file_ind = np.random.choice(np.arange(len(train_eeg_names)))\n",
+    "            if len(cur_files[file_ind]) == 1 and cur_files[file_ind] == -1: continue\n",
+    "            elif len(cur_files[file_ind]) < 1:\n",
+    "                raw = read_h5_file(train_eeg_names[file_ind], scaler, True)\n",
+    "                cur_files[file_ind] = raw\n",
+    "                #print(raw.shape[0] // batch_length)\n",
+    "                batches[file_ind] = np.arange(raw.shape[0] // batch_length)\n",
+    "                \n",
+    "            begin = np.random.choice(np.arange(len(batches[file_ind])))\n",
+    "            \n",
+    "            data = cur_files[file_ind][begin:begin+batch_length, :]\n",
+    "            yield data.reshape(-1, window_size, 58, 1), data.reshape(-1, window_size, 58, 1) # add noise later\n",
+    "            \n",
+    "            batches[file_ind] = np.delete(batches[file_ind], begin)\n",
+    "            if len(batches[file_ind]) == 0:\n",
+    "                cur_files[file_ind] = -1\n",
+    "                files_count -= 1"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "scrolled": false
+   },
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "/usr/local/lib/python2.7/dist-packages/ipykernel_launcher.py:5: UserWarning: The semantics of the Keras 2 argument `steps_per_epoch` is not the same as the Keras 1 argument `samples_per_epoch`. `steps_per_epoch` is the number of batches to draw from the generator at each epoch. Basically steps_per_epoch = samples_per_epoch/batch_size. Similarly `nb_val_samples`->`validation_steps` and `val_samples`->`steps` arguments have changed. Update your method calls accordingly.\n",
+      "  \"\"\"\n",
+      "/usr/local/lib/python2.7/dist-packages/ipykernel_launcher.py:5: UserWarning: Update your `fit_generator` call to the Keras 2 API: `fit_generator(<generator..., verbose=1, validation_data=(array([[[..., steps_per_epoch=30000, epochs=10)`\n",
+      "  \"\"\"\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Epoch 1/10\n",
+      "30000/30000 [==============================] - 311s 10ms/step - loss: 0.9182 - val_loss: 0.6719\n",
+      "Epoch 2/10\n",
+      "30000/30000 [==============================] - 248s 8ms/step - loss: 0.8781 - val_loss: 0.6621\n",
+      "Epoch 3/10\n",
+      "30000/30000 [==============================] - 250s 8ms/step - loss: 0.8464 - val_loss: 0.7231\n",
+      "Epoch 4/10\n",
+      "30000/30000 [==============================] - 249s 8ms/step - loss: 0.8559 - val_loss: 0.6404\n",
+      "Epoch 5/10\n",
+      "30000/30000 [==============================] - 249s 8ms/step - loss: 0.8484 - val_loss: 0.6289\n",
+      "Epoch 6/10\n",
+      "30000/30000 [==============================] - 249s 8ms/step - loss: 0.8080 - val_loss: 0.6601\n",
+      "Epoch 7/10\n",
+      "30000/30000 [==============================] - 250s 8ms/step - loss: 0.8400 - val_loss: 0.6365\n",
+      "Epoch 8/10\n",
+      "30000/30000 [==============================] - 250s 8ms/step - loss: 0.8306 - val_loss: 0.6557\n",
+      "Epoch 9/10\n",
+      "29997/30000 [============================>.] - ETA: 0s - loss: 0.8902"
+     ]
+    }
+   ],
+   "source": [
+    "history = autoencoder.fit_generator(generate_batch(), \n",
+    "                                    samples_per_epoch=30000, \n",
+    "                                    verbose=1,\n",
+    "                                    nb_epoch=10,\n",
+    "                                    validation_data=(test_data.reshape(-1, window_size, 58, 1), test_data.reshape(-1, window_size, 58, 1))\n",
+    "                                   )"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 137,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "history_path = \"train_hist_51.txt\""
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 138,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "cnnencoder.save('CNN_encoder50.p')\n",
+    "autoencoder.save('CNN_autoencoder50.p')\n",
+    "with open(history_path, 'wb') as file:\n",
+    "    pickle.dump(history.history, file)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 139,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "epoch: 0, file: 2003_ivanova_post_eeg_processed.h5\n",
+      "Train on 30000 samples, validate on 60495 samples\n",
+      "Epoch 1/1\n",
+      "30000/30000 [==============================] - 35s 1ms/step - loss: 0.9979 - val_loss: 0.5644\n",
+      "epoch: 0, file: 2403_kutuzova_posteeg_processed.h5\n",
+      "Train on 30000 samples, validate on 60495 samples\n",
+      "Epoch 1/1\n",
+      "23240/30000 [======================>.......] - ETA: 4s - loss: 1.0314"
+     ]
+    },
+    {
+     "ename": "KeyboardInterrupt",
+     "evalue": "",
+     "output_type": "error",
+     "traceback": [
+      "\u001b[0;31m----------------------------------------------------------------------\u001b[0m",
+      "\u001b[0;31mKeyboardInterrupt\u001b[0m                    Traceback (most recent call last)",
+      "\u001b[0;32m<ipython-input-139-77118eefc12f>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[1;32m      9\u001b[0m                                   \u001b[0mepochs\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mfile_epoch_num\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     10\u001b[0m                                   \u001b[0mbatch_size\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;36m10\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 11\u001b[0;31m                                   validation_data=(test_data.reshape(-1, window_size, 58, 1), test_data.reshape(-1, window_size, 58, 1)))\n\u001b[0m\u001b[1;32m     12\u001b[0m \u001b[0mcnnencoder\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msave\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'CNN_encoder3.p'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     13\u001b[0m \u001b[0mautoencoder\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msave\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'CNN_autoencoder3.p'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
+      "\u001b[0;32m/home/ubuntu/.local/lib/python2.7/site-packages/keras/engine/training.pyc\u001b[0m in \u001b[0;36mfit\u001b[0;34m(self, x, y, batch_size, epochs, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, initial_epoch, steps_per_epoch, validation_steps, **kwargs)\u001b[0m\n\u001b[1;32m   1703\u001b[0m                               \u001b[0minitial_epoch\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0minitial_epoch\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   1704\u001b[0m                               \u001b[0msteps_per_epoch\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0msteps_per_epoch\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1705\u001b[0;31m                               validation_steps=validation_steps)\n\u001b[0m\u001b[1;32m   1706\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   1707\u001b[0m     def evaluate(self, x=None, y=None,\n",
+      "\u001b[0;32m/home/ubuntu/.local/lib/python2.7/site-packages/keras/engine/training.pyc\u001b[0m in \u001b[0;36m_fit_loop\u001b[0;34m(self, f, ins, out_labels, batch_size, epochs, verbose, callbacks, val_f, val_ins, shuffle, callback_metrics, initial_epoch, steps_per_epoch, validation_steps)\u001b[0m\n\u001b[1;32m   1234\u001b[0m                         \u001b[0mins_batch\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mi\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mins_batch\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mi\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtoarray\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   1235\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1236\u001b[0;31m                     \u001b[0mouts\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mf\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mins_batch\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m   1237\u001b[0m                     \u001b[0;32mif\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0misinstance\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mouts\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mlist\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   1238\u001b[0m                         \u001b[0mouts\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0mouts\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
+      "\u001b[0;32m/home/ubuntu/.local/lib/python2.7/site-packages/keras/backend/tensorflow_backend.pyc\u001b[0m in \u001b[0;36m__call__\u001b[0;34m(self, inputs)\u001b[0m\n\u001b[1;32m   2480\u001b[0m         \u001b[0msession\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mget_session\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   2481\u001b[0m         updated = session.run(fetches=fetches, feed_dict=feed_dict,\n\u001b[0;32m-> 2482\u001b[0;31m                               **self.session_kwargs)\n\u001b[0m\u001b[1;32m   2483\u001b[0m         \u001b[0;32mreturn\u001b[0m \u001b[0mupdated\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0moutputs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   2484\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
+      "\u001b[0;32m/usr/local/lib/python2.7/dist-packages/tensorflow/python/client/session.pyc\u001b[0m in \u001b[0;36mrun\u001b[0;34m(self, fetches, feed_dict, options, run_metadata)\u001b[0m\n\u001b[1;32m    887\u001b[0m     \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    888\u001b[0m       result = self._run(None, fetches, feed_dict, options_ptr,\n\u001b[0;32m--> 889\u001b[0;31m                          run_metadata_ptr)\n\u001b[0m\u001b[1;32m    890\u001b[0m       \u001b[0;32mif\u001b[0m \u001b[0mrun_metadata\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    891\u001b[0m         \u001b[0mproto_data\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mtf_session\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mTF_GetBuffer\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mrun_metadata_ptr\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
+      "\u001b[0;32m/usr/local/lib/python2.7/dist-packages/tensorflow/python/client/session.pyc\u001b[0m in \u001b[0;36m_run\u001b[0;34m(self, handle, fetches, feed_dict, options, run_metadata)\u001b[0m\n\u001b[1;32m   1118\u001b[0m     \u001b[0;32mif\u001b[0m \u001b[0mfinal_fetches\u001b[0m \u001b[0;32mor\u001b[0m \u001b[0mfinal_targets\u001b[0m \u001b[0;32mor\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0mhandle\u001b[0m \u001b[0;32mand\u001b[0m \u001b[0mfeed_dict_tensor\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   1119\u001b[0m       results = self._do_run(handle, final_targets, final_fetches,\n\u001b[0;32m-> 1120\u001b[0;31m                              feed_dict_tensor, options, run_metadata)\n\u001b[0m\u001b[1;32m   1121\u001b[0m     \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   1122\u001b[0m       \u001b[0mresults\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
+      "\u001b[0;32m/usr/local/lib/python2.7/dist-packages/tensorflow/python/client/session.pyc\u001b[0m in \u001b[0;36m_do_run\u001b[0;34m(self, handle, target_list, fetch_list, feed_dict, options, run_metadata)\u001b[0m\n\u001b[1;32m   1315\u001b[0m     \u001b[0;32mif\u001b[0m \u001b[0mhandle\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   1316\u001b[0m       return self._do_call(_run_fn, self._session, feeds, fetches, targets,\n\u001b[0;32m-> 1317\u001b[0;31m                            options, run_metadata)\n\u001b[0m\u001b[1;32m   1318\u001b[0m     \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   1319\u001b[0m       \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_do_call\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0m_prun_fn\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_session\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mhandle\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mfeeds\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mfetches\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
+      "\u001b[0;32m/usr/local/lib/python2.7/dist-packages/tensorflow/python/client/session.pyc\u001b[0m in \u001b[0;36m_do_call\u001b[0;34m(self, fn, *args)\u001b[0m\n\u001b[1;32m   1321\u001b[0m   \u001b[0;32mdef\u001b[0m \u001b[0m_do_call\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mfn\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   1322\u001b[0m     \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1323\u001b[0;31m       \u001b[0;32mreturn\u001b[0m \u001b[0mfn\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m   1324\u001b[0m     \u001b[0;32mexcept\u001b[0m \u001b[0merrors\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mOpError\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0me\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   1325\u001b[0m       \u001b[0mmessage\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mcompat\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mas_text\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0me\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmessage\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
+      "\u001b[0;32m/usr/local/lib/python2.7/dist-packages/tensorflow/python/client/session.pyc\u001b[0m in \u001b[0;36m_run_fn\u001b[0;34m(session, feed_dict, fetch_list, target_list, options, run_metadata)\u001b[0m\n\u001b[1;32m   1300\u001b[0m           return tf_session.TF_Run(session, options,\n\u001b[1;32m   1301\u001b[0m                                    \u001b[0mfeed_dict\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mfetch_list\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtarget_list\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1302\u001b[0;31m                                    status, run_metadata)\n\u001b[0m\u001b[1;32m   1303\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   1304\u001b[0m     \u001b[0;32mdef\u001b[0m \u001b[0m_prun_fn\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0msession\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mhandle\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mfeed_dict\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mfetch_list\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
+      "\u001b[0;31mKeyboardInterrupt\u001b[0m: "
+     ]
+    }
+   ],
+   "source": [
+    "for epoch in range(overall_epoch_num//file_epoch_num):\n",
+    "    for name in train_eeg_names:\n",
+    "        train_data = read_h5_file(name, scaler, True)\n",
+    "        if len(train_data) > learn_file_length:\n",
+    "            train_data = train_data[:learn_file_length]\n",
+    "        print(\"epoch: {}, file: {}\".format(epoch, name))\n",
+    "        history = autoencoder.fit(train_data.reshape(-1, window_size, 58, 1), train_data.reshape(-1, window_size, 58, 1), \n",
+    "                                  verbose=1, \n",
+    "                                  epochs=file_epoch_num,\n",
+    "                                  batch_size = 10,\n",
+    "                                  validation_data=(test_data.reshape(-1, window_size, 58, 1), test_data.reshape(-1, window_size, 58, 1)))\n",
+    "cnnencoder.save('CNN_encoder3.p')\n",
+    "autoencoder.save('CNN_autoencoder3.p')\n",
+    "with open(history_path, 'wb') as file:\n",
+    "    pickle.dump(history.history, file)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "test_data.reshape(-1, 60, 58, 1)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 14,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "(604950, 58)"
+      ]
+     },
+     "execution_count": 14,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "test_data.shape"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [],
+   "source": [
+    "'shuhova_08022017_rest_eeg_processed.h5', 'zavrib_post_eeg_eyesopen15021500_processed.h5', "
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 2",
+   "language": "python",
+   "name": "python2"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 2
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython2",
+   "version": "2.7.12"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}