diff --git a/Preprocess_Deap.ipynb b/Preprocess_Deap.ipynb index 12bd501..2fcae81 100644 --- a/Preprocess_Deap.ipynb +++ b/Preprocess_Deap.ipynb @@ -29,14 +29,24 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 7, "metadata": {}, "outputs": [], "source": [ "import numpy as np\n", - "import pyeeg as pe\n", + "#import pyeeg as pe\n", "import pickle as pickle\n", - "import pandas as pd" + "import pandas as pd\n", + "import math\n", + "\n", + "from sklearn import svm\n", + "from sklearn.preprocessing import normalize\n", + "from sklearn.ensemble import RandomForestRegressor\n", + "from sklearn.ensemble import AdaBoostRegressor\n", + "\n", + "import os\n", + "#import tensorflow as tf\n", + "import time" ] }, { @@ -82,7 +92,7 @@ }, { "cell_type": "code", - "execution_count": 62, + "execution_count": 60, "metadata": {}, "outputs": [], "source": [ @@ -124,6 +134,38 @@ " meta = np.array(meta)\n", " np.save('out\\s' + sub, meta, allow_pickle=True, fix_imports=True)\n", "\n", + "def testing (M, L, model):\n", + " '''\n", + " arguments: M: testing dataset\n", + " L: testing dataset label\n", + " model: scikit-learn model\n", + "\n", + " return: void\n", + " '''\n", + " output = model.predict(M[0:78080:32])\n", + " label = L[0:78080:32]\n", + "\n", + " k = 0\n", + " l = 0\n", + "\n", + " for i in range(len(label)):\n", + " k = k + (output[i] - label[i])*(output[i] - label[i]) #square difference \n", + "\n", + " #a good guess\n", + " if (output[i] > 5 and label[i] > 5):\n", + " l = l + 1\n", + " elif (output[i] < 5 and label[i] <5):\n", + " l = l + 1\n", + "\n", + " print (\"l2 error:\", k/len(label), \"classification accuracy:\", l / len(label),l, len(label))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ "for subjects in subjectList:\n", " FFT_Processing (subjects, channel, band, window_size, step_size, sample_rate)" ] @@ -133,7 +175,7 @@ "metadata": {}, "source": [ "## 3.Segment of 
preprocessed data\n", - "* training dataset: 75.5%\n", + "* training dataset: 75 %\n", "* validation dataset: 12.5%\n", "* testing dataset: 12.5%\n", "\n", @@ -204,12 +246,453 @@ "print(\"validation dataset:\", np.array(data_validation).shape, np.array(label_validation).shape)" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 4.Regression\n", + "### 0. Loading Training and Testing dataset" + ] + }, + { + "cell_type": "code", + "execution_count": 59, + "metadata": {}, + "outputs": [], + "source": [ + "with open('out\\data_training.npy', 'rb') as fileTrain:\n", + " X = np.load(fileTrain)\n", + " \n", + "with open('out\\label_training.npy', 'rb') as fileTrainL:\n", + " Y = np.load(fileTrainL)\n", + " \n", + "X = normalize(X)\n", + "Z = np.ravel(Y[:, [1]])\n", + "\n", + "Arousal_Train = np.ravel(Y[:, [0]])\n", + "Valence_Train = np.ravel(Y[:, [1]])\n", + "Domain_Train = np.ravel(Y[:, [2]])\n", + "Like_Train = np.ravel(Y[:, [3]])\n", + "\n", + "\n", + "\n", + "with open('out\\data_validation.npy', 'rb') as fileTrain:\n", + " M = np.load(fileTrain)\n", + " \n", + "with open('out\\label_validation.npy', 'rb') as fileTrainL:\n", + " N = np.load(fileTrainL)\n", + "\n", + "M = normalize(M)\n", + "L = np.ravel(N[:, [1]])\n", + "\n", + "Arousal_Test = np.ravel(N[:, [0]])\n", + "Valence_Test = np.ravel(N[:, [1]])\n", + "Domain_Test = np.ravel(N[:, [2]])\n", + "Like_Test = np.ravel(N[:, [3]])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + "### 1. 
Support Vector Regression\n", + "* default setting, l1 error: 1.621761042477756 classification accuracy: 0.6057377049180328 1478 2440" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "SVR(C=1.0, cache_size=200, coef0=0.0, degree=3, epsilon=0.1, gamma='auto',\n", + " kernel='rbf', max_iter=-1, shrinking=True, tol=0.001, verbose=False)" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "clf = svm.SVR()\n", + "clf.fit(X[0:468480:32], Z[0:468480:32]) " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 2. Random Forest Regression\n", + "* n_estimators = 10, sample rate = 1/32, l1 error: 1.137919672131145 classification accuracy: 0.7774590163934426 1897 2440\n", + "* n_estimators = 100, sample rate = 1/32, l1 error: 1.1029040163934432 classification accuracy: 0.8147540983606557 1988 2440\n", + "* n_estimators = 100, min_samples_leaf=10, sample rate = 1/32, l1 error: 1.274458098574928 classification accuracy: 0.7622950819672131 1860 2440\n", + "* n_estimators = 100, min_samples_leaf=50, sample rate = 1/32, l1 error: 1.4575897309409926 classification accuracy: 0.6823770491803278 1665 2440\n", + "\n", + "* n_estimators = 250, sample rate = 1/32, l1 error: 1.0905590819672137 classification accuracy: 0.830327868852459 2026 2440\n", + "* n_estimators = 750, sample rate = 1/32, l1 error: 1.0953162021857932 classification accuracy: 0.8340163934426229 2035 2440\n", + "* n_estimators = 750, sample rate = 1/8, l1 error: 1.066982950819674 classification accuracy: 0.8217213114754098 2005 2440\n", + "* __n_estimators = 512, sample rate = 1/32, l1 error: 1.092375304175206 classification accuracy: 0.8364754098360656 2041 2440__\n", + "\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 62, + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", 
+ "text": [ + "l2 error: 1.876775658972537 classification accuracy: 0.8290983606557377 2023 2440\n" + ] + } + ], + "source": [ + "Val_R = RandomForestRegressor(n_estimators=512, n_jobs=6)\n", + "Val_R.fit(X[0:468480:32], Valence_Train[0:468480:32])\n", + "testing (M, Valence_Test, Val_R)" + ] + }, + { + "cell_type": "code", + "execution_count": 63, + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "l2 error: 2.0764509040715233 classification accuracy: 0.8266393442622951 2017 2440\n" + ] + } + ], + "source": [ + "Aro_R = RandomForestRegressor(n_estimators=512, n_jobs=6)\n", + "Aro_R.fit(X[0:468480:32], Arousal_Train[0:468480:32])\n", + "testing (M, Arousal_Test, Aro_R)" + ] + }, + { + "cell_type": "code", + "execution_count": 64, + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "l2 error: 1.813647083229937 classification accuracy: 0.8184426229508197 1997 2440\n" + ] + } + ], + "source": [ + "Dom_R = RandomForestRegressor(n_estimators=512, n_jobs=6)\n", + "Dom_R.fit(X[0:468480:32], Domain_Train[0:468480:32])\n", + "testing (M, Domain_Test, Dom_R)" + ] + }, + { + "cell_type": "code", + "execution_count": 65, + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "l2 error: 2.489005384276336 classification accuracy: 0.8512295081967213 2077 2440\n" + ] + } + ], + "source": [ + "Lik_R = RandomForestRegressor(n_estimators=512, n_jobs=6)\n", + "Lik_R.fit(X[0:468480:32], Like_Train[0:468480:32])\n", + "testing (M, Like_Test, Lik_R)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 3. 
AdaBoost Regression\n", + "* n = 50, lr = 1.0: l2 error: 3.8454054839726695 classification accuracy: 0.6147540983606558 1500 2440\n", + "* n = 50, lr = 1.0, square: l2 error: 4.015289218608164 classification accuracy: 0.5913934426229508 1443 2440\n", + "* n = 500, lr = 1.0: l2 error: 3.8861651269012594 classification accuracy: 0.6155737704918033 1502 2440\n", + "*\n", + "*" + ] + }, + { + "cell_type": "code", + "execution_count": 32, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "AdaBoostRegressor(base_estimator=None, learning_rate=0.01, loss='linear',\n", + " n_estimators=5000, random_state=None)" + ] + }, + "execution_count": 32, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "clf = AdaBoostRegressor(n_estimators=5000, learning_rate=0.01)\n", + "clf.fit(X[0:468480:32], Z[0:468480:32])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Calculating accuracy and loss" + ] + }, + { + "cell_type": "code", + "execution_count": 58, + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "l2 error: 1.8832017200301692 classification accuracy: 0.8348360655737705 2037 2440\n" + ] + } + ], + "source": [ + "output = Val_R.predict(M[0:78080:32])\n", + "label = L[0:78080:32]\n", + "\n", + "k = 0\n", + "l = 0\n", + "\n", + "for i in range(len(label)):\n", + " k = k + (output[i] - label[i])*(output[i] - label[i]) #square difference \n", + " \n", + " #a good guess\n", + " if (output[i] > 5 and label[i] > 5):\n", + " l = l + 1\n", + " elif (output[i] < 5 and label[i] <5):\n", + " l = l + 1\n", + "\n", + "print (\"l2 error:\", k/len(label), \"classification accuracy:\", l / len(label),l, len(label))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 4. 
ANN\n", + "* 500 epoch 0.005 128 - 256 - 256 - 128 loss = 3.1\n", + "* 3000 epoch 0.0001 256-512-512-256 Epoch: 3196 - Training Cost: 1.8372873067855835 Testing Cost: 2.231332540512085\n" + ] + }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], - "source": [] + "source": [ + "# Pull out columns for X (data to train with) and Y (value to predict)\n", + "X_training = X[0:468480:32]\n", + "Y_training = Z[0:468480:32]\n", + "\n", + "# Pull out columns for X (data to train with) and Y (value to predict)\n", + "X_testing = M[0:78080:32]\n", + "Y_testing = L[0:78080:32]\n", + "\n", + "# DO Scale both the training inputs and outputs\n", + "X_scaled_training = pd.DataFrame (data = X_training).values\n", + "Y_scaled_training = pd.DataFrame (data = Y_training).values\n", + "\n", + "# It's very important that the training and test data are scaled with the same scaler.\n", + "X_scaled_testing = pd.DataFrame (data = X_testing).values\n", + "Y_scaled_testing = pd.DataFrame (data = Y_testing).values" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Turn off TensorFlow warning messages in program output\n", + "os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'\n", + "\n", + "# Define model parameters\n", + "t = time.time()\n", + "learning_rate = 0.0001\n", + "training_epochs = 5000\n", + "display_step = 1\n", + "\n", + "# Define how many inputs and outputs are in our neural network\n", + "number_of_inputs = 70\n", + "number_of_outputs = 1\n", + "\n", + "# Define how many neurons we want in each layer of our neural network\n", + "layer_1_nodes = 512\n", + "layer_2_nodes = 1024\n", + "layer_3_nodes = 1024\n", + "layer_4_nodes = 512\n", + "\n", + "# Section One: Define the layers of the neural network itself\n", + "RUN_NAME = str(int(round(t * 1000))) + '_' + str(layer_1_nodes) + '_' + str(layer_2_nodes) + '_' + str(layer_3_nodes) + '_' + str(layer_4_nodes) + '_' + str(learning_rate) + '_' + 
str(training_epochs) + '_' + 'Val'\n", + "\n", + "\n", + "# Input Layer\n", + "with tf.variable_scope('input'):\n", + " X = tf.placeholder(tf.float32, shape=(None, number_of_inputs))\n", + "\n", + "# Layer 1\n", + "with tf.variable_scope('layer_1'):\n", + " weights = tf.get_variable(\"weights1\", shape=[number_of_inputs, layer_1_nodes], initializer=tf.contrib.layers.xavier_initializer())\n", + " biases = tf.get_variable(name=\"biases1\", shape=[layer_1_nodes], initializer=tf.zeros_initializer())\n", + " layer_1_output = tf.nn.relu(tf.matmul(X, weights) + biases)\n", + "\n", + "# Layer 2\n", + "with tf.variable_scope('layer_2'):\n", + " weights = tf.get_variable(\"weights2\", shape=[layer_1_nodes, layer_2_nodes], initializer=tf.contrib.layers.xavier_initializer())\n", + " biases = tf.get_variable(name=\"biases2\", shape=[layer_2_nodes], initializer=tf.zeros_initializer())\n", + " layer_2_output = tf.nn.relu(tf.matmul(layer_1_output, weights) + biases)\n", + "\n", + "# Layer 3\n", + "with tf.variable_scope('layer_3'):\n", + " weights = tf.get_variable(\"weights3\", shape=[layer_2_nodes, layer_3_nodes], initializer=tf.contrib.layers.xavier_initializer())\n", + " biases = tf.get_variable(name=\"biases3\", shape=[layer_3_nodes], initializer=tf.zeros_initializer())\n", + " layer_3_output = tf.nn.relu(tf.matmul(layer_2_output, weights) + biases)\n", + "\n", + "# Layer 4\n", + "with tf.variable_scope('layer_4'):\n", + " weights = tf.get_variable(\"weights4\", shape=[layer_3_nodes, layer_4_nodes], initializer=tf.contrib.layers.xavier_initializer())\n", + " biases = tf.get_variable(name=\"biases4\", shape=[layer_4_nodes], initializer=tf.zeros_initializer())\n", + " layer_4_output = tf.nn.relu(tf.matmul(layer_3_output, weights) + biases)\n", + "\n", + "# Output Layer\n", + "with tf.variable_scope('output'):\n", + " weights = tf.get_variable(\"weights5\", shape=[layer_4_nodes, number_of_outputs], initializer=tf.contrib.layers.xavier_initializer())\n", + " biases = 
tf.get_variable(name=\"biases5\", shape=[number_of_outputs], initializer=tf.zeros_initializer())\n", + " prediction = tf.matmul(layer_4_output, weights) + biases\n", + "\n", + "# Section Two: Define the cost function of the neural network that will be optimized during training\n", + "\n", + "with tf.variable_scope('cost'):\n", + " Y = tf.placeholder(tf.float32, shape=(None, 1))\n", + " cost = tf.reduce_mean(tf.squared_difference(prediction, Y))\n", + "\n", + "# Section Three: Define the optimizer function that will be run to optimize the neural network\n", + "\n", + "with tf.variable_scope('train'):\n", + " optimizer = tf.train.AdamOptimizer(learning_rate).minimize(cost)\n", + "\n", + "# Create a summary operation to log the progress of the network\n", + "with tf.variable_scope('logging'):\n", + " tf.summary.scalar('current_cost', cost)\n", + " summary = tf.summary.merge_all()\n", + "\n", + "saver = tf.train.Saver()\n", + "\n", + "# Initialize a session so that we can run TensorFlow operations\n", + "with tf.Session() as session:\n", + "\n", + " # Run the global variable initializer to initialize all variables and layers of the neural network\n", + " session.run(tf.global_variables_initializer())\n", + "\n", + " # Create log file writers to record training progress.\n", + " # We'll store training and testing log data separately.\n", + " training_writer = tf.summary.FileWriter(\"./{}/logs/training\".format(RUN_NAME), session.graph)\n", + " testing_writer = tf.summary.FileWriter(\"./{}/logs/testing\".format(RUN_NAME), session.graph)\n", + "\n", + " # Run the optimizer over and over to train the network.\n", + " # One epoch is one full run through the training data set.\n", + " for epoch in range(training_epochs):\n", + "\n", + " # Feed in the training data and do one step of neural network training\n", + " session.run(optimizer, feed_dict={X: X_scaled_training, Y: Y_scaled_training})\n", + "\n", + " # Every few training steps, log our progress\n", + " if epoch % 
display_step == 0:\n", + " # Get the current accuracy scores by running the \"cost\" operation on the training and test data sets\n", + " training_cost, training_summary = session.run([cost, summary], feed_dict={X: X_scaled_training, Y:Y_scaled_training})\n", + " testing_cost, testing_summary = session.run([cost, summary], feed_dict={X: X_scaled_testing, Y:Y_scaled_testing})\n", + "\n", + " # Write the current training status to the log files (Which we can view with TensorBoard)\n", + " training_writer.add_summary(training_summary, epoch)\n", + " testing_writer.add_summary(testing_summary, epoch)\n", + "\n", + " # Print the current training status to the screen\n", + " print(\"Epoch: {} - Training Cost: {} Testing Cost: {}\".format(epoch, training_cost, testing_cost))\n", + "\n", + " # Training is now complete!\n", + "\n", + " # Get the final accuracy scores by running the \"cost\" operation on the training and test data sets\n", + " final_training_cost = session.run(cost, feed_dict={X: X_scaled_training, Y: Y_scaled_training})\n", + " final_testing_cost = session.run(cost, feed_dict={X: X_scaled_testing, Y: Y_scaled_testing})\n", + "\n", + " print(\"Final Training cost: {}\".format(final_training_cost))\n", + " print(\"Final Testing cost: {}\".format(final_testing_cost))\n", + "\n", + " save_path = saver.save(session, \"./{}/logs/trained_model.ckpt\".format(RUN_NAME))\n", + " print(\"Model saved: {}\".format(save_path))\n", + "\n", + " '''\n", + " # Now that the neural network is trained, let's use it to make predictions for our test data.\n", + " # Pass in the X testing data and run the \"prediciton\" operation\n", + " Y_predicted_scaled = session.run(prediction, feed_dict={X: X_scaled_testing})\n", + " # Unscale the data back to it's original units (dollars)\n", + " Y_predicted = Y_scaler.inverse_transform(Y_predicted_scaled)\n", + " real_earnings = test_data_df['total_earnings'].values[0]\n", + " predicted_earnings = Y_predicted[0][0]\n", + " print(\"The actual 
earnings of Game #1 were ${}\".format(real_earnings))\n", + " print(\"Our neural network predicted earnings of ${}\".format(predicted_earnings))\n", + " \n", + "'''\n", + " model_builder = tf.saved_model.builder.SavedModelBuilder(\"./{}/exported_model\".format(RUN_NAME))\n", + "\n", + " inputs = {\n", + " 'input': tf.saved_model.utils.build_tensor_info(X)\n", + " }\n", + " outputs = {\n", + " 'earnings': tf.saved_model.utils.build_tensor_info(prediction)\n", + " }\n", + "\n", + " signature_def = tf.saved_model.signature_def_utils.build_signature_def(\n", + " inputs=inputs,\n", + " outputs=outputs,\n", + " method_name=tf.saved_model.signature_constants.PREDICT_METHOD_NAME\n", + " )\n", + "\n", + " model_builder.add_meta_graph_and_variables(\n", + " session,\n", + " tags=[tf.saved_model.tag_constants.SERVING],\n", + " signature_def_map={\n", + " tf.saved_model.signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY: signature_def\n", + " }\n", + " )\n", + "\n", + " model_builder.save()\n", + " print('model saved')\n" + ] }, { "cell_type": "code",