# Decoding_EEG/8_5_cross_validate.py

# -*- coding: utf-8 -*-
"""8.5_cross_validate.ipynb

Automatically generated by Colaboratory.

Original file is located at
    https://colab.research.google.com/drive/1qEkrFcZ9lLqd6gNgxX8Y8QoXlOhH3wXC

#Leave One Subject Out Cross Validation

* DREAMER => Shape After Loading
X.shape= (414, 58240, 14)  Y.shape= (414, 2)  Z.shape= (414, 2)

* DEAP => Shape After Loading
X.shape= (1280, 40, 8064)  Y.shape= (1280, 2)  Z.shape= (1280, 2)

* OASIS => Shape After Loading
X.shape= (600, 640, 14)  Y.shape= (600, 2)  Z.shape= (600, 2)

* i.e. DREAMER and OASIS are of form X = (rec, timepoints, channels),
  while DEAP is of form X = (rec, channels, timepoints)

* reshaping X to (rec, channels, timepoints) for every dataset
  makes sense now
"""

# --- Colab environment setup -------------------------------------------------
# The notebook used IPython "!" shell escapes here. Those are a SyntaxError in a
# plain .py file, so the same command lines are run through subprocess instead.
import subprocess
import sys, os


def _run_shell(command):
    """Run a fixed shell command line the way the notebook's ``!command`` did.

    Only ever called with the constant strings below (never with external
    input), so ``shell=True`` is safe here. Like ``!``, a failing command does
    not raise; its exit status is returned.
    """
    return subprocess.run(command, shell=True, check=False).returncode


# Show which GPU (if any) is attached to the runtime.
_run_shell('nvidia-smi')

# --- RAPIDS package installation ---------------------------------------------
# Install RAPIDS
_run_shell('git clone https://github.com/rapidsai/rapidsai-csp-utils.git')
_run_shell('bash rapidsai-csp-utils/colab/rapids-colab.sh stable')

# Put site-packages directly in front of dist-packages on the import path.
_DIST_PACKAGES = '/usr/local/lib/python3.7/dist-packages'
_SITE_PACKAGES = '/usr/local/lib/python3.7/site-packages'
if _DIST_PACKAGES in sys.path:
    dist_package_index = sys.path.index(_DIST_PACKAGES)
    sys.path.insert(dist_package_index, _SITE_PACKAGES)
else:
    # The paths above are specific to a Python 3.7 runtime; on any other
    # runtime there is nothing to reorder, so say so instead of crashing.
    print("WARNING: {} is not on sys.path; import path left unchanged".format(_DIST_PACKAGES))

# NOTE(review): this executes a script that was just cloned from GitHub. Only
# acceptable because the repository is a trusted upstream; never point this at
# arbitrary input.
with open('rapidsai-csp-utils/colab/update_modules.py') as _update_script:
    exec(_update_script.read(), globals())

import cuml

# -----------------------------------------------------------------------------

from google.colab import drive
drive.mount('/gdrive', force_remount=True)

# Script equivalent of the notebook's "%cd" magic: the code below builds paths
# from os.getcwd() and opens result pickles by relative name.
os.chdir('/gdrive/MyDrive/Project_DEAP/4.1.2021/')

################################################################################
import TopNByFSMethods
import TopNByClassifier
import EpochedFeatures
from args_eeg import args as my_args
import ImportUtils

from ImportUtils import *
from TopNByFSMethods import *
from  TopNByClassifier import *
from EpochedFeatures import *
from args_eeg import args as my_args
from ImportUtils import *
from TopNByFSMethods import *
from TopNByClassifier import *
from EpochedFeatures import *

from sklearn.svm import SVC


from DEAP_scripts.ImportUtils import *
from DEAP_scripts.TopNByFSMethods import *
from  DEAP_scripts.TopNByClassifier import *
from DEAP_scripts.EpochedFeatures import *
from DEAP_scripts.args_eeg import args as my_args
from sklearn.svm import SVC

################################################################################

# Seed NumPy's global random number generator once, before any run.
np.random.seed(42)

# Accumulators filled by cross_validate(): every call appends the mean and the
# standard deviation of its per-fold RMSE values, in call order.
mean_rmse, std_rmse = [], []
def _subject_layout(dataset):
    """Return ``(participants, recordings per participant)`` for *dataset*.

    Raises:
        ValueError: if *dataset* is not one of the known dataset names.
    """
    layouts = {
        'DEAP': (32, 40),
        # Dreamer dataset has 23 subjects, each subject was shown 18 videos
        'DREAMER': (23, 18),
        'OASIS': (15, 40),
    }
    if dataset not in layouts:
        raise ValueError(
            "Unknown dataset {!r}; expected one of {}".format(dataset, sorted(layouts)))
    return layouts[dataset]


def cross_validate(dataset, window, stride, sfreq, label, best_features_list,
                   output_dir='/gdrive/MyDrive/Project_DEAP/4.1.2021/'):
    """Run leave-one-subject-out cross-validation with a random-forest regressor.

    The rows of the feature matrix are assumed to be ordered subject by
    subject, each subject owning one contiguous, equally sized slice. Every
    fold holds out one such slice for testing and trains on all other rows.

    Parameters:
        dataset: Name of the dataset ('DEAP', 'DREAMER' or 'OASIS'). Also the
            name of the directory under the current working directory that
            holds ``data_extracted/featuresDict/``.
        window: Length of the sliding window in seconds (selects the feature file).
        stride: Stride of the sliding window in seconds (selects the feature file).
        sfreq: Sampling frequency of the EEG dataset. Not used by this function;
            kept so existing callers keep working.
        label: Column index of the regression target inside the stored ``Y`` array.
        best_features_list: Names of the features to keep, i.e. the feature
            list obtained from the top electrode / feature analysis.
        output_dir: Directory that receives the pickled per-fold RMSE list and
            the SVG plot.

    Side effects:
        Appends the mean and standard deviation of the per-fold RMSE to the
        module-level ``mean_rmse`` / ``std_rmse`` lists, writes
        ``{dataset}{label}_cv_rmse.pkl`` and ``CV_{dataset}_{label}.svg`` into
        *output_dir*, and shows the plot.

    Raises:
        ValueError: for an unknown dataset, when none of the requested features
            is available, or when there are too few rows to build the folds.
    """
    # Validate the dataset name first so a typo fails before any file is read.
    numbParticipants, numbRecordings = _subject_layout(dataset)

    featurepath = os.getcwd() + '/' + dataset + '/data_extracted/featuresDict/'
    entropy_file = featurepath + "shannonEntropy_{}_{}.npz".format(window, stride)
    # Open the archive once and close it again; 'features' supplies the column
    # count of the feature matrix, 'Y' the regression targets.
    with np.load(entropy_file, allow_pickle=True) as archive:
        ans = archive['features']
        Y_epoch = archive['Y']

    # load saved epoched features and keep only the requested ones
    # (order follows the stored dictionary, not best_features_list)
    featuresDict = {
        name: values
        for name, values in loadFeaturesDict(dataset).items()
        if name in best_features_list
    }
    featuresList = list(featuresDict)
    print(featuresList)
    if not featuresDict:
        raise ValueError(
            "None of the requested features {} are available for {}".format(
                list(best_features_list), dataset))

    # make feature matrix with select best features: stack all blocks in one
    # call. The empty seed block pins the expected column count, so a feature
    # block of the wrong width is rejected here.
    featureMatrix = np.concatenate(
        [np.empty((0, ans.shape[1]))] + list(featuresDict.values()), axis=0)

    # set datatype of feature matrix
    featureMatrix = featureMatrix.astype('float64')

    # Replace NaN and +/-infinity with zero in a single step. nan_to_num's
    # default maps infinity to the largest float64, which would overflow back
    # to infinity in the float32 cast below.
    featureMatrix = np.nan_to_num(featureMatrix, nan=0.0, posinf=0.0, neginf=0.0)

    # transpose feature matrix to prepare X
    X = pd.DataFrame(featureMatrix.T).astype(np.float32)

    # convert ndarray to dataframe
    Y_epoch = pd.DataFrame(Y_epoch)

    print("Number of feature vectors in X = ", X.shape[1])
    print("X.shape = ", X.shape)

    # Leave-one-subject-out-CV: number of folds = numbParticipants
    numbEpochs = X.shape[0] // (numbParticipants * numbRecordings)
    print(X.shape[0])
    print("numbParticipants = ", numbParticipants)
    print("numbRecordings = ", numbRecordings)
    print("numbEpochs = ", numbEpochs)
    if numbEpochs == 0:
        raise ValueError(
            "{} rows are too few for {} participants x {} recordings".format(
                X.shape[0], numbParticipants, numbRecordings))

    rows_per_subject = numbRecordings * numbEpochs
    if X.shape[0] != numbParticipants * rows_per_subject:
        # Leftover rows never land in a test fold and indicate that the
        # per-subject slices may not line up with the real subjects.
        print("WARNING: {} rows do not divide evenly into {} participants x {} recordings".format(
            X.shape[0], numbParticipants, numbRecordings))

    print(type(X))
    print(type(Y_epoch))

    # Plain arrays make the per-fold slicing cheap (no deep copies needed).
    X_values = X.to_numpy()
    y_values = Y_epoch.iloc[:, label].to_numpy()

    cv_rmse = []
    for i in range(numbParticipants):
        s = i * rows_per_subject
        e = s + rows_per_subject

        # held-out subject
        X_test = X_values[s:e]
        y_test = y_values[s:e]

        # everything before and after the held-out slice
        X_train = np.concatenate((X_values[:s], X_values[e:]), axis=0)
        y_train = np.concatenate((y_values[:s], y_values[e:]), axis=0)

        print(X_train.shape, y_train.shape, X_test.shape, y_test.shape)

        clf = RandomForestRegressor()
        clf.fit(X_train, y_train)
        y_predict = clf.predict(X_test)
        # Take the root explicitly instead of passing squared=False, which is
        # deprecated and no longer accepted by newer scikit-learn releases.
        rmse = np.sqrt(mean_squared_error(y_test, y_predict))
        print("window: {}, stide: {}, rmse: {}".format(window, stride, rmse))
        cv_rmse.append(rmse)

    print(cv_rmse)
    print("Mean Cross-validation RMSE  = ", np.mean(cv_rmse))
    mean_rmse.append(np.mean(cv_rmse))
    print("Standard Deviation of Cross-validated RMSE = ", np.std(cv_rmse))
    std_rmse.append(np.std(cv_rmse))

    # pickle list
    with open(os.path.join(output_dir, '{}{}_cv_rmse.pkl'.format(dataset, label)), 'wb') as f:
        pickle.dump(cv_rmse, f)

    # one point per held-out participant
    fig = plt.gcf()
    fig.set_size_inches(40, 20)
    plt.rcParams.update({'font.size': 40})
    plt.xlabel('Partipant No.')
    plt.ylabel('RMSE')
    plt.plot([str(x+1) for x in range(len(cv_rmse))], cv_rmse, linestyle='-', marker='o', color='b', markerfacecolor='r', linewidth=2.0, markersize = 15)
    plt.tight_layout()
    plt.savefig(os.path.join(output_dir, "CV_{}_{}.svg".format(dataset, label)), bbox_inches='tight', dpi=500)
    plt.show()
    plt.clf()

def main(dataset, window, stride, sfreq, model, label, approach, ml_algo, top, fs_method, best_features_list, run_ranking=False):
    """Cross-validate one dataset/label pair and optionally plot rankings.

    Parameters:
        dataset: Name of the dataset.
        window: Length of the sliding window in seconds.
        stride: Stride of the sliding window in seconds.
        sfreq: Sampling frequency of the EEG dataset.
        model, approach, ml_algo, fs_method: Accepted for compatibility with
            existing callers; not used by this function.
        label: Target index passed on to the analysis functions. When it is 1
            the ranking plot is saved under ``arousal_plots``, otherwise
            under ``plots``.
        top: "e" for the electrode-wise ranking, "f" for the feature-wise
            ranking. Only consulted when *run_ranking* is true.
        best_features_list: Feature list after performing top electrode and
            feature analysis for various datasets.
        run_ranking: When false (the default) only the cross-validation runs,
            which is what this function did before: an unconditional
            ``return`` used to make the ranking section unreachable. Pass
            ``True`` to also run and save the ranking plots.

    Note:
        The printed ``locals()`` dump now also lists ``run_ranking``.
    """
    print(locals())
    pwd = os.getcwd()

    # NOTE(review): the epoched features are read from disk by cross_validate;
    # presumably they were produced earlier by getEpochedFeatures - confirm.
    cross_validate(dataset, window, stride, sfreq, label, best_features_list)
    if not run_ranking:
        return

    # Pick the ranking functions and the plot name for the requested view.
    if top == "e":
        rank_by_model = topElectrodeRegressionRanking
        rank_by_selector = topElectrodeFSRegressionRanking
        plot_name = "CorrectedElectrodewiseRanking"
    elif top == "f":
        rank_by_model = topFeaturesRegressionRanking
        rank_by_selector = topFeatureFSRegressionRanking
        plot_name = "CorrectedFeaturewiseRanking"
    else:
        # Any other value: nothing to rank.
        return

    # The same regressor instance is handed to all three ranking calls.
    clf = RandomForestRegressor()
    rank_by_model(dataset, window, stride, sfreq, clf, label, scale=False, pca=False)
    for method in ('SelectKBest', 'RandomForest'):
        rank_by_selector(dataset, window, stride, sfreq, clf, label, scale=False, pca=False, mutual_info = False, method=method)
    plt.legend(["Method A","Method B", "Method C"])

    plot_dir = "/arousal_plots/" if label == 1 else "/plots/"
    plt.savefig(pwd + "/" + dataset + plot_dir + plot_name + str(window) + str(stride) + ".svg", bbox_inches='tight')
    plt.show()
    plt.clf()


if __name__ == '__main__':

    # One entry per cross-validation run: (dataset, label, best features).
    # Label 0 is valence, label 1 is arousal.
    cv_runs = [
        ('DREAMER', 0, ['HjorthMob','HjorthComp','stdDev','bandPwr_theta','ShannonRes_gamma','bandPwr_beta']),
        ('DREAMER', 1, ['HjorthMob','ShannonRes_gamma','HjorthComp','stdDev','bandPwr_gamma', 'bandPwr_theta']),
        ('DEAP', 0, ['bandPwr_gamma','ShannonRes_gamma','ShannonRes_beta','rasm_gamma','dasm_gamma','bandPwr_beta']),
        ('DEAP', 1, ['HjorthMob','HjorthComp','stdDev','ShannonRes_gamma','bandPwr_beta','bandPwr_theta','ShannonRes_beta','dasm_beta']),
        ('OASIS', 0, ['HjorthMob','stdDev','HjorthComp']),
        ('OASIS', 1, ['HjorthMob']),
    ]

    # Every run shares the same window, stride, sampling rate and model
    # settings; only the dataset, the label and the feature list change.
    for run_dataset, run_label, best_features_list in cv_runs:
        main(
            dataset=run_dataset,
            window=1,
            stride=1,
            sfreq=128,
            model='rfr',
            label=run_label,
            approach='byfs',
            ml_algo='regression',
            top='f',
            fs_method='SelectKBest',
            best_features_list=best_features_list,
        )

    # Equivalent command-line form of a single run:
    # --dataset DREAMER --window 1 --stride 1 --sfreq 128 --model rfr --label 0 --approach byfs --ml_algo regression --top f --fs_method SelectKBest

"""#MINIMUM RMSE DURING CROSS-VALIDATION 6-6-2021"""

# Commented out IPython magic to ensure Python compatibility.
import matplotlib.pyplot as plt
# %matplotlib inline
import seaborn as sns
import copy
import os
from scipy import io,signal
import numpy as np
import pandas as pd
import pickle
#{Dataset_Name}{0/1}_cv_rmse.pkl :- 0 is for Valence and 1 is for Arousal
# Directory the per-fold RMSE pickles were written to by cross_validate.
# Reading them from here instead of by bare file name removes the dependence
# on the current working directory.
results_dir = '/gdrive/MyDrive/Project_DEAP/4.1.2021/'

#{Dataset_Name}{0/1}_cv_rmse.pkl :- 0 is for Valence and 1 is for Arousal
pl = ['DREAMER0_cv_rmse.pkl', 'DREAMER1_cv_rmse.pkl', 'DEAP0_cv_rmse.pkl', 'DEAP1_cv_rmse.pkl', 'OASIS0_cv_rmse.pkl', 'OASIS1_cv_rmse.pkl']
dataset = ['DREAMER', 'DREAMER', 'DEAP', 'DEAP','OASIS','OASIS']
label = [0,1,0,1,0,1]

# Smallest per-fold RMSE of every run, in the order of `pl`.
min_cv_rmse = []
for pickle_name in pl:
    # These pickles are produced by this project itself; never load pickles
    # from an untrusted source.
    with open(os.path.join(results_dir, pickle_name), 'rb') as f:
        cv_rmse = pickle.load(f)
    min_cv_rmse.append(min(cv_rmse))

print(min_cv_rmse)

"""feature_select_main.py"""

# The notebook ran "!pip install dit" and "!pip install pyinform" here. The "!"
# shell escape is a SyntaxError in a plain .py file, so pip is invoked through
# the running interpreter instead; as with "!", a failed install does not raise.
import subprocess
import sys

for _package in ('dit', 'pyinform'):
    subprocess.run([sys.executable, '-m', 'pip', 'install', _package], check=False)

from ImportUtils import *
from args_eeg import args as my_args

"""#Plot pickled results"""

# Commented out IPython magic to ensure Python compatibility.
import matplotlib.pyplot as plt
# %matplotlib inline
import seaborn as sns
import copy
import os
from scipy import io,signal
import numpy as np
import pandas as pd
import pickle

# Directory holding the per-fold RMSE pickles written by cross_validate; the
# plots go into its cv_stats sub-directory, which is created when missing.
results_dir = '/gdrive/MyDrive/Project_DEAP/4.1.2021/'
stats_dir = os.path.join(results_dir, 'cv_stats')
os.makedirs(stats_dir, exist_ok=True)

pl = ['DREAMER0_cv_rmse.pkl', 'DREAMER1_cv_rmse.pkl', 'DEAP0_cv_rmse.pkl', 'DEAP1_cv_rmse.pkl', 'OASIS0_cv_rmse.pkl', 'OASIS1_cv_rmse.pkl']
dataset = ['DREAMER', 'DREAMER', 'DEAP', 'DEAP','OASIS','OASIS']
label = [0,1,0,1,0,1]

# One line plot per pickled run: RMSE of each held-out participant.
for pickle_name, dataset_name, label_id in zip(pl, dataset, label):

    # Read from results_dir rather than by bare file name so the current
    # working directory does not matter.
    with open(os.path.join(results_dir, pickle_name), 'rb') as f:
        cv_rmse = pickle.load(f)

    fig = plt.gcf()
    fig.set_size_inches(40, 20)
    plt.rcParams.update({'font.size': 50})
    plt.xlabel('Partipant No.')
    plt.ylabel('RMSE')
    plt.plot([str(x+1) for x in range(len(cv_rmse))], cv_rmse, linestyle='-', marker='o', color='b', markerfacecolor='r', linewidth=2.0, markersize = 15)
    plt.tight_layout()
    plt.savefig(os.path.join(stats_dir, "CV_{}_{}.svg".format(dataset_name, label_id)), bbox_inches='tight', dpi=500)
    plt.show()
    plt.clf()

# Persist the summary statistics collected in mean_rmse / std_rmse.
with open(os.path.join(results_dir, 'mean_cv_rmse.pkl'), 'wb') as f:
    pickle.dump(mean_rmse, f)

with open(os.path.join(results_dir, 'std_cv_rmse.pkl'), 'wb') as f:
    pickle.dump(std_rmse, f)

row_names = ['DREAMER-V','DREAMER-A','DEAP-V','DEAP-A','OASIS-V','OASIS-A']
# The table has exactly one row per dataset/label run. A different number of
# collected values (for example after running the cross-validation twice in
# one session) would otherwise only surface as an opaque pandas length error.
if not (len(mean_rmse) == len(std_rmse) == len(row_names)):
    raise ValueError(
        "Expected {} mean/std RMSE values, got {} and {}".format(
            len(row_names), len(mean_rmse), len(std_rmse)))

df = pd.DataFrame({
    'Dataset-Label': row_names,
    'Mean RMSE': mean_rmse,
    'Std Dev RMSE': std_rmse,
})
df.to_csv(os.path.join(results_dir, 'cv_rmse_stats.csv'))