387 linhas
14 KiB
Python
387 linhas
14 KiB
Python
# -*- coding: utf-8 -*-
|
|
"""8.5_cross_validate.ipynb
|
|
|
|
Automatically generated by Colaboratory.
|
|
|
|
Original file is located at
|
|
https://colab.research.google.com/drive/1qEkrFcZ9lLqd6gNgxX8Y8QoXlOhH3wXC
|
|
|
|
#Leave One Subject Out Cross Validation
|
|
|
|
* DREAMER => Shape After Loading
|
|
X.shape= (414, 58240, 14) Y.shape= (414, 2) Z.shape= (414, 2)
|
|
|
|
* DEAP => Shape After Loading
|
|
X.shape= (1280, 40, 8064) Y.shape= (1280, 2) Z.shape= (1280, 2)
|
|
|
|
* OASIS => Shape After Loading
|
|
X.shape= (600, 640, 14) Y.shape= (600, 2) Z.shape= (600, 2)
|
|
|
|
* i.e. OASIS and DREAMER are of form X = (rec, timepoints, channels), whereas DEAP is already (rec, channels, timepoints)
|
|
|
|
* reshaping X to (rec, channels,timepoints)
|
|
makes sense now
|
|
"""
|
|
|
|
!nvidia-smi
|
|
|
|
"""#RAPIDS Package Installation"""
|
|
|
|
# Install RAPIDS
|
|
!git clone https://github.com/rapidsai/rapidsai-csp-utils.git
|
|
!bash rapidsai-csp-utils/colab/rapids-colab.sh stable
|
|
|
|
import sys, os
|
|
|
|
# Put the Python 3.7 site-packages directory on the import path immediately
# ahead of Colab's dist-packages directory.
# NOTE(review): presumably site-packages is where the RAPIDS install script
# above places its packages - confirm against rapids-colab.sh.
# sys.path.index raises ValueError when the dist-packages entry is absent
# (e.g. on a runtime that is not Python 3.7); that failure is left loud on
# purpose because the paths below would be wrong in that case anyway.
dist_package_index = sys.path.index('/usr/local/lib/python3.7/dist-packages')
sys.path.insert(dist_package_index, '/usr/local/lib/python3.7/site-packages')

# Run the helper script shipped with the cloned repository in this module's
# global namespace. The file is opened in a ``with`` block so the handle is
# closed deterministically instead of being leaked.
# NOTE(review): this executes code that was just cloned from GitHub; pin the
# repository to a known commit if reproducibility or supply-chain safety matters.
with open('rapidsai-csp-utils/colab/update_modules.py') as update_modules_file:
    exec(update_modules_file.read(), globals())
|
|
|
|
import cuml
|
|
|
|
"""-----------------------------------------------------------------------------------------------------------------------------------------------------"""
|
|
|
|
from google.colab import drive
|
|
drive.mount('/gdrive',force_remount=True)
|
|
|
|
# Commented out IPython magic to ensure Python compatibility.
|
|
# %cd /gdrive/MyDrive/Project_DEAP/4.1.2021/
|
|
|
|
################################################################################
|
|
import TopNByFSMethods
|
|
import TopNByClassifier
|
|
import EpochedFeatures
|
|
from args_eeg import args as my_args
|
|
import ImportUtils
|
|
|
|
from ImportUtils import *
|
|
from TopNByFSMethods import *
|
|
from TopNByClassifier import *
|
|
from EpochedFeatures import *
|
|
from args_eeg import args as my_args
|
|
from ImportUtils import *
|
|
from TopNByFSMethods import *
|
|
from TopNByClassifier import *
|
|
from EpochedFeatures import *
|
|
|
|
from sklearn.svm import SVC
|
|
|
|
|
|
from DEAP_scripts.ImportUtils import *
|
|
from DEAP_scripts.TopNByFSMethods import *
|
|
from DEAP_scripts.TopNByClassifier import *
|
|
from DEAP_scripts.EpochedFeatures import *
|
|
from DEAP_scripts.args_eeg import args as my_args
|
|
from sklearn.svm import SVC
|
|
|
|
################################################################################
|
|
|
|
# Module-level accumulators: every call to cross_validate() appends the mean
# and standard deviation of its per-fold RMSE values here.
mean_rmse = []
std_rmse = []

# Seed NumPy's global RNG. RandomForestRegressor() below is created without a
# random_state, so it draws from this global state.
np.random.seed(42)

# (number of participants, recordings per participant) for each dataset the
# leave-one-subject-out split knows how to slice.
_LOSO_LAYOUT = {
    'DEAP': (32, 40),
    # Dreamer dataset has 23 subjects, each subject was shown 18 videos
    'DREAMER': (23, 18),
    'OASIS': (15, 40),
}


def _load_selected_features(dataset, window, stride, best_features_list):
    """Load the epoched features and labels, keeping only the selected features.

    Returns a ``(X, Y_epoch)`` pair of DataFrames: ``X`` holds one row per epoch
    and one column per retained feature row (float32, non-finite values replaced
    by 0), ``Y_epoch`` holds the labels stored under ``'Y'`` in the npz archive.
    """
    featurepath = os.getcwd() + '/' + dataset + '/data_extracted/featuresDict/'
    npz_path = (featurepath + "shannonEntropy_{}_{}.npz").format(window, stride)

    # Read the archive once and close it; only the column count of 'features'
    # is needed from it, plus the label array.
    with np.load(npz_path, allow_pickle=True) as archive:
        n_columns = archive['features'].shape[1]
        Y_epoch = archive['Y']

    # Load saved epoched features and keep only the requested ones. A new dict
    # is built so the object returned by loadFeaturesDict is not mutated.
    featuresDict = loadFeaturesDict(dataset)
    selected = {
        name: values
        for name, values in featuresDict.items()
        if name in best_features_list
    }
    print(list(selected.keys()))

    # Stack all retained feature blocks row-wise in a single concatenation.
    # The leading empty block keeps the result well-formed (0 rows) when no
    # feature survives the selection.
    blocks = [np.empty((0, n_columns))]
    blocks.extend(selected.values())
    featureMatrix = np.concatenate(blocks, axis=0).astype('float64')

    # Replace NaN and +/-inf by 0 in one step. Leaving posinf/neginf at their
    # defaults would map infinities to +/-1.8e308, which overflow back to inf
    # when the matrix is cast to float32 below.
    featureMatrix = np.nan_to_num(featureMatrix, nan=0.0, posinf=0.0, neginf=0.0)

    # Transpose so that rows are epochs and columns are features.
    X = pd.DataFrame(featureMatrix.T).astype(np.float32)
    Y_epoch = pd.DataFrame(Y_epoch)
    return X, Y_epoch


def _plot_fold_rmse(cv_rmse, dataset, label, output_dir):
    """Plot the per-participant RMSE curve, save it as SVG, show and clear it."""
    fig = plt.gcf()
    fig.set_size_inches(40, 20)
    plt.rcParams.update({'font.size': 40})
    plt.xlabel('Partipant No.')
    plt.ylabel('RMSE')
    plt.plot([str(x+1) for x in range(len(cv_rmse))], cv_rmse, linestyle='-', marker='o', color='b', markerfacecolor='r', linewidth=2.0, markersize = 15)
    plt.tight_layout()
    plt.savefig(os.path.join(output_dir, "CV_{}_{}.svg".format(dataset, label)), bbox_inches='tight', dpi=500)
    plt.show()
    plt.clf()


def cross_validate(dataset, window, stride, sfreq, label, best_features_list,
                   output_dir='/gdrive/MyDrive/Project_DEAP/4.1.2021/'):
    """Run leave-one-subject-out cross-validation with a random forest regressor.

    One fold is run per participant: the participant's rows form the test set
    and all remaining rows form the training set. The per-fold RMSE list is
    printed, pickled and plotted, and its mean / standard deviation are
    appended to the module-level ``mean_rmse`` / ``std_rmse`` lists.

    The split assumes the rows are ordered participant by participant, each
    participant owning ``recordings * epochs`` consecutive rows. Rows left over
    after the integer division are never used as test data.

    Parameters
    ----------
    dataset : str
        Name of the dataset; one of ``'DEAP'``, ``'DREAMER'``, ``'OASIS'``.
    window : length of the sliding window in seconds (used in the npz file name).
    stride : stride of the sliding window in seconds (used in the npz file name).
    sfreq : sampling frequency of the EEG dataset. Not used by this function;
        kept so existing callers keep working.
    label : int
        Column position of the target inside the label matrix.
    best_features_list : feature names to keep, obtained from the top
        electrode / feature analysis of the dataset.
    output_dir : str, optional
        Directory that receives ``<dataset><label>_cv_rmse.pkl`` and
        ``CV_<dataset>_<label>.svg``. Defaults to the previously hard-coded
        Google Drive folder.

    Raises
    ------
    ValueError
        If ``dataset`` is not supported, or if the feature matrix has fewer
        rows than participants * recordings (no complete epoch per recording).
    """
    if dataset not in _LOSO_LAYOUT:
        # Previously this fell through to a ZeroDivisionError further down.
        raise ValueError(
            "Unsupported dataset {!r}; expected one of {}".format(
                dataset, sorted(_LOSO_LAYOUT)))
    numbParticipants, numbRecordings = _LOSO_LAYOUT[dataset]

    X, Y_epoch = _load_selected_features(dataset, window, stride, best_features_list)

    print("Number of feature vectors in X = ", X.shape[1])
    print("X.shape = ", X.shape)

    # Leave-one-subject-out CV: number of folds = numbParticipants.
    numbEpochs = X.shape[0] // (numbParticipants * numbRecordings)
    print(X.shape[0])
    print("numbParticipants = ", numbParticipants)
    print("numbRecordings = ", numbRecordings)
    print("numbEpochs = ", numbEpochs)
    if numbEpochs == 0:
        raise ValueError(
            "Feature matrix has {} rows, fewer than the {} recordings expected "
            "for {}".format(X.shape[0], numbParticipants * numbRecordings, dataset))

    print(type(X))
    print(type(Y_epoch))

    # Work on plain arrays inside the loop; concatenation already produces
    # fresh training arrays, so no explicit deep copies are needed.
    features = X.to_numpy()
    targets = Y_epoch.iloc[:, label].to_numpy()
    rows_per_subject = numbRecordings * numbEpochs

    cv_rmse = []
    for subject in range(numbParticipants):
        start = subject * rows_per_subject
        stop = start + rows_per_subject

        X_test = features[start:stop]
        y_test = targets[start:stop]
        X_train = np.concatenate((features[:start], features[stop:]), axis=0)
        y_train = np.concatenate((targets[:start], targets[stop:]), axis=0)

        print(X_train.shape, y_train.shape, X_test.shape, y_test.shape)

        clf = RandomForestRegressor()
        clf.fit(X_train, y_train)
        y_predict = clf.predict(X_test)
        # Take the root explicitly: the ``squared=False`` keyword is deprecated
        # in recent scikit-learn releases, while this form works on all of them.
        rmse = np.sqrt(mean_squared_error(y_test, y_predict))
        print("window: {}, stide: {}, rmse: {}".format(window,stride,rmse))
        cv_rmse.append(rmse)

    print(cv_rmse)
    fold_mean = np.mean(cv_rmse)
    fold_std = np.std(cv_rmse)
    print("Mean Cross-validation RMSE = ", fold_mean)
    mean_rmse.append(fold_mean)
    print("Standard Deviation of Cross-validated RMSE = ", fold_std)
    std_rmse.append(fold_std)

    # Persist the per-fold RMSE list.
    with open(os.path.join(output_dir, '{}{}_cv_rmse.pkl'.format(dataset, label)), 'wb') as f:
        pickle.dump(cv_rmse, f)

    _plot_fold_rmse(cv_rmse, dataset, label, output_dir)
|
|
|
|
def main(dataset, window, stride, sfreq, model, label, approach, ml_algo, top,
         fs_method, best_features_list, *, run_ranking=False):
    """Cross-validate the selected features and optionally plot ranking curves.

    By default only ``cross_validate`` is run, which is what the function did
    before (an unconditional ``return`` made the ranking section unreachable).
    Pass ``run_ranking=True`` to additionally run the electrode-wise
    (``top == "e"``) or feature-wise (``top == "f"``) ranking and save its plot.

    Parameters
    ----------
    dataset : name of the dataset.
    window : length of the sliding window in seconds.
    stride : stride of the sliding window in seconds.
    sfreq : sampling frequency of the EEG dataset.
    model, approach, ml_algo, fs_method : accepted for interface compatibility;
        not used by this function.
    label : int
        Target column; ``1`` sends ranking plots to ``arousal_plots/``, any
        other value to ``plots/``.
    top : str
        ``"e"`` for electrode ranking, ``"f"`` for feature ranking. Only
        consulted when ``run_ranking`` is true; other values do nothing.
    best_features_list : feature list after performing top electrode and
        feature analysis for the dataset.
    run_ranking : bool, keyword-only
        Enable the ranking section. Defaults to ``False``.
    """
    print(locals())
    pwd = os.getcwd()

    cross_validate(dataset, window, stride, sfreq, label, best_features_list)
    if not run_ranking:
        return

    # Pick the ranking helpers and the plot file stem for the requested mode.
    if top == "e":
        rank_all = topElectrodeRegressionRanking
        rank_with_fs = topElectrodeFSRegressionRanking
        plot_stem = "CorrectedElectrodewiseRanking"
    elif top == "f":
        rank_all = topFeaturesRegressionRanking
        rank_with_fs = topFeatureFSRegressionRanking
        plot_stem = "CorrectedFeaturewiseRanking"
    else:
        return

    clf = RandomForestRegressor()
    rank_all(dataset, window, stride, sfreq, clf, label, scale=False, pca=False)
    for method in ('SelectKBest', 'RandomForest'):
        rank_with_fs(dataset, window, stride, sfreq, clf, label, scale=False,
                     pca=False, mutual_info=False, method=method)
    plt.legend(["Method A","Method B", "Method C"])

    # The two label cases differ only in the target directory.
    plot_dir = "/arousal_plots/" if label == 1 else "/plots/"
    plt.savefig(pwd + "/" + dataset + plot_dir + plot_stem + str(window) + str(stride) + ".svg", bbox_inches='tight')
    plt.show()
    plt.clf()
|
|
|
|
|
|
|
|
|
|
if __name__ == '__main__':
    # Arguments shared by every run below.
    common_args = dict(window=1, stride=1, sfreq=128, model='rfr',
                       approach='byfs', ml_algo='regression', top='f',
                       fs_method='SelectKBest')

    # (dataset, label, best features); label 0 is valence, label 1 is arousal.
    runs = [
        ('DREAMER', 0, ['HjorthMob','HjorthComp','stdDev','bandPwr_theta','ShannonRes_gamma','bandPwr_beta']),
        ('DREAMER', 1, ['HjorthMob','ShannonRes_gamma','HjorthComp','stdDev','bandPwr_gamma', 'bandPwr_theta']),
        ('DEAP', 0, ['bandPwr_gamma','ShannonRes_gamma','ShannonRes_beta','rasm_gamma','dasm_gamma','bandPwr_beta']),
        ('DEAP', 1, ['HjorthMob','HjorthComp','stdDev','ShannonRes_gamma','bandPwr_beta','bandPwr_theta','ShannonRes_beta','dasm_beta']),
        ('OASIS', 0, ['HjorthMob','stdDev','HjorthComp']),
        ('OASIS', 1, ['HjorthMob']),
    ]

    # Run the six dataset/label combinations in the listed order.
    for dataset_name, target_label, best_features_list in runs:
        main(dataset=dataset_name, label=target_label,
             best_features_list=best_features_list, **common_args)
|
|
|
|
# print(len(best_features_list))
|
|
# main(dataset='OASIS', window=1, stride=1, sfreq=128, model='rfr', label= 1,approach='byfs', ml_algo='regression', top='f', fs_method='SelectKBest', best_features_list = best_features_list)
|
|
# --dataset DREAMER --window 1 --stride 1 --sfreq 128 --model rfr --label 0 --approach byfs --ml_algo regression --top f --fs_method SelectKBest
|
|
|
|
"""#MINIMUM RMSE DURING CROSS-VALIDATION 6-6-2021"""
|
|
|
|
# Commented out IPython magic to ensure Python compatibility.
|
|
import matplotlib.pyplot as plt
|
|
# %matplotlib inline
|
|
import seaborn as sns
|
|
import copy
|
|
import os
|
|
from scipy import io,signal
|
|
import numpy as np
|
|
import pandas as pd
|
|
import pickle
|
|
#{Dataset_Name}{0/1}_cv_rmse.pkl :- 0 is for Valence and 1 is for Arousal
|
|
# Pickled per-fold RMSE lists written by cross_validate().
#{Dataset_Name}{0/1}_cv_rmse.pkl :- 0 is for Valence and 1 is for Arousal
pl = ['DREAMER0_cv_rmse.pkl', 'DREAMER1_cv_rmse.pkl', 'DEAP0_cv_rmse.pkl', 'DEAP1_cv_rmse.pkl', 'OASIS0_cv_rmse.pkl', 'OASIS1_cv_rmse.pkl']
dataset = ['DREAMER', 'DREAMER', 'DEAP', 'DEAP','OASIS','OASIS']
label = [0,1,0,1,0,1]

# Collect the smallest fold RMSE of every pickle, in the order of `pl`.
min_cv_rmse = []
for pickle_name in pl:
    with open(pickle_name, 'rb') as f:
        cv_rmse = pickle.load(f)
    min_cv_rmse.append(min(cv_rmse))

print(min_cv_rmse)
|
|
|
|
"""feature_select_main.py"""
|
|
|
|
!pip install dit
|
|
!pip install pyinform
|
|
|
|
from ImportUtils import *
|
|
from args_eeg import args as my_args
|
|
|
|
"""#Plot pickled results"""
|
|
|
|
# Commented out IPython magic to ensure Python compatibility.
|
|
import matplotlib.pyplot as plt
|
|
# %matplotlib inline
|
|
import seaborn as sns
|
|
import copy
|
|
import os
|
|
from scipy import io,signal
|
|
import numpy as np
|
|
import pandas as pd
|
|
import pickle
|
|
|
|
# with open('/gdrive/MyDrive/Project_DEAP/4.1.2021/{}{}_cv_rmse.pkl'.format(dataset,label), 'rb') as f:
|
|
# pickle.dump(cv_rmse, f)
|
|
|
|
# Pickled per-fold RMSE lists, with the dataset name and label id (0 or 1)
# that belong to each of them at the same position.
pl = ['DREAMER0_cv_rmse.pkl', 'DREAMER1_cv_rmse.pkl', 'DEAP0_cv_rmse.pkl', 'DEAP1_cv_rmse.pkl', 'OASIS0_cv_rmse.pkl', 'OASIS1_cv_rmse.pkl']
dataset = ['DREAMER', 'DREAMER', 'DEAP', 'DEAP','OASIS','OASIS']
label = [0,1,0,1,0,1]

# Make sure the target directory exists so savefig does not fail on a fresh
# Drive folder.
cv_stats_dir = "/gdrive/MyDrive/Project_DEAP/4.1.2021/cv_stats"
os.makedirs(cv_stats_dir, exist_ok=True)

# The font size is the same for every figure, so set it once up front.
plt.rcParams.update({'font.size': 50})

# Walk the three parallel lists together instead of indexing by position.
for pickle_name, dataset_name, label_id in zip(pl, dataset, label):
    with open(pickle_name, 'rb') as f:
        cv_rmse = pickle.load(f)

    fig = plt.gcf()
    fig.set_size_inches(40, 20)
    plt.xlabel('Partipant No.')
    plt.ylabel('RMSE')
    # One point per fold, labelled 1..N on the x axis.
    plt.plot([str(x+1) for x in range(len(cv_rmse))], cv_rmse, linestyle='-', marker='o', color='b', markerfacecolor='r', linewidth=2.0, markersize = 15)
    plt.tight_layout()
    plt.savefig("{}/CV_{}_{}.svg".format(cv_stats_dir, dataset_name, label_id), bbox_inches='tight', dpi=500)
    plt.show()
    plt.clf()
|
|
|
|
# Persist the accumulated mean / standard deviation lists, one pickle each.
for stats_path, stats_values in (
    ('/gdrive/MyDrive/Project_DEAP/4.1.2021/mean_cv_rmse.pkl', mean_rmse),
    ('/gdrive/MyDrive/Project_DEAP/4.1.2021/std_cv_rmse.pkl', std_rmse),
):
    with open(stats_path, 'wb') as f:
        pickle.dump(stats_values, f)

# Summary table with one row per dataset/label pair (V and A suffixes),
# written out as CSV.
df = pd.DataFrame({
    'Dataset-Label': ['DREAMER-V','DREAMER-A','DEAP-V','DEAP-A','OASIS-V','OASIS-A'],
    'Mean RMSE': mean_rmse,
    'Std Dev RMSE': std_rmse,
})
df.to_csv('/gdrive/MyDrive/Project_DEAP/4.1.2021/cv_rmse_stats.csv')
|
|
|