Arquivos
2022-11-08 22:04:08 +05:30

387 linhas
14 KiB
Python

# -*- coding: utf-8 -*-
"""8.5_cross_validate.ipynb
Automatically generated by Colaboratory.
Original file is located at
https://colab.research.google.com/drive/1qEkrFcZ9lLqd6gNgxX8Y8QoXlOhH3wXC
#Leave One Subject Out Cross Validation
* DREAMER => Shape After Loading
X.shape= (414, 58240, 14) Y.shape= (414, 2) Z.shape= (414, 2)
* DEAP => Shape After Loading
X.shape= (1280, 40, 8064) Y.shape= (1280, 2) Z.shape= (1280, 2)
* OASIS => Shape After Loading
X.shape= (600, 640, 14) Y.shape= (600, 2) Z.shape= (600, 2)
* i.e. OASIS and DREAMER are of form X = (rec, timepoints, channels), while DEAP is already (rec, channels, timepoints)
* reshaping X to (rec, channels,timepoints)
makes sense now
"""
# Notebook environment setup.
# NOTE: lines starting with `!` are IPython/Colab shell escapes; they are not
# valid syntax when this file is executed as a plain Python script.
!nvidia-smi
"""#RAPIDS Package Installation"""
# Install RAPIDS
!git clone https://github.com/rapidsai/rapidsai-csp-utils.git
!bash rapidsai-csp-utils/colab/rapids-colab.sh stable
import sys, os
# Insert the site-packages directory into sys.path immediately ahead of the
# dist-packages entry. list.index raises ValueError when that exact
# Python 3.7 dist-packages path is not already on sys.path.
dist_package_index = sys.path.index('/usr/local/lib/python3.7/dist-packages')
sys.path = sys.path[:dist_package_index] + ['/usr/local/lib/python3.7/site-packages'] + sys.path[dist_package_index:]
# Bare expression: only displays the path in a notebook cell, no effect in a script.
sys.path
# Execute the helper script from the cloned repository in this module's globals.
# NOTE(review): the file object opened here is never explicitly closed, and the
# executed code comes from a freshly cloned remote repository.
exec(open('rapidsai-csp-utils/colab/update_modules.py').read(), globals())
import cuml
"""-----------------------------------------------------------------------------------------------------------------------------------------------------"""
from google.colab import drive
# Mount Google Drive under /gdrive (force_remount=True re-mounts if needed).
drive.mount('/gdrive',force_remount=True)
# Commented out IPython magic to ensure Python compatibility.
# NOTE(review): with the %cd below disabled, relative paths used later in this
# file resolve against whatever the current working directory happens to be.
# %cd /gdrive/MyDrive/Project_DEAP/4.1.2021/
################################################################################
import TopNByFSMethods
import TopNByClassifier
import EpochedFeatures
from args_eeg import args as my_args
import ImportUtils
from ImportUtils import *
from TopNByFSMethods import *
from TopNByClassifier import *
from EpochedFeatures import *
from args_eeg import args as my_args
from ImportUtils import *
from TopNByFSMethods import *
from TopNByClassifier import *
from EpochedFeatures import *
from sklearn.svm import SVC
from DEAP_scripts.ImportUtils import *
from DEAP_scripts.TopNByFSMethods import *
from DEAP_scripts.TopNByClassifier import *
from DEAP_scripts.EpochedFeatures import *
from DEAP_scripts.args_eeg import args as my_args
from sklearn.svm import SVC
################################################################################
# Module-level accumulators: cross_validate() appends one mean / one standard
# deviation of the per-fold RMSE to these lists on every call, and they are
# pickled and written to CSV at the end of the file.
mean_rmse = []
std_rmse = []
# Seed NumPy's global random number generator.
np.random.seed(42)
def cross_validate(dataset, window, stride, sfreq, label, best_features_list,
                   output_dir='/gdrive/MyDrive/Project_DEAP/4.1.2021'):
    """Leave-one-subject-out cross-validation with a random-forest regressor.

    Builds a feature matrix from the saved epoched features of ``dataset``,
    keeps only the features named in ``best_features_list``, and evaluates a
    ``RandomForestRegressor`` once per participant, holding that participant's
    rows out as the test fold.

    Side effects:
      * appends the mean and standard deviation of the per-fold RMSE to the
        module-level lists ``mean_rmse`` and ``std_rmse``;
      * pickles the per-fold RMSE list to
        ``<output_dir>/<dataset><label>_cv_rmse.pkl``;
      * saves and shows a per-participant RMSE plot at
        ``<output_dir>/CV_<dataset>_<label>.svg``.

    Parameters
    ----------
    dataset : str
        Name of the dataset; one of 'DEAP', 'DREAMER' or 'OASIS'.
    window : length of the sliding window in seconds (used in the file name
        of the saved feature archive).
    stride : stride of the sliding window in seconds (used in the file name
        of the saved feature archive).
    sfreq : sampling frequency of the EEG dataset. Accepted for interface
        compatibility; not used by this function.
    label : int
        Column index into the saved target array ``Y``. Elsewhere in this
        file 0 is described as valence and 1 as arousal.
    best_features_list : list of str
        Feature names (keys of the features dictionary) to keep.
    output_dir : str, optional
        Directory receiving the pickle and the plot. Defaults to the path
        that was previously hard-coded.

    Raises
    ------
    ValueError
        If ``dataset`` is not one of the supported dataset names.
    """
    # (number of participants, recordings per participant) per dataset.
    # Dreamer has 23 subjects, each of whom was shown 18 videos.
    subject_layout = {
        'DEAP': (32, 40),
        'DREAMER': (23, 18),
        'OASIS': (15, 40),
    }
    if dataset not in subject_layout:
        # Previously this fell through with zero participants and crashed
        # later with an unrelated ZeroDivisionError.
        raise ValueError(
            "Unknown dataset {!r}; expected one of {}".format(
                dataset, sorted(subject_layout)))
    numbParticipants, numbRecordings = subject_layout[dataset]

    archive_path = os.path.join(
        os.getcwd(), dataset, 'data_extracted', 'featuresDict',
        "shannonEntropy_{}_{}.npz".format(window, stride))
    # Load the archive once and close it afterwards.
    # NOTE(review): allow_pickle=True can execute arbitrary code when the
    # archive is untrusted; only use with locally generated feature files.
    with np.load(archive_path, allow_pickle=True) as archive:
        n_columns = archive['features'].shape[1]
        Y_epoch = pd.DataFrame(archive['Y'])

    # Load saved epoched features and keep only the selected ones. A new dict
    # is built so the object returned by loadFeaturesDict is not mutated.
    featuresDict = {
        name: values
        for name, values in loadFeaturesDict(dataset).items()
        if name in best_features_list
    }
    featuresList = list(featuresDict.keys())
    print(featuresList)

    # Stack the selected feature blocks row-wise in a single concatenation
    # (repeated np.append re-copies the whole matrix on every iteration).
    featureMatrix = np.concatenate(
        [np.empty((0, n_columns))] + list(featuresDict.values()), axis=0)
    featureMatrix = featureMatrix.astype('float64')
    # Zero out NaN and +/-inf in one consistent step. With only nan=0,
    # np.nan_to_num maps +/-inf to the largest finite float64 values, which
    # overflow back to inf in the float32 cast below.
    featureMatrix = np.nan_to_num(featureMatrix, nan=0.0, posinf=0.0, neginf=0.0)

    # Transpose so that rows are samples and columns are features.
    X = pd.DataFrame(featureMatrix.T).astype(np.float32)

    print("Number of feature vectors in X = ", X.shape[1])
    print("X.shape = ", X.shape)

    # Leave-one-subject-out CV: number of folds = number of participants.
    numbEpochs = X.shape[0] // (numbParticipants * numbRecordings)
    print(X.shape[0])
    print("numbParticipants = ", numbParticipants)
    print("numbRecordings = ", numbRecordings)
    print("numbEpochs = ", numbEpochs)
    print(type(X))
    print(type(Y_epoch))

    # Assumes the rows of X / Y_epoch are grouped contiguously by participant,
    # in participant order -- TODO confirm against the feature extraction.
    rows_per_subject = numbRecordings * numbEpochs
    cv_rmse = []
    for i in range(numbParticipants):
        s = i * rows_per_subject
        e = s + rows_per_subject
        X_test = X.iloc[s:e, :]
        y_test = Y_epoch.iloc[s:e, label]
        # Everything outside [s, e) is training data.
        X_train = np.concatenate(
            (X.iloc[:s, :].to_numpy(), X.iloc[e:, :].to_numpy()), axis=0)
        y_train = np.concatenate(
            (Y_epoch.iloc[:s, label].to_numpy(),
             Y_epoch.iloc[e:, label].to_numpy()), axis=0)
        print(X_train.shape, y_train.shape, X_test.shape, y_test.shape)

        clf = RandomForestRegressor()
        clf.fit(X_train, y_train)
        y_predict = clf.predict(X_test)
        # Take the square root explicitly: the `squared=False` keyword of
        # mean_squared_error is deprecated/removed in newer scikit-learn.
        rmse = np.sqrt(mean_squared_error(y_test, y_predict))
        print("window: {}, stide: {}, rmse: {}".format(window, stride, rmse))
        cv_rmse.append(rmse)

    print(cv_rmse)
    print("Mean Cross-validation RMSE = ", np.mean(cv_rmse))
    mean_rmse.append(np.mean(cv_rmse))
    print("Standard Deviation of Cross-validated RMSE = ", np.std(cv_rmse))
    std_rmse.append(np.std(cv_rmse))

    # Persist the per-fold RMSE values.
    pickle_path = os.path.join(
        output_dir, '{}{}_cv_rmse.pkl'.format(dataset, label))
    with open(pickle_path, 'wb') as f:
        pickle.dump(cv_rmse, f)

    # Plot RMSE per held-out participant (1-based labels on the x axis).
    fig = plt.gcf()
    fig.set_size_inches(40, 20)
    plt.rcParams.update({'font.size': 40})
    plt.xlabel('Partipant No.')
    plt.ylabel('RMSE')
    plt.plot([str(x + 1) for x in range(len(cv_rmse))], cv_rmse,
             linestyle='-', marker='o', color='b', markerfacecolor='r',
             linewidth=2.0, markersize=15)
    plt.tight_layout()
    plt.savefig(os.path.join(output_dir, "CV_{}_{}.svg".format(dataset, label)),
                bbox_inches='tight', dpi=500)
    plt.show()
    plt.clf()
def main(dataset, window, stride, sfreq, model, label, approach, ml_algo, top,
         fs_method, best_features_list, *, run_ranking=False):
    """Run cross-validation for one dataset/label, optionally with ranking plots.

    By default this prints its arguments, calls ``cross_validate`` and
    returns, which is what the function effectively did before: an
    unconditional ``return`` made the ranking code below it unreachable.
    That code is now reachable by passing ``run_ranking=True``.

    Parameters
    ----------
    dataset : name of the dataset.
    window : length of the sliding window in seconds.
    stride : stride of the sliding window in seconds.
    sfreq : sampling frequency of the EEG dataset.
    model, approach, ml_algo, fs_method : accepted for interface
        compatibility; not used by this function.
    label : target column index; the ranking plots go to the
        ``arousal_plots`` folder when it equals 1 and to ``plots`` otherwise.
    top : ``"e"`` for electrode-wise ranking, ``"f"`` for feature-wise
        ranking (only consulted when ``run_ranking`` is true; any other
        value skips the ranking step).
    best_features_list : feature list after performing top electrode and
        feature analysis for the dataset; forwarded to ``cross_validate``.
    run_ranking : bool, keyword-only
        When true, also compute and save the ranking plots after
        cross-validation. Defaults to ``False``.
    """
    # Debug dump of the call arguments (now also includes run_ranking).
    print(locals())
    pwd = os.getcwd()
    # getEpochedFeatures(dataset, window, stride, sfreq, label)
    cross_validate(dataset, window, stride, sfreq, label, best_features_list)
    if not run_ranking:
        return

    # Pick the ranking helpers and the output file stem for the chosen mode.
    if top == "e":
        rank_plain = topElectrodeRegressionRanking
        rank_with_fs = topElectrodeFSRegressionRanking
        plot_stem = "CorrectedElectrodewiseRanking"
    elif top == "f":
        rank_plain = topFeaturesRegressionRanking
        rank_with_fs = topFeatureFSRegressionRanking
        plot_stem = "CorrectedFeaturewiseRanking"
    else:
        return

    clf = RandomForestRegressor()
    rank_plain(dataset, window, stride, sfreq, clf, label, scale=False, pca=False)
    for method in ('SelectKBest', 'RandomForest'):
        rank_with_fs(dataset, window, stride, sfreq, clf, label, scale=False,
                     pca=False, mutual_info=False, method=method)

    plt.legend(["Method A", "Method B", "Method C"])
    plot_dir = "/arousal_plots/" if label == 1 else "/plots/"
    plt.savefig(pwd + "/" + dataset + plot_dir + plot_stem + str(window) + str(stride) + ".svg",
                bbox_inches='tight')
    plt.show()
    plt.clf()
if __name__ == '__main__':
    # One entry per run: (dataset name, label index, selected features).
    # Label 0 is the VALENCE run and label 1 the AROUSAL run of each dataset.
    runs = [
        ('DREAMER', 0, ['HjorthMob','HjorthComp','stdDev','bandPwr_theta','ShannonRes_gamma','bandPwr_beta']),
        ('DREAMER', 1, ['HjorthMob','ShannonRes_gamma','HjorthComp','stdDev','bandPwr_gamma', 'bandPwr_theta']),
        ('DEAP', 0, ['bandPwr_gamma','ShannonRes_gamma','ShannonRes_beta','rasm_gamma','dasm_gamma','bandPwr_beta']),
        ('DEAP', 1, ['HjorthMob','HjorthComp','stdDev','ShannonRes_gamma','bandPwr_beta','bandPwr_theta','ShannonRes_beta','dasm_beta']),
        ('OASIS', 0, ['HjorthMob','stdDev','HjorthComp']),
        ('OASIS', 1, ['HjorthMob']),
    ]
    # Every run shares the same window, stride, sampling rate and model settings.
    for dataset_name, target, best_features_list in runs:
        main(dataset=dataset_name, window=1, stride=1, sfreq=128, model='rfr',
             label=target, approach='byfs', ml_algo='regression', top='f',
             fs_method='SelectKBest', best_features_list=best_features_list)
    # Argument string kept from the original notebook for reference:
    # --dataset DREAMER --window 1 --stride 1 --sfreq 128 --model rfr --label 0 --approach byfs --ml_algo regression --top f --fs_method SelectKBest
"""#MINIMUM RMSE DURING CROSS-VALIDATION 6-6-2021"""
# Commented out IPython magic to ensure Python compatibility.
import matplotlib.pyplot as plt
# %matplotlib inline
import seaborn as sns
import copy
import os
from scipy import io,signal
import numpy as np
import pandas as pd
import pickle
# Pickle names follow {Dataset_Name}{0/1}_cv_rmse.pkl, where 0 is for Valence
# and 1 is for Arousal. They are opened relative to the current directory.
pl = ['DREAMER0_cv_rmse.pkl', 'DREAMER1_cv_rmse.pkl', 'DEAP0_cv_rmse.pkl', 'DEAP1_cv_rmse.pkl', 'OASIS0_cv_rmse.pkl', 'OASIS1_cv_rmse.pkl']
dataset = ['DREAMER', 'DREAMER', 'DEAP', 'DEAP','OASIS','OASIS']
label = [0,1,0,1,0,1]
# Collect the smallest per-fold RMSE stored in each pickle, in the order of `pl`.
min_cv_rmse = []
for pickle_name in pl:
    with open(pickle_name, 'rb') as f:
        cv_rmse = pickle.load(f)
    min_cv_rmse.append(min(cv_rmse))
print(min_cv_rmse)
"""feature_select_main.py"""
# Install extra packages into the notebook runtime.
# NOTE: `!` lines are IPython/Colab shell escapes and are not valid syntax
# when this file is run as a plain Python script.
!pip install dit
!pip install pyinform
from ImportUtils import *
from args_eeg import args as my_args
"""#Plot pickled results"""
# Commented out IPython magic to ensure Python compatibility.
import matplotlib.pyplot as plt
# %matplotlib inline
import seaborn as sns
import copy
import os
from scipy import io,signal
import numpy as np
import pandas as pd
import pickle
# Re-plot the per-fold RMSE stored in each pickle (opened relative to the
# current directory) and save one figure per dataset/label pair.
pl = ['DREAMER0_cv_rmse.pkl', 'DREAMER1_cv_rmse.pkl', 'DEAP0_cv_rmse.pkl', 'DEAP1_cv_rmse.pkl', 'OASIS0_cv_rmse.pkl', 'OASIS1_cv_rmse.pkl']
dataset = ['DREAMER', 'DREAMER', 'DEAP', 'DEAP','OASIS','OASIS']
label = [0,1,0,1,0,1]
for pickle_name, dataset_name, label_id in zip(pl, dataset, label):
    with open(pickle_name, 'rb') as f:
        cv_rmse = pickle.load(f)
    fig = plt.gcf()
    fig.set_size_inches(40, 20)
    plt.rcParams.update({'font.size': 50})
    plt.xlabel('Partipant No.')
    plt.ylabel('RMSE')
    # 1-based participant numbers as categorical x positions.
    participant_ticks = [str(number) for number in range(1, len(cv_rmse) + 1)]
    plt.plot(participant_ticks, cv_rmse, linestyle='-', marker='o', color='b',
             markerfacecolor='r', linewidth=2.0, markersize=15)
    plt.tight_layout()
    figure_path = "/gdrive/MyDrive/Project_DEAP/4.1.2021/cv_stats/CV_{}_{}.svg".format(dataset_name, label_id)
    plt.savefig(figure_path, bbox_inches='tight', dpi=500)
    plt.show()
    plt.clf()

# Persist the statistics accumulated in the module-level lists.
with open('/gdrive/MyDrive/Project_DEAP/4.1.2021/mean_cv_rmse.pkl', 'wb') as f:
    pickle.dump(mean_rmse, f)
with open('/gdrive/MyDrive/Project_DEAP/4.1.2021/std_cv_rmse.pkl', 'wb') as f:
    pickle.dump(std_rmse, f)

# One CSV row per dataset/label pair (V and A suffixes as in the labels below);
# mean_rmse and std_rmse must each hold exactly six entries for this to succeed.
df = pd.DataFrame({
    'Dataset-Label': ['DREAMER-V','DREAMER-A','DEAP-V','DEAP-A','OASIS-V','OASIS-A'],
    'Mean RMSE': mean_rmse,
    'Std Dev RMSE': std_rmse,
})
df.to_csv('/gdrive/MyDrive/Project_DEAP/4.1.2021/cv_rmse_stats.csv')