Added Scripts
Esse commit está contido em:
@@ -0,0 +1,386 @@
|
|||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
"""8.5_cross_validate.ipynb
|
||||||
|
|
||||||
|
Automatically generated by Colaboratory.
|
||||||
|
|
||||||
|
Original file is located at
|
||||||
|
https://colab.research.google.com/drive/1qEkrFcZ9lLqd6gNgxX8Y8QoXlOhH3wXC
|
||||||
|
|
||||||
|
#Leave One Subject Out Cross Validation
|
||||||
|
|
||||||
|
* DREAMER => Shape After Loading
|
||||||
|
X.shape= (414, 58240, 14) Y.shape= (414, 2) Z.shape= (414, 2)
|
||||||
|
|
||||||
|
* DEAP => Shape After Loading
|
||||||
|
X.shape= (1280, 40, 8064) Y.shape= (1280, 2) Z.shape= (1280, 2)
|
||||||
|
|
||||||
|
* OASIS => Shape After Loading
|
||||||
|
X.shape= (600, 640, 14) Y.shape= (600, 2) Z.shape= (600, 2)
|
||||||
|
|
||||||
|
* i.e. OASIS and DEAP are of form X = (rec, timepoints,channels)
|
||||||
|
|
||||||
|
* reshaping X to (rec, channels,timepoints)
|
||||||
|
makes sense now
|
||||||
|
"""
|
||||||
|
|
||||||
|
!nvidia-smi
|
||||||
|
|
||||||
|
"""#RAPIDS Package Installation"""
|
||||||
|
|
||||||
|
# Install RAPIDS
|
||||||
|
!git clone https://github.com/rapidsai/rapidsai-csp-utils.git
|
||||||
|
!bash rapidsai-csp-utils/colab/rapids-colab.sh stable
|
||||||
|
|
||||||
|
import sys, os
|
||||||
|
|
||||||
|
dist_package_index = sys.path.index('/usr/local/lib/python3.7/dist-packages')
|
||||||
|
sys.path = sys.path[:dist_package_index] + ['/usr/local/lib/python3.7/site-packages'] + sys.path[dist_package_index:]
|
||||||
|
sys.path
|
||||||
|
exec(open('rapidsai-csp-utils/colab/update_modules.py').read(), globals())
|
||||||
|
|
||||||
|
import cuml
|
||||||
|
|
||||||
|
"""-----------------------------------------------------------------------------------------------------------------------------------------------------"""
|
||||||
|
|
||||||
|
from google.colab import drive
|
||||||
|
drive.mount('/gdrive',force_remount=True)
|
||||||
|
|
||||||
|
# Commented out IPython magic to ensure Python compatibility.
|
||||||
|
# %cd /gdrive/MyDrive/Project_DEAP/4.1.2021/
|
||||||
|
|
||||||
|
################################################################################
|
||||||
|
import TopNByFSMethods
|
||||||
|
import TopNByClassifier
|
||||||
|
import EpochedFeatures
|
||||||
|
from args_eeg import args as my_args
|
||||||
|
import ImportUtils
|
||||||
|
|
||||||
|
from ImportUtils import *
|
||||||
|
from TopNByFSMethods import *
|
||||||
|
from TopNByClassifier import *
|
||||||
|
from EpochedFeatures import *
|
||||||
|
from args_eeg import args as my_args
|
||||||
|
from ImportUtils import *
|
||||||
|
from TopNByFSMethods import *
|
||||||
|
from TopNByClassifier import *
|
||||||
|
from EpochedFeatures import *
|
||||||
|
|
||||||
|
from sklearn.svm import SVC
|
||||||
|
|
||||||
|
|
||||||
|
from DEAP_scripts.ImportUtils import *
|
||||||
|
from DEAP_scripts.TopNByFSMethods import *
|
||||||
|
from DEAP_scripts.TopNByClassifier import *
|
||||||
|
from DEAP_scripts.EpochedFeatures import *
|
||||||
|
from DEAP_scripts.args_eeg import args as my_args
|
||||||
|
from sklearn.svm import SVC
|
||||||
|
|
||||||
|
################################################################################
|
||||||
|
|
||||||
|
mean_rmse = []
|
||||||
|
std_rmse = []
|
||||||
|
|
||||||
|
np.random.seed(42)
|
||||||
|
def cross_validate(dataset, window, stride, sfreq, label, best_features_list):
|
||||||
|
# Parameters :-
|
||||||
|
# dataset :- Name of the Dataset
|
||||||
|
# window :- Length of the sliding window in seconds
|
||||||
|
# stride :- Stride of the sliding window in seconds
|
||||||
|
# sfreq :- sampling frequency of the EEG dataset
|
||||||
|
# best_features_list :- Featrue list after performing top electrode and feature analysis for various datasets
|
||||||
|
pwd = os.getcwd()
|
||||||
|
fs = sfreq
|
||||||
|
|
||||||
|
featurepath = os.getcwd() + '/' + dataset + '/data_extracted/featuresDict/'
|
||||||
|
ans = np.load((featurepath + "shannonEntropy_{}_{}.npz").format(window,stride), allow_pickle=True)['features']
|
||||||
|
Y_epoch = np.load((featurepath + "shannonEntropy_{}_{}.npz").format(window,stride), allow_pickle=True)['Y']
|
||||||
|
|
||||||
|
#load saved epoched features
|
||||||
|
featuresDict = None
|
||||||
|
featuresDict = loadFeaturesDict(dataset)
|
||||||
|
|
||||||
|
# pop out not best features
|
||||||
|
for k in list(featuresDict.keys()):
|
||||||
|
if k not in best_features_list:
|
||||||
|
|
||||||
|
featuresDict.pop(k)
|
||||||
|
|
||||||
|
featuresList = list(featuresDict.keys())
|
||||||
|
print(featuresList)
|
||||||
|
|
||||||
|
#make feature matrix with select best features
|
||||||
|
featureMatrix = np.empty((0,ans.shape[1])) #[14*32 + 1,80640]
|
||||||
|
for key,value in featuresDict.items():
|
||||||
|
featureMatrix = np.append(featureMatrix,value,axis=0)
|
||||||
|
|
||||||
|
#remove NaN features
|
||||||
|
if np.isnan(featureMatrix).any():
|
||||||
|
featureMatrix = np.nan_to_num(featureMatrix,nan=0)
|
||||||
|
|
||||||
|
#set datatype of feature matrix
|
||||||
|
featureMatrix = featureMatrix.astype('float64')
|
||||||
|
|
||||||
|
#transpose feature matrix to prepare X
|
||||||
|
X = pd.DataFrame(featureMatrix.T)
|
||||||
|
#replace infinity with NaN value and fill it with zero
|
||||||
|
X = X.replace([np.inf, -np.inf], np.nan)
|
||||||
|
X = X.fillna(0)
|
||||||
|
X = X.astype(np.float32)
|
||||||
|
|
||||||
|
#convert ndarray to dataframe
|
||||||
|
Y_epoch = pd.DataFrame(Y_epoch)
|
||||||
|
|
||||||
|
print("Number of feature vectors in X = ", X.shape[1])
|
||||||
|
print("X.shape = " ,X.shape)
|
||||||
|
|
||||||
|
|
||||||
|
#***********************************************************
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
#Leave-one-subject-out-CV
|
||||||
|
#number of folds = numbParticipants
|
||||||
|
numbParticipants = 0
|
||||||
|
numbRecordings = 0
|
||||||
|
|
||||||
|
if(dataset == 'DEAP'):
|
||||||
|
numbParticipants = 32
|
||||||
|
numbRecordings = 40
|
||||||
|
elif(dataset == 'DREAMER'):
|
||||||
|
# Dreamer dataset has 23 subjects, each subject was shown 18 videos
|
||||||
|
numbParticipants = 23
|
||||||
|
numbRecordings = 18
|
||||||
|
elif(dataset == 'OASIS'):
|
||||||
|
numbParticipants = 15
|
||||||
|
numbRecordings = 40
|
||||||
|
|
||||||
|
|
||||||
|
#numbEpochs
|
||||||
|
numbEpochs = X.shape[0]//(numbParticipants*numbRecordings)
|
||||||
|
print(X.shape[0])
|
||||||
|
print("numbParticipants = ", numbParticipants)
|
||||||
|
print("numbRecordings = " , numbRecordings)
|
||||||
|
print("numbEpochs = ", numbEpochs)
|
||||||
|
pass
|
||||||
|
|
||||||
|
print(type(X))
|
||||||
|
print(type(Y_epoch))
|
||||||
|
|
||||||
|
cv_rmse = []
|
||||||
|
|
||||||
|
for i in range(numbParticipants):
|
||||||
|
s = i*numbRecordings*numbEpochs
|
||||||
|
e = (i+1)*numbRecordings*numbEpochs
|
||||||
|
|
||||||
|
X_test = copy.deepcopy(X.iloc[s:e, :])
|
||||||
|
y_test = copy.deepcopy(Y_epoch.iloc[s:e, label])
|
||||||
|
|
||||||
|
X_train = copy.deepcopy(X.iloc[:s, :])
|
||||||
|
X_train = np.append(X_train, X.iloc[e:, :],axis=0)
|
||||||
|
|
||||||
|
y_train = copy.deepcopy(Y_epoch.iloc[:s, label])
|
||||||
|
y_train = np.append(y_train, Y_epoch.iloc[e:, label],axis=0)
|
||||||
|
|
||||||
|
print(X_train.shape, y_train.shape, X_test.shape, y_test.shape)
|
||||||
|
|
||||||
|
clf = RandomForestRegressor()
|
||||||
|
clf.fit(X_train, y_train)
|
||||||
|
y_predict = clf.predict(X_test)
|
||||||
|
rmse = mean_squared_error(y_test, y_predict,squared=False)
|
||||||
|
print("window: {}, stide: {}, rmse: {}".format(window,stride,rmse))
|
||||||
|
cv_rmse.append(rmse)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
print(cv_rmse)
|
||||||
|
print("Mean Cross-validation RMSE = ", np.mean(cv_rmse))
|
||||||
|
mean_rmse.append(np.mean(cv_rmse))
|
||||||
|
print("Standard Deviation of Cross-validated RMSE = ", np.std(cv_rmse))
|
||||||
|
std_rmse.append(np.std(cv_rmse))
|
||||||
|
|
||||||
|
#pickle list
|
||||||
|
with open('/gdrive/MyDrive/Project_DEAP/4.1.2021/{}{}_cv_rmse.pkl'.format(dataset,label), 'wb') as f:
|
||||||
|
pickle.dump(cv_rmse, f)
|
||||||
|
|
||||||
|
fig = plt.gcf()
|
||||||
|
fig.set_size_inches(40, 20)
|
||||||
|
# X = pd.DataFrame([x for x in range(1,) ])
|
||||||
|
plt.rcParams.update({'font.size': 40})
|
||||||
|
plt.xlabel('Partipant No.')
|
||||||
|
plt.ylabel('RMSE')
|
||||||
|
plt.plot([str(x+1) for x in range(len(cv_rmse))], cv_rmse, linestyle='-', marker='o', color='b', markerfacecolor='r', linewidth=2.0, markersize = 15)
|
||||||
|
plt.tight_layout()
|
||||||
|
plt.savefig("/gdrive/MyDrive/Project_DEAP/4.1.2021/CV_{}_{}.svg".format(dataset, label), bbox_inches='tight', dpi=500)
|
||||||
|
plt.show()
|
||||||
|
plt.clf()
|
||||||
|
|
||||||
|
def main(dataset, window, stride, sfreq, model, label, approach, ml_algo, top, fs_method, best_features_list):
|
||||||
|
# Parameters :-
|
||||||
|
# dataset :- Name of the Dataset
|
||||||
|
# window :- Length of the sliding window in seconds
|
||||||
|
# stride :- Stride of the sliding window in seconds
|
||||||
|
# sfreq :- sampling frequency of the EEG dataset
|
||||||
|
# best_features_list :- Featrue list after performing top electrode and feature analysis for various datasets
|
||||||
|
|
||||||
|
print(locals())
|
||||||
|
pwd = os.getcwd()
|
||||||
|
|
||||||
|
|
||||||
|
# getEpochedFeatures(dataset, window, stride, sfreq, label)
|
||||||
|
cross_validate(dataset, window, stride, sfreq, label, best_features_list)
|
||||||
|
return
|
||||||
|
if(top == "e"):
|
||||||
|
clf = RandomForestRegressor()
|
||||||
|
topElectrodeRegressionRanking(dataset, window, stride, sfreq, clf, label, scale=False, pca=False)
|
||||||
|
topElectrodeFSRegressionRanking(dataset, window, stride, sfreq, clf, label, scale=False, pca=False, mutual_info = False, method='SelectKBest')
|
||||||
|
topElectrodeFSRegressionRanking(dataset, window, stride, sfreq, clf, label, scale=False, pca=False, mutual_info = False, method='RandomForest')
|
||||||
|
plt.legend(["Method A","Method B", "Method C"])
|
||||||
|
|
||||||
|
if(label == 1):
|
||||||
|
plt.savefig(pwd + "/" + dataset + "/arousal_plots/" + "CorrectedElectrodewiseRanking" + str(window) + str(stride) + ".svg", bbox_inches='tight')
|
||||||
|
# plt.savefig(pwd + "/" + dataset + "/plots/" + "ElectrodewiseRanking" + str(window) + str(stride) + ".svg", bbox_inches='tight')
|
||||||
|
plt.show()
|
||||||
|
plt.clf()
|
||||||
|
|
||||||
|
else:
|
||||||
|
plt.savefig(pwd + "/" + dataset + "/plots/" + "CorrectedElectrodewiseRanking" + str(window) + str(stride) + ".svg", bbox_inches='tight')
|
||||||
|
# plt.savefig(pwd + "/" + dataset + "/plots/" + "ElectrodewiseRanking" + str(window) + str(stride) + ".svg", bbox_inches='tight')
|
||||||
|
plt.show()
|
||||||
|
plt.clf()
|
||||||
|
|
||||||
|
elif(top == "f"):
|
||||||
|
clf = RandomForestRegressor()
|
||||||
|
topFeaturesRegressionRanking(dataset, window, stride, sfreq, clf, label, scale=False, pca=False)
|
||||||
|
topFeatureFSRegressionRanking(dataset, window, stride, sfreq, clf, label, scale=False, pca=False, mutual_info = False, method='SelectKBest')
|
||||||
|
topFeatureFSRegressionRanking(dataset, window, stride, sfreq, clf, label, scale=False, pca=False, mutual_info = False, method='RandomForest')
|
||||||
|
if(label == 1):
|
||||||
|
plt.legend(["Method A","Method B", "Method C"])
|
||||||
|
plt.savefig(pwd + "/" + dataset + "/arousal_plots/" + "CorrectedFeaturewiseRanking" + str(window) + str(stride) + ".svg", bbox_inches='tight')
|
||||||
|
plt.show()
|
||||||
|
plt.clf()
|
||||||
|
else:
|
||||||
|
plt.legend(["Method A","Method B", "Method C"])
|
||||||
|
plt.savefig(pwd + "/" + dataset + "/plots/" + "CorrectedFeaturewiseRanking" + str(window) + str(stride) + ".svg", bbox_inches='tight')
|
||||||
|
plt.show()
|
||||||
|
plt.clf()
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == '__main__':
|
||||||
|
|
||||||
|
|
||||||
|
#DREAMER
|
||||||
|
#VALENCE
|
||||||
|
best_features_list = ['HjorthMob','HjorthComp','stdDev','bandPwr_theta','ShannonRes_gamma','bandPwr_beta']
|
||||||
|
main(dataset='DREAMER', window=1, stride=1, sfreq=128, model='rfr', label= 0,approach='byfs', ml_algo='regression', top='f', fs_method='SelectKBest', best_features_list = best_features_list)
|
||||||
|
#AROUSAL
|
||||||
|
best_features_list = ['HjorthMob','ShannonRes_gamma','HjorthComp','stdDev','bandPwr_gamma', 'bandPwr_theta']
|
||||||
|
main(dataset='DREAMER', window=1, stride=1, sfreq=128, model='rfr', label= 1,approach='byfs', ml_algo='regression', top='f', fs_method='SelectKBest', best_features_list = best_features_list)
|
||||||
|
|
||||||
|
#DEAP
|
||||||
|
#VALENCE
|
||||||
|
best_features_list = ['bandPwr_gamma','ShannonRes_gamma','ShannonRes_beta','rasm_gamma','dasm_gamma','bandPwr_beta']
|
||||||
|
main(dataset='DEAP', window=1, stride=1, sfreq=128, model='rfr', label= 0,approach='byfs', ml_algo='regression', top='f', fs_method='SelectKBest', best_features_list = best_features_list)
|
||||||
|
#AROUSAL
|
||||||
|
best_features_list = ['HjorthMob','HjorthComp','stdDev','ShannonRes_gamma','bandPwr_beta','bandPwr_theta','ShannonRes_beta','dasm_beta']
|
||||||
|
main(dataset='DEAP', window=1, stride=1, sfreq=128, model='rfr', label= 1,approach='byfs', ml_algo='regression', top='f', fs_method='SelectKBest', best_features_list = best_features_list)
|
||||||
|
|
||||||
|
#OASIS
|
||||||
|
#VALENCE
|
||||||
|
best_features_list = ['HjorthMob','stdDev','HjorthComp']
|
||||||
|
main(dataset='OASIS', window=1, stride=1, sfreq=128, model='rfr', label= 0,approach='byfs', ml_algo='regression', top='f', fs_method='SelectKBest', best_features_list = best_features_list)
|
||||||
|
#AROUSAL
|
||||||
|
best_features_list = ['HjorthMob']
|
||||||
|
main(dataset='OASIS', window=1, stride=1, sfreq=128, model='rfr', label= 1,approach='byfs', ml_algo='regression', top='f', fs_method='SelectKBest', best_features_list = best_features_list)
|
||||||
|
|
||||||
|
# print(len(best_features_list))
|
||||||
|
# main(dataset='OASIS', window=1, stride=1, sfreq=128, model='rfr', label= 1,approach='byfs', ml_algo='regression', top='f', fs_method='SelectKBest', best_features_list = best_features_list)
|
||||||
|
# --dataset DREAMER --window 1 --stride 1 --sfreq 128 --model rfr --label 0 --approach byfs --ml_algo regression --top f --fs_method SelectKBest
|
||||||
|
|
||||||
|
"""#MINIMUM RMSE DURING CROSS-VALIDATION 6-6-2021"""
|
||||||
|
|
||||||
|
# Commented out IPython magic to ensure Python compatibility.
|
||||||
|
import matplotlib.pyplot as plt
|
||||||
|
# %matplotlib inline
|
||||||
|
import seaborn as sns
|
||||||
|
import copy
|
||||||
|
import os
|
||||||
|
from scipy import io,signal
|
||||||
|
import numpy as np
|
||||||
|
import pandas as pd
|
||||||
|
import pickle
|
||||||
|
#{Dataset_Name}{0/1}_cv_rmse.pkl :- 0 is for Valence and 1 is for Arousal
|
||||||
|
pl = ['DREAMER0_cv_rmse.pkl', 'DREAMER1_cv_rmse.pkl', 'DEAP0_cv_rmse.pkl', 'DEAP1_cv_rmse.pkl', 'OASIS0_cv_rmse.pkl', 'OASIS1_cv_rmse.pkl']
|
||||||
|
dataset = ['DREAMER', 'DREAMER', 'DEAP', 'DEAP','OASIS','OASIS']
|
||||||
|
label = [0,1,0,1,0,1]
|
||||||
|
min_cv_rmse = []
|
||||||
|
|
||||||
|
for i in range(len(pl)):
|
||||||
|
|
||||||
|
cv_rmse = None
|
||||||
|
with open(pl[i], 'rb') as f:
|
||||||
|
cv_rmse = pickle.load(f)
|
||||||
|
|
||||||
|
min_cv_rmse.append(min(cv_rmse))
|
||||||
|
|
||||||
|
print(min_cv_rmse)
|
||||||
|
|
||||||
|
"""feature_select_main.py"""
|
||||||
|
|
||||||
|
!pip install dit
|
||||||
|
!pip install pyinform
|
||||||
|
|
||||||
|
from ImportUtils import *
|
||||||
|
from args_eeg import args as my_args
|
||||||
|
|
||||||
|
"""#Plot pickled results"""
|
||||||
|
|
||||||
|
# Commented out IPython magic to ensure Python compatibility.
|
||||||
|
import matplotlib.pyplot as plt
|
||||||
|
# %matplotlib inline
|
||||||
|
import seaborn as sns
|
||||||
|
import copy
|
||||||
|
import os
|
||||||
|
from scipy import io,signal
|
||||||
|
import numpy as np
|
||||||
|
import pandas as pd
|
||||||
|
import pickle
|
||||||
|
|
||||||
|
# with open('/gdrive/MyDrive/Project_DEAP/4.1.2021/{}{}_cv_rmse.pkl'.format(dataset,label), 'rb') as f:
|
||||||
|
# pickle.dump(cv_rmse, f)
|
||||||
|
|
||||||
|
pl = ['DREAMER0_cv_rmse.pkl', 'DREAMER1_cv_rmse.pkl', 'DEAP0_cv_rmse.pkl', 'DEAP1_cv_rmse.pkl', 'OASIS0_cv_rmse.pkl', 'OASIS1_cv_rmse.pkl']
|
||||||
|
dataset = ['DREAMER', 'DREAMER', 'DEAP', 'DEAP','OASIS','OASIS']
|
||||||
|
label = [0,1,0,1,0,1]
|
||||||
|
|
||||||
|
for i in range(len(pl)):
|
||||||
|
|
||||||
|
cv_rmse = None
|
||||||
|
with open(pl[i], 'rb') as f:
|
||||||
|
cv_rmse = pickle.load(f)
|
||||||
|
|
||||||
|
fig = plt.gcf()
|
||||||
|
fig.set_size_inches(40, 20)
|
||||||
|
# X = pd.DataFrame([x for x in range(1,) ])
|
||||||
|
plt.rcParams.update({'font.size': 50})
|
||||||
|
plt.xlabel('Partipant No.')
|
||||||
|
plt.ylabel('RMSE')
|
||||||
|
plt.plot([str(x+1) for x in range(len(cv_rmse))], cv_rmse, linestyle='-', marker='o', color='b', markerfacecolor='r', linewidth=2.0, markersize = 15)
|
||||||
|
plt.tight_layout()
|
||||||
|
plt.savefig("/gdrive/MyDrive/Project_DEAP/4.1.2021/cv_stats/CV_{}_{}.svg".format(dataset[i], label[i]), bbox_inches='tight', dpi=500)
|
||||||
|
plt.show()
|
||||||
|
plt.clf()
|
||||||
|
|
||||||
|
with open('/gdrive/MyDrive/Project_DEAP/4.1.2021/mean_cv_rmse.pkl', 'wb') as f:
|
||||||
|
pickle.dump(mean_rmse, f)
|
||||||
|
|
||||||
|
with open('/gdrive/MyDrive/Project_DEAP/4.1.2021/std_cv_rmse.pkl', 'wb') as f:
|
||||||
|
pickle.dump(std_rmse, f)
|
||||||
|
|
||||||
|
df = pd.DataFrame()
|
||||||
|
df['Dataset-Label'] = ['DREAMER-V','DREAMER-A','DEAP-V','DEAP-A','OASIS-V','OASIS-A']
|
||||||
|
df['Mean RMSE'] = mean_rmse
|
||||||
|
df['Std Dev RMSE'] = std_rmse
|
||||||
|
df.to_csv('/gdrive/MyDrive/Project_DEAP/4.1.2021/cv_rmse_stats.csv')
|
||||||
|
|
||||||
+644
@@ -0,0 +1,644 @@
|
|||||||
|
#!/usr/bin/env python
|
||||||
|
# coding: utf-8
|
||||||
|
|
||||||
|
# In[ ]:
|
||||||
|
|
||||||
|
|
||||||
|
import bisect
|
||||||
|
import numpy as np
|
||||||
|
import pandas as pd
|
||||||
|
import pywt
|
||||||
|
from scipy import stats, signal, integrate
|
||||||
|
from dit.other import tsallis_entropy
|
||||||
|
import dit
|
||||||
|
import librosa
|
||||||
|
import statsmodels.api as sm
|
||||||
|
import itertools
|
||||||
|
from pyinform import mutualinfo
|
||||||
|
from statsmodels import tsa
|
||||||
|
from sklearn.metrics import mutual_info_score
|
||||||
|
import numpy as np
|
||||||
|
from scipy import signal,integrate
|
||||||
|
from sklearn.metrics.cluster import normalized_mutual_info_score as normed_mutual_info
|
||||||
|
|
||||||
|
################################################
|
||||||
|
# Auxiliary Functions
|
||||||
|
################################################
|
||||||
|
|
||||||
|
##########
|
||||||
|
# Filter the eegData, midpass filter
|
||||||
|
# eegData: 3D np array [chans x ms x epochs]
|
||||||
|
def filt_data(eegData, lowcut, highcut, fs, order=7):
|
||||||
|
nyq = 0.5 * fs
|
||||||
|
low = lowcut / nyq
|
||||||
|
high = highcut / nyq
|
||||||
|
b, a = signal.butter(order, [low, high], btype='band')
|
||||||
|
filt_eegData = signal.lfilter(b, a, eegData, axis = 1)
|
||||||
|
return filt_eegData
|
||||||
|
|
||||||
|
#########
|
||||||
|
# remove short bursts / spikes
|
||||||
|
def fcnRemoveShortEvents(z,n):
|
||||||
|
for chan in range(z.shape[0]):
|
||||||
|
# check for too-short suppressions
|
||||||
|
ct=0
|
||||||
|
i0=1
|
||||||
|
i1=1
|
||||||
|
for i in range(2,len(z[chan,:])):
|
||||||
|
if z[chan,i]==z[chan,i-1]:
|
||||||
|
ct=ct+1
|
||||||
|
i1=i
|
||||||
|
else:
|
||||||
|
if ct<n:
|
||||||
|
z[chan,i0:i1] = 0
|
||||||
|
z[chan,i1] = 0 #nasty little bug
|
||||||
|
ct=0
|
||||||
|
i0=i
|
||||||
|
i1=i
|
||||||
|
if z[chan,0] == 1 and z[chan,1] == 0:
|
||||||
|
z[chan,0] = 0
|
||||||
|
return z
|
||||||
|
|
||||||
|
##########
|
||||||
|
# Find interval of consistent values in binary 1D numpy array
|
||||||
|
def get_intervals(A,B,endIdx=500):
|
||||||
|
# This function gives you intervals (a1,b1), (a2,b3) for every a in A=[a1,a2,a3,..]
|
||||||
|
# and the smallest element in b that is larger than a.
|
||||||
|
intervals = []
|
||||||
|
for ii,A_idx_lst in enumerate(A):
|
||||||
|
B_idx_lst = [bisect.bisect_left(B[ii], idx) for idx in A_idx_lst]
|
||||||
|
chan_intervals = []
|
||||||
|
for jj,idx_l in enumerate(B_idx_lst):
|
||||||
|
if idx_l == len(B[ii]):
|
||||||
|
chan_intervals.append((A_idx_lst[jj],endIdx))
|
||||||
|
else:
|
||||||
|
chan_intervals.append((A_idx_lst[jj],B[ii][idx_l]))
|
||||||
|
intervals.append(chan_intervals)
|
||||||
|
# previous code already takes care of the [] possibility
|
||||||
|
#if B_idx_lst == []:
|
||||||
|
# intervals.append([])
|
||||||
|
return intervals
|
||||||
|
|
||||||
|
##########
|
||||||
|
# Detect bursts and supressions in eeg data
|
||||||
|
def burst_supression_detection(x,fs,suppression_threshold = 10):
|
||||||
|
'''
|
||||||
|
# DETECT EMG ARTIFACTS.
|
||||||
|
nyq = 0.5 * fs
|
||||||
|
low = low / nyq
|
||||||
|
high = high / nyq
|
||||||
|
be, ae = signal.butter(order, [low, high], btype='band')
|
||||||
|
'''
|
||||||
|
# CALCULATE ENVELOPE
|
||||||
|
e = abs(signal.hilbert(x,axis=1));
|
||||||
|
# same as smooth(e,Fs/4) in MATLAB, apply 1/2 second smoothing
|
||||||
|
ME = np.array([np.convolve(el,np.ones(int(fs/4))/(fs/4),'same') for el in e.tolist()])
|
||||||
|
e = ME
|
||||||
|
# DETECT SUPRESSIONS
|
||||||
|
# apply threshold -- 10uv
|
||||||
|
z = (ME<suppression_threshold)
|
||||||
|
# remove too-short suppression segments
|
||||||
|
z = fcnRemoveShortEvents(z,fs/2)
|
||||||
|
# remove too-short burst segments
|
||||||
|
b = fcnRemoveShortEvents(1-z,fs/2)
|
||||||
|
z = 1-b
|
||||||
|
went_high = [np.where(np.array(chD[:-1]) < np.array(chD[1:]))[0].tolist() for chD in z.tolist()]
|
||||||
|
went_low = [np.where(np.array(chD[:-1]) > np.array(chD[1:]))[0].tolist() for chD in z.tolist()]
|
||||||
|
|
||||||
|
bursts = get_intervals(went_high,went_low)
|
||||||
|
supressions = get_intervals(went_low,went_high)
|
||||||
|
|
||||||
|
return bursts,supressions
|
||||||
|
|
||||||
|
##########
|
||||||
|
# Coherence in the Delta Band
|
||||||
|
def CoherenceDelta(eegData, i, j, fs=100):
|
||||||
|
nfft=eegData.shape[1]
|
||||||
|
f, Cxy = signal.coherence(eegData[i,:,:], eegData[j,:,:], fs=fs, nfft=nfft, axis=0)#, window=np.hanning(nfft))
|
||||||
|
out = np.mean(Cxy[np.all([f >= 0.5, f<=4], axis=0)], axis=0)
|
||||||
|
return out
|
||||||
|
|
||||||
|
##########
|
||||||
|
# correlation across channels
|
||||||
|
def PhaseLagIndex(eegData, i, j):
|
||||||
|
hxi = ss.hilbert(eegData[i,:,:])
|
||||||
|
hxj = ss.hilbert(eegData[j,:,:])
|
||||||
|
# calculating the INSTANTANEOUS PHASE
|
||||||
|
inst_phasei = np.arctan(np.angle(hxi))
|
||||||
|
inst_phasej = np.arctan(np.angle(hxj))
|
||||||
|
|
||||||
|
out = np.abs(np.mean(np.sign(inst_phasej - inst_phasei), axis=0))
|
||||||
|
return out
|
||||||
|
|
||||||
|
##########
|
||||||
|
# Cross Correlation
|
||||||
|
def crossCorrelation(eegData, i, j):
|
||||||
|
out = np.zeros(eegData.shape[2])
|
||||||
|
for epoch in range(eegData.shape[2]):
|
||||||
|
ccor = np.correlate(eegData[i,:,epoch], eegData[j,:,epoch], mode="full")
|
||||||
|
absccor = np.abs(ccor)
|
||||||
|
out[epoch] = (np.max(absccor) - np.mean(absccor)) / np.std(absccor)
|
||||||
|
return out
|
||||||
|
|
||||||
|
##########
|
||||||
|
# Auxilary Cross-correlation Lag
|
||||||
|
def corrCorrLagAux(eegData,ii,jj,Fs=100):
|
||||||
|
out = np.zeros(eegData.shape[2])
|
||||||
|
lagCorr = []
|
||||||
|
for lag in range(0,eegData.shape[1],int(0.2*Fs)):
|
||||||
|
tmp = eegData.copy()
|
||||||
|
tmp[jj,:,:] = np.roll(tmp[jj,:,:], lag, axis=0)
|
||||||
|
lagCorr.append(CrossCorrelation(tmp, ii, jj, Fs))
|
||||||
|
return np.argmax(lagCorr,axis=0)
|
||||||
|
|
||||||
|
################################################
|
||||||
|
# bandpower Functions
|
||||||
|
################################################
|
||||||
|
|
||||||
|
##########
|
||||||
|
# compute the bandpower (area under segment (from fband[0] to fband[1] in Hz)
|
||||||
|
# of curve in freqency domain) of data, at sampling frequency of Fs (100 ussually)
|
||||||
|
def bandpower(data, fs, fband):
|
||||||
|
freqs, powers = periodogram(data, fs)
|
||||||
|
idx_min = np.argmax(freqs > fband[0]) - 1
|
||||||
|
idx_max = np.argmax(freqs > fband[1]) - 1
|
||||||
|
idx_delta = np.zeros(dtype=bool, shape=freqs.shape)
|
||||||
|
idx_delta[idx_min:idx_max] = True
|
||||||
|
bpower = simps(powers[idx_delta], freqs[idx_delta])
|
||||||
|
return bpower
|
||||||
|
|
||||||
|
##########
|
||||||
|
# computes the same thing as vecbandpower but with a loop
|
||||||
|
def pfvecbandpower(data, fs, fband):
|
||||||
|
bpowers = np.zeros((data.shape[0], data.shape[2]))
|
||||||
|
for i in range(data.shape[0]):
|
||||||
|
freqs, powers = periodogram(data[i, :, :], fs, axis=0)
|
||||||
|
idx_min = np.argmax(freqs > fband[0]) - 1
|
||||||
|
idx_max = np.argmax(freqs > fband[1]) - 1
|
||||||
|
idx_delta = np.zeros(dtype=bool, shape=freqs.shape)
|
||||||
|
idx_delta[idx_min:idx_max] = True
|
||||||
|
|
||||||
|
bpower = simps(powers[idx_delta, :], freqs[idx_delta], axis=0)
|
||||||
|
bpowers[i, :] = bpower
|
||||||
|
|
||||||
|
return bpowers
|
||||||
|
|
||||||
|
################################################
|
||||||
|
# Complexity features
|
||||||
|
################################################
|
||||||
|
|
||||||
|
##########
|
||||||
|
# Extract the Shannon Entropy
|
||||||
|
# threshold the signal and make it discrete, normalize it and then compute entropy
|
||||||
|
def shannonEntropy(eegData, bin_min, bin_max, binWidth):
|
||||||
|
H = np.zeros((eegData.shape[0], eegData.shape[2]))
|
||||||
|
for chan in range(H.shape[0]):
|
||||||
|
for epoch in range(H.shape[1]):
|
||||||
|
counts, binCenters = np.histogram(eegData[chan,:,epoch], bins=np.arange(bin_min+1, bin_max, binWidth))
|
||||||
|
nz = counts > 0
|
||||||
|
prob = counts[nz] / np.sum(counts[nz])
|
||||||
|
H[chan, epoch] = -np.dot(prob, np.log2(prob/binWidth))
|
||||||
|
return H
|
||||||
|
|
||||||
|
##########
|
||||||
|
# Extract the tsalis Entropy
|
||||||
|
def tsalisEntropy(eegData, bin_min, bin_max, binWidth, orders = [1]):
|
||||||
|
H = [np.zeros((eegData.shape[0], eegData.shape[2]))]*len(orders)
|
||||||
|
for chan in range(H[0].shape[0]):
|
||||||
|
for epoch in range(H[0].shape[1]):
|
||||||
|
counts, bins = np.histogram(eegData[chan,:,epoch], bins=np.arange(-200+1, 200, 2))
|
||||||
|
dist = dit.Distribution([str(bc).zfill(5) for bc in bins[:-1]],counts/sum(counts))
|
||||||
|
for ii,order in enumerate(orders):
|
||||||
|
H[ii][chan,epoch] = tsallis_entropy(dist,order)
|
||||||
|
return H
|
||||||
|
|
||||||
|
##########
|
||||||
|
# Cepstrum Coefficients (n=2)
|
||||||
|
def mfcc(eegData,fs,order=2):
|
||||||
|
H = np.zeros((eegData.shape[0], eegData.shape[2],order))
|
||||||
|
for chan in range(H.shape[0]):
|
||||||
|
for epoch in range(H.shape[1]):
|
||||||
|
H[chan, epoch, : ] = librosa.feature.mfcc(np.asfortranarray(eegData[chan,:,epoch]), sr=fs)[0:order].T
|
||||||
|
return H
|
||||||
|
|
||||||
|
##########
|
||||||
|
# Lyapunov exponent
|
||||||
|
def lyapunov(eegData):
|
||||||
|
return np.mean(np.log(np.abs(np.gradient(eegData,axis=1))),axis=1)
|
||||||
|
|
||||||
|
##########
|
||||||
|
# Fractal Embedding Dimension
|
||||||
|
# From pyrem: packadge for sleep scoring from EEG data
|
||||||
|
# https://github.com/gilestrolab/pyrem/blob/master/src/pyrem/univariate.py
|
||||||
|
def hFD(a, k_max): #Higuchi FD
|
||||||
|
L = []
|
||||||
|
x = []
|
||||||
|
N = len(a)
|
||||||
|
|
||||||
|
for k in range(1,k_max):
|
||||||
|
Lk = 0
|
||||||
|
for m in range(0,k):
|
||||||
|
#we pregenerate all idxs
|
||||||
|
idxs = np.arange(1,int(np.floor((N-m)/k)),dtype=np.int32)
|
||||||
|
Lmk = np.sum(np.abs(a[m+idxs*k] - a[m+k*(idxs-1)]))
|
||||||
|
Lmk = (Lmk*(N - 1)/(((N - m)/ k)* k)) / k
|
||||||
|
Lk += Lmk
|
||||||
|
|
||||||
|
L.append(np.log(Lk/(m+1)))
|
||||||
|
x.append([np.log(1.0/ k), 1])
|
||||||
|
|
||||||
|
(p, r1, r2, s)=np.linalg.lstsq(x, L)
|
||||||
|
return p[0]
|
||||||
|
|
||||||
|
##########
|
||||||
|
# Hjorth Mobility
|
||||||
|
# Hjorth Complexity
|
||||||
|
# variance = mean(signal^2) iff mean(signal)=0
|
||||||
|
# which it is be because I normalized the signal
|
||||||
|
# Assuming signals have mean 0
|
||||||
|
# Mobility = sqrt( mean(dx^2) / mean(x^2) )
|
||||||
|
def hjorthParameters(xV):
|
||||||
|
dxV = np.diff(xV, axis=1)
|
||||||
|
ddxV = np.diff(dxV, axis=1)
|
||||||
|
|
||||||
|
mx2 = np.mean(np.square(xV), axis=1)
|
||||||
|
mdx2 = np.mean(np.square(dxV), axis=1)
|
||||||
|
mddx2 = np.mean(np.square(ddxV), axis=1)
|
||||||
|
|
||||||
|
mob = mdx2 / mx2
|
||||||
|
complexity = np.sqrt((mddx2 / mdx2) / mob)
|
||||||
|
mobility = np.sqrt(mob)
|
||||||
|
|
||||||
|
# PLEASE NOTE that Mohammad did NOT ACTUALLY use hjorth complexity,
|
||||||
|
# in the matlab code for hjorth complexity subtraction by mob not division was used
|
||||||
|
return mobility, complexity
|
||||||
|
|
||||||
|
##########
|
||||||
|
# false nearest neighbor descriptor
|
||||||
|
def falseNearestNeighbor(eegData, fast=True):
|
||||||
|
# Average Mutual Information
|
||||||
|
# There exist good arguments that if the time delayed mutual
|
||||||
|
# information exhibits a marked minimum at a certain value of tex2html_wrap_inline6553,
|
||||||
|
# then this is a good candidate for a reasonable time delay.
|
||||||
|
npts = 1000 # not sure about this?
|
||||||
|
maxdims = 50
|
||||||
|
max_delay = 2 # max_delay = 200 # TODO: need to use 200, but also need to speed this up
|
||||||
|
distance_thresh = 0.5
|
||||||
|
|
||||||
|
out = np.zeros((eegData.shape[0], eegData.shape[2]))
|
||||||
|
for chan in range(eegData.shape[0]):
|
||||||
|
for epoch in range(eegData.shape[2]):
|
||||||
|
if fast:
|
||||||
|
out[chan, epoch] = 0
|
||||||
|
else:
|
||||||
|
cur_eegData = eegData[chan, :, epoch]
|
||||||
|
lagidx = 0 # we are looking for the index of the lag that makes the signal maximally uncorrelated to the original
|
||||||
|
minNMI = 1 # normed_mutual_info is from 1 (perfectly correlated) to 0 (not at all correlated)
|
||||||
|
for lag in range(1, max_delay):
|
||||||
|
x = cur_eegData[:-lag]
|
||||||
|
xlag = cur_eegData[lag:]
|
||||||
|
convert float data into histogram bins
|
||||||
|
nbins = int(np.floor(1 + np.log2(len(x)) + 0.5))
|
||||||
|
x_discrete = np.histogram(x, bins=nbins)[0]
|
||||||
|
xlag_discrete = np.histogram(xlag, bins=nbins)[0]
|
||||||
|
cNMI = normed_mutual_info(x_discrete, xlag_discrete)
|
||||||
|
if cNMI < minNMI:
|
||||||
|
minNMI = cNMI
|
||||||
|
lagidx = lag
|
||||||
|
# nearest neighbors part
|
||||||
|
knn = int(max(2, 6*lagidx)) # heuristic (number of nearest neighbors to look up)
|
||||||
|
m = 1 # lagidx + 1
|
||||||
|
|
||||||
|
# y is the embedded version of the signal
|
||||||
|
y = np.zeros((maxdims+1, npts))
|
||||||
|
for d in range(maxdims+1):
|
||||||
|
tmp = cur_eegData[d*m:d*m + npts]
|
||||||
|
y[d, :tmp.shape[0]] = tmp
|
||||||
|
|
||||||
|
nnd = np.ones((npts, maxdims))
|
||||||
|
nnz = np.zeros((npts, maxdims))
|
||||||
|
|
||||||
|
# see where it tends to settle
|
||||||
|
for d in range(1, maxdims):
|
||||||
|
for k in range(0, npts):
|
||||||
|
# get the distances to all points in the window (distance given embedding dimension)
|
||||||
|
dists = []
|
||||||
|
for nextpt in range(1, knn+1):
|
||||||
|
if k+nextpt < npts:
|
||||||
|
dists.append(np.linalg.norm(y[:d, k] - y[:d, k+nextpt]))
|
||||||
|
if len(dists) > 0:
|
||||||
|
minIdx = np.argmin(dists)
|
||||||
|
if dists[minIdx] == 0:
|
||||||
|
dists[minIdx] = 0.0000001 # essentially 0 just silence the error
|
||||||
|
nnd[k, d-1] = dists[minIdx]
|
||||||
|
nnz[k, d-1] = np.abs( y[d+1, k] - y[d+1, minIdx+1+k] )
|
||||||
|
# aggregate results
|
||||||
|
mindim = np.mean(nnz/nnd > distance_thresh, axis=0) < 0.1
|
||||||
|
# get the index of the first occurence of the value true
|
||||||
|
# (a 1 in the binary representation of true and false)
|
||||||
|
out[chan, epoch] = np.argmax(mindim)
|
||||||
|
|
||||||
|
return out
|
||||||
|
|
||||||
|
##########
|
||||||
|
# ARMA coefficients
|
||||||
|
def arma(eegData,order=2):
|
||||||
|
H = np.zeros((eegData.shape[0], eegData.shape[2],order))
|
||||||
|
for chan in range(H.shape[0]):
|
||||||
|
for epoch in range(H.shape[1]):
|
||||||
|
arma_mod = sm.tsa.ARMA(eegData[chan,:,epoch], order=(order,order))
|
||||||
|
arma_res = arma_mod.fit(trend='nc', disp=-1)
|
||||||
|
H[chan, epoch, : ] = arma_res.arparams
|
||||||
|
return H
|
||||||
|
|
||||||
|
################################################
|
||||||
|
# Continuity features
|
||||||
|
################################################
|
||||||
|
|
||||||
|
##########
|
||||||
|
# median frequency
|
||||||
|
def medianFreq(eegData,fs):
|
||||||
|
H = np.zeros((eegData.shape[0], eegData.shape[2]))
|
||||||
|
for chan in range(H.shape[0]):
|
||||||
|
freqs, powers = signal.periodogram(eegData[chan, :, :], fs, axis=0)
|
||||||
|
H[chan,:] = freqs[np.argsort(powers,axis=0)[len(powers)//2]]
|
||||||
|
return H
|
||||||
|
|
||||||
|
##########
|
||||||
|
# calculate band power
|
||||||
|
def bandPower(eegData, lowcut, highcut, fs):
|
||||||
|
eegData_band = filt_data(eegData, lowcut, highcut, fs, order=7)
|
||||||
|
freqs, powers = signal.periodogram(eegData_band, fs, axis=1)
|
||||||
|
bandPwr = np.mean(powers,axis=1)
|
||||||
|
return bandPwr
|
||||||
|
|
||||||
|
##########
|
||||||
|
# numberOfSpikes
|
||||||
|
def spikeNum(eegData,minNumSamples=7,stdAway = 3):
|
||||||
|
H = np.zeros((eegData.shape[0], eegData.shape[2]))
|
||||||
|
for chan in range(H.shape[0]):
|
||||||
|
for epoch in range(H.shape[1]):
|
||||||
|
mean = np.mean(eegData[chan, :, epoch])
|
||||||
|
std = np.std(eegData[chan,:,epoch],axis=1)
|
||||||
|
H[chan,epoch] = len(signal.find_peaks(abs(eegData[chan,:,epoch]-mean), 3*std,epoch,width=7)[0])
|
||||||
|
return H
|
||||||
|
|
||||||
|
##########
|
||||||
|
# Standard Deviation
|
||||||
|
def eegStd(eegData):
|
||||||
|
std_res = np.std(eegData,axis=1)
|
||||||
|
return std_res
|
||||||
|
|
||||||
|
##########
|
||||||
|
# α/δ Ratio
|
||||||
|
def eegRatio(eegData,fs):
|
||||||
|
# alpha (8–12 Hz)
|
||||||
|
eegData_alpha = filt_data(eegData, 8, 12, fs)
|
||||||
|
# delta (0.5–4 Hz)
|
||||||
|
eegData_delta = filt_data(eegData, 0.5, 4, fs)
|
||||||
|
# calculate the power
|
||||||
|
powers_alpha = bandPower(eegData, 8, 12, fs)
|
||||||
|
powers_delta = bandPower(eegData, 0.5, 4, fs)
|
||||||
|
ratio_res = np.sum(powers_alpha,axis=0) / np.sum(powers_delta,axis=0)
|
||||||
|
return np.expand_dims(x, axis=0)
|
||||||
|
|
||||||
|
###########
|
||||||
|
# Regularity (burst-suppression)
|
||||||
|
# Regularity of eeg
|
||||||
|
# filter with a window of 0.5 seconds to create a nonnegative smooth signal.
|
||||||
|
# In this technique, we first squared the signal and applied a moving-average
|
||||||
|
# The window length of the moving average was set at 0.5 seconds.
|
||||||
|
def eegRegularity(eegData, Fs=100):
|
||||||
|
in_x = np.square(eegData) # square signal
|
||||||
|
num_wts = Fs//2 # find the filter length in samples - we want 0.5 seconds.
|
||||||
|
q = signal.lfilter(np.ones(num_wts) / num_wts, 1, in_x, axis=1)
|
||||||
|
q = -np.sort(-q, axis=1) # descending sort on smooth signal
|
||||||
|
N = q.shape[1]
|
||||||
|
u2 = np.square(np.arange(1, N+1))
|
||||||
|
# COMPUTE THE Regularity
|
||||||
|
# dot each 5min epoch with the quadratic data points and then normalize by the size of the dotted things
|
||||||
|
reg = np.sqrt( np.einsum('ijk,j->ik', q, u2) / (np.sum(q, axis=1)*(N**2)/3) )
|
||||||
|
return reg
|
||||||
|
|
||||||
|
###########
|
||||||
|
# Voltage < (5μ, 10μ, 20μ)
|
||||||
|
def eegVoltage(eegData,voltage=20):
|
||||||
|
eegFilt = eegData.copy()
|
||||||
|
eegFilt[abs(eegFilt) > voltage] = np.nan
|
||||||
|
volt_res = np.nanmean(eegFilt,axis=1)
|
||||||
|
return volt_res
|
||||||
|
|
||||||
|
##########
|
||||||
|
# Diffuse Slowing
|
||||||
|
# look for diffuse slowing (bandpower max from frequency domain integral)
|
||||||
|
# repeated integration of a huge tensor is really expensive
|
||||||
|
def diffuseSlowing(eegData, Fs=100, fast=True):
|
||||||
|
maxBP = np.zeros((eegData.shape[0], eegData.shape[2]))
|
||||||
|
idx = np.zeros((eegData.shape[0], eegData.shape[2]))
|
||||||
|
if fast:
|
||||||
|
return idx
|
||||||
|
for j in range(1, Fs//2):
|
||||||
|
print("BP", j)
|
||||||
|
cbp = bandpower(eegData, Fs, [j-1, j])
|
||||||
|
biggerCIdx = cbp > maxBP
|
||||||
|
idx[biggerCIdx] = j
|
||||||
|
maxBP[biggerCIdx] = cbp[biggerCIdx]
|
||||||
|
return (idx < 8)
|
||||||
|
|
||||||
|
##########
|
||||||
|
# Spikes
|
||||||
|
def spikeNum(eegData,minNumSamples=7,stdAway = 3):
|
||||||
|
H = np.zeros((eegData.shape[0], eegData.shape[2]))
|
||||||
|
for chan in range(H.shape[0]):
|
||||||
|
for epoch in range(H.shape[1]):
|
||||||
|
mean = np.mean(eegData[chan, :, epoch])
|
||||||
|
std = np.std(eegData[chan,:,epoch])
|
||||||
|
H[chan,epoch] = len(signal.find_peaks(abs(eegData[chan,:,epoch]-mean), 3*std,epoch,width=7)[0])
|
||||||
|
return H
|
||||||
|
|
||||||
|
##########
|
||||||
|
# Delta Burst after spike
|
||||||
|
def burstAfterSpike(eegData,eegData_subband,minNumSamples=7,stdAway = 3):
|
||||||
|
H = np.zeros((eegData.shape[0], eegData.shape[2]))
|
||||||
|
for chan in range(H.shape[0]):
|
||||||
|
for epoch in range(H.shape[1]):
|
||||||
|
preBurst = 0
|
||||||
|
postBurst = 0
|
||||||
|
mean = np.mean(eegData[chan, :, epoch])
|
||||||
|
std = np.std(eegData[chan,:,epoch])
|
||||||
|
idxList = signal.find_peaks(abs(eegData[chan,:,epoch]-mean), stdAway*std,epoch,width=minNumSamples)[0]
|
||||||
|
for idx in idxList:
|
||||||
|
preBurst += np.mean(eegData_subband[chan,idx-7:idx-1,epoch])
|
||||||
|
postBurst += np.mean(eegData_subband[chan,idx+1:idx+7,epoch])
|
||||||
|
H[chan,epoch] = postBurst - preBurst
|
||||||
|
return H
|
||||||
|
|
||||||
|
##########
|
||||||
|
# Sharp spike
|
||||||
|
def shortSpikeNum(eegData,minNumSamples=7,stdAway = 3):
|
||||||
|
H = np.zeros((eegData.shape[0], eegData.shape[2]))
|
||||||
|
for chan in range(H.shape[0]):
|
||||||
|
for epoch in range(H.shape[1]):
|
||||||
|
mean = np.mean(eegData[chan, :, epoch])
|
||||||
|
std = np.std(eegData[chan,:,epoch])
|
||||||
|
longSpikes = set(signal.find_peaks(abs(eegData[chan,:,epoch]-mean), 3*std,epoch,width=7)[0])
|
||||||
|
shortSpikes = set(signal.find_peaks(abs(eegData[chan,:,epoch]-mean), 3*std,epoch,width=1)[0])
|
||||||
|
H[chan,epoch] = len(shortSpikes.difference(longSpikes))
|
||||||
|
return H
|
||||||
|
|
||||||
|
##########
|
||||||
|
# Number of Bursts
|
||||||
|
def numBursts(eegData,fs):
|
||||||
|
bursts = []
|
||||||
|
supressions = []
|
||||||
|
for epoch in range(eegData.shape[2]):
|
||||||
|
epochBurst,epochSupressions = burst_supression_detection(eegData[:,:,epoch],fs,suppression_threshold=10)#,low=30,high=49)
|
||||||
|
bursts.append(epochBurst)
|
||||||
|
supressions.append(epochSupressions)
|
||||||
|
# Number of Bursts
|
||||||
|
numBursts_res = np.zeros((eegData.shape[0], eegData.shape[2]))
|
||||||
|
for chan in range(numBursts_res.shape[0]):
|
||||||
|
for epoch in range(numBursts_res.shape[1]):
|
||||||
|
numBursts_res[chan,epoch] = len(bursts[epoch][chan])
|
||||||
|
return numBursts_res
|
||||||
|
|
||||||
|
##########
|
||||||
|
# Burst length μ and σ
|
||||||
|
def burstLengthStats(eegData,fs):
|
||||||
|
bursts = []
|
||||||
|
supressions = []
|
||||||
|
for epoch in range(eegData.shape[2]):
|
||||||
|
epochBurst,epochSupressions = burst_supression_detection(eegData[:,:,epoch],fs,suppression_threshold=10)#,low=30,high=49)
|
||||||
|
bursts.append(epochBurst)
|
||||||
|
supressions.append(epochSupressions)
|
||||||
|
# Number of Bursts
|
||||||
|
burstMean_res = np.zeros((eegData.shape[0], eegData.shape[2]))
|
||||||
|
burstStd_res = np.zeros((eegData.shape[0], eegData.shape[2]))
|
||||||
|
for chan in range(burstMean_res.shape[0]):
|
||||||
|
for epoch in range(burstMean_res.shape[1]):
|
||||||
|
burstMean_res[chan,epoch] = np.mean([burst[1]-burst[0] for burst in bursts[epoch][chan]])
|
||||||
|
burstStd_res[chan,epoch] = np.std([burst[1]-burst[0] for burst in bursts[epoch][chan]])
|
||||||
|
burstMean_res = np.nan_to_num(burstMean_res)
|
||||||
|
burstStd_res = np.nan_to_num(burstStd_res)
|
||||||
|
return burstMean_res,burstStd_res
|
||||||
|
|
||||||
|
##########
|
||||||
|
# Burst band powers (δ, α, θ, β, γ)
|
||||||
|
def burstBandPowers(eegData, lowcut, highcut, fs, order=7):
|
||||||
|
band_burst_powers = np.zeros((eegData.shape[0], eegData.shape[2]))
|
||||||
|
bursts = []
|
||||||
|
supressions = []
|
||||||
|
for epoch in range(eegData.shape[2]):
|
||||||
|
epochBurst,epochSupressions = burst_supression_detection(eegData[:,:,epoch],fs,suppression_threshold=10)#,low=30,high=49)
|
||||||
|
bursts.append(epochBurst)
|
||||||
|
supressions.append(epochSupressions)
|
||||||
|
eegData_band = filt_data(eegData, lowcut, highcut, fs, order=7)
|
||||||
|
for epoch,epochBursts in enumerate(bursts):
|
||||||
|
for chan,chanBursts in enumerate(epochBursts):
|
||||||
|
epochPowers = []
|
||||||
|
for burst in chanBursts:
|
||||||
|
if burst[1] == eegData.shape[1]:
|
||||||
|
burstData = eegData_band[:,burst[0]:,epoch]
|
||||||
|
else:
|
||||||
|
burstData = eegData_band[:,burst[0]:burst[1],epoch]
|
||||||
|
freqs, powers = signal.periodogram(burstData, fs, axis=1)
|
||||||
|
epochPowers.append(np.mean(powers,axis=1))
|
||||||
|
band_burst_powers[chan,epoch] = np.mean(epochPowers)
|
||||||
|
return band_burst_powers
|
||||||
|
|
||||||
|
##########
|
||||||
|
# Number of Suppressions
|
||||||
|
def numSuppressions(eegData,fs,suppression_threshold=10):
|
||||||
|
bursts = []
|
||||||
|
supressions = []
|
||||||
|
for epoch in range(eegData.shape[2]):
|
||||||
|
epochBurst,epochSupressions = burst_supression_detection(eegData[:,:,epoch],fs,suppression_threshold=suppression_threshold)#,low=30,high=49)
|
||||||
|
bursts.append(epochBurst)
|
||||||
|
supressions.append(epochSupressions)
|
||||||
|
numSupprs_res = np.zeros((eegData.shape[0], eegData.shape[2]))
|
||||||
|
for chan in range(numSupprs_res.shape[0]):
|
||||||
|
for epoch in range(numSupprs_res.shape[1]):
|
||||||
|
numSupprs_res[chan,epoch] = len(supressions[epoch][chan])
|
||||||
|
return numSupprs_res
|
||||||
|
|
||||||
|
##########
|
||||||
|
# Suppression length μ and σ
|
||||||
|
def suppressionLengthStats(eegData,fs,suppression_threshold=10):
|
||||||
|
bursts = []
|
||||||
|
supressions = []
|
||||||
|
for epoch in range(eegData.shape[2]):
|
||||||
|
epochBurst,epochSupressions = burst_supression_detection(eegData[:,:,epoch],fs,suppression_threshold=suppression_threshold)#,low=30,high=49)
|
||||||
|
bursts.append(epochBurst)
|
||||||
|
supressions.append(epochSupressions)
|
||||||
|
supressionMean_res = np.zeros((eegData.shape[0], eegData.shape[2]))
|
||||||
|
supressionStd_res = np.zeros((eegData.shape[0], eegData.shape[2]))
|
||||||
|
for chan in range(supressionMean_res.shape[0]):
|
||||||
|
for epoch in range(supressionMean_res.shape[1]):
|
||||||
|
supressionMean_res[chan,epoch] = np.mean([suppr[1]-suppr[0] for suppr in supressions[epoch][chan]])
|
||||||
|
supressionStd_res[chan,epoch] = np.std([suppr[1]-suppr[0] for suppr in supressions[epoch][chan]])
|
||||||
|
supressionMean_res = np.nan_to_num(supressionMean_res)
|
||||||
|
supressionStd_res = np.nan_to_num(supressionStd_res)
|
||||||
|
return supressionMean_res, supressionStd_res
|
||||||
|
|
||||||
|
################################################
|
||||||
|
# Connectivity features
|
||||||
|
################################################
|
||||||
|
|
||||||
|
##########
|
||||||
|
# Coherence - δ
|
||||||
|
def coherence(eegData,fs):
|
||||||
|
coh_res = []
|
||||||
|
for ii, jj in itertools.combinations(range(eegData.shape[0]), 2):
|
||||||
|
coh_res.append(CoherenceDelta(eegData, ii, jj, fs=fs))
|
||||||
|
coh_res = np.array(coh_res)
|
||||||
|
return coh_res
|
||||||
|
|
||||||
|
##########
|
||||||
|
# Mutual information
|
||||||
|
def calculate2Chan_MI(eegData,ii,jj,bin_min=-200, bin_max=200, binWidth=2):
|
||||||
|
H = np.zeros(eegData.shape[2])
|
||||||
|
bins = np.arange(bin_min+1, bin_max, binWidth)
|
||||||
|
for epoch in range(eegData.shape[2]):
|
||||||
|
c_xy = np.histogram2d(eegData[ii,:,epoch],eegData[jj,:,epoch],bins)[0]
|
||||||
|
H[epoch] = mutual_info_score(None, None, contingency=c_xy)
|
||||||
|
return H
|
||||||
|
|
||||||
|
##########
|
||||||
|
# Granger causality
|
||||||
|
def calcGrangerCausality(eegData,ii,jj):
|
||||||
|
H = np.zeros(eegData.shape[2])
|
||||||
|
for epoch in range(eegData.shape[2]):
|
||||||
|
X = np.vstack([eegData[ii,:,epoch],eegData[jj,:,epoch]]).T
|
||||||
|
H[epoch] = tsa.stattools.grangercausalitytests(X, 1, addconst=True, verbose=False)[1][0]['ssr_ftest'][0]
|
||||||
|
return H
|
||||||
|
|
||||||
|
##########
|
||||||
|
# phase Lag Index
|
||||||
|
def phaseLagIndex(eegData, i, j):
|
||||||
|
hxi = ss.hilbert(eegData[i,:,:])
|
||||||
|
hxj = ss.hilbert(eegData[j,:,:])
|
||||||
|
# calculating the INSTANTANEOUS PHASE
|
||||||
|
inst_phasei = np.arctan(np.angle(hxi))
|
||||||
|
inst_phasej = np.arctan(np.angle(hxj))
|
||||||
|
|
||||||
|
out = np.abs(np.mean(np.sign(inst_phasej - inst_phasei), axis=0))
|
||||||
|
return out
|
||||||
|
|
||||||
|
##########
|
||||||
|
# Cross-correlation Magnitude
|
||||||
|
def crossCorrMag(eegData,ii,jj):
|
||||||
|
crossCorr_res = []
|
||||||
|
for ii, jj in itertools.combinations(range(eegData.shape[0]), 2):
|
||||||
|
crossCorr_res.append(crossCorrelation(eegData, ii, jj))
|
||||||
|
crossCorr_res = np.array(crossCorr_res)
|
||||||
|
return crossCorr_res
|
||||||
|
|
||||||
|
##########
|
||||||
|
# Cross-correlation Lag
|
||||||
|
def corrCorrLag(eegData,ii,jj,fs=100):
|
||||||
|
crossCorrLag_res = []
|
||||||
|
for ii, jj in itertools.combinations(range(eegData.shape[0]), 2):
|
||||||
|
crossCorrLag_res.append(corrCorrLag(eegData, ii, jj, fs))
|
||||||
|
crossCorrLag_res = np.array(crossCorrLag_res)
|
||||||
|
return crossCorrLag_res
|
||||||
|
|
||||||
@@ -0,0 +1,459 @@
|
|||||||
|
#!/usr/bin/env python
|
||||||
|
# coding: utf-8
|
||||||
|
|
||||||
|
# In[ ]:
|
||||||
|
|
||||||
|
|
||||||
|
import ImportUtils
|
||||||
|
import math
|
||||||
|
import EEGExtract as eeg
|
||||||
|
from sklearn.model_selection import train_test_split
|
||||||
|
|
||||||
|
import os
|
||||||
|
import glob
|
||||||
|
from scipy import io,signal
|
||||||
|
import numpy as np
|
||||||
|
import pandas as pd
|
||||||
|
from sklearn import preprocessing
|
||||||
|
import pickle
|
||||||
|
from sklearn.metrics import mean_squared_error
|
||||||
|
from sklearn.impute import SimpleImputer
|
||||||
|
|
||||||
|
import matplotlib.pyplot as plt
|
||||||
|
import seaborn as sns
|
||||||
|
import copy
|
||||||
|
import os
|
||||||
|
|
||||||
|
|
||||||
|
# In[ ]:
|
||||||
|
|
||||||
|
|
||||||
|
def select_channels(data,channels):
|
||||||
|
|
||||||
|
# parameters:-
|
||||||
|
# data - channelwise EEG preprocessed data
|
||||||
|
# channels - list of required channels
|
||||||
|
|
||||||
|
# returns:-
|
||||||
|
# ans - the selected channels from the entire dataset
|
||||||
|
|
||||||
|
ans = np.empty((data.shape[0],len(channels),data.shape[2]))
|
||||||
|
for sub in range(data.shape[0]):
|
||||||
|
ans[sub,:,:] = np.array([data[sub,x,:] for x in channels])
|
||||||
|
return ans
|
||||||
|
|
||||||
|
|
||||||
|
# In[ ]:
|
||||||
|
|
||||||
|
|
||||||
|
def epoch_data(X, Y, Z, window, stride, sfreq):
|
||||||
|
|
||||||
|
# Function to epoch the data
|
||||||
|
|
||||||
|
# parameters:-
|
||||||
|
# X - The EEG data input passed as trial*channel*timepoints
|
||||||
|
# Y - VALD (depending on the dataset) as given by the user
|
||||||
|
# Z - Participant number and session number
|
||||||
|
# window - length of required window in seconds
|
||||||
|
# stride - stride of the required sliding window in seconds
|
||||||
|
# sfreq - sampling frequency of the obtained EEG data
|
||||||
|
|
||||||
|
# retruns:-
|
||||||
|
# X_new - Epoched X
|
||||||
|
# Y_new - Epoched Y (All the segments for a given trial shall have the same value, i.e the one given by the subject)
|
||||||
|
# Z_new - Epoched Z (All the segments for a given trial shall have the same value, i.e the one of the subject)
|
||||||
|
|
||||||
|
trials,channels,timepoints = X.shape
|
||||||
|
segment = int(window*sfreq)
|
||||||
|
step = int(stride*sfreq)
|
||||||
|
epochPerTrial = int((timepoints-segment)/step + 1)
|
||||||
|
|
||||||
|
X_new = np.empty((trials*epochPerTrial,channels,segment))
|
||||||
|
Y_new = np.empty((trials*epochPerTrial,Y.shape[1]))
|
||||||
|
Z_new = np.empty((trials*epochPerTrial,Z.shape[1]))
|
||||||
|
|
||||||
|
count=0
|
||||||
|
for trial in range(trials):
|
||||||
|
for epoch in range(epochPerTrial):
|
||||||
|
X_new[count,:,:] = X[trial,:,epoch*step:(epoch*step)+segment]
|
||||||
|
Y_new[count,:] = Y[trial,:]
|
||||||
|
Z_new[count,:] = Z[trial,:]
|
||||||
|
count = count+1
|
||||||
|
|
||||||
|
return X_new, Y_new, Z_new
|
||||||
|
|
||||||
|
|
||||||
|
# In[ ]:
|
||||||
|
|
||||||
|
|
||||||
|
def save_features(dataset, ans, Y_epoch, sfreq, window, stride):
|
||||||
|
|
||||||
|
# A function to generate the features and save them
|
||||||
|
|
||||||
|
# parameters:-
|
||||||
|
# dataset - name of the dataset
|
||||||
|
# ans - epoched segment of X
|
||||||
|
# Y_epoch - epoched segments of the valence and arousal scores taken from the subject
|
||||||
|
# window - window length in seconds
|
||||||
|
# stride - stride length in seconds
|
||||||
|
# sfreq - sampli5ng frequency of the EEG signal
|
||||||
|
|
||||||
|
# returns:-
|
||||||
|
# void
|
||||||
|
|
||||||
|
fs = sfreq
|
||||||
|
|
||||||
|
featurepath = os.getcwd() + '/' + dataset + '/data_extracted/featuresDict/'
|
||||||
|
|
||||||
|
feature_matrix = eeg.shannonEntropy(ans, bin_min=-200, bin_max=200, binWidth=2)
|
||||||
|
np.savez((featurepath+"shannonEntropy_{}_{}.npz").format(window,stride),features = feature_matrix , Y = Y_epoch)
|
||||||
|
|
||||||
|
feature_matrix = eeg.eegStd(ans)
|
||||||
|
stdshape = feature_matrix.shape
|
||||||
|
# The channels
|
||||||
|
emotiv_channels = ['AF3','F7','F3','FC5','T7','P7','O1','O2','P8','T8','FC6','F4','F8','AF4']
|
||||||
|
left_channels = ['AF3', 'F7','F3', 'FC5', 'T7', 'P7', 'O1']
|
||||||
|
right_channels = ['AF4','F8','F4','FC6','T8','P8','O2']
|
||||||
|
|
||||||
|
dasm_gamma = np.empty((0,stdshape[1]))
|
||||||
|
rasm_gamma = np.empty((0,stdshape[1]))
|
||||||
|
for lc,rc in zip(left_channels, right_channels):
|
||||||
|
lci = emotiv_channels.index(lc)
|
||||||
|
rci = emotiv_channels.index(rc)
|
||||||
|
|
||||||
|
#left differential entropy
|
||||||
|
dl = (0.5)*np.log((2*math.pi*math.e*np.square(eeg.eegStd(eeg.filt_data(np.expand_dims(ans[lci,:,:], axis=0),30,45,fs)))))
|
||||||
|
#right differential entropy
|
||||||
|
dr = (0.5)*np.log((2*math.pi*math.e*np.square(eeg.eegStd(eeg.filt_data(np.expand_dims(ans[rci,:,:], axis=0),30,45,fs)))))
|
||||||
|
|
||||||
|
dasm_gamma = np.append(dasm_gamma, np.subtract(dl,dr), axis=0)
|
||||||
|
rasm_gamma = np.append(rasm_gamma, np.divide(dl,dr), axis=0)
|
||||||
|
|
||||||
|
np.savez((featurepath+"dasm_gamma_{}_{}.npz").format(window,stride),features = dasm_gamma , Y = Y_epoch)
|
||||||
|
np.savez((featurepath+"rasm_gamma_{}_{}.npz").format(window,stride),features = rasm_gamma , Y = Y_epoch)
|
||||||
|
del dasm_gamma, rasm_gamma
|
||||||
|
|
||||||
|
return
|
||||||
|
'''
|
||||||
|
Subband Information Quantity
|
||||||
|
'''
|
||||||
|
|
||||||
|
# delta (0.5–4 Hz)
|
||||||
|
eegData_delta = eeg.filt_data(ans, 0.5, 4, fs)
|
||||||
|
feature_matrix = eeg.shannonEntropy(eegData_delta, bin_min=-200, bin_max=200, binWidth=2)
|
||||||
|
np.savez((featurepath+"ShannonRes_sub_bands_delta_{}_{}.npz").format(window,stride),features = feature_matrix , Y = Y_epoch)
|
||||||
|
|
||||||
|
eegData_theta = eeg.filt_data(ans, 4, 8, fs)
|
||||||
|
feature_matrix = eeg.shannonEntropy(eegData_theta, bin_min=-200, bin_max=200, binWidth=2)
|
||||||
|
np.savez((featurepath+"ShannonRes_sub_bands_theta_{}_{}.npz").format(window,stride),features = feature_matrix , Y = Y_epoch)
|
||||||
|
|
||||||
|
eegData_alpha = eeg.filt_data(ans, 8, 12, fs)
|
||||||
|
feature_matrix = eeg.shannonEntropy(eegData_alpha, bin_min=-200, bin_max=200, binWidth=2)
|
||||||
|
np.savez((featurepath+"ShannonRes_sub_bands_alpha_{}_{}.npz").format(window,stride),features = feature_matrix , Y = Y_epoch)
|
||||||
|
|
||||||
|
eegData_beta = eeg.filt_data(ans, 12, 30, fs)
|
||||||
|
feature_matrix = eeg.shannonEntropy(eegData_beta, bin_min=-200, bin_max=200, binWidth=2)
|
||||||
|
np.savez((featurepath+"ShannonRes_sub_bands_beta_{}_{}.npz").format(window,stride),features = feature_matrix , Y = Y_epoch)
|
||||||
|
|
||||||
|
eegData_gamma = eeg.filt_data(ans, 30,45, fs)
|
||||||
|
feature_matrix = eeg.shannonEntropy(eegData_gamma, bin_min=-200, bin_max=200, binWidth=2)
|
||||||
|
np.savez((featurepath+"ShannonRes_sub_bands_gamma_{}_{}.npz").format(window,stride),features = feature_matrix , Y = Y_epoch)
|
||||||
|
|
||||||
|
HjorthMob, HjorthComp = eeg.hjorthParameters(ans)
|
||||||
|
feature_matrix = HjorthComp
|
||||||
|
np.savez((featurepath+"Hjorth_complexity_{}_{}.npz").format(window,stride),features = feature_matrix , Y = Y_epoch)
|
||||||
|
|
||||||
|
feature_matrix = HjorthMob
|
||||||
|
np.savez((featurepath+"Hjorth_mobilty_{}_{}.npz").format(window,stride),features = feature_matrix , Y = Y_epoch)
|
||||||
|
|
||||||
|
feature_matrix = eeg.falseNearestNeighbor(ans)
|
||||||
|
np.savez((featurepath+"falseNearestNeighbor_{}_{}.npz").format(window,stride),features = feature_matrix , Y = Y_epoch)
|
||||||
|
|
||||||
|
feature_matrix = eeg.medianFreq(ans,fs)
|
||||||
|
np.savez((featurepath+"medianFreq_{}_{}.npz").format(window,stride),features = feature_matrix , Y = Y_epoch)
|
||||||
|
|
||||||
|
feature_matrix = eeg.bandPower(ans, 0.5, 4, fs)
|
||||||
|
np.savez((featurepath+"bandPwr_delta_{}_{}.npz").format(window,stride),features = feature_matrix , Y = Y_epoch)
|
||||||
|
|
||||||
|
feature_matrix = eeg.bandPower(ans, 4, 8, fs)
|
||||||
|
np.savez((featurepath+"bandPwr_theta_{}_{}.npz").format(window,stride),features = feature_matrix , Y = Y_epoch)
|
||||||
|
|
||||||
|
feature_matrix = eeg.bandPower(ans, 8, 12, fs)
|
||||||
|
np.savez((featurepath+"bandPwr_alpha_{}_{}.npz").format(window,stride),features = feature_matrix , Y = Y_epoch)
|
||||||
|
|
||||||
|
feature_matrix = eeg.bandPower(ans, 12, 30, fs)
|
||||||
|
np.savez((featurepath+"bandPwr_beta_{}_{}.npz").format(window,stride),features = feature_matrix , Y = Y_epoch)
|
||||||
|
|
||||||
|
feature_matrix = eeg.bandPower(ans, 30, 45, fs)
|
||||||
|
np.savez((featurepath+"bandPwr_gamma_{}_{}.npz").format(window,stride),features = feature_matrix , Y = Y_epoch)
|
||||||
|
|
||||||
|
feature_matrix = eeg.eegStd(ans)
|
||||||
|
stdshape = feature_matrix.shape
|
||||||
|
np.savez((featurepath+"stdDev_{}_{}.npz").format(window,stride),features = feature_matrix , Y = Y_epoch)
|
||||||
|
|
||||||
|
feature_matrix = eeg.diffuseSlowing(ans)
|
||||||
|
np.savez((featurepath+"diffuseSlowing_{}_{}.npz").format(window,stride),features = feature_matrix , Y = Y_epoch)
|
||||||
|
|
||||||
|
minNumSamples = int(70*fs/1000)
|
||||||
|
feature_matrix = eeg.spikeNum(ans,minNumSamples)
|
||||||
|
np.savez((featurepath+"spikeNum_{}_{}.npz").format(window,stride),features = feature_matrix , Y = Y_epoch)
|
||||||
|
|
||||||
|
|
||||||
|
feature_matrix = eeg.burstAfterSpike(ans,eegData_delta,minNumSamples=7,stdAway = 3)
|
||||||
|
np.savez((featurepath+"deltaBurstAfterSpike_{}_{}.npz").format(window,stride),features = feature_matrix , Y = Y_epoch)
|
||||||
|
|
||||||
|
feature_matrix = eeg.shortSpikeNum(ans,minNumSamples)
|
||||||
|
np.savez((featurepath+"shortSpikeNum_{}_{}.npz").format(window,stride),features = feature_matrix , Y = Y_epoch)
|
||||||
|
|
||||||
|
feature_matrix = eeg.numBursts(ans,fs)
|
||||||
|
np.savez((featurepath+"numBursts_{}_{}.npz").format(window,stride),features = feature_matrix , Y = Y_epoch)
|
||||||
|
|
||||||
|
burstLenMean_res,burstLenStd_res = eeg.burstLengthStats(ans,fs)
|
||||||
|
feature_matrix = burstLenMean_res
|
||||||
|
np.savez((featurepath+"burstLen_u_and_sigma_mean_{}_{}.npz").format(window,stride),features = feature_matrix , Y = Y_epoch)
|
||||||
|
|
||||||
|
feature_matrix = burstLenStd_res
|
||||||
|
np.savez((featurepath+"burstLen_u_and_sigma_std_{}_{}.npz").format(window,stride),features = feature_matrix , Y = Y_epoch)
|
||||||
|
|
||||||
|
feature_matrix = eeg.numSuppressions(ans,fs)
|
||||||
|
np.savez((featurepath+"numSuppressions_{}_{}.npz").format(window,stride),features = feature_matrix , Y = Y_epoch)
|
||||||
|
|
||||||
|
suppLenMean_res,suppLenStd_res = eeg.suppressionLengthStats(ans,fs)
|
||||||
|
feature_matrix = suppLenMean_res
|
||||||
|
np.savez((featurepath+"suppressionLen_u_and_sigma_mean_{}_{}.npz").format(window,stride),features = feature_matrix , Y = Y_epoch)
|
||||||
|
del suppLenMean_res
|
||||||
|
|
||||||
|
feature_matrix = suppLenStd_res
|
||||||
|
np.savez((featurepath+"suppressionLen_u_and_sigma_std_{}_{}.npz").format(window,stride),features = feature_matrix , Y = Y_epoch)
|
||||||
|
del suppLenStd_res
|
||||||
|
|
||||||
|
# DASM and RASM Features
|
||||||
|
# DASM = h(X lefti) − h(Xrighti), and (2)
|
||||||
|
# RASM = h(Xlefti)/h(Xrighti),
|
||||||
|
|
||||||
|
emotiv_channels = ['AF3','F7','F3','FC5','T7','P7','O1','O2','P8','T8','FC6','F4','F8','AF4']
|
||||||
|
left_channels = ['AF3', 'F7','F3', 'FC5', 'T7', 'P7', 'O1']
|
||||||
|
right_channels = ['AF4','F8','F4','FC6','T8','P8','O2']
|
||||||
|
|
||||||
|
#[chans x ms x epochs]
|
||||||
|
dasm_delta = np.empty((0,stdshape[1]))
|
||||||
|
rasm_delta = np.empty((0,stdshape[1]))
|
||||||
|
for lc,rc in zip(left_channels, right_channels):
|
||||||
|
lci = emotiv_channels.index(lc)
|
||||||
|
rci = emotiv_channels.index(rc)
|
||||||
|
|
||||||
|
#left differential entropy
|
||||||
|
inputarr = np.expand_dims(ans[lci,:,:], axis=0)
|
||||||
|
print("inputarr.shape=", inputarr.shape)
|
||||||
|
temp = eeg.filt_data(inputarr, 0.5, 4, fs)
|
||||||
|
tempstd = eeg.eegStd(temp)
|
||||||
|
|
||||||
|
|
||||||
|
dl = (0.5)*np.log((2*math.pi*math.e*np.square(eeg.eegStd(eeg.filt_data(np.expand_dims(ans[lci,:,:], axis=0), 0.5, 4, fs)))))
|
||||||
|
#right differential entropy
|
||||||
|
dr = (0.5)*np.log((2*math.pi*math.e*np.square(eeg.eegStd(eeg.filt_data(np.expand_dims(ans[rci,:,:], axis=0), 0.5, 4, fs)))))
|
||||||
|
|
||||||
|
print("temp.shape=", temp.shape,"tempstd.shape=", tempstd.shape,"dl.shape= ", dl.shape, "stdshape=", stdshape)
|
||||||
|
dasm_delta = np.append(dasm_delta, np.subtract(dl,dr), axis=0)
|
||||||
|
rasm_delta = np.append(rasm_delta, np.divide(dl,dr), axis=0)
|
||||||
|
|
||||||
|
np.savez((featurepath+"dasm_delta_{}_{}.npz").format(window,stride),features = dasm_delta , Y = Y_epoch)
|
||||||
|
np.savez((featurepath+"rasm_delta_{}_{}.npz").format(window,stride),features = rasm_delta , Y = Y_epoch)
|
||||||
|
del dasm_delta, rasm_delta
|
||||||
|
|
||||||
|
dasm_theta = np.empty((0,stdshape[1]))
|
||||||
|
rasm_theta = np.empty((0,stdshape[1]))
|
||||||
|
for lc,rc in zip(left_channels, right_channels):
|
||||||
|
lci = emotiv_channels.index(lc)
|
||||||
|
rci = emotiv_channels.index(rc)
|
||||||
|
|
||||||
|
#left differential entropy
|
||||||
|
dl = (0.5)*np.log((2*math.pi*math.e*np.square(eeg.eegStd(eeg.filt_data(np.expand_dims(ans[lci,:,:], axis=0), 4, 8, fs)))))
|
||||||
|
#right differential entropy
|
||||||
|
dr = (0.5)*np.log((2*math.pi*math.e*np.square(eeg.eegStd(eeg.filt_data(np.expand_dims(ans[rci,:,:], axis=0), 4, 8, fs)))))
|
||||||
|
|
||||||
|
dasm_theta = np.append(dasm_theta, np.subtract(dl,dr), axis=0)
|
||||||
|
rasm_theta = np.append(rasm_theta, np.divide(dl,dr), axis=0)
|
||||||
|
|
||||||
|
np.savez((featurepath+"dasm_theta_{}_{}.npz").format(window,stride),features = dasm_theta , Y = Y_epoch)
|
||||||
|
np.savez((featurepath+"rasm_theta_{}_{}.npz").format(window,stride),features = rasm_theta , Y = Y_epoch)
|
||||||
|
del dasm_theta, rasm_theta
|
||||||
|
|
||||||
|
dasm_alpha = np.empty((0,stdshape[1]))
|
||||||
|
rasm_alpha = np.empty((0,stdshape[1]))
|
||||||
|
for lc,rc in zip(left_channels, right_channels):
|
||||||
|
lci = emotiv_channels.index(lc)
|
||||||
|
rci = emotiv_channels.index(rc)
|
||||||
|
|
||||||
|
#left differential entropy
|
||||||
|
dl = (0.5)*np.log((2*math.pi*math.e*np.square(eeg.eegStd(eeg.filt_data(np.expand_dims(ans[lci,:,:], axis=0), 8, 12, fs)))))
|
||||||
|
#right differential entropy
|
||||||
|
dr = (0.5)*np.log((2*math.pi*math.e*np.square(eeg.eegStd(eeg.filt_data(np.expand_dims(ans[rci,:,:], axis=0), 8, 12, fs)))))
|
||||||
|
|
||||||
|
dasm_alpha = np.append(dasm_alpha, np.subtract(dl,dr), axis=0)
|
||||||
|
rasm_alpha = np.append(rasm_alpha, np.divide(dl,dr), axis=0)
|
||||||
|
|
||||||
|
np.savez((featurepath+"dasm_alpha_{}_{}.npz").format(window,stride),features = dasm_alpha , Y = Y_epoch)
|
||||||
|
np.savez((featurepath+"rasm_alpha_{}_{}.npz").format(window,stride),features = rasm_alpha , Y = Y_epoch)
|
||||||
|
del dasm_alpha, rasm_alpha
|
||||||
|
|
||||||
|
|
||||||
|
dasm_beta = np.empty((0,stdshape[1]))
|
||||||
|
rasm_beta = np.empty((0,stdshape[1]))
|
||||||
|
for lc,rc in zip(left_channels, right_channels):
|
||||||
|
lci = emotiv_channels.index(lc)
|
||||||
|
rci = emotiv_channels.index(rc)
|
||||||
|
|
||||||
|
#left differential entropy
|
||||||
|
dl = (0.5)*np.log((2*math.pi*math.e*np.square(eeg.eegStd(eeg.filt_data(np.expand_dims(ans[lci,:,:], axis=0), 12, 30,fs)))))
|
||||||
|
#right differential entropy
|
||||||
|
dr = (0.5)*np.log((2*math.pi*math.e*np.square(eeg.eegStd(eeg.filt_data(np.expand_dims(ans[rci,:,:], axis=0), 12, 30,fs)))))
|
||||||
|
|
||||||
|
dasm_beta = np.append(dasm_beta, np.subtract(dl,dr), axis=0)
|
||||||
|
rasm_beta = np.append(rasm_beta, np.divide(dl,dr), axis=0)
|
||||||
|
|
||||||
|
np.savez((featurepath+"dasm_beta_{}_{}.npz").format(window,stride),features = dasm_beta , Y = Y_epoch)
|
||||||
|
np.savez((featurepath+"rasm_beta_{}_{}.npz").format(window,stride),features = rasm_beta , Y = Y_epoch)
|
||||||
|
del dasm_beta, rasm_beta
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
dasm_gamma = np.empty((0,stdshape[1]))
|
||||||
|
rasm_gamma = np.empty((0,stdshape[1]))
|
||||||
|
for lc,rc in zip(left_channels, right_channels):
|
||||||
|
lci = emotiv_channels.index(lc)
|
||||||
|
rci = emotiv_channels.index(rc)
|
||||||
|
|
||||||
|
#left differential entropy
|
||||||
|
dl = (0.5)*np.log((2*math.pi*math.e*np.square(eeg.eegStd(eeg.filt_data(np.expand_dims(ans[lci,:,:], axis=0),30,45,fs)))))
|
||||||
|
#right differential entropy
|
||||||
|
dr = (0.5)*np.log((2*math.pi*math.e*np.square(eeg.eegStd(eeg.filt_data(np.expand_dims(ans[rci,:,:], axis=0),30,45,fs)))))
|
||||||
|
|
||||||
|
dasm_gamma = np.append(dasm_gamma, np.subtract(dl,dr), axis=0)
|
||||||
|
rasm_gamma = np.append(rasm_gamma, np.divide(dl,dr), axis=0)
|
||||||
|
|
||||||
|
np.savez((featurepath+"dasm_gamma_{}_{}.npz").format(window,stride),features = dasm_gamma , Y = Y_epoch)
|
||||||
|
np.savez((featurepath+"rasm_gamma_{}_{}.npz").format(window,stride),features = rasm_gamma , Y = Y_epoch)
|
||||||
|
del dasm_gamma, rasm_gamma
|
||||||
|
|
||||||
|
|
||||||
|
# In[ ]:
|
||||||
|
|
||||||
|
|
||||||
|
def getEpochedFeatures(dataset, window, stride, sfreq, label):
|
||||||
|
|
||||||
|
# Function to reshape the arrays before passing on to the save_features function
|
||||||
|
|
||||||
|
# parameters:-
|
||||||
|
# dataset - name of the dataset
|
||||||
|
# window - length of window in seconds
|
||||||
|
# stride - length of stride in seconds
|
||||||
|
# sfreq - sampling frequency of the EEG signal
|
||||||
|
# label - valence/arousal (0/1)
|
||||||
|
|
||||||
|
# returns:-
|
||||||
|
# void
|
||||||
|
'''
|
||||||
|
Returns Accuracy vs Segment size plot for
|
||||||
|
window - length of window
|
||||||
|
stride - step
|
||||||
|
sfreq - sampling freq
|
||||||
|
label - 0-valence, 1-arousal, 2-dominance, 3-liking
|
||||||
|
'''
|
||||||
|
fs = sfreq
|
||||||
|
X = None
|
||||||
|
Y = None
|
||||||
|
Z = None
|
||||||
|
pwd = os.getcwd()
|
||||||
|
with np.load((pwd + '/data_extracted/{}.npz').format(dataset), allow_pickle=True) as data:
|
||||||
|
X = data['X']
|
||||||
|
Y = data['Y']
|
||||||
|
Z = data['Z']
|
||||||
|
|
||||||
|
print("Shape After Loading")
|
||||||
|
print("X.shape=", X.shape," Y.shape=",Y.shape," Z.shape=", Z.shape)
|
||||||
|
# return
|
||||||
|
#########!MODIFY FOR DREAMER AND DEAP DATASET########################################
|
||||||
|
#****
|
||||||
|
'''
|
||||||
|
Reshape Data
|
||||||
|
'''
|
||||||
|
if(dataset != "DEAP"):
|
||||||
|
temp_arr = np.empty((X.shape[0],X.shape[2],X.shape[1]))
|
||||||
|
for i in range(temp_arr.shape[0]):
|
||||||
|
temp_arr[i,:,:] = X[i,:,:].transpose()
|
||||||
|
X = copy.deepcopy(temp_arr)
|
||||||
|
del temp_arr
|
||||||
|
|
||||||
|
print("Shape after reshaping")
|
||||||
|
print("X.shape=", X.shape," Y.shape=",Y.shape," Z.shape=", Z.shape)
|
||||||
|
'''
|
||||||
|
Select Channels(if needed)
|
||||||
|
'''
|
||||||
|
|
||||||
|
print("Data Loaded...\n")
|
||||||
|
ch_names = ['F1', 'AF3', 'F3', 'F7', 'FC5', 'FC1', 'C3', 'T7', 'CP5', 'CP1', 'P3', 'P7', 'PO3', 'O1', 'Oz', 'Pz', 'Fp2', 'AF4', 'Fz', 'F4', 'F8', 'FC6', 'FC2', 'Cz', 'C4', 'T8', 'CP6', 'CP2', 'P4', 'P8', 'PO4', 'O2', 'hEOG','vEOG', 'zEMG','tEMG','GSR','Respiration belt','Plethysmograph','Temperature']
|
||||||
|
emotiv_channels = ['AF3','F7','F3','FC5','T7','P7','O1','O2','P8','T8','FC6','F4','F8','AF4']
|
||||||
|
index_arr = [ch_names.index(x) for x in emotiv_channels]
|
||||||
|
|
||||||
|
X_new = None
|
||||||
|
if(dataset == "DEAP"):
|
||||||
|
X_new = select_channels(X,index_arr)
|
||||||
|
else:
|
||||||
|
X_new = copy.deepcopy(X)
|
||||||
|
|
||||||
|
print("X_new.shape = ", X_new.shape)
|
||||||
|
|
||||||
|
del X
|
||||||
|
print("Channel selection done ...\n")
|
||||||
|
'''
|
||||||
|
# X = (32*40,40,8064)
|
||||||
|
# Y = (32*40,4)
|
||||||
|
# Z = (32*40,2)
|
||||||
|
|
||||||
|
# X : (nbSegments, nbChannel, nbTimepoints) : Data
|
||||||
|
# Y : (nbSegments, nbEmotions) : Valence and arousal data
|
||||||
|
# Z : (nbSegments, 2) : Participant number, and session number
|
||||||
|
'''
|
||||||
|
|
||||||
|
'''
|
||||||
|
DREAMER Dataset
|
||||||
|
# X = (23*18,7808+54032,14)
|
||||||
|
# Y = (23*18,2)
|
||||||
|
# Z = (23*18,2)
|
||||||
|
'''
|
||||||
|
|
||||||
|
(X_epoch, Y_epoch, Z_epoch) = epoch_data(X_new, Y, Z,window,stride,sfreq)
|
||||||
|
del X_new
|
||||||
|
del Y
|
||||||
|
del Z
|
||||||
|
|
||||||
|
print("Epoching done ...\n")
|
||||||
|
print(X_epoch.shape, Y_epoch.shape, Z_epoch.shape) #debug
|
||||||
|
|
||||||
|
# 1280*63,40,128
|
||||||
|
# trial, channel, segment
|
||||||
|
trials, channels, segment = X_epoch.shape
|
||||||
|
ans = np.empty((channels, segment, trials)) #[chans x ms x epochs]
|
||||||
|
for i in range(trials):
|
||||||
|
ans[:,:,i] = X_epoch[i,:,]
|
||||||
|
del X_epoch
|
||||||
|
|
||||||
|
print("ans.shape = ", ans.shape)
|
||||||
|
print("Rotation of np.array done ...\n")
|
||||||
|
pwd = os.getcwd()
|
||||||
|
filepath = pwd + '/' + dataset + "/data_extracted/epochedData/data" + str(window) + str(stride) + ".npz"
|
||||||
|
np.savez(filepath,ans,Y_epoch, Z_epoch)
|
||||||
|
|
||||||
|
# featuresDict = getFeaturesDict(ans,sfreq)
|
||||||
|
save_features(dataset, ans, Y_epoch, sfreq, window, stride)
|
||||||
|
|
||||||
|
# with open(pwd + '/' + dataset + '/data_extracted/featureDicts/'+str(window)+str(stride)+ '.pkl', 'wb') as f:
|
||||||
|
# pickle.dump(featuresDict, f, pickle.HIGHEST_PROTOCOL)
|
||||||
|
|
||||||
|
print("Feature Extraction done ...\n")
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == '__main__':
|
||||||
|
pass
|
||||||
|
|
||||||
@@ -0,0 +1,172 @@
|
|||||||
|
#!/usr/bin/env python
|
||||||
|
# coding: utf-8
|
||||||
|
|
||||||
|
# Script to import all the required libraries.<br>
|
||||||
|
# It also defines a function to make a dictionary and load the features.
|
||||||
|
#
|
||||||
|
|
||||||
|
# In[ ]:
|
||||||
|
|
||||||
|
|
||||||
|
from sklearn.model_selection import train_test_split
|
||||||
|
from sklearn.preprocessing import StandardScaler
|
||||||
|
from sklearn.metrics import accuracy_score
|
||||||
|
|
||||||
|
from sklearn.feature_selection import chi2
|
||||||
|
from sklearn.feature_selection import SelectKBest, f_classif
|
||||||
|
from sklearn.model_selection import train_test_split
|
||||||
|
from sklearn import preprocessing
|
||||||
|
from sklearn.feature_selection import *
|
||||||
|
from sklearn.model_selection import RandomizedSearchCV
|
||||||
|
from sklearn.model_selection import GridSearchCV
|
||||||
|
|
||||||
|
import sys
|
||||||
|
import csv
|
||||||
|
import os
|
||||||
|
import math
|
||||||
|
import glob
|
||||||
|
from scipy import io,signal
|
||||||
|
import numpy as np
|
||||||
|
import pandas as pd
|
||||||
|
|
||||||
|
import pickle
|
||||||
|
from sklearn.metrics import mean_squared_error
|
||||||
|
from sklearn.impute import SimpleImputer
|
||||||
|
|
||||||
|
|
||||||
|
import matplotlib.pyplot as plt
|
||||||
|
import seaborn as sns
|
||||||
|
import copy
|
||||||
|
from sklearn import feature_selection
|
||||||
|
import argparse
|
||||||
|
|
||||||
|
import cuml
|
||||||
|
from cuml.svm import SVR
|
||||||
|
from cuml.ensemble import RandomForestRegressor
|
||||||
|
from cuml.svm import SVC
|
||||||
|
from cuml.ensemble import RandomForestClassifier
|
||||||
|
from cuml.metrics import accuracy_score
|
||||||
|
|
||||||
|
|
||||||
|
# In[ ]:
|
||||||
|
|
||||||
|
|
||||||
|
def loadFeaturesDict(dataset):
|
||||||
|
|
||||||
|
# input parameters :- The name of the dataset
|
||||||
|
# return :- Feature dictionary
|
||||||
|
|
||||||
|
featuresDict = {'shannonEntropy': None,
|
||||||
|
'ShannonRes_delta':None,
|
||||||
|
'ShannonRes_theta':None,
|
||||||
|
'ShannonRes_alpha':None,
|
||||||
|
'ShannonRes_beta':None,
|
||||||
|
'ShannonRes_gamma':None,
|
||||||
|
'HjorthComp':None,
|
||||||
|
'HjorthMob':None,
|
||||||
|
'falseNearestNeighbor':None,
|
||||||
|
'medianFreq':None,
|
||||||
|
'bandPwr_delta':None,
|
||||||
|
'bandPwr_theta':None,
|
||||||
|
'bandPwr_alpha':None,
|
||||||
|
'bandPwr_beta':None,
|
||||||
|
'bandPwr_gamma':None,
|
||||||
|
'stdDev':None,
|
||||||
|
'diffuseSlowing':None,
|
||||||
|
'spikeNum':None,
|
||||||
|
'deltaBurstAfterSpike':None,
|
||||||
|
'shortSpikeNum':None,
|
||||||
|
'numBursts':None,
|
||||||
|
'burstLenMean':None,
|
||||||
|
'burstLenStd':None,
|
||||||
|
'numSuppressions':None,
|
||||||
|
'suppLenMean':None,
|
||||||
|
'suppLenStd':None,
|
||||||
|
'dasm_delta': None,
|
||||||
|
'dasm_theta': None,
|
||||||
|
'dasm_alpha': None,
|
||||||
|
'dasm_beta': None,
|
||||||
|
'dasm_gamma': None,
|
||||||
|
'rasm_delta': None,
|
||||||
|
'rasm_theta': None,
|
||||||
|
'rasm_alpha': None,
|
||||||
|
'rasm_beta': None,
|
||||||
|
'rasm_gamma': None,
|
||||||
|
}
|
||||||
|
|
||||||
|
featurepath = os.getcwd() + '/' + dataset + '/data_extracted/featuresDict/'
|
||||||
|
|
||||||
|
featuresDict['shannonEntropy'] = np.load(featurepath + "shannonEntropy_1_1.npz", allow_pickle=True)['features']
|
||||||
|
|
||||||
|
featuresDict['ShannonRes_delta'] = np.load(featurepath + "ShannonRes_sub_bands_delta_1_1.npz", allow_pickle=True)['features']
|
||||||
|
|
||||||
|
featuresDict['ShannonRes_theta'] = np.load(featurepath + "ShannonRes_sub_bands_theta_1_1.npz", allow_pickle=True)['features']
|
||||||
|
|
||||||
|
featuresDict['ShannonRes_alpha'] = np.load(featurepath + "ShannonRes_sub_bands_alpha_1_1.npz", allow_pickle=True)['features']
|
||||||
|
|
||||||
|
featuresDict['ShannonRes_beta'] = np.load(featurepath + "ShannonRes_sub_bands_beta_1_1.npz", allow_pickle=True)['features']
|
||||||
|
|
||||||
|
featuresDict['ShannonRes_gamma'] = np.load(featurepath + "ShannonRes_sub_bands_gamma_1_1.npz", allow_pickle=True)['features']
|
||||||
|
|
||||||
|
featuresDict['HjorthComp'] = np.load(featurepath + "Hjorth_complexity_1_1.npz", allow_pickle=True)['features']
|
||||||
|
|
||||||
|
featuresDict['HjorthMob'] = np.load(featurepath + "Hjorth_mobilty_1_1.npz",allow_pickle=True)['features']
|
||||||
|
|
||||||
|
featuresDict['falseNearestNeighbor'] = np.load(featurepath + "falseNearestNeighbor_1_1.npz",allow_pickle=True)['features']
|
||||||
|
|
||||||
|
featuresDict['medianFreq'] = np.load(featurepath + "medianFreq_1_1.npz",allow_pickle=True)['features']
|
||||||
|
|
||||||
|
featuresDict['bandPwr_delta']=np.load(featurepath+"bandPwr_delta_1_1.npz", allow_pickle = True)['features']
|
||||||
|
|
||||||
|
featuresDict['bandPwr_theta']=np.load(featurepath + "bandPwr_theta_1_1.npz", allow_pickle = True)['features']
|
||||||
|
|
||||||
|
featuresDict['bandPwr_alpha']=np.load(featurepath + "bandPwr_alpha_1_1.npz", allow_pickle = True)['features']
|
||||||
|
|
||||||
|
featuresDict['bandPwr_beta']=np.load(featurepath + "bandPwr_beta_1_1.npz", allow_pickle = True)['features']
|
||||||
|
|
||||||
|
featuresDict['bandPwr_gamma']=np.load(featurepath + "bandPwr_gamma_1_1.npz", allow_pickle = True)['features']
|
||||||
|
|
||||||
|
featuresDict['stdDev'] = np.load(featurepath + "stdDev_1_1.npz",allow_pickle=True)['features']
|
||||||
|
|
||||||
|
featuresDict['diffuseSlowing'] = np.load(featurepath + "diffuseSlowing_1_1.npz",allow_pickle=True)['features']
|
||||||
|
|
||||||
|
featuresDict['spikeNum'] = np.load(featurepath + "spikeNum_1_1.npz",allow_pickle=True)['features']
|
||||||
|
|
||||||
|
featuresDict['deltaBurstAfterSpike'] = np.load(featurepath + "deltaBurstAfterSpike_1_1.npz",allow_pickle=True)['features']
|
||||||
|
|
||||||
|
featuresDict['shortSpikeNum'] = np.load(featurepath + "shortSpikeNum_1_1.npz", allow_pickle=True)['features']
|
||||||
|
|
||||||
|
featuresDict['numBursts'] = np.load(featurepath + "numBursts_1_1.npz",allow_pickle=True)['features']
|
||||||
|
|
||||||
|
featuresDict['burstLenMean'] = np.load(featurepath + "burstLen_u_and_sigma_mean_1_1.npz",allow_pickle=True)['features']
|
||||||
|
|
||||||
|
featuresDict['burstLenStd'] = np.load(featurepath + "burstLen_u_and_sigma_std_1_1.npz",allow_pickle=True)['features']
|
||||||
|
|
||||||
|
featuresDict['numSuppressions'] = np.load(featurepath + "numSuppressions_1_1.npz",allow_pickle=True)['features']
|
||||||
|
|
||||||
|
featuresDict['suppLenMean'] = np.load(featurepath + "suppressionLen_u_and_sigma_mean_1_1.npz",allow_pickle=True)['features']
|
||||||
|
|
||||||
|
featuresDict['suppLenStd'] = np.load(featurepath + "suppressionLen_u_and_sigma_std_1_1.npz",allow_pickle=True)['features']
|
||||||
|
|
||||||
|
featuresDict['dasm_delta'] = np.load(featurepath + "dasm_delta_1_1.npz",allow_pickle=True)['features']
|
||||||
|
|
||||||
|
featuresDict['dasm_theta'] = np.load(featurepath + "dasm_theta_1_1.npz",allow_pickle=True)['features']
|
||||||
|
|
||||||
|
featuresDict['dasm_alpha'] = np.load(featurepath + "dasm_alpha_1_1.npz",allow_pickle=True)['features']
|
||||||
|
|
||||||
|
featuresDict['dasm_beta'] = np.load(featurepath + "dasm_beta_1_1.npz",allow_pickle=True)['features']
|
||||||
|
|
||||||
|
featuresDict['dasm_gamma'] = np.load(featurepath + "dasm_gamma_1_1.npz",allow_pickle=True)['features']
|
||||||
|
|
||||||
|
featuresDict['rasm_delta'] = np.load(featurepath + "rasm_delta_1_1.npz",allow_pickle=True)['features']
|
||||||
|
|
||||||
|
featuresDict['rasm_theta'] = np.load(featurepath + "rasm_theta_1_1.npz",allow_pickle=True)['features']
|
||||||
|
|
||||||
|
featuresDict['rasm_alpha'] = np.load(featurepath + "rasm_alpha_1_1.npz",allow_pickle=True)['features']
|
||||||
|
|
||||||
|
featuresDict['rasm_beta'] = np.load(featurepath + "rasm_beta_1_1.npz",allow_pickle=True)['features']
|
||||||
|
|
||||||
|
featuresDict['rasm_gamma'] = np.load(featurepath + "rasm_gamma_1_1.npz",allow_pickle=True)['features']
|
||||||
|
|
||||||
|
return featuresDict
|
||||||
|
|
||||||
@@ -0,0 +1,735 @@
|
|||||||
|
#!/usr/bin/env python
|
||||||
|
# coding: utf-8
|
||||||
|
|
||||||
|
# In[ ]:
|
||||||
|
|
||||||
|
|
||||||
|
from ImportUtils import *
|
||||||
|
from sklearn.model_selection import ParameterGrid
|
||||||
|
|
||||||
|
from sklearn.model_selection import train_test_split
|
||||||
|
|
||||||
|
from sklearn.preprocessing import StandardScaler
|
||||||
|
from sklearn.metrics import accuracy_score
|
||||||
|
|
||||||
|
from sklearn.feature_selection import chi2
|
||||||
|
from sklearn.feature_selection import SelectKBest, f_classif
|
||||||
|
from sklearn.model_selection import train_test_split
|
||||||
|
from sklearn.preprocessing import StandardScaler
|
||||||
|
from sklearn.metrics import accuracy_score
|
||||||
|
|
||||||
|
from sklearn.ensemble import RandomForestRegressor as sklearnrfi
|
||||||
|
|
||||||
|
import os
|
||||||
|
import glob
|
||||||
|
from scipy import io,signal
|
||||||
|
import numpy as np
|
||||||
|
import pandas as pd
|
||||||
|
from sklearn import preprocessing
|
||||||
|
import pickle
|
||||||
|
from sklearn.metrics import mean_squared_error
|
||||||
|
from sklearn.impute import SimpleImputer
|
||||||
|
|
||||||
|
|
||||||
|
import matplotlib.pyplot as plt
|
||||||
|
# %matplotlib inline
|
||||||
|
import seaborn as sns
|
||||||
|
import copy
|
||||||
|
|
||||||
|
def topElectrodeRegressionRanking(dataset, window, stride, sfreq, clf, label, scale=False, pca=False):
|
||||||
|
'''
|
||||||
|
Ranks of features according to rmse computed by regressor passed in clf
|
||||||
|
Plots electrode v/s rmse graph
|
||||||
|
|
||||||
|
'''
|
||||||
|
# parameters :-
|
||||||
|
# dataset - name of the dataset
|
||||||
|
# window - length of the sliding window in seconds
|
||||||
|
# stride - length of the stride of the sliding window in seconds
|
||||||
|
# sfreq - sampling frequency of the EEG data
|
||||||
|
# clf - name of the classifier to be used
|
||||||
|
# label - valence/arousal/dominance/liking label (shape depends upon the dataset) in an enumerated form (0- valence ; 1-arousal ; 2- like; 3-dominance)
|
||||||
|
# scale - sclaing of the EEG data if required
|
||||||
|
|
||||||
|
# returns :-
|
||||||
|
# void
|
||||||
|
|
||||||
|
pwd = os.getcwd()
|
||||||
|
|
||||||
|
#load extracted features
|
||||||
|
#####################################################################################################################################################
|
||||||
|
featurepath = os.getcwd() + '/' + dataset + '/data_extracted/featuresDict/'
|
||||||
|
ans = np.load((featurepath + "shannonEntropy_{}_{}.npz").format(window,stride), allow_pickle=True)['features']
|
||||||
|
Y_epoch = np.load((featurepath + "shannonEntropy_{}_{}.npz").format(window,stride), allow_pickle=True)['Y']
|
||||||
|
|
||||||
|
rmseList = []
|
||||||
|
electrodeList = ['AF3', 'F7', 'F3', 'FC5', 'T7', 'P7', 'O1', 'O2', 'P8', 'T8', 'FC6', 'F4', 'F8', 'AF4']
|
||||||
|
fs = sfreq
|
||||||
|
pwd = os.getcwd()
|
||||||
|
featuresDict = loadFeaturesDict(dataset)
|
||||||
|
asm_features = ['dasm_delta', 'dasm_theta', 'dasm_alpha', 'dasm_beta', 'dasm_gamma', 'rasm_delta', 'rasm_theta', 'rasm_alpha', 'rasm_beta', 'rasm_gamma']
|
||||||
|
for asm in asm_features:
|
||||||
|
featuresDict.pop(asm)
|
||||||
|
|
||||||
|
common = []
|
||||||
|
with open('intersection.pkl', 'rb') as f:
|
||||||
|
common = pickle.load(f)
|
||||||
|
|
||||||
|
for k in list(featuresDict.keys()):
|
||||||
|
if k not in common:
|
||||||
|
# pop out common feature
|
||||||
|
featuresDict.pop(k)
|
||||||
|
|
||||||
|
selectFeatures = list(featuresDict.keys())
|
||||||
|
y = Y_epoch[:,label] #valence
|
||||||
|
#####################################################################################################################################################
|
||||||
|
|
||||||
|
for electrode in range(14):
|
||||||
|
# Load FeaturesDict from memory
|
||||||
|
|
||||||
|
|
||||||
|
print("Number of segments are: {}".format(ans.shape[1]))
|
||||||
|
|
||||||
|
featureMatrix = np.empty((len(selectFeatures),ans.shape[1])) #[14*32 + 1,80640]
|
||||||
|
i=0
|
||||||
|
for key,value in featuresDict.items():
|
||||||
|
featureMatrix[i,:] = value[electrode,:]
|
||||||
|
i = i+1
|
||||||
|
|
||||||
|
print(featureMatrix.T.shape)
|
||||||
|
featureMatrix = featureMatrix.astype(np.float32)
|
||||||
|
|
||||||
|
#Impute NaN values with zero
|
||||||
|
if np.isnan(featureMatrix).any():
|
||||||
|
featureMatrix = np.nan_to_num(featureMatrix,nan=0)
|
||||||
|
|
||||||
|
#Name Feature vector columns
|
||||||
|
feature_channel_index = []
|
||||||
|
for feature in selectFeatures:
|
||||||
|
feature_channel_index.append(feature + str(electrode))
|
||||||
|
|
||||||
|
print("Number of Feature-Columns: {}\n".format(len(feature_channel_index))) #debug
|
||||||
|
|
||||||
|
#Preparing dataset from feature matrix
|
||||||
|
X = pd.DataFrame(featureMatrix.T)
|
||||||
|
X.columns = feature_channel_index
|
||||||
|
X = X.replace([np.inf, -np.inf], np.nan)
|
||||||
|
X = X.fillna(0)
|
||||||
|
|
||||||
|
|
||||||
|
print("Features Ready for undergoing selection tests done ...\n")
|
||||||
|
|
||||||
|
# Perform train_test_split to get training and test data
|
||||||
|
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
|
||||||
|
|
||||||
|
# Normalise-scale data
|
||||||
|
# Feature Scaling
|
||||||
|
if(scale == True):
|
||||||
|
sc = StandardScaler()
|
||||||
|
X_train = sc.fit_transform(X_train)
|
||||||
|
X_test = sc.transform(X_test)
|
||||||
|
|
||||||
|
# Apply classfier
|
||||||
|
clf.fit(X_train, y_train)
|
||||||
|
y_predict = clf.predict(X_test)
|
||||||
|
rmse = mean_squared_error(y_test, y_predict,squared=False)
|
||||||
|
print("window: {}, stide: {}, rmse: {}".format(window,stride,rmse))
|
||||||
|
rmseList.append(rmse)
|
||||||
|
|
||||||
|
|
||||||
|
#rank electrodes based on RMSE computed by the classifier
|
||||||
|
electrode_df = pd.DataFrame(electrodeList)
|
||||||
|
rmse_df = pd.DataFrame(rmseList)
|
||||||
|
#concat two dataframes for better visualization
|
||||||
|
electrodeRanking = pd.concat([electrode_df, rmse_df],axis=1)
|
||||||
|
electrodeRanking.columns = ['Electrode','RMSE'] #naming the dataframe columns
|
||||||
|
features_result = electrodeRanking.sort_values('RMSE')
|
||||||
|
print(features_result)
|
||||||
|
# return features_result
|
||||||
|
|
||||||
|
##################################################################################
|
||||||
|
N = features_result.shape[0]
|
||||||
|
topRmseList = []
|
||||||
|
topNList = ["{}".format(x) for x in range(1,N+1)]
|
||||||
|
|
||||||
|
|
||||||
|
for n in range(1,N+1):
|
||||||
|
|
||||||
|
|
||||||
|
topnelectrodes = features_result.head(n)
|
||||||
|
electrode_index = topnelectrodes.index
|
||||||
|
electrode_index = list(electrode_index)[:n]
|
||||||
|
|
||||||
|
# X-Values
|
||||||
|
featureMatrix = np.empty((len(selectFeatures)*len(electrode_index),ans.shape[1]))
|
||||||
|
|
||||||
|
i = 0
|
||||||
|
for index in electrode_index:
|
||||||
|
for key,value in featuresDict.items():
|
||||||
|
featureMatrix[i,:] = value[index,:]
|
||||||
|
i = i+1
|
||||||
|
|
||||||
|
featureMatrix = featureMatrix.astype(np.float32)
|
||||||
|
print(featureMatrix.T.shape)
|
||||||
|
|
||||||
|
# Removing NaN Values
|
||||||
|
if np.isnan(featureMatrix).any():
|
||||||
|
featureMatrix = np.nan_to_num(featureMatrix,nan=0)
|
||||||
|
|
||||||
|
# Name Feature vector columns
|
||||||
|
feature_channel_index = []
|
||||||
|
for index in electrode_index:
|
||||||
|
for feature in selectFeatures:
|
||||||
|
feature_channel_index.append(feature + str(index))
|
||||||
|
|
||||||
|
print("Number of Feature-Columns: {}\n".format(len(feature_channel_index)))
|
||||||
|
|
||||||
|
X = pd.DataFrame(featureMatrix.T)
|
||||||
|
X.columns = feature_channel_index
|
||||||
|
X = X.replace([np.inf, -np.inf], np.nan)
|
||||||
|
X = X.fillna(0)
|
||||||
|
|
||||||
|
|
||||||
|
print("Features Ready for undergoing selection tests done ...\n")
|
||||||
|
|
||||||
|
|
||||||
|
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
|
||||||
|
|
||||||
|
# Normalise-scale data
|
||||||
|
# Feature Scaling
|
||||||
|
if(scale == True):
|
||||||
|
sc = StandardScaler()
|
||||||
|
X_train = sc.fit_transform(X_train)
|
||||||
|
X_test = sc.transform(X_test)
|
||||||
|
|
||||||
|
# Apply classfier
|
||||||
|
|
||||||
|
search_method = "tpot"
|
||||||
|
best_clf = None
|
||||||
|
if(search_method == "bayes_sk_opt"):
|
||||||
|
|
||||||
|
# BayesCV scikit opt
|
||||||
|
search_space = {"bootstrap": Categorical([True, False]), # values for boostrap can be either True or False
|
||||||
|
"max_depth": Integer(6, 20), # values of max_depth are integers from 6 to 20
|
||||||
|
"max_features": Categorical(['auto', 'sqrt','log2']),
|
||||||
|
"min_samples_leaf": Integer(2, 10),
|
||||||
|
"min_samples_split": Integer(2, 10),
|
||||||
|
"n_estimators": Integer(100, 500)
|
||||||
|
}
|
||||||
|
|
||||||
|
forest_bayes_search = BayesSearchCV(clf, search_space, n_iter=32, cv=5)
|
||||||
|
print(forest_bayes_search)
|
||||||
|
print(forest_bayes_search.fit(X_train, y_train))
|
||||||
|
print("Best Parameters are: ", forest_bayes_search.best_params_)
|
||||||
|
best_clf = forest_bayes_search.best_estimator_
|
||||||
|
|
||||||
|
elif(search_method =="random_grid_search"):
|
||||||
|
print("Random Search followed by GridSearch initiated!\n");
|
||||||
|
#RandomSearchCV followed by GridSearchCV
|
||||||
|
random_grid = {'n_estimators': [int(x) for x in np.linspace(start = 200, stop = 2000, num = 10)],
|
||||||
|
'max_features': ['auto', 'sqrt','log2'],
|
||||||
|
'max_depth': [int(x) for x in np.linspace(10, 1000,10)],
|
||||||
|
'min_samples_split': [2, 5, 10,14],
|
||||||
|
'min_samples_leaf': [1, 2, 4,6,8],
|
||||||
|
}
|
||||||
|
rf_randomcv=RandomizedSearchCV(estimator=clf,param_distributions=random_grid,n_iter=100,cv=5,verbose=2,random_state=100)
|
||||||
|
print(rf_randomcv.fit(X_train, y_train))
|
||||||
|
print("Best Parameters for RandomSearchCV are: ", rf_randomcv.best_params_)
|
||||||
|
print("RMSE with RandomSearchCV is :",mean_squared_error(y_test, rf_randomcv.best_estimator_.predict(X_test),squared=False));
|
||||||
|
|
||||||
|
param_grid = {
|
||||||
|
'max_depth': [rf_randomcv.best_params_['max_depth']],
|
||||||
|
'max_features': [rf_randomcv.best_params_['max_features']],
|
||||||
|
'min_samples_leaf': [rf_randomcv.best_params_['min_samples_leaf'],
|
||||||
|
rf_randomcv.best_params_['min_samples_leaf']+2,
|
||||||
|
rf_randomcv.best_params_['min_samples_leaf'] + 4],
|
||||||
|
'min_samples_split': [rf_randomcv.best_params_['min_samples_split'] - 2,
|
||||||
|
rf_randomcv.best_params_['min_samples_split'] - 1,
|
||||||
|
rf_randomcv.best_params_['min_samples_split'],
|
||||||
|
rf_randomcv.best_params_['min_samples_split'] +1,
|
||||||
|
rf_randomcv.best_params_['min_samples_split'] + 2],
|
||||||
|
'n_estimators': [rf_randomcv.best_params_['n_estimators'] - 200, rf_randomcv.best_params_['n_estimators'] - 100,
|
||||||
|
rf_randomcv.best_params_['n_estimators'],
|
||||||
|
rf_randomcv.best_params_['n_estimators'] + 100, rf_randomcv.best_params_['n_estimators'] + 200]
|
||||||
|
}
|
||||||
|
|
||||||
|
grid_search=GridSearchCV(estimator=rf,param_grid=param_grid,cv=10, verbose=5)
|
||||||
|
grid_search.fit(X_train,y_train)
|
||||||
|
best_clf = rf_randomcv.best_estimator_
|
||||||
|
elif search_method =="manual_search":
|
||||||
|
min_rmse = 1000
|
||||||
|
best_clf = clf
|
||||||
|
min_params = None
|
||||||
|
# 2*3*3*3*3
|
||||||
|
param_grid = {'n_estimators': [50, 100],
|
||||||
|
'max_features': ['auto'],
|
||||||
|
'max_depth': [2, 10, 100],
|
||||||
|
'min_samples_split': [2, 5, 10],
|
||||||
|
'min_samples_leaf': [1, 2, 8],
|
||||||
|
}
|
||||||
|
|
||||||
|
param_grid = ParameterGrid(param_grid)
|
||||||
|
for params in param_grid:
|
||||||
|
print("Current Parameters : ", params)
|
||||||
|
temp_clf = RandomForestRegressor( max_features = params['max_features'], min_samples_leaf = params['min_samples_leaf'], min_samples_split = params['min_samples_split'], n_estimators = params['n_estimators'],max_depth = params['max_depth']);
|
||||||
|
temp_clf.fit(X_train,y_train)
|
||||||
|
y_predict = temp_clf.predict(X_test)
|
||||||
|
rmse = mean_squared_error(y_test, y_predict,squared=False)
|
||||||
|
print("Current RMSE with above params : ", rmse)
|
||||||
|
if(min_rmse > rmse):
|
||||||
|
min_rmse = rmse;
|
||||||
|
best_clf = temp_clf;
|
||||||
|
min_params = params;
|
||||||
|
|
||||||
|
print("Best Params for parameter search are : \n", min_params)
|
||||||
|
print("window: {}, stide: {}, rmse: {}".format(window,stride,min_rmse))
|
||||||
|
topRmseList.append(min_rmse)
|
||||||
|
elif search_method == "tpot":
|
||||||
|
from tpot import TPOTRegressor;
|
||||||
|
# TPOT setup
|
||||||
|
GENERATIONS = 5
|
||||||
|
POP_SIZE = 100
|
||||||
|
CV = 5
|
||||||
|
SEED = 42
|
||||||
|
|
||||||
|
tpot = TPOTRegressor(
|
||||||
|
generations=GENERATIONS,
|
||||||
|
population_size=POP_SIZE,
|
||||||
|
random_state=SEED,
|
||||||
|
config_dict="TPOT cuML",
|
||||||
|
n_jobs=1, # cuML requires n_jobs=1
|
||||||
|
cv=CV,
|
||||||
|
verbosity=2,
|
||||||
|
)
|
||||||
|
|
||||||
|
tpot.fit(X_train, y_train)
|
||||||
|
|
||||||
|
y_predict = tpot.predict(X_test)
|
||||||
|
rmse = mean_squared_error(y_test, y_predict,squared=False)
|
||||||
|
print("window: {}, stide: {}, rmse: {}".format(window,stride,rmse))
|
||||||
|
topRmseList.append(rmse)
|
||||||
|
|
||||||
|
|
||||||
|
else:
|
||||||
|
best_clf = clf
|
||||||
|
best_clf.fit(X_train,y_train)
|
||||||
|
|
||||||
|
|
||||||
|
if search_method != "manual_search" and search_method != "tpot":
|
||||||
|
y_predict = best_clf.predict(X_test)
|
||||||
|
rmse = mean_squared_error(y_test, y_predict,squared=False)
|
||||||
|
print("window: {}, stide: {}, rmse: {}".format(window,stride,rmse))
|
||||||
|
topRmseList.append(rmse)
|
||||||
|
|
||||||
|
|
||||||
|
topNElectrode_df = pd.DataFrame(topNList)
|
||||||
|
topNRmse_df = pd.DataFrame(topRmseList)
|
||||||
|
#concat two dataframes for better visualization
|
||||||
|
topNElectrodeRanking = pd.concat([topNElectrode_df, topNRmse_df],axis=1)
|
||||||
|
topNElectrodeRanking.columns = ['Electrode','RMSE'] #naming the dataframe columns
|
||||||
|
print(topNElectrodeRanking)
|
||||||
|
|
||||||
|
# Plotting
|
||||||
|
fig = plt.gcf()
|
||||||
|
fig.set_size_inches(20, 10)
|
||||||
|
plt.rcParams.update({'font.size': 30})
|
||||||
|
plt.xlabel('Top N Electrodes')
|
||||||
|
plt.ylabel('RMSE')
|
||||||
|
plt.plot(topNElectrodeRanking.loc[:,"Electrode"], topNElectrodeRanking.loc[:,"RMSE"])
|
||||||
|
plt.tight_layout()
|
||||||
|
|
||||||
|
|
||||||
|
# In[ ]:
|
||||||
|
|
||||||
|
|
||||||
|
def topFeaturesRegressionRanking(dataset, window, stride, sfreq, clf, label, scale=False, pca=False):
|
||||||
|
'''
|
||||||
|
Ranks of features according to rmse computed by regressor passed in clf
|
||||||
|
Plots electrode v/s rmse graph
|
||||||
|
|
||||||
|
'''
|
||||||
|
# parameters :-
|
||||||
|
# dataset - name of the dataset
|
||||||
|
# window - length of the sliding window in seconds
|
||||||
|
# stride - length of the stride of the sliding window in seconds
|
||||||
|
# sfreq - sampling frequency of the EEG data
|
||||||
|
# clf - name of the classifier to be used
|
||||||
|
# label - valence/arousal/dominance/liking label (shape depends upon the dataset)
|
||||||
|
# scale - sclaing of the EEG data if required
|
||||||
|
|
||||||
|
# returns :-
|
||||||
|
# void
|
||||||
|
fs = sfreq
|
||||||
|
pwd = os.getcwd()
|
||||||
|
featurepath = os.getcwd() + '/' + dataset + '/data_extracted/featuresDict/'
|
||||||
|
ans = np.load((featurepath + "shannonEntropy_{}_{}.npz").format(window,stride), allow_pickle=True)['features']
|
||||||
|
Y_epoch = np.load((featurepath + "shannonEntropy_{}_{}.npz").format(window,stride), allow_pickle=True)['Y']
|
||||||
|
print("Number of segments are: {}".format(ans.shape[1]))
|
||||||
|
|
||||||
|
featuresDict = None
|
||||||
|
featuresDict = loadFeaturesDict(dataset)
|
||||||
|
|
||||||
|
common = []
|
||||||
|
with open('intersection.pkl', 'rb') as f:
|
||||||
|
common = pickle.load(f)
|
||||||
|
|
||||||
|
for k in list(featuresDict.keys()):
|
||||||
|
if k not in common:
|
||||||
|
# pop out common feature
|
||||||
|
featuresDict.pop(k)
|
||||||
|
|
||||||
|
featuresList = list(featuresDict.keys())
|
||||||
|
|
||||||
|
y = Y_epoch[:,label] #valence
|
||||||
|
|
||||||
|
|
||||||
|
rmseList = []
|
||||||
|
|
||||||
|
####################################################################
|
||||||
|
#modify featuresList
|
||||||
|
featureMatrix = np.empty((0,ans.shape[1])) #[14*32 + 1,80640]
|
||||||
|
for key,value in featuresDict.items():
|
||||||
|
featureMatrix = np.append(featureMatrix,value,axis=0)
|
||||||
|
|
||||||
|
|
||||||
|
if np.isnan(featureMatrix).any():
|
||||||
|
featureMatrix = np.nan_to_num(featureMatrix,nan=0)
|
||||||
|
|
||||||
|
featureMatrix = featureMatrix.astype('float64')
|
||||||
|
|
||||||
|
|
||||||
|
feature_channel_index = []
|
||||||
|
for feature in featuresList:
|
||||||
|
for i in range(featuresDict[feature].shape[0]):
|
||||||
|
if(i>=10):
|
||||||
|
feature_channel_index.append(feature + str(i))
|
||||||
|
else:
|
||||||
|
feature_channel_index.append(feature + '0' + str(i))
|
||||||
|
|
||||||
|
print(len(list(featuresDict.keys())))
|
||||||
|
print("Number of Feature-Columns: {}\n".format(len(feature_channel_index)))
|
||||||
|
|
||||||
|
X = pd.DataFrame(featureMatrix.T)
|
||||||
|
X = X.replace([np.inf, -np.inf], np.nan)
|
||||||
|
X = X.fillna(0)
|
||||||
|
X.columns = feature_channel_index
|
||||||
|
|
||||||
|
#Remove Variance = 0 features
|
||||||
|
constant_filter = VarianceThreshold(threshold=0)
|
||||||
|
constant_filter.fit(X)
|
||||||
|
constant_columns = [column for column in X.columns
|
||||||
|
if column not in
|
||||||
|
X.columns[constant_filter.get_support()]]
|
||||||
|
X = constant_filter.transform(X)
|
||||||
|
|
||||||
|
for column in constant_columns:
|
||||||
|
feature_channel_index.remove(column)
|
||||||
|
|
||||||
|
print(len(feature_channel_index),feature_channel_index )
|
||||||
|
|
||||||
|
X = pd.DataFrame(X)
|
||||||
|
X.columns = feature_channel_index
|
||||||
|
|
||||||
|
|
||||||
|
filtered_featuresList = []
|
||||||
|
print(type(X))
|
||||||
|
for col in X.columns:
|
||||||
|
feature = col[:-2]
|
||||||
|
electrode = int(col[-2:])
|
||||||
|
if(feature not in filtered_featuresList):
|
||||||
|
filtered_featuresList.append(feature)
|
||||||
|
|
||||||
|
featuresList = filtered_featuresList
|
||||||
|
|
||||||
|
for feature in featuresList:
|
||||||
|
# Load FeaturesDict from memory
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
featureMatrix = featuresDict[feature]
|
||||||
|
featureMatrix = featureMatrix.astype(np.float32)
|
||||||
|
|
||||||
|
if np.isnan(featureMatrix).any():
|
||||||
|
featureMatrix = np.nan_to_num(featureMatrix,nan=0)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
feature_channel_index = []
|
||||||
|
|
||||||
|
for i in range(featuresDict[feature].shape[0]):
|
||||||
|
feature_channel_index.append(feature + str(i))
|
||||||
|
|
||||||
|
print("Number of Feature-Columns: {}\n".format(len(feature_channel_index)))
|
||||||
|
|
||||||
|
X = pd.DataFrame(featureMatrix.T)
|
||||||
|
X = X.replace([np.inf, -np.inf], np.nan)
|
||||||
|
X = X.fillna(0)
|
||||||
|
X.columns = feature_channel_index
|
||||||
|
|
||||||
|
|
||||||
|
print("Features Ready for undergoing selection tests done ...\n")
|
||||||
|
|
||||||
|
|
||||||
|
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
|
||||||
|
|
||||||
|
# Normalise-scale data
|
||||||
|
# Feature Scaling
|
||||||
|
if(scale == True):
|
||||||
|
sc = StandardScaler()
|
||||||
|
X_train = sc.fit_transform(X_train)
|
||||||
|
X_test = sc.transform(X_test)
|
||||||
|
|
||||||
|
# Apply classfier
|
||||||
|
clf.fit(X_train, y_train)
|
||||||
|
y_predict = clf.predict(X_test)
|
||||||
|
rmse = mean_squared_error(y_test, y_predict,squared=False)
|
||||||
|
print("window: {}, stide: {}, rmse: {}".format(window,stride,rmse))
|
||||||
|
rmseList.append(rmse)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
features_df = pd.DataFrame(featuresList)
|
||||||
|
rmse_df = pd.DataFrame(rmseList)
|
||||||
|
#concat two dataframes for better visualization
|
||||||
|
featureRanking = pd.concat([features_df, rmse_df],axis=1)
|
||||||
|
featureRanking.columns = ['Feature','RMSE'] #naming the dataframe columns
|
||||||
|
features_result = featureRanking.sort_values('RMSE')
|
||||||
|
features_result.to_csv(pwd + "/" + dataset + "/arousal_plots/" + "CommonFeaturesRegressionRanking" + str(window) + str(stride) + ".csv")
|
||||||
|
print(features_result)
|
||||||
|
|
||||||
|
###########################################
|
||||||
|
N = features_result.shape[0]
|
||||||
|
topNRmseList = []
|
||||||
|
topNList = ["{}".format(x) for x in range(1,N+1)]
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
for n in range(1,N+1):
|
||||||
|
|
||||||
|
|
||||||
|
topnfeatures = copy.deepcopy(features_result.head(n))
|
||||||
|
topnfeatures = topnfeatures['Feature'].tolist() #list of feature-names
|
||||||
|
|
||||||
|
# X-Values################################################
|
||||||
|
|
||||||
|
featureMatrix = np.empty((0,ans.shape[1]))
|
||||||
|
|
||||||
|
for feature in topnfeatures:
|
||||||
|
featureMatrix = np.append(featureMatrix, featuresDict[feature], axis=0)
|
||||||
|
|
||||||
|
featureMatrix = featureMatrix.astype(np.float32)
|
||||||
|
print(featureMatrix.T.shape)
|
||||||
|
|
||||||
|
feature_channel_index = []
|
||||||
|
for feature in topnfeatures:
|
||||||
|
i=0
|
||||||
|
for i in range(featuresDict[feature].shape[0]):
|
||||||
|
feature_channel_index.append(feature + str(i))
|
||||||
|
|
||||||
|
|
||||||
|
# Removing NaN Values
|
||||||
|
if np.isnan(featureMatrix).any():
|
||||||
|
featureMatrix = np.nan_to_num(featureMatrix,nan=0)
|
||||||
|
|
||||||
|
|
||||||
|
print("Number of Feature-Columns: {}\n".format(len(feature_channel_index)))
|
||||||
|
|
||||||
|
X = pd.DataFrame(featureMatrix.T)
|
||||||
|
X.columns = feature_channel_index
|
||||||
|
X = X.replace([np.inf, -np.inf], np.nan)
|
||||||
|
X = X.fillna(0)
|
||||||
|
|
||||||
|
|
||||||
|
print("Features Ready for undergoing selection tests done ...\n")
|
||||||
|
|
||||||
|
|
||||||
|
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
|
||||||
|
|
||||||
|
# Normalise-scale data
|
||||||
|
# Feature Scaling
|
||||||
|
if(scale == True):
|
||||||
|
sc = StandardScaler()
|
||||||
|
X_train = sc.fit_transform(X_train)
|
||||||
|
X_test = sc.transform(X_test)
|
||||||
|
|
||||||
|
clf.fit(X_train, y_train)
|
||||||
|
y_predict = clf.predict(X_test)
|
||||||
|
rmse = mean_squared_error(y_test, y_predict,squared=False)
|
||||||
|
print("window: {}, stide: {}, rmse: {}".format(window,stride,rmse))
|
||||||
|
topNRmseList.append(rmse)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
topNFeatures_df = pd.DataFrame(topNList)
|
||||||
|
topNRmse_df = pd.DataFrame(topNRmseList)
|
||||||
|
|
||||||
|
#concat two dataframes for better visualization
|
||||||
|
topNFeaturesRanking = pd.concat([topNFeatures_df, topNRmse_df],axis=1)
|
||||||
|
topNFeaturesRanking.columns = ['Feature','RMSE'] #naming the dataframe columns
|
||||||
|
print(topNFeaturesRanking)
|
||||||
|
topNFeaturesRanking.to_csv(pwd + "/" + dataset + "/arousal_plots/" + "topCommonFeaturesRegressionRanking" + str(window) + str(stride) + ".csv")
|
||||||
|
|
||||||
|
# Plotting
|
||||||
|
fig = plt.gcf()
|
||||||
|
fig.set_size_inches(25, 10)
|
||||||
|
plt.rcParams.update({'font.size': 30})
|
||||||
|
plt.xlabel('Top N Features')
|
||||||
|
plt.ylabel('RMSE')
|
||||||
|
plt.plot(topNFeaturesRanking.loc[:,"Feature"], topNFeaturesRanking.loc[:,"RMSE"])
|
||||||
|
plt.tight_layout()
|
||||||
|
|
||||||
|
|
||||||
|
# In[ ]:
|
||||||
|
|
||||||
|
|
||||||
|
def topFeatureColumnsRegressionRanking(dataset, window, stride, sfreq, clf, label, scale=False, pca=False):
|
||||||
|
|
||||||
|
# parameters :-
|
||||||
|
# dataset - name of the dataset
|
||||||
|
# window - length of the sliding window in seconds
|
||||||
|
# stride - length of the stride of the sliding window in seconds
|
||||||
|
# sfreq - sampling frequency of the EEG data
|
||||||
|
# clf - name of the classifier to be used
|
||||||
|
# label - valence/arousal/dominance/liking label (shape depends upon the dataset)
|
||||||
|
# scale - sclaing of the EEG data if required
|
||||||
|
|
||||||
|
# returns :-
|
||||||
|
# void
|
||||||
|
|
||||||
|
fs = sfreq
|
||||||
|
pwd = os.getcwd()
|
||||||
|
featurepath = os.getcwd() + '/' + dataset + '/data_extracted/featuresDict/'
|
||||||
|
|
||||||
|
ans = np.load((featurepath + "shannonEntropy_{}_{}.npz").format(window,stride), allow_pickle=True)['features']
|
||||||
|
Y_epoch = np.load((featurepath + "shannonEntropy_{}_{}.npz").format(window,stride), allow_pickle=True)['Y']
|
||||||
|
electrodeList = ['AF3', 'F7', 'F3', 'FC5', 'T7', 'P7', 'O1', 'O2', 'P8', 'T8', 'FC6', 'F4', 'F8', 'AF4']
|
||||||
|
|
||||||
|
|
||||||
|
print("Number of segments are: {}".format(ans.shape[1]))
|
||||||
|
|
||||||
|
#X##############################################################################################
|
||||||
|
|
||||||
|
featuresDict = None
|
||||||
|
featuresDict = loadFeaturesDict(dataset)
|
||||||
|
|
||||||
|
common = []
|
||||||
|
with open('intersection.pkl', 'rb') as f:
|
||||||
|
common = pickle.load(f)
|
||||||
|
|
||||||
|
for k in list(featuresDict.keys()):
|
||||||
|
if k not in common:
|
||||||
|
# pop out common feature
|
||||||
|
featuresDict.pop(k)
|
||||||
|
|
||||||
|
|
||||||
|
featuresList = list(featuresDict.keys())
|
||||||
|
|
||||||
|
# defining column names
|
||||||
|
feature_channel_index = []
|
||||||
|
|
||||||
|
for feature in featuresList:
|
||||||
|
for i in range(featuresDict[feature].shape[0]):
|
||||||
|
feature_channel_index.append(feature + str(i))
|
||||||
|
|
||||||
|
#defining feature matrix
|
||||||
|
featureMatrix = np.empty((0,ans.shape[1])) #[14*32 + 1,80640]
|
||||||
|
for key,value in featuresDict.items():
|
||||||
|
featureMatrix = np.append(featureMatrix,value,axis=0)
|
||||||
|
|
||||||
|
|
||||||
|
print("Shape of FeatureMatrix: {}\n".format(featureMatrix.T.shape))
|
||||||
|
|
||||||
|
#data-imputation and nan-removal
|
||||||
|
featureMatrix = featureMatrix.astype(np.float32)
|
||||||
|
|
||||||
|
if np.isnan(featureMatrix).any():
|
||||||
|
featureMatrix = np.nan_to_num(featureMatrix,nan=0)
|
||||||
|
|
||||||
|
X = pd.DataFrame(featureMatrix.T)
|
||||||
|
X = X.replace([np.inf, -np.inf], np.nan)
|
||||||
|
X = X.fillna(0)
|
||||||
|
X.columns = feature_channel_index
|
||||||
|
|
||||||
|
|
||||||
|
#Y#####################################################################
|
||||||
|
|
||||||
|
y = Y_epoch[:,label] #valence
|
||||||
|
|
||||||
|
########################################################################
|
||||||
|
rmseList = []
|
||||||
|
|
||||||
|
for col in feature_channel_index:
|
||||||
|
input_df = pd.DataFrame(X[col])
|
||||||
|
|
||||||
|
X_train, X_test, y_train, y_test = train_test_split(input_df, y, test_size=0.2, random_state=42)
|
||||||
|
|
||||||
|
# Normalise-scale data
|
||||||
|
# Feature Scaling
|
||||||
|
if(scale == True):
|
||||||
|
sc = StandardScaler()
|
||||||
|
X_train = sc.fit_transform(X_train)
|
||||||
|
X_test = sc.transform(X_test)
|
||||||
|
|
||||||
|
# Apply classfier
|
||||||
|
clf.fit(X_train, y_train)
|
||||||
|
y_predict = clf.predict(X_test)
|
||||||
|
rmse = mean_squared_error(y_test, y_predict, squared=False)
|
||||||
|
rmseList.append(rmse)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
col_df = pd.DataFrame(feature_channel_index)
|
||||||
|
rmse_df = pd.DataFrame(rmseList)
|
||||||
|
#concat two dataframes for better visualization
|
||||||
|
colRanking = pd.concat([col_df, rmse_df],axis=1)
|
||||||
|
colRanking.columns = ['Column','RMSE'] #naming the dataframe columns
|
||||||
|
features_result = colRanking.sort_values('RMSE')
|
||||||
|
print(features_result)
|
||||||
|
|
||||||
|
|
||||||
|
N = len(feature_channel_index)
|
||||||
|
topNRmseList = []
|
||||||
|
topNList = ["{}".format(x) for x in range(1,N+1)]
|
||||||
|
|
||||||
|
for n in range(1, N+1):
|
||||||
|
ranking_df = features_result.head(n)
|
||||||
|
topncols = ranking_df['Column'].tolist()
|
||||||
|
|
||||||
|
X_train, X_test, y_train, y_test = train_test_split(X[topncols], y, test_size=0.2, random_state=42)
|
||||||
|
|
||||||
|
# Normalise-scale data
|
||||||
|
# Feature Scaling
|
||||||
|
if(scale == True):
|
||||||
|
sc = StandardScaler()
|
||||||
|
X_train = sc.fit_transform(X_train)
|
||||||
|
X_test = sc.transform(X_test)
|
||||||
|
|
||||||
|
# Apply classfier
|
||||||
|
clf.fit(X_train, y_train)
|
||||||
|
y_predict = clf.predict(X_test)
|
||||||
|
rmse = mean_squared_error(y_test, y_predict, squared=False)
|
||||||
|
topNRmseList.append(rmse)
|
||||||
|
|
||||||
|
|
||||||
|
topcol_df = pd.DataFrame(topNList)
|
||||||
|
toprmse_df = pd.DataFrame(topNRmseList)
|
||||||
|
#concat two dataframes for better visualization
|
||||||
|
topcolRanking = pd.concat([topcol_df, toprmse_df],axis=1)
|
||||||
|
topcolRanking.columns = ['Column','RMSE'] #naming the dataframe columns
|
||||||
|
topfeatures_result = topcolRanking
|
||||||
|
print(topfeatures_result)
|
||||||
|
topfeatures_result.to_csv(pwd + "/" + dataset + "/arousal_plots/" + "ColumnsRegressionRanking" + str(window) + str(stride) + ".csv")
|
||||||
|
|
||||||
|
|
||||||
|
# Plotting
|
||||||
|
fig = plt.gcf()
|
||||||
|
fig.set_size_inches(60, 9)
|
||||||
|
plt.xlabel('Top N Columns')
|
||||||
|
plt.ylabel('RMSE')
|
||||||
|
plt.title("Top N Columns v/s RMSE Plot for Window:{} Stride:{} epoched data by varying N".format(window,stride))
|
||||||
|
plt.plot(topfeatures_result.loc[:,"Column"], topfeatures_result.loc[:,"RMSE"])
|
||||||
|
plt.tight_layout()
|
||||||
|
plt.savefig(pwd + "/" + dataset + "/arousal_plots/" + "topFeatureColumnsRegressionRanking" + str(window) + str(stride) + ".svg", bbox_inches='tight')
|
||||||
|
plt.show()
|
||||||
|
plt.clf()
|
||||||
|
|
||||||
@@ -0,0 +1,648 @@
|
|||||||
|
#!/usr/bin/env python
|
||||||
|
# coding: utf-8
|
||||||
|
|
||||||
|
# In[ ]:
|
||||||
|
|
||||||
|
|
||||||
|
from ImportUtils import *
|
||||||
|
|
||||||
|
from sklearn.ensemble import RandomForestRegressor as sklearnrfi
|
||||||
|
from sklearn.feature_selection import VarianceThreshold
|
||||||
|
|
||||||
|
|
||||||
|
# In[ ]:
|
||||||
|
|
||||||
|
|
||||||
|
def topElectrodeFSRegressionRanking(dataset, window, stride, sfreq, clf, label, scale=False, pca=False, mutual_info = False, method='SelectKBest'):
|
||||||
|
'''
|
||||||
|
Ranks of features according to rmse computed by F score based regression
|
||||||
|
Plots electrode v/s rmse graph
|
||||||
|
|
||||||
|
'''
|
||||||
|
# parameters :-
|
||||||
|
# dataset - name of the dataset
|
||||||
|
# window - length of the sliding window in seconds
|
||||||
|
# stride - length of the stride of the sliding window in seconds
|
||||||
|
# sfreq - sampling frequency of the EEG data
|
||||||
|
# clf - name of the classifier to be used
|
||||||
|
# label - valence/arousal/dominance/liking label (shape depends upon the dataset)
|
||||||
|
# scale - sclaing of the EEG data if required
|
||||||
|
# mutual_info - Mutual ranking between features based on information theory
|
||||||
|
# method - 'RandomForest' 'RFE' 'SelectKBest'
|
||||||
|
|
||||||
|
# returns :-
|
||||||
|
# void
|
||||||
|
pwd = os.getcwd()
|
||||||
|
fs = sfreq
|
||||||
|
electrodeList = ['AF3', 'F7', 'F3', 'FC5', 'T7', 'P7', 'O1', 'O2', 'P8', 'T8', 'FC6', 'F4', 'F8', 'AF4']
|
||||||
|
featurepath = os.getcwd() + '/' + dataset + '/data_extracted/featuresDict/'
|
||||||
|
|
||||||
|
ans = np.load((featurepath + "shannonEntropy_{}_{}.npz").format(window,stride), allow_pickle=True)['features']
|
||||||
|
Y_epoch = np.load((featurepath + "shannonEntropy_{}_{}.npz").format(window,stride), allow_pickle=True)['Y']
|
||||||
|
|
||||||
|
print("Number of segments are: {}".format(ans.shape[1]))
|
||||||
|
|
||||||
|
|
||||||
|
featuresDict = None
|
||||||
|
featuresDict = loadFeaturesDict(dataset)
|
||||||
|
asm_features = ['dasm_delta', 'dasm_theta', 'dasm_alpha', 'dasm_beta', 'dasm_gamma', 'rasm_delta', 'rasm_theta', 'rasm_alpha', 'rasm_beta', 'rasm_gamma']
|
||||||
|
for asm in asm_features:
|
||||||
|
featuresDict.pop(asm)
|
||||||
|
|
||||||
|
common = []
|
||||||
|
with open('intersection.pkl', 'rb') as f:
|
||||||
|
common = pickle.load(f)
|
||||||
|
|
||||||
|
for k in list(featuresDict.keys()):
|
||||||
|
if k not in common:
|
||||||
|
# pop out common feature
|
||||||
|
featuresDict.pop(k)
|
||||||
|
|
||||||
|
featuresList = list(featuresDict.keys())
|
||||||
|
print(featuresList)
|
||||||
|
|
||||||
|
featureMatrix = np.empty((0,ans.shape[1])) #[14*32 + 1,80640]
|
||||||
|
for key,value in featuresDict.items():
|
||||||
|
featureMatrix = np.append(featureMatrix,value,axis=0)
|
||||||
|
|
||||||
|
|
||||||
|
if np.isnan(featureMatrix).any():
|
||||||
|
featureMatrix = np.nan_to_num(featureMatrix,nan=0)
|
||||||
|
|
||||||
|
featureMatrix = featureMatrix.astype('float64')
|
||||||
|
|
||||||
|
|
||||||
|
feature_channel_index = []
|
||||||
|
for feature in featuresList:
|
||||||
|
for i in range(featuresDict[feature].shape[0]):
|
||||||
|
if(i>=10):
|
||||||
|
feature_channel_index.append(feature + str(i))
|
||||||
|
else:
|
||||||
|
feature_channel_index.append(feature + '0' + str(i))
|
||||||
|
|
||||||
|
|
||||||
|
print("Number of Feature-Columns: {}\n".format(len(feature_channel_index)))
|
||||||
|
|
||||||
|
X = pd.DataFrame(featureMatrix.T)
|
||||||
|
X = X.replace([np.inf, -np.inf], np.nan)
|
||||||
|
X = X.fillna(0)
|
||||||
|
X.columns = feature_channel_index
|
||||||
|
|
||||||
|
#################################################################
|
||||||
|
y = copy.deepcopy(Y_epoch[:,label]) #valence
|
||||||
|
print("y.shape: ", y.shape)
|
||||||
|
|
||||||
|
|
||||||
|
dfscores = None
|
||||||
|
|
||||||
|
if(method == 'RandomForest'):
|
||||||
|
'''Random Forest Feature Importances'''
|
||||||
|
# estimator = RandomForestRegressor()
|
||||||
|
estimator = sklearnrfi()
|
||||||
|
fit = estimator.fit(X,y)
|
||||||
|
dfscores = pd.DataFrame(fit.feature_importances_)
|
||||||
|
elif(method == 'RFE'):
|
||||||
|
''' RFE'''
|
||||||
|
selector = RFE(clf, n_features_to_select=X.shape[1], step=1)
|
||||||
|
selector = selector.fit(X, y)
|
||||||
|
dfscores = pd.DataFrame(selector.ranking_)
|
||||||
|
|
||||||
|
elif(method == 'SelectKBest'):
|
||||||
|
"""SelecKBest"""
|
||||||
|
#apply SelectKBest class to extract top 10 best features
|
||||||
|
func = None
|
||||||
|
if mutual_info == False:
|
||||||
|
func = f_classif
|
||||||
|
else:
|
||||||
|
func = mutual_info_classif
|
||||||
|
|
||||||
|
bestfeatures = SelectKBest(score_func=func, k=X.shape[1])
|
||||||
|
fit = bestfeatures.fit(X,y)
|
||||||
|
|
||||||
|
dfscores = pd.DataFrame(fit.scores_)
|
||||||
|
|
||||||
|
|
||||||
|
dfcolumns = pd.DataFrame(X.columns)
|
||||||
|
|
||||||
|
#concat two dataframes for better visualization
|
||||||
|
featureScores = pd.concat([dfcolumns,dfscores],axis=1)
|
||||||
|
featureScores.columns = ['Specs','Score'] #naming the dataframe columns
|
||||||
|
features_result = featureScores.nlargest(X.shape[1],'Score')
|
||||||
|
print(features_result)
|
||||||
|
features_result.to_csv(pwd + "/" + dataset + "/arousal_plots/" + "CommonElectrodeFSRegressionRanking"+ method + str(window) + str(stride) + ".csv")
|
||||||
|
|
||||||
|
|
||||||
|
###################################################################
|
||||||
|
topcolumns = features_result['Specs'].values
|
||||||
|
topfeatures = []
|
||||||
|
topelectrodes = []
|
||||||
|
|
||||||
|
for col in topcolumns:
|
||||||
|
feature = col[:-2]
|
||||||
|
electrode = int(col[-2:])
|
||||||
|
if(feature not in topfeatures):
|
||||||
|
topfeatures.append(feature)
|
||||||
|
|
||||||
|
if(electrode not in topelectrodes):
|
||||||
|
topelectrodes.append(electrode)
|
||||||
|
|
||||||
|
##################################################################################
|
||||||
|
|
||||||
|
N = len(topelectrodes)
|
||||||
|
topRmseList = []
|
||||||
|
topNList = ["{}".format(x) for x in range(1,N+1)]
|
||||||
|
|
||||||
|
|
||||||
|
for n in range(1,N+1):
|
||||||
|
|
||||||
|
electrode_index = topelectrodes[:n]
|
||||||
|
print(topelectrodes)
|
||||||
|
print(electrode_index)
|
||||||
|
# X-Values
|
||||||
|
featureMatrix = np.empty((len(featuresList)*len(electrode_index),ans.shape[1]))
|
||||||
|
|
||||||
|
i = 0
|
||||||
|
for index in electrode_index:
|
||||||
|
for key,value in featuresDict.items():
|
||||||
|
featureMatrix[i,:] = value[index,:]
|
||||||
|
i = i+1
|
||||||
|
|
||||||
|
# i = i+1
|
||||||
|
|
||||||
|
featureMatrix = featureMatrix.astype(np.float32)
|
||||||
|
print(featureMatrix.T.shape)
|
||||||
|
|
||||||
|
# Removing NaN Values
|
||||||
|
if np.isnan(featureMatrix).any():
|
||||||
|
featureMatrix = np.nan_to_num(featureMatrix,nan=0)
|
||||||
|
|
||||||
|
|
||||||
|
feature_channel_index = []
|
||||||
|
for index in electrode_index:
|
||||||
|
for feature in featuresList:
|
||||||
|
feature_channel_index.append(feature + str(index))
|
||||||
|
|
||||||
|
print("Number of Feature-Columns: {}\n".format(len(feature_channel_index)))
|
||||||
|
|
||||||
|
X = pd.DataFrame(featureMatrix.T)
|
||||||
|
X.columns = feature_channel_index
|
||||||
|
X = X.replace([np.inf, -np.inf], np.nan)
|
||||||
|
X = X.fillna(0)
|
||||||
|
|
||||||
|
|
||||||
|
print("Features Ready for undergoing selection tests done ...\n")
|
||||||
|
|
||||||
|
|
||||||
|
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
|
||||||
|
|
||||||
|
# Normalise-scale data
|
||||||
|
# Feature Scaling
|
||||||
|
if(scale == True):
|
||||||
|
sc = StandardScaler()
|
||||||
|
X_train = sc.fit_transform(X_train)
|
||||||
|
X_test = sc.transform(X_test)
|
||||||
|
|
||||||
|
# Apply classfier
|
||||||
|
# clf = xgb.XGBClassifier(verbose = 5)
|
||||||
|
clf.fit(X_train, y_train)
|
||||||
|
y_predict = clf.predict(X_test)
|
||||||
|
rmse = mean_squared_error(y_test, y_predict,squared=False)
|
||||||
|
print("window: {}, stide: {}, rmse: {}".format(window,stride,rmse))
|
||||||
|
topRmseList.append(rmse)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
# features_result = features_result.reset_index()
|
||||||
|
topNElectrode_df = pd.DataFrame(topNList)
|
||||||
|
topNRmse_df = pd.DataFrame(topRmseList)
|
||||||
|
#concat two dataframes for better visualization
|
||||||
|
topNElectrodeRanking = pd.concat([topNElectrode_df, topNRmse_df],axis=1)
|
||||||
|
topNElectrodeRanking.columns = ['Electrode','RMSE'] #naming the dataframe columns
|
||||||
|
print(topNElectrodeRanking)
|
||||||
|
topNElectrodeRanking.to_csv(pwd + "/" + dataset + "/arousal_plots/" + "topCommonElectrodeFSRegressionRanking"+ method + str(window) + str(stride) + ".csv")
|
||||||
|
# return features_result
|
||||||
|
|
||||||
|
|
||||||
|
# Plotting
|
||||||
|
fig = plt.gcf()
|
||||||
|
fig.set_size_inches(20, 10)
|
||||||
|
plt.rcParams.update({'font.size': 30})
|
||||||
|
plt.xlabel('Top N Electrodes')
|
||||||
|
plt.ylabel('RMSE')
|
||||||
|
plt.plot(topNElectrodeRanking.loc[:,"Electrode"], topNElectrodeRanking.loc[:,"RMSE"])
|
||||||
|
plt.tight_layout()
|
||||||
|
|
||||||
|
|
||||||
|
# In[ ]:
|
||||||
|
|
||||||
|
|
||||||
|
def topFeatureFSRegressionRanking(dataset, window, stride, sfreq, clf, label, scale=False, pca=False, mutual_info = False, method='SelectKBest'):
|
||||||
|
|
||||||
|
# parameters :-
|
||||||
|
# dataset - name of the dataset
|
||||||
|
# window - length of the sliding window in seconds
|
||||||
|
# stride - length of the stride of the sliding window in seconds
|
||||||
|
# sfreq - sampling frequency of the EEG data
|
||||||
|
# clf - name of the classifier to be used
|
||||||
|
# label - valence/arousal/dominance/liking label (shape depends upon the dataset)
|
||||||
|
# scale - sclaing of the EEG data if required
|
||||||
|
# mutual_info - Mutual ranking between features based on information theory
|
||||||
|
# method - 'RandomForest' 'RFE' 'SelectKBest'
|
||||||
|
|
||||||
|
# returns :-
|
||||||
|
# void
|
||||||
|
|
||||||
|
pwd = os.getcwd()
|
||||||
|
fs = sfreq
|
||||||
|
featurepath = os.getcwd() + '/' + dataset + '/data_extracted/featuresDict/'
|
||||||
|
|
||||||
|
ans = np.load((featurepath + "shannonEntropy_{}_{}.npz").format(window,stride), allow_pickle=True)['features']
|
||||||
|
Y_epoch = np.load((featurepath + "shannonEntropy_{}_{}.npz").format(window,stride), allow_pickle=True)['Y']
|
||||||
|
print("Number of segments are: {}".format(ans.shape[1]))
|
||||||
|
|
||||||
|
|
||||||
|
featuresDict = None
|
||||||
|
featuresDict = loadFeaturesDict(dataset)
|
||||||
|
|
||||||
|
common = []
|
||||||
|
with open('intersection.pkl', 'rb') as f:
|
||||||
|
common = pickle.load(f)
|
||||||
|
|
||||||
|
for k in list(featuresDict.keys()):
|
||||||
|
if k not in common:
|
||||||
|
# pop out common feature
|
||||||
|
featuresDict.pop(k)
|
||||||
|
|
||||||
|
|
||||||
|
##################################################################
|
||||||
|
# featuresToAvoid = ['volt05', 'volt10', 'volt20', 'burstBandPowers','hFD']
|
||||||
|
featuresList = list(featuresDict.keys())
|
||||||
|
print(featuresList)
|
||||||
|
|
||||||
|
featureMatrix = np.empty((0,ans.shape[1])) #[14*32 + 1,80640]
|
||||||
|
for key,value in featuresDict.items():
|
||||||
|
featureMatrix = np.append(featureMatrix,value,axis=0)
|
||||||
|
|
||||||
|
|
||||||
|
if np.isnan(featureMatrix).any():
|
||||||
|
featureMatrix = np.nan_to_num(featureMatrix,nan=0)
|
||||||
|
|
||||||
|
featureMatrix = featureMatrix.astype('float64')
|
||||||
|
|
||||||
|
|
||||||
|
feature_channel_index = []
|
||||||
|
for feature in featuresList:
|
||||||
|
for i in range(featuresDict[feature].shape[0]):
|
||||||
|
if(i>=10):
|
||||||
|
feature_channel_index.append(feature + str(i))
|
||||||
|
else:
|
||||||
|
feature_channel_index.append(feature + '0' + str(i))
|
||||||
|
|
||||||
|
print(len(list(featuresDict.keys())))
|
||||||
|
print("Number of Feature-Columns: {}\n".format(len(feature_channel_index)))
|
||||||
|
|
||||||
|
X = pd.DataFrame(featureMatrix.T)
|
||||||
|
X = X.replace([np.inf, -np.inf], np.nan)
|
||||||
|
X = X.fillna(0)
|
||||||
|
X.columns = feature_channel_index
|
||||||
|
|
||||||
|
#Remove Variance = 0 features
|
||||||
|
constant_filter = VarianceThreshold(threshold=0)
|
||||||
|
constant_filter.fit(X)
|
||||||
|
constant_columns = [column for column in X.columns
|
||||||
|
if column not in
|
||||||
|
X.columns[constant_filter.get_support()]]
|
||||||
|
X = constant_filter.transform(X)
|
||||||
|
for column in constant_columns:
|
||||||
|
feature_channel_index.remove(column)
|
||||||
|
|
||||||
|
print(len(feature_channel_index),feature_channel_index )
|
||||||
|
|
||||||
|
X = pd.DataFrame(X)
|
||||||
|
X.columns = feature_channel_index
|
||||||
|
|
||||||
|
#################################################################
|
||||||
|
y = copy.deepcopy(Y_epoch[:,label]) #valence
|
||||||
|
print("y.shape: ", y.shape)
|
||||||
|
|
||||||
|
|
||||||
|
dfscores = None
|
||||||
|
|
||||||
|
if(method == 'RandomForest'):
|
||||||
|
'''Random Forest Feature Importances'''
|
||||||
|
estimator = sklearnrfi() #RandomForestRegressor()
|
||||||
|
fit = estimator.fit(X,y)
|
||||||
|
dfscores = pd.DataFrame(fit.feature_importances_)
|
||||||
|
elif(method == 'RFE'):
|
||||||
|
''' RFE'''
|
||||||
|
selector = RFE(clf, n_features_to_select=X.shape[1], step=1)
|
||||||
|
selector = selector.fit(X, y)
|
||||||
|
dfscores = pd.DataFrame(selector.ranking_)
|
||||||
|
|
||||||
|
elif(method == 'SelectKBest'):
|
||||||
|
"""SelecKBest"""
|
||||||
|
#apply SelectKBest class to extract top 10 best features
|
||||||
|
func = None
|
||||||
|
if mutual_info == False:
|
||||||
|
func = f_classif
|
||||||
|
else:
|
||||||
|
func = mutual_info_classif
|
||||||
|
|
||||||
|
bestfeatures = SelectKBest(score_func=func, k=X.shape[1])
|
||||||
|
fit = bestfeatures.fit(X,y)
|
||||||
|
|
||||||
|
dfscores = pd.DataFrame(fit.scores_)
|
||||||
|
|
||||||
|
|
||||||
|
dfcolumns = pd.DataFrame(X.columns)
|
||||||
|
|
||||||
|
#concat two dataframes for better visualization
|
||||||
|
featureScores = pd.concat([dfcolumns,dfscores],axis=1)
|
||||||
|
featureScores.columns = ['Specs','Score'] #naming the dataframe columns
|
||||||
|
features_result = featureScores.nlargest(X.shape[1],'Score')
|
||||||
|
print(features_result)
|
||||||
|
features_result.to_csv(pwd + "/" + dataset + "/arousal_plots/" + "CommonFeatureFSRegressionRanking"+ method + str(window) + str(stride) + ".csv")
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
###################################################################
|
||||||
|
topcolumns = features_result['Specs'].values
|
||||||
|
topfeatures = []
|
||||||
|
topelectrodes = []
|
||||||
|
|
||||||
|
for col in topcolumns:
|
||||||
|
feature = col[:-2]
|
||||||
|
electrode = int(col[-2:])
|
||||||
|
if(feature not in topfeatures):
|
||||||
|
topfeatures.append(feature)
|
||||||
|
|
||||||
|
if(electrode not in topelectrodes):
|
||||||
|
topelectrodes.append(electrode)
|
||||||
|
|
||||||
|
|
||||||
|
######################################################################
|
||||||
|
# TOP-N-FEATURE-RANKING
|
||||||
|
print(topfeatures)
|
||||||
|
print(topelectrodes)
|
||||||
|
N = len(topfeatures)
|
||||||
|
topNRmseList = []
|
||||||
|
topNList = ["{}".format(x) for x in range(1,N+1)]
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
for n in range(1,N+1):
|
||||||
|
|
||||||
|
topnfeatures = topfeatures[:n]
|
||||||
|
|
||||||
|
# X-Values################################################
|
||||||
|
|
||||||
|
featureMatrix = np.empty((0,ans.shape[1]))
|
||||||
|
|
||||||
|
for feature in topnfeatures:
|
||||||
|
featureMatrix = np.append(featureMatrix, featuresDict[feature], axis=0)
|
||||||
|
|
||||||
|
featureMatrix = featureMatrix.astype('float64')
|
||||||
|
print(featureMatrix.T.shape)
|
||||||
|
|
||||||
|
feature_channel_index = []
|
||||||
|
for feature in topnfeatures:
|
||||||
|
i=0
|
||||||
|
for i in range(featuresDict[feature].shape[0]):
|
||||||
|
feature_channel_index.append(feature + str(i))
|
||||||
|
|
||||||
|
|
||||||
|
# Removing NaN Values
|
||||||
|
if np.isnan(featureMatrix).any():
|
||||||
|
featureMatrix = np.nan_to_num(featureMatrix,nan=0)
|
||||||
|
|
||||||
|
|
||||||
|
print("Number of Feature-Columns: {}\n".format(len(feature_channel_index)))
|
||||||
|
|
||||||
|
X = pd.DataFrame(featureMatrix.T)
|
||||||
|
X.columns = feature_channel_index
|
||||||
|
X = X.replace([np.inf, -np.inf], np.nan)
|
||||||
|
X = X.fillna(0)
|
||||||
|
|
||||||
|
|
||||||
|
print("Features Ready for undergoing selection tests done ...\n")
|
||||||
|
|
||||||
|
X = X.astype(np.float32)
|
||||||
|
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
|
||||||
|
|
||||||
|
# Normalise-scale data
|
||||||
|
# Feature Scaling
|
||||||
|
if(scale == True):
|
||||||
|
sc = StandardScaler()
|
||||||
|
X_train = sc.fit_transform(X_train)
|
||||||
|
X_test = sc.transform(X_test)
|
||||||
|
|
||||||
|
clf.fit(X_train, y_train)
|
||||||
|
y_predict = clf.predict(X_test)
|
||||||
|
rmse = mean_squared_error(y_test, y_predict,squared=False)
|
||||||
|
print("window: {}, stide: {}, rmse: {}".format(window,stride,rmse))
|
||||||
|
topNRmseList.append(rmse)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
topNFeatures_df = pd.DataFrame(topNList)
|
||||||
|
|
||||||
|
topNRmse_df = pd.DataFrame(topNRmseList)
|
||||||
|
|
||||||
|
#concat two dataframes for better visualization
|
||||||
|
topNFeaturesRanking = pd.concat([topNFeatures_df, topNRmse_df],axis=1)
|
||||||
|
topNFeaturesRanking.columns = ['Feature','RMSE'] #naming the dataframe columns
|
||||||
|
print(topNFeaturesRanking)
|
||||||
|
topNFeaturesRanking.to_csv(pwd + "/" + dataset + "/arousal_plots/" + "topCommonFeatureFSRegressionRanking"+ method + str(window) + str(stride) + ".csv")
|
||||||
|
|
||||||
|
# Plotting
|
||||||
|
fig = plt.gcf()
|
||||||
|
fig.set_size_inches(25, 10)
|
||||||
|
plt.rcParams.update({'font.size': 30})
|
||||||
|
plt.xlabel('Top N Features')
|
||||||
|
plt.ylabel('RMSE')
|
||||||
|
plt.plot(topNFeaturesRanking.loc[:,"Feature"], topNFeaturesRanking.loc[:,"RMSE"])
|
||||||
|
plt.tight_layout()
|
||||||
|
|
||||||
|
|
||||||
|
# In[ ]:
|
||||||
|
|
||||||
|
|
||||||
|
def topFSColumnsRegressionRanking(dataset, window, stride, sfreq, clf, label, scale=False, pca=False, mutual_info = False, method='SelectKBest'):
|
||||||
|
# Method C
|
||||||
|
# parameters :-
|
||||||
|
# dataset - name of the dataset
|
||||||
|
# window - length of the sliding window in seconds
|
||||||
|
# stride - length of the stride of the sliding window in seconds
|
||||||
|
# sfreq - sampling frequency of the EEG data
|
||||||
|
# clf - name of the classifier to be used
|
||||||
|
# label - valence/arousal/dominance/liking label (shape depends upon the dataset)
|
||||||
|
# scale - sclaing of the EEG data if required
|
||||||
|
# mutual_info - Mutual ranking between features based on information theory
|
||||||
|
# method - 'RandomForest' 'RFE' 'SelectKBest'
|
||||||
|
|
||||||
|
# returns :-
|
||||||
|
# void
|
||||||
|
fs = sfreq
|
||||||
|
pwd = os.getcwd()
|
||||||
|
|
||||||
|
featurepath = os.getcwd() + '/' + dataset + '/data_extracted/featuresDict/'
|
||||||
|
ans = np.load((featurepath + "shannonEntropy_{}_{}.npz").format(window,stride), allow_pickle=True)['features']
|
||||||
|
Y_epoch = np.load((featurepath + "shannonEntropy_{}_{}.npz").format(window,stride), allow_pickle=True)['Y']
|
||||||
|
|
||||||
|
print("Number of segments are: {}".format(ans.shape[1]))
|
||||||
|
|
||||||
|
#X##############################################################################################
|
||||||
|
|
||||||
|
featuresDict = None
|
||||||
|
featuresDict = loadFeaturesDict(dataset)
|
||||||
|
|
||||||
|
common = []
|
||||||
|
with open('intersection.pkl', 'rb') as f:
|
||||||
|
common = pickle.load(f)
|
||||||
|
|
||||||
|
for k in list(featuresDict.keys()):
|
||||||
|
if k not in common:
|
||||||
|
# pop out common feature
|
||||||
|
featuresDict.pop(k)
|
||||||
|
|
||||||
|
print("Number of Features:",len(list(featuresDict.keys())))
|
||||||
|
featuresList = list(featuresDict.keys())
|
||||||
|
|
||||||
|
feature_channel_index = []
|
||||||
|
|
||||||
|
feature_channel_index = []
|
||||||
|
for feature in featuresList:
|
||||||
|
for i in range(featuresDict[feature].shape[0]):
|
||||||
|
if(i>=10):
|
||||||
|
feature_channel_index.append(feature +'_'+ str(i))
|
||||||
|
else:
|
||||||
|
feature_channel_index.append(feature + '_0' + str(i))
|
||||||
|
|
||||||
|
print(len(list(featuresDict.keys())))
|
||||||
|
print("Number of Feature-Columns: {}\n".format(len(feature_channel_index)))
|
||||||
|
|
||||||
|
#defining feature matrix
|
||||||
|
featureMatrix = np.empty((0,ans.shape[1])) #[14*32 + 1,80640]
|
||||||
|
for key,value in featuresDict.items():
|
||||||
|
featureMatrix = np.append(featureMatrix,value,axis=0)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
print("Shape of FeatureMatrix: {}\n".format(featureMatrix.T.shape))
|
||||||
|
|
||||||
|
#data-imputation and nan-removal
|
||||||
|
featureMatrix = featureMatrix.astype(np.float32)
|
||||||
|
|
||||||
|
if np.isnan(featureMatrix).any():
|
||||||
|
featureMatrix = np.nan_to_num(featureMatrix,nan=0)
|
||||||
|
|
||||||
|
X = pd.DataFrame(featureMatrix.T)
|
||||||
|
X = X.replace([np.inf, -np.inf], np.nan)
|
||||||
|
X = X.fillna(0)
|
||||||
|
X.columns = feature_channel_index
|
||||||
|
|
||||||
|
|
||||||
|
#Y#####################################################################
|
||||||
|
|
||||||
|
y = Y_epoch[:,label] #valence
|
||||||
|
# y = pd.DataFrame(y)
|
||||||
|
|
||||||
|
########################################################################
|
||||||
|
dfscores = None
|
||||||
|
|
||||||
|
if(method == 'RandomForest'):
|
||||||
|
'''Random Forest Feature Importances'''
|
||||||
|
estimator = sklearnrfi() #RandomForestRegressor()
|
||||||
|
fit = estimator.fit(X,y)
|
||||||
|
dfscores = pd.DataFrame(fit.feature_importances_)
|
||||||
|
elif(method == 'RFE'):
|
||||||
|
''' RFE'''
|
||||||
|
selector = RFE(clf, n_features_to_select=X.shape[1], step=1)
|
||||||
|
selector = selector.fit(X, y)
|
||||||
|
dfscores = pd.DataFrame(selector.ranking_)
|
||||||
|
|
||||||
|
elif(method == 'SelectKBest'):
|
||||||
|
"""SelecKBest"""
|
||||||
|
#apply SelectKBest class to extract top 10 best features
|
||||||
|
func = None
|
||||||
|
if mutual_info == False:
|
||||||
|
func = f_classif
|
||||||
|
else:
|
||||||
|
func = mutual_info_classif
|
||||||
|
|
||||||
|
bestfeatures = SelectKBest(score_func=func, k=X.shape[1])
|
||||||
|
fit = bestfeatures.fit(X,y)
|
||||||
|
|
||||||
|
dfscores = pd.DataFrame(fit.scores_)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
dfcolumns = pd.DataFrame(X.columns)
|
||||||
|
|
||||||
|
#concat two dataframes for better visualization
|
||||||
|
featureScores = pd.concat([dfcolumns,dfscores],axis=1)
|
||||||
|
featureScores.columns = ['Column','Score'] #naming the dataframe columns
|
||||||
|
features_result = featureScores.nlargest(X.shape[1],'Score')
|
||||||
|
print(features_result)
|
||||||
|
|
||||||
|
N = len(feature_channel_index)
|
||||||
|
topNRmseList = []
|
||||||
|
topNList = ["{}".format(x) for x in range(1,N+1)]
|
||||||
|
|
||||||
|
for n in range(1, N+1):
|
||||||
|
ranking_df = features_result.head(n)
|
||||||
|
topncols = ranking_df['Column'].tolist()
|
||||||
|
|
||||||
|
input_df = pd.DataFrame(X[topncols])
|
||||||
|
|
||||||
|
X_train, X_test, y_train, y_test = train_test_split(input_df, y, test_size=0.2, random_state=42)
|
||||||
|
|
||||||
|
# Normalise-scale data
|
||||||
|
# Feature Scaling
|
||||||
|
if(scale == True):
|
||||||
|
sc = StandardScaler()
|
||||||
|
X_train = sc.fit_transform(X_train)
|
||||||
|
X_test = sc.transform(X_test)
|
||||||
|
|
||||||
|
# Apply classfier
|
||||||
|
clf.fit(X_train, y_train)
|
||||||
|
y_predict = clf.predict(X_test)
|
||||||
|
rmse = mean_squared_error(y_test, y_predict, squared=False)
|
||||||
|
print(n,rmse)
|
||||||
|
topNRmseList.append(rmse)
|
||||||
|
|
||||||
|
|
||||||
|
topcol_df = pd.DataFrame(topNList)
|
||||||
|
toprmse_df = pd.DataFrame(topNRmseList)
|
||||||
|
#concat two dataframes for better visualization
|
||||||
|
topcolRanking = pd.concat([topcol_df, toprmse_df],axis=1)
|
||||||
|
topcolRanking.columns = ['Column','RMSE'] #naming the dataframe columns
|
||||||
|
topfeatures_result = topcolRanking
|
||||||
|
print(topfeatures_result)
|
||||||
|
topfeatures_result.to_csv(pwd + "/" + dataset + "/arousal_plots/" + "topFSColumnsRegressionRanking"+method + str(window) + str(stride) + ".csv")
|
||||||
|
|
||||||
|
# Plotting
|
||||||
|
fig = plt.gcf()
|
||||||
|
fig.set_size_inches(60, 9)
|
||||||
|
|
||||||
|
plt.xlabel('Top N Columns')
|
||||||
|
plt.ylabel('RMSE')
|
||||||
|
plt.title("Top N Columns v/s RMSE Plot for Window:{} Stride:{} epoched data by varying N".format(window,stride))
|
||||||
|
plt.plot(topfeatures_result.loc[:,"Column"], topfeatures_result.loc[:,"RMSE"])
|
||||||
|
plt.tight_layout()
|
||||||
|
plt.savefig(pwd + "/" + dataset + "/arousal_plots/" + "topFSColumnsRegressionRanking"+method + str(window) + str(stride) + ".svg", bbox_inches='tight', dpi=500)
|
||||||
|
plt.show()
|
||||||
|
plt.clf()
|
||||||
|
|
||||||
|
|
||||||
|
# In[ ]:
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == '__main__':
|
||||||
|
pass
|
||||||
|
|
||||||
|
|
||||||
@@ -0,0 +1,319 @@
|
|||||||
|
#!/usr/bin/env python
|
||||||
|
# coding: utf-8
|
||||||
|
|
||||||
|
# In[ ]:
|
||||||
|
|
||||||
|
|
||||||
|
get_ipython().system('git clone -l -s https://github.com/sari-saba-sadiya/EEGExtract.git cloned-repo')
|
||||||
|
get_ipython().run_line_magic('cd', 'cloned-repo')
|
||||||
|
get_ipython().system('ls')
|
||||||
|
|
||||||
|
|
||||||
|
# In[ ]:
|
||||||
|
|
||||||
|
|
||||||
|
get_ipython().system('pip install -r requirements.txt')
|
||||||
|
|
||||||
|
|
||||||
|
# In[ ]:
|
||||||
|
|
||||||
|
|
||||||
|
from google.colab import drive
|
||||||
|
drive.mount('/gdrive',force_remount=True)
|
||||||
|
|
||||||
|
|
||||||
|
# In[ ]:
|
||||||
|
|
||||||
|
|
||||||
|
get_ipython().system('pip install pyinform')
|
||||||
|
|
||||||
|
|
||||||
|
# In[ ]:
|
||||||
|
|
||||||
|
|
||||||
|
get_ipython().run_line_magic('cd', '../../gdrive/MyDrive/emotion_recognition_project')
|
||||||
|
|
||||||
|
|
||||||
|
# In[ ]:
|
||||||
|
|
||||||
|
|
||||||
|
import EEGExtract as eeg
|
||||||
|
from scipy import io,signal
|
||||||
|
import numpy as np
|
||||||
|
import pandas as pd
|
||||||
|
from sklearn import preprocessing
|
||||||
|
import pandas as pd
|
||||||
|
import pickle
|
||||||
|
import matplotlib.pyplot as plt
|
||||||
|
|
||||||
|
|
||||||
|
# In[ ]:
|
||||||
|
|
||||||
|
|
||||||
|
class load_data:
|
||||||
|
'''
|
||||||
|
Load the preprocessed data here, store the paramters
|
||||||
|
'''
|
||||||
|
def __init__(self,name):
|
||||||
|
self.name = name #name of dataset
|
||||||
|
self.X = None
|
||||||
|
self.Y = None
|
||||||
|
self.Z = None
|
||||||
|
self.freq = None #(in Hz) is same for all datasets
|
||||||
|
self.channels = None
|
||||||
|
self.ch_type = 'eeg'
|
||||||
|
self.eegData = None
|
||||||
|
self.use_autoreject = 'y'
|
||||||
|
self.no_of_subjects = None
|
||||||
|
def load_arrays(self):
|
||||||
|
if self.name == 'DREAMER':
|
||||||
|
array = np.load('original_data/DREAMER.npz')
|
||||||
|
self.freq = 128
|
||||||
|
self.no_of_subjects = 23
|
||||||
|
self.channels = ['AF3','F7','F3','FC5','T7','P7','O1','O2','P8','T8','FC6','F4','F8','AF4']
|
||||||
|
if self.name == 'DEAP':
|
||||||
|
array = np.load('original_data/DEAP.npz')
|
||||||
|
self.no_of_subjects = 32
|
||||||
|
self.freq = 128
|
||||||
|
# 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39
|
||||||
|
self.channels = ['F1', 'AF3', 'F3', 'F7', 'FC5', 'FC1', 'C3', 'T7', 'CP5', 'CP1', 'P3', 'P7', 'PO3', 'O1', 'Oz', 'Pz', 'Fp2', 'AF4', 'Fz', 'F4', 'F8', 'FC6', 'FC2', 'Cz', 'C4', 'T8', 'CP6', 'CP2', 'P4', 'P8', 'PO4', 'O2', 'hEOG','vEOG', 'zEMG','tEMG','GSR','Respiration belt','Plethysmograph','Temperature']
|
||||||
|
if self.name == 'OASIS':
|
||||||
|
#array = np.load('original_data/OASIS.npz')
|
||||||
|
self.no_of_subjects = 15
|
||||||
|
if self.use_autoreject == 'y':
|
||||||
|
with open('preprocessed_data/oasis/with_autoreject.p','rb') as file:
|
||||||
|
self.X = pickle.load(file)
|
||||||
|
self.channels = ['AF3', 'F7', 'F3', 'FC5', 'T7', 'P7', 'O1', 'O2', 'P8', 'T8', 'FC6', 'F4', 'F8', 'AF4']
|
||||||
|
self.freq = 128
|
||||||
|
self.X ,self.Y= merge_dictionary(self.X)
|
||||||
|
(a,b,c) = self.X.shape
|
||||||
|
self.X = np.reshape(self.X,(a,c,b))
|
||||||
|
else:
|
||||||
|
array = np.load('preprocessed_data/oasis/without_autoreject.npz')
|
||||||
|
self.freq = 128
|
||||||
|
self.channels = ['AF3','F7','F3','FC5','T7','P7','O1','O2','P8','T8','FC6','F4','F8','AF4']
|
||||||
|
self.X = array['X']
|
||||||
|
self.Y = array['Y']
|
||||||
|
(a,b,c) = self.X.shape
|
||||||
|
self.X = np.reshape(self.X,(a,c,b))
|
||||||
|
|
||||||
|
else:
|
||||||
|
self.X = array['X']
|
||||||
|
if self.name == 'DEAP':
|
||||||
|
self.X = self.X[:,:,[1,3,2,4,7,11,13,31,29,25,21,19,20,17]] # To maintain uniformity across all datasets, only 14 electrodes selected
|
||||||
|
self.channels = ['AF3', 'F7', 'F3', 'FC5', 'T7', 'P7', 'O1', 'O2', 'P8', 'T8', 'FC6', 'F4', 'F8', 'AF4']
|
||||||
|
if self.name != 'OASIS':
|
||||||
|
self.Y = array['Y']
|
||||||
|
#self.Z = array['Z']
|
||||||
|
self.reshape_data()
|
||||||
|
def reshape_data(self):
|
||||||
|
'''
|
||||||
|
reshapes data in the format EEGExtract module expects i.e channels x timepoints x epochs
|
||||||
|
'''
|
||||||
|
|
||||||
|
(epochs,timepoints,channels) = self.X.shape
|
||||||
|
self.eegData = np.reshape(self.X,(channels,timepoints,epochs))
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
# In[ ]:
|
||||||
|
|
||||||
|
|
||||||
|
def merge_dictionary(dictionary):
|
||||||
|
'''
|
||||||
|
merge all trial data to form one array
|
||||||
|
'''
|
||||||
|
no_of_trials = len(list(dictionary.keys()))
|
||||||
|
no_of_channels = dictionary[1][0].shape[1]
|
||||||
|
length_of_segment = dictionary[1][0].shape[2]
|
||||||
|
no_of_epochs_per_trial = dictionary[1][0].shape[0]
|
||||||
|
X = np.empty((0,no_of_channels,length_of_segment))
|
||||||
|
Y = np.empty((0,2))
|
||||||
|
for trial,lst in dictionary.items():
|
||||||
|
array = dictionary[trial][0]
|
||||||
|
score = dictionary[trial][3]
|
||||||
|
X = np.append(X,array,axis = 0)
|
||||||
|
for epoch in range(no_of_epochs_per_trial):
|
||||||
|
Y = np.append(Y,np.expand_dims(score,axis =0),axis = 0)
|
||||||
|
|
||||||
|
return X,Y
|
||||||
|
|
||||||
|
|
||||||
|
# In[ ]:
|
||||||
|
|
||||||
|
|
||||||
|
def calculate_diffrential_entropy_for_bands(eegData,freq):
|
||||||
|
# Function to calculate the differential entropy for the different bands of EEG data
|
||||||
|
|
||||||
|
# parameters :-
|
||||||
|
# eegData :- The differential EEG signal value
|
||||||
|
# freq :- sampling frequency of the EEG signal
|
||||||
|
# returns :-
|
||||||
|
# bandwise DE
|
||||||
|
#delta band
|
||||||
|
delta_band = eeg.filt_data(eegData,0.5,4,freq)
|
||||||
|
#theta band
|
||||||
|
theta_band = eeg.filt_data(eegData,4,8,freq)
|
||||||
|
#alpha bad
|
||||||
|
alpha_band = eeg.filt_data(eegData,8,12,freq)
|
||||||
|
#beta band
|
||||||
|
beta_band = eeg.filt_data(eegData,12,30,freq)
|
||||||
|
#gamma band
|
||||||
|
gamma_band = eeg.filt_data(eegData,30,63,freq)
|
||||||
|
|
||||||
|
|
||||||
|
diffrential_entropy_delta = 1/2*np.log(np.var(delta_band,axis = 1)*np.pi*np.e*2)
|
||||||
|
|
||||||
|
diffrential_entropy_theta = 1/2*np.log(np.var(theta_band,axis = 1)*np.pi*np.e*2)
|
||||||
|
|
||||||
|
diffrential_entropy_alpha = 1/2*np.log(np.var(alpha_band,axis = 1)*np.pi*np.e*2)
|
||||||
|
|
||||||
|
diffrential_entropy_beta = 1/2*np.log(np.var(beta_band,axis = 1)*np.pi*np.e*2)
|
||||||
|
|
||||||
|
diffrential_entropy_gamma = 1/2*np.log(np.var(gamma_band,axis = 1)*np.pi*np.e*2)
|
||||||
|
#print(diffrential_entropy_delta.shape,diffrential_entropy_gamma.shape,diffrential_entropy_theta.shape,diffrential_entropy_alpha.shape,diffrential_entropy_beta.shape)
|
||||||
|
return diffrential_entropy_delta,diffrential_entropy_theta,diffrential_entropy_alpha,diffrential_entropy_beta,diffrential_entropy_gamma
|
||||||
|
|
||||||
|
|
||||||
|
# In[ ]:
|
||||||
|
|
||||||
|
|
||||||
|
#['AF3', 'F7', 'F3', 'FC5', 'T7', 'P7', 'O1', 'O2', 'P8', 'T8', 'FC6', 'F4', 'F8', 'AF4']
|
||||||
|
# 0 1 2 3 4 5 6 7 8 9 10 11 12 13
|
||||||
|
def calculate_RASM_DASM(band):
|
||||||
|
RASM_AF3_AF4 = np.expand_dims(band[0,:]/band[13,:],axis = 0)
|
||||||
|
RASM_F3_F4 = np.expand_dims(band[2,:]/band[11,:],axis = 0)
|
||||||
|
RASM_F7_F8 = np.expand_dims(band[1,:]/band[12,:],axis = 0)
|
||||||
|
RASM_FC5_FC6 = np.expand_dims(band[3,:]/band[10,:],axis = 0)
|
||||||
|
RASM_O1_O2 = np.expand_dims(band[6,:]/band[7,:],axis = 0)
|
||||||
|
RASM_P7_P8 = np.expand_dims(band[5,:]/band[8,:],axis=0)
|
||||||
|
RASM_T7_T8 = np.expand_dims(band[4,:]/band[9,:],axis=0)
|
||||||
|
|
||||||
|
DASM_AF3_AF4 = np.expand_dims(band[0,:]-band[13,:],axis = 0)
|
||||||
|
DASM_F3_F4 = np.expand_dims(band[2,:]-band[11,:],axis = 0)
|
||||||
|
DASM_F7_F8 = np.expand_dims(band[1,:]-band[12,:],axis = 0)
|
||||||
|
DASM_FC5_FC6 = np.expand_dims(band[3,:]-band[10,:],axis = 0)
|
||||||
|
DASM_O1_O2 = np.expand_dims(band[6,:]-band[7,:],axis = 0)
|
||||||
|
DASM_P7_P8 = np.expand_dims(band[5,:]-band[8,:],axis=0)
|
||||||
|
DASM_T7_T8 = np.expand_dims(band[4,:]-band[9,:],axis=0)
|
||||||
|
|
||||||
|
|
||||||
|
features = np.empty((0,RASM_AF3_AF4.shape[1]))
|
||||||
|
features = np.append(features,RASM_AF3_AF4,axis = 0)
|
||||||
|
features = np.append(features,RASM_F3_F4,axis = 0)
|
||||||
|
features = np.append(features,RASM_F7_F8,axis = 0)
|
||||||
|
features = np.append(features,RASM_FC5_FC6,axis = 0)
|
||||||
|
features = np.append(features,RASM_O1_O2,axis = 0)
|
||||||
|
features = np.append(features,RASM_P7_P8,axis = 0)
|
||||||
|
features = np.append(features,RASM_T7_T8,axis = 0)
|
||||||
|
|
||||||
|
features = np.append(features,DASM_AF3_AF4,axis = 0)
|
||||||
|
features = np.append(features,DASM_F3_F4,axis = 0)
|
||||||
|
features = np.append(features,DASM_F7_F8,axis = 0)
|
||||||
|
features = np.append(features,DASM_FC5_FC6,axis = 0)
|
||||||
|
features = np.append(features,DASM_O1_O2,axis = 0)
|
||||||
|
features = np.append(features,DASM_P7_P8,axis = 0)
|
||||||
|
features = np.append(features,DASM_T7_T8,axis = 0)
|
||||||
|
return features.T
|
||||||
|
|
||||||
|
|
||||||
|
# In[ ]:
|
||||||
|
|
||||||
|
|
||||||
|
def epoch_data(X,Y, window, stride, sfreq):
|
||||||
|
|
||||||
|
(channels,timepoints,trials )= X.shape
|
||||||
|
X = np.reshape(X,(trials,channels,timepoints))
|
||||||
|
segment = int(window*sfreq)
|
||||||
|
step = int(stride*sfreq)
|
||||||
|
epochPerTrial = int((timepoints-segment)/step + 1)
|
||||||
|
count = 0
|
||||||
|
X_new = np.empty((trials*epochPerTrial,channels,segment))
|
||||||
|
Y_new = np.empty((trials*epochPerTrial,2))
|
||||||
|
for trial in range(trials):
|
||||||
|
for epoch in range(epochPerTrial):
|
||||||
|
X_new[count,:,:] = X[trial,:,epoch*step:(epoch*step)+segment]
|
||||||
|
Y_new[count,:] = Y[trial,:2]
|
||||||
|
count+=1
|
||||||
|
(trials,channels,timepoints) = X_new.shape
|
||||||
|
X_new = np.reshape(X_new,(channels,timepoints,trials))
|
||||||
|
|
||||||
|
return X_new,Y_new
|
||||||
|
|
||||||
|
|
||||||
|
# In[ ]:
|
||||||
|
|
||||||
|
|
||||||
|
def segregate_data_of_subjects(feature_matrix,dataset,sfreq = 128):
|
||||||
|
total_samples = feature_matrix.shape[0]
|
||||||
|
subject_indexes = {}
|
||||||
|
if dataset.name != 'DEAP AND DREAMER':
|
||||||
|
samples_per_subject = total_samples//dataset.no_of_subjects
|
||||||
|
print('samples per subject taken are ',samples_per_subject)
|
||||||
|
subject_indexes = {}
|
||||||
|
for i in range(dataset.no_of_subjects):
|
||||||
|
subject_name = 'subject_' + str(i+1)
|
||||||
|
subject_indexes[subject_name] = feature_matrix[samples_per_subject*i:samples_per_subject*(i+1),:]
|
||||||
|
else:
|
||||||
|
a = feature_matrix[:80640,:]
|
||||||
|
b = feature_matrix[80640:,:]
|
||||||
|
print(b.shape)
|
||||||
|
for i in range(32):
|
||||||
|
samples_per_subject = 2520
|
||||||
|
subject_name = 'subject_' + str(i+1)
|
||||||
|
subject_indexes[subject_name] = a[samples_per_subject*i:samples_per_subject*(i+1),:]
|
||||||
|
for i in range(0,23):
|
||||||
|
samples_per_subject = 8190
|
||||||
|
subject_name = 'subject_' + str(i+1+32)
|
||||||
|
subject_indexes[subject_name] = b[samples_per_subject*i:samples_per_subject*(i+1),:]
|
||||||
|
|
||||||
|
return subject_indexes
|
||||||
|
|
||||||
|
|
||||||
|
# In[ ]:
|
||||||
|
|
||||||
|
|
||||||
|
def driver_code():
|
||||||
|
dataset = load_data('DREAMER')
|
||||||
|
dataset.load_arrays()
|
||||||
|
X = dataset.eegData
|
||||||
|
Y = dataset.Y
|
||||||
|
window = 1
|
||||||
|
stride = 1
|
||||||
|
|
||||||
|
X,Y = epoch_data(X,Y,window,stride,128)
|
||||||
|
print('shape after epoching')
|
||||||
|
print('X:',X.shape)
|
||||||
|
print('Y:',Y.shape)
|
||||||
|
print('')
|
||||||
|
print('')
|
||||||
|
delta,theta,alpha,beta,gamma = calculate_diffrential_entropy_for_bands(X,dataset.freq)
|
||||||
|
bands = {'delta':delta,'theta':theta,'alpha':alpha,'beta':beta,'gamma':gamma}
|
||||||
|
for name,band in bands.items():
|
||||||
|
feature_matrix = calculate_RASM_DASM(band) #extracted RASM ,DASM features for each eng band
|
||||||
|
print(name ,':' ,end = '')
|
||||||
|
print(feature_matrix.shape)
|
||||||
|
print(feature_matrix)
|
||||||
|
np.savez('features/'+dataset.name.lower()+'_RASM_DASM/'+name+'_'+str(window)+'_'+str(stride),features = feature_matrix,Y=Y)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
# In[ ]:
|
||||||
|
|
||||||
|
|
||||||
|
driver_code()
|
||||||
|
|
||||||
|
|
||||||
|
# In[ ]:
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
np.load('features/oasis/without_autoreject/shannonEntropy_1_1.npz')['features']
|
||||||
|
|
||||||
|
|
||||||
|
# In[ ]:
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
@@ -0,0 +1,467 @@
|
|||||||
|
#!/usr/bin/env python
|
||||||
|
# coding: utf-8
|
||||||
|
|
||||||
|
# In[ ]:
|
||||||
|
|
||||||
|
|
||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
"""feature_extraction_25GB_RAM.ipynb
|
||||||
|
|
||||||
|
Automatically generated by Colaboratory.
|
||||||
|
|
||||||
|
Original file is located at
|
||||||
|
https://colab.research.google.com/drive/1QnVj7GyyJhLPrYF4vBTppMwynXqmOTEJ
|
||||||
|
"""
|
||||||
|
|
||||||
|
# Commented out IPython magic to ensure Python compatibility.
|
||||||
|
|
||||||
|
|
||||||
|
import EEGExtract as eeg
|
||||||
|
from scipy import io,signal
|
||||||
|
import numpy as np
|
||||||
|
import pandas as pd
|
||||||
|
from sklearn import preprocessing
|
||||||
|
import pandas as pd
|
||||||
|
import pickle
|
||||||
|
|
||||||
|
|
||||||
|
class load_data:
|
||||||
|
'''
|
||||||
|
Load the preprocessed data here, store the paramters
|
||||||
|
'''
|
||||||
|
def __init__(self,name):
|
||||||
|
self.name = name #name of dataset
|
||||||
|
self.X = None
|
||||||
|
self.Y = None
|
||||||
|
self.Z = None
|
||||||
|
self.freq = None #(in Hz) is same for all datasets
|
||||||
|
self.channels = None
|
||||||
|
self.ch_type = 'eeg'
|
||||||
|
self.eegData = None
|
||||||
|
self.use_autoreject = 'n'
|
||||||
|
def load_arrays(self):
|
||||||
|
if self.name == 'DREAMER':
|
||||||
|
array = np.load('original_data/DREAMER.npz')
|
||||||
|
self.freq = 128
|
||||||
|
self.channels = ['AF3','F7','F3','FC5','T7','P7','O1','O2','P8','T8','FC6','F4','F8','AF4']
|
||||||
|
if self.name == 'DEAP':
|
||||||
|
array = np.load('original_data/DEAP.npz')
|
||||||
|
self.freq = 128
|
||||||
|
# 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39
|
||||||
|
self.channels = ['F1', 'AF3', 'F3', 'F7', 'FC5', 'FC1', 'C3', 'T7', 'CP5', 'CP1', 'P3', 'P7', 'PO3', 'O1', 'Oz', 'Pz', 'Fp2', 'AF4', 'Fz', 'F4', 'F8', 'FC6', 'FC2', 'Cz', 'C4', 'T8', 'CP6', 'CP2', 'P4', 'P8', 'PO4', 'O2', 'hEOG','vEOG', 'zEMG','tEMG','GSR','Respiration belt','Plethysmograph','Temperature']
|
||||||
|
if self.name == 'OASIS':
|
||||||
|
#array = np.load('original_data/OASIS.npz')
|
||||||
|
if self.use_autoreject == 'y':
|
||||||
|
with open('preprocessed_data/oasis/with_autoreject.p','rb') as file:
|
||||||
|
self.X = pickle.load(file)
|
||||||
|
self.channels = ['AF3', 'F7', 'F3', 'FC5', 'T7', 'P7', 'O1', 'O2', 'P8', 'T8', 'FC6', 'F4', 'F8', 'AF4']
|
||||||
|
self.freq = 128
|
||||||
|
self.X ,self.Y= merge_dictionary(self.X)
|
||||||
|
(a,b,c) = self.X.shape
|
||||||
|
self.X = np.reshape(self.X,(a,c,b))
|
||||||
|
else:
|
||||||
|
array = np.load('preprocessed_data/oasis/without_autoreject.npz')
|
||||||
|
self.freq = 128
|
||||||
|
self.channels = ['AF3','F7','F3','FC5','T7','P7','O1','O2','P8','T8','FC6','F4','F8','AF4']
|
||||||
|
self.X = array['X']
|
||||||
|
self.Y = array['Y']
|
||||||
|
(a,b,c) = self.X.shape
|
||||||
|
self.X = np.reshape(self.X,(a,c,b))
|
||||||
|
|
||||||
|
else:
|
||||||
|
self.X = array['X']
|
||||||
|
if self.name == 'DEAP':
|
||||||
|
self.X = self.X[:,:,[1,3,2,4,7,11,13,31,29,25,21,19,20,17]] # To maintain uniformity across all datasets, only 14 electrodes selected
|
||||||
|
self.channels = ['AF3', 'F7', 'F3', 'FC5', 'T7', 'P7', 'O1', 'O2', 'P8', 'T8', 'FC6', 'F4', 'F8', 'AF4']
|
||||||
|
if self.name != 'OASIS':
|
||||||
|
self.Y = array['Y']
|
||||||
|
#self.Z = array['Z']
|
||||||
|
self.reshape_data()
|
||||||
|
def reshape_data(self):
|
||||||
|
'''
|
||||||
|
reshapes data in the format EEGExtract module expects i.e channels x timepoints x epochs
|
||||||
|
'''
|
||||||
|
|
||||||
|
(epochs,timepoints,channels) = self.X.shape
|
||||||
|
self.eegData = np.reshape(self.X,(channels,timepoints,epochs))
|
||||||
|
|
||||||
|
class features:
|
||||||
|
############################ Complexity Features #############################
|
||||||
|
#1>
|
||||||
|
@staticmethod
|
||||||
|
def ShannonRes(eegData,**args):
|
||||||
|
#Shannon Entropy
|
||||||
|
ShannonRes = eeg.shannonEntropy(eegData, bin_min=-200, bin_max=200, binWidth=2)
|
||||||
|
return ShannonRes
|
||||||
|
#2>
|
||||||
|
@staticmethod
|
||||||
|
def ShannonRes_sub_band_delta(eegData,fs):
|
||||||
|
# Subband Information Quantity
|
||||||
|
# delta (0.5–4 Hz)
|
||||||
|
eegData_delta = eeg.filt_data(eegData, 0.5, 4, fs)
|
||||||
|
ShannonRes_delta = eeg.shannonEntropy(eegData_delta, bin_min=-200, bin_max=200, binWidth=2)
|
||||||
|
return ShannonRes_delta
|
||||||
|
#3>
|
||||||
|
@staticmethod
|
||||||
|
def ShannonRes_sub_band_theta(eegData,fs):
|
||||||
|
# theta (4–8 Hz)
|
||||||
|
eegData_theta = eeg.filt_data(eegData, 4, 8, fs)
|
||||||
|
ShannonRes_theta = eeg.shannonEntropy(eegData_theta, bin_min=-200, bin_max=200, binWidth=2)
|
||||||
|
return ShannonRes_theta
|
||||||
|
|
||||||
|
#4>
|
||||||
|
@staticmethod
|
||||||
|
def ShannonRes_sub_band_alpha(eegData,fs):
|
||||||
|
# alpha (8–12 Hz)
|
||||||
|
eegData_alpha = eeg.filt_data(eegData, 8, 12, fs)
|
||||||
|
ShannonRes_alpha = eeg.shannonEntropy(eegData_alpha, bin_min=-200, bin_max=200, binWidth=2)
|
||||||
|
return ShannonRes_alpha
|
||||||
|
|
||||||
|
#5>
|
||||||
|
@staticmethod
|
||||||
|
def ShannonRes_sub_band_beta(eegData,fs):
|
||||||
|
# beta (12–30 Hz)
|
||||||
|
eegData_beta = eeg.filt_data(eegData, 12, 30, fs)
|
||||||
|
ShannonRes_beta = eeg.shannonEntropy(eegData_beta, bin_min=-200, bin_max=200, binWidth=2)
|
||||||
|
return ShannonRes_beta
|
||||||
|
|
||||||
|
#6>
|
||||||
|
@staticmethod
|
||||||
|
def ShannonRes_sub_band_gamma(eegData,fs):
|
||||||
|
# gamma (30–100 Hz)
|
||||||
|
eegData_gamma = eeg.filt_data(eegData, 30, 63, fs)
|
||||||
|
ShannonRes_gamma = eeg.shannonEntropy(eegData_gamma, bin_min=-200, bin_max=200, binWidth=2)
|
||||||
|
return ShannonRes_gamma
|
||||||
|
|
||||||
|
|
||||||
|
#7>
|
||||||
|
@staticmethod
|
||||||
|
def Hojorth_Mobility(eegData,**args):
|
||||||
|
# Hjorth Mobility
|
||||||
|
# Hjorth Complexity
|
||||||
|
HjorthMob, HjorthComp = eeg.hjorthParameters(eegData)
|
||||||
|
return HjorthMob
|
||||||
|
#8>
|
||||||
|
@staticmethod
|
||||||
|
def Hojorth_Complexity(eegData,**args):
|
||||||
|
# Hjorth Mobility
|
||||||
|
# Hjorth Complexity
|
||||||
|
HjorthMob, HjorthComp = eeg.hjorthParameters(eegData)
|
||||||
|
return HjorthComp
|
||||||
|
#9>
|
||||||
|
@staticmethod
|
||||||
|
def False_Nearest_Neighbour(eegData,**args):
|
||||||
|
# False Nearest Neighbor
|
||||||
|
FalseNnRes = eeg.falseNearestNeighbor(eegData)
|
||||||
|
return FalseNnRes
|
||||||
|
##############################################################################
|
||||||
|
|
||||||
|
########################Category Features#####################################
|
||||||
|
#10>
|
||||||
|
@staticmethod
|
||||||
|
def median_frequency(eegData,fs):
|
||||||
|
#fs-sampling frequency
|
||||||
|
# Median Frequency
|
||||||
|
medianFreqRes = eeg.medianFreq(eegData,fs)
|
||||||
|
return medianFreqRes
|
||||||
|
|
||||||
|
#11>
|
||||||
|
@staticmethod
|
||||||
|
def band_power_delta(eegData,fs):
|
||||||
|
#fs - sampling frequency
|
||||||
|
# δ band Power
|
||||||
|
bandPwr_delta = eeg.bandPower(eegData, 0.5, 4, fs)
|
||||||
|
return bandPwr_delta
|
||||||
|
#12>
|
||||||
|
@staticmethod
|
||||||
|
def band_power_theta(eegData,fs):
|
||||||
|
#fs - sampling frequency
|
||||||
|
# θ band Power
|
||||||
|
bandPwr_theta = eeg.bandPower(eegData, 4, 8, fs)
|
||||||
|
return bandPwr_theta
|
||||||
|
|
||||||
|
#13>
|
||||||
|
@staticmethod
|
||||||
|
def band_power_alpha(eegData,fs):
|
||||||
|
#fs - sampling frequency
|
||||||
|
# α band Power
|
||||||
|
bandPwr_alpha = eeg.bandPower(eegData, 8, 12, fs)
|
||||||
|
return bandPwr_alpha
|
||||||
|
|
||||||
|
#14>
|
||||||
|
@staticmethod
|
||||||
|
def band_power_beta(eegData,fs):
|
||||||
|
#fs - sampling frequency
|
||||||
|
# β band Power
|
||||||
|
bandPwr_beta = eeg.bandPower(eegData, 12, 30, fs)
|
||||||
|
return bandPwr_beta
|
||||||
|
|
||||||
|
#15>
|
||||||
|
@staticmethod
|
||||||
|
def band_power_gamma(eegData,fs):
|
||||||
|
#fs - sampling frequency
|
||||||
|
# γ band Power
|
||||||
|
bandPwr_gamma = eeg.bandPower(eegData, 30, 63, fs)
|
||||||
|
return bandPwr_gamma
|
||||||
|
|
||||||
|
#16>
|
||||||
|
@staticmethod
|
||||||
|
def standard_deviation(eegData,**args):
|
||||||
|
# Standard Deviation
|
||||||
|
std_res = eeg.eegStd(eegData)
|
||||||
|
return std_res
|
||||||
|
|
||||||
|
#17>
|
||||||
|
@staticmethod
|
||||||
|
def regularity(eegData,fs):
|
||||||
|
# Regularity (burst-suppression)
|
||||||
|
regularity_res = eeg.eegRegularity(eegData,fs)
|
||||||
|
return regularity_res
|
||||||
|
|
||||||
|
|
||||||
|
#18>
|
||||||
|
@staticmethod
|
||||||
|
def Diffuse_slowing(eegData,**args):
|
||||||
|
# Diffuse Slowing
|
||||||
|
df_res = eeg.diffuseSlowing(eegData)
|
||||||
|
return df_res
|
||||||
|
|
||||||
|
#19>
|
||||||
|
@staticmethod
|
||||||
|
def Spikes(eegData,fs,**args):
|
||||||
|
# Spikes
|
||||||
|
minNumSamples = int(70*fs/1000)
|
||||||
|
spikeNum_res = eeg.spikeNum(eegData,minNumSamples)
|
||||||
|
return spikeNum_res
|
||||||
|
|
||||||
|
#20>
|
||||||
|
@staticmethod
|
||||||
|
def delta_burst_after_spike(eegData,fs):
|
||||||
|
# Delta burst after Spike
|
||||||
|
eegData_delta = eeg.filt_data(eegData, 0.5, 4, fs)
|
||||||
|
deltaBurst_res = eeg.burstAfterSpike(eegData,eegData_delta,minNumSamples=7,stdAway = 3)
|
||||||
|
return deltaBurst_res
|
||||||
|
|
||||||
|
#21>
|
||||||
|
@staticmethod
|
||||||
|
def Sharp_spike(eegData,fs):
|
||||||
|
minNumSamples = int(70*fs/1000)
|
||||||
|
# Sharp spike
|
||||||
|
sharpSpike_res = eeg.shortSpikeNum(eegData,minNumSamples)
|
||||||
|
return sharpSpike_res
|
||||||
|
|
||||||
|
#22>
|
||||||
|
@staticmethod
|
||||||
|
def Number_of_Burst(eegData,fs):
|
||||||
|
# Number of Bursts
|
||||||
|
numBursts_res = eeg.numBursts(eegData,fs)
|
||||||
|
return numBursts_res
|
||||||
|
|
||||||
|
#23>
|
||||||
|
@staticmethod
|
||||||
|
def Burst_length_u_and_sigma_mean(eegData,fs):
|
||||||
|
# Burst length μ and σ
|
||||||
|
burstLenMean_res,burstLenStd_res = eeg.burstLengthStats(eegData,fs)
|
||||||
|
return burstLenMean_res
|
||||||
|
|
||||||
|
#24>
|
||||||
|
@staticmethod
|
||||||
|
def Burst_length_u_and_sigma_std(eegData,fs):
|
||||||
|
burstLenMean_res,burstLenStd_res = eeg.burstLengthStats(eegData,fs)
|
||||||
|
return burstLenStd_res
|
||||||
|
|
||||||
|
|
||||||
|
#25>
|
||||||
|
@staticmethod
|
||||||
|
def no_of_suprression(eegData,fs):
|
||||||
|
# Number of Suppressions
|
||||||
|
numSupps_res = eeg.numSuppressions(eegData,fs)
|
||||||
|
return numSupps_res
|
||||||
|
|
||||||
|
#26>
|
||||||
|
@staticmethod
|
||||||
|
def Suppression_length_u_and_sigma_mean(eegData,fs):
|
||||||
|
# Suppression length μ and σ
|
||||||
|
suppLenMean_res,suppLenStd_res = eeg.suppressionLengthStats(eegData,fs)
|
||||||
|
return suppLenMean_res
|
||||||
|
|
||||||
|
#27>
|
||||||
|
@staticmethod
|
||||||
|
def Suppression_length_u_and_sigma_std(eegData,fs):
|
||||||
|
# Suppression length μ and σ
|
||||||
|
suppLenMean_res,suppLenStd_res = eeg.suppressionLengthStats(eegData,fs)
|
||||||
|
return suppLenStd_res
|
||||||
|
|
||||||
|
##############################################################################
|
||||||
|
|
||||||
|
def merge_dictionary(dictionary):
|
||||||
|
'''
|
||||||
|
merge all trial data to form one array
|
||||||
|
'''
|
||||||
|
no_of_trials = len(list(dictionary.keys()))
|
||||||
|
no_of_channels = dictionary[1][0].shape[1]
|
||||||
|
length_of_segment = dictionary[1][0].shape[2]
|
||||||
|
no_of_epochs_per_trial = dictionary[1][0].shape[0]
|
||||||
|
X = np.empty((0,no_of_channels,length_of_segment))
|
||||||
|
Y = np.empty((0,2))
|
||||||
|
for trial,lst in dictionary.items():
|
||||||
|
array = dictionary[trial][0]
|
||||||
|
score = dictionary[trial][3]
|
||||||
|
X = np.append(X,array,axis = 0)
|
||||||
|
for epoch in range(no_of_epochs_per_trial):
|
||||||
|
Y = np.append(Y,np.expand_dims(score,axis =0),axis = 0)
|
||||||
|
|
||||||
|
return X,Y
|
||||||
|
|
||||||
|
def epoch_data(X,Y, window, stride, sfreq):
|
||||||
|
|
||||||
|
(channels,timepoints,trials )= X.shape
|
||||||
|
X = np.reshape(X,(trials,channels,timepoints))
|
||||||
|
segment = int(window*sfreq)
|
||||||
|
step = int(stride*sfreq)
|
||||||
|
epochPerTrial = int((timepoints-segment)/step + 1)
|
||||||
|
count = 0
|
||||||
|
X_new = np.empty((trials*epochPerTrial,channels,segment))
|
||||||
|
Y_new = np.empty((trials*epochPerTrial,2))
|
||||||
|
for trial in range(trials):
|
||||||
|
for epoch in range(epochPerTrial):
|
||||||
|
X_new[count,:,:] = X[trial,:,epoch*step:(epoch*step)+segment]
|
||||||
|
Y_new[count,:] = Y[trial,:2]
|
||||||
|
count+=1
|
||||||
|
(trials,channels,timepoints) = X_new.shape
|
||||||
|
X_new = np.reshape(X_new,(channels,timepoints,trials))
|
||||||
|
|
||||||
|
return X_new,Y_new
|
||||||
|
|
||||||
|
def driver_code():
|
||||||
|
dataset_dictionary = {0:'DEAP',1:'OASIS',2:'DREAMER'}
|
||||||
|
print(dataset_dictionary)
|
||||||
|
print('enter number for loading dataset')
|
||||||
|
mapping = int(input())
|
||||||
|
print('plz wait loading dataset preprocessed arrays')
|
||||||
|
dataset = load_data(dataset_dictionary[mapping])
|
||||||
|
if mapping == 1:
|
||||||
|
print('do you want to use with autoreject data? if yes press y')
|
||||||
|
boolean = input()
|
||||||
|
if boolean == 'y':
|
||||||
|
dataset.use_autoreject = 'y'
|
||||||
|
|
||||||
|
dataset.load_arrays()
|
||||||
|
print('loading complete')
|
||||||
|
print('shape of data we will use to make features:',dataset.eegData.shape)
|
||||||
|
print('do you want to segment the data before calculating feature values? y/n')
|
||||||
|
boolean = input()
|
||||||
|
if boolean == 'y':
|
||||||
|
window = float(input('enter window size'))
|
||||||
|
stride = float(input('enter stride size'))
|
||||||
|
dataset.eegData,dataset.Y = epoch_data(dataset.eegData,dataset.Y,window,stride,dataset.freq)
|
||||||
|
print('new shapes of X and Y:',dataset.eegData.shape,' ',dataset.Y.shape)
|
||||||
|
else:
|
||||||
|
window = 0
|
||||||
|
stride = 0
|
||||||
|
print('features available')
|
||||||
|
featuresDict = {0:'shannonEntropy',
|
||||||
|
1:'ShannonRes_sub_bands_alpha',
|
||||||
|
2:'ShannonRes_sub_bands_beta',
|
||||||
|
3:'ShannonRes_sub_bands_delta',
|
||||||
|
4:'ShannonRes_sub_bands_theta',
|
||||||
|
5:'ShannonRes_sub_bands_gamma',
|
||||||
|
6:'Hjorth_mobilty',
|
||||||
|
7:'Hjorth_complexity',
|
||||||
|
8:'falseNearestNeighbor',
|
||||||
|
9:'medianFreq',
|
||||||
|
10:'bandPwr_alpha',
|
||||||
|
11:'bandPwr_beta',
|
||||||
|
12:'bandPwr_gamma',
|
||||||
|
13:'bandPwr_theta',
|
||||||
|
14:'bandPwr_delta',
|
||||||
|
15:'stdDev',
|
||||||
|
16:'diffuseSlowing',
|
||||||
|
17:'spikeNum',
|
||||||
|
18:'deltaBurstAfterSpike',
|
||||||
|
19:'shortSpikeNum',
|
||||||
|
20:'Sharp spike',
|
||||||
|
21:'numBursts',
|
||||||
|
22:'burstLen_u_and_sigma_mean',
|
||||||
|
23:'burstLen_u_and_sigma_std',
|
||||||
|
24:'numSuppressions',
|
||||||
|
25:'suppressionLen_u_and_sigma_mean',
|
||||||
|
26:'suppressionLen_u_and_sigma_std',
|
||||||
|
}
|
||||||
|
featureMethod={0:features.ShannonRes,
|
||||||
|
1:features.ShannonRes_sub_band_alpha,
|
||||||
|
2:features.ShannonRes_sub_band_beta,
|
||||||
|
3:features.ShannonRes_sub_band_delta,
|
||||||
|
4:features.ShannonRes_sub_band_theta,
|
||||||
|
5:features.ShannonRes_sub_band_gamma,
|
||||||
|
6:features.Hojorth_Mobility,
|
||||||
|
7:features.Hojorth_Complexity,
|
||||||
|
8:features.False_Nearest_Neighbour,
|
||||||
|
9:features.median_frequency,
|
||||||
|
10:features.band_power_alpha,
|
||||||
|
11:features.band_power_beta,
|
||||||
|
12:features.band_power_gamma,
|
||||||
|
13:features.band_power_theta,
|
||||||
|
14:features.band_power_delta,
|
||||||
|
15:features.standard_deviation,
|
||||||
|
16:features.regularity,
|
||||||
|
17:features.Diffuse_slowing,
|
||||||
|
18:features.Spikes,
|
||||||
|
19:features.delta_burst_after_spike,
|
||||||
|
20:features.Sharp_spike,
|
||||||
|
21:features.Number_of_Burst,
|
||||||
|
22:features.Burst_length_u_and_sigma_mean,
|
||||||
|
23:features.Burst_length_u_and_sigma_std,
|
||||||
|
24:features.no_of_suprression,
|
||||||
|
25:features.Suppression_length_u_and_sigma_mean,
|
||||||
|
26:features.Suppression_length_u_and_sigma_std,
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
print(featuresDict)
|
||||||
|
|
||||||
|
#define path for saving before hand in np.savez line below
|
||||||
|
path = 'features/'
|
||||||
|
#os.mkdir('features/'+window+'_'+stride)
|
||||||
|
if dataset.name == 'DEAP':
|
||||||
|
path = path +'deap/'
|
||||||
|
elif dataset.name == 'DREAMER':
|
||||||
|
path = path + 'dreamer/'
|
||||||
|
else:
|
||||||
|
if dataset.use_autoreject == 'y':
|
||||||
|
path = path +'oasis/with_autoreject/'
|
||||||
|
else:
|
||||||
|
path = path +'oasis/without_autoreject/'
|
||||||
|
boolean = input('do you want to individually make features? y/n')
|
||||||
|
if boolean =='n':
|
||||||
|
for key in featureMethod.keys():
|
||||||
|
feature_matrix = featureMethod[key](eegData = dataset.eegData,fs=dataset.freq)
|
||||||
|
filename = featuresDict[key]
|
||||||
|
print('saving ---',filename)
|
||||||
|
np.savez(path+filename+'_'+str(int(window))+'_'+str(int(stride)),features = feature_matrix , Y = dataset.Y)
|
||||||
|
else:
|
||||||
|
found_features = False
|
||||||
|
while not found_features:
|
||||||
|
print('enter feature no')
|
||||||
|
key = int(input())
|
||||||
|
feature_matrix = featureMethod[key](eegData = dataset.eegData,fs=dataset.freq)
|
||||||
|
filename = featuresDict[key]
|
||||||
|
print('saving ---',filename)
|
||||||
|
np.savez(path+filename+'_'+str(int(window))+'_'+str(int(stride)),features = feature_matrix , Y = dataset.Y)
|
||||||
|
boolean = input('do you want to find more features? y/n ')
|
||||||
|
if boolean =='n':
|
||||||
|
found_features = True
|
||||||
|
|
||||||
|
|
||||||
|
print('feature extraction done!!!!')
|
||||||
|
|
||||||
|
def __main__():
|
||||||
|
driver_code()
|
||||||
|
|
||||||
|
__main__()
|
||||||
|
|
||||||
|
if __name__ == 'main':
|
||||||
|
driver_code()
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
@@ -0,0 +1,70 @@
|
|||||||
|
#!/usr/bin/env python
|
||||||
|
# coding: utf-8
|
||||||
|
|
||||||
|
# In[ ]:
|
||||||
|
|
||||||
|
|
||||||
|
# Script to get the feature ranking and electrode ranking through
|
||||||
|
# Method A :- Random Forest Regressor
|
||||||
|
# Method B :- F score based Ranking
|
||||||
|
# Method C :- Random Forest Importances approach
|
||||||
|
# Main function
|
||||||
|
|
||||||
|
from ImportUtils import *
|
||||||
|
from TopNByFSMethods import *
|
||||||
|
from TopNByClassifier import *
|
||||||
|
from args_eeg import args as my_args
|
||||||
|
|
||||||
|
if __name__ == '__main__':
|
||||||
|
|
||||||
|
# args object to fetch command line inputs
|
||||||
|
args = my_args()
|
||||||
|
print(args.__dict__)
|
||||||
|
pwd = os.getcwd()
|
||||||
|
|
||||||
|
dataset = args.dataset
|
||||||
|
window = args.window
|
||||||
|
stride = args.stride
|
||||||
|
sfreq = args.sfreq
|
||||||
|
model = args.model
|
||||||
|
label = args.label
|
||||||
|
approach = args.approach #byclassifier or byfs
|
||||||
|
ml_algo = args.ml_algo #classification or regression
|
||||||
|
top = args.top #e or f or ef
|
||||||
|
fs_method = args.fs_method
|
||||||
|
|
||||||
|
#feature extraction
|
||||||
|
getEpochedFeatures(dataset, window, stride, sfreq, label)
|
||||||
|
if(top == "e"):
|
||||||
|
clf = RandomForestRegressor()
|
||||||
|
topElectrodeRegressionRanking(dataset, window, stride, sfreq, clf, label, scale=False, pca=False)
|
||||||
|
topElectrodeFSRegressionRanking(dataset, window, stride, sfreq, clf, label, scale=False, pca=False, mutual_info = False, method='SelectKBest')
|
||||||
|
topElectrodeFSRegressionRanking(dataset, window, stride, sfreq, clf, label, scale=False, pca=False, mutual_info = False, method='RandomForest')
|
||||||
|
plt.legend(["Method A","Method B", "Method C"])
|
||||||
|
|
||||||
|
if(label == 1):
|
||||||
|
plt.savefig(pwd + "/" + dataset + "/arousal_plots/" + "CorrectedElectrodewiseRanking" + str(window) + str(stride) + ".svg", bbox_inches='tight')
|
||||||
|
plt.show()
|
||||||
|
plt.clf()
|
||||||
|
|
||||||
|
else:
|
||||||
|
plt.savefig(pwd + "/" + dataset + "/plots/" + "CorrectedElectrodewiseRanking" + str(window) + str(stride) + ".svg", bbox_inches='tight')
|
||||||
|
plt.show()
|
||||||
|
plt.clf()
|
||||||
|
|
||||||
|
elif(top == "f"):
|
||||||
|
clf = RandomForestRegressor()
|
||||||
|
topFeaturesRegressionRanking(dataset, window, stride, sfreq, clf, label, scale=False, pca=False)
|
||||||
|
topFeatureFSRegressionRanking(dataset, window, stride, sfreq, clf, label, scale=False, pca=False, mutual_info = False, method='SelectKBest')
|
||||||
|
topFeatureFSRegressionRanking(dataset, window, stride, sfreq, clf, label, scale=False, pca=False, mutual_info = False, method='RandomForest')
|
||||||
|
if(label == 1):
|
||||||
|
plt.legend(["Method A","Method B", "Method C"])
|
||||||
|
plt.savefig(pwd + "/" + dataset + "/arousal_plots/" + "CorrectedFeaturewiseRanking" + str(window) + str(stride) + ".svg", bbox_inches='tight')
|
||||||
|
plt.show()
|
||||||
|
plt.clf()
|
||||||
|
else:
|
||||||
|
plt.legend(["Method A","Method B", "Method C"])
|
||||||
|
plt.savefig(pwd + "/" + dataset + "/plots/" + "CorrectedFeaturewiseRanking" + str(window) + str(stride) + ".svg", bbox_inches='tight')
|
||||||
|
plt.show()
|
||||||
|
plt.clf()
|
||||||
|
|
||||||
@@ -0,0 +1,264 @@
|
|||||||
|
#!/usr/bin/env python
|
||||||
|
# coding: utf-8
|
||||||
|
|
||||||
|
# In[ ]:
|
||||||
|
|
||||||
|
|
||||||
|
import numpy as np
|
||||||
|
import matplotlib.pyplot as plt
|
||||||
|
import pandas as pd
|
||||||
|
import sys
|
||||||
|
from sklearn.preprocessing import MinMaxScaler,StandardScaler
|
||||||
|
from sklearn.utils import shuffle
|
||||||
|
from sklearn.metrics import mean_squared_error
|
||||||
|
from sklearn.model_selection import train_test_split
|
||||||
|
|
||||||
|
|
||||||
|
# In[ ]:
|
||||||
|
|
||||||
|
|
||||||
|
scaler_min_max = MinMaxScaler()
|
||||||
|
scaler_standard = StandardScaler()
|
||||||
|
|
||||||
|
# Either one of the MinMaxScaling or StandardScaling function can be used
|
||||||
|
|
||||||
|
def MinMaxScaling(feature_matrix):
|
||||||
|
global scaler_min_max
|
||||||
|
scaler_min_max.fit(feature_matrix)
|
||||||
|
return scaler_min_max.transform(feature_matrix)
|
||||||
|
|
||||||
|
def StandardScaling(feature_matrix):
|
||||||
|
global scaler_standard
|
||||||
|
scaler_standard.fit(feature_matrix)
|
||||||
|
print('scaling shape',scaler_standard.mean_.shape)
|
||||||
|
return scaler.transform(feature_matrix)
|
||||||
|
|
||||||
|
architecture = 'sklearn'
|
||||||
|
if architecture == 'sklearn':
|
||||||
|
from sklearn.svm import SVR
|
||||||
|
from sklearn.metrics import accuracy_score
|
||||||
|
else:
|
||||||
|
from cuml.svm import SVR
|
||||||
|
from cuml.metrics import accuracy_score
|
||||||
|
|
||||||
|
|
||||||
|
#
|
||||||
|
# """##DEAP dataset
|
||||||
|
# 1> Valence - features selected
|
||||||
|
# >
|
||||||
|
# * bandPwr_gamma
|
||||||
|
# * bandPwr_beta
|
||||||
|
# * ShannonRes_gamma
|
||||||
|
# * ShannonRes_beta
|
||||||
|
# * rasm_gamma
|
||||||
|
# * dasm_gamma
|
||||||
|
#
|
||||||
|
# 2> Arousal - feature selected
|
||||||
|
# >
|
||||||
|
# * HjorthMob
|
||||||
|
# * HjorthComp
|
||||||
|
# * stdDev
|
||||||
|
# * bandPwr_theta
|
||||||
|
# * bandPwr_beta
|
||||||
|
# * ShannonRes_beta
|
||||||
|
# * ShannonRes_gamma
|
||||||
|
# * dasm_beta
|
||||||
|
# """
|
||||||
|
|
||||||
|
# In[ ]:
|
||||||
|
|
||||||
|
|
||||||
|
# now for incremental learning we need to segregate data of subjects
|
||||||
|
def segregate_data_of_subjects(feature_matrix,total_subjects,sfreq = 128):
|
||||||
|
'''
|
||||||
|
reuturs a dictionary which contains the samples data only corresponding to particular subjects of feature matrix
|
||||||
|
'''
|
||||||
|
# parameters :-
|
||||||
|
# feature_matrix :- Vector containing the features mentioned above subject wise, to be used for cross validation
|
||||||
|
# total_subjects :- Total number of subjects in the study
|
||||||
|
# sfreq :- sampling frequency of the EEG data
|
||||||
|
# returns :-
|
||||||
|
# subject_indexes :- Subject wise features in a dictionary form
|
||||||
|
|
||||||
|
total_samples = feature_matrix.shape[0]
|
||||||
|
subject_indexes = {}
|
||||||
|
samples_per_subject = total_samples//total_subjects
|
||||||
|
for i in range(total_subjects):
|
||||||
|
subject_name = 'subject_' + str(i+1)
|
||||||
|
subject_indexes[subject_name] = feature_matrix[samples_per_subject*i:samples_per_subject*(i+1),:]
|
||||||
|
|
||||||
|
return subject_indexes
|
||||||
|
|
||||||
|
|
||||||
|
# In[ ]:
|
||||||
|
|
||||||
|
|
||||||
|
# now defining a function which carries out the incremenatal learning algo
|
||||||
|
def training_phase(model,feature_matrix,Y,subject_indexes,number_of_subjects,total_subjects,rmse_score,test_subject):
|
||||||
|
# parameters :-
|
||||||
|
# model :- The training model to be used (SVR in this case)
|
||||||
|
# featrue_matrix :- feature matrix obtained in the above function
|
||||||
|
# Y :- The Valence and Arousal values as entered by the subjects
|
||||||
|
# subject_indexes :-Subject wise features in a dictionary form
|
||||||
|
# number_of_subjects :- Total number of subjects in the study
|
||||||
|
# total_subjects :- Total number of subjects in the study
|
||||||
|
# rmse_score :- RMSE of the previous iterations
|
||||||
|
# test_subject :- Cross validation test subject list
|
||||||
|
|
||||||
|
# returns :-
|
||||||
|
# rmse_score :- Array of rmse scores over the iterations, updated with the rmse score of the current iteration
|
||||||
|
# test_subject :- Updated Cross validation test subject list
|
||||||
|
no_of_features = feature_matrix.shape[1]
|
||||||
|
X = np.empty((0,no_of_features))
|
||||||
|
print('training on subject_no:',end = ' ')
|
||||||
|
|
||||||
|
#create a feature matrix containing data upto subjects given by the number number_of_subjects
|
||||||
|
#for eg if number of subject ==4 , data of first 4 subjects will be taken and a feature matrix made out of it to feed to the ml model
|
||||||
|
|
||||||
|
for subject in range(number_of_subjects):
|
||||||
|
print(subject+1,end = ' ')
|
||||||
|
subject_name = 'subject_'+str(subject+1)
|
||||||
|
subject_data = subject_indexes[subject_name]
|
||||||
|
X = np.append(X,subject_data,axis=0)
|
||||||
|
print(' ')
|
||||||
|
|
||||||
|
#apply a MinMax scaling to the current iteration feature matrix
|
||||||
|
X = MinMaxScaling(X)
|
||||||
|
|
||||||
|
#now we also need to extract the valence arousal data for the corresponding subject
|
||||||
|
y = np.empty((0))
|
||||||
|
total_samples = feature_matrix.shape[0]
|
||||||
|
samples_per_subject = total_samples//total_subjects
|
||||||
|
for subject in range(number_of_subjects):
|
||||||
|
y = Y[:samples_per_subject*(number_of_subjects)]
|
||||||
|
|
||||||
|
print('shape of X is :',X.shape)
|
||||||
|
print('shape of y is :',y.shape)
|
||||||
|
|
||||||
|
#shuffling data randomly to feed to model
|
||||||
|
X,y = shuffle(X,y,random_state = 0)
|
||||||
|
|
||||||
|
#doing a train test split of 80:20
|
||||||
|
X_train,X_test,y_train,y_test = train_test_split(X,y,random_state=0,test_size=0.2)
|
||||||
|
|
||||||
|
#training_model
|
||||||
|
model = model.fit(X_train,y_train)
|
||||||
|
|
||||||
|
#testing_model
|
||||||
|
y_predict = model.predict(X_test)
|
||||||
|
|
||||||
|
|
||||||
|
#calculating rmse values for valence and arousal using model fitted for current iteration
|
||||||
|
y_rms = np.sqrt(mean_squared_error(y_test,y_predict))
|
||||||
|
print('rms on y :',y_rms)
|
||||||
|
print('')
|
||||||
|
rmse_score.append(y_rms)
|
||||||
|
test_subject.append(subject_name)
|
||||||
|
|
||||||
|
return rmse_score,test_subject
|
||||||
|
|
||||||
|
|
||||||
|
# In[ ]:
|
||||||
|
|
||||||
|
|
||||||
|
def driver_code(save):
|
||||||
|
|
||||||
|
# Function to load the features, then train the regressor and will give the validation and test plot
|
||||||
|
|
||||||
|
#extracting file data corresponding to valence features
|
||||||
|
bandPwr_gamma_v = np.load('features/deap/bandPwr_gamma_1_1.npz')
|
||||||
|
bandPwr_beta_v = np.load('features/deap/bandPwr_beta_1_1.npz')
|
||||||
|
ShannonRes_gamma_v = np.load('features/deap/ShannonRes_sub_bands_gamma_1_1.npz')
|
||||||
|
ShannonRes_beta_v = np.load('features/deap/ShanninRes_sub_bands_beta_1_1.npz')
|
||||||
|
rasm_gamma_v = np.load('features/deap_RASM_DASM/gamma_1_1.npz')#shape of feature is 80640 x 14, be careful to extract only rasm features, i.e first 7 columns
|
||||||
|
dasm_gamma_v = np.load('features/deap_RASM_DASM/gamma_1_1.npz')
|
||||||
|
|
||||||
|
#creating a feature matrix for valence
|
||||||
|
feature_matrix_valence = np.empty((0,80640))
|
||||||
|
feature_matrix_valence = np.append(feature_matrix_valence,bandPwr_gamma_v['features'],axis = 0)
|
||||||
|
feature_matrix_valence = np.append(feature_matrix_valence,bandPwr_beta_v['features'],axis = 0)
|
||||||
|
feature_matrix_valence = np.append(feature_matrix_valence,ShannonRes_gamma_v['features'],axis = 0)
|
||||||
|
feature_matrix_valence = np.append(feature_matrix_valence,ShannonRes_beta_v['features'],axis = 0)
|
||||||
|
feature_matrix_valence = np.append(feature_matrix_valence,rasm_gamma_v['features'].T[:7,:],axis = 0)
|
||||||
|
feature_matrix_valence = np.append(feature_matrix_valence,dasm_gamma_v['features'].T[7:,:],axis = 0)
|
||||||
|
feature_matrix_valence = feature_matrix_valence.T#feature matrix is of shape 80640 x 70
|
||||||
|
|
||||||
|
#extracting labels
|
||||||
|
Y_val = bandPwr_gamma_v['Y'][:,0]
|
||||||
|
|
||||||
|
#extracting file data corresponding to arousal features
|
||||||
|
HjorthMob_a = np.load('features/deap/Hjorth_mobilty_1_1.npz')
|
||||||
|
HjorthComp_a = np.load('features/deap/Hjorth_complexity_1_1.npz')
|
||||||
|
stdDev_a = np.load('features/deap/stdDev_1_1.npz')
|
||||||
|
bandPwr_beta_a = np.load('features/deap/bandPwr_beta_1_1.npz')
|
||||||
|
bandPwr_theta_a = np.load('features/deap/bandPwr_theta_1_1.npz')
|
||||||
|
ShannonRes_beta_a = np.load('features/deap/ShanninRes_sub_bands_beta_1_1.npz')
|
||||||
|
ShannonRes_gamma_a = np.load('features/deap/ShannonRes_sub_bands_gamma_1_1.npz')
|
||||||
|
dasm_beta_a = np.load('features/deap_RASM_DASM/beta_1_1.npz')
|
||||||
|
|
||||||
|
#creating feature matrix for arousal
|
||||||
|
feature_matrix_arousal = np.empty((0,80640))
|
||||||
|
feature_matrix_arousal = np.append(feature_matrix_arousal,HjorthMob_a['features'],axis = 0)
|
||||||
|
feature_matrix_arousal = np.append(feature_matrix_arousal,HjorthComp_a['features'],axis = 0)
|
||||||
|
feature_matrix_arousal = np.append(feature_matrix_arousal,stdDev_a['features'],axis = 0)
|
||||||
|
feature_matrix_arousal = np.append(feature_matrix_arousal,bandPwr_beta_a['features'],axis = 0)
|
||||||
|
feature_matrix_arousal = np.append(feature_matrix_arousal,bandPwr_theta_a['features'],axis = 0)
|
||||||
|
feature_matrix_arousal = np.append(feature_matrix_arousal,ShannonRes_beta_a['features'],axis = 0)
|
||||||
|
feature_matrix_arousal = np.append(feature_matrix_arousal,ShannonRes_gamma_a['features'],axis = 0)
|
||||||
|
feature_matrix_arousal = np.append(feature_matrix_arousal,dasm_beta_a['features'].T[7:,:],axis = 0)
|
||||||
|
feature_matrix_arousal = feature_matrix_arousal.T#shape of feature matrix is 80640 x 105
|
||||||
|
|
||||||
|
#extracting labels
|
||||||
|
Y_aro = HjorthMob_a['Y'][:,1]
|
||||||
|
|
||||||
|
model = SVR()#initializing support vector regressor for training
|
||||||
|
|
||||||
|
#running incremental learning loop for valence
|
||||||
|
print('')
|
||||||
|
print('Incremental training for valence')
|
||||||
|
print('')
|
||||||
|
test_subject = []
|
||||||
|
rmse_val = []
|
||||||
|
subject_indexes_valence = segregate_data_of_subjects(feature_matrix_valence,32,128)
|
||||||
|
i = 1
|
||||||
|
while i <= 32:
|
||||||
|
rmse_val,test_subject= training_phase(model,feature_matrix_valence,Y_val,subject_indexes_valence,i,32,rmse_val,test_subject)
|
||||||
|
i+=1
|
||||||
|
|
||||||
|
#running incremental learning loop for arousal
|
||||||
|
print('')
|
||||||
|
print('Incremental training for arousal ')
|
||||||
|
print(' ')
|
||||||
|
|
||||||
|
model = SVR()#reinitialize model
|
||||||
|
test_subject = []
|
||||||
|
rmse_aro = []
|
||||||
|
subject_indexes_arousal = segregate_data_of_subjects(feature_matrix_arousal,32,128)
|
||||||
|
i=1
|
||||||
|
while i<=32:
|
||||||
|
rmse_aro,test_subject = training_phase(model,feature_matrix_arousal,Y_aro,subject_indexes_arousal,i,32,rmse_aro,test_subject)
|
||||||
|
i+=1
|
||||||
|
|
||||||
|
|
||||||
|
fig,axe = plt.subplots(1,1,figsize = (40,20))
|
||||||
|
axe.plot(test_subject,rmse_val,color='r',label='rmse valence')
|
||||||
|
axe.plot(test_subject,rmse_aro,color = 'g',label='rmse arousal')
|
||||||
|
axe.set_xlabel('trained upto subject')
|
||||||
|
axe.set_ylabel('rmse')
|
||||||
|
axe.set_title('support vector regressor')
|
||||||
|
axe.legend(loc = 'upper right')
|
||||||
|
|
||||||
|
df = pd.DataFrame([rmse_val,rmse_aro],columns = test_subject,index = ['valence rms','arousal rms'])
|
||||||
|
print(df)
|
||||||
|
|
||||||
|
if save == 'y':
|
||||||
|
plt.savefig('plots/deap/all_feature_valence_arousal_rmse',format = "svg")
|
||||||
|
df.to_csv('plots/deap/all_features_valence_arousal_rmse.csv')
|
||||||
|
|
||||||
|
|
||||||
|
# In[ ]:
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == '__main__':
|
||||||
|
driver_code(sys.argv[1])
|
||||||
|
|
||||||
@@ -0,0 +1,259 @@
|
|||||||
|
#!/usr/bin/env python
|
||||||
|
# coding: utf-8
|
||||||
|
|
||||||
|
# In[ ]:
|
||||||
|
|
||||||
|
|
||||||
|
import numpy as np
|
||||||
|
import matplotlib.pyplot as plt
|
||||||
|
import pandas as pd
|
||||||
|
import sys
|
||||||
|
from sklearn.preprocessing import MinMaxScaler,StandardScaler
|
||||||
|
from sklearn.utils import shuffle
|
||||||
|
from sklearn.metrics import mean_squared_error
|
||||||
|
from sklearn.model_selection import train_test_split
|
||||||
|
|
||||||
|
architecture = 'sklearn'
|
||||||
|
if architecture == 'sklearn':
|
||||||
|
from sklearn.svm import SVR
|
||||||
|
from sklearn.metrics import accuracy_score
|
||||||
|
else:
|
||||||
|
from cuml.svm import SVR
|
||||||
|
from cuml.ensemble import RandomForestRegressor
|
||||||
|
from cuml.metrics import accuracy_score
|
||||||
|
|
||||||
|
|
||||||
|
# In[ ]:
|
||||||
|
|
||||||
|
|
||||||
|
# Either one of the MinMaxScaling or StandardScaling function can be used
|
||||||
|
|
||||||
|
scaler_min_max = MinMaxScaler()
|
||||||
|
scaler_standard = StandardScaler()
|
||||||
|
def MinMaxScaling(feature_matrix):
|
||||||
|
global scaler_min_max
|
||||||
|
scaler_min_max.fit(feature_matrix)
|
||||||
|
return scaler_min_max.transform(feature_matrix)
|
||||||
|
def StandardScaling(feature_matrix):
|
||||||
|
global scaler_standard
|
||||||
|
scaler_standard.fit(feature_matrix)
|
||||||
|
print('scaling shape',scaler_standard.mean_.shape)
|
||||||
|
return scaler.transform(feature_matrix)
|
||||||
|
|
||||||
|
|
||||||
|
# """##DREAMER dataset
|
||||||
|
# 1> Valence - features selected
|
||||||
|
# >
|
||||||
|
# * HjorthMob
|
||||||
|
# * HjorthCom
|
||||||
|
# * stdDev
|
||||||
|
# * bandPwr_theta
|
||||||
|
# * ShannonRes_gamma
|
||||||
|
# * bandPwr_beta
|
||||||
|
#
|
||||||
|
# 2> Arousal - feature selected
|
||||||
|
# >
|
||||||
|
# * HjorthMob
|
||||||
|
# * HjorthComp
|
||||||
|
# * stdDev
|
||||||
|
# * bandPwr_theta
|
||||||
|
# * bandPwr_gamma
|
||||||
|
# * ShannonRes_gamma
|
||||||
|
# """
|
||||||
|
|
||||||
|
# In[ ]:
|
||||||
|
|
||||||
|
|
||||||
|
# now for incremental learning we need to segregate data of subjects
|
||||||
|
def segregate_data_of_subjects(feature_matrix,total_subjects,sfreq = 128):
|
||||||
|
'''
|
||||||
|
reuturs a dictionary which contains the samples data only corresponding to particular subjects of feature matrix
|
||||||
|
'''
|
||||||
|
# parameters :-
|
||||||
|
# feature_matrix :- Vector containing the features mentioned above subject wise, to be used for cross validation
|
||||||
|
# total_subjects :- Total number of subjects in the study
|
||||||
|
# sfreq :- sampling frequency of the EEG data
|
||||||
|
# returns :-
|
||||||
|
# subject_indexes :- Subject wise features in a dictionary form
|
||||||
|
|
||||||
|
total_samples = feature_matrix.shape[0]
|
||||||
|
subject_indexes = {}
|
||||||
|
samples_per_subject = total_samples//total_subjects
|
||||||
|
for i in range(total_subjects):
|
||||||
|
subject_name = 'subject_' + str(i+1)
|
||||||
|
subject_indexes[subject_name] = feature_matrix[samples_per_subject*i:samples_per_subject*(i+1),:]
|
||||||
|
|
||||||
|
return subject_indexes
|
||||||
|
|
||||||
|
|
||||||
|
# In[ ]:
|
||||||
|
|
||||||
|
|
||||||
|
# now defining a function which carries out the incremenatal learning algo
|
||||||
|
def training_phase(model,feature_matrix,Y,subject_indexes,number_of_subjects,total_subjects,rmse_score,test_subject):
|
||||||
|
# parameters :-
|
||||||
|
# model :- The training model to be used (SVR in this case)
|
||||||
|
# featrue_matrix :- feature matrix obtained in the above function
|
||||||
|
# Y :- The Valence and Arousal values as entered by the subjects
|
||||||
|
# subject_indexes :-Subject wise features in a dictionary form
|
||||||
|
# number_of_subjects :- Total number of subjects in the study
|
||||||
|
# total_subjects :- Total number of subjects in the study
|
||||||
|
# rmse_score :- RMSE of the previous iterations
|
||||||
|
# test_subject :- Cross validation test subject list
|
||||||
|
|
||||||
|
# returns :-
|
||||||
|
# rmse_score :- Array of rmse scores over the iterations, updated with the rmse score of the current iteration
|
||||||
|
# test_subject :- Updated Cross validation test subject list
|
||||||
|
no_of_features = feature_matrix.shape[1]
|
||||||
|
X = np.empty((0,no_of_features))
|
||||||
|
print('training on subject_no:',end = ' ')
|
||||||
|
|
||||||
|
#create a feature matrix containing data upto subjects given by the number number_of_subjects
|
||||||
|
#for eg if number of subject ==4 , data of first 4 subjects will be taken and a feature matrix made out of it to feed to the ml model
|
||||||
|
|
||||||
|
for subject in range(number_of_subjects):
|
||||||
|
print(subject+1,end = ' ')
|
||||||
|
subject_name = 'subject_'+str(subject+1)
|
||||||
|
subject_data = subject_indexes[subject_name]
|
||||||
|
X = np.append(X,subject_data,axis=0)
|
||||||
|
print(' ')
|
||||||
|
|
||||||
|
#apply a MinMax scaling to the current iteration feature matrix
|
||||||
|
X = MinMaxScaling(X)
|
||||||
|
|
||||||
|
#now we also need to extract the valence arousal data for the corresponding subject
|
||||||
|
y = np.empty((0))
|
||||||
|
total_samples = feature_matrix.shape[0]
|
||||||
|
samples_per_subject = total_samples//total_subjects
|
||||||
|
for subject in range(number_of_subjects):
|
||||||
|
y = Y[:samples_per_subject*(number_of_subjects)]
|
||||||
|
|
||||||
|
|
||||||
|
print('shape of X is :',X.shape)
|
||||||
|
print('shape of y is :',y.shape)
|
||||||
|
|
||||||
|
#shuffling data randomly to feed to model
|
||||||
|
X,y = shuffle(X,y,random_state = 0)
|
||||||
|
|
||||||
|
#doing a train test split of 80:20
|
||||||
|
X_train,X_test,y_train,y_test = train_test_split(X,y,random_state=0,test_size=0.2)
|
||||||
|
|
||||||
|
#training_model
|
||||||
|
model = model.fit(X_train,y_train)
|
||||||
|
|
||||||
|
#testing_model
|
||||||
|
y_predict = model.predict(X_test)
|
||||||
|
|
||||||
|
|
||||||
|
#calculating rmse values for valence and arousal using model fitted for current iteration
|
||||||
|
y_rms = np.sqrt(mean_squared_error(y_test,y_predict))
|
||||||
|
print('rms on y :',y_rms)
|
||||||
|
print('')
|
||||||
|
rmse_score.append(y_rms)
|
||||||
|
test_subject.append(subject_name)
|
||||||
|
|
||||||
|
return rmse_score,test_subject
|
||||||
|
|
||||||
|
|
||||||
|
# In[ ]:
|
||||||
|
|
||||||
|
|
||||||
|
def driver_code(save):
|
||||||
|
|
||||||
|
# Function to load the features, then train the regressor and will give the validation and test plot
|
||||||
|
|
||||||
|
|
||||||
|
#extracting file data corresponding to valence features
|
||||||
|
HjorthMob_v = np.load('features/dreamer/Hjorth_mobilty_1_1.npz')
|
||||||
|
HjorthCom_v = np.load('features/dreamer/Hjorth_complexity_1_1.npz')
|
||||||
|
stdDev_v = np.load('features/dreamer/stdDev_1_1.npz')
|
||||||
|
bandPwr_theta_v = np.load('features/dreamer/bandPwr_theta_1_1.npz')
|
||||||
|
bandPwr_beta_v = np.load('features/dreamer/bandPwr_beta_1_1.npz')
|
||||||
|
ShannonRes_gamma_v = np.load('features/dreamer/ShannonRes_sub_bands_gamma_1_1.npz')
|
||||||
|
|
||||||
|
|
||||||
|
# creating a feature matrix out of all feature data for valence
|
||||||
|
feature_matrix_valence = np.empty((0,188370))
|
||||||
|
feature_matrix_valence = np.append(feature_matrix_valence,HjorthMob_v['features'],axis =0)
|
||||||
|
feature_matrix_valence = np.append(feature_matrix_valence,HjorthCom_v['features'],axis =0)
|
||||||
|
feature_matrix_valence = np.append(feature_matrix_valence,stdDev_v['features'],axis =0)
|
||||||
|
feature_matrix_valence = np.append(feature_matrix_valence,bandPwr_theta_v['features'],axis =0)
|
||||||
|
feature_matrix_valence = np.append(feature_matrix_valence,bandPwr_beta_v['features'],axis =0)
|
||||||
|
feature_matrix_valence = np.append(feature_matrix_valence,ShannonRes_gamma_v['features'],axis =0)
|
||||||
|
feature_matrix_valence = feature_matrix_valence.T # feature matrix becomes of shape 188370 x 84 i.e (samples X features per sample)
|
||||||
|
|
||||||
|
# extracting valence values for each sample
|
||||||
|
Y_val = HjorthMob_v['Y'][:,0]#all features have same valnece labels
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
#extracting file data corresponding to arousal features
|
||||||
|
HjorthMob_a = np.load('features/dreamer/Hjorth_mobilty_1_1.npz')
|
||||||
|
HjorthCom_a = np.load('features/dreamer/Hjorth_complexity_1_1.npz')
|
||||||
|
stdDev_a = np.load('features/dreamer/stdDev_1_1.npz')
|
||||||
|
bandPwr_theta_a = np.load('features/dreamer/bandPwr_theta_1_1.npz')
|
||||||
|
bandPwr_gamma_a = np.load('features/dreamer/bandPwr_gamma_1_1.npz')
|
||||||
|
ShannonRes_gamma_a = np.load('features/dreamer/ShannonRes_sub_bands_gamma_1_1.npz')
|
||||||
|
|
||||||
|
#creating feature matrix for all feature data for arousal
|
||||||
|
feature_matrix_arousal = np.empty((0,188370))
|
||||||
|
feature_matrix_arousal = np.append(feature_matrix_arousal,HjorthMob_a['features'],axis = 0)
|
||||||
|
feature_matrix_arousal = np.append(feature_matrix_arousal,HjorthCom_a['features'],axis = 0)
|
||||||
|
feature_matrix_arousal = np.append(feature_matrix_arousal,stdDev_a['features'],axis = 0)
|
||||||
|
feature_matrix_arousal = np.append(feature_matrix_arousal,bandPwr_theta_a['features'],axis = 0)
|
||||||
|
feature_matrix_arousal = np.append(feature_matrix_arousal,bandPwr_gamma_a['features'],axis = 0)
|
||||||
|
feature_matrix_arousal = np.append(feature_matrix_arousal,ShannonRes_gamma_a['features'],axis = 0)
|
||||||
|
feature_matrix_arousal = feature_matrix_arousal.T
|
||||||
|
|
||||||
|
#extracting arousal values for
|
||||||
|
Y_aro = HjorthMob_a['Y'][:,1]#all features have same arousal labels
|
||||||
|
|
||||||
|
model =SVR()#initializing support vector regressor for training
|
||||||
|
|
||||||
|
#running incremental learning loop for valence
|
||||||
|
print('')
|
||||||
|
print('Incremental training for valence')
|
||||||
|
print('')
|
||||||
|
test_subject = []
|
||||||
|
rmse_val = []
|
||||||
|
subject_indexes_valence = segregate_data_of_subjects(feature_matrix_valence,23,128)
|
||||||
|
i = 1
|
||||||
|
while i <= 23:
|
||||||
|
rmse_val,test_subject= training_phase(model,feature_matrix_valence,Y_val,subject_indexes_valence,i,23,rmse_val,test_subject)
|
||||||
|
i+=1
|
||||||
|
|
||||||
|
#running incremental learning loop for arousal
|
||||||
|
print('')
|
||||||
|
print('Incremental training for arousal ')
|
||||||
|
print(' ')
|
||||||
|
|
||||||
|
test_subject = []
|
||||||
|
rmse_aro = []
|
||||||
|
subject_indexes_arousal = segregate_data_of_subjects(feature_matrix_arousal,23,128)
|
||||||
|
i=1
|
||||||
|
while i<=23:
|
||||||
|
rmse_aro,test_subject = training_phase(model,feature_matrix_arousal,Y_aro,subject_indexes_arousal,i,23,rmse_aro,test_subject)
|
||||||
|
i+=1
|
||||||
|
|
||||||
|
|
||||||
|
fig,axe = plt.subplots(1,1,figsize = (40,20))
|
||||||
|
axe.plot(test_subject,rmse_val,color='r',label = 'rms valence')
|
||||||
|
axe.plot(test_subject,rmse_aro,color = 'g',label = 'rms arousal')
|
||||||
|
axe.set_xlabel('trained upto subject')
|
||||||
|
axe.set_ylabel('rmse')
|
||||||
|
axe.set_title('support vector regressor')
|
||||||
|
axe.legend(loc='upper right')
|
||||||
|
df = pd.DataFrame([rmse_val,rmse_aro],columns = test_subject,index = ['valence rms','arousal rms'])
|
||||||
|
print(df)
|
||||||
|
|
||||||
|
if save == 'y':
|
||||||
|
plt.savefig('plots/dreamer/all_feature_valence_arousal_rmse',format = "svg")
|
||||||
|
df.to_csv('plots/dreamer/all_features_valence_arousal_rmse.csv')
|
||||||
|
|
||||||
|
|
||||||
|
# In[ ]:
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == '__main__':
|
||||||
|
driver_code(sys.argv[1])
|
||||||
|
|
||||||
@@ -0,0 +1,255 @@
|
|||||||
|
#!/usr/bin/env python
|
||||||
|
# coding: utf-8
|
||||||
|
|
||||||
|
# In[ ]:
|
||||||
|
|
||||||
|
|
||||||
|
import numpy as np
|
||||||
|
import matplotlib.pyplot as plt
|
||||||
|
import pandas as pd
|
||||||
|
import sys
|
||||||
|
from sklearn.preprocessing import MinMaxScaler,StandardScaler
|
||||||
|
from sklearn.utils import shuffle
|
||||||
|
from sklearn.metrics import mean_squared_error
|
||||||
|
from sklearn.model_selection import train_test_split
|
||||||
|
from sklearn.svm import SVR
|
||||||
|
from sklearn.metrics import accuracy_score
|
||||||
|
|
||||||
|
|
||||||
|
# In[ ]:
|
||||||
|
|
||||||
|
|
||||||
|
# Either one of the MinMaxScaling or StandardScaling function can be used
|
||||||
|
|
||||||
|
scaler_min_max = MinMaxScaler()
|
||||||
|
scaler_standard = StandardScaler()
|
||||||
|
def MinMaxScaling(feature_matrix):
|
||||||
|
global scaler_min_max
|
||||||
|
scaler_min_max.fit(feature_matrix)
|
||||||
|
return scaler_min_max.transform(feature_matrix)
|
||||||
|
|
||||||
|
def StandardScaling(feature_matrix):
|
||||||
|
global scaler_standard
|
||||||
|
scaler_standard.fit(feature_matrix)
|
||||||
|
print('scaling shape',scaler_standard.mean_.shape)
|
||||||
|
return scaler.transform(feature_matrix)
|
||||||
|
|
||||||
|
|
||||||
|
# """##OASIS dataset
|
||||||
|
# 1> Valence - features selected
|
||||||
|
# >
|
||||||
|
# * HjorthMob
|
||||||
|
# * HjorthComp
|
||||||
|
# * stdDev
|
||||||
|
#
|
||||||
|
# 2> Arousal - feature selected
|
||||||
|
# >
|
||||||
|
# * HjorthMob
|
||||||
|
# """
|
||||||
|
|
||||||
|
# In[ ]:
|
||||||
|
|
||||||
|
|
||||||
|
# now for incremental learning we need to segregate data of subjects
|
||||||
|
def segregate_data_of_subjects(feature_matrix,Y,total_subjects,sfreq = 128):
|
||||||
|
'''
|
||||||
|
returns a dictionary which contains the samples data only corresponding to particular subjects of feature matrix
|
||||||
|
'''
|
||||||
|
# parameters :-
|
||||||
|
# feature_matrix :- Vector containing the features mentioned above subject wise, to be used for cross validation
|
||||||
|
# Y :- The Valence and Arousal values as entered by the subjects
|
||||||
|
# total_subjects :- Total number of subjects in the study
|
||||||
|
# sfreq :- sampling frequency of the EEG data
|
||||||
|
# returns :-
|
||||||
|
# subject_indexes :- Subject wise features in a dictionary form
|
||||||
|
# aligned_y :- the y values corresponding to each subject
|
||||||
|
|
||||||
|
subject_indexes = { 'subject_1':feature_matrix[:200],
|
||||||
|
'subject_2':feature_matrix[200:400],
|
||||||
|
'subject_3':feature_matrix[400:600],
|
||||||
|
'subject_4':feature_matrix[600:795],
|
||||||
|
'subject_5':feature_matrix[795:995],
|
||||||
|
'subject_6':feature_matrix[995:1185],
|
||||||
|
'subject_7':feature_matrix[1185:1375],
|
||||||
|
'subject_8':feature_matrix[1375:1575],
|
||||||
|
'subject_9':feature_matrix[1575:1770],
|
||||||
|
'subject_10':feature_matrix[1770:1965],
|
||||||
|
'subject_11':feature_matrix[1965:2160],
|
||||||
|
'subject_12':feature_matrix[2160:2360],
|
||||||
|
'subject_13':feature_matrix[2360:2550],
|
||||||
|
'subject_14':feature_matrix[2550:2740],
|
||||||
|
'subject_15':feature_matrix[2740:2935]
|
||||||
|
}
|
||||||
|
|
||||||
|
aligned_y = { 'subject_1':Y[:200],
|
||||||
|
'subject_2':Y[200:400],
|
||||||
|
'subject_3':Y[400:600],
|
||||||
|
'subject_4':Y[600:795],
|
||||||
|
'subject_5':Y[795:995],
|
||||||
|
'subject_6':Y[995:1185],
|
||||||
|
'subject_7':Y[1185:1375],
|
||||||
|
'subject_8':Y[1375:1575],
|
||||||
|
'subject_9':Y[1575:1770],
|
||||||
|
'subject_10':Y[1770:1965],
|
||||||
|
'subject_11':Y[1965:2160],
|
||||||
|
'subject_12':Y[2160:2360],
|
||||||
|
'subject_13':Y[2360:2550],
|
||||||
|
'subject_14':Y[2550:2740],
|
||||||
|
'subject_15':Y[2740:2935]
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
return subject_indexes,aligned_y
|
||||||
|
|
||||||
|
|
||||||
|
# In[ ]:
|
||||||
|
|
||||||
|
|
||||||
|
# now defining a function which carries out the incremenatal learning algo
|
||||||
|
def training_phase(model,feature_matrix,Y,subject_indexes,number_of_subjects,total_subjects,rmse_score,test_subject):
|
||||||
|
|
||||||
|
# parameters :-
|
||||||
|
# model :- The training model to be used (SVR in this case)
|
||||||
|
# featrue_matrix :- feature matrix obtained in the above function
|
||||||
|
# Y :- The Valence and Arousal values as entered by the subjects
|
||||||
|
# subject_indexes :-Subject wise features in a dictionary form
|
||||||
|
# number_of_subjects :- Total number of subjects in the study
|
||||||
|
# total_subjects :- Total number of subjects in the study
|
||||||
|
# rmse_score :- RMSE of the previous iterations
|
||||||
|
# test_subject :- Cross validation test subject list
|
||||||
|
|
||||||
|
# returns :-
|
||||||
|
# rmse_score :- Array of rmse scores over the iterations, updated with the rmse score of the current iteration
|
||||||
|
# test_subject :- Updated Cross validation test subject list
|
||||||
|
|
||||||
|
no_of_features = feature_matrix.shape[1]
|
||||||
|
X = np.empty((0,no_of_features))
|
||||||
|
print('training on subject_no:',end = ' ')
|
||||||
|
|
||||||
|
#create a feature matrix containing data upto subjects given by the number number_of_subjects
|
||||||
|
#for eg if number of subject ==4 , data of first 4 subjects will be taken and a feature matrix made out of it to feed to the ml model
|
||||||
|
|
||||||
|
for subject in range(number_of_subjects):
|
||||||
|
print(subject+1,end = ' ')
|
||||||
|
subject_name = 'subject_'+str(subject+1)
|
||||||
|
subject_data = subject_indexes[subject_name]
|
||||||
|
X = np.append(X,subject_data,axis=0)
|
||||||
|
print(' ')
|
||||||
|
|
||||||
|
#apply a MinMax scaling to the current iteration feature matrix
|
||||||
|
X = MinMaxScaling(X)
|
||||||
|
|
||||||
|
#now we also need to extract the valence/arousal data for the corresponding subject
|
||||||
|
y = np.empty((0))
|
||||||
|
for subject in range(number_of_subjects):
|
||||||
|
subject_name = 'subject_'+str(subject+1)
|
||||||
|
subject_y_data = Y[subject_name]
|
||||||
|
y = np.append(y,subject_y_data,axis=0)
|
||||||
|
|
||||||
|
|
||||||
|
print('shape of X is :',X.shape)
|
||||||
|
print('shape of y is :',y.shape)
|
||||||
|
#shuffling data randomly to feed to model
|
||||||
|
X,y = shuffle(X,y,random_state = 0)
|
||||||
|
|
||||||
|
#doing a train test split of 80:20
|
||||||
|
X_train,X_test,y_train,y_test = train_test_split(X,y,random_state=0,test_size=0.2)
|
||||||
|
|
||||||
|
#training_model
|
||||||
|
model = model.fit(X_train,y_train)
|
||||||
|
|
||||||
|
#testing_model
|
||||||
|
y_predict = model.predict(X_test)
|
||||||
|
|
||||||
|
|
||||||
|
#calculating rmse values for valence and arousal using model fitted for current iteration
|
||||||
|
y_rms = np.sqrt(mean_squared_error(y_test,y_predict))
|
||||||
|
print('rms on y :',y_rms)
|
||||||
|
print('')
|
||||||
|
rmse_score.append(y_rms)
|
||||||
|
test_subject.append(subject_name)
|
||||||
|
|
||||||
|
return rmse_score,test_subject
|
||||||
|
|
||||||
|
|
||||||
|
# In[ ]:
|
||||||
|
|
||||||
|
|
||||||
|
def driver_code(save):
|
||||||
|
|
||||||
|
#extracting feature data related to valence
|
||||||
|
HjorthMob_v = np.load('features/oasis/with_autoreject/Hjorth_mobilty_0_0.npz')
|
||||||
|
HjorthComp_v = np.load('features/oasis/with_autoreject/Hjorth_complexity_0_0.npz')
|
||||||
|
stdDev_v = np.load('features/oasis/with_autoreject/stdDev_0_0.npz')
|
||||||
|
|
||||||
|
#creating feature matrix
|
||||||
|
feature_matrix_valence = np.empty((0,2935))
|
||||||
|
feature_matrix_valence = np.append(feature_matrix_valence,HjorthMob_v['features'],axis = 0)
|
||||||
|
feature_matrix_valence = np.append(feature_matrix_valence,HjorthComp_v['features'],axis = 0)
|
||||||
|
feature_matrix_valence = np.append(feature_matrix_valence,stdDev_v['features'],axis = 0)
|
||||||
|
feature_matrix_valence = feature_matrix_valence.T #shape of feature matrix is 2935 x 42
|
||||||
|
|
||||||
|
#extracting valence labels
|
||||||
|
Y_val = HjorthMob_v['Y'][:,0]
|
||||||
|
|
||||||
|
#extracting feature data related to arousal
|
||||||
|
HjorthMob_a = np.load('features/oasis/with_autoreject/Hjorth_mobilty_0_0.npz')
|
||||||
|
|
||||||
|
#creating feature matrix for arousal
|
||||||
|
feature_matrix_arousal = np.empty((0,2935))
|
||||||
|
feature_matrix_arousal = np.append(feature_matrix_arousal,HjorthMob_a['features'],axis=0)
|
||||||
|
feature_matrix_arousal = feature_matrix_arousal.T
|
||||||
|
|
||||||
|
#extracting arousal labels
|
||||||
|
Y_aro = HjorthMob_a['Y'][:,1]
|
||||||
|
|
||||||
|
model = SVR() #initialize model
|
||||||
|
|
||||||
|
#running incremental learning loop for valence
|
||||||
|
print('')
|
||||||
|
print('Incremental training for valence')
|
||||||
|
print('')
|
||||||
|
test_subject = []
|
||||||
|
rmse_val = []
|
||||||
|
subject_indexes_valence,aligned_Y_val = segregate_data_of_subjects(feature_matrix_valence,Y_val,15,128)
|
||||||
|
i = 1
|
||||||
|
while i <= 15:
|
||||||
|
rmse_val,test_subject= training_phase(model,feature_matrix_valence,aligned_Y_val,subject_indexes_valence,i,15,rmse_val,test_subject)
|
||||||
|
i+=1
|
||||||
|
|
||||||
|
#running incremental learning loop for arousal
|
||||||
|
print('')
|
||||||
|
print('Incremental training for arousal ')
|
||||||
|
print(' ')
|
||||||
|
|
||||||
|
model = SVR()#reinitialize model
|
||||||
|
test_subject = []
|
||||||
|
rmse_aro = []
|
||||||
|
subject_indexes_arousal,aligned_Y_aro = segregate_data_of_subjects(feature_matrix_arousal,Y_aro,15,128)
|
||||||
|
i=1
|
||||||
|
while i<=15:
|
||||||
|
rmse_aro,test_subject = training_phase(model,feature_matrix_arousal,aligned_Y_aro,subject_indexes_arousal,i,15,rmse_aro,test_subject)
|
||||||
|
i+=1
|
||||||
|
|
||||||
|
|
||||||
|
fig,axe = plt.subplots(1,1,figsize = (40,20))
|
||||||
|
axe.plot(test_subject,rmse_val,color='r',label = 'rmse valence')
|
||||||
|
axe.plot(test_subject,rmse_aro,color = 'g',label = 'rmse arousal')
|
||||||
|
axe.set_xlabel('trained upto subject')
|
||||||
|
axe.set_ylabel('rmse')
|
||||||
|
axe.set_title('support vector regressor')
|
||||||
|
axe.legend(loc = 'upper right')
|
||||||
|
|
||||||
|
df = pd.DataFrame([rmse_val,rmse_aro],columns = test_subject,index = ['valence rms','arousal rms'])
|
||||||
|
|
||||||
|
if save == 'y':
|
||||||
|
plt.savefig('plots/oasis/all_feature_valence_arousal_rmse',format="svg")
|
||||||
|
df.to_csv('plots/oasis/all_features_valence_arousal_rmse.csv')
|
||||||
|
|
||||||
|
|
||||||
|
# In[ ]:
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == '__main__':
|
||||||
|
driver_code(sys.argv[1])
|
||||||
|
|
||||||
@@ -0,0 +1,207 @@
|
|||||||
|
#!/usr/bin/env python
|
||||||
|
# coding: utf-8
|
||||||
|
|
||||||
|
# In[ ]:
|
||||||
|
|
||||||
|
|
||||||
|
import glob
|
||||||
|
import pandas as pd
|
||||||
|
import matplotlib.pyplot as plt
|
||||||
|
|
||||||
|
|
||||||
|
# In[ ]:
|
||||||
|
|
||||||
|
|
||||||
|
from google.colab import drive
|
||||||
|
drive.mount('/gdrive',force_remount=True)
|
||||||
|
|
||||||
|
|
||||||
|
# In[ ]:
|
||||||
|
|
||||||
|
|
||||||
|
get_ipython().run_line_magic('cd', '/gdrive/MyDrive/emotion_recognition_project/')
|
||||||
|
|
||||||
|
|
||||||
|
# Script to obtain the incremental learning graph for the DEAP, DREAMER and OASIS datasets.
|
||||||
|
|
||||||
|
# ##plots for DEAP
|
||||||
|
|
||||||
|
# In[ ]:
|
||||||
|
|
||||||
|
|
||||||
|
dataset_deap=glob.glob('plots/deap/*.csv')
|
||||||
|
|
||||||
|
|
||||||
|
# In[ ]:
|
||||||
|
|
||||||
|
|
||||||
|
dataset_deap
|
||||||
|
|
||||||
|
|
||||||
|
# In[ ]:
|
||||||
|
|
||||||
|
|
||||||
|
dataset_svr_deap = pd.read_csv(dataset_deap[0]).T
|
||||||
|
dataset_svr_deap.columns = ['valence','arousal']
|
||||||
|
dataset_svr_deap = dataset_svr_deap.drop('Unnamed: 0')
|
||||||
|
dataset_svr_deap= dataset_svr_deap[::1]
|
||||||
|
x_deap = range(1,33,1)
|
||||||
|
dataset_svr_deap
|
||||||
|
|
||||||
|
|
||||||
|
# In[ ]:
|
||||||
|
|
||||||
|
|
||||||
|
fig_deap,axe_deap = plt.subplots(1,1,figsize = (17,10))
|
||||||
|
axe_deap.plot(x_deap,dataset_svr_deap['valence'],color='green',marker = 'x',markersize=10)
|
||||||
|
axe_deap.plot(x_deap,dataset_svr_deap['arousal'],color ='red',marker = 'x',markersize=10)
|
||||||
|
axe_deap.legend(['rfr_valence','rfr_arousal'],)
|
||||||
|
axe_deap.set_xlabel('trained upto subject')
|
||||||
|
axe_deap.set_ylabel('RMSE values')
|
||||||
|
plt.rcParams.update({'font.size':40})
|
||||||
|
plt.tight_layout()
|
||||||
|
plt.xticks(x_deap[::3])
|
||||||
|
|
||||||
|
|
||||||
|
# In[ ]:
|
||||||
|
|
||||||
|
|
||||||
|
fig_deap.savefig('final_plots/deap_rfr__valence_arousal_rms.svg')
|
||||||
|
fig_deap.savefig('final_plots/deap_rfr__valence_arousal_rms.png')
|
||||||
|
|
||||||
|
|
||||||
|
# ##plots for DREAMER
|
||||||
|
|
||||||
|
# In[ ]:
|
||||||
|
|
||||||
|
|
||||||
|
dataset_dreamer=glob.glob('plots/dreamer/*.csv')
|
||||||
|
|
||||||
|
|
||||||
|
# In[ ]:
|
||||||
|
|
||||||
|
|
||||||
|
dataset_dreamer
|
||||||
|
|
||||||
|
|
||||||
|
# In[ ]:
|
||||||
|
|
||||||
|
|
||||||
|
dataset_svr_dreamer = pd.read_csv(dataset_dreamer[0]).T
|
||||||
|
dataset_svr_dreamer.columns = ['valence','arousal']
|
||||||
|
dataset_svr_dreamer = dataset_svr_dreamer.drop('Unnamed: 0')
|
||||||
|
x_dreamer = range(1,24,1)
|
||||||
|
dataset_svr_dreamer= dataset_svr_dreamer[::1]
|
||||||
|
dataset_svr_dreamer
|
||||||
|
|
||||||
|
|
||||||
|
# In[ ]:
|
||||||
|
|
||||||
|
|
||||||
|
fig_dreamer,axe_dreamer = plt.subplots(1,1,figsize=(17,10))
|
||||||
|
axe_dreamer.plot(x_dreamer,dataset_svr_dreamer['valence'],color='green',marker = 'x',markersize=10)
|
||||||
|
axe_dreamer.plot(x_dreamer,dataset_svr_dreamer['arousal'],color ='red',marker = 'x',markersize=10)
|
||||||
|
axe_dreamer.legend(['rfr_valence','rfr_arousal'],)
|
||||||
|
axe_dreamer.set_xlabel('trained upto subject')
|
||||||
|
axe_dreamer.set_ylabel('RMSE values')
|
||||||
|
plt.rcParams.update({'font.size':40})
|
||||||
|
plt.tight_layout()
|
||||||
|
plt.xticks(x_dreamer[::3])
|
||||||
|
|
||||||
|
|
||||||
|
# In[ ]:
|
||||||
|
|
||||||
|
|
||||||
|
fig_dreamer.savefig('final_plots/dreamer_rfr__valence_arousal_rms.svg')
|
||||||
|
fig_dreamer.savefig('final_plots/dreamer_rfr__valence_arousal_rms.png')
|
||||||
|
|
||||||
|
|
||||||
|
# ##plots for oasis
|
||||||
|
|
||||||
|
# In[ ]:
|
||||||
|
|
||||||
|
|
||||||
|
dataset_oasis=glob.glob('plots/oasis/*.csv')
|
||||||
|
|
||||||
|
|
||||||
|
# In[ ]:
|
||||||
|
|
||||||
|
|
||||||
|
dataset_oasis
|
||||||
|
|
||||||
|
|
||||||
|
# In[ ]:
|
||||||
|
|
||||||
|
|
||||||
|
dataset_svr_oasis = pd.read_csv(dataset_oasis[0]).T
|
||||||
|
dataset_svr_oasis.columns = ['valence','arousal']
|
||||||
|
dataset_svr_oasis = dataset_svr_oasis.drop('Unnamed: 0')
|
||||||
|
x_oasis = range(1,16,1)
|
||||||
|
dataset_svr_oasis= dataset_svr_oasis[::1]
|
||||||
|
dataset_svr_oasis
|
||||||
|
|
||||||
|
|
||||||
|
# In[ ]:
|
||||||
|
|
||||||
|
|
||||||
|
fig_oasis,axe_oasis = plt.subplots(1,1,figsize=(17,10))
|
||||||
|
axe_oasis.plot(x_oasis,dataset_svr_oasis['valence'],color='green',marker = 'x',markersize=10)
|
||||||
|
axe_oasis.plot(x_oasis,dataset_svr_oasis['arousal'],color ='red',marker = 'x',markersize=10)
|
||||||
|
axe_oasis.set_xlabel('trained upto subject')
|
||||||
|
axe_oasis.set_ylabel('RMSE values')
|
||||||
|
axe_oasis.legend(['rfr_valence','rfr_arousal'],loc = 'lower right')
|
||||||
|
plt.rcParams.update({'font.size':40})
|
||||||
|
plt.xticks(x_oasis[::3])
|
||||||
|
plt.tight_layout()
|
||||||
|
|
||||||
|
|
||||||
|
# In[ ]:
|
||||||
|
|
||||||
|
|
||||||
|
fig_oasis.savefig('final_plots/oasis_rfr__valence_arousal_rms.svg')
|
||||||
|
fig_oasis.savefig('final_plots/oasis_rfr__valence_arousal_rms.png')
|
||||||
|
|
||||||
|
|
||||||
|
# In[ ]:
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
# In[ ]:
|
||||||
|
|
||||||
|
|
||||||
|
f,a = plt.subplots(3,1,figsize = (40,30))
|
||||||
|
a[0].plot(x_deap,dataset_svr_deap['valence'],color='green',marker = 'x',markersize=10)
|
||||||
|
a[0].plot(x_deap,dataset_svr_deap['arousal'],color ='red',marker = 'x',markersize=10)
|
||||||
|
a[0].legend(['svr_valence','svr_arousal','rfr_valence','rfr_arousal'],)
|
||||||
|
#a[0].set_xlabel('trained upto subject')
|
||||||
|
a[0].set_ylabel('RMSE values')
|
||||||
|
a[0].set_title('DEAP')
|
||||||
|
a[1].plot(x_dreamer,dataset_svr_dreamer['valence'],color='green',marker = 'x',markersize=10)
|
||||||
|
a[1].plot(x_dreamer,dataset_svr_dreamer['arousal'],color ='red',marker = 'x',markersize=10)
|
||||||
|
#a[1].legend(['svr_valence','svr_arousal','rfr_valence','rfr_arousal'],)
|
||||||
|
#a[1].set_xlabel('trained upto subject')
|
||||||
|
a[1].set_ylabel('RMSE values')
|
||||||
|
a[1].set_title('DREAMER')
|
||||||
|
a[2].plot(x_oasis,dataset_svr_oasis['valence'],color='green',marker = 'x',markersize=10)
|
||||||
|
a[2].plot(x_oasis,dataset_svr_oasis['arousal'],color ='red',marker = 'x',markersize=10)
|
||||||
|
a[2].set_xlabel('trained upto subject')
|
||||||
|
a[2].set_ylabel('RMSE values')
|
||||||
|
#a[2].legend(['svr_valence','svr_arousal','rfr_valence','rfr_arousal'],loc = 'lower right')
|
||||||
|
a[2].set_title('OASIS')
|
||||||
|
plt.rcParams.update({'font.size':40})
|
||||||
|
plt.tight_layout()
|
||||||
|
|
||||||
|
|
||||||
|
# In[ ]:
|
||||||
|
|
||||||
|
|
||||||
|
f.savefig('final_plots/all_plots_incremental learning.svg')
|
||||||
|
|
||||||
|
|
||||||
|
# In[ ]:
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
+487
@@ -0,0 +1,487 @@
|
|||||||
|
#!/usr/bin/env python
|
||||||
|
# coding: utf-8
|
||||||
|
|
||||||
|
# In[ ]:
|
||||||
|
|
||||||
|
|
||||||
|
from google.colab import drive
|
||||||
|
drive.mount('/gdrive',force_remount = True)
|
||||||
|
|
||||||
|
|
||||||
|
# In[ ]:
|
||||||
|
|
||||||
|
|
||||||
|
get_ipython().system('pip install mne')
|
||||||
|
|
||||||
|
|
||||||
|
# In[ ]:
|
||||||
|
|
||||||
|
|
||||||
|
get_ipython().system('pip install autoreject')
|
||||||
|
|
||||||
|
|
||||||
|
# In[ ]:
|
||||||
|
|
||||||
|
|
||||||
|
import numpy as np
|
||||||
|
import mne
|
||||||
|
import autoreject
|
||||||
|
from scipy.stats import pearsonr
|
||||||
|
import pickle
|
||||||
|
|
||||||
|
|
||||||
|
# In[ ]:
|
||||||
|
|
||||||
|
|
||||||
|
get_ipython().run_line_magic('cd', '/gdrive/MyDrive/emotion_recognition_project/')
|
||||||
|
|
||||||
|
|
||||||
|
# In[ ]:
|
||||||
|
|
||||||
|
|
||||||
|
class preprocessing:
|
||||||
|
'''
|
||||||
|
Load the data here, store the paramters
|
||||||
|
'''
|
||||||
|
def __init__(self,name):
|
||||||
|
self.name = name #name of dataset
|
||||||
|
self.X = None
|
||||||
|
self.Y = None
|
||||||
|
self.Z = None
|
||||||
|
self.gyroscope = None
|
||||||
|
self.freq = None #(in Hz) is same for all datasets
|
||||||
|
self.channels = None
|
||||||
|
self.ch_type = 'eeg'
|
||||||
|
def load_arrays(self):
|
||||||
|
'''
|
||||||
|
loads arrays in object variables of the form
|
||||||
|
X: trials x channels x timepoints, using reshape method at the end
|
||||||
|
Y: trials x (valence,arousal)
|
||||||
|
Z: trials x participant no
|
||||||
|
'''
|
||||||
|
if self.name == 'DREAMER':
|
||||||
|
array = np.load('original_data/DREAMER.npz')
|
||||||
|
self.freq = 128
|
||||||
|
self.channels = ['AF3','F7','F3','FC5','T7','P7','O1','O2','P8','T8','FC6','F4','F8','AF4']
|
||||||
|
if self.name == 'DEAP':
|
||||||
|
array = np.load('original_data/DEAP.npz')
|
||||||
|
self.freq = 128
|
||||||
|
self.channels = ['F1', 'AF3', 'F3', 'F7', 'FC5', 'FC1', 'C3', 'T7', 'CP5', 'CP1', 'P3', 'P7', 'PO3', 'O1', 'Oz', 'Pz', 'Fp2', 'AF4', 'Fz', 'F4', 'F8', 'FC6', 'FC2', 'Cz', 'C4', 'T8', 'CP6', 'CP2', 'P4', 'P8', 'PO4', 'O2', 'hEOG','vEOG', 'zEMG','tEMG','GSR','Respiration belt','Plethysmograph','Temperature']
|
||||||
|
if self.name == 'OASIS':
|
||||||
|
array = np.load('original_data/OASIS.npz')
|
||||||
|
self.channels = ['AF3', 'F7', 'F3', 'FC5', 'T7', 'P7', 'O1', 'O2', 'P8', 'T8', 'FC6', 'F4', 'F8', 'AF4']
|
||||||
|
self.freq = 128
|
||||||
|
self.X = array['X']
|
||||||
|
if self.name == 'DEAP':
|
||||||
|
self.X = self.X[:,:,:32]
|
||||||
|
self.channels = self.channels[:32]
|
||||||
|
|
||||||
|
if self.name == 'OASIS':
|
||||||
|
self.gyroscope = array['gyroscope']
|
||||||
|
|
||||||
|
self.Y = array['Y']
|
||||||
|
self.Z = array['Z']
|
||||||
|
self.reshape_data()
|
||||||
|
|
||||||
|
def reshape_data(self):
|
||||||
|
'''
|
||||||
|
exchanges last two dimensions of data
|
||||||
|
'''
|
||||||
|
(a,b,c) = self.X.shape
|
||||||
|
self.X = np.reshape(self.X,(a,c,b))
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
# In[ ]:
|
||||||
|
|
||||||
|
|
||||||
|
class filters():
|
||||||
|
'''
|
||||||
|
define filters to be used for preprocessing
|
||||||
|
'''
|
||||||
|
@staticmethod
|
||||||
|
def notch_filter(data,sfreq,notch_freq):
|
||||||
|
# parameters :-
|
||||||
|
# data :- EEG data
|
||||||
|
# sfreq :- sampling frequency
|
||||||
|
# notch_freq :- frequency of the notch filter (generally 50Hz due to the AC current frequency)
|
||||||
|
return mne.filter.notch_filter(data,sfreq,np.arange(notch_freq,notch_freq+1,1))
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def butterworth_filter(data,sfreq,lfreq,hfreq):
|
||||||
|
# parameters :-
|
||||||
|
# data :- EEG data
|
||||||
|
# sfreq :- sampling frequency
|
||||||
|
# lfreq :- low pass frequency value
|
||||||
|
#hfreq :- high pass frequency value
|
||||||
|
return mne.filter.filter_data(data = data,sfreq = sfreq,l_freq = lfreq,h_freq = hfreq,method = 'iir',verbose = False)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
# In[ ]:
|
||||||
|
|
||||||
|
|
||||||
|
class referencing():
|
||||||
|
'''
|
||||||
|
referencing electrodes to some value
|
||||||
|
'''
|
||||||
|
@staticmethod
|
||||||
|
def average(data):
|
||||||
|
'''
|
||||||
|
Computes average voltage of all channels for a particular trial and a particular timepoint, and subtracts average value from all channels
|
||||||
|
'''
|
||||||
|
temp = data
|
||||||
|
avg = np.average(temp,axis=1)
|
||||||
|
avg = np.expand_dims(avg,axis=1)
|
||||||
|
return temp-avg
|
||||||
|
|
||||||
|
|
||||||
|
# In[ ]:
|
||||||
|
|
||||||
|
|
||||||
|
class autoreject_custom:
|
||||||
|
'''
|
||||||
|
Run Auotoreject algorithm here for artifact rejections
|
||||||
|
'''
|
||||||
|
#make epoch object
|
||||||
|
@staticmethod
|
||||||
|
def raw_object_creation(raw_data,channel_name,ch_types,sfreq):
|
||||||
|
'''
|
||||||
|
defining parameters for creation of raw object which will be used for creating an epoch object
|
||||||
|
retutns raw object after setting parameters
|
||||||
|
'''
|
||||||
|
# parameters :-
|
||||||
|
# raw_data :- EEG data
|
||||||
|
# channel_name :- Names of the channels of EEG data used
|
||||||
|
# ch_types :- Whether each channel is EEG/Gyro, etc
|
||||||
|
# sfreq :- sampling frequency
|
||||||
|
montage = mne.channels.make_standard_montage('standard_1020')
|
||||||
|
|
||||||
|
#creating a info object to create epochs later and setting its montage to 12-20 system
|
||||||
|
info = mne.create_info(ch_names=channel_name,sfreq=sfreq,ch_types = ch_types,verbose = False)
|
||||||
|
|
||||||
|
#create raw object directly from array
|
||||||
|
raw_object = mne.io.RawArray(data = raw_data,info = info,verbose = False)
|
||||||
|
|
||||||
|
#setting montage
|
||||||
|
raw_object.set_montage(montage)
|
||||||
|
|
||||||
|
return raw_object
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def epoch_object_creation(raw_object,start=0,duration=1,tmin=0,tmax=0.99):
|
||||||
|
'''
|
||||||
|
making an epoch object which will be used for autoreject algorithm
|
||||||
|
'''
|
||||||
|
#creating fixed length events
|
||||||
|
events = mne.make_fixed_length_events(raw_object,id=1,start=0,duration = duration)
|
||||||
|
#creating an epoch object
|
||||||
|
epoch_object = mne.Epochs(raw_object,events = events,preload=True,baseline = None,reject=None,verbose=False,tmin=0,tmax=0.99)
|
||||||
|
|
||||||
|
return epoch_object
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def autoreject_algo(epoch_object,n_interpolates,consensus_percs):
|
||||||
|
'''
|
||||||
|
cleans the epochs,and returns cleaned epochs,rejecting bad epochs based on optimal parameters calculation
|
||||||
|
n_interpolates are the ρ values that we would like autoreject to try and consensus_percs are the κ values that autoreject will try
|
||||||
|
Epochs with more than κ∗N sensors (N total sensors) bad are dropped
|
||||||
|
'''
|
||||||
|
ar = autoreject.AutoReject(n_interpolates, consensus_percs, random_state=42,verbose = 'tqdm_notebook',cv=4,n_jobs=10)
|
||||||
|
#fitting autoreject model to epoch data
|
||||||
|
ar.fit(epoch_object)
|
||||||
|
epochs_clean = ar.transform(epoch_object)
|
||||||
|
evoked_clean = epochs_clean.average()
|
||||||
|
evoked = epoch_object.average()
|
||||||
|
|
||||||
|
return epochs_clean,ar.get_reject_log(epoch_object)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
# In[ ]:
|
||||||
|
|
||||||
|
|
||||||
|
class source_decomposition():
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def ica(data,channels,ch_type,sfreq):
|
||||||
|
# parameters :-
|
||||||
|
# data :- EEG data
|
||||||
|
# channels :- Names of the channels of EEG data used
|
||||||
|
# ch_types :- Whether each channel is EEG/Gyro, etc
|
||||||
|
# sfreq :- sampling frequency
|
||||||
|
#defining ICA parameters
|
||||||
|
raw = autoreject_custom.raw_object_creation(data,channels,ch_type,sfreq)
|
||||||
|
ica = mne.preprocessing.ICA(method='infomax',n_components=14)
|
||||||
|
ica.fit_params['max_iter'] =300
|
||||||
|
ica.fit(raw,verbose=False)
|
||||||
|
return ica.get_sources(raw).get_data(),ica.mixing_matrix_
|
||||||
|
|
||||||
|
|
||||||
|
# In[ ]:
|
||||||
|
|
||||||
|
|
||||||
|
def process_trial(a,acc_x,acc_y,acc_z):
|
||||||
|
'''
|
||||||
|
a are the source signals obtained after decomposition
|
||||||
|
acc_<> are accelerometer readings in respective axis
|
||||||
|
'''
|
||||||
|
# parameters :-
|
||||||
|
# a :- EEG source signal after ICA
|
||||||
|
# acc_x :- accelerometer channel along X axis
|
||||||
|
# acc_y :- accelerometer channel along Y axis
|
||||||
|
# acc_y :- accelerometer channel along Z axis
|
||||||
|
|
||||||
|
#pearson co-eff between each source signal,and accelerometer readings
|
||||||
|
pcoeff_arr = np.zeros((a.shape[0],3))#array will record p_coeff for each source with x,y,z accelermeter readings
|
||||||
|
for i in range(a.shape[0]):
|
||||||
|
source = a[i] #extracting particular source
|
||||||
|
#calculating pearson co-relation coeff between particular source each of accelerometer axis readings
|
||||||
|
r_x,_ = pearsonr(source,acc_x)
|
||||||
|
r_y,_ = pearsonr(source,acc_y)
|
||||||
|
r_z,_ = pearsonr(source,acc_z)
|
||||||
|
pcoeff_arr[i,0] = r_x
|
||||||
|
pcoeff_arr[i,1] = r_y
|
||||||
|
pcoeff_arr[i,2] = r_z
|
||||||
|
#print('############')
|
||||||
|
#calculating mean ,std deviation of pearson co-eff for all sources for each axis i.e X,Y,Z
|
||||||
|
mean = np.mean(pcoeff_arr,axis = 0)
|
||||||
|
std = np.std(pcoeff_arr,axis = 0)
|
||||||
|
error = mean + 2 * std
|
||||||
|
|
||||||
|
#calculating which sources differ have pearson co-eff of atleast one axis greater than 2 standard deviation from mean
|
||||||
|
bad_source_index = []
|
||||||
|
for i in range(pcoeff_arr.shape[0]):
|
||||||
|
if pcoeff_arr[i,0] > error[0] or pcoeff_arr[i,1] > error[1] or pcoeff_arr[i,2] > error[2]:
|
||||||
|
bad_source_index.append(i)
|
||||||
|
|
||||||
|
#correcting bad sources by butterworth filter by high pass 3Hz frequency as motion artifacts are said to exist in low power frequencies
|
||||||
|
for index in bad_source_index:
|
||||||
|
source_to_be_filtered = a[index]
|
||||||
|
a[index] = filters.butterworth_filter(source_to_be_filtered,dataset.freq,3,None)#high pass filter 3Hz
|
||||||
|
|
||||||
|
return a #return corrected source signals
|
||||||
|
|
||||||
|
|
||||||
|
# In[ ]:
|
||||||
|
|
||||||
|
|
||||||
|
#loading dataset arrays
|
||||||
|
dataset = preprocessing('OASIS')
|
||||||
|
dataset.load_arrays()
|
||||||
|
dataset.gyroscope.shape
|
||||||
|
|
||||||
|
|
||||||
|
# In[ ]:
|
||||||
|
|
||||||
|
|
||||||
|
#referencing electrodes to average value method
|
||||||
|
average_data = referencing.average(dataset.X)
|
||||||
|
|
||||||
|
|
||||||
|
# In[ ]:
|
||||||
|
|
||||||
|
|
||||||
|
#running butterworth filter (bandpass filter)
|
||||||
|
filtered_data = filters.notch_filter(average_data,dataset.freq,60)#butterworth_filter(average_data,dataset.freq,0.1,40)
|
||||||
|
|
||||||
|
|
||||||
|
# In[ ]:
|
||||||
|
|
||||||
|
|
||||||
|
no_of_trials = dataset.X.shape[0]
|
||||||
|
|
||||||
|
(a,b,c) = dataset.gyroscope.shape
|
||||||
|
gyroscope_trials = np.reshape (dataset.gyroscope,(a,c,b))# reshaping trials so they are of the shape trials x channels x timepoints
|
||||||
|
|
||||||
|
#iterating over all trials and correcting trial data for motion artifact
|
||||||
|
for trial_n in range(no_of_trials):
|
||||||
|
print('processing trial no:',trial_n+1)
|
||||||
|
trial_data = filtered_data[trial_n]
|
||||||
|
gyroscope_trial = gyroscope_trials[0,4:,:] #only acclerometer values extracted for a particular trial
|
||||||
|
gyroscope_trial_x = gyroscope_trial[0] # accelerometer x axis reading
|
||||||
|
gyroscope_trial_y = gyroscope_trial[1] # accelerometer y axis reading
|
||||||
|
gyroscope_trial_z = gyroscope_trial[2] # accelerometer z axis reading
|
||||||
|
source_signals,mixing_matrix = source_decomposition.ica(trial_data,dataset.channels,dataset.ch_type,dataset.freq)
|
||||||
|
corrected_sources = process_trial(source_signals,gyroscope_trial_x,gyroscope_trial_y,gyroscope_trial_z)
|
||||||
|
|
||||||
|
#corrected sources are projected back into orignal dimensional space of EEG data using mixing matrix
|
||||||
|
project_back = np.matmul(mixing_matrix,corrected_sources)
|
||||||
|
filtered_data[trial_n] = project_back
|
||||||
|
|
||||||
|
|
||||||
|
# In[ ]:
|
||||||
|
|
||||||
|
|
||||||
|
filtered_data.shape
|
||||||
|
|
||||||
|
|
||||||
|
# In[ ]:
|
||||||
|
|
||||||
|
|
||||||
|
no_of_trials = dataset.X.shape[0]
|
||||||
|
'''
|
||||||
|
dictionary contains information about each trial
|
||||||
|
each trial number i is mapped to a list containing the cleaned epochs given by autoreject,boolena array indicating which epoch was dropped,and
|
||||||
|
a percentage indicating epochs dropped out of total, valence ,arousal rating for trial and image_id
|
||||||
|
'''
|
||||||
|
#running autoreject for each trial data
|
||||||
|
|
||||||
|
'''
|
||||||
|
autoreject divides each trial data into 5 epochs of 1 sec segment i.e 640 timepoints into 128 timepoints per epochs,and runs algo on each
|
||||||
|
epoch,rejecting epochs based on estimated parameters
|
||||||
|
'''
|
||||||
|
clean_epochs ={}
|
||||||
|
for trial in range(no_of_trials):
|
||||||
|
print('trial no',trial)
|
||||||
|
temp = filtered_data[trial]
|
||||||
|
raw_object = autoreject_custom.raw_object_creation(temp,dataset.channels,dataset.ch_type,dataset.freq)
|
||||||
|
print(raw_object.get_data().shape)
|
||||||
|
epoch = autoreject_custom.epoch_object_creation(raw_object)
|
||||||
|
print(epoch.get_data().shape)
|
||||||
|
#print('epochs shape',epoch.get_data().shape)
|
||||||
|
clean_epoch,reject_log = autoreject_custom.autoreject_algo(epoch,n_interpolates = np.array([1, 4, 32]),consensus_percs = np.linspace(0, 1.0, 11))
|
||||||
|
#clean_epochs.append([clean_epoch,reject_log])
|
||||||
|
if clean_epoch.drop_log_stats() == 0:
|
||||||
|
clean_epochs[trial+1] = [clean_epoch.get_data(),reject_log.bad_epochs,clean_epoch.drop_log_stats(),dataset.Y[trial],dataset.Z[trial][1]]
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
# In[ ]:
|
||||||
|
|
||||||
|
|
||||||
|
def driver_code():
|
||||||
|
|
||||||
|
#load dataset
|
||||||
|
dataset_dict = {0:'DEAP',1:'OASIS',2:'DREAMER'}
|
||||||
|
print(dataset_dict)
|
||||||
|
print('enter dataset mapping number you want to use')
|
||||||
|
mapping = int(input())
|
||||||
|
dataset = preprocessing(dataset_dict[mapping])
|
||||||
|
dataset.load_arrays()
|
||||||
|
|
||||||
|
#referencing
|
||||||
|
print('next step in preprocessing is referencing')
|
||||||
|
referencing_dict = {1:'average_referencing'}
|
||||||
|
print(referencing_dict)
|
||||||
|
print('enter referencing method')
|
||||||
|
mapping = int(input())
|
||||||
|
if mapping ==1 :
|
||||||
|
averaged_data = referencing.average(dataset.X)
|
||||||
|
print('next step is applying filters')
|
||||||
|
filter_dict = {1:'notch_filter',2:"butter_worth_filter"}
|
||||||
|
|
||||||
|
|
||||||
|
#filtering
|
||||||
|
applyed_filters = False
|
||||||
|
while applyed_filters == False:
|
||||||
|
print(filter_dict)
|
||||||
|
mapping = int(input())
|
||||||
|
print('sampling frequency of dataset is',dataset.freq)
|
||||||
|
if mapping == 1 :
|
||||||
|
print('enter notch frequency')
|
||||||
|
notch_freq = float(input())
|
||||||
|
filtered_data = filters.notch_filter(averaged_data,dataset.freq,notch_freq)
|
||||||
|
|
||||||
|
if mapping == 2:
|
||||||
|
print('enter lower frequency')
|
||||||
|
lfreq = float(input())
|
||||||
|
print('enter higher frequency')
|
||||||
|
hfreq = float(input())
|
||||||
|
filtered_data = filters.butterworth_filter(dataset.X,dataset.freq,lfreq,hfreq)
|
||||||
|
|
||||||
|
print('Do you want to apply filters again?enter y/n')
|
||||||
|
boolean = input()
|
||||||
|
if boolean == 'n':
|
||||||
|
applyed_filters = True
|
||||||
|
|
||||||
|
print('do you want to save the data preprocessed so far?y/n')
|
||||||
|
boolean = input()
|
||||||
|
if boolean == 'y':
|
||||||
|
filename = input('enter filename to save as')
|
||||||
|
np.savez('preprocessed_data/'+dataset.name.lower()+'/'+filename,X = dataset.X,Y = dataset.Y)
|
||||||
|
|
||||||
|
#if motion artifact correction using gyrscopic data if dataset is oasis
|
||||||
|
if dataset.name == 'OASIS':
|
||||||
|
print('do you want to use motion artifact removal using gyroscopic data? y/n')
|
||||||
|
boolean = input()
|
||||||
|
if boolean == 'y':
|
||||||
|
no_of_trials = dataset.X.shape[0]
|
||||||
|
print('shape of gyroscope data before reshaping is:',dataset.gyroscope.shape)
|
||||||
|
(a,b,c) = dataset.gyroscope.shape
|
||||||
|
gyroscope_trials = np.reshape (dataset.gyroscope,(a,c,b))# reshaping trials so they are of the shape trials x channels x timepoints
|
||||||
|
|
||||||
|
#iterating over all trials and correcting trial data for motion artifact
|
||||||
|
for trial_n in range(no_of_trials):
|
||||||
|
print('processing trial no:',trial_n+1)
|
||||||
|
trial_data = filtered_data[trial_n]
|
||||||
|
gyroscope_trial = gyroscope_trials[trial_n,:,:] #only acclerometer values extracted for a particular trial
|
||||||
|
gyroscope_trial_x = gyroscope_trial[0] # accelerometer x axis reading
|
||||||
|
gyroscope_trial_y = gyroscope_trial[1] # accelerometer y axis reading
|
||||||
|
gyroscope_trial_z = gyroscope_trial[2] # accelerometer z axis reading
|
||||||
|
source_signals,mixing_matrix = source_decomposition.ica(trial_data,dataset.channels,dataset.ch_type,dataset.freq)
|
||||||
|
corrected_sources = process_trial(source_signals,gyroscope_trial_x,gyroscope_trial_y,gyroscope_trial_z)
|
||||||
|
|
||||||
|
#corrected sources are projected back into orignal dimensional space of EEG data using mixing matrix
|
||||||
|
project_back = np.matmul(mixing_matrix,corrected_sources)
|
||||||
|
filtered_data[trial_n] = project_back
|
||||||
|
|
||||||
|
print(filtered_data.shape)
|
||||||
|
print('do you want to save the data preprocessed so far?y/n')
|
||||||
|
boolean = input()
|
||||||
|
if boolean == 'y':
|
||||||
|
filename = input('enter filename to save as')
|
||||||
|
np.savez('preprocessed_data/'+dataset.name.lower()+'/'+filename,X = dataset.X,Y = dataset.Y)
|
||||||
|
|
||||||
|
if dataset.name == 'OASIS':
|
||||||
|
print('do you want to use autoreject? y/n')
|
||||||
|
boolean = input()
|
||||||
|
if boolean == 'y':
|
||||||
|
print('do you want to save this autoreject cleaned data? y/n')
|
||||||
|
boolean = input()
|
||||||
|
no_of_trials = dataset.X.shape[0]
|
||||||
|
'''
|
||||||
|
dictionary contains information about each trial
|
||||||
|
each trial number i is mapped to a list containing the cleaned epochs given by autoreject,boolena array indicating which epoch was dropped,and
|
||||||
|
a percentage indicating epochs dropped out of total, valence ,arousal rating for trial and image_id
|
||||||
|
'''
|
||||||
|
#running autoreject for each trial data
|
||||||
|
|
||||||
|
'''
|
||||||
|
autoreject divides each trial data into 5 epochs of 1 sec segment i.e 640 timepoints into 128 timepoints per epochs,and runs algo on each
|
||||||
|
epoch,rejecting epochs based on estimated parameters
|
||||||
|
'''
|
||||||
|
clean_epochs ={}
|
||||||
|
for trial in range(no_of_trials):
|
||||||
|
print('trial no',trial)
|
||||||
|
temp = filtered_data[trial]
|
||||||
|
raw_object = autoreject_custom.raw_object_creation(temp,dataset.channels,dataset.ch_type,dataset.freq)
|
||||||
|
print(raw_object.get_data().shape)
|
||||||
|
epoch = autoreject_custom.epoch_object_creation(raw_object)
|
||||||
|
print(epoch.get_data().shape)
|
||||||
|
#print('epochs shape',epoch.get_data().shape)
|
||||||
|
clean_epoch,reject_log = autoreject_custom.autoreject_algo(epoch,n_interpolates = np.array([1, 4, 32]),consensus_percs = np.linspace(0, 1.0, 11))
|
||||||
|
#clean_epochs.append([clean_epoch,reject_log])
|
||||||
|
if clean_epoch.drop_log_stats() == 0:
|
||||||
|
clean_epochs[trial+1] = [clean_epoch.get_data(),reject_log.bad_epochs,clean_epoch.drop_log_stats(),dataset.Y[trial],dataset.Z[trial][1]]
|
||||||
|
|
||||||
|
if boolean == 'y':
|
||||||
|
with open('preprocessed_data/oasis/with_autoreject.p','wb') as file:
|
||||||
|
pickle.dump(clean_epochs,file,protocol=pickle.HIGHEST_PROTOCOL)
|
||||||
|
|
||||||
|
|
||||||
|
# In[ ]:
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
def __main__():
|
||||||
|
driver_code()
|
||||||
|
|
||||||
|
|
||||||
|
# In[ ]:
|
||||||
|
|
||||||
|
|
||||||
|
__main__()
|
||||||
|
|
||||||
@@ -0,0 +1,106 @@
|
|||||||
|
#!/usr/bin/env python
|
||||||
|
# coding: utf-8
|
||||||
|
|
||||||
|
# In[ ]:
|
||||||
|
|
||||||
|
|
||||||
|
from google.colab import drive
|
||||||
|
drive.mount('/gdrive',force_remount = True)
|
||||||
|
|
||||||
|
|
||||||
|
# In[ ]:
|
||||||
|
|
||||||
|
|
||||||
|
get_ipython().run_line_magic('cd', '../gdrive/MyDrive/emotion_recognition_project/')
|
||||||
|
|
||||||
|
|
||||||
|
# # Incremental Learning for OASIS
|
||||||
|
|
||||||
|
# **Arguments**
|
||||||
|
#
|
||||||
|
#
|
||||||
|
# ---
|
||||||
|
#
|
||||||
|
# save = 'y/n'
|
||||||
|
#
|
||||||
|
# ---
|
||||||
|
#
|
||||||
|
#
|
||||||
|
#
|
||||||
|
#
|
||||||
|
#
|
||||||
|
# Eg. if you want to run model on OASIS dataset,don't want to save plots, with
|
||||||
|
# command would be
|
||||||
|
#
|
||||||
|
# !python incremental_learning_OASIS.py n
|
||||||
|
#
|
||||||
|
#
|
||||||
|
#
|
||||||
|
#
|
||||||
|
#
|
||||||
|
|
||||||
|
# In[ ]:
|
||||||
|
|
||||||
|
|
||||||
|
get_ipython().system('python incremental_learning_oasis.py n')
|
||||||
|
|
||||||
|
|
||||||
|
#
|
||||||
|
# # Incremental Learning for DEAP
|
||||||
|
|
||||||
|
# **Arguments**
|
||||||
|
#
|
||||||
|
#
|
||||||
|
# ---
|
||||||
|
#
|
||||||
|
# save = 'y/n'
|
||||||
|
#
|
||||||
|
# ---
|
||||||
|
#
|
||||||
|
#
|
||||||
|
#
|
||||||
|
#
|
||||||
|
#
|
||||||
|
# Eg. if you want to run model on DEAP dataset,don't want to save plots, with
|
||||||
|
# command would be
|
||||||
|
#
|
||||||
|
# !python incremental_learning_DEAP.py n
|
||||||
|
|
||||||
|
# In[ ]:
|
||||||
|
|
||||||
|
|
||||||
|
get_ipython().system('python incremental_learning_deap.py n ')
|
||||||
|
|
||||||
|
|
||||||
|
#
|
||||||
|
# # Incremental Learning for DREAMER
|
||||||
|
|
||||||
|
# **Arguments**
|
||||||
|
#
|
||||||
|
#
|
||||||
|
# ---
|
||||||
|
#
|
||||||
|
# save = 'y/n'
|
||||||
|
#
|
||||||
|
# ---
|
||||||
|
#
|
||||||
|
#
|
||||||
|
#
|
||||||
|
#
|
||||||
|
#
|
||||||
|
# Eg. if you want to run model on DEAP dataset,don't want to save plots, with
|
||||||
|
# command would be
|
||||||
|
#
|
||||||
|
# !python incremental_learning_DREAMER.py n
|
||||||
|
|
||||||
|
# In[ ]:
|
||||||
|
|
||||||
|
|
||||||
|
get_ipython().system('python incremental_learning_dreamer.py n ')
|
||||||
|
|
||||||
|
|
||||||
|
# In[ ]:
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
+46
@@ -0,0 +1,46 @@
|
|||||||
|
#!/usr/bin/env python
|
||||||
|
# coding: utf-8
|
||||||
|
|
||||||
|
# In[ ]:
|
||||||
|
|
||||||
|
|
||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
"""utils.ipynb
|
||||||
|
|
||||||
|
Automatically generated by Colaboratory.
|
||||||
|
|
||||||
|
Original file is located at
|
||||||
|
https://colab.research.google.com/drive/1Z2e7rxy64W9WIIcEfH1vyzfVdMNIK8Om
|
||||||
|
"""
|
||||||
|
import numpy as np
|
||||||
|
|
||||||
|
|
||||||
|
def epoch_data(X,Y, window, stride, sfreq):
|
||||||
|
|
||||||
|
# Fucntion to segment the dataset into epochs
|
||||||
|
|
||||||
|
# Parameters :-
|
||||||
|
# X :- The input EEG signal in the format of channels*timepoints*trials
|
||||||
|
# Y :- The values for VALD (depending on the dataset) given by the users
|
||||||
|
# window :- length of the epoch in seconds
|
||||||
|
# stride :- stride of the sliding window in seconds
|
||||||
|
# sfreq :- sampling frequency of the EEG signal
|
||||||
|
|
||||||
|
(channels,timepoints,trials )= X.shape
|
||||||
|
X = np.reshape(X,(trials,channels,timepoints))
|
||||||
|
segment = int(window*sfreq)
|
||||||
|
step = int(stride*sfreq)
|
||||||
|
epochPerTrial = int((timepoints-segment)/step + 1)
|
||||||
|
count = 0
|
||||||
|
X_new = np.empty((trials*epochPerTrial,channels,segment))
|
||||||
|
Y_new = np.empty((trials*epochPerTrial,2))
|
||||||
|
for trial in range(trials):
|
||||||
|
for epoch in range(epochPerTrial):
|
||||||
|
X_new[count,:,:] = X[trial,:,epoch*step:(epoch*step)+segment]
|
||||||
|
Y_new[count,:] = Y[trial,:2]
|
||||||
|
count+=1
|
||||||
|
(trials,channels,timepoints) = X_new.shape
|
||||||
|
X_new = np.reshape(X_new,(channels,timepoints,trials))
|
||||||
|
|
||||||
|
return X_new,Y_new
|
||||||
|
|
||||||
Referência em uma Nova Issue
Bloquear um usuário