Added Scripts
Esse commit está contido em:
@@ -0,0 +1,386 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
"""8.5_cross_validate.ipynb
|
||||
|
||||
Automatically generated by Colaboratory.
|
||||
|
||||
Original file is located at
|
||||
https://colab.research.google.com/drive/1qEkrFcZ9lLqd6gNgxX8Y8QoXlOhH3wXC
|
||||
|
||||
#Leave One Subject Out Cross Validation
|
||||
|
||||
* DREAMER => Shape After Loading
|
||||
X.shape= (414, 58240, 14) Y.shape= (414, 2) Z.shape= (414, 2)
|
||||
|
||||
* DEAP => Shape After Loading
|
||||
X.shape= (1280, 40, 8064) Y.shape= (1280, 2) Z.shape= (1280, 2)
|
||||
|
||||
* OASIS => Shape After Loading
|
||||
X.shape= (600, 640, 14) Y.shape= (600, 2) Z.shape= (600, 2)
|
||||
|
||||
* i.e. OASIS and DREAMER are of form X = (rec, timepoints, channels), while DEAP is (rec, channels, timepoints)
|
||||
|
||||
* reshaping X to (rec, channels,timepoints)
|
||||
makes sense now
|
||||
"""
|
||||
|
||||
!nvidia-smi
|
||||
|
||||
"""#RAPIDS Package Installation"""
|
||||
|
||||
# Install RAPIDS
|
||||
!git clone https://github.com/rapidsai/rapidsai-csp-utils.git
|
||||
!bash rapidsai-csp-utils/colab/rapids-colab.sh stable
|
||||
|
||||
import sys, os
|
||||
|
||||
dist_package_index = sys.path.index('/usr/local/lib/python3.7/dist-packages')
|
||||
sys.path = sys.path[:dist_package_index] + ['/usr/local/lib/python3.7/site-packages'] + sys.path[dist_package_index:]
|
||||
sys.path
|
||||
exec(open('rapidsai-csp-utils/colab/update_modules.py').read(), globals())
|
||||
|
||||
import cuml
|
||||
|
||||
"""-----------------------------------------------------------------------------------------------------------------------------------------------------"""
|
||||
|
||||
from google.colab import drive
|
||||
drive.mount('/gdrive',force_remount=True)
|
||||
|
||||
# Commented out IPython magic to ensure Python compatibility.
|
||||
# %cd /gdrive/MyDrive/Project_DEAP/4.1.2021/
|
||||
|
||||
################################################################################
|
||||
import TopNByFSMethods
|
||||
import TopNByClassifier
|
||||
import EpochedFeatures
|
||||
from args_eeg import args as my_args
|
||||
import ImportUtils
|
||||
|
||||
from ImportUtils import *
|
||||
from TopNByFSMethods import *
|
||||
from TopNByClassifier import *
|
||||
from EpochedFeatures import *
|
||||
from args_eeg import args as my_args
|
||||
from ImportUtils import *
|
||||
from TopNByFSMethods import *
|
||||
from TopNByClassifier import *
|
||||
from EpochedFeatures import *
|
||||
|
||||
from sklearn.svm import SVC
|
||||
|
||||
|
||||
from DEAP_scripts.ImportUtils import *
|
||||
from DEAP_scripts.TopNByFSMethods import *
|
||||
from DEAP_scripts.TopNByClassifier import *
|
||||
from DEAP_scripts.EpochedFeatures import *
|
||||
from DEAP_scripts.args_eeg import args as my_args
|
||||
from sklearn.svm import SVC
|
||||
|
||||
################################################################################
|
||||
|
||||
mean_rmse = []
|
||||
std_rmse = []
|
||||
|
||||
np.random.seed(42)
|
||||
def cross_validate(dataset, window, stride, sfreq, label, best_features_list):
    """Leave-one-subject-out cross-validation with a random-forest regressor.

    Parameters
    ----------
    dataset : str
        Name of the dataset: 'DEAP', 'DREAMER' or 'OASIS'.
    window : int
        Length of the sliding window in seconds (selects the saved archive).
    stride : int
        Stride of the sliding window in seconds (selects the saved archive).
    sfreq : int
        Sampling frequency of the EEG dataset. Kept for interface
        compatibility; it is not used in this function.
    label : int
        Column of the label matrix to regress on (elsewhere in this file
        0 is used for valence and 1 for arousal).
    best_features_list : list of str
        Feature names kept after the top electrode / feature analysis.

    Side effects
    ------------
    Appends the mean and standard deviation of the per-fold RMSE to the
    module-level ``mean_rmse`` / ``std_rmse`` lists, pickles the per-fold
    RMSE list, and saves and shows a per-participant RMSE plot.

    Raises
    ------
    ValueError
        If ``dataset`` is not one of the supported dataset names.
    """
    featurepath = os.getcwd() + '/' + dataset + '/data_extracted/featuresDict/'
    # Load the archive once; it provides both the reference feature shape and the labels.
    with np.load((featurepath + "shannonEntropy_{}_{}.npz").format(window,stride), allow_pickle=True) as archive:
        ans = archive['features']
        Y_epoch = archive['Y']

    # load saved epoched features and keep only the selected ones
    featuresDict = loadFeaturesDict(dataset)
    featuresDict = {k: v for k, v in featuresDict.items() if k in best_features_list}

    featuresList = list(featuresDict.keys())
    print(featuresList)

    # make feature matrix with the selected features; the empty seed row keeps
    # the float dtype and enforces the expected number of columns
    featureMatrix = np.concatenate(
        [np.empty((0, ans.shape[1]))] + list(featuresDict.values()), axis=0)

    # Replace every non-finite entry with zero in one place. The default
    # nan_to_num maps +/-inf to huge finite values, which overflow back to inf
    # when the matrix is later cast to float32.
    featureMatrix = np.nan_to_num(featureMatrix, nan=0.0, posinf=0.0, neginf=0.0)
    featureMatrix = featureMatrix.astype('float64')

    # transpose feature matrix to prepare X (rows = epochs, columns = features)
    X = pd.DataFrame(featureMatrix.T).astype(np.float32)

    # convert ndarray to dataframe
    Y_epoch = pd.DataFrame(Y_epoch)

    print("Number of feature vectors in X = ", X.shape[1])
    print("X.shape = " ,X.shape)

    #***********************************************************
    # Leave-one-subject-out CV: number of folds = numbParticipants
    # (participants, recordings per participant)
    layout = {
        'DEAP': (32, 40),
        # Dreamer dataset has 23 subjects, each subject was shown 18 videos
        'DREAMER': (23, 18),
        'OASIS': (15, 40),
    }
    if dataset not in layout:
        raise ValueError("Unknown dataset {!r}; expected one of {}".format(dataset, sorted(layout)))
    numbParticipants, numbRecordings = layout[dataset]

    numbEpochs = X.shape[0]//(numbParticipants*numbRecordings)
    print(X.shape[0])
    print("numbParticipants = ", numbParticipants)
    print("numbRecordings = " , numbRecordings)
    print("numbEpochs = ", numbEpochs)

    print(type(X))
    print(type(Y_epoch))

    cv_rmse = []
    rows_per_subject = numbRecordings*numbEpochs

    # NOTE(review): assumes the rows of X are grouped by participant in
    # contiguous blocks of rows_per_subject -- confirm against the extractor.
    for i in range(numbParticipants):
        s = i*rows_per_subject
        e = (i+1)*rows_per_subject

        X_test = X.iloc[s:e, :].copy()
        y_test = Y_epoch.iloc[s:e, label].copy()

        X_train = np.concatenate([X.iloc[:s, :].to_numpy(), X.iloc[e:, :].to_numpy()], axis=0)
        y_train = np.concatenate([Y_epoch.iloc[:s, label].to_numpy(), Y_epoch.iloc[e:, label].to_numpy()], axis=0)

        print(X_train.shape, y_train.shape, X_test.shape, y_test.shape)

        clf = RandomForestRegressor()
        clf.fit(X_train, y_train)
        y_predict = clf.predict(X_test)
        # Take the square root explicitly; the `squared=False` keyword was
        # deprecated and later removed from scikit-learn.
        rmse = np.sqrt(mean_squared_error(y_test, y_predict))
        print("window: {}, stide: {}, rmse: {}".format(window,stride,rmse))
        cv_rmse.append(rmse)

    print(cv_rmse)
    print("Mean Cross-validation RMSE = ", np.mean(cv_rmse))
    mean_rmse.append(np.mean(cv_rmse))
    print("Standard Deviation of Cross-validated RMSE = ", np.std(cv_rmse))
    std_rmse.append(np.std(cv_rmse))

    # pickle the per-fold RMSE list
    with open('/gdrive/MyDrive/Project_DEAP/4.1.2021/{}{}_cv_rmse.pkl'.format(dataset,label), 'wb') as f:
        pickle.dump(cv_rmse, f)

    # per-participant RMSE plot
    fig = plt.gcf()
    fig.set_size_inches(40, 20)
    plt.rcParams.update({'font.size': 40})
    plt.xlabel('Partipant No.')
    plt.ylabel('RMSE')
    plt.plot([str(x+1) for x in range(len(cv_rmse))], cv_rmse, linestyle='-', marker='o', color='b', markerfacecolor='r', linewidth=2.0, markersize = 15)
    plt.tight_layout()
    plt.savefig("/gdrive/MyDrive/Project_DEAP/4.1.2021/CV_{}_{}.svg".format(dataset, label), bbox_inches='tight', dpi=500)
    plt.show()
    plt.clf()
|
||||
|
||||
def main(dataset, window, stride, sfreq, model, label, approach, ml_algo, top, fs_method, best_features_list):
    """Entry point for one dataset/label run; currently only runs cross-validation.

    The function prints its arguments, calls ``cross_validate`` and returns.
    The electrode-wise (``top == "e"``) and feature-wise (``top == "f"``)
    ranking code below the bare ``return`` is unreachable as written.
    """
    # Parameters :-
    # dataset :- Name of the Dataset
    # window :- Length of the sliding window in seconds
    # stride :- Stride of the sliding window in seconds
    # sfreq :- sampling frequency of the EEG dataset
    # best_features_list :- Feature list after performing top electrode and feature analysis for various datasets
    # model, approach, ml_algo, fs_method :- accepted but not read anywhere in this function

    print(locals())
    pwd = os.getcwd()

    # getEpochedFeatures(dataset, window, stride, sfreq, label)
    cross_validate(dataset, window, stride, sfreq, label, best_features_list)
    # NOTE(review): everything after this return is dead code -- presumably a
    # temporary short-circuit to run only the cross-validation; confirm.
    return
    if(top == "e"):
        # Electrode-wise ranking: three ranking methods drawn on one figure.
        clf = RandomForestRegressor()
        topElectrodeRegressionRanking(dataset, window, stride, sfreq, clf, label, scale=False, pca=False)
        topElectrodeFSRegressionRanking(dataset, window, stride, sfreq, clf, label, scale=False, pca=False, mutual_info = False, method='SelectKBest')
        topElectrodeFSRegressionRanking(dataset, window, stride, sfreq, clf, label, scale=False, pca=False, mutual_info = False, method='RandomForest')
        plt.legend(["Method A","Method B", "Method C"])

        # label == 1 plots go to the arousal_plots folder, everything else to plots
        if(label == 1):
            plt.savefig(pwd + "/" + dataset + "/arousal_plots/" + "CorrectedElectrodewiseRanking" + str(window) + str(stride) + ".svg", bbox_inches='tight')
            # plt.savefig(pwd + "/" + dataset + "/plots/" + "ElectrodewiseRanking" + str(window) + str(stride) + ".svg", bbox_inches='tight')
            plt.show()
            plt.clf()

        else:
            plt.savefig(pwd + "/" + dataset + "/plots/" + "CorrectedElectrodewiseRanking" + str(window) + str(stride) + ".svg", bbox_inches='tight')
            # plt.savefig(pwd + "/" + dataset + "/plots/" + "ElectrodewiseRanking" + str(window) + str(stride) + ".svg", bbox_inches='tight')
            plt.show()
            plt.clf()

    elif(top == "f"):
        # Feature-wise ranking: same three methods, applied per feature.
        clf = RandomForestRegressor()
        topFeaturesRegressionRanking(dataset, window, stride, sfreq, clf, label, scale=False, pca=False)
        topFeatureFSRegressionRanking(dataset, window, stride, sfreq, clf, label, scale=False, pca=False, mutual_info = False, method='SelectKBest')
        topFeatureFSRegressionRanking(dataset, window, stride, sfreq, clf, label, scale=False, pca=False, mutual_info = False, method='RandomForest')
        if(label == 1):
            plt.legend(["Method A","Method B", "Method C"])
            plt.savefig(pwd + "/" + dataset + "/arousal_plots/" + "CorrectedFeaturewiseRanking" + str(window) + str(stride) + ".svg", bbox_inches='tight')
            plt.show()
            plt.clf()
        else:
            plt.legend(["Method A","Method B", "Method C"])
            plt.savefig(pwd + "/" + dataset + "/plots/" + "CorrectedFeaturewiseRanking" + str(window) + str(stride) + ".svg", bbox_inches='tight')
            plt.show()
            plt.clf()
|
||||
|
||||
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # One entry per run: (dataset, label, selected features).
    # label 0 is the valence run, label 1 the arousal run.
    _runs = [
        ('DREAMER', 0, ['HjorthMob','HjorthComp','stdDev','bandPwr_theta','ShannonRes_gamma','bandPwr_beta']),
        ('DREAMER', 1, ['HjorthMob','ShannonRes_gamma','HjorthComp','stdDev','bandPwr_gamma', 'bandPwr_theta']),
        ('DEAP', 0, ['bandPwr_gamma','ShannonRes_gamma','ShannonRes_beta','rasm_gamma','dasm_gamma','bandPwr_beta']),
        ('DEAP', 1, ['HjorthMob','HjorthComp','stdDev','ShannonRes_gamma','bandPwr_beta','bandPwr_theta','ShannonRes_beta','dasm_beta']),
        ('OASIS', 0, ['HjorthMob','stdDev','HjorthComp']),
        ('OASIS', 1, ['HjorthMob']),
    ]

    # Every run shares the same window/stride/sampling-rate/model settings.
    for _run_dataset, _run_label, best_features_list in _runs:
        main(
            dataset=_run_dataset,
            window=1,
            stride=1,
            sfreq=128,
            model='rfr',
            label=_run_label,
            approach='byfs',
            ml_algo='regression',
            top='f',
            fs_method='SelectKBest',
            best_features_list=best_features_list,
        )
|
||||
|
||||
# print(len(best_features_list))
|
||||
# main(dataset='OASIS', window=1, stride=1, sfreq=128, model='rfr', label= 1,approach='byfs', ml_algo='regression', top='f', fs_method='SelectKBest', best_features_list = best_features_list)
|
||||
# --dataset DREAMER --window 1 --stride 1 --sfreq 128 --model rfr --label 0 --approach byfs --ml_algo regression --top f --fs_method SelectKBest
|
||||
|
||||
"""#MINIMUM RMSE DURING CROSS-VALIDATION 6-6-2021"""
|
||||
|
||||
# Commented out IPython magic to ensure Python compatibility.
|
||||
import matplotlib.pyplot as plt
|
||||
# %matplotlib inline
|
||||
import seaborn as sns
|
||||
import copy
|
||||
import os
|
||||
from scipy import io,signal
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
import pickle
|
||||
#{Dataset_Name}{0/1}_cv_rmse.pkl :- 0 is for Valence and 1 is for Arousal
|
||||
# Pickled per-fold RMSE lists written by cross_validate, with the dataset and
# label (0 = valence, 1 = arousal) each file belongs to.
pl = ['DREAMER0_cv_rmse.pkl', 'DREAMER1_cv_rmse.pkl', 'DEAP0_cv_rmse.pkl', 'DEAP1_cv_rmse.pkl', 'OASIS0_cv_rmse.pkl', 'OASIS1_cv_rmse.pkl']
dataset = ['DREAMER', 'DREAMER', 'DEAP', 'DEAP','OASIS','OASIS']
label = [0,1,0,1,0,1]

# Best (smallest) fold RMSE for every dataset/label pair, in the order of pl.
min_cv_rmse = []
for pkl_path in pl:
    with open(pkl_path, 'rb') as f:
        cv_rmse = pickle.load(f)
    min_cv_rmse.append(min(cv_rmse))

print(min_cv_rmse)
|
||||
|
||||
"""feature_select_main.py"""
|
||||
|
||||
!pip install dit
|
||||
!pip install pyinform
|
||||
|
||||
from ImportUtils import *
|
||||
from args_eeg import args as my_args
|
||||
|
||||
"""#Plot pickled results"""
|
||||
|
||||
# Commented out IPython magic to ensure Python compatibility.
|
||||
import matplotlib.pyplot as plt
|
||||
# %matplotlib inline
|
||||
import seaborn as sns
|
||||
import copy
|
||||
import os
|
||||
from scipy import io,signal
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
import pickle
|
||||
|
||||
# with open('/gdrive/MyDrive/Project_DEAP/4.1.2021/{}{}_cv_rmse.pkl'.format(dataset,label), 'rb') as f:
|
||||
# pickle.dump(cv_rmse, f)
|
||||
|
||||
# Pickled per-fold RMSE lists, with the dataset and label of each file.
pl = ['DREAMER0_cv_rmse.pkl', 'DREAMER1_cv_rmse.pkl', 'DEAP0_cv_rmse.pkl', 'DEAP1_cv_rmse.pkl', 'OASIS0_cv_rmse.pkl', 'OASIS1_cv_rmse.pkl']
dataset = ['DREAMER', 'DREAMER', 'DEAP', 'DEAP','OASIS','OASIS']
label = [0,1,0,1,0,1]

# One RMSE-per-participant figure for every dataset/label pair.
for pkl_path, dataset_name, label_idx in zip(pl, dataset, label):
    with open(pkl_path, 'rb') as f:
        cv_rmse = pickle.load(f)

    fig = plt.gcf()
    fig.set_size_inches(40, 20)
    plt.rcParams.update({'font.size': 50})
    plt.xlabel('Partipant No.')
    plt.ylabel('RMSE')
    participant_ticks = [str(x+1) for x in range(len(cv_rmse))]
    plt.plot(participant_ticks, cv_rmse, linestyle='-', marker='o', color='b', markerfacecolor='r', linewidth=2.0, markersize = 15)
    plt.tight_layout()
    plt.savefig("/gdrive/MyDrive/Project_DEAP/4.1.2021/cv_stats/CV_{}_{}.svg".format(dataset_name, label_idx), bbox_inches='tight', dpi=500)
    plt.show()
    plt.clf()

# Persist the summary statistics accumulated by cross_validate.
with open('/gdrive/MyDrive/Project_DEAP/4.1.2021/mean_cv_rmse.pkl', 'wb') as f:
    pickle.dump(mean_rmse, f)

with open('/gdrive/MyDrive/Project_DEAP/4.1.2021/std_cv_rmse.pkl', 'wb') as f:
    pickle.dump(std_rmse, f)

# Same statistics as a CSV table, one row per dataset/label pair.
df = pd.DataFrame({
    'Dataset-Label': ['DREAMER-V','DREAMER-A','DEAP-V','DEAP-A','OASIS-V','OASIS-A'],
    'Mean RMSE': mean_rmse,
    'Std Dev RMSE': std_rmse,
})
df.to_csv('/gdrive/MyDrive/Project_DEAP/4.1.2021/cv_rmse_stats.csv')
|
||||
|
||||
+644
@@ -0,0 +1,644 @@
|
||||
#!/usr/bin/env python
|
||||
# coding: utf-8
|
||||
|
||||
# In[ ]:
|
||||
|
||||
|
||||
import bisect
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
import pywt
|
||||
from scipy import stats, signal, integrate
|
||||
from dit.other import tsallis_entropy
|
||||
import dit
|
||||
import librosa
|
||||
import statsmodels.api as sm
|
||||
import itertools
|
||||
from pyinform import mutualinfo
|
||||
from statsmodels import tsa
|
||||
from sklearn.metrics import mutual_info_score
|
||||
import numpy as np
|
||||
from scipy import signal,integrate
|
||||
from sklearn.metrics.cluster import normalized_mutual_info_score as normed_mutual_info
|
||||
|
||||
################################################
|
||||
# Auxiliary Functions
|
||||
################################################
|
||||
|
||||
##########
|
||||
# Filter the eegData with a band-pass filter
|
||||
# eegData: 3D np array [chans x ms x epochs]
|
||||
def filt_data(eegData, lowcut, highcut, fs, order=7):
    """Band-pass filter EEG data along the time axis (axis 1).

    Parameters
    ----------
    eegData : np.ndarray
        Array filtered along axis 1 (documented in this file as
        [chans x samples x epochs]).
    lowcut, highcut : float
        Pass-band edges in Hz.
    fs : float
        Sampling frequency in Hz.
    order : int
        Order of the Butterworth prototype.

    Returns
    -------
    np.ndarray
        Causally filtered data with the same shape as ``eegData``.
    """
    nyq = 0.5 * fs
    band = [lowcut / nyq, highcut / nyq]
    # Second-order sections instead of (b, a) coefficients: a high-order
    # band-pass with a narrow, low band is numerically ill-conditioned in
    # transfer-function form, while the cascaded form stays stable.
    sos = signal.butter(order, band, btype='band', output='sos')
    return signal.sosfilt(sos, eegData, axis=1)
|
||||
|
||||
#########
|
||||
# remove short bursts / spikes
|
||||
def fcnRemoveShortEvents(z,n):
    """Zero out runs of identical values that are shorter than ``n`` samples.

    ``z`` is a 2-D array indexed as ``z[chan, sample]``. It is modified in
    place and also returned. Each channel is scanned from index 2 onwards; a
    run is only evaluated when the value changes, so the final run of a
    channel is never shortened by this loop.
    """
    for chan in range(z.shape[0]):
        # check for too-short suppressions
        ct=0   # number of consecutive equal neighbours seen in the current run
        i0=1   # start index of the current run
        i1=1   # last index of the current run
        for i in range(2,len(z[chan,:])):
            if z[chan,i]==z[chan,i-1]:
                ct=ct+1
                i1=i
            else:
                # value changed: the run [i0, i1] just ended
                if ct<n:
                    z[chan,i0:i1] = 0
                    z[chan,i1] = 0 #nasty little bug
                ct=0
                i0=i
                i1=i
        # the scan starts at index 1, so an isolated 1 at index 0 is cleared separately
        if z[chan,0] == 1 and z[chan,1] == 0:
            z[chan,0] = 0
    return z
|
||||
|
||||
##########
|
||||
# Find interval of consistent values in binary 1D numpy array
|
||||
def get_intervals(A,B,endIdx=500):
    """Pair every start index in A with the next stop index in B, per channel.

    ``A`` and ``B`` are sequences of per-channel index lists; each ``B[ii]``
    must be sorted. For every ``a`` in ``A[ii]`` the result holds ``(a, b)``,
    where ``b`` is the first element of ``B[ii]`` that is not smaller than
    ``a``. When no such element exists the interval is closed with ``endIdx``.

    Returns a list (one entry per channel) of lists of ``(start, stop)`` tuples.
    """
    def _stop_for(start, stops):
        # bisect_left gives the position of the first stop >= start
        pos = bisect.bisect_left(stops, start)
        return endIdx if pos == len(stops) else stops[pos]

    return [
        [(start, _stop_for(start, stops)) for start in starts]
        for starts, stops in zip(A, B)
    ]
|
||||
|
||||
##########
|
||||
# Detect bursts and supressions in eeg data
|
||||
def burst_supression_detection(x,fs,suppression_threshold = 10):
    """Detect burst and suppression intervals per channel.

    The amplitude envelope of ``x`` (Hilbert transform along axis 1) is
    smoothed with a moving average, thresholded at ``suppression_threshold``
    and cleaned of runs shorter than ``fs/2`` samples. Rising and falling
    edges of the resulting mask are paired into intervals.

    Returns ``(bursts, supressions)``: two lists (one entry per channel) of
    ``(start, stop)`` index tuples as produced by ``get_intervals``.
    """
    # Disabled EMG-artifact detection, kept for reference:
    #   nyq = 0.5 * fs
    #   low = low / nyq
    #   high = high / nyq
    #   be, ae = signal.butter(order, [low, high], btype='band')

    # CALCULATE ENVELOPE
    e = abs(signal.hilbert(x,axis=1));
    # same as smooth(e,Fs/4) in MATLAB: moving average over fs/4 samples (a quarter of a second)
    ME = np.array([np.convolve(el,np.ones(int(fs/4))/(fs/4),'same') for el in e.tolist()])
    e = ME
    # DETECT SUPRESSIONS
    # apply threshold -- 10uv
    z = (ME<suppression_threshold)
    # remove too-short suppression segments (z is modified in place)
    z = fcnRemoveShortEvents(z,fs/2)
    # remove too-short burst segments
    b = fcnRemoveShortEvents(1-z,fs/2)
    z = 1-b
    # indices where the mask steps up (0 -> 1) and down (1 -> 0)
    went_high = [np.where(np.array(chD[:-1]) < np.array(chD[1:]))[0].tolist() for chD in z.tolist()]
    went_low = [np.where(np.array(chD[:-1]) > np.array(chD[1:]))[0].tolist() for chD in z.tolist()]

    # NOTE(review): get_intervals closes open-ended intervals at its default
    # endIdx of 500 regardless of the signal length -- confirm this is intended.
    bursts = get_intervals(went_high,went_low)
    supressions = get_intervals(went_low,went_high)

    return bursts,supressions
|
||||
|
||||
##########
|
||||
# Coherence in the Delta Band
|
||||
def CoherenceDelta(eegData, i, j, fs=100):
    """Mean magnitude-squared coherence between channels i and j in 0.5-4 Hz.

    Coherence is estimated along the time axis of ``eegData[chan, time, epoch]``
    with ``nfft`` equal to the number of time samples. Returns one value per
    epoch.
    """
    n_samples = eegData.shape[1]
    chan_i = eegData[i,:,:]
    chan_j = eegData[j,:,:]
    f, Cxy = signal.coherence(chan_i, chan_j, fs=fs, nfft=n_samples, axis=0)
    # keep only the frequency rows inside the delta band
    in_delta = np.all([f >= 0.5, f<=4], axis=0)
    return np.mean(Cxy[in_delta], axis=0)
|
||||
|
||||
##########
|
||||
# correlation across channels
|
||||
def PhaseLagIndex(eegData, i, j):
    """Phase lag index between channels i and j, one value per epoch.

    ``eegData`` is indexed as ``[chan, time, epoch]``. The instantaneous
    phase of each channel is taken from its analytic signal, and the index
    is the absolute value of the time-averaged sign of the phase difference.
    """
    # The analytic signal must be computed along the time axis (axis 0 of the
    # [time, epoch] slice); scipy's default is the last axis.
    hxi = signal.hilbert(eegData[i,:,:], axis=0)
    hxj = signal.hilbert(eegData[j,:,:], axis=0)
    # instantaneous phase; the sign of the difference is all that is used below
    inst_phasei = np.angle(hxi)
    inst_phasej = np.angle(hxj)

    out = np.abs(np.mean(np.sign(inst_phasej - inst_phasei), axis=0))
    return out
|
||||
|
||||
##########
|
||||
# Cross Correlation
|
||||
def crossCorrelation(eegData, i, j):
    """Peak prominence of the full cross-correlation of channels i and j.

    For every epoch of ``eegData[chan, time, epoch]`` the absolute full
    cross-correlation is computed and summarised as
    ``(max - mean) / std``. Returns one value per epoch.
    """
    n_epochs = eegData.shape[2]
    out = np.zeros(n_epochs)
    for ep in range(n_epochs):
        magnitude = np.abs(np.correlate(eegData[i,:,ep], eegData[j,:,ep], mode="full"))
        peak_above_mean = np.max(magnitude) - np.mean(magnitude)
        out[ep] = peak_above_mean / np.std(magnitude)
    return out
|
||||
|
||||
##########
|
||||
# Auxiliary Cross-correlation Lag
|
||||
def corrCorrLagAux(eegData,ii,jj,Fs=100):
    """Index of the lag that maximises the cross-correlation of two channels.

    Channel ``jj`` is circularly shifted along the time axis in steps of
    ``0.2 * Fs`` samples; for each shift the cross-correlation statistic with
    channel ``ii`` is computed per epoch. Returns, per epoch, the index of the
    shift (not the shift in samples) with the largest statistic.
    """
    lagCorr = []
    for lag in range(0,eegData.shape[1],int(0.2*Fs)):
        tmp = eegData.copy()
        tmp[jj,:,:] = np.roll(tmp[jj,:,:], lag, axis=0)
        # crossCorrelation takes (data, i, j); the original called an
        # undefined name with an extra argument.
        lagCorr.append(crossCorrelation(tmp, ii, jj))
    return np.argmax(lagCorr,axis=0)
|
||||
|
||||
################################################
|
||||
# bandpower Functions
|
||||
################################################
|
||||
|
||||
##########
|
||||
# compute the bandpower (area under segment (from fband[0] to fband[1] in Hz)
|
||||
# of curve in frequency domain) of data, at sampling frequency of Fs (100 usually)
|
||||
def bandpower(data, fs, fband):
    """Power of ``data`` inside the band ``fband = [f_low, f_high]`` (Hz).

    The periodogram of ``data`` (sampling frequency ``fs``) is integrated
    with Simpson's rule over the frequency bins selected for the band.
    """
    # `periodogram` and `simps` were never imported as bare names; go through
    # the scipy modules this file imports. `simpson` is the current name of
    # the integrator, `simps` the legacy alias kept as a fallback.
    simpson = getattr(integrate, "simpson", None) or integrate.simps
    freqs, powers = signal.periodogram(data, fs)
    # last bin at or below each band edge
    idx_min = np.argmax(freqs > fband[0]) - 1
    idx_max = np.argmax(freqs > fband[1]) - 1
    idx_delta = np.zeros(dtype=bool, shape=freqs.shape)
    idx_delta[idx_min:idx_max] = True
    bpower = simpson(powers[idx_delta], x=freqs[idx_delta])
    return bpower
|
||||
|
||||
##########
|
||||
# computes the same thing as vecbandpower but with a loop
|
||||
def pfvecbandpower(data, fs, fband):
    """Band power per channel and epoch for ``data[chan, time, epoch]``.

    For every channel the periodogram is taken along the time axis and
    integrated with Simpson's rule over the bins selected for
    ``fband = [f_low, f_high]`` (Hz). Returns an array of shape
    ``(chans, epochs)``.
    """
    # `periodogram` and `simps` were never imported as bare names; go through
    # the scipy modules this file imports (`simps` is the legacy alias).
    simpson = getattr(integrate, "simpson", None) or integrate.simps
    bpowers = np.zeros((data.shape[0], data.shape[2]))
    for i in range(data.shape[0]):
        freqs, powers = signal.periodogram(data[i, :, :], fs, axis=0)
        # last bin at or below each band edge
        idx_min = np.argmax(freqs > fband[0]) - 1
        idx_max = np.argmax(freqs > fband[1]) - 1
        idx_delta = np.zeros(dtype=bool, shape=freqs.shape)
        idx_delta[idx_min:idx_max] = True

        bpowers[i, :] = simpson(powers[idx_delta, :], x=freqs[idx_delta], axis=0)

    return bpowers
|
||||
|
||||
################################################
|
||||
# Complexity features
|
||||
################################################
|
||||
|
||||
##########
|
||||
# Extract the Shannon Entropy
|
||||
# threshold the signal and make it discrete, normalize it and then compute entropy
|
||||
def shannonEntropy(eegData, bin_min, bin_max, binWidth):
    """Shannon entropy of the amplitude histogram, per channel and epoch.

    Each ``eegData[chan, :, epoch]`` trace is binned on the edges
    ``arange(bin_min + 1, bin_max, binWidth)``; the counts of the non-empty
    bins are normalised to probabilities ``p`` and the entropy is
    ``-sum(p * log2(p / binWidth))``. Returns an array of shape
    ``(chans, epochs)``.
    """
    n_chans, n_epochs = eegData.shape[0], eegData.shape[2]
    edges = np.arange(bin_min+1, bin_max, binWidth)
    H = np.zeros((n_chans, n_epochs))
    for chan, epoch in np.ndindex(n_chans, n_epochs):
        counts = np.histogram(eegData[chan,:,epoch], bins=edges)[0]
        occupied = counts[counts > 0]
        prob = occupied / np.sum(occupied)
        H[chan, epoch] = -np.dot(prob, np.log2(prob/binWidth))
    return H
|
||||
|
||||
##########
|
||||
# Extract the tsalis Entropy
|
||||
def tsalisEntropy(eegData, bin_min, bin_max, binWidth, orders = [1]):
    """Tsallis entropy of the amplitude histogram for each requested order.

    Returns a list with one ``(chans, epochs)`` array per entry of
    ``orders``, in the same order.

    NOTE(review): ``bin_min``, ``bin_max`` and ``binWidth`` are accepted but
    the histogram edges are fixed at ``arange(-199, 200, 2)``, which is what
    the arguments (-200, 200, 2) would produce in shannonEntropy. Left
    unchanged here -- confirm whether the parameters should be honoured.
    """
    # One independent array per order. `[arr] * n` would repeat a single
    # array, so every order would overwrite the same storage.
    H = [np.zeros((eegData.shape[0], eegData.shape[2])) for _ in orders]
    edges = np.arange(-200+1, 200, 2)
    for chan in range(H[0].shape[0]):
        for epoch in range(H[0].shape[1]):
            counts, bins = np.histogram(eegData[chan,:,epoch], bins=edges)
            # outcome labels only need to be distinct, equal-length strings
            dist = dit.Distribution([str(bc).zfill(5) for bc in bins[:-1]],counts/sum(counts))
            for ii,order in enumerate(orders):
                H[ii][chan,epoch] = tsallis_entropy(dist,order)
    return H
|
||||
|
||||
##########
|
||||
# Cepstrum Coefficients (n=2)
|
||||
def mfcc(eegData,fs,order=2):
    """First ``order`` mel-frequency cepstral coefficients per channel/epoch.

    Returns an array of shape ``(chans, epochs, order)`` for
    ``eegData[chan, time, epoch]``.

    NOTE(review): the assignment below only broadcasts when librosa yields a
    single analysis frame for a trace -- confirm the epoch length used by
    callers.
    """
    H = np.zeros((eegData.shape[0], eegData.shape[2],order))
    for chan in range(H.shape[0]):
        for epoch in range(H.shape[1]):
            trace = np.asfortranarray(eegData[chan,:,epoch])
            # Pass the signal by keyword: recent librosa releases no longer
            # accept it positionally.
            H[chan, epoch, : ] = librosa.feature.mfcc(y=trace, sr=fs)[0:order].T
    return H
|
||||
|
||||
##########
|
||||
# Lyapunov exponent
|
||||
def lyapunov(eegData):
    """Mean log absolute gradient along the time axis (axis 1).

    Returns an array with axis 1 of ``eegData`` reduced away.
    """
    slope = np.gradient(eegData, axis=1)
    log_rate = np.log(np.abs(slope))
    return np.mean(log_rate, axis=1)
|
||||
|
||||
##########
|
||||
# Fractal Embedding Dimension
|
||||
# From pyrem: package for sleep scoring from EEG data
|
||||
# https://github.com/gilestrolab/pyrem/blob/master/src/pyrem/univariate.py
|
||||
def hFD(a, k_max): #Higuchi FD
    """Higuchi fractal dimension of the 1-D array ``a``.

    For every interval ``k`` in ``1 .. k_max - 1`` the normalised curve
    length is averaged over the ``k`` possible start offsets; the result is
    the least-squares slope of ``log(length)`` against ``log(1 / k)``.
    """
    n_samples = len(a)
    log_lengths = []
    design = []

    for k in range(1, k_max):
        curve_len = 0
        for offset in range(0, k):
            # indices of the sub-sampled series for this offset and interval
            steps = np.arange(1, int(np.floor((n_samples - offset) / k)), dtype=np.int32)
            seg_len = np.sum(np.abs(a[offset + steps * k] - a[offset + k * (steps - 1)]))
            seg_len = (seg_len * (n_samples - 1) / (((n_samples - offset) / k) * k)) / k
            curve_len += seg_len

        # k offsets were accumulated, so this is the mean length for interval k
        log_lengths.append(np.log(curve_len / k))
        design.append([np.log(1.0 / k), 1])

    coeffs = np.linalg.lstsq(design, log_lengths)[0]
    return coeffs[0]
|
||||
|
||||
##########
|
||||
# Hjorth Mobility
|
||||
# Hjorth Complexity
|
||||
# variance = mean(signal^2) iff mean(signal)=0
|
||||
# which it is be because I normalized the signal
|
||||
# Assuming signals have mean 0
|
||||
# Mobility = sqrt( mean(dx^2) / mean(x^2) )
|
||||
def hjorthParameters(xV):
    """Hjorth mobility and complexity along axis 1 of ``xV``.

    Mean squares are used in place of variances, which assumes zero-mean
    signals. Returns ``(mobility, complexity)``, each with axis 1 reduced
    away.
    """
    first_diff = np.diff(xV, axis=1)
    second_diff = np.diff(first_diff, axis=1)

    # mean-square "activity" of the signal and of its first two differences
    act_x, act_dx, act_ddx = (
        np.mean(np.square(arr), axis=1) for arr in (xV, first_diff, second_diff)
    )

    mob_sq = act_dx / act_x
    mobility = np.sqrt(mob_sq)
    # PLEASE NOTE that Mohammad did NOT ACTUALLY use hjorth complexity,
    # in the matlab code for hjorth complexity subtraction by mob not division was used
    complexity = np.sqrt((act_ddx / act_dx) / mob_sq)

    return mobility, complexity
|
||||
|
||||
##########
|
||||
# false nearest neighbor descriptor
|
||||
def falseNearestNeighbor(eegData, fast=True):
    """False-nearest-neighbour embedding dimension per channel and epoch.

    Parameters
    ----------
    eegData : np.ndarray
        Array indexed as ``[chan, time, epoch]``.
    fast : bool
        When True (the default) the expensive computation is skipped and the
        result is all zeros.

    Returns
    -------
    np.ndarray
        Array of shape ``(chans, epochs)``. On the slow path each entry is
        the index of the first embedding dimension whose fraction of false
        neighbours drops below 10%.
    """
    # Average Mutual Information
    # There exist good arguments that if the time delayed mutual
    # information exhibits a marked minimum at a certain value of the delay,
    # then this is a good candidate for a reasonable time delay.
    npts = 1000  # not sure about this?
    maxdims = 50
    max_delay = 2  # max_delay = 200  # TODO: need to use 200, but also need to speed this up
    distance_thresh = 0.5

    out = np.zeros((eegData.shape[0], eegData.shape[2]))
    for chan in range(eegData.shape[0]):
        for epoch in range(eegData.shape[2]):
            if fast:
                out[chan, epoch] = 0
            else:
                cur_eegData = eegData[chan, :, epoch]
                lagidx = 0  # we are looking for the index of the lag that makes the signal maximally uncorrelated to the original
                minNMI = 1  # normed_mutual_info is from 1 (perfectly correlated) to 0 (not at all correlated)
                for lag in range(1, max_delay):
                    x = cur_eegData[:-lag]
                    xlag = cur_eegData[lag:]
                    # convert float data into histogram bins
                    # (this line was missing its '#', which made the module unparsable)
                    nbins = int(np.floor(1 + np.log2(len(x)) + 0.5))
                    x_discrete = np.histogram(x, bins=nbins)[0]
                    xlag_discrete = np.histogram(xlag, bins=nbins)[0]
                    cNMI = normed_mutual_info(x_discrete, xlag_discrete)
                    if cNMI < minNMI:
                        minNMI = cNMI
                        lagidx = lag
                # nearest neighbors part
                knn = int(max(2, 6*lagidx))  # heuristic (number of nearest neighbors to look up)
                m = 1  # lagidx + 1

                # y is the embedded version of the signal
                y = np.zeros((maxdims+1, npts))
                for d in range(maxdims+1):
                    tmp = cur_eegData[d*m:d*m + npts]
                    y[d, :tmp.shape[0]] = tmp

                nnd = np.ones((npts, maxdims))
                nnz = np.zeros((npts, maxdims))

                # see where it tends to settle
                for d in range(1, maxdims):
                    for k in range(0, npts):
                        # get the distances to all points in the window (distance given embedding dimension)
                        dists = []
                        for nextpt in range(1, knn+1):
                            if k+nextpt < npts:
                                dists.append(np.linalg.norm(y[:d, k] - y[:d, k+nextpt]))
                        if len(dists) > 0:
                            minIdx = np.argmin(dists)
                            if dists[minIdx] == 0:
                                dists[minIdx] = 0.0000001  # essentially 0 just silence the error
                            nnd[k, d-1] = dists[minIdx]
                            nnz[k, d-1] = np.abs( y[d+1, k] - y[d+1, minIdx+1+k] )
                # aggregate results
                mindim = np.mean(nnz/nnd > distance_thresh, axis=0) < 0.1
                # get the index of the first occurence of the value true
                # (a 1 in the binary representation of true and false)
                out[chan, epoch] = np.argmax(mindim)

    return out
|
||||
|
||||
##########
|
||||
# ARMA coefficients
|
||||
def arma(eegData,order=2):
    """Autoregressive coefficients of an ARMA(order, order) fit per trace.

    Fits one model without a trend term to every ``eegData[chan, :, epoch]``
    and returns the AR parameters in an array of shape
    ``(chans, epochs, order)``.
    """
    n_chans, n_epochs = eegData.shape[0], eegData.shape[2]
    H = np.zeros((n_chans, n_epochs, order))
    for chan, epoch in np.ndindex(n_chans, n_epochs):
        model = sm.tsa.ARMA(eegData[chan,:,epoch], order=(order,order))
        fitted = model.fit(trend='nc', disp=-1)
        H[chan, epoch, : ] = fitted.arparams
    return H
|
||||
|
||||
################################################
|
||||
# Continuity features
|
||||
################################################
|
||||
|
||||
##########
|
||||
# median frequency
|
||||
def medianFreq(eegData,fs):
    """Frequency of the periodogram bin holding the median power.

    For every channel of ``eegData[chan, time, epoch]`` the periodogram is
    taken along time; per epoch the bins are ranked by power and the
    frequency of the middle-ranked bin is returned. Output shape is
    ``(chans, epochs)``.
    """
    n_chans, n_epochs = eegData.shape[0], eegData.shape[2]
    H = np.zeros((n_chans, n_epochs))
    for chan in range(n_chans):
        freqs, powers = signal.periodogram(eegData[chan, :, :], fs, axis=0)
        ranking = np.argsort(powers, axis=0)
        middle_bin = ranking[len(powers)//2]
        H[chan,:] = freqs[middle_bin]
    return H
|
||||
|
||||
##########
|
||||
# calculate band power
|
||||
def bandPower(eegData, lowcut, highcut, fs):
    """Mean periodogram power of the band-pass filtered signal.

    The data are filtered to ``[lowcut, highcut]`` Hz with an order-7
    filter, the periodogram is taken along axis 1 and averaged over
    frequency. Returns an array with axis 1 reduced away.
    """
    band_limited = filt_data(eegData, lowcut, highcut, fs, order=7)
    _, spectrum = signal.periodogram(band_limited, fs, axis=1)
    return np.mean(spectrum, axis=1)
|
||||
|
||||
##########
|
||||
# numberOfSpikes
|
||||
def spikeNum(eegData,minNumSamples=7,stdAway = 3):
    """Count spikes per channel and epoch of ``eegData[chan, time, epoch]``.

    A spike is a peak of the absolute deviation from the trace mean that is
    at least ``stdAway`` standard deviations high and at least
    ``minNumSamples`` samples wide. Returns an array of shape
    ``(chans, epochs)``.
    """
    H = np.zeros((eegData.shape[0], eegData.shape[2]))
    for chan in range(H.shape[0]):
        for epoch in range(H.shape[1]):
            trace = eegData[chan, :, epoch]
            deviation = np.abs(trace - np.mean(trace))
            # The trace is 1-D, so the standard deviation takes no axis.
            # Height and width come from the parameters; the epoch index must
            # not be passed on (it used to land in find_peaks' `threshold`).
            peaks, _ = signal.find_peaks(deviation, height=stdAway * np.std(trace), width=minNumSamples)
            H[chan, epoch] = len(peaks)
    return H
|
||||
|
||||
##########
|
||||
# Standard Deviation
|
||||
def eegStd(eegData):
    """Standard deviation of ``eegData`` along the time axis (axis 1)."""
    return np.std(eegData, axis=1)
|
||||
|
||||
##########
|
||||
# α/δ Ratio
|
||||
def eegRatio(eegData,fs):
    """Alpha/delta band-power ratio per epoch, summed over channels.

    Alpha is 8-12 Hz and delta 0.5-4 Hz. Band powers are summed over axis 0
    of the ``bandPower`` result before the division, and a leading axis of
    length 1 is added to the ratio.
    """
    # bandPower filters internally, so no separately filtered copies are needed
    powers_alpha = bandPower(eegData, 8, 12, fs)
    powers_delta = bandPower(eegData, 0.5, 4, fs)
    ratio_res = np.sum(powers_alpha,axis=0) / np.sum(powers_delta,axis=0)
    # return the computed ratio (the original returned an undefined name)
    return np.expand_dims(ratio_res, axis=0)
|
||||
|
||||
###########
|
||||
# Regularity (burst-suppression)
|
||||
# Regularity of eeg
|
||||
# filter with a window of 0.5 seconds to create a nonnegative smooth signal.
|
||||
# In this technique, we first squared the signal and applied a moving-average
|
||||
# The window length of the moving average was set at 0.5 seconds.
|
||||
def eegRegularity(eegData, Fs=100):
    """Compute the regularity (burst-suppression) measure per channel and epoch.

    The signal is squared, smoothed with a moving average of ``Fs // 2``
    samples (0.5 s), sorted in descending order along the sample axis, and
    the sorted values are weighted by the squared rank before normalising.

    Parameters:
        eegData: array indexed as [channel, sample, epoch].
        Fs: sampling frequency used to size the 0.5 s smoothing window.

    Returns:
        Array of shape (n_channels, n_epochs).
    """
    squared = np.square(eegData)
    window_len = Fs // 2  # 0.5 seconds expressed in samples
    kernel = np.ones(window_len) / window_len
    smoothed = signal.lfilter(kernel, 1, squared, axis=1)
    # Negate, sort ascending, negate back: descending order along samples.
    ranked = -np.sort(-smoothed, axis=1)
    n_samples = ranked.shape[1]
    rank_weights = np.square(np.arange(1, n_samples + 1))
    weighted_sum = np.einsum('ijk,j->ik', ranked, rank_weights)
    normaliser = np.sum(ranked, axis=1) * (n_samples ** 2) / 3
    return np.sqrt(weighted_sum / normaliser)
|
||||
|
||||
###########
|
||||
# Voltage < (5μ, 10μ, 20μ)
|
||||
def eegVoltage(eegData,voltage=20):
    """Return the mean of the samples whose magnitude does not exceed ``voltage``.

    Samples with ``|x| > voltage`` are masked out as NaN and the remaining
    samples are averaged along axis 1 with ``np.nanmean``.

    Parameters:
        eegData: array indexed as [channel, sample, epoch]; any numeric dtype.
        voltage: magnitude limit; samples above it are excluded.

    Returns:
        Array with axis 1 reduced. An entry is NaN when every sample of that
        channel/epoch exceeds the limit.
    """
    samples = np.asarray(eegData)
    # np.where promotes integer input to float, so NaN can be stored; the old
    # in-place assignment on a copy raised for integer arrays.
    masked = np.where(np.abs(samples) > voltage, np.nan, samples)
    return np.nanmean(masked, axis=1)
|
||||
|
||||
##########
|
||||
# Diffuse Slowing
|
||||
# look for diffuse slowing (bandpower max from frequency domain integral)
|
||||
# repeated integration of a huge tensor is really expensive
|
||||
def diffuseSlowing(eegData, Fs=100, fast=True):
    """Flag channel/epoch pairs whose strongest 1 Hz band lies below 8 Hz.

    Parameters:
        eegData: array indexed as [channel, sample, epoch].
        Fs: sampling frequency; bands [j-1, j] are scanned for j in 1..Fs//2-1.
        fast: when true (the default) the scan is skipped and an all-zero
            float array is returned immediately.

    Returns:
        fast=True: float zeros of shape (n_channels, n_epochs).
        fast=False: boolean array of the same shape, true where the index of
        the band with the largest power is below 8.
    """
    result_shape = (eegData.shape[0], eegData.shape[2])
    maxBP = np.zeros(result_shape)
    idx = np.zeros(result_shape)
    if fast:
        # Repeated integration over the full tensor is expensive; short-circuit.
        return idx
    for upper_hz in range(1, Fs // 2):
        print("BP", upper_hz)
        cbp = bandpower(eegData, Fs, [upper_hz - 1, upper_hz])
        stronger = cbp > maxBP
        idx[stronger] = upper_hz
        maxBP[stronger] = cbp[stronger]
    return idx < 8
|
||||
|
||||
##########
|
||||
# Spikes
|
||||
def spikeNum(eegData,minNumSamples=7,stdAway = 3):
    """Count wide, high-amplitude spikes for every channel and epoch.

    A spike is a peak of ``|x - mean(x)|`` that is at least ``stdAway``
    standard deviations high and at least ``minNumSamples`` samples wide
    (width as measured by ``scipy.signal.find_peaks``).

    NOTE: an earlier definition with the same name exists in this module;
    this later one is the one bound at import time.

    Parameters:
        eegData: array indexed as [channel, sample, epoch].
        minNumSamples: minimum peak width in samples.
        stdAway: minimum peak height in units of the epoch's standard deviation.

    Returns:
        Array of shape (n_channels, n_epochs) holding the spike counts.
    """
    n_chan, n_epoch = eegData.shape[0], eegData.shape[2]
    H = np.zeros((n_chan, n_epoch))
    for chan in range(n_chan):
        for epoch in range(n_epoch):
            trace = eegData[chan, :, epoch]
            deviation = np.abs(trace - np.mean(trace))
            # Keyword arguments on purpose: the epoch index used to be passed
            # positionally and ended up as find_peaks' `threshold`, and the
            # height/width were hard-coded instead of using the parameters.
            peaks, _ = signal.find_peaks(
                deviation,
                height=stdAway * np.std(trace),
                width=minNumSamples,
            )
            H[chan, epoch] = len(peaks)
    return H
|
||||
|
||||
##########
|
||||
# Delta Burst after spike
|
||||
def burstAfterSpike(eegData,eegData_subband,minNumSamples=7,stdAway = 3):
    """Measure the sub-band level change from just before to just after each spike.

    Spikes are located on ``eegData`` exactly as in the spike counters (peaks
    of ``|x - mean|`` at least ``stdAway`` standard deviations high and
    ``minNumSamples`` wide). For each spike the mean of ``eegData_subband``
    over samples ``[idx-7, idx-1)`` and ``[idx+1, idx+7)`` is taken; the
    per-spike post and pre means are summed and their difference is stored.

    Parameters:
        eegData: array indexed as [channel, sample, epoch], used for detection.
        eegData_subband: array of the same layout, used for the measurement.
        minNumSamples: minimum spike width in samples.
        stdAway: minimum spike height in standard deviations.

    Returns:
        Array of shape (n_channels, n_epochs); 0 where no spike was found.
    """
    n_chan, n_epoch = eegData.shape[0], eegData.shape[2]
    H = np.zeros((n_chan, n_epoch))
    for chan in range(n_chan):
        for epoch in range(n_epoch):
            trace = eegData[chan, :, epoch]
            band = eegData_subband[chan, :, epoch]
            deviation = np.abs(trace - np.mean(trace))
            # Keywords keep the epoch index out of find_peaks' `threshold`.
            spikes, _ = signal.find_peaks(
                deviation,
                height=stdAway * np.std(trace),
                width=minNumSamples,
            )
            preBurst = 0.0
            postBurst = 0.0
            for idx in spikes:
                # Clamp at 0: a negative start would wrap around and yield an
                # empty slice, turning the whole result into NaN.
                before = band[max(idx - 7, 0):max(idx - 1, 0)]
                after = band[idx + 1:idx + 7]
                if before.size == 0 or after.size == 0:
                    continue  # spike too close to the epoch edge to measure
                preBurst += np.mean(before)
                postBurst += np.mean(after)
            H[chan, epoch] = postBurst - preBurst
    return H
|
||||
|
||||
##########
|
||||
# Sharp spike
|
||||
def shortSpikeNum(eegData,minNumSamples=7,stdAway = 3):
    """Count sharp spikes: high peaks that are narrower than ``minNumSamples``.

    A peak of ``|x - mean(x)|`` counts when it is at least ``stdAway``
    standard deviations high, at least 1 sample wide, and less than
    ``minNumSamples`` samples wide (i.e. it is not a "long" spike).

    Parameters:
        eegData: array indexed as [channel, sample, epoch].
        minNumSamples: width (in samples) from which a spike counts as long.
        stdAway: minimum peak height in standard deviations.

    Returns:
        Array of shape (n_channels, n_epochs) holding the short-spike counts.
    """
    n_chan, n_epoch = eegData.shape[0], eegData.shape[2]
    H = np.zeros((n_chan, n_epoch))
    for chan in range(n_chan):
        for epoch in range(n_epoch):
            trace = eegData[chan, :, epoch]
            deviation = np.abs(trace - np.mean(trace))
            # One detection pass with width >= 1; long spikes are exactly the
            # subset whose measured width reaches minNumSamples. Keywords keep
            # the epoch index out of find_peaks' `threshold`.
            _, props = signal.find_peaks(
                deviation,
                height=stdAway * np.std(trace),
                width=1,
            )
            H[chan, epoch] = np.count_nonzero(props["widths"] < minNumSamples)
    return H
|
||||
|
||||
##########
|
||||
# Number of Bursts
|
||||
def numBursts(eegData,fs):
    """Count the bursts detected in every channel and epoch.

    ``burst_supression_detection`` is run once per epoch (suppression
    threshold 10) and the length of each channel's burst list is recorded.

    Parameters:
        eegData: array indexed as [channel, sample, epoch].
        fs: sampling frequency forwarded to the detector.

    Returns:
        Array of shape (n_channels, n_epochs) with the burst counts.
    """
    n_chan, n_epoch = eegData.shape[0], eegData.shape[2]
    bursts_by_epoch = []
    for epoch in range(n_epoch):
        epoch_bursts, _ = burst_supression_detection(eegData[:,:,epoch],fs,suppression_threshold=10)
        bursts_by_epoch.append(epoch_bursts)
    numBursts_res = np.zeros((n_chan, n_epoch))
    for chan in range(n_chan):
        for epoch in range(n_epoch):
            numBursts_res[chan, epoch] = len(bursts_by_epoch[epoch][chan])
    return numBursts_res
|
||||
|
||||
##########
|
||||
# Burst length μ and σ
|
||||
def burstLengthStats(eegData,fs):
    """Return the mean and standard deviation of burst lengths.

    Bursts come from ``burst_supression_detection`` (suppression threshold
    10), run once per epoch. A burst's length is ``burst[1] - burst[0]``.
    Channels/epochs without bursts produce NaN, which ``np.nan_to_num``
    turns into 0.

    Parameters:
        eegData: array indexed as [channel, sample, epoch].
        fs: sampling frequency forwarded to the detector.

    Returns:
        Tuple ``(mean_lengths, std_lengths)``, each (n_channels, n_epochs).
    """
    n_chan, n_epoch = eegData.shape[0], eegData.shape[2]
    bursts_by_epoch = []
    for epoch in range(n_epoch):
        epoch_bursts, _ = burst_supression_detection(eegData[:,:,epoch],fs,suppression_threshold=10)
        bursts_by_epoch.append(epoch_bursts)
    burstMean_res = np.zeros((n_chan, n_epoch))
    burstStd_res = np.zeros((n_chan, n_epoch))
    for chan in range(n_chan):
        for epoch in range(n_epoch):
            lengths = [burst[1]-burst[0] for burst in bursts_by_epoch[epoch][chan]]
            burstMean_res[chan, epoch] = np.mean(lengths)
            burstStd_res[chan, epoch] = np.std(lengths)
    return np.nan_to_num(burstMean_res), np.nan_to_num(burstStd_res)
|
||||
|
||||
##########
|
||||
# Burst band powers (δ, α, θ, β, γ)
|
||||
def burstBandPowers(eegData, lowcut, highcut, fs, order=7):
    """Return the mean band power during bursts, per channel and epoch.

    Bursts are detected per epoch with ``burst_supression_detection``
    (suppression threshold 10). The data is band-filtered with ``filt_data``
    and, for every burst of a channel, the periodogram of the filtered data
    over the burst's sample range is averaged.

    Parameters:
        eegData: array indexed as [channel, sample, epoch].
        lowcut, highcut: band edges handed to ``filt_data``.
        fs: sampling frequency.
        order: filter order handed to ``filt_data`` (previously ignored and
            always 7, which is still the default).

    Returns:
        Array of shape (n_channels, n_epochs); NaN where a channel has no
        burst in that epoch.
    """
    n_chan, n_epoch = eegData.shape[0], eegData.shape[2]
    band_burst_powers = np.zeros((n_chan, n_epoch))
    bursts = []
    for epoch in range(n_epoch):
        epochBurst, _ = burst_supression_detection(eegData[:,:,epoch],fs,suppression_threshold=10)
        bursts.append(epochBurst)
    eegData_band = filt_data(eegData, lowcut, highcut, fs, order=order)
    for epoch, epochBursts in enumerate(bursts):
        for chan, chanBursts in enumerate(epochBursts):
            epochPowers = []
            for burst in chanBursts:
                # NOTE(review): the window spans ALL channels, not just `chan`,
                # so the stored value averages every channel's power during
                # this channel's bursts. Kept as-is; confirm this is intended.
                burstData = eegData_band[:, burst[0]:burst[1], epoch]
                _, powers = signal.periodogram(burstData, fs, axis=1)
                epochPowers.append(np.mean(powers, axis=1))
            # Explicit NaN for "no bursts" instead of np.mean([]) and its warning.
            band_burst_powers[chan, epoch] = np.mean(epochPowers) if epochPowers else np.nan
    return band_burst_powers
|
||||
|
||||
##########
|
||||
# Number of Suppressions
|
||||
def numSuppressions(eegData,fs,suppression_threshold=10):
    """Count the suppressions detected in every channel and epoch.

    ``burst_supression_detection`` is run once per epoch and the length of
    each channel's suppression list is recorded.

    Parameters:
        eegData: array indexed as [channel, sample, epoch].
        fs: sampling frequency forwarded to the detector.
        suppression_threshold: threshold forwarded to the detector.

    Returns:
        Array of shape (n_channels, n_epochs) with the suppression counts.
    """
    n_chan, n_epoch = eegData.shape[0], eegData.shape[2]
    suppressions_by_epoch = []
    for epoch in range(n_epoch):
        _, epoch_suppressions = burst_supression_detection(eegData[:,:,epoch],fs,suppression_threshold=suppression_threshold)
        suppressions_by_epoch.append(epoch_suppressions)
    numSupprs_res = np.zeros((n_chan, n_epoch))
    for chan in range(n_chan):
        for epoch in range(n_epoch):
            numSupprs_res[chan, epoch] = len(suppressions_by_epoch[epoch][chan])
    return numSupprs_res
|
||||
|
||||
##########
|
||||
# Suppression length μ and σ
|
||||
def suppressionLengthStats(eegData,fs,suppression_threshold=10):
    """Return the mean and standard deviation of suppression lengths.

    Suppressions come from ``burst_supression_detection``, run once per
    epoch. A suppression's length is ``suppr[1] - suppr[0]``. Channels/epochs
    without suppressions produce NaN, which ``np.nan_to_num`` turns into 0.

    Parameters:
        eegData: array indexed as [channel, sample, epoch].
        fs: sampling frequency forwarded to the detector.
        suppression_threshold: threshold forwarded to the detector.

    Returns:
        Tuple ``(mean_lengths, std_lengths)``, each (n_channels, n_epochs).
    """
    n_chan, n_epoch = eegData.shape[0], eegData.shape[2]
    suppressions_by_epoch = []
    for epoch in range(n_epoch):
        _, epoch_suppressions = burst_supression_detection(eegData[:,:,epoch],fs,suppression_threshold=suppression_threshold)
        suppressions_by_epoch.append(epoch_suppressions)
    supressionMean_res = np.zeros((n_chan, n_epoch))
    supressionStd_res = np.zeros((n_chan, n_epoch))
    for chan in range(n_chan):
        for epoch in range(n_epoch):
            lengths = [suppr[1]-suppr[0] for suppr in suppressions_by_epoch[epoch][chan]]
            supressionMean_res[chan, epoch] = np.mean(lengths)
            supressionStd_res[chan, epoch] = np.std(lengths)
    return np.nan_to_num(supressionMean_res), np.nan_to_num(supressionStd_res)
|
||||
|
||||
################################################
|
||||
# Connectivity features
|
||||
################################################
|
||||
|
||||
##########
|
||||
# Coherence - δ
|
||||
def coherence(eegData,fs):
    """Evaluate ``CoherenceDelta`` for every unordered pair of channels.

    Parameters:
        eegData: array whose first axis indexes channels.
        fs: sampling frequency forwarded to ``CoherenceDelta``.

    Returns:
        Array stacking the per-pair results in ``itertools.combinations``
        order, i.e. (0, 1), (0, 2), ..., (n-2, n-1).
    """
    channel_pairs = itertools.combinations(range(eegData.shape[0]), 2)
    return np.array([CoherenceDelta(eegData, a, b, fs=fs) for a, b in channel_pairs])
|
||||
|
||||
##########
|
||||
# Mutual information
|
||||
def calculate2Chan_MI(eegData,ii,jj,bin_min=-200, bin_max=200, binWidth=2):
    """Return the mutual information between two channels for every epoch.

    A joint 2-D histogram of the two channels' samples is built with bin
    edges ``np.arange(bin_min + 1, bin_max, binWidth)`` and passed to
    ``mutual_info_score`` as a contingency table.

    Parameters:
        eegData: array indexed as [channel, sample, epoch].
        ii, jj: indices of the two channels.
        bin_min, bin_max, binWidth: histogram edge specification.

    Returns:
        1-D array with one mutual-information value per epoch.
    """
    n_epoch = eegData.shape[2]
    edges = np.arange(bin_min+1, bin_max, binWidth)
    H = np.zeros(n_epoch)
    for epoch in range(n_epoch):
        joint_counts = np.histogram2d(eegData[ii,:,epoch], eegData[jj,:,epoch], edges)[0]
        H[epoch] = mutual_info_score(None, None, contingency=joint_counts)
    return H
|
||||
|
||||
##########
|
||||
# Granger causality
|
||||
def calcGrangerCausality(eegData,ii,jj):
    """Return the lag-1 Granger-causality F statistic between two channels per epoch.

    For every epoch the two channels are stacked as columns and handed to
    ``grangercausalitytests`` with a maximum lag of 1; the ``ssr_ftest``
    statistic of lag 1 is kept.

    Parameters:
        eegData: array indexed as [channel, sample, epoch].
        ii, jj: indices of the two channels (column order: ii, then jj).

    Returns:
        1-D array with one F statistic per epoch.
    """
    n_epoch = eegData.shape[2]
    H = np.zeros(n_epoch)
    for epoch in range(n_epoch):
        pair = np.vstack([eegData[ii,:,epoch], eegData[jj,:,epoch]]).T
        results = tsa.stattools.grangercausalitytests(pair, 1, addconst=True, verbose=False)
        lag1_tests = results[1][0]
        H[epoch] = lag1_tests['ssr_ftest'][0]
    return H
|
||||
|
||||
##########
|
||||
# phase Lag Index
|
||||
def phaseLagIndex(eegData, i, j):
    """Return the phase lag index (PLI) between two channels for every epoch.

    PLI is ``|mean_t(sign(sin(phi_j(t) - phi_i(t))))|`` where ``phi`` is the
    instantaneous phase of the analytic (Hilbert) signal. It is 1 for a
    consistent non-zero phase lag and 0 when there is no consistent lag.

    Parameters:
        eegData: array indexed as [channel, sample, epoch].
        i, j: indices of the two channels.

    Returns:
        1-D array with one PLI value per epoch.
    """
    # eegData[i] is [sample, epoch]; the analytic signal must be formed along
    # the sample axis. hilbert's default axis (-1) would run across epochs.
    analytic_i = ss.hilbert(eegData[i, :, :], axis=0)
    analytic_j = ss.hilbert(eegData[j, :, :], axis=0)
    phase_diff = np.angle(analytic_j) - np.angle(analytic_i)
    # sin() wraps the difference of two [-pi, pi] angles, so a lag of -pi/4
    # and its wrapped image +7pi/4 get the same sign.
    return np.abs(np.mean(np.sign(np.sin(phase_diff)), axis=0))
|
||||
|
||||
##########
|
||||
# Cross-correlation Magnitude
|
||||
def crossCorrMag(eegData,ii,jj):
    """Evaluate ``crossCorrelation`` for every unordered pair of channels.

    Parameters:
        eegData: array whose first axis indexes channels.
        ii, jj: accepted for interface compatibility but not used; every
            channel pair is evaluated regardless of their values.

    Returns:
        Array stacking the per-pair results in ``itertools.combinations``
        order.
    """
    channel_pairs = itertools.combinations(range(eegData.shape[0]), 2)
    return np.array([crossCorrelation(eegData, a, b) for a, b in channel_pairs])
|
||||
|
||||
##########
|
||||
# Cross-correlation Lag
|
||||
def corrCorrLag(eegData,ii,jj,fs=100):
    """Return the lag of maximum cross-correlation for every channel pair and epoch.

    The previous body called ``corrCorrLag`` itself for each pair and so
    never terminated (RecursionError). The per-pair lag is now computed
    here: both channels are mean-centred, their full cross-correlation is
    taken, and the lag with the largest absolute correlation is reported in
    seconds (samples / ``fs``). A positive lag means the second channel of
    the pair trails the first.

    Parameters:
        eegData: array indexed as [channel, sample, epoch].
        ii, jj: accepted for interface compatibility but not used; every
            channel pair is evaluated regardless of their values.
        fs: sampling frequency used to convert the lag to seconds.

    Returns:
        Array of shape (n_pairs, n_epochs), pairs in
        ``itertools.combinations`` order.
    """
    n_chan, n_samples, n_epoch = eegData.shape[0], eegData.shape[1], eegData.shape[2]
    # np.correlate(..., mode='full') orders its output from lag -(N-1) to N-1.
    lags = np.arange(-(n_samples - 1), n_samples)
    crossCorrLag_res = []
    for first, second in itertools.combinations(range(n_chan), 2):
        pair_lags = np.zeros(n_epoch)
        for epoch in range(n_epoch):
            lead = eegData[first, :, epoch] - np.mean(eegData[first, :, epoch])
            trail = eegData[second, :, epoch] - np.mean(eegData[second, :, epoch])
            xcorr = np.correlate(trail, lead, mode='full')
            pair_lags[epoch] = lags[np.argmax(np.abs(xcorr))] / fs
        crossCorrLag_res.append(pair_lags)
    return np.array(crossCorrLag_res)
|
||||
|
||||
@@ -0,0 +1,459 @@
|
||||
#!/usr/bin/env python
|
||||
# coding: utf-8
|
||||
|
||||
# In[ ]:
|
||||
|
||||
|
||||
import ImportUtils
|
||||
import math
|
||||
import EEGExtract as eeg
|
||||
from sklearn.model_selection import train_test_split
|
||||
|
||||
import os
|
||||
import glob
|
||||
from scipy import io,signal
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
from sklearn import preprocessing
|
||||
import pickle
|
||||
from sklearn.metrics import mean_squared_error
|
||||
from sklearn.impute import SimpleImputer
|
||||
|
||||
import matplotlib.pyplot as plt
|
||||
import seaborn as sns
|
||||
import copy
|
||||
import os
|
||||
|
||||
|
||||
# In[ ]:
|
||||
|
||||
|
||||
def select_channels(data,channels):
    """Pick a subset of channels, in the requested order, from every record.

    Parameters:
        data: array indexed as [record, channel, timepoint].
        channels: sequence of channel indices to keep.

    Returns:
        New float array of shape (n_records, len(channels), n_timepoints)
        whose channel ``k`` is ``data[:, channels[k], :]``.
    """
    n_records, n_timepoints = data.shape[0], data.shape[2]
    ans = np.empty((n_records, len(channels), n_timepoints))
    # Fill one output channel at a time across all records.
    for slot, source_channel in enumerate(channels):
        ans[:, slot, :] = data[:, source_channel, :]
    return ans
|
||||
|
||||
|
||||
# In[ ]:
|
||||
|
||||
|
||||
def epoch_data(X, Y, Z, window, stride, sfreq):
    """Cut every trial into fixed-length, possibly overlapping segments.

    Parameters:
        X: EEG data indexed as [trial, channel, timepoint].
        Y: per-trial labels, shape (n_trials, n_labels).
        Z: per-trial metadata, shape (n_trials, n_fields).
        window: segment length in seconds.
        stride: step between consecutive segment starts, in seconds.
        sfreq: sampling frequency, used to convert seconds to samples.

    Returns:
        Tuple ``(X_new, Y_new, Z_new)``. ``X_new`` has shape
        (n_trials * segments_per_trial, n_channels, segment_samples); each
        segment's row of ``Y_new`` / ``Z_new`` repeats its trial's row.
    """
    trials, channels, timepoints = X.shape
    segment = int(window*sfreq)
    step = int(stride*sfreq)
    epochPerTrial = int((timepoints-segment)/step + 1)

    total = trials*epochPerTrial
    X_new = np.empty((total, channels, segment))
    Y_new = np.empty((total, Y.shape[1]))
    Z_new = np.empty((total, Z.shape[1]))

    for trial in range(trials):
        first_row = trial*epochPerTrial
        for k in range(epochPerTrial):
            row = first_row + k
            start = k*step
            X_new[row, :, :] = X[trial, :, start:start+segment]
            # Labels and metadata are copied unchanged from the parent trial.
            Y_new[row, :] = Y[trial, :]
            Z_new[row, :] = Z[trial, :]

    return X_new, Y_new, Z_new
|
||||
|
||||
|
||||
# In[ ]:
|
||||
|
||||
|
||||
def _save_feature(featurepath, name, window, stride, features, Y_epoch):
    """Write one feature matrix plus labels to ``<featurepath><name>_<window>_<stride>.npz``."""
    np.savez(featurepath + "{}_{}_{}.npz".format(name, window, stride),
             features=features, Y=Y_epoch)


def _band_differential_entropy(channel_data, lowcut, highcut, fs):
    """Return ``0.5 * log(2*pi*e*std**2)`` of one band-filtered channel, shape (1, n_epochs)."""
    band = eeg.filt_data(np.expand_dims(channel_data, axis=0), lowcut, highcut, fs)
    return 0.5 * np.log(2 * math.pi * math.e * np.square(eeg.eegStd(band)))


def _asymmetry_features(ans, lowcut, highcut, fs, channel_pairs):
    """Return ``(DASM, RASM)`` for one band, one row per (left, right) channel-index pair."""
    dasm_rows = []
    rasm_rows = []
    for left_idx, right_idx in channel_pairs:
        dl = _band_differential_entropy(ans[left_idx, :, :], lowcut, highcut, fs)
        dr = _band_differential_entropy(ans[right_idx, :, :], lowcut, highcut, fs)
        dasm_rows.append(np.subtract(dl, dr))  # DASM = h(left) - h(right)
        rasm_rows.append(np.divide(dl, dr))    # RASM = h(left) / h(right)
    return np.concatenate(dasm_rows, axis=0), np.concatenate(rasm_rows, axis=0)


def save_features(dataset, ans, Y_epoch, sfreq, window, stride):
    """Compute every EEG feature and save each one to its own ``.npz`` file.

    Files are written to ``<cwd>/<dataset>/data_extracted/featuresDict/`` as
    ``<feature>_<window>_<stride>.npz`` with arrays ``features`` and ``Y``.

    A stray early ``return`` used to stop this function after
    ``shannonEntropy`` and the gamma DASM/RASM files, leaving the rest of the
    body unreachable; it has been removed so that all feature files are
    produced again. The gamma DASM/RASM block that preceded that ``return``
    duplicated the one at the end of the function and is computed once now.

    Parameters:
        dataset: name of the dataset (used as a directory name).
        ans: epoched EEG data indexed as [channel, sample, epoch]; channel
            order is assumed to follow the Emotiv layout listed below.
        Y_epoch: per-epoch labels stored alongside every feature.
        sfreq: sampling frequency of the EEG signal.
        window: window length in seconds (used in file names only).
        stride: stride length in seconds (used in file names only).

    Returns:
        None.
    """
    fs = sfreq
    featurepath = os.getcwd() + '/' + dataset + '/data_extracted/featuresDict/'
    os.makedirs(featurepath, exist_ok=True)

    def save(name, features):
        _save_feature(featurepath, name, window, stride, features, Y_epoch)

    # (name, lowcut, highcut) in Hz.
    bands = (
        ("delta", 0.5, 4),
        ("theta", 4, 8),
        ("alpha", 8, 12),
        ("beta", 12, 30),
        ("gamma", 30, 45),
    )

    save("shannonEntropy", eeg.shannonEntropy(ans, bin_min=-200, bin_max=200, binWidth=2))

    # Subband information quantity: Shannon entropy of each band-filtered signal.
    eegData_delta = None
    for band_name, lowcut, highcut in bands:
        band_data = eeg.filt_data(ans, lowcut, highcut, fs)
        if band_name == "delta":
            eegData_delta = band_data  # reused by burstAfterSpike below
        save("ShannonRes_sub_bands_" + band_name,
             eeg.shannonEntropy(band_data, bin_min=-200, bin_max=200, binWidth=2))
    del band_data

    HjorthMob, HjorthComp = eeg.hjorthParameters(ans)
    save("Hjorth_complexity", HjorthComp)
    save("Hjorth_mobilty", HjorthMob)  # file name spelling kept for existing readers

    save("falseNearestNeighbor", eeg.falseNearestNeighbor(ans))
    save("medianFreq", eeg.medianFreq(ans, fs))

    for band_name, lowcut, highcut in bands:
        save("bandPwr_" + band_name, eeg.bandPower(ans, lowcut, highcut, fs))

    save("stdDev", eeg.eegStd(ans))
    save("diffuseSlowing", eeg.diffuseSlowing(ans))

    minNumSamples = int(70*fs/1000)  # 70 ms expressed in samples
    save("spikeNum", eeg.spikeNum(ans, minNumSamples))
    save("deltaBurstAfterSpike",
         eeg.burstAfterSpike(ans, eegData_delta, minNumSamples=7, stdAway=3))
    save("shortSpikeNum", eeg.shortSpikeNum(ans, minNumSamples))

    save("numBursts", eeg.numBursts(ans, fs))
    burstLenMean_res, burstLenStd_res = eeg.burstLengthStats(ans, fs)
    save("burstLen_u_and_sigma_mean", burstLenMean_res)
    save("burstLen_u_and_sigma_std", burstLenStd_res)

    save("numSuppressions", eeg.numSuppressions(ans, fs))
    suppLenMean_res, suppLenStd_res = eeg.suppressionLengthStats(ans, fs)
    save("suppressionLen_u_and_sigma_mean", suppLenMean_res)
    save("suppressionLen_u_and_sigma_std", suppLenStd_res)

    # DASM and RASM features over symmetric electrode pairs:
    #   DASM = h(X_left) - h(X_right),  RASM = h(X_left) / h(X_right)
    emotiv_channels = ['AF3','F7','F3','FC5','T7','P7','O1','O2','P8','T8','FC6','F4','F8','AF4']
    left_channels = ['AF3', 'F7','F3', 'FC5', 'T7', 'P7', 'O1']
    right_channels = ['AF4','F8','F4','FC6','T8','P8','O2']
    channel_pairs = [
        (emotiv_channels.index(lc), emotiv_channels.index(rc))
        for lc, rc in zip(left_channels, right_channels)
    ]
    for band_name, lowcut, highcut in bands:
        dasm, rasm = _asymmetry_features(ans, lowcut, highcut, fs, channel_pairs)
        save("dasm_" + band_name, dasm)
        save("rasm_" + band_name, rasm)
|
||||
|
||||
|
||||
# In[ ]:
|
||||
|
||||
|
||||
def getEpochedFeatures(dataset, window, stride, sfreq, label):
    """Load a dataset, epoch it, save the epochs, and extract/save all features.

    Steps: load ``<cwd>/data_extracted/<dataset>.npz`` (arrays X, Y, Z);
    for every dataset except "DEAP" swap the last two axes of X so that it is
    [record, channel, timepoint]; for "DEAP" keep only the 14 Emotiv
    channels; cut into windows with ``epoch_data``; rotate to
    [channel, sample, epoch]; save the epoched arrays; call ``save_features``.

    Parameters:
        dataset: name of the dataset.
        window: window length in seconds.
        stride: stride length in seconds.
        sfreq: sampling frequency of the EEG signal.
        label: accepted but not used by this function.

    Returns:
        None.
    """
    pwd = os.getcwd()
    with np.load((pwd + '/data_extracted/{}.npz').format(dataset), allow_pickle=True) as data:
        X = data['X']
        Y = data['Y']
        Z = data['Z']

    print("Shape After Loading")
    print("X.shape=", X.shape," Y.shape=",Y.shape," Z.shape=", Z.shape)

    # Non-DEAP recordings are stored as [record, timepoint, channel].
    if dataset != "DEAP":
        swapped = np.empty((X.shape[0], X.shape[2], X.shape[1]))
        swapped[...] = np.transpose(X, (0, 2, 1))
        X = swapped
        del swapped

    print("Shape after reshaping")
    print("X.shape=", X.shape," Y.shape=",Y.shape," Z.shape=", Z.shape)

    print("Data Loaded...\n")
    ch_names = ['F1', 'AF3', 'F3', 'F7', 'FC5', 'FC1', 'C3', 'T7', 'CP5', 'CP1', 'P3', 'P7', 'PO3', 'O1', 'Oz', 'Pz', 'Fp2', 'AF4', 'Fz', 'F4', 'F8', 'FC6', 'FC2', 'Cz', 'C4', 'T8', 'CP6', 'CP2', 'P4', 'P8', 'PO4', 'O2', 'hEOG','vEOG', 'zEMG','tEMG','GSR','Respiration belt','Plethysmograph','Temperature']
    emotiv_channels = ['AF3','F7','F3','FC5','T7','P7','O1','O2','P8','T8','FC6','F4','F8','AF4']
    index_arr = [ch_names.index(name) for name in emotiv_channels]

    # Only DEAP carries the extra channels that need to be dropped.
    X_new = select_channels(X, index_arr) if dataset == "DEAP" else X

    print("X_new.shape = ", X_new.shape)

    del X
    print("Channel selection done ...\n")

    X_epoch, Y_epoch, Z_epoch = epoch_data(X_new, Y, Z, window, stride, sfreq)
    del X_new, Y, Z

    print("Epoching done ...\n")
    print(X_epoch.shape, Y_epoch.shape, Z_epoch.shape) #debug

    # [epoch, channel, sample] -> [channel, sample, epoch]
    trials, channels, segment = X_epoch.shape
    ans = np.empty((channels, segment, trials))
    ans[...] = np.transpose(X_epoch, (1, 2, 0))
    del X_epoch

    print("ans.shape = ", ans.shape)
    print("Rotation of np.array done ...\n")
    filepath = pwd + '/' + dataset + "/data_extracted/epochedData/data" + str(window) + str(stride) + ".npz"
    np.savez(filepath, ans, Y_epoch, Z_epoch)

    save_features(dataset, ans, Y_epoch, sfreq, window, stride)

    print("Feature Extraction done ...\n")
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Import-only module: nothing is executed when it is run as a script.
    pass
|
||||
|
||||
@@ -0,0 +1,172 @@
|
||||
#!/usr/bin/env python
|
||||
# coding: utf-8
|
||||
|
||||
# Script to import all the required libraries.<br>
|
||||
# It also defines a function to make a dictionary and load the features.
|
||||
#
|
||||
|
||||
# In[ ]:
|
||||
|
||||
|
||||
from sklearn.model_selection import train_test_split
|
||||
from sklearn.preprocessing import StandardScaler
|
||||
from sklearn.metrics import accuracy_score
|
||||
|
||||
from sklearn.feature_selection import chi2
|
||||
from sklearn.feature_selection import SelectKBest, f_classif
|
||||
from sklearn.model_selection import train_test_split
|
||||
from sklearn import preprocessing
|
||||
from sklearn.feature_selection import *
|
||||
from sklearn.model_selection import RandomizedSearchCV
|
||||
from sklearn.model_selection import GridSearchCV
|
||||
|
||||
import sys
|
||||
import csv
|
||||
import os
|
||||
import math
|
||||
import glob
|
||||
from scipy import io,signal
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
|
||||
import pickle
|
||||
from sklearn.metrics import mean_squared_error
|
||||
from sklearn.impute import SimpleImputer
|
||||
|
||||
|
||||
import matplotlib.pyplot as plt
|
||||
import seaborn as sns
|
||||
import copy
|
||||
from sklearn import feature_selection
|
||||
import argparse
|
||||
|
||||
import cuml
|
||||
from cuml.svm import SVR
|
||||
from cuml.ensemble import RandomForestRegressor
|
||||
from cuml.svm import SVC
|
||||
from cuml.ensemble import RandomForestClassifier
|
||||
from cuml.metrics import accuracy_score
|
||||
|
||||
|
||||
# In[ ]:
|
||||
|
||||
|
||||
def loadFeaturesDict(dataset, window=1, stride=1):
    """Load every pre-extracted feature array of *dataset* into a dictionary.

    Each feature is read from
    ``<cwd>/<dataset>/data_extracted/featuresDict/<stem>_<window>_<stride>.npz``
    and the array stored under the ``'features'`` key of that archive is kept.

    Parameters
    ----------
    dataset : str
        Name of the dataset; used as a directory name below the current
        working directory.
    window, stride : optional
        Values used in the ``_<window>_<stride>`` file-name suffix.  Both
        default to ``1``, which reproduces the previously hard-coded
        ``_1_1`` suffix, so existing ``loadFeaturesDict(dataset)`` calls
        behave as before.

    Returns
    -------
    dict
        Feature name -> loaded array, in the fixed order listed below
        (callers iterate over the dictionary, so the order matters).

    Raises
    ------
    OSError / KeyError
        Propagated from ``np.load`` when a file is missing or does not
        contain a ``'features'`` entry.
    """
    # (dictionary key, file-name stem).  The stems are the names found on
    # disk and are intentionally kept verbatim (including "Hjorth_mobilty").
    feature_files = (
        ('shannonEntropy', 'shannonEntropy'),
        ('ShannonRes_delta', 'ShannonRes_sub_bands_delta'),
        ('ShannonRes_theta', 'ShannonRes_sub_bands_theta'),
        ('ShannonRes_alpha', 'ShannonRes_sub_bands_alpha'),
        ('ShannonRes_beta', 'ShannonRes_sub_bands_beta'),
        ('ShannonRes_gamma', 'ShannonRes_sub_bands_gamma'),
        ('HjorthComp', 'Hjorth_complexity'),
        ('HjorthMob', 'Hjorth_mobilty'),
        ('falseNearestNeighbor', 'falseNearestNeighbor'),
        ('medianFreq', 'medianFreq'),
        ('bandPwr_delta', 'bandPwr_delta'),
        ('bandPwr_theta', 'bandPwr_theta'),
        ('bandPwr_alpha', 'bandPwr_alpha'),
        ('bandPwr_beta', 'bandPwr_beta'),
        ('bandPwr_gamma', 'bandPwr_gamma'),
        ('stdDev', 'stdDev'),
        ('diffuseSlowing', 'diffuseSlowing'),
        ('spikeNum', 'spikeNum'),
        ('deltaBurstAfterSpike', 'deltaBurstAfterSpike'),
        ('shortSpikeNum', 'shortSpikeNum'),
        ('numBursts', 'numBursts'),
        ('burstLenMean', 'burstLen_u_and_sigma_mean'),
        ('burstLenStd', 'burstLen_u_and_sigma_std'),
        ('numSuppressions', 'numSuppressions'),
        ('suppLenMean', 'suppressionLen_u_and_sigma_mean'),
        ('suppLenStd', 'suppressionLen_u_and_sigma_std'),
        ('dasm_delta', 'dasm_delta'),
        ('dasm_theta', 'dasm_theta'),
        ('dasm_alpha', 'dasm_alpha'),
        ('dasm_beta', 'dasm_beta'),
        ('dasm_gamma', 'dasm_gamma'),
        ('rasm_delta', 'rasm_delta'),
        ('rasm_theta', 'rasm_theta'),
        ('rasm_alpha', 'rasm_alpha'),
        ('rasm_beta', 'rasm_beta'),
        ('rasm_gamma', 'rasm_gamma'),
    )

    featurepath = os.getcwd() + '/' + dataset + '/data_extracted/featuresDict/'
    suffix = "_{}_{}.npz".format(window, stride)

    featuresDict = {}
    for key, stem in feature_files:
        # np.load on an .npz returns a lazy archive that keeps the file open;
        # read the array while the archive is open, then close it.
        # NOTE: allow_pickle=True executes pickled payloads - only load
        # feature files produced by this project.
        with np.load(featurepath + stem + suffix, allow_pickle=True) as archive:
            featuresDict[key] = archive['features']

    return featuresDict
|
||||
|
||||
@@ -0,0 +1,735 @@
|
||||
#!/usr/bin/env python
|
||||
# coding: utf-8
|
||||
|
||||
# In[ ]:
|
||||
|
||||
|
||||
from ImportUtils import *
|
||||
from sklearn.model_selection import ParameterGrid
|
||||
|
||||
from sklearn.model_selection import train_test_split
|
||||
|
||||
from sklearn.preprocessing import StandardScaler
|
||||
from sklearn.metrics import accuracy_score
|
||||
|
||||
from sklearn.feature_selection import chi2
|
||||
from sklearn.feature_selection import SelectKBest, f_classif
|
||||
from sklearn.model_selection import train_test_split
|
||||
from sklearn.preprocessing import StandardScaler
|
||||
from sklearn.metrics import accuracy_score
|
||||
|
||||
from sklearn.ensemble import RandomForestRegressor as sklearnrfi
|
||||
|
||||
import os
|
||||
import glob
|
||||
from scipy import io,signal
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
from sklearn import preprocessing
|
||||
import pickle
|
||||
from sklearn.metrics import mean_squared_error
|
||||
from sklearn.impute import SimpleImputer
|
||||
|
||||
|
||||
import matplotlib.pyplot as plt
|
||||
# %matplotlib inline
|
||||
import seaborn as sns
|
||||
import copy
|
||||
|
||||
def topElectrodeRegressionRanking(dataset, window, stride, sfreq, clf, label, scale=False, pca=False):
    '''
    Rank electrodes by hold-out RMSE and plot RMSE against the number of
    best electrodes used.

    Stage 1 fits ``clf`` once per electrode (using every selected feature of
    that electrode) and ranks the electrodes by the resulting RMSE.
    Stage 2 re-trains on the best 1, 2, ..., N electrodes and plots the RMSE
    obtained for each N.

    parameters :-
        dataset - name of the dataset (directory below the working directory)
        window  - window value used in the feature file name
        stride  - stride value used in the feature file name
        sfreq   - sampling frequency of the EEG data (currently unused)
        clf     - estimator exposing fit/predict, used for the ranking
        label   - column index into the 'Y' array stored with the features
                  (original note: 0-valence, 1-arousal, 2-like, 3-dominance)
        scale   - standardise the features (fit on the training split) if truthy
        pca     - currently unused

    returns :-
        None; results are printed and drawn on the current matplotlib figure.
    '''
    # Hyper-parameter search used in stage 2: one of "bayes_sk_opt",
    # "random_grid_search", "manual_search", "tpot"; anything else fits `clf` as is.
    search_method = "tpot"

    electrodeList = ['AF3', 'F7', 'F3', 'FC5', 'T7', 'P7', 'O1', 'O2', 'P8', 'T8', 'FC6', 'F4', 'F8', 'AF4']
    asm_features = ['dasm_delta', 'dasm_theta', 'dasm_alpha', 'dasm_beta', 'dasm_gamma',
                    'rasm_delta', 'rasm_theta', 'rasm_alpha', 'rasm_beta', 'rasm_gamma']

    # Load the segment count and the labels once, closing the archive afterwards.
    featurepath = os.getcwd() + '/' + dataset + '/data_extracted/featuresDict/'
    with np.load((featurepath + "shannonEntropy_{}_{}.npz").format(window, stride), allow_pickle=True) as archive:
        n_segments = archive['features'].shape[1]
        Y_epoch = archive['Y']

    # NOTE(review): loadFeaturesDict(dataset) is called without window/stride,
    # so the feature arrays may not match the file loaded above - confirm.
    featuresDict = loadFeaturesDict(dataset)

    # NOTE: pickle.load must only be used on a trusted 'intersection.pkl'.
    with open('intersection.pkl', 'rb') as f:
        common = pickle.load(f)

    # Keep only the common features; asymmetry (dasm/rasm) features are
    # always excluded from the per-electrode ranking.
    featuresDict = {name: values for name, values in featuresDict.items()
                    if name not in asm_features and name in common}
    selectFeatures = list(featuresDict.keys())
    if not selectFeatures:
        raise ValueError("no feature is left after filtering with intersection.pkl")

    y = Y_epoch[:, label]

    def _rmse(y_true, y_pred):
        # sqrt(MSE) rather than mean_squared_error(..., squared=False): the
        # `squared` keyword is not accepted by newer scikit-learn releases.
        return np.sqrt(mean_squared_error(y_true, y_pred))

    def _to_frame(rows, columns):
        # Build a (segments x columns) float32 frame with NaN and +/-inf set to 0.
        matrix = np.asarray(rows, dtype=np.float32)
        if matrix.ndim != 2 or matrix.shape[1] != n_segments:
            raise ValueError("feature rows of shape {} do not match the {} segments of "
                             "shannonEntropy_{}_{}.npz".format(matrix.shape, n_segments, window, stride))
        print(matrix.T.shape)
        # Without posinf/neginf, nan_to_num would turn +/-inf into huge finite
        # values instead of the intended 0.
        matrix = np.nan_to_num(matrix, nan=0.0, posinf=0.0, neginf=0.0)
        print("Number of Feature-Columns: {}\n".format(len(columns)))
        frame = pd.DataFrame(matrix.T, columns=columns)
        print("Features Ready for undergoing selection tests done ...\n")
        return frame

    def _split(X):
        # Fixed 80/20 split; optional scaling is fitted on the training part only.
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
        if scale:
            sc = StandardScaler()
            X_train = sc.fit_transform(X_train)
            X_test = sc.transform(X_test)
        return X_train, X_test, y_train, y_test

    def _tune_and_score(X_train, X_test, y_train, y_test):
        # Fit a model according to `search_method` and return its hold-out RMSE.
        if search_method == "bayes_sk_opt":
            # NOTE(review): Categorical, Integer and BayesSearchCV are not imported
            # in the visible part of this file (presumably scikit-optimize) - confirm.
            search_space = {"bootstrap": Categorical([True, False]),
                            "max_depth": Integer(6, 20),
                            "max_features": Categorical(['auto', 'sqrt', 'log2']),
                            "min_samples_leaf": Integer(2, 10),
                            "min_samples_split": Integer(2, 10),
                            "n_estimators": Integer(100, 500)}
            forest_bayes_search = BayesSearchCV(clf, search_space, n_iter=32, cv=5)
            print(forest_bayes_search)
            print(forest_bayes_search.fit(X_train, y_train))
            print("Best Parameters are: ", forest_bayes_search.best_params_)
            best_clf = forest_bayes_search.best_estimator_

        elif search_method == "random_grid_search":
            print("Random Search followed by GridSearch initiated!\n")
            random_grid = {'n_estimators': [int(x) for x in np.linspace(start=200, stop=2000, num=10)],
                           'max_features': ['auto', 'sqrt', 'log2'],
                           'max_depth': [int(x) for x in np.linspace(10, 1000, 10)],
                           'min_samples_split': [2, 5, 10, 14],
                           'min_samples_leaf': [1, 2, 4, 6, 8]}
            rf_randomcv = RandomizedSearchCV(estimator=clf, param_distributions=random_grid,
                                             n_iter=100, cv=5, verbose=2, random_state=100)
            print(rf_randomcv.fit(X_train, y_train))
            print("Best Parameters for RandomSearchCV are: ", rf_randomcv.best_params_)
            print("RMSE with RandomSearchCV is :", _rmse(y_test, rf_randomcv.best_estimator_.predict(X_test)))

            # Refine around the random-search optimum.  Values are clamped so the
            # grid never contains min_samples_split < 2 or n_estimators < 1.
            best = rf_randomcv.best_params_
            param_grid = {
                'max_depth': [best['max_depth']],
                'max_features': [best['max_features']],
                'min_samples_leaf': [best['min_samples_leaf'] + d for d in (0, 2, 4)],
                'min_samples_split': sorted({max(2, best['min_samples_split'] + d) for d in (-2, -1, 0, 1, 2)}),
                'n_estimators': sorted({max(1, best['n_estimators'] + d) for d in (-200, -100, 0, 100, 200)}),
            }
            # Previously referenced an undefined `rf` and then discarded the
            # grid-search result in favour of the random-search estimator.
            grid_search = GridSearchCV(estimator=clf, param_grid=param_grid, cv=10, verbose=5)
            grid_search.fit(X_train, y_train)
            best_clf = grid_search.best_estimator_

        elif search_method == "manual_search":
            min_rmse = np.inf
            min_params = None
            # 2*1*3*3*3 combinations
            param_grid = {'n_estimators': [50, 100],
                          'max_features': ['auto'],
                          'max_depth': [2, 10, 100],
                          'min_samples_split': [2, 5, 10],
                          'min_samples_leaf': [1, 2, 8]}
            for params in ParameterGrid(param_grid):
                print("Current Parameters : ", params)
                temp_clf = RandomForestRegressor(max_features=params['max_features'],
                                                 min_samples_leaf=params['min_samples_leaf'],
                                                 min_samples_split=params['min_samples_split'],
                                                 n_estimators=params['n_estimators'],
                                                 max_depth=params['max_depth'])
                temp_clf.fit(X_train, y_train)
                rmse = _rmse(y_test, temp_clf.predict(X_test))
                print("Current RMSE with above params : ", rmse)
                if rmse < min_rmse:
                    min_rmse = rmse
                    min_params = params
            print("Best Params for parameter search are : \n", min_params)
            return min_rmse

        elif search_method == "tpot":
            from tpot import TPOTRegressor
            tpot = TPOTRegressor(
                generations=5,
                population_size=100,
                random_state=42,
                config_dict="TPOT cuML",
                n_jobs=1,  # cuML requires n_jobs=1
                cv=5,
                verbosity=2,
            )
            tpot.fit(X_train, y_train)
            return _rmse(y_test, tpot.predict(X_test))

        else:
            best_clf = clf
            best_clf.fit(X_train, y_train)

        return _rmse(y_test, best_clf.predict(X_test))

    # ---- Stage 1: score every electrode on its own -------------------------
    rmseList = []
    for electrode in range(len(electrodeList)):
        print("Number of segments are: {}".format(n_segments))
        X = _to_frame([featuresDict[feature][electrode, :] for feature in selectFeatures],
                      [feature + str(electrode) for feature in selectFeatures])
        X_train, X_test, y_train, y_test = _split(X)
        clf.fit(X_train, y_train)
        rmse = _rmse(y_test, clf.predict(X_test))
        print("window: {}, stide: {}, rmse: {}".format(window, stride, rmse))
        rmseList.append(rmse)

    # The frame's index is the electrode position, which stage 2 relies on.
    electrodeRanking = pd.DataFrame({'Electrode': electrodeList, 'RMSE': rmseList})
    features_result = electrodeRanking.sort_values('RMSE')
    print(features_result)

    # ---- Stage 2: score the best n electrodes together ---------------------
    N = features_result.shape[0]
    topNList = ["{}".format(x) for x in range(1, N + 1)]
    topRmseList = []
    for n in range(1, N + 1):
        electrode_index = list(features_result.head(n).index)
        rows = [featuresDict[feature][index, :] for index in electrode_index for feature in selectFeatures]
        columns = [feature + str(index) for index in electrode_index for feature in selectFeatures]
        X = _to_frame(rows, columns)
        rmse = _tune_and_score(*_split(X))
        print("window: {}, stide: {}, rmse: {}".format(window, stride, rmse))
        topRmseList.append(rmse)

    topNElectrodeRanking = pd.DataFrame({'Electrode': topNList, 'RMSE': topRmseList})
    print(topNElectrodeRanking)

    # Plotting
    fig = plt.gcf()
    fig.set_size_inches(20, 10)
    plt.rcParams.update({'font.size': 30})
    plt.xlabel('Top N Electrodes')
    plt.ylabel('RMSE')
    plt.plot(topNElectrodeRanking.loc[:, "Electrode"], topNElectrodeRanking.loc[:, "RMSE"])
    plt.tight_layout()
|
||||
|
||||
|
||||
# In[ ]:
|
||||
|
||||
|
||||
def topFeaturesRegressionRanking(dataset, window, stride, sfreq, clf, label, scale=False, pca=False):
    '''
    Rank features by hold-out RMSE and plot RMSE against the number of best
    features used.

    Features whose every row is constant are dropped first.  Each remaining
    feature is then scored on its own (all of its rows as columns), the
    features are ranked by RMSE, and finally the best 1, 2, ..., N features
    are scored together.  Both rankings are written as CSV files below
    ``<cwd>/<dataset>/arousal_plots/``.

    parameters :-
        dataset - name of the dataset (directory below the working directory)
        window  - window value used in the feature/CSV file names
        stride  - stride value used in the feature/CSV file names
        sfreq   - sampling frequency of the EEG data (currently unused)
        clf     - estimator exposing fit/predict
        label   - column index into the 'Y' array stored with the features
        scale   - standardise the features (fit on the training split) if truthy
        pca     - currently unused

    returns :-
        None; results are printed, saved as CSV and drawn on the current figure.
    '''
    pwd = os.getcwd()
    featurepath = os.getcwd() + '/' + dataset + '/data_extracted/featuresDict/'

    # Load the segment count and the labels once, closing the archive afterwards.
    with np.load((featurepath + "shannonEntropy_{}_{}.npz").format(window, stride), allow_pickle=True) as archive:
        n_segments = archive['features'].shape[1]
        Y_epoch = archive['Y']
    print("Number of segments are: {}".format(n_segments))

    # NOTE(review): loadFeaturesDict(dataset) is called without window/stride,
    # so the feature arrays may not match the file loaded above - confirm.
    featuresDict = loadFeaturesDict(dataset)

    # NOTE: pickle.load must only be used on a trusted 'intersection.pkl'.
    with open('intersection.pkl', 'rb') as f:
        common = pickle.load(f)
    featuresDict = {name: values for name, values in featuresDict.items() if name in common}
    if not featuresDict:
        raise ValueError("no feature is left after filtering with intersection.pkl")

    y = Y_epoch[:, label]

    def _to_frame(matrix, columns, dtype=np.float32):
        # Build a (segments x columns) frame with NaN and +/-inf set to 0.
        matrix = np.asarray(matrix, dtype=dtype)
        if matrix.ndim != 2 or matrix.shape[1] != n_segments:
            raise ValueError("feature matrix of shape {} does not match the {} segments of "
                             "shannonEntropy_{}_{}.npz".format(matrix.shape, n_segments, window, stride))
        # Without posinf/neginf, nan_to_num would turn +/-inf into huge finite
        # values instead of the intended 0.
        matrix = np.nan_to_num(matrix, nan=0.0, posinf=0.0, neginf=0.0)
        return pd.DataFrame(matrix.T, columns=columns)

    def _fit_and_score(X):
        # Fixed 80/20 split, optional scaling, fit `clf`, return hold-out RMSE.
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
        if scale:
            sc = StandardScaler()
            X_train = sc.fit_transform(X_train)
            X_test = sc.transform(X_test)
        clf.fit(X_train, y_train)
        # sqrt(MSE) rather than mean_squared_error(..., squared=False): the
        # `squared` keyword is not accepted by newer scikit-learn releases.
        return np.sqrt(mean_squared_error(y_test, clf.predict(X_test)))

    def _columns(feature):
        return [feature + str(i) for i in range(featuresDict[feature].shape[0])]

    # ---- Drop features that only consist of zero-variance columns ----------
    all_columns = []   # zero-padded "<feature><row>" names, one per matrix row
    owners = []        # feature each matrix row belongs to
    for feature, values in featuresDict.items():
        for i in range(values.shape[0]):
            all_columns.append("{}{:02d}".format(feature, i))
            owners.append(feature)

    print(len(featuresDict))
    print("Number of Feature-Columns: {}\n".format(len(all_columns)))

    X = _to_frame(np.concatenate(list(featuresDict.values()), axis=0), all_columns, dtype=np.float64)
    constant_filter = VarianceThreshold(threshold=0)
    constant_filter.fit(X)
    kept = constant_filter.get_support()

    kept_columns = [name for name, keep in zip(all_columns, kept) if keep]
    print(len(kept_columns), kept_columns)

    # A feature survives if at least one of its columns has non-zero variance.
    featuresList = []
    for owner, keep in zip(owners, kept):
        if keep and owner not in featuresList:
            featuresList.append(owner)

    # ---- Score every surviving feature on its own --------------------------
    rmseList = []
    for feature in featuresList:
        columns = _columns(feature)
        print("Number of Feature-Columns: {}\n".format(len(columns)))
        X = _to_frame(featuresDict[feature], columns)
        print("Features Ready for undergoing selection tests done ...\n")
        rmse = _fit_and_score(X)
        print("window: {}, stide: {}, rmse: {}".format(window, stride, rmse))
        rmseList.append(rmse)

    featureRanking = pd.DataFrame({'Feature': featuresList, 'RMSE': rmseList})
    features_result = featureRanking.sort_values('RMSE')
    features_result.to_csv(pwd + "/" + dataset + "/arousal_plots/" + "CommonFeaturesRegressionRanking" + str(window) + str(stride) + ".csv")
    print(features_result)

    # ---- Score the best n features together --------------------------------
    N = features_result.shape[0]
    topNList = ["{}".format(x) for x in range(1, N + 1)]
    topNRmseList = []
    for n in range(1, N + 1):
        topnfeatures = features_result.head(n)['Feature'].tolist()
        matrix = np.concatenate([featuresDict[feature] for feature in topnfeatures], axis=0)
        columns = [name for feature in topnfeatures for name in _columns(feature)]
        print(matrix.T.shape)
        print("Number of Feature-Columns: {}\n".format(len(columns)))
        X = _to_frame(matrix, columns)
        print("Features Ready for undergoing selection tests done ...\n")
        rmse = _fit_and_score(X)
        print("window: {}, stide: {}, rmse: {}".format(window, stride, rmse))
        topNRmseList.append(rmse)

    topNFeaturesRanking = pd.DataFrame({'Feature': topNList, 'RMSE': topNRmseList})
    print(topNFeaturesRanking)
    topNFeaturesRanking.to_csv(pwd + "/" + dataset + "/arousal_plots/" + "topCommonFeaturesRegressionRanking" + str(window) + str(stride) + ".csv")

    # Plotting
    fig = plt.gcf()
    fig.set_size_inches(25, 10)
    plt.rcParams.update({'font.size': 30})
    plt.xlabel('Top N Features')
    plt.ylabel('RMSE')
    plt.plot(topNFeaturesRanking.loc[:, "Feature"], topNFeaturesRanking.loc[:, "RMSE"])
    plt.tight_layout()
|
||||
|
||||
|
||||
# In[ ]:
|
||||
|
||||
|
||||
def topFeatureColumnsRegressionRanking(dataset, window, stride, sfreq, clf, label, scale=False, pca=False):
    '''
    Rank individual feature columns by hold-out RMSE and plot RMSE against
    the number of best columns used.

    Every "<feature><row>" column is scored on its own, the columns are
    ranked by RMSE, and the best 1, 2, ..., N columns are then scored
    together.  The top-N table and the plot are written below
    ``<cwd>/<dataset>/arousal_plots/``.

    parameters :-
        dataset - name of the dataset (directory below the working directory)
        window  - window value used in the feature/output file names
        stride  - stride value used in the feature/output file names
        sfreq   - sampling frequency of the EEG data (currently unused)
        clf     - estimator exposing fit/predict
        label   - column index into the 'Y' array stored with the features
        scale   - standardise the features (fit on the training split) if truthy
        pca     - currently unused

    returns :-
        None; results are printed, saved (CSV + SVG) and shown.
    '''
    pwd = os.getcwd()
    featurepath = os.getcwd() + '/' + dataset + '/data_extracted/featuresDict/'

    # Load the segment count and the labels once, closing the archive afterwards.
    with np.load((featurepath + "shannonEntropy_{}_{}.npz").format(window, stride), allow_pickle=True) as archive:
        n_segments = archive['features'].shape[1]
        Y_epoch = archive['Y']
    print("Number of segments are: {}".format(n_segments))

    # NOTE(review): loadFeaturesDict(dataset) is called without window/stride,
    # so the feature arrays may not match the file loaded above - confirm.
    featuresDict = loadFeaturesDict(dataset)

    # NOTE: pickle.load must only be used on a trusted 'intersection.pkl'.
    with open('intersection.pkl', 'rb') as f:
        common = pickle.load(f)
    featuresDict = {name: values for name, values in featuresDict.items() if name in common}
    if not featuresDict:
        raise ValueError("no feature is left after filtering with intersection.pkl")

    # One column per (feature, row) pair, in dictionary order.
    feature_channel_index = [feature + str(i)
                             for feature, values in featuresDict.items()
                             for i in range(values.shape[0])]

    featureMatrix = np.concatenate(list(featuresDict.values()), axis=0)
    if featureMatrix.shape[1] != n_segments:
        raise ValueError("feature matrix of shape {} does not match the {} segments of "
                         "shannonEntropy_{}_{}.npz".format(featureMatrix.shape, n_segments, window, stride))
    print("Shape of FeatureMatrix: {}\n".format(featureMatrix.T.shape))

    # NaN and +/-inf are all replaced by 0.  Without posinf/neginf, nan_to_num
    # would turn +/-inf into huge finite values instead.
    featureMatrix = np.nan_to_num(featureMatrix.astype(np.float32), nan=0.0, posinf=0.0, neginf=0.0)
    X = pd.DataFrame(featureMatrix.T, columns=feature_channel_index)

    y = Y_epoch[:, label]

    def _fit_and_score(frame):
        # Fixed 80/20 split, optional scaling, fit `clf`, return hold-out RMSE.
        X_train, X_test, y_train, y_test = train_test_split(frame, y, test_size=0.2, random_state=42)
        if scale:
            sc = StandardScaler()
            X_train = sc.fit_transform(X_train)
            X_test = sc.transform(X_test)
        clf.fit(X_train, y_train)
        # sqrt(MSE) rather than mean_squared_error(..., squared=False): the
        # `squared` keyword is not accepted by newer scikit-learn releases.
        return np.sqrt(mean_squared_error(y_test, clf.predict(X_test)))

    # ---- Score every column on its own --------------------------------------
    rmseList = [_fit_and_score(X[[col]]) for col in feature_channel_index]

    colRanking = pd.DataFrame({'Column': feature_channel_index, 'RMSE': rmseList})
    features_result = colRanking.sort_values('RMSE')
    print(features_result)

    # ---- Score the best n columns together ----------------------------------
    N = len(feature_channel_index)
    topNList = ["{}".format(x) for x in range(1, N + 1)]
    topNRmseList = []
    for n in range(1, N + 1):
        topncols = features_result.head(n)['Column'].tolist()
        topNRmseList.append(_fit_and_score(X[topncols]))

    topfeatures_result = pd.DataFrame({'Column': topNList, 'RMSE': topNRmseList})
    print(topfeatures_result)
    topfeatures_result.to_csv(pwd + "/" + dataset + "/arousal_plots/" + "ColumnsRegressionRanking" + str(window) + str(stride) + ".csv")

    # Plotting
    fig = plt.gcf()
    fig.set_size_inches(60, 9)
    plt.xlabel('Top N Columns')
    plt.ylabel('RMSE')
    plt.title("Top N Columns v/s RMSE Plot for Window:{} Stride:{} epoched data by varying N".format(window, stride))
    plt.plot(topfeatures_result.loc[:, "Column"], topfeatures_result.loc[:, "RMSE"])
    plt.tight_layout()
    plt.savefig(pwd + "/" + dataset + "/arousal_plots/" + "topFeatureColumnsRegressionRanking" + str(window) + str(stride) + ".svg", bbox_inches='tight')
    plt.show()
    plt.clf()
|
||||
|
||||
@@ -0,0 +1,648 @@
|
||||
#!/usr/bin/env python
|
||||
# coding: utf-8
|
||||
|
||||
# In[ ]:
|
||||
|
||||
|
||||
from ImportUtils import *
|
||||
|
||||
from sklearn.ensemble import RandomForestRegressor as sklearnrfi
|
||||
from sklearn.feature_selection import VarianceThreshold
|
||||
|
||||
|
||||
# In[ ]:
|
||||
|
||||
|
||||
def topElectrodeFSRegressionRanking(dataset, window, stride, sfreq, clf, label, scale=False, pca=False, mutual_info=False, method='SelectKBest'):
    '''
    Rank electrodes with a feature-selection score, then report the RMSE
    obtained when training on the features of the top-N electrodes.

    Side effects: prints progress, writes two CSV files below
    <cwd>/<dataset>/arousal_plots/ and draws the "Top N Electrodes" v/s RMSE
    curve on the current matplotlib figure (the figure is neither saved nor
    shown by this function).

    parameters :-
        dataset - name of the dataset (also its directory name under the cwd)
        window - length of the sliding window in seconds
        stride - length of the stride of the sliding window in seconds
        sfreq - sampling frequency of the EEG data (accepted but not used here)
        clf - estimator exposing fit/predict; it is refitted for every N
        label - index selecting one column of the stored Y array
        scale - scaling of the EEG features (StandardScaler fitted on the train split)
        pca - accepted but not used here
        mutual_info - for 'SelectKBest': use mutual_info_classif instead of f_classif
        method - 'RandomForest' 'RFE' 'SelectKBest'

    returns :-
        void

    raises :-
        ValueError if method is not one of the supported names
    '''
    if method not in ('RandomForest', 'RFE', 'SelectKBest'):
        raise ValueError(
            "method must be 'RandomForest', 'RFE' or 'SelectKBest', got {!r}".format(method))

    pwd = os.getcwd()
    featurepath = pwd + '/' + dataset + '/data_extracted/featuresDict/'

    # Open the archive once and close it again (it used to be opened twice
    # and both handles were left open).
    with np.load((featurepath + "shannonEntropy_{}_{}.npz").format(window, stride), allow_pickle=True) as archive:
        ans = archive['features']
        Y_epoch = archive['Y']
    n_segments = ans.shape[1]

    print("Number of segments are: {}".format(n_segments))

    featuresDict = loadFeaturesDict(dataset)
    asm_features = ['dasm_delta', 'dasm_theta', 'dasm_alpha', 'dasm_beta', 'dasm_gamma', 'rasm_delta', 'rasm_theta', 'rasm_alpha', 'rasm_beta', 'rasm_gamma']
    for asm in asm_features:
        featuresDict.pop(asm)

    # NOTE: pickle.load can execute arbitrary code; intersection.pkl must
    # come from a trusted source.
    with open('intersection.pkl', 'rb') as f:
        common = pickle.load(f)

    # keep only the features listed in intersection.pkl
    for k in list(featuresDict.keys()):
        if k not in common:
            featuresDict.pop(k)

    featuresList = list(featuresDict.keys())
    print(featuresList)

    # Stack every feature block row-wise in one allocation.
    featureMatrix = np.concatenate([np.empty((0, n_segments))] + list(featuresDict.values()), axis=0)

    if np.isnan(featureMatrix).any():
        featureMatrix = np.nan_to_num(featureMatrix, nan=0)

    featureMatrix = featureMatrix.astype('float64')

    # Column labels are "<feature><two-digit row index>"; the two trailing
    # digits are parsed back into an electrode index further down.
    feature_channel_index = [
        "{}{:02d}".format(feature, i)
        for feature in featuresList
        for i in range(featuresDict[feature].shape[0])
    ]

    print("Number of Feature-Columns: {}\n".format(len(feature_channel_index)))

    X = pd.DataFrame(featureMatrix.T)
    X = X.replace([np.inf, -np.inf], np.nan)
    X = X.fillna(0)
    X.columns = feature_channel_index

    #################################################################
    y = copy.deepcopy(Y_epoch[:, label])
    print("y.shape: ", y.shape)

    if method == 'RandomForest':
        # Random Forest feature importances
        scores = sklearnrfi().fit(X, y).feature_importances_
    elif method == 'RFE':
        # NOTE(review): with n_features_to_select equal to the number of
        # columns nothing is eliminated, so every ranking_ entry is 1 and the
        # ordering below is just the original column order - confirm intent.
        scores = RFE(clf, n_features_to_select=X.shape[1], step=1).fit(X, y).ranking_
    else:
        # NOTE(review): classification scorers are applied although the
        # evaluation below is a regression (RMSE) - confirm this is intended.
        func = mutual_info_classif if mutual_info else f_classif
        scores = SelectKBest(score_func=func, k=X.shape[1]).fit(X, y).scores_

    # concat two dataframes for better visualization
    featureScores = pd.concat([pd.DataFrame(X.columns), pd.DataFrame(scores)], axis=1)
    featureScores.columns = ['Specs', 'Score']  # naming the dataframe columns
    features_result = featureScores.nlargest(X.shape[1], 'Score')
    print(features_result)
    features_result.to_csv(pwd + "/" + dataset + "/arousal_plots/" + "CommonElectrodeFSRegressionRanking" + method + str(window) + str(stride) + ".csv")

    ###################################################################
    # Electrodes in order of first appearance in the ranked column list.
    topelectrodes = list(dict.fromkeys(int(col[-2:]) for col in features_result['Specs'].values))

    ##################################################################################
    N = len(topelectrodes)
    topRmseList = []
    topNList = ["{}".format(x) for x in range(1, N + 1)]

    for n in range(1, N + 1):
        electrode_index = topelectrodes[:n]
        print(topelectrodes)
        print(electrode_index)

        # X-Values: one row per (electrode, feature) pair, electrode-major
        featureMatrix = np.empty((len(featuresList) * len(electrode_index), n_segments))
        row = 0
        for index in electrode_index:
            for value in featuresDict.values():
                featureMatrix[row, :] = value[index, :]
                row += 1

        featureMatrix = featureMatrix.astype(np.float32)
        print(featureMatrix.T.shape)

        # Removing NaN Values
        if np.isnan(featureMatrix).any():
            featureMatrix = np.nan_to_num(featureMatrix, nan=0)

        feature_channel_index = [
            feature + str(index)
            for index in electrode_index
            for feature in featuresList
        ]

        print("Number of Feature-Columns: {}\n".format(len(feature_channel_index)))

        X = pd.DataFrame(featureMatrix.T)
        X.columns = feature_channel_index
        X = X.replace([np.inf, -np.inf], np.nan)
        X = X.fillna(0)

        print("Features Ready for undergoing selection tests done ...\n")

        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

        # Feature scaling: statistics come from the training split only
        if scale:
            sc = StandardScaler()
            X_train = sc.fit_transform(X_train)
            X_test = sc.transform(X_test)

        clf.fit(X_train, y_train)
        y_predict = clf.predict(X_test)
        # Mean of the per-output roots is what squared=False used to return;
        # that keyword was deprecated in scikit-learn 1.4 and later removed.
        rmse = float(np.mean(np.sqrt(mean_squared_error(y_test, y_predict, multioutput='raw_values'))))
        print("window: {}, stide: {}, rmse: {}".format(window, stride, rmse))
        topRmseList.append(rmse)

    # concat two dataframes for better visualization
    topNElectrodeRanking = pd.concat([pd.DataFrame(topNList), pd.DataFrame(topRmseList)], axis=1)
    topNElectrodeRanking.columns = ['Electrode', 'RMSE']  # naming the dataframe columns
    print(topNElectrodeRanking)
    topNElectrodeRanking.to_csv(pwd + "/" + dataset + "/arousal_plots/" + "topCommonElectrodeFSRegressionRanking" + method + str(window) + str(stride) + ".csv")

    # Plotting
    fig = plt.gcf()
    fig.set_size_inches(20, 10)
    plt.rcParams.update({'font.size': 30})
    plt.xlabel('Top N Electrodes')
    plt.ylabel('RMSE')
    plt.plot(topNElectrodeRanking.loc[:, "Electrode"], topNElectrodeRanking.loc[:, "RMSE"])
    plt.tight_layout()
|
||||
|
||||
|
||||
# In[ ]:
|
||||
|
||||
|
||||
def topFeatureFSRegressionRanking(dataset, window, stride, sfreq, clf, label, scale=False, pca=False, mutual_info=False, method='SelectKBest'):
    '''
    Rank features with a feature-selection score, then report the RMSE
    obtained when training on all rows of the top-N features.

    Side effects: prints progress, writes two CSV files below
    <cwd>/<dataset>/arousal_plots/ and draws the "Top N Features" v/s RMSE
    curve on the current matplotlib figure (the figure is neither saved nor
    shown by this function).

    parameters :-
        dataset - name of the dataset (also its directory name under the cwd)
        window - length of the sliding window in seconds
        stride - length of the stride of the sliding window in seconds
        sfreq - sampling frequency of the EEG data (accepted but not used here)
        clf - estimator exposing fit/predict; it is refitted for every N
        label - index selecting one column of the stored Y array
        scale - scaling of the EEG features (StandardScaler fitted on the train split)
        pca - accepted but not used here
        mutual_info - for 'SelectKBest': use mutual_info_classif instead of f_classif
        method - 'RandomForest' 'RFE' 'SelectKBest'

    returns :-
        void

    raises :-
        ValueError if method is not one of the supported names
    '''
    if method not in ('RandomForest', 'RFE', 'SelectKBest'):
        raise ValueError(
            "method must be 'RandomForest', 'RFE' or 'SelectKBest', got {!r}".format(method))

    pwd = os.getcwd()
    featurepath = pwd + '/' + dataset + '/data_extracted/featuresDict/'

    # Open the archive once and close it again (it used to be opened twice
    # and both handles were left open).
    with np.load((featurepath + "shannonEntropy_{}_{}.npz").format(window, stride), allow_pickle=True) as archive:
        ans = archive['features']
        Y_epoch = archive['Y']
    n_segments = ans.shape[1]
    print("Number of segments are: {}".format(n_segments))

    featuresDict = loadFeaturesDict(dataset)

    # NOTE: pickle.load can execute arbitrary code; intersection.pkl must
    # come from a trusted source.
    with open('intersection.pkl', 'rb') as f:
        common = pickle.load(f)

    # keep only the features listed in intersection.pkl
    for k in list(featuresDict.keys()):
        if k not in common:
            featuresDict.pop(k)

    ##################################################################
    featuresList = list(featuresDict.keys())
    print(featuresList)

    # Stack every feature block row-wise in one allocation.
    featureMatrix = np.concatenate([np.empty((0, n_segments))] + list(featuresDict.values()), axis=0)

    if np.isnan(featureMatrix).any():
        featureMatrix = np.nan_to_num(featureMatrix, nan=0)

    featureMatrix = featureMatrix.astype('float64')

    # Column labels are "<feature><two-digit row index>"; the two trailing
    # digits are stripped again further down to recover the feature name.
    feature_channel_index = [
        "{}{:02d}".format(feature, i)
        for feature in featuresList
        for i in range(featuresDict[feature].shape[0])
    ]

    print(len(featuresDict))
    print("Number of Feature-Columns: {}\n".format(len(feature_channel_index)))

    X = pd.DataFrame(featureMatrix.T)
    X = X.replace([np.inf, -np.inf], np.nan)
    X = X.fillna(0)
    X.columns = feature_channel_index

    # Remove Variance = 0 features and keep the labels of the surviving columns
    constant_filter = VarianceThreshold(threshold=0)
    constant_filter.fit(X)
    kept = constant_filter.get_support()
    X = constant_filter.transform(X)
    feature_channel_index = [column for column, keep in zip(feature_channel_index, kept) if keep]

    print(len(feature_channel_index), feature_channel_index)

    X = pd.DataFrame(X)
    X.columns = feature_channel_index

    #################################################################
    y = copy.deepcopy(Y_epoch[:, label])
    print("y.shape: ", y.shape)

    if method == 'RandomForest':
        # Random Forest feature importances
        scores = sklearnrfi().fit(X, y).feature_importances_
    elif method == 'RFE':
        # NOTE(review): with n_features_to_select equal to the number of
        # columns nothing is eliminated, so every ranking_ entry is 1 and the
        # ordering below is just the original column order - confirm intent.
        scores = RFE(clf, n_features_to_select=X.shape[1], step=1).fit(X, y).ranking_
    else:
        # NOTE(review): classification scorers are applied although the
        # evaluation below is a regression (RMSE) - confirm this is intended.
        func = mutual_info_classif if mutual_info else f_classif
        scores = SelectKBest(score_func=func, k=X.shape[1]).fit(X, y).scores_

    # concat two dataframes for better visualization
    featureScores = pd.concat([pd.DataFrame(X.columns), pd.DataFrame(scores)], axis=1)
    featureScores.columns = ['Specs', 'Score']  # naming the dataframe columns
    features_result = featureScores.nlargest(X.shape[1], 'Score')
    print(features_result)
    features_result.to_csv(pwd + "/" + dataset + "/arousal_plots/" + "CommonFeatureFSRegressionRanking" + method + str(window) + str(stride) + ".csv")

    ###################################################################
    # Features / electrodes in order of first appearance in the ranked columns.
    topcolumns = features_result['Specs'].values
    topfeatures = list(dict.fromkeys(col[:-2] for col in topcolumns))
    topelectrodes = list(dict.fromkeys(int(col[-2:]) for col in topcolumns))

    ######################################################################
    # TOP-N-FEATURE-RANKING
    print(topfeatures)
    print(topelectrodes)
    N = len(topfeatures)
    topNRmseList = []
    topNList = ["{}".format(x) for x in range(1, N + 1)]

    for n in range(1, N + 1):
        topnfeatures = topfeatures[:n]

        # X-Values: all rows of each selected feature, feature-major
        featureMatrix = np.concatenate(
            [np.empty((0, n_segments))] + [featuresDict[feature] for feature in topnfeatures],
            axis=0)
        featureMatrix = featureMatrix.astype('float64')
        print(featureMatrix.T.shape)

        feature_channel_index = [
            feature + str(i)
            for feature in topnfeatures
            for i in range(featuresDict[feature].shape[0])
        ]

        # Removing NaN Values
        if np.isnan(featureMatrix).any():
            featureMatrix = np.nan_to_num(featureMatrix, nan=0)

        print("Number of Feature-Columns: {}\n".format(len(feature_channel_index)))

        X = pd.DataFrame(featureMatrix.T)
        X.columns = feature_channel_index
        X = X.replace([np.inf, -np.inf], np.nan)
        X = X.fillna(0)

        print("Features Ready for undergoing selection tests done ...\n")

        X = X.astype(np.float32)
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

        # Feature scaling: statistics come from the training split only
        if scale:
            sc = StandardScaler()
            X_train = sc.fit_transform(X_train)
            X_test = sc.transform(X_test)

        clf.fit(X_train, y_train)
        y_predict = clf.predict(X_test)
        # Mean of the per-output roots is what squared=False used to return;
        # that keyword was deprecated in scikit-learn 1.4 and later removed.
        rmse = float(np.mean(np.sqrt(mean_squared_error(y_test, y_predict, multioutput='raw_values'))))
        print("window: {}, stide: {}, rmse: {}".format(window, stride, rmse))
        topNRmseList.append(rmse)

    # concat two dataframes for better visualization
    topNFeaturesRanking = pd.concat([pd.DataFrame(topNList), pd.DataFrame(topNRmseList)], axis=1)
    topNFeaturesRanking.columns = ['Feature', 'RMSE']  # naming the dataframe columns
    print(topNFeaturesRanking)
    topNFeaturesRanking.to_csv(pwd + "/" + dataset + "/arousal_plots/" + "topCommonFeatureFSRegressionRanking" + method + str(window) + str(stride) + ".csv")

    # Plotting
    fig = plt.gcf()
    fig.set_size_inches(25, 10)
    plt.rcParams.update({'font.size': 30})
    plt.xlabel('Top N Features')
    plt.ylabel('RMSE')
    plt.plot(topNFeaturesRanking.loc[:, "Feature"], topNFeaturesRanking.loc[:, "RMSE"])
    plt.tight_layout()
|
||||
|
||||
|
||||
# In[ ]:
|
||||
|
||||
|
||||
def topFSColumnsRegressionRanking(dataset, window, stride, sfreq, clf, label, scale=False, pca=False, mutual_info=False, method='SelectKBest'):
    '''
    Method C: rank individual feature columns with a feature-selection
    score, then report the RMSE obtained when training on the top-N columns.

    Side effects: prints progress, writes one CSV and one SVG file below
    <cwd>/<dataset>/arousal_plots/, shows the plot and clears the current
    matplotlib figure.

    parameters :-
        dataset - name of the dataset (also its directory name under the cwd)
        window - length of the sliding window in seconds
        stride - length of the stride of the sliding window in seconds
        sfreq - sampling frequency of the EEG data (accepted but not used here)
        clf - estimator exposing fit/predict; it is refitted for every N
        label - index selecting one column of the stored Y array
        scale - scaling of the EEG features (StandardScaler fitted on the train split)
        pca - accepted but not used here
        mutual_info - for 'SelectKBest': use mutual_info_classif instead of f_classif
        method - 'RandomForest' 'RFE' 'SelectKBest'

    returns :-
        void

    raises :-
        ValueError if method is not one of the supported names
    '''
    if method not in ('RandomForest', 'RFE', 'SelectKBest'):
        raise ValueError(
            "method must be 'RandomForest', 'RFE' or 'SelectKBest', got {!r}".format(method))

    pwd = os.getcwd()
    featurepath = pwd + '/' + dataset + '/data_extracted/featuresDict/'

    # Open the archive once and close it again (it used to be opened twice
    # and both handles were left open).
    with np.load((featurepath + "shannonEntropy_{}_{}.npz").format(window, stride), allow_pickle=True) as archive:
        ans = archive['features']
        Y_epoch = archive['Y']
    n_segments = ans.shape[1]

    print("Number of segments are: {}".format(n_segments))

    #X##############################################################################################
    featuresDict = loadFeaturesDict(dataset)

    # NOTE: pickle.load can execute arbitrary code; intersection.pkl must
    # come from a trusted source.
    with open('intersection.pkl', 'rb') as f:
        common = pickle.load(f)

    # keep only the features listed in intersection.pkl
    for k in list(featuresDict.keys()):
        if k not in common:
            featuresDict.pop(k)

    print("Number of Features:", len(featuresDict))
    featuresList = list(featuresDict.keys())

    # Column labels are "<feature>_<two-digit row index>".
    feature_channel_index = [
        "{}_{:02d}".format(feature, i)
        for feature in featuresList
        for i in range(featuresDict[feature].shape[0])
    ]

    print(len(featuresDict))
    print("Number of Feature-Columns: {}\n".format(len(feature_channel_index)))

    # defining feature matrix: stack every feature block row-wise in one allocation
    featureMatrix = np.concatenate([np.empty((0, n_segments))] + list(featuresDict.values()), axis=0)

    print("Shape of FeatureMatrix: {}\n".format(featureMatrix.T.shape))

    # data-imputation and nan-removal
    featureMatrix = featureMatrix.astype(np.float32)

    if np.isnan(featureMatrix).any():
        featureMatrix = np.nan_to_num(featureMatrix, nan=0)

    X = pd.DataFrame(featureMatrix.T)
    X = X.replace([np.inf, -np.inf], np.nan)
    X = X.fillna(0)
    X.columns = feature_channel_index

    #Y#####################################################################
    y = Y_epoch[:, label]

    ########################################################################
    if method == 'RandomForest':
        # Random Forest feature importances
        scores = sklearnrfi().fit(X, y).feature_importances_
    elif method == 'RFE':
        # NOTE(review): with n_features_to_select equal to the number of
        # columns nothing is eliminated, so every ranking_ entry is 1 and the
        # ordering below is just the original column order - confirm intent.
        scores = RFE(clf, n_features_to_select=X.shape[1], step=1).fit(X, y).ranking_
    else:
        # NOTE(review): classification scorers are applied although the
        # evaluation below is a regression (RMSE) - confirm this is intended.
        func = mutual_info_classif if mutual_info else f_classif
        scores = SelectKBest(score_func=func, k=X.shape[1]).fit(X, y).scores_

    # concat two dataframes for better visualization
    featureScores = pd.concat([pd.DataFrame(X.columns), pd.DataFrame(scores)], axis=1)
    featureScores.columns = ['Column', 'Score']  # naming the dataframe columns
    features_result = featureScores.nlargest(X.shape[1], 'Score')
    print(features_result)

    N = len(feature_channel_index)
    topNRmseList = []
    topNList = ["{}".format(x) for x in range(1, N + 1)]

    # Ranked column names, best first; slicing this list replaces head(n) per iteration.
    ranked_columns = features_result['Column'].tolist()

    for n in range(1, N + 1):
        input_df = pd.DataFrame(X[ranked_columns[:n]])

        X_train, X_test, y_train, y_test = train_test_split(input_df, y, test_size=0.2, random_state=42)

        # Feature scaling: statistics come from the training split only
        if scale:
            sc = StandardScaler()
            X_train = sc.fit_transform(X_train)
            X_test = sc.transform(X_test)

        clf.fit(X_train, y_train)
        y_predict = clf.predict(X_test)
        # Mean of the per-output roots is what squared=False used to return;
        # that keyword was deprecated in scikit-learn 1.4 and later removed.
        rmse = float(np.mean(np.sqrt(mean_squared_error(y_test, y_predict, multioutput='raw_values'))))
        print(n, rmse)
        topNRmseList.append(rmse)

    # concat two dataframes for better visualization
    topfeatures_result = pd.concat([pd.DataFrame(topNList), pd.DataFrame(topNRmseList)], axis=1)
    topfeatures_result.columns = ['Column', 'RMSE']  # naming the dataframe columns
    print(topfeatures_result)
    topfeatures_result.to_csv(pwd + "/" + dataset + "/arousal_plots/" + "topFSColumnsRegressionRanking" + method + str(window) + str(stride) + ".csv")

    # Plotting
    fig = plt.gcf()
    fig.set_size_inches(60, 9)

    plt.xlabel('Top N Columns')
    plt.ylabel('RMSE')
    plt.title("Top N Columns v/s RMSE Plot for Window:{} Stride:{} epoched data by varying N".format(window, stride))
    plt.plot(topfeatures_result.loc[:, "Column"], topfeatures_result.loc[:, "RMSE"])
    plt.tight_layout()
    plt.savefig(pwd + "/" + dataset + "/arousal_plots/" + "topFSColumnsRegressionRanking" + method + str(window) + str(stride) + ".svg", bbox_inches='tight', dpi=500)
    plt.show()
    plt.clf()
|
||||
|
||||
|
||||
# In[ ]:
|
||||
|
||||
|
||||
# Running this file as a script does nothing; the guard body is a bare `pass`.
if __name__ == '__main__':
    pass
|
||||
|
||||
|
||||
@@ -0,0 +1,319 @@
|
||||
#!/usr/bin/env python
|
||||
# coding: utf-8
|
||||
|
||||
# In[ ]:
|
||||
|
||||
|
||||
# Notebook cell: clone the EEGExtract repository into ./cloned-repo, change
# into it and list its contents.
get_ipython().system('git clone -l -s https://github.com/sari-saba-sadiya/EEGExtract.git cloned-repo')
get_ipython().run_line_magic('cd', 'cloned-repo')
get_ipython().system('ls')


# In[ ]:


# Install the requirements file found in the current directory (the clone).
get_ipython().system('pip install -r requirements.txt')


# In[ ]:


# Mount Google Drive at /gdrive (Colab only), forcing a remount.
from google.colab import drive
drive.mount('/gdrive',force_remount=True)


# In[ ]:


# Install the pyinform package.
get_ipython().system('pip install pyinform')


# In[ ]:


# Change into the project directory on the mounted drive; the relative data
# paths used further down are resolved from here.
get_ipython().run_line_magic('cd', '../../gdrive/MyDrive/emotion_recognition_project')
|
||||
|
||||
|
||||
# In[ ]:
|
||||
|
||||
|
||||
import EEGExtract as eeg
|
||||
from scipy import io,signal
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
from sklearn import preprocessing
|
||||
import pandas as pd
|
||||
import pickle
|
||||
import matplotlib.pyplot as plt
|
||||
|
||||
|
||||
# In[ ]:
|
||||
|
||||
|
||||
class load_data:
    '''
    Load the preprocessed data here, store the parameters.

    After load_arrays() the instance holds the recordings in X, the labels
    in Y, the recording parameters (freq, channels, no_of_subjects) and
    eegData, the array handed to the EEGExtract feature functions.
    '''

    # 14-electrode layout every dataset is reduced to.
    _COMMON_CHANNELS = ('AF3', 'F7', 'F3', 'FC5', 'T7', 'P7', 'O1', 'O2', 'P8', 'T8', 'FC6', 'F4', 'F8', 'AF4')
    # Positions of those 14 electrodes on the last axis of the DEAP array,
    # listed in the same order as _COMMON_CHANNELS.
    _DEAP_CHANNEL_PICKS = [1, 3, 2, 4, 7, 11, 13, 31, 29, 25, 21, 19, 20, 17]
    # Archive to read for the datasets stored as a single .npz file.
    _NPZ_PATHS = {
        'DREAMER': 'original_data/DREAMER.npz',
        'DEAP': 'original_data/DEAP.npz',
    }
    _SUBJECT_COUNTS = {'DREAMER': 23, 'DEAP': 32, 'OASIS': 15}

    def __init__(self, name):
        self.name = name  # name of dataset
        self.X = None
        self.Y = None
        self.Z = None
        self.freq = None  # (in Hz) is same for all datasets
        self.channels = None
        self.ch_type = 'eeg'
        self.eegData = None
        self.use_autoreject = 'y'
        self.no_of_subjects = None

    def load_arrays(self):
        '''
        Read the arrays of the dataset named self.name from disk (paths are
        relative to the current working directory), fill in the recording
        parameters and finish with reshape_data().

        raises ValueError when self.name is not DREAMER, DEAP or OASIS
        (this used to surface as a NameError on an unbound local).
        '''
        if self.name not in self._SUBJECT_COUNTS:
            raise ValueError(
                "unknown dataset {!r}; expected 'DREAMER', 'DEAP' or 'OASIS'".format(self.name))

        self.no_of_subjects = self._SUBJECT_COUNTS[self.name]
        self.freq = 128
        self.channels = list(self._COMMON_CHANNELS)

        if self.name == 'OASIS':
            if self.use_autoreject == 'y':
                # NOTE: pickle.load can execute arbitrary code; only load
                # files produced by this project.
                with open('preprocessed_data/oasis/with_autoreject.p', 'rb') as file:
                    trials = pickle.load(file)
                # merge_dictionary is defined further down in this file
                self.X, self.Y = merge_dictionary(trials)
            else:
                with np.load('preprocessed_data/oasis/without_autoreject.npz') as array:
                    self.X = array['X']
                    self.Y = array['Y']
            # NOTE(review): reshape swaps the two trailing dimension sizes
            # without moving any data (it is not a transpose); left as is
            # because the later reshapes in this pipeline mirror it.
            (a, b, c) = self.X.shape
            self.X = np.reshape(self.X, (a, c, b))
        else:
            # close the archive as soon as both arrays are in memory
            with np.load(self._NPZ_PATHS[self.name]) as array:
                self.X = array['X']
                self.Y = array['Y']
            if self.name == 'DEAP':
                # To maintain uniformity across all datasets, only 14 electrodes selected
                self.X = self.X[:, :, self._DEAP_CHANNEL_PICKS]

        self.reshape_data()

    def reshape_data(self):
        '''
        reshapes data in the format EEGExtract module expects i.e channels x timepoints x epochs
        '''
        # NOTE(review): np.reshape only relabels the dimensions of the
        # buffer, it does not permute axes like np.transpose would.
        # epoch_data() undoes this with another reshape, so the two must be
        # changed together - confirm before touching either.
        (epochs, timepoints, channels) = self.X.shape
        self.eegData = np.reshape(self.X, (channels, timepoints, epochs))
|
||||
|
||||
|
||||
|
||||
|
||||
# In[ ]:
|
||||
|
||||
|
||||
def merge_dictionary(dictionary):
    '''
    merge all trial data to form one array

    Each value of dictionary is a sequence whose element 0 is an array of
    shape (epochs, channels, segment_length) and whose element 3 is the
    two-valued score of that trial.

    returns X, Y where X stacks the epoch arrays of all trials along axis 0
    and Y holds one copy of the trial score per epoch, so X and Y always
    have the same number of rows.

    raises ValueError when dictionary is empty.
    '''
    if not dictionary:
        raise ValueError("merge_dictionary needs at least one trial")

    trials = list(dictionary.values())
    # Any trial can provide the channel/segment sizes; the key 1 is no
    # longer assumed to exist.
    first_array = trials[0][0]
    no_of_channels = first_array.shape[1]
    length_of_segment = first_array.shape[2]

    # The empty float seeds keep the result dtype and shape checks of the
    # former np.append chain while needing a single concatenation.
    x_parts = [np.empty((0, no_of_channels, length_of_segment))]
    y_parts = [np.empty((0, 2))]
    for record in trials:
        array = record[0]
        score = record[3]
        x_parts.append(array)
        # Repeat the score once per epoch of THIS trial; using the epoch
        # count of the first trial for everyone misaligned X and Y whenever
        # trials kept different numbers of epochs.
        y_parts.append(np.repeat(np.expand_dims(score, axis=0), array.shape[0], axis=0))

    return np.concatenate(x_parts, axis=0), np.concatenate(y_parts, axis=0)
|
||||
|
||||
|
||||
# In[ ]:
|
||||
|
||||
|
||||
def calculate_diffrential_entropy_for_bands(eegData, freq):
    # Function to calculate the differential entropy for the different bands of EEG data
    #
    # parameters :-
    #   eegData :- EEG array passed unchanged to eeg.filt_data
    #   freq :- sampling frequency of the EEG signal
    # returns :-
    #   bandwise DE as a tuple (delta, theta, alpha, beta, gamma); each entry
    #   is 1/2 * log(2 * pi * e * variance), variance taken along axis 1 of
    #   the band-filtered data

    # (low, high) cut-offs handed to eeg.filt_data, in output order
    band_limits = (
        (0.5, 4),   # delta band
        (4, 8),     # theta band
        (8, 12),    # alpha band
        (12, 30),   # beta band
        (30, 63),   # gamma band
    )

    entropies = []
    for low_cut, high_cut in band_limits:
        filtered = eeg.filt_data(eegData, low_cut, high_cut, freq)
        entropies.append(1/2*np.log(np.var(filtered, axis=1)*np.pi*np.e*2))

    return tuple(entropies)
|
||||
|
||||
|
||||
# In[ ]:
|
||||
|
||||
|
||||
#['AF3', 'F7', 'F3', 'FC5', 'T7', 'P7', 'O1', 'O2', 'P8', 'T8', 'FC6', 'F4', 'F8', 'AF4']
|
||||
# 0 1 2 3 4 5 6 7 8 9 10 11 12 13
|
||||
def calculate_RASM_DASM(band, pairs=None):
    '''
    Compute rational (RASM) and differential (DASM) asymmetry between
    electrode pairs.

    parameters :-
        band - 2-D array indexed as band[channel, sample]
        pairs - optional sequence of (first, second) channel-index pairs;
                defaults to the seven pairs AF3-AF4, F3-F4, F7-F8, FC5-FC6,
                O1-O2, P7-P8, T7-T8 of the 14-channel layout listed above
                this function

    returns :-
        float array of shape (samples, 2 * len(pairs)): first the ratios
        band[first] / band[second] for every pair, then the differences
        band[first] - band[second] in the same pair order
    '''
    if pairs is None:
        #        AF3-AF4  F3-F4    F7-F8    FC5-FC6  O1-O2   P7-P8   T7-T8
        pairs = ((0, 13), (2, 11), (1, 12), (3, 10), (6, 7), (5, 8), (4, 9))

    band = np.asarray(band)
    first = [a for a, _ in pairs]
    second = [b for _, b in pairs]

    # One fancy-indexed operation per measure replaces the fourteen
    # hand-written rows and the repeated np.append copies.
    rasm = band[first, :] / band[second, :]
    dasm = band[first, :] - band[second, :]

    # The empty float64 seed keeps the result dtype of the former
    # np.append chain.
    features = np.concatenate([np.empty((0, band.shape[1])), rasm, dasm], axis=0)
    return features.T
|
||||
|
||||
|
||||
# In[ ]:
|
||||
|
||||
|
||||
def epoch_data(X,Y, window, stride, sfreq):
    """Cut every trial into fixed-length epochs.

    X is a 3-D array whose shape is read as (channels, timepoints, trials);
    Y has one row per trial and only its first two columns are kept.
    ``window`` and ``stride`` are multiplied by ``sfreq`` and truncated to
    whole samples to get the epoch length and the hop between epochs.

    Returns ``(X_new, Y_new)`` where X_new has shape
    (channels, samples per epoch, total epochs) and Y_new has shape
    (total epochs, 2), each trial's label row repeated once per epoch.
    """
    n_channels, n_timepoints, n_trials = X.shape
    # NOTE(review): np.reshape re-reads the same buffer with a new shape; it
    # does not swap axes the way np.transpose would. Confirm this is intended.
    trial_major = np.reshape(X, (n_trials, n_channels, n_timepoints))

    samples_per_epoch = int(window * sfreq)
    hop = int(stride * sfreq)
    epochs_per_trial = int((n_timepoints - samples_per_epoch) / hop + 1)
    total_epochs = n_trials * epochs_per_trial

    X_new = np.empty((total_epochs, n_channels, samples_per_epoch))
    Y_new = np.empty((total_epochs, 2))
    for trial_idx in range(n_trials):
        first_row = trial_idx * epochs_per_trial
        for epoch_idx in range(epochs_per_trial):
            start = epoch_idx * hop
            row = first_row + epoch_idx
            X_new[row, :, :] = trial_major[trial_idx, :, start:start + samples_per_epoch]
            Y_new[row, :] = Y[trial_idx, :2]

    # Same buffer-level reshape on the way out as on the way in.
    X_new = np.reshape(X_new, (n_channels, samples_per_epoch, total_epochs))
    return X_new, Y_new
|
||||
|
||||
|
||||
# In[ ]:
|
||||
|
||||
|
||||
def segregate_data_of_subjects(feature_matrix,dataset,sfreq = 128):
    """Split the rows of ``feature_matrix`` into one slice per subject.

    For any dataset whose ``name`` is not 'DEAP AND DREAMER' the rows are
    divided evenly over ``dataset.no_of_subjects`` (integer division; left
    over rows at the end are not assigned to anyone).  For the combined
    dataset the first 80640 rows are cut into 32 subjects of 2520 rows
    and the remaining rows into 23 subjects of 8190 rows, numbered 33-55.

    ``sfreq`` is accepted but not used.

    Returns a dict mapping 'subject_1', 'subject_2', ... to row slices.
    """
    total_rows = feature_matrix.shape[0]

    if dataset.name != 'DEAP AND DREAMER':
        rows_each = total_rows // dataset.no_of_subjects
        print('samples per subject taken are ',rows_each)
        return {
            'subject_' + str(idx + 1): feature_matrix[rows_each * idx:rows_each * (idx + 1), :]
            for idx in range(dataset.no_of_subjects)
        }

    first_part = feature_matrix[:80640, :]
    second_part = feature_matrix[80640:, :]
    print(second_part.shape)

    subject_indexes = {}
    # (rows, number of subjects, rows per subject, number of first subject)
    layout = ((first_part, 32, 2520, 1), (second_part, 23, 8190, 33))
    for rows, n_subjects, rows_each, first_number in layout:
        for idx in range(n_subjects):
            key = 'subject_' + str(first_number + idx)
            subject_indexes[key] = rows[rows_each * idx:rows_each * (idx + 1), :]
    return subject_indexes
|
||||
|
||||
|
||||
# In[ ]:
|
||||
|
||||
|
||||
def driver_code():
    """Extract RASM/DASM features per frequency band for DREAMER and save them.

    Loads the DREAMER arrays, epochs them with a window and stride of 1 at
    128 Hz, asks calculate_diffrential_entropy_for_bands for the five band
    arrays and writes one .npz per band (keys ``features`` and ``Y``)
    under 'features/<dataset name lower-cased>_RASM_DASM/'.
    """
    window = 1
    stride = 1

    dataset = load_data('DREAMER')
    dataset.load_arrays()

    X, Y = epoch_data(dataset.eegData, dataset.Y, window, stride, 128)
    print('shape after epoching')
    print('X:',X.shape)
    print('Y:',Y.shape)
    print('')
    print('')

    delta,theta,alpha,beta,gamma = calculate_diffrential_entropy_for_bands(X,dataset.freq)
    band_names = ('delta', 'theta', 'alpha', 'beta', 'gamma')
    band_arrays = (delta, theta, alpha, beta, gamma)

    for name, band in zip(band_names, band_arrays):
        # RASM and DASM features for this band
        feature_matrix = calculate_RASM_DASM(band)
        print(name ,':' ,end = '')
        print(feature_matrix.shape)
        print(feature_matrix)
        target = 'features/'+dataset.name.lower()+'_RASM_DASM/'+name+'_'+str(window)+'_'+str(stride)
        np.savez(target, features = feature_matrix, Y=Y)
|
||||
|
||||
|
||||
|
||||
# In[ ]:
|
||||
|
||||
|
||||
# Run the extraction as soon as this cell / script is executed.
driver_code()


# In[ ]:


# Notebook cell: read back the 'features' array of a saved file.  The value
# is not assigned to anything, so outside a notebook it is simply discarded.
np.load('features/oasis/without_autoreject/shannonEntropy_1_1.npz')['features']
|
||||
|
||||
|
||||
# In[ ]:
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -0,0 +1,467 @@
|
||||
#!/usr/bin/env python
|
||||
# coding: utf-8
|
||||
|
||||
# In[ ]:
|
||||
|
||||
|
||||
# -*- coding: utf-8 -*-
|
||||
"""feature_extraction_25GB_RAM.ipynb
|
||||
|
||||
Automatically generated by Colaboratory.
|
||||
|
||||
Original file is located at
|
||||
https://colab.research.google.com/drive/1QnVj7GyyJhLPrYF4vBTppMwynXqmOTEJ
|
||||
"""
|
||||
|
||||
# Commented out IPython magic to ensure Python compatibility.
|
||||
|
||||
|
||||
import EEGExtract as eeg
|
||||
from scipy import io,signal
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
from sklearn import preprocessing
|
||||
import pandas as pd
|
||||
import pickle
|
||||
|
||||
|
||||
class load_data:
    '''
    Load the preprocessed data of one dataset and store its parameters.

    Supported names are 'DREAMER', 'DEAP' and 'OASIS'.  After
    ``load_arrays()`` the instance holds the signal array in ``X``, the
    labels in ``Y``, the sampling frequency in ``freq``, the channel names
    in ``channels`` and the array handed to EEGExtract in ``eegData``.
    '''

    # Dataset names load_arrays() knows how to load.
    SUPPORTED_DATASETS = ('DREAMER', 'DEAP', 'OASIS')

    # The 14 channel names shared by all datasets after loading.
    CHANNELS_14 = ('AF3', 'F7', 'F3', 'FC5', 'T7', 'P7', 'O1', 'O2',
                   'P8', 'T8', 'FC6', 'F4', 'F8', 'AF4')

    # Positions of those 14 channels on the last axis of the DEAP array.
    DEAP_CHANNEL_INDEXES = (1, 3, 2, 4, 7, 11, 13, 31, 29, 25, 21, 19, 20, 17)

    def __init__(self,name):
        self.name = name #name of dataset
        self.X = None #signal array, filled by load_arrays()
        self.Y = None #labels, filled by load_arrays()
        self.Z = None #never filled in this class
        self.freq = None #(in Hz) is same for all datasets
        self.channels = None
        self.ch_type = 'eeg'
        self.eegData = None #reshaped copy of X, filled by reshape_data()
        self.use_autoreject = 'n' #'y' selects the autoreject OASIS pickle

    def load_arrays(self):
        '''
        Read the arrays for ``self.name`` from disk and fill the instance.

        Raises ValueError for an unsupported dataset name.  Previously such
        a name fell through every branch and failed on an unbound local.
        '''
        if self.name not in self.SUPPORTED_DATASETS:
            raise ValueError('unknown dataset %r, expected one of %s'
                             % (self.name, ', '.join(self.SUPPORTED_DATASETS)))

        if self.name == 'OASIS':
            if self.use_autoreject == 'y':
                # NOTE: pickle can run arbitrary code while loading; only
                # point this at files produced by the project itself.
                with open('preprocessed_data/oasis/with_autoreject.p','rb') as file:
                    self.X = pickle.load(file)
                self.channels = list(self.CHANNELS_14)
                self.freq = 128
                self.X ,self.Y= merge_dictionary(self.X)
            else:
                array = np.load('preprocessed_data/oasis/without_autoreject.npz')
                self.freq = 128
                self.channels = list(self.CHANNELS_14)
                self.X = array['X']
                self.Y = array['Y']
            # Both OASIS variants swap the sizes of the last two axes.
            # NOTE(review): reshape, not transpose, so the buffer order is
            # kept as is. Confirm this is intended.
            (a,b,c) = self.X.shape
            self.X = np.reshape(self.X,(a,c,b))
        else:
            if self.name == 'DREAMER':
                array = np.load('original_data/DREAMER.npz')
            else:
                array = np.load('original_data/DEAP.npz')
            self.freq = 128
            self.X = array['X']
            if self.name == 'DEAP':
                # To maintain uniformity across all datasets, only 14 electrodes selected
                self.X = self.X[:,:,list(self.DEAP_CHANNEL_INDEXES)]
            self.channels = list(self.CHANNELS_14)
            self.Y = array['Y']

        self.reshape_data()

    def reshape_data(self):
        '''
        reshapes data in the format EEGExtract module expects i.e channels x timepoints x epochs

        ``self.X`` is read as (epochs, timepoints, channels) and the result
        is stored in ``self.eegData``.
        NOTE(review): np.reshape only re-labels the axes of the same
        buffer; it does not move samples like np.transpose. Confirm this
        is intended.
        '''
        (epochs,timepoints,channels) = self.X.shape
        self.eegData = np.reshape(self.X,(channels,timepoints,epochs))
|
||||
|
||||
class features:
    """Thin static wrappers around the EEGExtract (``eeg``) feature functions.

    Every method takes the signal array as ``eegData``; methods that need
    the sampling frequency take it as ``fs``, the others swallow it through
    ``**args`` so all of them can be called as ``method(eegData=..., fs=...)``.
    """

    ############################ Complexity Features #############################
    #1>
    @staticmethod
    def ShannonRes(eegData,**args):
        """Shannon entropy of the unfiltered signal."""
        return eeg.shannonEntropy(eegData, bin_min=-200, bin_max=200, binWidth=2)

    #2>
    @staticmethod
    def ShannonRes_sub_band_delta(eegData,fs):
        """Shannon entropy of the signal filtered to the delta band (0.5-4 Hz)."""
        return eeg.shannonEntropy(eeg.filt_data(eegData, 0.5, 4, fs),
                                  bin_min=-200, bin_max=200, binWidth=2)

    #3>
    @staticmethod
    def ShannonRes_sub_band_theta(eegData,fs):
        """Shannon entropy of the signal filtered to the theta band (4-8 Hz)."""
        return eeg.shannonEntropy(eeg.filt_data(eegData, 4, 8, fs),
                                  bin_min=-200, bin_max=200, binWidth=2)

    #4>
    @staticmethod
    def ShannonRes_sub_band_alpha(eegData,fs):
        """Shannon entropy of the signal filtered to the alpha band (8-12 Hz)."""
        return eeg.shannonEntropy(eeg.filt_data(eegData, 8, 12, fs),
                                  bin_min=-200, bin_max=200, binWidth=2)

    #5>
    @staticmethod
    def ShannonRes_sub_band_beta(eegData,fs):
        """Shannon entropy of the signal filtered to the beta band (12-30 Hz)."""
        return eeg.shannonEntropy(eeg.filt_data(eegData, 12, 30, fs),
                                  bin_min=-200, bin_max=200, binWidth=2)

    #6>
    @staticmethod
    def ShannonRes_sub_band_gamma(eegData,fs):
        """Shannon entropy of the signal filtered to the gamma band (30-63 Hz here)."""
        return eeg.shannonEntropy(eeg.filt_data(eegData, 30, 63, fs),
                                  bin_min=-200, bin_max=200, binWidth=2)

    #7>
    @staticmethod
    def Hojorth_Mobility(eegData,**args):
        """Hjorth mobility: first value returned by eeg.hjorthParameters."""
        mobility, _complexity = eeg.hjorthParameters(eegData)
        return mobility

    #8>
    @staticmethod
    def Hojorth_Complexity(eegData,**args):
        """Hjorth complexity: second value returned by eeg.hjorthParameters."""
        _mobility, complexity = eeg.hjorthParameters(eegData)
        return complexity

    #9>
    @staticmethod
    def False_Nearest_Neighbour(eegData,**args):
        """False nearest neighbor."""
        return eeg.falseNearestNeighbor(eegData)

    ##############################################################################

    ########################Category Features#####################################
    #10>
    @staticmethod
    def median_frequency(eegData,fs):
        """Median frequency; ``fs`` is the sampling frequency."""
        return eeg.medianFreq(eegData,fs)

    #11>
    @staticmethod
    def band_power_delta(eegData,fs):
        """Delta band power (0.5-4 Hz); ``fs`` is the sampling frequency."""
        return eeg.bandPower(eegData, 0.5, 4, fs)

    #12>
    @staticmethod
    def band_power_theta(eegData,fs):
        """Theta band power (4-8 Hz); ``fs`` is the sampling frequency."""
        return eeg.bandPower(eegData, 4, 8, fs)

    #13>
    @staticmethod
    def band_power_alpha(eegData,fs):
        """Alpha band power (8-12 Hz); ``fs`` is the sampling frequency."""
        return eeg.bandPower(eegData, 8, 12, fs)

    #14>
    @staticmethod
    def band_power_beta(eegData,fs):
        """Beta band power (12-30 Hz); ``fs`` is the sampling frequency."""
        return eeg.bandPower(eegData, 12, 30, fs)

    #15>
    @staticmethod
    def band_power_gamma(eegData,fs):
        """Gamma band power (30-63 Hz here); ``fs`` is the sampling frequency."""
        return eeg.bandPower(eegData, 30, 63, fs)

    #16>
    @staticmethod
    def standard_deviation(eegData,**args):
        """Standard deviation."""
        return eeg.eegStd(eegData)

    #17>
    @staticmethod
    def regularity(eegData,fs):
        """Regularity (burst-suppression)."""
        return eeg.eegRegularity(eegData,fs)

    #18>
    @staticmethod
    def Diffuse_slowing(eegData,**args):
        """Diffuse slowing."""
        return eeg.diffuseSlowing(eegData)

    #19>
    @staticmethod
    def Spikes(eegData,fs,**args):
        """Number of spikes, with minNumSamples set to 70*fs/1000 truncated."""
        return eeg.spikeNum(eegData, int(70*fs/1000))

    #20>
    @staticmethod
    def delta_burst_after_spike(eegData,fs):
        """Delta burst after spike, using the 0.5-4 Hz filtered signal.

        NOTE(review): minNumSamples is fixed at 7 here while the spike
        counters derive it from ``fs``. Confirm this is deliberate.
        """
        return eeg.burstAfterSpike(eegData, eeg.filt_data(eegData, 0.5, 4, fs),
                                   minNumSamples=7, stdAway = 3)

    #21>
    @staticmethod
    def Sharp_spike(eegData,fs):
        """Number of sharp (short) spikes, minNumSamples = 70*fs/1000 truncated."""
        return eeg.shortSpikeNum(eegData, int(70*fs/1000))

    #22>
    @staticmethod
    def Number_of_Burst(eegData,fs):
        """Number of bursts."""
        return eeg.numBursts(eegData,fs)

    #23>
    @staticmethod
    def Burst_length_u_and_sigma_mean(eegData,fs):
        """Mean burst length: first value returned by eeg.burstLengthStats."""
        mean_length, _std_length = eeg.burstLengthStats(eegData,fs)
        return mean_length

    #24>
    @staticmethod
    def Burst_length_u_and_sigma_std(eegData,fs):
        """Burst length spread: second value returned by eeg.burstLengthStats."""
        _mean_length, std_length = eeg.burstLengthStats(eegData,fs)
        return std_length

    #25>
    @staticmethod
    def no_of_suprression(eegData,fs):
        """Number of suppressions."""
        return eeg.numSuppressions(eegData,fs)

    #26>
    @staticmethod
    def Suppression_length_u_and_sigma_mean(eegData,fs):
        """Mean suppression length: first value of eeg.suppressionLengthStats."""
        mean_length, _std_length = eeg.suppressionLengthStats(eegData,fs)
        return mean_length

    #27>
    @staticmethod
    def Suppression_length_u_and_sigma_std(eegData,fs):
        """Suppression length spread: second value of eeg.suppressionLengthStats."""
        _mean_length, std_length = eeg.suppressionLengthStats(eegData,fs)
        return std_length
|
||||
|
||||
##############################################################################
|
||||
|
||||
def merge_dictionary(dictionary):
    '''
    merge all trial data to form one array

    ``dictionary`` maps a trial key to a sequence whose element 0 is that
    trial's epochs array, read as (epochs, channels, samples), and whose
    element 3 is the trial's two label values.

    Returns ``(X, Y)``: X is every trial's epochs stacked along axis 0 and
    Y repeats each trial's label row once per epoch.  The per-trial epoch
    count is taken from trial ``1`` when that key exists, otherwise from
    the first trial in the dictionary.

    Raises KeyError when the dictionary is empty.
    '''
    if not dictionary:
        raise KeyError('merge_dictionary() needs at least one trial')

    # Key 1 stays the reference when present; other key schemes no longer
    # fail just because there is no trial called 1.
    if 1 in dictionary:
        reference = dictionary[1][0]
    else:
        reference = next(iter(dictionary.values()))[0]
    no_of_epochs_per_trial, no_of_channels, length_of_segment = reference.shape

    # Collect the pieces and join once; growing the arrays with np.append
    # inside the loop re-copied everything on every step.  The empty float
    # seeds keep the result dtype the same as before.
    signal_blocks = [np.empty((0,no_of_channels,length_of_segment))]
    label_blocks = [np.empty((0,2))]
    for entry in dictionary.values():
        signal_blocks.append(entry[0])
        score_row = np.expand_dims(entry[3],axis = 0)
        label_blocks.append(np.repeat(score_row, no_of_epochs_per_trial, axis = 0))

    X = np.concatenate(signal_blocks, axis = 0)
    Y = np.concatenate(label_blocks, axis = 0)
    return X,Y
|
||||
|
||||
def epoch_data(X,Y, window, stride, sfreq):
    """Cut every trial into fixed-length epochs.

    X is a 3-D array whose shape is read as (channels, timepoints, trials);
    Y has one row per trial and only its first two columns are kept.
    ``window`` and ``stride`` are multiplied by ``sfreq`` and truncated to
    whole samples to get the epoch length and the hop between epochs.

    Returns ``(X_new, Y_new)`` where X_new has shape
    (channels, samples per epoch, total epochs) and Y_new has shape
    (total epochs, 2), each trial's label row repeated once per epoch.
    """
    n_channels, n_timepoints, n_trials = X.shape
    # NOTE(review): np.reshape re-reads the same buffer with a new shape; it
    # does not swap axes the way np.transpose would. Confirm this is intended.
    trial_major = np.reshape(X, (n_trials, n_channels, n_timepoints))

    samples_per_epoch = int(window * sfreq)
    hop = int(stride * sfreq)
    epochs_per_trial = int((n_timepoints - samples_per_epoch) / hop + 1)
    total_epochs = n_trials * epochs_per_trial

    X_new = np.empty((total_epochs, n_channels, samples_per_epoch))
    Y_new = np.empty((total_epochs, 2))
    for trial_idx in range(n_trials):
        first_row = trial_idx * epochs_per_trial
        for epoch_idx in range(epochs_per_trial):
            start = epoch_idx * hop
            row = first_row + epoch_idx
            X_new[row, :, :] = trial_major[trial_idx, :, start:start + samples_per_epoch]
            Y_new[row, :] = Y[trial_idx, :2]

    # Same buffer-level reshape on the way out as on the way in.
    X_new = np.reshape(X_new, (n_channels, samples_per_epoch, total_epochs))
    return X_new, Y_new
|
||||
|
||||
def driver_code():
    """Interactively extract EEG features for one dataset and save them.

    Prompts on stdin for the dataset, the optional OASIS autoreject
    variant, optional epoching (window and stride) and which features to
    compute.  Each feature is written with np.savez (keys ``features`` and
    ``Y``) to ``<path><feature name>_<int(window)>_<int(stride)>`` where
    ``path`` is 'features/deap/', 'features/dreamer/' or one of the two
    'features/oasis/...' folders.

    The feature names and the methods that compute them now come from one
    table.  The two separate dictionaries had drifted apart for keys
    16-19: there was no name for ``features.regularity`` and the following
    names were shifted by one, so those results were saved under another
    feature's file name.
    """
    dataset_dictionary = {0:'DEAP',1:'OASIS',2:'DREAMER'}
    print(dataset_dictionary)
    print('enter number for loading dataset')
    mapping = int(input())
    print('plz wait loading dataset preprocessed arrays')
    dataset = load_data(dataset_dictionary[mapping])
    if mapping == 1:
        print('do you want to use with autoreject data? if yes press y')
        boolean = input()
        if boolean == 'y':
            dataset.use_autoreject = 'y'

    dataset.load_arrays()
    print('loading complete')
    print('shape of data we will use to make features:',dataset.eegData.shape)
    print('do you want to segment the data before calculating feature values? y/n')
    boolean = input()
    if boolean == 'y':
        window = float(input('enter window size'))
        stride = float(input('enter stride size'))
        dataset.eegData,dataset.Y = epoch_data(dataset.eegData,dataset.Y,window,stride,dataset.freq)
        print('new shapes of X and Y:',dataset.eegData.shape,' ',dataset.Y.shape)
    else:
        # 0/0 marks "not epoched" in the saved file names
        window = 0
        stride = 0

    print('features available')
    # (file name stem, method) in menu order; the position is the menu key.
    feature_table = (
        ('shannonEntropy', features.ShannonRes),
        ('ShannonRes_sub_bands_alpha', features.ShannonRes_sub_band_alpha),
        ('ShannonRes_sub_bands_beta', features.ShannonRes_sub_band_beta),
        ('ShannonRes_sub_bands_delta', features.ShannonRes_sub_band_delta),
        ('ShannonRes_sub_bands_theta', features.ShannonRes_sub_band_theta),
        ('ShannonRes_sub_bands_gamma', features.ShannonRes_sub_band_gamma),
        ('Hjorth_mobilty', features.Hojorth_Mobility),
        ('Hjorth_complexity', features.Hojorth_Complexity),
        ('falseNearestNeighbor', features.False_Nearest_Neighbour),
        ('medianFreq', features.median_frequency),
        ('bandPwr_alpha', features.band_power_alpha),
        ('bandPwr_beta', features.band_power_beta),
        ('bandPwr_gamma', features.band_power_gamma),
        ('bandPwr_theta', features.band_power_theta),
        ('bandPwr_delta', features.band_power_delta),
        ('stdDev', features.standard_deviation),
        ('regularity', features.regularity),
        ('diffuseSlowing', features.Diffuse_slowing),
        ('spikeNum', features.Spikes),
        ('deltaBurstAfterSpike', features.delta_burst_after_spike),
        ('Sharp spike', features.Sharp_spike),
        ('numBursts', features.Number_of_Burst),
        ('burstLen_u_and_sigma_mean', features.Burst_length_u_and_sigma_mean),
        ('burstLen_u_and_sigma_std', features.Burst_length_u_and_sigma_std),
        ('numSuppressions', features.no_of_suprression),
        ('suppressionLen_u_and_sigma_mean', features.Suppression_length_u_and_sigma_mean),
        ('suppressionLen_u_and_sigma_std', features.Suppression_length_u_and_sigma_std),
    )
    featuresDict = {key: name for key, (name, _) in enumerate(feature_table)}
    featureMethod = {key: method for key, (_, method) in enumerate(feature_table)}

    print(featuresDict)

    #define path for saving before hand in np.savez line below
    path = 'features/'
    if dataset.name == 'DEAP':
        path = path +'deap/'
    elif dataset.name == 'DREAMER':
        path = path + 'dreamer/'
    else:
        if dataset.use_autoreject == 'y':
            path = path +'oasis/with_autoreject/'
        else:
            path = path +'oasis/without_autoreject/'

    def compute_and_save(key):
        # Compute one feature on the loaded data and write it next to the labels.
        feature_matrix = featureMethod[key](eegData = dataset.eegData,fs=dataset.freq)
        filename = featuresDict[key]
        print('saving ---',filename)
        np.savez(path+filename+'_'+str(int(window))+'_'+str(int(stride)),features = feature_matrix , Y = dataset.Y)

    boolean = input('do you want to individually make features? y/n')
    if boolean =='n':
        for key in featureMethod:
            compute_and_save(key)
    else:
        found_features = False
        while not found_features:
            print('enter feature no')
            key = int(input())
            compute_and_save(key)
            boolean = input('do you want to find more features? y/n ')
            if boolean =='n':
                found_features = True

    print('feature extraction done!!!!')
|
||||
|
||||
def __main__():
    """Entry point kept under its original name; runs the interactive driver."""
    driver_code()


# Start the driver once, and only when this file is executed as a script.
# Before, __main__() was called unconditionally (so an import started the
# prompts) and the guard compared __name__ with 'main', which never matches.
if __name__ == '__main__':
    __main__()
|
||||
|
||||
|
||||
|
||||
@@ -0,0 +1,70 @@
|
||||
#!/usr/bin/env python
|
||||
# coding: utf-8
|
||||
|
||||
# In[ ]:
|
||||
|
||||
|
||||
# Script to get the feature ranking and electrode ranking through
|
||||
# Method A :- Random Forest Regressor
|
||||
# Method B :- F score based Ranking
|
||||
# Method C :- Random Forest Importances approach
|
||||
# Main function
|
||||
|
||||
from ImportUtils import *
|
||||
from TopNByFSMethods import *
|
||||
from TopNByClassifier import *
|
||||
from args_eeg import args as my_args
|
||||
|
||||
if __name__ == '__main__':

    # Read the command line options and echo them.
    args = my_args()
    print(args.__dict__)
    pwd = os.getcwd()

    dataset = args.dataset
    window = args.window
    stride = args.stride
    sfreq = args.sfreq
    model = args.model
    label = args.label
    approach = args.approach #byclassifier or byfs
    ml_algo = args.ml_algo #classification or regression
    top = args.top #e or f or ef
    fs_method = args.fs_method

    #feature extraction
    getEpochedFeatures(dataset, window, stride, sfreq, label)

    # Pick the ranking routines and the plot name for the requested ranking;
    # any other value of `top` produces no ranking and no plot.
    if top == "e":
        rank_by_regressor = topElectrodeRegressionRanking
        rank_by_selector = topElectrodeFSRegressionRanking
        plot_name = "CorrectedElectrodewiseRanking"
    elif top == "f":
        rank_by_regressor = topFeaturesRegressionRanking
        rank_by_selector = topFeatureFSRegressionRanking
        plot_name = "CorrectedFeaturewiseRanking"
    else:
        rank_by_regressor = None

    if rank_by_regressor is not None:
        clf = RandomForestRegressor()
        # Method A, then Methods B and C, all drawn on the same figure
        rank_by_regressor(dataset, window, stride, sfreq, clf, label, scale=False, pca=False)
        for selector in ('SelectKBest', 'RandomForest'):
            rank_by_selector(dataset, window, stride, sfreq, clf, label, scale=False, pca=False, mutual_info = False, method=selector)
        plt.legend(["Method A","Method B", "Method C"])

        # label 1 goes to the arousal plot folder, anything else to plots
        if label == 1:
            plot_folder = "/arousal_plots/"
        else:
            plot_folder = "/plots/"
        plt.savefig(pwd + "/" + dataset + plot_folder + plot_name + str(window) + str(stride) + ".svg", bbox_inches='tight')
        plt.show()
        plt.clf()
|
||||
|
||||
@@ -0,0 +1,264 @@
|
||||
#!/usr/bin/env python
|
||||
# coding: utf-8
|
||||
|
||||
# In[ ]:
|
||||
|
||||
|
||||
import numpy as np
|
||||
import matplotlib.pyplot as plt
|
||||
import pandas as pd
|
||||
import sys
|
||||
from sklearn.preprocessing import MinMaxScaler,StandardScaler
|
||||
from sklearn.utils import shuffle
|
||||
from sklearn.metrics import mean_squared_error
|
||||
from sklearn.model_selection import train_test_split
|
||||
|
||||
|
||||
# In[ ]:
|
||||
|
||||
|
||||
# Module-level scaler instances: MinMaxScaling() re-fits scaler_min_max and
# StandardScaling() re-fits scaler_standard on every call.
scaler_min_max = MinMaxScaler()
scaler_standard = StandardScaler()
|
||||
|
||||
# Either one of the MinMaxScaling or StandardScaling function can be used
|
||||
|
||||
def MinMaxScaling(feature_matrix):
    """Fit the module-level min-max scaler on ``feature_matrix`` and return it scaled.

    The shared ``scaler_min_max`` instance is re-fitted on every call, so
    it afterwards holds the statistics of the last matrix passed in.
    """
    fitted_scaler = scaler_min_max.fit(feature_matrix)
    return fitted_scaler.transform(feature_matrix)
|
||||
|
||||
def StandardScaling(feature_matrix):
    """Fit the module-level standard scaler on ``feature_matrix`` and return it scaled.

    The shared ``scaler_standard`` instance is re-fitted on every call and
    the shape of its fitted ``mean_`` is printed.
    """
    scaler_standard.fit(feature_matrix)
    print('scaling shape',scaler_standard.mean_.shape)
    # The transform used to go through an undefined name `scaler`, which
    # raised NameError on every call.
    return scaler_standard.transform(feature_matrix)
|
||||
|
||||
# Backend switch: 'sklearn' imports SVR and accuracy_score from scikit-learn,
# any other value imports the same two names from cuml.
architecture = 'sklearn'
if architecture == 'sklearn':
    from sklearn.svm import SVR
    from sklearn.metrics import accuracy_score
else:
    from cuml.svm import SVR
    from cuml.metrics import accuracy_score
|
||||
|
||||
|
||||
#
|
||||
# """##DEAP dataset
|
||||
# 1> Valence - features selected
|
||||
# >
|
||||
# * bandPwr_gamma
|
||||
# * bandPwr_beta
|
||||
# * ShannonRes_gamma
|
||||
# * ShannonRes_beta
|
||||
# * rasm_gamma
|
||||
# * dasm_gamma
|
||||
#
|
||||
# 2> Arousal - feature selected
|
||||
# >
|
||||
# * HjorthMob
|
||||
# * HjorthComp
|
||||
# * stdDev
|
||||
# * bandPwr_theta
|
||||
# * bandPwr_beta
|
||||
# * ShannonRes_beta
|
||||
# * ShannonRes_gamma
|
||||
# * dasm_beta
|
||||
# """
|
||||
|
||||
# In[ ]:
|
||||
|
||||
|
||||
# now for incremental learning we need to segregate data of subjects
|
||||
def segregate_data_of_subjects(feature_matrix,total_subjects,sfreq = 128):
    '''
    Return a dictionary holding, for every subject, only the rows of the
    feature matrix that belong to that subject.
    '''
    # parameters :-
    # feature_matrix :- 2-D array with the subjects' rows stored one subject after another
    # total_subjects :- Total number of subjects in the study
    # sfreq :- sampling frequency of the EEG data (accepted but not used)
    # returns :-
    # dict mapping 'subject_1' ... 'subject_N' to equally sized row slices;
    # rows left over after the integer division are not assigned
    rows_per_subject = feature_matrix.shape[0] // total_subjects
    return {
        'subject_' + str(number): feature_matrix[rows_per_subject * (number - 1):rows_per_subject * number, :]
        for number in range(1, total_subjects + 1)
    }
|
||||
|
||||
|
||||
# In[ ]:
|
||||
|
||||
|
||||
# now defining a function which carries out the incremenatal learning algo
|
||||
def training_phase(model,feature_matrix,Y,subject_indexes,number_of_subjects,total_subjects,rmse_score,test_subject):
    """Fit ``model`` on the first ``number_of_subjects`` subjects and record its RMSE.

    parameters :-
    model :- regressor exposing fit/predict (an SVR in this script)
    feature_matrix :- full feature matrix; only its shape is used here
    Y :- label vector aligned with the rows of feature_matrix
    subject_indexes :- dict 'subject_k' -> rows of that subject
    number_of_subjects :- how many leading subjects to train on
    total_subjects :- total number of subjects in the study
    rmse_score :- list of RMSE values of earlier iterations (appended to)
    test_subject :- list of subject names of earlier iterations (appended to)

    returns :-
    rmse_score :- the same list with this iteration's RMSE appended
    test_subject :- the same list with the name of the last subject used
    in this iteration appended

    NOTE(review): the min-max scaler is fitted on all selected rows before
    the 80:20 split, so the held-out rows influence the scaling. Confirm
    this is acceptable.
    """
    n_features = feature_matrix.shape[1]
    print('training on subject_no:',end = ' ')

    # Gather the rows of subjects 1..number_of_subjects. The empty float
    # seed keeps the stacked matrix float even for other input dtypes.
    blocks = [np.empty((0, n_features))]
    for subject_no in range(1, number_of_subjects + 1):
        print(subject_no,end = ' ')
        subject_name = 'subject_' + str(subject_no)
        blocks.append(subject_indexes[subject_name])
    stacked = np.concatenate(blocks, axis=0)
    print(' ')

    #apply a MinMax scaling to the current iteration feature matrix
    X = MinMaxScaling(stacked)

    # labels of the same leading subjects
    rows_per_subject = feature_matrix.shape[0] // total_subjects
    y = Y[:rows_per_subject * number_of_subjects]

    print('shape of X is :',X.shape)
    print('shape of y is :',y.shape)

    # shuffle with a fixed seed, then hold out 20% of the rows
    X, y = shuffle(X, y, random_state = 0)
    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0, test_size=0.2)

    fitted_model = model.fit(X_train, y_train)
    y_predict = fitted_model.predict(X_test)

    # root mean squared error on the held-out rows
    y_rms = np.sqrt(mean_squared_error(y_test, y_predict))
    print('rms on y :',y_rms)
    print('')

    rmse_score.append(y_rms)
    test_subject.append(subject_name)
    return rmse_score,test_subject
|
||||
|
||||
|
||||
# In[ ]:
|
||||
|
||||
|
||||
def driver_code(save):
    """Train SVRs incrementally on DEAP features and plot the RMSE curves.

    Loads the saved valence and arousal feature files, builds one feature
    matrix per target, runs training_phase with 1, 2, ..., 32 subjects for
    each target, plots both RMSE curves and prints them as a table.  When
    ``save`` equals 'y' the figure and the table are also written under
    'plots/deap/'.
    """
    # ---- valence: load files ----
    # NOTE(review): the beta entropy file is spelled 'ShanninRes...' here;
    # confirm the file on disk really carries that spelling.
    val_bandpwr_gamma = np.load('features/deap/bandPwr_gamma_1_1.npz')
    val_bandpwr_beta = np.load('features/deap/bandPwr_beta_1_1.npz')
    val_shannon_gamma = np.load('features/deap/ShannonRes_sub_bands_gamma_1_1.npz')
    val_shannon_beta = np.load('features/deap/ShanninRes_sub_bands_beta_1_1.npz')
    # the RASM/DASM file is 80640 x 14: columns 0-6 are RASM, 7-13 are DASM
    val_rasm_gamma = np.load('features/deap_RASM_DASM/gamma_1_1.npz')
    val_dasm_gamma = np.load('features/deap_RASM_DASM/gamma_1_1.npz')

    # ---- valence: stack feature rows, then put samples on axis 0 ----
    valence_blocks = [
        np.empty((0,80640)),
        val_bandpwr_gamma['features'],
        val_bandpwr_beta['features'],
        val_shannon_gamma['features'],
        val_shannon_beta['features'],
        val_rasm_gamma['features'].T[:7,:],
        val_dasm_gamma['features'].T[7:,:],
    ]
    feature_matrix_valence = np.concatenate(valence_blocks, axis = 0).T #80640 x 70

    # valence is column 0 of the stored labels
    Y_val = val_bandpwr_gamma['Y'][:,0]

    # ---- arousal: load files ----
    aro_hjorth_mob = np.load('features/deap/Hjorth_mobilty_1_1.npz')
    aro_hjorth_comp = np.load('features/deap/Hjorth_complexity_1_1.npz')
    aro_std_dev = np.load('features/deap/stdDev_1_1.npz')
    aro_bandpwr_beta = np.load('features/deap/bandPwr_beta_1_1.npz')
    aro_bandpwr_theta = np.load('features/deap/bandPwr_theta_1_1.npz')
    aro_shannon_beta = np.load('features/deap/ShanninRes_sub_bands_beta_1_1.npz')
    aro_shannon_gamma = np.load('features/deap/ShannonRes_sub_bands_gamma_1_1.npz')
    aro_dasm_beta = np.load('features/deap_RASM_DASM/beta_1_1.npz')

    # ---- arousal: stack feature rows, then put samples on axis 0 ----
    arousal_blocks = [
        np.empty((0,80640)),
        aro_hjorth_mob['features'],
        aro_hjorth_comp['features'],
        aro_std_dev['features'],
        aro_bandpwr_beta['features'],
        aro_bandpwr_theta['features'],
        aro_shannon_beta['features'],
        aro_shannon_gamma['features'],
        aro_dasm_beta['features'].T[7:,:],
    ]
    feature_matrix_arousal = np.concatenate(arousal_blocks, axis = 0).T #80640 x 105

    # arousal is column 1 of the stored labels
    Y_aro = aro_hjorth_mob['Y'][:,1]

    # ---- incremental training: valence ----
    model = SVR() #support vector regressor for training
    print('')
    print('Incremental training for valence')
    print('')
    test_subject = []
    rmse_val = []
    subject_indexes_valence = segregate_data_of_subjects(feature_matrix_valence,32,128)
    for trained_upto in range(1, 33):
        rmse_val,test_subject = training_phase(model,feature_matrix_valence,Y_val,subject_indexes_valence,trained_upto,32,rmse_val,test_subject)

    # ---- incremental training: arousal ----
    print('')
    print('Incremental training for arousal ')
    print(' ')
    model = SVR() #fresh model for the second target
    test_subject = []
    rmse_aro = []
    subject_indexes_arousal = segregate_data_of_subjects(feature_matrix_arousal,32,128)
    for trained_upto in range(1, 33):
        rmse_aro,test_subject = training_phase(model,feature_matrix_arousal,Y_aro,subject_indexes_arousal,trained_upto,32,rmse_aro,test_subject)

    # ---- report ----
    fig,axe = plt.subplots(1,1,figsize = (40,20))
    axe.plot(test_subject,rmse_val,color='r',label='rmse valence')
    axe.plot(test_subject,rmse_aro,color = 'g',label='rmse arousal')
    axe.set_xlabel('trained upto subject')
    axe.set_ylabel('rmse')
    axe.set_title('support vector regressor')
    axe.legend(loc = 'upper right')

    df = pd.DataFrame([rmse_val,rmse_aro],columns = test_subject,index = ['valence rms','arousal rms'])
    print(df)

    if save == 'y':
        plt.savefig('plots/deap/all_feature_valence_arousal_rmse',format = "svg")
        df.to_csv('plots/deap/all_features_valence_arousal_rmse.csv')
|
||||
|
||||
|
||||
# In[ ]:
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # The save flag is optional. driver_code only writes the plot/CSV when the
    # flag equals 'y', so default to 'n' instead of crashing with IndexError
    # when the script is started without an argument.
    driver_code(sys.argv[1] if len(sys.argv) > 1 else 'n')
|
||||
|
||||
@@ -0,0 +1,259 @@
|
||||
#!/usr/bin/env python
|
||||
# coding: utf-8
|
||||
|
||||
# In[ ]:
|
||||
|
||||
|
||||
import numpy as np
|
||||
import matplotlib.pyplot as plt
|
||||
import pandas as pd
|
||||
import sys
|
||||
from sklearn.preprocessing import MinMaxScaler,StandardScaler
|
||||
from sklearn.utils import shuffle
|
||||
from sklearn.metrics import mean_squared_error
|
||||
from sklearn.model_selection import train_test_split
|
||||
|
||||
architecture = 'sklearn'
|
||||
if architecture == 'sklearn':
|
||||
from sklearn.svm import SVR
|
||||
from sklearn.metrics import accuracy_score
|
||||
else:
|
||||
from cuml.svm import SVR
|
||||
from cuml.ensemble import RandomForestRegressor
|
||||
from cuml.metrics import accuracy_score
|
||||
|
||||
|
||||
# In[ ]:
|
||||
|
||||
|
||||
# Either one of the MinMaxScaling or StandardScaling function can be used
|
||||
|
||||
# Module-level scaler instances shared by the two helpers below. Each helper
# re-fits its scaler on every call, so the stored statistics always describe
# the most recently scaled matrix.
scaler_min_max = MinMaxScaler()
scaler_standard = StandardScaler()


def MinMaxScaling(feature_matrix):
    '''
    Fit the shared MinMaxScaler on feature_matrix and return the rescaled matrix.

    parameters :-
        feature_matrix :- 2-D array (samples x features) to be scaled column-wise
    returns :-
        the transformed copy of feature_matrix produced by the freshly fitted scaler
    '''
    # fit() hands back the scaler itself, so fitting and transforming can be chained
    fitted_scaler = scaler_min_max.fit(feature_matrix)
    return fitted_scaler.transform(feature_matrix)
|
||||
def StandardScaling(feature_matrix):
    '''
    Fit the shared StandardScaler on feature_matrix and return the standardized matrix.

    parameters :-
        feature_matrix :- 2-D array (samples x features) to be standardized column-wise
    returns :-
        the transformed copy of feature_matrix produced by the freshly fitted scaler
    '''
    scaler_standard.fit(feature_matrix)
    print('scaling shape',scaler_standard.mean_.shape)
    # Fixed: this used to call `scaler.transform(...)`, but no name `scaler`
    # exists in this module, so every call ended in a NameError.
    return scaler_standard.transform(feature_matrix)
|
||||
|
||||
|
||||
# """##DREAMER dataset
|
||||
# 1> Valence - features selected
|
||||
# >
|
||||
# * HjorthMob
|
||||
# * HjorthCom
|
||||
# * stdDev
|
||||
# * bandPwr_theta
|
||||
# * ShannonRes_gamma
|
||||
# * bandPwr_beta
|
||||
#
|
||||
# 2> Arousal - feature selected
|
||||
# >
|
||||
# * HjorthMob
|
||||
# * HjorthComp
|
||||
# * stdDev
|
||||
# * bandPwr_theta
|
||||
# * bandPwr_gamma
|
||||
# * ShannonRes_gamma
|
||||
# """
|
||||
|
||||
# In[ ]:
|
||||
|
||||
|
||||
# now for incremental learning we need to segregate data of subjects
|
||||
def segregate_data_of_subjects(feature_matrix,total_subjects,sfreq = 128):
    '''
    Cut the feature matrix into equal, consecutive row blocks, one block per subject.

    parameters :-
        feature_matrix :- 2-D array (samples x features) with the rows of each subject stored back to back
        total_subjects :- number of subjects the rows are divided between
        sfreq :- sampling frequency of the EEG data (accepted for the callers, not used in the computation)
    returns :-
        subject_indexes :- dict mapping 'subject_1' ... 'subject_N' to that subject's rows

    Rows left over when the row count is not a multiple of total_subjects are not
    assigned to any subject.
    '''
    rows_per_subject = feature_matrix.shape[0]//total_subjects
    return {
        'subject_' + str(number): feature_matrix[rows_per_subject*(number-1):rows_per_subject*number,:]
        for number in range(1,total_subjects+1)
    }
|
||||
|
||||
|
||||
# In[ ]:
|
||||
|
||||
|
||||
# now defining a function which carries out the incremental learning algorithm
|
||||
def training_phase(model,feature_matrix,Y,subject_indexes,number_of_subjects,total_subjects,rmse_score,test_subject):
    '''
    Fit the model on the pooled data of the first number_of_subjects subjects and record the hold-out RMSE.

    parameters :-
        model :- regressor exposing fit/predict (an SVR in this script)
        feature_matrix :- full (samples x features) matrix; only its shape is read here
        Y :- label vector; its first samples_per_subject*number_of_subjects entries are used
        subject_indexes :- dict 'subject_k' -> feature rows of subject k (see segregate_data_of_subjects)
        number_of_subjects :- how many subjects, counted from subject 1, are pooled in this iteration
        total_subjects :- total number of subjects, used to derive the number of rows per subject
        rmse_score :- list of RMSE values of the previous iterations (appended to in place)
        test_subject :- list of subject names of the previous iterations (appended to in place)

    returns :-
        rmse_score :- the list with the RMSE of the current iteration appended
        test_subject :- the list with the name of the last pooled subject appended
    '''
    no_of_features = feature_matrix.shape[1]
    print('training on subject_no:',end = ' ')

    # Collect the per-subject blocks and join them once. Growing the matrix with
    # np.append inside the loop re-copied every earlier row on each pass.
    # The empty float seed keeps the resulting dtype identical to the old append chain.
    subject_blocks = [np.empty((0,no_of_features))]
    for subject in range(number_of_subjects):
        print(subject+1,end = ' ')
        subject_name = 'subject_'+str(subject+1)
        subject_blocks.append(subject_indexes[subject_name])
    print(' ')
    X = np.concatenate(subject_blocks,axis=0)

    # NOTE: the scaler is fitted on all pooled rows, i.e. before the train/test split below
    X = MinMaxScaling(X)

    # labels of the pooled subjects are the leading rows of Y (one slice is enough;
    # the previous loop recomputed this same slice once per subject)
    samples_per_subject = feature_matrix.shape[0]//total_subjects
    y = Y[:samples_per_subject*number_of_subjects]

    print('shape of X is :',X.shape)
    print('shape of y is :',y.shape)

    #shuffling data randomly to feed to model
    X,y = shuffle(X,y,random_state = 0)

    #doing a train test split of 80:20
    X_train,X_test,y_train,y_test = train_test_split(X,y,random_state=0,test_size=0.2)

    #training and testing the model
    model = model.fit(X_train,y_train)
    y_predict = model.predict(X_test)

    #rmse of the model fitted in the current iteration
    y_rms = np.sqrt(mean_squared_error(y_test,y_predict))
    print('rms on y :',y_rms)
    print('')
    rmse_score.append(y_rms)
    test_subject.append(subject_name)

    return rmse_score,test_subject
|
||||
|
||||
|
||||
# In[ ]:
|
||||
|
||||
|
||||
def driver_code(save):
    '''
    Load the DREAMER features, run the incremental SVR training for valence and
    arousal, plot both RMSE curves and optionally save the plot and the RMSE table.

    parameters :-
        save :- when equal to 'y' the figure and the csv are written under plots/dreamer/,
                any other value skips saving
    '''
    total_subjects = 23

    # Every feature archive is opened once and shared by the valence and arousal
    # matrices (the two targets use mostly the same files).
    HjorthMob = np.load('features/dreamer/Hjorth_mobilty_1_1.npz')
    HjorthCom = np.load('features/dreamer/Hjorth_complexity_1_1.npz')
    stdDev = np.load('features/dreamer/stdDev_1_1.npz')
    bandPwr_theta = np.load('features/dreamer/bandPwr_theta_1_1.npz')
    bandPwr_beta = np.load('features/dreamer/bandPwr_beta_1_1.npz')
    bandPwr_gamma = np.load('features/dreamer/bandPwr_gamma_1_1.npz')
    ShannonRes_gamma = np.load('features/dreamer/ShannonRes_sub_bands_gamma_1_1.npz')

    def build_feature_matrix(archives):
        # Stack the (features x samples) blocks of all archives and transpose to
        # (samples x features). The sample count is taken from the data instead of
        # the previously hard-coded 188370; the float seed keeps the dtype of the
        # old np.append chain.
        blocks = [archive['features'] for archive in archives]
        seed = np.empty((0,blocks[0].shape[1]))
        return np.concatenate([seed]+blocks,axis=0).T

    def incremental_rmse(feature_matrix,labels):
        # Train on subjects 1..i for i = 1..total_subjects and collect the RMSE of every step.
        # A fresh SVR per target matches the sibling DEAP/OASIS scripts; fit() retrains
        # from scratch on every call, so the scores are the same as with a shared model.
        model = SVR()
        subject_indexes = segregate_data_of_subjects(feature_matrix,total_subjects,128)
        rmse,trained_upto = [],[]
        for i in range(1,total_subjects+1):
            rmse,trained_upto = training_phase(model,feature_matrix,labels,subject_indexes,i,total_subjects,rmse,trained_upto)
        return rmse,trained_upto

    # valence: HjorthMob, HjorthCom, stdDev, bandPwr_theta, bandPwr_beta, ShannonRes_gamma
    feature_matrix_valence = build_feature_matrix(
        [HjorthMob,HjorthCom,stdDev,bandPwr_theta,bandPwr_beta,ShannonRes_gamma])
    Y_val = HjorthMob['Y'][:,0]#all features have same valence labels

    # arousal: HjorthMob, HjorthCom, stdDev, bandPwr_theta, bandPwr_gamma, ShannonRes_gamma
    feature_matrix_arousal = build_feature_matrix(
        [HjorthMob,HjorthCom,stdDev,bandPwr_theta,bandPwr_gamma,ShannonRes_gamma])
    Y_aro = HjorthMob['Y'][:,1]#all features have same arousal labels

    #running incremental learning loop for valence
    print('')
    print('Incremental training for valence')
    print('')
    rmse_val,test_subject = incremental_rmse(feature_matrix_valence,Y_val)

    #running incremental learning loop for arousal
    print('')
    print('Incremental training for arousal ')
    print(' ')
    rmse_aro,test_subject = incremental_rmse(feature_matrix_arousal,Y_aro)

    fig,axe = plt.subplots(1,1,figsize = (40,20))
    axe.plot(test_subject,rmse_val,color='r',label = 'rms valence')
    axe.plot(test_subject,rmse_aro,color = 'g',label = 'rms arousal')
    axe.set_xlabel('trained upto subject')
    axe.set_ylabel('rmse')
    axe.set_title('support vector regressor')
    axe.legend(loc='upper right')

    df = pd.DataFrame([rmse_val,rmse_aro],columns = test_subject,index = ['valence rms','arousal rms'])
    print(df)

    if save == 'y':
        plt.savefig('plots/dreamer/all_feature_valence_arousal_rmse',format = "svg")
        df.to_csv('plots/dreamer/all_features_valence_arousal_rmse.csv')
|
||||
|
||||
|
||||
# In[ ]:
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # The save flag is optional. driver_code only writes the plot/CSV when the
    # flag equals 'y', so default to 'n' instead of crashing with IndexError
    # when the script is started without an argument.
    driver_code(sys.argv[1] if len(sys.argv) > 1 else 'n')
|
||||
|
||||
@@ -0,0 +1,255 @@
|
||||
#!/usr/bin/env python
|
||||
# coding: utf-8
|
||||
|
||||
# In[ ]:
|
||||
|
||||
|
||||
import numpy as np
|
||||
import matplotlib.pyplot as plt
|
||||
import pandas as pd
|
||||
import sys
|
||||
from sklearn.preprocessing import MinMaxScaler,StandardScaler
|
||||
from sklearn.utils import shuffle
|
||||
from sklearn.metrics import mean_squared_error
|
||||
from sklearn.model_selection import train_test_split
|
||||
from sklearn.svm import SVR
|
||||
from sklearn.metrics import accuracy_score
|
||||
|
||||
|
||||
# In[ ]:
|
||||
|
||||
|
||||
# Either one of the MinMaxScaling or StandardScaling function can be used
|
||||
|
||||
# Module-level scaler instances shared by the two helpers below. Each helper
# re-fits its scaler on every call, so the stored statistics always describe
# the most recently scaled matrix.
scaler_min_max = MinMaxScaler()
scaler_standard = StandardScaler()


def MinMaxScaling(feature_matrix):
    '''
    Fit the shared MinMaxScaler on feature_matrix and return the rescaled matrix.

    parameters :-
        feature_matrix :- 2-D array (samples x features) to be scaled column-wise
    returns :-
        the transformed copy of feature_matrix produced by the freshly fitted scaler
    '''
    # fit() hands back the scaler itself, so fitting and transforming can be chained
    fitted_scaler = scaler_min_max.fit(feature_matrix)
    return fitted_scaler.transform(feature_matrix)
|
||||
|
||||
def StandardScaling(feature_matrix):
    '''
    Fit the shared StandardScaler on feature_matrix and return the standardized matrix.

    parameters :-
        feature_matrix :- 2-D array (samples x features) to be standardized column-wise
    returns :-
        the transformed copy of feature_matrix produced by the freshly fitted scaler
    '''
    scaler_standard.fit(feature_matrix)
    print('scaling shape',scaler_standard.mean_.shape)
    # Fixed: this used to call `scaler.transform(...)`, but no name `scaler`
    # exists in this module, so every call ended in a NameError.
    return scaler_standard.transform(feature_matrix)
|
||||
|
||||
|
||||
# """##OASIS dataset
|
||||
# 1> Valence - features selected
|
||||
# >
|
||||
# * HjorthMob
|
||||
# * HjorthComp
|
||||
# * stdDev
|
||||
#
|
||||
# 2> Arousal - feature selected
|
||||
# >
|
||||
# * HjorthMob
|
||||
# """
|
||||
|
||||
# In[ ]:
|
||||
|
||||
|
||||
# now for incremental learning we need to segregate data of subjects
|
||||
def segregate_data_of_subjects(feature_matrix,Y,total_subjects,sfreq = 128):
    '''
    Split the feature rows and their labels into per-subject pieces using fixed row boundaries.

    parameters :-
        feature_matrix :- samples x features matrix with the rows of each subject stored back to back
        Y :- labels aligned row-wise with feature_matrix
        total_subjects :- total number of subjects (accepted for the callers; the boundary table below always describes 15 subjects)
        sfreq :- sampling frequency of the EEG data (not used in the computation)
    returns :-
        subject_indexes :- dict 'subject_1' ... 'subject_15' -> feature rows of that subject
        aligned_y :- dict with the same keys -> labels of that subject
    '''
    # Row offset at which each subject starts; the last entry closes subject 15.
    # The subjects contribute between 190 and 200 rows each.
    boundaries = (0,200,400,600,795,995,1185,1375,1575,1770,1965,2160,2360,2550,2740,2935)

    subject_indexes = {}
    aligned_y = {}
    for number,(start,stop) in enumerate(zip(boundaries,boundaries[1:]),start=1):
        key = 'subject_'+str(number)
        subject_indexes[key] = feature_matrix[start:stop]
        aligned_y[key] = Y[start:stop]

    return subject_indexes,aligned_y
|
||||
|
||||
|
||||
# In[ ]:
|
||||
|
||||
|
||||
# now defining a function which carries out the incremental learning algorithm
|
||||
def training_phase(model,feature_matrix,Y,subject_indexes,number_of_subjects,total_subjects,rmse_score,test_subject):
    '''
    Fit the model on the pooled data of the first number_of_subjects subjects and record the hold-out RMSE.

    parameters :-
        model :- regressor exposing fit/predict (an SVR in this script)
        feature_matrix :- full (samples x features) matrix; only its column count is read here
        Y :- dict 'subject_k' -> labels of subject k (the aligned_y returned by segregate_data_of_subjects)
        subject_indexes :- dict 'subject_k' -> feature rows of subject k
        number_of_subjects :- how many subjects, counted from subject 1, are pooled in this iteration
        total_subjects :- total number of subjects in the study (not used in the computation)
        rmse_score :- list of RMSE values of the previous iterations (appended to in place)
        test_subject :- list of subject names of the previous iterations (appended to in place)

    returns :-
        rmse_score :- the list with the RMSE of the current iteration appended
        test_subject :- the list with the name of the last pooled subject appended
    '''
    no_of_features = feature_matrix.shape[1]
    print('training on subject_no:',end = ' ')

    # Collect the per-subject features and labels and join each list once. Growing
    # the arrays with np.append inside the loops re-copied all earlier rows on every pass.
    # The empty float seeds keep the resulting dtypes identical to the old append chains.
    feature_blocks = [np.empty((0,no_of_features))]
    label_blocks = [np.empty((0))]
    for subject in range(number_of_subjects):
        print(subject+1,end = ' ')
        subject_name = 'subject_'+str(subject+1)
        feature_blocks.append(subject_indexes[subject_name])
        label_blocks.append(Y[subject_name])
    print(' ')

    # NOTE: the scaler is fitted on all pooled rows, i.e. before the train/test split below
    X = MinMaxScaling(np.concatenate(feature_blocks,axis=0))
    y = np.concatenate(label_blocks,axis=0)

    print('shape of X is :',X.shape)
    print('shape of y is :',y.shape)

    #shuffling data randomly to feed to model
    X,y = shuffle(X,y,random_state = 0)

    #doing a train test split of 80:20
    X_train,X_test,y_train,y_test = train_test_split(X,y,random_state=0,test_size=0.2)

    #training and testing the model
    model = model.fit(X_train,y_train)
    y_predict = model.predict(X_test)

    #rmse of the model fitted in the current iteration
    y_rms = np.sqrt(mean_squared_error(y_test,y_predict))
    print('rms on y :',y_rms)
    print('')
    rmse_score.append(y_rms)
    test_subject.append(subject_name)

    return rmse_score,test_subject
|
||||
|
||||
|
||||
# In[ ]:
|
||||
|
||||
|
||||
def driver_code(save):
    '''
    Load the OASIS features, run the incremental SVR training for valence and
    arousal, plot both RMSE curves and optionally save the plot and the RMSE table.

    parameters :-
        save :- when equal to 'y' the figure and the csv are written under plots/oasis/,
                any other value skips saving
    '''
    total_subjects = 15

    # Every feature archive is opened once; Hjorth mobility serves both targets
    HjorthMob = np.load('features/oasis/with_autoreject/Hjorth_mobilty_0_0.npz')
    HjorthComp = np.load('features/oasis/with_autoreject/Hjorth_complexity_0_0.npz')
    stdDev = np.load('features/oasis/with_autoreject/stdDev_0_0.npz')

    def build_feature_matrix(archives):
        # Stack the (features x samples) blocks of all archives and transpose to
        # (samples x features). The sample count is taken from the data instead of
        # the previously hard-coded 2935; the float seed keeps the dtype of the
        # old np.append chain.
        blocks = [archive['features'] for archive in archives]
        seed = np.empty((0,blocks[0].shape[1]))
        return np.concatenate([seed]+blocks,axis=0).T

    def incremental_rmse(feature_matrix,labels):
        # Train on subjects 1..i for i = 1..total_subjects and collect the RMSE of every step
        model = SVR()
        subject_indexes,aligned_labels = segregate_data_of_subjects(feature_matrix,labels,total_subjects,128)
        rmse,trained_upto = [],[]
        for i in range(1,total_subjects+1):
            rmse,trained_upto = training_phase(model,feature_matrix,aligned_labels,subject_indexes,i,total_subjects,rmse,trained_upto)
        return rmse,trained_upto

    # valence uses Hjorth mobility, Hjorth complexity and standard deviation
    feature_matrix_valence = build_feature_matrix([HjorthMob,HjorthComp,stdDev])
    Y_val = HjorthMob['Y'][:,0]

    # arousal uses Hjorth mobility only
    feature_matrix_arousal = build_feature_matrix([HjorthMob])
    Y_aro = HjorthMob['Y'][:,1]

    #running incremental learning loop for valence
    print('')
    print('Incremental training for valence')
    print('')
    rmse_val,test_subject = incremental_rmse(feature_matrix_valence,Y_val)

    #running incremental learning loop for arousal
    print('')
    print('Incremental training for arousal ')
    print(' ')
    rmse_aro,test_subject = incremental_rmse(feature_matrix_arousal,Y_aro)

    fig,axe = plt.subplots(1,1,figsize = (40,20))
    axe.plot(test_subject,rmse_val,color='r',label = 'rmse valence')
    axe.plot(test_subject,rmse_aro,color = 'g',label = 'rmse arousal')
    axe.set_xlabel('trained upto subject')
    axe.set_ylabel('rmse')
    axe.set_title('support vector regressor')
    axe.legend(loc = 'upper right')

    df = pd.DataFrame([rmse_val,rmse_aro],columns = test_subject,index = ['valence rms','arousal rms'])

    if save == 'y':
        plt.savefig('plots/oasis/all_feature_valence_arousal_rmse',format="svg")
        df.to_csv('plots/oasis/all_features_valence_arousal_rmse.csv')
|
||||
|
||||
|
||||
# In[ ]:
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # The save flag is optional. driver_code only writes the plot/CSV when the
    # flag equals 'y', so default to 'n' instead of crashing with IndexError
    # when the script is started without an argument.
    driver_code(sys.argv[1] if len(sys.argv) > 1 else 'n')
|
||||
|
||||
@@ -0,0 +1,207 @@
|
||||
#!/usr/bin/env python
|
||||
# coding: utf-8
|
||||
|
||||
# In[ ]:
|
||||
|
||||
|
||||
import glob
|
||||
import pandas as pd
|
||||
import matplotlib.pyplot as plt
|
||||
|
||||
|
||||
# In[ ]:
|
||||
|
||||
|
||||
from google.colab import drive
|
||||
drive.mount('/gdrive',force_remount=True)
|
||||
|
||||
|
||||
# In[ ]:
|
||||
|
||||
|
||||
get_ipython().run_line_magic('cd', '/gdrive/MyDrive/emotion_recognition_project/')
|
||||
|
||||
|
||||
# Script to obtain the incremental learning graph for the DEAP, DREAMER and OASIS datasets.
|
||||
|
||||
# ##plots for DEAP
|
||||
|
||||
# In[ ]:
|
||||
|
||||
|
||||
# glob returns its matches in arbitrary order; sort them so that the CSV picked
# below is the same on every run and every filesystem.
dataset_deap=sorted(glob.glob('plots/deap/*.csv'))


# In[ ]:


dataset_deap


# In[ ]:


# The CSV holds one row per target (valence, arousal) and one column per training
# step. After transposing, the first row is the CSV's unnamed label column and is dropped.
dataset_svr_deap = pd.read_csv(dataset_deap[0]).T
dataset_svr_deap.columns = ['valence','arousal']
dataset_svr_deap = dataset_svr_deap.drop('Unnamed: 0')
x_deap = range(1,33,1)  # x positions 1..32, one per training step
dataset_svr_deap


# In[ ]:


# The font size has to be set before the figure is built: rcParams are only
# picked up by artists created afterwards.
plt.rcParams.update({'font.size':40})
fig_deap,axe_deap = plt.subplots(1,1,figsize = (17,10))
axe_deap.plot(x_deap,dataset_svr_deap['valence'],color='green',marker = 'x',markersize=10)
axe_deap.plot(x_deap,dataset_svr_deap['arousal'],color ='red',marker = 'x',markersize=10)
# NOTE(review): the labels say 'rfr' while the frame is named *_svr_* - confirm which regressor produced the CSV
axe_deap.legend(['rfr_valence','rfr_arousal'],)
axe_deap.set_xlabel('trained upto subject')
axe_deap.set_ylabel('RMSE values')
plt.tight_layout()
plt.xticks(x_deap[::3])


# In[ ]:


fig_deap.savefig('final_plots/deap_rfr__valence_arousal_rms.svg')
fig_deap.savefig('final_plots/deap_rfr__valence_arousal_rms.png')
|
||||
|
||||
|
||||
# ##plots for DREAMER
|
||||
|
||||
# In[ ]:
|
||||
|
||||
|
||||
# glob returns its matches in arbitrary order; sort them so that the CSV picked
# below is the same on every run and every filesystem.
dataset_dreamer=sorted(glob.glob('plots/dreamer/*.csv'))


# In[ ]:


dataset_dreamer


# In[ ]:


# The CSV holds one row per target (valence, arousal) and one column per training
# step. After transposing, the first row is the CSV's unnamed label column and is dropped.
dataset_svr_dreamer = pd.read_csv(dataset_dreamer[0]).T
dataset_svr_dreamer.columns = ['valence','arousal']
dataset_svr_dreamer = dataset_svr_dreamer.drop('Unnamed: 0')
x_dreamer = range(1,24,1)  # x positions 1..23, one per training step
dataset_svr_dreamer


# In[ ]:


# The font size has to be set before the figure is built: rcParams are only
# picked up by artists created afterwards.
plt.rcParams.update({'font.size':40})
fig_dreamer,axe_dreamer = plt.subplots(1,1,figsize=(17,10))
axe_dreamer.plot(x_dreamer,dataset_svr_dreamer['valence'],color='green',marker = 'x',markersize=10)
axe_dreamer.plot(x_dreamer,dataset_svr_dreamer['arousal'],color ='red',marker = 'x',markersize=10)
# NOTE(review): the labels say 'rfr' while the frame is named *_svr_* - confirm which regressor produced the CSV
axe_dreamer.legend(['rfr_valence','rfr_arousal'],)
axe_dreamer.set_xlabel('trained upto subject')
axe_dreamer.set_ylabel('RMSE values')
plt.tight_layout()
plt.xticks(x_dreamer[::3])


# In[ ]:


fig_dreamer.savefig('final_plots/dreamer_rfr__valence_arousal_rms.svg')
fig_dreamer.savefig('final_plots/dreamer_rfr__valence_arousal_rms.png')
|
||||
|
||||
|
||||
# ##plots for oasis
|
||||
|
||||
# In[ ]:
|
||||
|
||||
|
||||
# glob returns its matches in arbitrary order; sort them so that the CSV picked
# below is the same on every run and every filesystem.
dataset_oasis=sorted(glob.glob('plots/oasis/*.csv'))


# In[ ]:


dataset_oasis


# In[ ]:


# The CSV holds one row per target (valence, arousal) and one column per training
# step. After transposing, the first row is the CSV's unnamed label column and is dropped.
dataset_svr_oasis = pd.read_csv(dataset_oasis[0]).T
dataset_svr_oasis.columns = ['valence','arousal']
dataset_svr_oasis = dataset_svr_oasis.drop('Unnamed: 0')
x_oasis = range(1,16,1)  # x positions 1..15, one per training step
dataset_svr_oasis


# In[ ]:


# The font size has to be set before the figure is built: rcParams are only
# picked up by artists created afterwards.
plt.rcParams.update({'font.size':40})
fig_oasis,axe_oasis = plt.subplots(1,1,figsize=(17,10))
axe_oasis.plot(x_oasis,dataset_svr_oasis['valence'],color='green',marker = 'x',markersize=10)
axe_oasis.plot(x_oasis,dataset_svr_oasis['arousal'],color ='red',marker = 'x',markersize=10)
axe_oasis.set_xlabel('trained upto subject')
axe_oasis.set_ylabel('RMSE values')
# NOTE(review): the labels say 'rfr' while the frame is named *_svr_* - confirm which regressor produced the CSV
axe_oasis.legend(['rfr_valence','rfr_arousal'],loc = 'lower right')
plt.xticks(x_oasis[::3])
plt.tight_layout()


# In[ ]:


fig_oasis.savefig('final_plots/oasis_rfr__valence_arousal_rms.svg')
fig_oasis.savefig('final_plots/oasis_rfr__valence_arousal_rms.png')
|
||||
|
||||
|
||||
# In[ ]:
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
# In[ ]:
|
||||
|
||||
|
||||
# Combined figure: one panel per dataset, valence in green and arousal in red.
# The font size has to be set before the figure is built: rcParams are only
# picked up by artists created afterwards.
plt.rcParams.update({'font.size':40})
f,a = plt.subplots(3,1,figsize = (40,30))

panels = (
    (a[0],x_deap,dataset_svr_deap,'DEAP'),
    (a[1],x_dreamer,dataset_svr_dreamer,'DREAMER'),
    (a[2],x_oasis,dataset_svr_oasis,'OASIS'),
)
for panel_axis,x_values,rmse_frame,panel_title in panels:
    panel_axis.plot(x_values,rmse_frame['valence'],color='green',marker = 'x',markersize=10)
    panel_axis.plot(x_values,rmse_frame['arousal'],color ='red',marker = 'x',markersize=10)
    panel_axis.set_ylabel('RMSE values')
    panel_axis.set_title(panel_title)

# Only two lines are drawn per panel, so only two labels are passed (the old call
# listed four; the surplus 'rfr_*' entries had no line to attach to).
a[0].legend(['svr_valence','svr_arousal'],)
# the x label is shown on the bottom panel only
a[2].set_xlabel('trained upto subject')
plt.tight_layout()


# In[ ]:


f.savefig('final_plots/all_plots_incremental learning.svg')
|
||||
|
||||
|
||||
# In[ ]:
|
||||
|
||||
|
||||
|
||||
|
||||
+487
@@ -0,0 +1,487 @@
|
||||
#!/usr/bin/env python
|
||||
# coding: utf-8
|
||||
|
||||
# In[ ]:
|
||||
|
||||
|
||||
from google.colab import drive
|
||||
drive.mount('/gdrive',force_remount = True)
|
||||
|
||||
|
||||
# In[ ]:
|
||||
|
||||
|
||||
get_ipython().system('pip install mne')
|
||||
|
||||
|
||||
# In[ ]:
|
||||
|
||||
|
||||
get_ipython().system('pip install autoreject')
|
||||
|
||||
|
||||
# In[ ]:
|
||||
|
||||
|
||||
import numpy as np
|
||||
import mne
|
||||
import autoreject
|
||||
from scipy.stats import pearsonr
|
||||
import pickle
|
||||
|
||||
|
||||
# In[ ]:
|
||||
|
||||
|
||||
get_ipython().run_line_magic('cd', '/gdrive/MyDrive/emotion_recognition_project/')
|
||||
|
||||
|
||||
# In[ ]:
|
||||
|
||||
|
||||
class preprocessing:
    '''
    Load one of the datasets (DREAMER, DEAP or OASIS) and store its arrays and recording parameters
    '''
    def __init__(self,name):
        self.name = name #name of dataset
        self.X = None
        self.Y = None
        self.Z = None
        self.gyroscope = None #only filled for OASIS
        self.freq = None #(in Hz) is same for all datasets
        self.channels = None
        self.ch_type = 'eeg'

    def load_arrays(self):
        '''
        loads arrays in object variables of the form
        X: trials x channels x timepoints (the last two axes are swapped by reshape_data at the end)
        Y: trials x (valence,arousal)
        Z: trials x participant no

        raises ValueError when self.name is not one of 'DREAMER', 'DEAP', 'OASIS'
        '''
        if self.name == 'DREAMER':
            array = np.load('original_data/DREAMER.npz')
            self.freq = 128
            self.channels = ['AF3','F7','F3','FC5','T7','P7','O1','O2','P8','T8','FC6','F4','F8','AF4']
        elif self.name == 'DEAP':
            array = np.load('original_data/DEAP.npz')
            self.freq = 128
            # NOTE(review): the first entry is 'F1' while 'Fp2' appears later in the list - confirm it is not meant to be 'Fp1'
            self.channels = ['F1', 'AF3', 'F3', 'F7', 'FC5', 'FC1', 'C3', 'T7', 'CP5', 'CP1', 'P3', 'P7', 'PO3', 'O1', 'Oz', 'Pz', 'Fp2', 'AF4', 'Fz', 'F4', 'F8', 'FC6', 'FC2', 'Cz', 'C4', 'T8', 'CP6', 'CP2', 'P4', 'P8', 'PO4', 'O2', 'hEOG','vEOG', 'zEMG','tEMG','GSR','Respiration belt','Plethysmograph','Temperature']
        elif self.name == 'OASIS':
            array = np.load('original_data/OASIS.npz')
            self.channels = ['AF3', 'F7', 'F3', 'FC5', 'T7', 'P7', 'O1', 'O2', 'P8', 'T8', 'FC6', 'F4', 'F8', 'AF4']
            self.freq = 128
        else:
            # previously an unknown name fell through to an UnboundLocalError on `array`
            raise ValueError('unknown dataset name: ' + repr(self.name))

        self.X = array['X']
        if self.name == 'DEAP':
            # keep only the first 32 entries of the last axis and the matching 32 channel names
            self.X = self.X[:,:,:32]
            self.channels = self.channels[:32]

        if self.name == 'OASIS':
            self.gyroscope = array['gyroscope']

        self.Y = array['Y']
        self.Z = array['Z']
        self.reshape_data()

    def reshape_data(self):
        '''
        exchanges last two dimensions of data
        '''
        # Fixed: np.reshape(X,(a,c,b)) only re-labels the flat buffer, which mixes
        # values of different channels and timepoints. Swapping the axes keeps
        # every value attached to its (trial, channel, timepoint) position.
        # ascontiguousarray returns an ordinary C-ordered array, as before.
        self.X = np.ascontiguousarray(np.swapaxes(self.X,1,2))
|
||||
|
||||
|
||||
|
||||
|
||||
# In[ ]:
|
||||
|
||||
|
||||
class filters():
    '''
    Thin wrappers around the MNE filtering routines used for preprocessing
    '''
    @staticmethod
    def notch_filter(data,sfreq,notch_freq):
        # parameters :-
        # data :- EEG data
        # sfreq :- sampling frequency
        # notch_freq :- frequency to remove (generally 50Hz due to the AC current frequency)
        # returns :-
        # whatever mne.filter.notch_filter returns for the given data

        # one-element array holding only notch_freq
        freqs_to_remove = np.arange(notch_freq,notch_freq+1,1)
        return mne.filter.notch_filter(data,sfreq,freqs_to_remove)

    @staticmethod
    def butterworth_filter(data,sfreq,lfreq,hfreq):
        # parameters :-
        # data :- EEG data
        # sfreq :- sampling frequency
        # lfreq :- value forwarded as l_freq (presumably the lower edge of the pass band - confirm against the MNE docs)
        # hfreq :- value forwarded as h_freq (presumably the upper edge of the pass band - confirm against the MNE docs)
        # returns :-
        # whatever mne.filter.filter_data returns for the given data
        iir_settings = dict(sfreq = sfreq,l_freq = lfreq,h_freq = hfreq,method = 'iir',verbose = False)
        return mne.filter.filter_data(data = data,**iir_settings)
|
||||
|
||||
|
||||
|
||||
# In[ ]:
|
||||
|
||||
|
||||
class referencing():
    '''
    Re-referencing helpers for multi-channel data
    '''
    @staticmethod
    def average(data):
        '''
        Subtracts the mean taken over axis 1 from every entry along that axis.
        For data laid out as trials x channels x timepoints this removes, per trial
        and timepoint, the average voltage of all channels.
        '''
        # keepdims leaves a length-1 axis in place so the mean broadcasts against data
        channel_mean = np.mean(data,axis=1,keepdims=True)
        return data-channel_mean
|
||||
|
||||
|
||||
# In[ ]:
|
||||
|
||||
|
||||
class autoreject_custom:
    '''
    Run Autoreject algorithm here for artifact rejections
    '''

    @staticmethod
    def raw_object_creation(raw_data,channel_name,ch_types,sfreq):
        '''
        Creates the raw object which will be used for creating an epoch object.
        Returns the raw object with the standard 10-20 montage set.

        raw_data     :- EEG data
        channel_name :- Names of the channels of EEG data used
        ch_types     :- Whether each channel is EEG/Gyro, etc
        sfreq        :- sampling frequency
        '''
        montage = mne.channels.make_standard_montage('standard_1020')

        # info object describing the channels; needed to build the RawArray
        info = mne.create_info(ch_names=channel_name,sfreq=sfreq,ch_types = ch_types,verbose = False)

        # create raw object directly from array
        raw_object = mne.io.RawArray(data = raw_data,info = info,verbose = False)

        # attach electrode positions of the 10-20 system
        raw_object.set_montage(montage)

        return raw_object

    @staticmethod
    def epoch_object_creation(raw_object,start=0,duration=1,tmin=0,tmax=0.99):
        '''
        Makes an epoch object which will be used for autoreject algorithm.

        raw_object :- raw object to cut into fixed-length epochs
        start      :- passed to mne.make_fixed_length_events as start
        duration   :- passed to mne.make_fixed_length_events as duration
        tmin, tmax :- passed to mne.Epochs as the epoch window around each event

        start, tmin and tmax used to be ignored in favour of hard-coded 0, 0 and 0.99;
        the defaults keep that behaviour for callers that do not pass them.
        '''
        # creating fixed length events
        events = mne.make_fixed_length_events(raw_object,id=1,start=start,duration = duration)

        # creating an epoch object
        epoch_object = mne.Epochs(raw_object,events = events,preload=True,baseline = None,reject=None,verbose=False,tmin=tmin,tmax=tmax)

        return epoch_object

    @staticmethod
    def autoreject_algo(epoch_object,n_interpolates,consensus_percs):
        '''
        Cleans the epochs and returns (cleaned epochs, reject log of the input epochs).

        n_interpolates are the ρ values that we would like autoreject to try and consensus_percs are the κ values that autoreject will try
        Epochs with more than κ∗N sensors (N total sensors) bad are dropped
        '''
        ar = autoreject.AutoReject(n_interpolates, consensus_percs, random_state=42,verbose = 'tqdm_notebook',cv=4,n_jobs=10)

        # fitting autoreject model to epoch data, then cleaning the same epochs
        ar.fit(epoch_object)
        epochs_clean = ar.transform(epoch_object)

        # the evoked averages that used to be computed here were never used, so they are gone
        return epochs_clean,ar.get_reject_log(epoch_object)
|
||||
|
||||
|
||||
|
||||
|
||||
# In[ ]:
|
||||
|
||||
|
||||
class source_decomposition():
    '''
    Decomposition of EEG data into source signals
    '''

    @staticmethod
    def ica(data,channels,ch_type,sfreq):
        '''
        Fits an infomax ICA with 14 components on the data and returns
        (source signals, mixing matrix).

        data     :- EEG data
        channels :- Names of the channels of EEG data used
        ch_type  :- Whether each channel is EEG/Gyro, etc
        sfreq    :- sampling frequency
        '''
        raw = autoreject_custom.raw_object_creation(data,channels,ch_type,sfreq)

        # defining ICA parameters
        decomposer = mne.preprocessing.ICA(method='infomax',n_components=14)
        decomposer.fit_params['max_iter'] =300
        decomposer.fit(raw,verbose=False)

        sources = decomposer.get_sources(raw).get_data()
        return sources,decomposer.mixing_matrix_
|
||||
|
||||
|
||||
# In[ ]:
|
||||
|
||||
|
||||
def process_trial(a,acc_x,acc_y,acc_z,sfreq=None):
    '''
    Corrects source signals that correlate unusually strongly with the accelerometer.

    a     :- EEG source signals after ICA (one source per row); modified in place and returned
    acc_x :- accelerometer channel along X axis
    acc_y :- accelerometer channel along Y axis
    acc_z :- accelerometer channel along Z axis
    sfreq :- sampling frequency used for the high pass filter; when None the
             frequency of the module-level `dataset` is used, as before
    '''
    accelerometer_axes = (acc_x,acc_y,acc_z)
    n_sources = a.shape[0]

    # pearson co-eff between each source signal and each accelerometer axis
    pcoeff_arr = np.zeros((n_sources,3))
    for i in range(n_sources):
        for axis,reading in enumerate(accelerometer_axes):
            pcoeff_arr[i,axis] = pearsonr(a[i],reading)[0]

    # per-axis threshold: mean + 2 standard deviations of the co-efficients over all sources
    # NOTE(review): only the upper tail is tested, strongly negative correlations are not flagged - confirm intent
    error = np.mean(pcoeff_arr,axis = 0) + 2 * np.std(pcoeff_arr,axis = 0)

    # a source is bad when it exceeds the threshold on at least one axis
    bad_source_index = np.flatnonzero((pcoeff_arr > error).any(axis=1))

    if bad_source_index.size:
        if sfreq is None:
            # resolved only when a filter is really needed, so the global is optional otherwise
            sfreq = dataset.freq
        # correcting bad sources by high pass 3Hz filter as motion artifacts are said to exist in low frequencies
        for index in bad_source_index:
            a[index] = filters.butterworth_filter(a[index],sfreq,3,None)

    return a #return corrected source signals
|
||||
|
||||
|
||||
# In[ ]:
|
||||
|
||||
|
||||
#load the OASIS arrays and show the shape of the gyroscope data
dataset = preprocessing('OASIS')
dataset.load_arrays()
dataset.gyroscope.shape


# In[ ]:


#re-reference the electrodes with the average referencing method
average_data = referencing.average(data=dataset.X)


# In[ ]:


#notch filter at 60 Hz on the referenced data (no band pass filter is applied in this cell)
filtered_data = filters.notch_filter(data=average_data,sfreq=dataset.freq,notch_freq=60)
|
||||
|
||||
|
||||
# In[ ]:
|
||||
|
||||
|
||||
no_of_trials = dataset.X.shape[0]

# bring the motion data to the shape trials x channels x timepoints.
# swapaxes really exchanges the two axes; np.reshape would only relabel them
# and interleave samples of different channels.
gyroscope_trials = np.swapaxes(dataset.gyroscope,1,2)

#iterating over all trials and correcting trial data for motion artifact
for trial_n in range(no_of_trials):
    print('processing trial no:',trial_n+1)
    trial_data = filtered_data[trial_n]

    # motion data of THIS trial (index 0 was used for every trial before);
    # rows 4 onward are taken as the accelerometer x, y, z readings
    gyroscope_trial = gyroscope_trials[trial_n,4:,:]
    gyroscope_trial_x = gyroscope_trial[0] # accelerometer x axis reading
    gyroscope_trial_y = gyroscope_trial[1] # accelerometer y axis reading
    gyroscope_trial_z = gyroscope_trial[2] # accelerometer z axis reading

    source_signals,mixing_matrix = source_decomposition.ica(trial_data,dataset.channels,dataset.ch_type,dataset.freq)
    corrected_sources = process_trial(source_signals,gyroscope_trial_x,gyroscope_trial_y,gyroscope_trial_z)

    #corrected sources are projected back into original dimensional space of EEG data using mixing matrix
    project_back = np.matmul(mixing_matrix,corrected_sources)
    filtered_data[trial_n] = project_back


# In[ ]:


filtered_data.shape
|
||||
|
||||
|
||||
# In[ ]:
|
||||
|
||||
|
||||
no_of_trials = dataset.X.shape[0]

# clean_epochs maps the 1-based trial number to a list holding
#   [cleaned epoch data, boolean array of bad epochs, percentage of epochs dropped,
#    dataset.Y[trial], dataset.Z[trial][1]]
# presumably Y holds the valence/arousal ratings and Z[trial][1] the image id - verify against the loader.
# Only trials for which autoreject dropped no epoch are stored.
#
# autoreject works on the 1 second epochs cut out of each trial by epoch_object_creation
# and rejects epochs based on the parameters it estimates.
clean_epochs ={}
for trial in range(no_of_trials):
    print('trial no',trial)
    temp = filtered_data[trial]

    raw_object = autoreject_custom.raw_object_creation(temp,dataset.channels,dataset.ch_type,dataset.freq)
    print(raw_object.get_data().shape)

    epoch = autoreject_custom.epoch_object_creation(raw_object)
    print(epoch.get_data().shape)

    clean_epoch,reject_log = autoreject_custom.autoreject_algo(
        epoch,
        n_interpolates = np.array([1, 4, 32]),
        consensus_percs = np.linspace(0, 1.0, 11),
    )

    dropped_percentage = clean_epoch.drop_log_stats()
    if dropped_percentage == 0:
        clean_epochs[trial+1] = [
            clean_epoch.get_data(),
            reject_log.bad_epochs,
            dropped_percentage,
            dataset.Y[trial],
            dataset.Z[trial][1],
        ]
|
||||
|
||||
|
||||
|
||||
# In[ ]:
|
||||
|
||||
|
||||
def _offer_to_save(dataset,data):
    # asks whether `data` should be stored (together with dataset.Y) as .npz under preprocessed_data/<dataset name>/
    print('do you want to save the data preprocessed so far?y/n')
    boolean = input()
    if boolean == 'y':
        filename = input('enter filename to save as')
        np.savez('preprocessed_data/'+dataset.name.lower()+'/'+filename,X = data,Y = dataset.Y)


def driver_code():
    '''
    Interactive preprocessing pipeline, every choice is read from the console.

    Steps: load a dataset, reference it, apply any number of filters, optionally
    save, and for OASIS optionally run motion artifact correction and autoreject.

    Differences to the previous version:
    * filters are applied to the referenced data and chained when several are
      chosen (the butterworth branch used to restart from the raw dataset.X)
    * the save prompts store the processed data instead of the raw dataset.X
    * an unknown referencing or filter choice no longer ends in a NameError
    * gyroscope axes are exchanged with swapaxes instead of np.reshape
    '''
    #load dataset
    dataset_dict = {0:'DEAP',1:'OASIS',2:'DREAMER'}
    print(dataset_dict)
    print('enter dataset mapping number you want to use')
    mapping = int(input())
    dataset = preprocessing(dataset_dict[mapping])
    dataset.load_arrays()

    #referencing
    print('next step in preprocessing is referencing')
    referencing_dict = {1:'average_referencing'}
    print(referencing_dict)
    print('enter referencing method')
    mapping = int(input())
    if mapping == 1:
        averaged_data = referencing.average(dataset.X)
    else:
        # only method 1 exists; carry on with the unreferenced data
        print('unknown referencing method, continuing without referencing')
        averaged_data = dataset.X

    print('next step is applying filters')
    filter_dict = {1:'notch_filter',2:"butter_worth_filter"}

    #filtering
    # starts from the referenced data so that every chosen filter builds on the previous result
    filtered_data = averaged_data
    applyed_filters = False
    while not applyed_filters:
        print(filter_dict)
        mapping = int(input())
        print('sampling frequency of dataset is',dataset.freq)
        if mapping == 1:
            print('enter notch frequency')
            notch_freq = float(input())
            filtered_data = filters.notch_filter(filtered_data,dataset.freq,notch_freq)
        elif mapping == 2:
            print('enter lower frequency')
            lfreq = float(input())
            print('enter higher frequency')
            hfreq = float(input())
            filtered_data = filters.butterworth_filter(filtered_data,dataset.freq,lfreq,hfreq)

        print('Do you want to apply filters again?enter y/n')
        boolean = input()
        if boolean == 'n':
            applyed_filters = True

    _offer_to_save(dataset,filtered_data)

    #motion artifact correction using gyroscopic data if dataset is oasis
    if dataset.name == 'OASIS':
        print('do you want to use motion artifact removal using gyroscopic data? y/n')
        boolean = input()
        if boolean == 'y':
            no_of_trials = dataset.X.shape[0]
            print('shape of gyroscope data before reshaping is:',dataset.gyroscope.shape)
            # trials x channels x timepoints; swapaxes exchanges the axes, np.reshape would only relabel them
            gyroscope_trials = np.swapaxes(dataset.gyroscope,1,2)

            #iterating over all trials and correcting trial data for motion artifact
            for trial_n in range(no_of_trials):
                print('processing trial no:',trial_n+1)
                trial_data = filtered_data[trial_n]
                # NOTE(review): the notebook-level loop takes rows 4 onward as accelerometer, here rows 0-2 are used - confirm which is right
                gyroscope_trial = gyroscope_trials[trial_n,:,:]
                gyroscope_trial_x = gyroscope_trial[0] # accelerometer x axis reading
                gyroscope_trial_y = gyroscope_trial[1] # accelerometer y axis reading
                gyroscope_trial_z = gyroscope_trial[2] # accelerometer z axis reading
                source_signals,mixing_matrix = source_decomposition.ica(trial_data,dataset.channels,dataset.ch_type,dataset.freq)
                corrected_sources = process_trial(source_signals,gyroscope_trial_x,gyroscope_trial_y,gyroscope_trial_z)

                #corrected sources are projected back into original dimensional space of EEG data using mixing matrix
                project_back = np.matmul(mixing_matrix,corrected_sources)
                filtered_data[trial_n] = project_back

            print(filtered_data.shape)
            _offer_to_save(dataset,filtered_data)

    if dataset.name == 'OASIS':
        print('do you want to use autoreject? y/n')
        boolean = input()
        if boolean == 'y':
            # asked up front so the long autoreject run can finish unattended
            print('do you want to save this autoreject cleaned data? y/n')
            save_autoreject = input()
            no_of_trials = dataset.X.shape[0]

            # clean_epochs maps the 1-based trial number to
            # [cleaned epoch data, boolean array of bad epochs, percentage of epochs dropped,
            #  dataset.Y[trial], dataset.Z[trial][1]]; only trials without dropped epochs are kept
            clean_epochs ={}
            for trial in range(no_of_trials):
                print('trial no',trial)
                temp = filtered_data[trial]
                raw_object = autoreject_custom.raw_object_creation(temp,dataset.channels,dataset.ch_type,dataset.freq)
                print(raw_object.get_data().shape)
                epoch = autoreject_custom.epoch_object_creation(raw_object)
                print(epoch.get_data().shape)
                clean_epoch,reject_log = autoreject_custom.autoreject_algo(epoch,n_interpolates = np.array([1, 4, 32]),consensus_percs = np.linspace(0, 1.0, 11))
                dropped_percentage = clean_epoch.drop_log_stats()
                if dropped_percentage == 0:
                    clean_epochs[trial+1] = [clean_epoch.get_data(),reject_log.bad_epochs,dropped_percentage,dataset.Y[trial],dataset.Z[trial][1]]

            if save_autoreject == 'y':
                with open('preprocessed_data/oasis/with_autoreject.p','wb') as file:
                    pickle.dump(clean_epochs,file,protocol=pickle.HIGHEST_PROTOCOL)
|
||||
|
||||
|
||||
# In[ ]:
|
||||
|
||||
|
||||
|
||||
def __main__():
    '''
    Entry point of the notebook: starts the interactive preprocessing driver
    '''
    driver_code()


# In[ ]:


__main__()
|
||||
|
||||
@@ -0,0 +1,106 @@
|
||||
#!/usr/bin/env python
|
||||
# coding: utf-8
|
||||
|
||||
# In[ ]:
|
||||
|
||||
|
||||
from google.colab import drive
|
||||
# Mount Google Drive at /gdrive, forcing a remount.
drive.mount('/gdrive',force_remount = True)
|
||||
|
||||
|
||||
# In[ ]:
|
||||
|
||||
|
||||
# Run the IPython `cd` line magic; the path is relative to the current working directory.
get_ipython().run_line_magic('cd', '../gdrive/MyDrive/emotion_recognition_project/')
|
||||
|
||||
|
||||
# # Incremental Learning for OASIS
|
||||
|
||||
# **Arguments**
|
||||
#
|
||||
#
|
||||
# ---
|
||||
#
|
||||
# save = 'y/n'
|
||||
#
|
||||
# ---
|
||||
#
|
||||
#
|
||||
#
|
||||
#
|
||||
#
|
||||
# Eg. if you want to run the model on the OASIS dataset and don't want to save plots, the
|
||||
# command would be
|
||||
#
|
||||
# !python incremental_learning_OASIS.py n
|
||||
#
|
||||
#
|
||||
#
|
||||
#
|
||||
#
|
||||
|
||||
# In[ ]:
|
||||
|
||||
|
||||
# Run the OASIS incremental learning script through the shell; the argument `n` is the save option.
# NOTE(review): the file name here is lower case, the usage comment spells it incremental_learning_OASIS.py - confirm the name on disk.
get_ipython().system('python incremental_learning_oasis.py n')
|
||||
|
||||
|
||||
#
|
||||
# # Incremental Learning for DEAP
|
||||
|
||||
# **Arguments**
|
||||
#
|
||||
#
|
||||
# ---
|
||||
#
|
||||
# save = 'y/n'
|
||||
#
|
||||
# ---
|
||||
#
|
||||
#
|
||||
#
|
||||
#
|
||||
#
|
||||
# Eg. if you want to run the model on the DEAP dataset and don't want to save plots, the
|
||||
# command would be
|
||||
#
|
||||
# !python incremental_learning_DEAP.py n
|
||||
|
||||
# In[ ]:
|
||||
|
||||
|
||||
# Run the DEAP incremental learning script through the shell; the argument `n` is the save option.
# NOTE(review): the file name here is lower case, the usage comment spells it incremental_learning_DEAP.py - confirm the name on disk.
get_ipython().system('python incremental_learning_deap.py n ')
|
||||
|
||||
|
||||
#
|
||||
# # Incremental Learning for DREAMER
|
||||
|
||||
# **Arguments**
|
||||
#
|
||||
#
|
||||
# ---
|
||||
#
|
||||
# save = 'y/n'
|
||||
#
|
||||
# ---
|
||||
#
|
||||
#
|
||||
#
|
||||
#
|
||||
#
|
||||
# Eg. if you want to run the model on the DREAMER dataset and don't want to save plots, the
|
||||
# command would be
|
||||
#
|
||||
# !python incremental_learning_DREAMER.py n
|
||||
|
||||
# In[ ]:
|
||||
|
||||
|
||||
# Run the DREAMER incremental learning script through the shell; the argument `n` is the save option.
# NOTE(review): the file name here is lower case, the usage comment spells it incremental_learning_DREAMER.py - confirm the name on disk.
get_ipython().system('python incremental_learning_dreamer.py n ')
|
||||
|
||||
|
||||
# In[ ]:
|
||||
|
||||
|
||||
|
||||
|
||||
+46
@@ -0,0 +1,46 @@
|
||||
#!/usr/bin/env python
|
||||
# coding: utf-8
|
||||
|
||||
# In[ ]:
|
||||
|
||||
|
||||
# -*- coding: utf-8 -*-
|
||||
"""utils.ipynb
|
||||
|
||||
Automatically generated by Colaboratory.
|
||||
|
||||
Original file is located at
|
||||
https://colab.research.google.com/drive/1Z2e7rxy64W9WIIcEfH1vyzfVdMNIK8Om
|
||||
"""
|
||||
import numpy as np
|
||||
|
||||
|
||||
def epoch_data(X,Y, window, stride, sfreq):
    '''
    Function to segment the dataset into epochs with a sliding window.

    Parameters :-
    X :- The input EEG signal, 3-D, its shape is read as (channels, timepoints, trials)
    Y :- The label values given by the users, one row per trial (first two columns are used)
    window :- length of the epoch in seconds
    stride :- stride of the sliding window in seconds
    sfreq :- sampling frequency of the EEG signal

    Returns (X_new, Y_new): X_new has shape (channels, window*sfreq, trials*epochs per trial),
    Y_new has shape (trials*epochs per trial, 2).

    Note: both layout changes are done with np.reshape, which reinterprets the
    flat buffer under a new shape and does not transpose the axes.
    '''
    n_channels, n_times, n_trials = X.shape
    trials_first = np.reshape(X,(n_trials,n_channels,n_times))

    # window and stride converted from seconds to samples
    seg_len = int(window*sfreq)
    hop = int(stride*sfreq)
    per_trial = int((n_times-seg_len)/hop + 1)

    X_new = np.empty((n_trials*per_trial,n_channels,seg_len))
    Y_new = np.empty((n_trials*per_trial,2))

    for t in range(n_trials):
        first_row = t*per_trial
        for k in range(per_trial):
            row = first_row + k
            begin = k*hop
            X_new[row,:,:] = trials_first[t,:,begin:begin+seg_len]
            # every epoch inherits the labels of the trial it was cut from
            Y_new[row,:] = Y[t,:2]

    n_epochs, n_channels, n_samples = X_new.shape
    X_new = np.reshape(X_new,(n_channels,n_samples,n_epochs))

    return X_new,Y_new
|
||||
|
||||
Referência em uma Nova Issue
Bloquear um usuário