From d142115fe62b0e06a3a11bd11e46885b5fae9892 Mon Sep 17 00:00:00 2001 From: Rohit Garg Date: Sat, 18 Sep 2021 23:21:23 +0530 Subject: [PATCH] Added Scripts --- 8_5_cross_validate.py | 386 ++++++++++++ EEGExtract.py | 644 ++++++++++++++++++++ EpochedFeatures.py | 459 ++++++++++++++ ImportUtils.py | 172 ++++++ TopNByClassifier.py | 735 +++++++++++++++++++++++ TopNByFSMethods.py | 648 ++++++++++++++++++++ feature_extraction_25GB_RAM_DASM_RASM.py | 319 ++++++++++ feature_extraction_25gb_ram.py | 467 ++++++++++++++ feature_select_main.py | 70 +++ incremental_learning_deap.py | 264 ++++++++ incremental_learning_dreamer.py | 259 ++++++++ incremental_learning_oasis.py | 255 ++++++++ incremetal_learning_final_plots.py | 207 +++++++ preprocess.py | 487 +++++++++++++++ run_scripts_incremental_learning.py | 106 ++++ utils.py | 46 ++ 16 files changed, 5524 insertions(+) create mode 100644 8_5_cross_validate.py create mode 100644 EEGExtract.py create mode 100644 EpochedFeatures.py create mode 100644 ImportUtils.py create mode 100644 TopNByClassifier.py create mode 100644 TopNByFSMethods.py create mode 100644 feature_extraction_25GB_RAM_DASM_RASM.py create mode 100644 feature_extraction_25gb_ram.py create mode 100644 feature_select_main.py create mode 100644 incremental_learning_deap.py create mode 100644 incremental_learning_dreamer.py create mode 100644 incremental_learning_oasis.py create mode 100644 incremetal_learning_final_plots.py create mode 100644 preprocess.py create mode 100644 run_scripts_incremental_learning.py create mode 100644 utils.py diff --git a/8_5_cross_validate.py b/8_5_cross_validate.py new file mode 100644 index 0000000..41d5179 --- /dev/null +++ b/8_5_cross_validate.py @@ -0,0 +1,386 @@ +# -*- coding: utf-8 -*- +"""8.5_cross_validate.ipynb + +Automatically generated by Colaboratory. + +Original file is located at + https://colab.research.google.com/drive/1qEkrFcZ9lLqd6gNgxX8Y8QoXlOhH3wXC + +#Leave One Subject Out Cross Validation + +* DREAMER => Shape After Loading +X.shape= (414, 58240, 14) Y.shape= (414, 2) Z.shape= (414, 2) + +* DEAP => Shape After Loading +X.shape= (1280, 40, 8064) Y.shape= (1280, 2) Z.shape= (1280, 2) + +* OASIS => Shape After Loading +X.shape= (600, 640, 14) Y.shape= (600, 2) Z.shape= (600, 2) + +* i.e. OASIS and DEAP are of form X = (rec, timepoints,channels) + +* reshaping X to (rec, channels,timepoints) + makes sense now +""" + +!nvidia-smi + +"""#RAPIDS Package Installation""" + +# Install RAPIDS +!git clone https://github.com/rapidsai/rapidsai-csp-utils.git +!bash rapidsai-csp-utils/colab/rapids-colab.sh stable + +import sys, os + +dist_package_index = sys.path.index('/usr/local/lib/python3.7/dist-packages') +sys.path = sys.path[:dist_package_index] + ['/usr/local/lib/python3.7/site-packages'] + sys.path[dist_package_index:] +sys.path +exec(open('rapidsai-csp-utils/colab/update_modules.py').read(), globals()) + +import cuml + +"""-----------------------------------------------------------------------------------------------------------------------------------------------------""" + +from google.colab import drive +drive.mount('/gdrive',force_remount=True) + +# Commented out IPython magic to ensure Python compatibility. +# %cd /gdrive/MyDrive/Project_DEAP/4.1.2021/ + +################################################################################ +import TopNByFSMethods +import TopNByClassifier +import EpochedFeatures +from args_eeg import args as my_args +import ImportUtils + +from ImportUtils import * +from TopNByFSMethods import * +from TopNByClassifier import * +from EpochedFeatures import * +from args_eeg import args as my_args +from ImportUtils import * +from TopNByFSMethods import * +from TopNByClassifier import * +from EpochedFeatures import * + +from sklearn.svm import SVC + + +from DEAP_scripts.ImportUtils import * +from DEAP_scripts.TopNByFSMethods import * +from DEAP_scripts.TopNByClassifier import * +from DEAP_scripts.EpochedFeatures import * +from DEAP_scripts.args_eeg import args as my_args +from sklearn.svm import SVC + +################################################################################ + +mean_rmse = [] +std_rmse = [] + +np.random.seed(42) +def cross_validate(dataset, window, stride, sfreq, label, best_features_list): +# Parameters :- + # dataset :- Name of the Dataset + # window :- Length of the sliding window in seconds + # stride :- Stride of the sliding window in seconds + # sfreq :- sampling frequency of the EEG dataset + # best_features_list :- Featrue list after performing top electrode and feature analysis for various datasets + pwd = os.getcwd() + fs = sfreq + + featurepath = os.getcwd() + '/' + dataset + '/data_extracted/featuresDict/' + ans = np.load((featurepath + "shannonEntropy_{}_{}.npz").format(window,stride), allow_pickle=True)['features'] + Y_epoch = np.load((featurepath + "shannonEntropy_{}_{}.npz").format(window,stride), allow_pickle=True)['Y'] + + #load saved epoched features + featuresDict = None + featuresDict = loadFeaturesDict(dataset) + + # pop out not best features + for k in list(featuresDict.keys()): + if k not in best_features_list: + + featuresDict.pop(k) + + featuresList = list(featuresDict.keys()) + print(featuresList) + + #make feature matrix with select best features + featureMatrix = np.empty((0,ans.shape[1])) #[14*32 + 1,80640] + for key,value in featuresDict.items(): + featureMatrix = np.append(featureMatrix,value,axis=0) + + #remove NaN features + if np.isnan(featureMatrix).any(): + featureMatrix = np.nan_to_num(featureMatrix,nan=0) + + #set datatype of feature matrix + featureMatrix = featureMatrix.astype('float64') + + #transpose feature matrix to prepare X + X = pd.DataFrame(featureMatrix.T) + #replace infinity with NaN value and fill it with zero + X = X.replace([np.inf, -np.inf], np.nan) + X = X.fillna(0) + X = X.astype(np.float32) + + #convert ndarray to dataframe + Y_epoch = pd.DataFrame(Y_epoch) + + print("Number of feature vectors in X = ", X.shape[1]) + print("X.shape = " ,X.shape) + + + #*********************************************************** + + + + #Leave-one-subject-out-CV + #number of folds = numbParticipants + numbParticipants = 0 + numbRecordings = 0 + + if(dataset == 'DEAP'): + numbParticipants = 32 + numbRecordings = 40 + elif(dataset == 'DREAMER'): + # Dreamer dataset has 23 subjects, each subject was shown 18 videos + numbParticipants = 23 + numbRecordings = 18 + elif(dataset == 'OASIS'): + numbParticipants = 15 + numbRecordings = 40 + + + #numbEpochs + numbEpochs = X.shape[0]//(numbParticipants*numbRecordings) + print(X.shape[0]) + print("numbParticipants = ", numbParticipants) + print("numbRecordings = " , numbRecordings) + print("numbEpochs = ", numbEpochs) + pass + + print(type(X)) + print(type(Y_epoch)) + + cv_rmse = [] + + for i in range(numbParticipants): + s = i*numbRecordings*numbEpochs + e = (i+1)*numbRecordings*numbEpochs + + X_test = copy.deepcopy(X.iloc[s:e, :]) + y_test = copy.deepcopy(Y_epoch.iloc[s:e, label]) + + X_train = copy.deepcopy(X.iloc[:s, :]) + X_train = np.append(X_train, X.iloc[e:, :],axis=0) + + y_train = copy.deepcopy(Y_epoch.iloc[:s, label]) + y_train = np.append(y_train, Y_epoch.iloc[e:, label],axis=0) + + print(X_train.shape, y_train.shape, X_test.shape, y_test.shape) + + clf = RandomForestRegressor() + clf.fit(X_train, y_train) + y_predict = clf.predict(X_test) + rmse = mean_squared_error(y_test, y_predict,squared=False) + print("window: {}, stide: {}, rmse: {}".format(window,stride,rmse)) + cv_rmse.append(rmse) + + + + print(cv_rmse) + print("Mean Cross-validation RMSE = ", np.mean(cv_rmse)) + mean_rmse.append(np.mean(cv_rmse)) + print("Standard Deviation of Cross-validated RMSE = ", np.std(cv_rmse)) + std_rmse.append(np.std(cv_rmse)) + + #pickle list + with open('/gdrive/MyDrive/Project_DEAP/4.1.2021/{}{}_cv_rmse.pkl'.format(dataset,label), 'wb') as f: + pickle.dump(cv_rmse, f) + + fig = plt.gcf() + fig.set_size_inches(40, 20) + # X = pd.DataFrame([x for x in range(1,) ]) + plt.rcParams.update({'font.size': 40}) + plt.xlabel('Partipant No.') + plt.ylabel('RMSE') + plt.plot([str(x+1) for x in range(len(cv_rmse))], cv_rmse, linestyle='-', marker='o', color='b', markerfacecolor='r', linewidth=2.0, markersize = 15) + plt.tight_layout() + plt.savefig("/gdrive/MyDrive/Project_DEAP/4.1.2021/CV_{}_{}.svg".format(dataset, label), bbox_inches='tight', dpi=500) + plt.show() + plt.clf() + +def main(dataset, window, stride, sfreq, model, label, approach, ml_algo, top, fs_method, best_features_list): + # Parameters :- + # dataset :- Name of the Dataset + # window :- Length of the sliding window in seconds + # stride :- Stride of the sliding window in seconds + # sfreq :- sampling frequency of the EEG dataset + # best_features_list :- Featrue list after performing top electrode and feature analysis for various datasets + + print(locals()) + pwd = os.getcwd() + + + # getEpochedFeatures(dataset, window, stride, sfreq, label) + cross_validate(dataset, window, stride, sfreq, label, best_features_list) + return + if(top == "e"): + clf = RandomForestRegressor() + topElectrodeRegressionRanking(dataset, window, stride, sfreq, clf, label, scale=False, pca=False) + topElectrodeFSRegressionRanking(dataset, window, stride, sfreq, clf, label, scale=False, pca=False, mutual_info = False, method='SelectKBest') + topElectrodeFSRegressionRanking(dataset, window, stride, sfreq, clf, label, scale=False, pca=False, mutual_info = False, method='RandomForest') + plt.legend(["Method A","Method B", "Method C"]) + + if(label == 1): + plt.savefig(pwd + "/" + dataset + "/arousal_plots/" + "CorrectedElectrodewiseRanking" + str(window) + str(stride) + ".svg", bbox_inches='tight') + # plt.savefig(pwd + "/" + dataset + "/plots/" + "ElectrodewiseRanking" + str(window) + str(stride) + ".svg", bbox_inches='tight') + plt.show() + plt.clf() + + else: + plt.savefig(pwd + "/" + dataset + "/plots/" + "CorrectedElectrodewiseRanking" + str(window) + str(stride) + ".svg", bbox_inches='tight') + # plt.savefig(pwd + "/" + dataset + "/plots/" + "ElectrodewiseRanking" + str(window) + str(stride) + ".svg", bbox_inches='tight') + plt.show() + plt.clf() + + elif(top == "f"): + clf = RandomForestRegressor() + topFeaturesRegressionRanking(dataset, window, stride, sfreq, clf, label, scale=False, pca=False) + topFeatureFSRegressionRanking(dataset, window, stride, sfreq, clf, label, scale=False, pca=False, mutual_info = False, method='SelectKBest') + topFeatureFSRegressionRanking(dataset, window, stride, sfreq, clf, label, scale=False, pca=False, mutual_info = False, method='RandomForest') + if(label == 1): + plt.legend(["Method A","Method B", "Method C"]) + plt.savefig(pwd + "/" + dataset + "/arousal_plots/" + "CorrectedFeaturewiseRanking" + str(window) + str(stride) + ".svg", bbox_inches='tight') + plt.show() + plt.clf() + else: + plt.legend(["Method A","Method B", "Method C"]) + plt.savefig(pwd + "/" + dataset + "/plots/" + "CorrectedFeaturewiseRanking" + str(window) + str(stride) + ".svg", bbox_inches='tight') + plt.show() + plt.clf() + + + + +if __name__ == '__main__': + + + #DREAMER + #VALENCE + best_features_list = ['HjorthMob','HjorthComp','stdDev','bandPwr_theta','ShannonRes_gamma','bandPwr_beta'] + main(dataset='DREAMER', window=1, stride=1, sfreq=128, model='rfr', label= 0,approach='byfs', ml_algo='regression', top='f', fs_method='SelectKBest', best_features_list = best_features_list) + #AROUSAL + best_features_list = ['HjorthMob','ShannonRes_gamma','HjorthComp','stdDev','bandPwr_gamma', 'bandPwr_theta'] + main(dataset='DREAMER', window=1, stride=1, sfreq=128, model='rfr', label= 1,approach='byfs', ml_algo='regression', top='f', fs_method='SelectKBest', best_features_list = best_features_list) + + #DEAP + #VALENCE + best_features_list = ['bandPwr_gamma','ShannonRes_gamma','ShannonRes_beta','rasm_gamma','dasm_gamma','bandPwr_beta'] + main(dataset='DEAP', window=1, stride=1, sfreq=128, model='rfr', label= 0,approach='byfs', ml_algo='regression', top='f', fs_method='SelectKBest', best_features_list = best_features_list) + #AROUSAL + best_features_list = ['HjorthMob','HjorthComp','stdDev','ShannonRes_gamma','bandPwr_beta','bandPwr_theta','ShannonRes_beta','dasm_beta'] + main(dataset='DEAP', window=1, stride=1, sfreq=128, model='rfr', label= 1,approach='byfs', ml_algo='regression', top='f', fs_method='SelectKBest', best_features_list = best_features_list) + + #OASIS + #VALENCE + best_features_list = ['HjorthMob','stdDev','HjorthComp'] + main(dataset='OASIS', window=1, stride=1, sfreq=128, model='rfr', label= 0,approach='byfs', ml_algo='regression', top='f', fs_method='SelectKBest', best_features_list = best_features_list) + #AROUSAL + best_features_list = ['HjorthMob'] + main(dataset='OASIS', window=1, stride=1, sfreq=128, model='rfr', label= 1,approach='byfs', ml_algo='regression', top='f', fs_method='SelectKBest', best_features_list = best_features_list) + + # print(len(best_features_list)) + # main(dataset='OASIS', window=1, stride=1, sfreq=128, model='rfr', label= 1,approach='byfs', ml_algo='regression', top='f', fs_method='SelectKBest', best_features_list = best_features_list) + # --dataset DREAMER --window 1 --stride 1 --sfreq 128 --model rfr --label 0 --approach byfs --ml_algo regression --top f --fs_method SelectKBest + +"""#MINIMUM RMSE DURING CROSS-VALIDATION 6-6-2021""" + +# Commented out IPython magic to ensure Python compatibility. +import matplotlib.pyplot as plt +# %matplotlib inline +import seaborn as sns +import copy +import os +from scipy import io,signal +import numpy as np +import pandas as pd +import pickle +#{Dataset_Name}{0/1}_cv_rmse.pkl :- 0 is for Valence and 1 is for Arousal +pl = ['DREAMER0_cv_rmse.pkl', 'DREAMER1_cv_rmse.pkl', 'DEAP0_cv_rmse.pkl', 'DEAP1_cv_rmse.pkl', 'OASIS0_cv_rmse.pkl', 'OASIS1_cv_rmse.pkl'] +dataset = ['DREAMER', 'DREAMER', 'DEAP', 'DEAP','OASIS','OASIS'] +label = [0,1,0,1,0,1] +min_cv_rmse = [] + +for i in range(len(pl)): + + cv_rmse = None + with open(pl[i], 'rb') as f: + cv_rmse = pickle.load(f) + + min_cv_rmse.append(min(cv_rmse)) + +print(min_cv_rmse) + +"""feature_select_main.py""" + +!pip install dit +!pip install pyinform + +from ImportUtils import * +from args_eeg import args as my_args + +"""#Plot pickled results""" + +# Commented out IPython magic to ensure Python compatibility. +import matplotlib.pyplot as plt +# %matplotlib inline +import seaborn as sns +import copy +import os +from scipy import io,signal +import numpy as np +import pandas as pd +import pickle + +# with open('/gdrive/MyDrive/Project_DEAP/4.1.2021/{}{}_cv_rmse.pkl'.format(dataset,label), 'rb') as f: +# pickle.dump(cv_rmse, f) + +pl = ['DREAMER0_cv_rmse.pkl', 'DREAMER1_cv_rmse.pkl', 'DEAP0_cv_rmse.pkl', 'DEAP1_cv_rmse.pkl', 'OASIS0_cv_rmse.pkl', 'OASIS1_cv_rmse.pkl'] +dataset = ['DREAMER', 'DREAMER', 'DEAP', 'DEAP','OASIS','OASIS'] +label = [0,1,0,1,0,1] + +for i in range(len(pl)): + + cv_rmse = None + with open(pl[i], 'rb') as f: + cv_rmse = pickle.load(f) + + fig = plt.gcf() + fig.set_size_inches(40, 20) + # X = pd.DataFrame([x for x in range(1,) ]) + plt.rcParams.update({'font.size': 50}) + plt.xlabel('Partipant No.') + plt.ylabel('RMSE') + plt.plot([str(x+1) for x in range(len(cv_rmse))], cv_rmse, linestyle='-', marker='o', color='b', markerfacecolor='r', linewidth=2.0, markersize = 15) + plt.tight_layout() + plt.savefig("/gdrive/MyDrive/Project_DEAP/4.1.2021/cv_stats/CV_{}_{}.svg".format(dataset[i], label[i]), bbox_inches='tight', dpi=500) + plt.show() + plt.clf() + +with open('/gdrive/MyDrive/Project_DEAP/4.1.2021/mean_cv_rmse.pkl', 'wb') as f: + pickle.dump(mean_rmse, f) + +with open('/gdrive/MyDrive/Project_DEAP/4.1.2021/std_cv_rmse.pkl', 'wb') as f: + pickle.dump(std_rmse, f) + +df = pd.DataFrame() +df['Dataset-Label'] = ['DREAMER-V','DREAMER-A','DEAP-V','DEAP-A','OASIS-V','OASIS-A'] +df['Mean RMSE'] = mean_rmse +df['Std Dev RMSE'] = std_rmse +df.to_csv('/gdrive/MyDrive/Project_DEAP/4.1.2021/cv_rmse_stats.csv') + diff --git a/EEGExtract.py b/EEGExtract.py new file mode 100644 index 0000000..00c46f5 --- /dev/null +++ b/EEGExtract.py @@ -0,0 +1,644 @@ +#!/usr/bin/env python +# coding: utf-8 + +# In[ ]: + + +import bisect +import numpy as np +import pandas as pd +import pywt +from scipy import stats, signal, integrate +from dit.other import tsallis_entropy +import dit +import librosa +import statsmodels.api as sm +import itertools +from pyinform import mutualinfo +from statsmodels import tsa +from sklearn.metrics import mutual_info_score +import numpy as np +from scipy import signal,integrate +from sklearn.metrics.cluster import normalized_mutual_info_score as normed_mutual_info + +################################################ +# Auxiliary Functions +################################################ + +########## +# Filter the eegData, midpass filter +# eegData: 3D np array [chans x ms x epochs] +def filt_data(eegData, lowcut, highcut, fs, order=7): + nyq = 0.5 * fs + low = lowcut / nyq + high = highcut / nyq + b, a = signal.butter(order, [low, high], btype='band') + filt_eegData = signal.lfilter(b, a, eegData, axis = 1) + return filt_eegData + +######### +# remove short bursts / spikes +def fcnRemoveShortEvents(z,n): + for chan in range(z.shape[0]): + # check for too-short suppressions + ct=0 + i0=1 + i1=1 + for i in range(2,len(z[chan,:])): + if z[chan,i]==z[chan,i-1]: + ct=ct+1 + i1=i + else: + if ct np.array(chD[1:]))[0].tolist() for chD in z.tolist()] + + bursts = get_intervals(went_high,went_low) + supressions = get_intervals(went_low,went_high) + + return bursts,supressions + +########## +# Coherence in the Delta Band +def CoherenceDelta(eegData, i, j, fs=100): + nfft=eegData.shape[1] + f, Cxy = signal.coherence(eegData[i,:,:], eegData[j,:,:], fs=fs, nfft=nfft, axis=0)#, window=np.hanning(nfft)) + out = np.mean(Cxy[np.all([f >= 0.5, f<=4], axis=0)], axis=0) + return out + +########## +# correlation across channels +def PhaseLagIndex(eegData, i, j): + hxi = ss.hilbert(eegData[i,:,:]) + hxj = ss.hilbert(eegData[j,:,:]) + # calculating the INSTANTANEOUS PHASE + inst_phasei = np.arctan(np.angle(hxi)) + inst_phasej = np.arctan(np.angle(hxj)) + + out = np.abs(np.mean(np.sign(inst_phasej - inst_phasei), axis=0)) + return out + +########## +# Cross Correlation +def crossCorrelation(eegData, i, j): + out = np.zeros(eegData.shape[2]) + for epoch in range(eegData.shape[2]): + ccor = np.correlate(eegData[i,:,epoch], eegData[j,:,epoch], mode="full") + absccor = np.abs(ccor) + out[epoch] = (np.max(absccor) - np.mean(absccor)) / np.std(absccor) + return out + +########## +# Auxilary Cross-correlation Lag +def corrCorrLagAux(eegData,ii,jj,Fs=100): + out = np.zeros(eegData.shape[2]) + lagCorr = [] + for lag in range(0,eegData.shape[1],int(0.2*Fs)): + tmp = eegData.copy() + tmp[jj,:,:] = np.roll(tmp[jj,:,:], lag, axis=0) + lagCorr.append(CrossCorrelation(tmp, ii, jj, Fs)) + return np.argmax(lagCorr,axis=0) + +################################################ +# bandpower Functions +################################################ + +########## +# compute the bandpower (area under segment (from fband[0] to fband[1] in Hz) +# of curve in freqency domain) of data, at sampling frequency of Fs (100 ussually) +def bandpower(data, fs, fband): + freqs, powers = periodogram(data, fs) + idx_min = np.argmax(freqs > fband[0]) - 1 + idx_max = np.argmax(freqs > fband[1]) - 1 + idx_delta = np.zeros(dtype=bool, shape=freqs.shape) + idx_delta[idx_min:idx_max] = True + bpower = simps(powers[idx_delta], freqs[idx_delta]) + return bpower + +########## +# computes the same thing as vecbandpower but with a loop +def pfvecbandpower(data, fs, fband): + bpowers = np.zeros((data.shape[0], data.shape[2])) + for i in range(data.shape[0]): + freqs, powers = periodogram(data[i, :, :], fs, axis=0) + idx_min = np.argmax(freqs > fband[0]) - 1 + idx_max = np.argmax(freqs > fband[1]) - 1 + idx_delta = np.zeros(dtype=bool, shape=freqs.shape) + idx_delta[idx_min:idx_max] = True + + bpower = simps(powers[idx_delta, :], freqs[idx_delta], axis=0) + bpowers[i, :] = bpower + + return bpowers + +################################################ +# Complexity features +################################################ + +########## +# Extract the Shannon Entropy +# threshold the signal and make it discrete, normalize it and then compute entropy +def shannonEntropy(eegData, bin_min, bin_max, binWidth): + H = np.zeros((eegData.shape[0], eegData.shape[2])) + for chan in range(H.shape[0]): + for epoch in range(H.shape[1]): + counts, binCenters = np.histogram(eegData[chan,:,epoch], bins=np.arange(bin_min+1, bin_max, binWidth)) + nz = counts > 0 + prob = counts[nz] / np.sum(counts[nz]) + H[chan, epoch] = -np.dot(prob, np.log2(prob/binWidth)) + return H + +########## +# Extract the tsalis Entropy +def tsalisEntropy(eegData, bin_min, bin_max, binWidth, orders = [1]): + H = [np.zeros((eegData.shape[0], eegData.shape[2]))]*len(orders) + for chan in range(H[0].shape[0]): + for epoch in range(H[0].shape[1]): + counts, bins = np.histogram(eegData[chan,:,epoch], bins=np.arange(-200+1, 200, 2)) + dist = dit.Distribution([str(bc).zfill(5) for bc in bins[:-1]],counts/sum(counts)) + for ii,order in enumerate(orders): + H[ii][chan,epoch] = tsallis_entropy(dist,order) + return H + +########## +# Cepstrum Coefficients (n=2) +def mfcc(eegData,fs,order=2): + H = np.zeros((eegData.shape[0], eegData.shape[2],order)) + for chan in range(H.shape[0]): + for epoch in range(H.shape[1]): + H[chan, epoch, : ] = librosa.feature.mfcc(np.asfortranarray(eegData[chan,:,epoch]), sr=fs)[0:order].T + return H + +########## +# Lyapunov exponent +def lyapunov(eegData): + return np.mean(np.log(np.abs(np.gradient(eegData,axis=1))),axis=1) + +########## +# Fractal Embedding Dimension +# From pyrem: packadge for sleep scoring from EEG data +# https://github.com/gilestrolab/pyrem/blob/master/src/pyrem/univariate.py +def hFD(a, k_max): #Higuchi FD + L = [] + x = [] + N = len(a) + + for k in range(1,k_max): + Lk = 0 + for m in range(0,k): + #we pregenerate all idxs + idxs = np.arange(1,int(np.floor((N-m)/k)),dtype=np.int32) + Lmk = np.sum(np.abs(a[m+idxs*k] - a[m+k*(idxs-1)])) + Lmk = (Lmk*(N - 1)/(((N - m)/ k)* k)) / k + Lk += Lmk + + L.append(np.log(Lk/(m+1))) + x.append([np.log(1.0/ k), 1]) + + (p, r1, r2, s)=np.linalg.lstsq(x, L) + return p[0] + +########## +# Hjorth Mobility +# Hjorth Complexity +# variance = mean(signal^2) iff mean(signal)=0 +# which it is be because I normalized the signal +# Assuming signals have mean 0 +# Mobility = sqrt( mean(dx^2) / mean(x^2) ) +def hjorthParameters(xV): + dxV = np.diff(xV, axis=1) + ddxV = np.diff(dxV, axis=1) + + mx2 = np.mean(np.square(xV), axis=1) + mdx2 = np.mean(np.square(dxV), axis=1) + mddx2 = np.mean(np.square(ddxV), axis=1) + + mob = mdx2 / mx2 + complexity = np.sqrt((mddx2 / mdx2) / mob) + mobility = np.sqrt(mob) + + # PLEASE NOTE that Mohammad did NOT ACTUALLY use hjorth complexity, + # in the matlab code for hjorth complexity subtraction by mob not division was used + return mobility, complexity + +########## +# false nearest neighbor descriptor +def falseNearestNeighbor(eegData, fast=True): + # Average Mutual Information + # There exist good arguments that if the time delayed mutual + # information exhibits a marked minimum at a certain value of tex2html_wrap_inline6553, + # then this is a good candidate for a reasonable time delay. + npts = 1000 # not sure about this? + maxdims = 50 + max_delay = 2 # max_delay = 200 # TODO: need to use 200, but also need to speed this up + distance_thresh = 0.5 + + out = np.zeros((eegData.shape[0], eegData.shape[2])) + for chan in range(eegData.shape[0]): + for epoch in range(eegData.shape[2]): + if fast: + out[chan, epoch] = 0 + else: + cur_eegData = eegData[chan, :, epoch] + lagidx = 0 # we are looking for the index of the lag that makes the signal maximally uncorrelated to the original + minNMI = 1 # normed_mutual_info is from 1 (perfectly correlated) to 0 (not at all correlated) + for lag in range(1, max_delay): + x = cur_eegData[:-lag] + xlag = cur_eegData[lag:] + convert float data into histogram bins + nbins = int(np.floor(1 + np.log2(len(x)) + 0.5)) + x_discrete = np.histogram(x, bins=nbins)[0] + xlag_discrete = np.histogram(xlag, bins=nbins)[0] + cNMI = normed_mutual_info(x_discrete, xlag_discrete) + if cNMI < minNMI: + minNMI = cNMI + lagidx = lag + # nearest neighbors part + knn = int(max(2, 6*lagidx)) # heuristic (number of nearest neighbors to look up) + m = 1 # lagidx + 1 + + # y is the embedded version of the signal + y = np.zeros((maxdims+1, npts)) + for d in range(maxdims+1): + tmp = cur_eegData[d*m:d*m + npts] + y[d, :tmp.shape[0]] = tmp + + nnd = np.ones((npts, maxdims)) + nnz = np.zeros((npts, maxdims)) + + # see where it tends to settle + for d in range(1, maxdims): + for k in range(0, npts): + # get the distances to all points in the window (distance given embedding dimension) + dists = [] + for nextpt in range(1, knn+1): + if k+nextpt < npts: + dists.append(np.linalg.norm(y[:d, k] - y[:d, k+nextpt])) + if len(dists) > 0: + minIdx = np.argmin(dists) + if dists[minIdx] == 0: + dists[minIdx] = 0.0000001 # essentially 0 just silence the error + nnd[k, d-1] = dists[minIdx] + nnz[k, d-1] = np.abs( y[d+1, k] - y[d+1, minIdx+1+k] ) + # aggregate results + mindim = np.mean(nnz/nnd > distance_thresh, axis=0) < 0.1 + # get the index of the first occurence of the value true + # (a 1 in the binary representation of true and false) + out[chan, epoch] = np.argmax(mindim) + + return out + +########## +# ARMA coefficients +def arma(eegData,order=2): + H = np.zeros((eegData.shape[0], eegData.shape[2],order)) + for chan in range(H.shape[0]): + for epoch in range(H.shape[1]): + arma_mod = sm.tsa.ARMA(eegData[chan,:,epoch], order=(order,order)) + arma_res = arma_mod.fit(trend='nc', disp=-1) + H[chan, epoch, : ] = arma_res.arparams + return H + +################################################ +# Continuity features +################################################ + +########## +# median frequency +def medianFreq(eegData,fs): + H = np.zeros((eegData.shape[0], eegData.shape[2])) + for chan in range(H.shape[0]): + freqs, powers = signal.periodogram(eegData[chan, :, :], fs, axis=0) + H[chan,:] = freqs[np.argsort(powers,axis=0)[len(powers)//2]] + return H + +########## +# calculate band power +def bandPower(eegData, lowcut, highcut, fs): + eegData_band = filt_data(eegData, lowcut, highcut, fs, order=7) + freqs, powers = signal.periodogram(eegData_band, fs, axis=1) + bandPwr = np.mean(powers,axis=1) + return bandPwr + +########## +# numberOfSpikes +def spikeNum(eegData,minNumSamples=7,stdAway = 3): + H = np.zeros((eegData.shape[0], eegData.shape[2])) + for chan in range(H.shape[0]): + for epoch in range(H.shape[1]): + mean = np.mean(eegData[chan, :, epoch]) + std = np.std(eegData[chan,:,epoch],axis=1) + H[chan,epoch] = len(signal.find_peaks(abs(eegData[chan,:,epoch]-mean), 3*std,epoch,width=7)[0]) + return H + +########## +# Standard Deviation +def eegStd(eegData): + std_res = np.std(eegData,axis=1) + return std_res + +########## +# α/δ Ratio +def eegRatio(eegData,fs): + # alpha (8–12 Hz) + eegData_alpha = filt_data(eegData, 8, 12, fs) + # delta (0.5–4 Hz) + eegData_delta = filt_data(eegData, 0.5, 4, fs) + # calculate the power + powers_alpha = bandPower(eegData, 8, 12, fs) + powers_delta = bandPower(eegData, 0.5, 4, fs) + ratio_res = np.sum(powers_alpha,axis=0) / np.sum(powers_delta,axis=0) + return np.expand_dims(x, axis=0) + +########### +# Regularity (burst-suppression) +# Regularity of eeg +# filter with a window of 0.5 seconds to create a nonnegative smooth signal. +# In this technique, we first squared the signal and applied a moving-average +# The window length of the moving average was set at 0.5 seconds. +def eegRegularity(eegData, Fs=100): + in_x = np.square(eegData) # square signal + num_wts = Fs//2 # find the filter length in samples - we want 0.5 seconds. + q = signal.lfilter(np.ones(num_wts) / num_wts, 1, in_x, axis=1) + q = -np.sort(-q, axis=1) # descending sort on smooth signal + N = q.shape[1] + u2 = np.square(np.arange(1, N+1)) + # COMPUTE THE Regularity + # dot each 5min epoch with the quadratic data points and then normalize by the size of the dotted things + reg = np.sqrt( np.einsum('ijk,j->ik', q, u2) / (np.sum(q, axis=1)*(N**2)/3) ) + return reg + +########### +# Voltage < (5μ, 10μ, 20μ) +def eegVoltage(eegData,voltage=20): + eegFilt = eegData.copy() + eegFilt[abs(eegFilt) > voltage] = np.nan + volt_res = np.nanmean(eegFilt,axis=1) + return volt_res + +########## +# Diffuse Slowing +# look for diffuse slowing (bandpower max from frequency domain integral) +# repeated integration of a huge tensor is really expensive +def diffuseSlowing(eegData, Fs=100, fast=True): + maxBP = np.zeros((eegData.shape[0], eegData.shape[2])) + idx = np.zeros((eegData.shape[0], eegData.shape[2])) + if fast: + return idx + for j in range(1, Fs//2): + print("BP", j) + cbp = bandpower(eegData, Fs, [j-1, j]) + biggerCIdx = cbp > maxBP + idx[biggerCIdx] = j + maxBP[biggerCIdx] = cbp[biggerCIdx] + return (idx < 8) + +########## +# Spikes +def spikeNum(eegData,minNumSamples=7,stdAway = 3): + H = np.zeros((eegData.shape[0], eegData.shape[2])) + for chan in range(H.shape[0]): + for epoch in range(H.shape[1]): + mean = np.mean(eegData[chan, :, epoch]) + std = np.std(eegData[chan,:,epoch]) + H[chan,epoch] = len(signal.find_peaks(abs(eegData[chan,:,epoch]-mean), 3*std,epoch,width=7)[0]) + return H + +########## +# Delta Burst after spike +def burstAfterSpike(eegData,eegData_subband,minNumSamples=7,stdAway = 3): + H = np.zeros((eegData.shape[0], eegData.shape[2])) + for chan in range(H.shape[0]): + for epoch in range(H.shape[1]): + preBurst = 0 + postBurst = 0 + mean = np.mean(eegData[chan, :, epoch]) + std = np.std(eegData[chan,:,epoch]) + idxList = signal.find_peaks(abs(eegData[chan,:,epoch]-mean), stdAway*std,epoch,width=minNumSamples)[0] + for idx in idxList: + preBurst += np.mean(eegData_subband[chan,idx-7:idx-1,epoch]) + postBurst += np.mean(eegData_subband[chan,idx+1:idx+7,epoch]) + H[chan,epoch] = postBurst - preBurst + return H + +########## +# Sharp spike +def shortSpikeNum(eegData,minNumSamples=7,stdAway = 3): + H = np.zeros((eegData.shape[0], eegData.shape[2])) + for chan in range(H.shape[0]): + for epoch in range(H.shape[1]): + mean = np.mean(eegData[chan, :, epoch]) + std = np.std(eegData[chan,:,epoch]) + longSpikes = set(signal.find_peaks(abs(eegData[chan,:,epoch]-mean), 3*std,epoch,width=7)[0]) + shortSpikes = set(signal.find_peaks(abs(eegData[chan,:,epoch]-mean), 3*std,epoch,width=1)[0]) + H[chan,epoch] = len(shortSpikes.difference(longSpikes)) + return H + +########## +# Number of Bursts +def numBursts(eegData,fs): + bursts = [] + supressions = [] + for epoch in range(eegData.shape[2]): + epochBurst,epochSupressions = burst_supression_detection(eegData[:,:,epoch],fs,suppression_threshold=10)#,low=30,high=49) + bursts.append(epochBurst) + supressions.append(epochSupressions) + # Number of Bursts + numBursts_res = np.zeros((eegData.shape[0], eegData.shape[2])) + for chan in range(numBursts_res.shape[0]): + for epoch in range(numBursts_res.shape[1]): + numBursts_res[chan,epoch] = len(bursts[epoch][chan]) + return numBursts_res + +########## +# Burst length μ and σ +def burstLengthStats(eegData,fs): + bursts = [] + supressions = [] + for epoch in range(eegData.shape[2]): + epochBurst,epochSupressions = burst_supression_detection(eegData[:,:,epoch],fs,suppression_threshold=10)#,low=30,high=49) + bursts.append(epochBurst) + supressions.append(epochSupressions) + # Number of Bursts + burstMean_res = np.zeros((eegData.shape[0], eegData.shape[2])) + burstStd_res = np.zeros((eegData.shape[0], eegData.shape[2])) + for chan in range(burstMean_res.shape[0]): + for epoch in range(burstMean_res.shape[1]): + burstMean_res[chan,epoch] = np.mean([burst[1]-burst[0] for burst in bursts[epoch][chan]]) + burstStd_res[chan,epoch] = np.std([burst[1]-burst[0] for burst in bursts[epoch][chan]]) + burstMean_res = np.nan_to_num(burstMean_res) + burstStd_res = np.nan_to_num(burstStd_res) + return burstMean_res,burstStd_res + +########## +# Burst band powers (δ, α, θ, β, γ) +def burstBandPowers(eegData, lowcut, highcut, fs, order=7): + band_burst_powers = np.zeros((eegData.shape[0], eegData.shape[2])) + bursts = [] + supressions = [] + for epoch in range(eegData.shape[2]): + epochBurst,epochSupressions = burst_supression_detection(eegData[:,:,epoch],fs,suppression_threshold=10)#,low=30,high=49) + bursts.append(epochBurst) + supressions.append(epochSupressions) + eegData_band = filt_data(eegData, lowcut, highcut, fs, order=7) + for epoch,epochBursts in enumerate(bursts): + for chan,chanBursts in enumerate(epochBursts): + epochPowers = [] + for burst in chanBursts: + if burst[1] == eegData.shape[1]: + burstData = eegData_band[:,burst[0]:,epoch] + else: + burstData = eegData_band[:,burst[0]:burst[1],epoch] + freqs, powers = signal.periodogram(burstData, fs, axis=1) + epochPowers.append(np.mean(powers,axis=1)) + band_burst_powers[chan,epoch] = np.mean(epochPowers) + return band_burst_powers + +########## +# Number of Suppressions +def numSuppressions(eegData,fs,suppression_threshold=10): + bursts = [] + supressions = [] + for epoch in range(eegData.shape[2]): + epochBurst,epochSupressions = burst_supression_detection(eegData[:,:,epoch],fs,suppression_threshold=suppression_threshold)#,low=30,high=49) + bursts.append(epochBurst) + supressions.append(epochSupressions) + numSupprs_res = np.zeros((eegData.shape[0], eegData.shape[2])) + for chan in range(numSupprs_res.shape[0]): + for epoch in range(numSupprs_res.shape[1]): + numSupprs_res[chan,epoch] = len(supressions[epoch][chan]) + return numSupprs_res + +########## +# Suppression length μ and σ +def suppressionLengthStats(eegData,fs,suppression_threshold=10): + bursts = [] + supressions = [] + for epoch in range(eegData.shape[2]): + epochBurst,epochSupressions = burst_supression_detection(eegData[:,:,epoch],fs,suppression_threshold=suppression_threshold)#,low=30,high=49) + bursts.append(epochBurst) + supressions.append(epochSupressions) + supressionMean_res = np.zeros((eegData.shape[0], eegData.shape[2])) + supressionStd_res = np.zeros((eegData.shape[0], eegData.shape[2])) + for chan in range(supressionMean_res.shape[0]): + for epoch in range(supressionMean_res.shape[1]): + supressionMean_res[chan,epoch] = np.mean([suppr[1]-suppr[0] for suppr in supressions[epoch][chan]]) + supressionStd_res[chan,epoch] = np.std([suppr[1]-suppr[0] for suppr in supressions[epoch][chan]]) + supressionMean_res = np.nan_to_num(supressionMean_res) + supressionStd_res = np.nan_to_num(supressionStd_res) + return supressionMean_res, supressionStd_res + +################################################ +# Connectivity features +################################################ + +########## +# Coherence - δ +def coherence(eegData,fs): + coh_res = [] + for ii, jj in itertools.combinations(range(eegData.shape[0]), 2): + coh_res.append(CoherenceDelta(eegData, ii, jj, fs=fs)) + coh_res = np.array(coh_res) + return coh_res + +########## +# Mutual information +def calculate2Chan_MI(eegData,ii,jj,bin_min=-200, bin_max=200, binWidth=2): + H = np.zeros(eegData.shape[2]) + bins = np.arange(bin_min+1, bin_max, binWidth) + for epoch in range(eegData.shape[2]): + c_xy = np.histogram2d(eegData[ii,:,epoch],eegData[jj,:,epoch],bins)[0] + H[epoch] = mutual_info_score(None, None, contingency=c_xy) + return H + +########## +# Granger causality +def calcGrangerCausality(eegData,ii,jj): + H = np.zeros(eegData.shape[2]) + for epoch in range(eegData.shape[2]): + X = np.vstack([eegData[ii,:,epoch],eegData[jj,:,epoch]]).T + H[epoch] = tsa.stattools.grangercausalitytests(X, 1, addconst=True, verbose=False)[1][0]['ssr_ftest'][0] + return H + +########## +# phase Lag Index +def phaseLagIndex(eegData, i, j): + hxi = ss.hilbert(eegData[i,:,:]) + hxj = ss.hilbert(eegData[j,:,:]) + # calculating the INSTANTANEOUS PHASE + inst_phasei = np.arctan(np.angle(hxi)) + inst_phasej = np.arctan(np.angle(hxj)) + + out = np.abs(np.mean(np.sign(inst_phasej - inst_phasei), axis=0)) + return out + +########## +# Cross-correlation Magnitude +def crossCorrMag(eegData,ii,jj): + crossCorr_res = [] + for ii, jj in itertools.combinations(range(eegData.shape[0]), 2): + crossCorr_res.append(crossCorrelation(eegData, ii, jj)) + crossCorr_res = np.array(crossCorr_res) + return crossCorr_res + +########## +# Cross-correlation Lag +def corrCorrLag(eegData,ii,jj,fs=100): + crossCorrLag_res = [] + for ii, jj in itertools.combinations(range(eegData.shape[0]), 2): + crossCorrLag_res.append(corrCorrLag(eegData, ii, jj, fs)) + crossCorrLag_res = np.array(crossCorrLag_res) + return crossCorrLag_res + diff --git a/EpochedFeatures.py b/EpochedFeatures.py new file mode 100644 index 0000000..4854f5a --- /dev/null +++ b/EpochedFeatures.py @@ -0,0 +1,459 @@ +#!/usr/bin/env python +# coding: utf-8 + +# In[ ]: + + +import ImportUtils +import math +import EEGExtract as eeg +from sklearn.model_selection import train_test_split + +import os +import glob +from scipy import io,signal +import numpy as np +import pandas as pd +from sklearn import preprocessing +import pickle +from sklearn.metrics import mean_squared_error +from sklearn.impute import SimpleImputer + +import matplotlib.pyplot as plt +import seaborn as sns +import copy +import os + + +# In[ ]: + + +def select_channels(data,channels): + +# parameters:- + # data - channelwise EEG preprocessed data + # channels - list of required channels + +# returns:- + # ans - the selected channels from the entire dataset + + ans = np.empty((data.shape[0],len(channels),data.shape[2])) + for sub in range(data.shape[0]): + ans[sub,:,:] = np.array([data[sub,x,:] for x in channels]) + return ans + + +# In[ ]: + + +def epoch_data(X, Y, Z, window, stride, sfreq): + +# Function to epoch the data + +# parameters:- + # X - The EEG data input passed as trial*channel*timepoints + # Y - VALD (depending on the dataset) as given by the user + # Z - Participant number and session number + # window - length of required window in seconds + # stride - stride of the required sliding window in seconds + # sfreq - sampling frequency of the obtained EEG data + +# retruns:- + # X_new - Epoched X + # Y_new - Epoched Y (All the segments for a given trial shall have the same value, i.e the one given by the subject) + # Z_new - Epoched Z (All the segments for a given trial shall have the same value, i.e the one of the subject) + + trials,channels,timepoints = X.shape + segment = int(window*sfreq) + step = int(stride*sfreq) + epochPerTrial = int((timepoints-segment)/step + 1) + + X_new = np.empty((trials*epochPerTrial,channels,segment)) + Y_new = np.empty((trials*epochPerTrial,Y.shape[1])) + Z_new = np.empty((trials*epochPerTrial,Z.shape[1])) + + count=0 + for trial in range(trials): + for epoch in range(epochPerTrial): + X_new[count,:,:] = X[trial,:,epoch*step:(epoch*step)+segment] + Y_new[count,:] = Y[trial,:] + Z_new[count,:] = Z[trial,:] + count = count+1 + + return X_new, Y_new, Z_new + + +# In[ ]: + + +def save_features(dataset, ans, Y_epoch, sfreq, window, stride): + +# A function to generate the features and save them + +# parameters:- + # dataset - name of the dataset + # ans - epoched segment of X + # Y_epoch - epoched segments of the valence and arousal scores taken from the subject + # window - window length in seconds + # stride - stride length in seconds + # sfreq - sampli5ng frequency of the EEG signal + +# returns:- + # void + + fs = sfreq + + featurepath = os.getcwd() + '/' + dataset + '/data_extracted/featuresDict/' + + feature_matrix = eeg.shannonEntropy(ans, bin_min=-200, bin_max=200, binWidth=2) + np.savez((featurepath+"shannonEntropy_{}_{}.npz").format(window,stride),features = feature_matrix , Y = Y_epoch) + + feature_matrix = eeg.eegStd(ans) + stdshape = feature_matrix.shape + # The channels + emotiv_channels = ['AF3','F7','F3','FC5','T7','P7','O1','O2','P8','T8','FC6','F4','F8','AF4'] + left_channels = ['AF3', 'F7','F3', 'FC5', 'T7', 'P7', 'O1'] + right_channels = ['AF4','F8','F4','FC6','T8','P8','O2'] + + dasm_gamma = np.empty((0,stdshape[1])) + rasm_gamma = np.empty((0,stdshape[1])) + for lc,rc in zip(left_channels, right_channels): + lci = emotiv_channels.index(lc) + rci = emotiv_channels.index(rc) + + #left differential entropy + dl = (0.5)*np.log((2*math.pi*math.e*np.square(eeg.eegStd(eeg.filt_data(np.expand_dims(ans[lci,:,:], axis=0),30,45,fs))))) + #right differential entropy + dr = (0.5)*np.log((2*math.pi*math.e*np.square(eeg.eegStd(eeg.filt_data(np.expand_dims(ans[rci,:,:], axis=0),30,45,fs))))) + + dasm_gamma = np.append(dasm_gamma, np.subtract(dl,dr), axis=0) + rasm_gamma = np.append(rasm_gamma, np.divide(dl,dr), axis=0) + + np.savez((featurepath+"dasm_gamma_{}_{}.npz").format(window,stride),features = dasm_gamma , Y = Y_epoch) + np.savez((featurepath+"rasm_gamma_{}_{}.npz").format(window,stride),features = rasm_gamma , Y = Y_epoch) + del dasm_gamma, rasm_gamma + + return + ''' + Subband Information Quantity + ''' + + # delta (0.5–4 Hz) + eegData_delta = eeg.filt_data(ans, 0.5, 4, fs) + feature_matrix = eeg.shannonEntropy(eegData_delta, bin_min=-200, bin_max=200, binWidth=2) + np.savez((featurepath+"ShannonRes_sub_bands_delta_{}_{}.npz").format(window,stride),features = feature_matrix , Y = Y_epoch) + + eegData_theta = eeg.filt_data(ans, 4, 8, fs) + feature_matrix = eeg.shannonEntropy(eegData_theta, bin_min=-200, bin_max=200, binWidth=2) + np.savez((featurepath+"ShannonRes_sub_bands_theta_{}_{}.npz").format(window,stride),features = feature_matrix , Y = Y_epoch) + + eegData_alpha = eeg.filt_data(ans, 8, 12, fs) + feature_matrix = eeg.shannonEntropy(eegData_alpha, bin_min=-200, bin_max=200, binWidth=2) + np.savez((featurepath+"ShannonRes_sub_bands_alpha_{}_{}.npz").format(window,stride),features = feature_matrix , Y = Y_epoch) + + eegData_beta = eeg.filt_data(ans, 12, 30, fs) + feature_matrix = eeg.shannonEntropy(eegData_beta, bin_min=-200, bin_max=200, binWidth=2) + np.savez((featurepath+"ShannonRes_sub_bands_beta_{}_{}.npz").format(window,stride),features = feature_matrix , Y = Y_epoch) + + eegData_gamma = eeg.filt_data(ans, 30,45, fs) + feature_matrix = eeg.shannonEntropy(eegData_gamma, bin_min=-200, bin_max=200, binWidth=2) + np.savez((featurepath+"ShannonRes_sub_bands_gamma_{}_{}.npz").format(window,stride),features = feature_matrix , Y = Y_epoch) + + HjorthMob, HjorthComp = eeg.hjorthParameters(ans) + feature_matrix = HjorthComp + np.savez((featurepath+"Hjorth_complexity_{}_{}.npz").format(window,stride),features = feature_matrix , Y = Y_epoch) + + feature_matrix = HjorthMob + np.savez((featurepath+"Hjorth_mobilty_{}_{}.npz").format(window,stride),features = feature_matrix , Y = Y_epoch) + + feature_matrix = eeg.falseNearestNeighbor(ans) + np.savez((featurepath+"falseNearestNeighbor_{}_{}.npz").format(window,stride),features = feature_matrix , Y = Y_epoch) + + feature_matrix = eeg.medianFreq(ans,fs) + np.savez((featurepath+"medianFreq_{}_{}.npz").format(window,stride),features = feature_matrix , Y = Y_epoch) + + feature_matrix = eeg.bandPower(ans, 0.5, 4, fs) + np.savez((featurepath+"bandPwr_delta_{}_{}.npz").format(window,stride),features = feature_matrix , Y = Y_epoch) + + feature_matrix = eeg.bandPower(ans, 4, 8, fs) + np.savez((featurepath+"bandPwr_theta_{}_{}.npz").format(window,stride),features = feature_matrix , Y = Y_epoch) + + feature_matrix = eeg.bandPower(ans, 8, 12, fs) + np.savez((featurepath+"bandPwr_alpha_{}_{}.npz").format(window,stride),features = feature_matrix , Y = Y_epoch) + + feature_matrix = eeg.bandPower(ans, 12, 30, fs) + np.savez((featurepath+"bandPwr_beta_{}_{}.npz").format(window,stride),features = feature_matrix , Y = Y_epoch) + + feature_matrix = eeg.bandPower(ans, 30, 45, fs) + np.savez((featurepath+"bandPwr_gamma_{}_{}.npz").format(window,stride),features = feature_matrix , Y = Y_epoch) + + feature_matrix = eeg.eegStd(ans) + stdshape = feature_matrix.shape + np.savez((featurepath+"stdDev_{}_{}.npz").format(window,stride),features = feature_matrix , Y = Y_epoch) + + feature_matrix = eeg.diffuseSlowing(ans) + np.savez((featurepath+"diffuseSlowing_{}_{}.npz").format(window,stride),features = feature_matrix , Y = Y_epoch) + + minNumSamples = int(70*fs/1000) + feature_matrix = eeg.spikeNum(ans,minNumSamples) + np.savez((featurepath+"spikeNum_{}_{}.npz").format(window,stride),features = feature_matrix , Y = Y_epoch) + + + feature_matrix = eeg.burstAfterSpike(ans,eegData_delta,minNumSamples=7,stdAway = 3) + np.savez((featurepath+"deltaBurstAfterSpike_{}_{}.npz").format(window,stride),features = feature_matrix , Y = Y_epoch) + + feature_matrix = eeg.shortSpikeNum(ans,minNumSamples) + np.savez((featurepath+"shortSpikeNum_{}_{}.npz").format(window,stride),features = feature_matrix , Y = Y_epoch) + + feature_matrix = eeg.numBursts(ans,fs) + np.savez((featurepath+"numBursts_{}_{}.npz").format(window,stride),features = feature_matrix , Y = Y_epoch) + + burstLenMean_res,burstLenStd_res = eeg.burstLengthStats(ans,fs) + feature_matrix = burstLenMean_res + np.savez((featurepath+"burstLen_u_and_sigma_mean_{}_{}.npz").format(window,stride),features = feature_matrix , Y = Y_epoch) + + feature_matrix = burstLenStd_res + np.savez((featurepath+"burstLen_u_and_sigma_std_{}_{}.npz").format(window,stride),features = feature_matrix , Y = Y_epoch) + + feature_matrix = eeg.numSuppressions(ans,fs) + np.savez((featurepath+"numSuppressions_{}_{}.npz").format(window,stride),features = feature_matrix , Y = Y_epoch) + + suppLenMean_res,suppLenStd_res = eeg.suppressionLengthStats(ans,fs) + feature_matrix = suppLenMean_res + np.savez((featurepath+"suppressionLen_u_and_sigma_mean_{}_{}.npz").format(window,stride),features = feature_matrix , Y = Y_epoch) + del suppLenMean_res + + feature_matrix = suppLenStd_res + np.savez((featurepath+"suppressionLen_u_and_sigma_std_{}_{}.npz").format(window,stride),features = feature_matrix , Y = Y_epoch) + del suppLenStd_res + + # DASM and RASM Features + # DASM = h(X lefti) − h(Xrighti), and (2) + # RASM = h(Xlefti)/h(Xrighti), + + emotiv_channels = ['AF3','F7','F3','FC5','T7','P7','O1','O2','P8','T8','FC6','F4','F8','AF4'] + left_channels = ['AF3', 'F7','F3', 'FC5', 'T7', 'P7', 'O1'] + right_channels = ['AF4','F8','F4','FC6','T8','P8','O2'] + + #[chans x ms x epochs] + dasm_delta = np.empty((0,stdshape[1])) + rasm_delta = np.empty((0,stdshape[1])) + for lc,rc in zip(left_channels, right_channels): + lci = emotiv_channels.index(lc) + rci = emotiv_channels.index(rc) + + #left differential entropy + inputarr = np.expand_dims(ans[lci,:,:], axis=0) + print("inputarr.shape=", inputarr.shape) + temp = eeg.filt_data(inputarr, 0.5, 4, fs) + tempstd = eeg.eegStd(temp) + + + dl = (0.5)*np.log((2*math.pi*math.e*np.square(eeg.eegStd(eeg.filt_data(np.expand_dims(ans[lci,:,:], axis=0), 0.5, 4, fs))))) + #right differential entropy + dr = (0.5)*np.log((2*math.pi*math.e*np.square(eeg.eegStd(eeg.filt_data(np.expand_dims(ans[rci,:,:], axis=0), 0.5, 4, fs))))) + + print("temp.shape=", temp.shape,"tempstd.shape=", tempstd.shape,"dl.shape= ", dl.shape, "stdshape=", stdshape) + dasm_delta = np.append(dasm_delta, np.subtract(dl,dr), axis=0) + rasm_delta = np.append(rasm_delta, np.divide(dl,dr), axis=0) + + np.savez((featurepath+"dasm_delta_{}_{}.npz").format(window,stride),features = dasm_delta , Y = Y_epoch) + np.savez((featurepath+"rasm_delta_{}_{}.npz").format(window,stride),features = rasm_delta , Y = Y_epoch) + del dasm_delta, rasm_delta + + dasm_theta = np.empty((0,stdshape[1])) + rasm_theta = np.empty((0,stdshape[1])) + for lc,rc in zip(left_channels, right_channels): + lci = emotiv_channels.index(lc) + rci = emotiv_channels.index(rc) + + #left differential entropy + dl = (0.5)*np.log((2*math.pi*math.e*np.square(eeg.eegStd(eeg.filt_data(np.expand_dims(ans[lci,:,:], axis=0), 4, 8, fs))))) + #right differential entropy + dr = (0.5)*np.log((2*math.pi*math.e*np.square(eeg.eegStd(eeg.filt_data(np.expand_dims(ans[rci,:,:], axis=0), 4, 8, fs))))) + + dasm_theta = np.append(dasm_theta, np.subtract(dl,dr), axis=0) + rasm_theta = np.append(rasm_theta, np.divide(dl,dr), axis=0) + + np.savez((featurepath+"dasm_theta_{}_{}.npz").format(window,stride),features = dasm_theta , Y = Y_epoch) + np.savez((featurepath+"rasm_theta_{}_{}.npz").format(window,stride),features = rasm_theta , Y = Y_epoch) + del dasm_theta, rasm_theta + + dasm_alpha = np.empty((0,stdshape[1])) + rasm_alpha = np.empty((0,stdshape[1])) + for lc,rc in zip(left_channels, right_channels): + lci = emotiv_channels.index(lc) + rci = emotiv_channels.index(rc) + + #left differential entropy + dl = (0.5)*np.log((2*math.pi*math.e*np.square(eeg.eegStd(eeg.filt_data(np.expand_dims(ans[lci,:,:], axis=0), 8, 12, fs))))) + #right differential entropy + dr = (0.5)*np.log((2*math.pi*math.e*np.square(eeg.eegStd(eeg.filt_data(np.expand_dims(ans[rci,:,:], axis=0), 8, 12, fs))))) + + dasm_alpha = np.append(dasm_alpha, np.subtract(dl,dr), axis=0) + rasm_alpha = np.append(rasm_alpha, np.divide(dl,dr), axis=0) + + np.savez((featurepath+"dasm_alpha_{}_{}.npz").format(window,stride),features = dasm_alpha , Y = Y_epoch) + np.savez((featurepath+"rasm_alpha_{}_{}.npz").format(window,stride),features = rasm_alpha , Y = Y_epoch) + del dasm_alpha, rasm_alpha + + + dasm_beta = np.empty((0,stdshape[1])) + rasm_beta = np.empty((0,stdshape[1])) + for lc,rc in zip(left_channels, right_channels): + lci = emotiv_channels.index(lc) + rci = emotiv_channels.index(rc) + + #left differential entropy + dl = (0.5)*np.log((2*math.pi*math.e*np.square(eeg.eegStd(eeg.filt_data(np.expand_dims(ans[lci,:,:], axis=0), 12, 30,fs))))) + #right differential entropy + dr = (0.5)*np.log((2*math.pi*math.e*np.square(eeg.eegStd(eeg.filt_data(np.expand_dims(ans[rci,:,:], axis=0), 12, 30,fs))))) + + dasm_beta = np.append(dasm_beta, np.subtract(dl,dr), axis=0) + rasm_beta = np.append(rasm_beta, np.divide(dl,dr), axis=0) + + np.savez((featurepath+"dasm_beta_{}_{}.npz").format(window,stride),features = dasm_beta , Y = Y_epoch) + np.savez((featurepath+"rasm_beta_{}_{}.npz").format(window,stride),features = rasm_beta , Y = Y_epoch) + del dasm_beta, rasm_beta + + + + dasm_gamma = np.empty((0,stdshape[1])) + rasm_gamma = np.empty((0,stdshape[1])) + for lc,rc in zip(left_channels, right_channels): + lci = emotiv_channels.index(lc) + rci = emotiv_channels.index(rc) + + #left differential entropy + dl = (0.5)*np.log((2*math.pi*math.e*np.square(eeg.eegStd(eeg.filt_data(np.expand_dims(ans[lci,:,:], axis=0),30,45,fs))))) + #right differential entropy + dr = (0.5)*np.log((2*math.pi*math.e*np.square(eeg.eegStd(eeg.filt_data(np.expand_dims(ans[rci,:,:], axis=0),30,45,fs))))) + + dasm_gamma = np.append(dasm_gamma, np.subtract(dl,dr), axis=0) + rasm_gamma = np.append(rasm_gamma, np.divide(dl,dr), axis=0) + + np.savez((featurepath+"dasm_gamma_{}_{}.npz").format(window,stride),features = dasm_gamma , Y = Y_epoch) + np.savez((featurepath+"rasm_gamma_{}_{}.npz").format(window,stride),features = rasm_gamma , Y = Y_epoch) + del dasm_gamma, rasm_gamma + + +# In[ ]: + + +def getEpochedFeatures(dataset, window, stride, sfreq, label): + +# Function to reshape the arrays before passing on to the save_features function + +# parameters:- + # dataset - name of the dataset + # window - length of window in seconds + # stride - length of stride in seconds + # sfreq - sampling frequency of the EEG signal + # label - valence/arousal (0/1) + +# returns:- + # void + ''' + Returns Accuracy vs Segment size plot for + window - length of window + stride - step + sfreq - sampling freq + label - 0-valence, 1-arousal, 2-dominance, 3-liking + ''' + fs = sfreq + X = None + Y = None + Z = None + pwd = os.getcwd() + with np.load((pwd + '/data_extracted/{}.npz').format(dataset), allow_pickle=True) as data: + X = data['X'] + Y = data['Y'] + Z = data['Z'] + + print("Shape After Loading") + print("X.shape=", X.shape," Y.shape=",Y.shape," Z.shape=", Z.shape) + # return + #########!MODIFY FOR DREAMER AND DEAP DATASET######################################## + #**** + ''' + Reshape Data + ''' + if(dataset != "DEAP"): + temp_arr = np.empty((X.shape[0],X.shape[2],X.shape[1])) + for i in range(temp_arr.shape[0]): + temp_arr[i,:,:] = X[i,:,:].transpose() + X = copy.deepcopy(temp_arr) + del temp_arr + + print("Shape after reshaping") + print("X.shape=", X.shape," Y.shape=",Y.shape," Z.shape=", Z.shape) + ''' + Select Channels(if needed) + ''' + + print("Data Loaded...\n") + ch_names = ['F1', 'AF3', 'F3', 'F7', 'FC5', 'FC1', 'C3', 'T7', 'CP5', 'CP1', 'P3', 'P7', 'PO3', 'O1', 'Oz', 'Pz', 'Fp2', 'AF4', 'Fz', 'F4', 'F8', 'FC6', 'FC2', 'Cz', 'C4', 'T8', 'CP6', 'CP2', 'P4', 'P8', 'PO4', 'O2', 'hEOG','vEOG', 'zEMG','tEMG','GSR','Respiration belt','Plethysmograph','Temperature'] + emotiv_channels = ['AF3','F7','F3','FC5','T7','P7','O1','O2','P8','T8','FC6','F4','F8','AF4'] + index_arr = [ch_names.index(x) for x in emotiv_channels] + + X_new = None + if(dataset == "DEAP"): + X_new = select_channels(X,index_arr) + else: + X_new = copy.deepcopy(X) + + print("X_new.shape = ", X_new.shape) + + del X + print("Channel selection done ...\n") + ''' + # X = (32*40,40,8064) + # Y = (32*40,4) + # Z = (32*40,2) + + # X : (nbSegments, nbChannel, nbTimepoints) : Data + # Y : (nbSegments, nbEmotions) : Valence and arousal data + # Z : (nbSegments, 2) : Participant number, and session number + ''' + + ''' + DREAMER Dataset + # X = (23*18,7808+54032,14) + # Y = (23*18,2) + # Z = (23*18,2) + ''' + + (X_epoch, Y_epoch, Z_epoch) = epoch_data(X_new, Y, Z,window,stride,sfreq) + del X_new + del Y + del Z + + print("Epoching done ...\n") + print(X_epoch.shape, Y_epoch.shape, Z_epoch.shape) #debug + + # 1280*63,40,128 + # trial, channel, segment + trials, channels, segment = X_epoch.shape + ans = np.empty((channels, segment, trials)) #[chans x ms x epochs] + for i in range(trials): + ans[:,:,i] = X_epoch[i,:,] + del X_epoch + + print("ans.shape = ", ans.shape) + print("Rotation of np.array done ...\n") + pwd = os.getcwd() + filepath = pwd + '/' + dataset + "/data_extracted/epochedData/data" + str(window) + str(stride) + ".npz" + np.savez(filepath,ans,Y_epoch, Z_epoch) + + # featuresDict = getFeaturesDict(ans,sfreq) + save_features(dataset, ans, Y_epoch, sfreq, window, stride) + + # with open(pwd + '/' + dataset + '/data_extracted/featureDicts/'+str(window)+str(stride)+ '.pkl', 'wb') as f: + # pickle.dump(featuresDict, f, pickle.HIGHEST_PROTOCOL) + + print("Feature Extraction done ...\n") + + +if __name__ == '__main__': + pass + diff --git a/ImportUtils.py b/ImportUtils.py new file mode 100644 index 0000000..10c64b7 --- /dev/null +++ b/ImportUtils.py @@ -0,0 +1,172 @@ +#!/usr/bin/env python +# coding: utf-8 + +# Script to import all the required libraries.
+# It also defines a function to make a dictionary and load the features. +# + +# In[ ]: + + +from sklearn.model_selection import train_test_split +from sklearn.preprocessing import StandardScaler +from sklearn.metrics import accuracy_score + +from sklearn.feature_selection import chi2 +from sklearn.feature_selection import SelectKBest, f_classif +from sklearn.model_selection import train_test_split +from sklearn import preprocessing +from sklearn.feature_selection import * +from sklearn.model_selection import RandomizedSearchCV +from sklearn.model_selection import GridSearchCV + +import sys +import csv +import os +import math +import glob +from scipy import io,signal +import numpy as np +import pandas as pd + +import pickle +from sklearn.metrics import mean_squared_error +from sklearn.impute import SimpleImputer + + +import matplotlib.pyplot as plt +import seaborn as sns +import copy +from sklearn import feature_selection +import argparse + +import cuml +from cuml.svm import SVR +from cuml.ensemble import RandomForestRegressor +from cuml.svm import SVC +from cuml.ensemble import RandomForestClassifier +from cuml.metrics import accuracy_score + + +# In[ ]: + + +def loadFeaturesDict(dataset): + +# input parameters :- The name of the dataset +# return :- Feature dictionary + + featuresDict = {'shannonEntropy': None, + 'ShannonRes_delta':None, + 'ShannonRes_theta':None, + 'ShannonRes_alpha':None, + 'ShannonRes_beta':None, + 'ShannonRes_gamma':None, + 'HjorthComp':None, + 'HjorthMob':None, + 'falseNearestNeighbor':None, + 'medianFreq':None, + 'bandPwr_delta':None, + 'bandPwr_theta':None, + 'bandPwr_alpha':None, + 'bandPwr_beta':None, + 'bandPwr_gamma':None, + 'stdDev':None, + 'diffuseSlowing':None, + 'spikeNum':None, + 'deltaBurstAfterSpike':None, + 'shortSpikeNum':None, + 'numBursts':None, + 'burstLenMean':None, + 'burstLenStd':None, + 'numSuppressions':None, + 'suppLenMean':None, + 'suppLenStd':None, + 'dasm_delta': None, + 'dasm_theta': None, + 'dasm_alpha': None, + 'dasm_beta': None, + 'dasm_gamma': None, + 'rasm_delta': None, + 'rasm_theta': None, + 'rasm_alpha': None, + 'rasm_beta': None, + 'rasm_gamma': None, + } + + featurepath = os.getcwd() + '/' + dataset + '/data_extracted/featuresDict/' + + featuresDict['shannonEntropy'] = np.load(featurepath + "shannonEntropy_1_1.npz", allow_pickle=True)['features'] + + featuresDict['ShannonRes_delta'] = np.load(featurepath + "ShannonRes_sub_bands_delta_1_1.npz", allow_pickle=True)['features'] + + featuresDict['ShannonRes_theta'] = np.load(featurepath + "ShannonRes_sub_bands_theta_1_1.npz", allow_pickle=True)['features'] + + featuresDict['ShannonRes_alpha'] = np.load(featurepath + "ShannonRes_sub_bands_alpha_1_1.npz", allow_pickle=True)['features'] + + featuresDict['ShannonRes_beta'] = np.load(featurepath + "ShannonRes_sub_bands_beta_1_1.npz", allow_pickle=True)['features'] + + featuresDict['ShannonRes_gamma'] = np.load(featurepath + "ShannonRes_sub_bands_gamma_1_1.npz", allow_pickle=True)['features'] + + featuresDict['HjorthComp'] = np.load(featurepath + "Hjorth_complexity_1_1.npz", allow_pickle=True)['features'] + + featuresDict['HjorthMob'] = np.load(featurepath + "Hjorth_mobilty_1_1.npz",allow_pickle=True)['features'] + + featuresDict['falseNearestNeighbor'] = np.load(featurepath + "falseNearestNeighbor_1_1.npz",allow_pickle=True)['features'] + + featuresDict['medianFreq'] = np.load(featurepath + "medianFreq_1_1.npz",allow_pickle=True)['features'] + + featuresDict['bandPwr_delta']=np.load(featurepath+"bandPwr_delta_1_1.npz", allow_pickle = True)['features'] + + featuresDict['bandPwr_theta']=np.load(featurepath + "bandPwr_theta_1_1.npz", allow_pickle = True)['features'] + + featuresDict['bandPwr_alpha']=np.load(featurepath + "bandPwr_alpha_1_1.npz", allow_pickle = True)['features'] + + featuresDict['bandPwr_beta']=np.load(featurepath + "bandPwr_beta_1_1.npz", allow_pickle = True)['features'] + + featuresDict['bandPwr_gamma']=np.load(featurepath + "bandPwr_gamma_1_1.npz", allow_pickle = True)['features'] + + featuresDict['stdDev'] = np.load(featurepath + "stdDev_1_1.npz",allow_pickle=True)['features'] + + featuresDict['diffuseSlowing'] = np.load(featurepath + "diffuseSlowing_1_1.npz",allow_pickle=True)['features'] + + featuresDict['spikeNum'] = np.load(featurepath + "spikeNum_1_1.npz",allow_pickle=True)['features'] + + featuresDict['deltaBurstAfterSpike'] = np.load(featurepath + "deltaBurstAfterSpike_1_1.npz",allow_pickle=True)['features'] + + featuresDict['shortSpikeNum'] = np.load(featurepath + "shortSpikeNum_1_1.npz", allow_pickle=True)['features'] + + featuresDict['numBursts'] = np.load(featurepath + "numBursts_1_1.npz",allow_pickle=True)['features'] + + featuresDict['burstLenMean'] = np.load(featurepath + "burstLen_u_and_sigma_mean_1_1.npz",allow_pickle=True)['features'] + + featuresDict['burstLenStd'] = np.load(featurepath + "burstLen_u_and_sigma_std_1_1.npz",allow_pickle=True)['features'] + + featuresDict['numSuppressions'] = np.load(featurepath + "numSuppressions_1_1.npz",allow_pickle=True)['features'] + + featuresDict['suppLenMean'] = np.load(featurepath + "suppressionLen_u_and_sigma_mean_1_1.npz",allow_pickle=True)['features'] + + featuresDict['suppLenStd'] = np.load(featurepath + "suppressionLen_u_and_sigma_std_1_1.npz",allow_pickle=True)['features'] + + featuresDict['dasm_delta'] = np.load(featurepath + "dasm_delta_1_1.npz",allow_pickle=True)['features'] + + featuresDict['dasm_theta'] = np.load(featurepath + "dasm_theta_1_1.npz",allow_pickle=True)['features'] + + featuresDict['dasm_alpha'] = np.load(featurepath + "dasm_alpha_1_1.npz",allow_pickle=True)['features'] + + featuresDict['dasm_beta'] = np.load(featurepath + "dasm_beta_1_1.npz",allow_pickle=True)['features'] + + featuresDict['dasm_gamma'] = np.load(featurepath + "dasm_gamma_1_1.npz",allow_pickle=True)['features'] + + featuresDict['rasm_delta'] = np.load(featurepath + "rasm_delta_1_1.npz",allow_pickle=True)['features'] + + featuresDict['rasm_theta'] = np.load(featurepath + "rasm_theta_1_1.npz",allow_pickle=True)['features'] + + featuresDict['rasm_alpha'] = np.load(featurepath + "rasm_alpha_1_1.npz",allow_pickle=True)['features'] + + featuresDict['rasm_beta'] = np.load(featurepath + "rasm_beta_1_1.npz",allow_pickle=True)['features'] + + featuresDict['rasm_gamma'] = np.load(featurepath + "rasm_gamma_1_1.npz",allow_pickle=True)['features'] + + return featuresDict + diff --git a/TopNByClassifier.py b/TopNByClassifier.py new file mode 100644 index 0000000..d83f9e8 --- /dev/null +++ b/TopNByClassifier.py @@ -0,0 +1,735 @@ +#!/usr/bin/env python +# coding: utf-8 + +# In[ ]: + + +from ImportUtils import * +from sklearn.model_selection import ParameterGrid + +from sklearn.model_selection import train_test_split + +from sklearn.preprocessing import StandardScaler +from sklearn.metrics import accuracy_score + +from sklearn.feature_selection import chi2 +from sklearn.feature_selection import SelectKBest, f_classif +from sklearn.model_selection import train_test_split +from sklearn.preprocessing import StandardScaler +from sklearn.metrics import accuracy_score + +from sklearn.ensemble import RandomForestRegressor as sklearnrfi + +import os +import glob +from scipy import io,signal +import numpy as np +import pandas as pd +from sklearn import preprocessing +import pickle +from sklearn.metrics import mean_squared_error +from sklearn.impute import SimpleImputer + + +import matplotlib.pyplot as plt +# %matplotlib inline +import seaborn as sns +import copy + +def topElectrodeRegressionRanking(dataset, window, stride, sfreq, clf, label, scale=False, pca=False): + ''' + Ranks of features according to rmse computed by regressor passed in clf + Plots electrode v/s rmse graph + + ''' + # parameters :- + # dataset - name of the dataset + # window - length of the sliding window in seconds + # stride - length of the stride of the sliding window in seconds + # sfreq - sampling frequency of the EEG data + # clf - name of the classifier to be used + # label - valence/arousal/dominance/liking label (shape depends upon the dataset) in an enumerated form (0- valence ; 1-arousal ; 2- like; 3-dominance) + # scale - sclaing of the EEG data if required + + # returns :- + # void + + pwd = os.getcwd() + + #load extracted features + ##################################################################################################################################################### + featurepath = os.getcwd() + '/' + dataset + '/data_extracted/featuresDict/' + ans = np.load((featurepath + "shannonEntropy_{}_{}.npz").format(window,stride), allow_pickle=True)['features'] + Y_epoch = np.load((featurepath + "shannonEntropy_{}_{}.npz").format(window,stride), allow_pickle=True)['Y'] + + rmseList = [] + electrodeList = ['AF3', 'F7', 'F3', 'FC5', 'T7', 'P7', 'O1', 'O2', 'P8', 'T8', 'FC6', 'F4', 'F8', 'AF4'] + fs = sfreq + pwd = os.getcwd() + featuresDict = loadFeaturesDict(dataset) + asm_features = ['dasm_delta', 'dasm_theta', 'dasm_alpha', 'dasm_beta', 'dasm_gamma', 'rasm_delta', 'rasm_theta', 'rasm_alpha', 'rasm_beta', 'rasm_gamma'] + for asm in asm_features: + featuresDict.pop(asm) + + common = [] + with open('intersection.pkl', 'rb') as f: + common = pickle.load(f) + + for k in list(featuresDict.keys()): + if k not in common: + # pop out common feature + featuresDict.pop(k) + + selectFeatures = list(featuresDict.keys()) + y = Y_epoch[:,label] #valence + ##################################################################################################################################################### + + for electrode in range(14): + # Load FeaturesDict from memory + + + print("Number of segments are: {}".format(ans.shape[1])) + + featureMatrix = np.empty((len(selectFeatures),ans.shape[1])) #[14*32 + 1,80640] + i=0 + for key,value in featuresDict.items(): + featureMatrix[i,:] = value[electrode,:] + i = i+1 + + print(featureMatrix.T.shape) + featureMatrix = featureMatrix.astype(np.float32) + + #Impute NaN values with zero + if np.isnan(featureMatrix).any(): + featureMatrix = np.nan_to_num(featureMatrix,nan=0) + + #Name Feature vector columns + feature_channel_index = [] + for feature in selectFeatures: + feature_channel_index.append(feature + str(electrode)) + + print("Number of Feature-Columns: {}\n".format(len(feature_channel_index))) #debug + + #Preparing dataset from feature matrix + X = pd.DataFrame(featureMatrix.T) + X.columns = feature_channel_index + X = X.replace([np.inf, -np.inf], np.nan) + X = X.fillna(0) + + + print("Features Ready for undergoing selection tests done ...\n") + + # Perform train_test_split to get training and test data + X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42) + + # Normalise-scale data + # Feature Scaling + if(scale == True): + sc = StandardScaler() + X_train = sc.fit_transform(X_train) + X_test = sc.transform(X_test) + + # Apply classfier + clf.fit(X_train, y_train) + y_predict = clf.predict(X_test) + rmse = mean_squared_error(y_test, y_predict,squared=False) + print("window: {}, stide: {}, rmse: {}".format(window,stride,rmse)) + rmseList.append(rmse) + + + #rank electrodes based on RMSE computed by the classifier + electrode_df = pd.DataFrame(electrodeList) + rmse_df = pd.DataFrame(rmseList) + #concat two dataframes for better visualization + electrodeRanking = pd.concat([electrode_df, rmse_df],axis=1) + electrodeRanking.columns = ['Electrode','RMSE'] #naming the dataframe columns + features_result = electrodeRanking.sort_values('RMSE') + print(features_result) + # return features_result + + ################################################################################## + N = features_result.shape[0] + topRmseList = [] + topNList = ["{}".format(x) for x in range(1,N+1)] + + + for n in range(1,N+1): + + + topnelectrodes = features_result.head(n) + electrode_index = topnelectrodes.index + electrode_index = list(electrode_index)[:n] + + # X-Values + featureMatrix = np.empty((len(selectFeatures)*len(electrode_index),ans.shape[1])) + + i = 0 + for index in electrode_index: + for key,value in featuresDict.items(): + featureMatrix[i,:] = value[index,:] + i = i+1 + + featureMatrix = featureMatrix.astype(np.float32) + print(featureMatrix.T.shape) + + # Removing NaN Values + if np.isnan(featureMatrix).any(): + featureMatrix = np.nan_to_num(featureMatrix,nan=0) + + # Name Feature vector columns + feature_channel_index = [] + for index in electrode_index: + for feature in selectFeatures: + feature_channel_index.append(feature + str(index)) + + print("Number of Feature-Columns: {}\n".format(len(feature_channel_index))) + + X = pd.DataFrame(featureMatrix.T) + X.columns = feature_channel_index + X = X.replace([np.inf, -np.inf], np.nan) + X = X.fillna(0) + + + print("Features Ready for undergoing selection tests done ...\n") + + + X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42) + + # Normalise-scale data + # Feature Scaling + if(scale == True): + sc = StandardScaler() + X_train = sc.fit_transform(X_train) + X_test = sc.transform(X_test) + + # Apply classfier + + search_method = "tpot" + best_clf = None + if(search_method == "bayes_sk_opt"): + + # BayesCV scikit opt + search_space = {"bootstrap": Categorical([True, False]), # values for boostrap can be either True or False + "max_depth": Integer(6, 20), # values of max_depth are integers from 6 to 20 + "max_features": Categorical(['auto', 'sqrt','log2']), + "min_samples_leaf": Integer(2, 10), + "min_samples_split": Integer(2, 10), + "n_estimators": Integer(100, 500) + } + + forest_bayes_search = BayesSearchCV(clf, search_space, n_iter=32, cv=5) + print(forest_bayes_search) + print(forest_bayes_search.fit(X_train, y_train)) + print("Best Parameters are: ", forest_bayes_search.best_params_) + best_clf = forest_bayes_search.best_estimator_ + + elif(search_method =="random_grid_search"): + print("Random Search followed by GridSearch initiated!\n"); + #RandomSearchCV followed by GridSearchCV + random_grid = {'n_estimators': [int(x) for x in np.linspace(start = 200, stop = 2000, num = 10)], + 'max_features': ['auto', 'sqrt','log2'], + 'max_depth': [int(x) for x in np.linspace(10, 1000,10)], + 'min_samples_split': [2, 5, 10,14], + 'min_samples_leaf': [1, 2, 4,6,8], + } + rf_randomcv=RandomizedSearchCV(estimator=clf,param_distributions=random_grid,n_iter=100,cv=5,verbose=2,random_state=100) + print(rf_randomcv.fit(X_train, y_train)) + print("Best Parameters for RandomSearchCV are: ", rf_randomcv.best_params_) + print("RMSE with RandomSearchCV is :",mean_squared_error(y_test, rf_randomcv.best_estimator_.predict(X_test),squared=False)); + + param_grid = { + 'max_depth': [rf_randomcv.best_params_['max_depth']], + 'max_features': [rf_randomcv.best_params_['max_features']], + 'min_samples_leaf': [rf_randomcv.best_params_['min_samples_leaf'], + rf_randomcv.best_params_['min_samples_leaf']+2, + rf_randomcv.best_params_['min_samples_leaf'] + 4], + 'min_samples_split': [rf_randomcv.best_params_['min_samples_split'] - 2, + rf_randomcv.best_params_['min_samples_split'] - 1, + rf_randomcv.best_params_['min_samples_split'], + rf_randomcv.best_params_['min_samples_split'] +1, + rf_randomcv.best_params_['min_samples_split'] + 2], + 'n_estimators': [rf_randomcv.best_params_['n_estimators'] - 200, rf_randomcv.best_params_['n_estimators'] - 100, + rf_randomcv.best_params_['n_estimators'], + rf_randomcv.best_params_['n_estimators'] + 100, rf_randomcv.best_params_['n_estimators'] + 200] + } + + grid_search=GridSearchCV(estimator=rf,param_grid=param_grid,cv=10, verbose=5) + grid_search.fit(X_train,y_train) + best_clf = rf_randomcv.best_estimator_ + elif search_method =="manual_search": + min_rmse = 1000 + best_clf = clf + min_params = None + # 2*3*3*3*3 + param_grid = {'n_estimators': [50, 100], + 'max_features': ['auto'], + 'max_depth': [2, 10, 100], + 'min_samples_split': [2, 5, 10], + 'min_samples_leaf': [1, 2, 8], + } + + param_grid = ParameterGrid(param_grid) + for params in param_grid: + print("Current Parameters : ", params) + temp_clf = RandomForestRegressor( max_features = params['max_features'], min_samples_leaf = params['min_samples_leaf'], min_samples_split = params['min_samples_split'], n_estimators = params['n_estimators'],max_depth = params['max_depth']); + temp_clf.fit(X_train,y_train) + y_predict = temp_clf.predict(X_test) + rmse = mean_squared_error(y_test, y_predict,squared=False) + print("Current RMSE with above params : ", rmse) + if(min_rmse > rmse): + min_rmse = rmse; + best_clf = temp_clf; + min_params = params; + + print("Best Params for parameter search are : \n", min_params) + print("window: {}, stide: {}, rmse: {}".format(window,stride,min_rmse)) + topRmseList.append(min_rmse) + elif search_method == "tpot": + from tpot import TPOTRegressor; + # TPOT setup + GENERATIONS = 5 + POP_SIZE = 100 + CV = 5 + SEED = 42 + + tpot = TPOTRegressor( + generations=GENERATIONS, + population_size=POP_SIZE, + random_state=SEED, + config_dict="TPOT cuML", + n_jobs=1, # cuML requires n_jobs=1 + cv=CV, + verbosity=2, + ) + + tpot.fit(X_train, y_train) + + y_predict = tpot.predict(X_test) + rmse = mean_squared_error(y_test, y_predict,squared=False) + print("window: {}, stide: {}, rmse: {}".format(window,stride,rmse)) + topRmseList.append(rmse) + + + else: + best_clf = clf + best_clf.fit(X_train,y_train) + + + if search_method != "manual_search" and search_method != "tpot": + y_predict = best_clf.predict(X_test) + rmse = mean_squared_error(y_test, y_predict,squared=False) + print("window: {}, stide: {}, rmse: {}".format(window,stride,rmse)) + topRmseList.append(rmse) + + + topNElectrode_df = pd.DataFrame(topNList) + topNRmse_df = pd.DataFrame(topRmseList) + #concat two dataframes for better visualization + topNElectrodeRanking = pd.concat([topNElectrode_df, topNRmse_df],axis=1) + topNElectrodeRanking.columns = ['Electrode','RMSE'] #naming the dataframe columns + print(topNElectrodeRanking) + + # Plotting + fig = plt.gcf() + fig.set_size_inches(20, 10) + plt.rcParams.update({'font.size': 30}) + plt.xlabel('Top N Electrodes') + plt.ylabel('RMSE') + plt.plot(topNElectrodeRanking.loc[:,"Electrode"], topNElectrodeRanking.loc[:,"RMSE"]) + plt.tight_layout() + + +# In[ ]: + + +def topFeaturesRegressionRanking(dataset, window, stride, sfreq, clf, label, scale=False, pca=False): + ''' + Ranks of features according to rmse computed by regressor passed in clf + Plots electrode v/s rmse graph + + ''' + # parameters :- + # dataset - name of the dataset + # window - length of the sliding window in seconds + # stride - length of the stride of the sliding window in seconds + # sfreq - sampling frequency of the EEG data + # clf - name of the classifier to be used + # label - valence/arousal/dominance/liking label (shape depends upon the dataset) + # scale - sclaing of the EEG data if required + + # returns :- + # void + fs = sfreq + pwd = os.getcwd() + featurepath = os.getcwd() + '/' + dataset + '/data_extracted/featuresDict/' + ans = np.load((featurepath + "shannonEntropy_{}_{}.npz").format(window,stride), allow_pickle=True)['features'] + Y_epoch = np.load((featurepath + "shannonEntropy_{}_{}.npz").format(window,stride), allow_pickle=True)['Y'] + print("Number of segments are: {}".format(ans.shape[1])) + + featuresDict = None + featuresDict = loadFeaturesDict(dataset) + + common = [] + with open('intersection.pkl', 'rb') as f: + common = pickle.load(f) + + for k in list(featuresDict.keys()): + if k not in common: + # pop out common feature + featuresDict.pop(k) + + featuresList = list(featuresDict.keys()) + + y = Y_epoch[:,label] #valence + + + rmseList = [] + + #################################################################### + #modify featuresList + featureMatrix = np.empty((0,ans.shape[1])) #[14*32 + 1,80640] + for key,value in featuresDict.items(): + featureMatrix = np.append(featureMatrix,value,axis=0) + + + if np.isnan(featureMatrix).any(): + featureMatrix = np.nan_to_num(featureMatrix,nan=0) + + featureMatrix = featureMatrix.astype('float64') + + + feature_channel_index = [] + for feature in featuresList: + for i in range(featuresDict[feature].shape[0]): + if(i>=10): + feature_channel_index.append(feature + str(i)) + else: + feature_channel_index.append(feature + '0' + str(i)) + + print(len(list(featuresDict.keys()))) + print("Number of Feature-Columns: {}\n".format(len(feature_channel_index))) + + X = pd.DataFrame(featureMatrix.T) + X = X.replace([np.inf, -np.inf], np.nan) + X = X.fillna(0) + X.columns = feature_channel_index + + #Remove Variance = 0 features + constant_filter = VarianceThreshold(threshold=0) + constant_filter.fit(X) + constant_columns = [column for column in X.columns + if column not in + X.columns[constant_filter.get_support()]] + X = constant_filter.transform(X) + + for column in constant_columns: + feature_channel_index.remove(column) + + print(len(feature_channel_index),feature_channel_index ) + + X = pd.DataFrame(X) + X.columns = feature_channel_index + + + filtered_featuresList = [] + print(type(X)) + for col in X.columns: + feature = col[:-2] + electrode = int(col[-2:]) + if(feature not in filtered_featuresList): + filtered_featuresList.append(feature) + + featuresList = filtered_featuresList + + for feature in featuresList: + # Load FeaturesDict from memory + + + + featureMatrix = featuresDict[feature] + featureMatrix = featureMatrix.astype(np.float32) + + if np.isnan(featureMatrix).any(): + featureMatrix = np.nan_to_num(featureMatrix,nan=0) + + + + feature_channel_index = [] + + for i in range(featuresDict[feature].shape[0]): + feature_channel_index.append(feature + str(i)) + + print("Number of Feature-Columns: {}\n".format(len(feature_channel_index))) + + X = pd.DataFrame(featureMatrix.T) + X = X.replace([np.inf, -np.inf], np.nan) + X = X.fillna(0) + X.columns = feature_channel_index + + + print("Features Ready for undergoing selection tests done ...\n") + + + X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42) + + # Normalise-scale data + # Feature Scaling + if(scale == True): + sc = StandardScaler() + X_train = sc.fit_transform(X_train) + X_test = sc.transform(X_test) + + # Apply classfier + clf.fit(X_train, y_train) + y_predict = clf.predict(X_test) + rmse = mean_squared_error(y_test, y_predict,squared=False) + print("window: {}, stide: {}, rmse: {}".format(window,stride,rmse)) + rmseList.append(rmse) + + + + features_df = pd.DataFrame(featuresList) + rmse_df = pd.DataFrame(rmseList) + #concat two dataframes for better visualization + featureRanking = pd.concat([features_df, rmse_df],axis=1) + featureRanking.columns = ['Feature','RMSE'] #naming the dataframe columns + features_result = featureRanking.sort_values('RMSE') + features_result.to_csv(pwd + "/" + dataset + "/arousal_plots/" + "CommonFeaturesRegressionRanking" + str(window) + str(stride) + ".csv") + print(features_result) + + ########################################### + N = features_result.shape[0] + topNRmseList = [] + topNList = ["{}".format(x) for x in range(1,N+1)] + + + + for n in range(1,N+1): + + + topnfeatures = copy.deepcopy(features_result.head(n)) + topnfeatures = topnfeatures['Feature'].tolist() #list of feature-names + + # X-Values################################################ + + featureMatrix = np.empty((0,ans.shape[1])) + + for feature in topnfeatures: + featureMatrix = np.append(featureMatrix, featuresDict[feature], axis=0) + + featureMatrix = featureMatrix.astype(np.float32) + print(featureMatrix.T.shape) + + feature_channel_index = [] + for feature in topnfeatures: + i=0 + for i in range(featuresDict[feature].shape[0]): + feature_channel_index.append(feature + str(i)) + + + # Removing NaN Values + if np.isnan(featureMatrix).any(): + featureMatrix = np.nan_to_num(featureMatrix,nan=0) + + + print("Number of Feature-Columns: {}\n".format(len(feature_channel_index))) + + X = pd.DataFrame(featureMatrix.T) + X.columns = feature_channel_index + X = X.replace([np.inf, -np.inf], np.nan) + X = X.fillna(0) + + + print("Features Ready for undergoing selection tests done ...\n") + + + X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42) + + # Normalise-scale data + # Feature Scaling + if(scale == True): + sc = StandardScaler() + X_train = sc.fit_transform(X_train) + X_test = sc.transform(X_test) + + clf.fit(X_train, y_train) + y_predict = clf.predict(X_test) + rmse = mean_squared_error(y_test, y_predict,squared=False) + print("window: {}, stide: {}, rmse: {}".format(window,stride,rmse)) + topNRmseList.append(rmse) + + + + topNFeatures_df = pd.DataFrame(topNList) + topNRmse_df = pd.DataFrame(topNRmseList) + + #concat two dataframes for better visualization + topNFeaturesRanking = pd.concat([topNFeatures_df, topNRmse_df],axis=1) + topNFeaturesRanking.columns = ['Feature','RMSE'] #naming the dataframe columns + print(topNFeaturesRanking) + topNFeaturesRanking.to_csv(pwd + "/" + dataset + "/arousal_plots/" + "topCommonFeaturesRegressionRanking" + str(window) + str(stride) + ".csv") + + # Plotting + fig = plt.gcf() + fig.set_size_inches(25, 10) + plt.rcParams.update({'font.size': 30}) + plt.xlabel('Top N Features') + plt.ylabel('RMSE') + plt.plot(topNFeaturesRanking.loc[:,"Feature"], topNFeaturesRanking.loc[:,"RMSE"]) + plt.tight_layout() + + +# In[ ]: + + +def topFeatureColumnsRegressionRanking(dataset, window, stride, sfreq, clf, label, scale=False, pca=False): + + # parameters :- + # dataset - name of the dataset + # window - length of the sliding window in seconds + # stride - length of the stride of the sliding window in seconds + # sfreq - sampling frequency of the EEG data + # clf - name of the classifier to be used + # label - valence/arousal/dominance/liking label (shape depends upon the dataset) + # scale - sclaing of the EEG data if required + + # returns :- + # void + + fs = sfreq + pwd = os.getcwd() + featurepath = os.getcwd() + '/' + dataset + '/data_extracted/featuresDict/' + + ans = np.load((featurepath + "shannonEntropy_{}_{}.npz").format(window,stride), allow_pickle=True)['features'] + Y_epoch = np.load((featurepath + "shannonEntropy_{}_{}.npz").format(window,stride), allow_pickle=True)['Y'] + electrodeList = ['AF3', 'F7', 'F3', 'FC5', 'T7', 'P7', 'O1', 'O2', 'P8', 'T8', 'FC6', 'F4', 'F8', 'AF4'] + + + print("Number of segments are: {}".format(ans.shape[1])) + + #X############################################################################################## + + featuresDict = None + featuresDict = loadFeaturesDict(dataset) + + common = [] + with open('intersection.pkl', 'rb') as f: + common = pickle.load(f) + + for k in list(featuresDict.keys()): + if k not in common: + # pop out common feature + featuresDict.pop(k) + + + featuresList = list(featuresDict.keys()) + + # defining column names + feature_channel_index = [] + + for feature in featuresList: + for i in range(featuresDict[feature].shape[0]): + feature_channel_index.append(feature + str(i)) + + #defining feature matrix + featureMatrix = np.empty((0,ans.shape[1])) #[14*32 + 1,80640] + for key,value in featuresDict.items(): + featureMatrix = np.append(featureMatrix,value,axis=0) + + + print("Shape of FeatureMatrix: {}\n".format(featureMatrix.T.shape)) + + #data-imputation and nan-removal + featureMatrix = featureMatrix.astype(np.float32) + + if np.isnan(featureMatrix).any(): + featureMatrix = np.nan_to_num(featureMatrix,nan=0) + + X = pd.DataFrame(featureMatrix.T) + X = X.replace([np.inf, -np.inf], np.nan) + X = X.fillna(0) + X.columns = feature_channel_index + + + #Y##################################################################### + + y = Y_epoch[:,label] #valence + + ######################################################################## + rmseList = [] + + for col in feature_channel_index: + input_df = pd.DataFrame(X[col]) + + X_train, X_test, y_train, y_test = train_test_split(input_df, y, test_size=0.2, random_state=42) + + # Normalise-scale data + # Feature Scaling + if(scale == True): + sc = StandardScaler() + X_train = sc.fit_transform(X_train) + X_test = sc.transform(X_test) + + # Apply classfier + clf.fit(X_train, y_train) + y_predict = clf.predict(X_test) + rmse = mean_squared_error(y_test, y_predict, squared=False) + rmseList.append(rmse) + + + + col_df = pd.DataFrame(feature_channel_index) + rmse_df = pd.DataFrame(rmseList) + #concat two dataframes for better visualization + colRanking = pd.concat([col_df, rmse_df],axis=1) + colRanking.columns = ['Column','RMSE'] #naming the dataframe columns + features_result = colRanking.sort_values('RMSE') + print(features_result) + + + N = len(feature_channel_index) + topNRmseList = [] + topNList = ["{}".format(x) for x in range(1,N+1)] + + for n in range(1, N+1): + ranking_df = features_result.head(n) + topncols = ranking_df['Column'].tolist() + + X_train, X_test, y_train, y_test = train_test_split(X[topncols], y, test_size=0.2, random_state=42) + + # Normalise-scale data + # Feature Scaling + if(scale == True): + sc = StandardScaler() + X_train = sc.fit_transform(X_train) + X_test = sc.transform(X_test) + + # Apply classfier + clf.fit(X_train, y_train) + y_predict = clf.predict(X_test) + rmse = mean_squared_error(y_test, y_predict, squared=False) + topNRmseList.append(rmse) + + + topcol_df = pd.DataFrame(topNList) + toprmse_df = pd.DataFrame(topNRmseList) + #concat two dataframes for better visualization + topcolRanking = pd.concat([topcol_df, toprmse_df],axis=1) + topcolRanking.columns = ['Column','RMSE'] #naming the dataframe columns + topfeatures_result = topcolRanking + print(topfeatures_result) + topfeatures_result.to_csv(pwd + "/" + dataset + "/arousal_plots/" + "ColumnsRegressionRanking" + str(window) + str(stride) + ".csv") + + + # Plotting + fig = plt.gcf() + fig.set_size_inches(60, 9) + plt.xlabel('Top N Columns') + plt.ylabel('RMSE') + plt.title("Top N Columns v/s RMSE Plot for Window:{} Stride:{} epoched data by varying N".format(window,stride)) + plt.plot(topfeatures_result.loc[:,"Column"], topfeatures_result.loc[:,"RMSE"]) + plt.tight_layout() + plt.savefig(pwd + "/" + dataset + "/arousal_plots/" + "topFeatureColumnsRegressionRanking" + str(window) + str(stride) + ".svg", bbox_inches='tight') + plt.show() + plt.clf() + diff --git a/TopNByFSMethods.py b/TopNByFSMethods.py new file mode 100644 index 0000000..e903210 --- /dev/null +++ b/TopNByFSMethods.py @@ -0,0 +1,648 @@ +#!/usr/bin/env python +# coding: utf-8 + +# In[ ]: + + +from ImportUtils import * + +from sklearn.ensemble import RandomForestRegressor as sklearnrfi +from sklearn.feature_selection import VarianceThreshold + + +# In[ ]: + + +def topElectrodeFSRegressionRanking(dataset, window, stride, sfreq, clf, label, scale=False, pca=False, mutual_info = False, method='SelectKBest'): + ''' + Ranks of features according to rmse computed by F score based regression + Plots electrode v/s rmse graph + + ''' + # parameters :- + # dataset - name of the dataset + # window - length of the sliding window in seconds + # stride - length of the stride of the sliding window in seconds + # sfreq - sampling frequency of the EEG data + # clf - name of the classifier to be used + # label - valence/arousal/dominance/liking label (shape depends upon the dataset) + # scale - sclaing of the EEG data if required + # mutual_info - Mutual ranking between features based on information theory + # method - 'RandomForest' 'RFE' 'SelectKBest' + + # returns :- + # void + pwd = os.getcwd() + fs = sfreq + electrodeList = ['AF3', 'F7', 'F3', 'FC5', 'T7', 'P7', 'O1', 'O2', 'P8', 'T8', 'FC6', 'F4', 'F8', 'AF4'] + featurepath = os.getcwd() + '/' + dataset + '/data_extracted/featuresDict/' + + ans = np.load((featurepath + "shannonEntropy_{}_{}.npz").format(window,stride), allow_pickle=True)['features'] + Y_epoch = np.load((featurepath + "shannonEntropy_{}_{}.npz").format(window,stride), allow_pickle=True)['Y'] + + print("Number of segments are: {}".format(ans.shape[1])) + + + featuresDict = None + featuresDict = loadFeaturesDict(dataset) + asm_features = ['dasm_delta', 'dasm_theta', 'dasm_alpha', 'dasm_beta', 'dasm_gamma', 'rasm_delta', 'rasm_theta', 'rasm_alpha', 'rasm_beta', 'rasm_gamma'] + for asm in asm_features: + featuresDict.pop(asm) + + common = [] + with open('intersection.pkl', 'rb') as f: + common = pickle.load(f) + + for k in list(featuresDict.keys()): + if k not in common: + # pop out common feature + featuresDict.pop(k) + + featuresList = list(featuresDict.keys()) + print(featuresList) + + featureMatrix = np.empty((0,ans.shape[1])) #[14*32 + 1,80640] + for key,value in featuresDict.items(): + featureMatrix = np.append(featureMatrix,value,axis=0) + + + if np.isnan(featureMatrix).any(): + featureMatrix = np.nan_to_num(featureMatrix,nan=0) + + featureMatrix = featureMatrix.astype('float64') + + + feature_channel_index = [] + for feature in featuresList: + for i in range(featuresDict[feature].shape[0]): + if(i>=10): + feature_channel_index.append(feature + str(i)) + else: + feature_channel_index.append(feature + '0' + str(i)) + + + print("Number of Feature-Columns: {}\n".format(len(feature_channel_index))) + + X = pd.DataFrame(featureMatrix.T) + X = X.replace([np.inf, -np.inf], np.nan) + X = X.fillna(0) + X.columns = feature_channel_index + + ################################################################# + y = copy.deepcopy(Y_epoch[:,label]) #valence + print("y.shape: ", y.shape) + + + dfscores = None + + if(method == 'RandomForest'): + '''Random Forest Feature Importances''' + # estimator = RandomForestRegressor() + estimator = sklearnrfi() + fit = estimator.fit(X,y) + dfscores = pd.DataFrame(fit.feature_importances_) + elif(method == 'RFE'): + ''' RFE''' + selector = RFE(clf, n_features_to_select=X.shape[1], step=1) + selector = selector.fit(X, y) + dfscores = pd.DataFrame(selector.ranking_) + + elif(method == 'SelectKBest'): + """SelecKBest""" + #apply SelectKBest class to extract top 10 best features + func = None + if mutual_info == False: + func = f_classif + else: + func = mutual_info_classif + + bestfeatures = SelectKBest(score_func=func, k=X.shape[1]) + fit = bestfeatures.fit(X,y) + + dfscores = pd.DataFrame(fit.scores_) + + + dfcolumns = pd.DataFrame(X.columns) + + #concat two dataframes for better visualization + featureScores = pd.concat([dfcolumns,dfscores],axis=1) + featureScores.columns = ['Specs','Score'] #naming the dataframe columns + features_result = featureScores.nlargest(X.shape[1],'Score') + print(features_result) + features_result.to_csv(pwd + "/" + dataset + "/arousal_plots/" + "CommonElectrodeFSRegressionRanking"+ method + str(window) + str(stride) + ".csv") + + + ################################################################### + topcolumns = features_result['Specs'].values + topfeatures = [] + topelectrodes = [] + + for col in topcolumns: + feature = col[:-2] + electrode = int(col[-2:]) + if(feature not in topfeatures): + topfeatures.append(feature) + + if(electrode not in topelectrodes): + topelectrodes.append(electrode) + + ################################################################################## + + N = len(topelectrodes) + topRmseList = [] + topNList = ["{}".format(x) for x in range(1,N+1)] + + + for n in range(1,N+1): + + electrode_index = topelectrodes[:n] + print(topelectrodes) + print(electrode_index) + # X-Values + featureMatrix = np.empty((len(featuresList)*len(electrode_index),ans.shape[1])) + + i = 0 + for index in electrode_index: + for key,value in featuresDict.items(): + featureMatrix[i,:] = value[index,:] + i = i+1 + + # i = i+1 + + featureMatrix = featureMatrix.astype(np.float32) + print(featureMatrix.T.shape) + + # Removing NaN Values + if np.isnan(featureMatrix).any(): + featureMatrix = np.nan_to_num(featureMatrix,nan=0) + + + feature_channel_index = [] + for index in electrode_index: + for feature in featuresList: + feature_channel_index.append(feature + str(index)) + + print("Number of Feature-Columns: {}\n".format(len(feature_channel_index))) + + X = pd.DataFrame(featureMatrix.T) + X.columns = feature_channel_index + X = X.replace([np.inf, -np.inf], np.nan) + X = X.fillna(0) + + + print("Features Ready for undergoing selection tests done ...\n") + + + X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42) + + # Normalise-scale data + # Feature Scaling + if(scale == True): + sc = StandardScaler() + X_train = sc.fit_transform(X_train) + X_test = sc.transform(X_test) + + # Apply classfier + # clf = xgb.XGBClassifier(verbose = 5) + clf.fit(X_train, y_train) + y_predict = clf.predict(X_test) + rmse = mean_squared_error(y_test, y_predict,squared=False) + print("window: {}, stide: {}, rmse: {}".format(window,stride,rmse)) + topRmseList.append(rmse) + + + + + + # features_result = features_result.reset_index() + topNElectrode_df = pd.DataFrame(topNList) + topNRmse_df = pd.DataFrame(topRmseList) + #concat two dataframes for better visualization + topNElectrodeRanking = pd.concat([topNElectrode_df, topNRmse_df],axis=1) + topNElectrodeRanking.columns = ['Electrode','RMSE'] #naming the dataframe columns + print(topNElectrodeRanking) + topNElectrodeRanking.to_csv(pwd + "/" + dataset + "/arousal_plots/" + "topCommonElectrodeFSRegressionRanking"+ method + str(window) + str(stride) + ".csv") + # return features_result + + + # Plotting + fig = plt.gcf() + fig.set_size_inches(20, 10) + plt.rcParams.update({'font.size': 30}) + plt.xlabel('Top N Electrodes') + plt.ylabel('RMSE') + plt.plot(topNElectrodeRanking.loc[:,"Electrode"], topNElectrodeRanking.loc[:,"RMSE"]) + plt.tight_layout() + + +# In[ ]: + + +def topFeatureFSRegressionRanking(dataset, window, stride, sfreq, clf, label, scale=False, pca=False, mutual_info = False, method='SelectKBest'): + + # parameters :- + # dataset - name of the dataset + # window - length of the sliding window in seconds + # stride - length of the stride of the sliding window in seconds + # sfreq - sampling frequency of the EEG data + # clf - name of the classifier to be used + # label - valence/arousal/dominance/liking label (shape depends upon the dataset) + # scale - sclaing of the EEG data if required + # mutual_info - Mutual ranking between features based on information theory + # method - 'RandomForest' 'RFE' 'SelectKBest' + + # returns :- + # void + + pwd = os.getcwd() + fs = sfreq + featurepath = os.getcwd() + '/' + dataset + '/data_extracted/featuresDict/' + + ans = np.load((featurepath + "shannonEntropy_{}_{}.npz").format(window,stride), allow_pickle=True)['features'] + Y_epoch = np.load((featurepath + "shannonEntropy_{}_{}.npz").format(window,stride), allow_pickle=True)['Y'] + print("Number of segments are: {}".format(ans.shape[1])) + + + featuresDict = None + featuresDict = loadFeaturesDict(dataset) + + common = [] + with open('intersection.pkl', 'rb') as f: + common = pickle.load(f) + + for k in list(featuresDict.keys()): + if k not in common: + # pop out common feature + featuresDict.pop(k) + + + ################################################################## + # featuresToAvoid = ['volt05', 'volt10', 'volt20', 'burstBandPowers','hFD'] + featuresList = list(featuresDict.keys()) + print(featuresList) + + featureMatrix = np.empty((0,ans.shape[1])) #[14*32 + 1,80640] + for key,value in featuresDict.items(): + featureMatrix = np.append(featureMatrix,value,axis=0) + + + if np.isnan(featureMatrix).any(): + featureMatrix = np.nan_to_num(featureMatrix,nan=0) + + featureMatrix = featureMatrix.astype('float64') + + + feature_channel_index = [] + for feature in featuresList: + for i in range(featuresDict[feature].shape[0]): + if(i>=10): + feature_channel_index.append(feature + str(i)) + else: + feature_channel_index.append(feature + '0' + str(i)) + + print(len(list(featuresDict.keys()))) + print("Number of Feature-Columns: {}\n".format(len(feature_channel_index))) + + X = pd.DataFrame(featureMatrix.T) + X = X.replace([np.inf, -np.inf], np.nan) + X = X.fillna(0) + X.columns = feature_channel_index + + #Remove Variance = 0 features + constant_filter = VarianceThreshold(threshold=0) + constant_filter.fit(X) + constant_columns = [column for column in X.columns + if column not in + X.columns[constant_filter.get_support()]] + X = constant_filter.transform(X) + for column in constant_columns: + feature_channel_index.remove(column) + + print(len(feature_channel_index),feature_channel_index ) + + X = pd.DataFrame(X) + X.columns = feature_channel_index + + ################################################################# + y = copy.deepcopy(Y_epoch[:,label]) #valence + print("y.shape: ", y.shape) + + + dfscores = None + + if(method == 'RandomForest'): + '''Random Forest Feature Importances''' + estimator = sklearnrfi() #RandomForestRegressor() + fit = estimator.fit(X,y) + dfscores = pd.DataFrame(fit.feature_importances_) + elif(method == 'RFE'): + ''' RFE''' + selector = RFE(clf, n_features_to_select=X.shape[1], step=1) + selector = selector.fit(X, y) + dfscores = pd.DataFrame(selector.ranking_) + + elif(method == 'SelectKBest'): + """SelecKBest""" + #apply SelectKBest class to extract top 10 best features + func = None + if mutual_info == False: + func = f_classif + else: + func = mutual_info_classif + + bestfeatures = SelectKBest(score_func=func, k=X.shape[1]) + fit = bestfeatures.fit(X,y) + + dfscores = pd.DataFrame(fit.scores_) + + + dfcolumns = pd.DataFrame(X.columns) + + #concat two dataframes for better visualization + featureScores = pd.concat([dfcolumns,dfscores],axis=1) + featureScores.columns = ['Specs','Score'] #naming the dataframe columns + features_result = featureScores.nlargest(X.shape[1],'Score') + print(features_result) + features_result.to_csv(pwd + "/" + dataset + "/arousal_plots/" + "CommonFeatureFSRegressionRanking"+ method + str(window) + str(stride) + ".csv") + + + + ################################################################### + topcolumns = features_result['Specs'].values + topfeatures = [] + topelectrodes = [] + + for col in topcolumns: + feature = col[:-2] + electrode = int(col[-2:]) + if(feature not in topfeatures): + topfeatures.append(feature) + + if(electrode not in topelectrodes): + topelectrodes.append(electrode) + + + ###################################################################### + # TOP-N-FEATURE-RANKING + print(topfeatures) + print(topelectrodes) + N = len(topfeatures) + topNRmseList = [] + topNList = ["{}".format(x) for x in range(1,N+1)] + + + + for n in range(1,N+1): + + topnfeatures = topfeatures[:n] + + # X-Values################################################ + + featureMatrix = np.empty((0,ans.shape[1])) + + for feature in topnfeatures: + featureMatrix = np.append(featureMatrix, featuresDict[feature], axis=0) + + featureMatrix = featureMatrix.astype('float64') + print(featureMatrix.T.shape) + + feature_channel_index = [] + for feature in topnfeatures: + i=0 + for i in range(featuresDict[feature].shape[0]): + feature_channel_index.append(feature + str(i)) + + + # Removing NaN Values + if np.isnan(featureMatrix).any(): + featureMatrix = np.nan_to_num(featureMatrix,nan=0) + + + print("Number of Feature-Columns: {}\n".format(len(feature_channel_index))) + + X = pd.DataFrame(featureMatrix.T) + X.columns = feature_channel_index + X = X.replace([np.inf, -np.inf], np.nan) + X = X.fillna(0) + + + print("Features Ready for undergoing selection tests done ...\n") + + X = X.astype(np.float32) + X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42) + + # Normalise-scale data + # Feature Scaling + if(scale == True): + sc = StandardScaler() + X_train = sc.fit_transform(X_train) + X_test = sc.transform(X_test) + + clf.fit(X_train, y_train) + y_predict = clf.predict(X_test) + rmse = mean_squared_error(y_test, y_predict,squared=False) + print("window: {}, stide: {}, rmse: {}".format(window,stride,rmse)) + topNRmseList.append(rmse) + + + + + topNFeatures_df = pd.DataFrame(topNList) + + topNRmse_df = pd.DataFrame(topNRmseList) + + #concat two dataframes for better visualization + topNFeaturesRanking = pd.concat([topNFeatures_df, topNRmse_df],axis=1) + topNFeaturesRanking.columns = ['Feature','RMSE'] #naming the dataframe columns + print(topNFeaturesRanking) + topNFeaturesRanking.to_csv(pwd + "/" + dataset + "/arousal_plots/" + "topCommonFeatureFSRegressionRanking"+ method + str(window) + str(stride) + ".csv") + + # Plotting + fig = plt.gcf() + fig.set_size_inches(25, 10) + plt.rcParams.update({'font.size': 30}) + plt.xlabel('Top N Features') + plt.ylabel('RMSE') + plt.plot(topNFeaturesRanking.loc[:,"Feature"], topNFeaturesRanking.loc[:,"RMSE"]) + plt.tight_layout() + + +# In[ ]: + + +def topFSColumnsRegressionRanking(dataset, window, stride, sfreq, clf, label, scale=False, pca=False, mutual_info = False, method='SelectKBest'): + # Method C + # parameters :- + # dataset - name of the dataset + # window - length of the sliding window in seconds + # stride - length of the stride of the sliding window in seconds + # sfreq - sampling frequency of the EEG data + # clf - name of the classifier to be used + # label - valence/arousal/dominance/liking label (shape depends upon the dataset) + # scale - sclaing of the EEG data if required + # mutual_info - Mutual ranking between features based on information theory + # method - 'RandomForest' 'RFE' 'SelectKBest' + + # returns :- + # void + fs = sfreq + pwd = os.getcwd() + + featurepath = os.getcwd() + '/' + dataset + '/data_extracted/featuresDict/' + ans = np.load((featurepath + "shannonEntropy_{}_{}.npz").format(window,stride), allow_pickle=True)['features'] + Y_epoch = np.load((featurepath + "shannonEntropy_{}_{}.npz").format(window,stride), allow_pickle=True)['Y'] + + print("Number of segments are: {}".format(ans.shape[1])) + + #X############################################################################################## + + featuresDict = None + featuresDict = loadFeaturesDict(dataset) + + common = [] + with open('intersection.pkl', 'rb') as f: + common = pickle.load(f) + + for k in list(featuresDict.keys()): + if k not in common: + # pop out common feature + featuresDict.pop(k) + + print("Number of Features:",len(list(featuresDict.keys()))) + featuresList = list(featuresDict.keys()) + + feature_channel_index = [] + + feature_channel_index = [] + for feature in featuresList: + for i in range(featuresDict[feature].shape[0]): + if(i>=10): + feature_channel_index.append(feature +'_'+ str(i)) + else: + feature_channel_index.append(feature + '_0' + str(i)) + + print(len(list(featuresDict.keys()))) + print("Number of Feature-Columns: {}\n".format(len(feature_channel_index))) + + #defining feature matrix + featureMatrix = np.empty((0,ans.shape[1])) #[14*32 + 1,80640] + for key,value in featuresDict.items(): + featureMatrix = np.append(featureMatrix,value,axis=0) + + + + print("Shape of FeatureMatrix: {}\n".format(featureMatrix.T.shape)) + + #data-imputation and nan-removal + featureMatrix = featureMatrix.astype(np.float32) + + if np.isnan(featureMatrix).any(): + featureMatrix = np.nan_to_num(featureMatrix,nan=0) + + X = pd.DataFrame(featureMatrix.T) + X = X.replace([np.inf, -np.inf], np.nan) + X = X.fillna(0) + X.columns = feature_channel_index + + + #Y##################################################################### + + y = Y_epoch[:,label] #valence + # y = pd.DataFrame(y) + + ######################################################################## + dfscores = None + + if(method == 'RandomForest'): + '''Random Forest Feature Importances''' + estimator = sklearnrfi() #RandomForestRegressor() + fit = estimator.fit(X,y) + dfscores = pd.DataFrame(fit.feature_importances_) + elif(method == 'RFE'): + ''' RFE''' + selector = RFE(clf, n_features_to_select=X.shape[1], step=1) + selector = selector.fit(X, y) + dfscores = pd.DataFrame(selector.ranking_) + + elif(method == 'SelectKBest'): + """SelecKBest""" + #apply SelectKBest class to extract top 10 best features + func = None + if mutual_info == False: + func = f_classif + else: + func = mutual_info_classif + + bestfeatures = SelectKBest(score_func=func, k=X.shape[1]) + fit = bestfeatures.fit(X,y) + + dfscores = pd.DataFrame(fit.scores_) + + + + + dfcolumns = pd.DataFrame(X.columns) + + #concat two dataframes for better visualization + featureScores = pd.concat([dfcolumns,dfscores],axis=1) + featureScores.columns = ['Column','Score'] #naming the dataframe columns + features_result = featureScores.nlargest(X.shape[1],'Score') + print(features_result) + + N = len(feature_channel_index) + topNRmseList = [] + topNList = ["{}".format(x) for x in range(1,N+1)] + + for n in range(1, N+1): + ranking_df = features_result.head(n) + topncols = ranking_df['Column'].tolist() + + input_df = pd.DataFrame(X[topncols]) + + X_train, X_test, y_train, y_test = train_test_split(input_df, y, test_size=0.2, random_state=42) + + # Normalise-scale data + # Feature Scaling + if(scale == True): + sc = StandardScaler() + X_train = sc.fit_transform(X_train) + X_test = sc.transform(X_test) + + # Apply classfier + clf.fit(X_train, y_train) + y_predict = clf.predict(X_test) + rmse = mean_squared_error(y_test, y_predict, squared=False) + print(n,rmse) + topNRmseList.append(rmse) + + + topcol_df = pd.DataFrame(topNList) + toprmse_df = pd.DataFrame(topNRmseList) + #concat two dataframes for better visualization + topcolRanking = pd.concat([topcol_df, toprmse_df],axis=1) + topcolRanking.columns = ['Column','RMSE'] #naming the dataframe columns + topfeatures_result = topcolRanking + print(topfeatures_result) + topfeatures_result.to_csv(pwd + "/" + dataset + "/arousal_plots/" + "topFSColumnsRegressionRanking"+method + str(window) + str(stride) + ".csv") + + # Plotting + fig = plt.gcf() + fig.set_size_inches(60, 9) + + plt.xlabel('Top N Columns') + plt.ylabel('RMSE') + plt.title("Top N Columns v/s RMSE Plot for Window:{} Stride:{} epoched data by varying N".format(window,stride)) + plt.plot(topfeatures_result.loc[:,"Column"], topfeatures_result.loc[:,"RMSE"]) + plt.tight_layout() + plt.savefig(pwd + "/" + dataset + "/arousal_plots/" + "topFSColumnsRegressionRanking"+method + str(window) + str(stride) + ".svg", bbox_inches='tight', dpi=500) + plt.show() + plt.clf() + + +# In[ ]: + + +if __name__ == '__main__': + pass + + diff --git a/feature_extraction_25GB_RAM_DASM_RASM.py b/feature_extraction_25GB_RAM_DASM_RASM.py new file mode 100644 index 0000000..10de804 --- /dev/null +++ b/feature_extraction_25GB_RAM_DASM_RASM.py @@ -0,0 +1,319 @@ +#!/usr/bin/env python +# coding: utf-8 + +# In[ ]: + + +get_ipython().system('git clone -l -s https://github.com/sari-saba-sadiya/EEGExtract.git cloned-repo') +get_ipython().run_line_magic('cd', 'cloned-repo') +get_ipython().system('ls') + + +# In[ ]: + + +get_ipython().system('pip install -r requirements.txt') + + +# In[ ]: + + +from google.colab import drive +drive.mount('/gdrive',force_remount=True) + + +# In[ ]: + + +get_ipython().system('pip install pyinform') + + +# In[ ]: + + +get_ipython().run_line_magic('cd', '../../gdrive/MyDrive/emotion_recognition_project') + + +# In[ ]: + + +import EEGExtract as eeg +from scipy import io,signal +import numpy as np +import pandas as pd +from sklearn import preprocessing +import pandas as pd +import pickle +import matplotlib.pyplot as plt + + +# In[ ]: + + +class load_data: + ''' + Load the preprocessed data here, store the paramters + ''' + def __init__(self,name): + self.name = name #name of dataset + self.X = None + self.Y = None + self.Z = None + self.freq = None #(in Hz) is same for all datasets + self.channels = None + self.ch_type = 'eeg' + self.eegData = None + self.use_autoreject = 'y' + self.no_of_subjects = None + def load_arrays(self): + if self.name == 'DREAMER': + array = np.load('original_data/DREAMER.npz') + self.freq = 128 + self.no_of_subjects = 23 + self.channels = ['AF3','F7','F3','FC5','T7','P7','O1','O2','P8','T8','FC6','F4','F8','AF4'] + if self.name == 'DEAP': + array = np.load('original_data/DEAP.npz') + self.no_of_subjects = 32 + self.freq = 128 + # 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 + self.channels = ['F1', 'AF3', 'F3', 'F7', 'FC5', 'FC1', 'C3', 'T7', 'CP5', 'CP1', 'P3', 'P7', 'PO3', 'O1', 'Oz', 'Pz', 'Fp2', 'AF4', 'Fz', 'F4', 'F8', 'FC6', 'FC2', 'Cz', 'C4', 'T8', 'CP6', 'CP2', 'P4', 'P8', 'PO4', 'O2', 'hEOG','vEOG', 'zEMG','tEMG','GSR','Respiration belt','Plethysmograph','Temperature'] + if self.name == 'OASIS': + #array = np.load('original_data/OASIS.npz') + self.no_of_subjects = 15 + if self.use_autoreject == 'y': + with open('preprocessed_data/oasis/with_autoreject.p','rb') as file: + self.X = pickle.load(file) + self.channels = ['AF3', 'F7', 'F3', 'FC5', 'T7', 'P7', 'O1', 'O2', 'P8', 'T8', 'FC6', 'F4', 'F8', 'AF4'] + self.freq = 128 + self.X ,self.Y= merge_dictionary(self.X) + (a,b,c) = self.X.shape + self.X = np.reshape(self.X,(a,c,b)) + else: + array = np.load('preprocessed_data/oasis/without_autoreject.npz') + self.freq = 128 + self.channels = ['AF3','F7','F3','FC5','T7','P7','O1','O2','P8','T8','FC6','F4','F8','AF4'] + self.X = array['X'] + self.Y = array['Y'] + (a,b,c) = self.X.shape + self.X = np.reshape(self.X,(a,c,b)) + + else: + self.X = array['X'] + if self.name == 'DEAP': + self.X = self.X[:,:,[1,3,2,4,7,11,13,31,29,25,21,19,20,17]] # To maintain uniformity across all datasets, only 14 electrodes selected + self.channels = ['AF3', 'F7', 'F3', 'FC5', 'T7', 'P7', 'O1', 'O2', 'P8', 'T8', 'FC6', 'F4', 'F8', 'AF4'] + if self.name != 'OASIS': + self.Y = array['Y'] + #self.Z = array['Z'] + self.reshape_data() + def reshape_data(self): + ''' + reshapes data in the format EEGExtract module expects i.e channels x timepoints x epochs + ''' + + (epochs,timepoints,channels) = self.X.shape + self.eegData = np.reshape(self.X,(channels,timepoints,epochs)) + + + + +# In[ ]: + + +def merge_dictionary(dictionary): + ''' + merge all trial data to form one array + ''' + no_of_trials = len(list(dictionary.keys())) + no_of_channels = dictionary[1][0].shape[1] + length_of_segment = dictionary[1][0].shape[2] + no_of_epochs_per_trial = dictionary[1][0].shape[0] + X = np.empty((0,no_of_channels,length_of_segment)) + Y = np.empty((0,2)) + for trial,lst in dictionary.items(): + array = dictionary[trial][0] + score = dictionary[trial][3] + X = np.append(X,array,axis = 0) + for epoch in range(no_of_epochs_per_trial): + Y = np.append(Y,np.expand_dims(score,axis =0),axis = 0) + + return X,Y + + +# In[ ]: + + +def calculate_diffrential_entropy_for_bands(eegData,freq): +# Function to calculate the differential entropy for the different bands of EEG data + +# parameters :- + # eegData :- The differential EEG signal value + # freq :- sampling frequency of the EEG signal +# returns :- + # bandwise DE + #delta band + delta_band = eeg.filt_data(eegData,0.5,4,freq) + #theta band + theta_band = eeg.filt_data(eegData,4,8,freq) + #alpha bad + alpha_band = eeg.filt_data(eegData,8,12,freq) + #beta band + beta_band = eeg.filt_data(eegData,12,30,freq) + #gamma band + gamma_band = eeg.filt_data(eegData,30,63,freq) + + + diffrential_entropy_delta = 1/2*np.log(np.var(delta_band,axis = 1)*np.pi*np.e*2) + + diffrential_entropy_theta = 1/2*np.log(np.var(theta_band,axis = 1)*np.pi*np.e*2) + + diffrential_entropy_alpha = 1/2*np.log(np.var(alpha_band,axis = 1)*np.pi*np.e*2) + + diffrential_entropy_beta = 1/2*np.log(np.var(beta_band,axis = 1)*np.pi*np.e*2) + + diffrential_entropy_gamma = 1/2*np.log(np.var(gamma_band,axis = 1)*np.pi*np.e*2) + #print(diffrential_entropy_delta.shape,diffrential_entropy_gamma.shape,diffrential_entropy_theta.shape,diffrential_entropy_alpha.shape,diffrential_entropy_beta.shape) + return diffrential_entropy_delta,diffrential_entropy_theta,diffrential_entropy_alpha,diffrential_entropy_beta,diffrential_entropy_gamma + + +# In[ ]: + + +#['AF3', 'F7', 'F3', 'FC5', 'T7', 'P7', 'O1', 'O2', 'P8', 'T8', 'FC6', 'F4', 'F8', 'AF4'] +# 0 1 2 3 4 5 6 7 8 9 10 11 12 13 +def calculate_RASM_DASM(band): + RASM_AF3_AF4 = np.expand_dims(band[0,:]/band[13,:],axis = 0) + RASM_F3_F4 = np.expand_dims(band[2,:]/band[11,:],axis = 0) + RASM_F7_F8 = np.expand_dims(band[1,:]/band[12,:],axis = 0) + RASM_FC5_FC6 = np.expand_dims(band[3,:]/band[10,:],axis = 0) + RASM_O1_O2 = np.expand_dims(band[6,:]/band[7,:],axis = 0) + RASM_P7_P8 = np.expand_dims(band[5,:]/band[8,:],axis=0) + RASM_T7_T8 = np.expand_dims(band[4,:]/band[9,:],axis=0) + + DASM_AF3_AF4 = np.expand_dims(band[0,:]-band[13,:],axis = 0) + DASM_F3_F4 = np.expand_dims(band[2,:]-band[11,:],axis = 0) + DASM_F7_F8 = np.expand_dims(band[1,:]-band[12,:],axis = 0) + DASM_FC5_FC6 = np.expand_dims(band[3,:]-band[10,:],axis = 0) + DASM_O1_O2 = np.expand_dims(band[6,:]-band[7,:],axis = 0) + DASM_P7_P8 = np.expand_dims(band[5,:]-band[8,:],axis=0) + DASM_T7_T8 = np.expand_dims(band[4,:]-band[9,:],axis=0) + + + features = np.empty((0,RASM_AF3_AF4.shape[1])) + features = np.append(features,RASM_AF3_AF4,axis = 0) + features = np.append(features,RASM_F3_F4,axis = 0) + features = np.append(features,RASM_F7_F8,axis = 0) + features = np.append(features,RASM_FC5_FC6,axis = 0) + features = np.append(features,RASM_O1_O2,axis = 0) + features = np.append(features,RASM_P7_P8,axis = 0) + features = np.append(features,RASM_T7_T8,axis = 0) + + features = np.append(features,DASM_AF3_AF4,axis = 0) + features = np.append(features,DASM_F3_F4,axis = 0) + features = np.append(features,DASM_F7_F8,axis = 0) + features = np.append(features,DASM_FC5_FC6,axis = 0) + features = np.append(features,DASM_O1_O2,axis = 0) + features = np.append(features,DASM_P7_P8,axis = 0) + features = np.append(features,DASM_T7_T8,axis = 0) + return features.T + + +# In[ ]: + + +def epoch_data(X,Y, window, stride, sfreq): + + (channels,timepoints,trials )= X.shape + X = np.reshape(X,(trials,channels,timepoints)) + segment = int(window*sfreq) + step = int(stride*sfreq) + epochPerTrial = int((timepoints-segment)/step + 1) + count = 0 + X_new = np.empty((trials*epochPerTrial,channels,segment)) + Y_new = np.empty((trials*epochPerTrial,2)) + for trial in range(trials): + for epoch in range(epochPerTrial): + X_new[count,:,:] = X[trial,:,epoch*step:(epoch*step)+segment] + Y_new[count,:] = Y[trial,:2] + count+=1 + (trials,channels,timepoints) = X_new.shape + X_new = np.reshape(X_new,(channels,timepoints,trials)) + + return X_new,Y_new + + +# In[ ]: + + +def segregate_data_of_subjects(feature_matrix,dataset,sfreq = 128): + total_samples = feature_matrix.shape[0] + subject_indexes = {} + if dataset.name != 'DEAP AND DREAMER': + samples_per_subject = total_samples//dataset.no_of_subjects + print('samples per subject taken are ',samples_per_subject) + subject_indexes = {} + for i in range(dataset.no_of_subjects): + subject_name = 'subject_' + str(i+1) + subject_indexes[subject_name] = feature_matrix[samples_per_subject*i:samples_per_subject*(i+1),:] + else: + a = feature_matrix[:80640,:] + b = feature_matrix[80640:,:] + print(b.shape) + for i in range(32): + samples_per_subject = 2520 + subject_name = 'subject_' + str(i+1) + subject_indexes[subject_name] = a[samples_per_subject*i:samples_per_subject*(i+1),:] + for i in range(0,23): + samples_per_subject = 8190 + subject_name = 'subject_' + str(i+1+32) + subject_indexes[subject_name] = b[samples_per_subject*i:samples_per_subject*(i+1),:] + + return subject_indexes + + +# In[ ]: + + +def driver_code(): + dataset = load_data('DREAMER') + dataset.load_arrays() + X = dataset.eegData + Y = dataset.Y + window = 1 + stride = 1 + + X,Y = epoch_data(X,Y,window,stride,128) + print('shape after epoching') + print('X:',X.shape) + print('Y:',Y.shape) + print('') + print('') + delta,theta,alpha,beta,gamma = calculate_diffrential_entropy_for_bands(X,dataset.freq) + bands = {'delta':delta,'theta':theta,'alpha':alpha,'beta':beta,'gamma':gamma} + for name,band in bands.items(): + feature_matrix = calculate_RASM_DASM(band) #extracted RASM ,DASM features for each eng band + print(name ,':' ,end = '') + print(feature_matrix.shape) + print(feature_matrix) + np.savez('features/'+dataset.name.lower()+'_RASM_DASM/'+name+'_'+str(window)+'_'+str(stride),features = feature_matrix,Y=Y) + + + +# In[ ]: + + +driver_code() + + +# In[ ]: + + + +np.load('features/oasis/without_autoreject/shannonEntropy_1_1.npz')['features'] + + +# In[ ]: + + + + diff --git a/feature_extraction_25gb_ram.py b/feature_extraction_25gb_ram.py new file mode 100644 index 0000000..5d6f7e5 --- /dev/null +++ b/feature_extraction_25gb_ram.py @@ -0,0 +1,467 @@ +#!/usr/bin/env python +# coding: utf-8 + +# In[ ]: + + +# -*- coding: utf-8 -*- +"""feature_extraction_25GB_RAM.ipynb + +Automatically generated by Colaboratory. + +Original file is located at + https://colab.research.google.com/drive/1QnVj7GyyJhLPrYF4vBTppMwynXqmOTEJ +""" + +# Commented out IPython magic to ensure Python compatibility. + + +import EEGExtract as eeg +from scipy import io,signal +import numpy as np +import pandas as pd +from sklearn import preprocessing +import pandas as pd +import pickle + + +class load_data: + ''' + Load the preprocessed data here, store the paramters + ''' + def __init__(self,name): + self.name = name #name of dataset + self.X = None + self.Y = None + self.Z = None + self.freq = None #(in Hz) is same for all datasets + self.channels = None + self.ch_type = 'eeg' + self.eegData = None + self.use_autoreject = 'n' + def load_arrays(self): + if self.name == 'DREAMER': + array = np.load('original_data/DREAMER.npz') + self.freq = 128 + self.channels = ['AF3','F7','F3','FC5','T7','P7','O1','O2','P8','T8','FC6','F4','F8','AF4'] + if self.name == 'DEAP': + array = np.load('original_data/DEAP.npz') + self.freq = 128 + # 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 + self.channels = ['F1', 'AF3', 'F3', 'F7', 'FC5', 'FC1', 'C3', 'T7', 'CP5', 'CP1', 'P3', 'P7', 'PO3', 'O1', 'Oz', 'Pz', 'Fp2', 'AF4', 'Fz', 'F4', 'F8', 'FC6', 'FC2', 'Cz', 'C4', 'T8', 'CP6', 'CP2', 'P4', 'P8', 'PO4', 'O2', 'hEOG','vEOG', 'zEMG','tEMG','GSR','Respiration belt','Plethysmograph','Temperature'] + if self.name == 'OASIS': + #array = np.load('original_data/OASIS.npz') + if self.use_autoreject == 'y': + with open('preprocessed_data/oasis/with_autoreject.p','rb') as file: + self.X = pickle.load(file) + self.channels = ['AF3', 'F7', 'F3', 'FC5', 'T7', 'P7', 'O1', 'O2', 'P8', 'T8', 'FC6', 'F4', 'F8', 'AF4'] + self.freq = 128 + self.X ,self.Y= merge_dictionary(self.X) + (a,b,c) = self.X.shape + self.X = np.reshape(self.X,(a,c,b)) + else: + array = np.load('preprocessed_data/oasis/without_autoreject.npz') + self.freq = 128 + self.channels = ['AF3','F7','F3','FC5','T7','P7','O1','O2','P8','T8','FC6','F4','F8','AF4'] + self.X = array['X'] + self.Y = array['Y'] + (a,b,c) = self.X.shape + self.X = np.reshape(self.X,(a,c,b)) + + else: + self.X = array['X'] + if self.name == 'DEAP': + self.X = self.X[:,:,[1,3,2,4,7,11,13,31,29,25,21,19,20,17]] # To maintain uniformity across all datasets, only 14 electrodes selected + self.channels = ['AF3', 'F7', 'F3', 'FC5', 'T7', 'P7', 'O1', 'O2', 'P8', 'T8', 'FC6', 'F4', 'F8', 'AF4'] + if self.name != 'OASIS': + self.Y = array['Y'] + #self.Z = array['Z'] + self.reshape_data() + def reshape_data(self): + ''' + reshapes data in the format EEGExtract module expects i.e channels x timepoints x epochs + ''' + + (epochs,timepoints,channels) = self.X.shape + self.eegData = np.reshape(self.X,(channels,timepoints,epochs)) + +class features: + ############################ Complexity Features ############################# + #1> + @staticmethod + def ShannonRes(eegData,**args): + #Shannon Entropy + ShannonRes = eeg.shannonEntropy(eegData, bin_min=-200, bin_max=200, binWidth=2) + return ShannonRes + #2> + @staticmethod + def ShannonRes_sub_band_delta(eegData,fs): + # Subband Information Quantity + # delta (0.5–4 Hz) + eegData_delta = eeg.filt_data(eegData, 0.5, 4, fs) + ShannonRes_delta = eeg.shannonEntropy(eegData_delta, bin_min=-200, bin_max=200, binWidth=2) + return ShannonRes_delta + #3> + @staticmethod + def ShannonRes_sub_band_theta(eegData,fs): + # theta (4–8 Hz) + eegData_theta = eeg.filt_data(eegData, 4, 8, fs) + ShannonRes_theta = eeg.shannonEntropy(eegData_theta, bin_min=-200, bin_max=200, binWidth=2) + return ShannonRes_theta + + #4> + @staticmethod + def ShannonRes_sub_band_alpha(eegData,fs): + # alpha (8–12 Hz) + eegData_alpha = eeg.filt_data(eegData, 8, 12, fs) + ShannonRes_alpha = eeg.shannonEntropy(eegData_alpha, bin_min=-200, bin_max=200, binWidth=2) + return ShannonRes_alpha + + #5> + @staticmethod + def ShannonRes_sub_band_beta(eegData,fs): + # beta (12–30 Hz) + eegData_beta = eeg.filt_data(eegData, 12, 30, fs) + ShannonRes_beta = eeg.shannonEntropy(eegData_beta, bin_min=-200, bin_max=200, binWidth=2) + return ShannonRes_beta + + #6> + @staticmethod + def ShannonRes_sub_band_gamma(eegData,fs): + # gamma (30–100 Hz) + eegData_gamma = eeg.filt_data(eegData, 30, 63, fs) + ShannonRes_gamma = eeg.shannonEntropy(eegData_gamma, bin_min=-200, bin_max=200, binWidth=2) + return ShannonRes_gamma + + + #7> + @staticmethod + def Hojorth_Mobility(eegData,**args): + # Hjorth Mobility + # Hjorth Complexity + HjorthMob, HjorthComp = eeg.hjorthParameters(eegData) + return HjorthMob + #8> + @staticmethod + def Hojorth_Complexity(eegData,**args): + # Hjorth Mobility + # Hjorth Complexity + HjorthMob, HjorthComp = eeg.hjorthParameters(eegData) + return HjorthComp + #9> + @staticmethod + def False_Nearest_Neighbour(eegData,**args): + # False Nearest Neighbor + FalseNnRes = eeg.falseNearestNeighbor(eegData) + return FalseNnRes + ############################################################################## + + ########################Category Features##################################### + #10> + @staticmethod + def median_frequency(eegData,fs): + #fs-sampling frequency + # Median Frequency + medianFreqRes = eeg.medianFreq(eegData,fs) + return medianFreqRes + + #11> + @staticmethod + def band_power_delta(eegData,fs): + #fs - sampling frequency + # δ band Power + bandPwr_delta = eeg.bandPower(eegData, 0.5, 4, fs) + return bandPwr_delta + #12> + @staticmethod + def band_power_theta(eegData,fs): + #fs - sampling frequency + # θ band Power + bandPwr_theta = eeg.bandPower(eegData, 4, 8, fs) + return bandPwr_theta + + #13> + @staticmethod + def band_power_alpha(eegData,fs): + #fs - sampling frequency + # α band Power + bandPwr_alpha = eeg.bandPower(eegData, 8, 12, fs) + return bandPwr_alpha + + #14> + @staticmethod + def band_power_beta(eegData,fs): + #fs - sampling frequency + # β band Power + bandPwr_beta = eeg.bandPower(eegData, 12, 30, fs) + return bandPwr_beta + + #15> + @staticmethod + def band_power_gamma(eegData,fs): + #fs - sampling frequency + # γ band Power + bandPwr_gamma = eeg.bandPower(eegData, 30, 63, fs) + return bandPwr_gamma + + #16> + @staticmethod + def standard_deviation(eegData,**args): + # Standard Deviation + std_res = eeg.eegStd(eegData) + return std_res + + #17> + @staticmethod + def regularity(eegData,fs): + # Regularity (burst-suppression) + regularity_res = eeg.eegRegularity(eegData,fs) + return regularity_res + + + #18> + @staticmethod + def Diffuse_slowing(eegData,**args): + # Diffuse Slowing + df_res = eeg.diffuseSlowing(eegData) + return df_res + + #19> + @staticmethod + def Spikes(eegData,fs,**args): + # Spikes + minNumSamples = int(70*fs/1000) + spikeNum_res = eeg.spikeNum(eegData,minNumSamples) + return spikeNum_res + + #20> + @staticmethod + def delta_burst_after_spike(eegData,fs): + # Delta burst after Spike + eegData_delta = eeg.filt_data(eegData, 0.5, 4, fs) + deltaBurst_res = eeg.burstAfterSpike(eegData,eegData_delta,minNumSamples=7,stdAway = 3) + return deltaBurst_res + + #21> + @staticmethod + def Sharp_spike(eegData,fs): + minNumSamples = int(70*fs/1000) + # Sharp spike + sharpSpike_res = eeg.shortSpikeNum(eegData,minNumSamples) + return sharpSpike_res + + #22> + @staticmethod + def Number_of_Burst(eegData,fs): + # Number of Bursts + numBursts_res = eeg.numBursts(eegData,fs) + return numBursts_res + + #23> + @staticmethod + def Burst_length_u_and_sigma_mean(eegData,fs): + # Burst length μ and σ + burstLenMean_res,burstLenStd_res = eeg.burstLengthStats(eegData,fs) + return burstLenMean_res + + #24> + @staticmethod + def Burst_length_u_and_sigma_std(eegData,fs): + burstLenMean_res,burstLenStd_res = eeg.burstLengthStats(eegData,fs) + return burstLenStd_res + + + #25> + @staticmethod + def no_of_suprression(eegData,fs): + # Number of Suppressions + numSupps_res = eeg.numSuppressions(eegData,fs) + return numSupps_res + + #26> + @staticmethod + def Suppression_length_u_and_sigma_mean(eegData,fs): + # Suppression length μ and σ + suppLenMean_res,suppLenStd_res = eeg.suppressionLengthStats(eegData,fs) + return suppLenMean_res + + #27> + @staticmethod + def Suppression_length_u_and_sigma_std(eegData,fs): + # Suppression length μ and σ + suppLenMean_res,suppLenStd_res = eeg.suppressionLengthStats(eegData,fs) + return suppLenStd_res + + ############################################################################## + +def merge_dictionary(dictionary): + ''' + merge all trial data to form one array + ''' + no_of_trials = len(list(dictionary.keys())) + no_of_channels = dictionary[1][0].shape[1] + length_of_segment = dictionary[1][0].shape[2] + no_of_epochs_per_trial = dictionary[1][0].shape[0] + X = np.empty((0,no_of_channels,length_of_segment)) + Y = np.empty((0,2)) + for trial,lst in dictionary.items(): + array = dictionary[trial][0] + score = dictionary[trial][3] + X = np.append(X,array,axis = 0) + for epoch in range(no_of_epochs_per_trial): + Y = np.append(Y,np.expand_dims(score,axis =0),axis = 0) + + return X,Y + +def epoch_data(X,Y, window, stride, sfreq): + + (channels,timepoints,trials )= X.shape + X = np.reshape(X,(trials,channels,timepoints)) + segment = int(window*sfreq) + step = int(stride*sfreq) + epochPerTrial = int((timepoints-segment)/step + 1) + count = 0 + X_new = np.empty((trials*epochPerTrial,channels,segment)) + Y_new = np.empty((trials*epochPerTrial,2)) + for trial in range(trials): + for epoch in range(epochPerTrial): + X_new[count,:,:] = X[trial,:,epoch*step:(epoch*step)+segment] + Y_new[count,:] = Y[trial,:2] + count+=1 + (trials,channels,timepoints) = X_new.shape + X_new = np.reshape(X_new,(channels,timepoints,trials)) + + return X_new,Y_new + +def driver_code(): + dataset_dictionary = {0:'DEAP',1:'OASIS',2:'DREAMER'} + print(dataset_dictionary) + print('enter number for loading dataset') + mapping = int(input()) + print('plz wait loading dataset preprocessed arrays') + dataset = load_data(dataset_dictionary[mapping]) + if mapping == 1: + print('do you want to use with autoreject data? if yes press y') + boolean = input() + if boolean == 'y': + dataset.use_autoreject = 'y' + + dataset.load_arrays() + print('loading complete') + print('shape of data we will use to make features:',dataset.eegData.shape) + print('do you want to segment the data before calculating feature values? y/n') + boolean = input() + if boolean == 'y': + window = float(input('enter window size')) + stride = float(input('enter stride size')) + dataset.eegData,dataset.Y = epoch_data(dataset.eegData,dataset.Y,window,stride,dataset.freq) + print('new shapes of X and Y:',dataset.eegData.shape,' ',dataset.Y.shape) + else: + window = 0 + stride = 0 + print('features available') + featuresDict = {0:'shannonEntropy', + 1:'ShannonRes_sub_bands_alpha', + 2:'ShannonRes_sub_bands_beta', + 3:'ShannonRes_sub_bands_delta', + 4:'ShannonRes_sub_bands_theta', + 5:'ShannonRes_sub_bands_gamma', + 6:'Hjorth_mobilty', + 7:'Hjorth_complexity', + 8:'falseNearestNeighbor', + 9:'medianFreq', + 10:'bandPwr_alpha', + 11:'bandPwr_beta', + 12:'bandPwr_gamma', + 13:'bandPwr_theta', + 14:'bandPwr_delta', + 15:'stdDev', + 16:'diffuseSlowing', + 17:'spikeNum', + 18:'deltaBurstAfterSpike', + 19:'shortSpikeNum', + 20:'Sharp spike', + 21:'numBursts', + 22:'burstLen_u_and_sigma_mean', + 23:'burstLen_u_and_sigma_std', + 24:'numSuppressions', + 25:'suppressionLen_u_and_sigma_mean', + 26:'suppressionLen_u_and_sigma_std', + } + featureMethod={0:features.ShannonRes, + 1:features.ShannonRes_sub_band_alpha, + 2:features.ShannonRes_sub_band_beta, + 3:features.ShannonRes_sub_band_delta, + 4:features.ShannonRes_sub_band_theta, + 5:features.ShannonRes_sub_band_gamma, + 6:features.Hojorth_Mobility, + 7:features.Hojorth_Complexity, + 8:features.False_Nearest_Neighbour, + 9:features.median_frequency, + 10:features.band_power_alpha, + 11:features.band_power_beta, + 12:features.band_power_gamma, + 13:features.band_power_theta, + 14:features.band_power_delta, + 15:features.standard_deviation, + 16:features.regularity, + 17:features.Diffuse_slowing, + 18:features.Spikes, + 19:features.delta_burst_after_spike, + 20:features.Sharp_spike, + 21:features.Number_of_Burst, + 22:features.Burst_length_u_and_sigma_mean, + 23:features.Burst_length_u_and_sigma_std, + 24:features.no_of_suprression, + 25:features.Suppression_length_u_and_sigma_mean, + 26:features.Suppression_length_u_and_sigma_std, + } + + + print(featuresDict) + + #define path for saving before hand in np.savez line below + path = 'features/' + #os.mkdir('features/'+window+'_'+stride) + if dataset.name == 'DEAP': + path = path +'deap/' + elif dataset.name == 'DREAMER': + path = path + 'dreamer/' + else: + if dataset.use_autoreject == 'y': + path = path +'oasis/with_autoreject/' + else: + path = path +'oasis/without_autoreject/' + boolean = input('do you want to individually make features? y/n') + if boolean =='n': + for key in featureMethod.keys(): + feature_matrix = featureMethod[key](eegData = dataset.eegData,fs=dataset.freq) + filename = featuresDict[key] + print('saving ---',filename) + np.savez(path+filename+'_'+str(int(window))+'_'+str(int(stride)),features = feature_matrix , Y = dataset.Y) + else: + found_features = False + while not found_features: + print('enter feature no') + key = int(input()) + feature_matrix = featureMethod[key](eegData = dataset.eegData,fs=dataset.freq) + filename = featuresDict[key] + print('saving ---',filename) + np.savez(path+filename+'_'+str(int(window))+'_'+str(int(stride)),features = feature_matrix , Y = dataset.Y) + boolean = input('do you want to find more features? y/n ') + if boolean =='n': + found_features = True + + + print('feature extraction done!!!!') + +def __main__(): + driver_code() + +__main__() + +if __name__ == 'main': + driver_code() + + + diff --git a/feature_select_main.py b/feature_select_main.py new file mode 100644 index 0000000..389c38a --- /dev/null +++ b/feature_select_main.py @@ -0,0 +1,70 @@ +#!/usr/bin/env python +# coding: utf-8 + +# In[ ]: + + +# Script to get the feature ranking and electrode ranking through + # Method A :- Random Forest Regressor + # Method B :- F score based Ranking + # Method C :- Random Forest Importances approach +# Main function + +from ImportUtils import * +from TopNByFSMethods import * +from TopNByClassifier import * +from args_eeg import args as my_args + +if __name__ == '__main__': + + # args object to fetch command line inputs + args = my_args() + print(args.__dict__) + pwd = os.getcwd() + + dataset = args.dataset + window = args.window + stride = args.stride + sfreq = args.sfreq + model = args.model + label = args.label + approach = args.approach #byclassifier or byfs + ml_algo = args.ml_algo #classification or regression + top = args.top #e or f or ef + fs_method = args.fs_method + + #feature extraction + getEpochedFeatures(dataset, window, stride, sfreq, label) + if(top == "e"): + clf = RandomForestRegressor() + topElectrodeRegressionRanking(dataset, window, stride, sfreq, clf, label, scale=False, pca=False) + topElectrodeFSRegressionRanking(dataset, window, stride, sfreq, clf, label, scale=False, pca=False, mutual_info = False, method='SelectKBest') + topElectrodeFSRegressionRanking(dataset, window, stride, sfreq, clf, label, scale=False, pca=False, mutual_info = False, method='RandomForest') + plt.legend(["Method A","Method B", "Method C"]) + + if(label == 1): + plt.savefig(pwd + "/" + dataset + "/arousal_plots/" + "CorrectedElectrodewiseRanking" + str(window) + str(stride) + ".svg", bbox_inches='tight') + plt.show() + plt.clf() + + else: + plt.savefig(pwd + "/" + dataset + "/plots/" + "CorrectedElectrodewiseRanking" + str(window) + str(stride) + ".svg", bbox_inches='tight') + plt.show() + plt.clf() + + elif(top == "f"): + clf = RandomForestRegressor() + topFeaturesRegressionRanking(dataset, window, stride, sfreq, clf, label, scale=False, pca=False) + topFeatureFSRegressionRanking(dataset, window, stride, sfreq, clf, label, scale=False, pca=False, mutual_info = False, method='SelectKBest') + topFeatureFSRegressionRanking(dataset, window, stride, sfreq, clf, label, scale=False, pca=False, mutual_info = False, method='RandomForest') + if(label == 1): + plt.legend(["Method A","Method B", "Method C"]) + plt.savefig(pwd + "/" + dataset + "/arousal_plots/" + "CorrectedFeaturewiseRanking" + str(window) + str(stride) + ".svg", bbox_inches='tight') + plt.show() + plt.clf() + else: + plt.legend(["Method A","Method B", "Method C"]) + plt.savefig(pwd + "/" + dataset + "/plots/" + "CorrectedFeaturewiseRanking" + str(window) + str(stride) + ".svg", bbox_inches='tight') + plt.show() + plt.clf() + diff --git a/incremental_learning_deap.py b/incremental_learning_deap.py new file mode 100644 index 0000000..2a5f57f --- /dev/null +++ b/incremental_learning_deap.py @@ -0,0 +1,264 @@ +#!/usr/bin/env python +# coding: utf-8 + +# In[ ]: + + +import numpy as np +import matplotlib.pyplot as plt +import pandas as pd +import sys +from sklearn.preprocessing import MinMaxScaler,StandardScaler +from sklearn.utils import shuffle +from sklearn.metrics import mean_squared_error +from sklearn.model_selection import train_test_split + + +# In[ ]: + + +scaler_min_max = MinMaxScaler() +scaler_standard = StandardScaler() + +# Either one of the MinMaxScaling or StandardScaling function can be used + +def MinMaxScaling(feature_matrix): + global scaler_min_max + scaler_min_max.fit(feature_matrix) + return scaler_min_max.transform(feature_matrix) + +def StandardScaling(feature_matrix): + global scaler_standard + scaler_standard.fit(feature_matrix) + print('scaling shape',scaler_standard.mean_.shape) + return scaler.transform(feature_matrix) + +architecture = 'sklearn' +if architecture == 'sklearn': + from sklearn.svm import SVR + from sklearn.metrics import accuracy_score +else: + from cuml.svm import SVR + from cuml.metrics import accuracy_score + + +# +# """##DEAP dataset +# 1> Valence - features selected +# > +# * bandPwr_gamma +# * bandPwr_beta +# * ShannonRes_gamma +# * ShannonRes_beta +# * rasm_gamma +# * dasm_gamma +# +# 2> Arousal - feature selected +# > +# * HjorthMob +# * HjorthComp +# * stdDev +# * bandPwr_theta +# * bandPwr_beta +# * ShannonRes_beta +# * ShannonRes_gamma +# * dasm_beta +# """ + +# In[ ]: + + +# now for incremental learning we need to segregate data of subjects +def segregate_data_of_subjects(feature_matrix,total_subjects,sfreq = 128): + ''' + reuturs a dictionary which contains the samples data only corresponding to particular subjects of feature matrix + ''' +# parameters :- + # feature_matrix :- Vector containing the features mentioned above subject wise, to be used for cross validation + # total_subjects :- Total number of subjects in the study + # sfreq :- sampling frequency of the EEG data +# returns :- + # subject_indexes :- Subject wise features in a dictionary form + + total_samples = feature_matrix.shape[0] + subject_indexes = {} + samples_per_subject = total_samples//total_subjects + for i in range(total_subjects): + subject_name = 'subject_' + str(i+1) + subject_indexes[subject_name] = feature_matrix[samples_per_subject*i:samples_per_subject*(i+1),:] + + return subject_indexes + + +# In[ ]: + + +# now defining a function which carries out the incremenatal learning algo +def training_phase(model,feature_matrix,Y,subject_indexes,number_of_subjects,total_subjects,rmse_score,test_subject): +# parameters :- + # model :- The training model to be used (SVR in this case) + # featrue_matrix :- feature matrix obtained in the above function + # Y :- The Valence and Arousal values as entered by the subjects + # subject_indexes :-Subject wise features in a dictionary form + # number_of_subjects :- Total number of subjects in the study + # total_subjects :- Total number of subjects in the study + # rmse_score :- RMSE of the previous iterations + # test_subject :- Cross validation test subject list + +# returns :- + # rmse_score :- Array of rmse scores over the iterations, updated with the rmse score of the current iteration + # test_subject :- Updated Cross validation test subject list + no_of_features = feature_matrix.shape[1] + X = np.empty((0,no_of_features)) + print('training on subject_no:',end = ' ') + + #create a feature matrix containing data upto subjects given by the number number_of_subjects + #for eg if number of subject ==4 , data of first 4 subjects will be taken and a feature matrix made out of it to feed to the ml model + + for subject in range(number_of_subjects): + print(subject+1,end = ' ') + subject_name = 'subject_'+str(subject+1) + subject_data = subject_indexes[subject_name] + X = np.append(X,subject_data,axis=0) + print(' ') + + #apply a MinMax scaling to the current iteration feature matrix + X = MinMaxScaling(X) + + #now we also need to extract the valence arousal data for the corresponding subject + y = np.empty((0)) + total_samples = feature_matrix.shape[0] + samples_per_subject = total_samples//total_subjects + for subject in range(number_of_subjects): + y = Y[:samples_per_subject*(number_of_subjects)] + + print('shape of X is :',X.shape) + print('shape of y is :',y.shape) + + #shuffling data randomly to feed to model + X,y = shuffle(X,y,random_state = 0) + + #doing a train test split of 80:20 + X_train,X_test,y_train,y_test = train_test_split(X,y,random_state=0,test_size=0.2) + + #training_model + model = model.fit(X_train,y_train) + + #testing_model + y_predict = model.predict(X_test) + + + #calculating rmse values for valence and arousal using model fitted for current iteration + y_rms = np.sqrt(mean_squared_error(y_test,y_predict)) + print('rms on y :',y_rms) + print('') + rmse_score.append(y_rms) + test_subject.append(subject_name) + + return rmse_score,test_subject + + +# In[ ]: + + +def driver_code(save): + + # Function to load the features, then train the regressor and will give the validation and test plot + + #extracting file data corresponding to valence features + bandPwr_gamma_v = np.load('features/deap/bandPwr_gamma_1_1.npz') + bandPwr_beta_v = np.load('features/deap/bandPwr_beta_1_1.npz') + ShannonRes_gamma_v = np.load('features/deap/ShannonRes_sub_bands_gamma_1_1.npz') + ShannonRes_beta_v = np.load('features/deap/ShanninRes_sub_bands_beta_1_1.npz') + rasm_gamma_v = np.load('features/deap_RASM_DASM/gamma_1_1.npz')#shape of feature is 80640 x 14, be careful to extract only rasm features, i.e first 7 columns + dasm_gamma_v = np.load('features/deap_RASM_DASM/gamma_1_1.npz') + + #creating a feature matrix for valence + feature_matrix_valence = np.empty((0,80640)) + feature_matrix_valence = np.append(feature_matrix_valence,bandPwr_gamma_v['features'],axis = 0) + feature_matrix_valence = np.append(feature_matrix_valence,bandPwr_beta_v['features'],axis = 0) + feature_matrix_valence = np.append(feature_matrix_valence,ShannonRes_gamma_v['features'],axis = 0) + feature_matrix_valence = np.append(feature_matrix_valence,ShannonRes_beta_v['features'],axis = 0) + feature_matrix_valence = np.append(feature_matrix_valence,rasm_gamma_v['features'].T[:7,:],axis = 0) + feature_matrix_valence = np.append(feature_matrix_valence,dasm_gamma_v['features'].T[7:,:],axis = 0) + feature_matrix_valence = feature_matrix_valence.T#feature matrix is of shape 80640 x 70 + + #extracting labels + Y_val = bandPwr_gamma_v['Y'][:,0] + + #extracting file data corresponding to arousal features + HjorthMob_a = np.load('features/deap/Hjorth_mobilty_1_1.npz') + HjorthComp_a = np.load('features/deap/Hjorth_complexity_1_1.npz') + stdDev_a = np.load('features/deap/stdDev_1_1.npz') + bandPwr_beta_a = np.load('features/deap/bandPwr_beta_1_1.npz') + bandPwr_theta_a = np.load('features/deap/bandPwr_theta_1_1.npz') + ShannonRes_beta_a = np.load('features/deap/ShanninRes_sub_bands_beta_1_1.npz') + ShannonRes_gamma_a = np.load('features/deap/ShannonRes_sub_bands_gamma_1_1.npz') + dasm_beta_a = np.load('features/deap_RASM_DASM/beta_1_1.npz') + + #creating feature matrix for arousal + feature_matrix_arousal = np.empty((0,80640)) + feature_matrix_arousal = np.append(feature_matrix_arousal,HjorthMob_a['features'],axis = 0) + feature_matrix_arousal = np.append(feature_matrix_arousal,HjorthComp_a['features'],axis = 0) + feature_matrix_arousal = np.append(feature_matrix_arousal,stdDev_a['features'],axis = 0) + feature_matrix_arousal = np.append(feature_matrix_arousal,bandPwr_beta_a['features'],axis = 0) + feature_matrix_arousal = np.append(feature_matrix_arousal,bandPwr_theta_a['features'],axis = 0) + feature_matrix_arousal = np.append(feature_matrix_arousal,ShannonRes_beta_a['features'],axis = 0) + feature_matrix_arousal = np.append(feature_matrix_arousal,ShannonRes_gamma_a['features'],axis = 0) + feature_matrix_arousal = np.append(feature_matrix_arousal,dasm_beta_a['features'].T[7:,:],axis = 0) + feature_matrix_arousal = feature_matrix_arousal.T#shape of feature matrix is 80640 x 105 + + #extracting labels + Y_aro = HjorthMob_a['Y'][:,1] + + model = SVR()#initializing support vector regressor for training + + #running incremental learning loop for valence + print('') + print('Incremental training for valence') + print('') + test_subject = [] + rmse_val = [] + subject_indexes_valence = segregate_data_of_subjects(feature_matrix_valence,32,128) + i = 1 + while i <= 32: + rmse_val,test_subject= training_phase(model,feature_matrix_valence,Y_val,subject_indexes_valence,i,32,rmse_val,test_subject) + i+=1 + + #running incremental learning loop for arousal + print('') + print('Incremental training for arousal ') + print(' ') + + model = SVR()#reinitialize model + test_subject = [] + rmse_aro = [] + subject_indexes_arousal = segregate_data_of_subjects(feature_matrix_arousal,32,128) + i=1 + while i<=32: + rmse_aro,test_subject = training_phase(model,feature_matrix_arousal,Y_aro,subject_indexes_arousal,i,32,rmse_aro,test_subject) + i+=1 + + + fig,axe = plt.subplots(1,1,figsize = (40,20)) + axe.plot(test_subject,rmse_val,color='r',label='rmse valence') + axe.plot(test_subject,rmse_aro,color = 'g',label='rmse arousal') + axe.set_xlabel('trained upto subject') + axe.set_ylabel('rmse') + axe.set_title('support vector regressor') + axe.legend(loc = 'upper right') + + df = pd.DataFrame([rmse_val,rmse_aro],columns = test_subject,index = ['valence rms','arousal rms']) + print(df) + + if save == 'y': + plt.savefig('plots/deap/all_feature_valence_arousal_rmse',format = "svg") + df.to_csv('plots/deap/all_features_valence_arousal_rmse.csv') + + +# In[ ]: + + +if __name__ == '__main__': + driver_code(sys.argv[1]) + diff --git a/incremental_learning_dreamer.py b/incremental_learning_dreamer.py new file mode 100644 index 0000000..55e4446 --- /dev/null +++ b/incremental_learning_dreamer.py @@ -0,0 +1,259 @@ +#!/usr/bin/env python +# coding: utf-8 + +# In[ ]: + + +import numpy as np +import matplotlib.pyplot as plt +import pandas as pd +import sys +from sklearn.preprocessing import MinMaxScaler,StandardScaler +from sklearn.utils import shuffle +from sklearn.metrics import mean_squared_error +from sklearn.model_selection import train_test_split + +architecture = 'sklearn' +if architecture == 'sklearn': + from sklearn.svm import SVR + from sklearn.metrics import accuracy_score +else: + from cuml.svm import SVR + from cuml.ensemble import RandomForestRegressor + from cuml.metrics import accuracy_score + + +# In[ ]: + + +# Either one of the MinMaxScaling or StandardScaling function can be used + +scaler_min_max = MinMaxScaler() +scaler_standard = StandardScaler() +def MinMaxScaling(feature_matrix): + global scaler_min_max + scaler_min_max.fit(feature_matrix) + return scaler_min_max.transform(feature_matrix) +def StandardScaling(feature_matrix): + global scaler_standard + scaler_standard.fit(feature_matrix) + print('scaling shape',scaler_standard.mean_.shape) + return scaler.transform(feature_matrix) + + +# """##DREAMER dataset +# 1> Valence - features selected +# > +# * HjorthMob +# * HjorthCom +# * stdDev +# * bandPwr_theta +# * ShannonRes_gamma +# * bandPwr_beta +# +# 2> Arousal - feature selected +# > +# * HjorthMob +# * HjorthComp +# * stdDev +# * bandPwr_theta +# * bandPwr_gamma +# * ShannonRes_gamma +# """ + +# In[ ]: + + +# now for incremental learning we need to segregate data of subjects +def segregate_data_of_subjects(feature_matrix,total_subjects,sfreq = 128): + ''' + reuturs a dictionary which contains the samples data only corresponding to particular subjects of feature matrix + ''' +# parameters :- + # feature_matrix :- Vector containing the features mentioned above subject wise, to be used for cross validation + # total_subjects :- Total number of subjects in the study + # sfreq :- sampling frequency of the EEG data +# returns :- + # subject_indexes :- Subject wise features in a dictionary form + + total_samples = feature_matrix.shape[0] + subject_indexes = {} + samples_per_subject = total_samples//total_subjects + for i in range(total_subjects): + subject_name = 'subject_' + str(i+1) + subject_indexes[subject_name] = feature_matrix[samples_per_subject*i:samples_per_subject*(i+1),:] + + return subject_indexes + + +# In[ ]: + + +# now defining a function which carries out the incremenatal learning algo +def training_phase(model,feature_matrix,Y,subject_indexes,number_of_subjects,total_subjects,rmse_score,test_subject): +# parameters :- + # model :- The training model to be used (SVR in this case) + # featrue_matrix :- feature matrix obtained in the above function + # Y :- The Valence and Arousal values as entered by the subjects + # subject_indexes :-Subject wise features in a dictionary form + # number_of_subjects :- Total number of subjects in the study + # total_subjects :- Total number of subjects in the study + # rmse_score :- RMSE of the previous iterations + # test_subject :- Cross validation test subject list + +# returns :- + # rmse_score :- Array of rmse scores over the iterations, updated with the rmse score of the current iteration + # test_subject :- Updated Cross validation test subject list + no_of_features = feature_matrix.shape[1] + X = np.empty((0,no_of_features)) + print('training on subject_no:',end = ' ') + + #create a feature matrix containing data upto subjects given by the number number_of_subjects + #for eg if number of subject ==4 , data of first 4 subjects will be taken and a feature matrix made out of it to feed to the ml model + + for subject in range(number_of_subjects): + print(subject+1,end = ' ') + subject_name = 'subject_'+str(subject+1) + subject_data = subject_indexes[subject_name] + X = np.append(X,subject_data,axis=0) + print(' ') + + #apply a MinMax scaling to the current iteration feature matrix + X = MinMaxScaling(X) + + #now we also need to extract the valence arousal data for the corresponding subject + y = np.empty((0)) + total_samples = feature_matrix.shape[0] + samples_per_subject = total_samples//total_subjects + for subject in range(number_of_subjects): + y = Y[:samples_per_subject*(number_of_subjects)] + + + print('shape of X is :',X.shape) + print('shape of y is :',y.shape) + + #shuffling data randomly to feed to model + X,y = shuffle(X,y,random_state = 0) + + #doing a train test split of 80:20 + X_train,X_test,y_train,y_test = train_test_split(X,y,random_state=0,test_size=0.2) + + #training_model + model = model.fit(X_train,y_train) + + #testing_model + y_predict = model.predict(X_test) + + + #calculating rmse values for valence and arousal using model fitted for current iteration + y_rms = np.sqrt(mean_squared_error(y_test,y_predict)) + print('rms on y :',y_rms) + print('') + rmse_score.append(y_rms) + test_subject.append(subject_name) + + return rmse_score,test_subject + + +# In[ ]: + + +def driver_code(save): + + # Function to load the features, then train the regressor and will give the validation and test plot + + + #extracting file data corresponding to valence features + HjorthMob_v = np.load('features/dreamer/Hjorth_mobilty_1_1.npz') + HjorthCom_v = np.load('features/dreamer/Hjorth_complexity_1_1.npz') + stdDev_v = np.load('features/dreamer/stdDev_1_1.npz') + bandPwr_theta_v = np.load('features/dreamer/bandPwr_theta_1_1.npz') + bandPwr_beta_v = np.load('features/dreamer/bandPwr_beta_1_1.npz') + ShannonRes_gamma_v = np.load('features/dreamer/ShannonRes_sub_bands_gamma_1_1.npz') + + + # creating a feature matrix out of all feature data for valence + feature_matrix_valence = np.empty((0,188370)) + feature_matrix_valence = np.append(feature_matrix_valence,HjorthMob_v['features'],axis =0) + feature_matrix_valence = np.append(feature_matrix_valence,HjorthCom_v['features'],axis =0) + feature_matrix_valence = np.append(feature_matrix_valence,stdDev_v['features'],axis =0) + feature_matrix_valence = np.append(feature_matrix_valence,bandPwr_theta_v['features'],axis =0) + feature_matrix_valence = np.append(feature_matrix_valence,bandPwr_beta_v['features'],axis =0) + feature_matrix_valence = np.append(feature_matrix_valence,ShannonRes_gamma_v['features'],axis =0) + feature_matrix_valence = feature_matrix_valence.T # feature matrix becomes of shape 188370 x 84 i.e (samples X features per sample) + + # extracting valence values for each sample + Y_val = HjorthMob_v['Y'][:,0]#all features have same valnece labels + + + + #extracting file data corresponding to arousal features + HjorthMob_a = np.load('features/dreamer/Hjorth_mobilty_1_1.npz') + HjorthCom_a = np.load('features/dreamer/Hjorth_complexity_1_1.npz') + stdDev_a = np.load('features/dreamer/stdDev_1_1.npz') + bandPwr_theta_a = np.load('features/dreamer/bandPwr_theta_1_1.npz') + bandPwr_gamma_a = np.load('features/dreamer/bandPwr_gamma_1_1.npz') + ShannonRes_gamma_a = np.load('features/dreamer/ShannonRes_sub_bands_gamma_1_1.npz') + + #creating feature matrix for all feature data for arousal + feature_matrix_arousal = np.empty((0,188370)) + feature_matrix_arousal = np.append(feature_matrix_arousal,HjorthMob_a['features'],axis = 0) + feature_matrix_arousal = np.append(feature_matrix_arousal,HjorthCom_a['features'],axis = 0) + feature_matrix_arousal = np.append(feature_matrix_arousal,stdDev_a['features'],axis = 0) + feature_matrix_arousal = np.append(feature_matrix_arousal,bandPwr_theta_a['features'],axis = 0) + feature_matrix_arousal = np.append(feature_matrix_arousal,bandPwr_gamma_a['features'],axis = 0) + feature_matrix_arousal = np.append(feature_matrix_arousal,ShannonRes_gamma_a['features'],axis = 0) + feature_matrix_arousal = feature_matrix_arousal.T + + #extracting arousal values for + Y_aro = HjorthMob_a['Y'][:,1]#all features have same arousal labels + + model =SVR()#initializing support vector regressor for training + + #running incremental learning loop for valence + print('') + print('Incremental training for valence') + print('') + test_subject = [] + rmse_val = [] + subject_indexes_valence = segregate_data_of_subjects(feature_matrix_valence,23,128) + i = 1 + while i <= 23: + rmse_val,test_subject= training_phase(model,feature_matrix_valence,Y_val,subject_indexes_valence,i,23,rmse_val,test_subject) + i+=1 + + #running incremental learning loop for arousal + print('') + print('Incremental training for arousal ') + print(' ') + + test_subject = [] + rmse_aro = [] + subject_indexes_arousal = segregate_data_of_subjects(feature_matrix_arousal,23,128) + i=1 + while i<=23: + rmse_aro,test_subject = training_phase(model,feature_matrix_arousal,Y_aro,subject_indexes_arousal,i,23,rmse_aro,test_subject) + i+=1 + + + fig,axe = plt.subplots(1,1,figsize = (40,20)) + axe.plot(test_subject,rmse_val,color='r',label = 'rms valence') + axe.plot(test_subject,rmse_aro,color = 'g',label = 'rms arousal') + axe.set_xlabel('trained upto subject') + axe.set_ylabel('rmse') + axe.set_title('support vector regressor') + axe.legend(loc='upper right') + df = pd.DataFrame([rmse_val,rmse_aro],columns = test_subject,index = ['valence rms','arousal rms']) + print(df) + + if save == 'y': + plt.savefig('plots/dreamer/all_feature_valence_arousal_rmse',format = "svg") + df.to_csv('plots/dreamer/all_features_valence_arousal_rmse.csv') + + +# In[ ]: + + +if __name__ == '__main__': + driver_code(sys.argv[1]) + diff --git a/incremental_learning_oasis.py b/incremental_learning_oasis.py new file mode 100644 index 0000000..7819b83 --- /dev/null +++ b/incremental_learning_oasis.py @@ -0,0 +1,255 @@ +#!/usr/bin/env python +# coding: utf-8 + +# In[ ]: + + +import numpy as np +import matplotlib.pyplot as plt +import pandas as pd +import sys +from sklearn.preprocessing import MinMaxScaler,StandardScaler +from sklearn.utils import shuffle +from sklearn.metrics import mean_squared_error +from sklearn.model_selection import train_test_split +from sklearn.svm import SVR +from sklearn.metrics import accuracy_score + + +# In[ ]: + + +# Either one of the MinMaxScaling or StandardScaling function can be used + +scaler_min_max = MinMaxScaler() +scaler_standard = StandardScaler() +def MinMaxScaling(feature_matrix): + global scaler_min_max + scaler_min_max.fit(feature_matrix) + return scaler_min_max.transform(feature_matrix) + +def StandardScaling(feature_matrix): + global scaler_standard + scaler_standard.fit(feature_matrix) + print('scaling shape',scaler_standard.mean_.shape) + return scaler.transform(feature_matrix) + + +# """##OASIS dataset +# 1> Valence - features selected +# > +# * HjorthMob +# * HjorthComp +# * stdDev +# +# 2> Arousal - feature selected +# > +# * HjorthMob +# """ + +# In[ ]: + + +# now for incremental learning we need to segregate data of subjects +def segregate_data_of_subjects(feature_matrix,Y,total_subjects,sfreq = 128): + ''' + returns a dictionary which contains the samples data only corresponding to particular subjects of feature matrix + ''' +# parameters :- + # feature_matrix :- Vector containing the features mentioned above subject wise, to be used for cross validation + # Y :- The Valence and Arousal values as entered by the subjects + # total_subjects :- Total number of subjects in the study + # sfreq :- sampling frequency of the EEG data +# returns :- + # subject_indexes :- Subject wise features in a dictionary form + # aligned_y :- the y values corresponding to each subject + + subject_indexes = { 'subject_1':feature_matrix[:200], + 'subject_2':feature_matrix[200:400], + 'subject_3':feature_matrix[400:600], + 'subject_4':feature_matrix[600:795], + 'subject_5':feature_matrix[795:995], + 'subject_6':feature_matrix[995:1185], + 'subject_7':feature_matrix[1185:1375], + 'subject_8':feature_matrix[1375:1575], + 'subject_9':feature_matrix[1575:1770], + 'subject_10':feature_matrix[1770:1965], + 'subject_11':feature_matrix[1965:2160], + 'subject_12':feature_matrix[2160:2360], + 'subject_13':feature_matrix[2360:2550], + 'subject_14':feature_matrix[2550:2740], + 'subject_15':feature_matrix[2740:2935] + } + + aligned_y = { 'subject_1':Y[:200], + 'subject_2':Y[200:400], + 'subject_3':Y[400:600], + 'subject_4':Y[600:795], + 'subject_5':Y[795:995], + 'subject_6':Y[995:1185], + 'subject_7':Y[1185:1375], + 'subject_8':Y[1375:1575], + 'subject_9':Y[1575:1770], + 'subject_10':Y[1770:1965], + 'subject_11':Y[1965:2160], + 'subject_12':Y[2160:2360], + 'subject_13':Y[2360:2550], + 'subject_14':Y[2550:2740], + 'subject_15':Y[2740:2935] + } + + + return subject_indexes,aligned_y + + +# In[ ]: + + +# now defining a function which carries out the incremenatal learning algo +def training_phase(model,feature_matrix,Y,subject_indexes,number_of_subjects,total_subjects,rmse_score,test_subject): + +# parameters :- + # model :- The training model to be used (SVR in this case) + # featrue_matrix :- feature matrix obtained in the above function + # Y :- The Valence and Arousal values as entered by the subjects + # subject_indexes :-Subject wise features in a dictionary form + # number_of_subjects :- Total number of subjects in the study + # total_subjects :- Total number of subjects in the study + # rmse_score :- RMSE of the previous iterations + # test_subject :- Cross validation test subject list + +# returns :- + # rmse_score :- Array of rmse scores over the iterations, updated with the rmse score of the current iteration + # test_subject :- Updated Cross validation test subject list + + no_of_features = feature_matrix.shape[1] + X = np.empty((0,no_of_features)) + print('training on subject_no:',end = ' ') + + #create a feature matrix containing data upto subjects given by the number number_of_subjects + #for eg if number of subject ==4 , data of first 4 subjects will be taken and a feature matrix made out of it to feed to the ml model + + for subject in range(number_of_subjects): + print(subject+1,end = ' ') + subject_name = 'subject_'+str(subject+1) + subject_data = subject_indexes[subject_name] + X = np.append(X,subject_data,axis=0) + print(' ') + + #apply a MinMax scaling to the current iteration feature matrix + X = MinMaxScaling(X) + + #now we also need to extract the valence/arousal data for the corresponding subject + y = np.empty((0)) + for subject in range(number_of_subjects): + subject_name = 'subject_'+str(subject+1) + subject_y_data = Y[subject_name] + y = np.append(y,subject_y_data,axis=0) + + + print('shape of X is :',X.shape) + print('shape of y is :',y.shape) + #shuffling data randomly to feed to model + X,y = shuffle(X,y,random_state = 0) + + #doing a train test split of 80:20 + X_train,X_test,y_train,y_test = train_test_split(X,y,random_state=0,test_size=0.2) + + #training_model + model = model.fit(X_train,y_train) + + #testing_model + y_predict = model.predict(X_test) + + + #calculating rmse values for valence and arousal using model fitted for current iteration + y_rms = np.sqrt(mean_squared_error(y_test,y_predict)) + print('rms on y :',y_rms) + print('') + rmse_score.append(y_rms) + test_subject.append(subject_name) + + return rmse_score,test_subject + + +# In[ ]: + + +def driver_code(save): + + #extracting feature data related to valence + HjorthMob_v = np.load('features/oasis/with_autoreject/Hjorth_mobilty_0_0.npz') + HjorthComp_v = np.load('features/oasis/with_autoreject/Hjorth_complexity_0_0.npz') + stdDev_v = np.load('features/oasis/with_autoreject/stdDev_0_0.npz') + + #creating feature matrix + feature_matrix_valence = np.empty((0,2935)) + feature_matrix_valence = np.append(feature_matrix_valence,HjorthMob_v['features'],axis = 0) + feature_matrix_valence = np.append(feature_matrix_valence,HjorthComp_v['features'],axis = 0) + feature_matrix_valence = np.append(feature_matrix_valence,stdDev_v['features'],axis = 0) + feature_matrix_valence = feature_matrix_valence.T #shape of feature matrix is 2935 x 42 + + #extracting valence labels + Y_val = HjorthMob_v['Y'][:,0] + + #extracting feature data related to arousal + HjorthMob_a = np.load('features/oasis/with_autoreject/Hjorth_mobilty_0_0.npz') + + #creating feature matrix for arousal + feature_matrix_arousal = np.empty((0,2935)) + feature_matrix_arousal = np.append(feature_matrix_arousal,HjorthMob_a['features'],axis=0) + feature_matrix_arousal = feature_matrix_arousal.T + + #extracting arousal labels + Y_aro = HjorthMob_a['Y'][:,1] + + model = SVR() #initialize model + + #running incremental learning loop for valence + print('') + print('Incremental training for valence') + print('') + test_subject = [] + rmse_val = [] + subject_indexes_valence,aligned_Y_val = segregate_data_of_subjects(feature_matrix_valence,Y_val,15,128) + i = 1 + while i <= 15: + rmse_val,test_subject= training_phase(model,feature_matrix_valence,aligned_Y_val,subject_indexes_valence,i,15,rmse_val,test_subject) + i+=1 + + #running incremental learning loop for arousal + print('') + print('Incremental training for arousal ') + print(' ') + + model = SVR()#reinitialize model + test_subject = [] + rmse_aro = [] + subject_indexes_arousal,aligned_Y_aro = segregate_data_of_subjects(feature_matrix_arousal,Y_aro,15,128) + i=1 + while i<=15: + rmse_aro,test_subject = training_phase(model,feature_matrix_arousal,aligned_Y_aro,subject_indexes_arousal,i,15,rmse_aro,test_subject) + i+=1 + + + fig,axe = plt.subplots(1,1,figsize = (40,20)) + axe.plot(test_subject,rmse_val,color='r',label = 'rmse valence') + axe.plot(test_subject,rmse_aro,color = 'g',label = 'rmse arousal') + axe.set_xlabel('trained upto subject') + axe.set_ylabel('rmse') + axe.set_title('support vector regressor') + axe.legend(loc = 'upper right') + + df = pd.DataFrame([rmse_val,rmse_aro],columns = test_subject,index = ['valence rms','arousal rms']) + + if save == 'y': + plt.savefig('plots/oasis/all_feature_valence_arousal_rmse',format="svg") + df.to_csv('plots/oasis/all_features_valence_arousal_rmse.csv') + + +# In[ ]: + + +if __name__ == '__main__': + driver_code(sys.argv[1]) + diff --git a/incremetal_learning_final_plots.py b/incremetal_learning_final_plots.py new file mode 100644 index 0000000..1a7de4a --- /dev/null +++ b/incremetal_learning_final_plots.py @@ -0,0 +1,207 @@ +#!/usr/bin/env python +# coding: utf-8 + +# In[ ]: + + +import glob +import pandas as pd +import matplotlib.pyplot as plt + + +# In[ ]: + + +from google.colab import drive +drive.mount('/gdrive',force_remount=True) + + +# In[ ]: + + +get_ipython().run_line_magic('cd', '/gdrive/MyDrive/emotion_recognition_project/') + + +# Script to obtain the incremental learning graph for the DEAP, DREAMER and OASIS datasets. + +# ##plots for DEAP + +# In[ ]: + + +dataset_deap=glob.glob('plots/deap/*.csv') + + +# In[ ]: + + +dataset_deap + + +# In[ ]: + + +dataset_svr_deap = pd.read_csv(dataset_deap[0]).T +dataset_svr_deap.columns = ['valence','arousal'] +dataset_svr_deap = dataset_svr_deap.drop('Unnamed: 0') +dataset_svr_deap= dataset_svr_deap[::1] +x_deap = range(1,33,1) +dataset_svr_deap + + +# In[ ]: + + +fig_deap,axe_deap = plt.subplots(1,1,figsize = (17,10)) +axe_deap.plot(x_deap,dataset_svr_deap['valence'],color='green',marker = 'x',markersize=10) +axe_deap.plot(x_deap,dataset_svr_deap['arousal'],color ='red',marker = 'x',markersize=10) +axe_deap.legend(['rfr_valence','rfr_arousal'],) +axe_deap.set_xlabel('trained upto subject') +axe_deap.set_ylabel('RMSE values') +plt.rcParams.update({'font.size':40}) +plt.tight_layout() +plt.xticks(x_deap[::3]) + + +# In[ ]: + + +fig_deap.savefig('final_plots/deap_rfr__valence_arousal_rms.svg') +fig_deap.savefig('final_plots/deap_rfr__valence_arousal_rms.png') + + +# ##plots for DREAMER + +# In[ ]: + + +dataset_dreamer=glob.glob('plots/dreamer/*.csv') + + +# In[ ]: + + +dataset_dreamer + + +# In[ ]: + + +dataset_svr_dreamer = pd.read_csv(dataset_dreamer[0]).T +dataset_svr_dreamer.columns = ['valence','arousal'] +dataset_svr_dreamer = dataset_svr_dreamer.drop('Unnamed: 0') +x_dreamer = range(1,24,1) +dataset_svr_dreamer= dataset_svr_dreamer[::1] +dataset_svr_dreamer + + +# In[ ]: + + +fig_dreamer,axe_dreamer = plt.subplots(1,1,figsize=(17,10)) +axe_dreamer.plot(x_dreamer,dataset_svr_dreamer['valence'],color='green',marker = 'x',markersize=10) +axe_dreamer.plot(x_dreamer,dataset_svr_dreamer['arousal'],color ='red',marker = 'x',markersize=10) +axe_dreamer.legend(['rfr_valence','rfr_arousal'],) +axe_dreamer.set_xlabel('trained upto subject') +axe_dreamer.set_ylabel('RMSE values') +plt.rcParams.update({'font.size':40}) +plt.tight_layout() +plt.xticks(x_dreamer[::3]) + + +# In[ ]: + + +fig_dreamer.savefig('final_plots/dreamer_rfr__valence_arousal_rms.svg') +fig_dreamer.savefig('final_plots/dreamer_rfr__valence_arousal_rms.png') + + +# ##plots for oasis + +# In[ ]: + + +dataset_oasis=glob.glob('plots/oasis/*.csv') + + +# In[ ]: + + +dataset_oasis + + +# In[ ]: + + +dataset_svr_oasis = pd.read_csv(dataset_oasis[0]).T +dataset_svr_oasis.columns = ['valence','arousal'] +dataset_svr_oasis = dataset_svr_oasis.drop('Unnamed: 0') +x_oasis = range(1,16,1) +dataset_svr_oasis= dataset_svr_oasis[::1] +dataset_svr_oasis + + +# In[ ]: + + +fig_oasis,axe_oasis = plt.subplots(1,1,figsize=(17,10)) +axe_oasis.plot(x_oasis,dataset_svr_oasis['valence'],color='green',marker = 'x',markersize=10) +axe_oasis.plot(x_oasis,dataset_svr_oasis['arousal'],color ='red',marker = 'x',markersize=10) +axe_oasis.set_xlabel('trained upto subject') +axe_oasis.set_ylabel('RMSE values') +axe_oasis.legend(['rfr_valence','rfr_arousal'],loc = 'lower right') +plt.rcParams.update({'font.size':40}) +plt.xticks(x_oasis[::3]) +plt.tight_layout() + + +# In[ ]: + + +fig_oasis.savefig('final_plots/oasis_rfr__valence_arousal_rms.svg') +fig_oasis.savefig('final_plots/oasis_rfr__valence_arousal_rms.png') + + +# In[ ]: + + + + + +# In[ ]: + + +f,a = plt.subplots(3,1,figsize = (40,30)) +a[0].plot(x_deap,dataset_svr_deap['valence'],color='green',marker = 'x',markersize=10) +a[0].plot(x_deap,dataset_svr_deap['arousal'],color ='red',marker = 'x',markersize=10) +a[0].legend(['svr_valence','svr_arousal','rfr_valence','rfr_arousal'],) +#a[0].set_xlabel('trained upto subject') +a[0].set_ylabel('RMSE values') +a[0].set_title('DEAP') +a[1].plot(x_dreamer,dataset_svr_dreamer['valence'],color='green',marker = 'x',markersize=10) +a[1].plot(x_dreamer,dataset_svr_dreamer['arousal'],color ='red',marker = 'x',markersize=10) +#a[1].legend(['svr_valence','svr_arousal','rfr_valence','rfr_arousal'],) +#a[1].set_xlabel('trained upto subject') +a[1].set_ylabel('RMSE values') +a[1].set_title('DREAMER') +a[2].plot(x_oasis,dataset_svr_oasis['valence'],color='green',marker = 'x',markersize=10) +a[2].plot(x_oasis,dataset_svr_oasis['arousal'],color ='red',marker = 'x',markersize=10) +a[2].set_xlabel('trained upto subject') +a[2].set_ylabel('RMSE values') +#a[2].legend(['svr_valence','svr_arousal','rfr_valence','rfr_arousal'],loc = 'lower right') +a[2].set_title('OASIS') +plt.rcParams.update({'font.size':40}) +plt.tight_layout() + + +# In[ ]: + + +f.savefig('final_plots/all_plots_incremental learning.svg') + + +# In[ ]: + + + + diff --git a/preprocess.py b/preprocess.py new file mode 100644 index 0000000..6d91105 --- /dev/null +++ b/preprocess.py @@ -0,0 +1,487 @@ +#!/usr/bin/env python +# coding: utf-8 + +# In[ ]: + + +from google.colab import drive +drive.mount('/gdrive',force_remount = True) + + +# In[ ]: + + +get_ipython().system('pip install mne') + + +# In[ ]: + + +get_ipython().system('pip install autoreject') + + +# In[ ]: + + +import numpy as np +import mne +import autoreject +from scipy.stats import pearsonr +import pickle + + +# In[ ]: + + +get_ipython().run_line_magic('cd', '/gdrive/MyDrive/emotion_recognition_project/') + + +# In[ ]: + + +class preprocessing: + ''' + Load the data here, store the paramters + ''' + def __init__(self,name): + self.name = name #name of dataset + self.X = None + self.Y = None + self.Z = None + self.gyroscope = None + self.freq = None #(in Hz) is same for all datasets + self.channels = None + self.ch_type = 'eeg' + def load_arrays(self): + ''' + loads arrays in object variables of the form + X: trials x channels x timepoints, using reshape method at the end + Y: trials x (valence,arousal) + Z: trials x participant no + ''' + if self.name == 'DREAMER': + array = np.load('original_data/DREAMER.npz') + self.freq = 128 + self.channels = ['AF3','F7','F3','FC5','T7','P7','O1','O2','P8','T8','FC6','F4','F8','AF4'] + if self.name == 'DEAP': + array = np.load('original_data/DEAP.npz') + self.freq = 128 + self.channels = ['F1', 'AF3', 'F3', 'F7', 'FC5', 'FC1', 'C3', 'T7', 'CP5', 'CP1', 'P3', 'P7', 'PO3', 'O1', 'Oz', 'Pz', 'Fp2', 'AF4', 'Fz', 'F4', 'F8', 'FC6', 'FC2', 'Cz', 'C4', 'T8', 'CP6', 'CP2', 'P4', 'P8', 'PO4', 'O2', 'hEOG','vEOG', 'zEMG','tEMG','GSR','Respiration belt','Plethysmograph','Temperature'] + if self.name == 'OASIS': + array = np.load('original_data/OASIS.npz') + self.channels = ['AF3', 'F7', 'F3', 'FC5', 'T7', 'P7', 'O1', 'O2', 'P8', 'T8', 'FC6', 'F4', 'F8', 'AF4'] + self.freq = 128 + self.X = array['X'] + if self.name == 'DEAP': + self.X = self.X[:,:,:32] + self.channels = self.channels[:32] + + if self.name == 'OASIS': + self.gyroscope = array['gyroscope'] + + self.Y = array['Y'] + self.Z = array['Z'] + self.reshape_data() + + def reshape_data(self): + ''' + exchanges last two dimensions of data + ''' + (a,b,c) = self.X.shape + self.X = np.reshape(self.X,(a,c,b)) + + + + +# In[ ]: + + +class filters(): + ''' + define filters to be used for preprocessing + ''' + @staticmethod + def notch_filter(data,sfreq,notch_freq): + # parameters :- + # data :- EEG data + # sfreq :- sampling frequency + # notch_freq :- frequency of the notch filter (generally 50Hz due to the AC current frequency) + return mne.filter.notch_filter(data,sfreq,np.arange(notch_freq,notch_freq+1,1)) + + @staticmethod + def butterworth_filter(data,sfreq,lfreq,hfreq): + # parameters :- + # data :- EEG data + # sfreq :- sampling frequency + # lfreq :- low pass frequency value + #hfreq :- high pass frequency value + return mne.filter.filter_data(data = data,sfreq = sfreq,l_freq = lfreq,h_freq = hfreq,method = 'iir',verbose = False) + + + +# In[ ]: + + +class referencing(): + ''' + referencing electrodes to some value + ''' + @staticmethod + def average(data): + ''' + Computes average voltage of all channels for a particular trial and a particular timepoint, and subtracts average value from all channels + ''' + temp = data + avg = np.average(temp,axis=1) + avg = np.expand_dims(avg,axis=1) + return temp-avg + + +# In[ ]: + + +class autoreject_custom: + ''' + Run Auotoreject algorithm here for artifact rejections + ''' + #make epoch object + @staticmethod + def raw_object_creation(raw_data,channel_name,ch_types,sfreq): + ''' + defining parameters for creation of raw object which will be used for creating an epoch object + retutns raw object after setting parameters + ''' + # parameters :- + # raw_data :- EEG data + # channel_name :- Names of the channels of EEG data used + # ch_types :- Whether each channel is EEG/Gyro, etc + # sfreq :- sampling frequency + montage = mne.channels.make_standard_montage('standard_1020') + + #creating a info object to create epochs later and setting its montage to 12-20 system + info = mne.create_info(ch_names=channel_name,sfreq=sfreq,ch_types = ch_types,verbose = False) + + #create raw object directly from array + raw_object = mne.io.RawArray(data = raw_data,info = info,verbose = False) + + #setting montage + raw_object.set_montage(montage) + + return raw_object + + @staticmethod + def epoch_object_creation(raw_object,start=0,duration=1,tmin=0,tmax=0.99): + ''' + making an epoch object which will be used for autoreject algorithm + ''' + #creating fixed length events + events = mne.make_fixed_length_events(raw_object,id=1,start=0,duration = duration) + #creating an epoch object + epoch_object = mne.Epochs(raw_object,events = events,preload=True,baseline = None,reject=None,verbose=False,tmin=0,tmax=0.99) + + return epoch_object + + @staticmethod + def autoreject_algo(epoch_object,n_interpolates,consensus_percs): + ''' + cleans the epochs,and returns cleaned epochs,rejecting bad epochs based on optimal parameters calculation + n_interpolates are the ρ values that we would like autoreject to try and consensus_percs are the κ values that autoreject will try + Epochs with more than κ∗N sensors (N total sensors) bad are dropped + ''' + ar = autoreject.AutoReject(n_interpolates, consensus_percs, random_state=42,verbose = 'tqdm_notebook',cv=4,n_jobs=10) + #fitting autoreject model to epoch data + ar.fit(epoch_object) + epochs_clean = ar.transform(epoch_object) + evoked_clean = epochs_clean.average() + evoked = epoch_object.average() + + return epochs_clean,ar.get_reject_log(epoch_object) + + + + +# In[ ]: + + +class source_decomposition(): + + @staticmethod + def ica(data,channels,ch_type,sfreq): + # parameters :- + # data :- EEG data + # channels :- Names of the channels of EEG data used + # ch_types :- Whether each channel is EEG/Gyro, etc + # sfreq :- sampling frequency + #defining ICA parameters + raw = autoreject_custom.raw_object_creation(data,channels,ch_type,sfreq) + ica = mne.preprocessing.ICA(method='infomax',n_components=14) + ica.fit_params['max_iter'] =300 + ica.fit(raw,verbose=False) + return ica.get_sources(raw).get_data(),ica.mixing_matrix_ + + +# In[ ]: + + +def process_trial(a,acc_x,acc_y,acc_z): + ''' + a are the source signals obtained after decomposition + acc_<> are accelerometer readings in respective axis + ''' +# parameters :- + # a :- EEG source signal after ICA + # acc_x :- accelerometer channel along X axis + # acc_y :- accelerometer channel along Y axis + # acc_y :- accelerometer channel along Z axis + +#pearson co-eff between each source signal,and accelerometer readings + pcoeff_arr = np.zeros((a.shape[0],3))#array will record p_coeff for each source with x,y,z accelermeter readings + for i in range(a.shape[0]): + source = a[i] #extracting particular source + #calculating pearson co-relation coeff between particular source each of accelerometer axis readings + r_x,_ = pearsonr(source,acc_x) + r_y,_ = pearsonr(source,acc_y) + r_z,_ = pearsonr(source,acc_z) + pcoeff_arr[i,0] = r_x + pcoeff_arr[i,1] = r_y + pcoeff_arr[i,2] = r_z + #print('############') + #calculating mean ,std deviation of pearson co-eff for all sources for each axis i.e X,Y,Z + mean = np.mean(pcoeff_arr,axis = 0) + std = np.std(pcoeff_arr,axis = 0) + error = mean + 2 * std + + #calculating which sources differ have pearson co-eff of atleast one axis greater than 2 standard deviation from mean + bad_source_index = [] + for i in range(pcoeff_arr.shape[0]): + if pcoeff_arr[i,0] > error[0] or pcoeff_arr[i,1] > error[1] or pcoeff_arr[i,2] > error[2]: + bad_source_index.append(i) + + #correcting bad sources by butterworth filter by high pass 3Hz frequency as motion artifacts are said to exist in low power frequencies + for index in bad_source_index: + source_to_be_filtered = a[index] + a[index] = filters.butterworth_filter(source_to_be_filtered,dataset.freq,3,None)#high pass filter 3Hz + + return a #return corrected source signals + + +# In[ ]: + + +#loading dataset arrays +dataset = preprocessing('OASIS') +dataset.load_arrays() +dataset.gyroscope.shape + + +# In[ ]: + + +#referencing electrodes to average value method +average_data = referencing.average(dataset.X) + + +# In[ ]: + + +#running butterworth filter (bandpass filter) +filtered_data = filters.notch_filter(average_data,dataset.freq,60)#butterworth_filter(average_data,dataset.freq,0.1,40) + + +# In[ ]: + + +no_of_trials = dataset.X.shape[0] + +(a,b,c) = dataset.gyroscope.shape +gyroscope_trials = np.reshape (dataset.gyroscope,(a,c,b))# reshaping trials so they are of the shape trials x channels x timepoints + +#iterating over all trials and correcting trial data for motion artifact +for trial_n in range(no_of_trials): + print('processing trial no:',trial_n+1) + trial_data = filtered_data[trial_n] + gyroscope_trial = gyroscope_trials[0,4:,:] #only acclerometer values extracted for a particular trial + gyroscope_trial_x = gyroscope_trial[0] # accelerometer x axis reading + gyroscope_trial_y = gyroscope_trial[1] # accelerometer y axis reading + gyroscope_trial_z = gyroscope_trial[2] # accelerometer z axis reading + source_signals,mixing_matrix = source_decomposition.ica(trial_data,dataset.channels,dataset.ch_type,dataset.freq) + corrected_sources = process_trial(source_signals,gyroscope_trial_x,gyroscope_trial_y,gyroscope_trial_z) + + #corrected sources are projected back into orignal dimensional space of EEG data using mixing matrix + project_back = np.matmul(mixing_matrix,corrected_sources) + filtered_data[trial_n] = project_back + + +# In[ ]: + + +filtered_data.shape + + +# In[ ]: + + +no_of_trials = dataset.X.shape[0] +''' +dictionary contains information about each trial +each trial number i is mapped to a list containing the cleaned epochs given by autoreject,boolena array indicating which epoch was dropped,and +a percentage indicating epochs dropped out of total, valence ,arousal rating for trial and image_id +''' +#running autoreject for each trial data + +''' +autoreject divides each trial data into 5 epochs of 1 sec segment i.e 640 timepoints into 128 timepoints per epochs,and runs algo on each +epoch,rejecting epochs based on estimated parameters +''' +clean_epochs ={} +for trial in range(no_of_trials): + print('trial no',trial) + temp = filtered_data[trial] + raw_object = autoreject_custom.raw_object_creation(temp,dataset.channels,dataset.ch_type,dataset.freq) + print(raw_object.get_data().shape) + epoch = autoreject_custom.epoch_object_creation(raw_object) + print(epoch.get_data().shape) + #print('epochs shape',epoch.get_data().shape) + clean_epoch,reject_log = autoreject_custom.autoreject_algo(epoch,n_interpolates = np.array([1, 4, 32]),consensus_percs = np.linspace(0, 1.0, 11)) + #clean_epochs.append([clean_epoch,reject_log]) + if clean_epoch.drop_log_stats() == 0: + clean_epochs[trial+1] = [clean_epoch.get_data(),reject_log.bad_epochs,clean_epoch.drop_log_stats(),dataset.Y[trial],dataset.Z[trial][1]] + + + +# In[ ]: + + +def driver_code(): + + #load dataset + dataset_dict = {0:'DEAP',1:'OASIS',2:'DREAMER'} + print(dataset_dict) + print('enter dataset mapping number you want to use') + mapping = int(input()) + dataset = preprocessing(dataset_dict[mapping]) + dataset.load_arrays() + + #referencing + print('next step in preprocessing is referencing') + referencing_dict = {1:'average_referencing'} + print(referencing_dict) + print('enter referencing method') + mapping = int(input()) + if mapping ==1 : + averaged_data = referencing.average(dataset.X) + print('next step is applying filters') + filter_dict = {1:'notch_filter',2:"butter_worth_filter"} + + + #filtering + applyed_filters = False + while applyed_filters == False: + print(filter_dict) + mapping = int(input()) + print('sampling frequency of dataset is',dataset.freq) + if mapping == 1 : + print('enter notch frequency') + notch_freq = float(input()) + filtered_data = filters.notch_filter(averaged_data,dataset.freq,notch_freq) + + if mapping == 2: + print('enter lower frequency') + lfreq = float(input()) + print('enter higher frequency') + hfreq = float(input()) + filtered_data = filters.butterworth_filter(dataset.X,dataset.freq,lfreq,hfreq) + + print('Do you want to apply filters again?enter y/n') + boolean = input() + if boolean == 'n': + applyed_filters = True + + print('do you want to save the data preprocessed so far?y/n') + boolean = input() + if boolean == 'y': + filename = input('enter filename to save as') + np.savez('preprocessed_data/'+dataset.name.lower()+'/'+filename,X = dataset.X,Y = dataset.Y) + + #if motion artifact correction using gyrscopic data if dataset is oasis + if dataset.name == 'OASIS': + print('do you want to use motion artifact removal using gyroscopic data? y/n') + boolean = input() + if boolean == 'y': + no_of_trials = dataset.X.shape[0] + print('shape of gyroscope data before reshaping is:',dataset.gyroscope.shape) + (a,b,c) = dataset.gyroscope.shape + gyroscope_trials = np.reshape (dataset.gyroscope,(a,c,b))# reshaping trials so they are of the shape trials x channels x timepoints + + #iterating over all trials and correcting trial data for motion artifact + for trial_n in range(no_of_trials): + print('processing trial no:',trial_n+1) + trial_data = filtered_data[trial_n] + gyroscope_trial = gyroscope_trials[trial_n,:,:] #only acclerometer values extracted for a particular trial + gyroscope_trial_x = gyroscope_trial[0] # accelerometer x axis reading + gyroscope_trial_y = gyroscope_trial[1] # accelerometer y axis reading + gyroscope_trial_z = gyroscope_trial[2] # accelerometer z axis reading + source_signals,mixing_matrix = source_decomposition.ica(trial_data,dataset.channels,dataset.ch_type,dataset.freq) + corrected_sources = process_trial(source_signals,gyroscope_trial_x,gyroscope_trial_y,gyroscope_trial_z) + + #corrected sources are projected back into orignal dimensional space of EEG data using mixing matrix + project_back = np.matmul(mixing_matrix,corrected_sources) + filtered_data[trial_n] = project_back + + print(filtered_data.shape) + print('do you want to save the data preprocessed so far?y/n') + boolean = input() + if boolean == 'y': + filename = input('enter filename to save as') + np.savez('preprocessed_data/'+dataset.name.lower()+'/'+filename,X = dataset.X,Y = dataset.Y) + + if dataset.name == 'OASIS': + print('do you want to use autoreject? y/n') + boolean = input() + if boolean == 'y': + print('do you want to save this autoreject cleaned data? y/n') + boolean = input() + no_of_trials = dataset.X.shape[0] + ''' + dictionary contains information about each trial + each trial number i is mapped to a list containing the cleaned epochs given by autoreject,boolena array indicating which epoch was dropped,and + a percentage indicating epochs dropped out of total, valence ,arousal rating for trial and image_id + ''' + #running autoreject for each trial data + + ''' + autoreject divides each trial data into 5 epochs of 1 sec segment i.e 640 timepoints into 128 timepoints per epochs,and runs algo on each + epoch,rejecting epochs based on estimated parameters + ''' + clean_epochs ={} + for trial in range(no_of_trials): + print('trial no',trial) + temp = filtered_data[trial] + raw_object = autoreject_custom.raw_object_creation(temp,dataset.channels,dataset.ch_type,dataset.freq) + print(raw_object.get_data().shape) + epoch = autoreject_custom.epoch_object_creation(raw_object) + print(epoch.get_data().shape) + #print('epochs shape',epoch.get_data().shape) + clean_epoch,reject_log = autoreject_custom.autoreject_algo(epoch,n_interpolates = np.array([1, 4, 32]),consensus_percs = np.linspace(0, 1.0, 11)) + #clean_epochs.append([clean_epoch,reject_log]) + if clean_epoch.drop_log_stats() == 0: + clean_epochs[trial+1] = [clean_epoch.get_data(),reject_log.bad_epochs,clean_epoch.drop_log_stats(),dataset.Y[trial],dataset.Z[trial][1]] + + if boolean == 'y': + with open('preprocessed_data/oasis/with_autoreject.p','wb') as file: + pickle.dump(clean_epochs,file,protocol=pickle.HIGHEST_PROTOCOL) + + +# In[ ]: + + + +def __main__(): + driver_code() + + +# In[ ]: + + +__main__() + diff --git a/run_scripts_incremental_learning.py b/run_scripts_incremental_learning.py new file mode 100644 index 0000000..e094c92 --- /dev/null +++ b/run_scripts_incremental_learning.py @@ -0,0 +1,106 @@ +#!/usr/bin/env python +# coding: utf-8 + +# In[ ]: + + +from google.colab import drive +drive.mount('/gdrive',force_remount = True) + + +# In[ ]: + + +get_ipython().run_line_magic('cd', '../gdrive/MyDrive/emotion_recognition_project/') + + +# # Incremental Learning for OASIS + +# **Arguments** +# +# +# --- +# +# save = 'y/n' +# +# --- +# +# +# +# +# +# Eg. if you want to run model on OASIS dataset,don't want to save plots, with +# command would be +# +# !python incremental_learning_OASIS.py n +# +# +# +# +# + +# In[ ]: + + +get_ipython().system('python incremental_learning_oasis.py n') + + +# +# # Incremental Learning for DEAP + +# **Arguments** +# +# +# --- +# +# save = 'y/n' +# +# --- +# +# +# +# +# +# Eg. if you want to run model on DEAP dataset,don't want to save plots, with +# command would be +# +# !python incremental_learning_DEAP.py n + +# In[ ]: + + +get_ipython().system('python incremental_learning_deap.py n ') + + +# +# # Incremental Learning for DREAMER + +# **Arguments** +# +# +# --- +# +# save = 'y/n' +# +# --- +# +# +# +# +# +# Eg. if you want to run model on DEAP dataset,don't want to save plots, with +# command would be +# +# !python incremental_learning_DREAMER.py n + +# In[ ]: + + +get_ipython().system('python incremental_learning_dreamer.py n ') + + +# In[ ]: + + + + diff --git a/utils.py b/utils.py new file mode 100644 index 0000000..ce1feda --- /dev/null +++ b/utils.py @@ -0,0 +1,46 @@ +#!/usr/bin/env python +# coding: utf-8 + +# In[ ]: + + +# -*- coding: utf-8 -*- +"""utils.ipynb + +Automatically generated by Colaboratory. + +Original file is located at + https://colab.research.google.com/drive/1Z2e7rxy64W9WIIcEfH1vyzfVdMNIK8Om +""" +import numpy as np + + +def epoch_data(X,Y, window, stride, sfreq): + + # Fucntion to segment the dataset into epochs + + # Parameters :- + # X :- The input EEG signal in the format of channels*timepoints*trials + # Y :- The values for VALD (depending on the dataset) given by the users + # window :- length of the epoch in seconds + # stride :- stride of the sliding window in seconds + # sfreq :- sampling frequency of the EEG signal + + (channels,timepoints,trials )= X.shape + X = np.reshape(X,(trials,channels,timepoints)) + segment = int(window*sfreq) + step = int(stride*sfreq) + epochPerTrial = int((timepoints-segment)/step + 1) + count = 0 + X_new = np.empty((trials*epochPerTrial,channels,segment)) + Y_new = np.empty((trials*epochPerTrial,2)) + for trial in range(trials): + for epoch in range(epochPerTrial): + X_new[count,:,:] = X[trial,:,epoch*step:(epoch*step)+segment] + Y_new[count,:] = Y[trial,:2] + count+=1 + (trials,channels,timepoints) = X_new.shape + X_new = np.reshape(X_new,(channels,timepoints,trials)) + + return X_new,Y_new +