Added Scripts

Esse commit está contido em:
Rohit Garg
2021-09-18 23:21:23 +05:30
commit d142115fe6
16 arquivos alterados com 5524 adições e 0 exclusões
+386
Ver Arquivo
@@ -0,0 +1,386 @@
# -*- coding: utf-8 -*-
"""8.5_cross_validate.ipynb
Automatically generated by Colaboratory.
Original file is located at
https://colab.research.google.com/drive/1qEkrFcZ9lLqd6gNgxX8Y8QoXlOhH3wXC
#Leave One Subject Out Cross Validation
* DREAMER => Shape After Loading
X.shape= (414, 58240, 14) Y.shape= (414, 2) Z.shape= (414, 2)
* DEAP => Shape After Loading
X.shape= (1280, 40, 8064) Y.shape= (1280, 2) Z.shape= (1280, 2)
* OASIS => Shape After Loading
X.shape= (600, 640, 14) Y.shape= (600, 2) Z.shape= (600, 2)
* i.e. OASIS and DEAP are of form X = (rec, timepoints,channels)
* reshaping X to (rec, channels,timepoints)
makes sense now
"""
!nvidia-smi
"""#RAPIDS Package Installation"""
# Install RAPIDS
!git clone https://github.com/rapidsai/rapidsai-csp-utils.git
!bash rapidsai-csp-utils/colab/rapids-colab.sh stable
import sys, os
dist_package_index = sys.path.index('/usr/local/lib/python3.7/dist-packages')
sys.path = sys.path[:dist_package_index] + ['/usr/local/lib/python3.7/site-packages'] + sys.path[dist_package_index:]
sys.path
exec(open('rapidsai-csp-utils/colab/update_modules.py').read(), globals())
import cuml
"""-----------------------------------------------------------------------------------------------------------------------------------------------------"""
from google.colab import drive
drive.mount('/gdrive',force_remount=True)
# Commented out IPython magic to ensure Python compatibility.
# %cd /gdrive/MyDrive/Project_DEAP/4.1.2021/
################################################################################
import TopNByFSMethods
import TopNByClassifier
import EpochedFeatures
from args_eeg import args as my_args
import ImportUtils
from ImportUtils import *
from TopNByFSMethods import *
from TopNByClassifier import *
from EpochedFeatures import *
from args_eeg import args as my_args
from ImportUtils import *
from TopNByFSMethods import *
from TopNByClassifier import *
from EpochedFeatures import *
from sklearn.svm import SVC
from DEAP_scripts.ImportUtils import *
from DEAP_scripts.TopNByFSMethods import *
from DEAP_scripts.TopNByClassifier import *
from DEAP_scripts.EpochedFeatures import *
from DEAP_scripts.args_eeg import args as my_args
from sklearn.svm import SVC
################################################################################
mean_rmse = []
std_rmse = []
np.random.seed(42)
def cross_validate(dataset, window, stride, sfreq, label, best_features_list):
# Parameters :-
# dataset :- Name of the Dataset
# window :- Length of the sliding window in seconds
# stride :- Stride of the sliding window in seconds
# sfreq :- sampling frequency of the EEG dataset
# best_features_list :- Featrue list after performing top electrode and feature analysis for various datasets
pwd = os.getcwd()
fs = sfreq
featurepath = os.getcwd() + '/' + dataset + '/data_extracted/featuresDict/'
ans = np.load((featurepath + "shannonEntropy_{}_{}.npz").format(window,stride), allow_pickle=True)['features']
Y_epoch = np.load((featurepath + "shannonEntropy_{}_{}.npz").format(window,stride), allow_pickle=True)['Y']
#load saved epoched features
featuresDict = None
featuresDict = loadFeaturesDict(dataset)
# pop out not best features
for k in list(featuresDict.keys()):
if k not in best_features_list:
featuresDict.pop(k)
featuresList = list(featuresDict.keys())
print(featuresList)
#make feature matrix with select best features
featureMatrix = np.empty((0,ans.shape[1])) #[14*32 + 1,80640]
for key,value in featuresDict.items():
featureMatrix = np.append(featureMatrix,value,axis=0)
#remove NaN features
if np.isnan(featureMatrix).any():
featureMatrix = np.nan_to_num(featureMatrix,nan=0)
#set datatype of feature matrix
featureMatrix = featureMatrix.astype('float64')
#transpose feature matrix to prepare X
X = pd.DataFrame(featureMatrix.T)
#replace infinity with NaN value and fill it with zero
X = X.replace([np.inf, -np.inf], np.nan)
X = X.fillna(0)
X = X.astype(np.float32)
#convert ndarray to dataframe
Y_epoch = pd.DataFrame(Y_epoch)
print("Number of feature vectors in X = ", X.shape[1])
print("X.shape = " ,X.shape)
#***********************************************************
#Leave-one-subject-out-CV
#number of folds = numbParticipants
numbParticipants = 0
numbRecordings = 0
if(dataset == 'DEAP'):
numbParticipants = 32
numbRecordings = 40
elif(dataset == 'DREAMER'):
# Dreamer dataset has 23 subjects, each subject was shown 18 videos
numbParticipants = 23
numbRecordings = 18
elif(dataset == 'OASIS'):
numbParticipants = 15
numbRecordings = 40
#numbEpochs
numbEpochs = X.shape[0]//(numbParticipants*numbRecordings)
print(X.shape[0])
print("numbParticipants = ", numbParticipants)
print("numbRecordings = " , numbRecordings)
print("numbEpochs = ", numbEpochs)
pass
print(type(X))
print(type(Y_epoch))
cv_rmse = []
for i in range(numbParticipants):
s = i*numbRecordings*numbEpochs
e = (i+1)*numbRecordings*numbEpochs
X_test = copy.deepcopy(X.iloc[s:e, :])
y_test = copy.deepcopy(Y_epoch.iloc[s:e, label])
X_train = copy.deepcopy(X.iloc[:s, :])
X_train = np.append(X_train, X.iloc[e:, :],axis=0)
y_train = copy.deepcopy(Y_epoch.iloc[:s, label])
y_train = np.append(y_train, Y_epoch.iloc[e:, label],axis=0)
print(X_train.shape, y_train.shape, X_test.shape, y_test.shape)
clf = RandomForestRegressor()
clf.fit(X_train, y_train)
y_predict = clf.predict(X_test)
rmse = mean_squared_error(y_test, y_predict,squared=False)
print("window: {}, stide: {}, rmse: {}".format(window,stride,rmse))
cv_rmse.append(rmse)
print(cv_rmse)
print("Mean Cross-validation RMSE = ", np.mean(cv_rmse))
mean_rmse.append(np.mean(cv_rmse))
print("Standard Deviation of Cross-validated RMSE = ", np.std(cv_rmse))
std_rmse.append(np.std(cv_rmse))
#pickle list
with open('/gdrive/MyDrive/Project_DEAP/4.1.2021/{}{}_cv_rmse.pkl'.format(dataset,label), 'wb') as f:
pickle.dump(cv_rmse, f)
fig = plt.gcf()
fig.set_size_inches(40, 20)
# X = pd.DataFrame([x for x in range(1,) ])
plt.rcParams.update({'font.size': 40})
plt.xlabel('Partipant No.')
plt.ylabel('RMSE')
plt.plot([str(x+1) for x in range(len(cv_rmse))], cv_rmse, linestyle='-', marker='o', color='b', markerfacecolor='r', linewidth=2.0, markersize = 15)
plt.tight_layout()
plt.savefig("/gdrive/MyDrive/Project_DEAP/4.1.2021/CV_{}_{}.svg".format(dataset, label), bbox_inches='tight', dpi=500)
plt.show()
plt.clf()
def main(dataset, window, stride, sfreq, model, label, approach, ml_algo, top, fs_method, best_features_list):
# Parameters :-
# dataset :- Name of the Dataset
# window :- Length of the sliding window in seconds
# stride :- Stride of the sliding window in seconds
# sfreq :- sampling frequency of the EEG dataset
# best_features_list :- Featrue list after performing top electrode and feature analysis for various datasets
print(locals())
pwd = os.getcwd()
# getEpochedFeatures(dataset, window, stride, sfreq, label)
cross_validate(dataset, window, stride, sfreq, label, best_features_list)
return
if(top == "e"):
clf = RandomForestRegressor()
topElectrodeRegressionRanking(dataset, window, stride, sfreq, clf, label, scale=False, pca=False)
topElectrodeFSRegressionRanking(dataset, window, stride, sfreq, clf, label, scale=False, pca=False, mutual_info = False, method='SelectKBest')
topElectrodeFSRegressionRanking(dataset, window, stride, sfreq, clf, label, scale=False, pca=False, mutual_info = False, method='RandomForest')
plt.legend(["Method A","Method B", "Method C"])
if(label == 1):
plt.savefig(pwd + "/" + dataset + "/arousal_plots/" + "CorrectedElectrodewiseRanking" + str(window) + str(stride) + ".svg", bbox_inches='tight')
# plt.savefig(pwd + "/" + dataset + "/plots/" + "ElectrodewiseRanking" + str(window) + str(stride) + ".svg", bbox_inches='tight')
plt.show()
plt.clf()
else:
plt.savefig(pwd + "/" + dataset + "/plots/" + "CorrectedElectrodewiseRanking" + str(window) + str(stride) + ".svg", bbox_inches='tight')
# plt.savefig(pwd + "/" + dataset + "/plots/" + "ElectrodewiseRanking" + str(window) + str(stride) + ".svg", bbox_inches='tight')
plt.show()
plt.clf()
elif(top == "f"):
clf = RandomForestRegressor()
topFeaturesRegressionRanking(dataset, window, stride, sfreq, clf, label, scale=False, pca=False)
topFeatureFSRegressionRanking(dataset, window, stride, sfreq, clf, label, scale=False, pca=False, mutual_info = False, method='SelectKBest')
topFeatureFSRegressionRanking(dataset, window, stride, sfreq, clf, label, scale=False, pca=False, mutual_info = False, method='RandomForest')
if(label == 1):
plt.legend(["Method A","Method B", "Method C"])
plt.savefig(pwd + "/" + dataset + "/arousal_plots/" + "CorrectedFeaturewiseRanking" + str(window) + str(stride) + ".svg", bbox_inches='tight')
plt.show()
plt.clf()
else:
plt.legend(["Method A","Method B", "Method C"])
plt.savefig(pwd + "/" + dataset + "/plots/" + "CorrectedFeaturewiseRanking" + str(window) + str(stride) + ".svg", bbox_inches='tight')
plt.show()
plt.clf()
if __name__ == '__main__':
#DREAMER
#VALENCE
best_features_list = ['HjorthMob','HjorthComp','stdDev','bandPwr_theta','ShannonRes_gamma','bandPwr_beta']
main(dataset='DREAMER', window=1, stride=1, sfreq=128, model='rfr', label= 0,approach='byfs', ml_algo='regression', top='f', fs_method='SelectKBest', best_features_list = best_features_list)
#AROUSAL
best_features_list = ['HjorthMob','ShannonRes_gamma','HjorthComp','stdDev','bandPwr_gamma', 'bandPwr_theta']
main(dataset='DREAMER', window=1, stride=1, sfreq=128, model='rfr', label= 1,approach='byfs', ml_algo='regression', top='f', fs_method='SelectKBest', best_features_list = best_features_list)
#DEAP
#VALENCE
best_features_list = ['bandPwr_gamma','ShannonRes_gamma','ShannonRes_beta','rasm_gamma','dasm_gamma','bandPwr_beta']
main(dataset='DEAP', window=1, stride=1, sfreq=128, model='rfr', label= 0,approach='byfs', ml_algo='regression', top='f', fs_method='SelectKBest', best_features_list = best_features_list)
#AROUSAL
best_features_list = ['HjorthMob','HjorthComp','stdDev','ShannonRes_gamma','bandPwr_beta','bandPwr_theta','ShannonRes_beta','dasm_beta']
main(dataset='DEAP', window=1, stride=1, sfreq=128, model='rfr', label= 1,approach='byfs', ml_algo='regression', top='f', fs_method='SelectKBest', best_features_list = best_features_list)
#OASIS
#VALENCE
best_features_list = ['HjorthMob','stdDev','HjorthComp']
main(dataset='OASIS', window=1, stride=1, sfreq=128, model='rfr', label= 0,approach='byfs', ml_algo='regression', top='f', fs_method='SelectKBest', best_features_list = best_features_list)
#AROUSAL
best_features_list = ['HjorthMob']
main(dataset='OASIS', window=1, stride=1, sfreq=128, model='rfr', label= 1,approach='byfs', ml_algo='regression', top='f', fs_method='SelectKBest', best_features_list = best_features_list)
# print(len(best_features_list))
# main(dataset='OASIS', window=1, stride=1, sfreq=128, model='rfr', label= 1,approach='byfs', ml_algo='regression', top='f', fs_method='SelectKBest', best_features_list = best_features_list)
# --dataset DREAMER --window 1 --stride 1 --sfreq 128 --model rfr --label 0 --approach byfs --ml_algo regression --top f --fs_method SelectKBest
"""#MINIMUM RMSE DURING CROSS-VALIDATION 6-6-2021"""
# Commented out IPython magic to ensure Python compatibility.
import matplotlib.pyplot as plt
# %matplotlib inline
import seaborn as sns
import copy
import os
from scipy import io,signal
import numpy as np
import pandas as pd
import pickle
#{Dataset_Name}{0/1}_cv_rmse.pkl :- 0 is for Valence and 1 is for Arousal
pl = ['DREAMER0_cv_rmse.pkl', 'DREAMER1_cv_rmse.pkl', 'DEAP0_cv_rmse.pkl', 'DEAP1_cv_rmse.pkl', 'OASIS0_cv_rmse.pkl', 'OASIS1_cv_rmse.pkl']
dataset = ['DREAMER', 'DREAMER', 'DEAP', 'DEAP','OASIS','OASIS']
label = [0,1,0,1,0,1]
min_cv_rmse = []
for i in range(len(pl)):
cv_rmse = None
with open(pl[i], 'rb') as f:
cv_rmse = pickle.load(f)
min_cv_rmse.append(min(cv_rmse))
print(min_cv_rmse)
"""feature_select_main.py"""
!pip install dit
!pip install pyinform
from ImportUtils import *
from args_eeg import args as my_args
"""#Plot pickled results"""
# Commented out IPython magic to ensure Python compatibility.
import matplotlib.pyplot as plt
# %matplotlib inline
import seaborn as sns
import copy
import os
from scipy import io,signal
import numpy as np
import pandas as pd
import pickle
# with open('/gdrive/MyDrive/Project_DEAP/4.1.2021/{}{}_cv_rmse.pkl'.format(dataset,label), 'rb') as f:
# pickle.dump(cv_rmse, f)
pl = ['DREAMER0_cv_rmse.pkl', 'DREAMER1_cv_rmse.pkl', 'DEAP0_cv_rmse.pkl', 'DEAP1_cv_rmse.pkl', 'OASIS0_cv_rmse.pkl', 'OASIS1_cv_rmse.pkl']
dataset = ['DREAMER', 'DREAMER', 'DEAP', 'DEAP','OASIS','OASIS']
label = [0,1,0,1,0,1]
for i in range(len(pl)):
cv_rmse = None
with open(pl[i], 'rb') as f:
cv_rmse = pickle.load(f)
fig = plt.gcf()
fig.set_size_inches(40, 20)
# X = pd.DataFrame([x for x in range(1,) ])
plt.rcParams.update({'font.size': 50})
plt.xlabel('Partipant No.')
plt.ylabel('RMSE')
plt.plot([str(x+1) for x in range(len(cv_rmse))], cv_rmse, linestyle='-', marker='o', color='b', markerfacecolor='r', linewidth=2.0, markersize = 15)
plt.tight_layout()
plt.savefig("/gdrive/MyDrive/Project_DEAP/4.1.2021/cv_stats/CV_{}_{}.svg".format(dataset[i], label[i]), bbox_inches='tight', dpi=500)
plt.show()
plt.clf()
with open('/gdrive/MyDrive/Project_DEAP/4.1.2021/mean_cv_rmse.pkl', 'wb') as f:
pickle.dump(mean_rmse, f)
with open('/gdrive/MyDrive/Project_DEAP/4.1.2021/std_cv_rmse.pkl', 'wb') as f:
pickle.dump(std_rmse, f)
df = pd.DataFrame()
df['Dataset-Label'] = ['DREAMER-V','DREAMER-A','DEAP-V','DEAP-A','OASIS-V','OASIS-A']
df['Mean RMSE'] = mean_rmse
df['Std Dev RMSE'] = std_rmse
df.to_csv('/gdrive/MyDrive/Project_DEAP/4.1.2021/cv_rmse_stats.csv')
+644
Ver Arquivo
@@ -0,0 +1,644 @@
#!/usr/bin/env python
# coding: utf-8
# In[ ]:
import bisect
import numpy as np
import pandas as pd
import pywt
from scipy import stats, signal, integrate
from dit.other import tsallis_entropy
import dit
import librosa
import statsmodels.api as sm
import itertools
from pyinform import mutualinfo
from statsmodels import tsa
from sklearn.metrics import mutual_info_score
import numpy as np
from scipy import signal,integrate
from sklearn.metrics.cluster import normalized_mutual_info_score as normed_mutual_info
################################################
# Auxiliary Functions
################################################
##########
# Filter the eegData, midpass filter
# eegData: 3D np array [chans x ms x epochs]
def filt_data(eegData, lowcut, highcut, fs, order=7):
nyq = 0.5 * fs
low = lowcut / nyq
high = highcut / nyq
b, a = signal.butter(order, [low, high], btype='band')
filt_eegData = signal.lfilter(b, a, eegData, axis = 1)
return filt_eegData
#########
# remove short bursts / spikes
def fcnRemoveShortEvents(z,n):
for chan in range(z.shape[0]):
# check for too-short suppressions
ct=0
i0=1
i1=1
for i in range(2,len(z[chan,:])):
if z[chan,i]==z[chan,i-1]:
ct=ct+1
i1=i
else:
if ct<n:
z[chan,i0:i1] = 0
z[chan,i1] = 0 #nasty little bug
ct=0
i0=i
i1=i
if z[chan,0] == 1 and z[chan,1] == 0:
z[chan,0] = 0
return z
##########
# Find interval of consistent values in binary 1D numpy array
def get_intervals(A,B,endIdx=500):
# This function gives you intervals (a1,b1), (a2,b3) for every a in A=[a1,a2,a3,..]
# and the smallest element in b that is larger than a.
intervals = []
for ii,A_idx_lst in enumerate(A):
B_idx_lst = [bisect.bisect_left(B[ii], idx) for idx in A_idx_lst]
chan_intervals = []
for jj,idx_l in enumerate(B_idx_lst):
if idx_l == len(B[ii]):
chan_intervals.append((A_idx_lst[jj],endIdx))
else:
chan_intervals.append((A_idx_lst[jj],B[ii][idx_l]))
intervals.append(chan_intervals)
# previous code already takes care of the [] possibility
#if B_idx_lst == []:
# intervals.append([])
return intervals
##########
# Detect bursts and supressions in eeg data
def burst_supression_detection(x,fs,suppression_threshold = 10):
'''
# DETECT EMG ARTIFACTS.
nyq = 0.5 * fs
low = low / nyq
high = high / nyq
be, ae = signal.butter(order, [low, high], btype='band')
'''
# CALCULATE ENVELOPE
e = abs(signal.hilbert(x,axis=1));
# same as smooth(e,Fs/4) in MATLAB, apply 1/2 second smoothing
ME = np.array([np.convolve(el,np.ones(int(fs/4))/(fs/4),'same') for el in e.tolist()])
e = ME
# DETECT SUPRESSIONS
# apply threshold -- 10uv
z = (ME<suppression_threshold)
# remove too-short suppression segments
z = fcnRemoveShortEvents(z,fs/2)
# remove too-short burst segments
b = fcnRemoveShortEvents(1-z,fs/2)
z = 1-b
went_high = [np.where(np.array(chD[:-1]) < np.array(chD[1:]))[0].tolist() for chD in z.tolist()]
went_low = [np.where(np.array(chD[:-1]) > np.array(chD[1:]))[0].tolist() for chD in z.tolist()]
bursts = get_intervals(went_high,went_low)
supressions = get_intervals(went_low,went_high)
return bursts,supressions
##########
# Coherence in the Delta Band
def CoherenceDelta(eegData, i, j, fs=100):
nfft=eegData.shape[1]
f, Cxy = signal.coherence(eegData[i,:,:], eegData[j,:,:], fs=fs, nfft=nfft, axis=0)#, window=np.hanning(nfft))
out = np.mean(Cxy[np.all([f >= 0.5, f<=4], axis=0)], axis=0)
return out
##########
# correlation across channels
def PhaseLagIndex(eegData, i, j):
hxi = ss.hilbert(eegData[i,:,:])
hxj = ss.hilbert(eegData[j,:,:])
# calculating the INSTANTANEOUS PHASE
inst_phasei = np.arctan(np.angle(hxi))
inst_phasej = np.arctan(np.angle(hxj))
out = np.abs(np.mean(np.sign(inst_phasej - inst_phasei), axis=0))
return out
##########
# Cross Correlation
def crossCorrelation(eegData, i, j):
out = np.zeros(eegData.shape[2])
for epoch in range(eegData.shape[2]):
ccor = np.correlate(eegData[i,:,epoch], eegData[j,:,epoch], mode="full")
absccor = np.abs(ccor)
out[epoch] = (np.max(absccor) - np.mean(absccor)) / np.std(absccor)
return out
##########
# Auxilary Cross-correlation Lag
def corrCorrLagAux(eegData,ii,jj,Fs=100):
out = np.zeros(eegData.shape[2])
lagCorr = []
for lag in range(0,eegData.shape[1],int(0.2*Fs)):
tmp = eegData.copy()
tmp[jj,:,:] = np.roll(tmp[jj,:,:], lag, axis=0)
lagCorr.append(CrossCorrelation(tmp, ii, jj, Fs))
return np.argmax(lagCorr,axis=0)
################################################
# bandpower Functions
################################################
##########
# compute the bandpower (area under segment (from fband[0] to fband[1] in Hz)
# of curve in freqency domain) of data, at sampling frequency of Fs (100 ussually)
def bandpower(data, fs, fband):
freqs, powers = periodogram(data, fs)
idx_min = np.argmax(freqs > fband[0]) - 1
idx_max = np.argmax(freqs > fband[1]) - 1
idx_delta = np.zeros(dtype=bool, shape=freqs.shape)
idx_delta[idx_min:idx_max] = True
bpower = simps(powers[idx_delta], freqs[idx_delta])
return bpower
##########
# computes the same thing as vecbandpower but with a loop
def pfvecbandpower(data, fs, fband):
bpowers = np.zeros((data.shape[0], data.shape[2]))
for i in range(data.shape[0]):
freqs, powers = periodogram(data[i, :, :], fs, axis=0)
idx_min = np.argmax(freqs > fband[0]) - 1
idx_max = np.argmax(freqs > fband[1]) - 1
idx_delta = np.zeros(dtype=bool, shape=freqs.shape)
idx_delta[idx_min:idx_max] = True
bpower = simps(powers[idx_delta, :], freqs[idx_delta], axis=0)
bpowers[i, :] = bpower
return bpowers
################################################
# Complexity features
################################################
##########
# Extract the Shannon Entropy
# threshold the signal and make it discrete, normalize it and then compute entropy
def shannonEntropy(eegData, bin_min, bin_max, binWidth):
H = np.zeros((eegData.shape[0], eegData.shape[2]))
for chan in range(H.shape[0]):
for epoch in range(H.shape[1]):
counts, binCenters = np.histogram(eegData[chan,:,epoch], bins=np.arange(bin_min+1, bin_max, binWidth))
nz = counts > 0
prob = counts[nz] / np.sum(counts[nz])
H[chan, epoch] = -np.dot(prob, np.log2(prob/binWidth))
return H
##########
# Extract the tsalis Entropy
def tsalisEntropy(eegData, bin_min, bin_max, binWidth, orders = [1]):
H = [np.zeros((eegData.shape[0], eegData.shape[2]))]*len(orders)
for chan in range(H[0].shape[0]):
for epoch in range(H[0].shape[1]):
counts, bins = np.histogram(eegData[chan,:,epoch], bins=np.arange(-200+1, 200, 2))
dist = dit.Distribution([str(bc).zfill(5) for bc in bins[:-1]],counts/sum(counts))
for ii,order in enumerate(orders):
H[ii][chan,epoch] = tsallis_entropy(dist,order)
return H
##########
# Cepstrum Coefficients (n=2)
def mfcc(eegData,fs,order=2):
H = np.zeros((eegData.shape[0], eegData.shape[2],order))
for chan in range(H.shape[0]):
for epoch in range(H.shape[1]):
H[chan, epoch, : ] = librosa.feature.mfcc(np.asfortranarray(eegData[chan,:,epoch]), sr=fs)[0:order].T
return H
##########
# Lyapunov exponent
def lyapunov(eegData):
return np.mean(np.log(np.abs(np.gradient(eegData,axis=1))),axis=1)
##########
# Fractal Embedding Dimension
# From pyrem: packadge for sleep scoring from EEG data
# https://github.com/gilestrolab/pyrem/blob/master/src/pyrem/univariate.py
def hFD(a, k_max): #Higuchi FD
L = []
x = []
N = len(a)
for k in range(1,k_max):
Lk = 0
for m in range(0,k):
#we pregenerate all idxs
idxs = np.arange(1,int(np.floor((N-m)/k)),dtype=np.int32)
Lmk = np.sum(np.abs(a[m+idxs*k] - a[m+k*(idxs-1)]))
Lmk = (Lmk*(N - 1)/(((N - m)/ k)* k)) / k
Lk += Lmk
L.append(np.log(Lk/(m+1)))
x.append([np.log(1.0/ k), 1])
(p, r1, r2, s)=np.linalg.lstsq(x, L)
return p[0]
##########
# Hjorth Mobility
# Hjorth Complexity
# variance = mean(signal^2) iff mean(signal)=0
# which it is be because I normalized the signal
# Assuming signals have mean 0
# Mobility = sqrt( mean(dx^2) / mean(x^2) )
def hjorthParameters(xV):
dxV = np.diff(xV, axis=1)
ddxV = np.diff(dxV, axis=1)
mx2 = np.mean(np.square(xV), axis=1)
mdx2 = np.mean(np.square(dxV), axis=1)
mddx2 = np.mean(np.square(ddxV), axis=1)
mob = mdx2 / mx2
complexity = np.sqrt((mddx2 / mdx2) / mob)
mobility = np.sqrt(mob)
# PLEASE NOTE that Mohammad did NOT ACTUALLY use hjorth complexity,
# in the matlab code for hjorth complexity subtraction by mob not division was used
return mobility, complexity
##########
# false nearest neighbor descriptor
def falseNearestNeighbor(eegData, fast=True):
# Average Mutual Information
# There exist good arguments that if the time delayed mutual
# information exhibits a marked minimum at a certain value of tex2html_wrap_inline6553,
# then this is a good candidate for a reasonable time delay.
npts = 1000 # not sure about this?
maxdims = 50
max_delay = 2 # max_delay = 200 # TODO: need to use 200, but also need to speed this up
distance_thresh = 0.5
out = np.zeros((eegData.shape[0], eegData.shape[2]))
for chan in range(eegData.shape[0]):
for epoch in range(eegData.shape[2]):
if fast:
out[chan, epoch] = 0
else:
cur_eegData = eegData[chan, :, epoch]
lagidx = 0 # we are looking for the index of the lag that makes the signal maximally uncorrelated to the original
minNMI = 1 # normed_mutual_info is from 1 (perfectly correlated) to 0 (not at all correlated)
for lag in range(1, max_delay):
x = cur_eegData[:-lag]
xlag = cur_eegData[lag:]
convert float data into histogram bins
nbins = int(np.floor(1 + np.log2(len(x)) + 0.5))
x_discrete = np.histogram(x, bins=nbins)[0]
xlag_discrete = np.histogram(xlag, bins=nbins)[0]
cNMI = normed_mutual_info(x_discrete, xlag_discrete)
if cNMI < minNMI:
minNMI = cNMI
lagidx = lag
# nearest neighbors part
knn = int(max(2, 6*lagidx)) # heuristic (number of nearest neighbors to look up)
m = 1 # lagidx + 1
# y is the embedded version of the signal
y = np.zeros((maxdims+1, npts))
for d in range(maxdims+1):
tmp = cur_eegData[d*m:d*m + npts]
y[d, :tmp.shape[0]] = tmp
nnd = np.ones((npts, maxdims))
nnz = np.zeros((npts, maxdims))
# see where it tends to settle
for d in range(1, maxdims):
for k in range(0, npts):
# get the distances to all points in the window (distance given embedding dimension)
dists = []
for nextpt in range(1, knn+1):
if k+nextpt < npts:
dists.append(np.linalg.norm(y[:d, k] - y[:d, k+nextpt]))
if len(dists) > 0:
minIdx = np.argmin(dists)
if dists[minIdx] == 0:
dists[minIdx] = 0.0000001 # essentially 0 just silence the error
nnd[k, d-1] = dists[minIdx]
nnz[k, d-1] = np.abs( y[d+1, k] - y[d+1, minIdx+1+k] )
# aggregate results
mindim = np.mean(nnz/nnd > distance_thresh, axis=0) < 0.1
# get the index of the first occurence of the value true
# (a 1 in the binary representation of true and false)
out[chan, epoch] = np.argmax(mindim)
return out
##########
# ARMA coefficients
def arma(eegData,order=2):
H = np.zeros((eegData.shape[0], eegData.shape[2],order))
for chan in range(H.shape[0]):
for epoch in range(H.shape[1]):
arma_mod = sm.tsa.ARMA(eegData[chan,:,epoch], order=(order,order))
arma_res = arma_mod.fit(trend='nc', disp=-1)
H[chan, epoch, : ] = arma_res.arparams
return H
################################################
# Continuity features
################################################
##########
# median frequency
def medianFreq(eegData,fs):
H = np.zeros((eegData.shape[0], eegData.shape[2]))
for chan in range(H.shape[0]):
freqs, powers = signal.periodogram(eegData[chan, :, :], fs, axis=0)
H[chan,:] = freqs[np.argsort(powers,axis=0)[len(powers)//2]]
return H
##########
# calculate band power
def bandPower(eegData, lowcut, highcut, fs):
eegData_band = filt_data(eegData, lowcut, highcut, fs, order=7)
freqs, powers = signal.periodogram(eegData_band, fs, axis=1)
bandPwr = np.mean(powers,axis=1)
return bandPwr
##########
# numberOfSpikes
def spikeNum(eegData,minNumSamples=7,stdAway = 3):
H = np.zeros((eegData.shape[0], eegData.shape[2]))
for chan in range(H.shape[0]):
for epoch in range(H.shape[1]):
mean = np.mean(eegData[chan, :, epoch])
std = np.std(eegData[chan,:,epoch],axis=1)
H[chan,epoch] = len(signal.find_peaks(abs(eegData[chan,:,epoch]-mean), 3*std,epoch,width=7)[0])
return H
##########
# Standard Deviation
def eegStd(eegData):
std_res = np.std(eegData,axis=1)
return std_res
##########
# α/δ Ratio
def eegRatio(eegData,fs):
# alpha (8–12 Hz)
eegData_alpha = filt_data(eegData, 8, 12, fs)
# delta (0.5–4 Hz)
eegData_delta = filt_data(eegData, 0.5, 4, fs)
# calculate the power
powers_alpha = bandPower(eegData, 8, 12, fs)
powers_delta = bandPower(eegData, 0.5, 4, fs)
ratio_res = np.sum(powers_alpha,axis=0) / np.sum(powers_delta,axis=0)
return np.expand_dims(x, axis=0)
###########
# Regularity (burst-suppression)
# Regularity of eeg
# filter with a window of 0.5 seconds to create a nonnegative smooth signal.
# In this technique, we first squared the signal and applied a moving-average
# The window length of the moving average was set at 0.5 seconds.
def eegRegularity(eegData, Fs=100):
in_x = np.square(eegData) # square signal
num_wts = Fs//2 # find the filter length in samples - we want 0.5 seconds.
q = signal.lfilter(np.ones(num_wts) / num_wts, 1, in_x, axis=1)
q = -np.sort(-q, axis=1) # descending sort on smooth signal
N = q.shape[1]
u2 = np.square(np.arange(1, N+1))
# COMPUTE THE Regularity
# dot each 5min epoch with the quadratic data points and then normalize by the size of the dotted things
reg = np.sqrt( np.einsum('ijk,j->ik', q, u2) / (np.sum(q, axis=1)*(N**2)/3) )
return reg
###########
# Voltage < (5μ, 10μ, 20μ)
def eegVoltage(eegData,voltage=20):
eegFilt = eegData.copy()
eegFilt[abs(eegFilt) > voltage] = np.nan
volt_res = np.nanmean(eegFilt,axis=1)
return volt_res
##########
# Diffuse Slowing
# look for diffuse slowing (bandpower max from frequency domain integral)
# repeated integration of a huge tensor is really expensive
def diffuseSlowing(eegData, Fs=100, fast=True):
maxBP = np.zeros((eegData.shape[0], eegData.shape[2]))
idx = np.zeros((eegData.shape[0], eegData.shape[2]))
if fast:
return idx
for j in range(1, Fs//2):
print("BP", j)
cbp = bandpower(eegData, Fs, [j-1, j])
biggerCIdx = cbp > maxBP
idx[biggerCIdx] = j
maxBP[biggerCIdx] = cbp[biggerCIdx]
return (idx < 8)
##########
# Spikes
def spikeNum(eegData,minNumSamples=7,stdAway = 3):
H = np.zeros((eegData.shape[0], eegData.shape[2]))
for chan in range(H.shape[0]):
for epoch in range(H.shape[1]):
mean = np.mean(eegData[chan, :, epoch])
std = np.std(eegData[chan,:,epoch])
H[chan,epoch] = len(signal.find_peaks(abs(eegData[chan,:,epoch]-mean), 3*std,epoch,width=7)[0])
return H
##########
# Delta Burst after spike
def burstAfterSpike(eegData,eegData_subband,minNumSamples=7,stdAway = 3):
H = np.zeros((eegData.shape[0], eegData.shape[2]))
for chan in range(H.shape[0]):
for epoch in range(H.shape[1]):
preBurst = 0
postBurst = 0
mean = np.mean(eegData[chan, :, epoch])
std = np.std(eegData[chan,:,epoch])
idxList = signal.find_peaks(abs(eegData[chan,:,epoch]-mean), stdAway*std,epoch,width=minNumSamples)[0]
for idx in idxList:
preBurst += np.mean(eegData_subband[chan,idx-7:idx-1,epoch])
postBurst += np.mean(eegData_subband[chan,idx+1:idx+7,epoch])
H[chan,epoch] = postBurst - preBurst
return H
##########
# Sharp spike
def shortSpikeNum(eegData,minNumSamples=7,stdAway = 3):
H = np.zeros((eegData.shape[0], eegData.shape[2]))
for chan in range(H.shape[0]):
for epoch in range(H.shape[1]):
mean = np.mean(eegData[chan, :, epoch])
std = np.std(eegData[chan,:,epoch])
longSpikes = set(signal.find_peaks(abs(eegData[chan,:,epoch]-mean), 3*std,epoch,width=7)[0])
shortSpikes = set(signal.find_peaks(abs(eegData[chan,:,epoch]-mean), 3*std,epoch,width=1)[0])
H[chan,epoch] = len(shortSpikes.difference(longSpikes))
return H
##########
# Number of Bursts
def numBursts(eegData,fs):
bursts = []
supressions = []
for epoch in range(eegData.shape[2]):
epochBurst,epochSupressions = burst_supression_detection(eegData[:,:,epoch],fs,suppression_threshold=10)#,low=30,high=49)
bursts.append(epochBurst)
supressions.append(epochSupressions)
# Number of Bursts
numBursts_res = np.zeros((eegData.shape[0], eegData.shape[2]))
for chan in range(numBursts_res.shape[0]):
for epoch in range(numBursts_res.shape[1]):
numBursts_res[chan,epoch] = len(bursts[epoch][chan])
return numBursts_res
##########
# Burst length μ and σ
def burstLengthStats(eegData,fs):
bursts = []
supressions = []
for epoch in range(eegData.shape[2]):
epochBurst,epochSupressions = burst_supression_detection(eegData[:,:,epoch],fs,suppression_threshold=10)#,low=30,high=49)
bursts.append(epochBurst)
supressions.append(epochSupressions)
# Number of Bursts
burstMean_res = np.zeros((eegData.shape[0], eegData.shape[2]))
burstStd_res = np.zeros((eegData.shape[0], eegData.shape[2]))
for chan in range(burstMean_res.shape[0]):
for epoch in range(burstMean_res.shape[1]):
burstMean_res[chan,epoch] = np.mean([burst[1]-burst[0] for burst in bursts[epoch][chan]])
burstStd_res[chan,epoch] = np.std([burst[1]-burst[0] for burst in bursts[epoch][chan]])
burstMean_res = np.nan_to_num(burstMean_res)
burstStd_res = np.nan_to_num(burstStd_res)
return burstMean_res,burstStd_res
##########
# Burst band powers (δ, α, θ, β, γ)
def burstBandPowers(eegData, lowcut, highcut, fs, order=7):
band_burst_powers = np.zeros((eegData.shape[0], eegData.shape[2]))
bursts = []
supressions = []
for epoch in range(eegData.shape[2]):
epochBurst,epochSupressions = burst_supression_detection(eegData[:,:,epoch],fs,suppression_threshold=10)#,low=30,high=49)
bursts.append(epochBurst)
supressions.append(epochSupressions)
eegData_band = filt_data(eegData, lowcut, highcut, fs, order=7)
for epoch,epochBursts in enumerate(bursts):
for chan,chanBursts in enumerate(epochBursts):
epochPowers = []
for burst in chanBursts:
if burst[1] == eegData.shape[1]:
burstData = eegData_band[:,burst[0]:,epoch]
else:
burstData = eegData_band[:,burst[0]:burst[1],epoch]
freqs, powers = signal.periodogram(burstData, fs, axis=1)
epochPowers.append(np.mean(powers,axis=1))
band_burst_powers[chan,epoch] = np.mean(epochPowers)
return band_burst_powers
##########
# Number of Suppressions
def numSuppressions(eegData,fs,suppression_threshold=10):
bursts = []
supressions = []
for epoch in range(eegData.shape[2]):
epochBurst,epochSupressions = burst_supression_detection(eegData[:,:,epoch],fs,suppression_threshold=suppression_threshold)#,low=30,high=49)
bursts.append(epochBurst)
supressions.append(epochSupressions)
numSupprs_res = np.zeros((eegData.shape[0], eegData.shape[2]))
for chan in range(numSupprs_res.shape[0]):
for epoch in range(numSupprs_res.shape[1]):
numSupprs_res[chan,epoch] = len(supressions[epoch][chan])
return numSupprs_res
##########
# Suppression length μ and σ
def suppressionLengthStats(eegData,fs,suppression_threshold=10):
bursts = []
supressions = []
for epoch in range(eegData.shape[2]):
epochBurst,epochSupressions = burst_supression_detection(eegData[:,:,epoch],fs,suppression_threshold=suppression_threshold)#,low=30,high=49)
bursts.append(epochBurst)
supressions.append(epochSupressions)
supressionMean_res = np.zeros((eegData.shape[0], eegData.shape[2]))
supressionStd_res = np.zeros((eegData.shape[0], eegData.shape[2]))
for chan in range(supressionMean_res.shape[0]):
for epoch in range(supressionMean_res.shape[1]):
supressionMean_res[chan,epoch] = np.mean([suppr[1]-suppr[0] for suppr in supressions[epoch][chan]])
supressionStd_res[chan,epoch] = np.std([suppr[1]-suppr[0] for suppr in supressions[epoch][chan]])
supressionMean_res = np.nan_to_num(supressionMean_res)
supressionStd_res = np.nan_to_num(supressionStd_res)
return supressionMean_res, supressionStd_res
################################################
# Connectivity features
################################################
##########
# Coherence - δ
def coherence(eegData,fs):
coh_res = []
for ii, jj in itertools.combinations(range(eegData.shape[0]), 2):
coh_res.append(CoherenceDelta(eegData, ii, jj, fs=fs))
coh_res = np.array(coh_res)
return coh_res
##########
# Mutual information
def calculate2Chan_MI(eegData,ii,jj,bin_min=-200, bin_max=200, binWidth=2):
H = np.zeros(eegData.shape[2])
bins = np.arange(bin_min+1, bin_max, binWidth)
for epoch in range(eegData.shape[2]):
c_xy = np.histogram2d(eegData[ii,:,epoch],eegData[jj,:,epoch],bins)[0]
H[epoch] = mutual_info_score(None, None, contingency=c_xy)
return H
##########
# Granger causality
def calcGrangerCausality(eegData,ii,jj):
H = np.zeros(eegData.shape[2])
for epoch in range(eegData.shape[2]):
X = np.vstack([eegData[ii,:,epoch],eegData[jj,:,epoch]]).T
H[epoch] = tsa.stattools.grangercausalitytests(X, 1, addconst=True, verbose=False)[1][0]['ssr_ftest'][0]
return H
##########
# phase Lag Index
def phaseLagIndex(eegData, i, j):
hxi = ss.hilbert(eegData[i,:,:])
hxj = ss.hilbert(eegData[j,:,:])
# calculating the INSTANTANEOUS PHASE
inst_phasei = np.arctan(np.angle(hxi))
inst_phasej = np.arctan(np.angle(hxj))
out = np.abs(np.mean(np.sign(inst_phasej - inst_phasei), axis=0))
return out
##########
# Cross-correlation Magnitude
def crossCorrMag(eegData,ii,jj):
crossCorr_res = []
for ii, jj in itertools.combinations(range(eegData.shape[0]), 2):
crossCorr_res.append(crossCorrelation(eegData, ii, jj))
crossCorr_res = np.array(crossCorr_res)
return crossCorr_res
##########
# Cross-correlation Lag
def corrCorrLag(eegData,ii,jj,fs=100):
crossCorrLag_res = []
for ii, jj in itertools.combinations(range(eegData.shape[0]), 2):
crossCorrLag_res.append(corrCorrLag(eegData, ii, jj, fs))
crossCorrLag_res = np.array(crossCorrLag_res)
return crossCorrLag_res
+459
Ver Arquivo
@@ -0,0 +1,459 @@
#!/usr/bin/env python
# coding: utf-8
# In[ ]:
import ImportUtils
import math
import EEGExtract as eeg
from sklearn.model_selection import train_test_split
import os
import glob
from scipy import io,signal
import numpy as np
import pandas as pd
from sklearn import preprocessing
import pickle
from sklearn.metrics import mean_squared_error
from sklearn.impute import SimpleImputer
import matplotlib.pyplot as plt
import seaborn as sns
import copy
import os
# In[ ]:
def select_channels(data,channels):
# parameters:-
# data - channelwise EEG preprocessed data
# channels - list of required channels
# returns:-
# ans - the selected channels from the entire dataset
ans = np.empty((data.shape[0],len(channels),data.shape[2]))
for sub in range(data.shape[0]):
ans[sub,:,:] = np.array([data[sub,x,:] for x in channels])
return ans
# In[ ]:
def epoch_data(X, Y, Z, window, stride, sfreq):
# Function to epoch the data
# parameters:-
# X - The EEG data input passed as trial*channel*timepoints
# Y - VALD (depending on the dataset) as given by the user
# Z - Participant number and session number
# window - length of required window in seconds
# stride - stride of the required sliding window in seconds
# sfreq - sampling frequency of the obtained EEG data
# retruns:-
# X_new - Epoched X
# Y_new - Epoched Y (All the segments for a given trial shall have the same value, i.e the one given by the subject)
# Z_new - Epoched Z (All the segments for a given trial shall have the same value, i.e the one of the subject)
trials,channels,timepoints = X.shape
segment = int(window*sfreq)
step = int(stride*sfreq)
epochPerTrial = int((timepoints-segment)/step + 1)
X_new = np.empty((trials*epochPerTrial,channels,segment))
Y_new = np.empty((trials*epochPerTrial,Y.shape[1]))
Z_new = np.empty((trials*epochPerTrial,Z.shape[1]))
count=0
for trial in range(trials):
for epoch in range(epochPerTrial):
X_new[count,:,:] = X[trial,:,epoch*step:(epoch*step)+segment]
Y_new[count,:] = Y[trial,:]
Z_new[count,:] = Z[trial,:]
count = count+1
return X_new, Y_new, Z_new
# In[ ]:
def save_features(dataset, ans, Y_epoch, sfreq, window, stride):
# A function to generate the features and save them
# parameters:-
# dataset - name of the dataset
# ans - epoched segment of X
# Y_epoch - epoched segments of the valence and arousal scores taken from the subject
# window - window length in seconds
# stride - stride length in seconds
# sfreq - sampli5ng frequency of the EEG signal
# returns:-
# void
fs = sfreq
featurepath = os.getcwd() + '/' + dataset + '/data_extracted/featuresDict/'
feature_matrix = eeg.shannonEntropy(ans, bin_min=-200, bin_max=200, binWidth=2)
np.savez((featurepath+"shannonEntropy_{}_{}.npz").format(window,stride),features = feature_matrix , Y = Y_epoch)
feature_matrix = eeg.eegStd(ans)
stdshape = feature_matrix.shape
# The channels
emotiv_channels = ['AF3','F7','F3','FC5','T7','P7','O1','O2','P8','T8','FC6','F4','F8','AF4']
left_channels = ['AF3', 'F7','F3', 'FC5', 'T7', 'P7', 'O1']
right_channels = ['AF4','F8','F4','FC6','T8','P8','O2']
dasm_gamma = np.empty((0,stdshape[1]))
rasm_gamma = np.empty((0,stdshape[1]))
for lc,rc in zip(left_channels, right_channels):
lci = emotiv_channels.index(lc)
rci = emotiv_channels.index(rc)
#left differential entropy
dl = (0.5)*np.log((2*math.pi*math.e*np.square(eeg.eegStd(eeg.filt_data(np.expand_dims(ans[lci,:,:], axis=0),30,45,fs)))))
#right differential entropy
dr = (0.5)*np.log((2*math.pi*math.e*np.square(eeg.eegStd(eeg.filt_data(np.expand_dims(ans[rci,:,:], axis=0),30,45,fs)))))
dasm_gamma = np.append(dasm_gamma, np.subtract(dl,dr), axis=0)
rasm_gamma = np.append(rasm_gamma, np.divide(dl,dr), axis=0)
np.savez((featurepath+"dasm_gamma_{}_{}.npz").format(window,stride),features = dasm_gamma , Y = Y_epoch)
np.savez((featurepath+"rasm_gamma_{}_{}.npz").format(window,stride),features = rasm_gamma , Y = Y_epoch)
del dasm_gamma, rasm_gamma
return
'''
Subband Information Quantity
'''
# delta (0.5–4 Hz)
eegData_delta = eeg.filt_data(ans, 0.5, 4, fs)
feature_matrix = eeg.shannonEntropy(eegData_delta, bin_min=-200, bin_max=200, binWidth=2)
np.savez((featurepath+"ShannonRes_sub_bands_delta_{}_{}.npz").format(window,stride),features = feature_matrix , Y = Y_epoch)
eegData_theta = eeg.filt_data(ans, 4, 8, fs)
feature_matrix = eeg.shannonEntropy(eegData_theta, bin_min=-200, bin_max=200, binWidth=2)
np.savez((featurepath+"ShannonRes_sub_bands_theta_{}_{}.npz").format(window,stride),features = feature_matrix , Y = Y_epoch)
eegData_alpha = eeg.filt_data(ans, 8, 12, fs)
feature_matrix = eeg.shannonEntropy(eegData_alpha, bin_min=-200, bin_max=200, binWidth=2)
np.savez((featurepath+"ShannonRes_sub_bands_alpha_{}_{}.npz").format(window,stride),features = feature_matrix , Y = Y_epoch)
eegData_beta = eeg.filt_data(ans, 12, 30, fs)
feature_matrix = eeg.shannonEntropy(eegData_beta, bin_min=-200, bin_max=200, binWidth=2)
np.savez((featurepath+"ShannonRes_sub_bands_beta_{}_{}.npz").format(window,stride),features = feature_matrix , Y = Y_epoch)
eegData_gamma = eeg.filt_data(ans, 30,45, fs)
feature_matrix = eeg.shannonEntropy(eegData_gamma, bin_min=-200, bin_max=200, binWidth=2)
np.savez((featurepath+"ShannonRes_sub_bands_gamma_{}_{}.npz").format(window,stride),features = feature_matrix , Y = Y_epoch)
HjorthMob, HjorthComp = eeg.hjorthParameters(ans)
feature_matrix = HjorthComp
np.savez((featurepath+"Hjorth_complexity_{}_{}.npz").format(window,stride),features = feature_matrix , Y = Y_epoch)
feature_matrix = HjorthMob
np.savez((featurepath+"Hjorth_mobilty_{}_{}.npz").format(window,stride),features = feature_matrix , Y = Y_epoch)
feature_matrix = eeg.falseNearestNeighbor(ans)
np.savez((featurepath+"falseNearestNeighbor_{}_{}.npz").format(window,stride),features = feature_matrix , Y = Y_epoch)
feature_matrix = eeg.medianFreq(ans,fs)
np.savez((featurepath+"medianFreq_{}_{}.npz").format(window,stride),features = feature_matrix , Y = Y_epoch)
feature_matrix = eeg.bandPower(ans, 0.5, 4, fs)
np.savez((featurepath+"bandPwr_delta_{}_{}.npz").format(window,stride),features = feature_matrix , Y = Y_epoch)
feature_matrix = eeg.bandPower(ans, 4, 8, fs)
np.savez((featurepath+"bandPwr_theta_{}_{}.npz").format(window,stride),features = feature_matrix , Y = Y_epoch)
feature_matrix = eeg.bandPower(ans, 8, 12, fs)
np.savez((featurepath+"bandPwr_alpha_{}_{}.npz").format(window,stride),features = feature_matrix , Y = Y_epoch)
feature_matrix = eeg.bandPower(ans, 12, 30, fs)
np.savez((featurepath+"bandPwr_beta_{}_{}.npz").format(window,stride),features = feature_matrix , Y = Y_epoch)
feature_matrix = eeg.bandPower(ans, 30, 45, fs)
np.savez((featurepath+"bandPwr_gamma_{}_{}.npz").format(window,stride),features = feature_matrix , Y = Y_epoch)
feature_matrix = eeg.eegStd(ans)
stdshape = feature_matrix.shape
np.savez((featurepath+"stdDev_{}_{}.npz").format(window,stride),features = feature_matrix , Y = Y_epoch)
feature_matrix = eeg.diffuseSlowing(ans)
np.savez((featurepath+"diffuseSlowing_{}_{}.npz").format(window,stride),features = feature_matrix , Y = Y_epoch)
minNumSamples = int(70*fs/1000)
feature_matrix = eeg.spikeNum(ans,minNumSamples)
np.savez((featurepath+"spikeNum_{}_{}.npz").format(window,stride),features = feature_matrix , Y = Y_epoch)
feature_matrix = eeg.burstAfterSpike(ans,eegData_delta,minNumSamples=7,stdAway = 3)
np.savez((featurepath+"deltaBurstAfterSpike_{}_{}.npz").format(window,stride),features = feature_matrix , Y = Y_epoch)
feature_matrix = eeg.shortSpikeNum(ans,minNumSamples)
np.savez((featurepath+"shortSpikeNum_{}_{}.npz").format(window,stride),features = feature_matrix , Y = Y_epoch)
feature_matrix = eeg.numBursts(ans,fs)
np.savez((featurepath+"numBursts_{}_{}.npz").format(window,stride),features = feature_matrix , Y = Y_epoch)
burstLenMean_res,burstLenStd_res = eeg.burstLengthStats(ans,fs)
feature_matrix = burstLenMean_res
np.savez((featurepath+"burstLen_u_and_sigma_mean_{}_{}.npz").format(window,stride),features = feature_matrix , Y = Y_epoch)
feature_matrix = burstLenStd_res
np.savez((featurepath+"burstLen_u_and_sigma_std_{}_{}.npz").format(window,stride),features = feature_matrix , Y = Y_epoch)
feature_matrix = eeg.numSuppressions(ans,fs)
np.savez((featurepath+"numSuppressions_{}_{}.npz").format(window,stride),features = feature_matrix , Y = Y_epoch)
suppLenMean_res,suppLenStd_res = eeg.suppressionLengthStats(ans,fs)
feature_matrix = suppLenMean_res
np.savez((featurepath+"suppressionLen_u_and_sigma_mean_{}_{}.npz").format(window,stride),features = feature_matrix , Y = Y_epoch)
del suppLenMean_res
feature_matrix = suppLenStd_res
np.savez((featurepath+"suppressionLen_u_and_sigma_std_{}_{}.npz").format(window,stride),features = feature_matrix , Y = Y_epoch)
del suppLenStd_res
# DASM and RASM Features
# DASM = h(X lefti) h(Xrighti), and (2)
# RASM = h(Xlefti)/h(Xrighti),
emotiv_channels = ['AF3','F7','F3','FC5','T7','P7','O1','O2','P8','T8','FC6','F4','F8','AF4']
left_channels = ['AF3', 'F7','F3', 'FC5', 'T7', 'P7', 'O1']
right_channels = ['AF4','F8','F4','FC6','T8','P8','O2']
#[chans x ms x epochs]
dasm_delta = np.empty((0,stdshape[1]))
rasm_delta = np.empty((0,stdshape[1]))
for lc,rc in zip(left_channels, right_channels):
lci = emotiv_channels.index(lc)
rci = emotiv_channels.index(rc)
#left differential entropy
inputarr = np.expand_dims(ans[lci,:,:], axis=0)
print("inputarr.shape=", inputarr.shape)
temp = eeg.filt_data(inputarr, 0.5, 4, fs)
tempstd = eeg.eegStd(temp)
dl = (0.5)*np.log((2*math.pi*math.e*np.square(eeg.eegStd(eeg.filt_data(np.expand_dims(ans[lci,:,:], axis=0), 0.5, 4, fs)))))
#right differential entropy
dr = (0.5)*np.log((2*math.pi*math.e*np.square(eeg.eegStd(eeg.filt_data(np.expand_dims(ans[rci,:,:], axis=0), 0.5, 4, fs)))))
print("temp.shape=", temp.shape,"tempstd.shape=", tempstd.shape,"dl.shape= ", dl.shape, "stdshape=", stdshape)
dasm_delta = np.append(dasm_delta, np.subtract(dl,dr), axis=0)
rasm_delta = np.append(rasm_delta, np.divide(dl,dr), axis=0)
np.savez((featurepath+"dasm_delta_{}_{}.npz").format(window,stride),features = dasm_delta , Y = Y_epoch)
np.savez((featurepath+"rasm_delta_{}_{}.npz").format(window,stride),features = rasm_delta , Y = Y_epoch)
del dasm_delta, rasm_delta
dasm_theta = np.empty((0,stdshape[1]))
rasm_theta = np.empty((0,stdshape[1]))
for lc,rc in zip(left_channels, right_channels):
lci = emotiv_channels.index(lc)
rci = emotiv_channels.index(rc)
#left differential entropy
dl = (0.5)*np.log((2*math.pi*math.e*np.square(eeg.eegStd(eeg.filt_data(np.expand_dims(ans[lci,:,:], axis=0), 4, 8, fs)))))
#right differential entropy
dr = (0.5)*np.log((2*math.pi*math.e*np.square(eeg.eegStd(eeg.filt_data(np.expand_dims(ans[rci,:,:], axis=0), 4, 8, fs)))))
dasm_theta = np.append(dasm_theta, np.subtract(dl,dr), axis=0)
rasm_theta = np.append(rasm_theta, np.divide(dl,dr), axis=0)
np.savez((featurepath+"dasm_theta_{}_{}.npz").format(window,stride),features = dasm_theta , Y = Y_epoch)
np.savez((featurepath+"rasm_theta_{}_{}.npz").format(window,stride),features = rasm_theta , Y = Y_epoch)
del dasm_theta, rasm_theta
dasm_alpha = np.empty((0,stdshape[1]))
rasm_alpha = np.empty((0,stdshape[1]))
for lc,rc in zip(left_channels, right_channels):
lci = emotiv_channels.index(lc)
rci = emotiv_channels.index(rc)
#left differential entropy
dl = (0.5)*np.log((2*math.pi*math.e*np.square(eeg.eegStd(eeg.filt_data(np.expand_dims(ans[lci,:,:], axis=0), 8, 12, fs)))))
#right differential entropy
dr = (0.5)*np.log((2*math.pi*math.e*np.square(eeg.eegStd(eeg.filt_data(np.expand_dims(ans[rci,:,:], axis=0), 8, 12, fs)))))
dasm_alpha = np.append(dasm_alpha, np.subtract(dl,dr), axis=0)
rasm_alpha = np.append(rasm_alpha, np.divide(dl,dr), axis=0)
np.savez((featurepath+"dasm_alpha_{}_{}.npz").format(window,stride),features = dasm_alpha , Y = Y_epoch)
np.savez((featurepath+"rasm_alpha_{}_{}.npz").format(window,stride),features = rasm_alpha , Y = Y_epoch)
del dasm_alpha, rasm_alpha
dasm_beta = np.empty((0,stdshape[1]))
rasm_beta = np.empty((0,stdshape[1]))
for lc,rc in zip(left_channels, right_channels):
lci = emotiv_channels.index(lc)
rci = emotiv_channels.index(rc)
#left differential entropy
dl = (0.5)*np.log((2*math.pi*math.e*np.square(eeg.eegStd(eeg.filt_data(np.expand_dims(ans[lci,:,:], axis=0), 12, 30,fs)))))
#right differential entropy
dr = (0.5)*np.log((2*math.pi*math.e*np.square(eeg.eegStd(eeg.filt_data(np.expand_dims(ans[rci,:,:], axis=0), 12, 30,fs)))))
dasm_beta = np.append(dasm_beta, np.subtract(dl,dr), axis=0)
rasm_beta = np.append(rasm_beta, np.divide(dl,dr), axis=0)
np.savez((featurepath+"dasm_beta_{}_{}.npz").format(window,stride),features = dasm_beta , Y = Y_epoch)
np.savez((featurepath+"rasm_beta_{}_{}.npz").format(window,stride),features = rasm_beta , Y = Y_epoch)
del dasm_beta, rasm_beta
dasm_gamma = np.empty((0,stdshape[1]))
rasm_gamma = np.empty((0,stdshape[1]))
for lc,rc in zip(left_channels, right_channels):
lci = emotiv_channels.index(lc)
rci = emotiv_channels.index(rc)
#left differential entropy
dl = (0.5)*np.log((2*math.pi*math.e*np.square(eeg.eegStd(eeg.filt_data(np.expand_dims(ans[lci,:,:], axis=0),30,45,fs)))))
#right differential entropy
dr = (0.5)*np.log((2*math.pi*math.e*np.square(eeg.eegStd(eeg.filt_data(np.expand_dims(ans[rci,:,:], axis=0),30,45,fs)))))
dasm_gamma = np.append(dasm_gamma, np.subtract(dl,dr), axis=0)
rasm_gamma = np.append(rasm_gamma, np.divide(dl,dr), axis=0)
np.savez((featurepath+"dasm_gamma_{}_{}.npz").format(window,stride),features = dasm_gamma , Y = Y_epoch)
np.savez((featurepath+"rasm_gamma_{}_{}.npz").format(window,stride),features = rasm_gamma , Y = Y_epoch)
del dasm_gamma, rasm_gamma
# In[ ]:
def getEpochedFeatures(dataset, window, stride, sfreq, label):
# Function to reshape the arrays before passing on to the save_features function
# parameters:-
# dataset - name of the dataset
# window - length of window in seconds
# stride - length of stride in seconds
# sfreq - sampling frequency of the EEG signal
# label - valence/arousal (0/1)
# returns:-
# void
'''
Returns Accuracy vs Segment size plot for
window - length of window
stride - step
sfreq - sampling freq
label - 0-valence, 1-arousal, 2-dominance, 3-liking
'''
fs = sfreq
X = None
Y = None
Z = None
pwd = os.getcwd()
with np.load((pwd + '/data_extracted/{}.npz').format(dataset), allow_pickle=True) as data:
X = data['X']
Y = data['Y']
Z = data['Z']
print("Shape After Loading")
print("X.shape=", X.shape," Y.shape=",Y.shape," Z.shape=", Z.shape)
# return
#########!MODIFY FOR DREAMER AND DEAP DATASET########################################
#****
'''
Reshape Data
'''
if(dataset != "DEAP"):
temp_arr = np.empty((X.shape[0],X.shape[2],X.shape[1]))
for i in range(temp_arr.shape[0]):
temp_arr[i,:,:] = X[i,:,:].transpose()
X = copy.deepcopy(temp_arr)
del temp_arr
print("Shape after reshaping")
print("X.shape=", X.shape," Y.shape=",Y.shape," Z.shape=", Z.shape)
'''
Select Channels(if needed)
'''
print("Data Loaded...\n")
ch_names = ['F1', 'AF3', 'F3', 'F7', 'FC5', 'FC1', 'C3', 'T7', 'CP5', 'CP1', 'P3', 'P7', 'PO3', 'O1', 'Oz', 'Pz', 'Fp2', 'AF4', 'Fz', 'F4', 'F8', 'FC6', 'FC2', 'Cz', 'C4', 'T8', 'CP6', 'CP2', 'P4', 'P8', 'PO4', 'O2', 'hEOG','vEOG', 'zEMG','tEMG','GSR','Respiration belt','Plethysmograph','Temperature']
emotiv_channels = ['AF3','F7','F3','FC5','T7','P7','O1','O2','P8','T8','FC6','F4','F8','AF4']
index_arr = [ch_names.index(x) for x in emotiv_channels]
X_new = None
if(dataset == "DEAP"):
X_new = select_channels(X,index_arr)
else:
X_new = copy.deepcopy(X)
print("X_new.shape = ", X_new.shape)
del X
print("Channel selection done ...\n")
'''
# X = (32*40,40,8064)
# Y = (32*40,4)
# Z = (32*40,2)
# X : (nbSegments, nbChannel, nbTimepoints) : Data
# Y : (nbSegments, nbEmotions) : Valence and arousal data
# Z : (nbSegments, 2) : Participant number, and session number
'''
'''
DREAMER Dataset
# X = (23*18,7808+54032,14)
# Y = (23*18,2)
# Z = (23*18,2)
'''
(X_epoch, Y_epoch, Z_epoch) = epoch_data(X_new, Y, Z,window,stride,sfreq)
del X_new
del Y
del Z
print("Epoching done ...\n")
print(X_epoch.shape, Y_epoch.shape, Z_epoch.shape) #debug
# 1280*63,40,128
# trial, channel, segment
trials, channels, segment = X_epoch.shape
ans = np.empty((channels, segment, trials)) #[chans x ms x epochs]
for i in range(trials):
ans[:,:,i] = X_epoch[i,:,]
del X_epoch
print("ans.shape = ", ans.shape)
print("Rotation of np.array done ...\n")
pwd = os.getcwd()
filepath = pwd + '/' + dataset + "/data_extracted/epochedData/data" + str(window) + str(stride) + ".npz"
np.savez(filepath,ans,Y_epoch, Z_epoch)
# featuresDict = getFeaturesDict(ans,sfreq)
save_features(dataset, ans, Y_epoch, sfreq, window, stride)
# with open(pwd + '/' + dataset + '/data_extracted/featureDicts/'+str(window)+str(stride)+ '.pkl', 'wb') as f:
# pickle.dump(featuresDict, f, pickle.HIGHEST_PROTOCOL)
print("Feature Extraction done ...\n")
if __name__ == '__main__':
pass
+172
Ver Arquivo
@@ -0,0 +1,172 @@
#!/usr/bin/env python
# coding: utf-8
# Script to import all the required libraries.<br>
# It also defines a function to make a dictionary and load the features.
#
# In[ ]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score
from sklearn.feature_selection import chi2
from sklearn.feature_selection import SelectKBest, f_classif
from sklearn.model_selection import train_test_split
from sklearn import preprocessing
from sklearn.feature_selection import *
from sklearn.model_selection import RandomizedSearchCV
from sklearn.model_selection import GridSearchCV
import sys
import csv
import os
import math
import glob
from scipy import io,signal
import numpy as np
import pandas as pd
import pickle
from sklearn.metrics import mean_squared_error
from sklearn.impute import SimpleImputer
import matplotlib.pyplot as plt
import seaborn as sns
import copy
from sklearn import feature_selection
import argparse
import cuml
from cuml.svm import SVR
from cuml.ensemble import RandomForestRegressor
from cuml.svm import SVC
from cuml.ensemble import RandomForestClassifier
from cuml.metrics import accuracy_score
# In[ ]:
def loadFeaturesDict(dataset):
# input parameters :- The name of the dataset
# return :- Feature dictionary
featuresDict = {'shannonEntropy': None,
'ShannonRes_delta':None,
'ShannonRes_theta':None,
'ShannonRes_alpha':None,
'ShannonRes_beta':None,
'ShannonRes_gamma':None,
'HjorthComp':None,
'HjorthMob':None,
'falseNearestNeighbor':None,
'medianFreq':None,
'bandPwr_delta':None,
'bandPwr_theta':None,
'bandPwr_alpha':None,
'bandPwr_beta':None,
'bandPwr_gamma':None,
'stdDev':None,
'diffuseSlowing':None,
'spikeNum':None,
'deltaBurstAfterSpike':None,
'shortSpikeNum':None,
'numBursts':None,
'burstLenMean':None,
'burstLenStd':None,
'numSuppressions':None,
'suppLenMean':None,
'suppLenStd':None,
'dasm_delta': None,
'dasm_theta': None,
'dasm_alpha': None,
'dasm_beta': None,
'dasm_gamma': None,
'rasm_delta': None,
'rasm_theta': None,
'rasm_alpha': None,
'rasm_beta': None,
'rasm_gamma': None,
}
featurepath = os.getcwd() + '/' + dataset + '/data_extracted/featuresDict/'
featuresDict['shannonEntropy'] = np.load(featurepath + "shannonEntropy_1_1.npz", allow_pickle=True)['features']
featuresDict['ShannonRes_delta'] = np.load(featurepath + "ShannonRes_sub_bands_delta_1_1.npz", allow_pickle=True)['features']
featuresDict['ShannonRes_theta'] = np.load(featurepath + "ShannonRes_sub_bands_theta_1_1.npz", allow_pickle=True)['features']
featuresDict['ShannonRes_alpha'] = np.load(featurepath + "ShannonRes_sub_bands_alpha_1_1.npz", allow_pickle=True)['features']
featuresDict['ShannonRes_beta'] = np.load(featurepath + "ShannonRes_sub_bands_beta_1_1.npz", allow_pickle=True)['features']
featuresDict['ShannonRes_gamma'] = np.load(featurepath + "ShannonRes_sub_bands_gamma_1_1.npz", allow_pickle=True)['features']
featuresDict['HjorthComp'] = np.load(featurepath + "Hjorth_complexity_1_1.npz", allow_pickle=True)['features']
featuresDict['HjorthMob'] = np.load(featurepath + "Hjorth_mobilty_1_1.npz",allow_pickle=True)['features']
featuresDict['falseNearestNeighbor'] = np.load(featurepath + "falseNearestNeighbor_1_1.npz",allow_pickle=True)['features']
featuresDict['medianFreq'] = np.load(featurepath + "medianFreq_1_1.npz",allow_pickle=True)['features']
featuresDict['bandPwr_delta']=np.load(featurepath+"bandPwr_delta_1_1.npz", allow_pickle = True)['features']
featuresDict['bandPwr_theta']=np.load(featurepath + "bandPwr_theta_1_1.npz", allow_pickle = True)['features']
featuresDict['bandPwr_alpha']=np.load(featurepath + "bandPwr_alpha_1_1.npz", allow_pickle = True)['features']
featuresDict['bandPwr_beta']=np.load(featurepath + "bandPwr_beta_1_1.npz", allow_pickle = True)['features']
featuresDict['bandPwr_gamma']=np.load(featurepath + "bandPwr_gamma_1_1.npz", allow_pickle = True)['features']
featuresDict['stdDev'] = np.load(featurepath + "stdDev_1_1.npz",allow_pickle=True)['features']
featuresDict['diffuseSlowing'] = np.load(featurepath + "diffuseSlowing_1_1.npz",allow_pickle=True)['features']
featuresDict['spikeNum'] = np.load(featurepath + "spikeNum_1_1.npz",allow_pickle=True)['features']
featuresDict['deltaBurstAfterSpike'] = np.load(featurepath + "deltaBurstAfterSpike_1_1.npz",allow_pickle=True)['features']
featuresDict['shortSpikeNum'] = np.load(featurepath + "shortSpikeNum_1_1.npz", allow_pickle=True)['features']
featuresDict['numBursts'] = np.load(featurepath + "numBursts_1_1.npz",allow_pickle=True)['features']
featuresDict['burstLenMean'] = np.load(featurepath + "burstLen_u_and_sigma_mean_1_1.npz",allow_pickle=True)['features']
featuresDict['burstLenStd'] = np.load(featurepath + "burstLen_u_and_sigma_std_1_1.npz",allow_pickle=True)['features']
featuresDict['numSuppressions'] = np.load(featurepath + "numSuppressions_1_1.npz",allow_pickle=True)['features']
featuresDict['suppLenMean'] = np.load(featurepath + "suppressionLen_u_and_sigma_mean_1_1.npz",allow_pickle=True)['features']
featuresDict['suppLenStd'] = np.load(featurepath + "suppressionLen_u_and_sigma_std_1_1.npz",allow_pickle=True)['features']
featuresDict['dasm_delta'] = np.load(featurepath + "dasm_delta_1_1.npz",allow_pickle=True)['features']
featuresDict['dasm_theta'] = np.load(featurepath + "dasm_theta_1_1.npz",allow_pickle=True)['features']
featuresDict['dasm_alpha'] = np.load(featurepath + "dasm_alpha_1_1.npz",allow_pickle=True)['features']
featuresDict['dasm_beta'] = np.load(featurepath + "dasm_beta_1_1.npz",allow_pickle=True)['features']
featuresDict['dasm_gamma'] = np.load(featurepath + "dasm_gamma_1_1.npz",allow_pickle=True)['features']
featuresDict['rasm_delta'] = np.load(featurepath + "rasm_delta_1_1.npz",allow_pickle=True)['features']
featuresDict['rasm_theta'] = np.load(featurepath + "rasm_theta_1_1.npz",allow_pickle=True)['features']
featuresDict['rasm_alpha'] = np.load(featurepath + "rasm_alpha_1_1.npz",allow_pickle=True)['features']
featuresDict['rasm_beta'] = np.load(featurepath + "rasm_beta_1_1.npz",allow_pickle=True)['features']
featuresDict['rasm_gamma'] = np.load(featurepath + "rasm_gamma_1_1.npz",allow_pickle=True)['features']
return featuresDict
+735
Ver Arquivo
@@ -0,0 +1,735 @@
#!/usr/bin/env python
# coding: utf-8
# In[ ]:
from ImportUtils import *
from sklearn.model_selection import ParameterGrid
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score
from sklearn.feature_selection import chi2
from sklearn.feature_selection import SelectKBest, f_classif
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score
from sklearn.ensemble import RandomForestRegressor as sklearnrfi
import os
import glob
from scipy import io,signal
import numpy as np
import pandas as pd
from sklearn import preprocessing
import pickle
from sklearn.metrics import mean_squared_error
from sklearn.impute import SimpleImputer
import matplotlib.pyplot as plt
# %matplotlib inline
import seaborn as sns
import copy
def topElectrodeRegressionRanking(dataset, window, stride, sfreq, clf, label, scale=False, pca=False):
'''
Ranks of features according to rmse computed by regressor passed in clf
Plots electrode v/s rmse graph
'''
# parameters :-
# dataset - name of the dataset
# window - length of the sliding window in seconds
# stride - length of the stride of the sliding window in seconds
# sfreq - sampling frequency of the EEG data
# clf - name of the classifier to be used
# label - valence/arousal/dominance/liking label (shape depends upon the dataset) in an enumerated form (0- valence ; 1-arousal ; 2- like; 3-dominance)
# scale - sclaing of the EEG data if required
# returns :-
# void
pwd = os.getcwd()
#load extracted features
#####################################################################################################################################################
featurepath = os.getcwd() + '/' + dataset + '/data_extracted/featuresDict/'
ans = np.load((featurepath + "shannonEntropy_{}_{}.npz").format(window,stride), allow_pickle=True)['features']
Y_epoch = np.load((featurepath + "shannonEntropy_{}_{}.npz").format(window,stride), allow_pickle=True)['Y']
rmseList = []
electrodeList = ['AF3', 'F7', 'F3', 'FC5', 'T7', 'P7', 'O1', 'O2', 'P8', 'T8', 'FC6', 'F4', 'F8', 'AF4']
fs = sfreq
pwd = os.getcwd()
featuresDict = loadFeaturesDict(dataset)
asm_features = ['dasm_delta', 'dasm_theta', 'dasm_alpha', 'dasm_beta', 'dasm_gamma', 'rasm_delta', 'rasm_theta', 'rasm_alpha', 'rasm_beta', 'rasm_gamma']
for asm in asm_features:
featuresDict.pop(asm)
common = []
with open('intersection.pkl', 'rb') as f:
common = pickle.load(f)
for k in list(featuresDict.keys()):
if k not in common:
# pop out common feature
featuresDict.pop(k)
selectFeatures = list(featuresDict.keys())
y = Y_epoch[:,label] #valence
#####################################################################################################################################################
for electrode in range(14):
# Load FeaturesDict from memory
print("Number of segments are: {}".format(ans.shape[1]))
featureMatrix = np.empty((len(selectFeatures),ans.shape[1])) #[14*32 + 1,80640]
i=0
for key,value in featuresDict.items():
featureMatrix[i,:] = value[electrode,:]
i = i+1
print(featureMatrix.T.shape)
featureMatrix = featureMatrix.astype(np.float32)
#Impute NaN values with zero
if np.isnan(featureMatrix).any():
featureMatrix = np.nan_to_num(featureMatrix,nan=0)
#Name Feature vector columns
feature_channel_index = []
for feature in selectFeatures:
feature_channel_index.append(feature + str(electrode))
print("Number of Feature-Columns: {}\n".format(len(feature_channel_index))) #debug
#Preparing dataset from feature matrix
X = pd.DataFrame(featureMatrix.T)
X.columns = feature_channel_index
X = X.replace([np.inf, -np.inf], np.nan)
X = X.fillna(0)
print("Features Ready for undergoing selection tests done ...\n")
# Perform train_test_split to get training and test data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
# Normalise-scale data
# Feature Scaling
if(scale == True):
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)
# Apply classfier
clf.fit(X_train, y_train)
y_predict = clf.predict(X_test)
rmse = mean_squared_error(y_test, y_predict,squared=False)
print("window: {}, stide: {}, rmse: {}".format(window,stride,rmse))
rmseList.append(rmse)
#rank electrodes based on RMSE computed by the classifier
electrode_df = pd.DataFrame(electrodeList)
rmse_df = pd.DataFrame(rmseList)
#concat two dataframes for better visualization
electrodeRanking = pd.concat([electrode_df, rmse_df],axis=1)
electrodeRanking.columns = ['Electrode','RMSE'] #naming the dataframe columns
features_result = electrodeRanking.sort_values('RMSE')
print(features_result)
# return features_result
##################################################################################
N = features_result.shape[0]
topRmseList = []
topNList = ["{}".format(x) for x in range(1,N+1)]
for n in range(1,N+1):
topnelectrodes = features_result.head(n)
electrode_index = topnelectrodes.index
electrode_index = list(electrode_index)[:n]
# X-Values
featureMatrix = np.empty((len(selectFeatures)*len(electrode_index),ans.shape[1]))
i = 0
for index in electrode_index:
for key,value in featuresDict.items():
featureMatrix[i,:] = value[index,:]
i = i+1
featureMatrix = featureMatrix.astype(np.float32)
print(featureMatrix.T.shape)
# Removing NaN Values
if np.isnan(featureMatrix).any():
featureMatrix = np.nan_to_num(featureMatrix,nan=0)
# Name Feature vector columns
feature_channel_index = []
for index in electrode_index:
for feature in selectFeatures:
feature_channel_index.append(feature + str(index))
print("Number of Feature-Columns: {}\n".format(len(feature_channel_index)))
X = pd.DataFrame(featureMatrix.T)
X.columns = feature_channel_index
X = X.replace([np.inf, -np.inf], np.nan)
X = X.fillna(0)
print("Features Ready for undergoing selection tests done ...\n")
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
# Normalise-scale data
# Feature Scaling
if(scale == True):
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)
# Apply classfier
search_method = "tpot"
best_clf = None
if(search_method == "bayes_sk_opt"):
# BayesCV scikit opt
search_space = {"bootstrap": Categorical([True, False]), # values for boostrap can be either True or False
"max_depth": Integer(6, 20), # values of max_depth are integers from 6 to 20
"max_features": Categorical(['auto', 'sqrt','log2']),
"min_samples_leaf": Integer(2, 10),
"min_samples_split": Integer(2, 10),
"n_estimators": Integer(100, 500)
}
forest_bayes_search = BayesSearchCV(clf, search_space, n_iter=32, cv=5)
print(forest_bayes_search)
print(forest_bayes_search.fit(X_train, y_train))
print("Best Parameters are: ", forest_bayes_search.best_params_)
best_clf = forest_bayes_search.best_estimator_
elif(search_method =="random_grid_search"):
print("Random Search followed by GridSearch initiated!\n");
#RandomSearchCV followed by GridSearchCV
random_grid = {'n_estimators': [int(x) for x in np.linspace(start = 200, stop = 2000, num = 10)],
'max_features': ['auto', 'sqrt','log2'],
'max_depth': [int(x) for x in np.linspace(10, 1000,10)],
'min_samples_split': [2, 5, 10,14],
'min_samples_leaf': [1, 2, 4,6,8],
}
rf_randomcv=RandomizedSearchCV(estimator=clf,param_distributions=random_grid,n_iter=100,cv=5,verbose=2,random_state=100)
print(rf_randomcv.fit(X_train, y_train))
print("Best Parameters for RandomSearchCV are: ", rf_randomcv.best_params_)
print("RMSE with RandomSearchCV is :",mean_squared_error(y_test, rf_randomcv.best_estimator_.predict(X_test),squared=False));
param_grid = {
'max_depth': [rf_randomcv.best_params_['max_depth']],
'max_features': [rf_randomcv.best_params_['max_features']],
'min_samples_leaf': [rf_randomcv.best_params_['min_samples_leaf'],
rf_randomcv.best_params_['min_samples_leaf']+2,
rf_randomcv.best_params_['min_samples_leaf'] + 4],
'min_samples_split': [rf_randomcv.best_params_['min_samples_split'] - 2,
rf_randomcv.best_params_['min_samples_split'] - 1,
rf_randomcv.best_params_['min_samples_split'],
rf_randomcv.best_params_['min_samples_split'] +1,
rf_randomcv.best_params_['min_samples_split'] + 2],
'n_estimators': [rf_randomcv.best_params_['n_estimators'] - 200, rf_randomcv.best_params_['n_estimators'] - 100,
rf_randomcv.best_params_['n_estimators'],
rf_randomcv.best_params_['n_estimators'] + 100, rf_randomcv.best_params_['n_estimators'] + 200]
}
grid_search=GridSearchCV(estimator=rf,param_grid=param_grid,cv=10, verbose=5)
grid_search.fit(X_train,y_train)
best_clf = rf_randomcv.best_estimator_
elif search_method =="manual_search":
min_rmse = 1000
best_clf = clf
min_params = None
# 2*3*3*3*3
param_grid = {'n_estimators': [50, 100],
'max_features': ['auto'],
'max_depth': [2, 10, 100],
'min_samples_split': [2, 5, 10],
'min_samples_leaf': [1, 2, 8],
}
param_grid = ParameterGrid(param_grid)
for params in param_grid:
print("Current Parameters : ", params)
temp_clf = RandomForestRegressor( max_features = params['max_features'], min_samples_leaf = params['min_samples_leaf'], min_samples_split = params['min_samples_split'], n_estimators = params['n_estimators'],max_depth = params['max_depth']);
temp_clf.fit(X_train,y_train)
y_predict = temp_clf.predict(X_test)
rmse = mean_squared_error(y_test, y_predict,squared=False)
print("Current RMSE with above params : ", rmse)
if(min_rmse > rmse):
min_rmse = rmse;
best_clf = temp_clf;
min_params = params;
print("Best Params for parameter search are : \n", min_params)
print("window: {}, stide: {}, rmse: {}".format(window,stride,min_rmse))
topRmseList.append(min_rmse)
elif search_method == "tpot":
from tpot import TPOTRegressor;
# TPOT setup
GENERATIONS = 5
POP_SIZE = 100
CV = 5
SEED = 42
tpot = TPOTRegressor(
generations=GENERATIONS,
population_size=POP_SIZE,
random_state=SEED,
config_dict="TPOT cuML",
n_jobs=1, # cuML requires n_jobs=1
cv=CV,
verbosity=2,
)
tpot.fit(X_train, y_train)
y_predict = tpot.predict(X_test)
rmse = mean_squared_error(y_test, y_predict,squared=False)
print("window: {}, stide: {}, rmse: {}".format(window,stride,rmse))
topRmseList.append(rmse)
else:
best_clf = clf
best_clf.fit(X_train,y_train)
if search_method != "manual_search" and search_method != "tpot":
y_predict = best_clf.predict(X_test)
rmse = mean_squared_error(y_test, y_predict,squared=False)
print("window: {}, stide: {}, rmse: {}".format(window,stride,rmse))
topRmseList.append(rmse)
topNElectrode_df = pd.DataFrame(topNList)
topNRmse_df = pd.DataFrame(topRmseList)
#concat two dataframes for better visualization
topNElectrodeRanking = pd.concat([topNElectrode_df, topNRmse_df],axis=1)
topNElectrodeRanking.columns = ['Electrode','RMSE'] #naming the dataframe columns
print(topNElectrodeRanking)
# Plotting
fig = plt.gcf()
fig.set_size_inches(20, 10)
plt.rcParams.update({'font.size': 30})
plt.xlabel('Top N Electrodes')
plt.ylabel('RMSE')
plt.plot(topNElectrodeRanking.loc[:,"Electrode"], topNElectrodeRanking.loc[:,"RMSE"])
plt.tight_layout()
# In[ ]:
def topFeaturesRegressionRanking(dataset, window, stride, sfreq, clf, label, scale=False, pca=False):
'''
Ranks of features according to rmse computed by regressor passed in clf
Plots electrode v/s rmse graph
'''
# parameters :-
# dataset - name of the dataset
# window - length of the sliding window in seconds
# stride - length of the stride of the sliding window in seconds
# sfreq - sampling frequency of the EEG data
# clf - name of the classifier to be used
# label - valence/arousal/dominance/liking label (shape depends upon the dataset)
# scale - sclaing of the EEG data if required
# returns :-
# void
fs = sfreq
pwd = os.getcwd()
featurepath = os.getcwd() + '/' + dataset + '/data_extracted/featuresDict/'
ans = np.load((featurepath + "shannonEntropy_{}_{}.npz").format(window,stride), allow_pickle=True)['features']
Y_epoch = np.load((featurepath + "shannonEntropy_{}_{}.npz").format(window,stride), allow_pickle=True)['Y']
print("Number of segments are: {}".format(ans.shape[1]))
featuresDict = None
featuresDict = loadFeaturesDict(dataset)
common = []
with open('intersection.pkl', 'rb') as f:
common = pickle.load(f)
for k in list(featuresDict.keys()):
if k not in common:
# pop out common feature
featuresDict.pop(k)
featuresList = list(featuresDict.keys())
y = Y_epoch[:,label] #valence
rmseList = []
####################################################################
#modify featuresList
featureMatrix = np.empty((0,ans.shape[1])) #[14*32 + 1,80640]
for key,value in featuresDict.items():
featureMatrix = np.append(featureMatrix,value,axis=0)
if np.isnan(featureMatrix).any():
featureMatrix = np.nan_to_num(featureMatrix,nan=0)
featureMatrix = featureMatrix.astype('float64')
feature_channel_index = []
for feature in featuresList:
for i in range(featuresDict[feature].shape[0]):
if(i>=10):
feature_channel_index.append(feature + str(i))
else:
feature_channel_index.append(feature + '0' + str(i))
print(len(list(featuresDict.keys())))
print("Number of Feature-Columns: {}\n".format(len(feature_channel_index)))
X = pd.DataFrame(featureMatrix.T)
X = X.replace([np.inf, -np.inf], np.nan)
X = X.fillna(0)
X.columns = feature_channel_index
#Remove Variance = 0 features
constant_filter = VarianceThreshold(threshold=0)
constant_filter.fit(X)
constant_columns = [column for column in X.columns
if column not in
X.columns[constant_filter.get_support()]]
X = constant_filter.transform(X)
for column in constant_columns:
feature_channel_index.remove(column)
print(len(feature_channel_index),feature_channel_index )
X = pd.DataFrame(X)
X.columns = feature_channel_index
filtered_featuresList = []
print(type(X))
for col in X.columns:
feature = col[:-2]
electrode = int(col[-2:])
if(feature not in filtered_featuresList):
filtered_featuresList.append(feature)
featuresList = filtered_featuresList
for feature in featuresList:
# Load FeaturesDict from memory
featureMatrix = featuresDict[feature]
featureMatrix = featureMatrix.astype(np.float32)
if np.isnan(featureMatrix).any():
featureMatrix = np.nan_to_num(featureMatrix,nan=0)
feature_channel_index = []
for i in range(featuresDict[feature].shape[0]):
feature_channel_index.append(feature + str(i))
print("Number of Feature-Columns: {}\n".format(len(feature_channel_index)))
X = pd.DataFrame(featureMatrix.T)
X = X.replace([np.inf, -np.inf], np.nan)
X = X.fillna(0)
X.columns = feature_channel_index
print("Features Ready for undergoing selection tests done ...\n")
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
# Normalise-scale data
# Feature Scaling
if(scale == True):
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)
# Apply classfier
clf.fit(X_train, y_train)
y_predict = clf.predict(X_test)
rmse = mean_squared_error(y_test, y_predict,squared=False)
print("window: {}, stide: {}, rmse: {}".format(window,stride,rmse))
rmseList.append(rmse)
features_df = pd.DataFrame(featuresList)
rmse_df = pd.DataFrame(rmseList)
#concat two dataframes for better visualization
featureRanking = pd.concat([features_df, rmse_df],axis=1)
featureRanking.columns = ['Feature','RMSE'] #naming the dataframe columns
features_result = featureRanking.sort_values('RMSE')
features_result.to_csv(pwd + "/" + dataset + "/arousal_plots/" + "CommonFeaturesRegressionRanking" + str(window) + str(stride) + ".csv")
print(features_result)
###########################################
N = features_result.shape[0]
topNRmseList = []
topNList = ["{}".format(x) for x in range(1,N+1)]
for n in range(1,N+1):
topnfeatures = copy.deepcopy(features_result.head(n))
topnfeatures = topnfeatures['Feature'].tolist() #list of feature-names
# X-Values################################################
featureMatrix = np.empty((0,ans.shape[1]))
for feature in topnfeatures:
featureMatrix = np.append(featureMatrix, featuresDict[feature], axis=0)
featureMatrix = featureMatrix.astype(np.float32)
print(featureMatrix.T.shape)
feature_channel_index = []
for feature in topnfeatures:
i=0
for i in range(featuresDict[feature].shape[0]):
feature_channel_index.append(feature + str(i))
# Removing NaN Values
if np.isnan(featureMatrix).any():
featureMatrix = np.nan_to_num(featureMatrix,nan=0)
print("Number of Feature-Columns: {}\n".format(len(feature_channel_index)))
X = pd.DataFrame(featureMatrix.T)
X.columns = feature_channel_index
X = X.replace([np.inf, -np.inf], np.nan)
X = X.fillna(0)
print("Features Ready for undergoing selection tests done ...\n")
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
# Normalise-scale data
# Feature Scaling
if(scale == True):
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)
clf.fit(X_train, y_train)
y_predict = clf.predict(X_test)
rmse = mean_squared_error(y_test, y_predict,squared=False)
print("window: {}, stide: {}, rmse: {}".format(window,stride,rmse))
topNRmseList.append(rmse)
topNFeatures_df = pd.DataFrame(topNList)
topNRmse_df = pd.DataFrame(topNRmseList)
#concat two dataframes for better visualization
topNFeaturesRanking = pd.concat([topNFeatures_df, topNRmse_df],axis=1)
topNFeaturesRanking.columns = ['Feature','RMSE'] #naming the dataframe columns
print(topNFeaturesRanking)
topNFeaturesRanking.to_csv(pwd + "/" + dataset + "/arousal_plots/" + "topCommonFeaturesRegressionRanking" + str(window) + str(stride) + ".csv")
# Plotting
fig = plt.gcf()
fig.set_size_inches(25, 10)
plt.rcParams.update({'font.size': 30})
plt.xlabel('Top N Features')
plt.ylabel('RMSE')
plt.plot(topNFeaturesRanking.loc[:,"Feature"], topNFeaturesRanking.loc[:,"RMSE"])
plt.tight_layout()
# In[ ]:
def topFeatureColumnsRegressionRanking(dataset, window, stride, sfreq, clf, label, scale=False, pca=False):
# parameters :-
# dataset - name of the dataset
# window - length of the sliding window in seconds
# stride - length of the stride of the sliding window in seconds
# sfreq - sampling frequency of the EEG data
# clf - name of the classifier to be used
# label - valence/arousal/dominance/liking label (shape depends upon the dataset)
# scale - sclaing of the EEG data if required
# returns :-
# void
fs = sfreq
pwd = os.getcwd()
featurepath = os.getcwd() + '/' + dataset + '/data_extracted/featuresDict/'
ans = np.load((featurepath + "shannonEntropy_{}_{}.npz").format(window,stride), allow_pickle=True)['features']
Y_epoch = np.load((featurepath + "shannonEntropy_{}_{}.npz").format(window,stride), allow_pickle=True)['Y']
electrodeList = ['AF3', 'F7', 'F3', 'FC5', 'T7', 'P7', 'O1', 'O2', 'P8', 'T8', 'FC6', 'F4', 'F8', 'AF4']
print("Number of segments are: {}".format(ans.shape[1]))
#X##############################################################################################
featuresDict = None
featuresDict = loadFeaturesDict(dataset)
common = []
with open('intersection.pkl', 'rb') as f:
common = pickle.load(f)
for k in list(featuresDict.keys()):
if k not in common:
# pop out common feature
featuresDict.pop(k)
featuresList = list(featuresDict.keys())
# defining column names
feature_channel_index = []
for feature in featuresList:
for i in range(featuresDict[feature].shape[0]):
feature_channel_index.append(feature + str(i))
#defining feature matrix
featureMatrix = np.empty((0,ans.shape[1])) #[14*32 + 1,80640]
for key,value in featuresDict.items():
featureMatrix = np.append(featureMatrix,value,axis=0)
print("Shape of FeatureMatrix: {}\n".format(featureMatrix.T.shape))
#data-imputation and nan-removal
featureMatrix = featureMatrix.astype(np.float32)
if np.isnan(featureMatrix).any():
featureMatrix = np.nan_to_num(featureMatrix,nan=0)
X = pd.DataFrame(featureMatrix.T)
X = X.replace([np.inf, -np.inf], np.nan)
X = X.fillna(0)
X.columns = feature_channel_index
#Y#####################################################################
y = Y_epoch[:,label] #valence
########################################################################
rmseList = []
for col in feature_channel_index:
input_df = pd.DataFrame(X[col])
X_train, X_test, y_train, y_test = train_test_split(input_df, y, test_size=0.2, random_state=42)
# Normalise-scale data
# Feature Scaling
if(scale == True):
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)
# Apply classfier
clf.fit(X_train, y_train)
y_predict = clf.predict(X_test)
rmse = mean_squared_error(y_test, y_predict, squared=False)
rmseList.append(rmse)
col_df = pd.DataFrame(feature_channel_index)
rmse_df = pd.DataFrame(rmseList)
#concat two dataframes for better visualization
colRanking = pd.concat([col_df, rmse_df],axis=1)
colRanking.columns = ['Column','RMSE'] #naming the dataframe columns
features_result = colRanking.sort_values('RMSE')
print(features_result)
N = len(feature_channel_index)
topNRmseList = []
topNList = ["{}".format(x) for x in range(1,N+1)]
for n in range(1, N+1):
ranking_df = features_result.head(n)
topncols = ranking_df['Column'].tolist()
X_train, X_test, y_train, y_test = train_test_split(X[topncols], y, test_size=0.2, random_state=42)
# Normalise-scale data
# Feature Scaling
if(scale == True):
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)
# Apply classfier
clf.fit(X_train, y_train)
y_predict = clf.predict(X_test)
rmse = mean_squared_error(y_test, y_predict, squared=False)
topNRmseList.append(rmse)
topcol_df = pd.DataFrame(topNList)
toprmse_df = pd.DataFrame(topNRmseList)
#concat two dataframes for better visualization
topcolRanking = pd.concat([topcol_df, toprmse_df],axis=1)
topcolRanking.columns = ['Column','RMSE'] #naming the dataframe columns
topfeatures_result = topcolRanking
print(topfeatures_result)
topfeatures_result.to_csv(pwd + "/" + dataset + "/arousal_plots/" + "ColumnsRegressionRanking" + str(window) + str(stride) + ".csv")
# Plotting
fig = plt.gcf()
fig.set_size_inches(60, 9)
plt.xlabel('Top N Columns')
plt.ylabel('RMSE')
plt.title("Top N Columns v/s RMSE Plot for Window:{} Stride:{} epoched data by varying N".format(window,stride))
plt.plot(topfeatures_result.loc[:,"Column"], topfeatures_result.loc[:,"RMSE"])
plt.tight_layout()
plt.savefig(pwd + "/" + dataset + "/arousal_plots/" + "topFeatureColumnsRegressionRanking" + str(window) + str(stride) + ".svg", bbox_inches='tight')
plt.show()
plt.clf()
+648
Ver Arquivo
@@ -0,0 +1,648 @@
#!/usr/bin/env python
# coding: utf-8
# In[ ]:
from ImportUtils import *
from sklearn.ensemble import RandomForestRegressor as sklearnrfi
from sklearn.feature_selection import VarianceThreshold
# In[ ]:
def topElectrodeFSRegressionRanking(dataset, window, stride, sfreq, clf, label, scale=False, pca=False, mutual_info = False, method='SelectKBest'):
'''
Ranks of features according to rmse computed by F score based regression
Plots electrode v/s rmse graph
'''
# parameters :-
# dataset - name of the dataset
# window - length of the sliding window in seconds
# stride - length of the stride of the sliding window in seconds
# sfreq - sampling frequency of the EEG data
# clf - name of the classifier to be used
# label - valence/arousal/dominance/liking label (shape depends upon the dataset)
# scale - sclaing of the EEG data if required
# mutual_info - Mutual ranking between features based on information theory
# method - 'RandomForest' 'RFE' 'SelectKBest'
# returns :-
# void
pwd = os.getcwd()
fs = sfreq
electrodeList = ['AF3', 'F7', 'F3', 'FC5', 'T7', 'P7', 'O1', 'O2', 'P8', 'T8', 'FC6', 'F4', 'F8', 'AF4']
featurepath = os.getcwd() + '/' + dataset + '/data_extracted/featuresDict/'
ans = np.load((featurepath + "shannonEntropy_{}_{}.npz").format(window,stride), allow_pickle=True)['features']
Y_epoch = np.load((featurepath + "shannonEntropy_{}_{}.npz").format(window,stride), allow_pickle=True)['Y']
print("Number of segments are: {}".format(ans.shape[1]))
featuresDict = None
featuresDict = loadFeaturesDict(dataset)
asm_features = ['dasm_delta', 'dasm_theta', 'dasm_alpha', 'dasm_beta', 'dasm_gamma', 'rasm_delta', 'rasm_theta', 'rasm_alpha', 'rasm_beta', 'rasm_gamma']
for asm in asm_features:
featuresDict.pop(asm)
common = []
with open('intersection.pkl', 'rb') as f:
common = pickle.load(f)
for k in list(featuresDict.keys()):
if k not in common:
# pop out common feature
featuresDict.pop(k)
featuresList = list(featuresDict.keys())
print(featuresList)
featureMatrix = np.empty((0,ans.shape[1])) #[14*32 + 1,80640]
for key,value in featuresDict.items():
featureMatrix = np.append(featureMatrix,value,axis=0)
if np.isnan(featureMatrix).any():
featureMatrix = np.nan_to_num(featureMatrix,nan=0)
featureMatrix = featureMatrix.astype('float64')
feature_channel_index = []
for feature in featuresList:
for i in range(featuresDict[feature].shape[0]):
if(i>=10):
feature_channel_index.append(feature + str(i))
else:
feature_channel_index.append(feature + '0' + str(i))
print("Number of Feature-Columns: {}\n".format(len(feature_channel_index)))
X = pd.DataFrame(featureMatrix.T)
X = X.replace([np.inf, -np.inf], np.nan)
X = X.fillna(0)
X.columns = feature_channel_index
#################################################################
y = copy.deepcopy(Y_epoch[:,label]) #valence
print("y.shape: ", y.shape)
dfscores = None
if(method == 'RandomForest'):
'''Random Forest Feature Importances'''
# estimator = RandomForestRegressor()
estimator = sklearnrfi()
fit = estimator.fit(X,y)
dfscores = pd.DataFrame(fit.feature_importances_)
elif(method == 'RFE'):
''' RFE'''
selector = RFE(clf, n_features_to_select=X.shape[1], step=1)
selector = selector.fit(X, y)
dfscores = pd.DataFrame(selector.ranking_)
elif(method == 'SelectKBest'):
"""SelecKBest"""
#apply SelectKBest class to extract top 10 best features
func = None
if mutual_info == False:
func = f_classif
else:
func = mutual_info_classif
bestfeatures = SelectKBest(score_func=func, k=X.shape[1])
fit = bestfeatures.fit(X,y)
dfscores = pd.DataFrame(fit.scores_)
dfcolumns = pd.DataFrame(X.columns)
#concat two dataframes for better visualization
featureScores = pd.concat([dfcolumns,dfscores],axis=1)
featureScores.columns = ['Specs','Score'] #naming the dataframe columns
features_result = featureScores.nlargest(X.shape[1],'Score')
print(features_result)
features_result.to_csv(pwd + "/" + dataset + "/arousal_plots/" + "CommonElectrodeFSRegressionRanking"+ method + str(window) + str(stride) + ".csv")
###################################################################
topcolumns = features_result['Specs'].values
topfeatures = []
topelectrodes = []
for col in topcolumns:
feature = col[:-2]
electrode = int(col[-2:])
if(feature not in topfeatures):
topfeatures.append(feature)
if(electrode not in topelectrodes):
topelectrodes.append(electrode)
##################################################################################
N = len(topelectrodes)
topRmseList = []
topNList = ["{}".format(x) for x in range(1,N+1)]
for n in range(1,N+1):
electrode_index = topelectrodes[:n]
print(topelectrodes)
print(electrode_index)
# X-Values
featureMatrix = np.empty((len(featuresList)*len(electrode_index),ans.shape[1]))
i = 0
for index in electrode_index:
for key,value in featuresDict.items():
featureMatrix[i,:] = value[index,:]
i = i+1
# i = i+1
featureMatrix = featureMatrix.astype(np.float32)
print(featureMatrix.T.shape)
# Removing NaN Values
if np.isnan(featureMatrix).any():
featureMatrix = np.nan_to_num(featureMatrix,nan=0)
feature_channel_index = []
for index in electrode_index:
for feature in featuresList:
feature_channel_index.append(feature + str(index))
print("Number of Feature-Columns: {}\n".format(len(feature_channel_index)))
X = pd.DataFrame(featureMatrix.T)
X.columns = feature_channel_index
X = X.replace([np.inf, -np.inf], np.nan)
X = X.fillna(0)
print("Features Ready for undergoing selection tests done ...\n")
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
# Normalise-scale data
# Feature Scaling
if(scale == True):
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)
# Apply classfier
# clf = xgb.XGBClassifier(verbose = 5)
clf.fit(X_train, y_train)
y_predict = clf.predict(X_test)
rmse = mean_squared_error(y_test, y_predict,squared=False)
print("window: {}, stide: {}, rmse: {}".format(window,stride,rmse))
topRmseList.append(rmse)
# features_result = features_result.reset_index()
topNElectrode_df = pd.DataFrame(topNList)
topNRmse_df = pd.DataFrame(topRmseList)
#concat two dataframes for better visualization
topNElectrodeRanking = pd.concat([topNElectrode_df, topNRmse_df],axis=1)
topNElectrodeRanking.columns = ['Electrode','RMSE'] #naming the dataframe columns
print(topNElectrodeRanking)
topNElectrodeRanking.to_csv(pwd + "/" + dataset + "/arousal_plots/" + "topCommonElectrodeFSRegressionRanking"+ method + str(window) + str(stride) + ".csv")
# return features_result
# Plotting
fig = plt.gcf()
fig.set_size_inches(20, 10)
plt.rcParams.update({'font.size': 30})
plt.xlabel('Top N Electrodes')
plt.ylabel('RMSE')
plt.plot(topNElectrodeRanking.loc[:,"Electrode"], topNElectrodeRanking.loc[:,"RMSE"])
plt.tight_layout()
# In[ ]:
def topFeatureFSRegressionRanking(dataset, window, stride, sfreq, clf, label, scale=False, pca=False, mutual_info = False, method='SelectKBest'):
# parameters :-
# dataset - name of the dataset
# window - length of the sliding window in seconds
# stride - length of the stride of the sliding window in seconds
# sfreq - sampling frequency of the EEG data
# clf - name of the classifier to be used
# label - valence/arousal/dominance/liking label (shape depends upon the dataset)
# scale - sclaing of the EEG data if required
# mutual_info - Mutual ranking between features based on information theory
# method - 'RandomForest' 'RFE' 'SelectKBest'
# returns :-
# void
pwd = os.getcwd()
fs = sfreq
featurepath = os.getcwd() + '/' + dataset + '/data_extracted/featuresDict/'
ans = np.load((featurepath + "shannonEntropy_{}_{}.npz").format(window,stride), allow_pickle=True)['features']
Y_epoch = np.load((featurepath + "shannonEntropy_{}_{}.npz").format(window,stride), allow_pickle=True)['Y']
print("Number of segments are: {}".format(ans.shape[1]))
featuresDict = None
featuresDict = loadFeaturesDict(dataset)
common = []
with open('intersection.pkl', 'rb') as f:
common = pickle.load(f)
for k in list(featuresDict.keys()):
if k not in common:
# pop out common feature
featuresDict.pop(k)
##################################################################
# featuresToAvoid = ['volt05', 'volt10', 'volt20', 'burstBandPowers','hFD']
featuresList = list(featuresDict.keys())
print(featuresList)
featureMatrix = np.empty((0,ans.shape[1])) #[14*32 + 1,80640]
for key,value in featuresDict.items():
featureMatrix = np.append(featureMatrix,value,axis=0)
if np.isnan(featureMatrix).any():
featureMatrix = np.nan_to_num(featureMatrix,nan=0)
featureMatrix = featureMatrix.astype('float64')
feature_channel_index = []
for feature in featuresList:
for i in range(featuresDict[feature].shape[0]):
if(i>=10):
feature_channel_index.append(feature + str(i))
else:
feature_channel_index.append(feature + '0' + str(i))
print(len(list(featuresDict.keys())))
print("Number of Feature-Columns: {}\n".format(len(feature_channel_index)))
X = pd.DataFrame(featureMatrix.T)
X = X.replace([np.inf, -np.inf], np.nan)
X = X.fillna(0)
X.columns = feature_channel_index
#Remove Variance = 0 features
constant_filter = VarianceThreshold(threshold=0)
constant_filter.fit(X)
constant_columns = [column for column in X.columns
if column not in
X.columns[constant_filter.get_support()]]
X = constant_filter.transform(X)
for column in constant_columns:
feature_channel_index.remove(column)
print(len(feature_channel_index),feature_channel_index )
X = pd.DataFrame(X)
X.columns = feature_channel_index
#################################################################
y = copy.deepcopy(Y_epoch[:,label]) #valence
print("y.shape: ", y.shape)
dfscores = None
if(method == 'RandomForest'):
'''Random Forest Feature Importances'''
estimator = sklearnrfi() #RandomForestRegressor()
fit = estimator.fit(X,y)
dfscores = pd.DataFrame(fit.feature_importances_)
elif(method == 'RFE'):
''' RFE'''
selector = RFE(clf, n_features_to_select=X.shape[1], step=1)
selector = selector.fit(X, y)
dfscores = pd.DataFrame(selector.ranking_)
elif(method == 'SelectKBest'):
"""SelecKBest"""
#apply SelectKBest class to extract top 10 best features
func = None
if mutual_info == False:
func = f_classif
else:
func = mutual_info_classif
bestfeatures = SelectKBest(score_func=func, k=X.shape[1])
fit = bestfeatures.fit(X,y)
dfscores = pd.DataFrame(fit.scores_)
dfcolumns = pd.DataFrame(X.columns)
#concat two dataframes for better visualization
featureScores = pd.concat([dfcolumns,dfscores],axis=1)
featureScores.columns = ['Specs','Score'] #naming the dataframe columns
features_result = featureScores.nlargest(X.shape[1],'Score')
print(features_result)
features_result.to_csv(pwd + "/" + dataset + "/arousal_plots/" + "CommonFeatureFSRegressionRanking"+ method + str(window) + str(stride) + ".csv")
###################################################################
topcolumns = features_result['Specs'].values
topfeatures = []
topelectrodes = []
for col in topcolumns:
feature = col[:-2]
electrode = int(col[-2:])
if(feature not in topfeatures):
topfeatures.append(feature)
if(electrode not in topelectrodes):
topelectrodes.append(electrode)
######################################################################
# TOP-N-FEATURE-RANKING
print(topfeatures)
print(topelectrodes)
N = len(topfeatures)
topNRmseList = []
topNList = ["{}".format(x) for x in range(1,N+1)]
for n in range(1,N+1):
topnfeatures = topfeatures[:n]
# X-Values################################################
featureMatrix = np.empty((0,ans.shape[1]))
for feature in topnfeatures:
featureMatrix = np.append(featureMatrix, featuresDict[feature], axis=0)
featureMatrix = featureMatrix.astype('float64')
print(featureMatrix.T.shape)
feature_channel_index = []
for feature in topnfeatures:
i=0
for i in range(featuresDict[feature].shape[0]):
feature_channel_index.append(feature + str(i))
# Removing NaN Values
if np.isnan(featureMatrix).any():
featureMatrix = np.nan_to_num(featureMatrix,nan=0)
print("Number of Feature-Columns: {}\n".format(len(feature_channel_index)))
X = pd.DataFrame(featureMatrix.T)
X.columns = feature_channel_index
X = X.replace([np.inf, -np.inf], np.nan)
X = X.fillna(0)
print("Features Ready for undergoing selection tests done ...\n")
X = X.astype(np.float32)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
# Normalise-scale data
# Feature Scaling
if(scale == True):
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)
clf.fit(X_train, y_train)
y_predict = clf.predict(X_test)
rmse = mean_squared_error(y_test, y_predict,squared=False)
print("window: {}, stide: {}, rmse: {}".format(window,stride,rmse))
topNRmseList.append(rmse)
topNFeatures_df = pd.DataFrame(topNList)
topNRmse_df = pd.DataFrame(topNRmseList)
#concat two dataframes for better visualization
topNFeaturesRanking = pd.concat([topNFeatures_df, topNRmse_df],axis=1)
topNFeaturesRanking.columns = ['Feature','RMSE'] #naming the dataframe columns
print(topNFeaturesRanking)
topNFeaturesRanking.to_csv(pwd + "/" + dataset + "/arousal_plots/" + "topCommonFeatureFSRegressionRanking"+ method + str(window) + str(stride) + ".csv")
# Plotting
fig = plt.gcf()
fig.set_size_inches(25, 10)
plt.rcParams.update({'font.size': 30})
plt.xlabel('Top N Features')
plt.ylabel('RMSE')
plt.plot(topNFeaturesRanking.loc[:,"Feature"], topNFeaturesRanking.loc[:,"RMSE"])
plt.tight_layout()
# In[ ]:
def topFSColumnsRegressionRanking(dataset, window, stride, sfreq, clf, label, scale=False, pca=False, mutual_info = False, method='SelectKBest'):
# Method C
# parameters :-
# dataset - name of the dataset
# window - length of the sliding window in seconds
# stride - length of the stride of the sliding window in seconds
# sfreq - sampling frequency of the EEG data
# clf - name of the classifier to be used
# label - valence/arousal/dominance/liking label (shape depends upon the dataset)
# scale - sclaing of the EEG data if required
# mutual_info - Mutual ranking between features based on information theory
# method - 'RandomForest' 'RFE' 'SelectKBest'
# returns :-
# void
fs = sfreq
pwd = os.getcwd()
featurepath = os.getcwd() + '/' + dataset + '/data_extracted/featuresDict/'
ans = np.load((featurepath + "shannonEntropy_{}_{}.npz").format(window,stride), allow_pickle=True)['features']
Y_epoch = np.load((featurepath + "shannonEntropy_{}_{}.npz").format(window,stride), allow_pickle=True)['Y']
print("Number of segments are: {}".format(ans.shape[1]))
#X##############################################################################################
featuresDict = None
featuresDict = loadFeaturesDict(dataset)
common = []
with open('intersection.pkl', 'rb') as f:
common = pickle.load(f)
for k in list(featuresDict.keys()):
if k not in common:
# pop out common feature
featuresDict.pop(k)
print("Number of Features:",len(list(featuresDict.keys())))
featuresList = list(featuresDict.keys())
feature_channel_index = []
feature_channel_index = []
for feature in featuresList:
for i in range(featuresDict[feature].shape[0]):
if(i>=10):
feature_channel_index.append(feature +'_'+ str(i))
else:
feature_channel_index.append(feature + '_0' + str(i))
print(len(list(featuresDict.keys())))
print("Number of Feature-Columns: {}\n".format(len(feature_channel_index)))
#defining feature matrix
featureMatrix = np.empty((0,ans.shape[1])) #[14*32 + 1,80640]
for key,value in featuresDict.items():
featureMatrix = np.append(featureMatrix,value,axis=0)
print("Shape of FeatureMatrix: {}\n".format(featureMatrix.T.shape))
#data-imputation and nan-removal
featureMatrix = featureMatrix.astype(np.float32)
if np.isnan(featureMatrix).any():
featureMatrix = np.nan_to_num(featureMatrix,nan=0)
X = pd.DataFrame(featureMatrix.T)
X = X.replace([np.inf, -np.inf], np.nan)
X = X.fillna(0)
X.columns = feature_channel_index
#Y#####################################################################
y = Y_epoch[:,label] #valence
# y = pd.DataFrame(y)
########################################################################
dfscores = None
if(method == 'RandomForest'):
'''Random Forest Feature Importances'''
estimator = sklearnrfi() #RandomForestRegressor()
fit = estimator.fit(X,y)
dfscores = pd.DataFrame(fit.feature_importances_)
elif(method == 'RFE'):
''' RFE'''
selector = RFE(clf, n_features_to_select=X.shape[1], step=1)
selector = selector.fit(X, y)
dfscores = pd.DataFrame(selector.ranking_)
elif(method == 'SelectKBest'):
"""SelecKBest"""
#apply SelectKBest class to extract top 10 best features
func = None
if mutual_info == False:
func = f_classif
else:
func = mutual_info_classif
bestfeatures = SelectKBest(score_func=func, k=X.shape[1])
fit = bestfeatures.fit(X,y)
dfscores = pd.DataFrame(fit.scores_)
dfcolumns = pd.DataFrame(X.columns)
#concat two dataframes for better visualization
featureScores = pd.concat([dfcolumns,dfscores],axis=1)
featureScores.columns = ['Column','Score'] #naming the dataframe columns
features_result = featureScores.nlargest(X.shape[1],'Score')
print(features_result)
N = len(feature_channel_index)
topNRmseList = []
topNList = ["{}".format(x) for x in range(1,N+1)]
for n in range(1, N+1):
ranking_df = features_result.head(n)
topncols = ranking_df['Column'].tolist()
input_df = pd.DataFrame(X[topncols])
X_train, X_test, y_train, y_test = train_test_split(input_df, y, test_size=0.2, random_state=42)
# Normalise-scale data
# Feature Scaling
if(scale == True):
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)
# Apply classfier
clf.fit(X_train, y_train)
y_predict = clf.predict(X_test)
rmse = mean_squared_error(y_test, y_predict, squared=False)
print(n,rmse)
topNRmseList.append(rmse)
topcol_df = pd.DataFrame(topNList)
toprmse_df = pd.DataFrame(topNRmseList)
#concat two dataframes for better visualization
topcolRanking = pd.concat([topcol_df, toprmse_df],axis=1)
topcolRanking.columns = ['Column','RMSE'] #naming the dataframe columns
topfeatures_result = topcolRanking
print(topfeatures_result)
topfeatures_result.to_csv(pwd + "/" + dataset + "/arousal_plots/" + "topFSColumnsRegressionRanking"+method + str(window) + str(stride) + ".csv")
# Plotting
fig = plt.gcf()
fig.set_size_inches(60, 9)
plt.xlabel('Top N Columns')
plt.ylabel('RMSE')
plt.title("Top N Columns v/s RMSE Plot for Window:{} Stride:{} epoched data by varying N".format(window,stride))
plt.plot(topfeatures_result.loc[:,"Column"], topfeatures_result.loc[:,"RMSE"])
plt.tight_layout()
plt.savefig(pwd + "/" + dataset + "/arousal_plots/" + "topFSColumnsRegressionRanking"+method + str(window) + str(stride) + ".svg", bbox_inches='tight', dpi=500)
plt.show()
plt.clf()
# In[ ]:
if __name__ == '__main__':
pass
+319
Ver Arquivo
@@ -0,0 +1,319 @@
#!/usr/bin/env python
# coding: utf-8
# In[ ]:
get_ipython().system('git clone -l -s https://github.com/sari-saba-sadiya/EEGExtract.git cloned-repo')
get_ipython().run_line_magic('cd', 'cloned-repo')
get_ipython().system('ls')
# In[ ]:
get_ipython().system('pip install -r requirements.txt')
# In[ ]:
from google.colab import drive
drive.mount('/gdrive',force_remount=True)
# In[ ]:
get_ipython().system('pip install pyinform')
# In[ ]:
get_ipython().run_line_magic('cd', '../../gdrive/MyDrive/emotion_recognition_project')
# In[ ]:
import EEGExtract as eeg
from scipy import io,signal
import numpy as np
import pandas as pd
from sklearn import preprocessing
import pandas as pd
import pickle
import matplotlib.pyplot as plt
# In[ ]:
class load_data:
'''
Load the preprocessed data here, store the paramters
'''
def __init__(self,name):
self.name = name #name of dataset
self.X = None
self.Y = None
self.Z = None
self.freq = None #(in Hz) is same for all datasets
self.channels = None
self.ch_type = 'eeg'
self.eegData = None
self.use_autoreject = 'y'
self.no_of_subjects = None
def load_arrays(self):
if self.name == 'DREAMER':
array = np.load('original_data/DREAMER.npz')
self.freq = 128
self.no_of_subjects = 23
self.channels = ['AF3','F7','F3','FC5','T7','P7','O1','O2','P8','T8','FC6','F4','F8','AF4']
if self.name == 'DEAP':
array = np.load('original_data/DEAP.npz')
self.no_of_subjects = 32
self.freq = 128
# 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39
self.channels = ['F1', 'AF3', 'F3', 'F7', 'FC5', 'FC1', 'C3', 'T7', 'CP5', 'CP1', 'P3', 'P7', 'PO3', 'O1', 'Oz', 'Pz', 'Fp2', 'AF4', 'Fz', 'F4', 'F8', 'FC6', 'FC2', 'Cz', 'C4', 'T8', 'CP6', 'CP2', 'P4', 'P8', 'PO4', 'O2', 'hEOG','vEOG', 'zEMG','tEMG','GSR','Respiration belt','Plethysmograph','Temperature']
if self.name == 'OASIS':
#array = np.load('original_data/OASIS.npz')
self.no_of_subjects = 15
if self.use_autoreject == 'y':
with open('preprocessed_data/oasis/with_autoreject.p','rb') as file:
self.X = pickle.load(file)
self.channels = ['AF3', 'F7', 'F3', 'FC5', 'T7', 'P7', 'O1', 'O2', 'P8', 'T8', 'FC6', 'F4', 'F8', 'AF4']
self.freq = 128
self.X ,self.Y= merge_dictionary(self.X)
(a,b,c) = self.X.shape
self.X = np.reshape(self.X,(a,c,b))
else:
array = np.load('preprocessed_data/oasis/without_autoreject.npz')
self.freq = 128
self.channels = ['AF3','F7','F3','FC5','T7','P7','O1','O2','P8','T8','FC6','F4','F8','AF4']
self.X = array['X']
self.Y = array['Y']
(a,b,c) = self.X.shape
self.X = np.reshape(self.X,(a,c,b))
else:
self.X = array['X']
if self.name == 'DEAP':
self.X = self.X[:,:,[1,3,2,4,7,11,13,31,29,25,21,19,20,17]] # To maintain uniformity across all datasets, only 14 electrodes selected
self.channels = ['AF3', 'F7', 'F3', 'FC5', 'T7', 'P7', 'O1', 'O2', 'P8', 'T8', 'FC6', 'F4', 'F8', 'AF4']
if self.name != 'OASIS':
self.Y = array['Y']
#self.Z = array['Z']
self.reshape_data()
def reshape_data(self):
'''
reshapes data in the format EEGExtract module expects i.e channels x timepoints x epochs
'''
(epochs,timepoints,channels) = self.X.shape
self.eegData = np.reshape(self.X,(channels,timepoints,epochs))
# In[ ]:
def merge_dictionary(dictionary):
'''
merge all trial data to form one array
'''
no_of_trials = len(list(dictionary.keys()))
no_of_channels = dictionary[1][0].shape[1]
length_of_segment = dictionary[1][0].shape[2]
no_of_epochs_per_trial = dictionary[1][0].shape[0]
X = np.empty((0,no_of_channels,length_of_segment))
Y = np.empty((0,2))
for trial,lst in dictionary.items():
array = dictionary[trial][0]
score = dictionary[trial][3]
X = np.append(X,array,axis = 0)
for epoch in range(no_of_epochs_per_trial):
Y = np.append(Y,np.expand_dims(score,axis =0),axis = 0)
return X,Y
# In[ ]:
def calculate_diffrential_entropy_for_bands(eegData,freq):
# Function to calculate the differential entropy for the different bands of EEG data
# parameters :-
# eegData :- The differential EEG signal value
# freq :- sampling frequency of the EEG signal
# returns :-
# bandwise DE
#delta band
delta_band = eeg.filt_data(eegData,0.5,4,freq)
#theta band
theta_band = eeg.filt_data(eegData,4,8,freq)
#alpha bad
alpha_band = eeg.filt_data(eegData,8,12,freq)
#beta band
beta_band = eeg.filt_data(eegData,12,30,freq)
#gamma band
gamma_band = eeg.filt_data(eegData,30,63,freq)
diffrential_entropy_delta = 1/2*np.log(np.var(delta_band,axis = 1)*np.pi*np.e*2)
diffrential_entropy_theta = 1/2*np.log(np.var(theta_band,axis = 1)*np.pi*np.e*2)
diffrential_entropy_alpha = 1/2*np.log(np.var(alpha_band,axis = 1)*np.pi*np.e*2)
diffrential_entropy_beta = 1/2*np.log(np.var(beta_band,axis = 1)*np.pi*np.e*2)
diffrential_entropy_gamma = 1/2*np.log(np.var(gamma_band,axis = 1)*np.pi*np.e*2)
#print(diffrential_entropy_delta.shape,diffrential_entropy_gamma.shape,diffrential_entropy_theta.shape,diffrential_entropy_alpha.shape,diffrential_entropy_beta.shape)
return diffrential_entropy_delta,diffrential_entropy_theta,diffrential_entropy_alpha,diffrential_entropy_beta,diffrential_entropy_gamma
# In[ ]:
#['AF3', 'F7', 'F3', 'FC5', 'T7', 'P7', 'O1', 'O2', 'P8', 'T8', 'FC6', 'F4', 'F8', 'AF4']
# 0 1 2 3 4 5 6 7 8 9 10 11 12 13
def calculate_RASM_DASM(band):
RASM_AF3_AF4 = np.expand_dims(band[0,:]/band[13,:],axis = 0)
RASM_F3_F4 = np.expand_dims(band[2,:]/band[11,:],axis = 0)
RASM_F7_F8 = np.expand_dims(band[1,:]/band[12,:],axis = 0)
RASM_FC5_FC6 = np.expand_dims(band[3,:]/band[10,:],axis = 0)
RASM_O1_O2 = np.expand_dims(band[6,:]/band[7,:],axis = 0)
RASM_P7_P8 = np.expand_dims(band[5,:]/band[8,:],axis=0)
RASM_T7_T8 = np.expand_dims(band[4,:]/band[9,:],axis=0)
DASM_AF3_AF4 = np.expand_dims(band[0,:]-band[13,:],axis = 0)
DASM_F3_F4 = np.expand_dims(band[2,:]-band[11,:],axis = 0)
DASM_F7_F8 = np.expand_dims(band[1,:]-band[12,:],axis = 0)
DASM_FC5_FC6 = np.expand_dims(band[3,:]-band[10,:],axis = 0)
DASM_O1_O2 = np.expand_dims(band[6,:]-band[7,:],axis = 0)
DASM_P7_P8 = np.expand_dims(band[5,:]-band[8,:],axis=0)
DASM_T7_T8 = np.expand_dims(band[4,:]-band[9,:],axis=0)
features = np.empty((0,RASM_AF3_AF4.shape[1]))
features = np.append(features,RASM_AF3_AF4,axis = 0)
features = np.append(features,RASM_F3_F4,axis = 0)
features = np.append(features,RASM_F7_F8,axis = 0)
features = np.append(features,RASM_FC5_FC6,axis = 0)
features = np.append(features,RASM_O1_O2,axis = 0)
features = np.append(features,RASM_P7_P8,axis = 0)
features = np.append(features,RASM_T7_T8,axis = 0)
features = np.append(features,DASM_AF3_AF4,axis = 0)
features = np.append(features,DASM_F3_F4,axis = 0)
features = np.append(features,DASM_F7_F8,axis = 0)
features = np.append(features,DASM_FC5_FC6,axis = 0)
features = np.append(features,DASM_O1_O2,axis = 0)
features = np.append(features,DASM_P7_P8,axis = 0)
features = np.append(features,DASM_T7_T8,axis = 0)
return features.T
# In[ ]:
def epoch_data(X,Y, window, stride, sfreq):
(channels,timepoints,trials )= X.shape
X = np.reshape(X,(trials,channels,timepoints))
segment = int(window*sfreq)
step = int(stride*sfreq)
epochPerTrial = int((timepoints-segment)/step + 1)
count = 0
X_new = np.empty((trials*epochPerTrial,channels,segment))
Y_new = np.empty((trials*epochPerTrial,2))
for trial in range(trials):
for epoch in range(epochPerTrial):
X_new[count,:,:] = X[trial,:,epoch*step:(epoch*step)+segment]
Y_new[count,:] = Y[trial,:2]
count+=1
(trials,channels,timepoints) = X_new.shape
X_new = np.reshape(X_new,(channels,timepoints,trials))
return X_new,Y_new
# In[ ]:
def segregate_data_of_subjects(feature_matrix,dataset,sfreq = 128):
total_samples = feature_matrix.shape[0]
subject_indexes = {}
if dataset.name != 'DEAP AND DREAMER':
samples_per_subject = total_samples//dataset.no_of_subjects
print('samples per subject taken are ',samples_per_subject)
subject_indexes = {}
for i in range(dataset.no_of_subjects):
subject_name = 'subject_' + str(i+1)
subject_indexes[subject_name] = feature_matrix[samples_per_subject*i:samples_per_subject*(i+1),:]
else:
a = feature_matrix[:80640,:]
b = feature_matrix[80640:,:]
print(b.shape)
for i in range(32):
samples_per_subject = 2520
subject_name = 'subject_' + str(i+1)
subject_indexes[subject_name] = a[samples_per_subject*i:samples_per_subject*(i+1),:]
for i in range(0,23):
samples_per_subject = 8190
subject_name = 'subject_' + str(i+1+32)
subject_indexes[subject_name] = b[samples_per_subject*i:samples_per_subject*(i+1),:]
return subject_indexes
# In[ ]:
def driver_code():
dataset = load_data('DREAMER')
dataset.load_arrays()
X = dataset.eegData
Y = dataset.Y
window = 1
stride = 1
X,Y = epoch_data(X,Y,window,stride,128)
print('shape after epoching')
print('X:',X.shape)
print('Y:',Y.shape)
print('')
print('')
delta,theta,alpha,beta,gamma = calculate_diffrential_entropy_for_bands(X,dataset.freq)
bands = {'delta':delta,'theta':theta,'alpha':alpha,'beta':beta,'gamma':gamma}
for name,band in bands.items():
feature_matrix = calculate_RASM_DASM(band) #extracted RASM ,DASM features for each eng band
print(name ,':' ,end = '')
print(feature_matrix.shape)
print(feature_matrix)
np.savez('features/'+dataset.name.lower()+'_RASM_DASM/'+name+'_'+str(window)+'_'+str(stride),features = feature_matrix,Y=Y)
# In[ ]:
driver_code()
# In[ ]:
np.load('features/oasis/without_autoreject/shannonEntropy_1_1.npz')['features']
# In[ ]:
+467
Ver Arquivo
@@ -0,0 +1,467 @@
#!/usr/bin/env python
# coding: utf-8
# In[ ]:
# -*- coding: utf-8 -*-
"""feature_extraction_25GB_RAM.ipynb
Automatically generated by Colaboratory.
Original file is located at
https://colab.research.google.com/drive/1QnVj7GyyJhLPrYF4vBTppMwynXqmOTEJ
"""
# Commented out IPython magic to ensure Python compatibility.
import EEGExtract as eeg
from scipy import io,signal
import numpy as np
import pandas as pd
from sklearn import preprocessing
import pandas as pd
import pickle
class load_data:
'''
Load the preprocessed data here, store the paramters
'''
def __init__(self,name):
self.name = name #name of dataset
self.X = None
self.Y = None
self.Z = None
self.freq = None #(in Hz) is same for all datasets
self.channels = None
self.ch_type = 'eeg'
self.eegData = None
self.use_autoreject = 'n'
def load_arrays(self):
if self.name == 'DREAMER':
array = np.load('original_data/DREAMER.npz')
self.freq = 128
self.channels = ['AF3','F7','F3','FC5','T7','P7','O1','O2','P8','T8','FC6','F4','F8','AF4']
if self.name == 'DEAP':
array = np.load('original_data/DEAP.npz')
self.freq = 128
# 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39
self.channels = ['F1', 'AF3', 'F3', 'F7', 'FC5', 'FC1', 'C3', 'T7', 'CP5', 'CP1', 'P3', 'P7', 'PO3', 'O1', 'Oz', 'Pz', 'Fp2', 'AF4', 'Fz', 'F4', 'F8', 'FC6', 'FC2', 'Cz', 'C4', 'T8', 'CP6', 'CP2', 'P4', 'P8', 'PO4', 'O2', 'hEOG','vEOG', 'zEMG','tEMG','GSR','Respiration belt','Plethysmograph','Temperature']
if self.name == 'OASIS':
#array = np.load('original_data/OASIS.npz')
if self.use_autoreject == 'y':
with open('preprocessed_data/oasis/with_autoreject.p','rb') as file:
self.X = pickle.load(file)
self.channels = ['AF3', 'F7', 'F3', 'FC5', 'T7', 'P7', 'O1', 'O2', 'P8', 'T8', 'FC6', 'F4', 'F8', 'AF4']
self.freq = 128
self.X ,self.Y= merge_dictionary(self.X)
(a,b,c) = self.X.shape
self.X = np.reshape(self.X,(a,c,b))
else:
array = np.load('preprocessed_data/oasis/without_autoreject.npz')
self.freq = 128
self.channels = ['AF3','F7','F3','FC5','T7','P7','O1','O2','P8','T8','FC6','F4','F8','AF4']
self.X = array['X']
self.Y = array['Y']
(a,b,c) = self.X.shape
self.X = np.reshape(self.X,(a,c,b))
else:
self.X = array['X']
if self.name == 'DEAP':
self.X = self.X[:,:,[1,3,2,4,7,11,13,31,29,25,21,19,20,17]] # To maintain uniformity across all datasets, only 14 electrodes selected
self.channels = ['AF3', 'F7', 'F3', 'FC5', 'T7', 'P7', 'O1', 'O2', 'P8', 'T8', 'FC6', 'F4', 'F8', 'AF4']
if self.name != 'OASIS':
self.Y = array['Y']
#self.Z = array['Z']
self.reshape_data()
def reshape_data(self):
'''
reshapes data in the format EEGExtract module expects i.e channels x timepoints x epochs
'''
(epochs,timepoints,channels) = self.X.shape
self.eegData = np.reshape(self.X,(channels,timepoints,epochs))
class features:
############################ Complexity Features #############################
#1>
@staticmethod
def ShannonRes(eegData,**args):
#Shannon Entropy
ShannonRes = eeg.shannonEntropy(eegData, bin_min=-200, bin_max=200, binWidth=2)
return ShannonRes
#2>
@staticmethod
def ShannonRes_sub_band_delta(eegData,fs):
# Subband Information Quantity
# delta (0.5–4 Hz)
eegData_delta = eeg.filt_data(eegData, 0.5, 4, fs)
ShannonRes_delta = eeg.shannonEntropy(eegData_delta, bin_min=-200, bin_max=200, binWidth=2)
return ShannonRes_delta
#3>
@staticmethod
def ShannonRes_sub_band_theta(eegData,fs):
# theta (4–8 Hz)
eegData_theta = eeg.filt_data(eegData, 4, 8, fs)
ShannonRes_theta = eeg.shannonEntropy(eegData_theta, bin_min=-200, bin_max=200, binWidth=2)
return ShannonRes_theta
#4>
@staticmethod
def ShannonRes_sub_band_alpha(eegData,fs):
# alpha (8–12 Hz)
eegData_alpha = eeg.filt_data(eegData, 8, 12, fs)
ShannonRes_alpha = eeg.shannonEntropy(eegData_alpha, bin_min=-200, bin_max=200, binWidth=2)
return ShannonRes_alpha
#5>
@staticmethod
def ShannonRes_sub_band_beta(eegData,fs):
# beta (12–30 Hz)
eegData_beta = eeg.filt_data(eegData, 12, 30, fs)
ShannonRes_beta = eeg.shannonEntropy(eegData_beta, bin_min=-200, bin_max=200, binWidth=2)
return ShannonRes_beta
#6>
@staticmethod
def ShannonRes_sub_band_gamma(eegData,fs):
# gamma (30–100 Hz)
eegData_gamma = eeg.filt_data(eegData, 30, 63, fs)
ShannonRes_gamma = eeg.shannonEntropy(eegData_gamma, bin_min=-200, bin_max=200, binWidth=2)
return ShannonRes_gamma
#7>
@staticmethod
def Hojorth_Mobility(eegData,**args):
# Hjorth Mobility
# Hjorth Complexity
HjorthMob, HjorthComp = eeg.hjorthParameters(eegData)
return HjorthMob
#8>
@staticmethod
def Hojorth_Complexity(eegData,**args):
# Hjorth Mobility
# Hjorth Complexity
HjorthMob, HjorthComp = eeg.hjorthParameters(eegData)
return HjorthComp
#9>
@staticmethod
def False_Nearest_Neighbour(eegData,**args):
# False Nearest Neighbor
FalseNnRes = eeg.falseNearestNeighbor(eegData)
return FalseNnRes
##############################################################################
########################Category Features#####################################
#10>
@staticmethod
def median_frequency(eegData,fs):
#fs-sampling frequency
# Median Frequency
medianFreqRes = eeg.medianFreq(eegData,fs)
return medianFreqRes
#11>
@staticmethod
def band_power_delta(eegData,fs):
#fs - sampling frequency
# δ band Power
bandPwr_delta = eeg.bandPower(eegData, 0.5, 4, fs)
return bandPwr_delta
#12>
@staticmethod
def band_power_theta(eegData,fs):
#fs - sampling frequency
# θ band Power
bandPwr_theta = eeg.bandPower(eegData, 4, 8, fs)
return bandPwr_theta
#13>
@staticmethod
def band_power_alpha(eegData,fs):
#fs - sampling frequency
# α band Power
bandPwr_alpha = eeg.bandPower(eegData, 8, 12, fs)
return bandPwr_alpha
#14>
@staticmethod
def band_power_beta(eegData,fs):
#fs - sampling frequency
# β band Power
bandPwr_beta = eeg.bandPower(eegData, 12, 30, fs)
return bandPwr_beta
#15>
@staticmethod
def band_power_gamma(eegData,fs):
#fs - sampling frequency
# γ band Power
bandPwr_gamma = eeg.bandPower(eegData, 30, 63, fs)
return bandPwr_gamma
#16>
@staticmethod
def standard_deviation(eegData,**args):
# Standard Deviation
std_res = eeg.eegStd(eegData)
return std_res
#17>
@staticmethod
def regularity(eegData,fs):
# Regularity (burst-suppression)
regularity_res = eeg.eegRegularity(eegData,fs)
return regularity_res
#18>
@staticmethod
def Diffuse_slowing(eegData,**args):
# Diffuse Slowing
df_res = eeg.diffuseSlowing(eegData)
return df_res
#19>
@staticmethod
def Spikes(eegData,fs,**args):
# Spikes
minNumSamples = int(70*fs/1000)
spikeNum_res = eeg.spikeNum(eegData,minNumSamples)
return spikeNum_res
#20>
@staticmethod
def delta_burst_after_spike(eegData,fs):
# Delta burst after Spike
eegData_delta = eeg.filt_data(eegData, 0.5, 4, fs)
deltaBurst_res = eeg.burstAfterSpike(eegData,eegData_delta,minNumSamples=7,stdAway = 3)
return deltaBurst_res
#21>
@staticmethod
def Sharp_spike(eegData,fs):
minNumSamples = int(70*fs/1000)
# Sharp spike
sharpSpike_res = eeg.shortSpikeNum(eegData,minNumSamples)
return sharpSpike_res
#22>
@staticmethod
def Number_of_Burst(eegData,fs):
# Number of Bursts
numBursts_res = eeg.numBursts(eegData,fs)
return numBursts_res
#23>
@staticmethod
def Burst_length_u_and_sigma_mean(eegData,fs):
# Burst length μ and σ
burstLenMean_res,burstLenStd_res = eeg.burstLengthStats(eegData,fs)
return burstLenMean_res
#24>
@staticmethod
def Burst_length_u_and_sigma_std(eegData,fs):
burstLenMean_res,burstLenStd_res = eeg.burstLengthStats(eegData,fs)
return burstLenStd_res
#25>
@staticmethod
def no_of_suprression(eegData,fs):
# Number of Suppressions
numSupps_res = eeg.numSuppressions(eegData,fs)
return numSupps_res
#26>
@staticmethod
def Suppression_length_u_and_sigma_mean(eegData,fs):
# Suppression length μ and σ
suppLenMean_res,suppLenStd_res = eeg.suppressionLengthStats(eegData,fs)
return suppLenMean_res
#27>
@staticmethod
def Suppression_length_u_and_sigma_std(eegData,fs):
# Suppression length μ and σ
suppLenMean_res,suppLenStd_res = eeg.suppressionLengthStats(eegData,fs)
return suppLenStd_res
##############################################################################
def merge_dictionary(dictionary):
'''
merge all trial data to form one array
'''
no_of_trials = len(list(dictionary.keys()))
no_of_channels = dictionary[1][0].shape[1]
length_of_segment = dictionary[1][0].shape[2]
no_of_epochs_per_trial = dictionary[1][0].shape[0]
X = np.empty((0,no_of_channels,length_of_segment))
Y = np.empty((0,2))
for trial,lst in dictionary.items():
array = dictionary[trial][0]
score = dictionary[trial][3]
X = np.append(X,array,axis = 0)
for epoch in range(no_of_epochs_per_trial):
Y = np.append(Y,np.expand_dims(score,axis =0),axis = 0)
return X,Y
def epoch_data(X,Y, window, stride, sfreq):
(channels,timepoints,trials )= X.shape
X = np.reshape(X,(trials,channels,timepoints))
segment = int(window*sfreq)
step = int(stride*sfreq)
epochPerTrial = int((timepoints-segment)/step + 1)
count = 0
X_new = np.empty((trials*epochPerTrial,channels,segment))
Y_new = np.empty((trials*epochPerTrial,2))
for trial in range(trials):
for epoch in range(epochPerTrial):
X_new[count,:,:] = X[trial,:,epoch*step:(epoch*step)+segment]
Y_new[count,:] = Y[trial,:2]
count+=1
(trials,channels,timepoints) = X_new.shape
X_new = np.reshape(X_new,(channels,timepoints,trials))
return X_new,Y_new
def driver_code():
dataset_dictionary = {0:'DEAP',1:'OASIS',2:'DREAMER'}
print(dataset_dictionary)
print('enter number for loading dataset')
mapping = int(input())
print('plz wait loading dataset preprocessed arrays')
dataset = load_data(dataset_dictionary[mapping])
if mapping == 1:
print('do you want to use with autoreject data? if yes press y')
boolean = input()
if boolean == 'y':
dataset.use_autoreject = 'y'
dataset.load_arrays()
print('loading complete')
print('shape of data we will use to make features:',dataset.eegData.shape)
print('do you want to segment the data before calculating feature values? y/n')
boolean = input()
if boolean == 'y':
window = float(input('enter window size'))
stride = float(input('enter stride size'))
dataset.eegData,dataset.Y = epoch_data(dataset.eegData,dataset.Y,window,stride,dataset.freq)
print('new shapes of X and Y:',dataset.eegData.shape,' ',dataset.Y.shape)
else:
window = 0
stride = 0
print('features available')
featuresDict = {0:'shannonEntropy',
1:'ShannonRes_sub_bands_alpha',
2:'ShannonRes_sub_bands_beta',
3:'ShannonRes_sub_bands_delta',
4:'ShannonRes_sub_bands_theta',
5:'ShannonRes_sub_bands_gamma',
6:'Hjorth_mobilty',
7:'Hjorth_complexity',
8:'falseNearestNeighbor',
9:'medianFreq',
10:'bandPwr_alpha',
11:'bandPwr_beta',
12:'bandPwr_gamma',
13:'bandPwr_theta',
14:'bandPwr_delta',
15:'stdDev',
16:'diffuseSlowing',
17:'spikeNum',
18:'deltaBurstAfterSpike',
19:'shortSpikeNum',
20:'Sharp spike',
21:'numBursts',
22:'burstLen_u_and_sigma_mean',
23:'burstLen_u_and_sigma_std',
24:'numSuppressions',
25:'suppressionLen_u_and_sigma_mean',
26:'suppressionLen_u_and_sigma_std',
}
featureMethod={0:features.ShannonRes,
1:features.ShannonRes_sub_band_alpha,
2:features.ShannonRes_sub_band_beta,
3:features.ShannonRes_sub_band_delta,
4:features.ShannonRes_sub_band_theta,
5:features.ShannonRes_sub_band_gamma,
6:features.Hojorth_Mobility,
7:features.Hojorth_Complexity,
8:features.False_Nearest_Neighbour,
9:features.median_frequency,
10:features.band_power_alpha,
11:features.band_power_beta,
12:features.band_power_gamma,
13:features.band_power_theta,
14:features.band_power_delta,
15:features.standard_deviation,
16:features.regularity,
17:features.Diffuse_slowing,
18:features.Spikes,
19:features.delta_burst_after_spike,
20:features.Sharp_spike,
21:features.Number_of_Burst,
22:features.Burst_length_u_and_sigma_mean,
23:features.Burst_length_u_and_sigma_std,
24:features.no_of_suprression,
25:features.Suppression_length_u_and_sigma_mean,
26:features.Suppression_length_u_and_sigma_std,
}
print(featuresDict)
#define path for saving before hand in np.savez line below
path = 'features/'
#os.mkdir('features/'+window+'_'+stride)
if dataset.name == 'DEAP':
path = path +'deap/'
elif dataset.name == 'DREAMER':
path = path + 'dreamer/'
else:
if dataset.use_autoreject == 'y':
path = path +'oasis/with_autoreject/'
else:
path = path +'oasis/without_autoreject/'
boolean = input('do you want to individually make features? y/n')
if boolean =='n':
for key in featureMethod.keys():
feature_matrix = featureMethod[key](eegData = dataset.eegData,fs=dataset.freq)
filename = featuresDict[key]
print('saving ---',filename)
np.savez(path+filename+'_'+str(int(window))+'_'+str(int(stride)),features = feature_matrix , Y = dataset.Y)
else:
found_features = False
while not found_features:
print('enter feature no')
key = int(input())
feature_matrix = featureMethod[key](eegData = dataset.eegData,fs=dataset.freq)
filename = featuresDict[key]
print('saving ---',filename)
np.savez(path+filename+'_'+str(int(window))+'_'+str(int(stride)),features = feature_matrix , Y = dataset.Y)
boolean = input('do you want to find more features? y/n ')
if boolean =='n':
found_features = True
print('feature extraction done!!!!')
def __main__():
driver_code()
__main__()
if __name__ == 'main':
driver_code()
+70
Ver Arquivo
@@ -0,0 +1,70 @@
#!/usr/bin/env python
# coding: utf-8
# In[ ]:
# Script to get the feature ranking and electrode ranking through
# Method A :- Random Forest Regressor
# Method B :- F score based Ranking
# Method C :- Random Forest Importances approach
# Main function
from ImportUtils import *
from TopNByFSMethods import *
from TopNByClassifier import *
from args_eeg import args as my_args
if __name__ == '__main__':
# args object to fetch command line inputs
args = my_args()
print(args.__dict__)
pwd = os.getcwd()
dataset = args.dataset
window = args.window
stride = args.stride
sfreq = args.sfreq
model = args.model
label = args.label
approach = args.approach #byclassifier or byfs
ml_algo = args.ml_algo #classification or regression
top = args.top #e or f or ef
fs_method = args.fs_method
#feature extraction
getEpochedFeatures(dataset, window, stride, sfreq, label)
if(top == "e"):
clf = RandomForestRegressor()
topElectrodeRegressionRanking(dataset, window, stride, sfreq, clf, label, scale=False, pca=False)
topElectrodeFSRegressionRanking(dataset, window, stride, sfreq, clf, label, scale=False, pca=False, mutual_info = False, method='SelectKBest')
topElectrodeFSRegressionRanking(dataset, window, stride, sfreq, clf, label, scale=False, pca=False, mutual_info = False, method='RandomForest')
plt.legend(["Method A","Method B", "Method C"])
if(label == 1):
plt.savefig(pwd + "/" + dataset + "/arousal_plots/" + "CorrectedElectrodewiseRanking" + str(window) + str(stride) + ".svg", bbox_inches='tight')
plt.show()
plt.clf()
else:
plt.savefig(pwd + "/" + dataset + "/plots/" + "CorrectedElectrodewiseRanking" + str(window) + str(stride) + ".svg", bbox_inches='tight')
plt.show()
plt.clf()
elif(top == "f"):
clf = RandomForestRegressor()
topFeaturesRegressionRanking(dataset, window, stride, sfreq, clf, label, scale=False, pca=False)
topFeatureFSRegressionRanking(dataset, window, stride, sfreq, clf, label, scale=False, pca=False, mutual_info = False, method='SelectKBest')
topFeatureFSRegressionRanking(dataset, window, stride, sfreq, clf, label, scale=False, pca=False, mutual_info = False, method='RandomForest')
if(label == 1):
plt.legend(["Method A","Method B", "Method C"])
plt.savefig(pwd + "/" + dataset + "/arousal_plots/" + "CorrectedFeaturewiseRanking" + str(window) + str(stride) + ".svg", bbox_inches='tight')
plt.show()
plt.clf()
else:
plt.legend(["Method A","Method B", "Method C"])
plt.savefig(pwd + "/" + dataset + "/plots/" + "CorrectedFeaturewiseRanking" + str(window) + str(stride) + ".svg", bbox_inches='tight')
plt.show()
plt.clf()
+264
Ver Arquivo
@@ -0,0 +1,264 @@
#!/usr/bin/env python
# coding: utf-8
# In[ ]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import sys
from sklearn.preprocessing import MinMaxScaler,StandardScaler
from sklearn.utils import shuffle
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
# In[ ]:
scaler_min_max = MinMaxScaler()
scaler_standard = StandardScaler()
# Either one of the MinMaxScaling or StandardScaling function can be used
def MinMaxScaling(feature_matrix):
global scaler_min_max
scaler_min_max.fit(feature_matrix)
return scaler_min_max.transform(feature_matrix)
def StandardScaling(feature_matrix):
global scaler_standard
scaler_standard.fit(feature_matrix)
print('scaling shape',scaler_standard.mean_.shape)
return scaler.transform(feature_matrix)
architecture = 'sklearn'
if architecture == 'sklearn':
from sklearn.svm import SVR
from sklearn.metrics import accuracy_score
else:
from cuml.svm import SVR
from cuml.metrics import accuracy_score
#
# """##DEAP dataset
# 1> Valence - features selected
# >
# * bandPwr_gamma
# * bandPwr_beta
# * ShannonRes_gamma
# * ShannonRes_beta
# * rasm_gamma
# * dasm_gamma
#
# 2> Arousal - feature selected
# >
# * HjorthMob
# * HjorthComp
# * stdDev
# * bandPwr_theta
# * bandPwr_beta
# * ShannonRes_beta
# * ShannonRes_gamma
# * dasm_beta
# """
# In[ ]:
# now for incremental learning we need to segregate data of subjects
def segregate_data_of_subjects(feature_matrix,total_subjects,sfreq = 128):
'''
reuturs a dictionary which contains the samples data only corresponding to particular subjects of feature matrix
'''
# parameters :-
# feature_matrix :- Vector containing the features mentioned above subject wise, to be used for cross validation
# total_subjects :- Total number of subjects in the study
# sfreq :- sampling frequency of the EEG data
# returns :-
# subject_indexes :- Subject wise features in a dictionary form
total_samples = feature_matrix.shape[0]
subject_indexes = {}
samples_per_subject = total_samples//total_subjects
for i in range(total_subjects):
subject_name = 'subject_' + str(i+1)
subject_indexes[subject_name] = feature_matrix[samples_per_subject*i:samples_per_subject*(i+1),:]
return subject_indexes
# In[ ]:
# now defining a function which carries out the incremenatal learning algo
def training_phase(model,feature_matrix,Y,subject_indexes,number_of_subjects,total_subjects,rmse_score,test_subject):
# parameters :-
# model :- The training model to be used (SVR in this case)
# featrue_matrix :- feature matrix obtained in the above function
# Y :- The Valence and Arousal values as entered by the subjects
# subject_indexes :-Subject wise features in a dictionary form
# number_of_subjects :- Total number of subjects in the study
# total_subjects :- Total number of subjects in the study
# rmse_score :- RMSE of the previous iterations
# test_subject :- Cross validation test subject list
# returns :-
# rmse_score :- Array of rmse scores over the iterations, updated with the rmse score of the current iteration
# test_subject :- Updated Cross validation test subject list
no_of_features = feature_matrix.shape[1]
X = np.empty((0,no_of_features))
print('training on subject_no:',end = ' ')
#create a feature matrix containing data upto subjects given by the number number_of_subjects
#for eg if number of subject ==4 , data of first 4 subjects will be taken and a feature matrix made out of it to feed to the ml model
for subject in range(number_of_subjects):
print(subject+1,end = ' ')
subject_name = 'subject_'+str(subject+1)
subject_data = subject_indexes[subject_name]
X = np.append(X,subject_data,axis=0)
print(' ')
#apply a MinMax scaling to the current iteration feature matrix
X = MinMaxScaling(X)
#now we also need to extract the valence arousal data for the corresponding subject
y = np.empty((0))
total_samples = feature_matrix.shape[0]
samples_per_subject = total_samples//total_subjects
for subject in range(number_of_subjects):
y = Y[:samples_per_subject*(number_of_subjects)]
print('shape of X is :',X.shape)
print('shape of y is :',y.shape)
#shuffling data randomly to feed to model
X,y = shuffle(X,y,random_state = 0)
#doing a train test split of 80:20
X_train,X_test,y_train,y_test = train_test_split(X,y,random_state=0,test_size=0.2)
#training_model
model = model.fit(X_train,y_train)
#testing_model
y_predict = model.predict(X_test)
#calculating rmse values for valence and arousal using model fitted for current iteration
y_rms = np.sqrt(mean_squared_error(y_test,y_predict))
print('rms on y :',y_rms)
print('')
rmse_score.append(y_rms)
test_subject.append(subject_name)
return rmse_score,test_subject
# In[ ]:
def driver_code(save):
# Function to load the features, then train the regressor and will give the validation and test plot
#extracting file data corresponding to valence features
bandPwr_gamma_v = np.load('features/deap/bandPwr_gamma_1_1.npz')
bandPwr_beta_v = np.load('features/deap/bandPwr_beta_1_1.npz')
ShannonRes_gamma_v = np.load('features/deap/ShannonRes_sub_bands_gamma_1_1.npz')
ShannonRes_beta_v = np.load('features/deap/ShanninRes_sub_bands_beta_1_1.npz')
rasm_gamma_v = np.load('features/deap_RASM_DASM/gamma_1_1.npz')#shape of feature is 80640 x 14, be careful to extract only rasm features, i.e first 7 columns
dasm_gamma_v = np.load('features/deap_RASM_DASM/gamma_1_1.npz')
#creating a feature matrix for valence
feature_matrix_valence = np.empty((0,80640))
feature_matrix_valence = np.append(feature_matrix_valence,bandPwr_gamma_v['features'],axis = 0)
feature_matrix_valence = np.append(feature_matrix_valence,bandPwr_beta_v['features'],axis = 0)
feature_matrix_valence = np.append(feature_matrix_valence,ShannonRes_gamma_v['features'],axis = 0)
feature_matrix_valence = np.append(feature_matrix_valence,ShannonRes_beta_v['features'],axis = 0)
feature_matrix_valence = np.append(feature_matrix_valence,rasm_gamma_v['features'].T[:7,:],axis = 0)
feature_matrix_valence = np.append(feature_matrix_valence,dasm_gamma_v['features'].T[7:,:],axis = 0)
feature_matrix_valence = feature_matrix_valence.T#feature matrix is of shape 80640 x 70
#extracting labels
Y_val = bandPwr_gamma_v['Y'][:,0]
#extracting file data corresponding to arousal features
HjorthMob_a = np.load('features/deap/Hjorth_mobilty_1_1.npz')
HjorthComp_a = np.load('features/deap/Hjorth_complexity_1_1.npz')
stdDev_a = np.load('features/deap/stdDev_1_1.npz')
bandPwr_beta_a = np.load('features/deap/bandPwr_beta_1_1.npz')
bandPwr_theta_a = np.load('features/deap/bandPwr_theta_1_1.npz')
ShannonRes_beta_a = np.load('features/deap/ShanninRes_sub_bands_beta_1_1.npz')
ShannonRes_gamma_a = np.load('features/deap/ShannonRes_sub_bands_gamma_1_1.npz')
dasm_beta_a = np.load('features/deap_RASM_DASM/beta_1_1.npz')
#creating feature matrix for arousal
feature_matrix_arousal = np.empty((0,80640))
feature_matrix_arousal = np.append(feature_matrix_arousal,HjorthMob_a['features'],axis = 0)
feature_matrix_arousal = np.append(feature_matrix_arousal,HjorthComp_a['features'],axis = 0)
feature_matrix_arousal = np.append(feature_matrix_arousal,stdDev_a['features'],axis = 0)
feature_matrix_arousal = np.append(feature_matrix_arousal,bandPwr_beta_a['features'],axis = 0)
feature_matrix_arousal = np.append(feature_matrix_arousal,bandPwr_theta_a['features'],axis = 0)
feature_matrix_arousal = np.append(feature_matrix_arousal,ShannonRes_beta_a['features'],axis = 0)
feature_matrix_arousal = np.append(feature_matrix_arousal,ShannonRes_gamma_a['features'],axis = 0)
feature_matrix_arousal = np.append(feature_matrix_arousal,dasm_beta_a['features'].T[7:,:],axis = 0)
feature_matrix_arousal = feature_matrix_arousal.T#shape of feature matrix is 80640 x 105
#extracting labels
Y_aro = HjorthMob_a['Y'][:,1]
model = SVR()#initializing support vector regressor for training
#running incremental learning loop for valence
print('')
print('Incremental training for valence')
print('')
test_subject = []
rmse_val = []
subject_indexes_valence = segregate_data_of_subjects(feature_matrix_valence,32,128)
i = 1
while i <= 32:
rmse_val,test_subject= training_phase(model,feature_matrix_valence,Y_val,subject_indexes_valence,i,32,rmse_val,test_subject)
i+=1
#running incremental learning loop for arousal
print('')
print('Incremental training for arousal ')
print(' ')
model = SVR()#reinitialize model
test_subject = []
rmse_aro = []
subject_indexes_arousal = segregate_data_of_subjects(feature_matrix_arousal,32,128)
i=1
while i<=32:
rmse_aro,test_subject = training_phase(model,feature_matrix_arousal,Y_aro,subject_indexes_arousal,i,32,rmse_aro,test_subject)
i+=1
fig,axe = plt.subplots(1,1,figsize = (40,20))
axe.plot(test_subject,rmse_val,color='r',label='rmse valence')
axe.plot(test_subject,rmse_aro,color = 'g',label='rmse arousal')
axe.set_xlabel('trained upto subject')
axe.set_ylabel('rmse')
axe.set_title('support vector regressor')
axe.legend(loc = 'upper right')
df = pd.DataFrame([rmse_val,rmse_aro],columns = test_subject,index = ['valence rms','arousal rms'])
print(df)
if save == 'y':
plt.savefig('plots/deap/all_feature_valence_arousal_rmse',format = "svg")
df.to_csv('plots/deap/all_features_valence_arousal_rmse.csv')
# In[ ]:
if __name__ == '__main__':
driver_code(sys.argv[1])
+259
Ver Arquivo
@@ -0,0 +1,259 @@
#!/usr/bin/env python
# coding: utf-8
# In[ ]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import sys
from sklearn.preprocessing import MinMaxScaler,StandardScaler
from sklearn.utils import shuffle
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
architecture = 'sklearn'
if architecture == 'sklearn':
from sklearn.svm import SVR
from sklearn.metrics import accuracy_score
else:
from cuml.svm import SVR
from cuml.ensemble import RandomForestRegressor
from cuml.metrics import accuracy_score
# In[ ]:
# Either one of the MinMaxScaling or StandardScaling function can be used
scaler_min_max = MinMaxScaler()
scaler_standard = StandardScaler()
def MinMaxScaling(feature_matrix):
global scaler_min_max
scaler_min_max.fit(feature_matrix)
return scaler_min_max.transform(feature_matrix)
def StandardScaling(feature_matrix):
global scaler_standard
scaler_standard.fit(feature_matrix)
print('scaling shape',scaler_standard.mean_.shape)
return scaler.transform(feature_matrix)
# """##DREAMER dataset
# 1> Valence - features selected
# >
# * HjorthMob
# * HjorthCom
# * stdDev
# * bandPwr_theta
# * ShannonRes_gamma
# * bandPwr_beta
#
# 2> Arousal - feature selected
# >
# * HjorthMob
# * HjorthComp
# * stdDev
# * bandPwr_theta
# * bandPwr_gamma
# * ShannonRes_gamma
# """
# In[ ]:
# now for incremental learning we need to segregate data of subjects
def segregate_data_of_subjects(feature_matrix,total_subjects,sfreq = 128):
'''
reuturs a dictionary which contains the samples data only corresponding to particular subjects of feature matrix
'''
# parameters :-
# feature_matrix :- Vector containing the features mentioned above subject wise, to be used for cross validation
# total_subjects :- Total number of subjects in the study
# sfreq :- sampling frequency of the EEG data
# returns :-
# subject_indexes :- Subject wise features in a dictionary form
total_samples = feature_matrix.shape[0]
subject_indexes = {}
samples_per_subject = total_samples//total_subjects
for i in range(total_subjects):
subject_name = 'subject_' + str(i+1)
subject_indexes[subject_name] = feature_matrix[samples_per_subject*i:samples_per_subject*(i+1),:]
return subject_indexes
# In[ ]:
# now defining a function which carries out the incremenatal learning algo
def training_phase(model,feature_matrix,Y,subject_indexes,number_of_subjects,total_subjects,rmse_score,test_subject):
# parameters :-
# model :- The training model to be used (SVR in this case)
# featrue_matrix :- feature matrix obtained in the above function
# Y :- The Valence and Arousal values as entered by the subjects
# subject_indexes :-Subject wise features in a dictionary form
# number_of_subjects :- Total number of subjects in the study
# total_subjects :- Total number of subjects in the study
# rmse_score :- RMSE of the previous iterations
# test_subject :- Cross validation test subject list
# returns :-
# rmse_score :- Array of rmse scores over the iterations, updated with the rmse score of the current iteration
# test_subject :- Updated Cross validation test subject list
no_of_features = feature_matrix.shape[1]
X = np.empty((0,no_of_features))
print('training on subject_no:',end = ' ')
#create a feature matrix containing data upto subjects given by the number number_of_subjects
#for eg if number of subject ==4 , data of first 4 subjects will be taken and a feature matrix made out of it to feed to the ml model
for subject in range(number_of_subjects):
print(subject+1,end = ' ')
subject_name = 'subject_'+str(subject+1)
subject_data = subject_indexes[subject_name]
X = np.append(X,subject_data,axis=0)
print(' ')
#apply a MinMax scaling to the current iteration feature matrix
X = MinMaxScaling(X)
#now we also need to extract the valence arousal data for the corresponding subject
y = np.empty((0))
total_samples = feature_matrix.shape[0]
samples_per_subject = total_samples//total_subjects
for subject in range(number_of_subjects):
y = Y[:samples_per_subject*(number_of_subjects)]
print('shape of X is :',X.shape)
print('shape of y is :',y.shape)
#shuffling data randomly to feed to model
X,y = shuffle(X,y,random_state = 0)
#doing a train test split of 80:20
X_train,X_test,y_train,y_test = train_test_split(X,y,random_state=0,test_size=0.2)
#training_model
model = model.fit(X_train,y_train)
#testing_model
y_predict = model.predict(X_test)
#calculating rmse values for valence and arousal using model fitted for current iteration
y_rms = np.sqrt(mean_squared_error(y_test,y_predict))
print('rms on y :',y_rms)
print('')
rmse_score.append(y_rms)
test_subject.append(subject_name)
return rmse_score,test_subject
# In[ ]:
def driver_code(save):
# Function to load the features, then train the regressor and will give the validation and test plot
#extracting file data corresponding to valence features
HjorthMob_v = np.load('features/dreamer/Hjorth_mobilty_1_1.npz')
HjorthCom_v = np.load('features/dreamer/Hjorth_complexity_1_1.npz')
stdDev_v = np.load('features/dreamer/stdDev_1_1.npz')
bandPwr_theta_v = np.load('features/dreamer/bandPwr_theta_1_1.npz')
bandPwr_beta_v = np.load('features/dreamer/bandPwr_beta_1_1.npz')
ShannonRes_gamma_v = np.load('features/dreamer/ShannonRes_sub_bands_gamma_1_1.npz')
# creating a feature matrix out of all feature data for valence
feature_matrix_valence = np.empty((0,188370))
feature_matrix_valence = np.append(feature_matrix_valence,HjorthMob_v['features'],axis =0)
feature_matrix_valence = np.append(feature_matrix_valence,HjorthCom_v['features'],axis =0)
feature_matrix_valence = np.append(feature_matrix_valence,stdDev_v['features'],axis =0)
feature_matrix_valence = np.append(feature_matrix_valence,bandPwr_theta_v['features'],axis =0)
feature_matrix_valence = np.append(feature_matrix_valence,bandPwr_beta_v['features'],axis =0)
feature_matrix_valence = np.append(feature_matrix_valence,ShannonRes_gamma_v['features'],axis =0)
feature_matrix_valence = feature_matrix_valence.T # feature matrix becomes of shape 188370 x 84 i.e (samples X features per sample)
# extracting valence values for each sample
Y_val = HjorthMob_v['Y'][:,0]#all features have same valnece labels
#extracting file data corresponding to arousal features
HjorthMob_a = np.load('features/dreamer/Hjorth_mobilty_1_1.npz')
HjorthCom_a = np.load('features/dreamer/Hjorth_complexity_1_1.npz')
stdDev_a = np.load('features/dreamer/stdDev_1_1.npz')
bandPwr_theta_a = np.load('features/dreamer/bandPwr_theta_1_1.npz')
bandPwr_gamma_a = np.load('features/dreamer/bandPwr_gamma_1_1.npz')
ShannonRes_gamma_a = np.load('features/dreamer/ShannonRes_sub_bands_gamma_1_1.npz')
#creating feature matrix for all feature data for arousal
feature_matrix_arousal = np.empty((0,188370))
feature_matrix_arousal = np.append(feature_matrix_arousal,HjorthMob_a['features'],axis = 0)
feature_matrix_arousal = np.append(feature_matrix_arousal,HjorthCom_a['features'],axis = 0)
feature_matrix_arousal = np.append(feature_matrix_arousal,stdDev_a['features'],axis = 0)
feature_matrix_arousal = np.append(feature_matrix_arousal,bandPwr_theta_a['features'],axis = 0)
feature_matrix_arousal = np.append(feature_matrix_arousal,bandPwr_gamma_a['features'],axis = 0)
feature_matrix_arousal = np.append(feature_matrix_arousal,ShannonRes_gamma_a['features'],axis = 0)
feature_matrix_arousal = feature_matrix_arousal.T
#extracting arousal values for
Y_aro = HjorthMob_a['Y'][:,1]#all features have same arousal labels
model =SVR()#initializing support vector regressor for training
#running incremental learning loop for valence
print('')
print('Incremental training for valence')
print('')
test_subject = []
rmse_val = []
subject_indexes_valence = segregate_data_of_subjects(feature_matrix_valence,23,128)
i = 1
while i <= 23:
rmse_val,test_subject= training_phase(model,feature_matrix_valence,Y_val,subject_indexes_valence,i,23,rmse_val,test_subject)
i+=1
#running incremental learning loop for arousal
print('')
print('Incremental training for arousal ')
print(' ')
test_subject = []
rmse_aro = []
subject_indexes_arousal = segregate_data_of_subjects(feature_matrix_arousal,23,128)
i=1
while i<=23:
rmse_aro,test_subject = training_phase(model,feature_matrix_arousal,Y_aro,subject_indexes_arousal,i,23,rmse_aro,test_subject)
i+=1
fig,axe = plt.subplots(1,1,figsize = (40,20))
axe.plot(test_subject,rmse_val,color='r',label = 'rms valence')
axe.plot(test_subject,rmse_aro,color = 'g',label = 'rms arousal')
axe.set_xlabel('trained upto subject')
axe.set_ylabel('rmse')
axe.set_title('support vector regressor')
axe.legend(loc='upper right')
df = pd.DataFrame([rmse_val,rmse_aro],columns = test_subject,index = ['valence rms','arousal rms'])
print(df)
if save == 'y':
plt.savefig('plots/dreamer/all_feature_valence_arousal_rmse',format = "svg")
df.to_csv('plots/dreamer/all_features_valence_arousal_rmse.csv')
# In[ ]:
if __name__ == '__main__':
driver_code(sys.argv[1])
+255
Ver Arquivo
@@ -0,0 +1,255 @@
#!/usr/bin/env python
# coding: utf-8
# In[ ]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import sys
from sklearn.preprocessing import MinMaxScaler,StandardScaler
from sklearn.utils import shuffle
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
from sklearn.svm import SVR
from sklearn.metrics import accuracy_score
# In[ ]:
# Either one of the MinMaxScaling or StandardScaling function can be used
scaler_min_max = MinMaxScaler()
scaler_standard = StandardScaler()
def MinMaxScaling(feature_matrix):
global scaler_min_max
scaler_min_max.fit(feature_matrix)
return scaler_min_max.transform(feature_matrix)
def StandardScaling(feature_matrix):
global scaler_standard
scaler_standard.fit(feature_matrix)
print('scaling shape',scaler_standard.mean_.shape)
return scaler.transform(feature_matrix)
# """##OASIS dataset
# 1> Valence - features selected
# >
# * HjorthMob
# * HjorthComp
# * stdDev
#
# 2> Arousal - feature selected
# >
# * HjorthMob
# """
# In[ ]:
# now for incremental learning we need to segregate data of subjects
def segregate_data_of_subjects(feature_matrix,Y,total_subjects,sfreq = 128):
'''
returns a dictionary which contains the samples data only corresponding to particular subjects of feature matrix
'''
# parameters :-
# feature_matrix :- Vector containing the features mentioned above subject wise, to be used for cross validation
# Y :- The Valence and Arousal values as entered by the subjects
# total_subjects :- Total number of subjects in the study
# sfreq :- sampling frequency of the EEG data
# returns :-
# subject_indexes :- Subject wise features in a dictionary form
# aligned_y :- the y values corresponding to each subject
subject_indexes = { 'subject_1':feature_matrix[:200],
'subject_2':feature_matrix[200:400],
'subject_3':feature_matrix[400:600],
'subject_4':feature_matrix[600:795],
'subject_5':feature_matrix[795:995],
'subject_6':feature_matrix[995:1185],
'subject_7':feature_matrix[1185:1375],
'subject_8':feature_matrix[1375:1575],
'subject_9':feature_matrix[1575:1770],
'subject_10':feature_matrix[1770:1965],
'subject_11':feature_matrix[1965:2160],
'subject_12':feature_matrix[2160:2360],
'subject_13':feature_matrix[2360:2550],
'subject_14':feature_matrix[2550:2740],
'subject_15':feature_matrix[2740:2935]
}
aligned_y = { 'subject_1':Y[:200],
'subject_2':Y[200:400],
'subject_3':Y[400:600],
'subject_4':Y[600:795],
'subject_5':Y[795:995],
'subject_6':Y[995:1185],
'subject_7':Y[1185:1375],
'subject_8':Y[1375:1575],
'subject_9':Y[1575:1770],
'subject_10':Y[1770:1965],
'subject_11':Y[1965:2160],
'subject_12':Y[2160:2360],
'subject_13':Y[2360:2550],
'subject_14':Y[2550:2740],
'subject_15':Y[2740:2935]
}
return subject_indexes,aligned_y
# In[ ]:
# now defining a function which carries out the incremenatal learning algo
def training_phase(model,feature_matrix,Y,subject_indexes,number_of_subjects,total_subjects,rmse_score,test_subject):
# parameters :-
# model :- The training model to be used (SVR in this case)
# featrue_matrix :- feature matrix obtained in the above function
# Y :- The Valence and Arousal values as entered by the subjects
# subject_indexes :-Subject wise features in a dictionary form
# number_of_subjects :- Total number of subjects in the study
# total_subjects :- Total number of subjects in the study
# rmse_score :- RMSE of the previous iterations
# test_subject :- Cross validation test subject list
# returns :-
# rmse_score :- Array of rmse scores over the iterations, updated with the rmse score of the current iteration
# test_subject :- Updated Cross validation test subject list
no_of_features = feature_matrix.shape[1]
X = np.empty((0,no_of_features))
print('training on subject_no:',end = ' ')
#create a feature matrix containing data upto subjects given by the number number_of_subjects
#for eg if number of subject ==4 , data of first 4 subjects will be taken and a feature matrix made out of it to feed to the ml model
for subject in range(number_of_subjects):
print(subject+1,end = ' ')
subject_name = 'subject_'+str(subject+1)
subject_data = subject_indexes[subject_name]
X = np.append(X,subject_data,axis=0)
print(' ')
#apply a MinMax scaling to the current iteration feature matrix
X = MinMaxScaling(X)
#now we also need to extract the valence/arousal data for the corresponding subject
y = np.empty((0))
for subject in range(number_of_subjects):
subject_name = 'subject_'+str(subject+1)
subject_y_data = Y[subject_name]
y = np.append(y,subject_y_data,axis=0)
print('shape of X is :',X.shape)
print('shape of y is :',y.shape)
#shuffling data randomly to feed to model
X,y = shuffle(X,y,random_state = 0)
#doing a train test split of 80:20
X_train,X_test,y_train,y_test = train_test_split(X,y,random_state=0,test_size=0.2)
#training_model
model = model.fit(X_train,y_train)
#testing_model
y_predict = model.predict(X_test)
#calculating rmse values for valence and arousal using model fitted for current iteration
y_rms = np.sqrt(mean_squared_error(y_test,y_predict))
print('rms on y :',y_rms)
print('')
rmse_score.append(y_rms)
test_subject.append(subject_name)
return rmse_score,test_subject
# In[ ]:
def driver_code(save):
#extracting feature data related to valence
HjorthMob_v = np.load('features/oasis/with_autoreject/Hjorth_mobilty_0_0.npz')
HjorthComp_v = np.load('features/oasis/with_autoreject/Hjorth_complexity_0_0.npz')
stdDev_v = np.load('features/oasis/with_autoreject/stdDev_0_0.npz')
#creating feature matrix
feature_matrix_valence = np.empty((0,2935))
feature_matrix_valence = np.append(feature_matrix_valence,HjorthMob_v['features'],axis = 0)
feature_matrix_valence = np.append(feature_matrix_valence,HjorthComp_v['features'],axis = 0)
feature_matrix_valence = np.append(feature_matrix_valence,stdDev_v['features'],axis = 0)
feature_matrix_valence = feature_matrix_valence.T #shape of feature matrix is 2935 x 42
#extracting valence labels
Y_val = HjorthMob_v['Y'][:,0]
#extracting feature data related to arousal
HjorthMob_a = np.load('features/oasis/with_autoreject/Hjorth_mobilty_0_0.npz')
#creating feature matrix for arousal
feature_matrix_arousal = np.empty((0,2935))
feature_matrix_arousal = np.append(feature_matrix_arousal,HjorthMob_a['features'],axis=0)
feature_matrix_arousal = feature_matrix_arousal.T
#extracting arousal labels
Y_aro = HjorthMob_a['Y'][:,1]
model = SVR() #initialize model
#running incremental learning loop for valence
print('')
print('Incremental training for valence')
print('')
test_subject = []
rmse_val = []
subject_indexes_valence,aligned_Y_val = segregate_data_of_subjects(feature_matrix_valence,Y_val,15,128)
i = 1
while i <= 15:
rmse_val,test_subject= training_phase(model,feature_matrix_valence,aligned_Y_val,subject_indexes_valence,i,15,rmse_val,test_subject)
i+=1
#running incremental learning loop for arousal
print('')
print('Incremental training for arousal ')
print(' ')
model = SVR()#reinitialize model
test_subject = []
rmse_aro = []
subject_indexes_arousal,aligned_Y_aro = segregate_data_of_subjects(feature_matrix_arousal,Y_aro,15,128)
i=1
while i<=15:
rmse_aro,test_subject = training_phase(model,feature_matrix_arousal,aligned_Y_aro,subject_indexes_arousal,i,15,rmse_aro,test_subject)
i+=1
fig,axe = plt.subplots(1,1,figsize = (40,20))
axe.plot(test_subject,rmse_val,color='r',label = 'rmse valence')
axe.plot(test_subject,rmse_aro,color = 'g',label = 'rmse arousal')
axe.set_xlabel('trained upto subject')
axe.set_ylabel('rmse')
axe.set_title('support vector regressor')
axe.legend(loc = 'upper right')
df = pd.DataFrame([rmse_val,rmse_aro],columns = test_subject,index = ['valence rms','arousal rms'])
if save == 'y':
plt.savefig('plots/oasis/all_feature_valence_arousal_rmse',format="svg")
df.to_csv('plots/oasis/all_features_valence_arousal_rmse.csv')
# In[ ]:
if __name__ == '__main__':
driver_code(sys.argv[1])
+207
Ver Arquivo
@@ -0,0 +1,207 @@
#!/usr/bin/env python
# coding: utf-8
# In[ ]:
import glob
import pandas as pd
import matplotlib.pyplot as plt
# In[ ]:
from google.colab import drive
drive.mount('/gdrive',force_remount=True)
# In[ ]:
get_ipython().run_line_magic('cd', '/gdrive/MyDrive/emotion_recognition_project/')
# Script to obtain the incremental learning graph for the DEAP, DREAMER and OASIS datasets.
# ##plots for DEAP
# In[ ]:
dataset_deap=glob.glob('plots/deap/*.csv')
# In[ ]:
dataset_deap
# In[ ]:
dataset_svr_deap = pd.read_csv(dataset_deap[0]).T
dataset_svr_deap.columns = ['valence','arousal']
dataset_svr_deap = dataset_svr_deap.drop('Unnamed: 0')
dataset_svr_deap= dataset_svr_deap[::1]
x_deap = range(1,33,1)
dataset_svr_deap
# In[ ]:
fig_deap,axe_deap = plt.subplots(1,1,figsize = (17,10))
axe_deap.plot(x_deap,dataset_svr_deap['valence'],color='green',marker = 'x',markersize=10)
axe_deap.plot(x_deap,dataset_svr_deap['arousal'],color ='red',marker = 'x',markersize=10)
axe_deap.legend(['rfr_valence','rfr_arousal'],)
axe_deap.set_xlabel('trained upto subject')
axe_deap.set_ylabel('RMSE values')
plt.rcParams.update({'font.size':40})
plt.tight_layout()
plt.xticks(x_deap[::3])
# In[ ]:
fig_deap.savefig('final_plots/deap_rfr__valence_arousal_rms.svg')
fig_deap.savefig('final_plots/deap_rfr__valence_arousal_rms.png')
# ##plots for DREAMER
# In[ ]:
dataset_dreamer=glob.glob('plots/dreamer/*.csv')
# In[ ]:
dataset_dreamer
# In[ ]:
dataset_svr_dreamer = pd.read_csv(dataset_dreamer[0]).T
dataset_svr_dreamer.columns = ['valence','arousal']
dataset_svr_dreamer = dataset_svr_dreamer.drop('Unnamed: 0')
x_dreamer = range(1,24,1)
dataset_svr_dreamer= dataset_svr_dreamer[::1]
dataset_svr_dreamer
# In[ ]:
fig_dreamer,axe_dreamer = plt.subplots(1,1,figsize=(17,10))
axe_dreamer.plot(x_dreamer,dataset_svr_dreamer['valence'],color='green',marker = 'x',markersize=10)
axe_dreamer.plot(x_dreamer,dataset_svr_dreamer['arousal'],color ='red',marker = 'x',markersize=10)
axe_dreamer.legend(['rfr_valence','rfr_arousal'],)
axe_dreamer.set_xlabel('trained upto subject')
axe_dreamer.set_ylabel('RMSE values')
plt.rcParams.update({'font.size':40})
plt.tight_layout()
plt.xticks(x_dreamer[::3])
# In[ ]:
fig_dreamer.savefig('final_plots/dreamer_rfr__valence_arousal_rms.svg')
fig_dreamer.savefig('final_plots/dreamer_rfr__valence_arousal_rms.png')
# ##plots for oasis
# In[ ]:
dataset_oasis=glob.glob('plots/oasis/*.csv')
# In[ ]:
dataset_oasis
# In[ ]:
dataset_svr_oasis = pd.read_csv(dataset_oasis[0]).T
dataset_svr_oasis.columns = ['valence','arousal']
dataset_svr_oasis = dataset_svr_oasis.drop('Unnamed: 0')
x_oasis = range(1,16,1)
dataset_svr_oasis= dataset_svr_oasis[::1]
dataset_svr_oasis
# In[ ]:
fig_oasis,axe_oasis = plt.subplots(1,1,figsize=(17,10))
axe_oasis.plot(x_oasis,dataset_svr_oasis['valence'],color='green',marker = 'x',markersize=10)
axe_oasis.plot(x_oasis,dataset_svr_oasis['arousal'],color ='red',marker = 'x',markersize=10)
axe_oasis.set_xlabel('trained upto subject')
axe_oasis.set_ylabel('RMSE values')
axe_oasis.legend(['rfr_valence','rfr_arousal'],loc = 'lower right')
plt.rcParams.update({'font.size':40})
plt.xticks(x_oasis[::3])
plt.tight_layout()
# In[ ]:
fig_oasis.savefig('final_plots/oasis_rfr__valence_arousal_rms.svg')
fig_oasis.savefig('final_plots/oasis_rfr__valence_arousal_rms.png')
# In[ ]:
# In[ ]:
f,a = plt.subplots(3,1,figsize = (40,30))
a[0].plot(x_deap,dataset_svr_deap['valence'],color='green',marker = 'x',markersize=10)
a[0].plot(x_deap,dataset_svr_deap['arousal'],color ='red',marker = 'x',markersize=10)
a[0].legend(['svr_valence','svr_arousal','rfr_valence','rfr_arousal'],)
#a[0].set_xlabel('trained upto subject')
a[0].set_ylabel('RMSE values')
a[0].set_title('DEAP')
a[1].plot(x_dreamer,dataset_svr_dreamer['valence'],color='green',marker = 'x',markersize=10)
a[1].plot(x_dreamer,dataset_svr_dreamer['arousal'],color ='red',marker = 'x',markersize=10)
#a[1].legend(['svr_valence','svr_arousal','rfr_valence','rfr_arousal'],)
#a[1].set_xlabel('trained upto subject')
a[1].set_ylabel('RMSE values')
a[1].set_title('DREAMER')
a[2].plot(x_oasis,dataset_svr_oasis['valence'],color='green',marker = 'x',markersize=10)
a[2].plot(x_oasis,dataset_svr_oasis['arousal'],color ='red',marker = 'x',markersize=10)
a[2].set_xlabel('trained upto subject')
a[2].set_ylabel('RMSE values')
#a[2].legend(['svr_valence','svr_arousal','rfr_valence','rfr_arousal'],loc = 'lower right')
a[2].set_title('OASIS')
plt.rcParams.update({'font.size':40})
plt.tight_layout()
# In[ ]:
f.savefig('final_plots/all_plots_incremental learning.svg')
# In[ ]:
+487
Ver Arquivo
@@ -0,0 +1,487 @@
#!/usr/bin/env python
# coding: utf-8
# In[ ]:
from google.colab import drive
drive.mount('/gdrive',force_remount = True)
# In[ ]:
get_ipython().system('pip install mne')
# In[ ]:
get_ipython().system('pip install autoreject')
# In[ ]:
import numpy as np
import mne
import autoreject
from scipy.stats import pearsonr
import pickle
# In[ ]:
get_ipython().run_line_magic('cd', '/gdrive/MyDrive/emotion_recognition_project/')
# In[ ]:
class preprocessing:
'''
Load the data here, store the paramters
'''
def __init__(self,name):
self.name = name #name of dataset
self.X = None
self.Y = None
self.Z = None
self.gyroscope = None
self.freq = None #(in Hz) is same for all datasets
self.channels = None
self.ch_type = 'eeg'
def load_arrays(self):
'''
loads arrays in object variables of the form
X: trials x channels x timepoints, using reshape method at the end
Y: trials x (valence,arousal)
Z: trials x participant no
'''
if self.name == 'DREAMER':
array = np.load('original_data/DREAMER.npz')
self.freq = 128
self.channels = ['AF3','F7','F3','FC5','T7','P7','O1','O2','P8','T8','FC6','F4','F8','AF4']
if self.name == 'DEAP':
array = np.load('original_data/DEAP.npz')
self.freq = 128
self.channels = ['F1', 'AF3', 'F3', 'F7', 'FC5', 'FC1', 'C3', 'T7', 'CP5', 'CP1', 'P3', 'P7', 'PO3', 'O1', 'Oz', 'Pz', 'Fp2', 'AF4', 'Fz', 'F4', 'F8', 'FC6', 'FC2', 'Cz', 'C4', 'T8', 'CP6', 'CP2', 'P4', 'P8', 'PO4', 'O2', 'hEOG','vEOG', 'zEMG','tEMG','GSR','Respiration belt','Plethysmograph','Temperature']
if self.name == 'OASIS':
array = np.load('original_data/OASIS.npz')
self.channels = ['AF3', 'F7', 'F3', 'FC5', 'T7', 'P7', 'O1', 'O2', 'P8', 'T8', 'FC6', 'F4', 'F8', 'AF4']
self.freq = 128
self.X = array['X']
if self.name == 'DEAP':
self.X = self.X[:,:,:32]
self.channels = self.channels[:32]
if self.name == 'OASIS':
self.gyroscope = array['gyroscope']
self.Y = array['Y']
self.Z = array['Z']
self.reshape_data()
def reshape_data(self):
'''
exchanges last two dimensions of data
'''
(a,b,c) = self.X.shape
self.X = np.reshape(self.X,(a,c,b))
# In[ ]:
class filters():
'''
define filters to be used for preprocessing
'''
@staticmethod
def notch_filter(data,sfreq,notch_freq):
# parameters :-
# data :- EEG data
# sfreq :- sampling frequency
# notch_freq :- frequency of the notch filter (generally 50Hz due to the AC current frequency)
return mne.filter.notch_filter(data,sfreq,np.arange(notch_freq,notch_freq+1,1))
@staticmethod
def butterworth_filter(data,sfreq,lfreq,hfreq):
# parameters :-
# data :- EEG data
# sfreq :- sampling frequency
# lfreq :- low pass frequency value
#hfreq :- high pass frequency value
return mne.filter.filter_data(data = data,sfreq = sfreq,l_freq = lfreq,h_freq = hfreq,method = 'iir',verbose = False)
# In[ ]:
class referencing():
'''
referencing electrodes to some value
'''
@staticmethod
def average(data):
'''
Computes average voltage of all channels for a particular trial and a particular timepoint, and subtracts average value from all channels
'''
temp = data
avg = np.average(temp,axis=1)
avg = np.expand_dims(avg,axis=1)
return temp-avg
# In[ ]:
class autoreject_custom:
'''
Run Auotoreject algorithm here for artifact rejections
'''
#make epoch object
@staticmethod
def raw_object_creation(raw_data,channel_name,ch_types,sfreq):
'''
defining parameters for creation of raw object which will be used for creating an epoch object
retutns raw object after setting parameters
'''
# parameters :-
# raw_data :- EEG data
# channel_name :- Names of the channels of EEG data used
# ch_types :- Whether each channel is EEG/Gyro, etc
# sfreq :- sampling frequency
montage = mne.channels.make_standard_montage('standard_1020')
#creating a info object to create epochs later and setting its montage to 12-20 system
info = mne.create_info(ch_names=channel_name,sfreq=sfreq,ch_types = ch_types,verbose = False)
#create raw object directly from array
raw_object = mne.io.RawArray(data = raw_data,info = info,verbose = False)
#setting montage
raw_object.set_montage(montage)
return raw_object
@staticmethod
def epoch_object_creation(raw_object,start=0,duration=1,tmin=0,tmax=0.99):
'''
making an epoch object which will be used for autoreject algorithm
'''
#creating fixed length events
events = mne.make_fixed_length_events(raw_object,id=1,start=0,duration = duration)
#creating an epoch object
epoch_object = mne.Epochs(raw_object,events = events,preload=True,baseline = None,reject=None,verbose=False,tmin=0,tmax=0.99)
return epoch_object
@staticmethod
def autoreject_algo(epoch_object,n_interpolates,consensus_percs):
'''
cleans the epochs,and returns cleaned epochs,rejecting bad epochs based on optimal parameters calculation
n_interpolates are the ρ values that we would like autoreject to try and consensus_percs are the κ values that autoreject will try
Epochs with more than κ∗N sensors (N total sensors) bad are dropped
'''
ar = autoreject.AutoReject(n_interpolates, consensus_percs, random_state=42,verbose = 'tqdm_notebook',cv=4,n_jobs=10)
#fitting autoreject model to epoch data
ar.fit(epoch_object)
epochs_clean = ar.transform(epoch_object)
evoked_clean = epochs_clean.average()
evoked = epoch_object.average()
return epochs_clean,ar.get_reject_log(epoch_object)
# In[ ]:
class source_decomposition():
@staticmethod
def ica(data,channels,ch_type,sfreq):
# parameters :-
# data :- EEG data
# channels :- Names of the channels of EEG data used
# ch_types :- Whether each channel is EEG/Gyro, etc
# sfreq :- sampling frequency
#defining ICA parameters
raw = autoreject_custom.raw_object_creation(data,channels,ch_type,sfreq)
ica = mne.preprocessing.ICA(method='infomax',n_components=14)
ica.fit_params['max_iter'] =300
ica.fit(raw,verbose=False)
return ica.get_sources(raw).get_data(),ica.mixing_matrix_
# In[ ]:
def process_trial(a,acc_x,acc_y,acc_z):
'''
a are the source signals obtained after decomposition
acc_<> are accelerometer readings in respective axis
'''
# parameters :-
# a :- EEG source signal after ICA
# acc_x :- accelerometer channel along X axis
# acc_y :- accelerometer channel along Y axis
# acc_y :- accelerometer channel along Z axis
#pearson co-eff between each source signal,and accelerometer readings
pcoeff_arr = np.zeros((a.shape[0],3))#array will record p_coeff for each source with x,y,z accelermeter readings
for i in range(a.shape[0]):
source = a[i] #extracting particular source
#calculating pearson co-relation coeff between particular source each of accelerometer axis readings
r_x,_ = pearsonr(source,acc_x)
r_y,_ = pearsonr(source,acc_y)
r_z,_ = pearsonr(source,acc_z)
pcoeff_arr[i,0] = r_x
pcoeff_arr[i,1] = r_y
pcoeff_arr[i,2] = r_z
#print('############')
#calculating mean ,std deviation of pearson co-eff for all sources for each axis i.e X,Y,Z
mean = np.mean(pcoeff_arr,axis = 0)
std = np.std(pcoeff_arr,axis = 0)
error = mean + 2 * std
#calculating which sources differ have pearson co-eff of atleast one axis greater than 2 standard deviation from mean
bad_source_index = []
for i in range(pcoeff_arr.shape[0]):
if pcoeff_arr[i,0] > error[0] or pcoeff_arr[i,1] > error[1] or pcoeff_arr[i,2] > error[2]:
bad_source_index.append(i)
#correcting bad sources by butterworth filter by high pass 3Hz frequency as motion artifacts are said to exist in low power frequencies
for index in bad_source_index:
source_to_be_filtered = a[index]
a[index] = filters.butterworth_filter(source_to_be_filtered,dataset.freq,3,None)#high pass filter 3Hz
return a #return corrected source signals
# In[ ]:
#loading dataset arrays
dataset = preprocessing('OASIS')
dataset.load_arrays()
dataset.gyroscope.shape
# In[ ]:
#referencing electrodes to average value method
average_data = referencing.average(dataset.X)
# In[ ]:
#running butterworth filter (bandpass filter)
filtered_data = filters.notch_filter(average_data,dataset.freq,60)#butterworth_filter(average_data,dataset.freq,0.1,40)
# In[ ]:
no_of_trials = dataset.X.shape[0]
(a,b,c) = dataset.gyroscope.shape
gyroscope_trials = np.reshape (dataset.gyroscope,(a,c,b))# reshaping trials so they are of the shape trials x channels x timepoints
#iterating over all trials and correcting trial data for motion artifact
for trial_n in range(no_of_trials):
print('processing trial no:',trial_n+1)
trial_data = filtered_data[trial_n]
gyroscope_trial = gyroscope_trials[0,4:,:] #only acclerometer values extracted for a particular trial
gyroscope_trial_x = gyroscope_trial[0] # accelerometer x axis reading
gyroscope_trial_y = gyroscope_trial[1] # accelerometer y axis reading
gyroscope_trial_z = gyroscope_trial[2] # accelerometer z axis reading
source_signals,mixing_matrix = source_decomposition.ica(trial_data,dataset.channels,dataset.ch_type,dataset.freq)
corrected_sources = process_trial(source_signals,gyroscope_trial_x,gyroscope_trial_y,gyroscope_trial_z)
#corrected sources are projected back into orignal dimensional space of EEG data using mixing matrix
project_back = np.matmul(mixing_matrix,corrected_sources)
filtered_data[trial_n] = project_back
# In[ ]:
filtered_data.shape
# In[ ]:
no_of_trials = dataset.X.shape[0]
'''
dictionary contains information about each trial
each trial number i is mapped to a list containing the cleaned epochs given by autoreject,boolena array indicating which epoch was dropped,and
a percentage indicating epochs dropped out of total, valence ,arousal rating for trial and image_id
'''
#running autoreject for each trial data
'''
autoreject divides each trial data into 5 epochs of 1 sec segment i.e 640 timepoints into 128 timepoints per epochs,and runs algo on each
epoch,rejecting epochs based on estimated parameters
'''
clean_epochs ={}
for trial in range(no_of_trials):
print('trial no',trial)
temp = filtered_data[trial]
raw_object = autoreject_custom.raw_object_creation(temp,dataset.channels,dataset.ch_type,dataset.freq)
print(raw_object.get_data().shape)
epoch = autoreject_custom.epoch_object_creation(raw_object)
print(epoch.get_data().shape)
#print('epochs shape',epoch.get_data().shape)
clean_epoch,reject_log = autoreject_custom.autoreject_algo(epoch,n_interpolates = np.array([1, 4, 32]),consensus_percs = np.linspace(0, 1.0, 11))
#clean_epochs.append([clean_epoch,reject_log])
if clean_epoch.drop_log_stats() == 0:
clean_epochs[trial+1] = [clean_epoch.get_data(),reject_log.bad_epochs,clean_epoch.drop_log_stats(),dataset.Y[trial],dataset.Z[trial][1]]
# In[ ]:
def driver_code():
#load dataset
dataset_dict = {0:'DEAP',1:'OASIS',2:'DREAMER'}
print(dataset_dict)
print('enter dataset mapping number you want to use')
mapping = int(input())
dataset = preprocessing(dataset_dict[mapping])
dataset.load_arrays()
#referencing
print('next step in preprocessing is referencing')
referencing_dict = {1:'average_referencing'}
print(referencing_dict)
print('enter referencing method')
mapping = int(input())
if mapping ==1 :
averaged_data = referencing.average(dataset.X)
print('next step is applying filters')
filter_dict = {1:'notch_filter',2:"butter_worth_filter"}
#filtering
applyed_filters = False
while applyed_filters == False:
print(filter_dict)
mapping = int(input())
print('sampling frequency of dataset is',dataset.freq)
if mapping == 1 :
print('enter notch frequency')
notch_freq = float(input())
filtered_data = filters.notch_filter(averaged_data,dataset.freq,notch_freq)
if mapping == 2:
print('enter lower frequency')
lfreq = float(input())
print('enter higher frequency')
hfreq = float(input())
filtered_data = filters.butterworth_filter(dataset.X,dataset.freq,lfreq,hfreq)
print('Do you want to apply filters again?enter y/n')
boolean = input()
if boolean == 'n':
applyed_filters = True
print('do you want to save the data preprocessed so far?y/n')
boolean = input()
if boolean == 'y':
filename = input('enter filename to save as')
np.savez('preprocessed_data/'+dataset.name.lower()+'/'+filename,X = dataset.X,Y = dataset.Y)
#if motion artifact correction using gyrscopic data if dataset is oasis
if dataset.name == 'OASIS':
print('do you want to use motion artifact removal using gyroscopic data? y/n')
boolean = input()
if boolean == 'y':
no_of_trials = dataset.X.shape[0]
print('shape of gyroscope data before reshaping is:',dataset.gyroscope.shape)
(a,b,c) = dataset.gyroscope.shape
gyroscope_trials = np.reshape (dataset.gyroscope,(a,c,b))# reshaping trials so they are of the shape trials x channels x timepoints
#iterating over all trials and correcting trial data for motion artifact
for trial_n in range(no_of_trials):
print('processing trial no:',trial_n+1)
trial_data = filtered_data[trial_n]
gyroscope_trial = gyroscope_trials[trial_n,:,:] #only acclerometer values extracted for a particular trial
gyroscope_trial_x = gyroscope_trial[0] # accelerometer x axis reading
gyroscope_trial_y = gyroscope_trial[1] # accelerometer y axis reading
gyroscope_trial_z = gyroscope_trial[2] # accelerometer z axis reading
source_signals,mixing_matrix = source_decomposition.ica(trial_data,dataset.channels,dataset.ch_type,dataset.freq)
corrected_sources = process_trial(source_signals,gyroscope_trial_x,gyroscope_trial_y,gyroscope_trial_z)
#corrected sources are projected back into orignal dimensional space of EEG data using mixing matrix
project_back = np.matmul(mixing_matrix,corrected_sources)
filtered_data[trial_n] = project_back
print(filtered_data.shape)
print('do you want to save the data preprocessed so far?y/n')
boolean = input()
if boolean == 'y':
filename = input('enter filename to save as')
np.savez('preprocessed_data/'+dataset.name.lower()+'/'+filename,X = dataset.X,Y = dataset.Y)
if dataset.name == 'OASIS':
print('do you want to use autoreject? y/n')
boolean = input()
if boolean == 'y':
print('do you want to save this autoreject cleaned data? y/n')
boolean = input()
no_of_trials = dataset.X.shape[0]
'''
dictionary contains information about each trial
each trial number i is mapped to a list containing the cleaned epochs given by autoreject,boolena array indicating which epoch was dropped,and
a percentage indicating epochs dropped out of total, valence ,arousal rating for trial and image_id
'''
#running autoreject for each trial data
'''
autoreject divides each trial data into 5 epochs of 1 sec segment i.e 640 timepoints into 128 timepoints per epochs,and runs algo on each
epoch,rejecting epochs based on estimated parameters
'''
clean_epochs ={}
for trial in range(no_of_trials):
print('trial no',trial)
temp = filtered_data[trial]
raw_object = autoreject_custom.raw_object_creation(temp,dataset.channels,dataset.ch_type,dataset.freq)
print(raw_object.get_data().shape)
epoch = autoreject_custom.epoch_object_creation(raw_object)
print(epoch.get_data().shape)
#print('epochs shape',epoch.get_data().shape)
clean_epoch,reject_log = autoreject_custom.autoreject_algo(epoch,n_interpolates = np.array([1, 4, 32]),consensus_percs = np.linspace(0, 1.0, 11))
#clean_epochs.append([clean_epoch,reject_log])
if clean_epoch.drop_log_stats() == 0:
clean_epochs[trial+1] = [clean_epoch.get_data(),reject_log.bad_epochs,clean_epoch.drop_log_stats(),dataset.Y[trial],dataset.Z[trial][1]]
if boolean == 'y':
with open('preprocessed_data/oasis/with_autoreject.p','wb') as file:
pickle.dump(clean_epochs,file,protocol=pickle.HIGHEST_PROTOCOL)
# In[ ]:
def __main__():
driver_code()
# In[ ]:
__main__()
+106
Ver Arquivo
@@ -0,0 +1,106 @@
#!/usr/bin/env python
# coding: utf-8
# In[ ]:
from google.colab import drive
drive.mount('/gdrive',force_remount = True)
# In[ ]:
get_ipython().run_line_magic('cd', '../gdrive/MyDrive/emotion_recognition_project/')
# # Incremental Learning for OASIS
# **Arguments**
#
#
# ---
#
# save = 'y/n'
#
# ---
#
#
#
#
#
# Eg. if you want to run model on OASIS dataset,don't want to save plots, with
# command would be
#
# !python incremental_learning_OASIS.py n
#
#
#
#
#
# In[ ]:
get_ipython().system('python incremental_learning_oasis.py n')
#
# # Incremental Learning for DEAP
# **Arguments**
#
#
# ---
#
# save = 'y/n'
#
# ---
#
#
#
#
#
# Eg. if you want to run model on DEAP dataset,don't want to save plots, with
# command would be
#
# !python incremental_learning_DEAP.py n
# In[ ]:
get_ipython().system('python incremental_learning_deap.py n ')
#
# # Incremental Learning for DREAMER
# **Arguments**
#
#
# ---
#
# save = 'y/n'
#
# ---
#
#
#
#
#
# Eg. if you want to run model on DEAP dataset,don't want to save plots, with
# command would be
#
# !python incremental_learning_DREAMER.py n
# In[ ]:
get_ipython().system('python incremental_learning_dreamer.py n ')
# In[ ]:
+46
Ver Arquivo
@@ -0,0 +1,46 @@
#!/usr/bin/env python
# coding: utf-8
# In[ ]:
# -*- coding: utf-8 -*-
"""utils.ipynb
Automatically generated by Colaboratory.
Original file is located at
https://colab.research.google.com/drive/1Z2e7rxy64W9WIIcEfH1vyzfVdMNIK8Om
"""
import numpy as np
def epoch_data(X,Y, window, stride, sfreq):
# Fucntion to segment the dataset into epochs
# Parameters :-
# X :- The input EEG signal in the format of channels*timepoints*trials
# Y :- The values for VALD (depending on the dataset) given by the users
# window :- length of the epoch in seconds
# stride :- stride of the sliding window in seconds
# sfreq :- sampling frequency of the EEG signal
(channels,timepoints,trials )= X.shape
X = np.reshape(X,(trials,channels,timepoints))
segment = int(window*sfreq)
step = int(stride*sfreq)
epochPerTrial = int((timepoints-segment)/step + 1)
count = 0
X_new = np.empty((trials*epochPerTrial,channels,segment))
Y_new = np.empty((trials*epochPerTrial,2))
for trial in range(trials):
for epoch in range(epochPerTrial):
X_new[count,:,:] = X[trial,:,epoch*step:(epoch*step)+segment]
Y_new[count,:] = Y[trial,:2]
count+=1
(trials,channels,timepoints) = X_new.shape
X_new = np.reshape(X_new,(channels,timepoints,trials))
return X_new,Y_new