Added Scripts

2021-09-18 23:21:23 +05:30
commit d142115fe6
@@ -0,0 +1,386 @@
 # -*- coding: utf-8 -*-
 """8.5_cross_validate.ipynb
 Automatically generated by Colaboratory.
 Original file is located at
    https://colab.research.google.com/drive/1qEkrFcZ9lLqd6gNgxX8Y8QoXlOhH3wXC
 #Leave One Subject Out Cross Validation
 * DREAMER => Shape After Loading
 X.shape= (414, 58240, 14)  Y.shape= (414, 2)  Z.shape= (414, 2)
 * DEAP => Shape After Loading
 X.shape= (1280, 40, 8064)  Y.shape= (1280, 2)  Z.shape= (1280, 2)
 * OASIS => Shape After Loading
 X.shape= (600, 640, 14)  Y.shape= (600, 2)  Z.shape= (600, 2)
 * i.e. OASIS and DREAMER are of the form X = (rec, timepoints, channels), while DEAP is already (rec, channels, timepoints)
 * reshaping X to (rec, channels, timepoints) for OASIS and DREAMER
 therefore makes sense
 """
 # Notebook shell escape (not plain Python): report the GPU visible to this runtime.
 !nvidia-smi
 """#RAPIDS Package Installation"""
 # Install RAPIDS
 # Clone the RAPIDS Colab helper repository and run its installer script
 # with the "stable" argument.
 !git clone https://github.com/rapidsai/rapidsai-csp-utils.git
 !bash rapidsai-csp-utils/colab/rapids-colab.sh stable
 import sys, os
 # Insert the python3.7 site-packages directory into sys.path immediately
 # before dist-packages. sys.path.index raises ValueError if the
 # dist-packages entry is not present.
 dist_package_index = sys.path.index('/usr/local/lib/python3.7/dist-packages')
 sys.path = sys.path[:dist_package_index] + ['/usr/local/lib/python3.7/site-packages'] + sys.path[dist_package_index:]
 sys.path
 # Execute the helper repository's update_modules.py inside this module's
 # global namespace (the file handle is not closed explicitly).
 exec(open('rapidsai-csp-utils/colab/update_modules.py').read(), globals())
 import cuml
 """-----------------------------------------------------------------------------------------------------------------------------------------------------"""
 from google.colab import drive
 # Mount Google Drive at /gdrive; force_remount=True is passed so an existing
 # mount is replaced.
 drive.mount('/gdrive',force_remount=True)
 # Commented out IPython magic to ensure Python compatibility.
 # NOTE(review): the project-local imports below presumably rely on this
 # directory being the working directory - confirm before running as a script.
 # %cd /gdrive/MyDrive/Project_DEAP/4.1.2021/
 ################################################################################
 import TopNByFSMethods
 import TopNByClassifier
 import EpochedFeatures
 from args_eeg import args as my_args
 import ImportUtils
 from ImportUtils import *
 from TopNByFSMethods import *
 from  TopNByClassifier import *
 from EpochedFeatures import *
 from args_eeg import args as my_args
 from ImportUtils import *
 from TopNByFSMethods import *
 from TopNByClassifier import *
 from EpochedFeatures import *
 from sklearn.svm import SVC
 from DEAP_scripts.ImportUtils import *
 from DEAP_scripts.TopNByFSMethods import *
 from  DEAP_scripts.TopNByClassifier import *
 from DEAP_scripts.EpochedFeatures import *
 from DEAP_scripts.args_eeg import args as my_args
 from sklearn.svm import SVC
 ################################################################################
 # Run-level summaries: every call to cross_validate() appends the mean and the
 # standard deviation of its per-fold RMSEs to these lists; they are pickled
 # and written to CSV at the end of the script.
 mean_rmse = []
 std_rmse = []
 # Seed NumPy's global random number generator.
 np.random.seed(42)
 def cross_validate(dataset, window, stride, sfreq, label, best_features_list):
    """Leave-one-subject-out cross-validation of a random-forest regressor.

    Loads the epoched features saved for ``dataset``, keeps only the features
    named in ``best_features_list`` and then, for every participant, holds
    out that participant's contiguous block of rows, fits a
    ``RandomForestRegressor`` on the remaining rows and records the RMSE on
    the held-out block.

    Side effects: appends the mean and the standard deviation of the fold
    RMSEs to the module-level ``mean_rmse`` / ``std_rmse`` lists, pickles the
    list of fold RMSEs and saves/shows an RMSE-per-participant plot under the
    hard-coded Drive folder.

    Parameters:
        dataset: name of the dataset ('DEAP', 'DREAMER' or 'OASIS').
        window: length of the sliding window in seconds.
        stride: stride of the sliding window in seconds.
        sfreq: sampling frequency of the EEG dataset (accepted for interface
            compatibility; not used by this function).
        label: column index of the regression target in the saved ``Y`` array.
        best_features_list: names of the features to keep.

    Raises:
        ValueError: if ``dataset`` is not one of the supported names
            (previously this ended in a ZeroDivisionError).
    """
    # (number of participants, recordings per participant) for each dataset.
    dataset_layout = {'DEAP': (32, 40), 'DREAMER': (23, 18), 'OASIS': (15, 40)}
    if dataset not in dataset_layout:
        raise ValueError("Unsupported dataset {!r}; expected one of {}".format(dataset, sorted(dataset_layout)))
    numbParticipants, numbRecordings = dataset_layout[dataset]
    featurepath = os.getcwd() + '/' + dataset + '/data_extracted/featuresDict/'
    # The archive supplies the column count of the feature matrix and the
    # per-epoch targets. It is opened once and closed as soon as both arrays
    # are read. allow_pickle=True means only trusted archives should be loaded.
    with np.load((featurepath + "shannonEntropy_{}_{}.npz").format(window, stride), allow_pickle=True) as archive:
        ans = archive['features']
        Y_epoch = archive['Y']
    # Load the saved epoched features and keep only the requested ones
    # (without mutating the dictionary returned by the loader).
    featuresDict = loadFeaturesDict(dataset)
    selected = {name: values for name, values in featuresDict.items() if name in best_features_list}
    featuresList = list(selected.keys())
    print(featuresList)
    # Stack the selected features row-wise; the empty first block fixes the
    # column count even when no feature is selected.
    featureMatrix = np.concatenate([np.empty((0, ans.shape[1]))] + list(selected.values()), axis=0)
    featureMatrix = featureMatrix.astype('float64')
    # Replace NaN and +/-inf by zero in one step. The previous two-stage
    # clean-up turned inf into huge finite numbers whenever a NaN was also
    # present, and those overflowed back to inf in the float32 cast below.
    featureMatrix = np.nan_to_num(featureMatrix, nan=0.0, posinf=0.0, neginf=0.0)
    # Rows become samples (epochs), columns become features.
    X = pd.DataFrame(featureMatrix.T).astype(np.float32)
    Y_epoch = pd.DataFrame(Y_epoch)
    print("Number of feature vectors in X = ", X.shape[1])
    print("X.shape = " ,X.shape)
    # Leave-one-subject-out CV: number of folds = number of participants.
    # NOTE(review): assumes the rows of X are grouped participant by
    # participant in equally sized contiguous blocks - confirm against the
    # feature-extraction step.
    numbEpochs = X.shape[0]//(numbParticipants*numbRecordings)
    print(X.shape[0])
    print("numbParticipants = ", numbParticipants)
    print("numbRecordings = " , numbRecordings)
    print("numbEpochs = ", numbEpochs)
    print(type(X))
    print(type(Y_epoch))
    rows_per_participant = numbRecordings*numbEpochs
    cv_rmse = []
    for i in range(numbParticipants):
        s = i*rows_per_participant
        e = s + rows_per_participant
        X_test = X.iloc[s:e, :]
        y_test = Y_epoch.iloc[s:e, label]
        # Training set = everything before and after the held-out block.
        X_train = np.concatenate((X.iloc[:s, :].values, X.iloc[e:, :].values), axis=0)
        y_train = np.concatenate((Y_epoch.iloc[:s, label].values, Y_epoch.iloc[e:, label].values), axis=0)
        print(X_train.shape, y_train.shape, X_test.shape, y_test.shape)
        clf = RandomForestRegressor()
        clf.fit(X_train, y_train)
        y_predict = clf.predict(X_test)
        # Take the square root explicitly instead of passing squared=False,
        # which newer scikit-learn releases no longer accept.
        rmse = np.sqrt(mean_squared_error(y_test, y_predict))
        print("window: {}, stide: {}, rmse: {}".format(window,stride,rmse))
        cv_rmse.append(rmse)
    print(cv_rmse)
    print("Mean Cross-validation RMSE  = ", np.mean(cv_rmse))
    mean_rmse.append(np.mean(cv_rmse))
    print("Standard Deviation of Cross-validated RMSE = ", np.std(cv_rmse))
    std_rmse.append(np.std(cv_rmse))
    # Persist the per-fold RMSEs.
    with open('/gdrive/MyDrive/Project_DEAP/4.1.2021/{}{}_cv_rmse.pkl'.format(dataset,label), 'wb') as f:
        pickle.dump(cv_rmse, f)
    # RMSE per held-out participant.
    fig = plt.gcf()
    fig.set_size_inches(40, 20)
    plt.rcParams.update({'font.size': 40})
    plt.xlabel('Partipant No.')
    plt.ylabel('RMSE')
    plt.plot([str(x+1) for x in range(len(cv_rmse))], cv_rmse, linestyle='-', marker='o', color='b', markerfacecolor='r', linewidth=2.0, markersize = 15)
    plt.tight_layout()
    plt.savefig("/gdrive/MyDrive/Project_DEAP/4.1.2021/CV_{}_{}.svg".format(dataset, label), bbox_inches='tight', dpi=500)
    plt.show()
    plt.clf()
 def main(dataset, window, stride, sfreq, model, label, approach, ml_algo, top, fs_method, best_features_list, run_ranking=False):
    """Run cross-validation and, optionally, the ranking plots for one dataset.

    The ranking code used to sit behind an unconditional ``return`` and could
    never run. It is now reachable through ``run_ranking``; the default keeps
    the previous behaviour (cross-validation only).

    Parameters:
        dataset: name of the dataset.
        window: length of the sliding window in seconds.
        stride: stride of the sliding window in seconds.
        sfreq: sampling frequency of the EEG dataset.
        model, approach, ml_algo, fs_method: accepted for interface
            compatibility; not used by this function.
        label: target column; ``1`` saves ranking plots under
            ``arousal_plots``, any other value under ``plots``.
        top: ``"e"`` for electrode-wise ranking, ``"f"`` for feature-wise
            ranking; any other value skips the ranking step.
        best_features_list: feature names passed on to ``cross_validate``.
        run_ranking: when true, also run the ranking selected by ``top`` and
            save its plot after cross-validation.
    """
    print(locals())
    cross_validate(dataset, window, stride, sfreq, label, best_features_list)
    if not run_ranking:
        return
    if top == "e":
        plot_name = "CorrectedElectrodewiseRanking"
        rank_plain = topElectrodeRegressionRanking
        rank_with_fs = topElectrodeFSRegressionRanking
    elif top == "f":
        plot_name = "CorrectedFeaturewiseRanking"
        rank_plain = topFeaturesRegressionRanking
        rank_with_fs = topFeatureFSRegressionRanking
    else:
        return
    pwd = os.getcwd()
    clf = RandomForestRegressor()
    # Three rankings are drawn on the same figure, in the order of the legend.
    rank_plain(dataset, window, stride, sfreq, clf, label, scale=False, pca=False)
    for method in ('SelectKBest', 'RandomForest'):
        rank_with_fs(dataset, window, stride, sfreq, clf, label, scale=False, pca=False, mutual_info = False, method=method)
    plt.legend(["Method A","Method B", "Method C"])
    plot_dir = "/arousal_plots/" if label == 1 else "/plots/"
    plt.savefig(pwd + "/" + dataset + plot_dir + plot_name + str(window) + str(stride) + ".svg", bbox_inches='tight')
    plt.show()
    plt.clf()
 if __name__ == '__main__':
    # One entry per experiment: (dataset, label, best features).
    # Label 0 is valence, label 1 is arousal.
    experiments = [
        ('DREAMER', 0, ['HjorthMob','HjorthComp','stdDev','bandPwr_theta','ShannonRes_gamma','bandPwr_beta']),
        ('DREAMER', 1, ['HjorthMob','ShannonRes_gamma','HjorthComp','stdDev','bandPwr_gamma', 'bandPwr_theta']),
        ('DEAP', 0, ['bandPwr_gamma','ShannonRes_gamma','ShannonRes_beta','rasm_gamma','dasm_gamma','bandPwr_beta']),
        ('DEAP', 1, ['HjorthMob','HjorthComp','stdDev','ShannonRes_gamma','bandPwr_beta','bandPwr_theta','ShannonRes_beta','dasm_beta']),
        ('OASIS', 0, ['HjorthMob','stdDev','HjorthComp']),
        ('OASIS', 1, ['HjorthMob']),
    ]
    # Every experiment uses the same window, stride, sampling rate and model
    # settings; only the dataset, label and feature list vary.
    for experiment_dataset, experiment_label, best_features_list in experiments:
        main(dataset=experiment_dataset, window=1, stride=1, sfreq=128, model='rfr', label=experiment_label, approach='byfs', ml_algo='regression', top='f', fs_method='SelectKBest', best_features_list=best_features_list)
    # Equivalent command-line style arguments for one run:
    # --dataset DREAMER --window 1 --stride 1 --sfreq 128 --model rfr --label 0 --approach byfs --ml_algo regression --top f --fs_method SelectKBest
 """#MINIMUM RMSE DURING CROSS-VALIDATION 6-6-2021"""
 # Commented out IPython magic to ensure Python compatibility.
 import matplotlib.pyplot as plt
 # %matplotlib inline
 import seaborn as sns
 import copy
 import os
 from scipy import io,signal
 import numpy as np
 import pandas as pd
 import pickle
 # Pickle names follow {Dataset_Name}{0/1}_cv_rmse.pkl, where 0 is Valence and 1 is Arousal.
 pl = ['DREAMER0_cv_rmse.pkl', 'DREAMER1_cv_rmse.pkl', 'DEAP0_cv_rmse.pkl', 'DEAP1_cv_rmse.pkl', 'OASIS0_cv_rmse.pkl', 'OASIS1_cv_rmse.pkl']
 dataset = ['DREAMER', 'DREAMER', 'DEAP', 'DEAP','OASIS','OASIS']
 label = [0,1,0,1,0,1]
 # Smallest per-fold RMSE of every pickled cross-validation run, in the order of pl.
 min_cv_rmse = []
 for pickle_name in pl:
    with open(pickle_name, 'rb') as f:
        cv_rmse = pickle.load(f)
    min_cv_rmse.append(min(cv_rmse))
 print(min_cv_rmse)
 """feature_select_main.py"""
 !pip install dit
 !pip install pyinform
 from ImportUtils import *
 from args_eeg import args as my_args
 """#Plot pickled results"""
 # Commented out IPython magic to ensure Python compatibility.
 import matplotlib.pyplot as plt
 # %matplotlib inline
 import seaborn as sns
 import copy
 import os
 from scipy import io,signal
 import numpy as np
 import pandas as pd
 import pickle
 # with open('/gdrive/MyDrive/Project_DEAP/4.1.2021/{}{}_cv_rmse.pkl'.format(dataset,label), 'rb') as f:
 #     pickle.dump(cv_rmse, f)
 pl = ['DREAMER0_cv_rmse.pkl', 'DREAMER1_cv_rmse.pkl', 'DEAP0_cv_rmse.pkl', 'DEAP1_cv_rmse.pkl', 'OASIS0_cv_rmse.pkl', 'OASIS1_cv_rmse.pkl']
 dataset = ['DREAMER', 'DREAMER', 'DEAP', 'DEAP','OASIS','OASIS']
 label = [0,1,0,1,0,1]
 # Re-draw the RMSE-per-participant curve of every pickled run and save it
 # under cv_stats/ with the dataset name and label in the file name.
 for pickle_name, dataset_name, label_idx in zip(pl, dataset, label):
    with open(pickle_name, 'rb') as f:
        cv_rmse = pickle.load(f)
    fig = plt.gcf()
    fig.set_size_inches(40, 20)
    plt.rcParams.update({'font.size': 50})
    plt.xlabel('Partipant No.')
    plt.ylabel('RMSE')
    # Participants are numbered from 1 on the x axis.
    participant_numbers = [str(number) for number in range(1, len(cv_rmse) + 1)]
    plt.plot(participant_numbers, cv_rmse, linestyle='-', marker='o', color='b', markerfacecolor='r', linewidth=2.0, markersize = 15)
    plt.tight_layout()
    plt.savefig("/gdrive/MyDrive/Project_DEAP/4.1.2021/cv_stats/CV_{}_{}.svg".format(dataset_name, label_idx), bbox_inches='tight', dpi=500)
    plt.show()
    plt.clf()
 # Persist the per-run mean and standard deviation of the fold RMSEs.
 stat_files = (
    ('/gdrive/MyDrive/Project_DEAP/4.1.2021/mean_cv_rmse.pkl', mean_rmse),
    ('/gdrive/MyDrive/Project_DEAP/4.1.2021/std_cv_rmse.pkl', std_rmse),
 )
 for stat_path, stat_values in stat_files:
    with open(stat_path, 'wb') as f:
        pickle.dump(stat_values, f)
 # One CSV row per dataset/label pair (V = valence, A = arousal).
 df = pd.DataFrame({
    'Dataset-Label': ['DREAMER-V','DREAMER-A','DEAP-V','DEAP-A','OASIS-V','OASIS-A'],
    'Mean RMSE': mean_rmse,
    'Std Dev RMSE': std_rmse,
 })
 df.to_csv('/gdrive/MyDrive/Project_DEAP/4.1.2021/cv_rmse_stats.csv')
@@ -0,0 +1,644 @@
 #!/usr/bin/env python
 # coding: utf-8
 # In[ ]:
 import bisect
 import numpy as np
 import pandas as pd
 import pywt
 from scipy import stats, signal, integrate
 from dit.other import tsallis_entropy
 import dit
 import librosa
 import statsmodels.api as sm
 import itertools
 from pyinform import mutualinfo
 from statsmodels import tsa
 from sklearn.metrics import mutual_info_score
 import numpy as np
 from scipy import signal,integrate
 from sklearn.metrics.cluster import normalized_mutual_info_score as normed_mutual_info 
 ################################################
 #	Auxiliary Functions
 ################################################
 ##########
 # Band-pass filter the eegData between lowcut and highcut
 #	eegData: 3D np array [chans x ms x epochs] 
 def filt_data(eegData, lowcut, highcut, fs, order=7):
    """Band-pass filter EEG data along axis 1 with a Butterworth filter.

    The filter is designed and applied as second-order sections. The
    previous transfer-function (b, a) form is numerically fragile for a
    high-order band-pass with low, narrow bands (e.g. 0.5-4 Hz).

    Parameters:
        eegData: array filtered along axis 1.
        lowcut: lower cut-off frequency, in the same unit as ``fs``.
        highcut: upper cut-off frequency, in the same unit as ``fs``.
        fs: sampling frequency.
        order: order passed to ``scipy.signal.butter``.

    Returns:
        Array of the same shape as ``eegData`` holding the filtered signal.
    """
    nyq = 0.5 * fs
    # Cut-offs are expressed as fractions of the Nyquist frequency.
    band = [lowcut / nyq, highcut / nyq]
    sos = signal.butter(order, band, btype='band', output='sos')
    return signal.sosfilt(sos, eegData, axis=1)
 #########
 # remove short bursts / spikes 
 def fcnRemoveShortEvents(z,n):
    """Zero out short runs of repeated values in every row of ``z``, in place.

    Each row is scanned from index 2. ``run_len`` counts the equal adjacent
    pairs seen in the current run; when the value changes and fewer than
    ``n`` pairs were counted, the samples from the run's start to its end are
    set to 0. The run still open at the end of a row is never checked.
    Finally a leading 1 that is followed by a 0 is cleared.

    Parameters:
        z: 2-D array indexed [channel, sample]; modified in place.
        n: minimum number of equal adjacent pairs for a run to be kept.

    Returns:
        The same array ``z``.
    """
    for row in z:  # each row is a view, so writes land in z
        run_len = 0
        run_start = 1
        run_end = 1
        for pos in range(2, len(row)):
            if row[pos] == row[pos - 1]:
                run_len += 1
                run_end = pos
                continue
            # The value changed: drop the run that just ended if it was too short.
            if run_len < n:
                row[run_start:run_end + 1] = 0
            run_len = 0
            run_start = run_end = pos
        if row[0] == 1 and row[1] == 0:
            row[0] = 0
    return z
 ##########
 # Find interval of consistent values in binary 1D numpy array
 def get_intervals(A,B,endIdx=500):
    """Pair every index in ``A`` with the next index found in ``B``.

    For each channel ``ii`` and each value ``a`` in ``A[ii]`` an interval
    ``(a, b)`` is produced, where ``b`` is the first element of ``B[ii]``
    that is greater than or equal to ``a`` (``B[ii]`` is searched with
    bisection, so it must be sorted). When no such element exists the
    interval is closed with ``endIdx``.

    Returns:
        A list with one list of ``(start, stop)`` tuples per channel.
    """
    intervals = []
    for chan, starts in enumerate(A):
        chan_intervals = []
        for start in starts:
            stops = B[chan]
            pos = bisect.bisect_left(stops, start)
            stop = stops[pos] if pos < len(stops) else endIdx
            chan_intervals.append((start, stop))
        intervals.append(chan_intervals)
    return intervals
 ##########
 # Detect bursts and supressions in eeg data
 def burst_supression_detection(x,fs,suppression_threshold = 10):
    """Detect burst and suppression intervals in every channel of one epoch.

    The amplitude envelope of each channel is smoothed with a moving average
    of ``int(fs/4)`` samples and thresholded; too-short segments are removed
    with ``fcnRemoveShortEvents`` and the remaining 0/1 transitions are
    turned into intervals with ``get_intervals``.

    Intervals that are still open at the end of the signal are closed with
    the number of samples in ``x`` (previously the hard-coded default of 500
    was used regardless of the signal length).

    Parameters:
        x: 2-D array indexed [channel, sample].
        fs: sampling frequency.
        suppression_threshold: envelope values below this are suppression.

    Returns:
        ``(bursts, supressions)``: two lists holding, per channel, a list of
        ``(start, stop)`` index tuples.
    """
    # Amplitude envelope from the analytic signal.
    envelope = np.abs(signal.hilbert(x, axis=1))
    # Moving average over int(fs/4) samples (same weights as before).
    kernel = np.ones(int(fs/4))/(fs/4)
    ME = np.array([np.convolve(chan_env, kernel, 'same') for chan_env in envelope])
    # 1 where the smoothed envelope is below the threshold.
    z = (ME<suppression_threshold)
    # Remove too-short suppression segments, then too-short burst segments.
    z = fcnRemoveShortEvents(z,fs/2)
    b = fcnRemoveShortEvents(1-z,fs/2)
    z = 1-b
    # Sample indices after which z steps up (0 -> 1) or down (1 -> 0).
    went_high = [np.where(row[:-1] < row[1:])[0].tolist() for row in z]
    went_low = [np.where(row[:-1] > row[1:])[0].tolist() for row in z]
    n_samples = np.shape(x)[1]
    # NOTE(review): z == 1 marks suppression, yet the intervals running from
    # a rise to the next fall are returned as "bursts" - confirm that this
    # naming is intended.
    bursts = get_intervals(went_high,went_low,endIdx=n_samples)
    supressions = get_intervals(went_low,went_high,endIdx=n_samples)
    return bursts,supressions
 ##########
 # Coherence in the Delta Band
 def CoherenceDelta(eegData, i, j, fs=100):
    """Mean coherence between channels ``i`` and ``j`` from 0.5 to 4 Hz.

    Coherence is estimated along the sample axis of each channel, with the
    FFT length set to the number of samples, and averaged over the
    frequencies inside the band.

    Returns:
        1-D array with one value per epoch.
    """
    n_samples = eegData.shape[1]
    freqs, coh = signal.coherence(eegData[i, :, :], eegData[j, :, :], fs=fs, nfft=n_samples, axis=0)
    in_band = (freqs >= 0.5) & (freqs <= 4)
    return np.mean(coh[in_band], axis=0)
 ##########
 # correlation across channels
 def PhaseLagIndex(eegData, i, j):
    """Phase lag index between channels ``i`` and ``j``, per epoch.

    Fixes over the previous version: the undefined name ``ss`` is replaced
    by ``scipy.signal``, and the analytic signal is computed along the
    sample axis (axis 0 of the [sample, epoch] slice) instead of the default
    last axis, which ran across epochs. The former ``arctan`` around the
    phase is dropped: it is strictly increasing, so it never changed the
    sign of the phase difference.

    Parameters:
        eegData: 3-D array indexed [channel, sample, epoch].
        i, j: channel indices.

    Returns:
        1-D array with one value in [0, 1] per epoch.
    """
    # Instantaneous phase of each channel from its analytic signal.
    phase_i = np.angle(signal.hilbert(eegData[i, :, :], axis=0))
    phase_j = np.angle(signal.hilbert(eegData[j, :, :], axis=0))
    # NOTE(review): the difference of wrapped phases is used as-is (as
    # before); the textbook definition takes sign(sin(difference)) - confirm
    # which one is wanted.
    return np.abs(np.mean(np.sign(phase_j - phase_i), axis=0))
 ##########
 # Cross Correlation
 def crossCorrelation(eegData, i, j):
    """Peak prominence of the cross-correlation of channels ``i`` and ``j``.

    For every epoch the full cross-correlation of the two channels is taken
    in absolute value and summarised as ``(max - mean) / std``.

    Returns:
        1-D array with one value per epoch.
    """
    n_epochs = eegData.shape[2]
    out = np.zeros(n_epochs)
    for epoch in range(n_epochs):
        xcorr = np.abs(np.correlate(eegData[i, :, epoch], eegData[j, :, epoch], mode="full"))
        out[epoch] = (np.max(xcorr) - np.mean(xcorr)) / np.std(xcorr)
    return out
 ##########
 # Auxilary Cross-correlation Lag
 def corrCorrLagAux(eegData,ii,jj,Fs=100):
    """Find the circular shift of channel ``jj`` that best matches ``ii``.

    Channel ``jj`` is rolled along the sample axis in steps of
    ``int(0.2*Fs)`` samples and ``crossCorrelation`` is evaluated for every
    shift. The previous version called the undefined name
    ``CrossCorrelation`` with an extra argument and could not run.

    Parameters:
        eegData: 3-D array indexed [channel, sample, epoch]; not modified.
        ii, jj: channel indices.
        Fs: sampling frequency, used only to derive the shift step.

    Returns:
        Per epoch, the position in the shift grid with the largest score
        (multiply by ``int(0.2*Fs)`` to get the shift in samples).
    """
    lag_step = int(0.2*Fs)
    # Work on one copy and overwrite only channel jj for every shift.
    shifted = eegData.copy()
    lagCorr = []
    for lag in range(0,eegData.shape[1],lag_step):
        shifted[jj,:,:] = np.roll(eegData[jj,:,:], lag, axis=0)
        lagCorr.append(crossCorrelation(shifted, ii, jj))
    return np.argmax(lagCorr,axis=0)
 ################################################
 #	bandpower Functions
 ################################################
 ##########
 # compute the bandpower (area under the segment from fband[0] to fband[1], in Hz,
 # of the curve in the frequency domain) of data, at a sampling frequency of fs (usually 100)
 def bandpower(data, fs, fband):
    """Integrate the periodogram of ``data`` over the band ``fband``.

    The previous version used the bare names ``periodogram`` and ``simps``,
    which this module never imports; they are now taken from
    ``scipy.signal`` and ``scipy.integrate``.

    Parameters:
        data: signal; the periodogram is taken along its last axis.
        fs: sampling frequency.
        fband: ``[low, high]`` band limits in the same unit as ``fs``.

    Returns:
        Simpson integral of the power spectral density over the selected
        frequency bins.
    """
    freqs, powers = signal.periodogram(data, fs)
    # Last bin at or below each band limit (argmax finds the first bin above it).
    idx_min = np.argmax(freqs > fband[0]) - 1
    idx_max = np.argmax(freqs > fband[1]) - 1
    idx_delta = np.zeros(dtype=bool, shape=freqs.shape)
    idx_delta[idx_min:idx_max] = True
    # scipy renamed simps to simpson; use whichever this installation has.
    simpson_rule = getattr(integrate, 'simpson', None) or integrate.simps
    bpower = simpson_rule(powers[idx_delta], x=freqs[idx_delta])
    return bpower
 ##########
 # computes the same thing as vecbandpower but with a loop
 def pfvecbandpower(data, fs, fband):
    """Band power of every channel and epoch, computed channel by channel.

    The previous version used the bare names ``periodogram`` and ``simps``,
    which this module never imports; they are now taken from
    ``scipy.signal`` and ``scipy.integrate``.

    Parameters:
        data: 3-D array indexed [channel, sample, epoch].
        fs: sampling frequency.
        fband: ``[low, high]`` band limits in the same unit as ``fs``.

    Returns:
        Array of shape (channels, epochs) with the Simpson integral of the
        power spectral density over the selected frequency bins.
    """
    # scipy renamed simps to simpson; use whichever this installation has.
    simpson_rule = getattr(integrate, 'simpson', None) or integrate.simps
    bpowers = np.zeros((data.shape[0], data.shape[2]))
    for i in range(data.shape[0]):
        freqs, powers = signal.periodogram(data[i, :, :], fs, axis=0)
        # Last bin at or below each band limit (argmax finds the first bin above it).
        idx_min = np.argmax(freqs > fband[0]) - 1
        idx_max = np.argmax(freqs > fband[1]) - 1
        idx_delta = np.zeros(dtype=bool, shape=freqs.shape)
        idx_delta[idx_min:idx_max] = True
        bpowers[i, :] = simpson_rule(powers[idx_delta, :], x=freqs[idx_delta], axis=0)
    return bpowers
 ################################################
 #	Complexity features
 ################################################    
 ##########
 # Extract the Shannon Entropy
 # threshold the signal and make it discrete, normalize it and then compute entropy
 def shannonEntropy(eegData, bin_min, bin_max, binWidth):
    """Shannon entropy of the amplitude histogram of every channel and epoch.

    Samples are binned on the edges ``arange(bin_min+1, bin_max, binWidth)``;
    empty bins are ignored and the remaining counts are normalised to
    probabilities ``p``. The value stored is ``-sum(p * log2(p / binWidth))``.

    Parameters:
        eegData: 3-D array indexed [channel, sample, epoch].
        bin_min, bin_max, binWidth: define the histogram edges.

    Returns:
        Array of shape (channels, epochs).
    """
    n_chan, n_epoch = eegData.shape[0], eegData.shape[2]
    H = np.zeros((n_chan, n_epoch))
    # The edges are the same for every channel and epoch.
    edges = np.arange(bin_min+1, bin_max, binWidth)
    for chan in range(n_chan):
        for epoch in range(n_epoch):
            counts = np.histogram(eegData[chan, :, epoch], bins=edges)[0]
            occupied = counts[counts > 0]
            prob = occupied / np.sum(occupied)
            H[chan, epoch] = -np.dot(prob, np.log2(prob/binWidth))
    return H
 ##########
 # Extract the tsalis Entropy
 def tsalisEntropy(eegData, bin_min, bin_max, binWidth, orders = [1]):
    """Tsallis entropy of the amplitude histogram, for one or more orders.

    Fixes over the previous version:
    * every order now gets its own result array (``[array] * n`` made all
      entries share one array, so each order overwrote the others);
    * the histogram edges follow ``bin_min``, ``bin_max`` and ``binWidth``
      in the same way as ``shannonEntropy`` instead of the hard-coded
      ``arange(-199, 200, 2)``, which is what (-200, 200, 2) still yields.

    Parameters:
        eegData: 3-D array indexed [channel, sample, epoch].
        bin_min, bin_max, binWidth: define the histogram edges.
        orders: entropy orders to evaluate.

    Returns:
        List with one (channels, epochs) array per entry of ``orders``.
    """
    n_chan, n_epoch = eegData.shape[0], eegData.shape[2]
    H = [np.zeros((n_chan, n_epoch)) for _ in orders]
    edges = np.arange(bin_min+1, bin_max, binWidth)
    # Outcome labels only need to be distinct and of equal length; the
    # entropy does not depend on them.
    label_width = len(str(len(edges)))
    outcomes = [str(k).zfill(label_width) for k in range(len(edges) - 1)]
    for chan in range(n_chan):
        for epoch in range(n_epoch):
            counts, _ = np.histogram(eegData[chan,:,epoch], bins=edges)
            dist = dit.Distribution(outcomes, counts/sum(counts))
            for ii,order in enumerate(orders):
                H[ii][chan,epoch] = tsallis_entropy(dist,order)
    return H
 ##########
 # Cepstrum Coefficients (n=2)
 def mfcc(eegData,fs,order=2):
    """First ``order`` MFCC coefficients of every channel and epoch.

    The signal is now passed to ``librosa.feature.mfcc`` as the keyword
    ``y``, which every librosa release accepts; newer releases reject it as
    a positional argument.

    Parameters:
        eegData: 3-D array indexed [channel, sample, epoch].
        fs: sampling frequency, forwarded as ``sr``.
        order: number of leading coefficients to keep.

    Returns:
        Array of shape (channels, epochs, order).
    """
    H = np.zeros((eegData.shape[0], eegData.shape[2],order))
    for chan in range(H.shape[0]):
        for epoch in range(H.shape[1]):
            coeffs = librosa.feature.mfcc(y=np.asfortranarray(eegData[chan,:,epoch]), sr=fs)
            # NOTE(review): the assignment below only broadcasts when librosa
            # returns a single frame for the epoch - confirm the epoch length
            # used by the callers.
            H[chan, epoch, : ] = coeffs[0:order].T
    return H
 ##########
 # Lyapunov exponent
 def lyapunov(eegData):
    """Mean log absolute gradient of the signal along axis 1.

    Returns:
        ``eegData`` reduced over axis 1.
    """
    slope = np.gradient(eegData, axis=1)
    log_abs_slope = np.log(np.abs(slope))
    return np.mean(log_abs_slope, axis=1)
 ##########
 # Fractal Embedding Dimension
 # From pyrem: packadge for sleep scoring from EEG data
 # https://github.com/gilestrolab/pyrem/blob/master/src/pyrem/univariate.py
 def hFD(a, k_max): #Higuchi FD
    """Higuchi fractal dimension of the 1-D array ``a``.

    For every scale ``k`` in ``1 .. k_max-1`` the normalised curve lengths of
    the ``k`` sub-sampled series are averaged; the result is the slope of the
    least-squares line through ``log(mean length)`` against ``log(1/k)``.

    Parameters:
        a: 1-D numpy array.
        k_max: exclusive upper bound of the scales used.

    Returns:
        The fitted slope.
    """
    n = len(a)
    log_lengths = []
    design = []
    for k in range(1, k_max):
        length_sum = 0
        for m in range(k):
            # All step indices of the sub-series starting at offset m.
            steps = np.arange(1, int(np.floor((n - m) / k)), dtype=np.int32)
            path = np.sum(np.abs(a[m + steps * k] - a[m + k * (steps - 1)]))
            length_sum += (path * (n - 1) / (((n - m) / k) * k)) / k
        # k sub-series were accumulated for this scale.
        log_lengths.append(np.log(length_sum / k))
        design.append([np.log(1.0 / k), 1])
    coefficients = np.linalg.lstsq(design, log_lengths)[0]
    return coefficients[0]
 ##########
 # Hjorth Mobility
 # Hjorth Complexity
 # variance = mean(signal^2) iff mean(signal)=0
 # which it is be because I normalized the signal
 # Assuming signals have mean 0
 # Mobility = sqrt( mean(dx^2) / mean(x^2) )
 def hjorthParameters(xV):
    """Hjorth mobility and complexity along axis 1.

    With ``m(v)`` the mean of ``v**2`` along axis 1, ``dx`` the first
    difference and ``ddx`` the second difference of the signal:
    mobility is ``sqrt(m(dx) / m(x))`` and complexity is
    ``sqrt((m(ddx) / m(dx)) / (m(dx) / m(x)))``. The mean squares equal the
    variances only for zero-mean signals.

    Returns:
        ``(mobility, complexity)``, each ``xV`` reduced over axis 1.
    """
    first_diff = np.diff(xV, axis=1)
    second_diff = np.diff(first_diff, axis=1)
    power_x = np.mean(np.square(xV), axis=1)
    power_dx = np.mean(np.square(first_diff), axis=1)
    power_ddx = np.mean(np.square(second_diff), axis=1)
    mobility_sq = power_dx / power_x
    mobility = np.sqrt(mobility_sq)
    # Division (not subtraction) by the squared mobility, as in the original.
    complexity = np.sqrt((power_ddx / power_dx) / mobility_sq)
    return mobility, complexity
 ##########
 # false nearest neighbor descriptor
 def falseNearestNeighbor(eegData, fast=True):
    """False-nearest-neighbour embedding dimension per channel and epoch.

    The previous version did not parse: the body of the lag loop was not
    indented and one comment line had lost its ``#``. Both are restored; the
    algorithm itself is unchanged.

    Parameters:
        eegData: 3-D array indexed [channel, sample, epoch].
        fast: when true (the default) the computation is skipped and zeros
            are returned.

    Returns:
        Array of shape (channels, epochs). In slow mode each entry is the
        first embedding dimension index at which fewer than 10 % of the
        points count as false neighbours.
    """
    # Average Mutual Information
    # There exist good arguments that if the time delayed mutual
    # information exhibits a marked minimum at a certain delay,
    # then this is a good candidate for a reasonable time delay.
    npts = 1000   # not sure about this?
    maxdims = 50
    max_delay = 2 # max_delay = 200  # TODO: need to use 200, but also need to speed this up
    distance_thresh = 0.5
    out = np.zeros((eegData.shape[0], eegData.shape[2]))
    if fast:
        # Fast mode reports 0 for every channel and epoch.
        return out
    for chan in range(eegData.shape[0]):
        for epoch in range(eegData.shape[2]):
            cur_eegData = eegData[chan, :, epoch]
            lagidx = 0  # we are looking for the index of the lag that makes the signal maximally uncorrelated to the original
            minNMI = 1  # normed_mutual_info is from 1 (perfectly correlated) to 0 (not at all correlated)
            for lag in range(1, max_delay):
                x = cur_eegData[:-lag]
                xlag = cur_eegData[lag:]
                # convert float data into histogram bins
                nbins = int(np.floor(1 + np.log2(len(x)) + 0.5))
                x_discrete = np.histogram(x, bins=nbins)[0]
                xlag_discrete = np.histogram(xlag, bins=nbins)[0]
                cNMI = normed_mutual_info(x_discrete, xlag_discrete)
                if cNMI < minNMI:
                    minNMI = cNMI
                    lagidx = lag
            # nearest neighbors part
            knn = int(max(2, 6*lagidx))  # heuristic (number of nearest neighbors to look up)
            m = 1 # lagidx + 1
            # y is the embedded version of the signal (zero padded when the
            # epoch is shorter than the requested window)
            y = np.zeros((maxdims+1, npts))
            for d in range(maxdims+1):
                tmp = cur_eegData[d*m:d*m + npts]
                y[d, :tmp.shape[0]] = tmp
            nnd = np.ones((npts, maxdims))
            nnz = np.zeros((npts, maxdims))
            # see where it tends to settle
            for d in range(1, maxdims):
                for k in range(0, npts):
                    # get the distances to all points in the window (distance given embedding dimension)
                    dists = []
                    for nextpt in range(1, knn+1):
                        if k+nextpt < npts:
                            dists.append(np.linalg.norm(y[:d, k] - y[:d, k+nextpt]))
                    if len(dists) > 0:
                        minIdx = np.argmin(dists)
                        if dists[minIdx] == 0:
                            dists[minIdx] = 0.0000001  # essentially 0 just silence the error
                        nnd[k, d-1] = dists[minIdx]
                        nnz[k, d-1] = np.abs( y[d+1, k] - y[d+1, minIdx+1+k] )
            # aggregate results
            mindim = np.mean(nnz/nnd > distance_thresh, axis=0) < 0.1
            # get the index of the first occurence of the value true
            # (a 1 in the binary representation of true and false)
            out[chan, epoch] = np.argmax(mindim)
    return out
 ##########
 # ARMA coefficients
 def arma(eegData,order=2):
    """Autoregressive coefficients of an ARMA(order, order) fit.

    One model is fitted per channel and epoch, without a trend term, and
    its AR parameters are stored.

    Parameters:
        eegData: 3-D array indexed [channel, sample, epoch].
        order: AR and MA order of the model.

    Returns:
        Array of shape (channels, epochs, order).
    """
    # NOTE(review): sm.tsa.ARMA appears to be unavailable in recent
    # statsmodels releases - confirm the version pinned for this project.
    n_chan, n_epoch = eegData.shape[0], eegData.shape[2]
    H = np.zeros((n_chan, n_epoch, order))
    for chan in range(n_chan):
        for epoch in range(n_epoch):
            model = sm.tsa.ARMA(eegData[chan,:,epoch], order=(order,order))
            fitted = model.fit(trend='nc', disp=-1)
            H[chan, epoch, : ] = fitted.arparams
    return H
 ################################################
 #	Continuity features
 ################################################  
 ##########
 # median frequency
 def medianFreq(eegData,fs):
    """Frequency of the periodogram bin holding the median power.

    For every channel the periodogram is taken along the sample axis; per
    epoch the bins are ranked by power and the frequency of the bin at rank
    ``len(powers)//2`` is stored.

    Parameters:
        eegData: 3-D array indexed [channel, sample, epoch].
        fs: sampling frequency.

    Returns:
        Array of shape (channels, epochs).
    """
    n_chan, n_epoch = eegData.shape[0], eegData.shape[2]
    H = np.zeros((n_chan, n_epoch))
    for chan in range(n_chan):
        freqs, powers = signal.periodogram(eegData[chan, :, :], fs, axis=0)
        # Row r of rank_order holds, per epoch, the bin with the r-th smallest power.
        rank_order = np.argsort(powers, axis=0)
        median_bins = rank_order[len(powers)//2]
        H[chan,:] = freqs[median_bins]
    return H
 ##########
 # calculate band power
 def bandPower(eegData, lowcut, highcut, fs):
    """Mean periodogram power of the band-pass filtered signal.

    The data is filtered with ``filt_data`` (order 7) between ``lowcut`` and
    ``highcut``; the periodogram is taken along axis 1 and averaged over all
    frequency bins.

    Returns:
        ``eegData`` reduced over axis 1.
    """
    band_limited = filt_data(eegData, lowcut, highcut, fs, order=7)
    _, powers = signal.periodogram(band_limited, fs, axis=1)
    return np.mean(powers, axis=1)
 ##########
 # numberOfSpikes    
 def spikeNum(eegData,minNumSamples=7,stdAway = 3):
    """Count the spikes in every channel and epoch.

    A spike is a peak of ``|signal - mean|`` that is at least ``stdAway``
    standard deviations high and at least ``minNumSamples`` samples wide.

    Fixes over the previous version: ``np.std`` was called with ``axis=1``
    on a 1-D slice (which raises), the epoch index was passed positionally
    as the ``threshold`` of ``find_peaks``, and both parameters were ignored
    in favour of hard-coded 3 and 7. Note that a function with the same name
    is defined again further down this file and replaces this one at import.

    Parameters:
        eegData: 3-D array indexed [channel, sample, epoch].
        minNumSamples: minimum peak width in samples.
        stdAway: minimum peak height in standard deviations.

    Returns:
        Array of shape (channels, epochs) with the spike counts.
    """
    H = np.zeros((eegData.shape[0], eegData.shape[2]))
    for chan in range(H.shape[0]):
        for epoch in range(H.shape[1]):
            trace = eegData[chan, :, epoch]
            deviation = np.abs(trace - np.mean(trace))
            peaks, _ = signal.find_peaks(deviation, height=stdAway*np.std(trace), width=minNumSamples)
            H[chan,epoch] = len(peaks)
    return H
 ##########    
 # Standard Deviation
 def eegStd(eegData):
    """Standard deviation of the signal along axis 1.

    Returns:
        ``eegData`` reduced over axis 1.
    """
    return np.std(eegData, axis=1)
 ##########
 # α/δ Ratio
 def eegRatio(eegData,fs):
    """Ratio of alpha (8-12 Hz) to delta (0.5-4 Hz) band power.

    The band powers from ``bandPower`` are summed over axis 0 before the
    ratio is taken. The previous version returned the undefined name ``x``
    (a NameError) and filtered the data twice without using the result;
    ``bandPower`` already filters internally.

    Parameters:
        eegData: array passed to ``bandPower``.
        fs: sampling frequency.

    Returns:
        The ratio with a new leading axis of length 1.
    """
    powers_alpha = bandPower(eegData, 8, 12, fs)
    powers_delta = bandPower(eegData, 0.5, 4, fs)
    ratio_res = np.sum(powers_alpha,axis=0) / np.sum(powers_delta,axis=0)
    return np.expand_dims(ratio_res, axis=0)
 ###########
 # Regularity (burst-suppression)
 # Regularity of eeg
 # filter with a window of 0.5 seconds to create a nonnegative smooth signal.
 # In this technique, we first squared the signal and applied a moving-average
 # The window length of the moving average was set at 0.5 seconds.
 def eegRegularity(eegData, Fs=100):
    """Regularity measure of the smoothed signal power.

    The signal is squared, smoothed with a moving average of ``Fs//2``
    samples along axis 1 and sorted in descending order. The result is
    ``sqrt(sum_j(q_j * j**2) / (sum_j(q_j) * N**2 / 3))`` with ``q`` the
    sorted values, ``j`` running from 1 and ``N`` the number of samples.

    Parameters:
        eegData: 3-D array indexed [channel, sample, epoch].
        Fs: sampling frequency; half of it is the smoothing window length.

    Returns:
        Array of shape (channels, epochs).
    """
    power = np.square(eegData)
    window_len = Fs//2  # 0.5 seconds worth of samples
    smoothed = signal.lfilter(np.ones(window_len) / window_len, 1, power, axis=1)
    # Descending sort along the sample axis.
    ranked = -np.sort(-smoothed, axis=1)
    n_samples = ranked.shape[1]
    quadratic_weights = np.square(np.arange(1, n_samples+1))
    weighted_sum = np.einsum('ijk,j->ik', ranked, quadratic_weights)
    normaliser = np.sum(ranked, axis=1)*(n_samples**2)/3
    return np.sqrt(weighted_sum / normaliser)
 ###########
 # Voltage < (5μ, 10μ, 20μ)
 def eegVoltage(eegData,voltage=20):
    """Mean of the samples whose absolute value does not exceed ``voltage``.

    Samples above the limit are replaced by NaN in a copy of the data and
    ignored by the mean along axis 1; the input is left untouched.

    Returns:
        ``eegData`` reduced over axis 1.
    """
    too_large = abs(eegData) > voltage
    kept = eegData.copy()
    kept[too_large] = np.nan
    return np.nanmean(kept, axis=1)
 ##########
 # Diffuse Slowing
 # look for diffuse slowing (bandpower max from frequency domain integral)
 # repeated integration of a huge tensor is really expensive
 def diffuseSlowing(eegData, Fs=100, fast=True):
    """Flag channels/epochs whose strongest 1 Hz band lies below 8 Hz.

    In slow mode the band power of every 1 Hz band from 0 up to ``Fs//2 - 1``
    is computed and the upper edge of the strongest band is tracked. The
    previous version called ``bandpower`` on the 3-D tensor, which does not
    produce a (channels, epochs) result; ``pfvecbandpower`` is used instead.

    Parameters:
        eegData: 3-D array indexed [channel, sample, epoch].
        Fs: sampling frequency.
        fast: when true (the default) the computation is skipped.

    Returns:
        Array of shape (channels, epochs): float zeros in fast mode, booleans
        (strongest band edge < 8) otherwise.
    """
    maxBP = np.zeros((eegData.shape[0], eegData.shape[2]))
    idx = np.zeros((eegData.shape[0], eegData.shape[2]))
    if fast:
        return idx
    for j in range(1, Fs//2):
        print("BP", j)
        cbp = pfvecbandpower(eegData, Fs, [j-1, j])
        # Remember the band wherever it beats the strongest one seen so far.
        biggerCIdx = cbp > maxBP
        idx[biggerCIdx] = j
        maxBP[biggerCIdx] = cbp[biggerCIdx]
    return (idx < 8)
 ##########
 # Spikes
 def spikeNum(eegData,minNumSamples=7,stdAway = 3):
    """Count the spikes in every channel and epoch.

    A spike is a peak of ``|signal - mean|`` that is at least ``stdAway``
    standard deviations high and at least ``minNumSamples`` samples wide.

    Fixes over the previous version: the epoch index was passed positionally
    as the ``threshold`` of ``find_peaks`` (so later epochs were filtered
    more strictly), and both parameters were ignored in favour of hard-coded
    3 and 7.

    Parameters:
        eegData: 3-D array indexed [channel, sample, epoch].
        minNumSamples: minimum peak width in samples.
        stdAway: minimum peak height in standard deviations.

    Returns:
        Array of shape (channels, epochs) with the spike counts.
    """
    H = np.zeros((eegData.shape[0], eegData.shape[2]))
    for chan in range(H.shape[0]):
        for epoch in range(H.shape[1]):
            trace = eegData[chan, :, epoch]
            deviation = np.abs(trace - np.mean(trace))
            peaks, _ = signal.find_peaks(deviation, height=stdAway*np.std(trace), width=minNumSamples)
            H[chan,epoch] = len(peaks)
    return H
 ##########
 # Delta Burst after spike
 def burstAfterSpike(eegData,eegData_subband,minNumSamples=7,stdAway = 3):
    """Change in sub-band level from just before to just after each spike.

    Spikes are peaks of ``|signal - mean|`` at least ``stdAway`` standard
    deviations high and ``minNumSamples`` samples wide. For every spike the
    mean of ``eegData_subband`` over the 6 samples before it and the 6
    samples after it is accumulated; the stored value is the sum of the
    "after" means minus the sum of the "before" means.

    Fixes over the previous version: the epoch index was passed positionally
    as the ``threshold`` of ``find_peaks``, and a spike close to either end
    of the epoch produced an empty or wrapped window (NaN result). Windows
    are now clipped to the epoch and spikes with an empty window are skipped.

    Parameters:
        eegData: 3-D array indexed [channel, sample, epoch], used to find spikes.
        eegData_subband: array of the same layout, averaged around the spikes.
        minNumSamples: minimum peak width in samples.
        stdAway: minimum peak height in standard deviations.

    Returns:
        Array of shape (channels, epochs).
    """
    H = np.zeros((eegData.shape[0], eegData.shape[2]))
    for chan in range(H.shape[0]):
        for epoch in range(H.shape[1]):
            trace = eegData[chan, :, epoch]
            band = eegData_subband[chan, :, epoch]
            deviation = np.abs(trace - np.mean(trace))
            idxList, _ = signal.find_peaks(deviation, height=stdAway*np.std(trace), width=minNumSamples)
            preBurst = 0
            postBurst = 0
            for idx in idxList:
                before = band[max(idx-7, 0):max(idx-1, 0)]
                after = band[idx+1:idx+7]
                if before.size == 0 or after.size == 0:
                    continue
                preBurst += np.mean(before)
                postBurst += np.mean(after)
            H[chan,epoch] = postBurst - preBurst
    return H
 ##########
 # Sharp spike
 def shortSpikeNum(eegData,minNumSamples=7,stdAway = 3):
    """Count the sharp (narrow) spikes in every channel/epoch.

    A sharp spike is a peak of ``abs(signal - mean)`` that is at least
    ``stdAway`` standard deviations high, at least 1 sample wide and narrower
    than ``minNumSamples`` samples, i.e. a spike that ``spikeNum`` would
    reject for being too short.

    Parameters
    ----------
    eegData : ndarray
        Signal indexed as ``eegData[chan, :, epoch]``.
    minNumSamples : number, optional
        Width (in samples) from which a spike no longer counts as short,
        default 7.
    stdAway : number, optional
        Minimum peak height in standard deviations, default 3.

    Returns
    -------
    ndarray
        Counts with shape ``(eegData.shape[0], eegData.shape[2])``.
    """
    H = np.zeros((eegData.shape[0], eegData.shape[2]))
    for chan in range(H.shape[0]):
        for epoch in range(H.shape[1]):
            trace = eegData[chan, :, epoch]
            deviation = np.abs(trace - np.mean(trace))
            # One search with width=1 returns every candidate together with
            # its measured width; the epoch index is no longer leaked into
            # find_peaks' positional `threshold` argument.
            _, props = signal.find_peaks(deviation,
                                         height=stdAway*np.std(trace),
                                         width=1)
            # Equivalent to {peaks with width >= 1} - {peaks with width >= minNumSamples}.
            H[chan, epoch] = np.count_nonzero(props["widths"] < minNumSamples)
    return H
 ##########
 # Number of Bursts
 def numBursts(eegData,fs):
    """Count the bursts reported for every channel/epoch.

    ``burst_supression_detection`` is run once per epoch (with
    ``suppression_threshold=10``); the number of bursts it lists for each
    channel is stored.

    Parameters
    ----------
    eegData : ndarray
        Signal sliced per epoch as ``eegData[:, :, epoch]``.
    fs : number
        Sampling frequency forwarded to the detector.

    Returns
    -------
    ndarray
        Burst counts with shape ``(eegData.shape[0], eegData.shape[2])``.
    """
    n_chans, n_epochs = eegData.shape[0], eegData.shape[2]
    per_epoch = [
        burst_supression_detection(eegData[:,:,epoch],fs,suppression_threshold=10)[0]
        for epoch in range(n_epochs)
    ]
    counts = np.zeros((n_chans, n_epochs))
    for epoch, epoch_bursts in enumerate(per_epoch):
        for chan in range(n_chans):
            counts[chan, epoch] = len(epoch_bursts[chan])
    return counts
 ##########
 # Burst length μ and σ
 def burstLengthStats(eegData,fs):
    """Mean and standard deviation of the burst lengths per channel/epoch.

    Bursts come from ``burst_supression_detection`` (run per epoch with
    ``suppression_threshold=10``); the length of a burst is taken as
    ``burst[1] - burst[0]``.

    Parameters
    ----------
    eegData : ndarray
        Signal sliced per epoch as ``eegData[:, :, epoch]``.
    fs : number
        Sampling frequency forwarded to the detector.

    Returns
    -------
    (ndarray, ndarray)
        Mean and standard deviation, each of shape
        ``(eegData.shape[0], eegData.shape[2])``.  Entries without any burst
        are NaN at first and turned into 0 by ``np.nan_to_num``.
    """
    n_chans, n_epochs = eegData.shape[0], eegData.shape[2]
    per_epoch = [
        burst_supression_detection(eegData[:,:,epoch],fs,suppression_threshold=10)[0]
        for epoch in range(n_epochs)
    ]
    mean_len = np.zeros((n_chans, n_epochs))
    std_len = np.zeros((n_chans, n_epochs))
    for chan in range(n_chans):
        for epoch in range(n_epochs):
            lengths = [item[1]-item[0] for item in per_epoch[epoch][chan]]
            mean_len[chan, epoch] = np.mean(lengths)
            std_len[chan, epoch] = np.std(lengths)
    return np.nan_to_num(mean_len), np.nan_to_num(std_len)
 ##########
 # Burst band powers (δ, α, θ, β, γ)
 def burstBandPowers(eegData, lowcut, highcut, fs, order=7):
    """Mean periodogram power of the band-filtered signal during bursts.

    Bursts are detected per epoch on the unfiltered data with
    ``burst_supression_detection`` (``suppression_threshold=10``).  The data
    is then band-filtered with ``filt_data`` and, for every burst, the
    periodogram of the filtered samples inside the burst is averaged.

    Parameters
    ----------
    eegData : ndarray
        Signal sliced per epoch as ``eegData[:, :, epoch]``.
    lowcut, highcut : number
        Band edges forwarded to ``filt_data``.
    fs : number
        Sampling frequency.
    order : int, optional
        Filter order forwarded to ``filt_data``, default 7.  (It used to be
        ignored in favour of a hard-coded 7.)

    Returns
    -------
    ndarray
        Shape ``(eegData.shape[0], eegData.shape[2])``.  Entries without any
        burst are NaN (mean of an empty list).
    """
    band_burst_powers = np.zeros((eegData.shape[0], eegData.shape[2]))
    bursts = []
    for epoch in range(eegData.shape[2]):
        epochBurst, _ = burst_supression_detection(eegData[:,:,epoch],fs,suppression_threshold=10)
        bursts.append(epochBurst)
    eegData_band = filt_data(eegData, lowcut, highcut, fs, order=order)
    for epoch, epochBursts in enumerate(bursts):
        for chan, chanBursts in enumerate(epochBursts):
            epochPowers = []
            for burst in chanBursts:
                # A burst ending at the last sample is taken to the end of the
                # filtered array, exactly as before.
                stop = None if burst[1] == eegData.shape[1] else burst[1]
                # NOTE(review): the slice takes ALL channels for this
                # channel's burst interval, so the value stored for `chan`
                # averages every channel's power -- confirm this is intended.
                burstData = eegData_band[:, burst[0]:stop, epoch]
                freqs, powers = signal.periodogram(burstData, fs, axis=1)
                epochPowers.append(np.mean(powers, axis=1))
            band_burst_powers[chan, epoch] = np.mean(epochPowers)
    return band_burst_powers
 ##########
 # Number of Suppressions
 def numSuppressions(eegData,fs,suppression_threshold=10):
    """Count the suppressions reported for every channel/epoch.

    ``burst_supression_detection`` is run once per epoch and the number of
    suppression segments it lists for each channel is stored.

    Parameters
    ----------
    eegData : ndarray
        Signal sliced per epoch as ``eegData[:, :, epoch]``.
    fs : number
        Sampling frequency forwarded to the detector.
    suppression_threshold : number, optional
        Threshold forwarded to the detector, default 10.

    Returns
    -------
    ndarray
        Counts with shape ``(eegData.shape[0], eegData.shape[2])``.
    """
    n_chans, n_epochs = eegData.shape[0], eegData.shape[2]
    per_epoch = [
        burst_supression_detection(eegData[:,:,epoch],fs,suppression_threshold=suppression_threshold)[1]
        for epoch in range(n_epochs)
    ]
    counts = np.zeros((n_chans, n_epochs))
    for epoch, epoch_suppr in enumerate(per_epoch):
        for chan in range(n_chans):
            counts[chan, epoch] = len(epoch_suppr[chan])
    return counts
 ##########
 # Suppression length μ and σ
 def suppressionLengthStats(eegData,fs,suppression_threshold=10):
    """Mean and standard deviation of the suppression lengths per channel/epoch.

    Suppressions come from ``burst_supression_detection`` (run per epoch);
    the length of a segment is taken as ``segment[1] - segment[0]``.

    Parameters
    ----------
    eegData : ndarray
        Signal sliced per epoch as ``eegData[:, :, epoch]``.
    fs : number
        Sampling frequency forwarded to the detector.
    suppression_threshold : number, optional
        Threshold forwarded to the detector, default 10.

    Returns
    -------
    (ndarray, ndarray)
        Mean and standard deviation, each of shape
        ``(eegData.shape[0], eegData.shape[2])``.  Entries without any
        suppression are NaN at first and turned into 0 by ``np.nan_to_num``.
    """
    n_chans, n_epochs = eegData.shape[0], eegData.shape[2]
    per_epoch = [
        burst_supression_detection(eegData[:,:,epoch],fs,suppression_threshold=suppression_threshold)[1]
        for epoch in range(n_epochs)
    ]
    mean_len = np.zeros((n_chans, n_epochs))
    std_len = np.zeros((n_chans, n_epochs))
    for chan in range(n_chans):
        for epoch in range(n_epochs):
            lengths = [item[1]-item[0] for item in per_epoch[epoch][chan]]
            mean_len[chan, epoch] = np.mean(lengths)
            std_len[chan, epoch] = np.std(lengths)
    return np.nan_to_num(mean_len), np.nan_to_num(std_len)
 ################################################
 #	Connectivity features
 ################################################
 ##########
 # Coherence - δ
 def coherence(eegData,fs):
    """Evaluate ``CoherenceDelta`` for every unordered pair of channels.

    Parameters
    ----------
    eegData : ndarray
        Signal whose axis 0 enumerates the channels.
    fs : number
        Sampling frequency, passed on as ``fs=``.

    Returns
    -------
    ndarray
        ``np.array`` of the per-pair results, in the order produced by
        ``itertools.combinations(range(n_channels), 2)``.
    """
    channel_pairs = itertools.combinations(range(eegData.shape[0]), 2)
    return np.array([CoherenceDelta(eegData, first, second, fs=fs)
                     for first, second in channel_pairs])
 ##########
 # Mutual information
 def calculate2Chan_MI(eegData,ii,jj,bin_min=-200, bin_max=200, binWidth=2):
    """Mutual information between two channels, one value per epoch.

    For every epoch a 2-D histogram of the two channels is built and handed
    to ``mutual_info_score`` as the contingency table.

    Parameters
    ----------
    eegData : ndarray
        Signal indexed as ``eegData[chan, :, epoch]``.
    ii, jj : int
        Indices of the two channels.
    bin_min, bin_max, binWidth : number, optional
        The histogram edges are ``np.arange(bin_min+1, bin_max, binWidth)``
        and are used for both channels.

    Returns
    -------
    ndarray
        One score per epoch, shape ``(eegData.shape[2],)``.
    """
    edges = np.arange(bin_min+1, bin_max, binWidth)
    n_epochs = eegData.shape[2]
    scores = np.zeros(n_epochs)
    for epoch in range(n_epochs):
        joint_counts, _, _ = np.histogram2d(eegData[ii,:,epoch], eegData[jj,:,epoch], edges)
        scores[epoch] = mutual_info_score(None, None, contingency=joint_counts)
    return scores
 ##########
 # Granger causality
 def calcGrangerCausality(eegData,ii,jj):
    """Granger-causality F statistic between two channels, one value per epoch.

    For every epoch the two channels are stacked as the columns of a
    two-column array and passed to
    ``tsa.stattools.grangercausalitytests`` with a maximum lag of 1; the
    ``'ssr_ftest'`` statistic of lag 1 is stored.

    Parameters
    ----------
    eegData : ndarray
        Signal indexed as ``eegData[chan, :, epoch]``.
    ii, jj : int
        Indices of the channels forming the first and second column.

    Returns
    -------
    ndarray
        One statistic per epoch, shape ``(eegData.shape[2],)``.
    """
    n_epochs = eegData.shape[2]
    f_stats = np.zeros(n_epochs)
    for epoch in range(n_epochs):
        pair = np.column_stack((eegData[ii,:,epoch], eegData[jj,:,epoch]))
        results = tsa.stattools.grangercausalitytests(pair, 1, addconst=True, verbose=False)
        # results[lag][0] is the dict of tests; [0] of the entry is the statistic.
        f_stats[epoch] = results[1][0]['ssr_ftest'][0]
    return f_stats
 ##########
 # phase Lag Index
 def phaseLagIndex(eegData, i, j):
    """Phase lag index (PLI) between two channels, one value per epoch.

    The instantaneous phase of each channel is the angle of its analytic
    signal (Hilbert transform).  The PLI is the absolute value of the
    time-average of the sign of the phase difference, where the sign is taken
    as ``sign(sin(phase_j - phase_i))`` so that differences are compared
    modulo 2*pi.

    Parameters
    ----------
    eegData : ndarray
        Signal indexed as ``eegData[chan, :, epoch]``.
    i, j : int
        Indices of the two channels.

    Returns
    -------
    ndarray
        One value in [0, 1] per epoch, shape ``(eegData.shape[2],)``.
    """
    # eegData[i] is [samples x epochs]; the analytic signal must be built
    # along the sample axis (axis 0).  hilbert's default axis=-1 would
    # transform across epochs instead.
    analytic_i = ss.hilbert(eegData[i,:,:], axis=0)
    analytic_j = ss.hilbert(eegData[j,:,:], axis=0)
    phase_diff = np.angle(analytic_j) - np.angle(analytic_i)
    # np.angle wraps to (-pi, pi], so the raw difference spans (-2*pi, 2*pi);
    # sin() folds it back so a constant lag always yields the same sign.
    out = np.abs(np.mean(np.sign(np.sin(phase_diff)), axis=0))
    return out
 ##########
 # Cross-correlation Magnitude
 def crossCorrMag(eegData,ii,jj):
    """Evaluate ``crossCorrelation`` for every unordered pair of channels.

    Parameters
    ----------
    eegData : ndarray
        Signal whose axis 0 enumerates the channels.
    ii, jj
        Ignored: every channel pair is processed regardless of these values.

    Returns
    -------
    ndarray
        ``np.array`` of the per-pair results, in the order produced by
        ``itertools.combinations(range(n_channels), 2)``.
    """
    channel_pairs = itertools.combinations(range(eegData.shape[0]), 2)
    return np.array([crossCorrelation(eegData, first, second)
                     for first, second in channel_pairs])
 ##########
 # Cross-correlation Lag
 def corrCorrLag(eegData,ii,jj,fs=100):
    """Cross-correlation lag (in seconds) for every unordered pair of channels.

    The previous body called ``corrCorrLag`` itself for every pair with the
    same data, which recursed without end as soon as there were two or more
    channels.  The lag is therefore computed here directly.

    For each pair ``(a, b)`` and each epoch both signals are mean-centred,
    their full cross-correlation is computed with ``np.correlate`` and the
    lag with the largest absolute correlation is reported, divided by ``fs``.
    A positive lag means channel ``a`` is a delayed copy of channel ``b``.
    NOTE(review): the helper this function was meant to call is not visible
    here -- confirm the sign/unit convention against it.

    Parameters
    ----------
    eegData : ndarray
        Signal indexed as ``eegData[chan, :, epoch]``.
    ii, jj
        Ignored: every channel pair is processed regardless of these values.
    fs : number, optional
        Sampling frequency used to convert samples to seconds, default 100.

    Returns
    -------
    ndarray
        Shape ``(n_pairs, eegData.shape[2])``, pairs in the order produced by
        ``itertools.combinations(range(n_channels), 2)``; an empty array when
        there are fewer than two channels.
    """
    n_chans, n_samples, n_epochs = eegData.shape[0], eegData.shape[1], eegData.shape[2]
    # Lag (in samples) associated with each entry of a mode='full' correlation.
    lags = np.arange(-(n_samples - 1), n_samples)
    crossCorrLag_res = []
    for first, second in itertools.combinations(range(n_chans), 2):
        pair_lag = np.zeros(n_epochs)
        for epoch in range(n_epochs):
            x = eegData[first, :, epoch] - np.mean(eegData[first, :, epoch])
            y = eegData[second, :, epoch] - np.mean(eegData[second, :, epoch])
            xcorr = np.correlate(x, y, mode='full')
            pair_lag[epoch] = lags[np.argmax(np.abs(xcorr))] / fs
        crossCorrLag_res.append(pair_lag)
    return np.array(crossCorrLag_res)
@@ -0,0 +1,459 @@
 #!/usr/bin/env python
 # coding: utf-8
 # In[ ]:
 import ImportUtils
 import math 
 import EEGExtract as eeg
 from sklearn.model_selection import train_test_split
 import os
 import glob
 from scipy import io,signal
 import numpy as np
 import pandas as pd
 from sklearn import preprocessing
 import pickle
 from sklearn.metrics import mean_squared_error
 from sklearn.impute import SimpleImputer
 import matplotlib.pyplot as plt
 import seaborn as sns
 import copy
 import os
 # In[ ]:
 def select_channels(data,channels):
    """Keep only the requested channels of every recording.

    Parameters
    ----------
    data : ndarray
        Channel-wise EEG data indexed as ``data[recording, channel, :]``.
    channels : sequence of int
        Indices of the channels to keep, in the desired output order.

    Returns
    -------
    ndarray
        New float array of shape
        ``(data.shape[0], len(channels), data.shape[2])``.
    """
    picked = np.empty((data.shape[0], len(channels), data.shape[2]))
    # Index all recordings at once instead of looping over them.
    picked[...] = data[:, list(channels), :]
    return picked
 # In[ ]:
 def epoch_data(X, Y, Z, window, stride, sfreq):
    """Cut every trial into fixed-length windows with a sliding stride.

    Parameters
    ----------
    X : ndarray
        EEG data indexed as trial x channel x timepoints.
    Y : ndarray
        Per-trial labels (2-D); every epoch inherits its trial's row.
    Z : ndarray
        Per-trial participant/session information (2-D); copied like ``Y``.
    window : number
        Window length in seconds.
    stride : number
        Stride of the sliding window in seconds.
    sfreq : number
        Sampling frequency of the EEG data.

    Returns
    -------
    (X_new, Y_new, Z_new)
        Float arrays with ``trials * epochs_per_trial`` rows.  Rows are
        ordered trial by trial, and within a trial by window position.
    """
    n_trials, n_channels, n_timepoints = X.shape
    segment = int(window*sfreq)
    step = int(stride*sfreq)
    epochPerTrial = int((n_timepoints-segment)/step + 1)
    n_rows = n_trials*epochPerTrial

    X_new = np.empty((n_rows, n_channels, segment))
    # View X_new as (trial, window position, channel, sample) so that one
    # window position can be filled for all trials in a single assignment.
    by_trial = X_new.reshape(n_trials, epochPerTrial, n_channels, segment)
    for position in range(epochPerTrial):
        first = position*step
        by_trial[:, position] = X[:, :, first:first+segment]

    Y_new = np.empty((n_rows, Y.shape[1]))
    Y_new[...] = np.repeat(Y, epochPerTrial, axis=0)
    Z_new = np.empty((n_rows, Z.shape[1]))
    Z_new[...] = np.repeat(Z, epochPerTrial, axis=0)
    return X_new, Y_new, Z_new
 # In[ ]:
 def save_features(dataset, ans, Y_epoch, sfreq, window, stride, all_features=False):
    """Compute EEG features of the epoched data and save each one as ``.npz``.

    Every file is written to ``<cwd>/<dataset>/data_extracted/featuresDict/``
    as ``<feature>_<window>_<stride>.npz`` and holds two arrays:
    ``features`` and ``Y`` (``Y_epoch``).

    The function used to contain an early ``return`` right after the
    gamma-band asymmetry features, which made the rest of the feature set
    unreachable.  That behaviour is still the default; pass
    ``all_features=True`` to write the complete set.

    Parameters
    ----------
    dataset : str
        Name of the dataset (sub-directory of the working directory).
    ans : ndarray
        Epoched EEG indexed as [chans x ms x epochs].  The asymmetry features
        look channels up by their position in the Emotiv channel list below.
    Y_epoch : ndarray
        Epoched valence/arousal scores stored next to every feature.
    sfreq : number
        Sampling frequency of the EEG signal.
    window : number
        Window length in seconds (used in the file names).
    stride : number
        Stride length in seconds (used in the file names).
    all_features : bool, optional
        ``False`` (default): write only ``shannonEntropy``, ``dasm_gamma``
        and ``rasm_gamma``.  ``True``: additionally write the sub-band
        Shannon entropies, Hjorth parameters, false nearest neighbour, median
        frequency, band powers, standard deviation, diffuse slowing, spike
        and burst/suppression statistics and DASM/RASM for all five bands.

    Returns
    -------
    None
    """
    fs = sfreq
    featurepath = os.getcwd() + '/' + dataset + '/data_extracted/featuresDict/'
    # (name, low edge, high edge) of the frequency bands used throughout.
    bands = (('delta', 0.5, 4), ('theta', 4, 8), ('alpha', 8, 12),
             ('beta', 12, 30), ('gamma', 30, 45))
    emotiv_channels = ['AF3','F7','F3','FC5','T7','P7','O1','O2','P8','T8','FC6','F4','F8','AF4']
    left_channels = ['AF3', 'F7','F3', 'FC5', 'T7', 'P7', 'O1']
    right_channels = ['AF4','F8','F4','FC6','T8','P8','O2']

    def _save(stem, feature_matrix):
        # Write one feature file: <stem>_<window>_<stride>.npz
        np.savez((featurepath + stem + "_{}_{}.npz").format(window, stride),
                 features=feature_matrix, Y=Y_epoch)

    def _band_entropy(chan, low, high):
        # Differential entropy of a Gaussian, 0.5*log(2*pi*e*sigma^2), of one
        # band-filtered channel (kept 3-D with a leading axis of length 1).
        band = eeg.filt_data(np.expand_dims(ans[chan,:,:], axis=0), low, high, fs)
        return (0.5)*np.log((2*math.pi*math.e*np.square(eeg.eegStd(band))))

    def _save_asymmetry(band_name, low, high):
        # DASM = h(left) - h(right), RASM = h(left) / h(right) for each of
        # the seven left/right electrode pairs; one row per pair.
        dasm_rows = []
        rasm_rows = []
        for lc, rc in zip(left_channels, right_channels):
            dl = _band_entropy(emotiv_channels.index(lc), low, high)
            dr = _band_entropy(emotiv_channels.index(rc), low, high)
            dasm_rows.append(np.subtract(dl, dr))
            rasm_rows.append(np.divide(dl, dr))
        _save("dasm_" + band_name, np.concatenate(dasm_rows, axis=0))
        _save("rasm_" + band_name, np.concatenate(rasm_rows, axis=0))

    _save("shannonEntropy", eeg.shannonEntropy(ans, bin_min=-200, bin_max=200, binWidth=2))

    if not all_features:
        # Historical shortcut: only the gamma-band asymmetry is produced.
        _save_asymmetry('gamma', 30, 45)
        return

    # Subband Information Quantity
    eegData_delta = None
    for band_name, low, high in bands:
        band_data = eeg.filt_data(ans, low, high, fs)
        _save("ShannonRes_sub_bands_" + band_name,
              eeg.shannonEntropy(band_data, bin_min=-200, bin_max=200, binWidth=2))
        if band_name == 'delta':
            # Needed again below for the delta burst after spike feature.
            eegData_delta = band_data
    del band_data

    HjorthMob, HjorthComp = eeg.hjorthParameters(ans)
    _save("Hjorth_complexity", HjorthComp)
    # "mobilty" (sic) is the file name the feature loader expects.
    _save("Hjorth_mobilty", HjorthMob)
    _save("falseNearestNeighbor", eeg.falseNearestNeighbor(ans))
    _save("medianFreq", eeg.medianFreq(ans,fs))

    for band_name, low, high in bands:
        _save("bandPwr_" + band_name, eeg.bandPower(ans, low, high, fs))

    _save("stdDev", eeg.eegStd(ans))
    _save("diffuseSlowing", eeg.diffuseSlowing(ans))

    # 70 ms expressed in samples.
    minNumSamples = int(70*fs/1000)
    _save("spikeNum", eeg.spikeNum(ans,minNumSamples))
    _save("deltaBurstAfterSpike", eeg.burstAfterSpike(ans,eegData_delta,minNumSamples=7,stdAway = 3))
    _save("shortSpikeNum", eeg.shortSpikeNum(ans,minNumSamples))

    _save("numBursts", eeg.numBursts(ans,fs))
    burstLenMean_res, burstLenStd_res = eeg.burstLengthStats(ans,fs)
    _save("burstLen_u_and_sigma_mean", burstLenMean_res)
    _save("burstLen_u_and_sigma_std", burstLenStd_res)

    _save("numSuppressions", eeg.numSuppressions(ans,fs))
    suppLenMean_res, suppLenStd_res = eeg.suppressionLengthStats(ans,fs)
    _save("suppressionLen_u_and_sigma_mean", suppLenMean_res)
    _save("suppressionLen_u_and_sigma_std", suppLenStd_res)

    # DASM and RASM features for every band.
    for band_name, low, high in bands:
        _save_asymmetry(band_name, low, high)
 # In[ ]:
 def getEpochedFeatures(dataset, window, stride, sfreq, label):
    """Load a dataset, epoch it, store the epoched arrays and extract features.

    Steps: load ``X``, ``Y``, ``Z`` from ``<cwd>/data_extracted/<dataset>.npz``;
    swap the last two axes of ``X`` unless the dataset is ``"DEAP"``; for
    ``"DEAP"`` keep only the 14 Emotiv channels; epoch the data with
    ``epoch_data``; rotate the result to [chans x ms x epochs]; save it under
    ``<cwd>/<dataset>/data_extracted/epochedData/`` and call
    ``save_features``.

    Parameters
    ----------
    dataset : str
        Name of the dataset.
    window : number
        Length of the window in seconds.
    stride : number
        Length of the stride in seconds.
    sfreq : number
        Sampling frequency of the EEG signal.
    label
        Not used by this function.

    Returns
    -------
    None
    """
    pwd = os.getcwd()
    with np.load((pwd + '/data_extracted/{}.npz').format(dataset), allow_pickle=True) as data:
        X, Y, Z = data['X'], data['Y'], data['Z']
    print("Shape After Loading")
    print("X.shape=", X.shape," Y.shape=",Y.shape," Z.shape=", Z.shape)

    # Reshape data: every dataset except DEAP has its last two axes swapped.
    if dataset != "DEAP":
        X = np.ascontiguousarray(np.transpose(X, (0, 2, 1)), dtype=np.float64)
    print("Shape after reshaping")
    print("X.shape=", X.shape," Y.shape=",Y.shape," Z.shape=", Z.shape)

    # Select channels (only needed for DEAP, which is cut down to the
    # 14 Emotiv electrodes).
    print("Data Loaded...\n")
    ch_names = ['F1', 'AF3', 'F3', 'F7', 'FC5', 'FC1', 'C3', 'T7', 'CP5', 'CP1', 'P3', 'P7', 'PO3', 'O1', 'Oz', 'Pz', 'Fp2', 'AF4', 'Fz', 'F4', 'F8', 'FC6', 'FC2', 'Cz', 'C4', 'T8', 'CP6', 'CP2', 'P4', 'P8', 'PO4', 'O2', 'hEOG','vEOG', 'zEMG','tEMG','GSR','Respiration belt','Plethysmograph','Temperature']
    emotiv_channels = ['AF3','F7','F3','FC5','T7','P7','O1','O2','P8','T8','FC6','F4','F8','AF4']
    index_arr = [ch_names.index(name) for name in emotiv_channels]
    X_new = select_channels(X, index_arr) if dataset == "DEAP" else X
    print("X_new.shape = ", X_new.shape)
    del X
    print("Channel selection done ...\n")

    X_epoch, Y_epoch, Z_epoch = epoch_data(X_new, Y, Z, window, stride, sfreq)
    del X_new, Y, Z
    print("Epoching done ...\n")
    print(X_epoch.shape, Y_epoch.shape, Z_epoch.shape) #debug

    # (epoch, channel, sample) -> [chans x ms x epochs]
    ans = np.ascontiguousarray(np.transpose(X_epoch, (1, 2, 0)), dtype=np.float64)
    del X_epoch
    print("ans.shape = ", ans.shape)
    print("Rotation of np.array done ...\n")

    filepath = pwd + '/' + dataset + "/data_extracted/epochedData/data" + str(window) + str(stride) + ".npz"
    np.savez(filepath,ans,Y_epoch, Z_epoch)
    save_features(dataset, ans, Y_epoch, sfreq, window, stride)
    print("Feature Extraction done ...\n")
 # Running this file as a script does nothing.
 if __name__ == '__main__':
    pass
@@ -0,0 +1,172 @@
 #!/usr/bin/env python
 # coding: utf-8
 # Script to import all the required libraries.
 # It also defines a function that builds a dictionary of the saved features and loads them.
 # 
 # In[ ]:
 from sklearn.model_selection import train_test_split
 from sklearn.preprocessing import StandardScaler
 from sklearn.metrics import accuracy_score
 from sklearn.feature_selection import chi2
 from sklearn.feature_selection import SelectKBest, f_classif
 from sklearn.model_selection import train_test_split
 from sklearn import preprocessing
 from sklearn.feature_selection import *
 from sklearn.model_selection import RandomizedSearchCV
 from sklearn.model_selection import GridSearchCV
 import sys
 import csv
 import os
 import math
 import glob
 from scipy import io,signal
 import numpy as np
 import pandas as pd
 import pickle
 from sklearn.metrics import mean_squared_error
 from sklearn.impute import SimpleImputer
 import matplotlib.pyplot as plt
 import seaborn as sns
 import copy
 from sklearn import feature_selection
 import argparse
 import cuml
 from cuml.svm import SVR
 from cuml.ensemble import RandomForestRegressor
 from cuml.svm import SVC
 from cuml.ensemble import RandomForestClassifier
 from cuml.metrics import  accuracy_score
 # In[ ]:
 def loadFeaturesDict(dataset, window=1, stride=1):
    """Load every saved feature matrix of a dataset into a dictionary.

    Files are read from ``<cwd>/<dataset>/data_extracted/featuresDict/`` and
    are named ``<file stem>_<window>_<stride>.npz``; only the ``features``
    array of each file is kept.

    Parameters
    ----------
    dataset : str
        Name of the dataset.
    window, stride : optional
        Window and stride used in the file names.  Both default to 1, which
        selects the ``*_1_1.npz`` files.

    Returns
    -------
    dict
        Maps each feature name (36 keys, in the order listed below) to its
        feature array.

    Raises
    ------
    OSError
        If one of the feature files does not exist.
    """
    bands = ('delta', 'theta', 'alpha', 'beta', 'gamma')
    # (dictionary key, file stem) -- the order defines the dictionary order.
    feature_files = [('shannonEntropy', 'shannonEntropy')]
    feature_files += [('ShannonRes_' + band, 'ShannonRes_sub_bands_' + band) for band in bands]
    feature_files += [
        ('HjorthComp', 'Hjorth_complexity'),
        ('HjorthMob', 'Hjorth_mobilty'),
        ('falseNearestNeighbor', 'falseNearestNeighbor'),
        ('medianFreq', 'medianFreq'),
    ]
    feature_files += [('bandPwr_' + band, 'bandPwr_' + band) for band in bands]
    feature_files += [
        ('stdDev', 'stdDev'),
        ('diffuseSlowing', 'diffuseSlowing'),
        ('spikeNum', 'spikeNum'),
        ('deltaBurstAfterSpike', 'deltaBurstAfterSpike'),
        ('shortSpikeNum', 'shortSpikeNum'),
        ('numBursts', 'numBursts'),
        ('burstLenMean', 'burstLen_u_and_sigma_mean'),
        ('burstLenStd', 'burstLen_u_and_sigma_std'),
        ('numSuppressions', 'numSuppressions'),
        ('suppLenMean', 'suppressionLen_u_and_sigma_mean'),
        ('suppLenStd', 'suppressionLen_u_and_sigma_std'),
    ]
    feature_files += [('dasm_' + band, 'dasm_' + band) for band in bands]
    feature_files += [('rasm_' + band, 'rasm_' + band) for band in bands]

    featurepath = os.getcwd() + '/' + dataset + '/data_extracted/featuresDict/'
    suffix = "_{}_{}.npz".format(window, stride)
    featuresDict = {}
    for key, stem in feature_files:
        # NOTE: allow_pickle=True is kept for compatibility with existing
        # files; unpickling can execute code, so only load trusted files.
        # The context manager closes the archive once the array is read.
        with np.load(featurepath + stem + suffix, allow_pickle=True) as archive:
            featuresDict[key] = archive['features']
    return featuresDict
@@ -0,0 +1,735 @@
 #!/usr/bin/env python
 # coding: utf-8
 # In[ ]:
 from ImportUtils import *
 from sklearn.model_selection import ParameterGrid
 from sklearn.model_selection import train_test_split
 from sklearn.preprocessing import StandardScaler
 from sklearn.metrics import accuracy_score
 from sklearn.feature_selection import chi2
 from sklearn.feature_selection import SelectKBest, f_classif
 from sklearn.model_selection import train_test_split
 from sklearn.preprocessing import StandardScaler
 from sklearn.metrics import accuracy_score
 from sklearn.ensemble import RandomForestRegressor as sklearnrfi
 import os
 import glob
 from scipy import io,signal
 import numpy as np
 import pandas as pd
 from sklearn import preprocessing
 import pickle
 from sklearn.metrics import mean_squared_error
 from sklearn.impute import SimpleImputer
 import matplotlib.pyplot as plt
 # %matplotlib inline
 import seaborn as sns
 import copy
 def topElectrodeRegressionRanking(dataset, window, stride, sfreq, clf, label, scale=False, pca=False, search_method="tpot"):
    '''
    Ranks electrodes according to the rmse computed by the regressor passed in clf
    Plots top-N electrodes v/s rmse graph on the current matplotlib figure

    Stage 1: every electrode is scored on its own by fitting clf on the
    selected features of that electrode and measuring the hold-out rmse.
    Stage 2: for N = 1..14 a model chosen by search_method is trained on the
    features of the N best-ranked electrodes and its hold-out rmse is recorded.
    The figure is neither saved nor shown here.
    '''
    # parameters :-
                # dataset - name of the dataset (sub-directory of the current working directory)
                # window - length of the sliding window in seconds (used to locate the .npz file)
                # stride - length of the stride of the sliding window in seconds (used to locate the .npz file)
                # sfreq - sampling frequency of the EEG data (currently unused)
                # clf - regressor exposing fit/predict
                # label - valence/arousal/dominance/liking label (shape depends upon the dataset) in an enumerated form (0- valence ; 1-arousal ; 2- like; 3-dominance)
                # scale - standardise the features (fit on the training split only) if required
                # pca - currently unused
                # search_method - model selection used in stage 2:
                #       "tpot"               TPOTRegressor with the "TPOT cuML" configuration (default)
                #       "bayes_sk_opt"       BayesSearchCV around clf
                #       "random_grid_search" RandomizedSearchCV followed by a GridSearchCV refinement around clf
                #       "manual_search"      loop over a small RandomForestRegressor grid
                #       anything else        clf is fitted as it is
                #   NOTE(review): BayesSearchCV, Categorical, Integer, RandomizedSearchCV, GridSearchCV and
                #   RandomForestRegressor are not imported in the visible part of this module; presumably they
                #   come from `from ImportUtils import *` - confirm before using those modes.
    # returns :-
                # void
    featurepath = os.getcwd() + '/' + dataset + '/data_extracted/featuresDict/'
    # The archive is read once and the context manager releases the file handle.
    with np.load((featurepath + "shannonEntropy_{}_{}.npz").format(window,stride), allow_pickle=True) as epoched:
        ans = epoched['features']
        Y_epoch = epoched['Y']
    n_segments = ans.shape[1]
    electrodeList = ['AF3', 'F7', 'F3', 'FC5', 'T7', 'P7', 'O1', 'O2', 'P8', 'T8', 'FC6', 'F4', 'F8', 'AF4']
    asm_features = ['dasm_delta', 'dasm_theta', 'dasm_alpha', 'dasm_beta', 'dasm_gamma', 'rasm_delta', 'rasm_theta', 'rasm_alpha', 'rasm_beta', 'rasm_gamma']
    featuresDict = loadFeaturesDict(dataset)
    # The asymmetry features are dropped before the per-electrode indexing below.
    for asm in asm_features:
        featuresDict.pop(asm)
    # NOTE(review): pickle.load can execute arbitrary code; only use a trusted intersection.pkl.
    with open('intersection.pkl', 'rb') as f:
        common = pickle.load(f)
    for k in list(featuresDict.keys()):
        if k not in common:
            # keep only the features listed in intersection.pkl
            featuresDict.pop(k)
    selectFeatures = list(featuresDict.keys())
    y = Y_epoch[:,label]

    def _electrode_frame(electrode_indices):
        # One column per (electrode, feature) pair, named feature + electrode index.
        featureMatrix = np.empty((len(selectFeatures)*len(electrode_indices), n_segments))
        feature_channel_index = []
        row = 0
        for index in electrode_indices:
            for feature in selectFeatures:
                featureMatrix[row,:] = featuresDict[feature][index,:]
                feature_channel_index.append(feature + str(index))
                row = row + 1
        print(featureMatrix.T.shape)
        featureMatrix = featureMatrix.astype(np.float32)
        # Impute every non-finite value with zero. Passing posinf/neginf explicitly
        # keeps +/-inf from being turned into the largest float when NaNs are present.
        featureMatrix = np.nan_to_num(featureMatrix, nan=0.0, posinf=0.0, neginf=0.0)
        print("Number of Feature-Columns: {}\n".format(len(feature_channel_index)))
        frame = pd.DataFrame(featureMatrix.T, columns=feature_channel_index)
        print("Features Ready for undergoing selection tests done ...\n")
        return frame

    def _split(frame):
        # Fixed 80/20 split; the scaler is fitted on the training part only.
        X_train, X_test, y_train, y_test = train_test_split(frame, y, test_size=0.2, random_state=42)
        if scale:
            sc = StandardScaler()
            X_train = sc.fit_transform(X_train)
            X_test = sc.transform(X_test)
        return X_train, X_test, y_train, y_test

    def _rmse(y_true, y_pred):
        # sqrt(MSE) instead of mean_squared_error(squared=False): the keyword is
        # deprecated and later removed in recent scikit-learn releases.
        return np.sqrt(mean_squared_error(y_true, y_pred))

    # Stage 1: score every electrode on its own ########################################
    rmseList = []
    for electrode in range(len(electrodeList)):
        print("Number of segments are: {}".format(n_segments))
        X = _electrode_frame([electrode])
        X_train, X_test, y_train, y_test = _split(X)
        clf.fit(X_train, y_train)
        rmse = _rmse(y_test, clf.predict(X_test))
        print("window: {}, stide: {}, rmse: {}".format(window,stride,rmse))
        rmseList.append(rmse)
    # rank electrodes based on RMSE computed by the regressor
    electrodeRanking = pd.DataFrame({'Electrode': electrodeList, 'RMSE': rmseList})
    features_result = electrodeRanking.sort_values('RMSE')
    print(features_result)

    # Stage 2: grow the electrode set in ranking order #################################
    N = features_result.shape[0]
    topNList = ["{}".format(x) for x in range(1,N+1)]
    topRmseList = []
    # sort_values keeps the original row labels, i.e. the electrode indices
    ranked_indices = list(features_result.index)
    for n in range(1,N+1):
        X = _electrode_frame(ranked_indices[:n])
        X_train, X_test, y_train, y_test = _split(X)
        if search_method == "bayes_sk_opt":
            # BayesCV scikit opt
            search_space = {"bootstrap": Categorical([True, False]), # values for boostrap can be either True or False
            "max_depth": Integer(6, 20), # values of max_depth are integers from 6 to 20
            "max_features": Categorical(['auto', 'sqrt','log2']),
            "min_samples_leaf": Integer(2, 10),
            "min_samples_split": Integer(2, 10),
            "n_estimators": Integer(100, 500)
            }
            forest_bayes_search = BayesSearchCV(clf, search_space, n_iter=32, cv=5)
            print(forest_bayes_search)
            print(forest_bayes_search.fit(X_train, y_train))
            print("Best Parameters are: ", forest_bayes_search.best_params_)
            rmse = _rmse(y_test, forest_bayes_search.best_estimator_.predict(X_test))
        elif search_method == "random_grid_search":
            print("Random Search followed by GridSearch initiated!\n")
            # RandomSearchCV followed by GridSearchCV
            random_grid = {'n_estimators': [int(x) for x in np.linspace(start = 200, stop = 2000, num = 10)],
                'max_features': ['auto', 'sqrt','log2'],
                'max_depth': [int(x) for x in np.linspace(10, 1000,10)],
                'min_samples_split': [2, 5, 10,14],
                'min_samples_leaf': [1, 2, 4,6,8],
                }
            rf_randomcv = RandomizedSearchCV(estimator=clf,param_distributions=random_grid,n_iter=100,cv=5,verbose=2,random_state=100)
            print(rf_randomcv.fit(X_train, y_train))
            best = rf_randomcv.best_params_
            print("Best Parameters for RandomSearchCV are: ", best)
            print("RMSE with RandomSearchCV is :",_rmse(y_test, rf_randomcv.best_estimator_.predict(X_test)))
            # Refine around the randomized optimum. Candidates that are not valid
            # hyper-parameters (min_samples_split < 2, n_estimators < 1) are skipped.
            param_grid = {
                'max_depth': [best['max_depth']],
                'max_features': [best['max_features']],
                'min_samples_leaf': [best['min_samples_leaf'] + step for step in (0, 2, 4)],
                'min_samples_split': [best['min_samples_split'] + step for step in (-2, -1, 0, 1, 2)
                                    if best['min_samples_split'] + step >= 2],
                'n_estimators': [best['n_estimators'] + step for step in (-200, -100, 0, 100, 200)
                                if best['n_estimators'] + step >= 1],
            }
            # The refinement searches the same estimator family as the randomized
            # search (clf) and its winner is the model that gets evaluated.
            grid_search = GridSearchCV(estimator=clf,param_grid=param_grid,cv=10, verbose=5)
            grid_search.fit(X_train,y_train)
            rmse = _rmse(y_test, grid_search.best_estimator_.predict(X_test))
        elif search_method == "manual_search":
            # NOTE(review): candidates are compared on the hold-out split itself, so
            # the reported rmse is optimistically biased.
            min_rmse = np.inf
            min_params = None
            # 2*1*3*3*3 = 54 combinations
            param_grid = ParameterGrid({'n_estimators': [50, 100],
            'max_features': ['auto'],
            'max_depth': [2, 10, 100],
            'min_samples_split': [2, 5, 10],
            'min_samples_leaf': [1, 2, 8],
            })
            for params in param_grid:
                print("Current Parameters : ", params)
                temp_clf = RandomForestRegressor(**params)
                temp_clf.fit(X_train,y_train)
                candidate_rmse = _rmse(y_test, temp_clf.predict(X_test))
                print("Current RMSE with above params : ", candidate_rmse)
                if candidate_rmse < min_rmse:
                    min_rmse = candidate_rmse
                    min_params = params
            print("Best Params for parameter search are : \n", min_params)
            rmse = min_rmse
        elif search_method == "tpot":
            from tpot import TPOTRegressor
            # TPOT setup
            GENERATIONS = 5
            POP_SIZE = 100
            CV = 5
            SEED = 42
            tpot = TPOTRegressor(
            generations=GENERATIONS,
            population_size=POP_SIZE,
            random_state=SEED,
            config_dict="TPOT cuML",
            n_jobs=1, # cuML requires n_jobs=1
            cv=CV,
            verbosity=2,
            )
            tpot.fit(X_train, y_train)
            rmse = _rmse(y_test, tpot.predict(X_test))
        else:
            # no search: evaluate the regressor exactly as it was passed in
            clf.fit(X_train,y_train)
            rmse = _rmse(y_test, clf.predict(X_test))
        print("window: {}, stide: {}, rmse: {}".format(window,stride,rmse))
        topRmseList.append(rmse)
    topNElectrodeRanking = pd.DataFrame({'Electrode': topNList, 'RMSE': topRmseList})
    print(topNElectrodeRanking)
    # Plotting
    fig = plt.gcf()
    fig.set_size_inches(20, 10)
    plt.rcParams.update({'font.size': 30})
    plt.xlabel('Top N Electrodes')
    plt.ylabel('RMSE')
    plt.plot(topNElectrodeRanking["Electrode"], topNElectrodeRanking["RMSE"])
    plt.tight_layout()
 # In[ ]:
 def topFeaturesRegressionRanking(dataset, window, stride, sfreq, clf, label, scale=False, pca=False):
    '''
    Ranks features according to the rmse computed by the regressor passed in clf
    Plots top-N features v/s rmse graph on the current matplotlib figure

    Stage 1: every feature (all of its rows) is scored on its own with clf.
    Features whose rows are all constant are left out of the ranking.
    Stage 2: for N = 1..number of ranked features, clf is fitted on the rows
    of the N best features and the hold-out rmse is recorded.
    Both rankings are written as csv files below <cwd>/<dataset>/arousal_plots/.
    '''
    # parameters :-
                # dataset - name of the dataset (sub-directory of the current working directory)
                # window - length of the sliding window in seconds (used in file names)
                # stride - length of the stride of the sliding window in seconds (used in file names)
                # sfreq - sampling frequency of the EEG data (currently unused)
                # clf - regressor exposing fit/predict
                # label - valence/arousal/dominance/liking label (shape depends upon the dataset)
                # scale - standardise the features (fit on the training split only) if required
                # pca - currently unused
    # returns :-
                # void
    pwd = os.getcwd()
    featurepath = pwd + '/' + dataset + '/data_extracted/featuresDict/'
    # NOTE(review): the output directory is named "arousal_plots" whatever label is selected.
    outputpath = pwd + "/" + dataset + "/arousal_plots/"
    # Create the output directory up front so the csv export cannot fail on a
    # missing folder after all models have been trained.
    os.makedirs(outputpath, exist_ok=True)
    # The archive is read once and the context manager releases the file handle.
    with np.load((featurepath + "shannonEntropy_{}_{}.npz").format(window,stride), allow_pickle=True) as epoched:
        ans = epoched['features']
        Y_epoch = epoched['Y']
    n_segments = ans.shape[1]
    print("Number of segments are: {}".format(n_segments))
    featuresDict = loadFeaturesDict(dataset)
    # NOTE(review): pickle.load can execute arbitrary code; only use a trusted intersection.pkl.
    with open('intersection.pkl', 'rb') as f:
        common = pickle.load(f)
    for k in list(featuresDict.keys()):
        if k not in common:
            # keep only the features listed in intersection.pkl
            featuresDict.pop(k)
    featuresList = list(featuresDict.keys())
    y = Y_epoch[:,label]

    def _stack(feature_names, dtype):
        # Stack the rows of the given features in a single concatenate call and
        # impute every non-finite value with zero. Passing posinf/neginf explicitly
        # keeps +/-inf from being turned into the largest float when NaNs are present.
        blocks = [np.empty((0, n_segments))] + [featuresDict[feature] for feature in feature_names]
        stacked = np.concatenate(blocks, axis=0).astype(dtype)
        return np.nan_to_num(stacked, nan=0.0, posinf=0.0, neginf=0.0)

    def _feature_frame(feature_names):
        # One column per feature row, named feature + row index.
        feature_channel_index = []
        for feature in feature_names:
            for i in range(featuresDict[feature].shape[0]):
                feature_channel_index.append(feature + str(i))
        return pd.DataFrame(_stack(feature_names, np.float32).T, columns=feature_channel_index)

    def _holdout_rmse(frame):
        # Fixed 80/20 split; the scaler is fitted on the training part only.
        X_train, X_test, y_train, y_test = train_test_split(frame, y, test_size=0.2, random_state=42)
        if scale:
            sc = StandardScaler()
            X_train = sc.fit_transform(X_train)
            X_test = sc.transform(X_test)
        clf.fit(X_train, y_train)
        # sqrt(MSE) instead of mean_squared_error(squared=False): the keyword is
        # deprecated and later removed in recent scikit-learn releases.
        return np.sqrt(mean_squared_error(y_test, clf.predict(X_test)))

    ####################################################################
    # Drop the features whose rows are all constant (variance = 0).
    # The row index is zero-padded to two characters so that the feature
    # name can be recovered from a column name with col[:-2].
    padded_columns = []
    for feature in featuresList:
        for i in range(featuresDict[feature].shape[0]):
            padded_columns.append("{}{:02d}".format(feature, i))
    print(len(featuresList))
    print("Number of Feature-Columns: {}\n".format(len(padded_columns)))
    X = pd.DataFrame(_stack(featuresList, 'float64').T, columns=padded_columns)
    constant_filter = VarianceThreshold(threshold=0)
    constant_filter.fit(X)
    feature_channel_index = list(X.columns[constant_filter.get_support()])
    print(len(feature_channel_index),feature_channel_index )
    # features that keep at least one non-constant row, in their original order
    featuresList = list(dict.fromkeys(col[:-2] for col in feature_channel_index))

    # Stage 1: score every feature on its own ##########################
    rmseList = []
    for feature in featuresList:
        X = _feature_frame([feature])
        print("Number of Feature-Columns: {}\n".format(X.shape[1]))
        print("Features Ready for undergoing selection tests done ...\n")
        rmse = _holdout_rmse(X)
        print("window: {}, stide: {}, rmse: {}".format(window,stride,rmse))
        rmseList.append(rmse)
    featureRanking = pd.DataFrame({'Feature': featuresList, 'RMSE': rmseList})
    features_result = featureRanking.sort_values('RMSE')
    features_result.to_csv(outputpath + "CommonFeaturesRegressionRanking" + str(window) + str(stride) + ".csv")
    print(features_result)

    # Stage 2: grow the feature set in ranking order ###################
    N = features_result.shape[0]
    topNList = ["{}".format(x) for x in range(1,N+1)]
    topNRmseList = []
    ranked_features = features_result['Feature'].tolist()
    for n in range(1,N+1):
        X = _feature_frame(ranked_features[:n])
        print(X.shape)
        print("Number of Feature-Columns: {}\n".format(X.shape[1]))
        print("Features Ready for undergoing selection tests done ...\n")
        rmse = _holdout_rmse(X)
        print("window: {}, stide: {}, rmse: {}".format(window,stride,rmse))
        topNRmseList.append(rmse)
    topNFeaturesRanking = pd.DataFrame({'Feature': topNList, 'RMSE': topNRmseList})
    print(topNFeaturesRanking)
    topNFeaturesRanking.to_csv(outputpath + "topCommonFeaturesRegressionRanking" + str(window) + str(stride) + ".csv")
    # Plotting
    fig = plt.gcf()
    fig.set_size_inches(25, 10)
    plt.rcParams.update({'font.size': 30})
    plt.xlabel('Top N Features')
    plt.ylabel('RMSE')
    plt.plot(topNFeaturesRanking["Feature"], topNFeaturesRanking["RMSE"])
    plt.tight_layout()
 # In[ ]:
 def topFeatureColumnsRegressionRanking(dataset, window, stride, sfreq, clf, label, scale=False, pca=False):
    '''
    Ranks single feature columns according to the rmse computed by the regressor passed in clf
    Plots top-N columns v/s rmse graph, saves it as svg and shows it

    Stage 1: every column (one row of one feature) is scored on its own with clf.
    Stage 2: for N = 1..number of columns, clf is fitted on the N best columns
    and the hold-out rmse is recorded.
    The top-N table and the plot are written below <cwd>/<dataset>/arousal_plots/.
    '''
    # parameters :-
                # dataset - name of the dataset (sub-directory of the current working directory)
                # window - length of the sliding window in seconds (used in file names)
                # stride - length of the stride of the sliding window in seconds (used in file names)
                # sfreq - sampling frequency of the EEG data (currently unused)
                # clf - regressor exposing fit/predict
                # label - valence/arousal/dominance/liking label (shape depends upon the dataset)
                # scale - standardise the features (fit on the training split only) if required
                # pca - currently unused
    # returns :-
                # void
    pwd = os.getcwd()
    featurepath = pwd + '/' + dataset + '/data_extracted/featuresDict/'
    # NOTE(review): the output directory is named "arousal_plots" whatever label is selected.
    outputpath = pwd + "/" + dataset + "/arousal_plots/"
    # Create the output directory up front so the csv/svg export cannot fail on a
    # missing folder after all models have been trained.
    os.makedirs(outputpath, exist_ok=True)
    # The archive is read once and the context manager releases the file handle.
    with np.load((featurepath + "shannonEntropy_{}_{}.npz").format(window,stride), allow_pickle=True) as epoched:
        ans = epoched['features']
        Y_epoch = epoched['Y']
    print("Number of segments are: {}".format(ans.shape[1]))
    #X##############################################################################################
    featuresDict = loadFeaturesDict(dataset)
    # NOTE(review): pickle.load can execute arbitrary code; only use a trusted intersection.pkl.
    with open('intersection.pkl', 'rb') as f:
        common = pickle.load(f)
    for k in list(featuresDict.keys()):
        if k not in common:
            # keep only the features listed in intersection.pkl
            featuresDict.pop(k)
    featuresList = list(featuresDict.keys())
    # defining column names: feature name + row index
    feature_channel_index = []
    for feature in featuresList:
        for i in range(featuresDict[feature].shape[0]):
            feature_channel_index.append(feature + str(i))
    # defining feature matrix: all feature rows stacked in a single concatenate call
    blocks = [np.empty((0,ans.shape[1]))] + [featuresDict[feature] for feature in featuresList]
    featureMatrix = np.concatenate(blocks, axis=0)
    print("Shape of FeatureMatrix: {}\n".format(featureMatrix.T.shape))
    # data-imputation: every non-finite value becomes zero. Passing posinf/neginf
    # explicitly keeps +/-inf from being turned into the largest float when NaNs
    # are present.
    featureMatrix = np.nan_to_num(featureMatrix.astype(np.float32), nan=0.0, posinf=0.0, neginf=0.0)
    X = pd.DataFrame(featureMatrix.T, columns=feature_channel_index)
    #Y#####################################################################
    y = Y_epoch[:,label]
    ########################################################################

    def _holdout_rmse(frame):
        # Fixed 80/20 split; the scaler is fitted on the training part only.
        X_train, X_test, y_train, y_test = train_test_split(frame, y, test_size=0.2, random_state=42)
        if scale:
            sc = StandardScaler()
            X_train = sc.fit_transform(X_train)
            X_test = sc.transform(X_test)
        clf.fit(X_train, y_train)
        # sqrt(MSE) instead of mean_squared_error(squared=False): the keyword is
        # deprecated and later removed in recent scikit-learn releases.
        return np.sqrt(mean_squared_error(y_test, clf.predict(X_test)))

    # Stage 1: score every column on its own
    rmseList = [_holdout_rmse(pd.DataFrame(X[col])) for col in feature_channel_index]
    colRanking = pd.DataFrame({'Column': feature_channel_index, 'RMSE': rmseList})
    features_result = colRanking.sort_values('RMSE')
    print(features_result)
    # Stage 2: grow the column set in ranking order
    N = len(feature_channel_index)
    topNList = ["{}".format(x) for x in range(1,N+1)]
    ranked_columns = features_result['Column'].tolist()
    topNRmseList = [_holdout_rmse(X[ranked_columns[:n]]) for n in range(1, N+1)]
    topfeatures_result = pd.DataFrame({'Column': topNList, 'RMSE': topNRmseList})
    print(topfeatures_result)
    topfeatures_result.to_csv(outputpath + "ColumnsRegressionRanking" + str(window) + str(stride) + ".csv")
    # Plotting
    fig = plt.gcf()
    fig.set_size_inches(60, 9)
    plt.xlabel('Top N Columns')
    plt.ylabel('RMSE')
    plt.title("Top N Columns v/s RMSE Plot for Window:{} Stride:{} epoched data by varying N".format(window,stride))
    plt.plot(topfeatures_result["Column"], topfeatures_result["RMSE"])
    plt.tight_layout()
    plt.savefig(outputpath + "topFeatureColumnsRegressionRanking" + str(window) + str(stride) + ".svg", bbox_inches='tight')
    plt.show()
    # clear the figure so a later plt.gcf() call starts from an empty canvas
    plt.clf()
@@ -0,0 +1,648 @@
 #!/usr/bin/env python
 # coding: utf-8
 # In[ ]:
 from ImportUtils import *
 from sklearn.ensemble import RandomForestRegressor as sklearnrfi
 from sklearn.feature_selection import VarianceThreshold
 # In[ ]:
 def topElectrodeFSRegressionRanking(dataset, window, stride, sfreq, clf, label, scale=False, pca=False, mutual_info = False, method='SelectKBest'):
    '''
    Ranks feature columns with a feature-selection method, derives an electrode
    order from that ranking and plots top-N-electrodes v/s RMSE.

    Every feature row is scored by `method`; electrodes are ordered by the first
    appearance of their two-digit row index in the ranked column names. For
    N = 1..number of electrodes, `clf` is refit on the rows of the top-N
    electrodes of every retained feature and the hold-out RMSE is recorded.
    Two CSV files are written below cwd/dataset/arousal_plots/ and the
    RMSE curve is drawn on the current matplotlib figure (not saved or shown here).

    parameters :-
        dataset - name of the dataset (also the sub-directory of the working directory that is read/written)
        window - length of the sliding window in seconds (selects the npz file and output names)
        stride - length of the stride of the sliding window in seconds (selects the npz file and output names)
        sfreq - sampling frequency of the EEG data (accepted for interface compatibility, not used here)
        clf - estimator object exposing fit/predict; it is refit for every N
        label - column index into the Y array stored in the npz archive
        scale - standardise train/test features with StandardScaler when True
        pca - accepted for interface compatibility, not used here
        mutual_info - SelectKBest only: use mutual_info_classif instead of f_classif
        method - 'RandomForest' 'RFE' 'SelectKBest'
    returns :-
        void
    raises :-
        ValueError if `method` is not one of the supported names
    '''
    supported_methods = ('RandomForest', 'RFE', 'SelectKBest')
    if method not in supported_methods:
        # An unknown method used to leave the score table empty and only failed
        # later with an unrelated pandas "Length mismatch" error.
        raise ValueError("method must be one of {}, got {!r}".format(supported_methods, method))
    pwd = os.getcwd()
    featurepath = os.getcwd() + '/' + dataset + '/data_extracted/featuresDict/'
    # Open the archive once and close it again; it was previously opened twice and never closed.
    with np.load((featurepath + "shannonEntropy_{}_{}.npz").format(window,stride), allow_pickle=True) as epoched:
        ans = epoched['features']
        Y_epoch = epoched['Y']
    print("Number of segments are: {}".format(ans.shape[1]))
    featuresDict = loadFeaturesDict(dataset)
    # Asymmetry features are excluded from the electrode ranking.
    # presumably because their rows are electrode pairs rather than electrodes - TODO confirm
    asm_features = ['dasm_delta', 'dasm_theta', 'dasm_alpha', 'dasm_beta', 'dasm_gamma', 'rasm_delta', 'rasm_theta', 'rasm_alpha', 'rasm_beta', 'rasm_gamma']
    for asm in asm_features:
        featuresDict.pop(asm)
    with open('intersection.pkl', 'rb') as f:
        common = pickle.load(f)
    for k in list(featuresDict.keys()):
        if k not in common:
            # keep only the features listed in intersection.pkl
            featuresDict.pop(k)
    featuresList = list(featuresDict.keys())
    print(featuresList)
    # Stack every feature block row-wise in a single call; the empty float
    # seed keeps the (0, segments) shape and float dtype when nothing is left.
    featureMatrix = np.concatenate([np.empty((0,ans.shape[1]))] + list(featuresDict.values()), axis=0)
    if np.isnan(featureMatrix).any():
        featureMatrix = np.nan_to_num(featureMatrix,nan=0)
    featureMatrix = featureMatrix.astype('float64')
    # Column name = feature name + zero-padded row index, so the last two
    # characters can be parsed back into the row (electrode) index below.
    feature_channel_index = []
    for feature in featuresList:
        for i in range(featuresDict[feature].shape[0]):
            feature_channel_index.append("{}{:02d}".format(feature, i))
    print("Number of Feature-Columns: {}\n".format(len(feature_channel_index)))
    X = pd.DataFrame(featureMatrix.T)
    X = X.replace([np.inf, -np.inf], np.nan)
    X = X.fillna(0)
    X.columns = feature_channel_index
    #################################################################
    y = copy.deepcopy(Y_epoch[:,label]) #valence
    print("y.shape: ", y.shape)
    dfscores = None
    if(method == 'RandomForest'):
        # Random Forest feature importances
        estimator = sklearnrfi()
        fit = estimator.fit(X,y)
        dfscores = pd.DataFrame(fit.feature_importances_)
    elif(method == 'RFE'):
        # NOTE(review): n_features_to_select equals the number of columns, so
        # presumably nothing is eliminated and ranking_ carries no ordering;
        # nlargest below also puts larger rank numbers first - confirm intent.
        selector = RFE(clf, n_features_to_select=X.shape[1], step=1)
        selector = selector.fit(X, y)
        dfscores = pd.DataFrame(selector.ranking_)
    else:
        # SelectKBest with k = all columns: only the per-column scores are used.
        # NOTE(review): classification score functions are applied although the
        # models are evaluated with RMSE - confirm this is intended.
        func = f_classif if mutual_info == False else mutual_info_classif
        bestfeatures = SelectKBest(score_func=func, k=X.shape[1])
        fit = bestfeatures.fit(X,y)
        dfscores = pd.DataFrame(fit.scores_)
    dfcolumns = pd.DataFrame(X.columns)
    #concat two dataframes for better visualization
    featureScores = pd.concat([dfcolumns,dfscores],axis=1)
    featureScores.columns = ['Specs','Score']  #naming the dataframe columns
    features_result = featureScores.nlargest(X.shape[1],'Score')
    print(features_result)
    features_result.to_csv(pwd + "/" + dataset + "/arousal_plots/" + "CommonElectrodeFSRegressionRanking"+ method + str(window) + str(stride) + ".csv")
    ###################################################################
    # Electrode order = order of first appearance in the ranked column names.
    topelectrodes = []
    for col in features_result['Specs'].values:
        electrode = int(col[-2:])
        if(electrode not in topelectrodes):
            topelectrodes.append(electrode)
    ##################################################################################
    N =  len(topelectrodes)
    topRmseList = []
    topNList = ["{}".format(x) for x in range(1,N+1)]
    for n in range(1,N+1):
        electrode_index = topelectrodes[:n]
        print(topelectrodes)
        print(electrode_index)
        # X-Values: one row per (electrode, feature) pair, electrode-major
        featureMatrix = np.empty((len(featuresList)*len(electrode_index),ans.shape[1]))
        row = 0
        for index in electrode_index:
            for value in featuresDict.values():
                featureMatrix[row,:] = value[index,:]
                row += 1
        featureMatrix = featureMatrix.astype(np.float32)
        print(featureMatrix.T.shape)
        # Removing NaN Values
        if np.isnan(featureMatrix).any():
            featureMatrix = np.nan_to_num(featureMatrix,nan=0)
        # Same electrode-major order as the rows filled above.
        feature_channel_index = []
        for index in electrode_index:
            for feature in featuresList:
                feature_channel_index.append(feature + str(index))
        print("Number of Feature-Columns: {}\n".format(len(feature_channel_index)))
        X = pd.DataFrame(featureMatrix.T)
        X.columns = feature_channel_index
        X = X.replace([np.inf, -np.inf], np.nan)
        X = X.fillna(0)
        print("Features Ready for undergoing selection tests done ...\n")
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
        # Normalise-scale data
        # Feature Scaling (fit on the training split only)
        if(scale == True):
            sc = StandardScaler()
            X_train = sc.fit_transform(X_train)
            X_test = sc.transform(X_test)
        # Apply the estimator
        clf.fit(X_train, y_train)
        y_predict = clf.predict(X_test)
        rmse = mean_squared_error(y_test, y_predict,squared=False)
        print("window: {}, stide: {}, rmse: {}".format(window,stride,rmse))
        topRmseList.append(rmse)
    topNElectrode_df = pd.DataFrame(topNList)
    topNRmse_df = pd.DataFrame(topRmseList)
    #concat two dataframes for better visualization
    topNElectrodeRanking = pd.concat([topNElectrode_df, topNRmse_df],axis=1)
    topNElectrodeRanking.columns = ['Electrode','RMSE']  #naming the dataframe columns
    print(topNElectrodeRanking)
    topNElectrodeRanking.to_csv(pwd + "/" + dataset + "/arousal_plots/" + "topCommonElectrodeFSRegressionRanking"+ method + str(window) + str(stride) + ".csv")
    # Plotting
    fig = plt.gcf()
    fig.set_size_inches(20, 10)
    plt.rcParams.update({'font.size': 30})
    plt.xlabel('Top N Electrodes')
    plt.ylabel('RMSE')
    plt.plot(topNElectrodeRanking.loc[:,"Electrode"], topNElectrodeRanking.loc[:,"RMSE"])
    plt.tight_layout()
 # In[ ]:
 def topFeatureFSRegressionRanking(dataset, window, stride, sfreq, clf, label, scale=False, pca=False, mutual_info = False, method='SelectKBest'):
    '''
    Ranks feature columns with a feature-selection method, derives a feature
    order from that ranking and plots top-N-features v/s RMSE.

    Zero-variance columns are dropped before scoring. Features are ordered by
    the first appearance of their name in the ranked column names. For
    N = 1..number of features, `clf` is refit on all rows of the top-N features
    and the hold-out RMSE is recorded. Two CSV files are written below
    cwd/dataset/arousal_plots/ and the RMSE curve is drawn on the current
    matplotlib figure (not saved or shown here).

    parameters :-
        dataset - name of the dataset (also the sub-directory of the working directory that is read/written)
        window - length of the sliding window in seconds (selects the npz file and output names)
        stride - length of the stride of the sliding window in seconds (selects the npz file and output names)
        sfreq - sampling frequency of the EEG data (accepted for interface compatibility, not used here)
        clf - estimator object exposing fit/predict; it is refit for every N
        label - column index into the Y array stored in the npz archive
        scale - standardise train/test features with StandardScaler when True
        pca - accepted for interface compatibility, not used here
        mutual_info - SelectKBest only: use mutual_info_classif instead of f_classif
        method - 'RandomForest' 'RFE' 'SelectKBest'
    returns :-
        void
    raises :-
        ValueError if `method` is not one of the supported names
    '''
    supported_methods = ('RandomForest', 'RFE', 'SelectKBest')
    if method not in supported_methods:
        # An unknown method used to leave the score table empty and only failed
        # later with an unrelated pandas "Length mismatch" error.
        raise ValueError("method must be one of {}, got {!r}".format(supported_methods, method))
    pwd = os.getcwd()
    featurepath = os.getcwd() + '/' + dataset + '/data_extracted/featuresDict/'
    # Open the archive once and close it again; it was previously opened twice and never closed.
    with np.load((featurepath + "shannonEntropy_{}_{}.npz").format(window,stride), allow_pickle=True) as epoched:
        ans = epoched['features']
        Y_epoch = epoched['Y']
    print("Number of segments are: {}".format(ans.shape[1]))
    featuresDict = loadFeaturesDict(dataset)
    with open('intersection.pkl', 'rb') as f:
        common = pickle.load(f)
    for k in list(featuresDict.keys()):
        if k not in common:
            # keep only the features listed in intersection.pkl
            featuresDict.pop(k)
    ##################################################################
    featuresList = list(featuresDict.keys())
    print(featuresList)
    # Stack every feature block row-wise in a single call; the empty float
    # seed keeps the (0, segments) shape and float dtype when nothing is left.
    featureMatrix = np.concatenate([np.empty((0,ans.shape[1]))] + list(featuresDict.values()), axis=0)
    if np.isnan(featureMatrix).any():
        featureMatrix = np.nan_to_num(featureMatrix,nan=0)
    featureMatrix = featureMatrix.astype('float64')
    # Column name = feature name + zero-padded row index, so the last two
    # characters can be split off again when the ranking is parsed below.
    feature_channel_index = []
    for feature in featuresList:
        for i in range(featuresDict[feature].shape[0]):
            feature_channel_index.append("{}{:02d}".format(feature, i))
    print(len(list(featuresDict.keys())))
    print("Number of Feature-Columns: {}\n".format(len(feature_channel_index)))
    X = pd.DataFrame(featureMatrix.T)
    X = X.replace([np.inf, -np.inf], np.nan)
    X = X.fillna(0)
    X.columns = feature_channel_index
    #Remove Variance = 0 features
    constant_filter = VarianceThreshold(threshold=0)
    constant_filter.fit(X)
    # Take the surviving names straight from the support mask so they stay
    # aligned with the columns that transform() keeps.
    feature_channel_index = list(X.columns[constant_filter.get_support()])
    X = pd.DataFrame(constant_filter.transform(X))
    print(len(feature_channel_index),feature_channel_index )
    X.columns = feature_channel_index
    #################################################################
    y = copy.deepcopy(Y_epoch[:,label]) #valence
    print("y.shape: ", y.shape)
    dfscores = None
    if(method == 'RandomForest'):
        # Random Forest feature importances
        estimator = sklearnrfi() #RandomForestRegressor()
        fit = estimator.fit(X,y)
        dfscores = pd.DataFrame(fit.feature_importances_)
    elif(method == 'RFE'):
        # NOTE(review): n_features_to_select equals the number of columns, so
        # presumably nothing is eliminated and ranking_ carries no ordering;
        # nlargest below also puts larger rank numbers first - confirm intent.
        selector = RFE(clf, n_features_to_select=X.shape[1], step=1)
        selector = selector.fit(X, y)
        dfscores = pd.DataFrame(selector.ranking_)
    else:
        # SelectKBest with k = all columns: only the per-column scores are used.
        # NOTE(review): classification score functions are applied although the
        # models are evaluated with RMSE - confirm this is intended.
        func = f_classif if mutual_info == False else mutual_info_classif
        bestfeatures = SelectKBest(score_func=func, k=X.shape[1])
        fit = bestfeatures.fit(X,y)
        dfscores = pd.DataFrame(fit.scores_)
    dfcolumns = pd.DataFrame(X.columns)
    #concat two dataframes for better visualization
    featureScores = pd.concat([dfcolumns,dfscores],axis=1)
    featureScores.columns = ['Specs','Score']  #naming the dataframe columns
    features_result = featureScores.nlargest(X.shape[1],'Score')
    print(features_result)
    features_result.to_csv(pwd + "/" + dataset + "/arousal_plots/" + "CommonFeatureFSRegressionRanking"+ method + str(window) + str(stride) + ".csv")
    ###################################################################
    # Feature / electrode order = order of first appearance in the ranking.
    topfeatures = []
    topelectrodes = []
    for col in features_result['Specs'].values:
        feature = col[:-2]
        electrode = int(col[-2:])
        if(feature not in topfeatures):
            topfeatures.append(feature)
        if(electrode not in topelectrodes):
            topelectrodes.append(electrode)
    ######################################################################
    # TOP-N-FEATURE-RANKING
    print(topfeatures)
    print(topelectrodes)
    N =  len(topfeatures)
    topNRmseList = []
    topNList = ["{}".format(x) for x in range(1,N+1)]
    for n in range(1,N+1):
        topnfeatures = topfeatures[:n]
        # X-Values: all rows of the top-n features.
        # NOTE(review): rows removed by the variance filter above are included
        # again here because the matrix is rebuilt from featuresDict.
        featureMatrix = np.concatenate([np.empty((0,ans.shape[1]))] + [featuresDict[feature] for feature in topnfeatures], axis=0)
        featureMatrix = featureMatrix.astype('float64')
        print(featureMatrix.T.shape)
        feature_channel_index = []
        for feature in topnfeatures:
            for i in range(featuresDict[feature].shape[0]):
                feature_channel_index.append(feature + str(i))
        # Removing NaN Values
        if np.isnan(featureMatrix).any():
            featureMatrix = np.nan_to_num(featureMatrix,nan=0)
        print("Number of Feature-Columns: {}\n".format(len(feature_channel_index)))
        X = pd.DataFrame(featureMatrix.T)
        X.columns = feature_channel_index
        X = X.replace([np.inf, -np.inf], np.nan)
        X = X.fillna(0)
        print("Features Ready for undergoing selection tests done ...\n")
        X = X.astype(np.float32)
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
        # Normalise-scale data
        # Feature Scaling (fit on the training split only)
        if(scale == True):
            sc = StandardScaler()
            X_train = sc.fit_transform(X_train)
            X_test = sc.transform(X_test)
        clf.fit(X_train, y_train)
        y_predict = clf.predict(X_test)
        rmse = mean_squared_error(y_test, y_predict,squared=False)
        print("window: {}, stide: {}, rmse: {}".format(window,stride,rmse))
        topNRmseList.append(rmse)
    topNFeatures_df = pd.DataFrame(topNList)
    topNRmse_df = pd.DataFrame(topNRmseList)
    #concat two dataframes for better visualization
    topNFeaturesRanking = pd.concat([topNFeatures_df, topNRmse_df],axis=1)
    topNFeaturesRanking.columns = ['Feature','RMSE']  #naming the dataframe columns
    print(topNFeaturesRanking)
    topNFeaturesRanking.to_csv(pwd + "/" + dataset + "/arousal_plots/" + "topCommonFeatureFSRegressionRanking"+ method + str(window) + str(stride) + ".csv")
    # Plotting
    fig = plt.gcf()
    fig.set_size_inches(25, 10)
    plt.rcParams.update({'font.size': 30})
    plt.xlabel('Top N Features')
    plt.ylabel('RMSE')
    plt.plot(topNFeaturesRanking.loc[:,"Feature"], topNFeaturesRanking.loc[:,"RMSE"])
    plt.tight_layout()
 # In[ ]:
 def topFSColumnsRegressionRanking(dataset, window, stride, sfreq, clf, label, scale=False, pca=False, mutual_info = False, method='SelectKBest'):
    '''
    Method C: ranks individual feature columns with a feature-selection method
    and plots top-N-columns v/s RMSE.

    For N = 1..number of columns, `clf` is refit on the N best-ranked columns
    and the hold-out RMSE is recorded. The RMSE table is written as CSV and the
    curve is saved as SVG below cwd/dataset/arousal_plots/, then shown and the
    current matplotlib figure is cleared.

    parameters :-
        dataset - name of the dataset (also the sub-directory of the working directory that is read/written)
        window - length of the sliding window in seconds (selects the npz file and output names)
        stride - length of the stride of the sliding window in seconds (selects the npz file and output names)
        sfreq - sampling frequency of the EEG data (accepted for interface compatibility, not used here)
        clf - estimator object exposing fit/predict; it is refit for every N
        label - column index into the Y array stored in the npz archive
        scale - standardise train/test features with StandardScaler when True
        pca - accepted for interface compatibility, not used here
        mutual_info - SelectKBest only: use mutual_info_classif instead of f_classif
        method - 'RandomForest' 'RFE' 'SelectKBest'
    returns :-
        void
    raises :-
        ValueError if `method` is not one of the supported names
    '''
    supported_methods = ('RandomForest', 'RFE', 'SelectKBest')
    if method not in supported_methods:
        # An unknown method used to leave the score table empty and only failed
        # later with an unrelated pandas "Length mismatch" error.
        raise ValueError("method must be one of {}, got {!r}".format(supported_methods, method))
    pwd = os.getcwd()
    featurepath = os.getcwd() + '/' + dataset + '/data_extracted/featuresDict/'
    # Open the archive once and close it again; it was previously opened twice and never closed.
    with np.load((featurepath + "shannonEntropy_{}_{}.npz").format(window,stride), allow_pickle=True) as epoched:
        ans = epoched['features']
        Y_epoch = epoched['Y']
    print("Number of segments are: {}".format(ans.shape[1]))
    #X##############################################################################################
    featuresDict = loadFeaturesDict(dataset)
    with open('intersection.pkl', 'rb') as f:
        common = pickle.load(f)
    for k in list(featuresDict.keys()):
        if k not in common:
            # keep only the features listed in intersection.pkl
            featuresDict.pop(k)
    print("Number of Features:",len(list(featuresDict.keys())))
    featuresList = list(featuresDict.keys())
    # Column name = feature name + '_' + zero-padded row index.
    feature_channel_index = []
    for feature in featuresList:
        for i in range(featuresDict[feature].shape[0]):
            feature_channel_index.append("{}_{:02d}".format(feature, i))
    print(len(list(featuresDict.keys())))
    print("Number of Feature-Columns: {}\n".format(len(feature_channel_index)))
    #defining feature matrix
    # Stack every feature block row-wise in a single call; the empty float
    # seed keeps the (0, segments) shape and float dtype when nothing is left.
    featureMatrix = np.concatenate([np.empty((0,ans.shape[1]))] + list(featuresDict.values()), axis=0)
    print("Shape of FeatureMatrix: {}\n".format(featureMatrix.T.shape))
    #data-imputation and nan-removal
    featureMatrix = featureMatrix.astype(np.float32)
    if np.isnan(featureMatrix).any():
        featureMatrix = np.nan_to_num(featureMatrix,nan=0)
    X = pd.DataFrame(featureMatrix.T)
    X = X.replace([np.inf, -np.inf], np.nan)
    X = X.fillna(0)
    X.columns = feature_channel_index
    #Y#####################################################################
    y = Y_epoch[:,label] #valence
    ########################################################################
    dfscores = None
    if(method == 'RandomForest'):
        # Random Forest feature importances
        estimator = sklearnrfi() #RandomForestRegressor()
        fit = estimator.fit(X,y)
        dfscores = pd.DataFrame(fit.feature_importances_)
    elif(method == 'RFE'):
        # NOTE(review): n_features_to_select equals the number of columns, so
        # presumably nothing is eliminated and ranking_ carries no ordering;
        # nlargest below also puts larger rank numbers first - confirm intent.
        selector = RFE(clf, n_features_to_select=X.shape[1], step=1)
        selector = selector.fit(X, y)
        dfscores = pd.DataFrame(selector.ranking_)
    else:
        # SelectKBest with k = all columns: only the per-column scores are used.
        # NOTE(review): classification score functions are applied although the
        # models are evaluated with RMSE - confirm this is intended.
        func = f_classif if mutual_info == False else mutual_info_classif
        bestfeatures = SelectKBest(score_func=func, k=X.shape[1])
        fit = bestfeatures.fit(X,y)
        dfscores = pd.DataFrame(fit.scores_)
    dfcolumns = pd.DataFrame(X.columns)
    #concat two dataframes for better visualization
    featureScores = pd.concat([dfcolumns,dfscores],axis=1)
    featureScores.columns = ['Column','Score']  #naming the dataframe columns
    features_result = featureScores.nlargest(X.shape[1],'Score')
    print(features_result)
    N = len(feature_channel_index)
    topNRmseList = []
    topNList = ["{}".format(x) for x in range(1,N+1)]
    # Ranked column names are loop-invariant; slice them instead of rebuilding
    # the list from features_result.head(n) on every iteration.
    ranked_columns = features_result['Column'].tolist()
    for n in range(1, N+1):
        topncols = ranked_columns[:n]
        input_df = pd.DataFrame(X[topncols])
        X_train, X_test, y_train, y_test = train_test_split(input_df, y, test_size=0.2, random_state=42)
        # Normalise-scale data
        # Feature Scaling (fit on the training split only)
        if(scale == True):
            sc = StandardScaler()
            X_train = sc.fit_transform(X_train)
            X_test = sc.transform(X_test)
        # Apply the estimator
        clf.fit(X_train, y_train)
        y_predict = clf.predict(X_test)
        rmse = mean_squared_error(y_test, y_predict, squared=False)
        print(n,rmse)
        topNRmseList.append(rmse)
    topcol_df = pd.DataFrame(topNList)
    toprmse_df = pd.DataFrame(topNRmseList)
    #concat two dataframes for better visualization
    topcolRanking = pd.concat([topcol_df, toprmse_df],axis=1)
    topcolRanking.columns = ['Column','RMSE']  #naming the dataframe columns
    topfeatures_result = topcolRanking
    print(topfeatures_result)
    topfeatures_result.to_csv(pwd + "/" + dataset + "/arousal_plots/" + "topFSColumnsRegressionRanking"+method + str(window) + str(stride) + ".csv")
    # Plotting
    fig = plt.gcf()
    fig.set_size_inches(60, 9)
    plt.xlabel('Top N Columns')
    plt.ylabel('RMSE')
    plt.title("Top N Columns v/s RMSE Plot for Window:{} Stride:{} epoched data by varying N".format(window,stride))
    plt.plot(topfeatures_result.loc[:,"Column"], topfeatures_result.loc[:,"RMSE"])
    plt.tight_layout()
    plt.savefig(pwd + "/" + dataset + "/arousal_plots/" + "topFSColumnsRegressionRanking"+method + str(window) + str(stride) + ".svg", bbox_inches='tight', dpi=500)
    plt.show()
    plt.clf()
 # In[ ]:
 # Running this module directly does nothing; it only defines the ranking functions above.
 if __name__ == '__main__':
    pass
@@ -0,0 +1,319 @@
 #!/usr/bin/env python
 # coding: utf-8
 # In[ ]:
 # Notebook setup cells: these calls need an IPython session (get_ipython) and Google Colab.
 # Clone the EEGExtract repository into ./cloned-repo, change into it and list its contents.
 get_ipython().system('git clone -l -s https://github.com/sari-saba-sadiya/EEGExtract.git cloned-repo')
 get_ipython().run_line_magic('cd', 'cloned-repo')
 get_ipython().system('ls')
 # In[ ]:
 # Install the packages listed in the requirements.txt of the current directory.
 get_ipython().system('pip  install -r requirements.txt')
 # In[ ]:
 # Mount Google Drive at /gdrive, forcing a remount if it is already mounted.
 from google.colab import drive
 drive.mount('/gdrive',force_remount=True)
 # In[ ]:
 # pyinform is installed separately from requirements.txt.
 get_ipython().system('pip install pyinform')
 # In[ ]:
 # Switch to the project folder on the mounted drive; the relative data paths used below depend on it.
 get_ipython().run_line_magic('cd', '../../gdrive/MyDrive/emotion_recognition_project')
 # In[ ]:
 import EEGExtract as eeg
 from scipy import io,signal
 import numpy as np
 import pandas as pd
 from sklearn import preprocessing
 import pandas as pd
 import pickle
 import matplotlib.pyplot as plt
 # In[ ]:
 class load_data:
    '''
    Load the preprocessed data here, store the parameters.

    Supported dataset names are 'DREAMER', 'DEAP' and 'OASIS'. After
    load_arrays() the instance holds the raw arrays (X, Y), the sampling
    frequency, the channel names, the subject count and eegData, the array
    handed to the EEGExtract-based feature code.
    '''
    def __init__(self,name):
        self.name = name #name of dataset
        self.X = None
        self.Y = None
        self.Z = None
        self.freq = None #(in Hz) is same for all datasets
        self.channels = None
        self.ch_type = 'eeg'
        self.eegData = None
        self.use_autoreject = 'y' # OASIS only: 'y' reads the autoreject pickle, anything else the npz
        self.no_of_subjects = None
    def load_arrays(self):
        '''
        Read X and Y for self.name from disk, set freq / channels /
        no_of_subjects and finish with reshape_data().

        raises :-
            ValueError if self.name is not a supported dataset name
            (previously this surfaced as an unbound local variable error)
        '''
        # The 14 electrodes shared by all three datasets, in the order used everywhere below.
        common_channels = ['AF3', 'F7', 'F3', 'FC5', 'T7', 'P7', 'O1', 'O2', 'P8', 'T8', 'FC6', 'F4', 'F8', 'AF4']
        if self.name == 'DREAMER':
            # The archives are closed as soon as the arrays have been read.
            with np.load('original_data/DREAMER.npz') as array:
                self.X = array['X']
                self.Y = array['Y']
            self.freq = 128
            self.no_of_subjects = 23
            self.channels = common_channels
        elif self.name == 'DEAP':
            with np.load('original_data/DEAP.npz') as array:
                self.X = array['X']
                self.Y = array['Y']
            self.no_of_subjects = 32
            self.freq = 128
            # Positions of the 14 common electrodes in the DEAP channel order
            # (1=AF3, 3=F7, 2=F3, 4=FC5, 7=T7, 11=P7, 13=O1, 31=O2, 29=P8,
            #  25=T8, 21=FC6, 19=F4, 20=F8, 17=AF4).
            # To maintain uniformity across all datasets, only 14 electrodes selected
            self.X = self.X[:,:,[1,3,2,4,7,11,13,31,29,25,21,19,20,17]]
            self.channels = common_channels
        elif self.name == 'OASIS':
            self.no_of_subjects = 15
            self.freq = 128
            self.channels = common_channels
            if self.use_autoreject == 'y':
                with open('preprocessed_data/oasis/with_autoreject.p','rb') as file:
                    self.X = pickle.load(file)
                self.X ,self.Y= merge_dictionary(self.X)
            else:
                with np.load('preprocessed_data/oasis/without_autoreject.npz') as array:
                    self.X = array['X']
                    self.Y = array['Y']
            # NOTE(review): np.reshape keeps the flat element order, it does not
            # swap the last two axes like np.transpose would - confirm intent.
            (a,b,c) = self.X.shape
            self.X = np.reshape(self.X,(a,c,b))
        else:
            raise ValueError("unknown dataset name {!r}; expected 'DREAMER', 'DEAP' or 'OASIS'".format(self.name))
        self.reshape_data()
    def reshape_data(self):
        '''
        reshapes data in the format EEGExtract module expects i.e channels x timepoints x epochs
        '''
        # NOTE(review): this only relabels the dimensions of the same flat
        # buffer (no axes are moved); epoch_data() undoes it with another
        # reshape, so the two have to stay in step.
        (epochs,timepoints,channels) = self.X.shape
        self.eegData = np.reshape(self.X,(channels,timepoints,epochs))
 # In[ ]:
 def merge_dictionary(dictionary):
    '''
    merge all trial data to form one array

    parameters :-
        dictionary - maps a trial key to a sequence whose element 0 is the
                     epoch array (epochs x channels x samples) and whose
                     element 3 is the score for that trial
                     (presumably two values, e.g. valence/arousal - TODO confirm)
    returns :-
        X - all epoch arrays stacked along axis 0
        Y - one score row per epoch, in the same order as the rows of X
    raises :-
        ValueError if the dictionary is empty
    '''
    if not dictionary:
        raise ValueError("dictionary must contain at least one trial")
    # The channel / segment sizes are taken from the first trial.
    first_epochs = next(iter(dictionary.values()))[0]
    no_of_channels = first_epochs.shape[1]
    length_of_segment = first_epochs.shape[2]
    # The empty float seeds fix the result dtype and the trailing dimensions.
    x_parts = [np.empty((0,no_of_channels,length_of_segment))]
    y_parts = [np.empty((0,2))]
    for trial_data in dictionary.values():
        array = trial_data[0]
        score = trial_data[3]
        x_parts.append(array)
        # Repeat this trial's score once per epoch of this trial so Y stays
        # row-aligned with X even when trials hold different epoch counts.
        y_parts.append(np.repeat(np.expand_dims(score,axis = 0),np.shape(array)[0],axis = 0))
    X = np.concatenate(x_parts,axis = 0)
    Y = np.concatenate(y_parts,axis = 0)
    return X,Y
 # In[ ]:
 def calculate_diffrential_entropy_for_bands(eegData,freq):
    '''
    Function to calculate the differential entropy for the different bands of EEG data.

    The signal is band-pass filtered with eeg.filt_data for each band and the
    entropy is computed as 1/2 * log(2 * pi * e * variance), with the variance
    taken along axis 1 of the filtered array.

    parameters :-
        eegData :- The EEG signal value
        freq :- sampling frequency of the EEG signal
    returns :-
        bandwise DE as a tuple in the order delta, theta, alpha, beta, gamma
    '''
    # (low, high) cut-offs in Hz: delta, theta, alpha, beta, gamma
    band_limits = ((0.5,4), (4,8), (8,12), (12,30), (30,63))
    # Filter every band first, then derive the entropies.
    filtered_bands = [eeg.filt_data(eegData,low,high,freq) for (low,high) in band_limits]
    entropies = []
    for filtered in filtered_bands:
        variance = np.var(filtered,axis = 1)
        entropies.append(1/2*np.log(variance*np.pi*np.e*2))
    return tuple(entropies)
 # In[ ]:
 #['AF3', 'F7', 'F3', 'FC5', 'T7', 'P7', 'O1', 'O2', 'P8', 'T8', 'FC6', 'F4', 'F8', 'AF4']
 #   0      1     2     3      4    5     6     7     8     9      10    11     12    13
 def calculate_RASM_DASM(band):
    '''
    Rational (RASM) and differential (DASM) asymmetry of the 7 left/right
    electrode pairs of a 14-row band array.

    parameters :-
        band :- 2-D array with one row per electrode, rows ordered as in the
                electrode list above this function
    returns :-
        array with one row per column of `band` and 14 columns: the 7 ratios
        left/right followed by the 7 differences left-right, pairs ordered
        AF3-AF4, F3-F4, F7-F8, FC5-FC6, O1-O2, P7-P8, T7-T8
    '''
    #                 AF3 F3  F7  FC5 O1 P7 T7
    left_rows     = [0,  2,  1,  3,  6, 5, 4]
    #                 AF4 F4  F8  FC6 O2 P8 T8
    right_rows    = [13, 11, 12, 10, 7, 8, 9]
    left = band[left_rows,:]
    right = band[right_rows,:]
    rasm = left/right
    dasm = left-right
    # The empty float seed reproduces the float64 accumulator of the row-by-row build.
    seed = np.empty((0,band.shape[1]))
    features = np.concatenate((seed,rasm,dasm),axis = 0)
    return features.T
 # In[ ]:
 def epoch_data(X,Y, window, stride, sfreq):
    '''
    Cut every trial into sliding-window segments and repeat its labels per segment.

    parameters :-
        X :- 3-D array whose dimensions are read as (channels, timepoints, trials)
        Y :- 2-D label array, one row per trial; only the first two columns are used
        window :- segment length in seconds
        stride :- step between segment starts in seconds
        sfreq :- sampling frequency used to convert seconds to samples
    returns :-
        X_new :- segments, returned with shape (channels, segment, trials*epochs)
        Y_new :- (trials*epochs, 2) labels, one row per segment
    '''
    # NOTE(review): both reshapes below relabel the dimensions of the flat
    # buffer, they do not move axes - confirm against load_data.reshape_data.
    n_channels, n_timepoints, n_trials = X.shape
    per_trial = np.reshape(X,(n_trials,n_channels,n_timepoints))
    segment = int(window*sfreq)
    step = int(stride*sfreq)
    epochPerTrial = int((n_timepoints-segment)/step + 1)
    n_rows = n_trials*epochPerTrial
    X_new = np.empty((n_rows,n_channels,segment))
    Y_new = np.empty((n_rows,2))
    for row in range(n_rows):
        # row enumerates (trial, epoch) pairs trial-major
        trial, epoch = divmod(row,epochPerTrial)
        start = epoch*step
        X_new[row,:,:] = per_trial[trial,:,start:start+segment]
        Y_new[row,:] = Y[trial,:2]
    (n_rows,n_channels,n_samples) = X_new.shape
    return np.reshape(X_new,(n_channels,n_samples,n_rows)),Y_new
 # In[ ]:
 def segregate_data_of_subjects(feature_matrix,dataset,sfreq = 128):
    '''
    Split the rows of a feature matrix into consecutive per-subject blocks.

    parameters :-
        feature_matrix :- 2-D array, one row per sample
        dataset :- object with a `name` attribute and, unless the name is
                   'DEAP AND DREAMER', a `no_of_subjects` attribute
        sfreq :- not used by this function
    returns :-
        dict mapping 'subject_1', 'subject_2', ... to the row block of that subject
    '''
    total_samples = feature_matrix.shape[0]
    if dataset.name == 'DEAP AND DREAMER':
        # Combined matrix: the first 80640 rows are cut into 32 blocks of 2520
        # rows, the remaining rows into 23 blocks of 8190 rows
        # (presumably DEAP followed by DREAMER - TODO confirm).
        first_part = feature_matrix[:80640,:]
        second_part = feature_matrix[80640:,:]
        print(second_part.shape)
        blocks = {}
        for k in range(32):
            blocks['subject_' + str(k+1)] = first_part[2520*k:2520*(k+1),:]
        for k in range(0,23):
            blocks['subject_' + str(k+1+32)] = second_part[8190*k:8190*(k+1),:]
        return blocks
    # Single dataset: equal-sized blocks, any remainder rows at the end are dropped.
    samples_per_subject = total_samples//dataset.no_of_subjects
    print('samples per subject taken are ',samples_per_subject)
    return {
        'subject_' + str(k+1): feature_matrix[samples_per_subject*k:samples_per_subject*(k+1),:]
        for k in range(dataset.no_of_subjects)
    }
 # In[ ]:
 def driver_code():
    '''
    Extract RASM/DASM features for the DREAMER dataset and save them per band.

    Loads DREAMER, epochs it with a 1 s window and 1 s stride, computes the
    band-wise differential entropy, derives the asymmetry features of every
    band and writes one npz file per band (arrays `features` and `Y`).
    '''
    window = 1
    stride = 1
    dataset = load_data('DREAMER')
    dataset.load_arrays()
    X,Y = epoch_data(dataset.eegData,dataset.Y,window,stride,128)
    print('shape after epoching')
    print('X:',X.shape)
    print('Y:',Y.shape)
    print('')
    print('')
    de_delta,de_theta,de_alpha,de_beta,de_gamma = calculate_diffrential_entropy_for_bands(X,dataset.freq)
    bands = {'delta':de_delta,'theta':de_theta,'alpha':de_alpha,'beta':de_beta,'gamma':de_gamma}
    # Output file name: features/<dataset>_RASM_DASM/<band>_<window>_<stride>
    out_dir = 'features/'+dataset.name.lower()+'_RASM_DASM/'
    out_suffix = '_'+str(window)+'_'+str(stride)
    for name,band in bands.items():
        # extracted RASM ,DASM features for each eeg band
        feature_matrix = calculate_RASM_DASM(band)
        print(name ,':' ,end = '')
        print(feature_matrix.shape)
        print(feature_matrix)
        np.savez(out_dir+name+out_suffix,features = feature_matrix,Y=Y)
 # In[ ]:
 # Run the extraction as soon as this cell / module is executed.
 driver_code()
 # In[ ]:
 # Notebook inspection cell: loads the 'features' array of a previously saved OASIS file.
 # The result is not assigned, so outside a notebook this line has no visible effect.
 np.load('features/oasis/without_autoreject/shannonEntropy_1_1.npz')['features']
 # In[ ]:
@@ -0,0 +1,467 @@
 #!/usr/bin/env python
 # coding: utf-8
 # In[ ]:
 # -*- coding: utf-8 -*-
 """feature_extraction_25GB_RAM.ipynb
 Automatically generated by Colaboratory.
 Original file is located at
    https://colab.research.google.com/drive/1QnVj7GyyJhLPrYF4vBTppMwynXqmOTEJ
 """
 # Commented out IPython magic to ensure Python compatibility.
 import EEGExtract as eeg
 from scipy import io,signal
 import numpy as np
 import pandas as pd
 from sklearn import preprocessing
 import pandas as pd
 import pickle
 class load_data:
     '''
     Load the preprocessed data of one dataset and store its parameters.
     Supported names are 'DREAMER', 'DEAP' and 'OASIS'. Call load_arrays() after
     construction; for OASIS set use_autoreject to 'y' first to read the
     autoreject pickle instead of the plain .npz archive.
     '''
     def __init__(self,name):
         self.name = name #name of dataset
         self.X = None #array as read from disk (filled by load_arrays)
         self.Y = None #labels belonging to X (filled by load_arrays)
         self.Z = None #never filled by this class
         self.freq = None #(in Hz) is same for all datasets
         self.channels = None #electrode names (filled by load_arrays)
         self.ch_type = 'eeg'
         self.eegData = None #X rearranged by reshape_data()
         self.use_autoreject = 'n' #'y' selects the autoreject OASIS file
     def load_arrays(self):
         '''
         Read X and Y for self.name from disk, set freq and channels, then call reshape_data().
         Raises ValueError when self.name is not one of the supported datasets.
         '''
         emotiv_channels = ['AF3','F7','F3','FC5','T7','P7','O1','O2','P8','T8','FC6','F4','F8','AF4']
         npz_paths = {'DREAMER':'original_data/DREAMER.npz','DEAP':'original_data/DEAP.npz'}
         if self.name == 'OASIS':
             self.freq = 128
             self.channels = emotiv_channels
             if self.use_autoreject == 'y':
                 # NOTE(review): pickle.load can execute code from the file; only use trusted files here.
                 with open('preprocessed_data/oasis/with_autoreject.p','rb') as file:
                     trials = pickle.load(file)
                 self.X ,self.Y= merge_dictionary(trials)
             else:
                 # the archive is closed as soon as both arrays are in memory
                 with np.load('preprocessed_data/oasis/without_autoreject.npz') as array:
                     self.X = array['X']
                     self.Y = array['Y']
             (a,b,c) = self.X.shape
             # NOTE(review): np.reshape keeps the element order and only relabels the shape;
             # it is not an axis swap (np.transpose). Kept as is because downstream code
             # was written against this layout - confirm before changing.
             self.X = np.reshape(self.X,(a,c,b))
         elif self.name in npz_paths:
             with np.load(npz_paths[self.name]) as array:
                 self.X = array['X']
                 self.Y = array['Y']
             self.freq = 128
             self.channels = emotiv_channels
             if self.name == 'DEAP':
                 # DEAP stores 40 channels in this order:
                 # 0 F1, 1 AF3, 2 F3, 3 F7, 4 FC5, 5 FC1, 6 C3, 7 T7, 8 CP5, 9 CP1, 10 P3, 11 P7,
                 # 12 PO3, 13 O1, 14 Oz, 15 Pz, 16 Fp2, 17 AF4, 18 Fz, 19 F4, 20 F8, 21 FC6, 22 FC2,
                 # 23 Cz, 24 C4, 25 T8, 26 CP6, 27 CP2, 28 P4, 29 P8, 30 PO4, 31 O2, 32 hEOG, 33 vEOG,
                 # 34 zEMG, 35 tEMG, 36 GSR, 37 Respiration belt, 38 Plethysmograph, 39 Temperature
                 # To maintain uniformity across all datasets, only 14 electrodes selected
                 self.X = self.X[:,:,[1,3,2,4,7,11,13,31,29,25,21,19,20,17]]
         else:
             raise ValueError('unknown dataset name: ' + repr(self.name))
         #self.Z = array['Z']
         self.reshape_data()
     def reshape_data(self):
         '''
         Store X in eegData with the shape the EEGExtract module expects, i.e. channels x timepoints x epochs.
         '''
         (epochs,timepoints,channels) = self.X.shape
         # NOTE(review): this is a reshape, not a transpose, so only the shape tuple is
         # reversed while the flat element order stays the same - confirm this is intended.
         self.eegData = np.reshape(self.X,(channels,timepoints,epochs))
 class features:
     '''
     Static feature extractors that wrap the EEGExtract module (imported as eeg).
     Each method accepts eegData plus either fs (sampling frequency) or ignored
     keyword arguments, so all of them can be called as method(eegData=..., fs=...).
     '''
     ############################ Complexity Features #############################
     #1>
     @staticmethod
     def ShannonRes(eegData,**args):
         '''Shannon entropy of the unfiltered signal (bins from -200 to 200, width 2).'''
         ShannonRes = eeg.shannonEntropy(eegData, bin_min=-200, bin_max=200, binWidth=2)
         return ShannonRes
     #2>
     @staticmethod
     def ShannonRes_sub_band_delta(eegData,fs):
         '''Subband information quantity: Shannon entropy of the delta band (0.5-4 Hz).'''
         eegData_delta = eeg.filt_data(eegData, 0.5, 4, fs)
         ShannonRes_delta = eeg.shannonEntropy(eegData_delta, bin_min=-200, bin_max=200, binWidth=2)
         return ShannonRes_delta
     #3>
     @staticmethod
     def ShannonRes_sub_band_theta(eegData,fs):
         '''Shannon entropy of the theta band (4-8 Hz).'''
         eegData_theta = eeg.filt_data(eegData, 4, 8, fs)
         ShannonRes_theta = eeg.shannonEntropy(eegData_theta, bin_min=-200, bin_max=200, binWidth=2)
         return ShannonRes_theta
     #4>
     @staticmethod
     def ShannonRes_sub_band_alpha(eegData,fs):
         '''Shannon entropy of the alpha band (8-12 Hz).'''
         eegData_alpha = eeg.filt_data(eegData, 8, 12, fs)
         ShannonRes_alpha = eeg.shannonEntropy(eegData_alpha, bin_min=-200, bin_max=200, binWidth=2)
         return ShannonRes_alpha
     #5>
     @staticmethod
     def ShannonRes_sub_band_beta(eegData,fs):
         '''Shannon entropy of the beta band (12-30 Hz).'''
         eegData_beta = eeg.filt_data(eegData, 12, 30, fs)
         ShannonRes_beta = eeg.shannonEntropy(eegData_beta, bin_min=-200, bin_max=200, binWidth=2)
         return ShannonRes_beta
     #6>
     @staticmethod
     def ShannonRes_sub_band_gamma(eegData,fs):
         '''Shannon entropy of the gamma band, filtered here as 30-63 Hz.'''
         # presumably capped at 63 Hz to stay below the Nyquist limit of 128 Hz recordings - confirm
         eegData_gamma = eeg.filt_data(eegData, 30, 63, fs)
         ShannonRes_gamma = eeg.shannonEntropy(eegData_gamma, bin_min=-200, bin_max=200, binWidth=2)
         return ShannonRes_gamma
     #7>
     @staticmethod
     def Hojorth_Mobility(eegData,**args):
         '''Hjorth mobility (first value returned by eeg.hjorthParameters).'''
         HjorthMob, HjorthComp = eeg.hjorthParameters(eegData)
         return HjorthMob
     #8>
     @staticmethod
     def Hojorth_Complexity(eegData,**args):
         '''Hjorth complexity (second value returned by eeg.hjorthParameters).'''
         HjorthMob, HjorthComp = eeg.hjorthParameters(eegData)
         return HjorthComp
     #9>
     @staticmethod
     def False_Nearest_Neighbour(eegData,**args):
         '''False nearest neighbor feature.'''
         FalseNnRes = eeg.falseNearestNeighbor(eegData)
         return FalseNnRes
     ##############################################################################
     ########################Category Features#####################################
     #10>
     @staticmethod
     def median_frequency(eegData,fs):
         '''Median frequency; fs is the sampling frequency.'''
         medianFreqRes = eeg.medianFreq(eegData,fs)
         return medianFreqRes
     #11>
     @staticmethod
     def band_power_delta(eegData,fs):
         '''Delta band power (0.5-4 Hz); fs is the sampling frequency.'''
         bandPwr_delta = eeg.bandPower(eegData, 0.5, 4, fs)
         return bandPwr_delta
     #12>
     @staticmethod
     def band_power_theta(eegData,fs):
         '''Theta band power (4-8 Hz); fs is the sampling frequency.'''
         bandPwr_theta = eeg.bandPower(eegData, 4, 8, fs)
         return bandPwr_theta
     #13>
     @staticmethod
     def band_power_alpha(eegData,fs):
         '''Alpha band power (8-12 Hz); fs is the sampling frequency.'''
         bandPwr_alpha = eeg.bandPower(eegData, 8, 12, fs)
         return bandPwr_alpha
     #14>
     @staticmethod
     def band_power_beta(eegData,fs):
         '''Beta band power (12-30 Hz); fs is the sampling frequency.'''
         bandPwr_beta = eeg.bandPower(eegData, 12, 30, fs)
         return bandPwr_beta
     #15>
     @staticmethod
     def band_power_gamma(eegData,fs):
         '''Gamma band power, computed here over 30-63 Hz; fs is the sampling frequency.'''
         bandPwr_gamma = eeg.bandPower(eegData, 30, 63, fs)
         return bandPwr_gamma
     #16>
     @staticmethod
     def standard_deviation(eegData,**args):
         '''Standard deviation of the signal.'''
         std_res = eeg.eegStd(eegData)
         return std_res
     #17>
     @staticmethod
     def regularity(eegData,fs):
         '''Regularity (burst-suppression).'''
         regularity_res = eeg.eegRegularity(eegData,fs)
         return regularity_res
     #18>
     @staticmethod
     def Diffuse_slowing(eegData,**args):
         '''Diffuse slowing.'''
         df_res = eeg.diffuseSlowing(eegData)
         return df_res
     #19>
     @staticmethod
     def Spikes(eegData,fs,**args):
         '''Number of spikes; the minimum spike length is 70 ms expressed in samples.'''
         minNumSamples = int(70*fs/1000)
         spikeNum_res = eeg.spikeNum(eegData,minNumSamples)
         return spikeNum_res
     #20>
     @staticmethod
     def delta_burst_after_spike(eegData,fs):
         '''Delta burst after spike, using the 0.5-4 Hz filtered signal as the delta input.'''
         eegData_delta = eeg.filt_data(eegData, 0.5, 4, fs)
         # NOTE(review): minNumSamples is fixed at 7 here while Spikes/Sharp_spike derive it from fs - confirm.
         deltaBurst_res = eeg.burstAfterSpike(eegData,eegData_delta,minNumSamples=7,stdAway = 3)
         return deltaBurst_res
     #21>
     @staticmethod
     def Sharp_spike(eegData,fs):
         '''Number of sharp (short) spikes; the sample threshold is 70 ms expressed in samples.'''
         minNumSamples = int(70*fs/1000)
         sharpSpike_res = eeg.shortSpikeNum(eegData,minNumSamples)
         return sharpSpike_res
     #22>
     @staticmethod
     def Number_of_Burst(eegData,fs):
         '''Number of bursts.'''
         numBursts_res = eeg.numBursts(eegData,fs)
         return numBursts_res
     #23>
     @staticmethod
     def Burst_length_u_and_sigma_mean(eegData,fs):
         '''Mean burst length (first value returned by eeg.burstLengthStats).'''
         burstLenMean_res,burstLenStd_res = eeg.burstLengthStats(eegData,fs)
         return burstLenMean_res
     #24>
     @staticmethod
     def Burst_length_u_and_sigma_std(eegData,fs):
         '''Standard deviation of burst length (second value returned by eeg.burstLengthStats).'''
         burstLenMean_res,burstLenStd_res = eeg.burstLengthStats(eegData,fs)
         return burstLenStd_res
     #25>
     @staticmethod
     def no_of_suprression(eegData,fs):
         '''Number of suppressions.'''
         numSupps_res = eeg.numSuppressions(eegData,fs)
         return numSupps_res
     #26>
     @staticmethod
     def Suppression_length_u_and_sigma_mean(eegData,fs):
         '''Mean suppression length (first value returned by eeg.suppressionLengthStats).'''
         suppLenMean_res,suppLenStd_res = eeg.suppressionLengthStats(eegData,fs)
         return suppLenMean_res
     #27>
     @staticmethod
     def Suppression_length_u_and_sigma_std(eegData,fs):
         '''Standard deviation of suppression length (second value returned by eeg.suppressionLengthStats).'''
         suppLenMean_res,suppLenStd_res = eeg.suppressionLengthStats(eegData,fs)
         return suppLenStd_res
     ##############################################################################
  ##############################################################################
 def merge_dictionary(dictionary):
     '''
     Merge all trial data to form one array.
     dictionary maps a trial key to a sequence whose item 0 is an array of shape
     (epochs, channels, segment_length) and whose item 3 is that trial's two scores.
     Returns (X, Y): X stacks every trial's epochs along axis 0 and Y repeats the
     trial's scores once per epoch, so X.shape[0] == Y.shape[0] even when trials
     contain different numbers of epochs.
     Raises ValueError for an empty dictionary.
     '''
     if not dictionary:
         raise ValueError('merge_dictionary needs at least one trial')
     # any trial fixes the channel and segment sizes; the first one is used
     reference = next(iter(dictionary.values()))[0]
     no_of_channels = reference.shape[1]
     length_of_segment = reference.shape[2]
     # the empty float seeds keep the result dtype and the shape checks of the stacked arrays
     x_parts = [np.empty((0,no_of_channels,length_of_segment))]
     y_parts = [np.empty((0,2))]
     for entry in dictionary.values():
         array = entry[0]
         score = entry[3]
         x_parts.append(array)
         # one label row per epoch of this particular trial
         y_parts.append(np.repeat(np.expand_dims(score,axis = 0),array.shape[0],axis = 0))
     # a single concatenate avoids re-copying the growing array on every trial
     X = np.concatenate(x_parts,axis = 0)
     Y = np.concatenate(y_parts,axis = 0)
     return X,Y
 def epoch_data(X,Y, window, stride, sfreq):
     '''
     Cut every trial into fixed-length windows and repeat its labels per window.
     X has shape (channels, timepoints, trials); window and stride are in seconds
     and sfreq in Hz. Returns (X_new, Y_new) where X_new has shape
     (channels, window*sfreq, trials*windows_per_trial) and Y_new holds the first
     two label columns of the source trial for every window.
     '''
     n_channels,n_timepoints,n_trials = X.shape
     # NOTE(review): np.reshape only relabels the shape (no axis swap) - confirm this matches the caller's layout
     by_trial = np.reshape(X,(n_trials,n_channels,n_timepoints))
     segment = int(window*sfreq)
     step = int(stride*sfreq)
     windows_per_trial = int((n_timepoints-segment)/step + 1)
     n_rows = n_trials*windows_per_trial
     X_new = np.empty((n_rows,n_channels,segment))
     Y_new = np.empty((n_rows,2))
     # rows are filled trial by trial, window by window
     for row in range(n_rows):
         trial,window_no = divmod(row,windows_per_trial)
         start = window_no*step
         X_new[row,:,:] = by_trial[trial,:,start:start+segment]
         Y_new[row,:] = Y[trial,:2]
     # same reshape convention on the way out
     X_new = np.reshape(X_new,(n_channels,segment,n_rows))
     return X_new,Y_new
 def driver_code():
     '''
     Interactive driver: pick a dataset, optionally epoch it, then compute and save features.
     Every prompt is answered on standard input. Each computed feature is written with
     np.savez to <dataset folder>/<feature name>_<window>_<stride> with the keys
     'features' and 'Y'.
     '''
     dataset_dictionary = {0:'DEAP',1:'OASIS',2:'DREAMER'}
     print(dataset_dictionary)
     print('enter number for loading dataset')
     mapping = int(input())
     print('plz wait loading dataset preprocessed arrays')
     dataset = load_data(dataset_dictionary[mapping])
     if mapping == 1:
         # the autoreject question only exists for OASIS, so its answer is read inside this branch
         print('do you want to use with autoreject data? if yes press y')
         if input() == 'y':
             dataset.use_autoreject = 'y'
     dataset.load_arrays()
     print('loading complete')
     print('shape of data we will use to make features:',dataset.eegData.shape)
     print('do you want to segment the data before calculating feature values? y/n')
     if input() == 'y':
         window = float(input('enter window size'))
         stride = float(input('enter stride size'))
         dataset.eegData,dataset.Y = epoch_data(dataset.eegData,dataset.Y,window,stride,dataset.freq)
         print('new shapes of X and Y:',dataset.eegData.shape,' ',dataset.Y.shape)
     else:
         window = 0
         stride = 0
     print('features available')
     # One table keeps each file name next to the method that produces it, so a number
     # can never select a method whose output is saved under another feature's name.
     feature_table = [
         ('shannonEntropy',features.ShannonRes),
         ('ShannonRes_sub_bands_alpha',features.ShannonRes_sub_band_alpha),
         ('ShannonRes_sub_bands_beta',features.ShannonRes_sub_band_beta),
         ('ShannonRes_sub_bands_delta',features.ShannonRes_sub_band_delta),
         ('ShannonRes_sub_bands_theta',features.ShannonRes_sub_band_theta),
         ('ShannonRes_sub_bands_gamma',features.ShannonRes_sub_band_gamma),
         ('Hjorth_mobilty',features.Hojorth_Mobility),
         ('Hjorth_complexity',features.Hojorth_Complexity),
         ('falseNearestNeighbor',features.False_Nearest_Neighbour),
         ('medianFreq',features.median_frequency),
         ('bandPwr_alpha',features.band_power_alpha),
         ('bandPwr_beta',features.band_power_beta),
         ('bandPwr_gamma',features.band_power_gamma),
         ('bandPwr_theta',features.band_power_theta),
         ('bandPwr_delta',features.band_power_delta),
         ('stdDev',features.standard_deviation),
         ('regularity',features.regularity),
         ('diffuseSlowing',features.Diffuse_slowing),
         ('spikeNum',features.Spikes),
         ('deltaBurstAfterSpike',features.delta_burst_after_spike),
         # NOTE(review): this entry was previously listed twice ('shortSpikeNum' and 'Sharp spike')
         ('shortSpikeNum',features.Sharp_spike),
         ('numBursts',features.Number_of_Burst),
         ('burstLen_u_and_sigma_mean',features.Burst_length_u_and_sigma_mean),
         ('burstLen_u_and_sigma_std',features.Burst_length_u_and_sigma_std),
         ('numSuppressions',features.no_of_suprression),
         ('suppressionLen_u_and_sigma_mean',features.Suppression_length_u_and_sigma_mean),
         ('suppressionLen_u_and_sigma_std',features.Suppression_length_u_and_sigma_std),
     ]
     featuresDict = {number:name for number,(name,_) in enumerate(feature_table)}
     featureMethod = {number:method for number,(_,method) in enumerate(feature_table)}
     print(featuresDict)
     #define path for saving before hand in np.savez line below
     path = 'features/'
     if dataset.name == 'DEAP':
         path = path +'deap/'
     elif dataset.name == 'DREAMER':
         path = path + 'dreamer/'
     elif dataset.use_autoreject == 'y':
         path = path +'oasis/with_autoreject/'
     else:
         path = path +'oasis/without_autoreject/'
     suffix = '_'+str(int(window))+'_'+str(int(stride))
     def extract_and_save(key):
         # compute one feature on the loaded data and store it together with the labels
         feature_matrix = featureMethod[key](eegData = dataset.eegData,fs=dataset.freq)
         filename = featuresDict[key]
         print('saving ---',filename)
         np.savez(path+filename+suffix,features = feature_matrix , Y = dataset.Y)
     boolean = input('do you want to individually make features? y/n')
     if boolean =='n':
         # answering 'n' means: do not pick individually, compute every feature
         for key in featureMethod:
             extract_and_save(key)
     else:
         found_features = False
         while not found_features:
             print('enter feature no')
             try:
                 key = int(input())
             except ValueError:
                 print('please enter one of the numbers listed above')
                 continue
             if key not in featureMethod:
                 print('unknown feature no:',key)
                 continue
             extract_and_save(key)
             boolean = input('do you want to find more features? y/n ')
             if boolean =='n':
                 found_features = True
     print('feature extraction done!!!!')
 def __main__():
     '''Run the interactive feature-extraction driver.'''
     driver_code()
 # Start the driver only when this file is executed as a script; importing the module
 # (for example to reuse load_data or epoch_data) must not open the interactive prompts.
 if __name__ == '__main__':
     __main__()
@@ -0,0 +1,70 @@
 #!/usr/bin/env python
 # coding: utf-8
 # In[ ]:
 # Script to get the feature ranking and electrode ranking through 
        # Method A :- Random Forest Regressor
        # Method B :- F score based Ranking
        # Method C :- Random Forest Importances approach 
 # Main function
 from ImportUtils import *
 from TopNByFSMethods import *
 from TopNByClassifier import *
 from args_eeg import args as my_args
 if __name__ == '__main__':
     # Read the command line options, extract the epoched features, then plot the
     # rankings of Methods A, B and C for electrodes (top == "e") or features (top == "f").
     # Any other value of top stops after the feature extraction.
     args = my_args()
     print(args.__dict__)
     pwd = os.getcwd()
     dataset = args.dataset
     window = args.window
     stride = args.stride
     sfreq = args.sfreq
     model = args.model
     label = args.label
     approach = args.approach #byclassifier or byfs
     ml_algo = args.ml_algo #classification or regression
     top = args.top #e or f or ef
     fs_method = args.fs_method
     #feature extraction
     getEpochedFeatures(dataset, window, stride, sfreq, label)
     if(top == "e"):
         rank_with_model = topElectrodeRegressionRanking
         rank_with_fs = topElectrodeFSRegressionRanking
         plot_stem = "CorrectedElectrodewiseRanking"
     elif(top == "f"):
         rank_with_model = topFeaturesRegressionRanking
         rank_with_fs = topFeatureFSRegressionRanking
         plot_stem = "CorrectedFeaturewiseRanking"
     else:
         rank_with_model = None
     if rank_with_model is not None:
         clf = RandomForestRegressor()
         # Method A, then Methods B and C, each drawn onto the current figure
         rank_with_model(dataset, window, stride, sfreq, clf, label, scale=False, pca=False)
         for fs_name in ('SelectKBest', 'RandomForest'):
             rank_with_fs(dataset, window, stride, sfreq, clf, label, scale=False, pca=False, mutual_info = False, method=fs_name)
         plt.legend(["Method A","Method B", "Method C"])
         # label 1 goes to the arousal plot folder, every other label to the default one
         if(label == 1):
             plot_folder = "/arousal_plots/"
         else:
             plot_folder = "/plots/"
         plt.savefig(pwd + "/" + dataset + plot_folder + plot_stem + str(window) + str(stride) + ".svg", bbox_inches='tight')
         plt.show()
         plt.clf()
@@ -0,0 +1,264 @@
 #!/usr/bin/env python
 # coding: utf-8
 # In[ ]:
 import numpy as np
 import matplotlib.pyplot as plt
 import pandas as pd
 import sys
 from sklearn.preprocessing import MinMaxScaler,StandardScaler
 from sklearn.utils import shuffle
 from sklearn.metrics import mean_squared_error
 from sklearn.model_selection import train_test_split
 # In[ ]:
 # Module-level scaler objects used by the two scaling helpers defined right below.
 scaler_min_max = MinMaxScaler()
 scaler_standard = StandardScaler()
 # Either one of the MinMaxScaling or StandardScaling function can be used
 def MinMaxScaling(feature_matrix):
     '''Refit the module-level min-max scaler on feature_matrix and return the transformed matrix.'''
     scaler = scaler_min_max
     scaler.fit(feature_matrix)
     scaled = scaler.transform(feature_matrix)
     return scaled
 def StandardScaling(feature_matrix):
     '''
     Refit the module-level standard scaler on feature_matrix and return the transformed matrix.
     Also prints the shape of the fitted per-feature mean vector.
     '''
     scaler_standard.fit(feature_matrix)
     print('scaling shape',scaler_standard.mean_.shape)
     # transform with the scaler that was just fitted (the former name `scaler` was never defined)
     return scaler_standard.transform(feature_matrix)
 # Backend switch: 'sklearn' imports SVR and accuracy_score from scikit-learn,
 # any other value imports the same names from cuML instead.
 architecture = 'sklearn'
 if architecture == 'sklearn':
    from sklearn.svm import SVR 
    from sklearn.metrics import accuracy_score
 else: 
    from cuml.svm import SVR
    from cuml.metrics import  accuracy_score
 # 
 # """##DEAP dataset
 # 1> Valence - features selected
 # >
 # *   bandPwr_gamma
 # *   bandPwr_beta
 # *   ShannonRes_gamma
 # *   ShannonRes_beta
 # *   rasm_gamma
 # *   dasm_gamma
 # 
 # 2> Arousal - feature selected
 # >
 # *   HjorthMob
 # *   HjorthComp
 # *   stdDev
 # *   bandPwr_theta
 # *   bandPwr_beta
 # *   ShannonRes_beta
 # *   ShannonRes_gamma
 # *   dasm_beta
 # """
 # In[ ]:
 # now for incremental learning we need to segregate data of subjects
 def segregate_data_of_subjects(feature_matrix,total_subjects,sfreq = 128):
     '''
     Return a dictionary that holds, for every subject, that subject's rows of the feature matrix.
     parameters :-
         feature_matrix :- 2-D array whose rows are ordered subject by subject, with the
                           same number of rows for each subject
         total_subjects :- total number of subjects in the study
         sfreq :- sampling frequency of the EEG data (accepted but not used here)
     returns :-
         subject_indexes :- dictionary 'subject_1' ... 'subject_<total_subjects>' mapping
                            to consecutive, equally sized row slices
     Rows left over when the row count is not a multiple of total_subjects are not
     assigned to any subject.
     '''
     total_samples = feature_matrix.shape[0]
     samples_per_subject = total_samples//total_subjects
     return {
         'subject_' + str(i+1): feature_matrix[samples_per_subject*i:samples_per_subject*(i+1),:]
         for i in range(total_subjects)
     }
 # In[ ]:
 # Define the function that carries out one step of the incremental learning algorithm
 def training_phase(model,feature_matrix,Y,subject_indexes,number_of_subjects,total_subjects,rmse_score,test_subject):
     '''
     Fit and score the model on the data of the first number_of_subjects subjects.
     parameters :-
         model :- regressor with fit/predict (SVR in this script)
         feature_matrix :- full feature matrix; only its shape is read here
         Y :- label values, row-aligned with feature_matrix
         subject_indexes :- per-subject feature rows as returned by segregate_data_of_subjects
         number_of_subjects :- how many leading subjects to include in this step
         total_subjects :- total number of subjects in the study
         rmse_score :- list of RMSE values of earlier steps; this step's value is appended
         test_subject :- list of step labels; 'subject_<number_of_subjects>' is appended
     returns :-
         rmse_score, test_subject :- the same two lists after appending
     The RMSE is measured on a random 20% split of the selected rows, not on a
     held-out subject.
     '''
     no_of_features = feature_matrix.shape[1]
     print('training on subject_no:',end = ' ')
     #collect the rows of subjects 1..number_of_subjects, e.g. number_of_subjects == 4 takes the first 4 subjects
     #the empty float seed keeps the dtype and column check of the stacked result
     subject_blocks = [np.empty((0,no_of_features))]
     for subject in range(number_of_subjects):
         print(subject+1,end = ' ')
         subject_name = 'subject_'+str(subject+1)
         subject_blocks.append(subject_indexes[subject_name])
     print(' ')
     #one concatenate instead of re-copying the growing matrix for every subject
     X = np.concatenate(subject_blocks,axis=0)
     #apply a MinMax scaling to the current iteration feature matrix
     #NOTE(review): the scaler is fitted before the train/test split, so test rows influence the scaling - confirm intended
     X = MinMaxScaling(X)
     #labels of the same leading subjects
     total_samples = feature_matrix.shape[0]
     samples_per_subject = total_samples//total_subjects
     y = Y[:samples_per_subject*(number_of_subjects)]
     print('shape of X is :',X.shape)
     print('shape of y is  :',y.shape)
     #shuffling data randomly to feed to model
     X,y = shuffle(X,y,random_state = 0)
     #doing a train test split of 80:20
     X_train,X_test,y_train,y_test = train_test_split(X,y,random_state=0,test_size=0.2)
     #training_model
     model = model.fit(X_train,y_train)
     #testing_model
     y_predict = model.predict(X_test)
     #rmse of the model fitted in the current iteration
     y_rms = np.sqrt(mean_squared_error(y_test,y_predict))
     print('rms on y :',y_rms)
     print('')
     rmse_score.append(y_rms)
     test_subject.append(subject_name)
     return rmse_score,test_subject
 # In[ ]:
 def driver_code(save):
     '''
     Load the selected DEAP features, run the incremental training for valence and
     arousal, and plot the RMSE after each added subject.
     save :- 'y' additionally writes the plot (svg) and the RMSE table (csv) under plots/deap/.
     '''
     n_samples = 80640 #rows expected in every DEAP feature file (1 s epochs of all subjects)
     deap_dir = 'features/deap/'
     asym_dir = 'features/deap_RASM_DASM/'
     def read(path,key = 'features'):
         #read one array and close the archive again
         with np.load(path) as archive:
             return archive[key]
     #every feature file is read once and reused for both targets
     bandPwr_gamma = read(deap_dir+'bandPwr_gamma_1_1.npz')
     bandPwr_beta = read(deap_dir+'bandPwr_beta_1_1.npz')
     bandPwr_theta = read(deap_dir+'bandPwr_theta_1_1.npz')
     ShannonRes_gamma = read(deap_dir+'ShannonRes_sub_bands_gamma_1_1.npz')
     #file name as written by the feature extraction script ('ShannonRes_sub_bands_beta')
     ShannonRes_beta = read(deap_dir+'ShannonRes_sub_bands_beta_1_1.npz')
     HjorthMob = read(deap_dir+'Hjorth_mobilty_1_1.npz')
     HjorthComp = read(deap_dir+'Hjorth_complexity_1_1.npz')
     stdDev = read(deap_dir+'stdDev_1_1.npz')
     #RASM/DASM files are 80640 x 14: after transposing, rows 0-6 are RASM and rows 7-13 are DASM
     asym_gamma = read(asym_dir+'gamma_1_1.npz').T
     asym_beta = read(asym_dir+'beta_1_1.npz').T
     seed = np.empty((0,n_samples)) #keeps float dtype and checks the sample count of every part
     #creating a feature matrix for valence, shape 80640 x 70
     feature_matrix_valence = np.concatenate(
         [seed,bandPwr_gamma,bandPwr_beta,ShannonRes_gamma,ShannonRes_beta,asym_gamma[:7,:],asym_gamma[7:,:]],
         axis = 0).T
     #extracting labels
     Y_val = read(deap_dir+'bandPwr_gamma_1_1.npz','Y')[:,0]
     #creating feature matrix for arousal, shape 80640 x 105
     feature_matrix_arousal = np.concatenate(
         [seed,HjorthMob,HjorthComp,stdDev,bandPwr_beta,bandPwr_theta,ShannonRes_beta,ShannonRes_gamma,asym_beta[7:,:]],
         axis = 0).T
     #extracting labels
     Y_aro = read(deap_dir+'Hjorth_mobilty_1_1.npz','Y')[:,1]
     model = SVR()#initializing support vector regressor for training
     #running incremental learning loop for valence
     print('')
     print('Incremental training for valence')
     print('')
     test_subject = []
     rmse_val = []
     subject_indexes_valence = segregate_data_of_subjects(feature_matrix_valence,32,128)
     for i in range(1,33):
         rmse_val,test_subject= training_phase(model,feature_matrix_valence,Y_val,subject_indexes_valence,i,32,rmse_val,test_subject)
     #running incremental learning loop for arousal
     print('')
     print('Incremental training for arousal ')
     print(' ')
     model = SVR()#reinitialize model
     test_subject = []
     rmse_aro = []
     subject_indexes_arousal = segregate_data_of_subjects(feature_matrix_arousal,32,128)
     for i in range(1,33):
         rmse_aro,test_subject = training_phase(model,feature_matrix_arousal,Y_aro,subject_indexes_arousal,i,32,rmse_aro,test_subject)
     fig,axe = plt.subplots(1,1,figsize = (40,20))
     axe.plot(test_subject,rmse_val,color='r',label='rmse valence')
     axe.plot(test_subject,rmse_aro,color = 'g',label='rmse arousal')
     axe.set_xlabel('trained upto subject')
     axe.set_ylabel('rmse')
     axe.set_title('support vector regressor')
     axe.legend(loc = 'upper right')
     df = pd.DataFrame([rmse_val,rmse_aro],columns = test_subject,index = ['valence rms','arousal rms'])
     print(df)
     if save == 'y':
         plt.savefig('plots/deap/all_feature_valence_arousal_rmse',format = "svg")
         df.to_csv('plots/deap/all_features_valence_arousal_rmse.csv')
 # In[ ]:
 if __name__ == '__main__':
     # The first command line argument is passed on as `save` ('y' stores the plot and the csv).
     if len(sys.argv) < 2:
         sys.exit('usage: ' + sys.argv[0] + ' <save: y/n>')
     driver_code(sys.argv[1])
@@ -0,0 +1,259 @@
 #!/usr/bin/env python
 # coding: utf-8
 # In[ ]:
 import numpy as np
 import matplotlib.pyplot as plt
 import pandas as pd
 import sys
 from sklearn.preprocessing import MinMaxScaler,StandardScaler
 from sklearn.utils import shuffle
 from sklearn.metrics import mean_squared_error
 from sklearn.model_selection import train_test_split
 architecture = 'sklearn'
 if architecture == 'sklearn':
    from sklearn.svm import SVR 
    from sklearn.metrics import accuracy_score
 else: 
    from cuml.svm import SVR
    from cuml.ensemble import RandomForestRegressor
    from cuml.metrics import  accuracy_score
 # In[ ]:
 # Either one of the MinMaxScaling or StandardScaling function can be used
 scaler_min_max = MinMaxScaler()
 scaler_standard = StandardScaler()
 def MinMaxScaling(feature_matrix):
     '''
     Rescale feature_matrix with the shared module-level MinMaxScaler.

     The scaler is re-fitted on the matrix passed in on every call, so the
     returned values only reflect the data of the current call.
     '''
     # fit() returns the scaler itself, so fit and transform can be chained
     fitted_scaler = scaler_min_max.fit(feature_matrix)
     return fitted_scaler.transform(feature_matrix)
 def StandardScaling(feature_matrix):
     '''
     Standardise feature_matrix with the shared module-level StandardScaler.

     parameters :-
         feature_matrix :- matrix handed to the scaler (samples x features as used by the callers)
     returns :-
         the transformed matrix produced by the scaler fitted on this same input
     '''
     # the scaler is re-fitted on every call
     scaler_standard.fit(feature_matrix)
     print('scaling shape',scaler_standard.mean_.shape)
     # previously returned `scaler.transform(...)`; no name `scaler` exists in
     # this module, so every call ended in a NameError
     return scaler_standard.transform(feature_matrix)
 # """##DREAMER dataset
 # 1> Valence - features selected
 # >
 # *   HjorthMob
 # *   HjorthCom
 # *   stdDev
 # *   bandPwr_theta
 # *   ShannonRes_gamma
 # *   bandPwr_beta
 # 
 # 2> Arousal - feature selected
 # >
 # *   HjorthMob
 # *   HjorthComp
 # *   stdDev
 # *   bandPwr_theta
 # *   bandPwr_gamma
 # *   ShannonRes_gamma
 # """
 # In[ ]:
 # now for incremental learning we need to segregate data of subjects
 def segregate_data_of_subjects(feature_matrix,total_subjects,sfreq = 128):
     '''
     Returns a dictionary holding, for every subject, the rows of the feature
     matrix that belong to that subject.

     parameters :-
         feature_matrix :- 2-D array (samples x features); rows are split into
                           equal contiguous chunks, one per subject
         total_subjects :- number of chunks/subjects to split the rows into
         sfreq :- sampling frequency of the EEG data (accepted but not used here)
     returns :-
         subject_indexes :- dict mapping 'subject_1' ... 'subject_<total_subjects>'
                            to the corresponding slice of feature_matrix
     '''
     samples_per_subject = feature_matrix.shape[0]//total_subjects
     # integer division: rows left over when the sample count is not a multiple
     # of total_subjects are not assigned to any subject
     return {
         'subject_' + str(i+1): feature_matrix[samples_per_subject*i:samples_per_subject*(i+1),:]
         for i in range(total_subjects)
     }
 # In[ ]:
 # now defining a function which carries out the incremental learning algorithm
 def training_phase(model,feature_matrix,Y,subject_indexes,number_of_subjects,total_subjects,rmse_score,test_subject):
     '''
     Fit the model on the data of the first `number_of_subjects` subjects and
     record the RMSE on a 20% hold-out of that same data.

     parameters :-
         model :- regressor exposing fit/predict (SVR in the driver code)
         feature_matrix :- full feature matrix (samples x features); only its shape is read
         Y :- label vector aligned with the rows of feature_matrix
         subject_indexes :- dict 'subject_<k>' -> feature rows of subject k
         number_of_subjects :- how many subjects (starting from subject 1) to train on
         total_subjects :- total number of subjects in the study
         rmse_score :- list of RMSE values from previous iterations (appended to in place)
         test_subject :- list of subject names from previous iterations (appended to in place)
     returns :-
         rmse_score :- same list, with the RMSE of this iteration appended
         test_subject :- same list, with the name of the last included subject appended
     '''
     no_of_features = feature_matrix.shape[1]
     print('training on subject_no:',end = ' ')
   #collect the rows of subjects 1..number_of_subjects; the empty float seed keeps
   #the dtype identical to growing the matrix with np.append
     subject_blocks = [np.empty((0,no_of_features))]
     for subject in range(number_of_subjects):
         print(subject+1,end = ' ')
         subject_name = 'subject_'+str(subject+1)
         subject_blocks.append(subject_indexes[subject_name])
     print(' ')
     X = np.concatenate(subject_blocks,axis=0)
   #apply a MinMax scaling to the current iteration feature matrix
   #NOTE(review): the scaler is fitted before the train/test split below, so the
   #held-out rows influence the scaling - confirm this is acceptable
     X = MinMaxScaling(X)
   #labels of the same subjects: every subject owns samples_per_subject consecutive rows
   #(this slice was previously recomputed inside a loop with an identical result each pass)
     samples_per_subject = feature_matrix.shape[0]//total_subjects
     y = Y[:samples_per_subject*number_of_subjects]
     print('shape of X is :',X.shape)
     print('shape of y is  :',y.shape)
   #shuffling data randomly to feed to model
     X,y = shuffle(X,y,random_state = 0)
   #doing a train test split of 80:20
     X_train,X_test,y_train,y_test = train_test_split(X,y,random_state=0,test_size=0.2)
   #training_model
     model = model.fit(X_train,y_train)
   #testing_model
     y_predict = model.predict(X_test)
   #rmse of the model fitted in the current iteration
     y_rms = np.sqrt(mean_squared_error(y_test,y_predict))
     print('rms on y :',y_rms)
     print('')
     rmse_score.append(y_rms)
     test_subject.append(subject_name)
     return rmse_score,test_subject
 # In[ ]:
 def _load_feature_matrix(directory,file_names,label_column):
     '''
     Stack the 'features' arrays of several .npz files into one matrix.

     parameters :-
         directory :- path prefix of the feature files
         file_names :- file names to load, in the order their rows are stacked
         label_column :- column of the 'Y' array (taken from the first file) to return
     returns :-
         (feature matrix transposed to samples x features, label vector)
     '''
     feature_rows = []
     labels = None
     for file_name in file_names:
         # np.load keeps an .npz archive open; the with-block closes it once the arrays are read
         with np.load(directory + file_name) as archive:
             feature_rows.append(archive['features'])
             if labels is None:
                 labels = archive['Y'][:,label_column]
     # empty float seed: same dtype result as growing the matrix with np.append,
     # without hard-coding the number of samples
     seed = np.empty((0,feature_rows[0].shape[1]))
     return np.vstack([seed] + feature_rows).T,labels
 def driver_code(save):
     '''
     Load the DREAMER features, run the incremental SVR training for valence and
     arousal, plot both RMSE curves and optionally save plot and table.

     parameters :-
         save :- 'y' to write the svg plot and the csv table under plots/dreamer/
     '''
     directory = 'features/dreamer/'
     total_subjects = 23
     shared_files = ['Hjorth_mobilty_1_1.npz','Hjorth_complexity_1_1.npz','stdDev_1_1.npz','bandPwr_theta_1_1.npz']
   #valence and arousal differ only in the band power feature (beta vs gamma)
     valence_files = shared_files + ['bandPwr_beta_1_1.npz','ShannonRes_sub_bands_gamma_1_1.npz']
     arousal_files = shared_files + ['bandPwr_gamma_1_1.npz','ShannonRes_sub_bands_gamma_1_1.npz']
   #feature matrices are samples x features; labels come from the Hjorth mobility file
   #(column 0 = valence, column 1 = arousal)
     feature_matrix_valence,Y_val = _load_feature_matrix(directory,valence_files,0)
     feature_matrix_arousal,Y_aro = _load_feature_matrix(directory,arousal_files,1)
     model = SVR()#initializing support vector regressor for training
   #running incremental learning loop for valence
     print('')
     print('Incremental training for valence')
     print('')
     test_subject = []
     rmse_val = []
     subject_indexes_valence = segregate_data_of_subjects(feature_matrix_valence,total_subjects,128)
     for i in range(1,total_subjects+1):
         rmse_val,test_subject= training_phase(model,feature_matrix_valence,Y_val,subject_indexes_valence,i,total_subjects,rmse_val,test_subject)
   #running incremental learning loop for arousal
     print('')
     print('Incremental training for arousal ')
     print(' ')
     model = SVR()#reinitialize model, as the DEAP and OASIS drivers do
     test_subject = []
     rmse_aro = []
     subject_indexes_arousal = segregate_data_of_subjects(feature_matrix_arousal,total_subjects,128)
     for i in range(1,total_subjects+1):
         rmse_aro,test_subject = training_phase(model,feature_matrix_arousal,Y_aro,subject_indexes_arousal,i,total_subjects,rmse_aro,test_subject)
     fig,axe = plt.subplots(1,1,figsize = (40,20))
     axe.plot(test_subject,rmse_val,color='r',label = 'rms valence')
     axe.plot(test_subject,rmse_aro,color = 'g',label = 'rms arousal')
     axe.set_xlabel('trained upto subject')
     axe.set_ylabel('rmse')
     axe.set_title('support vector regressor')
     axe.legend(loc='upper right')
     df = pd.DataFrame([rmse_val,rmse_aro],columns = test_subject,index = ['valence rms','arousal rms'])
     print(df)
     if save == 'y':
         plt.savefig('plots/dreamer/all_feature_valence_arousal_rmse',format = "svg")
         df.to_csv('plots/dreamer/all_features_valence_arousal_rmse.csv')
 # In[ ]:
 if __name__ == '__main__':
    driver_code(sys.argv[1])
@@ -0,0 +1,255 @@
 #!/usr/bin/env python
 # coding: utf-8
 # In[ ]:
 import numpy as np
 import matplotlib.pyplot as plt
 import pandas as pd
 import sys
 from sklearn.preprocessing import MinMaxScaler,StandardScaler
 from sklearn.utils import shuffle
 from sklearn.metrics import mean_squared_error
 from sklearn.model_selection import train_test_split
 from sklearn.svm import SVR 
 from sklearn.metrics import accuracy_score
 # In[ ]:
 # Either one of the MinMaxScaling or StandardScaling function can be used
 scaler_min_max = MinMaxScaler()
 scaler_standard = StandardScaler()
 def MinMaxScaling(feature_matrix):
     '''
     Rescale feature_matrix with the shared module-level MinMaxScaler.

     The scaler is re-fitted on the matrix passed in on every call, so the
     returned values only reflect the data of the current call.
     '''
     # fit() returns the scaler itself, so fit and transform can be chained
     fitted_scaler = scaler_min_max.fit(feature_matrix)
     return fitted_scaler.transform(feature_matrix)
 def StandardScaling(feature_matrix):
     '''
     Standardise feature_matrix with the shared module-level StandardScaler.

     parameters :-
         feature_matrix :- matrix handed to the scaler (samples x features as used by the callers)
     returns :-
         the transformed matrix produced by the scaler fitted on this same input
     '''
     # the scaler is re-fitted on every call
     scaler_standard.fit(feature_matrix)
     print('scaling shape',scaler_standard.mean_.shape)
     # previously returned `scaler.transform(...)`; no name `scaler` exists in
     # this module, so every call ended in a NameError
     return scaler_standard.transform(feature_matrix)
 # """##OASIS dataset
 # 1> Valence - features selected
 # >
 # *   HjorthMob
 # *    HjorthComp
 # *   stdDev
 # 
 # 2> Arousal - feature selected
 # >
 # *   HjorthMob
 # """
 # In[ ]:
 # now for incremental learning we need to segregate data of subjects
 def segregate_data_of_subjects(feature_matrix,Y,total_subjects,sfreq = 128):
     '''
     Returns two dictionaries holding, per subject, the rows of the feature
     matrix and the matching label values.

     parameters :-
         feature_matrix :- feature rows ordered subject by subject
         Y :- label values aligned with the rows of feature_matrix
         total_subjects :- total number of subjects in the study (accepted but not
                           used: the 15 subject boundaries below are fixed)
         sfreq :- sampling frequency of the EEG data (accepted but not used here)
     returns :-
         subject_indexes :- dict 'subject_<k>' -> feature rows of subject k
         aligned_y :- dict 'subject_<k>' -> label values of subject k
     '''
   #row index where each subject starts; the last entry is the end of subject 15.
   #subjects do not all own the same number of rows, hence the explicit table
     boundaries = [0,200,400,600,795,995,1185,1375,1575,1770,1965,2160,2360,2550,2740,2935]
     subject_indexes = {}
     aligned_y = {}
     for number,(first,last) in enumerate(zip(boundaries[:-1],boundaries[1:]),start=1):
         subject_name = 'subject_' + str(number)
         subject_indexes[subject_name] = feature_matrix[first:last]
         aligned_y[subject_name] = Y[first:last]
     return subject_indexes,aligned_y
 # In[ ]:
 # now defining a function which carries out the incremental learning algorithm
 def training_phase(model,feature_matrix,Y,subject_indexes,number_of_subjects,total_subjects,rmse_score,test_subject):
     '''
     Fit the model on the data of the first `number_of_subjects` subjects and
     record the RMSE on a 20% hold-out of that same data.

     parameters :-
         model :- regressor exposing fit/predict (SVR in the driver code)
         feature_matrix :- full feature matrix; only its number of columns is read
         Y :- dict 'subject_<k>' -> label values of subject k
         subject_indexes :- dict 'subject_<k>' -> feature rows of subject k
         number_of_subjects :- how many subjects (starting from subject 1) to train on
         total_subjects :- total number of subjects in the study (not used here)
         rmse_score :- list of RMSE values from previous iterations (appended to in place)
         test_subject :- list of subject names from previous iterations (appended to in place)
     returns :-
         rmse_score :- same list, with the RMSE of this iteration appended
         test_subject :- same list, with the name of the last included subject appended
     '''
     feature_count = feature_matrix.shape[1]
     print('training on subject_no:',end = ' ')
   #gather feature rows of subjects 1..number_of_subjects, seeded with an empty float matrix
     feature_parts = [np.empty((0,feature_count))]
     for idx in range(1,number_of_subjects+1):
         print(idx,end = ' ')
         feature_parts.append(subject_indexes['subject_'+str(idx)])
     print(' ')
   #MinMax scaling of the feature matrix of the current iteration
     X = MinMaxScaling(np.concatenate(feature_parts,axis=0))
   #gather the valence/arousal values of the same subjects
     label_parts = [np.empty((0))]
     for idx in range(1,number_of_subjects+1):
         subject_name = 'subject_'+str(idx)
         label_parts.append(Y[subject_name])
     y = np.concatenate(label_parts,axis=0)
     print('shape of X is :',X.shape)
     print('shape of y is :',y.shape)
   #random shuffle followed by an 80:20 train/test split
     X,y = shuffle(X,y,random_state = 0)
     X_train,X_test,y_train,y_test = train_test_split(X,y,random_state=0,test_size=0.2)
   #train, then predict on the held-out part
     fitted_model = model.fit(X_train,y_train)
     y_predict = fitted_model.predict(X_test)
   #rmse of the model fitted in the current iteration
     y_rms = np.sqrt(mean_squared_error(y_test,y_predict))
     print('rms on y :',y_rms)
     print('')
     rmse_score.append(y_rms)
     test_subject.append(subject_name)
     return rmse_score,test_subject
 # In[ ]:
 def _load_feature_matrix(directory,file_names,label_column):
     '''
     Stack the 'features' arrays of several .npz files into one matrix.

     parameters :-
         directory :- path prefix of the feature files
         file_names :- file names to load, in the order their rows are stacked
         label_column :- column of the 'Y' array (taken from the first file) to return
     returns :-
         (feature matrix transposed to samples x features, label vector)
     '''
     feature_rows = []
     labels = None
     for file_name in file_names:
         # np.load keeps an .npz archive open; the with-block closes it once the arrays are read
         with np.load(directory + file_name) as archive:
             feature_rows.append(archive['features'])
             if labels is None:
                 labels = archive['Y'][:,label_column]
     # empty float seed: same dtype result as growing the matrix with np.append,
     # without hard-coding the number of samples
     seed = np.empty((0,feature_rows[0].shape[1]))
     return np.vstack([seed] + feature_rows).T,labels
 def driver_code(save):
     '''
     Load the OASIS features, run the incremental SVR training for valence and
     arousal, plot both RMSE curves and optionally save plot and table.

     parameters :-
         save :- 'y' to write the svg plot and the csv table under plots/oasis/
     '''
     directory = 'features/oasis/with_autoreject/'
     total_subjects = 15
   #valence uses three features, arousal only Hjorth mobility
     valence_files = ['Hjorth_mobilty_0_0.npz','Hjorth_complexity_0_0.npz','stdDev_0_0.npz']
     arousal_files = ['Hjorth_mobilty_0_0.npz']
   #feature matrices are samples x features; labels come from the Hjorth mobility file
   #(column 0 = valence, column 1 = arousal)
     feature_matrix_valence,Y_val = _load_feature_matrix(directory,valence_files,0)
     feature_matrix_arousal,Y_aro = _load_feature_matrix(directory,arousal_files,1)
     model = SVR() #initialize model
   #running incremental learning loop for valence
     print('')
     print('Incremental training for valence')
     print('')
     test_subject = []
     rmse_val = []
     subject_indexes_valence,aligned_Y_val = segregate_data_of_subjects(feature_matrix_valence,Y_val,total_subjects,128)
     for i in range(1,total_subjects+1):
         rmse_val,test_subject= training_phase(model,feature_matrix_valence,aligned_Y_val,subject_indexes_valence,i,total_subjects,rmse_val,test_subject)
   #running incremental learning loop for arousal
     print('')
     print('Incremental training for arousal ')
     print(' ')
     model = SVR()#reinitialize model
     test_subject = []
     rmse_aro = []
     subject_indexes_arousal,aligned_Y_aro = segregate_data_of_subjects(feature_matrix_arousal,Y_aro,total_subjects,128)
     for i in range(1,total_subjects+1):
         rmse_aro,test_subject = training_phase(model,feature_matrix_arousal,aligned_Y_aro,subject_indexes_arousal,i,total_subjects,rmse_aro,test_subject)
     fig,axe = plt.subplots(1,1,figsize = (40,20))
     axe.plot(test_subject,rmse_val,color='r',label = 'rmse valence')
     axe.plot(test_subject,rmse_aro,color = 'g',label = 'rmse arousal')
     axe.set_xlabel('trained upto subject')
     axe.set_ylabel('rmse')
     axe.set_title('support vector regressor')
     axe.legend(loc = 'upper right')
     df = pd.DataFrame([rmse_val,rmse_aro],columns = test_subject,index = ['valence rms','arousal rms'])
     if save == 'y':
         plt.savefig('plots/oasis/all_feature_valence_arousal_rmse',format="svg")
         df.to_csv('plots/oasis/all_features_valence_arousal_rmse.csv')
 # In[ ]:
 if __name__ == '__main__':
    driver_code(sys.argv[1])
@@ -0,0 +1,207 @@
 #!/usr/bin/env python
 # coding: utf-8
 # In[ ]:
 import glob
 import pandas as pd
 import matplotlib.pyplot as plt
 # In[ ]:
 from google.colab import drive
 drive.mount('/gdrive',force_remount=True)
 # In[ ]:
 get_ipython().run_line_magic('cd', '/gdrive/MyDrive/emotion_recognition_project/')
 # Script to obtain the incremental learning graph for the DEAP, DREAMER and OASIS datasets.
 # ##plots for DEAP
 # In[ ]:
 # Collect every csv under plots/deap/; only the first match is plotted below.
 # NOTE(review): glob does not guarantee an ordering - confirm that the first
 # match is the intended file when several csv files exist.
 dataset_deap=glob.glob('plots/deap/*.csv')
 # In[ ]:
 dataset_deap
 # In[ ]:
 # Transpose so that each remaining row is one training step and the two
 # columns are the valence and arousal values.
 dataset_svr_deap = pd.read_csv(dataset_deap[0]).T
 dataset_svr_deap.columns = ['valence','arousal']
 # drop the row labelled 'Unnamed: 0' that read_csv creates for the unnamed first csv column
 dataset_svr_deap = dataset_svr_deap.drop('Unnamed: 0')
 # [::1] keeps every row in the same order
 dataset_svr_deap= dataset_svr_deap[::1]
 # x axis: subject counts 1..32
 x_deap = range(1,33,1)
 dataset_svr_deap
 # In[ ]:
 fig_deap,axe_deap = plt.subplots(1,1,figsize = (17,10))
 axe_deap.plot(x_deap,dataset_svr_deap['valence'],color='green',marker = 'x',markersize=10)
 axe_deap.plot(x_deap,dataset_svr_deap['arousal'],color ='red',marker = 'x',markersize=10)
 # NOTE(review): legend and file names say 'rfr' while the variables are named
 # 'svr' - confirm which regressor produced the csv.
 axe_deap.legend(['rfr_valence','rfr_arousal'],)
 axe_deap.set_xlabel('trained upto subject')
 axe_deap.set_ylabel('RMSE values')
 # NOTE(review): the font size is changed after the axes and labels were created -
 # presumably it only affects text created afterwards; confirm.
 plt.rcParams.update({'font.size':40})
 plt.tight_layout()
 # tick every third subject on the current axes
 plt.xticks(x_deap[::3])
 # In[ ]:
 fig_deap.savefig('final_plots/deap_rfr__valence_arousal_rms.svg')
 fig_deap.savefig('final_plots/deap_rfr__valence_arousal_rms.png')
 # ##plots for DREAMER
 # In[ ]:
 # Collect every csv under plots/dreamer/; only the first match is plotted below.
 # NOTE(review): glob does not guarantee an ordering - confirm that the first
 # match is the intended file when several csv files exist.
 dataset_dreamer=glob.glob('plots/dreamer/*.csv')
 # In[ ]:
 dataset_dreamer
 # In[ ]:
 # Transpose so that each remaining row is one training step and the two
 # columns are the valence and arousal values.
 dataset_svr_dreamer = pd.read_csv(dataset_dreamer[0]).T
 dataset_svr_dreamer.columns = ['valence','arousal']
 # drop the row labelled 'Unnamed: 0' that read_csv creates for the unnamed first csv column
 dataset_svr_dreamer = dataset_svr_dreamer.drop('Unnamed: 0')
 # x axis: subject counts 1..23
 x_dreamer = range(1,24,1)
 # [::1] keeps every row in the same order
 dataset_svr_dreamer= dataset_svr_dreamer[::1]
 dataset_svr_dreamer
 # In[ ]:
 fig_dreamer,axe_dreamer = plt.subplots(1,1,figsize=(17,10))
 axe_dreamer.plot(x_dreamer,dataset_svr_dreamer['valence'],color='green',marker = 'x',markersize=10)
 axe_dreamer.plot(x_dreamer,dataset_svr_dreamer['arousal'],color ='red',marker = 'x',markersize=10)
 # NOTE(review): legend and file names say 'rfr' while the variables are named
 # 'svr' - confirm which regressor produced the csv.
 axe_dreamer.legend(['rfr_valence','rfr_arousal'],)
 axe_dreamer.set_xlabel('trained upto subject')
 axe_dreamer.set_ylabel('RMSE values')
 # NOTE(review): the font size is changed after the axes and labels were created -
 # presumably it only affects text created afterwards; confirm.
 plt.rcParams.update({'font.size':40})
 plt.tight_layout()
 # tick every third subject on the current axes
 plt.xticks(x_dreamer[::3])
 # In[ ]:
 fig_dreamer.savefig('final_plots/dreamer_rfr__valence_arousal_rms.svg')
 fig_dreamer.savefig('final_plots/dreamer_rfr__valence_arousal_rms.png')
 # ##plots for oasis
 # In[ ]:
 # Collect every csv under plots/oasis/; only the first match is plotted below.
 # NOTE(review): glob does not guarantee an ordering - confirm that the first
 # match is the intended file when several csv files exist.
 dataset_oasis=glob.glob('plots/oasis/*.csv')
 # In[ ]:
 dataset_oasis
 # In[ ]:
 # Transpose so that each remaining row is one training step and the two
 # columns are the valence and arousal values.
 dataset_svr_oasis = pd.read_csv(dataset_oasis[0]).T
 dataset_svr_oasis.columns = ['valence','arousal']
 # drop the row labelled 'Unnamed: 0' that read_csv creates for the unnamed first csv column
 dataset_svr_oasis = dataset_svr_oasis.drop('Unnamed: 0')
 # x axis: subject counts 1..15
 x_oasis = range(1,16,1)
 # [::1] keeps every row in the same order
 dataset_svr_oasis= dataset_svr_oasis[::1]
 dataset_svr_oasis
 # In[ ]:
 fig_oasis,axe_oasis = plt.subplots(1,1,figsize=(17,10))
 axe_oasis.plot(x_oasis,dataset_svr_oasis['valence'],color='green',marker = 'x',markersize=10)
 axe_oasis.plot(x_oasis,dataset_svr_oasis['arousal'],color ='red',marker = 'x',markersize=10)
 axe_oasis.set_xlabel('trained upto subject')
 axe_oasis.set_ylabel('RMSE values')
 # NOTE(review): legend and file names say 'rfr' while the variables are named
 # 'svr' - confirm which regressor produced the csv.
 axe_oasis.legend(['rfr_valence','rfr_arousal'],loc = 'lower right')
 # NOTE(review): the font size is changed after the axes and labels were created -
 # presumably it only affects text created afterwards; confirm.
 plt.rcParams.update({'font.size':40})
 # tick every third subject on the current axes
 plt.xticks(x_oasis[::3])
 plt.tight_layout()
 # In[ ]:
 fig_oasis.savefig('final_plots/oasis_rfr__valence_arousal_rms.svg')
 fig_oasis.savefig('final_plots/oasis_rfr__valence_arousal_rms.png')
 # In[ ]:
 # In[ ]:
 # Combined figure: one row per dataset (DEAP, DREAMER, OASIS), re-using the
 # x ranges and data frames prepared in the cells above.
 f,a = plt.subplots(3,1,figsize = (40,30))
 a[0].plot(x_deap,dataset_svr_deap['valence'],color='green',marker = 'x',markersize=10)
 a[0].plot(x_deap,dataset_svr_deap['arousal'],color ='red',marker = 'x',markersize=10)
 # NOTE(review): four labels are passed although only two lines are drawn on this
 # axes - presumably the two 'rfr' entries are leftovers; confirm.
 a[0].legend(['svr_valence','svr_arousal','rfr_valence','rfr_arousal'],)
 #a[0].set_xlabel('trained upto subject')
 a[0].set_ylabel('RMSE values')
 a[0].set_title('DEAP')
 a[1].plot(x_dreamer,dataset_svr_dreamer['valence'],color='green',marker = 'x',markersize=10)
 a[1].plot(x_dreamer,dataset_svr_dreamer['arousal'],color ='red',marker = 'x',markersize=10)
 #a[1].legend(['svr_valence','svr_arousal','rfr_valence','rfr_arousal'],)
 #a[1].set_xlabel('trained upto subject')
 a[1].set_ylabel('RMSE values')
 a[1].set_title('DREAMER')
 a[2].plot(x_oasis,dataset_svr_oasis['valence'],color='green',marker = 'x',markersize=10)
 a[2].plot(x_oasis,dataset_svr_oasis['arousal'],color ='red',marker = 'x',markersize=10)
 # only the bottom panel carries the shared x label
 a[2].set_xlabel('trained upto subject')
 a[2].set_ylabel('RMSE values')
 #a[2].legend(['svr_valence','svr_arousal','rfr_valence','rfr_arousal'],loc = 'lower right')
 a[2].set_title('OASIS')
 plt.rcParams.update({'font.size':40})
 plt.tight_layout()
 # In[ ]:
 f.savefig('final_plots/all_plots_incremental learning.svg')
 # In[ ]:
@@ -0,0 +1,487 @@
 #!/usr/bin/env python
 # coding: utf-8
 # In[ ]:
 from google.colab import drive 
 drive.mount('/gdrive',force_remount = True)
 # In[ ]:
 get_ipython().system('pip install mne')
 # In[ ]:
 get_ipython().system('pip install autoreject')
 # In[ ]:
 import numpy as np
 import mne
 import autoreject 
 from scipy.stats import pearsonr
 import pickle
 # In[ ]:
 get_ipython().run_line_magic('cd', '/gdrive/MyDrive/emotion_recognition_project/')
 # In[ ]:
 class preprocessing:
     '''
     Load one of the datasets (DREAMER, DEAP, OASIS) and store its arrays and parameters
     '''
     def __init__(self,name):
         self.name = name #name of dataset
         self.X = None
         self.Y = None
         self.Z = None
         self.gyroscope = None
         self.freq = None #(in Hz) is same for all datasets
         self.channels = None
         self.ch_type = 'eeg'
     def load_arrays(self):
         '''
         loads arrays in object variables of the form
         X: trials x channels x timepoints (after reshape_data swapped the last two axes)
         Y: trials x (valence,arousal)
         Z: trials x participant no

         raises ValueError when self.name is not one of 'DREAMER', 'DEAP', 'OASIS'
         '''
         emotiv_channels = ['AF3','F7','F3','FC5','T7','P7','O1','O2','P8','T8','FC6','F4','F8','AF4']
         deap_channels = ['F1', 'AF3', 'F3', 'F7', 'FC5', 'FC1', 'C3', 'T7', 'CP5', 'CP1', 'P3', 'P7', 'PO3', 'O1', 'Oz', 'Pz', 'Fp2', 'AF4', 'Fz', 'F4', 'F8', 'FC6', 'FC2', 'Cz', 'C4', 'T8', 'CP6', 'CP2', 'P4', 'P8', 'PO4', 'O2', 'hEOG','vEOG', 'zEMG','tEMG','GSR','Respiration belt','Plethysmograph','Temperature']
         sources = {
             'DREAMER': ('original_data/DREAMER.npz',emotiv_channels),
             'DEAP': ('original_data/DEAP.npz',deap_channels),
             'OASIS': ('original_data/OASIS.npz',emotiv_channels),
         }
         # an unknown name previously fell through to an unbound local variable
         if self.name not in sources:
             raise ValueError('unknown dataset name: ' + repr(self.name))
         path,channels = sources[self.name]
         self.freq = 128
         self.channels = list(channels)
         # np.load keeps the .npz archive open; close it once the arrays are read
         with np.load(path) as array:
             self.X = array['X']
             if self.name == 'DEAP':
                 # keep only the first 32 entries of the last axis (the EEG channels).
                 # NOTE(review): assumes the stored DEAP array is trials x timepoints x channels - confirm
                 self.X = self.X[:,:,:32]
                 self.channels = self.channels[:32]
             if self.name == 'OASIS':
                 self.gyroscope = array['gyroscope']
             self.Y = array['Y']
             self.Z = array['Z']
         self.reshape_data()
     def reshape_data(self):
         '''
         exchanges last two dimensions of data
         '''
         # np.reshape only re-interprets the flat buffer and would mix samples of
         # different channels; swapping the axes moves every value with its indices
         self.X = np.ascontiguousarray(np.swapaxes(self.X,1,2))
 # In[ ]:
 class filters():
     '''
     define filters to be used for preprocessing (thin wrappers around mne.filter)
     '''
     @staticmethod
     def notch_filter(data,sfreq,notch_freq):
         # parameters :-
                     # data :- EEG data
                     # sfreq :- sampling frequency
                     # notch_freq :- frequency of the notch filter (generally the mains frequency of the recording site)
         # np.arange(f,f+1,1) is the one-element array [f]: a single notch frequency is removed
         return  mne.filter.notch_filter(data,sfreq,np.arange(notch_freq,notch_freq+1,1))
     @staticmethod
     def butterworth_filter(data,sfreq,lfreq,hfreq):
         # parameters :-
                     # data :- EEG data
                     # sfreq :- sampling frequency
                     # lfreq :- forwarded as l_freq: lower edge of the pass band, i.e. the high-pass cut-off (None disables it)
                     # hfreq :- forwarded as h_freq: upper edge of the pass band, i.e. the low-pass cut-off (None disables it)
         # method='iir' selects mne's IIR filter design
         return mne.filter.filter_data(data  = data,sfreq = sfreq,l_freq = lfreq,h_freq = hfreq,method = 'iir',verbose = False)
 # In[ ]:
 class referencing():
     '''
     referencing electrodes to some value
     '''
     @staticmethod
     def average(data):
         '''
         Computes average voltage  of all channels for a particular trial and a particular timepoint, and subtracts average value from all channels

         data is indexed as trials x channels x timepoints by the caller; the mean
         is taken over axis 1 and the input array itself is not modified
         '''
         # keepdims keeps the averaged axis with length 1 so the subtraction broadcasts over it
         return data - np.mean(data,axis=1,keepdims=True)
 # In[ ]:
 class autoreject_custom:
     '''
     Run Autoreject algorithm here for artifact rejections
     '''
     @staticmethod
     def raw_object_creation(raw_data,channel_name,ch_types,sfreq):
         '''
         defining parameters for creation of raw object  which will be used for creating an epoch object
         returns raw object after setting parameters

         parameters :-
             raw_data :- EEG data
             channel_name :- Names of the channels of EEG data used
             ch_types :- Whether each channel is EEG/Gyro, etc
             sfreq :- sampling frequency
         '''
         montage = mne.channels.make_standard_montage('standard_1020')
         #creating a info object; its montage is set to the standard 10-20 system below
         info = mne.create_info(ch_names=channel_name,sfreq=sfreq,ch_types = ch_types,verbose = False)
         #create raw object directly from array
         raw_object = mne.io.RawArray(data = raw_data,info = info,verbose = False)
         #setting montage
         raw_object.set_montage(montage)
         return raw_object
     @staticmethod
     def epoch_object_creation(raw_object,start=0,duration=1,tmin=0,tmax=0.99):
         '''
         making an epoch object which will be used for autoreject algorithm

         parameters :-
             raw_object :- raw object, e.g. from raw_object_creation
             start :- forwarded to mne.make_fixed_length_events
             duration :- forwarded to mne.make_fixed_length_events
             tmin,tmax :- forwarded to mne.Epochs
         start, tmin and tmax were previously accepted but ignored in favour of
         hard-coded values equal to the defaults, so default calls are unchanged
         '''
         #creating fixed length events
         events = mne.make_fixed_length_events(raw_object,id=1,start=start,duration = duration)
         #creating an epoch object
         epoch_object = mne.Epochs(raw_object,events = events,preload=True,baseline = None,reject=None,verbose=False,tmin=tmin,tmax=tmax)
         return epoch_object
     @staticmethod
     def autoreject_algo(epoch_object,n_interpolates,consensus_percs):
         '''
         cleans the epochs,and returns cleaned epochs,rejecting bad epochs based on optimal parameters calculation
         n_interpolates are the ρ values that we would like autoreject to try and consensus_percs are the κ values that autoreject will try
         Epochs with more than κ∗N sensors (N total sensors) bad are dropped

         returns :- (cleaned epochs, reject log computed on the input epochs)
         '''
         ar = autoreject.AutoReject(n_interpolates, consensus_percs, random_state=42,verbose = 'tqdm_notebook',cv=4,n_jobs=10)
         #fitting autoreject model to epoch data
         ar.fit(epoch_object)
         epochs_clean = ar.transform(epoch_object)
         #the evoked averages computed here before were never used and have been dropped
         return epochs_clean,ar.get_reject_log(epoch_object)
 # In[ ]:
 class source_decomposition():
     '''
     Source separation of EEG data with ICA
     '''
     @staticmethod
     def ica(data,channels,ch_type,sfreq):
         '''
         Fit an infomax ICA (14 components, max_iter 300) on the data and return
         (source signals, mixing matrix).

         parameters :-
             data :- EEG data
             channels :- Names of the channels of EEG data used
             ch_type :- Whether each channel is EEG/Gyro, etc
             sfreq :- sampling frequency
         '''
         #wrap the array in a raw object so that mne can work on it
         raw_obj = autoreject_custom.raw_object_creation(data,channels,ch_type,sfreq)
         #defining ICA parameters
         decomposer = mne.preprocessing.ICA(method='infomax',n_components=14)
         decomposer.fit_params['max_iter'] =300
         decomposer.fit(raw_obj,verbose=False)
         source_signals = decomposer.get_sources(raw_obj).get_data()
         return source_signals,decomposer.mixing_matrix_
 # In[ ]:
 def process_trial(a,acc_x,acc_y,acc_z,sfreq=None):
     '''
     a are the source signals obtained after decomposition
     acc_<> are accelerometer readings in respective axis

     parameters :-
         a :- EEG source signals after ICA (sources x timepoints); corrected in place
         acc_x :- accelerometer channel along X axis
         acc_y :- accelerometer channel along Y axis
         acc_z :- accelerometer channel along Z axis
         sfreq :- sampling frequency used for the high pass filter; when None the
                  module-level dataset.freq is used
     returns :-
         a :- the same array, with the flagged sources high-pass filtered at 3Hz
     '''
   #pearson co-eff between each source signal and each accelerometer axis
     pcoeff_arr = np.zeros((a.shape[0],3))
     for i in range(a.shape[0]):
         for axis_no,acc in enumerate((acc_x,acc_y,acc_z)):
             r,_ = pearsonr(a[i],acc)
             pcoeff_arr[i,axis_no] = r
   #per axis threshold: mean + 2 * std of the coefficients over all sources
     error = np.mean(pcoeff_arr,axis = 0) + 2 * np.std(pcoeff_arr,axis = 0)
   #a source is bad when its coefficient exceeds the threshold on at least one axis
   #(only the upper side is checked, strongly negative correlations are not flagged)
     bad_source_index = np.flatnonzero((pcoeff_arr > error).any(axis=1))
     if len(bad_source_index) and sfreq is None:
         sfreq = dataset.freq
   #correcting bad sources by butterworth filter by high pass 3Hz frequency as motion artifacts are said to exist in low power frequencies
     for index in bad_source_index:
         a[index] = filters.butterworth_filter(a[index],sfreq,3,None)#high pass filter 3Hz
     return a #return corrected source signals
 # In[ ]:
 # Load the OASIS arrays (EEG in dataset.X, motion sensors in dataset.gyroscope).
 dataset = preprocessing('OASIS')
 dataset.load_arrays()
 dataset.gyroscope.shape
 # In[ ]:
 # Re-reference the electrodes with the average-reference method.
 average_data = referencing.average(dataset.X)
 # In[ ]:
 # Apply a 60 Hz notch filter. (A 0.1-40 Hz Butterworth band-pass,
 # butterworth_filter(average_data,dataset.freq,0.1,40), was the alternative.)
 filtered_data = filters.notch_filter(average_data, dataset.freq, 60)
 # In[ ]:
 no_of_trials = dataset.X.shape[0]
 # Bring the motion-sensor data to trials x channels x timepoints.
 # np.reshape would keep the memory order and therefore interleave channels
 # and timepoints; swapping the axes needs a transpose.
 # Assumes dataset.gyroscope is trials x timepoints x channels - TODO confirm.
 gyroscope_trials = np.transpose(dataset.gyroscope, (0, 2, 1))
 # Iterate over all trials and correct each one for motion artifacts.
 for trial_n in range(no_of_trials):
     print('processing trial no:',trial_n+1)
     trial_data = filtered_data[trial_n]
     # Accelerometer channels (index 4 onward) of THIS trial; the previous code
     # always read trial 0 here.
     gyroscope_trial = gyroscope_trials[trial_n, 4:, :]
     gyroscope_trial_x = gyroscope_trial[0]  # accelerometer x axis reading
     gyroscope_trial_y = gyroscope_trial[1]  # accelerometer y axis reading
     gyroscope_trial_z = gyroscope_trial[2]  # accelerometer z axis reading
     source_signals, mixing_matrix = source_decomposition.ica(trial_data, dataset.channels, dataset.ch_type, dataset.freq)
     corrected_sources = process_trial(source_signals, gyroscope_trial_x, gyroscope_trial_y, gyroscope_trial_z)
     # Project the corrected sources back into the original EEG channel space
     # using the mixing matrix.
     project_back = np.matmul(mixing_matrix, corrected_sources)
     filtered_data[trial_n] = project_back
 # In[ ]:
 filtered_data.shape
 # In[ ]:
 no_of_trials = dataset.X.shape[0]
 # clean_epochs maps a 1-based trial number to a list holding:
 #   the cleaned epochs returned by autoreject, the boolean array marking
 #   which epochs were dropped, the drop percentage, dataset.Y[trial] and
 #   dataset.Z[trial][1] (per the original notes: the valence/arousal rating
 #   and the image id).
 # Per the original notes, autoreject splits each 640-timepoint trial into
 # 5 one-second epochs of 128 timepoints and rejects epochs based on the
 # parameters it estimates.
 clean_epochs = {}
 for trial in range(no_of_trials):
     print('trial no',trial)
     trial_raw = autoreject_custom.raw_object_creation(
         filtered_data[trial], dataset.channels, dataset.ch_type, dataset.freq
     )
     print(trial_raw.get_data().shape)
     trial_epochs = autoreject_custom.epoch_object_creation(trial_raw)
     print(trial_epochs.get_data().shape)
     interpolate_grid = np.array([1, 4, 32])
     consensus_grid = np.linspace(0, 1.0, 11)
     cleaned, log = autoreject_custom.autoreject_algo(
         trial_epochs,
         n_interpolates = interpolate_grid,
         consensus_percs = consensus_grid,
     )
     dropped_pct = cleaned.drop_log_stats()
     # Only trials for which nothing was dropped are recorded.
     if dropped_pct != 0:
         continue
     clean_epochs[trial+1] = [
         cleaned.get_data(),
         log.bad_epochs,
         dropped_pct,
         dataset.Y[trial],
         dataset.Z[trial][1],
     ]
 # In[ ]:
 def _prompt(message):
     """Print ``message`` and return the line the user types in reply."""
     print(message)
     return input()


 def _offer_save(dataset, data):
     """Ask whether to save ``data`` (with ``dataset.Y``) as .npz and do so on 'y'."""
     if _prompt('do you want to save the data preprocessed so far?y/n') == 'y':
         filename = input('enter filename to save as')
         # Save the preprocessed array, not the untouched dataset.X.
         np.savez('preprocessed_data/'+dataset.name.lower()+'/'+filename,X = data,Y = dataset.Y)


 def _apply_filters(dataset, data):
     """Interactively apply notch/Butterworth filters to ``data`` until the user answers 'n'."""
     filter_dict = {1:'notch_filter',2:"butter_worth_filter"}
     while True:
         print(filter_dict)
         mapping = int(input())
         print('sampling frequency of dataset is',dataset.freq)
         if mapping == 1:
             print('enter notch frequency')
             notch_freq = float(input())
             data = filters.notch_filter(data,dataset.freq,notch_freq)
         elif mapping == 2:
             print('enter lower frequency')
             lfreq = float(input())
             print('enter higher frequency')
             hfreq = float(input())
             # Filter the current data so referencing and earlier filters are kept.
             data = filters.butterworth_filter(data,dataset.freq,lfreq,hfreq)
         else:
             print('unknown filter, choose one of',sorted(filter_dict))
             continue
         # Asked inside the loop; any answer other than 'n' applies another filter.
         if _prompt('Do you want to apply filters again?enter y/n') == 'n':
             return data


 def _correct_motion_artifacts(dataset, filtered_data):
     """Run ICA-based motion-artifact correction on every trial of ``filtered_data`` in place."""
     no_of_trials = dataset.X.shape[0]
     print('shape of gyroscope data before reshaping is:',dataset.gyroscope.shape)
     # Swap the last two axes to get trials x channels x timepoints. np.reshape
     # would keep memory order and interleave channels with timepoints.
     # Assumes dataset.gyroscope is trials x timepoints x channels - TODO confirm.
     gyroscope_trials = np.transpose(dataset.gyroscope, (0, 2, 1))
     for trial_n in range(no_of_trials):
         print('processing trial no:',trial_n+1)
         trial_data = filtered_data[trial_n]
         # NOTE(review): the module-level version of this loop takes channels
         # 4: as the accelerometer axes; here channels 0-2 are used. Confirm
         # which channel layout is right.
         gyroscope_trial = gyroscope_trials[trial_n,:,:]
         gyroscope_trial_x = gyroscope_trial[0]
         gyroscope_trial_y = gyroscope_trial[1]
         gyroscope_trial_z = gyroscope_trial[2]
         source_signals,mixing_matrix = source_decomposition.ica(trial_data,dataset.channels,dataset.ch_type,dataset.freq)
         # process_trial takes its sampling rate from the module-level `dataset`,
         # not from the local one selected in driver_code.
         corrected_sources = process_trial(source_signals,gyroscope_trial_x,gyroscope_trial_y,gyroscope_trial_z)
         # Project the corrected sources back into EEG channel space.
         filtered_data[trial_n] = np.matmul(mixing_matrix,corrected_sources)
     return filtered_data


 def _run_autoreject(dataset, filtered_data):
     """Run autoreject per trial; return {trial number (1-based): info list} for trials with no dropped epochs."""
     # Each stored list holds: cleaned epoch data, the bad-epoch mask from the
     # reject log, the drop percentage, dataset.Y[trial] and dataset.Z[trial][1].
     clean_epochs = {}
     for trial in range(dataset.X.shape[0]):
         print('trial no',trial)
         temp = filtered_data[trial]
         raw_object = autoreject_custom.raw_object_creation(temp,dataset.channels,dataset.ch_type,dataset.freq)
         print(raw_object.get_data().shape)
         epoch = autoreject_custom.epoch_object_creation(raw_object)
         print(epoch.get_data().shape)
         clean_epoch,reject_log = autoreject_custom.autoreject_algo(epoch,n_interpolates = np.array([1, 4, 32]),consensus_percs = np.linspace(0, 1.0, 11))
         if clean_epoch.drop_log_stats() == 0:
             clean_epochs[trial+1] = [clean_epoch.get_data(),reject_log.bad_epochs,clean_epoch.drop_log_stats(),dataset.Y[trial],dataset.Z[trial][1]]
     return clean_epochs


 def driver_code():
     """Interactive preprocessing pipeline driven by console prompts.

     Steps: choose a dataset, re-reference it, apply one or more filters,
     optionally save, and - for OASIS only - optionally run motion-artifact
     correction and autoreject. Takes no arguments and returns nothing; all
     results are written to disk on request.
     """
     # load dataset
     dataset_dict = {0:'DEAP',1:'OASIS',2:'DREAMER'}
     print(dataset_dict)
     print('enter dataset mapping number you want to use')
     mapping = int(input())
     dataset = preprocessing(dataset_dict[mapping])
     dataset.load_arrays()
     # referencing
     print('next step in preprocessing is referencing')
     referencing_dict = {1:'average_referencing'}
     print(referencing_dict)
     print('enter referencing method')
     mapping = int(input())
     if mapping == 1:
         averaged_data = referencing.average(dataset.X)
     else:
         # Previously an unknown choice left averaged_data undefined.
         print('unknown referencing method, continuing without re-referencing')
         averaged_data = dataset.X
     # filtering
     print('next step is applying filters')
     filtered_data = _apply_filters(dataset, averaged_data)
     _offer_save(dataset, filtered_data)
     # motion artifact correction using gyroscopic data, OASIS only
     if dataset.name == 'OASIS':
         if _prompt('do you want to use motion artifact removal using gyroscopic data? y/n') == 'y':
             filtered_data = _correct_motion_artifacts(dataset, filtered_data)
         print(filtered_data.shape)
     _offer_save(dataset, filtered_data)
     # autoreject, OASIS only, and only when the user asks for it
     if dataset.name == 'OASIS' and _prompt('do you want to use autoreject? y/n') == 'y':
         save_clean = _prompt('do you want to save this autoreject cleaned data? y/n') == 'y'
         clean_epochs = _run_autoreject(dataset, filtered_data)
         if save_clean:
             with open('preprocessed_data/oasis/with_autoreject.p','wb') as file:
                 pickle.dump(clean_epochs,file,protocol=pickle.HIGHEST_PROTOCOL)
 # In[ ]:
 def __main__():
    """Entry point of this script: run the interactive preprocessing driver."""
    driver_code()
 # In[ ]:
 # Executed as soon as this cell/module runs; starts the console prompts.
 __main__()
@@ -0,0 +1,106 @@
 #!/usr/bin/env python
 # coding: utf-8
 # In[ ]:
 from google.colab import drive
 # Mount Google Drive at /gdrive (remounting if it is already mounted).
 drive.mount('/gdrive',force_remount = True)
 # In[ ]:
 # Change into the project directory; the scripts below are invoked by relative name.
 get_ipython().run_line_magic('cd', '../gdrive/MyDrive/emotion_recognition_project/')
 # # Incremental Learning for OASIS
 # **Arguments**
 # 
 # 
 # ---
 # 
 # save = 'y/n'
 # 
 # ---
 # 
 # 
 # 
 # 
 # 
 # E.g. to run the model on the OASIS dataset without saving plots, the
 # command would be
 # 
 # !python incremental_learning_oasis.py n
 # 
 # 
 # 
 # 
 # 
 # In[ ]:
 get_ipython().system('python incremental_learning_oasis.py n')
 # 
 # # Incremental Learning for DEAP
 # **Arguments**
 # 
 # 
 # ---
 # 
 # save = 'y/n'
 # 
 # ---
 # 
 # 
 # 
 # 
 # 
 # E.g. to run the model on the DEAP dataset without saving plots, the
 # command would be
 # 
 # !python incremental_learning_deap.py n
 # In[ ]:
 get_ipython().system('python incremental_learning_deap.py n ')
 # 
 # # Incremental Learning for DREAMER
 # **Arguments**
 # 
 # 
 # ---
 # 
 # save = 'y/n'
 # 
 # ---
 # 
 # 
 # 
 # 
 # 
 # E.g. to run the model on the DREAMER dataset without saving plots, the
 # command would be
 # 
 # !python incremental_learning_dreamer.py n
 # In[ ]:
 get_ipython().system('python incremental_learning_dreamer.py n ')
 # In[ ]:
@@ -0,0 +1,46 @@
 #!/usr/bin/env python
 # coding: utf-8
 # In[ ]:
 # -*- coding: utf-8 -*-
 """utils.ipynb
 Automatically generated by Colaboratory.
 Original file is located at
    https://colab.research.google.com/drive/1Z2e7rxy64W9WIIcEfH1vyzfVdMNIK8Om
 """
 import numpy as np
 def epoch_data(X,Y, window, stride, sfreq):
     """Segment every trial of an EEG dataset into fixed-length epochs.

     Parameters:
         X: input EEG signal with shape (channels, timepoints, trials).
         Y: 2-D array of per-trial labels; only its first two columns are kept.
         window: length of each epoch in seconds.
         stride: step of the sliding window in seconds.
         sfreq: sampling frequency of the EEG signal.

     Returns:
         X_new: array of shape (channels, window*sfreq, trials*epochs_per_trial);
             X_new[:, :, k] is one epoch.
         Y_new: array of shape (trials*epochs_per_trial, 2); row k is the label
             of the trial that epoch k was cut from.
     """
     (channels, timepoints, trials) = X.shape
     # Move the trials axis to the front. This must be a transpose: np.reshape
     # keeps the memory order and would mix samples from different channels
     # and trials into each "trial".
     X = np.transpose(X, (2, 0, 1))
     segment = int(window*sfreq)   # samples per epoch
     step = int(stride*sfreq)      # samples between consecutive epoch starts
     epochPerTrial = int((timepoints-segment)/step + 1)
     X_new = np.empty((trials*epochPerTrial, channels, segment))
     Y_new = np.empty((trials*epochPerTrial, 2))
     count = 0
     for trial in range(trials):
         for epoch in range(epochPerTrial):
             start = epoch*step
             X_new[count, :, :] = X[trial, :, start:start+segment]
             # Every epoch inherits the label of its source trial.
             Y_new[count, :] = Y[trial, :2]
             count += 1
     # Back to the (channels, timepoints, epochs) layout of the input, again
     # via a transpose so that epoch k stays aligned with Y_new[k].
     X_new = np.transpose(X_new, (1, 2, 0))
     return X_new, Y_new