V3 [Frontiers revision]
Esse commit está contido em:
@@ -273,7 +273,6 @@ def hjorthParameters(xV):
|
||||
# in the matlab code for hjorth complexity subtraction by mob not division was used
|
||||
return mobility, complexity
|
||||
|
||||
##########
|
||||
# false nearest neighbor descriptor
|
||||
def falseNearestNeighbor(eegData, fast=True):
|
||||
# Average Mutual Information
|
||||
@@ -293,18 +292,18 @@ def falseNearestNeighbor(eegData, fast=True):
|
||||
else:
|
||||
cur_eegData = eegData[chan, :, epoch]
|
||||
lagidx = 0 # we are looking for the index of the lag that makes the signal maximally uncorrelated to the original
|
||||
minNMI = 1 # normed_mutual_info is from 1 (perfectly correlated) to 0 (not at all correlated)
|
||||
for lag in range(1, max_delay):
|
||||
x = cur_eegData[:-lag]
|
||||
xlag = cur_eegData[lag:]
|
||||
convert float data into histogram bins
|
||||
nbins = int(np.floor(1 + np.log2(len(x)) + 0.5))
|
||||
x_discrete = np.histogram(x, bins=nbins)[0]
|
||||
xlag_discrete = np.histogram(xlag, bins=nbins)[0]
|
||||
cNMI = normed_mutual_info(x_discrete, xlag_discrete)
|
||||
if cNMI < minNMI:
|
||||
minNMI = cNMI
|
||||
lagidx = lag
|
||||
# # minNMI = 1 # normed_mutual_info is from 1 (perfectly correlated) to 0 (not at all correlated)
|
||||
# # for lag in range(1, max_delay):
|
||||
# # x = cur_eegData[:-lag]
|
||||
# # xlag = cur_eegData[lag:]
|
||||
# # # convert float data into histogram bins
|
||||
# # nbins = int(np.floor(1 + np.log2(len(x)) + 0.5))
|
||||
# # x_discrete = np.histogram(x, bins=nbins)[0]
|
||||
# # xlag_discrete = np.histogram(xlag, bins=nbins)[0]
|
||||
# # cNMI = normed_mutual_info(x_discrete, xlag_discrete)
|
||||
# # if cNMI < minNMI:
|
||||
# # minNMI = cNMI
|
||||
# # lagidx = lag
|
||||
# nearest neighbors part
|
||||
knn = int(max(2, 6*lagidx)) # heuristic (number of nearest neighbors to look up)
|
||||
m = 1 # lagidx + 1
|
||||
|
||||
@@ -40,12 +40,18 @@ import copy
|
||||
from sklearn import feature_selection
|
||||
import argparse
|
||||
|
||||
import cuml
|
||||
from cuml.svm import SVR
|
||||
from cuml.ensemble import RandomForestRegressor
|
||||
from cuml.svm import SVC
|
||||
from cuml.ensemble import RandomForestClassifier
|
||||
from cuml.metrics import accuracy_score
|
||||
from sklearn.svm import SVR
|
||||
from sklearn.svm import SVC
|
||||
from sklearn.ensemble import RandomForestRegressor
|
||||
from sklearn.ensemble import RandomForestClassifier
|
||||
from sklearn.metrics import accuracy_score
|
||||
|
||||
# import cuml
|
||||
# from cuml.svm import SVR
|
||||
# from cuml.ensemble import RandomForestRegressor
|
||||
# from cuml.svm import SVC
|
||||
# from cuml.ensemble import RandomForestClassifier
|
||||
# from cuml.metrics import accuracy_score
|
||||
|
||||
|
||||
# In[ ]:
|
||||
|
||||
@@ -1,735 +0,0 @@
|
||||
#!/usr/bin/env python
|
||||
# coding: utf-8
|
||||
|
||||
# In[ ]:
|
||||
|
||||
|
||||
from ImportUtils import *
|
||||
from sklearn.model_selection import ParameterGrid
|
||||
|
||||
from sklearn.model_selection import train_test_split
|
||||
|
||||
from sklearn.preprocessing import StandardScaler
|
||||
from sklearn.metrics import accuracy_score
|
||||
|
||||
from sklearn.feature_selection import chi2
|
||||
from sklearn.feature_selection import SelectKBest, f_classif
|
||||
from sklearn.model_selection import train_test_split
|
||||
from sklearn.preprocessing import StandardScaler
|
||||
from sklearn.metrics import accuracy_score
|
||||
|
||||
from sklearn.ensemble import RandomForestRegressor as sklearnrfi
|
||||
|
||||
import os
|
||||
import glob
|
||||
from scipy import io,signal
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
from sklearn import preprocessing
|
||||
import pickle
|
||||
from sklearn.metrics import mean_squared_error
|
||||
from sklearn.impute import SimpleImputer
|
||||
|
||||
|
||||
import matplotlib.pyplot as plt
|
||||
# %matplotlib inline
|
||||
import seaborn as sns
|
||||
import copy
|
||||
|
||||
def topElectrodeRegressionRanking(dataset, window, stride, sfreq, clf, label, scale=False, pca=False):
|
||||
'''
|
||||
Ranks of features according to rmse computed by regressor passed in clf
|
||||
Plots electrode v/s rmse graph
|
||||
|
||||
'''
|
||||
# parameters :-
|
||||
# dataset - name of the dataset
|
||||
# window - length of the sliding window in seconds
|
||||
# stride - length of the stride of the sliding window in seconds
|
||||
# sfreq - sampling frequency of the EEG data
|
||||
# clf - name of the classifier to be used
|
||||
# label - valence/arousal/dominance/liking label (shape depends upon the dataset) in an enumerated form (0- valence ; 1-arousal ; 2- like; 3-dominance)
|
||||
# scale - sclaing of the EEG data if required
|
||||
|
||||
# returns :-
|
||||
# void
|
||||
|
||||
pwd = os.getcwd()
|
||||
|
||||
#load extracted features
|
||||
#####################################################################################################################################################
|
||||
featurepath = os.getcwd() + '/' + dataset + '/data_extracted/featuresDict/'
|
||||
ans = np.load((featurepath + "shannonEntropy_{}_{}.npz").format(window,stride), allow_pickle=True)['features']
|
||||
Y_epoch = np.load((featurepath + "shannonEntropy_{}_{}.npz").format(window,stride), allow_pickle=True)['Y']
|
||||
|
||||
rmseList = []
|
||||
electrodeList = ['AF3', 'F7', 'F3', 'FC5', 'T7', 'P7', 'O1', 'O2', 'P8', 'T8', 'FC6', 'F4', 'F8', 'AF4']
|
||||
fs = sfreq
|
||||
pwd = os.getcwd()
|
||||
featuresDict = loadFeaturesDict(dataset)
|
||||
asm_features = ['dasm_delta', 'dasm_theta', 'dasm_alpha', 'dasm_beta', 'dasm_gamma', 'rasm_delta', 'rasm_theta', 'rasm_alpha', 'rasm_beta', 'rasm_gamma']
|
||||
for asm in asm_features:
|
||||
featuresDict.pop(asm)
|
||||
|
||||
common = []
|
||||
with open('intersection.pkl', 'rb') as f:
|
||||
common = pickle.load(f)
|
||||
|
||||
for k in list(featuresDict.keys()):
|
||||
if k not in common:
|
||||
# pop out common feature
|
||||
featuresDict.pop(k)
|
||||
|
||||
selectFeatures = list(featuresDict.keys())
|
||||
y = Y_epoch[:,label] #valence
|
||||
#####################################################################################################################################################
|
||||
|
||||
for electrode in range(14):
|
||||
# Load FeaturesDict from memory
|
||||
|
||||
|
||||
print("Number of segments are: {}".format(ans.shape[1]))
|
||||
|
||||
featureMatrix = np.empty((len(selectFeatures),ans.shape[1])) #[14*32 + 1,80640]
|
||||
i=0
|
||||
for key,value in featuresDict.items():
|
||||
featureMatrix[i,:] = value[electrode,:]
|
||||
i = i+1
|
||||
|
||||
print(featureMatrix.T.shape)
|
||||
featureMatrix = featureMatrix.astype(np.float32)
|
||||
|
||||
#Impute NaN values with zero
|
||||
if np.isnan(featureMatrix).any():
|
||||
featureMatrix = np.nan_to_num(featureMatrix,nan=0)
|
||||
|
||||
#Name Feature vector columns
|
||||
feature_channel_index = []
|
||||
for feature in selectFeatures:
|
||||
feature_channel_index.append(feature + str(electrode))
|
||||
|
||||
print("Number of Feature-Columns: {}\n".format(len(feature_channel_index))) #debug
|
||||
|
||||
#Preparing dataset from feature matrix
|
||||
X = pd.DataFrame(featureMatrix.T)
|
||||
X.columns = feature_channel_index
|
||||
X = X.replace([np.inf, -np.inf], np.nan)
|
||||
X = X.fillna(0)
|
||||
|
||||
|
||||
print("Features Ready for undergoing selection tests done ...\n")
|
||||
|
||||
# Perform train_test_split to get training and test data
|
||||
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
|
||||
|
||||
# Normalise-scale data
|
||||
# Feature Scaling
|
||||
if(scale == True):
|
||||
sc = StandardScaler()
|
||||
X_train = sc.fit_transform(X_train)
|
||||
X_test = sc.transform(X_test)
|
||||
|
||||
# Apply classfier
|
||||
clf.fit(X_train, y_train)
|
||||
y_predict = clf.predict(X_test)
|
||||
rmse = mean_squared_error(y_test, y_predict,squared=False)
|
||||
print("window: {}, stide: {}, rmse: {}".format(window,stride,rmse))
|
||||
rmseList.append(rmse)
|
||||
|
||||
|
||||
#rank electrodes based on RMSE computed by the classifier
|
||||
electrode_df = pd.DataFrame(electrodeList)
|
||||
rmse_df = pd.DataFrame(rmseList)
|
||||
#concat two dataframes for better visualization
|
||||
electrodeRanking = pd.concat([electrode_df, rmse_df],axis=1)
|
||||
electrodeRanking.columns = ['Electrode','RMSE'] #naming the dataframe columns
|
||||
features_result = electrodeRanking.sort_values('RMSE')
|
||||
print(features_result)
|
||||
# return features_result
|
||||
|
||||
##################################################################################
|
||||
N = features_result.shape[0]
|
||||
topRmseList = []
|
||||
topNList = ["{}".format(x) for x in range(1,N+1)]
|
||||
|
||||
|
||||
for n in range(1,N+1):
|
||||
|
||||
|
||||
topnelectrodes = features_result.head(n)
|
||||
electrode_index = topnelectrodes.index
|
||||
electrode_index = list(electrode_index)[:n]
|
||||
|
||||
# X-Values
|
||||
featureMatrix = np.empty((len(selectFeatures)*len(electrode_index),ans.shape[1]))
|
||||
|
||||
i = 0
|
||||
for index in electrode_index:
|
||||
for key,value in featuresDict.items():
|
||||
featureMatrix[i,:] = value[index,:]
|
||||
i = i+1
|
||||
|
||||
featureMatrix = featureMatrix.astype(np.float32)
|
||||
print(featureMatrix.T.shape)
|
||||
|
||||
# Removing NaN Values
|
||||
if np.isnan(featureMatrix).any():
|
||||
featureMatrix = np.nan_to_num(featureMatrix,nan=0)
|
||||
|
||||
# Name Feature vector columns
|
||||
feature_channel_index = []
|
||||
for index in electrode_index:
|
||||
for feature in selectFeatures:
|
||||
feature_channel_index.append(feature + str(index))
|
||||
|
||||
print("Number of Feature-Columns: {}\n".format(len(feature_channel_index)))
|
||||
|
||||
X = pd.DataFrame(featureMatrix.T)
|
||||
X.columns = feature_channel_index
|
||||
X = X.replace([np.inf, -np.inf], np.nan)
|
||||
X = X.fillna(0)
|
||||
|
||||
|
||||
print("Features Ready for undergoing selection tests done ...\n")
|
||||
|
||||
|
||||
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
|
||||
|
||||
# Normalise-scale data
|
||||
# Feature Scaling
|
||||
if(scale == True):
|
||||
sc = StandardScaler()
|
||||
X_train = sc.fit_transform(X_train)
|
||||
X_test = sc.transform(X_test)
|
||||
|
||||
# Apply classfier
|
||||
|
||||
search_method = "tpot"
|
||||
best_clf = None
|
||||
if(search_method == "bayes_sk_opt"):
|
||||
|
||||
# BayesCV scikit opt
|
||||
search_space = {"bootstrap": Categorical([True, False]), # values for boostrap can be either True or False
|
||||
"max_depth": Integer(6, 20), # values of max_depth are integers from 6 to 20
|
||||
"max_features": Categorical(['auto', 'sqrt','log2']),
|
||||
"min_samples_leaf": Integer(2, 10),
|
||||
"min_samples_split": Integer(2, 10),
|
||||
"n_estimators": Integer(100, 500)
|
||||
}
|
||||
|
||||
forest_bayes_search = BayesSearchCV(clf, search_space, n_iter=32, cv=5)
|
||||
print(forest_bayes_search)
|
||||
print(forest_bayes_search.fit(X_train, y_train))
|
||||
print("Best Parameters are: ", forest_bayes_search.best_params_)
|
||||
best_clf = forest_bayes_search.best_estimator_
|
||||
|
||||
elif(search_method =="random_grid_search"):
|
||||
print("Random Search followed by GridSearch initiated!\n");
|
||||
#RandomSearchCV followed by GridSearchCV
|
||||
random_grid = {'n_estimators': [int(x) for x in np.linspace(start = 200, stop = 2000, num = 10)],
|
||||
'max_features': ['auto', 'sqrt','log2'],
|
||||
'max_depth': [int(x) for x in np.linspace(10, 1000,10)],
|
||||
'min_samples_split': [2, 5, 10,14],
|
||||
'min_samples_leaf': [1, 2, 4,6,8],
|
||||
}
|
||||
rf_randomcv=RandomizedSearchCV(estimator=clf,param_distributions=random_grid,n_iter=100,cv=5,verbose=2,random_state=100)
|
||||
print(rf_randomcv.fit(X_train, y_train))
|
||||
print("Best Parameters for RandomSearchCV are: ", rf_randomcv.best_params_)
|
||||
print("RMSE with RandomSearchCV is :",mean_squared_error(y_test, rf_randomcv.best_estimator_.predict(X_test),squared=False));
|
||||
|
||||
param_grid = {
|
||||
'max_depth': [rf_randomcv.best_params_['max_depth']],
|
||||
'max_features': [rf_randomcv.best_params_['max_features']],
|
||||
'min_samples_leaf': [rf_randomcv.best_params_['min_samples_leaf'],
|
||||
rf_randomcv.best_params_['min_samples_leaf']+2,
|
||||
rf_randomcv.best_params_['min_samples_leaf'] + 4],
|
||||
'min_samples_split': [rf_randomcv.best_params_['min_samples_split'] - 2,
|
||||
rf_randomcv.best_params_['min_samples_split'] - 1,
|
||||
rf_randomcv.best_params_['min_samples_split'],
|
||||
rf_randomcv.best_params_['min_samples_split'] +1,
|
||||
rf_randomcv.best_params_['min_samples_split'] + 2],
|
||||
'n_estimators': [rf_randomcv.best_params_['n_estimators'] - 200, rf_randomcv.best_params_['n_estimators'] - 100,
|
||||
rf_randomcv.best_params_['n_estimators'],
|
||||
rf_randomcv.best_params_['n_estimators'] + 100, rf_randomcv.best_params_['n_estimators'] + 200]
|
||||
}
|
||||
|
||||
grid_search=GridSearchCV(estimator=rf,param_grid=param_grid,cv=10, verbose=5)
|
||||
grid_search.fit(X_train,y_train)
|
||||
best_clf = rf_randomcv.best_estimator_
|
||||
elif search_method =="manual_search":
|
||||
min_rmse = 1000
|
||||
best_clf = clf
|
||||
min_params = None
|
||||
# 2*3*3*3*3
|
||||
param_grid = {'n_estimators': [50, 100],
|
||||
'max_features': ['auto'],
|
||||
'max_depth': [2, 10, 100],
|
||||
'min_samples_split': [2, 5, 10],
|
||||
'min_samples_leaf': [1, 2, 8],
|
||||
}
|
||||
|
||||
param_grid = ParameterGrid(param_grid)
|
||||
for params in param_grid:
|
||||
print("Current Parameters : ", params)
|
||||
temp_clf = RandomForestRegressor( max_features = params['max_features'], min_samples_leaf = params['min_samples_leaf'], min_samples_split = params['min_samples_split'], n_estimators = params['n_estimators'],max_depth = params['max_depth']);
|
||||
temp_clf.fit(X_train,y_train)
|
||||
y_predict = temp_clf.predict(X_test)
|
||||
rmse = mean_squared_error(y_test, y_predict,squared=False)
|
||||
print("Current RMSE with above params : ", rmse)
|
||||
if(min_rmse > rmse):
|
||||
min_rmse = rmse;
|
||||
best_clf = temp_clf;
|
||||
min_params = params;
|
||||
|
||||
print("Best Params for parameter search are : \n", min_params)
|
||||
print("window: {}, stide: {}, rmse: {}".format(window,stride,min_rmse))
|
||||
topRmseList.append(min_rmse)
|
||||
elif search_method == "tpot":
|
||||
from tpot import TPOTRegressor;
|
||||
# TPOT setup
|
||||
GENERATIONS = 5
|
||||
POP_SIZE = 100
|
||||
CV = 5
|
||||
SEED = 42
|
||||
|
||||
tpot = TPOTRegressor(
|
||||
generations=GENERATIONS,
|
||||
population_size=POP_SIZE,
|
||||
random_state=SEED,
|
||||
config_dict="TPOT cuML",
|
||||
n_jobs=1, # cuML requires n_jobs=1
|
||||
cv=CV,
|
||||
verbosity=2,
|
||||
)
|
||||
|
||||
tpot.fit(X_train, y_train)
|
||||
|
||||
y_predict = tpot.predict(X_test)
|
||||
rmse = mean_squared_error(y_test, y_predict,squared=False)
|
||||
print("window: {}, stide: {}, rmse: {}".format(window,stride,rmse))
|
||||
topRmseList.append(rmse)
|
||||
|
||||
|
||||
else:
|
||||
best_clf = clf
|
||||
best_clf.fit(X_train,y_train)
|
||||
|
||||
|
||||
if search_method != "manual_search" and search_method != "tpot":
|
||||
y_predict = best_clf.predict(X_test)
|
||||
rmse = mean_squared_error(y_test, y_predict,squared=False)
|
||||
print("window: {}, stide: {}, rmse: {}".format(window,stride,rmse))
|
||||
topRmseList.append(rmse)
|
||||
|
||||
|
||||
topNElectrode_df = pd.DataFrame(topNList)
|
||||
topNRmse_df = pd.DataFrame(topRmseList)
|
||||
#concat two dataframes for better visualization
|
||||
topNElectrodeRanking = pd.concat([topNElectrode_df, topNRmse_df],axis=1)
|
||||
topNElectrodeRanking.columns = ['Electrode','RMSE'] #naming the dataframe columns
|
||||
print(topNElectrodeRanking)
|
||||
|
||||
# Plotting
|
||||
fig = plt.gcf()
|
||||
fig.set_size_inches(20, 10)
|
||||
plt.rcParams.update({'font.size': 30})
|
||||
plt.xlabel('Top N Electrodes')
|
||||
plt.ylabel('RMSE')
|
||||
plt.plot(topNElectrodeRanking.loc[:,"Electrode"], topNElectrodeRanking.loc[:,"RMSE"])
|
||||
plt.tight_layout()
|
||||
|
||||
|
||||
# In[ ]:
|
||||
|
||||
|
||||
def topFeaturesRegressionRanking(dataset, window, stride, sfreq, clf, label, scale=False, pca=False):
|
||||
'''
|
||||
Ranks of features according to rmse computed by regressor passed in clf
|
||||
Plots electrode v/s rmse graph
|
||||
|
||||
'''
|
||||
# parameters :-
|
||||
# dataset - name of the dataset
|
||||
# window - length of the sliding window in seconds
|
||||
# stride - length of the stride of the sliding window in seconds
|
||||
# sfreq - sampling frequency of the EEG data
|
||||
# clf - name of the classifier to be used
|
||||
# label - valence/arousal/dominance/liking label (shape depends upon the dataset)
|
||||
# scale - sclaing of the EEG data if required
|
||||
|
||||
# returns :-
|
||||
# void
|
||||
fs = sfreq
|
||||
pwd = os.getcwd()
|
||||
featurepath = os.getcwd() + '/' + dataset + '/data_extracted/featuresDict/'
|
||||
ans = np.load((featurepath + "shannonEntropy_{}_{}.npz").format(window,stride), allow_pickle=True)['features']
|
||||
Y_epoch = np.load((featurepath + "shannonEntropy_{}_{}.npz").format(window,stride), allow_pickle=True)['Y']
|
||||
print("Number of segments are: {}".format(ans.shape[1]))
|
||||
|
||||
featuresDict = None
|
||||
featuresDict = loadFeaturesDict(dataset)
|
||||
|
||||
common = []
|
||||
with open('intersection.pkl', 'rb') as f:
|
||||
common = pickle.load(f)
|
||||
|
||||
for k in list(featuresDict.keys()):
|
||||
if k not in common:
|
||||
# pop out common feature
|
||||
featuresDict.pop(k)
|
||||
|
||||
featuresList = list(featuresDict.keys())
|
||||
|
||||
y = Y_epoch[:,label] #valence
|
||||
|
||||
|
||||
rmseList = []
|
||||
|
||||
####################################################################
|
||||
#modify featuresList
|
||||
featureMatrix = np.empty((0,ans.shape[1])) #[14*32 + 1,80640]
|
||||
for key,value in featuresDict.items():
|
||||
featureMatrix = np.append(featureMatrix,value,axis=0)
|
||||
|
||||
|
||||
if np.isnan(featureMatrix).any():
|
||||
featureMatrix = np.nan_to_num(featureMatrix,nan=0)
|
||||
|
||||
featureMatrix = featureMatrix.astype('float64')
|
||||
|
||||
|
||||
feature_channel_index = []
|
||||
for feature in featuresList:
|
||||
for i in range(featuresDict[feature].shape[0]):
|
||||
if(i>=10):
|
||||
feature_channel_index.append(feature + str(i))
|
||||
else:
|
||||
feature_channel_index.append(feature + '0' + str(i))
|
||||
|
||||
print(len(list(featuresDict.keys())))
|
||||
print("Number of Feature-Columns: {}\n".format(len(feature_channel_index)))
|
||||
|
||||
X = pd.DataFrame(featureMatrix.T)
|
||||
X = X.replace([np.inf, -np.inf], np.nan)
|
||||
X = X.fillna(0)
|
||||
X.columns = feature_channel_index
|
||||
|
||||
#Remove Variance = 0 features
|
||||
constant_filter = VarianceThreshold(threshold=0)
|
||||
constant_filter.fit(X)
|
||||
constant_columns = [column for column in X.columns
|
||||
if column not in
|
||||
X.columns[constant_filter.get_support()]]
|
||||
X = constant_filter.transform(X)
|
||||
|
||||
for column in constant_columns:
|
||||
feature_channel_index.remove(column)
|
||||
|
||||
print(len(feature_channel_index),feature_channel_index )
|
||||
|
||||
X = pd.DataFrame(X)
|
||||
X.columns = feature_channel_index
|
||||
|
||||
|
||||
filtered_featuresList = []
|
||||
print(type(X))
|
||||
for col in X.columns:
|
||||
feature = col[:-2]
|
||||
electrode = int(col[-2:])
|
||||
if(feature not in filtered_featuresList):
|
||||
filtered_featuresList.append(feature)
|
||||
|
||||
featuresList = filtered_featuresList
|
||||
|
||||
for feature in featuresList:
|
||||
# Load FeaturesDict from memory
|
||||
|
||||
|
||||
|
||||
featureMatrix = featuresDict[feature]
|
||||
featureMatrix = featureMatrix.astype(np.float32)
|
||||
|
||||
if np.isnan(featureMatrix).any():
|
||||
featureMatrix = np.nan_to_num(featureMatrix,nan=0)
|
||||
|
||||
|
||||
|
||||
feature_channel_index = []
|
||||
|
||||
for i in range(featuresDict[feature].shape[0]):
|
||||
feature_channel_index.append(feature + str(i))
|
||||
|
||||
print("Number of Feature-Columns: {}\n".format(len(feature_channel_index)))
|
||||
|
||||
X = pd.DataFrame(featureMatrix.T)
|
||||
X = X.replace([np.inf, -np.inf], np.nan)
|
||||
X = X.fillna(0)
|
||||
X.columns = feature_channel_index
|
||||
|
||||
|
||||
print("Features Ready for undergoing selection tests done ...\n")
|
||||
|
||||
|
||||
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
|
||||
|
||||
# Normalise-scale data
|
||||
# Feature Scaling
|
||||
if(scale == True):
|
||||
sc = StandardScaler()
|
||||
X_train = sc.fit_transform(X_train)
|
||||
X_test = sc.transform(X_test)
|
||||
|
||||
# Apply classfier
|
||||
clf.fit(X_train, y_train)
|
||||
y_predict = clf.predict(X_test)
|
||||
rmse = mean_squared_error(y_test, y_predict,squared=False)
|
||||
print("window: {}, stide: {}, rmse: {}".format(window,stride,rmse))
|
||||
rmseList.append(rmse)
|
||||
|
||||
|
||||
|
||||
features_df = pd.DataFrame(featuresList)
|
||||
rmse_df = pd.DataFrame(rmseList)
|
||||
#concat two dataframes for better visualization
|
||||
featureRanking = pd.concat([features_df, rmse_df],axis=1)
|
||||
featureRanking.columns = ['Feature','RMSE'] #naming the dataframe columns
|
||||
features_result = featureRanking.sort_values('RMSE')
|
||||
features_result.to_csv(pwd + "/" + dataset + "/arousal_plots/" + "CommonFeaturesRegressionRanking" + str(window) + str(stride) + ".csv")
|
||||
print(features_result)
|
||||
|
||||
###########################################
|
||||
N = features_result.shape[0]
|
||||
topNRmseList = []
|
||||
topNList = ["{}".format(x) for x in range(1,N+1)]
|
||||
|
||||
|
||||
|
||||
for n in range(1,N+1):
|
||||
|
||||
|
||||
topnfeatures = copy.deepcopy(features_result.head(n))
|
||||
topnfeatures = topnfeatures['Feature'].tolist() #list of feature-names
|
||||
|
||||
# X-Values################################################
|
||||
|
||||
featureMatrix = np.empty((0,ans.shape[1]))
|
||||
|
||||
for feature in topnfeatures:
|
||||
featureMatrix = np.append(featureMatrix, featuresDict[feature], axis=0)
|
||||
|
||||
featureMatrix = featureMatrix.astype(np.float32)
|
||||
print(featureMatrix.T.shape)
|
||||
|
||||
feature_channel_index = []
|
||||
for feature in topnfeatures:
|
||||
i=0
|
||||
for i in range(featuresDict[feature].shape[0]):
|
||||
feature_channel_index.append(feature + str(i))
|
||||
|
||||
|
||||
# Removing NaN Values
|
||||
if np.isnan(featureMatrix).any():
|
||||
featureMatrix = np.nan_to_num(featureMatrix,nan=0)
|
||||
|
||||
|
||||
print("Number of Feature-Columns: {}\n".format(len(feature_channel_index)))
|
||||
|
||||
X = pd.DataFrame(featureMatrix.T)
|
||||
X.columns = feature_channel_index
|
||||
X = X.replace([np.inf, -np.inf], np.nan)
|
||||
X = X.fillna(0)
|
||||
|
||||
|
||||
print("Features Ready for undergoing selection tests done ...\n")
|
||||
|
||||
|
||||
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
|
||||
|
||||
# Normalise-scale data
|
||||
# Feature Scaling
|
||||
if(scale == True):
|
||||
sc = StandardScaler()
|
||||
X_train = sc.fit_transform(X_train)
|
||||
X_test = sc.transform(X_test)
|
||||
|
||||
clf.fit(X_train, y_train)
|
||||
y_predict = clf.predict(X_test)
|
||||
rmse = mean_squared_error(y_test, y_predict,squared=False)
|
||||
print("window: {}, stide: {}, rmse: {}".format(window,stride,rmse))
|
||||
topNRmseList.append(rmse)
|
||||
|
||||
|
||||
|
||||
topNFeatures_df = pd.DataFrame(topNList)
|
||||
topNRmse_df = pd.DataFrame(topNRmseList)
|
||||
|
||||
#concat two dataframes for better visualization
|
||||
topNFeaturesRanking = pd.concat([topNFeatures_df, topNRmse_df],axis=1)
|
||||
topNFeaturesRanking.columns = ['Feature','RMSE'] #naming the dataframe columns
|
||||
print(topNFeaturesRanking)
|
||||
topNFeaturesRanking.to_csv(pwd + "/" + dataset + "/arousal_plots/" + "topCommonFeaturesRegressionRanking" + str(window) + str(stride) + ".csv")
|
||||
|
||||
# Plotting
|
||||
fig = plt.gcf()
|
||||
fig.set_size_inches(25, 10)
|
||||
plt.rcParams.update({'font.size': 30})
|
||||
plt.xlabel('Top N Features')
|
||||
plt.ylabel('RMSE')
|
||||
plt.plot(topNFeaturesRanking.loc[:,"Feature"], topNFeaturesRanking.loc[:,"RMSE"])
|
||||
plt.tight_layout()
|
||||
|
||||
|
||||
# In[ ]:
|
||||
|
||||
|
||||
def topFeatureColumnsRegressionRanking(dataset, window, stride, sfreq, clf, label, scale=False, pca=False):
|
||||
|
||||
# parameters :-
|
||||
# dataset - name of the dataset
|
||||
# window - length of the sliding window in seconds
|
||||
# stride - length of the stride of the sliding window in seconds
|
||||
# sfreq - sampling frequency of the EEG data
|
||||
# clf - name of the classifier to be used
|
||||
# label - valence/arousal/dominance/liking label (shape depends upon the dataset)
|
||||
# scale - sclaing of the EEG data if required
|
||||
|
||||
# returns :-
|
||||
# void
|
||||
|
||||
fs = sfreq
|
||||
pwd = os.getcwd()
|
||||
featurepath = os.getcwd() + '/' + dataset + '/data_extracted/featuresDict/'
|
||||
|
||||
ans = np.load((featurepath + "shannonEntropy_{}_{}.npz").format(window,stride), allow_pickle=True)['features']
|
||||
Y_epoch = np.load((featurepath + "shannonEntropy_{}_{}.npz").format(window,stride), allow_pickle=True)['Y']
|
||||
electrodeList = ['AF3', 'F7', 'F3', 'FC5', 'T7', 'P7', 'O1', 'O2', 'P8', 'T8', 'FC6', 'F4', 'F8', 'AF4']
|
||||
|
||||
|
||||
print("Number of segments are: {}".format(ans.shape[1]))
|
||||
|
||||
#X##############################################################################################
|
||||
|
||||
featuresDict = None
|
||||
featuresDict = loadFeaturesDict(dataset)
|
||||
|
||||
common = []
|
||||
with open('intersection.pkl', 'rb') as f:
|
||||
common = pickle.load(f)
|
||||
|
||||
for k in list(featuresDict.keys()):
|
||||
if k not in common:
|
||||
# pop out common feature
|
||||
featuresDict.pop(k)
|
||||
|
||||
|
||||
featuresList = list(featuresDict.keys())
|
||||
|
||||
# defining column names
|
||||
feature_channel_index = []
|
||||
|
||||
for feature in featuresList:
|
||||
for i in range(featuresDict[feature].shape[0]):
|
||||
feature_channel_index.append(feature + str(i))
|
||||
|
||||
#defining feature matrix
|
||||
featureMatrix = np.empty((0,ans.shape[1])) #[14*32 + 1,80640]
|
||||
for key,value in featuresDict.items():
|
||||
featureMatrix = np.append(featureMatrix,value,axis=0)
|
||||
|
||||
|
||||
print("Shape of FeatureMatrix: {}\n".format(featureMatrix.T.shape))
|
||||
|
||||
#data-imputation and nan-removal
|
||||
featureMatrix = featureMatrix.astype(np.float32)
|
||||
|
||||
if np.isnan(featureMatrix).any():
|
||||
featureMatrix = np.nan_to_num(featureMatrix,nan=0)
|
||||
|
||||
X = pd.DataFrame(featureMatrix.T)
|
||||
X = X.replace([np.inf, -np.inf], np.nan)
|
||||
X = X.fillna(0)
|
||||
X.columns = feature_channel_index
|
||||
|
||||
|
||||
#Y#####################################################################
|
||||
|
||||
y = Y_epoch[:,label] #valence
|
||||
|
||||
########################################################################
|
||||
rmseList = []
|
||||
|
||||
for col in feature_channel_index:
|
||||
input_df = pd.DataFrame(X[col])
|
||||
|
||||
X_train, X_test, y_train, y_test = train_test_split(input_df, y, test_size=0.2, random_state=42)
|
||||
|
||||
# Normalise-scale data
|
||||
# Feature Scaling
|
||||
if(scale == True):
|
||||
sc = StandardScaler()
|
||||
X_train = sc.fit_transform(X_train)
|
||||
X_test = sc.transform(X_test)
|
||||
|
||||
# Apply classfier
|
||||
clf.fit(X_train, y_train)
|
||||
y_predict = clf.predict(X_test)
|
||||
rmse = mean_squared_error(y_test, y_predict, squared=False)
|
||||
rmseList.append(rmse)
|
||||
|
||||
|
||||
|
||||
col_df = pd.DataFrame(feature_channel_index)
|
||||
rmse_df = pd.DataFrame(rmseList)
|
||||
#concat two dataframes for better visualization
|
||||
colRanking = pd.concat([col_df, rmse_df],axis=1)
|
||||
colRanking.columns = ['Column','RMSE'] #naming the dataframe columns
|
||||
features_result = colRanking.sort_values('RMSE')
|
||||
print(features_result)
|
||||
|
||||
|
||||
N = len(feature_channel_index)
|
||||
topNRmseList = []
|
||||
topNList = ["{}".format(x) for x in range(1,N+1)]
|
||||
|
||||
for n in range(1, N+1):
|
||||
ranking_df = features_result.head(n)
|
||||
topncols = ranking_df['Column'].tolist()
|
||||
|
||||
X_train, X_test, y_train, y_test = train_test_split(X[topncols], y, test_size=0.2, random_state=42)
|
||||
|
||||
# Normalise-scale data
|
||||
# Feature Scaling
|
||||
if(scale == True):
|
||||
sc = StandardScaler()
|
||||
X_train = sc.fit_transform(X_train)
|
||||
X_test = sc.transform(X_test)
|
||||
|
||||
# Apply classfier
|
||||
clf.fit(X_train, y_train)
|
||||
y_predict = clf.predict(X_test)
|
||||
rmse = mean_squared_error(y_test, y_predict, squared=False)
|
||||
topNRmseList.append(rmse)
|
||||
|
||||
|
||||
topcol_df = pd.DataFrame(topNList)
|
||||
toprmse_df = pd.DataFrame(topNRmseList)
|
||||
#concat two dataframes for better visualization
|
||||
topcolRanking = pd.concat([topcol_df, toprmse_df],axis=1)
|
||||
topcolRanking.columns = ['Column','RMSE'] #naming the dataframe columns
|
||||
topfeatures_result = topcolRanking
|
||||
print(topfeatures_result)
|
||||
topfeatures_result.to_csv(pwd + "/" + dataset + "/arousal_plots/" + "ColumnsRegressionRanking" + str(window) + str(stride) + ".csv")
|
||||
|
||||
|
||||
# Plotting
|
||||
fig = plt.gcf()
|
||||
fig.set_size_inches(60, 9)
|
||||
plt.xlabel('Top N Columns')
|
||||
plt.ylabel('RMSE')
|
||||
plt.title("Top N Columns v/s RMSE Plot for Window:{} Stride:{} epoched data by varying N".format(window,stride))
|
||||
plt.plot(topfeatures_result.loc[:,"Column"], topfeatures_result.loc[:,"RMSE"])
|
||||
plt.tight_layout()
|
||||
plt.savefig(pwd + "/" + dataset + "/arousal_plots/" + "topFeatureColumnsRegressionRanking" + str(window) + str(stride) + ".svg", bbox_inches='tight')
|
||||
plt.show()
|
||||
plt.clf()
|
||||
|
||||
@@ -1,17 +1,19 @@
|
||||
#!/usr/bin/env python
|
||||
# coding: utf-8
|
||||
# DATE - 01/11/2022
|
||||
|
||||
# In[ ]:
|
||||
# AUTHOR - ROHIT GARG
|
||||
|
||||
|
||||
from ImportUtils import *
|
||||
|
||||
from sklearn.ensemble import RandomForestRegressor as sklearnrfi
|
||||
from sklearn.feature_selection import VarianceThreshold
|
||||
from sklearn.metrics import r2_score
|
||||
from sklearn.metrics import mean_absolute_error
|
||||
from sklearn.metrics import explained_variance_score
|
||||
|
||||
|
||||
# In[ ]:
|
||||
|
||||
|
||||
def topElectrodeFSRegressionRanking(dataset, window, stride, sfreq, clf, label, scale=False, pca=False, mutual_info = False, method='SelectKBest'):
|
||||
'''
|
||||
@@ -129,7 +131,7 @@ def topElectrodeFSRegressionRanking(dataset, window, stride, sfreq, clf, label,
|
||||
featureScores.columns = ['Specs','Score'] #naming the dataframe columns
|
||||
features_result = featureScores.nlargest(X.shape[1],'Score')
|
||||
print(features_result)
|
||||
features_result.to_csv(pwd + "/" + dataset + "/arousal_plots/" + "CommonElectrodeFSRegressionRanking"+ method + str(window) + str(stride) + ".csv")
|
||||
features_result.to_csv(f"output/{dataset}_{label}_electrode_selection.csv")
|
||||
|
||||
|
||||
###################################################################
|
||||
@@ -150,10 +152,14 @@ def topElectrodeFSRegressionRanking(dataset, window, stride, sfreq, clf, label,
|
||||
|
||||
N = len(topelectrodes)
|
||||
topRmseList = []
|
||||
topR2List = []
|
||||
topMAEList = []
|
||||
topEVList = []
|
||||
topNList = ["{}".format(x) for x in range(1,N+1)]
|
||||
|
||||
|
||||
for n in range(1,N+1):
|
||||
|
||||
|
||||
electrode_index = topelectrodes[:n]
|
||||
print(topelectrodes)
|
||||
@@ -207,8 +213,17 @@ def topElectrodeFSRegressionRanking(dataset, window, stride, sfreq, clf, label,
|
||||
clf.fit(X_train, y_train)
|
||||
y_predict = clf.predict(X_test)
|
||||
rmse = mean_squared_error(y_test, y_predict,squared=False)
|
||||
score_r2 = r2_score(y_test, y_predict)
|
||||
score_mae = mean_absolute_error(y_test, y_predict)
|
||||
score_ev = explained_variance_score(y_test, y_predict)
|
||||
print("window: {}, stide: {}, rmse: {}".format(window,stride,rmse))
|
||||
print(f"r2: {score_r2}")
|
||||
print(f"mae: {score_mae}")
|
||||
print(f"ev: {score_ev}")
|
||||
topRmseList.append(rmse)
|
||||
topR2List.append(score_r2)
|
||||
topMAEList.append(score_mae)
|
||||
topEVList.append(score_ev)
|
||||
|
||||
|
||||
|
||||
@@ -217,11 +232,19 @@ def topElectrodeFSRegressionRanking(dataset, window, stride, sfreq, clf, label,
|
||||
# features_result = features_result.reset_index()
|
||||
topNElectrode_df = pd.DataFrame(topNList)
|
||||
topNRmse_df = pd.DataFrame(topRmseList)
|
||||
topNR2_df = pd.DataFrame(topR2List)
|
||||
topNMAE_df = pd.DataFrame(topMAEList)
|
||||
topNEV_df = pd.DataFrame(topEVList)
|
||||
|
||||
#concat two dataframes for better visualization
|
||||
topNElectrodeRanking = pd.concat([topNElectrode_df, topNRmse_df],axis=1)
|
||||
topNElectrodeRanking.columns = ['Electrode','RMSE'] #naming the dataframe columns
|
||||
print(topNElectrodeRanking)
|
||||
topNElectrodeRanking.to_csv(pwd + "/" + dataset + "/arousal_plots/" + "topCommonElectrodeFSRegressionRanking"+ method + str(window) + str(stride) + ".csv")
|
||||
topNElectrode = pd.concat([topNElectrode_df, topNRmse_df, topNR2_df, topNMAE_df, topNEV_df],axis=1)
|
||||
|
||||
topNElectrode.columns = ['Electrode','RMSE', 'R2', 'MAE', 'EV'] #naming the dataframe columns
|
||||
|
||||
print(topNElectrode)
|
||||
|
||||
topNElectrode.to_csv(f"output/{dataset}_{label}_electrode.csv")
|
||||
|
||||
# return features_result
|
||||
|
||||
|
||||
@@ -231,8 +254,10 @@ def topElectrodeFSRegressionRanking(dataset, window, stride, sfreq, clf, label,
|
||||
plt.rcParams.update({'font.size': 30})
|
||||
plt.xlabel('Top N Electrodes')
|
||||
plt.ylabel('RMSE')
|
||||
plt.plot(topNElectrodeRanking.loc[:,"Electrode"], topNElectrodeRanking.loc[:,"RMSE"])
|
||||
plt.plot(topNElectrode.loc[:,"Electrode"], topNElectrode.loc[:,"RMSE"])
|
||||
plt.tight_layout()
|
||||
plt.savefig(f"output/{dataset}_{label}_electrode_RMSE.svg")
|
||||
plt.clf()
|
||||
|
||||
|
||||
# In[ ]:
|
||||
@@ -363,7 +388,7 @@ def topFeatureFSRegressionRanking(dataset, window, stride, sfreq, clf, label, sc
|
||||
featureScores.columns = ['Specs','Score'] #naming the dataframe columns
|
||||
features_result = featureScores.nlargest(X.shape[1],'Score')
|
||||
print(features_result)
|
||||
features_result.to_csv(pwd + "/" + dataset + "/arousal_plots/" + "CommonFeatureFSRegressionRanking"+ method + str(window) + str(stride) + ".csv")
|
||||
features_result.to_csv(f"output/{dataset}_{label}_feature_selection.csv")
|
||||
|
||||
|
||||
|
||||
@@ -386,8 +411,11 @@ def topFeatureFSRegressionRanking(dataset, window, stride, sfreq, clf, label, sc
|
||||
# TOP-N-FEATURE-RANKING
|
||||
print(topfeatures)
|
||||
print(topelectrodes)
|
||||
N = len(topfeatures)
|
||||
N = len(topfeatures)
|
||||
topNRmseList = []
|
||||
topR2List = []
|
||||
topMAEList = []
|
||||
topEVList = []
|
||||
topNList = ["{}".format(x) for x in range(1,N+1)]
|
||||
|
||||
|
||||
@@ -441,8 +469,17 @@ def topFeatureFSRegressionRanking(dataset, window, stride, sfreq, clf, label, sc
|
||||
clf.fit(X_train, y_train)
|
||||
y_predict = clf.predict(X_test)
|
||||
rmse = mean_squared_error(y_test, y_predict,squared=False)
|
||||
score_r2 = r2_score(y_test, y_predict)
|
||||
score_mae = mean_absolute_error(y_test, y_predict)
|
||||
score_ev = explained_variance_score(y_test, y_predict)
|
||||
print("window: {}, stide: {}, rmse: {}".format(window,stride,rmse))
|
||||
print(f"r2: {score_r2}")
|
||||
print(f"mae: {score_mae}")
|
||||
print(f"ev: {score_ev}")
|
||||
topEVList.append(score_ev)
|
||||
topNRmseList.append(rmse)
|
||||
topR2List.append(score_r2)
|
||||
topMAEList.append(score_mae)
|
||||
|
||||
|
||||
|
||||
@@ -450,198 +487,32 @@ def topFeatureFSRegressionRanking(dataset, window, stride, sfreq, clf, label, sc
|
||||
topNFeatures_df = pd.DataFrame(topNList)
|
||||
|
||||
topNRmse_df = pd.DataFrame(topNRmseList)
|
||||
topNR2_df = pd.DataFrame(topR2List)
|
||||
topNMAE_df = pd.DataFrame(topMAEList)
|
||||
topNEV_df = pd.DataFrame(topEVList)
|
||||
|
||||
|
||||
#concat two dataframes for better visualization
|
||||
topNFeaturesRanking = pd.concat([topNFeatures_df, topNRmse_df],axis=1)
|
||||
topNFeaturesRanking.columns = ['Feature','RMSE'] #naming the dataframe columns
|
||||
print(topNFeaturesRanking)
|
||||
topNFeaturesRanking.to_csv(pwd + "/" + dataset + "/arousal_plots/" + "topCommonFeatureFSRegressionRanking"+ method + str(window) + str(stride) + ".csv")
|
||||
|
||||
topNFeatures = pd.concat([topNFeatures_df, topNRmse_df, topNR2_df, topNMAE_df, topNEV_df],axis=1)
|
||||
|
||||
topNFeatures.columns = ['Feature', 'RMSE', 'R2', 'MAE', 'EV'] #naming the dataframe columns
|
||||
|
||||
print(topNFeatures)
|
||||
|
||||
topNFeatures.to_csv(f"output/{dataset}_{label}_features.csv")
|
||||
|
||||
# Plotting
|
||||
fig = plt.gcf()
|
||||
fig.set_size_inches(25, 10)
|
||||
plt.rcParams.update({'font.size': 30})
|
||||
plt.xlabel('Top N Features')
|
||||
plt.ylabel('RMSE')
|
||||
plt.plot(topNFeaturesRanking.loc[:,"Feature"], topNFeaturesRanking.loc[:,"RMSE"])
|
||||
plt.plot(topNFeatures.loc[:,"Feature"], topNFeatures.loc[:,"RMSE"])
|
||||
plt.tight_layout()
|
||||
|
||||
|
||||
# In[ ]:
|
||||
|
||||
|
||||
def topFSColumnsRegressionRanking(dataset, window, stride, sfreq, clf, label, scale=False, pca=False, mutual_info = False, method='SelectKBest'):
|
||||
# Method C
|
||||
# parameters :-
|
||||
# dataset - name of the dataset
|
||||
# window - length of the sliding window in seconds
|
||||
# stride - length of the stride of the sliding window in seconds
|
||||
# sfreq - sampling frequency of the EEG data
|
||||
# clf - name of the classifier to be used
|
||||
# label - valence/arousal/dominance/liking label (shape depends upon the dataset)
|
||||
# scale - sclaing of the EEG data if required
|
||||
# mutual_info - Mutual ranking between features based on information theory
|
||||
# method - 'RandomForest' 'RFE' 'SelectKBest'
|
||||
|
||||
# returns :-
|
||||
# void
|
||||
fs = sfreq
|
||||
pwd = os.getcwd()
|
||||
|
||||
featurepath = os.getcwd() + '/' + dataset + '/data_extracted/featuresDict/'
|
||||
ans = np.load((featurepath + "shannonEntropy_{}_{}.npz").format(window,stride), allow_pickle=True)['features']
|
||||
Y_epoch = np.load((featurepath + "shannonEntropy_{}_{}.npz").format(window,stride), allow_pickle=True)['Y']
|
||||
|
||||
print("Number of segments are: {}".format(ans.shape[1]))
|
||||
|
||||
#X##############################################################################################
|
||||
|
||||
featuresDict = None
|
||||
featuresDict = loadFeaturesDict(dataset)
|
||||
|
||||
common = []
|
||||
with open('intersection.pkl', 'rb') as f:
|
||||
common = pickle.load(f)
|
||||
|
||||
for k in list(featuresDict.keys()):
|
||||
if k not in common:
|
||||
# pop out common feature
|
||||
featuresDict.pop(k)
|
||||
|
||||
print("Number of Features:",len(list(featuresDict.keys())))
|
||||
featuresList = list(featuresDict.keys())
|
||||
|
||||
feature_channel_index = []
|
||||
|
||||
feature_channel_index = []
|
||||
for feature in featuresList:
|
||||
for i in range(featuresDict[feature].shape[0]):
|
||||
if(i>=10):
|
||||
feature_channel_index.append(feature +'_'+ str(i))
|
||||
else:
|
||||
feature_channel_index.append(feature + '_0' + str(i))
|
||||
|
||||
print(len(list(featuresDict.keys())))
|
||||
print("Number of Feature-Columns: {}\n".format(len(feature_channel_index)))
|
||||
|
||||
#defining feature matrix
|
||||
featureMatrix = np.empty((0,ans.shape[1])) #[14*32 + 1,80640]
|
||||
for key,value in featuresDict.items():
|
||||
featureMatrix = np.append(featureMatrix,value,axis=0)
|
||||
|
||||
|
||||
|
||||
print("Shape of FeatureMatrix: {}\n".format(featureMatrix.T.shape))
|
||||
|
||||
#data-imputation and nan-removal
|
||||
featureMatrix = featureMatrix.astype(np.float32)
|
||||
|
||||
if np.isnan(featureMatrix).any():
|
||||
featureMatrix = np.nan_to_num(featureMatrix,nan=0)
|
||||
|
||||
X = pd.DataFrame(featureMatrix.T)
|
||||
X = X.replace([np.inf, -np.inf], np.nan)
|
||||
X = X.fillna(0)
|
||||
X.columns = feature_channel_index
|
||||
|
||||
|
||||
#Y#####################################################################
|
||||
|
||||
y = Y_epoch[:,label] #valence
|
||||
# y = pd.DataFrame(y)
|
||||
|
||||
########################################################################
|
||||
dfscores = None
|
||||
|
||||
if(method == 'RandomForest'):
|
||||
'''Random Forest Feature Importances'''
|
||||
estimator = sklearnrfi() #RandomForestRegressor()
|
||||
fit = estimator.fit(X,y)
|
||||
dfscores = pd.DataFrame(fit.feature_importances_)
|
||||
elif(method == 'RFE'):
|
||||
''' RFE'''
|
||||
selector = RFE(clf, n_features_to_select=X.shape[1], step=1)
|
||||
selector = selector.fit(X, y)
|
||||
dfscores = pd.DataFrame(selector.ranking_)
|
||||
|
||||
elif(method == 'SelectKBest'):
|
||||
"""SelecKBest"""
|
||||
#apply SelectKBest class to extract top 10 best features
|
||||
func = None
|
||||
if mutual_info == False:
|
||||
func = f_classif
|
||||
else:
|
||||
func = mutual_info_classif
|
||||
|
||||
bestfeatures = SelectKBest(score_func=func, k=X.shape[1])
|
||||
fit = bestfeatures.fit(X,y)
|
||||
|
||||
dfscores = pd.DataFrame(fit.scores_)
|
||||
|
||||
|
||||
|
||||
|
||||
dfcolumns = pd.DataFrame(X.columns)
|
||||
|
||||
#concat two dataframes for better visualization
|
||||
featureScores = pd.concat([dfcolumns,dfscores],axis=1)
|
||||
featureScores.columns = ['Column','Score'] #naming the dataframe columns
|
||||
features_result = featureScores.nlargest(X.shape[1],'Score')
|
||||
print(features_result)
|
||||
|
||||
N = len(feature_channel_index)
|
||||
topNRmseList = []
|
||||
topNList = ["{}".format(x) for x in range(1,N+1)]
|
||||
|
||||
for n in range(1, N+1):
|
||||
ranking_df = features_result.head(n)
|
||||
topncols = ranking_df['Column'].tolist()
|
||||
|
||||
input_df = pd.DataFrame(X[topncols])
|
||||
|
||||
X_train, X_test, y_train, y_test = train_test_split(input_df, y, test_size=0.2, random_state=42)
|
||||
|
||||
# Normalise-scale data
|
||||
# Feature Scaling
|
||||
if(scale == True):
|
||||
sc = StandardScaler()
|
||||
X_train = sc.fit_transform(X_train)
|
||||
X_test = sc.transform(X_test)
|
||||
|
||||
# Apply classfier
|
||||
clf.fit(X_train, y_train)
|
||||
y_predict = clf.predict(X_test)
|
||||
rmse = mean_squared_error(y_test, y_predict, squared=False)
|
||||
print(n,rmse)
|
||||
topNRmseList.append(rmse)
|
||||
|
||||
|
||||
topcol_df = pd.DataFrame(topNList)
|
||||
toprmse_df = pd.DataFrame(topNRmseList)
|
||||
#concat two dataframes for better visualization
|
||||
topcolRanking = pd.concat([topcol_df, toprmse_df],axis=1)
|
||||
topcolRanking.columns = ['Column','RMSE'] #naming the dataframe columns
|
||||
topfeatures_result = topcolRanking
|
||||
print(topfeatures_result)
|
||||
topfeatures_result.to_csv(pwd + "/" + dataset + "/arousal_plots/" + "topFSColumnsRegressionRanking"+method + str(window) + str(stride) + ".csv")
|
||||
|
||||
# Plotting
|
||||
fig = plt.gcf()
|
||||
fig.set_size_inches(60, 9)
|
||||
|
||||
plt.xlabel('Top N Columns')
|
||||
plt.ylabel('RMSE')
|
||||
plt.title("Top N Columns v/s RMSE Plot for Window:{} Stride:{} epoched data by varying N".format(window,stride))
|
||||
plt.plot(topfeatures_result.loc[:,"Column"], topfeatures_result.loc[:,"RMSE"])
|
||||
plt.tight_layout()
|
||||
plt.savefig(pwd + "/" + dataset + "/arousal_plots/" + "topFSColumnsRegressionRanking"+method + str(window) + str(stride) + ".svg", bbox_inches='tight', dpi=500)
|
||||
plt.show()
|
||||
plt.clf()
|
||||
|
||||
|
||||
# In[ ]:
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
pass
|
||||
|
||||
|
||||
@@ -5,16 +5,17 @@
|
||||
|
||||
|
||||
# Script to get the feature ranking and electrode ranking through
|
||||
# Method A :- Random Forest Regressor
|
||||
# Method B :- F score based Ranking
|
||||
# Method C :- Random Forest Importances approach
|
||||
|
||||
# Method :- F score based Ranking
|
||||
|
||||
# Main function
|
||||
|
||||
from ImportUtils import *
|
||||
from TopNByFSMethods import *
|
||||
from TopNByClassifier import *
|
||||
from args_eeg import args as my_args
|
||||
|
||||
# uncomment to extract features
|
||||
# from EpochedFeatures import *
|
||||
if __name__ == '__main__':
|
||||
|
||||
# args object to fetch command line inputs
|
||||
@@ -34,37 +35,12 @@ if __name__ == '__main__':
|
||||
fs_method = args.fs_method
|
||||
|
||||
#feature extraction
|
||||
getEpochedFeatures(dataset, window, stride, sfreq, label)
|
||||
# uncomment to extract features
|
||||
# getEpochedFeatures(dataset, window, stride, sfreq, label)
|
||||
if(top == "e"):
|
||||
clf = RandomForestRegressor()
|
||||
topElectrodeRegressionRanking(dataset, window, stride, sfreq, clf, label, scale=False, pca=False)
|
||||
topElectrodeFSRegressionRanking(dataset, window, stride, sfreq, clf, label, scale=False, pca=False, mutual_info = False, method='SelectKBest')
|
||||
topElectrodeFSRegressionRanking(dataset, window, stride, sfreq, clf, label, scale=False, pca=False, mutual_info = False, method='RandomForest')
|
||||
plt.legend(["Method A","Method B", "Method C"])
|
||||
|
||||
if(label == 1):
|
||||
plt.savefig(pwd + "/" + dataset + "/arousal_plots/" + "CorrectedElectrodewiseRanking" + str(window) + str(stride) + ".svg", bbox_inches='tight')
|
||||
plt.show()
|
||||
plt.clf()
|
||||
|
||||
else:
|
||||
plt.savefig(pwd + "/" + dataset + "/plots/" + "CorrectedElectrodewiseRanking" + str(window) + str(stride) + ".svg", bbox_inches='tight')
|
||||
plt.show()
|
||||
plt.clf()
|
||||
|
||||
elif(top == "f"):
|
||||
clf = RandomForestRegressor()
|
||||
topFeaturesRegressionRanking(dataset, window, stride, sfreq, clf, label, scale=False, pca=False)
|
||||
topFeatureFSRegressionRanking(dataset, window, stride, sfreq, clf, label, scale=False, pca=False, mutual_info = False, method='SelectKBest')
|
||||
topFeatureFSRegressionRanking(dataset, window, stride, sfreq, clf, label, scale=False, pca=False, mutual_info = False, method='RandomForest')
|
||||
if(label == 1):
|
||||
plt.legend(["Method A","Method B", "Method C"])
|
||||
plt.savefig(pwd + "/" + dataset + "/arousal_plots/" + "CorrectedFeaturewiseRanking" + str(window) + str(stride) + ".svg", bbox_inches='tight')
|
||||
plt.show()
|
||||
plt.clf()
|
||||
else:
|
||||
plt.legend(["Method A","Method B", "Method C"])
|
||||
plt.savefig(pwd + "/" + dataset + "/plots/" + "CorrectedFeaturewiseRanking" + str(window) + str(stride) + ".svg", bbox_inches='tight')
|
||||
plt.show()
|
||||
plt.clf()
|
||||
|
||||
topFeatureFSRegressionRanking(dataset, window, stride, sfreq, clf, label, scale=False, pca=False, mutual_info = False, method='SelectKBest')
|
||||
Diff do arquivo suprimido porque uma ou mais linhas são muito longas
Arquivo binário não exibido.
Diff do arquivo suprimido porque uma ou mais linhas são muito longas
Referência em uma Nova Issue
Bloquear um usuário