# Source-viewer header: 56 lines, 1.3 KiB, Python
# Command-line script: rank features with BAHSIC and write the selected
# columns of the standardized data matrix to a text file.
#
# Arguments: file_x file_y file_out [file_normalized]
#   file_x           space-delimited data matrix, one sample per row
#   file_y           space-delimited labels, one per sample
#   file_out         destination for the selected feature columns
#   file_normalized  optional destination for the standardized data matrix
import time
import sys

import numpy

import vector
from bahsic import CBAHSIC

# Printed when fewer than three file arguments are supplied.
usage = "usage: python <script> file_x file_y file_out [file_normalized]"

if __name__ == "__main__":
    if len(sys.argv) < 4:
        print(usage)
    else:
        file_x = sys.argv[1]
        file_y = sys.argv[2]
        file_out = sys.argv[3]
        # The optional fourth argument is the path for the standardized
        # matrix.  (The original tested `sys.argv == 5`, which is never true,
        # and read sys.argv[5], which is out of range for five entries.)
        file_normalized = sys.argv[4] if len(sys.argv) >= 5 else None

        X = numpy.genfromtxt(file_x, delimiter=' ')
        y = numpy.genfromtxt(file_y, delimiter=' ')

        bahsic = CBAHSIC()
        # One row of X per sample; the reshape below raises if the number
        # of labels does not match.
        data_no = X.shape[0]
        features_tokeep = 5040
        y = y.reshape(data_no, 1)

        # Normalize the labels.
        # For labels in {+1, -1}, pno and nno are the number of positive and
        # negative samples, so each class is scaled by its own size.
        y = 1.0 * y
        tmp_no = numpy.sum(y)
        pno = (data_no + tmp_no) / 2
        nno = (data_no - tmp_no) / 2
        positive = y > 0
        negative = y < 0
        y[positive] = y[positive] / pno
        y[negative] = y[negative] / nno

        # Normalize the data: zero mean and unit standard deviation per
        # column, in place.  A column with zero standard deviation yields
        # NaN/inf here, exactly as the original division did.
        m = X.mean(0)
        s = X.std(0)
        X -= m
        X /= s

        # time.clock was removed in Python 3.8; use perf_counter when it is
        # available and fall back to time.clock on Python 2.
        timer = getattr(time, "perf_counter", None) or time.clock

        t1 = timer()
        tmp = bahsic.BAHSICRaw(X, y, vector.CLinearKernel(),
                               vector.CLinearKernel(), features_tokeep, 0.1)
        t2 = timer()
        print("time taken: " + str(t2 - t1))
        print('--rank of the features')
        print('--better features towards the end of the list:')
        print(tmp)

        # Copy the chosen columns of X into the output matrix.
        # NOTE(review): this reads ranking entries features_tokeep through
        # 2*features_tokeep - 1, so it assumes the ranking holds at least
        # 2*features_tokeep entries.  If "the last features_tokeep entries"
        # is what is meant, tmp[-features_tokeep:] would be the general
        # form -- confirm against BAHSICRaw before changing.
        selected = [tmp[features_tokeep + j] for j in range(features_tokeep)]
        hsicfeatures = X[:, selected]

        numpy.savetxt(file_out, hsicfeatures)
        if file_normalized is not None:
            numpy.savetxt(file_normalized, X)