Initial commit

Esse commit está contido em:
Rishi Dua
2014-11-21 18:52:56 +05:30
commit 3fd37a416f
22 arquivos alterados com 3291 adições e 0 exclusões
+48
Ver Arquivo
@@ -0,0 +1,48 @@
dump/
*.dat
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
# C extensions
*.so
# Distribution / packaging
.Python
env/
build/
develop-eggs/
dist/
eggs/
lib/
lib64/
parts/
sdist/
var/
*.egg-info/
.installed.cfg
*.egg
# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec
# Installer logs
pip-log.txt
pip-delete-this-directory.txt
# Unit test / coverage reports
htmlcov/
.tox/
.coverage
.cache
nosetests.xml
coverage.xml
# PyBuilder
target/
+22
Ver Arquivo
@@ -0,0 +1,22 @@
The MIT License (MIT)
Copyright (c) 2014 Rishi Dua <rishirdua@gmail.com>, TV Ashok <veeranjaneyaashok@gmail.com>
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
+28
Ver Arquivo
@@ -0,0 +1,28 @@
EMOTION CLASSIFICATION
======================
Feature selection via dependence maximization for EEG-based emotion classification. This project was done as part of the Neural Networks course in the Fall 2014 semester at IIT Delhi.
Supervisor: Dr. Jayadeva
Authors
-------
Rishi Dua <http://github.com/rishirdua>
TV Ashok <http://github.com/tvashok>
Install
-------
1. Install Python, NumPy, SciPy and scikit-learn
2. Copy the DEAP dataset (cPickle preprocessed) to data/raw folder
3. Run script.sh
Documentation
-------------
Refer to docs/readme.pdf
Contribute
----------
- Source Code: https://github.com/rishirdua/emotion-classification/
License
-------
This project is licensed under the terms of the MIT license. See LICENCE.txt for details.
+190
Ver Arquivo
@@ -0,0 +1,190 @@
# Copyright (c) 2006, National ICT Australia
# All rights reserved.
#
# The contents of this file are subject to the Mozilla Public License Version
# 1.1 (the 'License'); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
# http://www.mozilla.org/MPL/
#
# Software distributed under the License is distributed on an 'AS IS' basis,
# WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
# for the specific language governing rights and limitations under the
# License.
#
# Authors: Le Song (lesong@it.usyd.edu.au)
# Created: (20/10/2006)
# Last Updated: (dd/mm/yyyy)
#
##\package elefant.fselection.bahsic
# This module performs backward elimination for feature selection
# using HSIC (BAHSIC).
#
# The algorithm proceeds iteratively, eliminating the least
# relevant features and adding them to the eliminated list
# in each iteration. For the theoretical underpinning see the
# following reference:
#
# Le Song, Justin Bedo, Karsten M. Borgwardt, Arthur Gretton
# and Alex Smola. The BAHSIC family of gene selection algorithms.
#
__version__ = '$Revision: $'
# $Source$
import numpy
from scipy import optimize
import vector
from hsic import CHSIC
from setdiag0 import setdiag0
## Class that performs backward elimination for feature selection (BAHSIC).
#
# It has two versions of BAHSIC: one without optimization over the kernel
# parameters and one with optimization over the kernel parameters.
#
# Both versions return the feature (column) indices of x in the order in
# which they were eliminated, so the features that survived longest are
# at the end of the returned list.
#
class CBAHSIC(object):
    def __init__(self):
        pass

    ## Validate the inputs and precompute the label-side quantities that
    #  stay constant during the whole elimination.
    # @param x The data (2-d array, one row per data point).
    # @param y The labels (2-d array, one row per data point).
    # @param kernely The kernel on the labels.
    # @return The tuple (L, sL, ssL): the label kernel matrix with its
    #         diagonal set to zero, its row sums and the sum of all entries.
    #
    def _Initialize(self, x, y, kernely):
        assert len(x.shape) == 2, 'Argument 1 has wrong shape'
        assert len(y.shape) == 2, 'Argument 2 has wrong shape'
        assert x.shape[0] == y.shape[0], \
               'Argument 1 and 2 have different number of data points'

        print('--initializing...')
        L = kernely.Dot(y, y)
        setdiag0(L)
        sL = numpy.sum(L, axis=1)
        ssL = numpy.sum(sL)
        return L, sL, ssL

    ## Choose which of the m remaining features to eliminate next.
    # @param objj HSIC value obtained when leaving out each remaining feature.
    # @param m The number of remaining features (len(objj)).
    # @param flg3 The number of desired features.
    # @param flg4 The proportion of features eliminated in each iteration.
    # @return Positions into objj, largest objective first.
    #
    def _PickEliminated(self, objj, m, flg3, flg4):
        if m > flg3:
            num = int(flg4 * m) + 1
            # Never drop below the desired number of features in one step.
            if m - num <= flg3:
                num = m - flg3
            # Always remove at least one feature and always keep at least
            # one. The former slice maxj[m:m-num-1:-1] became empty for
            # num == m (stop index -1), which stalled the caller's loop.
            num = max(1, min(num, m - 1))
            return numpy.argsort(objj)[::-1][:num]
        return numpy.array([numpy.argmax(objj)])

    ## BAHSIC with optimization over the kernel parameters.
    # @param x The data.
    # @param y The labels.
    # @param kernelx The kernel on the data.
    # @param kernely The kernel on the labels.
    # @param flg3 The number of desired features.
    # @param flg4 The proportion of features eliminated in each iteration.
    # @return The list of feature indices in order of elimination.
    #
    def BAHSICOpt(self, x, y, kernelx, kernely, flg3, flg4):
        L, sL, ssL = self._Initialize(x, y, kernely)
        hsic = CHSIC()

        eliminatedI = []
        selectedI = set(numpy.arange(x.shape[1]))

        kernelx.CreateCacheKernel(x)
        # Candidate starting points for the kernel parameter search.
        sga = kernelx._typicalParam
        sgaN = sga.shape[0]

        while True:
            selectedI = selectedI - set(eliminatedI)
            sI = numpy.array([j for j in selectedI])
            m = len(sI)

            print(m)
            if m <= 1:
                # The last remaining feature (if any) closes the ranking.
                eliminatedI.extend(selectedI)
                break

            # Optimize the kernel parameters from every starting point and
            # keep the parameters with the smallest objective value.
            sgaMat = []
            hsicMat = []
            for k in range(sgaN):
                ## bfgs in scipy is not working here
                retval = optimize.fmin_cg(hsic.ObjUnBiasedHSIC,
                                          sga[[k],].ravel(),
                                          hsic.GradUnBiasedHSIC,
                                          args=(x, kernelx, L, sL, ssL),
                                          gtol=1e-6, maxiter=100,
                                          full_output=True, disp=False)
                sgaMat.append(retval[0])
                hsicMat.append(retval[1])

            sga0 = sgaMat[numpy.argmin(hsicMat)]

            # HSIC obtained when each remaining feature is left out in turn.
            objj = []
            for j in sI:
                K = kernelx.DecDotCacheKernel(x, x[:, [j]], sga0)
                setdiag0(K)
                objj.append(hsic.UnBiasedHSICFast(K, L, sL, ssL))

            maxj = self._PickEliminated(objj, m, flg3, flg4)
            j = numpy.take(sI, maxj)
            eliminatedI.extend(j)
            kernelx.DecCacheKernel(x, x[:, j])

        kernelx.ClearCacheKernel(x)
        return eliminatedI

    ## BAHSIC without optimization over the kernel parameters.
    # @param x The data.
    # @param y The labels.
    # @param kernelx The kernel on the data.
    # @param kernely The kernel on the labels.
    # @param flg3 The number of desired features.
    # @param flg4 The proportion of features eliminated in each iteration.
    # @return The list of feature indices in order of elimination.
    #
    def BAHSICRaw(self, x, y, kernelx, kernely, flg3, flg4):
        L, sL, ssL = self._Initialize(x, y, kernely)
        hsic = CHSIC()

        eliminatedI = []
        selectedI = set(numpy.arange(x.shape[1]))

        kernelx.CreateCacheKernel(x)

        while True:
            selectedI = selectedI - set(eliminatedI)
            sI = numpy.array([j for j in selectedI])
            m = len(sI)

            print(m)
            if m <= 1:
                # The last remaining feature (if any) closes the ranking.
                eliminatedI.extend(selectedI)
                break

            # HSIC obtained when each remaining feature is left out in turn.
            objj = []
            for j in sI:
                K = kernelx.DecDotCacheKernel(x, x[:, [j]])
                setdiag0(K)
                objj.append(hsic.UnBiasedHSICFast(K, L, sL, ssL))

            maxj = self._PickEliminated(objj, m, flg3, flg4)
            j = numpy.take(sI, maxj)
            eliminatedI.extend(j)
            kernelx.DecCacheKernel(x, x[:, j])

        kernelx.ClearCacheKernel(x)
        return eliminatedI
+46
Ver Arquivo
@@ -0,0 +1,46 @@
import cPickle
import os.path
from multiprocessing import Pool
import sys
def main():
    """Dump down-sampled DEAP recordings and their labels to text files.

    Every 8th subject file (s01, s09, s17, s25) is read from data/raw.
    For each of its trials one line is appended to data/features_raw.dat
    holding every 32nd time sample of every channel, each value followed
    by a single space. The four label values of the trial are appended,
    one per line, to data/labels_0.dat ... data/labels_3.dat.
    """
    nLabel, nTrial, nUser, nChannel, nTime = 4, 40, 32, 40, 8064
    user_step = 8    # only every 8th subject is used
    time_step = 32   # only every 32nd time sample is kept

    print("Program started" + "\n")

    fout_data = open("data/features_raw.dat", 'w')
    fout_labels = []
    try:
        for k in range(nLabel):
            fout_labels.append(open("data/labels_%d.dat" % k, 'w'))

        for i in range(0, nUser, user_step):
            fname = "data/raw/s%02d.dat" % (i + 1)
            # NOTE(security): unpickling can execute arbitrary code; only
            # run this on dataset files obtained from a trusted source.
            with open(fname, 'rb') as fin:
                x = cPickle.load(fin)
            print(fname)

            data = x['data']
            labels = x['labels']
            for tr in range(nTrial):
                for dat in range(0, nTime, time_step):
                    for ch in range(nChannel):
                        fout_data.write(str(data[tr][ch][dat]) + " ")
                for k in range(nLabel):
                    fout_labels[k].write(str(labels[tr][k]) + "\n")
                fout_data.write("\n")
    finally:
        # Close every handle even when a subject file is missing or broken.
        for handle in fout_labels:
            handle.close()
        fout_data.close()

    print("\n" + "Print Successful")


if __name__ == "__main__":
    main()
BIN
Ver Arquivo
Arquivo binário não exibido.
+56
Ver Arquivo
@@ -0,0 +1,56 @@
import time
import sys
import numpy
import vector
from bahsic import CBAHSIC
usage = "filename features_file labels_file output_file [normalized_file]"

# Command-line driver: rank the features with BAHSIC and write the selected
# feature columns to output_file. When the optional normalized_file is
# given, the standardized input matrix is written there as well.
if __name__ == "__main__":
    if len(sys.argv) < 4:
        print(usage)
    else:
        file_x = sys.argv[1]
        file_y = sys.argv[2]
        file_out = sys.argv[3]
        # The former test `sys.argv==5` compared the list itself to 5 and
        # was therefore never true; the optional argument is sys.argv[4].
        file_normalized = sys.argv[4] if len(sys.argv) >= 5 else None

        X = numpy.genfromtxt(file_x, delimiter=' ')
        y = numpy.genfromtxt(file_y, delimiter=' ')

        bahsic = CBAHSIC()
        data_no = 160            # number of data points expected in the files
        features_tokeep = 5040   # number of features written to output_file
        y.shape = (data_no, 1)

        # Normalize the labels.
        y = 1.0 * y
        tmp_no = numpy.sum(y)
        pno = (data_no + tmp_no) / 2
        nno = (data_no - tmp_no) / 2
        y[y > 0] = y[y > 0] / pno
        y[y < 0] = y[y < 0] / nno

        # Normalize the data (in place) to zero mean and unit standard
        # deviation per column.
        m = X.mean(0)
        s = X.std(0)
        X -= m
        X /= s

        t1 = time.clock()
        tmp = bahsic.BAHSICRaw(X, y, vector.CLinearKernel(),
                               vector.CLinearKernel(), features_tokeep, 0.1)
        t2 = time.clock()
        print("time taken: " + str(t2 - t1))

        print('--rank of the features')
        print('--better features towards the end of the list:')
        print(tmp)

        # Keep the ranking entries features_tokeep .. 2*features_tokeep-1.
        # NOTE(review): these are the best features only when the input has
        # exactly 2*features_tokeep columns - confirm against the data.
        keep = numpy.asarray(tmp[features_tokeep:2 * features_tokeep],
                             dtype=int)
        if len(keep) != features_tokeep:
            raise IndexError('feature ranking has fewer than %d entries'
                             % (2 * features_tokeep))
        hsicfeatures = X[:data_no][:, keep]
        numpy.savetxt(file_out, hsicfeatures)

        if file_normalized is not None:
            numpy.savetxt(file_normalized, X)
+39
Ver Arquivo
@@ -0,0 +1,39 @@
%% DESCRIPTION
% Reads the raw feature matrix from data/features_raw.dat, maps it to
% random Fourier features and writes the result to data/features_rrt.dat.
% NOTE: clears the console and all workspace variables before running.
clc;
clear all;
% Transforms features into Rahimi and Recht's Random Fourier Features as defined in:
% Rahimi, Ali, and Benjamin Recht. "Random features for large-scale kernel machines." In Advances in neural information processing systems, pp. 1177-1184. 2007.
% Input: one data point per row, values separated by single spaces.
X = dlmread('data/features_raw.dat', ' ');
n_features = size(X,2);
n_data = size(X,1);
% Kernel width settings. Only sigma is used below; gamma is computed but
% never referenced again in this script.
gamma_inv = 0.1;
gamma = 1/gamma_inv;
sigma = sqrt(2/gamma_inv);
% Number of random features, i.e. columns of the output matrix.
n_randomfeatures = 5040;
%calculate
% W: random projection directions, entries drawn from a normal
% distribution with mean 0 and standard deviation sigma.
W=normrnd(0,sigma,n_features,n_randomfeatures);
% b: one random phase per random feature, uniform in [0, 2*pi).
b=2*pi*rand(1,n_randomfeatures);
% B: the phase row vector b repeated for every data point.
B = ones(n_data,1)*(b);
% Feature map: sqrt(2/D) * cos(X*W + B), with D = n_randomfeatures.
X_rrt = sqrt(2/n_randomfeatures)*cos(X*W+B);
disp('calculated');
%normalize
% (disabled) column-wise standardization; it refers to a variable
% Data_new that is not defined anywhere in this script.
%mean_tr = mean(Data_new);
%std_tr = std(Data_new);
%Data_new = (Data_new-repmat(mean_tr,n_data,1))./(repmat(std_tr,n_data,1));
%disp('normalized');
%toc;
%write
% Output: space-delimited matrix of size n_data x n_randomfeatures.
dlmwrite('data/features_rrt.dat',X_rrt, ' ');
disp('writen to file');
+36
Ver Arquivo
@@ -0,0 +1,36 @@
import cPickle
import os.path
from multiprocessing import Pool
import sys
usage = "filename out_file"


def generate_features(fout_file):
    """Dump down-sampled DEAP recordings to a text feature file.

    Every 8th subject file (s01, s09, s17, s25) is read from data/raw.
    For each of its trials one line is written to fout_file holding every
    64th time sample of every channel, each value followed by one space.

    fout_file -- path of the text file to create (overwritten if present).
    """
    nLabel, nTrial, nUser, nChannel, nTime = 4, 40, 32, 40, 8064
    user_step = 8    # only every 8th subject is used
    time_step = 64   # only every 64th time sample is kept

    print("Program started" + "\n")

    # The context managers close the files even if a subject file is
    # missing or cannot be unpickled.
    with open(fout_file, 'w') as fout_data:
        for i in range(0, nUser, user_step):
            fname = "data/raw/s%02d.dat" % (i + 1)
            # NOTE(security): unpickling can execute arbitrary code; only
            # run this on dataset files obtained from a trusted source.
            with open(fname, 'rb') as fin:
                x = cPickle.load(fin)
            print(fname)

            data = x['data']
            for tr in range(nTrial):
                for dat in range(0, nTime, time_step):
                    for ch in range(nChannel):
                        fout_data.write(str(data[tr][ch][dat]) + " ")
                fout_data.write("\n")


if __name__ == "__main__":
    if len(sys.argv) != 2:
        print(usage)
    else:
        generate_features(sys.argv[1])
+168
Ver Arquivo
@@ -0,0 +1,168 @@
# Copyright (c) 2006, National ICT Australia
# All rights reserved.
#
# The contents of this file are subject to the Mozilla Public License Version
# 1.1 (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
# http://www.mozilla.org/MPL/
#
# Software distributed under the License is distributed on an "AS IS" basis,
# WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
# for the specific language governing rights and limitations under the
# License.
#
# Authors: Le Song (lesong@it.usyd.edu.au) and Alex Smola
# (alex.smola@nicta.com.au)
# Created: (20/10/2006)
# Last Updated: (dd/mm/yyyy)
#
##\package elefant.kernels.generic
# This module contains generic class for kernels
#
# The CKernel class provides common interface for all kernel classes. Note
# that it should never be instantiated.
#
__version__ = "$Revision: $"
# $Source$
import numpy
import numpy.random as random
## Generic kernel class
#
# This class provides the common interface for all kernels. The interface
# includes the following key kernel manipulations (functions):
# --Dot(x1, x2): $K(x1, x2)$
# --Expand(x1, x2, alpha2): $sum_r K(x1_i,x2_r) \times alpha2_r$
# --Tensor(x1, y1, x2, y2): $K(x1_i,x2_j) \times (y1_i \times y2_j)$
# --TensorExpand(x1, y1, x2, y2, alpha2):
#   $sum_r K(x1_i,x2_r) \times (y1_i \times y2_r) \times alpha2_r$
# --Remember(x): Remember data x
# --Forget(x): Remove remembered data x
# To design a specific kernel, simply override these methods. The generic
# kernel itself should never be instantiated for actual computation: every
# kernel operation raises NotImplementedError.
#
class CKernel(object):
    def __init__(self, blocksize=128):
        ## @var _blocksize
        # Parameter that determines the size of each block when computing the
        # kernel matrix in blocks. Properly blocking the kernel matrix during
        # computation improves the speed.
        #
        self._blocksize = blocksize

        ## @var _name
        # Name of the kernel.
        #
        self._name = "Generic kernel"

        ## @var _cacheData
        # Cache that stores data that have appeared before.
        #
        self._cacheData = {}

    def __str__(self):
        return self._name

    def __repr__(self):
        return "Kernel object of type '" + self._name + "'"

    ## Compute the kernel between two data points x1 and x2.
    # It returns a scalar value of the dot product between x1 and x2.
    # @param x1 [read] The first data point.
    # @param x2 [read] The second data point.
    #
    def K(self, x1, x2):
        raise NotImplementedError(
            'CKernel.K in abstract class is not implemented')

    ## Compute the kernel between the data points in x1 and those in x2.
    # It returns a matrix with entry $(ij)$ equal to $K(x1_i, x2_j)$.
    # If index1/index2 is
    # specified, only those data points in x1/x2 with indices corresponding
    # to index1/index2 are used to compute the kernel matrix. Furthermore,
    # if output is specified, the provided buffer is used explicitly to
    # store the kernel matrix.
    # @param x1 [read] The first set of data points.
    # @param x2 [read] The second set of data points.
    # @param index1 [read] The indices into the first set of data points.
    # @param index2 [read] The indices into the second set of data points.
    # @param output [write] The buffer where the output matrix is written into.
    #
    def Dot(self, x1, x2, index1=None, index2=None, output=None):
        raise NotImplementedError(
            'CKernel.Dot in abstract class is not implemented')

    ## Compute the kernel between the data points in x1 and those in x2,
    # then multiply the resulting kernel matrix by alpha2.
    # It returns a matrix with entry $(ij)$ equal to
    # $sum_r K(x1_i,x2_r) \times alpha2_r$.
    # Other parameters are defined similarly as those in Dot.
    # @param x1 [read] The first set of data points.
    # @param x2 [read] The second set of data points.
    # @param alpha2 [read] The set of coefficients.
    # @param index1 [read] The indices into the first set of data points.
    # @param index2 [read] The indices into the second set of data points.
    # @param output [write] The buffer where the output matrix is written into.
    #
    def Expand(self, x1, x2, alpha2, index1=None, index2=None, output=None):
        raise NotImplementedError(
            'CKernel.Expand in abstract class is not implemented')

    ## Compute the kernel between the data points in x1 and those in x2,
    # then multiply the resulting kernel matrix elementwise by the
    # outer-product matrix between y1 and y2. It returns a matrix
    # with entry $(ij)$ equal to $K(x1_i,x2_j) \times (y1_i \times y2_j)$.
    # Other parameters are defined similarly as those in Dot.
    # @param x1 [read] The first set of data points.
    # @param y1 [read] The first set of labels.
    # @param x2 [read] The second set of data points.
    # @param y2 [read] The second set of labels.
    # @param index1 [read] The indices into the first set of data points.
    # @param index2 [read] The indices into the second set of data points.
    # @param output [write] The buffer where the output matrix is written into.
    #
    def Tensor(self, x1, y1, x2, y2, index1=None, index2=None, output=None):
        raise NotImplementedError(
            'CKernel.Tensor in abstract class is not implemented')

    ## Compute the kernel between the data points in x1 and those in x2,
    # then multiply the resulting kernel matrix elementwise by the
    # outer-product matrix between y1 and y2, and finally multiply
    # the resulting matrix by alpha2. It returns a matrix with entry $(ij)$
    # equal to $sum_r K(x1_i,x2_r) \times (y1_i \times y2_r) \times alpha2_r$.
    # Other parameters are defined similarly as those in Dot.
    # @param x1 [read] The first set of data points.
    # @param y1 [read] The first set of labels.
    # @param x2 [read] The second set of data points.
    # @param y2 [read] The second set of labels.
    # @param alpha2 [read] The set of coefficients.
    # @param index1 [read] The indices into the first set of data points.
    # @param index2 [read] The indices into the second set of data points.
    # @param output [write] The buffer where the output matrix is written into.
    #
    def TensorExpand(self, x1, y1, x2, y2, alpha2, index1=None, index2=None,
                     output=None):
        raise NotImplementedError(
            'CKernel.TensorExpand in abstract class is not implemented')

    ## Remember the data by performing necessary preprocessing on
    # the data, storing it in the cache and indexing it by the id of
    # the data. The preprocessing can be defined differently for
    # different classes. If the data have already been remembered,
    # the old stored information is simply overwritten.
    # @param x [read] The data to be remembered.
    #
    def Remember(self, x):
        raise NotImplementedError(
            'CKernel.Remember in abstract class is not implemented')

    ## Remove a remembered data from the cache. If x is not given, then
    # all the data remembered in the cache will be removed. If a given
    # x is not remembered beforehand, False is returned; otherwise, True
    # is returned.
    # @param x [read] The data to be removed.
    #
    def Forget(self, x=None):
        raise NotImplementedError(
            'CKernel.Forget in abstract class is not implemented')
+267
Ver Arquivo
@@ -0,0 +1,267 @@
# Copyright (c) 2006, National ICT Australia
# All rights reserved.
#
# The contents of this file are subject to the Mozilla Public License Version
# 1.1 (the 'License'); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
# http://www.mozilla.org/MPL/
#
# Software distributed under the License is distributed on an 'AS IS' basis,
# WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
# for the specific language governing rights and limitations under the
# License.
#
# Authors: Le Song (lesong@it.usyd.edu.au)
# Created: (20/10/2006)
# Last Updated: (dd/mm/yyyy)
#
##\package elefant.fselection.hsic
# This module performs computations related to the Hilbert-Schmidt
# Independence Criterion. Hilbert-Schmidt Independence Criterion is short
# for HSIC.
#
# The biased estimator of HSIC is computed as
# $HSIC=\frac{1}{(m-1)^2}Tr(KHLH)$, where $K$ and $L$ are the kernel
# matrices for the data and the labels respectively, and
# $H=I-\frac{1}{m}11^\top$, where $m$ is the number of data points,
# is the centering matrix. The unbiased estimator of HSIC is computed as
# $HSIC=\frac{1}{m(m-3)}\left[Tr(KL)+\frac{1}{(m-1)(m-2)}1^\top K11^\top L1
# -\frac{2}{m-2}1^\top KL1\right]$, where the diagonals of $K$ and $L$ are
# set to zero. For more theoretical underpinning of HSIC, see the following
# reference:
#
# Gretton, A., O. Bousquet, A. Smola and B. Schoelkopf: Measuring
# Statistical Dependence with Hilbert-Schmidt Norms. Algorithmic
# Learning Theory: 16th International Conference, ALT 2005, 63-78, 2005.
#
__version__ = '$Revision: $'
# $Source$
import numpy
import vector
from setdiag0 import setdiag0
## Class that performs computations related to HSIC.
#
# It contains functions that compute the biased and unbiased HSIC, variants
# that reuse precomputed matrices for faster computation, and functions that
# enable an optimization of HSIC with respect to the kernel parameters.
#
class CHSIC(object):
    def __init__(self):
        pass

    ## Create the kernel used when the caller does not supply one.
    # A fresh instance is built per call so that no kernel object is shared
    # between unrelated calls (as a default argument value would be).
    #
    def _DefaultKernel(self):
        return vector.CLinearKernel()

    ## Combine the kernel matrices into the unbiased HSIC estimate.
    # @param kMat The kernel matrix for the data.
    # @param lMat The kernel matrix of the label.
    # @param sL The vector of the sum of each row of lMat.
    # @param ssL The sum of all entries in lMat.
    # @param n The number of data points.
    #
    def _UnBiasedEstimate(self, kMat, lMat, sL, ssL, n):
        # float() keeps Python 2 from silently using integer division.
        n = float(n)
        sK = numpy.sum(kMat, axis=1)
        ssK = numpy.sum(sK)
        return (numpy.sum(numpy.sum(kMat * lMat))
                + (ssK * ssL) / ((n - 1) * (n - 2))
                - 2 * numpy.sum(sK * sL) / (n - 2)
                ) / (n * (n - 3))

    ## Compute HLH given the labels.
    # @param y The labels. A 1-d array is treated as a single label column
    #          and uses the outer product y * y' as its kernel matrix.
    # @param kernely The kernel on the labels, default to linear kernel.
    #
    def ComputeHLH(self, y, kernely=None):
        ny = y.shape
        if len(ny) > 1:
            if kernely is None:
                kernely = self._DefaultKernel()
            lMat = kernely.Dot(y, y)
        else:
            lMat = numpy.outer(y, y)

        n = float(ny[0])
        sL = numpy.sum(lMat, axis=1)
        ssL = numpy.sum(sL)
        # hlhMat
        return lMat - numpy.add.outer(sL, sL) / n + ssL / (n * n)

    ## Compute the biased estimator of HSIC.
    # @param x The data.
    # @param y The labels.
    # @param kernelx The kernel on the data, default to linear kernel.
    # @param kernely The kernel on the labels, default to linear kernel.
    #
    def BiasedHSIC(self, x, y, kernelx=None, kernely=None):
        nx = x.shape
        ny = y.shape
        assert nx[0] == ny[0], \
               "Argument 1 and 2 have different number of data points"

        if len(nx) > 1:
            if kernelx is None:
                kernelx = self._DefaultKernel()
            kMat = kernelx.Dot(x, x)
        else:
            kMat = numpy.outer(x, x)

        hlhMat = self.ComputeHLH(y, kernely)
        return numpy.sum(numpy.sum(kMat * hlhMat)) / float((nx[0]-1)*(nx[0]-1))

    ## Objective of the biased HSIC when performing optimization over
    # the kernel parameters (the negated biased HSIC).
    # @param param The kernel parameters.
    # @param x The data.
    # @param kernelx The kernel on the data.
    # @param hlhMat The HLH matrix on the labels.
    #
    def ObjBiasedHSIC(self, param, x, kernelx, hlhMat):
        nx = x.shape
        kMat = kernelx.DotCacheKernel(x, param)
        return -numpy.sum(numpy.sum(kMat * hlhMat)) / float((nx[0]-1)*(nx[0]-1))

    ## Gradient of the objective of the biased HSIC when performing
    # optimization over the kernel parameters.
    # @param param The kernel parameters.
    # @param x The data.
    # @param kernelx The kernel on the data.
    # @param hlhMat The HLH matrix on the labels.
    #
    def GradBiasedHISC(self, param, x, kernelx, hlhMat):
        nx = x.shape
        kMat = kernelx.GradDotCacheKernel(x, param)
        return -numpy.sum(numpy.sum(kMat * hlhMat)) / float((nx[0]-1)*(nx[0]-1))

    ## Correctly spelled alias of GradBiasedHISC; the original name is kept
    # for existing callers.
    GradBiasedHSIC = GradBiasedHISC

    ## Fast computation of the biased HSIC when the kernel matrix
    # for the data and the HLH matrix for the labels are already
    # computed.
    # @param kMat The kernel matrix for the data.
    # @param hlhMat The HLH matrix for the labels.
    #
    def BiasedHSICFast(self, kMat, hlhMat):
        nx = kMat.shape
        assert (kMat.shape == hlhMat.shape), \
               "Argument 1 and 2 have different shapes"
        return (kMat * hlhMat).sum() / float((nx[0]-1)*(nx[0]-1))

    ## Fast computation of the biased HSIC when the kernel matrix
    # for the labels can be decomposed into HLH = y * y' and the
    # rank of y is low.
    # @param kMat The kernel matrix for the data.
    # @param y The factor with HLH = y * y' for the labels.
    #
    def BiasedHSICFast2(self, kMat, y):
        nx = kMat.shape
        assert (kMat.shape[0] == y.shape[0]), \
               "Argument 1 and 2 have different shapes"
        return numpy.dot(y.T, numpy.dot(kMat, y)).trace() \
               / float((nx[0]-1)*(nx[0]-1))

    ## Fast computation of the biased HSIC when the kernel matrix
    # for the data K can be decomposed into K = x * x' and that
    # for the labels can be decomposed into HLH = y * y' and the
    # rank of y is low (this will be useful after incomplete cholesky
    # factorization).
    # @param x The factor with K = x * x' for the data.
    # @param y The factor with HLH = y * y' for the labels.
    #
    def BiasedHSICFast3(self, x, y):
        nx = x.shape
        assert (x.shape[0] == y.shape[0]), \
               "Argument 1 and 2 have different shapes"
        return (numpy.dot(x.T, y)**2).sum() / float((nx[0]-1)*(nx[0]-1))

    ## Compute the UNbiased estimator of HSIC.
    # The diagonals of both kernel matrices are set to zero first.
    # @param x The data.
    # @param y The labels.
    # @param kernelx The kernel on the data, default to linear kernel.
    # @param kernely The kernel on the labels, default to linear kernel.
    #
    def UnBiasedHSIC(self, x, y, kernelx=None, kernely=None):
        nx = x.shape
        ny = y.shape
        assert nx[0] == ny[0], \
               "Argument 1 and 2 have different number of data points"

        if kernelx is None:
            kernelx = self._DefaultKernel()
        if kernely is None:
            kernely = self._DefaultKernel()

        kMat = kernelx.Dot(x, x)
        setdiag0(kMat)

        lMat = kernely.Dot(y, y)
        setdiag0(lMat)

        sL = lMat.sum(axis=1)
        ssL = sL.sum()
        return self._UnBiasedEstimate(kMat, lMat, sL, ssL, nx[0])

    ## Objective of the UNbiased HSIC when performing optimization over
    # the kernel parameters (the negated unbiased HSIC).
    # @param param The kernel parameters.
    # @param x The data.
    # @param kernelx The kernel on the data.
    # @param lMat The kernel matrix of the label.
    # @param sL The vector of the sum of each row of lMat.
    # @param ssL The sum of all entries in lMat.
    #
    def ObjUnBiasedHSIC(self, param, x, kernelx, lMat, sL, ssL):
        kMat = kernelx.DotCacheKernel(x, param)
        return -self._UnBiasedEstimate(kMat, lMat, sL, ssL, x.shape[0])

    ## Gradient of the objective of the UNbiased HSIC when performing
    # optimization over the kernel parameters.
    # @param param The kernel parameters.
    # @param x The data.
    # @param kernelx The kernel on the data.
    # @param lMat The kernel matrix of the label.
    # @param sL The vector of the sum of each row of lMat.
    # @param ssL The sum of all entries in lMat.
    #
    def GradUnBiasedHSIC(self, param, x, kernelx, lMat, sL, ssL):
        kMat = kernelx.GradDotCacheKernel(x, param)
        return -self._UnBiasedEstimate(kMat, lMat, sL, ssL, x.shape[0])

    ## Fast computation of the UNbiased HSIC when the kernel matrices
    # for the data and for the labels are already computed.
    # @param kMat The kernel matrix for the data.
    # @param lMat The kernel matrix of the label.
    # @param sL The vector of the sum of each row of lMat.
    # @param ssL The sum of all entries in lMat.
    #
    def UnBiasedHSICFast(self, kMat, lMat, sL, ssL):
        assert (kMat.shape == lMat.shape), \
               "Argument 1 and 2 have different shapes"
        return self._UnBiasedEstimate(kMat, lMat, sL, ssL, kMat.shape[0])
## Standardize the data in place: every column (dimension) is shifted to
# zero mean and scaled to unit standard deviation. Nothing is returned.
# @param data [read\write] The data to be normalized. Each row is a
# datum and each column a dimension.
#
def normalize(data):
    column_mean = data.mean(axis=0)
    column_std = data.std(axis=0)
    # Write the results back into the caller's array.
    numpy.subtract(data, column_mean, out=data)
    numpy.true_divide(data, column_std, out=data)
## Center the kernel matrix in the feature space, in place: the row means
# are subtracted along both axes and the mean of the row means is added
# back. Nothing is returned.
# @param k [read\write] The kernel matrix to be centered; must be square.
#
def center(k):
    dims = k.shape
    assert dims[0] == dims[1], 'k must be symmetric and positive semidefinite'

    # Row means as a column vector, computed before k is modified.
    row_mean = k.mean(axis=1).reshape(dims[0], 1)
    grand_mean = row_mean.mean()

    k -= row_mean
    k -= row_mean.T
    k += grand_mean
+30
Ver Arquivo
@@ -0,0 +1,30 @@
regression hsic linear
regression_linear.py data/features_hsic_0.dat data/labels_0.dat data/predict_hsic_0.dat 0.0462957047621
regression_linear.py data/features_hsic_1.dat data/labels_1.dat data/predict_hsic_1.dat 0.0619267723098
regression_linear.py data/features_hsic_2.dat data/labels_2.dat data/predict_hsic_2.dat 0.119146367285
regression_linear.py data/features_hsic_3.dat data/labels_3.dat data/predict_hsic_3.dat 0.108100811067
regression hsic bayesian
regression_bayesian.py data/features_hsic_0.dat data/labels_0.dat data/predict_hsic_0.dat 0.0574420849846
regression_bayesian.py data/features_hsic_1.dat data/labels_1.dat data/predict_hsic_1.dat 0.0744427035158
regression_bayesian.py data/features_hsic_2.dat data/labels_2.dat data/predict_hsic_2.dat 0.131973261761
regression_bayesian.py data/features_hsic_3.dat data/labels_3.dat data/predict_hsic_3.dat 0.1292239022
regression hsic dtree
regression_dtree.py data/features_hsic_0.dat data/labels_0.dat data/predict_hsic_0.dat 0.0616359499887
regression_dtree.py data/features_hsic_1.dat data/labels_1.dat data/predict_hsic_1.dat 0.0870560437385
regression_dtree.py data/features_hsic_2.dat data/labels_2.dat data/predict_hsic_2.dat 0.225990113056
regression_dtree.py data/features_hsic_3.dat data/labels_3.dat data/predict_hsic_3.dat 0.122724406103
regression pca linear
regression_linear.py data/features_pca.dat data/labels_0.dat data/predict_pca_0.dat 0.0525417950272
regression_linear.py data/features_pca.dat data/labels_1.dat data/predict_pca_1.dat 0.0630462800532
regression_linear.py data/features_pca.dat data/labels_2.dat data/predict_pca_2.dat 0.11837933335
regression_linear.py data/features_pca.dat data/labels_3.dat data/predict_pca_3.dat 0.105251711141
regression pca bayesian
regression_bayesian.py data/features_pca.dat data/labels_0.dat data/predict_pca_0.dat 0.0598334909344
regression_bayesian.py data/features_pca.dat data/labels_1.dat data/predict_pca_1.dat 0.0741352839217
regression_bayesian.py data/features_pca.dat data/labels_2.dat data/predict_pca_2.dat 0.122554051214
regression_bayesian.py data/features_pca.dat data/labels_3.dat data/predict_pca_3.dat 0.122681510711
regression pca dtree
regression_dtree.py data/features_pca.dat data/labels_0.dat data/predict_pca_0.dat 0.0547733949265
regression_dtree.py data/features_pca.dat data/labels_1.dat data/predict_pca_1.dat 0.0922293538752
regression_dtree.py data/features_pca.dat data/labels_2.dat data/predict_pca_2.dat 0.19924179118
regression_dtree.py data/features_pca.dat data/labels_3.dat data/predict_pca_3.dat 0.0951762860752
+38
Ver Arquivo
@@ -0,0 +1,38 @@
# Copyright (c) 2007, National ICT Australia
# All rights reserved.
#
# The contents of this file are subject to the Mozilla Public License Version
# 1.1 (the 'License'); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
# http://www.mozilla.org/MPL/
#
# Software distributed under the License is distributed on an 'AS IS' basis,
# WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
# for the specific language governing rights and limitations under the
# License.
#
# Authors: Christfried Webers
# Created: (09/10/2007)
# Last Updated:
#
## Exception classes for the Elefant project

class CElefantException(Exception):
    """Base class for exceptions in Elefant."""


class CElefantConstraintException(CElefantException):
    """Exception raised for constraint violation.

    Attributes:
        value -- input value violating the constraint
        message -- explanation of the error
    """

    def __init__(self, value, message):
        # Initialise the base class so that `args` is populated; without
        # it str(e) is empty and the exception cannot be re-created from
        # its arguments (e.g. when it is pickled or copied).
        CElefantException.__init__(self, value, message)
        self.value = value
        self.message = message

    def __str__(self):
        return "%s (value: %r)" % (self.message, self.value)
+30
Ver Arquivo
@@ -0,0 +1,30 @@
import time
import sys
import numpy
import vector
from sklearn.linear_model import BayesianRidge, LinearRegression
from sklearn.cross_validation import train_test_split
usage = "filename features_file labels_file output_file split_seed"

# Command-line driver: fit a Bayesian ridge regression on the training part
# of a seeded train/test split and write the test-set predictions to
# output_file.
if __name__ == "__main__":
    if len(sys.argv) != 5:
        print(usage)
    else:
        file_x = sys.argv[1]
        file_y = sys.argv[2]
        file_out = sys.argv[3]
        # random_state must be an integer, not the raw argument string.
        split_seed = int(sys.argv[4])

        X = numpy.genfromtxt(file_x, delimiter=' ')
        y = numpy.genfromtxt(file_y, delimiter=' ')

        # Split the data into training/testing sets
        X_train, X_test, y_train, y_test = train_test_split(
            X, y, test_size=0.33, random_state=split_seed)

        # Bayesian Ridge Regression. Fit on the training part only, so the
        # held-out rows are not seen before they are predicted.
        clf = BayesianRidge(compute_score=True)
        clf.fit(X_train, y_train)
        y_predict = clf.predict(X_test)
        numpy.savetxt(file_out, y_predict)
+30
Ver Arquivo
@@ -0,0 +1,30 @@
import time
import sys
import numpy
import vector
from sklearn.tree import DecisionTreeRegressor
from sklearn.cross_validation import train_test_split
# Command-line help; the script needs exactly four arguments after its name.
usage = "filename features_file labels_file output_file split_seed"

if __name__ == "__main__":
    if len(sys.argv) != 5:
        print(usage)
    else:
        file_x = sys.argv[1]
        file_y = sys.argv[2]
        file_out = sys.argv[3]
        # argv entries are strings, but random_state must be an integer seed.
        split_seed = int(sys.argv[4])

        # Features and labels are read from space-delimited text files.
        X = numpy.genfromtxt(file_x, delimiter=' ')
        y = numpy.genfromtxt(file_y, delimiter=' ')

        # Split the data into training/testing sets
        X_train, X_test, y_train, y_test = train_test_split(
            X, y, test_size=0.33, random_state=split_seed)

        # Decision tree regressor.  Fit on the training part only, so the
        # predictions written below are for rows the model has not seen.
        clf = DecisionTreeRegressor(max_depth=2)
        clf.fit(X_train, y_train)
        y_predict = clf.predict(X_test)
        numpy.savetxt(file_out, y_predict)
+30
Ver Arquivo
@@ -0,0 +1,30 @@
import time
import sys
import numpy
import vector
from sklearn import linear_model
from sklearn.cross_validation import train_test_split
# Command-line help; the script needs exactly four arguments after its name.
usage = "filename features_file labels_file output_file split_seed"

if __name__ == "__main__":
    if len(sys.argv) != 5:
        print(usage)
    else:
        file_x = sys.argv[1]
        file_y = sys.argv[2]
        file_out = sys.argv[3]
        # argv entries are strings, but random_state must be an integer seed.
        split_seed = int(sys.argv[4])

        # Features and labels are read from space-delimited text files.
        X = numpy.genfromtxt(file_x, delimiter=' ')
        y = numpy.genfromtxt(file_y, delimiter=' ')

        # Split the data into training/testing sets
        X_train, X_test, y_train, y_test = train_test_split(
            X, y, test_size=0.33, random_state=split_seed)

        # Linear regression fitted on the training part; predictions for the
        # held-out part are written to the output file.
        regr = linear_model.LinearRegression()
        regr.fit(X_train, y_train)
        y_predict = regr.predict(X_test)
        numpy.savetxt(file_out, y_predict)
+30
Ver Arquivo
@@ -0,0 +1,30 @@
import time
import sys
import numpy
import vector
from sklearn import linear_model
from sklearn.cross_validation import train_test_split
# Command-line help; the script needs exactly four arguments after its name.
usage = "filename features_file labels_file output_file split_seed"

if __name__ == "__main__":
    if len(sys.argv) != 5:
        print(usage)
    else:
        file_x = sys.argv[1]
        file_y = sys.argv[2]
        file_out = sys.argv[3]
        # argv entries are strings, but random_state must be an integer seed.
        split_seed = int(sys.argv[4])

        # Features and labels are read from space-delimited text files.
        X = numpy.genfromtxt(file_x, delimiter=' ')
        y = numpy.genfromtxt(file_y, delimiter=' ')

        # Split the data into training/testing sets
        X_train, X_test, y_train, y_test = train_test_split(
            X, y, test_size=0.33, random_state=split_seed)

        # Logistic regression fitted on the training part; predictions for
        # the held-out part are written to the output file.
        regr = linear_model.LogisticRegression()
        regr.fit(X_train, y_train)
        y_predict = regr.predict(X_test)
        numpy.savetxt(file_out, y_predict)
+33
Ver Arquivo
@@ -0,0 +1,33 @@
import time
import sys
import numpy
import vector
from sklearn.svm import SVR
from sklearn.cross_validation import train_test_split
# Command-line help; the script needs exactly four arguments after its name.
usage = "filename features_file labels_file output_file split_seed"

if __name__ == "__main__":
    if len(sys.argv) != 5:
        print(usage)
    else:
        file_x = sys.argv[1]
        file_y = sys.argv[2]
        file_out = sys.argv[3]
        # argv entries are strings, but random_state must be an integer seed.
        split_seed = int(sys.argv[4])

        # Features and labels are read from space-delimited text files.
        X = numpy.genfromtxt(file_x, delimiter=' ')
        y = numpy.genfromtxt(file_y, delimiter=' ')

        # Split the data into training/testing sets
        X_train, X_test, y_train, y_test = train_test_split(
            X, y, test_size=0.33, random_state=split_seed)

        # Support vector regression.  Fit on the training part only, so the
        # predictions written below are for rows the model has not seen.
        svr = SVR(kernel='linear', C=1e3)
        svr.fit(X_train, y_train)
        y_predict = svr.predict(X_test)
        numpy.savetxt(file_out, y_predict)
+121
Ver Arquivo
@@ -0,0 +1,121 @@
# Stage 1: build the raw data files.
echo "generating raw files"
python cPickleparser.py
python features_sampled.py

# Stage 2: derive the feature sets.
echo "generating bahsic features"
# Only the run for label 0 is given the extra normalized-features path.
python features_bahsic.py "data/features_raw.dat" "data/labels_0.dat" "data/features_bahsic_0.dat" "data/features_normalized.dat"
for label in 1 2 3; do
    python features_bahsic.py "data/features_raw.dat" "data/labels_${label}.dat" "data/features_bahsic_${label}.dat"
done
echo "generating rrt features"
echo "Run the matlab file to generate Recht and Rahimi Random Fourier features"
echo "generating downsampled features"
python features_sampled.py "data/features_sampled.dat"

# Stage 3: run the train/test split once per label file.
for label in 0 1 2 3; do
    python split_data.py "data/features_raw.dat" "data/labels_${label}.dat" "data/labels_test_${label}.dat"
done
#BAHSIC
# Every regression script expects four arguments: features file, labels file,
# output file and split seed.  The seed has to be a separate word; when it is
# placed inside the quoted output path the script receives only three
# arguments and just prints its usage line.
# The scripts write with numpy.savetxt, which does not create directories.
mkdir -p predict
for model in linear bayesian dtree svr logistic; do
    echo "regression bahsic ${model}"
    for label in 0 1 2 3; do
        python "regression_${model}.py" "data/features_bahsic_${label}.dat" "data/labels_${label}.dat" "predict/hsic_${model}_${label}.dat" 42
    done
done
#rrt
# Every regression script expects four arguments: features file, labels file,
# output file and split seed.  Without the seed the script only prints its
# usage line, so it is passed explicitly here.
# The scripts write with numpy.savetxt, which does not create directories.
mkdir -p predict
for model in linear bayesian dtree svr logistic; do
    echo "regression rrt ${model}"
    for label in 0 1 2 3; do
        python "regression_${model}.py" "data/features_rrt.dat" "data/labels_${label}.dat" "predict/rrt_${model}_${label}.dat" 42
    done
done
#sampled
# Every regression script expects four arguments: features file, labels file,
# output file and split seed.  Without the seed the script only prints its
# usage line, so it is passed explicitly here.
# The scripts write with numpy.savetxt, which does not create directories.
mkdir -p predict
for model in linear bayesian dtree svr logistic; do
    echo "regression sampled ${model}"
    for label in 0 1 2 3; do
        python "regression_${model}.py" "data/features_sampled.dat" "data/labels_${label}.dat" "predict/sampled_${model}_${label}.dat" 42
    done
done
+17
Ver Arquivo
@@ -0,0 +1,17 @@
#!/usr/bin/env python
# Copyright (c) 2004 National ICT Australia --- All Rights Reserved
# THIS IS UNPUBLISHED PROPRIETARY SOURCE CODE OF SML.NICTA
# The copyright notice above does not evidence any
# actual or intended publication of this work.
#
# Authors: Le Song
# Last changed: 02/08/2006 (Christfried Webers)
import numpy
def setdiag0(K):
    """Set the main-diagonal entries of a matrix to 0, in place.

    K is modified directly and nothing is returned.  Square matrices behave
    as before; rectangular 2-D matrices now have their entries K[i, i]
    zeroed as well, where the previous hand-computed flat stride hit the
    wrong cells or ran past the end of the array.
    """
    # fill_diagonal derives the flat stride from the column count, so it is
    # correct for any 2-D shape.
    numpy.fill_diagonal(K, 0.0)
+24
Ver Arquivo
@@ -0,0 +1,24 @@
import time
import sys
import numpy
import vector
from sklearn.cross_validation import train_test_split
# Command-line help; the script needs exactly three arguments after its name.
usage = "filename features_file labels_file labels_test_file"

if __name__ == "__main__":
    if len(sys.argv) != 4:
        print(usage)
    else:
        file_x = sys.argv[1]
        file_y = sys.argv[2]
        file_y_test = sys.argv[3]

        # Features and labels are read from space-delimited text files.
        X = numpy.genfromtxt(file_x, delimiter=' ')
        y = numpy.genfromtxt(file_y, delimiter=' ')

        # Fixed seed, so repeated runs produce the same partition.
        X_train, X_test, y_train, y_test = train_test_split(
            X, y, test_size=0.33, random_state=42)

        # Test labels go to the requested file; the feature partitions are
        # always written to the same two fixed paths.
        numpy.savetxt(file_y_test, y_test)
        numpy.savetxt("data/features_train.dat", X_train)
        numpy.savetxt("data/features_test.dat", X_test)
+2008
Ver Arquivo
Diferenças do arquivo suprimidas por serem muito extensas Carregar Diff