Initial commit
Esse commit está contido em:
@@ -0,0 +1,48 @@
|
||||
dump/
|
||||
|
||||
*.dat
|
||||
|
||||
# Byte-compiled / optimized / DLL files
|
||||
__pycache__/
|
||||
*.py[cod]
|
||||
|
||||
# C extensions
|
||||
*.so
|
||||
|
||||
# Distribution / packaging
|
||||
.Python
|
||||
env/
|
||||
build/
|
||||
develop-eggs/
|
||||
dist/
|
||||
eggs/
|
||||
lib/
|
||||
lib64/
|
||||
parts/
|
||||
sdist/
|
||||
var/
|
||||
*.egg-info/
|
||||
.installed.cfg
|
||||
*.egg
|
||||
|
||||
# PyInstaller
|
||||
# Usually these files are written by a python script from a template
|
||||
# before PyInstaller builds the exe, so as to inject date/other infos into it.
|
||||
*.manifest
|
||||
*.spec
|
||||
|
||||
# Installer logs
|
||||
pip-log.txt
|
||||
pip-delete-this-directory.txt
|
||||
|
||||
# Unit test / coverage reports
|
||||
htmlcov/
|
||||
.tox/
|
||||
.coverage
|
||||
.cache
|
||||
nosetests.xml
|
||||
coverage.xml
|
||||
|
||||
|
||||
# PyBuilder
|
||||
target/
|
||||
@@ -0,0 +1,22 @@
|
||||
|
||||
The MIT License (MIT)
|
||||
|
||||
Copyright (c) 2014 Rishi Dua <rishirdua@gmail.com>, TV Ashok <veeranjaneyaashok@gmail.com>
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
+28
@@ -0,0 +1,28 @@
|
||||
EMOTION CLASSIFICATION
|
||||
======================
|
||||
Feature selection via dependence maximization for EEG-based emotion classification. This project was done as part of the Neural Networks course in the Fall 2014 semester at IIT Delhi.
|
||||
|
||||
Supervisor: Dr. Jayadeva
|
||||
|
||||
Authors
|
||||
-------
|
||||
Rishi Dua <http://github.com/rishirdua>
|
||||
TV Ashok <http://github.com/tvashok>
|
||||
|
||||
Install
|
||||
-------
|
||||
1. Install Python and scikit
|
||||
2. Copy the DEAP dataset (cPickle preprocessed) to data/raw folder
|
||||
3. Run script.sh
|
||||
|
||||
Documentation
|
||||
-------------
|
||||
Refer to docs/readme.pdf
|
||||
|
||||
Contribute
|
||||
----------
|
||||
- Source Code: https://github.com/rishirdua/emotion-classification/
|
||||
|
||||
License
|
||||
-------
|
||||
This project is licensed under the terms of the MIT license. See LICENCE.txt for details.
|
||||
+190
@@ -0,0 +1,190 @@
|
||||
# Copyright (c) 2006, National ICT Australia
|
||||
# All rights reserved.
|
||||
#
|
||||
# The contents of this file are subject to the Mozilla Public License Version
|
||||
# 1.1 (the 'License'); you may not use this file except in compliance with
|
||||
# the License. You may obtain a copy of the License at
|
||||
# http://www.mozilla.org/MPL/
|
||||
#
|
||||
# Software distributed under the License is distributed on an 'AS IS' basis,
|
||||
# WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
|
||||
# for the specific language governing rights and limitations under the
|
||||
# License.
|
||||
#
|
||||
# Authors: Le Song (lesong@it.usyd.edu.au)
|
||||
# Created: (20/10/2006)
|
||||
# Last Updated: (dd/mm/yyyy)
|
||||
#
|
||||
|
||||
##\package elefant.fselection.bahsic
|
||||
# This module performs backward elimination for feature selection
|
||||
# using HSIC (BAHSIC).
|
||||
#
|
||||
# The algorithm proceeds recursively, eliminating the least
|
||||
# relevant features and adding them to the eliminated list
|
||||
# in each iteration. For more theoretical underpinning see the
|
||||
# following reference for more information:
|
||||
#
|
||||
# Le Song, Justin Bedo, Karsten M. Borgwardt, Arthur Gretton
|
||||
# and Alex Smola. The BAHSIC family of gene selection algorithms.
|
||||
#
|
||||
|
||||
__version__ = '$Revision: $'
|
||||
# $Source$
|
||||
|
||||
import numpy
|
||||
from scipy import optimize
|
||||
|
||||
import vector
|
||||
from hsic import CHSIC
|
||||
from setdiag0 import setdiag0
|
||||
|
||||
|
||||
## Class that performs backward elimination for feature selection (BAHSIC).
|
||||
#
|
||||
# It has two versions of BAHSIC: one without optimization over the kernel
|
||||
# parameters and one with optimization over the kernel parameters.
|
||||
#
|
||||
class CBAHSIC(object):
    def __init__(self):
        pass

    ## Choose which of the remaining features to eliminate in one round.
    # @param objj One HSIC score per remaining feature.
    # @param flg3 The number of desired features.
    # @param flg4 The proportion of features eliminated in each iteration.
    # @return Positions into objj, highest score first.
    #
    # While more than flg3 features remain, int(flg4 * m) + 1 of them are
    # dropped per round, capped so that a round never goes below flg3 and
    # floored at one so that every round makes progress. Once flg3 or fewer
    # remain, exactly one feature (the highest score) is dropped per round.
    #
    @staticmethod
    def _EliminationPositions(objj, flg3, flg4):
        m = len(objj)
        if m > flg3:
            num = max(1, min(int(flg4 * m) + 1, m - flg3))
            # Reverse first and then truncate: a negative-step slice with an
            # explicit stop of -1 is empty, which used to stall the loop
            # whenever all m features had to go in one round.
            return numpy.argsort(objj)[::-1][:num]
        return numpy.array([numpy.argmax(objj)])

    ## Tune the data-kernel parameters by minimising the negated unbiased
    # HSIC with conjugate gradients, once per row of sga as starting point.
    # @param hsic The CHSIC instance providing objective and gradient.
    # @param x The data.
    # @param kernelx The kernel on the data.
    # @param sga The candidate starting parameters, one per row.
    # @param L The label kernel matrix with zeroed diagonal.
    # @param sL The row sums of L.
    # @param ssL The sum of all entries of L.
    # @return The optimised parameters with the smallest objective value.
    #
    @staticmethod
    def _BestKernelParam(hsic, x, kernelx, sga, L, sL, ssL):
        sgaMat = []
        hsicMat = []
        for k in range(sga.shape[0]):
            ## bfgs in scipy is not working here
            retval = optimize.fmin_cg(hsic.ObjUnBiasedHSIC,
                                      sga[[k], ].ravel(),
                                      hsic.GradUnBiasedHSIC,
                                      # scipy documents args as a tuple.
                                      args=(x, kernelx, L, sL, ssL),
                                      gtol=1e-6, maxiter=100,
                                      full_output=True, disp=False)
            sgaMat.append(retval[0])
            hsicMat.append(retval[1])
        return sgaMat[numpy.argmin(hsicMat)]

    ## Shared backward-elimination loop behind BAHSICOpt and BAHSICRaw.
    # @param x The data.
    # @param y The labels.
    # @param kernelx The kernel on the data.
    # @param kernely The kernel on the labels.
    # @param flg3 The number of desired features.
    # @param flg4 The proportion of features eliminated in each iteration.
    # @param tune Whether to re-tune the kernel parameters in every round.
    # @return The feature indices in the order they were eliminated.
    #
    def _Eliminate(self, x, y, kernelx, kernely, flg3, flg4, tune):
        assert len(x.shape) == 2, 'Argument 1 has wrong shape'
        assert len(y.shape) == 2, 'Argument 2 has wrong shape'
        assert x.shape[0] == y.shape[0], \
            'Argument 1 and 2 have different number of data points'

        print('--initializing...')
        hsic = CHSIC()

        # Label kernel statistics are fixed for the whole run.
        L = kernely.Dot(y, y)
        setdiag0(L)
        sL = numpy.sum(L, axis=1)
        ssL = numpy.sum(sL)

        eliminatedI = []
        selectedI = set(numpy.arange(x.shape[1]))

        kernelx.CreateCacheKernel(x)
        sga = kernelx._typicalParam if tune else None

        while True:
            selectedI = selectedI - set(eliminatedI)
            # sI fixes one ordering of the remaining features; objj below is
            # built in that same order so positions map back to indices.
            sI = numpy.array(list(selectedI))
            m = len(sI)

            print(m)
            if m == 0:
                # Everything was eliminated in the previous round.
                break
            if m == 1:
                eliminatedI.append(selectedI.pop())
                break

            extra = ()
            if tune:
                extra = (self._BestKernelParam(hsic, x, kernelx, sga,
                                               L, sL, ssL),)

            objj = []
            for j in sI:
                # Presumably the kernel matrix with feature j taken out;
                # confirm against the kernel's DecDotCacheKernel.
                K = kernelx.DecDotCacheKernel(x, x[:, [j]], *extra)
                setdiag0(K)
                objj.append(hsic.UnBiasedHSICFast(K, L, sL, ssL))

            maxj = self._EliminationPositions(objj, flg3, flg4)
            j = numpy.take(sI, maxj)
            eliminatedI.extend(j)
            kernelx.DecCacheKernel(x, x[:, j])

        kernelx.ClearCacheKernel(x)
        return eliminatedI

    ## BAHSIC with optimization over the kernel parameters.
    # @param x The data.
    # @param y The labels.
    # @param kernelx The kernel on the data.
    # @param kernely The kernel on the labels.
    # @param flg3 The number of desired features.
    # @param flg4 The proportion of features eliminated in each iteration.
    # @return The feature indices in the order they were eliminated.
    #
    def BAHSICOpt(self, x, y, kernelx, kernely, flg3, flg4):
        return self._Eliminate(x, y, kernelx, kernely, flg3, flg4, True)

    ## BAHSIC without optimization over the kernel parameters.
    # @param x The data.
    # @param y The labels.
    # @param kernelx The kernel on the data.
    # @param kernely The kernel on the labels.
    # @param flg3 The number of desired features.
    # @param flg4 The proportion of features eliminated in each iteration.
    # @return The feature indices in the order they were eliminated.
    #
    def BAHSICRaw(self, x, y, kernelx, kernely, flg3, flg4):
        return self._Eliminate(x, y, kernelx, kernely, flg3, flg4, False)
|
||||
@@ -0,0 +1,46 @@
|
||||
import cPickle
|
||||
import os.path
|
||||
from multiprocessing import Pool
|
||||
import sys
|
||||
|
||||
|
||||
def main():
    """Export sub-sampled DEAP recordings and their labels as text files.

    Reads the pickled recordings ``data/raw/sNN.dat`` of every 8th
    participant and, for each trial, writes one space-separated row to
    ``data/features_raw.dat`` holding every 32nd time sample of every
    channel. The four label columns of the trial go to
    ``data/labels_0.dat`` ... ``data/labels_3.dat``, one value per line.

    NOTE(review): the nesting was reconstructed from a listing whose
    indentation had been stripped (one row and one label line per trial);
    confirm against the original file.
    """
    nTrial, nUser, nChannel, nTime = 40, 32, 40, 8064
    user_step = 8    # only participants 1, 9, 17 and 25 are exported
    time_step = 32   # only every 32nd time sample is exported

    print("Program started" + "\n")
    # Context managers close every file even if a recording fails to load.
    with open("data/features_raw.dat", 'w') as fout_data, \
            open("data/labels_0.dat", 'w') as fout_labels0, \
            open("data/labels_1.dat", 'w') as fout_labels1, \
            open("data/labels_2.dat", 'w') as fout_labels2, \
            open("data/labels_3.dat", 'w') as fout_labels3:
        label_files = (fout_labels0, fout_labels1, fout_labels2, fout_labels3)
        for i in range(0, nUser, user_step):
            fname = "data/raw/s%02d.dat" % (i + 1)
            # SECURITY: unpickling can execute arbitrary code; only feed this
            # script recordings obtained from a trusted source.
            with open(fname, 'rb') as fin:
                x = cPickle.load(fin)
            print(fname)
            for tr in range(nTrial):
                for dat in range(0, nTime, time_step):
                    for ch in range(nChannel):
                        fout_data.write(str(x['data'][tr][ch][dat]) + " ")
                for k, fout in enumerate(label_files):
                    fout.write(str(x['labels'][tr][k]) + "\n")
                fout_data.write("\n")

    print("\n" + "Print Successful")


if __name__ == "__main__":
    main()
|
||||
Arquivo binário não exibido.
@@ -0,0 +1,56 @@
|
||||
import time
|
||||
import sys
|
||||
import numpy
|
||||
import vector
|
||||
from bahsic import CBAHSIC
|
||||
|
||||
usage = ("usage: python %s features_file labels_file out_file "
         "[normalized_out_file]" % sys.argv[0])

if __name__ == "__main__":

    if len(sys.argv) < 4:
        print(usage)
    else:
        file_x = sys.argv[1]
        file_y = sys.argv[2]
        file_out = sys.argv[3]
        # Optional fourth argument: where to save the normalized data.
        file_normalized = sys.argv[4] if len(sys.argv) >= 5 else None

        X = numpy.genfromtxt(file_x, delimiter=' ')
        y = numpy.genfromtxt(file_y, delimiter=' ')

        bahsic = CBAHSIC()
        data_no = X.shape[0]
        features_tokeep = 5040
        y = y.reshape(data_no, 1)

        # Normalize the labels.
        # NOTE(review): pno/nno equal the positive/negative counts only when
        # the labels are +1/-1 -- confirm this is what the label files hold.
        y = 1.0 * y
        tmp_no = numpy.sum(y)
        pno = (data_no + tmp_no) / 2
        nno = (data_no - tmp_no) / 2
        y[y > 0] = y[y > 0] / pno
        y[y < 0] = y[y < 0] / nno

        # Normalize the data to zero mean and unit standard deviation per
        # column; constant columns are divided by one so they become zeros
        # instead of NaN.
        m = X.mean(0)
        s = X.std(0)
        s = numpy.where(s == 0, 1.0, s)
        X -= m
        X /= s

        # time.clock was removed in Python 3.8; fall back to it only where
        # perf_counter does not exist (Python 2).
        timer = getattr(time, 'perf_counter', None) or time.clock
        t1 = timer()
        tmp = bahsic.BAHSICRaw(X, y, vector.CLinearKernel(),
                               vector.CLinearKernel(), features_tokeep, 0.1)
        t2 = timer()
        print("time taken: " + str(t2 - t1))
        print('--rank of the features')
        print('--better features towards the end of the list:')
        print(tmp)

        # The last features_tokeep entries of the elimination order are the
        # features eliminated last; slicing from the end works for any
        # number of input features.
        kept = [int(j) for j in tmp[-features_tokeep:]]
        hsicfeatures = X[:, kept]

        numpy.savetxt(file_out, hsicfeatures)
        if file_normalized is not None:
            numpy.savetxt(file_normalized, X)
|
||||
@@ -0,0 +1,39 @@
|
||||
|
||||
|
||||
%% DESCRIPTION
% Maps the raw features onto random Fourier features as defined in:
%   Rahimi, Ali, and Benjamin Recht. "Random features for large-scale
%   kernel machines." In Advances in neural information processing
%   systems, pp. 1177-1184. 2007.

clc;
clear all;

%% Load the raw feature matrix (one row per data point)
X = dlmread('data/features_raw.dat', ' ');
[n_data, n_features] = size(X);

%% Parameters
gamma_inv = 0.1;
gamma = 1/gamma_inv;
sigma = sqrt(2/gamma_inv);      % standard deviation of the random projections
n_randomfeatures = 5040;

%% Calculate
% Draw the projection matrix first and the phase offsets second so the
% random number stream is consumed in the same order as before.
W = normrnd(0, sigma, n_features, n_randomfeatures);
b = 2*pi*rand(1, n_randomfeatures);
B = repmat(b, n_data, 1);       % same offsets for every data point
X_rrt = sqrt(2/n_randomfeatures) * cos(X*W + B);
disp('calculated');

% Per-column normalization of the transformed features is deliberately
% not applied here.

%% Write
dlmwrite('data/features_rrt.dat', X_rrt, ' ');

disp('writen to file');
|
||||
@@ -0,0 +1,36 @@
|
||||
import cPickle
|
||||
import os.path
|
||||
from multiprocessing import Pool
|
||||
import sys
|
||||
|
||||
usage = "filename out_file"
|
||||
|
||||
def generate_features(fout_file, user_step=8, time_step=64):
    """Export sub-sampled DEAP recordings as a text feature matrix.

    Reads the pickled recordings ``data/raw/sNN.dat`` of every
    ``user_step``-th participant and writes one space-separated row per
    trial to ``fout_file``; a row holds every ``time_step``-th time sample
    of every channel.

    NOTE(review): the nesting was reconstructed from a listing whose
    indentation had been stripped (one row per trial); confirm against the
    original file.

    @param fout_file  Path of the text file to create.
    @param user_step  Export every user_step-th participant (default 8).
    @param time_step  Export every time_step-th time sample (default 64).
    """
    nTrial, nUser, nChannel, nTime = 40, 32, 40, 8064

    print("Program started" + "\n")
    # Context managers close the files even if a recording fails to load.
    with open(fout_file, 'w') as fout_data:
        for i in range(0, nUser, user_step):
            fname = "data/raw/s%02d.dat" % (i + 1)
            # SECURITY: unpickling can execute arbitrary code; only feed this
            # script recordings obtained from a trusted source.
            with open(fname, 'rb') as fin:
                x = cPickle.load(fin)
            print(fname)
            for tr in range(nTrial):
                for dat in range(0, nTime, time_step):
                    for ch in range(nChannel):
                        fout_data.write(str(x['data'][tr][ch][dat]) + " ")
                fout_data.write("\n")


if __name__ == "__main__":
    if len(sys.argv) != 2:
        print(usage)
    else:
        generate_features(sys.argv[1])
|
||||
+168
@@ -0,0 +1,168 @@
|
||||
# Copyright (c) 2006, National ICT Australia
|
||||
# All rights reserved.
|
||||
#
|
||||
# The contents of this file are subject to the Mozilla Public License Version
|
||||
# 1.1 (the "License"); you may not use this file except in compliance with
|
||||
# the License. You may obtain a copy of the License at
|
||||
# http://www.mozilla.org/MPL/
|
||||
#
|
||||
# Software distributed under the License is distributed on an "AS IS" basis,
|
||||
# WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
|
||||
# for the specific language governing rights and limitations under the
|
||||
# License.
|
||||
#
|
||||
# Authors: Le Song (lesong@it.usyd.edu.au) and Alex Smola
|
||||
# (alex.smola@nicta.com.au)
|
||||
# Created: (20/10/2006)
|
||||
# Last Updated: (dd/mm/yyyy)
|
||||
#
|
||||
|
||||
##\package elefant.kernels.generic
|
||||
# This module contains generic class for kernels
|
||||
#
|
||||
# The CKernel class provides common interface for all kernel classes. Note
|
||||
# that it should never be instantiated.
|
||||
#
|
||||
|
||||
__version__ = "$Revision: $"
|
||||
# $Source$
|
||||
|
||||
import numpy
|
||||
import numpy.random as random
|
||||
|
||||
## Generic kernel class
|
||||
#
|
||||
# This kernel provide common interface for all kernels. This interface
|
||||
# includes the following key kernel manipulations (functions):
|
||||
# --Dot(x1, x2): $K(x1, x2)$
|
||||
# --Expand(x1, x2, alpha): $sum_r K(x1_i,x2_r) \times alpha2_r$
|
||||
# --Tensor(x1, y1, x2, y2): $K(x1_i,x2_j) \times (y1_i \times y1_j)$
|
||||
# --TensorExpand(x1, y1, x2, y2, alpha2):
|
||||
# $sum_r K(x1_i,x2_r) \times (y1_i \times y1_r) \times alpha2_r$
|
||||
# --Remember(x): Remember data x
|
||||
# --Forget(x): Remove remembered data x
|
||||
# To design a specific kernel, simply overload these methods. The generic
|
||||
# kernel itself should never be instantiated.
|
||||
#
|
||||
class CKernel(object):
    def __init__(self, blocksize=128):
        ## @var _blocksize
        # Parameter that determines the size of each block when computing the
        # kernel matrix in blocks. Properly blocking the kernel matrix during
        # computation improves the speed.
        #
        self._blocksize = blocksize
        ## @var _name
        # Name of the kernel.
        #
        self._name = "Generic kernel"
        ## @var _cacheData
        # Cache that stores data that have appeared before.
        #
        self._cacheData = {}

    def __str__(self):
        return self._name

    def __repr__(self):
        return "Kernel object of type '" + self._name + "'"

    ## Compute the kernel between two data points x1 and x2.
    # It returns a scalar value of dot product between x1 and x2.
    # @param x1 [read] The first data point.
    # @param x2 [read] The second data point.
    #
    def K(self, x1, x2):
        raise NotImplementedError(
            'CKernel.K in abstract class is not implemented')

    ## Compute the kernel between the data points in x1 and those in x2.
    # It returns a matrix with entry $(ij)$ equal to $K(x1_i, x2_j)$.
    # If index1/index2 is
    # specified, only those data points in x1/x2 with indices corresponding
    # to index1/index2 are used to compute the kernel matrix. Furthermore,
    # if output is specified, the provided buffer is used explicitly to
    # store the kernel matrix.
    # @param x1 [read] The first set of data points.
    # @param x2 [read] The second set of data points.
    # @param index1 [read] The indices into the first set of data points.
    # @param index2 [read] The indices into the second set of data points.
    # @param output [write] The buffer where the output matrix is written into.
    #
    def Dot(self, x1, x2, index1=None, index2=None, output=None):
        raise NotImplementedError(
            'CKernel.Dot in abstract class is not implemented')

    ## Compute the kernel between the data points in x1 and those in x2,
    # then multiply the resulting kernel matrix by alpha2.
    # It returns a matrix with entry $(ij)$ equal to
    # $sum_r K(x1_i,x2_r) \times alpha2_r$.
    # Other parameters are defined similarly as those in Dot.
    # @param x1 [read] The first set of data points.
    # @param x2 [read] The second set of data points.
    # @param alpha2 [read] The set of coefficients.
    # @param index1 [read] The indices into the first set of data points.
    # @param index2 [read] The indices into the second set of data points.
    # @param output [write] The buffer where the output matrix is written into.
    #
    def Expand(self, x1, x2, alpha2, index1=None, index2=None, output=None):
        raise NotImplementedError(
            'CKernel.Expand in abstract class is not implemented')

    ## Compute the kernel between the data points in x1 and those in x2,
    # then multiply the resulting kernel matrix elementwise by the
    # outer-product matrix between y1 and y2. It returns a matrix
    # with entry $(ij)$ equal to $K(x1_i,x2_j) \times (y1_i \times y2_j)$.
    # Other parameters are defined similarly as those in Dot.
    # @param x1 [read] The first set of data points.
    # @param y1 [read] The first set of labels.
    # @param x2 [read] The second set of data points.
    # @param y2 [read] The second set of labels.
    # @param index1 [read] The indices into the first set of data points.
    # @param index2 [read] The indices into the second set of data points.
    # @param output [write] The buffer where the output matrix is written into.
    #
    def Tensor(self, x1, y1, x2, y2, index1=None, index2=None, output=None):
        raise NotImplementedError(
            'CKernel.Tensor in abstract class is not implemented')

    ## Compute the kernel between the data points in x1 and those in x2,
    # then multiply the resulting kernel matrix elementwise by the
    # outer-product matrix between y1 and y2, and finally multiply
    # the resulting matrix by alpha2. It returns a matrix with entry $(ij)$
    # equal to $sum_r K(x1_i,x2_r) \times (y1_i \times y2_r) \times alpha2_r$.
    # Other parameters are defined similarly as those in Dot.
    # @param x1 [read] The first set of data points.
    # @param y1 [read] The first set of labels.
    # @param x2 [read] The second set of data points.
    # @param y2 [read] The second set of labels.
    # @param alpha2 [read] The set of coefficients.
    # @param index1 [read] The indices into the first set of data points.
    # @param index2 [read] The indices into the second set of data points.
    # @param output [write] The buffer where the output matrix is written into.
    #
    def TensorExpand(self, x1, y1, x2, y2, alpha2, index1=None, index2=None,
                     output=None):
        raise NotImplementedError(
            'CKernel.TensorExpand in abstract class is not implemented')

    ## Remember the data by performing necessary preprocessing on
    # the data, storing it in the cache and indexing it by the id of
    # the data. The preprocessing can be defined differently for
    # different classes. If the data have already been remembered,
    # the old stored information is simply overwritten.
    # @param x [read] The data to be remembered.
    #
    def Remember(self, x):
        raise NotImplementedError(
            'CKernel.Remember in abstract class is not implemented')

    ## Remove a remembered data from the cache. If x is not given, then
    # all the data remembered in the cache will be removed. If a given
    # x is not remembered beforehand, False is returned; otherwise, True
    # is returned.
    # @param x [read] The data to be removed.
    #
    def Forget(self, x=None):
        raise NotImplementedError(
            'CKernel.Forget in abstract class is not implemented')
|
||||
|
||||
+267
@@ -0,0 +1,267 @@
|
||||
# Copyright (c) 2006, National ICT Australia
|
||||
# All rights reserved.
|
||||
#
|
||||
# The contents of this file are subject to the Mozilla Public License Version
|
||||
# 1.1 (the 'License'); you may not use this file except in compliance with
|
||||
# the License. You may obtain a copy of the License at
|
||||
# http://www.mozilla.org/MPL/
|
||||
#
|
||||
# Software distributed under the License is distributed on an 'AS IS' basis,
|
||||
# WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
|
||||
# for the specific language governing rights and limitations under the
|
||||
# License.
|
||||
#
|
||||
# Authors: Le Song (lesong@it.usyd.edu.au)
|
||||
# Created: (20/10/2006)
|
||||
# Last Updated: (dd/mm/yyyy)
|
||||
#
|
||||
|
||||
##\package elefant.fselection.hsic
|
||||
# This module performs computations related to the Hilbert-Schmidt Independence
|
||||
# Criterion. HSIC is short for Hilbert-Schmidt Independence Criterion.
|
||||
#
|
||||
# The biased HSIC is defined as $HSIC=\frac{1}{(m-1)^2}Tr(KHLH)$, where $K$ and $L$
|
||||
# are the kernel matrices for the data and the labels respectively.
|
||||
# $H_{ij}=\delta_{ij}-\frac{1}{m}$, where $m$ is the number of data points,
|
||||
# is the centering matrix. The unbiased estimator of HSIC is computed as
|
||||
# $HSIC=\frac{1}{m(m-3)}\left[Tr(KL)+\frac{1}{(m-1)(m-2)}1^\top K11^\top L1
|
||||
# -\frac{2}{m-2}1^\top KL1\right]$. For more theoretical underpinning
|
||||
# of HSIC, see the following reference:
|
||||
#
|
||||
# Gretton, A., O. Bousquet, A. Smola and B. Schoelkopf: Measuring
|
||||
# Statistical Dependence with Hilbert-Schmidt Norms. Algorithmic
|
||||
# Learning Theory: 16th International Conference, ALT 2005, 63-78, 2005.
|
||||
#
|
||||
|
||||
__version__ = '$Revision: $'
|
||||
# $Source$
|
||||
|
||||
import numpy
|
||||
import vector
|
||||
from setdiag0 import setdiag0
|
||||
|
||||
## Class that perform computation related to HSIC.
|
||||
#
|
||||
# It contains function that computes biased and unbiased HSIC, part of HSIC
|
||||
# necessary for its faster computation, and functions that enable
|
||||
# an optimization on HSIC with respect to the kernel parameters.
|
||||
#
|
||||
class CHSIC(object):
    def __init__(self):
        pass

    ## Evaluate the unbiased HSIC estimator from precomputed pieces.
    # Shared by UnBiasedHSIC, UnBiasedHSICFast and the two optimization
    # callbacks so the formula exists in one place only.
    # @param kMat The kernel matrix for the data.
    # @param lMat The kernel matrix of the label.
    # @param sL The vector of the sum of each row of lMat.
    # @param ssL The sum of all entries in lMat.
    # @param m The number of data points; the estimator needs m > 3.
    #
    @staticmethod
    def _UnBiased(kMat, lMat, sL, ssL, m):
        sK = numpy.sum(kMat, axis=1)
        ssK = numpy.sum(sK)
        m = float(m)  # avoid integer division on Python 2
        return (numpy.sum(kMat * lMat)
                + (ssK * ssL) / ((m - 1) * (m - 2))
                - 2 * numpy.sum(sK * sL) / (m - 2)
                ) / (m * (m - 3))

    ## Compute HLH given the labels.
    # @param y The labels.
    # @param kernely The kernel on the labels, default to linear kernel.
    #
    def ComputeHLH(self, y, kernely=None):
        ny = y.shape
        if len(ny) > 1:
            if kernely is None:
                # Built per call rather than once as a shared default.
                kernely = vector.CLinearKernel()
            lMat = kernely.Dot(y, y)
        else:
            lMat = numpy.outer(y, y)

        sL = numpy.sum(lMat, axis=1)
        ssL = numpy.sum(sL)
        m = float(ny[0])
        # hlhMat; using the row sums for both terms relies on lMat being
        # symmetric.
        return lMat - numpy.add.outer(sL, sL) / m + ssL / (m * m)

    ## Compute the biased estimator of HSIC.
    # @param x The data.
    # @param y The labels.
    # @param kernelx The kernel on the data, default to linear kernel.
    # @param kernely The kernel on the labels, default to linear kernel.
    #
    def BiasedHSIC(self, x, y, kernelx=None, kernely=None):
        nx = x.shape
        ny = y.shape
        assert nx[0] == ny[0], \
            "Argument 1 and 2 have different number of data points"

        if len(nx) > 1:
            if kernelx is None:
                kernelx = vector.CLinearKernel()
            kMat = kernelx.Dot(x, x)
        else:
            kMat = numpy.outer(x, x)

        hlhMat = self.ComputeHLH(y, kernely)
        return numpy.sum(kMat * hlhMat) / float((nx[0] - 1) * (nx[0] - 1))

    ## Objective of the biased HSIC when performing optimization over
    # the kernel parameters.
    # @param param The kernel parameters.
    # @param x The data.
    # @param kernelx The kernel on the data.
    # @param hlhMat The HLH matrix on the labels.
    #
    def ObjBiasedHSIC(self, param, x, kernelx, hlhMat):
        nx = x.shape
        kMat = kernelx.DotCacheKernel(x, param)
        return -numpy.sum(kMat * hlhMat) / float((nx[0] - 1) * (nx[0] - 1))

    ## Gradient of the objective of the biased HSIC when performing
    # optimization over the kernel parameters.
    # @param param The kernel parameters.
    # @param x The data.
    # @param kernelx The kernel on the data.
    # @param hlhMat The HLH matrix on the labels.
    #
    def GradBiasedHISC(self, param, x, kernelx, hlhMat):
        nx = x.shape
        kMat = kernelx.GradDotCacheKernel(x, param)
        return -numpy.sum(kMat * hlhMat) / float((nx[0] - 1) * (nx[0] - 1))

    # Correctly spelt alias; the original (misspelt) name is kept for
    # existing callers.
    GradBiasedHSIC = GradBiasedHISC

    ## Fast computation of the biased HSIC when the kernel matrix
    # for the data and the HLH matrix for the labels are already
    # computed.
    # @param kMat The kernel matrix for the data.
    # @param hlhMat The HLH matrix for the labels.
    #
    def BiasedHSICFast(self, kMat, hlhMat):
        nx = kMat.shape
        assert (kMat.shape == hlhMat.shape), \
            "Argument 1 and 2 have different shapes"

        return (kMat * hlhMat).sum() / float((nx[0] - 1) * (nx[0] - 1))

    ## Fast computation of the biased HSIC when the kernel matrix
    # for the labels can be decomposed into HLH = y * y' and the
    # rank of y is low
    # @param kMat The kernel matrix for the data.
    # @param y The HLH = y * y' for the labels.
    #
    def BiasedHSICFast2(self, kMat, y):
        nx = kMat.shape
        assert (kMat.shape[0] == y.shape[0]), \
            "Argument 1 and 2 have different shapes"

        return numpy.dot(y.T, numpy.dot(kMat, y)).trace() \
            / float((nx[0] - 1) * (nx[0] - 1))

    ## Fast computation of the biased HSIC when the kernel matrix
    # for the data K can be decomposed into K = x * x' and that
    # for the labels can be decomposed into HLH = y * y' and the
    # rank of y is low (this will be useful after incomplete cholesky
    # factorization)
    # @param x The K = x * x' for the data.
    # @param y The HLH = y * y' for the labels.
    #
    def BiasedHSICFast3(self, x, y):
        nx = x.shape
        assert (x.shape[0] == y.shape[0]), \
            "Argument 1 and 2 have different shapes"

        return (numpy.dot(x.T, y) ** 2).sum() \
            / float((nx[0] - 1) * (nx[0] - 1))

    ## Compute the UNbiased estimator of HSIC.
    # @param x The data.
    # @param y The labels.
    # @param kernelx The kernel on the data, default to linear kernel.
    # @param kernely The kernel on the labels, default to linear kernel.
    #
    def UnBiasedHSIC(self, x, y, kernelx=None, kernely=None):
        nx = x.shape
        ny = y.shape
        assert nx[0] == ny[0], \
            "Argument 1 and 2 have different number of data points"

        if kernelx is None:
            kernelx = vector.CLinearKernel()
        if kernely is None:
            kernely = vector.CLinearKernel()

        kMat = kernelx.Dot(x, x)
        setdiag0(kMat)

        lMat = kernely.Dot(y, y)
        setdiag0(lMat)

        sL = lMat.sum(axis=1)
        ssL = sL.sum()

        return self._UnBiased(kMat, lMat, sL, ssL, nx[0])

    ## Objective of the UNbiased HSIC when performing optimization over
    # the kernel parameters.
    # @param param The kernel parameters.
    # @param x The data.
    # @param kernelx The kernel on the data.
    # @param lMat The kernel matrix of the label.
    # @param sL The vector of the sum of each row of lMat.
    # @param ssL The sum of all entries in lMat.
    #
    def ObjUnBiasedHSIC(self, param, x, kernelx, lMat, sL, ssL):
        kMat = kernelx.DotCacheKernel(x, param)
        # Negated so that a minimizer maximizes HSIC.
        return -self._UnBiased(kMat, lMat, sL, ssL, x.shape[0])

    ## Gradient of the objective of the UNbiased HSIC when performing
    # optimization over the kernel parameters.
    # @param param The kernel parameters.
    # @param x The data.
    # @param kernelx The kernel on the data.
    # @param lMat The kernel matrix of the label.
    # @param sL The vector of the sum of each row of lMat.
    # @param ssL The sum of all entries in lMat.
    #
    def GradUnBiasedHSIC(self, param, x, kernelx, lMat, sL, ssL):
        kMat = kernelx.GradDotCacheKernel(x, param)
        return -self._UnBiased(kMat, lMat, sL, ssL, x.shape[0])

    ## Fast computation of the UNbiased HSIC when the kernel matrices
    # for the data and for the labels are already computed.
    # @param kMat The kernel matrix for the data.
    # @param lMat The kernel matrix of the label.
    # @param sL The vector of the sum of each row of lMat.
    # @param ssL The sum of all entries in lMat.
    #
    def UnBiasedHSICFast(self, kMat, lMat, sL, ssL):
        assert (kMat.shape == lMat.shape), \
            "Argument 1 and 2 have different shapes"

        return self._UnBiased(kMat, lMat, sL, ssL, kMat.shape[0])
|
||||
|
||||
## Normalize each dimension of the data separately to zero mean and unit
#  standard deviation. The array is modified in place; nothing is returned.
#  A dimension whose standard deviation is zero is only shifted to zero
#  mean, not rescaled.
#  @param data [read\write] The data to be normalized. Each row is a
#  datum and each column a dimension.
#
def normalize(data):
    m = data.mean(axis=0)
    s = data.std(axis=0)
    # A constant column has zero spread; dividing by it would turn the whole
    # column into NaN (0/0). Divide such columns by 1 instead, which leaves
    # them at exactly zero after the mean has been removed.
    s = numpy.where(s == 0, 1.0, s)
    data -= m
    data /= s
|
||||
|
||||
## Center the kernel matrix in the feature space. The matrix is updated
#  in place; nothing is returned.
#  @param k [read\write] The kernel matrix to be centered.
#
def center(k):
    dims = k.shape
    assert dims[0] == dims[1], 'k must be symmetric and positive semidefinite'
    # Row means as a column vector so they broadcast across each row.
    rowMeans = k.mean(axis=1)
    rowMeans.shape = (dims[0], 1)
    grandMean = rowMeans.mean()
    # Subtract the row means, then the same means along the columns, and
    # finally add back the overall mean.
    k -= rowMeans
    k -= rowMeans.T
    k += grandMean
|
||||
|
||||
+30
@@ -0,0 +1,30 @@
|
||||
regression hsic linear
|
||||
regression_linear.py data/features_hsic_0.dat data/labels_0.dat data/predict_hsic_0.dat 0.0462957047621
|
||||
regression_linear.py data/features_hsic_1.dat data/labels_1.dat data/predict_hsic_1.dat 0.0619267723098
|
||||
regression_linear.py data/features_hsic_2.dat data/labels_2.dat data/predict_hsic_2.dat 0.119146367285
|
||||
regression_linear.py data/features_hsic_3.dat data/labels_3.dat data/predict_hsic_3.dat 0.108100811067
|
||||
regression hsic bayesian
|
||||
regression_bayesian.py data/features_hsic_0.dat data/labels_0.dat data/predict_hsic_0.dat 0.0574420849846
|
||||
regression_bayesian.py data/features_hsic_1.dat data/labels_1.dat data/predict_hsic_1.dat 0.0744427035158
|
||||
regression_bayesian.py data/features_hsic_2.dat data/labels_2.dat data/predict_hsic_2.dat 0.131973261761
|
||||
regression_bayesian.py data/features_hsic_3.dat data/labels_3.dat data/predict_hsic_3.dat 0.1292239022
|
||||
regression hsic dtree
|
||||
regression_dtree.py data/features_hsic_0.dat data/labels_0.dat data/predict_hsic_0.dat 0.0616359499887
|
||||
regression_dtree.py data/features_hsic_1.dat data/labels_1.dat data/predict_hsic_1.dat 0.0870560437385
|
||||
regression_dtree.py data/features_hsic_2.dat data/labels_2.dat data/predict_hsic_2.dat 0.225990113056
|
||||
regression_dtree.py data/features_hsic_3.dat data/labels_3.dat data/predict_hsic_3.dat 0.122724406103
|
||||
regression pca linear
|
||||
regression_linear.py data/features_pca.dat data/labels_0.dat data/predict_pca_0.dat 0.0525417950272
|
||||
regression_linear.py data/features_pca.dat data/labels_1.dat data/predict_pca_1.dat 0.0630462800532
|
||||
regression_linear.py data/features_pca.dat data/labels_2.dat data/predict_pca_2.dat 0.11837933335
|
||||
regression_linear.py data/features_pca.dat data/labels_3.dat data/predict_pca_3.dat 0.105251711141
|
||||
regression pca bayesian
|
||||
regression_bayesian.py data/features_pca.dat data/labels_0.dat data/predict_pca_0.dat 0.0598334909344
|
||||
regression_bayesian.py data/features_pca.dat data/labels_1.dat data/predict_pca_1.dat 0.0741352839217
|
||||
regression_bayesian.py data/features_pca.dat data/labels_2.dat data/predict_pca_2.dat 0.122554051214
|
||||
regression_bayesian.py data/features_pca.dat data/labels_3.dat data/predict_pca_3.dat 0.122681510711
|
||||
regression pca dtree
|
||||
regression_dtree.py data/features_pca.dat data/labels_0.dat data/predict_pca_0.dat 0.0547733949265
|
||||
regression_dtree.py data/features_pca.dat data/labels_1.dat data/predict_pca_1.dat 0.0922293538752
|
||||
regression_dtree.py data/features_pca.dat data/labels_2.dat data/predict_pca_2.dat 0.19924179118
|
||||
regression_dtree.py data/features_pca.dat data/labels_3.dat data/predict_pca_3.dat 0.0951762860752
|
||||
@@ -0,0 +1,38 @@
|
||||
# Copyright (c) 2007, National ICT Australia
|
||||
# All rights reserved.
|
||||
#
|
||||
# The contents of this file are subject to the Mozilla Public License Version
|
||||
# 1.1 (the 'License'); you may not use this file except in compliance with
|
||||
# the License. You may obtain a copy of the License at
|
||||
# http://www.mozilla.org/MPL/
|
||||
#
|
||||
# Software distributed under the License is distributed on an 'AS IS' basis,
|
||||
# WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
|
||||
# for the specific language governing rights and limitations under the
|
||||
# License.
|
||||
#
|
||||
# Authors: Christfried Webers
|
||||
# Created: (09/10/2007)
|
||||
# Last Updated:
|
||||
#
|
||||
|
||||
## Exception classes for the Elefant project
|
||||
|
||||
class CElefantException(Exception):
    """Base class for exceptions in Elefant."""
    pass


class CElefantConstraintException(CElefantException):
    """Exception raised for constraint violation.

    Attributes:
        value -- input value violating the constraint
        message -- explanation of the error
    """

    def __init__(self, value, message):
        # Hand both values to the base initialiser as well; without this
        # e.args stays empty and str(e) is an empty string, so the details
        # are lost whenever the exception is printed or logged.
        CElefantException.__init__(self, value, message)
        self.value = value
        self.message = message
|
||||
|
||||
|
||||
@@ -0,0 +1,30 @@
|
||||
import time
|
||||
import sys
|
||||
import numpy
|
||||
import vector
|
||||
from sklearn.linear_model import BayesianRidge, LinearRegression
|
||||
from sklearn.cross_validation import train_test_split
|
||||
|
||||
# Command-line help; four arguments are required after the script name.
usage = "filename features_file labels_file output_file split_seed"

if __name__ == "__main__":

    if len(sys.argv) != 5:
        print(usage)
    else:
        file_x = sys.argv[1]
        file_y = sys.argv[2]
        file_out = sys.argv[3]
        # Command-line arguments are strings; random_state needs an integer.
        split_seed = int(sys.argv[4])

        X = numpy.genfromtxt(file_x, delimiter=' ')
        y = numpy.genfromtxt(file_y, delimiter=' ')

        # Split the data into training/testing sets
        X_train, X_test, y_train, y_test = train_test_split(
            X, y, test_size=0.33, random_state=split_seed)

        # Bayesian Ridge Regression, fitted on the training split only so
        # that the rows being predicted were not seen during fitting.
        clf = BayesianRidge(compute_score=True)
        clf.fit(X_train, y_train)
        y_predict = clf.predict(X_test)
        numpy.savetxt(file_out, y_predict)
|
||||
@@ -0,0 +1,30 @@
|
||||
import time
|
||||
import sys
|
||||
import numpy
|
||||
import vector
|
||||
from sklearn.tree import DecisionTreeRegressor
|
||||
from sklearn.cross_validation import train_test_split
|
||||
|
||||
# Command-line help; four arguments are required after the script name.
usage = "filename features_file labels_file output_file split_seed"

if __name__ == "__main__":

    if len(sys.argv) != 5:
        print(usage)
    else:
        file_x = sys.argv[1]
        file_y = sys.argv[2]
        file_out = sys.argv[3]
        # Command-line arguments are strings; random_state needs an integer.
        split_seed = int(sys.argv[4])

        X = numpy.genfromtxt(file_x, delimiter=' ')
        y = numpy.genfromtxt(file_y, delimiter=' ')

        # Split the data into training/testing sets
        X_train, X_test, y_train, y_test = train_test_split(
            X, y, test_size=0.33, random_state=split_seed)

        # Decision tree regressor, fitted on the training split only so
        # that the rows being predicted were not seen during fitting.
        clf = DecisionTreeRegressor(max_depth=2)
        clf.fit(X_train, y_train)
        y_predict = clf.predict(X_test)
        numpy.savetxt(file_out, y_predict)
|
||||
@@ -0,0 +1,30 @@
|
||||
import time
|
||||
import sys
|
||||
import numpy
|
||||
import vector
|
||||
from sklearn import linear_model
|
||||
from sklearn.cross_validation import train_test_split
|
||||
|
||||
# Command-line help; four arguments are required after the script name.
usage = "filename features_file labels_file output_file split_seed"

if __name__ == "__main__":

    if len(sys.argv) != 5:
        print(usage)
    else:
        file_x = sys.argv[1]
        file_y = sys.argv[2]
        file_out = sys.argv[3]
        # Command-line arguments are strings; random_state needs an integer.
        split_seed = int(sys.argv[4])

        X = numpy.genfromtxt(file_x, delimiter=' ')
        y = numpy.genfromtxt(file_y, delimiter=' ')

        # Split the data into training/testing sets
        X_train, X_test, y_train, y_test = train_test_split(
            X, y, test_size=0.33, random_state=split_seed)

        # Linear regression object, fitted on the training split and used
        # to predict the held-out rows.
        regr = linear_model.LinearRegression()
        regr.fit(X_train, y_train)
        y_predict = regr.predict(X_test)
        numpy.savetxt(file_out, y_predict)
|
||||
@@ -0,0 +1,30 @@
|
||||
import time
|
||||
import sys
|
||||
import numpy
|
||||
import vector
|
||||
from sklearn import linear_model
|
||||
from sklearn.cross_validation import train_test_split
|
||||
|
||||
# Command-line help; four arguments are required after the script name.
usage = "filename features_file labels_file output_file split_seed"

if __name__ == "__main__":

    if len(sys.argv) != 5:
        print(usage)
    else:
        file_x = sys.argv[1]
        file_y = sys.argv[2]
        file_out = sys.argv[3]
        # Command-line arguments are strings; random_state needs an integer.
        split_seed = int(sys.argv[4])

        X = numpy.genfromtxt(file_x, delimiter=' ')
        y = numpy.genfromtxt(file_y, delimiter=' ')

        # Split the data into training/testing sets
        X_train, X_test, y_train, y_test = train_test_split(
            X, y, test_size=0.33, random_state=split_seed)

        # Logistic regression, fitted on the training split and used to
        # predict the held-out rows.
        regr = linear_model.LogisticRegression()
        regr.fit(X_train, y_train)
        y_predict = regr.predict(X_test)
        numpy.savetxt(file_out, y_predict)
|
||||
@@ -0,0 +1,33 @@
|
||||
import time
|
||||
import sys
|
||||
import numpy
|
||||
import vector
|
||||
from sklearn.svm import SVR
|
||||
from sklearn.cross_validation import train_test_split
|
||||
|
||||
# Command-line help; four arguments are required after the script name.
usage = "filename features_file labels_file output_file split_seed"

if __name__ == "__main__":

    if len(sys.argv) != 5:
        print(usage)
    else:
        file_x = sys.argv[1]
        file_y = sys.argv[2]
        file_out = sys.argv[3]
        # Command-line arguments are strings; random_state needs an integer.
        split_seed = int(sys.argv[4])

        X = numpy.genfromtxt(file_x, delimiter=' ')
        y = numpy.genfromtxt(file_y, delimiter=' ')

        # Split the data into training/testing sets
        X_train, X_test, y_train, y_test = train_test_split(
            X, y, test_size=0.33, random_state=split_seed)

        # Support vector regression, fitted on the training split only so
        # that the rows being predicted were not seen during fitting.
        svr = SVR(kernel='linear', C=1e3)
        svr.fit(X_train, y_train)
        y_predict = svr.predict(X_test)
        numpy.savetxt(file_out, y_predict)
|
||||
|
||||
|
||||
|
||||
+121
@@ -0,0 +1,121 @@
|
||||
# Seed for the train/test split done inside every regression_*.py script.
# It must be passed as its own argument: each script requires exactly four
# arguments and only prints its usage line otherwise. 42 is the same value
# that split_data.py hard-codes.
SEED=42

#generate raw files
echo "generating raw files"
python cPickleparser.py

python features_sampled.py

#generate features

echo "generating bahsic features"
# Only the run for label 0 is given the extra normalized-features path.
python features_bahsic.py "data/features_raw.dat" "data/labels_0.dat" "data/features_bahsic_0.dat" "data/features_normalized.dat"
for i in 1 2 3; do
    python features_bahsic.py "data/features_raw.dat" "data/labels_$i.dat" "data/features_bahsic_$i.dat"
done

echo "generating rrt features"
echo "Run the matlab file to generate Recht and Rahimi Random Fourier features"

echo "generating downsampled features"
python features_sampled.py "data/features_sampled.dat"

# do a train-test split
for i in 0 1 2 3; do
    python split_data.py "data/features_raw.dat" "data/labels_$i.dat" "data/labels_test_$i.dat"
done

#BAHSIC
# One feature file per label; predictions are written under the "hsic" prefix.
for model in linear bayesian dtree svr logistic; do
    echo "regression bahsic $model"
    for i in 0 1 2 3; do
        python "regression_$model.py" "data/features_bahsic_$i.dat" "data/labels_$i.dat" "predict/hsic_${model}_$i.dat" "$SEED"
    done
done

#rrt
# A single feature file shared by all four labels.
for model in linear bayesian dtree svr logistic; do
    echo "regression rrt $model"
    for i in 0 1 2 3; do
        python "regression_$model.py" "data/features_rrt.dat" "data/labels_$i.dat" "predict/rrt_${model}_$i.dat" "$SEED"
    done
done

#sampled
# A single feature file shared by all four labels.
for model in linear bayesian dtree svr logistic; do
    echo "regression sampled $model"
    for i in 0 1 2 3; do
        python "regression_$model.py" "data/features_sampled.dat" "data/labels_$i.dat" "predict/sampled_${model}_$i.dat" "$SEED"
    done
done
|
||||
|
||||
@@ -0,0 +1,17 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
# Copyright (c) 2004 National ICT Australia --- All Rights Reserved
|
||||
# THIS IS UNPUBLISHED PROPRIETARY SOURCE CODE OF SML.NICTA
|
||||
# The copyright notice above does not evidence any
|
||||
# actual or intended publication of this work.
|
||||
#
|
||||
# Authors: Le Song
|
||||
# Last changed: 02/08/2006 (Christfried Webers)
|
||||
|
||||
import numpy
|
||||
|
||||
def setdiag0(K):
    """Set the diagonal entries of a matrix to 0, in place.

    K is modified directly and nothing is returned. The entries K[i, i]
    are zeroed; for a square matrix this is the whole main diagonal.
    """
    # fill_diagonal derives the flat stride from the row length, so the
    # right cells are hit even when K is not square (the previous
    # hand-computed indices i*(n+1) were only valid for n x n input).
    numpy.fill_diagonal(K, 0.0)
|
||||
@@ -0,0 +1,24 @@
|
||||
import time
|
||||
import sys
|
||||
import numpy
|
||||
import vector
|
||||
from sklearn.cross_validation import train_test_split
|
||||
|
||||
# Command-line help; three arguments are required after the script name.
usage = "filename features_file labels_file labels_test_file"

if __name__ == "__main__":

    if len(sys.argv) != 4:
        print(usage)
    else:
        file_x = sys.argv[1]
        file_y = sys.argv[2]
        file_y_test = sys.argv[3]

        X = numpy.genfromtxt(file_x, delimiter=' ')
        y = numpy.genfromtxt(file_y, delimiter=' ')

        # Fixed seed, so repeated runs produce the same partition.
        X_train, X_test, y_train, y_test = train_test_split(
            X, y, test_size=0.33, random_state=42)

        # Only the test labels go to the caller-supplied path; the feature
        # splits are always written to these fixed locations.
        numpy.savetxt(file_y_test, y_test)
        numpy.savetxt("data/features_train.dat", X_train)
        numpy.savetxt("data/features_test.dat", X_test)
|
||||
+2008
Diferenças do arquivo suprimidas por serem muito extensas
Carregar Diff
Referência em uma Nova Issue
Bloquear um usuário