Arquivos
2014-11-21 18:52:56 +05:30

268 linhas
9.8 KiB
Python

# Copyright (c) 2006, National ICT Australia
# All rights reserved.
#
# The contents of this file are subject to the Mozilla Public License Version
# 1.1 (the 'License'); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
# http://www.mozilla.org/MPL/
#
# Software distributed under the License is distributed on an 'AS IS' basis,
# WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
# for the specific language governing rights and limitations under the
# License.
#
# Authors: Le Song (lesong@it.usyd.edu.au)
# Created: (20/10/2006)
# Last Updated: (dd/mm/yyyy)
#
##\package elefant.fselection.hsic
# This module perform computation related to Hilber-Schmidt Independence
# Criterion. Hilber-Schmidt Independence Criterion is short for HSIC.
#
# HISC is defined as $HSIC=\frac{1}{m}Tr(KHLH)$, where $kMat$ and $lMat$
# are the kernel matrices for the data and the labels respectively.
# $H=I-\frac{1}{m}\delta_{ij}$, where $m$ is the number of data points,
# is the centering matrix. The unbiased estimator of HSIC is computed as
# $HSIC=\frac{1}{m(m-3)}\left[Tr(KL)+\frac{1}{(m-1)(m-2)}1^\top K11^\top L1
# -\frac{2}{m-2}1^\top KL1\right]. For more theorectical underpinning
# of HSIC, see the following reference:
#
# Gretton, A., O. Bousquet, A. Smola and B. Schoelkopf: Measuring
# Statistical Dependence with Hilbert-Schmidt Norms. Algorithmic
# Learning Theory: 16th International Conference, ALT 2005, 63-78, 2005.
#
__version__ = '$Revision: $'
# $Source$
import numpy
import vector
from setdiag0 import setdiag0
## Class that perform computation related to HSIC.
#
# It contains function that computes biased and unbiased HSIC, part of HSIC
# necessary for faster its faster computation, and functions that enable
# an optimization on HSIC with respect to the kernel parameters.
#
class CHSIC(object):
def __init__(self):
pass
## Compute HLH give the labels.
# @param y The labels.
# @param kernely The kernel on the labels, default to linear kernel.
#
def ComputeHLH(self, y, kernely=vector.CLinearKernel()):
ny = y.shape
if len(ny) > 1:
lMat = kernely.Dot(y, y)
else:
lMat = numpy.outerproduct(y, y)
sL = numpy.sum(lMat, axis=1)
ssL = numpy.sum(sL)
# hlhMat
return lMat - numpy.add.outer(sL, sL)/ny[0] + ssL/(ny[0]*ny[0])
## Compute the biased estimator of HSIC.
# @param x The data.
# @param y The labels.
# @param kernelx The kernel on the data, default to linear kernel.
# @param kernely The kernel on the labels, default to linear kernel.
#
def BiasedHSIC(self, x, y, kernelx=vector.CLinearKernel(), \
kernely=vector.CLinearKernel()):
nx = x.shape
ny = y.shape
assert nx[0] == ny[0], \
"Argument 1 and 2 have different number of data points"
if len(nx) > 1:
kMat = kernelx.Dot(x, x)
else:
kMat = numpy.outerproduct(x, x)
hlhMat = ComputeHLH(y, kernely)
return numpy.sum(numpy.sum(kMat * hlhMat)) / ((nx[0]-1)*(nx[0]-1))
## Objective of the biased HSIC when performing optimization over
# the kernel parameters.
# @param param The kernel parameters.
# @param x The data.
# @param kernelx The kernel on the data.
# @param hlhMat The HLH matrix on the labels.
#
def ObjBiasedHSIC(self, param, x, kernelx, hlhMat):
nx = x.shape
kMat = kernelx.DotCacheKernel(x, param)
return -numpy.sum(numpy.sum(kMat * hlhMat)) / ((nx[0]-1)*(nx[0]-1))
## Gradient of the objective of the biased HSIC when performing
# optimization over the kernel parameters.
# @param param The kernel parameters.
# @param x The data.
# @param kernelx The kernel on the data.
# @param hlhMat The HLH matrix on the labels.
#
def GradBiasedHISC(self, param, x, kernelx, hlhMat):
nx = x.shape
kMat = kernelx.GradDotCacheKernel(x, param)
return -numpy.sum(numpy.sum(kMat * hlhMat)) / ((nx[0]-1)*(nx[0]-1))
## Fast computation of the biased HSIC when the kernel matrix
# for the data and the HLH matrix for the labels are already
# computed.
# @param kMat The kernel matrix for the data.
# @param hlhMat The HLH matrix for the labels.
#
def BiasedHSICFast(self, kMat, hlhMat):
nx = kMat.shape
assert (kMat.shape == hlhMat.shape), \
"Argument 1 and 2 have different shapes"
return (kMat * hlhMat).sum() / ((nx[0]-1)*(nx[0]-1))
## Fast computation of the biased HSIC when the kernel matrix
# for the labels can be decomposed into HLH = y * y' and the
# rank of y is low
# @param kMat The kernel matrix for the data.
# @param y The HLH = y * y' for the labels.
#
def BiasedHSICFast2(self, kMat, y):
nx = kMat.shape
assert (kMat.shape[0] == y.shape[0]), \
"Argument 1 and 2 have different shapes"
return numpy.dot(y.T, numpy.dot(kMat, y)).trace() / ((nx[0]-1)*(nx[0]-1))
## Fast computation of the biased HSIC when the kernel matrix
# for the data K can be decomposed into K = x * x' and that
# for the labels can be decomposed into HLH = y * y' and the
# rank of y is low (this will be useful after incomplete cholesky
# factorization
# @param x The K = x * x' for the data.
# @param y The HLH = y * y' for the labels.
#
def BiasedHSICFast3(self, x, y):
nx = x.shape
assert (x.shape[0] == y.shape[0]), \
"Argument 1 and 2 have different shapes"
return (numpy.dot(x.T, y)**2).sum() / ((nx[0]-1)*(nx[0]-1))
## Compute the UNbiased estimator of HSIC.
# @param x The data.
# @param y The labels.
# @param kernelx The kernel on the data, default to linear kernel.
# @param kernely The kernel on the labels, default to linear kernel.
#
def UnBiasedHSIC(self, x, y, kernelx=vector.CLinearKernel(), \
kernely=vector.CLinearKernel()):
nx = x.shape
ny = y.shape
assert nx[0] == ny[0], \
"Argument 1 and 2 have different number of data points"
kMat = kernelx.Dot(x,x)
setdiag0(kMat)
lMat = kernely.Dot(y,y)
setdiag0(lMat)
sK = kMat.sum(axis=1)
ssK = sK.sum()
sL = lMat.sum(axis=1)
ssL = sL.sum()
return ( kMat.__imul__(lMat).sum() + \
(ssK*ssL)/((nx[0]-1)*(nx[0]-2)) - \
2 * sK.__imul__(sL).sum() / (nx[0]-2) \
) / (nx[0]*(nx[0]-3))
## Objective of the UNbiased HSIC when performing optimization over
# the kernel parameters.
# @param param The kernel parameters.
# @param x The data.
# @param kernelx The kernel on the data.
# @param lMat The kernel matrix of the label.
# @param sL The vector of the sum of each row of lMat.
# @param ssL The vector of the sum of all entries in lMat.
#
def ObjUnBiasedHSIC(self, param, x, kernelx, lMat, sL, ssL):
nx = x.shape
kMat = kernelx.DotCacheKernel(x, param)
sK = numpy.sum(kMat, axis=1)
ssK = numpy.sum(sK)
return -( numpy.sum(numpy.sum(kMat*lMat)) \
+ (ssK*ssL)/((nx[0]-1)*(nx[0]-2)) \
- 2*numpy.sum(sK*sL)/(nx[0]-2) \
) / (nx[0]*(nx[0]-3))
## Gradient of the objective of the UNbiased HSIC when performing
# optimization over the kernel parameters.
# @param param The kernel parameters.
# @param x The data.
# @param kernelx The kernel on the data.
# @param lMat The kernel matrix of the label.
# @param sL The vector of the sum of each row of lMat.
# @param ssL The vector of the sum of all entries in lMat.
#
def GradUnBiasedHSIC(self, param, x, kernelx, lMat, sL, ssL):
nx = x.shape
kMat = kernelx.GradDotCacheKernel(x, param)
sK = numpy.sum(kMat, axis=1)
ssK = numpy.sum(sK)
return -( numpy.sum(numpy.sum(kMat*lMat)) \
+ (ssK*ssL)/((nx[0]-1)*(nx[0]-2)) \
- 2*numpy.sum(sK*sL)/(nx[0]-2) \
) / (nx[0]*(nx[0]-3))
## Fast computation of the biased HSIC when the kernel matrix
# for the data and the HLH matrix for the labels are already
# computed.
# @param kMat The kernel matrix for the data.
# @param lMat The kernel matrix of the label.
# @param sL The vector of the sum of each row of lMat.
# @param ssL The vector of the sum of all entries in lMat.
#
def UnBiasedHSICFast(self, kMat, lMat, sL, ssL):
nx = kMat.shape
assert (kMat.shape == lMat.shape), \
"Argument 1 and 2 have different shapes"
sK = numpy.sum(kMat, axis=1)
ssK = numpy.sum(sK)
return ( numpy.sum(numpy.sum(kMat*lMat)) \
+ (ssK*ssL)/((nx[0]-1)*(nx[0]-2)) \
- 2*numpy.sum(sK*sL)/(nx[0]-2) \
) / (nx[0]*(nx[0]-3))
## Normalize each dimension of the data separately to zero mean and unit
# standard deviation.
# @param data [read\write] The data to be normalized. Each row is a
# datum and each column a dimension.
#
def normalize(data):
m = data.mean(axis=0)
s = data.std(axis=0)
data.__isub__(m).__itruediv__(s)
## Center the kernel matrix in the feature space.
# @param k [read\write] The kernel matrix to be centered.
#
def center(k):
n = k.shape
assert n[0] == n[1], 'k must be symmetric and positive semidefinite'
mk = k.mean(axis=1)
mk.shape = (n[0], 1)
mmk = mk.mean()
k.__isub__(mk).__isub__(mk.T).__iadd__(mmk)