Added symmetric poweriterations to sandbox
This commit is contained in:
parent
0956581c42
commit
1103245d85
@ -30,7 +30,7 @@ class get_matvec:
|
|||||||
if isinstance(obj, sb.ndarray):
|
if isinstance(obj, sb.ndarray):
|
||||||
self.callfunc = self.type1
|
self.callfunc = self.type1
|
||||||
return
|
return
|
||||||
meth = getattr(obj,self.methname,None)
|
meth = getattr(obj, self.methname, None)
|
||||||
if not callable(meth):
|
if not callable(meth):
|
||||||
raise ValueError, "Object must be an array "\
|
raise ValueError, "Object must be an array "\
|
||||||
"or have a callable %s attribute." % (self.methname,)
|
"or have a callable %s attribute." % (self.methname,)
|
||||||
@ -48,12 +48,12 @@ class get_matvec:
|
|||||||
return sb.dot(self.obj.A, x)
|
return sb.dot(self.obj.A, x)
|
||||||
|
|
||||||
def type2(self, x):
|
def type2(self, x):
|
||||||
return self.obj(x,*self.args)
|
return self.obj(x, *self.args)
|
||||||
|
|
||||||
|
|
||||||
def eigen(A,k=6,M=None,ncv=None,which='LM',
|
def eigen(A, k=6, M=None, ncv=None, which='LM',
|
||||||
maxiter=None,tol=0, return_eigenvectors=True):
|
maxiter=None, tol=0, return_eigenvectors=True, v0=None):
|
||||||
""" Return k eigenvalues and eigenvectors of the matrix A.
|
"""Return k eigenvalues and eigenvectors of the matrix A.
|
||||||
|
|
||||||
Solves A * x[i] = w[i] * x[i], the standard eigenvalue problem for
|
Solves A * x[i] = w[i] * x[i], the standard eigenvalue problem for
|
||||||
w[i] eigenvalues with corresponding eigenvectors x[i].
|
w[i] eigenvalues with corresponding eigenvectors x[i].
|
||||||
@ -69,6 +69,7 @@ def eigen(A,k=6,M=None,ncv=None,which='LM',
|
|||||||
M -- (Not implemented)
|
M -- (Not implemented)
|
||||||
A symmetric positive-definite matrix for the generalized
|
A symmetric positive-definite matrix for the generalized
|
||||||
eigenvalue problem A * x = w * M * x
|
eigenvalue problem A * x = w * M * x
|
||||||
|
v0 -- Initial starting solution (n x 1)
|
||||||
|
|
||||||
Outputs:
|
Outputs:
|
||||||
|
|
||||||
@ -99,8 +100,8 @@ def eigen(A,k=6,M=None,ncv=None,which='LM',
|
|||||||
|
|
||||||
"""
|
"""
|
||||||
try:
|
try:
|
||||||
n,ny=A.shape
|
n, ny = A.shape
|
||||||
n==ny
|
n == ny
|
||||||
except:
|
except:
|
||||||
raise AttributeError("matrix is not square")
|
raise AttributeError("matrix is not square")
|
||||||
if M is not None:
|
if M is not None:
|
||||||
@ -108,10 +109,10 @@ def eigen(A,k=6,M=None,ncv=None,which='LM',
|
|||||||
|
|
||||||
# some defaults
|
# some defaults
|
||||||
if ncv is None:
|
if ncv is None:
|
||||||
ncv=2*k+1
|
ncv = 2*k + 1
|
||||||
ncv=min(ncv,n)
|
ncv = min(ncv, n)
|
||||||
if maxiter==None:
|
if maxiter == None:
|
||||||
maxiter=n*10
|
maxiter = n*10
|
||||||
|
|
||||||
# guess type
|
# guess type
|
||||||
resid = sb.zeros(n,'f')
|
resid = sb.zeros(n,'f')
|
||||||
@ -129,7 +130,7 @@ def eigen(A,k=6,M=None,ncv=None,which='LM',
|
|||||||
raise ValueError("k must be less than rank(A), k=%d"%k)
|
raise ValueError("k must be less than rank(A), k=%d"%k)
|
||||||
if maxiter <= 0:
|
if maxiter <= 0:
|
||||||
raise ValueError("maxiter must be positive, maxiter=%d"%maxiter)
|
raise ValueError("maxiter must be positive, maxiter=%d"%maxiter)
|
||||||
whiches=['LM','SM','LR','SR','LI','SI']
|
whiches = ['LM','SM','LR','SR','LI','SI']
|
||||||
if which not in whiches:
|
if which not in whiches:
|
||||||
raise ValueError("which must be one of %s"%' '.join(whiches))
|
raise ValueError("which must be one of %s"%' '.join(whiches))
|
||||||
if ncv > n or ncv < k:
|
if ncv > n or ncv < k:
|
||||||
@ -141,17 +142,26 @@ def eigen(A,k=6,M=None,ncv=None,which='LM',
|
|||||||
eigextract = _arpack.__dict__[ltr+'neupd']
|
eigextract = _arpack.__dict__[ltr+'neupd']
|
||||||
matvec = get_matvec(A)
|
matvec = get_matvec(A)
|
||||||
|
|
||||||
v = sb.zeros((n,ncv),typ) # holds Ritz vectors
|
v = sb.zeros((n, ncv), typ) # holds Ritz vectors
|
||||||
resid = sb.zeros(n,typ) # residual
|
if v0 == None:
|
||||||
workd = sb.zeros(3*n,typ) # workspace
|
resid = sb.zeros(n, typ) # residual
|
||||||
workl = sb.zeros(3*ncv*ncv+6*ncv,typ) # workspace
|
|
||||||
iparam = sb.zeros(11,'int') # problem parameters
|
|
||||||
ipntr = sb.zeros(14,'int') # pointers into workspaces
|
|
||||||
info = 0
|
info = 0
|
||||||
|
else: # starting vector is given
|
||||||
|
nn, kk = v0.shape
|
||||||
|
if nn != n:
|
||||||
|
raise ValueError("starting vector must be: (%d, 1), got: (%d, %d)" %(n, nn, kk))
|
||||||
|
resid = v0[:,0].astype(typ)
|
||||||
|
info = 1
|
||||||
|
|
||||||
|
workd = sb.zeros(3*n, typ) # workspace
|
||||||
|
workl = sb.zeros(3*ncv*ncv+6*ncv, typ) # workspace
|
||||||
|
iparam = sb.zeros(11, 'int') # problem parameters
|
||||||
|
ipntr = sb.zeros(14, 'int') # pointers into workspaces
|
||||||
|
|
||||||
ido = 0
|
ido = 0
|
||||||
|
|
||||||
if typ in 'FD':
|
if typ in 'FD':
|
||||||
rwork = sb.zeros(ncv,typ.lower())
|
rwork = sb.zeros(ncv, typ.lower())
|
||||||
|
|
||||||
# only supported mode is 1: Ax=lx
|
# only supported mode is 1: Ax=lx
|
||||||
ishfts = 1
|
ishfts = 1
|
||||||
@ -173,9 +183,9 @@ def eigen(A,k=6,M=None,ncv=None,which='LM',
|
|||||||
|
|
||||||
if (ido == -1 or ido == 1):
|
if (ido == -1 or ido == 1):
|
||||||
# compute y = A * x
|
# compute y = A * x
|
||||||
xslice = slice(ipntr[0]-1, ipntr[0]-1+n)
|
xslice = slice(ipntr[0] - 1, ipntr[0] - 1 + n)
|
||||||
yslice = slice(ipntr[1]-1, ipntr[1]-1+n)
|
yslice = slice(ipntr[1] - 1, ipntr[1] - 1 + n)
|
||||||
workd[yslice]=matvec(workd[xslice])
|
workd[yslice] = matvec(workd[xslice])
|
||||||
else: # done
|
else: # done
|
||||||
break
|
break
|
||||||
|
|
||||||
@ -233,7 +243,7 @@ def eigen(A,k=6,M=None,ncv=None,which='LM',
|
|||||||
|
|
||||||
|
|
||||||
def eigen_symmetric(A,k=6,M=None,ncv=None,which='LM',
|
def eigen_symmetric(A,k=6,M=None,ncv=None,which='LM',
|
||||||
maxiter=None,tol=0, return_eigenvectors=True):
|
maxiter=None,tol=0, return_eigenvectors=True, v0=None):
|
||||||
""" Return k eigenvalues and eigenvectors of the real symmetric matrix A.
|
""" Return k eigenvalues and eigenvectors of the real symmetric matrix A.
|
||||||
|
|
||||||
Solves A * x[i] = w[i] * x[i], the standard eigenvalue problem for
|
Solves A * x[i] = w[i] * x[i], the standard eigenvalue problem for
|
||||||
@ -253,6 +263,8 @@ def eigen_symmetric(A,k=6,M=None,ncv=None,which='LM',
|
|||||||
A symmetric positive-definite matrix for the generalized
|
A symmetric positive-definite matrix for the generalized
|
||||||
eigenvalue problem A * x = w * M * x
|
eigenvalue problem A * x = w * M * x
|
||||||
|
|
||||||
|
v0 -- Starting vector (n, 1)
|
||||||
|
|
||||||
Outputs:
|
Outputs:
|
||||||
|
|
||||||
w -- An real array of k eigenvalues
|
w -- An real array of k eigenvalues
|
||||||
@ -325,12 +337,22 @@ def eigen_symmetric(A,k=6,M=None,ncv=None,which='LM',
|
|||||||
matvec = get_matvec(A)
|
matvec = get_matvec(A)
|
||||||
|
|
||||||
v = sb.zeros((n,ncv),typ)
|
v = sb.zeros((n,ncv),typ)
|
||||||
resid = sb.zeros(n,typ)
|
if v0 == None:
|
||||||
|
resid = sb.zeros(n, typ) # residual
|
||||||
|
info = 0
|
||||||
|
else: # starting solution is given
|
||||||
|
nn, kk = v0.shape
|
||||||
|
if nn != n:
|
||||||
|
raise ValueError("starting vectors must be: (%d, %d), got: (%d, %d)" %(n, k, nn, kk))
|
||||||
|
resid = v0[:,0].astype(typ)
|
||||||
|
info = 1
|
||||||
|
|
||||||
|
#resid = sb.zeros(n,typ)
|
||||||
workd = sb.zeros(3*n,typ)
|
workd = sb.zeros(3*n,typ)
|
||||||
workl = sb.zeros(ncv*(ncv+8),typ)
|
workl = sb.zeros(ncv*(ncv+8),typ)
|
||||||
iparam = sb.zeros(11,'int')
|
iparam = sb.zeros(11,'int')
|
||||||
ipntr = sb.zeros(11,'int')
|
ipntr = sb.zeros(11,'int')
|
||||||
info = 0
|
#info = 0
|
||||||
ido = 0
|
ido = 0
|
||||||
|
|
||||||
# only supported mode is 1: Ax=lx
|
# only supported mode is 1: Ax=lx
|
||||||
@ -341,7 +363,6 @@ def eigen_symmetric(A,k=6,M=None,ncv=None,which='LM',
|
|||||||
iparam[2] = maxiter
|
iparam[2] = maxiter
|
||||||
iparam[6] = mode1
|
iparam[6] = mode1
|
||||||
|
|
||||||
|
|
||||||
while True:
|
while True:
|
||||||
ido,resid,v,iparam,ipntr,info =\
|
ido,resid,v,iparam,ipntr,info =\
|
||||||
eigsolver(ido,bmat,which,k,tol,resid,v,iparam,ipntr,
|
eigsolver(ido,bmat,which,k,tol,resid,v,iparam,ipntr,
|
||||||
|
@ -2,16 +2,149 @@
|
|||||||
|
|
||||||
The primary use is crossvalidation.
|
The primary use is crossvalidation.
|
||||||
"""
|
"""
|
||||||
__all__ = ['lpls_val', 'pls_jk', 'lpls_jk']
|
__all__ = ['pca_val', 'pls_val', 'lpls_val', 'pls_jk', 'lpls_jk']
|
||||||
__docformat__ = "restructuredtext en"
|
__docformat__ = "restructuredtext en"
|
||||||
|
|
||||||
from numpy import dot,empty,zeros,sqrt,atleast_2d,argmax,asarray,median,\
|
from numpy import dot,empty,zeros,sqrt,atleast_2d,argmax,asarray,median,\
|
||||||
array_split,arange, isnan, any
|
array_split,arange, isnan, any,newaxis
|
||||||
from numpy.random import shuffle
|
from numpy.random import shuffle
|
||||||
|
|
||||||
from engines import pls
|
from engines import pls, pca
|
||||||
from engines import nipals_lpls as lpls
|
from engines import nipals_lpls as lpls
|
||||||
|
|
||||||
|
def pca_val(a, a_max, nsets=None, center_axis=[0]):
|
||||||
|
"""Returns error estimate of crossvalidated PCA.
|
||||||
|
|
||||||
|
*Parameters*:
|
||||||
|
|
||||||
|
a : {array}
|
||||||
|
data matrix (n x m)
|
||||||
|
a_max : {integer}
|
||||||
|
Maximum number of components in model.
|
||||||
|
center_axis:
|
||||||
|
Centering
|
||||||
|
nsets : {integer}
|
||||||
|
number of segments
|
||||||
|
|
||||||
|
*Returns*:
|
||||||
|
|
||||||
|
rmsep : {array}
|
||||||
|
Squared error of prediction for each component and xvar (a_max, m)
|
||||||
|
xhat : {array}
|
||||||
|
Crossvalidated predicted a (a_max, m, n)
|
||||||
|
aopt : {integer}
|
||||||
|
Estimate of optimal number of components
|
||||||
|
|
||||||
|
*Notes*:
|
||||||
|
|
||||||
|
- Crossvalidation method is currently set to random blocks of diagonals.
|
||||||
|
|
||||||
|
"""
|
||||||
|
n, m = a.shape
|
||||||
|
if nsets == None:
|
||||||
|
nsets = n
|
||||||
|
|
||||||
|
err = zeros((a_max, n, m), dtype=a.dtype)
|
||||||
|
err_mn = zeros((a_max, n, m), dtype=a.dtype)
|
||||||
|
xhat = zeros((a_max, n, m), dtype=a.dtype)
|
||||||
|
mn_a = .5*(a.mean(0) + a.mean(1)[:,newaxis])
|
||||||
|
|
||||||
|
for i, val in enumerate(diag_cv(a.shape, nsets)):
|
||||||
|
old_values = a.take(val)
|
||||||
|
new_values = mn_a.take(val)
|
||||||
|
# impute mean values
|
||||||
|
b = a.copy()
|
||||||
|
a.put(val, new_values)
|
||||||
|
dat = pca(a, a_max, mode='normal', center_axis=center_axis)
|
||||||
|
Ti, Pi = dat['T'], dat['P']
|
||||||
|
bc = b - dat['mnx']
|
||||||
|
bc2 = b - b.mean(0)
|
||||||
|
for j in xrange(a_max):
|
||||||
|
# predict the imputed values
|
||||||
|
a_pred = dot(Ti[:,:j+1], Pi[:,:j+1].T).take(val)
|
||||||
|
a_true = bc2.take(val)
|
||||||
|
err[j,:,:].put(val, (a_true - a_pred)**2)
|
||||||
|
err_mn[j,:,:].put(val, (bc.take(val) - a_pred)**2)
|
||||||
|
xhat[j,:,:].put(val, a_pred)
|
||||||
|
|
||||||
|
# put original values back
|
||||||
|
a.put(val, old_values)
|
||||||
|
|
||||||
|
rmsep = sqrt(err).mean(1) # take mean over samples
|
||||||
|
rmsep2 = sqrt(err_mn).mean(1)
|
||||||
|
aopt = rmsep.mean(-1).argmin()
|
||||||
|
return rmsep, xhat, aopt, err, rmsep2
|
||||||
|
|
||||||
|
def pls_val(X, Y, a_max=2, nsets=None, center_axis=[0,0], verbose=False):
|
||||||
|
"""Performs crossvalidation for generalisation error in pls.
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
*Parameters*:
|
||||||
|
|
||||||
|
X : {array}
|
||||||
|
Main data matrix (m, n)
|
||||||
|
Y : {array}
|
||||||
|
External row data (m, l)
|
||||||
|
a_max : {integer}, optional
|
||||||
|
Maximum number of components to calculate (0, min(m,n))
|
||||||
|
nsets : (integer), optional
|
||||||
|
Number of crossvalidation sets
|
||||||
|
center_axis : {array-like}, optional
|
||||||
|
A three element array-like structure with elements in [-1,0,1,2],
|
||||||
|
that decides the type of centering used.
|
||||||
|
-1 : nothing
|
||||||
|
0 : row center
|
||||||
|
1 : column center
|
||||||
|
2 : double center
|
||||||
|
verbose : {boolean}, optional
|
||||||
|
Verbosity of console output. For use in debugging.
|
||||||
|
|
||||||
|
*Returns*:
|
||||||
|
|
||||||
|
rmsep : {array}
|
||||||
|
Root mean squred error of prediction (for each y-var)
|
||||||
|
yhat : {array}
|
||||||
|
Estimated responses
|
||||||
|
aopt : {integer}
|
||||||
|
Estimated value of optimal number of components
|
||||||
|
|
||||||
|
"""
|
||||||
|
dt = X.dtype
|
||||||
|
m, n = X.shape
|
||||||
|
k, l = Y.shape
|
||||||
|
assert m == k, "X (%d,%d) - Y (%d,%d) dim mismatch" %(m, n, k, l)
|
||||||
|
assert n == p, "X (%d,%d) - Z (%d,%d) dim mismatch" %(m, n, o, p)
|
||||||
|
if nsets == None:
|
||||||
|
nsets = m
|
||||||
|
if nsets > X.shape[0]:
|
||||||
|
print "nsets (%d) is larger than number of variables (%d).\nnsets: %d -> %d" %(nsets, m, nsets, m)
|
||||||
|
nsets = m
|
||||||
|
if m > n:
|
||||||
|
# kernel boosting
|
||||||
|
Yhat = _w_pls_predict(X, Y, a_max)
|
||||||
|
|
||||||
|
Yhat = empty((a_max, k, l), dtype=dt)
|
||||||
|
for cal, val in cv(k, nsets):
|
||||||
|
# do the training model
|
||||||
|
dat = pls(X[cal], Y[cal], a_max=a_max,center_axis=center_axis)
|
||||||
|
|
||||||
|
# center test data
|
||||||
|
xi = X[val,:] - dat['mnx']
|
||||||
|
ym = dat['mny']
|
||||||
|
|
||||||
|
# predictions
|
||||||
|
for a in range(a_max):
|
||||||
|
Yhat[a,val,:] = ym + dot(xi, dat['B'][a])
|
||||||
|
|
||||||
|
sep = (Y - Yhat)**2
|
||||||
|
rmsep = sqrt(sep.mean(1)).T
|
||||||
|
#aopt = find_aopt_from_sep(rmsep)
|
||||||
|
|
||||||
|
# todo: need a better support for classification error
|
||||||
|
error = prediction_error(Yhat, Y, method='1/2')
|
||||||
|
|
||||||
|
return rmsep, Yhat, error
|
||||||
|
|
||||||
def lpls_val(X, Y, Z, a_max=2, nsets=None,alpha=.5, center_axis=[2,0,2], zorth=False, verbose=False):
|
def lpls_val(X, Y, Z, a_max=2, nsets=None,alpha=.5, center_axis=[2,0,2], zorth=False, verbose=False):
|
||||||
"""Performs crossvalidation for generalisation error in lpls.
|
"""Performs crossvalidation for generalisation error in lpls.
|
||||||
@ -72,47 +205,29 @@ def lpls_val(X, Y, Z, a_max=2, nsets=None,alpha=.5, center_axis=[2,0,2], zorth=F
|
|||||||
assert (alpha >= 0 and alpha<=1), "Alpha needs to be within [0,1], got: %.2f" %alpha
|
assert (alpha >= 0 and alpha<=1), "Alpha needs to be within [0,1], got: %.2f" %alpha
|
||||||
|
|
||||||
Yhat = empty((a_max, k, l), 'd')
|
Yhat = empty((a_max, k, l), 'd')
|
||||||
for cal, val in cv(nsets, k):
|
for cal, val in cv(k, nsets):
|
||||||
# do the training model
|
# do the training model
|
||||||
dat = lpls(X[cal], Y[cal], Z, a_max=a_max, alpha=alpha,
|
dat = lpls(X[cal], Y[cal], Z, a_max=a_max, alpha=alpha,
|
||||||
center_axis=center_axis, zorth=zorth, verbose=verbose)
|
center_axis=center_axis, zorth=zorth, verbose=verbose)
|
||||||
|
|
||||||
# center test data
|
# center test data
|
||||||
if center_axis[0] != 1:
|
|
||||||
xi = X[val,:] - dat['mnx']
|
xi = X[val,:] - dat['mnx']
|
||||||
else:
|
|
||||||
xi = X[val] - X[cal].mean(1)[:,newaxis]
|
ym = dat['mny'][val,:]
|
||||||
if center_axis[2] != 1:
|
|
||||||
ym = dat['mny']
|
|
||||||
else:
|
|
||||||
ym = Y[cal].mean(1)[:,newaxis]
|
|
||||||
# predictions
|
# predictions
|
||||||
for a in range(a_max):
|
for a in range(a_max):
|
||||||
Yhat[a,val,:] = atleast_2d(ym + dot(xi, dat['B'][a]))
|
Yhat[a,val,:] = atleast_2d(ym + dot(xi, dat['B'][a]))
|
||||||
#if permute:
|
|
||||||
# xcal = X[cal]
|
|
||||||
# for a in range(1,a_max,1):
|
|
||||||
# for n in range(10):
|
|
||||||
# shuffle(cal)
|
|
||||||
# dat = lpls(xcal, Y[cal], Z, a_max=a_max, alpha=alpha,
|
|
||||||
# center_axis=center_axis, verbose=verbose)
|
|
||||||
|
|
||||||
|
|
||||||
# todo: need a better support for classification error
|
|
||||||
y_is_class = Y.dtype.char.lower() in ['i','p', 'b', 'h','?']
|
|
||||||
if y_is_class:
|
|
||||||
pass
|
|
||||||
#Yhat, err = class_error(Yhat, Y)
|
|
||||||
#return Yhat, err
|
|
||||||
|
|
||||||
sep = (Y - Yhat)**2
|
sep = (Y - Yhat)**2
|
||||||
rmsep = sqrt(sep.mean(1)).T
|
rmsep = sqrt(sep.mean(1)).T
|
||||||
aopt = find_aopt_from_sep(rmsep)
|
#aopt = find_aopt_from_sep(rmsep)
|
||||||
|
|
||||||
return rmsep, Yhat, aopt
|
# todo: need a better support for classification error
|
||||||
|
error = prediction_error(Yhat, Y, method='1/2')
|
||||||
|
|
||||||
def pca_jk(a, aopt, center_axis=[0], nsets=None):
|
return rmsep, Yhat, error
|
||||||
"""Returns jack-knife segements from PCA.
|
|
||||||
|
def pca_jk(a, aopt, nsets=None, center_axis=[0], method='cv'):
|
||||||
|
"""Returns jack-knife segments from PCA.
|
||||||
|
|
||||||
*Parameters*:
|
*Parameters*:
|
||||||
|
|
||||||
@ -124,6 +239,12 @@ def pca_jk(a, aopt, center_axis=[0], nsets=None):
|
|||||||
Centering
|
Centering
|
||||||
nsets : {integer}
|
nsets : {integer}
|
||||||
number of segments
|
number of segments
|
||||||
|
method : {'cv', 'diag', 'bs', 'bs_diag'}
|
||||||
|
Perturbation method is one of:
|
||||||
|
cv = leave samples out
|
||||||
|
diag = impute diagonals
|
||||||
|
bs = leave samples out with replacement (bootstrap)
|
||||||
|
bs_diag = impute diagonals
|
||||||
|
|
||||||
*Returns*:
|
*Returns*:
|
||||||
|
|
||||||
@ -135,22 +256,32 @@ def pca_jk(a, aopt, center_axis=[0], nsets=None):
|
|||||||
- Crossvalidation method is currently set to random blocks of samples.
|
- Crossvalidation method is currently set to random blocks of samples.
|
||||||
|
|
||||||
"""
|
"""
|
||||||
|
m, n = a.shape
|
||||||
if nsets == None:
|
if nsets == None:
|
||||||
nsets = a.shape[0]
|
nsets = m
|
||||||
|
|
||||||
Pcv = empty((nsets, a.shape[1], aopt), dtype=a.dtype)
|
Pcv = empty((nsets, a.shape[1], aopt), dtype=a.dtype)
|
||||||
|
if method == 'diag':
|
||||||
mn_a = .5*(a.mean(0) + a.mean(1)[:,newaxis])
|
mn_a = .5*(a.mean(0) + a.mean(1)[:,newaxis])
|
||||||
for i, (cal, val) in enumerate(cv_diag(a.shape, nsets)):
|
for i, val in enumerate(diag_cv(a.shape, nsets)):
|
||||||
old_values = a.take(ind)
|
old_values = a.take(val)
|
||||||
new_values = mn_a.take(ind)
|
new_values = mn_a.take(val)
|
||||||
a.put(ind, new_values)
|
# impute mean values
|
||||||
|
a.put(val, new_values)
|
||||||
dat = pca(a, aopt, mode='fast', scale='loads', center_axis=center_axis)
|
dat = pca(a, aopt, mode='fast', scale='loads', center_axis=center_axis)
|
||||||
PP[i,:,:] = dat['P']
|
Pcv[i,:,:] = dat['P']
|
||||||
a.put(ind, old_values)
|
# put original values back
|
||||||
|
a.put(val, old_values)
|
||||||
|
elif method == 'cv':
|
||||||
|
print "using ....cv "
|
||||||
|
for i, (cal, val) in enumerate(cv(m, nsets)):
|
||||||
|
Pcv[i,:,:] = pca(a[cal,:], aopt, mode='fast', scale='loads', center_axis = center_axis)['P']
|
||||||
|
else:
|
||||||
|
raise NotImplementedError(method)
|
||||||
|
|
||||||
return PP
|
return Pcv
|
||||||
|
|
||||||
def pls_jk(X, Y, a_opt, nsets=None, center_axis=True, verbose=False):
|
def pls_jk(X, Y, a_opt, nsets=None, center_axis=[0,0], verbose=False):
|
||||||
""" Returns jack-knife segements of W.
|
""" Returns jack-knife segements of W.
|
||||||
|
|
||||||
*Parameters*:
|
*Parameters*:
|
||||||
@ -187,9 +318,7 @@ def pls_jk(X, Y, a_opt, nsets=None, center_axis=True, verbose=False):
|
|||||||
for i, (cal, val) in enumerate(cv(k, nsets)):
|
for i, (cal, val) in enumerate(cv(k, nsets)):
|
||||||
if verbose:
|
if verbose:
|
||||||
print "Segment number: %d" %i
|
print "Segment number: %d" %i
|
||||||
dat = pls(X[cal,:], Y[cal,:], a_opt, scale='loads', mode='fast', center_axis=[0, 0])
|
dat = pls(X[cal,:], Y[cal,:], a_opt, scale='loads', mode='fast', center_axis=center_axis)
|
||||||
if any(isnan(dat['W'])):
|
|
||||||
1/0
|
|
||||||
Wcv[i,:,:] = dat['W']
|
Wcv[i,:,:] = dat['W']
|
||||||
|
|
||||||
return Wcv
|
return Wcv
|
||||||
@ -257,18 +386,18 @@ def lpls_jk(X, Y, Z, a_opt, nsets=None, xz_alpha=.5, center_axis=[2,0,2], zorth=
|
|||||||
|
|
||||||
return WWx, WWz
|
return WWx, WWz
|
||||||
|
|
||||||
def find_aopt_from_sep(sep, method='vanilla'):
|
def find_aopt_from_sep(err, method='vanilla'):
|
||||||
"""Returns an estimate of optimal number of components.
|
"""Returns an estimate of optimal number of components.
|
||||||
|
|
||||||
The estimate is based on the squared error of prediction from
|
The estimate is based on the error of prediction from
|
||||||
crossvalidation. This is pretty much wild guessing and it is
|
crossvalidation. This is pretty much wild guessing and it is
|
||||||
recomended to inspect model parameters and prediction errors
|
recomended to inspect model parameters and prediction errors
|
||||||
closely before deciding on the optimal number of components.
|
closely before deciding on the optimal number of components.
|
||||||
|
|
||||||
*Parameters*:
|
*Parameters*:
|
||||||
|
|
||||||
sep : {array}
|
err : {array}
|
||||||
Squared error of prediction
|
Error of prediction
|
||||||
method : ['vanilla', '75perc']
|
method : ['vanilla', '75perc']
|
||||||
Mehtod used to estimate optimal number of components
|
Mehtod used to estimate optimal number of components
|
||||||
|
|
||||||
@ -280,22 +409,23 @@ def find_aopt_from_sep(sep, method='vanilla'):
|
|||||||
|
|
||||||
if method == 'vanilla':
|
if method == 'vanilla':
|
||||||
# min rmsep
|
# min rmsep
|
||||||
rmsecv = sqrt(sep.mean(0))
|
rmsecv = sqrt(err.mean(0))
|
||||||
return rmsecv.argmin() + 1
|
return rmsecv.argmin() + 1
|
||||||
|
|
||||||
elif method == '75perc':
|
elif method == '75perc':
|
||||||
prct = .75 #percentile
|
raise NotImplementedError
|
||||||
ind = 1.*sep.shape[0]*prct
|
#prct = .75 #percentile
|
||||||
med = median(sep)
|
#ind = 1.*err.shape[0]*prct
|
||||||
prc_75 = []
|
#med = median(err)
|
||||||
for col in sep.T:
|
#prc_75 = []
|
||||||
col = sorted(col)
|
#for col in err.T:
|
||||||
prc_75.append(col[int(ind)])
|
# col = sorted(col)
|
||||||
prc_75 = asarray(prc_75)
|
# prc_75.append(col[int(ind)])
|
||||||
for i in range(1, sep.shape[1], 1):
|
#prc_75 = asarray(prc_75)
|
||||||
if med[i-1]<prc_75[i]:
|
#for i in range(1, err.shape[1], 1):
|
||||||
return i
|
# if med[i-1]<prc_75[i]:
|
||||||
return len(med)
|
# return i
|
||||||
|
#return len(med)
|
||||||
|
|
||||||
def cv(N, K, randomise=True, sequential=False):
|
def cv(N, K, randomise=True, sequential=False):
|
||||||
"""Generates K (training, validation) index pairs.
|
"""Generates K (training, validation) index pairs.
|
||||||
@ -351,13 +481,29 @@ def cv(N, K, randomise=True, sequential=False):
|
|||||||
validation = [i for i in index if i % K == k]
|
validation = [i for i in index if i % K == k]
|
||||||
yield training, validation
|
yield training, validation
|
||||||
|
|
||||||
def diag_cv(shape, nsets=9):
|
def diag_cv(shape, nsets=9, randomise=True):
|
||||||
"""Generates K (training, validation) index pairs.
|
"""Generates K (training, validation) index pairs.
|
||||||
|
|
||||||
*Parameters*:
|
*Parameters*:
|
||||||
|
|
||||||
N : {integer}
|
shape : {tuple}
|
||||||
alpha -- scalar, approx. portion of data perturbed
|
Array shape
|
||||||
|
nsets : {integer}
|
||||||
|
Number of cv sets
|
||||||
|
randomise : {boolean}
|
||||||
|
Randomise diagonal index
|
||||||
|
|
||||||
|
*Returns*:
|
||||||
|
|
||||||
|
training : {array-like}
|
||||||
|
training-indices
|
||||||
|
validation : {array-like}
|
||||||
|
validation-indices
|
||||||
|
|
||||||
|
*Notes*:
|
||||||
|
|
||||||
|
This index is based on the full index (raveled row-major ordering).
|
||||||
|
It extracts along diagonals to ensure balanced removal along both axis
|
||||||
"""
|
"""
|
||||||
try:
|
try:
|
||||||
m, n = shape
|
m, n = shape
|
||||||
@ -365,59 +511,195 @@ def diag_cv(shape, nsets=9):
|
|||||||
raise ValueError("shape needs to be a two-tuple")
|
raise ValueError("shape needs to be a two-tuple")
|
||||||
if nsets>m or nsets>n:
|
if nsets>m or nsets>n:
|
||||||
msg = "You may not use more subsets than max(n_rows, n_cols)"
|
msg = "You may not use more subsets than max(n_rows, n_cols)"
|
||||||
raise ValueError, msg
|
nsets = min(m, n)
|
||||||
nm = n*m
|
nm = n*m
|
||||||
index = arange(nm)
|
index = arange(nm)
|
||||||
n_ind = arange(n)
|
n_ind = arange(n+1)
|
||||||
shuffle(n_ind) # random start diag
|
#shuffle(n_ind) # random start diag
|
||||||
start_inds = array_split(n_ind, nsets)
|
start_inds = array_split(n_ind, nsets)
|
||||||
for v in range(nsets):
|
for v in range(nsets):
|
||||||
validation = []
|
validation = set()
|
||||||
for start in start_inds[v]:
|
for start in start_inds[v]:
|
||||||
ind = arange(start+v, nm, n+1)
|
ind = arange(start, nm, n+1)
|
||||||
[validation.append(i) for i in ind]
|
validation.update(ind)
|
||||||
training = [j for j in index if j not in validation]
|
#training = [j for j in index if j not in validation]
|
||||||
yield training, validation
|
yield list(validation)
|
||||||
|
|
||||||
|
|
||||||
def class_error(y_hat, y, method='vanilla'):
|
|
||||||
""" Not used.
|
|
||||||
"""
|
|
||||||
a_opt, k, l = y_hat.shape
|
|
||||||
y_hat_c = zeros((k, l), dtype='d')
|
|
||||||
if method == vanilla:
|
|
||||||
pass
|
|
||||||
for a in range(a_opt):
|
|
||||||
for i in range(k):
|
|
||||||
y_hat_c[a, val, argmax(y_hat[a,val,:])] = 1.0
|
|
||||||
err = 100*((y_hat_c + y) == 2).sum(1)/y.sum(0).astype('d')
|
|
||||||
|
|
||||||
return y_hat_c, err
|
|
||||||
|
|
||||||
def prediction_error(y_hat, y, method='squared'):
|
def prediction_error(y_hat, y, method='squared'):
|
||||||
"""Loss function on multiclass Y.
|
"""Loss function on multiclass Y.
|
||||||
|
|
||||||
Assumes y is a binary dummy class matrix (samples, classes)
|
Assumes y is a binary dummy class matrix (samples, classes)
|
||||||
"""
|
"""
|
||||||
k, l = y.shape
|
k, l = y.shape
|
||||||
|
a_max, kk, ll = y_hat.shape
|
||||||
|
error = empty((a_max, l))
|
||||||
|
for a in range(a_max):
|
||||||
|
yha = y_hat[a, :, :]
|
||||||
if method == 'hinge':
|
if method == 'hinge':
|
||||||
pass
|
err = zeros((k, l))
|
||||||
|
for j in range(l):
|
||||||
|
for i in range(k):
|
||||||
|
if y[i,j] == 1:
|
||||||
|
if yha[i, j] >= 1:
|
||||||
|
err[i,j] = 0
|
||||||
|
else:
|
||||||
|
err[i,j] = abs(y[i,j] - yha[i,j])
|
||||||
|
elif y[i,j] == 0:
|
||||||
|
if yha[i, j] <= 0:
|
||||||
|
err[i,j] = 0
|
||||||
|
else:
|
||||||
|
err[i,j] = abs(y[i,j] - yha[i,j])
|
||||||
|
|
||||||
elif method == 'smooth_hinge':
|
elif method == 'smooth_hinge':
|
||||||
z = 90
|
err = zeros((k, l))
|
||||||
|
for j in range(l):
|
||||||
|
for i in range(k):
|
||||||
|
if y[i,j] == 1:
|
||||||
|
if yha[i, j] >= 1:
|
||||||
|
err[i,j] = 0
|
||||||
|
elif yha[i,j] < 1 and yha[i,j] > 0:
|
||||||
|
err[i,j] = abs(y[i,j] - yha[i,j])
|
||||||
|
else:
|
||||||
|
err[i,j] = 1
|
||||||
|
|
||||||
|
elif y[i,j] == 0:
|
||||||
|
if yha[i, j] <= 0:
|
||||||
|
err[i,j] = 0
|
||||||
|
elif yha[i,j] < 1 and yha[i,j] > 0:
|
||||||
|
err[i,j] = abs(y[i,j] - yha[i,j])
|
||||||
|
else:
|
||||||
|
err[i,j] = 1
|
||||||
|
|
||||||
elif method == 'abs':
|
elif method == 'abs':
|
||||||
err = abs(y - y_hat)
|
err = abs(y - yha)
|
||||||
elif method == 'squared':
|
elif method == 'squared':
|
||||||
err = (y - y_hat)**2
|
err = (y - yha)**2
|
||||||
elif method == '0/1':
|
elif method == '0/1':
|
||||||
pred = zeros_like(y_hat)
|
pred = zeros((k, l))
|
||||||
for i, row in enumerate(y_hat):
|
for i, row in enumerate(yha):
|
||||||
largest = row.argsort()[-1]
|
largest = row.argsort()[-1]
|
||||||
pred[i, largest] = 1.
|
pred[i, largest] = 1.
|
||||||
err = abs(y - pred)
|
err = abs(y - pred)
|
||||||
elif method == '1/2':
|
elif method == '1/2':
|
||||||
y_hat[y_hat>.5] = 1
|
yh = yha.copy()
|
||||||
y_hat[y_hat<.5] = 0
|
yh[yha>.5] = 1
|
||||||
err = abs(y - y_hat)
|
yh[yha<.5] = 0
|
||||||
|
err = abs(y - yh)
|
||||||
|
else:
|
||||||
|
raise ValueError("Option: %s (method) not valid" %method)
|
||||||
|
|
||||||
return err
|
error[a,:] = err.mean(0)
|
||||||
|
|
||||||
|
return error
|
||||||
|
|
||||||
|
def _wkernel_pls_val(X, Y, a_max, n_blocks=None):
|
||||||
|
"""Returns rmsep and aopt for pls tailored for wide X.
|
||||||
|
|
||||||
|
The error of cross validation is calculated
|
||||||
|
based on random block cross-validation. With number of blocks equal to
|
||||||
|
number of samples [default] gives leave-one-out cv.
|
||||||
|
The pls model is based on the simpls algorithm for wide X, an is quite
|
||||||
|
fast in very high dimensional X data.
|
||||||
|
|
||||||
|
*Parameters*:
|
||||||
|
|
||||||
|
X : ndarray
|
||||||
|
column centered data matrix of size (samples x variables)
|
||||||
|
Y : ndarray
|
||||||
|
column centered response matrix of size (samples x responses)
|
||||||
|
a_max : scalar
|
||||||
|
Maximum number of components
|
||||||
|
n_blocks : scalar
|
||||||
|
Number of blocks in cross validation
|
||||||
|
|
||||||
|
*Returns*:
|
||||||
|
|
||||||
|
rmsep : ndarray
|
||||||
|
Root Mean Square Error of cross-validated Predictions
|
||||||
|
aopt : scalar
|
||||||
|
Guestimate of the optimal number of components
|
||||||
|
|
||||||
|
SeeAlso:
|
||||||
|
|
||||||
|
- pls_cv_val : Same output, not optimised for wide X
|
||||||
|
- w_simpls : Simpls algorithm for wide X
|
||||||
|
|
||||||
|
Notes
|
||||||
|
-----
|
||||||
|
|
||||||
|
Based (cowardly translated) on m-files from the Chemoact toolbox
|
||||||
|
X, Y inputs need to be centered (fixme: check)
|
||||||
|
|
||||||
|
|
||||||
|
Examples
|
||||||
|
--------
|
||||||
|
|
||||||
|
>>> import numpy as n
|
||||||
|
>>> X = n.array([[1., 2., 3.],[]])
|
||||||
|
>>> Y = n.array([[1., 2., 3.],[]])
|
||||||
|
>>> w_pls(X, Y, 1)
|
||||||
|
[4,5,6], 1
|
||||||
|
"""
|
||||||
|
dt = X.dtype
|
||||||
|
k, l = m_shape(Y)
|
||||||
|
PRESS = zeros((l, a_max+1), dtype=dt)
|
||||||
|
if n_blocks==None:
|
||||||
|
n_blocks = Y.shape[0]
|
||||||
|
XXt = dot(X, X.T)
|
||||||
|
V = w_pls_gen(XXt, Y, n_blocks=n_blocks, center=True)
|
||||||
|
for Din, Doi, Yin, Yout in V:
|
||||||
|
ym = -sum(Yout, 0)[newaxis]/(1.0*Yin.shape[0])
|
||||||
|
PRESS[:,0] = PRESS[:,0] + ((Yout - ym)**2).sum(0)
|
||||||
|
|
||||||
|
dat = w_simpls(Din, Yin, a_max)
|
||||||
|
Q, U, H = dat['Q'], dat['U'], dat['H']
|
||||||
|
That = dot(Doi, dot(U, inv(triu(dot(H.T, U))) ))
|
||||||
|
|
||||||
|
Yhat = zeros((a_max, k, l), dtype=dt)
|
||||||
|
for j in range(l):
|
||||||
|
TQ = dot(That, triu(dot(Q[j,:][:,newaxis], ones((1,a_max)))) )
|
||||||
|
Yhat[:,:,l] = TQ
|
||||||
|
E = Yout[:,j][:,newaxis] - TQ
|
||||||
|
E = E + sum(E, 0)/Din.shape[0]
|
||||||
|
PRESS[j,1:] = PRESS[j,1:] + sum(E**2, 0)
|
||||||
|
#Yhat = Yin - dot(That,Q.T)
|
||||||
|
msep = PRESS/(Y.shape[0])
|
||||||
|
#aopt = find_aopt_from_sep(msep)
|
||||||
|
return Yhat, sqrt(msep)
|
||||||
|
|
||||||
|
def _w_pls(aat, b, aopt):
|
||||||
|
""" Pls for wide matrices.
|
||||||
|
Fast pls for crossval, used in calc rmsep for wide X
|
||||||
|
There is no P or W. T is normalised
|
||||||
|
|
||||||
|
aat = centered kernel matrix
|
||||||
|
b = centered y
|
||||||
|
"""
|
||||||
|
bb = b.copy()
|
||||||
|
k, l = m_shape(b)
|
||||||
|
m, m = m_shape(aat)
|
||||||
|
U = empty((m, aopt)) # W
|
||||||
|
T = empty((m, aopt))
|
||||||
|
R = empty((m, aopt)) # R
|
||||||
|
PROJ = empty((m, aopt)) # P?
|
||||||
|
|
||||||
|
for i in range(aopt):
|
||||||
|
if has_sym:
|
||||||
|
s, q = symeig(dot(dot(b.T, aat), b), range=(l,l),overwrite=True)
|
||||||
|
else:
|
||||||
|
q, s, vh = svd(dot(dot(b.T, aat), b), full_matrices=0)
|
||||||
|
q = q[:,:1]
|
||||||
|
u = dot(b , q) #y-factor scores
|
||||||
|
U[:,i] = u.ravel()
|
||||||
|
t = dot(aat, u)
|
||||||
|
|
||||||
|
t = t/vnorm(t)
|
||||||
|
T[:,i] = t.ravel()
|
||||||
|
r = dot(aat, t)#score-weights
|
||||||
|
#r = r/vnorm(r)
|
||||||
|
R[:,i] = r.ravel()
|
||||||
|
PROJ[:,: i+1] = dot(T[:,:i+1], inv(dot(T[:,:i+1].T, R[:,:i+1])) )
|
||||||
|
if i<aopt:
|
||||||
|
b = b - dot(PROJ[:,:i+1], dot(R[:,:i+1].T, b) )
|
||||||
|
C = dot(bb.T, T)
|
||||||
|
@ -8,7 +8,7 @@ __docformat__ = "restructuredtext en"
|
|||||||
from math import sqrt as msqrt
|
from math import sqrt as msqrt
|
||||||
|
|
||||||
from numpy import dot,empty,zeros,apply_along_axis,newaxis,finfo,sqrt,r_,expand_dims,\
|
from numpy import dot,empty,zeros,apply_along_axis,newaxis,finfo,sqrt,r_,expand_dims,\
|
||||||
minimum, any, isnan
|
minimum,any,isnan,ones,tile
|
||||||
from numpy.linalg import inv,svd
|
from numpy.linalg import inv,svd
|
||||||
from scipy.sandbox import arpack
|
from scipy.sandbox import arpack
|
||||||
|
|
||||||
@ -79,11 +79,13 @@ def pca(X, aopt, scale='scores', mode='normal', center_axis=[0]):
|
|||||||
"""
|
"""
|
||||||
|
|
||||||
m, n = X.shape
|
m, n = X.shape
|
||||||
min_aopt = min(m, n)
|
max_aopt = min(m, n)
|
||||||
if center_axis != None:
|
if center_axis != None:
|
||||||
X, mnx = center(X, center_axis[0])
|
X, mnx = center(X, center_axis[0])
|
||||||
min_aopt = min_aopt - 1
|
max_aopt = max_aopt - 1
|
||||||
assert(aopt <= min_aopt)
|
if aopt > max_aopt:
|
||||||
|
print "Using aopt: %d" %max_aopt
|
||||||
|
aopt = max_aopt
|
||||||
if m > (n+100) or n > (m+100):
|
if m > (n+100) or n > (m+100):
|
||||||
u, s, v = esvd(X, aopt)
|
u, s, v = esvd(X, aopt)
|
||||||
else:
|
else:
|
||||||
@ -108,7 +110,7 @@ def pca(X, aopt, scale='scores', mode='normal', center_axis=[0]):
|
|||||||
P = P*s
|
P = P*s
|
||||||
|
|
||||||
if mode in ['fast', 'f', 'F']:
|
if mode in ['fast', 'f', 'F']:
|
||||||
return {'T':T, 'P':P, 'aopt':aopt}
|
return {'T':T, 'P':P, 'aopt':aopt, 'mnx': mnx}
|
||||||
|
|
||||||
if mode in ['detailed', 'd', 'D']:
|
if mode in ['detailed', 'd', 'D']:
|
||||||
E = empty((aopt, m, n))
|
E = empty((aopt, m, n))
|
||||||
@ -135,7 +137,7 @@ def pca(X, aopt, scale='scores', mode='normal', center_axis=[0]):
|
|||||||
expvarx = r_[0, 100*e.cumsum()/(X*X).sum()]
|
expvarx = r_[0, 100*e.cumsum()/(X*X).sum()]
|
||||||
|
|
||||||
return {'T': T, 'P': P, 'E': E, 'evx': expvarx, 'leverage': lev, 'ssqx': ssq,
|
return {'T': T, 'P': P, 'E': E, 'evx': expvarx, 'leverage': lev, 'ssqx': ssq,
|
||||||
'aopt': aopt, 'eigvals': e}
|
'aopt': aopt, 'eigvals': e, 'mnx': mnx}
|
||||||
|
|
||||||
def pcr(a, b, aopt, scale='scores',mode='normal',center_axis=[0, 0]):
|
def pcr(a, b, aopt, scale='scores',mode='normal',center_axis=[0, 0]):
|
||||||
""" Principal Component Regression.
|
""" Principal Component Regression.
|
||||||
@ -690,7 +692,7 @@ def center(a, axis):
|
|||||||
### TODO ###
|
### TODO ###
|
||||||
# Perhaps not(!) use broadcasting and return full centering
|
# Perhaps not(!) use broadcasting and return full centering
|
||||||
# matrix instead ?
|
# matrix instead ?
|
||||||
|
#
|
||||||
|
|
||||||
# check if we have a vector
|
# check if we have a vector
|
||||||
is_vec = len(a.shape) == 1
|
is_vec = len(a.shape) == 1
|
||||||
@ -698,31 +700,72 @@ def center(a, axis):
|
|||||||
is_vec = a.shape[0] == 1 or a.shape[1] == 1
|
is_vec = a.shape[0] == 1 or a.shape[1] == 1
|
||||||
if is_vec:
|
if is_vec:
|
||||||
if axis == 2:
|
if axis == 2:
|
||||||
warnings.warn("Double centering of vector ignored, using ordinary centering")
|
#warnings.warn("Double centering of vector ignored, using ordinary centering")
|
||||||
|
raise ValueError("Double centering of vector is not a valid option")
|
||||||
if axis == -1:
|
if axis == -1:
|
||||||
mn = 0
|
mn = zeros(a.shape)
|
||||||
else:
|
else:
|
||||||
mn = a.mean()
|
mn = a.mean()*ones(a.shape)
|
||||||
return a - mn, mn
|
return a - mn, mn
|
||||||
|
|
||||||
if axis == -1:
|
if axis == -1:
|
||||||
mn = zeros((1,a.shape[1],))
|
mn = zeros((1,a.shape[1],))
|
||||||
#mn = tile(mn, (a.shape[0], 1))
|
mn = tile(mn, (a.shape[0], 1))
|
||||||
elif axis == 0:
|
elif axis == 0:
|
||||||
mn = a.mean(0)[newaxis]
|
mn = a.mean(0)[newaxis]
|
||||||
#mn = tile(mn, (a.shape[0], 1))
|
mn = tile(mn, (a.shape[0], 1))
|
||||||
elif axis == 1:
|
elif axis == 1:
|
||||||
mn = a.mean(1)[:,newaxis]
|
mn = a.mean(1)[:,newaxis]
|
||||||
#mn = tile(mn, (1, a.shape[1]))
|
mn = tile(mn, (1, a.shape[1]))
|
||||||
elif axis == 2:
|
elif axis == 2:
|
||||||
#fixme: double centering returns column mean as loc-vector, ok?
|
#fixme: double centering returns column mean as loc-vector, ok?
|
||||||
mn = a.mean(0)[newaxis] + a.mean(1)[:,newaxis] - a.mean()
|
mn = a.mean(0)[newaxis] + a.mean(1)[:,newaxis] - a.mean()
|
||||||
return a - mn , a.mean(0)[newaxis]
|
return a - mn , mn
|
||||||
else:
|
else:
|
||||||
raise IOError("input error: axis must be in [-1,0,1,2]")
|
raise IOError("input error: axis must be in [-1,0,1,2]")
|
||||||
|
|
||||||
return a - mn, mn
|
return a - mn, mn
|
||||||
|
|
||||||
|
def inv_center(a, mn_a, axis):
|
||||||
|
"""Inverse centering.
|
||||||
|
|
||||||
|
Adding row, column or double centering to a matrix.
|
||||||
|
This method uses broadcasting, so the size of a needs only
|
||||||
|
to match the axis argument.
|
||||||
|
|
||||||
|
*Parameters*:
|
||||||
|
|
||||||
|
a : {array}
|
||||||
|
Input data, to be centered
|
||||||
|
|
||||||
|
axis : {integer}
|
||||||
|
Which centering to perform.
|
||||||
|
0 = col center, 1 = row center, 2 = double center
|
||||||
|
-1 = nothing
|
||||||
|
|
||||||
|
*Returns*:
|
||||||
|
|
||||||
|
a_centered : {array}
|
||||||
|
Centered data matrix
|
||||||
|
|
||||||
|
*Notes*
|
||||||
|
|
||||||
|
May just as well used to subtract a mean (just use negative mn_a)
|
||||||
|
|
||||||
|
"""
|
||||||
|
if axis == -1:
|
||||||
|
return a
|
||||||
|
m, n = a.shape
|
||||||
|
k, o = mn_a.shape
|
||||||
|
if axis == 0: #row centering
|
||||||
|
assert(n == o and k == 1)
|
||||||
|
elif axis == 1: # column centering
|
||||||
|
assert(n == k and o == 1)
|
||||||
|
elif axis == 2: # double centering
|
||||||
|
assert(n == o and m == k)
|
||||||
|
|
||||||
|
return a + mn_a
|
||||||
|
|
||||||
def _scale(a, axis):
|
def _scale(a, axis):
|
||||||
""" Matrix scaling to unit variance.
|
""" Matrix scaling to unit variance.
|
||||||
|
|
||||||
|
@ -14,7 +14,7 @@ from engines import pls
|
|||||||
from engines import nipals_lpls as lpls
|
from engines import nipals_lpls as lpls
|
||||||
|
|
||||||
|
|
||||||
def hotelling(Pcv, P, p_center='median', cov_center=median,
|
def hotelling(Pcv, P, p_center='median', cov_center='median',
|
||||||
alpha=0.3, crot=True, strict=False):
|
alpha=0.3, crot=True, strict=False):
|
||||||
"""Returns regularized hotelling T^2.
|
"""Returns regularized hotelling T^2.
|
||||||
|
|
||||||
@ -31,8 +31,8 @@ def hotelling(Pcv, P, p_center='median', cov_center=median,
|
|||||||
Calibration model paramter
|
Calibration model paramter
|
||||||
p_center : {'median', 'mean', 'cal_model'}, optional
|
p_center : {'median', 'mean', 'cal_model'}, optional
|
||||||
Location method for sub-segments
|
Location method for sub-segments
|
||||||
cov_center : {py_func}, optional
|
cov_center : {'median', 'mean', 'cal_model'}, optional
|
||||||
Location function
|
Pooled covariance estimate
|
||||||
alpha : {float}, optional
|
alpha : {float}, optional
|
||||||
Regularisation towards pooled covariance estimate.
|
Regularisation towards pooled covariance estimate.
|
||||||
crot : {boolean}, optional
|
crot : {boolean}, optional
|
||||||
@ -79,33 +79,43 @@ def hotelling(Pcv, P, p_center='median', cov_center=median,
|
|||||||
for i in xrange(n):
|
for i in xrange(n):
|
||||||
Pi = Pcv[:,i,:] # (n_sets x amax)
|
Pi = Pcv[:,i,:] # (n_sets x amax)
|
||||||
Pi_ctr = P_ctr[i,:] # (1 x amax)
|
Pi_ctr = P_ctr[i,:] # (1 x amax)
|
||||||
Pim = (Pi - Pi_ctr)*msqrt(n_sets-1)
|
#Pim = (Pi - Pi_ctr)*msqrt(n_sets-1)
|
||||||
Cov_i[i] = (1./n_sets)*dot(Pim.T, Pim)
|
#Cov_i[i] = (1./n_sets)*dot(Pim.T, Pim)
|
||||||
|
Pim = (Pi - Pi_ctr)
|
||||||
Cov = cov_center(Cov_i)
|
Cov_i[i] = dot(Pim.T, Pim)
|
||||||
reg_cov = (1. - alpha)*Cov_i + alpha*Cov
|
if cov_center == 'median':
|
||||||
|
Cov_p = median(Cov_i)
|
||||||
|
elif cov_center == 'mean':
|
||||||
|
Cov_p = Cov.mean(0)
|
||||||
|
else:
|
||||||
|
print "Pooled covariance est. invalid, using median"
|
||||||
|
print cov_center
|
||||||
|
Cov_p = median(Cov_i)
|
||||||
|
reg_cov = (1. - alpha)*Cov_i + alpha*Cov_p
|
||||||
for i in xrange(n):
|
for i in xrange(n):
|
||||||
Pc = P_ctr[i,:]
|
Pc = P_ctr[i,:]
|
||||||
sigma = reg_cov[i]
|
sigma = reg_cov[i]
|
||||||
T_sq[i] = dot(dot(Pc, inv(sigma)), Pc)
|
T_sq[i] = dot(dot(Pc, inv(sigma)), Pc)
|
||||||
return T_sq
|
return T_sq
|
||||||
|
|
||||||
def procrustes(a, b, strict=True, center=False, verbose=False):
|
def procrustes(a, b, strict=True, center=False, force_norm=False, verbose=False):
|
||||||
"""Orthogonal rotation of b to a.
|
"""Orthogonal rotation of b to a.
|
||||||
|
|
||||||
Procrustes rotation is an orthogonal rotoation of one subspace
|
Procrustes rotation is an orthogonal rotation of one subspace
|
||||||
onto another by minimising the squared error.
|
onto another by minimising the squared error.
|
||||||
|
|
||||||
*Parameters*:
|
*Parameters*:
|
||||||
|
|
||||||
a : {array}
|
a : {array}
|
||||||
Input array
|
Input array, stationary
|
||||||
b : {array}
|
b : {array}
|
||||||
Input array
|
Input array, rotate this to max. fit to a
|
||||||
strict : {boolean}
|
strict : {boolean}
|
||||||
Only do flipping and shuffling
|
Only do flipping and shuffling
|
||||||
center : {boolean}
|
center : {boolean}
|
||||||
Center before rotation, translate back after
|
Center before rotation, translate back after
|
||||||
|
force_norm : {boolean}
|
||||||
|
Ensure that columns of a and b are orthonormal
|
||||||
verbose : {boolean}
|
verbose : {boolean}
|
||||||
Show sum of squares
|
Show sum of squares
|
||||||
|
|
||||||
@ -126,6 +136,12 @@ def procrustes(a, b, strict=True, center=False, verbose=False):
|
|||||||
mn_b = b.mean(0)
|
mn_b = b.mean(0)
|
||||||
b = b - mn_b
|
b = b - mn_b
|
||||||
|
|
||||||
|
if force_norm:
|
||||||
|
a_norm = apply_along_axis(norm, 0, a)
|
||||||
|
a = a/a_norm
|
||||||
|
b_norm = apply_along_axis(norm, 0, b)
|
||||||
|
b = b/b_norm
|
||||||
|
|
||||||
u, s, vt = svd(dot(b.T, a))
|
u, s, vt = svd(dot(b.T, a))
|
||||||
Cm = dot(u, vt) # Cm: orthogonal rotation matrix
|
Cm = dot(u, vt) # Cm: orthogonal rotation matrix
|
||||||
if strict:
|
if strict:
|
||||||
@ -166,13 +182,13 @@ def _ensure_strict(C, only_flips=True):
|
|||||||
|
|
||||||
"""
|
"""
|
||||||
if only_flips:
|
if only_flips:
|
||||||
C = eye(Cm.shape[0])*sign(Cm)
|
C = eye(C.shape[0])*sign(C)
|
||||||
return C
|
return C
|
||||||
Cm = zeros(C.shape, dtype='d')
|
Cm = zeros(C.shape, dtype='d')
|
||||||
Cm[abs(C)>.6] = 1.
|
Cm[abs(C)>.6] = 1.
|
||||||
if det(Cm)>1:
|
if det(Cm)>1:
|
||||||
raise NotImplementedError
|
raise NotImplementedError
|
||||||
return Cm*S
|
return Cm*sign(C)
|
||||||
|
|
||||||
def lpls_qvals(X, Y, Z, aopt=None, alpha=.3, zx_alpha=.5, n_iter=20,
|
def lpls_qvals(X, Y, Z, aopt=None, alpha=.3, zx_alpha=.5, n_iter=20,
|
||||||
sim_method='shuffle', p_center='med', cov_center=median,
|
sim_method='shuffle', p_center='med', cov_center=median,
|
||||||
@ -248,7 +264,7 @@ def lpls_qvals(X, Y, Z, aopt=None, alpha=.3, zx_alpha=.5, n_iter=20,
|
|||||||
Wc, Lc = lpls_jk(X, Y, Z ,aopt, zorth=zorth)
|
Wc, Lc = lpls_jk(X, Y, Z ,aopt, zorth=zorth)
|
||||||
cal_tsq_x = hotelling(Wc, dat['W'], alpha=alpha)
|
cal_tsq_x = hotelling(Wc, dat['W'], alpha=alpha)
|
||||||
cal_tsq_z = hotelling(Lc, dat['L'], alpha=alpha)
|
cal_tsq_z = hotelling(Lc, dat['L'], alpha=alpha)
|
||||||
|
print "morn"
|
||||||
# Perturbations
|
# Perturbations
|
||||||
index = arange(m)
|
index = arange(m)
|
||||||
for i in range(n_iter):
|
for i in range(n_iter):
|
||||||
@ -257,9 +273,10 @@ def lpls_qvals(X, Y, Z, aopt=None, alpha=.3, zx_alpha=.5, n_iter=20,
|
|||||||
dat = lpls(X, Y[indi,:], Z, aopt, scale='loads', center_axis=center_axis, zorth=zorth)
|
dat = lpls(X, Y[indi,:], Z, aopt, scale='loads', center_axis=center_axis, zorth=zorth)
|
||||||
Wi, Li = lpls_jk(X, Y[indi,:], Z, aopt, nsets=nsets)
|
Wi, Li = lpls_jk(X, Y[indi,:], Z, aopt, nsets=nsets)
|
||||||
pert_tsq_x[:,i] = hotelling(Wi, dat['W'], alpha=alpha)
|
pert_tsq_x[:,i] = hotelling(Wi, dat['W'], alpha=alpha)
|
||||||
pert_tsq_z[:,i] = hotelling(Li, dat['L'], alpha=alpha)
|
# no reason to borrow variance in dag (alpha ->some small value)
|
||||||
|
pert_tsq_z[:,i] = hotelling(Li, dat['L'], alpha=0.01)
|
||||||
|
|
||||||
return _fdr(cal_tsq_z, pert_tsq_z, median), _fdr(cal_tsq_x, pert_tsq_x, median)
|
return cal_tsq_z, pert_tsq_z, cal_tsq_x, pert_tsq_x
|
||||||
|
|
||||||
|
|
||||||
def pls_qvals(X, Y, aopt, alpha=.3, n_iter=20,p_center='med', cov_center=median,
|
def pls_qvals(X, Y, aopt, alpha=.3, n_iter=20,p_center='med', cov_center=median,
|
||||||
@ -335,7 +352,7 @@ def pls_qvals(X, Y, aopt, alpha=.3, n_iter=20,p_center='med', cov_center=median,
|
|||||||
Wi = pls_jk(X, Y[indi,:], aopt, nsets=nsets, center_axis=center_axis)
|
Wi = pls_jk(X, Y[indi,:], aopt, nsets=nsets, center_axis=center_axis)
|
||||||
pert_tsq_x[:,i] = hotelling(Wi, dat['W'], alpha=alpha)
|
pert_tsq_x[:,i] = hotelling(Wi, dat['W'], alpha=alpha)
|
||||||
|
|
||||||
return _fdr(cal_tsq_x, pert_tsq_x, median)
|
return cal_tsq_x, pert_tsq_x
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
@ -372,7 +389,7 @@ def _fdr(tsq, tsqp, loc_method=median):
|
|||||||
gene expression data using dimension reduction methods, BMC
|
gene expression data using dimension reduction methods, BMC
|
||||||
bioinformatics, 2007
|
bioinformatics, 2007
|
||||||
"""
|
"""
|
||||||
n, = tsq.shape
|
n = tsq.shape[0]
|
||||||
k, m = tsqp.shape
|
k, m = tsqp.shape
|
||||||
assert(n==k)
|
assert(n==k)
|
||||||
n_false = empty((n, m), 'd')
|
n_false = empty((n, m), 'd')
|
||||||
|
27
sandbox/powerit/Makefile
Normal file
27
sandbox/powerit/Makefile
Normal file
@ -0,0 +1,27 @@
|
|||||||
|
CC=gcc
|
||||||
|
PYTHON_INCLUDE=/usr/include/python2.5
|
||||||
|
PYTHON_LIBRARY=/usr/lib/python2.5
|
||||||
|
NUMPY_INCLUDE=$(PYTHON_LIBRARY)/site-packages/numpy/core/include/numpy/
|
||||||
|
GOTO_LIBRARY=/home/flatberg/GotoBLAS
|
||||||
|
BLAS_LIBRARY=/usr/lib
|
||||||
|
BLAS=blas
|
||||||
|
GOTO=goto
|
||||||
|
|
||||||
|
|
||||||
|
all: numpy_module.so
|
||||||
|
|
||||||
|
numpy_module.so: numpy_module.o sympowerit.o
|
||||||
|
$(CC) -Wall -shared numpy_module.o sympowerit.o -o _numpy_module.so -L$(PYTHON_LIBRARY) -lpython2.5 -L$(BLAS_LIBRARY) -l$(BLAS) -llapack -lg2c -lm
|
||||||
|
|
||||||
|
numpy_module.o: numpy_module.c numpy_module.h
|
||||||
|
$(CC) -fPIC -Wall -O2 -g -c -I$(PYTHON_INCLUDE) -I$(NUMPY_INCLUDE) numpy_module.c
|
||||||
|
|
||||||
|
c_egines.o: sympowerit.c
|
||||||
|
$(CC) -Wall -O2 -g -c sympowerit.c
|
||||||
|
|
||||||
|
clean:
|
||||||
|
-rm numpy_module.o sympowerit.o
|
||||||
|
-rm -rf _numpy_module.so
|
||||||
|
|
||||||
|
|
||||||
|
|
70
sandbox/powerit/numpy_module.c
Executable file
70
sandbox/powerit/numpy_module.c
Executable file
@ -0,0 +1,70 @@
|
|||||||
|
/* A file to test imorting C modules for handling arrays to Python */
|
||||||
|
|
||||||
|
/* Python.h includes <stdio.h>, <string.h>, <errno.h>, <limits.h>, and <stdlib.h> (if available)*/
|
||||||
|
#include "Python.h"
|
||||||
|
#include "arrayobject.h"
|
||||||
|
#include "numpy_module.h"
|
||||||
|
#include "sympowerit.h"
|
||||||
|
#include <cblas.h>
|
||||||
|
|
||||||
|
|
||||||
|
/* ==== Set up the methods table ====================== */
|
||||||
|
static PyMethodDef _numpy_moduleMethods[] = {
|
||||||
|
{"sym_powerit", sym_powerit, METH_VARARGS},
|
||||||
|
{NULL, NULL} /* Sentinel - marks the end of this structure */
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
/* ==== Initialize the numpy_module functions =============== */
|
||||||
|
// Module name must be _numpy_module in compile and linked
|
||||||
|
void init_numpy_module() {
|
||||||
|
(void) Py_InitModule("_numpy_module", _numpy_moduleMethods);
|
||||||
|
import_array(); // Must be present for NumPy. Called first after above line.
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/* =========== Power iteration on symmetric matrix ========== */
|
||||||
|
static PyObject *sym_powerit(PyObject *self, PyObject *args)
|
||||||
|
{
|
||||||
|
PyArrayObject *X=NULL, *T=NULL, *E=NULL,*U=NULL;
|
||||||
|
double *tin, *ein, tol;
|
||||||
|
int amax, n, info, maxiter, verbose;
|
||||||
|
int dims[2];
|
||||||
|
|
||||||
|
/* Parse tuple of input arguments*/
|
||||||
|
if (!PyArg_ParseTuple(args, "O!O!iidii",
|
||||||
|
&PyArray_Type, &X,
|
||||||
|
&PyArray_Type, &T,
|
||||||
|
&n,
|
||||||
|
&amax,
|
||||||
|
&tol,
|
||||||
|
&maxiter,
|
||||||
|
&verbose)
|
||||||
|
)
|
||||||
|
return NULL;
|
||||||
|
if (NULL == X) return NULL;
|
||||||
|
|
||||||
|
/* Get the dimensions of the input */
|
||||||
|
n = X->dimensions[0];
|
||||||
|
|
||||||
|
/* Create output/ work arrays, no inplace calculations */
|
||||||
|
dims[0] = n;
|
||||||
|
dims[1] = amax;
|
||||||
|
U = (PyArrayObject*) PyArray_NewCopy(T, NPY_CORDER);
|
||||||
|
dims[1] = n;
|
||||||
|
E = (PyArrayObject*) PyArray_NewCopy(X, NPY_CORDER);
|
||||||
|
|
||||||
|
/* Get pointers to contigous data (row-major)*/
|
||||||
|
ein = (double *)E->data;
|
||||||
|
tin = (double *)U->data;
|
||||||
|
|
||||||
|
/* Call sympower method */
|
||||||
|
info = sympowerit (ein, n, tin, amax, tol, maxiter, verbose);
|
||||||
|
|
||||||
|
Py_DECREF(E);
|
||||||
|
|
||||||
|
return Py_BuildValue("Ni", U, info);
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
|
1
sandbox/powerit/numpy_module.h
Executable file
1
sandbox/powerit/numpy_module.h
Executable file
@ -0,0 +1 @@
|
|||||||
|
/* Header to test of C modules for arrays for Python: sympowerit.c */
/* Python callablefunctions */
static PyObject *sym_powerit(PyObject *self, PyObject *args);
|
116
sandbox/powerit/sympowerit.c
Normal file
116
sandbox/powerit/sympowerit.c
Normal file
@ -0,0 +1,116 @@
|
|||||||
|
#include <cblas.h>
|
||||||
|
#include <math.h>
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
|
||||||
|
/* =============== Main functions body ====================*/
|
||||||
|
|
||||||
|
int sympowerit(double *E, int n, double *T, int amax, double tol,
|
||||||
|
int maxiter, int verbose)
|
||||||
|
{
|
||||||
|
/*
|
||||||
|
PURPOSE: Estimate eigenvectos of a symmetric matrix using the power method.
|
||||||
|
|
||||||
|
CALLING SEQUENCE:
|
||||||
|
info = sympowerit(*E, n, *T, amax, tol, maxiter);
|
||||||
|
|
||||||
|
INPUTS:
|
||||||
|
E symmetric matrix from XX^T/X^TX of centered data matrix
|
||||||
|
type: *double
|
||||||
|
n elements in X first dimension
|
||||||
|
type: int
|
||||||
|
type: *double
|
||||||
|
T workspace for scores (m, amax)
|
||||||
|
type: *double
|
||||||
|
amax maximum number of principal components
|
||||||
|
type: int
|
||||||
|
tol convergence limit
|
||||||
|
type: double
|
||||||
|
maxiter maximum number of iteraitons
|
||||||
|
type: int
|
||||||
|
verbose the amount of debug output
|
||||||
|
*/
|
||||||
|
|
||||||
|
int iter, a, j;
|
||||||
|
int info = 0;
|
||||||
|
double sumsq, l2norm, *y,*x, sgn, diff, alpha;
|
||||||
|
double lambda = 0.0;
|
||||||
|
|
||||||
|
if (verbose>1) verbose=1;
|
||||||
|
|
||||||
|
/* Allocate space for working vector and eigenvector */
|
||||||
|
y = (double *) malloc(n*sizeof(double));
|
||||||
|
x = (double *) malloc(n*sizeof(double));
|
||||||
|
|
||||||
|
/* Loop over all components to be estimated*/
|
||||||
|
for (a=0; a<=amax-1; a++){
|
||||||
|
/* Reset work-/eigen-vector*/
|
||||||
|
for (j=0; j<n; j++) {
|
||||||
|
y[j] = 0.0;
|
||||||
|
x[j] = T[(j*amax)+a];
|
||||||
|
}
|
||||||
|
/*x[0] = 1.0;*/
|
||||||
|
|
||||||
|
/* Main power-iteration loop */
|
||||||
|
for ( iter = 0; iter <= maxiter; iter++ ) {
|
||||||
|
/* Matrix-vector product */
|
||||||
|
/*cblas_dsymv (CblasRowMajor, CblasUpper, n, 1.0, E, n,
|
||||||
|
x, 1, 0.0, y, 1);*/
|
||||||
|
cblas_dgemv (CblasRowMajor, CblasNoTrans, n, n, 1.0, E, n,
|
||||||
|
x, 1, 0.0, y, 1);
|
||||||
|
|
||||||
|
/* Normalise y */
|
||||||
|
sumsq = y[0] * y[0];
|
||||||
|
lambda = x[0] * y[0];
|
||||||
|
for ( j = 1; j < n; j++ ) {
|
||||||
|
sumsq += y[j] * y[j];
|
||||||
|
lambda += x[j] * y[j];
|
||||||
|
}
|
||||||
|
l2norm = sqrt ( sumsq );
|
||||||
|
for ( j = 0; j < n; j++ ) y[j] /= l2norm;
|
||||||
|
|
||||||
|
/*Check for convergence */
|
||||||
|
sgn = ( lambda < 0 ? -1.0 : 1.0 );
|
||||||
|
diff = x[0] - sgn * y[0];
|
||||||
|
sumsq = diff * diff;
|
||||||
|
x[0] = y[0];
|
||||||
|
for ( j = 0; j < n; j++ ) {
|
||||||
|
diff = x[j] - sgn * y[j];
|
||||||
|
sumsq += diff * diff;
|
||||||
|
x[j] = y[j];
|
||||||
|
}
|
||||||
|
if ( sqrt ( sumsq ) < tol ) {
|
||||||
|
if (verbose == 1){
|
||||||
|
printf("\nComp: %d\n", a);
|
||||||
|
printf("Converged in %d iterations\n", iter);
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
if (iter >= maxiter){
|
||||||
|
if (verbose == 1){
|
||||||
|
printf("\nComp: %d\n", a);
|
||||||
|
printf("Max iter reached.\n");
|
||||||
|
printf("Error: %.2E\n", sumsq);
|
||||||
|
}
|
||||||
|
info = 1;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
/* Calculate T */
|
||||||
|
for (j=0; j<n; j++){
|
||||||
|
y[j] = sgn*sqrt(sgn*lambda)*x[j];
|
||||||
|
T[(j*amax)+a] = y[j];
|
||||||
|
}
|
||||||
|
|
||||||
|
/* rank one deflation of residual matrix */
|
||||||
|
/*cblas_dsyr (CblasRowMajor, CblasUpper, n, -1.0, x, 1, E, n);*/
|
||||||
|
alpha = -1.0*lambda;
|
||||||
|
cblas_dger (CblasRowMajor, n, n, alpha, x, 1, x, 1, E, n);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Free used space */
|
||||||
|
free(x);
|
||||||
|
free(y);
|
||||||
|
|
||||||
|
return (info);
|
||||||
|
}
|
2
sandbox/powerit/sympowerit.h
Normal file
2
sandbox/powerit/sympowerit.h
Normal file
@ -0,0 +1,2 @@
|
|||||||
|
int sympowerit (double *X, int n, double *T, int amax, double tol, int maxiter, int verbose);
|
||||||
|
/* Simple symmetric power iterations */
|
100
sandbox/powerit/sympowerit.py
Normal file
100
sandbox/powerit/sympowerit.py
Normal file
@ -0,0 +1,100 @@
|
|||||||
|
from numpy import *
|
||||||
|
|
||||||
|
HAS_SYMPOWER=True
|
||||||
|
try:
|
||||||
|
from _numpy_module import sym_powerit
|
||||||
|
except:
|
||||||
|
raise ImportError("Sym_powerit module not present")
|
||||||
|
HAS_SYMPOWER = False
|
||||||
|
|
||||||
|
class SymPowerException(Exception):
|
||||||
|
pass
|
||||||
|
|
||||||
|
_ERRORCODES = {1: "Some eigenvectors did not converge, try to increase \nthe number of iterations or lower the tolerance level",
|
||||||
|
0: ""}
|
||||||
|
|
||||||
|
|
||||||
|
def sympowerit(xx, T0=None, mn_center=False, a_max=10, tol=1e-7, maxiter=100,
|
||||||
|
verbose=0):
|
||||||
|
"""Estimate eigenvectos of a symmetric matrix using the power method.
|
||||||
|
|
||||||
|
*Parameters*:
|
||||||
|
|
||||||
|
xx : {array}
|
||||||
|
Symmetric square array (m, m)
|
||||||
|
T0 : {array}
|
||||||
|
Initial solution (m, a_max), optional
|
||||||
|
mn_center : {boolean}, optional
|
||||||
|
Mean centering
|
||||||
|
a_max : {integer}, optional
|
||||||
|
Number of components to extract
|
||||||
|
tol : {float}, optional
|
||||||
|
Tolerance level of eigenvector solver
|
||||||
|
maxiter : {integer}
|
||||||
|
Maximum number of poweriterations to use
|
||||||
|
verbose : {integer}
|
||||||
|
Debug output (==1)
|
||||||
|
|
||||||
|
*Returns*:
|
||||||
|
v : {array}
|
||||||
|
Eigenvectors of xx, (m , a_max)
|
||||||
|
"""
|
||||||
|
|
||||||
|
valid_types = ['D','d','F','f']
|
||||||
|
dtype = xx.dtype.char
|
||||||
|
n, m = xx.shape
|
||||||
|
if not(dtype in valid_types):
|
||||||
|
msg = "Array type: (%s) needs to be a float or double" %dtype
|
||||||
|
raise SymPowerException(msg)
|
||||||
|
if not (m==n):
|
||||||
|
msg = "Input array needs to be square, input: (%d,%d)" %(m,n)
|
||||||
|
raise SymPowerException(msg)
|
||||||
|
# small test of symmetry
|
||||||
|
N = 5
|
||||||
|
num = random.randint(0,n,N)
|
||||||
|
for i in range(5):
|
||||||
|
j = N-5
|
||||||
|
if abs(xx[num[i],num[j]] - xx[num[j],num[i]])>1e-15:
|
||||||
|
msg = "Array needs to be symmetric"
|
||||||
|
raise SymPowerException(msg)
|
||||||
|
|
||||||
|
if not a_max:
|
||||||
|
a_max = 10
|
||||||
|
|
||||||
|
if T0 !=None:
|
||||||
|
tn, tm = T0.shape
|
||||||
|
if not (tn==n):
|
||||||
|
msg = "Start eigenvectors need to match input array ()"
|
||||||
|
raise SymPowerException(msg)
|
||||||
|
if not (tm==a_max):
|
||||||
|
msg = "Start eigenvectors need to match input a_max ()"
|
||||||
|
raise SymPowerException(msg)
|
||||||
|
else:
|
||||||
|
T0 = zeros((n, a_max), 'd')
|
||||||
|
T0[0,:] = ones((a_max,),'d')
|
||||||
|
|
||||||
|
if mn_center:
|
||||||
|
xx = _center(xx)
|
||||||
|
|
||||||
|
# call c-function
|
||||||
|
T, info = sym_powerit(xx, T0, n, a_max, tol, maxiter, verbose)
|
||||||
|
|
||||||
|
if info != 0:
|
||||||
|
if verbose:
|
||||||
|
print _ERRORCODES.get(info, "Dont know this error")
|
||||||
|
return T
|
||||||
|
|
||||||
|
|
||||||
|
def _center(xx, ret_mn=False):
|
||||||
|
"""Returns mean centered symmetric kernel matrix.
|
||||||
|
"""
|
||||||
|
n = xx.shape[0]
|
||||||
|
h = xx.sum(0)[:,newaxis]
|
||||||
|
h = (h - mean(h)/2)/n
|
||||||
|
mn_a = h + h.T
|
||||||
|
xxc = xx - mn_a
|
||||||
|
if ret_mn:
|
||||||
|
return xxc, mn_a
|
||||||
|
return xxc
|
||||||
|
|
||||||
|
|
Reference in New Issue
Block a user