Added symmetric power iterations to sandbox

main
Arnar Flatberg 2007-12-12 22:08:55 +00:00
parent 0956581c42
commit 1103245d85
10 changed files with 851 additions and 172 deletions

@@ -30,7 +30,7 @@ class get_matvec:
         if isinstance(obj, sb.ndarray):
             self.callfunc = self.type1
             return
-        meth = getattr(obj,self.methname,None)
+        meth = getattr(obj, self.methname, None)
         if not callable(meth):
             raise ValueError, "Object must be an array "\
                   "or have a callable %s attribute." % (self.methname,)
@@ -48,12 +48,12 @@ class get_matvec:
         return sb.dot(self.obj.A, x)
     def type2(self, x):
-        return self.obj(x,*self.args)
+        return self.obj(x, *self.args)

-def eigen(A,k=6,M=None,ncv=None,which='LM',
-          maxiter=None,tol=0, return_eigenvectors=True):
-    """ Return k eigenvalues and eigenvectors of the matrix A.
+def eigen(A, k=6, M=None, ncv=None, which='LM',
+          maxiter=None, tol=0, return_eigenvectors=True, v0=None):
+    """Return k eigenvalues and eigenvectors of the matrix A.

     Solves A * x[i] = w[i] * x[i], the standard eigenvalue problem for
     w[i] eigenvalues with corresponding eigenvectors x[i].
@@ -69,6 +69,7 @@ def eigen(A,k=6,M=None,ncv=None,which='LM',
     M -- (Not implemented)
          A symmetric positive-definite matrix for the generalized
          eigenvalue problem A * x = w * M * x
+    v0 -- Initial starting solution (n x 1)

     Outputs:
@@ -99,8 +100,8 @@ def eigen(A,k=6,M=None,ncv=None,which='LM',
     """
     try:
-        n,ny=A.shape
-        n==ny
+        n, ny = A.shape
+        n == ny
     except:
         raise AttributeError("matrix is not square")
     if M is not None:
@@ -108,12 +109,12 @@ def eigen(A,k=6,M=None,ncv=None,which='LM',
     # some defaults
     if ncv is None:
-        ncv=2*k+1
-    ncv=min(ncv,n)
-    if maxiter==None:
-        maxiter=n*10
+        ncv = 2*k + 1
+    ncv = min(ncv, n)
+    if maxiter == None:
+        maxiter = n*10
     # guess type
     resid = sb.zeros(n,'f')
     try:
         typ = A.dtype.char
@@ -129,7 +130,7 @@ def eigen(A,k=6,M=None,ncv=None,which='LM',
         raise ValueError("k must be less than rank(A), k=%d"%k)
     if maxiter <= 0:
         raise ValueError("maxiter must be positive, maxiter=%d"%maxiter)
-    whiches=['LM','SM','LR','SR','LI','SI']
+    whiches = ['LM','SM','LR','SR','LI','SI']
     if which not in whiches:
         raise ValueError("which must be one of %s"%' '.join(whiches))
     if ncv > n or ncv < k:
@@ -141,17 +142,26 @@ def eigen(A,k=6,M=None,ncv=None,which='LM',
     eigextract = _arpack.__dict__[ltr+'neupd']

     matvec = get_matvec(A)
-    v = sb.zeros((n,ncv),typ) # holds Ritz vectors
-    resid = sb.zeros(n,typ) # residual
-    workd = sb.zeros(3*n,typ) # workspace
-    workl = sb.zeros(3*ncv*ncv+6*ncv,typ) # workspace
-    iparam = sb.zeros(11,'int') # problem parameters
-    ipntr = sb.zeros(14,'int') # pointers into workspaces
-    info = 0
+    v = sb.zeros((n, ncv), typ) # holds Ritz vectors
+    if v0 == None:
+        resid = sb.zeros(n, typ) # residual
+        info = 0
+    else: # starting vector is given
+        nn, kk = v0.shape
+        if nn != n:
+            raise ValueError("starting vector must be: (%d, 1), got: (%d, %d)" %(n, nn, kk))
+        resid = v0[:,0].astype(typ)
+        info = 1
+    workd = sb.zeros(3*n, typ) # workspace
+    workl = sb.zeros(3*ncv*ncv+6*ncv, typ) # workspace
+    iparam = sb.zeros(11, 'int') # problem parameters
+    ipntr = sb.zeros(14, 'int') # pointers into workspaces
     ido = 0

     if typ in 'FD':
-        rwork = sb.zeros(ncv,typ.lower())
+        rwork = sb.zeros(ncv, typ.lower())

     # only supported mode is 1: Ax=lx
     ishfts = 1
@@ -160,7 +170,7 @@ def eigen(A,k=6,M=None,ncv=None,which='LM',
     iparam[0] = ishfts
     iparam[2] = maxiter
     iparam[6] = mode1

     while True:
         if typ in 'fd':
             ido,resid,v,iparam,ipntr,info =\
@@ -173,9 +183,9 @@ def eigen(A,k=6,M=None,ncv=None,which='LM',
         if (ido == -1 or ido == 1):
             # compute y = A * x
-            xslice = slice(ipntr[0]-1, ipntr[0]-1+n)
-            yslice = slice(ipntr[1]-1, ipntr[1]-1+n)
-            workd[yslice]=matvec(workd[xslice])
+            xslice = slice(ipntr[0] - 1, ipntr[0] - 1 + n)
+            yslice = slice(ipntr[1] - 1, ipntr[1] - 1 + n)
+            workd[yslice] = matvec(workd[xslice])
         else: # done
             break
@@ -233,7 +243,7 @@ def eigen(A,k=6,M=None,ncv=None,which='LM',
 def eigen_symmetric(A,k=6,M=None,ncv=None,which='LM',
-                    maxiter=None,tol=0, return_eigenvectors=True):
+                    maxiter=None,tol=0, return_eigenvectors=True, v0=None):
     """ Return k eigenvalues and eigenvectors of the real symmetric matrix A.

     Solves A * x[i] = w[i] * x[i], the standard eigenvalue problem for
@@ -253,6 +263,8 @@ def eigen_symmetric(A,k=6,M=None,ncv=None,which='LM',
          A symmetric positive-definite matrix for the generalized
          eigenvalue problem A * x = w * M * x
+    v0 -- Starting vector (n, 1)

     Outputs:

     w -- An real array of k eigenvalues
@@ -325,12 +337,22 @@ def eigen_symmetric(A,k=6,M=None,ncv=None,which='LM',
     matvec = get_matvec(A)
     v = sb.zeros((n,ncv),typ)
-    resid = sb.zeros(n,typ)
+    if v0 == None:
+        resid = sb.zeros(n, typ) # residual
+        info = 0
+    else: # starting solution is given
+        nn, kk = v0.shape
+        if nn != n:
+            raise ValueError("starting vectors must be: (%d, %d), got: (%d, %d)" %(n, k, nn, kk))
+        resid = v0[:,0].astype(typ)
+        info = 1
     workd = sb.zeros(3*n,typ)
     workl = sb.zeros(ncv*(ncv+8),typ)
     iparam = sb.zeros(11,'int')
     ipntr = sb.zeros(11,'int')
-    info = 0
     ido = 0

     # only supported mode is 1: Ax=lx
@@ -340,8 +362,7 @@ def eigen_symmetric(A,k=6,M=None,ncv=None,which='LM',
     iparam[0] = ishfts
     iparam[2] = maxiter
     iparam[6] = mode1

     while True:
         ido,resid,v,iparam,ipntr,info =\
             eigsolver(ido,bmat,which,k,tol,resid,v,iparam,ipntr,
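A usage sketch of the new v0 argument (editorial, not part of the commit): warm-starting the symmetric solver with a previously computed Ritz vector. This assumes the sandbox build is importable as scipy.sandbox.arpack; per the diff above, a user-supplied residual is signalled to ARPACK via info=1.

    import numpy as np
    from scipy.sandbox import arpack

    A = np.random.rand(50, 50)
    A = A + A.T                                       # symmetric test matrix
    w, v = arpack.eigen_symmetric(A, k=4)             # cold start: ARPACK picks the residual
    v0 = v[:, :1].copy()                              # (n, 1) starting vector
    w2, v2 = arpack.eigen_symmetric(A, k=4, v0=v0)    # warm start from the leading Ritz vector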

@@ -2,16 +2,149 @@
 The primary use is crossvalidation.
 """
-__all__ = ['lpls_val', 'pls_jk', 'lpls_jk']
+__all__ = ['pca_val', 'pls_val', 'lpls_val', 'pls_jk', 'lpls_jk']
 __docformat__ = "restructuredtext en"

 from numpy import dot,empty,zeros,sqrt,atleast_2d,argmax,asarray,median,\
-     array_split,arange, isnan, any
+     array_split,arange, isnan, any, newaxis
 from numpy.random import shuffle
-from engines import pls
+from engines import pls, pca
 from engines import nipals_lpls as lpls

+def pca_val(a, a_max, nsets=None, center_axis=[0]):
+    """Returns error estimate of crossvalidated PCA.
+
+    *Parameters*:
+
+        a : {array}
+            data matrix (n x m)
+        a_max : {integer}
+            Maximum number of components in model.
+        center_axis : {array-like}
+            Centering
+        nsets : {integer}
+            number of segments
+
+    *Returns*:
+
+        rmsep : {array}
+            Root mean squared error of prediction for each component and x-var (a_max, m)
+        xhat : {array}
+            Crossvalidated predicted a (a_max, n, m)
+        aopt : {integer}
+            Estimate of optimal number of components
+
+    *Notes*:
+
+        - Crossvalidation method is currently set to random blocks of diagonals.
+    """
+    n, m = a.shape
+    if nsets == None:
+        nsets = n
+    err = zeros((a_max, n, m), dtype=a.dtype)
+    err_mn = zeros((a_max, n, m), dtype=a.dtype)
+    xhat = zeros((a_max, n, m), dtype=a.dtype)
+    mn_a = .5*(a.mean(0) + a.mean(1)[:,newaxis])
+    for i, val in enumerate(diag_cv(a.shape, nsets)):
+        old_values = a.take(val)
+        new_values = mn_a.take(val)
+        # impute mean values
+        b = a.copy()
+        a.put(val, new_values)
+        dat = pca(a, a_max, mode='normal', center_axis=center_axis)
+        Ti, Pi = dat['T'], dat['P']
+        bc = b - dat['mnx']
+        bc2 = b - b.mean(0)
+        for j in xrange(a_max):
+            # predict the imputed values
+            a_pred = dot(Ti[:,:j+1], Pi[:,:j+1].T).take(val)
+            a_true = bc2.take(val)
+            err[j,:,:].put(val, (a_true - a_pred)**2)
+            err_mn[j,:,:].put(val, (bc.take(val) - a_pred)**2)
+            xhat[j,:,:].put(val, a_pred)
+        # put original values back
+        a.put(val, old_values)
+    rmsep = sqrt(err).mean(1) # take mean over samples
+    rmsep2 = sqrt(err_mn).mean(1)
+    aopt = rmsep.mean(-1).argmin()
+    return rmsep, xhat, aopt, err, rmsep2
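A quick usage sketch (editorial, not in the diff). Note that pca_val imputes into a in place but restores the original values before returning.

    from numpy.random import rand
    a = rand(20, 8)
    rmsep, xhat, aopt, err, rmsep2 = pca_val(a, a_max=5, nsets=10)
    print "optimal number of components:", aopt + 1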
+def pls_val(X, Y, a_max=2, nsets=None, center_axis=[0,0], verbose=False):
+    """Performs crossvalidation for generalisation error in pls.
+
+    *Parameters*:
+
+        X : {array}
+            Main data matrix (m, n)
+        Y : {array}
+            External row data (m, l)
+        a_max : {integer}, optional
+            Maximum number of components to calculate (0, min(m,n))
+        nsets : {integer}, optional
+            Number of crossvalidation sets
+        center_axis : {array-like}, optional
+            A two element array-like structure with elements in [-1,0,1,2],
+            that decides the type of centering used.
+            -1 : nothing
+            0 : row center
+            1 : column center
+            2 : double center
+        verbose : {boolean}, optional
+            Verbosity of console output. For use in debugging.
+
+    *Returns*:
+
+        rmsep : {array}
+            Root mean squared error of prediction (for each y-var)
+        yhat : {array}
+            Estimated responses
+        error : {array}
+            Prediction error of responses
+    """
+    dt = X.dtype
+    m, n = X.shape
+    k, l = Y.shape
+    assert m == k, "X (%d,%d) - Y (%d,%d) dim mismatch" %(m, n, k, l)
+    if nsets == None:
+        nsets = m
+    if nsets > X.shape[0]:
+        print "nsets (%d) is larger than number of samples (%d).\nnsets: %d -> %d" %(nsets, m, nsets, m)
+        nsets = m
+    #if m > n:
+    #    # kernel boosting for wide X (not implemented)
+    #    Yhat = _w_pls_predict(X, Y, a_max)
+    Yhat = empty((a_max, k, l), dtype=dt)
+    for cal, val in cv(k, nsets):
+        # do the training model
+        dat = pls(X[cal], Y[cal], a_max=a_max, center_axis=center_axis)
+        # center test data
+        xi = X[val,:] - dat['mnx']
+        ym = dat['mny']
+        # predictions
+        for a in range(a_max):
+            Yhat[a,val,:] = ym + dot(xi, dat['B'][a])
+    sep = (Y - Yhat)**2
+    rmsep = sqrt(sep.mean(1)).T
+    #aopt = find_aopt_from_sep(rmsep)
+    # todo: need a better support for classification error
+    error = prediction_error(Yhat, Y, method='1/2')
+    return rmsep, Yhat, error
 def lpls_val(X, Y, Z, a_max=2, nsets=None,alpha=.5, center_axis=[2,0,2], zorth=False, verbose=False):
     """Performs crossvalidation for generalisation error in lpls.
@@ -72,47 +205,29 @@ def lpls_val(X, Y, Z, a_max=2, nsets=None,alpha=.5, center_axis=[2,0,2], zorth=F
     assert (alpha >= 0 and alpha <= 1), "Alpha needs to be within [0,1], got: %.2f" %alpha

     Yhat = empty((a_max, k, l), 'd')
-    for cal, val in cv(nsets, k):
+    for cal, val in cv(k, nsets):
         # do the training model
         dat = lpls(X[cal], Y[cal], Z, a_max=a_max, alpha=alpha,
                    center_axis=center_axis, zorth=zorth, verbose=verbose)
         # center test data
-        if center_axis[0] != 1:
-            xi = X[val,:] - dat['mnx']
-        else:
-            xi = X[val] - X[cal].mean(1)[:,newaxis]
-        if center_axis[2] != 1:
-            ym = dat['mny']
-        else:
-            ym = Y[cal].mean(1)[:,newaxis]
+        xi = X[val,:] - dat['mnx']
+        ym = dat['mny'][val,:]
         # predictions
         for a in range(a_max):
             Yhat[a,val,:] = atleast_2d(ym + dot(xi, dat['B'][a]))
-        #if permute:
-        #    xcal = X[cal]
-        #    for a in range(1,a_max,1):
-        #        for n in range(10):
-        #            shuffle(cal)
-        #            dat = lpls(xcal, Y[cal], Z, a_max=a_max, alpha=alpha,
-        #                       center_axis=center_axis, verbose=verbose)
-
-    # todo: need a better support for classification error
-    y_is_class = Y.dtype.char.lower() in ['i','p', 'b', 'h','?']
-    if y_is_class:
-        pass
-        #Yhat, err = class_error(Yhat, Y)
-        #return Yhat, err
     sep = (Y - Yhat)**2
     rmsep = sqrt(sep.mean(1)).T
-    aopt = find_aopt_from_sep(rmsep)
-    return rmsep, Yhat, aopt
+    #aopt = find_aopt_from_sep(rmsep)
+    # todo: need a better support for classification error
+    error = prediction_error(Yhat, Y, method='1/2')
+    return rmsep, Yhat, error
-def pca_jk(a, aopt, center_axis=[0], nsets=None):
+def pca_jk(a, aopt, nsets=None, center_axis=[0], method='cv'):
     """Returns jack-knife segments from PCA.

     *Parameters*:
@@ -124,6 +239,12 @@ def pca_jk(a, aopt, center_axis=[0], nsets=None):
             Centering
         nsets : {integer}
             number of segments
+        method : {'cv', 'diag', 'bs', 'bs_diag'}
+            Perturbation method is one of:
+            cv = leave samples out
+            diag = impute diagonals
+            bs = leave samples out with replacement (bootstrap)
+            bs_diag = impute diagonals

     *Returns*:
@@ -135,22 +256,32 @@ def pca_jk(a, aopt, center_axis=[0], nsets=None):
         - Crossvalidation method is currently set to random blocks of samples.
     """
+    m, n = a.shape
     if nsets == None:
-        nsets = a.shape[0]
+        nsets = m
     Pcv = empty((nsets, a.shape[1], aopt), dtype=a.dtype)
-    mn_a = .5*(a.mean(0) + a.mean(1)[:,newaxis])
-    for i, (cal, val) in enumerate(cv_diag(a.shape, nsets)):
-        old_values = a.take(ind)
-        new_values = mn_a.take(ind)
-        a.put(ind, new_values)
-        dat = pca(a, aopt, mode='fast', scale='loads', center_axis=center_axis)
-        PP[i,:,:] = dat['P']
-        a.put(ind, old_values)
-    return PP
+    if method == 'diag':
+        mn_a = .5*(a.mean(0) + a.mean(1)[:,newaxis])
+        for i, val in enumerate(diag_cv(a.shape, nsets)):
+            old_values = a.take(val)
+            new_values = mn_a.take(val)
+            # impute mean values
+            a.put(val, new_values)
+            dat = pca(a, aopt, mode='fast', scale='loads', center_axis=center_axis)
+            Pcv[i,:,:] = dat['P']
+            # put original values back
+            a.put(val, old_values)
+    elif method == 'cv':
+        for i, (cal, val) in enumerate(cv(m, nsets)):
+            Pcv[i,:,:] = pca(a[cal,:], aopt, mode='fast', scale='loads', center_axis=center_axis)['P']
+    else:
+        raise NotImplementedError(method)
+    return Pcv
-def pls_jk(X, Y, a_opt, nsets=None, center_axis=True, verbose=False):
+def pls_jk(X, Y, a_opt, nsets=None, center_axis=[0,0], verbose=False):
     """ Returns jack-knife segments of W.

     *Parameters*:
@@ -187,9 +318,7 @@ def pls_jk(X, Y, a_opt, nsets=None, center_axis=True, verbose=False):
     for i, (cal, val) in enumerate(cv(k, nsets)):
         if verbose:
             print "Segment number: %d" %i
-        dat = pls(X[cal,:], Y[cal,:], a_opt, scale='loads', mode='fast', center_axis=[0, 0])
-        if any(isnan(dat['W'])):
-            1/0
+        dat = pls(X[cal,:], Y[cal,:], a_opt, scale='loads', mode='fast', center_axis=center_axis)
         Wcv[i,:,:] = dat['W']
     return Wcv
@@ -257,18 +386,18 @@ def lpls_jk(X, Y, Z, a_opt, nsets=None, xz_alpha=.5, center_axis=[2,0,2], zorth=
     return WWx, WWz

-def find_aopt_from_sep(sep, method='vanilla'):
+def find_aopt_from_sep(err, method='vanilla'):
     """Returns an estimate of optimal number of components.

-    The estimate is based on the squared error of prediction from
+    The estimate is based on the error of prediction from
     crossvalidation. This is pretty much wild guessing and it is
     recommended to inspect model parameters and prediction errors
     closely before deciding on the optimal number of components.

     *Parameters*:

-        sep : {array}
-            Squared error of prediction
+        err : {array}
+            Error of prediction
         method : ['vanilla', '75perc']
             Method used to estimate optimal number of components
@@ -280,22 +409,23 @@ def find_aopt_from_sep(sep, method='vanilla'):
     if method == 'vanilla':
         # min rmsep
-        rmsecv = sqrt(sep.mean(0))
+        rmsecv = sqrt(err.mean(0))
         return rmsecv.argmin() + 1

     elif method == '75perc':
-        prct = .75 #percentile
-        ind = 1.*sep.shape[0]*prct
-        med = median(sep)
-        prc_75 = []
-        for col in sep.T:
-            col = sorted(col)
-            prc_75.append(col[int(ind)])
-        prc_75 = asarray(prc_75)
-        for i in range(1, sep.shape[1], 1):
-            if med[i-1]<prc_75[i]:
-                return i
-        return len(med)
+        raise NotImplementedError
 def cv(N, K, randomise=True, sequential=False):
     """Generates K (training, validation) index pairs.
@@ -351,13 +481,29 @@ def cv(N, K, randomise=True, sequential=False):
         validation = [i for i in index if i % K == k]
         yield training, validation

-def diag_cv(shape, nsets=9):
-    """Generates K (training, validation) index pairs.
+def diag_cv(shape, nsets=9, randomise=True):
+    """Generates K validation index sets taken along diagonals.

     *Parameters*:

-        N : {integer}
-            alpha -- scalar, approx. portion of data perturbed
+        shape : {tuple}
+            Array shape
+        nsets : {integer}
+            Number of cv sets
+        randomise : {boolean}
+            Randomise diagonal index
+
+    *Returns*:
+
+        validation : {array-like}
+            validation-indices
+
+    *Notes*:
+
+        This index is based on the full index (raveled row-major ordering).
+        It extracts along diagonals to ensure balanced removal along both axes.
     """
     try:
         m, n = shape
@@ -365,59 +511,195 @@ def diag_cv(shape, nsets=9):
         raise ValueError("shape needs to be a two-tuple")
     if nsets>m or nsets>n:
-        msg = "You may not use more subsets than max(n_rows, n_cols)"
-        raise ValueError, msg
+        # clip rather than fail when too many subsets are requested
+        nsets = min(m, n)
     nm = n*m
     index = arange(nm)
-    n_ind = arange(n)
-    shuffle(n_ind) # random start diag
+    n_ind = arange(n+1)
+    if randomise:
+        shuffle(n_ind) # random start diag
     start_inds = array_split(n_ind, nsets)
     for v in range(nsets):
-        validation = []
+        validation = set()
         for start in start_inds[v]:
-            ind = arange(start+v, nm, n+1)
-            [validation.append(i) for i in ind]
-        training = [j for j in index if j not in validation]
-        yield training, validation
+            ind = arange(start, nm, n+1)
+            validation.update(ind)
+        yield list(validation)
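A small illustration (editorial, not in the diff) of what the generator yields: each segment is a set of raveled indices covering whole wrap-around diagonals, so removal is balanced over rows and columns.

    for val in diag_cv((4, 4), nsets=2, randomise=False):
        print sorted(val)   # two segments that together cover all 16 cells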
-def class_error(y_hat, y, method='vanilla'):
-    """ Not used.
-    """
-    a_opt, k, l = y_hat.shape
-    y_hat_c = zeros((k, l), dtype='d')
-    if method == vanilla:
-        pass
-    for a in range(a_opt):
-        for i in range(k):
-            y_hat_c[a, val, argmax(y_hat[a,val,:])] = 1.0
-    err = 100*((y_hat_c + y) == 2).sum(1)/y.sum(0).astype('d')
-    return y_hat_c, err
 def prediction_error(y_hat, y, method='squared'):
     """Loss function on multiclass Y.

     Assumes y is a binary dummy class matrix (samples, classes)
     """
     k, l = y.shape
-    if method == 'hinge':
-        pass
-    elif method == 'smooth_hinge':
-        z = 90
-    elif method == 'abs':
-        err = abs(y - y_hat)
-    elif method == 'squared':
-        err = (y - y_hat)**2
-    elif method == '0/1':
-        pred = zeros_like(y_hat)
-        for i, row in enumerate(y_hat):
-            largest = row.argsort()[-1]
-            pred[i, largest] = 1.
-        err = abs(y - pred)
-    elif method == '1/2':
-        y_hat[y_hat>.5] = 1
-        y_hat[y_hat<.5] = 0
-        err = abs(y - y_hat)
-    return err
+    a_max, kk, ll = y_hat.shape
+    error = empty((a_max, l))
+    for a in range(a_max):
+        yha = y_hat[a, :, :]
+        if method == 'hinge':
+            err = zeros((k, l))
+            for j in range(l):
+                for i in range(k):
+                    if y[i,j] == 1:
+                        if yha[i, j] >= 1:
+                            err[i,j] = 0
+                        else:
+                            err[i,j] = abs(y[i,j] - yha[i,j])
+                    elif y[i,j] == 0:
+                        if yha[i, j] <= 0:
+                            err[i,j] = 0
+                        else:
+                            err[i,j] = abs(y[i,j] - yha[i,j])
+        elif method == 'smooth_hinge':
+            err = zeros((k, l))
+            for j in range(l):
+                for i in range(k):
+                    if y[i,j] == 1:
+                        if yha[i, j] >= 1:
+                            err[i,j] = 0
+                        elif yha[i,j] < 1 and yha[i,j] > 0:
+                            err[i,j] = abs(y[i,j] - yha[i,j])
+                        else:
+                            err[i,j] = 1
+                    elif y[i,j] == 0:
+                        if yha[i, j] <= 0:
+                            err[i,j] = 0
+                        elif yha[i,j] < 1 and yha[i,j] > 0:
+                            err[i,j] = abs(y[i,j] - yha[i,j])
+                        else:
+                            err[i,j] = 1
+        elif method == 'abs':
+            err = abs(y - yha)
+        elif method == 'squared':
+            err = (y - yha)**2
+        elif method == '0/1':
+            pred = zeros((k, l))
+            for i, row in enumerate(yha):
+                largest = row.argsort()[-1]
+                pred[i, largest] = 1.
+            err = abs(y - pred)
+        elif method == '1/2':
+            yh = yha.copy()
+            yh[yha>.5] = 1
+            yh[yha<.5] = 0
+            err = abs(y - yh)
+        else:
+            raise ValueError("Option: %s (method) not valid" %method)
+        error[a,:] = err.mean(0)
+    return error
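A tiny check of the '1/2' rule (editorial, not in the diff) on a dummy-coded Y with a single component:

    from numpy import array
    y = array([[1., 0.], [0., 1.], [1., 0.]])
    y_hat = array([[[.9, .1], [.2, .8], [.4, .6]]])   # (a_max=1, samples, classes)
    print prediction_error(y_hat, y, method='1/2')    # [[ 0.333  0.333]]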
+def _wkernel_pls_val(X, Y, a_max, n_blocks=None):
+    """Returns rmsep and aopt for pls tailored for wide X.
+
+    The error of cross validation is calculated
+    based on random block cross-validation. With number of blocks equal to
+    number of samples [default] gives leave-one-out cv.
+    The pls model is based on the simpls algorithm for wide X, and is quite
+    fast in very high dimensional X data.
+
+    *Parameters*:
+
+        X : ndarray
+            column centered data matrix of size (samples x variables)
+        Y : ndarray
+            column centered response matrix of size (samples x responses)
+        a_max : scalar
+            Maximum number of components
+        n_blocks : scalar
+            Number of blocks in cross validation
+
+    *Returns*:
+
+        rmsep : ndarray
+            Root Mean Square Error of cross-validated Predictions
+        aopt : scalar
+            Guestimate of the optimal number of components
+
+    SeeAlso:
+
+        - pls_cv_val : Same output, not optimised for wide X
+        - w_simpls : Simpls algorithm for wide X
+
+    Notes
+    -----
+    Based (cowardly translated) on m-files from the Chemoact toolbox
+    X, Y inputs need to be centered (fixme: check)
+
+    Examples
+    --------
+    >>> import numpy as n
+    >>> X = n.array([[1., 2., 3.],[]])
+    >>> Y = n.array([[1., 2., 3.],[]])
+    >>> w_pls(X, Y, 1)
+    [4,5,6], 1
+    """
+    dt = X.dtype
+    k, l = m_shape(Y)
+    PRESS = zeros((l, a_max+1), dtype=dt)
+    if n_blocks == None:
+        n_blocks = Y.shape[0]
+    XXt = dot(X, X.T)
+    V = w_pls_gen(XXt, Y, n_blocks=n_blocks, center=True)
+    for Din, Doi, Yin, Yout in V:
+        ym = -sum(Yout, 0)[newaxis]/(1.0*Yin.shape[0])
+        PRESS[:,0] = PRESS[:,0] + ((Yout - ym)**2).sum(0)
+        dat = w_simpls(Din, Yin, a_max)
+        Q, U, H = dat['Q'], dat['U'], dat['H']
+        That = dot(Doi, dot(U, inv(triu(dot(H.T, U))) ))
+        Yhat = zeros((a_max, k, l), dtype=dt)
+        for j in range(l):
+            TQ = dot(That, triu(dot(Q[j,:][:,newaxis], ones((1,a_max)))) )
+            Yhat[:,:,j] = TQ
+            E = Yout[:,j][:,newaxis] - TQ
+            E = E + sum(E, 0)/Din.shape[0]
+            PRESS[j,1:] = PRESS[j,1:] + sum(E**2, 0)
+    #Yhat = Yin - dot(That,Q.T)
+    msep = PRESS/(Y.shape[0])
+    #aopt = find_aopt_from_sep(msep)
+    return Yhat, sqrt(msep)
+
+def _w_pls(aat, b, aopt):
+    """ Pls for wide matrices.
+    Fast pls for crossval, used in calc rmsep for wide X
+    There is no P or W. T is normalised
+
+    aat = centered kernel matrix
+    b = centered y
+    """
+    bb = b.copy()
+    k, l = m_shape(b)
+    m, m = m_shape(aat)
+    U = empty((m, aopt)) # W
+    T = empty((m, aopt))
+    R = empty((m, aopt)) # R
+    PROJ = empty((m, aopt)) # P?
+
+    for i in range(aopt):
+        if has_sym:
+            s, q = symeig(dot(dot(b.T, aat), b), range=(l,l), overwrite=True)
+        else:
+            q, s, vh = svd(dot(dot(b.T, aat), b), full_matrices=0)
+            q = q[:,:1]
+        u = dot(b, q) # y-factor scores
+        U[:,i] = u.ravel()
+        t = dot(aat, u)
+        t = t/vnorm(t)
+        T[:,i] = t.ravel()
+        r = dot(aat, t) # score-weights
+        #r = r/vnorm(r)
+        R[:,i] = r.ravel()
+        PROJ[:,:i+1] = dot(T[:,:i+1], inv(dot(T[:,:i+1].T, R[:,:i+1])) )
+        if i < aopt:
+            b = b - dot(PROJ[:,:i+1], dot(R[:,:i+1].T, b) )
+    C = dot(bb.T, T)

@@ -8,7 +8,7 @@ __docformat__ = "restructuredtext en"
 from math import sqrt as msqrt
 from numpy import dot,empty,zeros,apply_along_axis,newaxis,finfo,sqrt,r_,expand_dims,\
-     minimum, any, isnan
+     minimum,any,isnan,ones,tile
 from numpy.linalg import inv,svd
 from scipy.sandbox import arpack
@@ -79,11 +79,13 @@ def pca(X, aopt, scale='scores', mode='normal', center_axis=[0]):
     """
     m, n = X.shape
-    min_aopt = min(m, n)
+    max_aopt = min(m, n)
     if center_axis != None:
         X, mnx = center(X, center_axis[0])
-        min_aopt = min_aopt - 1
-    assert(aopt <= min_aopt)
+        max_aopt = max_aopt - 1
+    if aopt > max_aopt:
+        print "Using aopt: %d" %max_aopt
+        aopt = max_aopt
     if m > (n+100) or n > (m+100):
         u, s, v = esvd(X, aopt)
     else:
@@ -108,7 +110,7 @@ def pca(X, aopt, scale='scores', mode='normal', center_axis=[0]):
     P = P*s
     if mode in ['fast', 'f', 'F']:
-        return {'T':T, 'P':P, 'aopt':aopt}
+        return {'T':T, 'P':P, 'aopt':aopt, 'mnx': mnx}
     if mode in ['detailed', 'd', 'D']:
         E = empty((aopt, m, n))
@@ -135,7 +137,7 @@ def pca(X, aopt, scale='scores', mode='normal', center_axis=[0]):
     expvarx = r_[0, 100*e.cumsum()/(X*X).sum()]
     return {'T': T, 'P': P, 'E': E, 'evx': expvarx, 'leverage': lev, 'ssqx': ssq,
-            'aopt': aopt, 'eigvals': e}
+            'aopt': aopt, 'eigvals': e, 'mnx': mnx}

 def pcr(a, b, aopt, scale='scores',mode='normal',center_axis=[0, 0]):
     """ Principal Component Regression.
@@ -690,39 +692,80 @@ def center(a, axis):
     ### TODO ###
     # Perhaps not(!) use broadcasting and return full centering
     # matrix instead ?
+    #
     # check if we have a vector
     is_vec = len(a.shape) == 1
     if not is_vec:
         is_vec = a.shape[0] == 1 or a.shape[1] == 1
     if is_vec:
         if axis == 2:
-            warnings.warn("Double centering of vector ignored, using ordinary centering")
+            raise ValueError("Double centering of vector is not a valid option")
         if axis == -1:
-            mn = 0
+            mn = zeros(a.shape)
         else:
-            mn = a.mean()
+            mn = a.mean()*ones(a.shape)
         return a - mn, mn

     if axis == -1:
         mn = zeros((1,a.shape[1],))
-        #mn = tile(mn, (a.shape[0], 1))
+        mn = tile(mn, (a.shape[0], 1))
     elif axis == 0:
         mn = a.mean(0)[newaxis]
-        #mn = tile(mn, (a.shape[0], 1))
+        mn = tile(mn, (a.shape[0], 1))
     elif axis == 1:
         mn = a.mean(1)[:,newaxis]
-        #mn = tile(mn, (1, a.shape[1]))
+        mn = tile(mn, (1, a.shape[1]))
     elif axis == 2:
         #fixme: double centering returns column mean as loc-vector, ok?
         mn = a.mean(0)[newaxis] + a.mean(1)[:,newaxis] - a.mean()
-        return a - mn , a.mean(0)[newaxis]
+        return a - mn, mn
     else:
         raise IOError("input error: axis must be in [-1,0,1,2]")

     return a - mn, mn
+def inv_center(a, mn_a, axis):
+    """Inverse centering.
+
+    Adding row, column or double centering to a matrix.
+
+    This method uses broadcasting, so the size of a needs only
+    to match the axis argument.
+
+    *Parameters*:
+
+        a : {array}
+            Input data, to be uncentered
+        mn_a : {array}
+            Mean matrix as returned by center
+        axis : {integer}
+            Which centering to undo.
+            0 = col center, 1 = row center, 2 = double center
+            -1 = nothing
+
+    *Returns*:
+
+        a_centered : {array}
+            Uncentered data matrix
+
+    *Notes*:
+
+        May just as well be used to subtract a mean (just use negative mn_a)
+    """
+    if axis == -1:
+        return a
+    m, n = a.shape
+    k, o = mn_a.shape
+    if axis == 0: # col center: mn_a is a row vector
+        assert(n == o and k == 1)
+    elif axis == 1: # row center: mn_a is a column vector
+        assert(m == k and o == 1)
+    elif axis == 2: # double center: mn_a is a full matrix
+        assert(n == o and m == k)
+    return a + mn_a
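A round-trip sketch (editorial, not in the diff) tying center and inv_center together, assuming both are importable from the engines module; axis=2 is the double centering used by lpls.

    from numpy.random import rand
    a = rand(5, 4)
    ac, mn = center(a, 2)       # double center, mn is the full mean surface
    a2 = inv_center(ac, mn, 2)  # add the mean surface back
    print abs(a - a2).max()     # ~1e-16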
 def _scale(a, axis):
     """ Matrix scaling to unit variance.

@@ -14,7 +14,7 @@ from engines import pls
 from engines import nipals_lpls as lpls

-def hotelling(Pcv, P, p_center='median', cov_center=median,
+def hotelling(Pcv, P, p_center='median', cov_center='median',
               alpha=0.3, crot=True, strict=False):
     """Returns regularized hotelling T^2.
@@ -31,8 +31,8 @@ def hotelling(Pcv, P, p_center='median', cov_center=median,
             Calibration model parameter
         p_center : {'median', 'mean', 'cal_model'}, optional
             Location method for sub-segments
-        cov_center : {py_func}, optional
-            Location function
+        cov_center : {'median', 'mean', 'cal_model'}, optional
+            Pooled covariance estimate
         alpha : {float}, optional
             Regularisation towards pooled covariance estimate.
         crot : {boolean}, optional
@@ -79,33 +79,43 @@ def hotelling(Pcv, P, p_center='median', cov_center=median,
     for i in xrange(n):
         Pi = Pcv[:,i,:] # (n_sets x amax)
         Pi_ctr = P_ctr[i,:] # (1 x amax)
-        Pim = (Pi - Pi_ctr)*msqrt(n_sets-1)
-        Cov_i[i] = (1./n_sets)*dot(Pim.T, Pim)
-
-    Cov = cov_center(Cov_i)
-    reg_cov = (1. - alpha)*Cov_i + alpha*Cov
+        Pim = (Pi - Pi_ctr)
+        Cov_i[i] = dot(Pim.T, Pim)
+    if cov_center == 'median':
+        Cov_p = median(Cov_i)
+    elif cov_center == 'mean':
+        Cov_p = Cov_i.mean(0)
+    else:
+        print "Pooled covariance est. invalid, using median"
+        print cov_center
+        Cov_p = median(Cov_i)
+    reg_cov = (1. - alpha)*Cov_i + alpha*Cov_p
     for i in xrange(n):
         Pc = P_ctr[i,:]
         sigma = reg_cov[i]
         T_sq[i] = dot(dot(Pc, inv(sigma)), Pc)
     return T_sq
-def procrustes(a, b, strict=True, center=False, verbose=False):
+def procrustes(a, b, strict=True, center=False, force_norm=False, verbose=False):
     """Orthogonal rotation of b to a.

-    Procrustes rotation is an orthogonal rotoation of one subspace
+    Procrustes rotation is an orthogonal rotation of one subspace
     onto another by minimising the squared error.

     *Parameters*:

         a : {array}
-            Input array
+            Input array, stationary
         b : {array}
-            Input array
+            Input array, rotate this to max. fit to a
         strict : {boolean}
             Only do flipping and shuffling
         center : {boolean}
             Center before rotation, translate back after
+        force_norm : {boolean}
+            Ensure that columns of a and b are orthonormal
         verbose : {boolean}
             Show sum of squares
@@ -125,6 +135,12 @@ def procrustes(a, b, strict=True, center=False, verbose=False):
         a = a - mn_a
         mn_b = b.mean(0)
         b = b - mn_b
+    if force_norm:
+        a_norm = apply_along_axis(norm, 0, a)
+        a = a/a_norm
+        b_norm = apply_along_axis(norm, 0, b)
+        b = b/b_norm

     u, s, vt = svd(dot(b.T, a))
     Cm = dot(u, vt) # Cm: orthogonal rotation matrix
@@ -166,13 +182,13 @@ def _ensure_strict(C, only_flips=True):
     """
     if only_flips:
-        C = eye(Cm.shape[0])*sign(Cm)
+        C = eye(C.shape[0])*sign(C)
         return C
     Cm = zeros(C.shape, dtype='d')
     Cm[abs(C)>.6] = 1.
     if det(Cm)>1:
         raise NotImplementedError
-    return Cm*S
+    return Cm*sign(C)
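A standalone sketch (editorial, not in the diff) of the rotation step inside procrustes: the SVD of b.T a yields the orthogonal matrix that maps b back onto a.

    import numpy as np
    a = np.linalg.qr(np.random.rand(10, 3))[0]   # orthonormal target subspace
    r = np.linalg.qr(np.random.rand(3, 3))[0]    # random orthogonal rotation
    b = np.dot(a, r)                             # rotated copy of a
    u, s, vt = np.linalg.svd(np.dot(b.T, a))
    Cm = np.dot(u, vt)                           # procrustes rotation matrix
    print np.allclose(np.dot(b, Cm), a)          # True: rotation recovered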
 def lpls_qvals(X, Y, Z, aopt=None, alpha=.3, zx_alpha=.5, n_iter=20,
                sim_method='shuffle', p_center='med', cov_center=median,
@@ -248,7 +264,7 @@ def lpls_qvals(X, Y, Z, aopt=None, alpha=.3, zx_alpha=.5, n_iter=20,
     Wc, Lc = lpls_jk(X, Y, Z, aopt, zorth=zorth)
     cal_tsq_x = hotelling(Wc, dat['W'], alpha=alpha)
     cal_tsq_z = hotelling(Lc, dat['L'], alpha=alpha)
-    print "morn"

     # Perturbations
     index = arange(m)
     for i in range(n_iter):
@@ -257,9 +273,10 @@ def lpls_qvals(X, Y, Z, aopt=None, alpha=.3, zx_alpha=.5, n_iter=20,
         dat = lpls(X, Y[indi,:], Z, aopt, scale='loads', center_axis=center_axis, zorth=zorth)
         Wi, Li = lpls_jk(X, Y[indi,:], Z, aopt, nsets=nsets)
         pert_tsq_x[:,i] = hotelling(Wi, dat['W'], alpha=alpha)
-        pert_tsq_z[:,i] = hotelling(Li, dat['L'], alpha=alpha)
+        # no reason to borrow variance in dag (alpha -> some small value)
+        pert_tsq_z[:,i] = hotelling(Li, dat['L'], alpha=0.01)

-    return _fdr(cal_tsq_z, pert_tsq_z, median), _fdr(cal_tsq_x, pert_tsq_x, median)
+    return cal_tsq_z, pert_tsq_z, cal_tsq_x, pert_tsq_x

 def pls_qvals(X, Y, aopt, alpha=.3, n_iter=20, p_center='med', cov_center=median,
@@ -335,7 +352,7 @@ def pls_qvals(X, Y, aopt, alpha=.3, n_iter=20,p_center='med', cov_center=median,
         Wi = pls_jk(X, Y[indi,:], aopt, nsets=nsets, center_axis=center_axis)
         pert_tsq_x[:,i] = hotelling(Wi, dat['W'], alpha=alpha)

-    return _fdr(cal_tsq_x, pert_tsq_x, median)
+    return cal_tsq_x, pert_tsq_x
@@ -372,7 +389,7 @@
     gene expression data using dimension reduction methods, BMC
     bioinformatics, 2007
     """
-    n, = tsq.shape
+    n = tsq.shape[0]
     k, m = tsqp.shape
     assert(n==k)
     n_false = empty((n, m), 'd')

sandbox/powerit/Makefile (new file, +27)

@@ -0,0 +1,27 @@
CC=gcc
PYTHON_INCLUDE=/usr/include/python2.5
PYTHON_LIBRARY=/usr/lib/python2.5
NUMPY_INCLUDE=$(PYTHON_LIBRARY)/site-packages/numpy/core/include/numpy/
GOTO_LIBRARY=/home/flatberg/GotoBLAS
BLAS_LIBRARY=/usr/lib
BLAS=blas
GOTO=goto

all: numpy_module.so

numpy_module.so: numpy_module.o sympowerit.o
	$(CC) -Wall -shared numpy_module.o sympowerit.o -o _numpy_module.so -L$(PYTHON_LIBRARY) -lpython2.5 -L$(BLAS_LIBRARY) -l$(BLAS) -llapack -lg2c -lm

numpy_module.o: numpy_module.c numpy_module.h
	$(CC) -fPIC -Wall -O2 -g -c -I$(PYTHON_INCLUDE) -I$(NUMPY_INCLUDE) numpy_module.c

sympowerit.o: sympowerit.c
	$(CC) -fPIC -Wall -O2 -g -c sympowerit.c

clean:
	-rm numpy_module.o sympowerit.o
	-rm -rf _numpy_module.so

sandbox/powerit/numpy_module.c (new executable file, +70)

@@ -0,0 +1,70 @@
/* A file to test importing C modules for handling arrays to Python */

/* Python.h includes <stdio.h>, <string.h>, <errno.h>, <limits.h>, and <stdlib.h> (if available) */
#include "Python.h"
#include "arrayobject.h"
#include "numpy_module.h"
#include "sympowerit.h"
#include <cblas.h>

/* ==== Set up the methods table ====================== */
static PyMethodDef _numpy_moduleMethods[] = {
    {"sym_powerit", sym_powerit, METH_VARARGS},
    {NULL, NULL} /* Sentinel - marks the end of this structure */
};

/* ==== Initialize the numpy_module functions =============== */
/* Module name must be _numpy_module in compile and link */
void init_numpy_module() {
    (void) Py_InitModule("_numpy_module", _numpy_moduleMethods);
    import_array(); /* Must be present for NumPy. Called first after above line. */
}

/* =========== Power iteration on symmetric matrix ========== */
static PyObject *sym_powerit(PyObject *self, PyObject *args)
{
    PyArrayObject *X=NULL, *T=NULL, *E=NULL, *U=NULL;
    double *tin, *ein, tol;
    int amax, n, info, maxiter, verbose;
    int dims[2];

    /* Parse tuple of input arguments */
    if (!PyArg_ParseTuple(args, "O!O!iidii",
                          &PyArray_Type, &X,
                          &PyArray_Type, &T,
                          &n,
                          &amax,
                          &tol,
                          &maxiter,
                          &verbose)
        )
        return NULL;
    if (NULL == X) return NULL;

    /* Get the dimensions of the input */
    n = X->dimensions[0];

    /* Create output/work arrays, no inplace calculations */
    dims[0] = n;
    dims[1] = amax;
    U = (PyArrayObject*) PyArray_NewCopy(T, NPY_CORDER);
    dims[1] = n;
    E = (PyArrayObject*) PyArray_NewCopy(X, NPY_CORDER);

    /* Get pointers to contiguous data (row-major) */
    ein = (double *)E->data;
    tin = (double *)U->data;

    /* Call sympower method */
    info = sympowerit(ein, n, tin, amax, tol, maxiter, verbose);

    Py_DECREF(E);
    return Py_BuildValue("Ni", U, info);
}

sandbox/powerit/numpy_module.h (new executable file, +1)

@@ -0,0 +1 @@
/* Header to test of C modules for arrays for Python: sympowerit.c */

/* Python callable functions */
static PyObject *sym_powerit(PyObject *self, PyObject *args);

sandbox/powerit/sympowerit.c (new file, +116)

@@ -0,0 +1,116 @@
#include <cblas.h>
#include <math.h>
#include <stdio.h>
#include <stdlib.h>

/* =============== Main functions body ====================*/
int sympowerit(double *E, int n, double *T, int amax, double tol,
               int maxiter, int verbose)
{
    /*
    PURPOSE: Estimate eigenvectors of a symmetric matrix using the power method.

    CALLING SEQUENCE:
        info = sympowerit(*E, n, *T, amax, tol, maxiter);

    INPUTS:
        E        symmetric matrix from XX^T/X^TX of centered data matrix
                 type: *double
        n        elements in X first dimension
                 type: int
        T        workspace for scores (n, amax)
                 type: *double
        amax     maximum number of principal components
                 type: int
        tol      convergence limit
                 type: double
        maxiter  maximum number of iterations
                 type: int
        verbose  the amount of debug output
                 type: int
    */
    int iter, a, j;
    int info = 0;
    double sumsq, l2norm, *y, *x, sgn, diff, alpha;
    double lambda = 0.0;

    if (verbose > 1) verbose = 1;

    /* Allocate space for working vector and eigenvector */
    y = (double *) malloc(n*sizeof(double));
    x = (double *) malloc(n*sizeof(double));

    /* Loop over all components to be estimated */
    for (a = 0; a <= amax-1; a++) {
        /* Reset work-/eigen-vector */
        for (j = 0; j < n; j++) {
            y[j] = 0.0;
            x[j] = T[(j*amax)+a];
        }
        /*x[0] = 1.0;*/

        /* Main power-iteration loop */
        for (iter = 0; iter <= maxiter; iter++) {
            /* Matrix-vector product */
            /*cblas_dsymv (CblasRowMajor, CblasUpper, n, 1.0, E, n,
                           x, 1, 0.0, y, 1);*/
            cblas_dgemv (CblasRowMajor, CblasNoTrans, n, n, 1.0, E, n,
                         x, 1, 0.0, y, 1);

            /* Normalise y and estimate the eigenvalue */
            sumsq = y[0] * y[0];
            lambda = x[0] * y[0];
            for (j = 1; j < n; j++) {
                sumsq += y[j] * y[j];
                lambda += x[j] * y[j];
            }
            l2norm = sqrt(sumsq);
            for (j = 0; j < n; j++) y[j] /= l2norm;

            /* Check for convergence */
            sgn = (lambda < 0 ? -1.0 : 1.0);
            diff = x[0] - sgn * y[0];
            sumsq = diff * diff;
            x[0] = y[0];
            for (j = 1; j < n; j++) {
                diff = x[j] - sgn * y[j];
                sumsq += diff * diff;
                x[j] = y[j];
            }
            if (sqrt(sumsq) < tol) {
                if (verbose == 1) {
                    printf("\nComp: %d\n", a);
                    printf("Converged in %d iterations\n", iter);
                }
                break;
            }
            if (iter >= maxiter) {
                if (verbose == 1) {
                    printf("\nComp: %d\n", a);
                    printf("Max iter reached.\n");
                    printf("Error: %.2E\n", sumsq);
                }
                info = 1;
                break;
            }
        }

        /* Calculate T: eigenvalue-scaled scores */
        for (j = 0; j < n; j++) {
            y[j] = sgn*sqrt(sgn*lambda)*x[j];
            T[(j*amax)+a] = y[j];
        }
        /* Rank-one deflation of residual matrix */
        /*cblas_dsyr (CblasRowMajor, CblasUpper, n, -1.0, x, 1, E, n);*/
        alpha = -1.0*lambda;
        cblas_dger (CblasRowMajor, n, n, alpha, x, 1, x, 1, E, n);
    }

    /* Free used space */
    free(x);
    free(y);
    return (info);
}
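For reference, a NumPy sketch (editorial, not part of the commit) of the same scheme: power iteration with rank-one (Hotelling) deflation, matching the C routine's sign and eigenvalue-scaling conventions.

    import numpy as np

    def sym_powerit_py(E, amax, tol=1e-7, maxiter=100):
        """Power iteration with rank-one deflation on a symmetric matrix E."""
        E = E.copy()
        n = E.shape[0]
        T = np.zeros((n, amax))
        for a in range(amax):
            x = np.ones(n)/np.sqrt(n)          # starting vector
            for _ in range(maxiter):
                y = np.dot(E, x)
                lam = np.dot(x, y)             # Rayleigh quotient estimate
                y /= np.linalg.norm(y)
                sgn = 1.0 if lam >= 0 else -1.0
                if np.linalg.norm(x - sgn*y) < tol:
                    x = y
                    break
                x = y
            T[:, a] = sgn*np.sqrt(sgn*lam)*x   # eigenvalue-scaled score
            E -= lam*np.outer(x, x)            # deflate the converged direction
        return T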

sandbox/powerit/sympowerit.h (new file, +2)

@@ -0,0 +1,2 @@
/* Simple symmetric power iterations */
int sympowerit(double *X, int n, double *T, int amax, double tol, int maxiter, int verbose);

(new Python wrapper module, +100)

@@ -0,0 +1,100 @@
from numpy import *

HAS_SYMPOWER = True
try:
    from _numpy_module import sym_powerit
except ImportError:
    HAS_SYMPOWER = False
    raise ImportError("sym_powerit module not present")

class SymPowerException(Exception):
    pass

_ERRORCODES = {1: "Some eigenvectors did not converge, try to increase \nthe number of iterations or lower the tolerance level",
               0: ""}

def sympowerit(xx, T0=None, mn_center=False, a_max=10, tol=1e-7, maxiter=100,
               verbose=0):
    """Estimate eigenvectors of a symmetric matrix using the power method.

    *Parameters*:

        xx : {array}
            Symmetric square array (m, m)
        T0 : {array}, optional
            Initial solution (m, a_max)
        mn_center : {boolean}, optional
            Mean centering
        a_max : {integer}, optional
            Number of components to extract
        tol : {float}, optional
            Tolerance level of eigenvector solver
        maxiter : {integer}
            Maximum number of power iterations to use
        verbose : {integer}
            Debug output (==1)

    *Returns*:

        v : {array}
            Eigenvectors of xx, (m, a_max)
    """
    valid_types = ['D','d','F','f']
    dtype = xx.dtype.char
    n, m = xx.shape
    if not (dtype in valid_types):
        msg = "Array type: (%s) needs to be a float or double" %dtype
        raise SymPowerException(msg)
    if not (m == n):
        msg = "Input array needs to be square, input: (%d,%d)" %(m, n)
        raise SymPowerException(msg)
    # small test of symmetry on a few random index pairs
    N = 5
    num = random.randint(0, n, N)
    for i in range(N):
        j = N - 1 - i
        if abs(xx[num[i],num[j]] - xx[num[j],num[i]]) > 1e-15:
            msg = "Array needs to be symmetric"
            raise SymPowerException(msg)
    if not a_max:
        a_max = 10
    if T0 != None:
        tn, tm = T0.shape
        if not (tn == n):
            msg = "Start eigenvectors need to match input array ()"
            raise SymPowerException(msg)
        if not (tm == a_max):
            msg = "Start eigenvectors need to match input a_max ()"
            raise SymPowerException(msg)
    else:
        T0 = zeros((n, a_max), 'd')
        T0[0,:] = ones((a_max,),'d')
    if mn_center:
        xx = _center(xx)
    # call c-function
    T, info = sym_powerit(xx, T0, n, a_max, tol, maxiter, verbose)
    if info != 0:
        if verbose:
            print _ERRORCODES.get(info, "Dont know this error")
    return T
def _center(xx, ret_mn=False):
    """Returns mean centered symmetric kernel matrix.
    """
    n = xx.shape[0]
    h = xx.sum(0)[:,newaxis]
    h = (h - mean(h)/2)/n
    mn_a = h + h.T
    xxc = xx - mn_a
    if ret_mn:
        return xxc, mn_a
    return xxc
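A quick numerical check (editorial, not part of the commit) that _center agrees with explicit double centering of a symmetric kernel matrix:

    from numpy import allclose, dot, newaxis
    from numpy.random import rand
    X = rand(6, 3)
    K = dot(X, X.T)                 # symmetric kernel
    Kc = _center(K)
    explicit = K - K.mean(0)[newaxis] - K.mean(1)[:,newaxis] + K.mean()
    print allclose(Kc, explicit)    # True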