Added symmetric poweriterations to sandbox
This commit is contained in:
parent
0956581c42
commit
1103245d85
@ -30,7 +30,7 @@ class get_matvec:
|
||||
if isinstance(obj, sb.ndarray):
|
||||
self.callfunc = self.type1
|
||||
return
|
||||
meth = getattr(obj,self.methname,None)
|
||||
meth = getattr(obj, self.methname, None)
|
||||
if not callable(meth):
|
||||
raise ValueError, "Object must be an array "\
|
||||
"or have a callable %s attribute." % (self.methname,)
|
||||
@ -48,12 +48,12 @@ class get_matvec:
|
||||
return sb.dot(self.obj.A, x)
|
||||
|
||||
def type2(self, x):
|
||||
return self.obj(x,*self.args)
|
||||
return self.obj(x, *self.args)
|
||||
|
||||
|
||||
def eigen(A,k=6,M=None,ncv=None,which='LM',
|
||||
maxiter=None,tol=0, return_eigenvectors=True):
|
||||
""" Return k eigenvalues and eigenvectors of the matrix A.
|
||||
def eigen(A, k=6, M=None, ncv=None, which='LM',
|
||||
maxiter=None, tol=0, return_eigenvectors=True, v0=None):
|
||||
"""Return k eigenvalues and eigenvectors of the matrix A.
|
||||
|
||||
Solves A * x[i] = w[i] * x[i], the standard eigenvalue problem for
|
||||
w[i] eigenvalues with corresponding eigenvectors x[i].
|
||||
@ -69,6 +69,7 @@ def eigen(A,k=6,M=None,ncv=None,which='LM',
|
||||
M -- (Not implemented)
|
||||
A symmetric positive-definite matrix for the generalized
|
||||
eigenvalue problem A * x = w * M * x
|
||||
v0 -- Initial starting solution (n x 1)
|
||||
|
||||
Outputs:
|
||||
|
||||
@ -99,8 +100,8 @@ def eigen(A,k=6,M=None,ncv=None,which='LM',
|
||||
|
||||
"""
|
||||
try:
|
||||
n,ny=A.shape
|
||||
n==ny
|
||||
n, ny = A.shape
|
||||
n == ny
|
||||
except:
|
||||
raise AttributeError("matrix is not square")
|
||||
if M is not None:
|
||||
@ -108,12 +109,12 @@ def eigen(A,k=6,M=None,ncv=None,which='LM',
|
||||
|
||||
# some defaults
|
||||
if ncv is None:
|
||||
ncv=2*k+1
|
||||
ncv=min(ncv,n)
|
||||
if maxiter==None:
|
||||
maxiter=n*10
|
||||
ncv = 2*k + 1
|
||||
ncv = min(ncv, n)
|
||||
if maxiter == None:
|
||||
maxiter = n*10
|
||||
|
||||
# guess type
|
||||
# guess type
|
||||
resid = sb.zeros(n,'f')
|
||||
try:
|
||||
typ = A.dtype.char
|
||||
@ -129,7 +130,7 @@ def eigen(A,k=6,M=None,ncv=None,which='LM',
|
||||
raise ValueError("k must be less than rank(A), k=%d"%k)
|
||||
if maxiter <= 0:
|
||||
raise ValueError("maxiter must be positive, maxiter=%d"%maxiter)
|
||||
whiches=['LM','SM','LR','SR','LI','SI']
|
||||
whiches = ['LM','SM','LR','SR','LI','SI']
|
||||
if which not in whiches:
|
||||
raise ValueError("which must be one of %s"%' '.join(whiches))
|
||||
if ncv > n or ncv < k:
|
||||
@ -141,17 +142,26 @@ def eigen(A,k=6,M=None,ncv=None,which='LM',
|
||||
eigextract = _arpack.__dict__[ltr+'neupd']
|
||||
matvec = get_matvec(A)
|
||||
|
||||
v = sb.zeros((n,ncv),typ) # holds Ritz vectors
|
||||
resid = sb.zeros(n,typ) # residual
|
||||
workd = sb.zeros(3*n,typ) # workspace
|
||||
workl = sb.zeros(3*ncv*ncv+6*ncv,typ) # workspace
|
||||
iparam = sb.zeros(11,'int') # problem parameters
|
||||
ipntr = sb.zeros(14,'int') # pointers into workspaces
|
||||
info = 0
|
||||
v = sb.zeros((n, ncv), typ) # holds Ritz vectors
|
||||
if v0 == None:
|
||||
resid = sb.zeros(n, typ) # residual
|
||||
info = 0
|
||||
else: # starting vector is given
|
||||
nn, kk = v0.shape
|
||||
if nn != n:
|
||||
raise ValueError("starting vector must be: (%d, 1), got: (%d, %d)" %(n, nn, kk))
|
||||
resid = v0[:,0].astype(typ)
|
||||
info = 1
|
||||
|
||||
workd = sb.zeros(3*n, typ) # workspace
|
||||
workl = sb.zeros(3*ncv*ncv+6*ncv, typ) # workspace
|
||||
iparam = sb.zeros(11, 'int') # problem parameters
|
||||
ipntr = sb.zeros(14, 'int') # pointers into workspaces
|
||||
|
||||
ido = 0
|
||||
|
||||
if typ in 'FD':
|
||||
rwork = sb.zeros(ncv,typ.lower())
|
||||
rwork = sb.zeros(ncv, typ.lower())
|
||||
|
||||
# only supported mode is 1: Ax=lx
|
||||
ishfts = 1
|
||||
@ -160,7 +170,7 @@ def eigen(A,k=6,M=None,ncv=None,which='LM',
|
||||
iparam[0] = ishfts
|
||||
iparam[2] = maxiter
|
||||
iparam[6] = mode1
|
||||
|
||||
|
||||
while True:
|
||||
if typ in 'fd':
|
||||
ido,resid,v,iparam,ipntr,info =\
|
||||
@ -173,9 +183,9 @@ def eigen(A,k=6,M=None,ncv=None,which='LM',
|
||||
|
||||
if (ido == -1 or ido == 1):
|
||||
# compute y = A * x
|
||||
xslice = slice(ipntr[0]-1, ipntr[0]-1+n)
|
||||
yslice = slice(ipntr[1]-1, ipntr[1]-1+n)
|
||||
workd[yslice]=matvec(workd[xslice])
|
||||
xslice = slice(ipntr[0] - 1, ipntr[0] - 1 + n)
|
||||
yslice = slice(ipntr[1] - 1, ipntr[1] - 1 + n)
|
||||
workd[yslice] = matvec(workd[xslice])
|
||||
else: # done
|
||||
break
|
||||
|
||||
@ -233,7 +243,7 @@ def eigen(A,k=6,M=None,ncv=None,which='LM',
|
||||
|
||||
|
||||
def eigen_symmetric(A,k=6,M=None,ncv=None,which='LM',
|
||||
maxiter=None,tol=0, return_eigenvectors=True):
|
||||
maxiter=None,tol=0, return_eigenvectors=True, v0=None):
|
||||
""" Return k eigenvalues and eigenvectors of the real symmetric matrix A.
|
||||
|
||||
Solves A * x[i] = w[i] * x[i], the standard eigenvalue problem for
|
||||
@ -253,6 +263,8 @@ def eigen_symmetric(A,k=6,M=None,ncv=None,which='LM',
|
||||
A symmetric positive-definite matrix for the generalized
|
||||
eigenvalue problem A * x = w * M * x
|
||||
|
||||
v0 -- Starting vector (n, 1)
|
||||
|
||||
Outputs:
|
||||
|
||||
w -- An real array of k eigenvalues
|
||||
@ -325,12 +337,22 @@ def eigen_symmetric(A,k=6,M=None,ncv=None,which='LM',
|
||||
matvec = get_matvec(A)
|
||||
|
||||
v = sb.zeros((n,ncv),typ)
|
||||
resid = sb.zeros(n,typ)
|
||||
if v0 == None:
|
||||
resid = sb.zeros(n, typ) # residual
|
||||
info = 0
|
||||
else: # starting solution is given
|
||||
nn, kk = v0.shape
|
||||
if nn != n:
|
||||
raise ValueError("starting vectors must be: (%d, %d), got: (%d, %d)" %(n, k, nn, kk))
|
||||
resid = v0[:,0].astype(typ)
|
||||
info = 1
|
||||
|
||||
#resid = sb.zeros(n,typ)
|
||||
workd = sb.zeros(3*n,typ)
|
||||
workl = sb.zeros(ncv*(ncv+8),typ)
|
||||
iparam = sb.zeros(11,'int')
|
||||
ipntr = sb.zeros(11,'int')
|
||||
info = 0
|
||||
#info = 0
|
||||
ido = 0
|
||||
|
||||
# only supported mode is 1: Ax=lx
|
||||
@ -340,8 +362,7 @@ def eigen_symmetric(A,k=6,M=None,ncv=None,which='LM',
|
||||
iparam[0] = ishfts
|
||||
iparam[2] = maxiter
|
||||
iparam[6] = mode1
|
||||
|
||||
|
||||
|
||||
while True:
|
||||
ido,resid,v,iparam,ipntr,info =\
|
||||
eigsolver(ido,bmat,which,k,tol,resid,v,iparam,ipntr,
|
||||
|
@ -2,16 +2,149 @@
|
||||
|
||||
The primary use is crossvalidation.
|
||||
"""
|
||||
__all__ = ['lpls_val', 'pls_jk', 'lpls_jk']
|
||||
__all__ = ['pca_val', 'pls_val', 'lpls_val', 'pls_jk', 'lpls_jk']
|
||||
__docformat__ = "restructuredtext en"
|
||||
|
||||
from numpy import dot,empty,zeros,sqrt,atleast_2d,argmax,asarray,median,\
|
||||
array_split,arange, isnan, any
|
||||
array_split,arange, isnan, any,newaxis
|
||||
from numpy.random import shuffle
|
||||
|
||||
from engines import pls
|
||||
from engines import pls, pca
|
||||
from engines import nipals_lpls as lpls
|
||||
|
||||
def pca_val(a, a_max, nsets=None, center_axis=[0]):
|
||||
"""Returns error estimate of crossvalidated PCA.
|
||||
|
||||
*Parameters*:
|
||||
|
||||
a : {array}
|
||||
data matrix (n x m)
|
||||
a_max : {integer}
|
||||
Maximum number of components in model.
|
||||
center_axis:
|
||||
Centering
|
||||
nsets : {integer}
|
||||
number of segments
|
||||
|
||||
*Returns*:
|
||||
|
||||
rmsep : {array}
|
||||
Squared error of prediction for each component and xvar (a_max, m)
|
||||
xhat : {array}
|
||||
Crossvalidated predicted a (a_max, m, n)
|
||||
aopt : {integer}
|
||||
Estimate of optimal number of components
|
||||
|
||||
*Notes*:
|
||||
|
||||
- Crossvalidation method is currently set to random blocks of diagonals.
|
||||
|
||||
"""
|
||||
n, m = a.shape
|
||||
if nsets == None:
|
||||
nsets = n
|
||||
|
||||
err = zeros((a_max, n, m), dtype=a.dtype)
|
||||
err_mn = zeros((a_max, n, m), dtype=a.dtype)
|
||||
xhat = zeros((a_max, n, m), dtype=a.dtype)
|
||||
mn_a = .5*(a.mean(0) + a.mean(1)[:,newaxis])
|
||||
|
||||
for i, val in enumerate(diag_cv(a.shape, nsets)):
|
||||
old_values = a.take(val)
|
||||
new_values = mn_a.take(val)
|
||||
# impute mean values
|
||||
b = a.copy()
|
||||
a.put(val, new_values)
|
||||
dat = pca(a, a_max, mode='normal', center_axis=center_axis)
|
||||
Ti, Pi = dat['T'], dat['P']
|
||||
bc = b - dat['mnx']
|
||||
bc2 = b - b.mean(0)
|
||||
for j in xrange(a_max):
|
||||
# predict the imputed values
|
||||
a_pred = dot(Ti[:,:j+1], Pi[:,:j+1].T).take(val)
|
||||
a_true = bc2.take(val)
|
||||
err[j,:,:].put(val, (a_true - a_pred)**2)
|
||||
err_mn[j,:,:].put(val, (bc.take(val) - a_pred)**2)
|
||||
xhat[j,:,:].put(val, a_pred)
|
||||
|
||||
# put original values back
|
||||
a.put(val, old_values)
|
||||
|
||||
rmsep = sqrt(err).mean(1) # take mean over samples
|
||||
rmsep2 = sqrt(err_mn).mean(1)
|
||||
aopt = rmsep.mean(-1).argmin()
|
||||
return rmsep, xhat, aopt, err, rmsep2
|
||||
|
||||
def pls_val(X, Y, a_max=2, nsets=None, center_axis=[0,0], verbose=False):
|
||||
"""Performs crossvalidation for generalisation error in pls.
|
||||
|
||||
|
||||
|
||||
*Parameters*:
|
||||
|
||||
X : {array}
|
||||
Main data matrix (m, n)
|
||||
Y : {array}
|
||||
External row data (m, l)
|
||||
a_max : {integer}, optional
|
||||
Maximum number of components to calculate (0, min(m,n))
|
||||
nsets : (integer), optional
|
||||
Number of crossvalidation sets
|
||||
center_axis : {array-like}, optional
|
||||
A three element array-like structure with elements in [-1,0,1,2],
|
||||
that decides the type of centering used.
|
||||
-1 : nothing
|
||||
0 : row center
|
||||
1 : column center
|
||||
2 : double center
|
||||
verbose : {boolean}, optional
|
||||
Verbosity of console output. For use in debugging.
|
||||
|
||||
*Returns*:
|
||||
|
||||
rmsep : {array}
|
||||
Root mean squred error of prediction (for each y-var)
|
||||
yhat : {array}
|
||||
Estimated responses
|
||||
aopt : {integer}
|
||||
Estimated value of optimal number of components
|
||||
|
||||
"""
|
||||
dt = X.dtype
|
||||
m, n = X.shape
|
||||
k, l = Y.shape
|
||||
assert m == k, "X (%d,%d) - Y (%d,%d) dim mismatch" %(m, n, k, l)
|
||||
assert n == p, "X (%d,%d) - Z (%d,%d) dim mismatch" %(m, n, o, p)
|
||||
if nsets == None:
|
||||
nsets = m
|
||||
if nsets > X.shape[0]:
|
||||
print "nsets (%d) is larger than number of variables (%d).\nnsets: %d -> %d" %(nsets, m, nsets, m)
|
||||
nsets = m
|
||||
if m > n:
|
||||
# kernel boosting
|
||||
Yhat = _w_pls_predict(X, Y, a_max)
|
||||
|
||||
Yhat = empty((a_max, k, l), dtype=dt)
|
||||
for cal, val in cv(k, nsets):
|
||||
# do the training model
|
||||
dat = pls(X[cal], Y[cal], a_max=a_max,center_axis=center_axis)
|
||||
|
||||
# center test data
|
||||
xi = X[val,:] - dat['mnx']
|
||||
ym = dat['mny']
|
||||
|
||||
# predictions
|
||||
for a in range(a_max):
|
||||
Yhat[a,val,:] = ym + dot(xi, dat['B'][a])
|
||||
|
||||
sep = (Y - Yhat)**2
|
||||
rmsep = sqrt(sep.mean(1)).T
|
||||
#aopt = find_aopt_from_sep(rmsep)
|
||||
|
||||
# todo: need a better support for classification error
|
||||
error = prediction_error(Yhat, Y, method='1/2')
|
||||
|
||||
return rmsep, Yhat, error
|
||||
|
||||
def lpls_val(X, Y, Z, a_max=2, nsets=None,alpha=.5, center_axis=[2,0,2], zorth=False, verbose=False):
|
||||
"""Performs crossvalidation for generalisation error in lpls.
|
||||
@ -72,47 +205,29 @@ def lpls_val(X, Y, Z, a_max=2, nsets=None,alpha=.5, center_axis=[2,0,2], zorth=F
|
||||
assert (alpha >= 0 and alpha<=1), "Alpha needs to be within [0,1], got: %.2f" %alpha
|
||||
|
||||
Yhat = empty((a_max, k, l), 'd')
|
||||
for cal, val in cv(nsets, k):
|
||||
for cal, val in cv(k, nsets):
|
||||
# do the training model
|
||||
dat = lpls(X[cal], Y[cal], Z, a_max=a_max, alpha=alpha,
|
||||
center_axis=center_axis, zorth=zorth, verbose=verbose)
|
||||
|
||||
# center test data
|
||||
if center_axis[0] != 1:
|
||||
xi = X[val,:] - dat['mnx']
|
||||
else:
|
||||
xi = X[val] - X[cal].mean(1)[:,newaxis]
|
||||
if center_axis[2] != 1:
|
||||
ym = dat['mny']
|
||||
else:
|
||||
ym = Y[cal].mean(1)[:,newaxis]
|
||||
xi = X[val,:] - dat['mnx']
|
||||
|
||||
ym = dat['mny'][val,:]
|
||||
# predictions
|
||||
for a in range(a_max):
|
||||
Yhat[a,val,:] = atleast_2d(ym + dot(xi, dat['B'][a]))
|
||||
#if permute:
|
||||
# xcal = X[cal]
|
||||
# for a in range(1,a_max,1):
|
||||
# for n in range(10):
|
||||
# shuffle(cal)
|
||||
# dat = lpls(xcal, Y[cal], Z, a_max=a_max, alpha=alpha,
|
||||
# center_axis=center_axis, verbose=verbose)
|
||||
|
||||
|
||||
# todo: need a better support for classification error
|
||||
y_is_class = Y.dtype.char.lower() in ['i','p', 'b', 'h','?']
|
||||
if y_is_class:
|
||||
pass
|
||||
#Yhat, err = class_error(Yhat, Y)
|
||||
#return Yhat, err
|
||||
|
||||
sep = (Y - Yhat)**2
|
||||
rmsep = sqrt(sep.mean(1)).T
|
||||
aopt = find_aopt_from_sep(rmsep)
|
||||
#aopt = find_aopt_from_sep(rmsep)
|
||||
|
||||
return rmsep, Yhat, aopt
|
||||
# todo: need a better support for classification error
|
||||
error = prediction_error(Yhat, Y, method='1/2')
|
||||
|
||||
return rmsep, Yhat, error
|
||||
|
||||
def pca_jk(a, aopt, center_axis=[0], nsets=None):
|
||||
"""Returns jack-knife segements from PCA.
|
||||
def pca_jk(a, aopt, nsets=None, center_axis=[0], method='cv'):
|
||||
"""Returns jack-knife segments from PCA.
|
||||
|
||||
*Parameters*:
|
||||
|
||||
@ -124,6 +239,12 @@ def pca_jk(a, aopt, center_axis=[0], nsets=None):
|
||||
Centering
|
||||
nsets : {integer}
|
||||
number of segments
|
||||
method : {'cv', 'diag', 'bs', 'bs_diag'}
|
||||
Perturbation method is one of:
|
||||
cv = leave samples out
|
||||
diag = impute diagonals
|
||||
bs = leave samples out with replacement (bootstrap)
|
||||
bs_diag = impute diagonals
|
||||
|
||||
*Returns*:
|
||||
|
||||
@ -135,22 +256,32 @@ def pca_jk(a, aopt, center_axis=[0], nsets=None):
|
||||
- Crossvalidation method is currently set to random blocks of samples.
|
||||
|
||||
"""
|
||||
m, n = a.shape
|
||||
if nsets == None:
|
||||
nsets = a.shape[0]
|
||||
nsets = m
|
||||
|
||||
Pcv = empty((nsets, a.shape[1], aopt), dtype=a.dtype)
|
||||
mn_a = .5*(a.mean(0) + a.mean(1)[:,newaxis])
|
||||
for i, (cal, val) in enumerate(cv_diag(a.shape, nsets)):
|
||||
old_values = a.take(ind)
|
||||
new_values = mn_a.take(ind)
|
||||
a.put(ind, new_values)
|
||||
dat = pca(a, aopt, mode='fast', scale='loads', center_axis=center_axis)
|
||||
PP[i,:,:] = dat['P']
|
||||
a.put(ind, old_values)
|
||||
if method == 'diag':
|
||||
mn_a = .5*(a.mean(0) + a.mean(1)[:,newaxis])
|
||||
for i, val in enumerate(diag_cv(a.shape, nsets)):
|
||||
old_values = a.take(val)
|
||||
new_values = mn_a.take(val)
|
||||
# impute mean values
|
||||
a.put(val, new_values)
|
||||
dat = pca(a, aopt, mode='fast', scale='loads', center_axis=center_axis)
|
||||
Pcv[i,:,:] = dat['P']
|
||||
# put original values back
|
||||
a.put(val, old_values)
|
||||
elif method == 'cv':
|
||||
print "using ....cv "
|
||||
for i, (cal, val) in enumerate(cv(m, nsets)):
|
||||
Pcv[i,:,:] = pca(a[cal,:], aopt, mode='fast', scale='loads', center_axis = center_axis)['P']
|
||||
else:
|
||||
raise NotImplementedError(method)
|
||||
|
||||
return PP
|
||||
return Pcv
|
||||
|
||||
def pls_jk(X, Y, a_opt, nsets=None, center_axis=True, verbose=False):
|
||||
def pls_jk(X, Y, a_opt, nsets=None, center_axis=[0,0], verbose=False):
|
||||
""" Returns jack-knife segements of W.
|
||||
|
||||
*Parameters*:
|
||||
@ -187,9 +318,7 @@ def pls_jk(X, Y, a_opt, nsets=None, center_axis=True, verbose=False):
|
||||
for i, (cal, val) in enumerate(cv(k, nsets)):
|
||||
if verbose:
|
||||
print "Segment number: %d" %i
|
||||
dat = pls(X[cal,:], Y[cal,:], a_opt, scale='loads', mode='fast', center_axis=[0, 0])
|
||||
if any(isnan(dat['W'])):
|
||||
1/0
|
||||
dat = pls(X[cal,:], Y[cal,:], a_opt, scale='loads', mode='fast', center_axis=center_axis)
|
||||
Wcv[i,:,:] = dat['W']
|
||||
|
||||
return Wcv
|
||||
@ -257,18 +386,18 @@ def lpls_jk(X, Y, Z, a_opt, nsets=None, xz_alpha=.5, center_axis=[2,0,2], zorth=
|
||||
|
||||
return WWx, WWz
|
||||
|
||||
def find_aopt_from_sep(sep, method='vanilla'):
|
||||
def find_aopt_from_sep(err, method='vanilla'):
|
||||
"""Returns an estimate of optimal number of components.
|
||||
|
||||
The estimate is based on the squared error of prediction from
|
||||
The estimate is based on the error of prediction from
|
||||
crossvalidation. This is pretty much wild guessing and it is
|
||||
recomended to inspect model parameters and prediction errors
|
||||
closely before deciding on the optimal number of components.
|
||||
|
||||
*Parameters*:
|
||||
|
||||
sep : {array}
|
||||
Squared error of prediction
|
||||
err : {array}
|
||||
Error of prediction
|
||||
method : ['vanilla', '75perc']
|
||||
Mehtod used to estimate optimal number of components
|
||||
|
||||
@ -280,22 +409,23 @@ def find_aopt_from_sep(sep, method='vanilla'):
|
||||
|
||||
if method == 'vanilla':
|
||||
# min rmsep
|
||||
rmsecv = sqrt(sep.mean(0))
|
||||
rmsecv = sqrt(err.mean(0))
|
||||
return rmsecv.argmin() + 1
|
||||
|
||||
elif method == '75perc':
|
||||
prct = .75 #percentile
|
||||
ind = 1.*sep.shape[0]*prct
|
||||
med = median(sep)
|
||||
prc_75 = []
|
||||
for col in sep.T:
|
||||
col = sorted(col)
|
||||
prc_75.append(col[int(ind)])
|
||||
prc_75 = asarray(prc_75)
|
||||
for i in range(1, sep.shape[1], 1):
|
||||
if med[i-1]<prc_75[i]:
|
||||
return i
|
||||
return len(med)
|
||||
raise NotImplementedError
|
||||
#prct = .75 #percentile
|
||||
#ind = 1.*err.shape[0]*prct
|
||||
#med = median(err)
|
||||
#prc_75 = []
|
||||
#for col in err.T:
|
||||
# col = sorted(col)
|
||||
# prc_75.append(col[int(ind)])
|
||||
#prc_75 = asarray(prc_75)
|
||||
#for i in range(1, err.shape[1], 1):
|
||||
# if med[i-1]<prc_75[i]:
|
||||
# return i
|
||||
#return len(med)
|
||||
|
||||
def cv(N, K, randomise=True, sequential=False):
|
||||
"""Generates K (training, validation) index pairs.
|
||||
@ -351,13 +481,29 @@ def cv(N, K, randomise=True, sequential=False):
|
||||
validation = [i for i in index if i % K == k]
|
||||
yield training, validation
|
||||
|
||||
def diag_cv(shape, nsets=9):
|
||||
def diag_cv(shape, nsets=9, randomise=True):
|
||||
"""Generates K (training, validation) index pairs.
|
||||
|
||||
*Parameters*:
|
||||
|
||||
N : {integer}
|
||||
alpha -- scalar, approx. portion of data perturbed
|
||||
shape : {tuple}
|
||||
Array shape
|
||||
nsets : {integer}
|
||||
Number of cv sets
|
||||
randomise : {boolean}
|
||||
Randomise diagonal index
|
||||
|
||||
*Returns*:
|
||||
|
||||
training : {array-like}
|
||||
training-indices
|
||||
validation : {array-like}
|
||||
validation-indices
|
||||
|
||||
*Notes*:
|
||||
|
||||
This index is based on the full index (raveled row-major ordering).
|
||||
It extracts along diagonals to ensure balanced removal along both axis
|
||||
"""
|
||||
try:
|
||||
m, n = shape
|
||||
@ -365,59 +511,195 @@ def diag_cv(shape, nsets=9):
|
||||
raise ValueError("shape needs to be a two-tuple")
|
||||
if nsets>m or nsets>n:
|
||||
msg = "You may not use more subsets than max(n_rows, n_cols)"
|
||||
raise ValueError, msg
|
||||
nsets = min(m, n)
|
||||
nm = n*m
|
||||
index = arange(nm)
|
||||
n_ind = arange(n)
|
||||
shuffle(n_ind) # random start diag
|
||||
n_ind = arange(n+1)
|
||||
#shuffle(n_ind) # random start diag
|
||||
start_inds = array_split(n_ind, nsets)
|
||||
for v in range(nsets):
|
||||
validation = []
|
||||
for start in start_inds[v]:
|
||||
ind = arange(start+v, nm, n+1)
|
||||
[validation.append(i) for i in ind]
|
||||
training = [j for j in index if j not in validation]
|
||||
yield training, validation
|
||||
validation = set()
|
||||
for start in start_inds[v]:
|
||||
ind = arange(start, nm, n+1)
|
||||
validation.update(ind)
|
||||
#training = [j for j in index if j not in validation]
|
||||
yield list(validation)
|
||||
|
||||
|
||||
def class_error(y_hat, y, method='vanilla'):
|
||||
""" Not used.
|
||||
"""
|
||||
a_opt, k, l = y_hat.shape
|
||||
y_hat_c = zeros((k, l), dtype='d')
|
||||
if method == vanilla:
|
||||
pass
|
||||
for a in range(a_opt):
|
||||
for i in range(k):
|
||||
y_hat_c[a, val, argmax(y_hat[a,val,:])] = 1.0
|
||||
err = 100*((y_hat_c + y) == 2).sum(1)/y.sum(0).astype('d')
|
||||
|
||||
return y_hat_c, err
|
||||
|
||||
def prediction_error(y_hat, y, method='squared'):
|
||||
"""Loss function on multiclass Y.
|
||||
|
||||
Assumes y is a binary dummy class matrix (samples, classes)
|
||||
"""
|
||||
k, l = y.shape
|
||||
|
||||
if method == 'hinge':
|
||||
pass
|
||||
elif method == 'smooth_hinge':
|
||||
z = 90
|
||||
elif method == 'abs':
|
||||
err = abs(y - y_hat)
|
||||
elif method == 'squared':
|
||||
err = (y - y_hat)**2
|
||||
elif method == '0/1':
|
||||
pred = zeros_like(y_hat)
|
||||
for i, row in enumerate(y_hat):
|
||||
largest = row.argsort()[-1]
|
||||
pred[i, largest] = 1.
|
||||
err = abs(y - pred)
|
||||
elif method == '1/2':
|
||||
y_hat[y_hat>.5] = 1
|
||||
y_hat[y_hat<.5] = 0
|
||||
err = abs(y - y_hat)
|
||||
a_max, kk, ll = y_hat.shape
|
||||
error = empty((a_max, l))
|
||||
for a in range(a_max):
|
||||
yha = y_hat[a, :, :]
|
||||
if method == 'hinge':
|
||||
err = zeros((k, l))
|
||||
for j in range(l):
|
||||
for i in range(k):
|
||||
if y[i,j] == 1:
|
||||
if yha[i, j] >= 1:
|
||||
err[i,j] = 0
|
||||
else:
|
||||
err[i,j] = abs(y[i,j] - yha[i,j])
|
||||
elif y[i,j] == 0:
|
||||
if yha[i, j] <= 0:
|
||||
err[i,j] = 0
|
||||
else:
|
||||
err[i,j] = abs(y[i,j] - yha[i,j])
|
||||
|
||||
elif method == 'smooth_hinge':
|
||||
err = zeros((k, l))
|
||||
for j in range(l):
|
||||
for i in range(k):
|
||||
if y[i,j] == 1:
|
||||
if yha[i, j] >= 1:
|
||||
err[i,j] = 0
|
||||
elif yha[i,j] < 1 and yha[i,j] > 0:
|
||||
err[i,j] = abs(y[i,j] - yha[i,j])
|
||||
else:
|
||||
err[i,j] = 1
|
||||
|
||||
elif y[i,j] == 0:
|
||||
if yha[i, j] <= 0:
|
||||
err[i,j] = 0
|
||||
elif yha[i,j] < 1 and yha[i,j] > 0:
|
||||
err[i,j] = abs(y[i,j] - yha[i,j])
|
||||
else:
|
||||
err[i,j] = 1
|
||||
|
||||
elif method == 'abs':
|
||||
err = abs(y - yha)
|
||||
elif method == 'squared':
|
||||
err = (y - yha)**2
|
||||
elif method == '0/1':
|
||||
pred = zeros((k, l))
|
||||
for i, row in enumerate(yha):
|
||||
largest = row.argsort()[-1]
|
||||
pred[i, largest] = 1.
|
||||
err = abs(y - pred)
|
||||
elif method == '1/2':
|
||||
yh = yha.copy()
|
||||
yh[yha>.5] = 1
|
||||
yh[yha<.5] = 0
|
||||
err = abs(y - yh)
|
||||
else:
|
||||
raise ValueError("Option: %s (method) not valid" %method)
|
||||
|
||||
return err
|
||||
error[a,:] = err.mean(0)
|
||||
|
||||
return error
|
||||
|
||||
def _wkernel_pls_val(X, Y, a_max, n_blocks=None):
|
||||
"""Returns rmsep and aopt for pls tailored for wide X.
|
||||
|
||||
The error of cross validation is calculated
|
||||
based on random block cross-validation. With number of blocks equal to
|
||||
number of samples [default] gives leave-one-out cv.
|
||||
The pls model is based on the simpls algorithm for wide X, an is quite
|
||||
fast in very high dimensional X data.
|
||||
|
||||
*Parameters*:
|
||||
|
||||
X : ndarray
|
||||
column centered data matrix of size (samples x variables)
|
||||
Y : ndarray
|
||||
column centered response matrix of size (samples x responses)
|
||||
a_max : scalar
|
||||
Maximum number of components
|
||||
n_blocks : scalar
|
||||
Number of blocks in cross validation
|
||||
|
||||
*Returns*:
|
||||
|
||||
rmsep : ndarray
|
||||
Root Mean Square Error of cross-validated Predictions
|
||||
aopt : scalar
|
||||
Guestimate of the optimal number of components
|
||||
|
||||
SeeAlso:
|
||||
|
||||
- pls_cv_val : Same output, not optimised for wide X
|
||||
- w_simpls : Simpls algorithm for wide X
|
||||
|
||||
Notes
|
||||
-----
|
||||
|
||||
Based (cowardly translated) on m-files from the Chemoact toolbox
|
||||
X, Y inputs need to be centered (fixme: check)
|
||||
|
||||
|
||||
Examples
|
||||
--------
|
||||
|
||||
>>> import numpy as n
|
||||
>>> X = n.array([[1., 2., 3.],[]])
|
||||
>>> Y = n.array([[1., 2., 3.],[]])
|
||||
>>> w_pls(X, Y, 1)
|
||||
[4,5,6], 1
|
||||
"""
|
||||
dt = X.dtype
|
||||
k, l = m_shape(Y)
|
||||
PRESS = zeros((l, a_max+1), dtype=dt)
|
||||
if n_blocks==None:
|
||||
n_blocks = Y.shape[0]
|
||||
XXt = dot(X, X.T)
|
||||
V = w_pls_gen(XXt, Y, n_blocks=n_blocks, center=True)
|
||||
for Din, Doi, Yin, Yout in V:
|
||||
ym = -sum(Yout, 0)[newaxis]/(1.0*Yin.shape[0])
|
||||
PRESS[:,0] = PRESS[:,0] + ((Yout - ym)**2).sum(0)
|
||||
|
||||
dat = w_simpls(Din, Yin, a_max)
|
||||
Q, U, H = dat['Q'], dat['U'], dat['H']
|
||||
That = dot(Doi, dot(U, inv(triu(dot(H.T, U))) ))
|
||||
|
||||
Yhat = zeros((a_max, k, l), dtype=dt)
|
||||
for j in range(l):
|
||||
TQ = dot(That, triu(dot(Q[j,:][:,newaxis], ones((1,a_max)))) )
|
||||
Yhat[:,:,l] = TQ
|
||||
E = Yout[:,j][:,newaxis] - TQ
|
||||
E = E + sum(E, 0)/Din.shape[0]
|
||||
PRESS[j,1:] = PRESS[j,1:] + sum(E**2, 0)
|
||||
#Yhat = Yin - dot(That,Q.T)
|
||||
msep = PRESS/(Y.shape[0])
|
||||
#aopt = find_aopt_from_sep(msep)
|
||||
return Yhat, sqrt(msep)
|
||||
|
||||
def _w_pls(aat, b, aopt):
|
||||
""" Pls for wide matrices.
|
||||
Fast pls for crossval, used in calc rmsep for wide X
|
||||
There is no P or W. T is normalised
|
||||
|
||||
aat = centered kernel matrix
|
||||
b = centered y
|
||||
"""
|
||||
bb = b.copy()
|
||||
k, l = m_shape(b)
|
||||
m, m = m_shape(aat)
|
||||
U = empty((m, aopt)) # W
|
||||
T = empty((m, aopt))
|
||||
R = empty((m, aopt)) # R
|
||||
PROJ = empty((m, aopt)) # P?
|
||||
|
||||
for i in range(aopt):
|
||||
if has_sym:
|
||||
s, q = symeig(dot(dot(b.T, aat), b), range=(l,l),overwrite=True)
|
||||
else:
|
||||
q, s, vh = svd(dot(dot(b.T, aat), b), full_matrices=0)
|
||||
q = q[:,:1]
|
||||
u = dot(b , q) #y-factor scores
|
||||
U[:,i] = u.ravel()
|
||||
t = dot(aat, u)
|
||||
|
||||
t = t/vnorm(t)
|
||||
T[:,i] = t.ravel()
|
||||
r = dot(aat, t)#score-weights
|
||||
#r = r/vnorm(r)
|
||||
R[:,i] = r.ravel()
|
||||
PROJ[:,: i+1] = dot(T[:,:i+1], inv(dot(T[:,:i+1].T, R[:,:i+1])) )
|
||||
if i<aopt:
|
||||
b = b - dot(PROJ[:,:i+1], dot(R[:,:i+1].T, b) )
|
||||
C = dot(bb.T, T)
|
||||
|
@ -8,7 +8,7 @@ __docformat__ = "restructuredtext en"
|
||||
from math import sqrt as msqrt
|
||||
|
||||
from numpy import dot,empty,zeros,apply_along_axis,newaxis,finfo,sqrt,r_,expand_dims,\
|
||||
minimum, any, isnan
|
||||
minimum,any,isnan,ones,tile
|
||||
from numpy.linalg import inv,svd
|
||||
from scipy.sandbox import arpack
|
||||
|
||||
@ -79,11 +79,13 @@ def pca(X, aopt, scale='scores', mode='normal', center_axis=[0]):
|
||||
"""
|
||||
|
||||
m, n = X.shape
|
||||
min_aopt = min(m, n)
|
||||
max_aopt = min(m, n)
|
||||
if center_axis != None:
|
||||
X, mnx = center(X, center_axis[0])
|
||||
min_aopt = min_aopt - 1
|
||||
assert(aopt <= min_aopt)
|
||||
max_aopt = max_aopt - 1
|
||||
if aopt > max_aopt:
|
||||
print "Using aopt: %d" %max_aopt
|
||||
aopt = max_aopt
|
||||
if m > (n+100) or n > (m+100):
|
||||
u, s, v = esvd(X, aopt)
|
||||
else:
|
||||
@ -108,7 +110,7 @@ def pca(X, aopt, scale='scores', mode='normal', center_axis=[0]):
|
||||
P = P*s
|
||||
|
||||
if mode in ['fast', 'f', 'F']:
|
||||
return {'T':T, 'P':P, 'aopt':aopt}
|
||||
return {'T':T, 'P':P, 'aopt':aopt, 'mnx': mnx}
|
||||
|
||||
if mode in ['detailed', 'd', 'D']:
|
||||
E = empty((aopt, m, n))
|
||||
@ -135,7 +137,7 @@ def pca(X, aopt, scale='scores', mode='normal', center_axis=[0]):
|
||||
expvarx = r_[0, 100*e.cumsum()/(X*X).sum()]
|
||||
|
||||
return {'T': T, 'P': P, 'E': E, 'evx': expvarx, 'leverage': lev, 'ssqx': ssq,
|
||||
'aopt': aopt, 'eigvals': e}
|
||||
'aopt': aopt, 'eigvals': e, 'mnx': mnx}
|
||||
|
||||
def pcr(a, b, aopt, scale='scores',mode='normal',center_axis=[0, 0]):
|
||||
""" Principal Component Regression.
|
||||
@ -690,39 +692,80 @@ def center(a, axis):
|
||||
### TODO ###
|
||||
# Perhaps not(!) use broadcasting and return full centering
|
||||
# matrix instead ?
|
||||
#
|
||||
|
||||
|
||||
# check if we have a vector
|
||||
is_vec = len(a.shape) == 1
|
||||
if not is_vec:
|
||||
is_vec = a.shape[0] == 1 or a.shape[1] == 1
|
||||
if is_vec:
|
||||
if axis == 2:
|
||||
warnings.warn("Double centering of vector ignored, using ordinary centering")
|
||||
#warnings.warn("Double centering of vector ignored, using ordinary centering")
|
||||
raise ValueError("Double centering of vector is not a valid option")
|
||||
if axis == -1:
|
||||
mn = 0
|
||||
mn = zeros(a.shape)
|
||||
else:
|
||||
mn = a.mean()
|
||||
mn = a.mean()*ones(a.shape)
|
||||
return a - mn, mn
|
||||
|
||||
if axis == -1:
|
||||
mn = zeros((1,a.shape[1],))
|
||||
#mn = tile(mn, (a.shape[0], 1))
|
||||
mn = tile(mn, (a.shape[0], 1))
|
||||
elif axis == 0:
|
||||
mn = a.mean(0)[newaxis]
|
||||
#mn = tile(mn, (a.shape[0], 1))
|
||||
mn = tile(mn, (a.shape[0], 1))
|
||||
elif axis == 1:
|
||||
mn = a.mean(1)[:,newaxis]
|
||||
#mn = tile(mn, (1, a.shape[1]))
|
||||
mn = tile(mn, (1, a.shape[1]))
|
||||
elif axis == 2:
|
||||
#fixme: double centering returns column mean as loc-vector, ok?
|
||||
mn = a.mean(0)[newaxis] + a.mean(1)[:,newaxis] - a.mean()
|
||||
return a - mn , a.mean(0)[newaxis]
|
||||
return a - mn , mn
|
||||
else:
|
||||
raise IOError("input error: axis must be in [-1,0,1,2]")
|
||||
|
||||
return a - mn, mn
|
||||
|
||||
def inv_center(a, mn_a, axis):
|
||||
"""Inverse centering.
|
||||
|
||||
Adding row, column or double centering to a matrix.
|
||||
This method uses broadcasting, so the size of a needs only
|
||||
to match the axis argument.
|
||||
|
||||
*Parameters*:
|
||||
|
||||
a : {array}
|
||||
Input data, to be centered
|
||||
|
||||
axis : {integer}
|
||||
Which centering to perform.
|
||||
0 = col center, 1 = row center, 2 = double center
|
||||
-1 = nothing
|
||||
|
||||
*Returns*:
|
||||
|
||||
a_centered : {array}
|
||||
Centered data matrix
|
||||
|
||||
*Notes*
|
||||
|
||||
May just as well used to subtract a mean (just use negative mn_a)
|
||||
|
||||
"""
|
||||
if axis == -1:
|
||||
return a
|
||||
m, n = a.shape
|
||||
k, o = mn_a.shape
|
||||
if axis == 0: #row centering
|
||||
assert(n == o and k == 1)
|
||||
elif axis == 1: # column centering
|
||||
assert(n == k and o == 1)
|
||||
elif axis == 2: # double centering
|
||||
assert(n == o and m == k)
|
||||
|
||||
return a + mn_a
|
||||
|
||||
def _scale(a, axis):
|
||||
""" Matrix scaling to unit variance.
|
||||
|
||||
|
@ -14,7 +14,7 @@ from engines import pls
|
||||
from engines import nipals_lpls as lpls
|
||||
|
||||
|
||||
def hotelling(Pcv, P, p_center='median', cov_center=median,
|
||||
def hotelling(Pcv, P, p_center='median', cov_center='median',
|
||||
alpha=0.3, crot=True, strict=False):
|
||||
"""Returns regularized hotelling T^2.
|
||||
|
||||
@ -31,8 +31,8 @@ def hotelling(Pcv, P, p_center='median', cov_center=median,
|
||||
Calibration model paramter
|
||||
p_center : {'median', 'mean', 'cal_model'}, optional
|
||||
Location method for sub-segments
|
||||
cov_center : {py_func}, optional
|
||||
Location function
|
||||
cov_center : {'median', 'mean', 'cal_model'}, optional
|
||||
Pooled covariance estimate
|
||||
alpha : {float}, optional
|
||||
Regularisation towards pooled covariance estimate.
|
||||
crot : {boolean}, optional
|
||||
@ -79,33 +79,43 @@ def hotelling(Pcv, P, p_center='median', cov_center=median,
|
||||
for i in xrange(n):
|
||||
Pi = Pcv[:,i,:] # (n_sets x amax)
|
||||
Pi_ctr = P_ctr[i,:] # (1 x amax)
|
||||
Pim = (Pi - Pi_ctr)*msqrt(n_sets-1)
|
||||
Cov_i[i] = (1./n_sets)*dot(Pim.T, Pim)
|
||||
|
||||
Cov = cov_center(Cov_i)
|
||||
reg_cov = (1. - alpha)*Cov_i + alpha*Cov
|
||||
#Pim = (Pi - Pi_ctr)*msqrt(n_sets-1)
|
||||
#Cov_i[i] = (1./n_sets)*dot(Pim.T, Pim)
|
||||
Pim = (Pi - Pi_ctr)
|
||||
Cov_i[i] = dot(Pim.T, Pim)
|
||||
if cov_center == 'median':
|
||||
Cov_p = median(Cov_i)
|
||||
elif cov_center == 'mean':
|
||||
Cov_p = Cov.mean(0)
|
||||
else:
|
||||
print "Pooled covariance est. invalid, using median"
|
||||
print cov_center
|
||||
Cov_p = median(Cov_i)
|
||||
reg_cov = (1. - alpha)*Cov_i + alpha*Cov_p
|
||||
for i in xrange(n):
|
||||
Pc = P_ctr[i,:]
|
||||
sigma = reg_cov[i]
|
||||
T_sq[i] = dot(dot(Pc, inv(sigma)), Pc)
|
||||
return T_sq
|
||||
|
||||
def procrustes(a, b, strict=True, center=False, verbose=False):
|
||||
def procrustes(a, b, strict=True, center=False, force_norm=False, verbose=False):
|
||||
"""Orthogonal rotation of b to a.
|
||||
|
||||
Procrustes rotation is an orthogonal rotoation of one subspace
|
||||
Procrustes rotation is an orthogonal rotation of one subspace
|
||||
onto another by minimising the squared error.
|
||||
|
||||
*Parameters*:
|
||||
|
||||
a : {array}
|
||||
Input array
|
||||
Input array, stationary
|
||||
b : {array}
|
||||
Input array
|
||||
Input array, rotate this to max. fit to a
|
||||
strict : {boolean}
|
||||
Only do flipping and shuffling
|
||||
center : {boolean}
|
||||
Center before rotation, translate back after
|
||||
force_norm : {boolean}
|
||||
Ensure that columns of a and b are orthonormal
|
||||
verbose : {boolean}
|
||||
Show sum of squares
|
||||
|
||||
@ -125,6 +135,12 @@ def procrustes(a, b, strict=True, center=False, verbose=False):
|
||||
a = a - mn_a
|
||||
mn_b = b.mean(0)
|
||||
b = b - mn_b
|
||||
|
||||
if force_norm:
|
||||
a_norm = apply_along_axis(norm, 0, a)
|
||||
a = a/a_norm
|
||||
b_norm = apply_along_axis(norm, 0, b)
|
||||
b = b/b_norm
|
||||
|
||||
u, s, vt = svd(dot(b.T, a))
|
||||
Cm = dot(u, vt) # Cm: orthogonal rotation matrix
|
||||
@ -166,13 +182,13 @@ def _ensure_strict(C, only_flips=True):
|
||||
|
||||
"""
|
||||
if only_flips:
|
||||
C = eye(Cm.shape[0])*sign(Cm)
|
||||
C = eye(C.shape[0])*sign(C)
|
||||
return C
|
||||
Cm = zeros(C.shape, dtype='d')
|
||||
Cm[abs(C)>.6] = 1.
|
||||
if det(Cm)>1:
|
||||
raise NotImplementedError
|
||||
return Cm*S
|
||||
return Cm*sign(C)
|
||||
|
||||
def lpls_qvals(X, Y, Z, aopt=None, alpha=.3, zx_alpha=.5, n_iter=20,
|
||||
sim_method='shuffle', p_center='med', cov_center=median,
|
||||
@ -248,7 +264,7 @@ def lpls_qvals(X, Y, Z, aopt=None, alpha=.3, zx_alpha=.5, n_iter=20,
|
||||
Wc, Lc = lpls_jk(X, Y, Z ,aopt, zorth=zorth)
|
||||
cal_tsq_x = hotelling(Wc, dat['W'], alpha=alpha)
|
||||
cal_tsq_z = hotelling(Lc, dat['L'], alpha=alpha)
|
||||
|
||||
print "morn"
|
||||
# Perturbations
|
||||
index = arange(m)
|
||||
for i in range(n_iter):
|
||||
@ -257,9 +273,10 @@ def lpls_qvals(X, Y, Z, aopt=None, alpha=.3, zx_alpha=.5, n_iter=20,
|
||||
dat = lpls(X, Y[indi,:], Z, aopt, scale='loads', center_axis=center_axis, zorth=zorth)
|
||||
Wi, Li = lpls_jk(X, Y[indi,:], Z, aopt, nsets=nsets)
|
||||
pert_tsq_x[:,i] = hotelling(Wi, dat['W'], alpha=alpha)
|
||||
pert_tsq_z[:,i] = hotelling(Li, dat['L'], alpha=alpha)
|
||||
# no reason to borrow variance in dag (alpha ->some small value)
|
||||
pert_tsq_z[:,i] = hotelling(Li, dat['L'], alpha=0.01)
|
||||
|
||||
return _fdr(cal_tsq_z, pert_tsq_z, median), _fdr(cal_tsq_x, pert_tsq_x, median)
|
||||
return cal_tsq_z, pert_tsq_z, cal_tsq_x, pert_tsq_x
|
||||
|
||||
|
||||
def pls_qvals(X, Y, aopt, alpha=.3, n_iter=20,p_center='med', cov_center=median,
|
||||
@ -335,7 +352,7 @@ def pls_qvals(X, Y, aopt, alpha=.3, n_iter=20,p_center='med', cov_center=median,
|
||||
Wi = pls_jk(X, Y[indi,:], aopt, nsets=nsets, center_axis=center_axis)
|
||||
pert_tsq_x[:,i] = hotelling(Wi, dat['W'], alpha=alpha)
|
||||
|
||||
return _fdr(cal_tsq_x, pert_tsq_x, median)
|
||||
return cal_tsq_x, pert_tsq_x
|
||||
|
||||
|
||||
|
||||
@ -372,7 +389,7 @@ def _fdr(tsq, tsqp, loc_method=median):
|
||||
gene expression data using dimension reduction methods, BMC
|
||||
bioinformatics, 2007
|
||||
"""
|
||||
n, = tsq.shape
|
||||
n = tsq.shape[0]
|
||||
k, m = tsqp.shape
|
||||
assert(n==k)
|
||||
n_false = empty((n, m), 'd')
|
||||
|
27
sandbox/powerit/Makefile
Normal file
27
sandbox/powerit/Makefile
Normal file
@ -0,0 +1,27 @@
|
||||
CC=gcc
|
||||
PYTHON_INCLUDE=/usr/include/python2.5
|
||||
PYTHON_LIBRARY=/usr/lib/python2.5
|
||||
NUMPY_INCLUDE=$(PYTHON_LIBRARY)/site-packages/numpy/core/include/numpy/
|
||||
GOTO_LIBRARY=/home/flatberg/GotoBLAS
|
||||
BLAS_LIBRARY=/usr/lib
|
||||
BLAS=blas
|
||||
GOTO=goto
|
||||
|
||||
|
||||
all: numpy_module.so
|
||||
|
||||
numpy_module.so: numpy_module.o sympowerit.o
|
||||
$(CC) -Wall -shared numpy_module.o sympowerit.o -o _numpy_module.so -L$(PYTHON_LIBRARY) -lpython2.5 -L$(BLAS_LIBRARY) -l$(BLAS) -llapack -lg2c -lm
|
||||
|
||||
numpy_module.o: numpy_module.c numpy_module.h
|
||||
$(CC) -fPIC -Wall -O2 -g -c -I$(PYTHON_INCLUDE) -I$(NUMPY_INCLUDE) numpy_module.c
|
||||
|
||||
c_egines.o: sympowerit.c
|
||||
$(CC) -Wall -O2 -g -c sympowerit.c
|
||||
|
||||
clean:
|
||||
-rm numpy_module.o sympowerit.o
|
||||
-rm -rf _numpy_module.so
|
||||
|
||||
|
||||
|
70
sandbox/powerit/numpy_module.c
Executable file
70
sandbox/powerit/numpy_module.c
Executable file
@ -0,0 +1,70 @@
|
||||
/* A file to test imorting C modules for handling arrays to Python */
|
||||
|
||||
/* Python.h includes <stdio.h>, <string.h>, <errno.h>, <limits.h>, and <stdlib.h> (if available)*/
|
||||
#include "Python.h"
|
||||
#include "arrayobject.h"
|
||||
#include "numpy_module.h"
|
||||
#include "sympowerit.h"
|
||||
#include <cblas.h>
|
||||
|
||||
|
||||
/* ==== Set up the methods table ====================== */
|
||||
static PyMethodDef _numpy_moduleMethods[] = {
|
||||
{"sym_powerit", sym_powerit, METH_VARARGS},
|
||||
{NULL, NULL} /* Sentinel - marks the end of this structure */
|
||||
};
|
||||
|
||||
|
||||
/* ==== Initialize the numpy_module functions =============== */
|
||||
// Module name must be _numpy_module in compile and linked
|
||||
void init_numpy_module() {
|
||||
(void) Py_InitModule("_numpy_module", _numpy_moduleMethods);
|
||||
import_array(); // Must be present for NumPy. Called first after above line.
|
||||
}
|
||||
|
||||
|
||||
/* =========== Power iteration on symmetric matrix ========== */
|
||||
static PyObject *sym_powerit(PyObject *self, PyObject *args)
|
||||
{
|
||||
PyArrayObject *X=NULL, *T=NULL, *E=NULL,*U=NULL;
|
||||
double *tin, *ein, tol;
|
||||
int amax, n, info, maxiter, verbose;
|
||||
int dims[2];
|
||||
|
||||
/* Parse tuple of input arguments*/
|
||||
if (!PyArg_ParseTuple(args, "O!O!iidii",
|
||||
&PyArray_Type, &X,
|
||||
&PyArray_Type, &T,
|
||||
&n,
|
||||
&amax,
|
||||
&tol,
|
||||
&maxiter,
|
||||
&verbose)
|
||||
)
|
||||
return NULL;
|
||||
if (NULL == X) return NULL;
|
||||
|
||||
/* Get the dimensions of the input */
|
||||
n = X->dimensions[0];
|
||||
|
||||
/* Create output/ work arrays, no inplace calculations */
|
||||
dims[0] = n;
|
||||
dims[1] = amax;
|
||||
U = (PyArrayObject*) PyArray_NewCopy(T, NPY_CORDER);
|
||||
dims[1] = n;
|
||||
E = (PyArrayObject*) PyArray_NewCopy(X, NPY_CORDER);
|
||||
|
||||
/* Get pointers to contigous data (row-major)*/
|
||||
ein = (double *)E->data;
|
||||
tin = (double *)U->data;
|
||||
|
||||
/* Call sympower method */
|
||||
info = sympowerit (ein, n, tin, amax, tol, maxiter, verbose);
|
||||
|
||||
Py_DECREF(E);
|
||||
|
||||
return Py_BuildValue("Ni", U, info);
|
||||
|
||||
}
|
||||
|
||||
|
1
sandbox/powerit/numpy_module.h
Executable file
1
sandbox/powerit/numpy_module.h
Executable file
@ -0,0 +1 @@
|
||||
/* Header to test of C modules for arrays for Python: sympowerit.c */
/* Python callablefunctions */
static PyObject *sym_powerit(PyObject *self, PyObject *args);
|
116
sandbox/powerit/sympowerit.c
Normal file
116
sandbox/powerit/sympowerit.c
Normal file
@ -0,0 +1,116 @@
|
||||
#include <cblas.h>
|
||||
#include <math.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
/* =============== Main functions body ====================*/
|
||||
|
||||
int sympowerit(double *E, int n, double *T, int amax, double tol,
|
||||
int maxiter, int verbose)
|
||||
{
|
||||
/*
|
||||
PURPOSE: Estimate eigenvectos of a symmetric matrix using the power method.
|
||||
|
||||
CALLING SEQUENCE:
|
||||
info = sympowerit(*E, n, *T, amax, tol, maxiter);
|
||||
|
||||
INPUTS:
|
||||
E symmetric matrix from XX^T/X^TX of centered data matrix
|
||||
type: *double
|
||||
n elements in X first dimension
|
||||
type: int
|
||||
type: *double
|
||||
T workspace for scores (m, amax)
|
||||
type: *double
|
||||
amax maximum number of principal components
|
||||
type: int
|
||||
tol convergence limit
|
||||
type: double
|
||||
maxiter maximum number of iteraitons
|
||||
type: int
|
||||
verbose the amount of debug output
|
||||
*/
|
||||
|
||||
int iter, a, j;
|
||||
int info = 0;
|
||||
double sumsq, l2norm, *y,*x, sgn, diff, alpha;
|
||||
double lambda = 0.0;
|
||||
|
||||
if (verbose>1) verbose=1;
|
||||
|
||||
/* Allocate space for working vector and eigenvector */
|
||||
y = (double *) malloc(n*sizeof(double));
|
||||
x = (double *) malloc(n*sizeof(double));
|
||||
|
||||
/* Loop over all components to be estimated*/
|
||||
for (a=0; a<=amax-1; a++){
|
||||
/* Reset work-/eigen-vector*/
|
||||
for (j=0; j<n; j++) {
|
||||
y[j] = 0.0;
|
||||
x[j] = T[(j*amax)+a];
|
||||
}
|
||||
/*x[0] = 1.0;*/
|
||||
|
||||
/* Main power-iteration loop */
|
||||
for ( iter = 0; iter <= maxiter; iter++ ) {
|
||||
/* Matrix-vector product */
|
||||
/*cblas_dsymv (CblasRowMajor, CblasUpper, n, 1.0, E, n,
|
||||
x, 1, 0.0, y, 1);*/
|
||||
cblas_dgemv (CblasRowMajor, CblasNoTrans, n, n, 1.0, E, n,
|
||||
x, 1, 0.0, y, 1);
|
||||
|
||||
/* Normalise y */
|
||||
sumsq = y[0] * y[0];
|
||||
lambda = x[0] * y[0];
|
||||
for ( j = 1; j < n; j++ ) {
|
||||
sumsq += y[j] * y[j];
|
||||
lambda += x[j] * y[j];
|
||||
}
|
||||
l2norm = sqrt ( sumsq );
|
||||
for ( j = 0; j < n; j++ ) y[j] /= l2norm;
|
||||
|
||||
/*Check for convergence */
|
||||
sgn = ( lambda < 0 ? -1.0 : 1.0 );
|
||||
diff = x[0] - sgn * y[0];
|
||||
sumsq = diff * diff;
|
||||
x[0] = y[0];
|
||||
for ( j = 0; j < n; j++ ) {
|
||||
diff = x[j] - sgn * y[j];
|
||||
sumsq += diff * diff;
|
||||
x[j] = y[j];
|
||||
}
|
||||
if ( sqrt ( sumsq ) < tol ) {
|
||||
if (verbose == 1){
|
||||
printf("\nComp: %d\n", a);
|
||||
printf("Converged in %d iterations\n", iter);
|
||||
}
|
||||
break;
|
||||
}
|
||||
if (iter >= maxiter){
|
||||
if (verbose == 1){
|
||||
printf("\nComp: %d\n", a);
|
||||
printf("Max iter reached.\n");
|
||||
printf("Error: %.2E\n", sumsq);
|
||||
}
|
||||
info = 1;
|
||||
break;
|
||||
}
|
||||
}
|
||||
/* Calculate T */
|
||||
for (j=0; j<n; j++){
|
||||
y[j] = sgn*sqrt(sgn*lambda)*x[j];
|
||||
T[(j*amax)+a] = y[j];
|
||||
}
|
||||
|
||||
/* rank one deflation of residual matrix */
|
||||
/*cblas_dsyr (CblasRowMajor, CblasUpper, n, -1.0, x, 1, E, n);*/
|
||||
alpha = -1.0*lambda;
|
||||
cblas_dger (CblasRowMajor, n, n, alpha, x, 1, x, 1, E, n);
|
||||
}
|
||||
|
||||
/* Free used space */
|
||||
free(x);
|
||||
free(y);
|
||||
|
||||
return (info);
|
||||
}
|
2
sandbox/powerit/sympowerit.h
Normal file
2
sandbox/powerit/sympowerit.h
Normal file
@ -0,0 +1,2 @@
|
||||
int sympowerit (double *X, int n, double *T, int amax, double tol, int maxiter, int verbose);
|
||||
/* Simple symmetric power iterations */
|
100
sandbox/powerit/sympowerit.py
Normal file
100
sandbox/powerit/sympowerit.py
Normal file
@ -0,0 +1,100 @@
|
||||
from numpy import *
|
||||
|
||||
HAS_SYMPOWER=True
|
||||
try:
|
||||
from _numpy_module import sym_powerit
|
||||
except:
|
||||
raise ImportError("Sym_powerit module not present")
|
||||
HAS_SYMPOWER = False
|
||||
|
||||
class SymPowerException(Exception):
|
||||
pass
|
||||
|
||||
_ERRORCODES = {1: "Some eigenvectors did not converge, try to increase \nthe number of iterations or lower the tolerance level",
|
||||
0: ""}
|
||||
|
||||
|
||||
def sympowerit(xx, T0=None, mn_center=False, a_max=10, tol=1e-7, maxiter=100,
|
||||
verbose=0):
|
||||
"""Estimate eigenvectos of a symmetric matrix using the power method.
|
||||
|
||||
*Parameters*:
|
||||
|
||||
xx : {array}
|
||||
Symmetric square array (m, m)
|
||||
T0 : {array}
|
||||
Initial solution (m, a_max), optional
|
||||
mn_center : {boolean}, optional
|
||||
Mean centering
|
||||
a_max : {integer}, optional
|
||||
Number of components to extract
|
||||
tol : {float}, optional
|
||||
Tolerance level of eigenvector solver
|
||||
maxiter : {integer}
|
||||
Maximum number of poweriterations to use
|
||||
verbose : {integer}
|
||||
Debug output (==1)
|
||||
|
||||
*Returns*:
|
||||
v : {array}
|
||||
Eigenvectors of xx, (m , a_max)
|
||||
"""
|
||||
|
||||
valid_types = ['D','d','F','f']
|
||||
dtype = xx.dtype.char
|
||||
n, m = xx.shape
|
||||
if not(dtype in valid_types):
|
||||
msg = "Array type: (%s) needs to be a float or double" %dtype
|
||||
raise SymPowerException(msg)
|
||||
if not (m==n):
|
||||
msg = "Input array needs to be square, input: (%d,%d)" %(m,n)
|
||||
raise SymPowerException(msg)
|
||||
# small test of symmetry
|
||||
N = 5
|
||||
num = random.randint(0,n,N)
|
||||
for i in range(5):
|
||||
j = N-5
|
||||
if abs(xx[num[i],num[j]] - xx[num[j],num[i]])>1e-15:
|
||||
msg = "Array needs to be symmetric"
|
||||
raise SymPowerException(msg)
|
||||
|
||||
if not a_max:
|
||||
a_max = 10
|
||||
|
||||
if T0 !=None:
|
||||
tn, tm = T0.shape
|
||||
if not (tn==n):
|
||||
msg = "Start eigenvectors need to match input array ()"
|
||||
raise SymPowerException(msg)
|
||||
if not (tm==a_max):
|
||||
msg = "Start eigenvectors need to match input a_max ()"
|
||||
raise SymPowerException(msg)
|
||||
else:
|
||||
T0 = zeros((n, a_max), 'd')
|
||||
T0[0,:] = ones((a_max,),'d')
|
||||
|
||||
if mn_center:
|
||||
xx = _center(xx)
|
||||
|
||||
# call c-function
|
||||
T, info = sym_powerit(xx, T0, n, a_max, tol, maxiter, verbose)
|
||||
|
||||
if info != 0:
|
||||
if verbose:
|
||||
print _ERRORCODES.get(info, "Dont know this error")
|
||||
return T
|
||||
|
||||
|
||||
def _center(xx, ret_mn=False):
|
||||
"""Returns mean centered symmetric kernel matrix.
|
||||
"""
|
||||
n = xx.shape[0]
|
||||
h = xx.sum(0)[:,newaxis]
|
||||
h = (h - mean(h)/2)/n
|
||||
mn_a = h + h.T
|
||||
xxc = xx - mn_a
|
||||
if ret_mn:
|
||||
return xxc, mn_a
|
||||
return xxc
|
||||
|
||||
|
Reference in New Issue
Block a user