Cleaned esvd routine, added subfunc scale

This commit is contained in:
Arnar Flatberg 2007-08-07 11:41:03 +00:00
parent d055a1f882
commit 168384f266

View File

@ -7,7 +7,7 @@ import math
from scipy.linalg import svd,inv
from scipy import dot,empty,eye,newaxis,zeros,sqrt,diag,\
apply_along_axis,mean,ones,randn,empty_like,outer,r_,c_,\
rand,sum,cumsum,matrix, expand_dims,minimum,where
rand,sum,cumsum,matrix, expand_dims,minimum,where,arange
has_sym=True
try:
from symeig import symeig
@ -67,12 +67,12 @@ def pca(a, aopt,scale='scores',mode='normal',center_axis=0):
if center_axis>=0:
a = a - expand_dims(a.mean(center_axis), center_axis)
if m>(n+100) or n>(m+100):
u, e, v = esvd(a, amax=None) # fixme:amax option need to work with expl.var
s = sqrt(e)
u, s, v = esvd(a, amax=None) # fixme:amax option need to work with expl.var
print s[:10]
else:
u, s, vt = svd(a, 0)
v = vt.T
e = s**2
e = s**2
tol = 1e-10
eff_rank = sum(s>s[0]*tol)
aopt = minimum(aopt, eff_rank)
@ -189,7 +189,7 @@ def pcr(a, b, aopt, scale='scores',mode='normal',center_axis=0):
dat.update({'Q':Q, 'F':F, 'expvary':expvary})
return dat
def pls(a, b, aopt=2, scale='scores', mode='normal', center_axis=0, ab=None):
def pls(a, b, aopt=2, scale='scores', mode='normal', center_axis=-1, ab=None):
"""Partial Least Squares Regression.
Performs PLS on given matrix and returns results in a dictionary.
@ -696,34 +696,38 @@ def esvd(data, amax=None):
if m>=n:
kernel = dot(data.T, data)
if has_sym:
if not amax:
amax = n-1
pcrange = [n-amax, n]
if amax==None:
amax = n
pcrange = None
else:
pcrange = [n-amax, n]
print "symm>n"
s, v = symeig(kernel, range=pcrange, overwrite=True)
s = s[::-1]
v = v[:,arange(n, -1, -1)]
v = v[:,::-1]
else:
u, s, vt = svd(kernel)
v = vt.T
u = dot(data, v)
for i in xrange(amax):
s[i] = vnorm(u[:,i])
u[:,i] = u[:,i]/s[i]
s = sqrt(s)
u = dot(data, v)/s
else:
kernel = dot(data, data.T)
if has_sym:
if not amax:
amax = m-1
pcrange = [m-amax, m]
if amax==None:
amax = m
pcrange = None
else:
pcrange = [m-amax, m]
print "sym (m<n)"
s, u = symeig(kernel, range=pcrange, overwrite=True)
s = s[::-1]
u = u[:,::-1]
else:
u, s, vt = svd(kernel)
v = dot(u.T, data)
for i in xrange(amax):
s[i] = vnorm(v[i,:])
v[i,:] = v[i,:]/s[i]
return u, s, v.T
s = sqrt(s)
v = dot(data.T, u)/s
print s[:2]
return u, s, v
def vnorm(x):
# assume column arrays (or vectors)
@ -744,3 +748,16 @@ def center(a, axis):
raise IOError("input error: axis must be in [-1,0,1,2]")
return a - mn, mn
def scale(a, axis):
    """Scale a 2-d array to unit standard deviation along an axis.

    Parameters
    ----------
    a : ndarray, shape (m, n)
        Data matrix to scale.
    axis : int
        -1 : no scaling; the scale factors are all ones.
         0 : divide each column by its standard deviation.
         1 : divide each row by its standard deviation.

    Returns
    -------
    (scaled, sc) : tuple
        ``scaled`` is ``a / sc``; ``sc`` holds the scale factors that were
        applied, with shape (n,) for axis in (-1, 0) and (m, 1) for axis 1,
        so ``scaled * sc`` restores ``a``.

    Raises
    ------
    IOError
        If ``axis`` is not one of -1, 0, 1.

    Notes
    -----
    A row/column with zero standard deviation yields inf/nan entries;
    no guard is applied here.
    """
    if axis == -1:
        # Identity element for division, so the data is returned unchanged.
        sc = ones((a.shape[1],))
    elif axis == 0:
        sc = a.std(0)
    elif axis == 1:
        # Keep a trailing axis so the factors broadcast across each row.
        sc = a.std(1)[:, newaxis]
    else:
        raise IOError("input error: axis must be in [-1,0,1]")
    # Scaling divides by the spread; subtracting it (as when centering
    # on the mean) would shift the data instead of normalising it.
    return a / sc, sc