commit 10eba079bc (parent aa4007e208)
Author: Arnar Flatberg
Date: 2007-07-30 18:04:42 +00:00
4 changed files with 151 additions and 22 deletions


@@ -203,7 +203,7 @@ class LplsXCorrelationPlot(BlmScatterPlot):
facecolor='gray',
alpha=.1,
zorder=1)
c50 = patches.Circle(center, radius=radius/2.0,
c50 = patches.Circle(center, radius= sqrt(radius/2.0),
facecolor='gray',
alpha=.1,
zorder=2)
@@ -228,7 +228,7 @@ class LplsZCorrelationPlot(BlmScatterPlot):
facecolor='gray',
alpha=.1,
zorder=1)
c50 = patches.Circle(center, radius=radius/2.0,
c50 = patches.Circle(center, radius=sqrt(radius/2.0),
facecolor='gray',
alpha=.1,
zorder=2)
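Both hunks above apply the same fix: in a correlation loading plot, explained variance is the squared correlation, so with a unit outer circle the 50%-explained-variance circle has radius sqrt(0.5), not 0.5. A stand-alone sketch of the geometry with matplotlib; the figure setup and names are illustrative and not taken from the class in the commit.

# Illustrative sketch (not part of the commit): 100% and 50% explained-variance
# circles of a correlation loading plot.
from math import sqrt
import matplotlib.pyplot as plt
from matplotlib import patches

fig, ax = plt.subplots()
center, radius = (0.0, 0.0), 1.0          # outer circle: 100% explained variance
c100 = patches.Circle(center, radius=radius, facecolor='gray', alpha=.1, zorder=1)
# 50% explained variance <=> |correlation| = sqrt(0.5); with radius == 1 this is sqrt(radius/2.0)
c50 = patches.Circle(center, radius=sqrt(radius/2.0), facecolor='gray', alpha=.1, zorder=2)
ax.add_patch(c100)
ax.add_patch(c50)
ax.set_xlim(-1.1, 1.1)
ax.set_ylim(-1.1, 1.1)
ax.set_aspect('equal')
plt.show()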


@@ -14,6 +14,7 @@ try:
except:
has_sym = False
def pca(a, aopt,scale='scores',mode='normal',center_axis=0):
""" Principal Component Analysis.
@@ -187,7 +188,7 @@ def pcr(a, b, aopt, scale='scores',mode='normal',center_axis=0):
dat.update({'Q':Q, 'F':F, 'expvary':expvary})
return dat
def pls(a, b, aopt=2, scale='scores', mode='normal', ax_center=0, ab=None):
def pls(a, b, aopt=2, scale='scores', mode='normal', center_axis=0, ab=None):
"""Partial Least Squares Regression.
Performs PLS on the given data matrices and returns the results in a dictionary.
@@ -244,6 +245,10 @@ def pls(a, b, aopt=2, scale='scores', mode='normal', ax_center=0, ab=None):
assert(m==mm)
else:
k, l = m_shape(b)
if center_axis>=0:
a = a - expand_dims(a.mean(center_axis), center_axis)
b = b - expand_dims(b.mean(center_axis), center_axis)
W = empty((n, aopt))
P = empty((n, aopt))
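The new centering block subtracts the mean along center_axis, using expand_dims so the mean keeps a broadcastable shape. A minimal NumPy sketch of that pattern; the array and names here are made up for illustration.

# Illustrative sketch (not part of the commit): axis-wise centering via expand_dims.
import numpy as np

np.random.seed(0)
a = np.random.randn(10, 4)
center_axis = 0                    # 0: remove column means, 1: remove row means
if center_axis >= 0:
    a = a - np.expand_dims(a.mean(center_axis), center_axis)
assert np.allclose(a.mean(center_axis), 0.0)   # mean along that axis is now ~0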
@@ -255,25 +260,28 @@ def pls(a, b, aopt=2, scale='scores', mode='normal', ax_center=0, ab=None):
if ab==None:
ab = dot(a.T, b)
for i in range(aopt):
if ab.shape[1]==1:
if ab.shape[1]==1: #pls 1
w = ab.reshape(n, l)
w = w/vnorm(w)
elif n<l:
elif n<l: # more yvars than xvars
if has_sym:
s, u = symeig(dot(ab.T, ab),range=[l,l],overwrite=True)
s, u = symeig(dot(ab, ab.T),range=[l,l],overwrite=True)
else:
u, s, vh = svd(dot(ab, ab.T))
w = u[:,0]
else:
else: # standard wide xdata
if has_sym:
s, u = symeig(dot(ab.T, ab),range=[l,l],overwrite=True)
s, q = symeig(dot(ab.T, ab),range=[l,l],overwrite=True)
else:
u, s, vh = svd(dot(ab.T, ab))
w = dot(ab, u)
q, s, vh = svd(dot(ab.T, ab))
q = q[:,:1]
w = dot(ab, q)
w = w/vnorm(w)
r = w.copy()
if i>0:
for j in range(0, i, 1):
r = r - dot(P[:,j].T, w)*R[:,j][:,newaxis]
print vnorm(r)
t = dot(a, r)
tt = vnorm(t)**2
p = dot(a.T, t)/tt
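All three branches above look for the same quantity: the dominant singular vector pair of the cross-product ab = X'Y. For the usual wide-X case (n >= l) it is cheaper to take the leading eigenvector q of the small l-by-l matrix ab'ab and recover the X-weight as w = ab q, which is what the corrected lines do. A NumPy check of that equivalence; variable names are chosen for the sketch and not taken from the library.

# Illustrative sketch (not part of the commit): the PLS weight w is the leading
# left singular vector of ab = X'Y; it can be recovered from the small matrix ab'ab.
import numpy as np

np.random.seed(1)
X = np.random.randn(20, 8)                     # m x n
Y = np.random.randn(20, 3)                     # m x l
ab = np.dot(X.T, Y)                            # n x l cross-product

u_direct = np.linalg.svd(ab)[0][:, 0]          # leading left singular vector of ab

q = np.linalg.svd(np.dot(ab.T, ab))[0][:, :1]  # leading eigenvector of ab'ab (l x l)
w = np.dot(ab, q)
w = w / np.linalg.norm(w)

# Same direction up to sign.
assert np.allclose(np.abs(u_direct), np.abs(w.ravel()), atol=1e-8)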
@@ -345,9 +353,13 @@ def w_pls(aat, b, aopt):
""" Pls for wide matrices.
Fast pls for crossval, used in calc rmsep for wide X
There is no P or W. T is normalised
aat = centered kernel matrix
b = centered y
"""
bb = b.copy()
m, m = aat.shape
k, l = m_shape(b)
m, m = m_shape(aat)
U = empty((m, aopt)) # W
T = empty((m, aopt))
R = empty((m, aopt)) # R
@@ -355,23 +367,28 @@ def w_pls(aat, b, aopt):
for i in range(aopt):
if has_sym:
pass
s, q = symeig(dot(dot(b.T, aat), b), range=(l,l),overwrite=True)
else:
q, s, vh = svd(dot(dot(b.T, aat), b), full_matrices=0)
q = q[:,:1]
u = dot(b , q) #y-factor scores
U[:,i] = u.ravel()
t = dot(aat, u)
print "Norm of t: %s" %vnorm(t)
print "s: %s" %s
t = t/vnorm(t)
T[:,i] = t.ravel()
r = dot(aat, t) #score-weights
r = dot(aat, t)#score-weights
#r = r/vnorm(r)
print "Norm R: %s" %vnorm(r)
R[:,i] = r.ravel()
PROJ[:,: i+1] = dot(T[:,:i+1], inv(dot(T[:,:i+1].T, R[:,:i+1])) )
if i<aopt:
b = b - dot(PROJ[:,:i+1], dot(R[:,:i+1].T,b) )
b = b - dot(PROJ[:,:i+1], dot(R[:,:i+1].T, b) )
C = dot(bb.T, T)
return {'T':T, 'U':U, 'Q':C, 'H':H}
return {'T':T, 'U':U, 'Q':C, 'R':R}
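w_pls never forms n-dimensional loadings because it works on the kernel aat = X X' alone; the corrected return hands back R, which the projection step actually uses, instead of the undefined H. As a reminder of why the kernel is enough, here is a self-contained sketch for a single centred response; the names are illustrative, not the module's.

# Illustrative sketch (not part of the commit): with a single centred response y,
# the first PLS score t = X w (w ~ X'y) can be computed from the kernel aat = X X' alone.
import numpy as np

np.random.seed(2)
X = np.random.randn(15, 200)                 # wide data matrix, m << n
y = np.random.randn(15, 1)
X = X - X.mean(0)                            # assume centred X and y
y = y - y.mean(0)

w = np.dot(X.T, y)
t_direct = np.dot(X, w)
t_direct = t_direct / np.linalg.norm(t_direct)

aat = np.dot(X, X.T)                         # m x m kernel
t_kernel = np.dot(aat, y)
t_kernel = t_kernel / np.linalg.norm(t_kernel)

assert np.allclose(np.abs(t_direct), np.abs(t_kernel), atol=1e-8)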
def bridge(a, b, aopt, scale='scores', mode='normal', r=0):
"""Undeflated Ridged svd(X'Y)
@@ -476,6 +493,8 @@ def nipals_lpls(X, Y, Z, a_max, alpha=.7, mean_ctr=[2, 0, 1], mode='normal', sca
P = empty((n, a_max))
K = empty((o, a_max))
L = empty((u, a_max))
B = empty((a_max, n, l))
b0 = empty((a_max, m, l))
var_x = empty((a_max,))
var_y = empty((a_max,))
var_z = empty((a_max,))
@@ -485,8 +504,8 @@ def nipals_lpls(X, Y, Z, a_max, alpha=.7, mean_ctr=[2, 0, 1], mode='normal', sca
print "\n Working on comp. %s" %a
u = Y[:,:1]
diff = 1
MAX_ITER = 100
lim = 1e-5
MAX_ITER = 200
lim = 1e-16
niter = 0
while (diff>lim and niter<MAX_ITER):
niter += 1
@@ -526,8 +545,8 @@ def nipals_lpls(X, Y, Z, a_max, alpha=.7, mean_ctr=[2, 0, 1], mode='normal', sca
var_y[a] = pow(Y, 2).sum()
var_z[a] = pow(Z, 2).sum()
B = dot(dot(W, inv(dot(P.T, W))), Q.T)
b0 = mnY - dot(mnX, B)
B[a] = dot(dot(W[:,:a+1], inv(dot(P[:,:a+1].T, W[:,:a+1]))), Q[:,:a+1].T)
b0[a] = mnY - dot(mnX, B[a])
# variance explained
evx = 100.0*(1 - var_x/varX)
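Replacing the single B with B[a] (and b0[a]) stores one coefficient matrix per model size, B[a] = W_a (P_a' W_a)^(-1) Q_a', so predictions for every number of components can be formed without refitting. A small, stand-alone sketch of that accumulation; dimensions and calibration means are made up.

# Illustrative sketch (not part of the commit): per-component regression
# coefficients from truncated weight/loading matrices.
import numpy as np

n, l, a_max = 6, 2, 3
np.random.seed(3)
W = np.random.randn(n, a_max)
P = np.random.randn(n, a_max)
Q = np.random.randn(l, a_max)
mnX = np.random.randn(1, n)                  # calibration means (illustrative)
mnY = np.random.randn(1, l)

B = np.empty((a_max, n, l))
b0 = np.empty((a_max, 1, l))
for a in range(a_max):
    Wa, Pa, Qa = W[:, :a+1], P[:, :a+1], Q[:, :a+1]
    B[a] = np.dot(np.dot(Wa, np.linalg.inv(np.dot(Pa.T, Wa))), Qa.T)
    b0[a] = mnY - np.dot(mnX, B[a])          # intercept for the (a+1)-component model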
@@ -546,6 +565,116 @@ def nipals_lpls(X, Y, Z, a_max, alpha=.7, mean_ctr=[2, 0, 1], mode='normal', sca
def nipals_pls(X, Y, a_max, alpha=.7, ax_center=0, mode='normal', scale='scores', verbose=False):
"""Partial Least Sqaures Regression by the nipals algorithm.
(X!Z)->Y
:input:
X : data matrix (m, n)
Y : data matrix (m, l)
:output:
T : X-scores
W : X-weights/Z-weights
P : X-loadings
Q : Y-loadings
U : X-Y relation
B : Regression coefficients X->Y
b0: Regression coefficient intercept
evx : X-explained variance
evy : Y-explained variance
evz : Z-explained variance
:Notes:
"""
if ax_center>=0:
mn_x = expand_dims(X.mean(ax_center), ax_center)
mn_y = expand_dims(Y.mean(ax_center), ax_center)
X = X - mn_x
Y = Y - mn_y
varX = pow(X, 2).sum()
varY = pow(Y, 2).sum()
m, n = X.shape
k, l = Y.shape
# initialize
U = empty((k, a_max))
Q = empty((l, a_max))
T = empty((m, a_max))
W = empty((n, a_max))
P = empty((n, a_max))
B = empty((a_max, n, l))
b0 = empty((a_max, m, l))
var_x = empty((a_max,))
var_y = empty((a_max,))
t1 = X[:,:1]
for a in range(a_max):
if verbose:
print "\n Working on comp. %s" %a
u = Y[:,:1]
diff = 1
MAX_ITER = 100
lim = 1e-16
niter = 0
while (diff>lim and niter<MAX_ITER):
niter += 1
#u1 = u.copy()
w = dot(X.T, u)
w = w/sqrt(dot(w.T, w))
#l = dot(Z, w)
#k = dot(Z.T, l)
#k = k/sqrt(dot(k.T, k))
#w = alpha*k + (1-alpha)*w
#w = w/sqrt(dot(w.T, w))
t = dot(X, w)
q = dot(Y.T, t)
q = q/sqrt(dot(q.T, q))
u = dot(Y, q)
diff = vnorm(t1 - t)
t1 = t.copy()
if verbose:
print "Converged after %s iterations" %niter
#tt = dot(t.T, t)
#p = dot(X.T, t)/tt
#q = dot(Y.T, t)/tt
#l = dot(Z, w)
p = dot(X.T, t)/dot(t.T, t)
p_norm = vnorm(p)
t = t*p_norm
w = w*p_norm
p = p/p_norm
U[:,a] = u.ravel()
W[:,a] = w.ravel()
P[:,a] = p.ravel()
T[:,a] = t.ravel()
Q[:,a] = q.ravel()
X = X - dot(t, p.T)
Y = Y - dot(t, q.T)
var_x[a] = pow(X, 2).sum()
var_y[a] = pow(Y, 2).sum()
B[a] = dot(dot(W[:,:a+1], inv(dot(P[:,:a+1].T, W[:,:a+1]))), Q[:,:a+1].T)
b0[a] = mn_y - dot(mn_x, B[a])
# variance explained
evx = 100.0*(1 - var_x/varX)
evy = 100.0*(1 - var_y/varY)
if scale=='loads':
tnorm = apply_along_axis(vnorm, 0, T)
T = T/tnorm
W = W*tnorm
Q = Q*tnorm
return {'T':T, 'W':W, 'P':P, 'Q':Q, 'U':U, 'B':B, 'b0':b0, 'evx':evx, 'evy':evy}
########### Helper routines #########
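The inner while-loop of the function added above is the classical NIPALS power iteration. The following is a compact, self-contained restatement of one textbook NIPALS component in NumPy, offered as a sketch of the technique rather than a line-for-line copy of the module's loop; the deflation step here uses the conventional Y-loading c = Y't/t't.

# Illustrative sketch (not part of the commit): one textbook NIPALS PLS component.
import numpy as np

def nipals_one_component(X, Y, lim=1e-16, max_iter=100):
    """Return scores/weights/loadings for one component of centred X, Y."""
    u = Y[:, :1]
    t_old = X[:, :1]
    for _ in range(max_iter):
        w = np.dot(X.T, u)
        w = w / np.linalg.norm(w)            # unit X-weight
        t = np.dot(X, w)                     # X-score
        q = np.dot(Y.T, t)
        q = q / np.linalg.norm(q)            # unit Y-weight
        u = np.dot(Y, q)                     # Y-score
        if np.linalg.norm(t_old - t) < lim:
            break
        t_old = t.copy()
    tt = np.dot(t.T, t)
    p = np.dot(X.T, t) / tt                  # X-loading
    c = np.dot(Y.T, t) / tt                  # Y-loading used for deflation
    return t, w, p, q, u, X - np.dot(t, p.T), Y - np.dot(t, c.T)

np.random.seed(4)
X = np.random.randn(30, 5); X = X - X.mean(0)
Y = np.random.randn(30, 2); Y = Y - Y.mean(0)
t, w, p, q, u, Xd, Yd = nipals_one_component(X, Y)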


@@ -1,7 +1,7 @@
"""This module implements some common validation schemes from pca and pls.
"""
from scipy import ones,mean,sqrt,dot,newaxis,zeros,sum,empty,\
apply_along_axis,eye,kron,array,sort
apply_along_axis,eye,kron,array,sort,zeros_like,argmax
from scipy.stats import median
from scipy.linalg import triu,inv,svd,norm
@@ -122,7 +122,7 @@ def lpls_val(X, Y, Z, a_max=2, nsets=None,alpha=.5):
B = dat['B']
b0 = dat['b0']
for a in range(a_max):
Yhat[a,ind,:] = b0[a][0][0] + dot(xi, B[a])
Yhat[a,ind,:] = b0[a][0][0] + dot(xi-xcal.mean(0), B[a])
Yhat_class = zeros_like(Yhat)
for a in range(a_max):
for i in range(k):
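The corrected prediction line centres the held-out sample xi with the calibration mean xcal.mean(0) before multiplying by B, because the coefficients were fitted on centred data. The exact b0 bookkeeping is specific to the lpls engine; the stand-alone least-squares sketch below only shows why the new sample must be centred with the calibration mean rather than its own.

# Illustrative sketch (not part of the commit): predicting a held-out sample
# with coefficients fitted on centred calibration data.
import numpy as np

np.random.seed(5)
X = np.random.randn(20, 4)
Y = np.dot(X, np.random.randn(4, 2)) + 1.0   # exact linear relation plus offset

xcal, ycal = X[1:], Y[1:]                    # calibration set
xi, yi = X[:1], Y[:1]                        # held-out sample

mx, my = xcal.mean(0), ycal.mean(0)
B = np.linalg.lstsq(xcal - mx, ycal - my, rcond=None)[0]   # fitted on centred data
yhat = my + np.dot(xi - xcal.mean(0), B)     # centre xi with the calibration mean
assert np.allclose(yhat, yi)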


@@ -132,7 +132,7 @@ text.dvipnghack : False # some versions of dvipng don't handle
# default fontsizes for ticklabels, and so on. See
# http://matplotlib.sourceforge.net/matplotlib.axes.html#Axes
axes.hold : True # whether to clear the axes by default on
axes.facecolor : white # axes background color
axes.facecolor : 0.6 # axes background color
axes.edgecolor : black # axes edge color
axes.linewidth : 1.0 # edge linewidth
axes.grid : True # display grid or not
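In matplotlibrc a bare numeric string such as 0.6 is read as a grayscale value, so this change gives the axes a light gray background instead of white. The same setting can be made at runtime through rcParams; a minimal sketch, assuming a reasonably recent matplotlib.

# Illustrative sketch (not part of the commit): runtime equivalent of the rc change.
import matplotlib
matplotlib.rcParams['axes.facecolor'] = '0.6'   # grayscale string: 0=black, 1=white
matplotlib.rcParams['axes.grid'] = True

import matplotlib.pyplot as plt
fig, ax = plt.subplots()
ax.plot([0, 1, 2], [0, 1, 4])
plt.show()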