This commit is contained in:
parent aa4007e208
commit 10eba079bc
@@ -203,7 +203,7 @@ class LplsXCorrelationPlot(BlmScatterPlot):
                              facecolor='gray',
                              alpha=.1,
                              zorder=1)
-        c50 = patches.Circle(center, radius=radius/2.0,
+        c50 = patches.Circle(center, radius= sqrt(radius/2.0),
                              facecolor='gray',
                              alpha=.1,
                              zorder=2)
@@ -228,7 +228,7 @@ class LplsZCorrelationPlot(BlmScatterPlot):
                              facecolor='gray',
                              alpha=.1,
                              zorder=1)
-        c50 = patches.Circle(center, radius=radius/2.0,
+        c50 = patches.Circle(center, radius=sqrt(radius/2.0),
                              facecolor='gray',
                              alpha=.1,
                              zorder=2)
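Both hunks above apply the same fix to the two correlation plots: the inner "50%" circle is now drawn at sqrt(radius/2.0) instead of radius/2.0. In a correlation loadings plot the explained variance of a variable grows with its squared distance from the origin, so for a unit outer circle the 50% contour sits at sqrt(0.5), about 0.707, rather than 0.5 (note that sqrt(radius/2.0) coincides with radius*sqrt(0.5) only when radius is 1, presumably the case here). A minimal standalone sketch of the corrected geometry, assuming only stock matplotlib (the figure setup is illustrative, not from the source):

    import matplotlib.pyplot as plt
    from matplotlib import patches
    from math import sqrt

    fig, ax = plt.subplots()
    center, radius = (0, 0), 1.0
    # outer circle: 100% explained variance
    c100 = patches.Circle(center, radius=radius,
                          facecolor='gray', alpha=.1, zorder=1)
    # inner circle: the 50% contour lies at sqrt(1/2) of the unit radius,
    # because explained variance scales with the squared radius
    c50 = patches.Circle(center, radius=radius*sqrt(0.5),
                         facecolor='gray', alpha=.1, zorder=2)
    ax.add_patch(c100)
    ax.add_patch(c50)
    ax.set_xlim(-1.1, 1.1)
    ax.set_ylim(-1.1, 1.1)
    ax.set_aspect('equal')
    plt.show()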
@@ -14,6 +14,7 @@ try:
 except:
     has_sym = False
 
+
 def pca(a, aopt,scale='scores',mode='normal',center_axis=0):
     """ Principal Component Analysis.
 
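The has_sym flag set in this guard controls an optional dependency used throughout the module: when a fast symmetric eigensolver is importable the code calls symeig, otherwise it falls back to scipy's svd. The try-body itself is outside the hunk, but given the later has_sym checks it presumably has this shape (the exact import path is an assumption):

    try:
        from symeig import symeig  # optional fast symmetric eigensolver
        has_sym = True
    except ImportError:            # the source uses a bare except:
        has_sym = False            # scipy.linalg.svd is used instead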
@@ -187,7 +188,7 @@ def pcr(a, b, aopt, scale='scores',mode='normal',center_axis=0):
     dat.update({'Q':Q, 'F':F, 'expvary':expvary})
     return dat
 
-def pls(a, b, aopt=2, scale='scores', mode='normal', ax_center=0, ab=None):
+def pls(a, b, aopt=2, scale='scores', mode='normal', center_axis=0, ab=None):
     """Partial Least Squares Regression.
 
     Performs PLS on given matrix and returns results in a dictionary.
@@ -244,6 +245,10 @@ def pls(a, b, aopt=2, scale='scores', mode='normal', ax_center=0, ab=None):
         assert(m==mm)
     else:
         k, l = m_shape(b)
 
+    if center_axis>=0:
+        a = a - expand_dims(a.mean(center_axis), center_axis)
+        b = b - expand_dims(b.mean(center_axis), center_axis)
+
     W = empty((n, aopt))
     P = empty((n, aopt))
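The added centering subtracts the mean along the chosen axis from both blocks before the factorization; expand_dims keeps the reduced mean broadcastable against the original array whichever axis is collapsed. A small numpy demonstration of the idiom (synthetic data, not the module's API):

    import numpy as np

    a = np.arange(12.0).reshape(4, 3)
    center_axis = 0
    # the mean over axis 0 has shape (3,); expand_dims restores it to
    # (1, 3) so the subtraction broadcasts row-wise
    ac = a - np.expand_dims(a.mean(center_axis), center_axis)
    print(ac.mean(0))  # -> [0. 0. 0.]: column means removed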
@@ -255,25 +260,28 @@ def pls(a, b, aopt=2, scale='scores', mode='normal', ax_center=0, ab=None):
     if ab==None:
         ab = dot(a.T, b)
     for i in range(aopt):
-        if ab.shape[1]==1:
+        if ab.shape[1]==1: #pls 1
             w = ab.reshape(n, l)
             w = w/vnorm(w)
-        elif n<l:
+        elif n<l: # more yvars than xvars
             if has_sym:
-                s, u = symeig(dot(ab.T, ab),range=[l,l],overwrite=True)
+                s, u = symeig(dot(ab, ab.T),range=[l,l],overwrite=True)
             else:
                 u, s, vh = svd(dot(ab, ab.T))
             w = u[:,0]
-        else:
+        else: # standard wide xdata
             if has_sym:
-                s, u = symeig(dot(ab.T, ab),range=[l,l],overwrite=True)
+                s, q = symeig(dot(ab.T, ab),range=[l,l],overwrite=True)
             else:
-                u, s, vh = svd(dot(ab.T, ab))
-            w = dot(ab, u)
+                q, s, vh = svd(dot(ab.T, ab))
+            q = q[:,:1]
+            w = dot(ab, q)
         w = w/vnorm(w)
         r = w.copy()
         if i>0:
             for j in range(0, i, 1):
                 r = r - dot(P[:,j].T, w)*R[:,j][:,newaxis]
+            print vnorm(r)
         t = dot(a, r)
         tt = vnorm(t)**2
         p = dot(a.T, t)/tt
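In the rewritten wide-data branch the weight is built from a dominant eigenvector: the top eigenvector q of (X'Y)'(X'Y) is the leading right singular vector of X'Y, and w = dot(X'Y, q) is then the leading left singular vector up to scale. With symeig the range argument requests only that largest eigenpair. A hedged numpy equivalent of the step, using eigh in place of the optional symeig (random data, for shape checking only):

    import numpy as np

    rng = np.random.default_rng(0)
    X = rng.standard_normal((20, 100))   # wide x-block
    Y = rng.standard_normal((20, 3))
    ab = X.T @ Y                         # (n, l) cross-product matrix

    s, Q = np.linalg.eigh(ab.T @ ab)     # eigenvalues in ascending order
    q = Q[:, -1:]                        # dominant y-side vector
    w = ab @ q                           # x-side weight vector
    w = w / np.linalg.norm(w)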
@@ -345,9 +353,13 @@ def w_pls(aat, b, aopt):
     """ Pls for wide matrices.
     Fast pls for crossval, used in calc rmsep for wide X
     There is no P or W. T is normalised
+
+    aat = centered kernel matrix
+    b = centered y
     """
+    bb = b.copy()
-    m, m = aat.shape
     k, l = m_shape(b)
+    m, m = m_shape(aat)
     U = empty((m, aopt)) # W
     T = empty((m, aopt))
     R = empty((m, aopt)) # R
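w_pls never forms X explicitly: it takes the m-by-m kernel aat = XX', which is the cheap object when variables far outnumber samples, and both the scores and the y-deflation can be computed from it alone. A toy illustration of the kernel shortcut for one x-score (hypothetical data, not the module's API):

    import numpy as np

    rng = np.random.default_rng(1)
    X = rng.standard_normal((10, 500))  # wide: 10 samples, 500 variables
    y = rng.standard_normal((10, 1))
    aat = X @ X.T                       # (10, 10) kernel, small and reusable

    u = y / np.linalg.norm(y)           # y-factor score
    t = aat @ u                         # same as X @ (X.T @ u), no w needed
    t = t / np.linalg.norm(t)           # T is normalised, as the docstring says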
@@ -355,23 +367,28 @@ def w_pls(aat, b, aopt):
 
     for i in range(aopt):
         if has_sym:
-            pass
+            s, q = symeig(dot(dot(b.T, aat), b), range=(l,l),overwrite=True)
         else:
             q, s, vh = svd(dot(dot(b.T, aat), b), full_matrices=0)
             q = q[:,:1]
         u = dot(b , q) #y-factor scores
         U[:,i] = u.ravel()
         t = dot(aat, u)
+        print "Norm of t: %s" %vnorm(t)
+        print "s: %s" %s
+
         t = t/vnorm(t)
         T[:,i] = t.ravel()
-        r = dot(aat, t) #score-weights
+        r = dot(aat, t)#score-weights
         #r = r/vnorm(r)
+        print "Norm R: %s" %vnorm(r)
         R[:,i] = r.ravel()
         PROJ[:,: i+1] = dot(T[:,:i+1], inv(dot(T[:,:i+1].T, R[:,:i+1])) )
         if i<aopt:
-            b = b - dot(PROJ[:,:i+1], dot(R[:,:i+1].T,b) )
+            b = b - dot(PROJ[:,:i+1], dot(R[:,:i+1].T, b) )
+    C = dot(bb.T, T)
 
-    return {'T':T, 'U':U, 'Q':C, 'H':H}
+    return {'T':T, 'U':U, 'Q':C, 'R':R}
 
 def bridge(a, b, aopt, scale='scores', mode='normal', r=0):
     """Undeflated Ridged svd(X'Y)
 
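The deflation step in the loop above removes from b everything the extracted scores already explain: PROJ = T*inv(T'R) combined with R' acts as an oblique projector, and because each score-weight is r = dot(aat, t), the matrix T'R = T'*aat*T is symmetric, which makes the projector idempotent and leaves the deflated b with no component along R. A compact numpy check of that identity (synthetic stand-in matrices):

    import numpy as np

    rng = np.random.default_rng(2)
    X = rng.standard_normal((10, 50))
    aat = X @ X.T                        # symmetric kernel
    T = rng.standard_normal((10, 2))     # stand-in scores
    R = aat @ T                          # score-weights: r_i = aat t_i
    b = rng.standard_normal((10, 1))

    PROJ = T @ np.linalg.inv(T.T @ R)
    b_defl = b - PROJ @ (R.T @ b)        # remove the explained part of y
    print(np.allclose(R.T @ b_defl, 0))  # -> True: nothing left along R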
@@ -476,6 +493,8 @@ def nipals_lpls(X, Y, Z, a_max, alpha=.7, mean_ctr=[2, 0, 1], mode='normal', sca
     P = empty((n, a_max))
     K = empty((o, a_max))
     L = empty((u, a_max))
+    B = empty((a_max, n, l))
+    b0 = empty((a_max, m, l))
     var_x = empty((a_max,))
     var_y = empty((a_max,))
     var_z = empty((a_max,))
@@ -485,8 +504,8 @@ def nipals_lpls(X, Y, Z, a_max, alpha=.7, mean_ctr=[2, 0, 1], mode='normal', sca
         print "\n Working on comp. %s" %a
         u = Y[:,:1]
         diff = 1
-        MAX_ITER = 100
-        lim = 1e-5
+        MAX_ITER = 200
+        lim = 1e-16
         niter = 0
         while (diff>lim and niter<MAX_ITER):
             niter += 1
@@ -526,8 +545,8 @@ def nipals_lpls(X, Y, Z, a_max, alpha=.7, mean_ctr=[2, 0, 1], mode='normal', sca
         var_y[a] = pow(Y, 2).sum()
         var_z[a] = pow(Z, 2).sum()
 
-    B = dot(dot(W, inv(dot(P.T, W))), Q.T)
-    b0 = mnY - dot(mnX, B)
+        B[a] = dot(dot(W[:,:a+1], inv(dot(P[:,:a+1].T, W[:,:a+1]))), Q[:,:a+1].T)
+        b0[a] = mnY - dot(mnX, B[a])
 
     # variance explained
     evx = 100.0*(1 - var_x/varX)
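Together with the B/b0 allocations two hunks up, this moves the regression coefficients inside the component loop: B[a] is assembled from only the first a+1 weights and loadings, so a prediction can later be made at any truncation level without refitting. A numpy sketch of the coefficient formula for a single level (shapes illustrative):

    import numpy as np

    rng = np.random.default_rng(3)
    n, l, a_max = 5, 2, 3
    W = rng.standard_normal((n, a_max))  # x-weights
    P = rng.standard_normal((n, a_max))  # x-loadings
    Q = rng.standard_normal((l, a_max))  # y-loadings

    a = 1  # use the first a+1 = 2 components
    Ba = W[:, :a+1] @ np.linalg.inv(P[:, :a+1].T @ W[:, :a+1]) @ Q[:, :a+1].T
    print(Ba.shape)  # (n, l): maps a centered x-row to a centered y-row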
@@ -546,6 +565,116 @@ def nipals_lpls(X, Y, Z, a_max, alpha=.7, mean_ctr=[2, 0, 1], mode='normal', sca
 
 
 
+def nipals_pls(X, Y, a_max, alpha=.7, ax_center=0, mode='normal', scale='scores', verbose=False):
+    """Partial Least Squares Regression by the nipals algorithm.
+
+    X -> Y
+    :input:
+        X : data matrix (m, n)
+        Y : data matrix (m, l)
+
+    :output:
+        T : X-scores
+        W : X-weights
+        P : X-loadings
+        Q : Y-loadings
+        U : X-Y relation
+        B : Regression coefficients X->Y
+        b0: Regression coefficient intercept
+        evx : X-explained variance
+        evy : Y-explained variance
+
+    :Notes:
+
+    """
+    if ax_center>=0:
+        mn_x = expand_dims(X.mean(ax_center), ax_center)
+        mn_y = expand_dims(Y.mean(ax_center), ax_center)
+        X = X - mn_x
+        Y = Y - mn_y
+
+    varX = pow(X, 2).sum()
+    varY = pow(Y, 2).sum()
+
+    m, n = X.shape
+    k, l = Y.shape
+
+    # initialize
+    U = empty((k, a_max))
+    Q = empty((l, a_max))
+    T = empty((m, a_max))
+    W = empty((n, a_max))
+    P = empty((n, a_max))
+    B = empty((a_max, n, l))
+    b0 = empty((a_max, m, l))
+    var_x = empty((a_max,))
+    var_y = empty((a_max,))
+
+    t1 = X[:,:1]
+    for a in range(a_max):
+        if verbose:
+            print "\n Working on comp. %s" %a
+        u = Y[:,:1]
+        diff = 1
+        MAX_ITER = 100
+        lim = 1e-16
+        niter = 0
+        while (diff>lim and niter<MAX_ITER):
+            niter += 1
+            #u1 = u.copy()
+            w = dot(X.T, u)
+            w = w/sqrt(dot(w.T, w))
+            #l = dot(Z, w)
+            #k = dot(Z.T, l)
+            #k = k/sqrt(dot(k.T, k))
+            #w = alpha*k + (1-alpha)*w
+            #w = w/sqrt(dot(w.T, w))
+            t = dot(X, w)
+            q = dot(Y.T, t)
+            q = q/sqrt(dot(q.T, q))
+            u = dot(Y, q)
+            diff = vnorm(t1 - t)
+            t1 = t.copy()
+        if verbose:
+            print "Converged after %s iterations" %niter
+        #tt = dot(t.T, t)
+        #p = dot(X.T, t)/tt
+        #q = dot(Y.T, t)/tt
+        #l = dot(Z, w)
+        p = dot(X.T, t)/dot(t.T, t)
+        p_norm = vnorm(p)
+        t = t*p_norm
+        w = w*p_norm
+        p = p/p_norm
+
+        U[:,a] = u.ravel()
+        W[:,a] = w.ravel()
+        P[:,a] = p.ravel()
+        T[:,a] = t.ravel()
+        Q[:,a] = q.ravel()
+
+        X = X - dot(t, p.T)
+        Y = Y - dot(t, q.T)
+
+        var_x[a] = pow(X, 2).sum()
+        var_y[a] = pow(Y, 2).sum()
+
+        B[a] = dot(dot(W[:,:a+1], inv(dot(P[:,:a+1].T, W[:,:a+1]))), Q[:,:a+1].T)
+        b0[a] = mn_y - dot(mn_x, B[a])
+
+    # variance explained
+    evx = 100.0*(1 - var_x/varX)
+    evy = 100.0*(1 - var_y/varY)
+
+    if scale=='loads':
+        tnorm = apply_along_axis(vnorm, 0, T)
+        T = T/tnorm
+        W = W*tnorm
+        Q = Q*tnorm
+
+    return {'T':T, 'W':W, 'P':P, 'Q':Q, 'U':U, 'B':B, 'b0':b0, 'evx':evx, 'evy':evy}
 
 
 ########### Helper routines #########
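The core of the new function is the classical NIPALS inner loop: alternate w = X'u, t = Xw, q = Y't, u = Yq (with w and q normalised) until the score t stops moving, then deflate both blocks and repeat for the next component. A self-contained numpy sketch of one component under those update rules (synthetic data; not the module's code):

    import numpy as np

    rng = np.random.default_rng(4)
    X = rng.standard_normal((30, 8))
    Y = rng.standard_normal((30, 2))
    X = X - X.mean(0)
    Y = Y - Y.mean(0)

    t_old = X[:, :1]
    u = Y[:, :1]
    for _ in range(100):                        # MAX_ITER guard
        w = X.T @ u
        w = w / np.linalg.norm(w)               # x-weights from y-scores
        t = X @ w                               # x-scores
        q = Y.T @ t
        q = q / np.linalg.norm(q)               # y-loadings, normalised
        u = Y @ q                               # y-scores
        if np.linalg.norm(t - t_old) < 1e-16:   # converged
            break
        t_old = t
    p = X.T @ t / (t.T @ t)                     # x-loadings
    X = X - t @ p.T                             # deflate before next component
    Y = Y - t @ q.T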
@@ -1,7 +1,7 @@
 """This module implements some common validation schemes from pca and pls.
 """
 from scipy import ones,mean,sqrt,dot,newaxis,zeros,sum,empty,\
-     apply_along_axis,eye,kron,array,sort
+     apply_along_axis,eye,kron,array,sort,zeros_like,argmax
 from scipy.stats import median
 from scipy.linalg import triu,inv,svd,norm
 
@@ -122,7 +122,7 @@ def lpls_val(X, Y, Z, a_max=2, nsets=None,alpha=.5):
         B = dat['B']
         b0 = dat['b0']
         for a in range(a_max):
-            Yhat[a,ind,:] = b0[a][0][0] + dot(xi, B[a])
+            Yhat[a,ind,:] = b0[a][0][0] + dot(xi-xcal.mean(0), B[a])
     Yhat_class = zeros_like(Yhat)
     for a in range(a_max):
         for i in range(k):
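This one-line fix is where the per-component coefficients pay off, and it corrects a real cross-validation bug: B[a] was fit on centered calibration data, so a held-out row must be shifted by the calibration mean before the product, not used raw. Schematically, for one fold (hypothetical shapes):

    import numpy as np

    rng = np.random.default_rng(5)
    xcal = rng.standard_normal((20, 4))  # calibration block for this fold
    B = rng.standard_normal((4, 1))      # coefficients fit on centered xcal
    b0 = 0.3                             # intercept term
    xi = rng.standard_normal((1, 4))     # held-out sample

    yhat = b0 + (xi - xcal.mean(0)) @ B  # center xi with the training mean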
@@ -132,7 +132,7 @@ text.dvipnghack : False # some versions of dvipng don't handle
 # default fontsizes for ticklabels, and so on. See
 # http://matplotlib.sourceforge.net/matplotlib.axes.html#Axes
 axes.hold : True # whether to clear the axes by default on
-axes.facecolor : white # axes background color
+axes.facecolor : 0.6 # axes background color
 axes.edgecolor : black # axes edge color
 axes.linewidth : 1.0 # edge linewidth
 axes.grid : True # display grid or not
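In a matplotlibrc file a bare number supplied as a color is read as a grayscale level between 0 (black) and 1 (white), so this change gives every new axes a light-gray background. The equivalent from code, assuming standard matplotlib:

    import matplotlib.pyplot as plt

    plt.rcParams['axes.facecolor'] = '0.6'  # grayscale string: 0=black, 1=white
    fig, ax = plt.subplots()                # new axes now come out gray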