From 10eba079bccc1bafd7bc848b6e040f10e2ecfbc2 Mon Sep 17 00:00:00 2001 From: flatberg Date: Mon, 30 Jul 2007 18:04:42 +0000 Subject: [PATCH] iii --- fluents/lib/blmplots.py | 4 +- fluents/lib/engines.py | 163 ++++++++++++++++++++++++++++++++++---- fluents/lib/validation.py | 4 +- matplotlibrc | 2 +- 4 files changed, 151 insertions(+), 22 deletions(-) diff --git a/fluents/lib/blmplots.py b/fluents/lib/blmplots.py index 6a62898..15b0369 100644 --- a/fluents/lib/blmplots.py +++ b/fluents/lib/blmplots.py @@ -203,7 +203,7 @@ class LplsXCorrelationPlot(BlmScatterPlot): facecolor='gray', alpha=.1, zorder=1) - c50 = patches.Circle(center, radius=radius/2.0, + c50 = patches.Circle(center, radius= sqrt(radius/2.0), facecolor='gray', alpha=.1, zorder=2) @@ -228,7 +228,7 @@ class LplsZCorrelationPlot(BlmScatterPlot): facecolor='gray', alpha=.1, zorder=1) - c50 = patches.Circle(center, radius=radius/2.0, + c50 = patches.Circle(center, radius=sqrt(radius/2.0), facecolor='gray', alpha=.1, zorder=2) diff --git a/fluents/lib/engines.py b/fluents/lib/engines.py index 219f1ce..0d52917 100644 --- a/fluents/lib/engines.py +++ b/fluents/lib/engines.py @@ -14,6 +14,7 @@ try: except: has_sym = False + def pca(a, aopt,scale='scores',mode='normal',center_axis=0): """ Principal Component Analysis. @@ -187,7 +188,7 @@ def pcr(a, b, aopt, scale='scores',mode='normal',center_axis=0): dat.update({'Q':Q, 'F':F, 'expvary':expvary}) return dat -def pls(a, b, aopt=2, scale='scores', mode='normal', ax_center=0, ab=None): +def pls(a, b, aopt=2, scale='scores', mode='normal', center_axis=0, ab=None): """Partial Least Squares Regression. Performs PLS on given matrix and returns results in a dictionary. @@ -244,6 +245,10 @@ def pls(a, b, aopt=2, scale='scores', mode='normal', ax_center=0, ab=None): assert(m==mm) else: k, l = m_shape(b) + + if center_axis>=0: + a = a - expand_dims(a.mean(center_axis), center_axis) + b = b - expand_dims(b.mean(center_axis), center_axis) W = empty((n, aopt)) P = empty((n, aopt)) @@ -255,25 +260,28 @@ def pls(a, b, aopt=2, scale='scores', mode='normal', ax_center=0, ab=None): if ab==None: ab = dot(a.T, b) for i in range(aopt): - if ab.shape[1]==1: + if ab.shape[1]==1: #pls 1 w = ab.reshape(n, l) w = w/vnorm(w) - elif n0: for j in range(0, i, 1): r = r - dot(P[:,j].T, w)*R[:,j][:,newaxis] + print vnorm(r) t = dot(a, r) tt = vnorm(t)**2 p = dot(a.T, t)/tt @@ -345,9 +353,13 @@ def w_pls(aat, b, aopt): """ Pls for wide matrices. Fast pls for crossval, used in calc rmsep for wide X There is no P or W. T is normalised + + aat = centered kernel matrix + b = centered y """ bb = b.copy() - m, m = aat.shape + k, l = m_shape(b) + m, m = m_shape(aat) U = empty((m, aopt)) # W T = empty((m, aopt)) R = empty((m, aopt)) # R @@ -355,23 +367,28 @@ def w_pls(aat, b, aopt): for i in range(aopt): if has_sym: - pass + s, q = symeig(dot(dot(b.T, aat), b), range=(l,l),overwrite=True) else: q, s, vh = svd(dot(dot(b.T, aat), b), full_matrices=0) q = q[:,:1] u = dot(b , q) #y-factor scores U[:,i] = u.ravel() t = dot(aat, u) + print "Norm of t: %s" %vnorm(t) + print "s: %s" %s + t = t/vnorm(t) T[:,i] = t.ravel() - r = dot(aat, t) #score-weights + r = dot(aat, t)#score-weights + #r = r/vnorm(r) + print "Norm R: %s" %vnorm(r) R[:,i] = r.ravel() PROJ[:,: i+1] = dot(T[:,:i+1], inv(dot(T[:,:i+1].T, R[:,:i+1])) ) if ilim and niterY + :input: + X : data matrix (m, n) + Y : data matrix (m, l) + + :output: + T : X-scores + W : X-weights/Z-weights + P : X-loadings + Q : Y-loadings + U : X-Y relation + B : Regression coefficients X->Y + b0: Regression coefficient intercept + evx : X-explained variance + evy : Y-explained variance + evz : Z-explained variance + + :Notes: + + """ + if ax_center>=0: + mn_x = expand_dims(X.mean(ax_center), ax_center) + mn_y = expand_dims(Y.mean(ax_center), ax_center) + X = X - mn_x + Y = Y - mn_y + + varX = pow(X, 2).sum() + varY = pow(Y, 2).sum() + + m, n = X.shape + k, l = Y.shape + + # initialize + U = empty((k, a_max)) + Q = empty((l, a_max)) + T = empty((m, a_max)) + W = empty((n, a_max)) + P = empty((n, a_max)) + B = empty((a_max, n, l)) + b0 = empty((a_max, m, l)) + var_x = empty((a_max,)) + var_y = empty((a_max,)) + + t1 = X[:,:1] + for a in range(a_max): + if verbose: + print "\n Working on comp. %s" %a + u = Y[:,:1] + diff = 1 + MAX_ITER = 100 + lim = 1e-16 + niter = 0 + while (diff>lim and niter