From df88f4425503206b54a355ecfc4bb97056a5896c Mon Sep 17 00:00:00 2001
From: flatberg
Date: Sat, 28 Jul 2007 09:18:48 +0000
Subject: [PATCH] clean up ups

---
 fluents/lib/engines.py | 94 ++++++++++++++++++++++++------------------
 1 file changed, 55 insertions(+), 39 deletions(-)

diff --git a/fluents/lib/engines.py b/fluents/lib/engines.py
index ba70633..650e718 100644
--- a/fluents/lib/engines.py
+++ b/fluents/lib/engines.py
@@ -1,6 +1,6 @@
-"""Module contain algorithms for (burdensome) calculations.
+"""Module contains algorithms for low-rank models.
 
-There is no typechecking of any kind here, just focus on speed
+There is almost no typechecking here; the focus is on speed.
 """
 import math
@@ -56,40 +56,47 @@ def pca(a, aopt, scale='scores', mode='normal'):
 
 
 def pcr(a, b, aopt=2, scale='scores', mode='normal'):
-    """Returns Principal component regression model."""
-    m, n = a.shape
-    try:
-        k, l = b.shape
-    except:
-        k = b.shape[0]
-        l = 1
-    B = empty((aopt, n, l))
-    U, s, Vt = svd(a, full_matrices=True)
-    T = U*s
-    T = T[:,:aopt]
-    P = Vt[:aopt,:].T
-    Q = dot(dot(inv(dot(T.T, T)), T.T), b).T
-    for i in range(aopt):
-        ti = T[:,:i+1]
-        r = dot(dot(inv(dot(ti.T,ti)), ti.T), b)
-        B[i] = dot(P[:,:i+1], r)
-    E = a - dot(T, P.T)
-    F = b - dot(T, Q.T)
+    """Principal Component Regression.
+
+    Regresses b onto the principal component scores of a and returns
+    the results in a dictionary.
+    """
+    m, n = m_shape(a)
+    k, l = m_shape(b)
+    B = empty((aopt, n, l)) # fixme: B (regression coefficients) is never filled in
+    dat = pca(a, aopt=aopt, scale=scale, mode='normal', center_axis=0)
+    T = dat['T']
+    P = dat['P']
+    weights = apply_along_axis(vnorm, 0, T)
+    if scale=='loads':
+        # fixme: check weights
+        Q = dot(b.T, T*weights)
+    else:
+        Q = dot(b.T, T/weights**2)
+
+    if mode=='fast':
+        return {'T':T, 'P':P, 'Q':Q}
+    if mode=='detailed':
+        F = empty((aopt, k, l))
+        for i in range(aopt):
+            F[i] = b - dot(T[:,:i+1], Q[:,:i+1].T)
+    else:
+        F = b - dot(T, Q.T)
+    #fixme: explained variance in Y + Y-var leverages
+    dat.update({'Q':Q, 'F':F})
+    return dat
 
-    return {'T':T, 'P':P,'Q': Q, 'B':B, 'E':E, 'F':F}
-
 def pls(a, b, aopt=2, scale='scores', mode='normal', ab=None):
-    """Kernel pls for tall/wide matrices.
+    """Partial Least Squares Regression.
+
+    Applies plsr to the given matrices and returns the results in a dictionary.
 
     Fast pls for calibration. Only inefficient for many Y-vars.
-
     """
     m, n = a.shape
-    if ab!=None:
-        mm, l = m_shape(ab)
-    else:
-        k, l = m_shape(b)
-
+    k, l = m_shape(b)
+    if ab is not None:
+        # ab is X'Y, so m_shape(ab) must give (n, l)
+        mm, ll = m_shape(ab)
+        assert(n==mm)
+        assert(l==ll)
     W = empty((n, aopt))
     P = empty((n, aopt))
     R = empty((n, aopt))
@@ -97,7 +104,7 @@ def pls(a, b, aopt=2, scale='scores', mode='normal', ab=None):
     T = empty((m, aopt))
     B = empty((aopt, n, l))
 
-    if ab==None: 
+    if ab is None:
         ab = dot(a.T, b)
     for i in range(aopt):
         if ab.shape[1]==1:
@@ -105,11 +112,11 @@ def pls(a, b, aopt=2, scale='scores', mode='normal', ab=None):
         else:
             u, s, vh = svd(dot(ab.T, ab))
             w = dot(ab, u[:,:1])
-            
+
         w = w/vnorm(w)
         r = w.copy()
-        if i>0:
-            for j in range(0,i,1):
+        if i>0: # recursively orthogonalise w against previous loadings
+            for j in range(0, i, 1):
                 r = r - dot(P[:,j].T, w)*R[:,j][:,newaxis]
         t = dot(a, r)
         tt = vnorm(t)**2
@@ -130,7 +137,7 @@ def pls(a, b, aopt=2, scale='scores', mode='normal', ab=None):
         Q[:,i] = q.ravel()
         B[i] = dot(R[:,:i+1], Q[:,:i+1].T)
 
-    
+
     if mode=='detailed':
         E = empty((aopt, m, n))
         F = empty((aopt, k, l))
@@ -147,13 +154,13 @@ def pls(a, b, aopt=2, scale='scores', mode='normal', ab=None):
         W = W*tnorm
         Q = Q*tnorm
         P = P*tnorm
-    
+
     return {'B':B, 'Q':Q, 'P':P, 'T':T, 'W':W, 'R':R, 'E':E, 'F':F}
 
 def w_simpls(aat, b, aopt):
     """ Simpls for wide matrices.
     Fast pls for crossval, used in calc rmsep for wide X
-    There is no P,W. T is normalised
+    There is no P or W. T is normalised.
     """
     bb = b.copy()
     m, m = aat.shape
@@ -181,7 +188,7 @@ def w_simpls(aat, b, aopt):
 def bridge(a, b, aopt, scale='scores', mode='normal', r=0):
     """Undeflated Ridged svd(X'Y)
     """
-    m, n = a.shape
+    m, n = m_shape(a)
     k, l = m_shape(b)
     u, s, vt = svd(b, full_matrices=0)
     g0 = dot(u*s, u.T)
@@ -204,7 +211,7 @@ def bridge(a, b, aopt, scale='scores', mode='normal', r=0):
     B = zeros((aopt, n, l), dtype='f')
     for i in range(aopt):
         B[i] = dot(W[:,:i+1], Q[:,:i+1].T)
-    
+
     if mode == 'detailed':
         E = empty((aopt, m, n))
         F = empty((aopt, k, l))
@@ -214,14 +221,23 @@ def bridge(a, b, aopt, scale='scores', mode='normal', r=0):
     else: #normal
         F = b - dot(a, B[-1])
         E = a - dot(T, W.T)
+    # leverages
+    # fixme: probably need an orthogonal basis for row-space leverage,
+    # as T (scores) is not orthogonal.
+    # Using a qr decomp to get an orthonormal basis for the row-space:
+    #Tq = qr(T)[0]
+    #s_lev, v_lev = leverage(aopt, Tq, W)
+    # explained variance
+    #var_x, exp_var_x = variances(a, T, W)
+    #qnorm = apply_along_axis(norm, 0, Q)
+    #var_y, exp_var_y = variances(b, U, Q/qnorm)
     if scale=='loads':
         T = T/tnorm
         W = W*tnorm
         Q = Q*tnorm
-    
+
     return {'B':B, 'W':W, 'T':T, 'Q':Q, 'E':E, 'F':F, 'U':U, 'P':W}
 
-
 def nipals_lpls(X, Y, Z, a_max, alpha=.7, mean_ctr=[2, 0, 1], mode='normal', scale='scores', verbose=False):
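
A minimal usage sketch of the patched pls(), for reference; it is not part of
the patch. It assumes numpy is installed and that the fluents package is on
the Python path; the array shapes, component count, and variable names are
arbitrary, and the data are centered by hand since pls() does not center its
inputs.

    import numpy
    from fluents.lib.engines import pls

    # 20 samples, 8 X-variables, 2 Y-variables; random data for illustration
    a = numpy.random.rand(20, 8)
    b = numpy.random.rand(20, 2)
    a = a - a.mean(0)   # pls() works on the raw matrices, so center first
    b = b - b.mean(0)

    dat = pls(a, b, aopt=3, mode='normal')
    B = dat['B'][-1]            # coefficients of the full 3-component model
    yhat = numpy.dot(a, B)      # fitted Y
    print dat['T'].shape        # scores: (20, 3)

The 'detailed' mode returns per-component residual arrays E and F instead of
the final residuals only, at the cost of extra memory, which is why 'normal'
is the default here.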