"""Module contain algorithms for (burdensome) calculations. There is no typechecking of any kind here, just focus on speed """ from scipy.linalg import svd,norm,inv,pinv,qr from scipy import dot,empty,eye,newaxis,zeros,sqrt,diag,\ apply_along_axis,mean,ones,randn,empty_like,outer,c_,\ rand,sum,cumsum def pca(a, aopt, scale='scores', mode='normal'): """ Principal Component Analysis model mode: -- fast : returns smallest dim scaled (T for n<=m, P for n>m ) -- normal : returns all model params and residuals after aopt comp -- detailed : returns all model params and all residuals """ m,n = a.shape u,s,vt = svd(a, full_matrices=0) T = u*s T = T[:,:aopt] P = vt[:aopt,:].T if scale=='loads': tnorm = apply_along_axis(norm, 0, T) T = T/tnorm P = P*tnorm if mode == 'fast': return {'T':T, 'P':P} if mode=='detailed': """Detailed mode returns residual matrix for all comp. That is E, is a three-mode matrix: (amax, m, n) """ E = empty((aopt, m, n)) for ai in range(aopt): e = a - dot(T[:,:ai+1], P[:,:ai+1].T) E[ai,:,:] = e.copy() else: E = a - dot(T,P.T) return {'T':T, 'P':P, 'E':E} def pls(a, b, aopt=2, scale='scores', mode='normal', ab=None): """Kernel pls for tall/wide matrices. Fast pls for calibration. Only inefficient for many Y-vars. """ m,n = a.shape if ab!=None: mm,l = ab.shape else: k,l = b.shape W = empty((n, aopt)) P = empty((n, aopt)) R = empty((n, aopt)) Q = empty((l, aopt)) T = empty((m, aopt)) B = empty((aopt, n, l)) if ab==None: ab = dot(a.T, b) for i in range(aopt): if ab.shape[1]==1: w = ab else: u,s,vh = svd(dot(ab.T, ab)) w = dot(ab,u[:,:1]) w = w/norm(w) r = w.copy() if i>0: for j in range(0,i,1): r = r - dot(P[:,j].T, w)*R[:,j][:,newaxis] t = dot(a, r) tt = norm(t)**2 p = dot(a.T, t)/tt q = dot(r.T, ab).T/tt ab = ab - dot(p, q.T)*tt T[:,i] = t.ravel() W[:,i] = w.ravel() P[:,i] = p.ravel() R[:,i] = r.ravel() if mode=='fast' and i==aopt-1: if scale=='loads': tnorm = apply_along_axis(norm, 0, T) T = T/tnorm W = W*tnorm return {'T':T, 'W':W} Q[:,i] = q.ravel() B[i] = dot(R[:,:i+1], Q[:,:i+1].T) if mode=='detailed': E = empty((aopt, m, n)) F = empty((aopt, k, l)) for i in range(1,aopt+1,1): E[i-1] = a - dot(T[:,:i],P[:,:i].T) F[i-1] = b - dot(T[:,:i],Q[:,:i].T) else: E = a - dot(T[:,:aopt], P[:,:aopt].T) F = b - dot(T[:,:aopt], Q[:,:aopt].T) if scale=='loads': tnorm = apply_along_axis(norm, 0, T) T = T/tnorm W = W*tnorm Q = Q*tnorm P = P*tnorm return {'B':B, 'Q':Q, 'P':P, 'T':T, 'W':W, 'R':R, 'E':E, 'F':F} def w_simpls(aat, b, aopt): """ Simpls for wide matrices. Fast pls for crossval, used in calc rmsep for wide X There is no P,W. T is normalised """ bb = b.copy() m,m = aat.shape U = empty((m, aopt)) T = empty((m, aopt)) H = empty((m, aopt)) #just like W in simpls PROJ = empty((m, aopt)) #just like R in simpls for i in range(aopt): u,s,vh = svd(dot(dot(b.T, aat), b), full_matrices=0) u = dot(b, u[:,:1]) #y-factor scores U[:,i] = u.ravel() t =dot(aat, u) t = t/norm(t) T[:,i] = t.ravel() h = dot(aat, t) #score-weights H[:,i] = h.ravel() PROJ[:,:i+1] = dot(T[:,:i+1], inv(dot(T[:,:i+1].T, H[:,:i+1])) ) if i