This repository has been archived on 2024-07-04. You can view files and clone it, but cannot push or open issues or pull requests.
laydi/fluents/lib/engines.py

201 lines
5.5 KiB
Python

"""Module contain algorithms for (burdensome) calculations.
There is no typechecking of any kind here, just focus on speed
"""
from scipy.linalg import svd,norm,inv,pinv,qr
from scipy import dot,empty,eye,newaxis,zeros,sqrt,diag,\
apply_along_axis,mean,ones,randn,empty_like,outer,c_,\
rand,sum,cumsum
def pca(a, aopt, scale='scores', mode='normal'):
""" Principal Component Analysis model
mode:
-- fast : returns smallest dim scaled (T for n<=m, P for n>m )
-- normal : returns all model params and residuals after aopt comp
-- detailed : returns all model params and all residuals
"""
m,n = a.shape
u,s,vt = svd(a, full_matrices=0)
T = u*s
T = T[:,:aopt]
P = vt[:aopt,:].T
if scale=='loads':
tnorm = apply_along_axis(norm, 0, T)
T = T/tnorm
P = P*tnorm
if mode == 'fast':
return {'T':T, 'P':P}
if mode=='detailed':
"""Detailed mode returns residual matrix for all comp.
That is E, is a three-mode matrix: (amax, m, n) """
E = empty((aopt, m, n))
for ai in range(aopt):
e = a - dot(T[:,:ai+1], P[:,:ai+1].T)
E[ai,:,:] = e.copy()
else:
E = a - dot(T,P.T)
return {'T':T, 'P':P, 'E':E}
def pls(a, b, aopt=2, scale='scores', mode='normal', ab=None):
"""Kernel pls for tall/wide matrices.
Fast pls for calibration. Only inefficient for many Y-vars.
"""
m,n = a.shape
if ab!=None:
mm,l = ab.shape
else:
k,l = b.shape
W = empty((n, aopt))
P = empty((n, aopt))
R = empty((n, aopt))
Q = empty((l, aopt))
T = empty((m, aopt))
B = empty((aopt, n, l))
if ab==None:
ab = dot(a.T, b)
for i in range(aopt):
if ab.shape[1]==1:
w = ab
else:
u,s,vh = svd(dot(ab.T, ab))
w = dot(ab,u[:,:1])
w = w/norm(w)
r = w.copy()
if i>0:
for j in range(0,i,1):
r = r - dot(P[:,j].T, w)*R[:,j][:,newaxis]
t = dot(a, r)
tt = norm(t)**2
p = dot(a.T, t)/tt
q = dot(r.T, ab).T/tt
ab = ab - dot(p, q.T)*tt
T[:,i] = t.ravel()
W[:,i] = w.ravel()
P[:,i] = p.ravel()
R[:,i] = r.ravel()
if mode=='fast' and i==aopt-1:
if scale=='loads':
tnorm = apply_along_axis(norm, 0, T)
T = T/tnorm
W = W*tnorm
return {'T':T, 'W':W}
Q[:,i] = q.ravel()
B[i] = dot(R[:,:i+1], Q[:,:i+1].T)
if mode=='detailed':
E = empty((aopt, m, n))
F = empty((aopt, k, l))
for i in range(1,aopt+1,1):
E[i-1] = a - dot(T[:,:i],P[:,:i].T)
F[i-1] = b - dot(T[:,:i],Q[:,:i].T)
else:
E = a - dot(T[:,:aopt], P[:,:aopt].T)
F = b - dot(T[:,:aopt], Q[:,:aopt].T)
if scale=='loads':
tnorm = apply_along_axis(norm, 0, T)
T = T/tnorm
W = W*tnorm
Q = Q*tnorm
P = P*tnorm
return {'B':B, 'Q':Q, 'P':P, 'T':T, 'W':W, 'R':R, 'E':E, 'F':F}
def w_simpls(aat, b, aopt):
""" Simpls for wide matrices.
Fast pls for crossval, used in calc rmsep for wide X
There is no P,W. T is normalised
"""
bb = b.copy()
m,m = aat.shape
U = empty((m, aopt))
T = empty((m, aopt))
H = empty((m, aopt)) #just like W in simpls
PROJ = empty((m, aopt)) #just like R in simpls
for i in range(aopt):
u,s,vh = svd(dot(dot(b.T, aat), b), full_matrices=0)
u = dot(b, u[:,:1]) #y-factor scores
U[:,i] = u.ravel()
t =dot(aat, u)
t = t/norm(t)
T[:,i] = t.ravel()
h = dot(aat, t) #score-weights
H[:,i] = h.ravel()
PROJ[:,:i+1] = dot(T[:,:i+1], inv(dot(T[:,:i+1].T, H[:,:i+1])) )
if i<aopt:
b = b - dot(PROJ[:,:i+1], dot(H[:,:i+1].T,b) )
C = dot(bb.T, T)
return {'T':T,'U':U,'Q':C,'H':H}
def bridge(a, b, aopt, scale='scores', mode='normal', r=0):
"""Undeflated Ridged svd(X'Y)
"""
m, n = a.shape
k, l = b.shape
u,s,vt = svd(b, full_matrices=0)
g0 = dot(u*s, u.T)
g = (1 - r)*g0 + r*eye(m)
ag = dot(a.T, g)
u,s,vt = svd(ag, full_matrices=0)
W = u[:,:aopt]
K = vt[:aopt,:].T
T = dot(a, W)
tnorm = apply_along_axis(norm, 0, T) # norm of T-columns
if mode == 'fast':
if scale=='loads':
T = T/tnorm
W = W*tnorm
return {'T':T, 'W':W}
U = dot(g0, K) #fixme check this
Q = dot(b.T, dot(T, inv(dot(T.T,T)) ))
B = zeros((aopt, n, l))
for i in range(aopt):
B[i] = dot(W[:,:i+1], Q[:,:i+1].T)
# leverages
# fixme: probably need an orthogonal basis for row-space leverage
# T (scores) are not orthogonal
# Using a qr decomp to get an orthonormal basis for row-space
#Tq = qr(T)[0]
#s_lev,v_lev = leverage(aopt,Tq,W)
# explained variance
#var_x, exp_var_x = variances(a,T,W)
#qnorm = apply_along_axis(norm, 0, Q)
#var_y, exp_var_y = variances(b,U,Q/qnorm)
if mode == 'detailed':
E = empty((aopt, m, n))
F = empty((aopt, k, l))
for i in range(aopt):
E[i] = a - dot(T[:,:i+1], W[:,:i+1].T)
F[i] = b - dot(a, B[i])
else: #normal
F = b - dot(a, B[-1])
E = a - dot(T, W.T)
if scale=='loads':
T = T/tnorm
W = W*tnorm
Q = Q*tnorm
return {'B':B, 'W':W, 'T':T, 'Q':Q, 'E':E, 'F':F, 'U':U, 'P':W}