This repository has been archived on 2024-07-04. You can view files and clone it, but cannot push or open issues or pull requests.
laydi/fluents/lib/validation.py

146 lines
4.8 KiB
Python

from scipy import ones,mean,sqrt,dot,newaxis,zeros,sum,empty,\
apply_along_axis,eye, kron
from scipy.linalg import triu,inv,svd,norm
from select_generators import w_pls_gen,w_pls_gen_jk,pls_gen,pca_gen,diag_pert
from engines import w_simpls,pls, bridge,pca
from pylab import *
def w_pls_cv_val(X, Y, amax, n_blocks=None, algo='simpls'):
"""RMSEP calc for pls with wide X.
"""
k, l = Y.shape
PRESS = zeros((l, amax+1), dtype='f')
# X,Y are centered
if n_blocks==None:
n_blocks = Y.shape[0]
V = w_pls_gen(dot(X, X.T), Y, n_blocks=n_blocks, center=True)
for Din, Doi, Yin, Yout in V:
ym = -sum(Yout, 0)[newaxis]/(1.0*Yin.shape[0])
Yin = Yin - ym
PRESS[:,0] = PRESS[:,0] + ((Yout - ym)**2).sum(0)
if algo=='simpls':
dat = w_simpls(Din, Yin, amax)
Q,U,H = dat['Q'], dat['U'], dat['H']
That = dot(Doi, dot(U, inv(triu(dot(H.T,U))) ))
else:
"Other algo-support comming soon"
raise NotImplementedError
#Yhat = empty((amax, k, l),dtype='<f8')
Yhat = []
for j in range(l):
TQ = dot(That, triu(dot(Q[j,:][:,newaxis], ones((1,amax)))) )
E = Yout[:,j][:,newaxis] - TQ
E = E + sum(E, 0)/Din.shape[0]
PRESS[j,1:] = PRESS[j,1:] + sum(E**2, 0)
#Yhat = Y - dot(That,Q.T)
return sqrt(PRESS/Y.shape[0])
def pls_val(X, Y, amax=2, n_blocks=10,algo='pls'):
""" Validation results of pls model.
"""
k, l = Y.shape
PRESS = zeros((l, amax+1), dtype='<f8')
EE = zeros((amax, k, l), dtype='<f8')
Yhat = zeros((amax, k, l), dtype='<f8')
# X,Y are centered
V = pls_gen(X, Y, n_blocks=n_blocks, center=True, index_out=True)
for Xin, Xout, Yin, Yout, out in V:
ym = -sum(Yout,0)[newaxis]/Yin.shape[0]
Yin = (Yin - ym)
PRESS[:,0] = PRESS[:,0] + ((Yout - ym)**2).sum(0)
if algo=='pls':
dat = pls(Xin, Yin, amax, mode='normal')
elif algo=='bridge':
dat = simpls(Xin, Yin, amax, mode='normal')
for a in range(amax):
Ba = dat['B'][a,:,:]
Yhat[a,out[:],:] = dot(Xout, Ba)
E = Yout - dot(Xout, Ba)
EE[a,out,:] = E
PRESS[:,a+1] = PRESS[:,a+1] + sum(E**2,0)
return sqrt(PRESS/(k-1.)), EE, Yhat
def pca_alter_val(a, amax, n_sets=10,method='diag'):
"""Pca validation by altering elements in X.
"""
# todo: it is just as easy to do jk-estimates her as well
V = diag_pert(a, n_sets, center=True, index_out=True)
sep = empty((n_sets, amax), dtype='f')
for i, (xi, ind) in enumerate(V):
dat_i = pca(xi, amax, mode='detailed')
Ti,Pi = dat_i['T'],dat_i['P']
for j in xrange(amax):
Xhat = dot(Ti[:,:j+1], Pi[:,:j+1].T)
a_sub = a.ravel().take(ind)
EE = a_sub - Xhat.ravel().take(ind)
tot = (a_sub**2).sum()
sep[i,j] = (EE**2).sum()/tot
return sqrt(sep.mean(0))
#return sep
def pca_cv_val(X, amax, n_sets):
""" Cross validation of pca using random sets crossval.
"""
m, n = X.shape
xtot = (X**2).sum()
V = pca_gen(X, n_sets=7, center=True, index_out=True)
E = empty((amax, m, n), dtype='f')
for xi,xout,ind in V:
dat_i = pca(xi, amax, mode='detailed')
Pi = dat_i['P']
for a in xrange(amax):
Pia = Pi[:,:a+1]
E[a][ind,:] = (X[ind,:] - dot(xout, dot(Pia,Pia.T) ))**2
sep = []
for a in xrange(amax):
sep.append(E[a].sum()/xtot)
return sqrt(sep.mean(0))
def pls_jkW(a, b, amax, n_blocks=None, algo='pls', use_pack=True):
""" Returns CV-segments of paramter W for wide X.
todo: add support for T,Q and B
"""
if n_blocks == None:
n_blocks = b.shape[0]
WW = empty((n_blocks, a.shape[1], amax), dtype='f')
if use_pack:
u, s, inflater = svd(a, full_matrices=0)
a = u*s
V = pls_gen(a, b, n_blocks=n_blocks)
for nn,(a_in, a_out, b_in, b_out) in enumerate(V):
if algo=='pls':
dat = pls(a_in, b_in, amax, 'loads', 'fast')
elif algo=='bridge':
dat = bridge(a_in, b_in, amax, 'loads', 'fast')
W = dat['W']
if use_pack:
W = dot(inflater.T, W)
WW[nn,:,:] = W
return WW
def pca_jkP(a, aopt, n_blocks=None):
""" Returns CV-segments of paramter P.
todo: add support for T
fixme: more efficient to add this in validation loop
"""
if n_blocks == None:
n_blocks = a.shape[0]
PP = empty((n_blocks, a.shape[1], aopt), dtype='f')
V = pca_gen(a, n_sets=n_blocks, center=True)
for nn,(a_in, a_out) in enumerate(V):
dat = pca(a_in, aopt, mode='fast')
P = dat['P']
PP[nn,:,:] = P
return PP