"""This module implements some common validation schemes from pca and pls. """ from scipy import ones,mean,sqrt,dot,newaxis,zeros,sum,empty,\ apply_along_axis,eye,kron,array,sort from scipy.stats import median from scipy.linalg import triu,inv,svd,norm from select_generators import w_pls_gen,w_pls_gen_jk,pls_gen,pca_gen,diag_pert from engines import w_simpls,pls,bridge,pca,nipals_lpls from cx_utils import m_shape def w_pls_cv_val(X, Y, amax, n_blocks=None, algo='simpls'): """Returns rmsep and aopt for pls tailored for wide X. The root mean square error of cross validation is calculated based on random block cross-validation. With number of blocks equal to number of samples [default] gives leave-one-out cv. The pls model is based on the simpls algorithm for wide X. :Parameters: X : ndarray column centered data matrix of size (samples x variables) Y : ndarray column centered response matrix of size (samples x responses) amax : scalar Maximum number of components n_blocks : scalar Number of blocks in cross validation :Returns: rmsep : ndarray Root Mean Square Error of cross-validated Predictions aopt : scalar Guestimate of the optimal number of components :SeeAlso: - pls_cv_val : Same output, not optimised for wide X - w_simpls : Simpls algorithm for wide X Notes ----- Based (cowardly translated) on m-files from the Chemoact toolbox X, Y inputs need to be centered (fixme: check) Examples -------- >>> import numpy as n >>> X = n.array([[1., 2., 3.],[]]) >>> Y = n.array([[1., 2., 3.],[]]) >>> w_pls(X, Y, 1) [4,5,6], 1 """ k, l = m_shape(Y) PRESS = zeros((l, amax+1), dtype='f') if n_blocks==None: n_blocks = Y.shape[0] XXt = dot(X, X.T) V = w_pls_gen(XXt, Y, n_blocks=n_blocks, center=True) for Din, Doi, Yin, Yout in V: ym = -sum(Yout, 0)[newaxis]/(1.0*Yin.shape[0]) PRESS[:,0] = PRESS[:,0] + ((Yout - ym)**2).sum(0) if algo=='simpls': dat = w_simpls(Din, Yin, amax) Q, U, H = dat['Q'], dat['U'], dat['H'] That = dot(Doi, dot(U, inv(triu(dot(H.T, U))) )) else: raise NotImplementedError Yhat = [] for j in range(l): TQ = dot(That, triu(dot(Q[j,:][:,newaxis], ones((1,amax)))) ) E = Yout[:,j][:,newaxis] - TQ E = E + sum(E, 0)/Din.shape[0] PRESS[j,1:] = PRESS[j,1:] + sum(E**2, 0) #Yhat = Yin - dot(That,Q.T) msep = PRESS/(Y.shape[0]) aopt = find_aopt_from_sep(msep) return sqrt(msep) def pls_val(X, Y, amax=2, n_blocks=10, algo='pls', metric=None): k, l = m_shape(Y) PRESS = zeros((l, amax+1), dtype=' ruins sep, so we are doing a copy prc_75.append(col[int(ind)]) prc_75 = array(prc_75) for i in range(1, sep.shape[1], 1): if med[i-1]