from scipy import zeros,zeros_like,sqrt,dot,trace,sign,round_,argmax,\ sort,ravel,newaxis,asarray,diag,sum,outer,argsort,arange,ones_like,\ all,apply_along_axis,eye from scipy.linalg import svd,inv,norm,det,sqrtm from scipy.stats import mean,median from cx_utils import mat_center from validation import pls_jkW from select_generators import shuffle_1d from engines import * import time def hotelling(Pcv, P, p_center='med', cov_center='med', alpha=0.3, crot=True, strict=False, metric=None): """Returns regularized hotelling T^2. alpha -- regularisation towards pooled cov estimates beta -- regularisation for unstable eigenvalues p_center -- location method for submodels cov_center -- location method for sub coviariances alpha -- regularisation crot -- rotate submodels toward full? strict -- only rotate 90 degree ? metric -- inverse metric matrix (if Pcv and P from metric pca/pls) """ m, n = P.shape if metric==None: metric = eye(m, dtype='=tsq_full[j]) # number of false pos. genes (0-n) false_pos = median(n_false, 1) ll = arange(1, len(false_pos)+1, 1) sort_qval = false_pos.take(sort_index)/ll qval = false_pos/ll.take(back_sort_index) print time.time() - t0 #return qval, false_pos, TSQ, tsq_full return qval def ensure_strict(C, only_flips=True): """Ensure that a rotation matrix does only 90 degree rotations. In multiplication with pcs this allows flips and reordering. if only_flips is True there will onlt be flips allowed """ Cm = C S = sign(C) # signs if only_flips==True: C = eye(Cm.shape[0])*S return C Cm = zeros_like(C) Cm.putmask(1.,abs(C)>.6) if det(Cm)>1: raise ValueError,"Implement this!" return Cm*S def pls_qvals_II(a, b, aopt=None, center=True, alpha=.3, n_iter=20, algo='pls', sim_method='shuffle', p_center='med', cov_center='med', crot=True, strict=False, metric=None): """Returns qvals for pls model. Shuffling of variables in X is preprocessed in metric. Null model is 'If I put genes randomly on network' ... if they are sign: then this is due to network structure and not covariance with response. input: a -- data matrix b -- data matrix aopt -- scalar, opt. number of components alpha -- [0,1] regularisation parameter for T2-test n_iter -- number of permutations sim_method -- permutation method ['shuffle'] p_center -- location estimator for sub models ['med'] cov_center -- location estimator for covariance of submodels ['med'] crot -- bool, use rotations of sub models? strict -- bool, use stict (rot/flips only) rotations? metric -- bool, use row metric? """ m, n = a.shape TSQ = zeros((n, n_iter), dtype='=tsq_full[j]) false_pos = median(n_false, 1) ll = arange(1, len(false_pos)+1, 1) sort_qval = false_pos.take(sort_index)/ll qval = false_pos/ll.take(back_sort_index) print time.time() - t0 #return qval, false_pos, TSQ, tsq_full return qval def leverage(aopt=1,*args): """Returns leverages input : aopt, number of components to base leverage calculations on *args, matrices of normed blm-paramters output: leverages For PCA typical inputs are normalised T or normalised P For PLSR typical inputs are normalised T or normalised W """ if aopt<1: raise ValueError,"Leverages only make sense for aopt>0" lev = [] for u in args: lev_u = 1./u.shape[0] + dot(u[:,:aopt], u[:,:aopt].T).diagonal() lev.append(lev_u) return lev def variances(a, t, p): """Returns explained variance and ind. var from blm-params. input: a -- full centered matrix t,p -- parameters from a bilinear approx of the above matrix. output: var -- variance of each component var_exp -- cumulative explained variance in percentage Typical inputs are: X(centered),T,P for PCA or X(centered),T,P / Y(centered),T,Q for PLSR. """ tot_var = sum(a**2) var = 100*(sum(p**2, 0)*sum(t**2, 0))/tot_var var_exp = cumsum(var) return var, var_exp def residual_diagnostics(Y, Yhat, aopt=1): """Root mean errors and press values. R2 vals """ pass def ssq(E, axis=0, weights=None): """Sum of squares, supports weights.""" n = E.shape[axis] if weights==None: weights = eye(n) else: weigths = diag(weigths) if axis==0: Ew = dot(weights, E) elif axis==1: Ew = dot(E, weights) else: raise NotImplementedError, "Higher order modes not supported" return pow(Ew,2).sum(axis) def vnorm(x): """Returns the euclidian norm of a vector. This is considerably faster than linalg.norm """ return sqrt(dot(x,x.conj()))