Projects/laydi
Arnar Flatberg 2007-07-28 16:05:11 +00:00
parent 9a2e259209
commit 349cab3c51
4 changed files with 297 additions and 131 deletions

View File

@@ -197,7 +197,7 @@ class PLS(Model):
         Model.__init__(self, id, name)
         self._options = PlsOptions()

-    def validation(self, amax, n_sets, cv_val_method):
+    def validation(self):
         """Returns rmsep for pls model.
         """
         m, n = self.model['E0'].shape
@@ -207,7 +207,7 @@ class PLS(Model):
         val_engine = pls_val
         if self._options['calc_cv']==True:
             rmsep, aopt = val_engine(self.model['E0'], self.model['F0'],
-                                     amax, n_sets)
+                                     self._options['amax'], self._options['n_sets'])
             self.model['rmsep'] = rmsep[:,:-1]
             self.model['aopt'] = aopt
         else:
@@ -319,7 +319,7 @@ class PLS(Model):
         self.model['E0'] = self._data['X']
         self.model['F0'] = self._data['Y']
-        self.validation(**options.validation_options())
+        self.validation()
         self.make_model(self.model['E0'], self.model['F0'],
                         **options.make_model_options())
         # variance captured
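For orientation, the `rmsep` that `validation()` stores is the root-mean-square error of prediction, computed per PLS component over the cross-validation segments that the `n_sets` option requests. A minimal numpy sketch of the quantity itself (this is not laydi's `pls_val`, whose internals are outside this diff):

    import numpy as np

    def rmsep(y, yhat_cv):
        """Root-mean-square error of prediction.

        y       : (samples x responses) measured responses
        yhat_cv : (components x samples x responses) cross-validated predictions
        """
        err = yhat_cv - y[np.newaxis, :, :]
        # one RMSEP value per (component, response) pair
        return np.sqrt((err ** 2).mean(axis=1))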

View File

@@ -6,81 +6,189 @@ There is almost no typechecking of any kind here, just focus on speed
 import math
 from scipy.linalg import svd,inv
 from scipy import dot,empty,eye,newaxis,zeros,sqrt,diag,\
-     apply_along_axis,mean,ones,randn,empty_like,outer,c_,\
-     rand,sum,cumsum,matrix
+     apply_along_axis,mean,ones,randn,empty_like,outer,r_,c_,\
+     rand,sum,cumsum,matrix, expand_dims,minimum,where

 has_sym=True
 try:
-    import symmeig
+    from symeig import symeig
 except:
     has_sym = False
+has_sym=False

-def pca(a, aopt, scale='scores', mode='normal'):
-    """ Principal Component Analysis model
-    mode:
-    -- fast : returns smallest dim scaled (T for n<=m, P for n>m )
-    -- normal : returns all model params and residuals after aopt comp
-    -- detailed : returns all model params and all residuals
+def pca(a, aopt,scale='scores',mode='normal',center_axis=-1):
+    """ Principal Component Analysis.
+
+    Performs PCA on given matrix and returns results in a dictionary.
+
+    :Parameters:
+        a : array
+            Data measurement matrix, (samples x variables)
+        aopt : int
+            Number of components to use, aopt<=min(samples, variables)
+
+    :Returns:
+        results : dict
+            keys -- values, T -- scores, P -- loadings, E -- residuals,
+            lev -- leverages, ssq -- sum of squares, expvar -- cumulative
+            explained variance, aopt -- number of components used
+
+    :OtherParameters:
+        mode : str
+            Amount of info retained, ('fast', 'normal', 'detailed')
+        center_axis : int
+            Center along given axis. If neg.: no centering (-inf,..., matrix modes)
+
+    :SeeAlso:
+        - pcr : other blm
+        - pls : other blm
+        - lpls : other blm
+
+    Notes
+    -----
+
+    Uses kernel speed-up if m>>n or m<<n.
+
+    If residuals turn rank deficient, a lower number of component than given
+    in input will be used. The number of components used is given in results-dict.
+
+    Examples
+    --------
+
+    >>> import scipy,engines
+    >>> a=scipy.asarray([[1,2,3],[2,4,5]])
+    >>> dat=engines.pca(a, 2)
+    >>> dat['expvar']
+    array([0.,99.8561562, 100.])
     """
+    if center_axis>=0:
+        a = a - expand_dims(a.mean(center_axis), center_axis)
     m, n = a.shape
+    #print "rows: %s cols: %s" %(m,n)
     if m>(n+100) or n>(m+100):
-        u, s, v = esvd(a)
+        u, e, v = esvd(a)
+        s = sqrt(e)
     else:
         u, s, vt = svd(a, 0)
         v = vt.T
-    eigvals = (1./m)*s
+    e = s**2
+    tol = 1e-10
+    eff_rank = sum(s>s[0]*tol)
+    aopt = minimum(aopt, eff_rank)
     T = u*s
+    s = s[:aopt]
+    e = e[:aopt]
     T = T[:,:aopt]
     P = v[:,:aopt]
     if scale=='loads':
-        tnorm = apply_along_axis(vnorm, 0, T)
-        T = T/tnorm
-        P = P*tnorm
+        T = T/s
+        P = P*s

     if mode == 'fast':
-        return {'T':T, 'P':P}
+        return {'T':T, 'P':P, 'aopt':aopt}

     if mode=='detailed':
-        """Detailed mode returns residual matrix for all comp.
-        That is E, is a three-mode matrix: (amax, m, n) """
         E = empty((aopt, m, n))
+        ssq = []
+        lev = []
+        expvarx = empty((aopt, aopt+1))
         for ai in range(aopt):
-            e = a - dot(T[:,:ai+1], P[:,:ai+1].T)
-            E[ai,:,:] = e.copy()
+            E[ai,:,:] = a - dot(T[:,:ai+1], P[:,:ai+1].T)
+            ssq.append([(E[ai,:,:]**2).sum(0), (E[ai,:,:]**2).sum(1)])
+            if scale=='loads':
+                lev.append([((s*T)**2).sum(1), (P**2).sum(1)])
+            else:
+                lev.append([(T**2).sum(1), ((s*P)**2).sum(1)])
+            expvarx[ai,:] = r_[0, 100*e.cumsum()/e.sum()]
     else:
-        E = a - dot(T,P.T)
+        # residuals
+        E = a - dot(T, P.T)
+        SEP = E**2
+        ssq = [SEP.sum(0), SEP.sum(1)]
+        # leverages
+        if scale=='loads':
+            lev = [(1./m)+(T**2).sum(1), (1./n)+((P/s)**2).sum(1)]
+        else:
+            lev = [(1./m)+((T/s)**2).sum(1), (1./n)+(P**2).sum(1)]
+        # variances
+        expvarx = r_[0, 100*e.cumsum()/e.sum()]

-    return {'T':T, 'P':P, 'E':E}
+    return {'T':T, 'P':P, 'E':E, 'expvarx':expvarx, 'levx':lev, 'ssqx':ssq, 'aopt':aopt}

-def pcr(a, b, aopt=2, scale='scores', mode='normal'):
-    """Principal Component Regression.
-
-    Returns
+def pcr(a, b, aopt, scale='scores',mode='normal',center_axis=0):
+    """ Principal Component Regression.
+
+    Performs PCR on given matrix and returns results in a dictionary.
+
+    :Parameters:
+        a : array
+            Data measurement matrix, (samples x variables)
+        b : array
+            Data response matrix, (samples x responses)
+        aopt : int
+            Number of components to use, aopt<=min(samples, variables)
+
+    :Returns:
+        results : dict
+            keys -- values, T -- scores, P -- loadings, E -- residuals,
+            levx -- leverages, ssqx -- sum of squares, expvarx -- cumulative
+            explained variance, aopt -- number of components used
+
+    :OtherParameters:
+        mode : str
+            Amount of info retained, ('fast', 'normal', 'detailed')
+        center_axis : int
+            Center along given axis. If neg.: no centering (-inf,..., matrix modes)
+
+    :SeeAlso:
+        - pcr : other blm
+        - pls : other blm
+        - lpls : other blm
+
+    Notes
+    -----
+
+    Uses kernel speed-up if m>>n or m<<n.
+
+    If residuals turn rank deficient, a lower number of component than given
+    in input will be used. The number of components used is given in results-dict.
+
+    Examples
+    --------
+
+    >>> import scipy,engines
+    >>> a=scipy.asarray([[1,2,3],[2,4,5]])
+    >>> dat=engines.pca(a, 2)
+    >>> dat['expvar']
+    array([0.,99.8561562, 100.])
     """
-    m, n = m_shape(a)
-    B = empty((aopt, n, l))
-    dat = pca(a, aopt=aopt, scale=scale, mode='normal', center_axis=0)
+    k, l = m_shape(b)
+    if center_axis>=0:
+        b = b - expand_dims(b.mean(center_axis), center_axis)
+    dat = pca(a, aopt=aopt, scale=scale, mode=mode, center_axis=center_axis)
     T = dat['T']
-    weigths = apply_along_axis(vnorm, 0, T)
+    weights = apply_along_axis(vnorm, 0, T)
     if scale=='loads':
         # fixme: check weights
-        Q = dot(b.T, T*weights)
+        Q = dot(b.T, T*weights**2)
     else:
         Q = dot(b.T, T/weights**2)

     if mode=='fast':
-        return {'T', T:, 'P':P, 'Q':Q}
+        dat.update({'Q':Q})
+        return dat
     if mode=='detailed':
-        for i in range(1, aopt+1, 1):
-            F[i,:,:] = b - dot(T[:,i],Q[:,:i].T)
+        F = empty((aopt, k, l))
+        for i in range(aopt):
+            F[i,:,:] = b - dot(T[:,:i+1], Q[:,:i+1].T)
     else:
         F = b - dot(T, Q.T)
     #fixme: explained variance in Y + Y-var leverages
-    dat.update({'Q',Q, 'F':F})
+    dat.update({'Q':Q, 'F':F})
     return dat

 def pls(a, b, aopt=2, scale='scores', mode='normal', ab=None):
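The rewritten `pcr` reduces to: run `pca` on the predictor block, then regress the response block `b` onto the (orthogonal) score columns to get the y-loadings `Q`. A self-contained numpy sketch of that idea, independent of the module's own helpers (`m_shape`, `vnorm`):

    import numpy as np

    def pcr_sketch(a, b, aopt):
        # column-center both blocks, as center_axis=0 does above
        a = a - a.mean(0)
        b = b - b.mean(0)
        # PCA via thin SVD: scores T = u*s, loadings P = v
        u, s, vt = np.linalg.svd(a, full_matrices=False)
        T = (u * s)[:, :aopt]
        P = vt.T[:, :aopt]
        # least-squares regression of b on T; T's columns are orthogonal,
        # so dividing by the squared column norms is all that is needed
        Q = np.dot(b.T, T / (T ** 2).sum(0))
        F = b - np.dot(T, Q.T)      # y-residuals after aopt components
        return {'T': T, 'P': P, 'Q': Q, 'F': F}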
@@ -271,7 +379,6 @@ def nipals_lpls(X, Y, Z, a_max, alpha=.7, mean_ctr=[2, 0, 1], mode='normal', sca
     X, mnX = center(X, xctr)
     Y, mnY = center(Y, xctr)
     Z, mnZ = center(Z, zctr)
-    print Z.mean(1)

     varX = pow(X, 2).sum()
     varY = pow(Y, 2).sum()
@@ -365,7 +472,7 @@ def nipals_lpls(X, Y, Z, a_max, alpha=.7, mean_ctr=[2, 0, 1], mode='normal', sca
 def m_shape(array):
     return matrix(array).shape

-def esvd(data):
+def esvd(data, amax=None):
     """SVD with the option of economy sized calculation
     Calculate subspaces of X'X or XX' depending on the shape
     of the matrix.
@@ -378,17 +485,30 @@ def esvd(data):
     m, n = data.shape
     if m>=n:
         kernel = dot(data.T, data)
-        u, s, vt = svd(kernel)
-        u = dot(data, vt.T)
-        v = vt.T
+        if has_sym:
+            if not amax:
+                amax = n
+            pcrange = [n-amax, n]
+            s, v = symeig(kernel, range=pcrange, overwrite=True)
+            s = s[::-1]
+            v = v[:,arange(n, -1, -1)]
+        else:
+            u, s, vt = svd(kernel)
+            v = vt.T
+        u = dot(data, v)
         for i in xrange(n):
             s[i] = vnorm(u[:,i])
             u[:,i] = u[:,i]/s[i]
     else:
         kernel = dot(data, data.T)
-        #data = (data + data.T)/2.0
-        u, s, vt = svd(kernel)
-        v = dot(u.T, data)
+        if has_sym:
+            if not amax:
+                amax = m
+            pcrange = [m-amax, m]
+            s, u = symeig(kernel, range=pcrange, overwrite=True)
+        else:
+            u, s, vt = svd(kernel)
+        v = dot(u.T, data)
         for i in xrange(m):
             s[i] = vnorm(v[i,:])
             v[i,:] = v[i,:]/s[i]
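The point of `esvd` is the classic kernel trick for economy-size SVD: for a tall matrix the right singular vectors are the eigenvectors of the small n x n kernel X'X, whose eigenvalues are the squared singular values, and the left vectors are then recovered by projecting the data and normalizing (the loop above). A minimal numpy sketch of the m >= n branch, without the symeig path, assuming full column rank so no singular value is zero:

    import numpy as np

    def kernel_svd_tall(x):
        """Economy SVD of a tall x via the small kernel x.T @ x."""
        e, v = np.linalg.eigh(np.dot(x.T, x))
        order = np.argsort(e)[::-1]            # eigh returns ascending eigenvalues
        e, v = e[order], v[:, order]
        s = np.sqrt(np.clip(e, 0.0, None))     # eigvals of x.T @ x are s**2
        u = np.dot(x, v) / s                   # rebuild left vectors, column-normalized
        return u, s, v

    # sanity check against direct reconstruction
    x = np.random.randn(500, 5)
    u, s, v = kernel_svd_tall(x)
    assert np.allclose(np.dot(u * s, v.T), x)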

View File

@@ -3,32 +3,14 @@ import scipy
 import rpy

 silent_eval = rpy.with_mode(rpy.NO_CONVERSION, rpy.r)

-def get_term_sim(termlist, method = "JiangConrath", verbose=False):
-    """Returns the similariy matrix between go-terms.
-
-    Arguments:
-        termlist: character vector of GO terms
-        method: one of
-                ("JiangConrath","Resnik","Lin","CoutoEnriched","CoutoJiangConrath","CoutoResnik","CoutoLin")
-        verbose: print out various information or not
-    """
-    _methods = ("JiangConrath","Resnik","Lin","CoutoEnriched","CoutoJiangConrath","CoutoResnik","CoutoLin")
-    assert(method in _methods)
-    assert(termlist[0][:2]=='GO')
-    rpy.r.library("GOSim")
-    return rpy.r.getTermSim(termlist, method = method, verbose = verbose)
-
-def get_gene_sim(genelist, similarity='OA',
-                 distance="Resnick"):
-    rpy.r.library("GOSim")
-    rpy.r.assign("ids", genelist)
-    silent_eval('a<-getGeneSim(ids)', verbose=FALSE)
-
-def goterms_from_gene(genelist, ontology=['BP'], garbage = ['IEA', 'ISS', 'ND']):
+def goterms_from_gene(genelist, ontology='BP', garbage=None):
     """ Returns the go-terms from a specified genelist (Entrez id).
+
+    Recalculates the information content if needed based on selected evidence codes.
     """
-    rpy.r.library("GO")
+    rpy.r.library("GOSim")
     _CODES = {"IMP" : "inferred from mutant phenotype",
               "IGI" : "inferred from genetic interaction",
               "IPI" :"inferred from physical interaction",
@@ -42,25 +24,46 @@ def goterms_from_gene(genelist, ontology=['BP'], garbage = ['IEA', 'ISS', 'ND'])
               "IC" : "inferred by curator"
               }
     _ONTOLOGIES = ['BP', 'CC', 'MF']
-    assert(scipy.all([(code in _CODES) for code in garbage]))
-    assert(scipy.all([(ont in _ONTOLOGIES) for ont in ontology]))
-    have_these = rpy.r('as.list(GOTERM)').keys()
-    goterms = {}
+    #assert(scipy.all([(code in _CODES) for code in garbage]) or garbage==None)
+    assert(ontology in _ONTOLOGIES)
+    dummy = rpy.r.setOntology(ontology)
+    ddef = False
+    if ontology=='BP' and garbage!=None:
+        # This is for ont=BP and garbage =['IEA', 'ISS', 'ND']
+        rpy.r.load("ICsBPIMP_IGI_IPI_ISS_IDA_IEP_TAS_NAS_IC.rda")
+        ic = rpy.r.assign("IC",rpy.r.IC, envir=rpy.r.GOSimEnv)
+        print len(ic)
+    else:
+        ic = rpy.r('get("IC", envir=GOSimEnv)')
+    print "loading GO definitions environment"
+
+    gene2terms = {}
     for gene in genelist:
-        goterms[gene] = []
         info = rpy.r('GOENTREZID2GO[["' + str(gene) + '"]]')
         #print info
         if info:
+            skip=False
             for term, desc in info.items():
-                if term not in have_these:
-                    print "GO miss:"
-                    print term
-                if desc['Ontology'] in ontology and desc['Evidence'] not in garbage:
-                    goterms[gene].append(term)
+                if ic.get(term)==scipy.isinf:
+                    print "\nIC is Inf on this GO term %s for this gene: %s" %(term,gene)
+                    skip=True
+                if ic.get(term)==None:
+                    #print "\nHave no IC on this GO term %s for this gene: %s" %(term,gene)
+                    skip=True
+                if desc['Ontology']!=ontology:
+                    #print "\nThis GO term %s belongs to: %s:" %(term,desc['Ontology'])
+                    skip = True
+                if not skip:
+                    if gene2terms.has_key(gene):
+                        gene2terms[gene].append(term)
+                    else:
+                        gene2terms[gene] = [term]
+        else:
+            print "\nHave no Annotation on this gene: %s" %gene

-    return goterms
+    return gene2terms

-def genego_matrix(goterms, tmat, gene_ids, term_ids, func=min):
+def genego_matrix(goterms, tmat, gene_ids, term_ids, func=max):
     ngenes = len(gene_ids)
     nterms = len(term_ids)
     gene2indx = {}
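The new loop keeps a term only when GOSim knows a finite information-content (IC) value for it and the term's ontology matches the requested one; evidence-code filtering now happens through the preloaded IC data rather than per term. Note that as committed, `skip` is set once per gene and never reset between terms, so one bad term drops all of a gene's remaining terms. A hedged pure-Python sketch of the per-term filter as such (`ic` as a plain dict, no rpy; `filter_terms` is a hypothetical helper, not part of rpy_go):

    import math

    def filter_terms(info, ic, ontology='BP'):
        """Keep terms with finite, known IC in the requested ontology.

        info : dict term -> {'Ontology': ..., 'Evidence': ...} (as rpy returns it)
        ic   : dict term -> information content
        """
        kept = []
        for term, desc in info.items():
            if desc['Ontology'] != ontology:
                continue                      # wrong ontology
            value = ic.get(term)
            if value is None or math.isinf(value):
                continue                      # unknown or infinite IC
            kept.append(term)
        return kept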
@@ -71,23 +74,46 @@ def genego_matrix(goterms, tmat, gene_ids, term_ids, func=min):
         term2indx[id]=i
     #G = scipy.empty((nterms, ngenes),'d')
     G = []
-    newindex = []
+    new_gene_index = []
     for gene, terms in goterms.items():
         g_ind = gene2indx[gene]
         if len(terms)>0:
             t_ind = []
-            newindex.append(g_ind)
+            new_gene_index.append(g_ind)
             for term in terms:
                 if term2indx.has_key(term): t_ind.append(term2indx[term])
-            print t_ind
             subsim = tmat[t_ind, :]
             gene_vec = scipy.apply_along_axis(func, 0, subsim)
             G.append(gene_vec)
-    return scipy.asarray(G), newindex
+    return scipy.asarray(G), new_gene_index
+
+def genego_sim(gene2go, gene_ids, all_go_terms, STerm, go_term_sim="OA", term_sim="Lin", verbose=False):
+    """Returns go-terms x genes similarity matrix.
+
+    :input:
+        - gene2go: dict: keys: gene_id, values: go_terms
+        - gene_ids: list of gene ids (entrez ids)
+        - STerm: (go_terms x go_terms) similarity matrix
+        - go_terms_sim: similarity measure between a gene and multiple go terms (max, mean, OA)
+        - term_sim: similarity measure between two go-terms
+        - verbose
+    """
+    rpy.r.library("GOSim")
+    #gene_ids = gene2go.keys()
+    GG = scipy.empty((len(all_go_terms), len(gene_ids)), 'd')
+    for j,gene in enumerate(gene_ids):
+        for i,go_term in enumerate(all_go_terms):
+            if verbose:
+                print "\nAssigning similarity from %s to terms(gene): %s" %(go_term,gene)
+            GG_ij = rpy.r.getGSim(go_term, gene2go[gene], similarity=go_term_sim,
+                                  similarityTerm=term_sim, STerm=STerm, verbose=verbose)
+            GG[i,j] = GG_ij
+    return GG

 def goterm2desc(gotermlist):
-    """Returns the go-terms description keyed by go-term
+    """Returns the go-terms description keyed by go-term.
     """
     rpy.r.library("GO")
     term2desc = {}
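Two routes to a gene/GO-term association matrix coexist here: `genego_matrix` collapses rows of a precomputed term-term similarity matrix into one vector per gene with an aggregation function (now `max` by default), while the new `genego_sim` asks GOSim's getGSim for each gene/term pair directly. A hedged pure-Python sketch of the first, matrix-only route (`gene_by_term_sim` is a hypothetical helper, not part of rpy_go):

    import numpy as np

    def gene_by_term_sim(gene2terms, tmat, term_index, func=max):
        """One similarity row per gene, aggregated over that gene's GO terms.

        gene2terms : dict gene -> list of GO terms (as goterms_from_gene returns)
        tmat       : (n_terms x n_terms) term-term similarity matrix
        term_index : dict term -> row index into tmat
        func       : column-wise aggregator (max, min, np.mean, ...)
        """
        genes, rows = [], []
        for gene, terms in gene2terms.items():
            idx = [term_index[t] for t in terms if t in term_index]
            if not idx:
                continue                      # gene has no mapped terms
            # aggregate the selected term rows column by column
            rows.append(np.apply_along_axis(func, 0, tmat[idx, :]))
            genes.append(gene)
        return np.asarray(rows), genes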

View File

@@ -23,7 +23,7 @@ data = DX.asarray().T
 rpy.r.assign("data", data)
 cl = dot(DY.asarray(), diag([1,2,3])).sum(1)
 rpy.r.assign("cl", cl)
-rpy.r.assign("B", 100)
+rpy.r.assign("B", 20)
 # Perform a SAM analysis.
 print "Starting SAM"
 sam = rpy.r('sam.out<-sam(data=data,cl=cl,B=B,rand=123)')
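Downstream, genes are kept when their SAM q-value falls below a cutoff. A small numpy sketch of that subsetting, with plain arrays standing in for the DX/DY datasets; note that the commit raises the cutoff to 2, which keeps every gene since q-values lie in [0, 1]:

    import numpy as np

    qvals = np.asarray([0.0005, 0.03, 0.2, 0.9])   # as returned by R's qvalue()
    X = np.random.randn(10, 4)                     # samples x genes

    cutoff = 0.01
    index = np.where(qvals < cutoff)[0]            # indices of significant genes
    Xr = X[:, index]                               # subset the data matrix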
@@ -32,63 +32,74 @@ print "SAM done"
 qq = rpy.r('qobj<-qvalue(sam.out@p.value)')
 qvals = asarray(qq['qvalues'])

 # cut off
-co = 0.001
-index = where(qvals<0.01)[0]
+cutoff = 2
+index = where(qvals<cutoff)[0]

 # Subset data
 X = DX.asarray()
 Xr = X[:,index]
 gene_ids = DX.get_identifiers('gene_ids', index)
-print "\nWorkiing on subset with %s genes " %len(gene_ids)
+print "\nWorking on subset with %s genes " %len(gene_ids)

-### Build GO data ####
-
-print "Go terms ..."
-goterms = rpy_go.goterms_from_gene(gene_ids)
-terms = set()
-for t in goterms.values():
-    terms.update(t)
-terms = list(terms)
-print "Number of go-terms: %s" %len(terms)
+#gene2ind = {}
+#for i, gene in enumerate(gene_ids):
+#    gene2ind[gene] = i
+
+### Build GO data ####
+print "\n\nFiltering genes by Go terms "
+gene2goterms = rpy_go.goterms_from_gene(gene_ids)
+all_terms = set()
+for t in gene2goterms.values():
+    all_terms.update(t)
+terms = list(all_terms)
+print "\nNumber of go-terms: %s" %len(terms)
+
+# update genelist
+gene_ids = gene2goterms.keys()
+print "\nNumber of genes: %s" %len(gene_ids)

 rpy.r.library("GOSim")
 # Go-term similarity matrix
 methods = ("JiangConrath","Resnik","Lin","CoutoEnriched","CoutoJiangConrath","CoutoResnik","CoutoLin")
-meth = methods[0]
+meth = methods[3]
 print "Term-term similarity matrix (method = %s)" %meth
 if meth=="CoutoEnriched":
     rpy.r('setEnrichmentFactors(alpha=0.1,beta=0.5)')
-print "Calculating term-term similarity matrix"
-tmat = rpy.r.getTermSim(terms, verbose=False, method=meth)
+print "\nCalculating term-term similarity matrix"
+rpytmat1 = rpy.with_mode(rpy.NO_CONVERSION, rpy.r.getTermSim)(terms, method=meth,verbose=False)
+tmat1 = rpy.r.assign("haha", rpytmat1)
+
+# check if all terms where found
+nanindex = where(isnan(tmat1[:,0]))[0]
+if len(nanindex)>0:
+    raise valueError("NANs in tmat")
+
+# Z-matrix
+#Z, newind = rpy_go.genego_matrix(terms, tmat, gene_ids, terms,func=mean)
+#Z = Z.T
+Z1 = rpy_go.genego_sim(gene2goterms,gene_ids,terms,rpytmat1,go_term_sim="OA",term_sim=meth)
+
+#### do another
+meth = methods[4]
+rpytmat = rpy.with_mode(rpy.NO_CONVERSION, rpy.r.getTermSim)(terms, method=meth,verbose=False)
+tmat = rpy.r.assign("haha", rpytmat)

 # check if all terms where found
 nanindex = where(isnan(tmat[:,0]))[0]
-keep=[]
-has_miss = False
 if len(nanindex)>0:
-    has_miss = True
-    print "Some terms missing in similarity matrix"
-    keep = where(isnan(tmat[:,0])!=True)[0]
-    print "Number of nans: %d" %len(nanindex)
-    tmat_new = tmat[:,keep][keep,:]
-    new_terms = [i for ind,i in enumerate(terms) if ind in keep]
-    bad_terms = [i for ind,i in enumerate(terms) if ind not in keep]
-    # update go-term dict
-    for gene,trm in goterms.items():
-        for t in trm:
-            if t in bad_terms:
-                trm.remove(t)
-        if len(trm)==0:
-            print "Removing gene: %s" %gene
-        goterms[gene]=trm
-    terms = new_terms
-    tmat = tmat_new
+    raise valueError("NANs in tmat")

 # Z-matrix
-# func (min, max, median, mean, etc),
-# func decides on the representation of gene-> goterm when multiple
-# goterms exist for one gene
-Z, newind = rpy_go.genego_matrix(goterms, tmat, gene_ids, terms,func=mean)
-Z = Z.T
+#Z, newind = rpy_go.genego_matrix(terms, tmat, gene_ids, terms,func=mean)
+#Z = Z.T
+Z = rpy_go.genego_sim(gene2goterms,gene_ids,terms,rpytmat,go_term_sim="OA",term_sim=meth)

-# update X matrix (no go-terms available)
-Xr = Xr[:,newind]
-new_gene_ids = asarray(gene_ids)[newind]
+# update data (X) matrix
+#newind = [gene2ind[gene] for gene in gene_ids]
+newind = DX.get_indices('gene_ids', gene_ids)
+Xr = X[:,newind]
+#new_gene_ids = asarray(gene_ids)[newind]

 ######## LPLSR ########
@@ -112,11 +123,14 @@ if alpha_check:
         rmsep , yhat, ce = cv_lpls(Xr, Y, Z, a_max, alpha=alpha)
         Rmsep.append(rmsep)
         Yhat.append(yhat)
-        CE.append(yhat)
+        CE.append(ce)

 Rmsep = asarray(Rmsep)
 Yhat = asarray(Yhat)
 CE = asarray(CE)
+figure(200)

 # Significance Hotellings T
 Wx, Wz, Wy, = jk_lpls(Xr, Y, Z, aopt)
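Here `jk_lpls` supplies jackknifed (cross-validation) weight estimates for a Hotelling's T² significance test per variable. A rough numpy sketch of one common form of that statistic, assuming `Wcv` stacks one (variables x components) weight matrix per CV segment; this is an illustration, not laydi's own hotelling routine:

    import numpy as np

    def hotelling_t2(Wcv, W):
        """Per-variable T2 from jackknife deviations around the full-model W."""
        n_seg, n_var, _ = Wcv.shape
        diff = Wcv - W[np.newaxis, :, :]
        t2 = np.empty(n_var)
        for i in range(n_var):
            # covariance of segment-wise deviations for variable i;
            # assumes n_seg >= n components so S is invertible
            S = np.dot(diff[:, i, :].T, diff[:, i, :]) / n_seg
            t2[i] = np.dot(W[i], np.linalg.solve(S, W[i]))
        return t2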
@@ -135,7 +149,13 @@ for a in range(m):
 ylim([rmsep.min()-.05, rmsep.max()+.05])
 title('RMSEP')

-figure(2) # Hypoid correlations
+figure(2)
+for a in range(m):
+    bar(arange(a_max)+a*bar_w+.1, class_error[:,a], width=bar_w, color=bar_col[a])
+ylim([class_error.min()-.05, class_error.max()+.05])
+title('Classification accuracy')
+
+figure(3) # Hypoid correlations
 plot_corrloads(Rz, pc1=0, pc2=1, s=tsqz/10.0, c='b', zorder=5, expvar=evz, ax=None)
 ax = gca()
 ylabels = DY.get_identifiers('_cat', sorted=True)