iups

2007-07-23 13:25:34 +00:00
parent 9db5991108
commit 155dfada5c
4 changed files with 31 additions and 17 deletions
--- a/scripts/lpls/run_smoker.py
+++ b/scripts/lpls/run_smoker.py
@@ -32,14 +32,14 @@ print "SAM done"
 qq = rpy.r('qobj<-qvalue(sam.out@p.value)')
 qvals = asarray(qq['qvalues'])
 # cut off
-co = 0.1
+co = 0.001
 index = where(qvals<0.01)[0]

 # Subset data
 X = DX.asarray()
 Xr = X[:,index]
 gene_ids = DX.get_identifiers('gene_ids', index)
-
+print "\nWorking on subset with %s genes " %len(gene_ids)
 ### Build GO data ####

 print "Go terms ..."
@@ -48,13 +48,15 @@ terms = set()
 for t in goterms.values():
    terms.update(t)
 terms = list(terms)
+print "Number of go-terms: %s" %len(terms)
 rpy.r.library("GOSim")
 # Go-term similarity matrix
 methods = ("JiangConrath","Resnik","Lin","CoutoEnriched","CoutoJiangConrath","CoutoResnik","CoutoLin")
-meth = methods[2]
+meth = methods[0]
 print "Term-term similarity matrix (method = %s)" %meth
 if meth=="CoutoEnriched":
    rpy.r('setEnrichmentFactors(alpha=0.1,beta=0.5)')
+print "Calculating term-term similarity matrix"
 tmat = rpy.r.getTermSim(terms, verbose=False, method=meth)
 # check if all terms where found
 nanindex = where(isnan(tmat[:,0]))[0]
@@ -93,20 +95,21 @@ gene_ids = asarray(gene_ids)[newind]
 print "LPLSR ..."
 a_max = 5
 aopt = 2
-alpha=.5
+alpha=.6
 T, W, P, Q, U, L, K, B, b0, evx, evy, evz = nipals_lpls(Xr,Y,Z, a_max, alpha)

 # Correlation loadings
-dx,Rx,ssx= correlation_loadings(Xr, T, P)
-dx,Ry,ssx= correlation_loadings(Y, T, Q)
-cadx,Rz,ssx= correlation_loadings(Z.T, K, L)
+dx,Rx,rssx = correlation_loadings(Xr, T, P)
+dx,Ry,rssy = correlation_loadings(Y, T, Q)
+cadz,Rz,rssz = correlation_loadings(Z.T, W, L)
 # Prediction error
 rmsep , yhat, class_error = cv_lpls(Xr, Y, Z, a_max, alpha=alpha)

 # Significance Hotellings T
 Wx, Wz, Wy, = jk_lpls(Xr, Y, Z, aopt)
-tsqx = cx_stats.hotelling(Wx,W[:,:aopt])
-tsqz = cx_stats.hotelling(Wz,L[:,:aopt])
+Ws = W*apply_along_axis(norm, 0, T)
+tsqx = cx_stats.hotelling(Wx, Ws[:,:aopt])
+tsqz = cx_stats.hotelling(Wz, L[:,:aopt])


 ## plots ##