Tralala ...

This commit is contained in:
2007-11-07 12:34:13 +00:00
parent ca51a0b382
commit aef2e1daf0
11 changed files with 316 additions and 121 deletions

View File

@@ -4,7 +4,7 @@ import rpy
silent_eval = rpy.with_mode(rpy.NO_CONVERSION, rpy.r)
import collections
def goterms_from_gene(genelist, ontology='BP', garbage=None):
def goterms_from_gene(genelist, ontology='BP', garbage=None, ic_cutoff=2.0):
""" Returns the go-terms from a specified genelist (Entrez id).
Recalculates the information content if needed based on selected evidence codes.
@@ -37,38 +37,30 @@ def goterms_from_gene(genelist, ontology='BP', garbage=None):
ic = rpy.r('get("IC", envir=GOSimEnv)')
print "loading GO definitions environment"
gene2terms = {}
gene2terms = collections.defaultdict(list)
cc = 0
dd = 0
ii = 0
for gene in genelist:
info = rpy.r('GOENTREZID2GO[["' + str(gene) + '"]]')
#print info
if info:
skip=False
for term, desc in info.items():
if ic.get(term)==scipy.isinf:
print "\nIC is Inf on this GO term %s for this gene: %s" %(term,gene)
skip=True
if ic.get(term)==None:
#print "\nHave no IC on this GO term %s for this gene: %s" %(term,gene)
skip=True
ii += 1
if desc['Ontology']!=ontology:
#print "\nThis GO term %s belongs to: %s:" %(term,desc['Ontology'])
skip = True
dd += 1
if not skip:
if gene2terms.has_key(gene):
gene2terms[gene].append(term)
else:
gene2terms[gene] = [term]
jj = 0
all = rpy.r.mget(gene_ids, rpy.r.GOENTREZID2GO,ifnotfound="NA")
for gene, terms in all.items():
if terms!="NA":
for term,desc in terms.items():
if desc['Ontology'].lower() == ontology and term in ic:
if ic[term]>.88:
jj+=1
continue
cc+=1
gene2terms[gene].append(term)
else:
dd+=1
else:
cc += 1
print "\nNumber of genes without annotation: %d" %cc
ii+=1
print "\nNumber of genes without annotation: %d" %ii
print "\nNumber of genes not in %s : %d " %(ontology, dd)
print "\nNumber of genes with infs : %d " %ii
print "\nNumber of genes with too high IC : %d " %jj
return gene2terms
@@ -220,3 +212,15 @@ def data_aff2loc_hgu133a(X, aff_ids, verbose=False):
print "Ids with unique probeset: %d" %s
X = scipy.asarray(new_data).T
return X, new_ids
def R_PLS(x, y, ncomp=3, validation='"LOO"'):
    """Run ``plsr(Y~X, ...)`` from the R ``pls`` package through rpy.

    ``x`` and ``y`` are assigned to the R variables ``X`` and ``Y``, the
    call text is assembled, echoed to stdout, and evaluated by ``rpy.r``.

    Parameters:
        x: value bound to ``X`` in R (right-hand side of the formula).
        y: value bound to ``Y`` in R (left-hand side of the formula).
        ncomp: inserted into the call as ``ncomp=<str(ncomp)>``.
        validation: text pasted verbatim after ``validation=``; it must
            therefore carry its own R quoting, as the default ``'"LOO"'``
            does.

    Returns:
        Whatever ``rpy.r`` returns for the evaluated call.
        NOTE(review): the Python-side shape of the result depends on the
        active rpy conversion mode -- confirm against callers.
    """
    r_session = rpy.r
    r_session.library("pls")

    # The formula refers to X and Y by name, so both must exist in R
    # before the call string is evaluated.
    for r_name, value in (("X", x), ("Y", y)):
        r_session.assign(r_name, value)

    pieces = ["plsr(Y~X, ncomp=", str(ncomp), ", validation=", validation, ")"]
    callstr = "".join(pieces)
    print(callstr)
    return r_session(callstr)
def gogene()