This commit is contained in:
2008-02-08 14:58:46 +00:00
parent 6b78629946
commit cb6d6b87cc
3 changed files with 560 additions and 178 deletions

View File

@@ -4,7 +4,7 @@ import rpy
silent_eval = rpy.with_mode(rpy.NO_CONVERSION, rpy.r)
import collections
def goterms_from_gene(genelist, ontology='BP', garbage=None, ic_cutoff=2.0):
def goterms_from_gene(genelist, ontology='BP', garbage=['IEA'], ic_cutoff=2.0, verbose=False):
""" Returns the go-terms from a specified genelist (Entrez id).
Recalculates the information content if needed based on selected evidence codes.
@@ -30,10 +30,17 @@ def goterms_from_gene(genelist, ontology='BP', garbage=None, ic_cutoff=2.0):
ddef = False
if ontology=='BP' and garbage!=None:
# This is for ont=BP and garbage =['IEA', 'ISS', 'ND']
rpy.r.load("ICsBPIMP_IGI_IPI_ISS_IDA_IEP_TAS_NAS_IC.rda")
rpy.r.load("ICsBP_small.rda") # Excludes IEA
ic = rpy.r.assign("IC",rpy.r.IC, envir=rpy.r.GOSimEnv)
print len(ic)
max_val = 0
for key, val in ic.items():
if val != scipy.inf:
if val>max_val:
max_val = val
for key, val in ic.items():
ic[key] = val/max_val
else:
# NB! this IC is just for BP
ic = rpy.r('get("IC", envir=GOSimEnv)')
print "loading GO definitions environment"
@@ -42,25 +49,57 @@ def goterms_from_gene(genelist, ontology='BP', garbage=None, ic_cutoff=2.0):
dd = 0
ii = 0
jj = 0
all = rpy.r.mget(gene_ids, rpy.r.GOENTREZID2GO,ifnotfound="NA")
kk = 0
all = rpy.r.mget(genelist, rpy.r.GOENTREZID2GO,ifnotfound="NA")
n_ic = len(ic)
print "Number of terms with IC: %d" %n_ic
stopp = False
for gene, terms in all.items():
if verbose:
print "\n\n ======ITEM========\n"
print "Gene: " + str(gene)
print "Number of terms: %d " %len(terms)
print terms
print "---\n"
if stopp:
1/0
if terms!="NA":
for term,desc in terms.items():
if desc['Ontology'].lower() == ontology and term in ic:
if ic[term]>.88:
for term, desc in terms.items():
if verbose:
print "\nChecking term: " + str(term)
print "With description: " + str(desc)
if desc['Ontology'].lower() == ontology.lower() and term in ic:
if ic[term]>ic_cutoff:
#print ic[term]
jj+=1
if verbose:
print "too high" + str((gene, term))
stopp = True
continue
cc+=1
cc += 1
if verbose:
print "accepted" + str((gene, term))
gene2terms[gene].append(term)
else:
dd+=1
if verbose:
print "Not accepted: " + str((gene, term))
if term not in ic:
if verbose:
print "Not in IC: " + str((gene, term))
kk+=1
if desc['Ontology'].lower() != ontology:
if verbose:
print "Not in Ontology" + str((gene, term))
dd+=1
else:
ii+=1
print "\nNumber of genes without annotation: %d" %ii
print "\nNumber of genes not in %s : %d " %(ontology, dd)
print "\nNumber of genes with too high IC : %d " %jj
print "Number of genes total: %d" %len(all)
print "\nNumber of genes without annotation: (%d (NA))" %ii
print "\nNumber of terms with annoation but no IC: %d" %kk
print "\nNumber of terms not in %s : %d " %(ontology, dd)
print "\nNumber of terms with too high IC : %d " %jj
print "\n Number of accepted terms: %d" %cc
return gene2terms
@@ -156,7 +195,7 @@ def parents_dag(go_terms, ontology=['BP']):
def gene_GO_hypergeo_test(genelist,universe="entrezUniverse",ontology="BP",chip = "hgu133a",pval_cutoff=0.01,cond=False,test_direction="over"):
#assert(scipy.alltrue([True for i in genelist if i in universe]))
universeGeneIds=universe
universeGeneIds = universe
params = rpy.r.new("GOHyperGParams",
geneIds=genelist,
annotation="hgu133a",
@@ -222,5 +261,3 @@ def R_PLS(x,y,ncomp=3, validation='"LOO"'):
result = rpy.r(callstr)
return result
def gogene()