Projects/laydi
Projects
/
laydi
Archived
7
0
Fork 0
This repository has been archived on 2024-07-04. You can view files and clone it, but cannot push or open issues or pull requests.
laydi/scripts/lpls/rpy_go.py

133 lines
4.6 KiB
Python
Raw Normal View History

2007-07-20 11:36:26 +02:00
""" Module for Gene ontology related functions called in R"""
import scipy
import rpy
silent_eval = rpy.with_mode(rpy.NO_CONVERSION, rpy.r)
def get_term_sim(termlist, method = "JiangConrath", verbose=False):
"""Returns the similariy matrix between go-terms.
Arguments:
termlist: character vector of GO terms
method: one of
("JiangConrath","Resnik","Lin","CoutoEnriched","CoutoJiangConrath","CoutoResnik","CoutoLin")
verbose: print out various information or not
"""
_methods = ("JiangConrath","Resnik","Lin","CoutoEnriched","CoutoJiangConrath","CoutoResnik","CoutoLin")
assert(method in _methods)
assert(termlist[0][:2]=='GO')
rpy.r.library("GOSim")
return rpy.r.getTermSim(termlist, method = method, verbose = verbose)
def get_gene_sim(genelist, similarity='OA',
distance="Resnick"):
rpy.r.library("GOSim")
rpy.r.assign("ids", genelist)
silent_eval('a<-getGeneSim(ids)', verbose=FALSE)
def goterms_from_gene(genelist, ontology=['BP'], garbage = ['IEA', 'ISS', 'ND']):
""" Returns the go-terms from a specified genelist (Entrez id).
"""
rpy.r.library("GO")
_CODES = {"IMP" : "inferred from mutant phenotype",
"IGI" : "inferred from genetic interaction",
"IPI" :"inferred from physical interaction",
"ISS" : "inferred from sequence similarity",
"IDA" : "inferred from direct assay",
"IEP" : "inferred from expression pattern",
"IEA" : "inferred from electronic annotation",
"TAS" : "traceable author statement",
"NAS" : "non-traceable author statement",
"ND" : "no biological data available",
"IC" : "inferred by curator"
}
_ONTOLOGIES = ['BP', 'CC', 'MF']
assert(scipy.all([(code in _CODES) for code in garbage]))
assert(scipy.all([(ont in _ONTOLOGIES) for ont in ontology]))
2007-07-20 14:32:54 +02:00
have_these = rpy.r('as.list(GOTERM)').keys()
2007-07-20 11:36:26 +02:00
goterms = {}
for gene in genelist:
goterms[gene] = []
info = rpy.r('GOENTREZID2GO[["' + str(gene) + '"]]')
#print info
if info:
for term, desc in info.items():
2007-07-20 14:32:54 +02:00
if term not in have_these:
print "GO miss:"
print term
2007-07-20 11:36:26 +02:00
if desc['Ontology'] in ontology and desc['Evidence'] not in garbage:
goterms[gene].append(term)
2007-07-20 14:32:54 +02:00
2007-07-20 11:36:26 +02:00
return goterms
def genego_matrix(goterms, tmat, gene_ids, term_ids, func=min):
ngenes = len(gene_ids)
nterms = len(term_ids)
gene2indx = {}
for i,id in enumerate(gene_ids):
gene2indx[id]=i
term2indx = {}
for i,id in enumerate(term_ids):
term2indx[id]=i
#G = scipy.empty((nterms, ngenes),'d')
G = []
newindex = []
for gene, terms in goterms.items():
g_ind = gene2indx[gene]
if len(terms)>0:
t_ind = []
newindex.append(g_ind)
for term in terms:
if term2indx.has_key(term): t_ind.append(term2indx[term])
print t_ind
subsim = tmat[t_ind, :]
gene_vec = scipy.apply_along_axis(func, 0, subsim)
G.append(gene_vec)
return scipy.asarray(G), newindex
def goterm2desc(gotermlist):
"""Returns the go-terms description keyed by go-term
"""
rpy.r.library("GO")
term2desc = {}
for term in gotermlist:
try:
desc = rpy.r('Term(GOTERM[["' +str(term)+ '"]])')
term2desc[str(term)] = desc
except:
raise Warning("Description not found for %s\n Mapping incomplete" %term)
return term2desc
def parents_dag(go_terms, ontology=['BP']):
2007-07-20 14:32:54 +02:00
""" Returns a list of lists representation of a GO DAG parents of goterms.
make the networkx graph by:
G = networkx.Digraph()
G = networkx.from_dict_of_lists(edge_dict, G)
"""
2007-07-20 11:36:26 +02:00
try:
rpy.r.library("GOstats")
except:
raise ImportError, "Gostats"
assert(go_terms[0][:3]=='GO:')
# go valid namespace
2007-07-20 14:32:54 +02:00
go_env = {'BP':rpy.r.GOBPPARENTS, 'MF':rpy.r.GOMFPARENTS, 'CC': rpy.r.GOCCPARENTS}
graph = rpy.r.GOGraph(go_terms, go_env[ontology[0]])
edges = rpy.r.edges(graph)
edges.pop('all')
edge_dict = {}
2007-07-20 17:48:59 +02:00
for head, neighbours in edges.items():
for nn in neighbours.values():
if edge_dict.has_key(nn):
edge_dict[nn].append(head)
else:
edge_dict[nn] = [head]
2007-07-20 14:32:54 +02:00
return edge_dict
2007-07-23 15:25:34 +02:00
def gene_GO_hypergeo_test(genelist, universe, ontology = ['BP']):
pvals = geneGoHyperGeoTest(entrezGeneIds, lib=None, ontology=ontology[0], universe=universe)
return pvals