This commit is contained in:
2007-08-21 10:25:23 +00:00
parent 26ab6c3fe7
commit e06eeb6d17
3 changed files with 125 additions and 82 deletions

View File

@@ -2,7 +2,7 @@
import scipy
import rpy
silent_eval = rpy.with_mode(rpy.NO_CONVERSION, rpy.r)
import collections
def goterms_from_gene(genelist, ontology='BP', garbage=None):
""" Returns the go-terms from a specified genelist (Entrez id).
@@ -18,7 +18,7 @@ def goterms_from_gene(genelist, ontology='BP', garbage=None):
"IDA" : "inferred from direct assay",
"IEP" : "inferred from expression pattern",
"IEA" : "inferred from electronic annotation",
"TAS" : "traceable author statement",
"TAS" : "traceable author statement",
"NAS" : "non-traceable author statement",
"ND" : "no biological data available",
"IC" : "inferred by curator"
@@ -167,3 +167,47 @@ def gene_GO_hypergeo_test(genelist,universe="entrezUniverse",ontology="BP",chip
result = rpy.r.summary(rpy.r.hyperGTest(params))
return rpy.r.summary(result), params
def data_aff2loc_hgu133a(X, aff_ids, verbose=False):
    """Collapse probeset columns of X to one column per Entrez id.

    The probeset -> Entrez id table is read from the R annotation package
    ``hgu133a`` (object ``hgu133aENTREZID``) through rpy.  Probesets from
    ``aff_ids`` that map to the same positive Entrez id are grouped; when a
    group holds several probesets, only the column with the largest variance
    (computed along axis 0) is kept.

    Parameters
    ----------
    X : 2-D array
        Data matrix whose columns are indexed in the same order as
        ``aff_ids`` (presumably samples x probesets -- confirm with caller).
    aff_ids : sequence
        Probeset identifiers, one per column of ``X``.  If an identifier
        occurs more than once, its last position is used.
    verbose : bool, optional
        Print summary counts while translating.

    Returns
    -------
    X : array
        The selected columns, one per translated Entrez id.
    new_ids : list
        Entrez ids, in the same order as the columns of the returned ``X``.
    """
    aff_ids = scipy.asarray(aff_ids)
    if verbose:
        print("\nNumber of probesets in affy list: %s" % len(aff_ids))

    import rpy
    rpy.r.library("hgu133a")
    trans_table = rpy.r.as_list(rpy.r.hgu133aENTREZID)
    if verbose:
        # list() so the count also works where dict.values() is a view.
        entrez_values = scipy.asarray(list(trans_table.values()))
        print("Number of entrez ids: %d" % (entrez_values > 0).sum())

    # Column position of every probeset id.  Built before the grouping loop
    # so that membership is a hash lookup instead of a scan of the whole
    # aff_ids array for each entry of the annotation table.
    aff2ind = dict(zip(aff_ids.tolist(), range(len(aff_ids))))

    # Group the requested probesets by the Entrez id they map to; entries
    # whose id is not positive are dropped.
    enz2aff = collections.defaultdict(list)
    for aff, enz in trans_table.items():
        if int(enz) > 0 and aff in aff2ind:
            enz2aff[enz].append(aff)
    if verbose:
        print("\nNumber of translated entrez ids: %d" % len(enz2aff))

    new_data = []
    new_ids = []
    n_multiple = 0
    n_unique = 0
    for enz, aff_id_list in enz2aff.items():
        index = [aff2ind[aff_id] for aff_id in aff_id_list]
        xsub = X[:, index]
        if len(index) > 1:
            n_multiple += 1
            # Several probesets for one id: keep the most variable column.
            choose_this = scipy.argmax(xsub.var(0))
            new_data.append(xsub[:, choose_this].ravel())
        else:
            n_unique += 1
            new_data.append(xsub.ravel())
        new_ids.append(enz)

    if verbose:
        print("Ids with multiple probesets: %d" % n_multiple)
        print("Ids with unique probeset: %d" % n_unique)

    # new_data holds one 1-D column per id; transpose so ids run along axis 1.
    X = scipy.asarray(new_data).T
    return X, new_ids