Added GO-category enrichment analysis

This commit is contained in:
Arnar Flatberg 2007-08-02 11:18:48 +00:00
parent 973470b595
commit 2d419a9862

View File

@ -1,7 +1,7 @@
import sys,os
import webbrowser
from fluents import logger, plots,workflow,dataset
from fluents import logger, plots,workflow,dataset,main
from fluents.lib import blmfuncs,nx_utils,validation,engines,cx_stats,cx_utils
import gobrowser, geneontology
import scipy
@ -31,11 +31,11 @@ class SmallTestWorkflow(workflow.Workflow):
# self.add_stage(prep)
# NETWORK PREPROCESSING
net = workflow.Stage('net', 'Network integration')
net.add_function(DiffKernelFunction())
net.add_function(ModKernelFunction())
#net = workflow.Stage('net', 'Network integration')
#net.add_function(DiffKernelFunction())
#net.add_function(ModKernelFunction())
#net.add_function(RandDiffKernelFunction())
self.add_stage(net)
#self.add_stage(net)
# BLM's
model = workflow.Stage('models', 'Models')
@ -59,6 +59,7 @@ class SmallTestWorkflow(workflow.Workflow):
# go.add_function(gobrowser.DistanceToSelectionFunction())
# go.add_function(gobrowser.TTestFunction())
go.add_function(gobrowser.PlotDagFunction())
go.add_function(GoEnrichment())
self.add_stage(go)
# EXTRA PLOTS
@ -304,3 +305,51 @@ class LogFunction(workflow.Function):
d._name = 'log(%s)' % data.get_name()
return [d]
class GoEnrichment(workflow.Function):
def __init__(self):
workflow.Function.__init__(self, 'goenrich', 'Go Enrichment')
def run(self, data):
import rpy
rpy.r.library("GOstats")
# Get universe
# Here, we are using a defined dataset to represent the universe
if not 'gene_ids' in data:
logger.log('notice', 'No dimension called [gene_ids] in dataset: %s', data.get_name())
return
universe = list(data.get_identifiers('gene_ids'))
# Get current selection and validate
curr_sel = main.project.get_selection()
selected_genes = list(curr_sel['gene_ids'])
if len(selected_genes)==0:
logger.log('notice', 'This function needs a current selection!')
return
# Hypergeometric parameter object
pval_cutoff = 0.99
cond = False
test_direction = 'over'
params = rpy.r.new("GOHyperGParams",
geneIds=selected_genes,
annotation="hgu133a",
ontology="BP",
pvalueCutoff=pval_cutoff,
conditional=cond,
testDirection=test_direction
)
# run test
# result.keys(): ['Count', 'Term', 'OddsRatio', 'Pvalue', 'ExpCount', 'GOBPID', 'Size']
result = rpy.r.summary(rpy.r.hyperGTest(params))
# dataset
terms = result['GOBPID']
pvals = scipy.log(scipy.asarray(result['Pvalue']))
row_ids = ('go-terms', terms)
col_ids = ('_john', ['_doe'])
xout = dataset.Dataset(pvals,
(row_ids, col_ids),
name='P values (enrichment)')
return [xout]