diff --git a/workflows/smokers.py b/workflows/smokers.py index 4d5c91a..39718ba 100644 --- a/workflows/smokers.py +++ b/workflows/smokers.py @@ -1,7 +1,7 @@ import sys,os import webbrowser -from fluents import logger, plots,workflow,dataset +from fluents import logger, plots,workflow,dataset,main from fluents.lib import blmfuncs,nx_utils,validation,engines,cx_stats,cx_utils import gobrowser, geneontology import scipy @@ -31,11 +31,11 @@ class SmallTestWorkflow(workflow.Workflow): # self.add_stage(prep) # NETWORK PREPROCESSING - net = workflow.Stage('net', 'Network integration') - net.add_function(DiffKernelFunction()) - net.add_function(ModKernelFunction()) + #net = workflow.Stage('net', 'Network integration') + #net.add_function(DiffKernelFunction()) + #net.add_function(ModKernelFunction()) #net.add_function(RandDiffKernelFunction()) - self.add_stage(net) + #self.add_stage(net) # BLM's model = workflow.Stage('models', 'Models') @@ -59,6 +59,7 @@ class SmallTestWorkflow(workflow.Workflow): # go.add_function(gobrowser.DistanceToSelectionFunction()) # go.add_function(gobrowser.TTestFunction()) go.add_function(gobrowser.PlotDagFunction()) + go.add_function(GoEnrichment()) self.add_stage(go) # EXTRA PLOTS @@ -304,3 +305,51 @@ class LogFunction(workflow.Function): d._name = 'log(%s)' % data.get_name() return [d] + +class GoEnrichment(workflow.Function): + def __init__(self): + workflow.Function.__init__(self, 'goenrich', 'Go Enrichment') + + def run(self, data): + import rpy + rpy.r.library("GOstats") + + # Get universe + # Here, we are using a defined dataset to represent the universe + if not 'gene_ids' in data: + logger.log('notice', 'No dimension called [gene_ids] in dataset: %s', data.get_name()) + return + universe = list(data.get_identifiers('gene_ids')) + # Get current selection and validate + curr_sel = main.project.get_selection() + selected_genes = list(curr_sel['gene_ids']) + if len(selected_genes)==0: + logger.log('notice', 'This function needs a current selection!') + return + + # Hypergeometric parameter object + pval_cutoff = 0.99 + cond = False + test_direction = 'over' + params = rpy.r.new("GOHyperGParams", + geneIds=selected_genes, + annotation="hgu133a", + ontology="BP", + pvalueCutoff=pval_cutoff, + conditional=cond, + testDirection=test_direction + ) + # run test + # result.keys(): ['Count', 'Term', 'OddsRatio', 'Pvalue', 'ExpCount', 'GOBPID', 'Size'] + result = rpy.r.summary(rpy.r.hyperGTest(params)) + + # dataset + terms = result['GOBPID'] + pvals = scipy.log(scipy.asarray(result['Pvalue'])) + row_ids = ('go-terms', terms) + col_ids = ('_john', ['_doe']) + + xout = dataset.Dataset(pvals, + (row_ids, col_ids), + name='P values (enrichment)') + return [xout]