Added SAM and conditioned enrichment analysis
This commit is contained in:
parent
8d4848d5fa
commit
d510e092e3
|
@ -25,11 +25,6 @@ class SmallTestWorkflow(workflow.Workflow):
|
||||||
#load.add_function(DatasetLoadFunctionCYCLE())
|
#load.add_function(DatasetLoadFunctionCYCLE())
|
||||||
self.add_stage(load)
|
self.add_stage(load)
|
||||||
|
|
||||||
# PREPROCESSING
|
|
||||||
# prep = workflow.Stage('prep', 'Preprocessing')
|
|
||||||
# prep.add_function(LogFunction())
|
|
||||||
# self.add_stage(prep)
|
|
||||||
|
|
||||||
# NETWORK PREPROCESSING
|
# NETWORK PREPROCESSING
|
||||||
#net = workflow.Stage('net', 'Network integration')
|
#net = workflow.Stage('net', 'Network integration')
|
||||||
#net.add_function(DiffKernelFunction())
|
#net.add_function(DiffKernelFunction())
|
||||||
|
@ -42,6 +37,7 @@ class SmallTestWorkflow(workflow.Workflow):
|
||||||
model.add_function(blmfuncs.PCA())
|
model.add_function(blmfuncs.PCA())
|
||||||
model.add_function(blmfuncs.PLS())
|
model.add_function(blmfuncs.PLS())
|
||||||
model.add_function(blmfuncs.LPLS())
|
model.add_function(blmfuncs.LPLS())
|
||||||
|
model.add_function(SAM())
|
||||||
|
|
||||||
#model.add_function(bioconFuncs.SAM(app))
|
#model.add_function(bioconFuncs.SAM(app))
|
||||||
self.add_stage(model)
|
self.add_stage(model)
|
||||||
|
@ -60,6 +56,7 @@ class SmallTestWorkflow(workflow.Workflow):
|
||||||
# go.add_function(gobrowser.TTestFunction())
|
# go.add_function(gobrowser.TTestFunction())
|
||||||
go.add_function(gobrowser.PlotDagFunction())
|
go.add_function(gobrowser.PlotDagFunction())
|
||||||
go.add_function(GoEnrichment())
|
go.add_function(GoEnrichment())
|
||||||
|
go.add_function(GoEnrichmentCond())
|
||||||
self.add_stage(go)
|
self.add_stage(go)
|
||||||
|
|
||||||
# EXTRA PLOTS
|
# EXTRA PLOTS
|
||||||
|
@ -144,6 +141,58 @@ class DatasetLoadFunctionCYCLE(workflow.Function):
|
||||||
|
|
||||||
|
|
||||||
##### WORKFLOW SPECIFIC FUNCTIONS ######
|
##### WORKFLOW SPECIFIC FUNCTIONS ######
|
||||||
|
class SAM(workflow.Function):
|
||||||
|
def __init__(self, id='sam', name='SAM'):
|
||||||
|
workflow.Function.__init__(self, id, name)
|
||||||
|
|
||||||
|
def run(self, x, y):
|
||||||
|
|
||||||
|
n_iter = 50 #B
|
||||||
|
alpha = 0.01 #cut off on qvals
|
||||||
|
|
||||||
|
###############
|
||||||
|
|
||||||
|
# Main function call
|
||||||
|
|
||||||
|
# setup prelimenaries
|
||||||
|
import rpy
|
||||||
|
rpy.r.library("siggenes")
|
||||||
|
rpy.r.library("multtest")
|
||||||
|
|
||||||
|
cl = scipy.dot(y.asarray(), scipy.diag([1,2,3]) ).sum(1)
|
||||||
|
data = x.asarray().T
|
||||||
|
sam = rpy.r.sam(data, cl=cl, B=n_iter, var_equal=False,med=False,s0=scipy.nan,rand=scipy.nan)
|
||||||
|
qvals = scipy.asarray(rpy.r.slot(sam, "p.value"))
|
||||||
|
pvals = scipy.asarray(rpy.r.slot(sam, "q.value"))
|
||||||
|
|
||||||
|
sam_index = (qvals<alpha).nonzero()[0]
|
||||||
|
|
||||||
|
# Update selection object
|
||||||
|
dim_name = x.get_dim_name(1)
|
||||||
|
sam_selection = x.get_identifiers(dim_name, indices=sam_index)
|
||||||
|
main.project.set_selection(dim_name, sam_selection)
|
||||||
|
|
||||||
|
sel = dataset.Selection('SAM selection')
|
||||||
|
sel.select(dim_name, sam_selection)
|
||||||
|
logger.log('notice','Number of significant varibles (SAM): %s' %len(sam_selection))
|
||||||
|
|
||||||
|
# ## OUTPUT ###
|
||||||
|
xcolname = x.get_dim_name(1) # genes
|
||||||
|
x_col_ids = [xcolname, x.get_identifiers(xcolname, sorted=True)]
|
||||||
|
sing_id = ['_john', ['0']] #singleton
|
||||||
|
D_qvals = dataset.Dataset(qvals, (x_col_ids, sing_id), name='q_vals')
|
||||||
|
D_pvals = dataset.Dataset(pvals, (x_col_ids, sing_id), name='p_vals')
|
||||||
|
|
||||||
|
# plots
|
||||||
|
s_indx = qvals.flatten().argsort()
|
||||||
|
s_ids = [x_col_ids[0],[x_col_ids[1][i] for i in s_indx]]
|
||||||
|
xindex = scipy.arange(len(qvals))
|
||||||
|
qvals_s = qvals.take(s_indx)
|
||||||
|
D_qs = dataset.Dataset(qvals_s, (s_ids, sing_id), name="sorted qvals")
|
||||||
|
Dind = dataset.Dataset(xindex, (s_ids, sing_id), name="dum")
|
||||||
|
st = plots.ScatterPlot(D_qs, Dind, 'gene_ids', '_john', '0', '0', s=10, name='SAM qvals')
|
||||||
|
|
||||||
|
return [D_qvals, D_pvals, D_qs, st, sel]
|
||||||
|
|
||||||
|
|
||||||
class DiffKernelFunction(workflow.Function):
|
class DiffKernelFunction(workflow.Function):
|
||||||
|
@ -294,18 +343,6 @@ class KEGGQuery(workflow.Function):
|
||||||
webbrowser.open(web_str)
|
webbrowser.open(web_str)
|
||||||
|
|
||||||
|
|
||||||
class LogFunction(workflow.Function):
|
|
||||||
def __init__(self):
|
|
||||||
workflow.Function.__init__(self, 'log', 'Log')
|
|
||||||
|
|
||||||
def run(self, data):
|
|
||||||
logger.log('notice', 'Taking the log of dataset %s' % data.get_name())
|
|
||||||
d = data.copy()
|
|
||||||
d._array = scipy.log(d._array)
|
|
||||||
d._name = 'log(%s)' % data.get_name()
|
|
||||||
return [d]
|
|
||||||
|
|
||||||
|
|
||||||
class GoEnrichment(workflow.Function):
|
class GoEnrichment(workflow.Function):
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
workflow.Function.__init__(self, 'goenrich', 'Go Enrichment')
|
workflow.Function.__init__(self, 'goenrich', 'Go Enrichment')
|
||||||
|
@ -320,6 +357,7 @@ class GoEnrichment(workflow.Function):
|
||||||
logger.log('notice', 'No dimension called [gene_ids] in dataset: %s', data.get_name())
|
logger.log('notice', 'No dimension called [gene_ids] in dataset: %s', data.get_name())
|
||||||
return
|
return
|
||||||
universe = list(data.get_identifiers('gene_ids'))
|
universe = list(data.get_identifiers('gene_ids'))
|
||||||
|
logger.log('notice', 'Universe consists of %s gene ids from %s' %(len(universe), data.get_name()))
|
||||||
# Get current selection and validate
|
# Get current selection and validate
|
||||||
curr_sel = main.project.get_selection()
|
curr_sel = main.project.get_selection()
|
||||||
selected_genes = list(curr_sel['gene_ids'])
|
selected_genes = list(curr_sel['gene_ids'])
|
||||||
|
@ -328,7 +366,7 @@ class GoEnrichment(workflow.Function):
|
||||||
return
|
return
|
||||||
|
|
||||||
# Hypergeometric parameter object
|
# Hypergeometric parameter object
|
||||||
pval_cutoff = 0.99
|
pval_cutoff = 0.9999
|
||||||
cond = False
|
cond = False
|
||||||
test_direction = 'over'
|
test_direction = 'over'
|
||||||
params = rpy.r.new("GOHyperGParams",
|
params = rpy.r.new("GOHyperGParams",
|
||||||
|
@ -353,3 +391,55 @@ class GoEnrichment(workflow.Function):
|
||||||
(row_ids, col_ids),
|
(row_ids, col_ids),
|
||||||
name='P values (enrichment)')
|
name='P values (enrichment)')
|
||||||
return [xout]
|
return [xout]
|
||||||
|
|
||||||
|
|
||||||
|
class GoEnrichmentCond(workflow.Function):
|
||||||
|
""" Enrichment conditioned on dag structure."""
|
||||||
|
def __init__(self):
|
||||||
|
workflow.Function.__init__(self, 'goenrich', 'Go Cond. Enrich.')
|
||||||
|
|
||||||
|
def run(self, data):
|
||||||
|
import rpy
|
||||||
|
rpy.r.library("GOstats")
|
||||||
|
|
||||||
|
# Get universe
|
||||||
|
# Here, we are using a defined dataset to represent the universe
|
||||||
|
if not 'gene_ids' in data:
|
||||||
|
logger.log('notice', 'No dimension called [gene_ids] in dataset: %s', data.get_name())
|
||||||
|
return
|
||||||
|
universe = list(data.get_identifiers('gene_ids'))
|
||||||
|
logger.log('notice', 'Universe consists of %s gene ids from %s' %(len(universe), data.get_name()))
|
||||||
|
# Get current selection and validate
|
||||||
|
curr_sel = main.project.get_selection()
|
||||||
|
selected_genes = list(curr_sel['gene_ids'])
|
||||||
|
if len(selected_genes)==0:
|
||||||
|
logger.log('notice', 'This function needs a current selection!')
|
||||||
|
return
|
||||||
|
|
||||||
|
# Hypergeometric parameter object
|
||||||
|
pval_cutoff = 0.9999
|
||||||
|
cond = True
|
||||||
|
test_direction = 'over'
|
||||||
|
params = rpy.r.new("GOHyperGParams",
|
||||||
|
geneIds=selected_genes,
|
||||||
|
annotation="hgu133a",
|
||||||
|
ontology="BP",
|
||||||
|
pvalueCutoff=pval_cutoff,
|
||||||
|
conditional=cond,
|
||||||
|
testDirection=test_direction
|
||||||
|
)
|
||||||
|
# run test
|
||||||
|
# result.keys(): ['Count', 'Term', 'OddsRatio', 'Pvalue', 'ExpCount', 'GOBPID', 'Size']
|
||||||
|
result = rpy.r.summary(rpy.r.hyperGTest(params))
|
||||||
|
|
||||||
|
# dataset
|
||||||
|
terms = result['GOBPID']
|
||||||
|
pvals = scipy.log(scipy.asarray(result['Pvalue']))
|
||||||
|
row_ids = ('go-terms', terms)
|
||||||
|
col_ids = ('_john', ['_doe'])
|
||||||
|
|
||||||
|
xout = dataset.Dataset(pvals,
|
||||||
|
(row_ids, col_ids),
|
||||||
|
name='P values (enrichment)')
|
||||||
|
return [xout]
|
||||||
|
|
||||||
|
|
Reference in New Issue