Some stuff on tasks.
This commit is contained in:
parent
16ed2fd9c9
commit
bfb039328c
|
@ -88,12 +88,6 @@ class Workflow:
|
||||||
for fun in stage.functions:
|
for fun in stage.functions:
|
||||||
print ' %s' % fun.name
|
print ' %s' % fun.name
|
||||||
|
|
||||||
# def add_project(self,project):
|
|
||||||
# if project == None:
|
|
||||||
# logger.log('notice','Proejct is empty')
|
|
||||||
# logger.log('notice','Project added in : %s' %self.name)
|
|
||||||
# self.project = project
|
|
||||||
|
|
||||||
|
|
||||||
class EmptyWorkflow(Workflow):
|
class EmptyWorkflow(Workflow):
|
||||||
name = 'Empty Workflow'
|
name = 'Empty Workflow'
|
||||||
|
@ -122,24 +116,32 @@ class Stage:
|
||||||
self.functions_by_id[fun.id] = fun
|
self.functions_by_id[fun.id] = fun
|
||||||
|
|
||||||
|
|
||||||
class Function:
|
class Task:
|
||||||
"""A Function object encapsulates a function on a data set.
|
"""A Function object encapsulates a function on a data set.
|
||||||
|
|
||||||
Each Function instance encapsulates some function that can be applied
|
Each Function instance encapsulates some function that can be applied
|
||||||
to one or more types of data.
|
to one or more types of data.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def __init__(self, id, name):
|
title = ""
|
||||||
self.id = id
|
|
||||||
self.name = name
|
def __init__(self, input):
|
||||||
|
self.input = input
|
||||||
|
self.options = Options()
|
||||||
|
|
||||||
|
self.datasets = {}
|
||||||
|
self.arrays = {}
|
||||||
|
self.plots = {}
|
||||||
|
|
||||||
# just return a Validation object
|
# just return a Validation object
|
||||||
def validate_input(input):
|
def validate_input(input):
|
||||||
return Validation(True,"Validation Not Implemented")
|
return Validation(True,"Validation Not Implemented")
|
||||||
|
|
||||||
def run(self):
|
def run(self):
|
||||||
pass
|
print self.input
|
||||||
|
|
||||||
|
def show_options_gui(self, editable=False):
|
||||||
|
pass
|
||||||
|
|
||||||
class Validation:
|
class Validation:
|
||||||
def __init__(self,result, reason):
|
def __init__(self,result, reason):
|
||||||
|
@ -461,3 +463,9 @@ class WorkflowMenu (gtk.Menu):
|
||||||
menuitem.show()
|
menuitem.show()
|
||||||
return menuitem
|
return menuitem
|
||||||
|
|
||||||
|
|
||||||
|
class Options():
|
||||||
|
def __init__(self):
|
||||||
|
pass
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -10,113 +10,28 @@ class TestWorkflow (workflow.Workflow):
|
||||||
|
|
||||||
name = 'Test Workflow'
|
name = 'Test Workflow'
|
||||||
ident = 'test'
|
ident = 'test'
|
||||||
description = 'Test Gene Ontology Workflow. This workflow currently serves as a general testing workflow.'
|
description = 'This workflow currently serves as a general testing workflow.'
|
||||||
|
|
||||||
def __init__(self, app):
|
def __init__(self, app):
|
||||||
workflow.Workflow.__init__(self, app)
|
workflow.Workflow.__init__(self, app)
|
||||||
|
|
||||||
load = workflow.Stage('load', 'Load Data')
|
load = workflow.Stage('load', 'Test Data')
|
||||||
load.add_function(CelFileImportFunction())
|
|
||||||
load.add_function(DataLoadTestFunction(self))
|
|
||||||
load.add_function(TestDataFunction())
|
load.add_function(TestDataFunction())
|
||||||
load.add_function(DatasetLoadFunction())
|
|
||||||
load.add_function(SelectFunction())
|
|
||||||
self.add_stage(load)
|
self.add_stage(load)
|
||||||
|
|
||||||
preproc = workflow.Stage('preprocess', 'Preprocessing')
|
|
||||||
preproc.add_function(DatasetLog())
|
|
||||||
preproc.add_function(workflow.Function('rma', 'RMA'))
|
|
||||||
self.add_stage(preproc)
|
|
||||||
|
|
||||||
go = workflow.Stage('go', 'Gene Ontology Data')
|
|
||||||
go.add_function(GODistanceFunction())
|
|
||||||
go.add_function(ImagePlotFunction())
|
|
||||||
self.add_stage(go)
|
|
||||||
|
|
||||||
regression = workflow.Stage('regression', 'Regression')
|
|
||||||
regression.add_function(workflow.Function('pls', 'PLS'))
|
|
||||||
self.add_stage(regression)
|
|
||||||
|
|
||||||
explore = workflow.Stage('explore', 'Explorative analysis')
|
|
||||||
explore.add_function(PCAFunction(self))
|
|
||||||
self.add_stage(explore)
|
|
||||||
|
|
||||||
save = workflow.Stage('save', 'Save Data')
|
|
||||||
save.add_function(DatasetSaveFunction())
|
|
||||||
self.add_stage(save)
|
|
||||||
|
|
||||||
|
class TestDataTask(workflow.Task):
|
||||||
class LoadAnnotationsFunction(workflow.Function):
|
title = "Test data"
|
||||||
|
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
workflow.Function.__init__(self, 'load-go-ann', 'Load Annotations')
|
workflow.Task.__init__(self)
|
||||||
self.annotations = None
|
|
||||||
|
|
||||||
def load_file(self, filename):
|
|
||||||
f = open(filename)
|
|
||||||
self.annotations = Annotations('genes', 'go-terms')
|
|
||||||
logger.log('notice', 'Loading annotation file: %s' % filename)
|
|
||||||
|
|
||||||
for line in f.readlines():
|
|
||||||
val = line.split(' \t')
|
|
||||||
|
|
||||||
if len(val) > 1:
|
|
||||||
val = [v.strip() for v in val]
|
|
||||||
retval.add_annotations('genes', val[0],
|
|
||||||
'go-terms', set(val[1:]))
|
|
||||||
|
|
||||||
def on_response(self, dialog, response):
|
|
||||||
if response == gtk.RESPONSE_OK:
|
|
||||||
logger.log('notice', 'Reading file: %s' % dialog.get_filename())
|
|
||||||
self.load_file(dialog.get_filename())
|
|
||||||
|
|
||||||
def run(self):
|
|
||||||
btns = ('Open', gtk.RESPONSE_OK, \
|
|
||||||
'Cancel', gtk.RESPONSE_CANCEL)
|
|
||||||
dialog = gtk.FileChooserDialog('Open GO Annotation File',
|
|
||||||
buttons=btns)
|
|
||||||
dialog.connect('response', self.on_response)
|
|
||||||
dialog.run()
|
|
||||||
dialog.destroy()
|
|
||||||
return [self.annotations]
|
|
||||||
|
|
||||||
class GODistanceFunction(workflow.Function):
|
|
||||||
|
|
||||||
def __init__(self):
|
|
||||||
workflow.Function.__init__(self, 'go_diatance', 'GO Distances')
|
|
||||||
self.output = None
|
|
||||||
|
|
||||||
def run(self, data):
|
|
||||||
logger.log('debug', 'datatype: %s' % type(data))
|
|
||||||
if not type(data) == Annotations:
|
|
||||||
return None
|
|
||||||
|
|
||||||
logger.log('debug', 'dimensions: %s' % data.dimensions)
|
|
||||||
|
|
||||||
genes = data.get_ids('genes')
|
|
||||||
gene_distances = array((len(genes), len(genes)))
|
|
||||||
|
|
||||||
return gene_distances
|
|
||||||
|
|
||||||
|
|
||||||
class ImagePlotFunction(workflow.Function):
|
|
||||||
def __init__(self):
|
|
||||||
workflow.Function.__init__(self, 'image', 'Show Image')
|
|
||||||
|
|
||||||
def run(self, data):
|
|
||||||
return [plots.ImagePlot(data, name='foo')]
|
|
||||||
|
|
||||||
|
|
||||||
class TestDataFunction(workflow.Function):
|
|
||||||
def __init__(self):
|
|
||||||
workflow.Function.__init__(self, 'test_data', 'Generate Test Data')
|
|
||||||
|
|
||||||
def run(self):
|
def run(self):
|
||||||
logger.log('notice', 'Injecting foo test data')
|
logger.log('notice', 'Injecting foo test data')
|
||||||
x = randn(500,15)
|
x = randn(500,15)
|
||||||
X = dataset.Dataset(x)
|
X = dataset.Dataset(x)
|
||||||
p = plots.ScatterPlot(X, X, 'rows', 'rows', '0_1', '0_2',name='scatter')
|
p = plots.ScatterPlot(X, X, 'rows', 'rows', '0_1', '0_2',name='scatter')
|
||||||
p2 = plots.ScatterMarkerPlot(X, X, 'rows', 'rows', '0_1', '0_2',name='marker')
|
# p2 = plots.ScatterMarkerPlot(X, X, 'rows', 'rows', '0_1', '0_2',name='marker')
|
||||||
graph = networkx.XGraph()
|
graph = networkx.XGraph()
|
||||||
for x in 'ABCDEF':
|
for x in 'ABCDEF':
|
||||||
for y in 'ADE':
|
for y in 'ADE':
|
||||||
|
@ -125,181 +40,11 @@ class TestDataFunction(workflow.Function):
|
||||||
ds_plot = plots.NetworkPlot(ds)
|
ds_plot = plots.NetworkPlot(ds)
|
||||||
|
|
||||||
cds = dataset.CategoryDataset(ones([3, 3]))
|
cds = dataset.CategoryDataset(ones([3, 3]))
|
||||||
ds_scatter = plots.ScatterMarkerPlot(ds, ds, 'rows_0', 'rows_0', '0_1', '0_2')
|
ds_scatter = plots.ScatterMarkerPlot(ds, ds,
|
||||||
|
'rows_0', 'rows_0',
|
||||||
|
'0_1', '0_2')
|
||||||
lp = plots.LineViewPlot(X,major_axis=0)
|
lp = plots.LineViewPlot(X,major_axis=0)
|
||||||
vp = plots.VennPlot()
|
vp = plots.VennPlot()
|
||||||
|
self.datasets = [p]
|
||||||
return [X, ds, p, ds_plot, ds_scatter, p2, cds, lp, vp]
|
return [X, ds, p, ds_plot, ds_scatter, p2, cds, lp, vp]
|
||||||
|
|
||||||
class SelectFunction(workflow.Function):
|
|
||||||
def __init__(self):
|
|
||||||
workflow.Function.__init__(self, 'select', 'Select')
|
|
||||||
|
|
||||||
def run(self, data):
|
|
||||||
s = dataset.Selection('Arbitrary selection')
|
|
||||||
s.select('rows', ['0_1', '0_2'])
|
|
||||||
return [s]
|
|
||||||
|
|
||||||
class DatasetLog(workflow.Function):
|
|
||||||
def __init__(self):
|
|
||||||
workflow.Function.__init__(self, 'log', 'Log')
|
|
||||||
|
|
||||||
def run(self, data):
|
|
||||||
logger.log('notice', 'Taking the log of dataset %s' % data.get_name())
|
|
||||||
d = data.asarray()
|
|
||||||
d = log(d)
|
|
||||||
new_data_name = 'log(%s)' % data.get_name()
|
|
||||||
ds = dataset.Dataset(d, name=new_data_name)
|
|
||||||
return [ds]
|
|
||||||
|
|
||||||
class DatasetLoadFunction(workflow.Function):
|
|
||||||
"""Loader for previously pickled Datasets."""
|
|
||||||
def __init__(self):
|
|
||||||
workflow.Function.__init__(self, 'load_data', 'Load Pickled Dataset')
|
|
||||||
|
|
||||||
def run(self):
|
|
||||||
chooser = gtk.FileChooserDialog(title="Select cel files...", parent=None,
|
|
||||||
action=gtk.FILE_CHOOSER_ACTION_OPEN,
|
|
||||||
buttons=(gtk.STOCK_CANCEL, gtk.RESPONSE_CANCEL,
|
|
||||||
gtk.STOCK_OPEN, gtk.RESPONSE_OK))
|
|
||||||
pkl_filter = gtk.FileFilter()
|
|
||||||
pkl_filter.set_name("Python pickled data files (*.pkl)")
|
|
||||||
pkl_filter.add_pattern("*.[pP][kK][lL]")
|
|
||||||
all_filter = gtk.FileFilter()
|
|
||||||
all_filter.set_name("All Files (*.*)")
|
|
||||||
all_filter.add_pattern("*")
|
|
||||||
chooser.add_filter(pkl_filter)
|
|
||||||
chooser.add_filter(all_filter)
|
|
||||||
|
|
||||||
try:
|
|
||||||
if chooser.run() == gtk.RESPONSE_OK:
|
|
||||||
return [cPickle.load(open(chooser.get_filename()))]
|
|
||||||
finally:
|
|
||||||
chooser.destroy()
|
|
||||||
|
|
||||||
|
|
||||||
class DatasetSaveFunction(workflow.Function):
|
|
||||||
"""QND way to save data to file for later import to this program."""
|
|
||||||
def __init__(self):
|
|
||||||
workflow.Function.__init__(self, 'save_data', 'Save Pickled Dataset')
|
|
||||||
|
|
||||||
def run(self):
|
|
||||||
if not data:
|
|
||||||
logger.log("notice", "No data to save.")
|
|
||||||
return
|
|
||||||
else:
|
|
||||||
data = data[0]
|
|
||||||
chooser = gtk.FileChooserDialog(title="Save pickled data...", parent=None,
|
|
||||||
action=gtk.FILE_CHOOSER_ACTION_SAVE,
|
|
||||||
buttons=(gtk.STOCK_CANCEL, gtk.RESPONSE_CANCEL,
|
|
||||||
gtk.STOCK_SAVE, gtk.RESPONSE_OK))
|
|
||||||
pkl_filter = gtk.FileFilter()
|
|
||||||
pkl_filter.set_name("Python pickled data files (*.pkl)")
|
|
||||||
pkl_filter.add_pattern("*.[pP][kK][lL]")
|
|
||||||
all_filter = gtk.FileFilter()
|
|
||||||
all_filter.set_name("All Files (*.*)")
|
|
||||||
all_filter.add_pattern("*")
|
|
||||||
chooser.add_filter(pkl_filter)
|
|
||||||
chooser.add_filter(all_filter)
|
|
||||||
chooser.set_current_name(data.get_name() + ".pkl")
|
|
||||||
|
|
||||||
try:
|
|
||||||
if chooser.run() == gtk.RESPONSE_OK:
|
|
||||||
cPickle.dump(data, open(chooser.get_filename(), "w"), protocol=2)
|
|
||||||
logger.log("notice", "Saved data to %r." % chooser.get_filename())
|
|
||||||
finally:
|
|
||||||
chooser.destroy()
|
|
||||||
|
|
||||||
|
|
||||||
class CelFileImportFunction(workflow.Function):
|
|
||||||
"""Loads AffyMetrix .CEL-files into matrix."""
|
|
||||||
def __init__(self):
|
|
||||||
workflow.Function.__init__(self, 'cel_import', 'Import Affy')
|
|
||||||
|
|
||||||
def run(self, data):
|
|
||||||
import rpy
|
|
||||||
chooser = gtk.FileChooserDialog(title="Select cel files...", parent=None,
|
|
||||||
action=gtk.FILE_CHOOSER_ACTION_OPEN,
|
|
||||||
buttons=(gtk.STOCK_CANCEL, gtk.RESPONSE_CANCEL,
|
|
||||||
gtk.STOCK_OPEN, gtk.RESPONSE_OK))
|
|
||||||
chooser.set_select_multiple(True)
|
|
||||||
cel_filter = gtk.FileFilter()
|
|
||||||
cel_filter.set_name("Cel Files (*.cel)")
|
|
||||||
cel_filter.add_pattern("*.[cC][eE][lL]")
|
|
||||||
all_filter = gtk.FileFilter()
|
|
||||||
all_filter.set_name("All Files (*.*)")
|
|
||||||
all_filter.add_pattern("*")
|
|
||||||
chooser.add_filter(cel_filter)
|
|
||||||
chooser.add_filter(all_filter)
|
|
||||||
|
|
||||||
try:
|
|
||||||
if chooser.run() == gtk.RESPONSE_OK:
|
|
||||||
rpy.r.library("affy")
|
|
||||||
|
|
||||||
silent_eval = rpy.with_mode(rpy.NO_CONVERSION, rpy.r)
|
|
||||||
silent_eval('E <- ReadAffy(filenames=c("%s"))' % '", "'.join(chooser.get_filenames()))
|
|
||||||
silent_eval('E <- rma(E)')
|
|
||||||
|
|
||||||
m = rpy.r('m <- E@exprs')
|
|
||||||
|
|
||||||
vector_eval = rpy.with_mode(rpy.VECTOR_CONVERSION, rpy.r)
|
|
||||||
rownames = vector_eval('rownames(m)')
|
|
||||||
colnames = vector_eval('colnames(m)')
|
|
||||||
|
|
||||||
# We should be nice and clean up after ourselves
|
|
||||||
rpy.r.rm(["E", "m"])
|
|
||||||
|
|
||||||
if m:
|
|
||||||
data = dataset.Dataset(m, (('ids', rownames), ('filename', colnames)), name="AffyMatrix Data")
|
|
||||||
plot = plots.LinePlot(data, "Gene profiles")
|
|
||||||
return [data, plot]
|
|
||||||
else:
|
|
||||||
logger.log("notice", "No data loaded from importer.")
|
|
||||||
finally:
|
|
||||||
chooser.destroy()
|
|
||||||
|
|
||||||
|
|
||||||
class DataLoadTestFunction(workflow.Function):
|
|
||||||
def __init__(self, wf):
|
|
||||||
workflow.Function.__init__(self, 'datadirload', 'Load from datadir')
|
|
||||||
self._wf = wf
|
|
||||||
|
|
||||||
def run(self):
|
|
||||||
print self._wf.get_data_file_name('smoker-x.ftsv')
|
|
||||||
fn = self._wf.get_data_file_name('smoker-x.ftsv')
|
|
||||||
if fn:
|
|
||||||
fd = open(fn)
|
|
||||||
ds = dataset.read_ftsv(fd)
|
|
||||||
return [ds]
|
|
||||||
else:
|
|
||||||
print "Cannot find file %s" % fn
|
|
||||||
return []
|
|
||||||
|
|
||||||
class PCAFunction(workflow.Function):
|
|
||||||
"""Generic PCA function."""
|
|
||||||
def __init__(self, wf):
|
|
||||||
workflow.Function.__init__(self, 'pca', 'PCA')
|
|
||||||
self._workflow = wf
|
|
||||||
|
|
||||||
def run(self, data):
|
|
||||||
import rpy
|
|
||||||
|
|
||||||
dim_2, dim_1 = data.get_dim_names()
|
|
||||||
|
|
||||||
|
|
||||||
silent_eval = rpy.with_mode(rpy.NO_CONVERSION, rpy.r)
|
|
||||||
rpy.with_mode(rpy.NO_CONVERSION, rpy.r.assign)("m", data.asarray())
|
|
||||||
silent_eval("t = prcomp(t(m))")
|
|
||||||
|
|
||||||
T_ids = map(str, range(1, rpy.r("dim(t$x)")[1]+1))
|
|
||||||
T = dataset.Dataset(rpy.r("t$x"), [(dim_1, data.get_identifiers(dim_1)),
|
|
||||||
("component", T_ids)], name="T")
|
|
||||||
P = dataset.Dataset(rpy.r("t$rotation"), [(dim_2, data.get_identifiers(dim_2)),
|
|
||||||
("component", T_ids)], name="P")
|
|
||||||
|
|
||||||
# cleanup
|
|
||||||
rpy.r.rm(["t", "m"])
|
|
||||||
|
|
||||||
loading_plot = plots.ScatterMarkerPlot(P, P, 'ids','component','1','2', "Loadings")
|
|
||||||
score_plot = plots.ScatterMarkerPlot(T, T,'filename','component','1','2', "Scores")
|
|
||||||
|
|
||||||
return [T, P, loading_plot, score_plot]
|
|
||||||
|
|
Reference in New Issue