Projects/laydi
Projects
/
laydi
Archived
7
0
Fork 0

Some stuff on tasks.

This commit is contained in:
Einar Ryeng 2007-09-03 13:22:11 +00:00
parent 16ed2fd9c9
commit bfb039328c
2 changed files with 29 additions and 276 deletions

View File

@ -88,12 +88,6 @@ class Workflow:
for fun in stage.functions: for fun in stage.functions:
print ' %s' % fun.name print ' %s' % fun.name
# def add_project(self,project):
# if project == None:
# logger.log('notice','Proejct is empty')
# logger.log('notice','Project added in : %s' %self.name)
# self.project = project
class EmptyWorkflow(Workflow): class EmptyWorkflow(Workflow):
name = 'Empty Workflow' name = 'Empty Workflow'
@ -122,24 +116,32 @@ class Stage:
self.functions_by_id[fun.id] = fun self.functions_by_id[fun.id] = fun
class Function: class Task:
"""A Function object encapsulates a function on a data set. """A Function object encapsulates a function on a data set.
Each Function instance encapsulates some function that can be applied Each Function instance encapsulates some function that can be applied
to one or more types of data. to one or more types of data.
""" """
def __init__(self, id, name): title = ""
self.id = id
self.name = name def __init__(self, input):
self.input = input
self.options = Options()
self.datasets = {}
self.arrays = {}
self.plots = {}
# just return a Validation object # just return a Validation object
def validate_input(input): def validate_input(input):
return Validation(True,"Validation Not Implemented") return Validation(True,"Validation Not Implemented")
def run(self): def run(self):
pass print self.input
def show_options_gui(self, editable=False):
pass
class Validation: class Validation:
def __init__(self,result, reason): def __init__(self,result, reason):
@ -461,3 +463,9 @@ class WorkflowMenu (gtk.Menu):
menuitem.show() menuitem.show()
return menuitem return menuitem
class Options():
def __init__(self):
pass

View File

@ -10,113 +10,28 @@ class TestWorkflow (workflow.Workflow):
name = 'Test Workflow' name = 'Test Workflow'
ident = 'test' ident = 'test'
description = 'Test Gene Ontology Workflow. This workflow currently serves as a general testing workflow.' description = 'This workflow currently serves as a general testing workflow.'
def __init__(self, app): def __init__(self, app):
workflow.Workflow.__init__(self, app) workflow.Workflow.__init__(self, app)
load = workflow.Stage('load', 'Load Data') load = workflow.Stage('load', 'Test Data')
load.add_function(CelFileImportFunction())
load.add_function(DataLoadTestFunction(self))
load.add_function(TestDataFunction()) load.add_function(TestDataFunction())
load.add_function(DatasetLoadFunction())
load.add_function(SelectFunction())
self.add_stage(load) self.add_stage(load)
preproc = workflow.Stage('preprocess', 'Preprocessing')
preproc.add_function(DatasetLog())
preproc.add_function(workflow.Function('rma', 'RMA'))
self.add_stage(preproc)
go = workflow.Stage('go', 'Gene Ontology Data') class TestDataTask(workflow.Task):
go.add_function(GODistanceFunction()) title = "Test data"
go.add_function(ImagePlotFunction())
self.add_stage(go)
regression = workflow.Stage('regression', 'Regression')
regression.add_function(workflow.Function('pls', 'PLS'))
self.add_stage(regression)
explore = workflow.Stage('explore', 'Explorative analysis')
explore.add_function(PCAFunction(self))
self.add_stage(explore)
save = workflow.Stage('save', 'Save Data')
save.add_function(DatasetSaveFunction())
self.add_stage(save)
class LoadAnnotationsFunction(workflow.Function):
def __init__(self): def __init__(self):
workflow.Function.__init__(self, 'load-go-ann', 'Load Annotations') workflow.Task.__init__(self)
self.annotations = None
def load_file(self, filename):
f = open(filename)
self.annotations = Annotations('genes', 'go-terms')
logger.log('notice', 'Loading annotation file: %s' % filename)
for line in f.readlines():
val = line.split(' \t')
if len(val) > 1:
val = [v.strip() for v in val]
retval.add_annotations('genes', val[0],
'go-terms', set(val[1:]))
def on_response(self, dialog, response):
if response == gtk.RESPONSE_OK:
logger.log('notice', 'Reading file: %s' % dialog.get_filename())
self.load_file(dialog.get_filename())
def run(self):
btns = ('Open', gtk.RESPONSE_OK, \
'Cancel', gtk.RESPONSE_CANCEL)
dialog = gtk.FileChooserDialog('Open GO Annotation File',
buttons=btns)
dialog.connect('response', self.on_response)
dialog.run()
dialog.destroy()
return [self.annotations]
class GODistanceFunction(workflow.Function):
def __init__(self):
workflow.Function.__init__(self, 'go_diatance', 'GO Distances')
self.output = None
def run(self, data):
logger.log('debug', 'datatype: %s' % type(data))
if not type(data) == Annotations:
return None
logger.log('debug', 'dimensions: %s' % data.dimensions)
genes = data.get_ids('genes')
gene_distances = array((len(genes), len(genes)))
return gene_distances
class ImagePlotFunction(workflow.Function):
def __init__(self):
workflow.Function.__init__(self, 'image', 'Show Image')
def run(self, data):
return [plots.ImagePlot(data, name='foo')]
class TestDataFunction(workflow.Function):
def __init__(self):
workflow.Function.__init__(self, 'test_data', 'Generate Test Data')
def run(self): def run(self):
logger.log('notice', 'Injecting foo test data') logger.log('notice', 'Injecting foo test data')
x = randn(500,15) x = randn(500,15)
X = dataset.Dataset(x) X = dataset.Dataset(x)
p = plots.ScatterPlot(X, X, 'rows', 'rows', '0_1', '0_2',name='scatter') p = plots.ScatterPlot(X, X, 'rows', 'rows', '0_1', '0_2',name='scatter')
p2 = plots.ScatterMarkerPlot(X, X, 'rows', 'rows', '0_1', '0_2',name='marker') # p2 = plots.ScatterMarkerPlot(X, X, 'rows', 'rows', '0_1', '0_2',name='marker')
graph = networkx.XGraph() graph = networkx.XGraph()
for x in 'ABCDEF': for x in 'ABCDEF':
for y in 'ADE': for y in 'ADE':
@ -125,181 +40,11 @@ class TestDataFunction(workflow.Function):
ds_plot = plots.NetworkPlot(ds) ds_plot = plots.NetworkPlot(ds)
cds = dataset.CategoryDataset(ones([3, 3])) cds = dataset.CategoryDataset(ones([3, 3]))
ds_scatter = plots.ScatterMarkerPlot(ds, ds, 'rows_0', 'rows_0', '0_1', '0_2') ds_scatter = plots.ScatterMarkerPlot(ds, ds,
'rows_0', 'rows_0',
'0_1', '0_2')
lp = plots.LineViewPlot(X,major_axis=0) lp = plots.LineViewPlot(X,major_axis=0)
vp = plots.VennPlot() vp = plots.VennPlot()
self.datasets = [p]
return [X, ds, p, ds_plot, ds_scatter, p2, cds, lp, vp] return [X, ds, p, ds_plot, ds_scatter, p2, cds, lp, vp]
class SelectFunction(workflow.Function):
def __init__(self):
workflow.Function.__init__(self, 'select', 'Select')
def run(self, data):
s = dataset.Selection('Arbitrary selection')
s.select('rows', ['0_1', '0_2'])
return [s]
class DatasetLog(workflow.Function):
def __init__(self):
workflow.Function.__init__(self, 'log', 'Log')
def run(self, data):
logger.log('notice', 'Taking the log of dataset %s' % data.get_name())
d = data.asarray()
d = log(d)
new_data_name = 'log(%s)' % data.get_name()
ds = dataset.Dataset(d, name=new_data_name)
return [ds]
class DatasetLoadFunction(workflow.Function):
"""Loader for previously pickled Datasets."""
def __init__(self):
workflow.Function.__init__(self, 'load_data', 'Load Pickled Dataset')
def run(self):
chooser = gtk.FileChooserDialog(title="Select cel files...", parent=None,
action=gtk.FILE_CHOOSER_ACTION_OPEN,
buttons=(gtk.STOCK_CANCEL, gtk.RESPONSE_CANCEL,
gtk.STOCK_OPEN, gtk.RESPONSE_OK))
pkl_filter = gtk.FileFilter()
pkl_filter.set_name("Python pickled data files (*.pkl)")
pkl_filter.add_pattern("*.[pP][kK][lL]")
all_filter = gtk.FileFilter()
all_filter.set_name("All Files (*.*)")
all_filter.add_pattern("*")
chooser.add_filter(pkl_filter)
chooser.add_filter(all_filter)
try:
if chooser.run() == gtk.RESPONSE_OK:
return [cPickle.load(open(chooser.get_filename()))]
finally:
chooser.destroy()
class DatasetSaveFunction(workflow.Function):
"""QND way to save data to file for later import to this program."""
def __init__(self):
workflow.Function.__init__(self, 'save_data', 'Save Pickled Dataset')
def run(self):
if not data:
logger.log("notice", "No data to save.")
return
else:
data = data[0]
chooser = gtk.FileChooserDialog(title="Save pickled data...", parent=None,
action=gtk.FILE_CHOOSER_ACTION_SAVE,
buttons=(gtk.STOCK_CANCEL, gtk.RESPONSE_CANCEL,
gtk.STOCK_SAVE, gtk.RESPONSE_OK))
pkl_filter = gtk.FileFilter()
pkl_filter.set_name("Python pickled data files (*.pkl)")
pkl_filter.add_pattern("*.[pP][kK][lL]")
all_filter = gtk.FileFilter()
all_filter.set_name("All Files (*.*)")
all_filter.add_pattern("*")
chooser.add_filter(pkl_filter)
chooser.add_filter(all_filter)
chooser.set_current_name(data.get_name() + ".pkl")
try:
if chooser.run() == gtk.RESPONSE_OK:
cPickle.dump(data, open(chooser.get_filename(), "w"), protocol=2)
logger.log("notice", "Saved data to %r." % chooser.get_filename())
finally:
chooser.destroy()
class CelFileImportFunction(workflow.Function):
"""Loads AffyMetrix .CEL-files into matrix."""
def __init__(self):
workflow.Function.__init__(self, 'cel_import', 'Import Affy')
def run(self, data):
import rpy
chooser = gtk.FileChooserDialog(title="Select cel files...", parent=None,
action=gtk.FILE_CHOOSER_ACTION_OPEN,
buttons=(gtk.STOCK_CANCEL, gtk.RESPONSE_CANCEL,
gtk.STOCK_OPEN, gtk.RESPONSE_OK))
chooser.set_select_multiple(True)
cel_filter = gtk.FileFilter()
cel_filter.set_name("Cel Files (*.cel)")
cel_filter.add_pattern("*.[cC][eE][lL]")
all_filter = gtk.FileFilter()
all_filter.set_name("All Files (*.*)")
all_filter.add_pattern("*")
chooser.add_filter(cel_filter)
chooser.add_filter(all_filter)
try:
if chooser.run() == gtk.RESPONSE_OK:
rpy.r.library("affy")
silent_eval = rpy.with_mode(rpy.NO_CONVERSION, rpy.r)
silent_eval('E <- ReadAffy(filenames=c("%s"))' % '", "'.join(chooser.get_filenames()))
silent_eval('E <- rma(E)')
m = rpy.r('m <- E@exprs')
vector_eval = rpy.with_mode(rpy.VECTOR_CONVERSION, rpy.r)
rownames = vector_eval('rownames(m)')
colnames = vector_eval('colnames(m)')
# We should be nice and clean up after ourselves
rpy.r.rm(["E", "m"])
if m:
data = dataset.Dataset(m, (('ids', rownames), ('filename', colnames)), name="AffyMatrix Data")
plot = plots.LinePlot(data, "Gene profiles")
return [data, plot]
else:
logger.log("notice", "No data loaded from importer.")
finally:
chooser.destroy()
class DataLoadTestFunction(workflow.Function):
def __init__(self, wf):
workflow.Function.__init__(self, 'datadirload', 'Load from datadir')
self._wf = wf
def run(self):
print self._wf.get_data_file_name('smoker-x.ftsv')
fn = self._wf.get_data_file_name('smoker-x.ftsv')
if fn:
fd = open(fn)
ds = dataset.read_ftsv(fd)
return [ds]
else:
print "Cannot find file %s" % fn
return []
class PCAFunction(workflow.Function):
"""Generic PCA function."""
def __init__(self, wf):
workflow.Function.__init__(self, 'pca', 'PCA')
self._workflow = wf
def run(self, data):
import rpy
dim_2, dim_1 = data.get_dim_names()
silent_eval = rpy.with_mode(rpy.NO_CONVERSION, rpy.r)
rpy.with_mode(rpy.NO_CONVERSION, rpy.r.assign)("m", data.asarray())
silent_eval("t = prcomp(t(m))")
T_ids = map(str, range(1, rpy.r("dim(t$x)")[1]+1))
T = dataset.Dataset(rpy.r("t$x"), [(dim_1, data.get_identifiers(dim_1)),
("component", T_ids)], name="T")
P = dataset.Dataset(rpy.r("t$rotation"), [(dim_2, data.get_identifiers(dim_2)),
("component", T_ids)], name="P")
# cleanup
rpy.r.rm(["t", "m"])
loading_plot = plots.ScatterMarkerPlot(P, P, 'ids','component','1','2', "Loadings")
score_plot = plots.ScatterMarkerPlot(T, T,'filename','component','1','2', "Scores")
return [T, P, loading_plot, score_plot]