Projects/laydi
Projects
/
laydi
Archived
7
0
Fork 0

Compare commits

...
This repository has been archived on 2024-07-04. You can view files and clone it, but cannot push or open issues or pull requests.

6 Commits
main ... tasks

Author SHA1 Message Date
Arnar Flatberg 7ef01d37f0 2007-09-03 17:51:22 +00:00
Arnar Flatberg 5a1e225a08 working test ... for real 2007-09-03 16:16:39 +00:00
Arnar Flatberg 9a296f59f6 bugfix on scipy import 2007-09-03 16:16:05 +00:00
Arnar Flatberg f8e0cfdb8b removed unused workflows 2007-09-03 15:50:16 +00:00
Arnar Flatberg 2fdd9aaf77 initial working test wf 2007-09-03 15:46:45 +00:00
Einar Ryeng bfb039328c Some stuff on tasks. 2007-09-03 13:22:11 +00:00
8 changed files with 85 additions and 2319 deletions

View File

@ -22,6 +22,7 @@ DATADIR = os.path.dirname(sys.modules['fluents'].__file__)
ICONDIR = os.path.join(DATADIR,"..","icons")
GLADEFILENAME = os.path.join(DATADIR, 'fluents.glade')
class IconFactory:
"""Factory for icons that ensures that each icon is only loaded once."""
@ -43,6 +44,7 @@ class IconFactory:
icon_factory = IconFactory(ICONDIR)
class TableSizeSelection(gtk.Window):
def __init__(self):

View File

@ -187,7 +187,7 @@ class LineViewPlot(Plot):
if minor_axis==0:
self._mn_data = self._data.mean(minor_axis)
else:
self._mn_data = self._data.mean(minor_axis)[:,newaxis]
self._mn_data = self._data.mean(minor_axis)[:,scipy.newaxis]
self._data = self._data - self._mn_data
self.data_is_centered = True
#initial line collection

View File

@ -60,9 +60,7 @@ class Workflow:
description = "Workflow Description"
def __init__(self):
print "Setting stages"
self.stages = []
self.stages_by_id = {}
def get_data_file_name(self, filename):
"""Checks if a file with the given name exists in the data directory.
@ -79,67 +77,68 @@ class Workflow:
def add_stage(self, stage):
self.stages.append(stage)
self.stages_by_id[stage.id] = stage
#self.stages_by_id[stage.id] = stage
def print_tree(self):
print self.name
for stage in self.stages:
print ' %s' % stage.name
for fun in stage.functions:
print ' %s' % fun.name
# def add_project(self,project):
# if project == None:
# logger.log('notice','Proejct is empty')
# logger.log('notice','Project added in : %s' %self.name)
# self.project = project
for task in stage.tasks:
print ' %s' % task.name
class EmptyWorkflow(Workflow):
name = 'Empty Workflow'
def __init__(self):
print "initing empty workflow"
logger.log("debug", "initing empty workflow")
Workflow.__init__(self)
class Stage:
"""A stage is a part of the data analysis process.
Each stage contains a set of functions that can be used to
Each stage contains a set of tasks that can be used to
accomplish the task. A typical early stage is 'preprocessing', which
can be done in several ways, each represented by a function.
can be done in several ways, each represented by a task.
"""
def __init__(self, id, name):
self.id = id
self.name = name
self.functions = []
self.functions_by_id = {}
self.tasks = []
def add_function(self, fun):
self.functions.append(fun)
self.functions_by_id[fun.id] = fun
def add_task(self, task):
self.tasks.append(task)
class Function:
"""A Function object encapsulates a function on a data set.
class Task:
"""A Task object encapsulates a task on a data set.
Each Function instance encapsulates some function that can be applied
Each Task instance encapsulates some task that can be applied
to one or more types of data.
"""
def __init__(self, id, name):
self.id = id
self.name = name
name = ""
def __init__(self, input):
self.input = input
self.options = Options()
self.datasets = {}
self.arrays = {}
self.plots = {}
# just return a Validation object
def validate_input(input):
return Validation(True,"Validation Not Implemented")
return Validation(True, "Validation Not Implemented")
def run(self):
pass
def show_options_gui(self, editable=False):
pass
class Validation:
def __init__(self,result, reason):
@ -148,7 +147,6 @@ class Validation:
class WorkflowView (gtk.VBox):
def __init__(self, wf):
gtk.VBox.__init__(self)
self.workflow = wf
@ -166,12 +164,11 @@ class WorkflowView (gtk.VBox):
btn_box.show()
exp.add(btn_align)
# Add functions in each stage
for fun in stage.functions:
btn = gtk.Button(fun.name)
# Add tasks in each stage
for task in stage.tasks:
btn = gtk.Button(task.name)
btn.connect('clicked',
lambda button, f=fun : self.run_function(f))
lambda button, t=task : self.run_task(t))
btn_box.add(btn)
btn.show()
@ -188,53 +185,24 @@ class WorkflowView (gtk.VBox):
self.remove_workflow()
self.setup_workflow(workflow)
def run_function(self, function):
logger.log('debug', 'Starting function: %s' % function.name)
def run_task(self, task_class):
logger.log('debug', 'Creating task: %s' % task_class.name)
parent_data = main.project.current_data
validation = function.validate_input()
task = task_class(input=parent_data)
validation = task.validate_input()
if not validation.succeeded:
logger.log('warning','Invalid Inputdata: ' + str(reason))
return
args, varargs, varkw, defaults = inspect.getargspec(function.run)
# first argument is 'self' and second should be the selection
# and we don't care about those...
args.remove('self')
if "selection" in args:
pass_selection = True
args.remove('selection')
task_result = task.run()
if task_result != None:
main.project.add_data(parent_data, task_result, task.name)
else:
pass_selection = False
if varargs and len(parent_data) < len(args):
logger.log('warning', "Function requires minimum %d datasets selected." % len(args))
return
elif not varargs and args and len(args) != len(parent_data):
# functions requiring datasets have to have the right number
logger.log('warning', "Function requires %d datasets, but only %d selected." % (len(args), len(parent_data)))
return
if not args:
# we allow functions requiring no data to be run even if a
# dataset is is selected
data = []
else:
data = parent_data
if pass_selection:
# if the function has a 'selection' argument, we pass in
# the selection
new_data = function.run(selection=main.project.get_selection(), *data)
else:
new_data = function.run(*data)
if new_data != None:
main.project.add_data(parent_data, new_data, function.name)
logger.log('debug', 'Task gave no output: %s' % task.name)
logger.log('debug', 'Function ended: %s' % function.name)
logger.log('debug', 'Task ended: %s' % task.name)
class Options(dict):
@ -259,8 +227,8 @@ class Options(dict):
class OptionsDialog(gtk.Dialog):
"""The basic input/output dialog box.
This defines the first page of the function options-gui.
Any function that invokes a option-gui will inherit from this class.
This defines the first page of the task options-gui.
Any task that invokes a option-gui will inherit from this class.
"""
def __init__(self, data, options, input_names=['X','Y']):
gtk.Dialog.__init__(self, 'Input-Output dialog',
@ -452,12 +420,18 @@ class WorkflowMenu (gtk.Menu):
stage_menu = gtk.Menu()
stage_menu_item.set_submenu(stage_menu)
for fun in stage.functions:
stage_menu.append(self._create_function_item(fun))
for task in stage.tasks:
stage_menu.append(self._create_task_item(task))
return stage_menu_item
def _create_function_item(self, function):
menuitem = gtk.MenuItem(function.name)
def _create_task_item(self, task):
menuitem = gtk.MenuItem(task.name)
menuitem.show()
return menuitem
class Options():
def __init__(self):
pass

View File

@ -1,261 +0,0 @@
import gobject
import gtk
import networkx
import re
class GOTerm:
    """A single Gene Ontology term, stored as a key -> value mapping.

    List-valued attributes (GOTerm.lists) accumulate repeated obo lines;
    scalar attributes (GOTerm.scalars) hold one value or None.  Reading an
    unknown key returns None instead of raising KeyError.
    """

    # obo attributes that may occur several times per term
    lists = ['is_a', 'alt_id', 'exact_synonym', 'broad_synonym',
             'narrow_synonym', 'related_synonym', 'relationship',
             'subset', 'synonym', 'xref_analog', 'xref_unknown']
    # obo attributes that occur at most once per term
    scalars = ['name', 'id', 'namespace', 'def', 'is_transitive',
               'comment', 'is_obsolete']

    def __init__(self):
        self.d = {}
        ## Create empty lists for all list values
        for l in GOTerm.lists:
            self.d[l] = []
        ## BUG FIX: this loop previously wrote self.d[l] = None (stale loop
        ## variable), clobbering the last list key and leaving the scalars
        ## uninitialised.
        for s in GOTerm.scalars:
            self.d[s] = None

    def __getitem__(self, key):
        # `in` instead of the removed-in-py3 dict.has_key()
        if key in self.d:
            return self.d[key]
        return None

    def __setitem__(self, key, value):
        self.d[key] = value
class GeneOntology(networkx.XDiGraph):
    """The GO DAG as a directed graph; edges point from parent to child."""

    def __init__(self):
        networkx.XDiGraph.__init__(self)
        # GO id -> GOTerm index for O(1) lookups
        self.by_id = {}
        # lazily built undirected copy, used only by old_subsumer
        self.undirected = None

    def add_term(self, term):
        """Add a GOTerm as a node and index it by its GO id."""
        self.add_node(term)
        self.by_id[term['id']] = term

    def link_ontology(self, linkattr, obsolete=False):
        """Add an edge parent -> child for every `linkattr` reference."""
        for node in self.nodes():
            for link in node[linkattr]:
                self.add_edge(self.by_id[link], node, linkattr)

    def link_relationships(self):
        """Add typed edges from 'relationship' lines of the form '<type> <id>'."""
        for node in self.nodes():
            for link in node['relationship']:
                link_type, term = link.split(' ')
                self.add_edge(self.by_id[term.strip()], node, link_type.strip())

    def get_bp(self):
        """Returns the root node of the biological_process tree"""
        return self.by_id['GO:0008150']

    def get_cc(self):
        """Returns the root node of the cellular_component tree"""
        # BUG FIX: the key used to be 'id: GO:0005575', which can never
        # match a term id indexed by add_term.
        return self.by_id['GO:0005575']

    def get_mf(self):
        """Returns the root node of the molecular_function tree"""
        return self.by_id['GO:0003674']

    def _subsumer(self, t1, t2, heap):
        # Breadth-first walk towards the roots until an ancestor that
        # reaches both t1 and t2 is found.
        while heap != []:
            t = heap[0]
            heap = heap[1:]
            p1 = networkx.shortest_path(self, t, t1)
            p2 = networkx.shortest_path(self, t, t2)
            if p1 and p2:
                return t
            heap += self.in_neighbors(t)
        return None

    def subsumer(self, t1, t2):
        """Return the closest common ancestor of t1 and t2, or None."""
        if t1 == t2:
            return t1
        # If one term is an ancestor of the other, it is the subsumer.
        if networkx.shortest_path(self, t1, t2):
            return t1
        elif networkx.shortest_path(self, t2, t1):
            return t2
        return self._subsumer(t1, t2, self.in_neighbors(t1))

    def old_subsumer(self, t1, t2):
        """Deprecated subsumer search over an undirected copy of the graph."""
        if t1 == t2:
            return t1
        if self.undirected == None:
            self.undirected = self.to_undirected()
        path = networkx.shortest_path(self.undirected, t1, t2)
        if not path:
            print("Woah, path not found.")
            return None
        if path == [1]:
            print("This shouldn't happen")
            return t1
        for t in path:
            if networkx.shortest_path(self, t, t1) and \
               networkx.shortest_path(self, t, t2):
                return t
        print("GeneOntology.subsumer: should not reach this point")
        print("path is now: %s" % path)
        print("ids are: %s " % [x['id'] for x in path])
def _split_obo_line(line):
"""Splits a line from an obo file in its three constituent parts.
@param line: A string containing a line from an obo file. The line must
either be a section definition field with a section name in brackets
or a line of the form: keyword: value ! comment
@returns: A tuple of four strings conaining the section, key, value and
description defined in the string. If the section part is None, all
the other fields are strings and if section is a string, all the other
fields are None.
"""
attrib_re = re.compile(r'^\s*([\w-]+)\s*:\s*([^!]*)!?(.*$)')
s = line.strip()
if s == "":
return (None, None, None, None)
elif s.startswith('[') and s.endswith(']'):
return (s[1:-1], None, None, None)
else:
m = attrib_re.match(s)
if m:
key, value, comment = [x.strip() for x in m.groups()]
return (None, key, value, comment)
else:
raise Exception('Unparsable line: %s' % line)
def _add_term_attribute(term, key, value, comment):
    """Store one parsed obo attribute on `term`; the comment is discarded."""
    if key in GOTerm.lists:
        term[key].append(value)
    elif key in GOTerm.scalars:
        term[key] = value
    else:
        raise Exception('Unknown key %s: %s' % (key, value))
def read_gene_ontology(fd):
    """Reads the Gene Ontology from an obo file.
    @param fd: An open file object to the obo file.
    @returns: A GeneOntology populated with one GOTerm per [Term] section.
    """
    go = GeneOntology()
    # `term` is the GOTerm currently being filled in, or None while inside
    # a non-Term section.
    term = None
    section = None
    line = fd.readline()
    while line:
        s, k, v, c = _split_obo_line(line)
        if s == None and k == None:
            # blank line, nothing to do
            pass
        elif s:
            # New section: flush the previous term, if any.
            if term:
                go.add_term(term)
            section = s
            if s == 'Term':
                term = GOTerm()
            else:
                # Sections other than [Term] (e.g. [Typedef]) are skipped.
                term = None
                print "ignoring: %s" %s
        else:
            # Attribute line; only meaningful inside a [Term] section.
            if term:
                _add_term_attribute(term, k, v, c)
        line = fd.readline()
    # Flush the final term (no trailing section header follows it).
    if term:
        go.add_term(term)
    return go
def pickle_gene_ontology(go, fn):
    """Serialize the ontology `go` to the file named `fn` using pickle.

    BUG FIX: this module never imported pickle, so the call raised
    NameError; the import is now done locally.  `with` also guarantees the
    file is closed even if pickling fails.
    """
    import pickle
    with open(fn, 'wb') as fd:
        pickle.dump(go, fd)
def load_pickled_ontology(fn):
    """Load and return a pickled ontology from the file named `fn`.

    BUG FIX: this module never imported pickle (NameError); imported
    locally here.  `with` closes the file even when unpickling fails.
    """
    import pickle
    with open(fn, 'rb') as fd:
        return pickle.load(fd)
def read_default_go():
    """Read the system-wide GO obo file and link is_a/relationship edges.

    NOTE(review): the obo path is hard-coded; confirm it exists on
    deployment hosts.
    """
    # `with` closes the file even if parsing raises (the original leaked
    # the handle on error).
    with open("/usr/share/gene-ontology/gene_ontology.obo") as f:
        go = read_gene_ontology(f)
    go.link_ontology('is_a')
    go.link_relationships()
    return go
def _add_subgraphs(treestore, ontology, parent, nodes):
    """Recursively mirror `nodes` and their successors into `treestore`."""
    for node in nodes:
        row = (node['id'], node['name'], False, node)
        it = treestore.insert(parent, 0, row)
        _add_subgraphs(treestore, ontology, it, ontology.successors(node))
def get_go_treestore(ontology):
    """Build a gtk.TreeStore mirroring `ontology`.

    Only the biological_process subtree is inserted.
    """
    ts = gtk.TreeStore(gobject.TYPE_STRING, ## ID
                       gobject.TYPE_STRING, ## Name
                       gobject.TYPE_BOOLEAN, ## Selected
                       gobject.TYPE_PYOBJECT) ## Node
    _add_subgraphs(ts, ontology, None, [ontology.get_bp()])
    return ts
class NetworkTreeModel(gtk.GenericTreeModel):
    """Partial gtk.GenericTreeModel that exposes a network as a tree.

    NOTE(review): most of the iterator protocol below is unimplemented
    (the methods just `pass`), so this model is incomplete as written.
    """

    def __init__(self, network, root):
        gtk.GenericTreeModel.__init__(self)
        self._network = network
        self._root = root

    def on_get_flags(self):
        return 0

    def on_get_n_columns(self):
        # Single column: the node id.
        return 1

    def on_get_column_type(self, index):
        if index==0:
            return gobject.TYPE_STRING

    def on_get_iter(self, path):
        # Walk predecessor lists from the root, following the path indices.
        node = self._root
        for p in path[1:]:
            children = self._network.predecessors(node)
            node = children[p]
        return node

    def on_get_path(self, rowref):
        pass

    def on_get_value(self, rowref, column):
        print 'get_value'
        return rowref['id']

    def on_iter_next(self, rowref):
        pass

    def on_iter_children(self, parent):
        pass

    def on_iter_has_child(self, rowref):
        pass

    def on_iter_n_children(self, rowref):
        pass

    def on_iter_nth_child(self, parent, n):
        pass

    def on_iter_parent(self, child):
        pass

View File

@ -1,486 +0,0 @@
import gtk
from fluents import dataset, logger, plots, workflow, fluents, project
from fluents.lib import blmfuncs
import geneontology
#import gostat
from scipy import array, randn, log, ones, zeros
import networkx
import re
# (code, description) pairs for the GO annotation evidence codes shown in
# the options dialog.
EVIDENCE_CODES=[('IMP', 'Inferred from mutant phenotype'),
                ('IGI', 'Inferred from genetic interaction'),
                ('IPI', 'Inferred from physical interaction'),
                ('ISS', 'Inferred from sequence or structure similarity'),
                ('IDA', 'Inferred from direct assay'),
                ('IEP', 'Inferred on expression pattern'),
                ('IEA', 'Inferred from electronic annotation'),
                ('TAS', 'Traceable author statement'),
                ('NAS', 'Non-traceable author statement'),
                ('ND', 'No biological data available'),
                ('RCA', 'Inferred from reviewed computational analysis'),
                ('IC', 'Inferred by curator')]

# Supported semantic-distance metrics: (identifier, display name).
DISTANCE_METRICS = [('resnik', 'Resnik'),
                    ('jiang', 'Jiang & Conrath'),
                    ('fussimeg', 'FuSSiMeG')]

# Directory holding the GO annotation data files.
# NOTE(review): hard-coded to a user's home directory.
GO_DATA_DIR = '/home/einarr/data'

# Module-level shared state, populated by the Load* functions below.
evidence = None
go = None
class GoTermView (gtk.Frame):
    """Frame showing the id, name and definition of a single GO term."""

    def __init__(self):
        gtk.Frame.__init__(self)
        tab = gtk.Table(2, 2, False)
        self._table = tab
        # Row 0: term name label.
        self._name = gtk.Label('')
        self._name.set_line_wrap(True)
        self._name.set_alignment(0, 0)
        name_label = gtk.Label('Name:')
        name_label.set_alignment(0, 0)
        tab.attach(name_label, 0, 1, 0, 1, gtk.FILL, gtk.FILL, 5, 5)
        tab.attach(self._name, 1, 2, 0, 1, gtk.FILL|gtk.EXPAND, gtk.FILL, 5, 5)
        # Row 1: scrollable, word-wrapped definition text.
        self._def = gtk.TextBuffer()
        textview = gtk.TextView(self._def)
        textview.set_wrap_mode(gtk.WRAP_WORD)
        scrolled_window = gtk.ScrolledWindow()
        scrolled_window.add(textview)
        def_label = gtk.Label('Def:')
        def_label.set_alignment(0.0, 0.0)
        tab.attach(def_label, 0, 1, 1, 2, gtk.FILL, gtk.FILL, 5, 5)
        tab.attach(scrolled_window, 1, 2, 1, 2, gtk.FILL|gtk.EXPAND, gtk.FILL|gtk.EXPAND, 5, 5)
        self.add(tab)
        self.set_go_term(None)

    def set_go_term(self, term):
        """Show `term` (a GOTerm mapping), or reset the view when None."""
        if term:
            self.set_label(term['id'])
            self._name.set_text(term['name'])
            self._def.set_text(term['def'])
        else:
            self.set_label('GO Term')
            self._name.set_text('')
            self._def.set_text('')
class GeneOntologyTree (gtk.HPaned):
    """Browser pane: GO tree with checkboxes on the left, term details right."""

    def __init__(self, network):
        gtk.HPaned.__init__(self)
        treemodel = geneontology.get_go_treestore(network)
        self._treemodel = treemodel
        self._tree_view = gtk.TreeView(treemodel)
        # GO ids whose checkbox (model column 2) is ticked.
        self._selected_terms = set()
        self._tree_view.set_fixed_height_mode(True)

        # Set up context menu
        self._context_menu = GoTermContextMenu(treemodel, self._tree_view)
        self._tree_view.connect('popup_menu', self._popup_menu)
        self._tree_view.connect('button_press_event', self._on_button_press)

        # View column 0: GO id text (model column 0).
        renderer = gtk.CellRendererText()
        go_column = gtk.TreeViewColumn('GO ID', renderer, text=0)
        go_column.set_sizing(gtk.TREE_VIEW_COLUMN_FIXED)
        go_column.set_fixed_width(200)
        go_column.set_resizable(True)
        self._tree_view.insert_column(go_column, 0)

        # View column 1: selection checkbox (model column 2).
        renderer = gtk.CellRendererToggle()
        renderer.set_property('activatable', True)
        renderer.connect('toggled', self._toggle_selected)
        renderer.set_active(True)
        renderer.set_property('mode', gtk.CELL_RENDERER_MODE_ACTIVATABLE)
        go_column = gtk.TreeViewColumn('T', renderer, active=2)
        go_column.set_fixed_width(20)
        go_column.set_sizing(gtk.TREE_VIEW_COLUMN_FIXED)
        go_column.set_resizable(True)
        self._tree_view.insert_column(go_column, 1)

        # View column 2: term name (model column 1).
        renderer = gtk.CellRendererText()
        go_column = gtk.TreeViewColumn('Name', renderer, text=1)
        go_column.set_fixed_width(200)
        go_column.set_sizing(gtk.TREE_VIEW_COLUMN_FIXED)
        go_column.set_resizable(True)
        self._tree_view.insert_column(go_column, 2)

        self._desc_view = GoTermView()
        self._tree_view.connect('cursor-changed', self._on_cursor_changed)
        scrolled_window = gtk.ScrolledWindow()
        scrolled_window.add(self._tree_view)
        self.add1(scrolled_window)
        self.add2(self._desc_view)
        self.show_all()

    def _on_cursor_changed(self, tree):
        # Show details for the row under the cursor (model column 3 = GOTerm).
        path, col = self._tree_view.get_cursor()
        current = self._treemodel.get_iter(path)
        term = self._treemodel.get_value(current, 3)
        self._desc_view.set_go_term(term)

    ##
    ## GTK Callback functions
    ##
    def _popup_menu(self, *rest):
        # NOTE(review): self.menu is never assigned in this class; this
        # looks like it should be self._context_menu — confirm.
        self.menu.popup(None, None, None, 0, 0)

    def _on_button_press(self, widget, event):
        # Track the row under the pointer; right-click (button 3) opens
        # the context menu for it.
        path = widget.get_path_at_pos(int(event.x), int(event.y))
        iter = None
        if path:
            iter = self._treemodel.get_iter(path[0])
            obj = self._treemodel.get_value(iter, 3)
        else:
            obj = None
        self._context_menu.set_current_term(obj, iter)
        if event.button == 3:
            self._context_menu.popup(None, None, None, event.button, event.time)

    def _toggle_selected(self, renderer, path):
        # Flip the checkbox state and keep _selected_terms in sync.
        iter = self._treemodel.get_iter(path)
        selected = self._treemodel.get_value(iter, 2)
        id = self._treemodel.get_value(iter, 0)
        self._treemodel.set_value(iter, 2, not selected)
        if selected:
            self._selected_terms.remove(id)
        else:
            self._selected_terms.add(id)
class GoTermContextMenu (gtk.Menu):
    """Context menu for GO terms in the gene ontology browser"""

    def __init__(self, treemodel, treeview):
        self._treemodel = treemodel
        self._treeview = treeview
        # The term/row the next menu action applies to; set via
        # set_current_term before the menu is popped up.
        self._current_term = None
        self._current_iter = None
        gtk.Menu.__init__(self)

        # Populate menu
        self._expand_item = i = gtk.MenuItem('Expand')
        i.connect('activate', self._on_expand_subtree, treemodel, treeview)
        self.append(i)
        i.show()
        self._collapse_item = i = gtk.MenuItem('Collapse')
        i.connect('activate', self._on_collapse_subtree, treemodel, treeview)
        self.append(i)
        i.show()
        self._select_subtree_item = i = gtk.MenuItem('Select subtree')
        i.connect('activate', self._on_select_subtree, treemodel, treeview)
        self.append(i)
        i.show()

    def set_current_term(self, term, it):
        """Remember which term/tree-iter the menu actions should target."""
        self._current_term = term
        self._current_iter = it

    def _on_expand_subtree(self, item, treemodel, treeview):
        path = treemodel.get_path(self._current_iter)
        treeview.expand_row(path, True)

    def _on_collapse_subtree(self, item, treemodel, treeview):
        treeview.collapse_row(treemodel.get_path(self._current_iter))

    def _on_select_subtree(self, item, treemodel, treeview):
        # Make every term reachable from the current one (BFS over the
        # module-global `go`) the project-wide 'go-terms' selection.
        logger.log('notice', 'Selecting subtree from GO id: %s (%s)' %
                   (self._current_term['id'], self._current_term['name']))
        ids = [x['id'] for x in networkx.bfs(go, self._current_term)]
        project.project.set_selection('go-terms', set(ids))
class GoWorkflow (workflow.Workflow):
    """Workflow wiring together GO loading, selection and distance stages."""

    name = 'Gene Ontology'
    ident = 'go'
    description = 'Gene Ontology Workflow. For tree distance measures based '\
                  + 'on the GO tree.'

    def __init__(self, app):
        workflow.Workflow.__init__(self, app)
        # Stage 1: data loading.
        load = workflow.Stage('load', 'Load GO Annotations')
        load.add_function(LoadGOFunction())
        load.add_function(LoadAnnotationsFunction())
        load.add_function(LoadTextDatasetFunction())
        self.add_stage(load)
        # Stage 2: GO-specific operations.
        go = workflow.Stage('go', 'Gene Ontology')
        go.add_function(SelectGoTermsFunction(self))
        go.add_function(GoDistanceFunction())
        go.add_function(SaveDistancesFunction())
        self.add_stage(go)
        # Stage 3: bilinear modelling.
        blm = workflow.Stage('blm', 'Bilinear Analysis')
        blm.add_function(blmfuncs.PCA())
        self.add_stage(blm)
class LoadGOFunction(workflow.Function):
    """Load the Gene Ontology and show it in a browser notebook tab."""

    def __init__(self):
        workflow.Function.__init__(self, 'load-go', 'Load Gene Ontology')

    def run(self):
        # Publishes the loaded ontology as the module-global `go`.
        global go
        go = geneontology.read_default_go()
        browser = GeneOntologyTree(go)
        label = gtk.Label('_Gene Ontology')
        label.set_use_underline(True)
        fluents.app['bottom_notebook'].append_page(browser, label)
class LoadTextDatasetFunction(workflow.Function):
    """Load a pre-condensed GO evidence dataset from an ftsv file."""

    def __init__(self):
        workflow.Function.__init__(self, 'load-text-ds', 'Load GO Evidence')

    def run(self):
        """Read the dataset and publish it as the module-global `evidence`."""
        global evidence
        # BUG FIX: the file handle was never closed; `with` closes it even
        # if read_ftsv raises.  NOTE(review): the path is hard-coded.
        with open('/home/einarr/data/goa-condensed.ftsv') as f:
            evidence = dataset.read_ftsv(f)
        return [evidence]
class LoadAnnotationsFunction(workflow.Function):
    """Load the GO term x evidence-code annotation-count matrix."""

    def __init__(self):
        workflow.Function.__init__(self, 'load-go-ann', 'Load Annotations')
        self.annotations = None

    def run(self):
        """Parse the goa-condensed file into a Dataset; sets `evidence`."""
        global evidence
        # BUG FIX: the file was never closed; `with` guarantees cleanup.
        with open(GO_DATA_DIR + '/goa-condensed') as f:
            # First line holds the evidence-code column headers.
            ev_codes = f.readline().split()
            go_terms = []
            lines = f.readlines()
            m = zeros((len(lines), len(ev_codes)))
            # Each remaining line: GO id followed by one count per code.
            for i, l in enumerate(lines):
                values = l.split()
                go_terms.append(values[0])
                for j, v in enumerate(values[1:]):
                    m[i,j] = float(v.strip())
        d = dataset.Dataset(m,
                            [['go-terms', go_terms], ['evidence', ev_codes]],
                            name='GO evidence')
        evidence = d
        return [d]
class EvidenceCodeFrame(gtk.Frame):
    """Check-button frame for enabling/disabling GO evidence codes."""

    def __init__(self):
        gtk.Frame.__init__(self, 'Evidence Codes')
        # code -> gtk.CheckButton, one per EVIDENCE_CODES entry
        self._ec_buttons = {}
        vbox = gtk.VBox(len(EVIDENCE_CODES))
        for code, desc in EVIDENCE_CODES:
            btn = gtk.CheckButton('%s (%s)' % (code, desc))
            self._ec_buttons[code] = btn
            vbox.add(btn)
        self.add(vbox)

    def set_options(self, options):
        """Reflect the boolean options[code] values in the buttons."""
        for code, desc in EVIDENCE_CODES:
            self._ec_buttons[code].set_active(options[code])

    def update_options(self, options):
        """Write the button states back into `options`; returns `options`."""
        for code, desc in EVIDENCE_CODES:
            options[code] = self._ec_buttons[code].get_active()
        return options
class DistanceMetricFrame(gtk.Frame):
    """Radio-button frame for choosing a single distance metric."""

    def __init__(self):
        gtk.Frame.__init__(self, 'Distance Metrics')
        # code -> gtk.RadioButton, one per DISTANCE_METRICS entry
        self._metric_buttons = {}
        vbox = gtk.VBox()
        prev = None
        for code, text in DISTANCE_METRICS:
            # Chaining `prev` puts all buttons in one radio group.
            btn = gtk.RadioButton(prev, '%s' % text)
            self._metric_buttons[code] = btn
            vbox.add(btn)
            prev = btn
        self.add(vbox)

    def set_options(self, options):
        """Activate the button matching options['metric']."""
        self._metric_buttons[options['metric']].set_active(True)

    def update_options(self, options):
        """Write the selected metric into `options`; returns `options`."""
        for code, text in DISTANCE_METRICS:
            if self._metric_buttons[code].get_active():
                options['metric'] = code
                return options
        # BUG FIX: the original had a duplicated, unreachable
        # `return options` here.
        return options
class GoDistanceDialog(gtk.Dialog):
    """Modal OK/Cancel dialog collecting GoDistanceFunction options."""

    def __init__(self):
        gtk.Dialog.__init__(self, 'GO term distance matrix',
                            None,
                            gtk.DIALOG_MODAL | gtk.DIALOG_DESTROY_WITH_PARENT,
                            (gtk.STOCK_OK, gtk.RESPONSE_OK,
                             gtk.STOCK_CANCEL, gtk.RESPONSE_CANCEL))
        self._ec_frame = EvidenceCodeFrame()
        self._metric_frame = DistanceMetricFrame()
        self.vbox.add(self._ec_frame)
        self.vbox.add(self._metric_frame)

    def run(self):
        # Ensure child widgets are visible before entering the modal loop.
        self.vbox.show_all()
        return gtk.Dialog.run(self)

    def set_options(self, options):
        """Push `options` into both child frames."""
        self._ec_frame.set_options(options)
        self._metric_frame.set_options(options)

    def update_options(self, options):
        """Read both frames back into `options`; returns `options`."""
        self._ec_frame.update_options(options)
        self._metric_frame.update_options(options)
        return options

    def set_editable(self, editable):
        # Grey out both frames when the dialog is read-only.
        self._ec_frame.set_sensitive(editable)
        self._metric_frame.set_sensitive(editable)
class NumericDict(dict):
    """Dict that returns 0.0 for missing keys instead of raising KeyError."""

    def __init__(self):
        dict.__init__(self)

    def __getitem__(self, key):
        # BUG FIX: the original used a bare `except:`, which also swallowed
        # unrelated errors (e.g. unhashable keys, KeyboardInterrupt).  Only
        # a missing key means "zero".
        try:
            return dict.__getitem__(self, key)
        except KeyError:
            return 0.0
class SelectGoTermsFunction(workflow.Function):
    """Select the first 100 GO terms (BFS order) as the active selection."""

    def __init__(self, wf):
        workflow.Function.__init__(self, 'go-select', 'Select GO Terms')
        self.wf = wf

    def run(self, ds):
        # BFS from the biological_process root of the module-global `go`;
        # the selection is capped at the first 100 term ids.
        terms = [x['id'] for x in networkx.paths.bfs(go, go.get_bp())]
        self.wf.project.set_selection('go-terms', set(terms[:100]))
#        self.wf.project.set_selection('go-terms', set(['GO:0007582', 'GO:0008150', 'GO:0051704', 'GO:0044419']))
class GoDistanceFunction(workflow.Function):
    """Compute a term x term semantic-distance matrix for selected GO terms."""

    def __init__(self):
        workflow.Function.__init__(self, 'go-dist', 'GO term distance matrix')
        self.options = GoDistanceOptions()

    def resnik_distance_matrix(self, selection, ic):
        """Return a Dataset of pairwise Resnik distances.

        `ic` maps GO id -> information content.  Term pairs without a
        common subsumer get a large sentinel distance (1000).
        """
        size = len(selection['go-terms'])
        m = zeros((size, size))
        # Create resnik distance matrix
        ids = list(selection['go-terms'])
        for i, t1 in enumerate(ids):
            for j, t2 in enumerate(ids):
                term1 = go.by_id[t1]
                term2 = go.by_id[t2]
                subsumer = go.subsumer(term1, term2)
                if subsumer == None:
                    m[i, j] = 1000
                else:
                    m[i, j] = ic[t1] + ic[t2] - 2.0 * ic[subsumer['id']]
        ds = dataset.Dataset(m, (('go-terms', ids), ('_go-terms', ids)), 'Resnik')
        return ds

    def run(self, x, selection):
        global evidence, go
        self.options = self.show_gui(self.options)
        if not selection.has_key('go-terms') or len(selection['go-terms']) == 0:
            logger.log('warning', 'No GO terms selected. Cannot make distance matrix.')
            # BUG FIX: the original fell through here and crashed on the
            # empty selection below.
            return
        # Sum annotation counts over the enabled evidence-code columns.
        codes = [c for c, d in EVIDENCE_CODES if self.options[c]]
        ev_indices = evidence.get_indices('evidence', codes)
        ann_count_matrix = evidence._array[:, ev_indices].sum(1)
        total_ann = ann_count_matrix.sum(0)
        annotations = NumericDict()
        ic = NumericDict()
        # Insert annotations into dict
        for i, v in enumerate(evidence.get_identifiers('go-terms')):
            annotations[v] = ann_count_matrix[i]
        # Accumulate annotations bottom-up so each term includes its subtree.
        for term in reversed(networkx.topological_sort(go)):
            for parent in go.in_neighbors(term):
                annotations[parent['id']] += annotations[term['id']]
        # Create information content dictionary
        for term, count in annotations.items():
            ic[term] = -log(count / total_ann)
        return [self.resnik_distance_matrix(selection, ic)]

    def show_gui(self, options, edit=True):
        """Run the options dialog; returns updated options on OK, else `options`."""
        dialog = GoDistanceDialog()
        dialog.set_options(self.options)
        dialog.show_all()
        dialog.set_editable(edit)
        response = dialog.run()
        dialog.hide()
        if response == gtk.RESPONSE_OK:
            return dialog.update_options(self.options)
        else:
            return options
class SaveDistancesFunction(workflow.Function):
    """Write a dataset to a fixed ftsv output file."""

    def __init__(self):
        workflow.Function.__init__(self, 'save-matrix', 'Save Matrix')

    def run(self, ds):
        # NOTE(review): the output path is hard-coded.
        filename = '/home/einarr/data/output.ftsv'
        # `with` closes the file even if write_ftsv raises (the original
        # leaked the handle on error).
        with open(filename, 'w') as fd:
            dataset.write_ftsv(fd, ds)
class Options(dict):
    """Plain dict subclass serving as the base for workflow option sets."""

    def __init__(self):
        super(Options, self).__init__()
class GoDistanceOptions(Options):
    """Defaults: every evidence code enabled, FuSSiMeG as the metric."""

    def __init__(self):
        Options.__init__(self)
        self.update((code, True) for code, _desc in EVIDENCE_CODES)
        self['metric'] = 'fussimeg'

View File

@ -1,777 +0,0 @@
import gtk
from fluents import dataset, logger, plots, workflow, fluents, project, view, main
import geneontology
from matplotlib.nxutils import points_inside_poly
import matplotlib
#from scipy import array, randn, log, ones, zeros
from scipy import *
from numpy import matlib
import networkx
import re
import rpy
# (code, description) pairs for the GO annotation evidence codes shown in
# the options dialog.
EVIDENCE_CODES=[('IMP', 'Inferred from mutant phenotype'),
                ('IGI', 'Inferred from genetic interaction'),
                ('IPI', 'Inferred from physical interaction'),
                ('ISS', 'Inferred from sequence or structure similarity'),
                ('IDA', 'Inferred from direct assay'),
                ('IEP', 'Inferred on expression pattern'),
                ('IEA', 'Inferred from electronic annotation'),
                ('TAS', 'Traceable author statement'),
                ('NAS', 'Non-traceable author statement'),
                ('ND', 'No biological data available'),
                ('RCA', 'Inferred from reviewed computational analysis'),
                ('IC', 'Inferred by curator')]

# Supported semantic-distance metrics: (identifier, display name).
DISTANCE_METRICS = [('resnik', 'Resnik'),
                    ('jiang', 'Jiang & Conrath'),
                    ('fussimeg', 'FuSSiMeG')]

# Directory holding the GO annotation data files.
# NOTE(review): hard-coded to a user's home directory.
GO_DATA_DIR = '/home/einarr/data'

# Module-level shared state, populated by the Load* functions below.
evidence = None
go = None
class GoTermView (gtk.Frame):
    """Frame showing id, name, is_a parents and definition of a GO term."""

    def __init__(self):
        gtk.Frame.__init__(self)
        tab = gtk.Table(2, 3, False)
        self._table = tab
        # Row 0: term name label.
        self._name = gtk.Label('')
        self._name.set_line_wrap(True)
        self._name.set_alignment(0, 0)
        name_label = gtk.Label('Name:')
        name_label.set_alignment(0, 0)
        tab.attach(name_label, 0, 1, 0, 1, gtk.FILL, gtk.FILL, 5, 5)
        tab.attach(self._name, 1, 2, 0, 1, gtk.FILL|gtk.EXPAND, gtk.FILL, 5, 5)
        # Row 1: buttons for the term's is_a parents (rebuilt per term).
        self._isa_parents = gtk.HBox()
        isa_parents_label = gtk.Label('Is a:')
        tab.attach(isa_parents_label, 0, 1, 1, 2, gtk.FILL, gtk.FILL, 5, 5)
        tab.attach(self._isa_parents, 1, 2, 1, 2, gtk.FILL, gtk.FILL, 5, 5)
        # Row 2: scrollable, word-wrapped definition text.
        self._def = gtk.TextBuffer()
        textview = gtk.TextView(self._def)
        textview.set_wrap_mode(gtk.WRAP_WORD)
        scrolled_window = gtk.ScrolledWindow()
        scrolled_window.add(textview)
        def_label = gtk.Label('Def:')
        def_label.set_alignment(0.0, 0.0)
        tab.attach(def_label, 0, 1, 2, 3, gtk.FILL, gtk.FILL, 5, 5)
        tab.attach(scrolled_window, 1, 2, 2, 3, gtk.FILL|gtk.EXPAND, gtk.FILL|gtk.EXPAND, 5, 5)
        self._tab = tab
        self.add(tab)
        self.set_go_term(None)

    def set_go_term(self, term):
        """Show `term` (a GOTerm mapping), or clear the view when None."""
        if term:
            self.set_label(term['id'])
            self._name.set_text(term['name'])
            self._def.set_text(term['def'])
            # Replace the is_a row with one button per parent id.
            self._tab.remove(self._isa_parents)
            self._isa_parents = gtk.HBox()
            for p in term['is_a']:
                btn = gtk.Button(p)
                btn.show()
                self._isa_parents.add(btn)
            self._isa_parents.show()
            self._tab.attach(self._isa_parents, 1, 2, 1, 2, gtk.FILL, gtk.FILL, 5, 5)
        else:
            self.set_label('GO Term')
            self._name.set_text('')
            self._def.set_text('')
            # Clear the is_a row.
            self._tab.remove(self._isa_parents)
            self._isa_parents = gtk.HBox()
            self._tab.attach(self._isa_parents, 1, 2, 1, 2, gtk.FILL, gtk.FILL, 5, 5)
class GeneOntologyTree (gtk.HPaned):
    """Browser pane: GO tree with checkboxes on the left, term details right."""

    def __init__(self, network):
        gtk.HPaned.__init__(self)
        # Initial position of the pane divider, in pixels.
        self.set_position(400)
        treemodel = geneontology.get_go_treestore(network)
        self._treemodel = treemodel
        self._tree_view = gtk.TreeView(treemodel)
        # GO ids whose checkbox (model column 2) is ticked.
        self._selected_terms = set()
        self._tree_view.set_fixed_height_mode(True)

        # Set up context menu
        self._context_menu = GoTermContextMenu(treemodel, self._tree_view)
        self._tree_view.connect('popup_menu', self._popup_menu)
        self._tree_view.connect('button_press_event', self._on_button_press)

        # View column 0: GO id text (model column 0).
        renderer = gtk.CellRendererText()
        go_column = gtk.TreeViewColumn('GO ID', renderer, text=0)
        go_column.set_sizing(gtk.TREE_VIEW_COLUMN_FIXED)
        go_column.set_fixed_width(200)
        go_column.set_resizable(True)
        self._tree_view.insert_column(go_column, 0)

        # View column 1: selection checkbox (model column 2).
        renderer = gtk.CellRendererToggle()
        renderer.set_property('activatable', True)
        renderer.connect('toggled', self._toggle_selected)
        renderer.set_active(True)
        renderer.set_property('mode', gtk.CELL_RENDERER_MODE_ACTIVATABLE)
        go_column = gtk.TreeViewColumn('T', renderer, active=2)
        go_column.set_fixed_width(20)
        go_column.set_sizing(gtk.TREE_VIEW_COLUMN_FIXED)
        go_column.set_resizable(True)
        self._tree_view.insert_column(go_column, 1)

        # View column 2: term name (model column 1).
        renderer = gtk.CellRendererText()
        go_column = gtk.TreeViewColumn('Name', renderer, text=1)
        go_column.set_fixed_width(200)
        go_column.set_sizing(gtk.TREE_VIEW_COLUMN_FIXED)
        go_column.set_resizable(True)
        self._tree_view.insert_column(go_column, 2)

        self._desc_view = GoTermView()
        self._tree_view.connect('cursor-changed', self._on_cursor_changed)
        scrolled_window = gtk.ScrolledWindow()
        scrolled_window.add(self._tree_view)
        self.add1(scrolled_window)
        self.add2(self._desc_view)
        self.show_all()

    def _on_cursor_changed(self, tree):
        # Show details for the row under the cursor (model column 3 = GOTerm).
        path, col = self._tree_view.get_cursor()
        current = self._treemodel.get_iter(path)
        term = self._treemodel.get_value(current, 3)
        self._desc_view.set_go_term(term)

    ##
    ## GTK Callback functions
    ##
    def _popup_menu(self, *rest):
        # NOTE(review): self.menu is never assigned in this class; this
        # looks like it should be self._context_menu — confirm.
        self.menu.popup(None, None, None, 0, 0)

    def _on_button_press(self, widget, event):
        # Track the row under the pointer; right-click (button 3) opens
        # the context menu for it.
        path = widget.get_path_at_pos(int(event.x), int(event.y))
        iter = None
        if path:
            iter = self._treemodel.get_iter(path[0])
            obj = self._treemodel.get_value(iter, 3)
        else:
            obj = None
        self._context_menu.set_current_term(obj, iter)
        if event.button == 3:
            self._context_menu.popup(None, None, None, event.button, event.time)

    def _toggle_selected(self, renderer, path):
        # Flip the checkbox state and keep _selected_terms in sync.
        iter = self._treemodel.get_iter(path)
        selected = self._treemodel.get_value(iter, 2)
        id = self._treemodel.get_value(iter, 0)
        self._treemodel.set_value(iter, 2, not selected)
        if selected:
            self._selected_terms.remove(id)
        else:
            self._selected_terms.add(id)
class GoTermContextMenu (gtk.Menu):
    """Context menu for GO terms in the gene ontology browser"""

    def __init__(self, treemodel, treeview):
        self._treemodel = treemodel
        self._treeview = treeview
        # The term/row the next menu action applies to; set via
        # set_current_term before the menu is popped up.
        self._current_term = None
        self._current_iter = None
        gtk.Menu.__init__(self)

        # Populate menu
        self._expand_item = i = gtk.MenuItem('Expand')
        i.connect('activate', self._on_expand_subtree, treemodel, treeview)
        self.append(i)
        i.show()
        self._collapse_item = i = gtk.MenuItem('Collapse')
        i.connect('activate', self._on_collapse_subtree, treemodel, treeview)
        self.append(i)
        i.show()
        self._select_subtree_item = i = gtk.MenuItem('Select subtree')
        i.connect('activate', self._on_select_subtree, treemodel, treeview)
        self.append(i)
        i.show()

    def set_current_term(self, term, it):
        """Remember which term/tree-iter the menu actions should target."""
        self._current_term = term
        self._current_iter = it

    def _on_expand_subtree(self, item, treemodel, treeview):
        path = treemodel.get_path(self._current_iter)
        treeview.expand_row(path, True)

    def _on_collapse_subtree(self, item, treemodel, treeview):
        treeview.collapse_row(treemodel.get_path(self._current_iter))

    def _on_select_subtree(self, item, treemodel, treeview):
        # Make every term reachable from the current one (BFS over the
        # module-global `go`) the project-wide 'go-terms' selection.
        logger.log('notice', 'Selecting subtree from GO id: %s (%s)' %
                   (self._current_term['id'], self._current_term['name']))
        ids = [x['id'] for x in networkx.bfs(go, self._current_term)]
        project.project.set_selection('go-terms', set(ids))
class LoadGOFunction(workflow.Function):
    """Load the Gene Ontology (once) and show it in a browser tab."""

    def __init__(self):
        workflow.Function.__init__(self, 'load-go', 'Load Gene Ontology')

    def run(self):
        global go
        if go:
            # Already loaded; nothing to do.
            return
        go = geneontology.read_default_go()
        tab_label = gtk.Label('_Gene Ontology')
        tab_label.set_use_underline(True)
        browser_widget = GeneOntologyTree(go)
        fluents.app['bottom_notebook'].append_page(browser_widget, tab_label)
class LoadAnnotationsFunction(workflow.Function):
    """Load condensed GOA evidence counts into a dataset.

    Reads GO_DATA_DIR/goa-condensed: the first line lists evidence
    codes, each following line is '<go-term> <count per code>...'.
    Returns a single ['go-terms' x 'evidence'] Dataset and stores it
    in the module-global `evidence`.
    """

    def __init__(self):
        workflow.Function.__init__(self, 'load-go-ann', 'Load Annotations')
        self.annotations = None

    def run(self):
        global evidence
        f = open(GO_DATA_DIR + '/goa-condensed')
        try:
            ev_codes = f.readline().split()
            lines = f.readlines()
        finally:
            # BUG FIX: the original leaked this file handle.
            f.close()
        go_terms = []
        m = zeros((len(lines), len(ev_codes)))
        for i, l in enumerate(lines):
            values = l.split()
            go_terms.append(values[0])
            for j, v in enumerate(values[1:]):
                m[i, j] = float(v.strip())
        d = dataset.Dataset(m,
                            [['go-terms', go_terms], ['evidence', ev_codes]],
                            name='GO evidence')
        evidence = d
        return [d]
class GOWeightDialog(gtk.Dialog):
    """Modal dialog for the GO gene-list influence thresholds
    (similarity and rank), each edited with a spin button."""

    def __init__(self):
        gtk.Dialog.__init__(self, 'GO Gene List Influence',
                            None,
                            gtk.DIALOG_MODAL | gtk.DIALOG_DESTROY_WITH_PARENT,
                            (gtk.STOCK_OK, gtk.RESPONSE_OK,
                             gtk.STOCK_CANCEL, gtk.RESPONSE_CANCEL))
        # 2x2 table: one row per threshold, label + spin button.
        table = gtk.Table(2, 2)
        sim_lbl = gtk.Label('Similarity threshold: ')
        table.attach(sim_lbl, 0, 1, 0, 1)
        # Adjustment args: value, lower, upper, step incr, page incr, page size.
        adjustment = gtk.Adjustment(0, 0, 10, 0.1, 1.0, 1.0)
        sim_spin = gtk.SpinButton(adjustment, 0.0, 2)
        table.attach(sim_spin, 1, 2, 0, 1)
        rank_lbl = gtk.Label('Rank threshold: ')
        table.attach(rank_lbl, 0, 1, 1, 2)
        rank_adj = gtk.Adjustment(0, 0, 10, 0.1, 1.0, 1.0)
        rank_spin = gtk.SpinButton(rank_adj, 0.0, 2)
        table.attach(rank_spin, 1, 2, 1, 2)
        sim_lbl.show()
        sim_spin.show()
        rank_lbl.show()
        rank_spin.show()
        table.show()
        self.vbox.add(table)
        self._sim_spin = sim_spin
        self._rank_spin = rank_spin

    def set_options(self, options):
        """Initialize the spin buttons from an options mapping."""
        self._sim_spin.set_value(options['similarity_threshold'])
        self._rank_spin.set_value(options['rank_threshold'])

    def set_editable(self, editable):
        """Enable or disable editing of both spin buttons."""
        self._sim_spin.set_sensitive(editable)
        self._rank_spin.set_sensitive(editable)

    def update_options(self, options):
        """Write the spin-button values back into the options mapping."""
        options['similarity_threshold'] = self._sim_spin.get_value()
        options['rank_threshold'] = self._rank_spin.get_value()
class DistanceToSelectionFunction(workflow.Function):
    """For every row of a similarity matrix, compute the maximum
    similarity to the currently selected items."""

    def __init__(self):
        workflow.Function.__init__(self, 'dist-to-sel', 'Dist. to Selection')
        self.options = DistanceToSelectionOptions()

    def run(self, similarities, selection):
        """similarities: square similarity dataset; selection: current
        project selection. Returns [Dataset] of per-row max similarity
        over the selected columns."""
        self.show_gui(similarities, self.options)
        retval = []
        dims = similarities.get_dim_name()
        # A similarity matrix is expected to have dims (d, _d) or (_d, d).
        if dims[0] != "_%s" % dims[1] and dims[1] != "_%s" % dims[0]:
            logger.log('warning', 'Are you sure this is a similarity matrix?')
        dim = dims[0]
        # FIX: removed leftover debug prints (one of which computed the
        # indices a second time).
        indices = similarities.get_indices(dim, selection[dim])
        m = apply_along_axis(max, 1, similarities.asarray().take(indices, 1))
        retval.append(dataset.Dataset(m, [(dim, similarities[dim]),
                                          ("_dummy", '0')]))
        return retval

    def show_gui(self, similarities, options, edit=True):
        """Run the options dialog; returns updated options on OK,
        otherwise the options passed in."""
        dialog = DistanceToSelectionOptionsDialog([similarities], self.options)
        response = dialog.run()
        dialog.hide()
        if response == gtk.RESPONSE_OK:
            dialog.set_output()
            return dialog.get_options()
        else:
            return options
class GOWeightFunction(workflow.Function):
    """Weight a gene list by GO influence (calculation not yet implemented)."""

    def __init__(self):
        # BUG FIX: id was 'load-go-ann', colliding with
        # LoadAnnotationsFunction; renamed to a unique id.
        workflow.Function.__init__(self, 'go-weight', 'GO Influence')
        self.options = GOWeightOptions()

    def run(self, genelist, similarity):
        ## Show dialog box
        self.show_gui(self.options)
        ## Make sure the datasets share exactly one dimension so the
        ## operations below are well defined.
        common_dims = genelist.common_dims(similarity)
        if len(common_dims) == 0:
            logger.log('error', 'No common dimension in the selected datasets.')
            # BUG FIX: previously fell through and raised IndexError below.
            return
        elif len(common_dims) > 1:
            logger.log('error', "More than one common dimension in the " +
                       "selected datasets. Don't know what to do.")
            # BUG FIX: previously fell through with an ambiguous dimension.
            return
        gene_dim = common_dims[0]
        logger.log('debug', 'Assuming genes are in dimension: %s' % gene_dim)
        ## Do the calculations.
        d = {}

    def show_gui(self, options, edit=True):
        """Run the GO weight dialog; returns updated options on OK,
        otherwise the options passed in."""
        dialog = GOWeightDialog()
        dialog.set_options(self.options)
        dialog.show_all()
        dialog.set_editable(edit)
        response = dialog.run()
        dialog.hide()
        if response == gtk.RESPONSE_OK:
            return dialog.update_options(self.options)
        else:
            return options
class DistanceToSelectionOptionsDialog(workflow.OptionsDialog):
    """Options dialog for DistanceToSelectionFunction with a single
    'X' input slot."""

    def __init__(self, data, options):
        workflow.OptionsDialog.__init__(self, data, options, ['X'])
class TTestOptionsDialog(workflow.OptionsDialog):
    """Options dialog for the t-test; adds a 'Limit' notebook page with
    a spin button for a cutoff in [0, 1]."""

    def __init__(self, data, options):
        workflow.OptionsDialog.__init__(self, data, options,
                                        ['X', 'Categories'])
        vb = gtk.VBox()
        l = gtk.Label("Limit")
        # Adjustment args: value, lower, upper, step incr, page incr, page size.
        adj = gtk.Adjustment(0, 0.0, 1.0, 0.01, 1.0, 1.0)
        sb = gtk.SpinButton(adj, 0.0, 2)
        l.show()
        sb.show()
        vb.add(l)
        vb.add(sb)
        vb.show()
        # NOTE(review): the spin button's value is never read back into
        # the options — confirm whether the limit is actually applied.
        self.nb.insert_page(vb, gtk.Label("Limit"), -1)
class TTestFunction(workflow.Function):
    """Two-sample t-test between nonsmoker and current-smoker groups.

    Produces t-value/p-value datasets and optional histogram/volcano
    plots, as configured in the options dialog.
    """

    def __init__(self):
        workflow.Function.__init__(self, 't-test', 't-test')
        self.options = TTestOptions()

    def run(self, x, categories):
        self.show_gui(x, categories)
        retval = []
        m = x.asarray()
        c = categories.asarray()
        # Hard-coded category columns: 0 = nonsmokers, 2 = current smokers.
        ns = m.take(nonzero(c[:, 0]), 0)[0]
        cs = m.take(nonzero(c[:, 2]), 0)[0]
        tscores = stats.ttest_ind(ns, cs)
        # FIX: removed leftover debug print of self.options['out_data'].
        tds = dataset.Dataset(tscores[0], [('gene_id', x['gene_id']),
                                           ('_t', ['0'])],
                              name='t-values')
        if 't-value' in self.options['out_data']:
            retval.append(tds)
        pds = dataset.Dataset(tscores[1], [('gene_id', x['gene_id']),
                                           ('_p', ['0'])],
                              name='p-values')
        if 'p-value' in self.options['out_data']:
            retval.append(pds)
        if ProbabilityHistogramPlot in self.options['out_plots']:
            retval.append(ProbabilityHistogramPlot(pds))
        if VolcanoPlot in self.options['out_plots']:
            # Fold change: ratio of the two group means per gene.
            fc = apply_along_axis(mean, 0, ns) / apply_along_axis(mean, 0, cs)
            fcds = dataset.Dataset(fc, [('gene_id', x['gene_id']),
                                        ('_dummy', ['0'])],
                                   name="Fold change")
            retval.append(VolcanoPlot(fcds, pds, 'gene_id'))
        return retval

    def show_gui(self, x, categories):
        """Run the options dialog; returns updated options on OK,
        otherwise the current options."""
        dialog = TTestOptionsDialog([x, categories], self.options)
        response = dialog.run()
        dialog.hide()
        if response == gtk.RESPONSE_OK:
            dialog.set_output()
            return dialog.get_options()
        else:
            # BUG FIX: 'options' was an undefined name here (NameError).
            return self.options
class SetICFunction(workflow.Function):
    """Register a dataset as the information-content (IC) source for
    the 'go-terms' dimension on the current workflow."""

    def __init__(self):
        workflow.Function.__init__(self, 'set-ic', 'Set IC')

    def run(self, ds):
        # Guard clause: only datasets over 'go-terms' can serve as IC.
        if 'go-terms' not in ds.get_dim_name():
            logger.log('warning', 'Cannot use this dataset as IC on the go-terms dimension')
            return
        main.workflow.current_ic = ds
class PlotDagFunction(workflow.Function):
    """Build and plot a GO DAG spanning the currently selected terms."""

    def __init__(self):
        workflow.Function.__init__(self, 'go-dag', 'Build DAG')

    def run(self, selection):
        g = self.get_network(list(selection['go-terms']))
        # NOTE(review): this dataset is built but never returned or used;
        # only the plot is returned — confirm whether ds should be output.
        ds = dataset.GraphDataset(networkx.adj_matrix(g),
                                  [('go-terms', g.nodes()), ('_go-terms', g.nodes())],
                                  name="DAG")
        return [ThresholdDagPlot(g)]

    def get_network(self, terms, subtree='bp'):
        """Returns a DAG connecting the given terms by including their parents
        up to the level needed to connect them. The subtree parameter is one of
        mf - molecular function
        bp - biological process
        cc - cellular component"""
        rpy.r.library("GOstats")
        # Pick the R parent-map matching the requested GO subtree.
        if subtree == 'mf':
            subtree_r = rpy.r.GOMFPARENTS
        elif subtree == 'bp':
            subtree_r = rpy.r.GOBPPARENTS
        elif subtree == 'cc':
            subtree_r = rpy.r.GOCCPARENTS
        else:
            raise Exception("Unknown subtree. Use mf, bp or cc.")
        g = rpy.r.GOGraph(terms, subtree_r)
        edges = rpy.r.edges(g)
        # Convert the R graph's child -> parents edge map into a networkx
        # DiGraph with edges pointing parent -> child.
        nxgraph = networkx.DiGraph()
        for child, d in edges.items():
            for parent in d.keys():
                nxgraph.add_edge(parent, child)
        return nxgraph
class TTestOptions(workflow.Options):
    """Options for TTestFunction: available plots and output datasets."""

    def __init__(self):
        workflow.Options.__init__(self)
        # Entries are (plot class, dialog label, enabled by default).
        self['all_plots'] = [(ProbabilityHistogramPlot, 'Histogram', True),
                             # BUG FIX: the volcano plot was also labeled
                             # 'Histogram' (copy-paste error).
                             (VolcanoPlot, 'Volcano plot', True)]
        # Entries are (id, dialog label, enabled by default).
        self['all_data'] = [('t-value', 't-values', True),
                            ('p-value', 'Probabilities', True),
                            ('categories', 'Categories', False)]
        self['out_data'] = ['t-value', 'p-value']
class DistanceToSelectionOptions(workflow.Options):
    """Options for DistanceToSelectionFunction: one output dataset."""

    def __init__(self):
        workflow.Options.__init__(self)
        # Entries are (id, dialog label, enabled by default).
        self['all_data'] = [('mindist', 'Minimum distance', True)]
class GOWeightOptions(workflow.Options):
    """Options for GOWeightFunction: similarity and rank thresholds
    (both edited in GOWeightDialog, defaulting to 0.0)."""

    def __init__(self):
        workflow.Options.__init__(self)
        self['similarity_threshold'] = 0.0
        self['rank_threshold'] = 0.0
class ProbabilityHistogramPlot(plots.HistogramPlot):
    """Histogram (50 bins) of p-values, displayed as 'Confidence'."""

    def __init__(self, ds):
        plots.HistogramPlot.__init__(self, ds, name="Confidence", bins=50)
class VolcanoPlot(plots.ScatterPlot):
    """Scatter plot of fold change vs. p-value per gene (volcano plot)."""

    def __init__(self, fold_ds, p_ds, dim, **kw):
        # NOTE(review): the `dim` argument is ignored; 'gene_id' is
        # hard-coded below — confirm intent.
        plots.ScatterPlot.__init__(self, fold_ds, p_ds, 'gene_id', '_dummy',
                                   '0', '0',
                                   name="Volcano plot",
                                   sel_dim_2='_p', **kw)
class DagPlot(plots.Plot):
    """Plot of a GO DAG: nodes as a matplotlib scatter collection laid
    out with graphviz 'dot', edges drawn via networkx.

    Supports rectangle/lasso selection on node positions and coloring
    of nodes from a dataset over `dim` (default 'go-terms').
    """

    def __init__(self, graph, dim='go-terms', pos=None, nodecolor='b', nodesize=40,
                 with_labels=False, name='DAG Plot'):
        plots.Plot.__init__(self, name)
        self.nodes = graph.nodes()
        self._map_ids = self.nodes
        self.graph = graph
        self._pos = pos
        self._cmap = matplotlib.cm.summer
        self._nodesize = nodesize
        self._nodecolor = nodecolor
        self._with_labels = with_labels
        # Ids currently visible; used by thresholding subclasses.
        self.visible = set()
        self.current_dim = dim
        if not self._pos:
            self._pos = self._calc_pos(graph)
        # Node coordinates, in the same order as self.nodes.
        self._xy = asarray([self._pos[node] for node in self.nodes])
        self.xaxis_data = self._xy[:, 0]
        self.yaxis_data = self._xy[:, 1]

        # initial line collection
        self.default_props = {'nodesize': 50,
                              'nodecolor': 'blue',
                              'edge_color': 'gray',
                              'edge_color_selected': 'red'}
        self.node_collection = None
        self.edge_collection = None
        self.node_labels = None
        lw = zeros(self.xaxis_data.shape)
        self.node_collection = self.axes.scatter(self.xaxis_data, self.yaxis_data,
                                                 s=self._nodesize,
                                                 c=self._nodecolor,
                                                 linewidth=lw,
                                                 zorder=3)
        self._mappable = self.node_collection
        self._mappable.set_cmap(self._cmap)
        # selected nodes is a transparent graph that adjust node-edge visibility
        # according to the current selection needed to get get the selected
        # nodes 'on top' as zorder may not be defined individually
        self.selected_nodes = self.axes.scatter(self.xaxis_data,
                                                self.yaxis_data,
                                                s=self._nodesize,
                                                c=self._nodecolor,
                                                edgecolor='r',
                                                linewidth=lw,
                                                zorder=4,
                                                alpha=0)
        edge_color = self.default_props['edge_color']
        self.edge_collection = networkx.draw_networkx_edges(self.graph,
                                                            self._pos,
                                                            ax=self.axes,
                                                            edge_color=edge_color)
        # edge color rgba-arrays: one row per edge, all the default color
        self._edge_color_rgba = matlib.repmat(plots.ColorConverter().to_rgba(edge_color),
                                              self.graph.number_of_edges(), 1)
        self._edge_color_selected = plots.ColorConverter().to_rgba(self.default_props['edge_color_selected'])
        if self._with_labels:
            self.node_labels = networkx.draw_networkx_labels(self.graph,
                                                             self._pos,
                                                             ax=self.axes)
        # remove axes, frame and grid
        self.axes.set_xticks([])
        self.axes.set_yticks([])
        self.axes.grid(False)
        self.axes.set_frame_on(False)
        self.fig.subplots_adjust(left=0, right=1, bottom=0, top=1)

    def _calc_pos(self, graph):
        """Calculates position for graph nodes using 'dot' layout."""
        # Strip the 'GO:' prefix for graphviz and re-add it afterwards.
        gv_graph = networkx.DiGraph()
        for start, end in graph.edges():
            gv_graph.add_edge(start.replace('GO:', ''), end.replace('GO:', ''))
        pos_gv = networkx.pygraphviz_layout(gv_graph, prog="dot")
        pos = {}
        for k, v in pos_gv.items():
            if k != "all":
                pos["GO:%s" % k] = v
            else:
                pos[k] = v
        return pos

    def points_in_rect(self, x1, y1, x2, y2, key):
        """Return the set of node ids inside the (x1,y1)-(x2,y2) rectangle.

        If an IC dataset is active on the workflow, only ids in
        self.visible are returned.
        """
        ydata = self.yaxis_data
        xdata = self.xaxis_data
        # find indices of selected area; normalize corner order first
        if x1 > x2:
            x1, x2 = x2, x1
        if y1 > y2:
            y1, y2 = y2, y1
        assert x1 <= x2
        assert y1 <= y2
        index = nonzero((xdata > x1) & (xdata < x2) & (ydata > y1) & (ydata < y2))[0]
        if getattr(main.workflow, 'current_ic', None) != None:
            ids = self.visible.intersection([self.nodes[i] for i in index])
        else:
            ids = set([self.nodes[i] for i in index])
        return ids

    def rectangle_select_callback(self, x1, y1, x2, y2, key):
        """Rubber-band selection: forward the hit ids to the listener."""
        ids = self.points_in_rect(x1, y1, x2, y2, key)
        ids = self.update_selection(ids, key)
        self.selection_listener(self.current_dim, ids)

    def lasso_select_callback(self, verts, key=None):
        """Lasso selection: nodes whose position falls inside `verts`."""
        xys = c_[self.xaxis_data[:, newaxis], self.yaxis_data[:, newaxis]]
        index = nonzero(points_inside_poly(xys, verts))[0]
        ids = [self.nodes[i] for i in index]
        ids = self.update_selection(ids, key)
        self.selection_listener(self.current_dim, ids)

    def set_current_selection(self, selection):
        """Outline the selected nodes and recolor edges whose both ends
        are selected, then redraw the canvas."""
        linewidth = zeros(self.xaxis_data.shape)
        edge_color_rgba = self._edge_color_rgba.copy()
        index = [i for i in range(len(self.nodes)) if self.nodes[i] in selection[self.current_dim]]
        if len(index) > 0:
            linewidth[index] = 2
            idents = selection[self.current_dim]
            edge_index = [i for i, edge in enumerate(self.graph.edges()) if (edge[0] in idents and edge[1] in idents)]
            if len(edge_index) > 0:
                for i in edge_index:
                    edge_color_rgba[i, :] = self._edge_color_selected
        # Force raw colors on the edge collection (bypass scalar mapping).
        self._A = None
        self.edge_collection._colors = edge_color_rgba
        self.selected_nodes.set_linewidth(linewidth)
        self.canvas.draw()

    def is_mappable_with(self, obj):
        """Returns True if dataset/selection is mappable with this plot.
        """
        if isinstance(obj, fluents.dataset.Dataset):
            if self.current_dim in obj.get_dim_name():
                return True
        return False

    def _update_color_from_dataset(self, ds):
        """Updates the facecolors from a dataset.
        """
        array = ds.asarray()
        #only support for 2d-arrays:
        try:
            m, n = array.shape
        except:
            raise ValueError, "No support for more than 2 dimensions."
        # is dataset a vector or matrix?
        if not n == 1:
            # we have a category dataset: weight each category column by
            # its index and sum, giving one scalar per row
            if isinstance(ds, fluents.dataset.CategoryDataset):
                vec = dot(array, diag(arange(n))).sum(1)
            else:
                vec = array.sum(1)
        else:
            vec = array.ravel()
        indices = ds.get_indices(self.current_dim, self.nodes)
        nodes = ds.existing_identifiers(self.current_dim, self.nodes)
        v = vec.take(indices, 0)
        # Clamp +/-inf to the finite extremes so the color map stays usable.
        vec_min = min(vec[vec > -inf])
        vec_max = max(vec[vec < inf])
        v[v == inf] = vec_max
        v[v == -inf] = vec_min
        d = dict(zip(nodes, list(v)))
        # Nodes absent from the dataset get the minimum value.
        map_vec = zeros(len(self.nodes))
        for i, n in enumerate(self.nodes):
            map_vec[i] = d.get(n, vec_min)
        # update facecolors
        self.node_collection.set_array(map_vec)
        self.node_collection.set_clim(vec_min, vec_max)
        self.node_collection.update_scalarmappable()  # sets facecolors from array
        self.canvas.draw()
class ThresholdDagPlot(DagPlot, plots.PlotThresholder):
    """DagPlot with an IC threshold: only nodes inside the threshold
    range (self.visible) can be selected."""

    def __init__(self, graph, dim='go-terms', pos=None, nodecolor='b', nodesize=40,
                 with_labels=False, name='DAG Plot'):
        # BUG FIX: the constructor arguments were previously discarded
        # and hard-coded defaults were passed to DagPlot.__init__.
        DagPlot.__init__(self, graph, dim=dim, pos=pos,
                         nodecolor=nodecolor, nodesize=nodesize,
                         with_labels=with_labels, name=name)
        plots.PlotThresholder.__init__(self, "IC")

    def rectangle_select_callback(self, x1, y1, x2, y2, key):
        """Select only visible (not thresholded-out) nodes in the rect."""
        ids = self.points_in_rect(x1, y1, x2, y2, key)
        ids = self.visible.intersection(ids)
        ids = self.update_selection(ids, key)
        self.selection_listener(self.current_dim, ids)

    def _update_color_from_dataset(self, ds):
        """Color nodes from ds and initialize the threshold spin buttons
        to span the dataset's finite value range (padded by 0.1)."""
        DagPlot._update_color_from_dataset(self, ds)
        self.set_threshold_dataset(ds)
        a = ds.asarray()
        a_max = max(a[a < inf])
        a_min = min(a[a > -inf])
        self._sb_min.set_range(a_min - 0.1, a_max + 0.1)
        self._sb_min.set_value(a_min - 0.1)
        self._sb_max.set_range(a_min - 0.1, a_max + 0.1)
        self._sb_max.set_value(a_max + 0.1)

View File

@ -1,445 +0,0 @@
import sys,os
import webbrowser
from fluents import logger, plots,workflow,dataset,main
from fluents.lib import blmfuncs,nx_utils,validation,engines,cx_stats,cx_utils
import gobrowser, geneontology
import scipy
import networkx as nx
class SmallTestWorkflow(workflow.Workflow):
    """Small gene-expression analysis workflow: data import, models,
    gene queries and Gene Ontology tools."""

    name = 'Smokers'
    ident = 'smokers'
    description = 'A small test workflow for gene expression analysis.'

    def __init__(self):
        workflow.Workflow.__init__(self)

        # DATA IMPORT
        load = workflow.Stage('load', 'Data')
        load.add_function(DatasetLoadFunctionSmokerSmall())
        load.add_function(DatasetLoadFunctionSmokerMedium())
        load.add_function(DatasetLoadFunctionSmokerFull())
        load.add_function(DatasetLoadFunctionSmokerGO())
        #load.add_function(DatasetLoadFunctionCYCLE())
        self.add_stage(load)

        # NETWORK PREPROCESSING
        #net = workflow.Stage('net', 'Network integration')
        #net.add_function(DiffKernelFunction())
        #net.add_function(ModKernelFunction())
        #net.add_function(RandDiffKernelFunction())
        #self.add_stage(net)

        # BLM's (bilinear models)
        model = workflow.Stage('models', 'Models')
        model.add_function(blmfuncs.PCA())
        model.add_function(blmfuncs.PLS())
        model.add_function(blmfuncs.LPLS())
        model.add_function(SAM())
        #model.add_function(bioconFuncs.SAM(app))
        self.add_stage(model)

        # External gene queries (open a browser window)
        query = workflow.Stage('query', 'Gene Query')
        query.add_function(NCBIQuery())
        query.add_function(KEGGQuery())
        self.add_stage(query)

        # Gene Ontology
        go = workflow.Stage('go', 'Gene Ontology')
        go.add_function(gobrowser.LoadGOFunction())
        go.add_function(gobrowser.SetICFunction())
        # go.add_function(gobrowser.GOWeightFunction())
        # go.add_function(gobrowser.DistanceToSelectionFunction())
        # go.add_function(gobrowser.TTestFunction())
        go.add_function(gobrowser.PlotDagFunction())
        go.add_function(GoEnrichment())
        go.add_function(GoEnrichmentCond())
        self.add_stage(go)

        # EXTRA PLOTS
        #plt = workflow.Stage('net', 'Network')
        #plt.add_function(nx_analyser.KeggNetworkAnalyser())
        #self.add_stage(plt)

        logger.log('debug', 'Small test workflow is now active')
class DatasetLoadFunctionSmokerSmall(workflow.Function):
    """Load every .ftsv dataset from data/smokers-small/."""

    def __init__(self):
        workflow.Function.__init__(self, 'load_small', 'Smoker (Small)')

    def run(self):
        path = 'data/smokers-small/'
        out = []
        for fname in os.listdir(path):
            if fname.endswith('.ftsv'):
                input_file = open(os.path.join(path, fname))
                try:
                    out.append(dataset.read_ftsv(input_file))
                finally:
                    # BUG FIX: the original left every file open.
                    input_file.close()
        return out
class DatasetLoadFunctionSmokerMedium(workflow.Function):
    """Load every .ftsv dataset from data/smokers-medium/."""

    def __init__(self):
        workflow.Function.__init__(self, 'load_medium', 'Smoker (Medium)')

    def run(self):
        path = 'data/smokers-medium/'
        out = []
        for fname in os.listdir(path):
            if fname.endswith('.ftsv'):
                input_file = open(os.path.join(path, fname))
                try:
                    out.append(dataset.read_ftsv(input_file))
                finally:
                    # BUG FIX: the original left every file open.
                    input_file.close()
        return out
class DatasetLoadFunctionSmokerFull(workflow.Function):
    """Load every .ftsv dataset from data/smokers-full/."""

    def __init__(self):
        workflow.Function.__init__(self, 'load_full', 'Smoker (Full)')

    def run(self):
        path = 'data/smokers-full/'
        out = []
        for fname in os.listdir(path):
            if fname.endswith('.ftsv'):
                input_file = open(os.path.join(path, fname))
                try:
                    out.append(dataset.read_ftsv(input_file))
                finally:
                    # BUG FIX: the original left every file open.
                    input_file.close()
        return out
class DatasetLoadFunctionSmokerGO(workflow.Function):
    """Load every .ftsv dataset from data/smokers-go/."""

    def __init__(self):
        workflow.Function.__init__(self, 'load_go', 'Smoker (GO)')

    def run(self):
        path = 'data/smokers-go/'
        out = []
        for fname in os.listdir(path):
            if fname.endswith('.ftsv'):
                input_file = open(os.path.join(path, fname))
                try:
                    out.append(dataset.read_ftsv(input_file))
                finally:
                    # BUG FIX: the original left every file open.
                    input_file.close()
        return out
class DatasetLoadFunctionCYCLE(workflow.Function):
    """Loader for pickled CYCLE datasets."""

    def __init__(self):
        workflow.Function.__init__(self, 'load_data', 'Cycle')

    def run(self):
        # Hard-coded path; the guard below is therefore always true.
        filename = 'fluents/data/CYCLE'
        if filename:
            return dataset.from_file(filename)
##### WORKFLOW SPECIFIC FUNCTIONS ######
class SAM(workflow.Function):
    """Significance Analysis of Microarrays via the R 'siggenes' package.

    Runs SAM on x (samples x genes) with class labels derived from the
    category dataset y, selects variables with q-value < alpha, updates
    the project selection and returns q/p-value datasets and a plot.
    """

    def __init__(self, id='sam', name='SAM'):
        workflow.Function.__init__(self, id, name)

    def run(self, x, y):
        n_iter = 50    # B: number of permutations
        alpha = 0.01   # cut off on qvals
        ###############
        # setup prelimenaries
        import rpy
        rpy.r.library("siggenes")
        rpy.r.library("multtest")
        # Collapse the indicator matrix y into integer class labels.
        cl = scipy.dot(y.asarray(), scipy.diag(scipy.arange(y.shape[1]))).sum(1)
        data = x.asarray().T
        sam = rpy.r.sam(data, cl=cl, B=n_iter, var_equal=False, med=False,
                        s0=scipy.nan, rand=scipy.nan)
        # BUG FIX: the "p.value"/"q.value" slots were assigned to swapped
        # names, so the alpha cutoff was applied to p-values.
        pvals = scipy.asarray(rpy.r.slot(sam, "p.value"))
        qvals = scipy.asarray(rpy.r.slot(sam, "q.value"))
        sam_index = (qvals < alpha).nonzero()[0]

        # Update selection object
        dim_name = x.get_dim_name(1)
        sam_selection = x.get_identifiers(dim_name, indices=sam_index)
        main.project.set_selection(dim_name, sam_selection)
        sel = dataset.Selection('SAM selection')
        sel.select(dim_name, sam_selection)
        logger.log('notice', 'Number of significant variables (SAM): %s' % len(sam_selection))

        ### OUTPUT ###
        xcolname = x.get_dim_name(1)  # genes
        x_col_ids = [xcolname, x.get_identifiers(xcolname, sorted=True)]
        sing_id = ['_john', ['0']]    # singleton dimension
        D_qvals = dataset.Dataset(qvals, (x_col_ids, sing_id), name='q_vals')
        D_pvals = dataset.Dataset(pvals, (x_col_ids, sing_id), name='p_vals')

        # plots: q-values sorted ascending against their rank
        s_indx = qvals.flatten().argsort()
        s_ids = [x_col_ids[0], [x_col_ids[1][i] for i in s_indx]]
        xindex = scipy.arange(len(qvals))
        qvals_s = qvals.take(s_indx)
        D_qs = dataset.Dataset(qvals_s, (s_ids, sing_id), name="sorted qvals")
        Dind = dataset.Dataset(xindex, (s_ids, sing_id), name="dum")
        # BUG FIX: the dimension name was hard-coded to 'gene_ids';
        # use the dataset's actual column dimension.
        st = plots.ScatterPlot(D_qs, Dind, xcolname, '_john', '0', '0',
                               s=10, name='SAM qvals')
        return [D_qvals, D_pvals, D_qs, st, sel]
class DiffKernelFunction(workflow.Function):
    """Smooth expression data with a diffusion kernel derived from the
    network's adjacency matrix."""

    def __init__(self):
        workflow.Function.__init__(self, 'diffkernel', 'Diffusion')

    def run(self, x, a):
        """x is gene expression data, a is the network.
        """
        #sanity check:
        g = a.asnetworkx()
        # Order the adjacency matrix by the sorted gene identifiers so
        # it lines up with the expression columns.
        genes = x.get_identifiers(x.get_dim_name(1), sorted=True)
        W = nx.adj_matrix(g, nodelist=genes)
        X = x.asarray()
        # Center columns; keep the means to add them back afterwards.
        Xc, mn_x = cx_utils.mat_center(X, ret_mn=True)
        out = []
        alpha = 1.0
        beta = 1.0
        K = nx_utils.K_diffusion(W, alpha=alpha, beta=beta, normalised=True)
        Xp = scipy.dot(Xc, K) + mn_x

        # dataset
        row_ids = (x.get_dim_name(0),
                   x.get_identifiers(x.get_dim_name(0),
                                     sorted=True))
        col_ids = (x.get_dim_name(1),
                   x.get_identifiers(x.get_dim_name(1),
                                     sorted=True))
        xout = dataset.Dataset(Xp,
                               (row_ids, col_ids),
                               name=x.get_name() + '_diff' + str(alpha))
        out.append(xout)
        return out
class RandDiffKernelFunction(workflow.Function):
    """Diffusion smoothing as in DiffKernelFunction, but with the
    network node order randomly permuted (a null-model control)."""

    def __init__(self):
        # NOTE(review): id 'diffkernel' collides with DiffKernelFunction —
        # confirm whether function ids must be unique.
        workflow.Function.__init__(self, 'diffkernel', 'Rand. Diff.')

    def run(self, x, a):
        """x is gene expression data, a is the network.
        """
        #sanity check:
        g = a.asnetworkx()
        genes = x.get_identifiers(x.get_dim_name(1))
        # randomise nodelist
        genes = [genes[i] for i in cx_utils.randperm(x.shape[1])]
        W = nx.adj_matrix(g, nodelist=genes)
        X = x.asarray()
        # Center columns; keep the means to add them back afterwards.
        Xc, mn_x = cx_utils.mat_center(X, ret_mn=True)
        out = []
        alpha = 1.
        beta = 1.0
        K = nx_utils.K_diffusion(W, alpha=alpha, beta=beta, normalised=True)
        Xp = scipy.dot(Xc, K) + mn_x

        # dataset
        row_ids = (x.get_dim_name(0),
                   x.get_identifiers(x.get_dim_name(0),
                                     sorted=True))
        col_ids = (x.get_dim_name(1),
                   x.get_identifiers(x.get_dim_name(1),
                                     sorted=True))
        xout = dataset.Dataset(Xp,
                               (row_ids, col_ids),
                               name=x.get_name() + '_diff' + str(alpha))
        out.append(xout)
        return out
class ModKernelFunction(workflow.Function):
    """Smooth expression data with a modularity kernel built from the
    network's adjacency matrix."""

    def __init__(self):
        workflow.Function.__init__(self, 'mokernel', 'Modularity')

    def run(self, x, a):
        alpha = .2
        graph = a.asnetworkx()
        gene_ids = x.get_identifiers(x.get_dim_name(1), sorted=True)
        adjacency = nx.adj_matrix(graph, nodelist=gene_ids)
        # Center columns, smooth with the kernel, then restore the means.
        centered, col_means = cx_utils.mat_center(x.asarray(), ret_mn=True)
        kernel = nx_utils.K_modularity(adjacency, alpha=alpha)
        smoothed = scipy.dot(centered, kernel) + col_means
        # Rebuild the dataset with the original (sorted) identifiers.
        row_ids = (x.get_dim_name(0),
                   x.get_identifiers(x.get_dim_name(0), sorted=True))
        col_ids = (x.get_dim_name(1), gene_ids)
        result = dataset.Dataset(smoothed,
                                 (row_ids, col_ids),
                                 name=x.get_name() + '_mod' + str(alpha))
        return [result]
class NCBIQuery(workflow.Function):
    """Open an NCBI Entrez gene query for the selected genes in a browser."""

    def __init__(self, gene_id_name='gene_ids'):
        self._gene_id_name = gene_id_name
        workflow.Function.__init__(self, 'query', 'NCBI')

    def run(self):
        selection = main.project.get_selection()
        # Guard: need a gene-id selection, and a non-empty one.
        if not selection.has_key(self._gene_id_name):
            logger.log("notice", "Expected gene ids: %s, but got: %s" % (self._gene_id_name, selection.keys()))
            return None
        if len(selection[self._gene_id_name]) == 0:
            logger.log("notice", "No selected genes to query")
            return None
        base = 'http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?'
        options = {r'&db=': 'gene',
                   r'&cmd=': 'retrieve',
                   r'&dopt=': 'full_report'}
        # Entrez expects the uid list joined with '+'.
        options[r'&list_uids='] = '+'.join(selection[self._gene_id_name])
        query = ''.join([key + value for key, value in options.items()])
        webbrowser.open(base + query)
class KEGGQuery(workflow.Function):
    """Open a KEGG DBGET query for the selected genes in a browser."""

    def __init__(self, org='hsa', gene_id_name='gene_ids'):
        self._org = org
        self._gene_id_name = gene_id_name
        workflow.Function.__init__(self, 'query', 'KEGG')

    def run(self, selection):
        # Guard: need a gene-id selection, and a non-empty one.
        if not selection.has_key(self._gene_id_name):
            logger.log("notice", "Expected gene ids: %s, but got. %s" % (self._gene_id_name, selection.keys()))
            return None
        if len(selection[self._gene_id_name]) == 0:
            logger.log("notice", "No selected genes to query")
            return None
        base = r'http://www.genome.jp/dbget-bin/www_bget?'
        # Organism code followed by the '+'-joined gene ids.
        query = self._org + "+" + '+'.join(selection[self._gene_id_name])
        webbrowser.open(base + query)
class GoEnrichment(workflow.Function):
    """GO (BP) term enrichment of the current gene selection, tested
    against the dataset's genes via GOstats' hypergeometric test."""

    def __init__(self):
        workflow.Function.__init__(self, 'goenrich', 'Go Enrichment')

    def run(self, data):
        import rpy
        rpy.r.library("GOstats")

        # Get universe
        # Here, we are using a defined dataset to represent the universe
        if not 'gene_ids' in data:
            logger.log('notice', 'No dimension called [gene_ids] in dataset: %s', data.get_name())
            return
        universe = list(data.get_identifiers('gene_ids'))
        logger.log('notice', 'Universe consists of %s gene ids from %s' % (len(universe), data.get_name()))

        # Get current selection and validate
        curr_sel = main.project.get_selection()
        selected_genes = list(curr_sel['gene_ids'])
        if len(selected_genes) == 0:
            logger.log('notice', 'This function needs a current selection!')
            return

        # Hypergeometric parameter object
        pval_cutoff = 0.9999
        cond = False
        test_direction = 'over'
        # BUG FIX: the computed universe was never passed to the test;
        # supply it through the universeGeneIds slot.
        params = rpy.r.new("GOHyperGParams",
                           geneIds=selected_genes,
                           universeGeneIds=universe,
                           annotation="hgu133a",
                           ontology="BP",
                           pvalueCutoff=pval_cutoff,
                           conditional=cond,
                           testDirection=test_direction
                           )
        # run test
        # result.keys(): ['Count', 'Term', 'OddsRatio', 'Pvalue', 'ExpCount', 'GOBPID', 'Size']
        result = rpy.r.summary(rpy.r.hyperGTest(params))

        # dataset of log p-values over the enriched GO terms
        terms = result['GOBPID']
        pvals = scipy.log(scipy.asarray(result['Pvalue']))
        row_ids = ('go-terms', terms)
        col_ids = ('_john', ['_doe'])
        xout = dataset.Dataset(pvals,
                               (row_ids, col_ids),
                               name='P values (enrichment)')
        return [xout]
class GoEnrichmentCond(workflow.Function):
    """ Enrichment conditioned on dag structure.

    Identical to GoEnrichment except conditional=True, which makes
    GOstats condition each term's test on its significant children.
    """

    def __init__(self):
        # NOTE(review): id 'goenrich' collides with GoEnrichment —
        # confirm whether function ids must be unique.
        workflow.Function.__init__(self, 'goenrich', 'Go Cond. Enrich.')

    def run(self, data):
        import rpy
        rpy.r.library("GOstats")

        # Get universe
        # Here, we are using a defined dataset to represent the universe
        if not 'gene_ids' in data:
            logger.log('notice', 'No dimension called [gene_ids] in dataset: %s', data.get_name())
            return
        universe = list(data.get_identifiers('gene_ids'))
        logger.log('notice', 'Universe consists of %s gene ids from %s' % (len(universe), data.get_name()))

        # Get current selection and validate
        curr_sel = main.project.get_selection()
        selected_genes = list(curr_sel['gene_ids'])
        if len(selected_genes) == 0:
            logger.log('notice', 'This function needs a current selection!')
            return

        # Hypergeometric parameter object
        pval_cutoff = 0.9999
        cond = True
        test_direction = 'over'
        # BUG FIX: the computed universe was never passed to the test;
        # supply it through the universeGeneIds slot.
        params = rpy.r.new("GOHyperGParams",
                           geneIds=selected_genes,
                           universeGeneIds=universe,
                           annotation="hgu133a",
                           ontology="BP",
                           pvalueCutoff=pval_cutoff,
                           conditional=cond,
                           testDirection=test_direction
                           )
        # run test
        # result.keys(): ['Count', 'Term', 'OddsRatio', 'Pvalue', 'ExpCount', 'GOBPID', 'Size']
        result = rpy.r.summary(rpy.r.hyperGTest(params))

        # dataset of log p-values over the enriched GO terms
        terms = result['GOBPID']
        pvals = scipy.log(scipy.asarray(result['Pvalue']))
        row_ids = ('go-terms', terms)
        col_ids = ('_john', ['_doe'])
        xout = dataset.Dataset(pvals,
                               (row_ids, col_ids),
                               name='P values (enrichment)')
        return [xout]

View File

@ -1,122 +1,37 @@
import gtk
from fluents import dataset, logger, plots, workflow
#import geneontology
#import gostat
from scipy import array, randn, log, ones
from scipy import array, randn, log, ones, newaxis
import cPickle
import networkx
class TestWorkflow (workflow.Workflow):
    """General testing workflow.

    NOTE(review): this block appears to be diff residue — it contains
    two `description` assignments and two `__init__` definitions; the
    later ones win at class-creation time. Clean up before relying on it.
    """

    name = 'Test Workflow'
    ident = 'test'
    description = 'Test Gene Ontology Workflow. This workflow currently serves as a general testing workflow.'
    description = 'This workflow currently serves as a general testing workflow.'

    def __init__(self, app):
        workflow.Workflow.__init__(self, app)

    def __init__(self):
        workflow.Workflow.__init__(self)

        load = workflow.Stage('load', 'Load Data')
        load.add_function(CelFileImportFunction())
        load.add_function(DataLoadTestFunction(self))
        load.add_function(TestDataFunction())
        load.add_function(DatasetLoadFunction())
        load.add_function(SelectFunction())
        self.add_stage(load)

        # Task-based stage (appended directly, bypassing add_stage).
        test_stage = workflow.Stage('test', 'Test Data')
        test_stage.tasks.append(TestDataTask)
        test_stage.tasks.append(TestPlot)
        self.stages.append(test_stage)

        preproc = workflow.Stage('preprocess', 'Preprocessing')
        preproc.add_function(DatasetLog())
        preproc.add_function(workflow.Function('rma', 'RMA'))
        self.add_stage(preproc)

        go = workflow.Stage('go', 'Gene Ontology Data')
        go.add_function(GODistanceFunction())
        go.add_function(ImagePlotFunction())
        self.add_stage(go)

        regression = workflow.Stage('regression', 'Regression')
        regression.add_function(workflow.Function('pls', 'PLS'))
        self.add_stage(regression)

        explore = workflow.Stage('explore', 'Explorative analysis')
        explore.add_function(PCAFunction(self))
        self.add_stage(explore)

        save = workflow.Stage('save', 'Save Data')
        save.add_function(DatasetSaveFunction())
        self.add_stage(save)
class LoadAnnotationsFunction(workflow.Function):
    """Load a gene -> GO-term annotation file chosen via a file dialog."""

    def __init__(self):
        workflow.Function.__init__(self, 'load-go-ann', 'Load Annotations')
        self.annotations = None

    def load_file(self, filename):
        """Parse filename; each line is '<gene> \\t <term> [<term> ...]'."""
        f = open(filename)
        try:
            self.annotations = Annotations('genes', 'go-terms')
            logger.log('notice', 'Loading annotation file: %s' % filename)
            for line in f.readlines():
                val = line.split(' \t')
                if len(val) > 1:
                    val = [v.strip() for v in val]
                    # BUG FIX: was retval.add_annotations — 'retval' is
                    # undefined here; annotations belong on self.annotations.
                    self.annotations.add_annotations('genes', val[0],
                                                     'go-terms', set(val[1:]))
        finally:
            # BUG FIX: the original leaked this file handle.
            f.close()

    def on_response(self, dialog, response):
        """File-chooser callback: load the picked file on OK."""
        if response == gtk.RESPONSE_OK:
            logger.log('notice', 'Reading file: %s' % dialog.get_filename())
            self.load_file(dialog.get_filename())

    def run(self):
        btns = ('Open', gtk.RESPONSE_OK,
                'Cancel', gtk.RESPONSE_CANCEL)
        dialog = gtk.FileChooserDialog('Open GO Annotation File',
                                       buttons=btns)
        dialog.connect('response', self.on_response)
        dialog.run()
        dialog.destroy()
        return [self.annotations]
class GODistanceFunction(workflow.Function):
    """Compute pairwise gene distances from GO annotations (stub)."""

    def __init__(self):
        # NOTE(review): id 'go_diatance' looks like a typo for
        # 'go_distance' — confirm before renaming; ids may be referenced.
        workflow.Function.__init__(self, 'go_diatance', 'GO Distances')
        self.output = None

    def run(self, data):
        logger.log('debug', 'datatype: %s' % type(data))
        if not type(data) == Annotations:
            return None
        logger.log('debug', 'dimensions: %s' % data.dimensions)
        genes = data.get_ids('genes')
        # NOTE(review): array((n, n)) builds a 2-element vector, not an
        # n-by-n matrix — zeros((n, n)) was probably intended; confirm.
        gene_distances = array((len(genes), len(genes)))
        return gene_distances
class ImagePlotFunction(workflow.Function):
    """Wrap a dataset in an ImagePlot named 'foo'."""

    def __init__(self):
        workflow.Function.__init__(self, 'image', 'Show Image')

    def run(self, data):
        image = plots.ImagePlot(data, name='foo')
        return [image]
class TestDataFunction(workflow.Function):
def __init__(self):
workflow.Function.__init__(self, 'test_data', 'Generate Test Data')
class TestDataTask(workflow.Task):
name = "Test data"
def __init__(self, input):
workflow.Task.__init__(self, input)
def run(self):
logger.log('notice', 'Injecting foo test data')
x = randn(500,15)
X = dataset.Dataset(x)
p = plots.ScatterPlot(X, X, 'rows', 'rows', '0_1', '0_2',name='scatter')
p2 = plots.ScatterMarkerPlot(X, X, 'rows', 'rows', '0_1', '0_2',name='marker')
dname = X.get_dim_name()[0]
p = plots.ScatterPlot(X, X, dname, dname, '0_1', '0_2',name='scatter')
graph = networkx.XGraph()
for x in 'ABCDEF':
for y in 'ADE':
@ -125,181 +40,25 @@ class TestDataFunction(workflow.Function):
ds_plot = plots.NetworkPlot(ds)
cds = dataset.CategoryDataset(ones([3, 3]))
ds_scatter = plots.ScatterMarkerPlot(ds, ds, 'rows_0', 'rows_0', '0_1', '0_2')
lp = plots.LineViewPlot(X,major_axis=0)
dname2 = cds.get_dim_name()[0]
ds_scatter = plots.ScatterMarkerPlot(cds, cds, dname2, dname2, '0_1', '0_2')
lp = plots.LineViewPlot(X ,major_axis=0)
vp = plots.VennPlot()
return [X, ds, p, ds_plot, ds_scatter, p2, cds, lp, vp]
self.datasets = [p]
return [X, ds, p, ds_plot, ds_scatter, cds, lp, vp]
class SelectFunction(workflow.Function):
    """Workflow function that emits a fixed, hard-coded row selection."""

    def __init__(self):
        workflow.Function.__init__(self, 'select', 'Select')

    def run(self, data):
        """Return a Selection containing rows '0_1' and '0_2'.

        `data` is accepted for interface compatibility but not used.
        """
        selection = dataset.Selection('Arbitrary selection')
        selection.select('rows', ['0_1', '0_2'])
        return [selection]
class TestPlot(workflow.Task):
    # NOTE(review): only the class attribute is visible here; the rest of
    # this task's body appears to have been lost in the diff this file
    # was reconstructed from.
    name = "Test plot data"
class DatasetLog(workflow.Function):
    """Workflow function taking the element-wise logarithm of a dataset."""

    def __init__(self):
        workflow.Function.__init__(self, 'log', 'Log')

    def run(self, data):
        """Return a new Dataset log(data), named 'log(<original name>)'."""
        name = data.get_name()
        logger.log('notice', 'Taking the log of dataset %s' % name)
        values = log(data.asarray())
        result = dataset.Dataset(values, name='log(%s)' % name)
        return [result]
class DatasetLoadFunction(workflow.Function):
    """Loader for previously pickled Datasets."""
    def __init__(self):
        workflow.Function.__init__(self, 'load_data', 'Load Pickled Dataset')
    # NOTE(review): a second __init__ follows (apparent merge/diff
    # artifact from a Task class) -- it shadows the constructor above and
    # changes the signature to require `input`, and calls workflow.Task's
    # initialiser even though this class subclasses workflow.Function.
    def __init__(self, input):
        workflow.Task.__init__(self, input)
    def run(self):
        # Present a file chooser filtered to *.pkl files and unpickle the
        # selected file. Returns a one-element list on OK; returns None
        # implicitly if the dialog is cancelled.
        chooser = gtk.FileChooserDialog(title="Select cel files...", parent=None,
                                        action=gtk.FILE_CHOOSER_ACTION_OPEN,
                                        buttons=(gtk.STOCK_CANCEL, gtk.RESPONSE_CANCEL,
                                                 gtk.STOCK_OPEN, gtk.RESPONSE_OK))
        pkl_filter = gtk.FileFilter()
        pkl_filter.set_name("Python pickled data files (*.pkl)")
        pkl_filter.add_pattern("*.[pP][kK][lL]")
        all_filter = gtk.FileFilter()
        all_filter.set_name("All Files (*.*)")
        all_filter.add_pattern("*")
        chooser.add_filter(pkl_filter)
        chooser.add_filter(all_filter)
        try:
            if chooser.run() == gtk.RESPONSE_OK:
                return [cPickle.load(open(chooser.get_filename()))]
        finally:
            # Always tear the dialog down, even if unpickling raises.
            chooser.destroy()
class DatasetSaveFunction(workflow.Function):
    """QND way to save data to file for later import to this program."""

    def __init__(self):
        workflow.Function.__init__(self, 'save_data', 'Save Pickled Dataset')

    def run(self, data):
        """Pickle the first dataset in `data` to a user-selected file.

        Fix: `run` took no `data` parameter but referenced `data` in its
        body, so it raised NameError on every call. The signature now
        matches the other Function.run(self, data) methods in this file.
        """
        if not data:
            logger.log("notice", "No data to save.")
            return
        else:
            data = data[0]
        chooser = gtk.FileChooserDialog(title="Save pickled data...", parent=None,
                                        action=gtk.FILE_CHOOSER_ACTION_SAVE,
                                        buttons=(gtk.STOCK_CANCEL, gtk.RESPONSE_CANCEL,
                                                 gtk.STOCK_SAVE, gtk.RESPONSE_OK))
        pkl_filter = gtk.FileFilter()
        pkl_filter.set_name("Python pickled data files (*.pkl)")
        pkl_filter.add_pattern("*.[pP][kK][lL]")
        all_filter = gtk.FileFilter()
        all_filter.set_name("All Files (*.*)")
        all_filter.add_pattern("*")
        chooser.add_filter(pkl_filter)
        chooser.add_filter(all_filter)
        # Suggest "<dataset name>.pkl" as the default file name.
        chooser.set_current_name(data.get_name() + ".pkl")
        try:
            if chooser.run() == gtk.RESPONSE_OK:
                cPickle.dump(data, open(chooser.get_filename(), "w"), protocol=2)
                logger.log("notice", "Saved data to %r." % chooser.get_filename())
        finally:
            # Always destroy the dialog, even if pickling fails.
            chooser.destroy()
class CelFileImportFunction(workflow.Function):
    """Loads AffyMetrix .CEL-files into matrix."""
    def __init__(self):
        workflow.Function.__init__(self, 'cel_import', 'Import Affy')
    def run(self, data):
        # Let the user pick one or more CEL files, normalise them with
        # RMA via R's "affy" package (through rpy), and return the
        # resulting expression matrix plus a line plot of gene profiles.
        import rpy
        chooser = gtk.FileChooserDialog(title="Select cel files...", parent=None,
                                        action=gtk.FILE_CHOOSER_ACTION_OPEN,
                                        buttons=(gtk.STOCK_CANCEL, gtk.RESPONSE_CANCEL,
                                                 gtk.STOCK_OPEN, gtk.RESPONSE_OK))
        chooser.set_select_multiple(True)
        cel_filter = gtk.FileFilter()
        cel_filter.set_name("Cel Files (*.cel)")
        cel_filter.add_pattern("*.[cC][eE][lL]")
        all_filter = gtk.FileFilter()
        all_filter.set_name("All Files (*.*)")
        all_filter.add_pattern("*")
        chooser.add_filter(cel_filter)
        chooser.add_filter(all_filter)
        try:
            if chooser.run() == gtk.RESPONSE_OK:
                rpy.r.library("affy")
                # NO_CONVERSION keeps the large R objects on the R side.
                silent_eval = rpy.with_mode(rpy.NO_CONVERSION, rpy.r)
                silent_eval('E <- ReadAffy(filenames=c("%s"))' % '", "'.join(chooser.get_filenames()))
                silent_eval('E <- rma(E)')
                m = rpy.r('m <- E@exprs')
                vector_eval = rpy.with_mode(rpy.VECTOR_CONVERSION, rpy.r)
                rownames = vector_eval('rownames(m)')
                colnames = vector_eval('colnames(m)')
                # We should be nice and clean up after ourselves
                rpy.r.rm(["E", "m"])
                # NOTE(review): truthiness of `m` here presumably means
                # "conversion produced a non-empty matrix" -- confirm
                # what rpy returns for an empty result.
                if m:
                    data = dataset.Dataset(m, (('ids', rownames), ('filename', colnames)), name="AffyMatrix Data")
                    plot = plots.LinePlot(data, "Gene profiles")
                    return [data, plot]
                else:
                    logger.log("notice", "No data loaded from importer.")
        finally:
            chooser.destroy()
class DataLoadTestFunction(workflow.Function):
    """Loads the test dataset 'smoker-x.ftsv' from the workflow's data dir."""

    def __init__(self, wf):
        workflow.Function.__init__(self, 'datadirload', 'Load from datadir')
        # Workflow instance, used to resolve file names in its data directory.
        self._wf = wf

    def run(self):
        """Return [Dataset] read from smoker-x.ftsv, or [] if not found.

        Fixes: the file name was resolved twice (once just to print it),
        the file handle was never closed, and the not-found message
        printed `None` instead of the name that was searched for.
        """
        filename = 'smoker-x.ftsv'
        fn = self._wf.get_data_file_name(filename)
        print(fn)
        if fn:
            fd = open(fn)
            try:
                ds = dataset.read_ftsv(fd)
            finally:
                fd.close()  # previously leaked
            return [ds]
        else:
            # fn is None on this branch, so report the requested name.
            print("Cannot find file %s" % filename)
            return []
class PCAFunction(workflow.Function):
    """Generic PCA function."""
    def __init__(self, wf):
        workflow.Function.__init__(self, 'pca', 'PCA')
        self._workflow = wf
    def run(self, data):
        # Run PCA on `data` via R's prcomp (through rpy) and return the
        # scores (T), loadings (P) and a scatter plot of each.
        import rpy
        #logger.log('notice', 'Injecting % dataset') %self.input[0].get_name()
        # NOTE(review): the next four lines look like leftovers from a
        # Task-based version of this code -- `self.input` is not set up
        # anywhere visible for Functions, and the plot `p` built here is
        # only used by the unreachable `return [p]` at the bottom.
        X = self.input[0]
        ii = X.get_dim_name()[0]
        ids = X.get_identifiers(ii, indices=[0,1])
        p = plots.ScatterPlot(X, X, ii, ii, ids[0], ids[1],name='scatter')
        dim_2, dim_1 = data.get_dim_names()
        silent_eval = rpy.with_mode(rpy.NO_CONVERSION, rpy.r)
        rpy.with_mode(rpy.NO_CONVERSION, rpy.r.assign)("m", data.asarray())
        # prcomp works on rows-as-observations, hence the transpose t(m).
        silent_eval("t = prcomp(t(m))")
        # Component identifiers are 1-based strings: '1', '2', ...
        T_ids = map(str, range(1, rpy.r("dim(t$x)")[1]+1))
        T = dataset.Dataset(rpy.r("t$x"), [(dim_1, data.get_identifiers(dim_1)),
                                           ("component", T_ids)], name="T")
        P = dataset.Dataset(rpy.r("t$rotation"), [(dim_2, data.get_identifiers(dim_2)),
                                                  ("component", T_ids)], name="P")
        # cleanup
        rpy.r.rm(["t", "m"])
        loading_plot = plots.ScatterMarkerPlot(P, P, 'ids','component','1','2', "Loadings")
        score_plot = plots.ScatterMarkerPlot(T, T,'filename','component','1','2', "Scores")
        return [T, P, loading_plot, score_plot]
        # NOTE(review): unreachable -- dead code after the return above.
        return [p]