Projects/laydi
Projects
/
laydi
Archived
7
0
Fork 0

Compare commits

...
This repository has been archived on 2024-07-04. You can view files and clone it, but cannot push or open issues or pull requests.

6 Commits
main ... tasks

Author SHA1 Message Date
Arnar Flatberg 7ef01d37f0 2007-09-03 17:51:22 +00:00
Arnar Flatberg 5a1e225a08 working test ... for real 2007-09-03 16:16:39 +00:00
Arnar Flatberg 9a296f59f6 bugfix on scipy import 2007-09-03 16:16:05 +00:00
Arnar Flatberg f8e0cfdb8b removed unused workflows 2007-09-03 15:50:16 +00:00
Arnar Flatberg 2fdd9aaf77 initial working test wf 2007-09-03 15:46:45 +00:00
Einar Ryeng bfb039328c Some stuff on tasks. 2007-09-03 13:22:11 +00:00
8 changed files with 85 additions and 2319 deletions

View File

@ -22,6 +22,7 @@ DATADIR = os.path.dirname(sys.modules['fluents'].__file__)
ICONDIR = os.path.join(DATADIR,"..","icons")
GLADEFILENAME = os.path.join(DATADIR, 'fluents.glade')
class IconFactory:
"""Factory for icons that ensures that each icon is only loaded once."""
@ -43,6 +44,7 @@ class IconFactory:
icon_factory = IconFactory(ICONDIR)
class TableSizeSelection(gtk.Window):
def __init__(self):

View File

@ -187,7 +187,7 @@ class LineViewPlot(Plot):
if minor_axis==0:
self._mn_data = self._data.mean(minor_axis)
else:
self._mn_data = self._data.mean(minor_axis)[:,newaxis]
self._mn_data = self._data.mean(minor_axis)[:,scipy.newaxis]
self._data = self._data - self._mn_data
self.data_is_centered = True
#initial line collection

View File

@ -60,9 +60,7 @@ class Workflow:
description = "Workflow Description"
def __init__(self):
print "Setting stages"
self.stages = []
self.stages_by_id = {}
def get_data_file_name(self, filename):
"""Checks if a file with the given name exists in the data directory.
@ -79,67 +77,68 @@ class Workflow:
def add_stage(self, stage):
self.stages.append(stage)
self.stages_by_id[stage.id] = stage
#self.stages_by_id[stage.id] = stage
def print_tree(self):
print self.name
for stage in self.stages:
print ' %s' % stage.name
for fun in stage.functions:
print ' %s' % fun.name
# def add_project(self,project):
# if project == None:
# logger.log('notice','Proejct is empty')
# logger.log('notice','Project added in : %s' %self.name)
# self.project = project
for task in stage.tasks:
print ' %s' % task.name
class EmptyWorkflow(Workflow):
name = 'Empty Workflow'
def __init__(self):
print "initing empty workflow"
logger.log("debug", "initing empty workflow")
Workflow.__init__(self)
class Stage:
"""A stage is a part of the data analysis process.
Each stage contains a set of functions that can be used to
Each stage contains a set of tasks that can be used to
accomplish the task. A typical early stage is 'preprocessing', which
can be done in several ways, each represented by a function.
can be done in several ways, each represented by a task.
"""
def __init__(self, id, name):
self.id = id
self.name = name
self.functions = []
self.functions_by_id = {}
self.tasks = []
def add_function(self, fun):
self.functions.append(fun)
self.functions_by_id[fun.id] = fun
def add_task(self, task):
self.tasks.append(task)
class Function:
"""A Function object encapsulates a function on a data set.
class Task:
"""A Task object encapsulates a task on a data set.
Each Function instance encapsulates some function that can be applied
Each Task instance encapsulates some task that can be applied
to one or more types of data.
"""
def __init__(self, id, name):
self.id = id
self.name = name
name = ""
def __init__(self, input):
self.input = input
self.options = Options()
self.datasets = {}
self.arrays = {}
self.plots = {}
# just return a Validation object
def validate_input(input):
return Validation(True,"Validation Not Implemented")
return Validation(True, "Validation Not Implemented")
def run(self):
pass
def show_options_gui(self, editable=False):
pass
class Validation:
def __init__(self,result, reason):
@ -148,7 +147,6 @@ class Validation:
class WorkflowView (gtk.VBox):
def __init__(self, wf):
gtk.VBox.__init__(self)
self.workflow = wf
@ -166,12 +164,11 @@ class WorkflowView (gtk.VBox):
btn_box.show()
exp.add(btn_align)
# Add functions in each stage
for fun in stage.functions:
btn = gtk.Button(fun.name)
# Add tasks in each stage
for task in stage.tasks:
btn = gtk.Button(task.name)
btn.connect('clicked',
lambda button, f=fun : self.run_function(f))
lambda button, t=task : self.run_task(t))
btn_box.add(btn)
btn.show()
@ -188,53 +185,24 @@ class WorkflowView (gtk.VBox):
self.remove_workflow()
self.setup_workflow(workflow)
def run_function(self, function):
logger.log('debug', 'Starting function: %s' % function.name)
def run_task(self, task_class):
logger.log('debug', 'Creating task: %s' % task_class.name)
parent_data = main.project.current_data
validation = function.validate_input()
task = task_class(input=parent_data)
validation = task.validate_input()
if not validation.succeeded:
logger.log('warning','Invalid Inputdata: ' + str(reason))
return
args, varargs, varkw, defaults = inspect.getargspec(function.run)
# first argument is 'self' and second should be the selection
# and we don't care about those...
args.remove('self')
if "selection" in args:
pass_selection = True
args.remove('selection')
task_result = task.run()
if task_result != None:
main.project.add_data(parent_data, task_result, task.name)
else:
pass_selection = False
if varargs and len(parent_data) < len(args):
logger.log('warning', "Function requires minimum %d datasets selected." % len(args))
return
elif not varargs and args and len(args) != len(parent_data):
# functions requiring datasets have to have the right number
logger.log('warning', "Function requires %d datasets, but only %d selected." % (len(args), len(parent_data)))
return
if not args:
# we allow functions requiring no data to be run even if a
# dataset is is selected
data = []
else:
data = parent_data
if pass_selection:
# if the function has a 'selection' argument, we pass in
# the selection
new_data = function.run(selection=main.project.get_selection(), *data)
else:
new_data = function.run(*data)
if new_data != None:
main.project.add_data(parent_data, new_data, function.name)
logger.log('debug', 'Task gave no output: %s' % task.name)
logger.log('debug', 'Function ended: %s' % function.name)
logger.log('debug', 'Task ended: %s' % task.name)
class Options(dict):
@ -259,8 +227,8 @@ class Options(dict):
class OptionsDialog(gtk.Dialog):
"""The basic input/output dialog box.
This defines the first page of the function options-gui.
Any function that invokes a option-gui will inherit from this class.
This defines the first page of the task options-gui.
Any task that invokes a option-gui will inherit from this class.
"""
def __init__(self, data, options, input_names=['X','Y']):
gtk.Dialog.__init__(self, 'Input-Output dialog',
@ -452,12 +420,18 @@ class WorkflowMenu (gtk.Menu):
stage_menu = gtk.Menu()
stage_menu_item.set_submenu(stage_menu)
for fun in stage.functions:
stage_menu.append(self._create_function_item(fun))
for task in stage.tasks:
stage_menu.append(self._create_task_item(task))
return stage_menu_item
def _create_function_item(self, function):
menuitem = gtk.MenuItem(function.name)
def _create_task_item(self, task):
menuitem = gtk.MenuItem(task.name)
menuitem.show()
return menuitem
class Options():
def __init__(self):
pass

View File

@ -1,261 +0,0 @@
import gobject
import gtk
import networkx
import re
class GOTerm:
def __init__(self):
self.d = {}
## Create empty lists for all list values
for l in GOTerm.lists:
self.d[l] = []
for s in GOTerm.scalars:
self.d[l] = None
def __getitem__(self, key):
if self.d.has_key(key):
return self.d[key]
return None
def __setitem__(self, key, value):
self.d[key] = value
GOTerm.lists = ['is_a', 'alt_id', 'exact_synonym', 'broad_synonym',
'narrow_synonym', 'related_synonym', 'relationship',
'subset', 'synonym', 'xref_analog', 'xref_unknown']
GOTerm.scalars = ['name', 'id', 'namespace', 'def', 'is_transitive',
'comment', 'is_obsolete']
class GeneOntology(networkx.XDiGraph):
def __init__(self):
networkx.XDiGraph.__init__(self)
self.by_id = {}
self.undirected = None
def add_term(self, term):
self.add_node(term)
self.by_id[term['id']] = term
def link_ontology(self, linkattr, obsolete=False):
for node in self.nodes():
for link in node[linkattr]:
self.add_edge(self.by_id[link], node, linkattr)
def link_relationships(self):
for node in self.nodes():
for link in node['relationship']:
link_type, term = link.split(' ')
self.add_edge(self.by_id[term.strip()], node, link_type.strip())
def get_bp(self):
"""Returns the root node of the biological_process tree"""
return self.by_id['GO:0008150']
def get_cc(self):
"""Returns the root node of the cellular_component tree"""
return self.by_id['id: GO:0005575']
def get_mf(self):
"""Returns the root node of the molecular_function tree"""
return self.by_id['GO:0003674']
def _subsumer(self, t1, t2, heap):
while heap != []:
t = heap[0]
heap = heap[1:]
p1 = networkx.shortest_path(self, t, t1)
p2 = networkx.shortest_path(self, t, t2)
if p1 and p2:
return t
heap += self.in_neighbors(t)
return None
def subsumer(self, t1, t2):
if t1 == t2:
return t1
if networkx.shortest_path(self, t1, t2):
return t1
elif networkx.shortest_path(self, t2, t1):
return t2
return self._subsumer(t1, t2, self.in_neighbors(t1))
def old_subsumer(self, t1, t2):
if t1 == t2:
return t1
if self.undirected == None:
self.undirected = self.to_undirected()
path = networkx.shortest_path(self.undirected, t1, t2)
if not path:
print "Woah, path not found."
return None
if path == [1]:
print "This shouldn't happen"
return t1
for t in path:
if networkx.shortest_path(self, t, t1) and \
networkx.shortest_path(self, t, t2):
return t
print "GeneOntology.subsumer: should not reach this point"
print "path is now: %s" % path
print "ids are: %s " % [x['id'] for x in path]
def _split_obo_line(line):
"""Splits a line from an obo file in its three constituent parts.
@param line: A string containing a line from an obo file. The line must
either be a section definition field with a section name in brackets
or a line of the form: keyword: value ! comment
@returns: A tuple of four strings conaining the section, key, value and
description defined in the string. If the section part is None, all
the other fields are strings and if section is a string, all the other
fields are None.
"""
attrib_re = re.compile(r'^\s*([\w-]+)\s*:\s*([^!]*)!?(.*$)')
s = line.strip()
if s == "":
return (None, None, None, None)
elif s.startswith('[') and s.endswith(']'):
return (s[1:-1], None, None, None)
else:
m = attrib_re.match(s)
if m:
key, value, comment = [x.strip() for x in m.groups()]
return (None, key, value, comment)
else:
raise Exception('Unparsable line: %s' % line)
def _add_term_attribute(term, key, value, comment):
if key in GOTerm.scalars:
term[key] = value
elif key in GOTerm.lists:
term[key].append(value)
else:
raise Exception('Unknown key %s: %s' % (key, value))
def read_gene_ontology(fd):
"""Reads the Gene Ontology from an obo file.
@param fd: An open file object to the obo file.
"""
go = GeneOntology()
term = None
section = None
line = fd.readline()
while line:
s, k, v, c = _split_obo_line(line)
if s == None and k == None:
pass
elif s:
if term:
go.add_term(term)
section = s
if s == 'Term':
term = GOTerm()
else:
term = None
print "ignoring: %s" %s
else:
if term:
_add_term_attribute(term, k, v, c)
line = fd.readline()
if term:
go.add_term(term)
return go
def pickle_gene_ontology(go, fn):
fd = open(fn, 'wb')
pickle.dump(go, fd)
fd.close()
def load_pickled_ontology(fn):
fd = open(fn, 'rb')
go = pickle.load(fd)
fd.close()
return go
def read_default_go():
f = open("/usr/share/gene-ontology/gene_ontology.obo")
go = read_gene_ontology(f)
go.link_ontology('is_a')
go.link_relationships()
f.close()
return go
def _add_subgraphs(treestore, ontology, parent, nodes):
for n in nodes:
i = treestore.insert(parent, 0, (n['id'], n['name'], False, n))
_add_subgraphs(treestore, ontology, i, ontology.successors(n))
def get_go_treestore(ontology):
ts = gtk.TreeStore(gobject.TYPE_STRING, ## ID
gobject.TYPE_STRING, ## Name
gobject.TYPE_BOOLEAN, ## Selected
gobject.TYPE_PYOBJECT) ## Node
_add_subgraphs(ts, ontology, None, [ontology.get_bp()])
return ts
class NetworkTreeModel(gtk.GenericTreeModel):
def __init__(self, network, root):
gtk.GenericTreeModel.__init__(self)
self._network = network
self._root = root
def on_get_flags(self):
return 0
def on_get_n_columns(self):
return 1
def on_get_column_type(self, index):
if index==0:
return gobject.TYPE_STRING
def on_get_iter(self, path):
node = self._root
for p in path[1:]:
children = self._network.predecessors(node)
node = children[p]
return node
def on_get_path(self, rowref):
pass
def on_get_value(self, rowref, column):
print 'get_value'
return rowref['id']
def on_iter_next(self, rowref):
pass
def on_iter_children(self, parent):
pass
def on_iter_has_child(self, rowref):
pass
def on_iter_n_children(self, rowref):
pass
def on_iter_nth_child(self, parent, n):
pass
def on_iter_parent(self, child):
pass

View File

@ -1,486 +0,0 @@
import gtk
from fluents import dataset, logger, plots, workflow, fluents, project
from fluents.lib import blmfuncs
import geneontology
#import gostat
from scipy import array, randn, log, ones, zeros
import networkx
import re
EVIDENCE_CODES=[('IMP', 'Inferred from mutant phenotype'),
('IGI', 'Inferred from genetic interaction'),
('IPI', 'Inferred from physical interaction'),
('ISS', 'Inferred from sequence or structure similarity'),
('IDA', 'Inferred from direct assay'),
('IEP', 'Inferred on expression pattern'),
('IEA', 'Inferred from electronic annotation'),
('TAS', 'Traceable author statement'),
('NAS', 'Non-traceable author statement'),
('ND', 'No biological data available'),
('RCA', 'Inferred from reviewed computational analysis'),
('IC', 'Inferred by curator')]
DISTANCE_METRICS = [('resnik', 'Resnik'),
('jiang', 'Jiang & Conrath'),
('fussimeg', 'FuSSiMeG')]
GO_DATA_DIR = '/home/einarr/data'
evidence = None
go = None
class GoTermView (gtk.Frame):
def __init__(self):
gtk.Frame.__init__(self)
tab = gtk.Table(2, 2, False)
self._table = tab
self._name = gtk.Label('')
self._name.set_line_wrap(True)
self._name.set_alignment(0, 0)
name_label = gtk.Label('Name:')
name_label.set_alignment(0, 0)
tab.attach(name_label, 0, 1, 0, 1, gtk.FILL, gtk.FILL, 5, 5)
tab.attach(self._name, 1, 2, 0, 1, gtk.FILL|gtk.EXPAND, gtk.FILL, 5, 5)
self._def = gtk.TextBuffer()
textview = gtk.TextView(self._def)
textview.set_wrap_mode(gtk.WRAP_WORD)
scrolled_window = gtk.ScrolledWindow()
scrolled_window.add(textview)
def_label = gtk.Label('Def:')
def_label.set_alignment(0.0, 0.0)
tab.attach(def_label, 0, 1, 1, 2, gtk.FILL, gtk.FILL, 5, 5)
tab.attach(scrolled_window, 1, 2, 1, 2, gtk.FILL|gtk.EXPAND, gtk.FILL|gtk.EXPAND, 5, 5)
self.add(tab)
self.set_go_term(None)
def set_go_term(self, term):
if term:
self.set_label(term['id'])
self._name.set_text(term['name'])
self._def.set_text(term['def'])
else:
self.set_label('GO Term')
self._name.set_text('')
self._def.set_text('')
class GeneOntologyTree (gtk.HPaned):
def __init__(self, network):
gtk.HPaned.__init__(self)
treemodel = geneontology.get_go_treestore(network)
self._treemodel = treemodel
self._tree_view = gtk.TreeView(treemodel)
self._selected_terms = set()
self._tree_view.set_fixed_height_mode(True)
# Set up context menu
self._context_menu = GoTermContextMenu(treemodel, self._tree_view)
self._tree_view.connect('popup_menu', self._popup_menu)
self._tree_view.connect('button_press_event', self._on_button_press)
renderer = gtk.CellRendererText()
go_column = gtk.TreeViewColumn('GO ID', renderer, text=0)
go_column.set_sizing(gtk.TREE_VIEW_COLUMN_FIXED)
go_column.set_fixed_width(200)
go_column.set_resizable(True)
self._tree_view.insert_column(go_column, 0)
renderer = gtk.CellRendererToggle()
renderer.set_property('activatable', True)
renderer.connect('toggled', self._toggle_selected)
renderer.set_active(True)
renderer.set_property('mode', gtk.CELL_RENDERER_MODE_ACTIVATABLE)
go_column = gtk.TreeViewColumn('T', renderer, active=2)
go_column.set_fixed_width(20)
go_column.set_sizing(gtk.TREE_VIEW_COLUMN_FIXED)
go_column.set_resizable(True)
self._tree_view.insert_column(go_column, 1)
renderer = gtk.CellRendererText()
go_column = gtk.TreeViewColumn('Name', renderer, text=1)
go_column.set_fixed_width(200)
go_column.set_sizing(gtk.TREE_VIEW_COLUMN_FIXED)
go_column.set_resizable(True)
self._tree_view.insert_column(go_column, 2)
self._desc_view = GoTermView()
self._tree_view.connect('cursor-changed', self._on_cursor_changed)
scrolled_window = gtk.ScrolledWindow()
scrolled_window.add(self._tree_view)
self.add1(scrolled_window)
self.add2(self._desc_view)
self.show_all()
def _on_cursor_changed(self, tree):
path, col = self._tree_view.get_cursor()
current = self._treemodel.get_iter(path)
term = self._treemodel.get_value(current, 3)
self._desc_view.set_go_term(term)
##
## GTK Callback functions
##
def _popup_menu(self, *rest):
self.menu.popup(None, None, None, 0, 0)
def _on_button_press(self, widget, event):
path = widget.get_path_at_pos(int(event.x), int(event.y))
iter = None
if path:
iter = self._treemodel.get_iter(path[0])
obj = self._treemodel.get_value(iter, 3)
else:
obj = None
self._context_menu.set_current_term(obj, iter)
if event.button == 3:
self._context_menu.popup(None, None, None, event.button, event.time)
def _toggle_selected(self, renderer, path):
iter = self._treemodel.get_iter(path)
selected = self._treemodel.get_value(iter, 2)
id = self._treemodel.get_value(iter, 0)
self._treemodel.set_value(iter, 2, not selected)
if selected:
self._selected_terms.remove(id)
else:
self._selected_terms.add(id)
class GoTermContextMenu (gtk.Menu):
"""Context menu for GO terms in the gene ontology browser"""
def __init__(self, treemodel, treeview):
self._treemodel = treemodel
self._treeview = treeview
self._current_term = None
self._current_iter = None
gtk.Menu.__init__(self)
# Popuplate tree
self._expand_item = i = gtk.MenuItem('Expand')
i.connect('activate', self._on_expand_subtree, treemodel, treeview)
self.append(i)
i.show()
self._collapse_item = i = gtk.MenuItem('Collapse')
i.connect('activate', self._on_collapse_subtree, treemodel, treeview)
self.append(i)
i.show()
self._select_subtree_item = i = gtk.MenuItem('Select subtree')
i.connect('activate', self._on_select_subtree, treemodel, treeview)
self.append(i)
i.show()
def set_current_term(self, term, it):
self._current_term = term
self._current_iter = it
def _on_expand_subtree(self, item, treemodel, treeview):
path = treemodel.get_path(self._current_iter)
treeview.expand_row(path, True)
def _on_collapse_subtree(self, item, treemodel, treeview):
treeview.collapse_row(treemodel.get_path(self._current_iter))
def _on_select_subtree(self, item, treemodel, treeview):
logger.log('notice', 'Selecting subtree from GO id: %s (%s)' %
(self._current_term['id'], self._current_term['name']))
ids = [x['id'] for x in networkx.bfs(go, self._current_term)]
project.project.set_selection('go-terms', set(ids))
class GoWorkflow (workflow.Workflow):
name = 'Gene Ontology'
ident = 'go'
description = 'Gene Ontology Workflow. For tree distance measures based '\
+ 'on the GO tree.'
def __init__(self, app):
workflow.Workflow.__init__(self, app)
load = workflow.Stage('load', 'Load GO Annotations')
load.add_function(LoadGOFunction())
load.add_function(LoadAnnotationsFunction())
load.add_function(LoadTextDatasetFunction())
self.add_stage(load)
go = workflow.Stage('go', 'Gene Ontology')
go.add_function(SelectGoTermsFunction(self))
go.add_function(GoDistanceFunction())
go.add_function(SaveDistancesFunction())
self.add_stage(go)
blm = workflow.Stage('blm', 'Bilinear Analysis')
blm.add_function(blmfuncs.PCA())
self.add_stage(blm)
class LoadGOFunction(workflow.Function):
def __init__(self):
workflow.Function.__init__(self, 'load-go', 'Load Gene Ontology')
def run(self):
global go
go = geneontology.read_default_go()
browser = GeneOntologyTree(go)
label = gtk.Label('_Gene Ontology')
label.set_use_underline(True)
fluents.app['bottom_notebook'].append_page(browser, label)
class LoadTextDatasetFunction(workflow.Function):
def __init__(self):
workflow.Function.__init__(self, 'load-text-ds', 'Load GO Evidence')
def run(self):
f = open('/home/einarr/data/goa-condensed.ftsv')
global evidence
evidence = dataset.read_ftsv(f)
return [evidence]
class LoadAnnotationsFunction(workflow.Function):
def __init__(self):
workflow.Function.__init__(self, 'load-go-ann', 'Load Annotations')
self.annotations = None
def run(self):
global evidence
f = open(GO_DATA_DIR + '/goa-condensed')
ev_codes = f.readline().split()
go_terms = []
lines = f.readlines()
m = zeros((len(lines), len(ev_codes)))
for i, l in enumerate(lines):
values = l.split()
go_terms.append(values[0])
for j, v in enumerate(values[1:]):
m[i,j] = float(v.strip())
d = dataset.Dataset(m,
[['go-terms', go_terms], ['evidence', ev_codes]],
name='GO evidence')
evidence = d
return [d]
class EvidenceCodeFrame(gtk.Frame):
def __init__(self):
gtk.Frame.__init__(self, 'Evidence Codes')
self._ec_buttons = {}
vbox = gtk.VBox(len(EVIDENCE_CODES))
for code, desc in EVIDENCE_CODES:
btn = gtk.CheckButton('%s (%s)' % (code, desc))
self._ec_buttons[code] = btn
vbox.add(btn)
self.add(vbox)
def set_options(self, options):
for code, desc in EVIDENCE_CODES:
self._ec_buttons[code].set_active(options[code])
def update_options(self, options):
for code, desc in EVIDENCE_CODES:
options[code] = self._ec_buttons[code].get_active()
return options
class DistanceMetricFrame(gtk.Frame):
def __init__(self):
gtk.Frame.__init__(self, 'Distance Metrics')
self._metric_buttons = {}
vbox = gtk.VBox()
prev = None
for code, text in DISTANCE_METRICS:
btn = gtk.RadioButton(prev, '%s' % text)
self._metric_buttons[code] = btn
vbox.add(btn)
prev = btn
self.add(vbox)
def set_options(self, options):
self._metric_buttons[options['metric']].set_active(True)
def update_options(self, options):
for code, text in DISTANCE_METRICS:
if self._metric_buttons[code].get_active():
options['metric'] = code
return options
return options
class GoDistanceDialog(gtk.Dialog):
def __init__(self):
gtk.Dialog.__init__(self, 'GO term distance matrix',
None,
gtk.DIALOG_MODAL | gtk.DIALOG_DESTROY_WITH_PARENT,
(gtk.STOCK_OK, gtk.RESPONSE_OK,
gtk.STOCK_CANCEL, gtk.RESPONSE_CANCEL))
self._ec_frame = EvidenceCodeFrame()
self._metric_frame = DistanceMetricFrame()
self.vbox.add(self._ec_frame)
self.vbox.add(self._metric_frame)
def run(self):
self.vbox.show_all()
return gtk.Dialog.run(self)
def set_options(self, options):
self._ec_frame.set_options(options)
self._metric_frame.set_options(options)
def update_options(self, options):
self._ec_frame.update_options(options)
self._metric_frame.update_options(options)
return options
def set_editable(self, editable):
self._ec_frame.set_sensitive(editable)
self._metric_frame.set_sensitive(editable)
class NumericDict(dict):
def __init__(self):
dict.__init__(self)
def __getitem__(self, key):
retval = 0
try:
retval = dict.__getitem__(self, key)
except:
retval = 0.0
return retval
class SelectGoTermsFunction(workflow.Function):
def __init__(self, wf):
workflow.Function.__init__(self, 'go-select', 'Select GO Terms')
self.wf = wf
def run(self, ds):
terms = [x['id'] for x in networkx.paths.bfs(go, go.get_bp())]
self.wf.project.set_selection('go-terms', set(terms[:100]))
# self.wf.project.set_selection('go-terms', set(['GO:0007582', 'GO:0008150', 'GO:0051704', 'GO:0044419']))
class GoDistanceFunction(workflow.Function):
def __init__(self):
workflow.Function.__init__(self, 'go-dist', 'GO term distance matrix')
self.options = GoDistanceOptions()
def resnik_distance_matrix(self, selection, ic):
size = len(selection['go-terms'])
m = zeros((size, size))
# Create resnik distance matrix
ids = list(selection['go-terms'])
for i, t1 in enumerate(ids):
for j, t2 in enumerate(ids):
term1 = go.by_id[t1]
term2 = go.by_id[t2]
subsumer = go.subsumer(term1, term2)
if subsumer == None:
m[i, j] = 1000
else:
# print "%s - %s - %s" % (t1, subsumer['id'], t2)
m[i, j] = ic[t1] + ic[t2] - 2.0 * ic[subsumer['id']]
ds = dataset.Dataset(m, (('go-terms', ids), ('_go-terms', ids)), 'Resnik')
return ds
def run(self, x, selection):
global evidence, go
self.options = self.show_gui(self.options)
if not selection.has_key('go-terms') or len(selection['go-terms']) == 0:
logger.log('warning', 'No GO terms selected. Cannot make distance matrix.')
codes = [c for c, d in EVIDENCE_CODES if self.options[c]]
ev_indices = evidence.get_indices('evidence', codes)
ann_count_matrix = evidence._array[:, ev_indices].sum(1)
total_ann = ann_count_matrix.sum(0)
annotations = NumericDict()
ic = NumericDict()
# Insert annotations into dict
for i, v in enumerate(evidence.get_identifiers('go-terms')):
annotations[v] = ann_count_matrix[i]
# Accumulate annotations
for term in reversed(networkx.topological_sort(go)):
for parent in go.in_neighbors(term):
annotations[parent['id']] += annotations[term['id']]
# Create information content dictionary
for term, count in annotations.items():
ic[term] = -log(count / total_ann)
return [self.resnik_distance_matrix(selection, ic)]
def show_gui(self, options, edit=True):
dialog = GoDistanceDialog()
dialog.set_options(self.options)
dialog.show_all()
dialog.set_editable(edit)
response = dialog.run()
dialog.hide()
if response == gtk.RESPONSE_OK:
return dialog.update_options(self.options)
else:
return options
class SaveDistancesFunction(workflow.Function):
def __init__(self):
workflow.Function.__init__(self, 'save-matrix', 'Save Matrix')
def run(self, ds):
filename = '/home/einarr/data/output.ftsv'
fd = open(filename, 'w')
dataset.write_ftsv(fd, ds)
fd.close()
class Options(dict):
def __init__(self):
dict.__init__(self)
class GoDistanceOptions(Options):
def __init__(self):
Options.__init__(self)
for code, desc in EVIDENCE_CODES:
self[code] = True
self['metric'] = 'fussimeg'

View File

@ -1,777 +0,0 @@
import gtk
from fluents import dataset, logger, plots, workflow, fluents, project, view, main
import geneontology
from matplotlib.nxutils import points_inside_poly
import matplotlib
#from scipy import array, randn, log, ones, zeros
from scipy import *
from numpy import matlib
import networkx
import re
import rpy
EVIDENCE_CODES=[('IMP', 'Inferred from mutant phenotype'),
('IGI', 'Inferred from genetic interaction'),
('IPI', 'Inferred from physical interaction'),
('ISS', 'Inferred from sequence or structure similarity'),
('IDA', 'Inferred from direct assay'),
('IEP', 'Inferred on expression pattern'),
('IEA', 'Inferred from electronic annotation'),
('TAS', 'Traceable author statement'),
('NAS', 'Non-traceable author statement'),
('ND', 'No biological data available'),
('RCA', 'Inferred from reviewed computational analysis'),
('IC', 'Inferred by curator')]
DISTANCE_METRICS = [('resnik', 'Resnik'),
('jiang', 'Jiang & Conrath'),
('fussimeg', 'FuSSiMeG')]
GO_DATA_DIR = '/home/einarr/data'
evidence = None
go = None
class GoTermView (gtk.Frame):
def __init__(self):
gtk.Frame.__init__(self)
tab = gtk.Table(2, 3, False)
self._table = tab
self._name = gtk.Label('')
self._name.set_line_wrap(True)
self._name.set_alignment(0, 0)
name_label = gtk.Label('Name:')
name_label.set_alignment(0, 0)
tab.attach(name_label, 0, 1, 0, 1, gtk.FILL, gtk.FILL, 5, 5)
tab.attach(self._name, 1, 2, 0, 1, gtk.FILL|gtk.EXPAND, gtk.FILL, 5, 5)
self._isa_parents = gtk.HBox()
isa_parents_label = gtk.Label('Is a:')
tab.attach(isa_parents_label, 0, 1, 1, 2, gtk.FILL, gtk.FILL, 5, 5)
tab.attach(self._isa_parents, 1, 2, 1, 2, gtk.FILL, gtk.FILL, 5, 5)
self._def = gtk.TextBuffer()
textview = gtk.TextView(self._def)
textview.set_wrap_mode(gtk.WRAP_WORD)
scrolled_window = gtk.ScrolledWindow()
scrolled_window.add(textview)
def_label = gtk.Label('Def:')
def_label.set_alignment(0.0, 0.0)
tab.attach(def_label, 0, 1, 2, 3, gtk.FILL, gtk.FILL, 5, 5)
tab.attach(scrolled_window, 1, 2, 2, 3, gtk.FILL|gtk.EXPAND, gtk.FILL|gtk.EXPAND, 5, 5)
self._tab = tab
self.add(tab)
self.set_go_term(None)
def set_go_term(self, term):
if term:
self.set_label(term['id'])
self._name.set_text(term['name'])
self._def.set_text(term['def'])
self._tab.remove(self._isa_parents)
self._isa_parents = gtk.HBox()
for p in term['is_a']:
btn = gtk.Button(p)
btn.show()
self._isa_parents.add(btn)
self._isa_parents.show()
self._tab.attach(self._isa_parents, 1, 2, 1, 2, gtk.FILL, gtk.FILL, 5, 5)
else:
self.set_label('GO Term')
self._name.set_text('')
self._def.set_text('')
self._tab.remove(self._isa_parents)
self._isa_parents = gtk.HBox()
self._tab.attach(self._isa_parents, 1, 2, 1, 2, gtk.FILL, gtk.FILL, 5, 5)
class GeneOntologyTree (gtk.HPaned):
def __init__(self, network):
gtk.HPaned.__init__(self)
self.set_position(400)
treemodel = geneontology.get_go_treestore(network)
self._treemodel = treemodel
self._tree_view = gtk.TreeView(treemodel)
self._selected_terms = set()
self._tree_view.set_fixed_height_mode(True)
# Set up context menu
self._context_menu = GoTermContextMenu(treemodel, self._tree_view)
self._tree_view.connect('popup_menu', self._popup_menu)
self._tree_view.connect('button_press_event', self._on_button_press)
renderer = gtk.CellRendererText()
go_column = gtk.TreeViewColumn('GO ID', renderer, text=0)
go_column.set_sizing(gtk.TREE_VIEW_COLUMN_FIXED)
go_column.set_fixed_width(200)
go_column.set_resizable(True)
self._tree_view.insert_column(go_column, 0)
renderer = gtk.CellRendererToggle()
renderer.set_property('activatable', True)
renderer.connect('toggled', self._toggle_selected)
renderer.set_active(True)
renderer.set_property('mode', gtk.CELL_RENDERER_MODE_ACTIVATABLE)
go_column = gtk.TreeViewColumn('T', renderer, active=2)
go_column.set_fixed_width(20)
go_column.set_sizing(gtk.TREE_VIEW_COLUMN_FIXED)
go_column.set_resizable(True)
self._tree_view.insert_column(go_column, 1)
renderer = gtk.CellRendererText()
go_column = gtk.TreeViewColumn('Name', renderer, text=1)
go_column.set_fixed_width(200)
go_column.set_sizing(gtk.TREE_VIEW_COLUMN_FIXED)
go_column.set_resizable(True)
self._tree_view.insert_column(go_column, 2)
self._desc_view = GoTermView()
self._tree_view.connect('cursor-changed', self._on_cursor_changed)
scrolled_window = gtk.ScrolledWindow()
scrolled_window.add(self._tree_view)
self.add1(scrolled_window)
self.add2(self._desc_view)
self.show_all()
def _on_cursor_changed(self, tree):
path, col = self._tree_view.get_cursor()
current = self._treemodel.get_iter(path)
term = self._treemodel.get_value(current, 3)
self._desc_view.set_go_term(term)
##
## GTK Callback functions
##
def _popup_menu(self, *rest):
self.menu.popup(None, None, None, 0, 0)
def _on_button_press(self, widget, event):
path = widget.get_path_at_pos(int(event.x), int(event.y))
iter = None
if path:
iter = self._treemodel.get_iter(path[0])
obj = self._treemodel.get_value(iter, 3)
else:
obj = None
self._context_menu.set_current_term(obj, iter)
if event.button == 3:
self._context_menu.popup(None, None, None, event.button, event.time)
def _toggle_selected(self, renderer, path):
iter = self._treemodel.get_iter(path)
selected = self._treemodel.get_value(iter, 2)
id = self._treemodel.get_value(iter, 0)
self._treemodel.set_value(iter, 2, not selected)
if selected:
self._selected_terms.remove(id)
else:
self._selected_terms.add(id)
class GoTermContextMenu (gtk.Menu):
"""Context menu for GO terms in the gene ontology browser"""
def __init__(self, treemodel, treeview):
self._treemodel = treemodel
self._treeview = treeview
self._current_term = None
self._current_iter = None
gtk.Menu.__init__(self)
# Popuplate tree
self._expand_item = i = gtk.MenuItem('Expand')
i.connect('activate', self._on_expand_subtree, treemodel, treeview)
self.append(i)
i.show()
self._collapse_item = i = gtk.MenuItem('Collapse')
i.connect('activate', self._on_collapse_subtree, treemodel, treeview)
self.append(i)
i.show()
self._select_subtree_item = i = gtk.MenuItem('Select subtree')
i.connect('activate', self._on_select_subtree, treemodel, treeview)
self.append(i)
i.show()
def set_current_term(self, term, it):
self._current_term = term
self._current_iter = it
def _on_expand_subtree(self, item, treemodel, treeview):
path = treemodel.get_path(self._current_iter)
treeview.expand_row(path, True)
def _on_collapse_subtree(self, item, treemodel, treeview):
treeview.collapse_row(treemodel.get_path(self._current_iter))
def _on_select_subtree(self, item, treemodel, treeview):
logger.log('notice', 'Selecting subtree from GO id: %s (%s)' %
(self._current_term['id'], self._current_term['name']))
ids = [x['id'] for x in networkx.bfs(go, self._current_term)]
project.project.set_selection('go-terms', set(ids))
class LoadGOFunction(workflow.Function):
def __init__(self):
workflow.Function.__init__(self, 'load-go', 'Load Gene Ontology')
def run(self):
global go
if go:
return
go = geneontology.read_default_go()
browser = GeneOntologyTree(go)
label = gtk.Label('_Gene Ontology')
label.set_use_underline(True)
fluents.app['bottom_notebook'].append_page(browser, label)
class LoadAnnotationsFunction(workflow.Function):
def __init__(self):
workflow.Function.__init__(self, 'load-go-ann', 'Load Annotations')
self.annotations = None
def run(self):
global evidence
f = open(GO_DATA_DIR + '/goa-condensed')
ev_codes = f.readline().split()
go_terms = []
lines = f.readlines()
m = zeros((len(lines), len(ev_codes)))
for i, l in enumerate(lines):
values = l.split()
go_terms.append(values[0])
for j, v in enumerate(values[1:]):
m[i,j] = float(v.strip())
d = dataset.Dataset(m,
[['go-terms', go_terms], ['evidence', ev_codes]],
name='GO evidence')
evidence = d
return [d]
class GOWeightDialog(gtk.Dialog):
def __init__(self):
gtk.Dialog.__init__(self, 'GO Gene List Influence',
None,
gtk.DIALOG_MODAL | gtk.DIALOG_DESTROY_WITH_PARENT,
(gtk.STOCK_OK, gtk.RESPONSE_OK,
gtk.STOCK_CANCEL, gtk.RESPONSE_CANCEL))
table = gtk.Table(2, 2)
sim_lbl = gtk.Label('Similarity threshold: ')
table.attach(sim_lbl, 0, 1, 0, 1)
adjustment = gtk.Adjustment(0, 0, 10, 0.1, 1.0, 1.0)
sim_spin = gtk.SpinButton(adjustment, 0.0, 2)
table.attach(sim_spin, 1, 2, 0, 1)
rank_lbl = gtk.Label('Rank threshold: ')
table.attach(rank_lbl, 0, 1, 1, 2)
rank_adj = gtk.Adjustment(0, 0, 10, 0.1, 1.0, 1.0)
rank_spin = gtk.SpinButton(rank_adj, 0.0, 2)
table.attach(rank_spin, 1, 2, 1, 2)
sim_lbl.show()
sim_spin.show()
rank_lbl.show()
rank_spin.show()
table.show()
self.vbox.add(table)
self._sim_spin = sim_spin
self._rank_spin = rank_spin
def set_options(self, options):
self._sim_spin.set_value(options['similarity_threshold'])
self._rank_spin.set_value(options['rank_threshold'])
def set_editable(self, editable):
self._sim_spin.set_sensitive(editable)
self._rank_spin.set_sensitive(editable)
def update_options(self, options):
options['similarity_threshold'] = self._sim_spin.get_value()
options['rank_threshold'] = self._rank_spin.get_value()
class DistanceToSelectionFunction(workflow.Function):
def __init__(self):
workflow.Function.__init__(self, 'dist-to-sel', 'Dist. to Selection')
self.options = DistanceToSelectionOptions()
def run(self, similarities, selection):
self.show_gui(similarities, self.options)
retval = []
dims = similarities.get_dim_name()
if dims[0] != "_%s" %dims[1] and dims[1] != "_%s" %dims[0]:
logger.log('warning', 'Are you sure this is a similarity matrix?')
dim = dims[0]
print "dim", dim
print "selection", selection[dim]
print "indices", similarities.get_indices(dim, selection[dim])
indices = similarities.get_indices(dim, selection[dim])
m = apply_along_axis(max, 1, similarities.asarray().take(indices, 1))
retval.append(dataset.Dataset(m, [(dim, similarities[dim]),
("_dummy", '0')]))
return retval
def show_gui(self, similarities, options, edit=True):
dialog = DistanceToSelectionOptionsDialog([similarities], self.options)
response = dialog.run()
dialog.hide()
if response == gtk.RESPONSE_OK:
dialog.set_output()
return dialog.get_options()
else:
return options
class GOWeightFunction(workflow.Function):
def __init__(self):
workflow.Function.__init__(self, 'load-go-ann', 'GO Influence')
self.options = GOWeightOptions()
def run(self, genelist, similarity):
## Show dialog box
self.show_gui(self.options)
## assure that data is "correct", i.e., that we can perform
## the desired operations.
common_dims = genelist.common_dims(similarity)
if len(common_dims) == 0:
logger.log('error', 'No common dimension in the selected datasets.')
elif len(common_dims) > 1:
logger.log('error', "More than one common dimension in the " +
"selected datasets. Don't know what to do.")
gene_dim = common_dims[0]
logger.log('debug', 'Assuming genes are in dimension: %s' % gene_dim)
## Do the calculations.
d = {}
def show_gui(self, options, edit=True):
dialog = GOWeightDialog()