einarr
a65d79697f
particularly useful when searching for GO terms, because the gtk TreeView widget does not do interactive search in collapsed parts of the tree.
453 lines
14 KiB
Python
453 lines
14 KiB
Python
import gtk
|
|
from fluents import dataset, logger, plots, workflow, fluents, project
|
|
from fluents.lib import blmfuncs
|
|
import geneontology
|
|
#import gostat
|
|
from scipy import array, randn, log, ones, zeros
|
|
import networkx
|
|
import re
|
|
|
|
# (code, description) pairs for the GO annotation evidence codes offered
# in the evidence code selection frame and used as option keys.
EVIDENCE_CODES = [
    ('IMP', 'Inferred from mutant phenotype'),
    ('IGI', 'Inferred from genetic interaction'),
    ('IPI', 'Inferred from physical interaction'),
    ('ISS', 'Inferred from sequence or structure similarity'),
    ('IDA', 'Inferred from direct assay'),
    ('IEP', 'Inferred on expression pattern'),
    ('IEA', 'Inferred from electronic annotation'),
    ('TAS', 'Traceable author statement'),
    ('NAS', 'Non-traceable author statement'),
    ('ND', 'No biological data available'),
    ('RCA', 'Inferred from reviewed computational analysis'),
    ('IC', 'Inferred by curator'),
]

# (option value, button label) pairs for the selectable distance metrics.
DISTANCE_METRICS = [
    ('resnik', 'Resnik'),
    ('jiang', 'Jiang & Conrath'),
    ('fussimeg', 'FuSSiMeG'),
]

# Directory the annotation file is read from.
GO_DATA_DIR = '/home/einarr/data'

# Module-wide state, filled in by the load functions of the workflow:
# `evidence` becomes the evidence dataset, `go` the loaded ontology.
evidence = None
go = None
|
|
|
|
class GoTermView (gtk.Frame):
    """Frame that displays the id, name and definition of a single GO term.

    The frame label carries the term id, a wrapping label shows the name
    and a scrollable text view shows the definition.
    """

    def __init__(self):
        gtk.Frame.__init__(self)

        padding = 5
        grow = gtk.FILL | gtk.EXPAND

        layout = gtk.Table(2, 2, False)
        self._table = layout

        # Row 0: caption + term name.
        self._name = gtk.Label('')
        self._name.set_line_wrap(True)
        self._name.set_alignment(0, 0)
        name_caption = gtk.Label('Name:')
        name_caption.set_alignment(0, 0)
        layout.attach(name_caption, 0, 1, 0, 1,
                      gtk.FILL, gtk.FILL, padding, padding)
        layout.attach(self._name, 1, 2, 0, 1,
                      grow, gtk.FILL, padding, padding)

        # Row 1: caption + word-wrapped definition inside a scrolled window.
        self._def = gtk.TextBuffer()
        definition_view = gtk.TextView(self._def)
        definition_view.set_wrap_mode(gtk.WRAP_WORD)
        scroller = gtk.ScrolledWindow()
        scroller.add(definition_view)
        def_caption = gtk.Label('Def:')
        def_caption.set_alignment(0.0, 0.0)
        layout.attach(def_caption, 0, 1, 1, 2,
                      gtk.FILL, gtk.FILL, padding, padding)
        layout.attach(scroller, 1, 2, 1, 2,
                      grow, grow, padding, padding)

        self.add(layout)

        # Start out in the "no term" state.
        self.set_go_term(None)

    def set_go_term(self, term):
        """Show `term` in the view.

        `term` is indexed with the keys 'id', 'name' and 'def'.  Any falsy
        value (e.g. None) resets the view to a placeholder label and empty
        name/definition fields.
        """
        if not term:
            self.set_label('GO Term')
            self._name.set_text('')
            self._def.set_text('')
            return

        self.set_label(term['id'])
        self._name.set_text(term['name'])
        self._def.set_text(term['def'])
|
|
|
|
|
|
class GeneOntologyTree (gtk.HPaned):
    """Gene ontology browser: a GO term tree next to a term detail view.

    The left pane holds a tree view over the tree store built by
    geneontology.get_go_treestore(network); the right pane is a GoTermView
    showing the term under the cursor.  Model column 0 is rendered as the
    'GO ID' column, column 1 as 'Name', and column 2 is read as the term
    object handed to the detail view and the context menu.
    """

    def __init__(self, network):
        gtk.HPaned.__init__(self)

        treemodel = geneontology.get_go_treestore(network)
        self._treemodel = treemodel
        self._tree_view = gtk.TreeView(treemodel)

        # Set up context menu
        self._context_menu = GoTermContextMenu(treemodel, self._tree_view)
        self._tree_view.connect('popup_menu', self._popup_menu)
        self._tree_view.connect('button_press_event', self._on_button_press)

        # One text column per displayed model column.
        for position, title in enumerate(('GO ID', 'Name')):
            renderer = gtk.CellRendererText()
            column = gtk.TreeViewColumn(title, renderer, text=position)
            self._tree_view.insert_column(column, position)

        self._desc_view = GoTermView()

        self._tree_view.connect('cursor-changed', self._on_cursor_changed)

        scrolled_window = gtk.ScrolledWindow()
        scrolled_window.add(self._tree_view)
        self.add1(scrolled_window)
        self.add2(self._desc_view)
        self.show_all()

    def _term_at(self, path):
        """Return (term, tree iter) for the row at `path`.

        Returns (None, None) when `path` is None, i.e. when no row is
        addressed.
        """
        if path is None:
            return None, None
        row = self._treemodel.get_iter(path)
        return self._treemodel.get_value(row, 2), row

    ##
    ## GTK Callback functions
    ##
    def _on_cursor_changed(self, tree):
        """Show the term under the cursor in the detail view."""
        path, col = self._tree_view.get_cursor()
        # get_cursor() reports a None path when there is no cursor row;
        # in that case the detail view is reset instead of failing in
        # get_iter().
        term, row = self._term_at(path)
        self._desc_view.set_go_term(term)

    def _popup_menu(self, *rest):
        """Open the context menu on the 'popup_menu' (keyboard) signal."""
        # The menu acts on its current term, so point it at the cursor row
        # first; a button press does the same in _on_button_press.
        path, col = self._tree_view.get_cursor()
        term, row = self._term_at(path)
        self._context_menu.set_current_term(term, row)
        # The menu is stored as self._context_menu; there is no self.menu.
        self._context_menu.popup(None, None, None, 0, 0)

    def _on_button_press(self, widget, event):
        """Track the clicked row and open the context menu on button 3."""
        hit = widget.get_path_at_pos(int(event.x), int(event.y))

        if hit:
            # get_path_at_pos() yields a tuple whose first item is the path.
            term, row = self._term_at(hit[0])
        else:
            term, row = None, None

        self._context_menu.set_current_term(term, row)

        if event.button == 3:
            self._context_menu.popup(None, None, None,
                                     event.button, event.time)
|
|
|
|
|
|
class GoTermContextMenu (gtk.Menu):
    """Context menu for GO terms in the gene ontology browser.

    Offers 'Expand', 'Collapse' and 'Select subtree' actions that operate
    on the term/row last passed to set_current_term().  All three actions
    are no-ops while no term is current (e.g. the menu was opened over an
    empty area of the tree view).
    """

    def __init__(self, treemodel, treeview):
        self._treemodel = treemodel
        self._treeview = treeview
        self._current_term = None
        self._current_iter = None

        gtk.Menu.__init__(self)

        # Populate menu
        self._expand_item = self._add_item(
            'Expand', self._on_expand_subtree)
        self._collapse_item = self._add_item(
            'Collapse', self._on_collapse_subtree)
        self._select_subtree_item = self._add_item(
            'Select subtree', self._on_select_subtree)

    def _add_item(self, text, handler):
        """Append a shown menu item labelled `text` and return it.

        `handler` is connected to the item's 'activate' signal and receives
        the tree model and tree view as extra arguments.
        """
        item = gtk.MenuItem(text)
        item.connect('activate', handler, self._treemodel, self._treeview)
        self.append(item)
        item.show()
        return item

    def set_current_term(self, term, it):
        """Remember the term and tree iter the menu actions apply to.

        Both may be None to indicate that no row is current.
        """
        self._current_term = term
        self._current_iter = it

    def _on_expand_subtree(self, item, treemodel, treeview):
        """Expand the current row and all of its descendants."""
        if self._current_iter is None:
            return
        path = treemodel.get_path(self._current_iter)
        treeview.expand_row(path, True)

    def _on_collapse_subtree(self, item, treemodel, treeview):
        """Collapse the current row."""
        if self._current_iter is None:
            return
        treeview.collapse_row(treemodel.get_path(self._current_iter))

    def _on_select_subtree(self, item, treemodel, treeview):
        """Set the 'go-terms' selection to the ids found by a BFS from the
        current term in the module-level `go` graph."""
        if self._current_term is None:
            return
        logger.log('notice', 'Selecting subtree from GO id: %s (%s)' %
                   (self._current_term['id'], self._current_term['name']))
        ids = [x['id'] for x in networkx.bfs(go, self._current_term)]
        project.project.set_selection('go-terms', set(ids))
|
|
|
|
|
|
class GoWorkflow (workflow.Workflow):
    """Workflow with three stages: loading GO data, GO term selection and
    distance computation, and bilinear analysis (PCA)."""

    name = 'Gene Ontology'
    ident = 'go'
    description = ('Gene Ontology Workflow. For tree distance measures based '
                   'on the GO tree.')

    def __init__(self, app):
        workflow.Workflow.__init__(self, app)

        # Stage 1: load the ontology, annotations and evidence dataset.
        load_stage = workflow.Stage('load', 'Load GO Annotations')
        for function in (LoadGOFunction(),
                         LoadAnnotationsFunction(),
                         LoadTextDatasetFunction()):
            load_stage.add_function(function)
        self.add_stage(load_stage)

        # Stage 2: select terms, compute and save distances.
        go_stage = workflow.Stage('go', 'Gene Ontology')
        for function in (SelectGoTermsFunction(self),
                         GoDistanceFunction(),
                         SaveDistancesFunction()):
            go_stage.add_function(function)
        self.add_stage(go_stage)

        # Stage 3: bilinear analysis.
        blm_stage = workflow.Stage('blm', 'Bilinear Analysis')
        blm_stage.add_function(blmfuncs.PCA())
        self.add_stage(blm_stage)
|
|
|
|
|
|
class LoadGOFunction(workflow.Function):
    """Workflow function that loads the default gene ontology and adds a
    browser page for it to the application's bottom notebook."""

    def __init__(self):
        workflow.Function.__init__(self, 'load-go', 'Load Gene Ontology')

    def run(self):
        """Load the ontology into the module-level `go` and show a browser.

        Returns nothing.
        """
        global go
        go = geneontology.read_default_go()

        page = GeneOntologyTree(go)

        # Underline-enabled tab label, so '_G' acts as a mnemonic.
        tab_label = gtk.Label('_Gene Ontology')
        tab_label.set_use_underline(True)

        notebook = fluents.app['bottom_notebook']
        notebook.append_page(page, tab_label)
|
|
|
|
|
|
class LoadTextDatasetFunction(workflow.Function):
    """Workflow function that reads the GO evidence dataset from the
    'goa-condensed.ftsv' file in GO_DATA_DIR."""

    def __init__(self):
        workflow.Function.__init__(self, 'load-text-ds', 'Load GO Evidence')

    def run(self):
        """Read the evidence dataset and publish it.

        The dataset is stored in the module-level `evidence` and returned
        as a one-element list.  Raises IOError if the file cannot be
        opened.
        """
        global evidence
        # Same location as before, but derived from GO_DATA_DIR so the data
        # directory is configured in one place only.
        f = open(GO_DATA_DIR + '/goa-condensed.ftsv')
        try:
            # NOTE(review): assumes read_ftsv() consumes the file eagerly,
            # so the handle may be closed once it returns -- confirm.
            evidence = dataset.read_ftsv(f)
        finally:
            # Release the file handle even if parsing fails.
            f.close()
        return [evidence]
|
|
|
|
|
|
class LoadAnnotationsFunction(workflow.Function):
    """Workflow function that builds the 'GO evidence' dataset from the
    whitespace-separated 'goa-condensed' file in GO_DATA_DIR.

    The first line of the file lists the evidence codes (column names).
    Every following line starts with a GO term id followed by one numeric
    value per evidence code.
    """

    def __init__(self):
        workflow.Function.__init__(self, 'load-go-ann', 'Load Annotations')
        self.annotations = None

    def run(self):
        """Parse the annotation file into a dataset.

        The dataset (rows: 'go-terms', columns: 'evidence') is stored in
        the module-level `evidence` and returned as a one-element list.
        Blank lines in the file are ignored.  Raises IOError if the file
        cannot be opened and ValueError if a value is not numeric.
        """
        global evidence

        f = open(GO_DATA_DIR + '/goa-condensed')
        try:
            ev_codes = f.readline().split()
            rows = [line.split() for line in f]
        finally:
            # Release the file handle even if reading fails.
            f.close()

        # A blank (or whitespace-only) line splits into an empty list and
        # would otherwise fail on the term id lookup below.
        rows = [fields for fields in rows if fields]

        go_terms = [fields[0] for fields in rows]
        m = zeros((len(rows), len(ev_codes)))
        for i, fields in enumerate(rows):
            for j, v in enumerate(fields[1:]):
                m[i, j] = float(v)

        d = dataset.Dataset(m,
                            [['go-terms', go_terms], ['evidence', ev_codes]],
                            name='GO evidence')

        evidence = d
        return [d]
|
|
|
|
|
|
class EvidenceCodeFrame(gtk.Frame):
    """Frame with one check button per entry in EVIDENCE_CODES."""

    def __init__(self):
        gtk.Frame.__init__(self, 'Evidence Codes')

        # Maps evidence code -> its check button.
        self._ec_buttons = {}

        # NOTE(review): the first positional argument of gtk.VBox is
        # presumably `homogeneous`, not a child count -- confirm intent.
        box = gtk.VBox(len(EVIDENCE_CODES))
        for code, desc in EVIDENCE_CODES:
            button = gtk.CheckButton('%s (%s)' % (code, desc))
            self._ec_buttons[code] = button
            box.add(button)
        self.add(box)

    def set_options(self, options):
        """Set each check button from `options[code]`."""
        for code, _desc in EVIDENCE_CODES:
            button = self._ec_buttons[code]
            button.set_active(options[code])

    def update_options(self, options):
        """Write each button's state to `options[code]`; return `options`."""
        for code, _desc in EVIDENCE_CODES:
            button = self._ec_buttons[code]
            options[code] = button.get_active()
        return options
|
|
|
|
|
|
class DistanceMetricFrame(gtk.Frame):
    """Frame with one radio button per entry in DISTANCE_METRICS."""

    def __init__(self):
        gtk.Frame.__init__(self, 'Distance Metrics')

        # Maps metric code -> its radio button.
        self._metric_buttons = {}

        box = gtk.VBox()
        group_member = None
        for code, text in DISTANCE_METRICS:
            # Passing the previous button puts all buttons in one group.
            button = gtk.RadioButton(group_member, '%s' % text)
            self._metric_buttons[code] = button
            box.add(button)
            group_member = button
        self.add(box)

    def set_options(self, options):
        """Activate the radio button named by `options['metric']`."""
        selected = self._metric_buttons[options['metric']]
        selected.set_active(True)

    def update_options(self, options):
        """Store the code of the first active button in
        `options['metric']` and return `options`.

        `options` is returned unchanged when no button is active.
        """
        for code, _text in DISTANCE_METRICS:
            if self._metric_buttons[code].get_active():
                options['metric'] = code
                break
        return options
|
|
|
|
|
|
class GoDistanceDialog(gtk.Dialog):
    """Modal OK/Cancel dialog combining an EvidenceCodeFrame and a
    DistanceMetricFrame."""

    def __init__(self):
        flags = gtk.DIALOG_MODAL | gtk.DIALOG_DESTROY_WITH_PARENT
        buttons = (gtk.STOCK_OK, gtk.RESPONSE_OK,
                   gtk.STOCK_CANCEL, gtk.RESPONSE_CANCEL)
        gtk.Dialog.__init__(self, 'GO term distance matrix',
                            None, flags, buttons)

        self._ec_frame = EvidenceCodeFrame()
        self._metric_frame = DistanceMetricFrame()
        for frame in (self._ec_frame, self._metric_frame):
            self.vbox.add(frame)

    def run(self):
        """Show the dialog contents, run the dialog, return its response."""
        self.vbox.show_all()
        return gtk.Dialog.run(self)

    def set_options(self, options):
        """Load `options` into both frames."""
        for frame in (self._ec_frame, self._metric_frame):
            frame.set_options(options)

    def update_options(self, options):
        """Copy the state of both frames into `options` and return it."""
        for frame in (self._ec_frame, self._metric_frame):
            frame.update_options(options)
        return options

    def set_editable(self, editable):
        """Make both frames sensitive (editable) or insensitive."""
        for frame in (self._ec_frame, self._metric_frame):
            frame.set_sensitive(editable)
|
|
|
|
|
|
class NumericDict(dict):
    """Dictionary whose item lookup yields 0.0 for missing keys.

    Only `d[key]` is affected; a missing key is not inserted by the lookup,
    and `in`, `get()` etc. behave as for a plain dict.  This lets counters
    be accumulated with `d[key] += value` without initialisation.
    """

    def __init__(self):
        dict.__init__(self)

    def __getitem__(self, key):
        """Return the value stored for `key`, or 0.0 if there is none."""
        try:
            return dict.__getitem__(self, key)
        except KeyError:
            # Only a missing key maps to the default; other errors (such
            # as an unhashable key) are real bugs and must propagate.
            return 0.0
|
|
|
|
|
|
class SelectGoTermsFunction(workflow.Function):
    """Workflow function that selects the first 100 GO terms found by a
    breadth-first search from `go.get_bp()`."""

    def __init__(self, wf):
        workflow.Function.__init__(self, 'go-select', 'Select GO Terms')
        # Owning workflow; its project receives the selection.
        self.wf = wf

    def run(self, ds):
        """Set the project's 'go-terms' selection.  `ds` is not used."""
        start = go.get_bp()
        term_ids = [node['id'] for node in networkx.paths.bfs(go, start)]

        chosen = set(term_ids[:100])
        self.wf.project.set_selection('go-terms', chosen)
        # self.wf.project.set_selection('go-terms', set(['GO:0007582', 'GO:0008150', 'GO:0051704', 'GO:0044419']))
|
|
|
|
|
|
class GoDistanceFunction(workflow.Function):
    """Workflow function that computes a distance matrix between the
    selected GO terms from annotation-based information content.

    NOTE(review): the 'metric' option chosen in the dialog is stored but
    not consulted; run() always uses resnik_distance_matrix().
    """

    # Distance assigned to a pair of terms for which go.subsumer() finds
    # no common subsumer.
    NO_SUBSUMER_DISTANCE = 1000

    def __init__(self):
        workflow.Function.__init__(self, 'go-dist', 'GO term distance matrix')
        self.options = GoDistanceOptions()

    def resnik_distance_matrix(self, selection, ic):
        """Return a dataset of pairwise distances between selected terms.

        `selection['go-terms']` supplies the term ids and `ic` maps a term
        id to its information content.  The distance between t1 and t2 is
        ic[t1] + ic[t2] - 2 * ic[subsumer], where the subsumer comes from
        go.subsumer(); pairs without one get NO_SUBSUMER_DISTANCE.
        """
        ids = list(selection['go-terms'])
        # Resolve every id once instead of once per matrix cell.
        terms = [go.by_id[term_id] for term_id in ids]
        size = len(ids)
        m = zeros((size, size))

        # Create resnik distance matrix
        for i, t1 in enumerate(ids):
            for j, t2 in enumerate(ids):
                subsumer = go.subsumer(terms[i], terms[j])
                if subsumer is None:
                    m[i, j] = self.NO_SUBSUMER_DISTANCE
                else:
                    m[i, j] = ic[t1] + ic[t2] - 2.0 * ic[subsumer['id']]

        ds = dataset.Dataset(m, (('go-terms', ids), ('_go-terms', ids)),
                             'Resnik')
        return ds

    def run(self, x, selection):
        """Ask for options, then build the distance matrix.

        Returns a one-element list holding the distance dataset, or None
        (after logging a warning) when no GO terms are selected or the
        ontology / evidence data have not been loaded.  `x` is not used.
        """
        self.options = self.show_gui(self.options)

        if not selection.has_key('go-terms') or len(selection['go-terms']) == 0:
            logger.log('warning', 'No GO terms selected. Cannot make distance matrix.')
            # Nothing to compute; continuing would only fail further down.
            return

        if go is None or evidence is None:
            logger.log('warning', 'GO or evidence data not loaded. Cannot make distance matrix.')
            return

        # Sum the annotation counts over the enabled evidence codes.
        codes = [c for c, d in EVIDENCE_CODES if self.options[c]]
        ev_indices = evidence.get_indices('evidence', codes)
        ann_count_matrix = evidence._array[:, ev_indices].sum(1)
        total_ann = ann_count_matrix.sum(0)

        annotations = NumericDict()
        ic = NumericDict()

        # Insert annotations into dict
        for i, v in enumerate(evidence.get_identifiers('go-terms')):
            annotations[v] = ann_count_matrix[i]

        # Accumulate annotations: walk the reversed topological order and
        # add each term's count to every in-neighbour (parent).
        for term in reversed(networkx.topological_sort(go)):
            for parent in go.in_neighbors(term):
                annotations[parent['id']] += annotations[term['id']]

        # Create information content dictionary
        # NOTE(review): a term with a zero count ends up in log(0) here --
        # confirm how such terms should be treated.
        for term, count in annotations.items():
            ic[term] = -log(count / total_ann)

        return [self.resnik_distance_matrix(selection, ic)]

    def show_gui(self, options, edit=True):
        """Show the options dialog and return the resulting options.

        `options` is loaded into the dialog; if the dialog is confirmed
        with OK it is updated in place from the dialog and returned,
        otherwise it is returned unchanged.  `edit` controls whether the
        option widgets are sensitive.
        """
        dialog = GoDistanceDialog()
        try:
            dialog.set_options(options)
            dialog.show_all()
            dialog.set_editable(edit)
            response = dialog.run()
            dialog.hide()
            if response == gtk.RESPONSE_OK:
                return dialog.update_options(options)
            return options
        finally:
            # A new dialog is built on every call, so free this one rather
            # than leaving it hidden.
            dialog.destroy()
|
|
|
|
|
|
class SaveDistancesFunction(workflow.Function):
    """Workflow function that writes a dataset to 'output.ftsv' in
    GO_DATA_DIR."""

    def __init__(self):
        workflow.Function.__init__(self, 'save-matrix', 'Save Matrix')

    def run(self, ds):
        """Write `ds` with dataset.write_ftsv().

        Returns nothing.  Raises IOError if the file cannot be opened for
        writing.
        """
        # Same location as before, but derived from GO_DATA_DIR so the data
        # directory is configured in one place only.
        filename = GO_DATA_DIR + '/output.ftsv'
        fd = open(filename, 'w')
        try:
            dataset.write_ftsv(fd, ds)
        finally:
            # Close (and flush) the file even if writing fails.
            fd.close()
|
|
|
|
|
|
class Options(dict):
    """Base class for option sets: a dict that is created empty."""

    def __init__(self):
        super(Options, self).__init__()
|
|
|
|
|
|
class GoDistanceOptions(Options):
    """Default options for the GO distance function: every evidence code
    in EVIDENCE_CODES enabled and 'metric' set to 'fussimeg'."""

    def __init__(self):
        Options.__init__(self)
        # One boolean flag per evidence code, all switched on.
        self.update([(code, True) for code, _desc in EVIDENCE_CODES])
        self['metric'] = 'fussimeg'
|
|
|