Projects/laydi
Projects
/
laydi
Archived
7
0
Fork 0
This repository has been archived on 2024-07-04. You can view files and clone it, but cannot push or open issues or pull requests.
laydi/workflows/go_workflow.py

328 lines
10 KiB
Python

import gtk
from fluents import dataset, logger, plots, workflow, fluents
import geneontology
#import gostat
from scipy import array, randn, log, ones, zeros
import networkx
EVIDENCE_CODES=[('IMP', 'Inferred from mutant phenotype'),
('IGI', 'Inferred from genetic interaction'),
('IPI', 'Inferred from physical interaction'),
('ISS', 'Inferred from sequence or structure similarity'),
('IDA', 'Inferred from direct assay'),
('IEP', 'Inferred on expression pattern'),
('IEA', 'Inferred from electronic annotation'),
('TAS', 'Traceable author statement'),
('NAS', 'Non-traceable author statement'),
('ND', 'No biological data available'),
('RCA', 'Inferred from reviewed computational analysis'),
('IC', 'Inferred by curator')]
DISTANCE_METRICS = [('resnik', 'Resnik'),
('jiang', 'Jiang & Conrath'),
('fussimeg', 'FuSSiMeG')]
GO_DATA_DIR = '/home/einarr/data'
evidence = None
class GoTermView (gtk.Frame):
def __init__(self):
gtk.Frame.__init__(self)
tab = gtk.Table(2, 2, False)
self._table = tab
self._name = gtk.Label('')
self._name.set_line_wrap(True)
self._name.set_alignment(0, 0)
name_label = gtk.Label('Name:')
name_label.set_alignment(0, 0)
tab.attach(name_label, 0, 1, 0, 1, gtk.FILL, gtk.FILL, 5, 5)
tab.attach(self._name, 1, 2, 0, 1, gtk.FILL|gtk.EXPAND, gtk.FILL, 5, 5)
self._def = gtk.TextBuffer()
textview = gtk.TextView(self._def)
textview.set_wrap_mode(gtk.WRAP_WORD)
scrolled_window = gtk.ScrolledWindow()
scrolled_window.add(textview)
def_label = gtk.Label('Def:')
def_label.set_alignment(0.0, 0.0)
tab.attach(def_label, 0, 1, 1, 2, gtk.FILL, gtk.FILL, 5, 5)
tab.attach(scrolled_window, 1, 2, 1, 2, gtk.FILL|gtk.EXPAND, gtk.FILL|gtk.EXPAND, 5, 5)
self.add(tab)
self.set_go_term(None)
def set_go_term(self, term):
if term:
self.set_label(term['id'])
self._name.set_text(term['name'])
self._def.set_text(term['def'])
else:
self.set_label('GO Term')
self._name.set_text('')
self._def.set_text('')
class GeneOntologyTree (gtk.HPaned):
def __init__(self, network):
gtk.HPaned.__init__(self)
treemodel = geneontology.get_go_treestore(network)
self._treemodel = treemodel
self._tree_view = gtk.TreeView(treemodel)
renderer = gtk.CellRendererText()
go_column = gtk.TreeViewColumn('GO ID', renderer, text=0)
self._tree_view.insert_column(go_column, 0)
renderer = gtk.CellRendererText()
go_column = gtk.TreeViewColumn('Name', renderer, text=1)
self._tree_view.insert_column(go_column, 1)
self._desc_view = GoTermView()
self._tree_view.connect('cursor-changed', self._on_cursor_changed)
scrolled_window = gtk.ScrolledWindow()
scrolled_window.add(self._tree_view)
self.add1(scrolled_window)
self.add2(self._desc_view)
self.show_all()
def _on_cursor_changed(self, tree):
path, col = self._tree_view.get_cursor()
current = self._treemodel.get_iter(path)
term = self._treemodel.get_value(current, 2)
self._desc_view.set_go_term(term)
class GoWorkflow (workflow.Workflow):
name = 'Gene Ontology'
ident = 'go'
description = 'Gene Ontology Workflow. For tree distance measures based '\
+ 'on the GO tree.'
def __init__(self, app):
workflow.Workflow.__init__(self, app)
load = workflow.Stage('load', 'Load GO Annotations')
load.add_function(LoadGOFunction())
load.add_function(LoadAnnotationsFunction())
self.add_stage(load)
go = workflow.Stage('go', 'Gene Ontology')
go.add_function(GoDistanceFunction())
self.add_stage(go)
class LoadGOFunction(workflow.Function):
def __init__(self):
workflow.Function.__init__(self, 'load-go', 'Load Gene Ontology')
def run(self):
global go
go = geneontology.read_default_go()
browser = GeneOntologyTree(go)
label = gtk.Label('_Gene Ontology')
label.set_use_underline(True)
fluents.app['bottom_notebook'].append_page(browser, label)
class LoadTextDatasetFunction(workflow.Function):
def __init__(self):
workflow.Function.__init__(self, 'load-text-ds', 'Load text dataset')
def read_text_dataset(self, fd):
split_re = re.compile('^#\s*(\w+)\s*:\s*(.)')
dimensions = []
identifiers = {}
type = 'dataset'
name = 'Unnamed dataset'
# Read header lines from file.
line = fd.readline()
while line:
m = split_re.match(line)
if m:
key, val = m
# The line is on the form;
# dimension: dimname id1 id2 id3 ...
if key == 'dimension':
values = [v.strip() for v in val.split(' ')
dimensions.append(values[0])
identifiers[values[0]] = values[1:]
headers[key] = val.strip()
# Read type of dataset.
# Should be dataset, category, or network
elif key == 'type':
type = val
elif key == 'name':
name = val
else:
break
line = f.readline()
def run(self):
f = open('/home/einarr/foodata.tds')
return read_text_dataset(f)
class LoadAnnotationsFunction(workflow.Function):
def __init__(self):
workflow.Function.__init__(self, 'load-go-ann', 'Load Annotations')
self.annotations = None
def run(self):
global evidence
f = open(GO_DATA_DIR + '/goa-condensed')
ev_codes = f.readline().split()
go_terms = []
lines = f.readlines()
m = zeros((len(lines), len(ev_codes)))
for i, l in enumerate(lines):
values = l.split()
go_terms.append(values[0])
for j, v in enumerate(values[1:]):
m[i,j] = float(v.strip())
d = dataset.Dataset(m,
[['go-terms', go_terms], ['evidence', ev_codes]],
name='GO evidence')
evidence = d
return [d]
class EvidenceCodeFrame(gtk.Frame):
def __init__(self):
gtk.Frame.__init__(self, 'Evidence Codes')
self._ec_buttons = {}
vbox = gtk.VBox(len(EVIDENCE_CODES))
for code, desc in EVIDENCE_CODES:
btn = gtk.CheckButton('%s (%s)' % (code, desc))
self._ec_buttons[code] = btn
vbox.add(btn)
self.add(vbox)
def set_options(self, options):
for code, desc in EVIDENCE_CODES:
self._ec_buttons[code].set_active(options[code])
def update_options(self, options):
for code, desc in EVIDENCE_CODES:
options[code] = self._ec_buttons[code].get_active()
return options
class DistanceMetricFrame(gtk.Frame):
def __init__(self):
gtk.Frame.__init__(self, 'Distance Metrics')
self._metric_buttons = {}
vbox = gtk.VBox(len(DISTANCE_METRICS))
prev = None
for code, text in DISTANCE_METRICS:
btn = gtk.RadioButton(prev, '%s' % text)
self._metric_buttons[code] = btn
vbox.add(btn)
prev = btn
self.add(vbox)
def set_options(self, options):
self._metric_buttons[options['metric']].set_active(True)
def update_options(self, options):
for code, text in DISTANCE_METRICS:
if self._metric_buttons[code].get_active():
options['metric'] = code
return options
return options
class GoDistanceDialog(gtk.Dialog):
def __init__(self):
gtk.Dialog.__init__(self, 'GO term distance matrix',
None,
gtk.DIALOG_MODAL | gtk.DIALOG_DESTROY_WITH_PARENT,
(gtk.STOCK_OK, gtk.RESPONSE_OK,
gtk.STOCK_CANCEL, gtk.RESPONSE_CANCEL))
self._ec_frame = EvidenceCodeFrame()
self._metric_frame = DistanceMetricFrame()
self.vbox.add(self._ec_frame)
self.vbox.add(self._metric_frame)
def run(self):
self.vbox.show_all()
return gtk.Dialog.run(self)
def set_options(self, options):
self._ec_frame.set_options(options)
self._metric_frame.set_options(options)
def update_options(self, options):
self._ec_frame.update_options(options)
self._metric_frame.update_options(options)
return options
def set_editable(self, editable):
self._ec_frame.set_sensitive(editable)
self._metric_frame.set_sensitive(editable)
class GoDistanceFunction(workflow.Function):
def __init__(self):
workflow.Function.__init__(self, 'go-dist', 'GO term distance matrix')
self.options = GoDistanceOptions()
def run(self, selection):
self.options = self.show_gui(self.options)
if not selection.has_key('go-terms') or len(selection['go-terms']) == 0:
logger.log('warning', 'No GO terms selected. Cannot make distance matrix.')
def show_gui(self, options, edit=True):
dialog = GoDistanceDialog()
dialog.set_options(self.options)
dialog.show_all()
dialog.set_editable(edit)
response = dialog.run()
dialog.hide()
if response == gtk.RESPONSE_OK:
return dialog.update_options(self.options)
else:
return options
class Options(dict):
def __init__(self):
dict.__init__(self)
class GoDistanceOptions(Options):
def __init__(self):
Options.__init__(self)
for code, desc in EVIDENCE_CODES:
self[code] = True
self['metric'] = 'fussimeg'