Projects/laydi
Projects
/
laydi
Archived
7
0
Fork 0

removed unused workflows

This commit is contained in:
Arnar Flatberg 2007-09-03 15:50:16 +00:00
parent 2fdd9aaf77
commit f8e0cfdb8b
5 changed files with 15 additions and 1969 deletions

View File

@ -1,261 +0,0 @@
import gobject
import gtk
import networkx
import re
class GOTerm:
def __init__(self):
self.d = {}
## Create empty lists for all list values
for l in GOTerm.lists:
self.d[l] = []
for s in GOTerm.scalars:
self.d[l] = None
def __getitem__(self, key):
if self.d.has_key(key):
return self.d[key]
return None
def __setitem__(self, key, value):
self.d[key] = value
GOTerm.lists = ['is_a', 'alt_id', 'exact_synonym', 'broad_synonym',
'narrow_synonym', 'related_synonym', 'relationship',
'subset', 'synonym', 'xref_analog', 'xref_unknown']
GOTerm.scalars = ['name', 'id', 'namespace', 'def', 'is_transitive',
'comment', 'is_obsolete']
class GeneOntology(networkx.XDiGraph):
def __init__(self):
networkx.XDiGraph.__init__(self)
self.by_id = {}
self.undirected = None
def add_term(self, term):
self.add_node(term)
self.by_id[term['id']] = term
def link_ontology(self, linkattr, obsolete=False):
for node in self.nodes():
for link in node[linkattr]:
self.add_edge(self.by_id[link], node, linkattr)
def link_relationships(self):
for node in self.nodes():
for link in node['relationship']:
link_type, term = link.split(' ')
self.add_edge(self.by_id[term.strip()], node, link_type.strip())
def get_bp(self):
"""Returns the root node of the biological_process tree"""
return self.by_id['GO:0008150']
def get_cc(self):
"""Returns the root node of the cellular_component tree"""
return self.by_id['id: GO:0005575']
def get_mf(self):
"""Returns the root node of the molecular_function tree"""
return self.by_id['GO:0003674']
def _subsumer(self, t1, t2, heap):
while heap != []:
t = heap[0]
heap = heap[1:]
p1 = networkx.shortest_path(self, t, t1)
p2 = networkx.shortest_path(self, t, t2)
if p1 and p2:
return t
heap += self.in_neighbors(t)
return None
def subsumer(self, t1, t2):
if t1 == t2:
return t1
if networkx.shortest_path(self, t1, t2):
return t1
elif networkx.shortest_path(self, t2, t1):
return t2
return self._subsumer(t1, t2, self.in_neighbors(t1))
def old_subsumer(self, t1, t2):
if t1 == t2:
return t1
if self.undirected == None:
self.undirected = self.to_undirected()
path = networkx.shortest_path(self.undirected, t1, t2)
if not path:
print "Woah, path not found."
return None
if path == [1]:
print "This shouldn't happen"
return t1
for t in path:
if networkx.shortest_path(self, t, t1) and \
networkx.shortest_path(self, t, t2):
return t
print "GeneOntology.subsumer: should not reach this point"
print "path is now: %s" % path
print "ids are: %s " % [x['id'] for x in path]
def _split_obo_line(line):
"""Splits a line from an obo file in its three constituent parts.
@param line: A string containing a line from an obo file. The line must
either be a section definition field with a section name in brackets
or a line of the form: keyword: value ! comment
@returns: A tuple of four strings conaining the section, key, value and
description defined in the string. If the section part is None, all
the other fields are strings and if section is a string, all the other
fields are None.
"""
attrib_re = re.compile(r'^\s*([\w-]+)\s*:\s*([^!]*)!?(.*$)')
s = line.strip()
if s == "":
return (None, None, None, None)
elif s.startswith('[') and s.endswith(']'):
return (s[1:-1], None, None, None)
else:
m = attrib_re.match(s)
if m:
key, value, comment = [x.strip() for x in m.groups()]
return (None, key, value, comment)
else:
raise Exception('Unparsable line: %s' % line)
def _add_term_attribute(term, key, value, comment):
if key in GOTerm.scalars:
term[key] = value
elif key in GOTerm.lists:
term[key].append(value)
else:
raise Exception('Unknown key %s: %s' % (key, value))
def read_gene_ontology(fd):
"""Reads the Gene Ontology from an obo file.
@param fd: An open file object to the obo file.
"""
go = GeneOntology()
term = None
section = None
line = fd.readline()
while line:
s, k, v, c = _split_obo_line(line)
if s == None and k == None:
pass
elif s:
if term:
go.add_term(term)
section = s
if s == 'Term':
term = GOTerm()
else:
term = None
print "ignoring: %s" %s
else:
if term:
_add_term_attribute(term, k, v, c)
line = fd.readline()
if term:
go.add_term(term)
return go
def pickle_gene_ontology(go, fn):
fd = open(fn, 'wb')
pickle.dump(go, fd)
fd.close()
def load_pickled_ontology(fn):
fd = open(fn, 'rb')
go = pickle.load(fd)
fd.close()
return go
def read_default_go():
f = open("/usr/share/gene-ontology/gene_ontology.obo")
go = read_gene_ontology(f)
go.link_ontology('is_a')
go.link_relationships()
f.close()
return go
def _add_subgraphs(treestore, ontology, parent, nodes):
for n in nodes:
i = treestore.insert(parent, 0, (n['id'], n['name'], False, n))
_add_subgraphs(treestore, ontology, i, ontology.successors(n))
def get_go_treestore(ontology):
ts = gtk.TreeStore(gobject.TYPE_STRING, ## ID
gobject.TYPE_STRING, ## Name
gobject.TYPE_BOOLEAN, ## Selected
gobject.TYPE_PYOBJECT) ## Node
_add_subgraphs(ts, ontology, None, [ontology.get_bp()])
return ts
class NetworkTreeModel(gtk.GenericTreeModel):
def __init__(self, network, root):
gtk.GenericTreeModel.__init__(self)
self._network = network
self._root = root
def on_get_flags(self):
return 0
def on_get_n_columns(self):
return 1
def on_get_column_type(self, index):
if index==0:
return gobject.TYPE_STRING
def on_get_iter(self, path):
node = self._root
for p in path[1:]:
children = self._network.predecessors(node)
node = children[p]
return node
def on_get_path(self, rowref):
pass
def on_get_value(self, rowref, column):
print 'get_value'
return rowref['id']
def on_iter_next(self, rowref):
pass
def on_iter_children(self, parent):
pass
def on_iter_has_child(self, rowref):
pass
def on_iter_n_children(self, rowref):
pass
def on_iter_nth_child(self, parent, n):
pass
def on_iter_parent(self, child):
pass

View File

@ -1,486 +0,0 @@
import gtk
from fluents import dataset, logger, plots, workflow, fluents, project
from fluents.lib import blmfuncs
import geneontology
#import gostat
from scipy import array, randn, log, ones, zeros
import networkx
import re
EVIDENCE_CODES=[('IMP', 'Inferred from mutant phenotype'),
('IGI', 'Inferred from genetic interaction'),
('IPI', 'Inferred from physical interaction'),
('ISS', 'Inferred from sequence or structure similarity'),
('IDA', 'Inferred from direct assay'),
('IEP', 'Inferred on expression pattern'),
('IEA', 'Inferred from electronic annotation'),
('TAS', 'Traceable author statement'),
('NAS', 'Non-traceable author statement'),
('ND', 'No biological data available'),
('RCA', 'Inferred from reviewed computational analysis'),
('IC', 'Inferred by curator')]
DISTANCE_METRICS = [('resnik', 'Resnik'),
('jiang', 'Jiang & Conrath'),
('fussimeg', 'FuSSiMeG')]
GO_DATA_DIR = '/home/einarr/data'
evidence = None
go = None
class GoTermView (gtk.Frame):
def __init__(self):
gtk.Frame.__init__(self)
tab = gtk.Table(2, 2, False)
self._table = tab
self._name = gtk.Label('')
self._name.set_line_wrap(True)
self._name.set_alignment(0, 0)
name_label = gtk.Label('Name:')
name_label.set_alignment(0, 0)
tab.attach(name_label, 0, 1, 0, 1, gtk.FILL, gtk.FILL, 5, 5)
tab.attach(self._name, 1, 2, 0, 1, gtk.FILL|gtk.EXPAND, gtk.FILL, 5, 5)
self._def = gtk.TextBuffer()
textview = gtk.TextView(self._def)
textview.set_wrap_mode(gtk.WRAP_WORD)
scrolled_window = gtk.ScrolledWindow()
scrolled_window.add(textview)
def_label = gtk.Label('Def:')
def_label.set_alignment(0.0, 0.0)
tab.attach(def_label, 0, 1, 1, 2, gtk.FILL, gtk.FILL, 5, 5)
tab.attach(scrolled_window, 1, 2, 1, 2, gtk.FILL|gtk.EXPAND, gtk.FILL|gtk.EXPAND, 5, 5)
self.add(tab)
self.set_go_term(None)
def set_go_term(self, term):
if term:
self.set_label(term['id'])
self._name.set_text(term['name'])
self._def.set_text(term['def'])
else:
self.set_label('GO Term')
self._name.set_text('')
self._def.set_text('')
class GeneOntologyTree (gtk.HPaned):
def __init__(self, network):
gtk.HPaned.__init__(self)
treemodel = geneontology.get_go_treestore(network)
self._treemodel = treemodel
self._tree_view = gtk.TreeView(treemodel)
self._selected_terms = set()
self._tree_view.set_fixed_height_mode(True)
# Set up context menu
self._context_menu = GoTermContextMenu(treemodel, self._tree_view)
self._tree_view.connect('popup_menu', self._popup_menu)
self._tree_view.connect('button_press_event', self._on_button_press)
renderer = gtk.CellRendererText()
go_column = gtk.TreeViewColumn('GO ID', renderer, text=0)
go_column.set_sizing(gtk.TREE_VIEW_COLUMN_FIXED)
go_column.set_fixed_width(200)
go_column.set_resizable(True)
self._tree_view.insert_column(go_column, 0)
renderer = gtk.CellRendererToggle()
renderer.set_property('activatable', True)
renderer.connect('toggled', self._toggle_selected)
renderer.set_active(True)
renderer.set_property('mode', gtk.CELL_RENDERER_MODE_ACTIVATABLE)
go_column = gtk.TreeViewColumn('T', renderer, active=2)
go_column.set_fixed_width(20)
go_column.set_sizing(gtk.TREE_VIEW_COLUMN_FIXED)
go_column.set_resizable(True)
self._tree_view.insert_column(go_column, 1)
renderer = gtk.CellRendererText()
go_column = gtk.TreeViewColumn('Name', renderer, text=1)
go_column.set_fixed_width(200)
go_column.set_sizing(gtk.TREE_VIEW_COLUMN_FIXED)
go_column.set_resizable(True)
self._tree_view.insert_column(go_column, 2)
self._desc_view = GoTermView()
self._tree_view.connect('cursor-changed', self._on_cursor_changed)
scrolled_window = gtk.ScrolledWindow()
scrolled_window.add(self._tree_view)
self.add1(scrolled_window)
self.add2(self._desc_view)
self.show_all()
def _on_cursor_changed(self, tree):
path, col = self._tree_view.get_cursor()
current = self._treemodel.get_iter(path)
term = self._treemodel.get_value(current, 3)
self._desc_view.set_go_term(term)
##
## GTK Callback functions
##
def _popup_menu(self, *rest):
self.menu.popup(None, None, None, 0, 0)
def _on_button_press(self, widget, event):
path = widget.get_path_at_pos(int(event.x), int(event.y))
iter = None
if path:
iter = self._treemodel.get_iter(path[0])
obj = self._treemodel.get_value(iter, 3)
else:
obj = None
self._context_menu.set_current_term(obj, iter)
if event.button == 3:
self._context_menu.popup(None, None, None, event.button, event.time)
def _toggle_selected(self, renderer, path):
iter = self._treemodel.get_iter(path)
selected = self._treemodel.get_value(iter, 2)
id = self._treemodel.get_value(iter, 0)
self._treemodel.set_value(iter, 2, not selected)
if selected:
self._selected_terms.remove(id)
else:
self._selected_terms.add(id)
class GoTermContextMenu (gtk.Menu):
"""Context menu for GO terms in the gene ontology browser"""
def __init__(self, treemodel, treeview):
self._treemodel = treemodel
self._treeview = treeview
self._current_term = None
self._current_iter = None
gtk.Menu.__init__(self)
# Popuplate tree
self._expand_item = i = gtk.MenuItem('Expand')
i.connect('activate', self._on_expand_subtree, treemodel, treeview)
self.append(i)
i.show()
self._collapse_item = i = gtk.MenuItem('Collapse')
i.connect('activate', self._on_collapse_subtree, treemodel, treeview)
self.append(i)
i.show()
self._select_subtree_item = i = gtk.MenuItem('Select subtree')
i.connect('activate', self._on_select_subtree, treemodel, treeview)
self.append(i)
i.show()
def set_current_term(self, term, it):
self._current_term = term
self._current_iter = it
def _on_expand_subtree(self, item, treemodel, treeview):
path = treemodel.get_path(self._current_iter)
treeview.expand_row(path, True)
def _on_collapse_subtree(self, item, treemodel, treeview):
treeview.collapse_row(treemodel.get_path(self._current_iter))
def _on_select_subtree(self, item, treemodel, treeview):
logger.log('notice', 'Selecting subtree from GO id: %s (%s)' %
(self._current_term['id'], self._current_term['name']))
ids = [x['id'] for x in networkx.bfs(go, self._current_term)]
project.project.set_selection('go-terms', set(ids))
class GoWorkflow (workflow.Workflow):
name = 'Gene Ontology'
ident = 'go'
description = 'Gene Ontology Workflow. For tree distance measures based '\
+ 'on the GO tree.'
def __init__(self, app):
workflow.Workflow.__init__(self, app)
load = workflow.Stage('load', 'Load GO Annotations')
load.add_function(LoadGOFunction())
load.add_function(LoadAnnotationsFunction())
load.add_function(LoadTextDatasetFunction())
self.add_stage(load)
go = workflow.Stage('go', 'Gene Ontology')
go.add_function(SelectGoTermsFunction(self))
go.add_function(GoDistanceFunction())
go.add_function(SaveDistancesFunction())
self.add_stage(go)
blm = workflow.Stage('blm', 'Bilinear Analysis')
blm.add_function(blmfuncs.PCA())
self.add_stage(blm)
class LoadGOFunction(workflow.Function):
def __init__(self):
workflow.Function.__init__(self, 'load-go', 'Load Gene Ontology')
def run(self):
global go
go = geneontology.read_default_go()
browser = GeneOntologyTree(go)
label = gtk.Label('_Gene Ontology')
label.set_use_underline(True)
fluents.app['bottom_notebook'].append_page(browser, label)
class LoadTextDatasetFunction(workflow.Function):
def __init__(self):
workflow.Function.__init__(self, 'load-text-ds', 'Load GO Evidence')
def run(self):
f = open('/home/einarr/data/goa-condensed.ftsv')
global evidence
evidence = dataset.read_ftsv(f)
return [evidence]
class LoadAnnotationsFunction(workflow.Function):
def __init__(self):
workflow.Function.__init__(self, 'load-go-ann', 'Load Annotations')
self.annotations = None
def run(self):
global evidence
f = open(GO_DATA_DIR + '/goa-condensed')
ev_codes = f.readline().split()
go_terms = []
lines = f.readlines()
m = zeros((len(lines), len(ev_codes)))
for i, l in enumerate(lines):
values = l.split()
go_terms.append(values[0])
for j, v in enumerate(values[1:]):
m[i,j] = float(v.strip())
d = dataset.Dataset(m,
[['go-terms', go_terms], ['evidence', ev_codes]],
name='GO evidence')
evidence = d
return [d]
class EvidenceCodeFrame(gtk.Frame):
def __init__(self):
gtk.Frame.__init__(self, 'Evidence Codes')
self._ec_buttons = {}
vbox = gtk.VBox(len(EVIDENCE_CODES))
for code, desc in EVIDENCE_CODES:
btn = gtk.CheckButton('%s (%s)' % (code, desc))
self._ec_buttons[code] = btn
vbox.add(btn)
self.add(vbox)
def set_options(self, options):
for code, desc in EVIDENCE_CODES:
self._ec_buttons[code].set_active(options[code])
def update_options(self, options):
for code, desc in EVIDENCE_CODES:
options[code] = self._ec_buttons[code].get_active()
return options
class DistanceMetricFrame(gtk.Frame):
def __init__(self):
gtk.Frame.__init__(self, 'Distance Metrics')
self._metric_buttons = {}
vbox = gtk.VBox()
prev = None
for code, text in DISTANCE_METRICS:
btn = gtk.RadioButton(prev, '%s' % text)
self._metric_buttons[code] = btn
vbox.add(btn)
prev = btn
self.add(vbox)
def set_options(self, options):
self._metric_buttons[options['metric']].set_active(True)
def update_options(self, options):
for code, text in DISTANCE_METRICS:
if self._metric_buttons[code].get_active():
options['metric'] = code
return options
return options
class GoDistanceDialog(gtk.Dialog):
def __init__(self):
gtk.Dialog.__init__(self, 'GO term distance matrix',
None,
gtk.DIALOG_MODAL | gtk.DIALOG_DESTROY_WITH_PARENT,
(gtk.STOCK_OK, gtk.RESPONSE_OK,
gtk.STOCK_CANCEL, gtk.RESPONSE_CANCEL))
self._ec_frame = EvidenceCodeFrame()
self._metric_frame = DistanceMetricFrame()
self.vbox.add(self._ec_frame)
self.vbox.add(self._metric_frame)
def run(self):
self.vbox.show_all()
return gtk.Dialog.run(self)
def set_options(self, options):
self._ec_frame.set_options(options)
self._metric_frame.set_options(options)
def update_options(self, options):
self._ec_frame.update_options(options)
self._metric_frame.update_options(options)
return options
def set_editable(self, editable):
self._ec_frame.set_sensitive(editable)
self._metric_frame.set_sensitive(editable)
class NumericDict(dict):
def __init__(self):
dict.__init__(self)
def __getitem__(self, key):
retval = 0
try:
retval = dict.__getitem__(self, key)
except:
retval = 0.0
return retval
class SelectGoTermsFunction(workflow.Function):
def __init__(self, wf):
workflow.Function.__init__(self, 'go-select', 'Select GO Terms')
self.wf = wf
def run(self, ds):
terms = [x['id'] for x in networkx.paths.bfs(go, go.get_bp())]
self.wf.project.set_selection('go-terms', set(terms[:100]))
# self.wf.project.set_selection('go-terms', set(['GO:0007582', 'GO:0008150', 'GO:0051704', 'GO:0044419']))
class GoDistanceFunction(workflow.Function):
def __init__(self):
workflow.Function.__init__(self, 'go-dist', 'GO term distance matrix')
self.options = GoDistanceOptions()
def resnik_distance_matrix(self, selection, ic):
size = len(selection['go-terms'])
m = zeros((size, size))
# Create resnik distance matrix
ids = list(selection['go-terms'])
for i, t1 in enumerate(ids):
for j, t2 in enumerate(ids):
term1 = go.by_id[t1]
term2 = go.by_id[t2]
subsumer = go.subsumer(term1, term2)
if subsumer == None:
m[i, j] = 1000
else:
# print "%s - %s - %s" % (t1, subsumer['id'], t2)
m[i, j] = ic[t1] + ic[t2] - 2.0 * ic[subsumer['id']]
ds = dataset.Dataset(m, (('go-terms', ids), ('_go-terms', ids)), 'Resnik')
return ds
def run(self, x, selection):
global evidence, go
self.options = self.show_gui(self.options)
if not selection.has_key('go-terms') or len(selection['go-terms']) == 0:
logger.log('warning', 'No GO terms selected. Cannot make distance matrix.')
codes = [c for c, d in EVIDENCE_CODES if self.options[c]]
ev_indices = evidence.get_indices('evidence', codes)
ann_count_matrix = evidence._array[:, ev_indices].sum(1)
total_ann = ann_count_matrix.sum(0)
annotations = NumericDict()
ic = NumericDict()
# Insert annotations into dict
for i, v in enumerate(evidence.get_identifiers('go-terms')):
annotations[v] = ann_count_matrix[i]
# Accumulate annotations
for term in reversed(networkx.topological_sort(go)):
for parent in go.in_neighbors(term):
annotations[parent['id']] += annotations[term['id']]
# Create information content dictionary
for term, count in annotations.items():
ic[term] = -log(count / total_ann)
return [self.resnik_distance_matrix(selection, ic)]
def show_gui(self, options, edit=True):
dialog = GoDistanceDialog()
dialog.set_options(self.options)
dialog.show_all()
dialog.set_editable(edit)
response = dialog.run()
dialog.hide()
if response == gtk.RESPONSE_OK:
return dialog.update_options(self.options)
else:
return options
class SaveDistancesFunction(workflow.Function):
def __init__(self):
workflow.Function.__init__(self, 'save-matrix', 'Save Matrix')
def run(self, ds):
filename = '/home/einarr/data/output.ftsv'
fd = open(filename, 'w')
dataset.write_ftsv(fd, ds)
fd.close()
class Options(dict):
def __init__(self):
dict.__init__(self)
class GoDistanceOptions(Options):
def __init__(self):
Options.__init__(self)
for code, desc in EVIDENCE_CODES:
self[code] = True
self['metric'] = 'fussimeg'

View File

@ -1,777 +0,0 @@
import gtk
from fluents import dataset, logger, plots, workflow, fluents, project, view, main
import geneontology
from matplotlib.nxutils import points_inside_poly
import matplotlib
#from scipy import array, randn, log, ones, zeros
from scipy import *
from numpy import matlib
import networkx
import re
import rpy
EVIDENCE_CODES=[('IMP', 'Inferred from mutant phenotype'),
('IGI', 'Inferred from genetic interaction'),
('IPI', 'Inferred from physical interaction'),
('ISS', 'Inferred from sequence or structure similarity'),
('IDA', 'Inferred from direct assay'),
('IEP', 'Inferred on expression pattern'),
('IEA', 'Inferred from electronic annotation'),
('TAS', 'Traceable author statement'),
('NAS', 'Non-traceable author statement'),
('ND', 'No biological data available'),
('RCA', 'Inferred from reviewed computational analysis'),
('IC', 'Inferred by curator')]
DISTANCE_METRICS = [('resnik', 'Resnik'),
('jiang', 'Jiang & Conrath'),
('fussimeg', 'FuSSiMeG')]
GO_DATA_DIR = '/home/einarr/data'
evidence = None
go = None
class GoTermView (gtk.Frame):
def __init__(self):
gtk.Frame.__init__(self)
tab = gtk.Table(2, 3, False)
self._table = tab
self._name = gtk.Label('')
self._name.set_line_wrap(True)
self._name.set_alignment(0, 0)
name_label = gtk.Label('Name:')
name_label.set_alignment(0, 0)
tab.attach(name_label, 0, 1, 0, 1, gtk.FILL, gtk.FILL, 5, 5)
tab.attach(self._name, 1, 2, 0, 1, gtk.FILL|gtk.EXPAND, gtk.FILL, 5, 5)
self._isa_parents = gtk.HBox()
isa_parents_label = gtk.Label('Is a:')
tab.attach(isa_parents_label, 0, 1, 1, 2, gtk.FILL, gtk.FILL, 5, 5)
tab.attach(self._isa_parents, 1, 2, 1, 2, gtk.FILL, gtk.FILL, 5, 5)
self._def = gtk.TextBuffer()
textview = gtk.TextView(self._def)
textview.set_wrap_mode(gtk.WRAP_WORD)
scrolled_window = gtk.ScrolledWindow()
scrolled_window.add(textview)
def_label = gtk.Label('Def:')
def_label.set_alignment(0.0, 0.0)
tab.attach(def_label, 0, 1, 2, 3, gtk.FILL, gtk.FILL, 5, 5)
tab.attach(scrolled_window, 1, 2, 2, 3, gtk.FILL|gtk.EXPAND, gtk.FILL|gtk.EXPAND, 5, 5)
self._tab = tab
self.add(tab)
self.set_go_term(None)
def set_go_term(self, term):
if term:
self.set_label(term['id'])
self._name.set_text(term['name'])
self._def.set_text(term['def'])
self._tab.remove(self._isa_parents)
self._isa_parents = gtk.HBox()
for p in term['is_a']:
btn = gtk.Button(p)
btn.show()
self._isa_parents.add(btn)
self._isa_parents.show()
self._tab.attach(self._isa_parents, 1, 2, 1, 2, gtk.FILL, gtk.FILL, 5, 5)
else:
self.set_label('GO Term')
self._name.set_text('')
self._def.set_text('')
self._tab.remove(self._isa_parents)
self._isa_parents = gtk.HBox()
self._tab.attach(self._isa_parents, 1, 2, 1, 2, gtk.FILL, gtk.FILL, 5, 5)
class GeneOntologyTree (gtk.HPaned):
def __init__(self, network):
gtk.HPaned.__init__(self)
self.set_position(400)
treemodel = geneontology.get_go_treestore(network)
self._treemodel = treemodel
self._tree_view = gtk.TreeView(treemodel)
self._selected_terms = set()
self._tree_view.set_fixed_height_mode(True)
# Set up context menu
self._context_menu = GoTermContextMenu(treemodel, self._tree_view)
self._tree_view.connect('popup_menu', self._popup_menu)
self._tree_view.connect('button_press_event', self._on_button_press)
renderer = gtk.CellRendererText()
go_column = gtk.TreeViewColumn('GO ID', renderer, text=0)
go_column.set_sizing(gtk.TREE_VIEW_COLUMN_FIXED)
go_column.set_fixed_width(200)
go_column.set_resizable(True)
self._tree_view.insert_column(go_column, 0)
renderer = gtk.CellRendererToggle()
renderer.set_property('activatable', True)
renderer.connect('toggled', self._toggle_selected)
renderer.set_active(True)
renderer.set_property('mode', gtk.CELL_RENDERER_MODE_ACTIVATABLE)
go_column = gtk.TreeViewColumn('T', renderer, active=2)
go_column.set_fixed_width(20)
go_column.set_sizing(gtk.TREE_VIEW_COLUMN_FIXED)
go_column.set_resizable(True)
self._tree_view.insert_column(go_column, 1)
renderer = gtk.CellRendererText()
go_column = gtk.TreeViewColumn('Name', renderer, text=1)
go_column.set_fixed_width(200)
go_column.set_sizing(gtk.TREE_VIEW_COLUMN_FIXED)
go_column.set_resizable(True)
self._tree_view.insert_column(go_column, 2)
self._desc_view = GoTermView()
self._tree_view.connect('cursor-changed', self._on_cursor_changed)
scrolled_window = gtk.ScrolledWindow()
scrolled_window.add(self._tree_view)
self.add1(scrolled_window)
self.add2(self._desc_view)
self.show_all()
def _on_cursor_changed(self, tree):
path, col = self._tree_view.get_cursor()
current = self._treemodel.get_iter(path)
term = self._treemodel.get_value(current, 3)
self._desc_view.set_go_term(term)
##
## GTK Callback functions
##
def _popup_menu(self, *rest):
self.menu.popup(None, None, None, 0, 0)
def _on_button_press(self, widget, event):
path = widget.get_path_at_pos(int(event.x), int(event.y))
iter = None
if path:
iter = self._treemodel.get_iter(path[0])
obj = self._treemodel.get_value(iter, 3)
else:
obj = None
self._context_menu.set_current_term(obj, iter)
if event.button == 3:
self._context_menu.popup(None, None, None, event.button, event.time)
def _toggle_selected(self, renderer, path):
iter = self._treemodel.get_iter(path)
selected = self._treemodel.get_value(iter, 2)
id = self._treemodel.get_value(iter, 0)
self._treemodel.set_value(iter, 2, not selected)
if selected:
self._selected_terms.remove(id)
else:
self._selected_terms.add(id)
class GoTermContextMenu (gtk.Menu):
"""Context menu for GO terms in the gene ontology browser"""
def __init__(self, treemodel, treeview):
self._treemodel = treemodel
self._treeview = treeview
self._current_term = None
self._current_iter = None
gtk.Menu.__init__(self)
# Popuplate tree
self._expand_item = i = gtk.MenuItem('Expand')
i.connect('activate', self._on_expand_subtree, treemodel, treeview)
self.append(i)
i.show()
self._collapse_item = i = gtk.MenuItem('Collapse')
i.connect('activate', self._on_collapse_subtree, treemodel, treeview)
self.append(i)
i.show()
self._select_subtree_item = i = gtk.MenuItem('Select subtree')
i.connect('activate', self._on_select_subtree, treemodel, treeview)
self.append(i)
i.show()
def set_current_term(self, term, it):
self._current_term = term
self._current_iter = it
def _on_expand_subtree(self, item, treemodel, treeview):
path = treemodel.get_path(self._current_iter)
treeview.expand_row(path, True)
def _on_collapse_subtree(self, item, treemodel, treeview):
treeview.collapse_row(treemodel.get_path(self._current_iter))
def _on_select_subtree(self, item, treemodel, treeview):
logger.log('notice', 'Selecting subtree from GO id: %s (%s)' %
(self._current_term['id'], self._current_term['name']))
ids = [x['id'] for x in networkx.bfs(go, self._current_term)]
project.project.set_selection('go-terms', set(ids))
class LoadGOFunction(workflow.Function):
def __init__(self):
workflow.Function.__init__(self, 'load-go', 'Load Gene Ontology')
def run(self):
global go
if go:
return
go = geneontology.read_default_go()
browser = GeneOntologyTree(go)
label = gtk.Label('_Gene Ontology')
label.set_use_underline(True)
fluents.app['bottom_notebook'].append_page(browser, label)
class LoadAnnotationsFunction(workflow.Function):
def __init__(self):
workflow.Function.__init__(self, 'load-go-ann', 'Load Annotations')
self.annotations = None
def run(self):
global evidence
f = open(GO_DATA_DIR + '/goa-condensed')
ev_codes = f.readline().split()
go_terms = []
lines = f.readlines()
m = zeros((len(lines), len(ev_codes)))
for i, l in enumerate(lines):
values = l.split()
go_terms.append(values[0])
for j, v in enumerate(values[1:]):
m[i,j] = float(v.strip())
d = dataset.Dataset(m,
[['go-terms', go_terms], ['evidence', ev_codes]],
name='GO evidence')
evidence = d
return [d]
class GOWeightDialog(gtk.Dialog):
def __init__(self):
gtk.Dialog.__init__(self, 'GO Gene List Influence',
None,
gtk.DIALOG_MODAL | gtk.DIALOG_DESTROY_WITH_PARENT,
(gtk.STOCK_OK, gtk.RESPONSE_OK,
gtk.STOCK_CANCEL, gtk.RESPONSE_CANCEL))
table = gtk.Table(2, 2)
sim_lbl = gtk.Label('Similarity threshold: ')
table.attach(sim_lbl, 0, 1, 0, 1)
adjustment = gtk.Adjustment(0, 0, 10, 0.1, 1.0, 1.0)
sim_spin = gtk.SpinButton(adjustment, 0.0, 2)
table.attach(sim_spin, 1, 2, 0, 1)
rank_lbl = gtk.Label('Rank threshold: ')
table.attach(rank_lbl, 0, 1, 1, 2)
rank_adj = gtk.Adjustment(0, 0, 10, 0.1, 1.0, 1.0)
rank_spin = gtk.SpinButton(rank_adj, 0.0, 2)
table.attach(rank_spin, 1, 2, 1, 2)
sim_lbl.show()
sim_spin.show()
rank_lbl.show()
rank_spin.show()
table.show()
self.vbox.add(table)
self._sim_spin = sim_spin
self._rank_spin = rank_spin
def set_options(self, options):
self._sim_spin.set_value(options['similarity_threshold'])
self._rank_spin.set_value(options['rank_threshold'])
def set_editable(self, editable):
self._sim_spin.set_sensitive(editable)
self._rank_spin.set_sensitive(editable)
def update_options(self, options):
options['similarity_threshold'] = self._sim_spin.get_value()
options['rank_threshold'] = self._rank_spin.get_value()
class DistanceToSelectionFunction(workflow.Function):
def __init__(self):
workflow.Function.__init__(self, 'dist-to-sel', 'Dist. to Selection')
self.options = DistanceToSelectionOptions()
def run(self, similarities, selection):
self.show_gui(similarities, self.options)
retval = []
dims = similarities.get_dim_name()
if dims[0] != "_%s" %dims[1] and dims[1] != "_%s" %dims[0]:
logger.log('warning', 'Are you sure this is a similarity matrix?')
dim = dims[0]
print "dim", dim
print "selection", selection[dim]
print "indices", similarities.get_indices(dim, selection[dim])
indices = similarities.get_indices(dim, selection[dim])
m = apply_along_axis(max, 1, similarities.asarray().take(indices, 1))
retval.append(dataset.Dataset(m, [(dim, similarities[dim]),
("_dummy", '0')]))
return retval
def show_gui(self, similarities, options, edit=True):
dialog = DistanceToSelectionOptionsDialog([similarities], self.options)
response = dialog.run()
dialog.hide()
if response == gtk.RESPONSE_OK:
dialog.set_output()
return dialog.get_options()
else:
return options
class GOWeightFunction(workflow.Function):
def __init__(self):
workflow.Function.__init__(self, 'load-go-ann', 'GO Influence')
self.options = GOWeightOptions()
def run(self, genelist, similarity):
## Show dialog box
self.show_gui(self.options)
## assure that data is "correct", i.e., that we can perform
## the desired operations.
common_dims = genelist.common_dims(similarity)
if len(common_dims) == 0:
logger.log('error', 'No common dimension in the selected datasets.')
elif len(common_dims) > 1:
logger.log('error', "More than one common dimension in the " +
"selected datasets. Don't know what to do.")
gene_dim = common_dims[0]
logger.log('debug', 'Assuming genes are in dimension: %s' % gene_dim)
## Do the calculations.
d = {}
def show_gui(self, options, edit=True):
dialog = GOWeightDialog()
dialog.set_options(self.options)
dialog.show_all()
dialog.set_editable(edit)
response = dialog.run()
dialog.hide()
if response == gtk.RESPONSE_OK:
return dialog.update_options(self.options)
else:
return options
class DistanceToSelectionOptionsDialog(workflow.OptionsDialog):
def __init__(self, data, options):
workflow.OptionsDialog.__init__(self, data, options, ['X'])
class TTestOptionsDialog(workflow.OptionsDialog):
def __init__(self, data, options):
workflow.OptionsDialog.__init__(self, data, options,
['X', 'Categories'])
vb = gtk.VBox()
l = gtk.Label("Limit")
adj = gtk.Adjustment(0, 0.0, 1.0, 0.01, 1.0, 1.0)
sb = gtk.SpinButton(adj, 0.0, 2)
l.show()
sb.show()
vb.add(l)
vb.add(sb)
vb.show()
self.nb.insert_page(vb, gtk.Label("Limit"), -1)
class TTestFunction(workflow.Function):
def __init__(self):
workflow.Function.__init__(self, 't-test', 't-test')
self.options = TTestOptions()
def run(self, x, categories):
self.show_gui(x, categories)
retval = []
m = x.asarray()
c = categories.asarray()
# Nonsmokers and current smokers
ns = m.take(nonzero(c[:,0]), 0)[0]
cs = m.take(nonzero(c[:,2]), 0)[0]
tscores = stats.ttest_ind(ns, cs)
print "Out data:", self.options['out_data']
tds = dataset.Dataset(tscores[0], [('gene_id', x['gene_id']),
('_t', ['0'])],
name='t-values')
if 't-value' in self.options['out_data']:
retval.append(tds)
pds = dataset.Dataset(tscores[1], [('gene_id', x['gene_id']),
('_p', ['0'])],
name='p-values')
if 'p-value' in self.options['out_data']:
retval.append(pds)
if ProbabilityHistogramPlot in self.options['out_plots']:
retval.append(ProbabilityHistogramPlot(pds))
if VolcanoPlot in self.options['out_plots']:
fc = apply_along_axis(mean, 0, ns) / apply_along_axis(mean, 0, cs)
fcds = dataset.Dataset(fc, [('gene_id', x['gene_id']),
('_dummy', ['0'])],
name="Fold change")
retval.append(VolcanoPlot(fcds, pds, 'gene_id'))
return retval
def show_gui(self, x, categories):
dialog = TTestOptionsDialog([x, categories], self.options)
response = dialog.run()
dialog.hide()
if response == gtk.RESPONSE_OK:
dialog.set_output()
return dialog.get_options()
else:
return options
class SetICFunction(workflow.Function):
def __init__(self):
workflow.Function.__init__(self, 'set-ic', 'Set IC')
def run(self, ds):
if 'go-terms' in ds.get_dim_name():
main.workflow.current_ic = ds
else:
logger.log('warning', 'Cannot use this dataset as IC on the go-terms dimension')
return
class PlotDagFunction(workflow.Function):
def __init__(self):
workflow.Function.__init__(self, 'go-dag', 'Build DAG')
def run(self, selection):
g = self.get_network(list(selection['go-terms']))
ds = dataset.GraphDataset(networkx.adj_matrix(g),
[('go-terms', g.nodes()), ('_go-terms', g.nodes())],
name="DAG")
return [ThresholdDagPlot(g)]
def get_network(self, terms, subtree='bp'):
"""Returns a DAG connecting the given terms by including their parents
up to the level needed to connect them. The subtree parameter is one of
mf - molecular function
bp - biological process
cc - cellular component"""
rpy.r.library("GOstats")
if subtree == 'mf':
subtree_r = rpy.r.GOMFPARENTS
elif subtree == 'bp':
subtree_r = rpy.r.GOBPPARENTS
elif subtree == 'cc':
subtree_r = rpy.r.GOCCPARENTS
else:
raise Exception("Unknown subtree. Use mf, bp or cc.")
g = rpy.r.GOGraph(terms, subtree_r)
edges = rpy.r.edges(g)
nxgraph = networkx.DiGraph()
for child, d in edges.items():
for parent in d.keys():
nxgraph.add_edge(parent, child)
return nxgraph
class TTestOptions(workflow.Options):
def __init__(self):
workflow.Options.__init__(self)
self['all_plots'] = [(ProbabilityHistogramPlot, 'Histogram', True),
(VolcanoPlot, 'Histogram', True)]
self['all_data'] = [('t-value', 't-values', True),
('p-value', 'Probabilities', True),
('categories', 'Categories', False)]
self['out_data'] = ['t-value', 'p-value']
class DistanceToSelectionOptions(workflow.Options):
def __init__(self):
workflow.Options.__init__(self)
self['all_data'] = [('mindist', 'Minimum distance', True)]
class GOWeightOptions(workflow.Options):
def __init__(self):
workflow.Options.__init__(self)
self['similarity_threshold'] = 0.0
self['rank_threshold'] = 0.0
class ProbabilityHistogramPlot(plots.HistogramPlot):
def __init__(self, ds):
plots.HistogramPlot.__init__(self, ds, name="Confidence", bins=50)
class VolcanoPlot(plots.ScatterPlot):
def __init__(self, fold_ds, p_ds, dim, **kw):
plots.ScatterPlot.__init__(self, fold_ds, p_ds, 'gene_id', '_dummy',
'0', '0',
name="Volcano plot",
sel_dim_2='_p', **kw)
class DagPlot(plots.Plot):
def __init__(self, graph, dim='go-terms', pos=None, nodecolor='b', nodesize=40,
with_labels=False, name='DAG Plot'):
plots.Plot.__init__(self, name)
self.nodes = graph.nodes()
self._map_ids = self.nodes
self.graph = graph
self._pos = pos
self._cmap = matplotlib.cm.summer
self._nodesize = nodesize
self._nodecolor = nodecolor
self._with_labels = with_labels
self.visible = set()
self.current_dim = dim
if not self._pos:
self._pos = self._calc_pos(graph)
self._xy = asarray([self._pos[node] for node in self.nodes])
self.xaxis_data = self._xy[:,0]
self.yaxis_data = self._xy[:,1]
# Initial draw
self.default_props = {'nodesize' : 50,
'nodecolor' : 'blue',
'edge_color' : 'gray',
'edge_color_selected' : 'red'}
self.node_collection = None
self.edge_collection = None
self.node_labels = None
lw = zeros(self.xaxis_data.shape)
self.node_collection = self.axes.scatter(self.xaxis_data, self.yaxis_data,
s=self._nodesize,
c=self._nodecolor,
linewidth=lw,
zorder=3)
self._mappable = self.node_collection
self._mappable.set_cmap(self._cmap)
# selected nodes is a transparent graph that adjust node-edge visibility
# according to the current selection needed to get get the selected
# nodes 'on top' as zorder may not be defined individually
self.selected_nodes = self.axes.scatter(self.xaxis_data,
self.yaxis_data,
s=self._nodesize,
c=self._nodecolor,
edgecolor='r',
linewidth=lw,
zorder=4,
alpha=0)
edge_color = self.default_props['edge_color']
self.edge_collection = networkx.draw_networkx_edges(self.graph,
self._pos,
ax=self.axes,
edge_color=edge_color)
# edge color rgba-arrays
self._edge_color_rgba = matlib.repmat(plots.ColorConverter().to_rgba(edge_color),
self.graph.number_of_edges(),1)
self._edge_color_selected = plots.ColorConverter().to_rgba(self.default_props['edge_color_selected'])
if self._with_labels:
self.node_labels = networkx.draw_networkx_labels(self.graph,
self._pos,
ax=self.axes)
# remove axes, frame and grid
self.axes.set_xticks([])
self.axes.set_yticks([])
self.axes.grid(False)
self.axes.set_frame_on(False)
self.fig.subplots_adjust(left=0, right=1, bottom=0, top=1)
def _calc_pos(self, graph):
"""Calculates position for graph nodes using 'dot' layout."""
gv_graph = networkx.DiGraph()
for start, end in graph.edges():
gv_graph.add_edge(start.replace('GO:', ''), end.replace('GO:', ''))
pos_gv = networkx.pygraphviz_layout(gv_graph, prog="dot")
pos = {}
for k, v in pos_gv.items():
if k != "all":
pos["GO:%s" % k] = v
else:
pos[k] = v
return pos
def points_in_rect(self, x1, y1, x2, y2, key):
ydata = self.yaxis_data
xdata = self.xaxis_data
# find indices of selected area
if x1>x2:
x1, x2 = x2, x1
if y1>y2:
y1, y2 = y2, y1
assert x1<=x2
assert y1<=y2
index = nonzero((xdata>x1) & (xdata<x2) & (ydata>y1) & (ydata<y2))[0]
if getattr(main.workflow, 'current_ic', None) != None:
ids = self.visible.intersection([self.nodes[i] for i in index])
else:
ids = set([self.nodes[i] for i in index])
return ids
def rectangle_select_callback(self, x1, y1, x2, y2, key):
ids = self.points_in_rect(x1, y1, x2, y2, key)
ids = self.update_selection(ids, key)
self.selection_listener(self.current_dim, ids)
def lasso_select_callback(self, verts, key=None):
xys = c_[self.xaxis_data[:,newaxis], self.yaxis_data[:,newaxis]]
index = nonzero(points_inside_poly(xys, verts))[0]
ids = [self.nodes[i] for i in index]
ids = self.update_selection(ids, key)
self.selection_listener(self.current_dim, ids)
def set_current_selection(self, selection):
linewidth = zeros(self.xaxis_data.shape)
edge_color_rgba = self._edge_color_rgba.copy()
index = [i for i in range(len(self.nodes)) if self.nodes[i] in selection[self.current_dim]]
if len(index) > 0:
linewidth[index] = 2
idents = selection[self.current_dim]
edge_index = [i for i,edge in enumerate(self.graph.edges()) if (edge[0] in idents and edge[1] in idents)]
if len(edge_index)>0:
for i in edge_index:
edge_color_rgba[i,:] = self._edge_color_selected
self._A = None
self.edge_collection._colors = edge_color_rgba
self.selected_nodes.set_linewidth(linewidth)
self.canvas.draw()
def is_mappable_with(self, obj):
"""Returns True if dataset/selection is mappable with this plot.
"""
if isinstance(obj, fluents.dataset.Dataset):
if self.current_dim in obj.get_dim_name():
return True
return False
def _update_color_from_dataset(self, ds):
"""Updates the facecolors from a dataset.
"""
array = ds.asarray()
#only support for 2d-arrays:
try:
m, n = array.shape
except:
raise ValueError, "No support for more than 2 dimensions."
# is dataset a vector or matrix?
if not n==1:
# we have a category dataset
if isinstance(ds, fluents.dataset.CategoryDataset):
vec = dot(array, diag(arange(n))).sum(1)
else:
vec = array.sum(1)
else:
vec = array.ravel()
indices = ds.get_indices(self.current_dim, self.nodes)
nodes = ds.existing_identifiers(self.current_dim, self.nodes)
v = vec.take(indices, 0)
vec_min = min(vec[vec > -inf])
vec_max = max(vec[vec < inf])
v[v==inf] = vec_max
v[v==-inf] = vec_min
d = dict(zip(nodes, list(v)))
map_vec = zeros(len(self.nodes))
for i, n in enumerate(self.nodes):
map_vec[i] = d.get(n, vec_min)
# update facecolors
self.node_collection.set_array(map_vec)
self.node_collection.set_clim(vec_min, vec_max)
self.node_collection.update_scalarmappable() #sets facecolors from array
self.canvas.draw()
class ThresholdDagPlot(DagPlot, plots.PlotThresholder):
def __init__(self, graph, dim='go-terms', pos=None, nodecolor='b', nodesize=40,
with_labels=False, name='DAG Plot'):
DagPlot.__init__(self, graph, dim='go-terms', pos=None,
nodecolor='b', nodesize=40,
with_labels=False, name='DAG Plot')
plots.PlotThresholder.__init__(self, "IC")
def rectangle_select_callback(self, x1, y1, x2, y2, key):
ids = self.points_in_rect(x1, y1, x2, y2, key)
ids = self.visible.intersection(ids)
ids = self.update_selection(ids, key)
self.selection_listener(self.current_dim, ids)
def _update_color_from_dataset(self, ds):
DagPlot._update_color_from_dataset(self, ds)
self.set_threshold_dataset(ds)
a = ds.asarray()
a_max = max(a[a<inf])
a_min = min(a[a>-inf])
self._sb_min.set_range(a_min-0.1, a_max+0.1)
self._sb_min.set_value(a_min-0.1)
self._sb_max.set_range(a_min-0.1, a_max+0.1)
self._sb_max.set_value(a_max+0.1)

View File

@ -1,445 +0,0 @@
import sys,os
import webbrowser
from fluents import logger, plots,workflow,dataset,main
from fluents.lib import blmfuncs,nx_utils,validation,engines,cx_stats,cx_utils
import gobrowser, geneontology
import scipy
import networkx as nx
class SmallTestWorkflow(workflow.Workflow):
name = 'Smokers'
ident = 'smokers'
description = 'A small test workflow for gene expression analysis.'
def __init__(self):
workflow.Workflow.__init__(self)
# DATA IMPORT
load = workflow.Stage('load', 'Data')
load.add_function(DatasetLoadFunctionSmokerSmall())
load.add_function(DatasetLoadFunctionSmokerMedium())
load.add_function(DatasetLoadFunctionSmokerFull())
load.add_function(DatasetLoadFunctionSmokerGO())
#load.add_function(DatasetLoadFunctionCYCLE())
self.add_stage(load)
# NETWORK PREPROCESSING
#net = workflow.Stage('net', 'Network integration')
#net.add_function(DiffKernelFunction())
#net.add_function(ModKernelFunction())
#net.add_function(RandDiffKernelFunction())
#self.add_stage(net)
# BLM's
model = workflow.Stage('models', 'Models')
model.add_function(blmfuncs.PCA())
model.add_function(blmfuncs.PLS())
model.add_function(blmfuncs.LPLS())
model.add_function(SAM())
#model.add_function(bioconFuncs.SAM(app))
self.add_stage(model)
query = workflow.Stage('query', 'Gene Query')
query.add_function(NCBIQuery())
query.add_function(KEGGQuery())
self.add_stage(query)
# Gene Ontology
go = workflow.Stage('go', 'Gene Ontology')
go.add_function(gobrowser.LoadGOFunction())
go.add_function(gobrowser.SetICFunction())
# go.add_function(gobrowser.GOWeightFunction())
# go.add_function(gobrowser.DistanceToSelectionFunction())
# go.add_function(gobrowser.TTestFunction())
go.add_function(gobrowser.PlotDagFunction())
go.add_function(GoEnrichment())
go.add_function(GoEnrichmentCond())
self.add_stage(go)
# EXTRA PLOTS
#plt = workflow.Stage('net', 'Network')
#plt.add_function(nx_analyser.KeggNetworkAnalyser())
#self.add_stage(plt)
logger.log('debug', 'Small test workflow is now active')
class DatasetLoadFunctionSmokerSmall(workflow.Function):
"""Loader for all ftsv files of smokers small datasets."""
def __init__(self):
workflow.Function.__init__(self, 'load_small', 'Smoker (Small)')
def run(self):
path = 'data/smokers-small/'
files = os.listdir(path)
out = []
for fname in files:
if fname.endswith('.ftsv'):
input_file = open(os.path.join(path, fname))
out.append(dataset.read_ftsv(input_file))
return out
class DatasetLoadFunctionSmokerMedium(workflow.Function):
"""Loader for all ftsv files of smokers small datasets."""
def __init__(self):
workflow.Function.__init__(self, 'load_medium', 'Smoker (Medium)')
def run(self):
path = 'data/smokers-medium/'
files = os.listdir(path)
out = []
for fname in files:
if fname.endswith('.ftsv'):
input_file = open(os.path.join(path, fname))
out.append(dataset.read_ftsv(input_file))
return out
class DatasetLoadFunctionSmokerFull(workflow.Function):
"""Loader for all ftsv files of smokers small datasets."""
def __init__(self):
workflow.Function.__init__(self, 'load_full', 'Smoker (Full)')
def run(self):
path = 'data/smokers-full/'
files = os.listdir(path)
out = []
for fname in files:
if fname.endswith('.ftsv'):
input_file = open(os.path.join(path, fname))
out.append(dataset.read_ftsv(input_file))
return out
class DatasetLoadFunctionSmokerGO(workflow.Function):
"""Loader for all ftsv files of smokers small datasets."""
def __init__(self):
workflow.Function.__init__(self, 'load_go', 'Smoker (GO)')
def run(self):
path = 'data/smokers-go/'
files = os.listdir(path)
out = []
for fname in files:
if fname.endswith('.ftsv'):
input_file = open(os.path.join(path, fname))
out.append(dataset.read_ftsv(input_file))
return out
class DatasetLoadFunctionCYCLE(workflow.Function):
"""Loader for pickled CYCLE datasets."""
def __init__(self):
workflow.Function.__init__(self, 'load_data', 'Cycle')
def run(self):
filename='fluents/data/CYCLE'
if filename:
return dataset.from_file(filename)
##### WORKFLOW SPECIFIC FUNCTIONS ######
class SAM(workflow.Function):
def __init__(self, id='sam', name='SAM'):
workflow.Function.__init__(self, id, name)
def run(self, x, y):
n_iter = 50 #B
alpha = 0.01 #cut off on qvals
###############
# Main function call
# setup prelimenaries
import rpy
rpy.r.library("siggenes")
rpy.r.library("multtest")
cl = scipy.dot(y.asarray(), scipy.diag(scipy.arange(y.shape[1]))).sum(1)
data = x.asarray().T
sam = rpy.r.sam(data, cl=cl, B=n_iter, var_equal=False,med=False,s0=scipy.nan,rand=scipy.nan)
qvals = scipy.asarray(rpy.r.slot(sam, "p.value"))
pvals = scipy.asarray(rpy.r.slot(sam, "q.value"))
sam_index = (qvals<alpha).nonzero()[0]
# Update selection object
dim_name = x.get_dim_name(1)
sam_selection = x.get_identifiers(dim_name, indices=sam_index)
main.project.set_selection(dim_name, sam_selection)
sel = dataset.Selection('SAM selection')
sel.select(dim_name, sam_selection)
logger.log('notice','Number of significant varibles (SAM): %s' %len(sam_selection))
# ## OUTPUT ###
xcolname = x.get_dim_name(1) # genes
x_col_ids = [xcolname, x.get_identifiers(xcolname, sorted=True)]
sing_id = ['_john', ['0']] #singleton
D_qvals = dataset.Dataset(qvals, (x_col_ids, sing_id), name='q_vals')
D_pvals = dataset.Dataset(pvals, (x_col_ids, sing_id), name='p_vals')
# plots
s_indx = qvals.flatten().argsort()
s_ids = [x_col_ids[0],[x_col_ids[1][i] for i in s_indx]]
xindex = scipy.arange(len(qvals))
qvals_s = qvals.take(s_indx)
D_qs = dataset.Dataset(qvals_s, (s_ids, sing_id), name="sorted qvals")
Dind = dataset.Dataset(xindex, (s_ids, sing_id), name="dum")
st = plots.ScatterPlot(D_qs, Dind, 'gene_ids', '_john', '0', '0', s=10, name='SAM qvals')
return [D_qvals, D_pvals, D_qs, st, sel]
class DiffKernelFunction(workflow.Function):
def __init__(self):
workflow.Function.__init__(self, 'diffkernel', 'Diffusion')
def run(self, x, a):
"""x is gene expression data, a is the network.
"""
#sanity check:
g = a.asnetworkx()
genes = x.get_identifiers(x.get_dim_name(1), sorted=True)
W = nx.adj_matrix(g, nodelist=genes)
X = x.asarray()
Xc, mn_x = cx_utils.mat_center(X, ret_mn=True)
out = []
alpha=1.0
beta = 1.0
K = nx_utils.K_diffusion(W, alpha=alpha, beta=beta,normalised=True)
Xp = scipy.dot(Xc, K) + mn_x
# dataset
row_ids = (x.get_dim_name(0),
x.get_identifiers(x.get_dim_name(0),
sorted=True))
col_ids = (x.get_dim_name(1),
x.get_identifiers(x.get_dim_name(1),
sorted=True))
xout = dataset.Dataset(Xp,
(row_ids, col_ids),
name=x.get_name()+'_diff'+str(alpha))
out.append(xout)
return out
class RandDiffKernelFunction(workflow.Function):
def __init__(self):
workflow.Function.__init__(self, 'diffkernel', 'Rand. Diff.')
def run(self, x, a):
"""x is gene expression data, a is the network.
"""
#sanity check:
g = a.asnetworkx()
genes = x.get_identifiers(x.get_dim_name(1))
# randomise nodelist
genes = [genes[i] for i in cx_utils.randperm(x.shape[1])]
W = nx.adj_matrix(g, nodelist=genes)
X = x.asarray()
Xc, mn_x = cx_utils.mat_center(X, ret_mn=True)
out = []
alpha=1.
beta = 1.0
K = nx_utils.K_diffusion(W, alpha=alpha, beta=beta,normalised=True)
Xp = scipy.dot(Xc, K) + mn_x
# dataset
row_ids = (x.get_dim_name(0),
x.get_identifiers(x.get_dim_name(0),
sorted=True))
col_ids = (x.get_dim_name(1),
x.get_identifiers(x.get_dim_name(1),
sorted=True))
xout = dataset.Dataset(Xp,
(row_ids, col_ids),
name=x.get_name()+'_diff'+str(alpha))
out.append(xout)
return out
class ModKernelFunction(workflow.Function):
def __init__(self):
workflow.Function.__init__(self, 'mokernel', 'Modularity')
def run(self,x,a):
X = x.asarray()
g = a.asnetworkx()
genes = x.get_identifiers(x.get_dim_name(1), sorted=True)
W = nx.adj_matrix(g, nodelist=genes)
out=[]
alpha=.2
Xc,mn_x = cx_utils.mat_center(X, ret_mn=True)
K = nx_utils.K_modularity(W, alpha=alpha)
Xp = scipy.dot(Xc, K)
Xp = Xp + mn_x
# dataset
row_ids = (x.get_dim_name(0),
x.get_identifiers(x.get_dim_name(0),
sorted=True))
col_ids = (x.get_dim_name(1),
x.get_identifiers(x.get_dim_name(1),
sorted=True))
xout = dataset.Dataset(Xp,
(row_ids,col_ids),
name=x.get_name()+'_mod'+str(alpha))
out.append(xout)
return out
class NCBIQuery(workflow.Function):
def __init__(self, gene_id_name='gene_ids'):
self._gene_id_name = gene_id_name
workflow.Function.__init__(self, 'query', 'NCBI')
def run(self):
selection = main.project.get_selection()
if not selection.has_key(self._gene_id_name):
logger.log("notice", "Expected gene ids: %s, but got: %s" %(self._gene_id_name, selection.keys()))
return None
if len(selection[self._gene_id_name])==0:
logger.log("notice", "No selected genes to query")
return None
base = 'http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?'
options = {r'&db=' : 'gene',
r'&cmd=' : 'retrieve',
r'&dopt=' : 'full_report'}
gene_str = ''.join([gene + "+" for gene in selection[self._gene_id_name]])
options[r'&list_uids='] = gene_str[:-1]
opt_str = ''.join([key+value for key,value in options.items()])
web_str = base + opt_str
webbrowser.open(web_str)
class KEGGQuery(workflow.Function):
def __init__(self, org='hsa', gene_id_name='gene_ids'):
self._org=org
self._gene_id_name = gene_id_name
workflow.Function.__init__(self, 'query', 'KEGG')
def run(self, selection):
if not selection.has_key(self._gene_id_name):
logger.log("notice", "Expected gene ids: %s, but got. %s" %(self._gene_id_name, selection.keys()))
return None
if len(selection[self._gene_id_name])==0:
logger.log("notice", "No selected genes to query")
return None
base = r'http://www.genome.jp/dbget-bin/www_bget?'
gene_str = ''.join([gene + "+" for gene in selection[self._gene_id_name]])
gene_str = gene_str[:-1]
gene_str = self._org + "+" + gene_str
web_str = base + gene_str
webbrowser.open(web_str)
class GoEnrichment(workflow.Function):
def __init__(self):
workflow.Function.__init__(self, 'goenrich', 'Go Enrichment')
def run(self, data):
import rpy
rpy.r.library("GOstats")
# Get universe
# Here, we are using a defined dataset to represent the universe
if not 'gene_ids' in data:
logger.log('notice', 'No dimension called [gene_ids] in dataset: %s', data.get_name())
return
universe = list(data.get_identifiers('gene_ids'))
logger.log('notice', 'Universe consists of %s gene ids from %s' %(len(universe), data.get_name()))
# Get current selection and validate
curr_sel = main.project.get_selection()
selected_genes = list(curr_sel['gene_ids'])
if len(selected_genes)==0:
logger.log('notice', 'This function needs a current selection!')
return
# Hypergeometric parameter object
pval_cutoff = 0.9999
cond = False
test_direction = 'over'
params = rpy.r.new("GOHyperGParams",
geneIds=selected_genes,
annotation="hgu133a",
ontology="BP",
pvalueCutoff=pval_cutoff,
conditional=cond,
testDirection=test_direction
)
# run test
# result.keys(): ['Count', 'Term', 'OddsRatio', 'Pvalue', 'ExpCount', 'GOBPID', 'Size']
result = rpy.r.summary(rpy.r.hyperGTest(params))
# dataset
terms = result['GOBPID']
pvals = scipy.log(scipy.asarray(result['Pvalue']))
row_ids = ('go-terms', terms)
col_ids = ('_john', ['_doe'])
xout = dataset.Dataset(pvals,
(row_ids, col_ids),
name='P values (enrichment)')
return [xout]
class GoEnrichmentCond(workflow.Function):
""" Enrichment conditioned on dag structure."""
def __init__(self):
workflow.Function.__init__(self, 'goenrich', 'Go Cond. Enrich.')
def run(self, data):
import rpy
rpy.r.library("GOstats")
# Get universe
# Here, we are using a defined dataset to represent the universe
if not 'gene_ids' in data:
logger.log('notice', 'No dimension called [gene_ids] in dataset: %s', data.get_name())
return
universe = list(data.get_identifiers('gene_ids'))
logger.log('notice', 'Universe consists of %s gene ids from %s' %(len(universe), data.get_name()))
# Get current selection and validate
curr_sel = main.project.get_selection()
selected_genes = list(curr_sel['gene_ids'])
if len(selected_genes)==0:
logger.log('notice', 'This function needs a current selection!')
return
# Hypergeometric parameter object
pval_cutoff = 0.9999
cond = True
test_direction = 'over'
params = rpy.r.new("GOHyperGParams",
geneIds=selected_genes,
annotation="hgu133a",
ontology="BP",
pvalueCutoff=pval_cutoff,
conditional=cond,
testDirection=test_direction
)
# run test
# result.keys(): ['Count', 'Term', 'OddsRatio', 'Pvalue', 'ExpCount', 'GOBPID', 'Size']
result = rpy.r.summary(rpy.r.hyperGTest(params))
# dataset
terms = result['GOBPID']
pvals = scipy.log(scipy.asarray(result['Pvalue']))
row_ids = ('go-terms', terms)
col_ids = ('_john', ['_doe'])
xout = dataset.Dataset(pvals,
(row_ids, col_ids),
name='P values (enrichment)')
return [xout]

View File

@ -17,6 +17,7 @@ class TestWorkflow (workflow.Workflow):
load = workflow.Stage('load', 'Test Data') load = workflow.Stage('load', 'Test Data')
load.add_task(TestDataTask) load.add_task(TestDataTask)
load.add_task(TestPlot)
self.add_stage(load) self.add_stage(load)
@ -48,3 +49,17 @@ class TestDataTask(workflow.Task):
self.datasets = [p] self.datasets = [p]
return [X, ds, p, ds_plot, ds_scatter, p2, cds, lp, vp] return [X, ds, p, ds_plot, ds_scatter, p2, cds, lp, vp]
class TestPlot(workflow.Task):
name = "Test plot data"
def __init__(self, input):
workflow.Task.__init__(self, input)
def run(self):
logger.log('notice', 'Injecting foo test data')
x = randn(500,15)
X = dataset.Dataset(x)
p = plots.ScatterPlot(X, X, 'rows', 'rows', '0_1', '0_2',name='scatter')
return [p]