Compare commits

6 commits

Author | SHA1 | Date
---|---|---
Arnar Flatberg | 7ef01d37f0 |
Arnar Flatberg | 5a1e225a08 |
Arnar Flatberg | 9a296f59f6 |
Arnar Flatberg | f8e0cfdb8b |
Arnar Flatberg | 2fdd9aaf77 |
Einar Ryeng | bfb039328c |
@@ -22,6 +22,7 @@ DATADIR = os.path.dirname(sys.modules['fluents'].__file__)
 ICONDIR = os.path.join(DATADIR,"..","icons")
 GLADEFILENAME = os.path.join(DATADIR, 'fluents.glade')
 
 
 class IconFactory:
     """Factory for icons that ensures that each icon is only loaded once."""
 
@@ -43,6 +44,7 @@ class IconFactory:
 
 icon_factory = IconFactory(ICONDIR)
 
 
 class TableSizeSelection(gtk.Window):
 
     def __init__(self):
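
The two hunks above only show context around IconFactory, which its docstring describes as loading each icon at most once. For reference, a minimal sketch of such a cache follows; it assumes PyGTK's gtk.gdk.pixbuf_new_from_file, and the get_icon method name and the .png naming convention are illustrative, not taken from the diff.

    import os
    import gtk

    class IconFactory:
        """Factory for icons that ensures that each icon is only loaded once."""

        def __init__(self, path):
            self._path = path
            self._icons = {}   # icon name -> gtk.gdk.Pixbuf, filled lazily

        def get_icon(self, name):
            # Load the pixbuf the first time it is requested, then reuse it.
            if name not in self._icons:
                fname = os.path.join(self._path, name + '.png')
                self._icons[name] = gtk.gdk.pixbuf_new_from_file(fname)
            return self._icons[name]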
@@ -187,7 +187,7 @@ class LineViewPlot(Plot):
         if minor_axis==0:
             self._mn_data = self._data.mean(minor_axis)
         else:
-            self._mn_data = self._data.mean(minor_axis)[:,newaxis]
+            self._mn_data = self._data.mean(minor_axis)[:,scipy.newaxis]
         self._data = self._data - self._mn_data
         self.data_is_centered = True
         #initial line collection
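
The only change in this hunk is qualifying newaxis as scipy.newaxis when the data are centered along the minor axis. The extra axis is needed for broadcasting; a rough sketch with illustrative values:

    import scipy

    data = scipy.array([[1., 2., 3.],
                        [4., 5., 6.]])

    # The mean over axis 0 has one entry per column and broadcasts
    # against the (2, 3) array directly.
    col_centered = data - data.mean(0)

    # The mean over axis 1 has one entry per row; it must become a (2, 1)
    # column before subtraction, which is exactly what the
    # [:, scipy.newaxis] in the hunk does.
    row_centered = data - data.mean(1)[:, scipy.newaxis]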
@@ -60,9 +60,7 @@ class Workflow:
     description = "Workflow Description"
 
     def __init__(self):
-        print "Setting stages"
         self.stages = []
-        self.stages_by_id = {}
 
     def get_data_file_name(self, filename):
         """Checks if a file with the given name exists in the data directory.
@@ -79,67 +77,68 @@ class Workflow:
 
     def add_stage(self, stage):
         self.stages.append(stage)
-        self.stages_by_id[stage.id] = stage
+        #self.stages_by_id[stage.id] = stage
 
     def print_tree(self):
         print self.name
         for stage in self.stages:
             print ' %s' % stage.name
-            for fun in stage.functions:
-                print '  %s' % fun.name
+            for task in stage.tasks:
+                print '  %s' % task.name
 
-#    def add_project(self,project):
-#        if project == None:
-#            logger.log('notice','Proejct is empty')
-#        logger.log('notice','Project added in : %s' %self.name)
-#        self.project = project
 
 
 class EmptyWorkflow(Workflow):
     name = 'Empty Workflow'
 
     def __init__(self):
-        print "initing empty workflow"
+        logger.log("debug", "initing empty workflow")
         Workflow.__init__(self)
 
 
 class Stage:
     """A stage is a part of the data analysis process.
 
-    Each stage contains a set of functions that can be used to
+    Each stage contains a set of tasks that can be used to
     accomplish the task. A typical early stage is 'preprocessing', which
-    can be done in several ways, each represented by a function.
+    can be done in several ways, each represented by a task.
     """
 
     def __init__(self, id, name):
         self.id = id
         self.name = name
-        self.functions = []
-        self.functions_by_id = {}
+        self.tasks = []
 
-    def add_function(self, fun):
-        self.functions.append(fun)
-        self.functions_by_id[fun.id] = fun
+    def add_task(self, task):
+        self.tasks.append(task)
 
 
-class Function:
-    """A Function object encapsulates a function on a data set.
+class Task:
+    """A Task object encapsulates a task on a data set.
 
-    Each Function instance encapsulates some function that can be applied
+    Each Task instance encapsulates some task that can be applied
     to one or more types of data.
     """
 
-    def __init__(self, id, name):
-        self.id = id
-        self.name = name
+    name = ""
+
+    def __init__(self, input):
+        self.input = input
+        self.options = Options()
+
+        self.datasets = {}
+        self.arrays = {}
+        self.plots = {}
 
     # just return a Validation object
     def validate_input(input):
-        return Validation(True,"Validation Not Implemented")
+        return Validation(True, "Validation Not Implemented")
 
     def run(self):
         pass
 
+    def show_options_gui(self, editable=False):
+        pass
 
 
 class Validation:
     def __init__(self,result, reason):
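
This hunk replaces the old Function class (identified by an id and a name) with a Task class that receives its input data in the constructor and exposes validate_input, run and show_options_gui. For orientation, a sketch of what a concrete task might look like under the new interface follows; the LogTransform name and the body of run are hypothetical, and the dataset helpers (asarray, get_dim_name, get_identifiers, Dataset) are used as they appear elsewhere in this comparison.

    import scipy
    from fluents import dataset, workflow

    class LogTransform(workflow.Task):
        name = "Log transform"

        def validate_input(self):
            # run_task() (rewritten further down) calls this before run()
            if len(self.input) != 1:
                return workflow.Validation(False, "Expected exactly one dataset")
            return workflow.Validation(True, "OK")

        def run(self):
            ds = self.input[0]
            dims = [(d, ds.get_identifiers(d)) for d in ds.get_dim_name()]
            return dataset.Dataset(scipy.log(ds.asarray()), dims, name=self.name)

run_task instantiates such a class with the currently selected data, checks validate_input(), and adds whatever run() returns back to the project.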
@@ -148,7 +147,6 @@ class Validation:
 
 
 class WorkflowView (gtk.VBox):
 
     def __init__(self, wf):
         gtk.VBox.__init__(self)
         self.workflow = wf
@@ -166,12 +164,11 @@ class WorkflowView (gtk.VBox):
             btn_box.show()
             exp.add(btn_align)
 
-            # Add functions in each stage
-            for fun in stage.functions:
-                btn = gtk.Button(fun.name)
+            # Add tasks in each stage
+            for task in stage.tasks:
+                btn = gtk.Button(task.name)
                 btn.connect('clicked',
-                            lambda button, f=fun : self.run_function(f))
+                            lambda button, t=task : self.run_task(t))
 
                 btn_box.add(btn)
                 btn.show()
 
@@ -188,53 +185,24 @@ class WorkflowView (gtk.VBox):
         self.remove_workflow()
         self.setup_workflow(workflow)
 
-    def run_function(self, function):
-        logger.log('debug', 'Starting function: %s' % function.name)
+    def run_task(self, task_class):
+        logger.log('debug', 'Creating task: %s' % task_class.name)
         parent_data = main.project.current_data
+        task = task_class(input=parent_data)
 
-        validation = function.validate_input()
+        validation = task.validate_input()
 
         if not validation.succeeded:
             logger.log('warning','Invalid Inputdata: ' + str(reason))
             return
 
-        args, varargs, varkw, defaults = inspect.getargspec(function.run)
-
-        # first argument is 'self' and second should be the selection
-        # and we don't care about those...
-        args.remove('self')
-        if "selection" in args:
-            pass_selection = True
-            args.remove('selection')
-        else:
-            pass_selection = False
-
-        if varargs and len(parent_data) < len(args):
-            logger.log('warning', "Function requires minimum %d datasets selected." % len(args))
-            return
-        elif not varargs and args and len(args) != len(parent_data):
-            # functions requiring datasets have to have the right number
-            logger.log('warning', "Function requires %d datasets, but only %d selected." % (len(args), len(parent_data)))
-            return
-
-        if not args:
-            # we allow functions requiring no data to be run even if a
-            # dataset is is selected
-            data = []
-        else:
-            data = parent_data
-
-        if pass_selection:
-            # if the function has a 'selection' argument, we pass in
-            # the selection
-            new_data = function.run(selection=main.project.get_selection(), *data)
-        else:
-            new_data = function.run(*data)
-
-        if new_data != None:
-            main.project.add_data(parent_data, new_data, function.name)
-
-        logger.log('debug', 'Function ended: %s' % function.name)
+        task_result = task.run()
+
+        if task_result != None:
+            main.project.add_data(parent_data, task_result, task.name)
+        else:
+            logger.log('debug', 'Task gave no output: %s' % task.name)
+
+        logger.log('debug', 'Task ended: %s' % task.name)
 
 
 class Options(dict):
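
The removed branch of run_function relied on inspect.getargspec to work out how many datasets a function expected and whether it wanted the current selection passed in; the new run_task drops that introspection and simply hands the selected data to the Task constructor. A small standalone illustration of what getargspec reports (the preprocess function here is made up):

    import inspect

    def preprocess(self, X, Y, selection):
        pass

    args, varargs, varkw, defaults = inspect.getargspec(preprocess)
    # args == ['self', 'X', 'Y', 'selection']; after discarding 'self' and
    # 'selection', the old code knew that two datasets had to be selected
    # and that the project selection should be passed as a keyword.
    print args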
@@ -259,8 +227,8 @@ class Options(dict):
 class OptionsDialog(gtk.Dialog):
     """The basic input/output dialog box.
 
-    This defines the first page of the function options-gui.
-    Any function that invokes a option-gui will inherit from this class.
+    This defines the first page of the task options-gui.
+    Any task that invokes a option-gui will inherit from this class.
     """
     def __init__(self, data, options, input_names=['X','Y']):
         gtk.Dialog.__init__(self, 'Input-Output dialog',
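
OptionsDialog, touched here only in its docstring, is the base class for per-task option dialogs; elsewhere in this comparison (the removed GO workflow) subclasses simply forward their datasets, an options object and the expected input names to it. A minimal sketch, with a hypothetical subclass name:

    from fluents import workflow

    class ThresholdOptionsDialog(workflow.OptionsDialog):
        def __init__(self, data, options):
            # 'data' is the list of selected datasets, 'options' the task's
            # Options instance; the last argument names the expected inputs.
            workflow.OptionsDialog.__init__(self, data, options, ['X'])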
@@ -452,12 +420,18 @@ class WorkflowMenu (gtk.Menu):
         stage_menu = gtk.Menu()
         stage_menu_item.set_submenu(stage_menu)
 
-        for fun in stage.functions:
-            stage_menu.append(self._create_function_item(fun))
+        for task in stage.tasks:
+            stage_menu.append(self._create_task_item(task))
         return stage_menu_item
 
-    def _create_function_item(self, function):
-        menuitem = gtk.MenuItem(function.name)
+    def _create_task_item(self, task):
+        menuitem = gtk.MenuItem(task.name)
         menuitem.show()
         return menuitem
 
 
+
+class Options():
+    def __init__(self):
+        pass
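
Taken together, the workflow changes in this comparison mean that menu items and button boxes are now built from stage.tasks, and that tasks are registered as classes which run_task instantiates on demand. A sketch of how a workflow might be assembled after this change; the PreprocessWorkflow name is hypothetical, and LogTransform refers to the hypothetical Task subclass sketched earlier:

    from fluents import workflow

    class PreprocessWorkflow(workflow.Workflow):
        name = 'Preprocessing'

        def __init__(self):
            workflow.Workflow.__init__(self)
            stage = workflow.Stage('preprocess', 'Preprocessing')
            # Task classes are registered; WorkflowView.run_task instantiates
            # them with the current data when their button is clicked.
            stage.add_task(LogTransform)
            self.add_stage(stage)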
@ -1,261 +0,0 @@
|
||||||
import gobject
|
|
||||||
import gtk
|
|
||||||
import networkx
|
|
||||||
import re
|
|
||||||
|
|
||||||
class GOTerm:
|
|
||||||
def __init__(self):
|
|
||||||
self.d = {}
|
|
||||||
|
|
||||||
## Create empty lists for all list values
|
|
||||||
for l in GOTerm.lists:
|
|
||||||
self.d[l] = []
|
|
||||||
for s in GOTerm.scalars:
|
|
||||||
self.d[l] = None
|
|
||||||
|
|
||||||
def __getitem__(self, key):
|
|
||||||
if self.d.has_key(key):
|
|
||||||
return self.d[key]
|
|
||||||
return None
|
|
||||||
|
|
||||||
def __setitem__(self, key, value):
|
|
||||||
self.d[key] = value
|
|
||||||
|
|
||||||
GOTerm.lists = ['is_a', 'alt_id', 'exact_synonym', 'broad_synonym',
|
|
||||||
'narrow_synonym', 'related_synonym', 'relationship',
|
|
||||||
'subset', 'synonym', 'xref_analog', 'xref_unknown']
|
|
||||||
GOTerm.scalars = ['name', 'id', 'namespace', 'def', 'is_transitive',
|
|
||||||
'comment', 'is_obsolete']
|
|
||||||
|
|
||||||
|
|
||||||
class GeneOntology(networkx.XDiGraph):
|
|
||||||
def __init__(self):
|
|
||||||
networkx.XDiGraph.__init__(self)
|
|
||||||
self.by_id = {}
|
|
||||||
self.undirected = None
|
|
||||||
|
|
||||||
def add_term(self, term):
|
|
||||||
self.add_node(term)
|
|
||||||
self.by_id[term['id']] = term
|
|
||||||
|
|
||||||
def link_ontology(self, linkattr, obsolete=False):
|
|
||||||
for node in self.nodes():
|
|
||||||
for link in node[linkattr]:
|
|
||||||
self.add_edge(self.by_id[link], node, linkattr)
|
|
||||||
|
|
||||||
def link_relationships(self):
|
|
||||||
for node in self.nodes():
|
|
||||||
for link in node['relationship']:
|
|
||||||
link_type, term = link.split(' ')
|
|
||||||
self.add_edge(self.by_id[term.strip()], node, link_type.strip())
|
|
||||||
|
|
||||||
def get_bp(self):
|
|
||||||
"""Returns the root node of the biological_process tree"""
|
|
||||||
return self.by_id['GO:0008150']
|
|
||||||
|
|
||||||
def get_cc(self):
|
|
||||||
"""Returns the root node of the cellular_component tree"""
|
|
||||||
return self.by_id['id: GO:0005575']
|
|
||||||
|
|
||||||
def get_mf(self):
|
|
||||||
"""Returns the root node of the molecular_function tree"""
|
|
||||||
return self.by_id['GO:0003674']
|
|
||||||
|
|
||||||
def _subsumer(self, t1, t2, heap):
|
|
||||||
while heap != []:
|
|
||||||
t = heap[0]
|
|
||||||
heap = heap[1:]
|
|
||||||
|
|
||||||
p1 = networkx.shortest_path(self, t, t1)
|
|
||||||
p2 = networkx.shortest_path(self, t, t2)
|
|
||||||
if p1 and p2:
|
|
||||||
return t
|
|
||||||
|
|
||||||
heap += self.in_neighbors(t)
|
|
||||||
return None
|
|
||||||
|
|
||||||
def subsumer(self, t1, t2):
|
|
||||||
if t1 == t2:
|
|
||||||
return t1
|
|
||||||
if networkx.shortest_path(self, t1, t2):
|
|
||||||
return t1
|
|
||||||
elif networkx.shortest_path(self, t2, t1):
|
|
||||||
return t2
|
|
||||||
return self._subsumer(t1, t2, self.in_neighbors(t1))
|
|
||||||
|
|
||||||
def old_subsumer(self, t1, t2):
|
|
||||||
if t1 == t2:
|
|
||||||
return t1
|
|
||||||
|
|
||||||
if self.undirected == None:
|
|
||||||
self.undirected = self.to_undirected()
|
|
||||||
|
|
||||||
path = networkx.shortest_path(self.undirected, t1, t2)
|
|
||||||
if not path:
|
|
||||||
print "Woah, path not found."
|
|
||||||
return None
|
|
||||||
|
|
||||||
if path == [1]:
|
|
||||||
print "This shouldn't happen"
|
|
||||||
return t1
|
|
||||||
|
|
||||||
for t in path:
|
|
||||||
if networkx.shortest_path(self, t, t1) and \
|
|
||||||
networkx.shortest_path(self, t, t2):
|
|
||||||
return t
|
|
||||||
|
|
||||||
print "GeneOntology.subsumer: should not reach this point"
|
|
||||||
print "path is now: %s" % path
|
|
||||||
print "ids are: %s " % [x['id'] for x in path]
|
|
||||||
|
|
||||||
def _split_obo_line(line):
|
|
||||||
"""Splits a line from an obo file in its three constituent parts.
|
|
||||||
|
|
||||||
@param line: A string containing a line from an obo file. The line must
|
|
||||||
either be a section definition field with a section name in brackets
|
|
||||||
or a line of the form: keyword: value ! comment
|
|
||||||
@returns: A tuple of four strings conaining the section, key, value and
|
|
||||||
description defined in the string. If the section part is None, all
|
|
||||||
the other fields are strings and if section is a string, all the other
|
|
||||||
fields are None.
|
|
||||||
"""
|
|
||||||
attrib_re = re.compile(r'^\s*([\w-]+)\s*:\s*([^!]*)!?(.*$)')
|
|
||||||
s = line.strip()
|
|
||||||
if s == "":
|
|
||||||
return (None, None, None, None)
|
|
||||||
elif s.startswith('[') and s.endswith(']'):
|
|
||||||
return (s[1:-1], None, None, None)
|
|
||||||
else:
|
|
||||||
m = attrib_re.match(s)
|
|
||||||
if m:
|
|
||||||
key, value, comment = [x.strip() for x in m.groups()]
|
|
||||||
return (None, key, value, comment)
|
|
||||||
else:
|
|
||||||
raise Exception('Unparsable line: %s' % line)
|
|
||||||
|
|
||||||
|
|
||||||
def _add_term_attribute(term, key, value, comment):
|
|
||||||
if key in GOTerm.scalars:
|
|
||||||
term[key] = value
|
|
||||||
elif key in GOTerm.lists:
|
|
||||||
term[key].append(value)
|
|
||||||
else:
|
|
||||||
raise Exception('Unknown key %s: %s' % (key, value))
|
|
||||||
|
|
||||||
|
|
||||||
def read_gene_ontology(fd):
|
|
||||||
"""Reads the Gene Ontology from an obo file.
|
|
||||||
|
|
||||||
@param fd: An open file object to the obo file.
|
|
||||||
"""
|
|
||||||
go = GeneOntology()
|
|
||||||
term = None
|
|
||||||
section = None
|
|
||||||
|
|
||||||
line = fd.readline()
|
|
||||||
while line:
|
|
||||||
s, k, v, c = _split_obo_line(line)
|
|
||||||
if s == None and k == None:
|
|
||||||
pass
|
|
||||||
elif s:
|
|
||||||
if term:
|
|
||||||
go.add_term(term)
|
|
||||||
|
|
||||||
section = s
|
|
||||||
if s == 'Term':
|
|
||||||
term = GOTerm()
|
|
||||||
else:
|
|
||||||
term = None
|
|
||||||
print "ignoring: %s" %s
|
|
||||||
else:
|
|
||||||
if term:
|
|
||||||
_add_term_attribute(term, k, v, c)
|
|
||||||
line = fd.readline()
|
|
||||||
|
|
||||||
if term:
|
|
||||||
go.add_term(term)
|
|
||||||
|
|
||||||
return go
|
|
||||||
|
|
||||||
|
|
||||||
def pickle_gene_ontology(go, fn):
|
|
||||||
fd = open(fn, 'wb')
|
|
||||||
pickle.dump(go, fd)
|
|
||||||
fd.close()
|
|
||||||
|
|
||||||
def load_pickled_ontology(fn):
|
|
||||||
fd = open(fn, 'rb')
|
|
||||||
go = pickle.load(fd)
|
|
||||||
fd.close()
|
|
||||||
return go
|
|
||||||
|
|
||||||
def read_default_go():
|
|
||||||
f = open("/usr/share/gene-ontology/gene_ontology.obo")
|
|
||||||
go = read_gene_ontology(f)
|
|
||||||
go.link_ontology('is_a')
|
|
||||||
go.link_relationships()
|
|
||||||
f.close()
|
|
||||||
return go
|
|
||||||
|
|
||||||
def _add_subgraphs(treestore, ontology, parent, nodes):
|
|
||||||
for n in nodes:
|
|
||||||
i = treestore.insert(parent, 0, (n['id'], n['name'], False, n))
|
|
||||||
_add_subgraphs(treestore, ontology, i, ontology.successors(n))
|
|
||||||
|
|
||||||
def get_go_treestore(ontology):
|
|
||||||
ts = gtk.TreeStore(gobject.TYPE_STRING, ## ID
|
|
||||||
gobject.TYPE_STRING, ## Name
|
|
||||||
gobject.TYPE_BOOLEAN, ## Selected
|
|
||||||
gobject.TYPE_PYOBJECT) ## Node
|
|
||||||
_add_subgraphs(ts, ontology, None, [ontology.get_bp()])
|
|
||||||
return ts
|
|
||||||
|
|
||||||
class NetworkTreeModel(gtk.GenericTreeModel):
|
|
||||||
|
|
||||||
def __init__(self, network, root):
|
|
||||||
gtk.GenericTreeModel.__init__(self)
|
|
||||||
self._network = network
|
|
||||||
self._root = root
|
|
||||||
|
|
||||||
def on_get_flags(self):
|
|
||||||
return 0
|
|
||||||
|
|
||||||
def on_get_n_columns(self):
|
|
||||||
return 1
|
|
||||||
|
|
||||||
def on_get_column_type(self, index):
|
|
||||||
if index==0:
|
|
||||||
return gobject.TYPE_STRING
|
|
||||||
|
|
||||||
def on_get_iter(self, path):
|
|
||||||
node = self._root
|
|
||||||
for p in path[1:]:
|
|
||||||
children = self._network.predecessors(node)
|
|
||||||
node = children[p]
|
|
||||||
return node
|
|
||||||
|
|
||||||
def on_get_path(self, rowref):
|
|
||||||
pass
|
|
||||||
|
|
||||||
def on_get_value(self, rowref, column):
|
|
||||||
print 'get_value'
|
|
||||||
return rowref['id']
|
|
||||||
|
|
||||||
def on_iter_next(self, rowref):
|
|
||||||
pass
|
|
||||||
|
|
||||||
def on_iter_children(self, parent):
|
|
||||||
pass
|
|
||||||
|
|
||||||
def on_iter_has_child(self, rowref):
|
|
||||||
pass
|
|
||||||
|
|
||||||
def on_iter_n_children(self, rowref):
|
|
||||||
pass
|
|
||||||
|
|
||||||
def on_iter_nth_child(self, parent, n):
|
|
||||||
pass
|
|
||||||
|
|
||||||
def on_iter_parent(self, child):
|
|
||||||
pass
|
|
||||||
|
|
|
@ -1,486 +0,0 @@
|
||||||
import gtk
|
|
||||||
from fluents import dataset, logger, plots, workflow, fluents, project
|
|
||||||
from fluents.lib import blmfuncs
|
|
||||||
import geneontology
|
|
||||||
#import gostat
|
|
||||||
from scipy import array, randn, log, ones, zeros
|
|
||||||
import networkx
|
|
||||||
import re
|
|
||||||
|
|
||||||
EVIDENCE_CODES=[('IMP', 'Inferred from mutant phenotype'),
|
|
||||||
('IGI', 'Inferred from genetic interaction'),
|
|
||||||
('IPI', 'Inferred from physical interaction'),
|
|
||||||
('ISS', 'Inferred from sequence or structure similarity'),
|
|
||||||
('IDA', 'Inferred from direct assay'),
|
|
||||||
('IEP', 'Inferred on expression pattern'),
|
|
||||||
('IEA', 'Inferred from electronic annotation'),
|
|
||||||
('TAS', 'Traceable author statement'),
|
|
||||||
('NAS', 'Non-traceable author statement'),
|
|
||||||
('ND', 'No biological data available'),
|
|
||||||
('RCA', 'Inferred from reviewed computational analysis'),
|
|
||||||
('IC', 'Inferred by curator')]
|
|
||||||
|
|
||||||
DISTANCE_METRICS = [('resnik', 'Resnik'),
|
|
||||||
('jiang', 'Jiang & Conrath'),
|
|
||||||
('fussimeg', 'FuSSiMeG')]
|
|
||||||
|
|
||||||
GO_DATA_DIR = '/home/einarr/data'
|
|
||||||
|
|
||||||
evidence = None
|
|
||||||
go = None
|
|
||||||
|
|
||||||
class GoTermView (gtk.Frame):
|
|
||||||
|
|
||||||
def __init__(self):
|
|
||||||
gtk.Frame.__init__(self)
|
|
||||||
tab = gtk.Table(2, 2, False)
|
|
||||||
self._table = tab
|
|
||||||
|
|
||||||
self._name = gtk.Label('')
|
|
||||||
self._name.set_line_wrap(True)
|
|
||||||
self._name.set_alignment(0, 0)
|
|
||||||
name_label = gtk.Label('Name:')
|
|
||||||
name_label.set_alignment(0, 0)
|
|
||||||
tab.attach(name_label, 0, 1, 0, 1, gtk.FILL, gtk.FILL, 5, 5)
|
|
||||||
tab.attach(self._name, 1, 2, 0, 1, gtk.FILL|gtk.EXPAND, gtk.FILL, 5, 5)
|
|
||||||
|
|
||||||
self._def = gtk.TextBuffer()
|
|
||||||
textview = gtk.TextView(self._def)
|
|
||||||
textview.set_wrap_mode(gtk.WRAP_WORD)
|
|
||||||
scrolled_window = gtk.ScrolledWindow()
|
|
||||||
scrolled_window.add(textview)
|
|
||||||
def_label = gtk.Label('Def:')
|
|
||||||
def_label.set_alignment(0.0, 0.0)
|
|
||||||
tab.attach(def_label, 0, 1, 1, 2, gtk.FILL, gtk.FILL, 5, 5)
|
|
||||||
tab.attach(scrolled_window, 1, 2, 1, 2, gtk.FILL|gtk.EXPAND, gtk.FILL|gtk.EXPAND, 5, 5)
|
|
||||||
|
|
||||||
self.add(tab)
|
|
||||||
self.set_go_term(None)
|
|
||||||
|
|
||||||
def set_go_term(self, term):
|
|
||||||
if term:
|
|
||||||
self.set_label(term['id'])
|
|
||||||
self._name.set_text(term['name'])
|
|
||||||
self._def.set_text(term['def'])
|
|
||||||
else:
|
|
||||||
self.set_label('GO Term')
|
|
||||||
self._name.set_text('')
|
|
||||||
self._def.set_text('')
|
|
||||||
|
|
||||||
|
|
||||||
class GeneOntologyTree (gtk.HPaned):
|
|
||||||
|
|
||||||
def __init__(self, network):
|
|
||||||
gtk.HPaned.__init__(self)
|
|
||||||
|
|
||||||
treemodel = geneontology.get_go_treestore(network)
|
|
||||||
self._treemodel = treemodel
|
|
||||||
self._tree_view = gtk.TreeView(treemodel)
|
|
||||||
|
|
||||||
self._selected_terms = set()
|
|
||||||
|
|
||||||
self._tree_view.set_fixed_height_mode(True)
|
|
||||||
|
|
||||||
# Set up context menu
|
|
||||||
self._context_menu = GoTermContextMenu(treemodel, self._tree_view)
|
|
||||||
self._tree_view.connect('popup_menu', self._popup_menu)
|
|
||||||
self._tree_view.connect('button_press_event', self._on_button_press)
|
|
||||||
|
|
||||||
renderer = gtk.CellRendererText()
|
|
||||||
go_column = gtk.TreeViewColumn('GO ID', renderer, text=0)
|
|
||||||
go_column.set_sizing(gtk.TREE_VIEW_COLUMN_FIXED)
|
|
||||||
go_column.set_fixed_width(200)
|
|
||||||
go_column.set_resizable(True)
|
|
||||||
self._tree_view.insert_column(go_column, 0)
|
|
||||||
|
|
||||||
renderer = gtk.CellRendererToggle()
|
|
||||||
renderer.set_property('activatable', True)
|
|
||||||
renderer.connect('toggled', self._toggle_selected)
|
|
||||||
renderer.set_active(True)
|
|
||||||
renderer.set_property('mode', gtk.CELL_RENDERER_MODE_ACTIVATABLE)
|
|
||||||
go_column = gtk.TreeViewColumn('T', renderer, active=2)
|
|
||||||
go_column.set_fixed_width(20)
|
|
||||||
go_column.set_sizing(gtk.TREE_VIEW_COLUMN_FIXED)
|
|
||||||
go_column.set_resizable(True)
|
|
||||||
self._tree_view.insert_column(go_column, 1)
|
|
||||||
|
|
||||||
renderer = gtk.CellRendererText()
|
|
||||||
go_column = gtk.TreeViewColumn('Name', renderer, text=1)
|
|
||||||
go_column.set_fixed_width(200)
|
|
||||||
go_column.set_sizing(gtk.TREE_VIEW_COLUMN_FIXED)
|
|
||||||
go_column.set_resizable(True)
|
|
||||||
self._tree_view.insert_column(go_column, 2)
|
|
||||||
|
|
||||||
self._desc_view = GoTermView()
|
|
||||||
|
|
||||||
self._tree_view.connect('cursor-changed', self._on_cursor_changed)
|
|
||||||
|
|
||||||
scrolled_window = gtk.ScrolledWindow()
|
|
||||||
scrolled_window.add(self._tree_view)
|
|
||||||
self.add1(scrolled_window)
|
|
||||||
self.add2(self._desc_view)
|
|
||||||
self.show_all()
|
|
||||||
|
|
||||||
def _on_cursor_changed(self, tree):
|
|
||||||
path, col = self._tree_view.get_cursor()
|
|
||||||
current = self._treemodel.get_iter(path)
|
|
||||||
term = self._treemodel.get_value(current, 3)
|
|
||||||
self._desc_view.set_go_term(term)
|
|
||||||
|
|
||||||
|
|
||||||
##
|
|
||||||
## GTK Callback functions
|
|
||||||
##
|
|
||||||
def _popup_menu(self, *rest):
|
|
||||||
self.menu.popup(None, None, None, 0, 0)
|
|
||||||
|
|
||||||
def _on_button_press(self, widget, event):
|
|
||||||
path = widget.get_path_at_pos(int(event.x), int(event.y))
|
|
||||||
iter = None
|
|
||||||
|
|
||||||
if path:
|
|
||||||
iter = self._treemodel.get_iter(path[0])
|
|
||||||
obj = self._treemodel.get_value(iter, 3)
|
|
||||||
else:
|
|
||||||
obj = None
|
|
||||||
|
|
||||||
self._context_menu.set_current_term(obj, iter)
|
|
||||||
|
|
||||||
if event.button == 3:
|
|
||||||
self._context_menu.popup(None, None, None, event.button, event.time)
|
|
||||||
|
|
||||||
def _toggle_selected(self, renderer, path):
|
|
||||||
iter = self._treemodel.get_iter(path)
|
|
||||||
|
|
||||||
selected = self._treemodel.get_value(iter, 2)
|
|
||||||
id = self._treemodel.get_value(iter, 0)
|
|
||||||
|
|
||||||
self._treemodel.set_value(iter, 2, not selected)
|
|
||||||
|
|
||||||
if selected:
|
|
||||||
self._selected_terms.remove(id)
|
|
||||||
else:
|
|
||||||
self._selected_terms.add(id)
|
|
||||||
|
|
||||||
|
|
||||||
class GoTermContextMenu (gtk.Menu):
|
|
||||||
"""Context menu for GO terms in the gene ontology browser"""
|
|
||||||
|
|
||||||
def __init__(self, treemodel, treeview):
|
|
||||||
self._treemodel = treemodel
|
|
||||||
self._treeview = treeview
|
|
||||||
self._current_term = None
|
|
||||||
self._current_iter = None
|
|
||||||
|
|
||||||
gtk.Menu.__init__(self)
|
|
||||||
|
|
||||||
# Popuplate tree
|
|
||||||
self._expand_item = i = gtk.MenuItem('Expand')
|
|
||||||
i.connect('activate', self._on_expand_subtree, treemodel, treeview)
|
|
||||||
self.append(i)
|
|
||||||
i.show()
|
|
||||||
|
|
||||||
self._collapse_item = i = gtk.MenuItem('Collapse')
|
|
||||||
i.connect('activate', self._on_collapse_subtree, treemodel, treeview)
|
|
||||||
self.append(i)
|
|
||||||
i.show()
|
|
||||||
|
|
||||||
self._select_subtree_item = i = gtk.MenuItem('Select subtree')
|
|
||||||
i.connect('activate', self._on_select_subtree, treemodel, treeview)
|
|
||||||
self.append(i)
|
|
||||||
i.show()
|
|
||||||
|
|
||||||
def set_current_term(self, term, it):
|
|
||||||
self._current_term = term
|
|
||||||
self._current_iter = it
|
|
||||||
|
|
||||||
def _on_expand_subtree(self, item, treemodel, treeview):
|
|
||||||
path = treemodel.get_path(self._current_iter)
|
|
||||||
treeview.expand_row(path, True)
|
|
||||||
|
|
||||||
def _on_collapse_subtree(self, item, treemodel, treeview):
|
|
||||||
treeview.collapse_row(treemodel.get_path(self._current_iter))
|
|
||||||
|
|
||||||
def _on_select_subtree(self, item, treemodel, treeview):
|
|
||||||
logger.log('notice', 'Selecting subtree from GO id: %s (%s)' %
|
|
||||||
(self._current_term['id'], self._current_term['name']))
|
|
||||||
ids = [x['id'] for x in networkx.bfs(go, self._current_term)]
|
|
||||||
project.project.set_selection('go-terms', set(ids))
|
|
||||||
|
|
||||||
|
|
||||||
class GoWorkflow (workflow.Workflow):
|
|
||||||
|
|
||||||
name = 'Gene Ontology'
|
|
||||||
ident = 'go'
|
|
||||||
description = 'Gene Ontology Workflow. For tree distance measures based '\
|
|
||||||
+ 'on the GO tree.'
|
|
||||||
|
|
||||||
def __init__(self, app):
|
|
||||||
workflow.Workflow.__init__(self, app)
|
|
||||||
|
|
||||||
load = workflow.Stage('load', 'Load GO Annotations')
|
|
||||||
load.add_function(LoadGOFunction())
|
|
||||||
load.add_function(LoadAnnotationsFunction())
|
|
||||||
load.add_function(LoadTextDatasetFunction())
|
|
||||||
self.add_stage(load)
|
|
||||||
|
|
||||||
go = workflow.Stage('go', 'Gene Ontology')
|
|
||||||
go.add_function(SelectGoTermsFunction(self))
|
|
||||||
go.add_function(GoDistanceFunction())
|
|
||||||
go.add_function(SaveDistancesFunction())
|
|
||||||
self.add_stage(go)
|
|
||||||
|
|
||||||
blm = workflow.Stage('blm', 'Bilinear Analysis')
|
|
||||||
blm.add_function(blmfuncs.PCA())
|
|
||||||
self.add_stage(blm)
|
|
||||||
|
|
||||||
|
|
||||||
class LoadGOFunction(workflow.Function):
|
|
||||||
def __init__(self):
|
|
||||||
workflow.Function.__init__(self, 'load-go', 'Load Gene Ontology')
|
|
||||||
|
|
||||||
def run(self):
|
|
||||||
global go
|
|
||||||
go = geneontology.read_default_go()
|
|
||||||
browser = GeneOntologyTree(go)
|
|
||||||
label = gtk.Label('_Gene Ontology')
|
|
||||||
label.set_use_underline(True)
|
|
||||||
fluents.app['bottom_notebook'].append_page(browser, label)
|
|
||||||
|
|
||||||
|
|
||||||
class LoadTextDatasetFunction(workflow.Function):
|
|
||||||
|
|
||||||
def __init__(self):
|
|
||||||
workflow.Function.__init__(self, 'load-text-ds', 'Load GO Evidence')
|
|
||||||
|
|
||||||
def run(self):
|
|
||||||
f = open('/home/einarr/data/goa-condensed.ftsv')
|
|
||||||
global evidence
|
|
||||||
evidence = dataset.read_ftsv(f)
|
|
||||||
return [evidence]
|
|
||||||
|
|
||||||
|
|
||||||
class LoadAnnotationsFunction(workflow.Function):
|
|
||||||
|
|
||||||
def __init__(self):
|
|
||||||
workflow.Function.__init__(self, 'load-go-ann', 'Load Annotations')
|
|
||||||
self.annotations = None
|
|
||||||
|
|
||||||
def run(self):
|
|
||||||
global evidence
|
|
||||||
f = open(GO_DATA_DIR + '/goa-condensed')
|
|
||||||
ev_codes = f.readline().split()
|
|
||||||
go_terms = []
|
|
||||||
|
|
||||||
lines = f.readlines()
|
|
||||||
m = zeros((len(lines), len(ev_codes)))
|
|
||||||
|
|
||||||
for i, l in enumerate(lines):
|
|
||||||
values = l.split()
|
|
||||||
go_terms.append(values[0])
|
|
||||||
for j, v in enumerate(values[1:]):
|
|
||||||
m[i,j] = float(v.strip())
|
|
||||||
|
|
||||||
d = dataset.Dataset(m,
|
|
||||||
[['go-terms', go_terms], ['evidence', ev_codes]],
|
|
||||||
name='GO evidence')
|
|
||||||
|
|
||||||
evidence = d
|
|
||||||
return [d]
|
|
||||||
|
|
||||||
|
|
||||||
class EvidenceCodeFrame(gtk.Frame):
|
|
||||||
def __init__(self):
|
|
||||||
gtk.Frame.__init__(self, 'Evidence Codes')
|
|
||||||
|
|
||||||
self._ec_buttons = {}
|
|
||||||
|
|
||||||
vbox = gtk.VBox(len(EVIDENCE_CODES))
|
|
||||||
for code, desc in EVIDENCE_CODES:
|
|
||||||
btn = gtk.CheckButton('%s (%s)' % (code, desc))
|
|
||||||
self._ec_buttons[code] = btn
|
|
||||||
vbox.add(btn)
|
|
||||||
self.add(vbox)
|
|
||||||
|
|
||||||
def set_options(self, options):
|
|
||||||
for code, desc in EVIDENCE_CODES:
|
|
||||||
self._ec_buttons[code].set_active(options[code])
|
|
||||||
|
|
||||||
def update_options(self, options):
|
|
||||||
for code, desc in EVIDENCE_CODES:
|
|
||||||
options[code] = self._ec_buttons[code].get_active()
|
|
||||||
return options
|
|
||||||
|
|
||||||
|
|
||||||
class DistanceMetricFrame(gtk.Frame):
|
|
||||||
def __init__(self):
|
|
||||||
gtk.Frame.__init__(self, 'Distance Metrics')
|
|
||||||
|
|
||||||
self._metric_buttons = {}
|
|
||||||
|
|
||||||
vbox = gtk.VBox()
|
|
||||||
prev = None
|
|
||||||
for code, text in DISTANCE_METRICS:
|
|
||||||
btn = gtk.RadioButton(prev, '%s' % text)
|
|
||||||
self._metric_buttons[code] = btn
|
|
||||||
vbox.add(btn)
|
|
||||||
prev = btn
|
|
||||||
self.add(vbox)
|
|
||||||
|
|
||||||
def set_options(self, options):
|
|
||||||
self._metric_buttons[options['metric']].set_active(True)
|
|
||||||
|
|
||||||
def update_options(self, options):
|
|
||||||
for code, text in DISTANCE_METRICS:
|
|
||||||
if self._metric_buttons[code].get_active():
|
|
||||||
options['metric'] = code
|
|
||||||
return options
|
|
||||||
return options
|
|
||||||
|
|
||||||
|
|
||||||
class GoDistanceDialog(gtk.Dialog):
|
|
||||||
|
|
||||||
def __init__(self):
|
|
||||||
gtk.Dialog.__init__(self, 'GO term distance matrix',
|
|
||||||
None,
|
|
||||||
gtk.DIALOG_MODAL | gtk.DIALOG_DESTROY_WITH_PARENT,
|
|
||||||
(gtk.STOCK_OK, gtk.RESPONSE_OK,
|
|
||||||
gtk.STOCK_CANCEL, gtk.RESPONSE_CANCEL))
|
|
||||||
|
|
||||||
self._ec_frame = EvidenceCodeFrame()
|
|
||||||
self._metric_frame = DistanceMetricFrame()
|
|
||||||
self.vbox.add(self._ec_frame)
|
|
||||||
self.vbox.add(self._metric_frame)
|
|
||||||
|
|
||||||
def run(self):
|
|
||||||
self.vbox.show_all()
|
|
||||||
return gtk.Dialog.run(self)
|
|
||||||
|
|
||||||
def set_options(self, options):
|
|
||||||
self._ec_frame.set_options(options)
|
|
||||||
self._metric_frame.set_options(options)
|
|
||||||
|
|
||||||
def update_options(self, options):
|
|
||||||
self._ec_frame.update_options(options)
|
|
||||||
self._metric_frame.update_options(options)
|
|
||||||
return options
|
|
||||||
|
|
||||||
def set_editable(self, editable):
|
|
||||||
self._ec_frame.set_sensitive(editable)
|
|
||||||
self._metric_frame.set_sensitive(editable)
|
|
||||||
|
|
||||||
|
|
||||||
class NumericDict(dict):
|
|
||||||
def __init__(self):
|
|
||||||
dict.__init__(self)
|
|
||||||
|
|
||||||
def __getitem__(self, key):
|
|
||||||
retval = 0
|
|
||||||
try:
|
|
||||||
retval = dict.__getitem__(self, key)
|
|
||||||
except:
|
|
||||||
retval = 0.0
|
|
||||||
return retval
|
|
||||||
|
|
||||||
|
|
||||||
class SelectGoTermsFunction(workflow.Function):
|
|
||||||
def __init__(self, wf):
|
|
||||||
workflow.Function.__init__(self, 'go-select', 'Select GO Terms')
|
|
||||||
self.wf = wf
|
|
||||||
|
|
||||||
def run(self, ds):
|
|
||||||
terms = [x['id'] for x in networkx.paths.bfs(go, go.get_bp())]
|
|
||||||
|
|
||||||
self.wf.project.set_selection('go-terms', set(terms[:100]))
|
|
||||||
# self.wf.project.set_selection('go-terms', set(['GO:0007582', 'GO:0008150', 'GO:0051704', 'GO:0044419']))
|
|
||||||
|
|
||||||
|
|
||||||
class GoDistanceFunction(workflow.Function):
|
|
||||||
def __init__(self):
|
|
||||||
workflow.Function.__init__(self, 'go-dist', 'GO term distance matrix')
|
|
||||||
self.options = GoDistanceOptions()
|
|
||||||
|
|
||||||
def resnik_distance_matrix(self, selection, ic):
|
|
||||||
size = len(selection['go-terms'])
|
|
||||||
m = zeros((size, size))
|
|
||||||
# Create resnik distance matrix
|
|
||||||
ids = list(selection['go-terms'])
|
|
||||||
for i, t1 in enumerate(ids):
|
|
||||||
for j, t2 in enumerate(ids):
|
|
||||||
term1 = go.by_id[t1]
|
|
||||||
term2 = go.by_id[t2]
|
|
||||||
subsumer = go.subsumer(term1, term2)
|
|
||||||
|
|
||||||
if subsumer == None:
|
|
||||||
m[i, j] = 1000
|
|
||||||
else:
|
|
||||||
# print "%s - %s - %s" % (t1, subsumer['id'], t2)
|
|
||||||
m[i, j] = ic[t1] + ic[t2] - 2.0 * ic[subsumer['id']]
|
|
||||||
ds = dataset.Dataset(m, (('go-terms', ids), ('_go-terms', ids)), 'Resnik')
|
|
||||||
return ds
|
|
||||||
|
|
||||||
def run(self, x, selection):
|
|
||||||
global evidence, go
|
|
||||||
self.options = self.show_gui(self.options)
|
|
||||||
if not selection.has_key('go-terms') or len(selection['go-terms']) == 0:
|
|
||||||
logger.log('warning', 'No GO terms selected. Cannot make distance matrix.')
|
|
||||||
|
|
||||||
codes = [c for c, d in EVIDENCE_CODES if self.options[c]]
|
|
||||||
ev_indices = evidence.get_indices('evidence', codes)
|
|
||||||
ann_count_matrix = evidence._array[:, ev_indices].sum(1)
|
|
||||||
total_ann = ann_count_matrix.sum(0)
|
|
||||||
|
|
||||||
annotations = NumericDict()
|
|
||||||
ic = NumericDict()
|
|
||||||
|
|
||||||
# Insert annotations into dict
|
|
||||||
for i, v in enumerate(evidence.get_identifiers('go-terms')):
|
|
||||||
annotations[v] = ann_count_matrix[i]
|
|
||||||
|
|
||||||
# Accumulate annotations
|
|
||||||
for term in reversed(networkx.topological_sort(go)):
|
|
||||||
for parent in go.in_neighbors(term):
|
|
||||||
annotations[parent['id']] += annotations[term['id']]
|
|
||||||
|
|
||||||
# Create information content dictionary
|
|
||||||
for term, count in annotations.items():
|
|
||||||
ic[term] = -log(count / total_ann)
|
|
||||||
|
|
||||||
return [self.resnik_distance_matrix(selection, ic)]
|
|
||||||
|
|
||||||
def show_gui(self, options, edit=True):
|
|
||||||
dialog = GoDistanceDialog()
|
|
||||||
dialog.set_options(self.options)
|
|
||||||
dialog.show_all()
|
|
||||||
dialog.set_editable(edit)
|
|
||||||
response = dialog.run()
|
|
||||||
dialog.hide()
|
|
||||||
if response == gtk.RESPONSE_OK:
|
|
||||||
return dialog.update_options(self.options)
|
|
||||||
else:
|
|
||||||
return options
|
|
||||||
|
|
||||||
|
|
||||||
class SaveDistancesFunction(workflow.Function):
|
|
||||||
def __init__(self):
|
|
||||||
workflow.Function.__init__(self, 'save-matrix', 'Save Matrix')
|
|
||||||
|
|
||||||
def run(self, ds):
|
|
||||||
filename = '/home/einarr/data/output.ftsv'
|
|
||||||
fd = open(filename, 'w')
|
|
||||||
dataset.write_ftsv(fd, ds)
|
|
||||||
fd.close()
|
|
||||||
|
|
||||||
|
|
||||||
class Options(dict):
|
|
||||||
def __init__(self):
|
|
||||||
dict.__init__(self)
|
|
||||||
|
|
||||||
|
|
||||||
class GoDistanceOptions(Options):
|
|
||||||
def __init__(self):
|
|
||||||
Options.__init__(self)
|
|
||||||
for code, desc in EVIDENCE_CODES:
|
|
||||||
self[code] = True
|
|
||||||
self['metric'] = 'fussimeg'
|
|
||||||
|
|
|
@ -1,777 +0,0 @@
|
||||||
|
|
||||||
import gtk
|
|
||||||
from fluents import dataset, logger, plots, workflow, fluents, project, view, main
|
|
||||||
import geneontology
|
|
||||||
from matplotlib.nxutils import points_inside_poly
|
|
||||||
import matplotlib
|
|
||||||
#from scipy import array, randn, log, ones, zeros
|
|
||||||
from scipy import *
|
|
||||||
from numpy import matlib
|
|
||||||
import networkx
|
|
||||||
import re
|
|
||||||
import rpy
|
|
||||||
|
|
||||||
EVIDENCE_CODES=[('IMP', 'Inferred from mutant phenotype'),
|
|
||||||
('IGI', 'Inferred from genetic interaction'),
|
|
||||||
('IPI', 'Inferred from physical interaction'),
|
|
||||||
('ISS', 'Inferred from sequence or structure similarity'),
|
|
||||||
('IDA', 'Inferred from direct assay'),
|
|
||||||
('IEP', 'Inferred on expression pattern'),
|
|
||||||
('IEA', 'Inferred from electronic annotation'),
|
|
||||||
('TAS', 'Traceable author statement'),
|
|
||||||
('NAS', 'Non-traceable author statement'),
|
|
||||||
('ND', 'No biological data available'),
|
|
||||||
('RCA', 'Inferred from reviewed computational analysis'),
|
|
||||||
('IC', 'Inferred by curator')]
|
|
||||||
|
|
||||||
DISTANCE_METRICS = [('resnik', 'Resnik'),
|
|
||||||
('jiang', 'Jiang & Conrath'),
|
|
||||||
('fussimeg', 'FuSSiMeG')]
|
|
||||||
|
|
||||||
GO_DATA_DIR = '/home/einarr/data'
|
|
||||||
|
|
||||||
evidence = None
|
|
||||||
go = None
|
|
||||||
|
|
||||||
class GoTermView (gtk.Frame):
|
|
||||||
|
|
||||||
def __init__(self):
|
|
||||||
gtk.Frame.__init__(self)
|
|
||||||
tab = gtk.Table(2, 3, False)
|
|
||||||
self._table = tab
|
|
||||||
|
|
||||||
self._name = gtk.Label('')
|
|
||||||
self._name.set_line_wrap(True)
|
|
||||||
self._name.set_alignment(0, 0)
|
|
||||||
name_label = gtk.Label('Name:')
|
|
||||||
name_label.set_alignment(0, 0)
|
|
||||||
tab.attach(name_label, 0, 1, 0, 1, gtk.FILL, gtk.FILL, 5, 5)
|
|
||||||
tab.attach(self._name, 1, 2, 0, 1, gtk.FILL|gtk.EXPAND, gtk.FILL, 5, 5)
|
|
||||||
|
|
||||||
self._isa_parents = gtk.HBox()
|
|
||||||
isa_parents_label = gtk.Label('Is a:')
|
|
||||||
tab.attach(isa_parents_label, 0, 1, 1, 2, gtk.FILL, gtk.FILL, 5, 5)
|
|
||||||
tab.attach(self._isa_parents, 1, 2, 1, 2, gtk.FILL, gtk.FILL, 5, 5)
|
|
||||||
|
|
||||||
self._def = gtk.TextBuffer()
|
|
||||||
textview = gtk.TextView(self._def)
|
|
||||||
textview.set_wrap_mode(gtk.WRAP_WORD)
|
|
||||||
scrolled_window = gtk.ScrolledWindow()
|
|
||||||
scrolled_window.add(textview)
|
|
||||||
def_label = gtk.Label('Def:')
|
|
||||||
def_label.set_alignment(0.0, 0.0)
|
|
||||||
tab.attach(def_label, 0, 1, 2, 3, gtk.FILL, gtk.FILL, 5, 5)
|
|
||||||
tab.attach(scrolled_window, 1, 2, 2, 3, gtk.FILL|gtk.EXPAND, gtk.FILL|gtk.EXPAND, 5, 5)
|
|
||||||
|
|
||||||
self._tab = tab
|
|
||||||
self.add(tab)
|
|
||||||
self.set_go_term(None)
|
|
||||||
|
|
||||||
def set_go_term(self, term):
|
|
||||||
if term:
|
|
||||||
self.set_label(term['id'])
|
|
||||||
self._name.set_text(term['name'])
|
|
||||||
self._def.set_text(term['def'])
|
|
||||||
self._tab.remove(self._isa_parents)
|
|
||||||
self._isa_parents = gtk.HBox()
|
|
||||||
for p in term['is_a']:
|
|
||||||
btn = gtk.Button(p)
|
|
||||||
btn.show()
|
|
||||||
self._isa_parents.add(btn)
|
|
||||||
self._isa_parents.show()
|
|
||||||
self._tab.attach(self._isa_parents, 1, 2, 1, 2, gtk.FILL, gtk.FILL, 5, 5)
|
|
||||||
else:
|
|
||||||
self.set_label('GO Term')
|
|
||||||
self._name.set_text('')
|
|
||||||
self._def.set_text('')
|
|
||||||
self._tab.remove(self._isa_parents)
|
|
||||||
self._isa_parents = gtk.HBox()
|
|
||||||
self._tab.attach(self._isa_parents, 1, 2, 1, 2, gtk.FILL, gtk.FILL, 5, 5)
|
|
||||||
|
|
||||||
|
|
||||||
class GeneOntologyTree (gtk.HPaned):
|
|
||||||
|
|
||||||
def __init__(self, network):
|
|
||||||
gtk.HPaned.__init__(self)
|
|
||||||
self.set_position(400)
|
|
||||||
|
|
||||||
treemodel = geneontology.get_go_treestore(network)
|
|
||||||
self._treemodel = treemodel
|
|
||||||
self._tree_view = gtk.TreeView(treemodel)
|
|
||||||
|
|
||||||
self._selected_terms = set()
|
|
||||||
|
|
||||||
self._tree_view.set_fixed_height_mode(True)
|
|
||||||
|
|
||||||
# Set up context menu
|
|
||||||
self._context_menu = GoTermContextMenu(treemodel, self._tree_view)
|
|
||||||
self._tree_view.connect('popup_menu', self._popup_menu)
|
|
||||||
self._tree_view.connect('button_press_event', self._on_button_press)
|
|
||||||
|
|
||||||
renderer = gtk.CellRendererText()
|
|
||||||
go_column = gtk.TreeViewColumn('GO ID', renderer, text=0)
|
|
||||||
go_column.set_sizing(gtk.TREE_VIEW_COLUMN_FIXED)
|
|
||||||
go_column.set_fixed_width(200)
|
|
||||||
go_column.set_resizable(True)
|
|
||||||
self._tree_view.insert_column(go_column, 0)
|
|
||||||
|
|
||||||
renderer = gtk.CellRendererToggle()
|
|
||||||
renderer.set_property('activatable', True)
|
|
||||||
renderer.connect('toggled', self._toggle_selected)
|
|
||||||
renderer.set_active(True)
|
|
||||||
renderer.set_property('mode', gtk.CELL_RENDERER_MODE_ACTIVATABLE)
|
|
||||||
go_column = gtk.TreeViewColumn('T', renderer, active=2)
|
|
||||||
go_column.set_fixed_width(20)
|
|
||||||
go_column.set_sizing(gtk.TREE_VIEW_COLUMN_FIXED)
|
|
||||||
go_column.set_resizable(True)
|
|
||||||
self._tree_view.insert_column(go_column, 1)
|
|
||||||
|
|
||||||
renderer = gtk.CellRendererText()
|
|
||||||
go_column = gtk.TreeViewColumn('Name', renderer, text=1)
|
|
||||||
go_column.set_fixed_width(200)
|
|
||||||
go_column.set_sizing(gtk.TREE_VIEW_COLUMN_FIXED)
|
|
||||||
go_column.set_resizable(True)
|
|
||||||
self._tree_view.insert_column(go_column, 2)
|
|
||||||
|
|
||||||
self._desc_view = GoTermView()
|
|
||||||
|
|
||||||
self._tree_view.connect('cursor-changed', self._on_cursor_changed)
|
|
||||||
|
|
||||||
scrolled_window = gtk.ScrolledWindow()
|
|
||||||
scrolled_window.add(self._tree_view)
|
|
||||||
self.add1(scrolled_window)
|
|
||||||
self.add2(self._desc_view)
|
|
||||||
self.show_all()
|
|
||||||
|
|
||||||
def _on_cursor_changed(self, tree):
|
|
||||||
path, col = self._tree_view.get_cursor()
|
|
||||||
current = self._treemodel.get_iter(path)
|
|
||||||
term = self._treemodel.get_value(current, 3)
|
|
||||||
self._desc_view.set_go_term(term)
|
|
||||||
|
|
||||||
|
|
||||||
##
|
|
||||||
## GTK Callback functions
|
|
||||||
##
|
|
||||||
def _popup_menu(self, *rest):
|
|
||||||
self.menu.popup(None, None, None, 0, 0)
|
|
||||||
|
|
||||||
def _on_button_press(self, widget, event):
|
|
||||||
path = widget.get_path_at_pos(int(event.x), int(event.y))
|
|
||||||
iter = None
|
|
||||||
|
|
||||||
if path:
|
|
||||||
iter = self._treemodel.get_iter(path[0])
|
|
||||||
obj = self._treemodel.get_value(iter, 3)
|
|
||||||
else:
|
|
||||||
obj = None
|
|
||||||
|
|
||||||
self._context_menu.set_current_term(obj, iter)
|
|
||||||
|
|
||||||
if event.button == 3:
|
|
||||||
self._context_menu.popup(None, None, None, event.button, event.time)
|
|
||||||
|
|
||||||
def _toggle_selected(self, renderer, path):
|
|
||||||
iter = self._treemodel.get_iter(path)
|
|
||||||
|
|
||||||
selected = self._treemodel.get_value(iter, 2)
|
|
||||||
id = self._treemodel.get_value(iter, 0)
|
|
||||||
|
|
||||||
self._treemodel.set_value(iter, 2, not selected)
|
|
||||||
|
|
||||||
if selected:
|
|
||||||
self._selected_terms.remove(id)
|
|
||||||
else:
|
|
||||||
self._selected_terms.add(id)
|
|
||||||
|
|
||||||
|
|
||||||
class GoTermContextMenu (gtk.Menu):
|
|
||||||
"""Context menu for GO terms in the gene ontology browser"""
|
|
||||||
|
|
||||||
def __init__(self, treemodel, treeview):
|
|
||||||
self._treemodel = treemodel
|
|
||||||
self._treeview = treeview
|
|
||||||
self._current_term = None
|
|
||||||
self._current_iter = None
|
|
||||||
|
|
||||||
gtk.Menu.__init__(self)
|
|
||||||
|
|
||||||
# Popuplate tree
|
|
||||||
self._expand_item = i = gtk.MenuItem('Expand')
|
|
||||||
i.connect('activate', self._on_expand_subtree, treemodel, treeview)
|
|
||||||
self.append(i)
|
|
||||||
i.show()
|
|
||||||
|
|
||||||
self._collapse_item = i = gtk.MenuItem('Collapse')
|
|
||||||
i.connect('activate', self._on_collapse_subtree, treemodel, treeview)
|
|
||||||
self.append(i)
|
|
||||||
i.show()
|
|
||||||
|
|
||||||
self._select_subtree_item = i = gtk.MenuItem('Select subtree')
|
|
||||||
i.connect('activate', self._on_select_subtree, treemodel, treeview)
|
|
||||||
self.append(i)
|
|
||||||
i.show()
|
|
||||||
|
|
||||||
def set_current_term(self, term, it):
|
|
||||||
self._current_term = term
|
|
||||||
self._current_iter = it
|
|
||||||
|
|
||||||
def _on_expand_subtree(self, item, treemodel, treeview):
|
|
||||||
path = treemodel.get_path(self._current_iter)
|
|
||||||
treeview.expand_row(path, True)
|
|
||||||
|
|
||||||
def _on_collapse_subtree(self, item, treemodel, treeview):
|
|
||||||
treeview.collapse_row(treemodel.get_path(self._current_iter))
|
|
||||||
|
|
||||||
def _on_select_subtree(self, item, treemodel, treeview):
|
|
||||||
logger.log('notice', 'Selecting subtree from GO id: %s (%s)' %
|
|
||||||
(self._current_term['id'], self._current_term['name']))
|
|
||||||
ids = [x['id'] for x in networkx.bfs(go, self._current_term)]
|
|
||||||
project.project.set_selection('go-terms', set(ids))
|
|
||||||
|
|
||||||
|
|
||||||
class LoadGOFunction(workflow.Function):
|
|
||||||
def __init__(self):
|
|
||||||
workflow.Function.__init__(self, 'load-go', 'Load Gene Ontology')
|
|
||||||
|
|
||||||
def run(self):
|
|
||||||
global go
|
|
||||||
if go:
|
|
||||||
return
|
|
||||||
|
|
||||||
go = geneontology.read_default_go()
|
|
||||||
browser = GeneOntologyTree(go)
|
|
||||||
label = gtk.Label('_Gene Ontology')
|
|
||||||
label.set_use_underline(True)
|
|
||||||
fluents.app['bottom_notebook'].append_page(browser, label)
|
|
||||||
|
|
||||||
class LoadAnnotationsFunction(workflow.Function):
|
|
||||||
|
|
||||||
def __init__(self):
|
|
||||||
workflow.Function.__init__(self, 'load-go-ann', 'Load Annotations')
|
|
||||||
self.annotations = None
|
|
||||||
|
|
||||||
def run(self):
|
|
||||||
global evidence
|
|
||||||
f = open(GO_DATA_DIR + '/goa-condensed')
|
|
||||||
ev_codes = f.readline().split()
|
|
||||||
go_terms = []
|
|
||||||
|
|
||||||
lines = f.readlines()
|
|
||||||
m = zeros((len(lines), len(ev_codes)))
|
|
||||||
|
|
||||||
for i, l in enumerate(lines):
|
|
||||||
values = l.split()
|
|
||||||
go_terms.append(values[0])
|
|
||||||
for j, v in enumerate(values[1:]):
|
|
||||||
m[i,j] = float(v.strip())
|
|
||||||
|
|
||||||
d = dataset.Dataset(m,
|
|
||||||
[['go-terms', go_terms], ['evidence', ev_codes]],
|
|
||||||
name='GO evidence')
|
|
||||||
|
|
||||||
evidence = d
|
|
||||||
return [d]
|
|
||||||
|
|
||||||
|
|
||||||
class GOWeightDialog(gtk.Dialog):
|
|
||||||
def __init__(self):
|
|
||||||
gtk.Dialog.__init__(self, 'GO Gene List Influence',
|
|
||||||
None,
|
|
||||||
gtk.DIALOG_MODAL | gtk.DIALOG_DESTROY_WITH_PARENT,
|
|
||||||
(gtk.STOCK_OK, gtk.RESPONSE_OK,
|
|
||||||
gtk.STOCK_CANCEL, gtk.RESPONSE_CANCEL))
|
|
||||||
|
|
||||||
table = gtk.Table(2, 2)
|
|
||||||
|
|
||||||
sim_lbl = gtk.Label('Similarity threshold: ')
|
|
||||||
table.attach(sim_lbl, 0, 1, 0, 1)
|
|
||||||
adjustment = gtk.Adjustment(0, 0, 10, 0.1, 1.0, 1.0)
|
|
||||||
sim_spin = gtk.SpinButton(adjustment, 0.0, 2)
|
|
||||||
table.attach(sim_spin, 1, 2, 0, 1)
|
|
||||||
|
|
||||||
rank_lbl = gtk.Label('Rank threshold: ')
|
|
||||||
table.attach(rank_lbl, 0, 1, 1, 2)
|
|
||||||
rank_adj = gtk.Adjustment(0, 0, 10, 0.1, 1.0, 1.0)
|
|
||||||
rank_spin = gtk.SpinButton(rank_adj, 0.0, 2)
|
|
||||||
table.attach(rank_spin, 1, 2, 1, 2)
|
|
||||||
|
|
||||||
sim_lbl.show()
|
|
||||||
sim_spin.show()
|
|
||||||
rank_lbl.show()
|
|
||||||
rank_spin.show()
|
|
||||||
|
|
||||||
table.show()
|
|
||||||
self.vbox.add(table)
|
|
||||||
self._sim_spin = sim_spin
|
|
||||||
self._rank_spin = rank_spin
|
|
||||||
|
|
||||||
def set_options(self, options):
|
|
||||||
self._sim_spin.set_value(options['similarity_threshold'])
|
|
||||||
self._rank_spin.set_value(options['rank_threshold'])
|
|
||||||
|
|
||||||
def set_editable(self, editable):
|
|
||||||
self._sim_spin.set_sensitive(editable)
|
|
||||||
self._rank_spin.set_sensitive(editable)
|
|
||||||
|
|
||||||
def update_options(self, options):
|
|
||||||
options['similarity_threshold'] = self._sim_spin.get_value()
|
|
||||||
options['rank_threshold'] = self._rank_spin.get_value()
|
|
||||||
|
|
||||||
|
|
||||||
class DistanceToSelectionFunction(workflow.Function):
|
|
||||||
def __init__(self):
|
|
||||||
workflow.Function.__init__(self, 'dist-to-sel', 'Dist. to Selection')
|
|
||||||
self.options = DistanceToSelectionOptions()
|
|
||||||
|
|
||||||
def run(self, similarities, selection):
|
|
||||||
self.show_gui(similarities, self.options)
|
|
||||||
|
|
||||||
retval = []
|
|
||||||
|
|
||||||
dims = similarities.get_dim_name()
|
|
||||||
if dims[0] != "_%s" %dims[1] and dims[1] != "_%s" %dims[0]:
|
|
||||||
logger.log('warning', 'Are you sure this is a similarity matrix?')
|
|
||||||
|
|
||||||
dim = dims[0]
|
|
||||||
print "dim", dim
|
|
||||||
|
|
||||||
print "selection", selection[dim]
|
|
||||||
print "indices", similarities.get_indices(dim, selection[dim])
|
|
||||||
indices = similarities.get_indices(dim, selection[dim])
|
|
||||||
m = apply_along_axis(max, 1, similarities.asarray().take(indices, 1))
|
|
||||||
retval.append(dataset.Dataset(m, [(dim, similarities[dim]),
|
|
||||||
("_dummy", '0')]))
|
|
||||||
|
|
||||||
return retval
|
|
||||||
|
|
||||||
def show_gui(self, similarities, options, edit=True):
|
|
||||||
dialog = DistanceToSelectionOptionsDialog([similarities], self.options)
|
|
||||||
response = dialog.run()
|
|
||||||
dialog.hide()
|
|
||||||
if response == gtk.RESPONSE_OK:
|
|
||||||
dialog.set_output()
|
|
||||||
return dialog.get_options()
|
|
||||||
else:
|
|
||||||
return options
|
|
||||||
|
|
||||||
class GOWeightFunction(workflow.Function):
|
|
||||||
def __init__(self):
|
|
||||||
workflow.Function.__init__(self, 'load-go-ann', 'GO Influence')
|
|
||||||
self.options = GOWeightOptions()
|
|
||||||
|
|
||||||
def run(self, genelist, similarity):
|
|
||||||
## Show dialog box
|
|
||||||
self.show_gui(self.options)
|
|
||||||
|
|
||||||
## assure that data is "correct", i.e., that we can perform
|
|
||||||
## the desired operations.
|
|
||||||
common_dims = genelist.common_dims(similarity)
|
|
||||||
if len(common_dims) == 0:
|
|
||||||
logger.log('error', 'No common dimension in the selected datasets.')
|
|
||||||
elif len(common_dims) > 1:
|
|
||||||
logger.log('error', "More than one common dimension in the " +
|
|
||||||
"selected datasets. Don't know what to do.")
|
|
||||||
gene_dim = common_dims[0]
|
|
||||||
logger.log('debug', 'Assuming genes are in dimension: %s' % gene_dim)
|
|
||||||
|
|
||||||
## Do the calculations.
|
|
||||||
d = {}
|
|
||||||
|
|
||||||
|
|
||||||
def show_gui(self, options, edit=True):
|
|
||||||
dialog = GOWeightDialog()
|
|
||||||
dialog.set_options(self.options)
|
|
||||||
dialog.show_all()
|
|
||||||
dialog.set_editable(edit)
|
|
||||||
response = dialog.run()
|
|
||||||
dialog.hide()
|
|
||||||
if response == gtk.RESPONSE_OK:
|
|
||||||
return dialog.update_options(self.options)
|
|
||||||
else:
|
|
||||||
return options
|
|
||||||
|
|
||||||
|
|
||||||
class DistanceToSelectionOptionsDialog(workflow.OptionsDialog):
|
|
||||||
def __init__(self, data, options):
|
|
||||||
workflow.OptionsDialog.__init__(self, data, options, ['X'])
|
|
||||||
|
|
||||||
|
|
||||||
class TTestOptionsDialog(workflow.OptionsDialog):
|
|
||||||
|
|
||||||
def __init__(self, data, options):
|
|
||||||
workflow.OptionsDialog.__init__(self, data, options,
|
|
||||||
['X', 'Categories'])
|
|
||||||
|
|
||||||
vb = gtk.VBox()
|
|
||||||
l = gtk.Label("Limit")
|
|
||||||
adj = gtk.Adjustment(0, 0.0, 1.0, 0.01, 1.0, 1.0)
|
|
||||||
sb = gtk.SpinButton(adj, 0.0, 2)
|
|
||||||
l.show()
|
|
||||||
sb.show()
|
|
||||||
|
|
||||||
vb.add(l)
|
|
||||||
vb.add(sb)
|
|
||||||
vb.show()
|
|
||||||
self.nb.insert_page(vb, gtk.Label("Limit"), -1)
|
|
||||||
|
|
||||||
|
|
||||||
class TTestFunction(workflow.Function):
|
|
||||||
def __init__(self):
|
|
||||||
workflow.Function.__init__(self, 't-test', 't-test')
|
|
||||||
self.options = TTestOptions()
|
|
||||||
|
|
||||||
def run(self, x, categories):
|
|
||||||
self.show_gui(x, categories)
|
|
||||||
|
|
||||||
retval = []
|
|
||||||
m = x.asarray()
|
|
||||||
c = categories.asarray()
|
|
||||||
|
|
||||||
# Nonsmokers and current smokers
|
|
||||||
ns = m.take(nonzero(c[:,0]), 0)[0]
|
|
||||||
cs = m.take(nonzero(c[:,2]), 0)[0]
|
|
||||||
|
|
||||||
tscores = stats.ttest_ind(ns, cs)
|
|
||||||
|
|
||||||
print "Out data:", self.options['out_data']
|
|
||||||
tds = dataset.Dataset(tscores[0], [('gene_id', x['gene_id']),
|
|
||||||
('_t', ['0'])],
|
|
||||||
name='t-values')
|
|
||||||
if 't-value' in self.options['out_data']:
|
|
||||||
retval.append(tds)
|
|
||||||
|
|
||||||
pds = dataset.Dataset(tscores[1], [('gene_id', x['gene_id']),
|
|
||||||
('_p', ['0'])],
|
|
||||||
name='p-values')
|
|
||||||
if 'p-value' in self.options['out_data']:
|
|
||||||
retval.append(pds)
|
|
||||||
|
|
||||||
if ProbabilityHistogramPlot in self.options['out_plots']:
|
|
||||||
retval.append(ProbabilityHistogramPlot(pds))
|
|
||||||
|
|
||||||
if VolcanoPlot in self.options['out_plots']:
|
|
||||||
fc = apply_along_axis(mean, 0, ns) / apply_along_axis(mean, 0, cs)
|
|
||||||
fcds = dataset.Dataset(fc, [('gene_id', x['gene_id']),
|
|
||||||
('_dummy', ['0'])],
|
|
||||||
name="Fold change")
|
|
||||||
|
|
||||||
retval.append(VolcanoPlot(fcds, pds, 'gene_id'))
|
|
||||||
|
|
||||||
return retval
|
|
||||||
|
|
||||||
def show_gui(self, x, categories):
|
|
||||||
dialog = TTestOptionsDialog([x, categories], self.options)
|
|
||||||
response = dialog.run()
|
|
||||||
dialog.hide()
|
|
||||||
if response == gtk.RESPONSE_OK:
|
|
||||||
dialog.set_output()
|
|
||||||
return dialog.get_options()
|
|
||||||
else:
|
|
||||||
return options
|
|
||||||
|
|
||||||
|
|
||||||
class SetICFunction(workflow.Function):
|
|
||||||
def __init__(self):
|
|
||||||
workflow.Function.__init__(self, 'set-ic', 'Set IC')
|
|
||||||
|
|
||||||
def run(self, ds):
|
|
||||||
if 'go-terms' in ds.get_dim_name():
|
|
||||||
main.workflow.current_ic = ds
|
|
||||||
else:
|
|
||||||
logger.log('warning', 'Cannot use this dataset as IC on the go-terms dimension')
|
|
||||||
return
|
|
||||||
|
|
||||||
class PlotDagFunction(workflow.Function):
    def __init__(self):
        workflow.Function.__init__(self, 'go-dag', 'Build DAG')

    def run(self, selection):
        g = self.get_network(list(selection['go-terms']))
        ds = dataset.GraphDataset(networkx.adj_matrix(g),
                                  [('go-terms', g.nodes()), ('_go-terms', g.nodes())],
                                  name="DAG")
        return [ThresholdDagPlot(g)]

    def get_network(self, terms, subtree='bp'):
        """Returns a DAG connecting the given terms by including their parents
        up to the level needed to connect them. The subtree parameter is one of:

        mf - molecular function
        bp - biological process
        cc - cellular component"""

        rpy.r.library("GOstats")

        if subtree == 'mf':
            subtree_r = rpy.r.GOMFPARENTS
        elif subtree == 'bp':
            subtree_r = rpy.r.GOBPPARENTS
        elif subtree == 'cc':
            subtree_r = rpy.r.GOCCPARENTS
        else:
            raise Exception("Unknown subtree. Use mf, bp or cc.")

        g = rpy.r.GOGraph(terms, subtree_r)
        edges = rpy.r.edges(g)

        nxgraph = networkx.DiGraph()
        for child, d in edges.items():
            for parent in d.keys():
                nxgraph.add_edge(parent, child)

        return nxgraph

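# Illustrative use of PlotDagFunction.get_network() above (the GO ids are
# placeholders): it pulls the parent mappings from the GOstats package through
# rpy and returns a networkx.DiGraph whose edges point from parent to child term.
#
#   dag = PlotDagFunction().get_network(['GO:0006915', 'GO:0008219'], subtree='bp')
#   print dag.number_of_nodes(), dag.number_of_edges()
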
class TTestOptions(workflow.Options):

    def __init__(self):
        workflow.Options.__init__(self)
        self['all_plots'] = [(ProbabilityHistogramPlot, 'Histogram', True),
                             (VolcanoPlot, 'Volcano plot', True)]
        self['all_data'] = [('t-value', 't-values', True),
                            ('p-value', 'Probabilities', True),
                            ('categories', 'Categories', False)]
        self['out_data'] = ['t-value', 'p-value']


class DistanceToSelectionOptions(workflow.Options):
    def __init__(self):
        workflow.Options.__init__(self)
        self['all_data'] = [('mindist', 'Minimum distance', True)]


class GOWeightOptions(workflow.Options):
    def __init__(self):
        workflow.Options.__init__(self)
        self['similarity_threshold'] = 0.0
        self['rank_threshold'] = 0.0

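# The Options subclasses above follow the pattern used by TTestFunction.run():
# 'all_plots' and 'all_data' hold (plot class or data id, label, flag) triples
# presented in the options dialog (the boolean is presumably the default-enabled
# state), while 'out_plots'/'out_data' list what run() should actually return.
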
class ProbabilityHistogramPlot(plots.HistogramPlot):
    def __init__(self, ds):
        plots.HistogramPlot.__init__(self, ds, name="Confidence", bins=50)


class VolcanoPlot(plots.ScatterPlot):
    def __init__(self, fold_ds, p_ds, dim, **kw):
        plots.ScatterPlot.__init__(self, fold_ds, p_ds, 'gene_id', '_dummy',
                                   '0', '0',
                                   name="Volcano plot",
                                   sel_dim_2='_p', **kw)

class DagPlot(plots.Plot):
    def __init__(self, graph, dim='go-terms', pos=None, nodecolor='b', nodesize=40,
                 with_labels=False, name='DAG Plot'):

        plots.Plot.__init__(self, name)
        self.nodes = graph.nodes()
        self._map_ids = self.nodes
        self.graph = graph
        self._pos = pos
        self._cmap = matplotlib.cm.summer
        self._nodesize = nodesize
        self._nodecolor = nodecolor
        self._with_labels = with_labels
        self.visible = set()

        self.current_dim = dim

        if not self._pos:
            self._pos = self._calc_pos(graph)
        self._xy = asarray([self._pos[node] for node in self.nodes])
        self.xaxis_data = self._xy[:,0]
        self.yaxis_data = self._xy[:,1]

        # Initial draw
        self.default_props = {'nodesize' : 50,
                              'nodecolor' : 'blue',
                              'edge_color' : 'gray',
                              'edge_color_selected' : 'red'}
        self.node_collection = None
        self.edge_collection = None
        self.node_labels = None
        lw = zeros(self.xaxis_data.shape)
        self.node_collection = self.axes.scatter(self.xaxis_data, self.yaxis_data,
                                                 s=self._nodesize,
                                                 c=self._nodecolor,
                                                 linewidth=lw,
                                                 zorder=3)
        self._mappable = self.node_collection
        self._mappable.set_cmap(self._cmap)

        # selected_nodes is a transparent overlay that adjusts node edge
        # visibility according to the current selection. It is needed to get
        # the selected nodes 'on top', as zorder may not be defined per node.
        self.selected_nodes = self.axes.scatter(self.xaxis_data,
                                                self.yaxis_data,
                                                s=self._nodesize,
                                                c=self._nodecolor,
                                                edgecolor='r',
                                                linewidth=lw,
                                                zorder=4,
                                                alpha=0)

        edge_color = self.default_props['edge_color']
        self.edge_collection = networkx.draw_networkx_edges(self.graph,
                                                            self._pos,
                                                            ax=self.axes,
                                                            edge_color=edge_color)
        # edge color rgba-arrays
        self._edge_color_rgba = matlib.repmat(plots.ColorConverter().to_rgba(edge_color),
                                              self.graph.number_of_edges(), 1)
        self._edge_color_selected = plots.ColorConverter().to_rgba(self.default_props['edge_color_selected'])
        if self._with_labels:
            self.node_labels = networkx.draw_networkx_labels(self.graph,
                                                             self._pos,
                                                             ax=self.axes)

        # remove axes, frame and grid
        self.axes.set_xticks([])
        self.axes.set_yticks([])
        self.axes.grid(False)
        self.axes.set_frame_on(False)
        self.fig.subplots_adjust(left=0, right=1, bottom=0, top=1)

    def _calc_pos(self, graph):
        """Calculates positions for graph nodes using the 'dot' layout."""
        gv_graph = networkx.DiGraph()
        for start, end in graph.edges():
            gv_graph.add_edge(start.replace('GO:', ''), end.replace('GO:', ''))

        pos_gv = networkx.pygraphviz_layout(gv_graph, prog="dot")
        pos = {}
        for k, v in pos_gv.items():
            if k != "all":
                pos["GO:%s" % k] = v
            else:
                pos[k] = v
        return pos

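    # _calc_pos() above lays the DAG out top-down with graphviz 'dot' via
    # networkx.pygraphviz_layout. The 'GO:' prefix is stripped before layout,
    # presumably because ':' has a special meaning (port syntax) in graphviz
    # node names, and is re-added when building the position dict.
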
    def points_in_rect(self, x1, y1, x2, y2, key):
        ydata = self.yaxis_data
        xdata = self.xaxis_data

        # find indices of selected area
        if x1 > x2:
            x1, x2 = x2, x1
        if y1 > y2:
            y1, y2 = y2, y1
        assert x1 <= x2
        assert y1 <= y2

        index = nonzero((xdata>x1) & (xdata<x2) & (ydata>y1) & (ydata<y2))[0]
        if getattr(main.workflow, 'current_ic', None) != None:
            ids = self.visible.intersection([self.nodes[i] for i in index])
        else:
            ids = set([self.nodes[i] for i in index])
        return ids

    def rectangle_select_callback(self, x1, y1, x2, y2, key):
        ids = self.points_in_rect(x1, y1, x2, y2, key)
        ids = self.update_selection(ids, key)

        self.selection_listener(self.current_dim, ids)

    def lasso_select_callback(self, verts, key=None):
        xys = c_[self.xaxis_data[:,newaxis], self.yaxis_data[:,newaxis]]
        index = nonzero(points_inside_poly(xys, verts))[0]
        ids = [self.nodes[i] for i in index]
        ids = self.update_selection(ids, key)
        self.selection_listener(self.current_dim, ids)

    def set_current_selection(self, selection):
        linewidth = zeros(self.xaxis_data.shape)
        edge_color_rgba = self._edge_color_rgba.copy()
        index = [i for i in range(len(self.nodes)) if self.nodes[i] in selection[self.current_dim]]
        if len(index) > 0:
            linewidth[index] = 2
            idents = selection[self.current_dim]
            edge_index = [i for i, edge in enumerate(self.graph.edges()) if (edge[0] in idents and edge[1] in idents)]
            if len(edge_index) > 0:
                for i in edge_index:
                    edge_color_rgba[i,:] = self._edge_color_selected
                self._A = None

        self.edge_collection._colors = edge_color_rgba
        self.selected_nodes.set_linewidth(linewidth)
        self.canvas.draw()

    def is_mappable_with(self, obj):
        """Returns True if dataset/selection is mappable with this plot.
        """
        if isinstance(obj, fluents.dataset.Dataset):
            if self.current_dim in obj.get_dim_name():
                return True
        return False

    def _update_color_from_dataset(self, ds):
        """Updates the facecolors from a dataset.
        """

        array = ds.asarray()

        # only support for 2d-arrays:
        try:
            m, n = array.shape
        except:
            raise ValueError, "No support for more than 2 dimensions."

        # is dataset a vector or matrix?
        if not n == 1:
            # we have a category dataset
            if isinstance(ds, fluents.dataset.CategoryDataset):
                vec = dot(array, diag(arange(n))).sum(1)
            else:
                vec = array.sum(1)
        else:
            vec = array.ravel()

        indices = ds.get_indices(self.current_dim, self.nodes)
        nodes = ds.existing_identifiers(self.current_dim, self.nodes)

        v = vec.take(indices, 0)
        vec_min = min(vec[vec > -inf])
        vec_max = max(vec[vec < inf])
        v[v==inf] = vec_max
        v[v==-inf] = vec_min

        d = dict(zip(nodes, list(v)))

        map_vec = zeros(len(self.nodes))
        for i, n in enumerate(self.nodes):
            map_vec[i] = d.get(n, vec_min)

        # update facecolors
        self.node_collection.set_array(map_vec)
        self.node_collection.set_clim(vec_min, vec_max)
        self.node_collection.update_scalarmappable()  # sets facecolors from array

        self.canvas.draw()

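# In DagPlot._update_color_from_dataset() above, a CategoryDataset row is
# collapsed to one scalar with dot(array, diag(arange(n))).sum(1): for a
# one-hot row this yields the index of the active category, which is then
# mapped through the colormap. +/-inf entries are clamped to the finite
# min/max before colouring.
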
class ThresholdDagPlot(DagPlot, plots.PlotThresholder):
    def __init__(self, graph, dim='go-terms', pos=None, nodecolor='b', nodesize=40,
                 with_labels=False, name='DAG Plot'):
        DagPlot.__init__(self, graph, dim=dim, pos=pos,
                         nodecolor=nodecolor, nodesize=nodesize,
                         with_labels=with_labels, name=name)
        plots.PlotThresholder.__init__(self, "IC")

    def rectangle_select_callback(self, x1, y1, x2, y2, key):
        ids = self.points_in_rect(x1, y1, x2, y2, key)
        ids = self.visible.intersection(ids)
        ids = self.update_selection(ids, key)

        self.selection_listener(self.current_dim, ids)

    def _update_color_from_dataset(self, ds):
        DagPlot._update_color_from_dataset(self, ds)
        self.set_threshold_dataset(ds)
        a = ds.asarray()
        a_max = max(a[a<inf])
        a_min = min(a[a>-inf])
        self._sb_min.set_range(a_min-0.1, a_max+0.1)
        self._sb_min.set_value(a_min-0.1)
        self._sb_max.set_range(a_min-0.1, a_max+0.1)
        self._sb_max.set_value(a_max+0.1)

@ -1,445 +0,0 @@
import sys, os
import webbrowser

from fluents import logger, plots, workflow, dataset, main
from fluents.lib import blmfuncs, nx_utils, validation, engines, cx_stats, cx_utils
import gobrowser, geneontology
import scipy
import networkx as nx


class SmallTestWorkflow(workflow.Workflow):
    name = 'Smokers'
    ident = 'smokers'
    description = 'A small test workflow for gene expression analysis.'

    def __init__(self):
        workflow.Workflow.__init__(self)

        # DATA IMPORT
        load = workflow.Stage('load', 'Data')
        load.add_function(DatasetLoadFunctionSmokerSmall())
        load.add_function(DatasetLoadFunctionSmokerMedium())
        load.add_function(DatasetLoadFunctionSmokerFull())
        load.add_function(DatasetLoadFunctionSmokerGO())
        #load.add_function(DatasetLoadFunctionCYCLE())
        self.add_stage(load)

        # NETWORK PREPROCESSING
        #net = workflow.Stage('net', 'Network integration')
        #net.add_function(DiffKernelFunction())
        #net.add_function(ModKernelFunction())
        #net.add_function(RandDiffKernelFunction())
        #self.add_stage(net)

        # BLM's
        model = workflow.Stage('models', 'Models')
        model.add_function(blmfuncs.PCA())
        model.add_function(blmfuncs.PLS())
        model.add_function(blmfuncs.LPLS())
        model.add_function(SAM())

        #model.add_function(bioconFuncs.SAM(app))
        self.add_stage(model)

        query = workflow.Stage('query', 'Gene Query')
        query.add_function(NCBIQuery())
        query.add_function(KEGGQuery())
        self.add_stage(query)

        # Gene Ontology
        go = workflow.Stage('go', 'Gene Ontology')
        go.add_function(gobrowser.LoadGOFunction())
        go.add_function(gobrowser.SetICFunction())
        # go.add_function(gobrowser.GOWeightFunction())
        # go.add_function(gobrowser.DistanceToSelectionFunction())
        # go.add_function(gobrowser.TTestFunction())
        go.add_function(gobrowser.PlotDagFunction())
        go.add_function(GoEnrichment())
        go.add_function(GoEnrichmentCond())
        self.add_stage(go)

        # EXTRA PLOTS
        #plt = workflow.Stage('net', 'Network')
        #plt.add_function(nx_analyser.KeggNetworkAnalyser())
        #self.add_stage(plt)

        logger.log('debug', 'Small test workflow is now active')

class DatasetLoadFunctionSmokerSmall(workflow.Function):
    """Loader for all ftsv files of the small smokers dataset."""
    def __init__(self):
        workflow.Function.__init__(self, 'load_small', 'Smoker (Small)')

    def run(self):
        path = 'data/smokers-small/'
        files = os.listdir(path)
        out = []
        for fname in files:
            if fname.endswith('.ftsv'):
                input_file = open(os.path.join(path, fname))
                out.append(dataset.read_ftsv(input_file))
        return out


class DatasetLoadFunctionSmokerMedium(workflow.Function):
    """Loader for all ftsv files of the medium smokers dataset."""
    def __init__(self):
        workflow.Function.__init__(self, 'load_medium', 'Smoker (Medium)')

    def run(self):
        path = 'data/smokers-medium/'
        files = os.listdir(path)
        out = []
        for fname in files:
            if fname.endswith('.ftsv'):
                input_file = open(os.path.join(path, fname))
                out.append(dataset.read_ftsv(input_file))
        return out


class DatasetLoadFunctionSmokerFull(workflow.Function):
    """Loader for all ftsv files of the full smokers dataset."""
    def __init__(self):
        workflow.Function.__init__(self, 'load_full', 'Smoker (Full)')

    def run(self):
        path = 'data/smokers-full/'
        files = os.listdir(path)
        out = []
        for fname in files:
            if fname.endswith('.ftsv'):
                input_file = open(os.path.join(path, fname))
                out.append(dataset.read_ftsv(input_file))
        return out


class DatasetLoadFunctionSmokerGO(workflow.Function):
    """Loader for all ftsv files of the smokers GO dataset."""
    def __init__(self):
        workflow.Function.__init__(self, 'load_go', 'Smoker (GO)')

    def run(self):
        path = 'data/smokers-go/'
        files = os.listdir(path)
        out = []
        for fname in files:
            if fname.endswith('.ftsv'):
                input_file = open(os.path.join(path, fname))
                out.append(dataset.read_ftsv(input_file))
        return out


class DatasetLoadFunctionCYCLE(workflow.Function):
    """Loader for pickled CYCLE datasets."""
    def __init__(self):
        workflow.Function.__init__(self, 'load_data', 'Cycle')

    def run(self):
        filename = 'fluents/data/CYCLE'
        if filename:
            return dataset.from_file(filename)

##### WORKFLOW SPECIFIC FUNCTIONS ######
class SAM(workflow.Function):
    def __init__(self, id='sam', name='SAM'):
        workflow.Function.__init__(self, id, name)

    def run(self, x, y):

        n_iter = 50   #B
        alpha = 0.01  #cut off on qvals

        ###############

        # Main function call

        # setup preliminaries
        import rpy
        rpy.r.library("siggenes")
        rpy.r.library("multtest")

        cl = scipy.dot(y.asarray(), scipy.diag(scipy.arange(y.shape[1]))).sum(1)
        data = x.asarray().T
        sam = rpy.r.sam(data, cl=cl, B=n_iter, var_equal=False, med=False, s0=scipy.nan, rand=scipy.nan)
        qvals = scipy.asarray(rpy.r.slot(sam, "q.value"))
        pvals = scipy.asarray(rpy.r.slot(sam, "p.value"))

        sam_index = (qvals < alpha).nonzero()[0]

        # Update selection object
        dim_name = x.get_dim_name(1)
        sam_selection = x.get_identifiers(dim_name, indices=sam_index)
        main.project.set_selection(dim_name, sam_selection)

        sel = dataset.Selection('SAM selection')
        sel.select(dim_name, sam_selection)
        logger.log('notice', 'Number of significant variables (SAM): %s' % len(sam_selection))

        # ## OUTPUT ###
        xcolname = x.get_dim_name(1)  # genes
        x_col_ids = [xcolname, x.get_identifiers(xcolname, sorted=True)]
        sing_id = ['_john', ['0']]  #singleton
        D_qvals = dataset.Dataset(qvals, (x_col_ids, sing_id), name='q_vals')
        D_pvals = dataset.Dataset(pvals, (x_col_ids, sing_id), name='p_vals')

        # plots
        s_indx = qvals.flatten().argsort()
        s_ids = [x_col_ids[0], [x_col_ids[1][i] for i in s_indx]]
        xindex = scipy.arange(len(qvals))
        qvals_s = qvals.take(s_indx)
        D_qs = dataset.Dataset(qvals_s, (s_ids, sing_id), name="sorted qvals")
        Dind = dataset.Dataset(xindex, (s_ids, sing_id), name="dum")
        st = plots.ScatterPlot(D_qs, Dind, 'gene_ids', '_john', '0', '0', s=10, name='SAM qvals')

        return [D_qvals, D_pvals, D_qs, st, sel]

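# SAM.run() above wraps the Bioconductor 'siggenes' sam() routine through rpy:
# cl encodes the class memberships of y as integers, B=n_iter permutations are
# used, and genes whose q-value falls below alpha become the project's current
# selection. The returned q/p-value datasets share the gene dimension of x.
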
class DiffKernelFunction(workflow.Function):
    def __init__(self):
        workflow.Function.__init__(self, 'diffkernel', 'Diffusion')

    def run(self, x, a):
        """x is gene expression data, a is the network.
        """
        #sanity check:
        g = a.asnetworkx()
        genes = x.get_identifiers(x.get_dim_name(1), sorted=True)
        W = nx.adj_matrix(g, nodelist=genes)
        X = x.asarray()
        Xc, mn_x = cx_utils.mat_center(X, ret_mn=True)
        out = []
        alpha = 1.0
        beta = 1.0
        K = nx_utils.K_diffusion(W, alpha=alpha, beta=beta, normalised=True)
        Xp = scipy.dot(Xc, K) + mn_x
        # dataset
        row_ids = (x.get_dim_name(0),
                   x.get_identifiers(x.get_dim_name(0),
                                     sorted=True))
        col_ids = (x.get_dim_name(1),
                   x.get_identifiers(x.get_dim_name(1),
                                     sorted=True))

        xout = dataset.Dataset(Xp,
                               (row_ids, col_ids),
                               name=x.get_name()+'_diff'+str(alpha))
        out.append(xout)

        return out

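# DiffKernelFunction.run() above smooths expression over the network: X is
# mean-centered (via cx_utils.mat_center), multiplied by the normalised
# diffusion kernel built from the adjacency matrix W, and the mean is added
# back, so the output keeps the shape and identifiers of the input dataset.
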
class RandDiffKernelFunction(workflow.Function):
    def __init__(self):
        workflow.Function.__init__(self, 'diffkernel', 'Rand. Diff.')

    def run(self, x, a):
        """x is gene expression data, a is the network.
        """
        #sanity check:
        g = a.asnetworkx()
        genes = x.get_identifiers(x.get_dim_name(1))
        # randomise nodelist
        genes = [genes[i] for i in cx_utils.randperm(x.shape[1])]
        W = nx.adj_matrix(g, nodelist=genes)
        X = x.asarray()
        Xc, mn_x = cx_utils.mat_center(X, ret_mn=True)
        out = []
        alpha = 1.0
        beta = 1.0
        K = nx_utils.K_diffusion(W, alpha=alpha, beta=beta, normalised=True)

        Xp = scipy.dot(Xc, K) + mn_x
        # dataset
        row_ids = (x.get_dim_name(0),
                   x.get_identifiers(x.get_dim_name(0),
                                     sorted=True))
        col_ids = (x.get_dim_name(1),
                   x.get_identifiers(x.get_dim_name(1),
                                     sorted=True))

        xout = dataset.Dataset(Xp,
                               (row_ids, col_ids),
                               name=x.get_name()+'_diff'+str(alpha))
        out.append(xout)

        return out

class ModKernelFunction(workflow.Function):
    def __init__(self):
        workflow.Function.__init__(self, 'mokernel', 'Modularity')

    def run(self, x, a):
        X = x.asarray()
        g = a.asnetworkx()
        genes = x.get_identifiers(x.get_dim_name(1), sorted=True)
        W = nx.adj_matrix(g, nodelist=genes)
        out = []
        alpha = .2
        Xc, mn_x = cx_utils.mat_center(X, ret_mn=True)
        K = nx_utils.K_modularity(W, alpha=alpha)
        Xp = scipy.dot(Xc, K)
        Xp = Xp + mn_x

        # dataset
        row_ids = (x.get_dim_name(0),
                   x.get_identifiers(x.get_dim_name(0),
                                     sorted=True))
        col_ids = (x.get_dim_name(1),
                   x.get_identifiers(x.get_dim_name(1),
                                     sorted=True))
        xout = dataset.Dataset(Xp,
                               (row_ids, col_ids),
                               name=x.get_name()+'_mod'+str(alpha))
        out.append(xout)
        return out

class NCBIQuery(workflow.Function):
    def __init__(self, gene_id_name='gene_ids'):
        self._gene_id_name = gene_id_name
        workflow.Function.__init__(self, 'query', 'NCBI')

    def run(self):
        selection = main.project.get_selection()
        if not selection.has_key(self._gene_id_name):
            logger.log("notice", "Expected gene ids: %s, but got: %s" % (self._gene_id_name, selection.keys()))
            return None
        if len(selection[self._gene_id_name]) == 0:
            logger.log("notice", "No selected genes to query")
            return None

        base = 'http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?'
        options = {r'&db=' : 'gene',
                   r'&cmd=' : 'retrieve',
                   r'&dopt=' : 'full_report'}
        gene_str = ''.join([gene + "+" for gene in selection[self._gene_id_name]])
        options[r'&list_uids='] = gene_str[:-1]
        opt_str = ''.join([key + value for key, value in options.items()])
        web_str = base + opt_str
        webbrowser.open(web_str)

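# The NCBI query above just assembles an Entrez URL of the form
#   http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?&db=gene&cmd=retrieve&dopt=full_report&list_uids=<id1>+<id2>
# (option order depends on dict iteration) and opens it in the default browser.
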
class KEGGQuery(workflow.Function):
    def __init__(self, org='hsa', gene_id_name='gene_ids'):
        self._org = org
        self._gene_id_name = gene_id_name
        workflow.Function.__init__(self, 'query', 'KEGG')

    def run(self, selection):
        if not selection.has_key(self._gene_id_name):
            logger.log("notice", "Expected gene ids: %s, but got: %s" % (self._gene_id_name, selection.keys()))
            return None
        if len(selection[self._gene_id_name]) == 0:
            logger.log("notice", "No selected genes to query")
            return None

        base = r'http://www.genome.jp/dbget-bin/www_bget?'
        gene_str = ''.join([gene + "+" for gene in selection[self._gene_id_name]])
        gene_str = gene_str[:-1]
        gene_str = self._org + "+" + gene_str
        web_str = base + gene_str
        webbrowser.open(web_str)

class GoEnrichment(workflow.Function):
    def __init__(self):
        workflow.Function.__init__(self, 'goenrich', 'Go Enrichment')

    def run(self, data):
        import rpy
        rpy.r.library("GOstats")

        # Get universe
        # Here, we are using a defined dataset to represent the universe
        if not 'gene_ids' in data:
            logger.log('notice', 'No dimension called [gene_ids] in dataset: %s' % data.get_name())
            return
        universe = list(data.get_identifiers('gene_ids'))
        logger.log('notice', 'Universe consists of %s gene ids from %s' % (len(universe), data.get_name()))
        # Get current selection and validate
        curr_sel = main.project.get_selection()
        selected_genes = list(curr_sel['gene_ids'])
        if len(selected_genes) == 0:
            logger.log('notice', 'This function needs a current selection!')
            return

        # Hypergeometric parameter object
        pval_cutoff = 0.9999
        cond = False
        test_direction = 'over'
        params = rpy.r.new("GOHyperGParams",
                           geneIds=selected_genes,
                           annotation="hgu133a",
                           ontology="BP",
                           pvalueCutoff=pval_cutoff,
                           conditional=cond,
                           testDirection=test_direction
                           )
        # run test
        # result.keys(): ['Count', 'Term', 'OddsRatio', 'Pvalue', 'ExpCount', 'GOBPID', 'Size']
        result = rpy.r.summary(rpy.r.hyperGTest(params))

        # dataset
        terms = result['GOBPID']
        pvals = scipy.log(scipy.asarray(result['Pvalue']))
        row_ids = ('go-terms', terms)
        col_ids = ('_john', ['_doe'])

        xout = dataset.Dataset(pvals,
                               (row_ids, col_ids),
                               name='P values (enrichment)')
        return [xout]

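# GoEnrichment.run() above feeds the current gene selection to GOstats'
# hyperGTest with a permissive pvalueCutoff (0.9999), so essentially all BP
# terms come back; the summary columns include GOBPID and Pvalue, and the
# returned dataset stores log(p) per 'go-terms' identifier.
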
class GoEnrichmentCond(workflow.Function):
    """Enrichment conditioned on dag structure."""
    def __init__(self):
        workflow.Function.__init__(self, 'goenrich', 'Go Cond. Enrich.')

    def run(self, data):
        import rpy
        rpy.r.library("GOstats")

        # Get universe
        # Here, we are using a defined dataset to represent the universe
        if not 'gene_ids' in data:
            logger.log('notice', 'No dimension called [gene_ids] in dataset: %s' % data.get_name())
            return
        universe = list(data.get_identifiers('gene_ids'))
        logger.log('notice', 'Universe consists of %s gene ids from %s' % (len(universe), data.get_name()))
        # Get current selection and validate
        curr_sel = main.project.get_selection()
        selected_genes = list(curr_sel['gene_ids'])
        if len(selected_genes) == 0:
            logger.log('notice', 'This function needs a current selection!')
            return

        # Hypergeometric parameter object
        pval_cutoff = 0.9999
        cond = True
        test_direction = 'over'
        params = rpy.r.new("GOHyperGParams",
                           geneIds=selected_genes,
                           annotation="hgu133a",
                           ontology="BP",
                           pvalueCutoff=pval_cutoff,
                           conditional=cond,
                           testDirection=test_direction
                           )
        # run test
        # result.keys(): ['Count', 'Term', 'OddsRatio', 'Pvalue', 'ExpCount', 'GOBPID', 'Size']
        result = rpy.r.summary(rpy.r.hyperGTest(params))

        # dataset
        terms = result['GOBPID']
        pvals = scipy.log(scipy.asarray(result['Pvalue']))
        row_ids = ('go-terms', terms)
        col_ids = ('_john', ['_doe'])

        xout = dataset.Dataset(pvals,
                               (row_ids, col_ids),
                               name='P values (enrichment)')
        return [xout]

@ -1,122 +1,37 @@
import gtk
from fluents import dataset, logger, plots, workflow
#import geneontology
#import gostat
from scipy import array, randn, log, ones, newaxis
from scipy import array, randn, log, ones
import cPickle
import networkx


class TestWorkflow (workflow.Workflow):

    name = 'Test Workflow'
    ident = 'test'
    description = 'Test Gene Ontology Workflow. This workflow currently serves as a general testing workflow.'
    description = 'This workflow currently serves as a general testing workflow.'

    def __init__(self, app):
        workflow.Workflow.__init__(self, app)

        load = workflow.Stage('load', 'Load Data')
        load.add_function(CelFileImportFunction())
        load.add_function(DataLoadTestFunction(self))
        load.add_function(TestDataFunction())
        load.add_function(DatasetLoadFunction())
        load.add_function(SelectFunction())
        self.add_stage(load)

        preproc = workflow.Stage('preprocess', 'Preprocessing')
        preproc.add_function(DatasetLog())
        preproc.add_function(workflow.Function('rma', 'RMA'))
        self.add_stage(preproc)

        go = workflow.Stage('go', 'Gene Ontology Data')
        go.add_function(GODistanceFunction())
        go.add_function(ImagePlotFunction())
        self.add_stage(go)

        regression = workflow.Stage('regression', 'Regression')
        regression.add_function(workflow.Function('pls', 'PLS'))
        self.add_stage(regression)

        explore = workflow.Stage('explore', 'Explorative analysis')
        explore.add_function(PCAFunction(self))
        self.add_stage(explore)

        save = workflow.Stage('save', 'Save Data')
        save.add_function(DatasetSaveFunction())
        self.add_stage(save)


class LoadAnnotationsFunction(workflow.Function):

    def __init__(self):
        workflow.Function.__init__(self, 'load-go-ann', 'Load Annotations')
        workflow.Workflow.__init__(self)
        self.annotations = None

    def load_file(self, filename):
        test_stage = workflow.Stage('test', 'Test Data')
        f = open(filename)
        test_stage.tasks.append(TestDataTask)
        self.annotations = Annotations('genes', 'go-terms')
        test_stage.tasks.append(TestPlot)
        logger.log('notice', 'Loading annotation file: %s' % filename)
        self.stages.append(test_stage)

        for line in f.readlines():
            val = line.split(' \t')

            if len(val) > 1:
                val = [v.strip() for v in val]
                retval.add_annotations('genes', val[0],
                                       'go-terms', set(val[1:]))

    def on_response(self, dialog, response):
        if response == gtk.RESPONSE_OK:
            logger.log('notice', 'Reading file: %s' % dialog.get_filename())
            self.load_file(dialog.get_filename())

    def run(self):
        btns = ('Open', gtk.RESPONSE_OK, \
                'Cancel', gtk.RESPONSE_CANCEL)
        dialog = gtk.FileChooserDialog('Open GO Annotation File',
                                       buttons=btns)
        dialog.connect('response', self.on_response)
        dialog.run()
        dialog.destroy()
        return [self.annotations]


class GODistanceFunction(workflow.Function):

    def __init__(self):
        workflow.Function.__init__(self, 'go_diatance', 'GO Distances')
        self.output = None

    def run(self, data):
        logger.log('debug', 'datatype: %s' % type(data))
        if not type(data) == Annotations:
            return None

        logger.log('debug', 'dimensions: %s' % data.dimensions)

        genes = data.get_ids('genes')
        gene_distances = array((len(genes), len(genes)))

        return gene_distances


class ImagePlotFunction(workflow.Function):
class TestDataTask(workflow.Task):
    def __init__(self):
    name = "Test data"
        workflow.Function.__init__(self, 'image', 'Show Image')
    def __init__(self, input):
        workflow.Task.__init__(self, input)
    def run(self, data):
        return [plots.ImagePlot(data, name='foo')]


class TestDataFunction(workflow.Function):
    def __init__(self):
        workflow.Function.__init__(self, 'test_data', 'Generate Test Data')

    def run(self):
        logger.log('notice', 'Injecting foo test data')
        x = randn(500,15)
        X = dataset.Dataset(x)
        p = plots.ScatterPlot(X, X, 'rows', 'rows', '0_1', '0_2',name='scatter')
        dname = X.get_dim_name()[0]
        p2 = plots.ScatterMarkerPlot(X, X, 'rows', 'rows', '0_1', '0_2',name='marker')
        p = plots.ScatterPlot(X, X, dname, dname, '0_1', '0_2',name='scatter')
        graph = networkx.XGraph()
        for x in 'ABCDEF':
            for y in 'ADE':
@ -125,181 +40,25 @@ class TestDataFunction(workflow.Function):
        ds_plot = plots.NetworkPlot(ds)

        cds = dataset.CategoryDataset(ones([3, 3]))
        ds_scatter = plots.ScatterMarkerPlot(ds, ds, 'rows_0', 'rows_0', '0_1', '0_2')
        dname2 = cds.get_dim_name()[0]
        lp = plots.LineViewPlot(X,major_axis=0)
        ds_scatter = plots.ScatterMarkerPlot(cds, cds, dname2, dname2, '0_1', '0_2')
        lp = plots.LineViewPlot(X ,major_axis=0)
        vp = plots.VennPlot()
        return [X, ds, p, ds_plot, ds_scatter, p2, cds, lp, vp]
        self.datasets = [p]
        return [X, ds, p, ds_plot, ds_scatter, cds, lp, vp]


class SelectFunction(workflow.Function):
    def __init__(self):
        workflow.Function.__init__(self, 'select', 'Select')

    def run(self, data):
class TestPlot(workflow.Task):
        s = dataset.Selection('Arbitrary selection')
    name = "Test plot data"
        s.select('rows', ['0_1', '0_2'])
        return [s]


class DatasetLog(workflow.Function):
    def __init__(self, input):
    def __init__(self):
        workflow.Task.__init__(self, input)
        workflow.Function.__init__(self, 'log', 'Log')

    def run(self, data):
        logger.log('notice', 'Taking the log of dataset %s' % data.get_name())
        d = data.asarray()
        d = log(d)
        new_data_name = 'log(%s)' % data.get_name()
        ds = dataset.Dataset(d, name=new_data_name)
        return [ds]


class DatasetLoadFunction(workflow.Function):
    """Loader for previously pickled Datasets."""
    def __init__(self):
        workflow.Function.__init__(self, 'load_data', 'Load Pickled Dataset')

    def run(self):
        chooser = gtk.FileChooserDialog(title="Select cel files...", parent=None,
        #logger.log('notice', 'Injecting % dataset') %self.input[0].get_name()
                                        action=gtk.FILE_CHOOSER_ACTION_OPEN,
        X = self.input[0]
                                        buttons=(gtk.STOCK_CANCEL, gtk.RESPONSE_CANCEL,
        ii = X.get_dim_name()[0]
                                                 gtk.STOCK_OPEN, gtk.RESPONSE_OK))
        ids = X.get_identifiers(ii, indices=[0,1])
        pkl_filter = gtk.FileFilter()
        p = plots.ScatterPlot(X, X, ii, ii, ids[0], ids[1],name='scatter')
        pkl_filter.set_name("Python pickled data files (*.pkl)")
        pkl_filter.add_pattern("*.[pP][kK][lL]")
        all_filter = gtk.FileFilter()
        all_filter.set_name("All Files (*.*)")
        all_filter.add_pattern("*")
        chooser.add_filter(pkl_filter)
        chooser.add_filter(all_filter)

        try:
        return [p]
            if chooser.run() == gtk.RESPONSE_OK:
                return [cPickle.load(open(chooser.get_filename()))]
        finally:
            chooser.destroy()


class DatasetSaveFunction(workflow.Function):
    """QND way to save data to file for later import to this program."""
    def __init__(self):
        workflow.Function.__init__(self, 'save_data', 'Save Pickled Dataset')

    def run(self):
        if not data:
            logger.log("notice", "No data to save.")
            return
        else:
            data = data[0]
        chooser = gtk.FileChooserDialog(title="Save pickled data...", parent=None,
                                        action=gtk.FILE_CHOOSER_ACTION_SAVE,
                                        buttons=(gtk.STOCK_CANCEL, gtk.RESPONSE_CANCEL,
                                                 gtk.STOCK_SAVE, gtk.RESPONSE_OK))
        pkl_filter = gtk.FileFilter()
        pkl_filter.set_name("Python pickled data files (*.pkl)")
        pkl_filter.add_pattern("*.[pP][kK][lL]")
        all_filter = gtk.FileFilter()
        all_filter.set_name("All Files (*.*)")
        all_filter.add_pattern("*")
        chooser.add_filter(pkl_filter)
        chooser.add_filter(all_filter)
        chooser.set_current_name(data.get_name() + ".pkl")

        try:
            if chooser.run() == gtk.RESPONSE_OK:
                cPickle.dump(data, open(chooser.get_filename(), "w"), protocol=2)
                logger.log("notice", "Saved data to %r." % chooser.get_filename())
        finally:
            chooser.destroy()


class CelFileImportFunction(workflow.Function):
    """Loads AffyMetrix .CEL-files into matrix."""
    def __init__(self):
        workflow.Function.__init__(self, 'cel_import', 'Import Affy')

    def run(self, data):
        import rpy
        chooser = gtk.FileChooserDialog(title="Select cel files...", parent=None,
                                        action=gtk.FILE_CHOOSER_ACTION_OPEN,
                                        buttons=(gtk.STOCK_CANCEL, gtk.RESPONSE_CANCEL,
                                                 gtk.STOCK_OPEN, gtk.RESPONSE_OK))
        chooser.set_select_multiple(True)
        cel_filter = gtk.FileFilter()
        cel_filter.set_name("Cel Files (*.cel)")
        cel_filter.add_pattern("*.[cC][eE][lL]")
        all_filter = gtk.FileFilter()
        all_filter.set_name("All Files (*.*)")
        all_filter.add_pattern("*")
        chooser.add_filter(cel_filter)
        chooser.add_filter(all_filter)

        try:
            if chooser.run() == gtk.RESPONSE_OK:
                rpy.r.library("affy")

                silent_eval = rpy.with_mode(rpy.NO_CONVERSION, rpy.r)
                silent_eval('E <- ReadAffy(filenames=c("%s"))' % '", "'.join(chooser.get_filenames()))
                silent_eval('E <- rma(E)')

                m = rpy.r('m <- E@exprs')

                vector_eval = rpy.with_mode(rpy.VECTOR_CONVERSION, rpy.r)
                rownames = vector_eval('rownames(m)')
                colnames = vector_eval('colnames(m)')

                # We should be nice and clean up after ourselves
                rpy.r.rm(["E", "m"])

                if m:
                    data = dataset.Dataset(m, (('ids', rownames), ('filename', colnames)), name="AffyMatrix Data")
                    plot = plots.LinePlot(data, "Gene profiles")
                    return [data, plot]
                else:
                    logger.log("notice", "No data loaded from importer.")
        finally:
            chooser.destroy()


class DataLoadTestFunction(workflow.Function):
    def __init__(self, wf):
        workflow.Function.__init__(self, 'datadirload', 'Load from datadir')
        self._wf = wf

    def run(self):
        print self._wf.get_data_file_name('smoker-x.ftsv')
        fn = self._wf.get_data_file_name('smoker-x.ftsv')
        if fn:
            fd = open(fn)
            ds = dataset.read_ftsv(fd)
            return [ds]
        else:
            print "Cannot find file %s" % fn
            return []


class PCAFunction(workflow.Function):
    """Generic PCA function."""
    def __init__(self, wf):
        workflow.Function.__init__(self, 'pca', 'PCA')
        self._workflow = wf

    def run(self, data):
        import rpy

        dim_2, dim_1 = data.get_dim_names()

        silent_eval = rpy.with_mode(rpy.NO_CONVERSION, rpy.r)
        rpy.with_mode(rpy.NO_CONVERSION, rpy.r.assign)("m", data.asarray())
        silent_eval("t = prcomp(t(m))")

        T_ids = map(str, range(1, rpy.r("dim(t$x)")[1]+1))
        T = dataset.Dataset(rpy.r("t$x"), [(dim_1, data.get_identifiers(dim_1)),
                                           ("component", T_ids)], name="T")
        P = dataset.Dataset(rpy.r("t$rotation"), [(dim_2, data.get_identifiers(dim_2)),
                                                  ("component", T_ids)], name="P")

        # cleanup
        rpy.r.rm(["t", "m"])

        loading_plot = plots.ScatterMarkerPlot(P, P, 'ids','component','1','2', "Loadings")
        score_plot = plots.ScatterMarkerPlot(T, T,'filename','component','1','2', "Scores")

        return [T, P, loading_plot, score_plot]