2006-04-17 01:30:01 +02:00
|
|
|
|
|
|
|
# Use the builtin ``set`` whenever the interpreter provides one.  Only very
# old interpreters without a builtin fall back to the ``sets`` module, which
# is gone in Python 3 and would make an unconditional import fail there.
try:
    set
except NameError:
    from sets import Set as set
    # ``sets.Set`` names the in-place union ``union_update``; alias it so the
    # rest of this module can call ``update`` on either implementation.
    set.update = set.union_update
|
2006-04-19 12:37:44 +02:00
|
|
|
import dataset
|
|
|
|
import scipy
|
|
|
|
|
2006-04-17 01:30:01 +02:00
|
|
|
|
|
|
|
class AnnotationsException(Exception):
    """ Error raised by Annotations, e.g. for an unknown dimension or a
    wrong number of dimension labels.
    """
|
2006-04-17 00:06:29 +02:00
|
|
|
|
|
|
|
class Annotations:
    """ Two-way index of annotations between exactly two dimensions.

    self.dimensions maps every dimension label to a dict; that dict maps an
    identifier to the set of identifiers it has been linked to through
    add_annotations.
    """

    def __init__(self, *dimensions):
        """ Initializes a new Annotation with the given dimension labels.

        dimensions: the dimension labels; exactly two must be given.

        Raises AnnotationsException if the number of labels is not two.
        """
        if len(dimensions) != 2:
            msg = 'Annotations only supports two dimensions.'
            raise AnnotationsException(msg)

        self.dimensions = {}
        for d in dimensions:
            self.dimensions[d] = {}

    def _require_dimension(self, dim):
        """ Raises AnnotationsException unless dim is an indexed dimension.
        """
        if not self.has_dimension(dim):
            msg = 'Annotations object does not contain dimension %s' % dim
            raise AnnotationsException(msg)

    def add_annotations(self, dim, id, ann_dim, annotations):
        """ Adds new annotations.

        dim: the dimension in which the new data should be added.
        id: the identifier that should be annotated.
        ann_dim: the dimension of the annotations to id.
        annotations: the new annotations to id (any iterable).

        Returns the total set of annotations to id.  This is the set stored
        in the index itself, not a copy.

        Raises AnnotationsException if dim or ann_dim is not indexed.
        """
        self._require_dimension(dim)
        self._require_dimension(ann_dim)

        # The annotations are traversed twice below (reverse index, then
        # forward index); materialize them once so that a one-shot iterator
        # is not already exhausted on the second pass.
        annotations = list(annotations)

        # Reverse direction: every annotation points back to id.
        reverse_index = self.dimensions[ann_dim]
        for a in annotations:
            reverse_index.setdefault(a, set()).add(id)

        # Forward direction: id points to all of its annotations.
        total = self.dimensions[dim].setdefault(id, set())
        total.update(annotations)

        return total

    def get_annotations(self, dim, id, ann_dim):
        """Returns all annotations to id.

        dim: the dimension where id can be found.
        id: the id to retrieve annotations for.
        ann_dim: accepted for symmetry with add_annotations; it is not
                 used by the lookup.

        Returns the stored set for id (not a copy), or a new empty set if
        id has no annotations.

        Raises AnnotationsException if dim is not indexed.
        """
        self._require_dimension(dim)

        if id in self.dimensions[dim]:
            return self.dimensions[dim][id]
        return set()

    def has_dimension(self, dim):
        """ Returns true if the Annotations object indexes dim.
        """
        return dim in self.dimensions

    def to_dataset(self, dim):
        """ Returns a dataset representation of annotations.

        dim: the dimension whose identifiers become the rows.

        The matrix has one row per identifier in dim and one column per
        distinct annotation of those identifiers; an entry is 1 where the
        row identifier carries the column annotation and 0 elsewhere.

        Returns None if dim is not indexed.
        """
        if not self.has_dimension(dim):
            return None

        index = self.dimensions[dim]

        # Freeze both label orders once so that row/column positions in the
        # matrix line up with the label lists handed to the Dataset.
        gene_list = list(index)
        all_categories = set()
        for cats in index.values():
            all_categories.update(cats)
        cat_list = list(all_categories)

        # NOTE(review): the 'bwu' typecode is kept exactly as it was;
        # confirm that the installed scipy accepts it and which dtype is
        # intended.
        matrix = scipy.zeros((len(gene_list), len(cat_list)), 'bwu')
        for i, gene in enumerate(gene_list):
            annotated = index[gene]
            for j, cat in enumerate(cat_list):
                # Mark only real memberships, not every cell.
                if cat in annotated:
                    matrix[i, j] = 1

        def_list = [['genes', gene_list], ['go', cat_list]]

        return dataset.Dataset(matrix, def_list)
|
|
|
|
|
|
|
|
|
|
|
|
|