This repository has been archived on 2024-07-04. You can view files and clone it, but cannot push or open issues or pull requests.
laydi/system/annotations.py

94 lines
3.2 KiB
Python

from sets import Set as set
set.update = set.union_update
import dataset
import scipy
class AnnotationsException(Exception):
    """ Raised when an Annotations object is created or used with
    dimensions it does not support.
    """
class Annotations:
    """ Two-way index of annotations between exactly two dimensions.

    self.dimensions maps each dimension label to a dict, which in turn
    maps an identifier in that dimension to the set of identifiers it has
    been linked to through add_annotations.
    """

    def __init__(self, *dimensions):
        """ Initializes a new Annotation with the given dimension labels.

        dimensions: the dimension labels; exactly two must be given.

        Raises AnnotationsException if the number of labels is not two.
        """
        if len(dimensions) != 2:
            msg = 'Annotations only supports two dimensions.'
            raise AnnotationsException(msg)

        self.dimensions = {}
        for d in dimensions:
            self.dimensions[d] = {}

    def _require_dimension(self, dim):
        """ Raises AnnotationsException unless dim is an indexed dimension.
        """
        if not self.has_dimension(dim):
            msg = 'Annotations object does not contain dimension %s' % dim
            raise AnnotationsException(msg)

    def add_annotations(self, dim, id, ann_dim, annotations):
        """ Adds new annotations.

        dim: the dimension in which the new data should be added.
        id: the identifier that should be annotated.
        ann_dim: the dimension of the annotations to id.
        annotations: the new annotations to id (any iterable).

        Returns the total set of annotations to id.
        Raises AnnotationsException if dim or ann_dim is not indexed.
        """
        self._require_dimension(dim)
        self._require_dimension(ann_dim)

        # The annotations are walked twice (reverse and forward mapping),
        # so materialise them once in case a one-shot iterator was passed.
        annotations = list(annotations)

        # Reverse mapping: every annotation points back to id.
        reverse = self.dimensions[ann_dim]
        for a in annotations:
            if a not in reverse:
                reverse[a] = set()
            reverse[a].add(id)

        # Forward mapping: id points to all of its annotations.
        forward = self.dimensions[dim]
        if id not in forward:
            forward[id] = set()
        forward[id].update(annotations)

        return forward[id]

    def get_annotations(self, dim, id, ann_dim):
        """ Returns all annotations to id.

        dim: the dimension where id can be found.
        id: the id to retrieve annotations for.
        ann_dim: the dimension of the annotations; currently not used
                 by the lookup.

        Returns the stored set for id (the internal set, not a copy), or
        a new empty set if id has no annotations.
        Raises AnnotationsException if dim is not indexed.
        """
        self._require_dimension(dim)

        entries = self.dimensions[dim]
        if id in entries:
            return entries[id]
        return set()

    def has_dimension(self, dim):
        """ Returns true if the Annotations object indexes dim.
        """
        return dim in self.dimensions

    def to_dataset(self, dim):
        """ Returns a dataset representation of annotations.

        dim: the dimension whose identifiers become the rows.

        The result is a dataset.Dataset wrapping a matrix with one row per
        identifier in dim and one column per distinct annotation, where a
        cell is 1 if the row identifier carries the column annotation and
        0 otherwise. The two axes are labelled 'genes' and 'go'.

        Raises AnnotationsException if dim is not indexed.
        """
        self._require_dimension(dim)

        annotated = self.dimensions[dim]
        gene_list = list(annotated)

        # Collect every distinct annotation used by any identifier in dim.
        all_categories = set()
        for cats in annotated.values():
            all_categories.update(cats)
        cat_list = list(all_categories)

        # Column position of each annotation, matching the order of cat_list.
        column = {}
        for j, cat in enumerate(cat_list):
            column[cat] = j

        # NOTE(review): the 'bwu' typecode and the scipy.zeros alias are
        # kept from the original; confirm the intended element type
        # against the numpy/scipy version this project runs on.
        matrix = scipy.zeros((len(gene_list), len(cat_list)), 'bwu')
        for i, gene in enumerate(gene_list):
            # Mark only the annotations this identifier actually has.
            for cat in annotated[gene]:
                matrix[i, column[cat]] = 1

        # NOTE(review): axis names are fixed to 'genes'/'go' regardless of
        # dim, as in the original; verify against callers.
        def_list = [['genes', gene_list], ['go', cat_list]]
        return dataset.Dataset(matrix, def_list)