"""Two-way annotation index between two labelled dimensions."""

try:
    # Python 2: keep using sets.Set exactly as before.  sets.Set spells the
    # in-place union ``union_update``; alias it to ``update``, which is the
    # name the code below calls.
    from sets import Set as set
    set.update = set.union_update
except ImportError:
    # The ``sets`` module is gone in Python 3; the builtin ``set`` already
    # provides ``update``, so nothing needs to be patched.
    pass

import dataset
import scipy


class AnnotationsException(Exception):
    """Raised when an Annotations object is built or queried incorrectly."""
    pass


class Annotations:
    """Maps identifiers in one dimension to identifiers in the other.

    ``self.dimensions`` is a dict keyed by dimension label.  Each value is a
    dict mapping an identifier in that dimension to the set of identifiers
    it is linked to.  ``add_annotations`` writes the link in both directions.
    """

    def __init__(self, *dimensions):
        """
        Initializes a new Annotation with the given dimension labels.

        dimensions is a list of dimension labels.

        Raises AnnotationsException unless exactly two labels are given.
        """
        if len(dimensions) != 2:
            msg = 'Annotations only supports two dimensions.'
            raise AnnotationsException(msg)

        self.dimensions = {}
        for d in dimensions:
            self.dimensions[d] = {}

    def _require_dimension(self, dim):
        """Raise AnnotationsException if dim is not indexed by this object."""
        if not self.has_dimension(dim):
            msg = 'Annotations object does not contain dimension %s' % dim
            raise AnnotationsException(msg)

    def add_annotations(self, dim, id, ann_dim, annotations):
        """
        Adds new annotations.

        dim: the dimension in which the new data should be added.
        id: the identifier that should be annotated.
        ann_dim: the dimension of the annotations to id.
        annotations: an iterable with the new annotations to id.

        Returns the total set of annotations to id.  This is the set stored
        internally, not a copy.

        Raises AnnotationsException if dim or ann_dim is unknown.
        """
        self._require_dimension(dim)
        self._require_dimension(ann_dim)

        # annotations is walked twice below; materialise it so that a
        # one-shot iterator is not already exhausted on the second pass.
        annotations = list(annotations)

        # Reverse direction: every annotation points back at id.
        reverse = self.dimensions[ann_dim]
        for a in annotations:
            reverse.setdefault(a, set()).add(id)

        # Forward direction: id points at all of its annotations.
        forward = self.dimensions[dim].setdefault(id, set())
        forward.update(annotations)
        return forward

    def get_annotations(self, dim, id, ann_dim):
        """Returns all annotations to id.

        dim: the dimension where id can be found.
        id: the id to retrieve annotations for.
        ann_dim: accepted for symmetry with add_annotations; it is not
            used by the lookup.

        Returns the stored set for id, or a new empty set if id has no
        annotations.

        Raises AnnotationsException if dim is unknown.
        """
        self._require_dimension(dim)
        if id in self.dimensions[dim]:
            return self.dimensions[dim][id]
        return set()

    def has_dimension(self, dim):
        """
        Returns true if the Annotations object indexes dim.
        """
        return dim in self.dimensions

    def to_dataset(self, dim):
        """
        Returns a dataset representation of annotations.

        dim: the dimension whose identifiers become the rows.

        The result is dataset.Dataset(matrix, def_list).  The matrix has one
        row per identifier in dim and one column per distinct annotation.
        Cell (i, j) is 1 when identifier i carries annotation j and 0
        otherwise.  def_list holds the row and column identifiers in matrix
        order.

        Returns None if dim is not indexed by this object.
        """
        if not self.has_dimension(dim):
            # Unlike the other methods this one has never raised for an
            # unknown dimension; keep returning None so callers still work.
            return None

        by_id = self.dimensions[dim]

        # Freeze both orderings once so that the identifier lists and the
        # matrix rows/columns are guaranteed to line up.
        gene_list = list(by_id)
        all_categories = set()
        for cats in by_id.values():
            all_categories.update(cats)
        cat_list = list(all_categories)

        column_of = {}
        for j, cat in enumerate(cat_list):
            column_of[cat] = j

        # NOTE(review): the 'bwu' typecode and scipy.zeros are carried over
        # unchanged -- confirm both are accepted by the scipy version in use.
        matrix = scipy.zeros((len(gene_list), len(cat_list)), 'bwu')
        for i, gene in enumerate(gene_list):
            # Mark only the categories this identifier is annotated with.
            for cat in by_id[gene]:
                matrix[i, column_of[cat]] = 1

        # NOTE(review): the labels 'genes' and 'go' are hard-coded and do not
        # depend on dim -- confirm this is what dataset.Dataset expects.
        def_list = [['genes', gene_list], ['go', cat_list]]
        return dataset.Dataset(matrix, def_list)