Added category dataset

This commit is contained in:
Arnar Flatberg 2006-08-08 07:54:00 +00:00
parent b769288b3b
commit 677d368b60

View File

@ -1,4 +1,4 @@
from scipy import atleast_2d,asarray,ArrayType,shape
from scipy import atleast_2d,asarray,ArrayType,shape,nonzero
from scipy import sort as array_sort
from itertools import izip
@ -182,7 +182,7 @@ class Dataset:
else:
index = [self._map[dim][key] for key in idents]
return asarray(index)
class CategoryDataset(Dataset):
"""The category dataset class.
@ -192,25 +192,30 @@ class CategoryDataset(Dataset):
There is support for a less memory-demanding representation with
fast intersection look-ups, obtained by representing the binary
matrix as a dictionary in each dimension.
Always has linked dimension in first dim:
ex matrix:
go_term1 go_term2 ...
gene_1
gene_2
gene_3
.
.
.
"""
def __init__(self):
    """Initialise the instance by delegating to ``Dataset.__init__``.

    NOTE(review): a second ``__init__`` taking ``array``, ``identifiers``
    and ``name`` follows directly after this one in the listing; this
    zero-argument form looks like the pre-change version -- confirm.
    """
    Dataset.__init__(self)
def __init__(self, array, identifiers=None, name='A'):
    """Create a category dataset.

    ``array``, ``identifiers`` and ``name`` are forwarded unchanged to
    ``Dataset.__init__``; afterwards ``has_dictlists`` is set to False,
    and ``as_dict_lists`` flips it to True once it has built its cache.
    """
    Dataset.__init__(
        self,
        array,
        identifiers=identifiers,
        name=name,
    )
    # No dict-of-lists view has been built yet.
    self.has_dictlists = False
def as_collection(self, dim):
    """Return the data as a collection along ``dim``.

    Placeholder: there is no implementation yet, so ``dim`` is ignored
    and the call yields None.
    """
    return None
def add_collection(self, input_dict):
    """Add category data supplied as a collection.

    A collection is a data structure holding one dictionary for each
    pair of dimensions in the dataset; each dictionary is keyed by
    identifiers and its values are sets of identifiers in the other
    dimension.

    Placeholder: ``input_dict`` is currently ignored and None is returned.
    """
    # TODO: build category data as double dicts
    return None
def as_dict_lists(self):
    """Return the data as a dict of index lists along the first dimension.

    For every identifier in the first dimension, the matching row of
    ``self._array`` is scanned and the positions of its non-zero entries
    are collected.

    The result is also stored on the instance as ``self._dictlists`` and
    ``self.has_dictlists`` is set to True.

    Returns:
        dict mapping each first-dimension identifier to a list of integer
        indices of the non-zero entries in its row.

    NOTE(review): assumes ``self._array`` is 2-D (the binary matrix
    described in the class docstring), so each row slice is 1-D -- confirm.
    """
    first_dim = self.get_dim_name(0)
    data = {}
    for name, ind in self._map[first_dim].items():
        # nonzero() returns a tuple with one index array per axis of its
        # argument. Wrapping that tuple in list() would give [array([...])]
        # rather than the indices, so take the single axis of the 1-D row
        # and convert it to a plain list.
        data[name] = nonzero(self._array[ind, :])[0].tolist()
    self._dictlists = data
    self.has_dictlists = True
    return data
class GraphDataset(Dataset):
"""The graph dataset class.