Added category dataset
This commit is contained in:
parent
b769288b3b
commit
677d368b60
|
@ -1,4 +1,4 @@
|
|||
from scipy import atleast_2d,asarray,ArrayType,shape
|
||||
from scipy import atleast_2d,asarray,ArrayType,shape,nonzero
|
||||
from scipy import sort as array_sort
|
||||
from itertools import izip
|
||||
|
||||
|
@ -192,25 +192,30 @@ class CategoryDataset(Dataset):
|
|||
There is support for less memory-demanding storage and
|
||||
fast intersection look-ups by representing the binary matrix as a
|
||||
dictionary in each dimension.
|
||||
|
||||
The linked dimension is always the first dim:
|
||||
ex matrix:
|
||||
go_term1 go_term2 ...
|
||||
gene_1
|
||||
gene_2
|
||||
gene_3
|
||||
.
|
||||
.
|
||||
.
|
||||
"""
|
||||
|
||||
def __init__(self):
    """Set up the instance by running the Dataset base initialiser.

    No arguments are forwarded to Dataset.__init__.
    """
    Dataset.__init__(self)
|
||||
|
||||
def as_collection(self, dim):
    """Return the data as a collection along dim.

    Placeholder: nothing is computed yet, so the result is None.
    """
    return None
|
||||
|
||||
def add_collection(self, input_dict):
    """Add category data supplied as a collection.

    A collection is a data structure holding one dictionary for each
    pair of dimensions in the dataset.  Each dictionary is keyed by
    identifiers, and every value is the set of identifiers in the
    other dimension.

    Placeholder: input_dict is not used yet and None is returned.
    """
    # Still to do: build the category data as double dicts.
    return None
|
||||
def __init__(self, array, identifiers=None, name='A'):
    """Initialise via Dataset.__init__ and mark the dict-lists as unbuilt.

    array, identifiers and name are handed to Dataset.__init__ unchanged.
    """
    Dataset.__init__(self, array, identifiers=identifiers, name=name)
    # Flipped to True by as_dict_lists() once the dict of lists exists.
    self.has_dictlists = False
|
||||
|
||||
def as_dict_lists(self):
    """Return the data as a dict of nonzero indices along the first dim.

    For every (name, index) pair in the identifier map of dimension 0,
    the row ``self._array[index, :]`` is scanned and the positions of
    its nonzero entries are collected into a plain list of ints.

    Side effects: the result is stored on ``self._dictlists`` and
    ``self.has_dictlists`` is set to True.

    Returns a dict mapping each first-dimension identifier to the list
    of indices (in the other dimension) where its row is nonzero.
    """
    data = {}
    for name, ind in self._map[self.get_dim_name(0)].items():
        hits = nonzero(self._array[ind, :])
        # NumPy-style nonzero returns a tuple with one index array per
        # axis, whereas the older array packages returned the index
        # array directly.  The last axis holds the column positions for
        # both a 1-d row and a 1 x n matrix row.
        if isinstance(hits, tuple):
            hits = hits[-1]
        data[name] = [int(i) for i in hits]
    self._dictlists = data
    self.has_dictlists = True
    return data
|
||||
|
||||
class GraphDataset(Dataset):
|
||||
"""The graph dataset class.
|
||||
|
|
Reference in New Issue