Added category dataset
This commit is contained in:
parent
b769288b3b
commit
677d368b60
|
@ -1,4 +1,4 @@
|
||||||
from scipy import atleast_2d,asarray,ArrayType,shape
|
from scipy import atleast_2d,asarray,ArrayType,shape,nonzero
|
||||||
from scipy import sort as array_sort
|
from scipy import sort as array_sort
|
||||||
from itertools import izip
|
from itertools import izip
|
||||||
|
|
||||||
|
@ -182,7 +182,7 @@ class Dataset:
|
||||||
else:
|
else:
|
||||||
index = [self._map[dim][key] for key in idents]
|
index = [self._map[dim][key] for key in idents]
|
||||||
return asarray(index)
|
return asarray(index)
|
||||||
|
|
||||||
class CategoryDataset(Dataset):
|
class CategoryDataset(Dataset):
|
||||||
"""The category dataset class.
|
"""The category dataset class.
|
||||||
|
|
||||||
|
@ -192,25 +192,30 @@ class CategoryDataset(Dataset):
|
||||||
There is support for a less memory-demanding representation with
|
There is support for a less memory-demanding representation with
|
||||||
fast intersection look-ups, by storing the binary matrix as a
|
fast intersection look-ups, by storing the binary matrix as a
|
||||||
dictionary in each dimension.
|
dictionary in each dimension.
|
||||||
|
|
||||||
|
The linked dimension is always the first dimension:
|
||||||
|
Example matrix:
|
||||||
|
go_term1 go_term2 ...
|
||||||
|
gene_1
|
||||||
|
gene_2
|
||||||
|
gene_3
|
||||||
|
.
|
||||||
|
.
|
||||||
|
.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def __init__(self):
|
def __init__(self,array,identifiers=None,name='A'):
|
||||||
Dataset.__init__(self)
|
Dataset.__init__(self,array,identifiers=identifiers,name=name)
|
||||||
|
self.has_dictlists = False
|
||||||
|
|
||||||
def as_collection(self,dim):
|
def as_dict_lists(self):
|
||||||
"""Returns data as collection along dim"""
|
"""Returns data as dict of indices along first dim"""
|
||||||
pass
|
data={}
|
||||||
|
for name,ind in self._map[self.get_dim_name(0)].items():
|
||||||
def add_collection(self,input_dict):
|
data[name] = list(nonzero(self._array[ind,:]))
|
||||||
"""Adds a category data as collection.
|
self._dictlists = data
|
||||||
|
self.has_dictlists=True
|
||||||
A collection is a datastructure that contains a dictionary for
|
return data
|
||||||
each pair of dimensions in the dataset, keyed by identifiers and
|
|
||||||
whose values are sets of identifiers in the other dimension
|
|
||||||
"""
|
|
||||||
#build category data as double dicts
|
|
||||||
pass
|
|
||||||
|
|
||||||
|
|
||||||
class GraphDataset(Dataset):
|
class GraphDataset(Dataset):
|
||||||
"""The graph dataset class.
|
"""The graph dataset class.
|
||||||
|
|
Reference in New Issue