Added support for sparse category-dataset
This commit is contained in:
parent
a84731da30
commit
bed280353b
|
@ -1,10 +1,11 @@
|
||||||
from scipy import ndarray,atleast_2d,asarray,intersect1d,zeros
|
from scipy import ndarray,atleast_2d,asarray,intersect1d,zeros,empty,sparse
|
||||||
from scipy import sort as array_sort
|
from scipy import sort as array_sort
|
||||||
from itertools import izip
|
from itertools import izip
|
||||||
import shelve
|
import shelve
|
||||||
import copy
|
import copy
|
||||||
import re
|
import re
|
||||||
|
|
||||||
|
|
||||||
class Dataset:
|
class Dataset:
|
||||||
"""The Dataset base class.
|
"""The Dataset base class.
|
||||||
|
|
||||||
|
@ -42,16 +43,15 @@ class Dataset:
|
||||||
self._map = {} # internal mapping for dataset: identifier <--> index
|
self._map = {} # internal mapping for dataset: identifier <--> index
|
||||||
self._name = name
|
self._name = name
|
||||||
self._identifiers = identifiers
|
self._identifiers = identifiers
|
||||||
self._type = 'n'
|
|
||||||
|
|
||||||
if len(array.shape)==1:
|
if not isinstance(array, sparse.spmatrix):
|
||||||
array = atleast_2d(asarray(array))
|
array = atleast_2d(asarray(array))
|
||||||
# vectors are column vectors
|
# vectors are stored as columns
|
||||||
if array.shape[0]==1:
|
if array.shape[0] == 1:
|
||||||
array = array.T
|
array = array.T
|
||||||
self.shape = array.shape
|
self.shape = array.shape
|
||||||
|
|
||||||
if identifiers!=None:
|
if identifiers != None:
|
||||||
self._validate_identifiers(identifiers)
|
self._validate_identifiers(identifiers)
|
||||||
self._set_identifiers(identifiers, self._all_dims)
|
self._set_identifiers(identifiers, self._all_dims)
|
||||||
else:
|
else:
|
||||||
|
@ -82,14 +82,14 @@ class Dataset:
|
||||||
|
|
||||||
dim_names = ['rows','cols']
|
dim_names = ['rows','cols']
|
||||||
ids = []
|
ids = []
|
||||||
for axis,n in enumerate(shape):
|
for axis, n in enumerate(shape):
|
||||||
if axis<2:
|
if axis < 2:
|
||||||
dim_suggestion = dim_names[axis]
|
dim_suggestion = dim_names[axis]
|
||||||
else:
|
else:
|
||||||
dim_suggestion = 'dim'
|
dim_suggestion = 'dim'
|
||||||
dim_suggestion = self._suggest_dim_name(dim_suggestion,all_dims)
|
dim_suggestion = self._suggest_dim_name(dim_suggestion, all_dims)
|
||||||
identifier_creation = [str(axis) + "_" + i for i in map(str,range(n))]
|
identifier_creation = [str(axis) + "_" + i for i in map(str, range(n))]
|
||||||
ids.append((dim_suggestion,identifier_creation))
|
ids.append((dim_suggestion, identifier_creation))
|
||||||
all_dims.add(dim_suggestion)
|
all_dims.add(dim_suggestion)
|
||||||
return ids
|
return ids
|
||||||
|
|
||||||
|
@ -112,18 +112,22 @@ class Dataset:
|
||||||
new_name = dim_name
|
new_name = dim_name
|
||||||
while new_name in all_dims:
|
while new_name in all_dims:
|
||||||
new_name = dim_name + "_" + str(c)
|
new_name = dim_name + "_" + str(c)
|
||||||
c+=1
|
c += 1
|
||||||
return new_name
|
return new_name
|
||||||
|
|
||||||
def asarray(self):
|
def asarray(self):
|
||||||
"""Returns the numeric array (data) of dataset"""
|
"""Returns the numeric array (data) of dataset"""
|
||||||
|
if isinstance(self._array, sparse.spmatrix):
|
||||||
|
return self._array.toarray()
|
||||||
return self._array
|
return self._array
|
||||||
|
|
||||||
def add_array(self, array):
|
def set_array(self, array):
|
||||||
"""Adds array as an ArrayType object.
|
"""Adds array as an ArrayType object.
|
||||||
A one-dim array is transformed to a two-dim array (row-vector)
|
A one-dim array is transformed to a two-dim array (row-vector)
|
||||||
"""
|
"""
|
||||||
if self.shape!=array.shape:
|
if not isinstance(array, type(self._array)):
|
||||||
|
raise ValueError("Input array of type: %s does not match existing array type: %s") %(type(array), type(self._array))
|
||||||
|
if self.shape != array.shape:
|
||||||
raise ValueError, "Input array must be of similar dimensions as dataset"
|
raise ValueError, "Input array must be of similar dimensions as dataset"
|
||||||
self._array = atleast_2d(asarray(array))
|
self._array = atleast_2d(asarray(array))
|
||||||
|
|
||||||
|
@ -138,7 +142,7 @@ class Dataset:
|
||||||
def get_dim_name(self, axis=None):
|
def get_dim_name(self, axis=None):
|
||||||
"""Returns dim name for an axis, if no axis is provided it
|
"""Returns dim name for an axis, if no axis is provided it
|
||||||
returns a list of dims"""
|
returns a list of dims"""
|
||||||
if type(axis)==int:
|
if type(axis) == int:
|
||||||
return self._dims[axis]
|
return self._dims[axis]
|
||||||
else:
|
else:
|
||||||
return [dim for dim in self._dims]
|
return [dim for dim in self._dims]
|
||||||
|
@ -149,7 +153,7 @@ class Dataset:
|
||||||
ds_dims = ds.get_dim_name()
|
ds_dims = ds.get_dim_name()
|
||||||
return [d for d in dims if d in ds_dims]
|
return [d for d in dims if d in ds_dims]
|
||||||
|
|
||||||
def get_identifiers(self, dim, indices=None,sorted=False):
|
def get_identifiers(self, dim, indices=None, sorted=False):
|
||||||
"""Returns identifiers along dim, sorted by position (index)
|
"""Returns identifiers along dim, sorted by position (index)
|
||||||
is optional.
|
is optional.
|
||||||
|
|
||||||
|
@ -163,7 +167,6 @@ class Dataset:
|
||||||
if indices != None:
|
if indices != None:
|
||||||
if len(indices) == 0:# if empty list or empty array
|
if len(indices) == 0:# if empty list or empty array
|
||||||
return []
|
return []
|
||||||
|
|
||||||
if indices != None:
|
if indices != None:
|
||||||
# be sure to match intersection
|
# be sure to match intersection
|
||||||
#indices = intersect1d(self.get_indices(dim),indices)
|
#indices = intersect1d(self.get_indices(dim),indices)
|
||||||
|
@ -188,7 +191,7 @@ class Dataset:
|
||||||
"""
|
"""
|
||||||
if not isinstance(idents, list) and not isinstance(idents, set):
|
if not isinstance(idents, list) and not isinstance(idents, set):
|
||||||
raise ValueError("idents needs to be a list/set got: %s" %type(idents))
|
raise ValueError("idents needs to be a list/set got: %s" %type(idents))
|
||||||
if idents==None:
|
if idents == None:
|
||||||
index = array_sort(self._map[dim].values())
|
index = array_sort(self._map[dim].values())
|
||||||
else:
|
else:
|
||||||
index = [self._map[dim][key]
|
index = [self._map[dim][key]
|
||||||
|
@ -226,7 +229,7 @@ class Dataset:
|
||||||
As for the moment: only support for 2D-arrays.
|
As for the moment: only support for 2D-arrays.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
#assert(self._array==ndarray)
|
assert(len(self.shape) == 2)
|
||||||
ds = self.copy()
|
ds = self.copy()
|
||||||
ds._array = ds._array.T
|
ds._array = ds._array.T
|
||||||
ds._dims.reverse()
|
ds._dims.reverse()
|
||||||
|
@ -234,12 +237,11 @@ class Dataset:
|
||||||
return ds
|
return ds
|
||||||
|
|
||||||
def _validate_identifiers(self, identifiers):
|
def _validate_identifiers(self, identifiers):
|
||||||
|
|
||||||
for dim_name, ids in identifiers:
|
for dim_name, ids in identifiers:
|
||||||
if len(set(ids)) != len(ids):
|
if len(set(ids)) != len(ids):
|
||||||
raise ValueError("Identifiers not unique in : %s" %dim_name)
|
raise ValueError("Identifiers not unique in : %s" %dim_name)
|
||||||
identifier_shape = [len(i[1]) for i in identifiers]
|
identifier_shape = [len(i[1]) for i in identifiers]
|
||||||
if len(identifier_shape)!=len(self.shape):
|
if len(identifier_shape) != len(self.shape):
|
||||||
raise ValueError("Identifier list length must equal array dims")
|
raise ValueError("Identifier list length must equal array dims")
|
||||||
for ni, na in zip(identifier_shape, self.shape):
|
for ni, na in zip(identifier_shape, self.shape):
|
||||||
if ni != na:
|
if ni != na:
|
||||||
|
@ -252,9 +254,9 @@ class CategoryDataset(Dataset):
|
||||||
A dataset for representing class information as binary
|
A dataset for representing class information as binary
|
||||||
matrices (0/1-matrices).
|
matrices (0/1-matrices).
|
||||||
|
|
||||||
There is support for using a less memory demanding, and
|
There is support for using a less memory demanding, sparse format. The
|
||||||
fast intersection look-ups by representing the binary matrix as a
|
preferred (default) format for a category dataset is the compressed sparse row
|
||||||
dictionary in each dimension.
|
format (csr)
|
||||||
|
|
||||||
Always has linked dimension in first dim:
|
Always has linked dimension in first dim:
|
||||||
ex matrix:
|
ex matrix:
|
||||||
|
@ -265,33 +267,51 @@ class CategoryDataset(Dataset):
|
||||||
.
|
.
|
||||||
.
|
.
|
||||||
.
|
.
|
||||||
|
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def __init__(self, array, identifiers=None, name='C'):
|
def __init__(self, array, identifiers=None, name='C'):
|
||||||
Dataset.__init__(self, array, identifiers=identifiers, name=name)
|
Dataset.__init__(self, array, identifiers=identifiers, name=name)
|
||||||
self.has_dictlists = False
|
|
||||||
self._type = 'c'
|
|
||||||
|
|
||||||
def as_dict_lists(self):
|
def as_dict_lists(self):
|
||||||
"""Returns data as dict of indices along first dim.
|
"""Returns data as dict of identifiers along first dim.
|
||||||
|
|
||||||
ex: data['gene_id'] = ['map0030','map0010', ...]
|
ex: data['gene_1'] = ['map0030','map0010', ...]
|
||||||
|
|
||||||
|
fixme: Deprecated?
|
||||||
"""
|
"""
|
||||||
data={}
|
data = {}
|
||||||
for name, ind in self._map[self.get_dim_name(0)].items():
|
for name, ind in self._map[self.get_dim_name(0)].items():
|
||||||
data[name] = self.get_identifiers(self.get_dim_name(1),
|
if isinstance(self._array, ndarray):
|
||||||
list(self._array[ind,:].nonzero()))
|
indices = self._array[ind,:].nonzero()[0]
|
||||||
|
elif isinstance(self._array, sparse.spmatrix):
|
||||||
|
if not isinstance(self._array, sparse.csr_matrix):
|
||||||
|
array = self._array.tocsr()
|
||||||
|
else:
|
||||||
|
array = self._array
|
||||||
|
indices = array[ind,:].indices
|
||||||
|
if len(indices) == 0: # should we allow categories with no members?
|
||||||
|
continue
|
||||||
|
data[name] = self.get_identifiers(self.get_dim_name(1), indices)
|
||||||
self._dictlists = data
|
self._dictlists = data
|
||||||
self.has_dictlists = True
|
|
||||||
return data
|
return data
|
||||||
|
|
||||||
def as_selections(self):
|
def as_selections(self):
|
||||||
"""Returns data as a list of Selection objects.
|
"""Returns data as a list of Selection objects.
|
||||||
|
|
||||||
|
The list of selections is not ordered (sorted) by any means.
|
||||||
"""
|
"""
|
||||||
ret_list = []
|
ret_list = []
|
||||||
for cat_name, ind in self._map[self.get_dim_name(1)].items():
|
for cat_name, ind in self._map[self.get_dim_name(1)].items():
|
||||||
ids = self.get_identifiers(self.get_dim_name(0),
|
if isinstance(self._array, sparse.spmatrix):
|
||||||
self._array[:,ind].nonzero()[0])
|
if not isinstance(self._array, sparse.csc_matrix):
|
||||||
|
self._array = self._array.tocsc()
|
||||||
|
indices = self._array[:,ind].indices
|
||||||
|
else:
|
||||||
|
indices = self._array[:,ind].nonzero()[0]
|
||||||
|
if len(indices) == 0:
|
||||||
|
continue
|
||||||
|
ids = self.get_identifiers(self.get_dim_name(0), indices)
|
||||||
selection = Selection(cat_name)
|
selection = Selection(cat_name)
|
||||||
selection.select(self.get_dim_name(0), ids)
|
selection.select(self.get_dim_name(0), ids)
|
||||||
ret_list.append(selection)
|
ret_list.append(selection)
|
||||||
|
@ -309,10 +329,10 @@ class GraphDataset(Dataset):
|
||||||
representing the graph as a NetworkX.Graph, or NetworkX.XGraph structure.
|
representing the graph as a NetworkX.Graph, or NetworkX.XGraph structure.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def __init__(self, array=None, identifiers=None, shape=None, all_dims=[],**kwds):
|
def __init__(self, array, identifiers=None, name='A'):
|
||||||
Dataset.__init__(self, array=array, identifiers=identifiers, name='A')
|
Dataset.__init__(self, array=array, identifiers=identifiers, name=name)
|
||||||
self._graph = None
|
self._graph = None
|
||||||
self._type = 'g'
|
self._pos = None
|
||||||
|
|
||||||
def asnetworkx(self, nx_type='graph'):
|
def asnetworkx(self, nx_type='graph'):
|
||||||
dim = self.get_dim_name()[0]
|
dim = self.get_dim_name()[0]
|
||||||
|
@ -334,17 +354,17 @@ class GraphDataset(Dataset):
|
||||||
import networkx as nx
|
import networkx as nx
|
||||||
except:
|
except:
|
||||||
print "Failed in import of NetworkX"
|
print "Failed in import of NetworkX"
|
||||||
return
|
return None
|
||||||
m, n = A.shape# adjacency matrix must be of type that evals to true/false for neigbours
|
m, n = A.shape # adjacency matrix must be of type that evals to true/false for neigbours
|
||||||
if m!=n:
|
if m != n:
|
||||||
raise IOError, "Adjacency matrix must be square"
|
raise IOError, "Adjacency matrix must be square"
|
||||||
|
|
||||||
if A[A[:,0].nonzero()[0][0],0]==1: #unweighted graph
|
if A[A[:,0].nonzero()[0][0],0] == 1: #unweighted graph
|
||||||
G = nx.Graph()
|
G = nx.Graph()
|
||||||
else:
|
else:
|
||||||
G = nx.XGraph()
|
G = nx.XGraph()
|
||||||
|
|
||||||
if labels==None: # if labels not provided mark vertices with numbers
|
if labels == None: # if labels not provided mark vertices with numbers
|
||||||
labels = [str(i) for i in range(m)]
|
labels = [str(i) for i in range(m)]
|
||||||
|
|
||||||
for nbrs, head in izip(A, labels):
|
for nbrs, head in izip(A, labels):
|
||||||
|
@ -371,7 +391,7 @@ class ReverseDict(dict):
|
||||||
"""
|
"""
|
||||||
def __init__(self, *args, **kw):
|
def __init__(self, *args, **kw):
|
||||||
dict.__init__(self, *args, **kw)
|
dict.__init__(self, *args, **kw)
|
||||||
self.reverse = dict([[v,k] for k,v in self.items()])
|
self.reverse = dict([[v, k] for k, v in self.items()])
|
||||||
|
|
||||||
def __setitem__(self, key, value):
|
def __setitem__(self, key, value):
|
||||||
dict.__setitem__(self, key, value)
|
dict.__setitem__(self, key, value)
|
||||||
|
@ -380,39 +400,6 @@ class ReverseDict(dict):
|
||||||
except:
|
except:
|
||||||
self.reverse = {value:key}
|
self.reverse = {value:key}
|
||||||
|
|
||||||
def to_file(filepath,dataset,name=None):
|
|
||||||
"""Write dataset to file. A file may contain multiple datasets.
|
|
||||||
append to file by using option mode='a'
|
|
||||||
"""
|
|
||||||
if not name:
|
|
||||||
name = dataset._name
|
|
||||||
data = shelve.open(filepath, flag='c', protocol=2)
|
|
||||||
if data: #we have an append
|
|
||||||
names = data.keys()
|
|
||||||
if name in names:
|
|
||||||
print "Data with name: %s overwritten" %dataset._name
|
|
||||||
|
|
||||||
sub_data = {'array':dataset._array,
|
|
||||||
'idents':dataset._identifiers,
|
|
||||||
'type':dataset._type}
|
|
||||||
data[name] = sub_data
|
|
||||||
data.close()
|
|
||||||
|
|
||||||
def from_file(filepath):
|
|
||||||
"""Read dataset(s) from file """
|
|
||||||
data = shelve.open(filepath, flag='r')
|
|
||||||
out_data = []
|
|
||||||
for name in data.keys():
|
|
||||||
sub_data = data[name]
|
|
||||||
if sub_data['type']=='c':
|
|
||||||
out_data.append(CategoryDataset(sub_data['array'], identifiers=sub_data['idents'], name=name))
|
|
||||||
elif sub_data['type']=='g':
|
|
||||||
out_data.append(GraphDataset(sub_data['array'], identifiers=sub_data['idents'], name=name))
|
|
||||||
else:
|
|
||||||
out_data.append(Dataset(sub_data['array'], identifiers=sub_data['idents'], name=name))
|
|
||||||
|
|
||||||
return out_data
|
|
||||||
|
|
||||||
|
|
||||||
class Selection(dict):
|
class Selection(dict):
|
||||||
"""Handles selected identifiers along each dimension of a dataset"""
|
"""Handles selected identifiers along each dimension of a dataset"""
|
||||||
|
@ -436,31 +423,39 @@ class Selection(dict):
|
||||||
def select(self, axis, labels):
|
def select(self, axis, labels):
|
||||||
self[axis] = labels
|
self[axis] = labels
|
||||||
|
|
||||||
|
def write_ftsv(fd, ds, decimals=7, sep='\t', fmt=None):
|
||||||
def write_ftsv(fd, ds, decimals=7):
|
|
||||||
"""Writes a dataset in fluents tab separated values (ftsv) form.
|
"""Writes a dataset in fluents tab separated values (ftsv) form.
|
||||||
|
|
||||||
@param fd: An open file descriptor to the output file.
|
@param fd: An open file descriptor to the output file.
|
||||||
@param ds: The dataset to be written. The function handles datasets
|
@param ds: The dataset to be written.
|
||||||
of these classes: Dataset, CategoryDataset and GraphDataset
|
@param decimals: Number of decimals, only supported for dataset.
|
||||||
|
@param fmt: String formatting
|
||||||
|
The function handles datasets of these classes:
|
||||||
|
Dataset, CategoryDataset and GraphDataset
|
||||||
"""
|
"""
|
||||||
opened = False
|
opened = False
|
||||||
if isinstance(fd, str):
|
if isinstance(fd, str):
|
||||||
fd = open(fd, 'w')
|
fd = open(fd, 'w')
|
||||||
opened = True
|
opened = True
|
||||||
|
|
||||||
printstr = "%s\t"
|
|
||||||
# Write header information
|
# Write header information
|
||||||
if isinstance(ds, CategoryDataset):
|
if isinstance(ds, CategoryDataset):
|
||||||
type = 'category'
|
type = 'category'
|
||||||
|
if fmt == None:
|
||||||
|
fmt = '%d'
|
||||||
elif isinstance(ds, GraphDataset):
|
elif isinstance(ds, GraphDataset):
|
||||||
type = 'network'
|
type = 'network'
|
||||||
|
if fmt == None:
|
||||||
|
fmt = '%d'
|
||||||
elif isinstance(ds, Dataset):
|
elif isinstance(ds, Dataset):
|
||||||
type = 'dataset'
|
type = 'dataset'
|
||||||
printstr = '%%.%df\t' % decimals
|
if fmt == None:
|
||||||
|
fmt = '%%.%df' % decimals
|
||||||
else:
|
else:
|
||||||
raise Exception("Unknown object")
|
fmt = '%%.%d' %decimals + fmt
|
||||||
print >> fd, "# type: %s" % type
|
else:
|
||||||
|
raise Exception("Unknown object type")
|
||||||
|
fd.write('# type: %s' %type + '\n')
|
||||||
|
|
||||||
for dim in ds.get_dim_name():
|
for dim in ds.get_dim_name():
|
||||||
print >> fd, "# dimension: %s" % dim,
|
print >> fd, "# dimension: %s" % dim,
|
||||||
|
@ -469,23 +464,57 @@ def write_ftsv(fd, ds, decimals=7):
|
||||||
print >> fd
|
print >> fd
|
||||||
|
|
||||||
print >> fd, "# name: %s" % ds.get_name()
|
print >> fd, "# name: %s" % ds.get_name()
|
||||||
print >> fd
|
|
||||||
|
|
||||||
# Write data
|
# Write data
|
||||||
m = ds.asarray()
|
m = ds.asarray()
|
||||||
if type == 'category':
|
if isinstance(m, sparse.spmatrix):
|
||||||
m = m.astype('i')
|
_write_sparse_elements(fd, m, fmt, sep)
|
||||||
|
else:
|
||||||
y, x = m.shape
|
_write_elements(fd, m, fmt, sep)
|
||||||
for j in range(y):
|
|
||||||
for i in range(x):
|
|
||||||
print >> fd, printstr % m[j, i],
|
|
||||||
print >> fd
|
|
||||||
|
|
||||||
if opened:
|
if opened:
|
||||||
fd.close()
|
fd.close()
|
||||||
|
|
||||||
def read_ftsv(fd):
|
def _write_sparse_elements(fd, arr, fmt='%d', sep=None):
|
||||||
|
""" Sparse coordinate format."""
|
||||||
|
fd.write('# sp_format: True\n\n')
|
||||||
|
fmt = '%d %d ' + fmt + '\n'
|
||||||
|
csr = arr.tocsr()
|
||||||
|
for ii in xrange(csr.size):
|
||||||
|
ir, ic = csr.rowcol(ii)
|
||||||
|
data = csr.getdata(ii)
|
||||||
|
fd.write(fmt % (ir, ic, data))
|
||||||
|
|
||||||
|
def _write_elements(fd, arr, fmt='%f', sep='\t'):
|
||||||
|
"""Standard value separated format."""
|
||||||
|
fmt = fmt + sep
|
||||||
|
fd.write('\n')
|
||||||
|
y, x = arr.shape
|
||||||
|
for j in range(y):
|
||||||
|
for i in range(x):
|
||||||
|
fd.write(fmt %arr[j, i])
|
||||||
|
fd.write('\n')
|
||||||
|
|
||||||
|
def _read_elements(fd, arr, sep=None):
|
||||||
|
line = fd.readline()
|
||||||
|
i = 0
|
||||||
|
while line:
|
||||||
|
values = line.split(sep)
|
||||||
|
for j, val in enumerate(values):
|
||||||
|
arr[i,j] = float(val)
|
||||||
|
i += 1
|
||||||
|
line = fd.readline()
|
||||||
|
return arr
|
||||||
|
|
||||||
|
def _read_sparse_elements(fd, arr, sep=None):
|
||||||
|
line = fd.readline()
|
||||||
|
while line:
|
||||||
|
i, j, val = line.split()
|
||||||
|
arr[int(i),int(j)] = float(val)
|
||||||
|
line = fd.readline()
|
||||||
|
return arr.tocsr()
|
||||||
|
|
||||||
|
def read_ftsv(fd, sep=None):
|
||||||
"""Read a dataset in fluents tab separated values (ftsv) form and return it.
|
"""Read a dataset in fluents tab separated values (ftsv) form and return it.
|
||||||
|
|
||||||
@param fd: An open file descriptor.
|
@param fd: An open file descriptor.
|
||||||
|
@ -502,7 +531,8 @@ def read_ftsv(fd):
|
||||||
identifiers = {}
|
identifiers = {}
|
||||||
type = 'dataset'
|
type = 'dataset'
|
||||||
name = 'Unnamed dataset'
|
name = 'Unnamed dataset'
|
||||||
graphtype = 'graph'
|
sp_format = False
|
||||||
|
# graphtype = 'graph'
|
||||||
|
|
||||||
# Read header lines from file.
|
# Read header lines from file.
|
||||||
line = fd.readline()
|
line = fd.readline()
|
||||||
|
@ -526,8 +556,18 @@ def read_ftsv(fd):
|
||||||
elif key == 'name':
|
elif key == 'name':
|
||||||
name = val
|
name = val
|
||||||
|
|
||||||
elif key == 'graphtype':
|
# storage format
|
||||||
graphtype = val
|
# if sp_format is True then use coordinate triplets
|
||||||
|
elif key == 'sp_format':
|
||||||
|
if val in ['False', 'false', '0', 'F', 'f',]:
|
||||||
|
sp_format = False
|
||||||
|
elif val in ['True', 'true', '1', 'T', 't']:
|
||||||
|
sp_format = True
|
||||||
|
else:
|
||||||
|
raise ValueError("sp_format: %s not valid " %sp_format)
|
||||||
|
|
||||||
|
# elif key == 'graphtype':
|
||||||
|
# graphtype = val
|
||||||
|
|
||||||
else:
|
else:
|
||||||
break
|
break
|
||||||
|
@ -537,22 +577,20 @@ def read_ftsv(fd):
|
||||||
dims = [(x, identifiers[x]) for x in dimensions]
|
dims = [(x, identifiers[x]) for x in dimensions]
|
||||||
dim_lengths = [len(identifiers[x]) for x in dimensions]
|
dim_lengths = [len(identifiers[x]) for x in dimensions]
|
||||||
|
|
||||||
# Create matrix
|
# Create matrix and assign element reader
|
||||||
if type == 'category':
|
if type == 'category':
|
||||||
matrix = zeros(dim_lengths, dtype=bool)
|
if sp_format:
|
||||||
elif type == 'network':
|
matrix = sparse.lil_matrix(dim_lengths)
|
||||||
matrix = zeros(dim_lengths)
|
read_elements = _read_sparse_elements
|
||||||
else:
|
else:
|
||||||
matrix = zeros(dim_lengths)
|
matrix = empty(dim_lengths, dtype='i')
|
||||||
|
read_elements = _read_elements
|
||||||
|
elif type == 'network':
|
||||||
|
matrix = empty(dim_lengths)
|
||||||
|
else:
|
||||||
|
matrix = empty(dim_lengths)
|
||||||
|
|
||||||
line = fd.readline()
|
matrix = read_elements(fd, matrix, sep)
|
||||||
y = 0
|
|
||||||
while line:
|
|
||||||
values = line.split()
|
|
||||||
for x, v in enumerate(values):
|
|
||||||
matrix[y,x] = float(v)
|
|
||||||
y += 1
|
|
||||||
line = fd.readline()
|
|
||||||
|
|
||||||
# Create dataset of specified type
|
# Create dataset of specified type
|
||||||
if type == 'category':
|
if type == 'category':
|
||||||
|
|
|
@ -392,7 +392,7 @@ class NavigatorMenu(gtk.Menu):
|
||||||
ds = self.dataset.copy()
|
ds = self.dataset.copy()
|
||||||
ds._name = self.dataset._name + ".rsc"
|
ds._name = self.dataset._name + ".rsc"
|
||||||
axis = 1
|
axis = 1
|
||||||
ds._array = ds._array/scipy.expand_dims(ds._array.std(axis), axis)
|
ds._array = ds.asarray()/scipy.expand_dims(ds.asarray().std(axis), axis)
|
||||||
icon = fluents.icon_factory.get(ds)
|
icon = fluents.icon_factory.get(ds)
|
||||||
project.data_tree_insert(self.tree_iter, ds.get_name(), ds, None, "black", icon)
|
project.data_tree_insert(self.tree_iter, ds.get_name(), ds, None, "black", icon)
|
||||||
|
|
||||||
|
@ -401,21 +401,21 @@ class NavigatorMenu(gtk.Menu):
|
||||||
ds = self.dataset.copy()
|
ds = self.dataset.copy()
|
||||||
ds._name = self.dataset._name + ".csc"
|
ds._name = self.dataset._name + ".csc"
|
||||||
axis = 0
|
axis = 0
|
||||||
ds._array = ds._array/scipy.expand_dims(ds._array.std(axis), axis)
|
ds._array = ds.asarray()/scipy.expand_dims(ds.asarray().std(axis), axis)
|
||||||
icon = fluents.icon_factory.get(ds)
|
icon = fluents.icon_factory.get(ds)
|
||||||
project.data_tree_insert(self.tree_iter, ds.get_name(), ds, None, "black", icon)
|
project.data_tree_insert(self.tree_iter, ds.get_name(), ds, None, "black", icon)
|
||||||
|
|
||||||
def on_log(self, item, navigator):
|
def on_log(self, item, navigator):
|
||||||
project = main.project
|
project = main.project
|
||||||
try:
|
try:
|
||||||
if not scipy.all(self.dataset._array>0):
|
if not scipy.all(self.dataset.asarray()>0):
|
||||||
raise ValueError
|
raise ValueError
|
||||||
except:
|
except:
|
||||||
logger.log('warning', 'Datasets needs to be strictly positive for a log transform')
|
logger.log('warning', 'Datasets needs to be strictly positive for a log transform')
|
||||||
return
|
return
|
||||||
|
|
||||||
ds = self.dataset.copy()
|
ds = self.dataset.copy()
|
||||||
ds._array = scipy.log(ds._array)
|
ds._array = scipy.log(ds.asarray())
|
||||||
icon = fluents.icon_factory.get(ds)
|
icon = fluents.icon_factory.get(ds)
|
||||||
ds._name = ds._name + ".log"
|
ds._name = ds._name + ".log"
|
||||||
project.data_tree_insert(self.tree_iter, ds.get_name(), ds, None, "black", icon)
|
project.data_tree_insert(self.tree_iter, ds.get_name(), ds, None, "black", icon)
|
||||||
|
|
|
@ -305,8 +305,8 @@ class ScatterMarkerPlot(Plot):
|
||||||
self.ms = s
|
self.ms = s
|
||||||
x_index = dataset_1[sel_dim][id_1]
|
x_index = dataset_1[sel_dim][id_1]
|
||||||
y_index = dataset_2[sel_dim][id_2]
|
y_index = dataset_2[sel_dim][id_2]
|
||||||
self.xaxis_data = dataset_1._array[:, x_index]
|
self.xaxis_data = dataset_1.asarray()[:, x_index]
|
||||||
self.yaxis_data = dataset_2._array[:, y_index]
|
self.yaxis_data = dataset_2.asarray()[:, y_index]
|
||||||
|
|
||||||
# init draw
|
# init draw
|
||||||
self._selection_line = None
|
self._selection_line = None
|
||||||
|
@ -390,8 +390,8 @@ class ScatterPlot(Plot):
|
||||||
y_index = dataset_2[sel_dim_2][id_2]
|
y_index = dataset_2[sel_dim_2][id_2]
|
||||||
else:
|
else:
|
||||||
y_index = dataset_2[sel_dim][id_2]
|
y_index = dataset_2[sel_dim][id_2]
|
||||||
self.xaxis_data = dataset_1._array[:, x_index]
|
self.xaxis_data = dataset_1.asarray()[:, x_index]
|
||||||
self.yaxis_data = dataset_2._array[:, y_index]
|
self.yaxis_data = dataset_2.asarray()[:, y_index]
|
||||||
|
|
||||||
# init draw
|
# init draw
|
||||||
self.init_draw()
|
self.init_draw()
|
||||||
|
@ -436,7 +436,7 @@ class ScatterPlot(Plot):
|
||||||
|
|
||||||
def set_absicca(self, sb):
|
def set_absicca(self, sb):
|
||||||
self._absi = sb.get_value_as_int() - 1
|
self._absi = sb.get_value_as_int() - 1
|
||||||
xy = self.dataset_1._array[:,[self._absi, self._ordi]]
|
xy = self.dataset_1.asarray()[:,[self._absi, self._ordi]]
|
||||||
self.xaxis_data = xy[:,0]
|
self.xaxis_data = xy[:,0]
|
||||||
self.yaxis_data = xy[:,1]
|
self.yaxis_data = xy[:,1]
|
||||||
self.sc._offsets = xy
|
self.sc._offsets = xy
|
||||||
|
@ -446,7 +446,7 @@ class ScatterPlot(Plot):
|
||||||
|
|
||||||
def set_ordinate(self, sb):
|
def set_ordinate(self, sb):
|
||||||
self._ordi = sb.get_value_as_int() - 1
|
self._ordi = sb.get_value_as_int() - 1
|
||||||
xy = self.dataset_1._array[:,[self._absi, self._ordi]]
|
xy = self.dataset_1.asarray()[:,[self._absi, self._ordi]]
|
||||||
self.xaxis_data = xy[:,0]
|
self.xaxis_data = xy[:,0]
|
||||||
self.yaxis_data = xy[:,1]
|
self.yaxis_data = xy[:,1]
|
||||||
self.sc._offsets = xy
|
self.sc._offsets = xy
|
||||||
|
|
Reference in New Issue