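Fixed a critical bug in txt2ftsv that made output datasets incorrect when more than one ID list was used: matrix rows were indexed by each ID's position within its own list instead of by its position in the combined list of all IDs.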
This commit is contained in:
parent
6c20de11c9
commit
5cfa505ae2
|
@ -67,11 +67,11 @@ def read_file(fd):
|
||||||
return [l.strip() for l in lines if l.strip() != '']
|
return [l.strip() for l in lines if l.strip() != '']
|
||||||
|
|
||||||
def build_dataset(dimension, id_lists, filenames):
|
def build_dataset(dimension, id_lists, filenames):
|
||||||
all_ids = reduce(set.union, [set(x) for x in id_lists])
|
all_ids = list(reduce(set.union, [set(x) for x in id_lists]))
|
||||||
x = numpy.zeros((len(all_ids), len(id_lists)), 'b')
|
x = numpy.zeros((len(all_ids), len(id_lists)), 'b')
|
||||||
for i, idl in enumerate(id_lists):
|
for i, idl in enumerate(id_lists):
|
||||||
for j, id in enumerate(idl):
|
for id in idl:
|
||||||
x[j,i] = True
|
x[all_ids.index(id),i] = True
|
||||||
|
|
||||||
if category:
|
if category:
|
||||||
ds = dataset.CategoryDataset(x, [(dimension, all_ids), ('files', filenames)], name=ds_name)
|
ds = dataset.CategoryDataset(x, [(dimension, all_ids), ('files', filenames)], name=ds_name)
|
||||||
|
|
Reference in New Issue