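Fixed a critical bug in txt2ftsv that made output datasets incorrect if more than one list was used: each matrix row was selected by an ID's position within its own list instead of its index in the combined ID list.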
This commit is contained in:
parent
6c20de11c9
commit
5cfa505ae2
@ -67,11 +67,11 @@ def read_file(fd):
|
||||
return [l.strip() for l in lines if l.strip() != '']
|
||||
|
||||
def build_dataset(dimension, id_lists, filenames):
|
||||
all_ids = reduce(set.union, [set(x) for x in id_lists])
|
||||
all_ids = list(reduce(set.union, [set(x) for x in id_lists]))
|
||||
x = numpy.zeros((len(all_ids), len(id_lists)), 'b')
|
||||
for i, idl in enumerate(id_lists):
|
||||
for j, id in enumerate(idl):
|
||||
x[j,i] = True
|
||||
for id in idl:
|
||||
x[all_ids.index(id),i] = True
|
||||
|
||||
if category:
|
||||
ds = dataset.CategoryDataset(x, [(dimension, all_ids), ('files', filenames)], name=ds_name)
|
||||
|
Reference in New Issue
Block a user