Fixed critical bug in txt2ftsv that made output datasets incorrect if more than one list was used.

This commit is contained in:
Einar Ryeng 2009-02-06 22:21:19 +00:00
parent 6c20de11c9
commit 5cfa505ae2

View File

@@ -67,11 +67,11 @@ def read_file(fd):
return [l.strip() for l in lines if l.strip() != '']
def build_dataset(dimension, id_lists, filenames):
all_ids = reduce(set.union, [set(x) for x in id_lists])
all_ids = list(reduce(set.union, [set(x) for x in id_lists]))
x = numpy.zeros((len(all_ids), len(id_lists)), 'b')
for i, idl in enumerate(id_lists):
for j, id in enumerate(idl):
x[j,i] = True
for id in idl:
x[all_ids.index(id),i] = True
if category:
ds = dataset.CategoryDataset(x, [(dimension, all_ids), ('files', filenames)], name=ds_name)