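Fixed a critical bug in txt2ftsv that made output datasets incorrect when more than one list was used: matrix rows were indexed by each identifier's position within its own list instead of its position in the combined identifier list.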

2009-02-06 22:21:19 +00:00
parent 6c20de11c9
commit 5cfa505ae2
1 changed file with 3 additions and 3 deletions
@@ -67,11 +67,11 @@ def read_file(fd):
    return [l.strip() for l in lines if l.strip() != '']
    
 def build_dataset(dimension, id_lists, filenames):
-    all_ids = reduce(set.union, [set(x) for x in id_lists])
+    all_ids = list(reduce(set.union, [set(x) for x in id_lists]))
    x = numpy.zeros((len(all_ids), len(id_lists)), 'b')
    for i, idl in enumerate(id_lists):
-        for j, id in enumerate(idl):
-            x[j,i] = True
+        for id in idl:
+            x[all_ids.index(id),i] = True

    if category:
        ds = dataset.CategoryDataset(x, [(dimension, all_ids), ('files', filenames)], name=ds_name)