From 6c20de11c9a60bf63da09ffaa99d25510d0aec0b Mon Sep 17 00:00:00 2001 From: einarr Date: Thu, 5 Feb 2009 20:08:51 +0000 Subject: [PATCH] Added options to txt2ftsv to choose between datasets and category datasets and whether or not to use sparse format. --- bin/txt2ftsv | 25 ++++++++++++++++++++----- 1 file changed, 20 insertions(+), 5 deletions(-) diff --git a/bin/txt2ftsv b/bin/txt2ftsv index dccc696..ac20f91 100755 --- a/bin/txt2ftsv +++ b/bin/txt2ftsv @@ -10,28 +10,35 @@ from getopt import getopt dimension = 'dim_doe' output_fn = '-' ds_name = None +category = False +sparse = False def print_help(): print print 'options:' print ' -h, --help Show this help text.' + print ' -c, --category Make category dataset' print ' -d, --dimension=DIM Make output in dimension DIM' print ' -n, --name=NAME Set name of output dataset' print ' -o, --output=FILE Save output dataset in FILE' + print ' -s, --sparse Save output in sparse format' print def parse_options(): global ds_name global output_fn - short_opts = 'hd:n:o:' - long_opts = ['help', 'dimension', 'name', 'output'] + short_opts = 'cd:hn:o:' + long_opts = ['help', 'category', 'dimension', 'name', 'output', 'sparse'] options, params = getopt(sys.argv[1:], short_opts, long_opts) for opt, val in options: if opt in ['-h', '--help']: print_help() sys.exit(0) + elif opt in ['-c', '--category']: + global category + category = True elif opt in ['-d', '--dimension']: global dimension dimension = val @@ -39,6 +46,9 @@ def parse_options(): ds_name = val elif opt in ['-o', '--output']: output_fn = val + elif opt in ['-s', '--sparse']: + global sparse + sparse = True if ds_name == None: if output_fn != None: @@ -62,7 +72,12 @@ def build_dataset(dimension, id_lists, filenames): for i, idl in enumerate(id_lists): for j, id in enumerate(idl): x[j,i] = True - return dataset.CategoryDataset(x, [(dimension, all_ids), ('files', 
filenames)], name=ds_name) + else: + ds = dataset.Dataset(x, [(dimension, all_ids), ('files', filenames)], name=ds_name) + return ds if __name__ == '__main__': id_lists = [] @@ -80,7 +95,7 @@ if __name__ == '__main__': print ds if output_fn == '-': - dataset.write_ftsv(sys.stdout, ds) + dataset.write_ftsv(sys.stdout, ds, sp_format=sparse) else: - dataset.write_ftsv(output_fn, ds) + dataset.write_ftsv(output_fn, ds, sp_format=sparse)