Projects/laydi
Projects
/
laydi
Archived
7
0
Fork 0

Added options to txt2ftsv to choose between datasets and category datasets and whether or not to use sparse format.

This commit is contained in:
Einar Ryeng 2009-02-05 20:08:51 +00:00
parent b46c381c3e
commit 6c20de11c9
1 changed files with 20 additions and 5 deletions

View File

@ -10,28 +10,35 @@ from getopt import getopt
dimension = 'dim_doe' dimension = 'dim_doe'
output_fn = '-' output_fn = '-'
ds_name = None ds_name = None
category = False
sparse = False
def print_help(): def print_help():
print print
print 'options:' print 'options:'
print ' -h, --help Show this help text.' print ' -h, --help Show this help text.'
print ' -c, --category Make category dataset'
print ' -d, --dimension=DIM Make output in dimension DIM' print ' -d, --dimension=DIM Make output in dimension DIM'
print ' -n, --name=NAME Set name of output dataset' print ' -n, --name=NAME Set name of output dataset'
print ' -o, --output=FILE Save output dataset in FILE' print ' -o, --output=FILE Save output dataset in FILE'
print ' -s, --sparse Save output in sparse format'
print print
def parse_options(): def parse_options():
global ds_name global ds_name
global output_fn global output_fn
short_opts = 'hd:n:o:' short_opts = 'cd:hn:o:'
long_opts = ['help', 'dimension', 'name', 'output'] long_opts = ['help', 'category', 'dimension', 'name', 'output', 'sparse']
options, params = getopt(sys.argv[1:], short_opts, long_opts) options, params = getopt(sys.argv[1:], short_opts, long_opts)
for opt, val in options: for opt, val in options:
if opt in ['-h', '--help']: if opt in ['-h', '--help']:
print_help() print_help()
sys.exit(0) sys.exit(0)
elif opt in ['-c', '--category']:
global category
category = True
elif opt in ['-d', '--dimension']: elif opt in ['-d', '--dimension']:
global dimension global dimension
dimension = val dimension = val
@ -39,6 +46,9 @@ def parse_options():
ds_name = val ds_name = val
elif opt in ['-o', '--output']: elif opt in ['-o', '--output']:
output_fn = val output_fn = val
elif opt in ['-s', '--sparse']:
global sparse
sparse = True
if ds_name == None: if ds_name == None:
if output_fn != None: if output_fn != None:
@ -62,7 +72,12 @@ def build_dataset(dimension, id_lists, filenames):
for i, idl in enumerate(id_lists): for i, idl in enumerate(id_lists):
for j, id in enumerate(idl): for j, id in enumerate(idl):
x[j,i] = True x[j,i] = True
return dataset.CategoryDataset(x, [(dimension, all_ids), ('files', filenames)], name=ds_name)
if category:
ds = dataset.CategoryDataset(x, [(dimension, all_ids), ('files', filenames)], name=ds_name)
else:
ds = dataset.Dataset(x, [(dimension, all_ids), ('files', filenames)], name=ds_name)
return ds
if __name__ == '__main__': if __name__ == '__main__':
id_lists = [] id_lists = []
@ -80,7 +95,7 @@ if __name__ == '__main__':
print ds print ds
if output_fn == '-': if output_fn == '-':
dataset.write_ftsv(sys.stdout, ds) dataset.write_ftsv(sys.stdout, ds, sp_format=sparse)
else: else:
dataset.write_ftsv(output_fn, ds) dataset.write_ftsv(output_fn, ds, sp_format=sparse)