Added options to txt2ftsv to choose between datasets and category datasets, and whether or not to use sparse format.
This commit is contained in:
parent
b46c381c3e
commit
6c20de11c9
25
bin/txt2ftsv
25
bin/txt2ftsv
@ -10,28 +10,35 @@ from getopt import getopt
|
||||
dimension = 'dim_doe'
|
||||
output_fn = '-'
|
||||
ds_name = None
|
||||
category = False
|
||||
sparse = False
|
||||
|
||||
def print_help():
    """Print the command-line option summary to standard output.

    The summary is framed by one blank line before and one after.
    """
    help_lines = (
        '',
        'options:',
        ' -h, --help Show this help text.',
        ' -c, --category Make category dataset',
        ' -d, --dimension=DIM Make output in dimension DIM',
        ' -n, --name=NAME Set name of output dataset',
        ' -o, --output=FILE Save output dataset in FILE',
        ' -s, --sparse Save output in sparse format',
        '',
    )
    for line in help_lines:
        # A single parenthesised argument prints the same text whether
        # `print` is the Python 2 statement or the Python 3 function.
        print(line)
|
||||
|
||||
def parse_options():
|
||||
global ds_name
|
||||
global output_fn
|
||||
|
||||
short_opts = 'hd:n:o:'
|
||||
long_opts = ['help', 'dimension', 'name', 'output']
|
||||
short_opts = 'cd:hn:o:'
|
||||
long_opts = ['help', 'category', 'dimension', 'name', 'output', 'sparse']
|
||||
options, params = getopt(sys.argv[1:], short_opts, long_opts)
|
||||
|
||||
for opt, val in options:
|
||||
if opt in ['-h', '--help']:
|
||||
print_help()
|
||||
sys.exit(0)
|
||||
elif opt in ['-c', '--category']:
|
||||
global category
|
||||
category = True
|
||||
elif opt in ['-d', '--dimension']:
|
||||
global dimension
|
||||
dimension = val
|
||||
@ -39,6 +46,9 @@ def parse_options():
|
||||
ds_name = val
|
||||
elif opt in ['-o', '--output']:
|
||||
output_fn = val
|
||||
elif opt in ['-s', '--sparse']:
|
||||
global sparse
|
||||
sparse = True
|
||||
|
||||
if ds_name == None:
|
||||
if output_fn != None:
|
||||
@ -62,7 +72,12 @@ def build_dataset(dimension, id_lists, filenames):
|
||||
for i, idl in enumerate(id_lists):
|
||||
for j, id in enumerate(idl):
|
||||
x[j,i] = True
|
||||
return dataset.CategoryDataset(x, [(dimension, all_ids), ('files', filenames)], name=ds_name)
|
||||
|
||||
if category:
|
||||
ds = dataset.CategoryDataset(x, [(dimension, all_ids), ('files', filenames)], name=ds_name)
|
||||
else:
|
||||
ds = dataset.Dataset(x, [(dimension, all_ids), ('files', filenames)], name=ds_name)
|
||||
return ds
|
||||
|
||||
if __name__ == '__main__':
|
||||
id_lists = []
|
||||
@ -80,7 +95,7 @@ if __name__ == '__main__':
|
||||
print ds
|
||||
|
||||
if output_fn == '-':
|
||||
dataset.write_ftsv(sys.stdout, ds)
|
||||
dataset.write_ftsv(sys.stdout, ds, sp_format=sparse)
|
||||
else:
|
||||
dataset.write_ftsv(output_fn, ds)
|
||||
dataset.write_ftsv(output_fn, ds, sp_format=sparse)
|
||||
|
||||
|
Reference in New Issue
Block a user