Projects/laydi
Projects
/
laydi
Archived
7
0
Fork 0
This repository has been archived on 2024-07-04. You can view files and clone it, but cannot push or open issues or pull requests.
laydi/bin/txt2ftsv

87 lines
2.2 KiB
Python
Executable File

#!/usr/bin/env python
import numpy
import os.path
import sys
from laydi import dataset
from getopt import getopt
# Name of the dataset dimension that the collected ids are placed in.
# Overridden by -d/--dimension.
dimension = 'dim_doe'
# Output destination; '-' means the dataset is written to standard output.
# Overridden by -o/--output.
output_fn = '-'
# Name given to the output dataset.  Set by -n/--name; when it is still None
# after option parsing, parse_options() fills in a default.
ds_name = None
def print_help():
    """Write the command-line option summary to standard output.

    The summary is framed by one empty line before and one after.
    """
    help_lines = (
        '',
        'options:',
        ' -h, --help Show this help text.',
        ' -d, --dimension=DIM Make output in dimension DIM',
        ' -n, --name=NAME Set name of output dataset',
        ' -o, --output=FILE Save output dataset in FILE',
        '',
    )
    # Every entry, including the two empty ones, ends with a newline.
    sys.stdout.write('\n'.join(help_lines) + '\n')
def parse_options():
    """Parse ``sys.argv`` and return the list of input file names.

    Recognised options update the module-level settings:

    * ``-d`` / ``--dimension=DIM`` sets ``dimension``.
    * ``-n`` / ``--name=NAME`` sets ``ds_name``.
    * ``-o`` / ``--output=FILE`` sets ``output_fn``.
    * ``-h`` / ``--help`` prints the help text and exits with status 0.

    When no dataset name was given, ``ds_name`` defaults to the output file
    name, or to ``'txt2ftsv'`` when the output goes to standard output
    (``output_fn == '-'``).

    Exits with status 2 on an unrecognised or malformed option and with
    status 1 (after printing the help text) when no input files are given.

    Returns:
        The positional arguments left after option parsing.
    """
    global dimension
    global ds_name
    global output_fn

    # Function-scope import: the file itself only imports ``getopt.getopt``.
    from getopt import GetoptError

    short_opts = 'hd:n:o:'
    # Long options that take a value need a trailing '='; without it getopt
    # rejects ``--dimension=DIM``, ``--name=NAME`` and ``--output=FILE``.
    long_opts = ['help', 'dimension=', 'name=', 'output=']

    try:
        options, params = getopt(sys.argv[1:], short_opts, long_opts)
    except GetoptError as err:
        # Report bad usage instead of dying with a traceback.
        sys.stderr.write('txt2ftsv: %s\n' % err)
        print_help()
        sys.exit(2)

    for opt, val in options:
        if opt in ('-h', '--help'):
            print_help()
            sys.exit(0)
        elif opt in ('-d', '--dimension'):
            dimension = val
        elif opt in ('-n', '--name'):
            ds_name = val
        elif opt in ('-o', '--output'):
            output_fn = val

    if ds_name is None:
        # '-' (and a missing name) means stdout, which is not a useful
        # dataset name; fall back to the tool name in that case.
        if output_fn is not None and output_fn != '-':
            ds_name = output_fn
        else:
            ds_name = 'txt2ftsv'

    if not params:
        print_help()
        sys.exit(1)

    return params
def read_file(fd):
    """Return the non-blank lines read from *fd*, stripped of whitespace.

    Args:
        fd: An object providing ``readlines()``.

    Returns:
        A list with one stripped string per line that is not empty after
        stripping, in the order read.
    """
    entries = []
    for raw_line in fd.readlines():
        entry = raw_line.strip()
        if entry != '':
            entries.append(entry)
    return entries
def build_dataset(dimension, id_lists, filenames):
    """Build a membership dataset: one row per identifier, one column per file.

    Cell ``[r, c]`` is set when identifier ``r`` occurs in ``id_lists[c]``.

    Args:
        dimension: Name of the dimension holding the identifiers.
        id_lists: One list of identifiers per input file.
        filenames: Labels for the ``'files'`` dimension; expected to line up
            one-to-one with ``id_lists``.

    Returns:
        A ``dataset.CategoryDataset`` named after the module-level
        ``ds_name``.
    """
    # Give every distinct identifier a fixed row, in order of first
    # appearance, so row labels and matrix rows are guaranteed to agree
    # (a bare set has no defined order).
    row_of = {}
    all_ids = []
    for id_list in id_lists:
        for identifier in id_list:
            if identifier not in row_of:
                row_of[identifier] = len(all_ids)
                all_ids.append(identifier)

    x = numpy.zeros((len(all_ids), len(id_lists)), 'b')
    for col, id_list in enumerate(id_lists):
        for identifier in id_list:
            # Index by the identifier's own row, not by its position within
            # this particular file's list.
            x[row_of[identifier], col] = True

    return dataset.CategoryDataset(
        x, [(dimension, all_ids), ('files', filenames)], name=ds_name)
if __name__ == '__main__':
    # Read one identifier list per input, build the membership dataset and
    # write it to the file named by output_fn ('-' means standard output).
    id_lists = []
    used_filenames = []
    filenames = parse_options()
    for fn in filenames:
        if os.path.exists(fn):
            # 'with' closes the file even if reading fails.
            with open(fn) as fd:
                id_lists.append(read_file(fd))
        elif fn == '-':
            id_lists.append(read_file(sys.stdin))
        else:
            sys.stderr.write('txt2ftsv: skipping missing file: %s\n' % fn)
            continue
        # Only inputs that were actually read get a column, so the file
        # labels stay aligned with id_lists.
        used_filenames.append(fn)

    if not id_lists:
        sys.stderr.write('txt2ftsv: no readable input files\n')
        sys.exit(1)

    ds = build_dataset(dimension, id_lists, used_filenames)
    # The summary goes to stderr so it cannot corrupt ftsv data on stdout.
    sys.stderr.write(str(ds) + '\n')
    if output_fn == '-':
        dataset.write_ftsv(sys.stdout, ds)
    else:
        dataset.write_ftsv(output_fn, ds)