diff --git a/scripts/illumina2ftsv/illumina2ftsv b/scripts/illumina/illumina2ftsv
similarity index 75%
rename from scripts/illumina2ftsv/illumina2ftsv
rename to scripts/illumina/illumina2ftsv
index 70b5ddb..f04b7a5 100644
--- a/scripts/illumina2ftsv/illumina2ftsv
+++ b/scripts/illumina/illumina2ftsv
@@ -6,12 +6,48 @@ import sys
 from laydi import dataset
 
 
+VERSION = "0.1.0"
+
+# Dataset file name set by -d/--dataset; "-" is the value used when the
+# option is not given.
+dataset_fn = "-"
+
+def print_help():
+    """Print the program name, version and usage line to stdout."""
+    print "illumina2ftsv %s" % VERSION
+    print
+    print "Usage: illumina2ftsv [options] "
+    print
+
+
 def parse_options():
-    s_opts = ""
-    l_opts = []
+    """Parse sys.argv and return the list of positional parameters.
+
+    -d/--dataset FILE stores FILE in the module-level dataset_fn;
+    -h/--help prints the usage text and exits with status 0.  Unless exactly
+    one positional parameter is given, the usage text is printed and the
+    program exits with status 1.
+    """
+    global dataset_fn
+
+    s_opts = "d:h"
+    # A long option that takes a value must end in "=" (the counterpart of
+    # the ":" after "d"); without it --dataset is parsed as a bare flag.
+    l_opts = ["dataset=", "help"]
 
     options, params = getopt.getopt(sys.argv[1:], s_opts, l_opts)
 
+    for opt, val in options:
+        if opt in ["-d", "--dataset"]:
+            dataset_fn = val
+        elif opt in ["-h", "--help"]:
+            print_help()
+            sys.exit(0)
+
+    if len(params) != 1:
+        print_help()
+        sys.exit(1)
+
     return params
 
 def read_illumina_file(fn):
diff --git a/scripts/illumina/laydi-annot-illumina b/scripts/illumina/laydi-annot-illumina
new file mode 100755
index 0000000..100d7e5
--- /dev/null
+++ b/scripts/illumina/laydi-annot-illumina
@@ -0,0 +1,100 @@
+#!/usr/bin/python
+
+import getopt
+import os, os.path
+import sys
+
+# Annotation columns copied from the Illumina file, in output order.
+OUTPUT_COLS = ["Probe_Id", "RefSeq_ID", "Unigene_ID", "Entrez_Gene_ID", "Accession", "Symbol", "Chromosome", "Definition"]
+
+def print_help():
+    """Print the usage and description text to stdout."""
+    print "laydi-annot-illumina"
+    print
+    print "Usage: laydi-annot-illumina "
+    print
+    print "Description:"
+    print " Produce laydi annotation files from Illumina text annotation files"
+    print " Illumina files can be downloaded from:"
+    print " http://www.switchtoi.com/annotationfiles.ilmn"
+    print
+
+def parse_cmdline():
+    """Parse sys.argv and return the single input file name.
+
+    -h/--help prints the help text and exits with status 0.  Any number of
+    positional parameters other than one prints the help text and exits
+    with status 1.
+    """
+    short_opts = "h"
+    long_opts = ["help"]
+    options, params = getopt.getopt(sys.argv[1:], short_opts, long_opts)
+
+    for key, val in options:
+        if key in ["-h", "--help"]:
+            print_help()
+            sys.exit(0)
+
+    if len(params) != 1:
+        print_help()
+        sys.exit(1)
+
+    return params[0]
+
+def convert_annotations(fn_in, fn_out):
+    """Copy the OUTPUT_COLS columns of the [Probes] section to fn_out.
+
+    fn_in is read up to the line starting with "[Probes]"; the next line is
+    taken as the tab-separated column names.  Every following line, up to
+    the next line starting with "[" or the end of the file, is written to
+    fn_out as a tab-separated row holding only the OUTPUT_COLS columns.
+
+    Raises ValueError if fn_in has no [Probes] section or lacks one of the
+    OUTPUT_COLS columns.
+    """
+    fd_in = open(fn_in)
+    try:
+        # Skip headers.  readline() returns "" at end of file, so stop there
+        # instead of looping forever when the section is missing.
+        line = fd_in.readline()
+        while line != "" and not line.startswith("[Probes]"):
+            line = fd_in.readline()
+        if line == "":
+            raise ValueError("%s: no [Probes] section found" % (fn_in,))
+
+        # Strip the line terminator so the last column name can be matched.
+        colnames = fd_in.readline().rstrip("\r\n").split("\t")
+        export_colnums = [colnames.index(x) for x in OUTPUT_COLS]
+
+        fd_out = open(fn_out, "w")
+        try:
+            # Print output column headers: the selected columns only, with
+            # the first one renamed to "probe-id".
+            export_colnames = ["probe-id"] + OUTPUT_COLS[1:]
+            print >> fd_out, "\t".join(export_colnames)
+
+            line = fd_in.readline()
+            while not line == "" and not line.startswith("["):
+                values = line.rstrip("\r\n").split("\t")
+                output_values = [values[x] for x in export_colnums]
+                print >> fd_out, "\t".join(output_values)
+                line = fd_in.readline()
+        finally:
+            fd_out.close()
+    finally:
+        fd_in.close()
+
+if __name__ == "__main__":
+    fn_in = parse_cmdline()
+    # The output goes to the current directory, named after the input file.
+    fn_out = os.path.split(fn_in)[1]
+    fn_out = os.path.splitext(fn_out)[0] + ".annot"
+
+    print "Reading: %s" % (fn_in,)
+    print "Writing: %s" % (fn_out,)
+    print
+    print "Annotations:"
+    print ", ".join(OUTPUT_COLS)
+
+    convert_annotations(fn_in, fn_out)
+