Renamed directory illumina2ftsv to illumina to gather all Illumina-relevant scripts there.

Added laydi-annot-illumina script that generates Laydi annotation files from Illumina text annotation files.
2011-01-04 11:43:24 +00:00 · 2011-01-04 11:43:24 +00:00 · 5f1f4d0dc2
commit 5f1f4d0dc2
parent f001d12584
2 changed files with 96 additions and 2 deletions
--- a/scripts/illumina2ftsv/illumina2ftsv
+++ b/scripts/illumina2ftsv/illumina2ftsv
@ -6,12 +6,35 @@ import sys
 from laydi import dataset
 # Version string shown in the banner printed by print_help().
 VERSION = "0.1.0"
 # Default value of the dataset option; parse_options() overwrites it with
 # the value given to -d/--dataset.
 dataset_fn = "-"
 def print_help():
    """Write the illumina2ftsv banner (with VERSION) and usage line to stdout."""
    banner = "illumina2ftsv %s" % VERSION
    usage = "Usage: illumina2ftsv [options] <illumina_genome_studio_file>"
    # Each text line is followed by an empty line.
    for text in (banner, "", usage, ""):
        sys.stdout.write(text + "\n")
 def parse_options():
    # Parse sys.argv[1:] with getopt. -d/--dataset stores its value in the
    # module-level dataset_fn; -h/--help prints the usage text and exits
    # with status 0. Exactly one positional parameter is required, otherwise
    # the usage text is printed and the process exits with status 1.
    # Returns the list of positional parameters.
    #
    # NOTE(review): the short option "d:" takes a value, but the long form is
    # declared as "dataset" without a trailing "=", so getopt will not accept
    # a value for --dataset -- this probably should be "dataset=".
-    s_opts = ""
+    s_opts = "d:h"
-    l_opts = []
+    l_opts = ["dataset", "help"]
    options, params = getopt.getopt(sys.argv[1:], s_opts, l_opts)
    for opt, val in options:
        if opt in ["-d", "--dataset"]:
            global dataset_fn
            dataset_fn = val
        elif opt in ["-h", "--help"]:
            print_help()
            sys.exit(0)
    if len(params) != 1:
        print_help()
        sys.exit(1)
    return params
 def read_illumina_file(fn):
--- a/scripts/illumina/laydi-annot-illumina
+++ b/scripts/illumina/laydi-annot-illumina
@ -0,0 +1,71 @@
 #!/usr/bin/python
 import getopt
 import os, os.path
 import sys
 # Names of the Illumina annotation columns to export, in output order.
 # convert_annotations() looks each name up in the input's column header row.
 OUTPUT_COLS = ["Probe_Id", "RefSeq_ID", "Unigene_ID", "Entrez_Gene_ID", "Accession", "Symbol", "Chromosome", "Definition"]
 def print_help():
    """Write the laydi-annot-illumina usage and description text to stdout."""
    help_lines = (
        "laydi-annot-illumina",
        "",
        "Usage: laydi-annot-illumina <illumina-annotation-file.txt>",
        "",
        "Description:",
        "    Produce laydi annotation files from Illumina text annotation files",
        "    Illumina files can be downloaded from:",
        "    http://www.switchtoi.com/annotationfiles.ilmn",
        "",
    )
    for text in help_lines:
        sys.stdout.write(text + "\n")
 def parse_cmdline():
    """Return the single input file name given on the command line.

    -h/--help prints the usage text and exits with status 0. Any number of
    positional arguments other than one prints the usage text and exits
    with status 1.
    """
    opts, args = getopt.getopt(sys.argv[1:], "h", ["help"])

    # Help wins over everything else, including a wrong argument count.
    if any(flag in ("-h", "--help") for flag, _ in opts):
        print_help()
        sys.exit(0)

    if len(args) != 1:
        print_help()
        sys.exit(1)
    return args[0]
 def convert_annotations(fn_in, fn_out):
    """Write the OUTPUT_COLS columns of an Illumina annotation file to fn_out.

    fn_in is read as text. Everything up to and including the line that
    starts with "[Probes]" is skipped, the next line is taken as the
    tab-separated column names, and each following row is reduced to the
    OUTPUT_COLS columns (in that order) and written tab-separated to fn_out.
    Reading stops at end of file or at the next line starting with "[".
    The first output column is headed "probe-id"; the remaining ones keep
    their OUTPUT_COLS names. fn_out is only created once the column header
    of fn_in has been validated.

    Raises ValueError if fn_in has no "[Probes]" line or if its column
    header lacks one of the OUTPUT_COLS names.
    """
    fd_in = open(fn_in)
    try:
        # Skip headers. readline() returns "" at end of file, so stop there
        # instead of looping forever when the section is missing.
        line = fd_in.readline()
        while line != "" and not line.startswith("[Probes]"):
            line = fd_in.readline()
        if line == "":
            raise ValueError("no [Probes] section found in %s" % (fn_in,))

        # Strip the line ending so the last column name (and, below, the
        # last value of each row) does not carry a trailing newline.
        colnames = fd_in.readline().rstrip("\r\n").split("\t")
        export_colnums = [colnames.index(x) for x in OUTPUT_COLS]

        fd_out = open(fn_out, "w")
        try:
            # The header describes the exported columns only, so that it
            # lines up with the data rows written below.
            export_colnames = ["probe-id"] + OUTPUT_COLS[1:]
            fd_out.write("\t".join(export_colnames) + "\n")

            line = fd_in.readline()
            while line != "" and not line.startswith("["):
                row = line.rstrip("\r\n")
                # Tolerate blank lines (e.g. at the end of the section).
                if row:
                    values = row.split("\t")
                    output_values = [values[x] for x in export_colnums]
                    fd_out.write("\t".join(output_values) + "\n")
                line = fd_in.readline()
        finally:
            fd_out.close()
    finally:
        fd_in.close()
 if __name__ == "__main__":
    # The output file is named after the input file: directory dropped and
    # extension replaced by ".annot", so it is written relative to the
    # current working directory.
    fn_in = parse_cmdline()
    fn_out = os.path.splitext(os.path.basename(fn_in))[0] + ".annot"

    # Report what is about to happen before doing the conversion.
    report = [
        "Reading: %s" % (fn_in,),
        "Writing: %s" % (fn_out,),
        "",
        "Annotations:",
        ", ".join(OUTPUT_COLS),
    ]
    sys.stdout.write("\n".join(report) + "\n")

    convert_annotations(fn_in, fn_out)