# scripts/yeast_annot.py
# Origin: commit 947cf8385f5cde9218fa7f48746fa9adf506393e by flatberg,
# Fri, 23 Mar 2007 -- "Annotations on yeast from Stanford mdb."


def smdb_annot(orflist=None, input_fname='registry.genenames.tab',
               output_fname='yeast.annot'):
    """Write a tab-separated ORF annotation file from registry.genenames.tab.

    The input is the gene registry table of the Stanford yeast microarray
    database, available from:
    ftp://genome-ftp.stanford.edu/pub/yeast/data_download/gene_registry/registry.genenames.tab

    Each input line is tab-separated with these columns:

        0 = Locus name
        1 = Other name
        2 = Description
        3 = Gene product
        4 = Phenotype
        5 = ORF name
        6 = SGDID

    NB! Other name, Gene product and Phenotype may have more than one
    mapping. These are separated by | and are copied through unchanged.

    Parameters:
        orflist      -- optional iterable of ORF names to keep. When None
                        or empty, every named ORF in the input is written.
        input_fname  -- path of the registry file to read.
        output_fname -- path of the annotation file to write (overwritten).

    Output:
        Writes a header line followed by one line per selected ORF, with
        the ORF name moved to the first column. Empty fields are written
        as "NA". Lines without an ORF name, and lines too short to hold
        an ORF column, are skipped. Returns None.
    """
    header = ("Orf\tLocus_id\tOther_name\tDescription\tGene_product"
              "\tPhenotype\tSGD_ID\n")
    n_other_cols = header.count("\t")  # columns that follow the ORF name
    # Build the lookup once; an empty/None orflist means "no filtering".
    wanted = set(orflist) if orflist else None

    with open(input_fname, 'r') as infile:
        with open(output_fname, 'w') as outfile:
            outfile.write(header)
            for line in infile:
                els = line.rstrip('\r\n').split('\t')
                if len(els) <= 5:
                    # Blank or truncated line: there is no ORF column.
                    continue
                orf_name = els.pop(5)
                if orf_name == '':
                    # We don't care about non-named ORFs.
                    continue
                if wanted is not None and orf_name not in wanted:
                    # Skip this row only; later rows may still match.
                    continue
                # Pad rows that lack trailing columns so that every row
                # lines up with the header.
                els.extend([''] * (n_other_cols - len(els)))
                fields = [orf_name]
                fields.extend(e if e != '' else "NA" for e in els)
                outfile.write("\t".join(fields) + "\n")