From 947cf8385f5cde9218fa7f48746fa9adf506393e Mon Sep 17 00:00:00 2001
From: flatberg <flatberg@pvv.ntnu.no>
Date: Fri, 23 Mar 2007 20:51:04 +0000
Subject: [PATCH] Annotations on yeast from Stanford mdb.

---
 scripts/yeast_annot.py | 44 ++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 44 insertions(+)
 create mode 100644 scripts/yeast_annot.py

diff --git a/scripts/yeast_annot.py b/scripts/yeast_annot.py
new file mode 100644
index 0000000..c7abc3b
--- /dev/null
+++ b/scripts/yeast_annot.py
@@ -0,0 +1,44 @@
+
+def smdb_annot(orflist=None, input_fname='registry.genenames.tab', output_fname='yeast.annot'):
+    """Write a tab-separated annotation file for yeast ORFs.
+
+    Reads registry.genenames.tab from the Stanford yeast microarray
+    database, available from:
+    ftp://genome-ftp.stanford.edu/pub/yeast/data_download/gene_registry/registry.genenames.tab
+
+    registry.genenames.tab columns (tab separated):
+    0 = Locus name, 1 = Other name, 2 = Description, 3 = Gene product,
+    4 = Phenotype, 5 = ORF name, 6 = SGDID
+    NB! Other name, Gene product and Phenotype may have more than one
+    mapping. These are separated by |
+
+    input: orflist -- (optional) ORF names to keep; None or empty keeps all
+           input_fname -- (optional) registry file to read
+           output_fname -- (optional) annotation file to write
+
+    Output: writes output_fname with a header line and one row per named
+    ORF: the ORF name, then the other columns in input order, with empty
+    fields written as NA. Lines without an ORF column are skipped.
+    """
+    n_cols = 7  # columns in a complete registry line
+    header = "Orf\tLocus_id\tOther_name\tDescription\tGene_product\tPhenotype\tSGD_ID\n"
+    # A set gives constant-time membership tests for long ORF lists.
+    wanted = set(orflist) if orflist else None
+
+    # The with-blocks close both files even if an error is raised midway.
+    with open(input_fname, 'r') as infile:
+        with open(output_fname, 'w') as outfile:
+            outfile.write(header)
+            for line in infile:
+                els = line.rstrip('\r\n').split('\t')
+                if len(els) <= 5:
+                    continue  # blank or truncated line: no ORF column
+                els += [''] * (n_cols - len(els))  # pad a missing SGDID
+                orf_name = els.pop(5)
+                if orf_name == '':
+                    continue  # we don't care about non-named orfs
+                if wanted is not None and orf_name not in wanted:
+                    continue  # not requested; keep scanning the file
+                # ORF first to match the header; tabs only between fields.
+                row = [orf_name] + [e or 'NA' for e in els]
+                outfile.write("\t".join(row) + "\n")