/*
 * GO term graph loader and evidence-accumulation helpers built on the
 * hsearch() hash table.
 *
 * NOTE(review): this copy of the file appears to have lost every run of text
 * that sat between a '<' and the following '>' character. Visible symptoms:
 * the #include directives have no header names, several loops read
 * "for (i=0; i..." with the bound and part of the body gone, and a newline
 * falls inside the "FIXME: increase child count" string literal. Restore the
 * file from a clean copy before compiling or changing the code; the comments
 * below describe only what is still visible.
 */
#include #include #include #include #include #include "godist.h" void print_terms(); void add_link(char*, char*); struct node* get_bp(); struct node* get_term(char *); float calc_ic(struct node *, unsigned int); /* initialisation */ /* godist_init: creates the hash table with hcreate(MAX_NODES), zeroes term_array_size and link_count, then calls godist_read_term() on go-terms.txt for as long as it returns 13 and godist_read_assoc() on go-tree.txt for as long as it returns 2, counting each such call in link_count. If either file cannot be opened it prints a message and calls exit(errno). NOTE(review): errno is read after printf(), which may have overwritten it - confirm the exit status is still meaningful. */ int godist_init() { /* Initialize hash table and array */ hcreate(MAX_NODES); term_array_size = 0; link_count = 0; /* Read ontology terms from file */ printf("Reading GO terms from go-terms.txt..."); FILE *term_fd = fopen("go-terms.txt", "r"); if (term_fd == NULL) { printf("cannot open file: go-terms.txt\n"); exit(errno); } int i; while((i = godist_read_term(term_fd)) == 13) { /* printf("%d\n", i);*/ } fclose(term_fd); printf(" %d terms\n", term_array_size); /* Read ontology structure from file */ printf("Reading GO structure from go-tree.txt..."); FILE *tree_fd = fopen("go-tree.txt", "r"); if (tree_fd == NULL) { printf("cannot open file: go-tree.txt\n"); exit(errno); } while((i = godist_read_assoc(tree_fd)) == 2) { link_count++; } fclose(tree_fd); printf(" %d edges\n", link_count); for (i=0; /* NOTE(review): source text is missing from here on - the rest of godist_init, presumably all of godist_read_assoc, and the start of what is presumably godist_read_term are gone. The surviving tail below resets a node's parent/child counts and visited flag, copies 12 evidence values from ev[] while zeroing acc_evidence[], copies the term string into the node, enters the node into the hash table keyed by n->term, appends it to term_array and returns nread. The result of hsearch(e, ENTER) is stored in res but not checked in the visible code, and no bound check on term_array_size is visible. */ iparentc = 0; n->childrenc = 0; n->visited = 0; for (i=0; i<12; i++) { n->evidence[i] = ev[i]; n->acc_evidence[i] = 0; } strcpy(n->term, term); /* add to hash table */ e.key = n->term; e.data = (void*)n; res = hsearch(e, ENTER); term_array[term_array_size++] = n; } return nread; } /* distance functions */ /* go_distance: stub - ignores both arguments and always returns 0.0. */ float go_distance(char *term1, char *term2) { return 0.0; } /* clear_flags: recursively calls itself on n's children, then sets n->visited to 0. NOTE(review): the loop bound is garbled; presumably i < n->childrenc - confirm. */ void clear_flags(struct node *n) { int i; for (i=0; ichildrenc; i++) clear_flags(n->children[i]); n->visited = 0; } /* add_link: looks up parent_id and child_id in the hash table; if either is missing it prints "Cannot find term" and returns. Otherwise it appends the child to the parent's children[] and the parent to the child's parents[], unless the parent already holds MAX_CHILDREN children. NOTE(review): the ids are copied into a char[11] buffer with strcpy and no length check. */ void add_link(char *parent_id, char *child_id) { ENTRY *ep, e; struct node *parent, *child; char key[11]; strcpy(key, parent_id); e.key = key; ep = hsearch(e, FIND); if (!ep) { printf("Cannot find term %s\n", e.key); return; } /* NOTE(review): the node pointer is taken from ep->key here and for the child below, whereas get_term() returns ep->data. The key was set to n->term on insertion, so this cast is only right if term is the first member of struct node - confirm, or switch to ep->data. */ parent = (struct node*) ep->key; strcpy(key, child_id); e.key = key; ep = hsearch(e, FIND); if (!ep) { printf("Cannot find term %s\n", e.key); return; } child = (struct node*) ep->key; if (parent->childrenc +1 > MAX_CHILDREN) { printf("FIXME: increase 
child count"); return; } /* NOTE(review): only children[] is bounds-checked above; no matching check on child->parentc is visible before the write to parents[]. */ parent->children[parent->childrenc] = child; parent->childrenc++; child->parents[child->parentc] = parent; child->parentc++; } /* get_bp: returns get_term("GO:0008150"); presumably the root term that "bp" abbreviates - confirm. */ struct node *get_bp() { return get_term("GO:0008150"); } /* get_term: looks up term in the hash table and returns the stored data pointer, or NULL when there is no entry. */ struct node *get_term(char *term) { ENTRY e, *ep; e.key = term; ep = hsearch(e, FIND); if (ep) { return ep->data; } return NULL; } /* accumulate_evidence: returns at once if n is already visited. Otherwise marks n visited, copies its 12 evidence counts into acc_evidence, then recurses into each child that is not yet visited and adds that child's acc_evidence to n's. NOTE(review): a child already visited through another parent is skipped entirely, so its totals are not added to this parent - confirm that is intended. The visited flags are left set on return. */ void accumulate_evidence(struct node *n) { int i, j; if (n->visited) return; n->visited = 1; for (i=0; i<12; i++) n->acc_evidence[i] = n->evidence[i]; for (i=0; i<(n->childrenc); i++) { if (!n->children[i]->visited) { accumulate_evidence(n->children[i]); for (j=0; j<12; j++) n->acc_evidence[j] += n->children[i]->acc_evidence[j]; } } } /* print_terms: NOTE(review): the loop header and call are garbled; presumably prints the term string of every entry in term_array - confirm. */ void print_terms() { int i; for (i=0; iterm); } } /* print_term: prints n's term string, its child and parent counts, and its 12 evidence and 12 accumulated-evidence counts. */ void print_term(struct node *n) { int i; printf("%s\n", n->term); printf(" children: %d\n", n->childrenc); printf(" parents: %d\n", n->parentc); printf(" evidence: "); for (i=0; i<12; i++) printf("%d ", n->evidence[i]); printf("\n"); printf(" accumulated evidence: "); for (i=0; i<12; i++) printf("%d ", n->acc_evidence[i]); printf("\n"); } /* find_multi_parented: prints the term string and parent count of term_array entries whose parentc is greater than 1. NOTE(review): the loop header is garbled; presumably it iterates i up to term_array_size - confirm. */ void find_multi_parented() { int i; for (i=0; iparentc > 1) printf("%s -- %d\n", term_array[i]->term, term_array[i]->parentc); } } /* calc_ic: loops over 12 indices testing the evidence bitmask and prints the float ann with "%f". NOTE(review): the expression is garbled; presumably it adds n->acc_evidence[i] to ann for each bit i set in evidence - confirm. The function is declared to return float but no return statement is visible. */ float calc_ic(struct node *n, unsigned int evidence) { int i; float ann=0.0; for (i=0; i<12; i++) if (evidence & 1<acc_evidence[i]; printf("%f", ann); }