/*
 * laydi/scripts/geneontology/go-distance/godist.c
 *
 * Source: the Projects/laydi repository, which was archived on 2024-07-04.
 * Files can still be viewed and cloned, but pushes, issues and pull requests
 * are closed.
 */

#include <string.h>
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <search.h>
#include "godist.h"
/* Forward declarations for helpers defined further down in this file. */
void print_terms();
void add_link(char *parent_id, char *child_id);
struct node *get_bp();
struct node *get_term(char *term);
/* initialisation */

/*
 * Open a required input file for reading.
 * Prints a message and exits with the fopen() error code when it cannot be
 * opened, so the returned stream is never NULL.
 */
static FILE *open_input(const char *path) {
    FILE *fd = fopen(path, "r");

    if (fd == NULL) {
        int saved_errno = errno;    /* printf() may overwrite errno */
        printf("cannot open file: %s\n", path);
        exit(saved_errno);
    }
    return fd;
}

/*
 * Print the term with the given id, or a notice when the id is not in the
 * hash table (get_term() returns NULL in that case).
 */
static void print_term_by_id(char *id) {
    struct node *n = get_term(id);

    if (n == NULL) {
        printf("Cannot find term %s\n", id);
        return;
    }
    print_term(n);
}

/*
 * Load the ontology and compute accumulated evidence for every term.
 *
 * Steps:
 *   1. create the hash table and reset the term/link counters;
 *   2. read all term records from go-terms.txt;
 *   3. read all parent/child pairs from go-tree.txt, counting those read;
 *   4. for each term, clear the visited flags below the root term and
 *      accumulate evidence for that term;
 *   5. print the root term and a fixed list of terms.
 *
 * Returns 0 on success. A missing input file or a failure to create the hash
 * table terminates the process.
 */
int godist_init() {
    /* Terms printed after loading, in this order. */
    static char *const report_ids[] = {
        "GO:0040007", "GO:0007275", "GO:0007582", "GO:0043473",
        "GO:0000004", "GO:0051704", "GO:0000003", "GO:0016032",
        "GO:0009987", "GO:0050896", "GO:0050789",
    };
    const int report_count = (int)(sizeof report_ids / sizeof report_ids[0]);
    struct node *bp;
    FILE *term_fd;
    FILE *tree_fd;
    int i;

    /* Initialize hash table and array */
    if (hcreate(MAX_NODES) == 0) {
        printf("cannot create hash table\n");
        exit(EXIT_FAILURE);
    }
    term_array_size = 0;
    link_count = 0;

    /* Read ontology terms from file */
    printf("Reading GO terms from go-terms.txt...");
    term_fd = open_input("go-terms.txt");
    while (godist_read_term(term_fd) == 13) {
        /* each successful call stores one term */
    }
    fclose(term_fd);
    printf(" %d terms\n", term_array_size);

    /* Read ontology structure from file */
    printf("Reading GO structure from go-tree.txt...");
    tree_fd = open_input("go-tree.txt");
    while (godist_read_assoc(tree_fd) == 2) {
        link_count++;
    }
    fclose(tree_fd);
    printf(" %d edges\n", link_count);

    /* The table no longer changes, so the root only needs one lookup. */
    bp = get_bp();
    if (bp == NULL) {
        printf("Cannot find root term\n");
    }

    for (i = 0; i < term_array_size; i++) {
        /* Reset the flags below the root so each term starts a fresh walk. */
        if (bp != NULL) {
            clear_flags(bp);
        }
        accumulate_evidence(term_array[i]);
        printf(".");
    }

    if (bp != NULL) {
        print_term(bp);
    }
    for (i = 0; i < report_count; i++) {
        print_term_by_id(report_ids[i]);
    }
    /* find_multi_parented();*/

    return 0;
}
/*
 * Release what godist_init() set up: the hash table created with hcreate()
 * and every node stored in term_array.
 *
 * The array slots are cleared and term_array_size is reset, so calling this
 * twice does not free a node a second time.
 */
void godist_exit() {
    int i;

    /*
     * The table entries point into the nodes (key = node->term, data = node),
     * so the table must not outlive them.
     * NOTE(review): glibc's hdestroy() leaves keys alone; some BSD libcs
     * free() each key, which would be wrong here -- confirm target platform.
     */
    hdestroy();

    for (i = 0; i < term_array_size; i++) {
        free(term_array[i]);
        term_array[i] = NULL;
    }
    term_array_size = 0;
}
/*
 * Read one "parent child" pair of term ids from fd and link the two terms.
 *
 * Each id is read into an 11-byte buffer (at most 10 characters). The link is
 * only added when both ids were read; a partial record would otherwise hand
 * an uninitialised buffer to add_link().
 *
 * Returns the fscanf() result: 2 for a complete pair, EOF at end of input,
 * anything else for an incomplete record.
 */
int godist_read_assoc(FILE *fd) {
    char parent_id[11];
    char child_id[11];
    int nread;

    nread = fscanf(fd, " %10s %10s ", parent_id, child_id);
    if (nread == 2) {
        add_link(parent_id, child_id);
    }
    return nread;
}
/*
 * Read one term record from fd and register it.
 *
 * A record is a term id (at most 10 characters) followed by 12 integer
 * evidence counts. When all 13 fields are read, a node is allocated,
 * initialised, entered into the hash table under its id and appended to
 * term_array.
 *
 * Returns the fscanf() result: 13 when a term was stored, EOF at end of
 * input, a smaller count for an incomplete record. EOF is also returned when
 * the node cannot be allocated or the hash table rejects it, so a caller
 * looping on "== 13" stops.
 */
int godist_read_term(FILE *fd) {
    char term[11];
    int ev[12];
    int i;
    int nread;
    ENTRY e;
    struct node *n;

    /* Clear errno first so the report below only reflects this call. */
    errno = 0;
    nread = fscanf(fd, " %10s %d %d %d %d %d %d %d %d %d %d %d %d ",
                   term, &ev[0], &ev[1], &ev[2], &ev[3], &ev[4], &ev[5],
                   &ev[6], &ev[7], &ev[8], &ev[9], &ev[10], &ev[11]);
    if (errno != 0) {
        printf("errno: %d\n", errno);
    }
    if (nread != 13) {
        return nread;
    }

    n = malloc(sizeof *n);
    if (n == NULL) {
        printf("Cannot add term %s: out of memory\n", term);
        return EOF;
    }
    n->parentc = 0;
    n->childrenc = 0;
    n->visited = 0;
    for (i = 0; i < 12; i++) {
        n->evidence[i] = ev[i];
        n->acc_evidence[i] = 0;
    }
    strcpy(n->term, term);

    /*
     * add to hash table -- the key points into the node because the table
     * keeps the pointer, not a copy of the string
     */
    e.key = n->term;
    e.data = n;
    if (hsearch(e, ENTER) == NULL) {
        printf("Cannot add term %s: hash table is full\n", term);
        free(n);
        return EOF;
    }

    /*
     * NOTE(review): term_array's capacity is declared outside this file;
     * confirm it is at least as large as the hash table.
     */
    term_array[term_array_size++] = n;
    return nread;
}
/* distance functions */

/*
 * Distance between two terms.
 * Placeholder: both arguments are ignored and the result is always 0.
 */
float go_distance(char *term1, char *term2) {
    const float distance = 0.0f;

    (void)term1;
    (void)term2;
    return distance;
}
/*
 * Reset the visited flag of n and of every node reachable through its
 * children. Children are handled before n itself.
 *
 * A NULL node is ignored, so the result of a failed lookup can be passed in.
 */
void clear_flags(struct node *n) {
    int i;

    if (n == NULL) {
        return;
    }
    for (i = 0; i < n->childrenc; i++) {
        clear_flags(n->children[i]);
    }
    n->visited = 0;
}
/*
 * Record a parent -> child edge between two terms already in the hash table.
 *
 * parent_id / child_id are the term ids to look up. If either id is unknown
 * a message is printed and nothing is linked. If the parent already has
 * MAX_CHILDREN children a message is printed and nothing is linked.
 */
void add_link(char *parent_id, char *child_id) {
    ENTRY query;
    ENTRY *found;
    struct node *parent;
    struct node *child;

    /* FIND only reads the key, so the caller's strings can be used as-is. */
    query.data = NULL;

    query.key = parent_id;
    found = hsearch(query, FIND);
    if (!found) {
        printf("Cannot find term %s\n", parent_id);
        return;
    }
    /* The node is stored in ->data; ->key is only its id string. */
    parent = found->data;

    query.key = child_id;
    found = hsearch(query, FIND);
    if (!found) {
        printf("Cannot find term %s\n", child_id);
        return;
    }
    child = found->data;

    if (parent->childrenc + 1 > MAX_CHILDREN) {
        printf("FIXME: increase child count");
        return;
    }

    parent->children[parent->childrenc] = child;
    parent->childrenc++;

    /*
     * NOTE(review): child->parents is written without a capacity check; its
     * size is declared outside this file -- confirm and guard if needed.
     */
    child->parents[child->parentc] = parent;
    child->parentc++;
}
/*
 * Look up the term "GO:0008150" (presumably the ontology root this module
 * works from -- confirm). Returns whatever get_term() returns for that id.
 */
struct node *get_bp() {
    char *bp_id = "GO:0008150";

    return get_term(bp_id);
}
/*
 * Find the node registered in the hash table under the id `term`.
 * Returns the entry's data pointer, or NULL when the id is not present.
 */
struct node *get_term(char *term) {
    ENTRY query;
    ENTRY *hit;

    query.key = term;
    hit = hsearch(query, FIND);
    return hit ? hit->data : NULL;
}
/*
 * Compute n->acc_evidence as n's own evidence plus the accumulated evidence
 * of every child that has not been visited yet.
 *
 * Nodes are marked visited as they are processed; a node that is already
 * marked is left untouched, and an already-marked child contributes nothing
 * to its parent in this pass.
 */
void accumulate_evidence(struct node *n) {
    int c;
    int k;

    if (n->visited)
        return;
    n->visited = 1;

    /* Start from the node's own counts. */
    for (k = 0; k < 12; k++)
        n->acc_evidence[k] = n->evidence[k];

    for (c = 0; c < n->childrenc; c++) {
        struct node *child = n->children[c];

        if (child->visited)
            continue;

        accumulate_evidence(child);
        for (k = 0; k < 12; k++)
            n->acc_evidence[k] += child->acc_evidence[k];
    }
}
void print_terms() {
int i;
for (i=0; i<term_array_size; i++) {
printf("%s\n", term_array[i]->term);
}
}
/*
 * Print a summary of one term: its id, child and parent counts, its 12
 * evidence counts and its 12 accumulated evidence counts.
 *
 * A NULL node prints nothing, so the result of a failed lookup can be passed
 * in.
 */
void print_term(struct node *n) {
    int i;

    if (n == NULL) {
        return;
    }

    printf("%s\n", n->term);
    printf(" children: %d\n", n->childrenc);
    printf(" parents: %d\n", n->parentc);

    printf(" evidence: ");
    for (i = 0; i < 12; i++) {
        printf("%d ", n->evidence[i]);
    }
    printf("\n");

    printf(" accumulated evidence: ");
    for (i = 0; i < 12; i++) {
        printf("%d ", n->acc_evidence[i]);
    }
    printf("\n");
}
void find_multi_parented() {
int i;
for (i=0; i<term_array_size; i++) {
if (term_array[i]->parentc > 1)
printf("%s -- %d\n", term_array[i]->term, term_array[i]->parentc);
}
}
/*
 * Placeholder for a score derived from an evidence count (presumably an
 * information-content value -- confirm intended formula).
 *
 * Not implemented yet: the argument is ignored and 0 is returned, so callers
 * get a defined value instead of the result of falling off the end of a
 * non-void function.
 */
float calc_ic(unsigned int evidence) {
    (void)evidence;
    return 0.0f;
}