The go-distance script will soon calculate Resnik distances.
This commit is contained in:
parent
a5c3a1f154
commit
28f0f53e8a
|
@ -5,7 +5,7 @@ godist.o: godist.c godist.h
|
||||||
gcc -ggdb -c godist.c
|
gcc -ggdb -c godist.c
|
||||||
|
|
||||||
go-distance: godist.o main.o
|
go-distance: godist.o main.o
|
||||||
gcc -ggdb -o go-distance godist.o main.o
|
gcc -ggdb -o go-distance godist.o main.o -lm
|
||||||
|
|
||||||
clean:
|
clean:
|
||||||
-rm go-distance godist.o main.o
|
-rm go-distance godist.o main.o
|
||||||
|
|
|
@ -1,4 +1,5 @@
|
||||||
|
|
||||||
|
#include <math.h>
|
||||||
#include <string.h>
|
#include <string.h>
|
||||||
#include <errno.h>
|
#include <errno.h>
|
||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
|
@ -10,7 +11,8 @@ void print_terms();
|
||||||
void add_link(char*, char*);
|
void add_link(char*, char*);
|
||||||
struct node* get_bp();
|
struct node* get_bp();
|
||||||
struct node* get_term(char *);
|
struct node* get_term(char *);
|
||||||
float calc_ic(struct node *, unsigned int);
|
void calc_ic(struct node *, unsigned int);
|
||||||
|
struct node *common_subsumer(struct node *, struct node *);
|
||||||
|
|
||||||
|
|
||||||
/* initialisation */
|
/* initialisation */
|
||||||
|
@ -19,6 +21,7 @@ int godist_init() {
|
||||||
hcreate(MAX_NODES);
|
hcreate(MAX_NODES);
|
||||||
term_array_size = 0;
|
term_array_size = 0;
|
||||||
link_count = 0;
|
link_count = 0;
|
||||||
|
struct node *n;
|
||||||
|
|
||||||
/* Read ontology terms from file */
|
/* Read ontology terms from file */
|
||||||
printf("Reading GO terms from go-terms.txt...");
|
printf("Reading GO terms from go-terms.txt...");
|
||||||
|
@ -51,13 +54,24 @@ int godist_init() {
|
||||||
fclose(tree_fd);
|
fclose(tree_fd);
|
||||||
printf(" %d edges\n", link_count);
|
printf(" %d edges\n", link_count);
|
||||||
|
|
||||||
|
printf("Calculating accumulated evidence...");
|
||||||
|
fflush(stdout);
|
||||||
for (i=0; i<term_array_size; i++) {
|
for (i=0; i<term_array_size; i++) {
|
||||||
clear_flags(get_bp());
|
clear_flags(get_bp());
|
||||||
accumulate_evidence(term_array[i]);
|
accumulate_evidence(term_array[i]);
|
||||||
printf(".");
|
|
||||||
}
|
}
|
||||||
print_term(get_bp());
|
printf("\n");
|
||||||
|
|
||||||
|
evidence = 0xff;
|
||||||
|
total_ann = 0;
|
||||||
|
n = get_bp();
|
||||||
|
for (i=0; i<12; i++)
|
||||||
|
if (evidence & 1<<i)
|
||||||
|
total_ann += n->acc_evidence[i];
|
||||||
|
printf("Using %d annotations.\n", total_ann);
|
||||||
|
|
||||||
|
print_term(get_bp());
|
||||||
|
/*
|
||||||
print_term(get_term("GO:0040007"));
|
print_term(get_term("GO:0040007"));
|
||||||
print_term(get_term("GO:0007275"));
|
print_term(get_term("GO:0007275"));
|
||||||
print_term(get_term("GO:0007582"));
|
print_term(get_term("GO:0007582"));
|
||||||
|
@ -69,9 +83,14 @@ int godist_init() {
|
||||||
print_term(get_term("GO:0009987"));
|
print_term(get_term("GO:0009987"));
|
||||||
print_term(get_term("GO:0050896"));
|
print_term(get_term("GO:0050896"));
|
||||||
print_term(get_term("GO:0050789"));
|
print_term(get_term("GO:0050789"));
|
||||||
|
*/
|
||||||
calc_ic(get_bp(), 0xffff);
|
printf("Calculation information content...");
|
||||||
|
fflush(stdout);
|
||||||
|
calculate_ics(0xffff);
|
||||||
|
printf("\n");
|
||||||
|
/* calc_ic(get_bp(), 0xffff);*/
|
||||||
/* find_multi_parented();*/
|
/* find_multi_parented();*/
|
||||||
|
common_subsumer(get_term("GO:0000003"), get_term("GO:0000004"));
|
||||||
}
|
}
|
||||||
|
|
||||||
void godist_exit() {
|
void godist_exit() {
|
||||||
|
@ -232,12 +251,39 @@ void find_multi_parented() {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
float calc_ic(struct node *n, unsigned int evidence) {
|
void calculate_ics(unsigned int evidence) {
|
||||||
|
int i;
|
||||||
|
for (i=0; i<term_array_size; i++)
|
||||||
|
calc_ic(term_array[i], evidence);
|
||||||
|
}
|
||||||
|
|
||||||
|
void calc_ic(struct node *n, unsigned int evidence) {
|
||||||
int i;
|
int i;
|
||||||
float ann=0.0;
|
float ann=0.0;
|
||||||
for (i=0; i<12; i++)
|
for (i=0; i<12; i++)
|
||||||
if (evidence & 1<<i)
|
if (evidence & 1<<i)
|
||||||
ann += (float) n->acc_evidence[i];
|
ann += (float) n->acc_evidence[i];
|
||||||
printf("%f", ann);
|
n->ic = -log(ann/total_ann);
|
||||||
|
printf("%f\n", n->ic);
|
||||||
|
}
|
||||||
|
|
||||||
|
struct node *common_subsumer(struct node *n1, struct node *n2) {
|
||||||
|
struct node *anc1[MAX_NODES];
|
||||||
|
struct node *anc2[MAX_NODES];
|
||||||
|
int ancc1=0, ancc2=0;
|
||||||
|
|
||||||
|
add_ancestors(&ancc1, anc1, n1);
|
||||||
|
add_ancestors(&ancc2, anc2, n2);
|
||||||
|
printf("Ancestors: %d %d\n", ancc1, ancc2);
|
||||||
|
}
|
||||||
|
|
||||||
|
void add_ancestors(int *ancc, struct node *anc[], struct node *n) {
|
||||||
|
int i=0;
|
||||||
|
anc[(*ancc)++] = n;
|
||||||
|
for (i=0; i<n->parentc; i++)
|
||||||
|
add_ancestors(ancc, anc, n->parents[i]);
|
||||||
|
}
|
||||||
|
|
||||||
|
float resnik(struct node *n1, struct node *n2) {
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -48,6 +48,8 @@ struct node {
|
||||||
struct node* term_array[MAX_NODES];
|
struct node* term_array[MAX_NODES];
|
||||||
long term_array_size;
|
long term_array_size;
|
||||||
int link_count;
|
int link_count;
|
||||||
|
int total_ann;
|
||||||
|
int evidence; /* bitvector with one bit per evidence code */
|
||||||
|
|
||||||
/* Ontology initialisation functions. */
|
/* Ontology initialisation functions. */
|
||||||
int godist_init();
|
int godist_init();
|
||||||
|
@ -58,6 +60,10 @@ void accumulate_evidence(struct node*);
|
||||||
/* Distance metric functions */
|
/* Distance metric functions */
|
||||||
float resnik_distance(char *term1, char *term2);
|
float resnik_distance(char *term1, char *term2);
|
||||||
float fussimeg_distance(char *term1, char *term2);
|
float fussimeg_distance(char *term1, char *term2);
|
||||||
|
void calc_ic(struct node *n, unsigned int evidence);
|
||||||
|
|
||||||
|
void clear_flags(struct node *n);
|
||||||
|
void print_term(struct node *n);
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
Reference in New Issue