source: trunk/greenstone3-extensions/vishnu/src/cluster/clustering.c@ 8189

Last change on this file since 8189 was 8189, checked in by kjdon, 20 years ago

first version of Imperial College's Visualiser code

  • Property svn:keywords set to Author Date Id Revision
File size: 4.8 KB
Line 
1/* Related word hierarchical clustering using a given triangle similarity matrix */
2#include <stdio.h>
3#include <stdlib.h>
4#include <math.h>
5#include <assert.h>
6
7//#include <sys/times.h>
8//#ifndef CLK_TCK
9#include <time.h>
10//#endif
11
12#include "clustering.h"
13#include "create_trisim.h"
14#include "simfuncs.h"
15#include "iterative.h"
16#include "linkfuncs.h"
17#include "hierarchical.h"
18#include "procresults.h"
19#include "mysammon.h"
20
21
22void *myrealloc(void *p, int n) {
23 void *ret = p? realloc(p, n): mymalloc(n);
24
25 if(!ret)
26 fprintf(stderr, "clustering.c: no memory for realloc(p,%d)\n", n), exit(1);
27
28 return ret;
29}
30
31void *mycalloc(int n, int m) {
32 void *ret = calloc(n, m);
33
34 if(!ret)
35 fprintf(stderr, "clustering.c: no memory for calloc(%d,%d)\n", n, m), exit(1);
36
37 return ret;
38}
39
40void *mymalloc(int n) {
41 void *ret = malloc(n);
42
43 if(!ret)
44 fprintf(stderr, "clustering.c: no memory for malloc(%d,%d)\n", n), exit(1);
45
46 return ret;
47}
48
49void myfree(void *p) {
50 if(p)
51 free(p);
52}
53
54void free_vector_array(double **d,int n)
55{
56 int i;
57 for(i=0;i<n;i++)
58 free(d[i]);
59 free(d);
60}
61
62int VECTOR_SIZE;
63
64/*
65* Main
66*/
67/* 0 vectors_3333 15 1 10 20 */
68
69int
70do_clustering (int linkage, double **vectors, int num_of_vectors, int arraySize, int min_c_items,
71 int target_clusters, int limit, int *coding, double ***centroidsPtr, char * buffer, int
72*cursize, int * curptr)
73{
74// char * buffer = (char *) malloc (256);
75// int cursize = 256;
76// int curptr = 0;
77 char bbuf[20];
78 int n;
79
80 double *sim_matrix, **centroids;
81 double (*linkage_func)(double, double);
82
83 //struct tms *timer = (struct tms *) mymalloc(sizeof (struct tms));
84
85 Cluster *toplevel_cluster;
86 Cluster **splitclusters = NULL;
87 int clusters = 0, zeros=0;
88
89 double threshold;
90 double (*sim_func)(double *, double *, int) = cosine_distance;
91
92 double *heights;
93
94 int i, j;
95
96 VECTOR_SIZE = arraySize;
97 switch (linkage) {
98 case COMPLETE_LINKAGE:
99 linkage_func = maxlink;
100 break;
101 case AVERAGE_LINKAGE:
102 linkage_func = avelink;
103 break;
104 case SINGLE_LINKAGE:
105 linkage_func = minlink;
106 break;
107 default:
108 printf ("clustering.c: main(): linkage type %d not defined.\n", linkage);
109 exit(0);
110 }
111
112 if(limit > num_of_vectors) limit = num_of_vectors;
113
114 sim_matrix = create_sim_matrix (vectors, limit, sim_func);
115
116
117 /* an array of heights sorted by descending disim */
118 heights = (double *) mycalloc ((limit - 1), sizeof (double));
119
120 toplevel_cluster = hierarchical_clustering (sim_matrix, limit, heights, linkage_func);
121
122
123 threshold = split_clusters (toplevel_cluster, &splitclusters, heights, limit, target_clusters-zeros, min_c_items, &clusters);
124
125 iterative_clustering (splitclusters, clusters, vectors, limit, num_of_vectors, sim_func);
126
127 centroids = malloc(sizeof(double **)*clusters);
128 assert(centroids);
129
130 n = sprintf(bbuf,"%d\n",clusters);
131 buffer = appendString(buffer,cursize,curptr,bbuf,strlen(bbuf));
132
133 for (i = 0; i < clusters; i++)
134 {
135 centroids[i] = calloc(VECTOR_SIZE,sizeof(double));
136 assert(centroids[i]);
137 print_cluster_children (splitclusters[i], coding, buffer, cursize,curptr);
138 for (j = 0; j < splitclusters[i]->iter_items_num; j++)
139 {
140 if( j<splitclusters[i]->iter_items_num-1 )
141 {
142 n = sprintf(bbuf,"%d%s",coding[splitclusters[i]->iter_items[j]],",");
143 buffer = appendString(buffer,cursize,curptr,bbuf,strlen(bbuf));
144 }
145 else
146 {
147 n = sprintf(bbuf,"%d%s",coding[splitclusters[i]->iter_items[j]],"");
148 buffer = appendString(buffer,cursize,curptr,bbuf,strlen(bbuf));
149
150 }
151 }
152
153 for (j = 0; j < VECTOR_SIZE; j++)
154 {
155 centroids[i][j]=splitclusters[i]->centroid[j];
156 }
157
158 buffer = appendString(buffer,cursize,curptr,"\n",1);
159 }
160
161 free (sim_matrix);
162 free_cluster (toplevel_cluster);
163 free (heights);
164
165 *centroidsPtr = centroids;
166 return clusters;
167}
168
169int clustering (int linkage, MatrixNodePtr *matrix, int num_of_vectors, int arraySize, int min_c_items,
170int target_clusters, int limit, char *buffer, int *cursize, int *curptr)
171{
172 int *coding, *newcoding, clusterCount, groupCount, i;
173 double **vectors, **centroids, **newCentroids;
174 vectors = read_vectors (matrix, num_of_vectors, arraySize, &coding);
175
176 clusterCount = do_clustering (linkage, vectors, num_of_vectors, arraySize, min_c_items,
177 target_clusters, limit, coding, &centroids, buffer, cursize, curptr);
178
179 mysammon( centroids, clusterCount, arraySize, buffer, cursize, curptr);
180
181 free_vector_array(vectors,num_of_vectors);
182 free_vector_array(centroids,clusterCount);
183
184
185 free(coding);
186 return 0;
187
188}
189
190
191
192
193
194
195
Note: See TracBrowser for help on using the repository browser.