1 | package vishnu.cluster;
|
---|
2 |
|
---|
3 | import java.util.*;
|
---|
4 |
|
---|
5 |
|
---|
6 | public class ClusterArray
|
---|
7 | {
|
---|
8 | Vector contents;
|
---|
9 |
|
---|
10 | ClusterArray(int num_of_clusters)
|
---|
11 | {
|
---|
12 | int i;
|
---|
13 |
|
---|
14 | contents = new Vector(num_of_clusters);
|
---|
15 |
|
---|
16 | for (i = 0; i < num_of_clusters; i++)
|
---|
17 | contents.addElement(new Cluster(i, null, null, null, 0, 1));
|
---|
18 | // i.e. id = i, similarity between kids = 0, #items = 1
|
---|
19 | }
|
---|
20 |
|
---|
21 | // counts the number of clusters that are less dissimilar than disim
|
---|
22 | private void sweep_tree (Cluster cluster, double disim, int min_c_items, TheInt clusters)
|
---|
23 | {
|
---|
24 | cluster._split = 0;
|
---|
25 |
|
---|
26 | if (cluster._id == -1)
|
---|
27 | {
|
---|
28 | if (cluster._children_sim <= disim && cluster._items >= min_c_items)
|
---|
29 | {
|
---|
30 | cluster._split = 1;
|
---|
31 | contents.addElement(cluster);
|
---|
32 | clusters.val++;
|
---|
33 | }
|
---|
34 | else
|
---|
35 | {
|
---|
36 | sweep_tree (cluster._child_1, disim, min_c_items, clusters);
|
---|
37 | sweep_tree (cluster._child_2, disim, min_c_items, clusters);
|
---|
38 | }
|
---|
39 | }
|
---|
40 | else
|
---|
41 | {
|
---|
42 | if (cluster._items >= min_c_items)
|
---|
43 | {
|
---|
44 | cluster._split = 1;
|
---|
45 | contents.addElement(cluster);
|
---|
46 | clusters.val++;
|
---|
47 | }
|
---|
48 | }
|
---|
49 | }
|
---|
50 |
|
---|
51 |
|
---|
52 | double split (Cluster cluster, double[] heights, int n, int target,
|
---|
53 | int min_c_items, TheInt clusters)
|
---|
54 | {
|
---|
55 |
|
---|
56 | /*
|
---|
57 | Try to split 'cluster', made up of 'n' vectors, into 'target'
|
---|
58 | clusters by incrementing iteratively the mimimum required
|
---|
59 | similarity between the 'cluster\'s' children. The result is an
|
---|
60 | array of clusters, 'cluster_ary' which contains 'clusters'
|
---|
61 | clusters. Obviously the final number of clusters may not be the
|
---|
62 | same as the desired number of clusters. The function return the
|
---|
63 | threshold similarity which resulted in the clusters being split
|
---|
64 | in 'clusters' clusters.
|
---|
65 | */
|
---|
66 |
|
---|
67 | int i;
|
---|
68 |
|
---|
69 | if (target < 2)
|
---|
70 | target = 2;
|
---|
71 |
|
---|
72 | if (target > n)
|
---|
73 | target = n;
|
---|
74 |
|
---|
75 | // height store minimum similarity values
|
---|
76 | // this is then the iterative incrementing loop
|
---|
77 | for (i = target - 2; i <= n - 1; i++)
|
---|
78 | {
|
---|
79 | clusters.val = 0; // simple class for an integer to
|
---|
80 | // be able to pass it by reference (below)
|
---|
81 | contents.removeAllElements();
|
---|
82 | sweep_tree (cluster, heights[i], min_c_items, clusters);
|
---|
83 |
|
---|
84 | if (clusters.val >= target)
|
---|
85 | break;
|
---|
86 | }
|
---|
87 |
|
---|
88 | // if target - 2 > n - 1, i.e. if the previous loop has not been
|
---|
89 | // entered at all
|
---|
90 | if (i > n - 1)
|
---|
91 | {
|
---|
92 | /* ignore the min_c_size requirement */
|
---|
93 | i = target - 1;
|
---|
94 | clusters.val = 0;
|
---|
95 | contents.removeAllElements();
|
---|
96 | sweep_tree (cluster, heights[i], 1, clusters);
|
---|
97 | }
|
---|
98 |
|
---|
99 | return heights[i];
|
---|
100 |
|
---|
101 | }
|
---|
102 |
|
---|
103 | }
|
---|