1 | package vishnu.testvis.visual;
|
---|
2 |
|
---|
3 | import java.util.*;
|
---|
4 | import java.net.*;
|
---|
5 | import java.io.*;
|
---|
6 | import java.lang.*;
|
---|
7 | import java.util.zip.*;
|
---|
8 |
|
---|
/**
 * Reorders a list of keywords by repeatedly merging the two keyword groups
 * that occur together in the largest number of documents.
 *
 * <p>All working state lives in static fields that are overwritten on every
 * call to {@link #maximunDistance(int[][], int[], int)}, so concurrent calls
 * would interfere with each other.
 */
class HitCluster
{
    /** Sentinel stored on the matrix diagonal and used as the "no index chosen yet" marker. */
    static final int NOVALUE = 99999999;

    /** Number of keywords in the current run; set to {@code keywords.length}. */
    static int MAX;

    /** Shared empty array assigned to a cluster slot once it has been absorbed by another. */
    static final int[] EMPTYARRAY = {};

    /** Pairwise co-occurrence counts between keyword positions; -1 marks an absorbed row/column. */
    static int[][] matrix;

    /** Input table indexed as {@code data[document][keyword value]}. */
    static int[][] data;

    /** For each keyword position, the keyword positions currently grouped under it. */
    static int[][] clusters;

    /** Input keyword values; each value is used as a column index into {@link #data}. */
    static int[] keywords;

    /** Number of leading rows of {@link #data} that are examined. */
    static int maxDocuments;

    /**
     * Fills {@link #matrix} with, for every pair of keyword positions, the
     * number of the first {@link #maxDocuments} documents in which both
     * keywords have a value greater than zero. The matrix is symmetric and
     * its diagonal is set to {@link #NOVALUE}.
     */
    static void buildMatrix ()
    {
        matrix = new int[MAX][MAX];
        for (int word1 = 0; word1 < MAX; word1++)
        {
            int keyword1 = keywords[word1];
            for (int word2 = word1 + 1; word2 < MAX; word2++)
            {
                int keyword2 = keywords[word2];
                int count = 0;
                for (int doc = 0; doc < maxDocuments; doc++)
                {
                    if ((data[doc][keyword1] > 0) && (data[doc][keyword2] > 0))
                    {
                        count++;
                    }
                }
                // Store both halves at once so the matrix is symmetric.
                matrix[word1][word2] = count;
                matrix[word2][word1] = count;
            }
            matrix[word1][word1] = NOVALUE;
        }
    }

    /**
     * Performs one merge step: finds the pair of keyword positions with the
     * largest remaining matrix entry and folds the higher-indexed one into
     * the lower-indexed one.
     *
     * <p>The surviving row keeps, per column, the smaller of the two merged
     * rows' values; the absorbed row and column are overwritten with -1 so
     * they are never selected again.
     */
    static void findLargest ()
    {
        int largest = -1;
        int smallRow = NOVALUE;
        int smallCol = NOVALUE;

        // Only the upper triangle is scanned, so smallRow < smallCol whenever a pair is found.
        for (int row = 0; row < MAX; row++)
        {
            for (int col = row + 1; col < MAX; col++)
            {
                if (largest < matrix[row][col])
                {
                    largest = matrix[row][col];
                    smallRow = row;
                    smallCol = col;
                }
            }
        }

        // Surviving row takes the element-wise minimum; absorbed row is blanked.
        for (int col = 0; col < MAX; col++)
        {
            if (matrix[smallRow][col] > matrix[smallCol][col])
            {
                matrix[smallRow][col] = matrix[smallCol][col];
            }
            matrix[smallCol][col] = -1;
        }

        clusters[smallRow] = addClusters(smallRow, smallCol);
        clusters[smallCol] = EMPTYARRAY;

        // Blank the absorbed column and mirror the surviving row into its column.
        // Rows are visited in ascending order, so matrix[smallRow][smallCol] is
        // already -1 by the time row == smallCol copies it.
        for (int row = 0; row < MAX; row++)
        {
            matrix[row][smallCol] = -1;
            matrix[row][smallRow] = matrix[smallRow][row];
        }
    }

    /**
     * Concatenates the member lists of two clusters.
     *
     * <p>The cluster whose first member has the strictly smaller keyword
     * value is placed first; on a tie, cluster {@code c2} goes first.
     *
     * @param c1 index of the first cluster in {@link #clusters}
     * @param c2 index of the second cluster in {@link #clusters}
     * @return a new array holding the members of both clusters
     */
    static int[] addClusters(int c1, int c2)
    {
        int[] head = clusters[c1];
        int[] tail = clusters[c2];
        int[] merged = new int[head.length + tail.length];
        if (!(keywords[head[0]] < keywords[tail[0]]))
        {
            int[] swap = head;
            head = tail;
            tail = swap;
        }
        System.arraycopy(head, 0, merged, 0, head.length);
        System.arraycopy(tail, 0, merged, head.length, tail.length);
        return merged;
    }

    /**
     * Resets {@link #clusters} so that every keyword position is a cluster
     * containing only itself.
     */
    static void preClusters()
    {
        clusters = new int[MAX][1];
        for (int row = 0; row < MAX; row++)
        {
            clusters[row][0] = row;
        }
    }

    /**
     * Runs the full merge sequence and returns the keywords in clustered order.
     *
     * <p>If {@code documentCount} exceeds the number of rows in
     * {@code newData}, a warning is printed and only the available rows are
     * used.
     *
     * @param newData       table indexed as {@code [document][keyword value]}
     * @param newKeywords   keyword values, each used as a column index into {@code newData}
     * @param documentCount number of leading rows of {@code newData} to examine
     * @return a new array with the values of {@code newKeywords} in merge order;
     *         empty when {@code newKeywords} is empty
     */
    static int[] maximunDistance (int[][] newData, int[] newKeywords, int documentCount)
    {
        data = newData;
        keywords = newKeywords;
        if (documentCount > data.length)
        {
            System.out.println ("WARNING matrix sent is shorter the document Count!");
            System.out.println ("I will use the shorter length only.");
            documentCount = data.length;
        }
        // Assigned after the clamp so buildMatrix never reads past the end of data.
        maxDocuments = documentCount;
        MAX = keywords.length;

        buildMatrix();
        preClusters();
        // MAX - 1 merges leave one cluster. The absorbed index is always the
        // larger of the pair, so position 0 is never absorbed and clusters[0]
        // ends up holding every keyword position.
        for (int i = 1; i < MAX; i++)
        {
            findLargest();
        }

        int[] ordered = new int[MAX];
        for (int i = 0; i < MAX; i++)
        {
            ordered[i] = keywords[clusters[0][i]];
        }
        return ordered;
    }
}
|
---|
135 |
|
---|