1 | /*
|
---|
2 | * Visualizer.java
|
---|
3 | * Copyright (C) 2004 New Zealand Digital Library, http://www.nzdl.org
|
---|
4 | *
|
---|
5 | * This program is free software; you can redistribute it and/or modify
|
---|
6 | * it under the terms of the GNU General Public License as published by
|
---|
7 | * the Free Software Foundation; either version 2 of the License, or
|
---|
8 | * (at your option) any later version.
|
---|
9 | *
|
---|
10 | * This program is distributed in the hope that it will be useful,
|
---|
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
|
---|
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
---|
13 | * GNU General Public License for more details.
|
---|
14 | *
|
---|
15 | * You should have received a copy of the GNU General Public License
|
---|
16 | * along with this program; if not, write to the Free Software
|
---|
17 | * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
|
---|
18 | */
|
---|
19 |
|
---|
20 | package org.greenstone.gsdl3.service;
|
---|
21 |
|
---|
22 | import org.greenstone.gsdl3.util.*;
|
---|
23 | import org.w3c.dom.Document;
|
---|
24 | import org.w3c.dom.Node;
|
---|
25 | import org.w3c.dom.Element;
|
---|
26 | import org.w3c.dom.Text;
|
---|
27 | import java.util.Vector;
|
---|
28 | import java.util.HashMap;
|
---|
29 | import java.io.File;
|
---|
30 | import java.io.*;
|
---|
31 | import vishnu.server.*;
|
---|
32 | import vishnu.server.Search.*;
|
---|
33 | import vishnu.datablock.*;
|
---|
34 | import vishnu.util.Base64;
|
---|
35 |
|
---|
36 | public class Visualizer
|
---|
37 | extends ServiceRack {
|
---|
38 |
|
---|
39 | // the services on offer
|
---|
40 | private static final String VIS_SERVICE = "VisApplet";
|
---|
41 |
|
---|
42 | // other internal strings
|
---|
43 | private static final String ENGINE_TYPE_ELEM = "engineType";
|
---|
44 | private static final String LUCENE_ENGINE = "LUCENE";
|
---|
45 | private static final String MG_ENGINE = "MG";
|
---|
46 |
|
---|
47 | private Element applet_description = null;
|
---|
48 |
|
---|
49 | private String engine_type = LUCENE_ENGINE; // lucene is default
|
---|
50 | private String collection_home = null;
|
---|
51 | private SearchInterface engine = null;
|
---|
52 | public Visualizer () {
|
---|
53 |
|
---|
54 | }
|
---|
55 | public boolean configure(Element info, Element extra_info)
|
---|
56 | {
|
---|
57 | Element e = this.doc.createElement(GSXML.SERVICE_ELEM);
|
---|
58 | e.setAttribute(GSXML.TYPE_ATT, GSXML.SERVICE_TYPE_APPLET);
|
---|
59 | e.setAttribute(GSXML.NAME_ATT, VIS_SERVICE);
|
---|
60 | short_service_info.appendChild(e);
|
---|
61 |
|
---|
62 | applet_description = this.doc.createElement(GSXML.SERVICE_ELEM);
|
---|
63 | applet_description.setAttribute(GSXML.TYPE_ATT, GSXML.SERVICE_TYPE_APPLET);
|
---|
64 | applet_description.setAttribute(GSXML.NAME_ATT, VIS_SERVICE);
|
---|
65 |
|
---|
66 |
|
---|
67 | String app_info = "<"+GSXML.APPLET_ELEM+" CODEBASE='lib' CODE='vishnu.testvis.visual.VishnuSingle.class' ARCHIVE='vishnu.jar,gsdl3.jar,xercesImpl.jar,xml-apis.jar' WIDTH='1000' HEIGHT='800'>";
|
---|
68 | app_info += "<PARAM NAME='library' VALUE=''/>"; // filled in by receptionist
|
---|
69 | app_info += "<PARAM NAME='viscgi' VALUE='?";
|
---|
70 | app_info += GSParams.ACTION +"=a&"+GSParams.REQUEST_TYPE +"=r&"+GSParams.SERVICE+"="+VIS_SERVICE+"&"+GSParams.OUTPUT+"=xml&"+GSParams.RESPONSE_ONLY+"=1'/>";
|
---|
71 | app_info += "<PARAM NAME='collection' VALUE='" + this.cluster_name + "'/>";
|
---|
72 | app_info += "<PARAM NAME='engine' VALUE='GSDLEngine' />";
|
---|
73 | // add view info if appropriate
|
---|
74 | app_info += "The visualization applet.</"+GSXML.APPLET_ELEM+">";
|
---|
75 |
|
---|
76 | Document dom = converter.getDOM(app_info);
|
---|
77 | Element app_elem = dom.getDocumentElement();
|
---|
78 | applet_description.appendChild(this.doc.importNode(app_elem, true));
|
---|
79 |
|
---|
80 | // get engine type from config file
|
---|
81 | Element engine_elem = (Element)GSXML.getChildByTagName(info, ENGINE_TYPE_ELEM);
|
---|
82 | if (engine_elem != null) {
|
---|
83 | engine_type = engine_elem.getAttribute(GSXML.NAME_ATT);
|
---|
84 | if (engine_type.equals("")) {
|
---|
85 | engine_type = LUCENE_ENGINE;
|
---|
86 | }
|
---|
87 | }
|
---|
88 |
|
---|
89 | collection_home = this.site_home + File.separator + "collect"+ File.separator + this.cluster_name + File.separator;
|
---|
90 | if (engine_type.equals(LUCENE_ENGINE)) {
|
---|
91 | // make the full path the arg to MGSearcher??
|
---|
92 | engine = new LUCSearcher(collection_home, null);
|
---|
93 | } else if (engine_type.equals(MG_ENGINE)) {
|
---|
94 | engine = new MGSearcher(collection_home, this.cluster_name, null);
|
---|
95 | } else {
|
---|
96 | System.err.println("Visualiser: invalid engine type: "+engine_type);
|
---|
97 | return false;
|
---|
98 | }
|
---|
99 | return true;
|
---|
100 | }
|
---|
101 |
|
---|
102 |
|
---|
103 |
|
---|
104 | protected Element getServiceDescription(String service, String lang, String subset) {
|
---|
105 | if (!service.equals(VIS_SERVICE)) {
|
---|
106 | return null;
|
---|
107 | }
|
---|
108 | Element describe = (Element) applet_description.cloneNode(true);
|
---|
109 | describe.appendChild(GSXML.createDisplayTextElement(this.doc, GSXML.DISPLAY_TEXT_NAME,
|
---|
110 | getTextString(VIS_SERVICE+".name", lang)));
|
---|
111 | describe.appendChild(GSXML.createDisplayTextElement(this.doc, GSXML.DISPLAY_TEXT_DESCRIPTION,
|
---|
112 | getTextString(VIS_SERVICE+".description", lang)));
|
---|
113 | return describe;
|
---|
114 | }
|
---|
115 |
|
---|
116 | protected Element processVisApplet(Element request)
|
---|
117 | {
|
---|
118 |
|
---|
119 | Element param_elem = (Element)GSXML.getChildByTagName(request, GSXML.PARAM_ELEM+GSXML.LIST_MODIFIER);
|
---|
120 | HashMap params = GSXML.extractParams(param_elem, false);
|
---|
121 |
|
---|
122 | String type = (String)params.get("type");
|
---|
123 |
|
---|
124 | // the result element
|
---|
125 | Element result = this.doc.createElement(GSXML.RESPONSE_ELEM);
|
---|
126 | result.setAttribute(GSXML.FROM_ATT, VIS_SERVICE);
|
---|
127 | result.setAttribute(GSXML.TYPE_ATT, GSXML.REQUEST_TYPE_PROCESS);
|
---|
128 |
|
---|
129 | // applet result info must be in appletInfo element
|
---|
130 | Element applet_data = this.doc.createElement(GSXML.APPLET_DATA_ELEM);
|
---|
131 | result.appendChild(applet_data);
|
---|
132 | Element vis_data = this.doc.createElement("visData");
|
---|
133 | applet_data.appendChild(vis_data);
|
---|
134 |
|
---|
135 | String results = "";
|
---|
136 | if (type.equals("search")) {
|
---|
137 |
|
---|
138 | //Element cluster_results = this.doc.createElement("cluster");
|
---|
139 | //vis_data.appendChild(cluster_results);
|
---|
140 | //Element description_results = this.doc.createElement("descriptions");
|
---|
141 | //vis_data.appendChild(description_results);
|
---|
142 |
|
---|
143 | String query = (String)params.get("q");
|
---|
144 | System.err.println("the query was "+query);
|
---|
145 |
|
---|
146 | Vector doc_nums = new Vector();
|
---|
147 | Vector descriptions = new Vector();
|
---|
148 |
|
---|
149 | engine.search(query);
|
---|
150 | doc_nums = engine.getDocIdentifiers();
|
---|
151 | descriptions = engine.getDocDescriptions();
|
---|
152 |
|
---|
153 | DataBlock db = generateDataBlock(collection_home, null, doc_nums, descriptions);
|
---|
154 | System.err.println("got back data, now converting to string");
|
---|
155 | try {
|
---|
156 | results = Base64.encodeObject(db);
|
---|
157 | } catch (Exception e) {
|
---|
158 | System.err.println("trying to base64 encode the datablock, but exception happened: "+e);
|
---|
159 | }
|
---|
160 | System.err.println("after converting to string");
|
---|
161 | // // serialise the data block
|
---|
162 | // try {
|
---|
163 | // ByteArrayOutputStream baos = new ByteArrayOutputStream();
|
---|
164 | // ObjectOutputStream oos = new ObjectOutputStream(baos);
|
---|
165 | // oos.writeObject(db);
|
---|
166 | // oos.close();
|
---|
167 |
|
---|
168 | // //results = baos.toString("UTF-8");
|
---|
169 | // results = Base64.encode(baos.toByteArray());
|
---|
170 | // } catch (Exception e) {
|
---|
171 | // System.err.println("Visualizer serialise data block error: "+e);
|
---|
172 | // }
|
---|
173 | //System.err.println("num docs = "+doc_nums.size());
|
---|
174 | //Cluster c = new Cluster();
|
---|
175 | //results = c.getCluster(collection_home,doc_nums);
|
---|
176 | //results.trim();
|
---|
177 | //System.err.println("results = "+results);
|
---|
178 |
|
---|
179 | Text t = this.doc.createTextNode(results);
|
---|
180 | vis_data.appendChild(t);
|
---|
181 |
|
---|
182 | // for (int i=0; i<descriptions.size(); i++) {
|
---|
183 | // String d = (String)descriptions.get(i);
|
---|
184 | // Element de = GSXML.createTextElement(this.doc, "desc", d);
|
---|
185 | // description_results.appendChild(de);
|
---|
186 | // }
|
---|
187 | System.err.println("end of search");
|
---|
188 | } else if (type.equals("fetch")) {
|
---|
189 | String docNum = (String)params.get("d");
|
---|
190 |
|
---|
191 | Vector doc = engine.getDocContent(Integer.parseInt(docNum));
|
---|
192 | for (int i=0; i<doc.size(); i++) {
|
---|
193 | results += (String)doc.get(i) +"\n";
|
---|
194 | }
|
---|
195 | Text t = this.doc.createTextNode(results);
|
---|
196 | vis_data.appendChild(t);
|
---|
197 | } else {
|
---|
198 | System.err.println("invalid type sent to Visualiser process: "+type);
|
---|
199 | return result;
|
---|
200 | }
|
---|
201 | System.err.println("returning result");
|
---|
202 | return result;
|
---|
203 | }
|
---|
204 |
|
---|
205 | private DataBlock generateDataBlock(String collection_home, String view, Vector docNums, Vector descriptions) {
|
---|
206 |
|
---|
207 | if (docNums.size()==0) {
|
---|
208 | return null;
|
---|
209 | }
|
---|
210 | DataBlock data = new DataBlock();
|
---|
211 |
|
---|
212 | CKServer ck_server = new CKServer(collection_home, view);
|
---|
213 |
|
---|
214 | /**** set data fields one by one and pass one what ever gets assembled ****/
|
---|
215 |
|
---|
216 | try{
|
---|
217 |
|
---|
218 | ck_server.setDescriptions(descriptions);
|
---|
219 |
|
---|
220 | /**** get candidate keywords ****/
|
---|
221 |
|
---|
222 | String[] keywords = ck_server.computeKeywords(docNums);
|
---|
223 |
|
---|
224 | data.words = keywords;
|
---|
225 |
|
---|
226 |
|
---|
227 | /**** get sparse document * keyword matrix ****/
|
---|
228 |
|
---|
229 | SparseMatrix matrix = ck_server.getSparseMatrix(docNums);
|
---|
230 |
|
---|
231 | data.matrix = matrix;
|
---|
232 |
|
---|
233 |
|
---|
234 | /**** get document indices, this is a subset of the original ****/
|
---|
235 | /**** those without keywords are excluded ****/
|
---|
236 |
|
---|
237 | int[] docs = ck_server.getHitDocuments();
|
---|
238 |
|
---|
239 | data.docs = docs;
|
---|
240 |
|
---|
241 |
|
---|
242 | String[] desc = ck_server.getHitDescriptions();
|
---|
243 |
|
---|
244 | data.descriptions = desc;
|
---|
245 |
|
---|
246 |
|
---|
247 | /**** get 10 or so clusters ****/
|
---|
248 |
|
---|
249 | Vector[] clusters = ck_server.getClusters();
|
---|
250 |
|
---|
251 | data.clusters = clusters;
|
---|
252 |
|
---|
253 |
|
---|
254 | double[][] centroids = ck_server.getCentroids();
|
---|
255 |
|
---|
256 |
|
---|
257 | /**** send their centroids through sammon mapping ****/
|
---|
258 |
|
---|
259 | Point2D[] sammon = ck_server.getSammonMap(centroids);
|
---|
260 |
|
---|
261 | System.err.println("Num descriptions: " + descriptions.size());
|
---|
262 | System.err.println("Num docs: " + docs.length);
|
---|
263 | System.err.println("Num desc: " + desc.length);
|
---|
264 |
|
---|
265 | data.sammon = sammon;
|
---|
266 | System.err.println("end of try in generatedatablock");
|
---|
267 | } catch (Exception e) {
|
---|
268 | System.err.println("VisServlet: computing clustering Error: "+e);
|
---|
269 | e.printStackTrace();
|
---|
270 | }
|
---|
271 | ck_server = null;
|
---|
272 | System.err.println("returning data");
|
---|
273 | return data;
|
---|
274 | }
|
---|
275 |
|
---|
276 | }
|
---|
277 |
|
---|
278 |
|
---|
279 |
|
---|
280 |
|
---|
281 |
|
---|
282 |
|
---|
283 |
|
---|
284 |
|
---|
285 |
|
---|
286 |
|
---|
287 |
|
---|