source: main/trunk/greenstone3/src/java/org/greenstone/gsdl3/service/PhindPhraseBrowse.java

Last change on this file was 38949, checked in by anupama, 5 days ago

Like GsdlCollageApplet, JPhind when run as a commandline application or applet (as opposed to a webswing application/applet) also needs webswing-api.jar to be in the web/applet folder, since we started importing webswing packages (even if not run as a webswing program). 1. Adjusted the now unused service PhindPhraseBrowse's applet archive attribute to include this jar file in the list of those needed. 2. Mentioned the jar in the comment in JPhind.java, with a sample command showing how to successfully launch JPhind from the commandline. 3. build.xml's compile-core target now tries to put this jar file into web/applet if it doesn't yet exist there. But this assumes that ext/webswing has already been set up at this stage, which I think is the case.

  • Property svn:keywords set to Author Date Id Revision
File size: 16.9 KB
Line 
1/*
2 * PhindPhraseBrowse.java
3 * Copyright (C) 2002 New Zealand Digital Library, http://www.nzdl.org
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
18 */
19package org.greenstone.gsdl3.service;
20
21import org.greenstone.gsdl3.util.*;
22
23import org.greenstone.mgpp.*;
24import org.w3c.dom.Document;
25import org.w3c.dom.Node;
26import org.w3c.dom.Element;
27import org.w3c.dom.Text;
28
29import java.util.Vector;
30import java.util.HashMap;
31import java.io.File;
32import java.io.Serializable;
33
34import org.apache.log4j.*;
35
36/**
37 * PhindServices - the phind phrase browsing service
38 *
39 */
40public class PhindPhraseBrowse
41 extends ServiceRack {
42
43 static Logger logger = Logger.getLogger(org.greenstone.gsdl3.service.PhindPhraseBrowse.class.getName());
44
45 // the services on offer
46 private static final String PHIND_SERVICE = "PhindApplet";
47
48 private static MGPPRetrieveWrapper mgpp_retrieve_src=null;
49 private static MGPPSearchWrapper mgpp_search_src=null;
50 private String basepath = null;
51
52 private Element applet_description = null;
53
54 public PhindPhraseBrowse() {
55 if(this.mgpp_retrieve_src == null) {
56 this.mgpp_retrieve_src = new MGPPRetrieveWrapper();
57 }
58 if(this.mgpp_search_src == null) {
59 this.mgpp_search_src = new MGPPSearchWrapper();
60 }
61 // set up the default params
62 this.mgpp_search_src.setQueryLevel("Document");
63 this.mgpp_search_src.setReturnLevel("Document");
64 this.mgpp_search_src.setMaxDocs(5);
65 this.mgpp_search_src.setStem(false);
66 this.mgpp_search_src.setCase(true);
67 }
68
69 public void cleanUp() {
70 super.cleanUp();
71 this.mgpp_search_src.unloadIndexData();
72 }
73
74 /** configure the service module
75 *
76 * @param info a DOM Element containing any config info for the service
77 * @return true if configured
78 */
79 public boolean configure(Element info, Element extra_info) {
80
81 if (!super.configure(info, extra_info)){
82 return false;
83 }
84
85 logger.info("configuring PhindPhraseBrowse");
86
87 // set up short_service_info_ - for now just has name and type
88 Element e = this.desc_doc.createElement(GSXML.SERVICE_ELEM);
89 e.setAttribute(GSXML.TYPE_ATT, GSXML.SERVICE_TYPE_APPLET);
90 e.setAttribute(GSXML.NAME_ATT, PHIND_SERVICE);
91 this.short_service_info.appendChild(e);
92
93 // set up the static applet description
94
95 applet_description = this.desc_doc.createElement(GSXML.SERVICE_ELEM);
96 applet_description.setAttribute(GSXML.TYPE_ATT, GSXML.SERVICE_TYPE_APPLET);
97 applet_description.setAttribute(GSXML.NAME_ATT, PHIND_SERVICE);
98
99 // add in the applet info for the phind applet
100 // need to make this dynamic - library names etc
101 // change the applet params - have a single param with the library name
102 // this is left blank at this end, and must be filled in by applet action - if the library name is not needed, this param is left out
103 // phindcgi param now is not complete - library must be prepended to it.
104 String app_info = "<"+GSXML.APPLET_ELEM+" CODEBASE='applet' CODE='org.greenstone.applet.phind.JPhind.class' ARCHIVE='phind.jar, xercesImpl.jar, xml-apis.jar, webswing-api.jar' WIDTH='500' HEIGHT='400'><PARAM NAME='library' VALUE=''/> <PARAM NAME='phindcgi' VALUE='?";
105 app_info += GSParams.ACTION +"=a&amp;"+GSParams.REQUEST_TYPE +"=r&amp;"+GSParams.SERVICE+"="+PHIND_SERVICE+"&amp;"+GSParams.OUTPUT+"=xml&amp;"+GSParams.RESPONSE_ONLY+"=1'/>";
106 app_info +="<PARAM NAME='collection' VALUE='";
107 app_info += this.cluster_name;
108 app_info += "'/> <PARAM NAME='classifier' VALUE='1'/> <PARAM NAME='orientation' VALUE='vertical'/> <PARAM NAME='depth' VALUE='2'/> <PARAM NAME='resultorder' VALUE='L,l,E,e,D,d'/> <PARAM NAME='backdrop' VALUE='interfaces/default/images/phindbg1.jpg'/><PARAM NAME='fontsize' VALUE='10'/> <PARAM NAME='blocksize' VALUE='10'/>The Phind java applet.</"+GSXML.APPLET_ELEM+">";
109
110 Document dom = this.converter.getDOM(app_info);
111 if (dom==null) {
112 logger.error("Couldn't parse applet info");
113 return false;
114 }
115 Element app_elem = dom.getDocumentElement();
116 applet_description.appendChild(this.desc_doc.importNode(app_elem, true));
117
118 return true;
119 }
120
121 protected Element getServiceDescription(Document doc, String service, String lang, String subset) {
122 if (!service.equals(PHIND_SERVICE)) {
123 return null;
124 }
125 Element describe = (Element)doc.importNode(applet_description,true);
126
127 Element el1 = GSXML.createDisplayTextElement(doc, GSXML.DISPLAY_TEXT_NAME, getTextString(PHIND_SERVICE+".name", lang));
128 describe.appendChild(el1);
129
130 Element el2 = GSXML.createDisplayTextElement(doc, GSXML.DISPLAY_TEXT_DESCRIPTION, getTextString(PHIND_SERVICE+".description", lang));
131 describe.appendChild(el2);
132
133 return describe;
134 }
135
136 protected Element processPhindApplet(Element request) {
137 Document result_doc = XMLConverter.newDOM();
138 Element param_elem = (Element)GSXML.getChildByTagName(request, GSXML.PARAM_ELEM+GSXML.LIST_MODIFIER);
139 HashMap<String, Serializable> params = GSXML.extractParams(param_elem, false);
140
141 long first_e = Long.parseLong((String)params.get("pfe"));
142 long last_e = Long.parseLong((String)params.get("ple"));
143 long first_l = Long.parseLong((String)params.get("pfl"));
144 long last_l = Long.parseLong((String)params.get("pll"));
145 long first_d = Long.parseLong((String)params.get("pfd"));
146 long last_d = Long.parseLong((String)params.get("pld"));
147
148 long phrase;
149 String phrase_str = (String)params.get("ppnum");
150 if (phrase_str == null || phrase_str.equals("")) {
151 phrase=0;
152 } else {
153 phrase = Long.parseLong(phrase_str);
154 }
155 String word = (String)params.get("pptext");
156 String phind_index = (String)params.get("pc");
157 // the location of the mgpp database files
158 this.basepath = GSFile.phindBaseDir(this.site_home, this.cluster_name, phind_index);
159
160 // the result element
161 Element result = result_doc.createElement(GSXML.RESPONSE_ELEM);
162 result.setAttribute(GSXML.FROM_ATT, PHIND_SERVICE);
163 result.setAttribute(GSXML.TYPE_ATT, GSXML.REQUEST_TYPE_PROCESS);
164
165 // applet result info must be in appletInfo element
166 Element applet_data = result_doc.createElement(GSXML.APPLET_DATA_ELEM);
167 result.appendChild(applet_data);
168 Element phind_data = result_doc.createElement("phindData");
169 applet_data.appendChild(phind_data);
170
171
172 // if we dont know the phrase number, look it up
173 if (phrase == 0) {
174 if (word==null || word.equals("")) {
175 Element error = phindError(result_doc, "no word or phrase");
176 phind_data.appendChild(error);
177 return result;
178 }
179 phrase = findPhraseNumberFromWord( word);
180 }
181 if (phrase==0) {
182 // the word is not in the collection
183 // return a phind error string
184 Element error = phindError(result_doc, "the term "+word+" is not in the collection");
185 phind_data.appendChild(error);
186 return result;
187 }
188
189 // get the phrase data into the phind_data node
190 getPhraseData(phind_data, phrase, first_l, last_l,
191 first_e, last_e, first_d, last_d);
192 return result;
193
194
195 }// processPhindApplet
196
197 protected long findPhraseNumberFromWord(String word) {
198 synchronized (mgpp_search_src) {
199 // set the mgpp index data - we are looking up pword
200 mgpp_search_src.loadIndexData(this.basepath+File.separatorChar+"pword");
201
202 mgpp_search_src.runQuery(word);
203
204 MGPPQueryResult res = mgpp_search_src.getQueryResult();
205 Vector docs = res.getDocs();
206 if (docs.size()==0) {
207 // phrase not found
208 return 0;
209 }
210 MGPPDocInfo doc = (MGPPDocInfo)docs.firstElement();
211 return doc.num_;
212 }
213 }
214
215 protected boolean getPhraseData(Element phind_data,
216 long phrase, long first_l, long last_l,
217 long first_e, long last_e, long first_d,
218 long last_d) {
219
220 synchronized (mgpp_retrieve_src) {
221 String record = this.mgpp_retrieve_src.getDocument(this.basepath+File.separatorChar+"pdata", "Document",
222 phrase);
223 if (record.equals("")) {
224 Element error = phindError(phind_data.getOwnerDocument(), "somethings gone wrong - we haven't got a record for phrase number "+phrase);
225 phind_data.appendChild(error);
226 return false;
227 }
228
229 // parse the record - its in gordons cryptic form
230 // ":word:tf:ef:df:el:dl:lf:ll"
231 // el: e,e,e
232 // dl: d;f,d;f,
233 // lf and ll may be null
234 // l: type,dest, dest; type,dest,dest
235
236 // ignore everything up to and including first colon (has
237 // <Document>3505: at the start)
238 record = record.substring(record.indexOf(':')+1);
239
240 // split on ':'
241 String [] fields = record.split(":");
242 String word = fields[0];
243 String tf = fields[1];
244 String ef = fields[2];
245 String df = fields[3];
246
247
248 String expansions = fields[4];
249 String documents = fields[5];
250 String lf = "0";
251 String linklist = "";
252 if (fields.length > 7) {// have thesaurus stuff
253 lf =fields[6];
254 linklist = fields[7];
255 }
256
257 // the phindData attributes and phrase
258 phind_data.setAttribute("id", Long.toString(phrase));
259 phind_data.setAttribute("df", df);
260 phind_data.setAttribute("ef", ef);
261 phind_data.setAttribute("lf", lf);
262 phind_data.setAttribute("tf", tf);
263 // GSXML.createTextElement(result_doc, "phrase", word); ??? - this needs to be appended somewhere????
264
265 addExpansionList(phind_data, expansions, word, ef, first_e, last_e);
266 addDocumentList(phind_data, documents, word, df, first_d, last_d);
267 if (!lf.equals("0")) {
268 addThesaurusList(phind_data, linklist, word, lf, first_l, last_l);
269 }
270 return true;
271 } // end of synchronized (mgpp_retrieve_src)
272 }
273
274 protected boolean addExpansionList( Element phind_data, String record,
275 String word,
276 String freq,
277 long first, long last) {
278 Document phind_doc = phind_data.getOwnerDocument();
279 Element expansion_list = phind_doc.createElement("expansionList");
280 phind_data.appendChild(expansion_list);
281 expansion_list.setAttribute("length", freq);
282 expansion_list.setAttribute("start", Long.toString(first));
283 expansion_list.setAttribute("end", Long.toString(last));
284
285 // get the list of strings
286 String [] expansions = record.split(",");
287 int length = expansions.length;
288 if (length < last) last = length;
289 for (long i = first; i < last; i++) {
290 long num = Long.parseLong(expansions[(int)i]);
291 Element expansion = getExpansion(phind_doc, num, word);
292 expansion.setAttribute("num", Long.toString(i));
293 expansion_list.appendChild(expansion);
294 }
295 return true;
296 }
297
298 protected Element getExpansion(Document phind_doc, long phrase_num,
299 String orig_phrase) {
300
301 // look up the phrase in the pdata thingy
302 String record = this.mgpp_retrieve_src.getDocument(this.basepath+File.separatorChar+"pdata", "Document",
303 phrase_num);
304
305 if (record ==null || record.equals("")) return null;
306
307 // ignore everything up to and including first colon
308 record = record.substring(record.indexOf(':')+1);
309
310 String [] fields = record.split(":");
311 String phrase = fields[0];
312 String tf = fields[1];
313 //String ef = fields[2]; dont use this
314 String df = fields[3];
315
316 Element expansion = phind_doc.createElement("expansion");
317 expansion.setAttribute("tf", tf);
318 expansion.setAttribute("df", df);
319 expansion.setAttribute("id", Long.toString(phrase_num));
320
321 // get teh suffix and prefix
322 String [] ends = splitPhraseOnWord(phrase, orig_phrase);
323 if (!ends[0].equals("")) {
324 expansion.appendChild(GSXML.createTextElement(phind_doc, "prefix", ends[0]));
325 }
326 if (!ends[1].equals("")) {
327 expansion.appendChild(GSXML.createTextElement(phind_doc, "suffix", ends[1]));
328 }
329
330 return expansion;
331
332 }
333
334 protected boolean addDocumentList(Element phind_data, String record,
335 String word,
336 String freq,
337 long first, long last) {
338 Document phind_doc = phind_data.getOwnerDocument();
339 Element document_list = phind_doc.createElement("documentList");
340 phind_data.appendChild(document_list);
341 document_list.setAttribute("length", freq);
342 document_list.setAttribute("start", Long.toString(first));
343 document_list.setAttribute("end", Long.toString(last));
344
345 // get the list of doc,freq
346 String [] doc_freqs = record.split(";");
347 int length = doc_freqs.length;
348 if (length<last) last=length;
349
350 for (long i = first; i < last; i++) {
351 String doc_elem = doc_freqs[(int)i];
352 int p = doc_elem.indexOf(',');
353 long doc_num;
354 String doc_freq;
355 if (p == -1) { // there is no freq in the record
356 doc_num =Long.parseLong(doc_elem);
357 doc_freq = "1";
358 } else {
359 doc_num = Long.parseLong(doc_elem.substring(0,p));
360 doc_freq = doc_elem.substring(p+1);
361 }
362 Element document = getDocument(phind_doc, doc_num);
363 document.setAttribute("freq", doc_freq);
364 document.setAttribute("num", Long.toString(i));
365 document_list.appendChild(document);
366 }
367
368
369 return true;
370 }
371
372
373 protected Element getDocument(Document phind_doc, long doc_num) {
374
375 // look up the phrase in the docs thingy
376 String record = this.mgpp_retrieve_src.getDocument(this.basepath+File.separatorChar+"docs", "Document",
377 doc_num);
378
379 if (record ==null || record.equals("")) return null;
380
381 // ignore everything up to and including first \t
382 record = record.substring(record.indexOf('\t')+1);
383
384 String [] fields = record.split("\t");
385 String hash = fields[0];
386 String title = fields[1];
387
388 Element d = phind_doc.createElement("document");
389 d.setAttribute("hash", hash);
390 d.appendChild(GSXML.createTextElement(phind_doc, "title", title));
391
392 return d;
393
394 }
395 protected boolean addThesaurusList(Element phind_data, String record,
396 String word,
397 String freq,
398 long first, long last) {
399
400 Document phind_doc = phind_data.getOwnerDocument();
401 Element thesaurus_list = phind_doc.createElement("thesaurusList");
402 phind_data.appendChild(thesaurus_list);
403 thesaurus_list.setAttribute("length", freq);
404 thesaurus_list.setAttribute("start", Long.toString(first));
405 thesaurus_list.setAttribute("end", Long.toString(last));
406
407 // get the list of type,dest,dest
408 String [] links = record.split(";");
409 int length = links.length;
410 long index = 0;
411 for (int i = 0; i < length; i++) { // go through the entries
412 String link_info = links[(int)i];
413 String [] items = link_info.split(",");
414 // the first entry is teh type
415 String type = items[0];
416 for (int j = 1; j<items.length; j++, index++) {
417 if (index >= first && index < last) { // only output the ones we want
418 long phrase = Long.parseLong(items[j]);
419 Element t = getThesaurus(phind_doc, phrase);
420 t.setAttribute("type", type);
421 thesaurus_list.appendChild(t);
422 }
423 }
424 }
425
426 return true;
427 }
428
429 protected Element getThesaurus(Document phind_doc, long phrase_num) {
430
431 // look up the phrase in the pdata thingy
432 String record = this.mgpp_retrieve_src.getDocument(this.basepath+File.separatorChar+"pdata", "Document",
433 phrase_num);
434
435 if (record ==null || record.equals("")) return null;
436
437 // ignore everything up to and including first colon
438 record = record.substring(record.indexOf(':')+1);
439
440 String [] fields = record.split(":");
441 String phrase = fields[0];
442 String tf = fields[1];
443 //String ef = fields[2]; dont use this
444 String df = fields[3];
445
446 Element thesaurus = phind_doc.createElement("thesaurus");
447 thesaurus.setAttribute("tf", tf);
448 thesaurus.setAttribute("df", df);
449 thesaurus.setAttribute("id", Long.toString(phrase_num));
450 thesaurus.appendChild(GSXML.createTextElement(phind_doc, "phrase", phrase));
451 return thesaurus;
452
453 }
454
455 /** returns an array of two elements - the prefix and the suffix*/
456 protected String [] splitPhraseOnWord(String phrase, String word) {
457
458 if (word.equals("")) {
459
460 String [] res = {phrase, ""};
461 return res;
462 }
463 // use 2 so that we only split on the first occurrance. trailing empty strings should be included
464 String [] result = phrase.split(word, 2);
465 return result;
466
467 }
468
469 protected Element phindError(Document phind_doc, String message) {
470 Element e = phind_doc.createElement("phindError");
471 Text t = phind_doc.createTextNode(message);
472 e.appendChild(t);
473 return e;
474 }
475
476}
477
Note: See TracBrowser for help on using the repository browser.