Context Navigation

← Previous Revision
Next Revision →
Blame
Revision Log

PhindPhraseBrowse.java

Last change on this file was 38949, checked in by anupama, 5 days ago
Like GsdlCollageApplet, JPhind also needs webswing-api.jar to be in the web/applet folder when run as a command-line application or applet (as opposed to a webswing application/applet), since we started importing webswing packages (even if not run as a webswing program). 1. Adjusted the applet archive attribute of the now-unused PhindPhraseBrowse service to include this jar file in the list of those needed. 2. Mentioned the jar in the comment in JPhind.java, with a sample command showing how to successfully launch JPhind from the command line. 3. build.xml's compile-core target now tries to put this jar file into web/applet if it doesn't yet exist there. This assumes that ext/webswing has already been set up at this stage, which I think is the case.
Property svn:keywords set to `Author Date Id Revision`
File size: 16.9 KB

Line
1	/*
2	* PhindPhraseBrowse.java
3	* Copyright (C) 2002 New Zealand Digital Library, http://www.nzdl.org
4	*
5	* This program is free software; you can redistribute it and/or modify
6	* it under the terms of the GNU General Public License as published by
7	* the Free Software Foundation; either version 2 of the License, or
8	* (at your option) any later version.
9	*
10	* This program is distributed in the hope that it will be useful,
11	* but WITHOUT ANY WARRANTY; without even the implied warranty of
12	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13	* GNU General Public License for more details.
14	*
15	* You should have received a copy of the GNU General Public License
16	* along with this program; if not, write to the Free Software
17	* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
18	*/
19	package org.greenstone.gsdl3.service;
20
21	import org.greenstone.gsdl3.util.*;
22
23	import org.greenstone.mgpp.*;
24	import org.w3c.dom.Document;
25	import org.w3c.dom.Node;
26	import org.w3c.dom.Element;
27	import org.w3c.dom.Text;
28
29	import java.util.Vector;
30	import java.util.HashMap;
31	import java.io.File;
32	import java.io.Serializable;
33
34	import org.apache.log4j.*;
35
36	/**
37	* PhindServices - the phind phrase browsing service
38	*
39	*/
40	public class PhindPhraseBrowse
41	extends ServiceRack {
42
43	static Logger logger = Logger.getLogger(org.greenstone.gsdl3.service.PhindPhraseBrowse.class.getName());
44
45	// the services on offer
46	private static final String PHIND_SERVICE = "PhindApplet";
47
48	private static MGPPRetrieveWrapper mgpp_retrieve_src=null;
49	private static MGPPSearchWrapper mgpp_search_src=null;
50	private String basepath = null;
51
52	private Element applet_description = null;
53
54	public PhindPhraseBrowse() {
55	if(this.mgpp_retrieve_src == null) {
56	this.mgpp_retrieve_src = new MGPPRetrieveWrapper();
57	}
58	if(this.mgpp_search_src == null) {
59	this.mgpp_search_src = new MGPPSearchWrapper();
60	}
61	// set up the default params
62	this.mgpp_search_src.setQueryLevel("Document");
63	this.mgpp_search_src.setReturnLevel("Document");
64	this.mgpp_search_src.setMaxDocs(5);
65	this.mgpp_search_src.setStem(false);
66	this.mgpp_search_src.setCase(true);
67	}
68
69	public void cleanUp() {
70	super.cleanUp();
71	this.mgpp_search_src.unloadIndexData();
72	}
73
74	/** configure the service module
75	*
76	* @param info a DOM Element containing any config info for the service
77	* @return true if configured
78	*/
79	public boolean configure(Element info, Element extra_info) {
80
81	if (!super.configure(info, extra_info)){
82	return false;
83	}
84
85	logger.info("configuring PhindPhraseBrowse");
86
87	// set up short_service_info_ - for now just has name and type
88	Element e = this.desc_doc.createElement(GSXML.SERVICE_ELEM);
89	e.setAttribute(GSXML.TYPE_ATT, GSXML.SERVICE_TYPE_APPLET);
90	e.setAttribute(GSXML.NAME_ATT, PHIND_SERVICE);
91	this.short_service_info.appendChild(e);
92
93	// set up the static applet description
94
95	applet_description = this.desc_doc.createElement(GSXML.SERVICE_ELEM);
96	applet_description.setAttribute(GSXML.TYPE_ATT, GSXML.SERVICE_TYPE_APPLET);
97	applet_description.setAttribute(GSXML.NAME_ATT, PHIND_SERVICE);
98
99	// add in the applet info for the phind applet
100	// need to make this dynamic - library names etc
101	// change the applet params - have a single param with the library name
102	// this is left blank at this end, and must be filled in by applet action - if the library name is not needed, this param is left out
103	// phindcgi param now is not complete - library must be prepended to it.
104	String app_info = "<"+GSXML.APPLET_ELEM+" CODEBASE='applet' CODE='org.greenstone.applet.phind.JPhind.class' ARCHIVE='phind.jar, xercesImpl.jar, xml-apis.jar, webswing-api.jar' WIDTH='500' HEIGHT='400'><PARAM NAME='library' VALUE=''/> <PARAM NAME='phindcgi' VALUE='?";
105	app_info += GSParams.ACTION +"=a&"+GSParams.REQUEST_TYPE +"=r&"+GSParams.SERVICE+"="+PHIND_SERVICE+"&"+GSParams.OUTPUT+"=xml&"+GSParams.RESPONSE_ONLY+"=1'/>";
106	app_info +="<PARAM NAME='collection' VALUE='";
107	app_info += this.cluster_name;
108	app_info += "'/> <PARAM NAME='classifier' VALUE='1'/> <PARAM NAME='orientation' VALUE='vertical'/> <PARAM NAME='depth' VALUE='2'/> <PARAM NAME='resultorder' VALUE='L,l,E,e,D,d'/> <PARAM NAME='backdrop' VALUE='interfaces/default/images/phindbg1.jpg'/><PARAM NAME='fontsize' VALUE='10'/> <PARAM NAME='blocksize' VALUE='10'/>The Phind java applet.</"+GSXML.APPLET_ELEM+">";
109
110	Document dom = this.converter.getDOM(app_info);
111	if (dom==null) {
112	logger.error("Couldn't parse applet info");
113	return false;
114	}
115	Element app_elem = dom.getDocumentElement();
116	applet_description.appendChild(this.desc_doc.importNode(app_elem, true));
117
118	return true;
119	}
120
121	protected Element getServiceDescription(Document doc, String service, String lang, String subset) {
122	if (!service.equals(PHIND_SERVICE)) {
123	return null;
124	}
125	Element describe = (Element)doc.importNode(applet_description,true);
126
127	Element el1 = GSXML.createDisplayTextElement(doc, GSXML.DISPLAY_TEXT_NAME, getTextString(PHIND_SERVICE+".name", lang));
128	describe.appendChild(el1);
129
130	Element el2 = GSXML.createDisplayTextElement(doc, GSXML.DISPLAY_TEXT_DESCRIPTION, getTextString(PHIND_SERVICE+".description", lang));
131	describe.appendChild(el2);
132
133	return describe;
134	}
135
136	protected Element processPhindApplet(Element request) {
137	Document result_doc = XMLConverter.newDOM();
138	Element param_elem = (Element)GSXML.getChildByTagName(request, GSXML.PARAM_ELEM+GSXML.LIST_MODIFIER);
139	HashMap<String, Serializable> params = GSXML.extractParams(param_elem, false);
140
141	long first_e = Long.parseLong((String)params.get("pfe"));
142	long last_e = Long.parseLong((String)params.get("ple"));
143	long first_l = Long.parseLong((String)params.get("pfl"));
144	long last_l = Long.parseLong((String)params.get("pll"));
145	long first_d = Long.parseLong((String)params.get("pfd"));
146	long last_d = Long.parseLong((String)params.get("pld"));
147
148	long phrase;
149	String phrase_str = (String)params.get("ppnum");
150	if (phrase_str == null \|\| phrase_str.equals("")) {
151	phrase=0;
152	} else {
153	phrase = Long.parseLong(phrase_str);
154	}
155	String word = (String)params.get("pptext");
156	String phind_index = (String)params.get("pc");
157	// the location of the mgpp database files
158	this.basepath = GSFile.phindBaseDir(this.site_home, this.cluster_name, phind_index);
159
160	// the result element
161	Element result = result_doc.createElement(GSXML.RESPONSE_ELEM);
162	result.setAttribute(GSXML.FROM_ATT, PHIND_SERVICE);
163	result.setAttribute(GSXML.TYPE_ATT, GSXML.REQUEST_TYPE_PROCESS);
164
165	// applet result info must be in appletInfo element
166	Element applet_data = result_doc.createElement(GSXML.APPLET_DATA_ELEM);
167	result.appendChild(applet_data);
168	Element phind_data = result_doc.createElement("phindData");
169	applet_data.appendChild(phind_data);
170
171
172	// if we dont know the phrase number, look it up
173	if (phrase == 0) {
174	if (word==null \|\| word.equals("")) {
175	Element error = phindError(result_doc, "no word or phrase");
176	phind_data.appendChild(error);
177	return result;
178	}
179	phrase = findPhraseNumberFromWord( word);
180	}
181	if (phrase==0) {
182	// the word is not in the collection
183	// return a phind error string
184	Element error = phindError(result_doc, "the term "+word+" is not in the collection");
185	phind_data.appendChild(error);
186	return result;
187	}
188
189	// get the phrase data into the phind_data node
190	getPhraseData(phind_data, phrase, first_l, last_l,
191	first_e, last_e, first_d, last_d);
192	return result;
193
194
195	}// processPhindApplet
196
197	protected long findPhraseNumberFromWord(String word) {
198	synchronized (mgpp_search_src) {
199	// set the mgpp index data - we are looking up pword
200	mgpp_search_src.loadIndexData(this.basepath+File.separatorChar+"pword");
201
202	mgpp_search_src.runQuery(word);
203
204	MGPPQueryResult res = mgpp_search_src.getQueryResult();
205	Vector docs = res.getDocs();
206	if (docs.size()==0) {
207	// phrase not found
208	return 0;
209	}
210	MGPPDocInfo doc = (MGPPDocInfo)docs.firstElement();
211	return doc.num_;
212	}
213	}
214
215	protected boolean getPhraseData(Element phind_data,
216	long phrase, long first_l, long last_l,
217	long first_e, long last_e, long first_d,
218	long last_d) {
219
220	synchronized (mgpp_retrieve_src) {
221	String record = this.mgpp_retrieve_src.getDocument(this.basepath+File.separatorChar+"pdata", "Document",
222	phrase);
223	if (record.equals("")) {
224	Element error = phindError(phind_data.getOwnerDocument(), "somethings gone wrong - we haven't got a record for phrase number "+phrase);
225	phind_data.appendChild(error);
226	return false;
227	}
228
229	// parse the record - its in gordons cryptic form
230	// ":word:tf:ef:df:el:dl:lf:ll"
231	// el: e,e,e
232	// dl: d;f,d;f,
233	// lf and ll may be null
234	// l: type,dest, dest; type,dest,dest
235
236	// ignore everything up to and including first colon (has
237	// <Document>3505: at the start)
238	record = record.substring(record.indexOf(':')+1);
239
240	// split on ':'
241	String [] fields = record.split(":");
242	String word = fields[0];
243	String tf = fields[1];
244	String ef = fields[2];
245	String df = fields[3];
246
247
248	String expansions = fields[4];
249	String documents = fields[5];
250	String lf = "0";
251	String linklist = "";
252	if (fields.length > 7) {// have thesaurus stuff
253	lf =fields[6];
254	linklist = fields[7];
255	}
256
257	// the phindData attributes and phrase
258	phind_data.setAttribute("id", Long.toString(phrase));
259	phind_data.setAttribute("df", df);
260	phind_data.setAttribute("ef", ef);
261	phind_data.setAttribute("lf", lf);
262	phind_data.setAttribute("tf", tf);
263	// GSXML.createTextElement(result_doc, "phrase", word); ??? - this needs to be appended somewhere????
264
265	addExpansionList(phind_data, expansions, word, ef, first_e, last_e);
266	addDocumentList(phind_data, documents, word, df, first_d, last_d);
267	if (!lf.equals("0")) {
268	addThesaurusList(phind_data, linklist, word, lf, first_l, last_l);
269	}
270	return true;
271	} // end of synchronized (mgpp_retrieve_src)
272	}
273
274	protected boolean addExpansionList( Element phind_data, String record,
275	String word,
276	String freq,
277	long first, long last) {
278	Document phind_doc = phind_data.getOwnerDocument();
279	Element expansion_list = phind_doc.createElement("expansionList");
280	phind_data.appendChild(expansion_list);
281	expansion_list.setAttribute("length", freq);
282	expansion_list.setAttribute("start", Long.toString(first));
283	expansion_list.setAttribute("end", Long.toString(last));
284
285	// get the list of strings
286	String [] expansions = record.split(",");
287	int length = expansions.length;
288	if (length < last) last = length;
289	for (long i = first; i < last; i++) {
290	long num = Long.parseLong(expansions[(int)i]);
291	Element expansion = getExpansion(phind_doc, num, word);
292	expansion.setAttribute("num", Long.toString(i));
293	expansion_list.appendChild(expansion);
294	}
295	return true;
296	}
297
298	protected Element getExpansion(Document phind_doc, long phrase_num,
299	String orig_phrase) {
300
301	// look up the phrase in the pdata thingy
302	String record = this.mgpp_retrieve_src.getDocument(this.basepath+File.separatorChar+"pdata", "Document",
303	phrase_num);
304
305	if (record ==null \|\| record.equals("")) return null;
306
307	// ignore everything up to and including first colon
308	record = record.substring(record.indexOf(':')+1);
309
310	String [] fields = record.split(":");
311	String phrase = fields[0];
312	String tf = fields[1];
313	//String ef = fields[2]; dont use this
314	String df = fields[3];
315
316	Element expansion = phind_doc.createElement("expansion");
317	expansion.setAttribute("tf", tf);
318	expansion.setAttribute("df", df);
319	expansion.setAttribute("id", Long.toString(phrase_num));
320
321	// get teh suffix and prefix
322	String [] ends = splitPhraseOnWord(phrase, orig_phrase);
323	if (!ends[0].equals("")) {
324	expansion.appendChild(GSXML.createTextElement(phind_doc, "prefix", ends[0]));
325	}
326	if (!ends[1].equals("")) {
327	expansion.appendChild(GSXML.createTextElement(phind_doc, "suffix", ends[1]));
328	}
329
330	return expansion;
331
332	}
333
334	protected boolean addDocumentList(Element phind_data, String record,
335	String word,
336	String freq,
337	long first, long last) {
338	Document phind_doc = phind_data.getOwnerDocument();
339	Element document_list = phind_doc.createElement("documentList");
340	phind_data.appendChild(document_list);
341	document_list.setAttribute("length", freq);
342	document_list.setAttribute("start", Long.toString(first));
343	document_list.setAttribute("end", Long.toString(last));
344
345	// get the list of doc,freq
346	String [] doc_freqs = record.split(";");
347	int length = doc_freqs.length;
348	if (length<last) last=length;
349
350	for (long i = first; i < last; i++) {
351	String doc_elem = doc_freqs[(int)i];
352	int p = doc_elem.indexOf(',');
353	long doc_num;
354	String doc_freq;
355	if (p == -1) { // there is no freq in the record
356	doc_num =Long.parseLong(doc_elem);
357	doc_freq = "1";
358	} else {
359	doc_num = Long.parseLong(doc_elem.substring(0,p));
360	doc_freq = doc_elem.substring(p+1);
361	}
362	Element document = getDocument(phind_doc, doc_num);
363	document.setAttribute("freq", doc_freq);
364	document.setAttribute("num", Long.toString(i));
365	document_list.appendChild(document);
366	}
367
368
369	return true;
370	}
371
372
373	protected Element getDocument(Document phind_doc, long doc_num) {
374
375	// look up the phrase in the docs thingy
376	String record = this.mgpp_retrieve_src.getDocument(this.basepath+File.separatorChar+"docs", "Document",
377	doc_num);
378
379	if (record ==null \|\| record.equals("")) return null;
380
381	// ignore everything up to and including first \t
382	record = record.substring(record.indexOf('\t')+1);
383
384	String [] fields = record.split("\t");
385	String hash = fields[0];
386	String title = fields[1];
387
388	Element d = phind_doc.createElement("document");
389	d.setAttribute("hash", hash);
390	d.appendChild(GSXML.createTextElement(phind_doc, "title", title));
391
392	return d;
393
394	}
395	protected boolean addThesaurusList(Element phind_data, String record,
396	String word,
397	String freq,
398	long first, long last) {
399
400	Document phind_doc = phind_data.getOwnerDocument();
401	Element thesaurus_list = phind_doc.createElement("thesaurusList");
402	phind_data.appendChild(thesaurus_list);
403	thesaurus_list.setAttribute("length", freq);
404	thesaurus_list.setAttribute("start", Long.toString(first));
405	thesaurus_list.setAttribute("end", Long.toString(last));
406
407	// get the list of type,dest,dest
408	String [] links = record.split(";");
409	int length = links.length;
410	long index = 0;
411	for (int i = 0; i < length; i++) { // go through the entries
412	String link_info = links[(int)i];
413	String [] items = link_info.split(",");
414	// the first entry is teh type
415	String type = items[0];
416	for (int j = 1; j<items.length; j++, index++) {
417	if (index >= first && index < last) { // only output the ones we want
418	long phrase = Long.parseLong(items[j]);
419	Element t = getThesaurus(phind_doc, phrase);
420	t.setAttribute("type", type);
421	thesaurus_list.appendChild(t);
422	}
423	}
424	}
425
426	return true;
427	}
428
429	protected Element getThesaurus(Document phind_doc, long phrase_num) {
430
431	// look up the phrase in the pdata thingy
432	String record = this.mgpp_retrieve_src.getDocument(this.basepath+File.separatorChar+"pdata", "Document",
433	phrase_num);
434
435	if (record ==null \|\| record.equals("")) return null;
436
437	// ignore everything up to and including first colon
438	record = record.substring(record.indexOf(':')+1);
439
440	String [] fields = record.split(":");
441	String phrase = fields[0];
442	String tf = fields[1];
443	//String ef = fields[2]; dont use this
444	String df = fields[3];
445
446	Element thesaurus = phind_doc.createElement("thesaurus");
447	thesaurus.setAttribute("tf", tf);
448	thesaurus.setAttribute("df", df);
449	thesaurus.setAttribute("id", Long.toString(phrase_num));
450	thesaurus.appendChild(GSXML.createTextElement(phind_doc, "phrase", phrase));
451	return thesaurus;
452
453	}
454
455	/** returns an array of two elements - the prefix and the suffix*/
456	protected String [] splitPhraseOnWord(String phrase, String word) {
457
458	if (word.equals("")) {
459
460	String [] res = {phrase, ""};
461	return res;
462	}
463	// use 2 so that we only split on the first occurrance. trailing empty strings should be included
464	String [] result = phrase.split(word, 2);
465	return result;
466
467	}
468
469	protected Element phindError(Document phind_doc, String message) {
470	Element e = phind_doc.createElement("phindError");
471	Text t = phind_doc.createTextNode(message);
472	e.appendChild(t);
473	return e;
474	}
475
476	}
477

Note: See TracBrowser for help on using the repository browser.

Context Navigation

source: main/trunk/greenstone3/src/java/org/greenstone/gsdl3/service/PhindPhraseBrowse.java

Download in other formats: