Context Navigation

source: trunk/gsdl3/src/java/org/greenstone/gsdl3/action/DocumentAction.java@ 4980

Last change on this file since 4980 was 4875, checked in by kjdon, 21 years ago
the goto form uses gp arg - this is the gotopage arg. if present we modify the document_name using the ss extension: docnum.pagenum.ss
Property svn:keywords set to `Author Date Id Revision`
File size: 23.3 KB

Line
1	/*
2	* DocumentAction.java
3	* Copyright (C) 2002 New Zealand Digital Library, http://www.nzdl.org
4	*
5	* This program is free software; you can redistribute it and/or modify
6	* it under the terms of the GNU General Public License as published by
7	* the Free Software Foundation; either version 2 of the License, or
8	* (at your option) any later version.
9	*
10	* This program is distributed in the hope that it will be useful,
11	* but WITHOUT ANY WARRANTY; without even the implied warranty of
12	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13	* GNU General Public License for more details.
14	*
15	* You should have received a copy of the GNU General Public License
16	* along with this program; if not, write to the Free Software
17	* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
18	*/
19	package org.greenstone.gsdl3.action;
20
21	// Greenstone classes
22	import org.greenstone.gsdl3.core.ModuleInterface;
23	import org.greenstone.gsdl3.util.*;
24
25	// XML classes
26	import org.w3c.dom.Document;
27	import org.w3c.dom.Element;
28	import org.w3c.dom.Node;
29	import org.w3c.dom.Text;
30	import org.w3c.dom.NodeList;
31
32	// General Java classes
33	import java.util.HashMap;
34	import java.util.HashSet;
35	import java.io.File;
36
37
38	/** Action class for retrieving Documents via the message router
39	*/
40	public class DocumentAction extends Action {
41
42	// this is used to specify that the sibling nodes of a selected one should be obtained (only honoured for "hierarchy" documents, and only when the value is "1")
43	public static final String SIBLING_ARG = "sib";
44	public static final String DOC_TYPE_ARG = "dt"; // cgi arg naming the document type: "paged" and "hierarchy" get structure requests, a missing value is treated as "simple"
45	public static final String GOTO_PAGE_ARG = "gp"; // cgi arg from the goto form: when non-empty, the document name becomes <doc>.<page>.ss
46
47	/** if this is set to true, when a document is displayed, any annotation
48	* type services (enrich) will be offered to the user as well */
49	protected static final boolean provide_annotations = false; //true; - compile-time switch, the enrich code paths in this class only run when it is true
50
51	public Element process (Element message)
52	{
53	// for now, no subaction eventually we may want to have subactions such as text assoc or something ?
54
55	// the response
56	Element result = doc_.createElement(GSXML.MESSAGE_ELEM);
57	Element page_response = doc_.createElement(GSXML.RESPONSE_ELEM);
58	result.appendChild(page_response);
59
60	// get the request - assume only one
61	Element request = (Element)GSXML.getChildByTagName(message, GSXML.REQUEST_ELEM);
62	Element cgi_paramList = (Element)GSXML.getChildByTagName(request, GSXML.PARAM_ELEM+GSXML.LIST_MODIFIER);
63	HashMap params = GSXML.extractParams(cgi_paramList, false);
64
65	String collection = (String) params.get(GSCGI.COLLECTION_ARG);
66	String lang = request.getAttribute(GSXML.LANG_ATT);
67	String document_name = (String) params.get(GSCGI.DOCUMENT_ARG);
68	if (document_name == null \|\| document_name.equals("")) {
69	System.err.println("DocumentAction Error: no document specified!");
70	return result;
71	}
72	String document_type = (String) params.get(DOC_TYPE_ARG);
73	if (document_type == null) {
74	document_type = "simple";
75	}
76	//whether to retrieve siblings or not
77	boolean get_siblings = false;
78	String sibs = (String) params.get(SIBLING_ARG);
79	if (sibs != null && sibs.equals("1")) {
80	get_siblings = true;
81	}
82
83	String sibling_num = (String) params.get(GOTO_PAGE_ARG);
84	if (sibling_num != null && !sibling_num.equals("")) {
85	// we have to modify the doc name
86	document_name = document_name+"."+sibling_num+".ss";
87	}
88	// get the additional data needed for the page
89	getBackgroundData(page_response, collection, lang);
90	Element format_elem = (Element)GSXML.getChildByTagName(page_response, GSXML.FORMAT_ELEM);
91
92	// the_document is where all the doc info - structure and metadata etc
93	// is added into, to be returned in the page
94	Element the_document = doc_.createElement(GSXML.DOCUMENT_ELEM);
95	page_response.appendChild(the_document);
96
97	// set the doctype from the cgi arg as an attribute
98	the_document.setAttribute(GSXML.DOC_TYPE_ATT, document_type);
99
100	// create a basic doc list containing the current node
101	Element basic_doc_list = doc_.createElement(GSXML.DOC_NODE_ELEM+GSXML.LIST_MODIFIER);
102	Element current_doc = doc_.createElement(GSXML.DOC_NODE_ELEM);
103	basic_doc_list.appendChild(current_doc);
104	current_doc.setAttribute(GSXML.NODE_ID_ATT, document_name);
105
106	// Create a parameter list to specify the required structure information
107	Element ds_param_list = doc_.createElement(GSXML.PARAM_ELEM+GSXML.LIST_MODIFIER);
108
109	Element ds_param = null;
110	boolean get_structure = false;
111	boolean get_structure_info = false;
112	if (document_type.equals("paged")) {
113	get_structure_info = true;
114	// get teh info needed for paged naviagtion
115	ds_param = doc_.createElement(GSXML.PARAM_ELEM);
116	ds_param_list.appendChild(ds_param);
117	ds_param.setAttribute(GSXML.NAME_ATT, "info");
118	ds_param.setAttribute(GSXML.VALUE_ATT, "numSiblings");
119	ds_param = doc_.createElement(GSXML.PARAM_ELEM);
120	ds_param_list.appendChild(ds_param);
121	ds_param.setAttribute(GSXML.NAME_ATT, "info");
122	ds_param.setAttribute(GSXML.VALUE_ATT, "numChildren");
123	ds_param = doc_.createElement(GSXML.PARAM_ELEM);
124	ds_param_list.appendChild(ds_param);
125	ds_param.setAttribute(GSXML.NAME_ATT, "info");
126	ds_param.setAttribute(GSXML.VALUE_ATT, "siblingPosition");
127
128	} else if (document_type.equals("hierarchy")){
129	get_structure = true;
130	// get the info needed for table of contents
131	ds_param = doc_.createElement(GSXML.PARAM_ELEM);
132	ds_param_list.appendChild(ds_param);
133	ds_param.setAttribute(GSXML.NAME_ATT, "structure");
134	ds_param.setAttribute(GSXML.VALUE_ATT, "ancestors");
135	ds_param = doc_.createElement(GSXML.PARAM_ELEM);
136	ds_param_list.appendChild(ds_param);
137	ds_param.setAttribute(GSXML.NAME_ATT, "structure");
138	ds_param.setAttribute(GSXML.VALUE_ATT, "children");
139	if (get_siblings) {
140	ds_param = doc_.createElement(GSXML.PARAM_ELEM);
141	ds_param_list.appendChild(ds_param);
142	ds_param.setAttribute(GSXML.NAME_ATT, "structure");
143	ds_param.setAttribute(GSXML.VALUE_ATT, "siblings");
144	}
145	} else {
146	// we dont need any structure
147	}
148
149	boolean has_dummy = false;
150	if (get_structure \|\| get_structure_info) {
151
152	// Build a request to obtain the document structure
153	Element ds_message = doc_.createElement(GSXML.MESSAGE_ELEM);
154	String to = GSPath.appendLink(collection, "DocumentStructureRetrieve");// Hard-wired?
155	Element ds_request = GSXML.createBasicRequest(doc_,GSXML.REQUEST_TYPE_PROCESS, to, lang);
156	ds_message.appendChild(ds_request);
157	ds_request.appendChild(ds_param_list);
158
159	// create a doc_node_list and put in the doc_node that we are interested in
160	ds_request.appendChild(basic_doc_list);
161
162	// Process the document structure retrieve message
163	Element ds_response_message = (Element) mr_.process(ds_message);
164
165	// get the info and print out
166	String path = GSPath.appendLink(GSXML.RESPONSE_ELEM, GSXML.DOC_NODE_ELEM+GSXML.LIST_MODIFIER);
167	path = GSPath.appendLink(path, GSXML.DOC_NODE_ELEM);
168	path = GSPath.appendLink(path, "nodeStructureInfo");
169	Element ds_response_struct_info = (Element) GSXML.getNodeByPath(ds_response_message, path);
170	// get the doc_node bit
171	if (ds_response_struct_info != null) {
172	the_document.appendChild(doc_.importNode(ds_response_struct_info, true));
173	}
174	path = GSPath.appendLink(GSXML.RESPONSE_ELEM, GSXML.DOC_NODE_ELEM+GSXML.LIST_MODIFIER);
175	path = GSPath.appendLink(path, GSXML.DOC_NODE_ELEM);
176	path = GSPath.appendLink(path, GSXML.NODE_STRUCTURE_ELEM);
177	Element ds_response_structure = (Element) GSXML.getNodeByPath(ds_response_message, path);
178
179	if (ds_response_structure != null) {
180	// add the contents of the structure bit into the_document
181	NodeList structs = ds_response_structure.getChildNodes();
182	for (int i=0; i<structs.getLength();i++) {
183	the_document.appendChild(doc_.importNode(structs.item(i), true));
184	}
185	} else {
186	// no structure nodes, so put in a dummy doc node
187	Element doc_node = doc_.createElement(GSXML.DOC_NODE_ELEM);
188	doc_node.setAttribute(GSXML.NODE_ID_ATT, document_name);
189	the_document.appendChild(doc_node);
190	has_dummy = true;
191	}
192	} else { // a simple type - we dont have a dummy node for simple
193	// should think about this more
194	// no structure request, so just put in a dummy doc node
195	Element doc_node = doc_.createElement(GSXML.DOC_NODE_ELEM);
196	doc_node.setAttribute(GSXML.NODE_ID_ATT, document_name);
197	the_document.appendChild(doc_node);
198	has_dummy = true;
199	}
200
201	// Build a request to obtain some document metadata
202	Element dm_message = doc_.createElement(GSXML.MESSAGE_ELEM);
203	String to = GSPath.appendLink(collection, "DocumentMetadataRetrieve"); // Hard-wired?
204	Element dm_request = GSXML.createBasicRequest(doc_, GSXML.REQUEST_TYPE_PROCESS, to, lang);
205	dm_message.appendChild(dm_request);
206	// Create a parameter list to specify the required metadata information
207
208	HashSet meta_names = new HashSet();
209	meta_names.add("Title"); // the default
210	if (format_elem != null) {
211	extractMetadataNames(format_elem, meta_names);
212	}
213
214	Element dm_param_list = createMetadataParamList(meta_names);
215	dm_request.appendChild(dm_param_list);
216
217	// create the doc node list for the metadata request
218	Element dm_doc_list = doc_.createElement(GSXML.DOC_NODE_ELEM+GSXML.LIST_MODIFIER);
219	dm_request.appendChild(dm_doc_list);
220
221	// Add each node from the structure response into the metadata request
222	NodeList doc_nodes = the_document.getElementsByTagName(GSXML.DOC_NODE_ELEM);
223	for (int i = 0; i < doc_nodes.getLength(); i++) {
224	Element doc_node = (Element) doc_nodes.item(i);
225	String doc_node_id = doc_node.getAttribute(GSXML.NODE_ID_ATT);
226
227	// Add the documentNode to the list
228	Element dm_doc_node = doc_.createElement(GSXML.DOC_NODE_ELEM);
229	dm_doc_list.appendChild(dm_doc_node);
230	dm_doc_node.setAttribute(GSXML.NODE_ID_ATT, doc_node_id);
231	dm_doc_node.setAttribute(GSXML.NODE_TYPE_ATT,
232	doc_node.getAttribute(GSXML.NODE_TYPE_ATT));
233	}
234
235	// we also want a metadata request to the top level document to get
236	// assocfilepath - this could be cached too
237	Element doc_meta_request = GSXML.createBasicRequest(doc_, GSXML.REQUEST_TYPE_PROCESS, to, lang);
238	dm_message.appendChild(doc_meta_request);
239	Element doc_meta_param_list = doc_.createElement(GSXML.PARAM_ELEM+GSXML.LIST_MODIFIER);
240	doc_meta_request.appendChild(doc_meta_param_list);
241	Element doc_param = doc_.createElement(GSXML.PARAM_ELEM);
242	doc_meta_param_list.appendChild(doc_param);
243	doc_param.setAttribute(GSXML.NAME_ATT, "metadata");
244	doc_param.setAttribute(GSXML.VALUE_ATT, "archivedir");
245
246	// create the doc node list for the metadata request
247	Element doc_list = doc_.createElement(GSXML.DOC_NODE_ELEM+GSXML.LIST_MODIFIER);
248	doc_meta_request.appendChild(doc_list);
249
250	Element doc_node = doc_.createElement(GSXML.DOC_NODE_ELEM);
251	// teh node we want is the root document node
252	doc_node.setAttribute(GSXML.NODE_ID_ATT, document_name+".rt");
253	doc_list.appendChild(doc_node);
254	Element dm_response_message = (Element) mr_.process(dm_message);
255
256	String path = GSPath.appendLink(GSXML.RESPONSE_ELEM, GSXML.DOC_NODE_ELEM+GSXML.LIST_MODIFIER);
257	Element dm_response_doc_list = (Element) GSXML.getNodeByPath(dm_response_message, path);
258
259	// Merge the metadata with the structure information
260	NodeList dm_response_docs = dm_response_doc_list.getChildNodes();
261	for (int i = 0; i < doc_nodes.getLength(); i++) {
262	GSXML.mergeMetadataLists(doc_nodes.item(i), dm_response_docs.item(i));
263	}
264	// get teh top level doc metadata out
265	Element doc_meta_response = (Element)dm_response_message.getElementsByTagName(GSXML.RESPONSE_ELEM).item(1);
266	Element doc_meta_list = (Element)GSXML.getNodeByPath(doc_meta_response, "documentNodeList/documentNode/metadataList");
267	the_document.appendChild(doc_.importNode(doc_meta_list, true));
268
269	// Build a request to obtain some document content
270	Element dc_message = doc_.createElement(GSXML.MESSAGE_ELEM);
271	to = GSPath.appendLink(collection, "DocumentContentRetrieve"); // Hard-wired?
272	Element dc_request = GSXML.createBasicRequest(doc_, GSXML.REQUEST_TYPE_PROCESS, to, lang);
273	dc_message.appendChild(dc_request);
274
275
276	// Create a parameter list to specify the request parameters - empty for now
277	Element dc_param_list = doc_.createElement(GSXML.PARAM_ELEM+GSXML.LIST_MODIFIER);
278	dc_request.appendChild(dc_param_list);
279
280	// the doc list for the content request is the same as the one for the structure request
281	dc_request.appendChild(basic_doc_list);
282
283	Element dc_response_message = (Element) mr_.process(dc_message);
284	path = GSPath.appendLink(path, GSXML.DOC_NODE_ELEM);
285	Element dc_response_doc = (Element) GSXML.getNodeByPath(dc_response_message, path);
286	Element dc_response_doc_content = (Element) GSXML.getChildByTagName(dc_response_doc, GSXML.NODE_CONTENT_ELEM);
287
288
289	boolean highlight_query_terms = true;
290	if (highlight_query_terms) {
291	dc_response_doc.removeChild(dc_response_doc_content);
292
293	dc_response_doc_content = highlightQueryTerms(request, dc_response_doc_content);
294	dc_response_doc.appendChild(dc_response_doc.getOwnerDocument().importNode(dc_response_doc_content, true));
295	}
296	if (provide_annotations) {
297	// now we can modifiy the response doc if needed
298	String enrich_service = (String)params.get(GSCGI.SERVICE_ARG);
299	if (enrich_service != null && !enrich_service.equals("")) {
300	// send a message to the service
301	Element enrich_message = doc_.createElement(GSXML.MESSAGE_ELEM);
302	Element enrich_request = GSXML.createBasicRequest(doc_, GSXML.REQUEST_TYPE_PROCESS, enrich_service, lang);
303	enrich_message.appendChild(enrich_request);
304	Element enrich_pl = getServiceParamList(cgi_paramList);
305	enrich_request.appendChild(enrich_pl);
306	Element e_doc_list = doc_.createElement(GSXML.DOC_NODE_ELEM+GSXML.LIST_MODIFIER);
307	enrich_request.appendChild(e_doc_list);
308	e_doc_list.appendChild(doc_.importNode(dc_response_doc, true));
309
310	Element enrich_response = mr_.process(enrich_message);
311
312	String [] links = {GSXML.RESPONSE_ELEM, GSXML.DOC_NODE_ELEM+GSXML.LIST_MODIFIER, GSXML.DOC_NODE_ELEM, GSXML.NODE_CONTENT_ELEM};
313	path = GSPath.createPath(links);
314	dc_response_doc_content = (Element)GSXML.getNodeByPath(enrich_response, path);
315
316	}
317	}
318	// use the returned id rather than the sent one cos there may have
319	// been modifiers such as .pr that are removed.
320	String modified_doc_id = dc_response_doc.getAttribute(GSXML.NODE_ID_ATT);
321	the_document.setAttribute("selectedNode", modified_doc_id);
322	if (has_dummy) {
323	// change the id if necessary and add the content
324	Element dummy_node = (Element)doc_nodes.item(0);
325
326	dummy_node.setAttribute(GSXML.NODE_ID_ATT, modified_doc_id);
327	dummy_node.appendChild(doc_.importNode(dc_response_doc_content, true));
328	// hack for simple type
329	if (document_type.equals("simple")) {
330	// we dont want the internal docNode, just want the content and metadata in the document
331	// rethink this!!
332	the_document.removeChild(dummy_node);
333
334	NodeList dummy_children = dummy_node.getChildNodes();
335	//for (int i=0; i<dummy_children.getLength(); i++) {
336	for (int i=dummy_children.getLength()-1; i>=0; i--) {
337	the_document.appendChild(dummy_children.item(i));
338
339	}
340	}
341	} else {
342	// Merge the document content with the metadata and structure information
343	for (int i = 0; i < doc_nodes.getLength(); i++) {
344	Node dn = doc_nodes.item(i);
345	String dn_id = ((Element)dn).getAttribute(GSXML.NODE_ID_ATT);
346	if (dn_id.equals(modified_doc_id)) {
347	dn.appendChild(doc_.importNode(dc_response_doc_content, true));
348	break;
349	}
350	}
351	}
352
353	///ystem.out.println("(DocumentAction) Page:\n" + converter_.getPrettyString(result));
354	return result;
355	}
356
357	protected Element getServiceParamList(Element cgi_param_list) {
358
359	Element new_param_list = doc_.createElement(GSXML.PARAM_ELEM+GSXML.LIST_MODIFIER);
360	Element param;
361	NodeList cgi_params = cgi_param_list.getChildNodes();
362	for (int i=0; i<cgi_params.getLength(); i++) {
363	Element p = (Element) cgi_params.item(i);
364	String name = p.getAttribute(GSXML.NAME_ATT);
365	if (name.equals(GSCGI.SERVICE_ARG) \|\| name.equals(GSCGI.REQUEST_TYPE_ARG) \|\| name.equals(GSCGI.CLUSTER_ARG) \|\| name.equals(GSCGI.DOCUMENT_ARG) \|\| name.equals(SIBLING_ARG) ) {
366	continue;
367	}
368	// esle add it in to the list
369	new_param_list.appendChild(doc_.importNode(p, true));
370	}
371	return new_param_list;
372	}
373
374	/** this method gets the collection description, the format info, the
375	* list of enrich services, etc - stuff that is needed for the page,
376	* but is the same whatever the query is - should be cached */
377	protected boolean getBackgroundData(Element page_response,
378	String collection, String lang) {
379
380	// create a message to process - contains requests for the collection
381	// description, the format element, the enrich services on offer
382	// these could all be cached
383	Element info_message = doc_.createElement(GSXML.MESSAGE_ELEM);
384	String path = GSPath.appendLink(collection, "DocumentMetadataRetrieve");
385	// the format request - ignore for now, where does this request go to??
386	Element format_request = GSXML.createBasicRequest(doc_, GSXML.REQUEST_TYPE_FORMAT, path, lang);
387	info_message.appendChild(format_request);
388
389	// the enrich_services request - only do this if provide_annotations is true
390
391	if (provide_annotations) {
392	Element enrich_services_request = GSXML.createBasicRequest(doc_, GSXML.REQUEST_TYPE_DESCRIBE, "", lang);
393	enrich_services_request.setAttribute(GSXML.INFO_ATT, "serviceList");
394	info_message.appendChild(enrich_services_request);
395	}
396
397	Element info_response = (Element)mr_.process(info_message);
398
399	// the collection is the first response
400	NodeList responses = info_response.getElementsByTagName(GSXML.RESPONSE_ELEM);
401	Element format_resp = (Element) responses.item(0);
402
403	Element format_elem = (Element)GSXML.getChildByTagName(format_resp, GSXML.FORMAT_ELEM);
404	if (format_elem != null) {
405	///ystem.out.println("doc action found a format statement");
406	// set teh format type
407	format_elem.setAttribute(GSXML.TYPE_ATT, "display");
408	page_response.appendChild(doc_.importNode(format_elem, true));
409	}
410
411	if (provide_annotations) {
412	Element services_resp = (Element)responses.item(1);
413
414	// a new message for the mr
415	Element enrich_message = doc_.createElement(GSXML.MESSAGE_ELEM);
416
417	NodeList e_services = services_resp.getElementsByTagName(GSXML.SERVICE_ELEM);
418	boolean service_found = false;
419	for (int j=0; j<e_services.getLength(); j++) {
420	if (((Element)e_services.item(j)).getAttribute(GSXML.TYPE_ATT).equals("enrich")) {
421	Element s = GSXML.createBasicRequest(doc_, GSXML.REQUEST_TYPE_DESCRIBE, ((Element)e_services.item(j)).getAttribute(GSXML.NAME_ATT), lang);
422	enrich_message.appendChild(s);
423	service_found = true;
424	}
425	}
426	if (service_found) {
427	Element enrich_response = mr_.process(enrich_message);
428
429	NodeList e_responses = enrich_response.getElementsByTagName(GSXML.RESPONSE_ELEM);
430	Element service_list = doc_.createElement(GSXML.SERVICE_ELEM + GSXML.LIST_MODIFIER);
431	for (int i=0; i<e_responses.getLength(); i++) {
432	Element e_resp = (Element)e_responses.item(i);
433	Element e_service = (Element)doc_.importNode(GSXML.getChildByTagName(e_resp, GSXML.SERVICE_ELEM), true);
434	e_service.setAttribute(GSXML.NAME_ATT, e_resp.getAttribute(GSXML.FROM_ATT));
435	service_list.appendChild(e_service);
436	}
437	page_response.appendChild(service_list);
438	}
439	} // if provide_annotations
440	return true;
441
442	}
443
444	/** this involves a bit of a hack to get the equivalent query terms - has to requery the query service - uses the last selected service name. (if it ends in query). should this action do the query or should it send a message to the query action? but that will involve lots of extra stuff. also doesn't handle phrases properly - just highlights all the terms found in the text.
445	*/
446	protected Element highlightQueryTerms(Element request, Element dc_response_doc_content) {
447
448	// do the query again to get term info
449	Element cgi_param_list = (Element)GSXML.getChildByTagName(request, GSXML.PARAM_ELEM+GSXML.LIST_MODIFIER);
450	HashMap params = GSXML.extractParams(cgi_param_list, false);
451
452	String service_name = (String)params.get(GSCGI.SERVICE_ARG);
453	if (service_name == null \|\| !service_name.endsWith("Query")) { // hack for now - we only do highlighting if we were in a query last - ie not if we were in a browse thingy
454	return dc_response_doc_content;
455	}
456	String collection = (String)params.get(GSCGI.COLLECTION_ARG);
457	String lang = request.getAttribute(GSXML.LANG_ATT);
458	String to = GSPath.appendLink(collection, service_name);
459
460	Element mr_query_message = doc_.createElement(GSXML.MESSAGE_ELEM);
461	Element mr_query_request = GSXML.createBasicRequest(doc_, GSXML.REQUEST_TYPE_PROCESS, to, lang);
462	mr_query_message.appendChild(mr_query_request);
463
464	// paramList
465	Element query_param_list = (Element)doc_.importNode(cgi_param_list, true);
466	mr_query_request.appendChild(query_param_list);
467
468	// do the query
469	Element mr_query_response = (Element)mr_.process(mr_query_message);
470
471	String path = GSPath.appendLink(GSXML.RESPONSE_ELEM, GSXML.TERM_ELEM+GSXML.LIST_MODIFIER);
472	Element query_term_info_list = (Element) GSXML.getNodeByPath(mr_query_response, path);
473	if (query_term_info_list == null) {
474	// no term info
475	System.err.println("DocumentAction: Warning: No query term information.\n");
476	return dc_response_doc_content;
477	}
478
479	NodeList equivs = query_term_info_list.getElementsByTagName("equivTermList");
480	HashSet all_terms = new HashSet();
481	for (int i=0; i<equivs.getLength(); i++) {
482
483	// get the terms
484	String [] terms = GSXML.getAttributeValuesFromList((Element)equivs.item(i), GSXML.NAME_ATT);
485	for (int j=0; j<terms.length; j++) {
486
487	all_terms.add(terms[j]);
488	}
489	}
490
491	Element new_content_elem = doc_.createElement(GSXML.NODE_CONTENT_ELEM);
492
493	String content = GSXML.getNodeText(dc_response_doc_content);
494
495	StringBuffer temp = new StringBuffer();
496	StringBuffer temp_content = new StringBuffer();
497
498	for (int i=0; i<content.length(); i++) {
499	char c = content.charAt(i);
500	if (Character.isLetterOrDigit(c)) {
501	// not word boundary
502	temp.append(c);
503	} else {
504	// word boundary
505	// add the last word if there was one
506	if (temp.length()>0) {
507	if (all_terms.contains(temp.toString())) {
508	//if there is anything already present in temp_content, add it as a text node
509	Text t = doc_.createTextNode(temp_content.toString());
510	new_content_elem.appendChild(t);
511	temp_content.delete(0, temp_content.length());
512	Element annot = GSXML.createTextElement(doc_, "annotation", temp.toString());
513	annot.setAttribute("type", "query_term");
514	new_content_elem.appendChild(annot);
515	//new_content.append("<annotation type='query_term'>"+temp+"</annotation>");
516	} else {
517	temp_content.append(temp);
518	}
519	temp.delete(0, temp.length());
520	}
521	if (c=='<') {
522	temp_content.append(c);
523	i++;
524	// skip over html
525	while (i<content.length() && content.charAt(i)!='>') {
526	temp_content.append(content.charAt(i));
527	i++;
528	}
529	temp_content.append(content.charAt(i));
530	//temp_content.append(GSXML.xmlSafe(temp.toString()));
531	//temp.delete(0, temp.length());
532
533	} else {
534	temp_content.append(c);
535	}
536	}
537	}
538	// append anything left of temp_content and temp
539	Text t = doc_.createTextNode(temp_content.toString());
540	new_content_elem.appendChild(t);
541
542	if (temp.length() > 0) {
543	Element annot = GSXML.createTextElement(doc_, "annotation", temp.toString());
544	annot.setAttribute("type", "query_term");
545	new_content_elem.appendChild(annot);
546	}
547	//String content_string = "<nodeContent>"+new_content.toString()+"</nodeContent>";
548	//Element content_elem = converter_.getDOM(content_string).getDocumentElement();
549	return new_content_elem;
550	}
551	}

Note: See TracBrowser for help on using the repository browser.

Download in other formats: