Context Navigation

source: main/trunk/greenstone3/src/java/org/greenstone/gsdl3/action/DocumentAction.java@ 29922

Last change on this file since 29922 was 29922, checked in by Georgiy Litvinov, 9 years ago
No metadata while following internal link bugfix
Property svn:keywords set to `Author Date Id Revision`
File size: 44.5 KB

Line
1	/*
2	* DocumentAction.java
3	* Copyright (C) 2002 New Zealand Digital Library, http://www.nzdl.org
4	*
5	* This program is free software; you can redistribute it and/or modify
6	* it under the terms of the GNU General Public License as published by
7	* the Free Software Foundation; either version 2 of the License, or
8	* (at your option) any later version.
9	*
10	* This program is distributed in the hope that it will be useful,
11	* but WITHOUT ANY WARRANTY; without even the implied warranty of
12	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13	* GNU General Public License for more details.
14	*
15	* You should have received a copy of the GNU General Public License
16	* along with this program; if not, write to the Free Software
17	* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
18	*/
19	package org.greenstone.gsdl3.action;
20
21	// Greenstone classes
22	import org.greenstone.gsdl3.core.ModuleInterface;
23	import org.greenstone.gsdl3.util.*;
24
25	// XML classes
26	import org.w3c.dom.Document;
27	import org.w3c.dom.Element;
28	import org.w3c.dom.Node;
29	import org.w3c.dom.Text;
30	import org.w3c.dom.NodeList;
31
32	// General Java classes
33	import java.util.ArrayList;
34	import java.util.HashMap;
35	import java.util.HashSet;
36	import java.io.File;
37	import java.io.Serializable;
38
39	import org.apache.log4j.*;
40
41	/** Action class for retrieving Documents via the message router */
42	public class DocumentAction extends Action
43	{
44
45	static Logger logger = Logger.getLogger(org.greenstone.gsdl3.action.DocumentAction.class.getName());
46
47	// this is used to specify that the sibling nodes of a selected one should be obtained
48	public static final String SIBLING_ARG = "sib";
49	public static final String GOTO_PAGE_ARG = "gp";
50	public static final String ENRICH_DOC_ARG = "end";
51	public static final String EXPAND_DOCUMENT_ARG = "ed";
52	public static final String EXPAND_CONTENTS_ARG = "ec";
53	public static final String REALISTIC_BOOK_ARG = "book";
54
55	/**
56	* if this is set to true, when a document is displayed, any annotation type
57	* services (enrich) will be offered to the user as well
58	*/
59	protected boolean provide_annotations = false;
60
61	protected boolean highlight_query_terms = false;
62
63	public boolean configure()
64	{
65	super.configure();
66	String highlight = (String) config_params.get("highlightQueryTerms");
67	if (highlight != null && highlight.equals("true"))
68	{
69	highlight_query_terms = true;
70	}
71	String annotate = (String) config_params.get("displayAnnotationService");
72	if (annotate != null && annotate.equals("true"))
73	{
74	provide_annotations = true;
75	}
76	return true;
77	}
78
79	public Node process(Node message_node)
80	{
81	// for now, no subaction eventually we may want to have subactions such as text assoc or something ?
82
83	Element message = GSXML.nodeToElement(message_node);
84	Document doc = message.getOwnerDocument();
85
86	// the response
87	Element result = doc.createElement(GSXML.MESSAGE_ELEM);
88	Element page_response = doc.createElement(GSXML.RESPONSE_ELEM);
89	result.appendChild(page_response);
90
91	// get the request - assume only one
92	Element request = (Element) GSXML.getChildByTagName(message, GSXML.REQUEST_ELEM);
93	Element cgi_paramList = (Element) GSXML.getChildByTagName(request, GSXML.PARAM_ELEM + GSXML.LIST_MODIFIER);
94	HashMap<String, Serializable> params = GSXML.extractParams(cgi_paramList, false);
95
96	// just in case there are some that need to get passed to the services
97	HashMap service_params = (HashMap) params.get("s0");
98
99	String collection = (String) params.get(GSParams.COLLECTION);
100	String document_id = (String) params.get(GSParams.DOCUMENT);
101	if (document_id != null && document_id.equals(""))
102	{
103	document_id = null;
104	}
105	String href = (String) params.get(GSParams.HREF);//for an external link : get the href URL if it is existing in the params list
106	if (href != null && href.equals(""))
107	{
108	href = null;
109	}
110	String rl = (String) params.get(GSParams.RELATIVE_LINK);//for an external link : get the rl value if it is existing in the params list
111	if (document_id == null && href == null)
112	{
113	logger.error("no document specified!");
114	return result;
115	}
116	if (rl != null && rl.equals("0"))
117	{
118	// this is a true external link, we should have been directed to a different page or action
119	logger.error("rl value was 0, shouldn't get here");
120	return result;
121	}
122
123	UserContext userContext = new UserContext(request);
124
125	//append site metadata
126	addSiteMetadata(page_response, userContext);
127	addInterfaceOptions(page_response);
128
129	// get the additional data needed for the page
130	getBackgroundData(page_response, collection, userContext);
131	Element format_elem = (Element) GSXML.getChildByTagName(page_response, GSXML.FORMAT_ELEM);
132
133	if (format_elem != null) {
134	// lets look for param defaults set in config file
135	NodeList param_defaults = format_elem.getElementsByTagName("paramDefault");
136	for (int i=0; i<param_defaults.getLength(); i++) {
137	Element p = (Element)param_defaults.item(i);
138	String name = p.getAttribute(GSXML.NAME_ATT);
139	if (params.get(name) ==null) {
140	// wasn't set from interface
141	String value = p.getAttribute(GSXML.VALUE_ATT);
142	params.put(name, value );
143	// also add into request param xml so that xslt knows it too
144	GSXML.addParameterToList(cgi_paramList, name, value);
145	}
146	}
147	}
148	String document_type = (String) params.get(GSParams.DOCUMENT_TYPE);
149	if (document_type != null && document_type.equals(""))
150	{
151	//document_type = "hierarchy";
152	document_type = null; // we'll get it later if not already specified
153	}
154	//whether to retrieve siblings or not
155	boolean get_siblings = false;
156	String sibs = (String) params.get(SIBLING_ARG);
157	if (sibs != null && sibs.equals("1"))
158	{
159	get_siblings = true;
160	}
161
162	String doc_id_modifier = "";
163	String sibling_num = (String) params.get(GOTO_PAGE_ARG);
164	if (sibling_num != null && !sibling_num.equals(""))
165	{
166	// we have to modify the doc name
167	doc_id_modifier = "." + sibling_num + ".ss";
168	}
169
170	boolean expand_document = false;
171	String ed_arg = (String) params.get(EXPAND_DOCUMENT_ARG);
172	if (ed_arg != null && ed_arg.equals("1"))
173	{
174	expand_document = true;
175	}
176
177	boolean expand_contents = false;
178	if (expand_document)
179	{ // we always expand the contents with the text
180	expand_contents = true;
181	}
182	else
183	{
184	String ec_arg = (String) params.get(EXPAND_CONTENTS_ARG);
185	if (ec_arg != null && ec_arg.equals("1"))
186	{
187	expand_contents = true;
188	}
189	}
190
191	// UserContext userContext = new UserContext(request);
192
193	// //append site metadata
194	// addSiteMetadata(page_response, userContext);
195	// addInterfaceOptions(page_response);
196
197	// // get the additional data needed for the page
198	// getBackgroundData(page_response, collection, userContext);
199	// Element format_elem = (Element) GSXML.getChildByTagName(page_response, GSXML.FORMAT_ELEM);
200
201	// the_document is where all the doc info - structure and metadata etc
202	// is added into, to be returned in the page
203	Element the_document = doc.createElement(GSXML.DOCUMENT_ELEM);
204	page_response.appendChild(the_document);
205
206	// create a basic doc list containing the current node
207	Element basic_doc_list = doc.createElement(GSXML.DOC_NODE_ELEM + GSXML.LIST_MODIFIER);
208	Element current_doc = doc.createElement(GSXML.DOC_NODE_ELEM);
209	basic_doc_list.appendChild(current_doc);
210	if (document_id != null)
211	{
212	current_doc.setAttribute(GSXML.NODE_ID_ATT, document_id + doc_id_modifier);
213	}
214	else
215	{
216	current_doc.setAttribute(GSXML.HREF_ID_ATT, href);
217	// do we need this??
218	current_doc.setAttribute(GSXML.ID_MOD_ATT, doc_id_modifier);
219	}
220
221	if (document_type == null)
222	{
223	document_type = getDocumentType(basic_doc_list, collection, userContext, page_response);
224	}
225	if (document_type == null)
226	{
227	logger.error("doctype is null!!!***********");
228	document_type = GSXML.DOC_TYPE_SIMPLE;
229	}
230
231	the_document.setAttribute(GSXML.DOC_TYPE_ATT, document_type);
232
233
234	// Create a parameter list to specify the required structure information
235	Element ds_param_list = doc.createElement(GSXML.PARAM_ELEM + GSXML.LIST_MODIFIER);
236
237	if (service_params != null)
238	{
239	GSXML.addParametersToList(ds_param_list, service_params);
240	}
241
242	Element ds_param = null;
243	boolean get_structure = false;
244	boolean get_structure_info = false;
245	if (document_type.equals(GSXML.DOC_TYPE_PAGED))
246	{
247	get_structure_info = true;
248
249	if (expand_contents)
250	{
251	ds_param = doc.createElement(GSXML.PARAM_ELEM);
252	ds_param_list.appendChild(ds_param);
253	ds_param.setAttribute(GSXML.NAME_ATT, "structure");
254	ds_param.setAttribute(GSXML.VALUE_ATT, "entire");
255	}
256
257	// get the info needed for paged naviagtion
258	ds_param = doc.createElement(GSXML.PARAM_ELEM);
259	ds_param_list.appendChild(ds_param);
260	ds_param.setAttribute(GSXML.NAME_ATT, "info");
261	ds_param.setAttribute(GSXML.VALUE_ATT, "numSiblings");
262	ds_param = doc.createElement(GSXML.PARAM_ELEM);
263	ds_param_list.appendChild(ds_param);
264	ds_param.setAttribute(GSXML.NAME_ATT, "info");
265	ds_param.setAttribute(GSXML.VALUE_ATT, "numChildren");
266	ds_param = doc.createElement(GSXML.PARAM_ELEM);
267	ds_param_list.appendChild(ds_param);
268	ds_param.setAttribute(GSXML.NAME_ATT, "info");
269	ds_param.setAttribute(GSXML.VALUE_ATT, "siblingPosition");
270
271	if (get_siblings)
272	{
273	ds_param = doc.createElement(GSXML.PARAM_ELEM);
274	ds_param_list.appendChild(ds_param);
275	ds_param.setAttribute(GSXML.NAME_ATT, "structure");
276	ds_param.setAttribute(GSXML.VALUE_ATT, "siblings");
277	}
278
279	}
280	else if (document_type.equals(GSXML.DOC_TYPE_HIERARCHY) \|\| document_type.equals(GSXML.DOC_TYPE_PAGED_HIERARCHY))
281	{
282	get_structure = true;
283	if (expand_contents)
284	{
285	ds_param = doc.createElement(GSXML.PARAM_ELEM);
286	ds_param_list.appendChild(ds_param);
287	ds_param.setAttribute(GSXML.NAME_ATT, "structure");
288	ds_param.setAttribute(GSXML.VALUE_ATT, "entire");
289	}
290	else
291	{
292	// get the info needed for table of contents
293	ds_param = doc.createElement(GSXML.PARAM_ELEM);
294	ds_param_list.appendChild(ds_param);
295	ds_param.setAttribute(GSXML.NAME_ATT, "structure");
296	ds_param.setAttribute(GSXML.VALUE_ATT, "ancestors");
297	ds_param = doc.createElement(GSXML.PARAM_ELEM);
298	ds_param_list.appendChild(ds_param);
299	ds_param.setAttribute(GSXML.NAME_ATT, "structure");
300	ds_param.setAttribute(GSXML.VALUE_ATT, "children");
301	if (get_siblings)
302	{
303	ds_param = doc.createElement(GSXML.PARAM_ELEM);
304	ds_param_list.appendChild(ds_param);
305	ds_param.setAttribute(GSXML.NAME_ATT, "structure");
306	ds_param.setAttribute(GSXML.VALUE_ATT, "siblings");
307	}
308	}
309	}
310	else
311	{
312	// we dont need any structure
313	}
314
315	boolean has_dummy = false;
316	if (get_structure \|\| get_structure_info)
317	{
318
319	// Build a request to obtain the document structure
320	Element ds_message = doc.createElement(GSXML.MESSAGE_ELEM);
321	String to = GSPath.appendLink(collection, "DocumentStructureRetrieve");// Hard-wired?
322	Element ds_request = GSXML.createBasicRequest(doc, GSXML.REQUEST_TYPE_PROCESS, to, userContext);
323	ds_message.appendChild(ds_request);
324	ds_request.appendChild(ds_param_list);
325
326	// add the node list we created earlier
327	ds_request.appendChild(basic_doc_list);
328
329	// Process the document structure retrieve message
330	Element ds_response_message = (Element) this.mr.process(ds_message);
331	if (processErrorElements(ds_response_message, page_response))
332	{
333	return result;
334	}
335
336	// get the info and print out
337	String path = GSPath.appendLink(GSXML.RESPONSE_ELEM, GSXML.DOC_NODE_ELEM + GSXML.LIST_MODIFIER);
338	path = GSPath.appendLink(path, GSXML.DOC_NODE_ELEM);
339	path = GSPath.appendLink(path, "nodeStructureInfo");
340	Element ds_response_struct_info = (Element) GSXML.getNodeByPath(ds_response_message, path);
341	// get the doc_node bit
342	if (ds_response_struct_info != null)
343	{
344	the_document.appendChild(doc.importNode(ds_response_struct_info, true));
345	}
346	path = GSPath.appendLink(GSXML.RESPONSE_ELEM, GSXML.DOC_NODE_ELEM + GSXML.LIST_MODIFIER);
347	path = GSPath.appendLink(path, GSXML.DOC_NODE_ELEM);
348	path = GSPath.appendLink(path, GSXML.NODE_STRUCTURE_ELEM);
349	Element ds_response_structure = (Element) GSXML.getNodeByPath(ds_response_message, path);
350
351	if (ds_response_structure != null)
352	{
353	// add the contents of the structure bit into the_document
354	NodeList structs = ds_response_structure.getChildNodes();
355	for (int i = 0; i < structs.getLength(); i++)
356	{
357	the_document.appendChild(doc.importNode(structs.item(i), true));
358	}
359	}
360	else
361	{
362	// no structure nodes, so put in a dummy doc node
363	Element doc_node = doc.createElement(GSXML.DOC_NODE_ELEM);
364	if (document_id != null)
365	{
366	doc_node.setAttribute(GSXML.NODE_ID_ATT, document_id);
367	}
368	else
369	{
370	doc_node.setAttribute(GSXML.HREF_ID_ATT, href);
371
372	}
373	the_document.appendChild(doc_node);
374	has_dummy = true;
375	}
376	}
377	else
378	{ // a simple type - we dont have a dummy node for simple
379	// should think about this more
380	// no structure request, so just put in a dummy doc node
381	Element doc_node = doc.createElement(GSXML.DOC_NODE_ELEM);
382	if (document_id != null)
383	{
384	doc_node.setAttribute(GSXML.NODE_ID_ATT, document_id);
385	}
386	else
387	{
388	doc_node.setAttribute(GSXML.HREF_ID_ATT, href);
389	}
390	the_document.appendChild(doc_node);
391	has_dummy = true;
392	}
393
394	// Build a request to obtain some document metadata
395	Element dm_message = doc.createElement(GSXML.MESSAGE_ELEM);
396	String to = GSPath.appendLink(collection, "DocumentMetadataRetrieve"); // Hard-wired?
397	Element dm_request = GSXML.createBasicRequest(doc, GSXML.REQUEST_TYPE_PROCESS, to, userContext);
398	dm_message.appendChild(dm_request);
399	// Create a parameter list to specify the required metadata information
400
401	HashSet<String> meta_names = new HashSet<String>();
402	meta_names.add("Title"); // the default
403	if (format_elem != null)
404	{
405	getRequiredMetadataNames(format_elem, meta_names);
406	}
407
408	Element extraMetaListElem = (Element) GSXML.getChildByTagName(request, GSXML.EXTRA_METADATA + GSXML.LIST_MODIFIER);
409	if (extraMetaListElem != null)
410	{
411	NodeList extraMetaList = extraMetaListElem.getElementsByTagName(GSXML.EXTRA_METADATA);
412	for (int i = 0; i < extraMetaList.getLength(); i++)
413	{
414	meta_names.add(((Element) extraMetaList.item(i)).getAttribute(GSXML.NAME_ATT));
415	}
416	}
417
418	Element dm_param_list = createMetadataParamList(doc,meta_names);
419	if (service_params != null)
420	{
421	GSXML.addParametersToList(dm_param_list, service_params);
422	}
423
424	dm_request.appendChild(dm_param_list);
425
426	// create the doc node list for the metadata request
427	Element dm_doc_list = doc.createElement(GSXML.DOC_NODE_ELEM + GSXML.LIST_MODIFIER);
428	dm_request.appendChild(dm_doc_list);
429
430	// Add each node from the structure response into the metadata request
431	NodeList doc_nodes = the_document.getElementsByTagName(GSXML.DOC_NODE_ELEM);
432	for (int i = 0; i < doc_nodes.getLength(); i++)
433	{
434	Element doc_node = (Element) doc_nodes.item(i);
435	String doc_node_id = doc_node.getAttribute(GSXML.NODE_ID_ATT);
436
437	// Add the documentNode to the list
438	Element dm_doc_node = doc.createElement(GSXML.DOC_NODE_ELEM);
439	dm_doc_list.appendChild(dm_doc_node);
440	dm_doc_node.setAttribute(GSXML.NODE_ID_ATT, doc_node_id);
441	dm_doc_node.setAttribute(GSXML.NODE_TYPE_ATT, doc_node.getAttribute(GSXML.NODE_TYPE_ATT));
442	if (document_id == null){
443	dm_doc_node.setAttribute(GSXML.HREF_ID_ATT, href );
444	}
445
446	}
447
448	// we also want a metadata request to the top level document to get
449	// assocfilepath - this could be cached too
450	Element doc_meta_request = GSXML.createBasicRequest(doc, GSXML.REQUEST_TYPE_PROCESS, to, userContext);
451	dm_message.appendChild(doc_meta_request);
452	Element doc_meta_param_list = doc.createElement(GSXML.PARAM_ELEM + GSXML.LIST_MODIFIER);
453	if (service_params != null)
454	{
455	GSXML.addParametersToList(doc_meta_param_list, service_params);
456	}
457
458	doc_meta_request.appendChild(doc_meta_param_list);
459	Element doc_param = doc.createElement(GSXML.PARAM_ELEM);
460	doc_meta_param_list.appendChild(doc_param);
461	doc_param.setAttribute(GSXML.NAME_ATT, "metadata");
462	doc_param.setAttribute(GSXML.VALUE_ATT, "assocfilepath");
463
464	// create the doc node list for the metadata request
465	Element doc_list = doc.createElement(GSXML.DOC_NODE_ELEM + GSXML.LIST_MODIFIER);
466	doc_meta_request.appendChild(doc_list);
467
468	Element doc_node = doc.createElement(GSXML.DOC_NODE_ELEM);
469	// the node we want is the root document node
470	if (document_id != null)
471	{
472	doc_node.setAttribute(GSXML.NODE_ID_ATT, document_id + ".rt");
473	}
474	/*else
475	{
476	doc_node.setAttribute(GSXML.HREF_ID_ATT, href);// + ".rt");
477	// can we assume that href is always a top level doc??
478	//doc_node.setAttribute(GSXML.ID_MOD_ATT, ".rt");
479	//doc_node.setAttribute("externalURL", has_rl);
480	}*/
481	doc_list.appendChild(doc_node);
482
483	Element dm_response_message = (Element) this.mr.process(dm_message);
484	if (processErrorElements(dm_response_message, page_response))
485	{
486	return result;
487	}
488
489	String path = GSPath.appendLink(GSXML.RESPONSE_ELEM, GSXML.DOC_NODE_ELEM + GSXML.LIST_MODIFIER);
490	Element dm_response_doc_list = (Element) GSXML.getNodeByPath(dm_response_message, path);
491
492	// Merge the metadata with the structure information
493	NodeList dm_response_docs = dm_response_doc_list.getChildNodes();
494	for (int i = 0; i < doc_nodes.getLength(); i++)
495	{
496	GSXML.mergeMetadataLists(doc_nodes.item(i), dm_response_docs.item(i));
497	}
498	// get the top level doc metadata out
499	Element doc_meta_response = (Element) dm_response_message.getElementsByTagName(GSXML.RESPONSE_ELEM).item(1);
500	Element top_doc_node = (Element) GSXML.getNodeByPath(doc_meta_response, "documentNodeList/documentNode");
501	GSXML.mergeMetadataLists(the_document, top_doc_node);
502
503	// Build a request to obtain some document content
504	Element dc_message = doc.createElement(GSXML.MESSAGE_ELEM);
505	to = GSPath.appendLink(collection, "DocumentContentRetrieve"); // Hard-wired?
506	Element dc_request = GSXML.createBasicRequest(doc, GSXML.REQUEST_TYPE_PROCESS, to, userContext);
507	dc_message.appendChild(dc_request);
508
509	// Create a parameter list to specify the request parameters - empty for now
510	Element dc_param_list = doc.createElement(GSXML.PARAM_ELEM + GSXML.LIST_MODIFIER);
511	if (service_params != null)
512	{
513	GSXML.addParametersToList(dc_param_list, service_params);
514	}
515
516	dc_request.appendChild(dc_param_list);
517
518	// get the content
519	// the doc list for the content request is the same as the one for the structure request unless we want the whole document, in which case its the same as for the metadata request.
520	if (expand_document)
521	{
522	dc_request.appendChild(dm_doc_list);
523	}
524	else
525	{
526	dc_request.appendChild(basic_doc_list);
527	}
528	logger.debug("request = " + XMLConverter.getString(dc_message));
529	Element dc_response_message = (Element) this.mr.process(dc_message);
530	if (processErrorElements(dc_response_message, page_response))
531	{
532	return result;
533	}
534
535	Element dc_response_doc_list = (Element) GSXML.getNodeByPath(dc_response_message, path);
536
537	if (expand_document)
538	{
539	// Merge the content with the structure information
540	NodeList dc_response_docs = dc_response_doc_list.getChildNodes();
541	for (int i = 0; i < doc_nodes.getLength(); i++)
542	{
543	Node content = GSXML.getChildByTagName((Element) dc_response_docs.item(i), "nodeContent");
544	if (content != null)
545	{
546	if (highlight_query_terms)
547	{
548	content = highlightQueryTerms(request, (Element) content);
549	}
550	doc_nodes.item(i).appendChild(doc.importNode(content, true));
551	}
552	//GSXML.mergeMetadataLists(doc_nodes.item(i), dm_response_docs.item(i));
553	}
554	if (has_dummy && document_type.equals(GSXML.DOC_TYPE_SIMPLE)) {
555	Element dummy_node = (Element) doc_nodes.item(0);
556	the_document.removeChild(dummy_node);
557	the_document.setAttribute(GSXML.NODE_ID_ATT, dummy_node.getAttribute(GSXML.NODE_ID_ATT));
558	NodeList dummy_children = dummy_node.getChildNodes();
559	for (int i = dummy_children.getLength() - 1; i >= 0; i--)
560	{
561	// special case as we don't want more than one metadata list
562	if (dummy_children.item(i).getNodeName().equals(GSXML.METADATA_ELEM + GSXML.LIST_MODIFIER))
563	{
564	GSXML.mergeMetadataFromList(the_document, dummy_children.item(i));
565	}
566	else
567	{
568	the_document.appendChild(dummy_children.item(i));
569	}
570	}
571	}
572	}
573	else
574	{
575	//path = GSPath.appendLink(path, GSXML.DOC_NODE_ELEM);
576	Element dc_response_doc = (Element) GSXML.getChildByTagName(dc_response_doc_list, GSXML.DOC_NODE_ELEM);
577	Element dc_response_doc_content = (Element) GSXML.getChildByTagName(dc_response_doc, GSXML.NODE_CONTENT_ELEM);
578	//Element dc_response_doc_external = (Element) GSXML.getChildByTagName(dc_response_doc, "external");
579
580	if (dc_response_doc_content == null)
581	{
582	// no content to add
583	if (dc_response_doc.getAttribute("external").equals("true"))
584	{
585
586	//if (dc_response_doc_external != null)
587	//{
588	String href_id = dc_response_doc.getAttribute(GSXML.HREF_ID_ATT);
589
590	the_document.setAttribute("selectedNode", href_id);
591	the_document.setAttribute("external", href_id);
592	}
593	return result;
594	}
595	if (highlight_query_terms)
596	{
597	dc_response_doc.removeChild(dc_response_doc_content);
598
599	dc_response_doc_content = highlightQueryTerms(request, dc_response_doc_content);
600	dc_response_doc.appendChild(dc_response_doc.getOwnerDocument().importNode(dc_response_doc_content, true));
601	}
602
603	if (provide_annotations)
604	{
605	String service_selected = (String) params.get(ENRICH_DOC_ARG);
606	if (service_selected != null && service_selected.equals("1"))
607	{
608	// now we can modifiy the response doc if needed
609	String enrich_service = (String) params.get(GSParams.SERVICE);
610	// send a message to the service
611	Element enrich_message = doc.createElement(GSXML.MESSAGE_ELEM);
612	Element enrich_request = GSXML.createBasicRequest(doc, GSXML.REQUEST_TYPE_PROCESS, enrich_service, userContext);
613	enrich_message.appendChild(enrich_request);
614	// check for parameters
615	HashMap e_service_params = (HashMap) params.get("s1");
616	if (e_service_params != null)
617	{
618	Element enrich_pl = doc.createElement(GSXML.PARAM_ELEM + GSXML.LIST_MODIFIER);
619	GSXML.addParametersToList(enrich_pl, e_service_params);
620	enrich_request.appendChild(enrich_pl);
621	}
622	Element e_doc_list = doc.createElement(GSXML.DOC_NODE_ELEM + GSXML.LIST_MODIFIER);
623	enrich_request.appendChild(e_doc_list);
624	e_doc_list.appendChild(doc.importNode(dc_response_doc, true));
625
626	Node enrich_response = this.mr.process(enrich_message);
627
628	String[] links = { GSXML.RESPONSE_ELEM, GSXML.DOC_NODE_ELEM + GSXML.LIST_MODIFIER, GSXML.DOC_NODE_ELEM, GSXML.NODE_CONTENT_ELEM };
629	path = GSPath.createPath(links);
630	dc_response_doc_content = (Element) GSXML.getNodeByPath(enrich_response, path);
631
632	}
633	} // if provide_annotations
634
635	// use the returned id rather than the sent one cos there may have
636	// been modifiers such as .pr that are removed.
637	String modified_doc_id = dc_response_doc.getAttribute(GSXML.NODE_ID_ATT);
638	the_document.setAttribute("selectedNode", modified_doc_id);
639	if (has_dummy)
640	{
641	// change the id if necessary and add the content
642	Element dummy_node = (Element) doc_nodes.item(0);
643
644	dummy_node.setAttribute(GSXML.NODE_ID_ATT, modified_doc_id);
645	dummy_node.appendChild(doc.importNode(dc_response_doc_content, true));
646	// hack for simple type
647	if (document_type.equals(GSXML.DOC_TYPE_SIMPLE))
648	{
649	// we dont want the internal docNode, just want the content and metadata in the document
650	// rethink this!!
651	the_document.removeChild(dummy_node);
652
653	NodeList dummy_children = dummy_node.getChildNodes();
654	//for (int i=0; i<dummy_children.getLength(); i++) {
655	for (int i = dummy_children.getLength() - 1; i >= 0; i--)
656	{
657	// special case as we don't want more than one metadata list
658	if (dummy_children.item(i).getNodeName().equals(GSXML.METADATA_ELEM + GSXML.LIST_MODIFIER))
659	{
660	GSXML.mergeMetadataFromList(the_document, dummy_children.item(i));
661	}
662	else
663	{
664	the_document.appendChild(dummy_children.item(i));
665	}
666	}
667	}
668
669	the_document.setAttribute(GSXML.NODE_ID_ATT, modified_doc_id);
670	}
671	else
672	{
673	// Merge the document content with the metadata and structure information
674	for (int i = 0; i < doc_nodes.getLength(); i++)
675	{
676	Node dn = doc_nodes.item(i);
677	String dn_id = ((Element) dn).getAttribute(GSXML.NODE_ID_ATT);
678	if (dn_id.equals(modified_doc_id))
679	{
680	dn.appendChild(doc.importNode(dc_response_doc_content, true));
681	break;
682	}
683	}
684	}
685	}
686	//logger.debug("(DocumentAction) Page:\n" + GSXML.xmlNodeToString(result));
687	return result;
688	}
689
690	/**
691	* tell the param class what its arguments are if an action has its own
692	* arguments, this should add them to the params object - particularly
693	* important for args that should not be saved
694	*/
695	public boolean addActionParameters(GSParams params)
696	{
697	params.addParameter(GOTO_PAGE_ARG, false);
698	params.addParameter(ENRICH_DOC_ARG, false);
699	params.addParameter(EXPAND_DOCUMENT_ARG, false);
700	params.addParameter(EXPAND_CONTENTS_ARG, false);
701	params.addParameter(REALISTIC_BOOK_ARG, false);
702
703	return true;
704	}
705
706	/**
707	* this method gets the collection description, the format info, the list of
708	* enrich services, etc - stuff that is needed for the page, but is the same
709	* whatever the query is - should be cached
710	*/
711	protected boolean getBackgroundData(Element page_response, String collection, UserContext userContext)
712	{
713	Document doc = page_response.getOwnerDocument();
714
715	// create a message to process - contains requests for the collection
716	// description, the format element, the enrich services on offer
717	// these could all be cached
718	Element info_message = doc.createElement(GSXML.MESSAGE_ELEM);
719	String path = GSPath.appendLink(collection, "DocumentContentRetrieve");
720	// the format request - ignore for now, where does this request go to??
721	Element format_request = GSXML.createBasicRequest(doc, GSXML.REQUEST_TYPE_FORMAT, path, userContext);
722	info_message.appendChild(format_request);
723
724	// the enrich_services request - only do this if provide_annotations is true
725
726	if (provide_annotations)
727	{
728	Element enrich_services_request = GSXML.createBasicRequest(doc, GSXML.REQUEST_TYPE_DESCRIBE, "", userContext);
729	enrich_services_request.setAttribute(GSXML.INFO_ATT, "serviceList");
730	info_message.appendChild(enrich_services_request);
731	}
732
733	Element info_response = (Element) this.mr.process(info_message);
734
735	// the collection is the first response
736	NodeList responses = info_response.getElementsByTagName(GSXML.RESPONSE_ELEM);
737	Element format_resp = (Element) responses.item(0);
738
739	Element format_elem = (Element) GSXML.getChildByTagName(format_resp, GSXML.FORMAT_ELEM);
740	if (format_elem != null)
741	{
742	Element global_format_elem = (Element) GSXML.getChildByTagName(format_resp, GSXML.GLOBAL_FORMAT_ELEM);
743	if (global_format_elem != null)
744	{
745	GSXSLT.mergeFormatElements(format_elem, global_format_elem, false);
746	}
747
748	// set the format type
749	format_elem.setAttribute(GSXML.TYPE_ATT, "display");
750	page_response.appendChild(doc.importNode(format_elem, true));
751	}
752
753	if (provide_annotations)
754	{
755	Element services_resp = (Element) responses.item(1);
756
757	// a new message for the mr
758	Element enrich_message = doc.createElement(GSXML.MESSAGE_ELEM);
759	NodeList e_services = services_resp.getElementsByTagName(GSXML.SERVICE_ELEM);
760	boolean service_found = false;
761	for (int j = 0; j < e_services.getLength(); j++)
762	{
763	if (((Element) e_services.item(j)).getAttribute(GSXML.TYPE_ATT).equals("enrich"))
764	{
765	Element s = GSXML.createBasicRequest(doc, GSXML.REQUEST_TYPE_DESCRIBE, ((Element) e_services.item(j)).getAttribute(GSXML.NAME_ATT), userContext);
766	enrich_message.appendChild(s);
767	service_found = true;
768	}
769	}
770	if (service_found)
771	{
772	Element enrich_response = (Element) this.mr.process(enrich_message);
773
774	NodeList e_responses = enrich_response.getElementsByTagName(GSXML.RESPONSE_ELEM);
775	Element service_list = doc.createElement(GSXML.SERVICE_ELEM + GSXML.LIST_MODIFIER);
776	for (int i = 0; i < e_responses.getLength(); i++)
777	{
778	Element e_resp = (Element) e_responses.item(i);
779	Element e_service = (Element) doc.importNode(GSXML.getChildByTagName(e_resp, GSXML.SERVICE_ELEM), true);
780	e_service.setAttribute(GSXML.NAME_ATT, e_resp.getAttribute(GSXML.FROM_ATT));
781	service_list.appendChild(e_service);
782	}
783	page_response.appendChild(service_list);
784	}
785	} // if provide_annotations
786	return true;
787
788	}
789
790	protected String getDocumentType(Element basic_doc_list, String collection, UserContext userContext, Element page_response)
791	{
792	Document doc = basic_doc_list.getOwnerDocument();
793
794	Element ds_message = doc.createElement(GSXML.MESSAGE_ELEM);
795	String to = GSPath.appendLink(collection, "DocumentStructureRetrieve");// Hard-wired?
796	Element ds_request = GSXML.createBasicRequest(doc, GSXML.REQUEST_TYPE_PROCESS, to, userContext);
797	ds_message.appendChild(ds_request);
798
799	// Create a parameter list to specify the required structure information
800	Element ds_param_list = doc.createElement(GSXML.PARAM_ELEM + GSXML.LIST_MODIFIER);
801	Element ds_param = doc.createElement(GSXML.PARAM_ELEM);
802	ds_param_list.appendChild(ds_param);
803	ds_param.setAttribute(GSXML.NAME_ATT, "info");
804	ds_param.setAttribute(GSXML.VALUE_ATT, "documentType");
805
806	ds_request.appendChild(ds_param_list);
807
808	// add the node list we created earlier
809	ds_request.appendChild(basic_doc_list);
810
811	// Process the document structure retrieve message
812	Element ds_response_message = (Element) this.mr.process(ds_message);
813	if (processErrorElements(ds_response_message, page_response))
814	{
815	return null;
816	}
817
818	String[] links = { GSXML.RESPONSE_ELEM, GSXML.DOC_NODE_ELEM + GSXML.LIST_MODIFIER, GSXML.DOC_NODE_ELEM, "nodeStructureInfo" };
819	String path = GSPath.createPath(links);
820	Element info_elem = (Element) GSXML.getNodeByPath(ds_response_message, path);
821	if (info_elem == null) {
822	return null;
823	}
824	Element doctype_elem = GSXML.getNamedElement(info_elem, "info", "name", "documentType");
825	if (doctype_elem != null)
826	{
827	String doc_type = doctype_elem.getAttribute("value");
828	return doc_type;
829	}
830	return null;
831	}
832
833	/**
834	* this involves a bit of a hack to get the equivalent query terms - has to
835	* requery the query service - uses the last selected service name. (if it
836	* ends in query). should this action do the query or should it send a
837	* message to the query action? but that will involve lots of extra stuff.
838	* also doesn't handle phrases properly - just highlights all the terms
839	* found in the text.
840	*/
841	protected Element highlightQueryTerms(Element request, Element dc_response_doc_content)
842	{
843	Document doc = request.getOwnerDocument();
844
845	// do the query again to get term info
846	Element cgi_param_list = (Element) GSXML.getChildByTagName(request, GSXML.PARAM_ELEM + GSXML.LIST_MODIFIER);
847	HashMap<String, Serializable> params = GSXML.extractParams(cgi_param_list, false);
848
849	HashMap previous_params = (HashMap) params.get("p");
850	if (previous_params == null)
851	{
852	return dc_response_doc_content;
853	}
854	String service_name = (String) previous_params.get(GSParams.SERVICE);
855	if (service_name == null \|\| !service_name.endsWith("Query"))
856	{ // hack for now - we only do highlighting if we were in a query last - ie not if we were in a browse thingy
857	logger.debug("invalid service, not doing highlighting");
858	return dc_response_doc_content;
859	}
860	String collection = (String) params.get(GSParams.COLLECTION);
861	UserContext userContext = new UserContext(request);
862	String to = GSPath.appendLink(collection, service_name);
863
864	Element mr_query_message = doc.createElement(GSXML.MESSAGE_ELEM);
865	Element mr_query_request = GSXML.createBasicRequest(doc, GSXML.REQUEST_TYPE_PROCESS, to, userContext);
866	mr_query_message.appendChild(mr_query_request);
867
868	// paramList
869	HashMap service_params = (HashMap) params.get("s1");
870
871	Element query_param_list = doc.createElement(GSXML.PARAM_ELEM + GSXML.LIST_MODIFIER);
872	GSXML.addParametersToList(query_param_list, service_params);
873	mr_query_request.appendChild(query_param_list);
874
875	// do the query
876	Element mr_query_response = (Element) this.mr.process(mr_query_message);
877
878	String path = GSPath.appendLink(GSXML.RESPONSE_ELEM, GSXML.TERM_ELEM + GSXML.LIST_MODIFIER);
879	Element query_term_list_element = (Element) GSXML.getNodeByPath(mr_query_response, path);
880	if (query_term_list_element == null)
881	{
882	// no term info
883	logger.error("No query term information.\n");
884	return dc_response_doc_content;
885	}
886
887	String content = GSXML.getNodeText(dc_response_doc_content);
888
889	String metadata_path = GSPath.appendLink(GSXML.RESPONSE_ELEM, GSXML.METADATA_ELEM + GSXML.LIST_MODIFIER);
890	Element metadata_list = (Element) GSXML.getNodeByPath(mr_query_response, metadata_path);
891
892	HashSet<String> query_term_variants = new HashSet<String>();
893	NodeList equivalent_terms_nodelist = query_term_list_element.getElementsByTagName("equivTermList");
894	if (equivalent_terms_nodelist == null \|\| equivalent_terms_nodelist.getLength() == 0)
895	{
896	NodeList terms_nodelist = query_term_list_element.getElementsByTagName("term");
897	if (terms_nodelist != null && terms_nodelist.getLength() > 0)
898	{
899	for (int i = 0; i < terms_nodelist.getLength(); i++)
900	{
901	String termValue = ((Element) terms_nodelist.item(i)).getAttribute("name");
902	String termValueU = null;
903	String termValueL = null;
904
905	if (termValue.length() > 1)
906	{
907	termValueU = termValue.substring(0, 1).toUpperCase() + termValue.substring(1);
908	termValueL = termValue.substring(0, 1).toLowerCase() + termValue.substring(1);
909	}
910	else
911	{
912	termValueU = termValue.substring(0, 1).toUpperCase();
913	termValueL = termValue.substring(0, 1).toLowerCase();
914	}
915
916	query_term_variants.add(termValueU);
917	query_term_variants.add(termValueL);
918	}
919	}
920	}
921	else
922	{
923	for (int i = 0; i < equivalent_terms_nodelist.getLength(); i++)
924	{
925	Element equivalent_terms_element = (Element) equivalent_terms_nodelist.item(i);
926	String[] equivalent_terms = GSXML.getAttributeValuesFromList(equivalent_terms_element, GSXML.NAME_ATT);
927	for (int j = 0; j < equivalent_terms.length; j++)
928	{
929	query_term_variants.add(equivalent_terms[j]);
930	}
931	}
932	}
933
934	ArrayList<ArrayList<HashSet<String>>> phrase_query_term_variants_hierarchy = new ArrayList<ArrayList<HashSet<String>>>();
935
936	Element query_element = GSXML.getNamedElement(metadata_list, GSXML.METADATA_ELEM, GSXML.NAME_ATT, "query");
937	String performed_query = GSXML.getNodeText(query_element) + " ";
938
939	ArrayList<HashSet<String>> phrase_query_p_term_variants_list = new ArrayList<HashSet<String>>();
940	int term_start = 0;
941	boolean in_term = false;
942	boolean in_phrase = false;
943	for (int i = 0; i < performed_query.length(); i++)
944	{
945	char character = performed_query.charAt(i);
946	boolean is_character_letter_or_digit = Character.isLetterOrDigit(character);
947
948	// Has a query term just started?
949	if (in_term == false && is_character_letter_or_digit == true)
950	{
951	in_term = true;
952	term_start = i;
953	}
954
955	// Or has a term just finished?
956	else if (in_term == true && is_character_letter_or_digit == false)
957	{
958	in_term = false;
959	String term = performed_query.substring(term_start, i);
960
961	Element term_element = GSXML.getNamedElement(query_term_list_element, GSXML.TERM_ELEM, GSXML.NAME_ATT, term);
962	if (term_element != null)
963	{
964
965	HashSet<String> phrase_query_p_term_x_variants = new HashSet<String>();
966
967	NodeList term_equivalent_terms_nodelist = term_element.getElementsByTagName("equivTermList");
968	if (term_equivalent_terms_nodelist == null \|\| term_equivalent_terms_nodelist.getLength() == 0)
969	{
970	String termValueU = null;
971	String termValueL = null;
972
973	if (term.length() > 1)
974	{
975	termValueU = term.substring(0, 1).toUpperCase() + term.substring(1);
976	termValueL = term.substring(0, 1).toLowerCase() + term.substring(1);
977	}
978	else
979	{
980	termValueU = term.substring(0, 1).toUpperCase();
981	termValueL = term.substring(0, 1).toLowerCase();
982	}
983
984	phrase_query_p_term_x_variants.add(termValueU);
985	phrase_query_p_term_x_variants.add(termValueL);
986	}
987	else
988	{
989	for (int j = 0; j < term_equivalent_terms_nodelist.getLength(); j++)
990	{
991	Element term_equivalent_terms_element = (Element) term_equivalent_terms_nodelist.item(j);
992	String[] term_equivalent_terms = GSXML.getAttributeValuesFromList(term_equivalent_terms_element, GSXML.NAME_ATT);
993	for (int k = 0; k < term_equivalent_terms.length; k++)
994	{
995	phrase_query_p_term_x_variants.add(term_equivalent_terms[k]);
996	}
997	}
998	}
999	phrase_query_p_term_variants_list.add(phrase_query_p_term_x_variants);
1000
1001	if (in_phrase == false)
1002	{
1003	phrase_query_term_variants_hierarchy.add(phrase_query_p_term_variants_list);
1004	phrase_query_p_term_variants_list = new ArrayList<HashSet<String>>();
1005	}
1006	}
1007	}
1008	// Watch for phrases (surrounded by quotes)
1009	if (character == '\"')
1010	{
1011	// Has a phrase just started?
1012	if (in_phrase == false)
1013	{
1014	in_phrase = true;
1015	}
1016	// Or has a phrase just finished?
1017	else if (in_phrase == true)
1018	{
1019	in_phrase = false;
1020	phrase_query_term_variants_hierarchy.add(phrase_query_p_term_variants_list);
1021	}
1022
1023	phrase_query_p_term_variants_list = new ArrayList<HashSet<String>>();
1024	}
1025	}
1026
1027	return highlightQueryTermsInternal(doc, content, query_term_variants, phrase_query_term_variants_hierarchy);
1028	}
1029
1030	/**
1031	* Highlights query terms in a piece of text.
1032	*/
1033	private Element highlightQueryTermsInternal(Document doc, String content, HashSet<String> query_term_variants, ArrayList<ArrayList<HashSet<String>>> phrase_query_term_variants_hierarchy)
1034	{
1035	// Convert the content string to an array of characters for speed
1036	char[] content_characters = new char[content.length()];
1037	content.getChars(0, content.length(), content_characters, 0);
1038
1039	// Now skim through the content, identifying word matches
1040	ArrayList<WordMatch> word_matches = new ArrayList<WordMatch>();
1041	int word_start = 0;
1042	boolean in_word = false;
1043	boolean preceding_word_matched = false;
1044	boolean inTag = false;
1045	for (int i = 0; i < content_characters.length; i++)
1046	{
1047	//We don't want to find words inside HTML tags
1048	if (content_characters[i] == '<')
1049	{
1050	inTag = true;
1051	continue;
1052	}
1053	else if (inTag && content_characters[i] == '>')
1054	{
1055	inTag = false;
1056	}
1057	else if (inTag)
1058	{
1059	continue;
1060	}
1061
1062	boolean is_character_letter_or_digit = Character.isLetterOrDigit(content_characters[i]);
1063
1064	// Has a word just started?
1065	if (in_word == false && is_character_letter_or_digit == true)
1066	{
1067	in_word = true;
1068	word_start = i;
1069	}
1070
1071	// Or has a word just finished?
1072	else if (in_word == true && is_character_letter_or_digit == false)
1073	{
1074	in_word = false;
1075
1076	// Check if the word matches any of the query term equivalents
1077	String word = new String(content_characters, word_start, (i - word_start));
1078	if (query_term_variants.contains(word))
1079	{
1080	// We have found a matching word, so remember its location
1081	word_matches.add(new WordMatch(word, word_start, i, preceding_word_matched));
1082	preceding_word_matched = true;
1083	}
1084	else
1085	{
1086	preceding_word_matched = false;
1087	}
1088	}
1089	}
1090
1091	// Don't forget the last word...
1092	if (in_word == true)
1093	{
1094	// Check if the word matches any of the query term equivalents
1095	String word = new String(content_characters, word_start, (content_characters.length - word_start));
1096	if (query_term_variants.contains(word))
1097	{
1098	// We have found a matching word, so remember its location
1099	word_matches.add(new WordMatch(word, word_start, content_characters.length, preceding_word_matched));
1100	}
1101	}
1102
1103	ArrayList<Integer> highlight_start_positions = new ArrayList<Integer>();
1104	ArrayList<Integer> highlight_end_positions = new ArrayList<Integer>();
1105
1106	// Deal with phrases now
1107	ArrayList<PartialPhraseMatch> partial_phrase_matches = new ArrayList<PartialPhraseMatch>();
1108	for (int i = 0; i < word_matches.size(); i++)
1109	{
1110	WordMatch word_match = word_matches.get(i);
1111
1112	// See if any partial phrase matches are extended by this word
1113	if (word_match.preceding_word_matched)
1114	{
1115	for (int j = partial_phrase_matches.size() - 1; j >= 0; j--)
1116	{
1117	PartialPhraseMatch partial_phrase_match = partial_phrase_matches.remove(j);
1118	ArrayList phrase_query_p_term_variants_list = phrase_query_term_variants_hierarchy.get(partial_phrase_match.query_phrase_number);
1119	HashSet phrase_query_p_term_x_variants = (HashSet) phrase_query_p_term_variants_list.get(partial_phrase_match.num_words_matched);
1120	if (phrase_query_p_term_x_variants.contains(word_match.word))
1121	{
1122	partial_phrase_match.num_words_matched++;
1123
1124	// Has a complete phrase match occurred?
1125	if (partial_phrase_match.num_words_matched == phrase_query_p_term_variants_list.size())
1126	{
1127	// Check for overlaps by looking at the previous highlight range
1128	if (!highlight_end_positions.isEmpty())
1129	{
1130	int last_highlight_index = highlight_end_positions.size() - 1;
1131	int last_highlight_end = highlight_end_positions.get(last_highlight_index).intValue();
1132	if (last_highlight_end > partial_phrase_match.start_position)
1133	{
1134	// There is an overlap, so remove the previous phrase match
1135	int last_highlight_start = highlight_start_positions.remove(last_highlight_index).intValue();
1136	highlight_end_positions.remove(last_highlight_index);
1137	partial_phrase_match.start_position = last_highlight_start;
1138	}
1139	}
1140
1141	highlight_start_positions.add(new Integer(partial_phrase_match.start_position));
1142	highlight_end_positions.add(new Integer(word_match.end_position));
1143	}
1144	// No, but add the partial match back into the list for next time
1145	else
1146	{
1147	partial_phrase_matches.add(partial_phrase_match);
1148	}
1149	}
1150	}
1151	}
1152	else
1153	{
1154	partial_phrase_matches.clear();
1155	}
1156
1157	// See if this word is at the start of any of the phrases
1158	for (int p = 0; p < phrase_query_term_variants_hierarchy.size(); p++)
1159	{
1160	ArrayList phrase_query_p_term_variants_list = phrase_query_term_variants_hierarchy.get(p);
1161	HashSet phrase_query_p_term_1_variants = (HashSet) phrase_query_p_term_variants_list.get(0);
1162	if (phrase_query_p_term_1_variants.contains(word_match.word))
1163	{
1164	// If this phrase is just one word long, we have a complete match
1165	if (phrase_query_p_term_variants_list.size() == 1)
1166	{
1167	highlight_start_positions.add(new Integer(word_match.start_position));
1168	highlight_end_positions.add(new Integer(word_match.end_position));
1169	}
1170	// Otherwise we have the start of a potential phrase match
1171	else
1172	{
1173	partial_phrase_matches.add(new PartialPhraseMatch(word_match.start_position, p));
1174	}
1175	}
1176	}
1177	}
1178
1179	// Now add the annotation tags into the document at the correct points
1180	Element content_element = doc.createElement(GSXML.NODE_CONTENT_ELEM);
1181
1182	int last_wrote = 0;
1183	for (int i = 0; i < highlight_start_positions.size(); i++)
1184	{
1185	int highlight_start = highlight_start_positions.get(i).intValue();
1186	int highlight_end = highlight_end_positions.get(i).intValue();
1187
1188	// Print anything before the highlight range
1189	if (last_wrote < highlight_start)
1190	{
1191	String preceding_text = new String(content_characters, last_wrote, (highlight_start - last_wrote));
1192	content_element.appendChild(doc.createTextNode(preceding_text));
1193	}
1194
1195	// Print the highlight text, annotated
1196	if (highlight_end > last_wrote)
1197	{
1198	String highlight_text = new String(content_characters, highlight_start, (highlight_end - highlight_start));
1199	Element annotation_element = GSXML.createTextElement(doc, "annotation", highlight_text);
1200	annotation_element.setAttribute("type", "query_term");
1201	content_element.appendChild(annotation_element);
1202	last_wrote = highlight_end;
1203	}
1204	}
1205
1206	// Finish off any unwritten text
1207	if (last_wrote < content_characters.length)
1208	{
1209	String remaining_text = new String(content_characters, last_wrote, (content_characters.length - last_wrote));
1210	content_element.appendChild(doc.createTextNode(remaining_text));
1211	}
1212
1213	return content_element;
1214	}
1215
1216	static private class WordMatch
1217	{
1218	public String word;
1219	public int start_position;
1220	public int end_position;
1221	public boolean preceding_word_matched;
1222
1223	public WordMatch(String word, int start_position, int end_position, boolean preceding_word_matched)
1224	{
1225	this.word = word;
1226	this.start_position = start_position;
1227	this.end_position = end_position;
1228	this.preceding_word_matched = preceding_word_matched;
1229	}
1230	}
1231
1232	static private class PartialPhraseMatch
1233	{
1234	public int start_position;
1235	public int query_phrase_number;
1236	public int num_words_matched;
1237
1238	public PartialPhraseMatch(int start_position, int query_phrase_number)
1239	{
1240	this.start_position = start_position;
1241	this.query_phrase_number = query_phrase_number;
1242	this.num_words_matched = 1;
1243	}
1244	}
1245	}

Note: See TracBrowser for help on using the repository browser.

Download in other formats: