Changeset 24116

Show
Ignore:
Timestamp:
07.06.2011 17:07:48 (8 years ago)
Author:
sjm84
Message:

Fixed search term highlighting in Lucene

Files:
1 modified

Legend:

Unmodified
Added
Removed
  • main/trunk/greenstone3/src/java/org/greenstone/gsdl3/action/DocumentAction.java

    r23628 r24116  
    11/* 
    2  *    DocumentAction.java 
    3  *    Copyright (C) 2002 New Zealand Digital Library, http://www.nzdl.org 
    4  * 
    5  *    This program is free software; you can redistribute it and/or modify 
    6  *    it under the terms of the GNU General Public License as published by 
    7  *    the Free Software Foundation; either version 2 of the License, or 
    8  *    (at your option) any later version. 
    9  * 
    10  *    This program is distributed in the hope that it will be useful, 
    11  *    but WITHOUT ANY WARRANTY; without even the implied warranty of 
    12  *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the 
    13  *    GNU General Public License for more details. 
    14  * 
    15  *    You should have received a copy of the GNU General Public License 
    16  *    along with this program; if not, write to the Free Software 
    17  *    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
    18  */ 
     2*    DocumentAction.java 
     3*    Copyright (C) 2002 New Zealand Digital Library, http://www.nzdl.org 
     4* 
     5*    This program is free software; you can redistribute it and/or modify 
     6*    it under the terms of the GNU General Public License as published by 
     7*    the Free Software Foundation; either version 2 of the License, or 
     8*    (at your option) any later version. 
     9* 
     10*    This program is distributed in the hope that it will be useful, 
     11*    but WITHOUT ANY WARRANTY; without even the implied warranty of 
     12*    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the 
     13*    GNU General Public License for more details. 
     14* 
     15*    You should have received a copy of the GNU General Public License 
     16*    along with this program; if not, write to the Free Software 
     17*    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
     18*/ 
    1919package org.greenstone.gsdl3.action; 
    2020 
     
    3939 
    4040/** Action class for retrieving Documents  via the message router 
    41  */ 
     41*/ 
    4242public class DocumentAction extends Action { 
    4343 
    44    static Logger logger = Logger.getLogger(org.greenstone.gsdl3.action.DocumentAction.class.getName()); 
    45  
    46     // this is used to specify that the sibling nodes of a selected one should be obtained 
    47     public static final String SIBLING_ARG = "sib"; 
    48     public static final String GOTO_PAGE_ARG = "gp"; 
    49     public static final String ENRICH_DOC_ARG = "end"; 
    50      
    51     /** if this is set to true, when a document is displayed, any annotation 
    52      * type services (enrich) will be offered to the user as well */ 
    53     protected boolean provide_annotations = false;  
    54      
    55     protected boolean highlight_query_terms = false; 
    56  
    57     public boolean configure() { 
    58     super.configure(); 
    59     String highlight = (String)config_params.get("highlightQueryTerms"); 
    60     if (highlight != null && highlight.equals("true")) { 
    61         highlight_query_terms = true; 
     44    static Logger logger = Logger.getLogger(org.greenstone.gsdl3.action.DocumentAction.class.getName()); 
     45 
     46    // this is used to specify that the sibling nodes of a selected one should be obtained 
     47    public static final String SIBLING_ARG = "sib"; 
     48    public static final String GOTO_PAGE_ARG = "gp"; 
     49    public static final String ENRICH_DOC_ARG = "end"; 
     50     
     51    /** if this is set to true, when a document is displayed, any annotation 
     52    * type services (enrich) will be offered to the user as well */ 
     53    protected boolean provide_annotations = false;  
     54     
     55    protected boolean highlight_query_terms = false; 
     56 
     57    public boolean configure() { 
     58        super.configure(); 
     59        String highlight = (String)config_params.get("highlightQueryTerms"); 
     60        if (highlight != null && highlight.equals("true")) { 
     61            highlight_query_terms = true; 
     62        } 
     63        String annotate = (String)config_params.get("displayAnnotationService"); 
     64        if (annotate != null && annotate.equals("true")) { 
     65            provide_annotations = true; 
     66        } 
     67        return true; 
    6268    } 
    63     String annotate = (String)config_params.get("displayAnnotationService"); 
    64     if (annotate != null && annotate.equals("true")) { 
    65         provide_annotations = true; 
    66     } 
    67     return true; 
    68     } 
    69     public Node process (Node message_node) 
    70     { 
    71     // for now, no subaction eventually we may want to have subactions such as text assoc or something ? 
    72      
    73     Element message = this.converter.nodeToElement(message_node); 
    74  
    75     // the response 
    76     Element result = this.doc.createElement(GSXML.MESSAGE_ELEM); 
    77     Element page_response = this.doc.createElement(GSXML.RESPONSE_ELEM); 
    78     result.appendChild(page_response); 
    79  
    80     // get the request - assume only one 
    81     Element request = (Element)GSXML.getChildByTagName(message, GSXML.REQUEST_ELEM); 
    82     Element cgi_paramList = (Element)GSXML.getChildByTagName(request, GSXML.PARAM_ELEM+GSXML.LIST_MODIFIER); 
    83     HashMap params = GSXML.extractParams(cgi_paramList, false); 
    84  
    85     // just in case there are some that need to get passed to the services 
    86     HashMap service_params = (HashMap)params.get("s0");  
    87  
    88      
    89     String has_rl = null; 
    90     String has_href = null; 
    91     has_href = (String) params.get("href");//for an external link : get the href URL if it is existing in the params list  
    92     has_rl = (String) params.get("rl");//for an external link : get the rl value if it is existing in the params list 
    93     String collection = (String) params.get(GSParams.COLLECTION); 
    94     String lang = request.getAttribute(GSXML.LANG_ATT); 
    95     String uid = request.getAttribute(GSXML.USER_ID_ATT); 
    96     String document_name = (String) params.get(GSParams.DOCUMENT); 
    97     if ((document_name == null || document_name.equals("")) && (has_href == null || has_href.equals(""))) { 
    98         logger.error("no document specified!"); 
    99         return result; 
    100     } 
    101     String document_type = (String) params.get(GSParams.DOCUMENT_TYPE); 
    102     if (document_type == null) { 
    103         document_type = "simple"; 
    104     } 
    105     //whether to retrieve siblings or not 
    106     boolean get_siblings = false; 
    107     String sibs = (String) params.get(SIBLING_ARG); 
    108     if (sibs != null && sibs.equals("1")) { 
    109         get_siblings = true; 
     69    public Node process (Node message_node) 
     70    { 
     71        // for now, no subaction eventually we may want to have subactions such as text assoc or something ? 
     72         
     73        Element message = this.converter.nodeToElement(message_node); 
     74 
     75        // the response 
     76        Element result = this.doc.createElement(GSXML.MESSAGE_ELEM); 
     77        Element page_response = this.doc.createElement(GSXML.RESPONSE_ELEM); 
     78        result.appendChild(page_response); 
     79 
     80        // get the request - assume only one 
     81        Element request = (Element)GSXML.getChildByTagName(message, GSXML.REQUEST_ELEM); 
     82        Element cgi_paramList = (Element)GSXML.getChildByTagName(request, GSXML.PARAM_ELEM+GSXML.LIST_MODIFIER); 
     83        HashMap params = GSXML.extractParams(cgi_paramList, false); 
     84 
     85        // just in case there are some that need to get passed to the services 
     86        HashMap service_params = (HashMap)params.get("s0");  
     87 
     88         
     89        String has_rl = null; 
     90        String has_href = null; 
     91        has_href = (String) params.get("href");//for an external link : get the href URL if it is existing in the params list  
     92        has_rl = (String) params.get("rl");//for an external link : get the rl value if it is existing in the params list 
     93        String collection = (String) params.get(GSParams.COLLECTION); 
     94        String lang = request.getAttribute(GSXML.LANG_ATT); 
     95        String uid = request.getAttribute(GSXML.USER_ID_ATT); 
     96        String document_name = (String) params.get(GSParams.DOCUMENT); 
     97        if ((document_name == null || document_name.equals("")) && (has_href == null || has_href.equals(""))) { 
     98            logger.error("no document specified!"); 
     99            return result; 
     100        } 
     101        String document_type = (String) params.get(GSParams.DOCUMENT_TYPE); 
     102        if (document_type == null) { 
     103            document_type = "simple"; 
     104        } 
     105        //whether to retrieve siblings or not 
     106        boolean get_siblings = false; 
     107        String sibs = (String) params.get(SIBLING_ARG); 
     108        if (sibs != null && sibs.equals("1")) { 
     109            get_siblings = true; 
     110        } 
     111         
     112        String sibling_num = (String) params.get(GOTO_PAGE_ARG); 
     113        if (sibling_num != null && !sibling_num.equals("")) { 
     114            // we have to modify the doc name 
     115            document_name = document_name+"."+sibling_num+".ss"; 
     116        } 
     117         
     118        boolean expand_document = false; 
     119        String ed_arg = (String) params.get(GSParams.EXPAND_DOCUMENT); 
     120        if (ed_arg != null && ed_arg.equals("1")) { 
     121            expand_document = true; 
     122        } 
     123         
     124 
     125        boolean expand_contents = false; 
     126        if (expand_document) { // we always expand the contents with the text 
     127            expand_contents = true; 
     128        } else { 
     129            String ec_arg = (String) params.get(GSParams.EXPAND_CONTENTS); 
     130            if (ec_arg != null && ec_arg.equals("1")) { 
     131                expand_contents = true; 
     132            } 
     133        } 
     134 
     135        //append site metadata 
     136        addSiteMetadata( page_response, lang, uid); 
     137 
     138        // get the additional data needed for the page 
     139        getBackgroundData(page_response, collection, lang, uid); 
     140        Element format_elem = (Element)GSXML.getChildByTagName(page_response, GSXML.FORMAT_ELEM); 
     141         
     142        // the_document is where all the doc info - structure and metadata etc 
     143        // is added into, to be returned in the page 
     144        Element the_document = this.doc.createElement(GSXML.DOCUMENT_ELEM); 
     145        page_response.appendChild(the_document); 
     146 
     147        // set the doctype from the cgi arg as an attribute 
     148        the_document.setAttribute(GSXML.DOC_TYPE_ATT, document_type); 
     149 
     150        // create a basic doc list containing the current node 
     151        Element basic_doc_list = this.doc.createElement(GSXML.DOC_NODE_ELEM+GSXML.LIST_MODIFIER); 
     152        Element current_doc = this.doc.createElement(GSXML.DOC_NODE_ELEM); 
     153        basic_doc_list.appendChild(current_doc); 
     154        if (document_name.length()!=0){ 
     155            current_doc.setAttribute(GSXML.NODE_ID_ATT, document_name); 
     156        }else if (has_href.length()!=0){ 
     157            current_doc.setAttribute(GSXML.NODE_ID_ATT, has_href); 
     158            current_doc.setAttribute("externalURL", has_rl); 
     159        } 
     160 
     161        // Create a parameter list to specify the required structure information 
     162        Element ds_param_list = this.doc.createElement(GSXML.PARAM_ELEM+GSXML.LIST_MODIFIER); 
     163         
     164        if (service_params != null) { 
     165            GSXML.addParametersToList(this.doc, ds_param_list, service_params); 
     166        } 
     167 
     168        Element ds_param = null;  
     169        boolean get_structure = false; 
     170        boolean get_structure_info = false; 
     171        if (document_type.equals("paged")) { 
     172            get_structure_info = true; 
     173            // get teh info needed for paged naviagtion 
     174            ds_param = this.doc.createElement(GSXML.PARAM_ELEM); 
     175            ds_param_list.appendChild(ds_param); 
     176            ds_param.setAttribute(GSXML.NAME_ATT, "info"); 
     177            ds_param.setAttribute(GSXML.VALUE_ATT, "numSiblings"); 
     178            ds_param = this.doc.createElement(GSXML.PARAM_ELEM); 
     179            ds_param_list.appendChild(ds_param); 
     180            ds_param.setAttribute(GSXML.NAME_ATT, "info"); 
     181            ds_param.setAttribute(GSXML.VALUE_ATT, "numChildren"); 
     182            ds_param = this.doc.createElement(GSXML.PARAM_ELEM); 
     183            ds_param_list.appendChild(ds_param); 
     184            ds_param.setAttribute(GSXML.NAME_ATT, "info"); 
     185            ds_param.setAttribute(GSXML.VALUE_ATT, "siblingPosition"); 
     186             
     187        } else if (document_type.equals("hierarchy")){ 
     188            get_structure = true; 
     189            if (expand_contents) { 
     190                ds_param = this.doc.createElement(GSXML.PARAM_ELEM); 
     191                ds_param_list.appendChild(ds_param); 
     192                ds_param.setAttribute(GSXML.NAME_ATT, "structure"); 
     193                ds_param.setAttribute(GSXML.VALUE_ATT, "entire"); 
     194            } else { 
     195                // get the info needed for table of contents 
     196                ds_param = this.doc.createElement(GSXML.PARAM_ELEM); 
     197                ds_param_list.appendChild(ds_param); 
     198                ds_param.setAttribute(GSXML.NAME_ATT, "structure"); 
     199                ds_param.setAttribute(GSXML.VALUE_ATT, "ancestors"); 
     200                ds_param = this.doc.createElement(GSXML.PARAM_ELEM); 
     201                ds_param_list.appendChild(ds_param); 
     202                ds_param.setAttribute(GSXML.NAME_ATT, "structure"); 
     203                ds_param.setAttribute(GSXML.VALUE_ATT, "children"); 
     204                if (get_siblings) { 
     205                    ds_param = this.doc.createElement(GSXML.PARAM_ELEM); 
     206                    ds_param_list.appendChild(ds_param); 
     207                    ds_param.setAttribute(GSXML.NAME_ATT, "structure"); 
     208                    ds_param.setAttribute(GSXML.VALUE_ATT, "siblings"); 
     209                } 
     210            } 
     211        } else { 
     212            // we dont need any structure 
     213        } 
     214 
     215        boolean has_dummy = false; 
     216        if (get_structure || get_structure_info) { 
     217 
     218            // Build a request to obtain the document structure 
     219            Element ds_message = this.doc.createElement(GSXML.MESSAGE_ELEM); 
     220            String to = GSPath.appendLink(collection, "DocumentStructureRetrieve");// Hard-wired? 
     221            Element ds_request = GSXML.createBasicRequest(this.doc,GSXML.REQUEST_TYPE_PROCESS, to, lang, uid);  
     222            ds_message.appendChild(ds_request); 
     223            ds_request.appendChild(ds_param_list); 
     224             
     225            // create a doc_node_list and put in the doc_node that we are interested in 
     226            ds_request.appendChild(basic_doc_list); 
     227             
     228            // Process the document structure retrieve message 
     229            Element ds_response_message = (Element) this.mr.process(ds_message); 
     230            if (processErrorElements(ds_response_message, page_response)) { 
     231                return result; 
     232            } 
     233 
     234            // get the info and print out 
     235            String path = GSPath.appendLink(GSXML.RESPONSE_ELEM, GSXML.DOC_NODE_ELEM+GSXML.LIST_MODIFIER); 
     236            path = GSPath.appendLink(path, GSXML.DOC_NODE_ELEM); 
     237            path = GSPath.appendLink(path, "nodeStructureInfo"); 
     238            Element ds_response_struct_info = (Element) GSXML.getNodeByPath(ds_response_message, path); 
     239            // get the doc_node bit  
     240            if (ds_response_struct_info != null) { 
     241                the_document.appendChild(this.doc.importNode(ds_response_struct_info, true));  
     242            } 
     243            path = GSPath.appendLink(GSXML.RESPONSE_ELEM, GSXML.DOC_NODE_ELEM+GSXML.LIST_MODIFIER); 
     244            path = GSPath.appendLink(path, GSXML.DOC_NODE_ELEM); 
     245            path = GSPath.appendLink(path, GSXML.NODE_STRUCTURE_ELEM); 
     246            Element ds_response_structure = (Element) GSXML.getNodeByPath(ds_response_message, path); 
     247             
     248            if (ds_response_structure != null) { 
     249                // add the contents of the structure bit into the_document 
     250                NodeList structs = ds_response_structure.getChildNodes(); 
     251                for (int i=0; i<structs.getLength();i++) { 
     252                    the_document.appendChild(this.doc.importNode(structs.item(i), true)); 
     253                } 
     254            } else { 
     255                // no structure nodes, so put in a dummy doc node 
     256                Element doc_node = this.doc.createElement(GSXML.DOC_NODE_ELEM); 
     257                if (document_name.length()!=0){ 
     258                    doc_node.setAttribute(GSXML.NODE_ID_ATT, document_name); 
     259                }else if (has_href.length()!=0){ 
     260                    doc_node.setAttribute(GSXML.NODE_ID_ATT, has_href); 
     261                    doc_node.setAttribute("externalURL", has_rl); 
     262                } 
     263                the_document.appendChild(doc_node); 
     264                has_dummy = true; 
     265            } 
     266        } else { // a simple type - we dont have a dummy node for simple 
     267            // should think about this more 
     268            // no structure request, so just put in a dummy doc node 
     269            Element doc_node = this.doc.createElement(GSXML.DOC_NODE_ELEM); 
     270            if (document_name.length()!=0){ 
     271                doc_node.setAttribute(GSXML.NODE_ID_ATT, document_name); 
     272            }else if (has_href.length()!=0){ 
     273                doc_node.setAttribute(GSXML.NODE_ID_ATT, has_href); 
     274                doc_node.setAttribute("externalURL", has_rl); 
     275            } 
     276            the_document.appendChild(doc_node); 
     277            has_dummy = true; 
     278        } 
     279         
     280        // Build a request to obtain some document metadata 
     281        Element dm_message = this.doc.createElement(GSXML.MESSAGE_ELEM); 
     282        String to = GSPath.appendLink(collection, "DocumentMetadataRetrieve");  // Hard-wired? 
     283        Element dm_request = GSXML.createBasicRequest(this.doc, GSXML.REQUEST_TYPE_PROCESS, to, lang, uid); 
     284        dm_message.appendChild(dm_request); 
     285        // Create a parameter list to specify the required metadata information 
     286         
     287        HashSet meta_names = new HashSet(); 
     288        meta_names.add("Title"); // the default 
     289        if (format_elem != null) { 
     290            extractMetadataNames(format_elem, meta_names); 
     291        } 
     292         
     293        Element dm_param_list = createMetadataParamList(meta_names); 
     294        if (service_params != null) { 
     295            GSXML.addParametersToList(this.doc, dm_param_list, service_params); 
     296        } 
     297         
     298        dm_request.appendChild(dm_param_list); 
     299         
     300         
     301        // create the doc node list for the metadata request 
     302        Element dm_doc_list = this.doc.createElement(GSXML.DOC_NODE_ELEM+GSXML.LIST_MODIFIER); 
     303        dm_request.appendChild(dm_doc_list); 
     304 
     305        // Add each node from the structure response into the metadata request 
     306        NodeList doc_nodes = the_document.getElementsByTagName(GSXML.DOC_NODE_ELEM); 
     307        for (int i = 0; i < doc_nodes.getLength(); i++) { 
     308            Element doc_node = (Element) doc_nodes.item(i); 
     309            String doc_node_id = doc_node.getAttribute(GSXML.NODE_ID_ATT); 
     310 
     311            // Add the documentNode to the list 
     312            Element dm_doc_node = this.doc.createElement(GSXML.DOC_NODE_ELEM); 
     313            dm_doc_list.appendChild(dm_doc_node); 
     314            dm_doc_node.setAttribute(GSXML.NODE_ID_ATT, doc_node_id); 
     315            dm_doc_node.setAttribute(GSXML.NODE_TYPE_ATT, 
     316            doc_node.getAttribute(GSXML.NODE_TYPE_ATT)); 
     317        } 
     318 
     319        // we also want a metadata request to the top level document to get 
     320        // assocfilepath - this could be cached too 
     321        Element doc_meta_request = GSXML.createBasicRequest(this.doc, GSXML.REQUEST_TYPE_PROCESS, to, lang, uid); 
     322        dm_message.appendChild(doc_meta_request); 
     323        Element doc_meta_param_list = this.doc.createElement(GSXML.PARAM_ELEM+GSXML.LIST_MODIFIER); 
     324        if (service_params != null) { 
     325            GSXML.addParametersToList(this.doc, doc_meta_param_list, service_params); 
     326        } 
     327 
     328        doc_meta_request.appendChild(doc_meta_param_list); 
     329        Element doc_param = this.doc.createElement(GSXML.PARAM_ELEM); 
     330        doc_meta_param_list.appendChild(doc_param); 
     331        doc_param.setAttribute(GSXML.NAME_ATT, "metadata"); 
     332        doc_param.setAttribute(GSXML.VALUE_ATT, "assocfilepath"); 
     333 
     334        // create the doc node list for the metadata request 
     335        Element doc_list = this.doc.createElement(GSXML.DOC_NODE_ELEM+GSXML.LIST_MODIFIER); 
     336        doc_meta_request.appendChild(doc_list); 
     337 
     338        Element doc_node = this.doc.createElement(GSXML.DOC_NODE_ELEM); 
     339        // the node we want is the root document node 
     340        if (document_name.length()!=0){ 
     341            doc_node.setAttribute(GSXML.NODE_ID_ATT, document_name+".rt"); 
     342        }else if (has_href.length()!=0){ 
     343            doc_node.setAttribute(GSXML.NODE_ID_ATT, has_href+".rt");  
     344            doc_node.setAttribute("externalURL", has_rl); 
     345        } 
     346        doc_list.appendChild(doc_node); 
     347        Element dm_response_message = (Element) this.mr.process(dm_message); 
     348        if (processErrorElements(dm_response_message, page_response)) { 
     349            return result; 
     350        } 
     351 
     352        String path = GSPath.appendLink(GSXML.RESPONSE_ELEM, GSXML.DOC_NODE_ELEM+GSXML.LIST_MODIFIER); 
     353        Element dm_response_doc_list = (Element) GSXML.getNodeByPath(dm_response_message, path); 
     354 
     355        // Merge the metadata with the structure information 
     356        NodeList dm_response_docs = dm_response_doc_list.getChildNodes(); 
     357        for (int i = 0; i < doc_nodes.getLength(); i++) { 
     358            GSXML.mergeMetadataLists(doc_nodes.item(i), dm_response_docs.item(i)); 
     359        } 
     360        // get the top level doc metadata out 
     361        Element doc_meta_response = (Element)dm_response_message.getElementsByTagName(GSXML.RESPONSE_ELEM).item(1); 
     362        Element top_doc_node = (Element)GSXML.getNodeByPath(doc_meta_response, "documentNodeList/documentNode"); 
     363        GSXML.mergeMetadataLists(the_document, top_doc_node); 
     364         
     365        // Build a request to obtain some document content 
     366        Element dc_message = this.doc.createElement(GSXML.MESSAGE_ELEM); 
     367        to = GSPath.appendLink(collection, "DocumentContentRetrieve");  // Hard-wired? 
     368        Element dc_request = GSXML.createBasicRequest(this.doc, GSXML.REQUEST_TYPE_PROCESS, to, lang, uid); 
     369        dc_message.appendChild(dc_request); 
     370         
     371 
     372        // Create a parameter list to specify the request parameters - empty for now 
     373        Element dc_param_list = this.doc.createElement(GSXML.PARAM_ELEM+GSXML.LIST_MODIFIER); 
     374        if (service_params != null) { 
     375            GSXML.addParametersToList(this.doc, dc_param_list, service_params); 
     376        } 
     377 
     378        dc_request.appendChild(dc_param_list); 
     379 
     380        // get the content 
     381        // the doc list for the content request is the same as the one for the structure request unless we want the whole document, in which case its the same as for the metadata request. 
     382        if (expand_document) { 
     383            dc_request.appendChild(dm_doc_list); 
     384        } else { 
     385            dc_request.appendChild(basic_doc_list); 
     386        } 
     387        logger.debug("request = "+converter.getString(dc_message)); 
     388        Element dc_response_message = (Element) this.mr.process(dc_message); 
     389        if (processErrorElements(dc_response_message, page_response)) { 
     390            return result; 
     391        } 
     392 
     393        Element dc_response_doc_list = (Element) GSXML.getNodeByPath(dc_response_message, path); 
     394 
     395        if (expand_document) { 
     396            // Merge the content with the structure information 
     397            NodeList dc_response_docs = dc_response_doc_list.getChildNodes(); 
     398            for (int i = 0; i < doc_nodes.getLength(); i++) { 
     399                Node content = GSXML.getChildByTagName((Element)dc_response_docs.item(i), "nodeContent"); 
     400                if (content != null) { 
     401                    if (highlight_query_terms) { 
     402                        content = highlightQueryTerms(request, (Element)content); 
     403                    } 
     404                    doc_nodes.item(i).appendChild(this.doc.importNode(content, true)); 
     405                } 
     406                //GSXML.mergeMetadataLists(doc_nodes.item(i), dm_response_docs.item(i)); 
     407            } 
     408        } else { 
     409            //path = GSPath.appendLink(path, GSXML.DOC_NODE_ELEM); 
     410            Element dc_response_doc = (Element) GSXML.getChildByTagName(dc_response_doc_list, GSXML.DOC_NODE_ELEM); 
     411            Element dc_response_doc_content = (Element) GSXML.getChildByTagName(dc_response_doc, GSXML.NODE_CONTENT_ELEM); 
     412            Element dc_response_doc_external = (Element) GSXML.getChildByTagName(dc_response_doc, "external"); 
     413             
     414            if (dc_response_doc_content == null) { 
     415                // no content to add 
     416                if (dc_response_doc_external !=null){ 
     417                    String modified_doc_id = dc_response_doc.getAttribute(GSXML.NODE_ID_ATT); 
     418                     
     419                    the_document.setAttribute("selectedNode", modified_doc_id); 
     420                    the_document.setAttribute("external", dc_response_doc_external.getAttribute("external_link")); 
     421                } 
     422                return result; 
     423            } 
     424            if (highlight_query_terms) { 
     425                dc_response_doc.removeChild(dc_response_doc_content); 
     426                 
     427                dc_response_doc_content = highlightQueryTerms(request, dc_response_doc_content); 
     428                dc_response_doc.appendChild(dc_response_doc.getOwnerDocument().importNode(dc_response_doc_content, true)); 
     429            } 
     430             
     431             
     432            if (provide_annotations) { 
     433                String service_selected = (String)params.get(ENRICH_DOC_ARG); 
     434                if (service_selected != null && service_selected.equals("1")) { 
     435                    // now we can modifiy the response doc if needed 
     436                    String enrich_service = (String)params.get(GSParams.SERVICE); 
     437                    // send a message to the service 
     438                    Element enrich_message = this.doc.createElement(GSXML.MESSAGE_ELEM); 
     439                    Element enrich_request = GSXML.createBasicRequest(this.doc, GSXML.REQUEST_TYPE_PROCESS, enrich_service, lang, uid); 
     440                    enrich_message.appendChild(enrich_request); 
     441                    // check for parameters 
     442                    HashMap e_service_params = (HashMap)params.get("s1"); 
     443                    if (e_service_params != null) {  
     444                        Element enrich_pl = this.doc.createElement(GSXML.PARAM_ELEM+GSXML.LIST_MODIFIER); 
     445                        GSXML.addParametersToList(this.doc, enrich_pl, e_service_params);  
     446                        enrich_request.appendChild(enrich_pl); 
     447                    } 
     448                    Element e_doc_list = this.doc.createElement(GSXML.DOC_NODE_ELEM+GSXML.LIST_MODIFIER); 
     449                    enrich_request.appendChild(e_doc_list); 
     450                    e_doc_list.appendChild(this.doc.importNode(dc_response_doc, true)); 
     451                     
     452                    Node enrich_response = this.mr.process(enrich_message); 
     453                     
     454                    String [] links = {GSXML.RESPONSE_ELEM, GSXML.DOC_NODE_ELEM+GSXML.LIST_MODIFIER, GSXML.DOC_NODE_ELEM, GSXML.NODE_CONTENT_ELEM}; 
     455                    path = GSPath.createPath(links); 
     456                    dc_response_doc_content = (Element)GSXML.getNodeByPath(enrich_response, path); 
     457                     
     458                }  
     459            } // if provide_annotations 
     460 
     461             
     462            // use the returned id rather than the sent one cos there may have 
     463            // been modifiers such as .pr that are removed. 
     464            String modified_doc_id = dc_response_doc.getAttribute(GSXML.NODE_ID_ATT); 
     465            the_document.setAttribute("selectedNode", modified_doc_id); 
     466            if (has_dummy) { 
     467                // change the id if necessary and add the content 
     468                Element dummy_node = (Element)doc_nodes.item(0); 
     469                 
     470                dummy_node.setAttribute(GSXML.NODE_ID_ATT, modified_doc_id); 
     471                dummy_node.appendChild(this.doc.importNode(dc_response_doc_content, true)); 
     472                // hack for simple type 
     473                if (document_type.equals("simple")) { 
     474                    // we dont want the internal docNode, just want the content and metadata in the document 
     475                    // rethink this!! 
     476                    the_document.removeChild(dummy_node); 
     477 
     478                    NodeList dummy_children = dummy_node.getChildNodes(); 
     479                    //for (int i=0; i<dummy_children.getLength(); i++) { 
     480                    for (int i=dummy_children.getLength()-1; i>=0; i--) { 
     481                        // special case as we don't want more than one metadata list 
     482                        if (dummy_children.item(i).getNodeName().equals(GSXML.METADATA_ELEM+GSXML.LIST_MODIFIER)) { 
     483                            GSXML.mergeMetadataFromList(the_document, dummy_children.item(i)); 
     484                        } else { 
     485                            the_document.appendChild(dummy_children.item(i)); 
     486                        } 
     487                    } 
     488                } 
     489            } else { 
     490                // Merge the document content with the metadata and structure information 
     491                for (int i = 0; i < doc_nodes.getLength(); i++) { 
     492                    Node dn = doc_nodes.item(i); 
     493                    String dn_id = ((Element)dn).getAttribute(GSXML.NODE_ID_ATT); 
     494                    if (dn_id.equals(modified_doc_id)) { 
     495                        dn.appendChild(this.doc.importNode(dc_response_doc_content, true)); 
     496                        break; 
     497                    } 
     498                } 
     499            } 
     500        } 
     501        logger.debug("(DocumentAction) Page:\n" + this.converter.getPrettyString(result)); 
     502        return result; 
    110503    } 
    111504     
    112     String sibling_num = (String) params.get(GOTO_PAGE_ARG); 
    113     if (sibling_num != null && !sibling_num.equals("")) { 
    114         // we have to modify the doc name 
    115         document_name = document_name+"."+sibling_num+".ss"; 
     505    /** tell the param class what its arguments are  
     506    * if an action has its own arguments, this should add them to the params 
     507    * object - particularly important for args that should not be saved */ 
     508    public boolean getActionParameters(GSParams params) { 
     509        params.addParameter(GOTO_PAGE_ARG, false); 
     510        params.addParameter(ENRICH_DOC_ARG, false); 
     511        return true; 
    116512    } 
    117      
    118     boolean expand_document = false; 
    119     String ed_arg = (String) params.get(GSParams.EXPAND_DOCUMENT); 
    120     if (ed_arg != null && ed_arg.equals("1")) { 
    121         expand_document = true; 
     513 
     514 
     515    /** this method gets the collection description, the format info, the 
     516    * list of enrich services, etc - stuff that is needed for the page, 
     517    * but is the same whatever the query is - should be cached */ 
     518    protected  boolean getBackgroundData(Element page_response,  
     519    String collection, String lang, 
     520    String uid) { 
     521 
     522        // create a message to process - contains requests for the collection  
     523        // description, the format element, the enrich services on offer 
     524        // these could all be cached 
     525        Element info_message = this.doc.createElement(GSXML.MESSAGE_ELEM); 
     526        String path = GSPath.appendLink(collection, "DocumentContentRetrieve"); 
     527        // the format request - ignore for now, where does this request go to?? 
     528        Element format_request = GSXML.createBasicRequest(this.doc, GSXML.REQUEST_TYPE_FORMAT, path, lang, uid); 
     529        info_message.appendChild(format_request); 
     530 
     531        // the enrich_services request - only do this if provide_annotations is true 
     532 
     533        if (provide_annotations) { 
     534            Element enrich_services_request = GSXML.createBasicRequest(this.doc, GSXML.REQUEST_TYPE_DESCRIBE, "", lang, uid); 
     535            enrich_services_request.setAttribute(GSXML.INFO_ATT, "serviceList"); 
     536            info_message.appendChild(enrich_services_request); 
     537        } 
     538         
     539        Element info_response = (Element)this.mr.process(info_message); 
     540 
     541        // the collection is the first response 
     542        NodeList responses = info_response.getElementsByTagName(GSXML.RESPONSE_ELEM); 
     543        Element format_resp = (Element) responses.item(0); 
     544         
     545        Element format_elem = (Element)GSXML.getChildByTagName(format_resp, GSXML.FORMAT_ELEM); 
     546        if (format_elem != null) { 
     547            logger.debug("doc action found a format statement"); 
     548            // set teh format type 
     549            format_elem.setAttribute(GSXML.TYPE_ATT, "display");   
     550            page_response.appendChild(this.doc.importNode(format_elem, true)); 
     551        } 
     552 
     553        if (provide_annotations) { 
     554            Element services_resp = (Element)responses.item(1);  
     555 
     556            // a new message for the mr 
     557            Element enrich_message = this.doc.createElement(GSXML.MESSAGE_ELEM); 
     558             
     559            NodeList e_services = services_resp.getElementsByTagName(GSXML.SERVICE_ELEM); 
     560            boolean service_found = false; 
     561            for (int j=0; j<e_services.getLength(); j++) { 
     562                if (((Element)e_services.item(j)).getAttribute(GSXML.TYPE_ATT).equals("enrich")) { 
     563                    Element s = GSXML.createBasicRequest(this.doc, GSXML.REQUEST_TYPE_DESCRIBE, ((Element)e_services.item(j)).getAttribute(GSXML.NAME_ATT), lang, uid); 
     564                    enrich_message.appendChild(s); 
     565                    service_found = true; 
     566                } 
     567            } 
     568            if (service_found) { 
     569                Element enrich_response = (Element)this.mr.process(enrich_message); 
     570                 
     571                NodeList e_responses = enrich_response.getElementsByTagName(GSXML.RESPONSE_ELEM); 
     572                Element service_list = this.doc.createElement(GSXML.SERVICE_ELEM + GSXML.LIST_MODIFIER); 
     573                for (int i=0; i<e_responses.getLength(); i++) { 
     574                    Element e_resp = (Element)e_responses.item(i); 
     575                    Element e_service = (Element)this.doc.importNode(GSXML.getChildByTagName(e_resp, GSXML.SERVICE_ELEM), true);  
     576                    e_service.setAttribute(GSXML.NAME_ATT, e_resp.getAttribute(GSXML.FROM_ATT)); 
     577                    service_list.appendChild(e_service); 
     578                } 
     579                page_response.appendChild(service_list); 
     580            } 
     581        } // if provide_annotations 
     582        return true; 
     583         
    122584    } 
    123         
    124  
    125     boolean expand_contents = false; 
    126     if (expand_document) { // we always expand the contents with the text 
    127         expand_contents = true; 
    128     } else { 
    129         String ec_arg = (String) params.get(GSParams.EXPAND_CONTENTS); 
    130         if (ec_arg != null && ec_arg.equals("1")) { 
    131         expand_contents = true; 
    132         } 
     585 
     586    /** this involves a bit of a hack to get the equivalent query terms - has to requery the query service - uses the last selected service name. (if it ends in query). should this action do the query or should it send a message to the query action? but that will involve lots of extra stuff. also doesn't handle phrases properly - just highlights all the terms found in the text. 
     587    */ 
     588    protected Element highlightQueryTerms(Element request, Element dc_response_doc_content) { 
     589 
     590        // do the query again to get term info  
     591        Element cgi_param_list = (Element)GSXML.getChildByTagName(request, GSXML.PARAM_ELEM+GSXML.LIST_MODIFIER); 
     592        HashMap params = GSXML.extractParams(cgi_param_list, false); 
     593         
     594        HashMap previous_params = (HashMap)params.get("p"); 
     595        if (previous_params == null) { 
     596            return dc_response_doc_content; 
     597        } 
     598        String service_name = (String)previous_params.get(GSParams.SERVICE); 
     599        if (service_name == null || !service_name.endsWith("Query")) { // hack for now - we only do highlighting if we were in a query last - ie not if we were in a browse thingy 
     600            logger.debug("invalid service, not doing highlighting"); 
     601            return dc_response_doc_content; 
     602        } 
     603        String collection = (String)params.get(GSParams.COLLECTION); 
     604        String lang = request.getAttribute(GSXML.LANG_ATT); 
     605        String uid = request.getAttribute(GSXML.USER_ID_ATT); 
     606        String to = GSPath.appendLink(collection, service_name); 
     607         
     608        Element mr_query_message = this.doc.createElement(GSXML.MESSAGE_ELEM); 
     609        Element mr_query_request = GSXML.createBasicRequest(this.doc, GSXML.REQUEST_TYPE_PROCESS, to, lang, uid); 
     610        mr_query_message.appendChild(mr_query_request); 
     611         
     612        // paramList 
     613        HashMap service_params = (HashMap)params.get("s1"); 
     614         
     615        Element query_param_list = this.doc.createElement(GSXML.PARAM_ELEM + GSXML.LIST_MODIFIER); 
     616        GSXML.addParametersToList(this.doc, query_param_list, service_params); 
     617        mr_query_request.appendChild(query_param_list); 
     618 
     619        // do the query 
     620        Element mr_query_response = (Element)this.mr.process(mr_query_message); 
     621         
     622        String path = GSPath.appendLink(GSXML.RESPONSE_ELEM, GSXML.TERM_ELEM+GSXML.LIST_MODIFIER); 
     623        Element query_term_list_element = (Element) GSXML.getNodeByPath(mr_query_response, path); 
     624        if (query_term_list_element == null) { 
     625            // no term info 
     626            logger.error("No query term information.\n"); 
     627            return dc_response_doc_content; 
     628        } 
     629 
     630        String content = GSXML.getNodeText(dc_response_doc_content); 
     631 
     632        String metadata_path = GSPath.appendLink(GSXML.RESPONSE_ELEM, GSXML.METADATA_ELEM+GSXML.LIST_MODIFIER); 
     633        Element metadata_list = (Element) GSXML.getNodeByPath(mr_query_response, metadata_path); 
     634 
     635        HashSet query_term_variants = new HashSet(); 
     636        NodeList equivalent_terms_nodelist = query_term_list_element.getElementsByTagName("equivTermList"); 
     637        if(equivalent_terms_nodelist == null || equivalent_terms_nodelist.getLength() == 0) 
     638        { 
     639            NodeList terms_nodelist = query_term_list_element.getElementsByTagName("term"); 
     640            if(terms_nodelist != null && terms_nodelist.getLength() > 0) 
     641            { 
     642                for(int i = 0; i < terms_nodelist.getLength(); i++) 
     643                { 
     644                    String termValue = ((Element)terms_nodelist.item(i)).getAttribute("name"); 
     645                    String termValueU = null; 
     646                    String termValueL = null; 
     647                         
     648                    if(termValue.length() > 1) 
     649                    { 
     650                        termValueU = termValue.substring(0, 1).toUpperCase() + termValue.substring(1); 
     651                        termValueL = termValue.substring(0, 1).toLowerCase() + termValue.substring(1); 
     652                    } 
     653                    else 
     654                    { 
     655                        termValueU = termValue.substring(0, 1).toUpperCase(); 
     656                        termValueL = termValue.substring(0, 1).toLowerCase(); 
     657                    } 
     658                     
     659                    query_term_variants.add(termValueU); 
     660                    query_term_variants.add(termValueL); 
     661                } 
     662            } 
     663        } 
     664        else 
     665        { 
     666            for (int i = 0; i < equivalent_terms_nodelist.getLength(); i++) { 
     667                Element equivalent_terms_element = (Element) equivalent_terms_nodelist.item(i); 
     668                String[] equivalent_terms = GSXML.getAttributeValuesFromList(equivalent_terms_element, GSXML.NAME_ATT); 
     669                for (int j = 0; j < equivalent_terms.length; j++) { 
     670                    query_term_variants.add(equivalent_terms[j]); 
     671                } 
     672            } 
     673        } 
     674 
     675        ArrayList phrase_query_term_variants_hierarchy = new ArrayList(); 
     676 
     677        Element query_element = GSXML.getNamedElement(metadata_list, GSXML.METADATA_ELEM, GSXML.NAME_ATT, "query"); 
     678        String performed_query = GSXML.getNodeText(query_element) + " "; 
     679 
     680        ArrayList phrase_query_p_term_variants_list = new ArrayList(); 
     681        int term_start = 0; 
     682        boolean in_term = false; 
     683        boolean in_phrase = false; 
     684        for (int i = 0; i < performed_query.length(); i++) { 
     685            char character = performed_query.charAt(i); 
     686            boolean is_character_letter_or_digit = Character.isLetterOrDigit(character); 
     687 
     688            // Has a query term just started? 
     689            if (in_term == false && is_character_letter_or_digit == true) { 
     690                in_term = true; 
     691                term_start = i; 
     692            } 
     693 
     694            // Or has a term just finished? 
     695            else if (in_term == true && is_character_letter_or_digit == false) { 
     696                in_term = false; 
     697                String term = performed_query.substring(term_start, i); 
     698                 
     699                Element term_element = GSXML.getNamedElement(query_term_list_element, GSXML.TERM_ELEM, GSXML.NAME_ATT, term); 
     700                if (term_element != null) { 
     701                     
     702                    HashSet phrase_query_p_term_x_variants = new HashSet(); 
     703                     
     704                    NodeList term_equivalent_terms_nodelist = term_element.getElementsByTagName("equivTermList"); 
     705                    if(term_equivalent_terms_nodelist == null || term_equivalent_terms_nodelist.getLength() == 0) 
     706                    { 
     707                        String termValueU = null; 
     708                        String termValueL = null; 
     709                         
     710                        if(term.length() > 1) 
     711                        { 
     712                            termValueU = term.substring(0, 1).toUpperCase() + term.substring(1); 
     713                            termValueL = term.substring(0, 1).toLowerCase() + term.substring(1); 
     714                        } 
     715                        else 
     716                        { 
     717                            termValueU = term.substring(0, 1).toUpperCase(); 
     718                            termValueL = term.substring(0, 1).toLowerCase(); 
     719                        } 
     720                         
     721                        phrase_query_p_term_x_variants.add(termValueU); 
     722                        phrase_query_p_term_x_variants.add(termValueL); 
     723                    } 
     724                    else 
     725                    { 
     726                        for (int j = 0; j < term_equivalent_terms_nodelist.getLength(); j++) { 
     727                            Element term_equivalent_terms_element = (Element) term_equivalent_terms_nodelist.item(j); 
     728                            String[] term_equivalent_terms = GSXML.getAttributeValuesFromList(term_equivalent_terms_element, GSXML.NAME_ATT); 
     729                            for (int k = 0; k < term_equivalent_terms.length; k++) { 
     730                                phrase_query_p_term_x_variants.add(term_equivalent_terms[k]); 
     731                            } 
     732                        } 
     733                    } 
     734                    phrase_query_p_term_variants_list.add(phrase_query_p_term_x_variants); 
     735                     
     736                    if (in_phrase == false) { 
     737                        phrase_query_term_variants_hierarchy.add(phrase_query_p_term_variants_list); 
     738                        phrase_query_p_term_variants_list = new ArrayList(); 
     739                    } 
     740                } 
     741            } 
     742            // Watch for phrases (surrounded by quotes) 
     743            if (character == '\"') { 
     744                // Has a phrase just started? 
     745                if (in_phrase == false) { 
     746                    in_phrase = true; 
     747                } 
     748                // Or has a phrase just finished? 
     749                else if (in_phrase == true) { 
     750                    in_phrase = false; 
     751                    phrase_query_term_variants_hierarchy.add(phrase_query_p_term_variants_list); 
     752                } 
     753 
     754                phrase_query_p_term_variants_list = new ArrayList(); 
     755            } 
     756        } 
     757 
     758        System.err.println(query_term_variants + " *** " + phrase_query_term_variants_hierarchy); 
     759        return highlightQueryTermsInternal(content, query_term_variants, phrase_query_term_variants_hierarchy); 
    133760    } 
    134761 
    135     //append site metadata 
    136     addSiteMetadata( page_response, lang, uid); 
    137  
    138     // get the additional data needed for the page 
    139     getBackgroundData(page_response, collection, lang, uid); 
    140     Element format_elem = (Element)GSXML.getChildByTagName(page_response, GSXML.FORMAT_ELEM); 
    141      
    142     // the_document is where all the doc info - structure and metadata etc 
    143     // is added into, to be returned in the page 
    144     Element the_document = this.doc.createElement(GSXML.DOCUMENT_ELEM); 
    145     page_response.appendChild(the_document); 
    146  
    147     // set the doctype from the cgi arg as an attribute 
    148     the_document.setAttribute(GSXML.DOC_TYPE_ATT, document_type); 
    149  
    150     // create a basic doc list containing the current node 
    151     Element basic_doc_list = this.doc.createElement(GSXML.DOC_NODE_ELEM+GSXML.LIST_MODIFIER); 
    152     Element current_doc = this.doc.createElement(GSXML.DOC_NODE_ELEM); 
    153     basic_doc_list.appendChild(current_doc); 
    154     if (document_name.length()!=0){ 
    155         current_doc.setAttribute(GSXML.NODE_ID_ATT, document_name); 
    156     }else if (has_href.length()!=0){ 
    157         current_doc.setAttribute(GSXML.NODE_ID_ATT, has_href); 
    158         current_doc.setAttribute("externalURL", has_rl); 
     762 
     763    /** 
     764    * Highlights query terms in a piece of text. 
     765    */ 
     766    private Element highlightQueryTermsInternal(String content, HashSet query_term_variants, ArrayList phrase_query_term_variants_hierarchy) 
     767    { 
     768        // Convert the content string to an array of characters for speed 
     769        char[] content_characters = new char[content.length()]; 
     770        content.getChars(0, content.length(), content_characters, 0); 
     771 
     772        // Now skim through the content, identifying word matches 
     773        ArrayList word_matches = new ArrayList(); 
     774        int word_start = 0; 
     775        boolean in_word = false; 
     776        boolean preceding_word_matched = false; 
     777        for (int i = 0; i < content_characters.length; i++) { 
     778            boolean is_character_letter_or_digit = Character.isLetterOrDigit(content_characters[i]); 
     779 
     780            // Has a word just started? 
     781            if (in_word == false && is_character_letter_or_digit == true) { 
     782                in_word = true; 
     783                word_start = i; 
     784            } 
     785 
     786            // Or has a word just finished? 
     787            else if (in_word == true && is_character_letter_or_digit == false) { 
     788                in_word = false; 
     789 
     790                // Check if the word matches any of the query term equivalents 
     791                String word = new String(content_characters, word_start, (i - word_start)); 
     792                if (query_term_variants.contains(word)) { 
     793                    // We have found a matching word, so remember its location 
     794                    word_matches.add(new WordMatch(word, word_start, i, preceding_word_matched)); 
     795                    preceding_word_matched = true; 
     796                } 
     797                else { 
     798                    preceding_word_matched = false; 
     799                } 
     800            } 
     801        } 
     802 
     803        // Don't forget the last word... 
     804        if (in_word == true) { 
     805            // Check if the word matches any of the query term equivalents 
     806            String word = new String(content_characters, word_start, (content_characters.length - word_start)); 
     807            if (query_term_variants.contains(word)) { 
     808                // We have found a matching word, so remember its location 
     809                word_matches.add(new WordMatch(word, word_start, content_characters.length, preceding_word_matched)); 
     810            } 
     811        } 
     812 
     813        ArrayList highlight_start_positions = new ArrayList(); 
     814        ArrayList highlight_end_positions = new ArrayList(); 
     815 
     816        // Deal with phrases now 
     817        ArrayList partial_phrase_matches = new ArrayList(); 
     818        for (int i = 0; i < word_matches.size(); i++) { 
     819            WordMatch word_match = (WordMatch) word_matches.get(i); 
     820 
     821            // See if any partial phrase matches are extended by this word 
     822            if (word_match.preceding_word_matched) { 
     823                for (int j = partial_phrase_matches.size() - 1; j >= 0; j--) { 
     824                    PartialPhraseMatch partial_phrase_match = (PartialPhraseMatch) partial_phrase_matches.remove(j); 
     825                    ArrayList phrase_query_p_term_variants_list = (ArrayList) phrase_query_term_variants_hierarchy.get(partial_phrase_match.query_phrase_number); 
     826                    HashSet phrase_query_p_term_x_variants = (HashSet) phrase_query_p_term_variants_list.get(partial_phrase_match.num_words_matched); 
     827                    if (phrase_query_p_term_x_variants.contains(word_match.word)) { 
     828                        partial_phrase_match.num_words_matched++; 
     829 
     830                        // Has a complete phrase match occurred? 
     831                        if (partial_phrase_match.num_words_matched == phrase_query_p_term_variants_list.size()) { 
     832                            // Check for overlaps by looking at the previous highlight range 
     833                            if (!highlight_end_positions.isEmpty()) { 
     834                                int last_highlight_index = highlight_end_positions.size() - 1; 
     835                                int last_highlight_end = ((Integer) highlight_end_positions.get(last_highlight_index)).intValue(); 
     836                                if (last_highlight_end > partial_phrase_match.start_position) { 
     837                                    // There is an overlap, so remove the previous phrase match 
     838                                    int last_highlight_start = ((Integer) highlight_start_positions.remove(last_highlight_index)).intValue(); 
     839                                    highlight_end_positions.remove(last_highlight_index); 
     840                                    partial_phrase_match.start_position = last_highlight_start; 
     841                                } 
     842                            } 
     843 
     844                            highlight_start_positions.add(new Integer(partial_phrase_match.start_position)); 
     845                            highlight_end_positions.add(new Integer(word_match.end_position)); 
     846                        } 
     847                        // No, but add the partial match back into the list for next time 
     848                        else { 
     849                            partial_phrase_matches.add(partial_phrase_match); 
     850                        } 
     851                    } 
     852                } 
     853            } 
     854            else { 
     855                partial_phrase_matches.clear(); 
     856            } 
     857 
     858            // See if this word is at the start of any of the phrases 
     859            for (int p = 0; p < phrase_query_term_variants_hierarchy.size(); p++) { 
     860                ArrayList phrase_query_p_term_variants_list = (ArrayList) phrase_query_term_variants_hierarchy.get(p); 
     861                HashSet phrase_query_p_term_1_variants = (HashSet) phrase_query_p_term_variants_list.get(0); 
     862                if (phrase_query_p_term_1_variants.contains(word_match.word)) { 
     863                    // If this phrase is just one word long, we have a complete match 
     864                    if (phrase_query_p_term_variants_list.size() == 1) { 
     865                        highlight_start_positions.add(new Integer(word_match.start_position)); 
     866                        highlight_end_positions.add(new Integer(word_match.end_position)); 
     867                    } 
     868                    // Otherwise we have the start of a potential phrase match 
     869                    else { 
     870                        partial_phrase_matches.add(new PartialPhraseMatch(word_match.start_position, p)); 
     871                    } 
     872                } 
     873            } 
     874        } 
     875 
     876        // Now add the annotation tags into the document at the correct points 
     877        Element content_element = this.doc.createElement(GSXML.NODE_CONTENT_ELEM); 
     878 
     879        int last_wrote = 0; 
     880        for (int i = 0; i < highlight_start_positions.size(); i++) { 
     881            int highlight_start = ((Integer) highlight_start_positions.get(i)).intValue(); 
     882            int highlight_end = ((Integer) highlight_end_positions.get(i)).intValue(); 
     883 
     884            // Print anything before the highlight range 
     885            if (last_wrote < highlight_start) { 
     886                String preceding_text = new String(content_characters, last_wrote, (highlight_start - last_wrote)); 
     887                content_element.appendChild(this.doc.createTextNode(preceding_text)); 
     888            } 
     889 
     890            // Print the highlight text, annotated 
     891            if (highlight_end > last_wrote) { 
     892                String highlight_text = new String(content_characters, highlight_start, (highlight_end - highlight_start)); 
     893                Element annotation_element = GSXML.createTextElement(this.doc, "annotation", highlight_text); 
     894                annotation_element.setAttribute("type", "query_term"); 
     895                content_element.appendChild(annotation_element); 
     896                last_wrote = highlight_end; 
     897            } 
     898        } 
     899 
     900        // Finish off any unwritten text 
     901        if (last_wrote < content_characters.length) { 
     902            String remaining_text = new String(content_characters, last_wrote, (content_characters.length - last_wrote)); 
     903            content_element.appendChild(this.doc.createTextNode(remaining_text)); 
     904        } 
     905 
     906        return content_element; 
    159907    } 
    160908 
    161     // Create a parameter list to specify the required structure information 
    162     Element ds_param_list = this.doc.createElement(GSXML.PARAM_ELEM+GSXML.LIST_MODIFIER); 
    163      
    164     if (service_params != null) { 
    165         GSXML.addParametersToList(this.doc, ds_param_list, service_params); 
     909 
     910    static private class WordMatch 
     911    { 
     912        public String word; 
     913        public int start_position; 
     914        public int end_position; 
     915        public boolean preceding_word_matched; 
     916 
     917        public WordMatch(String word, int start_position, int end_position, boolean preceding_word_matched) 
     918        { 
     919            this.word = word; 
     920            this.start_position = start_position; 
     921            this.end_position = end_position; 
     922            this.preceding_word_matched = preceding_word_matched; 
     923        } 
    166924    } 
    167925 
    168     Element ds_param = null;  
    169     boolean get_structure = false; 
    170     boolean get_structure_info = false; 
    171     if (document_type.equals("paged")) { 
    172         get_structure_info = true; 
    173         // get teh info needed for paged naviagtion 
    174         ds_param = this.doc.createElement(GSXML.PARAM_ELEM); 
    175         ds_param_list.appendChild(ds_param); 
    176         ds_param.setAttribute(GSXML.NAME_ATT, "info"); 
    177         ds_param.setAttribute(GSXML.VALUE_ATT, "numSiblings"); 
    178         ds_param = this.doc.createElement(GSXML.PARAM_ELEM); 
    179         ds_param_list.appendChild(ds_param); 
    180         ds_param.setAttribute(GSXML.NAME_ATT, "info"); 
    181         ds_param.setAttribute(GSXML.VALUE_ATT, "numChildren"); 
    182         ds_param = this.doc.createElement(GSXML.PARAM_ELEM); 
    183         ds_param_list.appendChild(ds_param); 
    184         ds_param.setAttribute(GSXML.NAME_ATT, "info"); 
    185         ds_param.setAttribute(GSXML.VALUE_ATT, "siblingPosition"); 
    186          
    187     } else if (document_type.equals("hierarchy")){ 
    188         get_structure = true; 
    189         if (expand_contents) { 
    190         ds_param = this.doc.createElement(GSXML.PARAM_ELEM); 
    191         ds_param_list.appendChild(ds_param); 
    192         ds_param.setAttribute(GSXML.NAME_ATT, "structure"); 
    193         ds_param.setAttribute(GSXML.VALUE_ATT, "entire"); 
    194         } else { 
    195         // get the info needed for table of contents 
    196         ds_param = this.doc.createElement(GSXML.PARAM_ELEM); 
    197         ds_param_list.appendChild(ds_param); 
    198         ds_param.setAttribute(GSXML.NAME_ATT, "structure"); 
    199         ds_param.setAttribute(GSXML.VALUE_ATT, "ancestors"); 
    200         ds_param = this.doc.createElement(GSXML.PARAM_ELEM); 
    201         ds_param_list.appendChild(ds_param); 
    202         ds_param.setAttribute(GSXML.NAME_ATT, "structure"); 
    203         ds_param.setAttribute(GSXML.VALUE_ATT, "children"); 
    204         if (get_siblings) { 
    205             ds_param = this.doc.createElement(GSXML.PARAM_ELEM); 
    206             ds_param_list.appendChild(ds_param); 
    207             ds_param.setAttribute(GSXML.NAME_ATT, "structure"); 
    208             ds_param.setAttribute(GSXML.VALUE_ATT, "siblings"); 
    209         } 
    210         } 
    211     } else { 
    212         // we dont need any structure 
     926 
     /**
      * Mutable record of a query phrase that has been matched only part of the
      * way so far while scanning word matches in the document text.
      * NOTE(review): in this diff rendering the closing brace line below is shared
      * with an unrelated old-side line (gutter "213939"); it is left untouched.
      */
     927    static private class PartialPhraseMatch 
     928    { 
            // Position at which the (partial) match begins; on a complete match it is
            // recorded as a highlight start position. Presumably a character offset
            // into the content - TODO confirm where instances are created.
     929        public int start_position; 
            // Index of the phrase within phrase_query_term_variants_hierarchy.
     930        public int query_phrase_number; 
            // Number of phrase words matched so far; also used as the index of the
            // next term-variant set to test within the phrase's variant list.
     931        public int num_words_matched; 
     932 
            /** Creates a partial match with one word already matched. */
     933        public PartialPhraseMatch(int start_position, int query_phrase_number) 
     934        { 
     935            this.start_position = start_position; 
     936            this.query_phrase_number = query_phrase_number; 
     937            this.num_words_matched = 1; 
     938        } 
    213939    } 
    214  
    215     boolean has_dummy = false; 
    216     if (get_structure || get_structure_info) { 
    217  
    218         // Build a request to obtain the document structure 
    219         Element ds_message = this.doc.createElement(GSXML.MESSAGE_ELEM); 
    220         String to = GSPath.appendLink(collection, "DocumentStructureRetrieve");// Hard-wired? 
    221         Element ds_request = GSXML.createBasicRequest(this.doc,GSXML.REQUEST_TYPE_PROCESS, to, lang, uid);  
    222         ds_message.appendChild(ds_request); 
    223         ds_request.appendChild(ds_param_list); 
    224          
    225         // create a doc_node_list and put in the doc_node that we are interested in 
    226         ds_request.appendChild(basic_doc_list); 
    227          
    228         // Process the document structure retrieve message 
    229         Element ds_response_message = (Element) this.mr.process(ds_message); 
    230         if (processErrorElements(ds_response_message, page_response)) { 
    231         return result; 
    232         } 
    233  
    234         // get the info and print out 
    235         String path = GSPath.appendLink(GSXML.RESPONSE_ELEM, GSXML.DOC_NODE_ELEM+GSXML.LIST_MODIFIER); 
    236         path = GSPath.appendLink(path, GSXML.DOC_NODE_ELEM); 
    237         path = GSPath.appendLink(path, "nodeStructureInfo"); 
    238         Element ds_response_struct_info = (Element) GSXML.getNodeByPath(ds_response_message, path); 
    239         // get the doc_node bit  
    240         if (ds_response_struct_info != null) { 
    241         the_document.appendChild(this.doc.importNode(ds_response_struct_info, true));  
    242         } 
    243         path = GSPath.appendLink(GSXML.RESPONSE_ELEM, GSXML.DOC_NODE_ELEM+GSXML.LIST_MODIFIER); 
    244         path = GSPath.appendLink(path, GSXML.DOC_NODE_ELEM); 
    245         path = GSPath.appendLink(path, GSXML.NODE_STRUCTURE_ELEM); 
    246         Element ds_response_structure = (Element) GSXML.getNodeByPath(ds_response_message, path); 
    247          
    248         if (ds_response_structure != null) { 
    249         // add the contents of the structure bit into the_document 
    250         NodeList structs = ds_response_structure.getChildNodes(); 
    251         for (int i=0; i<structs.getLength();i++) { 
    252             the_document.appendChild(this.doc.importNode(structs.item(i), true)); 
    253         } 
    254         } else { 
    255         // no structure nodes, so put in a dummy doc node 
    256         Element doc_node = this.doc.createElement(GSXML.DOC_NODE_ELEM); 
    257         if (document_name.length()!=0){ 
    258             doc_node.setAttribute(GSXML.NODE_ID_ATT, document_name); 
    259         }else if (has_href.length()!=0){ 
    260             doc_node.setAttribute(GSXML.NODE_ID_ATT, has_href); 
    261             doc_node.setAttribute("externalURL", has_rl); 
    262         } 
    263         the_document.appendChild(doc_node); 
    264         has_dummy = true; 
    265         } 
    266     } else { // a simple type - we dont have a dummy node for simple 
    267         // should think about this more 
    268         // no structure request, so just put in a dummy doc node 
    269         Element doc_node = this.doc.createElement(GSXML.DOC_NODE_ELEM); 
    270         if (document_name.length()!=0){ 
    271         doc_node.setAttribute(GSXML.NODE_ID_ATT, document_name); 
    272         }else if (has_href.length()!=0){ 
    273         doc_node.setAttribute(GSXML.NODE_ID_ATT, has_href); 
    274         doc_node.setAttribute("externalURL", has_rl); 
    275         } 
    276         the_document.appendChild(doc_node); 
    277         has_dummy = true; 
    278     } 
    279      
    280     // Build a request to obtain some document metadata 
    281     Element dm_message = this.doc.createElement(GSXML.MESSAGE_ELEM); 
    282     String to = GSPath.appendLink(collection, "DocumentMetadataRetrieve");  // Hard-wired? 
    283     Element dm_request = GSXML.createBasicRequest(this.doc, GSXML.REQUEST_TYPE_PROCESS, to, lang, uid); 
    284     dm_message.appendChild(dm_request); 
    285     // Create a parameter list to specify the required metadata information 
    286      
    287     HashSet meta_names = new HashSet(); 
    288     meta_names.add("Title"); // the default 
    289     if (format_elem != null) { 
    290         extractMetadataNames(format_elem, meta_names); 
    291     } 
    292      
    293     Element dm_param_list = createMetadataParamList(meta_names); 
    294     if (service_params != null) { 
    295         GSXML.addParametersToList(this.doc, dm_param_list, service_params); 
    296     } 
    297      
    298     dm_request.appendChild(dm_param_list); 
    299      
    300      
    301     // create the doc node list for the metadata request 
    302     Element dm_doc_list = this.doc.createElement(GSXML.DOC_NODE_ELEM+GSXML.LIST_MODIFIER); 
    303     dm_request.appendChild(dm_doc_list); 
    304  
    305     // Add each node from the structure response into the metadata request 
    306     NodeList doc_nodes = the_document.getElementsByTagName(GSXML.DOC_NODE_ELEM); 
    307     for (int i = 0; i < doc_nodes.getLength(); i++) { 
    308         Element doc_node = (Element) doc_nodes.item(i); 
    309         String doc_node_id = doc_node.getAttribute(GSXML.NODE_ID_ATT); 
    310  
    311         // Add the documentNode to the list 
    312         Element dm_doc_node = this.doc.createElement(GSXML.DOC_NODE_ELEM); 
    313         dm_doc_list.appendChild(dm_doc_node); 
    314         dm_doc_node.setAttribute(GSXML.NODE_ID_ATT, doc_node_id); 
    315         dm_doc_node.setAttribute(GSXML.NODE_TYPE_ATT, 
    316                      doc_node.getAttribute(GSXML.NODE_TYPE_ATT)); 
    317     } 
    318  
    319     // we also want a metadata request to the top level document to get 
    320     // assocfilepath - this could be cached too 
    321     Element doc_meta_request = GSXML.createBasicRequest(this.doc, GSXML.REQUEST_TYPE_PROCESS, to, lang, uid); 
    322     dm_message.appendChild(doc_meta_request); 
    323     Element doc_meta_param_list = this.doc.createElement(GSXML.PARAM_ELEM+GSXML.LIST_MODIFIER); 
    324     if (service_params != null) { 
    325         GSXML.addParametersToList(this.doc, doc_meta_param_list, service_params); 
    326     } 
    327  
    328     doc_meta_request.appendChild(doc_meta_param_list); 
    329     Element doc_param = this.doc.createElement(GSXML.PARAM_ELEM); 
    330     doc_meta_param_list.appendChild(doc_param); 
    331     doc_param.setAttribute(GSXML.NAME_ATT, "metadata"); 
    332     doc_param.setAttribute(GSXML.VALUE_ATT, "assocfilepath"); 
    333  
    334     // create the doc node list for the metadata request 
    335     Element doc_list = this.doc.createElement(GSXML.DOC_NODE_ELEM+GSXML.LIST_MODIFIER); 
    336     doc_meta_request.appendChild(doc_list); 
    337  
    338     Element doc_node = this.doc.createElement(GSXML.DOC_NODE_ELEM); 
    339     // the node we want is the root document node 
    340     if (document_name.length()!=0){ 
    341         doc_node.setAttribute(GSXML.NODE_ID_ATT, document_name+".rt"); 
    342     }else if (has_href.length()!=0){ 
    343         doc_node.setAttribute(GSXML.NODE_ID_ATT, has_href+".rt");  
    344         doc_node.setAttribute("externalURL", has_rl); 
    345     } 
    346     doc_list.appendChild(doc_node); 
    347     Element dm_response_message = (Element) this.mr.process(dm_message); 
    348     if (processErrorElements(dm_response_message, page_response)) { 
    349         return result; 
    350     } 
    351  
    352     String path = GSPath.appendLink(GSXML.RESPONSE_ELEM, GSXML.DOC_NODE_ELEM+GSXML.LIST_MODIFIER); 
    353     Element dm_response_doc_list = (Element) GSXML.getNodeByPath(dm_response_message, path); 
    354  
    355     // Merge the metadata with the structure information 
    356     NodeList dm_response_docs = dm_response_doc_list.getChildNodes(); 
    357     for (int i = 0; i < doc_nodes.getLength(); i++) { 
    358       GSXML.mergeMetadataLists(doc_nodes.item(i), dm_response_docs.item(i)); 
    359     } 
    360     // get the top level doc metadata out 
    361     Element doc_meta_response = (Element)dm_response_message.getElementsByTagName(GSXML.RESPONSE_ELEM).item(1); 
    362     Element top_doc_node = (Element)GSXML.getNodeByPath(doc_meta_response, "documentNodeList/documentNode"); 
    363     GSXML.mergeMetadataLists(the_document, top_doc_node); 
    364      
    365     // Build a request to obtain some document content 
    366     Element dc_message = this.doc.createElement(GSXML.MESSAGE_ELEM); 
    367     to = GSPath.appendLink(collection, "DocumentContentRetrieve");  // Hard-wired? 
    368     Element dc_request = GSXML.createBasicRequest(this.doc, GSXML.REQUEST_TYPE_PROCESS, to, lang, uid); 
    369     dc_message.appendChild(dc_request); 
    370      
    371  
    372     // Create a parameter list to specify the request parameters - empty for now 
    373     Element dc_param_list = this.doc.createElement(GSXML.PARAM_ELEM+GSXML.LIST_MODIFIER); 
    374     if (service_params != null) { 
    375         GSXML.addParametersToList(this.doc, dc_param_list, service_params); 
    376     } 
    377  
    378     dc_request.appendChild(dc_param_list); 
    379  
    380     // get the content 
    381     // the doc list for the content request is the same as the one for the structure request unless we want the whole document, in which case its the same as for the metadata request. 
    382     if (expand_document) { 
    383         dc_request.appendChild(dm_doc_list); 
    384     } else { 
    385         dc_request.appendChild(basic_doc_list); 
    386     } 
    387     logger.debug("request = "+converter.getString(dc_message)); 
    388     Element dc_response_message = (Element) this.mr.process(dc_message); 
    389     if (processErrorElements(dc_response_message, page_response)) { 
    390         return result; 
    391     } 
    392  
    393     Element dc_response_doc_list = (Element) GSXML.getNodeByPath(dc_response_message, path); 
    394  
    395     if (expand_document) { 
    396         // Merge the content with the structure information 
    397         NodeList dc_response_docs = dc_response_doc_list.getChildNodes(); 
    398         for (int i = 0; i < doc_nodes.getLength(); i++) { 
    399         Node content = GSXML.getChildByTagName((Element)dc_response_docs.item(i), "nodeContent"); 
    400         if (content != null) { 
    401             doc_nodes.item(i).appendChild(this.doc.importNode(content, true)); 
    402         } 
    403         //GSXML.mergeMetadataLists(doc_nodes.item(i), dm_response_docs.item(i)); 
    404         } 
    405     } else { 
    406         //path = GSPath.appendLink(path, GSXML.DOC_NODE_ELEM); 
    407         Element dc_response_doc = (Element) GSXML.getChildByTagName(dc_response_doc_list, GSXML.DOC_NODE_ELEM); 
    408         Element dc_response_doc_content = (Element) GSXML.getChildByTagName(dc_response_doc, GSXML.NODE_CONTENT_ELEM); 
    409         Element dc_response_doc_external = (Element) GSXML.getChildByTagName(dc_response_doc, "external"); 
    410          
    411         if (dc_response_doc_content == null) { 
    412         // no content to add 
    413         if (dc_response_doc_external !=null){ 
    414             String modified_doc_id = dc_response_doc.getAttribute(GSXML.NODE_ID_ATT); 
    415              
    416             the_document.setAttribute("selectedNode", modified_doc_id); 
    417             the_document.setAttribute("external", dc_response_doc_external.getAttribute("external_link")); 
    418         } 
    419         return result; 
    420         } 
    421         if (highlight_query_terms) { 
    422         dc_response_doc.removeChild(dc_response_doc_content); 
    423          
    424         dc_response_doc_content = highlightQueryTerms(request, dc_response_doc_content); 
    425         dc_response_doc.appendChild(dc_response_doc.getOwnerDocument().importNode(dc_response_doc_content, true)); 
    426         } 
    427          
    428      
    429     if (provide_annotations) { 
    430         String service_selected = (String)params.get(ENRICH_DOC_ARG); 
    431         if (service_selected != null && service_selected.equals("1")) { 
    432         // now we can modify the response doc if needed 
    433         String enrich_service = (String)params.get(GSParams.SERVICE); 
    434         // send a message to the service 
    435         Element enrich_message = this.doc.createElement(GSXML.MESSAGE_ELEM); 
    436         Element enrich_request = GSXML.createBasicRequest(this.doc, GSXML.REQUEST_TYPE_PROCESS, enrich_service, lang, uid); 
    437         enrich_message.appendChild(enrich_request); 
    438         // check for parameters 
    439         HashMap e_service_params = (HashMap)params.get("s1"); 
    440         if (e_service_params != null) {  
    441             Element enrich_pl = this.doc.createElement(GSXML.PARAM_ELEM+GSXML.LIST_MODIFIER); 
    442             GSXML.addParametersToList(this.doc, enrich_pl, e_service_params);  
    443             enrich_request.appendChild(enrich_pl); 
    444         } 
    445         Element e_doc_list = this.doc.createElement(GSXML.DOC_NODE_ELEM+GSXML.LIST_MODIFIER); 
    446         enrich_request.appendChild(e_doc_list); 
    447         e_doc_list.appendChild(this.doc.importNode(dc_response_doc, true)); 
    448          
    449         Node enrich_response = this.mr.process(enrich_message); 
    450          
    451         String [] links = {GSXML.RESPONSE_ELEM, GSXML.DOC_NODE_ELEM+GSXML.LIST_MODIFIER, GSXML.DOC_NODE_ELEM, GSXML.NODE_CONTENT_ELEM}; 
    452         path = GSPath.createPath(links); 
    453         dc_response_doc_content = (Element)GSXML.getNodeByPath(enrich_response, path); 
    454          
    455         }  
    456     } // if provide_annotations 
    457  
    458      
    459     // use the returned id rather than the sent one cos there may have 
    460     // been modifiers such as .pr that are removed. 
    461     String modified_doc_id = dc_response_doc.getAttribute(GSXML.NODE_ID_ATT); 
    462     the_document.setAttribute("selectedNode", modified_doc_id); 
    463     if (has_dummy) { 
    464         // change the id if necessary and add the content 
    465         Element dummy_node = (Element)doc_nodes.item(0); 
    466          
    467         dummy_node.setAttribute(GSXML.NODE_ID_ATT, modified_doc_id); 
    468         dummy_node.appendChild(this.doc.importNode(dc_response_doc_content, true)); 
    469         // hack for simple type 
    470         if (document_type.equals("simple")) { 
    471             // we dont want the internal docNode, just want the content and metadata in the document 
    472         // rethink this!! 
    473         the_document.removeChild(dummy_node); 
    474  
    475         NodeList dummy_children = dummy_node.getChildNodes(); 
    476         //for (int i=0; i<dummy_children.getLength(); i++) { 
    477         for (int i=dummy_children.getLength()-1; i>=0; i--) { 
    478           // special case as we don't want more than one metadata list 
    479           if (dummy_children.item(i).getNodeName().equals(GSXML.METADATA_ELEM+GSXML.LIST_MODIFIER)) { 
    480             GSXML.mergeMetadataFromList(the_document, dummy_children.item(i)); 
    481           } else { 
    482             the_document.appendChild(dummy_children.item(i)); 
    483           } 
    484         } 
    485         } 
    486     } else { 
    487         // Merge the document content with the metadata and structure information 
    488         for (int i = 0; i < doc_nodes.getLength(); i++) { 
    489         Node dn = doc_nodes.item(i); 
    490         String dn_id = ((Element)dn).getAttribute(GSXML.NODE_ID_ATT); 
    491         if (dn_id.equals(modified_doc_id)) { 
    492             dn.appendChild(this.doc.importNode(dc_response_doc_content, true)); 
    493             break; 
    494         } 
    495         } 
    496     } 
    497     } 
    498     logger.debug("(DocumentAction) Page:\n" + this.converter.getPrettyString(result)); 
    499     return result; 
    500     } 
    501      
    502     /** Tells the param class what this action's own arguments are. 
    503      * If an action has its own arguments, this should add them to the params 
    504      * object - particularly important for args that should not be saved. 
             * Here GOTO_PAGE_ARG and ENRICH_DOC_ARG are added, each with a second
             * argument of false (presumably meaning "do not save" - TODO confirm
             * against GSParams.addParameter).
             *
             * @param params the parameter object this action's arguments are added to
             * @return always true
             */
    505     public boolean getActionParameters(GSParams params) { 
    506     params.addParameter(GOTO_PAGE_ARG, false); 
    507     params.addParameter(ENRICH_DOC_ARG, false); 
    508     return true; 
    509     } 
    510  
    511  
    512     /** this method gets the collection description, the format info, the 
    513      * list of enrich services, etc - stuff that is needed for the page, 
    514      * but is the same whatever the query is - should be cached */ 
    515     protected  boolean getBackgroundData(Element page_response,  
    516                      String collection, String lang, 
    517                      String uid) { 
    518  
    519     // create a message to process - contains requests for the collection  
    520     // description, the format element, the enrich services on offer 
    521     // these could all be cached 
    522     Element info_message = this.doc.createElement(GSXML.MESSAGE_ELEM); 
    523     String path = GSPath.appendLink(collection, "DocumentContentRetrieve"); 
    524     // the format request - ignore for now, where does this request go to?? 
    525     Element format_request = GSXML.createBasicRequest(this.doc, GSXML.REQUEST_TYPE_FORMAT, path, lang, uid); 
    526     info_message.appendChild(format_request); 
    527  
    528     // the enrich_services request - only do this if provide_annotations is true 
    529  
    530     if (provide_annotations) { 
    531         Element enrich_services_request = GSXML.createBasicRequest(this.doc, GSXML.REQUEST_TYPE_DESCRIBE, "", lang, uid); 
    532         enrich_services_request.setAttribute(GSXML.INFO_ATT, "serviceList"); 
    533         info_message.appendChild(enrich_services_request); 
    534     } 
    535      
    536     Element info_response = (Element)this.mr.process(info_message); 
    537  
    538     // the collection is the first response 
    539     NodeList responses = info_response.getElementsByTagName(GSXML.RESPONSE_ELEM); 
    540     Element format_resp = (Element) responses.item(0); 
    541      
    542     Element format_elem = (Element)GSXML.getChildByTagName(format_resp, GSXML.FORMAT_ELEM); 
    543     if (format_elem != null) { 
    544         logger.debug("doc action found a format statement"); 
    545         // set teh format type 
    546         format_elem.setAttribute(GSXML.TYPE_ATT, "display");   
    547         page_response.appendChild(this.doc.importNode(format_elem, true)); 
    548     } 
    549  
    550     if (provide_annotations) { 
    551         Element services_resp = (Element)responses.item(1);  
    552  
    553         // a new message for the mr 
    554         Element enrich_message = this.doc.createElement(GSXML.MESSAGE_ELEM); 
    555          
    556         NodeList e_services = services_resp.getElementsByTagName(GSXML.SERVICE_ELEM); 
    557         boolean service_found = false; 
    558         for (int j=0; j<e_services.getLength(); j++) { 
    559         if (((Element)e_services.item(j)).getAttribute(GSXML.TYPE_ATT).equals("enrich")) { 
    560             Element s = GSXML.createBasicRequest(this.doc, GSXML.REQUEST_TYPE_DESCRIBE, ((Element)e_services.item(j)).getAttribute(GSXML.NAME_ATT), lang, uid); 
    561             enrich_message.appendChild(s); 
    562             service_found = true; 
    563         } 
    564         } 
    565         if (service_found) { 
    566         Element enrich_response = (Element)this.mr.process(enrich_message); 
    567          
    568         NodeList e_responses = enrich_response.getElementsByTagName(GSXML.RESPONSE_ELEM); 
    569         Element service_list = this.doc.createElement(GSXML.SERVICE_ELEM + GSXML.LIST_MODIFIER); 
    570         for (int i=0; i<e_responses.getLength(); i++) { 
    571             Element e_resp = (Element)e_responses.item(i); 
    572             Element e_service = (Element)this.doc.importNode(GSXML.getChildByTagName(e_resp, GSXML.SERVICE_ELEM), true);  
    573             e_service.setAttribute(GSXML.NAME_ATT, e_resp.getAttribute(GSXML.FROM_ATT)); 
    574             service_list.appendChild(e_service); 
    575         } 
    576         page_response.appendChild(service_list); 
    577         } 
    578     } // if provide_annotations 
    579     return true; 
    580          
    581     } 
    582  
    583     /** this involves a bit of a hack to get the equivalent query terms - has to requery the query service - uses the last selected service name. (if it ends in query). should this action do the query or should it send a message to the query action? but that will involve lots of extra stuff. also doesn't handle phrases properly - just highlights all the terms found in the text. 
    584      */ 
    585     protected Element highlightQueryTerms(Element request, Element dc_response_doc_content) { 
    586  
    587     // do the query again to get term info  
    588     Element cgi_param_list = (Element)GSXML.getChildByTagName(request, GSXML.PARAM_ELEM+GSXML.LIST_MODIFIER); 
    589     HashMap params = GSXML.extractParams(cgi_param_list, false); 
    590      
    591     HashMap previous_params = (HashMap)params.get("p"); 
    592     if (previous_params == null) { 
    593         return dc_response_doc_content; 
    594     } 
    595         String service_name = (String)previous_params.get(GSParams.SERVICE); 
    596     if (service_name == null || !service_name.endsWith("Query")) { // hack for now - we only do highlighting if we were in a query last - ie not if we were in a browse thingy 
    597         logger.debug("invalid service, not doing highlighting"); 
    598         return dc_response_doc_content; 
    599     } 
    600     String collection = (String)params.get(GSParams.COLLECTION); 
    601     String lang = request.getAttribute(GSXML.LANG_ATT); 
    602     String uid = request.getAttribute(GSXML.USER_ID_ATT); 
    603     String to = GSPath.appendLink(collection, service_name); 
    604      
    605     Element mr_query_message = this.doc.createElement(GSXML.MESSAGE_ELEM); 
    606     Element mr_query_request = GSXML.createBasicRequest(this.doc, GSXML.REQUEST_TYPE_PROCESS, to, lang, uid); 
    607     mr_query_message.appendChild(mr_query_request); 
    608      
    609     // paramList 
    610     HashMap service_params = (HashMap)params.get("s1"); 
    611      
    612     Element query_param_list = this.doc.createElement(GSXML.PARAM_ELEM + GSXML.LIST_MODIFIER); 
    613     GSXML.addParametersToList(this.doc, query_param_list, service_params); 
    614     mr_query_request.appendChild(query_param_list); 
    615  
    616     // do the query 
    617         Element mr_query_response = (Element)this.mr.process(mr_query_message); 
    618      
    619     String path = GSPath.appendLink(GSXML.RESPONSE_ELEM, GSXML.TERM_ELEM+GSXML.LIST_MODIFIER); 
    620     Element query_term_list_element = (Element) GSXML.getNodeByPath(mr_query_response, path); 
    621     if (query_term_list_element == null) { 
    622         // no term info 
    623         logger.error("No query term information.\n"); 
    624         return dc_response_doc_content; 
    625     }  
    626  
    627     String content = GSXML.getNodeText(dc_response_doc_content); 
    628  
    629     String metadata_path = GSPath.appendLink(GSXML.RESPONSE_ELEM, GSXML.METADATA_ELEM+GSXML.LIST_MODIFIER); 
    630     Element metadata_list = (Element) GSXML.getNodeByPath(mr_query_response, metadata_path); 
    631  
    632     HashSet query_term_variants = new HashSet(); 
    633     NodeList equivalent_terms_nodelist = query_term_list_element.getElementsByTagName("equivTermList"); 
    634     for (int i = 0; i < equivalent_terms_nodelist.getLength(); i++) { 
    635         Element equivalent_terms_element = (Element) equivalent_terms_nodelist.item(i); 
    636         String[] equivalent_terms = GSXML.getAttributeValuesFromList(equivalent_terms_element, GSXML.NAME_ATT); 
    637         for (int j = 0; j < equivalent_terms.length; j++) { 
    638         query_term_variants.add(equivalent_terms[j]); 
    639         } 
    640     } 
    641  
    642     ArrayList phrase_query_term_variants_hierarchy = new ArrayList(); 
    643  
    644     Element query_element = GSXML.getNamedElement(metadata_list, GSXML.METADATA_ELEM, GSXML.NAME_ATT, "query"); 
    645     String performed_query = GSXML.getNodeText(query_element) + " "; 
    646  
    647     ArrayList phrase_query_p_term_variants_list = new ArrayList(); 
    648     int term_start = 0; 
    649     boolean in_term = false; 
    650     boolean in_phrase = false; 
    651     for (int i = 0; i < performed_query.length(); i++) { 
    652         char character = performed_query.charAt(i); 
    653         boolean is_character_letter_or_digit = Character.isLetterOrDigit(character); 
    654  
    655         // Has a query term just started? 
    656         if (in_term == false && is_character_letter_or_digit == true) { 
    657         in_term = true; 
    658         term_start = i; 
    659         } 
    660  
    661         // Or has a term just finished? 
    662         else if (in_term == true && is_character_letter_or_digit == false) { 
    663         in_term = false; 
    664         String term = performed_query.substring(term_start, i); 
    665                  
    666         Element term_element = GSXML.getNamedElement(query_term_list_element, GSXML.TERM_ELEM, GSXML.NAME_ATT, term); 
    667         if (term_element != null) { 
    668              
    669             HashSet phrase_query_p_term_x_variants = new HashSet(); 
    670              
    671             NodeList term_equivalent_terms_nodelist = term_element.getElementsByTagName("equivTermList"); 
    672             for (int j = 0; j < term_equivalent_terms_nodelist.getLength(); j++) { 
    673             Element term_equivalent_terms_element = (Element) term_equivalent_terms_nodelist.item(j); 
    674             String[] term_equivalent_terms = GSXML.getAttributeValuesFromList(term_equivalent_terms_element, GSXML.NAME_ATT); 
    675             for (int k = 0; k < term_equivalent_terms.length; k++) { 
    676                 phrase_query_p_term_x_variants.add(term_equivalent_terms[k]); 
    677             } 
    678             } 
    679             phrase_query_p_term_variants_list.add(phrase_query_p_term_x_variants); 
    680              
    681             if (in_phrase == false) { 
    682             phrase_query_term_variants_hierarchy.add(phrase_query_p_term_variants_list); 
    683             phrase_query_p_term_variants_list = new ArrayList(); 
    684             } 
    685         } 
    686         } 
    687         // Watch for phrases (surrounded by quotes) 
    688         if (character == '\"') { 
    689         // Has a phrase just started? 
    690         if (in_phrase == false) { 
    691             in_phrase = true; 
    692         } 
    693         // Or has a phrase just finished? 
    694         else if (in_phrase == true) { 
    695             in_phrase = false; 
    696             phrase_query_term_variants_hierarchy.add(phrase_query_p_term_variants_list); 
    697         } 
    698  
    699         phrase_query_p_term_variants_list = new ArrayList(); 
    700         } 
    701     } 
    702  
    703     return highlightQueryTermsInternal(content, query_term_variants, phrase_query_term_variants_hierarchy); 
    704     } 
    705  
    706  
    707     /** 
    708      * Highlights query terms in a piece of text. 
    709      */ 
    710     private Element highlightQueryTermsInternal(String content, HashSet query_term_variants, ArrayList phrase_query_term_variants_hierarchy) 
    711     { 
    712     // Convert the content string to an array of characters for speed 
    713     char[] content_characters = new char[content.length()]; 
    714     content.getChars(0, content.length(), content_characters, 0); 
    715  
    716     // Now skim through the content, identifying word matches 
    717     ArrayList word_matches = new ArrayList(); 
    718     int word_start = 0; 
    719     boolean in_word = false; 
    720     boolean preceding_word_matched = false; 
    721     for (int i = 0; i < content_characters.length; i++) { 
    722         boolean is_character_letter_or_digit = Character.isLetterOrDigit(content_characters[i]); 
    723  
    724         // Has a word just started? 
    725         if (in_word == false && is_character_letter_or_digit == true) { 
    726         in_word = true; 
    727         word_start = i; 
    728         } 
    729  
    730         // Or has a word just finished? 
    731         else if (in_word == true && is_character_letter_or_digit == false) { 
    732         in_word = false; 
    733  
    734         // Check if the word matches any of the query term equivalents 
    735         String word = new String(content_characters, word_start, (i - word_start)); 
    736         if (query_term_variants.contains(word)) { 
    737             // We have found a matching word, so remember its location 
    738             word_matches.add(new WordMatch(word, word_start, i, preceding_word_matched)); 
    739             preceding_word_matched = true; 
    740         } 
    741         else { 
    742             preceding_word_matched = false; 
    743         } 
    744         } 
    745     } 
    746  
    747     // Don't forget the last word... 
    748     if (in_word == true) { 
    749         // Check if the word matches any of the query term equivalents 
    750         String word = new String(content_characters, word_start, (content_characters.length - word_start)); 
    751         if (query_term_variants.contains(word)) { 
    752         // We have found a matching word, so remember its location 
    753         word_matches.add(new WordMatch(word, word_start, content_characters.length, preceding_word_matched)); 
    754         } 
    755     } 
    756  
    757     ArrayList highlight_start_positions = new ArrayList(); 
    758     ArrayList highlight_end_positions = new ArrayList(); 
    759  
    760     // Deal with phrases now 
    761     ArrayList partial_phrase_matches = new ArrayList(); 
    762     for (int i = 0; i < word_matches.size(); i++) { 
    763         WordMatch word_match = (WordMatch) word_matches.get(i); 
    764  
    765         // See if any partial phrase matches are extended by this word 
    766         if (word_match.preceding_word_matched) { 
    767         for (int j = partial_phrase_matches.size() - 1; j >= 0; j--) { 
    768             PartialPhraseMatch partial_phrase_match = (PartialPhraseMatch) partial_phrase_matches.remove(j); 
    769             ArrayList phrase_query_p_term_variants_list = (ArrayList) phrase_query_term_variants_hierarchy.get(partial_phrase_match.query_phrase_number); 
    770             HashSet phrase_query_p_term_x_variants = (HashSet) phrase_query_p_term_variants_list.get(partial_phrase_match.num_words_matched); 
    771             if (phrase_query_p_term_x_variants.contains(word_match.word)) { 
    772             partial_phrase_match.num_words_matched++; 
    773  
    774             // Has a complete phrase match occurred? 
    775             if (partial_phrase_match.num_words_matched == phrase_query_p_term_variants_list.size()) { 
    776                 // Check for overlaps by looking at the previous highlight range 
    777                 if (!highlight_end_positions.isEmpty()) { 
    778                 int last_highlight_index = highlight_end_positions.size() - 1; 
    779                 int last_highlight_end = ((Integer) highlight_end_positions.get(last_highlight_index)).intValue(); 
    780                 if (last_highlight_end > partial_phrase_match.start_position) { 
    781                     // There is an overlap, so remove the previous phrase match 
    782                     int last_highlight_start = ((Integer) highlight_start_positions.remove(last_highlight_index)).intValue(); 
    783                     highlight_end_positions.remove(last_highlight_index); 
    784                     partial_phrase_match.start_position = last_highlight_start; 
    785                 } 
    786                 } 
    787  
    788                 highlight_start_positions.add(new Integer(partial_phrase_match.start_position)); 
    789                 highlight_end_positions.add(new Integer(word_match.end_position)); 
    790             } 
    791             // No, but add the partial match back into the list for next time 
    792             else { 
    793                 partial_phrase_matches.add(partial_phrase_match); 
    794             } 
    795             } 
    796         } 
    797         } 
    798         else { 
    799         partial_phrase_matches.clear(); 
    800         } 
    801  
    802         // See if this word is at the start of any of the phrases 
    803         for (int p = 0; p < phrase_query_term_variants_hierarchy.size(); p++) { 
    804         ArrayList phrase_query_p_term_variants_list = (ArrayList) phrase_query_term_variants_hierarchy.get(p); 
    805         HashSet phrase_query_p_term_1_variants = (HashSet) phrase_query_p_term_variants_list.get(0); 
    806         if (phrase_query_p_term_1_variants.contains(word_match.word)) { 
    807             // If this phrase is just one word long, we have a complete match 
    808             if (phrase_query_p_term_variants_list.size() == 1) { 
    809             highlight_start_positions.add(new Integer(word_match.start_position)); 
    810             highlight_end_positions.add(new Integer(word_match.end_position)); 
    811             } 
    812             // Otherwise we have the start of a potential phrase match 
    813             else { 
    814             partial_phrase_matches.add(new PartialPhraseMatch(word_match.start_position, p)); 
    815             } 
    816         } 
    817         } 
    818     } 
    819  
    820     // Now add the annotation tags into the document at the correct points 
    821     Element content_element = this.doc.createElement(GSXML.NODE_CONTENT_ELEM); 
    822  
    823     int last_wrote = 0; 
    824     for (int i = 0; i < highlight_start_positions.size(); i++) { 
    825         int highlight_start = ((Integer) highlight_start_positions.get(i)).intValue(); 
    826         int highlight_end = ((Integer) highlight_end_positions.get(i)).intValue(); 
    827  
    828         // Print anything before the highlight range 
    829         if (last_wrote < highlight_start) { 
    830         String preceding_text = new String(content_characters, last_wrote, (highlight_start - last_wrote)); 
    831         content_element.appendChild(this.doc.createTextNode(preceding_text)); 
    832         } 
    833  
    834         // Print the highlight text, annotated 
    835         if (highlight_end > last_wrote) { 
    836         String highlight_text = new String(content_characters, highlight_start, (highlight_end - highlight_start)); 
    837         Element annotation_element = GSXML.createTextElement(this.doc, "annotation", highlight_text); 
    838         annotation_element.setAttribute("type", "query_term"); 
    839         content_element.appendChild(annotation_element); 
    840         last_wrote = highlight_end; 
    841         } 
    842     } 
    843  
    844     // Finish off any unwritten text 
    845     if (last_wrote < content_characters.length) { 
    846         String remaining_text = new String(content_characters, last_wrote, (content_characters.length - last_wrote)); 
    847         content_element.appendChild(this.doc.createTextNode(remaining_text)); 
    848     } 
    849  
    850     return content_element; 
    851     } 
    852  
    853  
    854     static private class WordMatch 
    855     { 
    856     public String word; 
    857     public int start_position; 
    858     public int end_position; 
    859     public boolean preceding_word_matched; 
    860  
    861     public WordMatch(String word, int start_position, int end_position, boolean preceding_word_matched) 
    862     { 
    863         this.word = word; 
    864         this.start_position = start_position; 
    865         this.end_position = end_position; 
    866         this.preceding_word_matched = preceding_word_matched; 
    867     } 
    868     } 
    869  
    870  
    871     static private class PartialPhraseMatch 
    872     { 
    873     public int start_position; 
    874     public int query_phrase_number; 
    875     public int num_words_matched; 
    876  
    877     public PartialPhraseMatch(int start_position, int query_phrase_number) 
    878     { 
    879         this.start_position = start_position; 
    880         this.query_phrase_number = query_phrase_number; 
    881         this.num_words_matched = 1; 
    882     } 
    883     } 
    884940}