Changeset 32655

Show
Ignore:
Timestamp:
04.12.2018 13:51:26 (10 days ago)
Author:
kjdon
Message:

Add a collection attribute to snippet elements. Group names arrive in the form group.grp1.grp2: remove the "group." prefix and convert "." to "/" so that they match the internal group paths.

Files:
1 modified

Legend:

Unmodified
Added
Removed
  • main/trunk/greenstone3/src/java/org/greenstone/gsdl3/service/CrossCollectionSearch.java

    r32453 r32655  
    1 /* 
    2  *    CrossCollectionSearch.java 
    3  *    Copyright (C) 2002 New Zealand Digital Library, http://www.nzdl.org 
    4  * 
    5  *    This program is free software; you can redistribute it and/or modify 
    6  *    it under the terms of the GNU General Public License as published by 
    7  *    the Free Software Foundation; either version 2 of the License, or 
    8  *    (at your option) any later version. 
    9  * 
    10  *    This program is distributed in the hope that it will be useful, 
    11  *    but WITHOUT ANY WARRANTY; without even the implied warranty of 
    12  *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the 
    13  *    GNU General Public License for more details. 
    14  * 
    15  *    You should have received a copy of the GNU General Public License 
    16  *    along with this program; if not, write to the Free Software 
    17  *    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
    18  */ 
    19 package org.greenstone.gsdl3.service; 
    20  
    21 import java.util.ArrayList; 
    22 import java.util.HashMap; 
    23 import java.util.Iterator; 
    24 import java.util.Map; 
    25 import java.util.Set; 
    26  
    27 import org.apache.log4j.Logger; 
    28 import org.greenstone.gsdl3.util.GSPath; 
    29 import org.greenstone.gsdl3.util.GSXML; 
    30 import org.greenstone.gsdl3.util.UserContext; 
    31 import org.greenstone.gsdl3.util.XMLConverter; 
    32 import org.w3c.dom.Document; 
    33 import org.w3c.dom.Element; 
    34 import org.w3c.dom.Node; 
    35 import org.w3c.dom.NodeList; 
    36  
    37 /** 
    38  * This ServiceRack is specified in siteConfig.xml, so it is loaded by the MessageRouter, and two services get activated: TextQuery and DocumentMetadataRetrieve. 
    39  * These are located at MessageRouter (MR) level, not inside a collection. QueryAction will send messages to "TextQuery", rather than e.g. "mgppdemo/TextQuery". 
    40  * These two services re-query the MR for search results/document metadata based on the collections or documents listed. 
    41  */ 
    42  
    43 public class CrossCollectionSearch extends ServiceRack 
    44 { 
    45  
    46     static Logger logger = Logger.getLogger(org.greenstone.gsdl3.service.CrossCollectionSearch.class.getName()); 
    47     protected static final String QUERY_PARAM = "query"; 
    48     protected static final String COLLECTION_PARAM = "collection"; 
    49     protected static final String GROUP_PARAM = "group"; 
    50   protected static final String MAXDOCS_PARAM = "maxDocs"; // matches standard maxDocs, but in this case, means max docs per collection 
    51   protected static final String HITS_PER_PAGE_PARAM = "hitsPerPage"; 
    52   protected static final String MAXDOCS_DEFAULT = "20"; 
    53     // the services on offer - these proxy the actual collection ones 
    54     protected static final String TEXT_QUERY_SERVICE = "TextQuery"; 
    55     protected static final String DOCUMENT_METADATA_RETRIEVE_SERVICE = "DocumentMetadataRetrieve"; 
    56  
    57     protected String[] coll_ids_list = null; 
    58     protected String[] coll_ids_list_no_all = null; 
    59     // maps lang to coll names list 
    60     protected HashMap<String, String[]> coll_names_map = null; 
    61  
    62     //protected String[] coll_names_list = null; 
    63  
    64     /** constructor */ 
    65     public CrossCollectionSearch() 
    66     { 
    67     } 
    68  
    69     public boolean configure(Element info, Element extra_info) 
    70     { 
    71         // any parameters? colls to include?? 
    72         logger.info("Configuring CrossCollectionSearch..."); 
    73         // query service 
    74         Element ccs_service = this.desc_doc.createElement(GSXML.SERVICE_ELEM); 
    75         ccs_service.setAttribute(GSXML.TYPE_ATT, GSXML.SERVICE_TYPE_QUERY); 
    76         ccs_service.setAttribute(GSXML.NAME_ATT, TEXT_QUERY_SERVICE); 
    77         this.short_service_info.appendChild(ccs_service); 
    78  
    79         // metadata service 
    80         Element dmr_service = this.desc_doc.createElement(GSXML.SERVICE_ELEM); 
    81         dmr_service.setAttribute(GSXML.TYPE_ATT, GSXML.SERVICE_TYPE_RETRIEVE); 
    82         dmr_service.setAttribute(GSXML.NAME_ATT, DOCUMENT_METADATA_RETRIEVE_SERVICE); 
    83         this.short_service_info.appendChild(dmr_service); 
    84  
    85  
    86         // what params do we want saved in the session? 
    87         this.save_params.add(QUERY_PARAM); 
    88         this.save_params.add(COLLECTION_PARAM); 
    89         this.save_params.add(GROUP_PARAM); 
    90         this.save_params.add(MAXDOCS_PARAM); 
    91         this.save_params.add(HITS_PER_PAGE_PARAM); 
    92          
    93         // get any format info 
    94         Element format_info = (Element) GSXML.getChildByTagName(info, GSXML.FORMAT_ELEM); 
    95         if (format_info != null) 
    96         { 
    97             this.format_info_map.put(TEXT_QUERY_SERVICE, this.desc_doc.importNode(format_info, true)); 
    98         } 
    99         else 
    100         { 
    101             // add in a default format statement 
    102           //"xmlns:gsf='" + GSXML.GSF_NAMESPACE + "' xmlns:xsl='" + GSXML.XSL_NAMESPACE + " 
    103           String format_string = "<format "+GSXML.STD_NAMESPACES_ATTS + "><gsf:template match='documentNode'><td><a><xsl:attribute name='href'>?a=d&amp;c=<xsl:value-of select='@collection'/>&amp;d=<xsl:value-of select='@nodeID'/><xsl:if test=\"@nodeType='leaf'\">&amp;sib=1</xsl:if>&amp;dt=<xsl:value-of select='@docType'/>&amp;p.a=q&amp;p.s=" + TEXT_QUERY_SERVICE + "&amp;p.c="; 
    104             if (this.cluster_name != null) 
    105             { 
    106                 format_string += this.cluster_name; 
    107             } 
    108             format_string += "</xsl:attribute><gsf:icon/></a></td><td><gsf:metadata name='Title'/> (<xsl:value-of select='@collection'/>) </td></gsf:template></format>"; 
    109             this.format_info_map.put(TEXT_QUERY_SERVICE, this.desc_doc.importNode(this.converter.getDOM(format_string).getDocumentElement(), true)); 
    110         } 
    111         return true; 
    112     } 
    113  
    114   protected Element getServiceDescription(Document doc, String service, String lang, String subset) 
    115     { 
    116         if (service.equals(TEXT_QUERY_SERVICE)) 
    117         { 
    118  
    119             Element ccs_service = doc.createElement(GSXML.SERVICE_ELEM); 
    120             ccs_service.setAttribute(GSXML.TYPE_ATT, GSXML.SERVICE_TYPE_QUERY); 
    121             ccs_service.setAttribute(GSXML.NAME_ATT, TEXT_QUERY_SERVICE); 
    122  
    123             // display info 
    124             if (subset == null || subset.equals(GSXML.DISPLAY_TEXT_ELEM + GSXML.LIST_MODIFIER)) 
    125             { 
    126                 ccs_service.appendChild(GSXML.createDisplayTextElement(doc, GSXML.DISPLAY_TEXT_NAME, getTextString(TEXT_QUERY_SERVICE + ".name", lang))); 
    127                 ccs_service.appendChild(GSXML.createDisplayTextElement(doc, GSXML.DISPLAY_TEXT_SUBMIT, getTextString(TEXT_QUERY_SERVICE + ".submit", lang))); 
    128                 ccs_service.appendChild(GSXML.createDisplayTextElement(doc, GSXML.DISPLAY_TEXT_DESCRIPTION, getTextString(TEXT_QUERY_SERVICE + ".description", lang))); 
    129             } 
    130             // param info 
    131             if (subset == null || subset.equals(GSXML.PARAM_ELEM + GSXML.LIST_MODIFIER)) 
    132             { 
    133                 Element param_list = doc.createElement(GSXML.PARAM_ELEM + GSXML.LIST_MODIFIER); 
    134                 // collection list 
    135                 if (coll_ids_list == null) 
    136                 { 
    137                     initCollectionList(lang); 
    138                 } 
    139                 if (!coll_names_map.containsKey(lang)) 
    140                 { 
    141                     addCollectionNames(lang); 
    142                 } 
    143                 Element param = GSXML.createParameterDescription(doc, COLLECTION_PARAM, getTextString("param." + COLLECTION_PARAM, lang), GSXML.PARAM_TYPE_ENUM_MULTI, "all", coll_ids_list, coll_names_map.get(lang)); 
    144                 param_list.appendChild(param); 
    145                 // max docs param 
    146                 param = GSXML.createParameterDescription(doc, MAXDOCS_PARAM, getTextString("param." + MAXDOCS_PARAM, lang), GSXML.PARAM_TYPE_INTEGER, MAXDOCS_DEFAULT, null, null); 
    147                 param_list.appendChild(param); 
    148                 // query param 
    149                 param = GSXML.createParameterDescription(doc, QUERY_PARAM, getTextString("param." + QUERY_PARAM, lang), GSXML.PARAM_TYPE_STRING, null, null, null); 
    150                 param_list.appendChild(param); 
    151                 ccs_service.appendChild(param_list); 
    152             } 
    153  
    154             logger.debug("service description=" + this.converter.getPrettyString(ccs_service)); 
    155             return ccs_service; 
    156         } 
    157         // these ones are probably never called, but put them here just in case 
    158         Element service_elem = doc.createElement(GSXML.SERVICE_ELEM); 
    159         service_elem.setAttribute(GSXML.TYPE_ATT, GSXML.SERVICE_TYPE_RETRIEVE); 
    160         service_elem.setAttribute(GSXML.NAME_ATT, service); 
    161         return service_elem; 
    162  
    163     } 
    164  
    165     protected Element processTextQuery(Element request) 
    166     { 
    167         // Create a new (empty) result message 
    168       Document result_doc = XMLConverter.newDOM(); 
    169         Element result = result_doc.createElement(GSXML.RESPONSE_ELEM); 
    170         result.setAttribute(GSXML.FROM_ATT, TEXT_QUERY_SERVICE); 
    171         result.setAttribute(GSXML.TYPE_ATT, GSXML.REQUEST_TYPE_PROCESS); 
    172  
    173         UserContext userContext = new UserContext(request); 
    174  
    175         // Get the parameters of the request 
    176         Element param_list = (Element) GSXML.getChildByTagName(request, GSXML.PARAM_ELEM + GSXML.LIST_MODIFIER); 
    177         if (param_list == null) 
    178         { 
    179             logger.error("TextQuery request had no paramList."); 
    180             return result; // Return the empty result 
    181         } 
    182         // get the collection list 
    183         String[] colls_list = coll_ids_list_no_all; 
    184         Element coll_param = GSXML.getNamedElement(param_list, GSXML.PARAM_ELEM, GSXML.NAME_ATT, COLLECTION_PARAM); 
    185         if (coll_param != null) 
    186         { 
    187             String coll_list = GSXML.getValue(coll_param); 
    188             if (!coll_list.equals("all") && !coll_list.equals("")) 
    189             { 
    190                 colls_list = coll_list.split(","); 
    191             } 
    192         } 
    193              
    194         colls_list = mergeGroups(userContext, param_list, colls_list); 
    195          
    196         String maxdocs = MAXDOCS_DEFAULT; 
    197         Element maxdocs_param = GSXML.getNamedElement(param_list, GSXML.PARAM_ELEM, GSXML.NAME_ATT, MAXDOCS_PARAM); 
    198         if (maxdocs_param != null) { 
    199           maxdocs = GSXML.getValue(maxdocs_param); 
    200         } 
    201          
    202         Document msg_doc = XMLConverter.newDOM(); 
    203         Element query_message = msg_doc.createElement(GSXML.MESSAGE_ELEM); 
    204         // we are sending the same request to each collection - build up the to 
    205         // attribute for the request 
    206         StringBuffer to_att = new StringBuffer(); 
    207         for (int i = 0; i < colls_list.length; i++) 
    208         { 
    209             if (i > 0) 
    210             { 
    211                 to_att.append(","); 
    212             } 
    213             to_att.append(GSPath.appendLink(colls_list[i], "TextQuery")); 
    214  
    215         } 
    216         // send the query to all colls 
    217         Element query_request = GSXML.createBasicRequest(msg_doc, GSXML.REQUEST_TYPE_PROCESS, to_att.toString(), userContext); 
    218         query_message.appendChild(query_request); 
    219         // should we add params individually? 
    220         Element new_param_list = msg_doc.createElement(GSXML.PARAM_ELEM + GSXML.LIST_MODIFIER); 
    221         query_request.appendChild(new_param_list); 
    222         new_param_list.appendChild(msg_doc.importNode(GSXML.getNamedElement(param_list, GSXML.PARAM_ELEM, GSXML.NAME_ATT, QUERY_PARAM), true)); 
    223  
    224         // for cross coll search, we only want maxdocs from each collection 
    225         // some colls use maxdocs, some use hits per page so lets send both 
    226         new_param_list.appendChild(GSXML.createParameter(msg_doc, MAXDOCS_PARAM, maxdocs)); 
    227         new_param_list.appendChild(GSXML.createParameter(msg_doc, HITS_PER_PAGE_PARAM, maxdocs)); 
    228         Element query_result = (Element) this.router.process(query_message); 
    229         // create the doc list for the response 
    230         Element doc_node_list = result_doc.createElement(GSXML.DOC_NODE_ELEM + GSXML.LIST_MODIFIER); 
    231         result.appendChild(doc_node_list); 
    232         Element result_snippet_list = result_doc.createElement(GSXML.HL_SNIPPET_ELEM + GSXML.LIST_MODIFIER); 
    233         result.appendChild(result_snippet_list); 
    234         NodeList hl_snippet_list = query_result.getElementsByTagName(GSXML.HL_SNIPPET_ELEM); 
    235         if (hl_snippet_list != null){ 
    236             for (int hls = 0; hls < hl_snippet_list.getLength(); hls++){ 
    237                 result_snippet_list.appendChild(result_doc.importNode(hl_snippet_list.item(hls), true)); 
    238             } 
    239         } 
    240  
    241         NodeList responses = query_result.getElementsByTagName(GSXML.RESPONSE_ELEM); 
    242         int num_docs = 0; 
    243         for (int k = 0; k < responses.getLength(); k++) 
    244         { 
    245             String coll_name = GSPath.removeLastLink(((Element) responses.item(k)).getAttribute(GSXML.FROM_ATT)); 
    246             NodeList nodes = ((Element) responses.item(k)).getElementsByTagName(GSXML.DOC_NODE_ELEM); 
    247             if (nodes == null || nodes.getLength() == 0) 
    248                 continue; 
    249             num_docs += nodes.getLength(); 
    250             Element last_node = null; 
    251             Element this_node = null; 
    252             for (int n = 0; n < nodes.getLength(); n++) 
    253             { 
    254                 this_node = (Element) nodes.item(n); 
    255                 this_node.setAttribute("collection", coll_name); 
    256      
    257                 if (k == 0) 
    258                 { 
    259  
    260                     doc_node_list.appendChild(result_doc.importNode(this_node, true)); 
    261                 } 
     1 
    2622                else 
    2633                { 
     
    27010 
    27111            } 
     12            NodeList hl_snippet_list = ((Element) responses.item(k)).getElementsByTagName(GSXML.HL_SNIPPET_ELEM); 
     13            if (hl_snippet_list != null) { 
     14              for (int hls = 0; hls < hl_snippet_list.getLength(); hls++) { 
     15                Element this_hls = (Element) hl_snippet_list.item(hls); 
     16                this_hls.setAttribute("collection", coll_name); 
     17                result_snippet_list.appendChild(result_doc.importNode(this_hls, true)); 
     18              } 
     19            } 
     20             
    27221        } 
    27322        // just send back num docs returned. Too hard to work out number of matches etc as each index type 
     
    29746        Element coll_list_request = GSXML.createBasicRequest(msg_doc, GSXML.REQUEST_TYPE_DESCRIBE, "", userContext); // uid 
    29847        coll_list_message.appendChild(coll_list_request); 
    299         logger.debug("coll list request = " + this.converter.getPrettyString(coll_list_request)); 
     48        //logger.debug("coll list request = " + this.converter.getPrettyString(coll_list_request)); 
    30049        Element coll_list_response = (Element) this.router.process(coll_list_message); 
    30150        if (coll_list_response == null) 
     
    30453            return false; 
    30554        } 
    306         logger.debug("coll list response = " + this.converter.getPrettyString(coll_list_response)); 
     55        //logger.debug("coll list response = " + this.converter.getPrettyString(coll_list_response)); 
    30756        // second, get some info from each collection. we want the coll name  
    30857        // and whether its got a text query service  
     
    32776        Element metadata_request = GSXML.createBasicRequest(msg_doc, GSXML.REQUEST_TYPE_DESCRIBE, colls_sb.toString(), userContext); 
    32877        metadata_message.appendChild(metadata_request); 
    329         logger.debug("metadata request = " + this.converter.getPrettyString(metadata_message)); 
     78        //logger.debug("metadata request = " + this.converter.getPrettyString(metadata_message)); 
    33079        Element metadata_response = (Element) this.router.process(metadata_message); 
    331         logger.debug("metadata response = " + this.converter.getPrettyString(metadata_response)); 
     80        //logger.debug("metadata response = " + this.converter.getPrettyString(metadata_response)); 
    33281        NodeList coll_responses = metadata_response.getElementsByTagName(GSXML.RESPONSE_ELEM); 
    33382        ArrayList<String> valid_colls = new ArrayList<String>(); 
     
    400149        metadata_request.appendChild(param_list); 
    401150        metadata_message.appendChild(metadata_request); 
    402         logger.debug("coll names metadata request = " + this.converter.getPrettyString(metadata_message)); 
     151        //logger.debug("coll names metadata request = " + this.converter.getPrettyString(metadata_message)); 
    403152        Element metadata_response = (Element) this.router.process(metadata_message); 
    404         logger.debug("coll names metadata response = " + this.converter.getPrettyString(metadata_response)); 
     153        //logger.debug("coll names metadata response = " + this.converter.getPrettyString(metadata_response)); 
    405154        NodeList coll_responses = metadata_response.getElementsByTagName(GSXML.RESPONSE_ELEM); 
    406155        for (int i = 0; i < coll_responses.getLength(); i++) 
     
    571320            groupParamList.appendChild(groupParam); 
    572321        } 
    573         String prefix = GSXML.GROUP_ELEM + "/"; 
     322        String prefix = GSXML.GROUP_ELEM + "."; 
    574323        for (int i = 0; i < collArray.length; i++) { 
    575324            String collectionParam = collArray[i]; 
    576325            if (collectionParam.startsWith(prefix)){ 
    577326                String value = groupParam.getAttribute(GSXML.VALUE_ATT); 
    578                 value += "," + collectionParam.substring(prefix.length() - 1 ); 
     327                String group = collectionParam.substring(prefix.length() - 1 ); 
     328                group=group.replace('.', '/'); // we use . instead of / in args 
     329                value += "," + group; 
    579330                groupParam.setAttribute(GSXML.VALUE_ATT, value); 
    580331            }