Changeset 32655


Ignore:
Timestamp:
2018-12-04T13:51:26+13:00 (5 years ago)
Author:
kjdon
Message:

add collection attribute to snippet elements. group names come in like group.grp1.grp2 - remove group. prefix, and convert . to / to match up with internal group paths.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • main/trunk/greenstone3/src/java/org/greenstone/gsdl3/service/CrossCollectionSearch.java

    r32453 r32655  
    1 /*
    2  *    CrossCollectionSearch.java
    3  *    Copyright (C) 2002 New Zealand Digital Library, http://www.nzdl.org
    4  *
    5  *    This program is free software; you can redistribute it and/or modify
    6  *    it under the terms of the GNU General Public License as published by
    7  *    the Free Software Foundation; either version 2 of the License, or
    8  *    (at your option) any later version.
    9  *
    10  *    This program is distributed in the hope that it will be useful,
    11  *    but WITHOUT ANY WARRANTY; without even the implied warranty of
    12  *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    13  *    GNU General Public License for more details.
    14  *
    15  *    You should have received a copy of the GNU General Public License
    16  *    along with this program; if not, write to the Free Software
    17  *    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
    18  */
    19 package org.greenstone.gsdl3.service;
    20 
    21 import java.util.ArrayList;
    22 import java.util.HashMap;
    23 import java.util.Iterator;
    24 import java.util.Map;
    25 import java.util.Set;
    26 
    27 import org.apache.log4j.Logger;
    28 import org.greenstone.gsdl3.util.GSPath;
    29 import org.greenstone.gsdl3.util.GSXML;
    30 import org.greenstone.gsdl3.util.UserContext;
    31 import org.greenstone.gsdl3.util.XMLConverter;
    32 import org.w3c.dom.Document;
    33 import org.w3c.dom.Element;
    34 import org.w3c.dom.Node;
    35 import org.w3c.dom.NodeList;
    36 
    37 /**
    38  * This ServiceRack gets specified in siteConfig.xml. So it is loaded by the MessageRouter, and two services get activated: TextQuery, DocumentMetadataRetrieve.
    39 These are located at MR level, not inside a collection. QueryAction will send messages to "TextQuery", rather than eg "mgppdemo/TextQuery".
    40 These two services will requery the MR for search results/document metadata based on collections or documents listed.
    41  */
    42 
    43 public class CrossCollectionSearch extends ServiceRack
    44 {
    45 
    // log4j logger named after this class
    46     static Logger logger = Logger.getLogger(org.greenstone.gsdl3.service.CrossCollectionSearch.class.getName());
    // names of the request parameters; configure() registers all five of them in save_params
    47     protected static final String QUERY_PARAM = "query";
    48     protected static final String COLLECTION_PARAM = "collection";
    49     protected static final String GROUP_PARAM = "group";
    50   protected static final String MAXDOCS_PARAM = "maxDocs"; // matches standard maxDocs, but in this case, means max docs per collection
    51   protected static final String HITS_PER_PAGE_PARAM = "hitsPerPage";
    // value used for maxDocs when a TextQuery request carries no maxDocs parameter
    52   protected static final String MAXDOCS_DEFAULT = "20";
    53     // the services on offer - these proxy the actual collection ones
    54     protected static final String TEXT_QUERY_SERVICE = "TextQuery";
    55     protected static final String DOCUMENT_METADATA_RETRIEVE_SERVICE = "DocumentMetadataRetrieve";
    56 
    // option ids offered for the collection parameter; stays null until
    // getServiceDescription() triggers initCollectionList()
    57     protected String[] coll_ids_list = null;
    // collections queried by processTextQuery when the request names no specific collections;
    // presumably coll_ids_list without the "all" entry - TODO confirm against initCollectionList()
    58     protected String[] coll_ids_list_no_all = null;
    59     // maps lang to coll names list
    60     protected HashMap<String, String[]> coll_names_map = null;
    61 
    62     //protected String[] coll_names_list = null;
    63 
    64     /** No-argument constructor; its body is empty, set-up of the services happens in configure(). */
    65     public CrossCollectionSearch()
    66     {
    67     }
    68 
    /**
     * Sets up this service rack: registers the TextQuery and
     * DocumentMetadataRetrieve services in short_service_info, records which
     * parameters are saved in the session, and stores a format statement for
     * TextQuery (the one supplied in info, or a built-in default).
     *
     * @param info       configuration element; only its format child element is read here
     * @param extra_info not used by this method
     * @return always true
     */
    69     public boolean configure(Element info, Element extra_info)
    70     {
    71         // any parameters? colls to include??
    72         logger.info("Configuring CrossCollectionSearch...");
    73         // query service
    74         Element ccs_service = this.desc_doc.createElement(GSXML.SERVICE_ELEM);
    75         ccs_service.setAttribute(GSXML.TYPE_ATT, GSXML.SERVICE_TYPE_QUERY);
    76         ccs_service.setAttribute(GSXML.NAME_ATT, TEXT_QUERY_SERVICE);
    77         this.short_service_info.appendChild(ccs_service);
    78 
    79         // metadata service
    80         Element dmr_service = this.desc_doc.createElement(GSXML.SERVICE_ELEM);
    81         dmr_service.setAttribute(GSXML.TYPE_ATT, GSXML.SERVICE_TYPE_RETRIEVE);
    82         dmr_service.setAttribute(GSXML.NAME_ATT, DOCUMENT_METADATA_RETRIEVE_SERVICE);
    83         this.short_service_info.appendChild(dmr_service);
    84 
    85 
    86         // what params do we want saved in the session?
    87         this.save_params.add(QUERY_PARAM);
    88         this.save_params.add(COLLECTION_PARAM);
    89         this.save_params.add(GROUP_PARAM);
    90         this.save_params.add(MAXDOCS_PARAM);
    91         this.save_params.add(HITS_PER_PAGE_PARAM);
    92        
    93         // get any format info
    94         Element format_info = (Element) GSXML.getChildByTagName(info, GSXML.FORMAT_ELEM);
    95         if (format_info != null)
    96         {
                   // a format statement was configured: copy it into our own description document
    97             this.format_info_map.put(TEXT_QUERY_SERVICE, this.desc_doc.importNode(format_info, true));
    98         }
    99         else
    100         {
    101             // add in a default format statement
    102           //"xmlns:gsf='" + GSXML.GSF_NAMESPACE + "' xmlns:xsl='" + GSXML.XSL_NAMESPACE + "
                  // the default template links each documentNode to its document page, using the
                  // node's collection, nodeID and docType attributes, and ends the link with p.c=
    103           String format_string = "<format "+GSXML.STD_NAMESPACES_ATTS + "><gsf:template match='documentNode'><td><a><xsl:attribute name='href'>?a=d&amp;c=<xsl:value-of select='@collection'/>&amp;d=<xsl:value-of select='@nodeID'/><xsl:if test=\"@nodeType='leaf'\">&amp;sib=1</xsl:if>&amp;dt=<xsl:value-of select='@docType'/>&amp;p.a=q&amp;p.s=" + TEXT_QUERY_SERVICE + "&amp;p.c=";
                    // p.c is left empty when no cluster name is set
    104             if (this.cluster_name != null)
    105             {
    106                 format_string += this.cluster_name;
    107             }
    108             format_string += "</xsl:attribute><gsf:icon/></a></td><td><gsf:metadata name='Title'/> (<xsl:value-of select='@collection'/>) </td></gsf:template></format>";
                    // NOTE(review): the result of converter.getDOM() is dereferenced without a null
                    // check - confirm it cannot return null for this fixed string
    109             this.format_info_map.put(TEXT_QUERY_SERVICE, this.desc_doc.importNode(this.converter.getDOM(format_string).getDocumentElement(), true));
    110         }
    111         return true;
    112     }
    113 
    /**
     * Builds the description element for one of this rack's services.
     * For TextQuery the element carries display text and/or the parameter
     * list, depending on subset. For any other service name a bare
     * retrieve-type service element with that name is returned.
     *
     * @param doc     document used to create all returned elements
     * @param service name of the service to describe
     * @param lang    language used for display strings and collection names
     * @param subset  null to include everything, or
     *                GSXML.DISPLAY_TEXT_ELEM + GSXML.LIST_MODIFIER /
     *                GSXML.PARAM_ELEM + GSXML.LIST_MODIFIER to include only that part
     * @return the service description element (never null in the code shown here)
     */
    114   protected Element getServiceDescription(Document doc, String service, String lang, String subset)
    115     {
    116         if (service.equals(TEXT_QUERY_SERVICE))
    117         {
    118 
    119             Element ccs_service = doc.createElement(GSXML.SERVICE_ELEM);
    120             ccs_service.setAttribute(GSXML.TYPE_ATT, GSXML.SERVICE_TYPE_QUERY);
    121             ccs_service.setAttribute(GSXML.NAME_ATT, TEXT_QUERY_SERVICE);
    122 
    123             // display info
    124             if (subset == null || subset.equals(GSXML.DISPLAY_TEXT_ELEM + GSXML.LIST_MODIFIER))
    125             {
                        // name, submit label and description, looked up as "TextQuery.<key>" for lang
    126                 ccs_service.appendChild(GSXML.createDisplayTextElement(doc, GSXML.DISPLAY_TEXT_NAME, getTextString(TEXT_QUERY_SERVICE + ".name", lang)));
    127                 ccs_service.appendChild(GSXML.createDisplayTextElement(doc, GSXML.DISPLAY_TEXT_SUBMIT, getTextString(TEXT_QUERY_SERVICE + ".submit", lang)));
    128                 ccs_service.appendChild(GSXML.createDisplayTextElement(doc, GSXML.DISPLAY_TEXT_DESCRIPTION, getTextString(TEXT_QUERY_SERVICE + ".description", lang)));
    129             }
    130             // param info
    131             if (subset == null || subset.equals(GSXML.PARAM_ELEM + GSXML.LIST_MODIFIER))
    132             {
    133                 Element param_list = doc.createElement(GSXML.PARAM_ELEM + GSXML.LIST_MODIFIER);
    134                 // collection list
                        // the collection id list is built on first use rather than in configure()
    135                 if (coll_ids_list == null)
    136                 {
    137                     initCollectionList(lang);
    138                 }
                        // NOTE(review): coll_names_map is dereferenced here; this assumes
                        // initCollectionList() has initialised it - confirm
    139                 if (!coll_names_map.containsKey(lang))
    140                 {
    141                     addCollectionNames(lang);
    142                 }
                        // multi-valued enum parameter, default "all", options taken from coll_ids_list
                        // with the display names for lang
    143                 Element param = GSXML.createParameterDescription(doc, COLLECTION_PARAM, getTextString("param." + COLLECTION_PARAM, lang), GSXML.PARAM_TYPE_ENUM_MULTI, "all", coll_ids_list, coll_names_map.get(lang));
    144                 param_list.appendChild(param);
    145                 // max docs param
    146                 param = GSXML.createParameterDescription(doc, MAXDOCS_PARAM, getTextString("param." + MAXDOCS_PARAM, lang), GSXML.PARAM_TYPE_INTEGER, MAXDOCS_DEFAULT, null, null);
    147                 param_list.appendChild(param);
    148                 // query param
    149                 param = GSXML.createParameterDescription(doc, QUERY_PARAM, getTextString("param." + QUERY_PARAM, lang), GSXML.PARAM_TYPE_STRING, null, null, null);
    150                 param_list.appendChild(param);
                        // NOTE(review): GROUP_PARAM and HITS_PER_PAGE_PARAM are saved in configure()
                        // but get no parameter description here - confirm this is intended
    151                 ccs_service.appendChild(param_list);
    152             }
    153 
    154             logger.debug("service description=" + this.converter.getPrettyString(ccs_service));
    155             return ccs_service;
    156         }
    157         // these ones are probably never called, but put them here just in case
    158         Element service_elem = doc.createElement(GSXML.SERVICE_ELEM);
    159         service_elem.setAttribute(GSXML.TYPE_ATT, GSXML.SERVICE_TYPE_RETRIEVE);
    160         service_elem.setAttribute(GSXML.NAME_ATT, service);
    161         return service_elem;
    162 
    163     }
    164 
    165     protected Element processTextQuery(Element request)
    166     {
    167         // Create a new (empty) result message
    168       Document result_doc = XMLConverter.newDOM();
    169         Element result = result_doc.createElement(GSXML.RESPONSE_ELEM);
    170         result.setAttribute(GSXML.FROM_ATT, TEXT_QUERY_SERVICE);
    171         result.setAttribute(GSXML.TYPE_ATT, GSXML.REQUEST_TYPE_PROCESS);
    172 
    173         UserContext userContext = new UserContext(request);
    174 
    175         // Get the parameters of the request
    176         Element param_list = (Element) GSXML.getChildByTagName(request, GSXML.PARAM_ELEM + GSXML.LIST_MODIFIER);
    177         if (param_list == null)
    178         {
    179             logger.error("TextQuery request had no paramList.");
    180             return result; // Return the empty result
    181         }
    182         // get the collection list
    183         String[] colls_list = coll_ids_list_no_all;
    184         Element coll_param = GSXML.getNamedElement(param_list, GSXML.PARAM_ELEM, GSXML.NAME_ATT, COLLECTION_PARAM);
    185         if (coll_param != null)
    186         {
    187             String coll_list = GSXML.getValue(coll_param);
    188             if (!coll_list.equals("all") && !coll_list.equals(""))
    189             {
    190                 colls_list = coll_list.split(",");
    191             }
    192         }
    193            
    194         colls_list = mergeGroups(userContext, param_list, colls_list);
    195        
    196         String maxdocs = MAXDOCS_DEFAULT;
    197         Element maxdocs_param = GSXML.getNamedElement(param_list, GSXML.PARAM_ELEM, GSXML.NAME_ATT, MAXDOCS_PARAM);
    198         if (maxdocs_param != null) {
    199           maxdocs = GSXML.getValue(maxdocs_param);
    200         }
    201        
    202         Document msg_doc = XMLConverter.newDOM();
    203         Element query_message = msg_doc.createElement(GSXML.MESSAGE_ELEM);
    204         // we are sending the same request to each collection - build up the to
    205         // attribute for the request
    206         StringBuffer to_att = new StringBuffer();
    207         for (int i = 0; i < colls_list.length; i++)
    208         {
    209             if (i > 0)
    210             {
    211                 to_att.append(",");
    212             }
    213             to_att.append(GSPath.appendLink(colls_list[i], "TextQuery"));
    214 
    215         }
    216         // send the query to all colls
    217         Element query_request = GSXML.createBasicRequest(msg_doc, GSXML.REQUEST_TYPE_PROCESS, to_att.toString(), userContext);
    218         query_message.appendChild(query_request);
    219         // should we add params individually?
    220         Element new_param_list = msg_doc.createElement(GSXML.PARAM_ELEM + GSXML.LIST_MODIFIER);
    221         query_request.appendChild(new_param_list);
    222         new_param_list.appendChild(msg_doc.importNode(GSXML.getNamedElement(param_list, GSXML.PARAM_ELEM, GSXML.NAME_ATT, QUERY_PARAM), true));
    223 
    224         // for cross coll search, we only want maxdocs from each collection
    225         // some colls use maxdocs, some use hits per page so lets send both
    226         new_param_list.appendChild(GSXML.createParameter(msg_doc, MAXDOCS_PARAM, maxdocs));
    227         new_param_list.appendChild(GSXML.createParameter(msg_doc, HITS_PER_PAGE_PARAM, maxdocs));
    228         Element query_result = (Element) this.router.process(query_message);
    229         // create the doc list for the response
    230         Element doc_node_list = result_doc.createElement(GSXML.DOC_NODE_ELEM + GSXML.LIST_MODIFIER);
    231         result.appendChild(doc_node_list);
    232         Element result_snippet_list = result_doc.createElement(GSXML.HL_SNIPPET_ELEM + GSXML.LIST_MODIFIER);
    233         result.appendChild(result_snippet_list);
    234         NodeList hl_snippet_list = query_result.getElementsByTagName(GSXML.HL_SNIPPET_ELEM);
    235         if (hl_snippet_list != null){
    236             for (int hls = 0; hls < hl_snippet_list.getLength(); hls++){
    237                 result_snippet_list.appendChild(result_doc.importNode(hl_snippet_list.item(hls), true));
    238             }
    239         }
    240 
    241         NodeList responses = query_result.getElementsByTagName(GSXML.RESPONSE_ELEM);
    242         int num_docs = 0;
    243         for (int k = 0; k < responses.getLength(); k++)
    244         {
    245             String coll_name = GSPath.removeLastLink(((Element) responses.item(k)).getAttribute(GSXML.FROM_ATT));
    246             NodeList nodes = ((Element) responses.item(k)).getElementsByTagName(GSXML.DOC_NODE_ELEM);
    247             if (nodes == null || nodes.getLength() == 0)
    248                 continue;
    249             num_docs += nodes.getLength();
    250             Element last_node = null;
    251             Element this_node = null;
    252             for (int n = 0; n < nodes.getLength(); n++)
    253             {
    254                 this_node = (Element) nodes.item(n);
    255                 this_node.setAttribute("collection", coll_name);
    256    
    257                 if (k == 0)
    258                 {
    259 
    260                     doc_node_list.appendChild(result_doc.importNode(this_node, true));
    261                 }
     1
    2622                else
    2633                {
     
    27010
    27111            }
     12            NodeList hl_snippet_list = ((Element) responses.item(k)).getElementsByTagName(GSXML.HL_SNIPPET_ELEM);
     13            if (hl_snippet_list != null) {
     14              for (int hls = 0; hls < hl_snippet_list.getLength(); hls++) {
     15                Element this_hls = (Element) hl_snippet_list.item(hls);
     16                this_hls.setAttribute("collection", coll_name);
     17                result_snippet_list.appendChild(result_doc.importNode(this_hls, true));
     18              }
     19            }
     20           
    27221        }
    27322        // just send back num docs returned. Too hard to work out number of matches etc as each index type
     
    29746        Element coll_list_request = GSXML.createBasicRequest(msg_doc, GSXML.REQUEST_TYPE_DESCRIBE, "", userContext); // uid
    29847        coll_list_message.appendChild(coll_list_request);
    299         logger.debug("coll list request = " + this.converter.getPrettyString(coll_list_request));
     48        //logger.debug("coll list request = " + this.converter.getPrettyString(coll_list_request));
    30049        Element coll_list_response = (Element) this.router.process(coll_list_message);
    30150        if (coll_list_response == null)
     
    30453            return false;
    30554        }
    306         logger.debug("coll list response = " + this.converter.getPrettyString(coll_list_response));
     55        //logger.debug("coll list response = " + this.converter.getPrettyString(coll_list_response));
    30756        // second, get some info from each collection. we want the coll name
    30857        // and whether its got a text query service
     
    32776        Element metadata_request = GSXML.createBasicRequest(msg_doc, GSXML.REQUEST_TYPE_DESCRIBE, colls_sb.toString(), userContext);
    32877        metadata_message.appendChild(metadata_request);
    329         logger.debug("metadata request = " + this.converter.getPrettyString(metadata_message));
     78        //logger.debug("metadata request = " + this.converter.getPrettyString(metadata_message));
    33079        Element metadata_response = (Element) this.router.process(metadata_message);
    331         logger.debug("metadata response = " + this.converter.getPrettyString(metadata_response));
     80        //logger.debug("metadata response = " + this.converter.getPrettyString(metadata_response));
    33281        NodeList coll_responses = metadata_response.getElementsByTagName(GSXML.RESPONSE_ELEM);
    33382        ArrayList<String> valid_colls = new ArrayList<String>();
     
    400149        metadata_request.appendChild(param_list);
    401150        metadata_message.appendChild(metadata_request);
    402         logger.debug("coll names metadata request = " + this.converter.getPrettyString(metadata_message));
     151        //logger.debug("coll names metadata request = " + this.converter.getPrettyString(metadata_message));
    403152        Element metadata_response = (Element) this.router.process(metadata_message);
    404         logger.debug("coll names metadata response = " + this.converter.getPrettyString(metadata_response));
     153        //logger.debug("coll names metadata response = " + this.converter.getPrettyString(metadata_response));
    405154        NodeList coll_responses = metadata_response.getElementsByTagName(GSXML.RESPONSE_ELEM);
    406155        for (int i = 0; i < coll_responses.getLength(); i++)
     
    571320            groupParamList.appendChild(groupParam);
    572321        }
    573         String prefix = GSXML.GROUP_ELEM + "/";
     322        String prefix = GSXML.GROUP_ELEM + ".";
    574323        for (int i = 0; i < collArray.length; i++) {
    575324            String collectionParam = collArray[i];
    576325            if (collectionParam.startsWith(prefix)){
    577326                String value = groupParam.getAttribute(GSXML.VALUE_ATT);
    578                 value += "," + collectionParam.substring(prefix.length() - 1 );
     327                String group = collectionParam.substring(prefix.length() - 1 );
     328                group=group.replace('.', '/'); // we use . instead of / in args
     329                value += "," + group;
    579330                groupParam.setAttribute(GSXML.VALUE_ATT, value);
    580331            }
Note: See TracChangeset for help on using the changeset viewer.