[15222] | 1 | /**
|
---|
| 2 | *#########################################################################
|
---|
| 3 | * GSearchConnection.java - works with the demo-client for Greenstone 3,
|
---|
| 4 | * of the Greenstone digital library suite from the New Zealand Digital
|
---|
| 5 | * Library Project at the University of Waikato, New Zealand.
|
---|
| 6 | * <BR><BR>
|
---|
| 7 | * Copyright (C) 2008 New Zealand Digital Library Project
|
---|
| 8 | * <BR><BR>
|
---|
| 9 | * This program is free software; you can redistribute it and/or modify
|
---|
| 10 | * it under the terms of the GNU General Public License as published by
|
---|
| 11 | * the Free Software Foundation; either version 2 of the License, or
|
---|
| 12 | * (at your option) any later version.
|
---|
| 13 | * <BR><BR>
|
---|
| 14 | * This program is distributed in the hope that it will be useful,
|
---|
| 15 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
|
---|
| 16 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
---|
| 17 | * GNU General Public License for more details.
|
---|
| 18 | *########################################################################
|
---|
| 19 | */
|
---|
| 20 |
|
---|
| 21 | package org.greenstone.fedora.services;
|
---|
| 22 |
|
---|
import java.net.MalformedURLException;
import java.net.URL;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Vector;

import javax.xml.namespace.QName;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.rpc.ServiceException;

import org.apache.axis.client.Call;
import org.apache.axis.client.Service;
import org.apache.log4j.Logger;

import org.w3c.dom.Element;
import org.w3c.dom.NodeList;
| 42 |
|
---|
| 43 |
|
---|
| 44 | /**
|
---|
| 45 | * Class GSearchConnection connects to FedoraGSearch's web services.
|
---|
| 46 | * FedorGSearch offers indexing and full-text search functionality for
|
---|
| 47 | * Fedora repositories. Its search web service (method gFindObjects)
|
---|
| 48 | * returns the response of a search as XML.
|
---|
| 49 | * GSearchConnection offers more convenient methods that extract just
|
---|
| 50 | * the parts of search results that FedoraGS3Connection needs and returns
|
---|
| 51 | * that.
|
---|
| 52 | * @author ak19
|
---|
| 53 | */
|
---|
| 54 | public class GSearchConnection implements FedoraToGS3Interface.Constants {
|
---|
| 55 | /** Logger for this class. */
|
---|
| 56 | private static final Logger LOG = Logger.getLogger(
|
---|
| 57 | GSearchConnection.class.getName());
|
---|
| 58 |
|
---|
| 59 | /* Accessing the web services of Fedora Generic Search */
|
---|
| 60 | protected static String NAMESPACE_URI = "http://server.fedoragsearch.defxws.dk";
|
---|
| 61 | protected static String SERVICE_NAME = "OperationsService";
|
---|
| 62 |
|
---|
| 63 | /** The names of the methods we use of Fedora Generic Search's web services
|
---|
| 64 | * are declared here as static final Strings. */
|
---|
| 65 | protected static final String G_FIND_OBJECTS = "gfindObjects";
|
---|
| 66 |
|
---|
| 67 | /* Some fixed string literals that will be encountered in the response XMLs
|
---|
| 68 | * that FedoraGSearch's method gFindObjects() returns. */
|
---|
| 69 | protected static final String PID = "PID";
|
---|
| 70 | protected static final String HIT_TOTAL = "hitTotal";
|
---|
| 71 | protected static final String OBJECT = "object";
|
---|
| 72 | protected static final String FIELD = "field";
|
---|
| 73 | protected static final String NAME = "name";
|
---|
| 74 | protected static final String DC_TITLE_FIELD = "dc.title";
|
---|
| 75 | protected static final String FULLTEXT_FIELD = "ds.fulltext";
|
---|
| 76 |
|
---|
| 77 | /** separator used internally to separate values of a search field */
|
---|
| 78 | protected static final String SPACE = " ";
|
---|
[15437] | 79 |
|
---|
| 80 | /** The name of the Index wherein FedoraGSearch has indexed all the GS3 docs.
|
---|
| 81 | * This final member is public here so that others may read the indexName
|
---|
| 82 | * that this GSearchConnection works with. */
|
---|
| 83 | public final String indexName;
|
---|
[15222] | 84 |
|
---|
| 85 | /** The Service object used to connect to the FedoraGSearch web services */
|
---|
| 86 | protected final Service service;
|
---|
| 87 | /** The Call object used to connect to the FedoraGSearch web services */
|
---|
| 88 | protected final Call call;
|
---|
| 89 | /** The portName object used when connecting to FedoraGSearch's web services */
|
---|
| 90 | protected final QName portName;
|
---|
| 91 |
|
---|
| 92 | /** A DocumentBuilder object used to construct and parse XML */
|
---|
| 93 | protected final DocumentBuilder builder;
|
---|
[15437] | 94 |
|
---|
| 95 |
|
---|
[15222] | 96 | /** Constructor that takes a String representing the url of the WSDL
|
---|
| 97 | * file for FedoraGSearch's web services, and tries to establish a
|
---|
| 98 | * connection to those web services.
|
---|
| 99 | * @param wsdlFileLocation is a String representing the url of the WSDL file
|
---|
[15437] | 100 | * @param indexName is the name of the index that Fedora Generic Search
|
---|
| 101 | * should work with (the index wherein the indexed GS3 documents have been
|
---|
| 102 | * placed).
|
---|
[15222] | 103 | */
|
---|
[15437] | 104 | public GSearchConnection(String wsdlFileLocation, String indexName)
|
---|
[15222] | 105 | throws MalformedURLException, ServiceException,
|
---|
| 106 | ParserConfigurationException
|
---|
| 107 | {
|
---|
[15437] | 108 | this.indexName = indexName;
|
---|
| 109 |
|
---|
[15222] | 110 | URL wsdlURL = new URL(wsdlFileLocation);
|
---|
| 111 | service = new Service(wsdlURL, new QName(NAMESPACE_URI, SERVICE_NAME));
|
---|
| 112 | //call = (Call) service.createCall(new QName(NAMESPACE_URI, PORT_NAME));
|
---|
| 113 |
|
---|
| 114 | Iterator i = service.getPorts();
|
---|
| 115 | // FIXME: can we just assume it's the first port of service SERVICE_NAME?
|
---|
| 116 | // Do we need to work out which port to get??? Remember, the port names
|
---|
| 117 | // vary between wsdls though!
|
---|
| 118 | if(i.hasNext()) {
|
---|
| 119 | portName = (QName)i.next();
|
---|
| 120 | call = (Call) service.createCall(portName);
|
---|
| 121 |
|
---|
| 122 | String endpointLocation = call.getTargetEndpointAddress();
|
---|
| 123 | LOG.debug("Wsdl file url: " + wsdlURL
|
---|
| 124 | + "\nEndpoint location is: " + endpointLocation);
|
---|
| 125 | } else { // should never happen: a service without a port
|
---|
| 126 | // portName = null;
|
---|
| 127 | call = (Call)service.createCall();
|
---|
| 128 | // FIXME: possibly manually get the ports and choose
|
---|
| 129 | // one containing "FEDORA" and "API-A" in its name?
|
---|
| 130 | throw new ServiceException(this.getClass() + ": No port in wsdl file");
|
---|
| 131 | }
|
---|
| 132 |
|
---|
| 133 | // we can set the portName which remains constant for the various methods
|
---|
| 134 | // call.setPortName(portName);
|
---|
| 135 |
|
---|
| 136 | DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
|
---|
| 137 | builder = factory.newDocumentBuilder(); // to create XML docs
|
---|
| 138 | }
|
---|
[26286] | 139 |
|
---|
[15222] | 140 | /**
|
---|
| 141 | * Method to invoke gfindObjects operation of Fedora Generic Search
|
---|
| 142 | * web services.
|
---|
| 143 | *
|
---|
| 144 | * Parameter types, parameter order and return type of gFindObjects are as
|
---|
| 145 | * obtained from the wsdl file for the Fedora Generic Search web services
|
---|
| 146 | * located at:
|
---|
| 147 | * http://localhost:8080/fedoragsearch/services/FgsOperations?wsdl
|
---|
| 148 | * <wsdl:message name="gfindObjectsRequest">
|
---|
| 149 | * <wsdl:part name="query" type="xsd:string"/>
|
---|
| 150 | * <wsdl:part name="sort" type="xsd:string"/>
|
---|
| 151 | * <wsdl:part name="hitPageStart" type="xsd:int"/>
|
---|
| 152 | * <wsdl:part name="hitPageSize" type="xsd:int"/>
|
---|
| 153 | * <wsdl:part name="snippetsMax" type="xsd:int"/>
|
---|
| 154 | * <wsdl:part name="fieldMaxLength" type="xsd:int"/>
|
---|
| 155 | * <wsdl:part name="indexName" type="xsd:string"/>
|
---|
| 156 | * <wsdl:part name="resultPageXslt" type="xsd:string"/>
|
---|
| 157 | * </wsdl:message>
|
---|
| 158 | *
|
---|
| 159 | * <wsdl:message name="gfindObjectsResponse">
|
---|
| 160 | * <wsdl:part name="gfindObjectsReturn" type="xsd:string"/>
|
---|
| 161 | * </wsdl:message>
|
---|
| 162 | *
|
---|
| 163 | * <wsdl:operation name="gfindObjects"
|
---|
| 164 | * parameterOrder="query sort hitPageStart hitPageSize snippetsMax
|
---|
| 165 | * fieldMaxLength indexName resultPageXslt">
|
---|
| 166 | *
|
---|
| 167 | * This method works: it searches the dc.title field of our FedoraIndex
|
---|
| 168 | * for the term (e.g. "interview") and the result returned is an XML String.
|
---|
| 169 | *
|
---|
| 170 | * There's no example on how to call gFindObjects with parameters. In
|
---|
| 171 | * particular, I don't know what values the parameter <b>sort</b> can take.
|
---|
| 172 | * But topazproject has an example on how to call updateIndex().
|
---|
| 173 | * @see <a href="http://www.topazproject.org/trac/wiki/FedoraSearch?format=txt">An example on how to call updateIndex() with parameters</a>
|
---|
| 174 | * @see <a href="http://ws.apache.org/axis/java/apiDocs/org/apache/axis/client/Service.html">Axis Service class</a>
|
---|
| 175 | * @see <a href="http://ws.apache.org/axis/java/apiDocs/javax/xml/rpc/Call.html">Axis RPC Call, for specification of interface Call</a>
|
---|
| 176 | * @see <a href="http://ws.apache.org/axis/java/apiDocs/org/apache/axis/client/Call.html">Axis client Call class, for implementation of interface Call</a>
|
---|
| 177 | */
|
---|
| 178 | protected String gFindObjects(String searchFieldedTerms, String sort,
|
---|
[21573] | 179 | int hitPageStart, int hitPageSize, int snippetsMax,
|
---|
| 180 | /*int fieldMaxLength,*/ String indexName, String resultPageXslt) throws Exception
|
---|
[15222] | 181 | {
|
---|
| 182 | // "Prefills as much info from the WSDL as it can. Right now it's SOAPAction,
|
---|
| 183 | // operation qname, parameter types and return type of the Web Service.
|
---|
| 184 | // This method considers that port name and target endpoint address have
|
---|
| 185 | // already been set. This is useful when you want to use the same Call instance
|
---|
| 186 | // for several calls on the same Port. NOTE: Not part of JAX-RPC specification."
|
---|
| 187 |
|
---|
| 188 | //call.removeAllParameters(); // no need for this when using setOpName below
|
---|
| 189 | call.setOperationName(G_FIND_OBJECTS);
|
---|
| 190 |
|
---|
[21573] | 191 | // Max num of chars in field vals returned. Since return values exceeding
|
---|
| 192 | // maxlength will be truncated, ensure length suffices for long PIDs returned.
|
---|
| 193 | // The only element of the response XML we'll be using is the PID of the document
|
---|
| 194 | // in which the searchTerm occurred.
|
---|
| 195 | final int fieldMaxLength = 100; // NOT TRUE: max length in words of field values
|
---|
| 196 | // returned. E.g. snippet sizes will be reduced to fieldMaxLength words too.
|
---|
| 197 |
|
---|
| 198 | // This is the method call for Fedora 2's GSearch
|
---|
| 199 | //String valueFound =(String)call.invoke( new Object[] {
|
---|
| 200 | // searchFieldedTerms, sort, hitPageStart, hitPageSize, snippetsMax,
|
---|
| 201 | // fieldMaxLength, indexName, resultPageXslt} );
|
---|
| 202 |
|
---|
| 203 | // The method call for GSearch 2.2 of Fedora 3 takes the args in a different order:
|
---|
[15222] | 204 | String valueFound =(String)call.invoke( new Object[] {
|
---|
[21573] | 205 | searchFieldedTerms, hitPageStart, hitPageSize, snippetsMax,
|
---|
| 206 | fieldMaxLength, indexName, sort, resultPageXslt} );
|
---|
| 207 |
|
---|
| 208 | // for debugging
|
---|
| 209 | //javax.swing.JOptionPane.showMessageDialog(null, "GSearchConnection.gFindObjects:" + valueFound);
|
---|
[26286] | 210 |
|
---|
[15222] | 211 | return valueFound;
|
---|
| 212 | }
|
---|
| 213 |
|
---|
| 214 | /**
|
---|
| 215 | * Method that performs a search for the given searchTerm inside the given
|
---|
| 216 | * indexed field.
|
---|
| 217 | * @param searchFieldName is the name of the indexed field within which the
|
---|
| 218 | * given searchTerm is to be searched for.
|
---|
| 219 | * @param searchTerm is the term to be searched for.
|
---|
| 220 | * @param hitPageStart is the page of search results to start returning.
|
---|
| 221 | * @param hitPageSize is the number of search result pages to return,
|
---|
| 222 | * starting from hitPageStart.
|
---|
| 223 | * @param snippetsMax is the maximum number of separate snippets containing
|
---|
| 224 | * the searchTerm that are to be returned. (snippetsMax or a fewer number of
|
---|
| 225 | * occurrences of the word in the text will be returned)
|
---|
| 226 | */
|
---|
| 227 | public String search(String searchFieldName, String searchTerm,
|
---|
| 228 | int hitPageStart, int hitPageSize, int snippetsMax) throws Exception
|
---|
| 229 | {
|
---|
| 230 | final String sort = ""; // returns results from highest to lowest rank
|
---|
| 231 | final String resultPageXslt = "";
|
---|
| 232 |
|
---|
| 233 | // when a fieldname is given to search in (ds.fulltext, dc.title)
|
---|
| 234 | // then prepend that followed by a COLON to the searchTerm.
|
---|
| 235 | final String fullSearchTerm = searchFieldName.equals("") ?
|
---|
| 236 | searchTerm : (searchFieldName+":"+searchTerm);
|
---|
| 237 |
|
---|
| 238 | return gFindObjects(fullSearchTerm, sort,
|
---|
| 239 | hitPageStart, hitPageSize, snippetsMax,
|
---|
[21573] | 240 | indexName, resultPageXslt);
|
---|
[15222] | 241 | }
|
---|
| 242 |
|
---|
| 243 | /**
|
---|
| 244 | * FedoraGSearch accepts a query of the form:
|
---|
| 245 | * <code><"cyclone val" "Gender Inequalities" ds.fulltext:"cyclone val"
|
---|
| 246 | * ds.fulltext:"worst storm"></code>
|
---|
| 247 | * where the first two phrases are searched for in all indexed fields,
|
---|
| 248 | * (in this case dc.title and ds.fulltext), while the last two are
|
---|
| 249 | * searched for in the ds.fulltext field.
|
---|
| 250 | * Another example:
|
---|
| 251 | * <code><gender dc.title:interview ds.fulltext:"cyclone val">
|
---|
| 252 | * titles and fulltexts are searched for "gender", while title index
|
---|
| 253 | * is searched for "interview" and fulltexts are searched for the phrase
|
---|
| 254 | * "cyclone val"</code>
|
---|
| 255 | * @param fieldsToSearchTerms is a Hashmap of searchfields and
|
---|
| 256 | * associated search terms (words or phrases). The terms are in a
|
---|
| 257 | * comma-separated list. fieldsToSearchTerms is a Hashmap of
|
---|
| 258 | * (Searchfields, associated-searchTerms) pairs. It can contain 3
|
---|
| 259 | * searchfields: allfields, titles, text. The value for each is a
|
---|
| 260 | * comma-separated list of search terms in that field.
|
---|
| 261 | * Internally the field names get converted to what FedoraGSearch's
|
---|
| 262 | * gfindObjects understands: titles becomes dc.title:, text becomes
|
---|
| 263 | * ds.fulltext and allfields becomes nothing.
|
---|
| 264 | * @param hitPageStart is the page of search results to start returning.
|
---|
| 265 | * @param hitPageSize is the number of search result pages to return,
|
---|
| 266 | * starting from hitPageStart.
|
---|
| 267 | * @return the XML (in string format) returned from Fedora Generic Search's
|
---|
| 268 | * gfindObjects method
|
---|
| 269 | *
|
---|
| 270 | */
|
---|
| 271 | public String search(Map fieldsToSearchTerms,
|
---|
| 272 | int hitPageStart, int hitPageSize)
|
---|
| 273 | throws Exception
|
---|
| 274 | {
|
---|
| 275 | LOG.debug("In FedoraGS3's GSearchConnection.search(Map,...)");
|
---|
| 276 |
|
---|
| 277 | // HashMap consists of several (key, value) entries, 3 of
|
---|
| 278 | // which will be dealt with here:
|
---|
| 279 | // - allfields, <comma separated list of search terms/phrases>
|
---|
| 280 | // - titles, <comma separated list of search terms/phrases>
|
---|
| 281 | // - (full)text, <comma separated list of search terms/phrases>
|
---|
| 282 | // We need to obtain each value and change the separator to space:
|
---|
| 283 | String allfields = (String)fieldsToSearchTerms.get(ALL_FIELDS);
|
---|
| 284 | String titles = (String)fieldsToSearchTerms.get(ALL_TITLES);
|
---|
| 285 | String fulltexts = (String)fieldsToSearchTerms.get(FULLTEXT);
|
---|
| 286 |
|
---|
| 287 | // Each field is a comma separated list of terms that may be
|
---|
| 288 | // either a word OR a phrase.
|
---|
| 289 | // We're going to separate each term from the list,
|
---|
| 290 | // and put quotes around phrases, then combine all the terms
|
---|
| 291 | // together again with spaces to separate them.
|
---|
[26286] | 292 | allfields = formatSearchTermsInField(allfields, ALL_FIELDS); // search foxml.all.text
|
---|
[15222] | 293 | // ALL_FIELDS has no field name
|
---|
| 294 | titles = formatSearchTermsInField(titles, DC_TITLE_FIELD);
|
---|
| 295 | fulltexts = formatSearchTermsInField(fulltexts, FULLTEXT_FIELD);
|
---|
| 296 |
|
---|
| 297 | String fullSearchTerm = allfields + titles + fulltexts;
|
---|
| 298 | if(fullSearchTerm.trim().equals("")) { // nothing to search on
|
---|
| 299 | return "";
|
---|
| 300 | }
|
---|
| 301 |
|
---|
| 302 | // Finally, restrict the search to the Greenstone digital objects
|
---|
| 303 | // stored in Fedora
|
---|
| 304 | final String greenstonePID
|
---|
| 305 | = PID + FedoraGS3DL.COLON + FedoraGS3DL.GREENSTONE;
|
---|
| 306 | //"PID:\"greenstone\"";
|
---|
| 307 | fullSearchTerm += greenstonePID;
|
---|
| 308 | //! Everything after the colon in the pid is ignored by FedoraGSearch:
|
---|
| 309 | // "PID:\"greenstone:gs2mgdemo\""; // ignores "gs2mgdemo"
|
---|
| 310 |
|
---|
| 311 | // <snippet> tags interfere when PID field is searched on, set it to 0
|
---|
| 312 | return search(fullSearchTerm, hitPageStart, hitPageSize, 0);
|
---|
| 313 | // return search(fullSearchTerm, hitPageStart, hitPageSize, snippetsMax);
|
---|
| 314 | }
|
---|
| 315 |
|
---|
| 316 | /** Each field is a comma separated list of terms that may be either a word
|
---|
| 317 | * OR a phrase. We're going to separate each term from the list, and put
|
---|
| 318 | * quotes around phrases, then combine all the terms together again with
|
---|
| 319 | * spaces to separate them. Examples:
|
---|
| 320 | * <pre>dc.title:"a phrase" word
|
---|
| 321 | * dc.fulltext: "cyclone val"
|
---|
| 322 | * (ALL_FIELDS) interview gender</pre>
|
---|
| 323 | * This is required to facilitate fielded searching with fedoraGSearch.
|
---|
| 324 | * @param field is a comma separated list of search terms (corresponding
|
---|
| 325 | * to one fieldName) to be reorganised
|
---|
| 326 | * @param fieldName is the name of the field to prepend to the reorganised
|
---|
| 327 | * field value. FieldName ALL_FIELDS is ignored.
|
---|
| 328 | * @return parameter field reorganised such that terms that are phrases
|
---|
| 329 | * are in quotes and each term is separated by a space from the previous one.
|
---|
| 330 | */
|
---|
| 331 | protected String formatSearchTermsInField(String field, String fieldName)
|
---|
| 332 | {
|
---|
| 333 | if(field != null) { // check that the field isn't empty
|
---|
| 334 | //LOG.debug("field: " + field);
|
---|
| 335 | String[] terms = field.split(",");
|
---|
| 336 | field = ""; // we'll build it up again
|
---|
| 337 | for(int i = 0; i < terms.length; i++) {
|
---|
| 338 | // if it contains a space, then the term's a phrase,
|
---|
| 339 | // put it in quotes
|
---|
| 340 | if(terms[i].indexOf(SPACE) != -1) {
|
---|
| 341 | terms[i] = "\"" + terms[i] + "\"";
|
---|
| 342 | }
|
---|
| 343 | field = field + terms[i] + SPACE;
|
---|
| 344 | }
|
---|
| 345 |
|
---|
| 346 | // Prefix it with the name of the field we want to search for
|
---|
| 347 | // the term in. Every field other than allfields has a prefix
|
---|
| 348 | if(!fieldName.equals(ALL_FIELDS)) {
|
---|
| 349 | field = fieldName + ":" + field;
|
---|
| 350 | }
|
---|
| 351 |
|
---|
| 352 | } else field = "";
|
---|
| 353 | return field;
|
---|
| 354 | }
|
---|
| 355 |
|
---|
| 356 | /**
|
---|
| 357 | * Uses FedoraGSearch to perform a search where the query is embedded in
|
---|
| 358 | * fieldedSearchTerms, which not only provides the terms to search on, but
|
---|
| 359 | * also the fields to search the (various) given terms in.
|
---|
| 360 | * @param fieldedSearchTerms is the String specifying all the search terms
|
---|
| 361 | * with their fields (or no field if it should search for the terms in
|
---|
| 362 | * all fields). The terms with no associated search-fields should come first.
|
---|
| 363 | * Search terms may be in quotes.
|
---|
| 364 | * @param snippetsMax is the maximum number of separate snippets containing
|
---|
| 365 | * the searchTerm (snippetsMax number of occurrences of the word in the text)
|
---|
| 366 | * returned.
|
---|
| 367 | * @param hitPageStart is the page of search results to start returning.
|
---|
| 368 | * @param hitPageSize is the number of search result pages to return,
|
---|
| 369 | * starting from hitPageStart.
|
---|
| 370 | * @return the XML (in string format) returned from Fedora Generic Search's
|
---|
| 371 | * gfindObjects method
|
---|
| 372 | */
|
---|
| 373 | public String search(String fieldedSearchTerms,
|
---|
| 374 | int hitPageStart, int hitPageSize, int snippetsMax) throws Exception
|
---|
| 375 | {
|
---|
| 376 | LOG.debug("In method search(String fieldedSearchTerms,...). "
|
---|
| 377 | + "Query is:\n" + fieldedSearchTerms);
|
---|
| 378 |
|
---|
| 379 | final String sort = ""; // returns results from highest to lowest rank
|
---|
| 380 | final String resultPageXslt = "";
|
---|
| 381 | return gFindObjects(fieldedSearchTerms, sort,
|
---|
| 382 | hitPageStart, hitPageSize, snippetsMax,
|
---|
[21573] | 383 | indexName, resultPageXslt);
|
---|
[15222] | 384 | }
|
---|
| 385 |
|
---|
| 386 | /** Call this method with the return value of calling search().
|
---|
| 387 | * Search results are returned in GSearch's XML response format,
|
---|
| 388 | * containing information that includes the PIDs of the documents that
|
---|
| 389 | * matched the search. These PIDs are returned in the array.
|
---|
| 390 | * @param collectionName is the name of the collection to restrict the
|
---|
| 391 | * search results by. If it's "", then results from all collections are
|
---|
| 392 | * returned. Generally, don't want to pass "", because, theoretically,
|
---|
| 393 | * all indexed collections in the repository could be considered and
|
---|
| 394 | * not all of them may be Greenstone collections. If all Greenstone
|
---|
| 395 | * collections should be searched for, pass "greenstone" as the
|
---|
| 396 | * collection name instead.
|
---|
| 397 | * @param searchResult is the Fedora Generic Search XML response returned
|
---|
| 398 | * from performing a gfindObjects() operations.
|
---|
| 399 | * @return an array of the pids of documents found for the search. */
|
---|
| 400 | public String[] getPIDsFromSearchResult(String collectionName,
|
---|
| 401 | String searchResult)
|
---|
| 402 | throws Exception
|
---|
| 403 | {
|
---|
| 404 | final String[] empty = {};
|
---|
| 405 | if(searchResult.equals("")) {
|
---|
| 406 | return empty;
|
---|
| 407 | }
|
---|
| 408 |
|
---|
| 409 | // <?xml version="1.0" encoding="UTF-8"?>
|
---|
| 410 | // <resultPage xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:foxml="info:fedora/fedora-system:def/foxml#" xmlns:zs="http://www.loc.gov/zing/srw/" indexName="FedoraIndex" dateTime="Sat Feb 09 16:43:04 NZDT 2008">
|
---|
| 411 | // <gfindObjects hitTotal="1" resultPageXslt="" hitPageSize="10" hitPageStart="1" query="ds.fulltext:Cyclone">
|
---|
| 412 | // <objects>
|
---|
| 413 | // <object no="1" score="0.24639596">
|
---|
| 414 | // <field name="PID">greenstone:gs2mgdemo-HASH01d667303fe98545f03c14ae</field>
|
---|
| 415 | // <field name="repositoryName">Fedora</field>
|
---|
| 416 | // <field name="object.type">FedoraObject</field>
|
---|
| 417 | // <field name="object.state">Active</field>
|
---|
| 418 | // <field name="object.label">The Courier - N°159 - Sept- Oct 1996 Dossier Inves ... </field>
|
---|
| 419 | // <field name="object.createdDate">2007-11-23T04:23:15.363Z</field>
|
---|
| 420 | // <field name="object.lastModifiedDate">2008-01-15T04:37:49.518Z</field>
|
---|
| 421 | // <field name="dc.title">some title</field>
|
---|
| 422 | // <field name="dc.title">some title2</field>
|
---|
| 423 | // ...
|
---|
| 424 | // <field name="ds.fulltext" snippet="yes">(The 1993 <span class="highlight">cyclone</span>, although</field>
|
---|
| 425 | // <field name="ds.label">Metadata</field>
|
---|
| 426 | // ...
|
---|
| 427 | // </object>
|
---|
| 428 | // </objects>
|
---|
| 429 | // </gfindObjects>
|
---|
| 430 | // 1. Get documentElement, which is <resultPage>
|
---|
| 431 | Element resultPage = FedoraCommons.getResponseAsDOM(builder, searchResult);
|
---|
| 432 | // 2. find the hitTotal value which is the number of results
|
---|
| 433 | // it's an attribute of the sole compulsory <gFindObjects> element
|
---|
| 434 | int hitTotal = 0;
|
---|
| 435 | Element gfindObjectsEl
|
---|
| 436 | = (Element)resultPage.getElementsByTagName(G_FIND_OBJECTS).item(0);
|
---|
| 437 | String value = gfindObjectsEl.getAttribute(HIT_TOTAL);
|
---|
| 438 | hitTotal = Integer.parseInt(value);
|
---|
| 439 | if(hitTotal == 0) {
|
---|
| 440 | return new String[]{};
|
---|
| 441 | }
|
---|
| 442 |
|
---|
| 443 | // Our resulting list of pids will be no more than hitTotal,
|
---|
| 444 | // but may be fewer if we constrain the results to a collection
|
---|
| 445 | Vector pidsInCollection = new Vector(hitTotal);
|
---|
| 446 |
|
---|
| 447 | // Returns a NodeList of all descendant Elements with object tagname
|
---|
| 448 | NodeList objects = gfindObjectsEl.getElementsByTagName(OBJECT);
|
---|
| 449 | for(int i = 0; i < objects.getLength(); i++) {
|
---|
| 450 | // should be the case that pids.length == (digital)objects.getLength()
|
---|
| 451 | // get the PID of each object
|
---|
| 452 | Element object = (Element)objects.item(i);
|
---|
| 453 | NodeList fields = object.getElementsByTagName(FIELD);
|
---|
| 454 |
|
---|
| 455 | for(int j = 0; j < fields.getLength(); j++) {
|
---|
| 456 | // find the sole <field> of <object> where NAME attribute == PID
|
---|
| 457 | Element field = (Element)fields.item(j);
|
---|
| 458 | if(field.getAttribute(NAME).equals(PID)) {
|
---|
| 459 | String pid = FedoraCommons.getValue(field);
|
---|
| 460 | // Either store only the pids which are part of the collection,
|
---|
| 461 | // or, if no collection is specified (=""),then store the pid too
|
---|
| 462 | if(collectionName.equals("") || pid.contains(collectionName)) {
|
---|
| 463 | pidsInCollection.add(pid);
|
---|
| 464 | }
|
---|
| 465 | break; // found pid field, meaning that we have
|
---|
| 466 | // finished for loop on <field>s of this <object>,
|
---|
| 467 | // consider next <object>
|
---|
| 468 | }
|
---|
| 469 | }
|
---|
| 470 | }
|
---|
| 471 | String[] pids = new String[pidsInCollection.size()];
|
---|
| 472 | pidsInCollection.toArray(pids);
|
---|
| 473 | return pids;
|
---|
| 474 | }
|
---|
| 475 |
|
---|
| 476 | public static void main(String[] args) {
|
---|
| 477 | try {
|
---|
| 478 | GSearchConnection searcher = new GSearchConnection(
|
---|
[15437] | 479 | "http://localhost:8080/fedoragsearch/services/FgsOperations?wsdl", "FedoraIndex");
|
---|
[15222] | 480 |
|
---|
| 481 |
|
---|
| 482 | HashMap map = new HashMap();
|
---|
| 483 | map.put(GSearchConnection.ALL_FIELDS, "gender inequalities");
|
---|
| 484 | map.put(GSearchConnection.FULLTEXT, "cyclone val,worst storm");
|
---|
| 485 | //map.put(GSearchConnection.ALL_FIELDS, "\"gender inequalities\"");
|
---|
| 486 | //map.put(GSearchConnection.FULLTEXT, "\"cyclone val\",\"worst storm\"");
|
---|
| 487 | String searchResult = searcher.search(map, 1, 10); //snippetsMax: 3);
|
---|
| 488 | System.out.println(searchResult);
|
---|
| 489 |
|
---|
| 490 | String[] pids = searcher.getPIDsFromSearchResult("gs2mgdemo", searchResult);
|
---|
| 491 | System.err.println("Found pids for search:\n");
|
---|
| 492 | for(int i = 0; i < pids.length; i++) {
|
---|
| 493 | System.out.println(pids[i]);
|
---|
| 494 | }
|
---|
| 495 |
|
---|
| 496 | //searchResult = searcher.search("", "minh", 0, 50, 50);
|
---|
| 497 | //System.err.println(searchResult);
|
---|
| 498 |
|
---|
| 499 | //String searchTerms = "cyclone dc.title:interview dc.title:gender";
|
---|
| 500 | String searchTerms="\"gender inequalities\" ds.fulltext:\"cyclone val\" ds.fulltext:\"worst storm\"";
|
---|
| 501 | searchResult = searcher.search(searchTerms, 1, 10, 3);
|
---|
| 502 | System.out.println(searchResult);
|
---|
| 503 |
|
---|
| 504 | // Not restricting results to any collection (search results from
|
---|
| 505 | // all collections)
|
---|
| 506 | pids = searcher.getPIDsFromSearchResult("", searchResult);
|
---|
| 507 | System.err.println("Found pids for search: ");
|
---|
| 508 | for(int i = 0; i < pids.length; i++) {
|
---|
| 509 | System.out.println(pids[i]);
|
---|
| 510 | }
|
---|
| 511 |
|
---|
| 512 | searchResult = searcher.search("ds.fulltext", "cyclone", 1, 10, 3);
|
---|
| 513 | //String searchResult = searcher.search("ds.label", "hierarchical", 1, 10, 3);
|
---|
| 514 | // System.out.println(searcher.search("ds.fulltext", "Pinky", 1, 10, 3));
|
---|
| 515 | System.out.println(searchResult);
|
---|
| 516 |
|
---|
| 517 | pids = null;
|
---|
| 518 | pids = searcher.getPIDsFromSearchResult("", searchResult);
|
---|
| 519 | System.err.println("Found pids for search: ");
|
---|
| 520 | for(int i = 0; i < pids.length; i++) {
|
---|
| 521 | System.out.println(pids[i]);
|
---|
| 522 | }
|
---|
| 523 |
|
---|
| 524 | }catch(Exception e) {
|
---|
| 525 | System.err.println(e.getMessage());
|
---|
| 526 | }
|
---|
| 527 |
|
---|
| 528 | }
|
---|
| 529 |
|
---|
| 530 | } |
---|