/**
 *#########################################################################
 * GSearchConnection.java - works with the demo-client for Greenstone 3,
 * of the Greenstone digital library suite from the New Zealand Digital
 * Library Project at the University of Waikato, New Zealand.
 * <BR><BR>
 * Copyright (C) 2008 New Zealand Digital Library Project
 * <BR><BR>
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 * <BR><BR>
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *########################################################################
 */

package org.greenstone.fedora.services;

import java.net.MalformedURLException;
import java.net.URL;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Vector;

import javax.xml.namespace.QName;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.rpc.ServiceException;

import org.apache.axis.client.Call;
import org.apache.axis.client.Service;
import org.apache.log4j.Logger;
import org.w3c.dom.Element;
import org.w3c.dom.NodeList;


44 | /**
|
---|
45 | * Class GSearchConnection connects to FedoraGSearch's web services.
|
---|
46 | * FedorGSearch offers indexing and full-text search functionality for
|
---|
47 | * Fedora repositories. Its search web service (method gFindObjects)
|
---|
48 | * returns the response of a search as XML.
|
---|
49 | * GSearchConnection offers more convenient methods that extract just
|
---|
50 | * the parts of search results that FedoraGS3Connection needs and returns
|
---|
51 | * that.
|
---|
52 | * @author ak19
|
---|
53 | */
|
---|
54 | public class GSearchConnection implements FedoraToGS3Interface.Constants {
|
---|
55 | /** Logger for this class. */
|
---|
56 | private static final Logger LOG = Logger.getLogger(
|
---|
57 | GSearchConnection.class.getName());
|
---|
58 |
|
---|
59 | /* Accessing the web services of Fedora Generic Search */
|
---|
60 | protected static String NAMESPACE_URI = "http://server.fedoragsearch.defxws.dk";
|
---|
61 | protected static String SERVICE_NAME = "OperationsService";
|
---|
62 |
|
---|
63 | /** The names of the methods we use of Fedora Generic Search's web services
|
---|
64 | * are declared here as static final Strings. */
|
---|
65 | protected static final String G_FIND_OBJECTS = "gfindObjects";
|
---|
66 |
|
---|
67 | /* Some fixed string literals that will be encountered in the response XMLs
|
---|
68 | * that FedoraGSearch's method gFindObjects() returns. */
|
---|
69 | protected static final String PID = "PID";
|
---|
70 | protected static final String HIT_TOTAL = "hitTotal";
|
---|
71 | protected static final String OBJECT = "object";
|
---|
72 | protected static final String FIELD = "field";
|
---|
73 | protected static final String NAME = "name";
|
---|
74 | protected static final String DC_TITLE_FIELD = "dc.title";
|
---|
75 | protected static final String FULLTEXT_FIELD = "ds.fulltext";
|
---|
76 |
|
---|
77 | /** separator used internally to separate values of a search field */
|
---|
78 | protected static final String SPACE = " ";
|
---|
79 |
|
---|
80 | /** The name of the Index wherein FedoraGSearch has indexed all the GS3 docs.
|
---|
81 | * This final member is public here so that others may read the indexName
|
---|
82 | * that this GSearchConnection works with. */
|
---|
83 | public final String indexName;
|
---|
84 |
|
---|
85 | /** The Service object used to connect to the FedoraGSearch web services */
|
---|
86 | protected final Service service;
|
---|
87 | /** The Call object used to connect to the FedoraGSearch web services */
|
---|
88 | protected final Call call;
|
---|
89 | /** The portName object used when connecting to FedoraGSearch's web services */
|
---|
90 | protected final QName portName;
|
---|
91 |
|
---|
92 | /** A DocumentBuilder object used to construct and parse XML */
|
---|
93 | protected final DocumentBuilder builder;
|
---|
94 |
|
---|
95 |
|
---|
96 |
|
---|
97 | /** Constructor that takes a String representing the url of the WSDL
|
---|
98 | * file for FedoraGSearch's web services, and tries to establish a
|
---|
99 | * connection to those web services.
|
---|
100 | * @param wsdlFileLocation is a String representing the url of the WSDL file
|
---|
101 | * @param indexName is the name of the index that Fedora Generic Search
|
---|
102 | * should work with (the index wherein the indexed GS3 documents have been
|
---|
103 | * placed).
|
---|
104 | */
|
---|
105 | public GSearchConnection(String wsdlFileLocation, String indexName)
|
---|
106 | throws MalformedURLException, ServiceException,
|
---|
107 | ParserConfigurationException
|
---|
108 | {
|
---|
109 | this.indexName = indexName;
|
---|
110 |
|
---|
111 | URL wsdlURL = new URL(wsdlFileLocation);
|
---|
112 | service = new Service(wsdlURL, new QName(NAMESPACE_URI, SERVICE_NAME));
|
---|
113 | //call = (Call) service.createCall(new QName(NAMESPACE_URI, PORT_NAME));
|
---|
114 |
|
---|
115 | Iterator i = service.getPorts();
|
---|
116 | // FIXME: can we just assume it's the first port of service SERVICE_NAME?
|
---|
117 | // Do we need to work out which port to get??? Remember, the port names
|
---|
118 | // vary between wsdls though!
|
---|
119 | if(i.hasNext()) {
|
---|
120 | portName = (QName)i.next();
|
---|
121 | call = (Call) service.createCall(portName);
|
---|
122 |
|
---|
123 | String endpointLocation = call.getTargetEndpointAddress();
|
---|
124 | LOG.debug("Wsdl file url: " + wsdlURL
|
---|
125 | + "\nEndpoint location is: " + endpointLocation);
|
---|
126 | } else { // should never happen: a service without a port
|
---|
127 | // portName = null;
|
---|
128 | call = (Call)service.createCall();
|
---|
129 | // FIXME: possibly manually get the ports and choose
|
---|
130 | // one containing "FEDORA" and "API-A" in its name?
|
---|
131 | throw new ServiceException(this.getClass() + ": No port in wsdl file");
|
---|
132 | }
|
---|
133 |
|
---|
134 | // we can set the portName which remains constant for the various methods
|
---|
135 | // call.setPortName(portName);
|
---|
136 |
|
---|
137 | DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
|
---|
138 | builder = factory.newDocumentBuilder(); // to create XML docs
|
---|
139 | }
|
---|
140 |
|
---|
141 |
|
---|
142 | /**
|
---|
143 | * Method to invoke gfindObjects operation of Fedora Generic Search
|
---|
144 | * web services.
|
---|
145 | *
|
---|
146 | * Parameter types, parameter order and return type of gFindObjects are as
|
---|
147 | * obtained from the wsdl file for the Fedora Generic Search web services
|
---|
148 | * located at:
|
---|
149 | * http://localhost:8080/fedoragsearch/services/FgsOperations?wsdl
|
---|
150 | * <wsdl:message name="gfindObjectsRequest">
|
---|
151 | * <wsdl:part name="query" type="xsd:string"/>
|
---|
152 | * <wsdl:part name="sort" type="xsd:string"/>
|
---|
153 | * <wsdl:part name="hitPageStart" type="xsd:int"/>
|
---|
154 | * <wsdl:part name="hitPageSize" type="xsd:int"/>
|
---|
155 | * <wsdl:part name="snippetsMax" type="xsd:int"/>
|
---|
156 | * <wsdl:part name="fieldMaxLength" type="xsd:int"/>
|
---|
157 | * <wsdl:part name="indexName" type="xsd:string"/>
|
---|
158 | * <wsdl:part name="resultPageXslt" type="xsd:string"/>
|
---|
159 | * </wsdl:message>
|
---|
160 | *
|
---|
161 | * <wsdl:message name="gfindObjectsResponse">
|
---|
162 | * <wsdl:part name="gfindObjectsReturn" type="xsd:string"/>
|
---|
163 | * </wsdl:message>
|
---|
164 | *
|
---|
165 | * <wsdl:operation name="gfindObjects"
|
---|
166 | * parameterOrder="query sort hitPageStart hitPageSize snippetsMax
|
---|
167 | * fieldMaxLength indexName resultPageXslt">
|
---|
168 | *
|
---|
169 | * This method works: it searches the dc.title field of our FedoraIndex
|
---|
170 | * for the term (e.g. "interview") and the result returned is an XML String.
|
---|
171 | *
|
---|
172 | * There's no example on how to call gFindObjects with parameters. In
|
---|
173 | * particular, I don't know what values the parameter <b>sort</b> can take.
|
---|
174 | * But topazproject has an example on how to call updateIndex().
|
---|
175 | * @see <a href="http://www.topazproject.org/trac/wiki/FedoraSearch?format=txt">An example on how to call updateIndex() with parameters</a>
|
---|
176 | * @see <a href="http://ws.apache.org/axis/java/apiDocs/org/apache/axis/client/Service.html">Axis Service class</a>
|
---|
177 | * @see <a href="http://ws.apache.org/axis/java/apiDocs/javax/xml/rpc/Call.html">Axis RPC Call, for specification of interface Call</a>
|
---|
178 | * @see <a href="http://ws.apache.org/axis/java/apiDocs/org/apache/axis/client/Call.html">Axis client Call class, for implementation of interface Call</a>
|
---|
179 | */
|
---|
180 | protected String gFindObjects(String searchFieldedTerms, String sort,
|
---|
181 | int hitPageStart, int hitPageSize, int snippetsMax,
|
---|
182 | /*int fieldMaxLength,*/ String indexName, String resultPageXslt) throws Exception
|
---|
183 | {
|
---|
184 | // "Prefills as much info from the WSDL as it can. Right now it's SOAPAction,
|
---|
185 | // operation qname, parameter types and return type of the Web Service.
|
---|
186 | // This method considers that port name and target endpoint address have
|
---|
187 | // already been set. This is useful when you want to use the same Call instance
|
---|
188 | // for several calls on the same Port. NOTE: Not part of JAX-RPC specification."
|
---|
189 |
|
---|
190 | //call.removeAllParameters(); // no need for this when using setOpName below
|
---|
191 | call.setOperationName(G_FIND_OBJECTS);
|
---|
192 |
|
---|
193 | // Max num of chars in field vals returned. Since return values exceeding
|
---|
194 | // maxlength will be truncated, ensure length suffices for long PIDs returned.
|
---|
195 | // The only element of the response XML we'll be using is the PID of the document
|
---|
196 | // in which the searchTerm occurred.
|
---|
197 | final int fieldMaxLength = 100; // NOT TRUE: max length in words of field values
|
---|
198 | // returned. E.g. snippet sizes will be reduced to fieldMaxLength words too.
|
---|
199 |
|
---|
200 | // This is the method call for Fedora 2's GSearch
|
---|
201 | //String valueFound =(String)call.invoke( new Object[] {
|
---|
202 | // searchFieldedTerms, sort, hitPageStart, hitPageSize, snippetsMax,
|
---|
203 | // fieldMaxLength, indexName, resultPageXslt} );
|
---|
204 |
|
---|
205 | // The method call for GSearch 2.2 of Fedora 3 takes the args in a different order:
|
---|
206 | String valueFound =(String)call.invoke( new Object[] {
|
---|
207 | searchFieldedTerms, hitPageStart, hitPageSize, snippetsMax,
|
---|
208 | fieldMaxLength, indexName, sort, resultPageXslt} );
|
---|
209 |
|
---|
210 | // for debugging
|
---|
211 | //javax.swing.JOptionPane.showMessageDialog(null, "GSearchConnection.gFindObjects:" + valueFound);
|
---|
212 |
|
---|
213 | return valueFound;
|
---|
214 | }
|
---|
215 |
|
---|
216 | /**
|
---|
217 | * Method that performs a search for the given searchTerm inside the given
|
---|
218 | * indexed field.
|
---|
219 | * @param searchFieldName is the name of the indexed field within which the
|
---|
220 | * given searchTerm is to be searched for.
|
---|
221 | * @param searchTerm is the term to be searched for.
|
---|
222 | * @param hitPageStart is the page of search results to start returning.
|
---|
223 | * @param hitPageSize is the number of search result pages to return,
|
---|
224 | * starting from hitPageStart.
|
---|
225 | * @param snippetsMax is the maximum number of separate snippets containing
|
---|
226 | * the searchTerm that are to be returned. (snippetsMax or a fewer number of
|
---|
227 | * occurrences of the word in the text will be returned)
|
---|
228 | */
|
---|
229 | public String search(String searchFieldName, String searchTerm,
|
---|
230 | int hitPageStart, int hitPageSize, int snippetsMax) throws Exception
|
---|
231 | {
|
---|
232 | final String sort = ""; // returns results from highest to lowest rank
|
---|
233 | final String resultPageXslt = "";
|
---|
234 |
|
---|
235 | // when a fieldname is given to search in (ds.fulltext, dc.title)
|
---|
236 | // then prepend that followed by a COLON to the searchTerm.
|
---|
237 | final String fullSearchTerm = searchFieldName.equals("") ?
|
---|
238 | searchTerm : (searchFieldName+":"+searchTerm);
|
---|
239 |
|
---|
240 | return gFindObjects(fullSearchTerm, sort,
|
---|
241 | hitPageStart, hitPageSize, snippetsMax,
|
---|
242 | indexName, resultPageXslt);
|
---|
243 | }
|
---|
244 |
|
---|
245 | /**
|
---|
246 | * FedoraGSearch accepts a query of the form:
|
---|
247 | * <code><"cyclone val" "Gender Inequalities" ds.fulltext:"cyclone val"
|
---|
248 | * ds.fulltext:"worst storm"></code>
|
---|
249 | * where the first two phrases are searched for in all indexed fields,
|
---|
250 | * (in this case dc.title and ds.fulltext), while the last two are
|
---|
251 | * searched for in the ds.fulltext field.
|
---|
252 | * Another example:
|
---|
253 | * <code><gender dc.title:interview ds.fulltext:"cyclone val">
|
---|
254 | * titles and fulltexts are searched for "gender", while title index
|
---|
255 | * is searched for "interview" and fulltexts are searched for the phrase
|
---|
256 | * "cyclone val"</code>
|
---|
257 | * @param fieldsToSearchTerms is a Hashmap of searchfields and
|
---|
258 | * associated search terms (words or phrases). The terms are in a
|
---|
259 | * comma-separated list. fieldsToSearchTerms is a Hashmap of
|
---|
260 | * (Searchfields, associated-searchTerms) pairs. It can contain 3
|
---|
261 | * searchfields: allfields, titles, text. The value for each is a
|
---|
262 | * comma-separated list of search terms in that field.
|
---|
263 | * Internally the field names get converted to what FedoraGSearch's
|
---|
264 | * gfindObjects understands: titles becomes dc.title:, text becomes
|
---|
265 | * ds.fulltext and allfields becomes nothing.
|
---|
266 | * @param hitPageStart is the page of search results to start returning.
|
---|
267 | * @param hitPageSize is the number of search result pages to return,
|
---|
268 | * starting from hitPageStart.
|
---|
269 | * @return the XML (in string format) returned from Fedora Generic Search's
|
---|
270 | * gfindObjects method
|
---|
271 | *
|
---|
272 | */
|
---|
273 | public String search(Map fieldsToSearchTerms,
|
---|
274 | int hitPageStart, int hitPageSize)
|
---|
275 | throws Exception
|
---|
276 | {
|
---|
277 | LOG.debug("In FedoraGS3's GSearchConnection.search(Map,...)");
|
---|
278 |
|
---|
279 | // HashMap consists of several (key, value) entries, 3 of
|
---|
280 | // which will be dealt with here:
|
---|
281 | // - allfields, <comma separated list of search terms/phrases>
|
---|
282 | // - titles, <comma separated list of search terms/phrases>
|
---|
283 | // - (full)text, <comma separated list of search terms/phrases>
|
---|
284 | // We need to obtain each value and change the separator to space:
|
---|
285 | String allfields = (String)fieldsToSearchTerms.get(ALL_FIELDS);
|
---|
286 | String titles = (String)fieldsToSearchTerms.get(ALL_TITLES);
|
---|
287 | String fulltexts = (String)fieldsToSearchTerms.get(FULLTEXT);
|
---|
288 |
|
---|
289 | // Each field is a comma separated list of terms that may be
|
---|
290 | // either a word OR a phrase.
|
---|
291 | // We're going to separate each term from the list,
|
---|
292 | // and put quotes around phrases, then combine all the terms
|
---|
293 | // together again with spaces to separate them.
|
---|
294 | allfields = formatSearchTermsInField(allfields, ALL_FIELDS);
|
---|
295 | // ALL_FIELDS has no field name
|
---|
296 | titles = formatSearchTermsInField(titles, DC_TITLE_FIELD);
|
---|
297 | fulltexts = formatSearchTermsInField(fulltexts, FULLTEXT_FIELD);
|
---|
298 |
|
---|
299 | String fullSearchTerm = allfields + titles + fulltexts;
|
---|
300 | if(fullSearchTerm.trim().equals("")) { // nothing to search on
|
---|
301 | return "";
|
---|
302 | }
|
---|
303 |
|
---|
304 | // Finally, restrict the search to the Greenstone digital objects
|
---|
305 | // stored in Fedora
|
---|
306 | final String greenstonePID
|
---|
307 | = PID + FedoraGS3DL.COLON + FedoraGS3DL.GREENSTONE;
|
---|
308 | //"PID:\"greenstone\"";
|
---|
309 | fullSearchTerm += greenstonePID;
|
---|
310 | //! Everything after the colon in the pid is ignored by FedoraGSearch:
|
---|
311 | // "PID:\"greenstone:gs2mgdemo\""; // ignores "gs2mgdemo"
|
---|
312 |
|
---|
313 | // <snippet> tags interfere when PID field is searched on, set it to 0
|
---|
314 | return search(fullSearchTerm, hitPageStart, hitPageSize, 0);
|
---|
315 | // return search(fullSearchTerm, hitPageStart, hitPageSize, snippetsMax);
|
---|
316 | }
|
---|
317 |
|
---|
318 | /** Each field is a comma separated list of terms that may be either a word
|
---|
319 | * OR a phrase. We're going to separate each term from the list, and put
|
---|
320 | * quotes around phrases, then combine all the terms together again with
|
---|
321 | * spaces to separate them. Examples:
|
---|
322 | * <pre>dc.title:"a phrase" word
|
---|
323 | * dc.fulltext: "cyclone val"
|
---|
324 | * (ALL_FIELDS) interview gender</pre>
|
---|
325 | * This is required to facilitate fielded searching with fedoraGSearch.
|
---|
326 | * @param field is a comma separated list of search terms (corresponding
|
---|
327 | * to one fieldName) to be reorganised
|
---|
328 | * @param fieldName is the name of the field to prepend to the reorganised
|
---|
329 | * field value. FieldName ALL_FIELDS is ignored.
|
---|
330 | * @return parameter field reorganised such that terms that are phrases
|
---|
331 | * are in quotes and each term is separated by a space from the previous one.
|
---|
332 | */
|
---|
333 | protected String formatSearchTermsInField(String field, String fieldName)
|
---|
334 | {
|
---|
335 | if(field != null) { // check that the field isn't empty
|
---|
336 | //LOG.debug("field: " + field);
|
---|
337 | String[] terms = field.split(",");
|
---|
338 | field = ""; // we'll build it up again
|
---|
339 | for(int i = 0; i < terms.length; i++) {
|
---|
340 | // if it contains a space, then the term's a phrase,
|
---|
341 | // put it in quotes
|
---|
342 | if(terms[i].indexOf(SPACE) != -1) {
|
---|
343 | terms[i] = "\"" + terms[i] + "\"";
|
---|
344 | }
|
---|
345 | field = field + terms[i] + SPACE;
|
---|
346 | }
|
---|
347 |
|
---|
348 | // Prefix it with the name of the field we want to search for
|
---|
349 | // the term in. Every field other than allfields has a prefix
|
---|
350 | if(!fieldName.equals(ALL_FIELDS)) {
|
---|
351 | field = fieldName + ":" + field;
|
---|
352 | }
|
---|
353 |
|
---|
354 | } else field = "";
|
---|
355 | return field;
|
---|
356 | }
|
---|
357 |
|
---|
358 | /**
|
---|
359 | * Uses FedoraGSearch to perform a search where the query is embedded in
|
---|
360 | * fieldedSearchTerms, which not only provides the terms to search on, but
|
---|
361 | * also the fields to search the (various) given terms in.
|
---|
362 | * @param fieldedSearchTerms is the String specifying all the search terms
|
---|
363 | * with their fields (or no field if it should search for the terms in
|
---|
364 | * all fields). The terms with no associated search-fields should come first.
|
---|
365 | * Search terms may be in quotes.
|
---|
366 | * @param snippetsMax is the maximum number of separate snippets containing
|
---|
367 | * the searchTerm (snippetsMax number of occurrences of the word in the text)
|
---|
368 | * returned.
|
---|
369 | * @param hitPageStart is the page of search results to start returning.
|
---|
370 | * @param hitPageSize is the number of search result pages to return,
|
---|
371 | * starting from hitPageStart.
|
---|
372 | * @return the XML (in string format) returned from Fedora Generic Search's
|
---|
373 | * gfindObjects method
|
---|
374 | */
|
---|
375 | public String search(String fieldedSearchTerms,
|
---|
376 | int hitPageStart, int hitPageSize, int snippetsMax) throws Exception
|
---|
377 | {
|
---|
378 | LOG.debug("In method search(String fieldedSearchTerms,...). "
|
---|
379 | + "Query is:\n" + fieldedSearchTerms);
|
---|
380 |
|
---|
381 | final String sort = ""; // returns results from highest to lowest rank
|
---|
382 | final String resultPageXslt = "";
|
---|
383 | return gFindObjects(fieldedSearchTerms, sort,
|
---|
384 | hitPageStart, hitPageSize, snippetsMax,
|
---|
385 | indexName, resultPageXslt);
|
---|
386 | }
|
---|
387 |
|
---|
388 | /** Call this method with the return value of calling search().
|
---|
389 | * Search results are returned in GSearch's XML response format,
|
---|
390 | * containing information that includes the PIDs of the documents that
|
---|
391 | * matched the search. These PIDs are returned in the array.
|
---|
392 | * @param collectionName is the name of the collection to restrict the
|
---|
393 | * search results by. If it's "", then results from all collections are
|
---|
394 | * returned. Generally, don't want to pass "", because, theoretically,
|
---|
395 | * all indexed collections in the repository could be considered and
|
---|
396 | * not all of them may be Greenstone collections. If all Greenstone
|
---|
397 | * collections should be searched for, pass "greenstone" as the
|
---|
398 | * collection name instead.
|
---|
399 | * @param searchResult is the Fedora Generic Search XML response returned
|
---|
400 | * from performing a gfindObjects() operations.
|
---|
401 | * @return an array of the pids of documents found for the search. */
|
---|
402 | public String[] getPIDsFromSearchResult(String collectionName,
|
---|
403 | String searchResult)
|
---|
404 | throws Exception
|
---|
405 | {
|
---|
406 | final String[] empty = {};
|
---|
407 | if(searchResult.equals("")) {
|
---|
408 | return empty;
|
---|
409 | }
|
---|
410 |
|
---|
411 | // <?xml version="1.0" encoding="UTF-8"?>
|
---|
412 | // <resultPage xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:foxml="info:fedora/fedora-system:def/foxml#" xmlns:zs="http://www.loc.gov/zing/srw/" indexName="FedoraIndex" dateTime="Sat Feb 09 16:43:04 NZDT 2008">
|
---|
413 | // <gfindObjects hitTotal="1" resultPageXslt="" hitPageSize="10" hitPageStart="1" query="ds.fulltext:Cyclone">
|
---|
414 | // <objects>
|
---|
415 | // <object no="1" score="0.24639596">
|
---|
416 | // <field name="PID">greenstone:gs2mgdemo-HASH01d667303fe98545f03c14ae</field>
|
---|
417 | // <field name="repositoryName">Fedora</field>
|
---|
418 | // <field name="object.type">FedoraObject</field>
|
---|
419 | // <field name="object.state">Active</field>
|
---|
420 | // <field name="object.label">The Courier - N°159 - Sept- Oct 1996 Dossier Inves ... </field>
|
---|
421 | // <field name="object.createdDate">2007-11-23T04:23:15.363Z</field>
|
---|
422 | // <field name="object.lastModifiedDate">2008-01-15T04:37:49.518Z</field>
|
---|
423 | // <field name="dc.title">some title</field>
|
---|
424 | // <field name="dc.title">some title2</field>
|
---|
425 | // ...
|
---|
426 | // <field name="ds.fulltext" snippet="yes">(The 1993 <span class="highlight">cyclone</span>, although</field>
|
---|
427 | // <field name="ds.label">Metadata</field>
|
---|
428 | // ...
|
---|
429 | // </object>
|
---|
430 | // </objects>
|
---|
431 | // </gfindObjects>
|
---|
432 | // 1. Get documentElement, which is <resultPage>
|
---|
433 | Element resultPage = FedoraCommons.getResponseAsDOM(builder, searchResult);
|
---|
434 | // 2. find the hitTotal value which is the number of results
|
---|
435 | // it's an attribute of the sole compulsory <gFindObjects> element
|
---|
436 | int hitTotal = 0;
|
---|
437 | Element gfindObjectsEl
|
---|
438 | = (Element)resultPage.getElementsByTagName(G_FIND_OBJECTS).item(0);
|
---|
439 | String value = gfindObjectsEl.getAttribute(HIT_TOTAL);
|
---|
440 | hitTotal = Integer.parseInt(value);
|
---|
441 | if(hitTotal == 0) {
|
---|
442 | return new String[]{};
|
---|
443 | }
|
---|
444 |
|
---|
445 | // Our resulting list of pids will be no more than hitTotal,
|
---|
446 | // but may be fewer if we constrain the results to a collection
|
---|
447 | Vector pidsInCollection = new Vector(hitTotal);
|
---|
448 |
|
---|
449 | // Returns a NodeList of all descendant Elements with object tagname
|
---|
450 | NodeList objects = gfindObjectsEl.getElementsByTagName(OBJECT);
|
---|
451 | for(int i = 0; i < objects.getLength(); i++) {
|
---|
452 | // should be the case that pids.length == (digital)objects.getLength()
|
---|
453 | // get the PID of each object
|
---|
454 | Element object = (Element)objects.item(i);
|
---|
455 | NodeList fields = object.getElementsByTagName(FIELD);
|
---|
456 |
|
---|
457 | for(int j = 0; j < fields.getLength(); j++) {
|
---|
458 | // find the sole <field> of <object> where NAME attribute == PID
|
---|
459 | Element field = (Element)fields.item(j);
|
---|
460 | if(field.getAttribute(NAME).equals(PID)) {
|
---|
461 | String pid = FedoraCommons.getValue(field);
|
---|
462 | // Either store only the pids which are part of the collection,
|
---|
463 | // or, if no collection is specified (=""),then store the pid too
|
---|
464 | if(collectionName.equals("") || pid.contains(collectionName)) {
|
---|
465 | pidsInCollection.add(pid);
|
---|
466 | }
|
---|
467 | break; // found pid field, meaning that we have
|
---|
468 | // finished for loop on <field>s of this <object>,
|
---|
469 | // consider next <object>
|
---|
470 | }
|
---|
471 | }
|
---|
472 | }
|
---|
473 | String[] pids = new String[pidsInCollection.size()];
|
---|
474 | pidsInCollection.toArray(pids);
|
---|
475 | return pids;
|
---|
476 | }
|
---|
477 |
|
---|
478 | public static void main(String[] args) {
|
---|
479 | try {
|
---|
480 | GSearchConnection searcher = new GSearchConnection(
|
---|
481 | "http://localhost:8080/fedoragsearch/services/FgsOperations?wsdl", "FedoraIndex");
|
---|
482 |
|
---|
483 |
|
---|
484 | HashMap map = new HashMap();
|
---|
485 | map.put(GSearchConnection.ALL_FIELDS, "gender inequalities");
|
---|
486 | map.put(GSearchConnection.FULLTEXT, "cyclone val,worst storm");
|
---|
487 | //map.put(GSearchConnection.ALL_FIELDS, "\"gender inequalities\"");
|
---|
488 | //map.put(GSearchConnection.FULLTEXT, "\"cyclone val\",\"worst storm\"");
|
---|
489 | String searchResult = searcher.search(map, 1, 10); //snippetsMax: 3);
|
---|
490 | System.out.println(searchResult);
|
---|
491 |
|
---|
492 | String[] pids = searcher.getPIDsFromSearchResult("gs2mgdemo", searchResult);
|
---|
493 | System.err.println("Found pids for search:\n");
|
---|
494 | for(int i = 0; i < pids.length; i++) {
|
---|
495 | System.out.println(pids[i]);
|
---|
496 | }
|
---|
497 |
|
---|
498 | //searchResult = searcher.search("", "minh", 0, 50, 50);
|
---|
499 | //System.err.println(searchResult);
|
---|
500 |
|
---|
501 | //String searchTerms = "cyclone dc.title:interview dc.title:gender";
|
---|
502 | String searchTerms="\"gender inequalities\" ds.fulltext:\"cyclone val\" ds.fulltext:\"worst storm\"";
|
---|
503 | searchResult = searcher.search(searchTerms, 1, 10, 3);
|
---|
504 | System.out.println(searchResult);
|
---|
505 |
|
---|
506 | // Not restricting results to any collection (search results from
|
---|
507 | // all collections)
|
---|
508 | pids = searcher.getPIDsFromSearchResult("", searchResult);
|
---|
509 | System.err.println("Found pids for search: ");
|
---|
510 | for(int i = 0; i < pids.length; i++) {
|
---|
511 | System.out.println(pids[i]);
|
---|
512 | }
|
---|
513 |
|
---|
514 | searchResult = searcher.search("ds.fulltext", "cyclone", 1, 10, 3);
|
---|
515 | //String searchResult = searcher.search("ds.label", "hierarchical", 1, 10, 3);
|
---|
516 | // System.out.println(searcher.search("ds.fulltext", "Pinky", 1, 10, 3));
|
---|
517 | System.out.println(searchResult);
|
---|
518 |
|
---|
519 | pids = null;
|
---|
520 | pids = searcher.getPIDsFromSearchResult("", searchResult);
|
---|
521 | System.err.println("Found pids for search: ");
|
---|
522 | for(int i = 0; i < pids.length; i++) {
|
---|
523 | System.out.println(pids[i]);
|
---|
524 | }
|
---|
525 |
|
---|
526 | }catch(Exception e) {
|
---|
527 | System.err.println(e.getMessage());
|
---|
528 | }
|
---|
529 |
|
---|
530 | }
|
---|
531 |
|
---|
532 | } |
---|