1 | /**
|
---|
2 | *#########################################################################
|
---|
3 | * QueryResponseData.java - part of the demo-client for Greenstone 3,
|
---|
4 | * of the Greenstone digital library suite from the New Zealand Digital
|
---|
5 | * Library Project at the University of Waikato, New Zealand.
|
---|
6 | * <BR><BR>
|
---|
7 | * Copyright (C) 2008 New Zealand Digital Library Project
|
---|
8 | * <BR><BR>
|
---|
9 | * This program is free software; you can redistribute it and/or modify
|
---|
10 | * it under the terms of the GNU General Public License as published by
|
---|
11 | * the Free Software Foundation; either version 2 of the License, or
|
---|
12 | * (at your option) any later version.
|
---|
13 | * <BR><BR>
|
---|
14 | * This program is distributed in the hope that it will be useful,
|
---|
15 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
|
---|
16 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
---|
17 | * GNU General Public License for more details.
|
---|
18 | *########################################################################
|
---|
19 | */
|
---|
20 |
|
---|
21 | package org.greenstone.gs3client.data;
|
---|
22 |
|
---|
23 | import java.util.Vector;
|
---|
24 | import java.util.HashMap;
|
---|
25 |
|
---|
26 | import org.w3c.dom.Element;
|
---|
27 | import org.greenstone.gsdl3.util.GSXML;
|
---|
28 |
|
---|
29 |
|
---|
30 | // 2 classes in file: public-access QueryResponseData and TermData,
|
---|
31 | // the second is a static nested class of the first.
|
---|
32 |
|
---|
33 | /**
|
---|
34 | * Represents the data fields that may be present in a response
|
---|
35 | * to a Query-process request. Specifically, this class keeps track of
|
---|
36 | * all the DocumentNodes returned in response to a query request.
|
---|
37 | * It inherits Map nodeIDsToNodes of (nodeID, NodeData ref) pairs which
|
---|
38 | * maintains the NodeData object refs in order of their insertion into
|
---|
39 | * the Map (LinkedHashMap).
|
---|
40 | * !!! QueryResponseData will only store DocumentNodeData object refs in
|
---|
41 | * the nodeIDsToNodes Map.
|
---|
42 | * An object of this class can be reused after instatiation by calling
|
---|
43 | * setResponseData() with a new query response XML message. This will
|
---|
44 | * first call clear() to clear/release its references to all the old data.
|
---|
45 | * @author ak19
|
---|
46 | */
|
---|
47 | public class QueryResponseData extends ResponseData {
|
---|
48 | /* Storing information returned about the results of a query
|
---|
49 | * in a Query response XML message */
|
---|
50 | protected String numDocsMatched;
|
---|
51 | protected String numDocsReturned;
|
---|
52 | protected String queryField; // store it just in case we ever need it
|
---|
53 | protected TermData[] termList;
|
---|
54 | /** Metadata of the query's response - not a documentNode's metadata! */
|
---|
55 | protected HashMap metadataList;
|
---|
56 |
|
---|
57 | /** Default constructor */
|
---|
58 | public QueryResponseData() {
|
---|
59 | super();
|
---|
60 | metadataList = new HashMap();
|
---|
61 | }
|
---|
62 |
|
---|
63 | /** Resets the internal data members of this QueryResponseData object of
|
---|
64 | * their values so that this QueryResponseData can be reused for the
|
---|
65 | * next Query response message. */
|
---|
66 | public void clear() {
|
---|
67 | super.clear(); // clears Map nodeIDsToNodes of (nodeID, NodeData ref) pairs
|
---|
68 |
|
---|
69 | this.metadataList.clear();
|
---|
70 | termList = null;
|
---|
71 | numDocsMatched = numDocsReturned = queryField = "";
|
---|
72 | System.gc();
|
---|
73 | }
|
---|
74 |
|
---|
75 | /** Given the response to a query message (XML with root <message>
|
---|
76 | * or <response> tag), a QueryResponseData object is created
|
---|
77 | * to store all the document Identifiers and document data returned
|
---|
78 | * as well as information about the terms that were searched on.
|
---|
79 | * Furthermore, metadata such as the number of Docs that matched and
|
---|
80 | * were returned (if any such are present in the response-message)
|
---|
81 | * are also stored.
|
---|
82 | * It first performs a clear to empty its data members and then fills
|
---|
83 | * them with the new Query response message's data.
|
---|
84 | * @param responseMessageTag is the XML DOM Element representing a query
|
---|
85 | * response XML message.
|
---|
86 | */
|
---|
87 | public void setResponseData(Element responseMessageTag) {
|
---|
88 | this.clear(); // clear anything stored from prev search
|
---|
89 |
|
---|
90 | Element listTag = ParseUtil.getFirstDescElementCalled(
|
---|
91 | responseMessageTag, GSXML.DOC_NODE_ELEM+GSXML.LIST_MODIFIER);
|
---|
92 | if(listTag == null)
|
---|
93 | return; // should not be the case, unless search term was empty
|
---|
94 | // have to go back and deal with that separately
|
---|
95 | Vector v = ParseUtil.getAllChildElementsCalled(listTag,
|
---|
96 | GSXML.DOC_NODE_ELEM);
|
---|
97 | if(v != null) {
|
---|
98 | for(int i = 0; i < v.size(); i++) {
|
---|
99 | DocumentNodeData docNode
|
---|
100 | = new DocumentNodeData((Element)v.get(i));
|
---|
101 | nodeIDsToNodes.put(docNode.nodeID, docNode);
|
---|
102 | }
|
---|
103 | v.clear();
|
---|
104 | v = null;
|
---|
105 | }
|
---|
106 | listTag = null;
|
---|
107 |
|
---|
108 | // Get any term elements there might be - there may be none for
|
---|
109 | // some queries' responses
|
---|
110 | listTag = ParseUtil.getFirstDescElementCalled(responseMessageTag,
|
---|
111 | GSXML.TERM_ELEM+GSXML.LIST_MODIFIER);
|
---|
112 | if(listTag != null) {
|
---|
113 | v = ParseUtil.getAllChildElementsCalled(listTag,
|
---|
114 | GSXML.TERM_ELEM);
|
---|
115 | if(v != null) {
|
---|
116 | termList = new TermData[v.size()];
|
---|
117 | for(int i = 0; i < termList.length; i++)
|
---|
118 | termList[i] = new TermData((Element)v.get(i));
|
---|
119 | v.clear();
|
---|
120 | v = null;
|
---|
121 | }
|
---|
122 | listTag = null;
|
---|
123 | }
|
---|
124 |
|
---|
125 | // Get any metadata elements there might be - there may be none
|
---|
126 | // for some queries' responses
|
---|
127 | listTag = ParseUtil.getFirstDescElementCalled(responseMessageTag,
|
---|
128 | GSXML.METADATA_ELEM+GSXML.LIST_MODIFIER);
|
---|
129 | if(listTag != null) {
|
---|
130 | v = ParseUtil.getAllChildElementsCalled(
|
---|
131 | listTag, GSXML.METADATA_ELEM);
|
---|
132 | if(v != null) {
|
---|
133 | // metadataList = new HashMap(v.size());
|
---|
134 | for(int i = 0; i < v.size(); i++) {
|
---|
135 | //create a new metadata object
|
---|
136 | MetaData meta = new MetaData((Element)v.get(i));
|
---|
137 | //add the metadata object into the HashMap keyed by its name
|
---|
138 | metadataList.put(meta.name, meta);
|
---|
139 | }
|
---|
140 | v.clear();
|
---|
141 | v = null;
|
---|
142 | }
|
---|
143 | listTag = null;
|
---|
144 |
|
---|
145 | // DIFFERENT FROM MANUAL: in GS3, the value of these metadata.names
|
---|
146 | // are set in the body-text, not set as an attribute-value.
|
---|
147 | // That means, Metadata.value is not what we want, but
|
---|
148 | // Metadata.bodyText
|
---|
149 | MetaData m = (MetaData)metadataList.get("numDocsMatched");
|
---|
150 | this.numDocsMatched = (m == null) ? "" : m.bodyText;
|
---|
151 | m = (MetaData)metadataList.get("numDocsReturned");
|
---|
152 | this.numDocsReturned = (m == null) ? "" : m.bodyText;
|
---|
153 | m = (MetaData)metadataList.get("query");
|
---|
154 | this.queryField = (m == null) ? "" : m.bodyText;
|
---|
155 | } else { //empty metadataList
|
---|
156 | this.numDocsMatched = this.numDocsReturned = this.queryField = "";
|
---|
157 | }
|
---|
158 | }
|
---|
159 |
|
---|
160 | /* Accessor methods */
|
---|
161 | /* Information on the search results/query-response: */
|
---|
162 | /** @return the number of matching documents for the query information */
|
---|
163 | public String getNumDocsMatched() { return numDocsMatched; }
|
---|
164 | /** @return the number of documents returned for the query information */
|
---|
165 | public String getNumDocsReturned() { return numDocsReturned; }
|
---|
166 | /** @return the query field information */
|
---|
167 | public String getQueryField() { return queryField; }
|
---|
168 |
|
---|
169 | /** @return the list of termData (search terms along with frequency
|
---|
170 | * information, etc.) */
|
---|
171 | public TermData[] getTermList() { return termList; }
|
---|
172 |
|
---|
173 |
|
---|
174 | /** Given an nodeID, returns the DocumentNodeData object with that nodeID
|
---|
175 | * if any. Otherwise, null is returned.
|
---|
176 | * Superclass has method getNodeForID that returns NodeData instead.
|
---|
177 | * This is just a convenience method.
|
---|
178 | * @param ID is the nodeID of the DocumentNodeData to be returned.
|
---|
179 | * @return the DocumentNodeData object for the given ID or null if not
|
---|
180 | * present. */
|
---|
181 | public DocumentNodeData getDocNodeForID(String ID) {
|
---|
182 | return (DocumentNodeData)nodeIDsToNodes.get(ID);
|
---|
183 | }
|
---|
184 |
|
---|
185 | /** @return an array of the DocumentNodeData objects of the documents
|
---|
186 | * returned by the executed Query Response and stored in this
|
---|
187 | * QueryResponseData object. I.e. the documentNodes of the documents in
|
---|
188 | * the search results.
|
---|
189 | * In cases of an error (such as not being able to connect to a collection
|
---|
190 | * like Infomine) this method may return an empty array (length = 0)
|
---|
191 | * if docIDsToDocNodes is empty! */
|
---|
192 | public DocumentNodeData[] getDocumentNodeList() {
|
---|
193 | DocumentNodeData[] docNodeList
|
---|
194 | = new DocumentNodeData[nodeIDsToNodes.size()];
|
---|
195 | nodeIDsToNodes.values().toArray(docNodeList);
|
---|
196 | // populates docNodeList[]
|
---|
197 | return docNodeList;
|
---|
198 | }
|
---|
199 |
|
---|
200 | /** @return an array of the IDs of the list of documentNodes maintained
|
---|
201 | * by this QueryResponseData object (the documentNodes of the documents'
|
---|
202 | * in the search results).
|
---|
203 | * In cases of an error (such as not being able to connect to collection
|
---|
204 | * such as Infomine) this method may return an empty array (length = 0)
|
---|
205 | * if docIDsToDocNodes is empty! */
|
---|
206 | public String[] getDocumentNodeIDs() {
|
---|
207 | String[] docNodeIDs = new String[nodeIDsToNodes.size()];
|
---|
208 | nodeIDsToNodes.keySet().toArray(docNodeIDs);
|
---|
209 | // above statement has now populated docNodeIDs[]
|
---|
210 | return docNodeIDs;
|
---|
211 | }
|
---|
212 |
|
---|
213 | /** @return metadata of the query's response. This includes values such
|
---|
214 | * as numDocsMatched, numDocsReturned, query. This does not return any
|
---|
215 | * document's metadata! */
|
---|
216 | public String getMetaValueForName(String name) {
|
---|
217 | Object o = metadataList.get(name);
|
---|
218 | if(o != null) {
|
---|
219 | MetaData m = (MetaData)o;
|
---|
220 | return m.bodyText;
|
---|
221 | }
|
---|
222 | return "";
|
---|
223 | }
|
---|
224 |
|
---|
225 | /** This method can be called after a DocumentStructureRetrieve request
|
---|
226 | * (for the entire structure of all/many of its documents) has returned
|
---|
227 | * a response. Given the response message (XML) element, this method
|
---|
228 | * attempts to set the document structure for all the documentNodeData
|
---|
229 | * objects it has, using the nodeStructure tags in the
|
---|
230 | * response-message-XML. But only those documentNodeData whose nodeIDs
|
---|
231 | * are mentioned in the response-message-XML are actually set!
|
---|
232 | * This method returns a null vector if the responseMessage XMl does not
|
---|
233 | * contain any <documentNodeList> element with <documentNode>s
|
---|
234 | * each with <nodeStructure> children.
|
---|
235 | * Otherwise it returns a Vector of all the rootNodes of the list of
|
---|
236 | * docNodes that this QueryResponseData object maintains. */
|
---|
237 | public Vector setStructureForDocs(Element messageTag) {
|
---|
238 | Element docList = ParseUtil.getFirstDescElementCalled(
|
---|
239 | messageTag, GSXML.DOC_NODE_ELEM+GSXML.LIST_MODIFIER);
|
---|
240 | if(docList == null)
|
---|
241 | return null;
|
---|
242 |
|
---|
243 | // Get only child <docNode> elements of <docList>, don't get
|
---|
244 | // all descendent <docNode>s here!
|
---|
245 | Vector docNodes = ParseUtil.getAllChildElementsCalled(
|
---|
246 | docList, GSXML.DOC_NODE_ELEM);
|
---|
247 | if(docNodes == null)
|
---|
248 | return null;
|
---|
249 |
|
---|
250 | Vector rootNodes = new Vector(docNodes.size());
|
---|
251 | for(int i = 0; i < docNodes.size(); i++) {
|
---|
252 | Element docNode = (Element)docNodes.get(i);
|
---|
253 | // Now find the documentNodeData for which we are going to set
|
---|
254 | // its nodeStructure and fow which we're going to find the root
|
---|
255 | String id = (String)docNode.getAttribute(GSXML.NODE_ID_ATT);
|
---|
256 | DocumentNodeData document
|
---|
257 | = (DocumentNodeData)this.nodeIDsToNodes.get(id);
|
---|
258 |
|
---|
259 | // get the <nodeStructure> element
|
---|
260 | Element nodeStructure = ParseUtil.getFirstDescElementCalled(
|
---|
261 | docNode, GSXML.NODE_STRUCTURE_ELEM);
|
---|
262 | if(nodeStructure == null || document == null)
|
---|
263 | continue; // skip to look at next docNode
|
---|
264 | // else
|
---|
265 | // nodeStructure should contain exactly one child: the root doc
|
---|
266 | document.setDescendentsOfRootNode(nodeStructure, nodeIDsToNodes);
|
---|
267 |
|
---|
268 | // Now add the root of that document to our vector rootNodes
|
---|
269 | rootNodes.add(document.getRoot());
|
---|
270 | }
|
---|
271 | return rootNodes;
|
---|
272 | }
|
---|
273 |
|
---|
274 |
|
---|
275 | /** @return some summary info on how the search went. This may include
|
---|
276 | * how many documents matched the query, and how many of them have been
|
---|
277 | * returned. With mult-term queries, the frequencies of each separate
|
---|
278 | * term is also returned for display. */
|
---|
279 | public String toString() {
|
---|
280 | // When running a Form Search on collection gs3mgppdemo through the
|
---|
281 | // browser at http://localhost:8080/greenstone3/
|
---|
282 | // and searching on terms "snail" and "water", the output is like:
|
---|
283 | // "Word count:snail: 433, water: 608
|
---|
284 | // 11 documents matched the query. (11 documents returned.)"
|
---|
285 | // Snail and water are TermData.names, and 433 and 608 are the
|
---|
286 | //frequencies in which those terms occurred in the collection
|
---|
287 |
|
---|
288 | StringBuffer buf = new StringBuffer();
|
---|
289 | if(this.termList != null) {
|
---|
290 | buf.append("Word count: ");
|
---|
291 | for(int i = 0; i < termList.length; i++)
|
---|
292 | buf.append(termList[i] + ", "); // calls toString on TermData
|
---|
293 | // get rid of final comma-space and replace with newline
|
---|
294 | buf.replace(buf.length()-2, buf.length(), "\n");
|
---|
295 | }
|
---|
296 | if(!this.numDocsMatched.equals("")) {
|
---|
297 | buf.append(this.numDocsMatched);
|
---|
298 | buf.append(" document(s) matched the query.");
|
---|
299 | }
|
---|
300 | if(!this.numDocsReturned.equals("")) {
|
---|
301 | buf.append(" (");
|
---|
302 | buf.append(this.numDocsReturned);
|
---|
303 | buf.append(" document(s) returned.)");
|
---|
304 | }
|
---|
305 | return buf.toString();
|
---|
306 | }
|
---|
307 |
|
---|
308 | /** Static inner class Term represents a <term> XML element
|
---|
309 | * (these are nested in a <termList>) - see manual p. 45:
|
---|
310 | * <term name="str" numDocsMatched="int" freq="int" field="int" stem="int"/>
|
---|
311 | * <equivTermList>
|
---|
312 | * <term name="str" numDocsMatched="int" freq="int" />
|
---|
313 | * <term name="str" numDocsMatched="int" freq="int" />
|
---|
314 | * ...
|
---|
315 | * </equivTermList>
|
---|
316 | * </term>
|
---|
317 | * Can import this class as import gs3client.QueryResponseData.TermData
|
---|
318 | * and can then use it just as "TermData" (don't need fully qualified name).
|
---|
319 | */
|
---|
320 | public static class TermData {
|
---|
321 | /* Information about the TermData */
|
---|
322 | public final String name;
|
---|
323 | public final String numDocsMatch;
|
---|
324 | public final String freq;
|
---|
325 |
|
---|
326 | // Member fields that might not always be set (may be ""):
|
---|
327 | public final String field;
|
---|
328 | public final String stem;
|
---|
329 |
|
---|
330 | /** The terms nested inside an <equivTermList> */
|
---|
331 | protected TermData[] equivTermList;
|
---|
332 |
|
---|
333 | /** Constructs a Term object to represent the <term> element passed
|
---|
334 | * in here as an argument. Given a <term></term> element, it
|
---|
335 | * sets this object's members.
|
---|
336 | * @param termTag is a <term></term> element */
|
---|
337 | public TermData(Element termTag) {
|
---|
338 | this.name = termTag.hasAttribute(GSXML.NAME_ATT) ?
|
---|
339 | termTag.getAttribute(GSXML.NAME_ATT) : "";
|
---|
340 | this.field = termTag.hasAttribute("field") ?
|
---|
341 | termTag.getAttribute("field") : "";
|
---|
342 | this.stem = termTag.hasAttribute("stem") ?
|
---|
343 | termTag.getAttribute("stem") : "";
|
---|
344 | this.freq = termTag.hasAttribute("freq") ?
|
---|
345 | termTag.getAttribute("freq") : "";
|
---|
346 | //Integer.parseInt(termTag.getAttribute("freq")) : 0;
|
---|
347 | this.numDocsMatch = termTag.hasAttribute("numDocsMatch") ?
|
---|
348 | termTag.getAttribute("numDocsMatch") : "";
|
---|
349 | //Integer.parseInt(termTag.getAttribute("numDocsMatch")) : 0;
|
---|
350 |
|
---|
351 | setEquivTermList(termTag);
|
---|
352 | }
|
---|
353 |
|
---|
354 | /** Uses the <equivTermList>...</equivTermList> tag,
|
---|
355 | * which may cotnain more TermData.
|
---|
356 | * @param termTag is a <term></term> element that may
|
---|
357 | * contain a <equivTermList>...</equivTermList> element */
|
---|
358 | public void setEquivTermList(Element termTag) {
|
---|
359 | Element equivList = ParseUtil.getFirstChildElementCalled(
|
---|
360 | termTag, "equivTerm"+GSXML.LIST_MODIFIER);
|
---|
361 | if(equivList != null) {
|
---|
362 | Vector equivTerms = ParseUtil.getAllChildElementsCalled(
|
---|
363 | equivList, GSXML.TERM_ELEM); // <term> children
|
---|
364 | if(equivTerms != null) {
|
---|
365 | equivTermList = new TermData[equivTerms.size()];
|
---|
366 | for(int i = 0; i < equivTermList.length; i++)
|
---|
367 | equivTermList[i] = new TermData(
|
---|
368 | (Element)equivTerms.get(i));
|
---|
369 | }
|
---|
370 | }
|
---|
371 | }
|
---|
372 |
|
---|
373 | /** @return any list of <equivTermList>...</equivTermList>
|
---|
374 | * TermData maintained by this TermData object. Null if there are none. */
|
---|
375 | public TermData[] getEquivTermList() { return equivTermList; }
|
---|
376 |
|
---|
377 | // Frequency is always a number
|
---|
378 | /** @return a String representation of this TermData: the name
|
---|
379 | * and frequency */
|
---|
380 | public String toString() { return this.name + ": " + this.freq; }
|
---|
381 |
|
---|
382 | /** @return a String displaying the member contents of this TermData.
|
---|
383 | * Useful for debugging purposes. */
|
---|
384 | public String show() {
|
---|
385 | StringBuffer buf = new StringBuffer("name: " + this.name);
|
---|
386 | buf.append(" numDocsMatched: " + this.numDocsMatch);
|
---|
387 | buf.append(" freq: " + freq);
|
---|
388 | buf.append(" field: " + field);
|
---|
389 | buf.append(" stem: " + stem + "\n");
|
---|
390 | if(equivTermList != null) {
|
---|
391 | buf.append("EquivTermList:\n");
|
---|
392 | for(int i = 0; i < equivTermList.length; i++)
|
---|
393 | buf.append(" " + equivTermList[i].show() + "\n");
|
---|
394 | }
|
---|
395 | return buf.toString();
|
---|
396 | }
|
---|
397 | }
|
---|
398 | }
|
---|