1 | package org.greenstone.gsdl3.action;
|
---|
2 |
|
---|
3 | import org.greenstone.gsdl3.core.ModuleInterface;
|
---|
4 | import org.greenstone.gsdl3.util.*;
|
---|
5 | // XML classes
|
---|
6 | import org.w3c.dom.Node;
|
---|
7 | import org.w3c.dom.NodeList;
|
---|
8 | import org.w3c.dom.Text;
|
---|
9 | import org.w3c.dom.Document;
|
---|
10 | import org.w3c.dom.Element;
|
---|
11 |
|
---|
12 | import java.util.HashMap;
|
---|
13 | import java.util.HashSet;
|
---|
14 | import java.util.Vector;
|
---|
15 | import java.util.Map;
|
---|
16 | import java.util.Iterator;
|
---|
17 | import java.io.File;
|
---|
18 |
|
---|
19 | /** action class for queries */
|
---|
20 | public class QueryAction extends Action {
|
---|
21 |
|
---|
22 | /** process - processes a request.
|
---|
23 | */
|
---|
24 | public Element process (Element message) {
|
---|
25 |
|
---|
26 | // get the request - assume there is only one
|
---|
27 | Element request = (Element)GSXML.getChildByTagName(message, GSXML.REQUEST_ELEM);
|
---|
28 |
|
---|
29 | // create the return message
|
---|
30 | Element result = this.doc.createElement(GSXML.MESSAGE_ELEM);
|
---|
31 | Element response = basicQuery(request);
|
---|
32 | result.appendChild(this.doc.importNode(response, true));
|
---|
33 | return result;
|
---|
34 | }
|
---|
35 |
|
---|
36 | /** a generic query handler
|
---|
37 | * this gets the service description, does the query (just passes all the
|
---|
38 | * params to the service, then gets the titles for any results
|
---|
39 | */
|
---|
40 | protected Element basicQuery(Element request) {
|
---|
41 |
|
---|
42 | // the result
|
---|
43 | Element page_response = this.doc.createElement(GSXML.RESPONSE_ELEM);
|
---|
44 |
|
---|
45 | // extract the params from the cgi-request, and check that we have a coll specified
|
---|
46 | Element cgi_param_list = (Element)GSXML.getChildByTagName(request, GSXML.PARAM_ELEM+GSXML.LIST_MODIFIER);
|
---|
47 | HashMap params = GSXML.extractParams(cgi_param_list, false);
|
---|
48 |
|
---|
49 | String request_type = (String)params.get(GSParams.REQUEST_TYPE);
|
---|
50 | String service_name = (String)params.get(GSParams.SERVICE);
|
---|
51 | String collection = (String)params.get(GSParams.COLLECTION);
|
---|
52 |
|
---|
53 | if (collection == null || collection.equals("")) {
|
---|
54 | System.err.println("QueryAction Error: no collection was specified!");
|
---|
55 | return page_response; // an empty response
|
---|
56 | }
|
---|
57 |
|
---|
58 | String lang = request.getAttribute(GSXML.LANG_ATT);
|
---|
59 | String uid = request.getAttribute(GSXML.USER_ID_ATT);
|
---|
60 | String to = GSPath.appendLink(collection, service_name);
|
---|
61 |
|
---|
62 | // part of the response is the service description
|
---|
63 | // for now get this again from the service.
|
---|
64 | // this will probably need to be cached somehow later on.
|
---|
65 | Element mr_info_message = this.doc.createElement(GSXML.MESSAGE_ELEM);
|
---|
66 | Element mr_info_request = GSXML.createBasicRequest(this.doc, GSXML.REQUEST_TYPE_DESCRIBE, to, lang, uid);
|
---|
67 | mr_info_message.appendChild(mr_info_request);
|
---|
68 |
|
---|
69 | // also get the format stuff now if there is some
|
---|
70 | Element format_request = GSXML.createBasicRequest(this.doc, GSXML.REQUEST_TYPE_FORMAT, to, lang, uid);
|
---|
71 | mr_info_message.appendChild(format_request);
|
---|
72 |
|
---|
73 | // process the messages
|
---|
74 | Element mr_info_response = (Element) this.mr.process(mr_info_message);
|
---|
75 |
|
---|
76 | // the two responses
|
---|
77 | NodeList responses = mr_info_response.getElementsByTagName(GSXML.RESPONSE_ELEM);
|
---|
78 | Element service_response = (Element)responses.item(0);
|
---|
79 | Element format_response = (Element)responses.item(1);
|
---|
80 |
|
---|
81 | Element service_description = (Element)this.doc.importNode(GSXML.getChildByTagName(service_response, GSXML.SERVICE_ELEM), true);
|
---|
82 | page_response.appendChild(service_description);
|
---|
83 |
|
---|
84 | if (request_type.equals("d")) {// just a display request
|
---|
85 | return page_response;
|
---|
86 | }
|
---|
87 |
|
---|
88 | HashSet metadata_names = new HashSet();
|
---|
89 | metadata_names.add("Title");
|
---|
90 | // add in the format info to the stylesheet if there is any
|
---|
91 | Element format_elem = (Element)GSXML.getChildByTagName(format_response, GSXML.FORMAT_ELEM);
|
---|
92 | if (format_elem != null) {
|
---|
93 | ///ystem.out.println("QueryAction: found a format element, adding it to the page response");
|
---|
94 | // set teh format type
|
---|
95 | format_elem.setAttribute(GSXML.TYPE_ATT, "search");
|
---|
96 | // for now just add to the response
|
---|
97 | page_response.appendChild(this.doc.importNode(format_elem, true));
|
---|
98 | extractMetadataNames(format_elem, metadata_names);
|
---|
99 | }
|
---|
100 |
|
---|
101 | // do the query
|
---|
102 | Element mr_query_message = this.doc.createElement(GSXML.MESSAGE_ELEM);
|
---|
103 | Element mr_query_request = GSXML.createBasicRequest(this.doc, GSXML.REQUEST_TYPE_PROCESS, to, lang, uid);
|
---|
104 | mr_query_message.appendChild(mr_query_request);
|
---|
105 |
|
---|
106 | // paramList
|
---|
107 | HashMap service_params = (HashMap)params.get("s1");
|
---|
108 | if (service_params == null) { // no query
|
---|
109 | return page_response;
|
---|
110 | }
|
---|
111 | Element query_param_list = this.doc.createElement(GSXML.PARAM_ELEM + GSXML.LIST_MODIFIER);
|
---|
112 | GSXML.addParametersToList(this.doc, query_param_list, service_params);
|
---|
113 | ///ystem.out.println("service params are "+this.converter.getString(query_param_list));
|
---|
114 | mr_query_request.appendChild(query_param_list);
|
---|
115 |
|
---|
116 | // do the query
|
---|
117 | Element mr_query_response = (Element)this.mr.process(mr_query_message);
|
---|
118 |
|
---|
119 | //. check for errors
|
---|
120 | String path = GSPath.appendLink(GSXML.RESPONSE_ELEM, GSXML.ERROR_ELEM);
|
---|
121 | Element error_elem = (Element) GSXML.getNodeByPath(mr_query_response, path);
|
---|
122 | if (error_elem != null) {
|
---|
123 | // should we continue?? perhaps have a kind of error - information vs fatal??
|
---|
124 | System.err.println("found an error elem");
|
---|
125 | page_response.appendChild(this.doc.importNode(error_elem, true));
|
---|
126 | return page_response;
|
---|
127 | }
|
---|
128 | path = GSPath.appendLink(GSXML.RESPONSE_ELEM, GSXML.METADATA_ELEM+GSXML.LIST_MODIFIER);
|
---|
129 | Element query_result_metadata_list = (Element) GSXML.getNodeByPath(mr_query_response, path);
|
---|
130 | if (query_result_metadata_list == null) {
|
---|
131 | System.err.println("QueryAction: Warning: No query result metadata.\n");
|
---|
132 | } else { // add it into the page response
|
---|
133 | page_response.appendChild(this.doc.importNode(query_result_metadata_list, true));
|
---|
134 | }
|
---|
135 |
|
---|
136 | path = GSPath.appendLink(GSXML.RESPONSE_ELEM, GSXML.TERM_ELEM+GSXML.LIST_MODIFIER);
|
---|
137 | Element query_term_info_list = (Element) GSXML.getNodeByPath(mr_query_response, path);
|
---|
138 | if (query_term_info_list == null) {
|
---|
139 | System.err.println("QueryAction: Warning: No query term information.\n");
|
---|
140 | } else { // add it into the page response
|
---|
141 | page_response.appendChild(this.doc.importNode(query_term_info_list, true));
|
---|
142 | }
|
---|
143 |
|
---|
144 | // check that there are some documents - for now check the list, but later should use a numdocs metadata elem
|
---|
145 | path = GSPath.appendLink(GSXML.RESPONSE_ELEM, GSXML.DOC_NODE_ELEM+GSXML.LIST_MODIFIER);
|
---|
146 |
|
---|
147 | Element document_list = (Element)GSXML.getNodeByPath(mr_query_response,
|
---|
148 | path);
|
---|
149 | // documentList not present if no docs found
|
---|
150 | if (document_list == null) {
|
---|
151 | return page_response;
|
---|
152 | }
|
---|
153 |
|
---|
154 | // paging of the results is done here - we filter the list to remove unwanted entries before retrieving metadata
|
---|
155 | Element filtered_doc_list = filterDocList(params, service_description, document_list);
|
---|
156 |
|
---|
157 | // now we check to see if there is metadata already - some search services return predefined metadata. if there is some, don't do a metadata request
|
---|
158 | NodeList doc_metadata = document_list.getElementsByTagName(GSXML.METADATA_ELEM+GSXML.LIST_MODIFIER);
|
---|
159 | if (doc_metadata.getLength()>0) {
|
---|
160 | System.err.println("have already found metadata!");
|
---|
161 | // append the doc list to the result
|
---|
162 | page_response.appendChild(this.doc.importNode(document_list, true));
|
---|
163 | return page_response;
|
---|
164 | }
|
---|
165 | // do the metadata request on the filtered list
|
---|
166 | Element mr_metadata_message = this.doc.createElement(GSXML.MESSAGE_ELEM);
|
---|
167 | to = GSPath.appendLink(collection, "DocumentMetadataRetrieve"); // Hard-wired?
|
---|
168 | Element mr_metadata_request = GSXML.createBasicRequest(this.doc, GSXML.REQUEST_TYPE_PROCESS, to, lang, uid);
|
---|
169 | mr_metadata_message.appendChild(mr_metadata_request);
|
---|
170 |
|
---|
171 | // just get all for now - the receptionist should perhaps pass in some
|
---|
172 | // metadata that it wants, and QueryAction should look through the format stuff to see if there is any other?
|
---|
173 |
|
---|
174 | Element dm_param_list = createMetadataParamList(metadata_names);
|
---|
175 |
|
---|
176 | mr_metadata_request.appendChild(dm_param_list);
|
---|
177 |
|
---|
178 |
|
---|
179 | // add in the doc node list too
|
---|
180 | mr_metadata_request.appendChild(filtered_doc_list);
|
---|
181 |
|
---|
182 | Element mr_metadata_response = (Element) this.mr.process(mr_metadata_message);
|
---|
183 |
|
---|
184 | path = GSPath.appendLink(GSXML.RESPONSE_ELEM, GSXML.DOC_NODE_ELEM+GSXML.LIST_MODIFIER);
|
---|
185 | Element query_result_document_list = (Element) GSXML.getNodeByPath(mr_metadata_response, path);
|
---|
186 |
|
---|
187 | if (query_result_document_list != null) {
|
---|
188 | page_response.appendChild(this.doc.importNode(query_result_document_list, true));
|
---|
189 | }
|
---|
190 |
|
---|
191 | System.out.println("Query page:\n" + this.converter.getPrettyString(page_response));
|
---|
192 | return page_response;
|
---|
193 | }
|
---|
194 |
|
---|
195 | /** this filters out some of the doc results for result paging */
|
---|
196 | protected Element filterDocList(HashMap params, Element service_description, Element orig_doc_list) {
|
---|
197 |
|
---|
198 | // check in the service descripiton to see if hitsPerpage is a param
|
---|
199 | Element service_p_list = (Element)GSXML.getChildByTagName(service_description, GSXML.PARAM_ELEM+GSXML.LIST_MODIFIER);
|
---|
200 | Element hits_param = GSXML.getNamedElement(service_p_list, GSXML.PARAM_ELEM, GSXML.NAME_ATT, "hitsPerPage");
|
---|
201 |
|
---|
202 | boolean service_paging = false;
|
---|
203 | if (hits_param != null) {
|
---|
204 | service_paging = true;
|
---|
205 | }
|
---|
206 | if (service_paging) {
|
---|
207 | // the service is doing the paging, so we want to display all of teh returned docs
|
---|
208 | return (Element)this.doc.importNode(orig_doc_list, true);
|
---|
209 | }
|
---|
210 |
|
---|
211 | String hits_pp = (String)params.get("hitsPerPage");
|
---|
212 | int hits = 20;
|
---|
213 | if (hits_pp != null && !hits_pp.equals("")) {
|
---|
214 | try {
|
---|
215 | hits = Integer.parseInt(hits_pp);
|
---|
216 | } catch (Exception e) {
|
---|
217 | hits=20;
|
---|
218 | }
|
---|
219 | }
|
---|
220 |
|
---|
221 | if (hits == -1) { // all
|
---|
222 | return (Element)this.doc.importNode(orig_doc_list, true);
|
---|
223 | }
|
---|
224 | NodeList result_docs = orig_doc_list.getElementsByTagName(GSXML.DOC_NODE_ELEM);
|
---|
225 |
|
---|
226 | int num_docs = result_docs.getLength();
|
---|
227 | if (num_docs <= hits) {
|
---|
228 | // too few docs to do paging
|
---|
229 | return (Element)this.doc.importNode(orig_doc_list, true);
|
---|
230 | }
|
---|
231 |
|
---|
232 | // now we need our own doc list
|
---|
233 | Element result_list = this.doc.createElement(GSXML.DOC_NODE_ELEM + GSXML.LIST_MODIFIER);
|
---|
234 | String start_p = (String)params.get("startPage");
|
---|
235 | int start = 1;
|
---|
236 | if (start_p != null && !start_p.equals("")) {
|
---|
237 | try {
|
---|
238 | start = Integer.parseInt(start_p);
|
---|
239 | } catch (Exception e) {
|
---|
240 | start = 1;
|
---|
241 | }
|
---|
242 | }
|
---|
243 |
|
---|
244 | int start_from = (start-1)*hits;
|
---|
245 | int end_at = (start*hits)-1;
|
---|
246 |
|
---|
247 | if (start_from > num_docs) {
|
---|
248 | // something has gone wrong
|
---|
249 | return result_list;
|
---|
250 | }
|
---|
251 |
|
---|
252 | if (end_at > num_docs) {
|
---|
253 | end_at = num_docs-1;
|
---|
254 | }
|
---|
255 |
|
---|
256 | // now we finally have the docs numbers to use
|
---|
257 | for (int i=start_from; i<=end_at; i++) {
|
---|
258 | result_list.appendChild(this.doc.importNode(result_docs.item(i), true));
|
---|
259 | }
|
---|
260 |
|
---|
261 | return result_list;
|
---|
262 | }
|
---|
263 |
|
---|
264 | }
|
---|