source: main/trunk/greenstone3/src/java/org/greenstone/gsdl3/OAIServer.java@ 23913

Last change on this file since 23913 was 23913, checked in by ak19, 13 years ago

Additional changes to succeed in more of the OAIServer validation tests: 1. Now passes POST requests introduced in OAI version 2.0. 2. Passes further error tests regarding handling of erroneous parameters for From and/or Until dates and ResumptionToken. More (major) changes yet to be made to pass the final official OAIServer validation test: to do with earliestDateStamp.

File size: 17.6 KB
Line 
1/*
2 * OAIServer.java
3 * Copyright (C) 2002 New Zealand Digital Library, http://www.nzdl.org
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
18 */
19package org.greenstone.gsdl3;
20
21import org.greenstone.gsdl3.comms.*;
22import org.greenstone.gsdl3.core.*;
23import org.greenstone.gsdl3.util.*;
24import org.greenstone.gsdl3.action.PageAction; // used to get the default action
25import org.w3c.dom.*;
26import java.io.*;
27import javax.servlet.*;
28import javax.servlet.http.*;
29import java.util.Enumeration;
30import java.util.HashSet;
31import java.util.Iterator;
32import java.util.Map;
33import java.util.Map.Entry;
34import java.io.File;
35
36import org.apache.log4j.*;
37
38/** a servlet to serve the OAI metadata harvesting - we are using servlets instead
39 * of cgi
40 * The init method is called only once - the first time the servlet classes
41 * are loaded. Each time a request comes in to the servlet, the service()
42 * method is called in a new thread (it calls doGet/doPost etc.).
43 * The servlet takes the verb= type args and builds a simple request to send to
44 * the oai receptionist, which returns a result in xml, conforming to the OAI-PMH
45 * protocol.
46 * @see Receptionist
47 */
48/**
49 * OAI server configuration instructions *
50 *
51 */
52public class OAIServer extends HttpServlet {
53
54 /** The receptionist that this servlet sends its request messages to; created in init(). */
55 protected OAIReceptionist recept=null;
56 /** The default language - specified by setting a servlet init param,
57 * otherwise DEFAULT_LANG is used. */
58 protected String default_lang= null;
59 /** The fallback language - used if a default lang is not specified
60 * in the servlet params. */
61 protected final String DEFAULT_LANG = "en";
62
63 /** A converter class to parse XML and create Docs.
64 * This is only used for generating internal requests passed to MessageRouter.
65 * The response message is generated by parsing an existing xml skeleton file (web/WEB-INF/oaixml/oaiversion2.xml, for example).
66 */
67 protected XMLConverter converter=null;
68 /** Container Document, obtained from the converter class, used to create the XML nodes of
69 * requests sent to the oai receptionist only; responses are created in OAIXML.java. */
70 protected Document doc=null;
71
72 /** A HashSet which contains all the legal verbs; filled in by initVerbs(). */
73 protected HashSet verb_set = null;
74 /** A HashSet which contains all the legal oai keys in the key/value argument pair; filled in by initParams(). */
75 protected HashSet param_set = null;
76 /** The name of the site with which we will finally be dealing, whether it is a local site or a remote site through a communicator.*/
77 protected String site = "";
78
79 // Whether responses include the xml-stylesheet processing instruction, and the href it points to.
80 protected boolean use_oai_stylesheet = true;
81 protected String oai_stylesheet = "interfaces/oai/oai2.xsl";
82
83 // Query string that doPost() rebuilds from the POSTed parameters (see the note in doPost()).
84 // When it is non-null, doGet() and logUsageInfo() use it in preference to request.getQueryString().
85 protected String queryString = null;
86
87 static Logger logger = Logger.getLogger(org.greenstone.gsdl3.OAIServer.class.getName());
88
89 /** initialise the servlet
90 */
91 public void init(ServletConfig config) throws ServletException {
92 // always call super.init, i.e., HttpServlet.;
93 super.init(config);
94 this.default_lang = config.getInitParameter(GSConstants.DEFAULT_LANG);
95
96 initVerbs();
97 initParams();
98
99 String site_name = config.getInitParameter(GSConstants.SITE_NAME);
100 String remote_site_name = null;
101 String remote_site_type = null;
102 String remote_site_address = null;
103
104 if (site_name == null) {
105 // no local site, try for communicator (remote site)
106 remote_site_name = config.getInitParameter("remote_site_name");
107 remote_site_type = config.getInitParameter("remote_site_type");
108 remote_site_address = config.getInitParameter("remote_site_address");
109 if (remote_site_name == null || remote_site_type == null || remote_site_address == null) {
110 System.err.println("initialisation paramters not all set!");
111 System.err.println("if site_name is not set, then you must have remote_site_name, remote_site_type and remote_site_address set");
112 System.exit(1);
113 }
114 }
115
116 if (this.default_lang == null) {
117 // choose english
118 this.default_lang = DEFAULT_LANG;
119 }
120
121 // the receptionist -the servlet will talk to this
122 this.recept = new OAIReceptionist();
123
124 // the receptionist uses a MessageRouter or Communicator to send its requests to. We either create a MessageRouter here for the designated site (if site_name set), or we create a Communicator for a remote site. The is given to teh Receptionist, and the servlet never talks to it again.directly.
125 if (site_name != null) {
126 //this site_name could consist of comma separated more than one site name.
127 String mr_name = (String)config.getInitParameter("messagerouter_class");
128 MessageRouter message_router = null;
129 if (mr_name == null) { // just use the normal MR *********
130 message_router = new MessageRouter();
131 } else { // try the specified one
132 try {
133 message_router = (MessageRouter)Class.forName("org.greenstone.gsdl3.core."+mr_name).newInstance();
134 } catch (Exception e) { // cant use this new one, so use normal one
135 System.err.println("OAIServlet configure exception when trying to use a new MessageRouter "+mr_name+": "+e.getMessage());
136 e.printStackTrace();
137 message_router = new MessageRouter();
138 }
139 }
140
141 message_router.setSiteName(site_name);
142 // lots of work is done in this step; see MessageRouter.java
143 message_router.configure();
144 this.recept.setSiteName(site_name);
145 this.recept.setMessageRouter(message_router);
146
147 } else {
148 // talking to a remote site, create a communicator
149 Communicator communicator = null;
150 // we need to create the XML to configure the communicator
151 Element site_elem = this.doc.createElement(GSXML.SITE_ELEM);
152 site_elem.setAttribute(GSXML.TYPE_ATT, remote_site_type);
153 site_elem.setAttribute(GSXML.NAME_ATT, remote_site_name);
154 site_elem.setAttribute(GSXML.ADDRESS_ATT, remote_site_address);
155
156 if (remote_site_type.equals(GSXML.COMM_TYPE_SOAP_JAVA)) {
157 communicator = new SOAPCommunicator();
158 } else {
159 System.err.println("OAIServlet.init Error: invalid Communicator type: "+remote_site_type);
160 System.exit(1);
161 }
162
163 if (!communicator.configure(site_elem)) {
164 System.err.println("OAIServlet.init Error: Couldn't configure communicator");
165 System.exit(1);
166 }
167 this.recept.setSiteName(remote_site_name);
168 this.recept.setMessageRouter(communicator);
169 }
170 // used for composing internal xml requests, but not xml responses.
171 // the converter may be used to get pretty xml, though.
172 this.converter = new XMLConverter();
173 this.doc = this.converter.newDOM();
174
175 // Read in OAIConfig.xml (residing web/WEB-INF/classes/) and
176 //use it to configure the receptionist. The init() is also called in which
177 //the resumption token file is read in and all expired tokens cleared.
178 Element oai_config = OAIXML.getOAIConfigXML();
179 if (oai_config == null) {
180 logger.error("Fail to parse oai config file OAIConfig.xml.");
181 return;
182 }
183 // pass it to the receptionist
184 this.recept.configure(oai_config);
185
186 // also, we have something we want to get from here - useOAIStylesheet
187 this.configure(oai_config);
188 }//end of init()
189
190 private void configure(Element oai_config) {
191 Element use_stylesheet_elem = (Element)GSXML.getChildByTagName(oai_config, OAIXML.USE_STYLESHEET);
192 if (use_stylesheet_elem != null) {
193 String value = GSXML.getNodeText(use_stylesheet_elem);
194 if (value.equals("no")) {
195 this.use_oai_stylesheet = false;
196 }
197 }
198 if (this.use_oai_stylesheet) {
199 // now see if there is a custom stylesheet specified
200 Element stylesheet_elem = (Element)GSXML.getChildByTagName(oai_config, OAIXML.STYLESHEET);
201 if (stylesheet_elem != null) {
202 String value = GSXML.getNodeText(stylesheet_elem);
203 if (!value.equals("")) {
204 oai_stylesheet = value;
205 }
206 }
207
208 }
209 }
210 private void initVerbs() {
211 verb_set = new HashSet();
212 verb_set.add(OAIXML.GET_RECORD);
213 verb_set.add(OAIXML.LIST_RECORDS);
214 verb_set.add(OAIXML.LIST_IDENTIFIERS);
215 verb_set.add(OAIXML.LIST_SETS);
216 verb_set.add(OAIXML.LIST_METADATA_FORMATS);
217 verb_set.add(OAIXML.IDENTIFY);
218 }
219
220 private void initParams() {
221 param_set = new HashSet();
222 param_set.add(OAIXML.METADATA_PREFIX);
223 param_set.add(OAIXML.FROM);
224 param_set.add(OAIXML.UNTIL);
225 param_set.add(OAIXML.SET);
226 param_set.add(OAIXML.RESUMPTION_TOKEN);
227 param_set.add(OAIXML.IDENTIFIER);
228 }
229 private void logUsageInfo(HttpServletRequest request){
230 String usageInfo = "";
231
232 String query = (queryString == null) ? request.getQueryString() : queryString;
233
234 //logged info = general-info + session-info
235 usageInfo =
236 request.getContextPath()+" "+ //session id
237 request.getServletPath()+" "+ //serlvet
238 "["+query+"]" +" "+ //the query string
239 "["+usageInfo.trim()+"]" +" "+ // params stored in a session
240 request.getRemoteAddr()+" "+ //remote address
241 request.getHeader("user-agent")+" "; //the remote brower info
242
243 logger.info(usageInfo);
244 }
245 /** return true if the url is in the form of baseURL?verb=...,
246 */
247 private boolean validate(String query, String verb) {
248 //Here in OAIServer, only the verbs are validated. All the validation for individual verb
249 // is taken in their doXXX() methods.
250 if(query == null || !query.startsWith(OAIXML.VERB+"=")) {
251 return false;
252 }
253 if (!verb_set.contains(verb)) {
254 return false;
255 }
256 return true;
257 }
258 private String getVerb(String query) {
259 if (query == null) return "";
260 int verb_start_index = query.indexOf("=") + 1;// first occurence of '='
261 int verb_end_index = query.indexOf("&");
262 if(verb_end_index == -1) {
263 return query.substring(verb_start_index);
264 }
265 return query.substring(verb_start_index, verb_end_index);
266 }
267
268 public void doGet(HttpServletRequest request, HttpServletResponse response)
269 throws ServletException, IOException {
270 logUsageInfo(request);
271 //out.println("url="+request.getRequestURL());// /oaiserver
272 //out.println("query="+request.getQueryString());// is /greenstone3
273
274 // oai always requires the content type be text/xml
275 request.setCharacterEncoding("UTF-8");
276 response.setContentType("text/xml;charset=UTF-8");
277 PrintWriter out = response.getWriter();
278
279 //
280 String lang = request.getParameter(GSParams.LANGUAGE);
281 if (lang==null || lang.equals("")) {
282 // use the default
283 lang = this.default_lang;
284 }
285 //we don't get the baseURL from the http request because what we get might be different from the one known publicly due to local network redirection.
286 //For example, puka.cs.waikato.ac.nz vs www.greenstone.org
287 //String base_url = request.getRequestURL().toString();
288 // if called by doPost (if this was originally a POST request), var queryString would have been set
289 String query = (queryString == null) ? request.getQueryString() : queryString;
290 String[] pairs = (query==null)? null : query.split("&");//split into key/value pairs
291 String verb = getVerb(query);
292 Element xml_response = OAIXML.createBasicResponse(verb, pairs);
293 Element verb_elem = null;
294
295 if (validate(query, verb) == false) {
296 if (verb_set.contains(verb) == false) {
297 logger.error(OAIXML.BAD_VERB + ": " + query);
298 verb_elem = OAIXML.createErrorElement(OAIXML.BAD_VERB, OAIXML.ILLEGAL_OAI_VERB);
299 } else {
300 //must be something else other than bad verbs caused an error, so bad argument
301 logger.error(OAIXML.BAD_ARGUMENT + ": " + query);
302 verb_elem = OAIXML.createErrorElement(OAIXML.BAD_ARGUMENT, "");
303 }
304 xml_response.appendChild(verb_elem);
305
306 out.println("<?xml version='1.0' encoding='UTF-8' ?>");
307 if (this.use_oai_stylesheet) {
308 out.println("<?xml-stylesheet type='text/xsl' href='"+this.oai_stylesheet+"' ?>\n");
309 }
310 out.println(this.converter.getPrettyString(xml_response));
311 return;
312 }//end of if(validate
313
314 // The query is valid, we can now
315 // compose the request message to the receptionist
316 Element xml_message = this.doc.createElement(GSXML.MESSAGE_ELEM);
317 Element xml_request = this.doc.createElement(GSXML.REQUEST_ELEM);
318 // The type attribute is set to be 'oaiService' from OAIServer to OAIReceptionist.
319 //xml_request.setAttribute(GSXML.TYPE_ATT, OAIXML.OAI_SERVICE);
320 xml_request.setAttribute(GSXML.LANG_ATT, lang);
321 xml_request.setAttribute(GSXML.TO_ATT, verb);
322 addParams(xml_request, pairs);
323
324 //xml_request.setAttribute(GSXML.OUTPUT_ATT, output);????
325 xml_message.appendChild(xml_request);
326
327 Node xml_result = this.recept.process (xml_message);
328 if (xml_result == null) {
329 logger.info("xml_result is null");
330 verb_elem = OAIXML.createErrorElement("Internal error", "");
331 xml_response.appendChild(verb_elem);
332 } else {
333
334 /** All response elements are in the form (with a corresponding verb name):
335 * <message>
336 <response>
337 <verb>
338 ...
339 * <resumptionToken>
340 * .. this is optional!
341 * </resumptionToken>
342 * </verb>
343 * </response>
344 * </message>
345 */
346 Node res = GSXML.getChildByTagName(xml_result, OAIXML.RESPONSE);
347 if(res == null) {
348 logger.info("response element in xml_result is null");
349 verb_elem = OAIXML.createErrorElement("Internal error", "");
350 } else {
351 verb_elem = GSXML.getFirstElementChild(res);
352 }
353
354 if(OAIXML.oai_version.equals(OAIXML.OAI_VERSION2) ||
355 verb_elem.getTagName().equals(OAIXML.ERROR)) {
356 xml_response.appendChild(xml_response.getOwnerDocument().importNode(verb_elem, true));
357 } else {
358 GSXML.copyAllChildren(xml_response, verb_elem);
359 }
360 }
361 out.println("<?xml version='1.0' encoding='UTF-8' ?>");
362 if (this.use_oai_stylesheet) {
363 out.println("<?xml-stylesheet type='text/xsl' href='"+this.oai_stylesheet+"' ?>\n");
364 }
365 out.println (this.converter.getPrettyString (xml_response));
366 return;
367 }
368 /** append parameter elements to the request sent to the receptionist*/
369 public void addParams(Element request, String[] pairs) {
370 // no params apart from the verb
371 if (pairs == null || pairs.length < 2) return ;
372
373 /**the request xml is composed in the form: <request>
374 * <param name=.../>
375 * <param name=.../>
376 * </request>
377 *(No paramList element in between).
378 */
379 for (int i=1; i<pairs.length; i++) {
380 //the first pair in pairs is the verb=xxx
381 int index = pairs[i].indexOf("=");
382 if(index != -1){ //just a double check
383 Element param = this.doc.createElement(OAIXML.PARAM);
384 param.setAttribute(OAIXML.NAME, pairs[i].substring(0, index));
385 param.setAttribute(OAIXML.VALUE, OAIXML.oaiDecode(pairs[i].substring(index + 1)));
386 request.appendChild(param);
387 }
388 }
389 }
390
391 // For OAI version 2.0, validation tests indicated that POST needs to be supported. Some
392 // modification was required in order to ensure that the request is passed intact to doGet()
393 public void doPost(HttpServletRequest request,
394 HttpServletResponse response)
395 throws ServletException, IOException {
396
397 // the post method returns a wrapper of type RequestFacade by apache and there
398 // is no getQueryString() method defined for it. Therefore, need to work this out
399 // manually before calling doGet(request, response) so that doGet can work as before.
400
401 queryString = "";
402 Iterator parameter_entries = request.getParameterMap().entrySet().iterator();
403 while(parameter_entries.hasNext()) {
404 Map.Entry param_entry = (Map.Entry)parameter_entries.next();
405 String[] paramVals = (String[]) param_entry.getValue();
406 if(paramVals != null) {
407 if(paramVals.length > 0) {
408 logger.error("POST request received: " + param_entry.getKey() + " - " + paramVals[0]);
409 queryString = queryString + "&" + param_entry.getKey() + "=" + paramVals[0];
410 }
411 }
412 }
413 if(queryString.length() > 0) {
414 queryString = queryString.substring(1);
415 //queryString = java.net.URLEncoder.encode(queryString,"UTF-8");
416 }
417 if(queryString.equals("")) {
418 queryString = null;
419 }
420 doGet(request,response);
421 }
422}
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
Note: See TracBrowser for help on using the repository browser.