source: main/trunk/greenstone3/src/java/org/greenstone/gsdl3/OAIServer.java@ 25054

Last change on this file since 25054 was 23922, checked in by ak19, 13 years ago

Two fixes: 1. Dr Bainbridge noticed that the Date object used in OAIXML.java's getTime() expects a time in milliseconds, not in seconds, which is what the long parameter contained when called from OAIPMH (other classes calling OAIXML.getTime() already passed milliseconds correctly). 2. Fixed an error in OAIServer.doPost where I wasn't resetting the new member variable queryString, which was breaking subsequent doGet requests.

File size: 17.6 KB
Line 
1/*
2 * OAIServer.java
3 * Copyright (C) 2002 New Zealand Digital Library, http://www.nzdl.org
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
18 */
19package org.greenstone.gsdl3;
20
21import org.greenstone.gsdl3.comms.*;
22import org.greenstone.gsdl3.core.*;
23import org.greenstone.gsdl3.util.*;
24import org.greenstone.gsdl3.action.PageAction; // used to get the default action
25import org.w3c.dom.*;
26import java.io.*;
27import javax.servlet.*;
28import javax.servlet.http.*;
29import java.util.Enumeration;
30import java.util.HashSet;
31import java.util.Iterator;
32import java.util.Map;
33import java.util.Map.Entry;
34import java.io.File;
35
36import org.apache.log4j.*;
37
38/** a servlet to serve the OAI metadata harvesting - we are using servlets instead
39 * of cgi
40 * the init method is called only once - the first time the servlet classes
41 * are loaded. Each time a request comes in to the servlet, the session()
42 * method is called in a new thread (calls doGet/doPut etc)
43 * takes the verb= type args and builds a simple request to send to
44 * the oai receptionist, which returns a result in xml, conforming to the OAI-PMH
45 * protocol.
46 * @see Receptionist
47 */
48/**
49 * OAI server configuration instructions *
50 *
51 */
52public class OAIServer extends HttpServlet {
53
54 /** the receptionist to send messages to */
55 protected OAIReceptionist recept=null;
56 /** the default language - is specified by setting a servlet param,
57 * otherwise DEFAULT_LANG is used*/
58 protected String default_lang= null;
59 /** The default default - used if a default lang is not specified
60 * in the servlet params */
61 protected final String DEFAULT_LANG = "en";
62
63 /** a converter class to parse XML and create Docs
64 * This is only used for generating internal requests passed to MessageRouter.
65 * The response message is generated by parsing an existing xml skeleton file (web/WEB-INF/oaixml/oaiversion2.xml, for example).
66 */
67 protected XMLConverter converter=null;
68 /** container Document to create XML Nodes (but only request to the oai receptionist,
69 * not response (which is created in OAIXML.java) created by converter class */
70 protected Document doc=null;
71
72 /** A HashSet which contains all the legal verbs. */
73 protected HashSet verb_set = null;
74 /** A HashSet which contains all the legal oai keys in the key/value argument pair. */
75 protected HashSet param_set = null;
76 /** The name of the site with which we will finally be dealing, whether it is a local site or a remote site through a communicator.*/
77 protected String site = "";
78
79 // do we output the stylesheet processing instruction?
80 protected boolean use_oai_stylesheet = true;
81 protected String oai_stylesheet = "interfaces/oai/oai2.xsl";
82
83 // there is no getQueryString() method in the HttpServletRequest returned from doPost,
84 // since that is actually of type apache RequestFacade, and doesn't define such a method
85 protected String queryString = null;
86
87 static Logger logger = Logger.getLogger(org.greenstone.gsdl3.OAIServer.class.getName());
88
89 /** initialise the servlet
90 */
91 public void init(ServletConfig config) throws ServletException {
92 // always call super.init, i.e., HttpServlet.;
93 super.init(config);
94 this.default_lang = config.getInitParameter(GSConstants.DEFAULT_LANG);
95
96 initVerbs();
97 initParams();
98
99 String site_name = config.getInitParameter(GSConstants.SITE_NAME);
100 String remote_site_name = null;
101 String remote_site_type = null;
102 String remote_site_address = null;
103
104 if (site_name == null) {
105 // no local site, try for communicator (remote site)
106 remote_site_name = config.getInitParameter("remote_site_name");
107 remote_site_type = config.getInitParameter("remote_site_type");
108 remote_site_address = config.getInitParameter("remote_site_address");
109 if (remote_site_name == null || remote_site_type == null || remote_site_address == null) {
110 System.err.println("initialisation paramters not all set!");
111 System.err.println("if site_name is not set, then you must have remote_site_name, remote_site_type and remote_site_address set");
112 System.exit(1);
113 }
114 }
115
116 if (this.default_lang == null) {
117 // choose english
118 this.default_lang = DEFAULT_LANG;
119 }
120
121 // the receptionist -the servlet will talk to this
122 this.recept = new OAIReceptionist();
123
124 // the receptionist uses a MessageRouter or Communicator to send its requests to. We either create a MessageRouter here for the designated site (if site_name set), or we create a Communicator for a remote site. The is given to teh Receptionist, and the servlet never talks to it again.directly.
125 if (site_name != null) {
126 //this site_name could consist of comma separated more than one site name.
127 String mr_name = (String)config.getInitParameter("messagerouter_class");
128 MessageRouter message_router = null;
129 if (mr_name == null) { // just use the normal MR *********
130 message_router = new MessageRouter();
131 } else { // try the specified one
132 try {
133 message_router = (MessageRouter)Class.forName("org.greenstone.gsdl3.core."+mr_name).newInstance();
134 } catch (Exception e) { // cant use this new one, so use normal one
135 System.err.println("OAIServlet configure exception when trying to use a new MessageRouter "+mr_name+": "+e.getMessage());
136 e.printStackTrace();
137 message_router = new MessageRouter();
138 }
139 }
140
141 message_router.setSiteName(site_name);
142 // lots of work is done in this step; see MessageRouter.java
143 message_router.configure();
144 this.recept.setSiteName(site_name);
145 this.recept.setMessageRouter(message_router);
146
147 } else {
148 // talking to a remote site, create a communicator
149 Communicator communicator = null;
150 // we need to create the XML to configure the communicator
151 Element site_elem = this.doc.createElement(GSXML.SITE_ELEM);
152 site_elem.setAttribute(GSXML.TYPE_ATT, remote_site_type);
153 site_elem.setAttribute(GSXML.NAME_ATT, remote_site_name);
154 site_elem.setAttribute(GSXML.ADDRESS_ATT, remote_site_address);
155
156 if (remote_site_type.equals(GSXML.COMM_TYPE_SOAP_JAVA)) {
157 communicator = new SOAPCommunicator();
158 } else {
159 System.err.println("OAIServlet.init Error: invalid Communicator type: "+remote_site_type);
160 System.exit(1);
161 }
162
163 if (!communicator.configure(site_elem)) {
164 System.err.println("OAIServlet.init Error: Couldn't configure communicator");
165 System.exit(1);
166 }
167 this.recept.setSiteName(remote_site_name);
168 this.recept.setMessageRouter(communicator);
169 }
170 // used for composing internal xml requests, but not xml responses.
171 // the converter may be used to get pretty xml, though.
172 this.converter = new XMLConverter();
173 this.doc = this.converter.newDOM();
174
175 // Read in OAIConfig.xml (residing web/WEB-INF/classes/) and
176 //use it to configure the receptionist. The init() is also called in which
177 //the resumption token file is read in and all expired tokens cleared.
178 Element oai_config = OAIXML.getOAIConfigXML();
179 if (oai_config == null) {
180 logger.error("Fail to parse oai config file OAIConfig.xml.");
181 return;
182 }
183 // pass it to the receptionist
184 this.recept.configure(oai_config);
185
186 // also, we have something we want to get from here - useOAIStylesheet
187 this.configure(oai_config);
188 }//end of init()
189
190 private void configure(Element oai_config) {
191 Element use_stylesheet_elem = (Element)GSXML.getChildByTagName(oai_config, OAIXML.USE_STYLESHEET);
192 if (use_stylesheet_elem != null) {
193 String value = GSXML.getNodeText(use_stylesheet_elem);
194 if (value.equals("no")) {
195 this.use_oai_stylesheet = false;
196 }
197 }
198 if (this.use_oai_stylesheet) {
199 // now see if there is a custom stylesheet specified
200 Element stylesheet_elem = (Element)GSXML.getChildByTagName(oai_config, OAIXML.STYLESHEET);
201 if (stylesheet_elem != null) {
202 String value = GSXML.getNodeText(stylesheet_elem);
203 if (!value.equals("")) {
204 oai_stylesheet = value;
205 }
206 }
207
208 }
209 }
210 private void initVerbs() {
211 verb_set = new HashSet();
212 verb_set.add(OAIXML.GET_RECORD);
213 verb_set.add(OAIXML.LIST_RECORDS);
214 verb_set.add(OAIXML.LIST_IDENTIFIERS);
215 verb_set.add(OAIXML.LIST_SETS);
216 verb_set.add(OAIXML.LIST_METADATA_FORMATS);
217 verb_set.add(OAIXML.IDENTIFY);
218 }
219
220 private void initParams() {
221 param_set = new HashSet();
222 param_set.add(OAIXML.METADATA_PREFIX);
223 param_set.add(OAIXML.FROM);
224 param_set.add(OAIXML.UNTIL);
225 param_set.add(OAIXML.SET);
226 param_set.add(OAIXML.RESUMPTION_TOKEN);
227 param_set.add(OAIXML.IDENTIFIER);
228 }
229 private void logUsageInfo(HttpServletRequest request){
230 String usageInfo = "";
231
232 String query = (queryString == null) ? request.getQueryString() : queryString;
233
234 //logged info = general-info + session-info
235 usageInfo =
236 request.getContextPath()+" "+ //session id
237 request.getServletPath()+" "+ //serlvet
238 "["+query+"]" +" "+ //the query string
239 "["+usageInfo.trim()+"]" +" "+ // params stored in a session
240 request.getRemoteAddr()+" "+ //remote address
241 request.getHeader("user-agent")+" "; //the remote brower info
242
243 logger.info(usageInfo);
244 }
245 /** return true if the url is in the form of baseURL?verb=...,
246 */
247 private boolean validate(String query, String verb) {
248 //Here in OAIServer, only the verbs are validated. All the validation for individual verb
249 // is taken in their doXXX() methods.
250 if(query == null || !query.startsWith(OAIXML.VERB+"=")) {
251 return false;
252 }
253 if (!verb_set.contains(verb)) {
254 return false;
255 }
256 return true;
257 }
258 private String getVerb(String query) {
259 if (query == null) return "";
260 int verb_start_index = query.indexOf("=") + 1;// first occurence of '='
261 int verb_end_index = query.indexOf("&");
262 if(verb_end_index == -1) {
263 return query.substring(verb_start_index);
264 }
265 return query.substring(verb_start_index, verb_end_index);
266 }
267
268 public void doGet(HttpServletRequest request, HttpServletResponse response)
269 throws ServletException, IOException {
270 logUsageInfo(request);
271 //out.println("url="+request.getRequestURL());// /oaiserver
272 //out.println("query="+request.getQueryString());// is /greenstone3
273
274 // oai always requires the content type be text/xml
275 request.setCharacterEncoding("UTF-8");
276 response.setContentType("text/xml;charset=UTF-8");
277 PrintWriter out = response.getWriter();
278
279 //
280 String lang = request.getParameter(GSParams.LANGUAGE);
281 if (lang==null || lang.equals("")) {
282 // use the default
283 lang = this.default_lang;
284 }
285 //we don't get the baseURL from the http request because what we get might be different from the one known publicly due to local network redirection.
286 //For example, puka.cs.waikato.ac.nz vs www.greenstone.org
287 //String base_url = request.getRequestURL().toString();
288 // if called by doPost (if this was originally a POST request), var queryString would have been set
289 String query = (queryString == null) ? request.getQueryString() : queryString;
290 queryString = null; // reset member variable, else no doGet will work as long as the server remains running
291
292 String[] pairs = (query==null)? null : query.split("&");//split into key/value pairs
293 String verb = getVerb(query);
294 Element xml_response = OAIXML.createBasicResponse(verb, pairs);
295 Element verb_elem = null;
296
297 if (validate(query, verb) == false) {
298 if (verb_set.contains(verb) == false) {
299 logger.error(OAIXML.BAD_VERB + ": " + query);
300 verb_elem = OAIXML.createErrorElement(OAIXML.BAD_VERB, OAIXML.ILLEGAL_OAI_VERB);
301 } else {
302 //must be something else other than bad verbs caused an error, so bad argument
303 logger.error(OAIXML.BAD_ARGUMENT + ": " + query);
304 verb_elem = OAIXML.createErrorElement(OAIXML.BAD_ARGUMENT, "");
305 }
306 xml_response.appendChild(verb_elem);
307
308 out.println("<?xml version='1.0' encoding='UTF-8' ?>");
309 if (this.use_oai_stylesheet) {
310 out.println("<?xml-stylesheet type='text/xsl' href='"+this.oai_stylesheet+"' ?>\n");
311 }
312 out.println(this.converter.getPrettyString(xml_response));
313 return;
314 }//end of if(validate
315
316 // The query is valid, we can now
317 // compose the request message to the receptionist
318 Element xml_message = this.doc.createElement(GSXML.MESSAGE_ELEM);
319 Element xml_request = this.doc.createElement(GSXML.REQUEST_ELEM);
320 // The type attribute is set to be 'oaiService' from OAIServer to OAIReceptionist.
321 //xml_request.setAttribute(GSXML.TYPE_ATT, OAIXML.OAI_SERVICE);
322 xml_request.setAttribute(GSXML.LANG_ATT, lang);
323 xml_request.setAttribute(GSXML.TO_ATT, verb);
324 addParams(xml_request, pairs);
325
326 //xml_request.setAttribute(GSXML.OUTPUT_ATT, output);????
327 xml_message.appendChild(xml_request);
328
329 Node xml_result = this.recept.process (xml_message);
330 if (xml_result == null) {
331 logger.info("xml_result is null");
332 verb_elem = OAIXML.createErrorElement("Internal error", "");
333 xml_response.appendChild(verb_elem);
334 } else {
335
336 /** All response elements are in the form (with a corresponding verb name):
337 * <message>
338 <response>
339 <verb>
340 ...
341 * <resumptionToken>
342 * .. this is optional!
343 * </resumptionToken>
344 * </verb>
345 * </response>
346 * </message>
347 */
348 Node res = GSXML.getChildByTagName(xml_result, OAIXML.RESPONSE);
349 if(res == null) {
350 logger.info("response element in xml_result is null");
351 verb_elem = OAIXML.createErrorElement("Internal error", "");
352 } else {
353 verb_elem = GSXML.getFirstElementChild(res);
354 }
355
356 if(OAIXML.oai_version.equals(OAIXML.OAI_VERSION2) ||
357 verb_elem.getTagName().equals(OAIXML.ERROR)) {
358 xml_response.appendChild(xml_response.getOwnerDocument().importNode(verb_elem, true));
359 } else {
360 GSXML.copyAllChildren(xml_response, verb_elem);
361 }
362 }
363 out.println("<?xml version='1.0' encoding='UTF-8' ?>");
364 if (this.use_oai_stylesheet) {
365 out.println("<?xml-stylesheet type='text/xsl' href='"+this.oai_stylesheet+"' ?>\n");
366 }
367 out.println (this.converter.getPrettyString (xml_response));
368 return;
369 }
370 /** append parameter elements to the request sent to the receptionist*/
371 public void addParams(Element request, String[] pairs) {
372 // no params apart from the verb
373 if (pairs == null || pairs.length < 2) return ;
374
375 /**the request xml is composed in the form: <request>
376 * <param name=.../>
377 * <param name=.../>
378 * </request>
379 *(No paramList element in between).
380 */
381 for (int i=1; i<pairs.length; i++) {
382 //the first pair in pairs is the verb=xxx
383 int index = pairs[i].indexOf("=");
384 if(index != -1){ //just a double check
385 Element param = this.doc.createElement(OAIXML.PARAM);
386 param.setAttribute(OAIXML.NAME, pairs[i].substring(0, index));
387 param.setAttribute(OAIXML.VALUE, OAIXML.oaiDecode(pairs[i].substring(index + 1)));
388 request.appendChild(param);
389 }
390 }
391 }
392
393 // For OAI version 2.0, validation tests indicated that POST needs to be supported. Some
394 // modification was required in order to ensure that the request is passed intact to doGet()
395 public void doPost(HttpServletRequest request,
396 HttpServletResponse response)
397 throws ServletException, IOException {
398
399 // the post method returns a wrapper of type RequestFacade by apache and there
400 // is no getQueryString() method defined for it. Therefore, need to work this out
401 // manually before calling doGet(request, response) so that doGet can work as before.
402
403 queryString = "";
404 Iterator parameter_entries = request.getParameterMap().entrySet().iterator();
405 while(parameter_entries.hasNext()) {
406 Map.Entry param_entry = (Map.Entry)parameter_entries.next();
407 String[] paramVals = (String[]) param_entry.getValue();
408 if(paramVals != null) {
409 if(paramVals.length > 0) {
410 logger.error("POST request received: " + param_entry.getKey() + " - " + paramVals[0]);
411 queryString = queryString + "&" + param_entry.getKey() + "=" + paramVals[0];
412 }
413 }
414 }
415 if(queryString.length() > 0) {
416 queryString = queryString.substring(1);
417 //queryString = OAIXML.oaiEncode(queryString);
418 }
419 if(queryString.equals("")) {
420 queryString = null;
421 }
422 doGet(request,response);
423 }
424}
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
Note: See TracBrowser for help on using the repository browser.