source: main/trunk/greenstone3/src/java/org/greenstone/gsdl3/OAIServer.java@ 26458

Last change on this file since 26458 was 25717, checked in by sjm84, 12 years ago

Added a BaseGreenstoneServlet servlet that the other servlets now inherit from so that we can be sure that GlobalProperties is properly initialised. There is also some reformatting and import cleaning

File size: 16.8 KB
Line 
1/*
2 * OAIServer.java
3 * Copyright (C) 2002 New Zealand Digital Library, http://www.nzdl.org
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
18 */
19package org.greenstone.gsdl3;
20
21import java.io.IOException;
22import java.io.PrintWriter;
23import java.util.HashSet;
24import java.util.Iterator;
25import java.util.Map;
26
27import javax.servlet.ServletConfig;
28import javax.servlet.ServletException;
29import javax.servlet.http.HttpServletRequest;
30import javax.servlet.http.HttpServletResponse;
31
32import org.apache.log4j.Logger;
33import org.greenstone.gsdl3.comms.Communicator;
34import org.greenstone.gsdl3.comms.SOAPCommunicator;
35import org.greenstone.gsdl3.core.MessageRouter;
36import org.greenstone.gsdl3.core.OAIReceptionist;
37import org.greenstone.gsdl3.util.GSConstants;
38import org.greenstone.gsdl3.util.GSParams;
39import org.greenstone.gsdl3.util.GSXML;
40import org.greenstone.gsdl3.util.OAIXML;
41import org.greenstone.gsdl3.util.XMLConverter;
42import org.w3c.dom.Document;
43import org.w3c.dom.Element;
44import org.w3c.dom.Node;
45
46/** a servlet to serve the OAI metadata harvesting - we are using servlets instead
47 * of cgi
48 * the init method is called only once - the first time the servlet classes
49 * are loaded. Each time a request comes in to the servlet, the session()
50 * method is called in a new thread (calls doGet/doPut etc)
51 * takes the verb= type args and builds a simple request to send to
52 * the oai receptionist, which returns a result in xml, conforming to the OAI-PMH
53 * protocol.
54 * @see Receptionist
55 */
56/**
57 * OAI server configuration instructions *
58 *
59 */
60public class OAIServer extends BaseGreenstoneServlet
61{
62
63 /** the receptionist to send messages to */
64 protected OAIReceptionist recept = null;
65 /**
66 * the default language - is specified by setting a servlet param, otherwise
67 * DEFAULT_LANG is used
68 */
69 protected String default_lang = null;
70 /**
71 * The default default - used if a default lang is not specified in the
72 * servlet params
73 */
74 protected final String DEFAULT_LANG = "en";
75
76 /**
77 * a converter class to parse XML and create Docs This is only used for
78 * generating internal requests passed to MessageRouter. The response
79 * message is generated by parsing an existing xml skeleton file
80 * (web/WEB-INF/oaixml/oaiversion2.xml, for example).
81 */
82 protected XMLConverter converter = null;
83 /**
84 * container Document to create XML Nodes (but only request to the oai
85 * receptionist, not response (which is created in OAIXML.java) created by
86 * converter class
87 */
88 protected Document doc = null;
89
90 /** A HashSet which contains all the legal verbs. */
91 protected HashSet<String> verb_set = null;
92 /**
93 * A HashSet which contains all the legal oai keys in the key/value argument
94 * pair.
95 */
96 protected HashSet<String> param_set = null;
97 /**
98 * The name of the site with which we will finally be dealing, whether it is
99 * a local site or a remote site through a communicator.
100 */
101 protected String site = "";
102
103 // do we output the stylesheet processing instruction?
104 protected boolean use_oai_stylesheet = true;
105 protected String oai_stylesheet = "interfaces/oai/oai2.xsl";
106
107 // there is no getQueryString() method in the HttpServletRequest returned from doPost,
108 // since that is actually of type apache RequestFacade, and doesn't define such a method
109 protected String queryString = null;
110
111 static Logger logger = Logger.getLogger(org.greenstone.gsdl3.OAIServer.class.getName());
112
113 /**
114 * initialise the servlet
115 */
116 public void init(ServletConfig config) throws ServletException
117 {
118 // always call super.init, i.e., HttpServlet.;
119 super.init(config);
120 this.default_lang = config.getInitParameter(GSConstants.DEFAULT_LANG);
121
122 initVerbs();
123 initParams();
124
125 String site_name = config.getInitParameter(GSConstants.SITE_NAME);
126 String remote_site_name = null;
127 String remote_site_type = null;
128 String remote_site_address = null;
129
130 if (site_name == null)
131 {
132 // no local site, try for communicator (remote site)
133 remote_site_name = config.getInitParameter("remote_site_name");
134 remote_site_type = config.getInitParameter("remote_site_type");
135 remote_site_address = config.getInitParameter("remote_site_address");
136 if (remote_site_name == null || remote_site_type == null || remote_site_address == null)
137 {
138 System.err.println("initialisation paramters not all set!");
139 System.err.println("if site_name is not set, then you must have remote_site_name, remote_site_type and remote_site_address set");
140 System.exit(1);
141 }
142 }
143
144 if (this.default_lang == null)
145 {
146 // choose english
147 this.default_lang = DEFAULT_LANG;
148 }
149
150 // the receptionist -the servlet will talk to this
151 this.recept = new OAIReceptionist();
152
153 // the receptionist uses a MessageRouter or Communicator to send its requests to. We either create a MessageRouter here for the designated site (if site_name set), or we create a Communicator for a remote site. The is given to teh Receptionist, and the servlet never talks to it again.directly.
154 if (site_name != null)
155 {
156 //this site_name could consist of comma separated more than one site name.
157 String mr_name = (String) config.getInitParameter("messagerouter_class");
158 MessageRouter message_router = null;
159 if (mr_name == null)
160 { // just use the normal MR *********
161 message_router = new MessageRouter();
162 }
163 else
164 { // try the specified one
165 try
166 {
167 message_router = (MessageRouter) Class.forName("org.greenstone.gsdl3.core." + mr_name).newInstance();
168 }
169 catch (Exception e)
170 { // cant use this new one, so use normal one
171 System.err.println("OAIServlet configure exception when trying to use a new MessageRouter " + mr_name + ": " + e.getMessage());
172 e.printStackTrace();
173 message_router = new MessageRouter();
174 }
175 }
176
177 message_router.setSiteName(site_name);
178 // lots of work is done in this step; see MessageRouter.java
179 message_router.configure();
180 this.recept.setSiteName(site_name);
181 this.recept.setMessageRouter(message_router);
182
183 }
184 else
185 {
186 // talking to a remote site, create a communicator
187 Communicator communicator = null;
188 // we need to create the XML to configure the communicator
189 Element site_elem = this.doc.createElement(GSXML.SITE_ELEM);
190 site_elem.setAttribute(GSXML.TYPE_ATT, remote_site_type);
191 site_elem.setAttribute(GSXML.NAME_ATT, remote_site_name);
192 site_elem.setAttribute(GSXML.ADDRESS_ATT, remote_site_address);
193
194 if (remote_site_type.equals(GSXML.COMM_TYPE_SOAP_JAVA))
195 {
196 communicator = new SOAPCommunicator();
197 }
198 else
199 {
200 System.err.println("OAIServlet.init Error: invalid Communicator type: " + remote_site_type);
201 System.exit(1);
202 }
203
204 if (!communicator.configure(site_elem))
205 {
206 System.err.println("OAIServlet.init Error: Couldn't configure communicator");
207 System.exit(1);
208 }
209 this.recept.setSiteName(remote_site_name);
210 this.recept.setMessageRouter(communicator);
211 }
212 // used for composing internal xml requests, but not xml responses.
213 // the converter may be used to get pretty xml, though.
214 this.converter = new XMLConverter();
215 this.doc = this.converter.newDOM();
216
217 // Read in OAIConfig.xml (residing web/WEB-INF/classes/) and
218 //use it to configure the receptionist. The init() is also called in which
219 //the resumption token file is read in and all expired tokens cleared.
220 Element oai_config = OAIXML.getOAIConfigXML();
221 if (oai_config == null)
222 {
223 logger.error("Fail to parse oai config file OAIConfig.xml.");
224 return;
225 }
226 // pass it to the receptionist
227 this.recept.configure(oai_config);
228
229 // also, we have something we want to get from here - useOAIStylesheet
230 this.configure(oai_config);
231 }//end of init()
232
233 private void configure(Element oai_config)
234 {
235 Element use_stylesheet_elem = (Element) GSXML.getChildByTagName(oai_config, OAIXML.USE_STYLESHEET);
236 if (use_stylesheet_elem != null)
237 {
238 String value = GSXML.getNodeText(use_stylesheet_elem);
239 if (value.equals("no"))
240 {
241 this.use_oai_stylesheet = false;
242 }
243 }
244 if (this.use_oai_stylesheet)
245 {
246 // now see if there is a custom stylesheet specified
247 Element stylesheet_elem = (Element) GSXML.getChildByTagName(oai_config, OAIXML.STYLESHEET);
248 if (stylesheet_elem != null)
249 {
250 String value = GSXML.getNodeText(stylesheet_elem);
251 if (!value.equals(""))
252 {
253 oai_stylesheet = value;
254 }
255 }
256
257 }
258 }
259
260 private void initVerbs()
261 {
262 verb_set = new HashSet<String>();
263 verb_set.add(OAIXML.GET_RECORD);
264 verb_set.add(OAIXML.LIST_RECORDS);
265 verb_set.add(OAIXML.LIST_IDENTIFIERS);
266 verb_set.add(OAIXML.LIST_SETS);
267 verb_set.add(OAIXML.LIST_METADATA_FORMATS);
268 verb_set.add(OAIXML.IDENTIFY);
269 }
270
271 private void initParams()
272 {
273 param_set = new HashSet<String>();
274 param_set.add(OAIXML.METADATA_PREFIX);
275 param_set.add(OAIXML.FROM);
276 param_set.add(OAIXML.UNTIL);
277 param_set.add(OAIXML.SET);
278 param_set.add(OAIXML.RESUMPTION_TOKEN);
279 param_set.add(OAIXML.IDENTIFIER);
280 }
281
282 private void logUsageInfo(HttpServletRequest request)
283 {
284 String usageInfo = "";
285
286 String query = (queryString == null) ? request.getQueryString() : queryString;
287
288 //logged info = general-info + session-info
289 usageInfo = request.getContextPath() + " " + //session id
290 request.getServletPath() + " " + //serlvet
291 "[" + query + "]" + " " + //the query string
292 "[" + usageInfo.trim() + "]" + " " + // params stored in a session
293 request.getRemoteAddr() + " " + //remote address
294 request.getHeader("user-agent") + " "; //the remote brower info
295
296 logger.info(usageInfo);
297 }
298
299 /**
300 * return true if the url is in the form of baseURL?verb=...,
301 */
302 private boolean validate(String query, String verb)
303 {
304 //Here in OAIServer, only the verbs are validated. All the validation for individual verb
305 // is taken in their doXXX() methods.
306 if (query == null || !query.startsWith(OAIXML.VERB + "="))
307 {
308 return false;
309 }
310 if (!verb_set.contains(verb))
311 {
312 return false;
313 }
314 return true;
315 }
316
317 private String getVerb(String query)
318 {
319 if (query == null)
320 return "";
321 int verb_start_index = query.indexOf("=") + 1;// first occurence of '='
322 int verb_end_index = query.indexOf("&");
323 if (verb_end_index == -1)
324 {
325 return query.substring(verb_start_index);
326 }
327 return query.substring(verb_start_index, verb_end_index);
328 }
329
330 public void doGet(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException
331 {
332 logUsageInfo(request);
333 //out.println("url="+request.getRequestURL());// /oaiserver
334 //out.println("query="+request.getQueryString());// is /greenstone3
335
336 // oai always requires the content type be text/xml
337 request.setCharacterEncoding("UTF-8");
338 response.setContentType("text/xml;charset=UTF-8");
339 PrintWriter out = response.getWriter();
340
341 //
342 String lang = request.getParameter(GSParams.LANGUAGE);
343 if (lang == null || lang.equals(""))
344 {
345 // use the default
346 lang = this.default_lang;
347 }
348 //we don't get the baseURL from the http request because what we get might be different from the one known publicly due to local network redirection.
349 //For example, puka.cs.waikato.ac.nz vs www.greenstone.org
350 //String base_url = request.getRequestURL().toString();
351 // if called by doPost (if this was originally a POST request), var queryString would have been set
352 String query = (queryString == null) ? request.getQueryString() : queryString;
353 queryString = null; // reset member variable, else no doGet will work as long as the server remains running
354
355 String[] pairs = (query == null) ? null : query.split("&");//split into key/value pairs
356 String verb = getVerb(query);
357 Element xml_response = OAIXML.createBasicResponse(verb, pairs);
358 Element verb_elem = null;
359
360 if (validate(query, verb) == false)
361 {
362 if (verb_set.contains(verb) == false)
363 {
364 logger.error(OAIXML.BAD_VERB + ": " + query);
365 verb_elem = OAIXML.createErrorElement(OAIXML.BAD_VERB, OAIXML.ILLEGAL_OAI_VERB);
366 }
367 else
368 {
369 //must be something else other than bad verbs caused an error, so bad argument
370 logger.error(OAIXML.BAD_ARGUMENT + ": " + query);
371 verb_elem = OAIXML.createErrorElement(OAIXML.BAD_ARGUMENT, "");
372 }
373 xml_response.appendChild(verb_elem);
374
375 out.println("<?xml version='1.0' encoding='UTF-8' ?>");
376 if (this.use_oai_stylesheet)
377 {
378 out.println("<?xml-stylesheet type='text/xsl' href='" + this.oai_stylesheet + "' ?>\n");
379 }
380 out.println(this.converter.getPrettyString(xml_response));
381 return;
382 }//end of if(validate
383
384 // The query is valid, we can now
385 // compose the request message to the receptionist
386 Element xml_message = this.doc.createElement(GSXML.MESSAGE_ELEM);
387 Element xml_request = this.doc.createElement(GSXML.REQUEST_ELEM);
388 // The type attribute is set to be 'oaiService' from OAIServer to OAIReceptionist.
389 //xml_request.setAttribute(GSXML.TYPE_ATT, OAIXML.OAI_SERVICE);
390 xml_request.setAttribute(GSXML.LANG_ATT, lang);
391 xml_request.setAttribute(GSXML.TO_ATT, verb);
392 addParams(xml_request, pairs);
393
394 //xml_request.setAttribute(GSXML.OUTPUT_ATT, output);????
395 xml_message.appendChild(xml_request);
396
397 Node xml_result = this.recept.process(xml_message);
398 if (xml_result == null)
399 {
400 logger.info("xml_result is null");
401 verb_elem = OAIXML.createErrorElement("Internal error", "");
402 xml_response.appendChild(verb_elem);
403 }
404 else
405 {
406
407 /**
408 * All response elements are in the form (with a corresponding verb
409 * name): <message> <response> <verb> ... <resumptionToken> .. this
410 * is optional! </resumptionToken> </verb> </response> </message>
411 */
412 Node res = GSXML.getChildByTagName(xml_result, OAIXML.RESPONSE);
413 if (res == null)
414 {
415 logger.info("response element in xml_result is null");
416 verb_elem = OAIXML.createErrorElement("Internal error", "");
417 }
418 else
419 {
420 verb_elem = GSXML.getFirstElementChild(res);
421 }
422
423 if (OAIXML.oai_version.equals(OAIXML.OAI_VERSION2) || verb_elem.getTagName().equals(OAIXML.ERROR))
424 {
425 xml_response.appendChild(xml_response.getOwnerDocument().importNode(verb_elem, true));
426 }
427 else
428 {
429 GSXML.copyAllChildren(xml_response, verb_elem);
430 }
431 }
432 out.println("<?xml version='1.0' encoding='UTF-8' ?>");
433 if (this.use_oai_stylesheet)
434 {
435 out.println("<?xml-stylesheet type='text/xsl' href='" + this.oai_stylesheet + "' ?>\n");
436 }
437 out.println(this.converter.getPrettyString(xml_response));
438 return;
439 }
440
441 /** append parameter elements to the request sent to the receptionist */
442 public void addParams(Element request, String[] pairs)
443 {
444 // no params apart from the verb
445 if (pairs == null || pairs.length < 2)
446 return;
447
448 /**
449 * the request xml is composed in the form: <request> <param name=.../>
450 * <param name=.../> </request> (No paramList element in between).
451 */
452 for (int i = 1; i < pairs.length; i++)
453 {
454 //the first pair in pairs is the verb=xxx
455 int index = pairs[i].indexOf("=");
456 if (index != -1)
457 { //just a double check
458 Element param = this.doc.createElement(OAIXML.PARAM);
459 param.setAttribute(OAIXML.NAME, pairs[i].substring(0, index));
460 param.setAttribute(OAIXML.VALUE, OAIXML.oaiDecode(pairs[i].substring(index + 1)));
461 request.appendChild(param);
462 }
463 }
464 }
465
466 // For OAI version 2.0, validation tests indicated that POST needs to be supported. Some
467 // modification was required in order to ensure that the request is passed intact to doGet()
468 public void doPost(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException
469 {
470
471 // the post method returns a wrapper of type RequestFacade by apache and there
472 // is no getQueryString() method defined for it. Therefore, need to work this out
473 // manually before calling doGet(request, response) so that doGet can work as before.
474
475 queryString = "";
476 Iterator parameter_entries = request.getParameterMap().entrySet().iterator();
477 while (parameter_entries.hasNext())
478 {
479 Map.Entry param_entry = (Map.Entry) parameter_entries.next();
480 String[] paramVals = (String[]) param_entry.getValue();
481 if (paramVals != null)
482 {
483 if (paramVals.length > 0)
484 {
485 logger.error("POST request received: " + param_entry.getKey() + " - " + paramVals[0]);
486 queryString = queryString + "&" + param_entry.getKey() + "=" + paramVals[0];
487 }
488 }
489 }
490 if (queryString.length() > 0)
491 {
492 queryString = queryString.substring(1);
493 //queryString = OAIXML.oaiEncode(queryString);
494 }
495 if (queryString.equals(""))
496 {
497 queryString = null;
498 }
499 doGet(request, response);
500 }
501}
Note: See TracBrowser for help on using the repository browser.