source: main/trunk/greenstone3/src/java/org/greenstone/gsdl3/OAIServer.java@ 28884

Last change on this file since 28884 was 28884, checked in by kjdon, 10 years ago

implemented handling for oaiserver?reset request - reloads the collection info. Call this on activating a collection for oaiserver. Changes in collections will be registered without having to restart tomcat

File size: 17.3 KB
Line 
1/*
2 * OAIServer.java
3 * Copyright (C) 2002 New Zealand Digital Library, http://www.nzdl.org
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
18 */
19package org.greenstone.gsdl3;
20
21import java.io.IOException;
22import java.io.PrintWriter;
23import java.util.HashSet;
24import java.util.Iterator;
25import java.util.Map;
26
27import javax.servlet.ServletConfig;
28import javax.servlet.ServletException;
29import javax.servlet.UnavailableException;
30import javax.servlet.http.HttpServletRequest;
31import javax.servlet.http.HttpServletResponse;
32
33import org.apache.log4j.Logger;
34import org.greenstone.gsdl3.comms.Communicator;
35import org.greenstone.gsdl3.comms.SOAPCommunicator;
36import org.greenstone.gsdl3.core.MessageRouter;
37import org.greenstone.gsdl3.core.OAIReceptionist;
38import org.greenstone.gsdl3.util.GSConstants;
39import org.greenstone.gsdl3.util.GSParams;
40import org.greenstone.gsdl3.util.GSXML;
41import org.greenstone.gsdl3.util.OAIResumptionToken;
42import org.greenstone.gsdl3.util.OAIXML;
43import org.greenstone.gsdl3.util.XMLConverter;
44import org.w3c.dom.Document;
45import org.w3c.dom.Element;
46import org.w3c.dom.Node;
47
48/** a servlet to serve the OAI metadata harvesting - we are using servlets instead
49 * of cgi
50 * the init method is called only once - the first time the servlet classes
51 * are loaded. Each time a request comes in to the servlet, the session()
52 * method is called in a new thread (calls doGet/doPut etc)
53 * takes the verb= type args and builds a simple request to send to
54 * the oai receptionist, which returns a result in xml, conforming to the OAI-PMH
55 * protocol.
56 * @see Receptionist
57 */
58/**
59 * OAI server configuration instructions *
60 *
61 */
62public class OAIServer extends BaseGreenstoneServlet
63{
64
65 /** the receptionist to send messages to */
66 protected OAIReceptionist recept = null;
67 /**
68 * the default language - is specified by setting a servlet param, otherwise
69 * DEFAULT_LANG is used
70 */
71 protected String default_lang = null;
72 /**
73 * The default default - used if a default lang is not specified in the
74 * servlet params
75 */
76 protected final String DEFAULT_LANG = "en";
77
78 /**
79 * a converter class to parse XML and create Documents for
80 * XML generation
81 */
82 protected XMLConverter converter = null;
83
84 /** A HashSet which contains all the legal verbs. */
85 protected HashSet<String> verb_set = null;
86 /**
87 * A HashSet which contains all the legal oai keys in the key/value argument
88 * pair.
89 */
90 protected HashSet<String> param_set = null;
91 /**
92 * The name of the site with which we will finally be dealing, whether it is
93 * a local site or a remote site through a communicator.
94 */
95 protected String site = "";
96
97 // can be overriddden in OAIConfig.xml
98 // do we output the stylesheet processing instruction?
99 protected boolean use_oai_stylesheet = true;
100 protected String oai_stylesheet = "interfaces/oai/oai2.xsl";
101
102 // there is no getQueryString() method in the HttpServletRequest returned from doPost,
103 // since that is actually of type apache RequestFacade, and doesn't define such a method
104 protected String queryString = null;
105
106 static Logger logger = Logger.getLogger(org.greenstone.gsdl3.OAIServer.class.getName());
107
108 /**
109 * initialise the servlet
110 */
111 public void init(ServletConfig config) throws ServletException
112 {
113 // always call super.init, i.e., HttpServlet.;
114 super.init(config);
115 this.default_lang = config.getInitParameter(GSConstants.DEFAULT_LANG);
116
117 initVerbs();
118 initParams();
119
120 String site_name = config.getInitParameter(GSConstants.SITE_NAME);
121 String remote_site_name = null;
122 String remote_site_type = null;
123 String remote_site_address = null;
124
125 if (site_name == null)
126 {
127 // no local site, try for communicator (remote site)
128 remote_site_name = config.getInitParameter("remote_site_name");
129 remote_site_type = config.getInitParameter("remote_site_type");
130 remote_site_address = config.getInitParameter("remote_site_address");
131 if (remote_site_name == null || remote_site_type == null || remote_site_address == null)
132 {
133 logger.error("initialisation paramters not all set!");
134 logger.error("if site_name is not set, then you must have remote_site_name, remote_site_type and remote_site_address set");
135 throw new UnavailableException("OAIServer: incorrect servlet parameters");
136 }
137 }
138
139 if (this.default_lang == null)
140 {
141 // choose english
142 this.default_lang = DEFAULT_LANG;
143 }
144
145 // the receptionist -the servlet will talk to this
146 this.recept = new OAIReceptionist();
147 // the converter - used to get new Documents to generate XML messages
148 this.converter = new XMLConverter();
149 // the receptionist uses a MessageRouter or Communicator to send its requests to. We either create a MessageRouter here for the designated site (if site_name set), or we create a Communicator for a remote site. The is given to teh Receptionist, and the servlet never talks to it again.directly.
150 if (site_name != null)
151 {
152 //this site_name could consist of comma separated more than one site name.
153 String mr_name = (String) config.getInitParameter("messagerouter_class");
154 MessageRouter message_router = null;
155 if (mr_name == null)
156 { // just use the normal MR *********
157 message_router = new MessageRouter();
158 }
159 else
160 { // try the specified one
161 try
162 {
163 message_router = (MessageRouter) Class.forName("org.greenstone.gsdl3.core." + mr_name).newInstance();
164 }
165 catch (Exception e)
166 { // cant use this new one, so use normal one
167 logger.error("OAIServlet configure exception when trying to use a new MessageRouter " + mr_name, e);
168 message_router = new MessageRouter();
169 }
170 }
171
172 message_router.setSiteName(site_name);
173 // lots of work is done in this step; see MessageRouter.java
174 if (!message_router.configure()) {
175 throw new UnavailableException("OAIServer: Couldn't configure MessageRouter");
176 }
177 this.recept.setSiteName(site_name);
178 this.recept.setMessageRouter(message_router);
179
180 }
181 else
182 {
183 // talking to a remote site, create a communicator
184 Communicator communicator = null;
185 // we need to create the XML to configure the communicator
186 Document site_doc = this.converter.newDOM();
187 Element site_elem = site_doc.createElement(GSXML.SITE_ELEM);
188 site_elem.setAttribute(GSXML.TYPE_ATT, remote_site_type);
189 site_elem.setAttribute(GSXML.NAME_ATT, remote_site_name);
190 site_elem.setAttribute(GSXML.ADDRESS_ATT, remote_site_address);
191
192 if (remote_site_type.equals(GSXML.COMM_TYPE_SOAP_JAVA))
193 {
194 communicator = new SOAPCommunicator();
195 }
196 else
197 {
198 logger.error("OAIServlet.init Error: invalid Communicator type: " + remote_site_type);
199 throw new UnavailableException("OAIServer: invalid communicator type");
200 }
201
202 if (!communicator.configure(site_elem))
203 {
204 logger.error("OAIServlet.init Error: Couldn't configure communicator");
205 throw new UnavailableException("OAIServer: Couldn't configure communicator");
206 }
207 this.recept.setSiteName(remote_site_name);
208 this.recept.setMessageRouter(communicator);
209 }
210
211 // Read in OAIConfig.xml (residing web/WEB-INF/classes/) and
212 //use it to configure the receptionist.
213 Element oai_config = OAIXML.getOAIConfigXML();
214 if (oai_config == null)
215 {
216 logger.error("Fail to parse oai config file OAIConfig.xml.");
217 throw new UnavailableException("OAIServer: Couldn't parse OAIConfig.xml");
218 }
219 // pass it to the receptionist
220 if (!this.recept.configure(oai_config)) {
221 logger.error("Couldn't configure receptionist");
222 throw new UnavailableException("OAIServer: Couldn't configure receptionist");
223 }
224 // also, we have something we want to get from here - useOAIStylesheet
225 this.configure(oai_config);
226 // Initialise the resumption tokens
227 OAIResumptionToken.init();
228
229 }//end of init()
230
231 private void configure(Element oai_config)
232 {
233 Element use_stylesheet_elem = (Element) GSXML.getChildByTagName(oai_config, OAIXML.USE_STYLESHEET);
234 if (use_stylesheet_elem != null)
235 {
236 String value = GSXML.getNodeText(use_stylesheet_elem);
237 if (value.equals("no"))
238 {
239 this.use_oai_stylesheet = false;
240 }
241 }
242 if (this.use_oai_stylesheet)
243 {
244 // now see if there is a custom stylesheet specified
245 Element stylesheet_elem = (Element) GSXML.getChildByTagName(oai_config, OAIXML.STYLESHEET);
246 if (stylesheet_elem != null)
247 {
248 String value = GSXML.getNodeText(stylesheet_elem);
249 if (!value.equals(""))
250 {
251 oai_stylesheet = value;
252 }
253 }
254
255 }
256 }
257
258 private void initVerbs()
259 {
260 verb_set = new HashSet<String>();
261 verb_set.add(OAIXML.GET_RECORD);
262 verb_set.add(OAIXML.LIST_RECORDS);
263 verb_set.add(OAIXML.LIST_IDENTIFIERS);
264 verb_set.add(OAIXML.LIST_SETS);
265 verb_set.add(OAIXML.LIST_METADATA_FORMATS);
266 verb_set.add(OAIXML.IDENTIFY);
267 }
268
269 private void initParams()
270 {
271 param_set = new HashSet<String>();
272 param_set.add(OAIXML.METADATA_PREFIX);
273 param_set.add(OAIXML.FROM);
274 param_set.add(OAIXML.UNTIL);
275 param_set.add(OAIXML.SET);
276 param_set.add(OAIXML.RESUMPTION_TOKEN);
277 param_set.add(OAIXML.IDENTIFIER);
278 }
279
280 private void logUsageInfo(HttpServletRequest request)
281 {
282 String usageInfo = "";
283
284 String query = (queryString == null) ? request.getQueryString() : queryString;
285
286 //logged info = general-info + session-info
287 usageInfo = request.getContextPath() + " " + //session id
288 request.getServletPath() + " " + //serlvet
289 "[" + query + "]" + " " + //the query string
290 "[" + usageInfo.trim() + "]" + " " + // params stored in a session
291 request.getRemoteAddr() + " " + //remote address
292 request.getHeader("user-agent") + " "; //the remote brower info
293
294 logger.info(usageInfo);
295 }
296
297 /**
298 * return true if the url is in the form of baseURL?verb=...,
299 */
300 private boolean validate(String query, String verb)
301 {
302 //Here in OAIServer, only the verbs are validated. All the validation for individual verb
303 // is taken in their doXXX() methods.
304 if (query == null || !query.startsWith(OAIXML.VERB + "="))
305 {
306 return false;
307 }
308 if (!verb_set.contains(verb))
309 {
310 return false;
311 }
312 return true;
313 }
314
315 private String getVerb(String query)
316 {
317 if (query == null)
318 return "";
319 int verb_start_index = query.indexOf("=") + 1;// first occurence of '='
320 int verb_end_index = query.indexOf("&");
321 if (verb_end_index == -1)
322 {
323 return query.substring(verb_start_index);
324 }
325 return query.substring(verb_start_index, verb_end_index);
326 }
327
328 public void doGet(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException
329 {
330 logUsageInfo(request);
331
332 // oai always requires the content type be text/xml
333 request.setCharacterEncoding("UTF-8");
334 response.setContentType("text/xml;charset=UTF-8");
335 PrintWriter out = response.getWriter();
336
337 //
338 String lang = request.getParameter(GSParams.LANGUAGE);
339 if (lang == null || lang.equals(""))
340 {
341 // use the default
342 lang = this.default_lang;
343 }
344 //we don't get the baseURL from the http request because what we get might be different from the one known publicly due to local network redirection.
345 //For example, puka.cs.waikato.ac.nz vs www.greenstone.org
346 //String base_url = request.getRequestURL().toString();
347
348 // if called by doPost (if this was originally a POST request), var queryString would have been set
349 String query = (queryString == null) ? request.getQueryString() : queryString;
350 queryString = null; // reset member variable, else no doGet will work as long as the server remains running
351
352 if (query!=null && query.equals("reset")) {
353 logger.error("reset was called*******************");
354 out.println("<?xml version='1.0' encoding='UTF-8' ?>");
355 out.println(this.recept.process("<message><request reset='true'/></message>"));
356 return;
357 }
358 String[] pairs = (query == null) ? null : query.split("&");//split into key/value pairs
359
360 String verb = getVerb(query);
361 Document response_doc = this.converter.newDOM();
362 Element xml_response = OAIXML.createBasicResponse(response_doc, verb, pairs);
363 Element verb_elem = null;
364
365 if (validate(query, verb) == false)
366 {
367 if (verb_set.contains(verb) == false)
368 {
369 logger.error(OAIXML.BAD_VERB + ": " + query);
370 verb_elem = OAIXML.createErrorElement(response_doc, OAIXML.BAD_VERB, OAIXML.ILLEGAL_OAI_VERB);
371 }
372 else
373 {
374 //must be something else other than bad verbs caused an error, so bad argument
375 logger.error(OAIXML.BAD_ARGUMENT + ": " + query);
376 verb_elem = OAIXML.createErrorElement(response_doc, OAIXML.BAD_ARGUMENT, "");
377 }
378 xml_response.appendChild(verb_elem);
379
380 out.println("<?xml version='1.0' encoding='UTF-8' ?>");
381 if (this.use_oai_stylesheet)
382 {
383 out.println("<?xml-stylesheet type='text/xsl' href='" + this.oai_stylesheet + "' ?>\n");
384 }
385 out.println(this.converter.getPrettyString(xml_response));
386 return;
387 }//end of if(validate
388
389 // The query is valid, we can now
390 // compose the request message to the receptionist
391 Document request_doc = this.converter.newDOM();
392 Element xml_message = request_doc.createElement(GSXML.MESSAGE_ELEM);
393 Element xml_request = request_doc.createElement(GSXML.REQUEST_ELEM);
394 // The type attribute is set to be 'oaiService' from OAIServer to OAIReceptionist.
395 //xml_request.setAttribute(GSXML.TYPE_ATT, OAIXML.OAI_SERVICE);
396 xml_request.setAttribute(GSXML.LANG_ATT, lang);
397 xml_request.setAttribute(GSXML.TO_ATT, verb);
398 addParams(xml_request, pairs);
399
400 //xml_request.setAttribute(GSXML.OUTPUT_ATT, output);????
401 xml_message.appendChild(xml_request);
402
403 Node xml_result = this.recept.process(xml_message);
404 if (xml_result == null)
405 {
406 logger.info("xml_result is null");
407 verb_elem = OAIXML.createErrorElement(response_doc, "Internal error", "");
408 xml_response.appendChild(verb_elem);
409 }
410 else
411 {
412
413 /**
414 * All response elements are in the form (with a corresponding verb
415 * name): <message> <response> <verb> ... <resumptionToken> .. this
416 * is optional! </resumptionToken> </verb> </response> </message>
417 */
418 Node res = GSXML.getChildByTagName(xml_result, GSXML.RESPONSE_ELEM);
419 if (res == null)
420 {
421 logger.info("response element in xml_result is null");
422 verb_elem = OAIXML.createErrorElement(response_doc, "Internal error", "");
423 }
424 else
425 {
426 verb_elem = GSXML.getFirstElementChild(res);
427 }
428
429 if ( verb_elem.getTagName().equals(OAIXML.ERROR))
430 {
431 xml_response.appendChild(response_doc.importNode(verb_elem, true));
432 }
433 else if (OAIXML.oai_version.equals(OAIXML.OAI_VERSION2)) {
434 xml_response.appendChild(response_doc.importNode(verb_elem, true));
435 }
436 else
437 {
438 GSXML.copyAllChildren(xml_response, verb_elem);
439 }
440 }
441 out.println("<?xml version='1.0' encoding='UTF-8' ?>");
442 if (this.use_oai_stylesheet)
443 {
444 out.println("<?xml-stylesheet type='text/xsl' href='" + this.oai_stylesheet + "' ?>\n");
445 }
446 out.println(this.converter.getPrettyString(xml_response));
447 return;
448 }
449
450 /** append parameter elements to the request sent to the receptionist */
451 public void addParams(Element request, String[] pairs)
452 {
453 Document doc = request.getOwnerDocument();
454 // no params apart from the verb
455 if (pairs == null || pairs.length < 2)
456 return;
457
458 /**
459 * the request xml is composed in the form: <request> <param name=.../>
460 * <param name=.../> </request> (No paramList element in between).
461 */
462 for (int i = 1; i < pairs.length; i++)
463 {
464 //the first pair in pairs is the verb=xxx
465 int index = pairs[i].indexOf("=");
466 if (index != -1)
467 { //just a double check
468 Element param = GSXML.createParameter(doc, pairs[i].substring(0, index), OAIXML.oaiDecode(pairs[i].substring(index + 1)));
469 request.appendChild(param);
470 }
471 }
472 }
473
474 // For OAI version 2.0, validation tests indicated that POST needs to be supported. Some
475 // modification was required in order to ensure that the request is passed intact to doGet()
476 public void doPost(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException
477 {
478
479 // the post method returns a wrapper of type RequestFacade by apache and there
480 // is no getQueryString() method defined for it. Therefore, need to work this out
481 // manually before calling doGet(request, response) so that doGet can work as before.
482
483 queryString = "";
484 Iterator parameter_entries = request.getParameterMap().entrySet().iterator();
485 while (parameter_entries.hasNext())
486 {
487 Map.Entry param_entry = (Map.Entry) parameter_entries.next();
488 String[] paramVals = (String[]) param_entry.getValue();
489 if (paramVals != null)
490 {
491 if (paramVals.length > 0)
492 {
493 logger.error("POST request received: " + param_entry.getKey() + " - " + paramVals[0]);
494 queryString = queryString + "&" + param_entry.getKey() + "=" + paramVals[0];
495 }
496 }
497 }
498 if (queryString.length() > 0)
499 {
500 queryString = queryString.substring(1);
501 //queryString = OAIXML.oaiEncode(queryString);
502 }
503 if (queryString.equals(""))
504 {
505 queryString = null;
506 }
507 doGet(request, response);
508 }
509
510
511 public void destroy()
512 {
513 recept.cleanUp();
514 }
515
516}
Note: See TracBrowser for help on using the repository browser.