source: main/trunk/greenstone3/src/java/org/greenstone/gsdl3/OAIServer.java@ 28852

Last change on this file since 28852 was 28852, checked in by kjdon, 10 years ago

Got rid of this.document. DOM not thread safe, so now we must create a new Document each time we are generating a message. A little bit of tidying up too.

File size: 16.9 KB
Line 
1/*
2 * OAIServer.java
3 * Copyright (C) 2002 New Zealand Digital Library, http://www.nzdl.org
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
18 */
19package org.greenstone.gsdl3;
20
21import java.io.IOException;
22import java.io.PrintWriter;
23import java.util.HashSet;
24import java.util.Iterator;
25import java.util.Map;
26
27import javax.servlet.ServletConfig;
28import javax.servlet.ServletException;
29import javax.servlet.UnavailableException;
30import javax.servlet.http.HttpServletRequest;
31import javax.servlet.http.HttpServletResponse;
32
33import org.apache.log4j.Logger;
34import org.greenstone.gsdl3.comms.Communicator;
35import org.greenstone.gsdl3.comms.SOAPCommunicator;
36import org.greenstone.gsdl3.core.MessageRouter;
37import org.greenstone.gsdl3.core.OAIReceptionist;
38import org.greenstone.gsdl3.util.GSConstants;
39import org.greenstone.gsdl3.util.GSParams;
40import org.greenstone.gsdl3.util.GSXML;
41import org.greenstone.gsdl3.util.OAIXML;
42import org.greenstone.gsdl3.util.XMLConverter;
43import org.w3c.dom.Document;
44import org.w3c.dom.Element;
45import org.w3c.dom.Node;
46
47/** a servlet to serve the OAI metadata harvesting - we are using servlets instead
48 * of cgi
49 * the init method is called only once - the first time the servlet classes
50 * are loaded. Each time a request comes in to the servlet, the session()
51 * method is called in a new thread (calls doGet/doPut etc)
52 * takes the verb= type args and builds a simple request to send to
53 * the oai receptionist, which returns a result in xml, conforming to the OAI-PMH
54 * protocol.
55 * @see Receptionist
56 */
57/**
58 * OAI server configuration instructions *
59 *
60 */
61public class OAIServer extends BaseGreenstoneServlet
62{
63
64 /** the receptionist to send messages to */
65 protected OAIReceptionist recept = null;
66 /**
67 * the default language - is specified by setting a servlet param, otherwise
68 * DEFAULT_LANG is used
69 */
70 protected String default_lang = null;
71 /**
72 * The default default - used if a default lang is not specified in the
73 * servlet params
74 */
75 protected final String DEFAULT_LANG = "en";
76
77 /**
78 * a converter class to parse XML and create Documents for
79 * XML generation
80 */
81 protected XMLConverter converter = null;
82
83 /** A HashSet which contains all the legal verbs. */
84 protected HashSet<String> verb_set = null;
85 /**
86 * A HashSet which contains all the legal oai keys in the key/value argument
87 * pair.
88 */
89 protected HashSet<String> param_set = null;
90 /**
91 * The name of the site with which we will finally be dealing, whether it is
92 * a local site or a remote site through a communicator.
93 */
94 protected String site = "";
95
96 // can be overriddden in OAIConfig.xml
97 // do we output the stylesheet processing instruction?
98 protected boolean use_oai_stylesheet = true;
99 protected String oai_stylesheet = "interfaces/oai/oai2.xsl";
100
101 // there is no getQueryString() method in the HttpServletRequest returned from doPost,
102 // since that is actually of type apache RequestFacade, and doesn't define such a method
103 protected String queryString = null;
104
105 static Logger logger = Logger.getLogger(org.greenstone.gsdl3.OAIServer.class.getName());
106
107 /**
108 * initialise the servlet
109 */
110 public void init(ServletConfig config) throws ServletException
111 {
112 // always call super.init, i.e., HttpServlet.;
113 super.init(config);
114 this.default_lang = config.getInitParameter(GSConstants.DEFAULT_LANG);
115
116 initVerbs();
117 initParams();
118
119 String site_name = config.getInitParameter(GSConstants.SITE_NAME);
120 String remote_site_name = null;
121 String remote_site_type = null;
122 String remote_site_address = null;
123
124 if (site_name == null)
125 {
126 // no local site, try for communicator (remote site)
127 remote_site_name = config.getInitParameter("remote_site_name");
128 remote_site_type = config.getInitParameter("remote_site_type");
129 remote_site_address = config.getInitParameter("remote_site_address");
130 if (remote_site_name == null || remote_site_type == null || remote_site_address == null)
131 {
132 logger.error("initialisation paramters not all set!");
133 logger.error("if site_name is not set, then you must have remote_site_name, remote_site_type and remote_site_address set");
134 throw new UnavailableException("OAIServer: incorrect servlet parameters");
135 }
136 }
137
138 if (this.default_lang == null)
139 {
140 // choose english
141 this.default_lang = DEFAULT_LANG;
142 }
143
144 // the receptionist -the servlet will talk to this
145 this.recept = new OAIReceptionist();
146 // the converter - used to get new Documents to generate XML messages
147 this.converter = new XMLConverter();
148 // the receptionist uses a MessageRouter or Communicator to send its requests to. We either create a MessageRouter here for the designated site (if site_name set), or we create a Communicator for a remote site. The is given to teh Receptionist, and the servlet never talks to it again.directly.
149 if (site_name != null)
150 {
151 //this site_name could consist of comma separated more than one site name.
152 String mr_name = (String) config.getInitParameter("messagerouter_class");
153 MessageRouter message_router = null;
154 if (mr_name == null)
155 { // just use the normal MR *********
156 message_router = new MessageRouter();
157 }
158 else
159 { // try the specified one
160 try
161 {
162 message_router = (MessageRouter) Class.forName("org.greenstone.gsdl3.core." + mr_name).newInstance();
163 }
164 catch (Exception e)
165 { // cant use this new one, so use normal one
166 logger.error("OAIServlet configure exception when trying to use a new MessageRouter " + mr_name, e);
167 message_router = new MessageRouter();
168 }
169 }
170
171 message_router.setSiteName(site_name);
172 // lots of work is done in this step; see MessageRouter.java
173 if (!message_router.configure()) {
174 throw new UnavailableException("OAIServer: Couldn't configure MessageRouter");
175 }
176 this.recept.setSiteName(site_name);
177 this.recept.setMessageRouter(message_router);
178
179 }
180 else
181 {
182 // talking to a remote site, create a communicator
183 Communicator communicator = null;
184 // we need to create the XML to configure the communicator
185 Document site_doc = this.converter.newDOM();
186 Element site_elem = site_doc.createElement(GSXML.SITE_ELEM);
187 site_elem.setAttribute(GSXML.TYPE_ATT, remote_site_type);
188 site_elem.setAttribute(GSXML.NAME_ATT, remote_site_name);
189 site_elem.setAttribute(GSXML.ADDRESS_ATT, remote_site_address);
190
191 if (remote_site_type.equals(GSXML.COMM_TYPE_SOAP_JAVA))
192 {
193 communicator = new SOAPCommunicator();
194 }
195 else
196 {
197 logger.error("OAIServlet.init Error: invalid Communicator type: " + remote_site_type);
198 throw new UnavailableException("OAIServer: invalid communicator type");
199 }
200
201 if (!communicator.configure(site_elem))
202 {
203 logger.error("OAIServlet.init Error: Couldn't configure communicator");
204 throw new UnavailableException("OAIServer: Couldn't configure communicator");
205 }
206 this.recept.setSiteName(remote_site_name);
207 this.recept.setMessageRouter(communicator);
208 }
209
210 // Read in OAIConfig.xml (residing web/WEB-INF/classes/) and
211 //use it to configure the receptionist.
212 Element oai_config = OAIXML.getOAIConfigXML();
213 if (oai_config == null)
214 {
215 logger.error("Fail to parse oai config file OAIConfig.xml.");
216 throw new UnavailableException("OAIServer: Couldn't parse OAIConfig.xml");
217 }
218 // pass it to the receptionist
219 if (!this.recept.configure(oai_config)) {
220 logger.error("Couldn't configure receptionist");
221 throw new UnavailableException("OAIServer: Couldn't configure receptionist");
222 }
223 // also, we have something we want to get from here - useOAIStylesheet
224 this.configure(oai_config);
225 // Initialise the resumption tokens
226 OAIResumptionToken.init();
227
228 }//end of init()
229
230 private void configure(Element oai_config)
231 {
232 Element use_stylesheet_elem = (Element) GSXML.getChildByTagName(oai_config, OAIXML.USE_STYLESHEET);
233 if (use_stylesheet_elem != null)
234 {
235 String value = GSXML.getNodeText(use_stylesheet_elem);
236 if (value.equals("no"))
237 {
238 this.use_oai_stylesheet = false;
239 }
240 }
241 if (this.use_oai_stylesheet)
242 {
243 // now see if there is a custom stylesheet specified
244 Element stylesheet_elem = (Element) GSXML.getChildByTagName(oai_config, OAIXML.STYLESHEET);
245 if (stylesheet_elem != null)
246 {
247 String value = GSXML.getNodeText(stylesheet_elem);
248 if (!value.equals(""))
249 {
250 oai_stylesheet = value;
251 }
252 }
253
254 }
255 }
256
257 private void initVerbs()
258 {
259 verb_set = new HashSet<String>();
260 verb_set.add(OAIXML.GET_RECORD);
261 verb_set.add(OAIXML.LIST_RECORDS);
262 verb_set.add(OAIXML.LIST_IDENTIFIERS);
263 verb_set.add(OAIXML.LIST_SETS);
264 verb_set.add(OAIXML.LIST_METADATA_FORMATS);
265 verb_set.add(OAIXML.IDENTIFY);
266 }
267
268 private void initParams()
269 {
270 param_set = new HashSet<String>();
271 param_set.add(OAIXML.METADATA_PREFIX);
272 param_set.add(OAIXML.FROM);
273 param_set.add(OAIXML.UNTIL);
274 param_set.add(OAIXML.SET);
275 param_set.add(OAIXML.RESUMPTION_TOKEN);
276 param_set.add(OAIXML.IDENTIFIER);
277 }
278
279 private void logUsageInfo(HttpServletRequest request)
280 {
281 String usageInfo = "";
282
283 String query = (queryString == null) ? request.getQueryString() : queryString;
284
285 //logged info = general-info + session-info
286 usageInfo = request.getContextPath() + " " + //session id
287 request.getServletPath() + " " + //serlvet
288 "[" + query + "]" + " " + //the query string
289 "[" + usageInfo.trim() + "]" + " " + // params stored in a session
290 request.getRemoteAddr() + " " + //remote address
291 request.getHeader("user-agent") + " "; //the remote brower info
292
293 logger.info(usageInfo);
294 }
295
296 /**
297 * return true if the url is in the form of baseURL?verb=...,
298 */
299 private boolean validate(String query, String verb)
300 {
301 //Here in OAIServer, only the verbs are validated. All the validation for individual verb
302 // is taken in their doXXX() methods.
303 if (query == null || !query.startsWith(OAIXML.VERB + "="))
304 {
305 return false;
306 }
307 if (!verb_set.contains(verb))
308 {
309 return false;
310 }
311 return true;
312 }
313
314 private String getVerb(String query)
315 {
316 if (query == null)
317 return "";
318 int verb_start_index = query.indexOf("=") + 1;// first occurence of '='
319 int verb_end_index = query.indexOf("&");
320 if (verb_end_index == -1)
321 {
322 return query.substring(verb_start_index);
323 }
324 return query.substring(verb_start_index, verb_end_index);
325 }
326
327 public void doGet(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException
328 {
329 logUsageInfo(request);
330
331 // oai always requires the content type be text/xml
332 request.setCharacterEncoding("UTF-8");
333 response.setContentType("text/xml;charset=UTF-8");
334 PrintWriter out = response.getWriter();
335
336 //
337 String lang = request.getParameter(GSParams.LANGUAGE);
338 if (lang == null || lang.equals(""))
339 {
340 // use the default
341 lang = this.default_lang;
342 }
343 //we don't get the baseURL from the http request because what we get might be different from the one known publicly due to local network redirection.
344 //For example, puka.cs.waikato.ac.nz vs www.greenstone.org
345 //String base_url = request.getRequestURL().toString();
346 // if called by doPost (if this was originally a POST request), var queryString would have been set
347 String query = (queryString == null) ? request.getQueryString() : queryString;
348 queryString = null; // reset member variable, else no doGet will work as long as the server remains running
349
350 String[] pairs = (query == null) ? null : query.split("&");//split into key/value pairs
351 String verb = getVerb(query);
352 Document response_doc = this.converter.newDOM();
353 Element xml_response = OAIXML.createBasicResponse(response_doc, verb, pairs);
354 Element verb_elem = null;
355
356 if (validate(query, verb) == false)
357 {
358 if (verb_set.contains(verb) == false)
359 {
360 logger.error(OAIXML.BAD_VERB + ": " + query);
361 verb_elem = OAIXML.createErrorElement(response_doc, OAIXML.BAD_VERB, OAIXML.ILLEGAL_OAI_VERB);
362 }
363 else
364 {
365 //must be something else other than bad verbs caused an error, so bad argument
366 logger.error(OAIXML.BAD_ARGUMENT + ": " + query);
367 verb_elem = OAIXML.createErrorElement(response_doc, OAIXML.BAD_ARGUMENT, "");
368 }
369 xml_response.appendChild(verb_elem);
370
371 out.println("<?xml version='1.0' encoding='UTF-8' ?>");
372 if (this.use_oai_stylesheet)
373 {
374 out.println("<?xml-stylesheet type='text/xsl' href='" + this.oai_stylesheet + "' ?>\n");
375 }
376 out.println(this.converter.getPrettyString(xml_response));
377 return;
378 }//end of if(validate
379
380 // The query is valid, we can now
381 // compose the request message to the receptionist
382 Document request_doc = this.converter.newDOM();
383 Element xml_message = request_doc.createElement(GSXML.MESSAGE_ELEM);
384 Element xml_request = request_doc.createElement(GSXML.REQUEST_ELEM);
385 // The type attribute is set to be 'oaiService' from OAIServer to OAIReceptionist.
386 //xml_request.setAttribute(GSXML.TYPE_ATT, OAIXML.OAI_SERVICE);
387 xml_request.setAttribute(GSXML.LANG_ATT, lang);
388 xml_request.setAttribute(GSXML.TO_ATT, verb);
389 addParams(xml_request, pairs);
390
391 //xml_request.setAttribute(GSXML.OUTPUT_ATT, output);????
392 xml_message.appendChild(xml_request);
393
394 Node xml_result = this.recept.process(xml_message);
395 if (xml_result == null)
396 {
397 logger.info("xml_result is null");
398 verb_elem = OAIXML.createErrorElement(response_doc, "Internal error", "");
399 xml_response.appendChild(verb_elem);
400 }
401 else
402 {
403
404 /**
405 * All response elements are in the form (with a corresponding verb
406 * name): <message> <response> <verb> ... <resumptionToken> .. this
407 * is optional! </resumptionToken> </verb> </response> </message>
408 */
409 Node res = GSXML.getChildByTagName(xml_result, GSXML.RESPONSE_ELEM);
410 if (res == null)
411 {
412 logger.info("response element in xml_result is null");
413 verb_elem = OAIXML.createErrorElement(response_doc, "Internal error", "");
414 }
415 else
416 {
417 verb_elem = GSXML.getFirstElementChild(res);
418 }
419
420 if ( verb_elem.getTagName().equals(OAIXML.ERROR))
421 {
422 xml_response.appendChild(response_doc.importNode(verb_elem, true));
423 }
424 else if (OAIXML.oai_version.equals(OAIXML.OAI_VERSION2)) {
425 xml_response.appendChild(response_doc.importNode(verb_elem, true));
426 }
427 else
428 {
429 GSXML.copyAllChildren(xml_response, verb_elem);
430 }
431 }
432 out.println("<?xml version='1.0' encoding='UTF-8' ?>");
433 if (this.use_oai_stylesheet)
434 {
435 out.println("<?xml-stylesheet type='text/xsl' href='" + this.oai_stylesheet + "' ?>\n");
436 }
437 out.println(this.converter.getPrettyString(xml_response));
438 return;
439 }
440
441 /** append parameter elements to the request sent to the receptionist */
442 public void addParams(Element request, String[] pairs)
443 {
444 Document doc = request.getOwnerDocument();
445 // no params apart from the verb
446 if (pairs == null || pairs.length < 2)
447 return;
448
449 /**
450 * the request xml is composed in the form: <request> <param name=.../>
451 * <param name=.../> </request> (No paramList element in between).
452 */
453 for (int i = 1; i < pairs.length; i++)
454 {
455 //the first pair in pairs is the verb=xxx
456 int index = pairs[i].indexOf("=");
457 if (index != -1)
458 { //just a double check
459 Element param = GSXML.createParameter(doc, pairs[i].substring(0, index), OAIXML.oaiDecode(pairs[i].substring(index + 1)));
460 request.appendChild(param);
461 }
462 }
463 }
464
465 // For OAI version 2.0, validation tests indicated that POST needs to be supported. Some
466 // modification was required in order to ensure that the request is passed intact to doGet()
467 public void doPost(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException
468 {
469
470 // the post method returns a wrapper of type RequestFacade by apache and there
471 // is no getQueryString() method defined for it. Therefore, need to work this out
472 // manually before calling doGet(request, response) so that doGet can work as before.
473
474 queryString = "";
475 Iterator parameter_entries = request.getParameterMap().entrySet().iterator();
476 while (parameter_entries.hasNext())
477 {
478 Map.Entry param_entry = (Map.Entry) parameter_entries.next();
479 String[] paramVals = (String[]) param_entry.getValue();
480 if (paramVals != null)
481 {
482 if (paramVals.length > 0)
483 {
484 logger.error("POST request received: " + param_entry.getKey() + " - " + paramVals[0]);
485 queryString = queryString + "&" + param_entry.getKey() + "=" + paramVals[0];
486 }
487 }
488 }
489 if (queryString.length() > 0)
490 {
491 queryString = queryString.substring(1);
492 //queryString = OAIXML.oaiEncode(queryString);
493 }
494 if (queryString.equals(""))
495 {
496 queryString = null;
497 }
498 doGet(request, response);
499 }
500}
Note: See TracBrowser for help on using the repository browser.