source: main/trunk/greenstone3/src/java/org/greenstone/gsdl3/OAIServer.java@ 28966

Last change on this file since 28966 was 28966, checked in by kjdon, 10 years ago

Lots of changes. Mainly to do with removing this.doc from everywhere. Document is not thread safe. Now we tend to create a new Document every time we are starting a new page/message etc. In a service, this.desc_doc is available as the document to create service info stuff. But it should only be used for this and not for other messages. newDOM is now static for XMLConverter. Method param changes for some GSXML methods.

File size: 17.2 KB
Line 
1/*
2 * OAIServer.java
3 * Copyright (C) 2002 New Zealand Digital Library, http://www.nzdl.org
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
18 */
19package org.greenstone.gsdl3;
20
21import java.io.IOException;
22import java.io.PrintWriter;
23import java.util.HashSet;
24import java.util.Iterator;
25import java.util.Map;
26
27import javax.servlet.ServletConfig;
28import javax.servlet.ServletException;
29import javax.servlet.UnavailableException;
30import javax.servlet.http.HttpServletRequest;
31import javax.servlet.http.HttpServletResponse;
32
33import org.apache.log4j.Logger;
34import org.greenstone.gsdl3.comms.Communicator;
35import org.greenstone.gsdl3.comms.SOAPCommunicator;
36import org.greenstone.gsdl3.core.MessageRouter;
37import org.greenstone.gsdl3.core.OAIReceptionist;
38import org.greenstone.gsdl3.util.GSConstants;
39import org.greenstone.gsdl3.util.GSParams;
40import org.greenstone.gsdl3.util.GSXML;
41import org.greenstone.gsdl3.util.OAIResumptionToken;
42import org.greenstone.gsdl3.util.OAIXML;
43import org.greenstone.gsdl3.util.XMLConverter;
44import org.w3c.dom.Document;
45import org.w3c.dom.Element;
46import org.w3c.dom.Node;
47
48/** A servlet to serve OAI metadata harvesting - we are using servlets instead
49 * of cgi.
50 * The init method is called only once - the first time the servlet classes
51 * are loaded. Each time a request comes in to the servlet, the service()
52 * method is called in a new thread (it calls doGet/doPost etc). The servlet
53 * takes the verb= type args and builds a simple request to send to
54 * the oai receptionist, which returns a result in xml, conforming to the OAI-PMH
55 * protocol.
56 * @see OAIReceptionist
57 */
58/**
59 * OAI server configuration instructions *
60 *
61 */
62public class OAIServer extends BaseGreenstoneServlet
63{
64
65 /** the receptionist to send messages to; created in init() */
66 protected OAIReceptionist recept = null;
67 /**
68 * the default language - is specified by setting a servlet param, otherwise
69 * DEFAULT_LANG is used. Requests without a language parameter fall back to it.
70 */
71 protected String default_lang = null;
72 /**
73 * The fallback language - used if a default lang is not specified in the
74 * servlet params
75 */
76 protected final String DEFAULT_LANG = "en";
77
78 /**
79 * a converter class to parse XML and create Documents for
80 * XML generation; this class uses it to pretty-print the response element
81 */
82 protected XMLConverter converter = null;
83
84 /** A HashSet which contains all the legal verbs; filled by initVerbs(). */
85 protected HashSet<String> verb_set = null;
86 /**
87 * A HashSet which contains all the legal oai keys in the key/value argument
88 * pair; filled by initParams(). Nothing else in this class reads it.
89 */
90 protected HashSet<String> param_set = null;
91 /**
92 * The name of the site with which we will finally be dealing, whether it is
93 * a local site or a remote site through a communicator.
 * NOTE(review): nothing in this class assigns it after this initialiser -
 * confirm whether it is still needed.
94 */
95 protected String site = "";
96
97 // can be overridden in OAIConfig.xml
98 // do we output the stylesheet processing instruction, and which stylesheet does it point at?
99 protected boolean use_oai_stylesheet = true;
100 protected String oai_stylesheet = "interfaces/oai/oai2.xsl";
101
102 // there is no getQueryString() method in the HttpServletRequest returned from doPost,
103 // since that is actually of type apache RequestFacade, and doesn't define such a method
 // NOTE(review): doGet calls getQueryString() on the same HttpServletRequest type, so the
 // claim above looks doubtful - confirm before relying on it.
104 protected String queryString = null;
105
106 static Logger logger = Logger.getLogger(org.greenstone.gsdl3.OAIServer.class.getName());
107
108 /**
109 * initialise the servlet
110 */
111 public void init(ServletConfig config) throws ServletException
112 {
113 // always call super.init, i.e., HttpServlet.;
114 super.init(config);
115 this.default_lang = config.getInitParameter(GSConstants.DEFAULT_LANG);
116
117 initVerbs();
118 initParams();
119
120 String site_name = config.getInitParameter(GSConstants.SITE_NAME);
121 String remote_site_name = null;
122 String remote_site_type = null;
123 String remote_site_address = null;
124
125 if (site_name == null)
126 {
127 // no local site, try for communicator (remote site)
128 remote_site_name = config.getInitParameter("remote_site_name");
129 remote_site_type = config.getInitParameter("remote_site_type");
130 remote_site_address = config.getInitParameter("remote_site_address");
131 if (remote_site_name == null || remote_site_type == null || remote_site_address == null)
132 {
133 logger.error("initialisation paramters not all set!");
134 logger.error("if site_name is not set, then you must have remote_site_name, remote_site_type and remote_site_address set");
135 throw new UnavailableException("OAIServer: incorrect servlet parameters");
136 }
137 }
138
139 if (this.default_lang == null)
140 {
141 // choose english
142 this.default_lang = DEFAULT_LANG;
143 }
144
145 // the receptionist -the servlet will talk to this
146 this.recept = new OAIReceptionist();
147 // the converter - used to get new Documents to generate XML messages
148 this.converter = new XMLConverter();
149 // the receptionist uses a MessageRouter or Communicator to send its requests to. We either create a MessageRouter here for the designated site (if site_name set), or we create a Communicator for a remote site. The is given to teh Receptionist, and the servlet never talks to it again.directly.
150 if (site_name != null)
151 {
152 //this site_name could consist of comma separated more than one site name.
153 String mr_name = (String) config.getInitParameter("messagerouter_class");
154 MessageRouter message_router = null;
155 if (mr_name == null)
156 { // just use the normal MR *********
157 message_router = new MessageRouter();
158 }
159 else
160 { // try the specified one
161 try
162 {
163 message_router = (MessageRouter) Class.forName("org.greenstone.gsdl3.core." + mr_name).newInstance();
164 }
165 catch (Exception e)
166 { // cant use this new one, so use normal one
167 logger.error("OAIServlet configure exception when trying to use a new MessageRouter " + mr_name, e);
168 message_router = new MessageRouter();
169 }
170 }
171
172 message_router.setSiteName(site_name);
173 // lots of work is done in this step; see MessageRouter.java
174 if (!message_router.configure()) {
175 throw new UnavailableException("OAIServer: Couldn't configure MessageRouter");
176 }
177 this.recept.setSiteName(site_name);
178 this.recept.setMessageRouter(message_router);
179
180 }
181 else
182 {
183 // talking to a remote site, create a communicator
184 Communicator communicator = null;
185 // we need to create the XML to configure the communicator
186 Document site_doc = XMLConverter.newDOM();
187 Element site_elem = site_doc.createElement(GSXML.SITE_ELEM);
188 site_elem.setAttribute(GSXML.TYPE_ATT, remote_site_type);
189 site_elem.setAttribute(GSXML.NAME_ATT, remote_site_name);
190 site_elem.setAttribute(GSXML.ADDRESS_ATT, remote_site_address);
191
192 if (remote_site_type.equals(GSXML.COMM_TYPE_SOAP_JAVA))
193 {
194 communicator = new SOAPCommunicator();
195 }
196 else
197 {
198 logger.error("OAIServlet.init Error: invalid Communicator type: " + remote_site_type);
199 throw new UnavailableException("OAIServer: invalid communicator type");
200 }
201
202 if (!communicator.configure(site_elem))
203 {
204 logger.error("OAIServlet.init Error: Couldn't configure communicator");
205 throw new UnavailableException("OAIServer: Couldn't configure communicator");
206 }
207 this.recept.setSiteName(remote_site_name);
208 this.recept.setMessageRouter(communicator);
209 }
210
211 // Read in OAIConfig.xml (residing web/WEB-INF/classes/) and
212 //use it to configure the receptionist.
213 Element oai_config = OAIXML.getOAIConfigXML();
214 if (oai_config == null)
215 {
216 logger.error("Fail to parse oai config file OAIConfig.xml.");
217 throw new UnavailableException("OAIServer: Couldn't parse OAIConfig.xml");
218 }
219 // pass it to the receptionist
220 if (!this.recept.configure(oai_config)) {
221 logger.error("Couldn't configure receptionist");
222 throw new UnavailableException("OAIServer: Couldn't configure receptionist");
223 }
224 // also, we have something we want to get from here - useOAIStylesheet
225 this.configure(oai_config);
226 // Initialise the resumption tokens
227 OAIResumptionToken.init();
228
229 }//end of init()
230
231 private void configure(Element oai_config)
232 {
233 Element use_stylesheet_elem = (Element) GSXML.getChildByTagName(oai_config, OAIXML.USE_STYLESHEET);
234 if (use_stylesheet_elem != null)
235 {
236 String value = GSXML.getNodeText(use_stylesheet_elem);
237 if (value.equals("no"))
238 {
239 this.use_oai_stylesheet = false;
240 }
241 }
242 if (this.use_oai_stylesheet)
243 {
244 // now see if there is a custom stylesheet specified
245 Element stylesheet_elem = (Element) GSXML.getChildByTagName(oai_config, OAIXML.STYLESHEET);
246 if (stylesheet_elem != null)
247 {
248 String value = GSXML.getNodeText(stylesheet_elem);
249 if (!value.equals(""))
250 {
251 oai_stylesheet = value;
252 }
253 }
254
255 }
256 }
257
258 private void initVerbs()
259 {
260 verb_set = new HashSet<String>();
261 verb_set.add(OAIXML.GET_RECORD);
262 verb_set.add(OAIXML.LIST_RECORDS);
263 verb_set.add(OAIXML.LIST_IDENTIFIERS);
264 verb_set.add(OAIXML.LIST_SETS);
265 verb_set.add(OAIXML.LIST_METADATA_FORMATS);
266 verb_set.add(OAIXML.IDENTIFY);
267 }
268
269 private void initParams()
270 {
271 param_set = new HashSet<String>();
272 param_set.add(OAIXML.METADATA_PREFIX);
273 param_set.add(OAIXML.FROM);
274 param_set.add(OAIXML.UNTIL);
275 param_set.add(OAIXML.SET);
276 param_set.add(OAIXML.RESUMPTION_TOKEN);
277 param_set.add(OAIXML.IDENTIFIER);
278 }
279
280 private void logUsageInfo(HttpServletRequest request)
281 {
282 String usageInfo = "";
283
284 String query = (queryString == null) ? request.getQueryString() : queryString;
285
286 //logged info = general-info + session-info
287 usageInfo = request.getContextPath() + " " + //session id
288 request.getServletPath() + " " + //serlvet
289 "[" + query + "]" + " " + //the query string
290 "[" + usageInfo.trim() + "]" + " " + // params stored in a session
291 request.getRemoteAddr() + " " + //remote address
292 request.getHeader("user-agent") + " "; //the remote brower info
293
294 logger.info(usageInfo);
295 }
296
297 /**
298 * return true if the url is in the form of baseURL?verb=...,
299 */
300 private boolean validate(String query, String verb)
301 {
302 //Here in OAIServer, only the verbs are validated. All the validation for individual verb
303 // is taken in their doXXX() methods.
304 if (query == null || !query.startsWith(OAIXML.VERB + "="))
305 {
306 return false;
307 }
308 if (!verb_set.contains(verb))
309 {
310 return false;
311 }
312 return true;
313 }
314
315 private String getVerb(String query)
316 {
317 if (query == null)
318 return "";
319 int verb_start_index = query.indexOf("=") + 1;// first occurence of '='
320 int verb_end_index = query.indexOf("&");
321 if (verb_end_index == -1)
322 {
323 return query.substring(verb_start_index);
324 }
325 return query.substring(verb_start_index, verb_end_index);
326 }
327
328 public void doGet(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException
329 {
330 logUsageInfo(request);
331
332 // oai always requires the content type be text/xml
333 request.setCharacterEncoding("UTF-8");
334 response.setContentType("text/xml;charset=UTF-8");
335 PrintWriter out = response.getWriter();
336
337 //
338 String lang = request.getParameter(GSParams.LANGUAGE);
339 if (lang == null || lang.equals(""))
340 {
341 // use the default
342 lang = this.default_lang;
343 }
344 //we don't get the baseURL from the http request because what we get might be different from the one known publicly due to local network redirection.
345 //For example, puka.cs.waikato.ac.nz vs www.greenstone.org
346 //String base_url = request.getRequestURL().toString();
347
348 // if called by doPost (if this was originally a POST request), var queryString would have been set
349 String query = (queryString == null) ? request.getQueryString() : queryString;
350 queryString = null; // reset member variable, else no doGet will work as long as the server remains running
351
352 if (query!=null && query.equals("reset")) {
353 logger.error("reset was called*******************");
354 out.println("<?xml version='1.0' encoding='UTF-8' ?>");
355 out.println(this.recept.process("<message><request reset='true'/></message>"));
356 return;
357 }
358 String[] pairs = (query == null) ? null : query.split("&");//split into key/value pairs
359
360 String verb = getVerb(query);
361 Document response_doc = XMLConverter.newDOM();
362 Element xml_response = OAIXML.createBasicResponse(response_doc, verb, pairs);
363 Element verb_elem = null;
364
365 if (validate(query, verb) == false)
366 {
367 if (verb_set.contains(verb) == false)
368 {
369 logger.error(OAIXML.BAD_VERB + ": " + query);
370 verb_elem = OAIXML.createErrorElement(response_doc, OAIXML.BAD_VERB, OAIXML.ILLEGAL_OAI_VERB);
371 }
372 else
373 {
374 //must be something else other than bad verbs caused an error, so bad argument
375 logger.error(OAIXML.BAD_ARGUMENT + ": " + query);
376 verb_elem = OAIXML.createErrorElement(response_doc, OAIXML.BAD_ARGUMENT, "");
377 }
378 xml_response.appendChild(verb_elem);
379
380 out.println("<?xml version='1.0' encoding='UTF-8' ?>");
381 if (this.use_oai_stylesheet)
382 {
383 out.println("<?xml-stylesheet type='text/xsl' href='" + this.oai_stylesheet + "' ?>\n");
384 }
385 out.println(this.converter.getPrettyString(xml_response));
386 return;
387 }//end of if(validate
388
389 // The query is valid, we can now
390 // compose the request message to the receptionist
391 Document request_doc = XMLConverter.newDOM();
392 Element xml_message = request_doc.createElement(GSXML.MESSAGE_ELEM);
393 Element xml_request = request_doc.createElement(GSXML.REQUEST_ELEM);
394 // The type attribute is set to be 'oaiService' from OAIServer to OAIReceptionist.
395 //xml_request.setAttribute(GSXML.TYPE_ATT, OAIXML.OAI_SERVICE);
396 xml_request.setAttribute(GSXML.LANG_ATT, lang);
397 xml_request.setAttribute(GSXML.TO_ATT, verb);
398 addParams(xml_request, pairs);
399
400 //xml_request.setAttribute(GSXML.OUTPUT_ATT, output);????
401 xml_message.appendChild(xml_request);
402
403 Node xml_result = this.recept.process(xml_message);
404 if (xml_result == null)
405 {
406 logger.info("xml_result is null");
407 verb_elem = OAIXML.createErrorElement(response_doc, "Internal error", "");
408 xml_response.appendChild(verb_elem);
409 }
410 else
411 {
412
413 /**
414 * All response elements are in the form (with a corresponding verb
415 * name): <message> <response> <verb> ... <resumptionToken> .. this
416 * is optional! </resumptionToken> </verb> </response> </message>
417 */
418 Node res = GSXML.getChildByTagName(xml_result, GSXML.RESPONSE_ELEM);
419 if (res == null)
420 {
421 logger.info("response element in xml_result is null");
422 verb_elem = OAIXML.createErrorElement(response_doc, "Internal error", "");
423 }
424 else
425 {
426 verb_elem = GSXML.getFirstElementChild(res);
427 }
428
429 if ( verb_elem.getTagName().equals(OAIXML.ERROR))
430 {
431 xml_response.appendChild(response_doc.importNode(verb_elem, true));
432 }
433 else if (OAIXML.oai_version.equals(OAIXML.OAI_VERSION2)) {
434 xml_response.appendChild(response_doc.importNode(verb_elem, true));
435 }
436 else
437 {
438 GSXML.copyAllChildren(xml_response, verb_elem);
439 }
440 }
441 out.println("<?xml version='1.0' encoding='UTF-8' ?>");
442 if (this.use_oai_stylesheet)
443 {
444 out.println("<?xml-stylesheet type='text/xsl' href='" + this.oai_stylesheet + "' ?>\n");
445 }
446 out.println(this.converter.getPrettyString(xml_response));
447 return;
448 }
449
450 /** append parameter elements to the request sent to the receptionist */
451 public void addParams(Element request, String[] pairs)
452 {
453 Document doc = request.getOwnerDocument();
454 // no params apart from the verb
455 if (pairs == null || pairs.length < 2)
456 return;
457
458 /**
459 * the request xml is composed in the form: <request> <param name=.../>
460 * <param name=.../> </request> (No paramList element in between).
461 */
462 for (int i = 1; i < pairs.length; i++)
463 {
464 //the first pair in pairs is the verb=xxx
465 int index = pairs[i].indexOf("=");
466 if (index != -1)
467 { //just a double check
468 Element param = GSXML.createParameter(doc, pairs[i].substring(0, index), OAIXML.oaiDecode(pairs[i].substring(index + 1)));
469 request.appendChild(param);
470 }
471 }
472 }
473
474 // For OAI version 2.0, validation tests indicated that POST needs to be supported. Some
475 // modification was required in order to ensure that the request is passed intact to doGet()
476 public void doPost(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException
477 {
478
479 // the post method returns a wrapper of type RequestFacade by apache and there
480 // is no getQueryString() method defined for it. Therefore, need to work this out
481 // manually before calling doGet(request, response) so that doGet can work as before.
482
483 queryString = "";
484 Iterator parameter_entries = request.getParameterMap().entrySet().iterator();
485 while (parameter_entries.hasNext())
486 {
487 Map.Entry param_entry = (Map.Entry) parameter_entries.next();
488 String[] paramVals = (String[]) param_entry.getValue();
489 if (paramVals != null)
490 {
491 if (paramVals.length > 0)
492 {
493 logger.error("POST request received: " + param_entry.getKey() + " - " + paramVals[0]);
494 queryString = queryString + "&" + param_entry.getKey() + "=" + paramVals[0];
495 }
496 }
497 }
498 if (queryString.length() > 0)
499 {
500 queryString = queryString.substring(1);
501 //queryString = OAIXML.oaiEncode(queryString);
502 }
503 if (queryString.equals(""))
504 {
505 queryString = null;
506 }
507 doGet(request, response);
508 }
509
510
511 public void destroy()
512 {
513 recept.cleanUp();
514 }
515
516}
Note: See TracBrowser for help on using the repository browser.