1 | /*
|
---|
2 | * OAIServer.java
|
---|
3 | * Copyright (C) 2002 New Zealand Digital Library, http://www.nzdl.org
|
---|
4 | *
|
---|
5 | * This program is free software; you can redistribute it and/or modify
|
---|
6 | * it under the terms of the GNU General Public License as published by
|
---|
7 | * the Free Software Foundation; either version 2 of the License, or
|
---|
8 | * (at your option) any later version.
|
---|
9 | *
|
---|
10 | * This program is distributed in the hope that it will be useful,
|
---|
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
|
---|
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
---|
13 | * GNU General Public License for more details.
|
---|
14 | *
|
---|
15 | * You should have received a copy of the GNU General Public License
|
---|
16 | * along with this program; if not, write to the Free Software
|
---|
17 | * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
|
---|
18 | */
|
---|
19 | package org.greenstone.gsdl3;
|
---|
20 |
|
---|
21 | import org.greenstone.gsdl3.comms.*;
|
---|
22 | import org.greenstone.gsdl3.core.*;
|
---|
23 | import org.greenstone.gsdl3.util.*;
|
---|
24 | import org.greenstone.gsdl3.action.PageAction; // used to get the default action
|
---|
25 | import org.w3c.dom.*;
|
---|
26 | import java.io.*;
|
---|
27 | import javax.servlet.*;
|
---|
28 | import javax.servlet.http.*;
|
---|
29 | import java.util.Enumeration;
|
---|
30 | import java.util.HashSet;
|
---|
31 | import java.util.Iterator;
|
---|
32 | import java.util.Map;
|
---|
33 | import java.util.Map.Entry;
|
---|
34 | import java.io.File;
|
---|
35 |
|
---|
36 | import org.apache.log4j.*;
|
---|
37 |
|
---|
38 | /** a servlet to serve the OAI metadata harvesting - we are using servlets instead
|
---|
39 | * of cgi
|
---|
40 | * the init method is called only once - the first time the servlet classes
|
---|
41 | * are loaded. Each time a request comes in to the servlet, the service()
|
---|
42 | * method is called in a new thread (calls doGet/doPut etc)
|
---|
43 | * takes the verb= type args and builds a simple request to send to
|
---|
44 | * the oai receptionist, which returns a result in xml, conforming to the OAI-PMH
|
---|
45 | * protocol.
|
---|
46 | * @see Receptionist
|
---|
47 | */
|
---|
48 | /**
|
---|
49 | * OAI server configuration instructions *
|
---|
50 | *
|
---|
51 | */
|
---|
52 | public class OAIServer extends HttpServlet {
|
---|
53 |
|
---|
54 | /** the receptionist to send messages to */
|
---|
55 | protected OAIReceptionist recept=null;
|
---|
56 | /** the default language - is specified by setting a servlet param,
|
---|
57 | * otherwise DEFAULT_LANG is used*/
|
---|
58 | protected String default_lang= null;
|
---|
59 | /** The default default - used if a default lang is not specified
|
---|
60 | * in the servlet params */
|
---|
61 | protected final String DEFAULT_LANG = "en";
|
---|
62 |
|
---|
63 | /** a converter class to parse XML and create Docs
|
---|
64 | * This is only used for generating internal requests passed to MessageRouter.
|
---|
65 | * The response message is generated by parsing an existing xml skeleton file (web/WEB-INF/oaixml/oaiversion2.xml, for example).
|
---|
66 | */
|
---|
67 | protected XMLConverter converter=null;
|
---|
68 | /** container Document to create XML Nodes (but only request to the oai receptionist,
|
---|
69 | * not response (which is created in OAIXML.java) created by converter class */
|
---|
70 | protected Document doc=null;
|
---|
71 |
|
---|
72 | /** A HashSet which contains all the legal verbs. */
|
---|
73 | protected HashSet verb_set = null;
|
---|
74 | /** A HashSet which contains all the legal oai keys in the key/value argument pair. */
|
---|
75 | protected HashSet param_set = null;
|
---|
76 | /** The name of the site with which we will finally be dealing, whether it is a local site or a remote site through a communicator.*/
|
---|
77 | protected String site = "";
|
---|
78 |
|
---|
79 | // do we output the stylesheet processing instruction?
|
---|
80 | protected boolean use_oai_stylesheet = true;
|
---|
81 | protected String oai_stylesheet = "interfaces/oai/oai2.xsl";
|
---|
82 |
|
---|
83 | // For a POST request, HttpServletRequest.getQueryString() does not include parameters sent in the
|
---|
84 | // request body, so a query string rebuilt from them may be placed here for doGet to use (null otherwise)
|
---|
85 | protected String queryString = null;
|
---|
86 |
|
---|
87 | static Logger logger = Logger.getLogger(org.greenstone.gsdl3.OAIServer.class.getName());
|
---|
88 |
|
---|
89 | /** initialise the servlet
|
---|
90 | */
|
---|
91 | public void init(ServletConfig config) throws ServletException {
|
---|
92 | // always call super.init, i.e., HttpServlet.;
|
---|
93 | super.init(config);
|
---|
94 | this.default_lang = config.getInitParameter(GSConstants.DEFAULT_LANG);
|
---|
95 |
|
---|
96 | initVerbs();
|
---|
97 | initParams();
|
---|
98 |
|
---|
99 | String site_name = config.getInitParameter(GSConstants.SITE_NAME);
|
---|
100 | String remote_site_name = null;
|
---|
101 | String remote_site_type = null;
|
---|
102 | String remote_site_address = null;
|
---|
103 |
|
---|
104 | if (site_name == null) {
|
---|
105 | // no local site, try for communicator (remote site)
|
---|
106 | remote_site_name = config.getInitParameter("remote_site_name");
|
---|
107 | remote_site_type = config.getInitParameter("remote_site_type");
|
---|
108 | remote_site_address = config.getInitParameter("remote_site_address");
|
---|
109 | if (remote_site_name == null || remote_site_type == null || remote_site_address == null) {
|
---|
110 | System.err.println("initialisation paramters not all set!");
|
---|
111 | System.err.println("if site_name is not set, then you must have remote_site_name, remote_site_type and remote_site_address set");
|
---|
112 | System.exit(1);
|
---|
113 | }
|
---|
114 | }
|
---|
115 |
|
---|
116 | if (this.default_lang == null) {
|
---|
117 | // choose english
|
---|
118 | this.default_lang = DEFAULT_LANG;
|
---|
119 | }
|
---|
120 |
|
---|
121 | // the receptionist -the servlet will talk to this
|
---|
122 | this.recept = new OAIReceptionist();
|
---|
123 |
|
---|
124 | // the receptionist uses a MessageRouter or Communicator to send its requests to. We either create a MessageRouter here for the designated site (if site_name set), or we create a Communicator for a remote site. The is given to teh Receptionist, and the servlet never talks to it again.directly.
|
---|
125 | if (site_name != null) {
|
---|
126 | //this site_name could consist of comma separated more than one site name.
|
---|
127 | String mr_name = (String)config.getInitParameter("messagerouter_class");
|
---|
128 | MessageRouter message_router = null;
|
---|
129 | if (mr_name == null) { // just use the normal MR *********
|
---|
130 | message_router = new MessageRouter();
|
---|
131 | } else { // try the specified one
|
---|
132 | try {
|
---|
133 | message_router = (MessageRouter)Class.forName("org.greenstone.gsdl3.core."+mr_name).newInstance();
|
---|
134 | } catch (Exception e) { // cant use this new one, so use normal one
|
---|
135 | System.err.println("OAIServlet configure exception when trying to use a new MessageRouter "+mr_name+": "+e.getMessage());
|
---|
136 | e.printStackTrace();
|
---|
137 | message_router = new MessageRouter();
|
---|
138 | }
|
---|
139 | }
|
---|
140 |
|
---|
141 | message_router.setSiteName(site_name);
|
---|
142 | // lots of work is done in this step; see MessageRouter.java
|
---|
143 | message_router.configure();
|
---|
144 | this.recept.setSiteName(site_name);
|
---|
145 | this.recept.setMessageRouter(message_router);
|
---|
146 |
|
---|
147 | } else {
|
---|
148 | // talking to a remote site, create a communicator
|
---|
149 | Communicator communicator = null;
|
---|
150 | // we need to create the XML to configure the communicator
|
---|
151 | Element site_elem = this.doc.createElement(GSXML.SITE_ELEM);
|
---|
152 | site_elem.setAttribute(GSXML.TYPE_ATT, remote_site_type);
|
---|
153 | site_elem.setAttribute(GSXML.NAME_ATT, remote_site_name);
|
---|
154 | site_elem.setAttribute(GSXML.ADDRESS_ATT, remote_site_address);
|
---|
155 |
|
---|
156 | if (remote_site_type.equals(GSXML.COMM_TYPE_SOAP_JAVA)) {
|
---|
157 | communicator = new SOAPCommunicator();
|
---|
158 | } else {
|
---|
159 | System.err.println("OAIServlet.init Error: invalid Communicator type: "+remote_site_type);
|
---|
160 | System.exit(1);
|
---|
161 | }
|
---|
162 |
|
---|
163 | if (!communicator.configure(site_elem)) {
|
---|
164 | System.err.println("OAIServlet.init Error: Couldn't configure communicator");
|
---|
165 | System.exit(1);
|
---|
166 | }
|
---|
167 | this.recept.setSiteName(remote_site_name);
|
---|
168 | this.recept.setMessageRouter(communicator);
|
---|
169 | }
|
---|
170 | // used for composing internal xml requests, but not xml responses.
|
---|
171 | // the converter may be used to get pretty xml, though.
|
---|
172 | this.converter = new XMLConverter();
|
---|
173 | this.doc = this.converter.newDOM();
|
---|
174 |
|
---|
175 | // Read in OAIConfig.xml (residing web/WEB-INF/classes/) and
|
---|
176 | //use it to configure the receptionist. The init() is also called in which
|
---|
177 | //the resumption token file is read in and all expired tokens cleared.
|
---|
178 | Element oai_config = OAIXML.getOAIConfigXML();
|
---|
179 | if (oai_config == null) {
|
---|
180 | logger.error("Fail to parse oai config file OAIConfig.xml.");
|
---|
181 | return;
|
---|
182 | }
|
---|
183 | // pass it to the receptionist
|
---|
184 | this.recept.configure(oai_config);
|
---|
185 |
|
---|
186 | // also, we have something we want to get from here - useOAIStylesheet
|
---|
187 | this.configure(oai_config);
|
---|
188 | }//end of init()
|
---|
189 |
|
---|
190 | private void configure(Element oai_config) {
|
---|
191 | Element use_stylesheet_elem = (Element)GSXML.getChildByTagName(oai_config, OAIXML.USE_STYLESHEET);
|
---|
192 | if (use_stylesheet_elem != null) {
|
---|
193 | String value = GSXML.getNodeText(use_stylesheet_elem);
|
---|
194 | if (value.equals("no")) {
|
---|
195 | this.use_oai_stylesheet = false;
|
---|
196 | }
|
---|
197 | }
|
---|
198 | if (this.use_oai_stylesheet) {
|
---|
199 | // now see if there is a custom stylesheet specified
|
---|
200 | Element stylesheet_elem = (Element)GSXML.getChildByTagName(oai_config, OAIXML.STYLESHEET);
|
---|
201 | if (stylesheet_elem != null) {
|
---|
202 | String value = GSXML.getNodeText(stylesheet_elem);
|
---|
203 | if (!value.equals("")) {
|
---|
204 | oai_stylesheet = value;
|
---|
205 | }
|
---|
206 | }
|
---|
207 |
|
---|
208 | }
|
---|
209 | }
|
---|
210 | private void initVerbs() {
|
---|
211 | verb_set = new HashSet();
|
---|
212 | verb_set.add(OAIXML.GET_RECORD);
|
---|
213 | verb_set.add(OAIXML.LIST_RECORDS);
|
---|
214 | verb_set.add(OAIXML.LIST_IDENTIFIERS);
|
---|
215 | verb_set.add(OAIXML.LIST_SETS);
|
---|
216 | verb_set.add(OAIXML.LIST_METADATA_FORMATS);
|
---|
217 | verb_set.add(OAIXML.IDENTIFY);
|
---|
218 | }
|
---|
219 |
|
---|
220 | private void initParams() {
|
---|
221 | param_set = new HashSet();
|
---|
222 | param_set.add(OAIXML.METADATA_PREFIX);
|
---|
223 | param_set.add(OAIXML.FROM);
|
---|
224 | param_set.add(OAIXML.UNTIL);
|
---|
225 | param_set.add(OAIXML.SET);
|
---|
226 | param_set.add(OAIXML.RESUMPTION_TOKEN);
|
---|
227 | param_set.add(OAIXML.IDENTIFIER);
|
---|
228 | }
|
---|
229 | private void logUsageInfo(HttpServletRequest request){
|
---|
230 | String usageInfo = "";
|
---|
231 |
|
---|
232 | String query = (queryString == null) ? request.getQueryString() : queryString;
|
---|
233 |
|
---|
234 | //logged info = general-info + session-info
|
---|
235 | usageInfo =
|
---|
236 | request.getContextPath()+" "+ //session id
|
---|
237 | request.getServletPath()+" "+ //serlvet
|
---|
238 | "["+query+"]" +" "+ //the query string
|
---|
239 | "["+usageInfo.trim()+"]" +" "+ // params stored in a session
|
---|
240 | request.getRemoteAddr()+" "+ //remote address
|
---|
241 | request.getHeader("user-agent")+" "; //the remote brower info
|
---|
242 |
|
---|
243 | logger.info(usageInfo);
|
---|
244 | }
|
---|
245 | /** return true if the url is in the form of baseURL?verb=...,
|
---|
246 | */
|
---|
247 | private boolean validate(String query, String verb) {
|
---|
248 | //Here in OAIServer, only the verbs are validated. All the validation for individual verb
|
---|
249 | // is taken in their doXXX() methods.
|
---|
250 | if(query == null || !query.startsWith(OAIXML.VERB+"=")) {
|
---|
251 | return false;
|
---|
252 | }
|
---|
253 | if (!verb_set.contains(verb)) {
|
---|
254 | return false;
|
---|
255 | }
|
---|
256 | return true;
|
---|
257 | }
|
---|
258 | private String getVerb(String query) {
|
---|
259 | if (query == null) return "";
|
---|
260 | int verb_start_index = query.indexOf("=") + 1;// first occurence of '='
|
---|
261 | int verb_end_index = query.indexOf("&");
|
---|
262 | if(verb_end_index == -1) {
|
---|
263 | return query.substring(verb_start_index);
|
---|
264 | }
|
---|
265 | return query.substring(verb_start_index, verb_end_index);
|
---|
266 | }
|
---|
267 |
|
---|
268 | public void doGet(HttpServletRequest request, HttpServletResponse response)
|
---|
269 | throws ServletException, IOException {
|
---|
270 | logUsageInfo(request);
|
---|
271 | //out.println("url="+request.getRequestURL());// /oaiserver
|
---|
272 | //out.println("query="+request.getQueryString());// is /greenstone3
|
---|
273 |
|
---|
274 | // oai always requires the content type be text/xml
|
---|
275 | request.setCharacterEncoding("UTF-8");
|
---|
276 | response.setContentType("text/xml;charset=UTF-8");
|
---|
277 | PrintWriter out = response.getWriter();
|
---|
278 |
|
---|
279 | //
|
---|
280 | String lang = request.getParameter(GSParams.LANGUAGE);
|
---|
281 | if (lang==null || lang.equals("")) {
|
---|
282 | // use the default
|
---|
283 | lang = this.default_lang;
|
---|
284 | }
|
---|
285 | //we don't get the baseURL from the http request because what we get might be different from the one known publicly due to local network redirection.
|
---|
286 | //For example, puka.cs.waikato.ac.nz vs www.greenstone.org
|
---|
287 | //String base_url = request.getRequestURL().toString();
|
---|
288 | // if called by doPost (if this was originally a POST request), var queryString would have been set
|
---|
289 | String query = (queryString == null) ? request.getQueryString() : queryString;
|
---|
290 | queryString = null; // reset member variable, else no doGet will work as long as the server remains running
|
---|
291 |
|
---|
292 | String[] pairs = (query==null)? null : query.split("&");//split into key/value pairs
|
---|
293 | String verb = getVerb(query);
|
---|
294 | Element xml_response = OAIXML.createBasicResponse(verb, pairs);
|
---|
295 | Element verb_elem = null;
|
---|
296 |
|
---|
297 | if (validate(query, verb) == false) {
|
---|
298 | if (verb_set.contains(verb) == false) {
|
---|
299 | logger.error(OAIXML.BAD_VERB + ": " + query);
|
---|
300 | verb_elem = OAIXML.createErrorElement(OAIXML.BAD_VERB, OAIXML.ILLEGAL_OAI_VERB);
|
---|
301 | } else {
|
---|
302 | //must be something else other than bad verbs caused an error, so bad argument
|
---|
303 | logger.error(OAIXML.BAD_ARGUMENT + ": " + query);
|
---|
304 | verb_elem = OAIXML.createErrorElement(OAIXML.BAD_ARGUMENT, "");
|
---|
305 | }
|
---|
306 | xml_response.appendChild(verb_elem);
|
---|
307 |
|
---|
308 | out.println("<?xml version='1.0' encoding='UTF-8' ?>");
|
---|
309 | if (this.use_oai_stylesheet) {
|
---|
310 | out.println("<?xml-stylesheet type='text/xsl' href='"+this.oai_stylesheet+"' ?>\n");
|
---|
311 | }
|
---|
312 | out.println(this.converter.getPrettyString(xml_response));
|
---|
313 | return;
|
---|
314 | }//end of if(validate
|
---|
315 |
|
---|
316 | // The query is valid, we can now
|
---|
317 | // compose the request message to the receptionist
|
---|
318 | Element xml_message = this.doc.createElement(GSXML.MESSAGE_ELEM);
|
---|
319 | Element xml_request = this.doc.createElement(GSXML.REQUEST_ELEM);
|
---|
320 | // The type attribute is set to be 'oaiService' from OAIServer to OAIReceptionist.
|
---|
321 | //xml_request.setAttribute(GSXML.TYPE_ATT, OAIXML.OAI_SERVICE);
|
---|
322 | xml_request.setAttribute(GSXML.LANG_ATT, lang);
|
---|
323 | xml_request.setAttribute(GSXML.TO_ATT, verb);
|
---|
324 | addParams(xml_request, pairs);
|
---|
325 |
|
---|
326 | //xml_request.setAttribute(GSXML.OUTPUT_ATT, output);????
|
---|
327 | xml_message.appendChild(xml_request);
|
---|
328 |
|
---|
329 | Node xml_result = this.recept.process (xml_message);
|
---|
330 | if (xml_result == null) {
|
---|
331 | logger.info("xml_result is null");
|
---|
332 | verb_elem = OAIXML.createErrorElement("Internal error", "");
|
---|
333 | xml_response.appendChild(verb_elem);
|
---|
334 | } else {
|
---|
335 |
|
---|
336 | /** All response elements are in the form (with a corresponding verb name):
|
---|
337 | * <message>
|
---|
338 | <response>
|
---|
339 | <verb>
|
---|
340 | ...
|
---|
341 | * <resumptionToken>
|
---|
342 | * .. this is optional!
|
---|
343 | * </resumptionToken>
|
---|
344 | * </verb>
|
---|
345 | * </response>
|
---|
346 | * </message>
|
---|
347 | */
|
---|
348 | Node res = GSXML.getChildByTagName(xml_result, OAIXML.RESPONSE);
|
---|
349 | if(res == null) {
|
---|
350 | logger.info("response element in xml_result is null");
|
---|
351 | verb_elem = OAIXML.createErrorElement("Internal error", "");
|
---|
352 | } else {
|
---|
353 | verb_elem = GSXML.getFirstElementChild(res);
|
---|
354 | }
|
---|
355 |
|
---|
356 | if(OAIXML.oai_version.equals(OAIXML.OAI_VERSION2) ||
|
---|
357 | verb_elem.getTagName().equals(OAIXML.ERROR)) {
|
---|
358 | xml_response.appendChild(xml_response.getOwnerDocument().importNode(verb_elem, true));
|
---|
359 | } else {
|
---|
360 | GSXML.copyAllChildren(xml_response, verb_elem);
|
---|
361 | }
|
---|
362 | }
|
---|
363 | out.println("<?xml version='1.0' encoding='UTF-8' ?>");
|
---|
364 | if (this.use_oai_stylesheet) {
|
---|
365 | out.println("<?xml-stylesheet type='text/xsl' href='"+this.oai_stylesheet+"' ?>\n");
|
---|
366 | }
|
---|
367 | out.println (this.converter.getPrettyString (xml_response));
|
---|
368 | return;
|
---|
369 | }
|
---|
370 | /** append parameter elements to the request sent to the receptionist*/
|
---|
371 | public void addParams(Element request, String[] pairs) {
|
---|
372 | // no params apart from the verb
|
---|
373 | if (pairs == null || pairs.length < 2) return ;
|
---|
374 |
|
---|
375 | /**the request xml is composed in the form: <request>
|
---|
376 | * <param name=.../>
|
---|
377 | * <param name=.../>
|
---|
378 | * </request>
|
---|
379 | *(No paramList element in between).
|
---|
380 | */
|
---|
381 | for (int i=1; i<pairs.length; i++) {
|
---|
382 | //the first pair in pairs is the verb=xxx
|
---|
383 | int index = pairs[i].indexOf("=");
|
---|
384 | if(index != -1){ //just a double check
|
---|
385 | Element param = this.doc.createElement(OAIXML.PARAM);
|
---|
386 | param.setAttribute(OAIXML.NAME, pairs[i].substring(0, index));
|
---|
387 | param.setAttribute(OAIXML.VALUE, OAIXML.oaiDecode(pairs[i].substring(index + 1)));
|
---|
388 | request.appendChild(param);
|
---|
389 | }
|
---|
390 | }
|
---|
391 | }
|
---|
392 |
|
---|
393 | // For OAI version 2.0, validation tests indicated that POST needs to be supported. Some
|
---|
394 | // modification was required in order to ensure that the request is passed intact to doGet()
|
---|
395 | public void doPost(HttpServletRequest request,
|
---|
396 | HttpServletResponse response)
|
---|
397 | throws ServletException, IOException {
|
---|
398 |
|
---|
399 | // the post method returns a wrapper of type RequestFacade by apache and there
|
---|
400 | // is no getQueryString() method defined for it. Therefore, need to work this out
|
---|
401 | // manually before calling doGet(request, response) so that doGet can work as before.
|
---|
402 |
|
---|
403 | queryString = "";
|
---|
404 | Iterator parameter_entries = request.getParameterMap().entrySet().iterator();
|
---|
405 | while(parameter_entries.hasNext()) {
|
---|
406 | Map.Entry param_entry = (Map.Entry)parameter_entries.next();
|
---|
407 | String[] paramVals = (String[]) param_entry.getValue();
|
---|
408 | if(paramVals != null) {
|
---|
409 | if(paramVals.length > 0) {
|
---|
410 | logger.error("POST request received: " + param_entry.getKey() + " - " + paramVals[0]);
|
---|
411 | queryString = queryString + "&" + param_entry.getKey() + "=" + paramVals[0];
|
---|
412 | }
|
---|
413 | }
|
---|
414 | }
|
---|
415 | if(queryString.length() > 0) {
|
---|
416 | queryString = queryString.substring(1);
|
---|
417 | //queryString = OAIXML.oaiEncode(queryString);
|
---|
418 | }
|
---|
419 | if(queryString.equals("")) {
|
---|
420 | queryString = null;
|
---|
421 | }
|
---|
422 | doGet(request,response);
|
---|
423 | }
|
---|
424 | }
|
---|
425 |
|
---|
426 |
|
---|
427 |
|
---|
428 |
|
---|
429 |
|
---|
430 |
|
---|
431 |
|
---|
432 |
|
---|
433 |
|
---|
434 |
|
---|
435 |
|
---|
436 |
|
---|
437 |
|
---|
438 |
|
---|
439 |
|
---|