source: main/trunk/greenstone3/src/java/org/greenstone/gsdl3/OAIServer.java@ 21802

Last change on this file since 21802 was 21802, checked in by kjdon, 14 years ago

need to get the writer from the response *after* setting the output to utf-8

File size: 15.9 KB
Line 
1/*
2 * OAIServer.java
3 * Copyright (C) 2002 New Zealand Digital Library, http://www.nzdl.org
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
18 */
19package org.greenstone.gsdl3;
20
21import org.greenstone.gsdl3.comms.*;
22import org.greenstone.gsdl3.core.*;
23import org.greenstone.gsdl3.util.*;
24import org.greenstone.gsdl3.action.PageAction; // used to get the default action
25import org.w3c.dom.*;
26import java.io.*;
27import javax.servlet.*;
28import javax.servlet.http.*;
29import java.util.Enumeration;
30import java.util.HashSet;
31import java.io.File;
32
33import org.apache.log4j.*;
34
35/** a servlet to serve the OAI metadata harvesting - we are using servlets instead
36 * of cgi
37 * the init method is called only once - the first time the servlet classes
38 * are loaded. Each time a request comes in to the servlet, the session()
39 * method is called in a new thread (calls doGet/doPut etc)
40 * takes the verb= type args and builds a simple request to send to
41 * the oai receptionist, which returns a result in xml, conforming to the OAI-PMH
42 * protocol.
43 * @see Receptionist
44 */
45/**
46 * OAI server configuration instructions *
47 *
48 */
49public class OAIServer extends HttpServlet {
50
51 /** the receptionist to send messages to */
52 protected OAIReceptionist recept=null;
53 /** the default language - is specified by setting a servlet param,
54 * otherwise DEFAULT_LANG is used*/
55 protected String default_lang= null;
56 /** The default default - used if a default lang is not specified
57 * in the servlet params */
58 protected final String DEFAULT_LANG = "en";
59
60 /** a converter class to parse XML and create Docs
61 * This is only used for generating internal requests passed to MessageRouter.
62 * The response message is generated by parsing an existing xml skeleton file (web/WEB-INF/oaixml/oaiversion2.xml, for example).
63 */
64 protected XMLConverter converter=null;
65 /** container Document to create XML Nodes (but only request to the oai receptionist,
66 * not response (which is created in OAIXML.java) created by converter class */
67 protected Document doc=null;
68
69 /** A HashSet which contains all the legal verbs. */
70 protected HashSet verb_set = null;
71 /** A HashSet which contains all the legal oai keys in the key/value argument pair. */
72 protected HashSet param_set = null;
73 /** The name of the site with which we will finally be dealing, whether it is a local site or a remote site through a communicator.*/
74 protected String site = "";
75
76 // do we output the stylesheet processing instruction?
77 protected boolean use_oai_stylesheet = true;
78 protected String oai_stylesheet = "interfaces/oai/oai2.xsl";
79
80 static Logger logger = Logger.getLogger(org.greenstone.gsdl3.OAIServer.class.getName());
81
82 /** initialise the servlet
83 */
84 public void init(ServletConfig config) throws ServletException {
85 // always call super.init, i.e., HttpServlet.;
86 super.init(config);
87 this.default_lang = config.getInitParameter(GSConstants.DEFAULT_LANG);
88
89 initVerbs();
90 initParams();
91
92 String site_name = config.getInitParameter(GSConstants.SITE_NAME);
93 String remote_site_name = null;
94 String remote_site_type = null;
95 String remote_site_address = null;
96
97 if (site_name == null) {
98 // no local site, try for communicator (remote site)
99 remote_site_name = config.getInitParameter("remote_site_name");
100 remote_site_type = config.getInitParameter("remote_site_type");
101 remote_site_address = config.getInitParameter("remote_site_address");
102 if (remote_site_name == null || remote_site_type == null || remote_site_address == null) {
103 System.err.println("initialisation paramters not all set!");
104 System.err.println("if site_name is not set, then you must have remote_site_name, remote_site_type and remote_site_address set");
105 System.exit(1);
106 }
107 }
108
109 if (this.default_lang == null) {
110 // choose english
111 this.default_lang = DEFAULT_LANG;
112 }
113
114 // the receptionist -the servlet will talk to this
115 this.recept = new OAIReceptionist();
116
117 // the receptionist uses a MessageRouter or Communicator to send its requests to. We either create a MessageRouter here for the designated site (if site_name set), or we create a Communicator for a remote site. The is given to teh Receptionist, and the servlet never talks to it again.directly.
118 if (site_name != null) {
119 //this site_name could consist of comma separated more than one site name.
120 String mr_name = (String)config.getInitParameter("messagerouter_class");
121 MessageRouter message_router = null;
122 if (mr_name == null) { // just use the normal MR *********
123 message_router = new MessageRouter();
124 } else { // try the specified one
125 try {
126 message_router = (MessageRouter)Class.forName("org.greenstone.gsdl3.core."+mr_name).newInstance();
127 } catch (Exception e) { // cant use this new one, so use normal one
128 System.err.println("OAIServlet configure exception when trying to use a new MessageRouter "+mr_name+": "+e.getMessage());
129 e.printStackTrace();
130 message_router = new MessageRouter();
131 }
132 }
133
134 message_router.setSiteName(site_name);
135 // lots of work is done in this step; see MessageRouter.java
136 message_router.configure();
137 this.recept.setSiteName(site_name);
138 this.recept.setMessageRouter(message_router);
139
140 } else {
141 // talking to a remote site, create a communicator
142 Communicator communicator = null;
143 // we need to create the XML to configure the communicator
144 Element site_elem = this.doc.createElement(GSXML.SITE_ELEM);
145 site_elem.setAttribute(GSXML.TYPE_ATT, remote_site_type);
146 site_elem.setAttribute(GSXML.NAME_ATT, remote_site_name);
147 site_elem.setAttribute(GSXML.ADDRESS_ATT, remote_site_address);
148
149 if (remote_site_type.equals(GSXML.COMM_TYPE_SOAP_JAVA)) {
150 communicator = new SOAPCommunicator();
151 } else {
152 System.err.println("OAIServlet.init Error: invalid Communicator type: "+remote_site_type);
153 System.exit(1);
154 }
155
156 if (!communicator.configure(site_elem)) {
157 System.err.println("OAIServlet.init Error: Couldn't configure communicator");
158 System.exit(1);
159 }
160 this.recept.setSiteName(remote_site_name);
161 this.recept.setMessageRouter(communicator);
162 }
163 // used for composing internal xml requests, but not xml responses.
164 // the converter may be used to get pretty xml, though.
165 this.converter = new XMLConverter();
166 this.doc = this.converter.newDOM();
167
168 // Read in OAIConfig.xml (residing web/WEB-INF/classes/) and
169 //use it to configure the receptionist. The init() is also called in which
170 //the resumption token file is read in and all expired tokens cleared.
171 Element oai_config = OAIXML.getOAIConfigXML();
172 if (oai_config == null) {
173 logger.error("Fail to parse oai config file OAIConfig.xml.");
174 return;
175 }
176 // pass it to the receptionist
177 this.recept.configure(oai_config);
178
179 // also, we have something we want to get from here - useOAIStylesheet
180 this.configure(oai_config);
181 }//end of init()
182
183 private void configure(Element oai_config) {
184 Element use_stylesheet_elem = (Element)GSXML.getChildByTagName(oai_config, OAIXML.USE_STYLESHEET);
185 if (use_stylesheet_elem != null) {
186 String value = GSXML.getNodeText(use_stylesheet_elem);
187 if (value.equals("no")) {
188 this.use_oai_stylesheet = false;
189 }
190 }
191 if (this.use_oai_stylesheet) {
192 // now see if there is a custom stylesheet specified
193 Element stylesheet_elem = (Element)GSXML.getChildByTagName(oai_config, OAIXML.STYLESHEET);
194 if (stylesheet_elem != null) {
195 String value = GSXML.getNodeText(stylesheet_elem);
196 if (!value.equals("")) {
197 oai_stylesheet = value;
198 }
199 }
200
201 }
202 }
203 private void initVerbs() {
204 verb_set = new HashSet();
205 verb_set.add(OAIXML.GET_RECORD);
206 verb_set.add(OAIXML.LIST_RECORDS);
207 verb_set.add(OAIXML.LIST_IDENTIFIERS);
208 verb_set.add(OAIXML.LIST_SETS);
209 verb_set.add(OAIXML.LIST_METADATA_FORMATS);
210 verb_set.add(OAIXML.IDENTIFY);
211 }
212
213 private void initParams() {
214 param_set = new HashSet();
215 param_set.add(OAIXML.METADATA_PREFIX);
216 param_set.add(OAIXML.FROM);
217 param_set.add(OAIXML.UNTIL);
218 param_set.add(OAIXML.SET);
219 param_set.add(OAIXML.RESUMPTION_TOKEN);
220 param_set.add(OAIXML.IDENTIFIER);
221 }
222 private void logUsageInfo(HttpServletRequest request){
223 String usageInfo = "";
224
225 //logged info = general-info + session-info
226 usageInfo =
227 request.getContextPath()+" "+ //session id
228 request.getServletPath()+" "+ //serlvet
229 "["+request.getQueryString()+"]" +" "+ //the query string
230 "["+usageInfo.trim()+"]" +" "+ // params stored in a session
231 request.getRemoteAddr()+" "+ //remote address
232 request.getHeader("user-agent")+" "; //the remote brower info
233
234 logger.info(usageInfo);
235 }
236 /** return true if the url is in the form of baseURL?verb=...,
237 */
238 private boolean validate(String query, String verb) {
239 //Here in OAIServer, only the verbs are validated. All the validation for individual verb
240 // is taken in their doXXX() methods.
241 if(query == null || !query.startsWith(OAIXML.VERB+"=")) {
242 return false;
243 }
244 if (!verb_set.contains(verb)) {
245 return false;
246 }
247 return true;
248 }
249 private String getVerb(String query) {
250 if (query == null) return "";
251 int verb_start_index = query.indexOf("=") + 1;// first occurence of '='
252 int verb_end_index = query.indexOf("&");
253 if(verb_end_index == -1) {
254 return query.substring(verb_start_index);
255 }
256 return query.substring(verb_start_index, verb_end_index);
257 }
258 public void doGet(HttpServletRequest request, HttpServletResponse response)
259 throws ServletException, IOException {
260 logUsageInfo(request);
261 //out.println("url="+request.getRequestURL());// /oaiserver
262 //out.println("query="+request.getQueryString());// is /greenstone3
263
264 // oai always requires the content type be text/xml
265 request.setCharacterEncoding("UTF-8");
266 response.setContentType("text/xml;charset=UTF-8");
267 PrintWriter out = response.getWriter();
268
269 //
270 String lang = request.getParameter(GSParams.LANGUAGE);
271 if (lang==null || lang.equals("")) {
272 // use the default
273 lang = this.default_lang;
274 }
275 //we don't get the baseURL from the http request because what we get might be different from the one known publicly due to local network redirection.
276 //For example, puka.cs.waikato.ac.nz vs www.greenstone.org
277 //String base_url = request.getRequestURL().toString();
278 String query = request.getQueryString();
279 String[] pairs = (query==null)? null : query.split("&");//split into key/value pairs
280 String verb = getVerb(query);
281 Element xml_response = OAIXML.createBasicResponse(verb, pairs);
282 Element verb_elem = null;
283
284 if (validate(query, verb) == false) {
285 if (verb_set.contains(verb) == false) {
286 logger.error(OAIXML.BAD_VERB + ": " + query);
287 verb_elem = OAIXML.createErrorElement(OAIXML.BAD_VERB, OAIXML.ILLEGAL_OAI_VERB);
288 } else {
289 //must be something else other than bad verbs caused an error, so bad argument
290 logger.error(OAIXML.BAD_ARGUMENT + ": " + query);
291 verb_elem = OAIXML.createErrorElement(OAIXML.BAD_ARGUMENT, "");
292 }
293 xml_response.appendChild(verb_elem);
294
295 out.println("<?xml version='1.0' encoding='UTF-8' ?>");
296 if (this.use_oai_stylesheet) {
297 out.println("<?xml-stylesheet type='text/xsl' href='"+this.oai_stylesheet+"' ?>\n");
298 }
299 out.println(this.converter.getPrettyString(xml_response));
300 return;
301 }//end of if(validate
302
303 // The query is valid, we can now
304 // compose the request message to the receptionist
305 Element xml_message = this.doc.createElement(GSXML.MESSAGE_ELEM);
306 Element xml_request = this.doc.createElement(GSXML.REQUEST_ELEM);
307 // The type attribute is set to be 'oaiService' from OAIServer to OAIReceptionist.
308 //xml_request.setAttribute(GSXML.TYPE_ATT, OAIXML.OAI_SERVICE);
309 xml_request.setAttribute(GSXML.LANG_ATT, lang);
310 xml_request.setAttribute(GSXML.TO_ATT, verb);
311 addParams(xml_request, pairs);
312
313 //xml_request.setAttribute(GSXML.OUTPUT_ATT, output);????
314 xml_message.appendChild(xml_request);
315
316 Node xml_result = this.recept.process (xml_message);
317 if (xml_result == null) {
318 logger.info("xml_result is null");
319 verb_elem = OAIXML.createErrorElement("Internal error", "");
320 xml_response.appendChild(verb_elem);
321 } else {
322
323 /** All response elements are in the form (with a corresponding verb name):
324 * <message>
325 <response>
326 <verb>
327 ...
328 * <resumptionToken>
329 * .. this is optional!
330 * </resumptionToken>
331 * </verb>
332 * </response>
333 * </message>
334 */
335 Node res = GSXML.getChildByTagName(xml_result, OAIXML.RESPONSE);
336 if(res == null) {
337 logger.info("response element in xml_result is null");
338 verb_elem = OAIXML.createErrorElement("Internal error", "");
339 } else {
340 verb_elem = GSXML.getFirstElementChild(res);
341 }
342
343 if(OAIXML.oai_version.equals(OAIXML.OAI_VERSION2) ||
344 verb_elem.getTagName().equals(OAIXML.ERROR)) {
345 xml_response.appendChild(xml_response.getOwnerDocument().importNode(verb_elem, true));
346 } else {
347 GSXML.copyAllChildren(xml_response, verb_elem);
348 }
349 }
350 out.println("<?xml version='1.0' encoding='UTF-8' ?>");
351 if (this.use_oai_stylesheet) {
352 out.println("<?xml-stylesheet type='text/xsl' href='"+this.oai_stylesheet+"' ?>\n");
353 }
354 out.println (this.converter.getPrettyString (xml_response));
355 return;
356 }
357 /** append parameter elements to the request sent to the receptionist*/
358 public void addParams(Element request, String[] pairs) {
359 // no params apart from the verb
360 if (pairs == null || pairs.length < 2) return ;
361
362 /**the request xml is composed in the form: <request>
363 * <param name=.../>
364 * <param name=.../>
365 * </request>
366 *(No paramList element in between).
367 */
368 for (int i=1; i<pairs.length; i++) {
369 //the first pair in pairs is the verb=xxx
370 int index = pairs[i].indexOf("=");
371 if(index != -1){ //just a double check
372 Element param = this.doc.createElement(OAIXML.PARAM);
373 param.setAttribute(OAIXML.NAME, pairs[i].substring(0, index));
374 param.setAttribute(OAIXML.VALUE, OAIXML.oaiDecode(pairs[i].substring(index + 1)));
375 request.appendChild(param);
376 }
377 }
378 }
379 public void doPost(HttpServletRequest request,
380 HttpServletResponse response)
381 throws ServletException, IOException {
382 doGet(request,response);
383 }
384}
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
Note: See TracBrowser for help on using the repository browser.