source: greenstone3/trunk/src/java/org/greenstone/gsdl3/LibraryServlet.java@ 16688

Last change on this file since 16688 was 16688, checked in by davidb, 16 years ago

Changed 'Element process(Element)' in ModuleInterface to 'Node process(Node)'. After some deliberation it was decided this is a more useful (generic) layer of the DOM to pass information around in. Helps with the DocType problem when producing XSL Transformed pages, for example. When this was an Element, it would lose track of its DocType. Supporting method provided in XMLConverter 'Element nodeToElement(Node)' which checks a node's docType and casts to Element if appropriate, or if a Document, typecasts to that and then extracts the top-level Element. With this fundamental change in ModuleInterface, around 20 files needed to be updated (Actions, Services, etc.) that build on top of 'process()' to reflect this change, and use nodeToElement where necessary.

  • Property svn:keywords set to Author Date Id Revision
File size: 22.7 KB
Line 
1package org.greenstone.gsdl3;
2
3import org.greenstone.gsdl3.comms.*;
4import org.greenstone.gsdl3.core.*;
5import org.greenstone.gsdl3.util.*;
6import org.greenstone.gsdl3.action.PageAction; // used to get the default action
7import org.w3c.dom.Document;
8import org.w3c.dom.Element;
9import org.w3c.dom.Node;
10import org.w3c.dom.NodeList;
11import java.io.*;
12import javax.servlet.*;
13import javax.servlet.http.*;
14import java.util.Enumeration;
15import java.util.ArrayList;
16import java.util.HashMap;
17import java.io.File;
18import java.util.Hashtable;
19import org.apache.log4j.*;
20
21
/** A servlet to serve the Greenstone library - we are using servlets instead
 * of cgi.
 * The init method is called only once - the first time the servlet classes
 * are loaded. Each time a request comes in to the servlet, the service()
 * method is called in a new thread (it calls doGet/doPost etc).
 * The servlet takes the a=p&p=home type args and builds a simple request to
 * send to its receptionist, which returns a result in html, because
 * output=html is set in the request.
 *
 * 18/Jul/07 xiao
 * Modified to make the cached parameters collection-specific.
 * Most of the work is done in doGet(), apart from the addition of the inner
 * class UserSessionCache.
 *
 * @see Receptionist
 */
37public class LibraryServlet extends HttpServlet {
38
39 /** the receptionist to send messages to */
40 protected Receptionist recept=null;
41 /** the default language - is specified by setting a servlet param,
42 * otherwise DEFAULT_LANG is used*/
43 protected String default_lang= null;
44 /** The default default - used if a default lang is not specified
45 * in the servlet params */
46 protected final String DEFAULT_LANG = "en";
47 /** container Document to create XML Nodes */
48 protected Document doc=null;
49 /** a converter class to parse XML and create Docs */
50 protected XMLConverter converter=null;
51 /** the cgi stuff - the Receptionist can add new args to this
52 *
53 * its used by the servlet to determine what args to save */
54 protected GSParams params = null;
55
56 /** user id - new one per session. This doesn't work if session state is saved between restarts - this requires this value to be saved too. */
57 protected int next_user_id = 0;
58
59 /** a hash that contains all the active session IDs mapped to the cached items
60 * It is updated whenever the whole site or a particular collection is reconfigured
61 * using the command a=s&sa=c or a=s&sa=c&c=xxx
62 * It is in the form: sid -> (UserSessionCache object)
63 */
64 protected Hashtable session_ids_table = new Hashtable();
65 //this name is combined with the collection name and used for caching
66 protected String valid_site_name = "";
67
68 /** the maximum interval that the cached info remains in session_ids_table (in seconds)
69 * This is set in web.xml
70 */
71 protected int session_expiration = 1800;
72
73 static Logger logger = Logger.getLogger(org.greenstone.gsdl3.LibraryServlet.class.getName());
74
75 /** initialise the servlet
76 */
77 public void init(ServletConfig config) throws ServletException {
78 // always call super.init;
79 super.init(config);
80 // disable preferences - does this work anyway??
81 //System.setProperty("java.util.prefs.PreferencesFactory", "org.greenstone.gsdl3.util.DisabledPreferencesFactory");
82
83
84 String library_name = config.getInitParameter(GSConstants.LIBRARY_NAME);
85 String gsdl3_home = config.getInitParameter(GSConstants.GSDL3_HOME);
86 String interface_name = config.getInitParameter(GSConstants.INTERFACE_NAME);
87 this.default_lang = config.getInitParameter(GSConstants.DEFAULT_LANG);
88 String sess_expire = config.getInitParameter(GSXML.SESSION_EXPIRATION);
89 if (sess_expire != null && !sess_expire.equals("")) {
90 this.session_expiration = Integer.parseInt(sess_expire);
91 }
92
93 if (library_name == null || interface_name ==null) {
94 // must have this
95 System.err.println("initialisation parameters not all set!");
96 System.err.println(" you must have libraryname and interfacename");
97 System.exit(1);
98 }
99
100 String site_name = config.getInitParameter(GSConstants.SITE_NAME);
101 String remote_site_name = null;
102 String remote_site_type = null;
103 String remote_site_address = null;
104
105 if (site_name == null) {
106 // no site, try for communicator
107 remote_site_name = config.getInitParameter("remote_site_name");
108 remote_site_type = config.getInitParameter("remote_site_type");
109 remote_site_address = config.getInitParameter("remote_site_address");
110 if (remote_site_name == null || remote_site_type == null || remote_site_address == null) {
111 System.err.println("initialisation paramters not all set!");
112 System.err.println("if site_name is not set, then you must have remote_site_name, remote_site_type and remote_site_address set");
113 System.exit(1);
114 }
115 }
116 valid_site_name = (site_name != null)? site_name : remote_site_name;
117
118 if (this.default_lang == null) {
119 // choose english
120 this.default_lang = DEFAULT_LANG;
121 }
122
123 HashMap config_params = new HashMap();
124
125 config_params.put(GSConstants.LIBRARY_NAME, library_name);
126 config_params.put(GSConstants.INTERFACE_NAME, interface_name);
127 if (site_name != null) {
128 config_params.put(GSConstants.SITE_NAME, site_name);
129 }
130 this.converter = new XMLConverter();
131 this.doc = this.converter.newDOM();
132
133 // the receptionist -the servlet will talk to this
134 String recept_name = (String)config.getInitParameter("receptionist_class");
135 if (recept_name == null) {
136 this.recept = new DefaultReceptionist();
137 } else {
138 try {
139 this.recept = (Receptionist)Class.forName("org.greenstone.gsdl3.core."+recept_name).newInstance();
140 } catch (Exception e) { // cant use this new one, so use normal one
141 System.err.println("LibraryServlet configure exception when trying to use a new Receptionist "+recept_name+": "+e.getMessage());
142 e.printStackTrace();
143 this.recept = new DefaultReceptionist();
144 }
145 }
146 this.recept.setConfigParams(config_params);
147
148 // the receptionist uses a MessageRouter or Communicator to send its requests to. We either create a MessageRouter here for the designated site (if site_name set), or we create a Communicator for a remote site. The is given to teh Receptionist, and the servlet never talks to it again.directly.
149 if (site_name != null) {
150 String mr_name = (String)config.getInitParameter("messagerouter_class");
151 MessageRouter message_router = null;
152 if (mr_name == null) { // just use the normal MR
153 message_router = new MessageRouter();
154 } else { // try the specified one
155 try {
156 message_router = (MessageRouter)Class.forName("org.greenstone.gsdl3.core."+mr_name).newInstance();
157 } catch (Exception e) { // cant use this new one, so use normal one
158 System.err.println("LibraryServlet configure exception when trying to use a new MessageRouter "+mr_name+": "+e.getMessage());
159 e.printStackTrace();
160 message_router = new MessageRouter();
161 }
162 }
163
164 message_router.setSiteName(site_name);
165 message_router.setLibraryName(library_name);
166 message_router.configure();
167 this.recept.setMessageRouter(message_router);
168 } else {
169 // talking to a remote site, create a communicator
170 Communicator communicator = null;
171 // we need to create the XML to configure the communicator
172 Element site_elem = this.doc.createElement(GSXML.SITE_ELEM);
173 site_elem.setAttribute(GSXML.TYPE_ATT, remote_site_type);
174 site_elem.setAttribute(GSXML.NAME_ATT, remote_site_name);
175 site_elem.setAttribute(GSXML.ADDRESS_ATT, remote_site_address);
176
177 if (remote_site_type.equals(GSXML.COMM_TYPE_SOAP_JAVA)) {
178 communicator = new SOAPCommunicator();
179 } else {
180 System.err.println("LibraryServlet.init Error: invalid Communicator type: "+remote_site_type);
181 System.exit(1);
182 }
183
184 if (!communicator.configure(site_elem)) {
185 System.err.println("LibraryServlet.init Error: Couldn't configure communicator");
186 System.exit(1);
187 }
188 this.recept.setMessageRouter(communicator);
189 }
190
191 // the params arg thingy
192
193 String params_name = (String)config.getInitParameter("params_class");
194 if (params_name == null) {
195 this.params = new GSParams();
196 } else {
197 try {
198 this.params = (GSParams)Class.forName("org.greenstone.gsdl3.util."+params_name).newInstance();
199 } catch (Exception e) {
200 System.err.println("LibraryServlet configure exception when trying to use a new params thing "+params_name+": "+e.getMessage());
201 e.printStackTrace();
202 this.params = new GSParams();
203 }
204 }
205 // pass it to the receptionist
206 this.recept.setParams(this.params);
207 this.recept.configure();
208
209 }
210
211
212 private void logUsageInfo(HttpServletRequest request){
213 String usageInfo = "";
214
215 //session-info: get params stored in the session
216 HttpSession session = request.getSession(true);
217 Enumeration attributeNames = session.getAttributeNames();
218 while(attributeNames.hasMoreElements()) {
219 String name = (String)attributeNames.nextElement();
220 usageInfo +=name+"="+session.getAttribute(name)+" ";
221 }
222
223 //logged info = general-info + session-info
224 usageInfo =
225 request.getServletPath()+" "+ //serlvet
226 "["+request.getQueryString()+"]" +" "+ //the query string
227 "["+usageInfo.trim()+"]" +" "+ // params stored in a session
228 request.getRemoteAddr()+" "+ //remote address
229 request.getRequestedSessionId()+" "+ //session id
230 request.getHeader("user-agent")+" "; //the remote brower info
231
232 logger.info(usageInfo);
233
234 }
235
236 public class UserSessionCache implements HttpSessionBindingListener {
237
238 String session_id = "";
239
240 /** a hash that maps the session ID to a hashtable that maps the coll_name to its parameters
241 * coll_name -> Hashtable (param_name -> param_value)
242 */
243 protected Hashtable coll_name_params_table = null;
244
245 public UserSessionCache(String id, Hashtable table) {
246 session_id = id;
247 coll_name_params_table = (table == null)? new Hashtable() : table;
248 }
249
250 protected void cleanupCache(String coll_name) {
251 if (coll_name_params_table.containsKey(coll_name)) {
252 coll_name_params_table.remove(coll_name);
253 }
254 }
255 protected Hashtable getParamsTable() {
256 return coll_name_params_table;
257 }
258 public void valueBound(HttpSessionBindingEvent event) {
259 // Do nothing
260 }
261
262 public void valueUnbound(HttpSessionBindingEvent event) {
263 if(session_ids_table.containsKey(session_id)) {
264 session_ids_table.remove(session_id);
265 }
266 }
267 public int tableSize() {
268 return (coll_name_params_table == null)? 0 : coll_name_params_table.size();
269 }
270 }
271
272 public void doGet (HttpServletRequest request,
273 HttpServletResponse response)
274 throws ServletException, IOException {
275 logUsageInfo (request);
276
277 String query_string = request.getQueryString();
278 if (query_string!=null){
279 String[] query_arr = query_string.split("&");
280 boolean redirect = false;
281 String href = null;
282 String rl = null;
283 for (int i=0;i<query_arr.length;i++){
284 if (query_arr[i].startsWith("el")){
285 if (query_arr[i].substring(query_arr[i].indexOf("=")+1,query_arr[i].length()).equals("direct")){
286 redirect = true;
287 }
288 }else if(query_arr[i].startsWith("href")){
289 href = query_arr[i].substring(query_arr[i].indexOf("=")+1,query_arr[i].length());
290 href = href.replaceAll("%2f", "/");
291 href = href.replaceAll("%7e", "~");
292 href = href.replaceAll("%3f", "?");
293 href = href.replaceAll("%3A", "\\:");
294 }else if(query_arr[i].startsWith("rl")){
295 rl = query_arr[i].substring(query_arr[i].indexOf("=")+1,query_arr[i].length());
296 }
297 }
298 //if query_string contains "el=", the web page will be redirected to the external URl, otherwise a greenstone page with an external URL will be displayed
299 //"rl=0" this is an external link
300 //"rl=1" this is an internal link
301 if ((redirect) && (href != null) && (rl.equals("0"))){// This is an external link, the web page is re-directed to the external URL (&el=&rl=0&href="http://...")
302 response.setContentType("text/xml");
303 response.sendRedirect(href);
304 }
305 }
306 // Nested Diagnostic Configurator to identify the client for
307
308 HttpSession session = request.getSession (true);
309 session.setMaxInactiveInterval(session_expiration);
310 String uid = (String)session.getAttribute (GSXML.USER_ID_ATT);
311 if (uid ==null) {
312 uid = ""+getNextUserId ();
313 session.setAttribute (GSXML.USER_ID_ATT, uid);
314 }
315 request.setCharacterEncoding ("UTF-8");
316 response.setContentType ("text/html;charset=UTF-8");
317 PrintWriter out = response.getWriter ();
318
319 String lang = request.getParameter (GSParams.LANGUAGE);
320 if (lang==null || lang.equals ("")) {
321 // try the session cached lang
322 lang = (String)session.getAttribute (GSParams.LANGUAGE);
323 if (lang==null || lang.equals ("")) {
324 // still not set, use the default
325 lang = this.default_lang;
326 }
327 }
328
329 // set the lang in the session
330 session.setAttribute (GSParams.LANGUAGE, lang);
331
332 String output = request.getParameter (GSParams.OUTPUT);
333 if (output==null || output.equals ("")) {
334 output = "html"; // uses html by default
335 }
336
337 // the request to the receptionist
338 Element xml_message = this.doc.createElement (GSXML.MESSAGE_ELEM);
339 Element xml_request = GSXML.createBasicRequest (this.doc, GSXML.REQUEST_TYPE_PAGE, "", lang, uid);
340 xml_request.setAttribute (GSXML.OUTPUT_ATT, output);
341 xml_message.appendChild (xml_request);
342
343 String action = request.getParameter (GSParams.ACTION);
344 String subaction = request.getParameter (GSParams.SUBACTION);
345 String collection = request.getParameter(GSParams.COLLECTION);
346
347 //specifically we clean up the cache session_ids_table if the two reconfigure command
348 //are issued: a=s&sa=c and a=s&sa=c&c=coll_name, in which case there is no caching action to be taken
349 boolean should_cache = true;
350 if(action != null && action.equals(GSParams.SYSTEM)
351 && subaction != null && subaction.equals(GSParams.CONFIGURE)) {
352 if (collection == null || collection.equals("")) {
353 //user reconfiugred the whole site, clean up all cached info
354 //logger.info("clear cache for the whole site.");
355 session_ids_table = new Hashtable();
356 session.removeAttribute(GSXML.USER_SESSION_CACHE_ATT);
357 } else {
358 //clean up all cache info related to the collection
359 //logger.info("clear cache for collection: " + collection);
360 ArrayList cache_list = new ArrayList(session_ids_table.values());
361 for (int i=0; i<cache_list.size(); i++) {
362 UserSessionCache cache = (UserSessionCache)cache_list.get(i);
363 cache.cleanupCache(collection);
364 }
365 }
366 should_cache = false;
367 }
368
369 // logger.info("should_cache= " + should_cache);
370 //clear the collection-specific cache in the session, since we have no way to know whether this session is
371 //about the same collection as the last session or not.
372 Enumeration attributeNames = session.getAttributeNames();
373 while(attributeNames.hasMoreElements()) {
374 String name = (String)attributeNames.nextElement();
375 if (!name.equals (GSXML.USER_SESSION_CACHE_ATT)
376 && !name.equals (GSParams.LANGUAGE)
377 && !name.equals (GSXML.USER_ID_ATT)) {
378 session.removeAttribute(name);
379 }
380 }
381
382 // create a dumy collection name for Authentication
383 if (subaction != null && subaction.equals("authen")){
384 collection = "0000000000";
385 }
386
387 UserSessionCache session_cache = null;
388 Hashtable param_table = null;
389 Hashtable table = null;
390 String sid = session.getId();
391 if (should_cache == true && collection != null && !collection.equals("")) {
392 String key_str = valid_site_name + collection;
393 if (session_ids_table.containsKey(sid)) {
394 session_cache = (UserSessionCache)session_ids_table.get(sid);
395 param_table = session_cache.getParamsTable();
396 logger.info("collections in table: " + tableToString(param_table));
397 if (param_table.containsKey(key_str)) {
398 //logger.info("existing table: " + collection);
399 table = (Hashtable)param_table.get(key_str);
400 } else {
401 table = new Hashtable();
402 param_table.put(key_str, table);
403 //logger.info("new table: " + collection);
404 }
405 } else {
406 param_table = new Hashtable();
407 table = new Hashtable();
408 param_table.put(key_str, table);
409 session_cache = new UserSessionCache(sid, param_table);
410 session_ids_table.put(sid, session_cache);
411 session.setAttribute(GSXML.USER_SESSION_CACHE_ATT, session_cache);
412 //logger.info("new session id");
413 }
414 }
415
416 if (action==null || action.equals ("")) {
417 // should we do all the following stuff if using default page?
418 // display the home page - the default page
419 action = "p";
420 subaction = PageAction.HOME_PAGE;
421
422 xml_request.setAttribute (GSXML.ACTION_ATT, action);
423 xml_request.setAttribute (GSXML.SUBACTION_ATT, subaction);
424
425 } else {
426
427 xml_request.setAttribute (GSXML.ACTION_ATT, action);
428 if (subaction != null) {
429 xml_request.setAttribute (GSXML.SUBACTION_ATT, subaction);
430 }
431
432 // create the param list for the greenstone request - includes
433 // the params from the current request and any others from the saved session
434 Element xml_param_list = this.doc.createElement (GSXML.PARAM_ELEM+GSXML.LIST_MODIFIER);
435 xml_request.appendChild (xml_param_list);
436
437 Enumeration params = request.getParameterNames ();
438 while(params.hasMoreElements ()) {
439 String name = (String)params.nextElement ();
440 if (!name.equals (GSParams.ACTION)
441 && !name.equals (GSParams.SUBACTION)
442 && !name.equals (GSParams.LANGUAGE)
443 && !name.equals (GSParams.OUTPUT)) {// we have already dealt with these
444 String value="";
445 String [] values = request.getParameterValues (name);
446 value = values[0];
447 if (values.length > 1) {
448 for (int i=1; i< values.length; i++) {
449 value += ","+values[i];
450 }
451 }
452 // either add it to the param list straight away, or save it to the session and add it later
453 if (this.params.shouldSave (name)) {
454 if (table != null) {
455 table.put(name, value);
456 }
457 } else {
458 Element param = this.doc.createElement (GSXML.PARAM_ELEM);
459 param.setAttribute (GSXML.NAME_ATT, name);
460 param.setAttribute (GSXML.VALUE_ATT, GSXML.xmlSafe (value));
461 xml_param_list.appendChild (param);
462 }
463 }
464 }
465 //put everything in the table into the session
466 if (table != null) {
467 Enumeration keys = table.keys ();
468 while(keys.hasMoreElements ()) {
469 String name = (String)keys.nextElement();
470 session.setAttribute(name, (String)table.get(name));
471 }
472 }
473
474 // put in all the params from the session cache
475 params = session.getAttributeNames ();
476 while(params.hasMoreElements ()) {
477 String name = (String)params.nextElement ();
478
479 if ( !name.equals (GSXML.USER_SESSION_CACHE_ATT)
480 && !name.equals (GSParams.LANGUAGE)
481 && !name.equals (GSXML.USER_ID_ATT)) {
482 // lang and uid are stored but we dont want it in the param list cos its already in the request
483 Element param = this.doc.createElement (GSXML.PARAM_ELEM);
484 param.setAttribute (GSXML.NAME_ATT, name);
485 String value = GSXML.xmlSafe ((String)session.getAttribute (name));
486 // ugly hack to undo : escaping
487 value = value.replaceAll ("%3A", "\\:");
488 param.setAttribute (GSXML.VALUE_ATT,value);
489 xml_param_list.appendChild (param);
490 }
491 }
492 }
493
494 if (!output.equals ("html")) {
495 response.setContentType ("text/xml"); // for now use text
496 }
497
498 //GSXML.printXMLNode(xml_message);
499
500 Node xml_result = this.recept.process(xml_message);
501 encodeURLs (xml_result, response);
502 out.println (this.converter.getPrettyString (xml_result));
503
504 displaySize(session_ids_table);
505 }
506 //a debugging method
507 private void displaySize(Hashtable table) {
508 if(table == null) {
509 logger.info("cached table is null");
510 return;
511 }
512 if (table.size() == 0) {
513 logger.info("cached table size is zero");
514 return;
515 }
516 int num_cached_coll = 0;
517 ArrayList cache_list = new ArrayList(table.values());
518 for (int i=0; i<cache_list.size(); i++) {
519 num_cached_coll += ((UserSessionCache)cache_list.get(i)).tableSize();
520 }
521 logger.info("Number of sessions : total number of cached collection info = " + table.size() + " : " + num_cached_coll);
522 }
523 /** merely a debugging method! */
524 private String tableToString(Hashtable table) {
525 String str = "";
526 Enumeration keys = table.keys ();
527 while(keys.hasMoreElements ()) {
528 String name = (String)keys.nextElement();
529 str += name + ", ";
530 }
531 return str;
532 }
533
534 /** this goes through each URL and adds in a session id if needed--
535 * its needed if the browser doesn't accept cookies
536 * also escapes things if needed
537 */
538 protected void encodeURLs(Node dataNode, HttpServletResponse response) {
539
540 if (dataNode == null) {
541 return;
542 }
543
544 Element data =null;
545
546 short nodeType = dataNode.getNodeType();
547 if (nodeType == Node.DOCUMENT_NODE) {
548 Document docNode = (Document)dataNode;
549 data = docNode.getDocumentElement() ;
550 }
551 else {
552 data = (Element)dataNode;
553 }
554
555 // get all the <a> elements
556 NodeList hrefs = data.getElementsByTagName("a");
557 for (int i=0; i<hrefs.getLength(); i++) {
558 Element a = (Element)hrefs.item(i);
559 // ugly hack to get rid of : in the args - interferes with session handling
560 String href = a.getAttribute("href");
561 if (!href.equals("")) {
562 if (href.indexOf("?")!=-1) {
563 String[] parts = href.split("\\?", -1);
564 parts[1]=parts[1].replaceAll(":", "%3A");
565 href = parts[0]+"?"+parts[1];
566 }
567 a.setAttribute("href", response.encodeURL(href));
568 }
569 }
570
571 // now find any submit bits - get all the <form> elements
572 NodeList forms = data.getElementsByTagName("form");
573 for (int i=0; i<forms.getLength(); i++) {
574 Element form = (Element)forms.item(i);
575 form.setAttribute("action", response.encodeURL(form.getAttribute("action")));
576 }
577 // are these the only cases where URLs occur??
578 // we should only do this for greenstone urls?
579
580 }
581
582 synchronized protected int getNextUserId() {
583 next_user_id++;
584 return next_user_id;
585 }
586
587 public void doPost(HttpServletRequest request,
588 HttpServletResponse response)
589 throws ServletException, IOException {
590 doGet(request,response);
591
592 }
593}
Note: See TracBrowser for help on using the repository browser.