source: greenstone3/trunk/src/java/org/greenstone/gsdl3/LibraryServlet.java@ 18234

Last change on this file since 18234 was 18234, checked in by oranfry, 15 years ago

changing the library servlet code for checking the el, href and rl query arguments, which until now would see el=direct as the same as elx=direct or elephant=direct - i.e. it would be satisfied as long as the first part of the argument name was correct. It is appropriate to expect an exact match so check for 'el=' instead of just 'el' and so on

  • Property svn:keywords set to Author Date Id Revision
File size: 23.5 KB
Line 
1package org.greenstone.gsdl3;
2
3import org.greenstone.gsdl3.comms.*;
4import org.greenstone.gsdl3.core.*;
5import org.greenstone.gsdl3.util.*;
6import org.greenstone.gsdl3.action.PageAction; // used to get the default action
7import org.w3c.dom.Document;
8import org.w3c.dom.Element;
9import org.w3c.dom.Node;
10import org.w3c.dom.NodeList;
11import java.io.*;
12import javax.servlet.*;
13import javax.servlet.http.*;
14import java.util.Enumeration;
15import java.util.ArrayList;
16import java.util.HashMap;
17import java.io.File;
18import java.util.Hashtable;
19import org.apache.log4j.*;
20
21
/** A servlet to serve the Greenstone library - we are using servlets instead
 * of CGI.
 * The init method is called only once - the first time the servlet classes
 * are loaded. Each time a request comes in to the servlet, the service()
 * method is called in a new thread (which in turn calls doGet/doPost etc).
 * It takes the a=p&p=home type args and builds a simple request to send to
 * its receptionist, which returns a result in HTML, because output=html
 * is set in the request.
 *
 * 18/Jul/07 xiao
 * Modified to make the cached parameters collection-specific.
 * Most of the work is done in doGet(), apart from the added inner class UserSessionCache.
 *
 * @see Receptionist
 */
37public class LibraryServlet extends HttpServlet {
38
39 /** the receptionist to send messages to */
40 protected Receptionist recept=null;
41
42 /** the default language - is specified by setting a servlet param,
43 * otherwise DEFAULT_LANG is used*/
44 protected String default_lang= null;
45
46 /** The default default - used if a default lang is not specified
47 * in the servlet params */
48 protected final String DEFAULT_LANG = "en";
49
50 /** container Document to create XML Nodes */
51 protected Document doc=null;
52
53 /** a converter class to parse XML and create Docs */
54 protected XMLConverter converter=null;
55
56 /** the cgi stuff - the Receptionist can add new args to this
57 *
58 * its used by the servlet to determine what args to save */
59 protected GSParams params = null;
60
61 /** user id - new one per session. This doesn't work if session state is saved between restarts - this requires this value to be saved too. */
62 protected int next_user_id = 0;
63
64 /** a hash that contains all the active session IDs mapped to the cached items
65 * It is updated whenever the whole site or a particular collection is reconfigured
66 * using the command a=s&sa=c or a=s&sa=c&c=xxx
67 * It is in the form: sid -> (UserSessionCache object)
68 */
69 protected Hashtable session_ids_table = new Hashtable();
70
71 /** the maximum interval that the cached info remains in session_ids_table (in seconds)
72 * This is set in web.xml
73 */
74 protected int session_expiration = 1800;
75
76 static Logger logger = Logger.getLogger(org.greenstone.gsdl3.LibraryServlet.class.getName());
77
78 /** initialise the servlet
79 */
80 public void init(ServletConfig config) throws ServletException {
81 // always call super.init;
82 super.init(config);
83 // disable preferences - does this work anyway??
84 //System.setProperty("java.util.prefs.PreferencesFactory", "org.greenstone.gsdl3.util.DisabledPreferencesFactory");
85
86 String library_name = config.getInitParameter(GSConstants.LIBRARY_NAME);
87 String gsdl3_home = config.getInitParameter(GSConstants.GSDL3_HOME);
88 String interface_name = config.getInitParameter(GSConstants.INTERFACE_NAME);
89 this.default_lang = config.getInitParameter(GSConstants.DEFAULT_LANG);
90 String sess_expire = config.getInitParameter(GSXML.SESSION_EXPIRATION);
91 if (sess_expire != null && !sess_expire.equals("")) {
92 this.session_expiration = Integer.parseInt(sess_expire);
93 }
94
95 if (library_name == null || interface_name ==null) {
96 // must have this
97 System.err.println("initialisation parameters not all set!");
98 System.err.println(" you must have libraryname and interfacename");
99 System.exit(1);
100 }
101
102 String site_name = config.getInitParameter(GSConstants.SITE_NAME);
103 String remote_site_name = null;
104 String remote_site_type = null;
105 String remote_site_address = null;
106
107 if (site_name == null) {
108 // no site, try for communicator
109 remote_site_name = config.getInitParameter("remote_site_name");
110 remote_site_type = config.getInitParameter("remote_site_type");
111 remote_site_address = config.getInitParameter("remote_site_address");
112 if (remote_site_name == null || remote_site_type == null || remote_site_address == null) {
113 System.err.println("initialisation paramters not all set!");
114 System.err.println("if site_name is not set, then you must have remote_site_name, remote_site_type and remote_site_address set");
115 System.exit(1);
116 }
117 }
118
119 if (this.default_lang == null) {
120 // choose english
121 this.default_lang = DEFAULT_LANG;
122 }
123
124 HashMap config_params = new HashMap();
125
126 config_params.put(GSConstants.LIBRARY_NAME, library_name);
127 config_params.put(GSConstants.INTERFACE_NAME, interface_name);
128 if (site_name != null) {
129 config_params.put(GSConstants.SITE_NAME, site_name);
130 }
131 this.converter = new XMLConverter();
132 this.doc = this.converter.newDOM();
133
134 // the receptionist -the servlet will talk to this
135 String recept_name = (String)config.getInitParameter("receptionist_class");
136 if (recept_name == null) {
137 this.recept = new DefaultReceptionist();
138 } else {
139 try {
140 this.recept = (Receptionist)Class.forName("org.greenstone.gsdl3.core."+recept_name).newInstance();
141 } catch (Exception e) { // cant use this new one, so use normal one
142 System.err.println("LibraryServlet configure exception when trying to use a new Receptionist "+recept_name+": "+e.getMessage());
143 e.printStackTrace();
144 this.recept = new DefaultReceptionist();
145 }
146 }
147 this.recept.setConfigParams(config_params);
148
149 // the receptionist uses a MessageRouter or Communicator to send its requests to. We either create a MessageRouter here for the designated site (if site_name set), or we create a Communicator for a remote site. The is given to teh Receptionist, and the servlet never talks to it again.directly.
150 if (site_name != null) {
151 String mr_name = (String)config.getInitParameter("messagerouter_class");
152 MessageRouter message_router = null;
153 if (mr_name == null) { // just use the normal MR
154 message_router = new MessageRouter();
155 } else { // try the specified one
156 try {
157 message_router = (MessageRouter)Class.forName("org.greenstone.gsdl3.core."+mr_name).newInstance();
158 } catch (Exception e) { // cant use this new one, so use normal one
159 System.err.println("LibraryServlet configure exception when trying to use a new MessageRouter "+mr_name+": "+e.getMessage());
160 e.printStackTrace();
161 message_router = new MessageRouter();
162 }
163 }
164
165 message_router.setSiteName(site_name);
166 message_router.setLibraryName(library_name);
167 message_router.configure();
168 this.recept.setMessageRouter(message_router);
169 } else {
170 // talking to a remote site, create a communicator
171 Communicator communicator = null;
172 // we need to create the XML to configure the communicator
173 Element site_elem = this.doc.createElement(GSXML.SITE_ELEM);
174 site_elem.setAttribute(GSXML.TYPE_ATT, remote_site_type);
175 site_elem.setAttribute(GSXML.NAME_ATT, remote_site_name);
176 site_elem.setAttribute(GSXML.ADDRESS_ATT, remote_site_address);
177
178 if (remote_site_type.equals(GSXML.COMM_TYPE_SOAP_JAVA)) {
179 communicator = new SOAPCommunicator();
180 } else {
181 System.err.println("LibraryServlet.init Error: invalid Communicator type: "+remote_site_type);
182 System.exit(1);
183 }
184
185 if (!communicator.configure(site_elem)) {
186 System.err.println("LibraryServlet.init Error: Couldn't configure communicator");
187 System.exit(1);
188 }
189 this.recept.setMessageRouter(communicator);
190 }
191
192 // the params arg thingy
193
194 String params_name = (String)config.getInitParameter("params_class");
195 if (params_name == null) {
196 this.params = new GSParams();
197 } else {
198 try {
199 this.params = (GSParams)Class.forName("org.greenstone.gsdl3.util."+params_name).newInstance();
200 } catch (Exception e) {
201 System.err.println("LibraryServlet configure exception when trying to use a new params thing "+params_name+": "+e.getMessage());
202 e.printStackTrace();
203 this.params = new GSParams();
204 }
205 }
206 // pass it to the receptionist
207 this.recept.setParams(this.params);
208 this.recept.configure();
209
210 }
211
212
213 private void logUsageInfo(HttpServletRequest request){
214 String usageInfo = "";
215
216 //session-info: get params stored in the session
217 HttpSession session = request.getSession(true);
218 Enumeration attributeNames = session.getAttributeNames();
219 while(attributeNames.hasMoreElements()) {
220 String name = (String)attributeNames.nextElement();
221 usageInfo +=name+"="+session.getAttribute(name)+" ";
222 }
223
224 //logged info = general-info + session-info
225 usageInfo =
226 request.getServletPath()+" "+ //serlvet
227 "["+request.getQueryString()+"]" +" "+ //the query string
228 "["+usageInfo.trim()+"]" +" "+ // params stored in a session
229 request.getRemoteAddr()+" "+ //remote address
230 request.getRequestedSessionId()+" "+ //session id
231 request.getHeader("user-agent")+" "; //the remote brower info
232
233 logger.info(usageInfo);
234
235 }
236
237 public class UserSessionCache implements HttpSessionBindingListener {
238
239 String session_id = "";
240
241 /** a hash that maps the session ID to a hashtable that maps the coll_name to its parameters
242 * coll_name -> Hashtable (param_name -> param_value)
243 */
244 protected Hashtable coll_name_params_table = null;
245
246 public UserSessionCache(String id, Hashtable table) {
247 session_id = id;
248 coll_name_params_table = (table == null)? new Hashtable() : table;
249 }
250
251 protected void cleanupCache(String coll_name) {
252 if (coll_name_params_table.containsKey(coll_name)) {
253 coll_name_params_table.remove(coll_name);
254 }
255 }
256
257 protected Hashtable getParamsTable() {
258 return coll_name_params_table;
259 }
260
261 public void valueBound(HttpSessionBindingEvent event) {
262 // Do nothing
263 }
264
265 public void valueUnbound(HttpSessionBindingEvent event) {
266 if(session_ids_table.containsKey(session_id)) {
267 session_ids_table.remove(session_id);
268 }
269 }
270
271 public int tableSize() {
272 return (coll_name_params_table == null)? 0 : coll_name_params_table.size();
273 }
274
275 }
276
277 public void doGet (HttpServletRequest request, HttpServletResponse response)
278 throws ServletException, IOException {
279 logUsageInfo (request);
280
281 String query_string = request.getQueryString();
282 if (query_string!=null){
283 String[] query_arr = query_string.split("&");
284 boolean redirect = false;
285 String href = null;
286 String rl = null;
287 for (int i=0;i<query_arr.length;i++){
288 if (query_arr[i].startsWith("el=")){
289 if (query_arr[i].substring(query_arr[i].indexOf("=")+1,query_arr[i].length()).equals("direct")){
290 redirect = true;
291 }
292 }else if(query_arr[i].startsWith("href=")){
293 href = query_arr[i].substring(query_arr[i].indexOf("=")+1,query_arr[i].length());
294 href = href.replaceAll("%2f", "/");
295 href = href.replaceAll("%7e", "~");
296 href = href.replaceAll("%3f", "?");
297 href = href.replaceAll("%3A", "\\:");
298 }else if(query_arr[i].startsWith("rl=")){
299 rl = query_arr[i].substring(query_arr[i].indexOf("=")+1,query_arr[i].length());
300 }
301 }
302 //if query_string contains "el=", the web page will be redirected to the external URl, otherwise a greenstone page with an external URL will be displayed
303 //"rl=0" this is an external link
304 //"rl=1" this is an internal link
305 if ((redirect) && (href != null) && (rl.equals("0"))){// This is an external link, the web page is re-directed to the external URL (&el=&rl=0&href="http://...")
306 response.setContentType("text/xml");
307 response.sendRedirect(href);
308 }
309 }
310 // Nested Diagnostic Configurator to identify the client for
311
312 HttpSession session = request.getSession (true);
313 session.setMaxInactiveInterval(session_expiration);
314 String uid = (String)session.getAttribute (GSXML.USER_ID_ATT);
315 if (uid ==null) {
316 uid = ""+getNextUserId ();
317 session.setAttribute (GSXML.USER_ID_ATT, uid);
318 }
319 request.setCharacterEncoding ("UTF-8");
320 response.setContentType ("text/html;charset=UTF-8");
321 PrintWriter out = response.getWriter ();
322
323 String lang = request.getParameter (GSParams.LANGUAGE);
324 if (lang==null || lang.equals ("")) {
325 // try the session cached lang
326 lang = (String)session.getAttribute (GSParams.LANGUAGE);
327 if (lang==null || lang.equals ("")) {
328 // still not set, use the default
329 lang = this.default_lang;
330 }
331 }
332
333 // set the lang in the session
334 session.setAttribute (GSParams.LANGUAGE, lang);
335
336 String output = request.getParameter (GSParams.OUTPUT);
337 if (output==null || output.equals ("")) {
338 output = "html"; // uses html by default
339 }
340
341 // the request to the receptionist
342 Element xml_message = this.doc.createElement (GSXML.MESSAGE_ELEM);
343 Element xml_request = GSXML.createBasicRequest (this.doc, GSXML.REQUEST_TYPE_PAGE, "", lang, uid);
344 xml_request.setAttribute (GSXML.OUTPUT_ATT, output);
345 xml_message.appendChild (xml_request);
346
347 String action = request.getParameter (GSParams.ACTION);
348 String subaction = request.getParameter (GSParams.SUBACTION);
349 String collection = request.getParameter(GSParams.COLLECTION);
350 String service = request.getParameter(GSParams.SERVICE);
351
352 // We clean up the cache session_ids_table if system
353 // commands are issued (and also don't need to do caching for this request)
354 boolean should_cache = true;
355 if(action != null && action.equals(GSParams.SYSTEM)) {
356 should_cache = false;
357
358 // we may want to remove all collection cache info, or just a specific collection
359 boolean clean_all = true;
360 String clean_collection = null;
361 // system commands are to activate/deactivate stuff
362 // collection param is in the sc parameter.
363 // don't like the fact that it is hard coded here
364 String coll = request.getParameter(GSParams.SYSTEM_CLUSTER);
365 if (coll != null && !coll.equals("")) {
366 clean_all = false;
367 clean_collection = coll;
368 } else {
369 // check other system types
370 if (subaction.equals("a") || subaction.equals("d")) {
371 String module_name = request.getParameter("sn");
372 if (module_name != null && !module_name.equals("")) {
373 clean_all = false;
374 clean_collection = module_name;
375 }
376 }
377 }
378 if (clean_all) {
379 session_ids_table = new Hashtable();
380 session.removeAttribute(GSXML.USER_SESSION_CACHE_ATT);
381 } else {
382 // just clean up info for clean_collection
383 ArrayList cache_list = new ArrayList(session_ids_table.values());
384 for (int i=0; i<cache_list.size(); i++) {
385 UserSessionCache cache = (UserSessionCache)cache_list.get(i);
386 cache.cleanupCache(clean_collection);
387 }
388
389 }
390
391 }
392
393 // cache_key is the collection name, or service name
394 String cache_key = collection;
395 if (cache_key == null || cache_key.equals("")) {
396 cache_key = service;
397 }
398
399 // logger.info("should_cache= " + should_cache);
400
401 //clear the collection-specific cache in the session, since we have no way to know whether this session is
402 //about the same collection as the last session or not.
403 Enumeration attributeNames = session.getAttributeNames();
404 while(attributeNames.hasMoreElements()) {
405 String name = (String)attributeNames.nextElement();
406 if (!name.equals (GSXML.USER_SESSION_CACHE_ATT)
407 && !name.equals (GSParams.LANGUAGE)
408 && !name.equals (GSXML.USER_ID_ATT)) {
409
410 session.removeAttribute(name);
411 }
412 }
413
414 // create a dumy collection name for Authentication
415 if (subaction != null && subaction.equals("authen")){
416 cache_key = "0000000000";
417 }
418
419 UserSessionCache session_cache = null;
420 Hashtable param_table = null;
421 Hashtable table = null;
422 String sid = session.getId();
423 if (should_cache == true && cache_key != null && !cache_key.equals("")) {
424 if (session_ids_table.containsKey(sid)) {
425 session_cache = (UserSessionCache)session_ids_table.get(sid);
426 param_table = session_cache.getParamsTable();
427 logger.info("collections in table: " + tableToString(param_table));
428 if (param_table.containsKey(cache_key)) {
429 //logger.info("existing table: " + collection);
430 table = (Hashtable)param_table.get(cache_key);
431 } else {
432 table = new Hashtable();
433 param_table.put(cache_key, table);
434 //logger.info("new table: " + collection);
435 }
436 } else {
437 param_table = new Hashtable();
438 table = new Hashtable();
439 param_table.put(cache_key, table);
440 session_cache = new UserSessionCache(sid, param_table);
441 session_ids_table.put(sid, session_cache);
442 session.setAttribute(GSXML.USER_SESSION_CACHE_ATT, session_cache);
443 //logger.info("new session id");
444 }
445 }
446
447 if (action==null || action.equals ("")) {
448 // should we do all the following stuff if using default page?
449 // display the home page - the default page
450 xml_request.setAttribute (GSXML.ACTION_ATT, "p");
451 xml_request.setAttribute (GSXML.SUBACTION_ATT, PageAction.HOME_PAGE);
452 }
453 else {
454 xml_request.setAttribute (GSXML.ACTION_ATT, action);
455 if (subaction != null) {
456 xml_request.setAttribute (GSXML.SUBACTION_ATT, subaction);
457 }
458
459 // create the param list for the greenstone request - includes
460 // the params from the current request and any others from the saved session
461 Element xml_param_list = this.doc.createElement (GSXML.PARAM_ELEM+GSXML.LIST_MODIFIER);
462 xml_request.appendChild (xml_param_list);
463
464 Enumeration params = request.getParameterNames ();
465 while(params.hasMoreElements ()) {
466 String name = (String)params.nextElement ();
467 if (!name.equals (GSParams.ACTION)
468 && !name.equals (GSParams.SUBACTION)
469 && !name.equals (GSParams.LANGUAGE)
470 && !name.equals (GSParams.OUTPUT)) {// we have already dealt with these
471
472 String value="";
473 String [] values = request.getParameterValues (name);
474 value = values[0];
475 if (values.length > 1) {
476 for (int i=1; i< values.length; i++) {
477 value += ","+values[i];
478 }
479 }
480 // either add it to the param list straight away, or save it to the session and add it later
481 if (this.params.shouldSave (name) && table != null) {
482 table.put(name, value);
483 } else {
484 Element param = this.doc.createElement (GSXML.PARAM_ELEM);
485 param.setAttribute (GSXML.NAME_ATT, name);
486 param.setAttribute (GSXML.VALUE_ATT, GSXML.xmlSafe (value));
487 xml_param_list.appendChild (param);
488 }
489 }
490 }
491 //put everything in the table into the session
492 // do we need to do this? why not just put from table into param list
493 if (table != null) {
494 Enumeration keys = table.keys ();
495 while(keys.hasMoreElements ()) {
496 String name = (String)keys.nextElement();
497 session.setAttribute(name, (String)table.get(name));
498 }
499 }
500
501 // put in all the params from the session cache
502 params = session.getAttributeNames ();
503 while(params.hasMoreElements ()) {
504 String name = (String)params.nextElement ();
505
506 if ( !name.equals (GSXML.USER_SESSION_CACHE_ATT)
507 && !name.equals (GSParams.LANGUAGE)
508 && !name.equals (GSXML.USER_ID_ATT)) {
509
510 // lang and uid are stored but we dont want it in the param list cos its already in the request
511 Element param = this.doc.createElement (GSXML.PARAM_ELEM);
512 param.setAttribute (GSXML.NAME_ATT, name);
513 String value = GSXML.xmlSafe ((String)session.getAttribute (name));
514
515 // ugly hack to undo : escaping
516 value = value.replaceAll ("%3A", "\\:");
517 param.setAttribute (GSXML.VALUE_ATT,value);
518 xml_param_list.appendChild (param);
519 }
520 }
521 }
522
523 if (!output.equals ("html")) {
524 response.setContentType ("text/xml"); // for now use text
525 }
526
527 //GSXML.printXMLNode(xml_message);
528
529 Node xml_result = this.recept.process(xml_message);
530 encodeURLs (xml_result, response);
531 out.println (this.converter.getPrettyString (xml_result));
532
533 displaySize(session_ids_table);
534
535 } //end of doGet(HttpServletRequest, HttpServletResponse)
536
537 //a debugging method
538 private void displaySize(Hashtable table) {
539 if(table == null) {
540 logger.info("cached table is null");
541 return;
542 }
543 if (table.size() == 0) {
544 logger.info("cached table size is zero");
545 return;
546 }
547 int num_cached_coll = 0;
548 ArrayList cache_list = new ArrayList(table.values());
549 for (int i=0; i<cache_list.size(); i++) {
550 num_cached_coll += ((UserSessionCache)cache_list.get(i)).tableSize();
551 }
552 logger.info("Number of sessions : total number of cached collection info = " + table.size() + " : " + num_cached_coll);
553 }
554
555 /** merely a debugging method! */
556 private String tableToString(Hashtable table) {
557 String str = "";
558 Enumeration keys = table.keys ();
559 while(keys.hasMoreElements ()) {
560 String name = (String)keys.nextElement();
561 str += name + ", ";
562 }
563 return str;
564 }
565
566 /** this goes through each URL and adds in a session id if needed--
567 * its needed if the browser doesn't accept cookies
568 * also escapes things if needed
569 */
570 protected void encodeURLs(Node dataNode, HttpServletResponse response) {
571
572 if (dataNode == null) {
573 return;
574 }
575
576 Element data =null;
577
578 short nodeType = dataNode.getNodeType();
579 if (nodeType == Node.DOCUMENT_NODE) {
580 Document docNode = (Document)dataNode;
581 data = docNode.getDocumentElement() ;
582 }
583 else {
584 data = (Element)dataNode;
585 }
586
587 // get all the <a> elements
588 NodeList hrefs = data.getElementsByTagName("a");
589 for (int i=0; i<hrefs.getLength(); i++) {
590 Element a = (Element)hrefs.item(i);
591 // ugly hack to get rid of : in the args - interferes with session handling
592 String href = a.getAttribute("href");
593 if (!href.equals("")) {
594 if (href.indexOf("?")!=-1) {
595 String[] parts = href.split("\\?", -1);
596 parts[1]=parts[1].replaceAll(":", "%3A");
597 href = parts[0]+"?"+parts[1];
598 }
599 a.setAttribute("href", response.encodeURL(href));
600 }
601 }
602
603 // now find any submit bits - get all the <form> elements
604 NodeList forms = data.getElementsByTagName("form");
605 for (int i=0; i<forms.getLength(); i++) {
606 Element form = (Element)forms.item(i);
607 form.setAttribute("action", response.encodeURL(form.getAttribute("action")));
608 }
609 // are these the only cases where URLs occur??
610 // we should only do this for greenstone urls?
611
612 }
613
614 synchronized protected int getNextUserId() {
615 next_user_id++;
616 return next_user_id;
617 }
618
619 public void doPost(HttpServletRequest request, HttpServletResponse response)
620 throws ServletException, IOException {
621
622 doGet(request,response);
623
624 }
625
626}
Note: See TracBrowser for help on using the repository browser.